npm - @sanity/ailf - Versions diffs - 0.1.20 → 0.1.22 - Mend

@sanity/ailf 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/_vendor/ailf-core/types/index.d.ts +24 -0
package/dist/pipeline/calculate-scores.js +15 -3
package/package.json +1 -1

package/dist/_vendor/ailf-core/types/index.d.ts CHANGED

@@ -565,8 +565,30 @@ export interface RetrievalMetrics {
         avgRecall: number;
     };
 }
+/** Per-feature agent behavior data — how agents interacted with docs */
+export interface FeatureAgentBehavior {
+    avgDocPagesVisited: number;
+    avgNetworkTimeMs: number;
+    avgSearchesPerformed: number;
+    docSlugsVisited: string[];
+    externalDomains: string[];
+    feature: string;
+    searchQueries: string[];
+    tasksWithBehaviorData: number;
+}
+/** Aggregate agent behavior stats across all features */
+export interface OverallAgentBehavior {
+    avgDocPagesVisited: number;
+    avgNetworkTimeMs: number;
+    avgSearchesPerformed: number;
+    testsWithBehaviorData: number;
+    totalUniqueDocSlugs: number;
+    totalUniqueSearchQueries: number;
+}
 /** Top-level score summary (the shape of score-summary.json) */
 export interface ScoreSummary {
+    /** Per-feature agent behavior data (only present when agentic mode ran) */
+    agentBehavior?: FeatureAgentBehavior[];
     belowCritical: string[];
     /**
      * All Sanity documents used across the entire evaluation.
@@ -604,6 +626,8 @@ export interface ScoreSummary {
         docLift: number;
     }[];
     overall: {
+        /** Aggregate agent behavior stats (only present when agentic mode ran) */
+        agentBehavior?: OverallAgentBehavior;
         /** Average actual (agentic) score across areas. Absent if no agentic data. */
         avgActualScore?: number;
         /** Average ceiling score across all areas */

package/dist/pipeline/calculate-scores.js CHANGED

@@ -324,13 +324,25 @@ function calculateScores(resultsPath, weights) {
 /**
  * Extracts agent behavior summary from a test result's metadata.
  * Returns null if the test was not run with the instrumented provider.
+ *
+ * Checks two locations because Promptfoo may flatten/merge the metadata
+ * object differently than how the provider originally set it:
+ *   1. metadata.agentBehaviorSummary (set directly by AgenticProvider)
+ *   2. metadata.agentBehavior.summary (nested in the full behavior log)
  */
 function extractAgentBehavior(test) {
     const { metadata } = test;
-    if (!metadata?.agentBehaviorSummary) {
-        return null;
+    if (metadata?.agentBehaviorSummary) {
+        return metadata.agentBehaviorSummary;
+    }
+    // Fallback: Promptfoo may drop the top-level agentBehaviorSummary
+    // field during serialization, but the data is nested inside the
+    // full agentBehavior log.
+    const behavior = metadata?.agentBehavior;
+    if (behavior?.summary) {
+        return behavior.summary;
     }
-    return metadata.agentBehaviorSummary;
+    return null;
 }
 /**
  * Extracts grader (assertion) cost from the raw Promptfoo results file.

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "0.1.20",
+  "version": "0.1.22",
   "private": false,
   "publishConfig": {
     "access": "restricted"