@sanity/ailf-studio 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +33 -0
- package/dist/index.js +816 -363
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -193,6 +193,15 @@ declare const GLOSSARY: {
|
|
|
193
193
|
readonly failureMode: "The type of documentation problem: missing-docs (functionality not covered), incorrect-docs (factual errors), outdated-docs (stale API/patterns), or poor-structure (hard to find/understand).";
|
|
194
194
|
readonly estimatedLift: "Estimated composite score improvement if this gap is fully fixed. Based on raising bottleneck dimensions to the median of non-bottlenecked dimensions.";
|
|
195
195
|
readonly confidence: "How confident the classifier is in this diagnosis. High = strong keyword + structural signal agreement. Medium = partial agreement. Low = weak signals only.";
|
|
196
|
+
readonly agentBehaviorOverview: "How AI agents interacted with your documentation during evaluation: what they searched for, which pages they visited, and how much time they spent on network requests.";
|
|
197
|
+
readonly searchQueries: "The exact search queries agents used to find documentation. Helps you understand how agents discover your content and whether your docs appear for relevant queries.";
|
|
198
|
+
readonly docSlugsVisited: "Documentation page slugs that agents actually visited during evaluation. Compare against canonical docs to see if agents found the right pages.";
|
|
199
|
+
readonly externalDomains: "Non-Sanity domains that agents contacted during evaluation. High external domain counts may indicate agents couldn't find what they needed in your docs.";
|
|
200
|
+
readonly avgDocPagesVisited: "Average number of documentation pages visited per test. Higher counts can mean agents need to consult many pages (complex task) or can't find the right one quickly.";
|
|
201
|
+
readonly avgSearchesPerformed: "Average number of web searches performed per test. High search counts can indicate docs are hard to discover through search engines.";
|
|
202
|
+
readonly avgNetworkTimeMs: "Average time spent on network requests per test. Includes page fetches, search queries, and API calls.";
|
|
203
|
+
readonly totalRequests: "Total number of HTTP requests the agent made during the test, including searches, page visits, and API calls.";
|
|
204
|
+
readonly totalBytesDownloaded: "Total bytes downloaded by the agent. Large downloads may indicate the agent is fetching many pages or very large documents.";
|
|
196
205
|
readonly dimTaskCompletion: "Change in task completion between runs. Positive means implementations are more complete.";
|
|
197
206
|
readonly dimCodeCorrectness: "Change in code correctness between runs. Positive means better code quality.";
|
|
198
207
|
readonly dimDocCoverage: "Change in doc coverage between runs. Positive means the docs are providing more useful information.";
|
|
@@ -656,8 +665,30 @@ interface JudgmentData {
|
|
|
656
665
|
score: number;
|
|
657
666
|
taskId: string;
|
|
658
667
|
}
|
|
668
|
+
/** Per-feature agent behavior data — how agents interacted with docs */
|
|
669
|
+
interface FeatureAgentBehaviorData {
|
|
670
|
+
avgDocPagesVisited: number;
|
|
671
|
+
avgNetworkTimeMs: number;
|
|
672
|
+
avgSearchesPerformed: number;
|
|
673
|
+
docSlugsVisited: string[];
|
|
674
|
+
externalDomains: string[];
|
|
675
|
+
feature: string;
|
|
676
|
+
searchQueries: string[];
|
|
677
|
+
tasksWithBehaviorData: number;
|
|
678
|
+
}
|
|
679
|
+
/** Overall agent behavior stats (aggregated across all features) */
|
|
680
|
+
interface OverallAgentBehaviorData {
|
|
681
|
+
avgDocPagesVisited: number;
|
|
682
|
+
avgNetworkTimeMs: number;
|
|
683
|
+
avgSearchesPerformed: number;
|
|
684
|
+
testsWithBehaviorData: number;
|
|
685
|
+
totalUniqueDocSlugs: number;
|
|
686
|
+
totalUniqueSearchQueries: number;
|
|
687
|
+
}
|
|
659
688
|
/** Summary data as stored in Sanity */
|
|
660
689
|
interface SummaryData {
|
|
690
|
+
/** Per-feature agent behavior data (only present when agentic mode ran) */
|
|
691
|
+
agentBehavior?: FeatureAgentBehaviorData[] | null;
|
|
661
692
|
belowCritical: string[];
|
|
662
693
|
/** All Sanity documents used across the entire evaluation */
|
|
663
694
|
documentManifest?: DocumentRef[];
|
|
@@ -665,6 +696,8 @@ interface SummaryData {
|
|
|
665
696
|
lowestArea: string;
|
|
666
697
|
lowestScore: number;
|
|
667
698
|
overall: {
|
|
699
|
+
/** Aggregate agent behavior stats (only present when agentic mode ran) */
|
|
700
|
+
agentBehavior?: OverallAgentBehaviorData;
|
|
668
701
|
avgDocLift: number;
|
|
669
702
|
avgScore: number;
|
|
670
703
|
avgCeilingScore?: number;
|