@sanity/ailf-studio 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +22 -7
- package/dist/index.js +762 -695
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -2585,8 +2585,8 @@ var taskSchema = defineType5({
|
|
|
2585
2585
|
type: "boolean"
|
|
2586
2586
|
}),
|
|
2587
2587
|
defineField5({
|
|
2588
|
-
description: 'Rubric mode for baseline. "
|
|
2589
|
-
initialValue: "
|
|
2588
|
+
description: 'Rubric mode for baseline. "full" uses the same rubric as gold, "abbreviated" uses a shorter rubric, "none" skips rubric grading.',
|
|
2589
|
+
initialValue: "full",
|
|
2590
2590
|
name: "rubric",
|
|
2591
2591
|
options: {
|
|
2592
2592
|
list: [
|
|
@@ -3064,14 +3064,14 @@ import {
|
|
|
3064
3064
|
Box as Box25,
|
|
3065
3065
|
Button as Button9,
|
|
3066
3066
|
Container,
|
|
3067
|
-
Flex as
|
|
3067
|
+
Flex as Flex31,
|
|
3068
3068
|
Stack as Stack34,
|
|
3069
3069
|
Tab as Tab2,
|
|
3070
3070
|
TabList as TabList2,
|
|
3071
3071
|
TabPanel as TabPanel2,
|
|
3072
|
-
Text as
|
|
3072
|
+
Text as Text41
|
|
3073
3073
|
} from "@sanity/ui";
|
|
3074
|
-
import { useCallback as
|
|
3074
|
+
import { useCallback as useCallback27 } from "react";
|
|
3075
3075
|
import { useRouter as useRouter3 } from "sanity/router";
|
|
3076
3076
|
|
|
3077
3077
|
// src/lib/help-context.ts
|
|
@@ -3305,7 +3305,7 @@ Click into any report for the full breakdown: per-area scores, diagnostics, and
|
|
|
3305
3305
|
{
|
|
3306
3306
|
"id": "scoring-model",
|
|
3307
3307
|
"title": "Understanding Scores",
|
|
3308
|
-
"body": "## The three dimensions\n\nEvery evaluation task is scored on three dimensions, each graded 0\u2013100:\n\n- **Task Completion (50% weight)** \u2014 Can the AI implement the requested feature?\n Does the output actually do what was asked?\n- **Code Correctness (25% weight)** \u2014 Is the generated code idiomatic, correct,\n and following best practices?\n- **Doc Coverage (25% weight)** \u2014 Did the documentation provide the information\n needed to implement the feature?\n\n## How the overall score is calculated\n\nThe three dimensions combine into a single **AI Literacy Score** per task
|
|
3308
|
+
"body": "## The three dimensions\n\nEvery evaluation task is scored on three dimensions, each graded 0\u2013100:\n\n- **Task Completion (50% weight)** \u2014 Can the AI implement the requested feature?\n Does the output actually do what was asked?\n- **Code Correctness (25% weight)** \u2014 Is the generated code idiomatic, correct,\n and following best practices?\n- **Doc Coverage (25% weight)** \u2014 Did the documentation provide the information\n needed to implement the feature?\n\n## How the overall score is calculated\n\nThe three dimensions combine into a single **AI Literacy Score** per task using\nnamed scoring profiles from `config/rubrics.yaml`:\n\n```\nGold (with docs): Total = Task \xD7 0.50 + Code \xD7 0.25 + Docs \xD7 0.25\nBaseline (no docs): Total = Task \xD7 0.60 + Code \xD7 0.40\n```\n\nThe gold profile includes all three dimensions. The baseline profile excludes\nDoc Coverage because it is undefined when no documentation is provided. This\nensures Doc Lift (ceiling \u2212 floor) is a clean structural measurement of\ndocumentation value.\n\nThe weighted composite produces a score from 0\u2013100. Scores are then averaged\nacross all tasks in a feature area to produce a **per-area score**, and across\nall areas to produce the **overall score**.\n\n## What the numbers mean\n\n| Score range | Interpretation |\n| ------------ | ----------------------------------------------------------------- |\n| **80\u2013100** | Docs are working well \u2014 AI agents produce correct implementations |\n| **70\u201379** | Needs attention \u2014 there may be gaps in specific dimensions |\n| **Below 70** | Weak \u2014 AI agents consistently struggle with this area |\n\n## Ceiling decomposition (baseline mode)\n\nWhen running in baseline mode, each task is evaluated twice \u2014 with and without\ndocumentation. 
This produces:\n\n- **Floor score** \u2014 Score without docs (what the model knows from training data\n alone)\n- **Ceiling score** \u2014 Score with gold-standard docs injected directly into the\n prompt\n- **Doc Lift** \u2014 Ceiling minus floor. Positive means docs help; negative means\n docs hurt.\n- **Doc Quality Gap** \u2014 100 minus ceiling. Room for documentation improvement.\n\n## Three-layer decomposition (full mode)\n\nFull mode adds a third measurement \u2014 what happens when AI agents find docs on\ntheir own:\n\n- **Floor** \u2014 No docs (parametric knowledge only)\n- **Ceiling** \u2014 Gold-standard docs injected (best the docs can do)\n- **Actual** \u2014 Agent-retrieved docs (real-world performance)\n- **Retrieval Gap** \u2014 Ceiling minus actual (quality lost to findability)\n- **Infrastructure Efficiency** \u2014 Actual \xF7 ceiling (what fraction of doc quality\n reaches agents)\n\n## Cost tracking\n\nEach evaluation also tracks token costs:\n\n- **Provider cost** \u2014 Token usage for generating implementations\n- **Grader cost** \u2014 Token usage for the grading model's assessments\n- **Total cost** \u2014 Both combined, reported in the score summary",
|
|
3309
3309
|
"source": "docs/help/scoring-model.md",
|
|
3310
3310
|
"related": [
|
|
3311
3311
|
"three-layer",
|
|
@@ -3337,7 +3337,7 @@ Click into any report for the full breakdown: per-area scores, diagnostics, and
|
|
|
3337
3337
|
{
|
|
3338
3338
|
"id": "glossary",
|
|
3339
3339
|
"title": "Glossary",
|
|
3340
|
-
"body": "**Overall Score**\n: A weighted average across all feature areas: Task Completion (50%), Code Correctness (25%), and Doc Coverage (25%).\n\n**Doc Lift**\n: How much the docs help, compared to the model's training data alone. This is the score with docs minus the score without. Higher is better.\n\n**Actual Score**\n: How well an AI agent scores when it has to find docs on its own through web search and page fetching. This is the real-world scenario. Only available in full mode.\n\n**Retrieval Gap**\n: The score lost because agents can't find or use all the relevant docs. Calculated as ceiling minus actual. Lower is better; zero means agents find everything.\n\n**Infra Efficiency**\n: What percentage of the docs' potential quality actually reaches agents (actual \xF7 ceiling). 100% means agents find and use all relevant docs perfectly.\n\n**Floor**\n: Score without any documentation. This tells you what the model already knows from its training data.\n\n**Ceiling**\n: Score with gold-standard docs injected directly into the prompt. This is the best the documentation can do.\n\n**Actual**\n: Score when an AI agent finds docs on its own through web search and page fetching. This is the real-world experience.\n\n**Ret Gap**\n: Quality lost to discoverability (ceiling minus actual). The gap between what the docs could deliver and what agents actually get.\n\n**Efficiency**\n: What fraction of the docs' quality reaches agents in practice (actual \xF7 ceiling, shown as a percentage).\n\n**Inverted Ret Gap**\n: \u26A0\uFE0F Inverted retrieval gap: agents that can't find the docs actually score higher, because the docs hurt performance. This usually means there's a doc quality problem.\n\n**Score**\n: Weighted score for this feature area: Task Completion \xD7 50% + Code Correctness \xD7 25% + Doc Coverage \xD7 25%.\n\n**Task Completion**\n: Can the LLM implement the requested feature? 
Graded 0\u2013100.\n\n**Code Correctness**\n: Is the generated code idiomatic, correct, and following best practices? Graded 0\u2013100.\n\n**Doc Coverage**\n: Did the docs provide the information needed to implement the feature? Graded 0\u2013100.\n\n**Tests**\n: Number of test cases in this feature area.\n\n**Overall Delta**\n: Change in overall score between the two runs. Positive means the experiment scored higher.\n\n**Actual Delta**\n: Change in actual (agent-retrieved) score between runs. Positive means agents did better.\n\n**Ret Gap Delta**\n: Change in retrieval gap between runs. Negative is good here: it means the gap shrank and agents found more relevant docs.\n\n**Efficiency Delta**\n: Change in infrastructure efficiency between runs. Positive means agents are capturing more of the docs' potential.\n\n**Baseline**\n: The reference run you're comparing against.\n\n**Experiment**\n: The new run you're evaluating.\n\n**Delta**\n: Difference between experiment and baseline. Positive means improvement, negative means regression.\n\n**Change**\n: Whether the change is meaningful: improved, regressed, or unchanged (within the noise threshold).\n\n**Low Scoring Judgments**\n: The grading model's explanations for tests that scored below 70/100.\n\n**Judgment Reason**\n: The grading model's natural language explanation of what went wrong.\n\n**Health Strong**\n: Feature areas scoring 80 or above. The docs are working well for these features \u2014 AI agents produce correct, complete implementations.\n\n**Health Attention**\n: Feature areas scoring 70\u201379. These are okay but could be improved \u2014 there may be gaps in specific dimensions like doc coverage or code correctness.\n\n**Health Weak**\n: Feature areas scoring below 70. 
The docs are not providing enough support for AI agents to implement these features correctly.\n\n**Negative Doc Lift Metric**\n: Number of areas where the documentation actually hurts AI performance \u2014 the model scores higher without docs than with them. This usually means the docs contain outdated patterns or incorrect examples.\n\n**Weak Areas**\n: Feature areas where the overall score is below 70. These need the most attention \u2014 low scores mean AI agents consistently struggle to implement these features.\n\n**Docs Hurt**\n: Areas where the floor score (no docs) is higher than the ceiling score (with docs). The documentation is actively misleading the model. These docs need to be rewritten or removed.\n\n**Retrieval Issues**\n: Areas where AI agents can find less than 70% of the available doc quality. The docs exist and are good, but agents can't discover them through search. Consider improving page titles, metadata, or search engine indexing.\n\n**Dim Weaknesses**\n: Individual grading dimensions scoring below 50 within an area. These are the specific skills where AI agents fail most \u2014 task completion (can it build the feature?), code correctness (is the code right?), or doc coverage (did it use the docs?).\n\n**Efficiency Anomalies**\n: Areas where agent efficiency exceeds 100% \u2014 meaning agents perform better with self-found docs than with gold-standard docs injected directly. This can indicate doc quality issues (injected docs confuse the model) or agent memorization.\n\n**Doc Lift Wins**\n: Areas where documentation boosts AI performance by 5 or more points. Higher doc lift means the docs are providing crucial information that the model doesn't already know.\n\n**Retrieval Excellence**\n: Areas where AI agents successfully find and use at least 85% of the available doc quality through web search. 
Good retrieval means your docs are well-indexed and easy for agents to discover.\n\n**Strengths**\n: What's working well: high-scoring areas, dimensions where the docs are strong, and areas where AI agents successfully find and use the documentation.\n\n**Recommendations**\n: Prioritized remediation plan from gap analysis. Each recommendation identifies a documentation problem, the affected feature area, and the estimated score lift from fixing it.\n\n**Total Potential Lift**\n: Aggregate potential score lift if all identified gaps were fixed. This is a conservative estimate \u2014 each gap targets the median of non-bottlenecked dimensions, not 100.\n\n**Failure Mode**\n: The type of documentation problem: missing-docs (functionality not covered), incorrect-docs (factual errors), outdated-docs (stale API/patterns), or poor-structure (hard to find/understand).\n\n**Estimated Lift**\n: Estimated composite score improvement if this gap is fully fixed. Based on raising bottleneck dimensions to the median of non-bottlenecked dimensions.\n\n**Confidence**\n: How confident the classifier is in this diagnosis. High = strong keyword + structural signal agreement. Medium = partial agreement. Low = weak signals only.\n\n**Agent Behavior Overview**\n: How AI agents interacted with your documentation during evaluation: what they searched for, which pages they visited, and how much time they spent on network requests.\n\n**Search Queries**\n: The exact search queries agents used to find documentation. Helps you understand how agents discover your content and whether your docs appear for relevant queries.\n\n**Doc Slugs Visited**\n: Documentation page slugs that agents actually visited during evaluation. Compare against canonical docs to see if agents found the right pages.\n\n**External Domains**\n: Non-Sanity domains that agents contacted during evaluation. 
High external domain counts may indicate agents couldn't find what they needed in your docs.\n\n**Avg Doc Pages Visited**\n: Average number of documentation pages visited per test. Higher counts can mean agents need to consult many pages (complex task) or can't find the right one quickly.\n\n**Avg Searches Performed**\n: Average number of web searches performed per test. High search counts can indicate docs are hard to discover through search engines.\n\n**Avg Network Time Ms**\n: Average time spent on network requests per test. Includes page fetches, search queries, and API calls.\n\n**Total Requests**\n: Total number of HTTP requests the agent made during the test, including searches, page visits, and API calls.\n\n**Total Bytes Downloaded**\n: Total bytes downloaded by the agent. Large downloads may indicate the agent is fetching many pages or very large documents.\n\n**Dim Task Completion**\n: Change in task completion between runs. Positive means implementations are more complete.\n\n**Dim Code Correctness**\n: Change in code correctness between runs. Positive means better code quality.\n\n**Dim Doc Coverage**\n: Change in doc coverage between runs. Positive means the docs are providing more useful information.\n\n**Area Delta**\n: Score change for this area compared to the previous evaluation run.\n\n**Source Production**\n: Production source \u2014 docs fetched from the live production dataset. Scores reflect what real users and AI agents experience today.\n\n**Source Branch**\n: Branch source \u2014 docs fetched from a branch or draft dataset. Use this to preview how content changes affect scores before publishing.\n\n**Source Local**\n: Local source \u2014 docs fetched from local files or a local dev server. 
Useful for testing doc changes before pushing.\n\n**Report Score**\n: The overall weighted score for this evaluation run: Task Completion (50%), Code Correctness (25%), and Doc Coverage (25%), averaged across all feature areas.\n\n**Report Mode**\n: The evaluation mode determines which reference points are measured. Different modes test different aspects of how AI agents interact with documentation.\n\n**Report Trigger**\n: What initiated this evaluation run. Knowing the trigger helps you understand whether a score change was from a content edit, a code deploy, or a scheduled check.\n\n**Mode Baseline**\n: Baseline mode \u2014 tests the model with gold-standard docs injected directly. Measures ceiling performance (best the docs can do).\n\n**Mode Full**\n: Full mode \u2014 runs baseline + agentic. Compares ceiling (injected docs) against actual (agent-retrieved docs) to measure retrieval gap and infrastructure efficiency.\n\n**Mode Agentic**\n: Agentic mode \u2014 the AI agent finds docs on its own via web search. Measures real-world performance: can agents actually discover and use your documentation?\n\n**Mode Observed**\n: Observed mode \u2014 records how agents interact with docs without scoring. Captures search queries, pages visited, and browsing patterns for analysis.\n\n**Mode Debug**\n: Debug mode \u2014 a diagnostic run for pipeline development. May use non-standard configurations or limited task sets.\n\n**Trigger Manual**\n: Manually triggered \u2014 someone ran the evaluation pipeline by hand, either locally or via the Studio UI.\n\n**Trigger Ci**\n: CI-triggered \u2014 the evaluation ran automatically as part of a pull request or merge pipeline.\n\n**Trigger Schedule**\n: Scheduled \u2014 the evaluation ran on a recurring schedule (e.g. nightly or weekly) to track score trends over time.\n\n**Trigger Webhook**\n: Webhook-triggered \u2014 a content change in Sanity triggered the evaluation automatically. 
Helps catch doc regressions early.\n\n**Trigger Cross Repo**\n: Cross-repo \u2014 triggered from another repository via the dispatch API. Used when external repos want to validate their docs against AILF tasks.",
|
|
3340
|
+
"body": "**Overall Score**\n: A weighted average across all feature areas, using the gold scoring profile: Task Completion (50%), Code Correctness (25%), and Doc Coverage (25%).\n\n**Doc Lift**\n: How much the docs help, compared to the model's training data alone. Calculated as ceiling minus floor, where ceiling includes Doc Coverage and floor does not. Higher is better.\n\n**Actual Score**\n: How well an AI agent scores when it has to find docs on its own through web search and page fetching. This is the real-world scenario. Only available in full mode.\n\n**Retrieval Gap**\n: The score lost because agents can't find or use all the relevant docs. Calculated as ceiling minus actual. Lower is better; zero means agents find everything.\n\n**Infra Efficiency**\n: What percentage of the docs' potential quality actually reaches agents (actual \xF7 ceiling). 100% means agents find and use all relevant docs perfectly.\n\n**Floor**\n: Output-quality composite without documentation \u2014 Task Completion (60%) and Code Correctness (40%) only. Doc Coverage is excluded because it's undefined when no docs are provided. This tells you what the model already knows from its training data.\n\n**Ceiling**\n: Score with gold-standard docs injected directly into the prompt. This is the best the documentation can do.\n\n**Actual**\n: Score when an AI agent finds docs on its own through web search and page fetching. This is the real-world experience.\n\n**Ret Gap**\n: Quality lost to discoverability (ceiling minus actual). The gap between what the docs could deliver and what agents actually get.\n\n**Efficiency**\n: What fraction of the docs' quality reaches agents in practice (actual \xF7 ceiling, shown as a percentage).\n\n**Inverted Ret Gap**\n: \u26A0\uFE0F Inverted retrieval gap: agents that can't find the docs actually score higher, because the docs hurt performance. 
This usually means there's a doc quality problem.\n\n**Score**\n: Ceiling composite for this feature area: Task Completion \xD7 50% + Code Correctness \xD7 25% + Doc Coverage \xD7 25%. The floor uses a different profile (Task \xD7 60% + Code \xD7 40%, no Doc Coverage).\n\n**Task Completion**\n: Can the LLM implement the requested feature? Graded 0\u2013100.\n\n**Code Correctness**\n: Is the generated code idiomatic, correct, and following best practices? Graded 0\u2013100.\n\n**Doc Coverage**\n: Did the docs provide the information needed to implement the feature? Graded 0\u2013100. This dimension only contributes to the ceiling composite (with docs) \u2014 it's excluded from the floor composite because it's undefined without documentation.\n\n**Tests**\n: Number of test cases in this feature area.\n\n**Overall Delta**\n: Change in overall score between the two runs. Positive means the experiment scored higher.\n\n**Actual Delta**\n: Change in actual (agent-retrieved) score between runs. Positive means agents did better.\n\n**Ret Gap Delta**\n: Change in retrieval gap between runs. Negative is good here: it means the gap shrank and agents found more relevant docs.\n\n**Efficiency Delta**\n: Change in infrastructure efficiency between runs. Positive means agents are capturing more of the docs' potential.\n\n**Baseline**\n: The reference run you're comparing against.\n\n**Experiment**\n: The new run you're evaluating.\n\n**Delta**\n: Difference between experiment and baseline. Positive means improvement, negative means regression.\n\n**Change**\n: Whether the change is meaningful: improved, regressed, or unchanged (within the noise threshold).\n\n**Low Scoring Judgments**\n: The grading model's explanations for tests that scored below 70/100.\n\n**Judgment Reason**\n: The grading model's natural language explanation of what went wrong.\n\n**Health Strong**\n: Feature areas scoring 80 or above. 
The docs are working well for these features \u2014 AI agents produce correct, complete implementations.\n\n**Health Attention**\n: Feature areas scoring 70\u201379. These are okay but could be improved \u2014 there may be gaps in specific dimensions like doc coverage or code correctness.\n\n**Health Weak**\n: Feature areas scoring below 70. The docs are not providing enough support for AI agents to implement these features correctly.\n\n**Negative Doc Lift Metric**\n: Number of areas where the documentation actually hurts AI performance \u2014 the model scores higher without docs than with them. This usually means the docs contain outdated patterns or incorrect examples.\n\n**Weak Areas**\n: Feature areas where the overall score is below 70. These need the most attention \u2014 low scores mean AI agents consistently struggle to implement these features.\n\n**Docs Hurt**\n: Areas where the floor score (no docs) is higher than the ceiling score (with docs). The documentation is actively misleading the model. These docs need to be rewritten or removed.\n\n**Retrieval Issues**\n: Areas where AI agents can find less than 70% of the available doc quality. The docs exist and are good, but agents can't discover them through search. Consider improving page titles, metadata, or search engine indexing.\n\n**Dim Weaknesses**\n: Individual grading dimensions scoring below 50 within an area. These are the specific skills where AI agents fail most \u2014 task completion (can it build the feature?), code correctness (is the code right?), or doc coverage (did it use the docs?).\n\n**Efficiency Anomalies**\n: Areas where agent efficiency exceeds 100% \u2014 meaning agents perform better with self-found docs than with gold-standard docs injected directly. This can indicate doc quality issues (injected docs confuse the model) or agent memorization.\n\n**Doc Lift Wins**\n: Areas where documentation boosts AI performance by 5 or more points. 
Higher doc lift means the docs are providing crucial information that the model doesn't already know.\n\n**Retrieval Excellence**\n: Areas where AI agents successfully find and use at least 85% of the available doc quality through web search. Good retrieval means your docs are well-indexed and easy for agents to discover.\n\n**Model Breakdown**\n: Break down scores by individual LLM model. The default 'All Models' view shows the cross-model average. Select a specific model to see how it performed independently \u2014 useful for spotting models that struggle with specific feature areas.\n\n**Strengths**\n: What's working well: high-scoring areas, dimensions where the docs are strong, and areas where AI agents successfully find and use the documentation.\n\n**Recommendations**\n: Prioritized remediation plan from gap analysis. Each recommendation identifies a documentation problem, the affected feature area, and the estimated score lift from fixing it.\n\n**Total Potential Lift**\n: Aggregate potential score lift if all identified gaps were fixed. This is a conservative estimate \u2014 each gap targets the median of non-bottlenecked dimensions, not 100.\n\n**Failure Mode**\n: The type of documentation problem: missing-docs (functionality not covered), incorrect-docs (factual errors), outdated-docs (stale API/patterns), or poor-structure (hard to find/understand).\n\n**Estimated Lift**\n: Estimated composite score improvement if this gap is fully fixed. Based on raising bottleneck dimensions to the median of non-bottlenecked dimensions.\n\n**Confidence**\n: How confident the classifier is in this diagnosis. High = strong keyword + structural signal agreement. Medium = partial agreement. 
Low = weak signals only.\n\n**Agent Behavior Overview**\n: How AI agents interacted with your documentation during evaluation: what they searched for, which pages they visited, and how much time they spent on network requests.\n\n**Search Queries**\n: The exact search queries agents used to find documentation. Helps you understand how agents discover your content and whether your docs appear for relevant queries.\n\n**Doc Slugs Visited**\n: Documentation page slugs that agents actually visited during evaluation. Compare against canonical docs to see if agents found the right pages.\n\n**External Domains**\n: Non-Sanity domains that agents contacted during evaluation. High external domain counts may indicate agents couldn't find what they needed in your docs.\n\n**Avg Doc Pages Visited**\n: Average number of documentation pages visited per test. Higher counts can mean agents need to consult many pages (complex task) or can't find the right one quickly.\n\n**Avg Searches Performed**\n: Average number of web searches performed per test. High search counts can indicate docs are hard to discover through search engines.\n\n**Avg Network Time Ms**\n: Average time spent on network requests per test. Includes page fetches, search queries, and API calls.\n\n**Total Requests**\n: Total number of HTTP requests the agent made during the test, including searches, page visits, and API calls.\n\n**Total Bytes Downloaded**\n: Total bytes downloaded by the agent. Large downloads may indicate the agent is fetching many pages or very large documents.\n\n**Dim Task Completion**\n: Change in task completion between runs. Positive means implementations are more complete.\n\n**Dim Code Correctness**\n: Change in code correctness between runs. Positive means better code quality.\n\n**Dim Doc Coverage**\n: Change in doc coverage between runs. 
Positive means the docs are providing more useful information.\n\n**Area Delta**\n: Score change for this area compared to the previous evaluation run.\n\n**Source Production**\n: Production source \u2014 docs fetched from the live production dataset. Scores reflect what real users and AI agents experience today.\n\n**Source Branch**\n: Branch source \u2014 docs fetched from a branch or draft dataset. Use this to preview how content changes affect scores before publishing.\n\n**Source Local**\n: Local source \u2014 docs fetched from local files or a local dev server. Useful for testing doc changes before pushing.\n\n**Report Score**\n: The overall ceiling composite for this evaluation run: Task Completion (50%), Code Correctness (25%), and Doc Coverage (25%), averaged across all feature areas.\n\n**Report Mode**\n: The evaluation mode determines which reference points are measured. Different modes test different aspects of how AI agents interact with documentation.\n\n**Report Trigger**\n: What initiated this evaluation run. Knowing the trigger helps you understand whether a score change was from a content edit, a code deploy, or a scheduled check.\n\n**Mode Baseline**\n: Baseline mode \u2014 tests the model with gold-standard docs injected directly. Measures ceiling performance (best the docs can do).\n\n**Mode Full**\n: Full mode \u2014 runs baseline + agentic. Compares ceiling (injected docs) against actual (agent-retrieved docs) to measure retrieval gap and infrastructure efficiency.\n\n**Mode Agentic**\n: Agentic mode \u2014 the AI agent finds docs on its own via web search. Measures real-world performance: can agents actually discover and use your documentation?\n\n**Mode Observed**\n: Observed mode \u2014 records how agents interact with docs without scoring. Captures search queries, pages visited, and browsing patterns for analysis.\n\n**Mode Debug**\n: Debug mode \u2014 a diagnostic run for pipeline development. 
May use non-standard configurations or limited task sets.\n\n**Trigger Manual**\n: Manually triggered \u2014 someone ran the evaluation pipeline by hand, either locally or via the Studio UI.\n\n**Trigger Ci**\n: CI-triggered \u2014 the evaluation ran automatically as part of a pull request or merge pipeline.\n\n**Trigger Schedule**\n: Scheduled \u2014 the evaluation ran on a recurring schedule (e.g. nightly or weekly) to track score trends over time.\n\n**Trigger Webhook**\n: Webhook-triggered \u2014 a content change in Sanity triggered the evaluation automatically. Helps catch doc regressions early.\n\n**Trigger Cross Repo**\n: Cross-repo \u2014 triggered from another repository via the dispatch API. Used when external repos want to validate their docs against AILF tasks.",
|
|
3341
3341
|
"source": "packages/studio/src/glossary.ts",
|
|
3342
3342
|
"tags": [
|
|
3343
3343
|
"reference",
|
|
@@ -3386,23 +3386,23 @@ import { useClient as useClient3 } from "sanity";
|
|
|
3386
3386
|
// src/glossary.ts
|
|
3387
3387
|
var GLOSSARY = {
|
|
3388
3388
|
// -- Overview stats -------------------------------------------------------
|
|
3389
|
-
overallScore: "A weighted average across all feature areas: Task Completion (50%), Code Correctness (25%), and Doc Coverage (25%).",
|
|
3390
|
-
docLift: "How much the docs help, compared to the model's training data alone. This is the score with docs minus the score without. Higher is better.",
|
|
3389
|
+
overallScore: "A weighted average across all feature areas, using the gold scoring profile: Task Completion (50%), Code Correctness (25%), and Doc Coverage (25%).",
|
|
3390
|
+
docLift: "How much the docs help, compared to the model's training data alone. Calculated as ceiling minus floor, where ceiling includes Doc Coverage and floor does not. Higher is better.",
|
|
3391
3391
|
actualScore: "How well an AI agent scores when it has to find docs on its own through web search and page fetching. This is the real-world scenario. Only available in full mode.",
|
|
3392
3392
|
retrievalGap: "The score lost because agents can't find or use all the relevant docs. Calculated as ceiling minus actual. Lower is better; zero means agents find everything.",
|
|
3393
3393
|
infraEfficiency: "What percentage of the docs' potential quality actually reaches agents (actual \xF7 ceiling). 100% means agents find and use all relevant docs perfectly.",
|
|
3394
3394
|
// -- Three-layer decomposition columns ------------------------------------
|
|
3395
|
-
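floor: "Score without any documentation. This tells you what the model already knows from its training data.",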
|
|
3395
|
+
floor: "Output-quality composite without documentation \u2014 Task Completion (60%) and Code Correctness (40%) only. Doc Coverage is excluded because it's undefined when no docs are provided. This tells you what the model already knows from its training data.",
|
|
3396
3396
|
ceiling: "Score with gold-standard docs injected directly into the prompt. This is the best the documentation can do.",
|
|
3397
3397
|
actual: "Score when an AI agent finds docs on its own through web search and page fetching. This is the real-world experience.",
|
|
3398
3398
|
retGap: "Quality lost to discoverability (ceiling minus actual). The gap between what the docs could deliver and what agents actually get.",
|
|
3399
3399
|
efficiency: "What fraction of the docs' quality reaches agents in practice (actual \xF7 ceiling, shown as a percentage).",
|
|
3400
3400
|
invertedRetGap: "\u26A0\uFE0F Inverted retrieval gap: agents that can't find the docs actually score higher, because the docs hurt performance. This usually means there's a doc quality problem.",
|
|
3401
3401
|
// -- Per-area score columns -----------------------------------------------
|
|
3402
|
-
score: "
|
|
3402
|
+
score: "Ceiling composite for this feature area: Task Completion \xD7 50% + Code Correctness \xD7 25% + Doc Coverage \xD7 25%. The floor uses a different profile (Task \xD7 60% + Code \xD7 40%, no Doc Coverage).",
|
|
3403
3403
|
taskCompletion: "Can the LLM implement the requested feature? Graded 0\u2013100.",
|
|
3404
3404
|
codeCorrectness: "Is the generated code idiomatic, correct, and following best practices? Graded 0\u2013100.",
|
|
3405
|
-
docCoverage: "Did the docs provide the information needed to implement the feature? Graded 0\u2013100.",
|
|
3405
|
+
docCoverage: "Did the docs provide the information needed to implement the feature? Graded 0\u2013100. This dimension only contributes to the ceiling composite (with docs) \u2014 it's excluded from the floor composite because it's undefined without documentation.",
|
|
3406
3406
|
tests: "Number of test cases in this feature area.",
|
|
3407
3407
|
// -- Comparison deltas ----------------------------------------------------
|
|
3408
3408
|
overallDelta: "Change in overall score between the two runs. Positive means the experiment scored higher.",
|
|
@@ -3429,6 +3429,8 @@ var GLOSSARY = {
|
|
|
3429
3429
|
efficiencyAnomalies: "Areas where agent efficiency exceeds 100% \u2014 meaning agents perform better with self-found docs than with gold-standard docs injected directly. This can indicate doc quality issues (injected docs confuse the model) or agent memorization.",
|
|
3430
3430
|
docLiftWins: "Areas where documentation boosts AI performance by 5 or more points. Higher doc lift means the docs are providing crucial information that the model doesn't already know.",
|
|
3431
3431
|
retrievalExcellence: "Areas where AI agents successfully find and use at least 85% of the available doc quality through web search. Good retrieval means your docs are well-indexed and easy for agents to discover.",
|
|
3432
|
+
// -- Model breakdown --------------------------------------------------------
|
|
3433
|
+
modelBreakdown: "Break down scores by individual LLM model. The default 'All Models' view shows the cross-model average. Select a specific model to see how it performed independently \u2014 useful for spotting models that struggle with specific feature areas.",
|
|
3432
3434
|
// -- Strengths (positive diagnostics) ---------------------------------------
|
|
3433
3435
|
strengths: "What's working well: high-scoring areas, dimensions where the docs are strong, and areas where AI agents successfully find and use the documentation.",
|
|
3434
3436
|
// -- Recommendations / gap analysis ----------------------------------------
|
|
@@ -3458,7 +3460,7 @@ var GLOSSARY = {
|
|
|
3458
3460
|
sourceBranch: "Branch source \u2014 docs fetched from a branch or draft dataset. Use this to preview how content changes affect scores before publishing.",
|
|
3459
3461
|
sourceLocal: "Local source \u2014 docs fetched from local files or a local dev server. Useful for testing doc changes before pushing.",
|
|
3460
3462
|
// -- Report list columns ----------------------------------------------------
|
|
3461
|
-
reportScore: "The overall
|
|
3463
|
+
reportScore: "The overall ceiling composite for this evaluation run: Task Completion (50%), Code Correctness (25%), and Doc Coverage (25%), averaged across all feature areas.",
|
|
3462
3464
|
reportMode: "The evaluation mode determines which reference points are measured. Different modes test different aspects of how AI agents interact with documentation.",
|
|
3463
3465
|
reportTrigger: "What initiated this evaluation run. Knowing the trigger helps you understand whether a score change was from a content edit, a code deploy, or a scheduled check.",
|
|
3464
3466
|
// -- Mode values -----------------------------------------------------------
|
|
@@ -5591,10 +5593,10 @@ function LatestReports({
|
|
|
5591
5593
|
import { ArrowLeftIcon as ArrowLeftIcon3 } from "@sanity/icons";
|
|
5592
5594
|
import {
|
|
5593
5595
|
Badge as Badge7,
|
|
5594
|
-
Box as
|
|
5596
|
+
Box as Box22,
|
|
5595
5597
|
Button as Button8,
|
|
5596
5598
|
Flex as Flex26,
|
|
5597
|
-
Stack as
|
|
5599
|
+
Stack as Stack28,
|
|
5598
5600
|
Tab,
|
|
5599
5601
|
TabList,
|
|
5600
5602
|
TabPanel,
|
|
@@ -5602,10 +5604,10 @@ import {
|
|
|
5602
5604
|
Tooltip as Tooltip8
|
|
5603
5605
|
} from "@sanity/ui";
|
|
5604
5606
|
import {
|
|
5605
|
-
useCallback as
|
|
5607
|
+
useCallback as useCallback25,
|
|
5606
5608
|
useEffect as useEffect9,
|
|
5607
|
-
useMemo as
|
|
5608
|
-
useState as
|
|
5609
|
+
useMemo as useMemo9,
|
|
5610
|
+
useState as useState19
|
|
5609
5611
|
} from "react";
|
|
5610
5612
|
import { useClient as useClient10 } from "sanity";
|
|
5611
5613
|
|
|
@@ -5971,21 +5973,6 @@ function scoreBg(score) {
|
|
|
5971
5973
|
/**
 * Look up the border colour associated with a score.
 *
 * The score is mapped to a palette key by `colorForScore`, and the `border`
 * entry of that palette slot in `COLORS` is returned.
 *
 * @param score - Score passed straight through to `colorForScore`.
 * @returns The `border` value of the matching `COLORS` entry.
 */
function scoreBorder(score) {
  const paletteKey = colorForScore(score);
  const { border } = COLORS[paletteKey];
  return border;
}
|
|
5974
|
-
function scoreBoxStyle(score) {
|
|
5975
|
-
const key = colorForScore(score);
|
|
5976
|
-
return {
|
|
5977
|
-
alignItems: "center",
|
|
5978
|
-
backgroundColor: COLORS[key].bg,
|
|
5979
|
-
borderRadius: 6,
|
|
5980
|
-
color: COLORS[key].text,
|
|
5981
|
-
display: "flex",
|
|
5982
|
-
fontFamily: "var(--font-code-size)",
|
|
5983
|
-
fontWeight: 700,
|
|
5984
|
-
height: 48,
|
|
5985
|
-
justifyContent: "center",
|
|
5986
|
-
width: 48
|
|
5987
|
-
};
|
|
5988
|
-
}
|
|
5989
5976
|
function barFillColor(score) {
|
|
5990
5977
|
const key = colorForScore(score);
|
|
5991
5978
|
switch (key) {
|
|
@@ -6055,160 +6042,180 @@ function DiagnosticsOverview({
|
|
|
6055
6042
|
);
|
|
6056
6043
|
const weak = scores.filter((s) => s.totalScore < SCORE_CAUTION);
|
|
6057
6044
|
const negativeDocLiftCount = scores.filter((s) => s.docLift < 0).length;
|
|
6045
|
+
const hasAgenticData = overall.avgActualScore != null;
|
|
6058
6046
|
const improved = comparison?.improved ?? [];
|
|
6059
6047
|
const regressed = comparison?.regressed ?? [];
|
|
6060
6048
|
const unchanged = comparison?.unchanged ?? [];
|
|
6061
6049
|
const hasComparison = improved.length > 0 || regressed.length > 0 || unchanged.length > 0;
|
|
6062
6050
|
return /* @__PURE__ */ jsxs21(Stack18, { space: 4, children: [
|
|
6063
|
-
/* @__PURE__ */ jsxs21(
|
|
6064
|
-
"
|
|
6065
|
-
{
|
|
6066
|
-
|
|
6067
|
-
|
|
6068
|
-
|
|
6069
|
-
|
|
6070
|
-
|
|
6071
|
-
|
|
6072
|
-
|
|
6073
|
-
|
|
6074
|
-
|
|
6075
|
-
|
|
6076
|
-
|
|
6077
|
-
|
|
6078
|
-
|
|
6079
|
-
|
|
6080
|
-
|
|
6081
|
-
|
|
6082
|
-
|
|
6083
|
-
|
|
6084
|
-
|
|
6085
|
-
|
|
6086
|
-
|
|
6087
|
-
|
|
6088
|
-
|
|
6089
|
-
|
|
6090
|
-
|
|
6091
|
-
|
|
6092
|
-
|
|
6093
|
-
|
|
6094
|
-
|
|
6095
|
-
|
|
6096
|
-
|
|
6097
|
-
|
|
6098
|
-
|
|
6099
|
-
|
|
6100
|
-
|
|
6101
|
-
|
|
6102
|
-
|
|
6103
|
-
|
|
6104
|
-
|
|
6105
|
-
|
|
6106
|
-
|
|
6107
|
-
|
|
6108
|
-
|
|
6109
|
-
|
|
6110
|
-
|
|
6111
|
-
|
|
6112
|
-
|
|
6113
|
-
|
|
6114
|
-
|
|
6115
|
-
|
|
6116
|
-
|
|
6117
|
-
|
|
6118
|
-
|
|
6119
|
-
|
|
6120
|
-
|
|
6121
|
-
|
|
6122
|
-
|
|
6123
|
-
|
|
6124
|
-
|
|
6125
|
-
|
|
6126
|
-
|
|
6127
|
-
|
|
6128
|
-
|
|
6129
|
-
|
|
6130
|
-
|
|
6131
|
-
|
|
6132
|
-
|
|
6133
|
-
|
|
6134
|
-
|
|
6135
|
-
|
|
6136
|
-
|
|
6137
|
-
|
|
6138
|
-
|
|
6139
|
-
|
|
6140
|
-
|
|
6141
|
-
|
|
6142
|
-
|
|
6143
|
-
|
|
6144
|
-
|
|
6145
|
-
|
|
6146
|
-
|
|
6147
|
-
|
|
6148
|
-
|
|
6149
|
-
|
|
6150
|
-
|
|
6151
|
-
|
|
6152
|
-
|
|
6153
|
-
|
|
6154
|
-
|
|
6155
|
-
|
|
6156
|
-
|
|
6157
|
-
|
|
6158
|
-
|
|
6159
|
-
|
|
6160
|
-
|
|
6161
|
-
|
|
6162
|
-
|
|
6163
|
-
|
|
6164
|
-
|
|
6165
|
-
|
|
6166
|
-
|
|
6167
|
-
|
|
6168
|
-
|
|
6169
|
-
|
|
6170
|
-
|
|
6171
|
-
|
|
6172
|
-
|
|
6173
|
-
|
|
6174
|
-
}
|
|
6175
|
-
|
|
6176
|
-
|
|
6177
|
-
|
|
6178
|
-
|
|
6179
|
-
|
|
6180
|
-
|
|
6181
|
-
|
|
6182
|
-
|
|
6183
|
-
|
|
6184
|
-
|
|
6185
|
-
|
|
6186
|
-
|
|
6187
|
-
|
|
6188
|
-
|
|
6189
|
-
|
|
6190
|
-
|
|
6191
|
-
|
|
6192
|
-
|
|
6193
|
-
|
|
6194
|
-
|
|
6195
|
-
|
|
6196
|
-
|
|
6197
|
-
|
|
6198
|
-
|
|
6199
|
-
|
|
6200
|
-
|
|
6201
|
-
|
|
6202
|
-
|
|
6203
|
-
|
|
6204
|
-
|
|
6205
|
-
|
|
6206
|
-
|
|
6207
|
-
|
|
6208
|
-
|
|
6209
|
-
|
|
6210
|
-
|
|
6211
|
-
|
|
6051
|
+
/* @__PURE__ */ jsxs21(Box15, { style: sectionWrapperStyle, children: [
|
|
6052
|
+
/* @__PURE__ */ jsx25(Box15, { padding: 3, style: sectionHeaderStyle, children: /* @__PURE__ */ jsx25(SectionLabel, { label: "Baseline" }) }),
|
|
6053
|
+
/* @__PURE__ */ jsxs21(Stack18, { space: 3, padding: 3, children: [
|
|
6054
|
+
/* @__PURE__ */ jsxs21(
|
|
6055
|
+
"div",
|
|
6056
|
+
{
|
|
6057
|
+
style: {
|
|
6058
|
+
display: "grid",
|
|
6059
|
+
gap: 12,
|
|
6060
|
+
gridTemplateColumns: "repeat(3, 1fr)"
|
|
6061
|
+
},
|
|
6062
|
+
children: [
|
|
6063
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.overallScore, children: /* @__PURE__ */ jsx25(
|
|
6064
|
+
ScoreCard,
|
|
6065
|
+
{
|
|
6066
|
+
delta: comparison?.deltas.overall,
|
|
6067
|
+
label: "AVG SCORE",
|
|
6068
|
+
sentiment: scoreSentiment(overall.avgScore),
|
|
6069
|
+
subtitle: "With-docs ceiling",
|
|
6070
|
+
value: Math.round(overall.avgScore)
|
|
6071
|
+
}
|
|
6072
|
+
) }),
|
|
6073
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.docLift, children: /* @__PURE__ */ jsx25(
|
|
6074
|
+
ScoreCard,
|
|
6075
|
+
{
|
|
6076
|
+
delta: comparison?.deltas.docLift,
|
|
6077
|
+
label: "DOC LIFT",
|
|
6078
|
+
sentiment: docLiftSentiment(overall.avgDocLift),
|
|
6079
|
+
subtitle: "Improvement from docs",
|
|
6080
|
+
value: Math.round(overall.avgDocLift)
|
|
6081
|
+
}
|
|
6082
|
+
) }),
|
|
6083
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.floor, children: /* @__PURE__ */ jsx25(
|
|
6084
|
+
ScoreCard,
|
|
6085
|
+
{
|
|
6086
|
+
label: "FLOOR",
|
|
6087
|
+
sentiment: scoreSentiment(overall.avgFloorScore ?? 0),
|
|
6088
|
+
subtitle: "Without docs baseline",
|
|
6089
|
+
value: Math.round(overall.avgFloorScore ?? 0)
|
|
6090
|
+
}
|
|
6091
|
+
) })
|
|
6092
|
+
]
|
|
6093
|
+
}
|
|
6094
|
+
),
|
|
6095
|
+
/* @__PURE__ */ jsxs21(
|
|
6096
|
+
"div",
|
|
6097
|
+
{
|
|
6098
|
+
style: {
|
|
6099
|
+
display: "grid",
|
|
6100
|
+
gap: 12,
|
|
6101
|
+
gridTemplateColumns: "repeat(3, 1fr)"
|
|
6102
|
+
},
|
|
6103
|
+
children: [
|
|
6104
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.negativeDocLiftMetric, children: /* @__PURE__ */ jsx25(
|
|
6105
|
+
MetricCard,
|
|
6106
|
+
{
|
|
6107
|
+
label: "Negative Doc Lift",
|
|
6108
|
+
sentiment: negativeDocLiftSentiment(negativeDocLiftCount),
|
|
6109
|
+
value: `${negativeDocLiftCount} area${negativeDocLiftCount === 1 ? "" : "s"}`
|
|
6110
|
+
}
|
|
6111
|
+
) }),
|
|
6112
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.tests, children: /* @__PURE__ */ jsx25(MetricCard, { label: "Tests", value: String(totalTests ?? 0) }) }),
|
|
6113
|
+
durationMs != null && durationMs > 0 ? /* @__PURE__ */ jsx25(
|
|
6114
|
+
HoverTip,
|
|
6115
|
+
{
|
|
6116
|
+
display: "block",
|
|
6117
|
+
text: "Total wall-clock time for the evaluation pipeline run.",
|
|
6118
|
+
children: /* @__PURE__ */ jsx25(
|
|
6119
|
+
MetricCard,
|
|
6120
|
+
{
|
|
6121
|
+
label: "Duration",
|
|
6122
|
+
value: formatDuration(durationMs)
|
|
6123
|
+
}
|
|
6124
|
+
)
|
|
6125
|
+
}
|
|
6126
|
+
) : /* @__PURE__ */ jsx25("div", {})
|
|
6127
|
+
]
|
|
6128
|
+
}
|
|
6129
|
+
)
|
|
6130
|
+
] })
|
|
6131
|
+
] }),
|
|
6132
|
+
hasAgenticData && /* @__PURE__ */ jsxs21(Box15, { style: sectionWrapperStyle, children: [
|
|
6133
|
+
/* @__PURE__ */ jsx25(Box15, { padding: 3, style: sectionHeaderStyle, children: /* @__PURE__ */ jsx25(SectionLabel, { label: "Agent Performance" }) }),
|
|
6134
|
+
/* @__PURE__ */ jsx25(Stack18, { space: 3, padding: 3, children: /* @__PURE__ */ jsxs21(
|
|
6135
|
+
"div",
|
|
6136
|
+
{
|
|
6137
|
+
style: {
|
|
6138
|
+
display: "grid",
|
|
6139
|
+
gap: 12,
|
|
6140
|
+
gridTemplateColumns: "repeat(3, 1fr)"
|
|
6141
|
+
},
|
|
6142
|
+
children: [
|
|
6143
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.actualScore, children: /* @__PURE__ */ jsx25(
|
|
6144
|
+
ScoreCard,
|
|
6145
|
+
{
|
|
6146
|
+
delta: comparison?.deltas.actualDelta,
|
|
6147
|
+
label: "ACTUAL SCORE",
|
|
6148
|
+
sentiment: scoreSentiment(overall.avgActualScore),
|
|
6149
|
+
subtitle: "Agent-retrieved docs",
|
|
6150
|
+
value: Math.round(overall.avgActualScore)
|
|
6151
|
+
}
|
|
6152
|
+
) }),
|
|
6153
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.retrievalGap, children: /* @__PURE__ */ jsx25(
|
|
6154
|
+
ScoreCard,
|
|
6155
|
+
{
|
|
6156
|
+
label: "RETRIEVAL GAP",
|
|
6157
|
+
sentiment: overall.avgRetrievalGap != null ? retrievalGapSentiment(overall.avgRetrievalGap) : void 0,
|
|
6158
|
+
subtitle: "Lost to findability",
|
|
6159
|
+
suffix: "pts",
|
|
6160
|
+
value: overall.avgRetrievalGap != null ? Math.round(overall.avgRetrievalGap) : 0
|
|
6161
|
+
}
|
|
6162
|
+
) }),
|
|
6163
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.infraEfficiency, children: /* @__PURE__ */ jsx25(
|
|
6164
|
+
ScoreCard,
|
|
6165
|
+
{
|
|
6166
|
+
label: "EFFICIENCY",
|
|
6167
|
+
sentiment: overall.avgInfrastructureEfficiency != null ? efficiencySentiment(overall.avgInfrastructureEfficiency) : void 0,
|
|
6168
|
+
subtitle: "Doc quality reaching agents",
|
|
6169
|
+
suffix: "%",
|
|
6170
|
+
value: overall.avgInfrastructureEfficiency != null ? Math.round(overall.avgInfrastructureEfficiency * 100) : 0
|
|
6171
|
+
}
|
|
6172
|
+
) })
|
|
6173
|
+
]
|
|
6174
|
+
}
|
|
6175
|
+
) })
|
|
6176
|
+
] }),
|
|
6177
|
+
/* @__PURE__ */ jsxs21(Box15, { style: sectionWrapperStyle, children: [
|
|
6178
|
+
/* @__PURE__ */ jsx25(Box15, { padding: 3, style: sectionHeaderStyle, children: /* @__PURE__ */ jsx25(SectionLabel, { label: "Area Health" }) }),
|
|
6179
|
+
/* @__PURE__ */ jsx25(Box15, { padding: 3, children: /* @__PURE__ */ jsxs21(
|
|
6180
|
+
"div",
|
|
6181
|
+
{
|
|
6182
|
+
style: {
|
|
6183
|
+
display: "grid",
|
|
6184
|
+
gap: 12,
|
|
6185
|
+
gridTemplateColumns: "1fr 1fr 1fr"
|
|
6186
|
+
},
|
|
6187
|
+
children: [
|
|
6188
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.healthStrong, children: /* @__PURE__ */ jsx25(
|
|
6189
|
+
HealthCard,
|
|
6190
|
+
{
|
|
6191
|
+
color: strong.length > 0 ? "emerald" : "muted",
|
|
6192
|
+
count: strong.length,
|
|
6193
|
+
icon: /* @__PURE__ */ jsx25(CheckmarkCircleIcon, {}),
|
|
6194
|
+
label: "Strong (80+)"
|
|
6195
|
+
}
|
|
6196
|
+
) }),
|
|
6197
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.healthAttention, children: /* @__PURE__ */ jsx25(
|
|
6198
|
+
HealthCard,
|
|
6199
|
+
{
|
|
6200
|
+
color: attention.length === 0 ? "muted" : "amber",
|
|
6201
|
+
count: attention.length,
|
|
6202
|
+
icon: /* @__PURE__ */ jsx25(WarningOutlineIcon, {}),
|
|
6203
|
+
label: "Attention (70-79)"
|
|
6204
|
+
}
|
|
6205
|
+
) }),
|
|
6206
|
+
/* @__PURE__ */ jsx25(HoverTip, { display: "block", text: GLOSSARY.healthWeak, children: /* @__PURE__ */ jsx25(
|
|
6207
|
+
HealthCard,
|
|
6208
|
+
{
|
|
6209
|
+
color: weak.length === 0 ? "muted" : "red",
|
|
6210
|
+
count: weak.length,
|
|
6211
|
+
icon: /* @__PURE__ */ jsx25(ErrorOutlineIcon, {}),
|
|
6212
|
+
label: "Weak (<70)"
|
|
6213
|
+
}
|
|
6214
|
+
) })
|
|
6215
|
+
]
|
|
6216
|
+
}
|
|
6217
|
+
) })
|
|
6218
|
+
] }),
|
|
6212
6219
|
hasComparison && /* @__PURE__ */ jsxs21(Box15, { style: neutralCardStyle, children: [
|
|
6213
6220
|
/* @__PURE__ */ jsx25(
|
|
6214
6221
|
Box15,
|
|
@@ -6289,6 +6296,26 @@ function DiagnosticsOverview({
|
|
|
6289
6296
|
] })
|
|
6290
6297
|
] });
|
|
6291
6298
|
}
|
|
6299
|
+
// Inline style for the outer box of each overview section: a rounded,
// 1px-bordered frame that clips its children to the rounded corners.
var sectionWrapperStyle = {
  overflow: "hidden",
  borderRadius: 6,
  border: "1px solid var(--card-border-color)"
};

// Inline style for a section's header strip: a single rule separating the
// header from the section body underneath it.
var sectionHeaderStyle = { borderBottom: "1px solid var(--card-border-color)" };
|
|
6307
|
+
/**
 * Small heading used at the top of an overview section.
 *
 * Renders `label` in a muted, semibold, size-1 text element; the inline style
 * upper-cases the text and widens the letter spacing.
 *
 * @param props.label - Content rendered as the heading's children.
 */
function SectionLabel({ label }) {
  const headingStyle = { letterSpacing: "0.08em", textTransform: "uppercase" };
  const headingProps = {
    muted: true,
    size: 1,
    style: headingStyle,
    weight: "semibold",
    children: label
  };
  return /* @__PURE__ */ jsx25(Text23, headingProps);
}
|
|
6292
6319
|
function ScoreCard({
|
|
6293
6320
|
delta,
|
|
6294
6321
|
label,
|
|
@@ -7722,11 +7749,12 @@ function ReportHeader({
|
|
|
7722
7749
|
}
|
|
7723
7750
|
|
|
7724
7751
|
// src/components/report-detail/StrengthsList.tsx
|
|
7752
|
+
import { useMemo as useMemo8 } from "react";
|
|
7725
7753
|
import { CheckmarkCircleIcon as CheckmarkCircleIcon2, SearchIcon as SearchIcon6 } from "@sanity/icons";
|
|
7726
|
-
import { Box as Box20, Flex as
|
|
7754
|
+
import { Box as Box20, Flex as Flex24, Stack as Stack26, Text as Text32 } from "@sanity/ui";
|
|
7727
7755
|
|
|
7728
|
-
// src/components/report-detail/
|
|
7729
|
-
import {
|
|
7756
|
+
// src/components/report-detail/AreaScoresGrid.tsx
|
|
7757
|
+
import React3, {
|
|
7730
7758
|
useCallback as useCallback22,
|
|
7731
7759
|
useMemo as useMemo6,
|
|
7732
7760
|
useState as useState17
|
|
@@ -7739,14 +7767,27 @@ function tableTier2(width) {
|
|
|
7739
7767
|
if (width >= 600) return "compact";
|
|
7740
7768
|
return "narrow";
|
|
7741
7769
|
}
|
|
7742
|
-
|
|
7743
|
-
|
|
7744
|
-
|
|
7745
|
-
|
|
7746
|
-
|
|
7747
|
-
|
|
7770
|
+
/**
 * CSS `grid-template-columns` track list used by the area scores grid: the
 * header row, each area row and each per-model sub-row call this with the same
 * arguments so their columns line up.
 *
 * - "full":    eight tracks when `hasActual` is truthy, otherwise seven
 * - "compact": six tracks
 * - "narrow":  five tracks
 *
 * @param tier - Layout tier: "full", "compact" or "narrow".
 * @param hasActual - Whether to append the trailing 72px track for the actual
 *   score column. Only consulted for the "full" tier.
 * @returns The track list string to assign to `gridTemplateColumns`.
 */
function gridColumns(tier, hasActual) {
  switch (tier) {
    case "full":
      return hasActual
        ? "120px 1fr 1fr 1fr 1fr 80px 72px 72px"
        : "120px 1fr 1fr 1fr 1fr 80px 72px";
    case "narrow":
      return "56px 1fr 1fr 1fr 1fr";
    case "compact":
    default:
      // An unrecognised tier previously fell out of the switch and returned
      // undefined, silently dropping the grid template. Model sub-rows render
      // the doc-lift cell for every tier except "narrow" and the floor/actual
      // cells only for "full", so an unknown tier produces six cells; the
      // six-track layout is the matching fallback.
      return "96px 1fr 1fr 1fr 1fr 80px";
  }
}
|
|
7780
|
+
function AreaScoresGrid({
|
|
7781
|
+
scores,
|
|
7782
|
+
perArea,
|
|
7783
|
+
perModel
|
|
7784
|
+
}) {
|
|
7748
7785
|
const { ref: containerRef, width } = useContainerWidth();
|
|
7749
7786
|
const tier = tableTier2(width);
|
|
7787
|
+
const hasActual = useMemo6(
|
|
7788
|
+
() => scores.some((s) => s.actualScore != null),
|
|
7789
|
+
[scores]
|
|
7790
|
+
);
|
|
7750
7791
|
const [sortField, setSortField] = useState17("score");
|
|
7751
7792
|
const [sortDir, setSortDir] = useState17("desc");
|
|
7752
7793
|
const handleSort = useCallback22(
|
|
@@ -7781,6 +7822,24 @@ function StrengthsTable({ scores, perArea }) {
|
|
|
7781
7822
|
}
|
|
7782
7823
|
});
|
|
7783
7824
|
}, [scores, sortField, sortDir]);
|
|
7825
|
+
const modelScoresByFeature = useMemo6(() => {
|
|
7826
|
+
if (!perModel) return null;
|
|
7827
|
+
const map = /* @__PURE__ */ new Map();
|
|
7828
|
+
for (const model of perModel) {
|
|
7829
|
+
for (const score of model.scores) {
|
|
7830
|
+
let list = map.get(score.feature);
|
|
7831
|
+
if (!list) {
|
|
7832
|
+
list = [];
|
|
7833
|
+
map.set(score.feature, list);
|
|
7834
|
+
}
|
|
7835
|
+
list.push({ label: model.label, scores: score });
|
|
7836
|
+
}
|
|
7837
|
+
}
|
|
7838
|
+
for (const list of map.values()) {
|
|
7839
|
+
list.sort((a, b) => a.label.localeCompare(b.label));
|
|
7840
|
+
}
|
|
7841
|
+
return map;
|
|
7842
|
+
}, [perModel]);
|
|
7784
7843
|
return /* @__PURE__ */ jsxs29(Box19, { ref: containerRef, style: { ...neutralCardStyle, overflow: "auto" }, children: [
|
|
7785
7844
|
/* @__PURE__ */ jsxs29(
|
|
7786
7845
|
"div",
|
|
@@ -7789,7 +7848,7 @@ function StrengthsTable({ scores, perArea }) {
|
|
|
7789
7848
|
borderBottom: "1px solid var(--card-border-color)",
|
|
7790
7849
|
display: "grid",
|
|
7791
7850
|
gap: "0 12px",
|
|
7792
|
-
gridTemplateColumns:
|
|
7851
|
+
gridTemplateColumns: gridColumns(tier, hasActual),
|
|
7793
7852
|
padding: "12px 16px 8px"
|
|
7794
7853
|
},
|
|
7795
7854
|
children: [
|
|
@@ -7800,7 +7859,7 @@ function StrengthsTable({ scores, perArea }) {
|
|
|
7800
7859
|
direction: sortDir,
|
|
7801
7860
|
label: "Score",
|
|
7802
7861
|
onClick: () => handleSort("score"),
|
|
7803
|
-
tooltip: GLOSSARY.score
|
|
7862
|
+
tooltip: `${GLOSSARY.score} This is the ceiling score \u2014 with gold-standard docs injected.`
|
|
7804
7863
|
}
|
|
7805
7864
|
),
|
|
7806
7865
|
/* @__PURE__ */ jsx41(
|
|
@@ -7852,27 +7911,153 @@ function StrengthsTable({ scores, perArea }) {
|
|
|
7852
7911
|
tooltip: GLOSSARY.docLift
|
|
7853
7912
|
}
|
|
7854
7913
|
),
|
|
7855
|
-
tier === "full" && /* @__PURE__ */
|
|
7856
|
-
|
|
7857
|
-
/* @__PURE__ */ jsx41(ColHeader3, { label: "Ceil", tooltip: GLOSSARY.ceiling })
|
|
7858
|
-
] })
|
|
7914
|
+
tier === "full" && /* @__PURE__ */ jsx41(ColHeader3, { label: "Floor", tooltip: GLOSSARY.floor }),
|
|
7915
|
+
tier === "full" && hasActual && /* @__PURE__ */ jsx41(ColHeader3, { label: "Actual", tooltip: GLOSSARY.actualScore })
|
|
7859
7916
|
]
|
|
7860
7917
|
}
|
|
7861
7918
|
),
|
|
7862
|
-
sorted.map((area) => /* @__PURE__ */
|
|
7863
|
-
|
|
7864
|
-
|
|
7865
|
-
|
|
7866
|
-
|
|
7867
|
-
|
|
7868
|
-
|
|
7869
|
-
|
|
7870
|
-
|
|
7919
|
+
sorted.map((area) => /* @__PURE__ */ jsxs29(React3.Fragment, { children: [
|
|
7920
|
+
/* @__PURE__ */ jsx41(
|
|
7921
|
+
AreaRow,
|
|
7922
|
+
{
|
|
7923
|
+
area,
|
|
7924
|
+
delta: perArea?.[area.feature],
|
|
7925
|
+
hasActual,
|
|
7926
|
+
tier
|
|
7927
|
+
}
|
|
7928
|
+
),
|
|
7929
|
+
modelScoresByFeature && /* @__PURE__ */ jsx41(
|
|
7930
|
+
ModelSubRows,
|
|
7931
|
+
{
|
|
7932
|
+
hasActual,
|
|
7933
|
+
models: modelScoresByFeature.get(area.feature),
|
|
7934
|
+
tier
|
|
7935
|
+
}
|
|
7936
|
+
)
|
|
7937
|
+
] }, area.feature))
|
|
7871
7938
|
] });
|
|
7872
7939
|
}
|
|
7940
|
+
/**
 * Renders one `ModelRow` per entry of `models`, wrapped in a fragment.
 *
 * Returns `null` when `models` is missing or empty, so features without
 * per-model data add nothing beneath their area row.
 *
 * @param props.hasActual - Forwarded unchanged to every `ModelRow`.
 * @param props.models - Entries of the form `{ label, scores }`; `label` is
 *   also used as the React key of the row.
 * @param props.tier - Layout tier, forwarded unchanged to every `ModelRow`.
 */
function ModelSubRows({ hasActual, models, tier }) {
  const nothingToShow = !models || models.length === 0;
  if (nothingToShow) {
    return null;
  }

  const rows = models.map((entry) => {
    const rowProps = {
      hasActual,
      label: entry.label,
      scores: entry.scores,
      tier
    };
    return /* @__PURE__ */ jsx41(ModelRow, rowProps, entry.label);
  });

  return /* @__PURE__ */ jsx41(Fragment11, { children: rows });
}
|
|
7957
|
+
/**
 * One indented sub-row of the area scores grid showing a single model's
 * scores for a feature.
 *
 * Cells, in order: rounded total score, model label, three small `DimCell`s
 * (Task Completion, Code Correctness, Doc Coverage), then depending on tier:
 * doc lift (every tier except "narrow"), floor score ("full" only) and actual
 * score ("full" only, and only when `hasActual` is truthy).
 *
 * @param props.hasActual - Whether the actual-score cell/track is present.
 * @param props.label - Model label; shown in the row and also passed to each
 *   `DimCell` as its `area` prop.
 * @param props.scores - Score record read for `totalScore`, `taskCompletion`,
 *   `codeCorrectness`, `docCoverage`, `docLift`, `floorScore`, `actualScore`.
 * @param props.tier - Layout tier ("full", "compact" or "narrow").
 */
function ModelRow({ hasActual, label, scores, tier }) {
  const isNarrow = tier === "narrow";
  const isFull = tier === "full";
  const monoFont = "var(--font-code-size, monospace)";
  const mutedColor = "var(--card-muted-fg-color)";

  // Same column template as the parent area row; the larger left padding is
  // what visually indents the sub-row.
  const rowStyle = {
    alignItems: "center",
    backgroundColor: "var(--card-bg2-color, rgba(255,255,255,0.02))",
    borderBottom: "1px solid var(--card-border-color)",
    display: "grid",
    gap: "0 12px",
    gridTemplateColumns: gridColumns(tier, hasActual),
    padding: isNarrow ? "6px 12px 6px 20px" : "6px 16px 6px 28px"
  };

  const totalCell = /* @__PURE__ */ jsx41(Flex22, {
    align: "center",
    children: /* @__PURE__ */ jsx41(Text30, {
      size: 1,
      style: {
        color: scoreColor(scores.totalScore),
        fontFamily: monoFont,
        fontWeight: 600
      },
      children: Math.round(scores.totalScore)
    })
  });

  const labelCell = /* @__PURE__ */ jsx41(Flex22, {
    align: "center",
    gap: 2,
    children: /* @__PURE__ */ jsx41(Text30, { muted: true, size: 1, children: label })
  });

  const dimensionCells = [
    ["Task Completion", scores.taskCompletion],
    ["Code Correctness", scores.codeCorrectness],
    ["Doc Coverage", scores.docCoverage]
  ].map(
    ([dim, value]) => /* @__PURE__ */ jsx41(DimCell, { area: label, dim, size: "small", value })
  );

  // Green at +5 or more, red when negative, muted otherwise.
  let liftColor = mutedColor;
  if (scores.docLift >= 5) {
    liftColor = "#34d399";
  } else if (scores.docLift < 0) {
    liftColor = "#f87171";
  }

  // NOTE(review): docLift is rendered as-is, while the other numeric cells go
  // through Math.round — confirm the value is already an integer.
  const liftCell = !isNarrow && /* @__PURE__ */ jsxs29(Text30, {
    size: 1,
    style: { color: liftColor, fontFamily: monoFont, fontWeight: 500 },
    children: [scores.docLift > 0 ? "+" : "", scores.docLift]
  });

  // A missing floor score is displayed as 0.
  const floorCell = isFull && /* @__PURE__ */ jsx41(Text30, {
    muted: true,
    size: 1,
    style: { fontFamily: monoFont },
    children: Math.round(scores.floorScore ?? 0)
  });

  // A missing actual score is displayed as a muted em dash.
  const actualKnown = scores.actualScore != null;
  const actualCell = isFull && hasActual && /* @__PURE__ */ jsx41(Text30, {
    size: 1,
    style: {
      color: actualKnown ? scoreColor(scores.actualScore) : mutedColor,
      fontFamily: monoFont,
      fontWeight: 500
    },
    children: actualKnown ? Math.round(scores.actualScore) : "\u2014"
  });

  return /* @__PURE__ */ jsxs29("div", {
    style: rowStyle,
    children: [
      totalCell,
      labelCell,
      ...dimensionCells,
      liftCell,
      floorCell,
      actualCell
    ]
  });
}
|
|
7873
8057
|
function AreaRow({
|
|
7874
8058
|
area,
|
|
7875
8059
|
delta,
|
|
8060
|
+
hasActual,
|
|
7876
8061
|
tier
|
|
7877
8062
|
}) {
|
|
7878
8063
|
const isNarrow = tier === "narrow";
|
|
@@ -7884,7 +8069,7 @@ function AreaRow({
|
|
|
7884
8069
|
borderBottom: "1px solid var(--card-border-color)",
|
|
7885
8070
|
display: "grid",
|
|
7886
8071
|
gap: "0 12px",
|
|
7887
|
-
gridTemplateColumns:
|
|
8072
|
+
gridTemplateColumns: gridColumns(tier, hasActual),
|
|
7888
8073
|
padding: isNarrow ? "8px 12px" : "10px 16px"
|
|
7889
8074
|
},
|
|
7890
8075
|
children: [
|
|
@@ -7894,7 +8079,7 @@ function AreaRow({
|
|
|
7894
8079
|
{
|
|
7895
8080
|
text: /* @__PURE__ */ jsxs29(Text30, { size: 2, style: { lineHeight: 1.5 }, children: [
|
|
7896
8081
|
/* @__PURE__ */ jsx41("span", { style: { fontWeight: 600 }, children: area.feature }),
|
|
7897
|
-
"
|
|
8082
|
+
" ceiling score:",
|
|
7898
8083
|
" ",
|
|
7899
8084
|
/* @__PURE__ */ jsx41(
|
|
7900
8085
|
"span",
|
|
@@ -7910,7 +8095,8 @@ function AreaRow({
|
|
|
7910
8095
|
/* @__PURE__ */ jsx41("span", { style: { color: "var(--card-muted-fg-color)" }, children: "/100" }),
|
|
7911
8096
|
".",
|
|
7912
8097
|
" ",
|
|
7913
|
-
GLOSSARY.score
|
|
8098
|
+
GLOSSARY.score,
|
|
8099
|
+
" This is the ceiling \u2014 with gold-standard docs injected."
|
|
7914
8100
|
] }),
|
|
7915
8101
|
children: /* @__PURE__ */ jsx41(
|
|
7916
8102
|
"div",
|
|
@@ -8029,13 +8215,22 @@ function AreaRow({
|
|
|
8029
8215
|
children: Math.round(area.floorScore ?? 0)
|
|
8030
8216
|
}
|
|
8031
8217
|
),
|
|
8032
|
-
tier === "full" && /* @__PURE__ */ jsx41(
|
|
8033
|
-
|
|
8218
|
+
tier === "full" && hasActual && /* @__PURE__ */ jsx41(
|
|
8219
|
+
HoverTip,
|
|
8034
8220
|
{
|
|
8035
|
-
|
|
8036
|
-
|
|
8037
|
-
|
|
8038
|
-
|
|
8221
|
+
text: area.actualScore != null ? `${area.feature} actual score: ${Math.round(area.actualScore)}/100. ${GLOSSARY.actualScore}` : `No agentic data for ${area.feature}.`,
|
|
8222
|
+
children: /* @__PURE__ */ jsx41(
|
|
8223
|
+
Text30,
|
|
8224
|
+
{
|
|
8225
|
+
size: 2,
|
|
8226
|
+
style: {
|
|
8227
|
+
color: area.actualScore != null ? scoreColor(area.actualScore) : "var(--card-muted-fg-color)",
|
|
8228
|
+
fontFamily: "var(--font-code-size, monospace)",
|
|
8229
|
+
fontWeight: 600
|
|
8230
|
+
},
|
|
8231
|
+
children: area.actualScore != null ? Math.round(area.actualScore) : "\u2014"
|
|
8232
|
+
}
|
|
8233
|
+
)
|
|
8039
8234
|
}
|
|
8040
8235
|
)
|
|
8041
8236
|
]
|
|
@@ -8045,6 +8240,7 @@ function AreaRow({
|
|
|
8045
8240
|
function DimCell({
|
|
8046
8241
|
area,
|
|
8047
8242
|
dim,
|
|
8243
|
+
size = "normal",
|
|
8048
8244
|
value
|
|
8049
8245
|
}) {
|
|
8050
8246
|
const glossary = {
|
|
@@ -8052,6 +8248,8 @@ function DimCell({
|
|
|
8052
8248
|
"Code Correctness": GLOSSARY.codeCorrectness,
|
|
8053
8249
|
"Doc Coverage": GLOSSARY.docCoverage
|
|
8054
8250
|
};
|
|
8251
|
+
const textSize = size === "small" ? 0 : 1;
|
|
8252
|
+
const barHeight = size === "small" ? 3 : 4;
|
|
8055
8253
|
return /* @__PURE__ */ jsx41(
|
|
8056
8254
|
HoverTip,
|
|
8057
8255
|
{
|
|
@@ -8082,7 +8280,7 @@ function DimCell({
|
|
|
8082
8280
|
/* @__PURE__ */ jsx41(
|
|
8083
8281
|
Text30,
|
|
8084
8282
|
{
|
|
8085
|
-
size:
|
|
8283
|
+
size: textSize,
|
|
8086
8284
|
style: {
|
|
8087
8285
|
color: scoreColor(value),
|
|
8088
8286
|
fontFamily: "var(--font-code-size, monospace)",
|
|
@@ -8097,7 +8295,7 @@ function DimCell({
|
|
|
8097
8295
|
style: {
|
|
8098
8296
|
backgroundColor: "var(--card-border-color)",
|
|
8099
8297
|
borderRadius: 999,
|
|
8100
|
-
height:
|
|
8298
|
+
height: barHeight,
|
|
8101
8299
|
overflow: "hidden",
|
|
8102
8300
|
width: "100%"
|
|
8103
8301
|
},
|
|
@@ -8170,51 +8368,219 @@ function ColHeader3({
|
|
|
8170
8368
|
] });
|
|
8171
8369
|
}
|
|
8172
8370
|
|
|
8173
|
-
// src/components/report-detail/
|
|
8371
|
+
// src/components/report-detail/ModelSelector.tsx
|
|
8372
|
+
import { useCallback as useCallback23 } from "react";
|
|
8373
|
+
import { Flex as Flex23, Text as Text31 } from "@sanity/ui";
|
|
8174
8374
|
import { jsx as jsx42, jsxs as jsxs30 } from "react/jsx-runtime";
|
|
8175
|
-
|
|
8176
|
-
|
|
8375
|
+
// Look shared by every selector pill: fully rounded 1px border, compact
// padding, pointer cursor, non-selectable single-line text.
var pillBase = {
  borderColor: "var(--card-border-color)",
  borderRadius: 999,
  borderStyle: "solid",
  borderWidth: 1,
  cursor: "pointer",
  fontSize: 13,
  fontWeight: 500,
  lineHeight: 1,
  padding: "5px 12px",
  transition: "all 150ms ease",
  userSelect: "none",
  whiteSpace: "nowrap"
};

// Unselected pill: base look on a transparent background with muted text.
var pillDefault = Object.assign({}, pillBase, {
  backgroundColor: "transparent",
  color: "var(--card-muted-fg-color)"
});

// Selected pill: base look with a green tint; overrides the base border colour.
var pillSelected = Object.assign({}, pillBase, {
  backgroundColor: "rgba(16,185,129,0.15)",
  borderColor: "rgba(16,185,129,0.40)",
  color: "#34d399"
});
|
|
8400
|
+
/**
 * Row of pills for choosing which model's scores to view.
 *
 * Layout, left to right: an "All Models" pill (selected when `selection` is
 * `null`), one pill per entry of `models` (selected when `selection` equals
 * that entry's `modelId`), a thin vertical divider, and a "By Model" pill
 * (selected when `selection` is the string "expanded").
 *
 * @param props.models - Entries providing `modelId` (also the React key) and
 *   `label` for each per-model pill.
 * @param props.selection - Current selection: `null`, a model id, or
 *   "expanded".
 * @param props.onChange - Called with `null`, the clicked model's `modelId`,
 *   or "expanded" when the corresponding pill is activated.
 */
function ModelSelector({ models, selection, onChange }) {
  const allModelsPill = /* @__PURE__ */ jsx42(Pill2, {
    isSelected: selection === null,
    label: "All Models",
    onClick: () => onChange(null)
  });

  const perModelPills = models.map((model) => {
    const pillProps = {
      isSelected: selection === model.modelId,
      label: model.label,
      onClick: () => onChange(model.modelId)
    };
    return /* @__PURE__ */ jsx42(Pill2, pillProps, model.modelId);
  });

  // 1px-wide, 16px-tall rule separating the model pills from "By Model".
  const dividerStyle = {
    backgroundColor: "var(--card-border-color)",
    height: 16,
    marginInline: 4,
    width: 1
  };
  const divider = /* @__PURE__ */ jsx42("div", { style: dividerStyle });

  const byModelPill = /* @__PURE__ */ jsx42(Pill2, {
    isSelected: selection === "expanded",
    label: "By Model",
    onClick: () => onChange("expanded")
  });

  return /* @__PURE__ */ jsxs30(Flex23, {
    align: "center",
    gap: 1,
    wrap: "wrap",
    children: [allModelsPill, perModelPills, divider, byModelPill]
  });
}
|
|
8444
|
+
function Pill2({
|
|
8445
|
+
isSelected,
|
|
8446
|
+
label,
|
|
8447
|
+
onClick
|
|
8448
|
+
}) {
|
|
8449
|
+
const handleKeyDown = useCallback23(
|
|
8450
|
+
(e) => {
|
|
8451
|
+
if (e.key === "Enter" || e.key === " ") {
|
|
8452
|
+
e.preventDefault();
|
|
8453
|
+
onClick();
|
|
8454
|
+
}
|
|
8455
|
+
},
|
|
8456
|
+
[onClick]
|
|
8457
|
+
);
|
|
8458
|
+
return /* @__PURE__ */ jsx42(
|
|
8459
|
+
"span",
|
|
8460
|
+
{
|
|
8461
|
+
onClick,
|
|
8462
|
+
onKeyDown: handleKeyDown,
|
|
8463
|
+
role: "button",
|
|
8464
|
+
style: isSelected ? pillSelected : pillDefault,
|
|
8465
|
+
tabIndex: 0,
|
|
8466
|
+
children: /* @__PURE__ */ jsx42(
|
|
8467
|
+
Text31,
|
|
8468
|
+
{
|
|
8469
|
+
size: 1,
|
|
8470
|
+
style: {
|
|
8471
|
+
color: "inherit",
|
|
8472
|
+
fontWeight: "inherit"
|
|
8473
|
+
},
|
|
8474
|
+
children: label
|
|
8475
|
+
}
|
|
8476
|
+
)
|
|
8477
|
+
}
|
|
8478
|
+
);
|
|
8479
|
+
}
|
|
8480
|
+
|
|
8481
|
+
// src/components/report-detail/useModelSelection.ts
|
|
8482
|
+
import { useCallback as useCallback24, useMemo as useMemo7, useState as useState18 } from "react";
|
|
8483
|
+
function useModelSelection({
|
|
8484
|
+
scores,
|
|
8485
|
+
perModel
|
|
8486
|
+
}) {
|
|
8487
|
+
const [selection, setSelection] = useState18(null);
|
|
8488
|
+
const onSelectionChange = useCallback24((next) => {
|
|
8489
|
+
setSelection(next);
|
|
8490
|
+
}, []);
|
|
8491
|
+
const isExpanded = selection === "expanded";
|
|
8492
|
+
const hasModels = perModel != null && perModel.length > 1;
|
|
8493
|
+
const resolvedScores = useMemo7(() => {
|
|
8494
|
+
if (isExpanded || selection === null || !perModel) return scores;
|
|
8495
|
+
const model = perModel.find((m) => m.modelId === selection);
|
|
8496
|
+
return model?.scores ?? scores;
|
|
8497
|
+
}, [isExpanded, selection, perModel, scores]);
|
|
8498
|
+
const expandedPerModel = isExpanded ? perModel ?? void 0 : void 0;
|
|
8499
|
+
return {
|
|
8500
|
+
selection,
|
|
8501
|
+
onSelectionChange,
|
|
8502
|
+
resolvedScores,
|
|
8503
|
+
isExpanded,
|
|
8504
|
+
hasModels,
|
|
8505
|
+
expandedPerModel
|
|
8506
|
+
};
|
|
8507
|
+
}
|
|
8508
|
+
|
|
8509
|
+
// src/components/report-detail/StrengthsList.tsx
|
|
8510
|
+
import { jsx as jsx43, jsxs as jsxs31 } from "react/jsx-runtime";
|
|
8511
|
+
function StrengthsList({
|
|
8512
|
+
scores,
|
|
8513
|
+
comparison,
|
|
8514
|
+
perModel
|
|
8515
|
+
}) {
|
|
8516
|
+
const {
|
|
8517
|
+
selection,
|
|
8518
|
+
onSelectionChange,
|
|
8519
|
+
resolvedScores,
|
|
8520
|
+
hasModels,
|
|
8521
|
+
expandedPerModel
|
|
8522
|
+
} = useModelSelection({ scores, perModel });
|
|
8523
|
+
const displayedScores = useMemo8(
|
|
8524
|
+
() => resolvedScores.filter((s) => s.totalScore >= SCORE_CAUTION),
|
|
8525
|
+
[resolvedScores]
|
|
8526
|
+
);
|
|
8527
|
+
const retrievalSuccesses = displayedScores.filter(
|
|
8177
8528
|
(s) => s.infrastructureEfficiency != null && s.infrastructureEfficiency >= EFFICIENCY_POSITIVE && !s.invertedRetrievalGap
|
|
8178
8529
|
).sort(
|
|
8179
8530
|
(a, b) => (b.infrastructureEfficiency ?? 0) - (a.infrastructureEfficiency ?? 0)
|
|
8180
8531
|
);
|
|
8181
|
-
if (
|
|
8182
|
-
return /* @__PURE__ */
|
|
8183
|
-
/* @__PURE__ */
|
|
8184
|
-
/* @__PURE__ */
|
|
8185
|
-
/* @__PURE__ */
|
|
8186
|
-
/* @__PURE__ */
|
|
8187
|
-
/* @__PURE__ */
|
|
8532
|
+
if (displayedScores.length === 0) return null;
|
|
8533
|
+
return /* @__PURE__ */ jsxs31(Stack26, { space: 5, children: [
|
|
8534
|
+
/* @__PURE__ */ jsxs31(Stack26, { space: 3, children: [
|
|
8535
|
+
/* @__PURE__ */ jsxs31(Flex24, { align: "center", gap: 2, wrap: "wrap", children: [
|
|
8536
|
+
/* @__PURE__ */ jsx43(CheckmarkCircleIcon2, { style: { color: "#34d399" } }),
|
|
8537
|
+
/* @__PURE__ */ jsx43(Text32, { size: 2, weight: "medium", children: "Strong Areas (70+)" }),
|
|
8538
|
+
/* @__PURE__ */ jsx43(InfoTip, { text: GLOSSARY.strengths }),
|
|
8539
|
+
hasModels && /* @__PURE__ */ jsx43(Box20, { style: { marginLeft: "auto" }, children: /* @__PURE__ */ jsx43(
|
|
8540
|
+
ModelSelector,
|
|
8541
|
+
{
|
|
8542
|
+
models: perModel,
|
|
8543
|
+
onChange: onSelectionChange,
|
|
8544
|
+
selection
|
|
8545
|
+
}
|
|
8546
|
+
) })
|
|
8188
8547
|
] }),
|
|
8189
|
-
/* @__PURE__ */
|
|
8548
|
+
/* @__PURE__ */ jsx43(
|
|
8549
|
+
AreaScoresGrid,
|
|
8550
|
+
{
|
|
8551
|
+
perArea: comparison?.deltas?.perArea,
|
|
8552
|
+
perModel: expandedPerModel,
|
|
8553
|
+
scores: displayedScores
|
|
8554
|
+
}
|
|
8555
|
+
)
|
|
8190
8556
|
] }),
|
|
8191
|
-
retrievalSuccesses.length > 0 && /* @__PURE__ */
|
|
8192
|
-
/* @__PURE__ */
|
|
8557
|
+
retrievalSuccesses.length > 0 && /* @__PURE__ */ jsxs31(Box20, { style: neutralCardStyle, children: [
|
|
8558
|
+
/* @__PURE__ */ jsx43(
|
|
8193
8559
|
Box20,
|
|
8194
8560
|
{
|
|
8195
8561
|
padding: 4,
|
|
8196
8562
|
style: { borderBottom: "1px solid var(--card-border-color)" },
|
|
8197
|
-
children: /* @__PURE__ */
|
|
8198
|
-
/* @__PURE__ */
|
|
8199
|
-
/* @__PURE__ */
|
|
8563
|
+
children: /* @__PURE__ */ jsxs31(Flex24, { align: "center", gap: 2, children: [
|
|
8564
|
+
/* @__PURE__ */ jsx43(SearchIcon6, { style: { color: "#34d399" } }),
|
|
8565
|
+
/* @__PURE__ */ jsxs31(Text32, { size: 2, weight: "medium", children: [
|
|
8200
8566
|
"Retrieval Successes (",
|
|
8201
8567
|
Math.round(EFFICIENCY_POSITIVE * 100),
|
|
8202
8568
|
"%+ efficiency)"
|
|
8203
8569
|
] }),
|
|
8204
|
-
/* @__PURE__ */
|
|
8570
|
+
/* @__PURE__ */ jsx43(InfoTip, { text: GLOSSARY.retrievalExcellence })
|
|
8205
8571
|
] })
|
|
8206
8572
|
}
|
|
8207
8573
|
),
|
|
8208
|
-
/* @__PURE__ */
|
|
8209
|
-
|
|
8574
|
+
/* @__PURE__ */ jsx43(Stack26, { children: retrievalSuccesses.map((area, i) => /* @__PURE__ */ jsxs31(
|
|
8575
|
+
Flex24,
|
|
8210
8576
|
{
|
|
8211
8577
|
align: "center",
|
|
8212
8578
|
justify: "space-between",
|
|
8213
8579
|
padding: 4,
|
|
8214
8580
|
style: i > 0 ? dividerStyle : void 0,
|
|
8215
8581
|
children: [
|
|
8216
|
-
/* @__PURE__ */
|
|
8217
|
-
/* @__PURE__ */
|
|
8582
|
+
/* @__PURE__ */ jsx43(Text32, { size: 2, children: area.feature }),
|
|
8583
|
+
/* @__PURE__ */ jsx43(
|
|
8218
8584
|
"span",
|
|
8219
8585
|
{
|
|
8220
8586
|
style: {
|
|
@@ -8240,392 +8606,72 @@ function StrengthsList({ scores, comparison }) {
|
|
|
8240
8606
|
import {
|
|
8241
8607
|
ErrorOutlineIcon as ErrorOutlineIcon3,
|
|
8242
8608
|
SearchIcon as SearchIcon7,
|
|
8243
|
-
WarningOutlineIcon as
|
|
8609
|
+
WarningOutlineIcon as WarningOutlineIcon3,
|
|
8244
8610
|
BoltIcon as BoltIcon2,
|
|
8245
8611
|
ArrowDownIcon as ArrowDownIcon2
|
|
8246
8612
|
} from "@sanity/icons";
|
|
8247
|
-
import { Box as
|
|
8248
|
-
|
|
8249
|
-
// src/components/report-detail/AreaScoreRow.tsx
|
|
8250
|
-
import { WarningOutlineIcon as WarningOutlineIcon3 } from "@sanity/icons";
|
|
8251
|
-
import { Box as Box21, Flex as Flex24, Stack as Stack27, Text as Text32 } from "@sanity/ui";
|
|
8252
|
-
import { jsx as jsx43, jsxs as jsxs31 } from "react/jsx-runtime";
|
|
8253
|
-
function AreaScoreRow({ area, showTrend }) {
|
|
8254
|
-
return /* @__PURE__ */ jsx43(Box21, { style: { ...neutralCardStyle, padding: 20 }, children: /* @__PURE__ */ jsxs31(Stack27, { space: 4, children: [
|
|
8255
|
-
/* @__PURE__ */ jsxs31(Flex24, { align: "flex-start", gap: 3, justify: "space-between", wrap: "wrap", children: [
|
|
8256
|
-
/* @__PURE__ */ jsxs31(Flex24, { align: "center", gap: 3, children: [
|
|
8257
|
-
/* @__PURE__ */ jsx43(
|
|
8258
|
-
HoverTip,
|
|
8259
|
-
{
|
|
8260
|
-
text: /* @__PURE__ */ jsxs31(Text32, { size: 2, style: { lineHeight: 1.5 }, children: [
|
|
8261
|
-
/* @__PURE__ */ jsx43("span", { style: tipBold, children: area.feature }),
|
|
8262
|
-
" composite score:",
|
|
8263
|
-
" ",
|
|
8264
|
-
/* @__PURE__ */ jsx43(
|
|
8265
|
-
"span",
|
|
8266
|
-
{
|
|
8267
|
-
style: { ...tipValue, color: scoreColor(area.totalScore) },
|
|
8268
|
-
children: Math.round(area.totalScore)
|
|
8269
|
-
}
|
|
8270
|
-
),
|
|
8271
|
-
/* @__PURE__ */ jsx43("span", { style: { color: "var(--card-muted-fg-color)" }, children: "/100" }),
|
|
8272
|
-
". ",
|
|
8273
|
-
GLOSSARY.score
|
|
8274
|
-
] }),
|
|
8275
|
-
children: /* @__PURE__ */ jsx43(Box21, { style: scoreBoxStyle(area.totalScore), children: /* @__PURE__ */ jsx43("span", { style: { fontSize: 20 }, children: Math.round(area.totalScore) }) })
|
|
8276
|
-
}
|
|
8277
|
-
),
|
|
8278
|
-
/* @__PURE__ */ jsxs31(Stack27, { space: 2, children: [
|
|
8279
|
-
/* @__PURE__ */ jsxs31(Flex24, { align: "center", gap: 2, wrap: "wrap", children: [
|
|
8280
|
-
/* @__PURE__ */ jsx43(Text32, { size: 3, weight: "semibold", children: area.feature }),
|
|
8281
|
-
area.negativeDocLift && /* @__PURE__ */ jsx43(HoverTip, { text: GLOSSARY.docsHurt, children: /* @__PURE__ */ jsxs31(
|
|
8282
|
-
"span",
|
|
8283
|
-
{
|
|
8284
|
-
style: {
|
|
8285
|
-
alignItems: "center",
|
|
8286
|
-
backgroundColor: "rgba(239,68,68,0.2)",
|
|
8287
|
-
borderRadius: 4,
|
|
8288
|
-
color: "#f87171",
|
|
8289
|
-
display: "inline-flex",
|
|
8290
|
-
fontSize: 13,
|
|
8291
|
-
gap: 4,
|
|
8292
|
-
padding: "3px 8px"
|
|
8293
|
-
},
|
|
8294
|
-
children: [
|
|
8295
|
-
/* @__PURE__ */ jsx43(WarningOutlineIcon3, {}),
|
|
8296
|
-
"Docs Hurt"
|
|
8297
|
-
]
|
|
8298
|
-
}
|
|
8299
|
-
) }),
|
|
8300
|
-
area.invertedRetrievalGap && /* @__PURE__ */ jsx43(HoverTip, { text: GLOSSARY.invertedRetGap, children: /* @__PURE__ */ jsx43(
|
|
8301
|
-
"span",
|
|
8302
|
-
{
|
|
8303
|
-
style: {
|
|
8304
|
-
backgroundColor: "rgba(245,158,11,0.2)",
|
|
8305
|
-
borderRadius: 4,
|
|
8306
|
-
color: "#fbbf24",
|
|
8307
|
-
fontSize: 13,
|
|
8308
|
-
padding: "3px 8px"
|
|
8309
|
-
},
|
|
8310
|
-
children: "Inverted Retrieval"
|
|
8311
|
-
}
|
|
8312
|
-
) })
|
|
8313
|
-
] }),
|
|
8314
|
-
/* @__PURE__ */ jsxs31(Text32, { muted: true, size: 2, children: [
|
|
8315
|
-
area.testCount,
|
|
8316
|
-
" test",
|
|
8317
|
-
area.testCount === 1 ? "" : "s"
|
|
8318
|
-
] })
|
|
8319
|
-
] })
|
|
8320
|
-
] }),
|
|
8321
|
-
showTrend && /* @__PURE__ */ jsx43(
|
|
8322
|
-
"span",
|
|
8323
|
-
{
|
|
8324
|
-
style: {
|
|
8325
|
-
backgroundColor: showTrend === "improved" ? "rgba(16,185,129,0.2)" : showTrend === "regressed" ? "rgba(239,68,68,0.2)" : "var(--card-muted-bg-color)",
|
|
8326
|
-
borderRadius: 4,
|
|
8327
|
-
color: showTrend === "improved" ? "#34d399" : showTrend === "regressed" ? "#f87171" : "var(--card-muted-fg-color)",
|
|
8328
|
-
fontSize: 13,
|
|
8329
|
-
fontWeight: 500,
|
|
8330
|
-
padding: "4px 10px"
|
|
8331
|
-
},
|
|
8332
|
-
children: showTrend
|
|
8333
|
-
}
|
|
8334
|
-
)
|
|
8335
|
-
] }),
|
|
8336
|
-
/* @__PURE__ */ jsxs31(
|
|
8337
|
-
"div",
|
|
8338
|
-
{
|
|
8339
|
-
style: {
|
|
8340
|
-
display: "grid",
|
|
8341
|
-
gap: 16,
|
|
8342
|
-
gridTemplateColumns: "1fr 1fr 1fr"
|
|
8343
|
-
},
|
|
8344
|
-
children: [
|
|
8345
|
-
/* @__PURE__ */ jsx43(
|
|
8346
|
-
DimBar,
|
|
8347
|
-
{
|
|
8348
|
-
label: "Task Completion",
|
|
8349
|
-
tip: dimBarTip(
|
|
8350
|
-
area.feature,
|
|
8351
|
-
"Task Completion",
|
|
8352
|
-
area.taskCompletion,
|
|
8353
|
-
GLOSSARY.taskCompletion
|
|
8354
|
-
),
|
|
8355
|
-
value: area.taskCompletion
|
|
8356
|
-
}
|
|
8357
|
-
),
|
|
8358
|
-
/* @__PURE__ */ jsx43(
|
|
8359
|
-
DimBar,
|
|
8360
|
-
{
|
|
8361
|
-
label: "Code Correctness",
|
|
8362
|
-
tip: dimBarTip(
|
|
8363
|
-
area.feature,
|
|
8364
|
-
"Code Correctness",
|
|
8365
|
-
area.codeCorrectness,
|
|
8366
|
-
GLOSSARY.codeCorrectness
|
|
8367
|
-
),
|
|
8368
|
-
value: area.codeCorrectness
|
|
8369
|
-
}
|
|
8370
|
-
),
|
|
8371
|
-
/* @__PURE__ */ jsx43(
|
|
8372
|
-
DimBar,
|
|
8373
|
-
{
|
|
8374
|
-
label: "Doc Coverage",
|
|
8375
|
-
tip: dimBarTip(
|
|
8376
|
-
area.feature,
|
|
8377
|
-
"Doc Coverage",
|
|
8378
|
-
area.docCoverage,
|
|
8379
|
-
GLOSSARY.docCoverage
|
|
8380
|
-
),
|
|
8381
|
-
value: area.docCoverage
|
|
8382
|
-
}
|
|
8383
|
-
)
|
|
8384
|
-
]
|
|
8385
|
-
}
|
|
8386
|
-
),
|
|
8387
|
-
/* @__PURE__ */ jsxs31(Flex24, { gap: 5, style: { ...dividerStyle, paddingTop: 12 }, wrap: "wrap", children: [
|
|
8388
|
-
/* @__PURE__ */ jsx43(
|
|
8389
|
-
MetricPair,
|
|
8390
|
-
{
|
|
8391
|
-
color: area.negativeDocLift ? "#f87171" : "#34d399",
|
|
8392
|
-
label: "Doc Lift",
|
|
8393
|
-
tip: metricTip(
|
|
8394
|
-
area.feature,
|
|
8395
|
-
"Doc Lift",
|
|
8396
|
-
`${area.docLift > 0 ? "+" : ""}${area.docLift}`,
|
|
8397
|
-
GLOSSARY.docLift
|
|
8398
|
-
),
|
|
8399
|
-
value: `${area.docLift > 0 ? "+" : ""}${area.docLift}`
|
|
8400
|
-
}
|
|
8401
|
-
),
|
|
8402
|
-
/* @__PURE__ */ jsx43(
|
|
8403
|
-
MetricPair,
|
|
8404
|
-
{
|
|
8405
|
-
label: "Ceiling",
|
|
8406
|
-
tip: metricTip(
|
|
8407
|
-
area.feature,
|
|
8408
|
-
"Ceiling",
|
|
8409
|
-
String(Math.round(area.ceilingScore ?? 0)),
|
|
8410
|
-
GLOSSARY.ceiling
|
|
8411
|
-
),
|
|
8412
|
-
value: String(Math.round(area.ceilingScore ?? 0))
|
|
8413
|
-
}
|
|
8414
|
-
),
|
|
8415
|
-
/* @__PURE__ */ jsx43(
|
|
8416
|
-
MetricPair,
|
|
8417
|
-
{
|
|
8418
|
-
label: "Floor",
|
|
8419
|
-
tip: metricTip(
|
|
8420
|
-
area.feature,
|
|
8421
|
-
"Floor",
|
|
8422
|
-
String(Math.round(area.floorScore ?? 0)),
|
|
8423
|
-
GLOSSARY.floor
|
|
8424
|
-
),
|
|
8425
|
-
value: String(Math.round(area.floorScore ?? 0))
|
|
8426
|
-
}
|
|
8427
|
-
),
|
|
8428
|
-
area.actualScore != null && /* @__PURE__ */ jsx43(
|
|
8429
|
-
MetricPair,
|
|
8430
|
-
{
|
|
8431
|
-
label: "Actual",
|
|
8432
|
-
tip: metricTip(
|
|
8433
|
-
area.feature,
|
|
8434
|
-
"Actual",
|
|
8435
|
-
String(Math.round(area.actualScore)),
|
|
8436
|
-
GLOSSARY.actualScore
|
|
8437
|
-
),
|
|
8438
|
-
value: String(Math.round(area.actualScore))
|
|
8439
|
-
}
|
|
8440
|
-
),
|
|
8441
|
-
area.infrastructureEfficiency != null && /* @__PURE__ */ jsx43(
|
|
8442
|
-
MetricPair,
|
|
8443
|
-
{
|
|
8444
|
-
color: efficiencyColor(area.infrastructureEfficiency),
|
|
8445
|
-
label: "Efficiency",
|
|
8446
|
-
tip: metricTip(
|
|
8447
|
-
area.feature,
|
|
8448
|
-
"Efficiency",
|
|
8449
|
-
formatPercent(area.infrastructureEfficiency),
|
|
8450
|
-
GLOSSARY.infraEfficiency
|
|
8451
|
-
),
|
|
8452
|
-
value: formatPercent(area.infrastructureEfficiency)
|
|
8453
|
-
}
|
|
8454
|
-
),
|
|
8455
|
-
area.retrievalGap != null && /* @__PURE__ */ jsx43(
|
|
8456
|
-
MetricPair,
|
|
8457
|
-
{
|
|
8458
|
-
label: "Ret Gap",
|
|
8459
|
-
tip: metricTip(
|
|
8460
|
-
area.feature,
|
|
8461
|
-
"Retrieval Gap",
|
|
8462
|
-
String(area.retrievalGap),
|
|
8463
|
-
GLOSSARY.retrievalGap
|
|
8464
|
-
),
|
|
8465
|
-
value: String(area.retrievalGap)
|
|
8466
|
-
}
|
|
8467
|
-
)
|
|
8468
|
-
] })
|
|
8469
|
-
] }) });
|
|
8470
|
-
}
|
|
8471
|
-
var tipValue = {
|
|
8472
|
-
fontFamily: "var(--font-code-size, monospace)",
|
|
8473
|
-
fontWeight: 600
|
|
8474
|
-
};
|
|
8475
|
-
var tipBold = { fontWeight: 600 };
|
|
8476
|
-
function dimBarTip(area, dim, score, description) {
|
|
8477
|
-
return /* @__PURE__ */ jsxs31(Text32, { size: 2, style: { lineHeight: 1.5 }, children: [
|
|
8478
|
-
/* @__PURE__ */ jsx43("span", { style: tipBold, children: area }),
|
|
8479
|
-
" \u2192 ",
|
|
8480
|
-
/* @__PURE__ */ jsx43("span", { style: tipBold, children: dim }),
|
|
8481
|
-
":",
|
|
8482
|
-
" ",
|
|
8483
|
-
/* @__PURE__ */ jsx43("span", { style: { ...tipValue, color: scoreColor(score) }, children: Math.round(score) }),
|
|
8484
|
-
/* @__PURE__ */ jsx43("span", { style: { color: "var(--card-muted-fg-color)" }, children: "/100" }),
|
|
8485
|
-
".",
|
|
8486
|
-
" ",
|
|
8487
|
-
description
|
|
8488
|
-
] });
|
|
8489
|
-
}
|
|
8490
|
-
function DimBar({
|
|
8491
|
-
label,
|
|
8492
|
-
value,
|
|
8493
|
-
tip
|
|
8494
|
-
}) {
|
|
8495
|
-
const bar = /* @__PURE__ */ jsxs31(Stack27, { space: 2, style: { flex: 1 }, children: [
|
|
8496
|
-
/* @__PURE__ */ jsxs31(Flex24, { align: "center", justify: "space-between", children: [
|
|
8497
|
-
/* @__PURE__ */ jsx43(Text32, { muted: true, size: 1, children: label }),
|
|
8498
|
-
/* @__PURE__ */ jsx43(
|
|
8499
|
-
"span",
|
|
8500
|
-
{
|
|
8501
|
-
style: {
|
|
8502
|
-
color: scoreColor(value),
|
|
8503
|
-
fontFamily: "var(--font-code-size, monospace)",
|
|
8504
|
-
fontSize: 14,
|
|
8505
|
-
fontWeight: 600
|
|
8506
|
-
},
|
|
8507
|
-
children: Math.round(value)
|
|
8508
|
-
}
|
|
8509
|
-
)
|
|
8510
|
-
] }),
|
|
8511
|
-
/* @__PURE__ */ jsx43(
|
|
8512
|
-
Box21,
|
|
8513
|
-
{
|
|
8514
|
-
style: {
|
|
8515
|
-
backgroundColor: "var(--card-border-color)",
|
|
8516
|
-
borderRadius: 999,
|
|
8517
|
-
height: 6,
|
|
8518
|
-
overflow: "hidden"
|
|
8519
|
-
},
|
|
8520
|
-
children: /* @__PURE__ */ jsx43(
|
|
8521
|
-
Box21,
|
|
8522
|
-
{
|
|
8523
|
-
style: {
|
|
8524
|
-
backgroundColor: barFillColor(value),
|
|
8525
|
-
borderRadius: 999,
|
|
8526
|
-
height: "100%",
|
|
8527
|
-
transition: "width 0.3s",
|
|
8528
|
-
width: `${Math.min(value, 100)}%`
|
|
8529
|
-
}
|
|
8530
|
-
}
|
|
8531
|
-
)
|
|
8532
|
-
}
|
|
8533
|
-
)
|
|
8534
|
-
] });
|
|
8535
|
-
if (tip) {
|
|
8536
|
-
return /* @__PURE__ */ jsx43(HoverTip, { text: tip, children: bar });
|
|
8537
|
-
}
|
|
8538
|
-
return bar;
|
|
8539
|
-
}
|
|
8540
|
-
function metricTip(area, metric, displayValue, description) {
|
|
8541
|
-
return /* @__PURE__ */ jsxs31(Text32, { size: 2, style: { lineHeight: 1.5 }, children: [
|
|
8542
|
-
/* @__PURE__ */ jsx43("span", { style: tipBold, children: area }),
|
|
8543
|
-
" \u2192",
|
|
8544
|
-
" ",
|
|
8545
|
-
/* @__PURE__ */ jsx43("span", { style: tipBold, children: metric }),
|
|
8546
|
-
":",
|
|
8547
|
-
" ",
|
|
8548
|
-
/* @__PURE__ */ jsx43("span", { style: tipValue, children: displayValue }),
|
|
8549
|
-
". ",
|
|
8550
|
-
description
|
|
8551
|
-
] });
|
|
8552
|
-
}
|
|
8553
|
-
function MetricPair({
|
|
8554
|
-
color,
|
|
8555
|
-
label,
|
|
8556
|
-
value,
|
|
8557
|
-
tip
|
|
8558
|
-
}) {
|
|
8559
|
-
const content = /* @__PURE__ */ jsxs31(Text32, { muted: true, size: 1, children: [
|
|
8560
|
-
label,
|
|
8561
|
-
":",
|
|
8562
|
-
" ",
|
|
8563
|
-
/* @__PURE__ */ jsx43(
|
|
8564
|
-
"span",
|
|
8565
|
-
{
|
|
8566
|
-
style: {
|
|
8567
|
-
color: color ?? "var(--card-fg-color)",
|
|
8568
|
-
fontFamily: "var(--font-code-size, monospace)",
|
|
8569
|
-
fontWeight: 500
|
|
8570
|
-
},
|
|
8571
|
-
children: value
|
|
8572
|
-
}
|
|
8573
|
-
)
|
|
8574
|
-
] });
|
|
8575
|
-
if (tip) {
|
|
8576
|
-
return /* @__PURE__ */ jsx43(HoverTip, { text: tip, children: content });
|
|
8577
|
-
}
|
|
8578
|
-
return content;
|
|
8579
|
-
}
|
|
8580
|
-
|
|
8581
|
-
// src/components/report-detail/WeaknessesList.tsx
|
|
8613
|
+
import { Box as Box21, Flex as Flex25, Stack as Stack27, Text as Text33 } from "@sanity/ui";
|
|
8582
8614
|
import { jsx as jsx44, jsxs as jsxs32 } from "react/jsx-runtime";
|
|
8583
|
-
function WeaknessesList({
|
|
8584
|
-
|
|
8615
|
+
function WeaknessesList({
|
|
8616
|
+
scores,
|
|
8617
|
+
comparison,
|
|
8618
|
+
perModel
|
|
8619
|
+
}) {
|
|
8620
|
+
const {
|
|
8621
|
+
selection,
|
|
8622
|
+
onSelectionChange,
|
|
8623
|
+
resolvedScores,
|
|
8624
|
+
hasModels,
|
|
8625
|
+
expandedPerModel
|
|
8626
|
+
} = useModelSelection({ scores, perModel });
|
|
8627
|
+
const weakFeatures = new Set(
|
|
8628
|
+
scores.filter((s) => s.totalScore < SCORE_CAUTION).map((s) => s.feature)
|
|
8629
|
+
);
|
|
8630
|
+
const weakAreas = resolvedScores.filter((s) => weakFeatures.has(s.feature)).sort((a, b) => a.totalScore - b.totalScore);
|
|
8585
8631
|
const docsHurt = scores.filter((s) => s.negativeDocLift);
|
|
8586
8632
|
const retrievalIssues = scores.filter(
|
|
8587
8633
|
(s) => s.infrastructureEfficiency != null && s.infrastructureEfficiency < EFFICIENCY_CAUTION && !s.invertedRetrievalGap
|
|
8588
8634
|
);
|
|
8589
8635
|
const dimWeaknesses = scores.map((s) => ({ area: s, dims: getDimensionWeaknesses(s) })).filter(({ dims }) => dims.length > 0);
|
|
8590
8636
|
const regressed = comparison?.regressed ?? [];
|
|
8591
|
-
const improved = comparison?.improved ?? [];
|
|
8592
|
-
const unchanged = comparison?.unchanged ?? [];
|
|
8593
8637
|
const perArea = comparison?.deltas?.perArea;
|
|
8594
8638
|
const efficiencyAnomalies = scores.filter(
|
|
8595
8639
|
(s) => s.infrastructureEfficiency != null && s.infrastructureEfficiency > EFFICIENCY_ANOMALY
|
|
8596
8640
|
);
|
|
8597
8641
|
const hasContent = weakAreas.length > 0 || docsHurt.length > 0 || retrievalIssues.length > 0 || dimWeaknesses.length > 0 || regressed.length > 0 || efficiencyAnomalies.length > 0;
|
|
8598
8642
|
if (!hasContent) return null;
|
|
8599
|
-
|
|
8600
|
-
|
|
8601
|
-
|
|
8602
|
-
if (unchanged.includes(feature)) return "unchanged";
|
|
8603
|
-
return null;
|
|
8604
|
-
};
|
|
8605
|
-
return /* @__PURE__ */ jsxs32(Stack28, { space: 5, children: [
|
|
8606
|
-
weakAreas.length > 0 && /* @__PURE__ */ jsxs32(Stack28, { space: 3, children: [
|
|
8607
|
-
/* @__PURE__ */ jsxs32(Flex25, { align: "center", gap: 2, children: [
|
|
8643
|
+
return /* @__PURE__ */ jsxs32(Stack27, { space: 5, children: [
|
|
8644
|
+
weakAreas.length > 0 && /* @__PURE__ */ jsxs32(Stack27, { space: 3, children: [
|
|
8645
|
+
/* @__PURE__ */ jsxs32(Flex25, { align: "center", gap: 2, wrap: "wrap", children: [
|
|
8608
8646
|
/* @__PURE__ */ jsx44(ErrorOutlineIcon3, { style: { color: "#f87171" } }),
|
|
8609
8647
|
/* @__PURE__ */ jsx44(Text33, { size: 2, weight: "medium", children: "Weak Areas (<70)" }),
|
|
8610
|
-
/* @__PURE__ */ jsx44(InfoTip, { text: GLOSSARY.weakAreas })
|
|
8648
|
+
/* @__PURE__ */ jsx44(InfoTip, { text: GLOSSARY.weakAreas }),
|
|
8649
|
+
hasModels && /* @__PURE__ */ jsx44(Box21, { style: { marginLeft: "auto" }, children: /* @__PURE__ */ jsx44(
|
|
8650
|
+
ModelSelector,
|
|
8651
|
+
{
|
|
8652
|
+
models: perModel,
|
|
8653
|
+
onChange: onSelectionChange,
|
|
8654
|
+
selection
|
|
8655
|
+
}
|
|
8656
|
+
) })
|
|
8611
8657
|
] }),
|
|
8612
|
-
/* @__PURE__ */ jsx44(
|
|
8613
|
-
|
|
8658
|
+
/* @__PURE__ */ jsx44(
|
|
8659
|
+
AreaScoresGrid,
|
|
8614
8660
|
{
|
|
8615
|
-
|
|
8616
|
-
|
|
8617
|
-
|
|
8618
|
-
|
|
8619
|
-
)
|
|
8661
|
+
perArea,
|
|
8662
|
+
perModel: expandedPerModel,
|
|
8663
|
+
scores: weakAreas
|
|
8664
|
+
}
|
|
8665
|
+
)
|
|
8620
8666
|
] }),
|
|
8621
|
-
docsHurt.length > 0 && /* @__PURE__ */ jsxs32(
|
|
8667
|
+
docsHurt.length > 0 && /* @__PURE__ */ jsxs32(Stack27, { space: 3, children: [
|
|
8622
8668
|
/* @__PURE__ */ jsxs32(Flex25, { align: "center", gap: 2, children: [
|
|
8623
8669
|
/* @__PURE__ */ jsx44(ErrorOutlineIcon3, { style: { color: "#f87171" } }),
|
|
8624
8670
|
/* @__PURE__ */ jsx44(Text33, { size: 2, weight: "medium", children: "Docs Hurt Performance (Negative Doc Lift)" }),
|
|
8625
8671
|
/* @__PURE__ */ jsx44(InfoTip, { text: GLOSSARY.docsHurt })
|
|
8626
8672
|
] }),
|
|
8627
|
-
/* @__PURE__ */ jsx44(
|
|
8628
|
-
|
|
8673
|
+
/* @__PURE__ */ jsx44(Box21, { style: sectionStyle("red"), children: docsHurt.map((area, i) => /* @__PURE__ */ jsxs32(
|
|
8674
|
+
Box21,
|
|
8629
8675
|
{
|
|
8630
8676
|
padding: 4,
|
|
8631
8677
|
style: i > 0 ? { borderTop: "1px solid rgba(239,68,68,0.2)" } : void 0,
|
|
@@ -8661,7 +8707,7 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8661
8707
|
}
|
|
8662
8708
|
)
|
|
8663
8709
|
] }),
|
|
8664
|
-
/* @__PURE__ */ jsx44(
|
|
8710
|
+
/* @__PURE__ */ jsx44(Box21, { paddingTop: 2, children: /* @__PURE__ */ jsxs32(Text33, { muted: true, size: 2, children: [
|
|
8665
8711
|
area.invertedRetrievalGap && /* @__PURE__ */ jsxs32("span", { style: { color: "#fbbf24" }, children: [
|
|
8666
8712
|
"Agent does better by NOT finding these docs.",
|
|
8667
8713
|
" "
|
|
@@ -8677,14 +8723,14 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8677
8723
|
area.feature
|
|
8678
8724
|
)) })
|
|
8679
8725
|
] }),
|
|
8680
|
-
retrievalIssues.length > 0 && /* @__PURE__ */ jsxs32(
|
|
8726
|
+
retrievalIssues.length > 0 && /* @__PURE__ */ jsxs32(Stack27, { space: 3, children: [
|
|
8681
8727
|
/* @__PURE__ */ jsxs32(Flex25, { align: "center", gap: 2, children: [
|
|
8682
8728
|
/* @__PURE__ */ jsx44(SearchIcon7, { style: { color: "#fbbf24" } }),
|
|
8683
8729
|
/* @__PURE__ */ jsx44(Text33, { size: 2, weight: "medium", children: "Retrieval Issues (<70% efficiency)" }),
|
|
8684
8730
|
/* @__PURE__ */ jsx44(InfoTip, { text: GLOSSARY.retrievalIssues })
|
|
8685
8731
|
] }),
|
|
8686
|
-
/* @__PURE__ */ jsx44(
|
|
8687
|
-
|
|
8732
|
+
/* @__PURE__ */ jsx44(Box21, { style: sectionStyle("amber"), children: retrievalIssues.map((area, i) => /* @__PURE__ */ jsxs32(
|
|
8733
|
+
Box21,
|
|
8688
8734
|
{
|
|
8689
8735
|
padding: 4,
|
|
8690
8736
|
style: i > 0 ? { borderTop: "1px solid rgba(245,158,11,0.2)" } : void 0,
|
|
@@ -8720,7 +8766,7 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8720
8766
|
}
|
|
8721
8767
|
)
|
|
8722
8768
|
] }),
|
|
8723
|
-
/* @__PURE__ */ jsx44(
|
|
8769
|
+
/* @__PURE__ */ jsx44(Box21, { paddingTop: 2, children: /* @__PURE__ */ jsxs32(Text33, { muted: true, size: 2, children: [
|
|
8724
8770
|
"Actual score (",
|
|
8725
8771
|
Math.round(area.actualScore ?? 0),
|
|
8726
8772
|
") is much lower than ceiling (",
|
|
@@ -8735,14 +8781,14 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8735
8781
|
area.feature
|
|
8736
8782
|
)) })
|
|
8737
8783
|
] }),
|
|
8738
|
-
dimWeaknesses.length > 0 && /* @__PURE__ */ jsxs32(
|
|
8784
|
+
dimWeaknesses.length > 0 && /* @__PURE__ */ jsxs32(Stack27, { space: 3, children: [
|
|
8739
8785
|
/* @__PURE__ */ jsxs32(Flex25, { align: "center", gap: 2, children: [
|
|
8740
|
-
/* @__PURE__ */ jsx44(
|
|
8786
|
+
/* @__PURE__ */ jsx44(WarningOutlineIcon3, { style: { color: "#fbbf24" } }),
|
|
8741
8787
|
/* @__PURE__ */ jsx44(Text33, { size: 2, weight: "medium", children: "Dimension Weaknesses (<50)" }),
|
|
8742
8788
|
/* @__PURE__ */ jsx44(InfoTip, { text: GLOSSARY.dimWeaknesses })
|
|
8743
8789
|
] }),
|
|
8744
|
-
/* @__PURE__ */ jsx44(
|
|
8745
|
-
|
|
8790
|
+
/* @__PURE__ */ jsx44(Box21, { style: neutralCardStyle, children: dimWeaknesses.map(({ area, dims }, i) => /* @__PURE__ */ jsxs32(
|
|
8791
|
+
Box21,
|
|
8746
8792
|
{
|
|
8747
8793
|
padding: 4,
|
|
8748
8794
|
style: i > 0 ? dividerStyle : void 0,
|
|
@@ -8786,9 +8832,9 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8786
8832
|
area.feature
|
|
8787
8833
|
)) })
|
|
8788
8834
|
] }),
|
|
8789
|
-
regressed.length > 0 && /* @__PURE__ */ jsxs32(
|
|
8835
|
+
regressed.length > 0 && /* @__PURE__ */ jsxs32(Box21, { style: neutralCardStyle, children: [
|
|
8790
8836
|
/* @__PURE__ */ jsx44(
|
|
8791
|
-
|
|
8837
|
+
Box21,
|
|
8792
8838
|
{
|
|
8793
8839
|
padding: 4,
|
|
8794
8840
|
style: { borderBottom: "1px solid var(--card-border-color)" },
|
|
@@ -8798,7 +8844,7 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8798
8844
|
] })
|
|
8799
8845
|
}
|
|
8800
8846
|
),
|
|
8801
|
-
/* @__PURE__ */ jsx44(
|
|
8847
|
+
/* @__PURE__ */ jsx44(Stack27, { children: regressed.map((featureName, i) => {
|
|
8802
8848
|
const area = scores.find((s) => s.feature === featureName);
|
|
8803
8849
|
const areaDelta = perArea?.[featureName];
|
|
8804
8850
|
return /* @__PURE__ */ jsxs32(
|
|
@@ -8834,13 +8880,13 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8834
8880
|
);
|
|
8835
8881
|
}) })
|
|
8836
8882
|
] }),
|
|
8837
|
-
efficiencyAnomalies.length > 0 && /* @__PURE__ */ jsxs32(
|
|
8883
|
+
efficiencyAnomalies.length > 0 && /* @__PURE__ */ jsxs32(Box21, { style: neutralCardStyle, children: [
|
|
8838
8884
|
/* @__PURE__ */ jsx44(
|
|
8839
|
-
|
|
8885
|
+
Box21,
|
|
8840
8886
|
{
|
|
8841
8887
|
padding: 4,
|
|
8842
8888
|
style: { borderBottom: "1px solid var(--card-border-color)" },
|
|
8843
|
-
children: /* @__PURE__ */ jsxs32(
|
|
8889
|
+
children: /* @__PURE__ */ jsxs32(Stack27, { space: 2, children: [
|
|
8844
8890
|
/* @__PURE__ */ jsxs32(Flex25, { align: "center", gap: 2, children: [
|
|
8845
8891
|
/* @__PURE__ */ jsx44(BoltIcon2, { style: { color: "#fbbf24" } }),
|
|
8846
8892
|
/* @__PURE__ */ jsx44(Text33, { size: 2, weight: "medium", children: "Efficiency Anomalies (>100%)" }),
|
|
@@ -8850,7 +8896,7 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8850
8896
|
] })
|
|
8851
8897
|
}
|
|
8852
8898
|
),
|
|
8853
|
-
/* @__PURE__ */ jsx44(
|
|
8899
|
+
/* @__PURE__ */ jsx44(Stack27, { children: efficiencyAnomalies.map((area, i) => /* @__PURE__ */ jsxs32(
|
|
8854
8900
|
Flex25,
|
|
8855
8901
|
{
|
|
8856
8902
|
align: "center",
|
|
@@ -8878,7 +8924,7 @@ function WeaknessesList({ scores, comparison }) {
|
|
|
8878
8924
|
] })
|
|
8879
8925
|
] });
|
|
8880
8926
|
}
|
|
8881
|
-
var
|
|
8927
|
+
var tipValue = {
|
|
8882
8928
|
color: "#f87171",
|
|
8883
8929
|
fontFamily: "var(--font-code-size, monospace)",
|
|
8884
8930
|
fontWeight: 600
|
|
@@ -8891,7 +8937,7 @@ function dimTip(area, dim, score, description) {
|
|
|
8891
8937
|
/* @__PURE__ */ jsx44("span", { style: tipArea, children: area }),
|
|
8892
8938
|
" scores",
|
|
8893
8939
|
" ",
|
|
8894
|
-
/* @__PURE__ */ jsx44("span", { style:
|
|
8940
|
+
/* @__PURE__ */ jsx44("span", { style: tipValue, children: score }),
|
|
8895
8941
|
/* @__PURE__ */ jsx44("span", { style: { color: "var(--card-muted-fg-color)" }, children: "/100" }),
|
|
8896
8942
|
" on",
|
|
8897
8943
|
" ",
|
|
@@ -8956,8 +9002,8 @@ function ReportDetail({
|
|
|
8956
9002
|
subTab
|
|
8957
9003
|
}) {
|
|
8958
9004
|
const client = useClient10({ apiVersion: API_VERSION });
|
|
8959
|
-
const [loading, setLoading] =
|
|
8960
|
-
const [report, setReport] =
|
|
9005
|
+
const [loading, setLoading] = useState19(true);
|
|
9006
|
+
const [report, setReport] = useState19(null);
|
|
8961
9007
|
useEffect9(() => {
|
|
8962
9008
|
let cancelled = false;
|
|
8963
9009
|
setLoading(true);
|
|
@@ -8983,22 +9029,22 @@ function ReportDetail({
|
|
|
8983
9029
|
const hasAgentActivity = Boolean(
|
|
8984
9030
|
summary?.agentBehavior && summary.agentBehavior.length > 0
|
|
8985
9031
|
);
|
|
8986
|
-
const tabs =
|
|
9032
|
+
const tabs = useMemo9(
|
|
8987
9033
|
() => [OVERVIEW_TAB, DIAGNOSTICS_TAB, ACTIVITY_TAB],
|
|
8988
9034
|
[]
|
|
8989
9035
|
);
|
|
8990
|
-
const disabledTabs =
|
|
9036
|
+
const disabledTabs = useMemo9(() => {
|
|
8991
9037
|
const set2 = /* @__PURE__ */ new Set();
|
|
8992
9038
|
if (!hasDiagnostics) set2.add("diagnostics");
|
|
8993
9039
|
if (!hasAgentActivity) set2.add("activity");
|
|
8994
9040
|
return set2;
|
|
8995
9041
|
}, [hasDiagnostics, hasAgentActivity]);
|
|
8996
|
-
const currentTab =
|
|
9042
|
+
const currentTab = useMemo9(() => {
|
|
8997
9043
|
const parsed = parseTab(activeTab);
|
|
8998
9044
|
if (disabledTabs.has(parsed)) return "overview";
|
|
8999
9045
|
return tabs.some((t) => t.id === parsed) ? parsed : "overview";
|
|
9000
9046
|
}, [activeTab, disabledTabs, tabs]);
|
|
9001
|
-
const handleTabClick =
|
|
9047
|
+
const handleTabClick = useCallback25(
|
|
9002
9048
|
(tabId) => {
|
|
9003
9049
|
onTabChange(tabId === "overview" ? null : tabId, null, null);
|
|
9004
9050
|
},
|
|
@@ -9008,7 +9054,7 @@ function ReportDetail({
|
|
|
9008
9054
|
return /* @__PURE__ */ jsx45(LoadingState, { message: "Loading report\u2026" });
|
|
9009
9055
|
}
|
|
9010
9056
|
if (!report || !summary) {
|
|
9011
|
-
return /* @__PURE__ */ jsx45(
|
|
9057
|
+
return /* @__PURE__ */ jsx45(Box22, { padding: 5, children: /* @__PURE__ */ jsxs33(Stack28, { space: 4, children: [
|
|
9012
9058
|
/* @__PURE__ */ jsx45(
|
|
9013
9059
|
Button8,
|
|
9014
9060
|
{
|
|
@@ -9023,7 +9069,7 @@ function ReportDetail({
|
|
|
9023
9069
|
}
|
|
9024
9070
|
const { comparison, provenance } = report;
|
|
9025
9071
|
const totalTests = summary.scores.reduce((n, s) => n + s.testCount, 0);
|
|
9026
|
-
return /* @__PURE__ */ jsx45(
|
|
9072
|
+
return /* @__PURE__ */ jsx45(Box22, { padding: 4, children: /* @__PURE__ */ jsxs33(Stack28, { space: 5, children: [
|
|
9027
9073
|
/* @__PURE__ */ jsx45(
|
|
9028
9074
|
ReportHeader,
|
|
9029
9075
|
{
|
|
@@ -9051,7 +9097,7 @@ function ReportDetail({
|
|
|
9051
9097
|
return isDisabled && tooltip ? /* @__PURE__ */ jsx45(
|
|
9052
9098
|
Tooltip8,
|
|
9053
9099
|
{
|
|
9054
|
-
content: /* @__PURE__ */ jsx45(
|
|
9100
|
+
content: /* @__PURE__ */ jsx45(Box22, { padding: 2, style: { maxWidth: 280 }, children: tooltip }),
|
|
9055
9101
|
placement: "bottom",
|
|
9056
9102
|
portal: true,
|
|
9057
9103
|
children: /* @__PURE__ */ jsx45("span", { style: { display: "inline-block" }, children: tabElement })
|
|
@@ -9085,7 +9131,7 @@ function ReportDetail({
|
|
|
9085
9131
|
"aria-labelledby": "tab-overview",
|
|
9086
9132
|
hidden: currentTab !== "overview",
|
|
9087
9133
|
id: "panel-overview",
|
|
9088
|
-
children: /* @__PURE__ */ jsxs33(
|
|
9134
|
+
children: /* @__PURE__ */ jsxs33(Stack28, { space: 5, children: [
|
|
9089
9135
|
/* @__PURE__ */ jsx45(
|
|
9090
9136
|
DiagnosticsOverview,
|
|
9091
9137
|
{
|
|
@@ -9108,6 +9154,7 @@ function ReportDetail({
|
|
|
9108
9154
|
focus,
|
|
9109
9155
|
judgments: summary.lowScoringJudgments,
|
|
9110
9156
|
onNavigate: (newSubTab, newFocus) => onTabChange("diagnostics", newSubTab, newFocus),
|
|
9157
|
+
perModel: summary.perModel,
|
|
9111
9158
|
recommendations: summary.recommendations,
|
|
9112
9159
|
scores: summary.scores,
|
|
9113
9160
|
subTab
|
|
@@ -9143,6 +9190,7 @@ function DiagnosticsPanel({
|
|
|
9143
9190
|
focus,
|
|
9144
9191
|
judgments,
|
|
9145
9192
|
onNavigate,
|
|
9193
|
+
perModel,
|
|
9146
9194
|
recommendations,
|
|
9147
9195
|
scores,
|
|
9148
9196
|
subTab: subTabParam
|
|
@@ -9151,7 +9199,7 @@ function DiagnosticsPanel({
|
|
|
9151
9199
|
const issueCount = scores.filter((s) => s.totalScore < SCORE_CAUTION).length + scores.filter((s) => s.negativeDocLift).length + scores.filter(
|
|
9152
9200
|
(s) => s.infrastructureEfficiency != null && s.infrastructureEfficiency < EFFICIENCY_CAUTION && !s.invertedRetrievalGap
|
|
9153
9201
|
).length;
|
|
9154
|
-
return /* @__PURE__ */ jsx45(TabPanel, { "aria-labelledby": "tab-diagnostics", id: "panel-diagnostics", children: /* @__PURE__ */ jsxs33(
|
|
9202
|
+
return /* @__PURE__ */ jsx45(TabPanel, { "aria-labelledby": "tab-diagnostics", id: "panel-diagnostics", children: /* @__PURE__ */ jsxs33(Stack28, { space: 4, children: [
|
|
9155
9203
|
/* @__PURE__ */ jsx45(
|
|
9156
9204
|
Flex26,
|
|
9157
9205
|
{
|
|
@@ -9202,9 +9250,23 @@ function DiagnosticsPanel({
|
|
|
9202
9250
|
))
|
|
9203
9251
|
}
|
|
9204
9252
|
),
|
|
9205
|
-
subTab === "strengths" && /* @__PURE__ */ jsx45(
|
|
9206
|
-
|
|
9207
|
-
|
|
9253
|
+
subTab === "strengths" && /* @__PURE__ */ jsx45(
|
|
9254
|
+
StrengthsList,
|
|
9255
|
+
{
|
|
9256
|
+
comparison,
|
|
9257
|
+
perModel,
|
|
9258
|
+
scores
|
|
9259
|
+
}
|
|
9260
|
+
),
|
|
9261
|
+
subTab === "issues" && /* @__PURE__ */ jsxs33(Stack28, { space: 5, children: [
|
|
9262
|
+
/* @__PURE__ */ jsx45(
|
|
9263
|
+
WeaknessesList,
|
|
9264
|
+
{
|
|
9265
|
+
comparison,
|
|
9266
|
+
perModel,
|
|
9267
|
+
scores
|
|
9268
|
+
}
|
|
9269
|
+
),
|
|
9208
9270
|
recommendations && recommendations.gaps.length > 0 && /* @__PURE__ */ jsx45(RecommendationsSection, { recommendations }),
|
|
9209
9271
|
judgments && judgments.length > 0 && /* @__PURE__ */ jsx45(
|
|
9210
9272
|
JudgmentList,
|
|
@@ -9256,9 +9318,14 @@ function getDisabledTabTooltip(tabId, summary) {
|
|
|
9256
9318
|
}
|
|
9257
9319
|
}
|
|
9258
9320
|
|
|
9321
|
+
// src/components/report-detail/AreaScoreRow.tsx
|
|
9322
|
+
import { WarningOutlineIcon as WarningOutlineIcon4 } from "@sanity/icons";
|
|
9323
|
+
import { Box as Box23, Flex as Flex27, Stack as Stack29, Text as Text35 } from "@sanity/ui";
|
|
9324
|
+
import { jsx as jsx46, jsxs as jsxs34 } from "react/jsx-runtime";
|
|
9325
|
+
|
|
9259
9326
|
// src/components/report-detail/AreaScoreTable.tsx
|
|
9260
9327
|
import React4 from "react";
|
|
9261
|
-
import { Card as Card17, Stack as Stack30, Text as
|
|
9328
|
+
import { Card as Card17, Stack as Stack30, Text as Text37 } from "@sanity/ui";
|
|
9262
9329
|
|
|
9263
9330
|
// src/lib/scoring.ts
|
|
9264
9331
|
var HEX_MAP = {
|
|
@@ -9275,30 +9342,30 @@ function scoreHex(score) {
|
|
|
9275
9342
|
}
|
|
9276
9343
|
|
|
9277
9344
|
// src/components/primitives/ScoreCell.tsx
|
|
9278
|
-
import { Card as Card16, Text as
|
|
9279
|
-
import { jsx as
|
|
9345
|
+
import { Card as Card16, Text as Text36 } from "@sanity/ui";
|
|
9346
|
+
import { jsx as jsx47 } from "react/jsx-runtime";
|
|
9280
9347
|
|
|
9281
9348
|
// src/components/report-detail/AreaScoreTable.tsx
|
|
9282
|
-
import { jsx as
|
|
9349
|
+
import { jsx as jsx48, jsxs as jsxs35 } from "react/jsx-runtime";
|
|
9283
9350
|
|
|
9284
9351
|
// src/components/report-detail/ComparisonSummary.tsx
|
|
9285
|
-
import { Badge as Badge8, Box as Box24, Card as Card18, Flex as
|
|
9286
|
-
import { jsx as
|
|
9352
|
+
import { Badge as Badge8, Box as Box24, Card as Card18, Flex as Flex28, Grid as Grid4, Stack as Stack31, Text as Text38, Tooltip as Tooltip9 } from "@sanity/ui";
|
|
9353
|
+
import { jsx as jsx49, jsxs as jsxs36 } from "react/jsx-runtime";
|
|
9287
9354
|
|
|
9288
9355
|
// src/components/report-detail/OverviewStats.tsx
|
|
9289
9356
|
import { Grid as Grid5 } from "@sanity/ui";
|
|
9290
|
-
import { jsx as
|
|
9357
|
+
import { jsx as jsx50, jsxs as jsxs37 } from "react/jsx-runtime";
|
|
9291
9358
|
|
|
9292
9359
|
// src/components/report-detail/ThreeLayerTable.tsx
|
|
9293
9360
|
import React5 from "react";
|
|
9294
|
-
import { Badge as Badge9, Card as Card19, Flex as
|
|
9295
|
-
import { jsx as
|
|
9361
|
+
import { Badge as Badge9, Card as Card19, Flex as Flex29, Stack as Stack32, Text as Text39 } from "@sanity/ui";
|
|
9362
|
+
import { jsx as jsx51, jsxs as jsxs38 } from "react/jsx-runtime";
|
|
9296
9363
|
|
|
9297
9364
|
// src/components/ScoreTimeline.tsx
|
|
9298
|
-
import { Card as Card20, Flex as
|
|
9299
|
-
import { useCallback as
|
|
9365
|
+
import { Card as Card20, Flex as Flex30, Select as Select2, Stack as Stack33, Text as Text40 } from "@sanity/ui";
|
|
9366
|
+
import { useCallback as useCallback26, useEffect as useEffect10, useMemo as useMemo10, useState as useState20 } from "react";
|
|
9300
9367
|
import { useClient as useClient11 } from "sanity";
|
|
9301
|
-
import { jsx as
|
|
9368
|
+
import { jsx as jsx52, jsxs as jsxs39 } from "react/jsx-runtime";
|
|
9302
9369
|
var CHART_HEIGHT = 220;
|
|
9303
9370
|
var CHART_WIDTH = 800;
|
|
9304
9371
|
var PAD_BOTTOM = 30;
|
|
@@ -9333,11 +9400,11 @@ function scoreForPoint(point, area) {
|
|
|
9333
9400
|
}
|
|
9334
9401
|
function ScoreTimeline({ mode = null, source = null }) {
|
|
9335
9402
|
const client = useClient11({ apiVersion: API_VERSION });
|
|
9336
|
-
const [dataPoints, setDataPoints] =
|
|
9337
|
-
const [loading, setLoading] =
|
|
9338
|
-
const [rangeDays, setRangeDays] =
|
|
9339
|
-
const [selectedArea, setSelectedArea] =
|
|
9340
|
-
const areaNames =
|
|
9403
|
+
const [dataPoints, setDataPoints] = useState20([]);
|
|
9404
|
+
const [loading, setLoading] = useState20(true);
|
|
9405
|
+
const [rangeDays, setRangeDays] = useState20(30);
|
|
9406
|
+
const [selectedArea, setSelectedArea] = useState20(null);
|
|
9407
|
+
const areaNames = useMemo10(() => {
|
|
9341
9408
|
const names = /* @__PURE__ */ new Set();
|
|
9342
9409
|
for (const dp of dataPoints) {
|
|
9343
9410
|
for (const s of dp.scores) {
|
|
@@ -9346,7 +9413,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9346
9413
|
}
|
|
9347
9414
|
return Array.from(names).sort();
|
|
9348
9415
|
}, [dataPoints]);
|
|
9349
|
-
const fetchData =
|
|
9416
|
+
const fetchData = useCallback26(async () => {
|
|
9350
9417
|
setLoading(true);
|
|
9351
9418
|
try {
|
|
9352
9419
|
const startDate = rangeDays ? daysAgo(rangeDays) : "1970-01-01T00:00:00Z";
|
|
@@ -9364,7 +9431,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9364
9431
|
useEffect10(() => {
|
|
9365
9432
|
void fetchData();
|
|
9366
9433
|
}, [fetchData]);
|
|
9367
|
-
const chartPoints =
|
|
9434
|
+
const chartPoints = useMemo10(() => {
|
|
9368
9435
|
const pts = [];
|
|
9369
9436
|
const scored = dataPoints.map((dp) => ({
|
|
9370
9437
|
date: dp.completedAt,
|
|
@@ -9378,18 +9445,18 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9378
9445
|
});
|
|
9379
9446
|
return pts;
|
|
9380
9447
|
}, [dataPoints, selectedArea]);
|
|
9381
|
-
const avgScore =
|
|
9448
|
+
const avgScore = useMemo10(() => {
|
|
9382
9449
|
if (chartPoints.length === 0) return 0;
|
|
9383
9450
|
return chartPoints.reduce((sum, p) => sum + p.score, 0) / chartPoints.length;
|
|
9384
9451
|
}, [chartPoints]);
|
|
9385
|
-
const handleRangeChange =
|
|
9452
|
+
const handleRangeChange = useCallback26(
|
|
9386
9453
|
(e) => {
|
|
9387
9454
|
const val = e.currentTarget.value;
|
|
9388
9455
|
setRangeDays(val === "all" ? null : Number(val));
|
|
9389
9456
|
},
|
|
9390
9457
|
[]
|
|
9391
9458
|
);
|
|
9392
|
-
const handleAreaChange =
|
|
9459
|
+
const handleAreaChange = useCallback26(
|
|
9393
9460
|
(e) => {
|
|
9394
9461
|
const val = e.currentTarget.value;
|
|
9395
9462
|
setSelectedArea(val || null);
|
|
@@ -9397,22 +9464,22 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9397
9464
|
[]
|
|
9398
9465
|
);
|
|
9399
9466
|
const polylinePoints = chartPoints.map((p) => `${p.x},${p.y}`).join(" ");
|
|
9400
|
-
return /* @__PURE__ */
|
|
9401
|
-
/* @__PURE__ */
|
|
9402
|
-
/* @__PURE__ */
|
|
9467
|
+
return /* @__PURE__ */ jsxs39(Stack33, { space: 4, children: [
|
|
9468
|
+
/* @__PURE__ */ jsxs39(Flex30, { gap: 3, children: [
|
|
9469
|
+
/* @__PURE__ */ jsx52(
|
|
9403
9470
|
Select2,
|
|
9404
9471
|
{
|
|
9405
9472
|
onChange: handleRangeChange,
|
|
9406
9473
|
value: rangeDays?.toString() ?? "all",
|
|
9407
|
-
children: TIME_RANGES.map((r) => /* @__PURE__ */
|
|
9474
|
+
children: TIME_RANGES.map((r) => /* @__PURE__ */ jsx52("option", { value: r.days?.toString() ?? "all", children: r.label }, r.label))
|
|
9408
9475
|
}
|
|
9409
9476
|
),
|
|
9410
|
-
/* @__PURE__ */
|
|
9411
|
-
/* @__PURE__ */
|
|
9412
|
-
areaNames.map((name) => /* @__PURE__ */
|
|
9477
|
+
/* @__PURE__ */ jsxs39(Select2, { onChange: handleAreaChange, value: selectedArea ?? "", children: [
|
|
9478
|
+
/* @__PURE__ */ jsx52("option", { value: "", children: "Overall" }),
|
|
9479
|
+
areaNames.map((name) => /* @__PURE__ */ jsx52("option", { value: name, children: name }, name))
|
|
9413
9480
|
] })
|
|
9414
9481
|
] }),
|
|
9415
|
-
/* @__PURE__ */
|
|
9482
|
+
/* @__PURE__ */ jsx52(Card20, { padding: 3, radius: 2, shadow: 1, children: loading ? /* @__PURE__ */ jsx52(Flex30, { align: "center", justify: "center", style: { height: 200 }, children: /* @__PURE__ */ jsx52(Text40, { muted: true, size: 2, children: "Loading\u2026" }) }) : chartPoints.length === 0 ? /* @__PURE__ */ jsx52(Flex30, { align: "center", justify: "center", style: { height: 200 }, children: /* @__PURE__ */ jsx52(Text40, { muted: true, size: 2, children: "No reports found for this time range" }) }) : /* @__PURE__ */ jsxs39(
|
|
9416
9483
|
"svg",
|
|
9417
9484
|
{
|
|
9418
9485
|
style: { display: "block", width: "100%" },
|
|
@@ -9420,8 +9487,8 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9420
9487
|
children: [
|
|
9421
9488
|
Y_TICKS.map((tick) => {
|
|
9422
9489
|
const y = PAD_TOP + PLOT_HEIGHT - tick / Y_MAX * PLOT_HEIGHT;
|
|
9423
|
-
return /* @__PURE__ */
|
|
9424
|
-
/* @__PURE__ */
|
|
9490
|
+
return /* @__PURE__ */ jsxs39("g", { children: [
|
|
9491
|
+
/* @__PURE__ */ jsx52(
|
|
9425
9492
|
"line",
|
|
9426
9493
|
{
|
|
9427
9494
|
stroke: "#ccc",
|
|
@@ -9432,7 +9499,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9432
9499
|
y2: y
|
|
9433
9500
|
}
|
|
9434
9501
|
),
|
|
9435
|
-
/* @__PURE__ */
|
|
9502
|
+
/* @__PURE__ */ jsx52(
|
|
9436
9503
|
"text",
|
|
9437
9504
|
{
|
|
9438
9505
|
dominantBaseline: "middle",
|
|
@@ -9452,7 +9519,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9452
9519
|
chartPoints.length - 1
|
|
9453
9520
|
].map((idx) => {
|
|
9454
9521
|
const p = chartPoints[idx];
|
|
9455
|
-
return /* @__PURE__ */
|
|
9522
|
+
return /* @__PURE__ */ jsx52(
|
|
9456
9523
|
"text",
|
|
9457
9524
|
{
|
|
9458
9525
|
fill: "#999",
|
|
@@ -9464,7 +9531,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9464
9531
|
},
|
|
9465
9532
|
idx
|
|
9466
9533
|
);
|
|
9467
|
-
}) : chartPoints.map((p, idx) => /* @__PURE__ */
|
|
9534
|
+
}) : chartPoints.map((p, idx) => /* @__PURE__ */ jsx52(
|
|
9468
9535
|
"text",
|
|
9469
9536
|
{
|
|
9470
9537
|
fill: "#999",
|
|
@@ -9476,7 +9543,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9476
9543
|
},
|
|
9477
9544
|
idx
|
|
9478
9545
|
)),
|
|
9479
|
-
/* @__PURE__ */
|
|
9546
|
+
/* @__PURE__ */ jsx52(
|
|
9480
9547
|
"polyline",
|
|
9481
9548
|
{
|
|
9482
9549
|
fill: "none",
|
|
@@ -9486,7 +9553,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9486
9553
|
strokeWidth: 2.5
|
|
9487
9554
|
}
|
|
9488
9555
|
),
|
|
9489
|
-
chartPoints.map((p, idx) => /* @__PURE__ */
|
|
9556
|
+
chartPoints.map((p, idx) => /* @__PURE__ */ jsx52(
|
|
9490
9557
|
"circle",
|
|
9491
9558
|
{
|
|
9492
9559
|
cx: p.x,
|
|
@@ -9495,7 +9562,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9495
9562
|
r: 4,
|
|
9496
9563
|
stroke: "#fff",
|
|
9497
9564
|
strokeWidth: 1.5,
|
|
9498
|
-
children: /* @__PURE__ */
|
|
9565
|
+
children: /* @__PURE__ */ jsxs39("title", { children: [
|
|
9499
9566
|
formatDate(p.date),
|
|
9500
9567
|
": ",
|
|
9501
9568
|
Math.round(p.score)
|
|
@@ -9506,7 +9573,7 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9506
9573
|
]
|
|
9507
9574
|
}
|
|
9508
9575
|
) }),
|
|
9509
|
-
/* @__PURE__ */
|
|
9576
|
+
/* @__PURE__ */ jsxs39(Text40, { muted: true, size: 2, children: [
|
|
9510
9577
|
chartPoints.length,
|
|
9511
9578
|
" data point",
|
|
9512
9579
|
chartPoints.length !== 1 ? "s" : ""
|
|
@@ -9516,15 +9583,15 @@ function ScoreTimeline({ mode = null, source = null }) {
|
|
|
9516
9583
|
var ScoreTimeline_default = ScoreTimeline;
|
|
9517
9584
|
|
|
9518
9585
|
// src/components/Dashboard.tsx
|
|
9519
|
-
import { jsx as
|
|
9586
|
+
import { jsx as jsx53, jsxs as jsxs40 } from "react/jsx-runtime";
|
|
9520
9587
|
var VIEW_PARAM_MAP = {
|
|
9521
9588
|
compare: "compare",
|
|
9522
9589
|
timeline: "timeline"
|
|
9523
9590
|
};
|
|
9524
9591
|
function Dashboard() {
|
|
9525
|
-
return /* @__PURE__ */
|
|
9526
|
-
/* @__PURE__ */
|
|
9527
|
-
/* @__PURE__ */
|
|
9592
|
+
return /* @__PURE__ */ jsx53(HelpProvider, { children: /* @__PURE__ */ jsxs40(Flex31, { style: { height: "100%" }, children: [
|
|
9593
|
+
/* @__PURE__ */ jsx53(Box25, { flex: 1, overflow: "auto", children: /* @__PURE__ */ jsx53(DashboardContent, {}) }),
|
|
9594
|
+
/* @__PURE__ */ jsx53(HelpDrawer, {})
|
|
9528
9595
|
] }) });
|
|
9529
9596
|
}
|
|
9530
9597
|
function DashboardContent() {
|
|
@@ -9535,7 +9602,7 @@ function DashboardContent() {
|
|
|
9535
9602
|
const isDetail = reportId !== null;
|
|
9536
9603
|
const activeTab = isDetail ? "latest" : VIEW_PARAM_MAP[routerState.view ?? ""] ?? "latest";
|
|
9537
9604
|
const defaultTopic = deriveHelpTopic(routerState);
|
|
9538
|
-
const navigateToTab =
|
|
9605
|
+
const navigateToTab = useCallback27(
|
|
9539
9606
|
(tab) => {
|
|
9540
9607
|
if (tab === "latest") {
|
|
9541
9608
|
router.navigate({});
|
|
@@ -9545,13 +9612,13 @@ function DashboardContent() {
|
|
|
9545
9612
|
},
|
|
9546
9613
|
[router]
|
|
9547
9614
|
);
|
|
9548
|
-
const handleSelectReport =
|
|
9615
|
+
const handleSelectReport = useCallback27(
|
|
9549
9616
|
(id) => {
|
|
9550
9617
|
router.navigate({ reportId: id });
|
|
9551
9618
|
},
|
|
9552
9619
|
[router]
|
|
9553
9620
|
);
|
|
9554
|
-
const handleTabChange =
|
|
9621
|
+
const handleTabChange = useCallback27(
|
|
9555
9622
|
(tab, subTab, focus) => {
|
|
9556
9623
|
if (!routerState.reportId) return;
|
|
9557
9624
|
const state = {
|
|
@@ -9564,19 +9631,19 @@ function DashboardContent() {
|
|
|
9564
9631
|
},
|
|
9565
9632
|
[router, routerState.reportId]
|
|
9566
9633
|
);
|
|
9567
|
-
const handleBack =
|
|
9634
|
+
const handleBack = useCallback27(() => {
|
|
9568
9635
|
router.navigate({});
|
|
9569
9636
|
}, [router]);
|
|
9570
|
-
const handleOpenHelp =
|
|
9637
|
+
const handleOpenHelp = useCallback27(() => {
|
|
9571
9638
|
openHelp(defaultTopic);
|
|
9572
9639
|
}, [openHelp, defaultTopic]);
|
|
9573
|
-
return /* @__PURE__ */
|
|
9574
|
-
/* @__PURE__ */
|
|
9575
|
-
/* @__PURE__ */
|
|
9576
|
-
/* @__PURE__ */
|
|
9577
|
-
/* @__PURE__ */
|
|
9640
|
+
return /* @__PURE__ */ jsx53(Container, { width: 4, children: /* @__PURE__ */ jsxs40(Stack34, { padding: 4, space: 4, children: [
|
|
9641
|
+
/* @__PURE__ */ jsxs40(Flex31, { align: "center", gap: 3, children: [
|
|
9642
|
+
/* @__PURE__ */ jsxs40(Stack34, { flex: 1, space: 1, children: [
|
|
9643
|
+
/* @__PURE__ */ jsx53(Text41, { size: 4, weight: "bold", children: "AI Literacy Framework" }),
|
|
9644
|
+
/* @__PURE__ */ jsx53(Text41, { muted: true, size: 2, children: "Evaluation reports and score trends" })
|
|
9578
9645
|
] }),
|
|
9579
|
-
/* @__PURE__ */
|
|
9646
|
+
/* @__PURE__ */ jsx53(
|
|
9580
9647
|
Button9,
|
|
9581
9648
|
{
|
|
9582
9649
|
icon: HelpCircleIcon8,
|
|
@@ -9587,8 +9654,8 @@ function DashboardContent() {
|
|
|
9587
9654
|
}
|
|
9588
9655
|
)
|
|
9589
9656
|
] }),
|
|
9590
|
-
!isDetail && /* @__PURE__ */
|
|
9591
|
-
/* @__PURE__ */
|
|
9657
|
+
!isDetail && /* @__PURE__ */ jsxs40(TabList2, { space: 1, children: [
|
|
9658
|
+
/* @__PURE__ */ jsx53(
|
|
9592
9659
|
Tab2,
|
|
9593
9660
|
{
|
|
9594
9661
|
"aria-controls": "latest-panel",
|
|
@@ -9598,7 +9665,7 @@ function DashboardContent() {
|
|
|
9598
9665
|
selected: activeTab === "latest"
|
|
9599
9666
|
}
|
|
9600
9667
|
),
|
|
9601
|
-
/* @__PURE__ */
|
|
9668
|
+
/* @__PURE__ */ jsx53(
|
|
9602
9669
|
Tab2,
|
|
9603
9670
|
{
|
|
9604
9671
|
"aria-controls": "timeline-panel",
|
|
@@ -9608,7 +9675,7 @@ function DashboardContent() {
|
|
|
9608
9675
|
selected: activeTab === "timeline"
|
|
9609
9676
|
}
|
|
9610
9677
|
),
|
|
9611
|
-
/* @__PURE__ */
|
|
9678
|
+
/* @__PURE__ */ jsx53(
|
|
9612
9679
|
Tab2,
|
|
9613
9680
|
{
|
|
9614
9681
|
"aria-controls": "compare-panel",
|
|
@@ -9619,10 +9686,10 @@ function DashboardContent() {
|
|
|
9619
9686
|
}
|
|
9620
9687
|
)
|
|
9621
9688
|
] }),
|
|
9622
|
-
!isDetail && activeTab === "latest" && /* @__PURE__ */
|
|
9623
|
-
!isDetail && activeTab === "timeline" && /* @__PURE__ */
|
|
9624
|
-
!isDetail && activeTab === "compare" && /* @__PURE__ */
|
|
9625
|
-
isDetail && reportId && /* @__PURE__ */
|
|
9689
|
+
!isDetail && activeTab === "latest" && /* @__PURE__ */ jsx53(TabPanel2, { "aria-labelledby": "latest-tab", id: "latest-panel", children: /* @__PURE__ */ jsx53(LatestReports, { onSelectReport: handleSelectReport }) }),
|
|
9690
|
+
!isDetail && activeTab === "timeline" && /* @__PURE__ */ jsx53(TabPanel2, { "aria-labelledby": "timeline-tab", id: "timeline-panel", children: /* @__PURE__ */ jsx53(ScoreTimeline_default, {}) }),
|
|
9691
|
+
!isDetail && activeTab === "compare" && /* @__PURE__ */ jsx53(TabPanel2, { "aria-labelledby": "compare-tab", id: "compare-panel", children: /* @__PURE__ */ jsx53(ComparisonView, {}) }),
|
|
9692
|
+
isDetail && reportId && /* @__PURE__ */ jsx53(
|
|
9626
9693
|
ReportDetail,
|
|
9627
9694
|
{
|
|
9628
9695
|
activeTab: routerState.tab ?? null,
|
|
@@ -9658,7 +9725,7 @@ function ailfTool(options = {}) {
|
|
|
9658
9725
|
// src/actions/RunEvaluationAction.tsx
|
|
9659
9726
|
import { BarChartIcon as BarChartIcon2 } from "@sanity/icons";
|
|
9660
9727
|
import { useToast as useToast10 } from "@sanity/ui";
|
|
9661
|
-
import { useCallback as
|
|
9728
|
+
import { useCallback as useCallback28, useEffect as useEffect11, useRef as useRef6, useState as useState21 } from "react";
|
|
9662
9729
|
import {
|
|
9663
9730
|
getReleaseIdFromReleaseDocumentId as getReleaseIdFromReleaseDocumentId3,
|
|
9664
9731
|
useClient as useClient12,
|
|
@@ -9689,7 +9756,7 @@ function createRunEvaluationAction(options = {}) {
|
|
|
9689
9756
|
const projectId = useProjectId2();
|
|
9690
9757
|
const currentUser = useCurrentUser4();
|
|
9691
9758
|
const toast = useToast10();
|
|
9692
|
-
const [state, setState] =
|
|
9759
|
+
const [state, setState] = useState21({ status: "loading" });
|
|
9693
9760
|
const requestedAtRef = useRef6(null);
|
|
9694
9761
|
const perspectiveId = getReleaseIdFromReleaseDocumentId3(release._id);
|
|
9695
9762
|
useEffect11(() => {
|
|
@@ -9785,7 +9852,7 @@ function createRunEvaluationAction(options = {}) {
|
|
|
9785
9852
|
}, 15e3);
|
|
9786
9853
|
return () => clearTimeout(timer);
|
|
9787
9854
|
}, [client, perspectiveId, state]);
|
|
9788
|
-
const handleRequest =
|
|
9855
|
+
const handleRequest = useCallback28(async () => {
|
|
9789
9856
|
const releaseTitle = release.metadata?.title ?? perspectiveId ?? "release";
|
|
9790
9857
|
const tag = `release-${slugify3(releaseTitle)}-${dateStamp3()}`;
|
|
9791
9858
|
const now = Date.now();
|