@sanity/ailf 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/LICENSE +21 -0
  2. package/dist/_vendor/ailf-core/examples/index.d.ts +50 -1
  3. package/dist/_vendor/ailf-core/examples/index.js +66 -1
  4. package/dist/agent-harness/assertions-runtime.d.ts +49 -0
  5. package/dist/agent-harness/assertions-runtime.js +138 -0
  6. package/dist/agent-harness/provider.d.ts +58 -0
  7. package/dist/agent-harness/provider.js +104 -0
  8. package/dist/cli.js +0 -0
  9. package/dist/commands/init.js +3 -0
  10. package/dist/orchestration/steps/generate-configs-step.d.ts +7 -0
  11. package/dist/orchestration/steps/generate-configs-step.js +35 -2
  12. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +39 -25
  13. package/dist/pipeline/compiler/compiler-to-yaml.js +78 -7
  14. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.d.ts +9 -0
  15. package/dist/pipeline/compiler/mode-handlers/agent-harness/assertions.js +28 -85
  16. package/dist/pipeline/compiler/mode-handlers/agent-harness/compiler.js +22 -15
  17. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.d.ts +8 -1
  18. package/dist/pipeline/compiler/mode-handlers/agent-harness/sandbox.js +42 -12
  19. package/package.json +25 -24
  20. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  21. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  22. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  23. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  24. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  25. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  26. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  27. package/dist/_vendor/ailf-tasks/index.js +0 -16
  28. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  29. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  30. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  31. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  32. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  33. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  34. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  35. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  36. package/dist/agent-observer/test-imports.d.ts +0 -7
  37. package/dist/agent-observer/test-imports.js +0 -185
  38. package/dist/commands/update-quality-scores.d.ts +0 -5
  39. package/dist/commands/update-quality-scores.js +0 -20
  40. package/dist/lib/agent-behavior-report.d.ts +0 -8
  41. package/dist/lib/agent-behavior-report.js +0 -185
  42. package/dist/lib/baseline.d.ts +0 -19
  43. package/dist/lib/baseline.js +0 -153
  44. package/dist/lib/calculate-scores.d.ts +0 -23
  45. package/dist/lib/calculate-scores.js +0 -42
  46. package/dist/lib/compare.d.ts +0 -18
  47. package/dist/lib/compare.js +0 -170
  48. package/dist/lib/coverage-audit.d.ts +0 -4
  49. package/dist/lib/coverage-audit.js +0 -42
  50. package/dist/lib/discovery-report.d.ts +0 -13
  51. package/dist/lib/discovery-report.js +0 -57
  52. package/dist/lib/fetch-docs.d.ts +0 -30
  53. package/dist/lib/fetch-docs.js +0 -171
  54. package/dist/lib/generate-configs.d.ts +0 -25
  55. package/dist/lib/generate-configs.js +0 -42
  56. package/dist/lib/grader-api.d.ts +0 -21
  57. package/dist/lib/grader-api.js +0 -34
  58. package/dist/lib/grader-compare.d.ts +0 -19
  59. package/dist/lib/grader-compare.js +0 -91
  60. package/dist/lib/grader-consistency.d.ts +0 -27
  61. package/dist/lib/grader-consistency.js +0 -79
  62. package/dist/lib/grader-sensitivity.d.ts +0 -19
  63. package/dist/lib/grader-sensitivity.js +0 -75
  64. package/dist/lib/grader-validate.d.ts +0 -19
  65. package/dist/lib/grader-validate.js +0 -78
  66. package/dist/lib/measure-retrieval.d.ts +0 -14
  67. package/dist/lib/measure-retrieval.js +0 -71
  68. package/dist/lib/pr-comment.d.ts +0 -16
  69. package/dist/lib/pr-comment.js +0 -28
  70. package/dist/lib/readiness-report.d.ts +0 -13
  71. package/dist/lib/readiness-report.js +0 -108
  72. package/dist/lib/webhook-server.d.ts +0 -11
  73. package/dist/lib/webhook-server.js +0 -24
  74. package/dist/lib/weekly-digest.d.ts +0 -24
  75. package/dist/lib/weekly-digest.js +0 -148
  76. package/dist/orchestration/env-bridge.d.ts +0 -21
  77. package/dist/orchestration/env-bridge.js +0 -66
  78. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  79. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  80. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  81. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  82. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  83. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  84. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  85. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  86. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  87. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  88. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  89. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  90. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  91. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  92. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  93. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  94. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  95. package/dist/pipeline/compiler/task-bridge.js +0 -92
  96. package/dist/pipeline/expand-tasks.d.ts +0 -232
  97. package/dist/pipeline/expand-tasks.js +0 -467
  98. package/dist/pipeline/generate-configs.d.ts +0 -92
  99. package/dist/pipeline/generate-configs.js +0 -445
  100. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  101. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  102. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  103. package/dist/pipeline/steps/compare-step.js +0 -90
  104. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  105. package/dist/pipeline/steps/eval-step.js +0 -347
  106. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  107. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  108. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  109. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  110. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  111. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  112. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  113. package/dist/pipeline/steps/publish-report-step.js +0 -243
  114. package/dist/pipeline/steps/report-step.d.ts +0 -13
  115. package/dist/pipeline/steps/report-step.js +0 -56
  116. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  117. package/dist/pipeline/steps/update-scores-step.js +0 -42
  118. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  119. package/dist/scripts/agent-behavior-report.js +0 -315
  120. package/dist/scripts/baseline.d.ts +0 -43
  121. package/dist/scripts/baseline.js +0 -267
  122. package/dist/scripts/calculate-scores.d.ts +0 -166
  123. package/dist/scripts/calculate-scores.js +0 -1296
  124. package/dist/scripts/compare.d.ts +0 -22
  125. package/dist/scripts/compare.js +0 -334
  126. package/dist/scripts/coverage-audit.d.ts +0 -44
  127. package/dist/scripts/coverage-audit.js +0 -209
  128. package/dist/scripts/debug-eval.d.ts +0 -19
  129. package/dist/scripts/debug-eval.js +0 -73
  130. package/dist/scripts/discovery-report.d.ts +0 -58
  131. package/dist/scripts/discovery-report.js +0 -250
  132. package/dist/scripts/fetch-docs.d.ts +0 -35
  133. package/dist/scripts/fetch-docs.js +0 -472
  134. package/dist/scripts/generate-configs.d.ts +0 -66
  135. package/dist/scripts/generate-configs.js +0 -459
  136. package/dist/scripts/grader-api.d.ts +0 -27
  137. package/dist/scripts/grader-api.js +0 -206
  138. package/dist/scripts/grader-compare.d.ts +0 -22
  139. package/dist/scripts/grader-compare.js +0 -368
  140. package/dist/scripts/grader-consistency.d.ts +0 -20
  141. package/dist/scripts/grader-consistency.js +0 -313
  142. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  143. package/dist/scripts/grader-sensitivity.js +0 -354
  144. package/dist/scripts/grader-validate.d.ts +0 -19
  145. package/dist/scripts/grader-validate.js +0 -267
  146. package/dist/scripts/measure-retrieval.d.ts +0 -10
  147. package/dist/scripts/measure-retrieval.js +0 -145
  148. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  149. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  150. package/dist/scripts/pipeline.d.ts +0 -76
  151. package/dist/scripts/pipeline.js +0 -1031
  152. package/dist/scripts/pr-comment.d.ts +0 -10
  153. package/dist/scripts/pr-comment.js +0 -510
  154. package/dist/scripts/readiness-report.d.ts +0 -88
  155. package/dist/scripts/readiness-report.js +0 -342
  156. package/dist/scripts/update-quality-scores.d.ts +0 -15
  157. package/dist/scripts/update-quality-scores.js +0 -184
  158. package/dist/scripts/validate-task-sources.d.ts +0 -21
  159. package/dist/scripts/validate-task-sources.js +0 -210
  160. package/dist/scripts/validate.d.ts +0 -13
  161. package/dist/scripts/validate.js +0 -79
  162. package/dist/scripts/webhook-server.d.ts +0 -26
  163. package/dist/scripts/webhook-server.js +0 -147
  164. package/dist/scripts/weekly-digest.d.ts +0 -24
  165. package/dist/scripts/weekly-digest.js +0 -144
  166. package/dist/sinks/format-slack.d.ts +0 -64
  167. package/dist/sinks/format-slack.js +0 -306
  168. package/dist/sinks/slack-sink.d.ts +0 -27
  169. package/dist/sinks/slack-sink.js +0 -78
  170. package/dist/sinks/webhook-sink.d.ts +0 -19
  171. package/dist/sinks/webhook-sink.js +0 -50
  172. package/tasks/.expanded.agentic.yaml +0 -280
  173. package/tasks/.expanded.yaml +0 -565
@@ -1,342 +0,0 @@
1
- /**
2
- * readiness-report.ts
3
- *
4
- * Launch readiness report generator — Phase 5b of the Scenario Matrix
5
- * implementation. Combines threshold evaluation, ceiling decomposition,
6
- * and gap analysis into a single actionable readiness checklist for a
7
- * given feature area.
8
- *
9
- * Usage:
10
- * pnpm readiness-report --area visual-editing
11
- * pnpm readiness-report --area groq --history
12
- * pnpm readiness-report --area groq --output readiness.md
13
- *
14
- * Exports pure functions for unit testing:
15
- * - generateReadinessReport() — builds the structured report
16
- * - formatReadinessMarkdown() — renders the report as markdown
17
- *
18
- * @see docs/exec-plans/completed/scenario-matrix-implementation/phase-5-readiness-thresholds.md
19
- */
20
- import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
21
- import { dirname, join, resolve } from "node:path";
22
- import { fileURLToPath } from "node:url";
23
- import { load } from "js-yaml";
24
- import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
25
- import { evaluateThresholds } from "../pipeline/thresholds.js";
26
- const __dirname = dirname(fileURLToPath(import.meta.url));
27
- const ROOT = resolve(__dirname, "..", "..");
28
- const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
29
- const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
30
- const THRESHOLDS_PATH = join(ROOT, "config", "thresholds.yaml");
31
- const BASELINES_DIR = join(ROOT, "results", "baselines");
32
- // ---------------------------------------------------------------------------
33
- // Pure functions (exported for testing)
34
- // ---------------------------------------------------------------------------
35
- /**
36
- * Format a readiness report as markdown.
37
- *
38
- * Pure function — takes a structured report and returns a markdown string.
39
- */
40
- export function formatReadinessMarkdown(report) {
41
- const lines = [];
42
- const areaLabel = formatAreaLabel(report.area);
43
- // Header
44
- const statusEmoji = report.pass ? "✅" : "❌";
45
- const statusLabel = report.pass ? "READY" : "NOT READY";
46
- lines.push(`## 🚀 Launch Readiness: ${areaLabel}`);
47
- lines.push("");
48
- lines.push(`**Overall:** ${statusEmoji} ${statusLabel} (${fmt(report.score)}/100, threshold: ${report.threshold})`);
49
- lines.push("");
50
- // Dimension Checklist
51
- lines.push("### Dimension Checklist");
52
- lines.push("");
53
- lines.push("| Dimension | Score | Threshold | Status |");
54
- lines.push("|---|---|---|---|");
55
- for (const dim of report.dimensions) {
56
- const status = dim.pass ? "✅ Meets threshold" : "❌ Below threshold";
57
- lines.push(`| ${dim.dimension} | ${fmt(dim.score)} | ${fmt(dim.threshold)} | ${status} |`);
58
- }
59
- lines.push("");
60
- // Ceiling Analysis
61
- lines.push("### Ceiling Analysis");
62
- lines.push("");
63
- lines.push("| Metric | Value | Assessment |");
64
- lines.push("|---|---|---|");
65
- const ceilingAssessment = report.ceiling.ceilingScore >= 60
66
- ? "✅ Docs enable reasonable performance"
67
- : "⚠️ Below 60 — docs need improvement";
68
- lines.push(`| Ceiling Score | ${fmt(report.ceiling.ceilingScore)} | ${ceilingAssessment} |`);
69
- const floorAssessment = report.ceiling.floorScore >= 30
70
- ? "Model has moderate baseline knowledge"
71
- : "Model has limited baseline knowledge";
72
- lines.push(`| Floor Score | ${fmt(report.ceiling.floorScore)} | ${floorAssessment} |`);
73
- const liftSign = report.ceiling.docLift >= 0 ? "+" : "";
74
- const liftAssessment = report.ceiling.docLift < 0
75
- ? "❌ Docs are hurting performance"
76
- : report.ceiling.docLift >= 10
77
- ? "✅ Docs add significant value"
78
- : "⚠️ Docs add minimal value";
79
- lines.push(`| Doc Lift | ${liftSign}${fmt(report.ceiling.docLift)} | ${liftAssessment} |`);
80
- lines.push(`| Doc Quality Gap | ${fmt(report.ceiling.docQualityGap)} | ${report.ceiling.docQualityGap > 30 ? "Room for improvement via documentation" : "✅ Docs are high quality"} |`);
81
- lines.push("");
82
- // Failing Criteria (only shown when there are violations)
83
- if (report.violations.length > 0) {
84
- lines.push("### Failing Criteria");
85
- for (let i = 0; i < report.violations.length; i++) {
86
- const v = report.violations[i];
87
- lines.push(`${i + 1}. **${v.description}**`);
88
- }
89
- lines.push("");
90
- lines.push("### Recommendation");
91
- const count = report.violations.length;
92
- const itemWord = count === 1 ? "item" : "items";
93
- lines.push(`Fix the ${count} ${itemWord} above and re-evaluate.`);
94
- lines.push("");
95
- }
96
- // Gap Analysis (if available)
97
- if (report.gaps.length > 0) {
98
- lines.push("### Gap Analysis");
99
- lines.push("");
100
- lines.push("| Failure Mode | Est. Lift | Confidence | Remediation |");
101
- lines.push("|---|---|---|---|");
102
- for (const gap of report.gaps) {
103
- const confIcon = gap.confidence === "high"
104
- ? "🟢"
105
- : gap.confidence === "medium"
106
- ? "🟡"
107
- : "🔴";
108
- lines.push(`| ${gap.failureMode} | +${gap.estimatedLift.toFixed(1)} | ${confIcon} ${gap.confidence} | ${gap.remediation} |`);
109
- }
110
- lines.push("");
111
- }
112
- // Historical Progress (if available)
113
- if (report.history.length > 0) {
114
- lines.push("### Historical Progress");
115
- lines.push("");
116
- lines.push("| Date | Score | Tag |");
117
- lines.push("|---|---|---|");
118
- for (const entry of report.history) {
119
- const date = entry.timestamp.slice(0, 10);
120
- const tag = entry.tag ?? "—";
121
- lines.push(`| ${date} | ${fmt(entry.score)} | ${tag} |`);
122
- }
123
- // Show current score as the last row
124
- lines.push(`| ${new Date().toISOString().slice(0, 10)} | ${fmt(report.score)} | *current* |`);
125
- lines.push("");
126
- }
127
- return lines.join("\n");
128
- }
129
- /**
130
- * Generate a structured readiness report for a given feature area.
131
- *
132
- * This is a pure function — it takes all data as parameters and produces
133
- * a structured report. No I/O.
134
- */
135
- export function generateReadinessReport(opts) {
136
- const { area, gapAnalysis, history = [], scoreSummary, thresholdConfig, } = opts;
137
- // Find the area's scores
138
- const areaScore = scoreSummary.scores.find((s) => s.feature === area);
139
- if (!areaScore) {
140
- throw new Error(`Area "${area}" not found in score summary. Available areas: ${scoreSummary.scores.map((s) => s.feature).join(", ")}`);
141
- }
142
- // Evaluate thresholds for the full summary (to get violations)
143
- const thresholdEvaluation = evaluateThresholds(scoreSummary, thresholdConfig);
144
- // Filter violations to only this area
145
- const areaViolations = thresholdEvaluation.violations.filter((v) => v.area === area);
146
- // Resolve per-area thresholds (with defaults)
147
- const areaOverrides = thresholdConfig.areas?.[area];
148
- const compositeThreshold = areaOverrides?.composite ?? thresholdConfig.defaults.composite;
149
- const dimDefaults = thresholdConfig.defaults.dimensions ?? {};
150
- const dimOverrides = areaOverrides?.dimensions ?? {};
151
- // Build dimension checks
152
- const dimensions = [
153
- {
154
- dimension: "Task Completion",
155
- pass: areaScore.taskCompletion >=
156
- (dimOverrides["task-completion"] ??
157
- dimDefaults["task-completion"] ??
158
- 0),
159
- score: areaScore.taskCompletion,
160
- threshold: dimOverrides["task-completion"] ?? dimDefaults["task-completion"] ?? 0,
161
- },
162
- {
163
- dimension: "Code Correctness",
164
- pass: areaScore.codeCorrectness >=
165
- (dimOverrides["code-correctness"] ??
166
- dimDefaults["code-correctness"] ??
167
- 0),
168
- score: areaScore.codeCorrectness,
169
- threshold: dimOverrides["code-correctness"] ??
170
- dimDefaults["code-correctness"] ??
171
- 0,
172
- },
173
- {
174
- dimension: "Doc Coverage",
175
- pass: areaScore.docCoverage >=
176
- (dimOverrides["doc-coverage"] ?? dimDefaults["doc-coverage"] ?? 0),
177
- score: areaScore.docCoverage,
178
- threshold: dimOverrides["doc-coverage"] ?? dimDefaults["doc-coverage"] ?? 0,
179
- },
180
- ];
181
- // Filter gap analysis to this area
182
- const areaGaps = gapAnalysis?.gaps.filter((g) => g.area === area) ?? [];
183
- // Ceiling decomposition
184
- const ceiling = {
185
- ceilingScore: areaScore.ceilingScore,
186
- docLift: areaScore.docLift,
187
- docQualityGap: areaScore.docQualityGap,
188
- floorScore: areaScore.floorScore,
189
- };
190
- const pass = areaViolations.length === 0;
191
- return {
192
- area,
193
- ceiling,
194
- dimensions,
195
- gaps: areaGaps,
196
- history,
197
- pass,
198
- score: areaScore.totalScore,
199
- threshold: compositeThreshold,
200
- thresholdEvaluation,
201
- violations: areaViolations,
202
- };
203
- }
204
- // ---------------------------------------------------------------------------
205
- // Formatting helpers (private)
206
- // ---------------------------------------------------------------------------
207
- /** Format a score for display (round to nearest integer) */
208
- function fmt(n) {
209
- return String(Math.round(n));
210
- }
211
- /** Convert kebab-case area name to title case */
212
- function formatAreaLabel(area) {
213
- return area
214
- .split("-")
215
- .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
216
- .join(" ");
217
- }
218
- // ---------------------------------------------------------------------------
219
- // I/O helpers (used by CLI, not exported for testing)
220
- // ---------------------------------------------------------------------------
221
- function loadGapAnalysis(path) {
222
- if (!existsSync(path))
223
- return undefined;
224
- return JSON.parse(readFileSync(path, "utf-8"));
225
- }
226
- function loadHistory(area, baselinesDir) {
227
- if (!existsSync(baselinesDir))
228
- return [];
229
- const files = readdirSync(baselinesDir)
230
- .filter((f) => f.endsWith(".json"))
231
- .sort();
232
- const entries = [];
233
- for (const file of files) {
234
- try {
235
- const raw = readFileSync(join(baselinesDir, file), "utf-8");
236
- const data = JSON.parse(raw);
237
- const areaScore = data.scores?.find((s) => s.feature === area);
238
- if (!areaScore)
239
- continue;
240
- // Extract tag from filename (e.g., "20260304_16_34_45_pre-groq.json")
241
- const nameWithoutExt = file.replace(/\.json$/, "");
242
- const parts = nameWithoutExt.split("_");
243
- // Timestamps are like "20260304_16_34_45" (4 parts), rest is tag
244
- const tag = parts.length > 4 ? parts.slice(4).join("_") : undefined;
245
- entries.push({
246
- score: areaScore.totalScore,
247
- tag,
248
- timestamp: data.timestamp ?? nameWithoutExt,
249
- });
250
- }
251
- catch {
252
- // Skip malformed baseline files
253
- }
254
- }
255
- return entries;
256
- }
257
- function loadScoreSummary(path) {
258
- if (!existsSync(path)) {
259
- throw new Error(`Score summary not found at ${path}. Run \`pnpm pipeline\` first.`);
260
- }
261
- return JSON.parse(readFileSync(path, "utf-8"));
262
- }
263
- function loadThresholdConfig(path) {
264
- if (!existsSync(path)) {
265
- throw new Error(`Threshold config not found at ${path}.`);
266
- }
267
- const raw = readFileSync(path, "utf-8");
268
- const parsed = load(raw);
269
- const result = ThresholdConfigSchema.safeParse(parsed);
270
- if (!result.success) {
271
- const messages = result.error.issues
272
- .map((i) => ` ${i.path.join(".")}: ${i.message}`)
273
- .join("\n");
274
- throw new Error(`Invalid thresholds.yaml:\n${messages}`);
275
- }
276
- return result.data;
277
- }
278
- // ---------------------------------------------------------------------------
279
- // CLI
280
- // ---------------------------------------------------------------------------
281
- function main() {
282
- const { area, history: includeHistory, output } = parseArgs(process.argv);
283
- // Load data
284
- const scoreSummary = loadScoreSummary(SCORE_SUMMARY_PATH);
285
- const thresholdConfig = loadThresholdConfig(THRESHOLDS_PATH);
286
- const gapAnalysis = loadGapAnalysis(GAP_ANALYSIS_PATH);
287
- const history = includeHistory ? loadHistory(area, BASELINES_DIR) : [];
288
- // Generate report
289
- const report = generateReadinessReport({
290
- area,
291
- gapAnalysis,
292
- history,
293
- scoreSummary,
294
- thresholdConfig,
295
- });
296
- // Format and output
297
- const markdown = formatReadinessMarkdown(report);
298
- if (output) {
299
- writeFileSync(output, markdown, "utf-8");
300
- console.error(`✅ Readiness report written to ${output}`);
301
- }
302
- else {
303
- console.log(markdown);
304
- }
305
- // Exit with non-zero if not ready
306
- if (!report.pass) {
307
- process.exit(1);
308
- }
309
- }
310
- function parseArgs(argv) {
311
- const args = argv.slice(2);
312
- let area;
313
- let history = false;
314
- let output;
315
- for (let i = 0; i < args.length; i++) {
316
- const arg = args[i];
317
- if (arg === "--area" && i + 1 < args.length) {
318
- area = args[++i];
319
- }
320
- else if (arg === "--history") {
321
- history = true;
322
- }
323
- else if (arg === "--output" && i + 1 < args.length) {
324
- output = args[++i];
325
- }
326
- }
327
- if (!area) {
328
- console.error("Usage: readiness-report --area <area> [--history] [--output <file>]");
329
- console.error("");
330
- console.error("Options:");
331
- console.error(" --area <area> Feature area to check (required)");
332
- console.error(" --history Include historical progress from baselines");
333
- console.error(" --output <file> Write markdown to file instead of stdout");
334
- process.exit(1);
335
- }
336
- return { area, history, output };
337
- }
338
- // Only run when invoked directly
339
- if (process.argv[1]?.endsWith("readiness-report.ts") ||
340
- process.argv[1]?.endsWith("readiness-report.js")) {
341
- main();
342
- }
@@ -1,15 +0,0 @@
1
- /**
2
- * update-quality-scores.ts
3
- *
4
- * Reads score-summary.json and updates the feature area quality grades
5
- * table in docs/QUALITY_SCORE.md. Designed to run automatically after
6
- * each evaluation as the final pipeline step.
7
- *
8
- * Usage:
9
- * pnpm update-quality-scores
10
- * tsx src/scripts/update-quality-scores.ts
11
- */
12
- export declare function updateQualityScores(): {
13
- success: boolean;
14
- message: string;
15
- };
@@ -1,184 +0,0 @@
1
- /**
2
- * update-quality-scores.ts
3
- *
4
- * Reads score-summary.json and updates the feature area quality grades
5
- * table in docs/QUALITY_SCORE.md. Designed to run automatically after
6
- * each evaluation as the final pipeline step.
7
- *
8
- * Usage:
9
- * pnpm update-quality-scores
10
- * tsx src/scripts/update-quality-scores.ts
11
- */
12
- import { execSync } from "child_process";
13
- import { existsSync, readFileSync, writeFileSync } from "fs";
14
- import { dirname, join, resolve } from "path";
15
- import { fileURLToPath } from "url";
16
- const __dirname = dirname(fileURLToPath(import.meta.url));
17
- const ROOT = resolve(__dirname, "..", "..");
18
- const REPO_ROOT = resolve(ROOT, "..", "..");
19
- const QUALITY_SCORE_PATH = join(REPO_ROOT, "docs", "QUALITY_SCORE.md");
20
- const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
21
- // ---------------------------------------------------------------------------
22
- // Grading
23
- // ---------------------------------------------------------------------------
24
- export function updateQualityScores() {
25
- // Read score summary
26
- if (!existsSync(SCORE_SUMMARY_PATH)) {
27
- return {
28
- message: `Score summary not found at ${SCORE_SUMMARY_PATH}. Run 'pnpm calculate-scores' first.`,
29
- success: false,
30
- };
31
- }
32
- let summary;
33
- try {
34
- const raw = readFileSync(SCORE_SUMMARY_PATH, "utf-8");
35
- const parsed = JSON.parse(raw);
36
- // Normalize legacy field names (liftFromDocs → docLift)
37
- summary = {
38
- ...parsed,
39
- scores: parsed.scores.map((s) => ({
40
- ...s,
41
- docLift: s.docLift ??
42
- s.liftFromDocs ??
43
- 0,
44
- })),
45
- };
46
- }
47
- catch (err) {
48
- return {
49
- message: `Failed to parse score summary: ${err instanceof Error ? err.message : String(err)}`,
50
- success: false,
51
- };
52
- }
53
- if (!summary.scores || summary.scores.length === 0) {
54
- return { message: "Score summary contains no scores.", success: false };
55
- }
56
- // Read QUALITY_SCORE.md
57
- if (!existsSync(QUALITY_SCORE_PATH)) {
58
- return {
59
- message: `QUALITY_SCORE.md not found at ${QUALITY_SCORE_PATH}.`,
60
- success: false,
61
- };
62
- }
63
- let markdown = readFileSync(QUALITY_SCORE_PATH, "utf-8");
64
- // Replace the feature area table
65
- const newTable = generateTable(summary.scores);
66
- const tablePattern = /\| Feature Area\s+\| Score\s+\| Grade\s+\| Doc Lift\s+\| Key gap[^|]*\|\n\| [-\s|]+\|\n(\|[^\n]+\|\n)*/;
67
- const match = tablePattern.exec(markdown);
68
- if (!match) {
69
- return {
70
- message: "Could not find the feature area quality grades table in QUALITY_SCORE.md.",
71
- success: false,
72
- };
73
- }
74
- markdown =
75
- markdown.slice(0, match.index) +
76
- newTable +
77
- "\n" +
78
- markdown.slice(match.index + match[0].length);
79
- // Write back
80
- writeFileSync(QUALITY_SCORE_PATH, markdown);
81
- // Format with Prettier to ensure consistent table formatting
82
- // (emoji widths differ between padEnd and Prettier's table formatter)
83
- try {
84
- execSync("npx prettier --write " + QUALITY_SCORE_PATH, {
85
- cwd: REPO_ROOT,
86
- stdio: "pipe",
87
- });
88
- }
89
- catch {
90
- // Non-fatal — formatting is nice-to-have
91
- }
92
- return {
93
- message: `Updated ${summary.scores.length} feature area scores in QUALITY_SCORE.md (avg: ${Math.round(summary.overall.avgScore)}, lift: +${Math.round(summary.overall.avgDocLift)})`,
94
- success: true,
95
- };
96
- }
97
- function generateTable(scores) {
98
- // Sort by score descending
99
- const sorted = [...scores].sort((a, b) => b.totalScore - a.totalScore);
100
- // Build rows with data
101
- const rows = sorted.map((s) => ({
102
- feature: s.feature,
103
- gap: keyGap(s, scores),
104
- grade: grade(s.totalScore),
105
- lift: "+" + s.docLift,
106
- score: String(s.totalScore),
107
- }));
108
- // Calculate column widths from data (minimum widths from headers)
109
- const cols = {
110
- feature: Math.max(14, ...rows.map((r) => r.feature.length)),
111
- gap: Math.max(7, ...rows.map((r) => r.gap.length)),
112
- grade: 5,
113
- lift: 8,
114
- score: 5,
115
- };
116
- const fmtRow = (r) => `| ${r.feature.padEnd(cols.feature)} | ${r.score.padEnd(cols.score)} | ${r.grade.padEnd(cols.grade)} | ${r.lift.padEnd(cols.lift)} | ${r.gap.padEnd(cols.gap)} |`;
117
- const header = fmtRow({
118
- feature: "Feature Area",
119
- gap: "Key gap",
120
- grade: "Grade",
121
- lift: "Doc Lift",
122
- score: "Score",
123
- });
124
- const sep = `| ${"-".repeat(cols.feature)} | ${"-".repeat(cols.score)} | ${"-".repeat(cols.grade)} | ${"-".repeat(cols.lift)} | ${"-".repeat(cols.gap)} |`;
125
- return [header, sep, ...rows.map(fmtRow)].join("\n");
126
- }
127
- // ---------------------------------------------------------------------------
128
- // Table generation
129
- // ---------------------------------------------------------------------------
130
- function grade(score) {
131
- if (score >= 80)
132
- return "✅ A";
133
- if (score >= 60)
134
- return "🟡 B";
135
- if (score >= 40)
136
- return "🟠 C";
137
- return "🔴 D";
138
- }
139
- // ---------------------------------------------------------------------------
140
- // File update
141
- // ---------------------------------------------------------------------------
142
- function keyGap(s, allScores) {
143
- // Below critical threshold
144
- if (s.totalScore < 40) {
145
- return "⚠️ Below critical — all dimensions underperform";
146
- }
147
- // Find the weakest dimension relative to max possible (all 0–100)
148
- const dims = [
149
- { max: 100, name: "task completion", score: s.taskCompletion },
150
- { max: 100, name: "code correctness", score: s.codeCorrectness },
151
- { max: 100, name: "doc coverage", score: s.docCoverage },
152
- ];
153
- // Sort by ratio (lowest first)
154
- dims.sort((a, b) => a.score / a.max - b.score / b.max);
155
- const weakest = dims[0];
156
- // Check for notable strengths
157
- const maxLift = Math.max(...allScores.map((sc) => sc.docLift));
158
- const maxScore = Math.max(...allScores.map((sc) => sc.totalScore));
159
- if (s.totalScore === maxScore) {
160
- return `Strong — highest score; ${weakest.name} (${weakest.score}/${weakest.max})`;
161
- }
162
- if (s.docLift === maxLift) {
163
- return `Highest doc lift; ${weakest.name} (${weakest.score}/${weakest.max})`;
164
- }
165
- if (weakest.score === 0) {
166
- return `Zero ${weakest.name} score; lowest doc lift`;
167
- }
168
- return `${weakest.name[0].toUpperCase() + weakest.name.slice(1)} (${weakest.score}/${weakest.max}) holds back total score`;
169
- }
170
- // ---------------------------------------------------------------------------
171
- // Main (when run directly)
172
- // ---------------------------------------------------------------------------
173
- if (process.argv[1]?.endsWith("update-quality-scores.ts") ||
174
- process.argv[1]?.endsWith("update-quality-scores.js")) {
175
- console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
176
- const result = updateQualityScores();
177
- if (result.success) {
178
- console.log(` ✅ ${result.message}`);
179
- }
180
- else {
181
- console.error(` ❌ ${result.message}`);
182
- process.exit(1);
183
- }
184
- }
@@ -1,21 +0,0 @@
1
- #!/usr/bin/env tsx
2
- /**
3
- * Validation script: Compare YamlTaskSource vs ContentLakeTaskSource
4
- *
5
- * Loads tasks from both sources and compares them field-by-field to verify
6
- * that the Content Lake migration produced identical LiteracyTaskDefinition[] output.
7
- *
8
- * This is Phase 3b of the tasks-as-content exec plan — parallel validation
9
- * before deleting YAML files.
10
- *
11
- * Usage:
12
- * cd packages/eval
13
- * npx tsx src/scripts/validate-task-sources.ts
14
- *
15
- * Prerequisites:
16
- * - Migration script has been run (ailf.task documents exist in CL)
17
- * - SANITY_API_TOKEN configured for Content Lake reads
18
- *
19
- * @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
20
- */
21
- export {};