@sanity/ailf 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/LICENSE +21 -0
  2. package/dist/cli.js +0 -0
  3. package/package.json +24 -24
  4. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
  5. package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
  6. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
  7. package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
  8. package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
  9. package/dist/_vendor/ailf-tasks/cli.js +0 -61
  10. package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
  11. package/dist/_vendor/ailf-tasks/index.js +0 -16
  12. package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
  13. package/dist/_vendor/ailf-tasks/parser.js +0 -73
  14. package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
  15. package/dist/_vendor/ailf-tasks/schemas.js +0 -180
  16. package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
  17. package/dist/_vendor/ailf-tasks/validation.js +0 -162
  18. package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
  19. package/dist/adapters/task-sources/yaml-task-source.js +0 -139
  20. package/dist/agent-observer/test-imports.d.ts +0 -7
  21. package/dist/agent-observer/test-imports.js +0 -185
  22. package/dist/commands/update-quality-scores.d.ts +0 -5
  23. package/dist/commands/update-quality-scores.js +0 -20
  24. package/dist/lib/agent-behavior-report.d.ts +0 -8
  25. package/dist/lib/agent-behavior-report.js +0 -185
  26. package/dist/lib/baseline.d.ts +0 -19
  27. package/dist/lib/baseline.js +0 -153
  28. package/dist/lib/calculate-scores.d.ts +0 -23
  29. package/dist/lib/calculate-scores.js +0 -42
  30. package/dist/lib/compare.d.ts +0 -18
  31. package/dist/lib/compare.js +0 -170
  32. package/dist/lib/coverage-audit.d.ts +0 -4
  33. package/dist/lib/coverage-audit.js +0 -42
  34. package/dist/lib/discovery-report.d.ts +0 -13
  35. package/dist/lib/discovery-report.js +0 -57
  36. package/dist/lib/fetch-docs.d.ts +0 -30
  37. package/dist/lib/fetch-docs.js +0 -171
  38. package/dist/lib/generate-configs.d.ts +0 -25
  39. package/dist/lib/generate-configs.js +0 -42
  40. package/dist/lib/grader-api.d.ts +0 -21
  41. package/dist/lib/grader-api.js +0 -34
  42. package/dist/lib/grader-compare.d.ts +0 -19
  43. package/dist/lib/grader-compare.js +0 -91
  44. package/dist/lib/grader-consistency.d.ts +0 -27
  45. package/dist/lib/grader-consistency.js +0 -79
  46. package/dist/lib/grader-sensitivity.d.ts +0 -19
  47. package/dist/lib/grader-sensitivity.js +0 -75
  48. package/dist/lib/grader-validate.d.ts +0 -19
  49. package/dist/lib/grader-validate.js +0 -78
  50. package/dist/lib/measure-retrieval.d.ts +0 -14
  51. package/dist/lib/measure-retrieval.js +0 -71
  52. package/dist/lib/pr-comment.d.ts +0 -16
  53. package/dist/lib/pr-comment.js +0 -28
  54. package/dist/lib/readiness-report.d.ts +0 -13
  55. package/dist/lib/readiness-report.js +0 -108
  56. package/dist/lib/webhook-server.d.ts +0 -11
  57. package/dist/lib/webhook-server.js +0 -24
  58. package/dist/lib/weekly-digest.d.ts +0 -24
  59. package/dist/lib/weekly-digest.js +0 -148
  60. package/dist/orchestration/env-bridge.d.ts +0 -21
  61. package/dist/orchestration/env-bridge.js +0 -66
  62. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  63. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  64. package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
  65. package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
  66. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
  67. package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
  68. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
  69. package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
  70. package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
  71. package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
  72. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
  73. package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
  74. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
  75. package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
  76. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
  77. package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
  78. package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
  79. package/dist/pipeline/compiler/task-bridge.js +0 -92
  80. package/dist/pipeline/expand-tasks.d.ts +0 -232
  81. package/dist/pipeline/expand-tasks.js +0 -467
  82. package/dist/pipeline/generate-configs.d.ts +0 -92
  83. package/dist/pipeline/generate-configs.js +0 -445
  84. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  85. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  86. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  87. package/dist/pipeline/steps/compare-step.js +0 -90
  88. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  89. package/dist/pipeline/steps/eval-step.js +0 -347
  90. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  91. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  92. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  93. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  94. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  95. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  96. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  97. package/dist/pipeline/steps/publish-report-step.js +0 -243
  98. package/dist/pipeline/steps/report-step.d.ts +0 -13
  99. package/dist/pipeline/steps/report-step.js +0 -56
  100. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  101. package/dist/pipeline/steps/update-scores-step.js +0 -42
  102. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  103. package/dist/scripts/agent-behavior-report.js +0 -315
  104. package/dist/scripts/baseline.d.ts +0 -43
  105. package/dist/scripts/baseline.js +0 -267
  106. package/dist/scripts/calculate-scores.d.ts +0 -166
  107. package/dist/scripts/calculate-scores.js +0 -1296
  108. package/dist/scripts/compare.d.ts +0 -22
  109. package/dist/scripts/compare.js +0 -334
  110. package/dist/scripts/coverage-audit.d.ts +0 -44
  111. package/dist/scripts/coverage-audit.js +0 -209
  112. package/dist/scripts/debug-eval.d.ts +0 -19
  113. package/dist/scripts/debug-eval.js +0 -73
  114. package/dist/scripts/discovery-report.d.ts +0 -58
  115. package/dist/scripts/discovery-report.js +0 -250
  116. package/dist/scripts/fetch-docs.d.ts +0 -35
  117. package/dist/scripts/fetch-docs.js +0 -472
  118. package/dist/scripts/generate-configs.d.ts +0 -66
  119. package/dist/scripts/generate-configs.js +0 -459
  120. package/dist/scripts/grader-api.d.ts +0 -27
  121. package/dist/scripts/grader-api.js +0 -206
  122. package/dist/scripts/grader-compare.d.ts +0 -22
  123. package/dist/scripts/grader-compare.js +0 -368
  124. package/dist/scripts/grader-consistency.d.ts +0 -20
  125. package/dist/scripts/grader-consistency.js +0 -313
  126. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  127. package/dist/scripts/grader-sensitivity.js +0 -354
  128. package/dist/scripts/grader-validate.d.ts +0 -19
  129. package/dist/scripts/grader-validate.js +0 -267
  130. package/dist/scripts/measure-retrieval.d.ts +0 -10
  131. package/dist/scripts/measure-retrieval.js +0 -145
  132. package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
  133. package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
  134. package/dist/scripts/pipeline.d.ts +0 -76
  135. package/dist/scripts/pipeline.js +0 -1031
  136. package/dist/scripts/pr-comment.d.ts +0 -10
  137. package/dist/scripts/pr-comment.js +0 -510
  138. package/dist/scripts/readiness-report.d.ts +0 -88
  139. package/dist/scripts/readiness-report.js +0 -342
  140. package/dist/scripts/update-quality-scores.d.ts +0 -15
  141. package/dist/scripts/update-quality-scores.js +0 -184
  142. package/dist/scripts/validate-task-sources.d.ts +0 -21
  143. package/dist/scripts/validate-task-sources.js +0 -210
  144. package/dist/scripts/validate.d.ts +0 -13
  145. package/dist/scripts/validate.js +0 -79
  146. package/dist/scripts/webhook-server.d.ts +0 -26
  147. package/dist/scripts/webhook-server.js +0 -147
  148. package/dist/scripts/weekly-digest.d.ts +0 -24
  149. package/dist/scripts/weekly-digest.js +0 -144
  150. package/dist/sinks/format-slack.d.ts +0 -64
  151. package/dist/sinks/format-slack.js +0 -306
  152. package/dist/sinks/slack-sink.d.ts +0 -27
  153. package/dist/sinks/slack-sink.js +0 -78
  154. package/dist/sinks/webhook-sink.d.ts +0 -19
  155. package/dist/sinks/webhook-sink.js +0 -50
  156. package/tasks/.expanded.agentic.yaml +0 -280
  157. package/tasks/.expanded.yaml +0 -565
@@ -1,342 +0,0 @@
1
- /**
2
- * readiness-report.ts
3
- *
4
- * Launch readiness report generator — Phase 5b of the Scenario Matrix
5
- * implementation. Combines threshold evaluation, ceiling decomposition,
6
- * and gap analysis into a single actionable readiness checklist for a
7
- * given feature area.
8
- *
9
- * Usage:
10
- * pnpm readiness-report --area visual-editing
11
- * pnpm readiness-report --area groq --history
12
- * pnpm readiness-report --area groq --output readiness.md
13
- *
14
- * Exports pure functions for unit testing:
15
- * - generateReadinessReport() — builds the structured report
16
- * - formatReadinessMarkdown() — renders the report as markdown
17
- *
18
- * @see docs/exec-plans/completed/scenario-matrix-implementation/phase-5-readiness-thresholds.md
19
- */
20
- import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
21
- import { dirname, join, resolve } from "node:path";
22
- import { fileURLToPath } from "node:url";
23
- import { load } from "js-yaml";
24
- import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
25
- import { evaluateThresholds } from "../pipeline/thresholds.js";
26
// All data locations are resolved from the package root, which sits two
// directories above the directory containing this script.
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, "../..");
const SCORE_SUMMARY_PATH = resolve(ROOT, "results/latest/score-summary.json");
const GAP_ANALYSIS_PATH = resolve(ROOT, "results/latest/gap-analysis.json");
const THRESHOLDS_PATH = resolve(ROOT, "config/thresholds.yaml");
const BASELINES_DIR = resolve(ROOT, "results/baselines");
32
- // ---------------------------------------------------------------------------
33
- // Pure functions (exported for testing)
34
- // ---------------------------------------------------------------------------
35
/**
 * Render a structured readiness report as a markdown document.
 *
 * The output always contains the header with the overall verdict, the
 * dimension checklist and the ceiling analysis. The failing-criteria,
 * gap-analysis and historical-progress sections are emitted only when the
 * corresponding list on the report is non-empty. The closing row of the
 * history table is stamped with today's date (UTC, from `new Date()`).
 *
 * @param report - Report object as returned by generateReadinessReport().
 * @returns The markdown text, with lines joined by "\n".
 */
export function formatReadinessMarkdown(report) {
    const out = [];
    const title = formatAreaLabel(report.area);

    // Header
    const verdict = report.pass ? "✅ READY" : "❌ NOT READY";
    out.push(
        `## 🚀 Launch Readiness: ${title}`,
        "",
        `**Overall:** ${verdict} (${fmt(report.score)}/100, threshold: ${report.threshold})`,
        "",
    );

    // Dimension checklist
    out.push(
        "### Dimension Checklist",
        "",
        "| Dimension | Score | Threshold | Status |",
        "|---|---|---|---|",
    );
    for (const { dimension, pass, score, threshold } of report.dimensions) {
        const statusCell = pass ? "✅ Meets threshold" : "❌ Below threshold";
        out.push(`| ${dimension} | ${fmt(score)} | ${fmt(threshold)} | ${statusCell} |`);
    }
    out.push("");

    // Ceiling analysis
    const { ceilingScore, docLift, docQualityGap, floorScore } = report.ceiling;
    let ceilingNote = "⚠️ Below 60 — docs need improvement";
    if (ceilingScore >= 60) {
        ceilingNote = "✅ Docs enable reasonable performance";
    }
    let floorNote = "Model has limited baseline knowledge";
    if (floorScore >= 30) {
        floorNote = "Model has moderate baseline knowledge";
    }
    let liftNote;
    if (docLift < 0) {
        liftNote = "❌ Docs are hurting performance";
    }
    else if (docLift >= 10) {
        liftNote = "✅ Docs add significant value";
    }
    else {
        liftNote = "⚠️ Docs add minimal value";
    }
    // Non-negative lifts get an explicit "+"; negative ones already carry "-".
    const signedLift = `${docLift >= 0 ? "+" : ""}${fmt(docLift)}`;
    const gapNote = docQualityGap > 30
        ? "Room for improvement via documentation"
        : "✅ Docs are high quality";
    out.push(
        "### Ceiling Analysis",
        "",
        "| Metric | Value | Assessment |",
        "|---|---|---|",
        `| Ceiling Score | ${fmt(ceilingScore)} | ${ceilingNote} |`,
        `| Floor Score | ${fmt(floorScore)} | ${floorNote} |`,
        `| Doc Lift | ${signedLift} | ${liftNote} |`,
        `| Doc Quality Gap | ${fmt(docQualityGap)} | ${gapNote} |`,
        "",
    );

    // Failing criteria and recommendation (only when something is violated)
    const violationCount = report.violations.length;
    if (violationCount > 0) {
        out.push("### Failing Criteria");
        report.violations.forEach((violation, index) => {
            out.push(`${index + 1}. **${violation.description}**`);
        });
        out.push(
            "",
            "### Recommendation",
            `Fix the ${violationCount} ${violationCount === 1 ? "item" : "items"} above and re-evaluate.`,
            "",
        );
    }

    // Gap analysis (only when gaps were supplied)
    if (report.gaps.length > 0) {
        out.push(
            "### Gap Analysis",
            "",
            "| Failure Mode | Est. Lift | Confidence | Remediation |",
            "|---|---|---|---|",
        );
        for (const gap of report.gaps) {
            let icon;
            switch (gap.confidence) {
                case "high":
                    icon = "🟢";
                    break;
                case "medium":
                    icon = "🟡";
                    break;
                default:
                    icon = "🔴";
            }
            out.push(`| ${gap.failureMode} | +${gap.estimatedLift.toFixed(1)} | ${icon} ${gap.confidence} | ${gap.remediation} |`);
        }
        out.push("");
    }

    // Historical progress (only when history was supplied)
    if (report.history.length > 0) {
        out.push("### Historical Progress", "", "| Date | Score | Tag |", "|---|---|---|");
        for (const { score, tag, timestamp } of report.history) {
            out.push(`| ${timestamp.slice(0, 10)} | ${fmt(score)} | ${tag ?? "—"} |`);
        }
        // The current score closes the table, dated today.
        const today = new Date().toISOString().slice(0, 10);
        out.push(`| ${today} | ${fmt(report.score)} | *current* |`, "");
    }

    return out.join("\n");
}
129
/**
 * Build the structured readiness report for one feature area.
 *
 * Works only on the data passed in; it performs no file or network access
 * itself.
 *
 * @param opts - Options bag with:
 *   - area: feature name to report on (matched against `scores[].feature`)
 *   - scoreSummary: score summary whose `scores` array contains the area
 *   - thresholdConfig: threshold configuration (`defaults` plus optional `areas`)
 *   - gapAnalysis: optional gap analysis; its `gaps` are filtered to the area
 *   - history: optional list of earlier scores (defaults to an empty list)
 * @returns The report: area, ceiling figures, per-dimension checks, gaps,
 *   history, overall pass flag, score, composite threshold, the full
 *   threshold evaluation and the violations belonging to this area.
 * @throws {Error} When the area is not present in the score summary.
 */
export function generateReadinessReport(opts) {
    const { area, gapAnalysis, history = [], scoreSummary, thresholdConfig } = opts;

    const areaScore = scoreSummary.scores.find((entry) => entry.feature === area);
    if (!areaScore) {
        const known = scoreSummary.scores.map((entry) => entry.feature).join(", ");
        throw new Error(`Area "${area}" not found in score summary. Available areas: ${known}`);
    }

    // Thresholds are evaluated for the whole summary; only this area's
    // violations are kept for the report.
    const thresholdEvaluation = evaluateThresholds(scoreSummary, thresholdConfig);
    const violations = thresholdEvaluation.violations.filter((violation) => violation.area === area);

    // Per-area overrides win over the configured defaults.
    const overrides = thresholdConfig.areas?.[area];
    const threshold = overrides?.composite ?? thresholdConfig.defaults.composite;
    const dimDefaults = thresholdConfig.defaults.dimensions ?? {};
    const dimOverrides = overrides?.dimensions ?? {};

    // A dimension with no configured threshold is checked against 0.
    const checkDimension = (dimension, key, score) => {
        const limit = dimOverrides[key] ?? dimDefaults[key] ?? 0;
        return { dimension, pass: score >= limit, score, threshold: limit };
    };

    return {
        area,
        ceiling: {
            ceilingScore: areaScore.ceilingScore,
            docLift: areaScore.docLift,
            docQualityGap: areaScore.docQualityGap,
            floorScore: areaScore.floorScore,
        },
        dimensions: [
            checkDimension("Task Completion", "task-completion", areaScore.taskCompletion),
            checkDimension("Code Correctness", "code-correctness", areaScore.codeCorrectness),
            checkDimension("Doc Coverage", "doc-coverage", areaScore.docCoverage),
        ],
        gaps: gapAnalysis?.gaps.filter((gap) => gap.area === area) ?? [],
        history,
        // Readiness is decided by threshold violations, not by the checklist.
        pass: violations.length === 0,
        score: areaScore.totalScore,
        threshold,
        thresholdEvaluation,
        violations,
    };
}
204
- // ---------------------------------------------------------------------------
205
- // Formatting helpers (private)
206
- // ---------------------------------------------------------------------------
207
/**
 * Format a score for display by rounding it to the nearest integer.
 *
 * @param n - Numeric score.
 * @returns The rounded value as a string.
 */
function fmt(n) {
    const rounded = Math.round(n);
    return `${rounded}`;
}
211
/**
 * Turn a kebab-case area name into a space-separated, capitalised label.
 *
 * @param area - Area identifier such as "visual-editing".
 * @returns The label, e.g. "Visual Editing".
 */
function formatAreaLabel(area) {
    const words = [];
    for (const word of area.split("-")) {
        words.push(word.charAt(0).toUpperCase() + word.slice(1));
    }
    return words.join(" ");
}
218
- // ---------------------------------------------------------------------------
219
- // I/O helpers (used by CLI, not exported for testing)
220
- // ---------------------------------------------------------------------------
221
/**
 * Read the gap analysis JSON file if it exists.
 *
 * @param path - Location of the gap analysis file.
 * @returns The parsed JSON, or `undefined` when the file does not exist.
 */
function loadGapAnalysis(path) {
    return existsSync(path)
        ? JSON.parse(readFileSync(path, "utf-8"))
        : undefined;
}
226
/**
 * Collect the historical scores of one area from the baseline snapshots.
 *
 * Every `.json` file in the directory is read in filename order. Files that
 * cannot be read or parsed, and files with no score for the area, are
 * skipped without error.
 *
 * @param area - Feature name matched against `scores[].feature`.
 * @param baselinesDir - Directory holding the baseline JSON files.
 * @returns One `{ score, tag, timestamp }` entry per usable baseline; an
 *   empty list when the directory does not exist.
 */
function loadHistory(area, baselinesDir) {
    if (!existsSync(baselinesDir)) {
        return [];
    }
    const toEntry = (fileName) => {
        try {
            const snapshot = JSON.parse(readFileSync(join(baselinesDir, fileName), "utf-8"));
            const match = snapshot.scores?.find((s) => s.feature === area);
            if (!match) {
                return null;
            }
            // Filenames look like "20260304_16_34_45_pre-groq.json": the first
            // four "_"-separated parts are the timestamp, the rest is the tag.
            const stem = fileName.replace(/\.json$/, "");
            const [, , , , ...tagParts] = stem.split("_");
            return {
                score: match.totalScore,
                tag: tagParts.length > 0 ? tagParts.join("_") : undefined,
                timestamp: snapshot.timestamp ?? stem,
            };
        }
        catch {
            // Skip malformed baseline files
            return null;
        }
    };
    return readdirSync(baselinesDir)
        .filter((name) => name.endsWith(".json"))
        .sort()
        .map(toEntry)
        .filter((entry) => entry !== null);
}
257
/**
 * Read and parse the score summary JSON file.
 *
 * @param path - Location of the score summary file.
 * @returns The parsed JSON content.
 * @throws {Error} When the file does not exist.
 */
function loadScoreSummary(path) {
    if (existsSync(path)) {
        return JSON.parse(readFileSync(path, "utf-8"));
    }
    throw new Error(`Score summary not found at ${path}. Run \`pnpm pipeline\` first.`);
}
263
/**
 * Read the YAML threshold configuration and validate it against
 * ThresholdConfigSchema.
 *
 * @param path - Location of the thresholds YAML file.
 * @returns The validated configuration (the schema's parsed `data`).
 * @throws {Error} When the file does not exist, or when validation fails; the
 *   validation error lists one "path: message" line per schema issue.
 */
function loadThresholdConfig(path) {
    if (!existsSync(path)) {
        throw new Error(`Threshold config not found at ${path}.`);
    }
    const validation = ThresholdConfigSchema.safeParse(load(readFileSync(path, "utf-8")));
    if (validation.success) {
        return validation.data;
    }
    const details = validation.error.issues
        .map((issue) => ` ${issue.path.join(".")}: ${issue.message}`)
        .join("\n");
    throw new Error(`Invalid thresholds.yaml:\n${details}`);
}
278
- // ---------------------------------------------------------------------------
279
- // CLI
280
- // ---------------------------------------------------------------------------
281
/**
 * CLI entry point: load the inputs, build the readiness report and emit it.
 *
 * The markdown goes to the `--output` file when one was given, otherwise to
 * stdout. When the area is not ready the process exit code is set to 1.
 */
function main() {
    const { area, history: includeHistory, output } = parseArgs(process.argv);

    // Load data (the score summary and threshold config are mandatory and
    // throw when missing; gap analysis and history are optional).
    const scoreSummary = loadScoreSummary(SCORE_SUMMARY_PATH);
    const thresholdConfig = loadThresholdConfig(THRESHOLDS_PATH);
    const gapAnalysis = loadGapAnalysis(GAP_ANALYSIS_PATH);
    const history = includeHistory ? loadHistory(area, BASELINES_DIR) : [];

    const report = generateReadinessReport({
        area,
        gapAnalysis,
        history,
        scoreSummary,
        thresholdConfig,
    });

    const markdown = formatReadinessMarkdown(report);
    if (output) {
        writeFileSync(output, markdown, "utf-8");
        // Status goes to stderr so stdout stays reserved for the report.
        console.error(`✅ Readiness report written to ${output}`);
    }
    else {
        console.log(markdown);
    }

    // Signal "not ready" through the exit code. Setting process.exitCode
    // (instead of calling process.exit) lets Node finish flushing stdout
    // first, so a report piped to another process is not cut short.
    if (!report.pass) {
        process.exitCode = 1;
    }
}
310
/**
 * Parse the command-line flags of the readiness-report CLI.
 *
 * Recognises `--area <area>`, `--history` and `--output <file>`; anything
 * else is ignored. A value-taking flag that is the last argument is ignored
 * too. When no area was given, usage help is printed to stderr and the
 * process exits with code 1.
 *
 * @param argv - Full argument vector; the first two entries are skipped.
 * @returns `{ area, history, output }`.
 */
function parseArgs(argv) {
    const tokens = argv.slice(2);
    const parsed = { area: undefined, history: false, output: undefined };
    let cursor = 0;
    while (cursor < tokens.length) {
        const hasValue = cursor + 1 < tokens.length;
        switch (tokens[cursor]) {
            case "--area":
                if (hasValue) {
                    cursor += 1;
                    parsed.area = tokens[cursor];
                }
                break;
            case "--history":
                parsed.history = true;
                break;
            case "--output":
                if (hasValue) {
                    cursor += 1;
                    parsed.output = tokens[cursor];
                }
                break;
            default:
                break;
        }
        cursor += 1;
    }
    if (!parsed.area) {
        const usage = [
            "Usage: readiness-report --area <area> [--history] [--output <file>]",
            "",
            "Options:",
            " --area <area> Feature area to check (required)",
            " --history Include historical progress from baselines",
            " --output <file> Write markdown to file instead of stdout",
        ];
        for (const line of usage) {
            console.error(line);
        }
        process.exit(1);
    }
    return parsed;
}
338
// Run the CLI only when this file itself is the script Node was started with
// (either the TypeScript source or the compiled JavaScript).
if (["readiness-report.ts", "readiness-report.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    main();
}
@@ -1,15 +0,0 @@
1
- /**
2
- * update-quality-scores.ts
3
- *
4
- * Reads score-summary.json and updates the feature area quality grades
5
- * table in docs/QUALITY_SCORE.md. Designed to run automatically after
6
- * each evaluation as the final pipeline step.
7
- *
8
- * Usage:
9
- * pnpm update-quality-scores
10
- * tsx src/scripts/update-quality-scores.ts
11
- */
12
/**
 * Update the feature area quality grades table from the score summary.
 *
 * @returns An outcome object: `success` tells whether the update went
 *   through, `message` describes the result.
 */
export declare function updateQualityScores(): { success: boolean; message: string };
@@ -1,184 +0,0 @@
1
- /**
2
- * update-quality-scores.ts
3
- *
4
- * Reads score-summary.json and updates the feature area quality grades
5
- * table in docs/QUALITY_SCORE.md. Designed to run automatically after
6
- * each evaluation as the final pipeline step.
7
- *
8
- * Usage:
9
- * pnpm update-quality-scores
10
- * tsx src/scripts/update-quality-scores.ts
11
- */
12
- import { execSync } from "child_process";
13
- import { existsSync, readFileSync, writeFileSync } from "fs";
14
- import { dirname, join, resolve } from "path";
15
- import { fileURLToPath } from "url";
16
// ROOT is two directories above this script; REPO_ROOT is two directories
// above ROOT. The quality-score document is addressed from REPO_ROOT, the
// score summary from ROOT.
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, "../..");
const REPO_ROOT = resolve(ROOT, "../..");
const QUALITY_SCORE_PATH = resolve(REPO_ROOT, "docs/QUALITY_SCORE.md");
const SCORE_SUMMARY_PATH = resolve(ROOT, "results/latest/score-summary.json");
21
- // ---------------------------------------------------------------------------
22
- // File update
23
- // ---------------------------------------------------------------------------
24
/**
 * Rewrite the feature area quality grades table in QUALITY_SCORE.md from the
 * latest score summary.
 *
 * Steps: read and normalise score-summary.json, locate the existing table in
 * the markdown file, replace it with a freshly generated one, write the file
 * back and finally run Prettier on it (best effort).
 *
 * Expected failures (missing files, unparsable summary, no scores, table not
 * found) are reported through the return value rather than thrown.
 *
 * @returns `{ message, success }` describing the outcome.
 */
export function updateQualityScores() {
    // Read score summary
    if (!existsSync(SCORE_SUMMARY_PATH)) {
        return {
            message: `Score summary not found at ${SCORE_SUMMARY_PATH}. Run 'pnpm calculate-scores' first.`,
            success: false,
        };
    }
    let summary;
    try {
        const parsed = JSON.parse(readFileSync(SCORE_SUMMARY_PATH, "utf-8"));
        // A summary without a `scores` array is treated as "no scores" below
        // instead of surfacing as a misleading parse failure.
        const rawScores = parsed?.scores ?? [];
        // Normalize legacy field names (liftFromDocs → docLift)
        summary = {
            ...parsed,
            scores: rawScores.map((s) => ({
                ...s,
                docLift: s.docLift ?? s.liftFromDocs ?? 0,
            })),
        };
    }
    catch (err) {
        return {
            message: `Failed to parse score summary: ${err instanceof Error ? err.message : String(err)}`,
            success: false,
        };
    }
    if (summary.scores.length === 0) {
        return { message: "Score summary contains no scores.", success: false };
    }

    // Read QUALITY_SCORE.md
    if (!existsSync(QUALITY_SCORE_PATH)) {
        return {
            message: `QUALITY_SCORE.md not found at ${QUALITY_SCORE_PATH}.`,
            success: false,
        };
    }
    const markdown = readFileSync(QUALITY_SCORE_PATH, "utf-8");

    // Locate the existing table: header row, separator row, then data rows.
    const tablePattern = /\| Feature Area\s+\| Score\s+\| Grade\s+\| Doc Lift\s+\| Key gap[^|]*\|\n\| [-\s|]+\|\n(\|[^\n]+\|\n)*/;
    const match = tablePattern.exec(markdown);
    if (!match) {
        return {
            message: "Could not find the feature area quality grades table in QUALITY_SCORE.md.",
            success: false,
        };
    }
    const updated = markdown.slice(0, match.index) +
        generateTable(summary.scores) +
        "\n" +
        markdown.slice(match.index + match[0].length);

    // Write back
    writeFileSync(QUALITY_SCORE_PATH, updated);

    // Format with Prettier to ensure consistent table formatting
    // (emoji widths differ between padEnd and Prettier's table formatter).
    // The path is quoted so a checkout directory containing spaces still works.
    try {
        execSync(`npx prettier --write "${QUALITY_SCORE_PATH}"`, {
            cwd: REPO_ROOT,
            stdio: "pipe",
        });
    }
    catch {
        // Non-fatal — formatting is nice-to-have
    }

    // Only non-negative lifts get a "+"; a negative one already carries "-".
    const avgLift = Math.round(summary.overall.avgDocLift);
    const liftLabel = avgLift < 0 ? String(avgLift) : `+${avgLift}`;
    return {
        message: `Updated ${summary.scores.length} feature area scores in QUALITY_SCORE.md (avg: ${Math.round(summary.overall.avgScore)}, lift: ${liftLabel})`,
        success: true,
    };
}
97
/**
 * Build the markdown quality-grades table for the given area scores.
 *
 * Rows are ordered by total score, highest first. Column widths for the
 * feature and key-gap columns grow with the data; the other columns use
 * fixed widths.
 *
 * @param scores - Area scores (feature, totalScore, docLift and the three
 *   dimension scores read by keyGap()).
 * @returns Header row, separator row and one row per area, joined by "\n".
 */
function generateTable(scores) {
    // A negative lift already carries its own sign; prefixing "+" would
    // render it as "+-5".
    const formatLift = (lift) => (lift < 0 ? String(lift) : `+${lift}`);

    // Sort a copy so the caller's array keeps its order.
    const rows = [...scores]
        .sort((a, b) => b.totalScore - a.totalScore)
        .map((s) => ({
            feature: s.feature,
            gap: keyGap(s, scores),
            grade: grade(s.totalScore),
            lift: formatLift(s.docLift),
            score: String(s.totalScore),
        }));

    // Calculate column widths from data (minimum widths from headers)
    const cols = {
        feature: Math.max(14, ...rows.map((r) => r.feature.length)),
        gap: Math.max(7, ...rows.map((r) => r.gap.length)),
        grade: 5,
        lift: 8,
        score: 5,
    };
    const fmtRow = (r) => `| ${r.feature.padEnd(cols.feature)} | ${r.score.padEnd(cols.score)} | ${r.grade.padEnd(cols.grade)} | ${r.lift.padEnd(cols.lift)} | ${r.gap.padEnd(cols.gap)} |`;
    const header = fmtRow({
        feature: "Feature Area",
        gap: "Key gap",
        grade: "Grade",
        lift: "Doc Lift",
        score: "Score",
    });
    const dashes = (width) => "-".repeat(width);
    const sep = `| ${dashes(cols.feature)} | ${dashes(cols.score)} | ${dashes(cols.grade)} | ${dashes(cols.lift)} | ${dashes(cols.gap)} |`;
    return [header, sep, ...rows.map(fmtRow)].join("\n");
}
127
- // ---------------------------------------------------------------------------
128
- // Grading
129
- // ---------------------------------------------------------------------------
130
/**
 * Map a total score to its letter grade label.
 *
 * @param score - Total score.
 * @returns "✅ A" from 80, "🟡 B" from 60, "🟠 C" from 40, otherwise "🔴 D".
 */
function grade(score) {
    const bands = [
        [80, "✅ A"],
        [60, "🟡 B"],
        [40, "🟠 C"],
    ];
    const band = bands.find(([floor]) => score >= floor);
    return band ? band[1] : "🔴 D";
}
139
- // ---------------------------------------------------------------------------
140
- // Key gap description
141
- // ---------------------------------------------------------------------------
142
/**
 * Produce the short "key gap" remark shown for an area in the grades table.
 *
 * The first matching rule wins: total score below 40, highest total score of
 * all areas, highest doc lift of all areas, weakest dimension at zero, and
 * finally the generic "holds back total score" remark.
 *
 * @param s - Scores of the area being described.
 * @param allScores - Scores of every area, used to find the best total score
 *   and the best doc lift.
 * @returns The remark text.
 */
function keyGap(s, allScores) {
    // Below critical threshold
    if (s.totalScore < 40) {
        return "⚠️ Below critical — all dimensions underperform";
    }

    // Weakest dimension = lowest score relative to its maximum (all 0–100).
    const [weakest] = [
        { max: 100, name: "task completion", score: s.taskCompletion },
        { max: 100, name: "code correctness", score: s.codeCorrectness },
        { max: 100, name: "doc coverage", score: s.docCoverage },
    ].sort((left, right) => left.score / left.max - right.score / right.max);
    const weakestSummary = `${weakest.name} (${weakest.score}/${weakest.max})`;

    // Notable strengths take precedence over the weakness remarks.
    const topScore = Math.max(...allScores.map((entry) => entry.totalScore));
    if (s.totalScore === topScore) {
        return `Strong — highest score; ${weakestSummary}`;
    }
    const topLift = Math.max(...allScores.map((entry) => entry.docLift));
    if (s.docLift === topLift) {
        return `Highest doc lift; ${weakestSummary}`;
    }

    if (weakest.score === 0) {
        return `Zero ${weakest.name} score; lowest doc lift`;
    }
    const capitalised = weakest.name[0].toUpperCase() + weakest.name.slice(1);
    return `${capitalised} (${weakest.score}/${weakest.max}) holds back total score`;
}
170
- // ---------------------------------------------------------------------------
171
- // Main (when run directly)
172
- // ---------------------------------------------------------------------------
173
// Run the update only when this file itself is the script Node was started
// with (either the TypeScript source or the compiled JavaScript).
if (["update-quality-scores.ts", "update-quality-scores.js"].some((suffix) => process.argv[1]?.endsWith(suffix))) {
    console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
    const { message, success } = updateQualityScores();
    if (!success) {
        console.error(` ❌ ${message}`);
        process.exit(1);
    }
    console.log(` ✅ ${message}`);
}
@@ -1,21 +0,0 @@
1
- #!/usr/bin/env tsx
2
- /**
3
- * Validation script: Compare YamlTaskSource vs ContentLakeTaskSource
4
- *
5
- * Loads tasks from both sources and compares them field-by-field to verify
6
- * that the Content Lake migration produced identical LiteracyTaskDefinition[] output.
7
- *
8
- * This is Phase 3b of the tasks-as-content exec plan — parallel validation
9
- * before deleting YAML files.
10
- *
11
- * Usage:
12
- * cd packages/eval
13
- * npx tsx src/scripts/validate-task-sources.ts
14
- *
15
- * Prerequisites:
16
- * - Migration script has been run (ailf.task documents exist in CL)
17
- * - SANITY_API_TOKEN configured for Content Lake reads
18
- *
19
- * @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
20
- */
21
- export {};