npm - @sanity/ailf - Versions diffs - 2.0.2 → 2.1.0 - Mend

@sanity/ailf 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157) hide show

package/LICENSE +21 -0
package/dist/cli.js +0 -0
package/package.json +24 -24
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.d.ts +0 -10
package/dist/_vendor/ailf-core/__tests__/comparison-formatters.test.js +0 -185
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.d.ts +0 -6
package/dist/_vendor/ailf-core/artifact-capture/__tests__/noop-collector.test.js +0 -42
package/dist/_vendor/ailf-tasks/cli.d.ts +0 -8
package/dist/_vendor/ailf-tasks/cli.js +0 -61
package/dist/_vendor/ailf-tasks/index.d.ts +0 -13
package/dist/_vendor/ailf-tasks/index.js +0 -16
package/dist/_vendor/ailf-tasks/parser.d.ts +0 -27
package/dist/_vendor/ailf-tasks/parser.js +0 -73
package/dist/_vendor/ailf-tasks/schemas.d.ts +0 -198
package/dist/_vendor/ailf-tasks/schemas.js +0 -180
package/dist/_vendor/ailf-tasks/validation.d.ts +0 -47
package/dist/_vendor/ailf-tasks/validation.js +0 -162
package/dist/adapters/task-sources/yaml-task-source.d.ts +0 -18
package/dist/adapters/task-sources/yaml-task-source.js +0 -139
package/dist/agent-observer/test-imports.d.ts +0 -7
package/dist/agent-observer/test-imports.js +0 -185
package/dist/commands/update-quality-scores.d.ts +0 -5
package/dist/commands/update-quality-scores.js +0 -20
package/dist/lib/agent-behavior-report.d.ts +0 -8
package/dist/lib/agent-behavior-report.js +0 -185
package/dist/lib/baseline.d.ts +0 -19
package/dist/lib/baseline.js +0 -153
package/dist/lib/calculate-scores.d.ts +0 -23
package/dist/lib/calculate-scores.js +0 -42
package/dist/lib/compare.d.ts +0 -18
package/dist/lib/compare.js +0 -170
package/dist/lib/coverage-audit.d.ts +0 -4
package/dist/lib/coverage-audit.js +0 -42
package/dist/lib/discovery-report.d.ts +0 -13
package/dist/lib/discovery-report.js +0 -57
package/dist/lib/fetch-docs.d.ts +0 -30
package/dist/lib/fetch-docs.js +0 -171
package/dist/lib/generate-configs.d.ts +0 -25
package/dist/lib/generate-configs.js +0 -42
package/dist/lib/grader-api.d.ts +0 -21
package/dist/lib/grader-api.js +0 -34
package/dist/lib/grader-compare.d.ts +0 -19
package/dist/lib/grader-compare.js +0 -91
package/dist/lib/grader-consistency.d.ts +0 -27
package/dist/lib/grader-consistency.js +0 -79
package/dist/lib/grader-sensitivity.d.ts +0 -19
package/dist/lib/grader-sensitivity.js +0 -75
package/dist/lib/grader-validate.d.ts +0 -19
package/dist/lib/grader-validate.js +0 -78
package/dist/lib/measure-retrieval.d.ts +0 -14
package/dist/lib/measure-retrieval.js +0 -71
package/dist/lib/pr-comment.d.ts +0 -16
package/dist/lib/pr-comment.js +0 -28
package/dist/lib/readiness-report.d.ts +0 -13
package/dist/lib/readiness-report.js +0 -108
package/dist/lib/webhook-server.d.ts +0 -11
package/dist/lib/webhook-server.js +0 -24
package/dist/lib/weekly-digest.d.ts +0 -24
package/dist/lib/weekly-digest.js +0 -148
package/dist/orchestration/env-bridge.d.ts +0 -21
package/dist/orchestration/env-bridge.js +0 -66
package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
package/dist/pipeline/compiler/__tests__/task-bridge.test.d.ts +0 -9
package/dist/pipeline/compiler/__tests__/task-bridge.test.js +0 -339
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.d.ts +0 -70
package/dist/pipeline/compiler/mode-handlers/agent-harness-handler.js +0 -485
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.d.ts +0 -76
package/dist/pipeline/compiler/mode-handlers/knowledge-probe-handler.js +0 -245
package/dist/pipeline/compiler/mode-handlers/literacy-handler.d.ts +0 -89
package/dist/pipeline/compiler/mode-handlers/literacy-handler.js +0 -379
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.d.ts +0 -50
package/dist/pipeline/compiler/mode-handlers/mcp-assertions.js +0 -334
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.d.ts +0 -69
package/dist/pipeline/compiler/mode-handlers/mcp-server-handler.js +0 -307
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.d.ts +0 -65
package/dist/pipeline/compiler/mode-handlers/mcp-tool-provider.js +0 -368
package/dist/pipeline/compiler/task-bridge.d.ts +0 -41
package/dist/pipeline/compiler/task-bridge.js +0 -92
package/dist/pipeline/expand-tasks.d.ts +0 -232
package/dist/pipeline/expand-tasks.js +0 -467
package/dist/pipeline/generate-configs.d.ts +0 -92
package/dist/pipeline/generate-configs.js +0 -445
package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
package/dist/pipeline/steps/calculate-scores-step.js +0 -89
package/dist/pipeline/steps/compare-step.d.ts +0 -18
package/dist/pipeline/steps/compare-step.js +0 -90
package/dist/pipeline/steps/eval-step.d.ts +0 -53
package/dist/pipeline/steps/eval-step.js +0 -347
package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
package/dist/pipeline/steps/fetch-docs-step.js +0 -84
package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
package/dist/pipeline/steps/generate-configs-step.js +0 -98
package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
package/dist/pipeline/steps/grader-consistency-step.js +0 -74
package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
package/dist/pipeline/steps/publish-report-step.js +0 -243
package/dist/pipeline/steps/report-step.d.ts +0 -13
package/dist/pipeline/steps/report-step.js +0 -56
package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
package/dist/pipeline/steps/update-scores-step.js +0 -42
package/dist/scripts/agent-behavior-report.d.ts +0 -19
package/dist/scripts/agent-behavior-report.js +0 -315
package/dist/scripts/baseline.d.ts +0 -43
package/dist/scripts/baseline.js +0 -267
package/dist/scripts/calculate-scores.d.ts +0 -166
package/dist/scripts/calculate-scores.js +0 -1296
package/dist/scripts/compare.d.ts +0 -22
package/dist/scripts/compare.js +0 -334
package/dist/scripts/coverage-audit.d.ts +0 -44
package/dist/scripts/coverage-audit.js +0 -209
package/dist/scripts/debug-eval.d.ts +0 -19
package/dist/scripts/debug-eval.js +0 -73
package/dist/scripts/discovery-report.d.ts +0 -58
package/dist/scripts/discovery-report.js +0 -250
package/dist/scripts/fetch-docs.d.ts +0 -35
package/dist/scripts/fetch-docs.js +0 -472
package/dist/scripts/generate-configs.d.ts +0 -66
package/dist/scripts/generate-configs.js +0 -459
package/dist/scripts/grader-api.d.ts +0 -27
package/dist/scripts/grader-api.js +0 -206
package/dist/scripts/grader-compare.d.ts +0 -22
package/dist/scripts/grader-compare.js +0 -368
package/dist/scripts/grader-consistency.d.ts +0 -20
package/dist/scripts/grader-consistency.js +0 -313
package/dist/scripts/grader-sensitivity.d.ts +0 -22
package/dist/scripts/grader-sensitivity.js +0 -354
package/dist/scripts/grader-validate.d.ts +0 -19
package/dist/scripts/grader-validate.js +0 -267
package/dist/scripts/measure-retrieval.d.ts +0 -10
package/dist/scripts/measure-retrieval.js +0 -145
package/dist/scripts/migrate-tasks-to-content-lake.d.ts +0 -24
package/dist/scripts/migrate-tasks-to-content-lake.js +0 -328
package/dist/scripts/pipeline.d.ts +0 -76
package/dist/scripts/pipeline.js +0 -1031
package/dist/scripts/pr-comment.d.ts +0 -10
package/dist/scripts/pr-comment.js +0 -510
package/dist/scripts/readiness-report.d.ts +0 -88
package/dist/scripts/readiness-report.js +0 -342
package/dist/scripts/update-quality-scores.d.ts +0 -15
package/dist/scripts/update-quality-scores.js +0 -184
package/dist/scripts/validate-task-sources.d.ts +0 -21
package/dist/scripts/validate-task-sources.js +0 -210
package/dist/scripts/validate.d.ts +0 -13
package/dist/scripts/validate.js +0 -79
package/dist/scripts/webhook-server.d.ts +0 -26
package/dist/scripts/webhook-server.js +0 -147
package/dist/scripts/weekly-digest.d.ts +0 -24
package/dist/scripts/weekly-digest.js +0 -144
package/dist/sinks/format-slack.d.ts +0 -64
package/dist/sinks/format-slack.js +0 -306
package/dist/sinks/slack-sink.d.ts +0 -27
package/dist/sinks/slack-sink.js +0 -78
package/dist/sinks/webhook-sink.d.ts +0 -19
package/dist/sinks/webhook-sink.js +0 -50
package/tasks/.expanded.agentic.yaml +0 -280
package/tasks/.expanded.yaml +0 -565

package/dist/scripts/readiness-report.js DELETED Viewed

@@ -1,342 +0,0 @@
-/**
- * readiness-report.ts
- *
- * Launch readiness report generator — Phase 5b of the Scenario Matrix
- * implementation. Combines threshold evaluation, ceiling decomposition,
- * and gap analysis into a single actionable readiness checklist for a
- * given feature area.
- *
- * Usage:
- *   pnpm readiness-report --area visual-editing
- *   pnpm readiness-report --area groq --history
- *   pnpm readiness-report --area groq --output readiness.md
- *
- * Exports pure functions for unit testing:
- *   - generateReadinessReport()  — builds the structured report
- *   - formatReadinessMarkdown()  — renders the report as markdown
- *
- * @see docs/exec-plans/completed/scenario-matrix-implementation/phase-5-readiness-thresholds.md
- */
-import { existsSync, readFileSync, readdirSync, writeFileSync } from "node:fs";
-import { dirname, join, resolve } from "node:path";
-import { fileURLToPath } from "node:url";
-import { load } from "js-yaml";
-import { ThresholdConfigSchema, } from "../pipeline/schemas.js";
-import { evaluateThresholds } from "../pipeline/thresholds.js";
-// ES modules have no built-in __dirname; derive it from this module's URL.
-const __dirname = dirname(fileURLToPath(import.meta.url));
-// Two directory levels above this module; every path below is resolved against it.
-const ROOT = resolve(__dirname, "..", "..");
-// Inputs read by the CLI entry point (main).
-const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
-const GAP_ANALYSIS_PATH = join(ROOT, "results", "latest", "gap-analysis.json");
-const THRESHOLDS_PATH = join(ROOT, "config", "thresholds.yaml");
-// Directory scanned for *.json baseline files when --history is passed.
-const BASELINES_DIR = join(ROOT, "results", "baselines");
-// ---------------------------------------------------------------------------
-// Pure functions (exported for testing)
-// ---------------------------------------------------------------------------
-/**
- * Format a readiness report as markdown.
- *
- * Takes a structured report and returns a markdown string. Performs no I/O,
- * but is not strictly pure: when `report.history` is non-empty, the final
- * "current" row is stamped with today's date via `new Date()`.
- *
- * Sections, in order: header, dimension checklist, ceiling analysis, failing
- * criteria + recommendation (only when there are violations), gap analysis
- * (only when there are gaps), historical progress (only when there is history).
- *
- * @param report Structured readiness report (area, score, threshold, pass,
- *   dimensions, ceiling, violations, gaps, history).
- * @returns The rendered markdown, lines joined with "\n".
- */
-export function formatReadinessMarkdown(report) {
-    const lines = [];
-    const areaLabel = formatAreaLabel(report.area);
-    // Header
-    const statusEmoji = report.pass ? "✅" : "❌";
-    const statusLabel = report.pass ? "READY" : "NOT READY";
-    lines.push(`## 🚀 Launch Readiness: ${areaLabel}`);
-    lines.push("");
-    // Note: the score is rounded by fmt(), the composite threshold is printed as-is.
-    lines.push(`**Overall:** ${statusEmoji} ${statusLabel} (${fmt(report.score)}/100, threshold: ${report.threshold})`);
-    lines.push("");
-    // Dimension Checklist
-    lines.push("### Dimension Checklist");
-    lines.push("");
-    lines.push("| Dimension | Score | Threshold | Status |");
-    lines.push("|---|---|---|---|");
-    for (const dim of report.dimensions) {
-        const status = dim.pass ? "✅ Meets threshold" : "❌ Below threshold";
-        lines.push(`| ${dim.dimension} | ${fmt(dim.score)} | ${fmt(dim.threshold)} | ${status} |`);
-    }
-    lines.push("");
-    // Ceiling Analysis
-    lines.push("### Ceiling Analysis");
-    lines.push("");
-    lines.push("| Metric | Value | Assessment |");
-    lines.push("|---|---|---|");
-    // Assessment cut-offs used below: ceiling >= 60, floor >= 30, lift >= 10, quality gap > 30.
-    const ceilingAssessment = report.ceiling.ceilingScore >= 60
-        ? "✅ Docs enable reasonable performance"
-        : "⚠️ Below 60 — docs need improvement";
-    lines.push(`| Ceiling Score | ${fmt(report.ceiling.ceilingScore)} | ${ceilingAssessment} |`);
-    const floorAssessment = report.ceiling.floorScore >= 30
-        ? "Model has moderate baseline knowledge"
-        : "Model has limited baseline knowledge";
-    lines.push(`| Floor Score | ${fmt(report.ceiling.floorScore)} | ${floorAssessment} |`);
-    // Negative lifts already carry their own "-" sign, so only prefix "+" for >= 0.
-    const liftSign = report.ceiling.docLift >= 0 ? "+" : "";
-    const liftAssessment = report.ceiling.docLift < 0
-        ? "❌ Docs are hurting performance"
-        : report.ceiling.docLift >= 10
-            ? "✅ Docs add significant value"
-            : "⚠️ Docs add minimal value";
-    lines.push(`| Doc Lift | ${liftSign}${fmt(report.ceiling.docLift)} | ${liftAssessment} |`);
-    lines.push(`| Doc Quality Gap | ${fmt(report.ceiling.docQualityGap)} | ${report.ceiling.docQualityGap > 30 ? "Room for improvement via documentation" : "✅ Docs are high quality"} |`);
-    lines.push("");
-    // Failing Criteria (only shown when there are violations)
-    if (report.violations.length > 0) {
-        lines.push("### Failing Criteria");
-        for (let i = 0; i < report.violations.length; i++) {
-            const v = report.violations[i];
-            lines.push(`${i + 1}. **${v.description}**`);
-        }
-        lines.push("");
-        lines.push("### Recommendation");
-        const count = report.violations.length;
-        const itemWord = count === 1 ? "item" : "items";
-        lines.push(`Fix the ${count} ${itemWord} above and re-evaluate.`);
-        lines.push("");
-    }
-    // Gap Analysis (if available)
-    if (report.gaps.length > 0) {
-        lines.push("### Gap Analysis");
-        lines.push("");
-        lines.push("| Failure Mode | Est. Lift | Confidence | Remediation |");
-        lines.push("|---|---|---|---|");
-        for (const gap of report.gaps) {
-            // Any confidence other than "high" or "medium" is shown with the red icon.
-            const confIcon = gap.confidence === "high"
-                ? "🟢"
-                : gap.confidence === "medium"
-                    ? "🟡"
-                    : "🔴";
-            lines.push(`| ${gap.failureMode} | +${gap.estimatedLift.toFixed(1)} | ${confIcon} ${gap.confidence} | ${gap.remediation} |`);
-        }
-        lines.push("");
-    }
-    // Historical Progress (if available)
-    if (report.history.length > 0) {
-        lines.push("### Historical Progress");
-        lines.push("");
-        lines.push("| Date | Score | Tag |");
-        lines.push("|---|---|---|");
-        for (const entry of report.history) {
-            // First 10 characters of the timestamp; yields YYYY-MM-DD for ISO timestamps.
-            const date = entry.timestamp.slice(0, 10);
-            const tag = entry.tag ?? "—";
-            lines.push(`| ${date} | ${fmt(entry.score)} | ${tag} |`);
-        }
-        // Show current score as the last row
-        lines.push(`| ${new Date().toISOString().slice(0, 10)} | ${fmt(report.score)} | *current* |`);
-        lines.push("");
-    }
-    return lines.join("\n");
-}
-/**
- * Generate a structured readiness report for a given feature area.
- *
- * This is a pure function — it takes all data as parameters and produces
- * a structured report. No I/O.
- */
-export function generateReadinessReport(opts) {
-    const { area, gapAnalysis, history = [], scoreSummary, thresholdConfig, } = opts;
-    // Find the area's scores
-    const areaScore = scoreSummary.scores.find((s) => s.feature === area);
-    if (!areaScore) {
-        throw new Error(`Area "${area}" not found in score summary. Available areas: ${scoreSummary.scores.map((s) => s.feature).join(", ")}`);
-    }
-    // Evaluate thresholds for the full summary (to get violations)
-    const thresholdEvaluation = evaluateThresholds(scoreSummary, thresholdConfig);
-    // Filter violations to only this area
-    const areaViolations = thresholdEvaluation.violations.filter((v) => v.area === area);
-    // Resolve per-area thresholds (with defaults)
-    const areaOverrides = thresholdConfig.areas?.[area];
-    const compositeThreshold = areaOverrides?.composite ?? thresholdConfig.defaults.composite;
-    const dimDefaults = thresholdConfig.defaults.dimensions ?? {};
-    const dimOverrides = areaOverrides?.dimensions ?? {};
-    // Build dimension checks
-    const dimensions = [
-        {
-            dimension: "Task Completion",
-            pass: areaScore.taskCompletion >=
-                (dimOverrides["task-completion"] ??
-                    dimDefaults["task-completion"] ??
-                    0),
-            score: areaScore.taskCompletion,
-            threshold: dimOverrides["task-completion"] ?? dimDefaults["task-completion"] ?? 0,
-        },
-        {
-            dimension: "Code Correctness",
-            pass: areaScore.codeCorrectness >=
-                (dimOverrides["code-correctness"] ??
-                    dimDefaults["code-correctness"] ??
-                    0),
-            score: areaScore.codeCorrectness,
-            threshold: dimOverrides["code-correctness"] ??
-                dimDefaults["code-correctness"] ??
-                0,
-        },
-        {
-            dimension: "Doc Coverage",
-            pass: areaScore.docCoverage >=
-                (dimOverrides["doc-coverage"] ?? dimDefaults["doc-coverage"] ?? 0),
-            score: areaScore.docCoverage,
-            threshold: dimOverrides["doc-coverage"] ?? dimDefaults["doc-coverage"] ?? 0,
-        },
-    ];
-    // Filter gap analysis to this area
-    const areaGaps = gapAnalysis?.gaps.filter((g) => g.area === area) ?? [];
-    // Ceiling decomposition
-    const ceiling = {
-        ceilingScore: areaScore.ceilingScore,
-        docLift: areaScore.docLift,
-        docQualityGap: areaScore.docQualityGap,
-        floorScore: areaScore.floorScore,
-    };
-    const pass = areaViolations.length === 0;
-    return {
-        area,
-        ceiling,
-        dimensions,
-        gaps: areaGaps,
-        history,
-        pass,
-        score: areaScore.totalScore,
-        threshold: compositeThreshold,
-        thresholdEvaluation,
-        violations: areaViolations,
-    };
-}
-// ---------------------------------------------------------------------------
-// Formatting helpers (private)
-// ---------------------------------------------------------------------------
-/** Format a score for display (round to nearest integer) */
-function fmt(n) {
-    return String(Math.round(n));
-}
-/** Convert kebab-case area name to title case */
-function formatAreaLabel(area) {
-    return area
-        .split("-")
-        .map((w) => w.charAt(0).toUpperCase() + w.slice(1))
-        .join(" ");
-}
-// ---------------------------------------------------------------------------
-// I/O helpers (used by CLI, not exported for testing)
-// ---------------------------------------------------------------------------
-function loadGapAnalysis(path) {
-    if (!existsSync(path))
-        return undefined;
-    return JSON.parse(readFileSync(path, "utf-8"));
-}
-function loadHistory(area, baselinesDir) {
-    if (!existsSync(baselinesDir))
-        return [];
-    const files = readdirSync(baselinesDir)
-        .filter((f) => f.endsWith(".json"))
-        .sort();
-    const entries = [];
-    for (const file of files) {
-        try {
-            const raw = readFileSync(join(baselinesDir, file), "utf-8");
-            const data = JSON.parse(raw);
-            const areaScore = data.scores?.find((s) => s.feature === area);
-            if (!areaScore)
-                continue;
-            // Extract tag from filename (e.g., "20260304_16_34_45_pre-groq.json")
-            const nameWithoutExt = file.replace(/\.json$/, "");
-            const parts = nameWithoutExt.split("_");
-            // Timestamps are like "20260304_16_34_45" (4 parts), rest is tag
-            const tag = parts.length > 4 ? parts.slice(4).join("_") : undefined;
-            entries.push({
-                score: areaScore.totalScore,
-                tag,
-                timestamp: data.timestamp ?? nameWithoutExt,
-            });
-        }
-        catch {
-            // Skip malformed baseline files
-        }
-    }
-    return entries;
-}
-function loadScoreSummary(path) {
-    if (!existsSync(path)) {
-        throw new Error(`Score summary not found at ${path}. Run \`pnpm pipeline\` first.`);
-    }
-    return JSON.parse(readFileSync(path, "utf-8"));
-}
-function loadThresholdConfig(path) {
-    if (!existsSync(path)) {
-        throw new Error(`Threshold config not found at ${path}.`);
-    }
-    const raw = readFileSync(path, "utf-8");
-    const parsed = load(raw);
-    const result = ThresholdConfigSchema.safeParse(parsed);
-    if (!result.success) {
-        const messages = result.error.issues
-            .map((i) => `  ${i.path.join(".")}: ${i.message}`)
-            .join("\n");
-        throw new Error(`Invalid thresholds.yaml:\n${messages}`);
-    }
-    return result.data;
-}
-// ---------------------------------------------------------------------------
-// CLI
-// ---------------------------------------------------------------------------
-function main() {
-    const { area, history: includeHistory, output } = parseArgs(process.argv);
-    // Load data
-    const scoreSummary = loadScoreSummary(SCORE_SUMMARY_PATH);
-    const thresholdConfig = loadThresholdConfig(THRESHOLDS_PATH);
-    const gapAnalysis = loadGapAnalysis(GAP_ANALYSIS_PATH);
-    const history = includeHistory ? loadHistory(area, BASELINES_DIR) : [];
-    // Generate report
-    const report = generateReadinessReport({
-        area,
-        gapAnalysis,
-        history,
-        scoreSummary,
-        thresholdConfig,
-    });
-    // Format and output
-    const markdown = formatReadinessMarkdown(report);
-    if (output) {
-        writeFileSync(output, markdown, "utf-8");
-        console.error(`✅ Readiness report written to ${output}`);
-    }
-    else {
-        console.log(markdown);
-    }
-    // Exit with non-zero if not ready
-    if (!report.pass) {
-        process.exit(1);
-    }
-}
-function parseArgs(argv) {
-    const args = argv.slice(2);
-    let area;
-    let history = false;
-    let output;
-    for (let i = 0; i < args.length; i++) {
-        const arg = args[i];
-        if (arg === "--area" && i + 1 < args.length) {
-            area = args[++i];
-        }
-        else if (arg === "--history") {
-            history = true;
-        }
-        else if (arg === "--output" && i + 1 < args.length) {
-            output = args[++i];
-        }
-    }
-    if (!area) {
-        console.error("Usage: readiness-report --area <area> [--history] [--output <file>]");
-        console.error("");
-        console.error("Options:");
-        console.error("  --area <area>     Feature area to check (required)");
-        console.error("  --history         Include historical progress from baselines");
-        console.error("  --output <file>   Write markdown to file instead of stdout");
-        process.exit(1);
-    }
-    return { area, history, output };
-}
-// Only run when invoked directly: the script path in process.argv[1] must end
-// with this file's name (.ts source or compiled .js). Importing the module for
-// its exported functions therefore does not trigger main().
-if (process.argv[1]?.endsWith("readiness-report.ts") ||
-    process.argv[1]?.endsWith("readiness-report.js")) {
-    main();
-}

package/dist/scripts/update-quality-scores.d.ts DELETED Viewed

@@ -1,15 +0,0 @@
-/**
- * update-quality-scores.ts
- *
- * Reads score-summary.json and updates the feature area quality grades
- * table in docs/QUALITY_SCORE.md. Designed to run automatically after
- * each evaluation as the final pipeline step.
- *
- * Usage:
- *   pnpm update-quality-scores
- *   tsx src/scripts/update-quality-scores.ts
- */
-/**
- * Updates the feature area quality grades table in docs/QUALITY_SCORE.md from
- * score-summary.json.
- *
- * @returns `success` tells whether the update went through; `message` is a
- *   human-readable description of the outcome.
- */
-export declare function updateQualityScores(): {
-    success: boolean;
-    message: string;
-};

package/dist/scripts/update-quality-scores.js DELETED Viewed

@@ -1,184 +0,0 @@
-/**
- * update-quality-scores.ts
- *
- * Reads score-summary.json and updates the feature area quality grades
- * table in docs/QUALITY_SCORE.md. Designed to run automatically after
- * each evaluation as the final pipeline step.
- *
- * Usage:
- *   pnpm update-quality-scores
- *   tsx src/scripts/update-quality-scores.ts
- */
-import { execSync } from "child_process";
-import { existsSync, readFileSync, writeFileSync } from "fs";
-import { dirname, join, resolve } from "path";
-import { fileURLToPath } from "url";
-// ES modules have no built-in __dirname; derive it from this module's URL.
-const __dirname = dirname(fileURLToPath(import.meta.url));
-// Two directory levels above this module.
-const ROOT = resolve(__dirname, "..", "..");
-// Two further levels up from ROOT; docs/QUALITY_SCORE.md is resolved against it.
-const REPO_ROOT = resolve(ROOT, "..", "..");
-// Markdown file whose feature-area table gets rewritten.
-const QUALITY_SCORE_PATH = join(REPO_ROOT, "docs", "QUALITY_SCORE.md");
-// Score summary the table is generated from.
-const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
-// ---------------------------------------------------------------------------
-// File update and table generation
-// ---------------------------------------------------------------------------
-export function updateQualityScores() {
-    // Read score summary
-    if (!existsSync(SCORE_SUMMARY_PATH)) {
-        return {
-            message: `Score summary not found at ${SCORE_SUMMARY_PATH}. Run 'pnpm calculate-scores' first.`,
-            success: false,
-        };
-    }
-    let summary;
-    try {
-        const raw = readFileSync(SCORE_SUMMARY_PATH, "utf-8");
-        const parsed = JSON.parse(raw);
-        // Normalize legacy field names (liftFromDocs → docLift)
-        summary = {
-            ...parsed,
-            scores: parsed.scores.map((s) => ({
-                ...s,
-                docLift: s.docLift ??
-                    s.liftFromDocs ??
-                    0,
-            })),
-        };
-    }
-    catch (err) {
-        return {
-            message: `Failed to parse score summary: ${err instanceof Error ? err.message : String(err)}`,
-            success: false,
-        };
-    }
-    if (!summary.scores || summary.scores.length === 0) {
-        return { message: "Score summary contains no scores.", success: false };
-    }
-    // Read QUALITY_SCORE.md
-    if (!existsSync(QUALITY_SCORE_PATH)) {
-        return {
-            message: `QUALITY_SCORE.md not found at ${QUALITY_SCORE_PATH}.`,
-            success: false,
-        };
-    }
-    let markdown = readFileSync(QUALITY_SCORE_PATH, "utf-8");
-    // Replace the feature area table
-    const newTable = generateTable(summary.scores);
-    const tablePattern = /\| Feature Area\s+\| Score\s+\| Grade\s+\| Doc Lift\s+\| Key gap[^|]*\|\n\| [-\s|]+\|\n(\|[^\n]+\|\n)*/;
-    const match = tablePattern.exec(markdown);
-    if (!match) {
-        return {
-            message: "Could not find the feature area quality grades table in QUALITY_SCORE.md.",
-            success: false,
-        };
-    }
-    markdown =
-        markdown.slice(0, match.index) +
-            newTable +
-            "\n" +
-            markdown.slice(match.index + match[0].length);
-    // Write back
-    writeFileSync(QUALITY_SCORE_PATH, markdown);
-    // Format with Prettier to ensure consistent table formatting
-    // (emoji widths differ between padEnd and Prettier's table formatter)
-    try {
-        execSync("npx prettier --write " + QUALITY_SCORE_PATH, {
-            cwd: REPO_ROOT,
-            stdio: "pipe",
-        });
-    }
-    catch {
-        // Non-fatal — formatting is nice-to-have
-    }
-    return {
-        message: `Updated ${summary.scores.length} feature area scores in QUALITY_SCORE.md (avg: ${Math.round(summary.overall.avgScore)}, lift: +${Math.round(summary.overall.avgDocLift)})`,
-        success: true,
-    };
-}
-function generateTable(scores) {
-    // Sort by score descending
-    const sorted = [...scores].sort((a, b) => b.totalScore - a.totalScore);
-    // Build rows with data
-    const rows = sorted.map((s) => ({
-        feature: s.feature,
-        gap: keyGap(s, scores),
-        grade: grade(s.totalScore),
-        lift: "+" + s.docLift,
-        score: String(s.totalScore),
-    }));
-    // Calculate column widths from data (minimum widths from headers)
-    const cols = {
-        feature: Math.max(14, ...rows.map((r) => r.feature.length)),
-        gap: Math.max(7, ...rows.map((r) => r.gap.length)),
-        grade: 5,
-        lift: 8,
-        score: 5,
-    };
-    const fmtRow = (r) => `| ${r.feature.padEnd(cols.feature)} | ${r.score.padEnd(cols.score)} | ${r.grade.padEnd(cols.grade)} | ${r.lift.padEnd(cols.lift)} | ${r.gap.padEnd(cols.gap)} |`;
-    const header = fmtRow({
-        feature: "Feature Area",
-        gap: "Key gap",
-        grade: "Grade",
-        lift: "Doc Lift",
-        score: "Score",
-    });
-    const sep = `| ${"-".repeat(cols.feature)} | ${"-".repeat(cols.score)} | ${"-".repeat(cols.grade)} | ${"-".repeat(cols.lift)} | ${"-".repeat(cols.gap)} |`;
-    return [header, sep, ...rows.map(fmtRow)].join("\n");
-}
-// ---------------------------------------------------------------------------
-// Grading
-// ---------------------------------------------------------------------------
-function grade(score) {
-    if (score >= 80)
-        return "✅ A";
-    if (score >= 60)
-        return "🟡 B";
-    if (score >= 40)
-        return "🟠 C";
-    return "🔴 D";
-}
-// ---------------------------------------------------------------------------
-// Key gap summary
-// ---------------------------------------------------------------------------
-function keyGap(s, allScores) {
-    // Below critical threshold
-    if (s.totalScore < 40) {
-        return "⚠️ Below critical — all dimensions underperform";
-    }
-    // Find the weakest dimension relative to max possible (all 0–100)
-    const dims = [
-        { max: 100, name: "task completion", score: s.taskCompletion },
-        { max: 100, name: "code correctness", score: s.codeCorrectness },
-        { max: 100, name: "doc coverage", score: s.docCoverage },
-    ];
-    // Sort by ratio (lowest first)
-    dims.sort((a, b) => a.score / a.max - b.score / b.max);
-    const weakest = dims[0];
-    // Check for notable strengths
-    const maxLift = Math.max(...allScores.map((sc) => sc.docLift));
-    const maxScore = Math.max(...allScores.map((sc) => sc.totalScore));
-    if (s.totalScore === maxScore) {
-        return `Strong — highest score; ${weakest.name} (${weakest.score}/${weakest.max})`;
-    }
-    if (s.docLift === maxLift) {
-        return `Highest doc lift; ${weakest.name} (${weakest.score}/${weakest.max})`;
-    }
-    if (weakest.score === 0) {
-        return `Zero ${weakest.name} score; lowest doc lift`;
-    }
-    return `${weakest.name[0].toUpperCase() + weakest.name.slice(1)} (${weakest.score}/${weakest.max}) holds back total score`;
-}
-// ---------------------------------------------------------------------------
-// Main (when run directly)
-// ---------------------------------------------------------------------------
-// Run only when this file is the script being executed: process.argv[1] must
-// end with this file's name (.ts source or compiled .js). A failed update is
-// reported on stderr and exits with status 1.
-if (process.argv[1]?.endsWith("update-quality-scores.ts") ||
-    process.argv[1]?.endsWith("update-quality-scores.js")) {
-    console.log("=== Updating QUALITY_SCORE.md from score-summary.json ===\n");
-    const result = updateQualityScores();
-    if (result.success) {
-        console.log(`  ✅ ${result.message}`);
-    }
-    else {
-        console.error(`  ❌ ${result.message}`);
-        process.exit(1);
-    }
-}

package/dist/scripts/validate-task-sources.d.ts DELETED Viewed

@@ -1,21 +0,0 @@
-#!/usr/bin/env tsx
-/**
- * Validation script: Compare YamlTaskSource vs ContentLakeTaskSource
- *
- * Loads tasks from both sources and compares them field-by-field to verify
- * that the Content Lake migration produced identical LiteracyTaskDefinition[] output.
- *
- * This is Phase 3b of the tasks-as-content exec plan — parallel validation
- * before deleting YAML files.
- *
- * Usage:
- *   cd packages/eval
- *   npx tsx src/scripts/validate-task-sources.ts
- *
- * Prerequisites:
- *   - Migration script has been run (ailf.task documents exist in CL)
- *   - SANITY_API_TOKEN configured for Content Lake reads
- *
- * @see docs/archive/exec-plans/tasks-as-content/phase-3-migration.md
- */
-export {};