npm - @sanity/ailf - Versions diffs - 0.1.0 → 0.1.1 - Mend

@sanity/ailf 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (125) hide show

package/dist/_vendor/ailf-core/examples/index.d.ts +6 -4
package/dist/_vendor/ailf-core/examples/index.js +9 -4
package/dist/_vendor/ailf-core/ports/context.d.ts +4 -0
package/dist/adapters/task-sources/repo-schemas.d.ts +12 -2
package/dist/adapters/task-sources/repo-schemas.js +28 -2
package/dist/cli.js +0 -0
package/dist/commands/init.js +39 -5
package/dist/commands/pipeline-action.js +44 -6
package/dist/commands/publish.js +2 -1
package/dist/commands/validate-tasks.js +4 -1
package/dist/composition-root.js +9 -5
package/dist/orchestration/build-app-context.js +2 -0
package/package.json +1 -1
package/dist/commands/update-quality-scores.d.ts +0 -5
package/dist/commands/update-quality-scores.js +0 -20
package/dist/lib/agent-behavior-report.d.ts +0 -8
package/dist/lib/agent-behavior-report.js +0 -185
package/dist/lib/baseline.d.ts +0 -19
package/dist/lib/baseline.js +0 -153
package/dist/lib/calculate-scores.d.ts +0 -23
package/dist/lib/calculate-scores.js +0 -42
package/dist/lib/compare.d.ts +0 -18
package/dist/lib/compare.js +0 -170
package/dist/lib/coverage-audit.d.ts +0 -4
package/dist/lib/coverage-audit.js +0 -42
package/dist/lib/discovery-report.d.ts +0 -13
package/dist/lib/discovery-report.js +0 -57
package/dist/lib/fetch-docs.d.ts +0 -30
package/dist/lib/fetch-docs.js +0 -171
package/dist/lib/generate-configs.d.ts +0 -25
package/dist/lib/generate-configs.js +0 -42
package/dist/lib/grader-api.d.ts +0 -21
package/dist/lib/grader-api.js +0 -34
package/dist/lib/grader-compare.d.ts +0 -19
package/dist/lib/grader-compare.js +0 -91
package/dist/lib/grader-consistency.d.ts +0 -27
package/dist/lib/grader-consistency.js +0 -79
package/dist/lib/grader-sensitivity.d.ts +0 -19
package/dist/lib/grader-sensitivity.js +0 -75
package/dist/lib/grader-validate.d.ts +0 -19
package/dist/lib/grader-validate.js +0 -78
package/dist/lib/measure-retrieval.d.ts +0 -14
package/dist/lib/measure-retrieval.js +0 -71
package/dist/lib/pr-comment.d.ts +0 -16
package/dist/lib/pr-comment.js +0 -28
package/dist/lib/readiness-report.d.ts +0 -13
package/dist/lib/readiness-report.js +0 -108
package/dist/lib/webhook-server.d.ts +0 -11
package/dist/lib/webhook-server.js +0 -24
package/dist/lib/weekly-digest.d.ts +0 -24
package/dist/lib/weekly-digest.js +0 -148
package/dist/orchestration/env-bridge.d.ts +0 -21
package/dist/orchestration/env-bridge.js +0 -66
package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
package/dist/pipeline/steps/calculate-scores-step.js +0 -89
package/dist/pipeline/steps/compare-step.d.ts +0 -18
package/dist/pipeline/steps/compare-step.js +0 -90
package/dist/pipeline/steps/eval-step.d.ts +0 -53
package/dist/pipeline/steps/eval-step.js +0 -347
package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
package/dist/pipeline/steps/fetch-docs-step.js +0 -84
package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
package/dist/pipeline/steps/generate-configs-step.js +0 -98
package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
package/dist/pipeline/steps/grader-consistency-step.js +0 -74
package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
package/dist/pipeline/steps/publish-report-step.js +0 -243
package/dist/pipeline/steps/report-step.d.ts +0 -13
package/dist/pipeline/steps/report-step.js +0 -56
package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
package/dist/pipeline/steps/update-scores-step.js +0 -42
package/dist/scripts/agent-behavior-report.d.ts +0 -19
package/dist/scripts/agent-behavior-report.js +0 -315
package/dist/scripts/baseline.d.ts +0 -43
package/dist/scripts/baseline.js +0 -267
package/dist/scripts/calculate-scores.d.ts +0 -166
package/dist/scripts/calculate-scores.js +0 -1296
package/dist/scripts/compare.d.ts +0 -22
package/dist/scripts/compare.js +0 -334
package/dist/scripts/coverage-audit.d.ts +0 -44
package/dist/scripts/coverage-audit.js +0 -209
package/dist/scripts/debug-eval.d.ts +0 -19
package/dist/scripts/debug-eval.js +0 -73
package/dist/scripts/discovery-report.d.ts +0 -58
package/dist/scripts/discovery-report.js +0 -250
package/dist/scripts/fetch-docs.d.ts +0 -35
package/dist/scripts/fetch-docs.js +0 -472
package/dist/scripts/generate-configs.d.ts +0 -66
package/dist/scripts/generate-configs.js +0 -459
package/dist/scripts/grader-api.d.ts +0 -27
package/dist/scripts/grader-api.js +0 -206
package/dist/scripts/grader-compare.d.ts +0 -22
package/dist/scripts/grader-compare.js +0 -368
package/dist/scripts/grader-consistency.d.ts +0 -20
package/dist/scripts/grader-consistency.js +0 -313
package/dist/scripts/grader-sensitivity.d.ts +0 -22
package/dist/scripts/grader-sensitivity.js +0 -354
package/dist/scripts/grader-validate.d.ts +0 -19
package/dist/scripts/grader-validate.js +0 -267
package/dist/scripts/measure-retrieval.d.ts +0 -10
package/dist/scripts/measure-retrieval.js +0 -145
package/dist/scripts/pipeline.d.ts +0 -76
package/dist/scripts/pipeline.js +0 -1031
package/dist/scripts/pr-comment.d.ts +0 -10
package/dist/scripts/pr-comment.js +0 -510
package/dist/scripts/readiness-report.d.ts +0 -88
package/dist/scripts/readiness-report.js +0 -342
package/dist/scripts/update-quality-scores.d.ts +0 -15
package/dist/scripts/update-quality-scores.js +0 -184
package/dist/scripts/validate.d.ts +0 -13
package/dist/scripts/validate.js +0 -79
package/dist/scripts/webhook-server.d.ts +0 -26
package/dist/scripts/webhook-server.js +0 -147
package/dist/scripts/weekly-digest.d.ts +0 -24
package/dist/scripts/weekly-digest.js +0 -144
package/dist/sinks/format-slack.d.ts +0 -64
package/dist/sinks/format-slack.js +0 -306
package/dist/sinks/slack-sink.d.ts +0 -27
package/dist/sinks/slack-sink.js +0 -78
package/dist/sinks/webhook-sink.d.ts +0 -19
package/dist/sinks/webhook-sink.js +0 -50
package/tasks/.expanded.agentic.yaml +0 -51
package/tasks/.expanded.yaml +0 -66

package/dist/lib/agent-behavior-report.js DELETED Viewed

@@ -1,185 +0,0 @@
-/**
- * lib/agent-behavior-report.ts — DEPRECATED re-export shim.
- * @deprecated Import from ../pipeline/agent-behavior-report.js instead.
- */
-// oxlint-disable-next-line import/no-unassigned-import -- side-effect: loads .env into process.env
-import "dotenv/config";
-import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
-import { dirname, join } from "path";
-export { analyzeResults, CANONICAL_DOC_MAP, detectFeatureArea, } from "../pipeline/agent-behavior-report.js";
-import { analyzeResults, } from "../pipeline/agent-behavior-report.js";
-export function main(resultsPathArg) {
-    const ROOT = join(dirname(new URL(import.meta.url).pathname), "..", "..");
-    const resultsPath = resultsPathArg ??
-        process.argv[2] ??
-        join(ROOT, "results", "latest", "eval-results.json");
-    if (!existsSync(resultsPath)) {
-        console.error(`Results file not found: ${resultsPath}`);
-        console.error("Run an evaluation first: pnpm eval:observed");
-        process.exit(1);
-    }
-    console.log(`Reading results from: ${resultsPath}`);
-    console.log();
-    const json = JSON.parse(readFileSync(resultsPath, "utf-8"));
-    const rawResults = Array.isArray(json.results)
-        ? json.results
-        : json.results.results;
-    const analysis = analyzeResults(rawResults);
-    if (!analysis.hasData) {
-        console.log("No agent behavior data found in the results.");
-        console.log("Make sure you ran the evaluation with the observed config:");
-        console.log("  pnpm eval:observed");
-        process.exit(0);
-    }
-    printReport(analysis);
-    // Persist detailed report as JSON
-    const outDir = join(ROOT, "results", "latest");
-    mkdirSync(outDir, { recursive: true });
-    const reportData = {
-        features: analysis.features.map((f) => ({
-            avgDocPages: f.avgDocPages,
-            avgNetworkMs: f.avgNetworkMs,
-            avgSearches: f.avgSearches,
-            canonicalCoverage: f.canonicalCoverage,
-            canonicalSlugs: f.canonicalSlugs,
-            docSlugsVisited: f.allDocSlugs,
-            externalDomains: f.allExternalDomains,
-            feature: f.feature,
-            searchQueries: f.allSearchQueries,
-            taskCount: f.tasks.length,
-        })),
-        tasks: analysis.tasks.map((t) => ({
-            behavior: t.behavior,
-            description: t.description,
-            feature: t.feature,
-            hasDocs: t.hasDocs,
-        })),
-        timestamp: new Date().toISOString(),
-        totalTasks: analysis.tasks.length,
-    };
-    writeFileSync(join(outDir, "agent-behavior-report.json"), JSON.stringify(reportData, null, 2));
-    console.log("Agent behavior report written to results/latest/agent-behavior-report.json");
-}
-// ---------------------------------------------------------------------------
-// Report output (kept in shim for backward compat)
-// ---------------------------------------------------------------------------
-function printReport(analysis) {
-    console.log("=".repeat(80));
-    console.log("            AGENT BEHAVIOR OBSERVATION REPORT");
-    console.log("=".repeat(80));
-    console.log();
-    // ---- Overview table ----
-    console.log("OVERVIEW BY FEATURE AREA");
-    console.log("-".repeat(80));
-    const h = "| Feature Area        | Tasks | Avg Docs | Avg Search | Avg Net(ms) | Canon% |";
-    const sep = "|---------------------|-------|----------|------------|-------------|--------|";
-    console.log(h);
-    console.log(sep);
-    for (const f of analysis.features) {
-        console.log(`| ${f.feature.padEnd(19)} | ` +
-            `${f.tasks.length.toString().padStart(5)} | ` +
-            `${f.avgDocPages.toFixed(1).padStart(8)} | ` +
-            `${f.avgSearches.toFixed(1).padStart(10)} | ` +
-            `${Math.round(f.avgNetworkMs).toString().padStart(11)} | ` +
-            `${(f.canonicalCoverage * 100).toFixed(0).padStart(5)}% |`);
-    }
-    console.log();
-    // ---- Canonical coverage breakdown ----
-    console.log("CANONICAL DOCUMENTATION COVERAGE");
-    console.log("-".repeat(80));
-    console.log();
-    for (const f of analysis.features) {
-        console.log(`  ${f.feature} (${(f.canonicalCoverage * 100).toFixed(0)}% canonical coverage):`);
-        if (f.canonicalSlugs.length === 0) {
-            console.log("    (no canonical docs defined)");
-        }
-        else {
-            for (const slug of f.canonicalSlugs) {
-                const found = f.allDocSlugs.some((visited) => visited.includes(slug));
-                const marker = found ? "[x]" : "[ ]";
-                console.log(`    ${marker} ${slug}`);
-            }
-        }
-        if (f.allDocSlugs.length > 0) {
-            const nonCanonical = f.allDocSlugs.filter((slug) => !f.canonicalSlugs.some((c) => slug.includes(c)));
-            if (nonCanonical.length > 0) {
-                console.log("    Additional docs visited:");
-                for (const slug of nonCanonical) {
-                    console.log(`      + ${slug}`);
-                }
-            }
-        }
-        console.log();
-    }
-    // ---- Search strategy ----
-    const allSearches = analysis.features.flatMap((f) => f.allSearchQueries);
-    if (allSearches.length > 0) {
-        console.log("SEARCH STRATEGY");
-        console.log("-".repeat(80));
-        console.log();
-        for (const f of analysis.features) {
-            if (f.allSearchQueries.length === 0)
-                continue;
-            console.log(`  ${f.feature}:`);
-            for (const q of f.allSearchQueries) {
-                console.log(`    -> "${q}"`);
-            }
-        }
-        console.log();
-    }
-    // ---- Per-task detail ----
-    console.log("PER-TASK DETAIL");
-    console.log("-".repeat(80));
-    console.log();
-    for (const f of analysis.features) {
-        console.log(`  ## ${f.feature}`);
-        console.log();
-        for (const t of f.tasks) {
-            const variant = t.hasDocs ? "[gold]" : "[baseline]";
-            console.log(`  ${variant} ${t.description}`);
-            console.log(`    Requests: ${t.behavior.totalRequests} | ` +
-                `Doc pages: ${t.behavior.docPagesVisited} | ` +
-                `Searches: ${t.behavior.searchesPerformed} | ` +
-                `External: ${t.behavior.externalRequestCount}`);
-            if (t.behavior.docSlugsVisited.length > 0) {
-                console.log(`    Docs: ${t.behavior.docSlugsVisited.join(", ")}`);
-            }
-            if (t.behavior.uniqueSearchQueries.length > 0) {
-                console.log(`    Queries: ${t.behavior.uniqueSearchQueries.map((q) => `"${q}"`).join(", ")}`);
-            }
-            console.log();
-        }
-    }
-    // ---- External domains ----
-    const allDomains = [
-        ...new Set(analysis.features.flatMap((f) => f.allExternalDomains)),
-    ];
-    if (allDomains.length > 0) {
-        console.log("EXTERNAL DOMAINS");
-        console.log("-".repeat(80));
-        console.log();
-        for (const d of allDomains) {
-            console.log(`  - ${d}`);
-        }
-        console.log();
-    }
-    // ---- Summary stats ----
-    console.log("OVERALL STATISTICS");
-    console.log("-".repeat(80));
-    console.log();
-    const totalTasks = analysis.tasks.length;
-    const tasksUsingDocs = analysis.tasks.filter((t) => t.behavior.usedDocs).length;
-    const tasksUsingSearch = analysis.tasks.filter((t) => t.behavior.usedSearch).length;
-    const avgCanonical = analysis.features.reduce((s, f) => s + f.canonicalCoverage, 0) /
-        (analysis.features.length || 1);
-    console.log(`  Total tasks observed:    ${totalTasks}`);
-    console.log(`  Tasks that used docs:    ${tasksUsingDocs}/${totalTasks} (${((tasksUsingDocs / totalTasks) * 100).toFixed(0)}%)`);
-    console.log(`  Tasks that used search:  ${tasksUsingSearch}/${totalTasks} (${((tasksUsingSearch / totalTasks) * 100).toFixed(0)}%)`);
-    console.log(`  Avg canonical coverage:  ${(avgCanonical * 100).toFixed(1)}%`);
-    console.log();
-}
-// Only run when invoked directly (not when imported)
-if (process.argv[1]?.endsWith("agent-behavior-report.ts") ||
-    process.argv[1]?.endsWith("agent-behavior-report.js")) {
-    main();
-}

package/dist/lib/baseline.d.ts DELETED Viewed

@@ -1,19 +0,0 @@
-/**
- * lib/baseline.ts — DEPRECATED re-export shim.
- *
- * The real implementation has moved to pipeline/baseline.ts.
- * This shim preserves backward compatibility for:
- *   - Direct CLI invocation: `tsx src/lib/baseline.ts`
- *   - Test imports that haven't been updated yet
- *
- * TODO: Update all importers to use pipeline/baseline.ts, then delete this file.
- *
- * @deprecated Import from ../pipeline/baseline.js instead.
- */
-export type { BaselineMetadata, CompareResult, ScoreComparison, } from "../pipeline/baseline.js";
-export declare function saveBaseline(tag?: string): {
-    success: boolean;
-    message: string;
-};
-export declare function compareBaseline(baselineFile?: string): import("./baseline.js").CompareResult;
-export declare function listBaselines(): import("./baseline.js").BaselineMetadata[];

package/dist/lib/baseline.js DELETED Viewed

@@ -1,153 +0,0 @@
-/**
- * lib/baseline.ts — DEPRECATED re-export shim.
- *
- * The real implementation has moved to pipeline/baseline.ts.
- * This shim preserves backward compatibility for:
- *   - Direct CLI invocation: `tsx src/lib/baseline.ts`
- *   - Test imports that haven't been updated yet
- *
- * TODO: Update all importers to use pipeline/baseline.ts, then delete this file.
- *
- * @deprecated Import from ../pipeline/baseline.js instead.
- */
-import { dirname, resolve } from "path";
-import { fileURLToPath } from "url";
-import { saveBaseline as _saveBaseline, compareBaseline as _compareBaseline, listBaselines as _listBaselines, } from "../pipeline/baseline.js";
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const ROOT = resolve(__dirname, "../..");
-export function saveBaseline(tag) {
-    return _saveBaseline(ROOT, tag);
-}
-export function compareBaseline(baselineFile) {
-    return _compareBaseline(ROOT, baselineFile);
-}
-export function listBaselines() {
-    return _listBaselines(ROOT);
-}
-// ---------------------------------------------------------------------------
-// CLI
-// ---------------------------------------------------------------------------
-if (process.argv[1]?.endsWith("baseline.ts") ||
-    process.argv[1]?.endsWith("baseline.js")) {
-    const args = process.argv.slice(2);
-    const command = args[0] || "save";
-    function getArg(name) {
-        const idx = args.indexOf(`--${name}`);
-        return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : undefined;
-    }
-    switch (command) {
-        case "compare": {
-            const file = getArg("file");
-            console.log("=== Baseline Comparison ===\n");
-            const result = compareBaseline(file);
-            if (!result.success) {
-                console.error(`  ❌ ${result.message}`);
-                process.exit(1);
-            }
-            console.log(`  ${result.message}\n`);
-            console.log("  " +
-                "Feature Area".padEnd(18) +
-                "Current".padEnd(10) +
-                "Baseline".padEnd(10) +
-                "Delta");
-            console.log("  " + "-".repeat(50));
-            for (const c of result.comparisons) {
-                const deltaStr = c.delta > 0 ? `+${c.delta}` : c.delta === 0 ? "=" : String(c.delta);
-                const icon = c.delta > 0 ? "📈" : c.delta < 0 ? "📉" : "➡️";
-                console.log("  " +
-                    c.feature.padEnd(18) +
-                    String(c.current).padEnd(10) +
-                    String(c.baseline).padEnd(10) +
-                    `${icon} ${deltaStr}`);
-            }
-            // Cost comparison (only if cost data exists)
-            const hasCostData = result.comparisons.some((c) => c.costCurrent !== undefined || c.costBaseline !== undefined);
-            if (hasCostData) {
-                console.log();
-                console.log("  Cost Comparison:");
-                console.log("  " +
-                    "Feature Area".padEnd(18) +
-                    "Current".padEnd(10) +
-                    "Baseline".padEnd(10) +
-                    "Delta");
-                console.log("  " + "-".repeat(50));
-                for (const c of result.comparisons) {
-                    if (c.costCurrent === undefined && c.costBaseline === undefined) {
-                        continue;
-                    }
-                    const cur = `$${(c.costCurrent ?? 0).toFixed(4)}`;
-                    const base = `$${(c.costBaseline ?? 0).toFixed(4)}`;
-                    const delta = c.costDelta ?? 0;
-                    const deltaStr = delta > 0
-                        ? `+$${delta.toFixed(4)}`
-                        : delta < 0
-                            ? `-$${Math.abs(delta).toFixed(4)}`
-                            : "=";
-                    const icon = delta > 0 ? "📈" : delta < 0 ? "📉" : "➡️";
-                    console.log("  " +
-                        c.feature.padEnd(18) +
-                        cur.padEnd(10) +
-                        base.padEnd(10) +
-                        `${icon} ${deltaStr}`);
-                }
-            }
-            console.log();
-            const overallIcon = result.overallDelta > 0 ? "📈" : result.overallDelta < 0 ? "📉" : "➡️";
-            const overallStr = result.overallDelta > 0
-                ? `+${result.overallDelta}`
-                : result.overallDelta === 0
-                    ? "="
-                    : String(result.overallDelta);
-            console.log(`  Overall: ${overallIcon} ${overallStr} points`);
-            break;
-        }
-        case "history": {
-            console.log("=== Baseline History ===\n");
-            const baselines = listBaselines();
-            if (baselines.length === 0) {
-                console.log("  No baselines saved yet.");
-            }
-            else {
-                const hasCosts = baselines.some((b) => b.totalCost !== undefined || b.graderCost !== undefined);
-                const costHeader = hasCosts ? "Cost".padEnd(10) : "";
-                console.log("  " +
-                    "Date".padEnd(22) +
-                    "Avg".padEnd(6) +
-                    "Areas".padEnd(7) +
-                    costHeader +
-                    "Tag");
-                console.log("  " + "-".repeat(hasCosts ? 60 : 50));
-                for (const b of baselines) {
-                    const date = new Date(b.timestamp).toLocaleString();
-                    const combinedCost = (b.totalCost ?? 0) + (b.graderCost ?? 0);
-                    const costStr = hasCosts
-                        ? (combinedCost > 0 ? `$${combinedCost.toFixed(2)}` : "-").padEnd(10)
-                        : "";
-                    console.log("  " +
-                        date.padEnd(22) +
-                        String(b.avgScore).padEnd(6) +
-                        String(b.areaCount).padEnd(7) +
-                        costStr +
-                        (b.tag ?? ""));
-                }
-            }
-            break;
-        }
-        case "save": {
-            const tag = getArg("tag");
-            console.log("=== Saving baseline snapshot ===\n");
-            const result = saveBaseline(tag);
-            if (result.success) {
-                console.log(`  ✅ ${result.message}`);
-            }
-            else {
-                console.error(`  ❌ ${result.message}`);
-                process.exit(1);
-            }
-            break;
-        }
-        default:
-            console.error(`Unknown command: "${command}". Use: save, history, compare`);
-            process.exit(1);
-    }
-}

package/dist/lib/calculate-scores.d.ts DELETED Viewed

@@ -1,23 +0,0 @@
-/**
- * lib/calculate-scores.ts — DEPRECATED re-export shim.
- *
- * The real implementation has moved to pipeline/calculate-scores.ts.
- * This shim preserves backward compatibility for:
- *   - Direct CLI invocation: `tsx src/lib/calculate-scores.ts`
- *   - Test imports that haven't been updated yet
- *
- * TODO: Update all importers to use pipeline/calculate-scores.ts, then delete this file.
- *
- * @deprecated Import from ../pipeline/calculate-scores.js instead.
- */
-export { calculateAndWriteScores, calculateScoresPerModel, extractGraderJudgments, scoreAgenticResults, type CalculateScoresOptions, type PromptfooResultsWrapper, type RawPromptfooFile, type RawTestResult, } from "../pipeline/calculate-scores.js";
-export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, type ActualScoreEntry, type ComponentResult, type TestResult, type UrlMetadata, } from "../_vendor/ailf-core/index.d.ts";
-import type { CalculateScoresOptions } from "../pipeline/calculate-scores.js";
-/**
- * Legacy main() entry point — wraps calculateAndWriteScores() with env var fallbacks.
- *
- * @deprecated Use calculateAndWriteScores() from pipeline/calculate-scores.ts instead.
- */
-export declare function main(options?: Omit<CalculateScoresOptions, "rootDir"> & {
-    rootDir?: string;
-}): void;

package/dist/lib/calculate-scores.js DELETED Viewed

@@ -1,42 +0,0 @@
-/**
- * lib/calculate-scores.ts — DEPRECATED re-export shim.
- *
- * The real implementation has moved to pipeline/calculate-scores.ts.
- * This shim preserves backward compatibility for:
- *   - Direct CLI invocation: `tsx src/lib/calculate-scores.ts`
- *   - Test imports that haven't been updated yet
- *
- * TODO: Update all importers to use pipeline/calculate-scores.ts, then delete this file.
- *
- * @deprecated Import from ../pipeline/calculate-scores.js instead.
- */
-import { dirname, join } from "path";
-import { fileURLToPath } from "url";
-// Re-export everything from the real implementation
-export { calculateAndWriteScores, calculateScoresPerModel, extractGraderJudgments, scoreAgenticResults, } from "../pipeline/calculate-scores.js";
-// Re-export core types/functions for backward compatibility
-export { classifyRubric, detectFeatureArea, extractUrlMetadata, mergeScores, parseRubricScore, } from "../_vendor/ailf-core/index.js";
-import { calculateAndWriteScores } from "../pipeline/calculate-scores.js";
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const ROOT = join(__dirname, "..", "..");
-/**
- * Legacy main() entry point — wraps calculateAndWriteScores() with env var fallbacks.
- *
- * @deprecated Use calculateAndWriteScores() from pipeline/calculate-scores.ts instead.
- */
-export function main(options) {
-    calculateAndWriteScores({
-        rootDir: options?.rootDir ?? ROOT,
-        allowedOrigins: options?.allowedOrigins,
-        mode: options?.mode ?? process.env.EVAL_MODE ?? "baseline",
-        resolvedSource: options?.resolvedSource,
-        resultsPath: options?.resultsPath,
-        searchMode: options?.searchMode,
-        source: options?.source,
-    });
-}
-// Only run when invoked directly (not when imported for testing)
-if (process.argv[1]?.endsWith("calculate-scores.ts") ||
-    process.argv[1]?.endsWith("calculate-scores.js")) {
-    main();
-}

package/dist/lib/compare.d.ts DELETED Viewed

@@ -1,18 +0,0 @@
-/**
- * compare.ts
- *
- * CLI for structured comparison between two evaluation runs.
- *
- * Usage:
- *   pnpm compare                                # compare current vs latest baseline
- *   pnpm compare --baseline <path>              # compare current vs specific file
- *   pnpm compare --baseline <path> --experiment <path>  # compare two specific files
- *   pnpm compare --threshold 5                  # custom noise threshold
- *   pnpm compare --output /tmp/comparison.json  # write JSON report to file
- *   pnpm compare --format json                  # output raw JSON (default: table)
- *
- * Reads: results/latest/score-summary.json (as experiment, unless --experiment)
- * Reads: results/baselines/<latest>.json (as baseline, unless --baseline)
- */
-export { formatComparisonMarkdown, formatComparisonTable, } from "../_vendor/ailf-core/index.d.ts";
-export declare function main(): void;

package/dist/lib/compare.js DELETED Viewed

@@ -1,170 +0,0 @@
-/**
- * compare.ts
- *
- * CLI for structured comparison between two evaluation runs.
- *
- * Usage:
- *   pnpm compare                                # compare current vs latest baseline
- *   pnpm compare --baseline <path>              # compare current vs specific file
- *   pnpm compare --baseline <path> --experiment <path>  # compare two specific files
- *   pnpm compare --threshold 5                  # custom noise threshold
- *   pnpm compare --output /tmp/comparison.json  # write JSON report to file
- *   pnpm compare --format json                  # output raw JSON (default: table)
- *
- * Reads: results/latest/score-summary.json (as experiment, unless --experiment)
- * Reads: results/baselines/<latest>.json (as baseline, unless --baseline)
- */
-import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
-import { dirname, join, resolve } from "path";
-import { fileURLToPath } from "url";
-import { formatComparisonTable, } from "../_vendor/ailf-core/index.js";
-import { compare } from "../pipeline/compare.js";
-import { DEFAULT_NOISE_THRESHOLD, } from "../pipeline/types.js";
-// Re-export pure formatters from core for backward compatibility.
-export { formatComparisonMarkdown, formatComparisonTable, } from "../_vendor/ailf-core/index.js";
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const ROOT = resolve(__dirname, "..", "..");
-const BASELINES_DIR = join(ROOT, "results", "baselines");
-const SCORE_SUMMARY_PATH = join(ROOT, "results", "latest", "score-summary.json");
-// ---------------------------------------------------------------------------
-// CLI argument parsing
-// ---------------------------------------------------------------------------
-const args = process.argv.slice(2);
-function getFlag(name) {
-    return args.includes(`--${name}`);
-}
-function getOption(name) {
-    const idx = args.indexOf(`--${name}`);
-    return idx !== -1 && idx + 1 < args.length ? args[idx + 1] : undefined;
-}
-const baselinePath = getOption("baseline");
-const experimentPath = getOption("experiment");
-const thresholdStr = getOption("threshold");
-const threshold = thresholdStr
-    ? parseFloat(thresholdStr)
-    : DEFAULT_NOISE_THRESHOLD;
-const outputPath = getOption("output");
-const format = getOption("format") ?? "table";
-const showHelp = getFlag("help") || getFlag("h");
-if (showHelp) {
-    console.log(`
-Usage: pnpm compare [options]
-Compare two evaluation score summaries and produce structured deltas.
-Options:
-  --baseline <path>     Baseline score-summary.json (default: latest baseline)
-  --experiment <path>   Experiment score-summary.json (default: results/latest/score-summary.json)
-  --threshold <n>       Noise threshold for unchanged classification (default: ${DEFAULT_NOISE_THRESHOLD})
-  --output <path>       Write JSON report to file
-  --format <fmt>        Output format: table (default) or json
-  --help, -h            Show this help
-Examples:
-  pnpm compare                           # current scores vs latest baseline
-  pnpm compare --threshold 5             # wider noise band
-  pnpm compare --format json             # machine-readable output
-  pnpm compare --baseline results/baselines/20260310_02_43_44.json
-  pnpm compare --baseline before.json --experiment after.json
-`);
-    process.exit(0);
-}
-// ---------------------------------------------------------------------------
-// File loading helpers
-// ---------------------------------------------------------------------------
-/**
- * formatComparisonMarkdown — re-exported from @sanity/ailf-core above.
- * formatComparisonTable — re-exported from @sanity/ailf-core above.
- */
-function findLatestBaseline() {
-    if (!existsSync(BASELINES_DIR))
-        return null;
-    const files = readdirSync(BASELINES_DIR)
-        .filter((f) => f.endsWith(".json"))
-        .sort()
-        .reverse();
-    return files.length > 0 ? join(BASELINES_DIR, files[0]) : null;
-}
-function loadSummary(path) {
-    if (!existsSync(path)) {
-        console.error(`❌ File not found: ${path}`);
-        process.exit(1);
-    }
-    const raw = readFileSync(path, "utf-8");
-    return JSON.parse(raw);
-}
-// ---------------------------------------------------------------------------
-// Main
-// ---------------------------------------------------------------------------
-export function main() {
-    // Resolve experiment path
-    const expPath = experimentPath ?? SCORE_SUMMARY_PATH;
-    const experiment = loadSummary(expPath);
-    // Resolve baseline path
-    let basePath;
-    if (baselinePath) {
-        basePath = resolve(baselinePath);
-    }
-    else {
-        const latest = findLatestBaseline();
-        if (!latest) {
-            console.error("❌ No baselines found. Run 'pnpm baseline:save' first, or use --baseline <path>.");
-            process.exit(1);
-        }
-        basePath = latest;
-    }
-    const baseline = loadSummary(basePath);
-    // Try to load grader consistency data for empirical thresholds
-    const consistencyPath = join(ROOT, "results", "latest", "grader-consistency.json");
-    let graderConsistency;
-    if (existsSync(consistencyPath) && !thresholdStr) {
-        try {
-            const consistencyRaw = JSON.parse(readFileSync(consistencyPath, "utf-8"));
-            if (consistencyRaw.recommendedThreshold && consistencyRaw.perDimension) {
-                graderConsistency =
-                    consistencyRaw;
-                console.log(`  📊 Using empirical noise threshold: ±${graderConsistency.recommendedThreshold.toFixed(1)} (from grader consistency data)`);
-            }
-        }
-        catch {
-            // Non-fatal — fall back to default threshold
-        }
-    }
-    console.log(`  Baseline:   ${basePath}`);
-    console.log(`  Experiment: ${expPath}`);
-    if (!graderConsistency) {
-        console.log(`  Threshold:  ±${threshold} (default — run --grader-replications for empirical threshold)`);
-    }
-    console.log("");
-    const report = compare(baseline, experiment, {
-        graderConsistency,
-        noiseThreshold: threshold,
-    });
-    if (format === "json") {
-        const json = JSON.stringify(report, null, 2);
-        if (outputPath) {
-            writeFileSync(outputPath, json);
-            console.log(`  ✅ Comparison report written to ${outputPath}`);
-        }
-        else {
-            console.log(json);
-        }
-    }
-    else {
-        const table = formatComparisonTable(report);
-        console.log(table);
-        if (outputPath) {
-            const json = JSON.stringify(report, null, 2);
-            writeFileSync(outputPath, json);
-            console.log(`  ✅ Comparison report also written to ${outputPath}`);
-        }
-    }
-    // Write comparison report to results/latest for other steps to consume
-    const latestComparisonPath = join(ROOT, "results", "latest", "comparison-report.json");
-    writeFileSync(latestComparisonPath, JSON.stringify(report, null, 2));
-}
-// Only run when invoked directly
-if (process.argv[1]?.endsWith("compare.ts") ||
-    process.argv[1]?.endsWith("compare.js")) {
-    main();
-}

package/dist/lib/coverage-audit.d.ts DELETED Viewed

@@ -1,4 +0,0 @@
-export { countReferencedDocs, countTasksByArea, formatCoverageConsole, formatCoverageMarkdown, loadFeatureRegistry, runCoverageAudit, } from "../pipeline/coverage-audit.js";
-export type { CoverageAuditReport, ProductFeature } from "../pipeline/types.js";
-/** @deprecated Use pipeline functions directly with explicit rootDir */
-export declare function main(): void;

package/dist/lib/coverage-audit.js DELETED Viewed

@@ -1,42 +0,0 @@
-/**
- * lib/coverage-audit.ts — DEPRECATED re-export shim.
- * @deprecated Import from ../pipeline/coverage-audit.js instead.
- */
-import { dirname, resolve } from "path";
-import { fileURLToPath } from "url";
-export { countReferencedDocs, countTasksByArea, formatCoverageConsole, formatCoverageMarkdown, loadFeatureRegistry, runCoverageAudit, } from "../pipeline/coverage-audit.js";
-import { countReferencedDocs, formatCoverageConsole, formatCoverageMarkdown, runCoverageAudit, } from "../pipeline/coverage-audit.js";
-const __dirname = dirname(fileURLToPath(import.meta.url));
-const ROOT = resolve(__dirname, "../..");
-/** @deprecated Use pipeline functions directly with explicit rootDir */
-export function main() {
-    const args = process.argv.slice(2);
-    const formatArg = args.includes("--format")
-        ? args[args.indexOf("--format") + 1]
-        : undefined;
-    const jsonOutput = args.includes("--json");
-    const report = runCoverageAudit(ROOT);
-    if (!report) {
-        console.error("❌ Coverage audit failed.");
-        process.exit(1);
-    }
-    if (jsonOutput) {
-        console.log(JSON.stringify(report, null, 2));
-    }
-    else if (formatArg === "md" || formatArg === "markdown") {
-        console.log(formatCoverageMarkdown(report));
-    }
-    else {
-        console.log(formatCoverageConsole(report));
-    }
-    if (!jsonOutput && formatArg !== "md") {
-        const docStats = countReferencedDocs(ROOT);
-        console.log("DOCUMENT UTILIZATION:");
-        console.log(`  ${docStats.total} unique document slugs referenced across evaluation tasks`);
-        console.log("");
-    }
-}
-if (process.argv[1]?.endsWith("coverage-audit.ts") ||
-    process.argv[1]?.endsWith("coverage-audit.js")) {
-    main();
-}