@sanity/ailf 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/dist/_vendor/ailf-core/examples/index.d.ts +16 -12
  2. package/dist/_vendor/ailf-core/examples/index.js +19 -12
  3. package/dist/_vendor/ailf-core/ports/context.d.ts +4 -0
  4. package/dist/adapters/task-sources/repo-schemas.d.ts +12 -2
  5. package/dist/adapters/task-sources/repo-schemas.js +28 -2
  6. package/dist/cli.js +0 -0
  7. package/dist/commands/init.js +17 -5
  8. package/dist/commands/pipeline-action.js +44 -6
  9. package/dist/commands/publish.js +2 -1
  10. package/dist/commands/validate-tasks.js +4 -1
  11. package/dist/composition-root.js +9 -5
  12. package/dist/orchestration/build-app-context.js +2 -0
  13. package/package.json +1 -1
  14. package/dist/commands/update-quality-scores.d.ts +0 -5
  15. package/dist/commands/update-quality-scores.js +0 -20
  16. package/dist/lib/agent-behavior-report.d.ts +0 -8
  17. package/dist/lib/agent-behavior-report.js +0 -185
  18. package/dist/lib/baseline.d.ts +0 -19
  19. package/dist/lib/baseline.js +0 -153
  20. package/dist/lib/calculate-scores.d.ts +0 -23
  21. package/dist/lib/calculate-scores.js +0 -42
  22. package/dist/lib/compare.d.ts +0 -18
  23. package/dist/lib/compare.js +0 -170
  24. package/dist/lib/coverage-audit.d.ts +0 -4
  25. package/dist/lib/coverage-audit.js +0 -42
  26. package/dist/lib/discovery-report.d.ts +0 -13
  27. package/dist/lib/discovery-report.js +0 -57
  28. package/dist/lib/fetch-docs.d.ts +0 -30
  29. package/dist/lib/fetch-docs.js +0 -171
  30. package/dist/lib/generate-configs.d.ts +0 -25
  31. package/dist/lib/generate-configs.js +0 -42
  32. package/dist/lib/grader-api.d.ts +0 -21
  33. package/dist/lib/grader-api.js +0 -34
  34. package/dist/lib/grader-compare.d.ts +0 -19
  35. package/dist/lib/grader-compare.js +0 -91
  36. package/dist/lib/grader-consistency.d.ts +0 -27
  37. package/dist/lib/grader-consistency.js +0 -79
  38. package/dist/lib/grader-sensitivity.d.ts +0 -19
  39. package/dist/lib/grader-sensitivity.js +0 -75
  40. package/dist/lib/grader-validate.d.ts +0 -19
  41. package/dist/lib/grader-validate.js +0 -78
  42. package/dist/lib/measure-retrieval.d.ts +0 -14
  43. package/dist/lib/measure-retrieval.js +0 -71
  44. package/dist/lib/pr-comment.d.ts +0 -16
  45. package/dist/lib/pr-comment.js +0 -28
  46. package/dist/lib/readiness-report.d.ts +0 -13
  47. package/dist/lib/readiness-report.js +0 -108
  48. package/dist/lib/webhook-server.d.ts +0 -11
  49. package/dist/lib/webhook-server.js +0 -24
  50. package/dist/lib/weekly-digest.d.ts +0 -24
  51. package/dist/lib/weekly-digest.js +0 -148
  52. package/dist/orchestration/env-bridge.d.ts +0 -21
  53. package/dist/orchestration/env-bridge.js +0 -66
  54. package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
  55. package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
  56. package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
  57. package/dist/pipeline/steps/calculate-scores-step.js +0 -89
  58. package/dist/pipeline/steps/compare-step.d.ts +0 -18
  59. package/dist/pipeline/steps/compare-step.js +0 -90
  60. package/dist/pipeline/steps/eval-step.d.ts +0 -53
  61. package/dist/pipeline/steps/eval-step.js +0 -347
  62. package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
  63. package/dist/pipeline/steps/fetch-docs-step.js +0 -84
  64. package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
  65. package/dist/pipeline/steps/generate-configs-step.js +0 -98
  66. package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
  67. package/dist/pipeline/steps/grader-consistency-step.js +0 -74
  68. package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
  69. package/dist/pipeline/steps/publish-report-step.js +0 -243
  70. package/dist/pipeline/steps/report-step.d.ts +0 -13
  71. package/dist/pipeline/steps/report-step.js +0 -56
  72. package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
  73. package/dist/pipeline/steps/update-scores-step.js +0 -42
  74. package/dist/scripts/agent-behavior-report.d.ts +0 -19
  75. package/dist/scripts/agent-behavior-report.js +0 -315
  76. package/dist/scripts/baseline.d.ts +0 -43
  77. package/dist/scripts/baseline.js +0 -267
  78. package/dist/scripts/calculate-scores.d.ts +0 -166
  79. package/dist/scripts/calculate-scores.js +0 -1296
  80. package/dist/scripts/compare.d.ts +0 -22
  81. package/dist/scripts/compare.js +0 -334
  82. package/dist/scripts/coverage-audit.d.ts +0 -44
  83. package/dist/scripts/coverage-audit.js +0 -209
  84. package/dist/scripts/debug-eval.d.ts +0 -19
  85. package/dist/scripts/debug-eval.js +0 -73
  86. package/dist/scripts/discovery-report.d.ts +0 -58
  87. package/dist/scripts/discovery-report.js +0 -250
  88. package/dist/scripts/fetch-docs.d.ts +0 -35
  89. package/dist/scripts/fetch-docs.js +0 -472
  90. package/dist/scripts/generate-configs.d.ts +0 -66
  91. package/dist/scripts/generate-configs.js +0 -459
  92. package/dist/scripts/grader-api.d.ts +0 -27
  93. package/dist/scripts/grader-api.js +0 -206
  94. package/dist/scripts/grader-compare.d.ts +0 -22
  95. package/dist/scripts/grader-compare.js +0 -368
  96. package/dist/scripts/grader-consistency.d.ts +0 -20
  97. package/dist/scripts/grader-consistency.js +0 -313
  98. package/dist/scripts/grader-sensitivity.d.ts +0 -22
  99. package/dist/scripts/grader-sensitivity.js +0 -354
  100. package/dist/scripts/grader-validate.d.ts +0 -19
  101. package/dist/scripts/grader-validate.js +0 -267
  102. package/dist/scripts/measure-retrieval.d.ts +0 -10
  103. package/dist/scripts/measure-retrieval.js +0 -145
  104. package/dist/scripts/pipeline.d.ts +0 -76
  105. package/dist/scripts/pipeline.js +0 -1031
  106. package/dist/scripts/pr-comment.d.ts +0 -10
  107. package/dist/scripts/pr-comment.js +0 -510
  108. package/dist/scripts/readiness-report.d.ts +0 -88
  109. package/dist/scripts/readiness-report.js +0 -342
  110. package/dist/scripts/update-quality-scores.d.ts +0 -15
  111. package/dist/scripts/update-quality-scores.js +0 -184
  112. package/dist/scripts/validate.d.ts +0 -13
  113. package/dist/scripts/validate.js +0 -79
  114. package/dist/scripts/webhook-server.d.ts +0 -26
  115. package/dist/scripts/webhook-server.js +0 -147
  116. package/dist/scripts/weekly-digest.d.ts +0 -24
  117. package/dist/scripts/weekly-digest.js +0 -144
  118. package/dist/sinks/format-slack.d.ts +0 -64
  119. package/dist/sinks/format-slack.js +0 -306
  120. package/dist/sinks/slack-sink.d.ts +0 -27
  121. package/dist/sinks/slack-sink.js +0 -78
  122. package/dist/sinks/webhook-sink.d.ts +0 -19
  123. package/dist/sinks/webhook-sink.js +0 -50
  124. package/tasks/.expanded.agentic.yaml +0 -51
  125. package/tasks/.expanded.yaml +0 -66
@@ -1,66 +0,0 @@
1
- /**
2
- * Environment variable bridge — writes ResolvedConfig values to process.env
3
- * so that lib/*.ts modules (which still read process.env) work correctly.
4
- *
5
- * This replaces the former global applyEnvironment() with an explicit
6
- * per-step bridge. Each orchestration step calls this before invoking
7
- * its lib/*.ts main() function.
8
- *
9
- * Phase 9 will eliminate this file entirely by giving lib/*.ts main()
10
- * functions typed option parameters.
11
- *
12
- * @see docs/exec-plans/active/ports-and-adapters/phase-8-delete-legacy-step-layer.md
13
- */
14
- /**
15
- * Bridge ResolvedConfig values to process.env.
16
- *
17
- * Idempotent — safe to call multiple times. Only sets env vars for
18
- * config values that are defined (never deletes or resets).
19
- */
20
- export function bridgeConfigToEnv(config) {
21
- // Mode
22
- process.env.EVAL_MODE = config.mode;
23
- // Search mode
24
- if (config.searchMode !== "open") {
25
- process.env.EVAL_SEARCH_MODE = config.searchMode;
26
- }
27
- // Source
28
- if (config.source) {
29
- process.env.DOC_SOURCE = config.source;
30
- }
31
- // URL-derived overrides
32
- if (config.urls?.[0]) {
33
- process.env.DOC_BASE_URL = config.urls[0];
34
- }
35
- // Sanity overrides
36
- if (config.datasetOverride) {
37
- process.env.SANITY_DATASET = config.datasetOverride;
38
- }
39
- if (config.projectIdOverride) {
40
- process.env.SANITY_PROJECT_ID = config.projectIdOverride;
41
- }
42
- if (config.perspectiveOverride) {
43
- process.env.SANITY_PERSPECTIVE = config.perspectiveOverride;
44
- }
45
- if (config.studioOriginOverride) {
46
- process.env.SANITY_STUDIO_ORIGIN = config.studioOriginOverride;
47
- }
48
- if (config.sanityDocumentArgs?.length) {
49
- process.env.SANITY_DOCUMENT_IDS = config.sanityDocumentArgs.join(",");
50
- }
51
- // Custom headers
52
- if (config.headers) {
53
- process.env.DOC_HEADERS = JSON.stringify(config.headers);
54
- }
55
- // Allowed origins
56
- if (config.allowedOrigins?.length) {
57
- process.env.DOC_ALLOWED_ORIGINS = config.allowedOrigins.join(",");
58
- }
59
- // Scoping filters
60
- if (config.areas) {
61
- process.env.EVAL_FILTER_AREAS = config.areas.join(",");
62
- }
63
- if (config.tasks) {
64
- process.env.EVAL_FILTER_TASKS = config.tasks.join(",");
65
- }
66
- }
@@ -1,17 +0,0 @@
1
- /**
2
- * Shell delegation for the fetch-docs step.
3
- *
4
- * Isolates the execSync call so it can be replaced when the pipeline
5
- * fully migrates to the DocFetcher port.
6
- */
7
- export interface ShellResult {
8
- ok: boolean;
9
- error?: string;
10
- }
11
- /**
12
- * Run `pnpm fetch-docs` via shell.
13
- *
14
- * Returns a result object instead of throwing so the step can
15
- * handle the failure uniformly.
16
- */
17
- export declare function runFetchDocsShell(rootDir: string, source?: string): ShellResult;
@@ -1,30 +0,0 @@
1
- /**
2
- * Shell delegation for the fetch-docs step.
3
- *
4
- * Isolates the execSync call so it can be replaced when the pipeline
5
- * fully migrates to the DocFetcher port.
6
- */
7
- import { execSync } from "child_process";
8
- /**
9
- * Run `pnpm fetch-docs` via shell.
10
- *
11
- * Returns a result object instead of throwing so the step can
12
- * handle the failure uniformly.
13
- */
14
- export function runFetchDocsShell(rootDir, source) {
15
- try {
16
- const sourceArg = source ? ` --source ${source}` : "";
17
- execSync(`pnpm fetch-docs${sourceArg}`, {
18
- cwd: rootDir,
19
- env: process.env,
20
- stdio: "inherit",
21
- });
22
- return { ok: true };
23
- }
24
- catch (err) {
25
- return {
26
- ok: false,
27
- error: err instanceof Error ? err.message : String(err),
28
- };
29
- }
30
- }
@@ -1,11 +0,0 @@
1
- /**
2
- * Pipeline step: Calculate AI Literacy Scores from eval results.
3
- *
4
- * Preconditions: eval-results.json exists and is valid
5
- * Postconditions: score-summary.json exists and is valid
6
- *
7
- * Cache key: eval results JSON file(s)
8
- * Cache outputs: results/latest/score-summary.json
9
- */
10
- import type { EvalMode, StepResult } from "../types.js";
11
- export declare function runCalculateScores(source?: string, mode?: EvalMode, noCache?: boolean): StepResult;
@@ -1,89 +0,0 @@
1
- /**
2
- * Pipeline step: Calculate AI Literacy Scores from eval results.
3
- *
4
- * Preconditions: eval-results.json exists and is valid
5
- * Postconditions: score-summary.json exists and is valid
6
- *
7
- * Cache key: eval results JSON file(s)
8
- * Cache outputs: results/latest/score-summary.json
9
- */
10
- import { execSync } from "child_process";
11
- import { dirname, resolve } from "path";
12
- import { fileURLToPath } from "url";
13
- import { getStepInputPaths, hashFiles, lookupCache, recordCache, } from "../cache.js";
14
- import { checkResultsExist, checkScoreSummaryValid } from "../checks.js";
15
- import { RESULTS_FILES } from "./eval-step.js";
16
- const __dirname = dirname(fileURLToPath(import.meta.url));
17
- const ROOT = resolve(__dirname, "..", "..", "..");
18
- export function runCalculateScores(source, mode = "baseline", noCache = false) {
19
- const start = Date.now();
20
- // For full mode, use the baseline results file as the primary input
21
- // (calculate-scores reads all available results files internally)
22
- const primaryMode = mode === "full" ? "baseline" : mode;
23
- const resultsFile = RESULTS_FILES[primaryMode];
24
- const resultsIssues = checkResultsExist(ROOT, resultsFile);
25
- const resultsErrors = resultsIssues.filter((i) => i.severity === "error");
26
- if (resultsErrors.length > 0) {
27
- return {
28
- durationMs: Date.now() - start,
29
- error: `Results missing: ${resultsErrors.map((e) => e.message).join("; ")}`,
30
- status: "failed",
31
- };
32
- }
33
- // Cache check
34
- if (!noCache) {
35
- const cacheResult = lookupCache(ROOT, "calculate-scores");
36
- if (cacheResult.hit) {
37
- return {
38
- durationMs: Date.now() - start,
39
- status: "success",
40
- summary: `Skipped (cached) — ${cacheResult.entry.summary}`,
41
- };
42
- }
43
- }
44
- // Execute — note: calculate-scores exits 1 when areas are below critical,
45
- // which is expected behavior, not an error
46
- try {
47
- const sourceArg = source ? ` --source ${source}` : "";
48
- const resultsArg = primaryMode !== "baseline" ? ` ${resultsFile}` : "";
49
- execSync(`tsx src/lib/calculate-scores.ts${resultsArg}${sourceArg}`, {
50
- cwd: ROOT,
51
- env: process.env,
52
- stdio: "inherit",
53
- });
54
- }
55
- catch (err) {
56
- const code = err !== null && typeof err === "object" && "status" in err
57
- ? err.status
58
- : 1;
59
- // Exit code 1 means "areas below critical" — that's expected
60
- if (code !== 1) {
61
- return {
62
- durationMs: Date.now() - start,
63
- error: `calculate-scores failed with exit code ${code}`,
64
- status: "failed",
65
- };
66
- }
67
- }
68
- // Postcondition: score summary exists and is valid
69
- const summaryIssues = checkScoreSummaryValid(ROOT);
70
- const summaryErrors = summaryIssues.filter((i) => i.severity === "error");
71
- if (summaryErrors.length > 0) {
72
- return {
73
- durationMs: Date.now() - start,
74
- error: `Postcondition failed: ${summaryErrors.map((e) => e.message).join("; ")}`,
75
- status: "failed",
76
- };
77
- }
78
- const durationMs = Date.now() - start;
79
- const summary = "Scores calculated and summary written";
80
- // Record cache
81
- if (!noCache) {
82
- const inputPaths = getStepInputPaths(ROOT, "calculate-scores");
83
- const inputHash = hashFiles(inputPaths);
84
- recordCache(ROOT, "calculate-scores", inputHash, summary, durationMs, [
85
- "results/latest/score-summary.json",
86
- ]);
87
- }
88
- return { durationMs, status: "success", summary };
89
- }
@@ -1,18 +0,0 @@
1
- /**
2
- * Pipeline step: Compare current scores against a baseline.
3
- *
4
- * Preconditions: score-summary.json exists
5
- * Postconditions: comparison-report.json written to results/latest/
6
- *
7
- * This step is optional — it only runs when --compare is passed
8
- * (or a baseline exists and auto-compare is enabled).
9
- */
10
- import type { CompareOptions, StepResult } from "../types.js";
11
- /**
12
- * Run comparison against a baseline.
13
- *
14
- * @param rootDir Package root directory
15
- * @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
16
- * @param options Compare options (noise threshold, etc.)
17
- */
18
- export declare function runCompare(rootDir: string, baselinePath?: string, options?: CompareOptions): StepResult;
@@ -1,90 +0,0 @@
1
- /**
2
- * Pipeline step: Compare current scores against a baseline.
3
- *
4
- * Preconditions: score-summary.json exists
5
- * Postconditions: comparison-report.json written to results/latest/
6
- *
7
- * This step is optional — it only runs when --compare is passed
8
- * (or a baseline exists and auto-compare is enabled).
9
- */
10
- import { existsSync, readFileSync, readdirSync, writeFileSync } from "fs";
11
- import { join, resolve } from "path";
12
- import { compare } from "../compare.js";
13
- /**
14
- * Run comparison against a baseline.
15
- *
16
- * @param rootDir Package root directory
17
- * @param baselinePath Explicit baseline file path (optional — uses latest if omitted)
18
- * @param options Compare options (noise threshold, etc.)
19
- */
20
- export function runCompare(rootDir, baselinePath, options) {
21
- const start = Date.now();
22
- const scoreSummaryPath = resolve(rootDir, "results", "latest", "score-summary.json");
23
- if (!existsSync(scoreSummaryPath)) {
24
- return {
25
- durationMs: Date.now() - start,
26
- error: "score-summary.json not found. Run calculate-scores first.",
27
- status: "failed",
28
- };
29
- }
30
- // Load experiment (current run)
31
- const experiment = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
32
- // Resolve baseline
33
- let resolvedBaselinePath;
34
- if (baselinePath) {
35
- resolvedBaselinePath = resolve(baselinePath);
36
- }
37
- else {
38
- const baselinesDir = resolve(rootDir, "results", "baselines");
39
- if (!existsSync(baselinesDir)) {
40
- return {
41
- reason: "No baselines directory found. Run 'pnpm baseline:save' first.",
42
- status: "skipped",
43
- };
44
- }
45
- const files = readdirSync(baselinesDir)
46
- .filter((f) => f.endsWith(".json"))
47
- .sort()
48
- .reverse();
49
- if (files.length === 0) {
50
- return {
51
- reason: "No baseline files found. Run 'pnpm baseline:save' first.",
52
- status: "skipped",
53
- };
54
- }
55
- resolvedBaselinePath = join(baselinesDir, files[0]);
56
- }
57
- if (!existsSync(resolvedBaselinePath)) {
58
- return {
59
- durationMs: Date.now() - start,
60
- error: `Baseline file not found: ${resolvedBaselinePath}`,
61
- status: "failed",
62
- };
63
- }
64
- const baseline = JSON.parse(readFileSync(resolvedBaselinePath, "utf-8"));
65
- // Run comparison
66
- const report = compare(baseline, experiment, options);
67
- // Write report
68
- const reportPath = resolve(rootDir, "results", "latest", "comparison-report.json");
69
- writeFileSync(reportPath, JSON.stringify(report, null, 2));
70
- // Build summary
71
- const improved = report.improved.length;
72
- const regressed = report.regressed.length;
73
- const unchanged = report.unchanged.length;
74
- const overallDelta = report.deltas.overall;
75
- const deltaStr = overallDelta > 0
76
- ? `+${Math.round(overallDelta)}`
77
- : String(Math.round(overallDelta));
78
- const parts = [`Overall: ${deltaStr}`];
79
- if (improved > 0)
80
- parts.push(`${improved} improved`);
81
- if (regressed > 0)
82
- parts.push(`${regressed} regressed`);
83
- if (unchanged > 0)
84
- parts.push(`${unchanged} unchanged`);
85
- return {
86
- durationMs: Date.now() - start,
87
- status: "success",
88
- summary: parts.join(", "),
89
- };
90
- }
@@ -1,53 +0,0 @@
1
- /**
2
- * Pipeline step: Run promptfoo evaluation.
3
- *
4
- * Preconditions: config files and context files exist
5
- * Postconditions: eval-results.json exists and is valid
6
- *
7
- * Cache key: promptfooconfig*.yaml + contexts/*.md + tasks/*.yaml +
8
- * canonical contexts + reference solutions + config/models.yaml
9
- * Cache outputs: results/latest/eval-results*.json
10
- *
11
- * Remote cache: When local cache misses and a Sanity token is available,
12
- * the step queries the Content Lake for a report with a matching eval
13
- * fingerprint. On a hit, the cached score-summary.json is written to disk
14
- * and the eval + calculate-scores steps are skipped entirely.
15
- *
16
- * @see docs/design-docs/content-lake-eval-caching.md
17
- */
18
- import type { ConcreteEvalMode, DebugOptions, FilterOptions, StepResult } from "../types.js";
19
- /** Each mode writes eval results to a different file (set in the config's outputPath) */
20
- export declare const RESULTS_FILES: Record<ConcreteEvalMode, string>;
21
- /** Extended step result that carries cache metadata for downstream steps */
22
- export interface EvalStepResult {
23
- /** The computed eval fingerprint (for publishing in provenance) */
24
- evalFingerprint?: string;
25
- /** Whether this result came from a remote cache hit */
26
- remoteCacheHit?: boolean;
27
- /** The step result */
28
- stepResult: StepResult;
29
- }
30
- /** Options for the remote cache (Content Lake fingerprint lookup) */
31
- export interface RemoteCacheOptions {
32
- /** Whether this is a debug run (debug runs don't use remote cache) */
33
- debug?: boolean;
34
- /** Filter options used for fingerprint computation */
35
- filter?: FilterOptions;
36
- /** Grader model identifier from models.yaml */
37
- graderModel: string;
38
- /** Disable remote cache lookup (--no-remote-cache) */
39
- noRemoteCache?: boolean;
40
- /** Sanity API token for reading cached reports */
41
- sanityToken?: string;
42
- }
43
- export declare function buildFilterFlags(debug?: DebugOptions): string;
44
- /**
45
- * Extract the Promptfoo share URL from the eval results JSON.
46
- *
47
- * Promptfoo writes a `shareableUrl` field into the results file when
48
- * `PROMPTFOO_API_KEY` is set. This replaces the previous approach of
49
- * scraping the URL from a captured log file (which required piping
50
- * through `tee` and broke TTY progress reporting).
51
- */
52
- export declare function extractShareUrl(mode: ConcreteEvalMode): string | undefined;
53
- export declare function runEval(mode: ConcreteEvalMode, debug?: DebugOptions, concurrency?: number, noCache?: boolean, remoteCacheOpts?: RemoteCacheOptions): Promise<EvalStepResult>;