@sanity/ailf 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/examples/index.d.ts +16 -12
- package/dist/_vendor/ailf-core/examples/index.js +19 -12
- package/dist/_vendor/ailf-core/ports/context.d.ts +4 -0
- package/dist/adapters/task-sources/repo-schemas.d.ts +12 -2
- package/dist/adapters/task-sources/repo-schemas.js +28 -2
- package/dist/cli.js +0 -0
- package/dist/commands/init.js +17 -5
- package/dist/commands/pipeline-action.js +44 -6
- package/dist/commands/publish.js +2 -1
- package/dist/commands/validate-tasks.js +4 -1
- package/dist/composition-root.js +9 -5
- package/dist/orchestration/build-app-context.js +2 -0
- package/package.json +1 -1
- package/dist/commands/update-quality-scores.d.ts +0 -5
- package/dist/commands/update-quality-scores.js +0 -20
- package/dist/lib/agent-behavior-report.d.ts +0 -8
- package/dist/lib/agent-behavior-report.js +0 -185
- package/dist/lib/baseline.d.ts +0 -19
- package/dist/lib/baseline.js +0 -153
- package/dist/lib/calculate-scores.d.ts +0 -23
- package/dist/lib/calculate-scores.js +0 -42
- package/dist/lib/compare.d.ts +0 -18
- package/dist/lib/compare.js +0 -170
- package/dist/lib/coverage-audit.d.ts +0 -4
- package/dist/lib/coverage-audit.js +0 -42
- package/dist/lib/discovery-report.d.ts +0 -13
- package/dist/lib/discovery-report.js +0 -57
- package/dist/lib/fetch-docs.d.ts +0 -30
- package/dist/lib/fetch-docs.js +0 -171
- package/dist/lib/generate-configs.d.ts +0 -25
- package/dist/lib/generate-configs.js +0 -42
- package/dist/lib/grader-api.d.ts +0 -21
- package/dist/lib/grader-api.js +0 -34
- package/dist/lib/grader-compare.d.ts +0 -19
- package/dist/lib/grader-compare.js +0 -91
- package/dist/lib/grader-consistency.d.ts +0 -27
- package/dist/lib/grader-consistency.js +0 -79
- package/dist/lib/grader-sensitivity.d.ts +0 -19
- package/dist/lib/grader-sensitivity.js +0 -75
- package/dist/lib/grader-validate.d.ts +0 -19
- package/dist/lib/grader-validate.js +0 -78
- package/dist/lib/measure-retrieval.d.ts +0 -14
- package/dist/lib/measure-retrieval.js +0 -71
- package/dist/lib/pr-comment.d.ts +0 -16
- package/dist/lib/pr-comment.js +0 -28
- package/dist/lib/readiness-report.d.ts +0 -13
- package/dist/lib/readiness-report.js +0 -108
- package/dist/lib/webhook-server.d.ts +0 -11
- package/dist/lib/webhook-server.js +0 -24
- package/dist/lib/weekly-digest.d.ts +0 -24
- package/dist/lib/weekly-digest.js +0 -148
- package/dist/orchestration/env-bridge.d.ts +0 -21
- package/dist/orchestration/env-bridge.js +0 -66
- package/dist/orchestration/steps/fetch-docs-shell.d.ts +0 -17
- package/dist/orchestration/steps/fetch-docs-shell.js +0 -30
- package/dist/pipeline/steps/calculate-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/calculate-scores-step.js +0 -89
- package/dist/pipeline/steps/compare-step.d.ts +0 -18
- package/dist/pipeline/steps/compare-step.js +0 -90
- package/dist/pipeline/steps/eval-step.d.ts +0 -53
- package/dist/pipeline/steps/eval-step.js +0 -347
- package/dist/pipeline/steps/fetch-docs-step.d.ts +0 -11
- package/dist/pipeline/steps/fetch-docs-step.js +0 -84
- package/dist/pipeline/steps/generate-configs-step.d.ts +0 -11
- package/dist/pipeline/steps/generate-configs-step.js +0 -98
- package/dist/pipeline/steps/grader-consistency-step.d.ts +0 -21
- package/dist/pipeline/steps/grader-consistency-step.js +0 -74
- package/dist/pipeline/steps/publish-report-step.d.ts +0 -57
- package/dist/pipeline/steps/publish-report-step.js +0 -243
- package/dist/pipeline/steps/report-step.d.ts +0 -13
- package/dist/pipeline/steps/report-step.js +0 -56
- package/dist/pipeline/steps/update-scores-step.d.ts +0 -11
- package/dist/pipeline/steps/update-scores-step.js +0 -42
- package/dist/scripts/agent-behavior-report.d.ts +0 -19
- package/dist/scripts/agent-behavior-report.js +0 -315
- package/dist/scripts/baseline.d.ts +0 -43
- package/dist/scripts/baseline.js +0 -267
- package/dist/scripts/calculate-scores.d.ts +0 -166
- package/dist/scripts/calculate-scores.js +0 -1296
- package/dist/scripts/compare.d.ts +0 -22
- package/dist/scripts/compare.js +0 -334
- package/dist/scripts/coverage-audit.d.ts +0 -44
- package/dist/scripts/coverage-audit.js +0 -209
- package/dist/scripts/debug-eval.d.ts +0 -19
- package/dist/scripts/debug-eval.js +0 -73
- package/dist/scripts/discovery-report.d.ts +0 -58
- package/dist/scripts/discovery-report.js +0 -250
- package/dist/scripts/fetch-docs.d.ts +0 -35
- package/dist/scripts/fetch-docs.js +0 -472
- package/dist/scripts/generate-configs.d.ts +0 -66
- package/dist/scripts/generate-configs.js +0 -459
- package/dist/scripts/grader-api.d.ts +0 -27
- package/dist/scripts/grader-api.js +0 -206
- package/dist/scripts/grader-compare.d.ts +0 -22
- package/dist/scripts/grader-compare.js +0 -368
- package/dist/scripts/grader-consistency.d.ts +0 -20
- package/dist/scripts/grader-consistency.js +0 -313
- package/dist/scripts/grader-sensitivity.d.ts +0 -22
- package/dist/scripts/grader-sensitivity.js +0 -354
- package/dist/scripts/grader-validate.d.ts +0 -19
- package/dist/scripts/grader-validate.js +0 -267
- package/dist/scripts/measure-retrieval.d.ts +0 -10
- package/dist/scripts/measure-retrieval.js +0 -145
- package/dist/scripts/pipeline.d.ts +0 -76
- package/dist/scripts/pipeline.js +0 -1031
- package/dist/scripts/pr-comment.d.ts +0 -10
- package/dist/scripts/pr-comment.js +0 -510
- package/dist/scripts/readiness-report.d.ts +0 -88
- package/dist/scripts/readiness-report.js +0 -342
- package/dist/scripts/update-quality-scores.d.ts +0 -15
- package/dist/scripts/update-quality-scores.js +0 -184
- package/dist/scripts/validate.d.ts +0 -13
- package/dist/scripts/validate.js +0 -79
- package/dist/scripts/webhook-server.d.ts +0 -26
- package/dist/scripts/webhook-server.js +0 -147
- package/dist/scripts/weekly-digest.d.ts +0 -24
- package/dist/scripts/weekly-digest.js +0 -144
- package/dist/sinks/format-slack.d.ts +0 -64
- package/dist/sinks/format-slack.js +0 -306
- package/dist/sinks/slack-sink.d.ts +0 -27
- package/dist/sinks/slack-sink.js +0 -78
- package/dist/sinks/webhook-sink.d.ts +0 -19
- package/dist/sinks/webhook-sink.js +0 -50
- package/tasks/.expanded.agentic.yaml +0 -51
- package/tasks/.expanded.yaml +0 -66
package/dist/scripts/pipeline.js
DELETED
|
@@ -1,1031 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* pipeline.ts
|
|
3
|
-
*
|
|
4
|
-
* CLI orchestrator for the modular evaluation pipeline.
|
|
5
|
-
* Runs steps in sequence with validation between each.
|
|
6
|
-
*
|
|
7
|
-
* This is the single entry point for both local and CI evaluation.
|
|
8
|
-
* The CI workflow (eval.yml) calls this script, then layers on
|
|
9
|
-
* CI-specific post-steps (PR comment posting, artifact upload).
|
|
10
|
-
*
|
|
11
|
-
* Usage:
|
|
12
|
-
* pnpm pipeline # full baseline pipeline
|
|
13
|
-
* pnpm pipeline --dry-run # validate only, no execution
|
|
14
|
-
* pnpm pipeline --skip-fetch # reuse cached doc contexts
|
|
15
|
-
* pnpm pipeline --skip-eval # recalculate from existing results
|
|
16
|
-
* pnpm pipeline --mode agentic # run agentic pipeline
|
|
17
|
-
* pnpm pipeline --mode observed # run observed pipeline
|
|
18
|
-
* pnpm pipeline --source staging # use staging doc source
|
|
19
|
-
* pnpm pipeline --debug # run first 2 tests only (fast)
|
|
20
|
-
* pnpm pipeline --debug-n 5 # run first 5 tests
|
|
21
|
-
* pnpm pipeline --debug-pattern "Blog" # filter by description
|
|
22
|
-
* pnpm pipeline --debug-sample 3 # random sample of 3 tests
|
|
23
|
-
* pnpm pipeline --no-cache # bypass caching, force re-run
|
|
24
|
-
* pnpm pipeline --concurrency 64 # override max parallel API calls
|
|
25
|
-
* pnpm pipeline --area groq,frameworks # only evaluate these areas
|
|
26
|
-
* pnpm pipeline --task groq-blog-queries # only evaluate this task
|
|
27
|
-
* pnpm pipeline --changed-docs groq-introduction,how-queries-work
|
|
28
|
-
* # auto-scope to affected tasks
|
|
29
|
-
* pnpm pipeline --url https://... # override docs base URL
|
|
30
|
-
* pnpm pipeline --sanity-dataset staging # override Sanity dataset
|
|
31
|
-
* pnpm pipeline --sanity-project abc123 # override Sanity project ID
|
|
32
|
-
* pnpm pipeline --sanity-perspective agent-c7OKTk
|
|
33
|
-
* # evaluate a Sanity release
|
|
34
|
-
* pnpm pipeline --sanity-document <uuid>
|
|
35
|
-
* # evaluate specific document(s)
|
|
36
|
-
* pnpm pipeline --sanity-document <uuid> --sanity-documents <uuid>
|
|
37
|
-
* # singular and plural aliases work
|
|
38
|
-
* pnpm pipeline --header "X-Vercel-Protection-Bypass: <secret>"
|
|
39
|
-
* # custom HTTP header (repeatable)
|
|
40
|
-
* pnpm pipeline --allowed-origin my-branch.sanity.build
|
|
41
|
-
* # sandbox agent to this origin
|
|
42
|
-
* pnpm pipeline --before published # run before/after impact evaluation
|
|
43
|
-
* pnpm pipeline --before production # before = production source
|
|
44
|
-
* pnpm pipeline --before results/baselines/20260310.json # use existing scores
|
|
45
|
-
* pnpm pipeline --before latest-baseline # use most recent baseline
|
|
46
|
-
* pnpm pipeline --compare # compare scores against latest baseline
|
|
47
|
-
* pnpm pipeline --compare --compare-baseline <path> # compare against specific file
|
|
48
|
-
* pnpm pipeline --compare --threshold 5 # noise threshold for unchanged (default: 2)
|
|
49
|
-
* pnpm pipeline --output /tmp/report.md # write report to specific path
|
|
50
|
-
* pnpm pipeline --promptfoo-url <url> # include Promptfoo URL in report
|
|
51
|
-
* pnpm pipeline --gap-analysis # run failure mode + impact analysis
|
|
52
|
-
* pnpm pipeline --publish # write report to Sanity + fan out to sinks
|
|
53
|
-
* pnpm pipeline --publish --publish-tag "daily-2026-03-11" # tag the report
|
|
54
|
-
* pnpm pipeline --publish --report-dataset ailf-reports # report store dataset
|
|
55
|
-
* pnpm pipeline --publish --report-project abc123 # report store project
|
|
56
|
-
*
|
|
57
|
-
* Override precedence (highest wins):
|
|
58
|
-
* CLI flag (--url, --sanity-dataset, --sanity-project, --allowed-origin)
|
|
59
|
-
* → Environment variable (DOC_BASE_URL, SANITY_DATASET, SANITY_PROJECT_ID, DOC_ALLOWED_ORIGIN)
|
|
60
|
-
* → config/sources.yaml default value
|
|
61
|
-
*
|
|
62
|
-
* --header flags are additive and do not override env vars — they are
|
|
63
|
-
* always merged with any headers defined in DOC_HEADERS env var.
|
|
64
|
-
*
|
|
65
|
-
* Environment variable fallbacks (for CI):
|
|
66
|
-
* DEBUG_EVAL=1 → --debug
|
|
67
|
-
* DEBUG_EVAL_N=2 → --debug-n 2
|
|
68
|
-
* DEBUG_EVAL_PATTERN → --debug-pattern
|
|
69
|
-
* DEBUG_EVAL_SAMPLE → --debug-sample
|
|
70
|
-
* EVAL_FILTER_AREAS → --area
|
|
71
|
-
* EVAL_FILTER_TASKS → --task
|
|
72
|
-
* EVAL_CHANGED_DOCS → --changed-docs
|
|
73
|
-
* AILF_REPORT_DATASET → --report-dataset (report store, not eval)
|
|
74
|
-
* AILF_REPORT_PROJECT_ID → --report-project (report store, not eval)
|
|
75
|
-
*/
|
|
76
|
-
import { config as dotenvConfig } from "dotenv";
|
|
77
|
-
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
78
|
-
import { load } from "js-yaml";
|
|
79
|
-
import { dirname, join, resolve } from "path";
|
|
80
|
-
import { fileURLToPath } from "url";
|
|
81
|
-
import { createCacheStats } from "../pipeline/cache.js";
|
|
82
|
-
import { checkEnvironment } from "../pipeline/checks.js";
|
|
83
|
-
import { runCalculateScores } from "../pipeline/steps/calculate-scores-step.js";
|
|
84
|
-
import { runCompare } from "../pipeline/steps/compare-step.js";
|
|
85
|
-
import { runGraderConsistency } from "../pipeline/steps/grader-consistency-step.js";
|
|
86
|
-
import { extractShareUrl, runEval } from "../pipeline/steps/eval-step.js";
|
|
87
|
-
import { runFetchDocs } from "../pipeline/steps/fetch-docs-step.js";
|
|
88
|
-
import { runGenerateConfigs } from "../pipeline/steps/generate-configs-step.js";
|
|
89
|
-
import { runPublishReport } from "../pipeline/steps/publish-report-step.js";
|
|
90
|
-
import { runReport } from "../pipeline/steps/report-step.js";
|
|
91
|
-
import { assessImpact, buildReverseMapping, } from "../pipeline/reverse-mapping.js";
|
|
92
|
-
import { classifyUrls } from "../pipeline/classify-url.js";
|
|
93
|
-
import { validateConfiguration } from "../pipeline/validate.js";
|
|
94
|
-
// Directory containing this compiled script, and the package root two levels up.
const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, "../..");
// ---------------------------------------------------------------------------
// Root .env loading.
//
// The individual scripts are launched through `dotenv -e ../../.env -o --`,
// where `-o` makes values from .env override the shell environment. The same
// override semantics are applied here so both entry points behave alike.
// When the file is absent (as in CI, where workflow `env:` blocks provide the
// variables) nothing is loaded.
// ---------------------------------------------------------------------------
const envPath = resolve(ROOT, "../..", ".env");
if (existsSync(envPath)) {
    dotenvConfig({ path: envPath, override: true });
}
// ---------------------------------------------------------------------------
// CLI argument parsing
// ---------------------------------------------------------------------------
// Everything after the runtime executable and the script path.
const [, , ...args] = process.argv;
|
|
111
|
-
/**
 * Collect every value supplied for a repeatable option.
 *
 * Values are grouped by alias in the order the aliases are given, and within
 * one alias they keep their command-line order. A flag that is the last
 * argument (no value after it) contributes nothing.
 *
 * @param {...string} names - Option names without the leading `--`.
 * @returns {string[]} The argument following each occurrence of each flag.
 */
function getAllOptions(...names) {
    return names.flatMap((name) => {
        const flag = `--${name}`;
        // Keep an argument when the one immediately before it is the flag.
        return args.filter((_, pos) => pos > 0 && args[pos - 1] === flag);
    });
}
|
|
123
|
-
/**
 * Report whether a boolean switch is present on the command line.
 *
 * @param {string} name - Switch name without the leading `--`.
 * @returns {boolean} True when `--name` appears anywhere in the arguments.
 */
function getFlag(name) {
    const flag = `--${name}`;
    return args.some((arg) => arg === flag);
}
|
|
126
|
-
/**
 * Read the value of a single-valued option, trying each alias in turn.
 *
 * Only the first occurrence of an alias is considered. If that occurrence is
 * the final argument (so it has no value), the next alias is tried.
 *
 * @param {...string} names - Option names without the leading `--`.
 * @returns {string | undefined} The argument following the first usable flag,
 *   or undefined when no alias yields a value.
 */
function getOption(...names) {
    const flagPos = names
        .map((name) => args.indexOf(`--${name}`))
        .find((pos) => pos >= 0 && pos < args.length - 1);
    return flagPos === undefined ? undefined : args[flagPos + 1];
}
|
|
134
|
-
// Plain switches and single-valued options.
const dryRun = getFlag("dry-run");
const skipFetch = getFlag("skip-fetch");
const skipEval = getFlag("skip-eval");
const noCache = getFlag("no-cache");
const mode = getOption("mode") ?? "baseline";
const source = getOption("source");
const outputPath = getOption("output");
const promptfooUrl = getOption("promptfoo-url");
// Debug filters: a CLI value wins over its DEBUG_EVAL_* environment variable.
// Supplying any of the three filters turns debug on even without --debug.
const debugN = getOption("debug-n") ?? process.env.DEBUG_EVAL_N;
const debugPattern = getOption("debug-pattern") ?? process.env.DEBUG_EVAL_PATTERN;
const debugSample = getOption("debug-sample") ?? process.env.DEBUG_EVAL_SAMPLE;
const debugEnabled = getFlag("debug") ||
    process.env.DEBUG_EVAL === "1" ||
    [debugN, debugPattern, debugSample].some((value) => value !== undefined);
const debug = debugEnabled
    ? {
        enabled: true,
        firstN: debugN ? Number.parseInt(debugN, 10) : undefined,
        pattern: debugPattern,
        sample: debugSample ? Number.parseInt(debugSample, 10) : undefined,
    }
    : undefined;
// --concurrency <n>
const concurrencyStr = getOption("concurrency");
const concurrency = concurrencyStr ? Number.parseInt(concurrencyStr, 10) : undefined;
// Scoping by area or task (comma-separated lists).
const areaOption = getOption("area");
const taskOption = getOption("task");
// Scoping by changed documents: --changed-docs slug1,slug2, else EVAL_CHANGED_DOCS.
const changedDocsOption = getOption("changed-docs") ?? process.env.EVAL_CHANGED_DOCS;
// Grader consistency: --grader-replications <n>
const graderReplicationsStr = getOption("grader-replications");
const graderReplications = graderReplicationsStr
    ? Number.parseInt(graderReplicationsStr, 10)
    : undefined;
// Before/after impact evaluation: --before <source>.
// Per the usage notes, accepted values are "published", "production",
// "latest-baseline", or a path to an existing score summary file.
const beforeOption = getOption("before");
// Comparison is on when requested explicitly or implied by --before.
const compareEnabled = getFlag("compare") || beforeOption !== undefined;
const compareBaseline = getOption("compare-baseline");
const compareThresholdStr = getOption("threshold");
const compareThreshold = compareThresholdStr
    ? Number.parseFloat(compareThresholdStr)
    : undefined;
// Optional report switches.
const gapAnalysisEnabled = getFlag("gap-analysis");
const readinessEnabled = getFlag("readiness");
const discoveryReportEnabled = getFlag("discovery-report");
// Publishing: --publish or AILF_PUBLISH=1, with an optional tag.
const publishEnabled = getFlag("publish") || process.env.AILF_PUBLISH === "1";
const publishTag = getOption("publish-tag");
// Report store target. These are separate settings from
// SANITY_DATASET / SANITY_PROJECT_ID, which select the docs being evaluated.
const reportDataset = getOption("report-dataset") ?? process.env.AILF_REPORT_DATASET;
const reportProjectId = getOption("report-project") ?? process.env.AILF_REPORT_PROJECT_ID;
|
|
197
|
-
// Search mode: controls the web_search tool in agentic mode.
// --search open (default), --search origin-only, --search off
// Precedence: --search flag, then EVAL_SEARCH_MODE, then "open".
const searchMode = getOption("search") ?? process.env.EVAL_SEARCH_MODE ?? "open";
const VALID_SEARCH_MODES = ["open", "origin-only", "off"];
if (!VALID_SEARCH_MODES.includes(searchMode)) {
    console.error(`❌ Invalid --search mode "${searchMode}". Must be one of: ${VALID_SEARCH_MODES.join(", ")}`);
    process.exit(1);
}
// Pass the resolved mode through to generate-configs via the env var.
// It is written unconditionally: writing it only for non-"open" modes left an
// inherited EVAL_SEARCH_MODE (e.g. "off") in place when the user explicitly
// passed `--search open`, so the CLI flag silently lost to the environment.
process.env.EVAL_SEARCH_MODE = searchMode;
|
|
209
|
-
// Source overrides: --url/--urls, --sanity-dataset, --sanity-project,
// --sanity-perspective, --sanity-studio-origin, --sanity-document(s).
// Precedence: CLI flag → env var → config/sources.yaml default.
// The values are written to process.env for downstream code to read.
const urlArgs = getAllOptions("url", "urls");
const datasetOverride = getOption("sanity-dataset");
const projectIdOverride = getOption("sanity-project");
const perspectiveOverride = getOption("sanity-perspective");
const studioOriginOverride = getOption("sanity-studio-origin");
const sanityDocumentArgs = getAllOptions("sanity-document", "sanity-documents");
// Explicit document IDs are applied first so that the URL-derived merge below
// can extend them. Previously this assignment ran last and overwrote the
// merged list, dropping every document ID that came from a --url argument.
if (sanityDocumentArgs.length > 0) {
    process.env.SANITY_DOCUMENT_IDS = sanityDocumentArgs.join(",");
}
// Classify URLs: detect Sanity releases, documents, and direct URLs.
if (urlArgs.length > 0) {
    const classification = classifyUrls(urlArgs);
    // First URL becomes the base URL (used for agentic mode, llms.txt, etc.)
    process.env.DOC_BASE_URL = urlArgs[0];
    // Union of explicit --sanity-document args and IDs found in the URLs,
    // explicit ones first, duplicates removed.
    if (classification.documentIds.length > 0) {
        const merged = [...new Set([...sanityDocumentArgs, ...classification.documentIds])];
        process.env.SANITY_DOCUMENT_IDS = merged.join(",");
    }
    // Inferred perspective / studio origin apply only without an explicit flag.
    if (classification.inferredPerspective && !perspectiveOverride) {
        process.env.SANITY_PERSPECTIVE = classification.inferredPerspective;
    }
    if (classification.inferredStudioOrigin && !studioOriginOverride) {
        process.env.SANITY_STUDIO_ORIGIN = classification.inferredStudioOrigin;
    }
    // Direct URLs: pass through for baseline mode doc fetching.
    if (classification.directUrls.length > 0) {
        process.env.DOC_DIRECT_URLS = classification.directUrls.join(",");
    }
}
if (datasetOverride) {
    process.env.SANITY_DATASET = datasetOverride;
}
if (projectIdOverride) {
    process.env.SANITY_PROJECT_ID = projectIdOverride;
}
if (perspectiveOverride) {
    process.env.SANITY_PERSPECTIVE = perspectiveOverride;
}
if (studioOriginOverride) {
    process.env.SANITY_STUDIO_ORIGIN = studioOriginOverride;
}
|
|
252
|
-
// Custom HTTP headers: --header "Key: Value" (repeatable, like curl -H).
// The result is stored in the DOC_HEADERS env var as a JSON object.
//
// The usage notes at the top of this file state that --header flags are
// additive and always merged with headers defined in DOC_HEADERS. Overwriting
// the env var discarded those, so the existing value is now parsed and used as
// the starting point; a CLI header replaces an env header with the same key.
const headerArgs = getAllOptions("header", "headers");
if (headerArgs.length > 0) {
    let headers = {};
    if (process.env.DOC_HEADERS) {
        try {
            const fromEnv = JSON.parse(process.env.DOC_HEADERS);
            if (fromEnv !== null && typeof fromEnv === "object" && !Array.isArray(fromEnv)) {
                headers = { ...fromEnv };
            }
            else {
                console.warn(`⚠️ Ignoring DOC_HEADERS: expected a JSON object of header names to values.`);
            }
        }
        catch {
            // Best effort: an unparseable env value must not block CLI headers.
            console.warn(`⚠️ Ignoring DOC_HEADERS: value is not valid JSON.`);
        }
    }
    for (const h of headerArgs) {
        // Split on the first colon only, so values may themselves contain ":".
        const colonIdx = h.indexOf(":");
        if (colonIdx === -1) {
            console.error(`❌ Invalid header format: "${h}". Expected "Key: Value".`);
            process.exit(1);
        }
        const key = h.slice(0, colonIdx).trim();
        const value = h.slice(colonIdx + 1).trim();
        if (!key) {
            console.error(`❌ Invalid header: empty key in "${h}"`);
            process.exit(1);
        }
        headers[key] = value;
    }
    process.env.DOC_HEADERS = JSON.stringify(headers);
}
|
|
273
|
-
// Allowed-origin sandboxing: --allowed-origin <host> (repeatable; plural alias
// --allowed-origins). Per the usage notes this restricts the agent to the
// given origins, and glob patterns such as "*.sanity.build" may be used.
const allowedOriginArgs = getAllOptions("allowed-origin", "allowed-origins");
// With no explicit origin but at least one --url, fall back to the hostname of
// the first URL (minus a leading "www."), so passing --url sandboxes by default.
if (allowedOriginArgs.length === 0 && urlArgs.length > 0) {
    let inferredHost;
    try {
        inferredHost = new URL(urlArgs[0]).hostname;
    }
    catch {
        // Invalid URL — will be caught later in validation
    }
    if (inferredHost !== undefined) {
        allowedOriginArgs.push(inferredHost.replace(/^www\./, ""));
    }
}
// Publish the list (comma-separated) through DOC_ALLOWED_ORIGINS.
// NOTE(review): the original comment says the singular DOC_ALLOWED_ORIGIN is
// kept for backward compatibility, but nothing here writes it — confirm where
// that variable is handled.
if (allowedOriginArgs.length > 0) {
    process.env.DOC_ALLOWED_ORIGINS = allowedOriginArgs.join(",");
}
|
|
295
|
-
// Document-driven scoping: resolve changed docs to the areas and tasks that
// reference them, then combine that with explicit --area/--task flags and
// publish the result through EVAL_FILTER_AREAS / EVAL_FILTER_TASKS.
//
// Resolution per filter:
//   - flag + impact with overlap  → the intersection
//   - flag + impact, no overlap   → the flag value
//   - flag only                   → the flag value
//   - impact only                 → the impacted areas/tasks
//   - neither                     → the env var is left untouched (CI fallback)
//
// The filters are computed in locals and written once at the end. The earlier
// `!process.env.EVAL_FILTER_*` guard could not tell "set by the scoping above"
// from "inherited from the environment", so an inherited value silently beat
// an explicit CLI flag.
let impactSummary;
{
    const toList = (csv) => csv.split(",").map((s) => s.trim());
    // An empty flag value counts as "not provided".
    let areaFilter = areaOption || undefined;
    let taskFilter = taskOption || undefined;
    const changedSlugs = changedDocsOption ? toList(changedDocsOption).filter(Boolean) : [];
    if (changedSlugs.length > 0) {
        const reverseMapping = buildReverseMapping(ROOT);
        impactSummary = assessImpact(changedSlugs, reverseMapping);
        if (impactSummary.areas.length === 0) {
            console.warn(`\n⚠️ No evaluation tasks reference any of the changed documents:`);
            for (const slug of changedSlugs) {
                console.warn(` - ${slug}`);
            }
            console.warn(`\n Score impact cannot be measured for these documents.\n`);
        }
        else {
            if (areaOption) {
                const explicitAreas = new Set(toList(areaOption));
                const intersected = impactSummary.areas.filter((a) => explicitAreas.has(a));
                if (intersected.length > 0) {
                    areaFilter = intersected.join(",");
                }
                else {
                    console.warn(`\n⚠️ No overlap between --area (${areaOption}) and affected areas (${impactSummary.areas.join(", ")})`);
                    console.warn(` Running with --area filter only (${areaOption})\n`);
                }
            }
            else {
                areaFilter = impactSummary.areas.join(",");
            }
            if (taskOption) {
                const explicitTasks = new Set(toList(taskOption));
                const intersected = impactSummary.taskIds.filter((t) => explicitTasks.has(t));
                // With no overlap the explicit --task filter stays in effect.
                if (intersected.length > 0) {
                    taskFilter = intersected.join(",");
                }
            }
            else {
                taskFilter = impactSummary.taskIds.join(",");
            }
        }
    }
    if (areaFilter !== undefined) {
        process.env.EVAL_FILTER_AREAS = areaFilter;
    }
    if (taskFilter !== undefined) {
        process.env.EVAL_FILTER_TASKS = taskFilter;
    }
}
|
|
352
|
-
// Reject unknown --mode values before any work starts.
const VALID_MODES = ["baseline", "observed", "agentic"];
const modeIsKnown = VALID_MODES.some((candidate) => candidate === mode);
if (!modeIsKnown) {
    console.error(`❌ Invalid mode "${mode}". Must be one of: ${VALID_MODES.join(", ")}`);
    process.exit(1);
}
// Publish the mode through EVAL_MODE for code that runs later.
process.env.EVAL_MODE = mode;
|
|
360
|
-
// ---------------------------------------------------------------------------
|
|
361
|
-
// Pipeline result assembly
|
|
362
|
-
// ---------------------------------------------------------------------------
|
|
363
|
-
/**
 * Assemble the final pipeline result object.
 *
 * @param {number} pipelineStart - Epoch milliseconds at which the run began.
 * @param {Object} steps - Step results keyed by step name; each has a `status`.
 * @param {*} validation - Validation outcome, passed through unchanged.
 * @param {Object} [cacheStats] - Cache counters; when truthy, its `hits`,
 *   `misses`, `skipped` and `total` are copied into a `cache` property.
 * @param {string} [resolvedPromptfooUrl] - Stored as `promptfooUrl`.
 * @returns {Object} Result with `durationMs`, `promptfooUrl`, `steps`,
 *   `success`, `validation` and, when cache stats were given, `cache`.
 *   `success` is true only if every step is "success" or "skipped".
 */
function buildResult(pipelineStart, steps, validation, cacheStats, resolvedPromptfooUrl) {
    const elapsed = Date.now() - pipelineStart;
    const anyUnfinished = Object.values(steps).some((step) => step.status !== "success" && step.status !== "skipped");
    const result = {
        durationMs: elapsed,
        promptfooUrl: resolvedPromptfooUrl,
        steps,
        success: !anyUnfinished,
        validation,
    };
    if (cacheStats) {
        const { hits, misses, skipped, total } = cacheStats;
        result.cache = { hits, misses, skipped, total };
    }
    return result;
}
|
|
382
|
-
// ---------------------------------------------------------------------------
|
|
383
|
-
// Formatting helpers
|
|
384
|
-
// ---------------------------------------------------------------------------
|
|
385
|
-
/**
 * Render a millisecond duration for console output.
 *
 * - under 1 s: `"<ms>ms"`
 * - under 1 min: seconds with one decimal, e.g. `"1.5s"`
 * - otherwise: whole minutes and seconds, e.g. `"1m 30s"`
 *
 * Rounding is done before choosing the unit. Rounding the seconds remainder
 * on its own produced "1m 60s" (e.g. 119500 ms), and values just under a
 * minute rendered as "60.0s".
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} Human-readable duration.
 */
function formatDuration(ms) {
    if (ms < 1000) {
        return `${ms}ms`;
    }
    // Values that would display as 60.0s are promoted to the minutes format.
    const tenthsOfSecond = Math.round(ms / 100);
    if (tenthsOfSecond < 600) {
        return `${(ms / 1000).toFixed(1)}s`;
    }
    const totalSeconds = Math.round(ms / 1000);
    const min = Math.floor(totalSeconds / 60);
    const sec = totalSeconds % 60;
    return `${min}m ${sec}s`;
}
|
|
394
|
-
/**
 * Print a one-line summary of a pipeline step to stdout.
 *
 * Failed and successful steps show their duration plus the error or summary;
 * skipped steps show the skip reason. Any other status prints nothing.
 *
 * @param {string} name - Display name of the step.
 * @param {Object} result - Step result with a `status` of "failed",
 *   "skipped" or "success" and the fields belonging to that status.
 * @returns {void}
 */
function printStepResult(name, result) {
    const icon = stepIcon(result);
    const { status } = result;
    if (status === "failed") {
        console.log(` ${icon} ${name} (${formatDuration(result.durationMs)}) — ${result.error}`);
        return;
    }
    if (status === "skipped") {
        console.log(` ${icon} ${name} — ${result.reason}`);
        return;
    }
    if (status === "success") {
        console.log(` ${icon} ${name} (${formatDuration(result.durationMs)}) — ${result.summary}`);
    }
}
|
|
408
|
-
// ---------------------------------------------------------------------------
|
|
409
|
-
// Pipeline execution
|
|
410
|
-
// ---------------------------------------------------------------------------
|
|
411
|
-
async function runPipeline() {
|
|
412
|
-
const pipelineStart = Date.now();
|
|
413
|
-
const steps = {};
|
|
414
|
-
console.log("=== ai-literacy-framework — Evaluation Pipeline ===\n");
|
|
415
|
-
const cacheStats = createCacheStats();
|
|
416
|
-
console.log(` Mode: ${mode}`);
|
|
417
|
-
console.log(` Source: ${source ?? "default (production)"}`);
|
|
418
|
-
console.log(` Dry run: ${dryRun}`);
|
|
419
|
-
console.log(` Skip fetch: ${skipFetch}`);
|
|
420
|
-
console.log(` Skip eval: ${skipEval}`);
|
|
421
|
-
console.log(` Cache: ${noCache ? "disabled (--no-cache)" : "enabled"}`);
|
|
422
|
-
if (urlArgs.length > 0) {
|
|
423
|
-
console.log(` URLs: ${urlArgs.length} URL(s)`);
|
|
424
|
-
for (const u of urlArgs)
|
|
425
|
-
console.log(` ${u}`);
|
|
426
|
-
}
|
|
427
|
-
if (datasetOverride) {
|
|
428
|
-
console.log(` Dataset: ${datasetOverride}`);
|
|
429
|
-
}
|
|
430
|
-
if (projectIdOverride) {
|
|
431
|
-
console.log(` Project ID: ${projectIdOverride}`);
|
|
432
|
-
}
|
|
433
|
-
if (perspectiveOverride) {
|
|
434
|
-
console.log(` Perspective: ${perspectiveOverride}`);
|
|
435
|
-
}
|
|
436
|
-
if (studioOriginOverride) {
|
|
437
|
-
console.log(` Studio: ${studioOriginOverride}`);
|
|
438
|
-
}
|
|
439
|
-
if (sanityDocumentArgs.length > 0) {
|
|
440
|
-
console.log(` Documents: ${sanityDocumentArgs.length} document(s)`);
|
|
441
|
-
for (const d of sanityDocumentArgs)
|
|
442
|
-
console.log(` ${d}`);
|
|
443
|
-
}
|
|
444
|
-
if (allowedOriginArgs.length > 0) {
|
|
445
|
-
const originDisplay = allowedOriginArgs.join(", ");
|
|
446
|
-
const autoInferred = urlArgs.length > 0 &&
|
|
447
|
-
!args.some((a) => a === "--allowed-origin" || a === "--allowed-origins");
|
|
448
|
-
console.log(` Origin: ${originDisplay}${autoInferred ? " (auto-inferred)" : " (sandbox)"}`);
|
|
449
|
-
}
|
|
450
|
-
if (searchMode !== "open") {
|
|
451
|
-
const searchDesc = searchMode === "off" ? "disabled" : "results filtered to allowed origins";
|
|
452
|
-
console.log(` Search: ${searchMode} (${searchDesc})`);
|
|
453
|
-
}
|
|
454
|
-
if (headerArgs.length > 0) {
|
|
455
|
-
console.log(` Headers: ${headerArgs.length} custom header(s)`);
|
|
456
|
-
}
|
|
457
|
-
if (outputPath) {
|
|
458
|
-
console.log(` Output: ${outputPath}`);
|
|
459
|
-
}
|
|
460
|
-
if (debug) {
|
|
461
|
-
const filters = [
|
|
462
|
-
debug.firstN ? `first ${debug.firstN}` : null,
|
|
463
|
-
debug.pattern ? `pattern "${debug.pattern}"` : null,
|
|
464
|
-
debug.sample ? `sample ${debug.sample}` : null,
|
|
465
|
-
].filter(Boolean);
|
|
466
|
-
console.log(` Debug: ${filters.length > 0 ? filters.join(", ") : "first 2 (default)"}`);
|
|
467
|
-
}
|
|
468
|
-
if (concurrency) {
|
|
469
|
-
console.log(` Concurrency: ${concurrency}`);
|
|
470
|
-
}
|
|
471
|
-
if (changedDocsOption) {
|
|
472
|
-
const slugs = changedDocsOption
|
|
473
|
-
.split(",")
|
|
474
|
-
.map((s) => s.trim())
|
|
475
|
-
.filter(Boolean);
|
|
476
|
-
console.log(` Changed: ${slugs.length} document(s)`);
|
|
477
|
-
for (const s of slugs)
|
|
478
|
-
console.log(` ${s}`);
|
|
479
|
-
if (impactSummary) {
|
|
480
|
-
if (impactSummary.areas.length > 0) {
|
|
481
|
-
console.log(` Affected: ${impactSummary.taskIds.length} task(s) in ${impactSummary.areas.length} area(s)`);
|
|
482
|
-
console.log(` areas: ${impactSummary.areas.join(", ")}`);
|
|
483
|
-
console.log(` tasks: ${impactSummary.taskIds.join(", ")}`);
|
|
484
|
-
}
|
|
485
|
-
if (impactSummary.unmatchedSlugs.length > 0) {
|
|
486
|
-
console.log(` Unmatched: ${impactSummary.unmatchedSlugs.length} document(s) not in any task`);
|
|
487
|
-
for (const s of impactSummary.unmatchedSlugs)
|
|
488
|
-
console.log(` ${s}`);
|
|
489
|
-
}
|
|
490
|
-
}
|
|
491
|
-
}
|
|
492
|
-
if (areaOption) {
|
|
493
|
-
console.log(` Areas: ${process.env.EVAL_FILTER_AREAS ?? areaOption}`);
|
|
494
|
-
}
|
|
495
|
-
else if (impactSummary?.areas.length) {
|
|
496
|
-
console.log(` Areas: ${impactSummary.areas.join(", ")} (from changed docs)`);
|
|
497
|
-
}
|
|
498
|
-
if (taskOption) {
|
|
499
|
-
console.log(` Tasks: ${process.env.EVAL_FILTER_TASKS ?? taskOption}`);
|
|
500
|
-
}
|
|
501
|
-
else if (impactSummary?.taskIds.length) {
|
|
502
|
-
console.log(` Tasks: ${impactSummary.taskIds.join(", ")} (from changed docs)`);
|
|
503
|
-
}
|
|
504
|
-
if (graderReplications) {
|
|
505
|
-
console.log(` Grader: ${graderReplications} replications`);
|
|
506
|
-
}
|
|
507
|
-
if (beforeOption) {
|
|
508
|
-
console.log(` Before: ${beforeOption}`);
|
|
509
|
-
}
|
|
510
|
-
if (compareEnabled) {
|
|
511
|
-
const vsTarget = compareBaseline
|
|
512
|
-
? ` (vs ${compareBaseline})`
|
|
513
|
-
: beforeOption
|
|
514
|
-
? ` (vs --before ${beforeOption})`
|
|
515
|
-
: " (vs latest baseline)";
|
|
516
|
-
console.log(` Compare: enabled${vsTarget}`);
|
|
517
|
-
if (compareThreshold !== undefined) {
|
|
518
|
-
console.log(` Threshold: ±${compareThreshold}`);
|
|
519
|
-
}
|
|
520
|
-
}
|
|
521
|
-
if (readinessEnabled) {
|
|
522
|
-
const readinessArea = areaOption ?? process.env.EVAL_FILTER_AREAS;
|
|
523
|
-
console.log(` Readiness: enabled${readinessArea ? ` (area: ${readinessArea})` : " (all areas)"}`);
|
|
524
|
-
}
|
|
525
|
-
if (discoveryReportEnabled) {
|
|
526
|
-
console.log(" Discovery: enabled");
|
|
527
|
-
}
|
|
528
|
-
if (publishEnabled) {
|
|
529
|
-
console.log(` Publish: enabled${publishTag ? ` (tag: "${publishTag}")` : ""}`);
|
|
530
|
-
}
|
|
531
|
-
console.log();
|
|
532
|
-
// -----------------------------------------------------------------------
|
|
533
|
-
// Step 0: Validate configuration
|
|
534
|
-
// -----------------------------------------------------------------------
|
|
535
|
-
console.log("─── Step 0: Validate Configuration ────────────────────────\n");
|
|
536
|
-
const validation = validateConfiguration(ROOT);
|
|
537
|
-
const envIssues = checkEnvironment(ROOT);
|
|
538
|
-
validation.issues.push(...envIssues);
|
|
539
|
-
validation.valid =
|
|
540
|
-
validation.valid && envIssues.every((i) => i.severity !== "error");
|
|
541
|
-
const errors = validation.issues.filter((i) => i.severity === "error");
|
|
542
|
-
const warnings = validation.issues.filter((i) => i.severity === "warning");
|
|
543
|
-
if (warnings.length > 0) {
|
|
544
|
-
for (const w of warnings) {
|
|
545
|
-
console.log(` ⚠️ [${w.source}] ${w.message}`);
|
|
546
|
-
}
|
|
547
|
-
console.log();
|
|
548
|
-
}
|
|
549
|
-
if (errors.length > 0) {
|
|
550
|
-
console.log("❌ Configuration validation failed:\n");
|
|
551
|
-
for (const e of errors) {
|
|
552
|
-
console.log(` ERROR [${e.source}] ${e.message}`);
|
|
553
|
-
if (e.path)
|
|
554
|
-
console.log(` at ${e.path}`);
|
|
555
|
-
}
|
|
556
|
-
return {
|
|
557
|
-
durationMs: Date.now() - pipelineStart,
|
|
558
|
-
steps,
|
|
559
|
-
success: false,
|
|
560
|
-
validation,
|
|
561
|
-
};
|
|
562
|
-
}
|
|
563
|
-
console.log(" ✅ Configuration is valid\n");
|
|
564
|
-
if (dryRun) {
|
|
565
|
-
console.log("─── Dry run complete ────────────────────────────────────────────\n");
|
|
566
|
-
console.log(" Pipeline configuration is valid. No steps were executed.");
|
|
567
|
-
console.log(" Remove --dry-run to execute the full pipeline.\n");
|
|
568
|
-
return {
|
|
569
|
-
durationMs: Date.now() - pipelineStart,
|
|
570
|
-
steps,
|
|
571
|
-
success: true,
|
|
572
|
-
validation,
|
|
573
|
-
};
|
|
574
|
-
}
|
|
575
|
-
// -----------------------------------------------------------------------
|
|
576
|
-
// Step 1: Fetch documentation
|
|
577
|
-
// -----------------------------------------------------------------------
|
|
578
|
-
console.log("─── Step 1: Fetch Documentation ───────────────────────────\n");
|
|
579
|
-
if (skipFetch) {
|
|
580
|
-
steps["fetch-docs"] = { reason: "--skip-fetch", status: "skipped" };
|
|
581
|
-
cacheStats.skipped++;
|
|
582
|
-
}
|
|
583
|
-
else {
|
|
584
|
-
steps["fetch-docs"] = await runFetchDocs(source, noCache);
|
|
585
|
-
}
|
|
586
|
-
trackCacheStats(cacheStats, "fetch-docs", steps["fetch-docs"], noCache);
|
|
587
|
-
printStepResult("fetch-docs", steps["fetch-docs"]);
|
|
588
|
-
console.log();
|
|
589
|
-
if (steps["fetch-docs"].status === "failed") {
|
|
590
|
-
return buildResult(pipelineStart, steps, validation, cacheStats);
|
|
591
|
-
}
|
|
592
|
-
// -----------------------------------------------------------------------
|
|
593
|
-
// Step 2: Generate configs
|
|
594
|
-
// -----------------------------------------------------------------------
|
|
595
|
-
console.log("─── Step 2: Generate Configs ──────────────────────────────\n");
|
|
596
|
-
steps["generate-configs"] = runGenerateConfigs(source, noCache);
|
|
597
|
-
trackCacheStats(cacheStats, "generate-configs", steps["generate-configs"], noCache);
|
|
598
|
-
printStepResult("generate-configs", steps["generate-configs"]);
|
|
599
|
-
console.log();
|
|
600
|
-
if (steps["generate-configs"].status === "failed") {
|
|
601
|
-
return buildResult(pipelineStart, steps, validation, cacheStats);
|
|
602
|
-
}
|
|
603
|
-
// -----------------------------------------------------------------------
|
|
604
|
-
// Step 3: Run evaluation
|
|
605
|
-
// -----------------------------------------------------------------------
|
|
606
|
-
console.log("─── Step 3: Run Evaluation ────────────────────────────────\n");
|
|
607
|
-
if (skipEval) {
|
|
608
|
-
steps["eval"] = { reason: "--skip-eval", status: "skipped" };
|
|
609
|
-
cacheStats.skipped++;
|
|
610
|
-
}
|
|
611
|
-
else {
|
|
612
|
-
const evalResult = await runEval(mode, debug, concurrency, noCache);
|
|
613
|
-
steps["eval"] = evalResult.stepResult;
|
|
614
|
-
}
|
|
615
|
-
trackCacheStats(cacheStats, "eval", steps["eval"], noCache);
|
|
616
|
-
printStepResult("eval", steps["eval"]);
|
|
617
|
-
console.log();
|
|
618
|
-
if (steps["eval"].status === "failed") {
|
|
619
|
-
return buildResult(pipelineStart, steps, validation, cacheStats);
|
|
620
|
-
}
|
|
621
|
-
// -----------------------------------------------------------------------
|
|
622
|
-
// Step 3b: Extract Promptfoo share URL (non-blocking)
|
|
623
|
-
// -----------------------------------------------------------------------
|
|
624
|
-
// Promptfoo auto-shares when PROMPTFOO_API_KEY is set during eval.
|
|
625
|
-
// Extract the URL from the eval results JSON for inclusion in the report.
|
|
626
|
-
let resolvedPromptfooUrl = promptfooUrl;
|
|
627
|
-
if (!resolvedPromptfooUrl && !skipEval) {
|
|
628
|
-
resolvedPromptfooUrl = extractShareUrl(mode);
|
|
629
|
-
if (resolvedPromptfooUrl) {
|
|
630
|
-
console.log(` 📤 Promptfoo results: ${resolvedPromptfooUrl}\n`);
|
|
631
|
-
}
|
|
632
|
-
}
|
|
633
|
-
// -----------------------------------------------------------------------
|
|
634
|
-
// Step 3c: Grader consistency analysis (optional)
|
|
635
|
-
// -----------------------------------------------------------------------
|
|
636
|
-
if (graderReplications) {
|
|
637
|
-
console.log("─── Step 3c: Grader Consistency Analysis ────────────────────\n");
|
|
638
|
-
steps["grader-consistency"] = runGraderConsistency(graderReplications, mode);
|
|
639
|
-
printStepResult("grader-consistency", steps["grader-consistency"]);
|
|
640
|
-
console.log();
|
|
641
|
-
if (steps["grader-consistency"].status === "failed") {
|
|
642
|
-
return buildResult(pipelineStart, steps, validation, cacheStats);
|
|
643
|
-
}
|
|
644
|
-
}
|
|
645
|
-
// -----------------------------------------------------------------------
|
|
646
|
-
// Step 4: Calculate scores
|
|
647
|
-
// -----------------------------------------------------------------------
|
|
648
|
-
console.log("─── Step 4: Calculate Scores ──────────────────────────────\n");
|
|
649
|
-
steps["calculate-scores"] = runCalculateScores(source, mode, noCache);
|
|
650
|
-
trackCacheStats(cacheStats, "calculate-scores", steps["calculate-scores"], noCache);
|
|
651
|
-
printStepResult("calculate-scores", steps["calculate-scores"]);
|
|
652
|
-
console.log();
|
|
653
|
-
if (steps["calculate-scores"].status === "failed") {
|
|
654
|
-
return buildResult(pipelineStart, steps, validation, cacheStats);
|
|
655
|
-
}
|
|
656
|
-
// -----------------------------------------------------------------------
|
|
657
|
-
// Step 4b: Gap analysis (optional, Phase 3)
|
|
658
|
-
// -----------------------------------------------------------------------
|
|
659
|
-
if (gapAnalysisEnabled) {
|
|
660
|
-
console.log("─── Step 4b: Gap Analysis ─────────────────────────────────\n");
|
|
661
|
-
const judgmentsPath = resolve(ROOT, "results", "latest", "grader-judgments.json");
|
|
662
|
-
const scoreSummaryPath = resolve(ROOT, "results", "latest", "score-summary.json");
|
|
663
|
-
if (!existsSync(judgmentsPath)) {
|
|
664
|
-
steps["gap-analysis"] = {
|
|
665
|
-
reason: "No grader-judgments.json — run a full evaluation first",
|
|
666
|
-
status: "skipped",
|
|
667
|
-
};
|
|
668
|
-
}
|
|
669
|
-
else if (!existsSync(scoreSummaryPath)) {
|
|
670
|
-
steps["gap-analysis"] = {
|
|
671
|
-
reason: "No score-summary.json",
|
|
672
|
-
status: "skipped",
|
|
673
|
-
};
|
|
674
|
-
}
|
|
675
|
-
else {
|
|
676
|
-
const gaStart = Date.now();
|
|
677
|
-
try {
|
|
678
|
-
const { buildFailureModeReport, formatFailureModesConsole } = await import("../pipeline/failure-modes.js");
|
|
679
|
-
const { buildGapAnalysisReport, formatGapAnalysisConsole } = await import("../pipeline/gap-analysis.js");
|
|
680
|
-
const judgments = JSON.parse(readFileSync(judgmentsPath, "utf-8"));
|
|
681
|
-
const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
|
|
682
|
-
// Phase 3a: Failure mode classification
|
|
683
|
-
const failureModeReport = buildFailureModeReport(judgments, scoreSummary.scores);
|
|
684
|
-
console.log(formatFailureModesConsole(failureModeReport));
|
|
685
|
-
// Phase 3b: Gap analysis (impact estimation)
|
|
686
|
-
const gapReport = buildGapAnalysisReport(failureModeReport, scoreSummary.scores);
|
|
687
|
-
console.log(formatGapAnalysisConsole(gapReport));
|
|
688
|
-
// Persist reports
|
|
689
|
-
const outDir = resolve(ROOT, "results", "latest");
|
|
690
|
-
writeFileSync(join(outDir, "failure-modes.json"), JSON.stringify(failureModeReport, null, 2));
|
|
691
|
-
writeFileSync(join(outDir, "gap-analysis.json"), JSON.stringify(gapReport, null, 2));
|
|
692
|
-
// Enrich score-summary.json with failure mode data
|
|
693
|
-
const enrichedSummary = {
|
|
694
|
-
...scoreSummary,
|
|
695
|
-
failureModes: failureModeReport,
|
|
696
|
-
};
|
|
697
|
-
writeFileSync(scoreSummaryPath, JSON.stringify(enrichedSummary, null, 2));
|
|
698
|
-
const gapCount = gapReport.gaps.length;
|
|
699
|
-
const classRate = failureModeReport.classificationRate.toFixed(0);
|
|
700
|
-
steps["gap-analysis"] = {
|
|
701
|
-
durationMs: Date.now() - gaStart,
|
|
702
|
-
status: "success",
|
|
703
|
-
summary: `${failureModeReport.totalJudgments} judgments analyzed (${classRate}% classified), ${gapCount} actionable gaps identified`,
|
|
704
|
-
};
|
|
705
|
-
}
|
|
706
|
-
catch (err) {
|
|
707
|
-
steps["gap-analysis"] = {
|
|
708
|
-
durationMs: Date.now() - gaStart,
|
|
709
|
-
error: err instanceof Error ? err.message : String(err),
|
|
710
|
-
status: "failed",
|
|
711
|
-
};
|
|
712
|
-
}
|
|
713
|
-
}
|
|
714
|
-
printStepResult("gap-analysis", steps["gap-analysis"]);
|
|
715
|
-
console.log();
|
|
716
|
-
// Gap analysis failures are non-fatal — it's diagnostic, not blocking
|
|
717
|
-
if (steps["gap-analysis"].status === "failed") {
|
|
718
|
-
console.warn(" ⚠️ Gap analysis failed — continuing without diagnostic data\n");
|
|
719
|
-
}
|
|
720
|
-
}
|
|
721
|
-
// -----------------------------------------------------------------------
|
|
722
|
-
// Step 4c: Publish report to store + sinks (optional)
|
|
723
|
-
// -----------------------------------------------------------------------
|
|
724
|
-
if (publishEnabled) {
|
|
725
|
-
console.log("─── Step 4b: Publish Report ───────────────────────────────\n");
|
|
726
|
-
steps["publish-report"] = await runPublishReport(pipelineStart, {
|
|
727
|
-
promptfooUrl: resolvedPromptfooUrl,
|
|
728
|
-
reportDataset,
|
|
729
|
-
reportProjectId,
|
|
730
|
-
tag: publishTag,
|
|
731
|
-
});
|
|
732
|
-
printStepResult("publish-report", steps["publish-report"]);
|
|
733
|
-
console.log();
|
|
734
|
-
// Publish failures are non-fatal (P5: local-first)
|
|
735
|
-
if (steps["publish-report"].status === "failed") {
|
|
736
|
-
console.warn(" ⚠️ Publish failed — continuing with local results only\n");
|
|
737
|
-
}
|
|
738
|
-
}
|
|
739
|
-
// -----------------------------------------------------------------------
|
|
740
|
-
// Step 5: Generate report
|
|
741
|
-
// -----------------------------------------------------------------------
|
|
742
|
-
console.log("─── Step 5: Generate Report ───────────────────────────────\n");
|
|
743
|
-
steps["report"] = runReport(outputPath, resolvedPromptfooUrl);
|
|
744
|
-
printStepResult("report", steps["report"]);
|
|
745
|
-
console.log();
|
|
746
|
-
// -----------------------------------------------------------------------
|
|
747
|
-
// Step 5b: Compare against baseline (optional)
|
|
748
|
-
// -----------------------------------------------------------------------
|
|
749
|
-
if (compareEnabled) {
|
|
750
|
-
console.log("─── Step 5b: Compare Against Baseline ─────────────────────\n");
|
|
751
|
-
// Resolve --before option to a baseline file path
|
|
752
|
-
let resolvedBaseline = compareBaseline;
|
|
753
|
-
if (beforeOption && !resolvedBaseline) {
|
|
754
|
-
if (beforeOption === "latest-baseline") {
|
|
755
|
-
// Use the most recent baseline file (compare step handles this by default)
|
|
756
|
-
resolvedBaseline = undefined;
|
|
757
|
-
}
|
|
758
|
-
else if (beforeOption === "published" ||
|
|
759
|
-
beforeOption === "production") {
|
|
760
|
-
// TODO (Phase 2b full): Run before-evaluation with the specified source.
|
|
761
|
-
// For now, fall back to latest baseline and log a message.
|
|
762
|
-
console.log(` ℹ️ --before ${beforeOption}: full before/after orchestration not yet implemented.`);
|
|
763
|
-
console.log(" Falling back to comparison against latest baseline.\n");
|
|
764
|
-
resolvedBaseline = undefined;
|
|
765
|
-
}
|
|
766
|
-
else if (existsSync(beforeOption)) {
|
|
767
|
-
// Treat as a path to a score-summary.json file
|
|
768
|
-
resolvedBaseline = beforeOption;
|
|
769
|
-
console.log(` 📂 Using before-state from file: ${resolvedBaseline}\n`);
|
|
770
|
-
}
|
|
771
|
-
else {
|
|
772
|
-
console.warn(` ⚠️ --before "${beforeOption}" is not a recognized option or file path.`);
|
|
773
|
-
console.warn(" Expected: published, production, latest-baseline, or a file path.\n");
|
|
774
|
-
}
|
|
775
|
-
}
|
|
776
|
-
steps["compare"] = runCompare(ROOT, resolvedBaseline, {
|
|
777
|
-
noiseThreshold: compareThreshold,
|
|
778
|
-
});
|
|
779
|
-
printStepResult("compare", steps["compare"]);
|
|
780
|
-
// If --changed-docs was provided and comparison succeeded, run attribution
|
|
781
|
-
if (steps["compare"].status === "success" &&
|
|
782
|
-
changedDocsOption &&
|
|
783
|
-
impactSummary) {
|
|
784
|
-
const comparisonReportPath = resolve(ROOT, "results", "latest", "comparison-report.json");
|
|
785
|
-
if (existsSync(comparisonReportPath)) {
|
|
786
|
-
try {
|
|
787
|
-
const { attributeChanges, formatAttributionConsole } = await import("../pipeline/attribution.js");
|
|
788
|
-
const { resolveMappings } = await import("../pipeline/resolve-mappings.js");
|
|
789
|
-
const comparisonReport = JSON.parse(readFileSync(comparisonReportPath, "utf-8"));
|
|
790
|
-
const mappings = resolveMappings(ROOT);
|
|
791
|
-
const changedSlugs = changedDocsOption
|
|
792
|
-
.split(",")
|
|
793
|
-
.map((s) => s.trim())
|
|
794
|
-
.filter(Boolean);
|
|
795
|
-
const attribution = attributeChanges(comparisonReport, changedSlugs, mappings, comparisonReport.noiseThreshold);
|
|
796
|
-
// Write attribution to comparison report
|
|
797
|
-
const enrichedReport = { ...comparisonReport, attribution };
|
|
798
|
-
writeFileSync(comparisonReportPath, JSON.stringify(enrichedReport, null, 2));
|
|
799
|
-
// Print attribution to console
|
|
800
|
-
console.log();
|
|
801
|
-
console.log(formatAttributionConsole(attribution));
|
|
802
|
-
}
|
|
803
|
-
catch (err) {
|
|
804
|
-
console.warn(` ⚠️ Attribution failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
805
|
-
}
|
|
806
|
-
}
|
|
807
|
-
}
|
|
808
|
-
console.log();
|
|
809
|
-
}
|
|
810
|
-
// -----------------------------------------------------------------------
|
|
811
|
-
// Step 6b: Readiness report (optional, Phase 5b)
|
|
812
|
-
// -----------------------------------------------------------------------
|
|
813
|
-
if (readinessEnabled) {
|
|
814
|
-
console.log("─── Step 6b: Readiness Report ─────────────────────────────────\n");
|
|
815
|
-
const readinessStart = Date.now();
|
|
816
|
-
try {
|
|
817
|
-
const { formatReadinessMarkdown, generateReadinessReport } = await import("../scripts/readiness-report.js");
|
|
818
|
-
const { ThresholdConfigSchema } = await import("../pipeline/schemas.js");
|
|
819
|
-
const scoreSummaryPath = resolve(ROOT, "results", "latest", "score-summary.json");
|
|
820
|
-
const thresholdsPath = resolve(ROOT, "config", "thresholds.yaml");
|
|
821
|
-
if (!existsSync(scoreSummaryPath)) {
|
|
822
|
-
steps["readiness"] = {
|
|
823
|
-
durationMs: Date.now() - readinessStart,
|
|
824
|
-
error: "score-summary.json not found",
|
|
825
|
-
status: "failed",
|
|
826
|
-
};
|
|
827
|
-
}
|
|
828
|
-
else if (!existsSync(thresholdsPath)) {
|
|
829
|
-
steps["readiness"] = {
|
|
830
|
-
durationMs: Date.now() - readinessStart,
|
|
831
|
-
error: "config/thresholds.yaml not found — required for readiness report",
|
|
832
|
-
status: "failed",
|
|
833
|
-
};
|
|
834
|
-
}
|
|
835
|
-
else {
|
|
836
|
-
const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
|
|
837
|
-
const rawThresholds = load(readFileSync(thresholdsPath, "utf-8"));
|
|
838
|
-
const thresholdConfig = ThresholdConfigSchema.parse(rawThresholds);
|
|
839
|
-
// Load gap analysis if available
|
|
840
|
-
const gapPath = resolve(ROOT, "results", "latest", "gap-analysis.json");
|
|
841
|
-
const gapAnalysis = existsSync(gapPath)
|
|
842
|
-
? JSON.parse(readFileSync(gapPath, "utf-8"))
|
|
843
|
-
: undefined;
|
|
844
|
-
// Determine which areas to generate readiness for
|
|
845
|
-
const readinessAreas = areaOption
|
|
846
|
-
? areaOption.split(",").map((s) => s.trim())
|
|
847
|
-
: process.env.EVAL_FILTER_AREAS
|
|
848
|
-
? process.env.EVAL_FILTER_AREAS.split(",").map((s) => s.trim())
|
|
849
|
-
: scoreSummary.scores.map((s) => s.feature);
|
|
850
|
-
const readinessLines = [];
|
|
851
|
-
for (const area of readinessAreas) {
|
|
852
|
-
const areaScore = scoreSummary.scores.find((s) => s.feature === area);
|
|
853
|
-
if (!areaScore) {
|
|
854
|
-
console.warn(` ⚠️ Area "${area}" not found in scores — skipping`);
|
|
855
|
-
continue;
|
|
856
|
-
}
|
|
857
|
-
const report = generateReadinessReport({
|
|
858
|
-
area,
|
|
859
|
-
gapAnalysis,
|
|
860
|
-
scoreSummary,
|
|
861
|
-
thresholdConfig,
|
|
862
|
-
});
|
|
863
|
-
const md = formatReadinessMarkdown(report);
|
|
864
|
-
readinessLines.push(md);
|
|
865
|
-
console.log(md);
|
|
866
|
-
}
|
|
867
|
-
// Write combined readiness output
|
|
868
|
-
if (readinessLines.length > 0) {
|
|
869
|
-
const readinessOutPath = resolve(ROOT, "results", "latest", "readiness-report.md");
|
|
870
|
-
writeFileSync(readinessOutPath, readinessLines.join("\n---\n\n"));
|
|
871
|
-
}
|
|
872
|
-
const passCount = readinessAreas.filter((area) => {
|
|
873
|
-
const areaScore = scoreSummary.scores.find((s) => s.feature === area);
|
|
874
|
-
if (!areaScore)
|
|
875
|
-
return false;
|
|
876
|
-
const report = generateReadinessReport({
|
|
877
|
-
area,
|
|
878
|
-
scoreSummary,
|
|
879
|
-
thresholdConfig,
|
|
880
|
-
});
|
|
881
|
-
return report.pass;
|
|
882
|
-
}).length;
|
|
883
|
-
steps["readiness"] = {
|
|
884
|
-
durationMs: Date.now() - readinessStart,
|
|
885
|
-
status: "success",
|
|
886
|
-
summary: `${passCount}/${readinessAreas.length} areas ready`,
|
|
887
|
-
};
|
|
888
|
-
}
|
|
889
|
-
}
|
|
890
|
-
catch (err) {
|
|
891
|
-
steps["readiness"] = {
|
|
892
|
-
durationMs: Date.now() - readinessStart,
|
|
893
|
-
error: err instanceof Error ? err.message : String(err),
|
|
894
|
-
status: "failed",
|
|
895
|
-
};
|
|
896
|
-
}
|
|
897
|
-
printStepResult("readiness", steps["readiness"]);
|
|
898
|
-
console.log();
|
|
899
|
-
}
|
|
900
|
-
// -----------------------------------------------------------------------
|
|
901
|
-
// Step 6c: Discovery report (optional, Phase 5c)
|
|
902
|
-
// -----------------------------------------------------------------------
|
|
903
|
-
if (discoveryReportEnabled) {
|
|
904
|
-
console.log("─── Step 6c: Discovery Report ─────────────────────────────────\n");
|
|
905
|
-
const discoveryStart = Date.now();
|
|
906
|
-
try {
|
|
907
|
-
const { formatDiscoveryMarkdown, generateDiscoveryReport } = await import("../scripts/discovery-report.js");
|
|
908
|
-
const scoreSummaryPath = resolve(ROOT, "results", "latest", "score-summary.json");
|
|
909
|
-
if (!existsSync(scoreSummaryPath)) {
|
|
910
|
-
steps["discovery-report"] = {
|
|
911
|
-
durationMs: Date.now() - discoveryStart,
|
|
912
|
-
error: "score-summary.json not found",
|
|
913
|
-
status: "failed",
|
|
914
|
-
};
|
|
915
|
-
}
|
|
916
|
-
else {
|
|
917
|
-
const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
|
|
918
|
-
if (!scoreSummary.retrievalMetrics) {
|
|
919
|
-
steps["discovery-report"] = {
|
|
920
|
-
reason: "No retrieval metrics in score summary — run an agentic evaluation first",
|
|
921
|
-
status: "skipped",
|
|
922
|
-
};
|
|
923
|
-
}
|
|
924
|
-
else {
|
|
925
|
-
const areaFilter = areaOption
|
|
926
|
-
? areaOption.split(",").map((s) => s.trim())
|
|
927
|
-
: undefined;
|
|
928
|
-
const report = generateDiscoveryReport(scoreSummary, areaFilter);
|
|
929
|
-
const md = formatDiscoveryMarkdown(report);
|
|
930
|
-
// Write to file
|
|
931
|
-
const discoveryOutPath = resolve(ROOT, "results", "latest", "discovery-report.md");
|
|
932
|
-
writeFileSync(discoveryOutPath, md);
|
|
933
|
-
console.log(md);
|
|
934
|
-
const invisible = report.invisibleDocs.length;
|
|
935
|
-
const f1 = report.overall.avgF1.toFixed(2);
|
|
936
|
-
steps["discovery-report"] = {
|
|
937
|
-
durationMs: Date.now() - discoveryStart,
|
|
938
|
-
status: "success",
|
|
939
|
-
summary: `F1=${f1}, ${invisible} invisible doc${invisible === 1 ? "" : "s"}, ${report.recommendations.length} recommendation${report.recommendations.length === 1 ? "" : "s"}`,
|
|
940
|
-
};
|
|
941
|
-
}
|
|
942
|
-
}
|
|
943
|
-
}
|
|
944
|
-
catch (err) {
|
|
945
|
-
steps["discovery-report"] = {
|
|
946
|
-
durationMs: Date.now() - discoveryStart,
|
|
947
|
-
error: err instanceof Error ? err.message : String(err),
|
|
948
|
-
status: "failed",
|
|
949
|
-
};
|
|
950
|
-
}
|
|
951
|
-
printStepResult("discovery-report", steps["discovery-report"]);
|
|
952
|
-
console.log();
|
|
953
|
-
}
|
|
954
|
-
return buildResult(pipelineStart, steps, validation, cacheStats, resolvedPromptfooUrl);
|
|
955
|
-
}
|
|
956
|
-
/**
 * Map a step result to the emoji shown next to it in console output.
 *
 * @param {{ status: string }} result - Step result; only `status` is read.
 * @returns {string} "❌" for "failed", "⏭️" for "skipped", "✅" for "success",
 *   and "❔" for any other status.
 */
function stepIcon(result) {
    switch (result.status) {
        case "failed":
            return "❌";
        case "skipped":
            return "⏭️";
        case "success":
            return "✅";
        default:
            // Without this branch an unrecognized status yields undefined,
            // which shows up as the literal text "undefined" when the icon
            // is interpolated into a string.
            return "❔";
    }
}
|
|
966
|
-
/**
 * Track cache hit/miss stats for a step based on its result summary.
 * Steps that return "Skipped (cached)" in their summary are cache hits.
 *
 * Mutates `stats` in place:
 * - skipped step: records "skipped" for the step and touches no counter;
 * - caching disabled (`noCacheFlag`): counts one step and one miss, records
 *   "disabled";
 * - otherwise: counts one step, then a hit when the result is a success whose
 *   summary starts with "Skipped (cached)", and a miss in every other case
 *   (including failed results and successes without a summary).
 *
 * @param {{ total: number, hits: number, misses: number, steps: Object }} stats
 *   Accumulator updated in place.
 * @param {string} stepName - Key under which the outcome is stored in `stats.steps`.
 * @param {{ status: string, summary?: string }} result - Step result to classify.
 * @param {boolean} noCacheFlag - True when caching was disabled for the run.
 * @returns {void}
 */
function trackCacheStats(stats, stepName, result, noCacheFlag) {
    if (result.status === "skipped") {
        stats.steps[stepName] = "skipped";
        // skipped count already incremented at skip site
        return;
    }
    // Every non-skipped step counts towards the total, cached or not.
    stats.total++;
    if (noCacheFlag) {
        stats.misses++;
        stats.steps[stepName] = "disabled";
        return;
    }
    // Optional chaining keeps a success result that carries no summary from
    // throwing a TypeError; such a result is simply counted as a miss.
    const isCacheHit = result.status === "success" &&
        result.summary?.startsWith("Skipped (cached)") === true;
    if (isCacheHit) {
        stats.hits++;
        stats.steps[stepName] = "hit";
    }
    else {
        stats.misses++;
        stats.steps[stepName] = "miss";
    }
}
|
|
993
|
-
// ---------------------------------------------------------------------------
|
|
994
|
-
// Main
|
|
995
|
-
// ---------------------------------------------------------------------------
|
|
996
|
-
// Run the pipeline, print a human-readable summary, persist a
// machine-readable result file, and exit with a status code that reflects
// the overall outcome (0 on success, 1 on failure).
const result = await runPipeline();
// Print summary
console.log("═══════════════════════════════════════════════════════════════\n");
if (result.success) {
    console.log(` ✅ Pipeline completed successfully (${formatDuration(result.durationMs)})`);
}
else {
    console.log(` ❌ Pipeline failed (${formatDuration(result.durationMs)})`);
}
console.log();
console.log(" Steps:");
for (const [name, step] of Object.entries(result.steps)) {
    printStepResult(name, step);
}
// Cache stats are only printed when at least one step was counted.
if (result.cache && result.cache.total > 0) {
    const c = result.cache;
    const parts = [];
    if (c.hits > 0)
        parts.push(`${c.hits} cached`);
    if (c.misses > 0)
        parts.push(`${c.misses} evaluated`);
    if (c.skipped > 0)
        parts.push(`${c.skipped} skipped`);
    console.log(`\n 📦 Cache: ${parts.join(", ")} (${c.total} total steps)`);
}
console.log();
// Write machine-readable result for CI consumption
const resultFile = resolve(ROOT, "results", "latest", "pipeline-result.json");
try {
    writeFileSync(resultFile, JSON.stringify(result, null, 2));
    console.log(` 📄 Pipeline result: ${resultFile}\n`);
}
catch (err) {
    // results/latest/ may not exist yet on first run — not critical, so the
    // failure stays non-fatal, but it is reported instead of being silently
    // dropped so CI logs explain a missing pipeline-result.json.
    console.warn(` ⚠️ Could not write pipeline result to ${resultFile}: ${err instanceof Error ? err.message : String(err)}\n`);
}
process.exit(result.success ? 0 : 1);
|