@sanity/ailf 2.7.0 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/dist/_vendor/ailf-core/artifact-registry.d.ts +72 -0
  2. package/dist/_vendor/ailf-core/artifact-registry.js +150 -0
  3. package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
  4. package/dist/_vendor/ailf-core/examples/index.js +1 -1
  5. package/dist/_vendor/ailf-core/index.d.ts +2 -1
  6. package/dist/_vendor/ailf-core/index.js +2 -1
  7. package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +3 -3
  8. package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +56 -0
  9. package/dist/_vendor/ailf-core/ports/artifact-writer.js +28 -0
  10. package/dist/_vendor/ailf-core/ports/context.d.ts +13 -3
  11. package/dist/_vendor/ailf-core/ports/index.d.ts +3 -3
  12. package/dist/_vendor/ailf-core/ports/index.js +1 -1
  13. package/dist/_vendor/ailf-core/types/branded-ids.d.ts +9 -0
  14. package/dist/_vendor/ailf-core/types/branded-ids.js +21 -0
  15. package/dist/_vendor/ailf-core/types/index.d.ts +117 -70
  16. package/dist/_vendor/ailf-core/types/index.js +1 -1
  17. package/dist/_vendor/ailf-shared/index.d.ts +2 -0
  18. package/dist/_vendor/ailf-shared/index.js +2 -0
  19. package/dist/_vendor/ailf-shared/run-context.d.ts +55 -0
  20. package/dist/_vendor/ailf-shared/run-context.js +17 -0
  21. package/dist/_vendor/ailf-shared/run-trigger.d.ts +30 -0
  22. package/dist/_vendor/ailf-shared/run-trigger.js +13 -0
  23. package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +39 -0
  24. package/dist/artifact-capture/api-gateway-artifact-writer.js +148 -0
  25. package/dist/artifact-capture/gcs-artifact-writer.d.ts +30 -0
  26. package/dist/artifact-capture/gcs-artifact-writer.js +119 -0
  27. package/dist/commands/init.js +2 -6
  28. package/dist/commands/publish.js +3 -2
  29. package/dist/composition-root.d.ts +3 -3
  30. package/dist/composition-root.js +20 -15
  31. package/dist/orchestration/build-step-sequence.js +6 -1
  32. package/dist/orchestration/steps/calculate-scores-step.js +42 -2
  33. package/dist/orchestration/steps/finalize-run-step.d.ts +29 -0
  34. package/dist/orchestration/steps/finalize-run-step.js +103 -0
  35. package/dist/orchestration/steps/publish-report-step.js +25 -27
  36. package/dist/pipeline/calculate-scores.js +13 -2
  37. package/dist/pipeline/provenance.d.ts +24 -44
  38. package/dist/pipeline/provenance.js +17 -165
  39. package/dist/pipeline/report-title.d.ts +2 -2
  40. package/dist/pipeline/run-context.d.ts +57 -0
  41. package/dist/pipeline/run-context.js +156 -0
  42. package/dist/pipeline/upload-test-outputs.d.ts +26 -0
  43. package/dist/pipeline/upload-test-outputs.js +34 -0
  44. package/dist/report-store.js +4 -2
  45. package/package.json +1 -1
  46. package/dist/_vendor/ailf-core/ports/artifact-uploader.d.ts +0 -35
  47. package/dist/_vendor/ailf-core/ports/artifact-uploader.js +0 -18
  48. package/dist/artifact-capture/api-gateway-artifact-uploader.d.ts +0 -41
  49. package/dist/artifact-capture/api-gateway-artifact-uploader.js +0 -123
  50. package/dist/artifact-capture/gcs-report-artifact-uploader.d.ts +0 -31
  51. package/dist/artifact-capture/gcs-report-artifact-uploader.js +0 -66
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Artifact registry — single source of truth for AILF's external artifact types.
3
+ *
4
+ * Every artifact that lives in GCS declares itself here exactly once:
5
+ * layout, path builder, entry schema, and (for per-entry layouts) key parser.
6
+ * Eval writers, the API Gateway's signing endpoint, and the Studio hook all
7
+ * consume this same record.
8
+ *
9
+ * Adding a new artifact type = one entry here. No call-site changes needed in
10
+ * the generic writer / signer / hook — they all iterate the registry.
11
+ *
12
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
13
+ * @see docs/design-docs/run-artifact-store.md (§ Move 4 — Artifact Registry)
14
+ */
15
+ import { z } from "zod";
16
+ import type { RunId } from "./types/branded-ids.js";
17
+ /** Layouts supported by the artifact store. */
18
+ export type ArtifactLayout = "bulk" | "per-entry";
19
+ /** The union of every artifact type known to AILF. */
20
+ export type ArtifactType = "testOutputs" | "renderedPrompts" | "rawResults" | "graderPrompts" | "taskDefinitions" | "evalResults" | "traces";
21
+ /**
22
+ * Result of parsing a per-entry key into a sanitized filename component.
23
+ * Success carries the sanitized value; failure carries a reason for 4xx responses.
24
+ */
25
+ export type ParsedEntryKey = {
26
+ ok: true;
27
+ sanitized: string;
28
+ } | {
29
+ ok: false;
30
+ reason: string;
31
+ };
32
+ /**
33
+ * Per-type declaration consumed by writers, signers, and readers.
34
+ *
35
+ * @typeParam TEntry - The shape of a single entry. For bulk layouts this is
36
+ * the shape of each value in the bulk object's index; for
37
+ * per-entry layouts it's the shape of a single GCS object.
38
+ */
39
+ export interface ArtifactDescriptor<TEntry = unknown> {
40
+ /** The artifact type identifier (matches the key in ARTIFACT_REGISTRY). */
41
+ type: ArtifactType;
42
+ /** Bulk (one object per run) or per-entry (one object per entryKey). */
43
+ layout: ArtifactLayout;
44
+ /** Kebab-case filename stem. Used by both bulk paths and per-entry dir names. */
45
+ slug: string;
46
+ /** Zod schema for validating a single entry. */
47
+ entrySchema: z.ZodType<TEntry>;
48
+ /**
49
+ * Build the GCS object path for this artifact.
50
+ * - bulk: returns `runs/{runId}/{slug}.json`; `entryKey` is ignored.
51
+ * - per-entry: requires `entryKey`; returns `runs/{runId}/{slug}/{sanitized}.json`.
52
+ */
53
+ objectPath: (runId: RunId, entryKey?: string) => string;
54
+ /**
55
+ * Validate a per-entry key and return its sanitized filename component.
56
+ * Only meaningful for `layout === "per-entry"` — unused when layout is bulk,
57
+ * but may be pre-declared so a future layout flip is a one-line change.
58
+ */
59
+ parseEntryKey?: (key: string) => ParsedEntryKey;
60
+ }
61
+ /**
62
+ * The canonical artifact descriptor for every artifact type. Iterate with
63
+ * `Object.values(ARTIFACT_REGISTRY)` or look up by `ARTIFACT_REGISTRY[type]`.
64
+ */
65
+ export declare const ARTIFACT_REGISTRY: Record<ArtifactType, ArtifactDescriptor>;
66
+ /** All artifact types in declaration order. */
67
+ export declare const ARTIFACT_TYPES: readonly ArtifactType[];
68
+ /**
69
+ * Type guard — validates that an arbitrary string is a known artifact type.
70
+ * Useful at API Gateway boundaries where the type comes from a URL parameter.
71
+ */
72
+ export declare function isArtifactType(value: string): value is ArtifactType;
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Artifact registry — single source of truth for AILF's external artifact types.
3
+ *
4
+ * Every artifact that lives in GCS declares itself here exactly once:
5
+ * layout, path builder, entry schema, and (for per-entry layouts) key parser.
6
+ * Eval writers, the API Gateway's signing endpoint, and the Studio hook all
7
+ * consume this same record.
8
+ *
9
+ * Adding a new artifact type = one entry here. No call-site changes needed in
10
+ * the generic writer / signer / hook — they all iterate the registry.
11
+ *
12
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
13
+ * @see docs/design-docs/run-artifact-store.md (§ Move 4 — Artifact Registry)
14
+ */
15
+ import { z } from "zod";
16
+ // ---------------------------------------------------------------------------
17
+ // Path + key helpers
18
+ // ---------------------------------------------------------------------------
19
+ function bulkPath(slug) {
20
+ return (runId) => `runs/${runId}/${slug}.json`;
21
+ }
22
+ function perEntryPath(slug) {
23
+ return (runId, entryKey) => {
24
+ if (entryKey === undefined) {
25
+ throw new Error(`Artifact "${slug}" uses per-entry layout; an entry key is required`);
26
+ }
27
+ const sanitized = sanitizeEntryKey(entryKey);
28
+ return `runs/${runId}/${slug}/${sanitized}.json`;
29
+ };
30
+ }
31
/**
 * Turn a wire-format entry key (e.g. `{taskId}::{modelId}`) into a component
 * that is safe to use as a filename.
 *
 * Two substitutions are applied, in this order:
 *
 * 1. Every `::` becomes `--`, keeping the wire separator out of filenames.
 * 2. Every `/` becomes `_`, so a task name such as
 *    "Content Lake with @sanity/client" does not create nested GCS
 *    "directories" and each entry stays a single row when listing the
 *    per-entry directory.
 *
 * A lone `:` is left untouched; model ids like
 * `anthropic:messages:claude-opus-4-6` are valid GCS object names.
 *
 * NOTE: the mapping is lossy. Keys that differ only in `--` vs `::`, or in
 * `_` vs `/`, sanitize to the same value. Production task ids are not
 * expected to hit this; if free-form, user-supplied names ever become keys,
 * move to percent-encoding or a hash-based scheme at the key boundary.
 *
 * @param key - The wire-format entry key.
 * @returns The filename-safe form of `key`.
 */
function sanitizeEntryKey(key) {
    const withoutWireSeparator = key.split("::").join("--");
    const withoutSlashes = withoutWireSeparator.split("/").join("_");
    return withoutSlashes;
}
53
/**
 * Parse and validate an entry key of the form `{taskId}::{modelId}`. Used for
 * testOutputs today and intended for other per-entry artifact types later.
 *
 * `::` (double colon) is the separator. Single colons are allowed inside
 * either segment, since production model ids commonly look like
 * `anthropic:messages:claude-opus-4-6`. A key is accepted only when `::`
 * occurs exactly once and both segments are non-empty; the failure reason is
 * meant to let the API Gateway answer malformed input with a 400.
 *
 * @param key - The wire-format entry key to validate.
 * @returns `{ ok: true, sanitized }` with the filename-safe key on success,
 *   otherwise `{ ok: false, reason }` describing the violation.
 */
function parseTaskModelKey(key) {
    // Anything captured in `extra` means the separator appeared more than once.
    const [taskId, modelId, ...extra] = key.split("::");
    const wellFormed = extra.length === 0 && Boolean(taskId) && Boolean(modelId);
    if (wellFormed) {
        return { ok: true, sanitized: sanitizeEntryKey(key) };
    }
    return {
        ok: false,
        reason: `Entry key "${key}" must match {taskId}::{modelId} with exactly one "::" separator and non-empty segments`,
    };
}
73
+ // ---------------------------------------------------------------------------
74
+ // Entry schemas
75
+ // ---------------------------------------------------------------------------
76
+ const testOutputEntrySchema = z.object({
77
+ responseOutput: z.string(),
78
+ responseOutputTruncated: z.boolean(),
79
+ });
80
+ // Aspirational: renderedPrompts / rawResults / traces / etc. currently have
81
+ // loose shapes. Tighten per-type as consumers stabilize.
82
+ const unknownEntry = z.unknown();
83
+ // ---------------------------------------------------------------------------
84
+ // The registry
85
+ // ---------------------------------------------------------------------------
86
+ /**
87
+ * The canonical artifact descriptor for every artifact type. Iterate with
88
+ * `Object.values(ARTIFACT_REGISTRY)` or look up by `ARTIFACT_REGISTRY[type]`.
89
+ */
90
+ export const ARTIFACT_REGISTRY = {
91
+ testOutputs: {
92
+ type: "testOutputs",
93
+ layout: "per-entry",
94
+ slug: "test-outputs",
95
+ entrySchema: testOutputEntrySchema,
96
+ objectPath: perEntryPath("test-outputs"),
97
+ parseEntryKey: parseTaskModelKey,
98
+ },
99
+ renderedPrompts: {
100
+ type: "renderedPrompts",
101
+ layout: "bulk",
102
+ slug: "rendered-prompts",
103
+ entrySchema: unknownEntry,
104
+ objectPath: bulkPath("rendered-prompts"),
105
+ },
106
+ rawResults: {
107
+ type: "rawResults",
108
+ layout: "bulk",
109
+ slug: "raw-results",
110
+ entrySchema: unknownEntry,
111
+ objectPath: bulkPath("raw-results"),
112
+ },
113
+ graderPrompts: {
114
+ type: "graderPrompts",
115
+ layout: "bulk",
116
+ slug: "grader-prompts",
117
+ entrySchema: unknownEntry,
118
+ objectPath: bulkPath("grader-prompts"),
119
+ },
120
+ taskDefinitions: {
121
+ type: "taskDefinitions",
122
+ layout: "bulk",
123
+ slug: "task-definitions",
124
+ entrySchema: unknownEntry,
125
+ objectPath: bulkPath("task-definitions"),
126
+ },
127
+ evalResults: {
128
+ type: "evalResults",
129
+ layout: "bulk",
130
+ slug: "eval-results",
131
+ entrySchema: unknownEntry,
132
+ objectPath: bulkPath("eval-results"),
133
+ },
134
+ traces: {
135
+ type: "traces",
136
+ layout: "bulk",
137
+ slug: "traces",
138
+ entrySchema: unknownEntry,
139
+ objectPath: bulkPath("traces"),
140
+ },
141
+ };
142
+ /** All artifact types in declaration order. */
143
+ export const ARTIFACT_TYPES = Object.keys(ARTIFACT_REGISTRY);
144
/**
 * Type guard — validates that an arbitrary value is a known artifact type.
 * Useful at API Gateway boundaries where the type comes from a URL parameter.
 *
 * Only the registry's own keys count. The `in` operator would also accept
 * names inherited from `Object.prototype` (`"constructor"`, `"toString"`,
 * `"__proto__"`, …), letting such input through the guard even though
 * `ARTIFACT_REGISTRY[value]` is not a descriptor for it.
 *
 * @param value - Candidate artifact type, typically untrusted input.
 * @returns `true` when `value` is a string naming an entry declared in
 *   `ARTIFACT_REGISTRY`, otherwise `false`.
 */
export function isArtifactType(value) {
    return (typeof value === "string" &&
        Object.prototype.hasOwnProperty.call(ARTIFACT_REGISTRY, value));
}
@@ -433,6 +433,6 @@ export interface ExampleRecord {
433
433
  }
434
434
  export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
435
435
  /** GitHub Actions workflow template for AI Literacy evaluation */
436
- export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n# NPM_TOKEN \u2014 npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Configure npm for @sanity scope\n run:\n echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n ~/.npmrc\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
436
+ export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
437
437
  /** TypeScript project configuration template (ailf.config.ts) */
438
438
  export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
@@ -630,7 +630,7 @@ export const EXAMPLES = {
630
630
  // Raw file exports (non-data files, exported as raw strings)
631
631
  // ---------------------------------------------------------------------------
632
632
  /** GitHub Actions workflow template for AI Literacy evaluation */
633
- export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n# NPM_TOKEN npm token with read access to @sanity scope\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. 
To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Configure npm for @sanity scope\n run:\n echo \"//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}\" >>\n ~/.npmrc\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
633
+ export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. 
To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
634
634
  // ---------------------------------------------------------------------------
635
635
  // TypeScript template exports (for ailf init --output-format ts)
636
636
  // ---------------------------------------------------------------------------
@@ -15,8 +15,9 @@ export * from "./schemas/index.js";
15
15
  export * from "./ports/index.js";
16
16
  export * from "./services/index.js";
17
17
  export * from "./examples/index.js";
18
+ export * from "./artifact-registry.js";
18
19
  export { defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./config-helpers.js";
19
20
  export type { PricingEntry, PromptEntry, SourceEntry, } from "./config-helpers.js";
20
21
  export { env } from "./env-helper.js";
21
22
  export { NoOpArtifactCollector } from "./artifact-capture/noop-collector.js";
22
- export { NoOpArtifactUploader } from "./ports/artifact-uploader.js";
23
+ export { NoOpArtifactWriter } from "./ports/artifact-writer.js";
@@ -15,10 +15,11 @@ export * from "./schemas/index.js";
15
15
  export * from "./ports/index.js";
16
16
  export * from "./services/index.js";
17
17
  export * from "./examples/index.js";
18
+ export * from "./artifact-registry.js";
18
19
  // ---------------------------------------------------------------------------
19
20
  // Architecture overhaul — Phase 0 helpers
20
21
  // ---------------------------------------------------------------------------
21
22
  export { defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./config-helpers.js";
22
23
  export { env } from "./env-helper.js";
23
24
  export { NoOpArtifactCollector } from "./artifact-capture/noop-collector.js";
24
- export { NoOpArtifactUploader } from "./ports/artifact-uploader.js";
25
+ export { NoOpArtifactWriter } from "./ports/artifact-writer.js";
@@ -62,7 +62,7 @@ export interface CaptureFlushResult {
62
62
  compressed: boolean;
63
63
  }
64
64
  /** A single entry in the capture manifest. */
65
- export interface ArtifactManifestEntry {
65
+ export interface CaptureManifestEntry {
66
66
  /** Pipeline step that produced this artifact */
67
67
  step: string;
68
68
  /** Artifact type identifier */
@@ -79,7 +79,7 @@ export interface ArtifactManifestEntry {
79
79
  meta?: Record<string, unknown>;
80
80
  }
81
81
  /** The manifest.json written to each capture directory. */
82
- export interface ArtifactManifest {
82
+ export interface CaptureManifest {
83
83
  version: 1;
84
84
  captureId: string;
85
85
  startedAt: string;
@@ -90,5 +90,5 @@ export interface ArtifactManifest {
90
90
  source?: string;
91
91
  areas?: string[];
92
92
  };
93
- artifacts: ArtifactManifestEntry[];
93
+ artifacts: CaptureManifestEntry[];
94
94
  }
@@ -0,0 +1,56 @@
1
+ /**
2
+ * Port: ArtifactWriter — writes run artifacts + the run manifest to external storage.
3
+ *
4
+ * Replaces the older `ArtifactUploader` port from D0030. Differences:
5
+ * - Paths anchor to `RunId` (not `ReportId`) via the registry's `objectPath`.
6
+ * - Supports both `"bulk"` and `"per-entry"` layouts.
7
+ * - A dedicated `writeManifest()` method for the run manifest at
8
+ * `runs/{runId}/manifest.json`.
9
+ *
10
+ * Producer steps call writer methods directly with the artifact type from
11
+ * `ARTIFACT_REGISTRY`. Path construction, schema validation, and entry-key
12
+ * sanitization live in the registry, not the call site.
13
+ *
14
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
15
+ * @see packages/core/src/artifact-registry.ts
16
+ */
17
+ import type { ArtifactType } from "../artifact-registry.js";
18
+ import type { RunId } from "../types/branded-ids.js";
19
+ import type { ArtifactRef, RunManifest } from "../types/index.js";
20
+ /**
21
+ * An entry in a per-entry upload. The `key` is the wire-format identifier
22
+ * (e.g. `{taskId}::{modelId}` for testOutputs); the writer sanitizes it into
23
+ * the filename using the registry's `parseEntryKey`.
24
+ */
25
+ export interface ArtifactEntry<TData = unknown> {
26
+ key: string;
27
+ data: TData;
28
+ }
29
+ export interface ArtifactWriter {
30
+ /**
31
+ * Write a bulk artifact — one JSON object per (runId, type).
32
+ *
33
+ * @returns An `ArtifactRef` pointing at `runs/{runId}/{slug}.json`, or
34
+ * `null` when upload is skipped or fails (P5: non-blocking).
35
+ */
36
+ writeBulk(type: ArtifactType, runId: RunId, data: unknown): Promise<ArtifactRef | null>;
37
+ /**
38
+ * Write a per-entry artifact — one JSON object per entry, all under
39
+ * `runs/{runId}/{slug}/`.
40
+ *
41
+ * The returned `ArtifactRef.entries` inlines the catalog so consumers
42
+ * can render drill-down state without a second listing call.
43
+ */
44
+ writePerEntry(type: ArtifactType, runId: RunId, entries: readonly ArtifactEntry[]): Promise<ArtifactRef | null>;
45
+ /**
46
+ * Write the run manifest to `runs/{runId}/manifest.json`. Single-writer
47
+ * per run; subsequent publishes may rewrite to append `reportIds[]`.
48
+ */
49
+ writeManifest(runId: RunId, manifest: RunManifest): Promise<ArtifactRef | null>;
50
+ }
51
+ /** No-op writer — every method returns null. Used when no storage is configured. */
52
+ export declare class NoOpArtifactWriter implements ArtifactWriter {
53
+ writeBulk(): Promise<null>;
54
+ writePerEntry(): Promise<null>;
55
+ writeManifest(): Promise<null>;
56
+ }
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Port: ArtifactWriter — writes run artifacts + the run manifest to external storage.
3
+ *
4
+ * Replaces the older `ArtifactUploader` port from D0030. Differences:
5
+ * - Paths anchor to `RunId` (not `ReportId`) via the registry's `objectPath`.
6
+ * - Supports both `"bulk"` and `"per-entry"` layouts.
7
+ * - A dedicated `writeManifest()` method for the run manifest at
8
+ * `runs/{runId}/manifest.json`.
9
+ *
10
+ * Producer steps call writer methods directly with the artifact type from
11
+ * `ARTIFACT_REGISTRY`. Path construction, schema validation, and entry-key
12
+ * sanitization live in the registry, not the call site.
13
+ *
14
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
15
+ * @see packages/core/src/artifact-registry.ts
16
+ */
17
+ /** No-op writer — every method returns null. Used when no storage is configured. */
18
+ export class NoOpArtifactWriter {
19
+ async writeBulk() {
20
+ return null;
21
+ }
22
+ async writePerEntry() {
23
+ return null;
24
+ }
25
+ async writeManifest() {
26
+ return null;
27
+ }
28
+ }
@@ -11,9 +11,10 @@
11
11
  * Fields marked optional are transitional — they will become required
12
12
  * as downstream consumers are converted to use them.
13
13
  */
14
+ import type { RunId } from "../types/branded-ids.js";
14
15
  import type { DebugOptions, EvalMode, PluginRegistry } from "../types/index.js";
15
16
  import type { ArtifactCollector } from "./artifact-collector.js";
16
- import type { ArtifactUploader } from "./artifact-uploader.js";
17
+ import type { ArtifactWriter } from "./artifact-writer.js";
17
18
  import type { CacheStore } from "./cache-store.js";
18
19
  import type { DocFetcher } from "./doc-fetcher.js";
19
20
  import type { EvalRunner } from "./eval-runner.js";
@@ -197,8 +198,8 @@ export interface ResolvedConfig {
197
198
  * Created per-test by createTestContext().
198
199
  */
199
200
  export interface AppContext {
200
- /** Report artifact uploader — uploads structured files to GCS for Studio (D0030) */
201
- readonly artifactUploader?: ArtifactUploader;
201
+ /** Artifact writer — writes run artifacts + manifest to GCS (D0032) */
202
+ readonly artifactWriter?: ArtifactWriter;
202
203
  /** Evaluation caching (filesystem + optional Content Lake fallback) */
203
204
  readonly cache?: CacheStore;
204
205
  /** Artifact capture collector (no-op when --capture is not set) */
@@ -220,6 +221,15 @@ export interface AppContext {
220
221
  * require this field.
221
222
  */
222
223
  readonly reportStore?: ReportStorePort;
224
+ /**
225
+ * Identity for this pipeline run. Generated once at composition-root
226
+ * time; every step reads it from the context rather than regenerating.
227
+ * Artifacts anchor to this id (`gs://…/runs/{runId}/…`) regardless of
228
+ * whether the run eventually publishes a report.
229
+ *
230
+ * @see docs/decisions/D0032-run-anchored-artifact-store.md
231
+ */
232
+ readonly runId: RunId;
223
233
  /**
224
234
  * Report delivery sinks (Slack, BigQuery, webhooks).
225
235
  * Empty array when no sinks are configured.
@@ -4,9 +4,9 @@
4
4
  * Ports define the contracts between the domain kernel and the outside world.
5
5
  * Adapters (in packages/eval) implement these interfaces.
6
6
  */
7
- export type { ArtifactCollector, ArtifactManifest, ArtifactManifestEntry, CaptureFlushResult, } from "./artifact-collector.js";
8
- export type { ArtifactUploader } from "./artifact-uploader.js";
9
- export { NoOpArtifactUploader } from "./artifact-uploader.js";
7
+ export type { ArtifactCollector, CaptureFlushResult, CaptureManifest, CaptureManifestEntry, } from "./artifact-collector.js";
8
+ export type { ArtifactEntry, ArtifactWriter } from "./artifact-writer.js";
9
+ export { NoOpArtifactWriter } from "./artifact-writer.js";
10
10
  export type { ArtifactContentDiff, CaptureDiffReport, ComparisonMode, ComparisonOptions, InventoryDiff, JsonDiffEntry, MetadataComparison, ScoreComparison, SecurityScan, TimingComparison, } from "./capture-comparator.js";
11
11
  export type { CacheEntryMetadata, CacheKey, CacheLookupResult, CacheRecordInput, CacheStore, } from "./cache-store.js";
12
12
  export type { ConfigSource } from "./config-source.js";
@@ -4,5 +4,5 @@
4
4
  * Ports define the contracts between the domain kernel and the outside world.
5
5
  * Adapters (in packages/eval) implement these interfaces.
6
6
  */
7
- export { NoOpArtifactUploader } from "./artifact-uploader.js";
7
+ export { NoOpArtifactWriter } from "./artifact-writer.js";
8
8
  export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
@@ -104,6 +104,15 @@ export declare function taskId(raw: string): Result<TaskId, IdValidationError>;
104
104
  * Valid format: `run_` prefix followed by alphanumeric characters.
105
105
  */
106
106
  export declare function runId(raw: string): Result<RunId, IdValidationError>;
107
+ /**
108
+ * Generate a new `RunId` using a time-sortable UUIDv7 payload.
109
+ *
110
+ * The `run_` prefix plus the hyphen-stripped UUIDv7 yields 36 characters
111
+ * that sort lexicographically by creation time — same pattern used for
112
+ * `ReportId`. One generator call per pipeline invocation; every step
113
+ * reads the resulting id from `AppContext.runId`.
114
+ */
115
+ export declare function generateRunId(): RunId;
107
116
  /**
108
117
  * Parse a raw string into a `SuiteId`.
109
118
  *
@@ -59,6 +59,27 @@ export function runId(raw) {
59
59
  }
60
60
  return ok(raw);
61
61
  }
62
+ /**
63
+ * Generate a new `RunId` using a time-sortable UUIDv7 payload.
64
+ *
65
+ * The `run_` prefix plus the hyphen-stripped UUIDv7 yields 36 characters
66
+ * that sort lexicographically by creation time — same pattern used for
67
+ * `ReportId`. One generator call per pipeline invocation; every step
68
+ * reads the resulting id from `AppContext.runId`.
69
+ */
70
+ export function generateRunId() {
71
+ const now = Date.now();
72
+ const uuid = crypto.randomUUID();
73
+ // UUID v7: encode 48-bit timestamp in the first 12 hex chars
74
+ const hex = now.toString(16).padStart(12, "0");
75
+ const v7 = hex.slice(0, 8) +
76
+ hex.slice(8, 12) +
77
+ "7" +
78
+ uuid.slice(15, 18) +
79
+ uuid.slice(19, 23) +
80
+ uuid.slice(24);
81
+ return `run_${v7}`;
82
+ }
62
83
  /**
63
84
  * Parse a raw string into a `SuiteId`.
64
85
  *