@sanity/ailf 2.7.0 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_vendor/ailf-core/artifact-registry.d.ts +72 -0
- package/dist/_vendor/ailf-core/artifact-registry.js +150 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +1 -1
- package/dist/_vendor/ailf-core/examples/index.js +1 -1
- package/dist/_vendor/ailf-core/index.d.ts +2 -1
- package/dist/_vendor/ailf-core/index.js +2 -1
- package/dist/_vendor/ailf-core/ports/artifact-collector.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/artifact-writer.d.ts +56 -0
- package/dist/_vendor/ailf-core/ports/artifact-writer.js +28 -0
- package/dist/_vendor/ailf-core/ports/context.d.ts +13 -3
- package/dist/_vendor/ailf-core/ports/index.d.ts +3 -3
- package/dist/_vendor/ailf-core/ports/index.js +1 -1
- package/dist/_vendor/ailf-core/types/branded-ids.d.ts +9 -0
- package/dist/_vendor/ailf-core/types/branded-ids.js +21 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +117 -70
- package/dist/_vendor/ailf-core/types/index.js +1 -1
- package/dist/_vendor/ailf-shared/index.d.ts +2 -0
- package/dist/_vendor/ailf-shared/index.js +2 -0
- package/dist/_vendor/ailf-shared/run-context.d.ts +55 -0
- package/dist/_vendor/ailf-shared/run-context.js +17 -0
- package/dist/_vendor/ailf-shared/run-trigger.d.ts +30 -0
- package/dist/_vendor/ailf-shared/run-trigger.js +13 -0
- package/dist/artifact-capture/api-gateway-artifact-writer.d.ts +39 -0
- package/dist/artifact-capture/api-gateway-artifact-writer.js +148 -0
- package/dist/artifact-capture/gcs-artifact-writer.d.ts +30 -0
- package/dist/artifact-capture/gcs-artifact-writer.js +119 -0
- package/dist/commands/init.js +2 -6
- package/dist/commands/publish.js +3 -2
- package/dist/composition-root.d.ts +3 -3
- package/dist/composition-root.js +20 -15
- package/dist/orchestration/build-step-sequence.js +6 -1
- package/dist/orchestration/steps/calculate-scores-step.js +42 -2
- package/dist/orchestration/steps/finalize-run-step.d.ts +29 -0
- package/dist/orchestration/steps/finalize-run-step.js +103 -0
- package/dist/orchestration/steps/publish-report-step.js +25 -27
- package/dist/pipeline/calculate-scores.js +13 -2
- package/dist/pipeline/provenance.d.ts +24 -44
- package/dist/pipeline/provenance.js +17 -165
- package/dist/pipeline/report-title.d.ts +2 -2
- package/dist/pipeline/run-context.d.ts +57 -0
- package/dist/pipeline/run-context.js +156 -0
- package/dist/pipeline/upload-test-outputs.d.ts +26 -0
- package/dist/pipeline/upload-test-outputs.js +34 -0
- package/dist/report-store.js +4 -2
- package/package.json +1 -1
- package/dist/_vendor/ailf-core/ports/artifact-uploader.d.ts +0 -35
- package/dist/_vendor/ailf-core/ports/artifact-uploader.js +0 -18
- package/dist/artifact-capture/api-gateway-artifact-uploader.d.ts +0 -41
- package/dist/artifact-capture/api-gateway-artifact-uploader.js +0 -123
- package/dist/artifact-capture/gcs-report-artifact-uploader.d.ts +0 -31
- package/dist/artifact-capture/gcs-report-artifact-uploader.js +0 -66
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Artifact registry — single source of truth for AILF's external artifact types.
|
|
3
|
+
*
|
|
4
|
+
* Every artifact that lives in GCS declares itself here exactly once:
|
|
5
|
+
* layout, path builder, entry schema, and (for per-entry layouts) key parser.
|
|
6
|
+
* Eval writers, the API Gateway's signing endpoint, and the Studio hook all
|
|
7
|
+
* consume this same record.
|
|
8
|
+
*
|
|
9
|
+
* Adding a new artifact type = one entry here. No call-site changes needed in
|
|
10
|
+
* the generic writer / signer / hook — they all iterate the registry.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/decisions/D0032-run-anchored-artifact-store.md
|
|
13
|
+
* @see docs/design-docs/run-artifact-store.md (§ Move 4 — Artifact Registry)
|
|
14
|
+
*/
|
|
15
|
+
import { z } from "zod";
|
|
16
|
+
import type { RunId } from "./types/branded-ids.js";
|
|
17
|
+
/** Layouts supported by the artifact store. */
|
|
18
|
+
export type ArtifactLayout = "bulk" | "per-entry";
|
|
19
|
+
/**
 * Identifier of an artifact kind known to AILF. Each member is also a key of
 * `ARTIFACT_REGISTRY`.
 */
export type ArtifactType =
    | "testOutputs"
    | "renderedPrompts"
    | "rawResults"
    | "graderPrompts"
    | "taskDefinitions"
    | "evalResults"
    | "traces";
|
|
21
|
+
/**
 * Outcome of parsing a per-entry key into a filename component.
 *
 * - `ok: true`  — `sanitized` holds the filename-safe form of the key.
 * - `ok: false` — `reason` explains the rejection (suitable for a 4xx body).
 */
export type ParsedEntryKey =
    | { ok: true; sanitized: string }
    | { ok: false; reason: string };
|
|
32
|
+
/**
 * Declaration of a single artifact type, shared by writers, signers, and
 * readers.
 *
 * @typeParam TEntry - Shape of one entry. With a bulk layout this is the shape
 *                     of each value in the bulk object's index; with a
 *                     per-entry layout it is the shape of one GCS object.
 */
export interface ArtifactDescriptor<TEntry = unknown> {
    /** Artifact type identifier; equals this descriptor's key in ARTIFACT_REGISTRY. */
    type: ArtifactType;

    /** "bulk" = one object per run; "per-entry" = one object per entry key. */
    layout: ArtifactLayout;

    /** Kebab-case stem used for the bulk filename or the per-entry directory name. */
    slug: string;

    /** Zod schema that validates one entry. */
    entrySchema: z.ZodType<TEntry>;

    /**
     * Compute the GCS object path of this artifact.
     *
     * - bulk layout: `runs/{runId}/{slug}.json` (`entryKey` is ignored).
     * - per-entry layout: `runs/{runId}/{slug}/{sanitized}.json`
     *   (`entryKey` is required).
     */
    objectPath: (runId: RunId, entryKey?: string) => string;

    /**
     * Check a per-entry key and produce its sanitized filename component.
     * Only relevant when `layout === "per-entry"`. A bulk descriptor may still
     * declare it so that switching layouts later is a one-line change.
     */
    parseEntryKey?: (key: string) => ParsedEntryKey;
}
|
|
61
|
+
/**
|
|
62
|
+
* The canonical artifact descriptor for every artifact type. Iterate with
|
|
63
|
+
* `Object.values(ARTIFACT_REGISTRY)` or look up by `ARTIFACT_REGISTRY[type]`.
|
|
64
|
+
*/
|
|
65
|
+
export declare const ARTIFACT_REGISTRY: Record<ArtifactType, ArtifactDescriptor>;
|
|
66
|
+
/** All artifact types in declaration order. */
|
|
67
|
+
export declare const ARTIFACT_TYPES: readonly ArtifactType[];
|
|
68
|
+
/**
|
|
69
|
+
* Type guard — validates that an arbitrary string is a known artifact type.
|
|
70
|
+
* Useful at API Gateway boundaries where the type comes from a URL parameter.
|
|
71
|
+
*/
|
|
72
|
+
export declare function isArtifactType(value: string): value is ArtifactType;
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Artifact registry — single source of truth for AILF's external artifact types.
|
|
3
|
+
*
|
|
4
|
+
* Every artifact that lives in GCS declares itself here exactly once:
|
|
5
|
+
* layout, path builder, entry schema, and (for per-entry layouts) key parser.
|
|
6
|
+
* Eval writers, the API Gateway's signing endpoint, and the Studio hook all
|
|
7
|
+
* consume this same record.
|
|
8
|
+
*
|
|
9
|
+
* Adding a new artifact type = one entry here. No call-site changes needed in
|
|
10
|
+
* the generic writer / signer / hook — they all iterate the registry.
|
|
11
|
+
*
|
|
12
|
+
* @see docs/decisions/D0032-run-anchored-artifact-store.md
|
|
13
|
+
* @see docs/design-docs/run-artifact-store.md (§ Move 4 — Artifact Registry)
|
|
14
|
+
*/
|
|
15
|
+
import { z } from "zod";
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Path + key helpers
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
/**
 * Create the object-path builder for a bulk-layout artifact.
 *
 * @param slug - Kebab-case filename stem of the artifact.
 * @returns A function mapping a run id to `runs/{runId}/{slug}.json`.
 */
function bulkPath(slug) {
    return (runId) => {
        return `runs/${runId}/${slug}.json`;
    };
}
|
|
22
|
+
/**
 * Create the object-path builder for a per-entry artifact.
 *
 * @param slug - Kebab-case directory name of the artifact.
 * @returns A function mapping `(runId, entryKey)` to
 *          `runs/{runId}/{slug}/{sanitized}.json`, where `sanitized` is the
 *          entry key passed through `sanitizeEntryKey`.
 * @throws Error (from the returned function) when `entryKey` is `undefined`.
 */
function perEntryPath(slug) {
    return (runId, entryKey) => {
        if (entryKey !== undefined) {
            const fileStem = sanitizeEntryKey(entryKey);
            return `runs/${runId}/${slug}/${fileStem}.json`;
        }
        throw new Error(`Artifact "${slug}" uses per-entry layout; an entry key is required`);
    };
}
|
|
31
|
+
/**
 * Turn a wire-format entry key (e.g. `{taskId}::{modelId}`) into a component
 * that is safe to use as a filename.
 *
 * Substitutions, applied in this order:
 * - every `::` becomes `--`, keeping the wire separator out of the filename;
 * - every `/` becomes `_`, so a task name such as
 *   "Content Lake with @sanity/client" does not open an unintended GCS
 *   subdirectory and a listing of the per-entry directory has one row per
 *   entry.
 *
 * A lone `:` is left untouched: model ids like
 * `anthropic:messages:claude-opus-4-6` are valid GCS object names.
 *
 * NOTE: the mapping is not bijective. A key containing a literal `--` can
 * collide with one containing `::`, and `_` collides with `/`. Production
 * task ids do not exercise these combinations today. If collision-safety
 * becomes a concern (e.g. user-provided free-form task names), move to
 * percent-encoding or a hash-based scheme at the key boundary.
 *
 * @param key - Entry key in wire format.
 * @returns The filename-safe form of `key`.
 */
function sanitizeEntryKey(key) {
    const withoutSeparator = key.split("::").join("--");
    return withoutSeparator.split("/").join("_");
}
|
|
53
|
+
/**
 * Parser for entry keys of the form `{taskId}::{modelId}` (used by
 * testOutputs; available to other per-entry types).
 *
 * The key is split on `::` (double colon). It is accepted only when the split
 * yields exactly two segments and both are non-empty. Single colons inside a
 * segment are fine: production model ids commonly look like
 * `anthropic:messages:claude-opus-4-6`. A rejected key yields a reason string
 * so the API Gateway can answer 400 on malformed input.
 *
 * @param key - Entry key in wire format.
 * @returns `{ ok: true, sanitized }` with the filename-safe key, or
 *          `{ ok: false, reason }` describing why the key was rejected.
 */
function parseTaskModelKey(key) {
    const segments = key.split("::");
    const [taskId, modelId] = segments;
    const wellFormed = segments.length === 2 && Boolean(taskId) && Boolean(modelId);
    if (wellFormed) {
        return { ok: true, sanitized: sanitizeEntryKey(key) };
    }
    return {
        ok: false,
        reason: `Entry key "${key}" must match {taskId}::{modelId} with exactly one "::" separator and non-empty segments`,
    };
}
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
// Entry schemas
|
|
75
|
+
// ---------------------------------------------------------------------------
|
|
76
|
+
const testOutputEntrySchema = z.object({
|
|
77
|
+
responseOutput: z.string(),
|
|
78
|
+
responseOutputTruncated: z.boolean(),
|
|
79
|
+
});
|
|
80
|
+
// Aspirational: renderedPrompts / rawResults / traces / etc. currently have
|
|
81
|
+
// loose shapes. Tighten per-type as consumers stabilize.
|
|
82
|
+
const unknownEntry = z.unknown();
|
|
83
|
+
// ---------------------------------------------------------------------------
|
|
84
|
+
// The registry
|
|
85
|
+
// ---------------------------------------------------------------------------
|
|
86
|
+
/**
 * Build the descriptor of a bulk-layout artifact whose entries are not yet
 * schema-checked (`unknownEntry`).
 *
 * @param type - Artifact type identifier (the registry key).
 * @param slug - Kebab-case filename stem.
 */
function bulkDescriptor(type, slug) {
    return {
        type,
        layout: "bulk",
        slug,
        entrySchema: unknownEntry,
        objectPath: bulkPath(slug),
    };
}
/**
 * Canonical descriptor table, one entry per artifact type. Look up a single
 * descriptor with `ARTIFACT_REGISTRY[type]`, or walk all of them with
 * `Object.values(ARTIFACT_REGISTRY)`.
 */
export const ARTIFACT_REGISTRY = {
    // The only per-entry artifact: one object per `{taskId}::{modelId}` key.
    testOutputs: {
        type: "testOutputs",
        layout: "per-entry",
        slug: "test-outputs",
        entrySchema: testOutputEntrySchema,
        objectPath: perEntryPath("test-outputs"),
        parseEntryKey: parseTaskModelKey,
    },
    // Bulk artifacts: one object per run.
    renderedPrompts: bulkDescriptor("renderedPrompts", "rendered-prompts"),
    rawResults: bulkDescriptor("rawResults", "raw-results"),
    graderPrompts: bulkDescriptor("graderPrompts", "grader-prompts"),
    taskDefinitions: bulkDescriptor("taskDefinitions", "task-definitions"),
    evalResults: bulkDescriptor("evalResults", "eval-results"),
    traces: bulkDescriptor("traces", "traces"),
};
|
|
142
|
+
/**
 * All artifact types, in the order they are declared in ARTIFACT_REGISTRY.
 *
 * The array is frozen: the type declaration exposes it as
 * `readonly ArtifactType[]`, and because every consumer shares this one
 * instance, an accidental in-place mutation (push/sort/splice) by one caller
 * must not be able to change the list seen by the others.
 */
export const ARTIFACT_TYPES = Object.freeze(Object.keys(ARTIFACT_REGISTRY));
|
|
144
|
+
/**
|
|
145
|
+
* Type guard — validates that an arbitrary string is a known artifact type.
|
|
146
|
+
* Useful at API Gateway boundaries where the type comes from a URL parameter.
|
|
147
|
+
*/
|
|
148
|
+
export function isArtifactType(value) {
|
|
149
|
+
return value in ARTIFACT_REGISTRY;
|
|
150
|
+
}
|
|
@@ -433,6 +433,6 @@ export interface ExampleRecord {
|
|
|
433
433
|
}
|
|
434
434
|
export declare const EXAMPLES: Record<ExampleType, ExampleRecord>;
|
|
435
435
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
436
|
-
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#
|
|
436
|
+
export declare const workflowYaml = "# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n# AI Literacy Evaluation \u2014 GitHub Actions workflow\n# \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings \u2192 Secrets \u2192 Actions):\n# AILF_API_KEY \u2014 your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# 
\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## \u26A0\uFE0F AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet \u2014 everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?\u00B7\\s*([^\u00B7<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>\uD83D\uDCDC ${date} \u2014 ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? 
`\\n\\n---\\n\\n### \uD83D\uDCDC Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## \u26A0\uFE0F AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
437
437
|
/** TypeScript project configuration template (ailf.config.ts) */
|
|
438
438
|
export declare const ailfConfigTs = "/**\n * .ailf/ailf.config.ts \u2014 AI Literacy Framework project configuration.\n *\n * This file configures how the AILF evaluation pipeline runs in this\n * repository. Place it at .ailf/ailf.config.ts in your project root.\n *\n * Evaluations are submitted to the AILF API (ailf-api.sanity.build).\n * The API handles LLM calls, doc fetching, grading, and report\n * publishing. Your repo only needs one secret: AILF_API_KEY.\n *\n * Docs: https://github.com/sanity-labs/ai-literacy-framework\n */\n\nexport default {\n /**\n * Documentation source \u2014 which docs are being evaluated.\n *\n * This tells the pipeline which Sanity project and dataset contain\n * the documentation under test. For most users, this is Sanity's own\n * docs project.\n */\n source: {\n /** Sanity project ID (find yours at sanity.io/manage) */\n projectId: \"3do82whm\",\n /** The dataset to query (e.g., \"production\", \"next\") */\n dataset: \"next\",\n /**\n * The public URL of your documentation site.\n * Used by agentic mode to test agent discoverability.\n */\n baseUrl: \"https://www.sanity.io/docs\",\n },\n\n /**\n * Trigger configuration \u2014 when evaluations run automatically.\n *\n * Each key is a trigger context. The pipeline checks which trigger\n * matches the current execution context (PR, merge, schedule, etc.)\n * and applies its settings.\n *\n * Mode options:\n * \"validate-only\" \u2014 check that task files parse correctly (fast, no LLM calls)\n * \"eval\" \u2014 run the full evaluation pipeline\n */\n triggers: {\n /** On pull requests: just validate task files parse correctly. */\n pr: {\n mode: \"validate-only\",\n },\n\n /** When .ailf/ files change in a PR: run a real evaluation. */\n \"pr-task-change\": {\n mode: \"eval\",\n paths: [\".ailf/**\"],\n },\n\n /** On merge to main: run evaluation (non-blocking). */\n main: {\n mode: \"eval\",\n blocking: false,\n notify: true,\n },\n },\n}\n";
|
|
@@ -630,7 +630,7 @@ export const EXAMPLES = {
|
|
|
630
630
|
// Raw file exports (non-data files, exported as raw strings)
|
|
631
631
|
// ---------------------------------------------------------------------------
|
|
632
632
|
/** GitHub Actions workflow template for AI Literacy evaluation */
|
|
633
|
-
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#
|
|
633
|
+
export const workflowYaml = "# ──────────────────────────────────────────────────────────────────────\n# AI Literacy Evaluation — GitHub Actions workflow\n# ──────────────────────────────────────────────────────────────────────\n#\n# Evaluates your documentation quality on every pull request.\n# The AILF CLI reads your .ailf/tasks/ definitions, submits them\n# to the AILF API for evaluation, and writes a score report.\n#\n# Prerequisites:\n# Add one secret to your repository (Settings → Secrets → Actions):\n# AILF_API_KEY — your API key (starts with ailf_live_sk_)\n#\n# @sanity/ailf is published with public npm access, so no npm token is\n# needed to install the CLI.\n#\n# Customization:\n# - Narrow the trigger paths to reduce cost (see comment below)\n# - Check debug_mode for faster iteration (fewer tests)\n# - See: https://github.com/sanity-labs/ai-literacy-framework\n# ──────────────────────────────────────────────────────────────────────\n\nname: AI Literacy Eval\n\non:\n pull_request:\n branches: [main]\n # Runs on every PR to main by default. 
To reduce cost:\n # paths: [\".ailf/**\", \"docs/**\"]\n\n workflow_dispatch:\n inputs:\n debug_mode:\n description: \"Run in debug mode (fewer tests, faster iteration)\"\n type: boolean\n default: false\n\nconcurrency:\n group: ailf-eval-${{ github.event.pull_request.number || github.ref }}\n cancel-in-progress: true\n\njobs:\n evaluate:\n name: AI Literacy Evaluation\n runs-on: ubuntu-latest\n permissions:\n contents: read\n pull-requests: write\n steps:\n - uses: actions/checkout@v4\n\n - name: Run evaluation\n id: eval\n env:\n AILF_API_KEY: ${{ secrets.AILF_API_KEY }}\n run: |\n npx @sanity/ailf@latest pipeline --remote \\\n --output /tmp/ailf-report.md \\\n ${{ inputs.debug_mode && '--debug' || '' }}\n\n - name: Post PR comment\n if: always() && github.event_name == 'pull_request'\n uses: actions/github-script@v7\n with:\n script: |\n const fs = require('fs');\n\n // --- Constants ---\n const MARKER = '<!-- ailf-score-report -->';\n const HISTORY_START = '<!-- ailf-score-history -->';\n const HISTORY_END = '<!-- /ailf-score-history -->';\n const MAX_HISTORY = 3; // keep at most 3 prior runs\n\n // --- Read new report ---\n let newReport;\n try {\n newReport = fs.readFileSync('/tmp/ailf-report.md', 'utf-8');\n } catch {\n newReport = `## ⚠️ AI Literacy Evaluation\\n\\nNo report generated. 
Check the [workflow logs](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}).`;\n }\n\n const prNumber = context.issue?.number || context.payload?.pull_request?.number;\n if (!prNumber) {\n console.log('No PR number found, skipping comment');\n return;\n }\n\n // --- Find existing comment ---\n const { data: comments } = await github.rest.issues.listComments({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber,\n });\n const existing = comments.find(c => c.body?.includes(MARKER));\n\n // --- Build history from previous comment ---\n let historyEntries = [];\n if (existing) {\n const oldBody = existing.body || '';\n\n // Collect existing collapsed history entries\n const histStart = oldBody.indexOf(HISTORY_START);\n const histEnd = oldBody.indexOf(HISTORY_END);\n if (histStart !== -1 && histEnd !== -1) {\n const historyContent = oldBody.slice(histStart + HISTORY_START.length, histEnd).trim();\n // Split on </details> boundaries to get individual entries\n if (historyContent) {\n historyEntries = historyContent\n .split(/<\\/details>\\s*/)\n .map(s => s.trim())\n .filter(s => s.startsWith('<details>'))\n .map(s => s + '\\n</details>');\n }\n }\n\n // Extract the current report (will become the newest history entry)\n let previousReport = '';\n if (histStart !== -1) {\n // Report is between MARKER and the \"Previous runs\" heading (or history section)\n const markerIdx = oldBody.indexOf(MARKER);\n // Find the --- separator before history\n const separatorIdx = oldBody.lastIndexOf('---', histStart);\n const endIdx = separatorIdx > markerIdx ? 
separatorIdx : histStart;\n previousReport = oldBody.slice(markerIdx + MARKER.length, endIdx).trim();\n } else {\n // No history yet — everything after MARKER is the report\n const markerIdx = oldBody.indexOf(MARKER);\n if (markerIdx !== -1) {\n previousReport = oldBody.slice(markerIdx + MARKER.length).trim();\n }\n }\n\n // Collapse the previous report into a <details> entry\n if (previousReport) {\n const scoreMatch = previousReport.match(/Overall:\\s*(\\d+)\\/100/);\n const score = scoreMatch ? scoreMatch[1] : '?';\n const dateMatch = previousReport.match(/Generated by.*?·\\s*([^·<\\n*]+)/);\n const date = dateMatch\n ? dateMatch[1].trim()\n : new Date().toISOString().slice(0, 16).replace('T', ' ') + ' UTC';\n const entry = `<details>\\n<summary>📜 ${date} — ${score}/100</summary>\\n\\n${previousReport}\\n\\n</details>`;\n historyEntries.unshift(entry); // newest first\n }\n\n // Enforce max history limit\n historyEntries = historyEntries.slice(0, MAX_HISTORY);\n }\n\n // --- Assemble final comment ---\n const historySection = historyEntries.length > 0\n ? `\\n\\n---\\n\\n### 📜 Previous runs\\n\\n${HISTORY_START}\\n${historyEntries.join('\\n\\n')}\\n${HISTORY_END}`\n : '';\n const finalBody = `${MARKER}\\n${newReport}${historySection}`;\n\n if (existing) {\n await github.rest.issues.updateComment({\n owner: context.repo.owner, repo: context.repo.repo,\n comment_id: existing.id, body: finalBody,\n });\n console.log(`Updated comment (${historyEntries.length} history entries)`);\n } else {\n await github.rest.issues.createComment({\n owner: context.repo.owner, repo: context.repo.repo,\n issue_number: prNumber, body: finalBody,\n });\n console.log('Created new PR comment');\n }\n\n - name: Summary\n if: always()\n run: |\n if [ -f /tmp/ailf-report.md ]; then\n cat /tmp/ailf-report.md >> \"$GITHUB_STEP_SUMMARY\"\n else\n echo \"## ⚠️ AI Literacy Evaluation\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"\" >> \"$GITHUB_STEP_SUMMARY\"\n echo \"No report generated. 
Check the workflow logs.\" >> \"$GITHUB_STEP_SUMMARY\"\n fi\n";
|
|
634
634
|
// ---------------------------------------------------------------------------
|
|
635
635
|
// TypeScript template exports (for ailf init --output-format ts)
|
|
636
636
|
// ---------------------------------------------------------------------------
|
|
@@ -15,8 +15,9 @@ export * from "./schemas/index.js";
|
|
|
15
15
|
export * from "./ports/index.js";
|
|
16
16
|
export * from "./services/index.js";
|
|
17
17
|
export * from "./examples/index.js";
|
|
18
|
+
export * from "./artifact-registry.js";
|
|
18
19
|
export { defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./config-helpers.js";
|
|
19
20
|
export type { PricingEntry, PromptEntry, SourceEntry, } from "./config-helpers.js";
|
|
20
21
|
export { env } from "./env-helper.js";
|
|
21
22
|
export { NoOpArtifactCollector } from "./artifact-capture/noop-collector.js";
|
|
22
|
-
export {
|
|
23
|
+
export { NoOpArtifactWriter } from "./ports/artifact-writer.js";
|
|
@@ -15,10 +15,11 @@ export * from "./schemas/index.js";
|
|
|
15
15
|
export * from "./ports/index.js";
|
|
16
16
|
export * from "./services/index.js";
|
|
17
17
|
export * from "./examples/index.js";
|
|
18
|
+
export * from "./artifact-registry.js";
|
|
18
19
|
// ---------------------------------------------------------------------------
|
|
19
20
|
// Architecture overhaul — Phase 0 helpers
|
|
20
21
|
// ---------------------------------------------------------------------------
|
|
21
22
|
export { defineConfig, defineFeatures, defineModeBase, defineModels, definePricingTable, definePreset, definePrompts, defineRubrics, defineSchedules, defineSinks, defineSources, defineTask, defineThresholds, } from "./config-helpers.js";
|
|
22
23
|
export { env } from "./env-helper.js";
|
|
23
24
|
export { NoOpArtifactCollector } from "./artifact-capture/noop-collector.js";
|
|
24
|
-
export {
|
|
25
|
+
export { NoOpArtifactWriter } from "./ports/artifact-writer.js";
|
|
@@ -62,7 +62,7 @@ export interface CaptureFlushResult {
|
|
|
62
62
|
compressed: boolean;
|
|
63
63
|
}
|
|
64
64
|
/** A single entry in the capture manifest. */
|
|
65
|
-
export interface ArtifactManifestEntry {
|
|
65
|
+
export interface CaptureManifestEntry {
|
|
66
66
|
/** Pipeline step that produced this artifact */
|
|
67
67
|
step: string;
|
|
68
68
|
/** Artifact type identifier */
|
|
@@ -79,7 +79,7 @@ export interface ArtifactManifestEntry {
|
|
|
79
79
|
meta?: Record<string, unknown>;
|
|
80
80
|
}
|
|
81
81
|
/** The manifest.json written to each capture directory. */
|
|
82
|
-
export interface ArtifactManifest {
|
|
82
|
+
export interface CaptureManifest {
|
|
83
83
|
version: 1;
|
|
84
84
|
captureId: string;
|
|
85
85
|
startedAt: string;
|
|
@@ -90,5 +90,5 @@ export interface ArtifactManifest {
|
|
|
90
90
|
source?: string;
|
|
91
91
|
areas?: string[];
|
|
92
92
|
};
|
|
93
|
-
artifacts: ArtifactManifestEntry[];
|
|
93
|
+
artifacts: CaptureManifestEntry[];
|
|
94
94
|
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Port: ArtifactWriter — writes run artifacts + the run manifest to external storage.
|
|
3
|
+
*
|
|
4
|
+
* Replaces the older `ArtifactUploader` port from D0030. Differences:
|
|
5
|
+
* - Paths anchor to `RunId` (not `ReportId`) via the registry's `objectPath`.
|
|
6
|
+
* - Supports both `"bulk"` and `"per-entry"` layouts.
|
|
7
|
+
* - A dedicated `writeManifest()` method for the run manifest at
|
|
8
|
+
* `runs/{runId}/manifest.json`.
|
|
9
|
+
*
|
|
10
|
+
* Producer steps call writer methods directly with the artifact type from
|
|
11
|
+
* `ARTIFACT_REGISTRY`. Path construction, schema validation, and entry-key
|
|
12
|
+
* sanitization live in the registry, not the call site.
|
|
13
|
+
*
|
|
14
|
+
* @see docs/decisions/D0032-run-anchored-artifact-store.md
|
|
15
|
+
* @see packages/core/src/artifact-registry.ts
|
|
16
|
+
*/
|
|
17
|
+
import type { ArtifactType } from "../artifact-registry.js";
|
|
18
|
+
import type { RunId } from "../types/branded-ids.js";
|
|
19
|
+
import type { ArtifactRef, RunManifest } from "../types/index.js";
|
|
20
|
+
/**
|
|
21
|
+
* An entry in a per-entry upload. The `key` is the wire-format identifier
|
|
22
|
+
* (e.g. `{taskId}::{modelId}` for testOutputs); the writer sanitizes it into
|
|
23
|
+
* the filename using the registry's `parseEntryKey`.
|
|
24
|
+
*/
|
|
25
|
+
export interface ArtifactEntry<TData = unknown> {
|
|
26
|
+
key: string;
|
|
27
|
+
data: TData;
|
|
28
|
+
}
|
|
29
|
+
export interface ArtifactWriter {
|
|
30
|
+
/**
|
|
31
|
+
* Write a bulk artifact — one JSON object per (runId, type).
|
|
32
|
+
*
|
|
33
|
+
* @returns An `ArtifactRef` pointing at `runs/{runId}/{slug}.json`, or
|
|
34
|
+
* `null` when upload is skipped or fails (P5: non-blocking).
|
|
35
|
+
*/
|
|
36
|
+
writeBulk(type: ArtifactType, runId: RunId, data: unknown): Promise<ArtifactRef | null>;
|
|
37
|
+
/**
|
|
38
|
+
* Write a per-entry artifact — one JSON object per entry, all under
|
|
39
|
+
* `runs/{runId}/{slug}/`.
|
|
40
|
+
*
|
|
41
|
+
* The returned `ArtifactRef.entries` inlines the catalog so consumers
|
|
42
|
+
* can render drill-down state without a second listing call.
|
|
43
|
+
*/
|
|
44
|
+
writePerEntry(type: ArtifactType, runId: RunId, entries: readonly ArtifactEntry[]): Promise<ArtifactRef | null>;
|
|
45
|
+
/**
|
|
46
|
+
* Write the run manifest to `runs/{runId}/manifest.json`. Single-writer
|
|
47
|
+
* per run; subsequent publishes may rewrite to append `reportIds[]`.
|
|
48
|
+
*/
|
|
49
|
+
writeManifest(runId: RunId, manifest: RunManifest): Promise<ArtifactRef | null>;
|
|
50
|
+
}
|
|
51
|
+
/** No-op writer — every method resolves to null. Used when no storage is configured. */
|
|
52
|
+
export declare class NoOpArtifactWriter implements ArtifactWriter {
|
|
53
|
+
writeBulk(): Promise<null>;
|
|
54
|
+
writePerEntry(): Promise<null>;
|
|
55
|
+
writeManifest(): Promise<null>;
|
|
56
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Port: ArtifactWriter — writes run artifacts + the run manifest to external storage.
|
|
3
|
+
*
|
|
4
|
+
* Replaces the older `ArtifactUploader` port from D0030. Differences:
|
|
5
|
+
* - Paths anchor to `RunId` (not `ReportId`) via the registry's `objectPath`.
|
|
6
|
+
* - Supports both `"bulk"` and `"per-entry"` layouts.
|
|
7
|
+
* - A dedicated `writeManifest()` method for the run manifest at
|
|
8
|
+
* `runs/{runId}/manifest.json`.
|
|
9
|
+
*
|
|
10
|
+
* Producer steps call writer methods directly with the artifact type from
|
|
11
|
+
* `ARTIFACT_REGISTRY`. Path construction, schema validation, and entry-key
|
|
12
|
+
* sanitization live in the registry, not the call site.
|
|
13
|
+
*
|
|
14
|
+
* @see docs/decisions/D0032-run-anchored-artifact-store.md
|
|
15
|
+
* @see packages/core/src/artifact-registry.ts
|
|
16
|
+
*/
|
|
17
|
+
/** No-op writer — every method resolves to null. Used when no storage is configured. */
|
|
18
|
+
export class NoOpArtifactWriter {
|
|
19
|
+
async writeBulk() {
|
|
20
|
+
return null;
|
|
21
|
+
}
|
|
22
|
+
async writePerEntry() {
|
|
23
|
+
return null;
|
|
24
|
+
}
|
|
25
|
+
async writeManifest() {
|
|
26
|
+
return null;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
@@ -11,9 +11,10 @@
|
|
|
11
11
|
* Fields marked optional are transitional — they will become required
|
|
12
12
|
* as downstream consumers are converted to use them.
|
|
13
13
|
*/
|
|
14
|
+
import type { RunId } from "../types/branded-ids.js";
|
|
14
15
|
import type { DebugOptions, EvalMode, PluginRegistry } from "../types/index.js";
|
|
15
16
|
import type { ArtifactCollector } from "./artifact-collector.js";
|
|
16
|
-
import type {
|
|
17
|
+
import type { ArtifactWriter } from "./artifact-writer.js";
|
|
17
18
|
import type { CacheStore } from "./cache-store.js";
|
|
18
19
|
import type { DocFetcher } from "./doc-fetcher.js";
|
|
19
20
|
import type { EvalRunner } from "./eval-runner.js";
|
|
@@ -197,8 +198,8 @@ export interface ResolvedConfig {
|
|
|
197
198
|
* Created per-test by createTestContext().
|
|
198
199
|
*/
|
|
199
200
|
export interface AppContext {
|
|
200
|
-
/**
|
|
201
|
-
readonly
|
|
201
|
+
/** Artifact writer — writes run artifacts + manifest to external storage, e.g. GCS (D0032) */
|
|
202
|
+
readonly artifactWriter?: ArtifactWriter;
|
|
202
203
|
/** Evaluation caching (filesystem + optional Content Lake fallback) */
|
|
203
204
|
readonly cache?: CacheStore;
|
|
204
205
|
/** Artifact capture collector (no-op when --capture is not set) */
|
|
@@ -220,6 +221,15 @@ export interface AppContext {
|
|
|
220
221
|
* require this field.
|
|
221
222
|
*/
|
|
222
223
|
readonly reportStore?: ReportStorePort;
|
|
224
|
+
/**
|
|
225
|
+
* Identity for this pipeline run. Generated once at composition-root
|
|
226
|
+
* time; every step reads it from the context rather than regenerating.
|
|
227
|
+
* Artifacts anchor to this id (`gs://…/runs/{runId}/…`) regardless of
|
|
228
|
+
* whether the run eventually publishes a report.
|
|
229
|
+
*
|
|
230
|
+
* @see docs/decisions/D0032-run-anchored-artifact-store.md
|
|
231
|
+
*/
|
|
232
|
+
readonly runId: RunId;
|
|
223
233
|
/**
|
|
224
234
|
* Report delivery sinks (Slack, BigQuery, webhooks).
|
|
225
235
|
* Empty array when no sinks are configured.
|
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
* Ports define the contracts between the domain kernel and the outside world.
|
|
5
5
|
* Adapters (in packages/eval) implement these interfaces.
|
|
6
6
|
*/
|
|
7
|
-
export type { ArtifactCollector,
|
|
8
|
-
export type {
|
|
9
|
-
export {
|
|
7
|
+
export type { ArtifactCollector, CaptureFlushResult, CaptureManifest, CaptureManifestEntry, } from "./artifact-collector.js";
|
|
8
|
+
export type { ArtifactEntry, ArtifactWriter } from "./artifact-writer.js";
|
|
9
|
+
export { NoOpArtifactWriter } from "./artifact-writer.js";
|
|
10
10
|
export type { ArtifactContentDiff, CaptureDiffReport, ComparisonMode, ComparisonOptions, InventoryDiff, JsonDiffEntry, MetadataComparison, ScoreComparison, SecurityScan, TimingComparison, } from "./capture-comparator.js";
|
|
11
11
|
export type { CacheEntryMetadata, CacheKey, CacheLookupResult, CacheRecordInput, CacheStore, } from "./cache-store.js";
|
|
12
12
|
export type { ConfigSource } from "./config-source.js";
|
|
@@ -4,5 +4,5 @@
|
|
|
4
4
|
* Ports define the contracts between the domain kernel and the outside world.
|
|
5
5
|
* Adapters (in packages/eval) implement these interfaces.
|
|
6
6
|
*/
|
|
7
|
-
export {
|
|
7
|
+
export { NoOpArtifactWriter } from "./artifact-writer.js";
|
|
8
8
|
export { canonicalDocRefLabel, isIdRef, isPathRef, isPerspectiveRef, isSlugRef, isTemplatedAssertion, } from "./task-source.js";
|
|
@@ -104,6 +104,15 @@ export declare function taskId(raw: string): Result<TaskId, IdValidationError>;
|
|
|
104
104
|
* Valid format: `run_` prefix followed by alphanumeric characters.
|
|
105
105
|
*/
|
|
106
106
|
export declare function runId(raw: string): Result<RunId, IdValidationError>;
|
|
107
|
+
/**
|
|
108
|
+
* Generate a new `RunId` using a time-sortable UUIDv7 payload.
|
|
109
|
+
*
|
|
110
|
+
* The `run_` prefix plus the hyphen-stripped UUIDv7 yields 36 characters
|
|
111
|
+
* that sort lexicographically by creation time (millisecond resolution) — same pattern used for
|
|
112
|
+
* `ReportId`. One generator call per pipeline invocation; every step
|
|
113
|
+
* reads the resulting id from `AppContext.runId`.
|
|
114
|
+
*/
|
|
115
|
+
export declare function generateRunId(): RunId;
|
|
107
116
|
/**
|
|
108
117
|
* Parse a raw string into a `SuiteId`.
|
|
109
118
|
*
|
|
@@ -59,6 +59,27 @@ export function runId(raw) {
|
|
|
59
59
|
}
|
|
60
60
|
return ok(raw);
|
|
61
61
|
}
|
|
62
|
+
/**
|
|
63
|
+
* Generate a new `RunId` using a time-sortable UUIDv7 payload.
|
|
64
|
+
*
|
|
65
|
+
* The `run_` prefix plus the hyphen-stripped UUIDv7 yields 36 characters
|
|
66
|
+
* that sort lexicographically by creation time (millisecond resolution) — same pattern used for
|
|
67
|
+
* `ReportId`. One generator call per pipeline invocation; every step
|
|
68
|
+
* reads the resulting id from `AppContext.runId`.
|
|
69
|
+
*/
|
|
70
|
+
export function generateRunId() {
|
|
71
|
+
const now = Date.now();
|
|
72
|
+
const uuid = crypto.randomUUID();
|
|
73
|
+
// UUID v7: encode 48-bit timestamp in the first 12 hex chars
|
|
74
|
+
const hex = now.toString(16).padStart(12, "0");
|
|
75
|
+
const v7 = hex.slice(0, 8) +
|
|
76
|
+
hex.slice(8, 12) +
|
|
77
|
+
"7" +
|
|
78
|
+
uuid.slice(15, 18) +
|
|
79
|
+
uuid.slice(19, 23) +
|
|
80
|
+
uuid.slice(24);
|
|
81
|
+
return `run_${v7}`;
|
|
82
|
+
}
|
|
62
83
|
/**
|
|
63
84
|
* Parse a raw string into a `SuiteId`.
|
|
64
85
|
*
|