@sanity/ailf 4.0.7 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ailf.js +6 -1
- package/dist/_vendor/ailf-core/schemas/external-providers.d.ts +136 -0
- package/dist/_vendor/ailf-core/schemas/external-providers.js +136 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/index.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -3
- package/dist/_vendor/ailf-core/schemas/report.d.ts +251 -0
- package/dist/_vendor/ailf-core/schemas/report.js +235 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/services/index.js +1 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.d.ts +38 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.js +696 -0
- package/dist/_vendor/ailf-core/types/api-requests.d.ts +159 -0
- package/dist/_vendor/ailf-core/types/api-requests.js +27 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +112 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.js +18 -0
- package/dist/_vendor/ailf-core/types/repo-config.d.ts +146 -0
- package/dist/_vendor/ailf-core/types/repo-config.js +18 -0
- package/dist/_vendor/ailf-shared/index.d.ts +7 -5
- package/dist/_vendor/ailf-shared/index.js +7 -5
- package/dist/adapters/api-client/types.d.ts +2 -5
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +58 -1
- package/dist/adapters/task-sources/content-lake-task-source.js +1 -1
- package/dist/adapters/task-sources/index.d.ts +1 -1
- package/dist/adapters/task-sources/index.js +1 -1
- package/dist/adapters/task-sources/repo-schemas.d.ts +3 -2
- package/dist/adapters/task-sources/repo-schemas.js +3 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +6 -6
- package/dist/adapters/task-sources/repo-validation.js +1 -1
- package/dist/agent-observer/agentic-provider.d.ts +1 -0
- package/dist/agent-observer/agentic-provider.js +43 -36
- package/dist/agent-observer/config-schemas.d.ts +61 -0
- package/dist/agent-observer/config-schemas.js +65 -0
- package/dist/agent-observer/provider.d.ts +1 -0
- package/dist/agent-observer/provider.js +19 -17
- package/dist/cli.js +4 -4
- package/dist/commands/validate-tasks.js +2 -2
- package/dist/composition-root.js +4 -2
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/job-store.js +2 -2
- package/dist/lib/dotenv-resolution.d.ts +21 -0
- package/dist/lib/dotenv-resolution.js +30 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +14 -3
- package/dist/orchestration/steps/run-eval-step.js +21 -3
- package/dist/pipeline/agent-behavior-report.d.ts +2 -8
- package/dist/pipeline/cache.d.ts +2 -2
- package/dist/pipeline/checks.d.ts +10 -2
- package/dist/pipeline/checks.js +14 -4
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +3 -3
- package/dist/pipeline/compiler/promptfoo-compiler.js +7 -11
- package/dist/pipeline/compiler/provider-assembler.js +33 -3
- package/dist/pipeline/compiler/rubric-resolution.d.ts +2 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -5
- package/dist/pipeline/mirror-repo-tasks.js +16 -8
- package/dist/pipeline/pr-comment.d.ts +22 -9
- package/dist/pipeline/pr-comment.js +52 -472
- package/dist/pipeline/resolve-mappings.d.ts +8 -3
- package/dist/promptfoo-providers/mock-path.d.ts +12 -0
- package/dist/promptfoo-providers/mock-path.js +15 -0
- package/dist/report-store.d.ts +63 -1
- package/dist/report-store.js +111 -31
- package/dist/sanity/client.d.ts +58 -0
- package/dist/sanity/client.js +106 -0
- package/package.json +8 -7
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @sanity/ailf-core — Report schema (W0191)
|
|
3
|
+
*
|
|
4
|
+
* Runtime Zod gate for `ailf.report` documents at the ReportStore
|
|
5
|
+
* read/write boundary. Mirrors the W0073 pattern that
|
|
6
|
+
* `ContentLakeAuthorableTaskSchema` applies to `ailf.task` — turn silent
|
|
7
|
+
* shape drift into a loud parse failure at the Content Lake boundary.
|
|
8
|
+
*
|
|
9
|
+
* **Scope A** (this file): top-level shape is `passthrough()` so that
|
|
10
|
+
* adding a new top-level Report field on the TS side does not silently
|
|
11
|
+
* fail validation before the schema author updates this mirror.
|
|
12
|
+
* `provenance` (and nested `provenance.lineage`) are `strict()` because
|
|
13
|
+
* they are fully mirrored — unknown keys there signal real drift.
|
|
14
|
+
* `summary`, `comparison`, and `artifactManifest` are modeled as
|
|
15
|
+
* passthrough objects (only `summary` must be non-empty); their deep contents
|
|
16
|
+
* (`ScoreSummary`, `FeatureScore`, the W0051 slim shapes, etc.) are
|
|
17
|
+
* intentionally out of scope and may grow into a Scope B follow-up.
|
|
18
|
+
*
|
|
19
|
+
* @see packages/eval/src/adapters/task-sources/content-lake-task-source.ts
|
|
20
|
+
* — sibling W0073 gate
|
|
21
|
+
* @see packages/core/src/types/index.ts — `Report`, `ReportProvenance`,
|
|
22
|
+
* `ReportLineage`
|
|
23
|
+
* @see packages/shared/src/run-context.ts — `RunContext`
|
|
24
|
+
* @see docs/work-items/W0191-report-store-schema-gate.json
|
|
25
|
+
*/
|
|
26
|
+
import { z } from "zod";
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// RunContext building blocks (mirrors packages/shared/src/run-context.ts)
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
/**
 * Owner of a run: a required, non-empty `team` and an optional
 * `individual`. Unknown keys are rejected.
 */
const RunOwnerSchema = z.strictObject({
  team: z.string().min(1),
  individual: z.string().optional(),
});
|
|
36
|
+
/**
 * Executor variant tagged `type: "user"`. Only `type` and `surface` are
 * required; unknown keys are rejected.
 */
const RunExecutorUserSchema = z.strictObject({
  type: z.literal("user"),
  name: z.string().optional(),
  // PII: capture of `email` is gated by AILF_CAPTURE_EMAIL, so documents
  // stored while capture was enabled may legitimately include it.
  email: z.string().optional(),
  surface: z.enum(["cli", "studio", "api"]),
  githubActor: z.string().optional(),
});
|
|
47
|
+
/**
 * Executor variant tagged `type: "system"`. Requires a non-empty `name`;
 * `workflow` and `runId` are optional. Unknown keys are rejected.
 */
const RunExecutorSystemSchema = z.strictObject({
  type: z.literal("system"),
  name: z.string().min(1),
  workflow: z.string().optional(),
  runId: z.string().optional(),
});
|
|
55
|
+
/** Executor union, discriminated on `type` ("user" | "system"). */
const RunExecutorSchema = z.discriminatedUnion(
  "type",
  [RunExecutorUserSchema, RunExecutorSystemSchema],
);
|
|
59
|
+
/**
 * Host description: non-empty `platform` and `arch`, optional `ci`.
 * Unknown keys are rejected.
 */
const RunHostSchema = z.strictObject({
  platform: z.string().min(1),
  arch: z.string().min(1),
  ci: z.string().optional(),
});
|
|
66
|
+
/**
 * Tool versions recorded for a run; both fields are required non-empty
 * strings. Unknown keys are rejected.
 */
const RunToolSchema = z.strictObject({
  ailfVersion: z.string().min(1),
  nodeVersion: z.string().min(1),
});
|
|
72
|
+
/**
 * Run trigger union, discriminated on `type`. Every variant is strict,
 * so keys belonging to a different variant are rejected.
 */
const RunTriggerSchema = z.discriminatedUnion("type", [
  // "ci": both the run id and the workflow name are required.
  z.strictObject({
    type: z.literal("ci"),
    runId: z.string().min(1),
    workflow: z.string().min(1),
  }),
  // "cross-repo": the caller repo is required, the caller ref is optional.
  z.strictObject({
    type: z.literal("cross-repo"),
    callerRef: z.string().optional(),
    callerRepo: z.string().min(1),
  }),
  // "manual": carries no fields beyond the tag.
  z.strictObject({ type: z.literal("manual") }),
  // "scheduled": requires a non-empty schedule string.
  z.strictObject({
    type: z.literal("scheduled"),
    schedule: z.string().min(1),
  }),
  // "webhook": the source is required, the document id is optional.
  z.strictObject({
    type: z.literal("webhook"),
    documentId: z.string().optional(),
    source: z.string().min(1),
  }),
]);
|
|
102
|
+
/**
 * Git coordinates of a run: non-empty `branch`, `repo` and `sha`, plus an
 * optional numeric `prNumber`. Unknown keys are rejected.
 */
const RunGitSchema = z.strictObject({
  branch: z.string().min(1),
  prNumber: z.number().optional(),
  repo: z.string().min(1),
  sha: z.string().min(1),
});
|
|
110
|
+
/**
 * Source descriptor: `baseUrl` and `name` are required non-empty strings;
 * `dataset`, `perspective` and `projectId` are optional. Unknown keys are
 * rejected.
 */
const RunSourceSchema = z.strictObject({
  baseUrl: z.string().min(1),
  dataset: z.string().optional(),
  name: z.string().min(1),
  perspective: z.string().optional(),
  projectId: z.string().optional(),
});
|
|
119
|
+
/** One model entry: non-empty `id` and `label`. Unknown keys are rejected. */
const RunModelEntrySchema = z.strictObject({
  id: z.string().min(1),
  label: z.string().min(1),
});
|
|
125
|
+
// ---------------------------------------------------------------------------
|
|
126
|
+
// ReportLineage (mirrors `ReportLineage` in core types)
|
|
127
|
+
//
|
|
128
|
+
// The TS type narrows `RunContext.lineage` (which is `RunLineage` with
|
|
129
|
+
// `parentJobId`) down to `{ rerunOf?, comparedAgainst? }`. In practice
|
|
130
|
+
// `publish-report-step.ts` spreads existing lineage, so a stored
|
|
131
|
+
// `provenance.lineage` may legitimately carry `parentJobId` inherited
|
|
132
|
+
// from the run side. We model the runtime contract — strict, but
|
|
133
|
+
// `parentJobId` allowed — so that legitimate data does not trip the gate.
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
/**
 * Lineage stored under `provenance.lineage`. All three keys are optional
 * strings; any other key is rejected.
 */
export const ReportLineageSchema = z.strictObject({
  rerunOf: z.string().optional(),
  comparedAgainst: z.string().optional(),
  parentJobId: z.string().optional(),
});
|
|
142
|
+
// ---------------------------------------------------------------------------
|
|
143
|
+
// ReportAutoScope (mirrors `ReportAutoScope` in core types)
|
|
144
|
+
// ---------------------------------------------------------------------------
|
|
145
|
+
/**
 * Auto-scope block of a report's provenance. Every field is required and
 * both the outer object and `impactSummary` reject unknown keys.
 */
const ReportAutoScopeSchema = z.strictObject({
  enabled: z.boolean(),
  affectedTaskIds: z.array(z.string()),
  skippedTaskIds: z.array(z.string()),
  perspective: z.string().min(1),
  // Numeric added / modified / removed counters.
  impactSummary: z.strictObject({
    added: z.number(),
    modified: z.number(),
    removed: z.number(),
  }),
});
|
|
160
|
+
// ---------------------------------------------------------------------------
|
|
161
|
+
// PromptfooUrlEntry (mirrors `PromptfooUrlEntry` in core types)
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
/** A `{ mode, url }` pair; both are non-empty strings. Unknown keys are rejected. */
const PromptfooUrlEntrySchema = z.strictObject({
  mode: z.string().min(1),
  url: z.string().min(1),
});
|
|
169
|
+
// ---------------------------------------------------------------------------
|
|
170
|
+
// ReportProvenance — full mirror of `ReportProvenance extends RunContext`
|
|
171
|
+
// ---------------------------------------------------------------------------
|
|
172
|
+
/**
 * Strict schema for `Report.provenance`: the RunContext fields followed by
 * the ReportProvenance-only additions. Any key not listed here fails
 * validation.
 */
export const ReportProvenanceSchema = z.strictObject({
  // --- RunContext fields ---
  areas: z.array(z.string()),
  classification: z.enum(["official", "adhoc", "experimental", "test", "external"]),
  evalFingerprint: z.string().optional(),
  executor: RunExecutorSchema,
  git: RunGitSchema.optional(),
  graderModel: z.string().min(1),
  host: RunHostSchema.optional(),
  labels: z.array(z.string()).optional(),
  lineage: ReportLineageSchema.optional(),
  mode: z.string().min(1),
  models: z.array(RunModelEntrySchema),
  owner: RunOwnerSchema,
  purpose: z.string().optional(),
  source: RunSourceSchema,
  taskIds: z.array(z.string()).optional(),
  tool: RunToolSchema.optional(),
  trigger: RunTriggerSchema,

  // --- ReportProvenance additions ---
  autoScope: ReportAutoScopeSchema.optional(),
  contextHash: z.string().optional(),
  promptfooUrl: z.string().optional(),
  promptfooUrls: z.array(PromptfooUrlEntrySchema).optional(),
  runId: z.string().min(1),
  targetDocuments: z.array(z.string()).optional(),
});
|
|
207
|
+
// ---------------------------------------------------------------------------
|
|
208
|
+
// ReportSchema — top-level Report shape
|
|
209
|
+
//
|
|
210
|
+
// Top-level uses `.passthrough()`: adding a new top-level Report field on
|
|
211
|
+
// the TS side should not break stored documents at the read boundary
|
|
212
|
+
// before the schema author updates this mirror. `summary`, `comparison`,
|
|
213
|
+
// and `artifactManifest` are also passthrough — their deep shapes
|
|
214
|
+
// (ScoreSummary, FeatureScore, the W0051 slim types) are out of Scope A.
|
|
215
|
+
// ---------------------------------------------------------------------------
|
|
216
|
+
// String-keyed record whose values are not validated (`unknown`). Reused by
// ReportSchema for `summary`, `comparison`, and `artifactManifest`.
const RecordPassthroughSchema = z.record(z.string(), z.unknown());
|
|
217
|
+
/**
 * Top-level report schema. The object is loose: keys not declared here are
 * kept rather than rejected, while `provenance` is validated strictly by
 * `ReportProvenanceSchema`.
 */
export const ReportSchema = z.looseObject({
  id: z.string().min(1),
  // ISO datetime; `offset: true` also accepts explicit timezone offsets.
  completedAt: z.iso.datetime({ offset: true }),
  durationMs: z.number().nonnegative(),
  // Contents are not validated, but the record must have at least one key.
  summary: RecordPassthroughSchema.refine(
    (record) => Object.keys(record).length > 0,
    { message: "summary must be a non-empty object" },
  ),
  provenance: ReportProvenanceSchema,
  // `null` is accepted because the eval write path persists
  // `comparison: null` when the slim copy is empty.
  comparison: RecordPassthroughSchema.nullable().optional(),
  artifactManifest: RecordPassthroughSchema.optional(),
  // `null` is accepted on both because the eval write path stores
  // `tag: report.tag ?? null` and `title: report.title ?? null`.
  tag: z.string().nullable().optional(),
  title: z.string().nullable().optional(),
});
|
|
@@ -12,3 +12,4 @@ export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-f
|
|
|
12
12
|
export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, type AggregationStrategy, type AreaScore, type AssertionScore, type DimensionScore, type EnsembleGradingConfig, type GraderTransitionConfig, type TaskScore, type TaskScoreOptions, } from "./scoring-engine.js";
|
|
13
13
|
export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";
|
|
14
14
|
export { buildSlimReportSummary } from "./slim-report-summary.js";
|
|
15
|
+
export { reportToMarkdown, type RenderableReport, } from "./report-to-markdown.js";
|
|
@@ -12,3 +12,4 @@ export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-f
|
|
|
12
12
|
export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, } from "./scoring-engine.js";
|
|
13
13
|
export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";
|
|
14
14
|
export { buildSlimReportSummary } from "./slim-report-summary.js";
|
|
15
|
+
export { reportToMarkdown, } from "./report-to-markdown.js";
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* report-to-markdown.ts — Canonical PR-comment markdown renderer (W0150).
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for rendering an AILF report as PR-comment
|
|
5
|
+
* markdown. Used by:
|
|
6
|
+
* - the API gateway (`/v1/reports/:id/markdown`)
|
|
7
|
+
* - the eval pipeline (`ailf pr-comment` CLI / `pipeline/pr-comment.ts`)
|
|
8
|
+
*
|
|
9
|
+
* Operates on a structurally lenient `RenderableReport` shape so callers
|
|
10
|
+
* can pass either the persisted slim Report (Sanity doc) or an in-memory
|
|
11
|
+
* envelope built from `score-summary.json` + `comparison-report.json`.
|
|
12
|
+
*
|
|
13
|
+
* Canonical formatting decisions (W0150):
|
|
14
|
+
* - Header: level-1 (`# {emoji} AI Literacy Score Report`).
|
|
15
|
+
* - Footer: markdown link form `[view detailed results](url)`. The link
|
|
16
|
+
* URL comes from `provenance.promptfooUrls[0].url`.
|
|
17
|
+
* - Source verification block (sourceVerification + sourceIsolation) is
|
|
18
|
+
* rendered when present on the summary — preserves info from agentic
|
|
19
|
+
* / sandboxed local-mode runs without breaking remote-mode reports
|
|
20
|
+
* that don't carry those fields.
|
|
21
|
+
*/
|
|
22
|
+
/**
 * Input accepted by `reportToMarkdown`. Only `summary` is required; every
 * other member may be omitted.
 */
export interface RenderableReport {
  /** Id of the Sanity report; not set for local-only runs. */
  id?: string;
  /** ISO timestamp. When missing, `summary.timestamp` is used instead. */
  completedAt?: string;
  /** Duration of the run in milliseconds; not set for local-only runs. */
  durationMs?: number;
  /** Tag / label of the run. */
  tag?: string;
  /** Summary in slim or full form; typed loosely as `unknown`. */
  summary: unknown;
  /** Comparison report, i.e. the delta against a baseline. */
  comparison?: unknown;
  /** Provenance; supplies `promptfooUrls` for the footer link. */
  provenance?: unknown;
}
|
|
38
|
+
/**
 * Renders a report as PR-comment markdown.
 *
 * @param report - The report to render; see `RenderableReport`.
 * @returns The rendered markdown as a string.
 */
export declare function reportToMarkdown(report: RenderableReport): string;
|