@sanity/ailf 4.0.7 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/bin/ailf.js +6 -1
  2. package/dist/_vendor/ailf-core/schemas/external-providers.d.ts +136 -0
  3. package/dist/_vendor/ailf-core/schemas/external-providers.js +136 -0
  4. package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
  5. package/dist/_vendor/ailf-core/schemas/index.js +2 -0
  6. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -3
  7. package/dist/_vendor/ailf-core/schemas/report.d.ts +251 -0
  8. package/dist/_vendor/ailf-core/schemas/report.js +235 -0
  9. package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
  10. package/dist/_vendor/ailf-core/services/index.js +1 -0
  11. package/dist/_vendor/ailf-core/services/report-to-markdown.d.ts +38 -0
  12. package/dist/_vendor/ailf-core/services/report-to-markdown.js +696 -0
  13. package/dist/_vendor/ailf-core/types/api-requests.d.ts +159 -0
  14. package/dist/_vendor/ailf-core/types/api-requests.js +27 -0
  15. package/dist/_vendor/ailf-core/types/index.d.ts +3 -0
  16. package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +112 -0
  17. package/dist/_vendor/ailf-core/types/pipeline-request.js +18 -0
  18. package/dist/_vendor/ailf-core/types/repo-config.d.ts +146 -0
  19. package/dist/_vendor/ailf-core/types/repo-config.js +18 -0
  20. package/dist/_vendor/ailf-shared/index.d.ts +7 -5
  21. package/dist/_vendor/ailf-shared/index.js +7 -5
  22. package/dist/adapters/api-client/types.d.ts +2 -5
  23. package/dist/adapters/task-sources/content-lake-task-source.d.ts +58 -1
  24. package/dist/adapters/task-sources/content-lake-task-source.js +1 -1
  25. package/dist/adapters/task-sources/index.d.ts +1 -1
  26. package/dist/adapters/task-sources/index.js +1 -1
  27. package/dist/adapters/task-sources/repo-schemas.d.ts +3 -2
  28. package/dist/adapters/task-sources/repo-schemas.js +3 -1
  29. package/dist/adapters/task-sources/repo-validation.d.ts +6 -6
  30. package/dist/adapters/task-sources/repo-validation.js +1 -1
  31. package/dist/agent-observer/agentic-provider.d.ts +1 -0
  32. package/dist/agent-observer/agentic-provider.js +43 -36
  33. package/dist/agent-observer/config-schemas.d.ts +61 -0
  34. package/dist/agent-observer/config-schemas.js +65 -0
  35. package/dist/agent-observer/provider.d.ts +1 -0
  36. package/dist/agent-observer/provider.js +19 -17
  37. package/dist/cli.js +4 -4
  38. package/dist/commands/validate-tasks.js +2 -2
  39. package/dist/composition-root.js +4 -2
  40. package/dist/index.d.ts +1 -1
  41. package/dist/index.js +1 -1
  42. package/dist/job-store.js +2 -2
  43. package/dist/lib/dotenv-resolution.d.ts +21 -0
  44. package/dist/lib/dotenv-resolution.js +30 -0
  45. package/dist/orchestration/steps/mirror-repo-tasks-step.js +14 -3
  46. package/dist/orchestration/steps/run-eval-step.js +21 -3
  47. package/dist/pipeline/agent-behavior-report.d.ts +2 -8
  48. package/dist/pipeline/cache.d.ts +2 -2
  49. package/dist/pipeline/checks.d.ts +10 -2
  50. package/dist/pipeline/checks.js +14 -4
  51. package/dist/pipeline/compiler/literacy-bridge.js +2 -2
  52. package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +2 -2
  53. package/dist/pipeline/compiler/mode-handlers/index.d.ts +1 -1
  54. package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +2 -2
  55. package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
  56. package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +3 -3
  57. package/dist/pipeline/compiler/promptfoo-compiler.js +7 -11
  58. package/dist/pipeline/compiler/provider-assembler.js +33 -3
  59. package/dist/pipeline/compiler/rubric-resolution.d.ts +2 -2
  60. package/dist/pipeline/mirror-repo-tasks.d.ts +13 -5
  61. package/dist/pipeline/mirror-repo-tasks.js +16 -8
  62. package/dist/pipeline/pr-comment.d.ts +22 -9
  63. package/dist/pipeline/pr-comment.js +52 -472
  64. package/dist/pipeline/resolve-mappings.d.ts +8 -3
  65. package/dist/promptfoo-providers/mock-path.d.ts +12 -0
  66. package/dist/promptfoo-providers/mock-path.js +15 -0
  67. package/dist/report-store.d.ts +63 -1
  68. package/dist/report-store.js +111 -31
  69. package/dist/sanity/client.d.ts +58 -0
  70. package/dist/sanity/client.js +106 -0
  71. package/package.json +8 -7
@@ -0,0 +1,235 @@
1
+ /**
2
+ * @sanity/ailf-core — Report schema (W0191)
3
+ *
4
+ * Runtime Zod gate for `ailf.report` documents at the ReportStore
5
+ * read/write boundary. Mirrors the W0073 pattern that
6
+ * `ContentLakeAuthorableTaskSchema` applies to `ailf.task` — turn silent
7
+ * shape drift into a loud parse failure at the Content Lake boundary.
8
+ *
9
+ * **Scope A** (this file): top-level shape is `passthrough()` so that
10
+ * adding a new top-level Report field on the TS side does not silently
11
+ * fail validation before the schema author updates this mirror.
12
+ * `provenance` (and nested `provenance.lineage`) are `strict()` because
13
+ * they are fully mirrored — unknown keys there signal real drift.
14
+ * `summary`, `comparison`, and `artifactManifest` are modeled as
15
+ * open key/value records (only `summary` must be non-empty); their deep contents
16
+ * (`ScoreSummary`, `FeatureScore`, the W0051 slim shapes, etc.) are
17
+ * intentionally out of scope and may grow into a Scope B follow-up.
18
+ *
19
+ * @see packages/eval/src/adapters/task-sources/content-lake-task-source.ts
20
+ * — sibling W0073 gate
21
+ * @see packages/core/src/types/index.ts — `Report`, `ReportProvenance`,
22
+ * `ReportLineage`
23
+ * @see packages/shared/src/run-context.ts — `RunContext`
24
+ * @see docs/work-items/W0191-report-store-schema-gate.json
25
+ */
26
+ import { z } from "zod";
27
+ // ---------------------------------------------------------------------------
28
+ // RunContext building blocks (mirrors packages/shared/src/run-context.ts)
29
+ // ---------------------------------------------------------------------------
30
+ const RunOwnerSchema = z
31
+ .object({
32
+ team: z.string().min(1),
33
+ individual: z.string().optional(),
34
+ })
35
+ .strict();
36
+ const RunExecutorUserSchema = z
37
+ .object({
38
+ type: z.literal("user"),
39
+ name: z.string().optional(),
40
+ // `email` carries PII and is gated by AILF_CAPTURE_EMAIL on capture; it
41
+ // may legitimately appear here on stored documents that captured it.
42
+ email: z.string().optional(),
43
+ surface: z.enum(["cli", "studio", "api"]),
44
+ githubActor: z.string().optional(),
45
+ })
46
+ .strict();
47
+ const RunExecutorSystemSchema = z
48
+ .object({
49
+ type: z.literal("system"),
50
+ name: z.string().min(1),
51
+ workflow: z.string().optional(),
52
+ runId: z.string().optional(),
53
+ })
54
+ .strict();
55
+ const RunExecutorSchema = z.discriminatedUnion("type", [
56
+ RunExecutorUserSchema,
57
+ RunExecutorSystemSchema,
58
+ ]);
59
+ const RunHostSchema = z
60
+ .object({
61
+ platform: z.string().min(1),
62
+ arch: z.string().min(1),
63
+ ci: z.string().optional(),
64
+ })
65
+ .strict();
66
+ const RunToolSchema = z
67
+ .object({
68
+ ailfVersion: z.string().min(1),
69
+ nodeVersion: z.string().min(1),
70
+ })
71
+ .strict();
72
+ const RunTriggerSchema = z.discriminatedUnion("type", [
73
+ z
74
+ .object({
75
+ type: z.literal("ci"),
76
+ runId: z.string().min(1),
77
+ workflow: z.string().min(1),
78
+ })
79
+ .strict(),
80
+ z
81
+ .object({
82
+ type: z.literal("cross-repo"),
83
+ callerRef: z.string().optional(),
84
+ callerRepo: z.string().min(1),
85
+ })
86
+ .strict(),
87
+ z.object({ type: z.literal("manual") }).strict(),
88
+ z
89
+ .object({
90
+ type: z.literal("scheduled"),
91
+ schedule: z.string().min(1),
92
+ })
93
+ .strict(),
94
+ z
95
+ .object({
96
+ type: z.literal("webhook"),
97
+ documentId: z.string().optional(),
98
+ source: z.string().min(1),
99
+ })
100
+ .strict(),
101
+ ]);
102
+ const RunGitSchema = z
103
+ .object({
104
+ branch: z.string().min(1),
105
+ prNumber: z.number().optional(),
106
+ repo: z.string().min(1),
107
+ sha: z.string().min(1),
108
+ })
109
+ .strict();
110
+ const RunSourceSchema = z
111
+ .object({
112
+ baseUrl: z.string().min(1),
113
+ dataset: z.string().optional(),
114
+ name: z.string().min(1),
115
+ perspective: z.string().optional(),
116
+ projectId: z.string().optional(),
117
+ })
118
+ .strict();
119
+ const RunModelEntrySchema = z
120
+ .object({
121
+ id: z.string().min(1),
122
+ label: z.string().min(1),
123
+ })
124
+ .strict();
125
+ // ---------------------------------------------------------------------------
126
+ // ReportLineage (mirrors `ReportLineage` in core types)
127
+ //
128
+ // The TS type narrows `RunContext.lineage` (which is `RunLineage` with
129
+ // `parentJobId`) down to `{ rerunOf?, comparedAgainst? }`. In practice
130
+ // `publish-report-step.ts` spreads existing lineage, so a stored
131
+ // `provenance.lineage` may legitimately carry `parentJobId` inherited
132
+ // from the run side. We model the runtime contract — strict, but
133
+ // `parentJobId` allowed — so that legitimate data does not trip the gate.
134
+ // ---------------------------------------------------------------------------
135
+ export const ReportLineageSchema = z
136
+ .object({
137
+ rerunOf: z.string().optional(),
138
+ comparedAgainst: z.string().optional(),
139
+ parentJobId: z.string().optional(),
140
+ })
141
+ .strict();
142
+ // ---------------------------------------------------------------------------
143
+ // ReportAutoScope (mirrors `ReportAutoScope` in core types)
144
+ // ---------------------------------------------------------------------------
145
+ const ReportAutoScopeSchema = z
146
+ .object({
147
+ enabled: z.boolean(),
148
+ affectedTaskIds: z.array(z.string()),
149
+ skippedTaskIds: z.array(z.string()),
150
+ perspective: z.string().min(1),
151
+ impactSummary: z
152
+ .object({
153
+ added: z.number(),
154
+ modified: z.number(),
155
+ removed: z.number(),
156
+ })
157
+ .strict(),
158
+ })
159
+ .strict();
160
+ // ---------------------------------------------------------------------------
161
+ // PromptfooUrlEntry (mirrors `PromptfooUrlEntry` in core types)
162
+ // ---------------------------------------------------------------------------
163
+ const PromptfooUrlEntrySchema = z
164
+ .object({
165
+ mode: z.string().min(1),
166
+ url: z.string().min(1),
167
+ })
168
+ .strict();
169
+ // ---------------------------------------------------------------------------
170
+ // ReportProvenance — full mirror of `ReportProvenance extends RunContext`
171
+ // ---------------------------------------------------------------------------
172
+ export const ReportProvenanceSchema = z
173
+ .object({
174
+ // RunContext fields
175
+ areas: z.array(z.string()),
176
+ classification: z.enum([
177
+ "official",
178
+ "adhoc",
179
+ "experimental",
180
+ "test",
181
+ "external",
182
+ ]),
183
+ evalFingerprint: z.string().optional(),
184
+ executor: RunExecutorSchema,
185
+ git: RunGitSchema.optional(),
186
+ graderModel: z.string().min(1),
187
+ host: RunHostSchema.optional(),
188
+ labels: z.array(z.string()).optional(),
189
+ lineage: ReportLineageSchema.optional(),
190
+ mode: z.string().min(1),
191
+ models: z.array(RunModelEntrySchema),
192
+ owner: RunOwnerSchema,
193
+ purpose: z.string().optional(),
194
+ source: RunSourceSchema,
195
+ taskIds: z.array(z.string()).optional(),
196
+ tool: RunToolSchema.optional(),
197
+ trigger: RunTriggerSchema,
198
+ // ReportProvenance additions
199
+ autoScope: ReportAutoScopeSchema.optional(),
200
+ contextHash: z.string().optional(),
201
+ promptfooUrl: z.string().optional(),
202
+ promptfooUrls: z.array(PromptfooUrlEntrySchema).optional(),
203
+ runId: z.string().min(1),
204
+ targetDocuments: z.array(z.string()).optional(),
205
+ })
206
+ .strict();
207
+ // ---------------------------------------------------------------------------
208
+ // ReportSchema — top-level Report shape
209
+ //
210
+ // Top-level uses `.passthrough()`: adding a new top-level Report field on
211
+ // the TS side should not break stored documents at the read boundary
212
+ // before the schema author updates this mirror. `summary`, `comparison`,
213
+ // and `artifactManifest` are also passthrough — their deep shapes
214
+ // (ScoreSummary, FeatureScore, the W0051 slim types) are out of Scope A.
215
+ // ---------------------------------------------------------------------------
216
+ const RecordPassthroughSchema = z.record(z.string(), z.unknown());
217
+ export const ReportSchema = z
218
+ .object({
219
+ id: z.string().min(1),
220
+ completedAt: z.iso.datetime({ offset: true }),
221
+ durationMs: z.number().nonnegative(),
222
+ summary: RecordPassthroughSchema.refine((s) => Object.keys(s).length > 0, {
223
+ message: "summary must be a non-empty object",
224
+ }),
225
+ provenance: ReportProvenanceSchema,
226
+ // The eval write path persists `comparison: null` when the slim copy
227
+ // is empty, so the schema accepts null at the wire boundary.
228
+ comparison: RecordPassthroughSchema.nullable().optional(),
229
+ artifactManifest: RecordPassthroughSchema.optional(),
230
+ // The eval write path stores `tag: report.tag ?? null` and
231
+ // `title: report.title ?? null`, so the schema accepts null on both.
232
+ tag: z.string().nullable().optional(),
233
+ title: z.string().nullable().optional(),
234
+ })
235
+ .passthrough();
@@ -12,3 +12,4 @@ export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-f
12
12
  export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, type AggregationStrategy, type AreaScore, type AssertionScore, type DimensionScore, type EnsembleGradingConfig, type GraderTransitionConfig, type TaskScore, type TaskScoreOptions, } from "./scoring-engine.js";
13
13
  export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";
14
14
  export { buildSlimReportSummary } from "./slim-report-summary.js";
15
+ export { reportToMarkdown, type RenderableReport, } from "./report-to-markdown.js";
@@ -12,3 +12,4 @@ export { formatComparisonMarkdown, formatComparisonTable, } from "./comparison-f
12
12
  export { aggregateAreas, aggregateDimensions, computeEnsembleScore, computeTaskScore, normalizeScore, } from "./scoring-engine.js";
13
13
  export { extractModelName, extractProvider, mergeConfig, modelMatchesMode, resolveModelVariants, } from "./config-helpers.js";
14
14
  export { buildSlimReportSummary } from "./slim-report-summary.js";
15
+ export { reportToMarkdown, } from "./report-to-markdown.js";
@@ -0,0 +1,38 @@
1
+ /**
2
+ * report-to-markdown.ts — Canonical PR-comment markdown renderer (W0150).
3
+ *
4
+ * Single source of truth for rendering an AILF report as PR-comment
5
+ * markdown. Used by:
6
+ * - the API gateway (`/v1/reports/:id/markdown`)
7
+ * - the eval pipeline (`ailf pr-comment` CLI / `pipeline/pr-comment.ts`)
8
+ *
9
+ * Operates on a structurally lenient `RenderableReport` shape so callers
10
+ * can pass either the persisted slim Report (Sanity doc) or an in-memory
11
+ * envelope built from `score-summary.json` + `comparison-report.json`.
12
+ *
13
+ * Canonical formatting decisions (W0150):
14
+ * - Header: level-1 (`# {emoji} AI Literacy Score Report`).
15
+ * - Footer: markdown link form `[view detailed results](url)`. The link
16
+ * URL comes from `provenance.promptfooUrls[0].url`.
17
+ * - Source verification block (sourceVerification + sourceIsolation) is
18
+ * rendered when present on the summary — preserves info from agentic
19
+ * / sandboxed local-mode runs without breaking remote-mode reports
20
+ * that don't carry those fields.
21
+ */
22
+ export interface RenderableReport {
23
+ /** Sanity report id; absent for local-only runs. */
24
+ id?: string;
25
+ /** ISO timestamp; falls back to `summary.timestamp` when absent. */
26
+ completedAt?: string;
27
+ /** Run duration in ms; absent for local-only runs. */
28
+ durationMs?: number;
29
+ /** Run tag/label. */
30
+ tag?: string;
31
+ /** Slim or full summary. Structurally lenient. */
32
+ summary: unknown;
33
+ /** Comparison report (delta against a baseline). */
34
+ comparison?: unknown;
35
+ /** Provenance — surfaces promptfooUrls for the footer link. */
36
+ provenance?: unknown;
37
+ }
38
+ export declare function reportToMarkdown(report: RenderableReport): string;