@sanity/ailf 4.0.7 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ailf.js +6 -1
- package/dist/_vendor/ailf-core/schemas/external-providers.d.ts +136 -0
- package/dist/_vendor/ailf-core/schemas/external-providers.js +136 -0
- package/dist/_vendor/ailf-core/schemas/index.d.ts +2 -0
- package/dist/_vendor/ailf-core/schemas/index.js +2 -0
- package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +2 -3
- package/dist/_vendor/ailf-core/schemas/report.d.ts +251 -0
- package/dist/_vendor/ailf-core/schemas/report.js +235 -0
- package/dist/_vendor/ailf-core/services/index.d.ts +1 -0
- package/dist/_vendor/ailf-core/services/index.js +1 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.d.ts +38 -0
- package/dist/_vendor/ailf-core/services/report-to-markdown.js +696 -0
- package/dist/_vendor/ailf-core/types/api-requests.d.ts +159 -0
- package/dist/_vendor/ailf-core/types/api-requests.js +27 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +3 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.d.ts +112 -0
- package/dist/_vendor/ailf-core/types/pipeline-request.js +18 -0
- package/dist/_vendor/ailf-core/types/repo-config.d.ts +146 -0
- package/dist/_vendor/ailf-core/types/repo-config.js +18 -0
- package/dist/_vendor/ailf-shared/index.d.ts +7 -5
- package/dist/_vendor/ailf-shared/index.js +7 -5
- package/dist/adapters/api-client/types.d.ts +2 -5
- package/dist/adapters/task-sources/content-lake-task-source.d.ts +58 -1
- package/dist/adapters/task-sources/content-lake-task-source.js +1 -1
- package/dist/adapters/task-sources/index.d.ts +1 -1
- package/dist/adapters/task-sources/index.js +1 -1
- package/dist/adapters/task-sources/repo-schemas.d.ts +3 -2
- package/dist/adapters/task-sources/repo-schemas.js +3 -1
- package/dist/adapters/task-sources/repo-validation.d.ts +6 -6
- package/dist/adapters/task-sources/repo-validation.js +1 -1
- package/dist/agent-observer/agentic-provider.d.ts +1 -0
- package/dist/agent-observer/agentic-provider.js +43 -36
- package/dist/agent-observer/config-schemas.d.ts +61 -0
- package/dist/agent-observer/config-schemas.js +65 -0
- package/dist/agent-observer/provider.d.ts +1 -0
- package/dist/agent-observer/provider.js +19 -17
- package/dist/cli.js +4 -4
- package/dist/commands/validate-tasks.js +2 -2
- package/dist/composition-root.js +4 -2
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/job-store.js +2 -2
- package/dist/lib/dotenv-resolution.d.ts +21 -0
- package/dist/lib/dotenv-resolution.js +30 -0
- package/dist/orchestration/steps/mirror-repo-tasks-step.js +14 -3
- package/dist/orchestration/steps/run-eval-step.js +21 -3
- package/dist/pipeline/agent-behavior-report.d.ts +2 -8
- package/dist/pipeline/cache.d.ts +2 -2
- package/dist/pipeline/checks.d.ts +10 -2
- package/dist/pipeline/checks.js +14 -4
- package/dist/pipeline/compiler/literacy-bridge.js +2 -2
- package/dist/pipeline/compiler/mode-handlers/agent-harness/types.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/knowledge-probe/types.d.ts +2 -2
- package/dist/pipeline/compiler/mode-handlers/literacy/index.d.ts +1 -1
- package/dist/pipeline/compiler/mode-handlers/literacy/types.d.ts +3 -3
- package/dist/pipeline/compiler/promptfoo-compiler.js +7 -11
- package/dist/pipeline/compiler/provider-assembler.js +33 -3
- package/dist/pipeline/compiler/rubric-resolution.d.ts +2 -2
- package/dist/pipeline/mirror-repo-tasks.d.ts +13 -5
- package/dist/pipeline/mirror-repo-tasks.js +16 -8
- package/dist/pipeline/pr-comment.d.ts +22 -9
- package/dist/pipeline/pr-comment.js +52 -472
- package/dist/pipeline/resolve-mappings.d.ts +8 -3
- package/dist/promptfoo-providers/mock-path.d.ts +12 -0
- package/dist/promptfoo-providers/mock-path.js +15 -0
- package/dist/report-store.d.ts +63 -1
- package/dist/report-store.js +111 -31
- package/dist/sanity/client.d.ts +58 -0
- package/dist/sanity/client.js +106 -0
- package/package.json +8 -7
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* API Service Gateway request envelope types.
|
|
3
|
+
*
|
|
4
|
+
* These types describe the HTTP request bodies the API gateway accepts on
|
|
5
|
+
* its public-facing routes. They live in core (not in packages/api/src/) so
|
|
6
|
+
* the gateway's Zod validators can `satisfies z.ZodType<...>` against them
|
|
7
|
+
* — making the parse-output → domain-type contract a compile-time gate
|
|
8
|
+
* rather than a runtime assumption (D0045 / W0187).
|
|
9
|
+
*
|
|
10
|
+
* The shapes here are intentionally a LOOSE subset of the canonical domain
|
|
11
|
+
* types they relate to:
|
|
12
|
+
*
|
|
13
|
+
* - `ValidateConfigRequest.config` overlaps with `ModelsConfig`, but the
|
|
14
|
+
* API surface accepts string[] for modes (not branded EvalMode[]) and
|
|
15
|
+
* all fields are optional. This is a deliberate gateway contract — the
|
|
16
|
+
* route's job is to validate the user's input, not to require a fully
|
|
17
|
+
* domain-shaped payload.
|
|
18
|
+
* - `ValidateTaskDef` overlaps with `GeneralizedTaskDefinition` but uses
|
|
19
|
+
* the older flat shape (`featureArea`, `canonicalDocs`) instead of the
|
|
20
|
+
* mode-discriminated union. Harmonizing these is tracked separately —
|
|
21
|
+
* out of scope for W0187.
|
|
22
|
+
*
|
|
23
|
+
* Any fields not on these types are rejected by the gateway's validators
|
|
24
|
+
* and won't reach domain code; the satisfies clause is what pins that
|
|
25
|
+
* contract in place.
|
|
26
|
+
*/
|
|
27
|
+
/**
|
|
28
|
+
* POST /v1/validate/config — request envelope.
|
|
29
|
+
* @see packages/api/src/schemas/validate-requests.ts
|
|
30
|
+
*/
|
|
31
|
+
export interface ValidateConfigRequest {
|
|
32
|
+
config: {
|
|
33
|
+
defaults?: Record<string, unknown>;
|
|
34
|
+
grader?: {
|
|
35
|
+
id: string;
|
|
36
|
+
label?: string;
|
|
37
|
+
};
|
|
38
|
+
maxConcurrency?: number;
|
|
39
|
+
models?: {
|
|
40
|
+
config?: Record<string, unknown>;
|
|
41
|
+
id: string;
|
|
42
|
+
label?: string;
|
|
43
|
+
modes?: string[];
|
|
44
|
+
}[];
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Canonical doc reference used inside `ValidateTaskDef.canonicalDocs`.
|
|
49
|
+
* Mirrors the four shapes the API gateway accepts.
|
|
50
|
+
*/
|
|
51
|
+
export type ValidateTaskCanonicalDocRef = {
|
|
52
|
+
reason?: string;
|
|
53
|
+
slug: string;
|
|
54
|
+
} | {
|
|
55
|
+
path: string;
|
|
56
|
+
reason?: string;
|
|
57
|
+
} | {
|
|
58
|
+
id: string;
|
|
59
|
+
path?: string;
|
|
60
|
+
reason?: string;
|
|
61
|
+
slug?: string;
|
|
62
|
+
} | {
|
|
63
|
+
perspective: string;
|
|
64
|
+
reason?: string;
|
|
65
|
+
};
|
|
66
|
+
/**
|
|
67
|
+
* Assertion definition accepted by the API gateway's task validator.
|
|
68
|
+
* Either an `llm-rubric` shape with explicit fields, or a generic
|
|
69
|
+
* type+value pair for non-LLM assertions.
|
|
70
|
+
*/
|
|
71
|
+
export type ValidateTaskAssertionDef = {
|
|
72
|
+
criteria: string[];
|
|
73
|
+
template: string;
|
|
74
|
+
type: "llm-rubric";
|
|
75
|
+
weight?: number;
|
|
76
|
+
} | {
|
|
77
|
+
type: string;
|
|
78
|
+
value?: unknown;
|
|
79
|
+
weight?: number;
|
|
80
|
+
};
|
|
81
|
+
/**
|
|
82
|
+
* The "task definition" shape accepted by POST /v1/validate/task.
|
|
83
|
+
*
|
|
84
|
+
* Distinct from `GeneralizedTaskDefinition` (the mode-discriminated
|
|
85
|
+
* canonical task type): this is the older flat shape preserved at the
|
|
86
|
+
* API surface. Harmonization tracked separately.
|
|
87
|
+
*/
|
|
88
|
+
export interface ValidateTaskDef {
|
|
89
|
+
assertions: ValidateTaskAssertionDef[];
|
|
90
|
+
baseline?: {
|
|
91
|
+
enabled?: boolean;
|
|
92
|
+
rubric?: "abbreviated" | "full" | "none";
|
|
93
|
+
};
|
|
94
|
+
canonicalDocs: ValidateTaskCanonicalDocRef[];
|
|
95
|
+
description: string;
|
|
96
|
+
docCoverage: boolean;
|
|
97
|
+
extraVars?: Record<string, unknown>;
|
|
98
|
+
featureArea: string;
|
|
99
|
+
id: string;
|
|
100
|
+
referenceSolution: string;
|
|
101
|
+
taskPrompt: string;
|
|
102
|
+
}
|
|
103
|
+
/**
|
|
104
|
+
* POST /v1/validate/task — request envelope.
|
|
105
|
+
* @see packages/api/src/schemas/validate-requests.ts
|
|
106
|
+
*/
|
|
107
|
+
export interface ValidateTaskRequest {
|
|
108
|
+
strict: boolean;
|
|
109
|
+
tasks: ValidateTaskDef[];
|
|
110
|
+
}
|
|
111
|
+
/**
|
|
112
|
+
* POST /v1/validate/source — request envelope.
|
|
113
|
+
* @see packages/api/src/schemas/validate-requests.ts
|
|
114
|
+
*/
|
|
115
|
+
export interface ValidateSourceRequest {
|
|
116
|
+
source: {
|
|
117
|
+
baseUrl?: string;
|
|
118
|
+
dataset?: string;
|
|
119
|
+
projectId?: string;
|
|
120
|
+
token?: string;
|
|
121
|
+
};
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* POST /v1/lookup-doc — request envelope.
|
|
125
|
+
* @see packages/api/src/schemas/lookup-request.ts
|
|
126
|
+
*/
|
|
127
|
+
export interface LookupDocRequest {
|
|
128
|
+
keyword: string;
|
|
129
|
+
limit: number;
|
|
130
|
+
source: string;
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Inline-score-or-report-id reference used by `CompareRequest`.
|
|
134
|
+
*
|
|
135
|
+
* Either:
|
|
136
|
+
* - A partial inline score summary (used by the gateway's compare()
|
|
137
|
+
* endpoint without a Content Lake round-trip), with `passthrough`
|
|
138
|
+
* extra fields preserved, or
|
|
139
|
+
* - A string report ID (the gateway fetches the corresponding
|
|
140
|
+
* `ScoreSummary` from Content Lake before comparing).
|
|
141
|
+
*/
|
|
142
|
+
export type CompareScoreSummaryRef = string | {
|
|
143
|
+
[extra: string]: unknown;
|
|
144
|
+
features?: Record<string, unknown>;
|
|
145
|
+
overall: {
|
|
146
|
+
[extra: string]: unknown;
|
|
147
|
+
areaCount: number;
|
|
148
|
+
avgScore: number;
|
|
149
|
+
};
|
|
150
|
+
};
|
|
151
|
+
/**
|
|
152
|
+
* POST /v1/compare — request envelope.
|
|
153
|
+
* @see packages/api/src/schemas/compare-request.ts
|
|
154
|
+
*/
|
|
155
|
+
export interface CompareRequest {
|
|
156
|
+
baseline: CompareScoreSummaryRef;
|
|
157
|
+
experiment: CompareScoreSummaryRef;
|
|
158
|
+
threshold: number;
|
|
159
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* API Service Gateway request envelope types.
|
|
3
|
+
*
|
|
4
|
+
* These types describe the HTTP request bodies the API gateway accepts on
|
|
5
|
+
* its public-facing routes. They live in core (not in packages/api/src/) so
|
|
6
|
+
* the gateway's Zod validators can `satisfies z.ZodType<...>` against them
|
|
7
|
+
* — making the parse-output → domain-type contract a compile-time gate
|
|
8
|
+
* rather than a runtime assumption (D0045 / W0187).
|
|
9
|
+
*
|
|
10
|
+
* The shapes here are intentionally a LOOSE subset of the canonical domain
|
|
11
|
+
* types they relate to:
|
|
12
|
+
*
|
|
13
|
+
* - `ValidateConfigRequest.config` overlaps with `ModelsConfig`, but the
|
|
14
|
+
* API surface accepts string[] for modes (not branded EvalMode[]) and
|
|
15
|
+
* all fields are optional. This is a deliberate gateway contract — the
|
|
16
|
+
* route's job is to validate the user's input, not to require a fully
|
|
17
|
+
* domain-shaped payload.
|
|
18
|
+
* - `ValidateTaskDef` overlaps with `GeneralizedTaskDefinition` but uses
|
|
19
|
+
* the older flat shape (`featureArea`, `canonicalDocs`) instead of the
|
|
20
|
+
* mode-discriminated union. Harmonizing these is tracked separately —
|
|
21
|
+
* out of scope for W0187.
|
|
22
|
+
*
|
|
23
|
+
* Any fields not on these types are rejected by the gateway's validators
|
|
24
|
+
* and won't reach domain code; the satisfies clause is what pins that
|
|
25
|
+
* contract in place.
|
|
26
|
+
*/
|
|
27
|
+
export {};
|
|
@@ -23,6 +23,9 @@ export { evalModeType } from "./eval-mode-config.js";
|
|
|
23
23
|
export type { DependencyEdge, ResolvedFixture, TaskGraph, TaskNode, } from "./task-graph.js";
|
|
24
24
|
export type { VariableDeclaration, VariableEnvelope, VariableProvenance, VariableSource, } from "./variable-envelope.js";
|
|
25
25
|
export type { EvalTrace, ToolCallCategory, ToolCallRecord, TraceEvent, TraceSpan, TraceTokenUsage, } from "./trace.js";
|
|
26
|
+
export type { CompareRequest, CompareScoreSummaryRef, LookupDocRequest, ValidateConfigRequest, ValidateSourceRequest, ValidateTaskAssertionDef, ValidateTaskCanonicalDocRef, ValidateTaskDef, ValidateTaskRequest, } from "./api-requests.js";
|
|
27
|
+
export type { AilfEvalWorkflow, AilfEvalWorkflowJob, AilfEvalWorkflowStep, RepoAgenticConfig, RepoArtifactsConfig, RepoConfig, RepoExecutionConfig, RepoOutputConfig, RepoOwnerConfig, RepoPublishConfig, RepoReportStoreConfig, RepoSourceConfig, RepoTaskSourceConfig, RepoTriggersConfig, ScheduleTriggerConfig, TriggerConfig, TriggerMode, } from "./repo-config.js";
|
|
28
|
+
export type { PipelineRequest, PipelineRequestCallback, PipelineRequestCallerExecutor, PipelineRequestCallerGit, PipelineRequestCallerOwner, PipelineRequestDebug, PipelineRequestTaskSource, } from "./pipeline-request.js";
|
|
26
29
|
export type { ArtifactId, AssociationAxis, AssociationValues, Brand, EntryKey, Err, FixtureId, IdValidationError, NewReportId, Ok, ProviderId, PromptId, Result, ResultId, RubricId, RunFingerprint, RunId, SuiteId, TaskId, TaskSlug, TraceId, } from "./branded-ids.js";
|
|
27
30
|
export { err, fixtureId, generateRunId, ok, providerId, resultId, runId, suiteId, taskId, traceId, } from "./branded-ids.js";
|
|
28
31
|
export type { AgentHarnessTaskDefinition, ContentLakeAuthorableMode, ContentLakeAuthorableTask, CustomTaskDefinition, GeneralizedAssertionDefinition, GeneralizedDocRef, GeneralizedTaskDefinition, GeneralizedTemplatedAssertion, GeneralizedValueAssertion, IdDocRef, KnowledgeProbeTaskDefinition, LiteracyTaskDefinition, MCPServerTaskDefinition, PathDocRef, PerspectiveDocRef, RubricRef, SlugDocRef, TaskCommonFields, TaskDifficulty, TaskOptions, TaskProviderConfig, TaskStatus, } from "./generalized-task.js";
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline-request domain types.
|
|
3
|
+
*
|
|
4
|
+
* Authored independently of `PipelineRequestSchema` in
|
|
5
|
+
* `packages/core/src/schemas/pipeline-request.ts` so the schema can
|
|
6
|
+
* `satisfies z.ZodType<PipelineRequest>` against this type — making
|
|
7
|
+
* schema/type drift a compile error rather than a silent regression
|
|
8
|
+
* (D0045 / W0187).
|
|
9
|
+
*
|
|
10
|
+
* `PipelineRequestSchema` is the universal pipeline invocation contract,
|
|
11
|
+
* shared between the API gateway (POST /v1/pipeline), the CLI (`--config`
|
|
12
|
+
* flag), and GitHub Actions workflows (`repository_dispatch` payload).
|
|
13
|
+
*
|
|
14
|
+
* Names here are prefixed `PipelineRequest*` to avoid colliding with the
|
|
15
|
+
* existing `DebugOptions` in `core/types/index.ts`, which has a stricter
|
|
16
|
+
* (required `enabled`) shape used by other pipeline code.
|
|
17
|
+
*/
|
|
18
|
+
import type { LiteracyVariant, RawEvalMode, RunClassification, RunExecutorSurface } from "../../ailf-shared/index.d.ts";
|
|
19
|
+
/** Debug-mode options as accepted on the wire (looser than core's `DebugOptions`). */
|
|
20
|
+
export interface PipelineRequestDebug {
|
|
21
|
+
enabled?: boolean;
|
|
22
|
+
firstN?: number;
|
|
23
|
+
pattern?: string;
|
|
24
|
+
sample?: number;
|
|
25
|
+
}
|
|
26
|
+
/** Webhook callback target for API-triggered evaluations. */
|
|
27
|
+
export interface PipelineRequestCallback {
|
|
28
|
+
headers?: Record<string, string>;
|
|
29
|
+
url: string;
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Git context from the *calling* repository (not the AILF runner repo).
|
|
33
|
+
* Carries provenance across cross-repo dispatches.
|
|
34
|
+
*/
|
|
35
|
+
export interface PipelineRequestCallerGit {
|
|
36
|
+
branch?: string;
|
|
37
|
+
prNumber?: number;
|
|
38
|
+
repo: string;
|
|
39
|
+
sha?: string;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Caller-side `RunOwner` payload (D0037). Team is required; individual
|
|
43
|
+
* optional.
|
|
44
|
+
*/
|
|
45
|
+
export interface PipelineRequestCallerOwner {
|
|
46
|
+
individual?: string;
|
|
47
|
+
team: string;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Caller-side executor payload — user variant only. System executors are
|
|
51
|
+
* always server-detected; the wire format excludes them. The Zod schema
|
|
52
|
+
* is `.strict()` to reject any unknown key (PII guard).
|
|
53
|
+
*/
|
|
54
|
+
export interface PipelineRequestCallerExecutor {
|
|
55
|
+
githubActor?: string;
|
|
56
|
+
name?: string;
|
|
57
|
+
surface: RunExecutorSurface;
|
|
58
|
+
type: "user";
|
|
59
|
+
}
|
|
60
|
+
/** Task-source override (W0077 Phase 6h) — mirrors `RepoConfig.taskSource`. */
|
|
61
|
+
export interface PipelineRequestTaskSource {
|
|
62
|
+
repoTasksPath?: string;
|
|
63
|
+
type?: "content-lake" | "repo";
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* The universal pipeline invocation contract.
|
|
67
|
+
*
|
|
68
|
+
* The Zod schema at `packages/core/src/schemas/pipeline-request.ts`
|
|
69
|
+
* (`PipelineRequestSchema`) asserts `satisfies z.ZodType<PipelineRequest>`
|
|
70
|
+
* against this type. Field set must match the schema exactly.
|
|
71
|
+
*/
|
|
72
|
+
export interface PipelineRequest {
|
|
73
|
+
allowedOrigins?: string[];
|
|
74
|
+
areas?: string[];
|
|
75
|
+
backend?: "cloud-run" | "github-actions";
|
|
76
|
+
callback?: PipelineRequestCallback;
|
|
77
|
+
callerGit?: PipelineRequestCallerGit;
|
|
78
|
+
changedDocs?: string[];
|
|
79
|
+
classification?: RunClassification;
|
|
80
|
+
compare?: boolean;
|
|
81
|
+
compareBaseline?: string;
|
|
82
|
+
compareThreshold?: number;
|
|
83
|
+
concurrency?: number;
|
|
84
|
+
dataset?: string;
|
|
85
|
+
debug?: PipelineRequestDebug | boolean;
|
|
86
|
+
executor?: PipelineRequestCallerExecutor;
|
|
87
|
+
gapAnalysis?: boolean;
|
|
88
|
+
graderReplications?: number;
|
|
89
|
+
headers?: Record<string, string>;
|
|
90
|
+
inlineTasks?: Record<string, unknown>[];
|
|
91
|
+
jobId?: string;
|
|
92
|
+
labels?: string[];
|
|
93
|
+
mode?: RawEvalMode;
|
|
94
|
+
noAutoScope?: boolean;
|
|
95
|
+
noCache?: boolean;
|
|
96
|
+
noRemoteCache?: boolean;
|
|
97
|
+
owner?: PipelineRequestCallerOwner;
|
|
98
|
+
perspective?: string;
|
|
99
|
+
presets?: string[];
|
|
100
|
+
projectId?: string;
|
|
101
|
+
publish?: boolean;
|
|
102
|
+
publishTag?: string;
|
|
103
|
+
purpose?: string;
|
|
104
|
+
searchMode?: "off" | "open" | "origin-only";
|
|
105
|
+
source?: string;
|
|
106
|
+
sourceReportId?: string;
|
|
107
|
+
taskMode?: "content-lake" | "inline";
|
|
108
|
+
taskSource?: PipelineRequestTaskSource;
|
|
109
|
+
tasks?: string[];
|
|
110
|
+
urls?: string[];
|
|
111
|
+
variant?: LiteracyVariant;
|
|
112
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pipeline-request domain types.
|
|
3
|
+
*
|
|
4
|
+
* Authored independently of `PipelineRequestSchema` in
|
|
5
|
+
* `packages/core/src/schemas/pipeline-request.ts` so the schema can
|
|
6
|
+
* `satisfies z.ZodType<PipelineRequest>` against this type — making
|
|
7
|
+
* schema/type drift a compile error rather than a silent regression
|
|
8
|
+
* (D0045 / W0187).
|
|
9
|
+
*
|
|
10
|
+
* `PipelineRequestSchema` is the universal pipeline invocation contract,
|
|
11
|
+
* shared between the API gateway (POST /v1/pipeline), the CLI (`--config`
|
|
12
|
+
* flag), and GitHub Actions workflows (`repository_dispatch` payload).
|
|
13
|
+
*
|
|
14
|
+
* Names here are prefixed `PipelineRequest*` to avoid colliding with the
|
|
15
|
+
* existing `DebugOptions` in `core/types/index.ts`, which has a stricter
|
|
16
|
+
* (required `enabled`) shape used by other pipeline code.
|
|
17
|
+
*/
|
|
18
|
+
export {};
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repo-level configuration types for the AI Literacy Framework.
|
|
3
|
+
*
|
|
4
|
+
* These describe `.ailf/config.yaml` (`RepoConfig`) and the structural
|
|
5
|
+
* contract on `.github/workflows/ailf-eval.yml` (`AilfEvalWorkflow`) used
|
|
6
|
+
* by `ailf init` and the Tier 1 init contract test.
|
|
7
|
+
*
|
|
8
|
+
* Authored independently of the Zod schemas in
|
|
9
|
+
* `packages/eval/src/adapters/task-sources/repo-schemas.ts` so the schemas
|
|
10
|
+
* can `satisfies z.ZodType<...>` against them — making schema/type drift a
|
|
11
|
+
* compile error rather than a silent regression (D0045 / W0187).
|
|
12
|
+
*
|
|
13
|
+
* Note on optionality: schema fields declared with `.optional()` produce
|
|
14
|
+
* `T | undefined` from `z.infer`; we mirror that with `?:` properties.
|
|
15
|
+
* Top-level `RepoConfig` itself has all optional groups because a
|
|
16
|
+
* `.ailf/config.yaml` may legitimately omit any of them.
|
|
17
|
+
*/
|
|
18
|
+
/** Mode of an evaluation trigger — drives whether the run executes or just validates. */
|
|
19
|
+
export type TriggerMode = "validate-only" | "eval";
|
|
20
|
+
export interface TriggerConfig {
|
|
21
|
+
blocking?: boolean;
|
|
22
|
+
compare?: boolean;
|
|
23
|
+
mode: TriggerMode;
|
|
24
|
+
notify?: boolean;
|
|
25
|
+
paths?: string[];
|
|
26
|
+
}
|
|
27
|
+
export interface ScheduleTriggerConfig extends TriggerConfig {
|
|
28
|
+
cron: string;
|
|
29
|
+
}
|
|
30
|
+
/** Documentation source — which Sanity project's docs the eval reads. */
|
|
31
|
+
export interface RepoSourceConfig {
|
|
32
|
+
baseUrl?: string;
|
|
33
|
+
dataset?: string;
|
|
34
|
+
projectId?: string;
|
|
35
|
+
studioOrigin?: string;
|
|
36
|
+
}
|
|
37
|
+
/** Report-store destination — which Sanity project receives `ailf.report` documents. */
|
|
38
|
+
export interface RepoReportStoreConfig {
|
|
39
|
+
dataset: string;
|
|
40
|
+
projectId: string;
|
|
41
|
+
}
|
|
42
|
+
/** Publish policy — when `ailf run` writes a report without an explicit flag. */
|
|
43
|
+
export interface RepoPublishConfig {
|
|
44
|
+
auto?: "always" | "full-runs" | "never";
|
|
45
|
+
tag?: string;
|
|
46
|
+
}
|
|
47
|
+
/** Execution-tier knobs — replaces the retired `--concurrency` / `--api-url` flags. */
|
|
48
|
+
export interface RepoExecutionConfig {
|
|
49
|
+
apiUrl?: string;
|
|
50
|
+
concurrency?: number;
|
|
51
|
+
gapAnalysis?: boolean;
|
|
52
|
+
graderReplications?: number;
|
|
53
|
+
}
|
|
54
|
+
/** Where the pipeline writes results (replaces `--output-dir`). */
|
|
55
|
+
export interface RepoOutputConfig {
|
|
56
|
+
dir?: string;
|
|
57
|
+
}
|
|
58
|
+
/** Run-attribution defaults (replaces `--owner-team` / `--owner-individual`). */
|
|
59
|
+
export interface RepoOwnerConfig {
|
|
60
|
+
individual?: string;
|
|
61
|
+
team?: string;
|
|
62
|
+
}
|
|
63
|
+
/** Agentic-mode runtime config (replaces `--header` / `--allowed-origin`). */
|
|
64
|
+
export interface RepoAgenticConfig {
|
|
65
|
+
allowedOrigins?: string[];
|
|
66
|
+
headers?: Record<string, string>;
|
|
67
|
+
}
|
|
68
|
+
/** Artifact-writer config (replaces `--no-artifacts` / `--artifacts-dir` / `--artifacts-exclude`). */
|
|
69
|
+
export interface RepoArtifactsConfig {
|
|
70
|
+
dir?: string;
|
|
71
|
+
enabled?: boolean;
|
|
72
|
+
exclude?: string[];
|
|
73
|
+
}
|
|
74
|
+
/** Task-source config (replaces `--task-source` / `--repo-tasks-path`). */
|
|
75
|
+
export interface RepoTaskSourceConfig {
|
|
76
|
+
repoTasksPath?: string;
|
|
77
|
+
type?: "content-lake" | "repo";
|
|
78
|
+
}
|
|
79
|
+
/** Trigger overrides keyed by trigger name. */
|
|
80
|
+
export interface RepoTriggersConfig {
|
|
81
|
+
main?: TriggerConfig;
|
|
82
|
+
pr?: TriggerConfig;
|
|
83
|
+
"pr-task-change"?: TriggerConfig;
|
|
84
|
+
schedule?: ScheduleTriggerConfig;
|
|
85
|
+
}
|
|
86
|
+
/**
|
|
87
|
+
* Parsed shape of `.ailf/config.yaml`.
|
|
88
|
+
*
|
|
89
|
+
* Every field is optional at the top level because a config file may omit
|
|
90
|
+
* any group. The Zod schema at `packages/eval/src/adapters/task-sources/repo-schemas.ts`
|
|
91
|
+
* (`RepoConfigSchema`) asserts `satisfies z.ZodType<RepoConfig>` against
|
|
92
|
+
* this type.
|
|
93
|
+
*/
|
|
94
|
+
export interface RepoConfig {
|
|
95
|
+
agentic?: RepoAgenticConfig;
|
|
96
|
+
artifacts?: RepoArtifactsConfig;
|
|
97
|
+
execution?: RepoExecutionConfig;
|
|
98
|
+
output?: RepoOutputConfig;
|
|
99
|
+
owner?: RepoOwnerConfig;
|
|
100
|
+
publish?: RepoPublishConfig;
|
|
101
|
+
reportStore?: RepoReportStoreConfig;
|
|
102
|
+
source?: RepoSourceConfig;
|
|
103
|
+
taskSource?: RepoTaskSourceConfig;
|
|
104
|
+
triggers?: RepoTriggersConfig;
|
|
105
|
+
}
|
|
106
|
+
/**
|
|
107
|
+
* One step inside a GitHub Actions workflow job. Loose by design — this is
|
|
108
|
+
* a structural contract, not a literal-byte one. Extra fields are accepted.
|
|
109
|
+
*/
|
|
110
|
+
export interface AilfEvalWorkflowStep {
|
|
111
|
+
[extra: string]: unknown;
|
|
112
|
+
env?: Record<string, unknown>;
|
|
113
|
+
id?: string;
|
|
114
|
+
if?: string;
|
|
115
|
+
name?: string;
|
|
116
|
+
run?: string;
|
|
117
|
+
uses?: string;
|
|
118
|
+
with?: Record<string, unknown>;
|
|
119
|
+
}
|
|
120
|
+
/**
|
|
121
|
+
* One job entry inside a GitHub Actions workflow. Loose by design.
|
|
122
|
+
*/
|
|
123
|
+
export interface AilfEvalWorkflowJob {
|
|
124
|
+
[extra: string]: unknown;
|
|
125
|
+
name?: string;
|
|
126
|
+
permissions?: Record<string, unknown>;
|
|
127
|
+
"runs-on": string | string[];
|
|
128
|
+
steps: AilfEvalWorkflowStep[];
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Structural shape of the `ailf-eval.yml` workflow template emitted by
|
|
132
|
+
* `ailf init`. Validates the consumer-visible contract: a workflow with a
|
|
133
|
+
* non-empty `name`, an `on` trigger spec, and at least one job. The
|
|
134
|
+
* literal YAML body is intentionally not pinned — comments, step
|
|
135
|
+
* ordering, and option flags can shift without breaking consumers.
|
|
136
|
+
*
|
|
137
|
+
* The Zod schema at `packages/eval/src/adapters/task-sources/repo-schemas.ts`
|
|
138
|
+
* (`AilfEvalWorkflowSchema`) asserts `satisfies z.ZodType<AilfEvalWorkflow>`
|
|
139
|
+
* against this type.
|
|
140
|
+
*/
|
|
141
|
+
export interface AilfEvalWorkflow {
|
|
142
|
+
[extra: string]: unknown;
|
|
143
|
+
jobs: Record<string, AilfEvalWorkflowJob>;
|
|
144
|
+
name: string;
|
|
145
|
+
on: unknown;
|
|
146
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Repo-level configuration types for the AI Literacy Framework.
|
|
3
|
+
*
|
|
4
|
+
* These describe `.ailf/config.yaml` (`RepoConfig`) and the structural
|
|
5
|
+
* contract on `.github/workflows/ailf-eval.yml` (`AilfEvalWorkflow`) used
|
|
6
|
+
* by `ailf init` and the Tier 1 init contract test.
|
|
7
|
+
*
|
|
8
|
+
* Authored independently of the Zod schemas in
|
|
9
|
+
* `packages/eval/src/adapters/task-sources/repo-schemas.ts` so the schemas
|
|
10
|
+
* can `satisfies z.ZodType<...>` against them — making schema/type drift a
|
|
11
|
+
* compile error rather than a silent regression (D0045 / W0187).
|
|
12
|
+
*
|
|
13
|
+
* Note on optionality: schema fields declared with `.optional()` produce
|
|
14
|
+
* `T | undefined` from `z.infer`; we mirror that with `?:` properties.
|
|
15
|
+
* Top-level `RepoConfig` itself has all optional groups because a
|
|
16
|
+
* `.ailf/config.yaml` may legitimately omit any of them.
|
|
17
|
+
*/
|
|
18
|
+
export {};
|
|
@@ -9,11 +9,13 @@
|
|
|
9
9
|
* from @sanity/ailf-core, @sanity/ailf, or
|
|
10
10
|
* @sanity/ailf-studio. It is the leaf of the dependency graph.
|
|
11
11
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
12
|
+
* Barrel hygiene rule (D0045): re-exports are explicit (named) rather than
|
|
13
|
+
* `export *`. `export *` chains across many modules trip rollup-plugin-dts's
|
|
14
|
+
* "Ambiguous external namespace resolution" warning even when no symbol
|
|
15
|
+
* actually collides, and worse — silently change which module's runtime
|
|
16
|
+
* symbol downstream consumers receive. The regression caught by W0124 was
|
|
17
|
+
* generalized to the whole barrel under W0186; `barrel.test.ts` locks the
|
|
18
|
+
* surface against future regressions.
|
|
17
19
|
*/
|
|
18
20
|
export { computeCanaryDrift, type CanaryDriftReport, type CanaryReportSlim, type DriftEntry, type DriftThresholds, type DriftVerdict, } from "./canary-drift.js";
|
|
19
21
|
export { type DocumentRef } from "./document-ref.js";
|
|
@@ -9,11 +9,13 @@
|
|
|
9
9
|
* from @sanity/ailf-core, @sanity/ailf, or
|
|
10
10
|
* @sanity/ailf-studio. It is the leaf of the dependency graph.
|
|
11
11
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
12
|
+
* Barrel hygiene rule (D0045): re-exports are explicit (named) rather than
|
|
13
|
+
* `export *`. `export *` chains across many modules trip rollup-plugin-dts's
|
|
14
|
+
* "Ambiguous external namespace resolution" warning even when no symbol
|
|
15
|
+
* actually collides, and worse — silently change which module's runtime
|
|
16
|
+
* symbol downstream consumers receive. The regression caught by W0124 was
|
|
17
|
+
* generalized to the whole barrel under W0186; `barrel.test.ts` locks the
|
|
18
|
+
* surface against future regressions.
|
|
17
19
|
*/
|
|
18
20
|
export { computeCanaryDrift, } from "./canary-drift.js";
|
|
19
21
|
export { FEATURE_FLAGS, } from "./feature-flags.js";
|
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
* @see packages/api/src/routes/pipeline.ts — POST /v1/pipeline response
|
|
12
12
|
* @see packages/api/src/routes/jobs.ts — GET /v1/jobs/:id response
|
|
13
13
|
*/
|
|
14
|
+
import type { ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
15
|
+
export type { ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
|
|
14
16
|
/** Standard envelope fields present on every API response. */
|
|
15
17
|
export interface ApiEnvelope {
|
|
16
18
|
object: string;
|
|
@@ -59,11 +61,6 @@ export interface SubmitResponse extends ApiEnvelope {
|
|
|
59
61
|
estimatedDurationMs?: number;
|
|
60
62
|
runUrl?: string;
|
|
61
63
|
}
|
|
62
|
-
export interface ValidationIssue {
|
|
63
|
-
severity: string;
|
|
64
|
-
source: string;
|
|
65
|
-
message: string;
|
|
66
|
-
}
|
|
67
64
|
export interface ValidationResponse extends ApiEnvelope {
|
|
68
65
|
object: "validation";
|
|
69
66
|
valid: boolean;
|
|
@@ -16,9 +16,66 @@
|
|
|
16
16
|
* @see docs/decisions/D0038-content-lake-authorable-task-modes.md
|
|
17
17
|
*/
|
|
18
18
|
import type { SanityClient } from "@sanity/client";
|
|
19
|
-
import type { FilterOptions, GeneralizedTaskDefinition, TaskSource } from "../../_vendor/ailf-core/index.d.ts";
|
|
19
|
+
import type { ContentLakeAuthorableTask, FilterOptions, GeneralizedTaskDefinition, TaskSource, TaskStatus } from "../../_vendor/ailf-core/index.d.ts";
|
|
20
|
+
/** Shape of a single ailf.task document as returned by the GROQ query. */
|
|
21
|
+
export interface ContentLakeTask {
|
|
22
|
+
areaId?: string;
|
|
23
|
+
assertions?: ContentLakeAssertion[];
|
|
24
|
+
baseline?: {
|
|
25
|
+
enabled?: boolean;
|
|
26
|
+
rubric?: string;
|
|
27
|
+
};
|
|
28
|
+
contextDocs?: ContentLakeCanonicalDoc[];
|
|
29
|
+
description?: string;
|
|
30
|
+
docCoverage?: boolean;
|
|
31
|
+
promptText?: string;
|
|
32
|
+
rawAssert?: {
|
|
33
|
+
threshold?: number;
|
|
34
|
+
type?: string;
|
|
35
|
+
value?: string;
|
|
36
|
+
}[];
|
|
37
|
+
referenceSolutionTitle?: string;
|
|
38
|
+
status?: TaskStatus;
|
|
39
|
+
tags?: string[];
|
|
40
|
+
taskId?: string;
|
|
41
|
+
title?: string;
|
|
42
|
+
}
|
|
43
|
+
/**
|
|
44
|
+
* Context doc ref shape from the Content Lake.
|
|
45
|
+
* The GROQ query projects refType + all possible value fields.
|
|
46
|
+
* Only the field matching refType will have a value.
|
|
47
|
+
*/
|
|
48
|
+
interface ContentLakeCanonicalDoc {
|
|
49
|
+
docId?: string;
|
|
50
|
+
docRefId?: string;
|
|
51
|
+
path?: string;
|
|
52
|
+
perspective?: string;
|
|
53
|
+
reason?: string;
|
|
54
|
+
refType?: string;
|
|
55
|
+
sectionSlug?: string;
|
|
56
|
+
slug?: string;
|
|
57
|
+
}
|
|
58
|
+
/** Assertion shape from the Content Lake (mirrors the Studio schema). */
|
|
59
|
+
interface ContentLakeAssertion {
|
|
60
|
+
criteria?: string[];
|
|
61
|
+
template?: string;
|
|
62
|
+
threshold?: number;
|
|
63
|
+
type?: string;
|
|
64
|
+
value?: string;
|
|
65
|
+
weight?: number;
|
|
66
|
+
}
|
|
20
67
|
export declare class ContentLakeTaskSource implements TaskSource {
|
|
21
68
|
private readonly client;
|
|
22
69
|
constructor(client: SanityClient);
|
|
23
70
|
loadTasks(filter?: FilterOptions): Promise<GeneralizedTaskDefinition[]>;
|
|
24
71
|
}
|
|
72
|
+
/**
|
|
73
|
+
* Map a Content Lake ailf.task document to a `ContentLakeAuthorableTask`.
|
|
74
|
+
*
|
|
75
|
+
* Returns null if the document is missing required fields (taskId,
|
|
76
|
+
* title, areaId, promptText). These are required by the
|
|
77
|
+
* Studio schema, but defensive coding handles edge cases (drafts,
|
|
78
|
+
* partially-created documents, etc.).
|
|
79
|
+
*/
|
|
80
|
+
export declare function mapToAuthorableTask(raw: ContentLakeTask): ContentLakeAuthorableTask | null;
|
|
81
|
+
export {};
|
|
@@ -148,7 +148,7 @@ function buildGroqParams(filter) {
|
|
|
148
148
|
* Studio schema, but defensive coding handles edge cases (drafts,
|
|
149
149
|
* partially-created documents, etc.).
|
|
150
150
|
*/
|
|
151
|
-
function mapToAuthorableTask(raw) {
|
|
151
|
+
export function mapToAuthorableTask(raw) {
|
|
152
152
|
// Required fields — skip malformed documents
|
|
153
153
|
if (!raw.taskId || !raw.title || !raw.areaId || !raw.promptText) {
|
|
154
154
|
return null;
|
|
@@ -3,5 +3,5 @@ export { ContentLakeTaskSource } from "./content-lake-task-source.js";
|
|
|
3
3
|
export { AilfEvalWorkflowSchema, CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseAilfEvalWorkflow, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, type AilfEvalWorkflow, type CanonicalTask, type ContentLakeAuthorableTaskParsed, type CuratedAssertionType, type RepoConfig, type RubricTemplateName, } from "./repo-schemas.js";
|
|
4
4
|
export { RepoTaskSource } from "./repo-task-source.js";
|
|
5
5
|
export { detectTriggerContext, resolveTrigger, type ResolvedTrigger, type TriggerContext, } from "./repo-trigger.js";
|
|
6
|
-
export {
|
|
6
|
+
export { formatRepoValidationResult, validateCanonicalTasks, type RepoValidationMessage, type RepoValidationResult, } from "./repo-validation.js";
|
|
7
7
|
export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, loadTsTaskFileSync, } from "./task-file-loader.js";
|
|
@@ -3,5 +3,5 @@ export { ContentLakeTaskSource } from "./content-lake-task-source.js";
|
|
|
3
3
|
export { AilfEvalWorkflowSchema, CanonicalTaskFileSchema, CanonicalTaskSchema, ContentLakeAuthorableTaskSchema, CURATED_ASSERTION_TYPES, detectLegacyFieldNames, parseAilfEvalWorkflow, parseCanonicalTaskFile, parseRepoConfig, RepoConfigSchema, RUBRIC_TEMPLATE_NAMES, } from "./repo-schemas.js";
|
|
4
4
|
export { RepoTaskSource } from "./repo-task-source.js";
|
|
5
5
|
export { detectTriggerContext, resolveTrigger, } from "./repo-trigger.js";
|
|
6
|
-
export {
|
|
6
|
+
export { formatRepoValidationResult, validateCanonicalTasks, } from "./repo-validation.js";
|
|
7
7
|
export { discoverTsTaskFiles, loadAllTsTaskFiles, loadTsTaskFile, loadTsTaskFileSync, } from "./task-file-loader.js";
|