@sanity/ailf 0.1.33 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/config/airbyte/ai_literacy_framework.connector.yaml +6 -0
- package/config/bigquery/views/reports.sql +1 -0
- package/dist/_vendor/ailf-core/examples/index.d.ts +10 -20
- package/dist/_vendor/ailf-core/examples/index.js +10 -20
- package/dist/_vendor/ailf-core/ports/context.d.ts +1 -1
- package/dist/_vendor/ailf-core/ports/task-source.d.ts +2 -0
- package/dist/_vendor/ailf-core/types/index.d.ts +19 -1
- package/dist/_vendor/ailf-tasks/schemas.d.ts +12 -0
- package/dist/_vendor/ailf-tasks/schemas.js +4 -0
- package/dist/adapters/task-sources/content-lake-task-source.js +9 -1
- package/dist/adapters/task-sources/repo-task-source.js +19 -4
- package/dist/commands/calculate-scores.js +5 -1
- package/dist/commands/publish.js +3 -0
- package/dist/orchestration/steps/calculate-scores-step.js +18 -19
- package/dist/orchestration/steps/publish-report-step.js +12 -1
- package/dist/pipeline/calculate-scores.d.ts +6 -1
- package/dist/pipeline/calculate-scores.js +5 -13
- package/dist/pipeline/compare.js +12 -5
- package/dist/pipeline/generate-configs.js +4 -9
- package/dist/pipeline/mirror-repo-tasks.d.ts +77 -0
- package/dist/pipeline/mirror-repo-tasks.js +141 -27
- package/dist/pipeline/pr-comment.js +5 -2
- package/dist/pipeline/release-report.js +4 -0
- package/dist/pipeline/report-title.d.ts +66 -0
- package/dist/pipeline/report-title.js +118 -0
- package/dist/report-store.d.ts +5 -1
- package/dist/report-store.js +31 -2
- package/dist/sinks/bigquery/index.d.ts +1 -0
- package/dist/sinks/bigquery/index.js +1 -0
- package/dist/sinks/slack/format.js +10 -0
- package/package.json +23 -23
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/report-title.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure function that generates descriptive report titles from provenance
|
|
5
|
+
* metadata. The title is the primary display string shown in dashboards,
|
|
6
|
+
* Slack digests, and Studio views — it conveys trigger context, evaluated
|
|
7
|
+
* areas, source/perspective, and document scope at a glance.
|
|
8
|
+
*
|
|
9
|
+
* Score is intentionally omitted from the title since it is surfaced
|
|
10
|
+
* heavily elsewhere in the UI. The `tag` field (on Report) is preserved
|
|
11
|
+
* as a secondary label; the title is the primary display string.
|
|
12
|
+
*
|
|
13
|
+
* Segments are joined with ` · ` (middle dot with spaces).
|
|
14
|
+
*
|
|
15
|
+
* @see docs/design-docs/report-store/domain-model.md
|
|
16
|
+
* @see packages/eval/src/pipeline/provenance.ts — builds the provenance input
|
|
17
|
+
*/
|
|
18
|
+
import type { EvalMode, ReportTrigger } from "./types.js";
|
|
19
|
+
/**
 * Input required to generate a human-readable report title.
 *
 * `provenance` carries the evaluation metadata the title segments are derived
 * from; `totalAreaCount` is optional context used only by the areas segment.
 */
export interface ReportTitleInput {
    provenance: {
        /**
         * Feature areas that were evaluated. Up to three are listed by name in
         * the title; more than three are summarised as a count or "All areas".
         * An empty list omits the areas segment.
         */
        areas: string[];
        /**
         * Evaluation mode.
         * NOTE(review): the title implementation shown in this package does not
         * appear to read this field — confirm before relying on it in titles.
         */
        mode: EvalMode;
        /** Resolved documentation source */
        source: {
            /** Source name; the value "production" is treated as the default and left out of the title. */
            name: string;
            /** Optional perspective; when set it is rendered as `perspective: <value>`. */
            perspective?: string;
        };
        /**
         * Sanity document IDs targeted (when scoped to specific documents).
         * A single ID is shown verbatim; several are shown as "N documents".
         */
        targetDocuments?: string[];
        /** What triggered the evaluation; drives the first (always present) title segment. */
        trigger: ReportTrigger;
    };
    /**
     * Total number of known feature areas in the system.
     * Used to determine whether to show "All areas" vs "N areas"
     * when more than 3 areas are evaluated. When omitted, "N areas"
     * is always used for more than 3 areas.
     */
    totalAreaCount?: number;
}
|
|
43
|
+
/**
 * Generate a descriptive report title from provenance metadata.
 *
 * The title is composed of up to four segments separated by ` · `:
 *
 * 1. **Trigger context** — what initiated the evaluation (always present)
 * 2. **Areas** — which feature areas were evaluated (omitted if empty)
 * 3. **Source context** — non-default source or perspective (omitted if default)
 * 4. **Target documents** — scoped document IDs (omitted if not scoped)
 *
 * @param input Provenance metadata plus the optional total area count used
 *   to decide between "All areas" and "N areas".
 * @returns The present segments, in the order listed above, joined with ` · `.
 *
 * @example
 * ```ts
 * generateReportTitle({
 *   provenance: {
 *     areas: ["GROQ", "Mutations"],
 *     mode: "baseline",
 *     source: { name: "production" },
 *     trigger: { type: "manual" },
 *   },
 * })
 * // → "Manual eval · GROQ, Mutations"
 * ```
 */
export declare function generateReportTitle(input: ReportTitleInput): string;
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pipeline/report-title.ts
|
|
3
|
+
*
|
|
4
|
+
* Pure function that generates descriptive report titles from provenance
|
|
5
|
+
* metadata. The title is the primary display string shown in dashboards,
|
|
6
|
+
* Slack digests, and Studio views — it conveys trigger context, evaluated
|
|
7
|
+
* areas, source/perspective, and document scope at a glance.
|
|
8
|
+
*
|
|
9
|
+
* Score is intentionally omitted from the title since it is surfaced
|
|
10
|
+
* heavily elsewhere in the UI. The `tag` field (on Report) is preserved
|
|
11
|
+
* as a secondary label; the title is the primary display string.
|
|
12
|
+
*
|
|
13
|
+
* Segments are joined with ` · ` (middle dot with spaces).
|
|
14
|
+
*
|
|
15
|
+
* @see docs/design-docs/report-store/domain-model.md
|
|
16
|
+
* @see packages/eval/src/pipeline/provenance.ts — builds the provenance input
|
|
17
|
+
*/
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
// Segment builders
|
|
20
|
+
// ---------------------------------------------------------------------------
|
|
21
|
+
/** Delimiter inserted between title segments: a middle dot padded with single spaces. */
const SEPARATOR = " · ";
|
|
22
|
+
/**
 * Segment 1 — human-readable trigger context.
 *
 * This segment is documented as "always present", so every path returns a
 * non-empty label. Returning `undefined` here would be rendered as an empty
 * string by `Array.prototype.join`, leaving the title starting with the
 * separator.
 *
 * Mapping by `trigger.type`:
 * - `"scheduled"`  → the schedule name with hyphens turned into spaces and the
 *   first letter upper-cased (e.g. `weekly-digest` → `Weekly digest`);
 *   `Scheduled eval` when the schedule name is missing or blank.
 * - `"ci"`         → `CI eval`
 * - `"webhook"`    → `Content change`
 * - `"cross-repo"` → `Cross-repo (<repo>)` when `callerRepo` looks like
 *   `owner/repo`; plain `Cross-repo` otherwise.
 * - `"manual"`     → `Manual eval`
 * - anything else  → `Eval` (generic fallback for unrecognised trigger types)
 *
 * @param trigger Trigger descriptor discriminated by its `type` field.
 * @returns A non-empty label describing what initiated the evaluation.
 */
function triggerSegment(trigger) {
    switch (trigger?.type) {
        case "scheduled": {
            // Guard against a missing schedule so a malformed record cannot throw.
            const schedule = typeof trigger.schedule === "string" ? trigger.schedule : "";
            const name = schedule.replace(/-/g, " ");
            if (name.trim() === "") {
                return "Scheduled eval";
            }
            return name.charAt(0).toUpperCase() + name.slice(1);
        }
        case "ci":
            return "CI eval";
        case "webhook":
            return "Content change";
        case "cross-repo": {
            // Only show the repo name if callerRepo looks like "owner/repo".
            // Numeric IDs (e.g. GITHUB_REPOSITORY_OWNER_ID fallback) are not useful,
            // and a trailing slash ("owner/") has no repo name to show.
            const repo = typeof trigger.callerRepo === "string" ? trigger.callerRepo : "";
            const shortName = repo.slice(repo.lastIndexOf("/") + 1);
            if (repo.includes("/") && shortName !== "") {
                return `Cross-repo (${shortName})`;
            }
            return "Cross-repo";
        }
        case "manual":
            return "Manual eval";
        default:
            // Unknown or missing trigger type: keep the segment present.
            return "Eval";
    }
}
|
|
47
|
+
/**
 * Segment 2 — evaluated feature areas.
 *
 * - no areas           → segment omitted (`undefined`)
 * - one to three areas → the names, comma-separated
 * - more than three    → "All areas" when the count equals `totalAreaCount`,
 *                        otherwise "<count> areas"
 *
 * @param areas Names of the evaluated areas.
 * @param totalAreaCount Optional number of areas known to the system.
 * @returns The segment text, or `undefined` when there is nothing to show.
 */
function areasSegment(areas, totalAreaCount) {
    const count = areas.length;
    if (count === 0) {
        return undefined;
    }
    if (count > 3) {
        const coversEverything = totalAreaCount !== undefined && count === totalAreaCount;
        return coversEverything ? "All areas" : `${count} areas`;
    }
    return areas.join(", ");
}
|
|
59
|
+
/**
 * Segment 3 — source context.
 *
 * Lists the perspective (as `perspective: <value>`) when one is set, followed
 * by the source name when it is anything other than "production". With the
 * default source and no perspective the segment is omitted.
 *
 * @param source Resolved documentation source (`name`, optional `perspective`).
 * @returns The comma-separated labels, or `undefined` when nothing applies.
 */
function sourceSegment(source) {
    const { perspective, name } = source;
    const labels = [
        ...(perspective ? [`perspective: ${perspective}`] : []),
        ...(name !== "production" ? [name] : []),
    ];
    return labels.length === 0 ? undefined : labels.join(", ");
}
|
|
70
|
+
/**
 * Segment 4 — target documents.
 *
 * - absent or empty list → segment omitted (`undefined`)
 * - exactly one ID       → that ID verbatim
 * - several IDs          → "<count> documents"
 *
 * @param targetDocuments Optional list of document IDs the run was scoped to.
 * @returns The segment text, or `undefined` when the run was not scoped.
 */
function targetDocumentsSegment(targetDocuments) {
    const count = targetDocuments ? targetDocuments.length : 0;
    switch (count) {
        case 0:
            return undefined;
        case 1:
            return targetDocuments[0];
        default:
            return `${count} documents`;
    }
}
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// Public API
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
/**
 * Build the display title for a report from its provenance metadata.
 *
 * Segments appear in a fixed order and are joined with ` · `:
 *
 * 1. **Trigger context** — what initiated the evaluation (always first)
 * 2. **Areas** — evaluated feature areas (skipped when there are none)
 * 3. **Source context** — non-default source and/or perspective (skipped when default)
 * 4. **Target documents** — scoped document IDs (skipped when not scoped)
 *
 * @param input Provenance metadata plus the optional `totalAreaCount`.
 * @returns The title string.
 *
 * @example
 * ```ts
 * generateReportTitle({
 *   provenance: {
 *     areas: ["GROQ", "Mutations"],
 *     mode: "baseline",
 *     source: { name: "production" },
 *     trigger: { type: "manual" },
 *   },
 * })
 * // → "Manual eval · GROQ, Mutations"
 * ```
 */
export function generateReportTitle(input) {
    const { provenance, totalAreaCount } = input;
    // The trigger segment always leads; it is computed first to keep call order.
    const lead = triggerSegment(provenance.trigger);
    // Optional segments in display order; falsy results are dropped.
    const optional = [
        areasSegment(provenance.areas, totalAreaCount),
        sourceSegment(provenance.source),
        targetDocumentsSegment(provenance.targetDocuments),
    ].filter((segment) => Boolean(segment));
    return [lead, ...optional].join(SEPARATOR);
}
|
package/dist/report-store.d.ts
CHANGED
|
@@ -51,9 +51,13 @@ export declare class ReportStore {
|
|
|
51
51
|
* Returns the comparison plus the baseline report ID so the caller
|
|
52
52
|
* can record `provenance.lineage.comparedAgainst`.
|
|
53
53
|
*
|
|
54
|
+
* @param scopedAreas When provided, the baseline's scores are filtered to
|
|
55
|
+
* only include these areas before comparison. This prevents mismatched
|
|
56
|
+
* areas from polluting the overall delta (e.g., release auto-scope
|
|
57
|
+
* evaluates only GROQ but the baseline has all areas).
|
|
54
58
|
* @returns The comparison result with baseline ID, or null if no baseline found
|
|
55
59
|
*/
|
|
56
|
-
autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp): Promise<AutoCompareResult | null>;
|
|
60
|
+
autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp, scopedAreas?: Set<string>): Promise<AutoCompareResult | null>;
|
|
57
61
|
/**
|
|
58
62
|
* Find a report by its evaluation fingerprint (cross-environment cache lookup).
|
|
59
63
|
*
|
package/dist/report-store.js
CHANGED
|
@@ -49,9 +49,13 @@ export class ReportStore {
|
|
|
49
49
|
* Returns the comparison plus the baseline report ID so the caller
|
|
50
50
|
* can record `provenance.lineage.comparedAgainst`.
|
|
51
51
|
*
|
|
52
|
+
* @param scopedAreas When provided, the baseline's scores are filtered to
|
|
53
|
+
* only include these areas before comparison. This prevents mismatched
|
|
54
|
+
* areas from polluting the overall delta (e.g., release auto-scope
|
|
55
|
+
* evaluates only GROQ but the baseline has all areas).
|
|
52
56
|
* @returns The comparison result with baseline ID, or null if no baseline found
|
|
53
57
|
*/
|
|
54
|
-
async autoCompare(currentSummary, provenance, completedAt) {
|
|
58
|
+
async autoCompare(currentSummary, provenance, completedAt, scopedAreas) {
|
|
55
59
|
// 1. Prefer explicit lineage source (deterministic re-run comparison)
|
|
56
60
|
const rerunSourceId = provenance.lineage?.rerunOf;
|
|
57
61
|
let baseline = null;
|
|
@@ -76,7 +80,30 @@ export class ReportStore {
|
|
|
76
80
|
return null;
|
|
77
81
|
}
|
|
78
82
|
try {
|
|
79
|
-
|
|
83
|
+
// When auto-scope is active, filter the baseline to only include
|
|
84
|
+
// areas that were actually evaluated. This produces a fair
|
|
85
|
+
// comparison where the overall delta reflects only tested areas.
|
|
86
|
+
let baselineSummary = baseline.summary;
|
|
87
|
+
if (scopedAreas && scopedAreas.size > 0) {
|
|
88
|
+
const filteredScores = baselineSummary.scores.filter((s) => scopedAreas.has(s.feature));
|
|
89
|
+
if (filteredScores.length > 0 &&
|
|
90
|
+
filteredScores.length < baselineSummary.scores.length) {
|
|
91
|
+
const len = filteredScores.length;
|
|
92
|
+
const avgScore = filteredScores.reduce((s, sc) => s + sc.totalScore, 0) / len;
|
|
93
|
+
const avgDocLift = filteredScores.reduce((s, sc) => s + sc.docLift, 0) / len;
|
|
94
|
+
baselineSummary = {
|
|
95
|
+
...baselineSummary,
|
|
96
|
+
overall: {
|
|
97
|
+
...baselineSummary.overall,
|
|
98
|
+
avgScore,
|
|
99
|
+
avgDocLift,
|
|
100
|
+
},
|
|
101
|
+
scores: filteredScores,
|
|
102
|
+
};
|
|
103
|
+
console.log(` 🎯 Scoped baseline to ${filteredScores.length} of ${baseline.summary.scores.length} areas for comparison`);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
const comparison = compare(baselineSummary, currentSummary);
|
|
80
107
|
return { baselineReportId: baseline.id, comparison };
|
|
81
108
|
}
|
|
82
109
|
catch (error) {
|
|
@@ -176,6 +203,7 @@ export class ReportStore {
|
|
|
176
203
|
reportId: report.id,
|
|
177
204
|
summary: report.summary,
|
|
178
205
|
tag: report.tag ?? null,
|
|
206
|
+
title: report.title ?? null,
|
|
179
207
|
});
|
|
180
208
|
return report.id;
|
|
181
209
|
}
|
|
@@ -228,5 +256,6 @@ function toReport(doc) {
|
|
|
228
256
|
provenance: doc.provenance,
|
|
229
257
|
summary: doc.summary,
|
|
230
258
|
tag: doc.tag,
|
|
259
|
+
title: doc.title,
|
|
231
260
|
};
|
|
232
261
|
}
|
|
@@ -213,6 +213,7 @@ export function flattenReportRow(report) {
|
|
|
213
213
|
source_name: provenance.source.name,
|
|
214
214
|
source_perspective: provenance.source.perspective ?? null,
|
|
215
215
|
tag: report.tag ?? null,
|
|
216
|
+
title: report.title ?? null,
|
|
216
217
|
total_cost: summary.overall.cost?.total ?? null,
|
|
217
218
|
trigger_caller_repo: provenance.trigger.type === "cross-repo"
|
|
218
219
|
? provenance.trigger.callerRepo
|
|
@@ -110,6 +110,16 @@ export function formatRegressionAlert(report) {
|
|
|
110
110
|
type: "section",
|
|
111
111
|
});
|
|
112
112
|
}
|
|
113
|
+
// Not-evaluated areas — informational mention
|
|
114
|
+
if (comparison.notEvaluated?.length > 0) {
|
|
115
|
+
blocks.push({
|
|
116
|
+
text: {
|
|
117
|
+
text: `⏭️ ${comparison.notEvaluated.length} area${comparison.notEvaluated.length === 1 ? "" : "s"} not evaluated: ${comparison.notEvaluated.join(", ")}`,
|
|
118
|
+
type: "mrkdwn",
|
|
119
|
+
},
|
|
120
|
+
type: "section",
|
|
121
|
+
});
|
|
122
|
+
}
|
|
113
123
|
return {
|
|
114
124
|
blocks,
|
|
115
125
|
text: `📉 AI Literacy Score Regression: ${baselineScore} → ${experimentScore} (${formatDelta(delta)})`,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@sanity/ailf",
|
|
3
|
-
"version": "0.1.33",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"private": false,
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "restricted"
|
|
@@ -23,6 +23,27 @@
|
|
|
23
23
|
"canonical",
|
|
24
24
|
"tasks"
|
|
25
25
|
],
|
|
26
|
+
"dependencies": {
|
|
27
|
+
"@google-cloud/bigquery": "^8.1.1",
|
|
28
|
+
"@inquirer/prompts": "^8.3.0",
|
|
29
|
+
"@portabletext/markdown": "^1.0.0",
|
|
30
|
+
"@sanity/client": "^7.3.0",
|
|
31
|
+
"commander": "^14.0.3",
|
|
32
|
+
"dotenv": "^16.4.7",
|
|
33
|
+
"dotenv-cli": "^11.0.0",
|
|
34
|
+
"js-yaml": "^4.1.0",
|
|
35
|
+
"promptfoo": "^0.120.24",
|
|
36
|
+
"zod": "^4.3.6"
|
|
37
|
+
},
|
|
38
|
+
"devDependencies": {
|
|
39
|
+
"@types/js-yaml": "^4.0.9",
|
|
40
|
+
"@types/node": "^22.13.1",
|
|
41
|
+
"tsx": "^4.19.2",
|
|
42
|
+
"typescript": "^5.7.3",
|
|
43
|
+
"@sanity/ailf-core": "0.1.0",
|
|
44
|
+
"@sanity/ailf-shared": "0.1.0",
|
|
45
|
+
"@sanity/ailf-tasks": "0.1.4"
|
|
46
|
+
},
|
|
26
47
|
"scripts": {
|
|
27
48
|
"build": "tsc && tsx scripts/bundle-workspace-deps.ts",
|
|
28
49
|
"generate-configs": "tsx src/cli.ts generate-configs",
|
|
@@ -48,26 +69,5 @@
|
|
|
48
69
|
"discovery-report": "tsx src/cli.ts discovery-report",
|
|
49
70
|
"webhook-server": "tsx src/cli.ts webhook-server",
|
|
50
71
|
"weekly-digest": "tsx src/cli.ts weekly-digest"
|
|
51
|
-
},
|
|
52
|
-
"dependencies": {
|
|
53
|
-
"@google-cloud/bigquery": "^8.1.1",
|
|
54
|
-
"@inquirer/prompts": "^8.3.0",
|
|
55
|
-
"@portabletext/markdown": "^1.0.0",
|
|
56
|
-
"@sanity/client": "^7.3.0",
|
|
57
|
-
"commander": "^14.0.3",
|
|
58
|
-
"dotenv": "^16.4.7",
|
|
59
|
-
"dotenv-cli": "^11.0.0",
|
|
60
|
-
"js-yaml": "^4.1.0",
|
|
61
|
-
"promptfoo": "^0.120.24",
|
|
62
|
-
"zod": "^4.3.6"
|
|
63
|
-
},
|
|
64
|
-
"devDependencies": {
|
|
65
|
-
"@sanity/ailf-core": "workspace:*",
|
|
66
|
-
"@sanity/ailf-shared": "workspace:*",
|
|
67
|
-
"@sanity/ailf-tasks": "workspace:*",
|
|
68
|
-
"@types/js-yaml": "^4.0.9",
|
|
69
|
-
"@types/node": "^22.13.1",
|
|
70
|
-
"tsx": "^4.19.2",
|
|
71
|
-
"typescript": "^5.7.3"
|
|
72
72
|
}
|
|
73
|
-
}
|
|
73
|
+
}
|