@sanity/ailf 0.1.33 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/LICENSE +21 -0
  2. package/config/airbyte/ai_literacy_framework.connector.yaml +6 -0
  3. package/config/bigquery/views/reports.sql +1 -0
  4. package/dist/_vendor/ailf-core/examples/index.d.ts +10 -20
  5. package/dist/_vendor/ailf-core/examples/index.js +10 -20
  6. package/dist/_vendor/ailf-core/ports/context.d.ts +1 -1
  7. package/dist/_vendor/ailf-core/ports/task-source.d.ts +2 -0
  8. package/dist/_vendor/ailf-core/types/index.d.ts +19 -1
  9. package/dist/_vendor/ailf-tasks/schemas.d.ts +12 -0
  10. package/dist/_vendor/ailf-tasks/schemas.js +4 -0
  11. package/dist/adapters/task-sources/content-lake-task-source.js +9 -1
  12. package/dist/adapters/task-sources/repo-task-source.js +19 -4
  13. package/dist/commands/calculate-scores.js +5 -1
  14. package/dist/commands/publish.js +3 -0
  15. package/dist/orchestration/steps/calculate-scores-step.js +18 -19
  16. package/dist/orchestration/steps/publish-report-step.js +12 -1
  17. package/dist/pipeline/calculate-scores.d.ts +6 -1
  18. package/dist/pipeline/calculate-scores.js +5 -13
  19. package/dist/pipeline/compare.js +12 -5
  20. package/dist/pipeline/generate-configs.js +4 -9
  21. package/dist/pipeline/mirror-repo-tasks.d.ts +77 -0
  22. package/dist/pipeline/mirror-repo-tasks.js +141 -27
  23. package/dist/pipeline/pr-comment.js +5 -2
  24. package/dist/pipeline/release-report.js +4 -0
  25. package/dist/pipeline/report-title.d.ts +66 -0
  26. package/dist/pipeline/report-title.js +118 -0
  27. package/dist/report-store.d.ts +5 -1
  28. package/dist/report-store.js +31 -2
  29. package/dist/sinks/bigquery/index.d.ts +1 -0
  30. package/dist/sinks/bigquery/index.js +1 -0
  31. package/dist/sinks/slack/format.js +10 -0
  32. package/package.json +23 -23
@@ -0,0 +1,66 @@
1
+ /**
2
+ * pipeline/report-title.ts
3
+ *
4
+ * Pure function that generates descriptive report titles from provenance
5
+ * metadata. The title is the primary display string shown in dashboards,
6
+ * Slack digests, and Studio views — it conveys trigger context, evaluated
7
+ * areas, source/perspective, and document scope at a glance.
8
+ *
9
+ * Score is intentionally omitted from the title since it is surfaced
10
+ * heavily elsewhere in the UI. The `tag` field (on Report) is preserved
11
+ * as a secondary label; the title is the primary display string.
12
+ *
13
+ * Segments are joined with ` · ` (middle dot with spaces).
14
+ *
15
+ * @see docs/design-docs/report-store/domain-model.md
16
+ * @see packages/eval/src/pipeline/provenance.ts — builds the provenance input
17
+ */
18
+ import type { EvalMode, ReportTrigger } from "./types.js";
19
+ /** Input required to generate a human-readable report title: the report's provenance plus an optional total-area count. */
20
+ export interface ReportTitleInput {
21
+ provenance: {
22
+ /** Feature areas that were evaluated; up to 3 are listed by name in the title, more are summarized as a count or "All areas" */
23
+ areas: string[];
24
+ /** Evaluation mode (part of the provenance shape; the title builder in this package version does not read it) */
25
+ mode: EvalMode;
26
+ /** Resolved documentation source; a name of "production" with no perspective is treated as the default and omitted from the title */
27
+ source: {
28
+ name: string;
29
+ perspective?: string;
30
+ };
31
+ /** Sanity document IDs targeted (when scoped to specific documents); a single ID is shown as-is, several are shown as "N documents" */
32
+ targetDocuments?: string[];
33
+ /** What triggered the evaluation; always produces the first title segment */
34
+ trigger: ReportTrigger;
35
+ };
36
+ /**
37
+ * Total number of known feature areas in the system.
38
+ * Used to determine whether to show "All areas" vs "N areas"
39
+ * when more than 3 areas are evaluated. When omitted, "N areas" is always used.
40
+ */
41
+ totalAreaCount?: number;
42
+ }
43
+ /**
44
+ * Generate a descriptive report title from provenance metadata.
45
+ *
46
+ * The title is composed of up to four segments separated by ` · `:
47
+ *
48
+ * 1. **Trigger context** — what initiated the evaluation (always present)
49
+ * 2. **Areas** — which feature areas were evaluated (omitted if empty)
50
+ * 3. **Source context** — non-default source or perspective (omitted if default)
51
+ * 4. **Target documents** — scoped document IDs (omitted if not scoped)
52
+ *
+ * @param input Provenance metadata for the report, plus the optional
+ * `totalAreaCount` used to decide between "All areas" and "N areas".
+ * @returns The segments, in the order listed above, joined with ` · `.
+ *
53
+ * @example
54
+ * ```ts
55
+ * generateReportTitle({
56
+ * provenance: {
57
+ * areas: ["GROQ", "Mutations"],
58
+ * mode: "baseline",
59
+ * source: { name: "production" },
60
+ * trigger: { type: "manual" },
61
+ * },
62
+ * })
63
+ * // → "Manual eval · GROQ, Mutations"
64
+ * ```
65
+ */
66
+ export declare function generateReportTitle(input: ReportTitleInput): string;
@@ -0,0 +1,118 @@
1
+ /**
2
+ * pipeline/report-title.ts
3
+ *
4
+ * Pure function that generates descriptive report titles from provenance
5
+ * metadata. The title is the primary display string shown in dashboards,
6
+ * Slack digests, and Studio views — it conveys trigger context, evaluated
7
+ * areas, source/perspective, and document scope at a glance.
8
+ *
9
+ * Score is intentionally omitted from the title since it is surfaced
10
+ * heavily elsewhere in the UI. The `tag` field (on Report) is preserved
11
+ * as a secondary label; the title is the primary display string.
12
+ *
13
+ * Segments are joined with ` · ` (middle dot with spaces).
14
+ *
15
+ * @see docs/design-docs/report-store/domain-model.md
16
+ * @see packages/eval/src/pipeline/provenance.ts — builds the provenance input
17
+ */
18
+ // ---------------------------------------------------------------------------
19
+ // Segment builders
20
+ // ---------------------------------------------------------------------------
21
+ const SEPARATOR = " · ";
22
/**
 * Segment 1 — human-readable trigger context.
 *
 * Maps the trigger's `type` to a short label:
 *  - "scheduled"  → the schedule name with dashes turned into spaces and the
 *                   first letter upper-cased (e.g. "nightly-run" → "Nightly run")
 *  - "ci"         → "CI eval"
 *  - "webhook"    → "Content change"
 *  - "cross-repo" → "Cross-repo (<repo>)" when `callerRepo` looks like
 *                   "owner/repo", otherwise plain "Cross-repo"
 *  - "manual"     → "Manual eval"
 *
 * @param trigger Trigger descriptor; `type` selects the branch, `schedule`
 *   is read for "scheduled" and `callerRepo` for "cross-repo".
 * @returns A non-empty label for every known trigger type (for "scheduled",
 *   as long as `schedule` itself is non-empty), and the generic "Eval" for
 *   unrecognized types.
 */
function triggerSegment(trigger) {
    switch (trigger.type) {
        case "scheduled": {
            const name = trigger.schedule.replace(/-/g, " ");
            return name.charAt(0).toUpperCase() + name.slice(1);
        }
        case "ci":
            return "CI eval";
        case "webhook":
            return "Content change";
        case "cross-repo": {
            // Only show the repo name if callerRepo looks like "owner/repo".
            // Numeric IDs (e.g. GITHUB_REPOSITORY_OWNER_ID fallback) are not useful.
            const repo = trigger.callerRepo;
            const lastSlash = repo.lastIndexOf("/");
            // A value with no slash, or with nothing after the last slash
            // (e.g. "owner/"), has no usable repo name — avoid "Cross-repo ()".
            const shortName = lastSlash === -1 ? "" : repo.slice(lastSlash + 1).trim();
            return shortName ? `Cross-repo (${shortName})` : "Cross-repo";
        }
        case "manual":
            return "Manual eval";
        default:
            // Unknown trigger types (e.g. reports written by a newer version)
            // would otherwise yield `undefined`, which joins as an empty
            // string and leaves the title starting with the separator.
            return "Eval";
    }
}
47
/**
 * Segment 2 — evaluated areas.
 *
 * @param areas Names of the evaluated feature areas.
 * @param totalAreaCount Optional number of areas known to the system.
 * @returns `undefined` for an empty list; the comma-separated names for one
 *   to three areas; otherwise "All areas" when the list length equals
 *   `totalAreaCount`, or "<n> areas" in every other case.
 */
function areasSegment(areas, totalAreaCount) {
    const count = areas.length;
    if (count === 0) {
        return undefined;
    }
    // Short lists are cheap to read, so spell them out.
    if (count <= 3) {
        return areas.join(", ");
    }
    const coversEverything = totalAreaCount !== undefined && count === totalAreaCount;
    return coversEverything ? "All areas" : `${count} areas`;
}
59
/**
 * Segment 3 — source context.
 *
 * @param source Object with a `name` and an optional `perspective`.
 * @returns "perspective: <p>" and/or the source name joined by ", ".
 *   The name is left out when it is "production"; the result is `undefined`
 *   when neither piece applies.
 */
function sourceSegment(source) {
    const perspectivePiece = source.perspective
        ? [`perspective: ${source.perspective}`]
        : [];
    const namePiece = source.name !== "production" ? [source.name] : [];
    // Perspective comes first, then the non-default source name.
    const pieces = [...perspectivePiece, ...namePiece];
    if (pieces.length === 0) {
        return undefined;
    }
    return pieces.join(", ");
}
70
/**
 * Segment 4 — target documents.
 *
 * @param targetDocuments Optional list of targeted document IDs.
 * @returns `undefined` when the list is missing or empty, the ID itself when
 *   there is exactly one, and "<n> documents" otherwise.
 */
function targetDocumentsSegment(targetDocuments) {
    if (!targetDocuments) {
        return undefined;
    }
    switch (targetDocuments.length) {
        case 0:
            return undefined;
        case 1:
            return targetDocuments[0];
        default:
            return `${targetDocuments.length} documents`;
    }
}
79
+ // ---------------------------------------------------------------------------
80
+ // Public API
81
+ // ---------------------------------------------------------------------------
82
/**
 * Build the display title for a report from its provenance.
 *
 * Segments appear in a fixed order and are joined with ` · `:
 *
 * 1. **Trigger context** — always emitted, whatever `triggerSegment` returns
 * 2. **Areas** — skipped when `areasSegment` yields nothing
 * 3. **Source context** — skipped when `sourceSegment` yields nothing
 * 4. **Target documents** — skipped when `targetDocumentsSegment` yields nothing
 *
 * @param input Object carrying `provenance` (trigger, areas, source,
 *   targetDocuments) and an optional `totalAreaCount`.
 * @returns The joined title string.
 *
 * @example
 * ```ts
 * generateReportTitle({
 *   provenance: {
 *     areas: ["GROQ", "Mutations"],
 *     mode: "baseline",
 *     source: { name: "production" },
 *     trigger: { type: "manual" },
 *   },
 * })
 * // → "Manual eval · GROQ, Mutations"
 * ```
 */
export function generateReportTitle(input) {
    const { provenance, totalAreaCount } = input;
    const lead = triggerSegment(provenance.trigger);
    // Evaluated in display order; falsy results are dropped below.
    const optionalSegments = [
        areasSegment(provenance.areas, totalAreaCount),
        sourceSegment(provenance.source),
        targetDocumentsSegment(provenance.targetDocuments),
    ];
    return [lead, ...optionalSegments.filter(Boolean)].join(SEPARATOR);
}
@@ -51,9 +51,13 @@ export declare class ReportStore {
51
51
  * Returns the comparison plus the baseline report ID so the caller
52
52
  * can record `provenance.lineage.comparedAgainst`.
53
53
  *
54
+ * @param scopedAreas When provided, the baseline's scores are filtered to
55
+ * only include these areas before comparison. This prevents mismatched
56
+ * areas from polluting the overall delta (e.g., release auto-scope
57
+ * evaluates only GROQ but the baseline has all areas).
54
58
  * @returns The comparison result with baseline ID, or null if no baseline found
55
59
  */
56
- autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp): Promise<AutoCompareResult | null>;
60
+ autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp, scopedAreas?: Set<string>): Promise<AutoCompareResult | null>;
57
61
  /**
58
62
  * Find a report by its evaluation fingerprint (cross-environment cache lookup).
59
63
  *
@@ -49,9 +49,13 @@ export class ReportStore {
49
49
  * Returns the comparison plus the baseline report ID so the caller
50
50
  * can record `provenance.lineage.comparedAgainst`.
51
51
  *
52
+ * @param scopedAreas When provided, the baseline's scores are filtered to
53
+ * only include these areas before comparison. This prevents mismatched
54
+ * areas from polluting the overall delta (e.g., release auto-scope
55
+ * evaluates only GROQ but the baseline has all areas).
52
56
  * @returns The comparison result with baseline ID, or null if no baseline found
53
57
  */
54
- async autoCompare(currentSummary, provenance, completedAt) {
58
+ async autoCompare(currentSummary, provenance, completedAt, scopedAreas) {
55
59
  // 1. Prefer explicit lineage source (deterministic re-run comparison)
56
60
  const rerunSourceId = provenance.lineage?.rerunOf;
57
61
  let baseline = null;
@@ -76,7 +80,30 @@ export class ReportStore {
76
80
  return null;
77
81
  }
78
82
  try {
79
- const comparison = compare(baseline.summary, currentSummary);
83
+ // When auto-scope is active, filter the baseline to only include
84
+ // areas that were actually evaluated. This produces a fair
85
+ // comparison where the overall delta reflects only tested areas.
86
+ let baselineSummary = baseline.summary;
87
+ if (scopedAreas && scopedAreas.size > 0) {
88
+ const filteredScores = baselineSummary.scores.filter((s) => scopedAreas.has(s.feature));
89
+ if (filteredScores.length > 0 &&
90
+ filteredScores.length < baselineSummary.scores.length) {
91
+ const len = filteredScores.length;
92
+ const avgScore = filteredScores.reduce((s, sc) => s + sc.totalScore, 0) / len;
93
+ const avgDocLift = filteredScores.reduce((s, sc) => s + sc.docLift, 0) / len;
94
+ baselineSummary = {
95
+ ...baselineSummary,
96
+ overall: {
97
+ ...baselineSummary.overall,
98
+ avgScore,
99
+ avgDocLift,
100
+ },
101
+ scores: filteredScores,
102
+ };
103
+ console.log(` 🎯 Scoped baseline to ${filteredScores.length} of ${baseline.summary.scores.length} areas for comparison`);
104
+ }
105
+ }
106
+ const comparison = compare(baselineSummary, currentSummary);
80
107
  return { baselineReportId: baseline.id, comparison };
81
108
  }
82
109
  catch (error) {
@@ -176,6 +203,7 @@ export class ReportStore {
176
203
  reportId: report.id,
177
204
  summary: report.summary,
178
205
  tag: report.tag ?? null,
206
+ title: report.title ?? null,
179
207
  });
180
208
  return report.id;
181
209
  }
@@ -228,5 +256,6 @@ function toReport(doc) {
228
256
  provenance: doc.provenance,
229
257
  summary: doc.summary,
230
258
  tag: doc.tag,
259
+ title: doc.title,
231
260
  };
232
261
  }
@@ -71,6 +71,7 @@ export interface ReportRow {
71
71
  source_name: string;
72
72
  source_perspective: null | string;
73
73
  tag: null | string;
74
+ title: null | string;
74
75
  total_cost: null | number;
75
76
  trigger_caller_repo: null | string;
76
77
  trigger_type: string;
@@ -213,6 +213,7 @@ export function flattenReportRow(report) {
213
213
  source_name: provenance.source.name,
214
214
  source_perspective: provenance.source.perspective ?? null,
215
215
  tag: report.tag ?? null,
216
+ title: report.title ?? null,
216
217
  total_cost: summary.overall.cost?.total ?? null,
217
218
  trigger_caller_repo: provenance.trigger.type === "cross-repo"
218
219
  ? provenance.trigger.callerRepo
@@ -110,6 +110,16 @@ export function formatRegressionAlert(report) {
110
110
  type: "section",
111
111
  });
112
112
  }
113
+ // Not-evaluated areas — informational mention
114
+ if (comparison.notEvaluated?.length > 0) {
115
+ blocks.push({
116
+ text: {
117
+ text: `⏭️ ${comparison.notEvaluated.length} area${comparison.notEvaluated.length === 1 ? "" : "s"} not evaluated: ${comparison.notEvaluated.join(", ")}`,
118
+ type: "mrkdwn",
119
+ },
120
+ type: "section",
121
+ });
122
+ }
113
123
  return {
114
124
  blocks,
115
125
  text: `📉 AI Literacy Score Regression: ${baselineScore} → ${experimentScore} (${formatDelta(delta)})`,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "0.1.33",
3
+ "version": "0.2.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "restricted"
@@ -23,6 +23,27 @@
23
23
  "canonical",
24
24
  "tasks"
25
25
  ],
26
+ "dependencies": {
27
+ "@google-cloud/bigquery": "^8.1.1",
28
+ "@inquirer/prompts": "^8.3.0",
29
+ "@portabletext/markdown": "^1.0.0",
30
+ "@sanity/client": "^7.3.0",
31
+ "commander": "^14.0.3",
32
+ "dotenv": "^16.4.7",
33
+ "dotenv-cli": "^11.0.0",
34
+ "js-yaml": "^4.1.0",
35
+ "promptfoo": "^0.120.24",
36
+ "zod": "^4.3.6"
37
+ },
38
+ "devDependencies": {
39
+ "@types/js-yaml": "^4.0.9",
40
+ "@types/node": "^22.13.1",
41
+ "tsx": "^4.19.2",
42
+ "typescript": "^5.7.3",
43
+ "@sanity/ailf-core": "0.1.0",
44
+ "@sanity/ailf-shared": "0.1.0",
45
+ "@sanity/ailf-tasks": "0.1.4"
46
+ },
26
47
  "scripts": {
27
48
  "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
28
49
  "generate-configs": "tsx src/cli.ts generate-configs",
@@ -48,26 +69,5 @@
48
69
  "discovery-report": "tsx src/cli.ts discovery-report",
49
70
  "webhook-server": "tsx src/cli.ts webhook-server",
50
71
  "weekly-digest": "tsx src/cli.ts weekly-digest"
51
- },
52
- "dependencies": {
53
- "@google-cloud/bigquery": "^8.1.1",
54
- "@inquirer/prompts": "^8.3.0",
55
- "@portabletext/markdown": "^1.0.0",
56
- "@sanity/client": "^7.3.0",
57
- "commander": "^14.0.3",
58
- "dotenv": "^16.4.7",
59
- "dotenv-cli": "^11.0.0",
60
- "js-yaml": "^4.1.0",
61
- "promptfoo": "^0.120.24",
62
- "zod": "^4.3.6"
63
- },
64
- "devDependencies": {
65
- "@sanity/ailf-core": "workspace:*",
66
- "@sanity/ailf-shared": "workspace:*",
67
- "@sanity/ailf-tasks": "workspace:*",
68
- "@types/js-yaml": "^4.0.9",
69
- "@types/node": "^22.13.1",
70
- "tsx": "^4.19.2",
71
- "typescript": "^5.7.3"
72
72
  }
73
- }
73
+ }