npm - @sanity/ailf - Versions diffs - 0.1.33 → 0.2.0 - Mend

@sanity/ailf 0.1.33 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/LICENSE +21 -0
package/config/airbyte/ai_literacy_framework.connector.yaml +6 -0
package/config/bigquery/views/reports.sql +1 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +10 -20
package/dist/_vendor/ailf-core/examples/index.js +10 -20
package/dist/_vendor/ailf-core/ports/context.d.ts +1 -1
package/dist/_vendor/ailf-core/ports/task-source.d.ts +2 -0
package/dist/_vendor/ailf-core/types/index.d.ts +19 -1
package/dist/_vendor/ailf-tasks/schemas.d.ts +12 -0
package/dist/_vendor/ailf-tasks/schemas.js +4 -0
package/dist/adapters/task-sources/content-lake-task-source.js +9 -1
package/dist/adapters/task-sources/repo-task-source.js +19 -4
package/dist/commands/calculate-scores.js +5 -1
package/dist/commands/publish.js +3 -0
package/dist/orchestration/steps/calculate-scores-step.js +18 -19
package/dist/orchestration/steps/publish-report-step.js +12 -1
package/dist/pipeline/calculate-scores.d.ts +6 -1
package/dist/pipeline/calculate-scores.js +5 -13
package/dist/pipeline/compare.js +12 -5
package/dist/pipeline/generate-configs.js +4 -9
package/dist/pipeline/mirror-repo-tasks.d.ts +77 -0
package/dist/pipeline/mirror-repo-tasks.js +141 -27
package/dist/pipeline/pr-comment.js +5 -2
package/dist/pipeline/release-report.js +4 -0
package/dist/pipeline/report-title.d.ts +66 -0
package/dist/pipeline/report-title.js +118 -0
package/dist/report-store.d.ts +5 -1
package/dist/report-store.js +31 -2
package/dist/sinks/bigquery/index.d.ts +1 -0
package/dist/sinks/bigquery/index.js +1 -0
package/dist/sinks/slack/format.js +10 -0
package/package.json +23 -23

package/dist/pipeline/report-title.d.ts ADDED Viewed

@@ -0,0 +1,66 @@
+/**
+ * pipeline/report-title.ts
+ *
+ * Pure function that generates descriptive report titles from provenance
+ * metadata. The title is the primary display string shown in dashboards,
+ * Slack digests, and Studio views — it conveys trigger context, evaluated
+ * areas, source/perspective, and document scope at a glance.
+ *
+ * Score is intentionally omitted from the title since it is surfaced
+ * heavily elsewhere in the UI. The `tag` field (on Report) is preserved
+ * as a secondary label; the title is the primary display string.
+ *
+ * Segments are joined with ` · ` (middle dot with spaces).
+ *
+ * @see docs/design-docs/report-store/domain-model.md
+ * @see packages/eval/src/pipeline/provenance.ts — builds the provenance input
+ */
+import type { EvalMode, ReportTrigger } from "./types.js";
+/**
+ * Input required to generate a human-readable report title.
+ *
+ * Of the `provenance` fields, the implementation of `generateReportTitle`
+ * shown in report-title.js reads `trigger`, `areas`, `source` and
+ * `targetDocuments`; `mode` is carried on the input but is not read there.
+ */
+export interface ReportTitleInput {
+    provenance: {
+        /**
+         * Feature areas that were evaluated.
+         * An empty array produces no areas segment; up to 3 names are listed
+         * joined by ", "; more than 3 are summarised (see `totalAreaCount`).
+         */
+        areas: string[];
+        /** Evaluation mode */
+        mode: EvalMode;
+        /**
+         * Resolved documentation source.
+         * A `name` of "production" is treated as the default and left out of
+         * the title; a truthy `perspective` is rendered as
+         * `perspective: <value>` ahead of the name.
+         */
+        source: {
+            name: string;
+            perspective?: string;
+        };
+        /**
+         * Sanity document IDs targeted (when scoped to specific documents).
+         * A single ID is shown verbatim; two or more are shown as
+         * `<count> documents`; an empty array is treated like an omitted field.
+         */
+        targetDocuments?: string[];
+        /** What triggered the evaluation */
+        trigger: ReportTrigger;
+    };
+    /**
+     * Total number of known feature areas in the system.
+     * Used to determine whether to show "All areas" vs "N areas"
+     * when more than 3 areas are evaluated. When omitted, or when it differs
+     * from `provenance.areas.length`, the "N areas" form is used.
+     */
+    totalAreaCount?: number;
+}
+/**
+ * Generate a descriptive report title from provenance metadata.
+ *
+ * The title is composed of up to four segments separated by ` · `:
+ *
+ * 1. **Trigger context** — what initiated the evaluation (always present)
+ * 2. **Areas** — which feature areas were evaluated (omitted if empty)
+ * 3. **Source context** — non-default source or perspective (omitted if default)
+ * 4. **Target documents** — scoped document IDs (omitted if not scoped)
+ *
+ * @param input Provenance metadata plus the optional total area count.
+ * @returns The segments, in the order listed above, joined with ` · `.
+ *
+ * @example
+ * ```ts
+ * generateReportTitle({
+ *   provenance: {
+ *     areas: ["GROQ", "Mutations"],
+ *     mode: "baseline",
+ *     source: { name: "production" },
+ *     trigger: { type: "manual" },
+ *   },
+ * })
+ * // → "Manual eval · GROQ, Mutations"
+ * ```
+ */
+export declare function generateReportTitle(input: ReportTitleInput): string;

package/dist/pipeline/report-title.js ADDED Viewed

@@ -0,0 +1,118 @@
+/**
+ * pipeline/report-title.ts
+ *
+ * Pure function that generates descriptive report titles from provenance
+ * metadata. The title is the primary display string shown in dashboards,
+ * Slack digests, and Studio views — it conveys trigger context, evaluated
+ * areas, source/perspective, and document scope at a glance.
+ *
+ * Score is intentionally omitted from the title since it is surfaced
+ * heavily elsewhere in the UI. The `tag` field (on Report) is preserved
+ * as a secondary label; the title is the primary display string.
+ *
+ * Segments are joined with ` · ` (middle dot with spaces).
+ *
+ * @see docs/design-docs/report-store/domain-model.md
+ * @see packages/eval/src/pipeline/provenance.ts — builds the provenance input
+ */
+// ---------------------------------------------------------------------------
+// Segment builders
+// ---------------------------------------------------------------------------
+const SEPARATOR = " · "; // delimiter placed between adjacent title segments (middle dot with a space on each side)
+/**
+ * Segment 1 — human-readable trigger context.
+ *
+ * Maps `trigger.type` to a display label:
+ * - "scheduled"  → `trigger.schedule` with every hyphen replaced by a space and
+ *                  the first character upper-cased (e.g. "weekly-digest"
+ *                  becomes "Weekly digest")
+ * - "ci"         → "CI eval"
+ * - "webhook"    → "Content change"
+ * - "cross-repo" → "Cross-repo (<repo>)" when `trigger.callerRepo` contains a
+ *                  "/", otherwise plain "Cross-repo"
+ * - "manual"     → "Manual eval"
+ *
+ * NOTE(review): the switch has no `default` branch, so any other
+ * `trigger.type` makes this function return `undefined`.
+ * NOTE(review): a `callerRepo` that ends in "/" yields an empty short name,
+ * i.e. "Cross-repo ()"; the `?? repo` fallback does not cover that case
+ * because `pop()` returns an empty string there, not `undefined`.
+ *
+ * @param trigger Trigger descriptor; `schedule` is read for "scheduled" and
+ *   `callerRepo` for "cross-repo".
+ * @returns The label for the first title segment.
+ */
+function triggerSegment(trigger) {
+    switch (trigger.type) {
+        case "scheduled": {
+            const name = trigger.schedule.replace(/-/g, " "); // every hyphen becomes a space
+            return name.charAt(0).toUpperCase() + name.slice(1); // upper-case only the first character
+        }
+        case "ci":
+            return "CI eval";
+        case "webhook":
+            return "Content change";
+        case "cross-repo": {
+            // Only show the repo name if callerRepo looks like "owner/repo".
+            // Numeric IDs (e.g. GITHUB_REPOSITORY_OWNER_ID fallback) are not useful.
+            const repo = trigger.callerRepo;
+            if (repo.includes("/")) {
+                const shortName = repo.split("/").pop() ?? repo; // text after the last "/"
+                return `Cross-repo (${shortName})`;
+            }
+            return "Cross-repo";
+        }
+        case "manual":
+            return "Manual eval";
+    }
+}
+/**
+ * Segment 2 — areas evaluated (omitted when empty).
+ *
+ * - no areas            → `undefined` (the caller skips the segment)
+ * - 1 to 3 areas        → the area names joined with ", "
+ * - more than 3 areas   → "All areas" when `totalAreaCount` is provided and
+ *                         equals `areas.length`, otherwise "<count> areas"
+ *
+ * With 3 or fewer areas the names are always listed; `totalAreaCount` is not
+ * consulted in that case.
+ *
+ * @param areas Names of the evaluated feature areas.
+ * @param totalAreaCount Optional total number of known areas.
+ * @returns The segment text, or `undefined` when `areas` is empty.
+ */
+function areasSegment(areas, totalAreaCount) {
+    if (areas.length === 0)
+        return undefined;
+    if (areas.length <= 3) {
+        return areas.join(", "); // short lists are spelled out
+    }
+    if (totalAreaCount !== undefined && areas.length === totalAreaCount) {
+        return "All areas";
+    }
+    return `${areas.length} areas`; // long partial list (or unknown total): show the count only
+}
+/**
+ * Segment 3 — source context (omitted when default production, no perspective).
+ *
+ * Collects up to two parts, in this order, and joins them with ", ":
+ * 1. `perspective: <value>` when `source.perspective` is truthy (an empty
+ *    string counts as absent)
+ * 2. `source.name` when it is anything other than "production"
+ *
+ * @param source Object with a `name` and an optional `perspective`.
+ * @returns The joined parts, or `undefined` when neither part applies.
+ */
+function sourceSegment(source) {
+    const parts = [];
+    if (source.perspective) {
+        parts.push(`perspective: ${source.perspective}`);
+    }
+    if (source.name !== "production") { // "production" is the default and is not shown
+        parts.push(source.name);
+    }
+    return parts.length > 0 ? parts.join(", ") : undefined;
+}
+/**
+ * Segment 4 — target documents (omitted when not scoped).
+ *
+ * @param targetDocuments Optional list of targeted document IDs.
+ * @returns `undefined` when the list is missing or empty, the ID itself when
+ *   there is exactly one, otherwise "<count> documents".
+ */
+function targetDocumentsSegment(targetDocuments) {
+    if (!targetDocuments || targetDocuments.length === 0)
+        return undefined;
+    if (targetDocuments.length === 1) {
+        return targetDocuments[0]; // a single ID is shown verbatim
+    }
+    return `${targetDocuments.length} documents`;
+}
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+/**
+ * Generate a descriptive report title from provenance metadata.
+ *
+ * The title is composed of up to four segments separated by ` · `:
+ *
+ * 1. **Trigger context** — what initiated the evaluation (always present)
+ * 2. **Areas** — which feature areas were evaluated (omitted if empty)
+ * 3. **Source context** — non-default source or perspective (omitted if default)
+ * 4. **Target documents** — scoped document IDs (omitted if not scoped)
+ *
+ * The trigger segment is added unconditionally; each of the other three is
+ * appended only when its builder returns a truthy value. `provenance.mode`
+ * is not read here.
+ *
+ * @param input Object with `provenance` (trigger, areas, source, optional
+ *   targetDocuments) and an optional `totalAreaCount`.
+ * @returns The collected segments joined with ` · `.
+ *
+ * @example
+ * ```ts
+ * generateReportTitle({
+ *   provenance: {
+ *     areas: ["GROQ", "Mutations"],
+ *     mode: "baseline",
+ *     source: { name: "production" },
+ *     trigger: { type: "manual" },
+ *   },
+ * })
+ * // → "Manual eval · GROQ, Mutations"
+ * ```
+ */
+export function generateReportTitle(input) {
+    const { provenance, totalAreaCount } = input;
+    const segments = [triggerSegment(provenance.trigger)]; // trigger label always comes first
+    const areas = areasSegment(provenance.areas, totalAreaCount);
+    if (areas) // undefined when no areas were evaluated
+        segments.push(areas);
+    const source = sourceSegment(provenance.source);
+    if (source) // undefined for default "production" with no perspective
+        segments.push(source);
+    const docs = targetDocumentsSegment(provenance.targetDocuments);
+    if (docs) // undefined when the run was not scoped to documents
+        segments.push(docs);
+    return segments.join(SEPARATOR);
+}

package/dist/report-store.d.ts CHANGED Viewed

@@ -51,9 +51,13 @@ export declare class ReportStore {
      * Returns the comparison plus the baseline report ID so the caller
      * can record `provenance.lineage.comparedAgainst`.
      *
+     * @param scopedAreas When provided, the baseline's scores are filtered to
+     *   only include these areas before comparison. This prevents mismatched
+     *   areas from polluting the overall delta (e.g., release auto-scope
+     *   evaluates only GROQ but the baseline has all areas).
      * @returns The comparison result with baseline ID, or null if no baseline found
      */
-    autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp): Promise<AutoCompareResult | null>;
+    autoCompare(currentSummary: ScoreSummary, provenance: ReportProvenance, completedAt: ISOTimestamp, scopedAreas?: Set<string>): Promise<AutoCompareResult | null>;
     /**
      * Find a report by its evaluation fingerprint (cross-environment cache lookup).
      *

package/dist/report-store.js CHANGED Viewed

@@ -49,9 +49,13 @@ export class ReportStore {
      * Returns the comparison plus the baseline report ID so the caller
      * can record `provenance.lineage.comparedAgainst`.
      *
+     * @param scopedAreas When provided, the baseline's scores are filtered to
+     *   only include these areas before comparison. This prevents mismatched
+     *   areas from polluting the overall delta (e.g., release auto-scope
+     *   evaluates only GROQ but the baseline has all areas).
      * @returns The comparison result with baseline ID, or null if no baseline found
      */
-    async autoCompare(currentSummary, provenance, completedAt) {
+    async autoCompare(currentSummary, provenance, completedAt, scopedAreas) {
         // 1. Prefer explicit lineage source (deterministic re-run comparison)
         const rerunSourceId = provenance.lineage?.rerunOf;
         let baseline = null;
@@ -76,7 +80,30 @@ export class ReportStore {
             return null;
         }
         try {
-            const comparison = compare(baseline.summary, currentSummary);
+            // When auto-scope is active, filter the baseline to only include
+            // areas that were actually evaluated. This produces a fair
+            // comparison where the overall delta reflects only tested areas.
+            let baselineSummary = baseline.summary;
+            if (scopedAreas && scopedAreas.size > 0) {
+                const filteredScores = baselineSummary.scores.filter((s) => scopedAreas.has(s.feature));
+                if (filteredScores.length > 0 &&
+                    filteredScores.length < baselineSummary.scores.length) {
+                    const len = filteredScores.length;
+                    const avgScore = filteredScores.reduce((s, sc) => s + sc.totalScore, 0) / len;
+                    const avgDocLift = filteredScores.reduce((s, sc) => s + sc.docLift, 0) / len;
+                    baselineSummary = {
+                        ...baselineSummary,
+                        overall: {
+                            ...baselineSummary.overall,
+                            avgScore,
+                            avgDocLift,
+                        },
+                        scores: filteredScores,
+                    };
+                    console.log(`  🎯 Scoped baseline to ${filteredScores.length} of ${baseline.summary.scores.length} areas for comparison`);
+                }
+            }
+            const comparison = compare(baselineSummary, currentSummary);
             return { baselineReportId: baseline.id, comparison };
         }
         catch (error) {
@@ -176,6 +203,7 @@ export class ReportStore {
                 reportId: report.id,
                 summary: report.summary,
                 tag: report.tag ?? null,
+                title: report.title ?? null,
             });
             return report.id;
         }
@@ -228,5 +256,6 @@ function toReport(doc) {
         provenance: doc.provenance,
         summary: doc.summary,
         tag: doc.tag,
+        title: doc.title,
     };
 }

package/dist/sinks/bigquery/index.d.ts CHANGED Viewed

@@ -71,6 +71,7 @@ export interface ReportRow {
     source_name: string;
     source_perspective: null | string;
     tag: null | string;
+    title: null | string;
     total_cost: null | number;
     trigger_caller_repo: null | string;
     trigger_type: string;

package/dist/sinks/bigquery/index.js CHANGED Viewed

@@ -213,6 +213,7 @@ export function flattenReportRow(report) {
         source_name: provenance.source.name,
         source_perspective: provenance.source.perspective ?? null,
         tag: report.tag ?? null,
+        title: report.title ?? null,
         total_cost: summary.overall.cost?.total ?? null,
         trigger_caller_repo: provenance.trigger.type === "cross-repo"
             ? provenance.trigger.callerRepo

package/dist/sinks/slack/format.js CHANGED Viewed

@@ -110,6 +110,16 @@ export function formatRegressionAlert(report) {
             type: "section",
         });
     }
+    // Not-evaluated areas — informational mention
+    if (comparison.notEvaluated?.length > 0) {
+        blocks.push({
+            text: {
+                text: `⏭️ ${comparison.notEvaluated.length} area${comparison.notEvaluated.length === 1 ? "" : "s"} not evaluated: ${comparison.notEvaluated.join(", ")}`,
+                type: "mrkdwn",
+            },
+            type: "section",
+        });
+    }
     return {
         blocks,
         text: `📉 AI Literacy Score Regression: ${baselineScore} → ${experimentScore} (${formatDelta(delta)})`,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@sanity/ailf",
-  "version": "0.1.33",
+  "version": "0.2.0",
   "private": false,
   "publishConfig": {
     "access": "restricted"
@@ -23,6 +23,27 @@
     "canonical",
     "tasks"
   ],
+  "dependencies": {
+    "@google-cloud/bigquery": "^8.1.1",
+    "@inquirer/prompts": "^8.3.0",
+    "@portabletext/markdown": "^1.0.0",
+    "@sanity/client": "^7.3.0",
+    "commander": "^14.0.3",
+    "dotenv": "^16.4.7",
+    "dotenv-cli": "^11.0.0",
+    "js-yaml": "^4.1.0",
+    "promptfoo": "^0.120.24",
+    "zod": "^4.3.6"
+  },
+  "devDependencies": {
+    "@types/js-yaml": "^4.0.9",
+    "@types/node": "^22.13.1",
+    "tsx": "^4.19.2",
+    "typescript": "^5.7.3",
+    "@sanity/ailf-core": "0.1.0",
+    "@sanity/ailf-shared": "0.1.0",
+    "@sanity/ailf-tasks": "0.1.4"
+  },
   "scripts": {
     "build": "tsc && tsx scripts/bundle-workspace-deps.ts",
     "generate-configs": "tsx src/cli.ts generate-configs",
@@ -48,26 +69,5 @@
     "discovery-report": "tsx src/cli.ts discovery-report",
     "webhook-server": "tsx src/cli.ts webhook-server",
     "weekly-digest": "tsx src/cli.ts weekly-digest"
-  },
-  "dependencies": {
-    "@google-cloud/bigquery": "^8.1.1",
-    "@inquirer/prompts": "^8.3.0",
-    "@portabletext/markdown": "^1.0.0",
-    "@sanity/client": "^7.3.0",
-    "commander": "^14.0.3",
-    "dotenv": "^16.4.7",
-    "dotenv-cli": "^11.0.0",
-    "js-yaml": "^4.1.0",
-    "promptfoo": "^0.120.24",
-    "zod": "^4.3.6"
-  },
-  "devDependencies": {
-    "@sanity/ailf-core": "workspace:*",
-    "@sanity/ailf-shared": "workspace:*",
-    "@sanity/ailf-tasks": "workspace:*",
-    "@types/js-yaml": "^4.0.9",
-    "@types/node": "^22.13.1",
-    "tsx": "^4.19.2",
-    "typescript": "^5.7.3"
   }
-}
+}