npm - @sanity/ailf - Versions diffs - 0.1.34 → 0.3.0 - Mend

@sanity/ailf 0.1.34 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

package/LICENSE +21 -0
package/config/airbyte/ai_literacy_framework.connector.yaml +6 -0
package/config/bigquery/views/reports.sql +1 -0
package/dist/_vendor/ailf-core/examples/index.d.ts +10 -20
package/dist/_vendor/ailf-core/examples/index.js +10 -20
package/dist/_vendor/ailf-core/ports/task-source.d.ts +2 -0
package/dist/_vendor/ailf-core/types/index.d.ts +65 -0
package/dist/_vendor/ailf-tasks/schemas.d.ts +12 -0
package/dist/_vendor/ailf-tasks/schemas.js +4 -0
package/dist/adapters/task-sources/content-lake-task-source.js +9 -1
package/dist/adapters/task-sources/repo-task-source.js +19 -4
package/dist/commands/calculate-scores.js +5 -1
package/dist/commands/publish.js +3 -0
package/dist/composition-root.js +7 -2
package/dist/orchestration/pipeline-orchestrator.js +27 -2
package/dist/orchestration/step-runner.js +8 -0
package/dist/orchestration/steps/calculate-scores-step.js +22 -19
package/dist/orchestration/steps/generate-configs-step.js +1 -0
package/dist/orchestration/steps/grader-consistency-step.js +1 -0
package/dist/orchestration/steps/mirror-repo-tasks-step.js +2 -1
package/dist/orchestration/steps/publish-report-step.js +3 -0
package/dist/pipeline/calculate-scores.d.ts +11 -1
package/dist/pipeline/calculate-scores.js +222 -157
package/dist/pipeline/coverage-audit.d.ts +2 -1
package/dist/pipeline/coverage-audit.js +5 -3
package/dist/pipeline/expand-tasks.d.ts +2 -1
package/dist/pipeline/expand-tasks.js +33 -2
package/dist/pipeline/generate-configs.d.ts +3 -1
package/dist/pipeline/generate-configs.js +51 -37
package/dist/pipeline/grader-api.d.ts +2 -1
package/dist/pipeline/grader-api.js +11 -9
package/dist/pipeline/grader-compare-runner.d.ts +3 -0
package/dist/pipeline/grader-compare-runner.js +21 -19
package/dist/pipeline/grader-consistency-runner.d.ts +3 -0
package/dist/pipeline/grader-consistency-runner.js +16 -14
package/dist/pipeline/grader-sensitivity-runner.d.ts +3 -0
package/dist/pipeline/grader-sensitivity-runner.js +18 -16
package/dist/pipeline/grader-validate-runner.d.ts +3 -0
package/dist/pipeline/grader-validate-runner.js +16 -14
package/dist/pipeline/mirror-repo-tasks.d.ts +80 -1
package/dist/pipeline/mirror-repo-tasks.js +148 -32
package/dist/pipeline/provenance.d.ts +3 -0
package/dist/pipeline/provenance.js +25 -3
package/dist/pipeline/report-title.d.ts +66 -0
package/dist/pipeline/report-title.js +118 -0
package/dist/report-store.js +2 -0
package/dist/sinks/bigquery/index.d.ts +1 -0
package/dist/sinks/bigquery/index.js +1 -0
package/dist/sources.d.ts +2 -1
package/dist/sources.js +28 -1
package/package.json +23 -23

package/dist/commands/calculate-scores.js CHANGED Viewed

@@ -36,11 +36,15 @@ export function createCalculateScoresCommand() {
                 remote: false,
                 apiUrl: "https://ailf-api.sanity.build",
             });
-            calculateAndWriteScores({
+            const result = calculateAndWriteScores({
                 resultsPath,
                 rootDir: ctx.config.rootDir,
                 source: opts.source,
             });
+            // At the CLI boundary, exit non-zero if areas are below threshold
+            if (result.belowCritical.length > 0) {
+                process.exitCode = 1;
+            }
         }
         catch (err) {
             process.exitCode = 1;

package/dist/commands/publish.js CHANGED Viewed

@@ -24,6 +24,7 @@ import { fileURLToPath } from "url";
 import { Command } from "commander";
 import { createAppContext } from "../composition-root.js";
 import { buildProvenance, } from "../pipeline/provenance.js";
+import { generateReportTitle } from "../pipeline/report-title.js";
 import { generateReportId, } from "../report-store.js";
 import { withRetry } from "../sinks/retry.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -166,6 +167,7 @@ async function runPublishCommand(summaryPath, opts) {
         };
     }
     const reportId = generateReportId();
+    const title = generateReportTitle({ provenance });
     const report = {
         comparison: comparison ?? undefined,
         completedAt: now,
@@ -174,6 +176,7 @@ async function runPublishCommand(summaryPath, opts) {
         provenance,
         summary,
         tag: opts.tag,
+        title,
     };
     // -----------------------------------------------------------------------
     // 4. Dry run — print preview and exit

package/dist/composition-root.js CHANGED Viewed

@@ -63,9 +63,14 @@ export function createAppContext(config) {
 function createLogger() {
     if (process.env.AILF_LOG_FORMAT === "json")
         return new JsonLogger();
-    if (process.env.AILF_QUIET === "1")
+    if (process.env.AILF_LOG_LEVEL === "quiet" ||
+        process.env.AILF_QUIET === "1") {
         return new QuietLogger();
-    return new ConsoleLogger({ verbose: process.env.AILF_VERBOSE === "1" });
+    }
+    return new ConsoleLogger({
+        verbose: process.env.AILF_LOG_LEVEL === "verbose" ||
+            process.env.AILF_VERBOSE === "1",
+    });
 }
 function createCache(config) {
     const local = new FilesystemCache(config.rootDir);

package/dist/orchestration/pipeline-orchestrator.js CHANGED Viewed

@@ -77,12 +77,20 @@ export async function orchestratePipeline(ctx, steps) {
     const pipelineStart = Date.now();
     const hasJob = !!ctx.config.jobId;
     ctx.logger.section("ai-literacy-framework — Evaluation Pipeline");
+    ctx.logger.debug(`Pipeline starting with ${steps.length} steps`, {
+        steps: steps.map((s) => s.name),
+        mode: ctx.config.mode,
+        source: ctx.config.source,
+        noCache: ctx.config.noCache,
+        jobId: ctx.config.jobId,
+    });
     // Report initial running status
     if (hasJob) {
         await reportJobProgress(ctx, steps[0]?.name ?? "init", 0, steps.length, "running");
     }
     for (let i = 0; i < steps.length; i++) {
         const step = steps[i];
+        ctx.logger.debug(`Starting step ${i + 1}/${steps.length}: ${step.name}`);
         ctx.logger.section(step.name);
         // Report current step progress
         if (hasJob) {
@@ -97,16 +105,23 @@ export async function orchestratePipeline(ctx, steps) {
         // Fail fast on required step failure
         if (result.status === "failed" && !step.optional) {
             ctx.logger.error(`Pipeline aborted: ${step.name} failed`);
+            const failedError = result.status === "failed" ? result.error : `${step.name} failed`;
             // Report failure to job store
             if (hasJob) {
-                const errorMsg = result.status === "failed" ? result.error : `${step.name} failed`;
                 await reportJobProgress(ctx, step.name, i + 1, steps.length, "failed", {
-                    message: errorMsg,
+                    message: failedError,
                     step: step.name,
                 });
             }
             return {
+                belowCritical: state.belowCritical,
                 durationMs: Date.now() - pipelineStart,
+                failureReason: {
+                    type: "step-failed",
+                    step: step.name,
+                    message: failedError,
+                },
+                promptfooUrls: state.promptfooUrls,
                 steps: results,
                 success: false,
                 validation,
@@ -120,6 +135,13 @@ export async function orchestratePipeline(ctx, steps) {
     const durationMs = Date.now() - pipelineStart;
     ctx.logger.section("Pipeline Complete");
     ctx.logger.info(`All steps completed in ${durationMs}ms`);
+    ctx.logger.debug("Pipeline state at completion", {
+        belowCritical: state.belowCritical,
+        reportId: state.reportId,
+        remoteCacheHits: state.remoteCacheHits
+            ? [...state.remoteCacheHits]
+            : undefined,
+    });
     // Report completion to job store (with reportId from state if available)
     if (hasJob) {
         try {
@@ -145,9 +167,12 @@ export async function orchestratePipeline(ctx, steps) {
         }
     }
     return {
+        belowCritical: state.belowCritical,
         durationMs,
+        promptfooUrls: state.promptfooUrls,
         steps: results,
         success: true,
+        testSummary: state.testSummary,
         validation,
     };
 }

package/dist/orchestration/step-runner.js CHANGED Viewed

@@ -36,10 +36,13 @@ export async function runStep(step, ctx, state = {}) {
     if (canCache) {
         try {
             const inputs = step.cacheInputs(ctx);
+            ctx.logger.debug(`[${step.name}] Cache inputs: ${inputs.length} files`);
             const key = await ctx.cache.computeKey(inputs);
             cacheKey = key;
+            ctx.logger.debug(`[${step.name}] Cache key: ${key}`);
             const cached = await ctx.cache.lookup(step.name, key);
             if (cached.hit) {
+                ctx.logger.debug(`[${step.name}] Cache HIT — skipping execution`);
                 const result = {
                     durationMs: Date.now() - start,
                     status: "success",
@@ -48,11 +51,16 @@ export async function runStep(step, ctx, state = {}) {
                 ctx.logger.step(step.name, result);
                 return result;
             }
+            ctx.logger.debug(`[${step.name}] Cache MISS — executing`);
         }
         catch {
             // Cache lookup failure is non-fatal — proceed to execute
+            ctx.logger.debug(`[${step.name}] Cache lookup failed — proceeding`);
         }
     }
+    else {
+        ctx.logger.debug(`[${step.name}] Cache skipped (${!ctx.cache ? "no cache adapter" : ctx.config.noCache ? "--no-cache" : "no cacheInputs"})`);
+    }
     // 3. Execute
     try {
         const result = await step.execute(ctx, state);

package/dist/orchestration/steps/calculate-scores-step.js CHANGED Viewed

@@ -63,9 +63,11 @@ export class CalculateScoresStep {
         catch {
             // Non-fatal — proceed without source metadata
         }
+        let belowCritical = [];
         try {
-            calculateAndWriteScores({
+            const result = calculateAndWriteScores({
                 allowedOrigins: ctx.config.allowedOrigins,
+                logger: ctx.logger,
                 mode: ctx.config.mode,
                 resolvedSource,
                 resultsPath: primaryMode !== "baseline"
@@ -75,25 +77,17 @@ export class CalculateScoresStep {
                 searchMode: ctx.config.searchMode,
                 source: ctx.config.source,
             });
+            belowCritical = result.belowCritical;
+            if (result.testSummary) {
+                state.testSummary = result.testSummary;
+            }
         }
         catch (err) {
-            const code = err !== null && typeof err === "object" && "status" in err
-                ? err.status
-                : undefined;
-            if (code !== undefined && code !== 1) {
-                return {
-                    durationMs: Date.now() - start,
-                    error: `calculate-scores failed with exit code ${code}`,
-                    status: "failed",
-                };
-            }
-            if (code === undefined) {
-                return {
-                    durationMs: Date.now() - start,
-                    error: `calculate-scores failed: ${err instanceof Error ? err.message : String(err)}`,
-                    status: "failed",
-                };
-            }
+            return {
+                durationMs: Date.now() - start,
+                error: `calculate-scores failed: ${err instanceof Error ? err.message : String(err)}`,
+                status: "failed",
+            };
         }
         // Postcondition: score summary exists and is valid
         const summaryIssues = checkScoreSummaryValid(ctx.config.rootDir);
@@ -105,10 +99,19 @@ export class CalculateScoresStep {
                 status: "failed",
             };
         }
+        // Propagate belowCritical into pipeline state for downstream consumers
+        // (e.g., orchestrator reporting, publish step metadata).
+        // This is informational — the pipeline continues to run subsequent steps.
+        if (belowCritical.length > 0) {
+            state.belowCritical = belowCritical;
+        }
+        const criticalSuffix = belowCritical.length > 0
+            ? ` (${belowCritical.length} area(s) below critical threshold: ${belowCritical.join(", ")})`
+            : "";
         return {
             durationMs: Date.now() - start,
             status: "success",
-            summary: "Scores calculated and summary written",
+            summary: `Scores calculated and summary written${criticalSuffix}`,
         };
     }
     cacheInputs(ctx) {

package/dist/orchestration/steps/generate-configs-step.js CHANGED Viewed

@@ -65,6 +65,7 @@ export class GenerateConfigsStep {
                         tags: ctx.config.tags,
                     }
                     : undefined,
+                logger: ctx.logger,
                 resolvedSource,
                 rootDir: ctx.config.rootDir,
                 searchMode: ctx.config.searchMode,

package/dist/orchestration/steps/grader-consistency-step.js CHANGED Viewed

@@ -34,6 +34,7 @@ export class GraderConsistencyStep {
         }
         try {
             await runGraderConsistency({
+                logger: ctx.logger,
                 replications,
                 resultsPath: resolve(ctx.config.rootDir, resultsFile),
                 rootDir: ctx.config.rootDir,

package/dist/orchestration/steps/mirror-repo-tasks-step.js CHANGED Viewed

@@ -59,8 +59,9 @@ export class MirrorRepoTasksStep {
             // Run the mirror
             const result = await mirrorRepoTasks({
                 client,
-                tasks: repoTasks,
                 git,
+                logger: ctx.logger,
+                tasks: repoTasks,
             });
             // Log results
             if (result.areasCreated.length > 0) {

package/dist/orchestration/steps/publish-report-step.js CHANGED Viewed

@@ -14,6 +14,7 @@ import { readFileSync } from "fs";
 import { resolve } from "path";
 import { checkScoreSummaryValid } from "../../pipeline/checks.js";
 import { buildProvenance, } from "../../pipeline/provenance.js";
+import { generateReportTitle } from "../../pipeline/report-title.js";
 import { generateReportId } from "../../report-store.js";
 import { withRetry } from "../../sinks/retry.js";
 export class PublishReportStep {
@@ -101,6 +102,7 @@ export class PublishReportStep {
                 comparedAgainst: autoCompareResult.baselineReportId,
             };
         }
+        const title = generateReportTitle({ provenance });
         const report = {
             comparison: comparison ?? undefined,
             completedAt: now,
@@ -109,6 +111,7 @@ export class PublishReportStep {
             provenance,
             summary,
             tag: this.options.publishTag ?? ctx.config.publishTag,
+            title,
         };
         // Share reportId with downstream steps (CallbackStep + orchestrator job update)
         state.reportId = reportId;

package/dist/pipeline/calculate-scores.d.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import type { Logger, TestSummary } from "../_vendor/ailf-core/index.d.ts";
 import { type ResolvedSourceConfig } from "../sources.js";
 import { type ActualScoreEntry, type ComponentResult } from "../_vendor/ailf-core/index.d.ts";
 import type { GraderJudgment, PerModelEntry } from "./types.js";
@@ -86,6 +87,8 @@ export declare function scoreAgenticResults(resultsPath: string, weights: Record
 export interface CalculateScoresOptions {
     /** Allowed origins for source isolation reporting */
     allowedOrigins?: string[];
+    /** Logger instance (defaults to ConsoleLogger if not provided) */
+    logger?: Logger;
     /** Evaluation mode (controls which result files are read) */
     mode?: string;
     /** Pre-resolved source config (skips loadSource() call) */
@@ -99,4 +102,11 @@ export interface CalculateScoresOptions {
     /** Documentation source name */
     source?: string;
 }
-export declare function calculateAndWriteScores(options: CalculateScoresOptions): void;
+/** Result from calculateAndWriteScores — replaces process.exit() calls. */
+export interface CalculateScoresResult {
+    /** Feature areas that scored below the critical threshold (40). */
+    belowCritical: string[];
+    /** Summary of test execution outcomes (total, passed, failed, errored). */
+    testSummary?: TestSummary;
+}
+export declare function calculateAndWriteScores(options: CalculateScoresOptions): CalculateScoresResult;