@sanity/ailf 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
  2. package/config/thresholds.ts +3 -3
  3. package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
  4. package/dist/_vendor/ailf-core/examples/index.js +2 -2
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
  6. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
  7. package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
  9. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
  10. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
  11. package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
  12. package/dist/_vendor/ailf-shared/run-classification.js +1 -1
  13. package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
  14. package/dist/adapters/api-client/build-request.d.ts +0 -2
  15. package/dist/adapters/api-client/build-request.js +2 -6
  16. package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
  17. package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
  18. package/dist/adapters/config-sources/file-config-adapter.js +38 -12
  19. package/dist/adapters/task-sources/repo-schemas.d.ts +38 -0
  20. package/dist/adapters/task-sources/repo-schemas.js +127 -0
  21. package/dist/cli.d.ts +2 -2
  22. package/dist/cli.js +134 -38
  23. package/dist/commands/agent-report.js +1 -1
  24. package/dist/commands/calculate-scores.js +0 -2
  25. package/dist/commands/check-staleness.js +1 -1
  26. package/dist/commands/chronic-failures.js +4 -4
  27. package/dist/commands/coverage-audit.js +6 -7
  28. package/dist/commands/discovery-report.js +16 -4
  29. package/dist/commands/eval.d.ts +1 -1
  30. package/dist/commands/eval.js +1 -1
  31. package/dist/commands/explain-handler.d.ts +1 -1
  32. package/dist/commands/explain-handler.js +13 -44
  33. package/dist/commands/fetch-docs.js +0 -2
  34. package/dist/commands/generate-configs.js +0 -2
  35. package/dist/commands/grader/index.js +3 -3
  36. package/dist/commands/init.d.ts +2 -2
  37. package/dist/commands/init.js +10 -9
  38. package/dist/commands/interactive.d.ts +1 -1
  39. package/dist/commands/interactive.js +8 -8
  40. package/dist/commands/pipeline-action.d.ts +1 -3
  41. package/dist/commands/pipeline-action.js +174 -140
  42. package/dist/commands/pr-comment.js +1 -3
  43. package/dist/commands/publish.d.ts +1 -1
  44. package/dist/commands/publish.js +2 -4
  45. package/dist/commands/readiness-report.js +17 -8
  46. package/dist/commands/remote-pipeline.d.ts +1 -1
  47. package/dist/commands/remote-pipeline.js +1 -3
  48. package/dist/commands/run.d.ts +64 -0
  49. package/dist/commands/{pipeline.js → run.js} +19 -30
  50. package/dist/commands/shared/help.js +4 -4
  51. package/dist/commands/shared/options.d.ts +29 -3
  52. package/dist/commands/shared/options.js +37 -13
  53. package/dist/commands/validate-tasks.js +1 -1
  54. package/dist/commands/validate.d.ts +1 -1
  55. package/dist/commands/validate.js +2 -2
  56. package/dist/commands/weekly-digest.js +3 -3
  57. package/dist/config/thresholds.ts +3 -3
  58. package/dist/orchestration/build-app-context.js +0 -2
  59. package/dist/orchestration/build-step-sequence.js +1 -11
  60. package/dist/orchestration/steps/fetch-docs-step.js +1 -1
  61. package/dist/orchestration/steps/index.d.ts +0 -2
  62. package/dist/orchestration/steps/index.js +0 -2
  63. package/dist/orchestration/steps/run-eval-step.js +1 -1
  64. package/dist/pipeline/cache.d.ts +1 -1
  65. package/dist/pipeline/map-request-to-config.js +0 -2
  66. package/dist/pipeline/plan.d.ts +2 -4
  67. package/dist/pipeline/plan.js +4 -32
  68. package/dist/pipeline/run-context.d.ts +1 -1
  69. package/dist/pipeline/run-context.js +4 -4
  70. package/dist/pipeline/validate.d.ts +1 -1
  71. package/dist/pipeline/validate.js +1 -1
  72. package/package.json +7 -7
  73. package/dist/commands/pipeline.d.ts +0 -77
  74. package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
  75. package/dist/orchestration/steps/discovery-report-step.js +0 -62
  76. package/dist/orchestration/steps/readiness-step.d.ts +0 -13
  77. package/dist/orchestration/steps/readiness-step.js +0 -98
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "3.7.0",
3
+ "version": "3.8.0",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -67,20 +67,20 @@
67
67
  "grader-compare": "tsx src/cli.ts grader compare",
68
68
  "grader-sensitivity": "tsx src/cli.ts grader sensitivity",
69
69
  "calculate-scores": "tsx src/cli.ts calculate-scores",
70
- "agent-report": "tsx src/cli.ts agent-report",
70
+ "agent-report": "tsx src/cli.ts report agent",
71
71
  "share": "dotenv -e ../../.env -o -- promptfoo share",
72
72
  "view": "dotenv -e ../../.env -o -- promptfoo view",
73
73
  "cli": "tsx src/cli.ts",
74
74
  "pipeline": "tsx src/cli.ts pipeline",
75
- "validate": "tsx src/cli.ts validate",
75
+ "validate": "tsx src/cli.ts validate config",
76
76
  "test": "tsx --test src/__tests__/*.test.ts",
77
77
  "test:e2e": "AILF_E2E=1 tsx --test src/__tests__/e2e/*.e2e.test.ts",
78
78
  "test:all": "AILF_E2E=1 tsx --test src/__tests__/*.test.ts src/pipeline/compiler/__tests__/*.test.ts src/__tests__/e2e/*.e2e.test.ts",
79
79
  "pr-comment": "tsx src/cli.ts pr-comment",
80
- "coverage-audit": "tsx src/cli.ts coverage-audit",
81
- "readiness-report": "tsx src/cli.ts readiness-report",
82
- "discovery-report": "tsx src/cli.ts discovery-report",
80
+ "coverage-audit": "tsx src/cli.ts report coverage",
81
+ "readiness-report": "tsx src/cli.ts report readiness",
82
+ "discovery-report": "tsx src/cli.ts report discovery",
83
83
  "webhook-server": "tsx src/cli.ts webhook-server",
84
- "weekly-digest": "tsx src/cli.ts weekly-digest"
84
+ "weekly-digest": "tsx src/cli.ts report digest"
85
85
  }
86
86
  }
package/dist/commands/pipeline.d.ts DELETED
@@ -1,77 +0,0 @@
1
- /**
2
- * pipeline command — the main evaluation pipeline orchestrator.
3
- *
4
- * Defines all 36+ CLI flags via Commander, resolves them into a typed
5
- * options object, bridges to process.env for downstream modules, and
6
- * delegates to runPipeline().
7
- *
8
- * @see docs/cli.md for the full flag reference.
9
- */
10
- import { Command } from "commander";
11
- /**
12
- * Raw CLI options as parsed by Commander.
13
- * Field names follow Commander's camelCase convention for kebab-case flags.
14
- */
15
- export interface PipelineCliOptions {
16
- allowedOrigin: string[];
17
- allowedOrigins: string[];
18
- area?: string;
19
- autoScope: boolean;
20
- before?: string;
21
- cache: boolean;
22
- changedDocs?: string;
23
- compare: boolean;
24
- compareBaseline?: string;
25
- concurrency?: number;
26
- config?: string;
27
- debug: boolean;
28
- debugN?: number;
29
- debugPattern?: string;
30
- debugSample?: number;
31
- discoveryReport: boolean;
32
- dryRun: boolean;
33
- gapAnalysis: boolean;
34
- graderReplications?: number;
35
- header: string[];
36
- headers: string[];
37
- mode: string;
38
- variant?: string;
39
- output?: string;
40
- outputDir?: string;
41
- promptfooUrl?: string;
42
- publish?: boolean;
43
- publishTag?: string;
44
- readiness: boolean;
45
- remoteCache?: boolean;
46
- reportDataset?: string;
47
- reportProject?: string;
48
- sanityDataset?: string;
49
- sanityDocument: string[];
50
- sanityDocuments: string[];
51
- sanityPerspective?: string;
52
- sanityProject?: string;
53
- sanityStudioOrigin?: string;
54
- search?: string;
55
- skipEval: boolean;
56
- skipFetch: boolean;
57
- source?: string;
58
- remote: boolean;
59
- repoTasksPath?: string;
60
- task?: string;
61
- tag: string[];
62
- taskSource?: string;
63
- threshold?: number;
64
- url: string[];
65
- urls: string[];
66
- apiUrl?: string;
67
- artifacts: boolean;
68
- artifactsDir?: string;
69
- artifactsDryRun: boolean;
70
- artifactsExclude?: string;
71
- classification?: string;
72
- ownerTeam?: string;
73
- ownerIndividual?: string;
74
- purpose?: string;
75
- label: string[];
76
- }
77
- export declare function createPipelineCommand(): Command;
package/dist/orchestration/steps/discovery-report-step.d.ts DELETED
@@ -1,13 +0,0 @@
1
- /**
2
- * Pipeline step: Discovery report (agent discoverability analysis).
3
- *
4
- * Calls pure functions from pipeline/discovery-report.ts directly.
5
- * Optional step — failure doesn't stop the pipeline.
6
- */
7
- import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
- export declare class DiscoveryReportStep implements PipelineStep {
9
- readonly name = "discovery-report";
10
- readonly optional = true;
11
- check(): ValidationIssue[];
12
- execute(ctx: AppContext): Promise<StepResult>;
13
- }
package/dist/orchestration/steps/discovery-report-step.js DELETED
@@ -1,62 +0,0 @@
1
- /**
2
- * Pipeline step: Discovery report (agent discoverability analysis).
3
- *
4
- * Calls pure functions from pipeline/discovery-report.ts directly.
5
- * Optional step — failure doesn't stop the pipeline.
6
- */
7
- import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
8
- import { resolve } from "path";
9
- import { assoc, } from "../../_vendor/ailf-core/index.js";
10
- import { emitFileContents } from "../../artifact-capture/emit-file.js";
11
- import { formatDiscoveryMarkdown, generateDiscoveryReport, } from "../../pipeline/discovery-report.js";
12
- export class DiscoveryReportStep {
13
- name = "discovery-report";
14
- optional = true;
15
- check() {
16
- return [];
17
- }
18
- async execute(ctx) {
19
- const root = ctx.config.rootDir;
20
- const start = Date.now();
21
- try {
22
- const scoreSummaryPath = resolve(root, "results", "latest", "score-summary.json");
23
- if (!existsSync(scoreSummaryPath)) {
24
- return {
25
- durationMs: Date.now() - start,
26
- error: "score-summary.json not found",
27
- status: "failed",
28
- };
29
- }
30
- const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
31
- if (!scoreSummary.retrievalMetrics) {
32
- return {
33
- status: "skipped",
34
- reason: "No retrieval metrics in score summary — run an agentic evaluation first",
35
- };
36
- }
37
- const report = generateDiscoveryReport(scoreSummary, ctx.config.areas);
38
- const md = formatDiscoveryMarkdown(report);
39
- // Write to outputDir (respects --output-dir)
40
- mkdirSync(ctx.config.outputDir, { recursive: true });
41
- const discoveryPath = resolve(ctx.config.outputDir, "discovery-report.md");
42
- writeFileSync(discoveryPath, md);
43
- // W0050 — discoveryReport is per-entry keyed by mode.
44
- await emitFileContents(ctx.artifactWriter, "discoveryReport", assoc(ctx, { mode: ctx.config.mode }), discoveryPath);
45
- console.log(md);
46
- const invisible = report.invisibleDocs.length;
47
- const f1 = report.overall.avgF1.toFixed(2);
48
- return {
49
- durationMs: Date.now() - start,
50
- status: "success",
51
- summary: `F1=${f1}, ${invisible} invisible doc${invisible === 1 ? "" : "s"}, ${report.recommendations.length} recommendation${report.recommendations.length === 1 ? "" : "s"}`,
52
- };
53
- }
54
- catch (err) {
55
- return {
56
- durationMs: Date.now() - start,
57
- error: err instanceof Error ? err.message : String(err),
58
- status: "failed",
59
- };
60
- }
61
- }
62
- }
package/dist/orchestration/steps/readiness-step.d.ts DELETED
@@ -1,13 +0,0 @@
1
- /**
2
- * Pipeline step: Launch readiness report.
3
- *
4
- * Calls pure functions from pipeline/readiness-report.ts directly.
5
- * Optional step — failure doesn't stop the pipeline.
6
- */
7
- import { type AppContext, type PipelineStep, type StepResult, type ValidationIssue } from "../../_vendor/ailf-core/index.d.ts";
8
- export declare class ReadinessStep implements PipelineStep {
9
- readonly name = "readiness";
10
- readonly optional = true;
11
- check(): ValidationIssue[];
12
- execute(ctx: AppContext): Promise<StepResult>;
13
- }
package/dist/orchestration/steps/readiness-step.js DELETED
@@ -1,98 +0,0 @@
1
- /**
2
- * Pipeline step: Launch readiness report.
3
- *
4
- * Calls pure functions from pipeline/readiness-report.ts directly.
5
- * Optional step — failure doesn't stop the pipeline.
6
- */
7
- import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
8
- import { resolve } from "path";
9
- import { tryLoadConfigFile } from "../../pipeline/compiler/config-loader.js";
10
- import { assoc, } from "../../_vendor/ailf-core/index.js";
11
- import { emitFileContents } from "../../artifact-capture/emit-file.js";
12
- import { formatReadinessMarkdown, generateReadinessReport, } from "../../pipeline/readiness-report.js";
13
- import { ThresholdConfigSchema } from "../../pipeline/schemas.js";
14
- export class ReadinessStep {
15
- name = "readiness";
16
- optional = true;
17
- check() {
18
- return [];
19
- }
20
- async execute(ctx) {
21
- const root = ctx.config.rootDir;
22
- const start = Date.now();
23
- try {
24
- const scoreSummaryPath = resolve(root, "results", "latest", "score-summary.json");
25
- if (!existsSync(scoreSummaryPath)) {
26
- return {
27
- durationMs: Date.now() - start,
28
- error: "score-summary.json not found",
29
- status: "failed",
30
- };
31
- }
32
- const thresholdsLoaded = tryLoadConfigFile("thresholds", root);
33
- if (!thresholdsLoaded) {
34
- return {
35
- durationMs: Date.now() - start,
36
- error: "config/thresholds not found",
37
- status: "failed",
38
- };
39
- }
40
- const scoreSummary = JSON.parse(readFileSync(scoreSummaryPath, "utf-8"));
41
- const thresholdConfig = ThresholdConfigSchema.parse(thresholdsLoaded.data);
42
- // Read gap-analysis.json from outputDir (gap-analysis step writes there)
43
- const gapPath = resolve(ctx.config.outputDir, "gap-analysis.json");
44
- const gapAnalysis = existsSync(gapPath)
45
- ? JSON.parse(readFileSync(gapPath, "utf-8"))
46
- : undefined;
47
- const readinessAreas = ctx.config.areas ?? scoreSummary.scores.map((s) => s.feature);
48
- const readinessLines = [];
49
- for (const area of readinessAreas) {
50
- const areaScore = scoreSummary.scores.find((s) => s.feature === area);
51
- if (!areaScore) {
52
- ctx.logger.warn(`Area "${area}" not found in scores — skipping`);
53
- continue;
54
- }
55
- const report = generateReadinessReport({
56
- area,
57
- gapAnalysis,
58
- scoreSummary,
59
- thresholdConfig,
60
- });
61
- const md = formatReadinessMarkdown(report);
62
- readinessLines.push(md);
63
- console.log(md);
64
- }
65
- if (readinessLines.length > 0) {
66
- // Write to outputDir (respects --output-dir)
67
- mkdirSync(ctx.config.outputDir, { recursive: true });
68
- const readinessPath = resolve(ctx.config.outputDir, "readiness-report.md");
69
- writeFileSync(readinessPath, readinessLines.join("\n---\n\n"));
70
- // W0050 — readinessReport is run-scoped bulk markdown.
71
- await emitFileContents(ctx.artifactWriter, "readinessReport", assoc(ctx), readinessPath);
72
- }
73
- const passCount = readinessAreas.filter((area) => {
74
- const areaScore = scoreSummary.scores.find((s) => s.feature === area);
75
- if (!areaScore)
76
- return false;
77
- const report = generateReadinessReport({
78
- area,
79
- scoreSummary,
80
- thresholdConfig,
81
- });
82
- return report.pass;
83
- }).length;
84
- return {
85
- durationMs: Date.now() - start,
86
- status: "success",
87
- summary: `${passCount}/${readinessAreas.length} areas ready`,
88
- };
89
- }
90
- catch (err) {
91
- return {
92
- durationMs: Date.now() - start,
93
- error: err instanceof Error ? err.message : String(err),
94
- status: "failed",
95
- };
96
- }
97
- }
98
- }