@sanity/ailf 3.7.0 → 3.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
  2. package/config/thresholds.ts +3 -3
  3. package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
  4. package/dist/_vendor/ailf-core/examples/index.js +2 -2
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
  6. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
  7. package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
  9. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
  10. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
  11. package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
  12. package/dist/_vendor/ailf-shared/run-classification.js +1 -1
  13. package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
  14. package/dist/adapters/api-client/build-request.d.ts +0 -2
  15. package/dist/adapters/api-client/build-request.js +2 -6
  16. package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
  17. package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
  18. package/dist/adapters/config-sources/file-config-adapter.js +42 -17
  19. package/dist/adapters/task-sources/repo-schemas.d.ts +41 -3
  20. package/dist/adapters/task-sources/repo-schemas.js +127 -0
  21. package/dist/cli-program.d.ts +39 -0
  22. package/dist/cli-program.js +137 -0
  23. package/dist/cli.d.ts +8 -2
  24. package/dist/cli.js +128 -142
  25. package/dist/commands/agent-report.js +1 -1
  26. package/dist/commands/calculate-scores.js +0 -2
  27. package/dist/commands/check-staleness.js +1 -1
  28. package/dist/commands/chronic-failures.js +4 -4
  29. package/dist/commands/coverage-audit.js +6 -7
  30. package/dist/commands/discovery-report.js +16 -4
  31. package/dist/commands/eval.d.ts +1 -1
  32. package/dist/commands/eval.js +1 -1
  33. package/dist/commands/explain-handler.d.ts +1 -1
  34. package/dist/commands/explain-handler.js +13 -44
  35. package/dist/commands/fetch-docs.js +0 -2
  36. package/dist/commands/generate-configs.js +0 -2
  37. package/dist/commands/grader/index.js +3 -3
  38. package/dist/commands/init.d.ts +2 -2
  39. package/dist/commands/init.js +10 -9
  40. package/dist/commands/interactive.d.ts +1 -1
  41. package/dist/commands/interactive.js +8 -8
  42. package/dist/commands/pipeline-action.d.ts +1 -3
  43. package/dist/commands/pipeline-action.js +174 -140
  44. package/dist/commands/pr-comment.js +1 -3
  45. package/dist/commands/publish.d.ts +1 -1
  46. package/dist/commands/publish.js +2 -4
  47. package/dist/commands/readiness-report.js +17 -8
  48. package/dist/commands/remote-pipeline.d.ts +1 -1
  49. package/dist/commands/remote-pipeline.js +1 -3
  50. package/dist/commands/run.d.ts +64 -0
  51. package/dist/commands/{pipeline.js → run.js} +19 -30
  52. package/dist/commands/shared/help.js +4 -4
  53. package/dist/commands/shared/options.d.ts +29 -3
  54. package/dist/commands/shared/options.js +37 -13
  55. package/dist/commands/validate-tasks.js +1 -1
  56. package/dist/commands/validate.d.ts +1 -1
  57. package/dist/commands/validate.js +2 -2
  58. package/dist/commands/weekly-digest.js +3 -3
  59. package/dist/config/thresholds.ts +3 -3
  60. package/dist/orchestration/build-app-context.js +0 -2
  61. package/dist/orchestration/build-step-sequence.js +1 -11
  62. package/dist/orchestration/steps/fetch-docs-step.js +1 -1
  63. package/dist/orchestration/steps/index.d.ts +0 -2
  64. package/dist/orchestration/steps/index.js +0 -2
  65. package/dist/orchestration/steps/run-eval-step.js +1 -1
  66. package/dist/pipeline/cache.d.ts +1 -1
  67. package/dist/pipeline/map-request-to-config.js +0 -2
  68. package/dist/pipeline/mirror-repo-tasks.d.ts +1 -1
  69. package/dist/pipeline/plan.d.ts +2 -4
  70. package/dist/pipeline/plan.js +4 -32
  71. package/dist/pipeline/run-context.d.ts +1 -1
  72. package/dist/pipeline/run-context.js +4 -4
  73. package/dist/pipeline/validate.d.ts +1 -1
  74. package/dist/pipeline/validate.js +1 -1
  75. package/package.json +11 -9
  76. package/dist/commands/pipeline.d.ts +0 -77
  77. package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
  78. package/dist/orchestration/steps/discovery-report-step.js +0 -62
  79. package/dist/orchestration/steps/readiness-step.d.ts +0 -13
  80. package/dist/orchestration/steps/readiness-step.js +0 -98
  81. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.d.ts +0 -10
  82. package/dist/pipeline/compiler/__tests__/agent-harness-handler.test.js +0 -366
  83. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.d.ts +0 -9
  84. package/dist/pipeline/compiler/__tests__/assertion-mapper.test.js +0 -145
  85. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.d.ts +0 -10
  86. package/dist/pipeline/compiler/__tests__/knowledge-probe-handler.test.js +0 -314
  87. package/dist/pipeline/compiler/__tests__/literacy-handler.test.d.ts +0 -10
  88. package/dist/pipeline/compiler/__tests__/literacy-handler.test.js +0 -486
  89. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.d.ts +0 -10
  90. package/dist/pipeline/compiler/__tests__/mcp-server-handler.test.js +0 -425
  91. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.d.ts +0 -9
  92. package/dist/pipeline/compiler/__tests__/promptfoo-compiler.test.js +0 -332
  93. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.d.ts +0 -12
  94. package/dist/pipeline/compiler/__tests__/sandbox-and-fixtures.test.js +0 -210
  95. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.d.ts +0 -7
  96. package/dist/pipeline/compiler/__tests__/scoring-and-presets.test.js +0 -404
  97. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.d.ts +0 -10
  98. package/dist/pipeline/compiler/__tests__/scoring-bridge.test.js +0 -184
  99. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.d.ts +0 -8
  100. package/dist/pipeline/compiler/__tests__/task-graph-builder.test.js +0 -301
  101. package/dist/pipeline/compiler/__tests__/telemetry.test.d.ts +0 -9
  102. package/dist/pipeline/compiler/__tests__/telemetry.test.js +0 -503
  103. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.d.ts +0 -10
  104. package/dist/pipeline/compiler/__tests__/tool-loop-openai.test.js +0 -509
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * FileConfigAdapter — resolves pipeline config from a local config file.
3
3
  *
4
- * Enables `ailf pipeline --config <path>` to load all pipeline options
4
+ * Enables `ailf run --config <path>` to load all pipeline options
5
5
  * from a file instead of CLI flags. Supports multiple formats in
6
6
  * priority order:
7
7
  *
@@ -47,7 +47,7 @@ export class FileConfigAdapter {
47
47
  return this.validateAndMap(result.value, ext);
48
48
  }
49
49
  // YAML / JSON files — load via fs
50
- const raw = readConfigFile(this.filePath);
50
+ const raw = await readConfigFile(this.filePath);
51
51
  return this.validateAndMap(raw, ext);
52
52
  }
53
53
  /**
@@ -69,13 +69,12 @@ export class FileConfigAdapter {
69
69
  // ---------------------------------------------------------------------------
70
70
  // Helpers
71
71
  // ---------------------------------------------------------------------------
72
- function readConfigFile(filePath) {
72
+ async function readConfigFile(filePath) {
73
73
  const content = readFileSync(filePath, "utf-8");
74
74
  const ext = extname(filePath).toLowerCase();
75
75
  if (ext === ".yaml" || ext === ".yml") {
76
- // Dynamic import for yaml parser — only needed when reading YAML configs
77
- // eslint-disable-next-line @typescript-eslint/no-require-imports
78
- const { parse } = require("yaml");
76
+ // Dynamic ESM import — only loaded when reading YAML configs.
77
+ const { parse } = await import("yaml");
79
78
  return parse(content);
80
79
  }
81
80
  return JSON.parse(content);
@@ -91,9 +90,16 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
91
90
  }
92
91
  // Normalize legacy mode names (e.g., "full" → literacy + variant)
93
92
  const normalized = normalizeMode(config.mode ?? "literacy");
93
+ // Output directory (W0077 Phase 6c) — resolve `output.dir` relative to the
94
+ // rootDir (the caller's workspace, set by the FileConfigAdapter caller).
95
+ // When unset, fall back to <rootDir>/.ailf/results/latest/ to mirror the
96
+ // CLI's default. This matches `resolveOutputDir` for the auto-load path.
97
+ const outputDir = config.output?.dir
98
+ ? resolve(rootDir, config.output.dir)
99
+ : resolve(rootDir, ".ailf", "results", "latest");
94
100
  return {
95
101
  rootDir,
96
- outputDir: resolve(rootDir, "results", "latest"),
102
+ outputDir,
97
103
  mode: normalized.mode,
98
104
  variant: normalized.variant,
99
105
  noAutoScope: config.noAutoScope ?? false,
@@ -107,21 +113,40 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
107
113
  compareEnabled: config.compare ?? false,
108
114
  compareThreshold: config.compareThreshold,
109
115
  compareBaseline: config.compareBaseline,
110
- gapAnalysisEnabled: config.gapAnalysis ?? true,
111
- readinessEnabled: config.readiness ?? false,
112
- discoveryReportEnabled: config.discoveryReport ?? false,
113
- publishEnabled: config.publish ?? false,
114
- publishTag: config.publishTag,
116
+ gapAnalysisEnabled: config.execution?.gapAnalysis ?? true,
117
+ // W0077 Phase 4 — `publish` is now a policy object. Map the auto value
118
+ // directly to a boolean for the file-config path; the runtime
119
+ // smart-default logic in pipeline-action.ts isn't relevant here because
120
+ // the user has explicitly handed us a config file.
121
+ publishEnabled: config.publish?.auto === "never"
122
+ ? false
123
+ : config.publish?.auto !== undefined,
124
+ publishTag: config.publish?.tag,
115
125
  noCache: config.noCache ?? false,
116
126
  noRemoteCache: config.noRemoteCache ?? false,
117
- graderReplications: config.graderReplications,
127
+ graderReplications: config.execution?.graderReplications,
118
128
  urls: config.urls,
119
- headers: config.headers,
120
- allowedOrigins: config.allowedOrigins,
129
+ headers: config.agentic?.headers,
130
+ allowedOrigins: config.agentic?.allowedOrigins,
121
131
  searchMode: config.searchMode ?? "open",
122
- concurrency: config.concurrency,
132
+ concurrency: config.execution?.concurrency,
123
133
  remote: false,
124
- apiUrl: "https://ailf-api.sanity.build",
134
+ apiUrl: config.execution?.apiUrl ?? "https://ailf-api.sanity.build",
135
+ // W0077 Phase 6g — artifact writer settings. `enabled: false` flips
136
+ // `artifactsDisabled` so composition-root selects the NoOp writer.
137
+ artifactsDisabled: config.artifacts?.enabled === false ? true : undefined,
138
+ artifactsDir: config.artifacts?.dir
139
+ ? resolve(rootDir, config.artifacts.dir)
140
+ : undefined,
141
+ artifactsExclude: config.artifacts?.exclude,
142
+ // W0077 Phase 6h — task-source selection. Default is content-lake
143
+ // (signaled by `taskSourceType` undefined); when `repo`, the
144
+ // composition-root resolves `repoTasksPath` (defaulting to
145
+ // `<rootDir>/.ailf/tasks/` when unset).
146
+ taskSourceType: config.taskSource?.type === "repo" ? "repo" : undefined,
147
+ repoTasksPath: config.taskSource?.repoTasksPath
148
+ ? resolve(rootDir, config.taskSource.repoTasksPath)
149
+ : undefined,
125
150
  presets: config.presets,
126
151
  };
127
152
  }
@@ -147,8 +147,8 @@ export declare const CanonicalTaskSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
147
147
  baseline: z.ZodOptional<z.ZodObject<{
148
148
  enabled: z.ZodOptional<z.ZodBoolean>;
149
149
  rubric: z.ZodOptional<z.ZodEnum<{
150
- full: "full";
151
150
  abbreviated: "abbreviated";
151
+ full: "full";
152
152
  none: "none";
153
153
  }>>;
154
154
  }, z.core.$strip>>;
@@ -773,8 +773,8 @@ export declare const ContentLakeAuthorableTaskSchema: z.ZodObject<{
773
773
  baseline: z.ZodOptional<z.ZodObject<{
774
774
  enabled: z.ZodOptional<z.ZodBoolean>;
775
775
  rubric: z.ZodOptional<z.ZodEnum<{
776
- full: "full";
777
776
  abbreviated: "abbreviated";
777
+ full: "full";
778
778
  none: "none";
779
779
  }>>;
780
780
  }, z.core.$strip>>;
@@ -893,8 +893,8 @@ export declare const CanonicalTaskFileSchema: z.ZodArray<z.ZodDiscriminatedUnion
893
893
  baseline: z.ZodOptional<z.ZodObject<{
894
894
  enabled: z.ZodOptional<z.ZodBoolean>;
895
895
  rubric: z.ZodOptional<z.ZodEnum<{
896
- full: "full";
897
896
  abbreviated: "abbreviated";
897
+ full: "full";
898
898
  none: "none";
899
899
  }>>;
900
900
  }, z.core.$strip>>;
@@ -1434,11 +1434,49 @@ export declare const RepoConfigSchema: z.ZodObject<{
1434
1434
  projectId: z.ZodOptional<z.ZodString>;
1435
1435
  dataset: z.ZodOptional<z.ZodString>;
1436
1436
  baseUrl: z.ZodOptional<z.ZodString>;
1437
+ studioOrigin: z.ZodOptional<z.ZodString>;
1437
1438
  }, z.core.$strip>>;
1438
1439
  reportStore: z.ZodOptional<z.ZodObject<{
1439
1440
  projectId: z.ZodString;
1440
1441
  dataset: z.ZodString;
1441
1442
  }, z.core.$strip>>;
1443
+ publish: z.ZodOptional<z.ZodObject<{
1444
+ auto: z.ZodOptional<z.ZodEnum<{
1445
+ never: "never";
1446
+ always: "always";
1447
+ "full-runs": "full-runs";
1448
+ }>>;
1449
+ tag: z.ZodOptional<z.ZodString>;
1450
+ }, z.core.$strip>>;
1451
+ execution: z.ZodOptional<z.ZodObject<{
1452
+ concurrency: z.ZodOptional<z.ZodNumber>;
1453
+ graderReplications: z.ZodOptional<z.ZodNumber>;
1454
+ gapAnalysis: z.ZodOptional<z.ZodBoolean>;
1455
+ apiUrl: z.ZodOptional<z.ZodString>;
1456
+ }, z.core.$strip>>;
1457
+ output: z.ZodOptional<z.ZodObject<{
1458
+ dir: z.ZodOptional<z.ZodString>;
1459
+ }, z.core.$strip>>;
1460
+ owner: z.ZodOptional<z.ZodObject<{
1461
+ team: z.ZodOptional<z.ZodString>;
1462
+ individual: z.ZodOptional<z.ZodString>;
1463
+ }, z.core.$strip>>;
1464
+ agentic: z.ZodOptional<z.ZodObject<{
1465
+ headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
1466
+ allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
1467
+ }, z.core.$strip>>;
1468
+ artifacts: z.ZodOptional<z.ZodObject<{
1469
+ enabled: z.ZodOptional<z.ZodBoolean>;
1470
+ dir: z.ZodOptional<z.ZodString>;
1471
+ exclude: z.ZodOptional<z.ZodArray<z.ZodString>>;
1472
+ }, z.core.$strip>>;
1473
+ taskSource: z.ZodOptional<z.ZodObject<{
1474
+ type: z.ZodOptional<z.ZodEnum<{
1475
+ "content-lake": "content-lake";
1476
+ repo: "repo";
1477
+ }>>;
1478
+ repoTasksPath: z.ZodOptional<z.ZodString>;
1479
+ }, z.core.$strip>>;
1442
1480
  triggers: z.ZodOptional<z.ZodObject<{
1443
1481
  pr: z.ZodOptional<z.ZodObject<{
1444
1482
  mode: z.ZodDefault<z.ZodEnum<{
@@ -427,12 +427,17 @@ const ScheduleTriggerSchema = TriggerConfigSchema.extend({
427
427
  /**
428
428
  * Documentation source configuration.
429
429
  * Defines which Sanity project holds the documentation being evaluated.
430
+ *
431
+ * `studioOrigin` (W0077 Phase 6d) replaces the retired
432
+ * `--sanity-studio-origin` CLI flag on `ailf run`. The `SANITY_STUDIO_ORIGIN`
433
+ * env var still wins over this value at resolution time.
430
434
  */
431
435
  const SourceConfigSchema = z
432
436
  .object({
433
437
  projectId: z.string().min(1).optional(),
434
438
  dataset: z.string().min(1).optional(),
435
439
  baseUrl: z.string().url().optional(),
440
+ studioOrigin: z.string().url().optional(),
436
441
  })
437
442
  .optional();
438
443
  /**
@@ -445,6 +450,121 @@ const ReportStoreConfigSchema = z
445
450
  dataset: z.string().min(1),
446
451
  })
447
452
  .optional();
453
+ /**
454
+ * Publish policy. Controls when `ailf run` writes a report to the Content
455
+ * Lake without an explicit `--publish` / `--no-publish` flag.
456
+ *
457
+ * - `auto: "always"` — publish any run with a configured report store
458
+ * - `auto: "full-runs"` — publish non-debug runs (default)
459
+ * - `auto: "never"` — never auto-publish; users must pass --publish
460
+ *
461
+ * `tag` is a default value for `--publish-tag` when not passed at the CLI.
462
+ *
463
+ * @see docs/design-docs/pipeline-command-surface.md §5.3
464
+ */
465
+ const PublishConfigSchema = z
466
+ .object({
467
+ auto: z.enum(["always", "full-runs", "never"]).optional(),
468
+ tag: z.string().optional(),
469
+ })
470
+ .optional();
471
+ /**
472
+ * Execution-tier configuration. Per-environment values that the four
473
+ * retired CLI flags used to set: concurrency, grader replications, gap
474
+ * analysis toggle, and the AILF API URL.
475
+ *
476
+ * @see docs/design-docs/pipeline-command-surface.md §5.7 (W0077 Phase 6b)
477
+ */
478
+ const ExecutionConfigSchema = z
479
+ .object({
480
+ concurrency: z.number().int().positive().optional(),
481
+ graderReplications: z.number().int().positive().optional(),
482
+ gapAnalysis: z.boolean().optional(),
483
+ apiUrl: z.string().url().optional(),
484
+ })
485
+ .optional();
486
+ /**
487
+ * Task-source configuration (W0077 Phase 6h). Replaces the retired
488
+ * `--task-source` and `--repo-tasks-path` CLI flags on `ailf run`.
489
+ *
490
+ * - `type` — `content-lake` (default) or `repo`. When `repo`,
491
+ * tasks load from `repoTasksPath` (or
492
+ * `<cwd>/.ailf/tasks/` if unset).
493
+ * - `repoTasksPath` — optional explicit path. Resolved relative to the
494
+ * caller's cwd. Required to exist on disk.
495
+ *
496
+ * No env-var fallback today; cascade is config-file → built-in default.
497
+ */
498
+ const TaskSourceConfigSchema = z
499
+ .object({
500
+ type: z.enum(["content-lake", "repo"]).optional(),
501
+ repoTasksPath: z.string().min(1).optional(),
502
+ })
503
+ .optional();
504
+ /**
505
+ * Artifact-writer configuration (W0077 Phase 6g). Replaces the retired
506
+ * `--no-artifacts`, `--artifacts-dir`, and `--artifacts-exclude` CLI flags
507
+ * on `ailf run`. The `AILF_ARTIFACTS_DIR` env var still wins over
508
+ * `artifacts.dir` at resolution time. `artifacts.enabled` defaults to
509
+ * `true` (writers attached); set `false` to disable all writers (mirrors
510
+ * the legacy `--no-artifacts` semantics).
511
+ *
512
+ * Other commands (`ailf runs export`, etc.) keep their `--artifacts-dir`
513
+ * flag — that's a "read from this directory" override, distinct from the
514
+ * pipeline's write-side `artifacts.dir`.
515
+ */
516
+ const ArtifactsConfigSchema = z
517
+ .object({
518
+ enabled: z.boolean().optional(),
519
+ dir: z.string().min(1).optional(),
520
+ exclude: z.array(z.string().min(1)).optional(),
521
+ })
522
+ .optional();
523
+ /**
524
+ * Owner attribution (W0077 Phase 6f). Replaces the retired `--owner-team`
525
+ * and `--owner-individual` CLI flags. Both feed the D0037 caller envelope
526
+ * that surfaces in remote-mode runs. Env vars `AILF_OWNER_TEAM` and
527
+ * `AILF_OWNER_INDIVIDUAL` still win over these values at resolution time.
528
+ */
529
+ const OwnerConfigSchema = z
530
+ .object({
531
+ team: z.string().min(1).optional(),
532
+ individual: z.string().min(1).optional(),
533
+ })
534
+ .optional();
535
+ /**
536
+ * Agentic-mode configuration (W0077 Phase 6f). Replaces the retired
537
+ * `--header` and `--allowed-origin` CLI flags. `headers` is a key/value
538
+ * object (mirrors `DOC_HEADERS` env-var JSON shape); `allowedOrigins` is a
539
+ * list of origin globs. The `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env
540
+ * vars still apply downstream as additive merges.
541
+ */
542
+ const AgenticConfigSchema = z
543
+ .object({
544
+ headers: z.record(z.string(), z.string()).optional(),
545
+ allowedOrigins: z.array(z.string().min(1)).optional(),
546
+ })
547
+ .optional();
548
+ /**
549
+ * Output-directory configuration. Replaces the retired `--output-dir`
550
+ * CLI flag on `ailf run`. Resolution order:
551
+ *
552
+ * .ailf/config.yaml `output.dir` > built-in default
553
+ *
554
+ * Path is resolved relative to the caller's cwd. The built-in default is
555
+ * `<cwd>/.ailf/results/latest/` (see resolve-output-dir.ts). Other commands
556
+ * (`ailf publish`, `ailf pr-comment`, etc.) keep their `--output-dir`
557
+ * flag — that's a "read from this directory" override, distinct from the
558
+ * pipeline's write-side `output.dir`.
559
+ *
560
+ * @see docs/design-docs/pipeline-command-surface.md §5.7 (W0077 Phase 6c)
561
+ * @see docs/design-docs/output-dir-routing.md
562
+ */
563
+ const OutputConfigSchema = z
564
+ .object({
565
+ dir: z.string().min(1).optional(),
566
+ })
567
+ .optional();
448
568
  /**
449
569
  * Zod schema for .ailf/config.yaml — controls documentation source,
450
570
  * report destination, and trigger behavior for evaluations from an
@@ -453,6 +573,13 @@ const ReportStoreConfigSchema = z
453
573
  export const RepoConfigSchema = z.object({
454
574
  source: SourceConfigSchema,
455
575
  reportStore: ReportStoreConfigSchema,
576
+ publish: PublishConfigSchema,
577
+ execution: ExecutionConfigSchema,
578
+ output: OutputConfigSchema,
579
+ owner: OwnerConfigSchema,
580
+ agentic: AgenticConfigSchema,
581
+ artifacts: ArtifactsConfigSchema,
582
+ taskSource: TaskSourceConfigSchema,
456
583
  triggers: z
457
584
  .object({
458
585
  pr: TriggerConfigSchema.optional(),
@@ -0,0 +1,39 @@
1
+ /**
2
+ * cli-program.ts — pure factory for the AILF Commander program.
3
+ *
4
+ * Splits the program construction out of cli.ts so the CLI is testable
5
+ * in-process. cli.ts owns bootstrap side effects (dotenv loading,
6
+ * retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync); this
7
+ * module owns command wiring.
8
+ *
9
+ * The W0078 M4 black-box harness imports `buildCliProgram()` directly so
10
+ * tests can construct a fresh program, attach `exitOverride()`, capture
11
+ * stdout/stderr, and parse a synthetic argv — all without spawning a
12
+ * subprocess.
13
+ *
14
+ * @see packages/eval/src/__tests__/cli-harness/run-cli.ts
15
+ */
16
+ import { Command } from "commander";
17
+ /**
18
+ * Options for `buildCliProgram`.
19
+ */
20
+ export interface BuildCliProgramOptions {
21
+ /**
22
+ * Path to the eval package root (the directory containing package.json).
23
+ * Used to resolve the version string and as the root passed to the
24
+ * `--explain` handler.
25
+ */
26
+ evalRoot: string;
27
+ }
28
+ /**
29
+ * Construct the Commander program with every subcommand registered.
30
+ *
31
+ * Pure factory — no I/O beyond reading package.json for the version, no
32
+ * `process.exit()`, no `process.argv` access. Tests can call this and
33
+ * attach `program.exitOverride()` before parsing to capture exit codes
34
+ * instead of terminating the process.
35
+ *
36
+ * Registration order determines group display order in `--help`. Commands
37
+ * within a group appear in the order they're added.
38
+ */
39
+ export declare function buildCliProgram(opts: BuildCliProgramOptions): Command;
@@ -0,0 +1,137 @@
1
+ /**
2
+ * cli-program.ts — pure factory for the AILF Commander program.
3
+ *
4
+ * Splits the program construction out of cli.ts so the CLI is testable
5
+ * in-process. cli.ts owns bootstrap side effects (dotenv loading,
6
+ * retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync); this
7
+ * module owns command wiring.
8
+ *
9
+ * The W0078 M4 black-box harness imports `buildCliProgram()` directly so
10
+ * tests can construct a fresh program, attach `exitOverride()`, capture
11
+ * stdout/stderr, and parse a synthetic argv — all without spawning a
12
+ * subprocess.
13
+ *
14
+ * @see packages/eval/src/__tests__/cli-harness/run-cli.ts
15
+ */
16
+ import { Command } from "commander";
17
+ import { readFileSync } from "node:fs";
18
+ import { resolve } from "node:path";
19
+ import { CommandGroup, configureProgram } from "./commands/shared/help.js";
20
+ import { createAgentReportCommand } from "./commands/agent-report.js";
21
+ import { createBaselineCommand } from "./commands/baseline.js";
22
+ import { createCacheCommand } from "./commands/cache.js";
23
+ import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
24
+ import { createCheckStalenessCommand } from "./commands/check-staleness.js";
25
+ import { createChronicFailuresCommand } from "./commands/chronic-failures.js";
26
+ import { createCompareCommand } from "./commands/compare.js";
27
+ import { createCompletionCommand } from "./commands/completion.js";
28
+ import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
29
+ import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
30
+ import { createEvalCommand } from "./commands/eval.js";
31
+ import { createFetchDocsCommand } from "./commands/fetch-docs.js";
32
+ import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
33
+ import { createGraderCommand } from "./commands/grader/index.js";
34
+ import { createInitCommand } from "./commands/init.js";
35
+ import { createInteractiveCommand } from "./commands/interactive.js";
36
+ import { createLookupDocCommand } from "./commands/lookup-doc.js";
37
+ import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
38
+ import { createPrCommentCommand } from "./commands/pr-comment.js";
39
+ import { createPublishCommand } from "./commands/publish.js";
40
+ import { createReadinessReportCommand } from "./commands/readiness-report.js";
41
+ import { createRunCommand } from "./commands/run.js";
42
+ import { createRunsCommand } from "./commands/runs.js";
43
+ import { createValidateConfigCommand } from "./commands/validate.js";
44
+ import { createValidateTasksCommand } from "./commands/validate-tasks.js";
45
+ import { createWebhookServerCommand } from "./commands/webhook-server.js";
46
+ import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
47
+ /**
48
+ * Construct the Commander program with every subcommand registered.
49
+ *
50
+ * Pure factory — no I/O beyond reading package.json for the version, no
51
+ * `process.exit()`, no `process.argv` access. Tests can call this and
52
+ * attach `program.exitOverride()` before parsing to capture exit codes
53
+ * instead of terminating the process.
54
+ *
55
+ * Registration order determines group display order in `--help`. Commands
56
+ * within a group appear in the order they're added.
57
+ */
58
+ export function buildCliProgram(opts) {
59
+ const { evalRoot } = opts;
60
+ const pkgPath = resolve(evalRoot, "package.json");
61
+ const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
62
+ const program = new Command()
63
+ .name("ailf")
64
+ .description("AI Literacy Framework — evaluate how well docs enable AI coding tools\n\nMeasure whether AI coding agents can find the right documentation\nand produce correct implementations of your product features.")
65
+ .version(pkg.version)
66
+ .option("-v, --verbose", "Increase log output")
67
+ .option("-q, --quiet", "Suppress non-error output")
68
+ .option("--dotenv <path>", "Override default .env file path")
69
+ .option("--explain", "Show execution plan without running")
70
+ .option("--format <fmt>", "Output format for --explain (console, json)", "console")
71
+ .option("-y, --yes", "With --explain: show plan then prompt to confirm execution");
72
+ configureProgram(program);
73
+ // Global --explain hook — intercepts any command before execution
74
+ program.hook("preAction", async (thisCommand, actionCommand) => {
75
+ const globalOpts = thisCommand.opts();
76
+ if (!globalOpts.explain)
77
+ return;
78
+ const { handleExplain } = await import("./commands/explain-handler.js");
79
+ try {
80
+ await handleExplain(actionCommand, globalOpts.yes ?? false, evalRoot);
81
+ process.exit(0);
82
+ }
83
+ catch (err) {
84
+ // Sentinel from --yes confirmation: user wants to proceed
85
+ if (err !== null &&
86
+ typeof err === "object" &&
87
+ "__proceedArgv" in err) {
88
+ const filteredArgv = err.__proceedArgv;
89
+ console.log("\n ▸ Proceeding with execution…\n");
90
+ await program.parseAsync(filteredArgv);
91
+ return;
92
+ }
93
+ throw err;
94
+ }
95
+ });
96
+ // ── Core Workflow ──────────────────────────────────────────────────────
97
+ program.addCommand(createRunCommand().helpGroup(CommandGroup.CoreWorkflow));
98
+ program.addCommand(createCompareCommand().helpGroup(CommandGroup.CoreWorkflow));
99
+ program.addCommand(createBaselineCommand().helpGroup(CommandGroup.CoreWorkflow));
100
+ program.addCommand(createPublishCommand().helpGroup(CommandGroup.CoreWorkflow));
101
+ program.addCommand(createRunsCommand().helpGroup(CommandGroup.CoreWorkflow));
102
+ // ── Analysis & Reports ────────────────────────────────────────────────
103
+ const reportCommand = new Command("report")
104
+ .description("Generate analysis and reporting outputs from evaluation runs")
105
+ .addCommand(createReadinessReportCommand())
106
+ .addCommand(createChronicFailuresCommand())
107
+ .addCommand(createCoverageAuditCommand())
108
+ .addCommand(createDiscoveryReportCommand())
109
+ .addCommand(createAgentReportCommand())
110
+ .addCommand(createWeeklyDigestCommand())
111
+ .addCommand(createCheckStalenessCommand());
112
+ program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
113
+ // ── Grader Reliability ────────────────────────────────────────────────
114
+ program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
115
+ // ── Setup & Configuration ─────────────────────────────────────────────
116
+ program.addCommand(createInitCommand().helpGroup(CommandGroup.SetupConfig));
117
+ const validateCommand = new Command("validate")
118
+ .description("Validate AILF configuration and task files")
119
+ .addCommand(createValidateConfigCommand())
120
+ .addCommand(createValidateTasksCommand());
121
+ program.addCommand(validateCommand.helpGroup(CommandGroup.SetupConfig));
122
+ program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
123
+ program.addCommand(createCacheCommand().helpGroup(CommandGroup.SetupConfig));
124
+ // ── Pipeline Internals ────────────────────────────────────────────────
125
+ program.addCommand(createEvalCommand().helpGroup(CommandGroup.PipelineInternals));
126
+ program.addCommand(createCalculateScoresCommand().helpGroup(CommandGroup.PipelineInternals));
127
+ program.addCommand(createPrCommentCommand().helpGroup(CommandGroup.PipelineInternals));
128
+ program.addCommand(createGenerateConfigsCommand().helpGroup(CommandGroup.PipelineInternals));
129
+ program.addCommand(createMeasureRetrievalCommand().helpGroup(CommandGroup.PipelineInternals));
130
+ program.addCommand(createLookupDocCommand().helpGroup(CommandGroup.PipelineInternals));
131
+ program.addCommand(createWebhookServerCommand().helpGroup(CommandGroup.PipelineInternals));
132
+ // ── Developer Tools ───────────────────────────────────────────────────
133
+ program.addCommand(createInteractiveCommand().helpGroup(CommandGroup.DeveloperTools));
134
+ // Shell completion — must be registered last (needs full program tree)
135
+ program.addCommand(createCompletionCommand(program).helpGroup(CommandGroup.DeveloperTools));
136
+ return program;
137
+ }
package/dist/cli.d.ts CHANGED
@@ -11,7 +11,7 @@
11
11
  * appends Quick Start examples.
12
12
  *
13
13
  * Usage:
14
- * ailf pipeline [flags] # full evaluation pipeline
14
+ * ailf run [flags] # full evaluation run
15
15
  * ailf compare [flags] # compare evaluation runs
16
16
  * ailf baseline <cmd> [flags] # baseline management
17
17
  * ailf validate [flags] # config validation
@@ -24,6 +24,12 @@
24
24
  * --dotenv <path> # override default .env path
25
25
  *
26
26
  * Dev mode (without building):
27
- * tsx src/cli.ts pipeline --debug
27
+ * tsx src/cli.ts run --debug
28
+ *
29
+ * Module split: this file owns *bootstrap side effects* (dotenv,
30
+ * retired-flag/env/cmd checks, AILF_LOG_LEVEL pre-scan, parseAsync).
31
+ * The Commander wiring lives in ./cli-program.ts so the W0078 M4 black-box
32
+ * harness can build the program in-process without firing those side
33
+ * effects.
28
34
  */
29
35
  export {};