@sanity/ailf 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. package/config/airbyte/ai_literacy_framework.connector.yaml +1 -1
  2. package/config/thresholds.ts +3 -3
  3. package/dist/_vendor/ailf-core/examples/index.d.ts +2 -2
  4. package/dist/_vendor/ailf-core/examples/index.js +2 -2
  5. package/dist/_vendor/ailf-core/ports/context.d.ts +0 -4
  6. package/dist/_vendor/ailf-core/schemas/eval-config.d.ts +38 -12
  7. package/dist/_vendor/ailf-core/schemas/eval-config.js +102 -22
  8. package/dist/_vendor/ailf-core/schemas/pipeline-request.d.ts +4 -6
  9. package/dist/_vendor/ailf-core/schemas/pipeline-request.js +1 -3
  10. package/dist/_vendor/ailf-core/schemas/schedules.d.ts +2 -2
  11. package/dist/_vendor/ailf-shared/run-classification.d.ts +2 -2
  12. package/dist/_vendor/ailf-shared/run-classification.js +1 -1
  13. package/dist/_vendor/ailf-shared/run-context.d.ts +1 -1
  14. package/dist/adapters/api-client/build-request.d.ts +0 -2
  15. package/dist/adapters/api-client/build-request.js +2 -6
  16. package/dist/adapters/config-sources/cli-config-adapter.d.ts +1 -1
  17. package/dist/adapters/config-sources/file-config-adapter.d.ts +1 -1
  18. package/dist/adapters/config-sources/file-config-adapter.js +38 -12
  19. package/dist/adapters/task-sources/repo-schemas.d.ts +38 -0
  20. package/dist/adapters/task-sources/repo-schemas.js +127 -0
  21. package/dist/cli.d.ts +2 -2
  22. package/dist/cli.js +134 -38
  23. package/dist/commands/agent-report.js +1 -1
  24. package/dist/commands/calculate-scores.js +0 -2
  25. package/dist/commands/check-staleness.js +1 -1
  26. package/dist/commands/chronic-failures.js +4 -4
  27. package/dist/commands/coverage-audit.js +6 -7
  28. package/dist/commands/discovery-report.js +16 -4
  29. package/dist/commands/eval.d.ts +1 -1
  30. package/dist/commands/eval.js +1 -1
  31. package/dist/commands/explain-handler.d.ts +1 -1
  32. package/dist/commands/explain-handler.js +13 -44
  33. package/dist/commands/fetch-docs.js +0 -2
  34. package/dist/commands/generate-configs.js +0 -2
  35. package/dist/commands/grader/index.js +3 -3
  36. package/dist/commands/init.d.ts +2 -2
  37. package/dist/commands/init.js +10 -9
  38. package/dist/commands/interactive.d.ts +1 -1
  39. package/dist/commands/interactive.js +8 -8
  40. package/dist/commands/pipeline-action.d.ts +1 -3
  41. package/dist/commands/pipeline-action.js +174 -140
  42. package/dist/commands/pr-comment.js +1 -3
  43. package/dist/commands/publish.d.ts +1 -1
  44. package/dist/commands/publish.js +2 -4
  45. package/dist/commands/readiness-report.js +17 -8
  46. package/dist/commands/remote-pipeline.d.ts +1 -1
  47. package/dist/commands/remote-pipeline.js +1 -3
  48. package/dist/commands/run.d.ts +64 -0
  49. package/dist/commands/{pipeline.js → run.js} +19 -30
  50. package/dist/commands/shared/help.js +4 -4
  51. package/dist/commands/shared/options.d.ts +29 -3
  52. package/dist/commands/shared/options.js +37 -13
  53. package/dist/commands/validate-tasks.js +1 -1
  54. package/dist/commands/validate.d.ts +1 -1
  55. package/dist/commands/validate.js +2 -2
  56. package/dist/commands/weekly-digest.js +3 -3
  57. package/dist/config/thresholds.ts +3 -3
  58. package/dist/orchestration/build-app-context.js +0 -2
  59. package/dist/orchestration/build-step-sequence.js +1 -11
  60. package/dist/orchestration/steps/fetch-docs-step.js +1 -1
  61. package/dist/orchestration/steps/index.d.ts +0 -2
  62. package/dist/orchestration/steps/index.js +0 -2
  63. package/dist/orchestration/steps/run-eval-step.js +1 -1
  64. package/dist/pipeline/cache.d.ts +1 -1
  65. package/dist/pipeline/map-request-to-config.js +0 -2
  66. package/dist/pipeline/plan.d.ts +2 -4
  67. package/dist/pipeline/plan.js +4 -32
  68. package/dist/pipeline/run-context.d.ts +1 -1
  69. package/dist/pipeline/run-context.js +4 -4
  70. package/dist/pipeline/validate.d.ts +1 -1
  71. package/dist/pipeline/validate.js +1 -1
  72. package/package.json +7 -7
  73. package/dist/commands/pipeline.d.ts +0 -77
  74. package/dist/orchestration/steps/discovery-report-step.d.ts +0 -13
  75. package/dist/orchestration/steps/discovery-report-step.js +0 -62
  76. package/dist/orchestration/steps/readiness-step.d.ts +0 -13
  77. package/dist/orchestration/steps/readiness-step.js +0 -98
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * FileConfigAdapter — resolves pipeline config from a local config file.
3
3
  *
4
- * Enables `ailf pipeline --config <path>` to load all pipeline options
4
+ * Enables `ailf run --config <path>` to load all pipeline options
5
5
  * from a file instead of CLI flags. Supports multiple formats in
6
6
  * priority order:
7
7
  *
@@ -91,9 +91,16 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
91
91
  }
92
92
  // Normalize legacy mode names (e.g., "full" → literacy + variant)
93
93
  const normalized = normalizeMode(config.mode ?? "literacy");
94
+ // Output directory (W0077 Phase 6c) — resolve `output.dir` relative to the
95
+ // rootDir (the caller's workspace, set by the FileConfigAdapter caller).
96
+ // When unset, fall back to <rootDir>/.ailf/results/latest/ to mirror the
97
+ // CLI's default. This matches `resolveOutputDir` for the auto-load path.
98
+ const outputDir = config.output?.dir
99
+ ? resolve(rootDir, config.output.dir)
100
+ : resolve(rootDir, ".ailf", "results", "latest");
94
101
  return {
95
102
  rootDir,
96
- outputDir: resolve(rootDir, "results", "latest"),
103
+ outputDir,
97
104
  mode: normalized.mode,
98
105
  variant: normalized.variant,
99
106
  noAutoScope: config.noAutoScope ?? false,
@@ -107,21 +114,40 @@ function mapEvalConfigToResolvedConfig(config, rootDir) {
107
114
  compareEnabled: config.compare ?? false,
108
115
  compareThreshold: config.compareThreshold,
109
116
  compareBaseline: config.compareBaseline,
110
- gapAnalysisEnabled: config.gapAnalysis ?? true,
111
- readinessEnabled: config.readiness ?? false,
112
- discoveryReportEnabled: config.discoveryReport ?? false,
113
- publishEnabled: config.publish ?? false,
114
- publishTag: config.publishTag,
117
+ gapAnalysisEnabled: config.execution?.gapAnalysis ?? true,
118
+ // W0077 Phase 4 — `publish` is now a policy object. Map the auto value
119
+ // directly to a boolean for the file-config path; the runtime
120
+ // smart-default logic in pipeline-action.ts isn't relevant here because
121
+ // the user has explicitly handed us a config file.
122
+ publishEnabled: config.publish?.auto === "never"
123
+ ? false
124
+ : config.publish?.auto !== undefined,
125
+ publishTag: config.publish?.tag,
115
126
  noCache: config.noCache ?? false,
116
127
  noRemoteCache: config.noRemoteCache ?? false,
117
- graderReplications: config.graderReplications,
128
+ graderReplications: config.execution?.graderReplications,
118
129
  urls: config.urls,
119
- headers: config.headers,
120
- allowedOrigins: config.allowedOrigins,
130
+ headers: config.agentic?.headers,
131
+ allowedOrigins: config.agentic?.allowedOrigins,
121
132
  searchMode: config.searchMode ?? "open",
122
- concurrency: config.concurrency,
133
+ concurrency: config.execution?.concurrency,
123
134
  remote: false,
124
- apiUrl: "https://ailf-api.sanity.build",
135
+ apiUrl: config.execution?.apiUrl ?? "https://ailf-api.sanity.build",
136
+ // W0077 Phase 6g — artifact writer settings. `enabled: false` flips
137
+ // `artifactsDisabled` so composition-root selects the NoOp writer.
138
+ artifactsDisabled: config.artifacts?.enabled === false ? true : undefined,
139
+ artifactsDir: config.artifacts?.dir
140
+ ? resolve(rootDir, config.artifacts.dir)
141
+ : undefined,
142
+ artifactsExclude: config.artifacts?.exclude,
143
+ // W0077 Phase 6h — task-source selection. Default is content-lake
144
+ // (signaled by `taskSourceType` undefined); when `repo`, the
145
+ // composition-root resolves `repoTasksPath` (defaulting to
146
+ // `<rootDir>/.ailf/tasks/` when unset).
147
+ taskSourceType: config.taskSource?.type === "repo" ? "repo" : undefined,
148
+ repoTasksPath: config.taskSource?.repoTasksPath
149
+ ? resolve(rootDir, config.taskSource.repoTasksPath)
150
+ : undefined,
125
151
  presets: config.presets,
126
152
  };
127
153
  }
@@ -1434,11 +1434,49 @@ export declare const RepoConfigSchema: z.ZodObject<{
1434
1434
  projectId: z.ZodOptional<z.ZodString>;
1435
1435
  dataset: z.ZodOptional<z.ZodString>;
1436
1436
  baseUrl: z.ZodOptional<z.ZodString>;
1437
+ studioOrigin: z.ZodOptional<z.ZodString>;
1437
1438
  }, z.core.$strip>>;
1438
1439
  reportStore: z.ZodOptional<z.ZodObject<{
1439
1440
  projectId: z.ZodString;
1440
1441
  dataset: z.ZodString;
1441
1442
  }, z.core.$strip>>;
1443
+ publish: z.ZodOptional<z.ZodObject<{
1444
+ auto: z.ZodOptional<z.ZodEnum<{
1445
+ never: "never";
1446
+ always: "always";
1447
+ "full-runs": "full-runs";
1448
+ }>>;
1449
+ tag: z.ZodOptional<z.ZodString>;
1450
+ }, z.core.$strip>>;
1451
+ execution: z.ZodOptional<z.ZodObject<{
1452
+ concurrency: z.ZodOptional<z.ZodNumber>;
1453
+ graderReplications: z.ZodOptional<z.ZodNumber>;
1454
+ gapAnalysis: z.ZodOptional<z.ZodBoolean>;
1455
+ apiUrl: z.ZodOptional<z.ZodString>;
1456
+ }, z.core.$strip>>;
1457
+ output: z.ZodOptional<z.ZodObject<{
1458
+ dir: z.ZodOptional<z.ZodString>;
1459
+ }, z.core.$strip>>;
1460
+ owner: z.ZodOptional<z.ZodObject<{
1461
+ team: z.ZodOptional<z.ZodString>;
1462
+ individual: z.ZodOptional<z.ZodString>;
1463
+ }, z.core.$strip>>;
1464
+ agentic: z.ZodOptional<z.ZodObject<{
1465
+ headers: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
1466
+ allowedOrigins: z.ZodOptional<z.ZodArray<z.ZodString>>;
1467
+ }, z.core.$strip>>;
1468
+ artifacts: z.ZodOptional<z.ZodObject<{
1469
+ enabled: z.ZodOptional<z.ZodBoolean>;
1470
+ dir: z.ZodOptional<z.ZodString>;
1471
+ exclude: z.ZodOptional<z.ZodArray<z.ZodString>>;
1472
+ }, z.core.$strip>>;
1473
+ taskSource: z.ZodOptional<z.ZodObject<{
1474
+ type: z.ZodOptional<z.ZodEnum<{
1475
+ "content-lake": "content-lake";
1476
+ repo: "repo";
1477
+ }>>;
1478
+ repoTasksPath: z.ZodOptional<z.ZodString>;
1479
+ }, z.core.$strip>>;
1442
1480
  triggers: z.ZodOptional<z.ZodObject<{
1443
1481
  pr: z.ZodOptional<z.ZodObject<{
1444
1482
  mode: z.ZodDefault<z.ZodEnum<{
@@ -427,12 +427,17 @@ const ScheduleTriggerSchema = TriggerConfigSchema.extend({
427
427
  /**
428
428
  * Documentation source configuration.
429
429
  * Defines which Sanity project holds the documentation being evaluated.
430
+ *
431
+ * `studioOrigin` (W0077 Phase 6d) replaces the retired
432
+ * `--sanity-studio-origin` CLI flag on `ailf run`. The `SANITY_STUDIO_ORIGIN`
433
+ * env var still wins over this value at resolution time.
430
434
  */
431
435
  const SourceConfigSchema = z
432
436
  .object({
433
437
  projectId: z.string().min(1).optional(),
434
438
  dataset: z.string().min(1).optional(),
435
439
  baseUrl: z.string().url().optional(),
440
+ studioOrigin: z.string().url().optional(),
436
441
  })
437
442
  .optional();
438
443
  /**
@@ -445,6 +450,121 @@ const ReportStoreConfigSchema = z
445
450
  dataset: z.string().min(1),
446
451
  })
447
452
  .optional();
453
+ /**
454
+ * Publish policy. Controls when `ailf run` writes a report to the Content
455
+ * Lake without an explicit `--publish` / `--no-publish` flag.
456
+ *
457
+ * - `auto: "always"` — publish any run with a configured report store
458
+ * - `auto: "full-runs"` — publish non-debug runs (default)
459
+ * - `auto: "never"` — never auto-publish; users must pass --publish
460
+ *
461
+ * `tag` is a default value for `--publish-tag` when not passed at the CLI.
462
+ *
463
+ * @see docs/design-docs/pipeline-command-surface.md §5.3
464
+ */
465
+ const PublishConfigSchema = z
466
+ .object({
467
+ auto: z.enum(["always", "full-runs", "never"]).optional(),
468
+ tag: z.string().optional(),
469
+ })
470
+ .optional();
471
+ /**
472
+ * Execution-tier configuration. Per-environment values that the four
473
+ * retired CLI flags used to set: concurrency, grader replications, gap
474
+ * analysis toggle, and the AILF API URL.
475
+ *
476
+ * @see docs/design-docs/pipeline-command-surface.md §5.7 (W0077 Phase 6b)
477
+ */
478
+ const ExecutionConfigSchema = z
479
+ .object({
480
+ concurrency: z.number().int().positive().optional(),
481
+ graderReplications: z.number().int().positive().optional(),
482
+ gapAnalysis: z.boolean().optional(),
483
+ apiUrl: z.string().url().optional(),
484
+ })
485
+ .optional();
486
+ /**
487
+ * Task-source configuration (W0077 Phase 6h). Replaces the retired
488
+ * `--task-source` and `--repo-tasks-path` CLI flags on `ailf run`.
489
+ *
490
+ * - `type` — `content-lake` (default) or `repo`. When `repo`,
491
+ * tasks load from `repoTasksPath` (or
492
+ * `<cwd>/.ailf/tasks/` if unset).
493
+ * - `repoTasksPath` — optional explicit path. Resolved relative to the
494
+ * caller's cwd. Required to exist on disk.
495
+ *
496
+ * No env-var fallback today; cascade is config-file → built-in default.
497
+ */
498
+ const TaskSourceConfigSchema = z
499
+ .object({
500
+ type: z.enum(["content-lake", "repo"]).optional(),
501
+ repoTasksPath: z.string().min(1).optional(),
502
+ })
503
+ .optional();
504
+ /**
505
+ * Artifact-writer configuration (W0077 Phase 6g). Replaces the retired
506
+ * `--no-artifacts`, `--artifacts-dir`, and `--artifacts-exclude` CLI flags
507
+ * on `ailf run`. The `AILF_ARTIFACTS_DIR` env var still wins over
508
+ * `artifacts.dir` at resolution time. `artifacts.enabled` defaults to
509
+ * `true` (writers attached); set `false` to disable all writers (mirrors
510
+ * the legacy `--no-artifacts` semantics).
511
+ *
512
+ * Other commands (`ailf runs export`, etc.) keep their `--artifacts-dir`
513
+ * flag — that's a "read from this directory" override, distinct from the
514
+ * pipeline's write-side `artifacts.dir`.
515
+ */
516
+ const ArtifactsConfigSchema = z
517
+ .object({
518
+ enabled: z.boolean().optional(),
519
+ dir: z.string().min(1).optional(),
520
+ exclude: z.array(z.string().min(1)).optional(),
521
+ })
522
+ .optional();
523
+ /**
524
+ * Owner attribution (W0077 Phase 6f). Replaces the retired `--owner-team`
525
+ * and `--owner-individual` CLI flags. Both feed the D0037 caller envelope
526
+ * that surfaces in remote-mode runs. Env vars `AILF_OWNER_TEAM` and
527
+ * `AILF_OWNER_INDIVIDUAL` still win over these values at resolution time.
528
+ */
529
+ const OwnerConfigSchema = z
530
+ .object({
531
+ team: z.string().min(1).optional(),
532
+ individual: z.string().min(1).optional(),
533
+ })
534
+ .optional();
535
+ /**
536
+ * Agentic-mode configuration (W0077 Phase 6f). Replaces the retired
537
+ * `--header` and `--allowed-origin` CLI flags. `headers` is a key/value
538
+ * object (mirrors `DOC_HEADERS` env-var JSON shape); `allowedOrigins` is a
539
+ * list of origin globs. The `DOC_HEADERS` and `DOC_ALLOWED_ORIGIN(S)` env
540
+ * vars still apply downstream as additive merges.
541
+ */
542
+ const AgenticConfigSchema = z
543
+ .object({
544
+ headers: z.record(z.string(), z.string()).optional(),
545
+ allowedOrigins: z.array(z.string().min(1)).optional(),
546
+ })
547
+ .optional();
548
+ /**
549
+ * Output-directory configuration. Replaces the retired `--output-dir`
550
+ * CLI flag on `ailf run`. Resolution order:
551
+ *
552
+ * .ailf/config.yaml `output.dir` > built-in default
553
+ *
554
+ * Path is resolved relative to the caller's cwd. The built-in default is
555
+ * `<cwd>/.ailf/results/latest/` (see resolve-output-dir.ts). Other commands
556
+ * (`ailf publish`, `ailf pr-comment`, etc.) keep their `--output-dir`
557
+ * flag — that's a "read from this directory" override, distinct from the
558
+ * pipeline's write-side `output.dir`.
559
+ *
560
+ * @see docs/design-docs/pipeline-command-surface.md §5.7 (W0077 Phase 6c)
561
+ * @see docs/design-docs/output-dir-routing.md
562
+ */
563
+ const OutputConfigSchema = z
564
+ .object({
565
+ dir: z.string().min(1).optional(),
566
+ })
567
+ .optional();
448
568
  /**
449
569
  * Zod schema for .ailf/config.yaml — controls documentation source,
450
570
  * report destination, and trigger behavior for evaluations from an
@@ -453,6 +573,13 @@ const ReportStoreConfigSchema = z
453
573
  export const RepoConfigSchema = z.object({
454
574
  source: SourceConfigSchema,
455
575
  reportStore: ReportStoreConfigSchema,
576
+ publish: PublishConfigSchema,
577
+ execution: ExecutionConfigSchema,
578
+ output: OutputConfigSchema,
579
+ owner: OwnerConfigSchema,
580
+ agentic: AgenticConfigSchema,
581
+ artifacts: ArtifactsConfigSchema,
582
+ taskSource: TaskSourceConfigSchema,
456
583
  triggers: z
457
584
  .object({
458
585
  pr: TriggerConfigSchema.optional(),
package/dist/cli.d.ts CHANGED
@@ -11,7 +11,7 @@
11
11
  * appends Quick Start examples.
12
12
  *
13
13
  * Usage:
14
- * ailf pipeline [flags] # full evaluation pipeline
14
+ * ailf run [flags] # full evaluation run
15
15
  * ailf compare [flags] # compare evaluation runs
16
16
  * ailf baseline <cmd> [flags] # baseline management
17
17
  * ailf validate [flags] # config validation
@@ -24,6 +24,6 @@
24
24
  * --dotenv <path> # override default .env path
25
25
  *
26
26
  * Dev mode (without building):
27
- * tsx src/cli.ts pipeline --debug
27
+ * tsx src/cli.ts run --debug
28
28
  */
29
29
  export {};
package/dist/cli.js CHANGED
@@ -13,7 +13,7 @@
13
13
  * appends Quick Start examples.
14
14
  *
15
15
  * Usage:
16
- * ailf pipeline [flags] # full evaluation pipeline
16
+ * ailf run [flags] # full evaluation run
17
17
  * ailf compare [flags] # compare evaluation runs
18
18
  * ailf baseline <cmd> [flags] # baseline management
19
19
  * ailf validate [flags] # config validation
@@ -26,7 +26,7 @@
26
26
  * --dotenv <path> # override default .env path
27
27
  *
28
28
  * Dev mode (without building):
29
- * tsx src/cli.ts pipeline --debug
29
+ * tsx src/cli.ts run --debug
30
30
  */
31
31
  import { config as dotenvConfig } from "dotenv";
32
32
  import { existsSync, readFileSync } from "fs";
@@ -76,22 +76,75 @@ else if (process.argv.includes("--quiet") || process.argv.includes("-q")) {
76
76
  process.env.AILF_LOG_LEVEL = "quiet";
77
77
  }
78
78
  // ---------------------------------------------------------------------------
79
- // W0052 — hard-error on retired capture flags and env vars.
80
- // --------------------------------------------------------------------------
81
- // The legacy collector has been removed. Callers still using
82
- // --capture / --capture-dir / --no-capture-compress / --no-capture-extras
83
- // or AILF_CAPTURE* / AILF_LEGACY_COLLECTOR / AILF_UNIFIED_ARTIFACTS must
84
- // migrate to --artifacts-dir / --no-artifacts / --artifacts-exclude. We
85
- // print a clear pointer so failures don't bubble up as opaque "unknown
86
- // option" errors from Commander.
79
+ // Hard-error on retired flags, env vars, and commands with a migration hint.
87
80
  // ---------------------------------------------------------------------------
88
- const RETIRED_FLAGS = [
89
- "--capture",
90
- "--capture-dir",
91
- "--no-capture-compress",
92
- "--no-capture-extras",
93
- "--capture-exclude",
94
- ];
81
+ // Each entry maps an old identifier to the message shown when it's seen, so
82
+ // failures don't bubble up as opaque "unknown option" errors from Commander.
83
+ // W0052 retired the legacy artifact collector; W0075 retired the --skip-*
84
+ // negation prefix, the --debug-{n,pattern,sample} filter flags, and several
85
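+ // top-level report/validator commands that were consolidated into umbrellas; W0077 retired the `pipeline` command (now `ailf run`) and renamed or moved many of its flags into .ailf/config.yaml.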
86
+ // ---------------------------------------------------------------------------
87
+ const RETIRED_CAPTURE_HINT = " Use --artifacts-dir / --no-artifacts / --artifacts-exclude instead.\n" +
88
+ " See docs/guides/cli-guide.md and docs/decisions/D0033-unified-run-anchored-artifact-capture.md.";
89
+ const RETIRED_FLAG_HINTS = {
90
+ "--capture": RETIRED_CAPTURE_HINT,
91
+ "--capture-dir": RETIRED_CAPTURE_HINT,
92
+ "--no-capture-compress": RETIRED_CAPTURE_HINT,
93
+ "--no-capture-extras": RETIRED_CAPTURE_HINT,
94
+ "--capture-exclude": RETIRED_CAPTURE_HINT,
95
+ "--skip-fetch": " Use --no-fetch instead. See docs/design-docs/cli-naming-convention.md (W0075).",
96
+ "--skip-eval": " Use --no-eval instead. See docs/design-docs/cli-naming-convention.md (W0075).",
97
+ "--debug-n": " Use --filter-first-n instead. See docs/design-docs/cli-naming-convention.md (W0075).",
98
+ "--debug-pattern": " Use --filter-pattern instead. See docs/design-docs/cli-naming-convention.md (W0075).",
99
+ "--debug-sample": " Use --filter-sample instead. See docs/design-docs/cli-naming-convention.md (W0075).",
100
+ "--output-format": " Use --format instead. See docs/design-docs/cli-naming-convention.md (W0075).",
101
+ "--artifacts-dry-run": " Use --no-artifacts-write instead. See docs/design-docs/cli-naming-convention.md (W0075).",
102
+ "--readiness": " Use `ailf report readiness --from-run <path>` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
103
+ "--discovery-report": " Use `ailf report discovery --from-run <path>` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
104
+ "--compare-baseline": " Use `--compare <path>` instead. `--compare` now takes an optional baseline argument. See docs/design-docs/pipeline-command-surface.md (W0077).",
105
+ "--before": " Use --before-source instead. The flag was renamed to disambiguate from baseline comparison. See docs/design-docs/pipeline-command-surface.md (W0077).",
106
+ "--concurrency": " Set `execution.concurrency` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
107
+ "--grader-replications": " Set `execution.graderReplications` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
108
+ "--no-gap-analysis": " Set `execution.gapAnalysis: false` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
109
+ "--api-url": " Set `execution.apiUrl` in .ailf/config.yaml or use the AILF_API_URL env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6b).",
110
+ "--report-dataset": " Set `reportStore.dataset` in .ailf/config.yaml or use the AILF_REPORT_DATASET env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6e).",
111
+ "--report-project": " Set `reportStore.projectId` in .ailf/config.yaml or use the AILF_REPORT_PROJECT_ID env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6e).",
112
+ "--owner-team": " Set `owner.team` in .ailf/config.yaml or use the AILF_OWNER_TEAM env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
113
+ "--owner-individual": " Set `owner.individual` in .ailf/config.yaml or use the AILF_OWNER_INDIVIDUAL env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
114
+ "--header": " Set `agentic.headers` (key/value object) in .ailf/config.yaml or use the DOC_HEADERS env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
115
+ "--allowed-origin": " Set `agentic.allowedOrigins` (list of globs) in .ailf/config.yaml or use the DOC_ALLOWED_ORIGINS env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6f).",
116
+ "--task-source": " Set `taskSource.type` (content-lake | repo) in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6h).",
117
+ "--repo-tasks-path": " Set `taskSource.repoTasksPath` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6h).",
118
+ };
119
+ const RETIRED_COMMAND_HINTS = {
120
+ pipeline: " Use `ailf run` instead. See docs/design-docs/pipeline-command-surface.md (W0077).",
121
+ "validate-tasks": " Use `ailf validate tasks` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
122
+ "readiness-report": " Use `ailf report readiness` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
123
+ "chronic-failures": " Use `ailf report chronic-failures` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
124
+ "coverage-audit": " Use `ailf report coverage` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
125
+ "discovery-report": " Use `ailf report discovery` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
126
+ "agent-report": " Use `ailf report agent` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
127
+ "weekly-digest": " Use `ailf report digest` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
128
+ "check-staleness": " Use `ailf report staleness` instead. See docs/design-docs/cli-naming-convention.md (W0075).",
129
+ };
130
+ /**
131
+ * Per-subcommand retired-flag hints. Use this for flags that were retired
132
+ * from one subcommand but still exist on others (e.g. `--output-dir` is
133
+ * retired from `ailf run` but still present on `ailf publish`,
134
+ * `ailf pr-comment`, etc.). Keys are subcommand names; values share the
135
+ * same shape as `RETIRED_FLAG_HINTS`.
136
+ */
137
+ const RETIRED_FLAG_HINTS_BY_COMMAND = {
138
+ run: {
139
+ "--output-dir": " Set `output.dir` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6c).",
140
+ "--sanity-dataset": " Set `source.dataset` in .ailf/config.yaml or use the SANITY_DATASET env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
141
+ "--sanity-project": " Set `source.projectId` in .ailf/config.yaml or use the SANITY_PROJECT_ID env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
142
+ "--sanity-studio-origin": " Set `source.studioOrigin` in .ailf/config.yaml or use the SANITY_STUDIO_ORIGIN env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6d).",
143
+ "--no-artifacts": " Set `artifacts.enabled: false` in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
144
+ "--artifacts-dir": " Set `artifacts.dir` in .ailf/config.yaml or use the AILF_ARTIFACTS_DIR env var. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
145
+ "--artifacts-exclude": " Set `artifacts.exclude` (list of artifact-type names) in .ailf/config.yaml instead. See docs/design-docs/pipeline-command-surface.md (W0077 Phase 6g).",
146
+ },
147
+ };
95
148
  const RETIRED_ENV_VARS = [
96
149
  "AILF_CAPTURE",
97
150
  "AILF_CAPTURE_DIR",
@@ -102,15 +155,45 @@ const RETIRED_ENV_VARS = [
102
155
  "AILF_LEGACY_COLLECTOR",
103
156
  "AILF_UNIFIED_ARTIFACTS",
104
157
  ];
158
+ /**
159
+ * Identify the subcommand the user invoked — the first non-flag arg after
160
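+ * `ailf` (argv[0]=node, argv[1]=cli.ts). Returns undefined if none. NOTE(review): a space-separated value of a global option (e.g. `--dotenv <path>`) also does not start with "-" and would be returned as the subcommand — confirm this is acceptable.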
161
+ */
162
+ function findInvokedSubcommand() {
163
+ for (let i = 2; i < process.argv.length; i++) {
164
+ const arg = process.argv[i];
165
+ if (!arg.startsWith("-"))
166
+ return arg;
167
+ }
168
+ return undefined;
169
+ }
105
170
  function findRetiredFlag() {
171
+ const subcommand = findInvokedSubcommand();
172
+ const subcommandHints = subcommand
173
+ ? RETIRED_FLAG_HINTS_BY_COMMAND[subcommand]
174
+ : undefined;
106
175
  for (const arg of process.argv) {
107
176
  const bare = arg.split("=")[0];
108
- if (RETIRED_FLAGS.includes(bare)) {
109
- return bare;
177
+ if (subcommandHints && bare in subcommandHints) {
178
+ return { flag: bare, hint: subcommandHints[bare] };
179
+ }
180
+ if (bare in RETIRED_FLAG_HINTS) {
181
+ return { flag: bare, hint: RETIRED_FLAG_HINTS[bare] };
110
182
  }
111
183
  }
112
184
  return undefined;
113
185
  }
186
+ function findRetiredCommand() {
187
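+ // The first non-flag argument after `ailf` (argv[0]=node, argv[1]=cli.ts). NOTE(review): the `in` check below also matches keys inherited from Object.prototype (e.g. `constructor`, `toString`); Object.hasOwn would restrict it to the listed commands.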
188
+ for (let i = 2; i < process.argv.length; i++) {
189
+ const arg = process.argv[i];
190
+ if (!arg.startsWith("-") && arg in RETIRED_COMMAND_HINTS)
191
+ return arg;
192
+ if (!arg.startsWith("-"))
193
+ return undefined;
194
+ }
195
+ return undefined;
196
+ }
114
197
  function findRetiredEnv() {
115
198
  for (const name of RETIRED_ENV_VARS) {
116
199
  if (process.env[name] !== undefined)
@@ -119,14 +202,21 @@ function findRetiredEnv() {
119
202
  return undefined;
120
203
  }
121
204
  const retiredFlag = findRetiredFlag();
205
+ const retiredCommand = findRetiredCommand();
122
206
  const retiredEnv = findRetiredEnv();
123
- if (retiredFlag || retiredEnv) {
124
- const source = retiredFlag
125
- ? `flag "${retiredFlag}"`
126
- : `environment variable "${retiredEnv}"`;
127
- console.error(`❌ ${source} was retired in W0052 along with the legacy artifact collector.`);
128
- console.error(" Use --artifacts-dir / --no-artifacts / --artifacts-exclude instead.");
129
- console.error(" See docs/cli.md and docs/decisions/D0033-unified-run-anchored-artifact-capture.md.");
207
+ if (retiredFlag || retiredCommand || retiredEnv) {
208
+ if (retiredFlag) {
209
+ console.error(`❌ flag "${retiredFlag.flag}" was retired.`);
210
+ console.error(retiredFlag.hint);
211
+ }
212
+ else if (retiredCommand) {
213
+ console.error(`❌ command "${retiredCommand}" was retired.`);
214
+ console.error(RETIRED_COMMAND_HINTS[retiredCommand]);
215
+ }
216
+ else if (retiredEnv) {
217
+ console.error(`❌ environment variable "${retiredEnv}" was retired in W0052 along with the legacy artifact collector.`);
218
+ console.error(RETIRED_CAPTURE_HINT);
219
+ }
130
220
  process.exit(2);
131
221
  }
132
222
  // ---------------------------------------------------------------------------
@@ -180,8 +270,8 @@ program.hook("preAction", async (thisCommand, actionCommand) => {
180
270
  // Within each group, commands appear in the order they are added.
181
271
  // ---------------------------------------------------------------------------
182
272
  // ── Core Workflow ──────────────────────────────────────────────────────
183
- import { createPipelineCommand } from "./commands/pipeline.js";
184
- program.addCommand(createPipelineCommand().helpGroup(CommandGroup.CoreWorkflow));
273
+ import { createRunCommand } from "./commands/run.js";
274
+ program.addCommand(createRunCommand().helpGroup(CommandGroup.CoreWorkflow));
185
275
  import { createCompareCommand } from "./commands/compare.js";
186
276
  program.addCommand(createCompareCommand().helpGroup(CommandGroup.CoreWorkflow));
187
277
  import { createBaselineCommand } from "./commands/baseline.js";
@@ -192,29 +282,35 @@ import { createRunsCommand } from "./commands/runs.js";
192
282
  program.addCommand(createRunsCommand().helpGroup(CommandGroup.CoreWorkflow));
193
283
  // ── Analysis & Reports ────────────────────────────────────────────────
194
284
  import { createReadinessReportCommand } from "./commands/readiness-report.js";
195
- program.addCommand(createReadinessReportCommand().helpGroup(CommandGroup.AnalysisReports));
196
285
  import { createChronicFailuresCommand } from "./commands/chronic-failures.js";
197
- program.addCommand(createChronicFailuresCommand().helpGroup(CommandGroup.AnalysisReports));
198
286
  import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
199
- program.addCommand(createCoverageAuditCommand().helpGroup(CommandGroup.AnalysisReports));
200
287
  import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
201
- program.addCommand(createDiscoveryReportCommand().helpGroup(CommandGroup.AnalysisReports));
202
288
  import { createAgentReportCommand } from "./commands/agent-report.js";
203
- program.addCommand(createAgentReportCommand().helpGroup(CommandGroup.AnalysisReports));
204
289
  import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
205
- program.addCommand(createWeeklyDigestCommand().helpGroup(CommandGroup.AnalysisReports));
206
290
  import { createCheckStalenessCommand } from "./commands/check-staleness.js";
207
- program.addCommand(createCheckStalenessCommand().helpGroup(CommandGroup.AnalysisReports));
291
+ const reportCommand = new Command("report")
292
+ .description("Generate analysis and reporting outputs from evaluation runs")
293
+ .addCommand(createReadinessReportCommand())
294
+ .addCommand(createChronicFailuresCommand())
295
+ .addCommand(createCoverageAuditCommand())
296
+ .addCommand(createDiscoveryReportCommand())
297
+ .addCommand(createAgentReportCommand())
298
+ .addCommand(createWeeklyDigestCommand())
299
+ .addCommand(createCheckStalenessCommand());
300
+ program.addCommand(reportCommand.helpGroup(CommandGroup.AnalysisReports));
208
301
  // ── Grader Reliability ────────────────────────────────────────────────
209
302
  import { createGraderCommand } from "./commands/grader/index.js";
210
303
  program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
211
304
  // ── Setup & Configuration ─────────────────────────────────────────────
212
305
  import { createInitCommand } from "./commands/init.js";
213
306
  program.addCommand(createInitCommand().helpGroup(CommandGroup.SetupConfig));
214
- import { createValidateCommand } from "./commands/validate.js";
215
- program.addCommand(createValidateCommand().helpGroup(CommandGroup.SetupConfig));
307
+ import { createValidateConfigCommand } from "./commands/validate.js";
216
308
  import { createValidateTasksCommand } from "./commands/validate-tasks.js";
217
- program.addCommand(createValidateTasksCommand().helpGroup(CommandGroup.SetupConfig));
309
+ const validateCommand = new Command("validate")
310
+ .description("Validate AILF configuration and task files")
311
+ .addCommand(createValidateConfigCommand())
312
+ .addCommand(createValidateTasksCommand());
313
+ program.addCommand(validateCommand.helpGroup(CommandGroup.SetupConfig));
218
314
  import { createFetchDocsCommand } from "./commands/fetch-docs.js";
219
315
  program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
220
316
  import { createCacheCommand } from "./commands/cache.js";
@@ -6,7 +6,7 @@ import { dirname, join } from "path";
6
6
  import { Command } from "commander";
7
7
  import { analyzeResults } from "../pipeline/agent-behavior-report.js";
8
8
  export function createAgentReportCommand() {
9
- return new Command("agent-report")
9
+ return new Command("agent")
10
10
  .description("Generate an agent behavior observation report from eval results")
11
11
  .argument("[results-path]", "Path to eval-results.json (default: results/latest/eval-results.json)")
12
12
  .action(async (resultsPath) => {
@@ -30,8 +30,6 @@ export function createCalculateScoresCommand() {
30
30
  skipEval: true,
31
31
  compareEnabled: false,
32
32
  gapAnalysisEnabled: false,
33
- readinessEnabled: false,
34
- discoveryReportEnabled: false,
35
33
  publishEnabled: false,
36
34
  noCache: true,
37
35
  noRemoteCache: true,
@@ -12,7 +12,7 @@
12
12
  */
13
13
  import { Command } from "commander";
14
14
  export function createCheckStalenessCommand() {
15
- return new Command("check-staleness")
15
+ return new Command("staleness")
16
16
  .description("Exit 1 if no evaluation report has been produced within the max-age window")
17
17
  .option("--max-age <days>", "Max age in days before reports are considered stale", (v) => Number.parseInt(v, 10), 3)
18
18
  .action(async (opts) => {
@@ -11,15 +11,15 @@ export function createChronicFailuresCommand() {
11
11
  return new Command("chronic-failures")
12
12
  .description("Identify tasks that error in >50% of recent evaluation runs")
13
13
  .option("--lookback <n>", "Number of recent reports to analyze", (v) => parseInt(v, 10), 10)
14
- .option("--threshold <n>", "Error rate threshold (0-1) for chronic classification", (v) => parseFloat(v), 0.5)
15
- .option("--json", "Output raw JSON", false)
14
+ .option("--error-rate <n>", "Error rate threshold (0-1) for chronic classification", (v) => parseFloat(v), 0.5)
15
+ .option("-f, --format <fmt>", "Output format: console or json", "console")
16
16
  .action(async (opts) => {
17
17
  const reportStore = new ReportStore();
18
18
  const report = await detectChronicFailures(reportStore, {
19
19
  lookback: opts.lookback,
20
- threshold: opts.threshold,
20
+ threshold: opts.errorRate,
21
21
  });
22
- if (opts.json) {
22
+ if (opts.format === "json") {
23
23
  console.log(JSON.stringify(report, null, 2));
24
24
  }
25
25
  else {
@@ -13,10 +13,9 @@ import { createSanityLiteracyPreset } from "../pipeline/compiler/presets/index.j
13
13
  const __dirname = dirname(fileURLToPath(import.meta.url));
14
14
  const ROOT = resolve(__dirname, "..", "..");
15
15
  export function createCoverageAuditCommand() {
16
- return new Command("coverage-audit")
16
+ return new Command("coverage")
17
17
  .description("Run documentation coverage audit against feature registry")
18
- .option("--format <fmt>", "Output format: table, md, markdown")
19
- .option("--json", "Output raw JSON", false)
18
+ .option("-f, --format <fmt>", "Output format: table, md, markdown, json", "table")
20
19
  .action(async (opts) => {
21
20
  // Build a registry with mode base + preset so coverage audit works
22
21
  // even when config/features.ts is empty (preset is source of truth).
@@ -28,17 +27,17 @@ export function createCoverageAuditCommand() {
28
27
  console.error("❌ Coverage audit failed. Ensure config/features.yaml exists and is valid.");
29
28
  process.exit(1);
30
29
  }
31
- if (opts.json) {
30
+ const isMarkdown = opts.format === "md" || opts.format === "markdown";
31
+ if (opts.format === "json") {
32
32
  console.log(JSON.stringify(report, null, 2));
33
33
  }
34
- else if (opts.format === "md" || opts.format === "markdown") {
34
+ else if (isMarkdown) {
35
35
  console.log(formatCoverageMarkdown(report));
36
36
  }
37
37
  else {
38
38
  console.log(formatCoverageConsole(report));
39
39
  }
40
- // Print document utilization stats for non-JSON console output
41
- if (!opts.json && opts.format !== "md" && opts.format !== "markdown") {
40
+ if (opts.format !== "json" && !isMarkdown) {
42
41
  const docStats = countReferencedDocs(ROOT);
43
42
  console.log("DOCUMENT UTILIZATION:");
44
43
  console.log(` ${docStats.total} unique document slugs referenced across evaluation tasks`);