@sanity/ailf 0.1.28 → 0.1.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,6 +35,8 @@ export interface ResolvedConfig {
35
35
  areas?: string[];
36
36
  /** Task ID filter */
37
37
  tasks?: string[];
38
+ /** Tag filter — tasks must have at least one matching tag */
39
+ tags?: string[];
38
40
  /** Changed doc slugs for impact scoping */
39
41
  changedDocs?: string[];
40
42
  /** Documentation source name */
@@ -112,6 +112,8 @@ export interface TaskDefinition {
112
112
  baseline?: BaselineConfig;
113
113
  /** Additional template variables beyond task (e.g., custom vars) */
114
114
  extraVars?: Record<string, unknown>;
115
+ /** Freeform labels for filtering and organization */
116
+ tags?: string[];
115
117
  }
116
118
  /** Check if a canonical doc ref resolves by slug.
117
119
  *
@@ -179,6 +179,8 @@ export interface FeatureScore {
179
179
  export interface FilterOptions {
180
180
  /** Feature areas to include (filename stems, e.g., ["groq", "frameworks"]) */
181
181
  areas?: string[];
182
+ /** Tags to include — tasks must have at least one matching tag */
183
+ tags?: string[];
182
184
  /** Specific task IDs to include (e.g., ["groq-blog-queries"]) */
183
185
  taskIds?: string[];
184
186
  }
@@ -35,6 +35,7 @@ export interface RemoteConfigSlice {
35
35
  };
36
36
  areas?: string[];
37
37
  tasks?: string[];
38
+ tags?: string[];
38
39
  changedDocs?: string[];
39
40
  source?: string;
40
41
  compareEnabled?: boolean;
@@ -167,12 +167,16 @@ function taskToInlineFormat(task) {
167
167
  if (task.baseline) {
168
168
  inline.baseline = task.baseline;
169
169
  }
170
+ if (task.tags?.length) {
171
+ inline.tags = task.tags;
172
+ }
170
173
  return inline;
171
174
  }
172
175
/**
 * Derive task filter options from a config object.
 *
 * `config.areas`, `config.tasks` and `config.tags` are each normalised so
 * that a missing or empty list becomes `undefined`.
 *
 * @param config - Object whose optional `areas`, `tasks` and `tags` lists are read.
 * @returns `{ areas, taskIds, tags }` when at least one list is non-empty
 *   (`taskIds` comes from `config.tasks`), otherwise `undefined`.
 */
function buildFilterOptions(config) {
    // Collapse "absent" and "empty" into a single undefined value.
    const nonEmpty = (list) => (list?.length ? list : undefined);
    const areas = nonEmpty(config.areas);
    const taskIds = nonEmpty(config.tasks);
    const tags = nonEmpty(config.tags);
    const hasAnyFilter = Boolean(areas || taskIds || tags);
    return hasAnyFilter ? { areas, taskIds, tags } : undefined;
}
@@ -32,6 +32,7 @@ const TASKS_QUERY = /* groq */ `
32
32
  && (!defined($areas) || featureArea->areaId.current in $areas)
33
33
  && (!defined($taskIds) || id.current in $taskIds)
34
34
  && (execution.enabled != false)
35
+ && (!defined($tags) || count((tags)[@ in $tags]) > 0)
35
36
  ] | order(featureArea->areaId.current asc, id.current asc) {
36
37
  "taskId": id.current,
37
38
  description,
@@ -51,6 +52,7 @@ const TASKS_QUERY = /* groq */ `
51
52
  assert,
52
53
  rawAssert,
53
54
  baseline,
55
+ tags,
54
56
  "referenceSolutionTitle": referenceSolution->title
55
57
  }
56
58
  `;
@@ -90,6 +92,7 @@ function buildGroqParams(filter) {
90
92
  areas: filter?.areas && filter.areas.length > 0
91
93
  ? filter.areas.map((a) => a.toLowerCase())
92
94
  : null,
95
+ tags: filter?.tags && filter.tags.length > 0 ? filter.tags : null,
93
96
  taskIds: filter?.taskIds && filter.taskIds.length > 0 ? filter.taskIds : null,
94
97
  };
95
98
  }
@@ -116,6 +119,21 @@ function mapToTaskDefinition(raw) {
116
119
  .map(mapCanonicalDocRef)
117
120
  .filter((d) => d !== null);
118
121
  const assertions = mapAssertions(raw.assert ?? []);
122
+ // Append raw pass-through assertions (escape hatch for arbitrary Promptfoo
123
+ // assertion types that aren't in the curated list). These bypass template
124
+ // resolution and flow directly into the expanded Promptfoo test case as
125
+ // value-based assertions. In baseline mode, buildBaselineAsserts() with
126
+ // "abbreviated" (the default) drops non-rubric assertions, so rawAssert
127
+ // entries only run in the gold variant — consistent with how regular
128
+ // value-based assertions like `contains` or `regex` behave.
129
+ const rawAssertions = (raw.rawAssert ?? [])
130
+ .filter((a) => !!a.type)
131
+ .map((a) => ({
132
+ type: a.type,
133
+ ...(a.value !== undefined ? { value: a.value } : {}),
134
+ ...(a.threshold !== undefined ? { threshold: a.threshold } : {}),
135
+ }));
136
+ const allAssertions = [...assertions, ...rawAssertions];
119
137
  const baseline = raw.baseline
120
138
  ? {
121
139
  ...(raw.baseline.enabled !== undefined
@@ -129,7 +147,7 @@ function mapToTaskDefinition(raw) {
129
147
  }
130
148
  : undefined;
131
149
  return {
132
- assertions,
150
+ assertions: allAssertions,
133
151
  canonicalDocs,
134
152
  description: raw.description,
135
153
  docCoverage: raw.docCoverage ?? false,
@@ -143,6 +161,7 @@ function mapToTaskDefinition(raw) {
143
161
  referenceSolution: "",
144
162
  taskPrompt: raw.taskPrompt,
145
163
  ...(baseline ? { baseline } : {}),
164
+ ...(raw.tags?.length ? { tags: raw.tags } : {}),
146
165
  };
147
166
  }
148
167
  /**
@@ -79,6 +79,12 @@ export class RepoTaskSource {
79
79
  if (entry.execution?.enabled === false) {
80
80
  continue;
81
81
  }
82
+ // Tag filter — skip tasks that don't match any requested tag
83
+ if (filter?.tags &&
84
+ filter.tags.length > 0 &&
85
+ (!entry.tags || !entry.tags.some((t) => filter.tags.includes(t)))) {
86
+ continue;
87
+ }
82
88
  definitions.push(mapToTaskDefinition(entry));
83
89
  }
84
90
  }
@@ -108,5 +114,6 @@ function mapToTaskDefinition(raw) {
108
114
  taskPrompt: typeof task === "string" ? task : "",
109
115
  ...(raw.baseline ? { baseline: raw.baseline } : {}),
110
116
  ...(extraVars ? { extraVars } : {}),
117
+ ...(raw.tags?.length ? { tags: raw.tags } : {}),
111
118
  };
112
119
  }
package/dist/cli.d.ts CHANGED
@@ -5,6 +5,11 @@
5
5
  * Unified command-line interface for the AI Literacy Framework.
6
6
  * All evaluation commands are exposed as subcommands under `ailf`.
7
7
  *
8
+ * Commands are organized into semantic groups (Core Workflow, Analysis &
9
+ * Reports, etc.) using Commander v14's native `helpGroup()` API. The
10
+ * custom help formatter in `./commands/shared/help.ts` adds styling and
11
+ * appends Quick Start examples.
12
+ *
8
13
  * Usage:
9
14
  * ailf pipeline [flags] # full evaluation pipeline
10
15
  * ailf compare [flags] # compare evaluation runs
package/dist/cli.js CHANGED
@@ -7,6 +7,11 @@
7
7
  * Unified command-line interface for the AI Literacy Framework.
8
8
  * All evaluation commands are exposed as subcommands under `ailf`.
9
9
  *
10
+ * Commands are organized into semantic groups (Core Workflow, Analysis &
11
+ * Reports, etc.) using Commander v14's native `helpGroup()` API. The
12
+ * custom help formatter in `./commands/shared/help.ts` adds styling and
13
+ * appends Quick Start examples.
14
+ *
10
15
  * Usage:
11
16
  * ailf pipeline [flags] # full evaluation pipeline
12
17
  * ailf compare [flags] # compare evaluation runs
@@ -74,12 +79,13 @@ else if (process.argv.includes("--quiet") || process.argv.includes("-q")) {
74
79
  // Build CLI program
75
80
  // ---------------------------------------------------------------------------
76
81
  import { Command } from "commander";
82
+ import { CommandGroup, configureProgram } from "./commands/shared/help.js";
77
83
  // Read version from package.json
78
84
  const pkgPath = resolve(ROOT, "package.json");
79
85
  const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
80
86
  const program = new Command()
81
87
  .name("ailf")
82
- .description("AI Literacy Framework — evaluate how well docs enable AI coding tools")
88
+ .description("AI Literacy Framework — evaluate how well docs enable AI coding tools\n\nMeasure whether AI coding agents can find the right documentation\nand produce correct implementations of your product features.")
83
89
  .version(pkg.version)
84
90
  .option("-v, --verbose", "Increase log output")
85
91
  .option("-q, --quiet", "Suppress non-error output")
@@ -87,6 +93,7 @@ const program = new Command()
87
93
  .option("--explain", "Show execution plan without running")
88
94
  .option("--format <fmt>", "Output format for --explain (console, json)", "console")
89
95
  .option("-y, --yes", "With --explain: show plan then prompt to confirm execution");
96
+ configureProgram(program);
90
97
  // ---------------------------------------------------------------------------
91
98
  // Global --explain hook — intercepts any command before execution
92
99
  // ---------------------------------------------------------------------------
@@ -114,85 +121,72 @@ program.hook("preAction", async (thisCommand, actionCommand) => {
114
121
  });
115
122
  // ---------------------------------------------------------------------------
116
123
  // Register commands
124
+ //
125
+ // Registration order determines group display order in --help.
126
+ // Within each group, commands appear in the order they are added.
117
127
  // ---------------------------------------------------------------------------
118
- // Pipeline — the main orchestrator
128
+ // ── Core Workflow ──────────────────────────────────────────────────────
119
129
  import { createPipelineCommand } from "./commands/pipeline.js";
120
- program.addCommand(createPipelineCommand());
121
- // Compare — structured score comparison
130
+ program.addCommand(createPipelineCommand().helpGroup(CommandGroup.CoreWorkflow));
122
131
  import { createCompareCommand } from "./commands/compare.js";
123
- program.addCommand(createCompareCommand());
124
- // Baseline — save/compare/history
132
+ program.addCommand(createCompareCommand().helpGroup(CommandGroup.CoreWorkflow));
125
133
  import { createBaselineCommand } from "./commands/baseline.js";
126
- program.addCommand(createBaselineCommand());
127
- // Validate — config validation
128
- import { createValidateCommand } from "./commands/validate.js";
129
- program.addCommand(createValidateCommand());
130
- // Coverage audit — feature coverage analysis
131
- import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
132
- program.addCommand(createCoverageAuditCommand());
133
- // Weekly digest — trend digest delivery
134
- import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
135
- program.addCommand(createWeeklyDigestCommand());
136
- // Readiness report — launch readiness checklist
134
+ program.addCommand(createBaselineCommand().helpGroup(CommandGroup.CoreWorkflow));
135
+ import { createPublishCommand } from "./commands/publish.js";
136
+ program.addCommand(createPublishCommand().helpGroup(CommandGroup.CoreWorkflow));
137
+ // ── Analysis & Reports ────────────────────────────────────────────────
137
138
  import { createReadinessReportCommand } from "./commands/readiness-report.js";
138
- program.addCommand(createReadinessReportCommand());
139
- // Discovery report — agent discoverability analysis
139
+ program.addCommand(createReadinessReportCommand().helpGroup(CommandGroup.AnalysisReports));
140
+ import { createCoverageAuditCommand } from "./commands/coverage-audit.js";
141
+ program.addCommand(createCoverageAuditCommand().helpGroup(CommandGroup.AnalysisReports));
140
142
  import { createDiscoveryReportCommand } from "./commands/discovery-report.js";
141
- program.addCommand(createDiscoveryReportCommand());
142
- // Grader reliability tools (consistency, compare, sensitivity, validate)
143
+ program.addCommand(createDiscoveryReportCommand().helpGroup(CommandGroup.AnalysisReports));
144
+ import { createAgentReportCommand } from "./commands/agent-report.js";
145
+ program.addCommand(createAgentReportCommand().helpGroup(CommandGroup.AnalysisReports));
146
+ import { createWeeklyDigestCommand } from "./commands/weekly-digest.js";
147
+ program.addCommand(createWeeklyDigestCommand().helpGroup(CommandGroup.AnalysisReports));
148
+ // ── Grader Reliability ────────────────────────────────────────────────
143
149
  import { createGraderCommand } from "./commands/grader/index.js";
144
- program.addCommand(createGraderCommand());
145
- // Fetch docs — pull documentation from Sanity CMS
150
+ program.addCommand(createGraderCommand().helpGroup(CommandGroup.GraderReliability));
151
+ // ── Setup & Configuration ─────────────────────────────────────────────
152
+ import { createInitCommand } from "./commands/init.js";
153
+ program.addCommand(createInitCommand().helpGroup(CommandGroup.SetupConfig));
154
+ import { createValidateCommand } from "./commands/validate.js";
155
+ program.addCommand(createValidateCommand().helpGroup(CommandGroup.SetupConfig));
156
+ import { createValidateTasksCommand } from "./commands/validate-tasks.js";
157
+ program.addCommand(createValidateTasksCommand().helpGroup(CommandGroup.SetupConfig));
146
158
  import { createFetchDocsCommand } from "./commands/fetch-docs.js";
147
- program.addCommand(createFetchDocsCommand());
148
- // Generate configs — generate promptfoo config files
159
+ program.addCommand(createFetchDocsCommand().helpGroup(CommandGroup.SetupConfig));
149
160
  import { createGenerateConfigsCommand } from "./commands/generate-configs.js";
150
- program.addCommand(createGenerateConfigsCommand());
151
- // Calculate scores — compute AI Literacy Scores from eval results
152
- import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
153
- program.addCommand(createCalculateScoresCommand());
154
- // Eval — direct promptfoo eval passthrough
161
+ program.addCommand(createGenerateConfigsCommand().helpGroup(CommandGroup.SetupConfig));
162
+ import { createCacheCommand } from "./commands/cache.js";
163
+ program.addCommand(createCacheCommand().helpGroup(CommandGroup.SetupConfig));
164
+ // ── Pipeline Internals ────────────────────────────────────────────────
155
165
  import { createEvalCommand } from "./commands/eval.js";
156
- program.addCommand(createEvalCommand());
157
- // PR comment — generate markdown PR comment
166
+ program.addCommand(createEvalCommand().helpGroup(CommandGroup.PipelineInternals));
167
+ import { createCalculateScoresCommand } from "./commands/calculate-scores.js";
168
+ program.addCommand(createCalculateScoresCommand().helpGroup(CommandGroup.PipelineInternals));
158
169
  import { createPrCommentCommand } from "./commands/pr-comment.js";
159
- program.addCommand(createPrCommentCommand());
160
- // Publish — standalone report publishing to Sanity Content Lake
161
- import { createPublishCommand } from "./commands/publish.js";
162
- program.addCommand(createPublishCommand());
163
- // Agent report — agent behavior observation report
164
- import { createAgentReportCommand } from "./commands/agent-report.js";
165
- program.addCommand(createAgentReportCommand());
166
- // Cache — local pipeline cache management
167
- import { createCacheCommand } from "./commands/cache.js";
168
- program.addCommand(createCacheCommand());
169
- // Webhook server — local development server
170
- import { createWebhookServerCommand } from "./commands/webhook-server.js";
171
- program.addCommand(createWebhookServerCommand());
172
- // Lookup doc — search Sanity for documentation articles
173
- import { createLookupDocCommand } from "./commands/lookup-doc.js";
174
- program.addCommand(createLookupDocCommand());
175
- // Measure retrieval — retrieval quality measurement
170
+ program.addCommand(createPrCommentCommand().helpGroup(CommandGroup.PipelineInternals));
176
171
  import { createMeasureRetrievalCommand } from "./commands/measure-retrieval.js";
177
- program.addCommand(createMeasureRetrievalCommand());
178
- // Init — initialize a directory for AILF
179
- import { createInitCommand } from "./commands/init.js";
180
- program.addCommand(createInitCommand());
181
- // Validate tasks — standalone repo task validation
182
- import { createValidateTasksCommand } from "./commands/validate-tasks.js";
183
- program.addCommand(createValidateTasksCommand());
184
- // Interactive — guided wizard
172
+ program.addCommand(createMeasureRetrievalCommand().helpGroup(CommandGroup.PipelineInternals));
173
+ import { createLookupDocCommand } from "./commands/lookup-doc.js";
174
+ program.addCommand(createLookupDocCommand().helpGroup(CommandGroup.PipelineInternals));
175
+ import { createWebhookServerCommand } from "./commands/webhook-server.js";
176
+ program.addCommand(createWebhookServerCommand().helpGroup(CommandGroup.PipelineInternals));
177
+ // ── Developer Tools ───────────────────────────────────────────────────
185
178
  import { createInteractiveCommand } from "./commands/interactive.js";
186
- program.addCommand(createInteractiveCommand());
179
+ program.addCommand(createInteractiveCommand().helpGroup(CommandGroup.DeveloperTools));
187
180
  // Shell completion — must be registered last (needs full program tree)
188
181
  import { createCompletionCommand } from "./commands/completion.js";
189
- program.addCommand(createCompletionCommand(program));
182
+ program.addCommand(createCompletionCommand(program).helpGroup(CommandGroup.DeveloperTools));
190
183
  // ---------------------------------------------------------------------------
191
- // Parse and run — default to interactive mode when no arguments given
184
+ // Parse and run — default to showing help when no arguments given
192
185
  // ---------------------------------------------------------------------------
193
- // If no command is specified (just `ailf`), launch interactive mode
186
+ // If no command is specified (just `ailf`), show help.
187
+ // The interactive wizard is still available via `ailf interactive`.
194
188
  if (process.argv.length <= 2) {
195
- await program.parseAsync([...process.argv, "interactive"]);
189
+ program.outputHelp();
196
190
  }
197
191
  else {
198
192
  await program.parseAsync();
@@ -688,6 +688,7 @@ async function buildPipelineExplainPlan(actionCommand, rootDir) {
688
688
  skipEval: raw.skipEval ?? false,
689
689
  skipFetch: raw.skipFetch ?? false,
690
690
  source: raw.source,
691
+ tag: raw.tag ?? [],
691
692
  task: raw.task,
692
693
  threshold: raw.threshold,
693
694
  url: raw.url ?? [],
@@ -54,6 +54,7 @@ export interface ResolvedOptions {
54
54
  remote: boolean;
55
55
  repoTasksPath?: string;
56
56
  taskOption?: string;
57
+ tagOption?: string[];
57
58
  taskSourceType?: "content-lake" | "repo" | "yaml";
58
59
  urlArgs: string[];
59
60
  apiUrl: string;
@@ -126,6 +126,13 @@ export function computeResolvedOptions(opts) {
126
126
  // Scoping
127
127
  const areaOption = opts.area ?? process.env.EVAL_FILTER_AREAS ?? undefined;
128
128
  const taskOption = opts.task ?? process.env.EVAL_FILTER_TASKS ?? undefined;
129
+ const tagOption = opts.tag?.length
130
+ ? opts.tag
131
+ : process.env.EVAL_FILTER_TAGS
132
+ ? process.env.EVAL_FILTER_TAGS.split(",")
133
+ .map((s) => s.trim())
134
+ .filter(Boolean)
135
+ : undefined;
129
136
  const changedDocsOption = opts.changedDocs ?? process.env.EVAL_CHANGED_DOCS ?? undefined;
130
137
  // Document-driven scoping (pure — computes impactSummary without env writes)
131
138
  let impactSummary;
@@ -237,6 +244,7 @@ export function computeResolvedOptions(opts) {
237
244
  ? resolve(callerCwd, opts.repoTasksPath)
238
245
  : undefined,
239
246
  taskOption,
247
+ tagOption,
240
248
  taskSourceType: resolveTaskSourceType(opts.taskSource),
241
249
  urlArgs,
242
250
  };
@@ -5,7 +5,7 @@
5
5
  * options object, bridges to process.env for downstream modules, and
6
6
  * delegates to runPipeline().
7
7
  *
8
- * @see docs/API.md for the full flag reference.
8
+ * @see docs/CLI.md for the full flag reference.
9
9
  */
10
10
  import { Command } from "commander";
11
11
  /**
@@ -56,6 +56,7 @@ export interface PipelineCliOptions {
56
56
  remote: boolean;
57
57
  repoTasksPath?: string;
58
58
  task?: string;
59
+ tag: string[];
59
60
  taskSource?: string;
60
61
  threshold?: number;
61
62
  url: string[];
@@ -5,7 +5,7 @@
5
5
  * options object, bridges to process.env for downstream modules, and
6
6
  * delegates to runPipeline().
7
7
  *
8
- * @see docs/API.md for the full flag reference.
8
+ * @see docs/CLI.md for the full flag reference.
9
9
  */
10
10
  import { Command } from "commander";
11
11
  import { addAgenticOptions, addDebugOptions, addSanitySourceOptions, } from "./shared/options.js";
@@ -22,6 +22,13 @@ export function createPipelineCommand() {
22
22
  .option("--no-auto-scope", "Disable release-aware auto-scoping (evaluate all tasks even when a perspective is set)")
23
23
  .option("-a, --area <areas>", "Scope to feature areas (comma-separated)")
24
24
  .option("-t, --task <id>", "Scope to specific task ID")
25
+ .option("--tag <tags>", "Scope to tasks with matching tags (comma-separated, repeatable)", (val, prev) => [
26
+ ...prev,
27
+ ...val
28
+ .split(",")
29
+ .map((s) => s.trim())
30
+ .filter(Boolean),
31
+ ], [])
25
32
  .option("--changed-docs <slugs>", "Auto-scope to tasks affected by these document slugs")
26
33
  .option("-j, --concurrency <n>", "Max parallel API calls during evaluation", parseInt)
27
34
  .option("--grader-replications <n>", "Grader consistency replications", parseInt)
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Custom help configuration for the AILF CLI.
3
+ *
4
+ * Uses Commander v14's native help customization APIs:
5
+ * - `helpGroup()` on Command for grouped subcommand listings
6
+ * - `configureHelp()` with a custom Help subclass for styling
7
+ * - `addHelpText('after', ...)` for contextual examples
8
+ *
9
+ * This avoids ad-hoc console.log formatting and keeps help output
10
+ * consistent, testable, and automatically aligned with the command tree.
11
+ */
12
+ import { type Command } from "commander";
13
+ /**
14
+ * Semantic group headings displayed in `ailf --help`.
15
+ *
16
+ * Groups appear in the order their first member is registered with the
17
+ * program. Keep the registration order in cli.ts intentional.
18
+ */
19
+ export declare const CommandGroup: {
20
+ readonly CoreWorkflow: "Core Workflow:";
21
+ readonly AnalysisReports: "Analysis & Reports:";
22
+ readonly GraderReliability: "Grader Reliability:";
23
+ readonly SetupConfig: "Setup & Configuration:";
24
+ readonly PipelineInternals: "Pipeline Internals:";
25
+ readonly DeveloperTools: "Developer Tools:";
26
+ };
27
+ export type CommandGroupHeading = (typeof CommandGroup)[keyof typeof CommandGroup];
28
+ /**
29
+ * Apply help configuration to the root program.
30
+ *
31
+ * Call this once in cli.ts after creating the program but before
32
+ * registering commands. It:
33
+ * 1. Sets the custom Help formatter via `configureHelp()`
34
+ * 2. Appends Quick Start examples via `addHelpText('after', ...)`
35
+ * 3. Customizes the built-in help command description
36
+ */
37
+ export declare function configureProgram(program: Command): void;
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Custom help configuration for the AILF CLI.
3
+ *
4
+ * Uses Commander v14's native help customization APIs:
5
+ * - `helpGroup()` on Command for grouped subcommand listings
6
+ * - `configureHelp()` with a custom Help subclass for styling
7
+ * - `addHelpText('after', ...)` for contextual examples
8
+ *
9
+ * This avoids ad-hoc console.log formatting and keeps help output
10
+ * consistent, testable, and automatically aligned with the command tree.
11
+ */
12
+ import { Help } from "commander";
13
+ // ---------------------------------------------------------------------------
14
+ // Command group headings
15
+ // ---------------------------------------------------------------------------
16
/**
 * Semantic group headings displayed in `ailf --help`.
 *
 * Groups appear in the order their first member is registered with the
 * program. Keep the registration order in cli.ts intentional.
 *
 * The object is frozen so the runtime value matches the `readonly`
 * members declared in the accompanying type declaration.
 */
export const CommandGroup = Object.freeze({
    CoreWorkflow: "Core Workflow:",
    AnalysisReports: "Analysis & Reports:",
    GraderReliability: "Grader Reliability:",
    SetupConfig: "Setup & Configuration:",
    PipelineInternals: "Pipeline Internals:",
    DeveloperTools: "Developer Tools:",
});
30
+ // ---------------------------------------------------------------------------
31
+ // Custom Help formatter
32
+ // ---------------------------------------------------------------------------
33
/**
 * Help formatter that wraps section titles and description text in ANSI
 * style codes.
 *
 * Both overrides return their input unchanged whenever `hasColorSupport()`
 * reports false, so no escape sequences are written in that case.
 */
class AilfHelp extends Help {
    /** Wrap a section title in bold (`ESC[1m` … `ESC[0m`). */
    styleTitle(str) {
        return hasColorSupport() ? `\x1b[1m${str}\x1b[0m` : str;
    }
    /** Wrap description text in dim (`ESC[2m` … `ESC[0m`). */
    styleDescriptionText(str) {
        return hasColorSupport() ? `\x1b[2m${str}\x1b[0m` : str;
    }
}
54
+ // ---------------------------------------------------------------------------
55
+ // Color support detection
56
+ // ---------------------------------------------------------------------------
57
/**
 * Decide whether help output may include ANSI style codes.
 *
 * Precedence:
 *   1. `NO_COLOR` set to a non-empty value: never color.
 *   2. `FORCE_COLOR` set to `"0"` or `"false"`: never color.
 *   3. `FORCE_COLOR` set to anything else (including empty): always color.
 *   4. Otherwise color only when stdout is a TTY.
 *
 * @returns `true` when escape sequences may be emitted.
 */
function hasColorSupport() {
    const { NO_COLOR: noColor, FORCE_COLOR: forceColor } = process.env;
    // An empty NO_COLOR does not count as a request to disable color.
    if (noColor)
        return false;
    if (forceColor !== undefined) {
        // "0" / "false" are explicit opt-outs, not "force on".
        return forceColor !== "0" && forceColor !== "false";
    }
    return Boolean(process.stdout.isTTY);
}
65
+ // ---------------------------------------------------------------------------
66
+ // After-help text (Quick Start + links)
67
+ // ---------------------------------------------------------------------------
68
+ const afterHelpText = `
69
+ Quick Start:
70
+ $ ailf pipeline --debug Run a quick evaluation (first 2 tests)
71
+ $ ailf pipeline --area groq Evaluate a specific feature area
72
+ $ ailf pipeline --explain Preview the execution plan
73
+ $ ailf init Set up AILF in a new project
74
+
75
+ Documentation:
76
+ Repository https://github.com/sanity-io/ai-literacy-framework
77
+ CLI Guide https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/CLI.md
78
+ Getting Started https://github.com/sanity-io/ai-literacy-framework/blob/main/docs/GETTING_STARTED.md
79
+
80
+ Run ailf <command> --help for detailed usage of any command.`;
81
+ // ---------------------------------------------------------------------------
82
+ // Program configuration
83
+ // ---------------------------------------------------------------------------
84
/**
 * Apply help configuration to the root program.
 *
 * Call this once in cli.ts after creating the program but before
 * registering commands. It:
 *   1. Routes Commander's style hooks to `AilfHelp` via `configureHelp()`
 *   2. Appends `afterHelpText` via `addHelpText('after', ...)`
 *   3. Customizes the built-in help command description
 *
 * @param program - Root Commander program; configured in place.
 */
export function configureProgram(program) {
    // Commander creates its formatter with
    // `Object.assign(new Help(), configuration)`, which copies own enumerable
    // properties only. Methods declared on a Help subclass live on the
    // prototype, so handing over `new AilfHelp()` directly would silently
    // drop the overrides. Expose them as own function properties instead.
    const styles = new AilfHelp();
    program
        .configureHelp({
        styleTitle: (str) => styles.styleTitle(str),
        styleDescriptionText: (str) => styles.styleDescriptionText(str),
    })
        .addHelpText("after", afterHelpText)
        // `helpCommand()` replaces the deprecated
        // `addHelpCommand(nameAndArgs, description)` form.
        .helpCommand("help [command]", "Show help for a command");
}
@@ -30,6 +30,7 @@ export function mapToResolvedConfig(opts, rootDir) {
30
30
  ?.split(",")
31
31
  .map((s) => s.trim())
32
32
  .filter(Boolean),
33
+ tags: opts.tagOption,
33
34
  changedDocs: opts.changedDocsOption
34
35
  ?.split(",")
35
36
  .map((s) => s.trim())
@@ -121,12 +121,13 @@ export class FetchDocsStep {
121
121
  // Helpers
122
122
  // ---------------------------------------------------------------------------
123
123
/**
 * Build the task filter for this step from the pipeline config.
 *
 * Missing and empty lists are treated alike, so an empty selection never
 * produces a filter object.
 *
 * @param ctx - Step context; only `ctx.config.areas`, `ctx.config.tasks`
 *   and `ctx.config.tags` are read.
 * @returns An object holding whichever of `areas`, `taskIds` (taken from
 *   `config.tasks`) and `tags` are non-empty, or `undefined` when none are.
 */
function buildFilter(ctx) {
    const { areas, tasks, tags } = ctx.config;
    const filter = {
        ...(areas?.length ? { areas } : {}),
        ...(tasks?.length ? { taskIds: tasks } : {}),
        ...(tags?.length ? { tags } : {}),
    };
    // No surviving keys means nothing was selected.
    return Object.keys(filter).length > 0 ? filter : undefined;
}
132
133
  /**
@@ -28,8 +28,12 @@ export class GenerateConfigsStep {
28
28
  // repo-based, and YAML tasks depending on which adapter is wired.
29
29
  let tasks;
30
30
  try {
31
- const filter = ctx.config.areas || ctx.config.tasks
32
- ? { areas: ctx.config.areas, taskIds: ctx.config.tasks }
31
+ const filter = ctx.config.areas || ctx.config.tasks || ctx.config.tags
32
+ ? {
33
+ areas: ctx.config.areas,
34
+ taskIds: ctx.config.tasks,
35
+ tags: ctx.config.tags,
36
+ }
33
37
  : undefined;
34
38
  tasks = await ctx.taskSource.loadTasks(filter);
35
39
  }
@@ -54,10 +58,11 @@ export class GenerateConfigsStep {
54
58
  try {
55
59
  generateConfigs({
56
60
  allowedOrigins: ctx.config.allowedOrigins,
57
- filter: ctx.config.areas || ctx.config.tasks
61
+ filter: ctx.config.areas || ctx.config.tasks || ctx.config.tags
58
62
  ? {
59
63
  areas: ctx.config.areas,
60
64
  taskIds: ctx.config.tasks,
65
+ tags: ctx.config.tags,
61
66
  }
62
67
  : undefined,
63
68
  resolvedSource,
@@ -40,10 +40,11 @@ export class RunEvalStep {
40
40
  // Precondition: canonical context files exist for filtered tasks.
41
41
  // Must apply the same area/task filter as fetch-docs so we only
42
42
  // check contexts that were actually fetched.
43
- const filter = ctx.config.areas || ctx.config.tasks
43
+ const filter = ctx.config.areas || ctx.config.tasks || ctx.config.tags
44
44
  ? {
45
45
  ...(ctx.config.areas ? { areas: ctx.config.areas } : {}),
46
46
  ...(ctx.config.tasks ? { taskIds: ctx.config.tasks } : {}),
47
+ ...(ctx.config.tags ? { tags: ctx.config.tags } : {}),
47
48
  }
48
49
  : undefined;
49
50
  let tasks = await ctx.taskSource.loadTasks(filter);
@@ -76,10 +77,11 @@ export class RunEvalStep {
76
77
  if (!debug?.enabled) {
77
78
  try {
78
79
  evalFingerprint = computeEvalFingerprint({
79
- filter: ctx.config.areas || ctx.config.tasks
80
+ filter: ctx.config.areas || ctx.config.tasks || ctx.config.tags
80
81
  ? {
81
82
  areas: ctx.config.areas,
82
83
  taskIds: ctx.config.tasks,
84
+ tags: ctx.config.tags,
83
85
  }
84
86
  : undefined,
85
87
  graderModel: "default",
@@ -145,6 +145,7 @@ export interface PlanOptions {
145
145
  skipEval: boolean;
146
146
  skipFetch: boolean;
147
147
  source?: string;
148
+ tagOption?: string[];
148
149
  taskOption?: string;
149
150
  }
150
151
  /**
@@ -117,7 +117,7 @@ export async function buildPipelinePlan(opts, rootDir) {
117
117
  .filter((i) => i.severity === "error")
118
118
  .map((i) => `[${i.source}] ${i.message}`);
119
119
  // 2. Expand tasks with filters
120
- const filter = opts.areaOption || opts.taskOption
120
+ const filter = opts.areaOption || opts.taskOption || opts.tagOption?.length
121
121
  ? {
122
122
  areas: opts.areaOption
123
123
  ? opts.areaOption.split(",").map((a) => a.trim())
@@ -125,6 +125,7 @@ export async function buildPipelinePlan(opts, rootDir) {
125
125
  taskIds: opts.taskOption
126
126
  ? opts.taskOption.split(",").map((t) => t.trim())
127
127
  : undefined,
128
+ tags: opts.tagOption,
128
129
  }
129
130
  : undefined;
130
131
  let totalTests = 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@sanity/ailf",
3
- "version": "0.1.28",
3
+ "version": "0.1.30",
4
4
  "private": false,
5
5
  "publishConfig": {
6
6
  "access": "restricted"