@whitehatd/crag 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,417 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Gate inference per language/runtime.
5
+ *
6
+ * Reads the `_manifests` attached by stacks.js and produces concrete shell
7
+ * commands that should run as governance gates. Each inferLanguageGates
8
+ * function appends to result.linters / result.testers / result.builders
9
+ * (the existing structure), which the generator in analyze.js turns into
10
+ * the ### Lint / ### Test / ### Build sections of governance.md.
11
+ *
12
+ * Golden rule: prefer conservative, canonical commands. The user can always
13
+ * add exotic flags; we must not guess them.
14
+ */
15
+
16
+ const fs = require('fs');
17
+ const path = require('path');
18
+ const { safeRead, safeJson, parseSimpleToml } = require('./stacks');
19
+
20
/**
 * Append `cmd` to `arr` unless an identical entry is already present.
 * Mutates `arr` in place and returns nothing.
 */
const push = (arr, cmd) => {
  if (arr.includes(cmd)) return;
  arr.push(cmd);
};
23
+
24
/**
 * Run every per-language gate inferrer against `result`, in a fixed order.
 *
 * @param {string} dir - project root passed through to each inferrer
 * @param {object} result - analysis result that the inferrers append to
 */
function inferGates(dir, result) {
  const inferrers = [
    inferNodeGates,
    inferDenoGates,
    inferBunGates,
    inferPythonGates,
    inferRustGates,
    inferGoGates,
    inferJavaGates,
    inferKotlinGates,
    inferDotNetGates,
    inferSwiftGates,
    inferElixirGates,
    inferRubyGates,
    inferPhpGates,
    inferInfrastructureGates,
  ];
  for (const infer of inferrers) {
    infer(dir, result);
  }
}
40
+
41
+ // --- Node ------------------------------------------------------------------
42
+
43
/**
 * Infer lint/test/build gates for a Node.js project.
 *
 * Reads `result._manifests.packageJson` and does nothing when it is absent.
 * Explicit package.json scripts win; tool detection (ESLint, XO, Biome) is
 * only used when there is no `lint` script.
 *
 * @param {string} dir - project root, used to probe for config files
 * @param {object} result - appended to via result.linters / testers / builders
 */
function inferNodeGates(dir, result) {
  const pkg = result._manifests.packageJson;
  if (!pkg) return;

  const scripts = pkg.scripts || {};
  const deps = { ...pkg.dependencies, ...pkg.devDependencies };
  const exists = (file) => fs.existsSync(path.join(dir, file));

  // Prefer explicit scripts (most reliable signal)
  if (scripts.test) push(result.testers, 'npm run test');
  if (scripts.lint) push(result.linters, 'npm run lint');
  if (scripts.build) push(result.builders, 'npm run build');
  if (scripts.typecheck || scripts['type-check']) {
    push(result.builders, scripts.typecheck ? 'npm run typecheck' : 'npm run type-check');
  }
  if (scripts['format:check']) push(result.linters, 'npm run format:check');

  // Fall back to tool detection for repos that don't use canonical script names
  if (!scripts.lint) {
    // Flat config (eslint.config.*), legacy rc files (.eslintrc*), or the
    // legacy `eslintConfig` key embedded in package.json.
    const eslintConfigFiles = [
      'eslint.config.js', 'eslint.config.mjs', 'eslint.config.cjs',
      'eslint.config.ts', 'eslint.config.mts', 'eslint.config.cts',
      '.eslintrc', '.eslintrc.js', '.eslintrc.cjs', '.eslintrc.json',
      '.eslintrc.yaml', '.eslintrc.yml',
    ];
    const hasEslintConfig = pkg.eslintConfig !== undefined || eslintConfigFiles.some(exists);
    if (hasEslintConfig || deps.eslint) {
      push(result.linters, 'npx eslint . --max-warnings 0');
    }

    // XO — used by sindresorhus-family projects (chalk, etc.)
    if (deps.xo || (pkg.xo !== undefined)) {
      push(result.linters, 'npx xo');
    }

    // Biome replaces eslint + prettier in modern projects
    if (exists('biome.json') || exists('biome.jsonc') || deps['@biomejs/biome']) {
      push(result.linters, 'npx biome check .');
    }
  }

  // TypeScript type-check as a gate, unless a typecheck script already covers it
  if ((deps.typescript || exists('tsconfig.json')) &&
      !scripts.typecheck && !scripts['type-check']) {
    push(result.linters, 'npx tsc --noEmit');
  }

  // Syntax check for CLI projects: one `node --check` per declared bin entry
  if (pkg.bin && !result.stack.includes('next.js') && !result.stack.includes('react')) {
    const binFiles = typeof pkg.bin === 'string' ? [pkg.bin] : Object.values(pkg.bin);
    for (const bin of binFiles) {
      push(result.builders, `node --check ${bin}`);
    }
  }
}
97
+
98
+ // --- Deno ------------------------------------------------------------------
99
+
100
/**
 * Add the standard Deno test, lint and format-check gates when the detected
 * stack contains 'deno'.
 */
function inferDenoGates(dir, result) {
  const usesDeno = result.stack.includes('deno');
  if (usesDeno) {
    push(result.testers, 'deno test -A');
    for (const lintCmd of ['deno lint', 'deno fmt --check']) {
      push(result.linters, lintCmd);
    }
  }
}
106
+
107
+ // --- Bun -------------------------------------------------------------------
108
+
109
/**
 * Add `bun test` for Bun projects, but only when package.json does not
 * already declare a `test` script (that case is covered by the Node gates).
 */
function inferBunGates(dir, result) {
  if (!result.stack.includes('bun')) return;

  const manifest = result._manifests.packageJson;
  const hasTestScript = Boolean(manifest && manifest.scripts && manifest.scripts.test);
  if (hasTestScript) return;

  push(result.testers, 'bun test');
}
117
+
118
+ // --- Python ----------------------------------------------------------------
119
+
120
/**
 * Infer test, lint, type-check, format and build gates for Python projects.
 *
 * Uses the pyproject data, runner name and tox/nox flags from
 * `result._manifests`, plus config files probed under `dir`. Tool commands
 * are prefixed with the runner prefix (may be empty); `python -m build` is
 * emitted without it.
 */
function inferPythonGates(dir, result) {
  if (!result.stack.includes('python')) return;

  const manifests = result._manifests;
  const pyproject = manifests.pyproject;
  const prefix = runnerPrefix(manifests.pythonRunner);

  const viaRunner = (tool) => `${prefix}${tool}`.trim();
  const onDisk = (name) => fs.existsSync(path.join(dir, name));
  const declares = (section) => pyproject && pyproject.sections.has(section);

  // Tests: tox wins over nox, which wins over plain pytest.
  if (manifests.hasTox) {
    push(result.testers, viaRunner('tox run'));
  } else if (manifests.hasNox) {
    push(result.testers, viaRunner('nox'));
  } else {
    const pytestConfigured = pyproject &&
      (pyproject.sections.has('tool.pytest.ini_options') || pyprojectHasDep(pyproject, 'pytest'));
    // Without pyproject evidence, a pytest.ini or a tests/ or test/ directory is enough.
    if (pytestConfigured || onDisk('pytest.ini') || onDisk('tests') || onDisk('test')) {
      push(result.testers, viaRunner('pytest'));
    }
  }

  // Lint: ruff takes priority over flake8.
  const usesRuff = declares('tool.ruff') || onDisk('ruff.toml') || onDisk('.ruff.toml') ||
    pyprojectHasDep(pyproject, 'ruff');
  if (usesRuff) {
    push(result.linters, viaRunner('ruff check .'));
    push(result.linters, viaRunner('ruff format --check .'));
  } else if (pyprojectHasDep(pyproject, 'flake8') || onDisk('.flake8')) {
    push(result.linters, viaRunner('flake8'));
  }

  // Type-check: mypy
  const usesMypy = declares('tool.mypy') || onDisk('mypy.ini') || onDisk('.mypy.ini') ||
    pyprojectHasDep(pyproject, 'mypy');
  if (usesMypy) {
    push(result.linters, viaRunner('mypy .'));
  }

  // Format-only: black, skipped when a `ruff format` gate is already present.
  if (pyprojectHasDep(pyproject, 'black')) {
    const ruffFormats = result.linters.some((entry) => entry.includes('ruff format'));
    if (!ruffFormats) {
      push(result.linters, viaRunner('black --check .'));
    }
  }

  // Build
  if (declares('build-system')) {
    push(result.builders, 'python -m build');
  }
}
176
+
177
/**
 * Map a Python runner name to its "<runner> run " command prefix.
 *
 * @param {string} [runner] - one of uv, poetry, pdm, hatch, rye, pipenv
 * @returns {string} the prefix including a trailing space, or '' for anything else
 */
function runnerPrefix(runner) {
  const knownRunners = ['uv', 'poetry', 'pdm', 'hatch', 'rye', 'pipenv'];
  return knownRunners.includes(runner) ? `${runner} run ` : '';
}
188
+
189
/**
 * Loosely decide whether a pyproject mentions the tool `name`.
 *
 * There is no full TOML parser here, so this is an approximation:
 *  1. a `[tool.<name>]` section counts as a hit;
 *  2. otherwise any string value that contains `name` as a whole word counts
 *     (e.g. "pytest>=7.0" or "pytest-cov" for `pytest`).
 *
 * Whole-word matching keeps names embedded in longer words from matching
 * (e.g. "truffle" is not `ruff`). Non-string values are skipped rather than
 * crashing; array values are checked element by element.
 *
 * @param {{sections: {has: Function}, values: Iterable}|null|undefined} pyproject
 * @param {string} name - tool / package name to look for
 * @returns {boolean}
 */
function pyprojectHasDep(pyproject, name) {
  if (!pyproject) return false;
  if (pyproject.sections.has(`tool.${name}`)) return true;

  // Escape the name so it is matched literally inside the pattern.
  const literal = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const mention = new RegExp(`(^|[^A-Za-z0-9_])${literal}([^A-Za-z0-9_]|$)`);

  for (const [, value] of pyproject.values) {
    const candidates = Array.isArray(value) ? value : [value];
    if (candidates.some((item) => typeof item === 'string' && mention.test(item))) {
      return true;
    }
  }
  return false;
}
204
+
205
+ // --- Rust ------------------------------------------------------------------
206
+
207
/**
 * Add the cargo test, clippy and rustfmt-check gates when the detected stack
 * contains 'rust'.
 */
function inferRustGates(dir, result) {
  const isRust = result.stack.includes('rust');
  if (isRust) {
    push(result.testers, 'cargo test');
    const lintCommands = ['cargo clippy -- -D warnings', 'cargo fmt --check'];
    lintCommands.forEach((lintCmd) => push(result.linters, lintCmd));
  }
}
213
+
214
+ // --- Go --------------------------------------------------------------------
215
+
216
/**
 * Add `go test` and `go vet` for Go projects, plus `golangci-lint run` when
 * one of its config files exists in `dir`.
 */
function inferGoGates(dir, result) {
  if (!result.stack.includes('go')) return;

  push(result.testers, 'go test ./...');
  push(result.linters, 'go vet ./...');

  // golangci-lint is only added when the project has configured it
  const golangciConfigs = ['.golangci.yml', '.golangci.yaml', '.golangci.toml'];
  const isConfigured = golangciConfigs.some((file) => fs.existsSync(path.join(dir, file)));
  if (isConfigured) {
    push(result.linters, 'golangci-lint run');
  }
}
226
+
227
+ // --- Java ------------------------------------------------------------------
228
+
229
/**
 * Infer test/build/checkstyle gates for Java projects built with Maven or
 * Gradle, using the wrapper script when the manifests flag one.
 *
 * Skipped entirely when no build system was detected, or when the stack
 * contains 'kotlin' (those projects are handled by inferKotlinGates).
 */
function inferJavaGates(dir, result) {
  const { javaBuildSystem } = result._manifests;
  if (!javaBuildSystem || result.stack.includes('kotlin')) return;

  switch (javaBuildSystem) {
    case 'maven': {
      const mvn = result._manifests.javaWrapper ? './mvnw' : 'mvn';
      push(result.testers, `${mvn} test`);
      push(result.builders, `${mvn} verify`);
      // Checkstyle only when a config file is present at the project root
      const hasCheckstyle = fs.existsSync(path.join(dir, 'checkstyle.xml'));
      if (hasCheckstyle) push(result.linters, `${mvn} checkstyle:check`);
      break;
    }
    case 'gradle': {
      const gradle = result._manifests.gradleWrapper ? './gradlew' : 'gradle';
      push(result.testers, `${gradle} test`);
      push(result.builders, `${gradle} build`);
      const hasCheckstyle = fs.existsSync(path.join(dir, 'config', 'checkstyle'));
      if (hasCheckstyle) push(result.linters, `${gradle} checkstyleMain`);
      break;
    }
    default:
      break;
  }
}
252
+
253
+ // --- Kotlin ----------------------------------------------------------------
254
+
255
/**
 * Infer test/build/static-analysis gates for Kotlin projects.
 *
 * inferJavaGates bails out for Kotlin stacks, so this function must honour
 * the detected build system itself: Maven-built Kotlin projects get
 * `mvn test` / `mvn verify` (wrapper-aware) instead of Gradle commands that
 * could not run. Any other case keeps the Gradle defaults.
 *
 * @param {string} dir - project root, probed for detekt config files
 * @param {object} result - appended to via result.linters / testers / builders
 */
function inferKotlinGates(dir, result) {
  if (!result.stack.includes('kotlin')) return;

  if (result._manifests.javaBuildSystem === 'maven') {
    const mvn = result._manifests.javaWrapper ? './mvnw' : 'mvn';
    push(result.testers, `${mvn} test`);
    push(result.builders, `${mvn} verify`);
    // No detekt gate here: the `detekt` task emitted below is a Gradle task,
    // and we do not guess at a Maven equivalent.
    return;
  }

  const cmd = result._manifests.gradleWrapper ? './gradlew' : 'gradle';
  push(result.testers, `${cmd} test`);
  push(result.builders, `${cmd} build`);

  // detekt is a common kotlin static analyzer; only gate on it when configured.
  // (ktlint is deliberately not gated: it is configured through .editorconfig,
  // whose presence alone does not show that ktlint is in use.)
  if (fs.existsSync(path.join(dir, 'detekt.yml')) ||
      fs.existsSync(path.join(dir, 'detekt-config.yml'))) {
    push(result.linters, `${cmd} detekt`);
  }
}
270
+
271
+ // --- .NET ------------------------------------------------------------------
272
+
273
/**
 * Add the dotnet build, test and format-verification gates when the detected
 * stack contains 'dotnet'.
 */
function inferDotNetGates(dir, result) {
  const isDotNet = result.stack.includes('dotnet');
  if (isDotNet) {
    const gates = [
      [result.builders, 'dotnet build --no-restore'],
      [result.testers, 'dotnet test --no-build --verbosity normal'],
      [result.linters, 'dotnet format --verify-no-changes'],
    ];
    for (const [bucket, command] of gates) {
      push(bucket, command);
    }
  }
}
279
+
280
+ // --- Swift -----------------------------------------------------------------
281
+
282
/**
 * Add `swift build` and `swift test` for Swift projects, plus a strict
 * SwiftLint gate when `.swiftlint.yml` exists in `dir`.
 */
function inferSwiftGates(dir, result) {
  if (!result.stack.includes('swift')) return;

  push(result.builders, 'swift build');
  push(result.testers, 'swift test');

  const swiftlintConfig = path.join(dir, '.swiftlint.yml');
  if (!fs.existsSync(swiftlintConfig)) return;
  push(result.linters, 'swiftlint lint --strict');
}
290
+
291
+ // --- Elixir ----------------------------------------------------------------
292
+
293
/**
 * Add `mix test` and the format check for Elixir projects, plus Credo and
 * Dialyzer gates when the text of mix.exs mentions them.
 */
function inferElixirGates(dir, result) {
  if (!result.stack.includes('elixir')) return;

  push(result.testers, 'mix test');
  push(result.linters, 'mix format --check-formatted');

  // Optional analyzers are detected by a plain substring search of mix.exs
  const mixfile = safeRead(path.join(dir, 'mix.exs'));
  const mentions = (needle) => mixfile.includes(needle);

  if (mentions('credo')) {
    push(result.linters, 'mix credo --strict');
  }
  if (mentions('dialyxir') || mentions('dialyzer')) {
    push(result.linters, 'mix dialyzer');
  }
}
306
+
307
+ // --- Ruby ------------------------------------------------------------------
308
+
309
/**
 * Infer test and lint gates for Ruby projects from the flags on
 * `result._manifests.ruby`. Does nothing when that manifest is absent.
 */
function inferRubyGates(dir, result) {
  const ruby = result._manifests.ruby;
  if (!ruby) return;

  // Test runner: rspec first; a Rakefile or minitest both map to `rake test`
  if (ruby.hasRspec) {
    push(result.testers, 'bundle exec rspec');
  } else if (ruby.rakefile || ruby.hasMinitest) {
    push(result.testers, 'bundle exec rake test');
  }

  // Linters, in emission order. bundle-audit is keyed on the Gemfile flag,
  // as a security default.
  const linterByFlag = [
    ['hasRubocop', 'bundle exec rubocop'],
    ['hasStandardRb', 'bundle exec standardrb'],
    ['hasReek', 'bundle exec reek'],
    ['hasBrakeman', 'bundle exec brakeman -q --no-pager'],
    ['gemfile', 'bundle exec bundle-audit check --update'],
  ];
  for (const [flag, command] of linterByFlag) {
    if (ruby[flag]) push(result.linters, command);
  }
}
342
+
343
+ // --- PHP -------------------------------------------------------------------
344
+
345
/**
 * Infer test and lint gates for PHP projects from the flags on
 * `result._manifests.php`. Does nothing when that manifest is absent.
 */
function inferPhpGates(dir, result) {
  const php = result._manifests.php;
  if (!php) return;

  // Tests: a composer `test` script wins, then Pest, then PHPUnit
  const hasComposerTest = php.scripts && php.scripts.test;
  if (hasComposerTest) {
    push(result.testers, 'composer test');
  } else if (php.hasPest) {
    push(result.testers, 'vendor/bin/pest');
  } else if (php.hasPhpunit) {
    push(result.testers, 'vendor/bin/phpunit');
  }

  // Static analysis / style tools, in emission order
  const linterByFlag = [
    ['hasPhpcs', 'vendor/bin/phpcs'],
    ['hasPhpStan', 'vendor/bin/phpstan analyse'],
    ['hasPsalm', 'vendor/bin/psalm'],
    ['hasPhpCsFixer', 'vendor/bin/php-cs-fixer fix --dry-run --diff'],
    ['hasRector', 'vendor/bin/rector process --dry-run'],
  ];
  linterByFlag
    .filter(([flag]) => php[flag])
    .forEach(([, command]) => push(result.linters, command));

  // composer validate is always added as a sanity gate
  push(result.linters, 'composer validate --strict');
}
377
+
378
+ // --- Infrastructure --------------------------------------------------------
379
+
380
/**
 * Infer gates and advisories for infrastructure tooling.
 *
 * Gates (Terraform, Helm, OpenAPI, protobuf) are driven by
 * `result._manifests.infra`. The hadolint and actionlint advisories depend
 * only on the detected stack and on `.github/workflows`, so they are still
 * evaluated when no infra manifest was attached.
 *
 * @param {string} dir - project root, probed for .tflint.hcl and workflows
 * @param {object} result - appended to via result.linters / result._advisories
 */
function inferInfrastructureGates(dir, result) {
  // A missing infra manifest only disables the infra-driven gates below.
  const infra = result._manifests.infra || {};

  // Advisories are de-duplicated the same way as gates, so running the
  // inference twice on one result does not repeat them.
  const advise = (hint) => {
    result._advisories = result._advisories || [];
    push(result._advisories, hint);
  };

  if (infra.terraform) {
    push(result.linters, 'terraform fmt -check -recursive');
    push(result.linters, 'terraform validate');
    if (fs.existsSync(path.join(dir, '.tflint.hcl'))) {
      push(result.linters, 'tflint');
    }
  }

  if (infra.helm) {
    push(result.linters, 'helm lint');
  }

  if (infra.openapi) {
    // infra.openapi is interpolated as-is as the lint target
    push(result.linters, `npx @stoplight/spectral-cli lint ${infra.openapi}`);
  }

  if (infra.proto) {
    push(result.linters, 'buf lint');
  }

  // Dockerfile → hadolint (advisory — too noisy by default to be mandatory)
  if (result.stack.includes('docker')) {
    advise('hadolint Dockerfile');
  }

  // GitHub Actions workflows → actionlint (advisory)
  if (fs.existsSync(path.join(dir, '.github', 'workflows'))) {
    advise('actionlint');
  }
}
416
+
417
+ module.exports = { inferGates };
@@ -0,0 +1,146 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * CI step normalization.
5
+ *
6
+ * The benchmark showed crag analyze producing 40-step dumps for fastify and
7
+ * axios. Most of those steps are either:
8
+ * - Matrix expansions of the same command
9
+ * - Background processes (servers started with &)
10
+ * - Shell noise (echo, export, set-output, GITHUB_CONTEXT dumps)
11
+ * - `npm install --no-save ../artifacts/x-*.tgz` style throwaway setup
12
+ * - Repeated `npm install --ignore-scripts` across jobs
13
+ *
14
+ * This module turns a raw list of extracted CI commands into a short,
15
+ * unique list of actual gates. It runs AFTER extraction and BEFORE
16
+ * governance generation.
17
+ */
18
+
19
/**
 * Reduce a CI command to a canonical form for de-duplication.
 *
 * `${{ matrix.* }}` becomes `<matrix>`, `${{ env.* }}` becomes `<env>`, any
 * other simple `${{ name }}` expression becomes `<expr>`; whitespace runs are
 * collapsed to single spaces and the result is trimmed.
 *
 * @param {string} cmd
 * @returns {string}
 */
function canonicalize(cmd) {
  // Order matters: the generic <expr> rule would also match matrix/env forms.
  const rewrites = [
    [/\$\{\{\s*matrix\.[A-Za-z0-9_-]+\s*\}\}/g, '<matrix>'],
    [/\$\{\{\s*env\.[A-Za-z0-9_-]+\s*\}\}/g, '<env>'],
    [/\$\{\{\s*[A-Za-z0-9_.-]+\s*\}\}/g, '<expr>'],
    [/\s+/g, ' '],
  ];
  let text = cmd;
  for (const [pattern, replacement] of rewrites) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
28
+
29
// Commands matching any of these are always noise.
const NOISE_PATTERNS = [
  // Shell plumbing. Every `echo ...` form (including `echo "::set-output ..."`
  // and `echo "$GITHUB_CONTEXT"`) is covered by this single rule.
  /^(echo|printf|export|set)\s/,

  // Throwaway setup
  /^(mkdir|rm|cp|mv|touch)\s/,
  /^git\s+(config|submodule)/,

  // Dependency installation — setup, not gates
  /^pnpm\s+(install|i)(\s|$)/,
  /^bun\s+install/,
  /^npm\s+install\s+--no-save/,
  /^npm\s+install\s+--global/,
  /^pip\s+install/,
  /^python\s+-m\s+pip\s+(install|uninstall)/,
  /^uv\s+(sync|pip|lock)/,
  /^poetry\s+install/,
  /^composer\s+(install|update|require)/,
  /^bundle\s+install/,
  /^cargo\s+fetch/,

  // Toolchain setup
  /^rustup\s/,

  // Release / publish steps — not gates
  /^npm\s+publish/,
  /^cargo\s+publish/,
  /^docker\s+push/,
  /^bundle\s+exec\s+rake\s+(install|release|build):/,
  /^rake\s+(install|release|build):/,

  // Benchmark / micro-regression one-offs
  /--debug-benchmark/,
  /grep\s+"['"]?Latency avg/,
  /-v\s*\|\s*grep/,

  // Leaked fragment of a multi-line `${{ a && b || c }}` expression, e.g.
  // `'--flag value' || '--other' }}`
  /^['"][^'"]*['"]\s*(\|\||&&)/,

  // License-checker one-liners carry long, project-specific allow lists and
  // are not stable gates, so they are dropped.
  /^npx\s+license-checker/,
];

// These are noise only when they stand alone; a compound `a && b` line may
// still end in a real gate, so it is left for extractMainCommand to split.
const STANDALONE_NOISE_PATTERNS = [
  /^cd\s/,
  /^npm\s+(ci|install)(\s|$)/,
  /^yarn\s+(install)(\s|$)/,
];

// README / doc generation scripts, matched anywhere in the command
const NOISE_SUBSTRINGS = ['update-readme', 'clean-cspell', 'validate-ecosystem-links'];

/**
 * A command is "noise" if it's setup, plumbing, or side-effecting rather than a gate.
 *
 * @param {string} cmd - a single extracted CI command
 * @returns {boolean} true when the command should not be reported as a gate
 */
function isNoise(cmd) {
  const trimmed = cmd.trim();
  if (!trimmed) return true;

  // Background processes are not gates (they're servers/daemons for tests)
  if (trimmed.endsWith('&')) return true;

  if (NOISE_PATTERNS.some((pattern) => pattern.test(trimmed))) return true;

  if (!trimmed.includes('&&') &&
      STANDALONE_NOISE_PATTERNS.some((pattern) => pattern.test(trimmed))) {
    return true;
  }

  if (NOISE_SUBSTRINGS.some((needle) => trimmed.includes(needle))) return true;

  // A trailing `}}` marks a leaked expression fragment only when it has no
  // matching `${{` on the same line. A command that ends in a complete
  // expression, such as `tox -e ${{ matrix.toxenv }}`, is a real gate.
  if (/\}\}\s*$/.test(trimmed)) {
    const opened = (trimmed.match(/\$\{\{/g) || []).length;
    const closed = (trimmed.match(/\}\}/g) || []).length;
    if (closed > opened) return true;
  }

  return false;
}
99
+
100
/**
 * Pick the gate command out of a compound `a && b && c` shell line.
 *
 *   "cd test/bundler/webpack && npm install && npm run test" → "npm run test"
 *
 * The LAST segment that is neither noise nor a `cd` is returned, since the
 * trailing command is usually the gate. A line without `&&` is returned
 * unchanged.
 *
 * @param {string} cmd
 * @returns {string}
 */
function extractMainCommand(cmd) {
  if (!cmd.includes('&&')) return cmd;

  const segments = cmd
    .split('&&')
    .map((segment) => segment.trim())
    .filter(Boolean);

  // Walk from the end so the last qualifying segment wins
  const gate = [...segments]
    .reverse()
    .find((segment) => !isNoise(segment) && !segment.startsWith('cd '));
  if (gate !== undefined) return gate;

  // Every segment is noise: hand back the final one so the caller's own
  // isNoise() check rejects it, instead of leaking the whole compound line.
  return segments[segments.length - 1] || cmd;
}
116
+
117
/**
 * Normalize a raw list of CI commands into deduped, filtered gates.
 *
 * Each entry is reduced to its main command, dropped if it is noise, and
 * de-duplicated on its canonical form (so matrix expansions of one command
 * collapse to a single gate). The first human-readable spelling is kept.
 *
 * @param {string[]} rawCommands — output of extractRunCommands(); a missing
 *   or non-iterable value yields an empty list
 * @param {object} [opts]
 * @param {number} [opts.maxGates=8] — cap the returned list; 0 or a negative
 *   value yields an empty list
 * @returns {string[]} — canonical, deduped, gate-worthy commands
 */
function normalizeCiGates(rawCommands, opts = {}) {
  const { maxGates = 8 } = opts || {};

  // The cap is only checked after a push, so a non-positive cap has to be
  // handled up front or one gate would still get through.
  if (maxGates <= 0) return [];
  if (rawCommands == null || typeof rawCommands[Symbol.iterator] !== 'function') return [];

  const seen = new Set();
  const result = [];

  for (const raw of rawCommands) {
    if (!raw || typeof raw !== 'string') continue;
    const main = extractMainCommand(raw);
    if (isNoise(main)) continue;
    const canonical = canonicalize(main);
    if (!canonical) continue;
    if (seen.has(canonical)) continue;
    seen.add(canonical);
    // Keep the first human-readable form (not the canonical with <matrix>)
    result.push(main.replace(/\s+/g, ' ').trim());
    if (result.length >= maxGates) break;
  }

  return result;
}
145
+
146
+ module.exports = { normalizeCiGates, canonicalize, isNoise, extractMainCommand };