@whitehatd/crag 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,7 +5,8 @@
5
5
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE)
6
6
  [![Node](https://img.shields.io/node/v/%40whitehatd%2Fcrag)](https://nodejs.org)
7
7
  [![Zero dependencies](https://img.shields.io/badge/dependencies-0-brightgreen)](./package.json)
8
- [![159 tests](https://img.shields.io/badge/tests-159%20passing-brightgreen)](./test)
8
+ [![228 tests](https://img.shields.io/badge/tests-228%20passing-brightgreen)](./test)
9
+ [![Security hardened](https://img.shields.io/badge/security-hardened-brightgreen)](./SECURITY.md)
9
10
 
10
11
  **The bedrock layer for AI coding agents. One `governance.md`. Any project. Never stale.**
11
12
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@whitehatd/crag",
3
- "version": "0.2.3",
3
+ "version": "0.2.4",
4
4
  "description": "The bedrock layer for AI coding agents. One governance.md. Any project. Never stale.",
5
5
  "bin": {
6
6
  "crag": "bin/crag.js"
@@ -0,0 +1,317 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Multi-CI step extraction.
5
+ *
6
+ * The GitHub Actions path already lives in src/governance/yaml-run.js
7
+ * (extractRunCommands) and we reuse it here. This module adds support for:
8
+ * - GitLab CI (.gitlab-ci.yml)
9
+ * - CircleCI (.circleci/config.yml)
10
+ * - Travis CI (.travis.yml)
11
+ * - Azure Pipelines (azure-pipelines.yml, .azure-pipelines/)
12
+ * - Buildkite (.buildkite/pipeline.yml, .buildkite/pipeline.yaml)
13
+ * - Drone (.drone.yml)
14
+ * - Woodpecker (.woodpecker.yml, .woodpecker/*.yml)
15
+ * - Bitbucket (bitbucket-pipelines.yml)
16
+ *
17
+ * Each extractor returns a list of raw shell command strings. The CI
18
+ * normalizer (normalize.js) dedups and filters them uniformly regardless
19
+ * of which CI system they came from.
20
+ */
21
+
22
+ const fs = require('fs');
23
+ const path = require('path');
24
+ const { extractRunCommands } = require('../governance/yaml-run');
25
+ const { safeRead } = require('./stacks');
26
+
27
/**
 * Probe a project directory for known CI configurations and gather the raw
 * shell commands each one declares.
 *
 * Systems are probed in a fixed order; the first one found is reported as
 * `system`, while `commands` accumulates the commands of every system found.
 * A Jenkinsfile is only detected (it is Groovy, not YAML) and contributes
 * no commands.
 *
 * Returns { system: 'name-or-null', commands: string[] }
 */
function extractCiCommands(dir) {
  const commands = [];
  let primary = null;

  // Only the first detected system is remembered as the primary one.
  const claim = (name) => {
    if (!primary) primary = name;
  };

  // Single config file: if present, claim the system and run the extractor.
  const probeFile = (system, extractor, ...segments) => {
    const target = path.join(dir, ...segments);
    if (!fs.existsSync(target)) return;
    claim(system);
    if (extractor) {
      commands.push(...extractor(safeRead(target)));
    }
  };

  // Config directory: if present, claim the system and extract from every
  // YAML file found beneath it.
  const probeDir = (system, extractor, ...segments) => {
    const root = path.join(dir, ...segments);
    if (!fs.existsSync(root)) return;
    claim(system);
    for (const file of walkYaml(root)) {
      commands.push(...extractor(safeRead(file)));
    }
  };

  // GitHub Actions
  probeDir('github-actions', (text) => extractRunCommands(text), '.github', 'workflows');

  // GitLab CI
  probeFile('gitlab-ci', (text) => extractGitlabCommands(text), '.gitlab-ci.yml');

  // CircleCI
  probeFile('circle-ci', (text) => extractCircleCommands(text), '.circleci', 'config.yml');

  // Travis CI
  probeFile('travis-ci', (text) => extractTravisCommands(text), '.travis.yml');

  // Azure Pipelines
  probeFile('azure-pipelines', (text) => extractAzureCommands(text), 'azure-pipelines.yml');
  probeFile('azure-pipelines', (text) => extractAzureCommands(text), 'azure-pipelines.yaml');
  probeDir('azure-pipelines', (text) => extractAzureCommands(text), '.azure-pipelines');

  // Buildkite
  probeFile('buildkite', (text) => extractBuildkiteCommands(text), '.buildkite/pipeline.yml');
  probeFile('buildkite', (text) => extractBuildkiteCommands(text), '.buildkite/pipeline.yaml');

  // Drone
  probeFile('drone', (text) => extractDroneCommands(text), '.drone.yml');

  // Woodpecker (same format as Drone)
  probeFile('woodpecker', (text) => extractDroneCommands(text), '.woodpecker.yml');
  probeDir('woodpecker', (text) => extractDroneCommands(text), '.woodpecker');

  // Bitbucket
  probeFile('bitbucket', (text) => extractBitbucketCommands(text), 'bitbucket-pipelines.yml');

  // Jenkins — Jenkinsfiles are Groovy, not YAML. We do not try to parse them.
  probeFile('jenkins', null, 'Jenkinsfile');

  return { system: primary, commands };
}
126
+
127
/**
 * Recursively list every `.yml` / `.yaml` file beneath `dir`.
 * Directories that cannot be read contribute nothing (best effort).
 */
function walkYaml(dir) {
  const found = [];
  const isYamlName = (name) => /\.ya?ml$/.test(name);
  try {
    const entries = fs.readdirSync(dir, { withFileTypes: true });
    entries.forEach((entry) => {
      const target = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        found.push(...walkYaml(target));
        return;
      }
      if (isYamlName(entry.name)) {
        found.push(target);
      }
    });
  } catch { /* skip */ }
  return found;
}
140
+
141
+ // --- GitLab CI -------------------------------------------------------------
142
+
143
/**
 * GitLab CI declares its commands under `script:`, `before_script:` and
 * `after_script:`; each key holds either one string or a list of strings.
 */
function extractGitlabCommands(content) {
  const scriptKeys = ['script', 'before_script', 'after_script'];
  return extractYamlListField(content, scriptKeys);
}
150
+
151
+ // --- CircleCI --------------------------------------------------------------
152
+
153
/**
 * Extract shell commands from a CircleCI config.
 *
 * Recognized step forms:
 *   - run: npm test          inline shorthand
 *   - run: |                 shorthand with a block scalar
 *       npm run lint
 *   - run:
 *       command: npm test    nested, inline
 *   - run:
 *       command: |           nested, block scalar
 *         npm run lint
 *
 * The scan is line-based and heuristic (no YAML parser is used). Every
 * non-blank line of a block scalar is emitted as its own command.
 *
 * @param {string} content Raw YAML text of the config file.
 * @returns {string[]} Commands in the order they appear in the file.
 */
function extractCircleCommands(content) {
  const commands = [];
  const lines = content.split(/\r?\n/);

  const indentOf = (text) => text.match(/^\s*/)[0].length;
  const stripQuotes = (text) => text.replace(/^["']|["']$/g, '');
  const opensBlock = (value) => value.startsWith('|') || value.startsWith('>');

  // Collect the lines that follow `lines[start]` and are indented deeper
  // than it; the first non-blank line at the same or lesser indent ends
  // the block.
  const collectBlock = (start) => {
    const baseIndent = indentOf(lines[start]);
    for (let j = start + 1; j < lines.length; j++) {
      const inner = lines[j];
      if (inner.trim() === '') continue;
      if (indentOf(inner) <= baseIndent) break;
      commands.push(inner.trim());
    }
  };

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    // Shorthand: `- run: npm test` or `- run: |` followed by a block.
    const inline = line.match(/^\s*-?\s*run:\s*(.+)$/);
    if (inline) {
      const rest = inline[1].trim();
      if (opensBlock(rest)) {
        collectBlock(i);
      } else if (rest && !rest.startsWith('#') && !rest.startsWith('{') &&
                 !rest.startsWith('name:') && !rest.startsWith('command:')) {
        commands.push(stripQuotes(rest));
      }
    }

    // Nested: `command: ...` under a `run:` mapping.
    const nested = line.match(/^\s*command:\s*(.+)$/);
    if (nested) {
      const rest = nested[1].trim();
      if (opensBlock(rest)) {
        collectBlock(i);
      } else if (rest && !rest.startsWith('#')) {
        commands.push(stripQuotes(rest));
      }
    }
  }
  return commands;
}
192
+
193
+ // --- Travis CI -------------------------------------------------------------
194
+
195
/**
 * Travis CI: commands are read from the `script:`, `before_script:` and
 * `install:` keys.
 */
function extractTravisCommands(content) {
  const phaseKeys = ['script', 'before_script', 'install'];
  return extractYamlListField(content, phaseKeys);
}
198
+
199
+ // --- Azure Pipelines -------------------------------------------------------
200
+
201
/**
 * Azure Pipelines steps look like `- script: cmd`, `- bash: cmd`,
 * `- pwsh: cmd` or `- powershell: cmd`. The value is either an inline
 * command or a block scalar whose non-blank lines are each emitted as a
 * separate command.
 */
function extractAzureCommands(content) {
  const STEP_KEY = /^\s*-?\s*(script|bash|pwsh|powershell):\s*(.*)$/;
  const BLOCK_INDICATOR = /^[|>][+-]?\s*$/;
  const rows = content.split(/\r?\n/);
  // Width of the leading whitespace (index of the first non-space char).
  const leadingWidth = (text) => text.search(/\S|$/);
  const found = [];

  rows.forEach((row, index) => {
    const hit = STEP_KEY.exec(row);
    if (hit === null) return;
    const value = hit[2].trim();

    if (!BLOCK_INDICATOR.test(value)) {
      // Inline command; comment-only or empty values carry nothing.
      if (value !== '' && !value.startsWith('#')) {
        found.push(value.replace(/^["']|["']$/g, ''));
      }
      return;
    }

    // Block scalar: take following lines indented deeper than the key line.
    const floor = leadingWidth(row);
    for (const candidate of rows.slice(index + 1)) {
      const body = candidate.trim();
      if (body === '') continue;
      if (leadingWidth(candidate) <= floor) break;
      found.push(body);
    }
  });

  return found;
}
228
+
229
+ // --- Buildkite -------------------------------------------------------------
230
+
231
/**
 * Buildkite steps carry either `command: cmd` (single) or
 * `commands: [list]`; both keys are scanned.
 */
function extractBuildkiteCommands(content) {
  const stepKeys = ['command', 'commands'];
  return extractYamlListField(content, stepKeys);
}
237
+
238
+ // --- Drone / Woodpecker ----------------------------------------------------
239
+
240
/**
 * Drone and Woodpecker pipeline steps list their shell commands under a
 * `commands:` key.
 */
function extractDroneCommands(content) {
  const stepKeys = ['commands'];
  return extractYamlListField(content, stepKeys);
}
246
+
247
+ // --- Bitbucket Pipelines ---------------------------------------------------
248
+
249
/**
 * Bitbucket Pipelines: each `step:` block lists its commands under
 * `script:`.
 */
function extractBitbucketCommands(content) {
  const stepKeys = ['script'];
  return extractYamlListField(content, stepKeys);
}
255
+
256
+ // --- Generic YAML list field extractor -------------------------------------
257
+
258
/**
 * Extract commands from YAML keys that can be either a single string or a
 * list of strings. This is the workhorse used by GitLab, Travis, Buildkite,
 * Drone, Bitbucket.
 *
 * Supported value shapes for a matching key:
 *   key: cmd                 inline scalar
 *   key: [cmd1, cmd2]        inline (flow) list
 *   key: |                   block scalar — every non-blank line is a command
 *   key:                     block list, items indented deeper than the key
 *     - cmd
 *   key:                     block list, items at the SAME indent as the key
 *   - cmd                    (valid YAML, common in .travis.yml)
 *
 * A value consisting only of a comment (`key: # note`) is treated as empty,
 * so a list on the following lines is still picked up. Comment lines inside
 * a list are skipped.
 *
 * It is deliberately heuristic — a full YAML parser would be more accurate
 * but we don't ship dependencies. The parser accepts false positives (which
 * normalize.js filters) over missing real gates.
 *
 * @param {string} content Raw YAML text.
 * @param {string[]} fields Key names to look for (matched literally).
 * @returns {string[]} Commands in file order.
 */
function extractYamlListField(content, fields) {
  const commands = [];
  const lines = content.split(/\r?\n/);

  // Field names are matched literally: escape any regex metacharacters.
  const escaped = fields.map((f) => f.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  const fieldRegex = new RegExp(`^(\\s*)(-?)\\s*(${escaped.join('|')}):\\s*(.*)$`);

  const indentOf = (text) => text.match(/^\s*/)[0].length;
  const stripQuotes = (text) => text.replace(/^["']|["']$/g, '');

  for (let i = 0; i < lines.length; i++) {
    const m = lines[i].match(fieldRegex);
    if (!m) continue;

    const baseIndent = m[1].length;
    // When the key itself sits on a `- key:` line, dash lines at the same
    // indent are sibling items of the enclosing list, not our children.
    const keyIsListItem = m[2] === '-';
    const value = m[4].trim();
    const rest = value.startsWith('#') ? '' : value;

    if (!rest) {
      // List form: field: then lines below are "- cmd"
      for (let j = i + 1; j < lines.length; j++) {
        const inner = lines[j];
        const text = inner.trim();
        if (text === '' || text.startsWith('#')) continue;

        const innerIndent = indentOf(inner);
        if (innerIndent < baseIndent) break;
        if (innerIndent === baseIndent) {
          // Same-indent sequence entries belong to a plain `key:` line.
          // Require "- " so a `---` document marker is not mistaken for one.
          const sameLevelItem = !keyIsListItem && /^\s*-\s+\S/.test(inner);
          if (!sameLevelItem) break;
        }

        const listItem = inner.match(/^\s*-\s*(.+)$/);
        if (listItem) {
          commands.push(stripQuotes(listItem[1].trim()));
        }
      }
    } else if (/^[|>][+-]?\s*$/.test(rest)) {
      // Block scalar: every deeper-indented, non-blank line is a command.
      for (let j = i + 1; j < lines.length; j++) {
        const inner = lines[j];
        if (inner.trim() === '') continue;
        if (indentOf(inner) <= baseIndent) break;
        commands.push(inner.trim());
      }
    } else if (rest.startsWith('[')) {
      // Inline list: script: [cmd1, cmd2]
      const close = rest.indexOf(']');
      const inner = rest.slice(1, close === -1 ? rest.length : close);
      for (const item of inner.split(',')) {
        const trimmed = stripQuotes(item.trim());
        if (trimmed) commands.push(trimmed);
      }
    } else {
      // Inline scalar: script: cmd
      commands.push(stripQuotes(rest));
    }
  }

  return commands;
}
316
+
317
+ module.exports = { extractCiCommands, walkYaml, extractYamlListField };
@@ -0,0 +1,142 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Documentation-based gate mining.
5
+ *
6
+ * A CONTRIBUTING.md that says "Before submitting a PR, run `make test`
7
+ * and `make lint`" is as authoritative as a CI workflow — maintainers
8
+ * codify their expectations there. We scan these files for shell commands
9
+ * in code fences and in inline backticks that look like gate candidates.
10
+ *
11
+ * Mined gates are returned as ADVISORY — the user should confirm them
12
+ * rather than have them enforced immediately.
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+ const { safeRead } = require('./stacks');
18
+
19
// Contributor-facing documents scanned for gate commands, as paths relative
// to the project root. They are scanned in this order.
const DOC_FILES = [
  // Contribution guides
  'CONTRIBUTING.md',
  'CONTRIBUTING',
  '.github/CONTRIBUTING.md',
  'docs/CONTRIBUTING.md',

  // Pull request templates
  '.github/PULL_REQUEST_TEMPLATE.md',
  '.github/pull_request_template.md',

  // Developer guides
  'DEVELOPING.md',
  'DEVELOPMENT.md',
  'HACKING.md',
];
30
+
31
// Prefix patterns; a command must match at least one of them to be
// considered a gate candidate. Each is anchored at the start of the command.
const GATE_COMMAND_PATTERNS = [
  // Task runners
  /^make\s+\w+/,
  /^just\s+\w+/,
  /^task\s+\w+/,

  // JavaScript package managers
  /^npm\s+(run|test|ci)/,
  /^yarn\s+(test|lint|build|check)/,
  /^pnpm\s+(test|lint|build|check|run)/,
  /^bun\s+(test|run)/,

  // Rust and Go
  /^cargo\s+(test|check|clippy|fmt)/,
  /^go\s+(test|vet|build)/,

  // Python
  /^pytest/,
  /^python\s+-m\s+pytest/,
  /^tox\s+run/,
  /^uv\s+run/,
  /^poetry\s+run/,
  /^pdm\s+run/,
  /^hatch\s+run/,
  /^nox(\s|$)/,
  /^ruff\s+(check|format)/,
  /^mypy\s/,
  /^black\s/,

  // Ruby, PHP, Elixir
  /^bundle\s+exec\s+(rspec|rake|rubocop)/,
  /^composer\s+(test|lint)/,
  /^vendor\/bin\/(phpunit|phpcs|phpstan|pest)/,
  /^mix\s+(test|format|credo)/,

  // .NET, Swift, JVM
  /^dotnet\s+(test|build|format)/,
  /^swift\s+(test|build)/,
  /^mvn\s+(test|verify)/,
  /^\.\/(mvnw|gradlew)\s/,
  /^gradle\s/,

  // Infrastructure
  /^terraform\s+(fmt|validate)/,
  /^helm\s+lint/,
];
64
+
65
/**
 * Mine gate candidates from contributor documentation.
 *
 * For every file in DOC_FILES that exists under `dir`, commands are taken
 * first from shell code fences (unlabelled, or labelled bash/sh/shell/
 * console) and then from inline backtick spans. A command is kept only if
 * it passes isGateCandidate and looksCanonical; the first file a command is
 * seen in is recorded as its source, so duplicates are removed.
 *
 * Doc mining is conservative: the output is capped at `opts.maxCandidates`
 * (default 5) to avoid overwhelming governance with every example snippet.
 *
 * @param {string} dir Project root directory.
 * @param {{ maxCandidates?: number }} [opts]
 * @returns {{ command: string, source: string }[]} `source` is the path of
 *   the document relative to `dir`.
 */
function mineDocGates(dir, opts = {}) {
  const { maxCandidates = 5 } = opts;
  const candidates = new Map(); // command → source

  const consider = (raw, source) => {
    const cleaned = cleanCommandLine(raw);
    if (cleaned && isGateCandidate(cleaned) && looksCanonical(cleaned) && !candidates.has(cleaned)) {
      candidates.set(cleaned, source);
    }
  };

  for (const relPath of DOC_FILES) {
    const full = path.join(dir, relPath);
    if (!fs.existsSync(full)) continue;
    const content = safeRead(full);
    if (!content) continue;

    // Code fences — multi-line blocks
    for (const line of collectShellFenceLines(content)) {
      consider(line, relPath);
    }

    // Inline backticks — single-line snippets that look like commands
    for (const match of content.matchAll(/`([^`\n]+)`/g)) {
      consider(match[1], relPath);
    }
  }

  const list = [...candidates.entries()].map(([command, source]) => ({ command, source }));
  return list.slice(0, maxCandidates);
}

/**
 * Return the lines found inside shell code fences of a Markdown document.
 *
 * Fences are tracked line by line so that opening and closing markers are
 * always paired correctly: the body of a fence with another language
 * (```js, ```yaml, ...) is skipped entirely instead of letting its closing
 * marker pair up with the next opening one. Both LF and CRLF line endings
 * are accepted. An unclosed fence runs to the end of the document.
 */
function collectShellFenceLines(content) {
  const shellLangs = new Set(['', 'bash', 'sh', 'shell', 'console']);
  const collected = [];
  let inFence = false;
  let mineable = false;

  for (const line of content.split(/\r?\n/)) {
    if (!inFence) {
      // Opening marker: backticks plus an optional info string that itself
      // contains no backtick (so "```code```" on one line is not a fence).
      const opening = line.match(/^\s*```+\s*([^\s`]*)[^`]*$/);
      if (opening) {
        inFence = true;
        mineable = shellLangs.has(opening[1]);
      }
      continue;
    }
    if (/^\s*```+\s*$/.test(line)) {
      inFence = false;
      continue;
    }
    if (mineable) collected.push(line);
  }
  return collected;
}
109
+
110
/**
 * Decide whether a command reads like a real gate rather than a partial
 * example. It must be free of placeholder markers (`[...]`, `{...}`, or a
 * `<` together with a `>`), be at most 8 whitespace-separated tokens long,
 * and mention a gate verb (test/lint/build/fmt/format/check/typecheck/...)
 * as a whole word.
 */
function looksCanonical(cmd) {
  const placeholderChecks = [
    () => /\[.*?\]/.test(cmd),
    () => /\{.*?\}/.test(cmd),
    () => cmd.includes('<') && cmd.includes('>'),
  ];
  if (placeholderChecks.some((hasMarker) => hasMarker())) {
    return false;
  }

  // Very long commands are typically worked examples, not gates.
  const tokenCount = cmd.split(/\s+/).length;
  if (tokenCount > 8) {
    return false;
  }

  // Any gate verb appearing as its own token qualifies the command.
  const gateVerb = /\b(test|tests|spec|lint|build|check|fmt|format|typecheck|type-check|verify|validate|clippy|vet|rspec|rubocop|phpunit|phpstan|analyse|credo|dialyzer|pytest|mypy|ruff|black)\b/;
  return gateVerb.test(cmd);
}
126
+
127
/**
 * Normalize one line of documentation into a bare command: surrounding
 * whitespace is trimmed, a leading shell prompt ("$ ", "> ", "# ") is
 * removed, and a trailing " # comment" is dropped.
 */
function cleanCommandLine(line) {
  const PROMPT = /^[$#>]\s+/;
  const TRAILING_COMMENT = /\s+#.*$/;
  return line.trim().replace(PROMPT, '').replace(TRAILING_COMMENT, '');
}
135
+
136
/**
 * A command is a gate candidate when it is non-empty, at most 120
 * characters, a single line, and matches one of GATE_COMMAND_PATTERNS.
 */
function isGateCandidate(cmd) {
  if (!cmd) return false;
  if (cmd.length > 120 || cmd.includes('\n')) return false;
  for (const pattern of GATE_COMMAND_PATTERNS) {
    if (pattern.test(cmd)) return true;
  }
  return false;
}
141
+
142
+ module.exports = { mineDocGates, isGateCandidate };