npm - @whitehatd/crag - Versions diffs - 0.2.3 → 0.2.4 - Mend

@whitehatd/crag 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/README.md +2 -1
package/package.json +1 -1
package/src/analyze/ci-extractors.js +317 -0
package/src/analyze/doc-mining.js +142 -0
package/src/analyze/gates.js +417 -0
package/src/analyze/normalize.js +146 -0
package/src/analyze/stacks.js +453 -0
package/src/analyze/task-runners.js +146 -0
package/src/commands/analyze.js +158 -205
package/src/governance/yaml-run.js +58 -2

package/README.md CHANGED Viewed

@@ -5,7 +5,8 @@
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](./LICENSE)
 [![Node](https://img.shields.io/node/v/%40whitehatd%2Fcrag)](https://nodejs.org)
 [![Zero dependencies](https://img.shields.io/badge/dependencies-0-brightgreen)](./package.json)
-[![159 tests](https://img.shields.io/badge/tests-159%20passing-brightgreen)](./test)
+[![228 tests](https://img.shields.io/badge/tests-228%20passing-brightgreen)](./test)
+[![Security hardened](https://img.shields.io/badge/security-hardened-brightgreen)](./SECURITY.md)
 **The bedrock layer for AI coding agents. One `governance.md`. Any project. Never stale.**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@whitehatd/crag",
-  "version": "0.2.3",
+  "version": "0.2.4",
   "description": "The bedrock layer for AI coding agents. One governance.md. Any project. Never stale.",
   "bin": {
     "crag": "bin/crag.js"

package/src/analyze/ci-extractors.js ADDED Viewed

@@ -0,0 +1,317 @@
+'use strict';
+/**
+ * Multi-CI step extraction.
+ *
+ * The GitHub Actions path already lives in src/governance/yaml-run.js
+ * (extractRunCommands) and we reuse it here. This module adds support for:
+ *   - GitLab CI          (.gitlab-ci.yml)
+ *   - CircleCI           (.circleci/config.yml)
+ *   - Travis CI          (.travis.yml)
+ *   - Azure Pipelines    (azure-pipelines.yml, .azure-pipelines/)
+ *   - Buildkite          (.buildkite/pipeline.yml, .buildkite/pipeline.yaml)
+ *   - Drone              (.drone.yml)
+ *   - Woodpecker         (.woodpecker.yml, .woodpecker/*.yml)
+ *   - Bitbucket          (bitbucket-pipelines.yml)
+ *
+ * Each extractor returns a list of raw shell command strings. The CI
+ * normalizer (normalize.js) dedups and filters them uniformly regardless
+ * of which CI system they came from.
+ */
+const fs = require('fs');
+const path = require('path');
+const { extractRunCommands } = require('../governance/yaml-run');
+const { safeRead } = require('./stacks');
+/**
+ * Probe a project directory for every supported CI configuration and gather
+ * the raw shell commands each one declares.
+ *
+ * Systems are probed in a fixed order (GitHub Actions, GitLab CI, CircleCI,
+ * Travis CI, Azure Pipelines, Buildkite, Drone, Woodpecker, Bitbucket,
+ * Jenkins). The first one found becomes `system`; commands from every
+ * detected system are appended in that same order.
+ *
+ * @param {string} dir - Project root to inspect.
+ * @returns {{ system: (string|null), commands: string[] }} Name of the first
+ *   detected CI system (null when none is found) and the collected commands.
+ */
+function extractCiCommands(dir) {
+  const commands = [];
+  let primary = null;
+  // Only the first detected system is remembered.
+  const detect = (name) => {
+    if (primary === null) primary = name;
+  };
+  // Single configuration file: extract when it exists.
+  const fromFile = (segments, name, extract) => {
+    const file = path.join(dir, ...segments);
+    if (!fs.existsSync(file)) return;
+    detect(name);
+    commands.push(...extract(safeRead(file)));
+  };
+  // Directory of YAML files: extract from each file found by walkYaml.
+  const fromDir = (segments, name, extract) => {
+    const root = path.join(dir, ...segments);
+    if (!fs.existsSync(root)) return;
+    detect(name);
+    for (const file of walkYaml(root)) {
+      commands.push(...extract(safeRead(file)));
+    }
+  };
+  fromDir(['.github', 'workflows'], 'github-actions', (text) => extractRunCommands(text));
+  fromFile(['.gitlab-ci.yml'], 'gitlab-ci', extractGitlabCommands);
+  fromFile(['.circleci', 'config.yml'], 'circle-ci', extractCircleCommands);
+  fromFile(['.travis.yml'], 'travis-ci', extractTravisCommands);
+  fromFile(['azure-pipelines.yml'], 'azure-pipelines', extractAzureCommands);
+  fromFile(['azure-pipelines.yaml'], 'azure-pipelines', extractAzureCommands);
+  fromDir(['.azure-pipelines'], 'azure-pipelines', extractAzureCommands);
+  fromFile(['.buildkite/pipeline.yml'], 'buildkite', extractBuildkiteCommands);
+  fromFile(['.buildkite/pipeline.yaml'], 'buildkite', extractBuildkiteCommands);
+  fromFile(['.drone.yml'], 'drone', extractDroneCommands);
+  // Woodpecker shares Drone's `commands:` format.
+  fromFile(['.woodpecker.yml'], 'woodpecker', extractDroneCommands);
+  fromDir(['.woodpecker'], 'woodpecker', extractDroneCommands);
+  fromFile(['bitbucket-pipelines.yml'], 'bitbucket', extractBitbucketCommands);
+  // Jenkinsfiles are Groovy, not YAML: detected, but never parsed.
+  if (fs.existsSync(path.join(dir, 'Jenkinsfile'))) detect('jenkins');
+  return { system: primary, commands };
+}
+/**
+ * Recursively list every `.yml` / `.yaml` file beneath a directory.
+ *
+ * A directory that cannot be read contributes nothing instead of throwing,
+ * so callers can probe optional CI folders freely.
+ *
+ * @param {string} dir - Directory to walk.
+ * @returns {string[]} Matching file paths, each joined onto `dir`.
+ */
+function walkYaml(dir) {
+  let entries;
+  try {
+    entries = fs.readdirSync(dir, { withFileTypes: true });
+  } catch {
+    return []; // best-effort: skip directories we cannot read
+  }
+  return entries.flatMap((entry) => {
+    const full = path.join(dir, entry.name);
+    if (entry.isDirectory()) return walkYaml(full);
+    const isYaml = ['.yml', '.yaml'].some((ext) => entry.name.endsWith(ext));
+    return isYaml ? [full] : [];
+  });
+}
+// --- GitLab CI -------------------------------------------------------------
+/**
+ * Collect shell commands from a GitLab CI configuration.
+ *
+ * Reads the `script`, `before_script` and `after_script` keys, each of which
+ * may hold a single string or a list of strings.
+ *
+ * @param {string} content - Raw text of `.gitlab-ci.yml`.
+ * @returns {string[]} Commands found under those keys.
+ */
+function extractGitlabCommands(content) {
+  const scriptKeys = ['script', 'before_script', 'after_script'];
+  return extractYamlListField(content, scriptKeys);
+}
+// --- CircleCI --------------------------------------------------------------
+/**
+ * Collect shell commands from a CircleCI configuration.
+ *
+ * Recognised forms:
+ *   - `run: cmd`                     inline shorthand
+ *   - `run: |` + indented lines      block-scalar shorthand
+ *   - `command: cmd`                 inside a `run:` mapping
+ *   - `command: |` + indented lines  block scalar inside a `run:` mapping
+ *
+ * This is a line-oriented heuristic, not a YAML parser. `run:` values that
+ * open a mapping (`{`, `name:`, `command:`) are skipped; the nested
+ * `command:` line is picked up on its own.
+ *
+ * @param {string} content - Raw text of `.circleci/config.yml`.
+ * @returns {string[]} Commands in file order; every line of a block scalar
+ *   becomes its own entry.
+ */
+function extractCircleCommands(content) {
+  const commands = [];
+  const lines = content.split(/\r?\n/);
+  const indentOf = (text) => text.match(/^\s*/)[0].length;
+  // Drop one pair of wrapping quotes, but only when the same quote opens and
+  // closes the value and does not occur inside it. `echo "done"` is kept intact.
+  const unquote = (text) => {
+    const quote = text[0];
+    if (text.length < 2 || (quote !== '"' && quote !== "'") || !text.endsWith(quote)) return text;
+    const inner = text.slice(1, -1);
+    return inner.includes(quote) ? text : inner;
+  };
+  // Push the lines of the block scalar that follows line `start` and return
+  // the index of the last line consumed. The block ends at the first line
+  // indented no deeper than the key, or shallower than the block's first line.
+  const collectBlock = (start, baseIndent) => {
+    let blockIndent = -1;
+    let j = start + 1;
+    for (; j < lines.length; j++) {
+      const inner = lines[j];
+      if (inner.trim() === '') continue;
+      const indent = indentOf(inner);
+      if (indent <= baseIndent) break;
+      if (blockIndent === -1) blockIndent = indent;
+      else if (indent < blockIndent) break;
+      commands.push(inner.trim());
+    }
+    return j - 1;
+  };
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    const runMatch = line.match(/^\s*-?\s*run:\s*(.+)$/);
+    const found = runMatch || line.match(/^\s*command:\s*(.+)$/);
+    if (!found) continue;
+    const rest = found[1].trim();
+    if (!rest || rest.startsWith('#')) continue;
+    if (rest.startsWith('|') || rest.startsWith('>')) {
+      // Block scalar: skip past it so its lines are not re-scanned as keys.
+      i = collectBlock(i, indentOf(line));
+      continue;
+    }
+    if (runMatch && (rest.startsWith('{') || rest.startsWith('name:') || rest.startsWith('command:'))) {
+      continue;
+    }
+    commands.push(unquote(rest));
+  }
+  return commands;
+}
+// --- Travis CI -------------------------------------------------------------
+/**
+ * Collect shell commands from a Travis CI configuration.
+ *
+ * Reads the `script`, `before_script` and `install` keys.
+ *
+ * @param {string} content - Raw text of `.travis.yml`.
+ * @returns {string[]} Commands found under those keys.
+ */
+function extractTravisCommands(content) {
+  const phaseKeys = ['script', 'before_script', 'install'];
+  return extractYamlListField(content, phaseKeys);
+}
+// --- Azure Pipelines -------------------------------------------------------
+/**
+ * Collect shell commands from an Azure Pipelines configuration.
+ *
+ * Recognises `script:`, `bash:`, `pwsh:` and `powershell:` steps, written
+ * either inline (`- script: cmd`) or as a block scalar (`- bash: |`).
+ * Sibling keys that follow a block scalar (such as `displayName:`) are not
+ * treated as commands.
+ *
+ * @param {string} content - Raw pipeline YAML text.
+ * @returns {string[]} Commands in file order; every line of a block scalar
+ *   becomes its own entry.
+ */
+function extractAzureCommands(content) {
+  const commands = [];
+  const lines = content.split(/\r?\n/);
+  const indentOf = (text) => text.match(/^\s*/)[0].length;
+  // Drop one pair of wrapping quotes, but only when the same quote opens and
+  // closes the value and does not occur inside it. `echo "hello"` is kept intact.
+  const unquote = (text) => {
+    const quote = text[0];
+    if (text.length < 2 || (quote !== '"' && quote !== "'") || !text.endsWith(quote)) return text;
+    const inner = text.slice(1, -1);
+    return inner.includes(quote) ? text : inner;
+  };
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    const m = line.match(/^\s*-?\s*(script|bash|pwsh|powershell):\s*(.*)$/);
+    if (!m) continue;
+    const rest = m[2].trim();
+    if (/^[|>][+-]?\s*$/.test(rest)) {
+      // Block scalar: its indentation is set by the first content line, so a
+      // shallower line (a sibling key of the step) ends the block.
+      const baseIndent = indentOf(line);
+      let blockIndent = -1;
+      let j = i + 1;
+      for (; j < lines.length; j++) {
+        const inner = lines[j];
+        if (inner.trim() === '') continue;
+        const indent = indentOf(inner);
+        if (indent <= baseIndent) break;
+        if (blockIndent === -1) blockIndent = indent;
+        else if (indent < blockIndent) break;
+        commands.push(inner.trim());
+      }
+      i = j - 1; // resume at the line that ended the block
+    } else if (rest && !rest.startsWith('#')) {
+      commands.push(unquote(rest));
+    }
+  }
+  return commands;
+}
+// --- Buildkite -------------------------------------------------------------
+/**
+ * Collect shell commands from a Buildkite pipeline.
+ *
+ * Reads both the singular `command:` key and the plural `commands:` key.
+ *
+ * @param {string} content - Raw pipeline YAML text.
+ * @returns {string[]} Commands found under those keys.
+ */
+function extractBuildkiteCommands(content) {
+  const commandKeys = ['command', 'commands'];
+  return extractYamlListField(content, commandKeys);
+}
+// --- Drone / Woodpecker ----------------------------------------------------
+/**
+ * Collect shell commands from a Drone or Woodpecker pipeline, both of which
+ * list step commands under a `commands:` key.
+ *
+ * @param {string} content - Raw pipeline YAML text.
+ * @returns {string[]} Commands found under `commands:`.
+ */
+function extractDroneCommands(content) {
+  const commandKeys = ['commands'];
+  return extractYamlListField(content, commandKeys);
+}
+// --- Bitbucket Pipelines ---------------------------------------------------
+/**
+ * Collect shell commands from a Bitbucket Pipelines configuration, where
+ * each step lists its commands under a `script:` key.
+ *
+ * @param {string} content - Raw text of `bitbucket-pipelines.yml`.
+ * @returns {string[]} Commands found under `script:`.
+ */
+function extractBitbucketCommands(content) {
+  const scriptKeys = ['script'];
+  return extractYamlListField(content, scriptKeys);
+}
+// --- Generic YAML list field extractor -------------------------------------
+/**
+ * Extract commands from YAML keys whose value is a single string, a block
+ * scalar, an inline `[a, b]` list, or a block list of strings. This is the
+ * workhorse used by GitLab, Travis, Buildkite, Drone and Bitbucket.
+ *
+ * It is deliberately heuristic: a full YAML parser would be more accurate,
+ * but no dependencies are shipped. False positives are preferred over
+ * missing real gates.
+ *
+ * Handled details:
+ *   - block lists indented under the key, and lists at the same indent as
+ *     the key (`script:` followed by `- cmd` in the same column);
+ *   - block scalars end at the first line shallower than their first content
+ *     line, so sibling keys of a `- key: |` step are not collected;
+ *   - wrapping quotes are removed only when they are a matching pair;
+ *   - `key: # comment` is treated as an empty value followed by a list.
+ *
+ * @param {string} content - Raw YAML text.
+ * @param {string[]} fields - Key names to look for (matched literally).
+ * @returns {string[]} Commands in file order.
+ */
+function extractYamlListField(content, fields) {
+  const commands = [];
+  if (!fields || fields.length === 0) return commands;
+  const lines = content.split(/\r?\n/);
+  // Field names are matched literally, so regex metacharacters are escaped.
+  const escaped = fields.map((field) => field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
+  const fieldRegex = new RegExp('^(\\s*)(-\\s*)?(?:' + escaped.join('|') + '):\\s*(.*)$');
+  const indentOf = (text) => text.match(/^\s*/)[0].length;
+  // Drop one pair of wrapping quotes, but only when the same quote opens and
+  // closes the value and does not occur inside it. `echo "ok"` is kept intact.
+  const unquote = (text) => {
+    const quote = text[0];
+    if (text.length < 2 || (quote !== '"' && quote !== "'") || !text.endsWith(quote)) return text;
+    const inner = text.slice(1, -1);
+    return inner.includes(quote) ? text : inner;
+  };
+  for (let i = 0; i < lines.length; i++) {
+    const m = lines[i].match(fieldRegex);
+    if (!m) continue;
+    const baseIndent = m[1].length;
+    const keyIsListItem = Boolean(m[2]);
+    const rawRest = m[3].trim();
+    // A value that is only a comment counts as no value at all.
+    const rest = rawRest.startsWith('#') ? '' : rawRest;
+    if (!rest) {
+      // List form: `field:` followed by `- cmd` lines.
+      for (let j = i + 1; j < lines.length; j++) {
+        const inner = lines[j];
+        const trimmed = inner.trim();
+        if (trimmed === '' || trimmed.startsWith('#')) continue;
+        const innerIndent = indentOf(inner);
+        if (innerIndent < baseIndent) break;
+        if (innerIndent === baseIndent) {
+          // Same column as the key: only a `- item` belongs to this key, and
+          // only when the key itself is not an entry of an enclosing list.
+          const isItem = /^\s*-(\s|$)/.test(inner);
+          if (keyIsListItem || !isItem) break;
+        }
+        const listItem = inner.match(/^\s*-\s*(.+)$/);
+        if (listItem) commands.push(unquote(listItem[1].trim()));
+      }
+    } else if (/^[|>][+-]?\s*$/.test(rest)) {
+      // Block scalar: its indentation is set by the first content line.
+      let blockIndent = -1;
+      let j = i + 1;
+      for (; j < lines.length; j++) {
+        const inner = lines[j];
+        if (inner.trim() === '') continue;
+        const indent = indentOf(inner);
+        if (indent <= baseIndent) break;
+        if (blockIndent === -1) blockIndent = indent;
+        else if (indent < blockIndent) break;
+        commands.push(inner.trim());
+      }
+      i = j - 1; // resume at the line that ended the block
+    } else if (rest.startsWith('[')) {
+      // Inline list: script: [cmd1, cmd2]
+      const close = rest.indexOf(']');
+      const body = rest.slice(1, close === -1 ? rest.length : close);
+      for (const piece of body.split(',')) {
+        const item = unquote(piece.trim());
+        if (item) commands.push(item);
+      }
+    } else {
+      commands.push(unquote(rest));
+    }
+  }
+  return commands;
+}
+module.exports = { extractCiCommands, walkYaml, extractYamlListField };

package/src/analyze/doc-mining.js ADDED Viewed

@@ -0,0 +1,142 @@
+'use strict';
+/**
+ * Documentation-based gate mining.
+ *
+ * A CONTRIBUTING.md that says "Before submitting a PR, run `make test`
+ * and `make lint`" is as authoritative as a CI workflow — maintainers
+ * codify their expectations there. We scan these files for shell commands
+ * in code fences and in inline backticks that look like gate candidates.
+ *
+ * Mined gates are returned as ADVISORY — the user should confirm them
+ * rather than have them enforced immediately.
+ */
+const fs = require('fs');
+const path = require('path');
+const { safeRead } = require('./stacks');
+// Contributor-facing documents scanned for gate commands. Each entry is a
+// path relative to the project root; mineDocGates joins it onto `dir` and
+// visits the files in this order, so a command found in several files is
+// attributed to the earliest entry that contains it.
+const DOC_FILES = [
+  'CONTRIBUTING.md',
+  'CONTRIBUTING',
+  '.github/CONTRIBUTING.md',
+  'docs/CONTRIBUTING.md',
+  '.github/PULL_REQUEST_TEMPLATE.md',
+  '.github/pull_request_template.md',
+  'DEVELOPING.md',
+  'DEVELOPMENT.md',
+  'HACKING.md',
+];
+// Start-anchored patterns for commands that may be quality gates. A command
+// passes isGateCandidate when at least one of these matches; every pattern
+// is anchored with `^`, so the tool name must be the first thing in the
+// cleaned command string.
+const GATE_COMMAND_PATTERNS = [
+  /^make\s+\w+/,
+  /^just\s+\w+/,
+  /^task\s+\w+/,
+  /^npm\s+(run|test|ci)/,
+  /^yarn\s+(test|lint|build|check)/,
+  /^pnpm\s+(test|lint|build|check|run)/,
+  /^bun\s+(test|run)/,
+  /^cargo\s+(test|check|clippy|fmt)/,
+  /^go\s+(test|vet|build)/,
+  /^pytest/,
+  /^python\s+-m\s+pytest/,
+  /^tox\s+run/,
+  /^uv\s+run/,
+  /^poetry\s+run/,
+  /^pdm\s+run/,
+  /^hatch\s+run/,
+  /^nox(\s|$)/,
+  /^ruff\s+(check|format)/,
+  /^mypy\s/,
+  /^black\s/,
+  /^bundle\s+exec\s+(rspec|rake|rubocop)/,
+  /^composer\s+(test|lint)/,
+  /^vendor\/bin\/(phpunit|phpcs|phpstan|pest)/,
+  /^mix\s+(test|format|credo)/,
+  /^dotnet\s+(test|build|format)/,
+  /^swift\s+(test|build)/,
+  /^mvn\s+(test|verify)/,
+  /^\.\/(mvnw|gradlew)\s/,
+  /^gradle\s/,
+  /^terraform\s+(fmt|validate)/,
+  /^helm\s+lint/,
+];
+/**
+ * Mine gate candidates from contributor documentation.
+ *
+ * Every file in DOC_FILES that exists under `dir` is scanned twice: once for
+ * lines inside fenced code blocks whose info string is empty or names a
+ * shell (`bash`, `sh`, `shell`, `console`), and once for single-line inline
+ * backtick snippets. Fences are tracked line by line, so documents with
+ * CRLF line endings work and the closing fence of a non-shell block is never
+ * mistaken for an opening fence.
+ *
+ * A snippet is kept only when, after cleanCommandLine, it passes both
+ * isGateCandidate and looksCanonical. Duplicates are dropped, keeping the
+ * first file a command was seen in.
+ *
+ * @param {string} dir - Project root to inspect.
+ * @param {{ maxCandidates?: number }} [opts] - `maxCandidates` caps the
+ *   number of results (default 5).
+ * @returns {{ command: string, source: string }[]} Candidates in discovery
+ *   order; `source` is the DOC_FILES entry the command was found in.
+ */
+function mineDocGates(dir, opts = {}) {
+  const { maxCandidates = 5 } = opts;
+  const candidates = new Map(); // command → first file it was seen in
+  const shellInfo = /^(?:bash|sh|shell|console)?$/;
+  // Clean a raw snippet and record it when it qualifies as a new gate.
+  const consider = (raw, source) => {
+    const cleaned = cleanCommandLine(raw);
+    if (!cleaned || candidates.has(cleaned)) return;
+    if (isGateCandidate(cleaned) && looksCanonical(cleaned)) candidates.set(cleaned, source);
+  };
+  for (const relPath of DOC_FILES) {
+    const full = path.join(dir, relPath);
+    if (!fs.existsSync(full)) continue;
+    const content = safeRead(full);
+    if (!content) continue;
+    // Code fences — a fence line is three or more backticks followed by an
+    // info string that contains no backtick.
+    let insideFence = false;
+    let shellFence = false;
+    for (const line of content.split(/\r?\n/)) {
+      const fence = line.match(/^\s*```+([^`]*)$/);
+      if (fence) {
+        const language = fence[1].trim().split(/\s+/)[0];
+        shellFence = !insideFence && shellInfo.test(language);
+        insideFence = !insideFence;
+        continue;
+      }
+      if (insideFence && shellFence) consider(line, relPath);
+    }
+    // Inline backticks — single-line snippets that look like commands
+    for (const snippet of content.matchAll(/`([^`\n]+)`/g)) {
+      consider(snippet[1], relPath);
+    }
+  }
+  return [...candidates]
+    .slice(0, maxCandidates)
+    .map(([command, source]) => ({ command, source }));
+}
+/**
+ * Decide whether a command reads like a real, runnable gate rather than a
+ * partial example.
+ *
+ * Rejected: commands containing placeholder markers (`[...]`, `{...}`, or
+ * both `<` and `>`), and commands longer than eight whitespace-separated
+ * tokens. Accepted: anything left that contains a known gate verb or tool
+ * name as a whole word.
+ *
+ * @param {string} cmd - Cleaned command line.
+ * @returns {boolean} True when the command looks like a canonical gate.
+ */
+function looksCanonical(cmd) {
+  const bracketMarkers = [/\[.*?\]/, /\{.*?\}/];
+  const hasAngles = cmd.includes('<') && cmd.includes('>');
+  if (bracketMarkers.some((marker) => marker.test(cmd)) || hasAngles) {
+    return false;
+  }
+  // Very long commands are typically worked examples, not gates.
+  const tokenCount = cmd.split(/\s+/).length;
+  if (tokenCount > 8) {
+    return false;
+  }
+  const gateVerb = /\b(test|tests|spec|lint|build|check|fmt|format|typecheck|type-check|verify|validate|clippy|vet|rspec|rubocop|phpunit|phpstan|analyse|credo|dialyzer|pytest|mypy|ruff|black)\b/;
+  return gateVerb.test(cmd);
+}
+/**
+ * Normalise one line taken from documentation into a bare command.
+ *
+ * Trims the line, removes a leading shell prompt (`$`, `#` or `>` followed
+ * by whitespace), then removes a trailing `# comment` that is preceded by
+ * whitespace.
+ *
+ * @param {string} line - Raw line or inline snippet.
+ * @returns {string} The cleaned command (possibly empty).
+ */
+function cleanCommandLine(line) {
+  const promptPrefix = /^[$#>]\s+/;
+  const trailingComment = /\s+#.*$/;
+  return line
+    .trim()
+    .replace(promptPrefix, '')
+    .replace(trailingComment, '');
+}
+/**
+ * Check whether a command matches one of the known gate command patterns.
+ *
+ * Empty values, commands longer than 120 characters and multi-line strings
+ * are rejected before any pattern is tried.
+ *
+ * @param {string} cmd - Cleaned command line.
+ * @returns {boolean} True when some entry of GATE_COMMAND_PATTERNS matches.
+ */
+function isGateCandidate(cmd) {
+  if (!cmd) return false;
+  if (cmd.length > 120 || cmd.includes('\n')) return false;
+  for (const pattern of GATE_COMMAND_PATTERNS) {
+    if (pattern.test(cmd)) return true;
+  }
+  return false;
+}
+module.exports = { mineDocGates, isGateCandidate };