npm - @whitehatd/crag - Versions diffs - 0.2.2 → 0.2.4 - Mend

@whitehatd/crag 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/README.md +2 -1
package/package.json +1 -1
package/src/analyze/ci-extractors.js +317 -0
package/src/analyze/doc-mining.js +142 -0
package/src/analyze/gates.js +417 -0
package/src/analyze/normalize.js +146 -0
package/src/analyze/stacks.js +453 -0
package/src/analyze/task-runners.js +146 -0
package/src/cli-errors.js +55 -0
package/src/cli.js +10 -2
package/src/commands/analyze.js +185 -271
package/src/commands/check.js +67 -34
package/src/commands/compile.js +69 -22
package/src/commands/diff.js +10 -43
package/src/commands/init.js +55 -31
package/src/compile/atomic-write.js +12 -4
package/src/compile/github-actions.js +17 -11
package/src/compile/husky.js +6 -5
package/src/compile/pre-commit.js +13 -5
package/src/governance/gate-to-shell.js +11 -1
package/src/governance/parse.js +41 -3
package/src/governance/yaml-run.js +145 -0
package/src/skills/post-start-validation.md +1 -1
package/src/skills/pre-start-context.md +1 -1
package/src/update/integrity.js +20 -7
package/src/update/skill-sync.js +1 -1

package/src/governance/gate-to-shell.js CHANGED Viewed

@@ -3,11 +3,21 @@
 /**
  * Make a value safe to embed between double quotes in a shell command.
  *
  * Backslash, backtick, double quote and dollar sign each receive a backslash
  * prefix. All four are matched by one regex pass over the input, so a
  * backslash inserted as an escape is never itself escaped again.
  *
  * @param {*} s - Value to escape; coerced with String().
  * @returns {string} The escaped text, without the surrounding double quotes.
  */
 function shellEscapeDoubleQuoted(s) {
   const text = String(s);
   return text.replace(/[\\`"$]/g, (ch) => `\\${ch}`);
 }
/**
 * Make a value safe to embed between single quotes in a shell command.
 *
 * Nothing can be escaped inside a single-quoted shell string, so every
 * single quote in the input is rewritten as: close the quoted string, emit a
 * backslash-escaped quote, reopen the quoted string. Example: foo'bar becomes
 * foo'\''bar, which the caller wraps as 'foo'\''bar'.
 *
 * @param {*} s - Value to escape; coerced with String().
 * @returns {string} The escaped text, without the surrounding single quotes.
 */
function shellEscapeSingleQuoted(s) {
  const pieces = String(s).split("'");
  return pieces.join("'\\''");
}
 /**
  * Convert human-readable gate descriptions to shell commands.
  * e.g. Verify src/skills/pre-start-context.md contains "discovers any project"
@@ -25,4 +35,4 @@ function gateToShell(cmd) {
   return cmd;
 }
-module.exports = { gateToShell, shellEscapeDoubleQuoted };
+module.exports = { gateToShell, shellEscapeDoubleQuoted, shellEscapeSingleQuoted };

package/src/governance/parse.js CHANGED Viewed

@@ -14,6 +14,30 @@
 // Protects against ReDoS on catastrophic-backtracking-prone regex.
 const MAX_CONTENT_SIZE = 256 * 1024; // 256 KB
/**
 * Validate an annotation path (used for `path:` and `if:` on gate sections).
 *
 * Rejects:
 *   - Non-strings, empty strings, and strings longer than 512 characters
 *   - Newlines, carriage returns or null bytes (defense against injection
 *     into generated YAML/shell)
 *   - Rooted paths: anything starting with / or \ (POSIX absolute, Windows
 *     current-drive root such as \Windows, UNC such as \\server\share)
 *   - Any Windows drive prefix, including drive-relative forms (C:\x, C:/x,
 *     C:x, C:..\x) — the drive-relative forms are not "absolute" but still
 *     resolve outside the repository
 *   - Parent traversal: a `..` path segment (a name merely containing ".."
 *     such as `a..b` is allowed)
 *
 * These paths are interpolated into shell commands and YAML scalars downstream
 * (husky, pre-commit, github-actions), so the parser is the single chokepoint
 * where untrusted path strings from governance.md become structured data.
 *
 * @param {*} p - Candidate path taken from a governance annotation.
 * @returns {boolean} true only for a relative, in-repo-looking path.
 */
function isValidAnnotationPath(p) {
  if (typeof p !== 'string' || p.length === 0) return false;
  if (p.length > 512) return false;
  if (/[\n\r\x00]/.test(p)) return false;
  // Rooted on either platform; a single leading backslash is rooted on Windows.
  if (/^[\\/]/.test(p)) return false;
  // Drive prefix with or without a separator: "C:..\x" would otherwise slip
  // past the segment check below because its first segment is "C:..".
  if (/^[A-Za-z]:/.test(p)) return false;
  // Parent traversal (match as a path segment, not as substring of a name)
  return !p.split(/[\\/]/).includes('..');
}
 /**
  * Extract a markdown section body by heading name.
  * Starts after the first line matching `## <name>` (with optional trailing text),
@@ -90,8 +114,22 @@ function parseGovernance(content) {
       if (sub) {
         section = sub[1].trim().toLowerCase();
         sectionMeta = { path: null, condition: null };
-        if (sub[2] === 'path') sectionMeta.path = sub[3].trim();
-        if (sub[2] === 'if') sectionMeta.condition = sub[3].trim();
+        if (sub[2] === 'path') {
+          const raw = sub[3].trim();
+          if (isValidAnnotationPath(raw)) {
+            sectionMeta.path = raw;
+          } else {
+            result.warnings.push(`Invalid path annotation in section "${sub[1].trim()}": ${JSON.stringify(raw)} (must be a relative in-repo path without "..")`);
+          }
+        }
+        if (sub[2] === 'if') {
+          const raw = sub[3].trim();
+          if (isValidAnnotationPath(raw)) {
+            sectionMeta.condition = raw;
+          } else {
+            result.warnings.push(`Invalid if annotation in section "${sub[1].trim()}": ${JSON.stringify(raw)} (must be a relative in-repo path without "..")`);
+          }
+        }
         result.gates[section] = {
           commands: [],
           path: sectionMeta.path,
@@ -179,4 +217,4 @@ function flattenGatesRich(gates) {
   return out;
 }
-module.exports = { parseGovernance, flattenGates, flattenGatesRich, extractSection };
+module.exports = { parseGovernance, flattenGates, flattenGatesRich, extractSection, isValidAnnotationPath };

package/src/governance/yaml-run.js ADDED Viewed

@@ -0,0 +1,145 @@
+'use strict';
/**
 * Shared YAML `run:` command extraction for GitHub Actions workflows.
 *
 * Both `crag analyze` and `crag diff` need to enumerate the shell commands
 * inside CI workflows to either generate gates from them or compare them
 * against governance. This module is the single source of truth so a fix to
 * the parser benefits both commands.
 *
 * Handles:
 *   run: npm test                    (inline)
 *   run: "npm test"                  (inline, quoted)
 *   run: |                           (literal block scalar)
 *     npm test
 *     npm run build
 *   run: >-                          (folded block scalar)
 *     npm test
 *   run: |2-                         (block scalar with indentation indicator)
 *   run: | # note                    (block scalar header with comment)
 *
 * This is a line-based scan, not a YAML parser:
 *   - Each non-blank, non-comment line of a block scalar becomes one command
 *     (folded scalars are not joined).
 *   - Comment-only lines and blank lines inside blocks are skipped.
 *   - Inline values lose their quotes only when one matching pair wraps the
 *     whole value; quotes that belong to the command (`echo "hi"`) are kept.
 *
 * @param {*} content - Workflow file text; coerced with String().
 * @returns {string[]} Commands in file order.
 */
function extractRunCommands(content) {
  const commands = [];
  const lines = String(content).split(/\r?\n/);
  // Block scalar header: `|` or `>`, then optional chomping (+/-) and
  // indentation (1-9) indicators in either order, then an optional comment.
  const blockHeader = /^[|>](?:[+-][1-9]?|[1-9][+-]?)?(?:\s+#.*)?\s*$/;

  for (let i = 0; i < lines.length; i++) {
    const m = lines[i].match(/^(\s*)-?\s*run:\s*(.*)$/);
    if (!m) continue;
    const baseIndent = m[1].length;
    const rest = m[2].trim();

    if (blockHeader.test(rest)) {
      // Block scalar: collect following lines with greater indent than the key
      let j = i + 1;
      for (; j < lines.length; j++) {
        const ln = lines[j];
        const trimmed = ln.trim();
        if (trimmed === '') continue;
        if (ln.match(/^(\s*)/)[1].length <= baseIndent) break;
        if (!trimmed.startsWith('#')) commands.push(trimmed);
      }
      // Resume after the block so script lines that happen to look like
      // `run: ...` are not extracted a second time as inline commands.
      i = j - 1;
      continue;
    }

    if (!rest || rest.startsWith('#')) continue;

    // Inline: unwrap only a matching pair of surrounding quotes.
    const quote = rest[0];
    const isQuoted =
      rest.length >= 2 && (quote === '"' || quote === "'") && rest.endsWith(quote);
    const command = isQuoted ? rest.slice(1, -1) : rest;
    if (command) commands.push(command);
  }
  return commands;
}
/**
 * Patterns recognised by isGateCommand, grouped by ecosystem. Built once at
 * module load instead of on every call. None of the regexes use the `g` or
 * `y` flag, so sharing them across calls carries no `lastIndex` state.
 */
const GATE_COMMAND_PATTERNS = Object.freeze([
  // Node ecosystem
  /\bnpm (run |ci|test|install)/,
  /\bnpx /,
  /\bnode /,
  /\byarn (test|lint|build|check)/,
  /\bpnpm (run |test|lint|build|check|install|i\b)/,
  /\bbun (test|run)/,
  /\bdeno (test|lint|fmt|check)/,
  // Rust
  /\bcargo (test|build|check|clippy|fmt)/,
  /\brustfmt/,
  // Go
  /\bgo (test|build|vet)/,
  /\bgolangci-lint/,
  // Python — direct + modern runner wrappers
  /\bpytest/,
  /\bpython -m/,
  /\bruff/,
  /\bmypy/,
  /\bflake8/,
  /\bblack\b/,
  /\bisort\b/,
  /\bpylint\b/,
  /\btox\s+(run|r)/,
  /\buv run /,
  /\bpoetry run /,
  /\bpdm run /,
  /\bhatch run /,
  /\brye run /,
  /\bnox\b/,
  // JVM
  /\bgradle/,
  /\bmvn /,
  /\bmaven/,
  /\.\/gradlew/,
  /\.\/mvnw/,
  // Ruby
  /\bbundle exec /,
  /\brake\b/,
  /\brspec\b/,
  /\brubocop/,
  // PHP
  /\bcomposer (test|lint|run|validate)/,
  /\bvendor\/bin\/(phpunit|phpcs|phpstan|psalm|pest|php-cs-fixer|rector)/,
  // .NET
  /\bdotnet (test|build|format)/,
  // Swift
  /\bswift (test|build)/,
  /\bswiftlint/,
  // Elixir
  /\bmix (test|format|credo|dialyzer)/,
  // Node linters
  /\beslint/,
  /\bbiome/,
  /\bprettier/,
  /\btsc/,
  /\bxo\b/,
  // Task runners
  /\bmake /,
  /\bjust /,
  /\btask /,
  // Containers / infra
  /\bdocker (build|compose)/,
  /\bterraform (fmt|validate|plan)/,
  /\btflint/,
  /\bhelm (lint|template)/,
  /\bkubeconform/,
  /\bkubeval/,
  /\bhadolint/,
  /\bactionlint/,
  /\bmarkdownlint/,
  /\byamllint/,
  /\bbuf (lint|build)/,
  /\bspectral lint/,
  /\bshellcheck/,
  /\bsemgrep/,
  /\btrivy/,
  /\bgitleaks/,
]);
/**
 * Classify a shell command as a "gate" — i.e., a quality check that belongs
 * in governance.md (test, lint, typecheck, build, etc.) as opposed to
 * deployment, git operations, or environment setup.
 *
 * This is a heuristic: the command counts as a gate when any pattern in
 * GATE_COMMAND_PATTERNS matches anywhere in it. It deliberately leans toward
 * over-matching, because false positives (extra gates) are easier to spot
 * than false negatives (missing gates).
 *
 * @param {string} cmd - A single shell command line.
 * @returns {boolean} true when at least one known gate pattern matches.
 */
function isGateCommand(cmd) {
  return GATE_COMMAND_PATTERNS.some((p) => p.test(cmd));
}
+module.exports = { extractRunCommands, isGateCommand };

package/src/skills/post-start-validation.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: post-start-validation
-version: 0.2.1
+version: 0.2.2
 source_hash: 5a64dfe68b13577dff818fa63ddb6185be360c80b100f205bc586aac39e19e80
 description: Universal validation and knowledge capture. Detects what changed, runs governance gates, captures knowledge, verifies deployment. Works for any project.
 ---

package/src/skills/pre-start-context.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: pre-start-context
-version: 0.2.1
+version: 0.2.2
 source_hash: b7be8434b99d5b189c904263e783d573c82109218725cc31fbd4fa1bf81538b6
 description: Universal context loader. Discovers any project's stack, architecture, and state at runtime. Reads governance.md for project-specific rules. Works for any language, framework, or deployment target.
 ---

package/src/update/integrity.js CHANGED Viewed

@@ -47,8 +47,18 @@ function readFrontmatter(filePath) {
 }
 /**
- * Format a value for YAML frontmatter.
+ * Format a value for YAML frontmatter or YAML block scalars.
  * Quotes strings that contain special characters or could be ambiguous.
+ *
+ * Rules roughly follow YAML 1.2 plain-scalar constraints:
+ *   - Leading/trailing whitespace → must quote
+ *   - Special markers anywhere: : # & * ! | > ' " % @ `
+ *   - Leading flow indicators: [ ] { } , (would start a flow sequence/map)
+ *   - Leading dash + space looks like a block sequence entry
+ *   - Leading ? or ! looks like a complex-key marker or a YAML tag, respectively
+ *   - Reserved words that coerce to other types: true/false/null/yes/no/~
+ *   - Number-like strings
+ *   - Empty string
  */
 function yamlScalar(value) {
   if (value == null) return '';
@@ -60,15 +70,18 @@ function yamlScalar(value) {
     return `|\n${indented}`;
   }
-  // Characters that require quoting in YAML plain scalar:
-  //   : leading/trailing or followed by space (key separator)
-  //   # comment marker
-  //   special markers: & * ! | > ' " % @ `
-  //   leading/trailing whitespace
-  //   strings that could be misread as other types: true, false, null, yes, no, numbers
+  // Control characters (tabs, etc.) must be quoted so they survive round-trip.
+  // eslint-disable-next-line no-control-regex
+  if (/[\x00-\x1f]/.test(str)) {
+    return `"${str.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\t/g, '\\t').replace(/\r/g, '\\r')}"`;
+  }
   const needsQuoting =
     /^[\s]|[\s]$/.test(str) ||
     /[:#&*!|>'"%@`]/.test(str) ||
+    /^[\[\]{},]/.test(str) ||
+    /^- /.test(str) ||
+    /^[?!]/.test(str) ||
     /^(true|false|null|yes|no|~)$/i.test(str) ||
     /^-?\d+(\.\d+)?$/.test(str) ||
     str === '';

package/src/update/skill-sync.js CHANGED Viewed

@@ -113,4 +113,4 @@ function syncSkills(targetDir, options = {}) {
   return result;
 }
-module.exports = { syncSkills };
+module.exports = { syncSkills, isTrustedSource };