npm - @prodcycle/prodcycle - Versions diffs - 0.5.0 → 0.6.0 - Mend

@prodcycle/prodcycle 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md CHANGED Viewed

@@ -31,18 +31,27 @@ npm install @prodcycle/prodcycle
 ### CLI
 ```bash
-# Scan current directory against SOC 2 and HIPAA
+# Scan current directory against all 3 frameworks (default: soc2, hipaa, nist-csf).
+# Auto-flips to SARIF in known CI environments so output drops into
+# code-scanning dashboards without extra wiring.
+prodcycle scan .
+# Pin a specific framework or subset
 prodcycle scan . --framework soc2,hipaa
+prodcycle scan . --framework hipaa --severity-threshold high
-# Output as SARIF for GitHub Code Scanning
-prodcycle scan . --framework soc2 --format sarif --output results.sarif
+# Explicit SARIF (overrides the CI auto-flip)
+prodcycle scan . --format sarif --output results.sarif
-# Set severity threshold (only report HIGH and above)
-prodcycle scan . --framework hipaa --severity-threshold high
+# CI: scan only files changed in the PR
+prodcycle scan . --pr origin/main..HEAD
 # Auto-configure compliance hooks/instructions for your coding agents
 # (Claude Code, Cursor, Codex, OpenCode, GitHub Copilot, Gemini CLI)
 prodcycle init --agent all
+# Scaffold a CI workflow that delegates to prodcycle/actions/compliance
+prodcycle init --ci github     # also: gitlab | circleci
 ```
 Subcommands: `scan` (full repo scan), `gate` (JSON payload from stdin), `hook` (coding-agent post-edit hook), `init` (agent setup).

package/dist/api-client.d.ts CHANGED Viewed

@@ -16,6 +16,30 @@ export interface GateOptions {
     apiUrl?: string;
     config?: Record<string, unknown>;
 }
+/**
+ * Set when `validateChunked`'s post-`/complete` enrichment GET failed and the
+ * structured `findings` could not be recovered. Distinguishable from the
+ * server-side `scannerError`: this signals "we know there are N findings (per
+ * the summary) but we couldn't fetch them — retry with `prodcycle scans
+ * <id>`," not "we couldn't certify the scan." Surfaced as a named field
+ * (rather than via the `[key: string]: unknown` index signature) so
+ * TypeScript callers get a typed contract instead of `unknown`.
+ *
+ * `code` distinguishes:
+ *   - `BACKFILL_GET_FAILED`: GET threw / non-2xx — backfill couldn't run
+ *   - `BACKFILL_GET_RETURNED_EMPTY`: GET succeeded but findings were still
+ *     empty despite `summary.total > 0`. Usually means eventual consistency
+ *     between the `/complete` writer and the scan-record reader; retrying
+ *     after a short delay typically populates the findings. Surfaced as a
+ *     separate code so SARIF/dashboard consumers can decide whether to
+ *     auto-retry vs. surface as a hard error.
+ */
+export interface BackfillError {
+    code: 'BACKFILL_GET_FAILED' | 'BACKFILL_GET_RETURNED_EMPTY';
+    message: string;
+    scanId: string;
+    summaryTotal: number;
+}
 export interface ScanResult {
     scanId?: string;
     passed: boolean;
@@ -24,6 +48,7 @@ export interface ScanResult {
     summary?: unknown;
     prompt?: string;
     status?: 'IN_PROGRESS' | 'COMPLETED' | 'FAILED';
+    backfillError?: BackfillError;
     [key: string]: unknown;
 }
 interface ApiErrorBody {
@@ -107,6 +132,22 @@ export declare class ComplianceApiClient {
      * to override the conservative defaults.
      */
     validateChunked(files: Record<string, string>, frameworks: string[], options?: ScanOptions): Promise<ScanResult>;
+    /**
+     * Some server versions of `POST /scans/:id/complete` return only the summary,
+     * leaving `findings` empty even when `summary.total > 0`. The findings are
+     * persisted on the scan record and recoverable via `GET /scans/:id`. Call
+     * this after `completeSession` (and any other path where the response shape
+     * may be summary-only) so SARIF/JSON consumers always see structured findings,
+     * not just a count. No-op when findings are already present or the scan is
+     * genuinely clean.
+     *
+     * Timeout: the follow-up GET goes through `this.request`, which wraps every
+     * fetch with `AbortSignal.timeout(REQUEST_TIMEOUT_MS)` (120 s default,
+     * tunable via `PC_REQUEST_TIMEOUT_MS`). A stalled server can't hang
+     * `validateChunked` indefinitely; if the abort fires, the catch below
+     * falls through with the original summary-only result.
+     */
+    private backfillFindingsIfMissing;
     /**
      * Async-validate: returns a `scanId` immediately; caller polls
      * `getScan(scanId)` until status is COMPLETED or FAILED. Useful for CI

package/dist/api-client.js CHANGED Viewed

@@ -176,7 +176,79 @@ class ComplianceApiClient {
             await this.appendChunk(session.scanId, chunk);
         }
         const result = await this.completeSession(session.scanId);
-        return { scanId: session.scanId, ...result };
+        const enriched = await this.backfillFindingsIfMissing(session.scanId, result);
+        return { scanId: session.scanId, ...enriched };
+    }
+    /**
+     * Some server versions of `POST /scans/:id/complete` return only the summary,
+     * leaving `findings` empty even when `summary.total > 0`. The findings are
+     * persisted on the scan record and recoverable via `GET /scans/:id`. Call
+     * this after `completeSession` (and any other path where the response shape
+     * may be summary-only) so SARIF/JSON consumers always see structured findings,
+     * not just a count. No-op when findings are already present or the scan is
+     * genuinely clean.
+     *
+     * Timeout: the follow-up GET goes through `this.request`, which wraps every
+     * fetch with `AbortSignal.timeout(REQUEST_TIMEOUT_MS)` (120 s default,
+     * tunable via `PC_REQUEST_TIMEOUT_MS`). A stalled server can't hang
+     * `validateChunked` indefinitely; if the abort fires, the catch below
+     * falls through with the original summary-only result.
+     */
+    async backfillFindingsIfMissing(scanId, result) {
+        const findingsLength = Array.isArray(result.findings) ? result.findings.length : 0;
+        const summaryTotal = result.summary?.total ?? 0;
+        if (findingsLength > 0 || summaryTotal === 0)
+            return result;
+        try {
+            const full = await this.getScan(scanId);
+            if (Array.isArray(full.findings) && full.findings.length > 0) {
+                return { ...result, findings: full.findings };
+            }
+            // GET succeeded but findings were empty despite `summary.total > 0`.
+            // Most likely cause: eventual consistency between `/complete`'s summary
+            // computation and the scan-record findings writer. Without surfacing a
+            // signal here, the caller would see exactly the silent-drop state the
+            // backfill was added to prevent. Mark as `BACKFILL_GET_RETURNED_EMPTY`
+            // (distinct from the throw case) so consumers can branch on retry vs.
+            // hard-fail behavior.
+            const message = `findings still empty after GET /scans/${scanId} (summary reports ${summaryTotal})`;
+            process.stderr.write(`⚠ Findings backfill ${message}. ` +
+                `Run \`prodcycle scans ${scanId}\` after a short delay to retry.\n`);
+            return {
+                ...result,
+                backfillError: {
+                    code: 'BACKFILL_GET_RETURNED_EMPTY',
+                    message,
+                    scanId,
+                    summaryTotal,
+                },
+            };
+        }
+        catch (err) {
+            // Best-effort enrichment: if the follow-up GET fails, fall through with
+            // the original result rather than break the scan call. The user still
+            // has the summary + scanId.
+            //
+            // BUT — without a user-facing signal, the resulting state (`findings: []`
+            // alongside `summary.total > 0`) looks exactly like the original bug we
+            // were fixing, and the user has no way to know they need to retry via
+            // `prodcycle scans <id>`. Surface the failure as both:
+            //   - a stderr warning (humans running the CLI interactively)
+            //   - a structured `backfillError` field (programmatic consumers / SARIF)
+            const message = err instanceof Error ? err.message : String(err);
+            process.stderr.write(`⚠ Findings backfill GET /scans/${scanId} failed (${message}). ` +
+                `${summaryTotal} finding(s) were detected but only the summary is available. ` +
+                `Run \`prodcycle scans ${scanId}\` to fetch the structured findings.\n`);
+            return {
+                ...result,
+                backfillError: {
+                    code: 'BACKFILL_GET_FAILED',
+                    message,
+                    scanId,
+                    summaryTotal,
+                },
+            };
+        }
     }
     // ─── Async validate ─────────────────────────────────────────────────────
     /**

package/dist/cli.d.ts CHANGED Viewed

@@ -1,2 +1,15 @@
 #!/usr/bin/env node
-export {};
+/**
+ * Detect CI environment via well-known env vars set by the major
+ * platforms. When CI is detected, default `--format` flips to `sarif`
+ * (so output drops straight into GitHub code scanning / GitLab security
+ * dashboards / etc. without extra configuration). Users can still
+ * override with `--format table|json|prompt`.
+ *
+ * The flip is opt-out (set `--format table` explicitly to keep the
+ * human-readable output in CI logs). Heuristic, not load-bearing — if
+ * we miss a CI platform here the user gets the same default they
+ * would have anyway (`table`), they just have to add `--format sarif`
+ * by hand.
+ */
+export declare function isCiEnvironment(env?: NodeJS.ProcessEnv): boolean;

package/dist/cli.js CHANGED Viewed

@@ -34,7 +34,9 @@ var __importStar = (this && this.__importStar) || (function () {
     };
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.isCiEnvironment = isCiEnvironment;
 const commander_1 = require("commander");
+const child_process_1 = require("child_process");
 const fs = __importStar(require("fs"));
 const path = __importStar(require("path"));
 const index_1 = require("./index");
@@ -111,13 +113,55 @@ program
     .name('prodcycle')
     .description('Multi-framework policy-as-code compliance scanner for infrastructure and application code.')
     .version(PKG_VERSION);
+/**
+ * Detect CI environment via well-known env vars set by the major
+ * platforms. When CI is detected, default `--format` flips to `sarif`
+ * (so output drops straight into GitHub code scanning / GitLab security
+ * dashboards / etc. without extra configuration). Users can still
+ * override with `--format table|json|prompt`.
+ *
+ * The flip is opt-out (set `--format table` explicitly to keep the
+ * human-readable output in CI logs). Heuristic, not load-bearing — if
+ * we miss a CI platform here the user gets the same default they
+ * would have anyway (`table`), they just have to add `--format sarif`
+ * by hand.
+ */
+function isCiEnvironment(env = process.env) {
+    // Generic `CI`: match any non-empty value. Most platforms set `CI=true`
+    // but some (Drone CI, Woodpecker CI, custom Jenkins pipelines) use
+    // `CI=1` or another truthy string. Specific platforms below cover the
+    // happy path; this is a defense-in-depth fallback so we don't miss
+    // edge-case environments.
+    return ((env['CI'] != null && env['CI'] !== '') ||
+        env['GITHUB_ACTIONS'] === 'true' ||
+        env['GITLAB_CI'] === 'true' ||
+        env['CIRCLECI'] === 'true' ||
+        env['JENKINS_URL'] != null ||
+        env['BUILDKITE'] === 'true' ||
+        env['TRAVIS'] === 'true' ||
+        env['BITBUCKET_BUILD_NUMBER'] != null);
+}
 // ── scan ────────────────────────────────────────────────────────────────────
 program
     .command('scan [repo_path]')
     .description('Scan a repository for compliance violations')
-    .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2')
-    .option('--format <format>', 'Output format: json, sarif, table, prompt', 'table')
-    .option('--severity-threshold <severity>', 'Minimum severity to include in report', 'low')
+    // Default frameworks: all three. The unique value of this scanner is
+    // cross-framework evaluation in one pass; defaulting to `soc2` only
+    // hid the HIPAA + NIST CSF capability from users who never thought
+    // to override the flag. If users need only one framework they can
+    // still pass `--framework soc2` explicitly.
+    .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2,hipaa,nist-csf')
+    // Default format: `table` for interactive use, but auto-flipped to
+    // `sarif` when CI is detected (see isCiEnvironment above) so GitHub
+    // Code Scanning / GitLab dashboards pick the report up without any
+    // extra wiring. The CLI's --format flag overrides the auto-flip.
+    .option('--format <format>', 'Output format: json, sarif, table, prompt (auto-defaults to sarif in CI)', undefined)
+    // Default severity-threshold: `medium`. `low` includes too many
+    // tier-3 advisory findings that are typically noise unless the user
+    // explicitly opts in; `high` would hide medium-severity weak-crypto
+    // findings that ARE actionable. Medium is the right balance for
+    // first-time users.
+    .option('--severity-threshold <severity>', 'Minimum severity to include in report', 'medium')
     .option('--fail-on <levels>', 'Comma-separated severities that cause non-zero exit', 'critical,high')
     .option('--include <patterns>', 'Comma-separated glob patterns to include')
     .option('--exclude <patterns>', 'Comma-separated glob patterns to exclude')
@@ -126,12 +170,19 @@ program
     .option('--api-key <key>', 'API key for compliance API (or PC_API_KEY env)')
     .option('--async', 'Use the async-validate flow (server returns 202 immediately; CLI polls until COMPLETED). Useful for large scans where holding a connection isn’t practical.')
     .option('--chunked', 'Force the chunked-session flow regardless of payload size. The default already auto-falls-back to chunked when /validate returns 413 with a chunked-endpoint suggestion.')
+    .option('--pr <range>', 'Scan only files changed in a git diff range (e.g. "origin/main..HEAD"). Cuts CI scan time on large repos by skipping unchanged files. Requires baseDir to be the git repo root.')
     .action(async (repoPath, opts) => {
     try {
         const target = repoPath ?? '.';
-        const frameworks = parseList(opts.framework) ?? ['soc2'];
+        const frameworks = parseList(opts.framework) ?? ['soc2', 'hipaa', 'nist-csf'];
         const failOn = parseList(opts.failOn) ?? ['critical', 'high'];
-        const format = (opts.format ?? 'table');
+        // Format resolution:
+        //   1. explicit --format wins
+        //   2. otherwise: sarif when CI is detected, table when interactive
+        // SARIF in CI lets GitHub code scanning / GitLab security dashboards
+        // ingest results with zero extra configuration; table in interactive
+        // shells gives the human-readable summary first-time users expect.
+        const format = (opts.format ?? (isCiEnvironment() ? 'sarif' : 'table'));
         // --async and --chunked are mutually exclusive; pick the explicit
         // mode if either flag is set, otherwise let `scan()` pick (sync
         // with auto-fallback to chunked on 413).
@@ -144,6 +195,20 @@ program
             mode = 'async';
         else if (opts.chunked)
             mode = 'chunked';
+        // --pr: restrict the scan to files in `git diff --name-only <range>`.
+        // Empty diff → exit 0 immediately (nothing to scan).
+        let include = parseList(opts.include);
+        if (opts.pr) {
+            const changed = computeChangedFiles(target, opts.pr);
+            if (changed.length === 0) {
+                console.error(`No files changed in range "${opts.pr}". Nothing to scan.`);
+                process.exit(0);
+            }
+            console.error(`--pr ${opts.pr}: restricting scan to ${changed.length} changed file(s).`);
+            // Use the diff list as exact-match include patterns. minimatch treats
+            // ordinary paths (no glob chars) as literal matches against relPath.
+            include = changed;
+        }
         console.error(`Scanning ${path.resolve(target)} for ${frameworks.join(', ')}` +
             (mode === 'sync' ? '' : ` (${mode} mode)`) +
             '...');
@@ -153,7 +218,7 @@ program
             options: {
                 severityThreshold: opts.severityThreshold,
                 failOn: failOn,
-                include: parseList(opts.include),
+                include,
                 exclude: parseList(opts.exclude),
                 apiUrl: opts.apiUrl,
                 apiKey: opts.apiKey,
@@ -231,23 +296,31 @@ program
         const { ComplianceApiClient } = await Promise.resolve().then(() => __importStar(require('./api-client')));
         const client = new ComplianceApiClient(opts.apiUrl, opts.apiKey);
         const scan = await client.getScan(scanId);
+        // Same scannerError / exit-code-2 plumbing as scan() / gate(): a
+        // user retrieving a stored scan that failed for scanner reasons
+        // must see the same distinction (exit 2, scannerError surfaced).
+        const scannerError = scan.scannerError;
+        const exitCode = scannerError ? 2 : scan.passed ? 0 : 1;
         const payload = {
             scanId,
             passed: scan.passed,
             status: scan.status ?? 'COMPLETED',
             findings: scan.findings ?? [],
             summary: scan.summary,
-            exitCode: scan.passed ? 0 : 1,
+            exitCode,
+            ...(scannerError ? { scannerError } : {}),
         };
         // Use the same renderer as `scan` so format=table/sarif/prompt all work.
         writeOutput(renderReport(payload, format), opts.output);
+        if (scannerError)
+            (0, index_1.emitScannerErrorWarning)(scannerError);
         // Exit 2 if scan is still in progress — the CLI run shouldn't gate on
         // an indeterminate result.
         if (scan.status === 'IN_PROGRESS') {
             console.error(`Scan ${scanId} is still IN_PROGRESS. Re-run the same command to keep polling, or use 'pc scan --async' to wait for completion.`);
             process.exit(2);
         }
-        process.exit(payload.exitCode);
+        process.exit(exitCode);
     }
     catch (error) {
         console.error(`✗ Error: ${error.message}`);
@@ -348,18 +421,22 @@ async function collectHookFiles(filePath) {
 // ── init ────────────────────────────────────────────────────────────────────
 program
     .command('init')
-    .description('Configure compliance hooks for coding agents')
+    .description('Configure compliance hooks for coding agents and/or CI workflows')
     .option('--agent <agents>', 'Comma-separated agents to configure (claude, cursor, codex, opencode, github-copilot, gemini-cli). Use "all" to configure every agent. Default: auto-detect.')
+    .option('--ci <providers>', 'Comma-separated CI providers to scaffold (github, gitlab, circleci). Use "all" for every provider. Opt-in only \u2014 never auto-detected.')
     .option('--force', 'Overwrite existing compliance hook entries')
     .option('--dir <path>', 'Project directory to configure', '.')
     .action((opts) => {
     try {
         const dir = path.resolve(opts.dir ?? '.');
         const agents = resolveAgents(opts.agent, dir);
-        if (agents.length === 0) {
-            console.error('init: no agents selected and none auto-detected. ' +
-                'Use --agent <name> to configure explicitly (claude, cursor, codex, ' +
-                'opencode, github-copilot, gemini-cli, or "all").');
+        const ciProviders = resolveCiProviders(opts.ci);
+        if (agents.length === 0 && ciProviders.length === 0) {
+            console.error('init: nothing to do. ' +
+                'Use --agent <name> to configure a coding agent (claude, cursor, codex, ' +
+                'opencode, github-copilot, gemini-cli, or "all"), and/or --ci <provider> ' +
+                'to scaffold CI workflows (github, gitlab, circleci, or "all"). ' +
+                'Without --agent the CLI also auto-detects agents already in use.');
             process.exit(2);
         }
         let anyFailed = false;
@@ -370,6 +447,12 @@ program
             if (result.status === 'failed')
                 anyFailed = true;
         }
+        for (const provider of ciProviders) {
+            const result = configureCiProvider(provider, dir, !!opts.force);
+            process.stdout.write(result.message + '\n');
+            if (result.status === 'failed')
+                anyFailed = true;
+        }
         process.exit(anyFailed ? 1 : 0);
     }
     catch (error) {
@@ -605,6 +688,224 @@ function configureInstructionFile(agent, dir, relPath, force, writtenPaths) {
 function escapeRegExp(s) {
     return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
 }
+const ALL_CI_PROVIDERS = ['github', 'gitlab', 'circleci'];
+function isCiProvider(name) {
+    return ALL_CI_PROVIDERS.includes(name);
+}
+function resolveCiProviders(userChoice) {
+    if (!userChoice)
+        return [];
+    const list = parseList(userChoice) ?? [];
+    if (list.length === 1 && list[0] === 'all')
+        return ALL_CI_PROVIDERS.slice();
+    const valid = [];
+    for (const name of list) {
+        if (isCiProvider(name))
+            valid.push(name);
+        else
+            console.error(`init: unknown CI provider "${name}" — ignoring`);
+    }
+    return valid;
+}
+function configureCiProvider(provider, dir, force) {
+    switch (provider) {
+        case 'github':
+            return writeCiFile(provider, dir, path.join('.github', 'workflows', 'prodcycle.yml'), GITHUB_WORKFLOW, force);
+        case 'gitlab':
+            return writeCiFile(provider, dir, '.gitlab-ci.prodcycle.yml', GITLAB_WORKFLOW, force);
+        case 'circleci':
+            return writeCiFile(provider, dir, path.join('.circleci', 'prodcycle.yml'), CIRCLECI_WORKFLOW, force);
+    }
+}
+function writeCiFile(provider, dir, relPath, content, force) {
+    const fullPath = path.join(dir, relPath);
+    if (fs.existsSync(fullPath) && !force) {
+        return {
+            status: 'already',
+            message: `[ci:${provider}] ${relPath} already exists. Use --force to overwrite.`,
+        };
+    }
+    const parent = path.dirname(fullPath);
+    if (!fs.existsSync(parent))
+        fs.mkdirSync(parent, { recursive: true });
+    fs.writeFileSync(fullPath, content);
+    // GitHub uses the `prodcycle/actions/compliance` action, which reads
+    // its key from `secrets.PRODCYCLE_API_KEY`. GitLab and CircleCI invoke
+    // the CLI directly, which reads `PC_API_KEY` from the environment.
+    const followup = provider === 'gitlab'
+        ? `Include it from .gitlab-ci.yml: \`include: '${relPath}'\`. `
+        : provider === 'circleci'
+            ? `Reference it from .circleci/config.yml or merge the contents in. `
+            : '';
+    const secretName = provider === 'github' ? 'PRODCYCLE_API_KEY' : 'PC_API_KEY';
+    return {
+        status: 'installed',
+        message: `[ci:${provider}] wrote ${fullPath}. ` +
+            followup +
+            `Set ${secretName} as a secret/variable in your ${provider} project before the first run.`,
+    };
+}
+// GitHub: delegate to the dedicated `prodcycle/actions/compliance` GitHub
+// Action rather than calling the CLI directly. The action handles diff vs
+// full-repo scan automatically (PR events vs push events), posts inline
+// annotations on the PR diff, and writes a summary comment — none of
+// which the CLI's own SARIF output reproduces. See
+// https://github.com/prodcycle/actions for the full input reference.
+const GITHUB_WORKFLOW = `name: Prodcycle Compliance
+on:
+  pull_request:
+  push:
+    # Update this list to match your repo's default branch (e.g. master,
+    # develop). GitHub Actions does not support a dynamic
+    # \$default-branch / \${{ github.event.repository.default_branch }}
+    # value here, so the branch name has to be literal.
+    branches: [main]
+jobs:
+  scan:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: write
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - uses: prodcycle/actions/compliance@v2
+        with:
+          api-key: \${{ secrets.PRODCYCLE_API_KEY }}
+`;
+const GITLAB_WORKFLOW = `# Prodcycle compliance scan. Include from your main .gitlab-ci.yml:
+#   include:
+#     - local: .gitlab-ci.prodcycle.yml
+#
+# Set PC_API_KEY as a CI/CD variable (Settings → CI/CD → Variables) before
+# the first run. Mark it Masked + Protected.
+prodcycle:
+  stage: test
+  image: node:22-alpine
+  variables:
+    GIT_DEPTH: "0"
+  before_script:
+    - apk add --no-cache git
+  script:
+    - |
+      if [ "$CI_PIPELINE_SOURCE" = "merge_request_event" ]; then
+        git fetch --no-tags origin "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
+        npx --yes prodcycle scan . \\
+          --pr "origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME..HEAD" \\
+          --format sarif --output prodcycle.sarif
+      else
+        npx --yes prodcycle scan . --format sarif --output prodcycle.sarif
+      fi
+  artifacts:
+    when: always
+    paths:
+      - prodcycle.sarif
+    reports:
+      sast: prodcycle.sarif
+  rules:
+    - if: $CI_PIPELINE_SOURCE == "merge_request_event"
+    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
+`;
+const CIRCLECI_WORKFLOW = `# Prodcycle compliance scan. To use this, either replace .circleci/config.yml
+# or include it as a continuation/orb. Minimum example:
+#
+#   version: 2.1
+#   workflows:
+#     compliance:
+#       jobs:
+#         - prodcycle-scan
+#
+# Set PC_API_KEY as a project environment variable in CircleCI before the
+# first run.
+#
+# CircleCI does not expose the PR target branch as a built-in env var
+# (\`CIRCLE_BASE_BRANCH\` does not exist; see
+# https://circleci.com/docs/reference/variables/), so to scope PR scans
+# to changed files only, set a project-level env var \`PRODCYCLE_PR_BASE\`
+# to the branch your PRs target (e.g. \`main\`, \`develop\`). When unset,
+# this template runs a full-repo scan.
+version: 2.1
+jobs:
+  prodcycle-scan:
+    docker:
+      - image: cimg/node:22.0
+    steps:
+      - checkout
+      - run:
+          name: Run Prodcycle compliance scan
+          command: |
+            if [ -n "\${PRODCYCLE_PR_BASE:-}" ] && [ -n "\${CIRCLE_PULL_REQUEST:-}" ]; then
+              git fetch --no-tags origin "$PRODCYCLE_PR_BASE"
+              npx --yes prodcycle scan . \\
+                --pr "origin/$PRODCYCLE_PR_BASE..HEAD" \\
+                --format sarif --output prodcycle.sarif
+            else
+              npx --yes prodcycle scan . --format sarif --output prodcycle.sarif
+            fi
+      # \`when: always\` so the SARIF artifact uploads even when the scan
+      # exits non-zero — compliance scanners exit 1 when findings exist,
+      # which is precisely the case where you want the report preserved.
+      - store_artifacts:
+          path: prodcycle.sarif
+          destination: prodcycle-sarif
+          when: always
+workflows:
+  compliance:
+    jobs:
+      - prodcycle-scan
+`;
+/**
+ * Compute the list of files changed in a git diff range, relative to repo root.
+ * Filters to ACMR (Added/Copied/Modified/Renamed) so deleted files don't get
+ * scanned (they're not on disk anymore, and walk() would skip them anyway).
+ *
+ * Errors handled explicitly:
+ *   - `ENOENT` (git not in PATH) → actionable "git executable not found"
+ *   - `ETIMEDOUT` (git stalled — credential helper / auth prompt / etc.)
+ *     → fail fast with a 30s timeout so CI jobs don't hang
+ *   - non-zero exit → forward git's stderr so the user can see e.g. the
+ *     "fatal: bad revision" message and fix the range argument
+ *
+ * Output paths are normalised to the platform separator: git emits POSIX
+ * forward-slashes always, but the file walker on Windows produces
+ * back-slashed `relPath` values. Without this conversion the literal
+ * minimatch comparison silently excludes every changed file on Windows.
+ */
+const GIT_DIFF_TIMEOUT_MS = 30_000;
+function computeChangedFiles(repoPath, range) {
+    let stdout;
+    try {
+        stdout = (0, child_process_1.execFileSync)('git', ['-C', repoPath, 'diff', '--name-only', '--diff-filter=ACMR', range], {
+            encoding: 'utf8',
+            stdio: ['ignore', 'pipe', 'pipe'],
+            timeout: GIT_DIFF_TIMEOUT_MS,
+        });
+    }
+    catch (e) {
+        if (e?.code === 'ENOENT') {
+            console.error('--pr: git executable not found in PATH');
+            process.exit(2);
+        }
+        if (e?.code === 'ETIMEDOUT' || e?.signal === 'SIGTERM') {
+            console.error(`--pr: git diff timed out after ${GIT_DIFF_TIMEOUT_MS}ms (range "${range}"). ` +
+                'Check that the range does not require network access or credentials.');
+            process.exit(2);
+        }
+        const stderr = e?.stderr?.toString?.() ?? e?.message ?? 'unknown error';
+        console.error(`--pr: git diff failed for range "${range}": ${stderr.trim()}`);
+        process.exit(2);
+    }
+    return stdout
+        .split('\n')
+        .map((s) => s.trim().split('/').join(path.sep))
+        .filter(Boolean);
+}
 function readStdin() {
     return new Promise((resolve, reject) => {
         if (process.stdin.isTTY) {
@@ -617,4 +918,10 @@ function readStdin() {
         process.stdin.on('error', reject);
     });
 }
-program.parse(injectScanDefault(process.argv));
+// Only auto-parse when invoked as a script (i.e. via the `prodcycle`
+// bin entry). Importing this module from tests must NOT execute the
+// CLI — otherwise `node --test` triggers a real `program.parse` and
+// fails before the test cases can run.
+if (require.main === module) {
+    program.parse(injectScanDefault(process.argv));
+}

package/dist/index.d.ts CHANGED Viewed

@@ -1,8 +1,28 @@
-import { ScanOptions, GateOptions } from './api-client';
+import { ScanOptions, GateOptions, BackfillError } from './api-client';
 export * from './api-client';
 export * from './formatters/table';
 export * from './formatters/prompt';
 export * from './formatters/sarif';
+/**
+ * Set when the server-side scanner threw and the API was configured to
+ * fail closed (the default). When this is present, callers MUST treat
+ * `passed: false` as "scanner unavailable — cannot certify compliance"
+ * rather than "code is dirty." Mirrors the API's `ScannerErrorInfo`
+ * shape; see `packages/compliance-code-scanner/api/src/domain/services/
+ * compliance-scan.service.ts` (`ScannerErrorInfo`) for the field
+ * contract.
+ *
+ * Without this surfaced to the CLI's --output JSON, a benchmark or CI
+ * report shows `passed: false, findings: []` and the user can't tell
+ * whether the code passed (no findings, all clean) from whether the
+ * scanner failed (no findings because nothing got evaluated).
+ */
+export interface ScannerError {
+    code: 'SCANNER_GATE_THREW';
+    message: string;
+    errorClass?: string;
+    errorCode?: string;
+}
 interface ScanReturn {
     scanId?: string;
     passed: boolean;
@@ -10,7 +30,22 @@ interface ScanReturn {
     findings: unknown[];
     report: unknown;
     summary: unknown;
+    scannerError?: ScannerError;
+    /**
+     * Set when `validateChunked`'s findings-backfill GET failed. The summary
+     * still reflects the real finding count, but the structured findings are
+     * unavailable in this response. Callers should retry via `prodcycle scans
+     * <scanId>` to recover them. SARIF/JSON consumers branch on this to flag
+     * the result as incomplete rather than mistakenly clean.
+     */
+    backfillError?: BackfillError;
 }
+/**
+ * Format and write the scanner-error warning to stderr. Centralized so the
+ * wording stays consistent across `scan()`, `gate()`, and the `scans <id>`
+ * CLI subcommand.
+ *
+ * @param scannerError - Error details to report; `message` is always
+ *   printed, `errorClass` and `errorCode` only when set.
+ */
+export declare function emitScannerErrorWarning(scannerError: ScannerError): void;
 /**
  * Scan a repository by collecting files and sending them to the API.
  *
@@ -31,6 +66,7 @@ export declare function scan(params: {
  * endpoint, used by coding-agent post-edit hooks).
  */
 export declare function gate(options: GateOptions): Promise<{
+    scannerError?: ScannerError | undefined;
     passed: boolean;
     exitCode: number;
     findings: unknown[];

package/dist/index.js CHANGED Viewed

@@ -14,6 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.emitScannerErrorWarning = emitScannerErrorWarning;
 exports.scan = scan;
 exports.gate = gate;
 const api_client_1 = require("./api-client");
@@ -22,6 +23,17 @@ __exportStar(require("./api-client"), exports);
 __exportStar(require("./formatters/table"), exports);
 __exportStar(require("./formatters/prompt"), exports);
 __exportStar(require("./formatters/sarif"), exports);
+/**
+ * Format and write the scanner-error warning to stderr. Centralized so the
+ * wording stays consistent across `scan()`, `gate()`, and the `scans <id>`
+ * CLI subcommand.
+ *
+ * Emits a single newline-terminated line of the form
+ *   ⚠ Scanner error: MESSAGE (errorClass=…) (errorCode=…)
+ * where each parenthesized suffix is appended only when the corresponding
+ * field is truthy.
+ *
+ * @param scannerError - Object carrying `message` and, optionally,
+ *   `errorClass` and `errorCode`.
+ */
+function emitScannerErrorWarning(scannerError) {
+    process.stderr.write(`⚠ Scanner error: ${scannerError.message}` +
+        (scannerError.errorClass ? ` (errorClass=${scannerError.errorClass})` : '') +
+        (scannerError.errorCode ? ` (errorCode=${scannerError.errorCode})` : '') +
+        '\n');
+}
 /**
  * Scan a repository by collecting files and sending them to the API.
  *
@@ -56,13 +68,40 @@ async function scan(params) {
     else {
         response = await client.validate(files, frameworks, options);
     }
+    // Pull `scannerError` through if the API set it. The fields are picked
+    // explicitly (rather than spreading `...response`) so the CLI's public
+    // surface doesn't accidentally expose internal fields if the API adds
+    // them. The `ScannerError` type lives in this module rather than
+    // `api-client.ts`, so the TypeScript source casts here to bridge the
+    // type boundary (the cast is erased in this compiled output);
+    // `backfillError` is typed in `ScanResult` and needs no cast.
+    const scannerError = response.scannerError;
+    const backfillError = response.backfillError;
+    // Exit code semantics:
+    //   0 = passed (no actionable findings, no scanner error)
+    //   1 = findings present, code not clean
+    //   2 = scanner unavailable — could not certify either way; fail-closed
+    // Distinguish (1) from (2) so CI policy can decide whether a non-zero
+    // exit means "developer must fix code" or "operator must investigate
+    // scanner."
+    const exitCode = scannerError ? 2 : response.passed ? 0 : 1;
+    // Surface scanner errors prominently to stderr so the user sees the
+    // distinction between a clean pass and an undetermined result. The
+    // JSON output already carries the structured field for programmatic
+    // consumers; this is for humans running the CLI interactively.
+    if (scannerError)
+        emitScannerErrorWarning(scannerError);
     return {
         scanId: response.scanId,
         passed: response.passed,
-        exitCode: response.passed ? 0 : 1,
+        exitCode,
         findings: response.findings ?? [],
         report: response.report ?? null,
         summary: response.summary,
+        ...(scannerError ? { scannerError } : {}),
+        // Forward `backfillError` so SARIF/JSON consumers downstream of `scan()`
+        // can detect "summary is real but findings unavailable" without parsing
+        // stderr. validateChunked sets it when its enrichment GET fails.
+        ...(backfillError ? { backfillError } : {}),
     };
 }
 /**
@@ -73,11 +112,19 @@ async function gate(options) {
     const { files, frameworks = ['soc2'], ...scanOpts } = options;
     const client = new api_client_1.ComplianceApiClient(options.apiUrl, options.apiKey);
     const response = await client.hook(files, frameworks, scanOpts);
+    // Same scannerError plumbing as scan() above. Coding-agent hooks
+    // especially need to distinguish "code is clean" from "scanner is
+    // down" — agents should NOT proceed on the latter.
+    const scannerError = response.scannerError;
+    const exitCode = scannerError ? 2 : response.passed ? 0 : 1;
+    if (scannerError)
+        emitScannerErrorWarning(scannerError);
     return {
         passed: response.passed,
-        exitCode: response.passed ? 0 : 1,
+        exitCode,
         findings: response.findings ?? [],
         prompt: response.prompt,
         summary: response.summary,
+        ...(scannerError ? { scannerError } : {}),
     };
 }

package/dist/utils/fs.js CHANGED Viewed

@@ -38,7 +38,73 @@ const fs = __importStar(require("fs"));
 const path = __importStar(require("path"));
 const minimatch_1 = require("minimatch");
 const MAX_FILE_SIZE = 256 * 1024; // 256 KB
-const MAX_TOTAL_FILES = 10_000;
+/**
+ * Total file ceiling per scan. Hit on the OSS-CLI benchmark scanning
+ * `hapifhir/hapi-fhir` (~13k files) — the CLI silently dropped ~3k files
+ * past the cap. Default raised from the original 10k to 50k, and now
+ * overridable via `PRODCYCLE_MAX_FILES` for monorepos that need a
+ * different ceiling without patching/rebuilding. The API's chunked-
+ * session endpoint already supports up to 2,000 files per chunk, so a
+ * 50k-file repo is fed in 25+ chunks; the cap is here purely so a
+ * pathological symlink loop or `.git`-tracked-as-source repo doesn't
+ * exhaust the client's memory before the SCANNABLE_EXTENSIONS filter
+ * has a chance to drop most of the entries.
+ *
+ * The override is read once, when this module is loaded, and parsed with
+ * `parseInt(raw, 10)`. An unset or empty variable, a value that does not
+ * start with a number, or a value <= 0 falls back to 50,000.
+ *
+ * NOTE(review): `parseInt` stops at the first non-digit character, so
+ * `"50k"` yields 50 and `"1e5"` yields 1 instead of falling back to the
+ * default — confirm this lenient parsing is intended.
+ */
+const MAX_TOTAL_FILES = (() => {
+    const raw = process.env['PRODCYCLE_MAX_FILES'];
+    if (!raw)
+        return 50_000;
+    const parsed = parseInt(raw, 10);
+    return Number.isFinite(parsed) && parsed > 0 ? parsed : 50_000;
+})();
+/**
+ * Extensions and exact filenames the server-side `isScannable` filter
+ * accepts. Pre-filtering client-side avoids:
+ *   - bloating the wire payload with images / fonts / docs / archives
+ *     that the API just drops on receipt
+ *   - hitting MAX_TOTAL_FILES on repos like hapi-fhir or the Linux
+ *     kernel where most files are not scannable
+ *
+ * Keep in lock-step with `api/src/domain/services/compliance-scan.service.ts`:
+ *   - APPLICATION_CODE_EXTENSIONS (the source-code allowlist)
+ *   - INFRASTRUCTURE_EXTENSIONS (.tf, .yaml, .yml, .json, .sql)
+ *   - INFRASTRUCTURE_FILENAMES (dockerfile, .env)
+ *
+ * Entries in both sets are lower-case: extensions include the leading
+ * dot, filenames are whole file names.
+ *
+ * NOTE(review): `SCANNABLE_FILENAMES` also lists `containerfile`, which
+ * the INFRASTRUCTURE_FILENAMES summary above does not mention — confirm
+ * the server-side filter accepts it.
+ *
+ * Files outside this set are skipped during walk. Source-of-truth is
+ * the server filter; this is just an optimization so we don't pay the
+ * wire cost for files the server will reject anyway.
+ */
+const SCANNABLE_EXTENSIONS = new Set([
+    // Application code (must mirror APPLICATION_CODE_EXTENSIONS in the API)
+    '.ts',
+    '.tsx',
+    '.js',
+    '.jsx',
+    '.py',
+    '.go',
+    '.java',
+    '.rb',
+    '.php',
+    '.rs',
+    '.cs',
+    '.kt',
+    '.scala',
+    '.c',
+    '.cpp',
+    '.h',
+    '.hpp',
+    // Infrastructure-as-code (must mirror INFRASTRUCTURE_EXTENSIONS in the API)
+    '.tf',
+    '.yaml',
+    '.yml',
+    '.json',
+    '.sql',
+]);
+const SCANNABLE_FILENAMES = new Set([
+    'dockerfile',
+    'containerfile',
+    '.env',
+]);
 /**
  * Directories skipped unconditionally. Kept in parity with
  * `packages/compliance-code-scanner/src/ignore-utils.ts`.
@@ -79,7 +145,15 @@ const SKIP_DIRS = new Set([
 ]);
 const SKIP_DIR_SUFFIXES = ['.egg-info'];
 const SKIP_FILE_EXTENSIONS = ['.lock', '.min.js', '.min.css', '.map', '.bundle.js', '.tfstate', '.tfstate.backup'];
-const SKIP_FILE_NAMES = new Set(['package-lock.json']);
+// Files the server-side `isScannable` filter drops on receipt. Listing
+// them client-side avoids paying the wire cost just to have the API
+// throw the bytes away. Keep in lock-step with the server's filter in
+// `compliance-code-scanner/src/collector.ts`.
+const SKIP_FILE_NAMES = new Set([
+    'package-lock.json',
+    'package.json',
+    'tsconfig.json',
+]);
 /**
  * Load .gitignore patterns from the repo root.
  *
@@ -132,8 +206,15 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
         if (matchesAny(relPath, userExcludes))
             return true;
     }
-    // .env* files are always scanned, even if listed in .gitignore (common case)
-    if (name.startsWith('.env') || name.endsWith('.env'))
+    // .env-family files are always scanned even if .gitignored — the
+    // common case for `.env`, `.env.local`, `.env.production`, `.envrc`,
+    // etc., where the whole point of scanning is to catch hardcoded
+    // secrets. The previous `endsWith('.env')` half of this carve-out
+    // also matched arbitrary `foo.env` files (a build artifact, a config
+    // dump, etc.), which let unrelated files bypass gitignore. Restrict
+    // to names that start with `.env`. Keep in lock-step with
+    // `compliance-code-scanner/src/ignore-utils.ts`.
+    if (name.startsWith('.env'))
         return false;
     for (const pattern of ignores) {
         if (name === pattern ||
@@ -159,6 +240,30 @@ function shouldSkipFileByName(name) {
         return true;
     return SKIP_FILE_EXTENSIONS.some((ext) => name.endsWith(ext));
 }
+/**
+ * Mirror of the server's `isScannable` filter, applied client-side so we
+ * don't ship files the API will just drop. Also keeps repos like
+ * hapi-fhir (~13k files, mostly Java + some CSS/HTML/templates) from
+ * tripping MAX_TOTAL_FILES on non-scannable noise.
+ *
+ * Matching is case-insensitive (the name is lower-cased first) and checks,
+ * in order: exact membership in SCANNABLE_FILENAMES, a `dockerfile.`
+ * prefix, a `.env` prefix, and finally whether the text from the last `.`
+ * onward is in SCANNABLE_EXTENSIONS. A name that contains no `.` and
+ * matched none of the earlier checks is rejected.
+ *
+ * @param name - File name as passed by `walk` (the entry name, not the
+ *   repo-relative path).
+ * @returns `true` if the file should be kept for scanning, else `false`.
+ */
+function isScannableFilename(name) {
+    const lower = name.toLowerCase();
+    if (SCANNABLE_FILENAMES.has(lower))
+        return true;
+    // Dockerfile variants (dockerfile.prod, dockerfile.dev, …)
+    if (lower.startsWith('dockerfile.'))
+        return true;
+    // Any .env* file — kept in lock-step with the carve-out in `shouldIgnore`,
+    // which preserves the whole .env* family from gitignore. The server may
+    // drop unknown variants (e.g. .envrc) but it's better to forward them than
+    // silently diverge from the ignore policy.
+    if (lower.startsWith('.env'))
+        return true;
+    const dot = lower.lastIndexOf('.');
+    if (dot === -1)
+        return false;
+    return SCANNABLE_EXTENSIONS.has(lower.slice(dot));
+}
 async function collectFiles(baseDir, includePatterns, excludePatterns) {
     const repoRoot = path.resolve(baseDir);
     const ignores = loadGitignore(repoRoot);
@@ -195,6 +300,14 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
             continue;
         if (shouldSkipFileByName(name))
             continue;
+        // Skip files the server-side `isScannable` filter will drop anyway.
+        // No point paying the wire cost. When `--include` patterns are given
+        // we honor those instead — explicit user intent overrides the
+        // server-shape allowlist.
+        if ((!includePatterns || includePatterns.length === 0) &&
+            !isScannableFilename(name)) {
+            continue;
+        }
         if (includePatterns && includePatterns.length > 0 && !matchesAny(relPath, includePatterns)) {
             continue;
         }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@prodcycle/prodcycle",
-  "version": "0.5.0",
+  "version": "0.6.0",
   "description": "Multi-framework policy-as-code compliance scanner for infrastructure and application code.",
   "homepage": "https://docs.prodcycle.com",
   "repository": {
@@ -20,6 +20,7 @@
   },
   "scripts": {
     "build": "tsc",
+    "test": "npm run build && node --test test/*.test.mjs",
     "prepublishOnly": "npm run build"
   },
   "keywords": [