@prodcycle/prodcycle 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -31,18 +31,27 @@ npm install @prodcycle/prodcycle
31
31
  ### CLI
32
32
 
33
33
  ```bash
34
- # Scan current directory against SOC 2 and HIPAA
34
+ # Scan current directory against all 3 frameworks (default: soc2, hipaa, nist-csf).
35
+ # Auto-flips to SARIF in known CI environments so output drops into
36
+ # code-scanning dashboards without extra wiring.
37
+ prodcycle scan .
38
+
39
+ # Pin a specific framework or subset
35
40
  prodcycle scan . --framework soc2,hipaa
41
+ prodcycle scan . --framework hipaa --severity-threshold high
36
42
 
37
- # Output as SARIF for GitHub Code Scanning
38
- prodcycle scan . --framework soc2 --format sarif --output results.sarif
43
+ # Explicit SARIF (overrides the CI auto-flip)
44
+ prodcycle scan . --format sarif --output results.sarif
39
45
 
40
- # Set severity threshold (only report HIGH and above)
41
- prodcycle scan . --framework hipaa --severity-threshold high
46
+ # CI: scan only files changed in the PR
47
+ prodcycle scan . --pr origin/main..HEAD
42
48
 
43
49
  # Auto-configure compliance hooks/instructions for your coding agents
44
50
  # (Claude Code, Cursor, Codex, OpenCode, GitHub Copilot, Gemini CLI)
45
51
  prodcycle init --agent all
52
+
53
+ # Scaffold a CI workflow that delegates to prodcycle/actions/compliance
54
+ prodcycle init --ci github # also: gitlab | circleci
46
55
  ```
47
56
 
48
57
  Subcommands: `scan` (full repo scan), `gate` (JSON payload from stdin), `hook` (coding-agent post-edit hook), `init` (agent and CI workflow setup).
@@ -16,6 +16,30 @@ export interface GateOptions {
16
16
  apiUrl?: string;
17
17
  config?: Record<string, unknown>;
18
18
  }
19
+ /**
20
+ * Set when `validateChunked`'s post-`/complete` enrichment GET failed and the
21
+ * structured `findings` could not be recovered. Distinguishable from the
22
+ * server-side `scannerError`: this signals "we know there are N findings (per
23
+ * the summary) but we couldn't fetch them — retry with `prodcycle scans
24
+ * <id>`," not "we couldn't certify the scan." Surfaced as a named field
25
+ * (rather than via the `[key: string]: unknown` index signature) so
26
+ * TypeScript callers get a typed contract instead of `unknown`.
27
+ *
28
+ * `code` distinguishes:
29
+ * - `BACKFILL_GET_FAILED`: GET threw / non-2xx — backfill couldn't run
30
+ * - `BACKFILL_GET_RETURNED_EMPTY`: GET succeeded but findings were still
31
+ * empty despite `summary.total > 0`. Usually means eventual consistency
32
+ * between the `/complete` writer and the scan-record reader; retrying
33
+ * after a short delay typically populates the findings. Surfaced as a
34
+ * separate code so SARIF/dashboard consumers can decide whether to
35
+ * auto-retry vs. surface as a hard error.
36
+ */
37
+ export interface BackfillError {
38
+ code: 'BACKFILL_GET_FAILED' | 'BACKFILL_GET_RETURNED_EMPTY';
39
+ message: string;
40
+ scanId: string;
41
+ summaryTotal: number;
42
+ }
19
43
  export interface ScanResult {
20
44
  scanId?: string;
21
45
  passed: boolean;
@@ -24,6 +48,7 @@ export interface ScanResult {
24
48
  summary?: unknown;
25
49
  prompt?: string;
26
50
  status?: 'IN_PROGRESS' | 'COMPLETED' | 'FAILED';
51
+ backfillError?: BackfillError;
27
52
  [key: string]: unknown;
28
53
  }
29
54
  interface ApiErrorBody {
@@ -107,6 +132,22 @@ export declare class ComplianceApiClient {
107
132
  * to override the conservative defaults.
108
133
  */
109
134
  validateChunked(files: Record<string, string>, frameworks: string[], options?: ScanOptions): Promise<ScanResult>;
135
+ /**
136
+ * Some server versions of `POST /scans/:id/complete` return only the summary,
137
+ * leaving `findings` empty even when `summary.total > 0`. The findings are
138
+ * persisted on the scan record and recoverable via `GET /scans/:id`. Call
139
+ * this after `completeSession` (and any other path where the response shape
140
+ * may be summary-only) so SARIF/JSON consumers always see structured findings,
141
+ * not just a count. No-op when findings are already present or the scan is
142
+ * genuinely clean.
143
+ *
144
+ * Timeout: the follow-up GET goes through `this.request`, which wraps every
145
+ * fetch with `AbortSignal.timeout(REQUEST_TIMEOUT_MS)` (120 s default,
146
+ * tunable via `PC_REQUEST_TIMEOUT_MS`). A stalled server can't hang
147
+ * `validateChunked` indefinitely; if the abort fires, the catch below
148
+ * falls through with the original summary-only result.
149
+ */
150
+ private backfillFindingsIfMissing;
110
151
  /**
111
152
  * Async-validate: returns a `scanId` immediately; caller polls
112
153
  * `getScan(scanId)` until status is COMPLETED or FAILED. Useful for CI
@@ -176,7 +176,79 @@ class ComplianceApiClient {
176
176
  await this.appendChunk(session.scanId, chunk);
177
177
  }
178
178
  const result = await this.completeSession(session.scanId);
179
- return { scanId: session.scanId, ...result };
179
+ const enriched = await this.backfillFindingsIfMissing(session.scanId, result);
180
+ return { scanId: session.scanId, ...enriched };
181
+ }
182
+ /**
183
+ * Some server versions of `POST /scans/:id/complete` return only the summary,
184
+ * leaving `findings` empty even when `summary.total > 0`. The findings are
185
+ * persisted on the scan record and recoverable via `GET /scans/:id`. Call
186
+ * this after `completeSession` (and any other path where the response shape
187
+ * may be summary-only) so SARIF/JSON consumers always see structured findings,
188
+ * not just a count. No-op when findings are already present or the scan is
189
+ * genuinely clean.
190
+ *
191
+ * Timeout: the follow-up GET goes through `this.request`, which wraps every
192
+ * fetch with `AbortSignal.timeout(REQUEST_TIMEOUT_MS)` (120 s default,
193
+ * tunable via `PC_REQUEST_TIMEOUT_MS`). A stalled server can't hang
194
+ * `validateChunked` indefinitely; if the abort fires, the catch below
195
+ * falls through with the original summary-only result plus a `backfillError`.
196
+ */
197
+ async backfillFindingsIfMissing(scanId, result) {
198
+ const findingsLength = Array.isArray(result.findings) ? result.findings.length : 0;
199
+ const summaryTotal = result.summary?.total ?? 0;
200
+ if (findingsLength > 0 || summaryTotal === 0)
201
+ return result;
202
+ try {
203
+ const full = await this.getScan(scanId);
204
+ if (Array.isArray(full.findings) && full.findings.length > 0) {
205
+ return { ...result, findings: full.findings };
206
+ }
207
+ // GET succeeded but findings were empty despite `summary.total > 0`.
208
+ // Most likely cause: eventual consistency between `/complete`'s summary
209
+ // computation and the scan-record findings writer. Without surfacing a
210
+ // signal here, the caller would see exactly the silent-drop state the
211
+ // backfill was added to prevent. Mark as `BACKFILL_GET_RETURNED_EMPTY`
212
+ // (distinct from the throw case) so consumers can branch on retry vs.
213
+ // hard-fail behavior.
214
+ const message = `findings still empty after GET /scans/${scanId} (summary reports ${summaryTotal})`;
215
+ process.stderr.write(`⚠ Findings backfill ${message}. ` +
216
+ `Run \`prodcycle scans ${scanId}\` after a short delay to retry.\n`);
217
+ return {
218
+ ...result,
219
+ backfillError: {
220
+ code: 'BACKFILL_GET_RETURNED_EMPTY',
221
+ message,
222
+ scanId,
223
+ summaryTotal,
224
+ },
225
+ };
226
+ }
227
+ catch (err) {
228
+ // Best-effort enrichment: if the follow-up GET fails, fall through with
229
+ // the original result rather than break the scan call. The user still
230
+ // has the summary + scanId.
231
+ //
232
+ // BUT — without a user-facing signal, the resulting state (`findings: []`
233
+ // alongside `summary.total > 0`) looks exactly like the original bug we
234
+ // were fixing, and the user has no way to know they need to retry via
235
+ // `prodcycle scans <id>`. Surface the failure as both:
236
+ // - a stderr warning (humans running the CLI interactively)
237
+ // - a structured `backfillError` field (programmatic consumers / SARIF)
238
+ const message = err instanceof Error ? err.message : String(err);
239
+ process.stderr.write(`⚠ Findings backfill GET /scans/${scanId} failed (${message}). ` +
240
+ `${summaryTotal} finding(s) were detected but only the summary is available. ` +
241
+ `Run \`prodcycle scans ${scanId}\` to fetch the structured findings.\n`);
242
+ return {
243
+ ...result,
244
+ backfillError: {
245
+ code: 'BACKFILL_GET_FAILED',
246
+ message,
247
+ scanId,
248
+ summaryTotal,
249
+ },
250
+ };
251
+ }
180
252
  }
181
253
  // ─── Async validate ─────────────────────────────────────────────────────
182
254
  /**
package/dist/cli.d.ts CHANGED
@@ -1,2 +1,15 @@
1
1
  #!/usr/bin/env node
2
- export {};
2
+ /**
3
+ * Detect CI environment via well-known env vars set by the major
4
+ * platforms. When CI is detected, default `--format` flips to `sarif`
5
+ * (so output drops straight into GitHub code scanning / GitLab security
6
+ * dashboards / etc. without extra configuration). Users can still
7
+ * override with `--format table|json|prompt`.
8
+ *
9
+ * The flip is opt-out (set `--format table` explicitly to keep the
10
+ * human-readable output in CI logs). Heuristic, not load-bearing — if
11
+ * we miss a CI platform here the user gets the same default they
12
+ * would have anyway (`table`), they just have to add `--format sarif`
13
+ * by hand.
14
+ */
15
+ export declare function isCiEnvironment(env?: NodeJS.ProcessEnv): boolean;
package/dist/cli.js CHANGED
@@ -34,7 +34,9 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  };
35
35
  })();
36
36
  Object.defineProperty(exports, "__esModule", { value: true });
37
+ exports.isCiEnvironment = isCiEnvironment;
37
38
  const commander_1 = require("commander");
39
+ const child_process_1 = require("child_process");
38
40
  const fs = __importStar(require("fs"));
39
41
  const path = __importStar(require("path"));
40
42
  const index_1 = require("./index");
@@ -111,13 +113,55 @@ program
111
113
  .name('prodcycle')
112
114
  .description('Multi-framework policy-as-code compliance scanner for infrastructure and application code.')
113
115
  .version(PKG_VERSION);
116
+ /**
117
+ * Detect CI environment via well-known env vars set by the major
118
+ * platforms. When CI is detected, default `--format` flips to `sarif`
119
+ * (so output drops straight into GitHub code scanning / GitLab security
120
+ * dashboards / etc. without extra configuration). Users can still
121
+ * override with `--format table|json|prompt`.
122
+ *
123
+ * The flip is opt-out (set `--format table` explicitly to keep the
124
+ * human-readable output in CI logs). Heuristic, not load-bearing — if
125
+ * we miss a CI platform here the user gets the same default they
126
+ * would have anyway (`table`), they just have to add `--format sarif`
127
+ * by hand.
128
+ */
129
+ function isCiEnvironment(env = process.env) {
130
+ // Generic `CI`: match any non-empty value. Most platforms set `CI=true`
131
+ // but some (Drone CI, Woodpecker CI, custom Jenkins pipelines) use
132
+ // `CI=1` or another truthy string. Specific platforms below cover the
133
+ // happy path; this is a defense-in-depth fallback so we don't miss
134
+ // edge-case environments.
135
+ return ((env['CI'] != null && env['CI'] !== '') ||
136
+ env['GITHUB_ACTIONS'] === 'true' ||
137
+ env['GITLAB_CI'] === 'true' ||
138
+ env['CIRCLECI'] === 'true' ||
139
+ env['JENKINS_URL'] != null ||
140
+ env['BUILDKITE'] === 'true' ||
141
+ env['TRAVIS'] === 'true' ||
142
+ env['BITBUCKET_BUILD_NUMBER'] != null);
143
+ }
114
144
  // ── scan ────────────────────────────────────────────────────────────────────
115
145
  program
116
146
  .command('scan [repo_path]')
117
147
  .description('Scan a repository for compliance violations')
118
- .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2')
119
- .option('--format <format>', 'Output format: json, sarif, table, prompt', 'table')
120
- .option('--severity-threshold <severity>', 'Minimum severity to include in report', 'low')
148
+ // Default frameworks: all three. The unique value of this scanner is
149
+ // cross-framework evaluation in one pass; defaulting to `soc2` only
150
+ // hid the HIPAA + NIST CSF capability from users who never thought
151
+ // to override the flag. If users need only one framework they can
152
+ // still pass `--framework soc2` explicitly.
153
+ .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2,hipaa,nist-csf')
154
+ // Default format: `table` for interactive use, but auto-flipped to
155
+ // `sarif` when CI is detected (see isCiEnvironment above) so GitHub
156
+ // Code Scanning / GitLab dashboards pick the report up without any
157
+ // extra wiring. The CLI's --format flag overrides the auto-flip.
158
+ .option('--format <format>', 'Output format: json, sarif, table, prompt (auto-defaults to sarif in CI)', undefined)
159
+ // Default severity-threshold: `medium`. `low` includes too many
160
+ // tier-3 advisory findings that are typically noise unless the user
161
+ // explicitly opts in; `high` would hide medium-severity weak-crypto
162
+ // findings that ARE actionable. Medium is the right balance for
163
+ // first-time users.
164
+ .option('--severity-threshold <severity>', 'Minimum severity to include in report', 'medium')
121
165
  .option('--fail-on <levels>', 'Comma-separated severities that cause non-zero exit', 'critical,high')
122
166
  .option('--include <patterns>', 'Comma-separated glob patterns to include')
123
167
  .option('--exclude <patterns>', 'Comma-separated glob patterns to exclude')
@@ -126,12 +170,19 @@ program
126
170
  .option('--api-key <key>', 'API key for compliance API (or PC_API_KEY env)')
127
171
  .option('--async', 'Use the async-validate flow (server returns 202 immediately; CLI polls until COMPLETED). Useful for large scans where holding a connection isn’t practical.')
128
172
  .option('--chunked', 'Force the chunked-session flow regardless of payload size. The default already auto-falls-back to chunked when /validate returns 413 with a chunked-endpoint suggestion.')
173
+ .option('--pr <range>', 'Scan only files changed in a git diff range (e.g. "origin/main..HEAD"). Cuts CI scan time on large repos by skipping unchanged files. Requires baseDir to be the git repo root.')
129
174
  .action(async (repoPath, opts) => {
130
175
  try {
131
176
  const target = repoPath ?? '.';
132
- const frameworks = parseList(opts.framework) ?? ['soc2'];
177
+ const frameworks = parseList(opts.framework) ?? ['soc2', 'hipaa', 'nist-csf'];
133
178
  const failOn = parseList(opts.failOn) ?? ['critical', 'high'];
134
- const format = (opts.format ?? 'table');
179
+ // Format resolution:
180
+ // 1. explicit --format wins
181
+ // 2. otherwise: sarif when CI is detected, table when interactive
182
+ // SARIF in CI lets GitHub code scanning / GitLab security dashboards
183
+ // ingest results with zero extra configuration; table in interactive
184
+ // shells gives the human-readable summary first-time users expect.
185
+ const format = (opts.format ?? (isCiEnvironment() ? 'sarif' : 'table'));
135
186
  // --async and --chunked are mutually exclusive; pick the explicit
136
187
  // mode if either flag is set, otherwise let `scan()` pick (sync
137
188
  // with auto-fallback to chunked on 413).
@@ -144,6 +195,20 @@ program
144
195
  mode = 'async';
145
196
  else if (opts.chunked)
146
197
  mode = 'chunked';
198
+ // --pr: restrict the scan to files in `git diff --name-only <range>`.
199
+ // Empty diff → exit 0 immediately (nothing to scan).
200
+ let include = parseList(opts.include);
201
+ if (opts.pr) {
202
+ const changed = computeChangedFiles(target, opts.pr);
203
+ if (changed.length === 0) {
204
+ console.error(`No files changed in range "${opts.pr}". Nothing to scan.`);
205
+ process.exit(0);
206
+ }
207
+ console.error(`--pr ${opts.pr}: restricting scan to ${changed.length} changed file(s).`);
208
+ // Use the diff list as exact-match include patterns. minimatch treats
209
+ // ordinary paths (no glob chars) as literal matches against relPath.
210
+ include = changed;
211
+ }
147
212
  console.error(`Scanning ${path.resolve(target)} for ${frameworks.join(', ')}` +
148
213
  (mode === 'sync' ? '' : ` (${mode} mode)`) +
149
214
  '...');
@@ -153,7 +218,7 @@ program
153
218
  options: {
154
219
  severityThreshold: opts.severityThreshold,
155
220
  failOn: failOn,
156
- include: parseList(opts.include),
221
+ include,
157
222
  exclude: parseList(opts.exclude),
158
223
  apiUrl: opts.apiUrl,
159
224
  apiKey: opts.apiKey,
@@ -231,23 +296,31 @@ program
231
296
  const { ComplianceApiClient } = await Promise.resolve().then(() => __importStar(require('./api-client')));
232
297
  const client = new ComplianceApiClient(opts.apiUrl, opts.apiKey);
233
298
  const scan = await client.getScan(scanId);
299
+ // Same scannerError / exit-code-2 plumbing as scan() / gate(): a
300
+ // user retrieving a stored scan that failed for scanner reasons
301
+ // must see the same distinction (exit 2, scannerError surfaced).
302
+ const scannerError = scan.scannerError;
303
+ const exitCode = scannerError ? 2 : scan.passed ? 0 : 1;
234
304
  const payload = {
235
305
  scanId,
236
306
  passed: scan.passed,
237
307
  status: scan.status ?? 'COMPLETED',
238
308
  findings: scan.findings ?? [],
239
309
  summary: scan.summary,
240
- exitCode: scan.passed ? 0 : 1,
310
+ exitCode,
311
+ ...(scannerError ? { scannerError } : {}),
241
312
  };
242
313
  // Use the same renderer as `scan` so format=table/sarif/prompt all work.
243
314
  writeOutput(renderReport(payload, format), opts.output);
315
+ if (scannerError)
316
+ (0, index_1.emitScannerErrorWarning)(scannerError);
244
317
  // Exit 2 if scan is still in progress — the CLI run shouldn't gate on
245
318
  // an indeterminate result.
246
319
  if (scan.status === 'IN_PROGRESS') {
247
320
  console.error(`Scan ${scanId} is still IN_PROGRESS. Re-run the same command to keep polling, or use 'pc scan --async' to wait for completion.`);
248
321
  process.exit(2);
249
322
  }
250
- process.exit(payload.exitCode);
323
+ process.exit(exitCode);
251
324
  }
252
325
  catch (error) {
253
326
  console.error(`✗ Error: ${error.message}`);
@@ -348,18 +421,22 @@ async function collectHookFiles(filePath) {
348
421
  // ── init ────────────────────────────────────────────────────────────────────
349
422
  program
350
423
  .command('init')
351
- .description('Configure compliance hooks for coding agents')
424
+ .description('Configure compliance hooks for coding agents and/or CI workflows')
352
425
  .option('--agent <agents>', 'Comma-separated agents to configure (claude, cursor, codex, opencode, github-copilot, gemini-cli). Use "all" to configure every agent. Default: auto-detect.')
426
+ .option('--ci <providers>', 'Comma-separated CI providers to scaffold (github, gitlab, circleci). Use "all" for every provider. Opt-in only \u2014 never auto-detected.')
353
427
  .option('--force', 'Overwrite existing compliance hook entries')
354
428
  .option('--dir <path>', 'Project directory to configure', '.')
355
429
  .action((opts) => {
356
430
  try {
357
431
  const dir = path.resolve(opts.dir ?? '.');
358
432
  const agents = resolveAgents(opts.agent, dir);
359
- if (agents.length === 0) {
360
- console.error('init: no agents selected and none auto-detected. ' +
361
- 'Use --agent <name> to configure explicitly (claude, cursor, codex, ' +
362
- 'opencode, github-copilot, gemini-cli, or "all").');
433
+ const ciProviders = resolveCiProviders(opts.ci);
434
+ if (agents.length === 0 && ciProviders.length === 0) {
435
+ console.error('init: nothing to do. ' +
436
+ 'Use --agent <name> to configure a coding agent (claude, cursor, codex, ' +
437
+ 'opencode, github-copilot, gemini-cli, or "all"), and/or --ci <provider> ' +
438
+ 'to scaffold CI workflows (github, gitlab, circleci, or "all"). ' +
439
+ 'Without --agent the CLI also auto-detects agents already in use.');
363
440
  process.exit(2);
364
441
  }
365
442
  let anyFailed = false;
@@ -370,6 +447,12 @@ program
370
447
  if (result.status === 'failed')
371
448
  anyFailed = true;
372
449
  }
450
+ for (const provider of ciProviders) {
451
+ const result = configureCiProvider(provider, dir, !!opts.force);
452
+ process.stdout.write(result.message + '\n');
453
+ if (result.status === 'failed')
454
+ anyFailed = true;
455
+ }
373
456
  process.exit(anyFailed ? 1 : 0);
374
457
  }
375
458
  catch (error) {
@@ -605,6 +688,224 @@ function configureInstructionFile(agent, dir, relPath, force, writtenPaths) {
605
688
  function escapeRegExp(s) {
606
689
  return s.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
607
690
  }
691
+ const ALL_CI_PROVIDERS = ['github', 'gitlab', 'circleci'];
692
+ function isCiProvider(name) {
693
+ return ALL_CI_PROVIDERS.includes(name);
694
+ }
695
+ function resolveCiProviders(userChoice) {
696
+ if (!userChoice)
697
+ return [];
698
+ const list = parseList(userChoice) ?? [];
699
+ if (list.length === 1 && list[0] === 'all')
700
+ return ALL_CI_PROVIDERS.slice();
701
+ const valid = [];
702
+ for (const name of list) {
703
+ if (isCiProvider(name))
704
+ valid.push(name);
705
+ else
706
+ console.error(`init: unknown CI provider "${name}" — ignoring`);
707
+ }
708
+ return valid;
709
+ }
710
+ function configureCiProvider(provider, dir, force) {
711
+ switch (provider) {
712
+ case 'github':
713
+ return writeCiFile(provider, dir, path.join('.github', 'workflows', 'prodcycle.yml'), GITHUB_WORKFLOW, force);
714
+ case 'gitlab':
715
+ return writeCiFile(provider, dir, '.gitlab-ci.prodcycle.yml', GITLAB_WORKFLOW, force);
716
+ case 'circleci':
717
+ return writeCiFile(provider, dir, path.join('.circleci', 'prodcycle.yml'), CIRCLECI_WORKFLOW, force);
718
+ }
719
+ }
720
+ function writeCiFile(provider, dir, relPath, content, force) {
721
+ const fullPath = path.join(dir, relPath);
722
+ if (fs.existsSync(fullPath) && !force) {
723
+ return {
724
+ status: 'already',
725
+ message: `[ci:${provider}] ${relPath} already exists. Use --force to overwrite.`,
726
+ };
727
+ }
728
+ const parent = path.dirname(fullPath);
729
+ if (!fs.existsSync(parent))
730
+ fs.mkdirSync(parent, { recursive: true });
731
+ fs.writeFileSync(fullPath, content);
732
+ // GitHub uses the `prodcycle/actions/compliance` action, which reads
733
+ // its key from `secrets.PRODCYCLE_API_KEY`. GitLab and CircleCI invoke
734
+ // the CLI directly, which reads `PC_API_KEY` from the environment.
735
+ const followup = provider === 'gitlab'
736
+ ? `Include it from .gitlab-ci.yml: \`include: '${relPath}'\`. `
737
+ : provider === 'circleci'
738
+ ? `Reference it from .circleci/config.yml or merge the contents in. `
739
+ : '';
740
+ const secretName = provider === 'github' ? 'PRODCYCLE_API_KEY' : 'PC_API_KEY';
741
+ return {
742
+ status: 'installed',
743
+ message: `[ci:${provider}] wrote ${fullPath}. ` +
744
+ followup +
745
+ `Set ${secretName} as a secret/variable in your ${provider} project before the first run.`,
746
+ };
747
+ }
748
+ // GitHub: delegate to the dedicated `prodcycle/actions/compliance` GitHub
749
+ // Action rather than calling the CLI directly. The action handles diff vs
750
+ // full-repo scan automatically (PR events vs push events), posts inline
751
+ // annotations on the PR diff, and writes a summary comment — none of
752
+ // which the CLI's own SARIF output reproduces. See
753
+ // https://github.com/prodcycle/actions for the full input reference.
754
+ const GITHUB_WORKFLOW = `name: Prodcycle Compliance
755
+
756
+ on:
757
+ pull_request:
758
+ push:
759
+ # Update this list to match your repo's default branch (e.g. master,
760
+ # develop). GitHub Actions does not support a dynamic
761
+ # \$default-branch / \${{ github.event.repository.default_branch }}
762
+ # value here, so the branch name has to be literal.
763
+ branches: [main]
764
+
765
+ jobs:
766
+ scan:
767
+ runs-on: ubuntu-latest
768
+ permissions:
769
+ contents: read
770
+ pull-requests: write
771
+ steps:
772
+ - uses: actions/checkout@v4
773
+ with:
774
+ fetch-depth: 0
775
+ - uses: prodcycle/actions/compliance@v2
776
+ with:
777
+ api-key: \${{ secrets.PRODCYCLE_API_KEY }}
778
+ `;
779
+ const GITLAB_WORKFLOW = `# Prodcycle compliance scan. Include from your main .gitlab-ci.yml:
780
+ # include:
781
+ # - local: .gitlab-ci.prodcycle.yml
782
+ #
783
+ # Set PC_API_KEY as a CI/CD variable (Settings → CI/CD → Variables) before
784
+ # the first run. Mark it Masked + Protected.
785
+
786
+ prodcycle:
787
+ stage: test
788
+ image: node:22-alpine
789
+ variables:
790
+ GIT_DEPTH: "0"
791
+ before_script:
792
+ - apk add --no-cache git
793
+ script:
794
+ - |
795
+ if [ "$CI_PIPELINE_SOURCE" = "merge_request_event" ]; then
796
+ git fetch --no-tags origin "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
797
+ npx --yes prodcycle scan . \\
798
+ --pr "origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME..HEAD" \\
799
+ --format sarif --output prodcycle.sarif
800
+ else
801
+ npx --yes prodcycle scan . --format sarif --output prodcycle.sarif
802
+ fi
803
+ artifacts:
804
+ when: always
805
+ paths:
806
+ - prodcycle.sarif
807
+ reports:
808
+ sast: prodcycle.sarif
809
+ rules:
810
+ - if: $CI_PIPELINE_SOURCE == "merge_request_event"
811
+ - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
812
+ `;
813
+ const CIRCLECI_WORKFLOW = `# Prodcycle compliance scan. To use this, either replace .circleci/config.yml
814
+ # or include it as a continuation/orb. Minimum example:
815
+ #
816
+ # version: 2.1
817
+ # workflows:
818
+ # compliance:
819
+ # jobs:
820
+ # - prodcycle-scan
821
+ #
822
+ # Set PC_API_KEY as a project environment variable in CircleCI before the
823
+ # first run.
824
+ #
825
+ # CircleCI does not expose the PR target branch as a built-in env var
826
+ # (\`CIRCLE_BASE_BRANCH\` does not exist; see
827
+ # https://circleci.com/docs/reference/variables/), so to scope PR scans
828
+ # to changed files only, set a project-level env var \`PRODCYCLE_PR_BASE\`
829
+ # to the branch your PRs target (e.g. \`main\`, \`develop\`). When unset,
830
+ # this template runs a full-repo scan.
831
+
832
+ version: 2.1
833
+ jobs:
834
+ prodcycle-scan:
835
+ docker:
836
+ - image: cimg/node:22.0
837
+ steps:
838
+ - checkout
839
+ - run:
840
+ name: Run Prodcycle compliance scan
841
+ command: |
842
+ if [ -n "\${PRODCYCLE_PR_BASE:-}" ] && [ -n "\${CIRCLE_PULL_REQUEST:-}" ]; then
843
+ git fetch --no-tags origin "$PRODCYCLE_PR_BASE"
844
+ npx --yes prodcycle scan . \\
845
+ --pr "origin/$PRODCYCLE_PR_BASE..HEAD" \\
846
+ --format sarif --output prodcycle.sarif
847
+ else
848
+ npx --yes prodcycle scan . --format sarif --output prodcycle.sarif
849
+ fi
850
+ # \`when: always\` so the SARIF artifact uploads even when the scan
851
+ # exits non-zero — compliance scanners exit 1 when findings exist,
852
+ # which is precisely the case where you want the report preserved.
853
+ - store_artifacts:
854
+ path: prodcycle.sarif
855
+ destination: prodcycle-sarif
856
+ when: always
857
+
858
+ workflows:
859
+ compliance:
860
+ jobs:
861
+ - prodcycle-scan
862
+ `;
863
+ /**
864
+ * Compute the list of files changed in a git diff range, relative to repo root.
865
+ * Filters to ACMR (Added/Copied/Modified/Renamed) so deleted files don't get
866
+ * scanned (they're not on disk anymore, and walk() would skip them anyway).
867
+ *
868
+ * Errors handled explicitly:
869
+ * - `ENOENT` (git not in PATH) → actionable "git executable not found"
870
+ * - `ETIMEDOUT` (git stalled — credential helper / auth prompt / etc.)
871
+ * → fail fast with a 30s timeout so CI jobs don't hang
872
+ * - non-zero exit → forward git's stderr so the user can see e.g. the
873
+ * "fatal: bad revision" message and fix the range argument
874
+ *
875
+ * Output paths are normalised to the platform separator: git emits POSIX
876
+ * forward-slashes always, but the file walker on Windows produces
877
+ * back-slashed `relPath` values. Without this conversion the literal
878
+ * minimatch comparison silently excludes every changed file on Windows.
879
+ */
880
+ const GIT_DIFF_TIMEOUT_MS = 30_000;
881
+ function computeChangedFiles(repoPath, range) {
882
+ let stdout;
883
+ try {
884
+ stdout = (0, child_process_1.execFileSync)('git', ['-C', repoPath, 'diff', '--name-only', '--diff-filter=ACMR', range], {
885
+ encoding: 'utf8',
886
+ stdio: ['ignore', 'pipe', 'pipe'],
887
+ timeout: GIT_DIFF_TIMEOUT_MS,
888
+ });
889
+ }
890
+ catch (e) {
891
+ if (e?.code === 'ENOENT') {
892
+ console.error('--pr: git executable not found in PATH');
893
+ process.exit(2);
894
+ }
895
+ if (e?.code === 'ETIMEDOUT' || e?.signal === 'SIGTERM') {
896
+ console.error(`--pr: git diff timed out after ${GIT_DIFF_TIMEOUT_MS}ms (range "${range}"). ` +
897
+ 'Check that the range does not require network access or credentials.');
898
+ process.exit(2);
899
+ }
900
+ const stderr = e?.stderr?.toString?.() ?? e?.message ?? 'unknown error';
901
+ console.error(`--pr: git diff failed for range "${range}": ${stderr.trim()}`);
902
+ process.exit(2);
903
+ }
904
+ return stdout
905
+ .split('\n')
906
+ .map((s) => s.trim().split('/').join(path.sep))
907
+ .filter(Boolean);
908
+ }
608
909
  function readStdin() {
609
910
  return new Promise((resolve, reject) => {
610
911
  if (process.stdin.isTTY) {
@@ -617,4 +918,10 @@ function readStdin() {
617
918
  process.stdin.on('error', reject);
618
919
  });
619
920
  }
620
- program.parse(injectScanDefault(process.argv));
921
+ // Only auto-parse when invoked as a script (i.e. via the `prodcycle`
922
+ // bin entry). Importing this module from tests must NOT execute the
923
+ // CLI — otherwise `node --test` triggers a real `program.parse` and
924
+ // fails before the test cases can run.
925
+ if (require.main === module) {
926
+ program.parse(injectScanDefault(process.argv));
927
+ }
package/dist/index.d.ts CHANGED
@@ -1,8 +1,28 @@
1
- import { ScanOptions, GateOptions } from './api-client';
1
+ import { ScanOptions, GateOptions, BackfillError } from './api-client';
2
2
  export * from './api-client';
3
3
  export * from './formatters/table';
4
4
  export * from './formatters/prompt';
5
5
  export * from './formatters/sarif';
6
+ /**
7
+ * Set when the server-side scanner threw and the API was configured to
8
+ * fail closed (the default). When this is present, callers MUST treat
9
+ * `passed: false` as "scanner unavailable — cannot certify compliance"
10
+ * rather than "code is dirty." Mirrors the API's `ScannerErrorInfo`
11
+ * shape; see `packages/compliance-code-scanner/api/src/domain/services/
12
+ * compliance-scan.service.ts` (`ScannerErrorInfo`) for the field
13
+ * contract.
14
+ *
15
+ * Without this surfaced to the CLI's --output JSON, a benchmark or CI
16
+ * report shows `passed: false, findings: []` and the user can't tell
17
+ * whether the code is clean (no findings) or whether the scanner
18
+ * failed (no findings because nothing got evaluated).
19
+ */
20
+ export interface ScannerError {
21
+ code: 'SCANNER_GATE_THREW';
22
+ message: string;
23
+ errorClass?: string;
24
+ errorCode?: string;
25
+ }
6
26
  interface ScanReturn {
7
27
  scanId?: string;
8
28
  passed: boolean;
@@ -10,7 +30,22 @@ interface ScanReturn {
10
30
  findings: unknown[];
11
31
  report: unknown;
12
32
  summary: unknown;
33
+ scannerError?: ScannerError;
34
+ /**
35
+ * Set when `validateChunked`'s findings-backfill GET failed. The summary
36
+ * still reflects the real finding count, but the structured findings are
37
+ * unavailable in this response. Callers should retry via `prodcycle scans
38
+ * <scanId>` to recover them. SARIF/JSON consumers branch on this to flag
39
+ * the result as incomplete rather than mistakenly clean.
40
+ */
41
+ backfillError?: BackfillError;
13
42
  }
43
+ /**
44
+ * Format and write the scanner-error warning to stderr. Centralized so the
45
+ * wording stays consistent across `scan()`, `gate()`, and the `scans <id>`
46
+ * CLI subcommand.
47
+ */
48
+ export declare function emitScannerErrorWarning(scannerError: ScannerError): void;
14
49
  /**
15
50
  * Scan a repository by collecting files and sending them to the API.
16
51
  *
@@ -31,6 +66,7 @@ export declare function scan(params: {
31
66
  * endpoint, used by coding-agent post-edit hooks).
32
67
  */
33
68
  export declare function gate(options: GateOptions): Promise<{
69
+ scannerError?: ScannerError | undefined;
34
70
  passed: boolean;
35
71
  exitCode: number;
36
72
  findings: unknown[];
package/dist/index.js CHANGED
@@ -14,6 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.emitScannerErrorWarning = emitScannerErrorWarning;
17
18
  exports.scan = scan;
18
19
  exports.gate = gate;
19
20
  const api_client_1 = require("./api-client");
@@ -22,6 +23,17 @@ __exportStar(require("./api-client"), exports);
22
23
  __exportStar(require("./formatters/table"), exports);
23
24
  __exportStar(require("./formatters/prompt"), exports);
24
25
  __exportStar(require("./formatters/sarif"), exports);
26
+ /**
27
+ * Format and write the scanner-error warning to stderr. Centralized so the
28
+ * wording stays consistent across `scan()`, `gate()`, and the `scans <id>`
29
+ * CLI subcommand.
30
+ */
31
+ function emitScannerErrorWarning(scannerError) {
32
+ process.stderr.write(`⚠ Scanner error: ${scannerError.message}` +
33
+ (scannerError.errorClass ? ` (errorClass=${scannerError.errorClass})` : '') +
34
+ (scannerError.errorCode ? ` (errorCode=${scannerError.errorCode})` : '') +
35
+ '\n');
36
+ }
25
37
  /**
26
38
  * Scan a repository by collecting files and sending them to the API.
27
39
  *
@@ -56,13 +68,40 @@ async function scan(params) {
56
68
  else {
57
69
  response = await client.validate(files, frameworks, options);
58
70
  }
71
+ // Pull `scannerError` through if the API set it. Picking the field
72
+ // explicitly (rather than `...response`) so the CLI's public surface
73
+ // doesn't accidentally expose internal fields if the API adds them.
74
+ // `scannerError` lives in this module rather than `api-client.ts`, so the
75
+ // cast bridges the type boundary; `backfillError` is typed in
76
+ // `ScanResult` and needs no cast.
77
+ const scannerError = response.scannerError;
78
+ const backfillError = response.backfillError;
79
+ // Exit code semantics:
80
+ // 0 = passed (no actionable findings, no scanner error)
81
+ // 1 = findings present, code not clean
82
+ // 2 = scanner unavailable — could not certify either way; fail-closed
83
+ // Distinguish (1) from (2) so CI policy can decide whether a non-zero
84
+ // exit means "developer must fix code" or "operator must investigate
85
+ // scanner."
86
+ const exitCode = scannerError ? 2 : response.passed ? 0 : 1;
87
+ // Surface scanner errors prominently to stderr so the user sees the
88
+ // distinction between a clean pass and an undetermined result. The
89
+ // JSON output already carries the structured field for programmatic
90
+ // consumers; this is for humans running the CLI interactively.
91
+ if (scannerError)
92
+ emitScannerErrorWarning(scannerError);
59
93
  return {
60
94
  scanId: response.scanId,
61
95
  passed: response.passed,
62
- exitCode: response.passed ? 0 : 1,
96
+ exitCode,
63
97
  findings: response.findings ?? [],
64
98
  report: response.report ?? null,
65
99
  summary: response.summary,
100
+ ...(scannerError ? { scannerError } : {}),
101
+ // Forward `backfillError` so SARIF/JSON consumers downstream of `scan()`
102
+ // can detect "summary is real but findings unavailable" without parsing
103
+ // stderr. validateChunked sets it when its enrichment GET fails.
104
+ ...(backfillError ? { backfillError } : {}),
66
105
  };
67
106
  }
68
107
  /**
@@ -73,11 +112,19 @@ async function gate(options) {
73
112
  const { files, frameworks = ['soc2'], ...scanOpts } = options;
74
113
  const client = new api_client_1.ComplianceApiClient(options.apiUrl, options.apiKey);
75
114
  const response = await client.hook(files, frameworks, scanOpts);
115
+ // Same scannerError plumbing as scan() above. Coding-agent hooks
116
+ // especially need to distinguish "code is clean" from "scanner is
117
+ // down" — agents should NOT proceed on the latter.
118
+ const scannerError = response.scannerError;
119
+ const exitCode = scannerError ? 2 : response.passed ? 0 : 1;
120
+ if (scannerError)
121
+ emitScannerErrorWarning(scannerError);
76
122
  return {
77
123
  passed: response.passed,
78
- exitCode: response.passed ? 0 : 1,
124
+ exitCode,
79
125
  findings: response.findings ?? [],
80
126
  prompt: response.prompt,
81
127
  summary: response.summary,
128
+ ...(scannerError ? { scannerError } : {}),
82
129
  };
83
130
  }
package/dist/utils/fs.js CHANGED
@@ -38,7 +38,73 @@ const fs = __importStar(require("fs"));
38
38
  const path = __importStar(require("path"));
39
39
  const minimatch_1 = require("minimatch");
40
40
  const MAX_FILE_SIZE = 256 * 1024; // 256 KB
41
- const MAX_TOTAL_FILES = 10_000;
41
+ /**
42
+ * Total file ceiling per scan. Hit on the OSS-CLI benchmark scanning
43
+ * `hapifhir/hapi-fhir` (~13k files) — the CLI silently dropped ~3k files
44
+ * past the cap. Default raised from the original 10k to 50k, and now
45
+ * overridable via `PRODCYCLE_MAX_FILES` for monorepos that need a
46
+ * different ceiling without patching/rebuilding. The API's chunked-
47
+ * session endpoint already supports up to 2,000 files per chunk, so a
48
+ * 50k-file repo is fed in 25+ chunks; the cap is here purely so a
49
+ * pathological symlink loop or `.git`-tracked-as-source repo doesn't
50
+ * exhaust the client's memory before the SCANNABLE_EXTENSIONS filter
51
+ * has a chance to drop most of the entries.
52
+ */
53
+ const MAX_TOTAL_FILES = (() => {
54
+ const raw = process.env['PRODCYCLE_MAX_FILES'];
55
+ if (!raw)
56
+ return 50_000;
57
+ const parsed = parseInt(raw, 10);
58
+ return Number.isFinite(parsed) && parsed > 0 ? parsed : 50_000;
59
+ })();
60
+ /**
61
+ * Extensions and exact filenames the server-side `isScannable` filter
62
+ * accepts. Pre-filtering client-side avoids:
63
+ * - bloating the wire payload with images / fonts / docs / archives
64
+ * that the API just drops on receipt
65
+ * - hitting MAX_TOTAL_FILES on repos like hapi-fhir or the Linux
66
+ * kernel where most files are not scannable
67
+ *
68
+ * Keep in lock-step with `api/src/domain/services/compliance-scan.service.ts`:
69
+ * - APPLICATION_CODE_EXTENSIONS (the source-code allowlist)
70
+ * - INFRASTRUCTURE_EXTENSIONS (.tf, .yaml, .yml, .json, .sql)
71
+ * - INFRASTRUCTURE_FILENAMES (dockerfile, .env)
72
+ *
73
+ * Files outside this set are skipped during walk. Source-of-truth is
74
+ * the server filter; this is just an optimization so we don't pay the
75
+ * wire cost for files the server will reject anyway.
76
+ */
77
+ const SCANNABLE_EXTENSIONS = new Set([
78
+ // Application code (must mirror APPLICATION_CODE_EXTENSIONS in the API)
79
+ '.ts',
80
+ '.tsx',
81
+ '.js',
82
+ '.jsx',
83
+ '.py',
84
+ '.go',
85
+ '.java',
86
+ '.rb',
87
+ '.php',
88
+ '.rs',
89
+ '.cs',
90
+ '.kt',
91
+ '.scala',
92
+ '.c',
93
+ '.cpp',
94
+ '.h',
95
+ '.hpp',
96
+ // Infrastructure-as-code (must mirror INFRASTRUCTURE_EXTENSIONS in the API)
97
+ '.tf',
98
+ '.yaml',
99
+ '.yml',
100
+ '.json',
101
+ '.sql',
102
+ ]);
103
+ const SCANNABLE_FILENAMES = new Set([
104
+ 'dockerfile',
105
+ 'containerfile',
106
+ '.env',
107
+ ]);
42
108
  /**
43
109
  * Directories skipped unconditionally. Kept in parity with
44
110
  * `packages/compliance-code-scanner/src/ignore-utils.ts`.
@@ -79,7 +145,15 @@ const SKIP_DIRS = new Set([
79
145
  ]);
80
146
  const SKIP_DIR_SUFFIXES = ['.egg-info'];
81
147
  const SKIP_FILE_EXTENSIONS = ['.lock', '.min.js', '.min.css', '.map', '.bundle.js', '.tfstate', '.tfstate.backup'];
82
- const SKIP_FILE_NAMES = new Set(['package-lock.json']);
148
+ // Files the server-side `isScannable` filter drops on receipt. Listing
149
+ // them client-side avoids paying the wire cost just to have the API
150
+ // throw the bytes away. Keep in lock-step with the server's filter in
151
+ // `compliance-code-scanner/src/collector.ts`.
152
+ const SKIP_FILE_NAMES = new Set([
153
+ 'package-lock.json',
154
+ 'package.json',
155
+ 'tsconfig.json',
156
+ ]);
83
157
  /**
84
158
  * Load .gitignore patterns from the repo root.
85
159
  *
@@ -132,8 +206,15 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
132
206
  if (matchesAny(relPath, userExcludes))
133
207
  return true;
134
208
  }
135
- // .env* files are always scanned, even if listed in .gitignore (common case)
136
- if (name.startsWith('.env') || name.endsWith('.env'))
209
+ // .env-family files are always scanned even if .gitignored — the
210
+ // common case for `.env`, `.env.local`, `.env.production`, `.envrc`,
211
+ // etc., where the whole point of scanning is to catch hardcoded
212
+ // secrets. The previous `endsWith('.env')` half of this carve-out
213
+ // also matched arbitrary `foo.env` files (a build artifact, a config
214
+ // dump, etc.), which let unrelated files bypass gitignore. Restrict
215
+ // to names that start with `.env`. Keep in lock-step with
216
+ // `compliance-code-scanner/src/ignore-utils.ts`.
217
+ if (name.startsWith('.env'))
137
218
  return false;
138
219
  for (const pattern of ignores) {
139
220
  if (name === pattern ||
@@ -159,6 +240,30 @@ function shouldSkipFileByName(name) {
159
240
  return true;
160
241
  return SKIP_FILE_EXTENSIONS.some((ext) => name.endsWith(ext));
161
242
  }
243
+ /**
244
+ * Mirror of the server's `isScannable` filter, applied client-side so we
245
+ * don't ship files the API will just drop. Also keeps repos like
246
+ * hapi-fhir (~13k files, mostly Java + some CSS/HTML/templates) from
247
+ * tripping MAX_TOTAL_FILES on non-scannable noise.
248
+ */
249
+ function isScannableFilename(name) {
250
+ const lower = name.toLowerCase();
251
+ if (SCANNABLE_FILENAMES.has(lower))
252
+ return true;
253
+ // Dockerfile variants (dockerfile.prod, dockerfile.dev, …)
254
+ if (lower.startsWith('dockerfile.'))
255
+ return true;
256
+ // Any .env* file — kept in lock-step with the carve-out in `shouldIgnore`,
257
+ // which preserves the whole .env* family from gitignore. The server may
258
+ // drop unknown variants (e.g. .envrc) but it's better to forward them than
259
+ // silently diverge from the ignore policy.
260
+ if (lower.startsWith('.env'))
261
+ return true;
262
+ const dot = lower.lastIndexOf('.');
263
+ if (dot === -1)
264
+ return false;
265
+ return SCANNABLE_EXTENSIONS.has(lower.slice(dot));
266
+ }
162
267
  async function collectFiles(baseDir, includePatterns, excludePatterns) {
163
268
  const repoRoot = path.resolve(baseDir);
164
269
  const ignores = loadGitignore(repoRoot);
@@ -195,6 +300,14 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
195
300
  continue;
196
301
  if (shouldSkipFileByName(name))
197
302
  continue;
303
+ // Skip files the server-side `isScannable` filter will drop anyway.
304
+ // No point paying the wire cost. When `--include` patterns are given
305
+ // we honor those instead — explicit user intent overrides the
306
+ // server-shape allowlist.
307
+ if ((!includePatterns || includePatterns.length === 0) &&
308
+ !isScannableFilename(name)) {
309
+ continue;
310
+ }
198
311
  if (includePatterns && includePatterns.length > 0 && !matchesAny(relPath, includePatterns)) {
199
312
  continue;
200
313
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prodcycle/prodcycle",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Multi-framework policy-as-code compliance scanner for infrastructure and application code.",
5
5
  "homepage": "https://docs.prodcycle.com",
6
6
  "repository": {
@@ -20,6 +20,7 @@
20
20
  },
21
21
  "scripts": {
22
22
  "build": "tsc",
23
+ "test": "npm run build && node --test test/*.test.mjs",
23
24
  "prepublishOnly": "npm run build"
24
25
  },
25
26
  "keywords": [