@prodcycle/prodcycle 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -31,18 +31,27 @@ npm install @prodcycle/prodcycle
31
31
  ### CLI
32
32
 
33
33
  ```bash
34
- # Scan current directory against SOC 2 and HIPAA
34
+ # Scan current directory against all 3 frameworks (default: soc2, hipaa, nist-csf).
35
+ # Auto-flips to SARIF in known CI environments so output drops into
36
+ # code-scanning dashboards without extra wiring.
37
+ prodcycle scan .
38
+
39
+ # Pin a specific framework or subset
35
40
  prodcycle scan . --framework soc2,hipaa
41
+ prodcycle scan . --framework hipaa --severity-threshold high
36
42
 
37
- # Output as SARIF for GitHub Code Scanning
38
- prodcycle scan . --framework soc2 --format sarif --output results.sarif
43
+ # Explicit SARIF (overrides the CI auto-flip)
44
+ prodcycle scan . --format sarif --output results.sarif
39
45
 
40
- # Set severity threshold (only report HIGH and above)
41
- prodcycle scan . --framework hipaa --severity-threshold high
46
+ # CI: scan only files changed in the PR
47
+ prodcycle scan . --pr origin/main..HEAD
42
48
 
43
49
  # Auto-configure compliance hooks/instructions for your coding agents
44
50
  # (Claude Code, Cursor, Codex, OpenCode, GitHub Copilot, Gemini CLI)
45
51
  prodcycle init --agent all
52
+
53
+ # Scaffold a CI workflow that delegates to prodcycle/actions/compliance
54
+ prodcycle init --ci github # also: gitlab | circleci
46
55
  ```
47
56
 
48
57
  Subcommands: `scan` (full repo scan), `gate` (JSON payload from stdin), `hook` (coding-agent post-edit hook), `init` (coding-agent and CI workflow setup).
@@ -16,6 +16,30 @@ export interface GateOptions {
16
16
  apiUrl?: string;
17
17
  config?: Record<string, unknown>;
18
18
  }
19
+ /**
20
+ * Set when `validateChunked`'s post-`/complete` enrichment GET failed and the
21
+ * structured `findings` could not be recovered. Distinguishable from the
22
+ * server-side `scannerError`: this signals "we know there are N findings (per
23
+ * the summary) but we couldn't fetch them — retry with `prodcycle scans
24
+ * <id>`," not "we couldn't certify the scan." Surfaced as a named field
25
+ * (rather than via the `[key: string]: unknown` index signature) so
26
+ * TypeScript callers get a typed contract instead of `unknown`.
27
+ *
28
+ * `code` distinguishes:
29
+ * - `BACKFILL_GET_FAILED`: GET threw / non-2xx — backfill couldn't run
30
+ * - `BACKFILL_GET_RETURNED_EMPTY`: GET succeeded but findings were still
31
+ * empty despite `summary.total > 0`. Usually means eventual consistency
32
+ * between the `/complete` writer and the scan-record reader; retrying
33
+ * after a short delay typically populates the findings. Surfaced as a
34
+ * separate code so SARIF/dashboard consumers can decide whether to
35
+ * auto-retry vs. surface as a hard error.
36
+ */
37
+ export interface BackfillError {
38
+ code: 'BACKFILL_GET_FAILED' | 'BACKFILL_GET_RETURNED_EMPTY';
39
+ message: string;
40
+ scanId: string;
41
+ summaryTotal: number;
42
+ }
19
43
  export interface ScanResult {
20
44
  scanId?: string;
21
45
  passed: boolean;
@@ -24,6 +48,7 @@ export interface ScanResult {
24
48
  summary?: unknown;
25
49
  prompt?: string;
26
50
  status?: 'IN_PROGRESS' | 'COMPLETED' | 'FAILED';
51
+ backfillError?: BackfillError;
27
52
  [key: string]: unknown;
28
53
  }
29
54
  interface ApiErrorBody {
@@ -107,6 +132,22 @@ export declare class ComplianceApiClient {
107
132
  * to override the conservative defaults.
108
133
  */
109
134
  validateChunked(files: Record<string, string>, frameworks: string[], options?: ScanOptions): Promise<ScanResult>;
135
+ /**
136
+ * Some server versions of `POST /scans/:id/complete` return only the summary,
137
+ * leaving `findings` empty even when `summary.total > 0`. The findings are
138
+ * persisted on the scan record and recoverable via `GET /scans/:id`. Call
139
+ * this after `completeSession` (and any other path where the response shape
140
+ * may be summary-only) so SARIF/JSON consumers always see structured findings,
141
+ * not just a count. No-op when findings are already present or the scan is
142
+ * genuinely clean.
143
+ *
144
+ * Timeout: the follow-up GET goes through `this.request`, which wraps every
145
+ * fetch with `AbortSignal.timeout(REQUEST_TIMEOUT_MS)` (120 s default,
146
+ * tunable via `PC_REQUEST_TIMEOUT_MS`). A stalled server can't hang
147
+ * `validateChunked` indefinitely; if the abort fires, the catch below
148
+ * falls through with the original summary-only result.
149
+ */
150
+ private backfillFindingsIfMissing;
110
151
  /**
111
152
  * Async-validate: returns a `scanId` immediately; caller polls
112
153
  * `getScan(scanId)` until status is COMPLETED or FAILED. Useful for CI
@@ -176,7 +176,79 @@ class ComplianceApiClient {
176
176
  await this.appendChunk(session.scanId, chunk);
177
177
  }
178
178
  const result = await this.completeSession(session.scanId);
179
- return { scanId: session.scanId, ...result };
179
+ const enriched = await this.backfillFindingsIfMissing(session.scanId, result);
180
+ return { scanId: session.scanId, ...enriched };
181
+ }
182
+ /**
183
+ * Some server versions of `POST /scans/:id/complete` return only the summary,
184
+ * leaving `findings` empty even when `summary.total > 0`. The findings are
185
+ * persisted on the scan record and recoverable via `GET /scans/:id`. Call
186
+ * this after `completeSession` (and any other path where the response shape
187
+ * may be summary-only) so SARIF/JSON consumers always see structured findings,
188
+ * not just a count. No-op when findings are already present or the scan is
189
+ * genuinely clean.
190
+ *
191
+ * Timeout: the follow-up GET goes through `this.request`, which wraps every
192
+ * fetch with `AbortSignal.timeout(REQUEST_TIMEOUT_MS)` (120 s default,
193
+ * tunable via `PC_REQUEST_TIMEOUT_MS`). A stalled server can't hang
194
+ * `validateChunked` indefinitely; if the abort fires, the catch below
195
+ * falls through with the original summary-only result.
196
+ */
197
+ async backfillFindingsIfMissing(scanId, result) {
198
+ const findingsLength = Array.isArray(result.findings) ? result.findings.length : 0;
199
+ const summaryTotal = result.summary?.total ?? 0;
200
+ if (findingsLength > 0 || summaryTotal === 0)
201
+ return result;
202
+ try {
203
+ const full = await this.getScan(scanId);
204
+ if (Array.isArray(full.findings) && full.findings.length > 0) {
205
+ return { ...result, findings: full.findings };
206
+ }
207
+ // GET succeeded but findings were empty despite `summary.total > 0`.
208
+ // Most likely cause: eventual consistency between `/complete`'s summary
209
+ // computation and the scan-record findings writer. Without surfacing a
210
+ // signal here, the caller would see exactly the silent-drop state the
211
+ // backfill was added to prevent. Mark as `BACKFILL_GET_RETURNED_EMPTY`
212
+ // (distinct from the throw case) so consumers can branch on retry vs.
213
+ // hard-fail behavior.
214
+ const message = `findings still empty after GET /scans/${scanId} (summary reports ${summaryTotal})`;
215
+ process.stderr.write(`⚠ Findings backfill ${message}. ` +
216
+ `Run \`prodcycle scans ${scanId}\` after a short delay to retry.\n`);
217
+ return {
218
+ ...result,
219
+ backfillError: {
220
+ code: 'BACKFILL_GET_RETURNED_EMPTY',
221
+ message,
222
+ scanId,
223
+ summaryTotal,
224
+ },
225
+ };
226
+ }
227
+ catch (err) {
228
+ // Best-effort enrichment: if the follow-up GET fails, fall through with
229
+ // the original result rather than break the scan call. The user still
230
+ // has the summary + scanId.
231
+ //
232
+ // BUT — without a user-facing signal, the resulting state (`findings: []`
233
+ // alongside `summary.total > 0`) looks exactly like the original bug we
234
+ // were fixing, and the user has no way to know they need to retry via
235
+ // `prodcycle scans <id>`. Surface the failure as both:
236
+ // - a stderr warning (humans running the CLI interactively)
237
+ // - a structured `backfillError` field (programmatic consumers / SARIF)
238
+ const message = err instanceof Error ? err.message : String(err);
239
+ process.stderr.write(`⚠ Findings backfill GET /scans/${scanId} failed (${message}). ` +
240
+ `${summaryTotal} finding(s) were detected but only the summary is available. ` +
241
+ `Run \`prodcycle scans ${scanId}\` to fetch the structured findings.\n`);
242
+ return {
243
+ ...result,
244
+ backfillError: {
245
+ code: 'BACKFILL_GET_FAILED',
246
+ message,
247
+ scanId,
248
+ summaryTotal,
249
+ },
250
+ };
251
+ }
180
252
  }
181
253
  // ─── Async validate ─────────────────────────────────────────────────────
182
254
  /**
package/dist/cli.d.ts CHANGED
@@ -1,2 +1,52 @@
1
1
  #!/usr/bin/env node
2
- export {};
2
+ /**
3
+ * Detect CI environment via well-known env vars set by the major
4
+ * platforms. When CI is detected, default `--format` flips to `sarif`
5
+ * (so output drops straight into GitHub code scanning / GitLab security
6
+ * dashboards / etc. without extra configuration). Users can still
7
+ * override with `--format table|json|prompt`.
8
+ *
9
+ * The flip is opt-out (set `--format table` explicitly to keep the
10
+ * human-readable output in CI logs). Heuristic, not load-bearing — if
11
+ * we miss a CI platform here the user gets the same default they
12
+ * would have anyway (`table`), they just have to add `--format sarif`
13
+ * by hand.
14
+ */
15
+ export declare function isCiEnvironment(env?: NodeJS.ProcessEnv): boolean;
16
+ /**
17
+ * Resolve the files to scan for a `hook` invocation. Supports:
18
+ * - `--file <path>` — read that file from disk
19
+ * - stdin: `{"files": {path: content}}` (same as gate)
20
+ * - stdin: `{"file_path": "...", "content": "..."}` (single file)
21
+ * - stdin: Claude Code PostToolUse shape —
22
+ * `{"tool_input": {"file_path": "...", "content"|"new_string": "..."}}`
23
+ * When only `file_path` is given and we can read the file, we do.
24
+ */
25
+ /**
26
+ * Convert the user-supplied `--file <path>` value into a repo-relative key.
27
+ *
28
+ * The compliance API rejects absolute paths (`File path must be relative`).
29
+ * Two failure modes the naive implementation hit on macOS:
30
+ *
31
+ * 1. Absolute paths under cwd silently became cryptic 400s. Fixed by
32
+ * `path.relative(cwd, absolute)` — works on Linux.
33
+ * 2. macOS `/tmp` is a symlink to `/private/tmp`. `path.resolve()` does
34
+ * NOT follow symlinks, but `process.cwd()` returns the physical path
35
+ * via the kernel's `getcwd()`. Result: `path.resolve('/tmp/repo/x')`
36
+ * = `/tmp/repo/x` while cwd is `/private/tmp/repo`, so the relative
37
+ * path is `../../../tmp/repo/x` and the file is incorrectly rejected
38
+ * as "outside cwd" — exactly the agent-hook scenario this targets.
39
+ * Fix: realpath both sides before comparing.
40
+ *
41
+ * Pure function (no fs I/O of its own, no `process.exit`) so it's directly
42
+ * unit-testable. Caller passes the original input, the realpath of the
43
+ * resolved file (`fs.realpathSync(path.resolve(filePath))`), and the
44
+ * realpath of cwd. Tests construct realpath inputs themselves.
45
+ */
46
+ export declare function resolveHookFileKey(inputPath: string, realpathFile: string, realpathCwd: string): {
47
+ ok: true;
48
+ key: string;
49
+ } | {
50
+ ok: false;
51
+ error: string;
52
+ };
package/dist/cli.js CHANGED
@@ -34,7 +34,10 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  };
35
35
  })();
36
36
  Object.defineProperty(exports, "__esModule", { value: true });
37
+ exports.isCiEnvironment = isCiEnvironment;
38
+ exports.resolveHookFileKey = resolveHookFileKey;
37
39
  const commander_1 = require("commander");
40
+ const child_process_1 = require("child_process");
38
41
  const fs = __importStar(require("fs"));
39
42
  const path = __importStar(require("path"));
40
43
  const index_1 = require("./index");
@@ -111,13 +114,55 @@ program
111
114
  .name('prodcycle')
112
115
  .description('Multi-framework policy-as-code compliance scanner for infrastructure and application code.')
113
116
  .version(PKG_VERSION);
117
/**
 * Detect a CI environment via well-known env vars set by the major
 * platforms. The `scan` command uses this to pick its default `--format`:
 * `sarif` when CI is detected, `table` otherwise. An explicit `--format`
 * always wins.
 *
 * Heuristic, not load-bearing — if a CI platform is missed here the user
 * simply gets the interactive default (`table`) and has to add
 * `--format sarif` by hand.
 *
 * Signals checked:
 *   - generic `CI`: any non-empty value (`true`, `1`, a platform name, …)
 *     EXCEPT the explicit opt-outs `false` and `0` (case-insensitive,
 *     surrounding whitespace ignored). `CI=false` is a common way to tell
 *     tooling "not CI", so it must not flip the output format.
 *   - `GITHUB_ACTIONS`, `GITLAB_CI`, `CIRCLECI`, `BUILDKITE`, `TRAVIS`
 *     equal to the string `true`.
 *   - `JENKINS_URL` or `BITBUCKET_BUILD_NUMBER` being set at all.
 * Platform-specific variables still count when `CI` is an opt-out value.
 *
 * @param env Environment map to inspect; defaults to `process.env`.
 * @returns `true` when any CI signal is present.
 */
function isCiEnvironment(env = process.env) {
    // Generic `CI`: most platforms set `CI=true`, but some (Drone CI,
    // Woodpecker CI, custom Jenkins pipelines) use `CI=1` or another truthy
    // string, so accept anything non-empty that is not an explicit "off".
    const genericCi = env['CI'];
    const genericCiSet = genericCi != null &&
        genericCi !== '' &&
        !['false', '0'].includes(String(genericCi).trim().toLowerCase());
    return (genericCiSet ||
        env['GITHUB_ACTIONS'] === 'true' ||
        env['GITLAB_CI'] === 'true' ||
        env['CIRCLECI'] === 'true' ||
        env['JENKINS_URL'] != null ||
        env['BUILDKITE'] === 'true' ||
        env['TRAVIS'] === 'true' ||
        env['BITBUCKET_BUILD_NUMBER'] != null);
}
114
145
  // ── scan ────────────────────────────────────────────────────────────────────
115
146
  program
116
147
  .command('scan [repo_path]')
117
148
  .description('Scan a repository for compliance violations')
118
- .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2')
119
- .option('--format <format>', 'Output format: json, sarif, table, prompt', 'table')
120
- .option('--severity-threshold <severity>', 'Minimum severity to include in report', 'low')
149
+ // Default frameworks: all three. The unique value of this scanner is
150
+ // cross-framework evaluation in one pass; defaulting to `soc2` only
151
+ // hid the HIPAA + NIST CSF capability from users who never thought
152
+ // to override the flag. If users need only one framework they can
153
+ // still pass `--framework soc2` explicitly.
154
+ .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2,hipaa,nist-csf')
155
+ // Default format: `table` for interactive use, but auto-flipped to
156
+ // `sarif` when CI is detected (see isCiEnvironment above) so GitHub
157
+ // Code Scanning / GitLab dashboards pick the report up without any
158
+ // extra wiring. The CLI's --format flag overrides the auto-flip.
159
+ .option('--format <format>', 'Output format: json, sarif, table, prompt (auto-defaults to sarif in CI)', undefined)
160
+ // Default severity-threshold: `medium`. `low` includes too many
161
+ // tier-3 advisory findings that are typically noise unless the user
162
+ // explicitly opts in; `high` would hide medium-severity weak-crypto
163
+ // findings that ARE actionable. Medium is the right balance for
164
+ // first-time users.
165
+ .option('--severity-threshold <severity>', 'Minimum severity to include in report', 'medium')
121
166
  .option('--fail-on <levels>', 'Comma-separated severities that cause non-zero exit', 'critical,high')
122
167
  .option('--include <patterns>', 'Comma-separated glob patterns to include')
123
168
  .option('--exclude <patterns>', 'Comma-separated glob patterns to exclude')
@@ -126,12 +171,19 @@ program
126
171
  .option('--api-key <key>', 'API key for compliance API (or PC_API_KEY env)')
127
172
  .option('--async', 'Use the async-validate flow (server returns 202 immediately; CLI polls until COMPLETED). Useful for large scans where holding a connection isn’t practical.')
128
173
  .option('--chunked', 'Force the chunked-session flow regardless of payload size. The default already auto-falls-back to chunked when /validate returns 413 with a chunked-endpoint suggestion.')
174
+ .option('--pr <range>', 'Scan only files changed in a git diff range (e.g. "origin/main..HEAD"). Cuts CI scan time on large repos by skipping unchanged files. Requires baseDir to be the git repo root.')
129
175
  .action(async (repoPath, opts) => {
130
176
  try {
131
177
  const target = repoPath ?? '.';
132
- const frameworks = parseList(opts.framework) ?? ['soc2'];
178
+ const frameworks = parseList(opts.framework) ?? ['soc2', 'hipaa', 'nist-csf'];
133
179
  const failOn = parseList(opts.failOn) ?? ['critical', 'high'];
134
- const format = (opts.format ?? 'table');
180
+ // Format resolution:
181
+ // 1. explicit --format wins
182
+ // 2. otherwise: sarif when CI is detected, table when interactive
183
+ // SARIF in CI lets GitHub code scanning / GitLab security dashboards
184
+ // ingest results with zero extra configuration; table in interactive
185
+ // shells gives the human-readable summary first-time users expect.
186
+ const format = (opts.format ?? (isCiEnvironment() ? 'sarif' : 'table'));
135
187
  // --async and --chunked are mutually exclusive; pick the explicit
136
188
  // mode if either flag is set, otherwise let `scan()` pick (sync
137
189
  // with auto-fallback to chunked on 413).
@@ -144,6 +196,20 @@ program
144
196
  mode = 'async';
145
197
  else if (opts.chunked)
146
198
  mode = 'chunked';
199
+ // --pr: restrict the scan to files in `git diff --name-only <range>`.
200
+ // Empty diff → exit 0 immediately (nothing to scan).
201
+ let include = parseList(opts.include);
202
+ if (opts.pr) {
203
+ const changed = computeChangedFiles(target, opts.pr);
204
+ if (changed.length === 0) {
205
+ console.error(`No files changed in range "${opts.pr}". Nothing to scan.`);
206
+ process.exit(0);
207
+ }
208
+ console.error(`--pr ${opts.pr}: restricting scan to ${changed.length} changed file(s).`);
209
+ // Use the diff list as exact-match include patterns. minimatch treats
210
+ // ordinary paths (no glob chars) as literal matches against relPath.
211
+ include = changed;
212
+ }
147
213
  console.error(`Scanning ${path.resolve(target)} for ${frameworks.join(', ')}` +
148
214
  (mode === 'sync' ? '' : ` (${mode} mode)`) +
149
215
  '...');
@@ -153,7 +219,7 @@ program
153
219
  options: {
154
220
  severityThreshold: opts.severityThreshold,
155
221
  failOn: failOn,
156
- include: parseList(opts.include),
222
+ include,
157
223
  exclude: parseList(opts.exclude),
158
224
  apiUrl: opts.apiUrl,
159
225
  apiKey: opts.apiKey,
@@ -172,14 +238,14 @@ program
172
238
  program
173
239
  .command('gate')
174
240
  .description('Evaluate a JSON payload of files from stdin (low-latency hook endpoint)')
175
- .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2')
241
+ .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2,hipaa,nist-csf')
176
242
  .option('--format <format>', 'Output format: json, sarif, table, prompt', 'prompt')
177
243
  .option('--output <file>', 'Write report to file')
178
244
  .option('--api-url <url>', 'Compliance API base URL (or PC_API_URL env)')
179
245
  .option('--api-key <key>', 'API key for compliance API (or PC_API_KEY env)')
180
246
  .action(async (opts) => {
181
247
  try {
182
- const frameworks = parseList(opts.framework) ?? ['soc2'];
248
+ const frameworks = parseList(opts.framework) ?? ['soc2', 'hipaa', 'nist-csf'];
183
249
  const format = (opts.format ?? 'prompt');
184
250
  const stdin = await readStdin();
185
251
  if (!stdin.trim()) {
@@ -231,23 +297,31 @@ program
231
297
  const { ComplianceApiClient } = await Promise.resolve().then(() => __importStar(require('./api-client')));
232
298
  const client = new ComplianceApiClient(opts.apiUrl, opts.apiKey);
233
299
  const scan = await client.getScan(scanId);
300
+ // Same scannerError / exit-code-2 plumbing as scan() / gate(): a
301
+ // user retrieving a stored scan that failed for scanner reasons
302
+ // must see the same distinction (exit 2, scannerError surfaced).
303
+ const scannerError = scan.scannerError;
304
+ const exitCode = scannerError ? 2 : scan.passed ? 0 : 1;
234
305
  const payload = {
235
306
  scanId,
236
307
  passed: scan.passed,
237
308
  status: scan.status ?? 'COMPLETED',
238
309
  findings: scan.findings ?? [],
239
310
  summary: scan.summary,
240
- exitCode: scan.passed ? 0 : 1,
311
+ exitCode,
312
+ ...(scannerError ? { scannerError } : {}),
241
313
  };
242
314
  // Use the same renderer as `scan` so format=table/sarif/prompt all work.
243
315
  writeOutput(renderReport(payload, format), opts.output);
316
+ if (scannerError)
317
+ (0, index_1.emitScannerErrorWarning)(scannerError);
244
318
  // Exit 2 if scan is still in progress — the CLI run shouldn't gate on
245
319
  // an indeterminate result.
246
320
  if (scan.status === 'IN_PROGRESS') {
247
321
  console.error(`Scan ${scanId} is still IN_PROGRESS. Re-run the same command to keep polling, or use 'pc scan --async' to wait for completion.`);
248
322
  process.exit(2);
249
323
  }
250
- process.exit(payload.exitCode);
324
+ process.exit(exitCode);
251
325
  }
252
326
  catch (error) {
253
327
  console.error(`✗ Error: ${error.message}`);
@@ -258,7 +332,7 @@ program
258
332
  program
259
333
  .command('hook')
260
334
  .description('Run as coding-agent post-edit hook (reads stdin or --file)')
261
- .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2')
335
+ .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2,hipaa,nist-csf')
262
336
  .option('--format <format>', 'Output format: json, sarif, table, prompt', 'prompt')
263
337
  .option('--file <path>', 'Scan this file from disk (alternative to reading content from stdin)')
264
338
  .option('--fail-on <levels>', 'Severities that cause non-zero exit', 'critical,high')
@@ -267,7 +341,7 @@ program
267
341
  .option('--api-key <key>', 'API key for compliance API (or PC_API_KEY env)')
268
342
  .action(async (opts) => {
269
343
  try {
270
- const frameworks = parseList(opts.framework) ?? ['soc2'];
344
+ const frameworks = parseList(opts.framework) ?? ['soc2', 'hipaa', 'nist-csf'];
271
345
  const format = (opts.format ?? 'prompt');
272
346
  const files = await collectHookFiles(opts.file);
273
347
  if (!files || Object.keys(files).length === 0) {
@@ -298,6 +372,43 @@ program
298
372
  * `{"tool_input": {"file_path": "...", "content"|"new_string": "..."}}`
299
373
  * When only `file_path` is given and we can read the file, we do.
300
374
  */
375
/**
 * Convert the user-supplied `--file <path>` value into a repo-relative key.
 *
 * The compliance API rejects absolute paths (`File path must be relative`).
 * Two failure modes the naive implementation hit on macOS:
 *
 *   1. Absolute paths under cwd silently became cryptic 400s. Fixed by
 *      `path.relative(cwd, absolute)`.
 *   2. macOS `/tmp` is a symlink to `/private/tmp`. `path.resolve()` does
 *      NOT follow symlinks, but `process.cwd()` returns the physical path.
 *      Result: `path.resolve('/tmp/repo/x')` = `/tmp/repo/x` while cwd is
 *      `/private/tmp/repo`, so the relative path is `../../../tmp/repo/x`
 *      and the file is incorrectly rejected as "outside cwd". Fix: the
 *      caller realpaths both sides before passing them in.
 *
 * Pure function (no fs I/O of its own, no `process.exit`) so it is directly
 * unit-testable.
 *
 * @param inputPath The path exactly as the user typed it. Relative input is
 *   returned verbatim as the key.
 * @param realpathFile Realpath of the resolved file
 *   (`fs.realpathSync(path.resolve(filePath))`). Only consulted when
 *   `inputPath` is absolute.
 * @param realpathCwd Realpath of the current working directory.
 * @returns `{ ok: true, key }` with the key to send to the API, or
 *   `{ ok: false, error }` when an absolute input lies outside
 *   `realpathCwd`.
 */
function resolveHookFileKey(inputPath, realpathFile, realpathCwd) {
    if (!path.isAbsolute(inputPath)) {
        // Relative input passes through verbatim — no symlink ambiguity.
        return { ok: true, key: inputPath };
    }
    const relative = path.relative(realpathCwd, realpathFile);
    // The file escapes cwd only when the FIRST SEGMENT is exactly `..`
    // (or the result is absolute, e.g. a different drive on Windows).
    // A plain `startsWith('..')` would also reject legitimate entries
    // inside cwd whose name merely begins with two dots (`..cache/x`).
    const escapesCwd = relative === '..' ||
        relative.startsWith('..' + path.sep) ||
        path.isAbsolute(relative);
    if (escapesCwd) {
        return {
            ok: false,
            error: `hook: --file ${inputPath} is outside the current directory ` +
                `(${realpathCwd}). Pass a path relative to the repo root, or ` +
                `cd into the repo first.`,
        };
    }
    return { ok: true, key: relative };
}
301
412
  async function collectHookFiles(filePath) {
302
413
  if (filePath) {
303
414
  const absolute = path.resolve(filePath);
@@ -306,7 +417,17 @@ async function collectHookFiles(filePath) {
306
417
  process.exit(2);
307
418
  }
308
419
  const content = fs.readFileSync(absolute, 'utf8');
309
- return { [filePath]: content };
420
+ // Realpath both sides so the macOS `/tmp → /private/tmp` symlink doesn't
421
+ // make a valid agent-hook path (e.g. `/tmp/repo/main.tf`) appear outside
422
+ // cwd. See `resolveHookFileKey` JSDoc for the full rationale.
423
+ const realpathFile = fs.realpathSync(absolute);
424
+ const realpathCwd = fs.realpathSync(process.cwd());
425
+ const resolved = resolveHookFileKey(filePath, realpathFile, realpathCwd);
426
+ if (!resolved.ok) {
427
+ console.error(resolved.error);
428
+ process.exit(2);
429
+ }
430
+ return { [resolved.key]: content };
310
431
  }
311
432
  const stdin = await readStdin();
312
433
  if (!stdin.trim()) {
@@ -348,18 +469,22 @@ async function collectHookFiles(filePath) {
348
469
  // ── init ────────────────────────────────────────────────────────────────────
349
470
  program
350
471
  .command('init')
351
- .description('Configure compliance hooks for coding agents')
472
+ .description('Configure compliance hooks for coding agents and/or CI workflows')
352
473
  .option('--agent <agents>', 'Comma-separated agents to configure (claude, cursor, codex, opencode, github-copilot, gemini-cli). Use "all" to configure every agent. Default: auto-detect.')
474
+ .option('--ci <providers>', 'Comma-separated CI providers to scaffold (github, gitlab, circleci). Use "all" for every provider. Opt-in only \u2014 never auto-detected.')
353
475
  .option('--force', 'Overwrite existing compliance hook entries')
354
476
  .option('--dir <path>', 'Project directory to configure', '.')
355
477
  .action((opts) => {
356
478
  try {
357
479
  const dir = path.resolve(opts.dir ?? '.');
358
480
  const agents = resolveAgents(opts.agent, dir);
359
- if (agents.length === 0) {
360
- console.error('init: no agents selected and none auto-detected. ' +
361
- 'Use --agent <name> to configure explicitly (claude, cursor, codex, ' +
362
- 'opencode, github-copilot, gemini-cli, or "all").');
481
+ const ciProviders = resolveCiProviders(opts.ci);
482
+ if (agents.length === 0 && ciProviders.length === 0) {
483
+ console.error('init: nothing to do. ' +
484
+ 'Use --agent <name> to configure a coding agent (claude, cursor, codex, ' +
485
+ 'opencode, github-copilot, gemini-cli, or "all"), and/or --ci <provider> ' +
486
+ 'to scaffold CI workflows (github, gitlab, circleci, or "all"). ' +
487
+ 'Without --agent the CLI also auto-detects agents already in use.');
363
488
  process.exit(2);
364
489
  }
365
490
  let anyFailed = false;
@@ -370,6 +495,12 @@ program
370
495
  if (result.status === 'failed')
371
496
  anyFailed = true;
372
497
  }
498
+ for (const provider of ciProviders) {
499
+ const result = configureCiProvider(provider, dir, !!opts.force);
500
+ process.stdout.write(result.message + '\n');
501
+ if (result.status === 'failed')
502
+ anyFailed = true;
503
+ }
373
504
  process.exit(anyFailed ? 1 : 0);
374
505
  }
375
506
  catch (error) {
@@ -605,6 +736,224 @@ function configureInstructionFile(agent, dir, relPath, force, writtenPaths) {
/**
 * Escape every regular-expression metacharacter in `s` with a backslash so
 * the result can be embedded in a `RegExp` and match the input literally.
 */
function escapeRegExp(s) {
    const metaCharacters = /[.*+?^${}()|[\]\\]/g;
    const escaped = s.replace(metaCharacters, '\\$&');
    return escaped;
}
// CI providers that `init --ci` knows how to scaffold, in the order they
// are expanded when the user asks for "all".
const ALL_CI_PROVIDERS = ['github', 'gitlab', 'circleci'];
/**
 * Report whether `name` is exactly one of the supported CI provider ids.
 * The comparison is case-sensitive.
 */
function isCiProvider(name) {
    return ALL_CI_PROVIDERS.indexOf(name) !== -1;
}
/**
 * Turn the raw `--ci` option value into the list of CI providers to scaffold.
 *
 * - A missing/empty value yields `[]`: CI scaffolding is opt-in only.
 * - The single entry `all` expands to a copy of `ALL_CI_PROVIDERS`.
 * - Otherwise each entry is checked with `isCiProvider`; unknown names are
 *   reported on stderr and dropped.
 *
 * Repeated names are collapsed (first occurrence wins the position) so a
 * value like `github,github` does not scaffold the same file twice and then
 * report it as "already exists".
 *
 * @param userChoice Raw option string. It is split by `parseList`, which is
 *   defined elsewhere in this file (presumably on commas; confirm there).
 * @returns Provider ids, de-duplicated, in first-seen order.
 */
function resolveCiProviders(userChoice) {
    if (!userChoice)
        return [];
    const requested = parseList(userChoice) ?? [];
    if (requested.length === 1 && requested[0] === 'all')
        return ALL_CI_PROVIDERS.slice();
    // A Set keeps insertion order, so de-duplication does not reorder the
    // providers the user asked for.
    const selected = new Set();
    for (const name of requested) {
        if (isCiProvider(name))
            selected.add(name);
        else
            console.error(`init: unknown CI provider "${name}" — ignoring`);
    }
    return [...selected];
}
/**
 * Scaffold the CI configuration file for one provider by delegating to
 * `writeCiFile` with that provider's target path and template.
 *
 * @param provider One of `github`, `gitlab`, `circleci`.
 * @param dir Project directory the relative target path is resolved against.
 * @param force Passed through to `writeCiFile`; when true an existing file
 *   is overwritten.
 * @returns The `{ status, message }` result from `writeCiFile`. For a
 *   provider this function does not know, a `failed` result is returned so
 *   the caller can print `message` and set a non-zero exit code instead of
 *   dereferencing `undefined`.
 */
function configureCiProvider(provider, dir, force) {
    switch (provider) {
        case 'github':
            return writeCiFile(provider, dir, path.join('.github', 'workflows', 'prodcycle.yml'), GITHUB_WORKFLOW, force);
        case 'gitlab':
            return writeCiFile(provider, dir, '.gitlab-ci.prodcycle.yml', GITLAB_WORKFLOW, force);
        case 'circleci':
            return writeCiFile(provider, dir, path.join('.circleci', 'prodcycle.yml'), CIRCLECI_WORKFLOW, force);
        default:
            // Callers are expected to pass validated names; this guards
            // against the function silently returning undefined.
            return {
                status: 'failed',
                message: `[ci:${provider}] unsupported CI provider`,
            };
    }
}
/**
 * Write a CI template to `<dir>/<relPath>`, creating parent directories as
 * needed, and describe the outcome.
 *
 * @param provider Provider id, used for the message prefix and to pick the
 *   follow-up hint and secret name.
 * @param dir Project directory.
 * @param relPath Target path relative to `dir`.
 * @param content File contents to write.
 * @param force When false an existing file is left untouched; when true it
 *   is overwritten.
 * @returns `{ status, message }` where `status` is:
 *   - `'installed'`: the file was written;
 *   - `'already'`: the file exists and `force` is false;
 *   - `'failed'`: a filesystem error occurred (the message carries the
 *     reason). Errors are returned rather than thrown so the caller's loop
 *     can continue with the remaining providers.
 */
function writeCiFile(provider, dir, relPath, content, force) {
    const fullPath = path.join(dir, relPath);
    const failed = (err) => ({
        status: 'failed',
        message: `[ci:${provider}] failed to write ${relPath}: ${err?.message ?? String(err)}`,
    });
    try {
        // `recursive: true` is a no-op when the directory already exists.
        fs.mkdirSync(path.dirname(fullPath), { recursive: true });
    }
    catch (err) {
        return failed(err);
    }
    try {
        // Without --force, 'wx' makes the existence check and the write one
        // atomic step: the write fails with EEXIST instead of clobbering a
        // file that appeared after a separate existsSync() check.
        fs.writeFileSync(fullPath, content, { flag: force ? 'w' : 'wx' });
    }
    catch (err) {
        if (!force && err?.code === 'EEXIST') {
            return {
                status: 'already',
                message: `[ci:${provider}] ${relPath} already exists. Use --force to overwrite.`,
            };
        }
        return failed(err);
    }
    // GitHub uses the `prodcycle/actions/compliance` action, which reads
    // its key from `secrets.PRODCYCLE_API_KEY`. GitLab and CircleCI invoke
    // the CLI directly, which reads `PC_API_KEY` from the environment.
    const followup = provider === 'gitlab'
        ? `Include it from .gitlab-ci.yml: \`include: '${relPath}'\`. `
        : provider === 'circleci'
            ? `Reference it from .circleci/config.yml or merge the contents in. `
            : '';
    const secretName = provider === 'github' ? 'PRODCYCLE_API_KEY' : 'PC_API_KEY';
    return {
        status: 'installed',
        message: `[ci:${provider}] wrote ${fullPath}. ` +
            followup +
            `Set ${secretName} as a secret/variable in your ${provider} project before the first run.`,
    };
}
796
+ // GitHub: delegate to the dedicated `prodcycle/actions/compliance` GitHub
797
+ // Action rather than calling the CLI directly. The action handles diff vs
798
+ // full-repo scan automatically (PR events vs push events), posts inline
799
+ // annotations on the PR diff, and writes a summary comment — none of
800
+ // which the CLI's own SARIF output reproduces. See
801
+ // https://github.com/prodcycle/actions for the full input reference.
802
+ const GITHUB_WORKFLOW = `name: Prodcycle Compliance
803
+
804
+ on:
805
+ pull_request:
806
+ push:
807
+ # Update this list to match your repo's default branch (e.g. master,
808
+ # develop). GitHub Actions does not support a dynamic
809
+ # \$default-branch / \${{ github.event.repository.default_branch }}
810
+ # value here, so the branch name has to be literal.
811
+ branches: [main]
812
+
813
+ jobs:
814
+ scan:
815
+ runs-on: ubuntu-latest
816
+ permissions:
817
+ contents: read
818
+ pull-requests: write
819
+ steps:
820
+ - uses: actions/checkout@v4
821
+ with:
822
+ fetch-depth: 0
823
+ - uses: prodcycle/actions/compliance@v2
824
+ with:
825
+ api-key: \${{ secrets.PRODCYCLE_API_KEY }}
826
+ `;
827
+ const GITLAB_WORKFLOW = `# Prodcycle compliance scan. Include from your main .gitlab-ci.yml:
828
+ # include:
829
+ # - local: .gitlab-ci.prodcycle.yml
830
+ #
831
+ # Set PC_API_KEY as a CI/CD variable (Settings → CI/CD → Variables) before
832
+ # the first run. Mark it Masked + Protected.
833
+
834
+ prodcycle:
835
+ stage: test
836
+ image: node:22-alpine
837
+ variables:
838
+ GIT_DEPTH: "0"
839
+ before_script:
840
+ - apk add --no-cache git
841
+ script:
842
+ - |
843
+ if [ "$CI_PIPELINE_SOURCE" = "merge_request_event" ]; then
844
+ git fetch --no-tags origin "$CI_MERGE_REQUEST_TARGET_BRANCH_NAME"
845
+ npx --yes prodcycle scan . \\
846
+ --pr "origin/$CI_MERGE_REQUEST_TARGET_BRANCH_NAME..HEAD" \\
847
+ --format sarif --output prodcycle.sarif
848
+ else
849
+ npx --yes prodcycle scan . --format sarif --output prodcycle.sarif
850
+ fi
851
+ artifacts:
852
+ when: always
853
+ paths:
854
+ - prodcycle.sarif
855
+ reports:
856
+ sast: prodcycle.sarif
857
+ rules:
858
+ - if: $CI_PIPELINE_SOURCE == "merge_request_event"
859
+ - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
860
+ `;
861
+ const CIRCLECI_WORKFLOW = `# Prodcycle compliance scan. To use this, either replace .circleci/config.yml
862
+ # or include it as a continuation/orb. Minimum example:
863
+ #
864
+ # version: 2.1
865
+ # workflows:
866
+ # compliance:
867
+ # jobs:
868
+ # - prodcycle-scan
869
+ #
870
+ # Set PC_API_KEY as a project environment variable in CircleCI before the
871
+ # first run.
872
+ #
873
+ # CircleCI does not expose the PR target branch as a built-in env var
874
+ # (\`CIRCLE_BASE_BRANCH\` does not exist; see
875
+ # https://circleci.com/docs/reference/variables/), so to scope PR scans
876
+ # to changed files only, set a project-level env var \`PRODCYCLE_PR_BASE\`
877
+ # to the branch your PRs target (e.g. \`main\`, \`develop\`). When unset,
878
+ # this template runs a full-repo scan.
879
+
880
+ version: 2.1
881
+ jobs:
882
+ prodcycle-scan:
883
+ docker:
884
+ - image: cimg/node:22.0
885
+ steps:
886
+ - checkout
887
+ - run:
888
+ name: Run Prodcycle compliance scan
889
+ command: |
890
+ if [ -n "\${PRODCYCLE_PR_BASE:-}" ] && [ -n "\${CIRCLE_PULL_REQUEST:-}" ]; then
891
+ git fetch --no-tags origin "$PRODCYCLE_PR_BASE"
892
+ npx --yes prodcycle scan . \\
893
+ --pr "origin/$PRODCYCLE_PR_BASE..HEAD" \\
894
+ --format sarif --output prodcycle.sarif
895
+ else
896
+ npx --yes prodcycle scan . --format sarif --output prodcycle.sarif
897
+ fi
898
+ # \`when: always\` so the SARIF artifact uploads even when the scan
899
+ # exits non-zero — compliance scanners exit 1 when findings exist,
900
+ # which is precisely the case where you want the report preserved.
901
+ - store_artifacts:
902
+ path: prodcycle.sarif
903
+ destination: prodcycle-sarif
904
+ when: always
905
+
906
+ workflows:
907
+ compliance:
908
+ jobs:
909
+ - prodcycle-scan
910
+ `;
911
+ /**
912
+ * Compute the list of files changed in a git diff range, relative to repo root.
913
+ * Filters to ACMR (Added/Copied/Modified/Renamed) so deleted files don't get
914
+ * scanned (they're not on disk anymore, and walk() would skip them anyway).
915
+ *
916
+ * Errors handled explicitly:
917
+ * - `ENOENT` (git not in PATH) → actionable "git executable not found"
918
+ * - `ETIMEDOUT` (git stalled — credential helper / auth prompt / etc.)
919
+ * → fail fast with a 30s timeout so CI jobs don't hang
920
+ * - non-zero exit → forward git's stderr so the user can see e.g. the
921
+ * "fatal: bad revision" message and fix the range argument
922
+ *
923
+ * Output paths are normalised to the platform separator: git emits POSIX
924
+ * forward-slashes always, but the file walker on Windows produces
925
+ * back-slashed `relPath` values. Without this conversion the literal
926
+ * minimatch comparison silently excludes every changed file on Windows.
927
+ */
928
+ const GIT_DIFF_TIMEOUT_MS = 30_000;
929
+ function computeChangedFiles(repoPath, range) {
930
+ let stdout;
931
+ try {
932
+ stdout = (0, child_process_1.execFileSync)('git', ['-C', repoPath, 'diff', '--name-only', '--diff-filter=ACMR', range], {
933
+ encoding: 'utf8',
934
+ stdio: ['ignore', 'pipe', 'pipe'],
935
+ timeout: GIT_DIFF_TIMEOUT_MS,
936
+ });
937
+ }
938
+ catch (e) {
939
+ if (e?.code === 'ENOENT') {
940
+ console.error('--pr: git executable not found in PATH');
941
+ process.exit(2);
942
+ }
943
+ if (e?.code === 'ETIMEDOUT' || e?.signal === 'SIGTERM') {
944
+ console.error(`--pr: git diff timed out after ${GIT_DIFF_TIMEOUT_MS}ms (range "${range}"). ` +
945
+ 'Check that the range does not require network access or credentials.');
946
+ process.exit(2);
947
+ }
948
+ const stderr = e?.stderr?.toString?.() ?? e?.message ?? 'unknown error';
949
+ console.error(`--pr: git diff failed for range "${range}": ${stderr.trim()}`);
950
+ process.exit(2);
951
+ }
952
+ return stdout
953
+ .split('\n')
954
+ .map((s) => s.trim().split('/').join(path.sep))
955
+ .filter(Boolean);
956
+ }
608
957
  function readStdin() {
609
958
  return new Promise((resolve, reject) => {
610
959
  if (process.stdin.isTTY) {
@@ -617,4 +966,10 @@ function readStdin() {
617
966
  process.stdin.on('error', reject);
618
967
  });
619
968
  }
620
- program.parse(injectScanDefault(process.argv));
969
+ // Only auto-parse when invoked as a script (i.e. via the `prodcycle`
970
+ // bin entry). Importing this module from tests must NOT execute the
971
+ // CLI — otherwise `node --test` triggers a real `program.parse` and
972
+ // fails before the test cases can run.
973
+ if (require.main === module) {
974
+ program.parse(injectScanDefault(process.argv));
975
+ }
package/dist/index.d.ts CHANGED
@@ -1,8 +1,28 @@
1
- import { ScanOptions, GateOptions } from './api-client';
1
+ import { ScanOptions, GateOptions, BackfillError } from './api-client';
2
2
  export * from './api-client';
3
3
  export * from './formatters/table';
4
4
  export * from './formatters/prompt';
5
5
  export * from './formatters/sarif';
6
+ /**
7
+ * Set when the server-side scanner threw and the API was configured to
8
+ * fail closed (the default). When this is present, callers MUST treat
9
+ * `passed: false` as "scanner unavailable — cannot certify compliance"
10
+ * rather than "code is dirty." Mirrors the API's `ScannerErrorInfo`
11
+ * shape; see `packages/compliance-code-scanner/api/src/domain/services/
12
+ * compliance-scan.service.ts` (`ScannerErrorInfo`) for the field
13
+ * contract.
14
+ *
15
+ * Without this surfaced to the CLI's --output JSON, a benchmark or CI
16
+ * report shows `passed: false, findings: []` and the user can't tell
17
+ * clean code (no findings because nothing is wrong) apart from a
18
+ * failed scanner (no findings because nothing got evaluated).
19
+ */
20
+ export interface ScannerError {
21
+ code: 'SCANNER_GATE_THREW';
22
+ message: string;
23
+ errorClass?: string;
24
+ errorCode?: string;
25
+ }
6
26
  interface ScanReturn {
7
27
  scanId?: string;
8
28
  passed: boolean;
@@ -10,7 +30,22 @@ interface ScanReturn {
10
30
  findings: unknown[];
11
31
  report: unknown;
12
32
  summary: unknown;
33
+ scannerError?: ScannerError;
34
+ /**
35
+ * Set when `validateChunked`'s findings-backfill GET failed. The summary
36
+ * still reflects the real finding count, but the structured findings are
37
+ * unavailable in this response. Callers should retry via `prodcycle scans
38
+ * <scanId>` to recover them. SARIF/JSON consumers branch on this to flag
39
+ * the result as incomplete rather than mistakenly clean.
40
+ */
41
+ backfillError?: BackfillError;
13
42
  }
43
+ /**
44
+ * Format and write the scanner-error warning to stderr. Centralized so the
45
+ * wording stays consistent across `scan()`, `gate()`, and the `scans <id>`
46
+ * CLI subcommand.
47
+ */
48
+ export declare function emitScannerErrorWarning(scannerError: ScannerError): void;
14
49
  /**
15
50
  * Scan a repository by collecting files and sending them to the API.
16
51
  *
@@ -31,6 +66,7 @@ export declare function scan(params: {
31
66
  * endpoint, used by coding-agent post-edit hooks).
32
67
  */
33
68
  export declare function gate(options: GateOptions): Promise<{
69
+ scannerError?: ScannerError | undefined;
34
70
  passed: boolean;
35
71
  exitCode: number;
36
72
  findings: unknown[];
package/dist/index.js CHANGED
@@ -14,6 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.emitScannerErrorWarning = emitScannerErrorWarning;
17
18
  exports.scan = scan;
18
19
  exports.gate = gate;
19
20
  const api_client_1 = require("./api-client");
@@ -22,6 +23,17 @@ __exportStar(require("./api-client"), exports);
22
23
  __exportStar(require("./formatters/table"), exports);
23
24
  __exportStar(require("./formatters/prompt"), exports);
24
25
  __exportStar(require("./formatters/sarif"), exports);
26
+ /**
27
+ * Format and write the scanner-error warning to stderr. Centralized so the
28
+ * wording stays consistent across `scan()`, `gate()`, and the `scans <id>`
29
+ * CLI subcommand.
30
+ */
31
+ function emitScannerErrorWarning(scannerError) {
32
+ process.stderr.write(`⚠ Scanner error: ${scannerError.message}` +
33
+ (scannerError.errorClass ? ` (errorClass=${scannerError.errorClass})` : '') +
34
+ (scannerError.errorCode ? ` (errorCode=${scannerError.errorCode})` : '') +
35
+ '\n');
36
+ }
25
37
  /**
26
38
  * Scan a repository by collecting files and sending them to the API.
27
39
  *
@@ -56,13 +68,40 @@ async function scan(params) {
56
68
  else {
57
69
  response = await client.validate(files, frameworks, options);
58
70
  }
71
+ // Pull `scannerError` through if the API set it. Picking the field
72
+ // explicitly (rather than `...response`) so the CLI's public surface
73
+ // doesn't accidentally expose internal fields if the API adds them.
74
+ // `scannerError` lives in this module rather than `api-client.ts`, so the
75
+ // cast bridges the type boundary; `backfillError` is typed in
76
+ // `ScanResult` and needs no cast.
77
+ const scannerError = response.scannerError;
78
+ const backfillError = response.backfillError;
79
+ // Exit code semantics:
80
+ // 0 = passed (no actionable findings, no scanner error)
81
+ // 1 = findings present, code not clean
82
+ // 2 = scanner unavailable — could not certify either way; fail-closed
83
+ // Distinguish (1) from (2) so CI policy can decide whether a non-zero
84
+ // exit means "developer must fix code" or "operator must investigate
85
+ // scanner."
86
+ const exitCode = scannerError ? 2 : response.passed ? 0 : 1;
87
+ // Surface scanner errors prominently to stderr so the user sees the
88
+ // distinction between a clean pass and an undetermined result. The
89
+ // JSON output already carries the structured field for programmatic
90
+ // consumers; this is for humans running the CLI interactively.
91
+ if (scannerError)
92
+ emitScannerErrorWarning(scannerError);
59
93
  return {
60
94
  scanId: response.scanId,
61
95
  passed: response.passed,
62
- exitCode: response.passed ? 0 : 1,
96
+ exitCode,
63
97
  findings: response.findings ?? [],
64
98
  report: response.report ?? null,
65
99
  summary: response.summary,
100
+ ...(scannerError ? { scannerError } : {}),
101
+ // Forward `backfillError` so SARIF/JSON consumers downstream of `scan()`
102
+ // can detect "summary is real but findings unavailable" without parsing
103
+ // stderr. validateChunked sets it when its enrichment GET fails.
104
+ ...(backfillError ? { backfillError } : {}),
66
105
  };
67
106
  }
68
107
  /**
@@ -73,11 +112,19 @@ async function gate(options) {
73
112
  const { files, frameworks = ['soc2'], ...scanOpts } = options;
74
113
  const client = new api_client_1.ComplianceApiClient(options.apiUrl, options.apiKey);
75
114
  const response = await client.hook(files, frameworks, scanOpts);
115
+ // Same scannerError plumbing as scan() above. Coding-agent hooks
116
+ // especially need to distinguish "code is clean" from "scanner is
117
+ // down" — agents should NOT proceed on the latter.
118
+ const scannerError = response.scannerError;
119
+ const exitCode = scannerError ? 2 : response.passed ? 0 : 1;
120
+ if (scannerError)
121
+ emitScannerErrorWarning(scannerError);
76
122
  return {
77
123
  passed: response.passed,
78
- exitCode: response.passed ? 0 : 1,
124
+ exitCode,
79
125
  findings: response.findings ?? [],
80
126
  prompt: response.prompt,
81
127
  summary: response.summary,
128
+ ...(scannerError ? { scannerError } : {}),
82
129
  };
83
130
  }
package/dist/utils/fs.js CHANGED
@@ -38,7 +38,73 @@ const fs = __importStar(require("fs"));
38
38
  const path = __importStar(require("path"));
39
39
  const minimatch_1 = require("minimatch");
40
40
  const MAX_FILE_SIZE = 256 * 1024; // 256 KB
41
- const MAX_TOTAL_FILES = 10_000;
// Ceiling used when `PRODCYCLE_MAX_FILES` is unset or not a valid count.
const DEFAULT_MAX_TOTAL_FILES = 50_000;
/**
 * Parse a `PRODCYCLE_MAX_FILES` value.
 *
 * Only a plain positive decimal integer is accepted (surrounding whitespace
 * and a leading `+` are tolerated). Anything else falls back to the default.
 * `parseInt` is deliberately not used: it accepts garbage after a numeric
 * prefix, so `5e4` would silently become a cap of 5 files and `12abc` a cap
 * of 12.
 *
 * @param raw Raw environment value, or undefined when the variable is unset.
 * @returns The parsed ceiling, or `DEFAULT_MAX_TOTAL_FILES`.
 */
function parseMaxTotalFiles(raw) {
    if (raw === undefined || raw === null)
        return DEFAULT_MAX_TOTAL_FILES;
    const text = String(raw).trim();
    if (!/^\+?\d+$/.test(text))
        return DEFAULT_MAX_TOTAL_FILES;
    const parsed = Number(text);
    return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_TOTAL_FILES;
}
/**
 * Total file ceiling per scan. Hit on the OSS-CLI benchmark scanning
 * `hapifhir/hapi-fhir` (~13k files) — the CLI silently dropped ~3k files
 * past the cap. Default raised from the original 10k to 50k, and now
 * overridable via `PRODCYCLE_MAX_FILES` for monorepos that need a
 * different ceiling without patching/rebuilding. The API's chunked-
 * session endpoint already supports up to 2,000 files per chunk, so a
 * 50k-file repo is fed in 25+ chunks; the cap is here purely so a
 * pathological symlink loop or `.git`-tracked-as-source repo doesn't
 * exhaust the client's memory before the SCANNABLE_EXTENSIONS filter
 * has a chance to drop most of the entries.
 *
 * The environment variable is read once, when this module is loaded.
 */
const MAX_TOTAL_FILES = parseMaxTotalFiles(process.env['PRODCYCLE_MAX_FILES']);
60
+ /**
61
+ * Extensions and exact filenames the server-side `isScannable` filter
62
+ * accepts. Pre-filtering client-side avoids:
63
+ * - bloating the wire payload with images / fonts / docs / archives
64
+ * that the API just drops on receipt
65
+ * - hitting MAX_TOTAL_FILES on repos like hapi-fhir or the Linux
66
+ * kernel where most files are not scannable
67
+ *
68
+ * Keep in lock-step with `api/src/domain/services/compliance-scan.service.ts`:
69
+ * - APPLICATION_CODE_EXTENSIONS (the source-code allowlist)
70
+ * - INFRASTRUCTURE_EXTENSIONS (.tf, .yaml, .yml, .json, .sql)
71
+ * - INFRASTRUCTURE_FILENAMES (dockerfile, .env)
72
+ *
73
+ * Files outside this set are skipped during walk. Source-of-truth is
74
+ * the server filter; this is just an optimization so we don't pay the
75
+ * wire cost for files the server will reject anyway.
76
+ */
77
+ const SCANNABLE_EXTENSIONS = new Set([
78
+ // Application code (must mirror APPLICATION_CODE_EXTENSIONS in the API)
79
+ '.ts',
80
+ '.tsx',
81
+ '.js',
82
+ '.jsx',
83
+ '.py',
84
+ '.go',
85
+ '.java',
86
+ '.rb',
87
+ '.php',
88
+ '.rs',
89
+ '.cs',
90
+ '.kt',
91
+ '.scala',
92
+ '.c',
93
+ '.cpp',
94
+ '.h',
95
+ '.hpp',
96
+ // Infrastructure-as-code (must mirror INFRASTRUCTURE_EXTENSIONS in the API)
97
+ '.tf',
98
+ '.yaml',
99
+ '.yml',
100
+ '.json',
101
+ '.sql',
102
+ ]);
103
+ const SCANNABLE_FILENAMES = new Set([
104
+ 'dockerfile',
105
+ 'containerfile',
106
+ '.env',
107
+ ]);
42
108
  /**
43
109
  * Directories skipped unconditionally. Kept in parity with
44
110
  * `packages/compliance-code-scanner/src/ignore-utils.ts`.
@@ -79,7 +145,15 @@ const SKIP_DIRS = new Set([
79
145
  ]);
80
146
  const SKIP_DIR_SUFFIXES = ['.egg-info'];
81
147
  const SKIP_FILE_EXTENSIONS = ['.lock', '.min.js', '.min.css', '.map', '.bundle.js', '.tfstate', '.tfstate.backup'];
82
- const SKIP_FILE_NAMES = new Set(['package-lock.json']);
148
+ // Files the server-side `isScannable` filter drops on receipt. Listing
149
+ // them client-side avoids paying the wire cost just to have the API
150
+ // throw the bytes away. Keep in lock-step with the server's filter in
151
+ // `compliance-code-scanner/src/collector.ts`.
152
+ const SKIP_FILE_NAMES = new Set([
153
+ 'package-lock.json',
154
+ 'package.json',
155
+ 'tsconfig.json',
156
+ ]);
83
157
  /**
84
158
  * Load .gitignore patterns from the repo root.
85
159
  *
@@ -132,8 +206,15 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
132
206
  if (matchesAny(relPath, userExcludes))
133
207
  return true;
134
208
  }
135
- // .env* files are always scanned, even if listed in .gitignore (common case)
136
- if (name.startsWith('.env') || name.endsWith('.env'))
209
+ // .env-family files are always scanned even if .gitignored — the
210
+ // common case for `.env`, `.env.local`, `.env.production`, `.envrc`,
211
+ // etc., where the whole point of scanning is to catch hardcoded
212
+ // secrets. The previous `endsWith('.env')` half of this carve-out
213
+ // also matched arbitrary `foo.env` files (a build artifact, a config
214
+ // dump, etc.), which let unrelated files bypass gitignore. Restrict
215
+ // to names that start with `.env`. Keep in lock-step with
216
+ // `compliance-code-scanner/src/ignore-utils.ts`.
217
+ if (name.startsWith('.env'))
137
218
  return false;
138
219
  for (const pattern of ignores) {
139
220
  if (name === pattern ||
@@ -159,6 +240,30 @@ function shouldSkipFileByName(name) {
159
240
  return true;
160
241
  return SKIP_FILE_EXTENSIONS.some((ext) => name.endsWith(ext));
161
242
  }
/**
 * Client-side counterpart of the server's `isScannable` filter: decide from
 * the bare file name whether a file is worth sending to the API.
 *
 * The name is lower-cased, then accepted when any of these holds:
 *   - it is listed in `SCANNABLE_FILENAMES`;
 *   - it is a Dockerfile variant (`dockerfile.prod`, `dockerfile.dev`, …);
 *   - it starts with `.env` — kept in lock-step with the carve-out in
 *     `shouldIgnore`, which preserves the whole .env* family from
 *     gitignore. The server may drop unknown variants (e.g. .envrc), but
 *     forwarding them is preferred over diverging from the ignore policy;
 *   - the text from its last `.` onward is in `SCANNABLE_EXTENSIONS`.
 *
 * Names that match none of these, including names without a dot, are
 * rejected.
 */
function isScannableFilename(name) {
    const normalized = name.toLowerCase();
    const acceptedByName = SCANNABLE_FILENAMES.has(normalized) ||
        normalized.startsWith('dockerfile.') ||
        normalized.startsWith('.env');
    if (acceptedByName) {
        return true;
    }
    const extStart = normalized.lastIndexOf('.');
    return extStart !== -1 && SCANNABLE_EXTENSIONS.has(normalized.slice(extStart));
}
162
267
  async function collectFiles(baseDir, includePatterns, excludePatterns) {
163
268
  const repoRoot = path.resolve(baseDir);
164
269
  const ignores = loadGitignore(repoRoot);
@@ -195,6 +300,14 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
195
300
  continue;
196
301
  if (shouldSkipFileByName(name))
197
302
  continue;
303
+ // Skip files the server-side `isScannable` filter will drop anyway.
304
+ // No point paying the wire cost. When `--include` patterns are given
305
+ // we honor those instead — explicit user intent overrides the
306
+ // server-shape allowlist.
307
+ if ((!includePatterns || includePatterns.length === 0) &&
308
+ !isScannableFilename(name)) {
309
+ continue;
310
+ }
198
311
  if (includePatterns && includePatterns.length > 0 && !matchesAny(relPath, includePatterns)) {
199
312
  continue;
200
313
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prodcycle/prodcycle",
3
- "version": "0.5.0",
3
+ "version": "0.6.1",
4
4
  "description": "Multi-framework policy-as-code compliance scanner for infrastructure and application code.",
5
5
  "homepage": "https://docs.prodcycle.com",
6
6
  "repository": {
@@ -20,6 +20,7 @@
20
20
  },
21
21
  "scripts": {
22
22
  "build": "tsc",
23
+ "test": "npm run build && node --test test/*.test.mjs",
23
24
  "prepublishOnly": "npm run build"
24
25
  },
25
26
  "keywords": [