@prodcycle/prodcycle 0.4.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -14,6 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
+ exports.emitScannerErrorWarning = emitScannerErrorWarning;
17
18
  exports.scan = scan;
18
19
  exports.gate = gate;
19
20
  const api_client_1 = require("./api-client");
@@ -23,42 +24,107 @@ __exportStar(require("./formatters/table"), exports);
23
24
  __exportStar(require("./formatters/prompt"), exports);
24
25
  __exportStar(require("./formatters/sarif"), exports);
25
26
  /**
26
- * Scan a repository by collecting files and sending them to the API
27
+ * Format and write the scanner-error warning to stderr. Centralized so the
28
+ * wording stays consistent across `scan()`, `gate()`, and the `scans <id>`
29
+ * CLI subcommand.
30
+ */
31
+ function emitScannerErrorWarning(scannerError) {
32
+ process.stderr.write(`⚠ Scanner error: ${scannerError.message}` +
33
+ (scannerError.errorClass ? ` (errorClass=${scannerError.errorClass})` : '') +
34
+ (scannerError.errorCode ? ` (errorCode=${scannerError.errorCode})` : '') +
35
+ '\n');
36
+ }
37
/**
 * Scan a repository by collecting files and sending them to the API.
 *
 * Modes (selected via `options.config.mode`):
 *   - `'sync'` (default, also used for any unrecognized value):
 *     `client.validate(...)`
 *   - `'async'`: `client.validateAndPoll(...)`
 *   - `'chunked'`: `client.validateChunked(...)`
 *
 * Exit code semantics:
 *   0 = passed (no actionable findings, no scanner error)
 *   1 = findings present, code not clean
 *   2 = scanner unavailable — could not certify either way; fail-closed
 * Codes 1 and 2 are kept distinct so CI policy can tell "developer must fix
 * code" apart from "operator must investigate scanner".
 *
 * @param {{ repoPath: string, frameworks?: string[], options?: object }} params
 *   `frameworks` defaults to `['soc2']`. `options` may carry `include`,
 *   `exclude`, `apiUrl`, `apiKey` and `config.mode`.
 * @returns {Promise<object>} `{ scanId, passed, exitCode, findings, report,
 *   summary }`, plus `scannerError` / `backfillError` when the API set them.
 *   `passed` is always `false` when `scannerError` is present, so it agrees
 *   with `exitCode === 2`.
 */
async function scan(params) {
  const { repoPath, frameworks = ['soc2'], options = {} } = params;
  const files = await (0, fs_1.collectFiles)(repoPath, options.include, options.exclude);
  if (Object.keys(files).length === 0) {
    // Nothing to scan: report a trivial pass without calling the API.
    return {
      passed: true,
      exitCode: 0,
      findings: [],
      report: null,
      summary: undefined,
    };
  }
  const client = new api_client_1.ComplianceApiClient(options.apiUrl, options.apiKey);
  const mode = options.config?.mode ?? 'sync';
  let response;
  if (mode === 'async') {
    response = await client.validateAndPoll(files, frameworks, options);
  }
  else if (mode === 'chunked') {
    response = await client.validateChunked(files, frameworks, options);
  }
  else {
    response = await client.validate(files, frameworks, options);
  }
  // Fields are picked explicitly (rather than spreading `response`) so the
  // public result does not accidentally expose internal fields if the API
  // adds them later.
  const { scannerError, backfillError } = response;
  // Fail closed: a scanner error means the result is undetermined, so it
  // must never be reported as a pass — neither through `exitCode` nor
  // through `passed`, which library callers may check on its own.
  const passed = scannerError ? false : response.passed;
  const exitCode = scannerError ? 2 : response.passed ? 0 : 1;
  // Structured output already carries `scannerError` for programs; the
  // stderr line is for humans running the CLI interactively.
  if (scannerError) {
    emitScannerErrorWarning(scannerError);
  }
  return {
    scanId: response.scanId,
    passed,
    exitCode,
    findings: response.findings ?? [],
    report: response.report ?? null,
    summary: response.summary,
    ...(scannerError ? { scannerError } : {}),
    // Forwarded so SARIF/JSON consumers downstream of `scan()` can detect
    // "summary is real but findings unavailable" without parsing stderr.
    ...(backfillError ? { backfillError } : {}),
  };
}
50
107
  /**
51
- * Gate code strings directly without writing to disk
108
+ * Gate code strings directly without writing to disk (low-latency hook
109
+ * endpoint, used by coding-agent post-edit hooks).
52
110
  */
53
111
  async function gate(options) {
54
112
  const { files, frameworks = ['soc2'], ...scanOpts } = options;
55
113
  const client = new api_client_1.ComplianceApiClient(options.apiUrl, options.apiKey);
56
114
  const response = await client.hook(files, frameworks, scanOpts);
115
+ // Same scannerError plumbing as scan() above. Coding-agent hooks
116
+ // especially need to distinguish "code is clean" from "scanner is
117
+ // down" — agents should NOT proceed on the latter.
118
+ const scannerError = response.scannerError;
119
+ const exitCode = scannerError ? 2 : response.passed ? 0 : 1;
120
+ if (scannerError)
121
+ emitScannerErrorWarning(scannerError);
57
122
  return {
58
123
  passed: response.passed,
59
- exitCode: response.passed ? 0 : 1,
60
- findings: response.findings || [],
124
+ exitCode,
125
+ findings: response.findings ?? [],
61
126
  prompt: response.prompt,
62
- summary: response.summary
127
+ summary: response.summary,
128
+ ...(scannerError ? { scannerError } : {}),
63
129
  };
64
130
  }
package/dist/utils/fs.js CHANGED
@@ -38,7 +38,73 @@ const fs = __importStar(require("fs"));
38
38
  const path = __importStar(require("path"));
39
39
  const minimatch_1 = require("minimatch");
40
40
  const MAX_FILE_SIZE = 256 * 1024; // 256 KB
41
- const MAX_TOTAL_FILES = 10_000;
41
/**
 * Default ceiling on the number of files collected per scan (raised from
 * the original 10k after a ~13k-file repository was silently truncated).
 */
const DEFAULT_MAX_TOTAL_FILES = 50_000;
/**
 * Parse a `PRODCYCLE_MAX_FILES`-style override.
 *
 * Only a plain positive whole number (optionally `+`-prefixed, surrounding
 * whitespace ignored) is accepted. Anything else falls back, because a
 * lenient `parseInt` would turn `1e5` into a cap of 1 and `5000abc` into
 * 5000 — silently truncating the scan.
 *
 * @param {string | undefined} raw - Raw environment value.
 * @param {number} [fallback=DEFAULT_MAX_TOTAL_FILES] - Value used when `raw`
 *   is missing or not a positive safe integer.
 * @returns {number} The parsed cap, or `fallback`.
 */
function parseMaxTotalFiles(raw, fallback = DEFAULT_MAX_TOTAL_FILES) {
  if (typeof raw !== 'string') {
    return fallback;
  }
  const trimmed = raw.trim();
  if (!/^\+?\d+$/.test(trimmed)) {
    return fallback;
  }
  const parsed = Number(trimmed);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}
/**
 * Total file ceiling per scan, overridable via `PRODCYCLE_MAX_FILES` for
 * monorepos that need a different ceiling without patching/rebuilding.
 * The cap exists so a pathological symlink loop or a `.git`-tracked-as-
 * source repo doesn't exhaust the client's memory during the walk.
 */
const MAX_TOTAL_FILES = parseMaxTotalFiles(process.env['PRODCYCLE_MAX_FILES']);
60
/**
 * Client-side allowlist of file extensions and exact (lower-cased) file
 * names worth sending to the API.
 *
 * This is an optimization only — the server-side `isScannable` filter is
 * the source of truth. Pre-filtering here avoids:
 *   - bloating the wire payload with images / fonts / docs / archives that
 *     the API drops on receipt
 *   - hitting MAX_TOTAL_FILES on repos where most files are not scannable
 *
 * Keep in lock-step with `api/src/domain/services/compliance-scan.service.ts`
 * (APPLICATION_CODE_EXTENSIONS, INFRASTRUCTURE_EXTENSIONS,
 * INFRASTRUCTURE_FILENAMES).
 */
const SCANNABLE_EXTENSIONS = new Set([
  // Application code (must mirror APPLICATION_CODE_EXTENSIONS in the API)
  ...['.ts', '.tsx', '.js', '.jsx'],
  ...['.py', '.go', '.java', '.rb', '.php', '.rs', '.cs', '.kt', '.scala'],
  ...['.c', '.cpp', '.h', '.hpp'],
  // Infrastructure-as-code (must mirror INFRASTRUCTURE_EXTENSIONS in the API)
  ...['.tf', '.yaml', '.yml', '.json', '.sql'],
]);
// Exact file names, compared after lower-casing.
// NOTE(review): `containerfile` is accepted here, but the server list this
// file has been documented against names only `dockerfile` and `.env` —
// confirm the server accepts it too.
const SCANNABLE_FILENAMES = new Set(['dockerfile', 'containerfile', '.env']);
42
108
  /**
43
109
  * Directories skipped unconditionally. Kept in parity with
44
110
  * `packages/compliance-code-scanner/src/ignore-utils.ts`.
@@ -79,7 +145,15 @@ const SKIP_DIRS = new Set([
79
145
  ]);
80
146
  const SKIP_DIR_SUFFIXES = ['.egg-info'];
81
147
  const SKIP_FILE_EXTENSIONS = ['.lock', '.min.js', '.min.css', '.map', '.bundle.js', '.tfstate', '.tfstate.backup'];
82
- const SKIP_FILE_NAMES = new Set(['package-lock.json']);
148
// Exact file names that are never collected. The server-side `isScannable`
// filter drops these on receipt, so skipping them client-side saves the
// wire cost. Keep in lock-step with the server's filter in
// `compliance-code-scanner/src/collector.ts`.
const SKIP_FILE_NAMES = new Set(['package-lock.json', 'package.json', 'tsconfig.json']);
83
157
  /**
84
158
  * Load .gitignore patterns from the repo root.
85
159
  *
@@ -132,8 +206,15 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
132
206
  if (matchesAny(relPath, userExcludes))
133
207
  return true;
134
208
  }
135
- // .env* files are always scanned, even if listed in .gitignore (common case)
136
- if (name.startsWith('.env') || name.endsWith('.env'))
209
+ // .env-family files are always scanned even if .gitignored — the
210
+ // common case for `.env`, `.env.local`, `.env.production`, `.envrc`,
211
+ // etc., where the whole point of scanning is to catch hardcoded
212
+ // secrets. The previous `endsWith('.env')` half of this carve-out
213
+ // also matched arbitrary `foo.env` files (a build artifact, a config
214
+ // dump, etc.), which let unrelated files bypass gitignore. Restrict
215
+ // to names that start with `.env`. Keep in lock-step with
216
+ // `compliance-code-scanner/src/ignore-utils.ts`.
217
+ if (name.startsWith('.env'))
137
218
  return false;
138
219
  for (const pattern of ignores) {
139
220
  if (name === pattern ||
@@ -159,6 +240,30 @@ function shouldSkipFileByName(name) {
159
240
  return true;
160
241
  return SKIP_FILE_EXTENSIONS.some((ext) => name.endsWith(ext));
161
242
  }
243
/**
 * Decide from the bare file name whether a file is worth sending to the
 * API. This is a client-side pre-filter so files the server's `isScannable`
 * filter would drop are not shipped, and so non-scannable noise does not
 * count towards MAX_TOTAL_FILES.
 *
 * A name is scannable when, compared case-insensitively, it is:
 *   1. an exact entry of SCANNABLE_FILENAMES;
 *   2. a container build-file variant (`dockerfile.*` or `containerfile.*`);
 *   3. any `.env*` name — matching the `.env` carve-out in `shouldIgnore`;
 *   4. a name whose final extension is in SCANNABLE_EXTENSIONS.
 *
 * The filter deliberately errs on the side of forwarding: the server may
 * still drop unknown variants, which is preferable to silently skipping a
 * file the scanner should see.
 *
 * @param {string} name - File base name (no directory part).
 * @returns {boolean} `true` if the file should be collected.
 */
function isScannableFilename(name) {
  const lower = name.toLowerCase();
  if (SCANNABLE_FILENAMES.has(lower)) {
    return true;
  }
  // Build-file variants (dockerfile.prod, containerfile.dev, …). Both base
  // names are accepted as exact matches, so their suffixed variants are
  // treated alike.
  if (lower.startsWith('dockerfile.') || lower.startsWith('containerfile.')) {
    return true;
  }
  if (lower.startsWith('.env')) {
    return true;
  }
  const dot = lower.lastIndexOf('.');
  return dot !== -1 && SCANNABLE_EXTENSIONS.has(lower.slice(dot));
}
162
267
  async function collectFiles(baseDir, includePatterns, excludePatterns) {
163
268
  const repoRoot = path.resolve(baseDir);
164
269
  const ignores = loadGitignore(repoRoot);
@@ -195,6 +300,14 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
195
300
  continue;
196
301
  if (shouldSkipFileByName(name))
197
302
  continue;
303
+ // Skip files the server-side `isScannable` filter will drop anyway.
304
+ // No point paying the wire cost. When `--include` patterns are given
305
+ // we honor those instead — explicit user intent overrides the
306
+ // server-shape allowlist.
307
+ if ((!includePatterns || includePatterns.length === 0) &&
308
+ !isScannableFilename(name)) {
309
+ continue;
310
+ }
198
311
  if (includePatterns && includePatterns.length > 0 && !matchesAny(relPath, includePatterns)) {
199
312
  continue;
200
313
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prodcycle/prodcycle",
3
- "version": "0.4.2",
3
+ "version": "0.6.0",
4
4
  "description": "Multi-framework policy-as-code compliance scanner for infrastructure and application code.",
5
5
  "homepage": "https://docs.prodcycle.com",
6
6
  "repository": {
@@ -20,6 +20,7 @@
20
20
  },
21
21
  "scripts": {
22
22
  "build": "tsc",
23
+ "test": "npm run build && node --test test/*.test.mjs",
23
24
  "prepublishOnly": "npm run build"
24
25
  },
25
26
  "keywords": [