@prodcycle/prodcycle 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -4,13 +4,10 @@ export * from './formatters/table';
4
4
  export * from './formatters/prompt';
5
5
  export * from './formatters/sarif';
6
6
  /**
7
- * Set when the server-side scanner threw and the API was configured to
7
+ * Set when the upstream scanner threw and the service was configured to
8
8
  * fail closed (the default). When this is present, callers MUST treat
9
9
  * `passed: false` as "scanner unavailable — cannot certify compliance"
10
- * rather than "code is dirty." Mirrors the API's `ScannerErrorInfo`
11
- * shape; see `packages/compliance-code-scanner/api/src/domain/services/
12
- * compliance-scan.service.ts` (`ScannerErrorInfo`) for the field
13
- * contract.
10
+ * rather than "code is dirty."
14
11
  *
15
12
  * Without this surfaced to the CLI's --output JSON, a benchmark or CI
16
13
  * report shows `passed: false, findings: []` and the user can't tell
package/dist/utils/fs.js CHANGED
@@ -58,21 +58,18 @@ const MAX_TOTAL_FILES = (() => {
58
58
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 50_000;
59
59
  })();
60
60
  /**
61
- * Extensions and exact filenames the server-side `isScannable` filter
62
- * accepts. Pre-filtering client-side avoids:
61
+ * Extensions and exact filenames the upstream scanner accepts. Pre-
62
+ * filtering client-side avoids:
63
63
  * - bloating the wire payload with images / fonts / docs / archives
64
- * that the API just drops on receipt
64
+ * that the service just drops on receipt
65
65
  * - hitting MAX_TOTAL_FILES on repos like hapi-fhir or the Linux
66
66
  * kernel where most files are not scannable
67
67
  *
68
- * Keep in lock-step with `api/src/domain/services/compliance-scan.service.ts`:
69
- * - APPLICATION_CODE_EXTENSIONS (the source-code allowlist)
70
- * - INFRASTRUCTURE_EXTENSIONS (.tf, .yaml, .yml, .json, .sql)
71
- * - INFRASTRUCTURE_FILENAMES (dockerfile, .env)
72
- *
73
- * Files outside this set are skipped during walk. Source-of-truth is
74
- * the server filter; this is just an optimization so we don't pay the
75
- * wire cost for files the server will reject anyway.
68
+ * The upstream allowlist is the source of truth — this set is an
69
+ * optimization, not a security boundary. Drift between the two sets
70
+ * is benign (extra entries here just send files the service will
71
+ * drop; missing entries here just skip files the service would scan).
72
+ * The lockstep contract is enforced by `lockstep-fs.test.mjs`.
76
73
  */
77
74
  const SCANNABLE_EXTENSIONS = new Set([
78
75
  // Application code (must mirror APPLICATION_CODE_EXTENSIONS in the API)
@@ -106,8 +103,9 @@ const SCANNABLE_FILENAMES = new Set([
106
103
  '.env',
107
104
  ]);
108
105
  /**
109
- * Directories skipped unconditionally. Kept in parity with
110
- * `packages/compliance-code-scanner/src/ignore-utils.ts`.
106
+ * Directories skipped unconditionally. Kept in parity with the upstream
107
+ * scanner's directory blocklist; the lockstep contract is enforced by
108
+ * `lockstep-fs.test.mjs`.
111
109
  */
112
110
  const SKIP_DIRS = new Set([
113
111
  'node_modules',
@@ -162,11 +160,37 @@ const SKIP_FILE_NAMES = new Set([
162
160
  * (see server-side fix in ignore-utils.ts).
163
161
  */
164
162
  function loadGitignore(repoPath) {
163
+ return readIgnoreFile(path.join(repoPath, '.gitignore'));
164
+ }
165
+ /**
166
+ * Load .prodcycleignore patterns from the repo root. Same gitignore-style
167
+ * syntax as `.gitignore`, applied additively on top of it.
168
+ *
169
+ * Use case: opt-in suppression for files that should be skipped at scan
170
+ * time but kept in version control. Concrete motivating cases from the
171
+ * OSS-bench sweep:
172
+ *
173
+ * - `gitleaks/cmd/generate/config/rules/aws.go` (and siblings) — scanner
174
+ * rule definitions embed example credentials inside Go struct literals
175
+ * (e.g. `AKIA[0-9A-Z]{16}` as a regex pattern); the SOC2/HIPAA
176
+ * hardcoded-credential rule has no way to distinguish those from real
177
+ * creds.
178
+ * - `bandit/.../extension_loader.py` (and siblings) — same FP class:
179
+ * scanner source ships representative credential-shape patterns as
180
+ * Python string literals.
181
+ *
182
+ * Patterns in `.prodcycleignore` are also dropped if they start with `!`
183
+ * (same negation-handling limitation as `.gitignore` here — the simple
184
+ * minimatch path doesn't have gitignore's re-include semantics).
185
+ */
186
+ function loadProdcycleIgnore(repoPath) {
187
+ return readIgnoreFile(path.join(repoPath, '.prodcycleignore'));
188
+ }
189
+ function readIgnoreFile(filePath) {
165
190
  try {
166
- const gitignorePath = path.join(repoPath, '.gitignore');
167
- if (!fs.existsSync(gitignorePath))
191
+ if (!fs.existsSync(filePath))
168
192
  return [];
169
- const content = fs.readFileSync(gitignorePath, 'utf-8');
193
+ const content = fs.readFileSync(filePath, 'utf-8');
170
194
  return content
171
195
  .split('\n')
172
196
  .map((line) => line.trim())
@@ -184,8 +208,23 @@ function matchesAny(filePath, patterns) {
184
208
  * Decide whether a directory or file entry should be excluded from collection.
185
209
  * Mirrors server `shouldIgnore` so scanner results stay consistent between
186
210
  * client-collected (CLI) and server-collected paths.
211
+ *
212
+ * Precedence (highest → lowest):
213
+ * 1. `SKIP_DIRS` / hidden-dir / suffix rules — unconditional.
214
+ * 2. `userExcludes` (CLI `--exclude`) — explicit user intent on this
215
+ * invocation.
216
+ * 3. `prodcycleIgnores` (`.prodcycleignore`) — explicit user intent
217
+ * written into the repo. Overrides the `.env*` carve-out below
218
+ * because the user is opting out at scan time on purpose (e.g.
219
+ * `.env.example` containing placeholder credentials in a
220
+ * scanner-source repo). `.gitignore` is NOT promoted to this
221
+ * precedence because gitignored `.env*` files are routinely
222
+ * committed-but-ignored real-credential locations and we want
223
+ * the secret-detection rule to fire.
224
+ * 4. `.env*` carve-out — always-scan, overrides `.gitignore` only.
225
+ * 5. `gitignores` (`.gitignore`) — incidental git-tracking config.
187
226
  */
188
- function shouldIgnore(name, relPath, ignores, userExcludes) {
227
+ function shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes) {
189
228
  if (SKIP_DIRS.has(name) ||
190
229
  SKIP_DIR_SUFFIXES.some((s) => name.endsWith(s)) ||
191
230
  (name.startsWith('.') &&
@@ -206,6 +245,25 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
206
245
  if (matchesAny(relPath, userExcludes))
207
246
  return true;
208
247
  }
248
+ // `.prodcycleignore` patterns are user-explicit scan-time intent —
249
+ // higher precedence than the `.env*` carve-out below. Without this
250
+ // ordering, a user trying to suppress a `.env.example` containing
251
+ // placeholder credentials in a scanner-source repo would find their
252
+ // pattern silently ignored. `.gitignore` patterns intentionally stay
253
+ // below the carve-out because gitignored `.env*` files are commonly
254
+ // real credentials we want to surface.
255
+ if (prodcycleIgnores.length > 0) {
256
+ for (const pattern of prodcycleIgnores) {
257
+ if (name === pattern ||
258
+ name + '/' === pattern ||
259
+ relPath === pattern ||
260
+ relPath + '/' === pattern) {
261
+ return true;
262
+ }
263
+ }
264
+ if (matchesAny(relPath, prodcycleIgnores))
265
+ return true;
266
+ }
209
267
  // .env-family files are always scanned even if .gitignored — the
210
268
  // common case for `.env`, `.env.local`, `.env.production`, `.envrc`,
211
269
  // etc., where the whole point of scanning is to catch hardcoded
@@ -216,7 +274,7 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
216
274
  // `compliance-code-scanner/src/ignore-utils.ts`.
217
275
  if (name.startsWith('.env'))
218
276
  return false;
219
- for (const pattern of ignores) {
277
+ for (const pattern of gitignores) {
220
278
  if (name === pattern ||
221
279
  name + '/' === pattern ||
222
280
  relPath === pattern ||
@@ -224,7 +282,7 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
224
282
  return true;
225
283
  }
226
284
  }
227
- if (matchesAny(relPath, ignores))
285
+ if (matchesAny(relPath, gitignores))
228
286
  return true;
229
287
  return false;
230
288
  }
@@ -266,13 +324,14 @@ function isScannableFilename(name) {
266
324
  }
267
325
  async function collectFiles(baseDir, includePatterns, excludePatterns) {
268
326
  const repoRoot = path.resolve(baseDir);
269
- const ignores = loadGitignore(repoRoot);
327
+ const gitignores = loadGitignore(repoRoot);
328
+ const prodcycleIgnores = loadProdcycleIgnore(repoRoot);
270
329
  const files = {};
271
330
  const state = { count: 0, limitReached: false };
272
- walk(repoRoot, repoRoot, ignores, includePatterns, excludePatterns, files, state);
331
+ walk(repoRoot, repoRoot, gitignores, prodcycleIgnores, includePatterns, excludePatterns, files, state);
273
332
  return files;
274
333
  }
275
- function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, state) {
334
+ function walk(dir, repoRoot, gitignores, prodcycleIgnores, includePatterns, userExcludes, files, state) {
276
335
  if (state.limitReached)
277
336
  return;
278
337
  let entries;
@@ -289,14 +348,14 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
289
348
  const fullPath = path.join(dir, name);
290
349
  const relPath = path.relative(repoRoot, fullPath);
291
350
  if (entry.isDirectory()) {
292
- if (shouldIgnore(name, relPath, ignores, userExcludes))
351
+ if (shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes))
293
352
  continue;
294
- walk(fullPath, repoRoot, ignores, includePatterns, userExcludes, files, state);
353
+ walk(fullPath, repoRoot, gitignores, prodcycleIgnores, includePatterns, userExcludes, files, state);
295
354
  continue;
296
355
  }
297
356
  if (!entry.isFile())
298
357
  continue;
299
- if (shouldIgnore(name, relPath, ignores, userExcludes))
358
+ if (shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes))
300
359
  continue;
301
360
  if (shouldSkipFileByName(name))
302
361
  continue;
@@ -323,6 +382,10 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
323
382
  catch {
324
383
  continue;
325
384
  }
385
+ // Cheap pre-read filter: skip files obviously above the limit by disk
386
+ // size so we don't read multi-MB files into the heap only to reject
387
+ // them. Files that pass this check get a second post-decode check
388
+ // below.
326
389
  if (stats.size > MAX_FILE_SIZE)
327
390
  continue;
328
391
  let buffer;
@@ -334,7 +397,21 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
334
397
  }
335
398
  if (isBinary(buffer))
336
399
  continue;
337
- files[relPath] = buffer.toString('utf8');
400
+ const content = buffer.toString('utf8');
401
+ // Post-decode filter: the service enforces the 256 KB per-file limit
402
+ // on the UTF-8 byte length of the decoded string content, which can
403
+ // differ from the file's on-disk byte count. `buffer.toString('utf8')`
404
+ // silently replaces invalid UTF-8 byte sequences with U+FFFD (3 UTF-8
405
+ // bytes each), so a file with invalid bytes that's under 256 KB on
406
+ // disk can balloon over the limit after the round trip. The cheap
407
+ // `stats.size` check above would let it through; the service then
408
+ // rejects the entire chunk with 413 and torpedoes the scan.
409
+ // Re-measuring here keeps the CLI's filter aligned with the service
410
+ // enforcement. Concrete case from the GA-validation sweep:
411
+ // `web/pnpm-lock.yaml` with stray non-UTF-8 bytes.
412
+ if (Buffer.byteLength(content, 'utf8') > MAX_FILE_SIZE)
413
+ continue;
414
+ files[relPath] = content;
338
415
  state.count++;
339
416
  }
340
417
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prodcycle/prodcycle",
3
- "version": "0.6.1",
3
+ "version": "0.6.2",
4
4
  "description": "Multi-framework policy-as-code compliance scanner for infrastructure and application code.",
5
5
  "homepage": "https://docs.prodcycle.com",
6
6
  "repository": {