@prodcycle/prodcycle 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api-client.js +16 -4
- package/dist/index.d.ts +2 -5
- package/dist/utils/fs.js +103 -26
- package/package.json +1 -1
package/dist/api-client.js
CHANGED
|
@@ -322,6 +322,7 @@ class ComplianceApiClient {
|
|
|
322
322
|
let lastError = null;
|
|
323
323
|
for (let attempt = 0; attempt < MAX_RETRY_ATTEMPTS; attempt++) {
|
|
324
324
|
let response;
|
|
325
|
+
let responseText;
|
|
325
326
|
try {
|
|
326
327
|
response = await fetch(url, {
|
|
327
328
|
method,
|
|
@@ -332,11 +333,23 @@ class ComplianceApiClient {
|
|
|
332
333
|
...(data !== null ? { body: JSON.stringify(data) } : {}),
|
|
333
334
|
signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
|
|
334
335
|
});
|
|
336
|
+
// Body read inside the same try/catch as fetch() because undici can
|
|
337
|
+
// throw mid-stream (ALB drops the connection, abort signal fires
|
|
338
|
+
// during body read, server sends a partial response). Pre-fix this
|
|
339
|
+
// leaked out of `request()` as an unhandled error instead of being
|
|
340
|
+
// retried — long chunked-session scans were especially exposed,
|
|
341
|
+
// since every `appendChunk` call is its own request and any one
|
|
342
|
+
// mid-stream drop torpedoed the whole scan. The chunk write is
|
|
343
|
+
// idempotent on the server (unique `(scan_id, fingerprint)` index),
|
|
344
|
+
// so retry is safe. Mirror of the Python client's catch over
|
|
345
|
+
// `OSError, http.client.HTTPException` — keep both in lockstep.
|
|
346
|
+
responseText = await response.text();
|
|
335
347
|
}
|
|
336
348
|
catch (networkErr) {
|
|
337
|
-
// Connection-level failures (DNS, TCP, TLS)
|
|
338
|
-
//
|
|
339
|
-
// the
|
|
349
|
+
// Connection-level failures (DNS, TCP, TLS) OR body-read-level
|
|
350
|
+
// failures (abort, RST mid-stream). Treat as retryable up to the
|
|
351
|
+
// same cap as 503 — the server may be momentarily down or the
|
|
352
|
+
// network blip may resolve.
|
|
340
353
|
lastError =
|
|
341
354
|
networkErr instanceof Error ? networkErr : new Error(String(networkErr));
|
|
342
355
|
if (attempt < MAX_RETRY_ATTEMPTS - 1) {
|
|
@@ -345,7 +358,6 @@ class ComplianceApiClient {
|
|
|
345
358
|
}
|
|
346
359
|
throw new Error(`Failed to connect to ProdCycle API: ${lastError.message}`);
|
|
347
360
|
}
|
|
348
|
-
const responseText = await response.text();
|
|
349
361
|
let parsed = null;
|
|
350
362
|
try {
|
|
351
363
|
parsed = responseText ? JSON.parse(responseText) : null;
|
package/dist/index.d.ts
CHANGED
|
@@ -4,13 +4,10 @@ export * from './formatters/table';
|
|
|
4
4
|
export * from './formatters/prompt';
|
|
5
5
|
export * from './formatters/sarif';
|
|
6
6
|
/**
|
|
7
|
-
* Set when the
|
|
7
|
+
* Set when the upstream scanner threw and the service was configured to
|
|
8
8
|
* fail closed (the default). When this is present, callers MUST treat
|
|
9
9
|
* `passed: false` as "scanner unavailable — cannot certify compliance"
|
|
10
|
-
* rather than "code is dirty."
|
|
11
|
-
* shape; see `packages/compliance-code-scanner/api/src/domain/services/
|
|
12
|
-
* compliance-scan.service.ts` (`ScannerErrorInfo`) for the field
|
|
13
|
-
* contract.
|
|
10
|
+
* rather than "code is dirty."
|
|
14
11
|
*
|
|
15
12
|
* Without this surfaced to the CLI's --output JSON, a benchmark or CI
|
|
16
13
|
* report shows `passed: false, findings: []` and the user can't tell
|
package/dist/utils/fs.js
CHANGED
|
@@ -58,21 +58,18 @@ const MAX_TOTAL_FILES = (() => {
|
|
|
58
58
|
return Number.isFinite(parsed) && parsed > 0 ? parsed : 50_000;
|
|
59
59
|
})();
|
|
60
60
|
/**
|
|
61
|
-
* Extensions and exact filenames the
|
|
62
|
-
*
|
|
61
|
+
* Extensions and exact filenames the upstream scanner accepts. Pre-
|
|
62
|
+
* filtering client-side avoids:
|
|
63
63
|
* - bloating the wire payload with images / fonts / docs / archives
|
|
64
|
-
* that the
|
|
64
|
+
* that the service just drops on receipt
|
|
65
65
|
* - hitting MAX_TOTAL_FILES on repos like hapi-fhir or the Linux
|
|
66
66
|
* kernel where most files are not scannable
|
|
67
67
|
*
|
|
68
|
-
*
|
|
69
|
-
*
|
|
70
|
-
*
|
|
71
|
-
*
|
|
72
|
-
*
|
|
73
|
-
* Files outside this set are skipped during walk. Source-of-truth is
|
|
74
|
-
* the server filter; this is just an optimization so we don't pay the
|
|
75
|
-
* wire cost for files the server will reject anyway.
|
|
68
|
+
* The upstream allowlist is the source of truth — this set is an
|
|
69
|
+
* optimization, not a security boundary. Drift between the two sets
|
|
70
|
+
* is benign (extra entries here just send files the service will
|
|
71
|
+
 * drop; missing entries here just skip files the service would have accepted).
|
|
72
|
+
* The lockstep contract is enforced by `lockstep-fs.test.mjs`.
|
|
76
73
|
*/
|
|
77
74
|
const SCANNABLE_EXTENSIONS = new Set([
|
|
78
75
|
// Application code (must mirror APPLICATION_CODE_EXTENSIONS in the API)
|
|
@@ -106,8 +103,9 @@ const SCANNABLE_FILENAMES = new Set([
|
|
|
106
103
|
'.env',
|
|
107
104
|
]);
|
|
108
105
|
/**
|
|
109
|
-
* Directories skipped unconditionally. Kept in parity with
|
|
110
|
-
*
|
|
106
|
+
* Directories skipped unconditionally. Kept in parity with the upstream
|
|
107
|
+
* scanner's directory blocklist; the lockstep contract is enforced by
|
|
108
|
+
* `lockstep-fs.test.mjs`.
|
|
111
109
|
*/
|
|
112
110
|
const SKIP_DIRS = new Set([
|
|
113
111
|
'node_modules',
|
|
@@ -162,11 +160,37 @@ const SKIP_FILE_NAMES = new Set([
|
|
|
162
160
|
* (see server-side fix in ignore-utils.ts).
|
|
163
161
|
*/
|
|
164
162
|
function loadGitignore(repoPath) {
|
|
163
|
+
return readIgnoreFile(path.join(repoPath, '.gitignore'));
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Load .prodcycleignore patterns from the repo root. Same gitignore-style
|
|
167
|
+
* syntax as `.gitignore`, applied additively on top of it.
|
|
168
|
+
*
|
|
169
|
+
* Use case: opt-in suppression for files that should be skipped at scan
|
|
170
|
+
* time but kept in version control. Concrete motivating cases from the
|
|
171
|
+
* OSS-bench sweep:
|
|
172
|
+
*
|
|
173
|
+
* - `gitleaks/cmd/generate/config/rules/aws.go` (and siblings) — scanner
|
|
174
|
+
* rule definitions embed example credentials inside Go struct literals
|
|
175
|
+
* (e.g. `AKIA[0-9A-Z]{16}` as a regex pattern); the SOC2/HIPAA
|
|
176
|
+
* hardcoded-credential rule has no way to distinguish those from real
|
|
177
|
+
* creds.
|
|
178
|
+
* - `bandit/.../extension_loader.py` (and siblings) — same FP class:
|
|
179
|
+
* scanner source ships representative credential-shape patterns as
|
|
180
|
+
* Python string literals.
|
|
181
|
+
*
|
|
182
|
+
* Patterns in `.prodcycleignore` are also dropped if they start with `!`
|
|
183
|
+
* (same negation-handling limitation as `.gitignore` here — the simple
|
|
184
|
+
* minimatch path doesn't have gitignore's re-include semantics).
|
|
185
|
+
*/
|
|
186
|
+
function loadProdcycleIgnore(repoPath) {
|
|
187
|
+
return readIgnoreFile(path.join(repoPath, '.prodcycleignore'));
|
|
188
|
+
}
|
|
189
|
+
function readIgnoreFile(filePath) {
|
|
165
190
|
try {
|
|
166
|
-
|
|
167
|
-
if (!fs.existsSync(gitignorePath))
|
|
191
|
+
if (!fs.existsSync(filePath))
|
|
168
192
|
return [];
|
|
169
|
-
const content = fs.readFileSync(
|
|
193
|
+
const content = fs.readFileSync(filePath, 'utf-8');
|
|
170
194
|
return content
|
|
171
195
|
.split('\n')
|
|
172
196
|
.map((line) => line.trim())
|
|
@@ -184,8 +208,23 @@ function matchesAny(filePath, patterns) {
|
|
|
184
208
|
* Decide whether a directory or file entry should be excluded from collection.
|
|
185
209
|
* Mirrors server `shouldIgnore` so scanner results stay consistent between
|
|
186
210
|
* client-collected (CLI) and server-collected paths.
|
|
211
|
+
*
|
|
212
|
+
* Precedence (highest → lowest):
|
|
213
|
+
* 1. `SKIP_DIRS` / hidden-dir / suffix rules — unconditional.
|
|
214
|
+
* 2. `userExcludes` (CLI `--exclude`) — explicit user intent on this
|
|
215
|
+
* invocation.
|
|
216
|
+
* 3. `prodcycleIgnores` (`.prodcycleignore`) — explicit user intent
|
|
217
|
+
* written into the repo. Overrides the `.env*` carve-out below
|
|
218
|
+
* because the user is opting out at scan time on purpose (e.g.
|
|
219
|
+
* `.env.example` containing placeholder credentials in a
|
|
220
|
+
* scanner-source repo). `.gitignore` is NOT promoted to this
|
|
221
|
+
* precedence because gitignored `.env*` files are routinely
|
|
222
|
+
* committed-but-ignored real-credential locations and we want
|
|
223
|
+
* the secret-detection rule to fire.
|
|
224
|
+
* 4. `.env*` carve-out — always-scan, overrides `.gitignore` only.
|
|
225
|
+
* 5. `gitignores` (`.gitignore`) — incidental git-tracking config.
|
|
187
226
|
*/
|
|
188
|
-
function shouldIgnore(name, relPath,
|
|
227
|
+
function shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes) {
|
|
189
228
|
if (SKIP_DIRS.has(name) ||
|
|
190
229
|
SKIP_DIR_SUFFIXES.some((s) => name.endsWith(s)) ||
|
|
191
230
|
(name.startsWith('.') &&
|
|
@@ -206,6 +245,25 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
|
|
|
206
245
|
if (matchesAny(relPath, userExcludes))
|
|
207
246
|
return true;
|
|
208
247
|
}
|
|
248
|
+
// `.prodcycleignore` patterns are user-explicit scan-time intent —
|
|
249
|
+
// higher precedence than the `.env*` carve-out below. Without this
|
|
250
|
+
// ordering, a user trying to suppress a `.env.example` containing
|
|
251
|
+
// placeholder credentials in a scanner-source repo would find their
|
|
252
|
+
// pattern silently ignored. `.gitignore` patterns intentionally stay
|
|
253
|
+
// below the carve-out because gitignored `.env*` files are commonly
|
|
254
|
+
// real credentials we want to surface.
|
|
255
|
+
if (prodcycleIgnores.length > 0) {
|
|
256
|
+
for (const pattern of prodcycleIgnores) {
|
|
257
|
+
if (name === pattern ||
|
|
258
|
+
name + '/' === pattern ||
|
|
259
|
+
relPath === pattern ||
|
|
260
|
+
relPath + '/' === pattern) {
|
|
261
|
+
return true;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
if (matchesAny(relPath, prodcycleIgnores))
|
|
265
|
+
return true;
|
|
266
|
+
}
|
|
209
267
|
// .env-family files are always scanned even if .gitignored — the
|
|
210
268
|
// common case for `.env`, `.env.local`, `.env.production`, `.envrc`,
|
|
211
269
|
// etc., where the whole point of scanning is to catch hardcoded
|
|
@@ -216,7 +274,7 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
|
|
|
216
274
|
// `compliance-code-scanner/src/ignore-utils.ts`.
|
|
217
275
|
if (name.startsWith('.env'))
|
|
218
276
|
return false;
|
|
219
|
-
for (const pattern of
|
|
277
|
+
for (const pattern of gitignores) {
|
|
220
278
|
if (name === pattern ||
|
|
221
279
|
name + '/' === pattern ||
|
|
222
280
|
relPath === pattern ||
|
|
@@ -224,7 +282,7 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
|
|
|
224
282
|
return true;
|
|
225
283
|
}
|
|
226
284
|
}
|
|
227
|
-
if (matchesAny(relPath,
|
|
285
|
+
if (matchesAny(relPath, gitignores))
|
|
228
286
|
return true;
|
|
229
287
|
return false;
|
|
230
288
|
}
|
|
@@ -266,13 +324,14 @@ function isScannableFilename(name) {
|
|
|
266
324
|
}
|
|
267
325
|
async function collectFiles(baseDir, includePatterns, excludePatterns) {
|
|
268
326
|
const repoRoot = path.resolve(baseDir);
|
|
269
|
-
const
|
|
327
|
+
const gitignores = loadGitignore(repoRoot);
|
|
328
|
+
const prodcycleIgnores = loadProdcycleIgnore(repoRoot);
|
|
270
329
|
const files = {};
|
|
271
330
|
const state = { count: 0, limitReached: false };
|
|
272
|
-
walk(repoRoot, repoRoot,
|
|
331
|
+
walk(repoRoot, repoRoot, gitignores, prodcycleIgnores, includePatterns, excludePatterns, files, state);
|
|
273
332
|
return files;
|
|
274
333
|
}
|
|
275
|
-
function walk(dir, repoRoot,
|
|
334
|
+
function walk(dir, repoRoot, gitignores, prodcycleIgnores, includePatterns, userExcludes, files, state) {
|
|
276
335
|
if (state.limitReached)
|
|
277
336
|
return;
|
|
278
337
|
let entries;
|
|
@@ -289,14 +348,14 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
|
|
|
289
348
|
const fullPath = path.join(dir, name);
|
|
290
349
|
const relPath = path.relative(repoRoot, fullPath);
|
|
291
350
|
if (entry.isDirectory()) {
|
|
292
|
-
if (shouldIgnore(name, relPath,
|
|
351
|
+
if (shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes))
|
|
293
352
|
continue;
|
|
294
|
-
walk(fullPath, repoRoot,
|
|
353
|
+
walk(fullPath, repoRoot, gitignores, prodcycleIgnores, includePatterns, userExcludes, files, state);
|
|
295
354
|
continue;
|
|
296
355
|
}
|
|
297
356
|
if (!entry.isFile())
|
|
298
357
|
continue;
|
|
299
|
-
if (shouldIgnore(name, relPath,
|
|
358
|
+
if (shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes))
|
|
300
359
|
continue;
|
|
301
360
|
if (shouldSkipFileByName(name))
|
|
302
361
|
continue;
|
|
@@ -323,6 +382,10 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
|
|
|
323
382
|
catch {
|
|
324
383
|
continue;
|
|
325
384
|
}
|
|
385
|
+
// Cheap pre-read filter: skip files obviously above the limit by disk
|
|
386
|
+
// size so we don't read multi-MB files into the heap only to reject
|
|
387
|
+
// them. Files that pass this check get a second post-decode check
|
|
388
|
+
// below.
|
|
326
389
|
if (stats.size > MAX_FILE_SIZE)
|
|
327
390
|
continue;
|
|
328
391
|
let buffer;
|
|
@@ -334,7 +397,21 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
|
|
|
334
397
|
}
|
|
335
398
|
if (isBinary(buffer))
|
|
336
399
|
continue;
|
|
337
|
-
|
|
400
|
+
const content = buffer.toString('utf8');
|
|
401
|
+
// Post-decode filter: the service enforces the 256 KB per-file limit
|
|
402
|
+
// on the UTF-8 byte length of the decoded string content, which can
|
|
403
|
+
// differ from the file's on-disk byte count. `buffer.toString('utf8')`
|
|
404
|
+
// silently replaces invalid UTF-8 byte sequences with U+FFFD (3 UTF-8
|
|
405
|
+
// bytes each), so a file with invalid bytes that's under 256 KB on
|
|
406
|
+
// disk can balloon over the limit after the round trip. The cheap
|
|
407
|
+
// `stats.size` check above would let it through; the service then
|
|
408
|
+
// rejects the entire chunk with 413 and torpedoes the scan.
|
|
409
|
+
// Re-measuring here keeps the CLI's filter aligned with the service
|
|
410
|
+
// enforcement. Concrete case from the GA-validation sweep:
|
|
411
|
+
// `web/pnpm-lock.yaml` with stray non-UTF-8 bytes.
|
|
412
|
+
if (Buffer.byteLength(content, 'utf8') > MAX_FILE_SIZE)
|
|
413
|
+
continue;
|
|
414
|
+
files[relPath] = content;
|
|
338
415
|
state.count++;
|
|
339
416
|
}
|
|
340
417
|
}
|
package/package.json
CHANGED