@prodcycle/prodcycle 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.d.ts CHANGED
@@ -13,3 +13,40 @@
13
13
  * by hand.
14
14
  */
15
15
  export declare function isCiEnvironment(env?: NodeJS.ProcessEnv): boolean;
16
+ /**
17
+ * Resolve the files to scan for a `hook` invocation. Supports:
18
+ * - `--file <path>` — read that file from disk
19
+ * - stdin: `{"files": {path: content}}` (same as gate)
20
+ * - stdin: `{"file_path": "...", "content": "..."}` (single file)
21
+ * - stdin: Claude Code PostToolUse shape —
22
+ * `{"tool_input": {"file_path": "...", "content"|"new_string": "..."}}`
23
+ * When only `file_path` is given and we can read the file, we do.
24
+ */
25
/**
 * Map the `--file <path>` argument of the `hook` command to the
 * repo-relative key under which the file is submitted.
 *
 * Background: the compliance API refuses absolute paths
 * (`File path must be relative`), so an absolute `--file` value has to be
 * rewritten relative to the working directory. Comparing the output of
 * `path.resolve()` against `process.cwd()` is not sufficient on macOS:
 * `/tmp` is a symlink to `/private/tmp`, `path.resolve()` does not follow
 * symlinks, and `process.cwd()` reports the physical path. A file given as
 * `/tmp/repo/x` would therefore look like `../../../tmp/repo/x` from
 * `/private/tmp/repo` and be rejected as outside cwd. Both sides must be
 * realpath'd before they are compared.
 *
 * The function performs no filesystem access and never exits the process,
 * which keeps it directly unit-testable; tests supply the realpath values
 * themselves.
 *
 * @param inputPath    The `--file` value exactly as the user supplied it.
 * @param realpathFile `fs.realpathSync(path.resolve(inputPath))`, computed
 *                     by the caller.
 * @param realpathCwd  Realpath of the current working directory, computed
 *                     by the caller.
 * @returns `{ ok: true, key }` carrying the relative key to submit, or
 *          `{ ok: false, error }` carrying a user-facing message when the
 *          file lies outside the working directory.
 */
export declare function resolveHookFileKey(
    inputPath: string,
    realpathFile: string,
    realpathCwd: string
): { ok: true; key: string } | { ok: false; error: string };
package/dist/cli.js CHANGED
@@ -35,6 +35,7 @@ var __importStar = (this && this.__importStar) || (function () {
35
35
  })();
36
36
  Object.defineProperty(exports, "__esModule", { value: true });
37
37
  exports.isCiEnvironment = isCiEnvironment;
38
+ exports.resolveHookFileKey = resolveHookFileKey;
38
39
  const commander_1 = require("commander");
39
40
  const child_process_1 = require("child_process");
40
41
  const fs = __importStar(require("fs"));
@@ -237,14 +238,14 @@ program
237
238
  program
238
239
  .command('gate')
239
240
  .description('Evaluate a JSON payload of files from stdin (low-latency hook endpoint)')
240
- .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2')
241
+ .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2,hipaa,nist-csf')
241
242
  .option('--format <format>', 'Output format: json, sarif, table, prompt', 'prompt')
242
243
  .option('--output <file>', 'Write report to file')
243
244
  .option('--api-url <url>', 'Compliance API base URL (or PC_API_URL env)')
244
245
  .option('--api-key <key>', 'API key for compliance API (or PC_API_KEY env)')
245
246
  .action(async (opts) => {
246
247
  try {
247
- const frameworks = parseList(opts.framework) ?? ['soc2'];
248
+ const frameworks = parseList(opts.framework) ?? ['soc2', 'hipaa', 'nist-csf'];
248
249
  const format = (opts.format ?? 'prompt');
249
250
  const stdin = await readStdin();
250
251
  if (!stdin.trim()) {
@@ -331,7 +332,7 @@ program
331
332
  program
332
333
  .command('hook')
333
334
  .description('Run as coding-agent post-edit hook (reads stdin or --file)')
334
- .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2')
335
+ .option('--framework <ids>', 'Comma-separated framework IDs to evaluate', 'soc2,hipaa,nist-csf')
335
336
  .option('--format <format>', 'Output format: json, sarif, table, prompt', 'prompt')
336
337
  .option('--file <path>', 'Scan this file from disk (alternative to reading content from stdin)')
337
338
  .option('--fail-on <levels>', 'Severities that cause non-zero exit', 'critical,high')
@@ -340,7 +341,7 @@ program
340
341
  .option('--api-key <key>', 'API key for compliance API (or PC_API_KEY env)')
341
342
  .action(async (opts) => {
342
343
  try {
343
- const frameworks = parseList(opts.framework) ?? ['soc2'];
344
+ const frameworks = parseList(opts.framework) ?? ['soc2', 'hipaa', 'nist-csf'];
344
345
  const format = (opts.format ?? 'prompt');
345
346
  const files = await collectHookFiles(opts.file);
346
347
  if (!files || Object.keys(files).length === 0) {
@@ -371,6 +372,43 @@ program
371
372
  * `{"tool_input": {"file_path": "...", "content"|"new_string": "..."}}`
372
373
  * When only `file_path` is given and we can read the file, we do.
373
374
  */
375
+ /**
376
+ * Convert the user-supplied `--file <path>` value into a repo-relative key.
377
+ *
378
+ * The compliance API rejects absolute paths (`File path must be relative`).
379
+ * Two failure modes the naive implementation hit on macOS:
380
+ *
381
+ * 1. Absolute paths under cwd silently became cryptic 400s. Fixed by
382
+ * `path.relative(cwd, absolute)` — works on Linux.
383
+ * 2. macOS `/tmp` is a symlink to `/private/tmp`. `path.resolve()` does
384
+ * NOT follow symlinks, but `process.cwd()` returns the physical path
385
+ * via the kernel's `getcwd()`. Result: `path.resolve('/tmp/repo/x')`
386
+ * = `/tmp/repo/x` while cwd is `/private/tmp/repo`, so the relative
387
+ * path is `../../../tmp/repo/x` and the file is incorrectly rejected
388
+ * as "outside cwd" — exactly the agent-hook scenario this targets.
389
+ * Fix: realpath both sides before comparing.
390
+ *
391
+ * Pure function (no fs I/O of its own, no `process.exit`) so it's directly
392
+ * unit-testable. Caller passes the original input, the realpath of the
393
+ * resolved file (`fs.realpathSync(path.resolve(filePath))`), and the
394
+ * realpath of cwd. Tests construct realpath inputs themselves.
395
+ */
396
+ function resolveHookFileKey(inputPath, realpathFile, realpathCwd) {
397
+ if (!path.isAbsolute(inputPath)) {
398
+ // Relative input passes through verbatim — no symlink ambiguity.
399
+ return { ok: true, key: inputPath };
400
+ }
401
+ const relative = path.relative(realpathCwd, realpathFile);
402
+ if (relative.startsWith('..') || path.isAbsolute(relative)) {
403
+ return {
404
+ ok: false,
405
+ error: `hook: --file ${inputPath} is outside the current directory ` +
406
+ `(${realpathCwd}). Pass a path relative to the repo root, or ` +
407
+ `cd into the repo first.`,
408
+ };
409
+ }
410
+ return { ok: true, key: relative };
411
+ }
374
412
  async function collectHookFiles(filePath) {
375
413
  if (filePath) {
376
414
  const absolute = path.resolve(filePath);
@@ -379,7 +417,17 @@ async function collectHookFiles(filePath) {
379
417
  process.exit(2);
380
418
  }
381
419
  const content = fs.readFileSync(absolute, 'utf8');
382
- return { [filePath]: content };
420
+ // Realpath both sides so the macOS `/tmp → /private/tmp` symlink doesn't
421
+ // make a valid agent-hook path (e.g. `/tmp/repo/main.tf`) appear outside
422
+ // cwd. See `resolveHookFileKey` JSDoc for the full rationale.
423
+ const realpathFile = fs.realpathSync(absolute);
424
+ const realpathCwd = fs.realpathSync(process.cwd());
425
+ const resolved = resolveHookFileKey(filePath, realpathFile, realpathCwd);
426
+ if (!resolved.ok) {
427
+ console.error(resolved.error);
428
+ process.exit(2);
429
+ }
430
+ return { [resolved.key]: content };
383
431
  }
384
432
  const stdin = await readStdin();
385
433
  if (!stdin.trim()) {
package/dist/index.d.ts CHANGED
@@ -4,13 +4,10 @@ export * from './formatters/table';
4
4
  export * from './formatters/prompt';
5
5
  export * from './formatters/sarif';
6
6
  /**
7
- * Set when the server-side scanner threw and the API was configured to
7
+ * Set when the upstream scanner threw and the service was configured to
8
8
  * fail closed (the default). When this is present, callers MUST treat
9
9
  * `passed: false` as "scanner unavailable — cannot certify compliance"
10
- * rather than "code is dirty." Mirrors the API's `ScannerErrorInfo`
11
- * shape; see `packages/compliance-code-scanner/api/src/domain/services/
12
- * compliance-scan.service.ts` (`ScannerErrorInfo`) for the field
13
- * contract.
10
+ * rather than "code is dirty."
14
11
  *
15
12
  * Without this surfaced to the CLI's --output JSON, a benchmark or CI
16
13
  * report shows `passed: false, findings: []` and the user can't tell
package/dist/utils/fs.js CHANGED
@@ -58,21 +58,18 @@ const MAX_TOTAL_FILES = (() => {
58
58
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 50_000;
59
59
  })();
60
60
  /**
61
- * Extensions and exact filenames the server-side `isScannable` filter
62
- * accepts. Pre-filtering client-side avoids:
61
+ * Extensions and exact filenames the upstream scanner accepts. Pre-
62
+ * filtering client-side avoids:
63
63
  * - bloating the wire payload with images / fonts / docs / archives
64
- * that the API just drops on receipt
64
+ * that the service just drops on receipt
65
65
  * - hitting MAX_TOTAL_FILES on repos like hapi-fhir or the Linux
66
66
  * kernel where most files are not scannable
67
67
  *
68
- * Keep in lock-step with `api/src/domain/services/compliance-scan.service.ts`:
69
- * - APPLICATION_CODE_EXTENSIONS (the source-code allowlist)
70
- * - INFRASTRUCTURE_EXTENSIONS (.tf, .yaml, .yml, .json, .sql)
71
- * - INFRASTRUCTURE_FILENAMES (dockerfile, .env)
72
- *
73
- * Files outside this set are skipped during walk. Source-of-truth is
74
- * the server filter; this is just an optimization so we don't pay the
75
- * wire cost for files the server will reject anyway.
68
+ * The upstream allowlist is the source of truth — this set is an
69
+ * optimization, not a security boundary. Drift between the two sets
70
+ * is not symmetric: extra entries here just send files the service
71
+ * will drop, but missing entries here skip files the service would
72
+ * have scanned. The lockstep contract is enforced by `lockstep-fs.test.mjs`.
76
73
  */
77
74
  const SCANNABLE_EXTENSIONS = new Set([
78
75
  // Application code (must mirror APPLICATION_CODE_EXTENSIONS in the API)
@@ -106,8 +103,9 @@ const SCANNABLE_FILENAMES = new Set([
106
103
  '.env',
107
104
  ]);
108
105
  /**
109
- * Directories skipped unconditionally. Kept in parity with
110
- * `packages/compliance-code-scanner/src/ignore-utils.ts`.
106
+ * Directories skipped unconditionally. Kept in parity with the upstream
107
+ * scanner's directory blocklist; the lockstep contract is enforced by
108
+ * `lockstep-fs.test.mjs`.
111
109
  */
112
110
  const SKIP_DIRS = new Set([
113
111
  'node_modules',
@@ -162,11 +160,37 @@ const SKIP_FILE_NAMES = new Set([
162
160
  * (see server-side fix in ignore-utils.ts).
163
161
  */
164
162
  function loadGitignore(repoPath) {
163
+ return readIgnoreFile(path.join(repoPath, '.gitignore'));
164
+ }
165
/**
 * Read the `.prodcycleignore` file at the repo root and return its patterns.
 *
 * The file uses the same gitignore-style syntax as `.gitignore` and is
 * applied in addition to it. It exists for files that must stay in version
 * control but should be skipped at scan time. Motivating cases from the
 * OSS-bench sweep:
 *
 *   - `gitleaks/cmd/generate/config/rules/aws.go` (and siblings): scanner
 *     rule definitions embed example credentials inside Go struct literals
 *     (e.g. `AKIA[0-9A-Z]{16}` as a regex pattern), which the SOC2/HIPAA
 *     hardcoded-credential rule cannot tell apart from real credentials.
 *   - `bandit/.../extension_loader.py` (and siblings): the same
 *     false-positive class, with credential-shaped patterns shipped as
 *     Python string literals.
 *
 * As with `.gitignore` here, patterns beginning with `!` are dropped: the
 * simple minimatch path does not implement gitignore's re-include
 * semantics.
 *
 * @param repoPath Root directory of the repository being scanned.
 * @returns The pattern list produced by `readIgnoreFile` for
 *          `<repoPath>/.prodcycleignore`.
 */
function loadProdcycleIgnore(repoPath) {
    const ignoreFilePath = path.join(repoPath, '.prodcycleignore');
    return readIgnoreFile(ignoreFilePath);
}
189
+ function readIgnoreFile(filePath) {
165
190
  try {
166
- const gitignorePath = path.join(repoPath, '.gitignore');
167
- if (!fs.existsSync(gitignorePath))
191
+ if (!fs.existsSync(filePath))
168
192
  return [];
169
- const content = fs.readFileSync(gitignorePath, 'utf-8');
193
+ const content = fs.readFileSync(filePath, 'utf-8');
170
194
  return content
171
195
  .split('\n')
172
196
  .map((line) => line.trim())
@@ -184,8 +208,23 @@ function matchesAny(filePath, patterns) {
184
208
  * Decide whether a directory or file entry should be excluded from collection.
185
209
  * Mirrors server `shouldIgnore` so scanner results stay consistent between
186
210
  * client-collected (CLI) and server-collected paths.
211
+ *
212
+ * Precedence (highest → lowest):
213
+ * 1. `SKIP_DIRS` / hidden-dir / suffix rules — unconditional.
214
+ * 2. `userExcludes` (CLI `--exclude`) — explicit user intent on this
215
+ * invocation.
216
+ * 3. `prodcycleIgnores` (`.prodcycleignore`) — explicit user intent
217
+ * written into the repo. Overrides the `.env*` carve-out below
218
+ * because the user is opting out at scan time on purpose (e.g.
219
+ * `.env.example` containing placeholder credentials in a
220
+ * scanner-source repo). `.gitignore` is NOT promoted to this
221
+ * precedence because gitignored `.env*` files are routinely
222
+ * committed-but-ignored real-credential locations and we want
223
+ * the secret-detection rule to fire.
224
+ * 4. `.env*` carve-out — always-scan, overrides `.gitignore` only.
225
+ * 5. `gitignores` (`.gitignore`) — incidental git-tracking config.
187
226
  */
188
- function shouldIgnore(name, relPath, ignores, userExcludes) {
227
+ function shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes) {
189
228
  if (SKIP_DIRS.has(name) ||
190
229
  SKIP_DIR_SUFFIXES.some((s) => name.endsWith(s)) ||
191
230
  (name.startsWith('.') &&
@@ -206,6 +245,25 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
206
245
  if (matchesAny(relPath, userExcludes))
207
246
  return true;
208
247
  }
248
+ // `.prodcycleignore` patterns are user-explicit scan-time intent —
249
+ // higher precedence than the `.env*` carve-out below. Without this
250
+ // ordering, a user trying to suppress a `.env.example` containing
251
+ // placeholder credentials in a scanner-source repo would find their
252
+ // pattern silently ignored. `.gitignore` patterns intentionally stay
253
+ // below the carve-out because gitignored `.env*` files are commonly
254
+ // real credentials we want to surface.
255
+ if (prodcycleIgnores.length > 0) {
256
+ for (const pattern of prodcycleIgnores) {
257
+ if (name === pattern ||
258
+ name + '/' === pattern ||
259
+ relPath === pattern ||
260
+ relPath + '/' === pattern) {
261
+ return true;
262
+ }
263
+ }
264
+ if (matchesAny(relPath, prodcycleIgnores))
265
+ return true;
266
+ }
209
267
  // .env-family files are always scanned even if .gitignored — the
210
268
  // common case for `.env`, `.env.local`, `.env.production`, `.envrc`,
211
269
  // etc., where the whole point of scanning is to catch hardcoded
@@ -216,7 +274,7 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
216
274
  // `compliance-code-scanner/src/ignore-utils.ts`.
217
275
  if (name.startsWith('.env'))
218
276
  return false;
219
- for (const pattern of ignores) {
277
+ for (const pattern of gitignores) {
220
278
  if (name === pattern ||
221
279
  name + '/' === pattern ||
222
280
  relPath === pattern ||
@@ -224,7 +282,7 @@ function shouldIgnore(name, relPath, ignores, userExcludes) {
224
282
  return true;
225
283
  }
226
284
  }
227
- if (matchesAny(relPath, ignores))
285
+ if (matchesAny(relPath, gitignores))
228
286
  return true;
229
287
  return false;
230
288
  }
@@ -266,13 +324,14 @@ function isScannableFilename(name) {
266
324
  }
267
325
  async function collectFiles(baseDir, includePatterns, excludePatterns) {
268
326
  const repoRoot = path.resolve(baseDir);
269
- const ignores = loadGitignore(repoRoot);
327
+ const gitignores = loadGitignore(repoRoot);
328
+ const prodcycleIgnores = loadProdcycleIgnore(repoRoot);
270
329
  const files = {};
271
330
  const state = { count: 0, limitReached: false };
272
- walk(repoRoot, repoRoot, ignores, includePatterns, excludePatterns, files, state);
331
+ walk(repoRoot, repoRoot, gitignores, prodcycleIgnores, includePatterns, excludePatterns, files, state);
273
332
  return files;
274
333
  }
275
- function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, state) {
334
+ function walk(dir, repoRoot, gitignores, prodcycleIgnores, includePatterns, userExcludes, files, state) {
276
335
  if (state.limitReached)
277
336
  return;
278
337
  let entries;
@@ -289,14 +348,14 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
289
348
  const fullPath = path.join(dir, name);
290
349
  const relPath = path.relative(repoRoot, fullPath);
291
350
  if (entry.isDirectory()) {
292
- if (shouldIgnore(name, relPath, ignores, userExcludes))
351
+ if (shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes))
293
352
  continue;
294
- walk(fullPath, repoRoot, ignores, includePatterns, userExcludes, files, state);
353
+ walk(fullPath, repoRoot, gitignores, prodcycleIgnores, includePatterns, userExcludes, files, state);
295
354
  continue;
296
355
  }
297
356
  if (!entry.isFile())
298
357
  continue;
299
- if (shouldIgnore(name, relPath, ignores, userExcludes))
358
+ if (shouldIgnore(name, relPath, gitignores, prodcycleIgnores, userExcludes))
300
359
  continue;
301
360
  if (shouldSkipFileByName(name))
302
361
  continue;
@@ -323,6 +382,10 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
323
382
  catch {
324
383
  continue;
325
384
  }
385
+ // Cheap pre-read filter: skip files obviously above the limit by disk
386
+ // size so we don't read multi-MB files into the heap only to reject
387
+ // them. Files that pass this check get a second post-decode check
388
+ // below.
326
389
  if (stats.size > MAX_FILE_SIZE)
327
390
  continue;
328
391
  let buffer;
@@ -334,7 +397,21 @@ function walk(dir, repoRoot, ignores, includePatterns, userExcludes, files, stat
334
397
  }
335
398
  if (isBinary(buffer))
336
399
  continue;
337
- files[relPath] = buffer.toString('utf8');
400
+ const content = buffer.toString('utf8');
401
+ // Post-decode filter: the service enforces the 256 KB per-file limit
402
+ // on the UTF-8 byte length of the decoded string content, which can
403
+ // differ from the file's on-disk byte count. `buffer.toString('utf8')`
404
+ // silently replaces invalid UTF-8 byte sequences with U+FFFD (3 UTF-8
405
+ // bytes each), so a file with invalid bytes that's under 256 KB on
406
+ // disk can balloon over the limit after the round trip. The cheap
407
+ // `stats.size` check above would let it through; the service then
408
+ // rejects the entire chunk with 413 and torpedoes the scan.
409
+ // Re-measuring here keeps the CLI's filter aligned with the service
410
+ // enforcement. Concrete case from the GA-validation sweep:
411
+ // `web/pnpm-lock.yaml` with stray non-UTF-8 bytes.
412
+ if (Buffer.byteLength(content, 'utf8') > MAX_FILE_SIZE)
413
+ continue;
414
+ files[relPath] = content;
338
415
  state.count++;
339
416
  }
340
417
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@prodcycle/prodcycle",
3
- "version": "0.6.0",
3
+ "version": "0.6.2",
4
4
  "description": "Multi-framework policy-as-code compliance scanner for infrastructure and application code.",
5
5
  "homepage": "https://docs.prodcycle.com",
6
6
  "repository": {