npm - @prodcycle/prodcycle - Versions diffs - 0.6.3 → 0.6.5 - Mend

@prodcycle/prodcycle 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/api-client.js CHANGED Viewed

@@ -47,10 +47,25 @@ const MAX_RETRY_AFTER_SECONDS = envInt('PC_MAX_RETRY_AFTER_SECONDS', 300);
 /**
  * Per-request fetch timeout. Without this a stalled connection would tie
  * up the CLI indefinitely, bypassing both the retry cap and the async-poll
- * deadline. Default is 2 minutes — long enough for the largest non-async
- * sync `/validate` call, short enough that a hung TCP socket gets aborted.
+ * deadline.
+ *
+ * Default is 5 minutes — chosen so the chunked-session `/chunks` upload
+ * path has enough headroom under server-side load. The bottleneck on
+ * busy servers is the per-chunk transaction (policy eval + per-finding
+ * unique-index check on `(scan_id, fingerprint)`), which can take tens
+ * of seconds on big chunks. Sync `/validate` scans normally finish in
+ * seconds, so a longer default doesn't hurt them — it only matters
+ * when a single request stalls. CI runs that want tighter feedback can
+ * shrink via `PC_REQUEST_TIMEOUT_MS`.
+ *
+ * Pre-fix this was 120 s and a megarepo chunked scan (infisical-
+ * infisical, ~11.5 k files, 2026-05-13 GA-validation sweep) burned
+ * through the full retry budget (4 × 120 s per stuck chunk) before
+ * giving up with `Failed to connect to ProdCycle API: The operation
+ * was aborted due to timeout`. The body-read retry path from #30 was
+ * firing correctly — it just wasn't enough budget.
  */
-const REQUEST_TIMEOUT_MS = envInt('PC_REQUEST_TIMEOUT_MS', 120_000);
+const REQUEST_TIMEOUT_MS = envInt('PC_REQUEST_TIMEOUT_MS', 300_000);
 /**
  * Conservative client-side chunk sizing for the chunked-session flow. The
  * /chunks endpoint accepts up to 50 MB / 2000 files per request, but most

package/dist/utils/fs.js CHANGED Viewed

@@ -38,6 +38,12 @@ const fs = __importStar(require("fs"));
 const path = __importStar(require("path"));
 const minimatch_1 = require("minimatch");
 const MAX_FILE_SIZE = 256 * 1024; // 256 KB
+// Reusable strict UTF-8 decoder. A `TextDecoder` keeps no state between
+// non-streaming `decode()` calls (encoding + fatal are fixed at
+// construction), so we allocate one at module load instead of one per
+// file in the walk loop — repos with thousands of files would otherwise
+// produce thousands of short-lived decoder objects per scan.
+const UTF8_DECODER = new TextDecoder('utf-8', { fatal: true });
 /**
  * Total file ceiling per scan. Hit on the OSS-CLI benchmark scanning
  * `hapifhir/hapi-fhir` (~13k files) — the CLI silently dropped ~3k files
@@ -397,18 +403,31 @@ function walk(dir, repoRoot, gitignores, prodcycleIgnores, includePatterns, user
         }
         if (isBinary(buffer))
             continue;
-        const content = buffer.toString('utf8');
-        // Post-decode filter: the service enforces the 256 KB per-file limit
-        // on the UTF-8 byte length of the decoded string content, which can
-        // differ from the file's on-disk byte count. `buffer.toString('utf8')`
-        // silently replaces invalid UTF-8 byte sequences with U+FFFD (3 UTF-8
-        // bytes each), so a file with invalid bytes that's under 256 KB on
-        // disk can balloon over the limit after the round trip. The cheap
-        // `stats.size` check above would let it through; the service then
-        // rejects the entire chunk with 413 and torpedoes the scan.
-        // Re-measuring here keeps the CLI's filter aligned with the service
-        // enforcement. Concrete case from the GA-validation sweep:
-        // `web/pnpm-lock.yaml` with stray non-UTF-8 bytes.
+        // Strict UTF-8 decode: throw (and skip the file) on invalid byte
+        // sequences. Pre-fix this was `buffer.toString('utf8')`, which
+        // silently replaces invalid bytes with U+FFFD and includes the file
+        // anyway. Python's `open(encoding='utf-8')` raises UnicodeDecodeError
+        // on the same input and skips the file via its except clause, so
+        // Node ended up sending files Python wouldn't. Those files were
+        // overwhelmingly garbage (U+FFFD soup with no real content), but
+        // the inflated payload pushed many scans over the sync /validate
+        // limit and into the chunked-session fallback — Node ran 5–75x
+        // slower than Python on the same repos (dexidp-dex: npm=525s vs
+        // py=7s, frappe-erpnext: 1448s vs 42s, both 0 finding differences)
+        // during the 2026-05-12 GA-validation sweep. Catch the decode
+        // error and skip the file, exactly as Python does.
+        let content;
+        try {
+            content = UTF8_DECODER.decode(buffer);
+        }
+        catch {
+            continue;
+        }
+        // Post-decode size filter mirrors the service's 256 KB per-file
+        // enforcement. Without invalid-UTF-8 inflation (now skipped above),
+        // the post-decode byte length usually matches `stats.size`, but the
+        // two can still differ in a BOM or rare normalization edge case —
+        // keep the re-measure as defense-in-depth.
         if (Buffer.byteLength(content, 'utf8') > MAX_FILE_SIZE)
             continue;
         files[relPath] = content;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@prodcycle/prodcycle",
-  "version": "0.6.3",
+  "version": "0.6.5",
   "description": "Multi-framework policy-as-code compliance scanner for infrastructure and application code.",
   "homepage": "https://docs.prodcycle.com",
   "repository": {