npm - @blamejs/exceptd-skills - Versions diffs - 0.16.28 → 0.16.30 - Mend

@blamejs/exceptd-skills 0.16.28 → 0.16.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

package/CHANGELOG.md +28 -0
package/README.md +1 -1
package/bin/exceptd.js +251 -18
package/data/_indexes/_meta.json +4 -3
package/data/_indexes/jurisdiction-map.json +31 -158
package/data/playbooks/crypto.json +6 -0
package/lib/auto-discovery.js +8 -0
package/lib/collectors/README.md +3 -2
package/lib/collectors/library-author.js +26 -9
package/lib/collectors/secrets.js +8 -1
package/lib/cross-ref-api.js +96 -31
package/lib/lint-skills.js +6 -1
package/lib/playbook-runner.js +264 -52
package/lib/prefetch.js +78 -6
package/lib/refresh-external.js +106 -5
package/lib/scoring.js +49 -5
package/lib/validate-cve-catalog.js +14 -2
package/lib/validate-indexes.js +5 -0
package/lib/validate-playbooks.js +133 -38
package/manifest.json +53 -53
package/orchestrator/pipeline.js +16 -4
package/package.json +1 -1
package/sbom.cdx.json +73 -58
package/scripts/build-indexes.js +12 -1
package/scripts/check-sbom-currency.js +76 -14
package/scripts/refresh-sbom.js +1 -1
package/scripts/run-e2e-scenarios.js +41 -11
package/scripts/sync-package-description.js +74 -0
package/scripts/verify-shipped-tarball.js +18 -7
package/sources/validators/cve-validator.js +16 -6

package/lib/playbook-runner.js (changed)

@@ -1191,10 +1191,21 @@ function analyze(playbookId, directiveId, detectResult, agentSignals = {}, runOp
   // `factor_cve_source: 'evidence' | 'domain' | 'none'` so operators see
   // which fallback was used.
   let factorCveSource = 'none';
-  let factorCve = matchedCves[0] || null;
+  // Prefer an RWEP-eligible (non-VEX-fixed) matched CVE to drive factor
+  // scaling — a vendor-patched CVE must not inflate adjusted RWEP via its
+  // exploitation / KEV / PoC multipliers. Do NOT fall back to matchedCves[0]:
+  // when EVERY evidence-correlated CVE is VEX-fixed (rwepEligible empty but
+  // matchedCves non-empty) the finding is remediated, so factor scaling must be
+  // suppressed entirely — base is already 0 and the fired factors must not
+  // raise the adjusted score (a vendor-fixed CVE's KEV/exploitation/PoC would
+  // otherwise lift it above 0). The domain-CVE and class-weight fallbacks below
+  // are skipped in that case too, so every fired factor scales by 0 via
+  // _factorScale(factor, null, …).
+  const allMatchedVexFixed = matchedCves.length > 0 && rwepEligible.length === 0;
+  let factorCve = rwepEligible[0] || null;
   if (factorCve) {
     factorCveSource = 'evidence';
-  } else if (workingCatalogCves.length > 0) {
+  } else if (!allMatchedVexFixed && workingCatalogCves.length > 0) {
     // Highest rwep_score from domain refs.
     factorCve = workingCatalogCves.reduce((worst, c) =>
       (typeof c.rwep_score === 'number' && (!worst || c.rwep_score > worst.rwep_score)) ? c : worst,
@@ -1211,7 +1222,7 @@ function analyze(playbookId, directiveId, detectResult, agentSignals = {}, runOp
   // semantics for this case only: apply the declared weight as-is
   // (factor_scale=1, legacy semantics). The factor_cve_source annotation
   // surfaces 'class' so operators see which mode the run used.
-  const _classScaleFallback = !factorCve;
+  const _classScaleFallback = !factorCve && !allMatchedVexFixed;
   let adjustedRwep = baseRwep;
   const rwepBreakdown = [];
   for (const input of an.rwep_inputs || []) {
@@ -1357,9 +1368,15 @@ function analyze(playbookId, directiveId, detectResult, agentSignals = {}, runOp
       filter_applied: true,
       dropped_cve_count: vexDropped.length,
       dropped_cves: vexDropped,
+      // Vendor-fixed CVEs are a KEEP disposition — they stay in matched_cves
+      // annotated vex_status:'fixed' and never enter vexDropped. Surface them
+      // so the two dispositions are distinguishable and the note can be
+      // accurate (the drop note must not list a keep-disposition as a reason).
+      fixed_cves: vexFixedIds,
+      fixed_cve_count: vexFixedIds.length,
       note: vexDropped.length
-        ? `${vexDropped.length} CVE(s) dropped from analyze because the operator-supplied VEX statement marks them not_affected / resolved / false_positive. They remain in cve-catalog.json; the disposition lives in the VEX file.`
-        : "VEX filter supplied; zero matches dropped (no CVEs in domain.cve_refs matched the VEX not-affected set)."
+        ? `${vexDropped.length} CVE(s) dropped from analyze because the operator-supplied VEX statement marks them not_affected / false_positive. Vendor-fixed CVEs are NOT dropped — they remain in matched_cves with vex_status:'fixed'. The dropped CVEs remain in cve-catalog.json; the disposition lives in the VEX file.`
+        : "VEX filter supplied; zero matches dropped (no CVEs in domain.cve_refs matched the VEX not-affected / false_positive set)."
     } : null,
     // Regex-eval failures surfaced here so operators can see WHICH
     // condition expression crashed without the runner dying. Only present
@@ -1661,6 +1678,15 @@ function close(playbookId, directiveId, analyzeResult, validateResult, agentSign
   // upstream `govern.jurisdiction_obligations` has the real data — carry it
   // forward. `notification_deadline` is published as an alias for `deadline`
   // (matches the field name compliance teams expect on a notification record).
+  // Which engine phases completed in this run. analyze_complete /
+  // validate_complete jurisdictional clocks auto-start (under --ack) only when
+  // their named phase actually ran — by the time close() executes, a
+  // populated analyzeResult / validateResult proves the phase completed in the
+  // same synchronous pass.
+  const phaseFlags = {
+    analyze_complete: !!(analyzeResult && typeof analyzeResult === 'object'),
+    validate_complete: !!(validateResult && typeof validateResult === 'object'),
+  };
   const enrichNotification = (na) => {
     const obligation = (g.jurisdiction_obligations || []).find(o =>
       `${o.jurisdiction}/${o.regulation} ${o.window_hours}h` === na.obligation_ref
@@ -1671,17 +1697,31 @@ function close(playbookId, directiveId, analyzeResult, validateResult, agentSign
     // starts the clock even without a separately-submitted classification.
     const engineClassification = analyzeResult?._detect_classification || null;
     const clockStart = obligation
-      ? computeClockStart(obligation.clock_starts, agentSignals, runOpts, engineClassification)
+      ? computeClockStart(obligation.clock_starts, agentSignals, runOpts, engineClassification, phaseFlags, frozenEpoch)
       : null;
-    // When the clock event is detect_confirmed AND detection was confirmed
-    // (by the agent OR the engine) AND the operator did NOT pass --ack,
-    // surface clock_pending_ack so the notification record is visibly waiting
-    // on acknowledgement.
-    const clockPendingAck = !clockStart
-      && obligation?.clock_starts === 'detect_confirmed'
-      && (agentSignals?.detection_classification === 'detected' || engineClassification === 'detected')
+    // A valid clock is a real Date whose getTime() is finite. computeClockStart
+    // already returns null on an unparseable operator timestamp, but guard the
+    // arithmetic below independently so no caller (or future code path) can
+    // ever reach new Date(NaN).toISOString() and crash the close phase.
+    const clockValid = clockStart instanceof Date && !Number.isNaN(clockStart.getTime());
+    // Surface clock_pending_ack when an auto-startable event was confirmed but
+    // the operator did NOT pass --ack, so the notification record is visibly
+    // waiting on acknowledgement rather than silently stalled.
+    const autoStartEvent = obligation
+      && (obligation.clock_starts === 'detect_confirmed'
+        || obligation.clock_starts === 'analyze_complete'
+        || obligation.clock_starts === 'validate_complete');
+    const eventReady = obligation && (
+      (obligation.clock_starts === 'detect_confirmed'
+        && (agentSignals?.detection_classification === 'detected' || engineClassification === 'detected'))
+      || (obligation.clock_starts === 'analyze_complete' && phaseFlags.analyze_complete)
+      || (obligation.clock_starts === 'validate_complete' && phaseFlags.validate_complete)
+    );
+    const clockPendingAck = !clockValid
+      && autoStartEvent
+      && eventReady
       && !(runOpts && runOpts.operator_consent && runOpts.operator_consent.explicit === true);
-    const deadline = obligation && clockStart
+    const deadline = obligation && clockValid
       ? new Date(clockStart.getTime() + obligation.window_hours * 3600 * 1000).toISOString()
       : 'pending_clock_start_event';
     return {
@@ -1694,7 +1734,11 @@ function close(playbookId, directiveId, analyzeResult, validateResult, agentSign
       obligation_type: obligation?.obligation || null,
       window_hours: obligation?.window_hours ?? null,
       clock_start_event: obligation?.clock_starts || null,
-      clock_started_at: clockStart?.toISOString() || null,
+      // Use the validity gate, not optional-chaining: optional-chaining only
+      // short-circuits null/undefined, so a (hypothetical) Invalid Date would
+      // still reach .toISOString() and throw. clockValid guarantees a finite
+      // instant before we serialize.
+      clock_started_at: clockValid ? clockStart.toISOString() : null,
       ...(clockPendingAck ? { clock_pending_ack: true } : {}),
       deadline,
       // Alias matching compliance-team vocabulary.
@@ -1972,7 +2016,9 @@ function analyzeFindingShape(a) {
     // CVEs. A .find() lookup would return the first truthy entry — e.g.
     // 'suspected' on CVE #1 when CVE #2 is 'confirmed' — under-stating
     // the threat in notification drafts.
-    active_exploitation: worstActiveExploitation(matched),
+    // Exclude VEX-fixed (vendor-patched) CVEs: a notification draft must not
+    // assert active exploitation sourced from an already-remediated CVE.
+    active_exploitation: worstActiveExploitation(matched.filter(c => c.vex_status !== 'fixed')),
     rwep_adjusted: rwepAdjusted,
     rwep_base: a.rwep?.base ?? 0,
     // Severity surface for playbook conditions.
@@ -1983,18 +2029,47 @@ function analyzeFindingShape(a) {
   };
 }
+// Resolve a vulnerability identifier to its issuing authority and, where one
+// exists, the canonical human-readable advisory URL for that authority.
+//
+//   CVE-YYYY-NNNN…                  → 'NVD', nvd.nist.gov detail page
+//   GHSA- / OSV- / RUSTSEC- / SNYK- → that database's own advisory page
+//   MAL-                            → 'Malicious-Package'; malicious-package ids
+//                                     have no public per-id advisory page, so
+//                                     helpUri is null
+//   any other non-empty string      → 'exceptd-unknown', helpUri null
+//   non-string / empty              → { system_name: null, helpUri: null }
+//
+// Used by the SARIF rule emitter (helpUri) so a non-CVE matched id is not
+// given a hardcoded nvd.nist.gov/vuln/detail/<id> URL, which would mislabel it
+// as an NVD CVE and point at a page that does not exist.
+// NOTE(review): the previous comment stated that the CSAF ids[] branch
+// (csafIdsFor) derives from the same mapping — confirm against that function.
+//
+// Non-CVE authorities are recognised by prefix only, so the remainder of the
+// id is otherwise unconstrained. An id carrying characters outside
+// ADVISORY_ID_SAFE_RE (whitespace, '/', '?', '#', …) keeps its system_name but
+// gets a null helpUri: interpolating it would yield a broken or misdirected
+// link, and the contract here is "no link" rather than a fabricated one.
+const CVE_ID_RE = /^CVE-\d{4}-\d{4,}$/;
+const ADVISORY_ID_SAFE_RE = /^[A-Za-z0-9._:-]+$/;
+// Ordered prefix → authority table. `url` is null when the authority has no
+// canonical per-id advisory page.
+const ADVISORY_AUTHORITIES = [
+  { prefix: 'GHSA-', system_name: 'GHSA', url: (id) => `https://github.com/advisories/${id}` },
+  { prefix: 'OSV-', system_name: 'OSV', url: (id) => `https://osv.dev/vulnerability/${id}` },
+  { prefix: 'RUSTSEC-', system_name: 'RUSTSEC', url: (id) => `https://rustsec.org/advisories/${id}.html` },
+  { prefix: 'SNYK-', system_name: 'Snyk', url: (id) => `https://security.snyk.io/vuln/${id}` },
+  { prefix: 'MAL-', system_name: 'Malicious-Package', url: null },
+];
+function advisoryAuthorityFor(id) {
+  if (typeof id !== 'string' || !id) return { system_name: null, helpUri: null };
+  // CVE ids are fully validated by the pattern, so they are always link-safe.
+  if (CVE_ID_RE.test(id)) return { system_name: 'NVD', helpUri: `https://nvd.nist.gov/vuln/detail/${id}` };
+  const authority = ADVISORY_AUTHORITIES.find((a) => id.startsWith(a.prefix));
+  if (!authority) return { system_name: 'exceptd-unknown', helpUri: null };
+  const linkable = authority.url !== null && ADVISORY_ID_SAFE_RE.test(id);
+  return { system_name: authority.system_name, helpUri: linkable ? authority.url(id) : null };
+}
 // Route a vulnerability identifier to its registry-specific URN namespace.
 // CVE-/GHSA-/RUSTSEC-/MAL-* identifiers each have a registered URN namespace;
 // unrecognised prefixes route to the `urn:exceptd:advisory:` private
 // namespace so OpenVEX statements still carry a valid IRI per RFC 8141.
 function vulnIdToUrn(id) {
-  const slug = urnSlug(id);
-  if (typeof id !== 'string' || id.length === 0) return `urn:exceptd:advisory:${slug}`;
-  if (/^CVE-/i.test(id)) return `urn:cve:${slug}`;
-  if (/^GHSA-/i.test(id)) return `urn:ghsa:${slug}`;
-  if (/^RUSTSEC-/i.test(id)) return `urn:rustsec:${slug}`;
-  if (/^MAL-/i.test(id)) return `urn:malicious-package:${slug}`;
-  return `urn:exceptd:advisory:${slug}`;
+  if (typeof id !== 'string' || id.length === 0) return `urn:exceptd:advisory:${urnSlug(id)}`;
+  // Registered identifiers keep their canonical case in the NSS so the @id
+  // matches the OpenVEX `name` / CSAF id exactly. The private advisory
+  // namespace slugs arbitrary text, so it stays lowercase.
+  const canonical = urnSlug(id, true);
+  if (/^CVE-/i.test(id)) return `urn:cve:${canonical}`;
+  if (/^GHSA-/i.test(id)) return `urn:ghsa:${canonical}`;
+  if (/^RUSTSEC-/i.test(id)) return `urn:rustsec:${canonical}`;
+  if (/^MAL-/i.test(id)) return `urn:malicious-package:${canonical}`;
+  return `urn:exceptd:advisory:${urnSlug(id)}`;
 }
 // Build a CSAF product_tree.branches[] tree (vendor → product_name →
@@ -2020,6 +2095,12 @@ function buildCsafBranches(matchedCves, runOpts) {
     products.get(product).add(version);
   };
+  // Comparison / range operators that appear between a package name and a
+  // version in the catalog's dominant `package OP version` affected_versions
+  // shape (e.g. "linux-kernel >= 4.14", "runc <= 1.1.11", "litellm < 1.83.7").
+  // These are operators, never package names.
+  const RANGE_OP_RE = /^(<=|>=|==|!=|~>|<|>|=|~|\^)$/;
   // Heuristic parser. Returns { vendor, product, version } or null.
   const parseComponentString = (s) => {
     if (typeof s !== 'string' || !s.trim()) return null;
@@ -2027,9 +2108,28 @@ function buildCsafBranches(matchedCves, runOpts) {
     // `vendor/product@version`
     let m = trimmed.match(/^([^/\s@]+)\/([^/\s@]+)@(.+)$/);
     if (m) return { vendor: m[1], product: m[2], version: m[3].trim() };
+    const parts = trimmed.split(/\s+/);
+    // `package OP version` — the catalog's dominant shape. The token before
+    // the version is a comparison/range operator, so the PACKAGE is the
+    // product name and the operator belongs to the version qualifier, not the
+    // product_name. Pre-fix this split named the product after the operator
+    // ('>=', '<', '=='), corrupting the CSAF affected-product list. Carry the
+    // operator into the version string ('>= 4.14') so the range qualifier
+    // survives while the product_name stays the real package. Multiple leading
+    // operator tokens (a compound range emitted as one string) collapse into
+    // the version qualifier too.
+    if (parts.length >= 3 && RANGE_OP_RE.test(parts[1])) {
+      const product = parts[0];
+      const versionTokens = parts.slice(1);
+      // Only accept the shape when the trailing token is an actual version
+      // (starts with a digit or v\d) — otherwise it isn't a `package OP version`.
+      const lastTok = versionTokens[versionTokens.length - 1];
+      if (/^v?\d/.test(lastTok)) {
+        return { vendor: product, product, version: versionTokens.join(' ') };
+      }
+    }
     // `vendor product version` — exactly three whitespace-separated tokens
     // where the last token starts with a digit or `v\d`.
-    const parts = trimmed.split(/\s+/);
     if (parts.length >= 3) {
       const last = parts[parts.length - 1];
       if (/^v?\d/.test(last)) {
@@ -2098,13 +2198,17 @@ function buildCsafBranches(matchedCves, runOpts) {
   return { branches, productIds };
 }
-// Slugify a string into a URN-safe segment ([a-z0-9_-]+ per RFC 8141 NSS).
-// Empty input → 'unknown' so we never emit zero-length segments.
-function urnSlug(s) {
+// Slugify a string into a URN-safe segment (RFC 8141 NSS). Empty input →
+// 'unknown' so we never emit zero-length segments. preserveCase keeps the
+// canonical case of registered identifiers (e.g. CVE-2026-43284) — the NSS is
+// case-sensitive per RFC 8141, and the OpenVEX `name`/CSAF id fields carry the
+// canonical case, so the URN @id must match rather than fold to lowercase.
+function urnSlug(s, preserveCase = false) {
   if (s == null) return 'unknown';
-  const slug = String(s)
-    .toLowerCase()
-    .replace(/[^a-z0-9_-]+/g, '-')
+  let str = String(s);
+  if (!preserveCase) str = str.toLowerCase();
+  const slug = str
+    .replace(preserveCase ? /[^A-Za-z0-9_-]+/g : /[^a-z0-9_-]+/g, '-')
     .replace(/^-+|-+$/g, '');
   return slug.length ? slug : 'unknown';
 }
@@ -2810,12 +2914,25 @@ function buildEvidenceBundle(format, playbook, analyze, validate, agentSignals,
       message: { text: `${g.framework}: ${g.claimed_control} — ${g.actual_gap}${g.required_control ? '. Required: ' + g.required_control : ''}` },
       properties: stripNulls({ kind: 'framework_gap', framework: g.framework, control: g.claimed_control }),
     }));
-    const cveRules = analyze.matched_cves.map(c => ({
-      id: `${rulePrefix}${c.cve_id}`, shortDescription: { text: c.cve_id },
-      fullDescription: { text: `RWEP ${c.rwep} · KEV=${c.cisa_kev} · active_exploitation=${c.active_exploitation}` },
-      defaultConfiguration: { level: c.rwep >= 90 ? 'error' : c.rwep >= 70 ? 'warning' : 'note' },
-      helpUri: `https://nvd.nist.gov/vuln/detail/${c.cve_id}`,
-    }));
+    const cveRules = analyze.matched_cves.map(c => {
+      // Resolve the issuing authority by id shape rather than hardcoding NVD.
+      // A non-CVE matched id (MAL-/GHSA-/OSV-/RUSTSEC-/SNYK-) must NOT carry an
+      // nvd.nist.gov URL — that link 404s and presents the id as an NVD CVE.
+      const authority = advisoryAuthorityFor(c.cve_id);
+      const isCve = CVE_ID_RE.test(typeof c.cve_id === 'string' ? c.cve_id : '');
+      const rule = {
+        id: `${rulePrefix}${c.cve_id}`,
+        // For a non-CVE id, qualify the short description with its authority so
+        // a SARIF viewer doesn't read e.g. a MAL- id as an NVD CVE.
+        shortDescription: { text: isCve ? c.cve_id : `${c.cve_id} (${authority.system_name || 'non-CVE advisory'})` },
+        fullDescription: { text: `RWEP ${c.rwep} · KEV=${c.cisa_kev} · active_exploitation=${c.active_exploitation}` },
+        defaultConfiguration: { level: c.rwep >= 90 ? 'error' : c.rwep >= 70 ? 'warning' : 'note' },
+      };
+      // helpUri is optional in SARIF 2.1.0; omit it entirely when the authority
+      // has no canonical per-id advisory page rather than emit a broken link.
+      if (authority.helpUri) rule.helpUri = authority.helpUri;
+      return rule;
+    });
     const indicatorRules = indicatorHits.map(i => ({
       id: `${rulePrefix}${i.id}`, shortDescription: { text: i.id },
       fullDescription: { text: `Indicator from playbook ${playbook._meta.id}. Type: ${i.type}. Confidence: ${i.confidence}.` },
@@ -3877,6 +3994,51 @@ function stripOuterParens(expr) {
   return expr;
 }
+// Report whether (year, month, day) names a day that exists on the calendar.
+// Builds the date from its fields in UTC and checks that none of them were
+// normalised (month 13, February 30, day 0 all fail the round trip).
+function isRealCalendarDate(year, month, day) {
+  const probe = new Date(0);
+  probe.setUTCFullYear(year, month - 1, day);
+  return probe.getUTCFullYear() === year
+    && probe.getUTCMonth() === month - 1
+    && probe.getUTCDate() === day;
+}
+// Parse an operator-supplied clock_started_at_<event> timestamp into a valid
+// Date, host-timezone-independently. Failure modes that a raw `new Date(s)`
+// produces silently are guarded here:
+//
+//   1. Unparseable value ('not-a-date', '2026-13-99'). new Date() returns an
+//      Invalid Date — a truthy object whose getTime() is NaN — and calling
+//      .toISOString() on it later throws RangeError. We return { date: null }
+//      so the caller can treat the clock as not started instead of crashing.
+//
+//   2. Zone-less ISO ('2026-06-12T10:00:00' or '2026-06-12 10:00:00'). new
+//      Date() interprets a designator-less datetime in the HOST timezone, so a
+//      deadline derived from it shifts by the host's UTC offset. We normalize
+//      the space separator to 'T' and append 'Z' so a zone-less value is read
+//      as UTC on every host, and flag assumed_utc so the caller can surface
+//      that the zone was assumed.
+//
+//   3. Impossible calendar date in an ISO-shaped value ('2026-02-30T10:00:00Z').
+//      Depending on the engine the parser may normalise this to a different
+//      day rather than reject it, which would start the clock on a date the
+//      operator never wrote. The leading YYYY-MM-DD is therefore checked
+//      against the calendar before parsing.
+//
+//   4. Non-string input. Only a valid Date (copied, never aliased) or a finite
+//      epoch-millisecond number is accepted. Anything else — booleans, BigInt,
+//      arbitrary objects — is rejected: `new Date(true)` would otherwise yield
+//      1970-01-01T00:00:00.001Z and `new Date(1n)` throws TypeError.
+//
+// Date-only values (YYYY-MM-DD) are parsed as UTC midnight by spec and are not
+// flagged assumed_utc. Strings that are not ISO-shaped are still handed to
+// new Date() unchanged, so their acceptance remains engine-dependent.
+//
+// Returns { date, assumed_utc } where date is a valid Date or null, and
+// assumed_utc is true only when a zone-less datetime was coerced to UTC.
+function parseOperatorClock(raw) {
+  const rejected = () => ({ date: null, assumed_utc: false });
+  if (typeof raw !== 'string') {
+    if (raw instanceof Date) {
+      return Number.isNaN(raw.getTime()) ? rejected() : { date: new Date(raw.getTime()), assumed_utc: false };
+    }
+    if (typeof raw === 'number' && Number.isFinite(raw)) {
+      const fromEpoch = new Date(raw);
+      // A finite number can still be outside the representable Date range.
+      return Number.isNaN(fromEpoch.getTime()) ? rejected() : { date: fromEpoch, assumed_utc: false };
+    }
+    return rejected();
+  }
+  const trimmed = raw.trim();
+  if (!trimmed) return rejected();
+  // Validate the literal date fields, independent of any zone offset that
+  // follows them.
+  const isoDate = /^(\d{4})-(\d{2})-(\d{2})(?!\d)/.exec(trimmed);
+  if (isoDate && !isRealCalendarDate(Number(isoDate[1]), Number(isoDate[2]), Number(isoDate[3]))) {
+    return rejected();
+  }
+  // A full ISO datetime whose only missing piece is the zone designator:
+  // YYYY-MM-DD, a 'T' or single space separator, HH:MM(:SS(.ms)?)?, and NO
+  // trailing 'Z' / [+-]HH:MM offset.
+  const zonelessDateTime = /^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}(:\d{2}(\.\d+)?)?$/;
+  const assumedUtc = zonelessDateTime.test(trimmed);
+  const candidate = assumedUtc ? `${trimmed.replace(' ', 'T')}Z` : trimmed;
+  const d = new Date(candidate);
+  if (Number.isNaN(d.getTime())) return rejected();
+  return { date: d, assumed_utc: assumedUtc };
+}
 /**
  * Compute the start instant for a jurisdictional clock event. The agent
  * submits clock_started_at_<event> ISO strings as it progresses through
@@ -3888,22 +4050,57 @@ function stripOuterParens(expr) {
  * whenever the engine classifies as detected would be incorrect: the
  * operator may not have seen the result yet. Semantics:
  *
- *   - If the agent explicitly submits clock_started_at_<event>: use it.
- *   - Otherwise, for 'detect_confirmed' with classification='detected':
+ *   - If the agent explicitly submits clock_started_at_<event>: use it,
+ *     after validating it parses and normalizing a zone-less value to UTC.
+ *     An unparseable value returns null (clock stays pending) and surfaces
+ *     a runtime error naming the offending key, instead of crashing close().
+ *   - Otherwise, for 'detect_confirmed' with classification='detected', and
+ *     for 'analyze_complete' / 'validate_complete' once their phase has run:
  *     stamp `now` ONLY if runOpts.operator_consent?.explicit === true
- *     (i.e. the operator passed --ack). Without --ack, return null and
- *     the caller (close()) surfaces clock_pending_ack: true on the
- *     notification_actions entry so the operator sees that the clock is
- *     waiting on acknowledgement.
- *   - All other events without an explicit timestamp: return null.
+ *     (i.e. the operator passed --ack). The analyze/validate phases provably
+ *     complete inside the same synchronous run before close() computes these
+ *     clocks, so under --ack there is no operator-awareness gap. Without
+ *     --ack, return null and the caller (close()) surfaces clock_pending_ack:
+ *     true on the notification_actions entry so the operator sees that the
+ *     clock is waiting on acknowledgement.
+ *   - 'manual' and any other event without an explicit timestamp: return null.
+ *
+ * `phaseFlags` carries which engine phases completed in this run
+ * ({ analyze_complete, validate_complete }) so the auto-stamp for those two
+ * events only fires when the named phase actually ran.
  */
-function computeClockStart(eventName, agentSignals, runOpts = {}, engineClassification = null) {
+function computeClockStart(eventName, agentSignals, runOpts = {}, engineClassification = null, phaseFlags = {}, frozenEpoch = null) {
   // The agent submits clock_started_at_<event> ISO strings as it progresses.
   const key = `clock_started_at_${eventName}`;
-  if (agentSignals && agentSignals[key]) return new Date(agentSignals[key]);
-  // For detect_confirmed: only auto-stamp when the operator has explicitly
-  // acknowledged the result via --ack. Otherwise leave the clock pending.
-  // Detection is "confirmed" when EITHER the agent submitted
+  if (agentSignals && agentSignals[key]) {
+    const { date, assumed_utc } = parseOperatorClock(agentSignals[key]);
+    if (!date) {
+      // A present-but-unparseable timestamp must not crash the close phase via
+      // a downstream new Date(NaN).toISOString(). Null routes to the pending
+      // branch; the runtime error tells the operator which field was bad.
+      if (runOpts && Array.isArray(runOpts._runErrors)) {
+        pushRunError(runOpts._runErrors, {
+          kind: 'invalid_clock_value',
+          clock_event: eventName,
+          key,
+          supplied: String(agentSignals[key]).slice(0, 80),
+          message: `${key} is not a valid ISO instant; the jurisdictional clock did not start. Submit an ISO-8601 timestamp (e.g. 2026-06-12T10:00:00Z).`,
+        }, { dedupeKey: e => e.key || '' });
+      }
+      return null;
+    }
+    if (assumed_utc && runOpts && Array.isArray(runOpts._runErrors)) {
+      pushRunError(runOpts._runErrors, {
+        kind: 'clock_timezone_assumed_utc',
+        clock_event: eventName,
+        key,
+        supplied: String(agentSignals[key]).slice(0, 80),
+        message: `${key} carries no timezone designator; interpreted as UTC. Append 'Z' or an offset to make the regulatory deadline unambiguous.`,
+      }, { dedupeKey: e => e.key || '' });
+    }
+    return date;
+  }
+  // Auto-stamp gate. Detection is "confirmed" when EITHER the agent submitted
   // detection_classification:'detected' OR the engine itself classified the
   // detect phase as 'detected'. Pre-fix only the agent-submitted signal was
   // honored, so an engine-confirmed detection (indicators fired from
@@ -3911,10 +4108,22 @@ function computeClockStart(eventName, agentSignals, runOpts = {}, engineClassifi
   // started the regulatory clock — notification deadlines silently stalled.
   const detected = agentSignals?.detection_classification === 'detected'
     || engineClassification === 'detected';
-  if (eventName === 'detect_confirmed' && detected
-      && runOpts && runOpts.operator_consent && runOpts.operator_consent.explicit === true) {
-    return new Date();
-  }
+  const ack = !!(runOpts && runOpts.operator_consent && runOpts.operator_consent.explicit === true);
+  if (!ack) return null;
+  // detect_confirmed auto-starts on a confirmed detection. analyze_complete /
+  // validate_complete auto-start once their engine phase has run in this same
+  // pass — the event literally names a phase that completed synchronously
+  // before close(), so --ack closes the awareness gap exactly as it does for
+  // detect_confirmed.
+  // Deterministic bundle mode roots every auto-started clock in the single
+  // frozen epoch so two runs over the same evidence emit identical
+  // clock_started_at and deadline values; otherwise the clock starts at
+  // wall-clock now. Operator-supplied clock timestamps (handled above) are
+  // never overridden — they are the explicit input.
+  const autoNow = () => (frozenEpoch ? new Date(frozenEpoch) : new Date());
+  if (eventName === 'detect_confirmed' && detected) return autoNow();
+  if (eventName === 'analyze_complete' && phaseFlags && phaseFlags.analyze_complete === true) return autoNow();
+  if (eventName === 'validate_complete' && phaseFlags && phaseFlags.validate_complete === true) return autoNow();
   return null;
 }
@@ -4012,6 +4221,9 @@ module.exports = {
   _releaseLock: releaseLock,
   _lockFilePath: lockFilePath,
   _vulnIdToUrn: vulnIdToUrn,
+  _buildCsafBranches: buildCsafBranches,
+  _advisoryAuthorityFor: advisoryAuthorityFor,
+  _computeClockStart: computeClockStart,
   _worstActiveExploitation: worstActiveExploitation,
   // Re-exported from scoring so parity between the catalog scorer and the
   // runtime evaluator is checkable (and enforced by a test) at the seam.

package/lib/prefetch.js (changed)

@@ -107,6 +107,13 @@ const SOURCES = {
   },
 };
+// Sources the refresh orchestrator knows but that have no prefetch cache
+// layer: they resolve advisories by live id lookup, so there is nothing to
+// warm. Named here so an operator who scopes a cache-warm to one of them gets
+// "no prefetch cache layer (live id lookup only)" rather than a misleading
+// "unknown source" — the source is real, just not cacheable.
+const LIVE_ONLY_REFRESH_SOURCES = new Set(["ghsa", "osv", "advisories", "cve-regression-watcher"]);
 function parseArgs(argv) {
   const out = { maxAgeMs: 24 * 3600 * 1000, source: null, force: false, noNetwork: false, cacheDir: DEFAULT_CACHE, quiet: false, help: false, maxErrors: 0 };
   for (let i = 2; i < argv.length; i++) {
@@ -115,11 +122,19 @@ function parseArgs(argv) {
     else if (a === "--no-network" || a === "--dry-run" || a === "--air-gap") out.noNetwork = true;
     else if (a === "--quiet") out.quiet = true;
     else if (a === "--help" || a === "-h") out.help = true;
-    else if (a === "--source") out.source = argv[++i];
+    // The space-separated forms of --source / --max-age / --cache-dir consume
+    // the next token. A trailing flag (e.g. `prefetch --cache-dir` with no
+    // following value) would otherwise pass `undefined` into path.resolve /
+    // parseDuration — path.resolve(undefined) throws an uncaught TypeError,
+    // and parseDuration(undefined) silently returns 0 (which flips --max-age
+    // into "everything is stale, refetch all"). A bare --source likewise flips
+    // the scope to all sources. Treat a missing value (next token absent or
+    // itself a --flag) as a usage error so main() refuses with exit 2 instead.
+    else if (a === "--source") { const v = takesValue(argv, ++i); if (v === undefined) out._argError = "prefetch: --source requires a value"; else out.source = v; }
     else if (a.startsWith("--source=")) out.source = a.slice("--source=".length);
-    else if (a === "--max-age") out.maxAgeMs = parseDuration(argv[++i]);
+    else if (a === "--max-age") { const v = takesValue(argv, ++i); if (v === undefined) out._argError = "prefetch: --max-age requires a value"; else out.maxAgeMs = parseDuration(v); }
     else if (a.startsWith("--max-age=")) out.maxAgeMs = parseDuration(a.slice("--max-age=".length));
-    else if (a === "--cache-dir") out.cacheDir = path.resolve(argv[++i]);
+    else if (a === "--cache-dir") { const v = takesValue(argv, ++i); if (v === undefined) out._argError = "prefetch: --cache-dir requires a value"; else out.cacheDir = path.resolve(v); }
     else if (a.startsWith("--cache-dir=")) out.cacheDir = path.resolve(a.slice("--cache-dir=".length));
     // Per-entry fetch-error tolerance. An integer is an absolute budget; an
     // "<N>%" string is a fraction of the planned fetch count. A malformed
@@ -134,6 +149,19 @@ function parseArgs(argv) {
       (out._unknownFlags || (out._unknownFlags = [])).push(base);
     }
   }
+  // A supplied-but-empty --source (`--source ""`, `--source=`, or a comma-only
+  // value like `--source ,`) resolves to no source names. Left unguarded, the
+  // empty string is falsy and silently warms ALL sources, while a comma-only
+  // value silently warms none — both reporting success. Treat either as a
+  // usage error so main() refuses with exit 2, matching the unknown-source
+  // contract. Only fire when --source was actually supplied (out.source != null)
+  // so the omitted-flag default (warm all) is preserved.
+  if (!out._argError && out.source != null) {
+    const names = String(out.source).split(",").map((s) => s.trim()).filter(Boolean);
+    if (names.length === 0) {
+      out._argError = "prefetch: --source given but resolved to no source names (empty or comma-only value)";
+    }
+  }
   // The global air-gap switch implies a report-only / no-egress run: treat
   // EXCEPTD_AIR_GAP=1 the same as --no-network so prefetch never plans live
   // fetches under air-gap.
@@ -141,6 +169,18 @@ function parseArgs(argv) {
   return out;
 }
+// Read the value token that a space-separated value-flag (e.g. `--max-age 12h`)
+// expects. `argv` is the argument array; `i` is the index where the value should
+// sit. Returns argv[i] unchanged, or `undefined` when nothing exists at that
+// index or the token is a string starting with "--" (a swallowed missing value,
+// e.g. `--max-age --no-network`). Callers turn undefined into a usage error.
+function takesValue(argv, i) {
+  const v = argv[i];
+  if (v === undefined) return undefined; // flag was left trailing: no token at index i
+  if (typeof v === "string" && v.startsWith("--")) return undefined; // next token is another long flag, not a value; single-dash tokens such as `-h` are still accepted as values
+  return v;
+}
 function parseDuration(s) {
   if (!s) return 0;
   const m = String(s).match(/^(\d+)\s*([smhd])?$/);
@@ -235,6 +275,9 @@ Options:
   --no-network        report-only; list what would be fetched.
   --cache-dir <path>  override cache root (default .cache/upstream).
   --quiet             suppress per-entry log lines.
+  --max-errors <n|n%> tolerate up to n (or n% of planned) per-entry fetch
+                      errors before exit 1. Default: 0 (any error exits 1).
+                      A fully-dead source still exits 1 regardless of budget.
 Use NVD_API_KEY / GITHUB_TOKEN env vars to lift rate limits.
@@ -508,7 +551,15 @@ function isFresh(idx, source, id, maxAgeMs) {
   const e = idx.entries[entryKey(source, id)];
   if (!e) return false;
   if (!e.fetched_at) return false;
-  return Date.now() - new Date(e.fetched_at).getTime() < maxAgeMs;
+  const ageMs = Date.now() - new Date(e.fetched_at).getTime();
+  // A non-finite or negative age means the entry's provenance is untrustworthy:
+  // an unparseable fetched_at, or a future-dated one (clock skew or a poisoned
+  // index inflating apparent freshness past the maxAge gate). Either way, treat
+  // it as stale and force a re-fetch — re-fetching restores trustworthy
+  // provenance. This mirrors readCached()'s lower-bound guard so the planning
+  // side and read side cannot diverge on the same poisoned entry.
+  if (!Number.isFinite(ageMs) || ageMs < 0) return false;
+  return ageMs < maxAgeMs;
 }
 function authHeadersForSource(source) {
@@ -529,11 +580,32 @@ function authHeadersForSource(source) {
 async function prefetch(options = {}) {
   const opts = { maxAgeMs: 24 * 3600 * 1000, source: null, force: false, noNetwork: false, cacheDir: DEFAULT_CACHE, quiet: false, ...options };
   const ctx = loadCtx();
-  const chosen = opts.source
+  // Distinguish "operator omitted --source" (resolve to all sources, the
+  // documented default) from "operator passed --source but it resolved to
+  // nothing" (empty string or a comma-only value). The latter is a usage
+  // error, not a silent run-everything / run-nothing: an empty value would
+  // otherwise warm ALL sources and a comma-only value would warm NONE, both
+  // reporting success. Refuse so the typo surfaces. (main() maps the throw to
+  // exit 2, matching the existing unknown-source contract.)
+  const sourceSupplied = opts.source != null;
+  const chosen = sourceSupplied
     ? opts.source.split(",").map((s) => s.trim()).filter(Boolean)
     : Object.keys(SOURCES);
+  if (sourceSupplied && chosen.length === 0) {
+    throw new Error('prefetch: --source given but resolved to no source names (empty or comma-only value)');
+  }
   for (const n of chosen) {
-    if (!SOURCES[n]) throw new Error(`prefetch: unknown source "${n}"`);
+    if (!SOURCES[n]) {
+      // The refresh orchestrator exposes additional sources (ghsa, osv,
+      // advisories, cve-regression-watcher) that resolve advisories by live
+      // id lookup and have no prefetch cache layer. When the operator scopes
+      // a cache-warm to one of those, name the prefetchable subset rather than
+      // a bare "unknown source" — the source is real, it just isn't cacheable.
+      if (LIVE_ONLY_REFRESH_SOURCES.has(n)) {
+        throw new Error(`prefetch: source "${n}" has no prefetch cache layer (live id lookup only); prefetchable sources: ${Object.keys(SOURCES).join(",")}`);
+      }
+      throw new Error(`prefetch: unknown source "${n}"; prefetchable sources: ${Object.keys(SOURCES).join(",")}`);
+    }
   }
   // Build the queue with per-source budgets. NVD / GitHub upgrade if env-key