shroud-privacy 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +190 -0
  2. package/NOTICE +7 -0
  3. package/README.md +369 -0
  4. package/dist/audit.d.ts +46 -0
  5. package/dist/audit.js +127 -0
  6. package/dist/canary.d.ts +31 -0
  7. package/dist/canary.js +73 -0
  8. package/dist/config.d.ts +27 -0
  9. package/dist/config.js +123 -0
  10. package/dist/detectors/base.d.ts +8 -0
  11. package/dist/detectors/base.js +2 -0
  12. package/dist/detectors/code.d.ts +25 -0
  13. package/dist/detectors/code.js +144 -0
  14. package/dist/detectors/context.d.ts +31 -0
  15. package/dist/detectors/context.js +357 -0
  16. package/dist/detectors/patterns.d.ts +15 -0
  17. package/dist/detectors/patterns.js +58 -0
  18. package/dist/detectors/regex.d.ts +28 -0
  19. package/dist/detectors/regex.js +955 -0
  20. package/dist/generators/base.d.ts +6 -0
  21. package/dist/generators/base.js +2 -0
  22. package/dist/generators/codes.d.ts +20 -0
  23. package/dist/generators/codes.js +231 -0
  24. package/dist/generators/names.d.ts +29 -0
  25. package/dist/generators/names.js +194 -0
  26. package/dist/generators/network.d.ts +86 -0
  27. package/dist/generators/network.js +477 -0
  28. package/dist/hooks.d.ts +27 -0
  29. package/dist/hooks.js +457 -0
  30. package/dist/index.d.ts +12 -0
  31. package/dist/index.js +58 -0
  32. package/dist/mapping.d.ts +33 -0
  33. package/dist/mapping.js +72 -0
  34. package/dist/obfuscator.d.ts +78 -0
  35. package/dist/obfuscator.js +603 -0
  36. package/dist/redaction.d.ts +26 -0
  37. package/dist/redaction.js +76 -0
  38. package/dist/store.d.ts +40 -0
  39. package/dist/store.js +79 -0
  40. package/dist/types.d.ts +101 -0
  41. package/dist/types.js +35 -0
  42. package/ncg_adapter.py +530 -0
  43. package/openclaw.plugin.json +72 -0
  44. package/package.json +56 -0
  45. package/shroud_bridge.mjs +225 -0
package/dist/audit.js ADDED
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Tamper-evident audit log for PII detection events (in-memory only).
3
+ *
4
+ * Logs what was detected (category, count, timestamp) WITHOUT storing real values.
5
+ * Uses HMAC chaining for tamper evidence -- each log entry includes a hash of
6
+ * the previous entry, so any modification/deletion is detectable.
7
+ */
8
+ import { createHash, createHmac, randomBytes } from "node:crypto";
9
+ export class AuditLogger {
10
+ _secret;
11
+ _sessionId;
12
+ _maxEntries;
13
+ _lastHash;
14
+ _entries;
15
+ _stats;
16
+ constructor(secretKey, maxEntries = 200) {
17
+ this._secret = Buffer.from(secretKey, "utf-8");
18
+ this._sessionId = createHash("sha256")
19
+ .update(`${secretKey}:${Date.now()}`)
20
+ .digest("hex")
21
+ .slice(0, 12);
22
+ this._maxEntries = maxEntries;
23
+ this._lastHash = "0".repeat(64); // Genesis hash
24
+ this._entries = [];
25
+ this._stats = {
26
+ totalObfuscationEvents: 0,
27
+ totalDeobfuscationEvents: 0,
28
+ totalEntities: 0,
29
+ totalReplacements: 0,
30
+ byCategory: {},
31
+ };
32
+ }
33
+ /** Generate a unique request ID. */
34
+ static generateRequestId() {
35
+ return randomBytes(8).toString("hex");
36
+ }
37
+ /** Log an obfuscation event (no real values stored). */
38
+ logObfuscation(entities, textLength, requestId, processingTimeMs) {
39
+ if (entities.length === 0)
40
+ return;
41
+ // Aggregate by category
42
+ const categories = {};
43
+ for (const entity of entities) {
44
+ const cat = entity.category;
45
+ categories[cat] = (categories[cat] ?? 0) + 1;
46
+ }
47
+ this._writeEntry("obfuscation", categories, entities.length, textLength, requestId, processingTimeMs);
48
+ // Update running stats
49
+ this._stats.totalObfuscationEvents += 1;
50
+ this._stats.totalEntities += entities.length;
51
+ for (const [cat, count] of Object.entries(categories)) {
52
+ this._stats.byCategory[cat] = (this._stats.byCategory[cat] ?? 0) + count;
53
+ }
54
+ }
55
+ /** Log a deobfuscation event. */
56
+ logDeobfuscation(replacementsMade, requestId, processingTimeMs) {
57
+ if (replacementsMade <= 0)
58
+ return;
59
+ this._writeEntry("deobfuscation", {}, replacementsMade, 0, requestId, processingTimeMs);
60
+ // Update running stats
61
+ this._stats.totalDeobfuscationEvents += 1;
62
+ this._stats.totalReplacements += replacementsMade;
63
+ }
64
+ _writeEntry(eventType, categories, totalEntities, textLength, requestId, processingTimeMs) {
65
+ const ts = Date.now();
66
+ const tsIso = new Date(ts).toISOString();
67
+ // Compute chain hash
68
+ const sortedCategories = JSON.stringify(categories, Object.keys(categories).sort());
69
+ const payload = `${this._lastHash}:${ts}:${eventType}:${sortedCategories}`;
70
+ const chainHash = createHmac("sha256", this._secret)
71
+ .update(payload)
72
+ .digest("hex");
73
+ const entry = {
74
+ timestamp: ts,
75
+ timestampIso: tsIso,
76
+ eventType,
77
+ sessionId: this._sessionId,
78
+ requestId: requestId ?? AuditLogger.generateRequestId(),
79
+ categories,
80
+ totalEntities,
81
+ textLength,
82
+ processingTimeMs: Math.round((processingTimeMs ?? 0) * 100) / 100,
83
+ chainHash,
84
+ };
85
+ this._lastHash = chainHash;
86
+ // Ring buffer: drop oldest if at capacity
87
+ if (this._entries.length >= this._maxEntries) {
88
+ this._entries.shift();
89
+ }
90
+ this._entries.push(entry);
91
+ }
92
+ /** Return aggregate statistics (safe to expose). */
93
+ getStats() {
94
+ return {
95
+ sessionId: this._sessionId,
96
+ totalEvents: this._stats.totalObfuscationEvents +
97
+ this._stats.totalDeobfuscationEvents,
98
+ totalObfuscationEvents: this._stats.totalObfuscationEvents,
99
+ totalDeobfuscationEvents: this._stats.totalDeobfuscationEvents,
100
+ totalEntitiesScrubbed: this._stats.totalEntities,
101
+ totalReplacementsRestored: this._stats.totalReplacements,
102
+ byCategory: { ...this._stats.byCategory },
103
+ };
104
+ }
105
+ /**
106
+ * Verify the integrity of the audit log chain.
107
+ * Returns { valid, entriesChecked }.
108
+ */
109
+ verifyChain() {
110
+ let prevHash = "0".repeat(64);
111
+ let count = 0;
112
+ for (const entry of this._entries) {
113
+ count += 1;
114
+ // Recompute expected hash
115
+ const sortedCategories = JSON.stringify(entry.categories, Object.keys(entry.categories).sort());
116
+ const payload = `${prevHash}:${entry.timestamp}:${entry.eventType}:${sortedCategories}`;
117
+ const expected = createHmac("sha256", this._secret)
118
+ .update(payload)
119
+ .digest("hex");
120
+ if (entry.chainHash !== expected) {
121
+ return { valid: false, entriesChecked: count };
122
+ }
123
+ prevHash = entry.chainHash;
124
+ }
125
+ return { valid: true, entriesChecked: count };
126
+ }
127
+ }
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Canary token injection for detecting LLM data leakage.
3
+ *
4
+ * Injects unique, trackable tokens into obfuscated prompts. These tokens
5
+ * serve no semantic purpose but can be monitored for leakage -- if a canary
6
+ * appears in another user's output or in a training data audit, it proves
7
+ * your data was exposed.
8
+ */
9
/** Record of one canary token that was injected into a message. */
export interface CanaryToken {
    /** The injected marker string, of the form `<prefix>-<16 hex chars>`. */
    token: string;
    /** Id of the injector session that produced the token. */
    sessionId: string;
    /** Injection time in milliseconds since the Unix epoch. */
    timestamp: number;
    /** 1-based position of the message within its session. */
    messageIndex: number;
}
15
/**
 * Appends trackable canary markers to outgoing text and remembers them so
 * later text can be checked for leaked markers.
 */
export declare class CanaryInjector {
    private readonly _prefix;
    private readonly _secret;
    private _sessionId;
    private _messageCounter;
    private _tokens;
    /**
     * @param prefix    Leading part of every generated token.
     * @param secretKey Secret mixed into the token hash and the session id.
     */
    constructor(prefix: string, secretKey: string);
    /** Identifier of the current session (changes on {@link CanaryInjector.reset}). */
    get sessionId(): string;
    /** Append a fresh canary token to `text` and return the result. */
    inject(text: string): string;
    /** Copy of the list of tokens injected during the current session. */
    getTokens(): CanaryToken[];
    /** Tokens of the current session whose marker string occurs in `text`. */
    checkLeak(text: string): CanaryToken[];
    /** Begin a new session: new session id, counter and token list cleared. */
    reset(): void;
}
package/dist/canary.js ADDED
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Canary token injection for detecting LLM data leakage.
3
+ *
4
+ * Injects unique, trackable tokens into obfuscated prompts. These tokens
5
+ * serve no semantic purpose but can be monitored for leakage -- if a canary
6
+ * appears in another user's output or in a training data audit, it proves
7
+ * your data was exposed.
8
+ */
9
+ import { createHash } from "node:crypto";
10
/** Derive a 12-hex-character session id from the secret and the current time. */
function deriveSessionId(secret) {
    const digest = createHash("sha256");
    digest.update(`${secret}:${Date.now()}`);
    return digest.digest("hex").slice(0, 12);
}
/**
 * Tags outgoing text with unique canary markers and keeps a per-session
 * record of every marker so that later text can be scanned for leaks.
 */
export class CanaryInjector {
    _prefix;
    _secret;
    _sessionId;
    _messageCounter;
    _tokens;
    /**
     * @param prefix    Leading part of every generated token.
     * @param secretKey Secret mixed into the token hash and the session id.
     */
    constructor(prefix, secretKey) {
        this._prefix = prefix;
        this._secret = secretKey;
        this._sessionId = deriveSessionId(secretKey);
        this._messageCounter = 0;
        this._tokens = [];
    }
    /** Identifier of the current session. */
    get sessionId() {
        return this._sessionId;
    }
    /**
     * Append a canary to `text`.
     *
     * The token is `<prefix>-<first 16 hex chars of sha256(secret + "session:index:timestamp")>`
     * and is added on a new line as an HTML-style comment.
     */
    inject(text) {
        const messageIndex = ++this._messageCounter;
        const timestamp = Date.now();
        const seed = `${this._sessionId}:${messageIndex}:${timestamp}`;
        const hasher = createHash("sha256");
        hasher.update(this._secret + seed);
        const token = `${this._prefix}-${hasher.digest("hex").slice(0, 16)}`;
        this._tokens.push({
            token,
            sessionId: this._sessionId,
            timestamp,
            messageIndex,
        });
        return `${text}\n<!-- ${token} -->`;
    }
    /** Shallow copy of the tokens injected so far in this session. */
    getTokens() {
        return this._tokens.slice();
    }
    /** Return every recorded canary whose token string occurs in `text`. */
    checkLeak(text) {
        return this._tokens.filter((candidate) => text.includes(candidate.token));
    }
    /** Start a new session: fresh session id, counter at zero, no recorded tokens. */
    reset() {
        this._sessionId = deriveSessionId(this._secret);
        this._messageCounter = 0;
        this._tokens = [];
    }
}
@@ -0,0 +1,27 @@
1
+ /**
2
+ * Configuration resolver for the Shroud plugin.
3
+ *
4
+ * Merges plugin config with environment variables and provides defaults.
5
+ */
6
+ import { ShroudConfig } from "./types.js";
7
/**
 * Build a complete ShroudConfig.
 *
 * Values are taken from environment variables first, then from
 * `pluginConfig`, and finally from built-in defaults.
 *
 * @param pluginConfig Optional raw plugin configuration; non-object values are ignored.
 * @returns A fully populated configuration object.
 */
export declare function resolveConfig(pluginConfig?: unknown): ShroudConfig;
14
/** How serious a configuration validation finding is. */
export type ConfigSeverity = "error" | "warning" | "info";
16
/** One finding produced by configuration validation. */
export interface ConfigIssue {
    /** Severity of the finding. */
    severity: ConfigSeverity;
    /** Name of the configuration field the finding refers to. */
    field: string;
    /** Human-readable description of the problem. */
    message: string;
}
22
/**
 * Check a resolved ShroudConfig and list any issues found.
 *
 * Never throws; the caller decides what to do with errors, warnings and
 * informational notes.
 *
 * @param config A configuration as returned by `resolveConfig`.
 * @returns Zero or more issues.
 */
export declare function validateConfig(config: ShroudConfig): ConfigIssue[];
package/dist/config.js ADDED
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Configuration resolver for the Shroud plugin.
3
+ *
4
+ * Merges plugin config with environment variables and provides defaults.
5
+ */
6
+ import { randomBytes } from "node:crypto";
7
/**
 * Resolve a fully populated ShroudConfig from optional plugin config
 * and environment variables.
 *
 * Priority: env vars > pluginConfig > defaults.
 *
 * An environment variable that is set but empty is treated as unset, so an
 * empty `SHROUD_SECRET_KEY` / `SHROUD_PERSISTENT_SALT` no longer hides the
 * value supplied in `pluginConfig`.
 *
 * Fields of the wrong type are silently replaced by their defaults. A
 * missing secret key is replaced by 32 random bytes (hex); a key shorter
 * than 16 characters only triggers a console warning.
 *
 * @param pluginConfig Raw plugin configuration; anything that is not a
 *                     non-null object is ignored.
 * @returns The resolved configuration.
 */
export function resolveConfig(pluginConfig) {
    const raw = pluginConfig != null && typeof pluginConfig === "object"
        ? pluginConfig
        : {};
    // Small readers so every field applies the same "right type or default" rule.
    const ofType = (key, type, fallback) => (typeof raw[key] === type ? raw[key] : fallback);
    const listOf = (key) => (Array.isArray(raw[key]) ? raw[key] : []);
    // Env var overrides. `||` (not `??`) so that an empty string falls through
    // to the plugin config instead of overriding it with nothing.
    const envSecretKey = process.env.SHROUD_SECRET_KEY;
    const envSalt = process.env.SHROUD_PERSISTENT_SALT;
    let secretKey = envSecretKey || ofType("secretKey", "string", "");
    // Auto-generate if missing
    if (!secretKey) {
        secretKey = randomBytes(32).toString("hex");
    }
    // Warn if too short (but don't throw -- let the plugin still load)
    if (secretKey.length < 16) {
        console.warn("[shroud] WARNING: secretKey is shorter than 16 characters. " +
            "This weakens mapping security. Set SHROUD_SECRET_KEY or pass a longer key.");
    }
    const persistentSalt = envSalt || ofType("persistentSalt", "string", "");
    // Unknown or non-string redaction levels fall back to "full".
    const validLevels = ["full", "masked", "stats"];
    const redactionLevel = validLevels.includes(raw.redactionLevel)
        ? raw.redactionLevel
        : "full";
    const config = {
        secretKey,
        persistentSalt,
        minConfidence: ofType("minConfidence", "number", 0.0),
        allowlist: listOf("allowlist"),
        denylist: listOf("denylist"),
        canaryEnabled: ofType("canaryEnabled", "boolean", false),
        canaryPrefix: ofType("canaryPrefix", "string", "SHROUD-CANARY"),
        auditEnabled: ofType("auditEnabled", "boolean", false),
        logMappings: ofType("logMappings", "boolean", false),
        customPatterns: listOf("customPatterns"),
        // Verbose audit logging
        verboseLogging: ofType("verboseLogging", "boolean", false),
        auditLogFormat: raw.auditLogFormat === "json" ? "json" : "human",
        auditIncludeProofHashes: ofType("auditIncludeProofHashes", "boolean", false),
        auditHashSalt: ofType("auditHashSalt", "string", ""),
        auditHashTruncate: ofType("auditHashTruncate", "number", 12),
        auditMaxFakesSample: ofType("auditMaxFakesSample", "number", 0),
        detectorOverrides: raw.detectorOverrides != null && typeof raw.detectorOverrides === "object"
            ? raw.detectorOverrides
            : {},
        // Tool chain depth
        maxToolDepth: ofType("maxToolDepth", "number", 10),
        // Redaction level
        redactionLevel,
        // Dry-run mode
        dryRun: ofType("dryRun", "boolean", false),
        // LRU store eviction (0 = unlimited)
        maxStoreMappings: ofType("maxStoreMappings", "number", 0),
    };
    return config;
}
83
+ /**
84
+ * Validate a resolved ShroudConfig and return actionable issues.
85
+ *
86
+ * Does NOT throw — callers decide how to handle warnings vs errors.
87
+ */
88
+ export function validateConfig(config) {
89
+ const issues = [];
90
+ // Secret key checks
91
+ if (config.secretKey.length < 16) {
92
+ issues.push({ severity: "error", field: "secretKey", message: "secretKey is shorter than 16 chars — mappings are weak. Set SHROUD_SECRET_KEY." });
93
+ }
94
+ else if (config.secretKey.length < 32) {
95
+ issues.push({ severity: "warning", field: "secretKey", message: "secretKey is shorter than 32 chars — consider a longer key for production." });
96
+ }
97
+ // minConfidence range
98
+ if (config.minConfidence < 0 || config.minConfidence > 1) {
99
+ issues.push({ severity: "error", field: "minConfidence", message: `minConfidence=${config.minConfidence} is outside [0,1]. Set to a value between 0 and 1.` });
100
+ }
101
+ // maxStoreMappings negative
102
+ if (config.maxStoreMappings < 0) {
103
+ issues.push({ severity: "error", field: "maxStoreMappings", message: "maxStoreMappings must be >= 0 (0 = unlimited)." });
104
+ }
105
+ // dryRun informational
106
+ if (config.dryRun) {
107
+ issues.push({ severity: "info", field: "dryRun", message: "Dry-run mode is active — entities are detected but text is NOT obfuscated." });
108
+ }
109
+ // Custom patterns with invalid regex
110
+ for (const cp of config.customPatterns) {
111
+ try {
112
+ new RegExp(cp.pattern);
113
+ }
114
+ catch {
115
+ issues.push({ severity: "error", field: "customPatterns", message: `Custom pattern "${cp.name}" has invalid regex: ${cp.pattern}` });
116
+ }
117
+ }
118
+ // Detector overrides referencing unknown rules (info-level since we can't check at config time)
119
+ if (Object.keys(config.detectorOverrides).length > 0) {
120
+ issues.push({ severity: "info", field: "detectorOverrides", message: `${Object.keys(config.detectorOverrides).length} detector override(s) configured.` });
121
+ }
122
+ return issues;
123
+ }
@@ -0,0 +1,8 @@
1
+ /** Base detector interface. */
2
+ import { DetectedEntity } from "../types.js";
3
/** Contract implemented by every detector. */
export interface BaseDetector {
    /** Identifier of the detector. */
    readonly name: string;
    /** Scan `text` and return the entities found in it. */
    detect(text: string): DetectedEntity[];
    /** Optional hook for stateful detectors to clear their state. */
    reset?(): void;
}
@@ -0,0 +1,2 @@
1
+ /** Base detector interface. */
2
+ export {};
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Code-aware detector that finds sensitive data inside string literals and comments.
3
+ *
4
+ * Scans source code for string literals and comments across common languages,
5
+ * then runs the standard regex detector on the extracted text to find PII
6
+ * that would otherwise be missed by scanning raw code.
7
+ */
8
+ import { DetectedEntity } from "../types.js";
9
+ import { BaseDetector } from "./base.js";
10
+ import { RegexDetector } from "./regex.js";
11
/**
 * Detector for sensitive data that sits inside source-code string literals
 * and comments.
 *
 * Strings and comments are pulled out of the code, PII detection is run on
 * that extracted text, and the resulting entity positions are translated
 * back to offsets in the original source.
 */
export declare class CodeDetector implements BaseDetector {
    readonly name = "code";
    private _inner;
    /** @param inner Detector applied to the extracted text; optional. */
    constructor(inner?: RegexDetector);
    /** Return the entities found inside strings and comments of `text`. */
    detect(text: string): DetectedEntity[];
    /** Locate string literals and comments in code. */
    private _extractSpans;
}
@@ -0,0 +1,144 @@
1
+ /**
2
+ * Code-aware detector that finds sensitive data inside string literals and comments.
3
+ *
4
+ * Scans source code for string literals and comments across common languages,
5
+ * then runs the standard regex detector on the extracted text to find PII
6
+ * that would otherwise be missed by scanning raw code.
7
+ */
8
+ import { RegexDetector } from "./regex.js";
9
+ /** Language-agnostic patterns for extracting strings and comments. */
10
+ const SPAN_PATTERNS = [
11
+ // Triple-quoted strings (Python, etc.)
12
+ /"""[\s\S]*?"""/g,
13
+ /'''[\s\S]*?'''/g,
14
+ // Double-quoted strings
15
+ /"(?:[^"\\]|\\.)*"/g,
16
+ // Single-quoted strings
17
+ /'(?:[^'\\]|\\.)*'/g,
18
+ // Backtick strings (JS/Go/etc.)
19
+ /`(?:[^`\\]|\\.)*`/g,
20
+ // Line comments (C-style, Python, Ruby, Shell)
21
+ /\/\/[^\n]*/g,
22
+ /#[^\n]*/g,
23
+ // Block comments
24
+ /\/\*[\s\S]*?\*\//g,
25
+ ];
26
/** Leading keywords (regex source fragments) that mark a span as a pure code construct. */
const NOISE_KEYWORDS = [
    "import ", "from ", "require\\(", "use ", "include ",
    "package ", "module ", "class ", "def ", "func ", "fn ",
    "return ", "const ", "let ", "var ", "type ", "interface ",
];
/**
 * Matches spans that are only code constructs and carry no data (skip these).
 *
 * A span matches when, after optional leading whitespace, quote or comment
 * characters, it starts with one of the keywords above and contains no `@`
 * anywhere after that keyword.
 */
const CODE_NOISE = new RegExp("^[\\s\"'`#/\\*]*" +
    "(?:" + NOISE_KEYWORDS.join("|") + ")" +
    "[^@]*$");
32
/** Substrings whose presence suggests that a text is source code. */
const CODE_INDICATORS = [
    "def ", "class ", "function ", "import ", "from ", "require(",
    "const ", "let ", "var ", "func ", "fn ", "pub ", "private ",
    "return ", "if (", "for (", "while (", "package ", "module ",
    "#!/", "# -*- coding", "use strict", "pragma ",
    "SELECT ", "INSERT ", "CREATE TABLE",
];
/**
 * Heuristic: does this text look like source code?
 *
 * Texts with fewer than three lines are never treated as code. Otherwise one
 * point is scored for every indicator substring present, one if any line
 * ends in `{`, `}` or `;`, and one if any line starts with a
 * `def`/`class`/`func`/`fn` definition. Two or more points mean "code".
 */
function looksLikeCode(text) {
    if (text.split("\n").length < 3) {
        return false;
    }
    const indicatorHits = CODE_INDICATORS.filter((marker) => text.includes(marker)).length;
    const lineEndsWithSyntax = /[{};]\s*$/m.test(text) ? 1 : 0;
    const hasDefinitionLine = /^\s*(def|class|func|fn)\s+\w+/m.test(text) ? 1 : 0;
    return indicatorHits + lineEndsWithSyntax + hasDefinitionLine >= 2;
}
60
+ /**
61
+ * Detects sensitive data embedded in source code strings and comments.
62
+ *
63
+ * Extracts string literals and comments from code, then runs PII detection
64
+ * on the extracted text. Entity positions are mapped back to the original
65
+ * source positions.
66
+ */
67
+ export class CodeDetector {
68
+ name = "code";
69
+ _inner;
70
+ constructor(inner) {
71
+ this._inner = inner ?? new RegexDetector();
72
+ }
73
+ detect(text) {
74
+ // Only run if the text looks like code
75
+ if (!looksLikeCode(text)) {
76
+ return [];
77
+ }
78
+ const spans = this._extractSpans(text);
79
+ const entities = [];
80
+ const seenSpans = new Set();
81
+ for (const span of spans) {
82
+ const innerText = span.text;
83
+ const innerOffset = span.start;
84
+ // Skip spans that look like pure code constructs
85
+ if (CODE_NOISE.test(innerText)) {
86
+ continue;
87
+ }
88
+ // Run PII detection on the inner text
89
+ const innerEntities = this._inner.detect(innerText);
90
+ for (const entity of innerEntities) {
91
+ // Map positions back to original text
92
+ const absStart = innerOffset + entity.start;
93
+ const absEnd = innerOffset + entity.end;
94
+ const spanKey = `${absStart}:${absEnd}`;
95
+ if (seenSpans.has(spanKey)) {
96
+ continue;
97
+ }
98
+ seenSpans.add(spanKey);
99
+ entities.push({
100
+ value: entity.value,
101
+ start: absStart,
102
+ end: absEnd,
103
+ category: entity.category,
104
+ confidence: entity.confidence * 0.9, // Slightly lower since it's inside code
105
+ detector: `code:${entity.detector}`,
106
+ });
107
+ }
108
+ }
109
+ entities.sort((a, b) => a.start - b.start);
110
+ return entities;
111
+ }
112
+ /** Extract string literals and comments from code. */
113
+ _extractSpans(text) {
114
+ const spans = [];
115
+ const covered = new Set();
116
+ for (const pattern of SPAN_PATTERNS) {
117
+ pattern.lastIndex = 0;
118
+ for (const match of text.matchAll(pattern)) {
119
+ const start = match.index;
120
+ const end = start + match[0].length;
121
+ // Skip if overlapping with already-found span
122
+ let overlaps = false;
123
+ for (let i = start; i < end; i++) {
124
+ if (covered.has(i)) {
125
+ overlaps = true;
126
+ break;
127
+ }
128
+ }
129
+ if (overlaps) {
130
+ continue;
131
+ }
132
+ for (let i = start; i < end; i++) {
133
+ covered.add(i);
134
+ }
135
+ const kind = match[0].startsWith("/") || match[0].startsWith("#")
136
+ ? "comment"
137
+ : "string";
138
+ spans.push({ text: match[0], start, end, kind });
139
+ }
140
+ }
141
+ spans.sort((a, b) => a.start - b.start);
142
+ return spans;
143
+ }
144
+ }
@@ -0,0 +1,31 @@
1
+ /**
2
+ * Context-aware detection enhancements.
3
+ *
4
+ * Wraps another detector and applies post-detection intelligence:
5
+ * 1. Context-aware confidence boosting (config keyword density)
6
+ * 3. Proximity-based PII clustering (nearby entities boost each other)
7
+ * 4. Config-block hostname extraction (hostname X -> detect bare X)
8
+ * 9. Learned entity propagation (cross-invocation memory)
9
+ * 10. Confidence decay by frequency (common words lose confidence)
10
+ */
11
+ import { DetectedEntity } from "../types.js";
12
+ import { BaseDetector } from "./base.js";
13
/**
 * Detector that wraps another detector and post-processes its results
 * (context boosting, proximity clustering, hostname propagation, learned
 * entities and frequency-based confidence decay, per the module header).
 */
export declare class ContextDetector implements BaseDetector {
    readonly name = "context";
    /** The wrapped detector. */
    private _inner;
    /** Feature 9: entities remembered from previous invocations. */
    private _learnedEntities;
    /** @param inner Detector whose results are post-processed. */
    constructor(inner: BaseDetector);
    /** Detect entities in `text`. */
    detect(text: string): DetectedEntity[];
    /** Forget all learned entities (called on Obfuscator.reset()). */
    reset(): void;
    /** Number of entities currently learned. */
    get learnedCount(): number;
    // Private steps; presumably one per feature listed in the module header
    // (their implementations are not part of this declaration).
    private _boostFromContext;
    private _splitBlocks;
    private _boostByProximity;
    private _extractAndPropagateHostnames;
    private _injectLearnedEntities;
    private _learnEntities;
    private _decayCommonWords;
}