@ijfw/memory-server 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. package/bin/ijfw +27 -0
  2. package/bin/ijfw-dashboard +180 -0
  3. package/bin/ijfw-dispatch-plan +41 -0
  4. package/bin/ijfw-memorize +273 -0
  5. package/bin/ijfw-memory +51 -0
  6. package/fixtures/demo-target.js +28 -0
  7. package/package.json +53 -0
  8. package/src/api-client.js +190 -0
  9. package/src/audit-roster.js +315 -0
  10. package/src/caps.js +37 -0
  11. package/src/cold-scan-runner.mjs +37 -0
  12. package/src/compute/edges.js +155 -0
  13. package/src/compute/extract.js +560 -0
  14. package/src/compute/fts5.js +420 -0
  15. package/src/compute/graph-auto-index.js +191 -0
  16. package/src/compute/graph-lock.js +114 -0
  17. package/src/compute/index.js +18 -0
  18. package/src/compute/migration-runner.js +116 -0
  19. package/src/compute/migrations/001-initial.js +23 -0
  20. package/src/compute/migrations/002-porter-stemming-source.js +139 -0
  21. package/src/compute/migrations/003-tier-semantic.js +69 -0
  22. package/src/compute/migrations/004-kg-tables.js +83 -0
  23. package/src/compute/migrations/005-stale-candidate.js +72 -0
  24. package/src/compute/python-resolver.js +106 -0
  25. package/src/compute/runner-vm.js +185 -0
  26. package/src/compute/runner.js +416 -0
  27. package/src/compute/sandbox-detect.js +122 -0
  28. package/src/compute/sandbox-linux.js +164 -0
  29. package/src/compute/sandbox-macos.js +167 -0
  30. package/src/compute/sandbox-windows.js +63 -0
  31. package/src/compute/schema.sql +118 -0
  32. package/src/compute/staleness.js +239 -0
  33. package/src/compute/synonyms.js +367 -0
  34. package/src/compute/traverse.js +180 -0
  35. package/src/cost/aggregator.js +229 -0
  36. package/src/cost/pricing.js +134 -0
  37. package/src/cost/readers/claude.js +179 -0
  38. package/src/cost/readers/codex.js +131 -0
  39. package/src/cost/readers/gemini.js +111 -0
  40. package/src/cost/savings.js +243 -0
  41. package/src/cross-dispatcher.js +437 -0
  42. package/src/cross-orchestrator-cli.js +1885 -0
  43. package/src/cross-orchestrator.js +598 -0
  44. package/src/cross-project-search.js +114 -0
  45. package/src/dashboard-client.html +1180 -0
  46. package/src/dashboard-server.js +895 -0
  47. package/src/design-companion.js +81 -0
  48. package/src/dispatch/colon-syntax.js +732 -0
  49. package/src/dispatch-planner.js +235 -0
  50. package/src/dream/cooldown.js +105 -0
  51. package/src/dream/runner.mjs +373 -0
  52. package/src/dream/staleness-wiring.js +195 -0
  53. package/src/feedback-detector.js +57 -0
  54. package/src/hero-line.js +115 -0
  55. package/src/importers/claude-mem.js +152 -0
  56. package/src/importers/cli.js +311 -0
  57. package/src/importers/common.js +84 -0
  58. package/src/importers/discover.js +235 -0
  59. package/src/importers/rtk.js +107 -0
  60. package/src/intent-router.js +221 -0
  61. package/src/lib/atomic-io.js +201 -0
  62. package/src/lib/cache.js +33 -0
  63. package/src/lib/npm-view.js +104 -0
  64. package/src/lib/status-card.js +95 -0
  65. package/src/lib/token.js +85 -0
  66. package/src/memory/fts5.js +349 -0
  67. package/src/memory/migration-runner.js +116 -0
  68. package/src/memory/migrations/001-fts5-init.js +26 -0
  69. package/src/memory/migrations/002-tier-semantic.js +60 -0
  70. package/src/memory/migrations/003-stale-candidate.js +60 -0
  71. package/src/memory/reader.js +300 -0
  72. package/src/memory/recall-counter.js +76 -0
  73. package/src/memory/schema.sql +79 -0
  74. package/src/memory/search.js +431 -0
  75. package/src/memory/staleness.js +237 -0
  76. package/src/memory/tier-promotion.js +377 -0
  77. package/src/memory/tokenize.js +63 -0
  78. package/src/project-type-detector.js +866 -0
  79. package/src/prompt-check.js +171 -0
  80. package/src/ralph-allowlist.js +88 -0
  81. package/src/receipts.js +129 -0
  82. package/src/redactor.js +107 -0
  83. package/src/sandbox.js +275 -0
  84. package/src/sanitizer.js +69 -0
  85. package/src/scan-resume.js +167 -0
  86. package/src/schema.js +82 -0
  87. package/src/search-bm25.js +108 -0
  88. package/src/server.js +1414 -0
  89. package/src/swarm-config.js +80 -0
  90. package/src/trident/dispatch.js +211 -0
  91. package/src/trident/lens-health.js +253 -0
  92. package/src/update-apply.js +79 -0
  93. package/src/update-check.js +136 -0
  94. package/src/vectors.js +178 -0
  95. package/templates/design/bento-grid.md +84 -0
  96. package/templates/design/brutalist-luxe.md +82 -0
  97. package/templates/design/cinematic-dark.md +82 -0
  98. package/templates/design/data-dense-dashboard.md +88 -0
  99. package/templates/design/editorial-warm.md +81 -0
  100. package/templates/design/glassmorphic.md +84 -0
  101. package/templates/design/magazine-editorial.md +84 -0
  102. package/templates/design/maximalist-vibrant.md +85 -0
  103. package/templates/design/neo-swiss-tech.md +85 -0
  104. package/templates/design/swiss-minimal.md +80 -0
  105. package/templates/design/terminal-native.md +83 -0
  106. package/templates/design/warm-organic.md +84 -0
@@ -0,0 +1,171 @@
1
+ /**
2
+ * IJFW prompt-check -- deterministic vague-prompt detector.
3
+ *
4
+ * Pure functions, no I/O. Safe to call from MCP tool handler or import into
5
+ * a hook script. Returns { vague, signals, suggestion, rewrite } or, when bypassed, { vague, signals, suggestion, bypass_reason }.
6
+ *
7
+ * Design constraints (per AUDIT.md):
8
+ * - No LLM calls, no network. Pure regex.
9
+ * - Fire only when >=2 signals trip AND prompt is short AND has no target.
10
+ * - Single-signal trips are silent (low FP rate).
11
+ * - Override: leading `*` or substring "ijfw off" bypasses entirely.
12
+ * - Positive framing in any user-visible suggestion.
13
+ */
14
+
15
+ // 7-rule vagueness taxonomy from research (rule 3).
16
const RULES = [
  {
    id: 'bare_verb',
    // An imperative verb opening a prompt of fewer than 6 tokens. The token
    // cap keeps longer, genuinely specified requests from tripping the rule.
    test(text) {
      const normalized = text.trim().toLowerCase();
      const wordCount = normalized.split(/\s+/).length;
      return wordCount < 6
        && /^(fix|refactor|improve|clean\s*up|optimi[sz]e|update|review|check|test|debug|analy[sz]e|handle|sort\s*out|tidy)\b/.test(normalized);
    }
  },
  {
    id: 'unresolved_anaphora',
    // Prompt opens with a pronoun or "the <generic noun>". Heuristic only:
    // prior turns are not visible here, so it is meant to be combined with
    // other signals rather than trusted alone.
    test(text) {
      const opensWithReference = /^(this|that|it|these|those|the\s+(bug|issue|file|code|function|error|problem))\b/i;
      return opensWithReference.test(text.trim());
    }
  },
  {
    id: 'abstract_goal',
    // A quality word ("better", "production-ready", ...) with nothing
    // measurable next to it.
    test(text) {
      if (!/\b(better|cleaner|nicer|more\s+robust|production[\s-]?ready|proper|correct|good|nice|right)\b/i.test(text)) {
        return false;
      }
      // A metric or a path/test reference counts as an acceptance criterion.
      const mitigated = /\d+\s*(ms|%|x|kb|mb|sec|s\b|tests?\b|users?\b)/i.test(text)
        || /[\w./-]+\.\w{1,5}(\b|:)|src\/|tests?\//i.test(text);
      return !mitigated;
    }
  },
  {
    id: 'no_target',
    // Trips when the text names nothing concrete: no file path, no line
    // number, no well-known directory, no code-shaped identifier.
    test(text) {
      const targetShapes = [
        /[\w./-]+\.\w{1,5}(\b|:)/, // file path
        /:\d+/, // line number
        /\b(src|lib|app|tests?|spec|docs?)\//i, // directory
        // snake_case, UpperCamelCase, or lowerCamelCase identifier
        /\b([a-z]+_[a-z][\w_]*|[A-Z][a-z]+[A-Z]\w*|[a-z]+[A-Z]\w*)\b/,
      ];
      return !targetShapes.some((shape) => shape.test(text));
    }
  },
  {
    id: 'scope_plural',
    // Unbounded plural scope: "everything", "the tests", "all the files".
    test(text) {
      return /\b(the\s+tests|all\s+the\s+(things|stuff|files)|everything|stuff|things)\b/i.test(text);
    }
  },
  {
    id: 'polysemous',
    // A single ambiguous coding verb that makes up the entire prompt.
    test(text) {
      const normalized = text.trim().toLowerCase();
      return /^(source|build|run|deploy|ship|release|setup|set\s*up)\.?\s*$/.test(normalized);
    }
  },
  {
    id: 'missing_constraint',
    // Four or more tokens, yet neither a constraint word nor a number.
    test(text) {
      const wordCount = text.trim().split(/\s+/).length;
      if (wordCount < 4) return false; // too short for this rule to be meaningful
      const constrained = /\b(must|should|when|if|until|without|only|always|never|except)\b/i.test(text)
        || /\b\d+\b/.test(text);
      return !constrained;
    }
  }
];
80
+
81
+ // Bypass conditions -- match severity1 plugin convention plus IJFW override.
82
/**
 * Decide whether a prompt should skip vagueness checking altogether.
 *
 * Checks run in a fixed order and the first hit wins: non-string input,
 * empty text, a leading `*`, `/` or `#`, the phrase "ijfw off", a prompt
 * longer than 4000 characters, then a fenced code block.
 *
 * @param {*} text - Raw prompt as received.
 * @returns {string|null} A short reason code, or null when the prompt should be checked.
 */
function bypassReason(text) {
  if (typeof text !== 'string') return 'non-string';

  const trimmed = text.trim();
  if (trimmed === '') return 'empty';

  // Single-character prefixes that opt the prompt out.
  const prefixReasons = new Map([
    ['*', 'asterisk-prefix'],
    ['/', 'slash-command'],
    ['#', 'memorize-prefix'],
  ]);
  const prefixReason = prefixReasons.get(trimmed[0]);
  if (prefixReason !== undefined) return prefixReason;

  if (/\bijfw\s+off\b/i.test(trimmed)) return 'override-keyword';

  // Pasted code or a stack trace: very long or fenced text is assumed to
  // already carry its own target.
  if (trimmed.length > 4000) return 'long-prompt';
  return /^```/m.test(trimmed) ? 'fenced-code' : null;
}
95
+
96
/**
 * Run every rule in RULES against a prompt and decide whether it is vague.
 *
 * A prompt is reported as vague only when at least two rules fire, one of
 * them is `no_target`, and the prompt has fewer than 30 tokens.
 *
 * @param {*} text - Raw prompt.
 * @returns {{vague: boolean, signals: string[], suggestion: string,
 *            rewrite?: (string[]|null), bypass_reason?: string}}
 *   Bypassed prompts carry `bypass_reason` and no `rewrite`; checked prompts
 *   carry `rewrite` (a question list when vague, otherwise null).
 */
function checkPrompt(text) {
  const bypass_reason = bypassReason(text);
  if (bypass_reason) {
    return { vague: false, signals: [], suggestion: '', bypass_reason };
  }

  // Collect the ids of the rules that fire. A rule that throws counts as
  // "did not fire" so a bad rule can never break the calling hook.
  const signals = RULES
    .filter((rule) => {
      try {
        return rule.test(text);
      } catch {
        return false;
      }
    })
    .map((rule) => rule.id);

  const wordCount = text.trim().split(/\s+/).length;
  const vague = signals.length >= 2
    && wordCount < 30
    && signals.includes('no_target');

  // Positively framed nudge; only produced for vague prompts (which by
  // definition already include the no_target signal).
  let suggestion = '';
  if (vague) {
    suggestion = signals.includes('bare_verb')
      ? 'Sharpening your aim -- which file, function, or symbol? e.g. src/auth.py:145, getUserById, the failing test name.'
      : signals.includes('unresolved_anaphora')
        ? 'Anchoring the reference -- which file or recent code do you mean?'
        : 'Pinning the target -- naming the file, symbol, or expected behavior will sharpen the edit.';
  }

  // Structured clarifying questions derived from the signals that fired.
  const rewrite = vague ? buildQuestionPack(signals) : null;

  return { vague, signals, suggestion, rewrite };
}
135
+
136
+ // Map signals → clarifying questions, deduped and capped at 3.
137
/**
 * Turn fired signal ids into at most three distinct clarifying questions.
 *
 * Questions keep the order of the signals that produced them; signals that
 * map to the same question contribute it once, and unknown ids are ignored.
 * When nothing maps, a single generic question is returned.
 *
 * @param {Iterable<string>} signals - Signal ids, in firing order.
 * @returns {string[]} One to three questions.
 */
function buildQuestionPack(signals) {
  const targetQuestion = 'Which file, function, or line number is the target?';
  const questionFor = new Map([
    ['bare_verb', targetQuestion],
    ['no_target', targetQuestion],
    ['unresolved_anaphora', 'What does "this/that" refer to -- a file, a symptom, a prior message?'],
    ['abstract_goal', 'What specifically would "done" look like -- a metric, a test passing, or observable behavior?'],
    ['scope_plural', 'Which of "all the X" -- do you want every instance, or a specific subset?'],
    ['missing_constraint', 'Any constraints I should respect -- don\'t touch X, must run in <Y ms, preserve behavior Z?'],
    ['polysemous', 'Which meaning -- e.g. "deploy" could mean build, release, push, or run locally?'],
  ]);

  // A Set keeps insertion order and drops repeats, which is exactly the
  // "deduped, in firing order" contract.
  const picked = new Set();
  for (const signal of signals) {
    if (picked.size >= 3) break;
    const question = questionFor.get(signal);
    if (question !== undefined) picked.add(question);
  }

  if (picked.size === 0) {
    return ['What file, function, or acceptance criterion pins the target?'];
  }
  return [...picked];
}
170
+
171
+ export { checkPrompt, RULES, bypassReason, buildQuestionPack };
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Ralph verify-command allowlist -- Wave 0 F.3
3
+ * Zero deps, ESM. Used by Phase 4 Ralph loop before executing any shell criterion.
4
+ */
5
+
6
export const ALLOWLIST = [
  'grep -q',
  'npm test --',
  'pytest',
  'tsc --noEmit',
  'node --test',
  'bash scripts/',
  'git diff --exit-code',
  'test -f',
  'test -d',
];

export const FORBID_LIST = [
  'rm',
  'rmdir',
  'mv',
  'cp',
  'curl',
  'wget',
  'fetch',
  'git push',
  'git reset --hard',
  'git clean -f',
  'git rebase -i',
  'sudo',
  'chmod',
  'chown',
  'bash -c',
  'eval',
  'npm publish',
  'npm install',
  'pip install',
];

// Characters that terminate one simple command and begin another.
const COMMAND_SEPARATORS = new Set([';', '\n', '&', '|']);

/**
 * Split a shell command line into the simple commands it would execute.
 *
 * Quote-aware: separators inside '...' or "..." are literal text, and a
 * backslash outside single quotes escapes the next character. Redirection
 * operators that merely contain `&` or `|` (`2>&1`, `&>`, `>|`) are not
 * treated as separators.
 *
 * Command substitution (backticks or `$(`, which stay live inside double
 * quotes) and process substitution (`<(` / `>(`) run arbitrary commands and
 * are rejected outright, as is an unterminated quote.
 *
 * @param {string} cmd - Trimmed command line.
 * @returns {{ segments: string[] } | { error: string }} Non-empty, trimmed
 *   simple commands, or the reason the line cannot be analysed safely.
 */
function splitShellSegments(cmd) {
  const segments = [];
  let current = '';
  let quote = null; // "'" or '"' while inside a quoted span

  for (let i = 0; i < cmd.length; i++) {
    const ch = cmd[i];
    const next = cmd[i + 1];

    // Inside single quotes everything is literal until the closing quote.
    if (quote === "'") {
      if (ch === "'") quote = null;
      current += ch;
      continue;
    }

    if (ch === '\\') {
      current += ch + (next ?? '');
      i++;
      continue;
    }

    if (ch === '`' || (ch === '$' && next === '(')) {
      return { error: 'command substitution is not allowed' };
    }

    if (quote === '"') {
      if (ch === '"') quote = null;
      current += ch;
      continue;
    }

    if (ch === "'" || ch === '"') {
      quote = ch;
      current += ch;
      continue;
    }

    if ((ch === '<' || ch === '>') && next === '(') {
      return { error: 'process substitution is not allowed' };
    }

    if (COMMAND_SEPARATORS.has(ch)) {
      const prev = cmd[i - 1];
      const partOfRedirect = (ch === '&' && (prev === '>' || prev === '<' || next === '>'))
        || (ch === '|' && prev === '>');
      if (!partOfRedirect) {
        segments.push(current);
        current = '';
        continue;
      }
    }

    current += ch;
  }

  if (quote !== null) return { error: 'unterminated quote' };

  segments.push(current);
  return {
    segments: segments.map((s) => s.trim()).filter((s) => s !== ''),
  };
}

/**
 * Validate one simple command: forbid list first, then allowlist.
 *
 * @param {string} segment - A single trimmed simple command.
 * @returns {{ safe: true } | { safe: false, reason: string }}
 */
function checkSimpleCommand(segment) {
  const tokens = segment.split(/\s+/);

  // Forbid list: the entry's tokens must be the command's leading tokens, so
  // "rm" matches "rm -rf /" but not "grep -q 'rm'".
  for (const forbidden of FORBID_LIST) {
    const forbidTokens = forbidden.split(/\s+/);
    if (forbidTokens.every((tok, i) => tokens[i] === tok)) {
      return { safe: false, reason: `${forbidden} is in forbid list` };
    }
  }

  for (const allowed of ALLOWLIST) {
    if (segment === allowed) return { safe: true };

    if (allowed.endsWith('/')) {
      // Path-prefix entry (e.g. 'bash scripts/'): the script name continues
      // the token directly, so no trailing space is required.
      if (!segment.startsWith(allowed)) continue;
      const pathStart = allowed.lastIndexOf(' ') + 1;
      const pathToken = segment.slice(pathStart).split(/\s+/)[0];
      // Quoting, escaping or expansion in the path could hide a traversal
      // from the textual check below, so refuse rather than guess.
      if (/['"\\$]/.test(pathToken)) {
        return { safe: false, reason: 'quoting or expansion in script path is not allowed' };
      }
      if (pathToken.split('/').includes('..')) {
        return { safe: false, reason: 'path traversal outside allowed directory' };
      }
      return { safe: true };
    }

    if (segment.startsWith(`${allowed} `) || segment.startsWith(`${allowed}\t`)) {
      return { safe: true };
    }
  }

  return { safe: false, reason: 'no allowlist match' };
}

/**
 * Check whether a shell command is safe to run as a Ralph verify step.
 *
 * The command line is split into its simple commands (at unquoted `;`, `&`,
 * `|` and newlines) and EVERY simple command must pass: it must not start
 * with a FORBID_LIST entry and must start with an ALLOWLIST primitive.
 * Checking only the first command would let `grep -q x f; rm -rf /` through.
 * Command/process substitution and unterminated quotes are rejected.
 *
 * Not analysed: redirection targets (`> file`) and the arguments passed to
 * an allowlisted command. This is a textual pre-filter, not a sandbox.
 *
 * @param {string} cmd
 * @returns {{ safe: true } | { safe: false, reason: string }}
 */
export function isSafeVerifyCommand(cmd) {
  if (typeof cmd !== 'string' || cmd.trim() === '') {
    return { safe: false, reason: 'command is empty or not a string' };
  }

  const parsed = splitShellSegments(cmd.trim());
  if ('error' in parsed) {
    return { safe: false, reason: parsed.error };
  }
  if (parsed.segments.length === 0) {
    // Nothing but separators, e.g. ";".
    return { safe: false, reason: 'no allowlist match' };
  }

  for (const segment of parsed.segments) {
    const verdict = checkSimpleCommand(segment);
    if (!verdict.safe) return verdict;
  }
  return { safe: true };
}
@@ -0,0 +1,129 @@
1
+ // receipts.js -- atomic append/read for cross-run JSONL receipts.
2
+ // ESM, zero deps, synchronous fs.
3
+ //
4
+ // renderReceipt(record, phaseWave?, stepNum?) -- human-readable text for one receipt.
5
+ // Header: <phaseWave> -- <operation> -- <timestamp>
6
+ // Body: Step N.M -- <finding>
7
+ // cache_stats fields (cache_creation_input_tokens, cache_read_input_tokens)
8
+ // are rendered when present; absence is a no-op.
9
+
10
+ import fs from 'node:fs';
11
+ import path from 'node:path';
12
+
13
/**
 * Location of the cross-run receipts log inside a project.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {string} `<projectDir>/.ijfw/receipts/cross-runs.jsonl`
 */
export function RECEIPTS_FILE(projectDir) {
  const relativeParts = ['.ijfw', 'receipts', 'cross-runs.jsonl'];
  return path.join(projectDir, ...relativeParts);
}
16
+
17
const MAX_RECEIPTS = 100;

/**
 * Append one receipt as a single JSON line, then trim the log to the newest
 * MAX_RECEIPTS entries.
 *
 * The append itself goes through appendFileSync (O_APPEND), so concurrent
 * writers do not interleave within a line. The receipts directory is created
 * on demand.
 *
 * @param {string} projectDir - Project root directory.
 * @param {*} record - JSON-serialisable receipt.
 */
export function writeReceipt(projectDir, record) {
  const dest = RECEIPTS_FILE(projectDir);
  fs.mkdirSync(path.dirname(dest), { recursive: true });
  fs.appendFileSync(dest, JSON.stringify(record) + '\n');
  _pruneReceipts(dest);
}

/**
 * Keep only the last MAX_RECEIPTS non-blank lines of the log. No-op when the
 * log is at or under the limit.
 *
 * The trimmed content is written to a sibling temp file and renamed over the
 * log rather than rewriting the log in place, so a crash or a concurrent
 * reader never observes a truncated or half-written file.
 *
 * @param {string} dest - Path of the receipts log.
 */
function _pruneReceipts(dest) {
  const raw = fs.readFileSync(dest, 'utf8');
  const lines = raw.split('\n').filter((l) => l.trim());
  if (lines.length <= MAX_RECEIPTS) return;

  // Same directory as the log so the rename stays on one filesystem.
  const tmp = `${dest}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tmp, lines.slice(-MAX_RECEIPTS).join('\n') + '\n');
    fs.renameSync(tmp, dest);
  } catch (err) {
    // Do not leave the temp file behind; surface the original failure.
    fs.rmSync(tmp, { force: true });
    throw err;
  }
}
38
+
39
+ // Purge all receipts. Returns the count of entries removed.
40
/**
 * Empty the receipts log.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {number} How many non-blank lines were in the log before it was
 *   emptied; 0 when the log does not exist.
 */
export function purgeReceipts(projectDir) {
  const file = RECEIPTS_FILE(projectDir);
  if (!fs.existsSync(file)) return 0;

  let removed = 0;
  for (const line of fs.readFileSync(file, 'utf8').split('\n')) {
    if (line.trim()) removed += 1;
  }

  // Truncate rather than delete, so the file keeps existing.
  fs.writeFileSync(file, '');
  return removed;
}
48
+
49
+ // Anthropic cache-read savings rate (mirrors hero-line.js constant).
50
const CACHE_SAVINGS_PER_TOKEN = 2.70 / 1_000_000;

/**
 * Render one receipt record as human-readable, multi-line text.
 *
 * Line layout (each body line is emitted only when its data is present):
 *   <phaseWave> -- <mode or "cross"> -- <timestamp>
 *   Step N.1 -- auditors: <ids>
 *   Step N.2 -- findings: <count> items | <c> consensus, <ct> contested, <u> unique
 *   Step N.3 -- duration: <ms or s>
 *   Step N.4 -- cache-eligible: false (<reason>) | cache created: <n> tokens
 *   Step N.5 -- cache read: <n> tokens (~$<x> saved)
 *
 * The record is only read, never modified.
 *
 * @param {object|null|undefined} record - Parsed receipt; a nullish record
 *   renders as a bare header instead of throwing.
 * @param {string} [phaseWave='Trident'] - Caller-supplied header label.
 * @param {number} [stepNum=1] - N in the "Step N.M" body prefixes.
 * @returns {string} Lines joined with '\n'.
 */
export function renderReceipt(record, phaseWave = 'Trident', stepNum = 1) {
  // readReceipts() can legitimately yield null (a line containing `null`).
  const rec = record ?? {};
  const op = rec.mode || 'cross';
  const lines = [`${phaseWave} -- ${op} -- ${_formatReceiptTimestamp(rec.timestamp)}`];

  // Auditors -- tolerate null/primitive entries instead of throwing on `.id`.
  if (Array.isArray(rec.auditors) && rec.auditors.length > 0) {
    const ids = rec.auditors.map((a) => a?.id).filter(Boolean).join(', ');
    lines.push(`Step ${stepNum}.1 -- auditors: ${ids}`);
  }

  // Findings -- either an item list or consensus/contested/unique counters.
  const findings = rec.findings;
  if (findings) {
    if (Array.isArray(findings.items)) {
      lines.push(`Step ${stepNum}.2 -- findings: ${findings.items.length} items`);
    } else {
      const c = typeof findings.consensus === 'number' ? findings.consensus : 0;
      const ct = typeof findings.contested === 'number' ? findings.contested : 0;
      const u = typeof findings.unique === 'number' ? findings.unique : 0;
      lines.push(`Step ${stepNum}.2 -- findings: ${c} consensus, ${ct} contested, ${u} unique`);
    }
  }

  // Duration -- pick the unit AFTER rounding so 999.6 ms reads "1s", not
  // "1000ms". Non-finite values (NaN/Infinity) are skipped.
  if (Number.isFinite(rec.duration_ms)) {
    const ms = Math.round(rec.duration_ms);
    const dur = ms < 1000 ? `${ms}ms` : `${Math.round(rec.duration_ms / 1000)}s`;
    lines.push(`Step ${stepNum}.3 -- duration: ${dur}`);
  }

  // Cache stats -- rendered when present, no-op when absent.
  const cs = rec.cache_stats;
  if (cs) {
    if (cs.cache_eligible === false) {
      const reason = cs.cache_eligible_reason ?? 'prompt < 1024 tokens';
      lines.push(`Step ${stepNum}.4 -- cache-eligible: false (${reason})`);
    } else {
      if (typeof cs.cache_creation_input_tokens === 'number') {
        lines.push(`Step ${stepNum}.4 -- cache created: ${cs.cache_creation_input_tokens} tokens`);
      }
      if (typeof cs.cache_read_input_tokens === 'number') {
        const saved = cs.cache_read_input_tokens * CACHE_SAVINGS_PER_TOKEN;
        // Savings under one cent are not worth showing.
        const savedStr = saved >= 0.01 ? ` (~$${saved.toFixed(2)} saved)` : '';
        lines.push(`Step ${stepNum}.5 -- cache read: ${cs.cache_read_input_tokens} tokens${savedStr}`);
      }
    }
  }

  return lines.join('\n');
}

// Format a receipt timestamp as "YYYY-MM-DD HH:MM:SS". Strings are assumed to
// be ISO-8601 and are cut textually; finite epoch-millisecond numbers are
// converted via Date (UTC). Anything else renders as an empty string.
function _formatReceiptTimestamp(timestamp) {
  if (!timestamp) return '';
  if (typeof timestamp === 'string') {
    return timestamp.slice(0, 19).replace('T', ' ');
  }
  if (typeof timestamp === 'number') {
    const date = new Date(timestamp);
    // An out-of-range number yields an invalid Date; toISOString would throw.
    if (!Number.isNaN(date.getTime())) {
      return date.toISOString().slice(0, 19).replace('T', ' ');
    }
  }
  return '';
}
113
+
114
+ // Read and parse all lines; skip corrupt lines; return array.
115
/**
 * Load every receipt from the project's log.
 *
 * Blank lines are ignored and lines that are not valid JSON are skipped, so
 * one corrupt entry never hides the rest.
 *
 * @param {string} projectDir - Project root directory.
 * @returns {Array<*>} Parsed records in file order; empty when the log does
 *   not exist.
 */
export function readReceipts(projectDir) {
  const file = RECEIPTS_FILE(projectDir);
  if (!fs.existsSync(file)) return [];

  return fs.readFileSync(file, 'utf8')
    .split('\n')
    .filter((line) => line.trim() !== '')
    .flatMap((line) => {
      try {
        // Wrapped in an array so flatMap keeps array-valued records intact.
        return [JSON.parse(line)];
      } catch {
        return []; // malformed line: contribute nothing
      }
    });
}
@@ -0,0 +1,107 @@
1
+ // --- Secret redactor (audit S5) ---
2
+ // Strips common credential patterns before any memory write. Conservative
3
+ // by design: pattern list is additive, never tries to classify "suspicious"
4
+ // strings. Better to miss a novel format than to corrupt legitimate prose.
5
+ //
6
+ // Wired into auto-memorize in Wave 3. Exported here so Wave 0 can land the
7
+ // library + tests ahead of the integration.
8
+
9
// Ordered list of { re, label } credential patterns. Order matters: earlier
// entries are applied first, so a more specific pattern must precede a more
// general one that would also match. Every `re` carries the /g flag.
const PATTERNS = [
  // Anthropic -- must come BEFORE generic OpenAI so `sk-ant-...` gets labeled correctly.
  { re: /sk-ant-[A-Za-z0-9_-]{20,}/g, label: 'anthropic' },
  // OpenAI -- `sk-proj-...` or `sk-...` with strong minimum length to avoid
  // eating prose references like "sk-learn" (scikit-learn).
  { re: /sk-(?:proj-)?[A-Za-z0-9_-]{32,}/g, label: 'openai' },
  // GitHub classic PAT + fine-grained PAT + OAuth/App/User/Refresh tokens.
  { re: /ghp_[A-Za-z0-9]{20,}/g, label: 'github' },
  { re: /github_pat_[A-Za-z0-9_]{20,}/g, label: 'github' },
  { re: /gh[ousr]_[A-Za-z0-9]{30,}/g, label: 'github' }, // gho_/ghu_/ghs_/ghr_
  // AWS permanent access key ID (AKIA) + temporary (ASIA) key ID.
  { re: /(?:AKIA|ASIA)[0-9A-Z]{16}/g, label: 'aws' },
  // Authorization: Bearer <token>.
  { re: /Bearer\s+[A-Za-z0-9._~+/=-]{10,}/g, label: 'bearer' },
  // Slack bot / user / legacy tokens.
  { re: /xox[baprs]-[A-Za-z0-9-]{10,}/g, label: 'slack' },
  // Stripe live + test secret keys.
  { re: /sk_live_[A-Za-z0-9]{24,}/g, label: 'stripe' },
  { re: /sk_test_[A-Za-z0-9]{24,}/g, label: 'stripe' },
  // npm access tokens.
  { re: /npm_[A-Za-z0-9]{36}/g, label: 'npm' },
  // HuggingFace user tokens.
  { re: /hf_[A-Za-z0-9]{34,}/g, label: 'huggingface' },
  // Azure Storage connection-string AccountKey (base64, 88 chars with padding).
  { re: /AccountKey=[A-Za-z0-9+/]{86,88}={0,2}/g, label: 'azure' },
  // PEM private-key block. Besides the unlabelled (PKCS#8) and RSA forms this
  // also covers EC, DSA, OPENSSH and ENCRYPTED headers -- all of them are
  // private key material and none of them is prose. The label stays 'gcp'
  // for backward compatibility with existing redaction output.
  { re: /-----BEGIN (?:(?:RSA|EC|DSA|OPENSSH|ENCRYPTED) )?PRIVATE KEY-----[\s\S]+?-----END (?:(?:RSA|EC|DSA|OPENSSH|ENCRYPTED) )?PRIVATE KEY-----/g, label: 'gcp' },
  // GCP / Google API keys -- `AIza...` (39 chars total).
  { re: /AIza[0-9A-Za-z_-]{35}/g, label: 'gcp' },
  // Sentry DSN -- https://<key>@o<org>.ingest.sentry.io/<project>.
  { re: /https?:\/\/[0-9a-f]{32,}(?::[0-9a-f]{32,})?@[\w.-]*sentry\.io\/[0-9]+/gi, label: 'sentry' },
  // Cloudflare API tokens (40 chars base64url). Conservative: only flag when
  // contextualized (CF_API_TOKEN=..., CLOUDFLARE_TOKEN=..., cf_auth_key=...)
  // so we don't eat bare git commit SHAs or content hashes.
  { re: /(?:cf|cloudflare)[_-]?(?:api[_-]?)?(?:token|auth|key)s?[= :]+[A-Za-z0-9_-]{40,}/gi, label: 'cloudflare' },
  // Webhook URLs (Slack, Discord, MS Teams) -- include the secret path segment.
  { re: /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[A-Za-z0-9]+/g, label: 'webhook' },
  { re: /https:\/\/discord(?:app)?\.com\/api\/webhooks\/\d+\/[A-Za-z0-9_-]+/g, label: 'webhook' },
  { re: /https:\/\/[\w-]+\.webhook\.office\.com\/webhookb2\/[\w@/-]+/g, label: 'webhook' },
];
49
+
50
+ // INLINE rules match `key=value` style assignments. Value regex excludes
51
+ // `[` and `]` so we don't re-redact tokens already labeled by PATTERNS
52
+ // (e.g. `GOOGLE_API_KEY=[REDACTED:gcp]` must stay labeled, not get flattened
53
+ // to `[REDACTED]`).
54
// `key=value` and JSON `"key": "value"` assignment rules. Group 1 is the key
// part that is kept; the JSON rule additionally captures the closing quote as
// group 2 and its source must keep starting with `("` (redactSecrets uses that
// prefix to choose the replacement template).
//
// A value is skipped only when it already STARTS with `[REDACTED`, so tokens
// labeled by PATTERNS (e.g. `GOOGLE_API_KEY=[REDACTED:gcp]`) keep their label.
// Otherwise the whole value is consumed: excluding `[`/`]` from the value, or
// stopping the JSON value at an escaped `\"`, would redact only the head of
// the secret and leak the rest.
const INLINE = [
  /(password\s*=\s*)(?!\[REDACTED)\S+/gi,
  /(api[_-]?token\s*=\s*)(?!\[REDACTED)\S+/gi,
  /(api[_-]?key\s*=\s*)(?!\[REDACTED)\S+/gi,
  /(secret\s*=\s*)(?!\[REDACTED)\S+/gi,
  /(client[_-]?secret\s*=\s*)(?!\[REDACTED)\S+/gi,
  // JSON-style "clientSecret": "value" and similar. The value may contain
  // backslash escapes, including `\"`.
  /("(?:client_?secret|api_?key|password|access_?token)"\s*:\s*")(?!\[REDACTED)(?:[^"\\]|\\.)+(")/gi,
];
63
+
64
/**
 * Replace credential-shaped substrings with redaction markers.
 *
 * Two passes: every PATTERNS entry becomes `[REDACTED:<label>]`, then every
 * INLINE assignment rule keeps its key part and replaces the value with
 * `[REDACTED]`.
 *
 * @param {*} s - Text to scrub.
 * @returns {string} Scrubbed text; '' for non-string or empty input.
 */
export function redactSecrets(s) {
  if (typeof s !== 'string' || s === '') return '';

  const labeled = PATTERNS.reduce(
    (text, { re, label }) => text.replace(re, `[REDACTED:${label}]`),
    s,
  );

  return INLINE.reduce((text, re) => {
    // The JSON rule (source starts with `("`) also captures the closing quote
    // as group 2, which has to be put back after the marker.
    const isJsonRule = re.source.startsWith('("');
    return text.replace(re, isJsonRule ? '$1[REDACTED]$2' : '$1[REDACTED]');
  }, labeled);
}
78
+
79
+ // classify(value) -> { clean: boolean, redacted_kind: string | null }
80
+ //
81
+ // D-PILLAR-SPEC section 3 surface used by D2 entity extraction. Passes the
82
+ // value through the same PATTERNS list redactSecrets uses; if any pattern
83
+ // matches the WHOLE value (anchor-equivalent: pattern consumes the entire
84
+ // trimmed string), the value is classified as a secret and `redacted_kind`
85
+ // carries the matched label. INLINE rules are not applied here -- they
86
+ // only fire on key=value assignments which would never reach the entity
87
+ // extractor as a bare entity name.
88
+ //
89
+ // Important: PATTERNS are anchored implicitly via length minimums (e.g.
90
+ // `sk-(?:proj-)?[A-Za-z0-9_-]{32,}`), but to avoid classifying a long file
91
+ // path that happens to contain a token-shaped substring, classify() rejects
92
+ // only when the pattern matches the FULL trimmed value. File paths and
93
+ // function/identifier names are always shorter than the secret patterns'
94
+ // minimum lengths, so the conservative cut-line is "match must equal the
95
+ // candidate" -- a substring match doesn't trigger classification.
96
/**
 * Classify a candidate value as clean or as a secret of a known kind.
 *
 * A value counts as a secret only when one PATTERNS entry matches the ENTIRE
 * trimmed value; a secret-shaped substring inside a longer string does not
 * trigger classification. INLINE rules are not consulted.
 *
 * @param {*} value - Candidate value.
 * @returns {{ clean: boolean, redacted_kind: (string|null) }} `redacted_kind`
 *   is the label of the first matching pattern, or null when clean.
 */
export function classify(value) {
  const candidate = typeof value === 'string' ? value.trim() : '';

  if (candidate !== '') {
    const hit = PATTERNS.find(({ re }) => {
      // Re-compile anchored and without /g: the shared patterns are global
      // for replace(), but here a single whole-string test is needed.
      const anchored = new RegExp(`^(?:${re.source})$`, re.flags.replace('g', ''));
      return anchored.test(candidate);
    });
    if (hit) return { clean: false, redacted_kind: hit.label };
  }

  return { clean: true, redacted_kind: null };
}