@lh8ppl/claude-memory-kit 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/bin/cmk-compress-lazy.mjs +59 -0
  2. package/bin/cmk-daily-distill.mjs +67 -0
  3. package/bin/cmk-weekly-curate.mjs +56 -0
  4. package/bin/cmk.mjs +12 -0
  5. package/package.json +50 -0
  6. package/src/audit-log.mjs +103 -0
  7. package/src/auto-extract.mjs +742 -0
  8. package/src/capture-prompt.mjs +61 -0
  9. package/src/capture-turn.mjs +273 -0
  10. package/src/claude-md.mjs +212 -0
  11. package/src/compress-session.mjs +349 -0
  12. package/src/compressor.mjs +376 -0
  13. package/src/conflict-queue.mjs +796 -0
  14. package/src/cooldown.mjs +61 -0
  15. package/src/daily-distill.mjs +252 -0
  16. package/src/doctor.mjs +528 -0
  17. package/src/forget.mjs +335 -0
  18. package/src/frontmatter.mjs +73 -0
  19. package/src/import-anthropic-memory.mjs +266 -0
  20. package/src/index-db.mjs +154 -0
  21. package/src/index-rebuild.mjs +597 -0
  22. package/src/index.mjs +90 -0
  23. package/src/inject-context.mjs +484 -0
  24. package/src/install.mjs +327 -0
  25. package/src/lazy-compress.mjs +326 -0
  26. package/src/lock-discipline.mjs +166 -0
  27. package/src/mcp-server.mjs +498 -0
  28. package/src/memory-write.mjs +565 -0
  29. package/src/merge-facts.mjs +213 -0
  30. package/src/observe-edit.mjs +87 -0
  31. package/src/platform-commands.mjs +138 -0
  32. package/src/poison-guard.mjs +245 -0
  33. package/src/privacy.mjs +21 -0
  34. package/src/provenance.mjs +217 -0
  35. package/src/register-crons.mjs +354 -0
  36. package/src/reindex.mjs +134 -0
  37. package/src/repair.mjs +316 -0
  38. package/src/result-shapes.mjs +155 -0
  39. package/src/review-queue.mjs +345 -0
  40. package/src/roll.mjs +115 -0
  41. package/src/scratchpad.mjs +335 -0
  42. package/src/search.mjs +311 -0
  43. package/src/subcommands.mjs +1252 -0
  44. package/src/tier-paths.mjs +74 -0
  45. package/src/transcripts.mjs +234 -0
  46. package/src/trust.mjs +226 -0
  47. package/src/weekly-curate.mjs +454 -0
  48. package/src/write-fact.mjs +205 -0
  49. package/template/.claude/hooks/pre-tool-memory.js +78 -0
  50. package/template/.claude/hooks/transcript-capture.js +69 -0
  51. package/template/.claude/settings.json +27 -0
  52. package/template/.claude/skills/memory-write/SKILL.md +117 -0
  53. package/template/.gitignore.fragment +12 -0
  54. package/template/CLAUDE.md.template +49 -0
  55. package/template/docs/journey/journey-log.md.template +292 -0
  56. package/template/local/machine-paths.md.template +37 -0
  57. package/template/local/overrides.md.template +36 -0
  58. package/template/project/.index/.gitkeep +0 -0
  59. package/template/project/MEMORY.md.template +47 -0
  60. package/template/project/SOUL.md.template +35 -0
  61. package/template/project/memory/INDEX.md.template +47 -0
  62. package/template/project/memory/archive/superseded/.gitkeep +0 -0
  63. package/template/project/memory/archive/tombstones/.gitkeep +0 -0
  64. package/template/project/queues/.gitkeep +0 -0
  65. package/template/project/sessions/.gitkeep +0 -0
  66. package/template/project/transcripts/.gitkeep +0 -0
  67. package/template/support/cron-jobs/daily-memory-distill.md +15 -0
  68. package/template/support/cron-jobs/nightly-memsearch-index.md +17 -0
  69. package/template/support/cron-jobs/weekly-memory-curator.md +15 -0
  70. package/template/support/milvus-deploy/README.md +57 -0
  71. package/template/support/milvus-deploy/docker-compose.yml +66 -0
  72. package/template/support/scripts/auto-extract-memory.sh +102 -0
  73. package/template/support/scripts/memsearch-index-with-flush.sh +59 -0
  74. package/template/support/scripts/refresh-distill-timestamp.py +35 -0
  75. package/template/support/scripts/register-crons.py +242 -0
  76. package/template/support/scripts/run-daily-distill.sh +67 -0
  77. package/template/support/scripts/run-weekly-curate.sh +58 -0
  78. package/template/user/HABITS.md.template +18 -0
  79. package/template/user/LESSONS.md.template +18 -0
  80. package/template/user/USER.md.template +18 -0
  81. package/template/user/fragments/INDEX.md.template +23 -0
@@ -0,0 +1,213 @@
1
+ // Fact consolidation (Task 10, refactored in cleanup-layer-2-cross-module-drift).
2
+ // Single public boundary: mergeFacts(opts) → result. See design §3.4.
3
+ //
4
+ // Uses shared modules: tier-paths, frontmatter, audit-log, result-shapes.
5
+ // Composes writeFact() to create the new merged fact, then moves A + B into
6
+ // archive/superseded/ with superseded_by injected. See CLAUDE.md "Shared
7
+ // modules" rule.
8
+
9
+ import {
10
+ existsSync,
11
+ mkdirSync,
12
+ readdirSync,
13
+ readFileSync,
14
+ statSync,
15
+ unlinkSync,
16
+ writeFileSync,
17
+ } from 'node:fs';
18
+ import { join } from 'node:path';
19
+ import {
20
+ VALID_TIERS,
21
+ ID_PATTERN,
22
+ resolveTierRoot,
23
+ resolveFactDir,
24
+ } from './tier-paths.mjs';
25
+ import { parse, format } from './frontmatter.mjs';
26
+ import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
27
+ import { ERROR_CATEGORIES, errorResult, notFoundResult } from './result-shapes.mjs';
28
+ import { writeFact } from './write-fact.mjs';
29
+
30
/**
 * List the candidate fact files that sit directly inside `factDir`.
 *
 * Only regular files whose name ends in `.md` are returned, and the
 * `INDEX.md` file is always left out. Sub-directories are not descended
 * into. A missing directory yields an empty list.
 *
 * @param {string} factDir - Directory to scan.
 * @returns {string[]} Bare file names (not full paths).
 */
function listLiveFactFiles(factDir) {
  if (!existsSync(factDir)) {
    return [];
  }
  return readdirSync(factDir, { withFileTypes: true })
    .filter((dirent) => dirent.isFile())
    .map((dirent) => dirent.name)
    .filter((name) => name.endsWith('.md') && name !== 'INDEX.md');
}
41
+
42
/**
 * Locate the fact in `factDir` whose frontmatter `id` equals `id` and that
 * has no truthy `deleted_at` marker.
 *
 * Files are visited in the order returned by listLiveFactFiles(); the first
 * match wins.
 *
 * @param {string} factDir - Directory holding the tier's fact files.
 * @param {string} id - Citation ID to look for.
 * @returns {{id: string, path: string, frontmatter: object, body: *} | null}
 *   The matching fact, or null when the directory is missing or nothing
 *   matches.
 */
function findLiveFactById(factDir, id) {
  if (!existsSync(factDir)) {
    return null;
  }
  for (const name of listLiveFactFiles(factDir)) {
    const candidatePath = join(factDir, name);
    if (statSync(candidatePath).isFile()) {
      const { frontmatter, body } = parse(readFileSync(candidatePath, 'utf8'));
      const isLiveMatch = frontmatter?.id === id && !frontmatter.deleted_at;
      if (isLiveMatch) {
        return { id, path: candidatePath, frontmatter, body };
      }
    }
  }
  return null;
}
54
+
55
/**
 * Archive a fact that has been replaced by a merged fact.
 *
 * Re-reads the fact from `match.path`, stamps `superseded_by` into its
 * frontmatter, writes the result to
 * `<match.factDir>/archive/superseded/<match.id>.md`, and then deletes the
 * original file.
 *
 * @param {{factDir: string, id: string, path: string}} match - The live fact
 *   to archive.
 * @param {string} supersededBy - ID of the fact that replaces this one.
 * @returns {string} Path of the archived copy.
 */
function moveToSuperseded(match, supersededBy) {
  const supersededDir = join(match.factDir, 'archive', 'superseded');
  mkdirSync(supersededDir, { recursive: true });
  const newPath = join(supersededDir, `${match.id}.md`);
  const { frontmatter, body } = parse(readFileSync(match.path, 'utf8'));
  // Listing superseded_by first keeps it as the leading key of the object.
  // Spread keys win over earlier ones, though, so a `superseded_by` that is
  // already present in the fact's frontmatter would silently replace the new
  // pointer. Re-assign afterwards: the key keeps its position and the new
  // value is guaranteed to be the one written.
  const updated = {
    superseded_by: supersededBy,
    ...(frontmatter ?? {}),
  };
  updated.superseded_by = supersededBy;
  writeFileSync(newPath, format({ frontmatter: updated, body }), 'utf8');
  // Remove the live copy only after the archived copy has been written.
  unlinkSync(match.path);
  return newPath;
}
68
+
69
/**
 * Consolidate two live facts of the same tier into one new fact.
 *
 * Steps, in order:
 *   1. Validate the options (IDs, merged body/title, write source).
 *   2. Check both IDs share a tier prefix and that the prefix is valid.
 *   3. Find both live facts in the tier's fact directory.
 *   4. Create the merged fact through writeFact().
 *   5. Move both originals to archive/superseded/ with superseded_by set.
 *   6. Append a 'merged' audit entry.
 *
 * Slug validation is left entirely to writeFact(); `writeSource` has no
 * default and must be supplied by the caller.
 *
 * @param {object} [opts]
 * @param {string} opts.idA - Citation ID of the first fact.
 * @param {string} opts.idB - Citation ID of the second fact.
 * @param {string} opts.mergedBody - Body of the merged fact (non-empty).
 * @param {string} opts.mergedTitle - Title of the merged fact (non-empty).
 * @param {string} [opts.mergedSlug] - Slug handed to writeFact().
 * @param {string} [opts.mergedType] - Type; falls back to A's, then B's.
 * @param {string} opts.writeSource - Provenance label (required).
 * @param {string} [opts.trust] - Trust level; 'high' when omitted.
 * @param {string} [opts.sourceFile] - Falls back to A's source_file, then 'merge'.
 * @param {number} [opts.sourceLine] - Falls back to 1.
 * @param {string} [opts.sourceSha1] - Falls back to A's source_sha1, then 'merged'.
 * @param {string[]} [opts.mergedTags] - Tags handed to writeFact().
 * @param {string} [opts.projectRoot] - Passed to tier resolution and writeFact().
 * @param {string} [opts.userDir] - Passed to tier resolution and writeFact().
 * @param {string} [opts.now] - Timestamp for the audit entry; nowIso() when omitted.
 * @returns {object} `{action: 'merged', id, tier, path, supersededPaths}` on
 *   success; otherwise whatever errorResult() / notFoundResult() produce.
 */
export function mergeFacts(opts = {}) {
  const {
    idA,
    idB,
    mergedBody,
    mergedTitle,
    mergedSlug,
    mergedType,
    writeSource,
    trust,
    sourceFile,
    sourceLine,
    sourceSha1,
    mergedTags,
    projectRoot,
    userDir,
    now,
  } = opts;

  const schemaFailure = (errors) =>
    errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });

  // --- 1. Option validation -------------------------------------------
  const problems = [];
  if (!idA || !ID_PATTERN.test(idA)) {
    problems.push('idA: must be a valid citation ID');
  }
  if (!idB || !ID_PATTERN.test(idB)) {
    problems.push('idB: must be a valid citation ID');
  }
  // Self-merge is reported on its own, ahead of any other accumulated issue.
  if (idA && idB && idA === idB) {
    return schemaFailure([
      `idA and idB are the same (${idA}); cannot merge a fact with itself`,
    ]);
  }
  const hasBody = typeof mergedBody === 'string' && mergedBody.length > 0;
  if (!hasBody) {
    problems.push('mergedBody: required, non-empty string');
  }
  const hasTitle = typeof mergedTitle === 'string' && mergedTitle !== '';
  if (!hasTitle) {
    problems.push('mergedTitle: required, non-empty string');
  }
  // No mergedSlug check here: writeFact owns slug validation (pattern and
  // presence) and reports bad slugs as a schema error of its own.
  //
  // writeSource is mandatory so a human-curated merge is never tagged with
  // an automatic source by accident.
  const hasWriteSource = typeof writeSource === 'string' && writeSource !== '';
  if (!hasWriteSource) {
    problems.push('writeSource: required (no default). Pick one of user-explicit/auto-extract/compressor/manual-edit/imported.');
  }
  if (problems.length > 0) {
    return schemaFailure(problems);
  }

  // --- 2. Tier agreement ----------------------------------------------
  // The tier is the first character of a citation ID.
  const tierA = idA[0];
  const tierB = idB[0];
  if (tierA !== tierB) {
    return schemaFailure([
      `cross-tier merge not supported: idA tier (${tierA}) ≠ idB tier (${tierB}). Promote one side to the same tier first.`,
    ]);
  }
  const tier = tierA;
  if (!VALID_TIERS.has(tier)) {
    return schemaFailure([`invalid tier prefix on ids: ${tier}`]);
  }

  // --- 3. Locate both live facts --------------------------------------
  const tierRoot = resolveTierRoot({ tier, projectRoot, userDir });
  const factDir = resolveFactDir(tier, tierRoot);

  const matchA = findLiveFactById(factDir, idA);
  const matchB = findLiveFactById(factDir, idB);
  if (!matchA || !matchB) {
    const missing = [
      [idA, matchA],
      [idB, matchB],
    ]
      .filter(([, found]) => !found)
      .map(([missingId]) => missingId);
    return notFoundResult({
      errors: [`no live fact found for ${missing.join(', ')}`],
    });
  }

  // --- 4. Write the merged fact ---------------------------------------
  const frontA = matchA.frontmatter;
  const frontB = matchB.frontmatter;
  const written = writeFact({
    tier,
    type: mergedType ?? frontA.type ?? frontB.type,
    slug: mergedSlug,
    title: mergedTitle,
    body: mergedBody,
    writeSource,
    trust: trust ?? 'high',
    sourceFile: sourceFile ?? frontA.source_file ?? 'merge',
    sourceLine: sourceLine ?? 1,
    sourceSha1: sourceSha1 ?? frontA.source_sha1 ?? 'merged',
    mergedFrom: [idA, idB],
    tags: mergedTags,
    projectRoot,
    userDir,
  });
  if (written.action === 'error') {
    return errorResult({
      category: written.errorCategory,
      errors: written.errors,
    });
  }
  // Anything other than 'created' means writeFact resolved to a fact that
  // already exists. Report a collision instead of re-pointing A and B at an
  // unrelated fact.
  if (written.action !== 'created') {
    return errorResult({
      category: ERROR_CATEGORIES.COLLISION,
      errors: [
        `merged body collides with existing fact ${written.id} (writeFact returned ${written.action}${written.skipReason ? ': ' + written.skipReason : ''}); choose a different mergedBody`,
      ],
    });
  }

  // --- 5. Archive the originals ---------------------------------------
  const supersededA = moveToSuperseded({ ...matchA, factDir }, written.id);
  const supersededB = moveToSuperseded({ ...matchB, factDir }, written.id);
  const supersededPaths = [supersededA, supersededB];

  // --- 6. Audit trail --------------------------------------------------
  appendAuditEntry(tierRoot, {
    ts: now ?? nowIso(),
    action: 'merged',
    tier,
    id: written.id,
    reasonCode: REASON_CODES.CURATED_MERGE,
    paths: {
      after: written.path,
      archive: [supersededA, supersededB],
    },
    extra: { mergedFrom: [idA, idB] },
  });

  return {
    action: 'merged',
    id: written.id,
    tier,
    path: written.path,
    supersededPaths,
  };
}
@@ -0,0 +1,87 @@
1
+ // PostToolUse hook real handler (Task 20, T-017). Fires on Write /
2
+ // Edit / MultiEdit only (matcher in hooks.json) and appends a one-line
3
+ // summary of large tool outputs to sessions/now.md — feeds the rolling-
4
+ // window compression pipeline (design §8.1).
5
+ //
6
+ // Public boundary: observeEdit({payload, projectRoot, now}) → result.
7
+ // The bin wrapper handles stdin parsing + the detached-spawn dance
8
+ // that makes the hook return within 50ms regardless of how big the
9
+ // tool output is.
10
+ //
11
+ // Filter rules (defensive — hooks.json matcher should be the first
12
+ // line of defense, but a misconfigured plugin install could route
13
+ // other tool_names here, so we double-check):
14
+ // - tool_name must be one of Write / Edit / MultiEdit
15
+ // - tool_response.content must be > LINE_THRESHOLD lines
16
+ //
17
+ // Per design §1.4 / §8.1 the summary line shape feeds claude-remember-
18
+ // style compaction downstream; we use a stable
19
+ // [<iso-ts>] <tool> file=<file_path> lines=<count>
20
+ // format so the SessionEnd Haiku compressor (Task 22+23) can recognize
21
+ // individual events.
22
+
23
+ import { existsSync, mkdirSync, appendFileSync } from 'node:fs';
24
+ import { join } from 'node:path';
25
+
26
// Tool names whose payloads observeEdit() is willing to summarize; every
// other tool_name results in a no-op.
const ELIGIBLE_TOOLS = new Set(['Write', 'Edit', 'MultiEdit']);

// A tool output is recorded only when it has MORE than this many lines
// (strictly greater-than, per design / 20.2).
const LINE_THRESHOLD = 50;
28
+
29
/**
 * Count the lines of content in `text`.
 *
 * Lines are separated by "\n". A single trailing newline terminates the
 * last line rather than opening a new, empty one, so "a\n" counts as one
 * line. Non-string and empty input count as zero lines.
 *
 * @param {*} text - Text to measure.
 * @returns {number} Number of lines.
 */
function countLines(text) {
  if (typeof text !== 'string' || text === '') {
    return 0;
  }
  let total = 1;
  let at = text.indexOf('\n');
  while (at !== -1) {
    total += 1;
    at = text.indexOf('\n', at + 1);
  }
  // The segment after a final "\n" is empty and is not a line of content.
  return text.endsWith('\n') ? total - 1 : total;
}
40
+
41
/**
 * Pull the tool's textual output out of a hook payload.
 *
 * Deliberately permissive about payload shape. The response object is read
 * from `tool_response` (or `toolResponse`), and the first of its `content`,
 * `output`, `text` properties that is a string is returned. Failing that, a
 * string `payload.output` is used.
 *
 * @param {*} payload - Hook payload.
 * @returns {string} The output text, or '' when none is found.
 */
function extractContent(payload) {
  if (!payload || typeof payload !== 'object') {
    return '';
  }
  const response = payload.tool_response ?? payload.toolResponse ?? null;
  if (response && typeof response === 'object') {
    for (const key of ['content', 'output', 'text']) {
      const candidate = response[key];
      if (typeof candidate === 'string') {
        return candidate;
      }
    }
  }
  return typeof payload.output === 'string' ? payload.output : '';
}
55
+
56
/**
 * Pull the edited file's path out of a hook payload.
 *
 * Reads the input object from `tool_input` (or `toolInput`) and returns the
 * first of its `file_path`, `filePath`, `path` properties that is a string.
 *
 * @param {*} payload - Hook payload.
 * @returns {string} The path, or '' when none is found.
 */
function extractFilePath(payload) {
  const input = payload?.tool_input ?? payload?.toolInput ?? null;
  if (!input || typeof input !== 'object') {
    return '';
  }
  for (const key of ['file_path', 'filePath', 'path']) {
    const candidate = input[key];
    if (typeof candidate === 'string') {
      return candidate;
    }
  }
  return '';
}
65
+
66
/**
 * Record a one-line summary of a large Write / Edit / MultiEdit output.
 *
 * Payloads for other tools, and outputs of LINE_THRESHOLD lines or fewer,
 * are ignored. Otherwise a line of the form
 *   [<ts>] <tool> file=<file_path> lines=<count>
 * is appended to `<projectRoot>/context/sessions/now.md`, creating the
 * sessions directory first when it does not exist.
 *
 * @param {object} [args]
 * @param {object} [args.payload] - Hook payload (snake_case or camelCase keys).
 * @param {string} [args.projectRoot] - Project root containing `context/`.
 * @param {string} [args.now] - Timestamp to use; current ISO time when omitted.
 * @returns {{action: 'noop', reason: string, lines?: number} |
 *           {action: 'appended', summaryLine: string, lines: number}}
 */
export function observeEdit({ payload, projectRoot, now } = {}) {
  const toolName = payload?.tool_name ?? payload?.toolName;
  if (!ELIGIBLE_TOOLS.has(toolName)) {
    return { action: 'noop', reason: 'tool-name-not-eligible' };
  }

  const lines = countLines(extractContent(payload));
  if (lines <= LINE_THRESHOLD) {
    return { action: 'noop', reason: 'below-line-threshold', lines };
  }

  const ts = now ?? new Date().toISOString();
  const summary = `[${ts}] ${toolName} file=${extractFilePath(payload)} lines=${lines}\n`;

  const sessionsDir = join(projectRoot, 'context', 'sessions');
  if (!existsSync(sessionsDir)) {
    mkdirSync(sessionsDir, { recursive: true });
  }
  appendFileSync(join(sessionsDir, 'now.md'), summary, 'utf8');

  return { action: 'appended', summaryLine: summary, lines };
}
@@ -0,0 +1,138 @@
1
+ // platform-commands.mjs — shared helper for emitting user-facing
2
+ // shell commands that work on the user's actual shell.
3
+ //
4
+ // Why this exists
5
+ // ---------------
6
+ //
7
+ // The kit emits shell commands to users at runtime in several places
8
+ // (lock-discipline.mjs's `recoveryCommand` field is the canonical
9
+ // example; future `cmk doctor` HC-* repair output + `cmk repair`
10
+ // self-repair output + error messages all do the same). The
11
+ // user-facing shell varies by OS:
12
+ //
13
+ // - Windows (cmd.exe / PowerShell): `Remove-Item`, `Get-ChildItem`,
14
+ // `New-Item`, etc. POSIX commands (`rm`, `ls`, `mkdir`) DO NOT
15
+ // work on stock cmd.exe and produce confusing errors.
16
+ // - macOS / Linux: POSIX commands.
17
+ // - Git Bash on Windows: POSIX commands work (it provides them).
18
+ //
19
+ // PR-B (lock-discipline.mjs) established the inline pattern:
20
+ // `process.platform === 'win32'` switches between `Remove-Item "..."`
21
+ // and `rm "..."`. PR-E (this campaign Part 7/7) generalizes that
22
+ // pattern into this shared helper so future code doesn't reinvent it,
23
+ // and so `scripts/validate-platform-commands.mjs` can mechanically
24
+ // verify every emission site uses the helper or an explicit
25
+ // suppression marker.
26
+ //
27
+ // What this module provides
28
+ // -------------------------
29
+ //
30
+ // One function per primitive command. Each takes argument(s) and
31
+ // returns a COMPLETE, copy-paste-ready shell command string in the
32
+ // user's native shell. The caller doesn't think about quoting,
33
+ // escaping, or platform differences — the helper does.
34
+ //
35
+ // Primitives currently covered:
36
+ // - removeFile(path) — delete a file
37
+ // - removeDir(path) — delete a directory recursively
38
+ // - listDir(path) — list directory contents
39
+ //
40
+ // What this module does NOT do
41
+ // ----------------------------
42
+ //
43
+ // - Generate shell-script files. This is for one-liner copy-paste
44
+ // hints to the user, not for scripting.
45
+ // - Escape shell injection. Callers pass paths derived from kit
46
+ // state (lock-file paths, install dirs). Untrusted user input
47
+ // should be validated upstream.
48
+ // - Detect Git Bash vs cmd.exe vs PowerShell on Windows. We emit
49
+ // the PowerShell-style command on win32; Git Bash users can run
50
+ // the POSIX command independently. The platform-detection
51
+ // defaults to "what the user's STOCK shell expects" since that's
52
+ // the failure mode (PR-B's `recoveryCommand` finding: a Windows
53
+ // user pasting `rm` into cmd.exe gets a "command not found"
54
+ // error; pasting `Remove-Item` works in both PowerShell AND
55
+ // Git Bash IF git-bash forwards the call, but at minimum it
56
+ // doesn't give a confusing error).
57
+ //
58
+ // Suppression
59
+ // -----------
60
+ //
61
+ // Sites that LEGITIMATELY hardcode a POSIX command (e.g., a .sh
62
+ // script that already requires bash) can suppress the
63
+ // `validate-platform-commands.mjs` check with a per-line
64
+ // `// platform-commands: ignore <reason>` marker. Use sparingly —
65
+ // the marker is for cases where a platform-specific shell is the
66
+ // contract, not an oversight.
67
+
68
+ // Note on Git Bash on Windows (per design §18.6): Git Bash reports
69
+ // `process.platform === 'win32'` (it's running under the win32 Node
70
+ // build) and accepts both POSIX `rm` AND PowerShell `Remove-Item`
71
+ // via PowerShell.exe in PATH. Emitting `Remove-Item` is therefore
72
+ // the cross-Windows-shell-compatible default — it works in stock
73
+ // PowerShell AND in Git Bash. A user on cmd.exe with neither
74
+ // PowerShell nor `rm` in PATH would have a broken Node install
75
+ // anyway, so that case is out of scope.
76
// True when the emitted commands should use PowerShell syntax.
const IS_WINDOWS = process.platform === 'win32';

/**
 * Quote a path for the user's shell.
 *
 * The path is wrapped in double quotes on every platform. Inside double
 * quotes a few characters are still interpreted by the shell, so they are
 * escaped — otherwise a path such as `C:\$Recycle.Bin` would have
 * `$Recycle` expanded as a variable and the command would target a
 * different path than the one intended:
 *
 *   - PowerShell: `"`, `$` and the backtick are escaped with a backtick.
 *     Backslashes are ordinary characters there and are left alone.
 *   - POSIX shells: `\`, `"`, `$` and the backtick are escaped with a
 *     backslash.
 *
 * Paths without any of those characters come out exactly as `"<path>"`.
 *
 * @param {string} path - Path to quote.
 * @param {boolean} [isWindows=IS_WINDOWS] - Emit PowerShell escaping when
 *   true, POSIX escaping when false. Defaults to the running platform.
 * @returns {string} The double-quoted, escaped path.
 */
function quote(path, isWindows = IS_WINDOWS) {
  const raw = `${path}`;
  // Function replacers avoid any `$`-pattern handling in replacement strings.
  const escaped = isWindows
    ? raw.replace(/["$`]/g, (ch) => '`' + ch)
    : raw.replace(/[\\"$`]/g, (ch) => '\\' + ch);
  return `"${escaped}"`;
}
86
+
87
/**
 * Produce a copy-paste-ready command that deletes a single file.
 *
 * On win32 the PowerShell form is emitted:
 *   Remove-Item "C:\path\to\file"
 *
 * Everywhere else the POSIX form is emitted:
 *   rm "/path/to/file"
 *
 * @param {string} path - File to delete.
 * @returns {string} The shell command.
 */
export function removeFile(path) {
  const target = quote(path);
  return IS_WINDOWS ? `Remove-Item ${target}` : `rm ${target}`;
}
102
+
103
/**
 * Produce a copy-paste-ready command that deletes a directory and
 * everything beneath it.
 *
 * On win32 the PowerShell form is emitted:
 *   Remove-Item -Recurse -Force "C:\path\to\dir"
 *
 * Everywhere else the POSIX form is emitted:
 *   rm -rf "/path/to/dir"
 *
 * @param {string} path - Directory to delete.
 * @returns {string} The shell command.
 */
export function removeDir(path) {
  const target = quote(path);
  return IS_WINDOWS
    ? `Remove-Item -Recurse -Force ${target}`
    : `rm -rf ${target}`;
}
118
+
119
/**
 * Produce a copy-paste-ready command that lists a directory's contents.
 *
 * On win32 the PowerShell form is emitted:
 *   Get-ChildItem "C:\path\to\dir"
 *
 * Everywhere else the POSIX form is emitted:
 *   ls "/path/to/dir"
 *
 * @param {string} path - Directory to list.
 * @returns {string} The shell command.
 */
export function listDir(path) {
  const target = quote(path);
  return IS_WINDOWS ? `Get-ChildItem ${target}` : `ls ${target}`;
}
134
+
135
// Which command family this module emits on the current machine: 'win32'
// (PowerShell) or 'posix'. Exported so the validator and tests can branch on
// it — e.g. a cross-platform CI matrix asserting the half that applies.
export const PLATFORM = IS_WINDOWS ? 'win32' : 'posix';
@@ -0,0 +1,245 @@
1
+ // Poison_Guard — pre-write secret + injection filter (Task 24.5, T-021).
2
+ //
3
+ // The kit's last line of defense before any auto-extracted or
4
+ // user-explicit fact is written to a project-tier or user-tier file
5
+ // that may end up in git. False negatives = credentials in the repo.
6
+ // False positives = legitimate writes blocked. Pattern correctness
7
+ // has to be right (design §6.7).
8
+ //
9
+ // Public boundary:
10
+ // checkPoisonGuard(text) → {
11
+ // rejected: boolean,
12
+ // pattern_id: string | null, // category id; null if rejected:false
13
+ // redacted_excerpt: string, // safe-for-logging excerpt; masks matched text with ***
14
+ // }
15
+ //
16
+ // Pattern catalog per design §6.7. The catalog is intentionally
17
+ // conservative — see the design note "Why discoverability-only, not
18
+ // perfect prevention": the threat model is "accidental commit", not
19
+ // "active adversary in your repo." Regex catches the high-frequency
20
+ // mistakes; secret-scanners (gitleaks, trufflehog) are the second
21
+ // line of defense, not us.
22
+ //
23
+ // Redaction contract (security-load-bearing):
24
+ // - The matched secret/injection text MUST NEVER appear in
25
+ // redacted_excerpt in cleartext. The whole point of this module
26
+ // is to keep secrets out of logs. Every pattern in this catalog
27
+ // must produce a redacted excerpt that masks the match span
28
+ // with `***`. Unit tests pin this contract.
29
+ // - The excerpt is bounded in length (≤ ~200 chars) so a long
30
+ // pasted blob doesn't blow up the log line.
31
+
32
+ import {
33
+ appendFileSync,
34
+ existsSync,
35
+ mkdirSync,
36
+ } from 'node:fs';
37
+ import { join, dirname } from 'node:path';
38
+
39
+ // --- Pattern catalog -------------------------------------------------
40
+ // Each pattern is { id, re, category }. The id is the stable
41
+ // machine-parseable name that shows up in poison-guard.log NDJSON +
42
+ // extract.log error_category disambiguation. The re is the
43
+ // case-insensitive regex; category is 'secret' or 'injection' so the
44
+ // downstream categorizer can route into POISON_GUARD_CATEGORIES.
45
+ //
46
+ // Conservative-on-purpose. Adding a pattern is a write — adding a
47
+ // pattern that has false positives is a denial-of-service against
48
+ // legitimate user input. Each pattern should be vetted against
49
+ // realistic adversarial samples AND against realistic benign user
50
+ // content.
51
// Credential-shaped patterns. Each entry is { id, category, re }; the id is
// the stable name reported to callers and logs.
const SECRET_PATTERNS = [
  // AWS access key id: one of the fixed four-letter prefixes followed by
  // exactly 16 uppercase alphanumerics. The prefix requirement keeps
  // arbitrary 20-character blobs from matching. Case-sensitive on purpose.
  {
    id: 'secret_aws_access_key_id',
    category: 'secret',
    re: /\b(?:AKIA|ASIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASCA)[A-Z0-9]{16}\b/,
  },
  // AWS secret access key in key=value form. The `aws_secret…key` label
  // gates on intent. The value part is deliberately lenient: 16 or more
  // Base64-ish characters, although real secret keys are 40 characters.
  {
    id: 'secret_aws_secret_access_key',
    category: 'secret',
    re: /(?:aws[_-]?secret[_-]?(?:access[_-]?)?key)[\s:=]+["']?[A-Za-z0-9/+=]{16,}/i,
  },
  // Generic api_key / secret / password / passwd / token / bearer in
  // key=value form. The 20-character minimum on the value skips short
  // test fixtures (api_key=abc123 is only 6 characters).
  {
    id: 'secret_generic_credential',
    category: 'secret',
    re: /(?:api[_-]?key|secret|password|passwd|token|bearer)[\s:=]+["']?[A-Za-z0-9_\-/+=]{20,}/i,
  },
  // PEM private key. The "RSA "/"EC "/"OPENSSH "/… qualifier is optional,
  // so plain `-----BEGIN PRIVATE KEY-----` also matches.
  //
  // The match extends past the BEGIN header through the matching END
  // marker (or to the end of the text when there is none). Matching only
  // the header would leave the key body outside the match span, and the
  // redacted excerpt shows the text that follows the span — i.e. it would
  // put the first characters of the key itself into the log.
  {
    id: 'secret_pem_private_key',
    category: 'secret',
    re: /-----BEGIN (?:RSA |EC |OPENSSH |PGP |DSA |ENCRYPTED )?PRIVATE KEY-----(?:[\s\S]*?-----END (?:RSA |EC |OPENSSH |PGP |DSA |ENCRYPTED )?PRIVATE KEY-----|[\s\S]*)/,
  },
  // GitHub personal access token: ghp_ + 36 alphanumerics. There is no
  // trailing \b so that alphanumeric padding right after the token does
  // not suppress detection; the leading \b stops mid-identifier matches
  // ("xghp_..." is not a token).
  {
    id: 'secret_github_pat',
    category: 'secret',
    re: /\bghp_[A-Za-z0-9]{36}/,
  },
  // OpenAI / Anthropic style keys: sk- prefix, optional ant-/proj-
  // qualifier, then 40 or more alphanumeric/dash/underscore characters.
  {
    id: 'secret_openai_anthropic_key',
    category: 'secret',
    re: /\bsk-(?:ant-|proj-)?[A-Za-z0-9_-]{40,}/,
  },
  // Slack tokens: xoxb-/xoxp-/xoxs- prefix + 10 or more alphanumeric/dash.
  {
    id: 'secret_slack_token',
    category: 'secret',
    re: /\bxox[bps]-[A-Za-z0-9-]{10,}/,
  },
];
110
+
111
// Prompt-injection-shaped patterns. Same entry shape as the secret
// patterns: { id, category, re }.
const INJECTION_PATTERNS = [
  // "ignore (all|any|previous|prior)* (instructions|prompts|rules)".
  // The qualifier group repeats zero or more times, so all of these match:
  //   "ignore instructions"
  //   "ignore previous instructions"
  //   "ignore all previous instructions"   (two qualifiers stacked)
  //   "IGNORE ALL PREVIOUS INSTRUCTIONS"   (case-insensitive)
  {
    id: 'injection_ignore_instructions',
    category: 'injection',
    re: /ignore (?:all |any |previous |prior )*(?:instructions?|prompts?|rules?)/i,
  },
  // "You are now [an AI role]" — role-override attempt. An explicit
  // role-impersonation noun is required so that benign sentences such as
  // "you are now able to ship" or "you are now blocked on the API" pass.
  // Up to three free words may sit before the noun ("you are now a helpful
  // pirate assistant", "you are now a different agent").
  //
  // The noun must END there: an optional plural "s", then no further
  // letter. Without that boundary the short nouns match as prefixes of
  // ordinary words — "ai" in "you are now aiming…", "model" in "you are
  // now modeling…" — which blocks legitimate writes. Digits are still
  // allowed after the noun so "gpt4" / "claude3" keep matching.
  {
    id: 'injection_role_override',
    category: 'injection',
    re: /you are now (?:a |an |the )?(?:[a-z]+ ){0,3}(?:assistant|chatbot|ai|bot|pirate|agent|expert|persona|model|gpt|claude)s?(?![a-z])/i,
  },
  // Fake role tags: an opening or closing <system> / <assistant> tag in
  // supplied content suggests an injection attempt.
  {
    id: 'injection_fake_role_tag',
    category: 'injection',
    re: /<\/?(?:system|assistant)>/i,
  },
  // "disregard the above" — common injection lead-in.
  {
    id: 'injection_disregard_above',
    category: 'injection',
    re: /disregard the above/i,
  },
];
153
+
154
// Full catalog in evaluation order: secret patterns first, then injection
// patterns.
const ALL_PATTERNS = SECRET_PATTERNS.concat(INJECTION_PATTERNS);

// Frozen list of the ids in a pattern list, in catalog order.
const frozenIds = (patterns) => Object.freeze(patterns.map(({ id }) => id));

// Frozen enum of pattern ids grouped by category. Callers import this to
// check their routing logic without relying on the order of the internal
// pattern arrays.
export const POISON_GUARD_CATEGORIES = Object.freeze({
  SECRET_CATEGORIES: frozenIds(SECRET_PATTERNS),
  INJECTION_CATEGORIES: frozenIds(INJECTION_PATTERNS),
});
163
+
164
+ // Redaction parameters. The excerpt window around the match should be
165
+ // small enough to fit in a log line but large enough to give a human
166
+ // auditor enough context to act on (e.g., recognize "that was the
167
+ // AWS_SECRET line in my terraform output").
168
// Number of characters of context kept on each side of the masked span.
const REDACTION_CONTEXT_CHARS = 30;
// Replacement text for the masked span.
const REDACTION_MASK = '***';

/**
 * Build a short excerpt of `text` with one span masked.
 *
 * The span [matchStart, matchStart + matchLength) is replaced by
 * REDACTION_MASK and up to REDACTION_CONTEXT_CHARS characters are kept on
 * either side. A '...' marker is added on a side where text was cut off.
 *
 * @param {string} text - Full source text.
 * @param {number} matchStart - Index of the first character to mask.
 * @param {number} matchLength - Number of characters to mask.
 * @returns {string} The masked excerpt.
 */
function redactExcerpt(text, matchStart, matchLength) {
  const matchEnd = matchStart + matchLength;
  const windowStart = Math.max(0, matchStart - REDACTION_CONTEXT_CHARS);
  const windowEnd = Math.min(text.length, matchEnd + REDACTION_CONTEXT_CHARS);

  const pieces = [];
  if (windowStart > 0) {
    pieces.push('...');
  }
  pieces.push(
    text.slice(windowStart, matchStart),
    REDACTION_MASK,
    text.slice(matchEnd, windowEnd),
  );
  if (windowEnd < text.length) {
    pieces.push('...');
  }
  return pieces.join('');
}
183
+
184
/**
 * Screen `text` against the pattern catalog before it is written.
 *
 * Patterns are tried in catalog order and the first one that matches
 * decides the result. Non-string input is rejected with pattern_id
 * 'schema'.
 *
 * The excerpt shows a little context on either side of the match. That
 * context can itself contain further secrets — a credentials file often has
 * one on the very next line — so every catalog match in the surrounding
 * text is masked as well, not only the match that triggered the rejection.
 *
 * @param {*} text - Candidate text.
 * @returns {{rejected: boolean, pattern_id: string | null, redacted_excerpt: string}}
 *   `rejected: false` with a null id and empty excerpt when nothing matches;
 *   otherwise the id of the first matching pattern and a masked excerpt.
 */
export function checkPoisonGuard(text) {
  if (typeof text !== 'string') {
    return {
      rejected: true,
      pattern_id: 'schema',
      redacted_excerpt: '',
    };
  }

  // Mask every catalog match inside `segment`. Each pattern is re-created
  // with the global flag so all occurrences are replaced, and so no shared
  // regex object picks up lastIndex state.
  const maskAll = (segment) =>
    ALL_PATTERNS.reduce((masked, { re }) => {
      const flags = re.flags.includes('g') ? re.flags : `${re.flags}g`;
      return masked.replace(new RegExp(re.source, flags), REDACTION_MASK);
    }, segment);

  for (const { id, re } of ALL_PATTERNS) {
    const m = text.match(re);
    if (!m) continue;

    const matched = m[0];
    const head = maskAll(text.slice(0, m.index));
    const tail = maskAll(text.slice(m.index + matched.length));
    // Masking can change the length of `head`, so the triggering match now
    // starts at head.length rather than at m.index.
    return {
      rejected: true,
      pattern_id: id,
      redacted_excerpt: redactExcerpt(
        `${head}${matched}${tail}`,
        head.length,
        matched.length,
      ),
    };
  }

  return {
    rejected: false,
    pattern_id: null,
    redacted_excerpt: '',
  };
}
208
+
209
+ // --- NDJSON logger (Task 24.6, design §6.7) -------------------------
210
+ //
211
+ // One line per rejection at <projectRoot>/context/.locks/
212
+ // poison-guard.log. Schema documented in design §6.7:
213
+ // {ts, pattern_id, source_file, source_line, action: "rejected",
214
+ // redacted_excerpt}
215
+ //
216
+ // The cleartext that triggered the rejection is INTENTIONALLY absent
217
+ // from this log line. The caller produces redacted_excerpt via
218
+ // checkPoisonGuard() and passes it in. Tests pin that no field
219
+ // named raw_text / unredacted / matched_text / original ever appears.
220
+
221
// Location of the rejection log, as path segments below the project root.
const POISON_GUARD_LOG_RELATIVE = ['context', '.locks', 'poison-guard.log'];

/**
 * Append one NDJSON line describing a rejection to
 * `<projectRoot>/context/.locks/poison-guard.log`, creating the directory
 * when it does not exist yet.
 *
 * The line holds exactly these fields, in this order: ts, pattern_id,
 * source_file, source_line, action ("rejected"), redacted_excerpt. Only the
 * already-redacted excerpt supplied by the caller is written; this function
 * never sees the original text.
 *
 * @param {object} [entry]
 * @param {string} entry.projectRoot - Project root directory.
 * @param {string} [entry.ts] - Timestamp of the rejection.
 * @param {string} [entry.pattern_id] - Id of the pattern that matched.
 * @param {string} [entry.source_file] - File the text came from.
 * @param {number} [entry.source_line] - Line the text came from.
 * @param {string} [entry.redacted_excerpt] - Masked excerpt to record.
 * @returns {string} Path of the log file.
 */
export function logPoisonGuardRejection({
  projectRoot,
  ts,
  pattern_id,
  source_file,
  source_line,
  redacted_excerpt,
} = {}) {
  const logPath = join(projectRoot, ...POISON_GUARD_LOG_RELATIVE);
  const logDir = dirname(logPath);
  if (!existsSync(logDir)) {
    mkdirSync(logDir, { recursive: true });
  }
  const record = JSON.stringify({
    ts,
    pattern_id,
    source_file,
    source_line,
    action: 'rejected',
    redacted_excerpt,
  });
  appendFileSync(logPath, `${record}\n`, 'utf8');
  return logPath;
}