@lh8ppl/claude-memory-kit 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cmk-compress-lazy.mjs +59 -0
- package/bin/cmk-daily-distill.mjs +67 -0
- package/bin/cmk-weekly-curate.mjs +56 -0
- package/bin/cmk.mjs +12 -0
- package/package.json +50 -0
- package/src/audit-log.mjs +103 -0
- package/src/auto-extract.mjs +742 -0
- package/src/capture-prompt.mjs +61 -0
- package/src/capture-turn.mjs +273 -0
- package/src/claude-md.mjs +212 -0
- package/src/compress-session.mjs +349 -0
- package/src/compressor.mjs +376 -0
- package/src/conflict-queue.mjs +796 -0
- package/src/cooldown.mjs +61 -0
- package/src/daily-distill.mjs +252 -0
- package/src/doctor.mjs +528 -0
- package/src/forget.mjs +335 -0
- package/src/frontmatter.mjs +73 -0
- package/src/import-anthropic-memory.mjs +266 -0
- package/src/index-db.mjs +154 -0
- package/src/index-rebuild.mjs +597 -0
- package/src/index.mjs +90 -0
- package/src/inject-context.mjs +484 -0
- package/src/install.mjs +327 -0
- package/src/lazy-compress.mjs +326 -0
- package/src/lock-discipline.mjs +166 -0
- package/src/mcp-server.mjs +498 -0
- package/src/memory-write.mjs +565 -0
- package/src/merge-facts.mjs +213 -0
- package/src/observe-edit.mjs +87 -0
- package/src/platform-commands.mjs +138 -0
- package/src/poison-guard.mjs +245 -0
- package/src/privacy.mjs +21 -0
- package/src/provenance.mjs +217 -0
- package/src/register-crons.mjs +354 -0
- package/src/reindex.mjs +134 -0
- package/src/repair.mjs +316 -0
- package/src/result-shapes.mjs +155 -0
- package/src/review-queue.mjs +345 -0
- package/src/roll.mjs +115 -0
- package/src/scratchpad.mjs +335 -0
- package/src/search.mjs +311 -0
- package/src/subcommands.mjs +1252 -0
- package/src/tier-paths.mjs +74 -0
- package/src/transcripts.mjs +234 -0
- package/src/trust.mjs +226 -0
- package/src/weekly-curate.mjs +454 -0
- package/src/write-fact.mjs +205 -0
- package/template/.claude/hooks/pre-tool-memory.js +78 -0
- package/template/.claude/hooks/transcript-capture.js +69 -0
- package/template/.claude/settings.json +27 -0
- package/template/.claude/skills/memory-write/SKILL.md +117 -0
- package/template/.gitignore.fragment +12 -0
- package/template/CLAUDE.md.template +49 -0
- package/template/docs/journey/journey-log.md.template +292 -0
- package/template/local/machine-paths.md.template +37 -0
- package/template/local/overrides.md.template +36 -0
- package/template/project/.index/.gitkeep +0 -0
- package/template/project/MEMORY.md.template +47 -0
- package/template/project/SOUL.md.template +35 -0
- package/template/project/memory/INDEX.md.template +47 -0
- package/template/project/memory/archive/superseded/.gitkeep +0 -0
- package/template/project/memory/archive/tombstones/.gitkeep +0 -0
- package/template/project/queues/.gitkeep +0 -0
- package/template/project/sessions/.gitkeep +0 -0
- package/template/project/transcripts/.gitkeep +0 -0
- package/template/support/cron-jobs/daily-memory-distill.md +15 -0
- package/template/support/cron-jobs/nightly-memsearch-index.md +17 -0
- package/template/support/cron-jobs/weekly-memory-curator.md +15 -0
- package/template/support/milvus-deploy/README.md +57 -0
- package/template/support/milvus-deploy/docker-compose.yml +66 -0
- package/template/support/scripts/auto-extract-memory.sh +102 -0
- package/template/support/scripts/memsearch-index-with-flush.sh +59 -0
- package/template/support/scripts/refresh-distill-timestamp.py +35 -0
- package/template/support/scripts/register-crons.py +242 -0
- package/template/support/scripts/run-daily-distill.sh +67 -0
- package/template/support/scripts/run-weekly-curate.sh +58 -0
- package/template/user/HABITS.md.template +18 -0
- package/template/user/LESSONS.md.template +18 -0
- package/template/user/USER.md.template +18 -0
- package/template/user/fragments/INDEX.md.template +23 -0
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
// Fact consolidation (Task 10, refactored in cleanup-layer-2-cross-module-drift).
|
|
2
|
+
// Single public boundary: mergeFacts(opts) → result. See design §3.4.
|
|
3
|
+
//
|
|
4
|
+
// Uses shared modules: tier-paths, frontmatter, audit-log, result-shapes.
|
|
5
|
+
// Composes writeFact() to create the new merged fact, then moves A + B into
|
|
6
|
+
// archive/superseded/ with superseded_by injected. See CLAUDE.md "Shared
|
|
7
|
+
// modules" rule.
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
existsSync,
|
|
11
|
+
mkdirSync,
|
|
12
|
+
readdirSync,
|
|
13
|
+
readFileSync,
|
|
14
|
+
statSync,
|
|
15
|
+
unlinkSync,
|
|
16
|
+
writeFileSync,
|
|
17
|
+
} from 'node:fs';
|
|
18
|
+
import { join } from 'node:path';
|
|
19
|
+
import {
|
|
20
|
+
VALID_TIERS,
|
|
21
|
+
ID_PATTERN,
|
|
22
|
+
resolveTierRoot,
|
|
23
|
+
resolveFactDir,
|
|
24
|
+
} from './tier-paths.mjs';
|
|
25
|
+
import { parse, format } from './frontmatter.mjs';
|
|
26
|
+
import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
|
|
27
|
+
import { ERROR_CATEGORIES, errorResult, notFoundResult } from './result-shapes.mjs';
|
|
28
|
+
import { writeFact } from './write-fact.mjs';
|
|
29
|
+
|
|
30
|
+
/**
 * List the file names of candidate fact files directly inside `factDir`.
 *
 * A candidate is a regular file whose name ends in `.md` and is not
 * `INDEX.md`. Sub-directories are not descended into.
 *
 * @param {string} factDir - directory to scan
 * @returns {string[]} bare file names (not joined with `factDir`); empty
 *   when the directory does not exist
 */
function listLiveFactFiles(factDir) {
  if (!existsSync(factDir)) return [];
  return readdirSync(factDir, { withFileTypes: true })
    .filter(
      (dirent) =>
        dirent.isFile() &&
        dirent.name.endsWith('.md') &&
        dirent.name !== 'INDEX.md',
    )
    .map((dirent) => dirent.name);
}
|
|
41
|
+
|
|
42
|
+
/**
 * Scan `factDir` for the first fact file whose frontmatter `id` equals
 * `id` and which carries no truthy `deleted_at` marker.
 *
 * @param {string} factDir - directory holding the fact files
 * @param {string} id - citation id to look for
 * @returns {{id: string, path: string, frontmatter: object, body: *} | null}
 *   the parsed fact plus its path, or null when the directory is missing
 *   or no file matches
 */
function findLiveFactById(factDir, id) {
  if (!existsSync(factDir)) return null;
  for (const name of listLiveFactFiles(factDir)) {
    const candidatePath = join(factDir, name);
    // Re-check the entry right before reading it.
    if (statSync(candidatePath).isFile()) {
      const { frontmatter, body } = parse(readFileSync(candidatePath, 'utf8'));
      if (frontmatter?.id === id && !frontmatter.deleted_at) {
        return { id, path: candidatePath, frontmatter, body };
      }
    }
  }
  return null;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Archive a fact that has been replaced by another one.
 *
 * Re-reads the fact at `match.path`, writes a copy to
 * `<match.factDir>/archive/superseded/<match.id>.md` whose frontmatter
 * carries `superseded_by`, then deletes the original file.
 *
 * @param {{id: string, path: string, factDir: string}} match - the fact to
 *   archive; `factDir` must already be set by the caller
 * @param {string} supersededBy - id of the fact that replaces this one
 * @returns {string} path of the archived copy
 */
function moveToSuperseded(match, supersededBy) {
  const supersededDir = join(match.factDir, 'archive', 'superseded');
  mkdirSync(supersededDir, { recursive: true });
  const newPath = join(supersededDir, `${match.id}.md`);
  const { frontmatter, body } = parse(readFileSync(match.path, 'utf8'));
  // `superseded_by` is listed first so it stays the leading key, then
  // re-assigned after the spread: later keys win in an object literal, so
  // without the re-assignment a `superseded_by` already present in the
  // parsed frontmatter would silently replace the new pointer.
  const updated = {
    superseded_by: supersededBy,
    ...(frontmatter ?? {}),
  };
  updated.superseded_by = supersededBy;
  writeFileSync(newPath, format({ frontmatter: updated, body }), 'utf8');
  // Remove the live copy only after the archived copy has been written.
  unlinkSync(match.path);
  return newPath;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Consolidate two live facts of the same tier into one new fact.
 *
 * Steps, in order:
 *   1. validate the options (schema errors are returned, not thrown);
 *   2. locate both facts in the tier's fact directory;
 *   3. create the merged fact through writeFact();
 *   4. move fact A, then fact B, into archive/superseded/ with
 *      `superseded_by` pointing at the new fact;
 *   5. append a 'merged' audit entry.
 *
 * @param {object} [opts]
 * @param {string} opts.idA - citation id of the first fact
 * @param {string} opts.idB - citation id of the second fact (same tier, different id)
 * @param {string} opts.mergedBody - body of the merged fact (non-empty)
 * @param {string} opts.mergedTitle - title of the merged fact (non-empty)
 * @param {string} [opts.mergedSlug] - passed to writeFact unchecked
 * @param {string} [opts.mergedType] - defaults to A's type, then B's
 * @param {string} opts.writeSource - required, no default
 * @param {string} [opts.trust] - defaults to 'high'
 * @param {string} [opts.sourceFile] - defaults to A's source_file, then 'merge'
 * @param {number} [opts.sourceLine] - defaults to 1
 * @param {string} [opts.sourceSha1] - defaults to A's source_sha1, then 'merged'
 * @param {*} [opts.mergedTags] - forwarded to writeFact as `tags`
 * @param {string} [opts.projectRoot]
 * @param {string} [opts.userDir]
 * @param {string} [opts.now] - audit timestamp; defaults to nowIso()
 * @returns {object} on success `{action: 'merged', id, tier, path,
 *   supersededPaths}`; otherwise whatever errorResult() / notFoundResult()
 *   produce
 */
export function mergeFacts(opts = {}) {
  const {
    idA,
    idB,
    mergedBody,
    mergedTitle,
    mergedSlug,
    mergedType,
    writeSource,
    trust,
    sourceFile,
    sourceLine,
    sourceSha1,
    mergedTags,
    projectRoot,
    userDir,
    now,
  } = opts;

  const schemaFailure = (errors) =>
    errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });

  // ---- 1. option validation -------------------------------------------
  const problems = [];
  for (const [label, value] of [
    ['idA', idA],
    ['idB', idB],
  ]) {
    if (!value || !ID_PATTERN.test(value)) {
      problems.push(`${label}: must be a valid citation ID`);
    }
  }
  // A self-merge is reported on its own, ahead of anything collected so far.
  if (idA && idB && idA === idB) {
    return schemaFailure([
      `idA and idB are the same (${idA}); cannot merge a fact with itself`,
    ]);
  }
  if (typeof mergedBody !== 'string' || mergedBody.length === 0) {
    problems.push('mergedBody: required, non-empty string');
  }
  if (typeof mergedTitle !== 'string' || mergedTitle === '') {
    problems.push('mergedTitle: required, non-empty string');
  }
  // mergedSlug is deliberately not checked here; it is handed to writeFact
  // as-is and any slug problem comes back through writeFact's result.
  // writeSource has no default: the caller must name the source explicitly.
  if (typeof writeSource !== 'string' || writeSource === '') {
    problems.push('writeSource: required (no default). Pick one of user-explicit/auto-extract/compressor/manual-edit/imported.');
  }
  if (problems.length > 0) {
    return schemaFailure(problems);
  }

  // ---- tier checks: the first character of each id is its tier --------
  const tierA = idA[0];
  const tierB = idB[0];
  if (tierA !== tierB) {
    return schemaFailure([
      `cross-tier merge not supported: idA tier (${tierA}) ≠ idB tier (${tierB}). Promote one side to the same tier first.`,
    ]);
  }
  const tier = tierA;
  if (!VALID_TIERS.has(tier)) {
    return schemaFailure([`invalid tier prefix on ids: ${tier}`]);
  }

  // ---- 2. locate both source facts ------------------------------------
  const tierRoot = resolveTierRoot({ tier, projectRoot, userDir });
  const factDir = resolveFactDir(tier, tierRoot);
  const matchA = findLiveFactById(factDir, idA);
  const matchB = findLiveFactById(factDir, idB);
  const missing = [
    [idA, matchA],
    [idB, matchB],
  ]
    .filter(([, found]) => !found)
    .map(([missingId]) => missingId);
  if (missing.length > 0) {
    return notFoundResult({
      errors: [`no live fact found for ${missing.join(', ')}`],
    });
  }
  // moveToSuperseded() reads factDir off the match object.
  matchA.factDir = factDir;
  matchB.factDir = factDir;

  // ---- 3. write the merged fact ---------------------------------------
  const written = writeFact({
    tier,
    type: mergedType ?? matchA.frontmatter.type ?? matchB.frontmatter.type,
    slug: mergedSlug,
    title: mergedTitle,
    body: mergedBody,
    writeSource,
    trust: trust ?? 'high',
    sourceFile: sourceFile ?? matchA.frontmatter.source_file ?? 'merge',
    sourceLine: sourceLine ?? 1,
    sourceSha1: sourceSha1 ?? matchA.frontmatter.source_sha1 ?? 'merged',
    mergedFrom: [idA, idB],
    tags: mergedTags,
    projectRoot,
    userDir,
  });
  if (written.action === 'error') {
    // Pass writeFact's own category and messages straight through.
    return errorResult({
      category: written.errorCategory,
      errors: written.errors,
    });
  }
  if (written.action !== 'created') {
    // Anything other than a fresh creation means writeFact resolved to an
    // already-existing fact; refuse rather than point A and B at it.
    const detail = written.skipReason ? ': ' + written.skipReason : '';
    return errorResult({
      category: ERROR_CATEGORIES.COLLISION,
      errors: [
        `merged body collides with existing fact ${written.id} (writeFact returned ${written.action}${detail}); choose a different mergedBody`,
      ],
    });
  }

  // ---- 4. archive the originals (A first, then B) ---------------------
  const archivedA = moveToSuperseded(matchA, written.id);
  const archivedB = moveToSuperseded(matchB, written.id);

  // ---- 5. audit trail -------------------------------------------------
  appendAuditEntry(tierRoot, {
    ts: now ?? nowIso(),
    action: 'merged',
    tier,
    id: written.id,
    reasonCode: REASON_CODES.CURATED_MERGE,
    paths: {
      after: written.path,
      archive: [archivedA, archivedB],
    },
    extra: { mergedFrom: [idA, idB] },
  });

  return {
    action: 'merged',
    id: written.id,
    tier,
    path: written.path,
    supersededPaths: [archivedA, archivedB],
  };
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
// PostToolUse hook real handler (Task 20, T-017). Fires on Write /
|
|
2
|
+
// Edit / MultiEdit only (matcher in hooks.json) and appends a one-line
|
|
3
|
+
// summary of large tool outputs to sessions/now.md — feeds the rolling-
|
|
4
|
+
// window compression pipeline (design §8.1).
|
|
5
|
+
//
|
|
6
|
+
// Public boundary: observeEdit({payload, projectRoot, now}) → result.
|
|
7
|
+
// The bin wrapper handles stdin parsing + the detached-spawn dance
|
|
8
|
+
// that makes the hook return within 50ms regardless of how big the
|
|
9
|
+
// tool output is.
|
|
10
|
+
//
|
|
11
|
+
// Filter rules (defensive — hooks.json matcher should be the first
|
|
12
|
+
// line of defense, but a misconfigured plugin install could route
|
|
13
|
+
// other tool_names here, so we double-check):
|
|
14
|
+
// - tool_name must be one of Write / Edit / MultiEdit
|
|
15
|
+
// - tool_response.content must be > LINE_THRESHOLD lines
|
|
16
|
+
//
|
|
17
|
+
// Per design §1.4 / §8.1 the summary line shape feeds claude-remember-
|
|
18
|
+
// style compaction downstream; we use a stable
|
|
19
|
+
// [<iso-ts>] <tool> file=<file_path> lines=<count>
|
|
20
|
+
// format so the SessionEnd Haiku compressor (Task 22+23) can recognize
|
|
21
|
+
// individual events.
|
|
22
|
+
|
|
23
|
+
import { existsSync, mkdirSync, appendFileSync } from 'node:fs';
|
|
24
|
+
import { join } from 'node:path';
|
|
25
|
+
|
|
26
|
+
const ELIGIBLE_TOOLS = new Set(['Write', 'Edit', 'MultiEdit']);
|
|
27
|
+
const LINE_THRESHOLD = 50; // strictly greater-than per design / 20.2
|
|
28
|
+
|
|
29
|
+
/**
 * Count the lines of content in `text`, splitting on `\n` only.
 *
 * A single trailing newline terminates the last line rather than opening
 * a new empty one, so `"a\n"` counts as 1 and `"a\nb"` as 2.
 *
 * @param {*} text - value to measure
 * @returns {number} 0 for a non-string or the empty string, otherwise the
 *   number of lines
 */
function countLines(text) {
  if (typeof text !== 'string' || text.length === 0) return 0;
  let newlineCount = 0;
  let at = text.indexOf('\n');
  while (at !== -1) {
    newlineCount += 1;
    at = text.indexOf('\n', at + 1);
  }
  // Without a trailing newline the final, unterminated line still counts.
  return text.endsWith('\n') ? newlineCount : newlineCount + 1;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Pull the tool's textual output out of a hook payload.
 *
 * The response object is read from `tool_response` (or `toolResponse`)
 * and probed for the first string among `content`, `output`, `text`.
 * If none is found, a top-level string `payload.output` is used.
 *
 * @param {*} payload - hook payload
 * @returns {string} the extracted text, or '' when nothing usable is found
 */
function extractContent(payload) {
  if (!payload || typeof payload !== 'object') return '';
  const response = payload.tool_response ?? payload.toolResponse ?? null;
  if (response && typeof response === 'object') {
    for (const field of ['content', 'output', 'text']) {
      if (typeof response[field] === 'string') return response[field];
    }
  }
  return typeof payload.output === 'string' ? payload.output : '';
}
|
|
55
|
+
|
|
56
|
+
/**
 * Pull the edited file's path out of a hook payload.
 *
 * The input object is read from `tool_input` (or `toolInput`) and probed
 * for the first string among `file_path`, `filePath`, `path`.
 *
 * @param {*} payload - hook payload
 * @returns {string} the path, or '' when none is present
 */
function extractFilePath(payload) {
  const input = payload?.tool_input ?? payload?.toolInput ?? null;
  if (!input || typeof input !== 'object') return '';
  const field = ['file_path', 'filePath', 'path'].find(
    (name) => typeof input[name] === 'string',
  );
  return field === undefined ? '' : input[field];
}
|
|
65
|
+
|
|
66
|
+
/**
 * Record a one-line summary of a large Write / Edit / MultiEdit result in
 * `<projectRoot>/context/sessions/now.md`.
 *
 * @param {object} [args]
 * @param {object} args.payload - hook payload; the tool name is read from
 *   `tool_name` or `toolName`
 * @param {string} args.projectRoot - root under which `context/sessions`
 *   lives (created on demand)
 * @param {string} [args.now] - timestamp for the summary; defaults to the
 *   current time in ISO-8601
 * @returns {{action: 'noop', reason: string, lines?: number} |
 *   {action: 'appended', summaryLine: string, lines: number}}
 */
export function observeEdit({ payload, projectRoot, now } = {}) {
  const toolName = payload?.tool_name ?? payload?.toolName;
  if (!ELIGIBLE_TOOLS.has(toolName)) {
    return { action: 'noop', reason: 'tool-name-not-eligible' };
  }

  const lines = countLines(extractContent(payload));
  // The threshold is exclusive: exactly LINE_THRESHOLD lines is still a noop.
  if (lines <= LINE_THRESHOLD) {
    return { action: 'noop', reason: 'below-line-threshold', lines };
  }

  const stamp = now ?? new Date().toISOString();
  const summaryLine = `[${stamp}] ${toolName} file=${extractFilePath(payload)} lines=${lines}\n`;

  const sessionsDir = join(projectRoot, 'context', 'sessions');
  if (!existsSync(sessionsDir)) {
    mkdirSync(sessionsDir, { recursive: true });
  }
  appendFileSync(join(sessionsDir, 'now.md'), summaryLine, 'utf8');

  return { action: 'appended', summaryLine, lines };
}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
// platform-commands.mjs — shared helper for emitting user-facing
|
|
2
|
+
// shell commands that work on the user's actual shell.
|
|
3
|
+
//
|
|
4
|
+
// Why this exists
|
|
5
|
+
// ---------------
|
|
6
|
+
//
|
|
7
|
+
// The kit emits shell commands to users at runtime in several places
|
|
8
|
+
// (lock-discipline.mjs's `recoveryCommand` field is the canonical
|
|
9
|
+
// example; future `cmk doctor` HC-* repair output + `cmk repair`
|
|
10
|
+
// self-repair output + error messages all do the same). The
|
|
11
|
+
// user-facing shell varies by OS:
|
|
12
|
+
//
|
|
13
|
+
// - Windows (cmd.exe / PowerShell): `Remove-Item`, `Get-ChildItem`,
|
|
14
|
+
// `New-Item`, etc. POSIX commands (`rm`, `ls`, `mkdir`) DO NOT
|
|
15
|
+
// work on stock cmd.exe and produce confusing errors.
|
|
16
|
+
// - macOS / Linux: POSIX commands.
|
|
17
|
+
// - Git Bash on Windows: POSIX commands work (it provides them).
|
|
18
|
+
//
|
|
19
|
+
// PR-B (lock-discipline.mjs) established the inline pattern:
|
|
20
|
+
// `process.platform === 'win32'` switches between `Remove-Item "..."`
|
|
21
|
+
// and `rm "..."`. PR-E (this campaign Part 7/7) generalizes that
|
|
22
|
+
// pattern into this shared helper so future code doesn't reinvent it,
|
|
23
|
+
// and so `scripts/validate-platform-commands.mjs` can mechanically
|
|
24
|
+
// verify every emission site uses the helper or an explicit
|
|
25
|
+
// suppression marker.
|
|
26
|
+
//
|
|
27
|
+
// What this module provides
|
|
28
|
+
// -------------------------
|
|
29
|
+
//
|
|
30
|
+
// One function per primitive command. Each takes argument(s) and
|
|
31
|
+
// returns a COMPLETE, copy-paste-ready shell command string in the
|
|
32
|
+
// user's native shell. The caller doesn't think about quoting,
|
|
33
|
+
// escaping, or platform differences — the helper does.
|
|
34
|
+
//
|
|
35
|
+
// Primitives currently covered:
|
|
36
|
+
// - removeFile(path) — delete a file
|
|
37
|
+
// - removeDir(path) — delete a directory recursively
|
|
38
|
+
// - listDir(path) — list directory contents
|
|
39
|
+
//
|
|
40
|
+
// What this module does NOT do
|
|
41
|
+
// ----------------------------
|
|
42
|
+
//
|
|
43
|
+
// - Generate shell-script files. This is for one-liner copy-paste
|
|
44
|
+
// hints to the user, not for scripting.
|
|
45
|
+
// - Escape shell injection. Callers pass paths derived from kit
|
|
46
|
+
// state (lock-file paths, install dirs). Untrusted user input
|
|
47
|
+
// should be validated upstream.
|
|
48
|
+
// - Detect Git Bash vs cmd.exe vs PowerShell on Windows. We emit
|
|
49
|
+
// the PowerShell-style command on win32; Git Bash users can run
|
|
50
|
+
// the POSIX command independently. The platform-detection
|
|
51
|
+
// defaults to "what the user's STOCK shell expects" since that's
|
|
52
|
+
// the failure mode (PR-B's `recoveryCommand` finding: a Windows
|
|
53
|
+
// user pasting `rm` into cmd.exe gets a "command not found"
|
|
54
|
+
// error; pasting `Remove-Item` works in both PowerShell AND
|
|
55
|
+
// Git Bash IF git-bash forwards the call, but at minimum it
|
|
56
|
+
// doesn't give a confusing error).
|
|
57
|
+
//
|
|
58
|
+
// Suppression
|
|
59
|
+
// -----------
|
|
60
|
+
//
|
|
61
|
+
// Sites that LEGITIMATELY hardcode a POSIX command (e.g., a .sh
|
|
62
|
+
// script that already requires bash) can suppress the
|
|
63
|
+
// `validate-platform-commands.mjs` check with a per-line
|
|
64
|
+
// `// platform-commands: ignore <reason>` marker. Use sparingly —
|
|
65
|
+
// the marker is for cases where a platform-specific shell is the
|
|
66
|
+
// contract, not an oversight.
|
|
67
|
+
|
|
68
|
+
// Note on Git Bash on Windows (per design §18.6): Git Bash reports
|
|
69
|
+
// `process.platform === 'win32'` (it's running under the win32 Node
|
|
70
|
+
// build) and accepts both POSIX `rm` AND PowerShell `Remove-Item`
|
|
71
|
+
// via PowerShell.exe in PATH. Emitting `Remove-Item` is therefore
|
|
72
|
+
// the cross-Windows-shell-compatible default — it works in stock
|
|
73
|
+
// PowerShell AND in Git Bash. A user on cmd.exe with neither
|
|
74
|
+
// PowerShell nor `rm` in PATH would have a broken Node install
|
|
75
|
+
// anyway, so that case is out of scope.
|
|
76
|
+
const IS_WINDOWS = process.platform === 'win32';

// Quote a path for the user's shell. Both PowerShell and POSIX shells
// accept double-quoted paths, so double quotes are used everywhere for
// consistency.
//
// Inside double quotes a few characters are still interpreted by the
// shell, so they are escaped to keep the emitted command copy-paste
// correct for unusual paths:
//   - PowerShell: `"`, `$` and the backtick, escaped with a backtick.
//     Backslashes are plain path separators there and are left alone.
//   - POSIX:      `"`, `$`, the backtick and `\`, escaped with a backslash.
// Paths without any of these characters come out exactly as before. This
// keeps honest paths intact; it is not a defense against hostile input,
// which callers must still validate upstream.
/**
 * @param {string} path - filesystem path to embed in a command
 * @returns {string} the path wrapped in double quotes, with shell-special
 *   characters escaped for the current platform's shell
 */
function quote(path) {
  const raw = `${path}`;
  const escaped = IS_WINDOWS
    ? raw.replace(/[`"$]/g, '`$&')
    : raw.replace(/[\\"$`]/g, '\\$&');
  return `"${escaped}"`;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Produce a copy-paste-ready "delete this file" command.
 *
 *   win32:  Remove-Item "C:\path\to\file"
 *   other:  rm "/path/to/file"
 *
 * @param {string} path - file to delete
 * @returns {string} the complete command line
 */
export function removeFile(path) {
  const command = IS_WINDOWS ? 'Remove-Item' : 'rm';
  return `${command} ${quote(path)}`;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Produce a copy-paste-ready "delete this directory and everything in it"
 * command.
 *
 *   win32:  Remove-Item -Recurse -Force "C:\path\to\dir"
 *   other:  rm -rf "/path/to/dir"
 *
 * @param {string} path - directory to delete
 * @returns {string} the complete command line
 */
export function removeDir(path) {
  const command = IS_WINDOWS ? 'Remove-Item -Recurse -Force' : 'rm -rf';
  return `${command} ${quote(path)}`;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Produce a copy-paste-ready "show what is in this directory" command.
 *
 *   win32:  Get-ChildItem "C:\path\to\dir"
 *   other:  ls "/path/to/dir"
 *
 * @param {string} path - directory to list
 * @returns {string} the complete command line
 */
export function listDir(path) {
  const command = IS_WINDOWS ? 'Get-ChildItem' : 'ls';
  return `${command} ${quote(path)}`;
}
|
|
134
|
+
|
|
135
|
+
// Exported for the validator + tests to assert which platform the
|
|
136
|
+
// helper is currently emitting for. Useful for cross-platform CI
|
|
137
|
+
// matrices where the test asserts both halves.
|
|
138
|
+
export const PLATFORM = IS_WINDOWS ? 'win32' : 'posix';
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
// Poison_Guard — pre-write secret + injection filter (Task 24.5, T-021).
|
|
2
|
+
//
|
|
3
|
+
// The kit's last line of defense before any auto-extracted or
|
|
4
|
+
// user-explicit fact is written to a project-tier or user-tier file
|
|
5
|
+
// that may end up in git. False negatives = credentials in the repo.
|
|
6
|
+
// False positives = legitimate writes blocked. Pattern correctness
|
|
7
|
+
// has to be right (design §6.7).
|
|
8
|
+
//
|
|
9
|
+
// Public boundary:
|
|
10
|
+
// checkPoisonGuard(text) → {
|
|
11
|
+
// rejected: boolean,
|
|
12
|
+
// pattern_id: string | null, // category id; null if rejected:false
|
|
13
|
+
// redacted_excerpt: string, // safe-for-logging excerpt; masks matched text with ***
|
|
14
|
+
// }
|
|
15
|
+
//
|
|
16
|
+
// Pattern catalog per design §6.7. The catalog is intentionally
|
|
17
|
+
// conservative — see the design note "Why discoverability-only, not
|
|
18
|
+
// perfect prevention": the threat model is "accidental commit", not
|
|
19
|
+
// "active adversary in your repo." Regex catches the high-frequency
|
|
20
|
+
// mistakes; secret-scanners (gitleaks, trufflehog) are the second
|
|
21
|
+
// line of defense, not us.
|
|
22
|
+
//
|
|
23
|
+
// Redaction contract (security-load-bearing):
|
|
24
|
+
// - The matched secret/injection text MUST NEVER appear in
|
|
25
|
+
// redacted_excerpt in cleartext. The whole point of this module
|
|
26
|
+
// is to keep secrets out of logs. Every pattern in this catalog
|
|
27
|
+
// must produce a redacted excerpt that masks the match span
|
|
28
|
+
// with `***`. Unit tests pin this contract.
|
|
29
|
+
// - The excerpt is bounded in length (≤ ~200 chars) so a long
|
|
30
|
+
// pasted blob doesn't blow up the log line.
|
|
31
|
+
|
|
32
|
+
import {
|
|
33
|
+
appendFileSync,
|
|
34
|
+
existsSync,
|
|
35
|
+
mkdirSync,
|
|
36
|
+
} from 'node:fs';
|
|
37
|
+
import { join, dirname } from 'node:path';
|
|
38
|
+
|
|
39
|
+
// --- Pattern catalog -------------------------------------------------
|
|
40
|
+
// Each pattern is { id, re, category }. The id is the stable
|
|
41
|
+
// machine-parseable name that shows up in poison-guard.log NDJSON +
|
|
42
|
+
// extract.log error_category disambiguation. The re is the
|
|
43
|
+
// case-insensitive regex; category is 'secret' or 'injection' so the
|
|
44
|
+
// downstream categorizer can route into POISON_GUARD_CATEGORIES.
|
|
45
|
+
//
|
|
46
|
+
// Conservative-on-purpose. Adding a pattern is a write — adding a
|
|
47
|
+
// pattern that has false positives is a denial-of-service against
|
|
48
|
+
// legitimate user input. Each pattern should be vetted against
|
|
49
|
+
// realistic adversarial samples AND against realistic benign user
|
|
50
|
+
// content.
|
|
51
|
+
const SECRET_PATTERNS = [
  // AWS access key id: one of the fixed four-letter prefixes followed by
  // 16 uppercase alphanumerics. The prefix is required so arbitrary
  // 20-character blobs do not match.
  {
    id: 'secret_aws_access_key_id',
    category: 'secret',
    re: /\b(?:AKIA|ASIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASCA)[A-Z0-9]{16}\b/,
  },
  // AWS secret access key in a key=value shape. The value part is lenient
  // (16 or more Base64-ish characters); the `aws_secret...key` token in
  // front is what gates on intent.
  {
    id: 'secret_aws_secret_access_key',
    category: 'secret',
    re: /(?:aws[_-]?secret[_-]?(?:access[_-]?)?key)[\s:=]+["']?[A-Za-z0-9/+=]{16,}/i,
  },
  // Generic api_key / secret / password / passwd / token / bearer in a
  // key=value shape. The 20-character minimum on the value skips short
  // test fixtures such as `api_key=abc123`.
  {
    id: 'secret_generic_credential',
    category: 'secret',
    re: /(?:api[_-]?key|secret|password|passwd|token|bearer)[\s:=]+["']?[A-Za-z0-9_\-/+=]{20,}/i,
  },
  // Private-key armor. The algorithm qualifier ("RSA ", "EC ", ...) is
  // optional, and the optional " BLOCK" suffix covers OpenPGP armor
  // (`-----BEGIN PGP PRIVATE KEY BLOCK-----`).
  //
  // The match deliberately runs through the matching END line, or to the
  // end of the text when there is none, so the key BODY is part of the
  // match span and gets masked. Matching the BEGIN line alone would leave
  // the first characters of key material in the excerpt's trailing
  // context.
  {
    id: 'secret_pem_private_key',
    category: 'secret',
    re: /-----BEGIN (?:RSA |EC |OPENSSH |PGP |DSA |ENCRYPTED )?PRIVATE KEY(?: BLOCK)?-----(?:[\s\S]*?-----END [A-Z ]*PRIVATE KEY(?: BLOCK)?-----|[\s\S]*)/,
  },
  // GitHub personal access token: `ghp_` + 36 alphanumerics. There is no
  // trailing \b so alphanumeric padding right after the token does not
  // suppress detection; the leading \b stops mid-identifier matches
  // ("xghp_..." is not a token).
  {
    id: 'secret_github_pat',
    category: 'secret',
    re: /\bghp_[A-Za-z0-9]{36}/,
  },
  // OpenAI / Anthropic style keys: `sk-`, an optional `ant-` / `proj-`
  // qualifier, then 40 or more alphanumeric / dash / underscore characters.
  {
    id: 'secret_openai_anthropic_key',
    category: 'secret',
    re: /\bsk-(?:ant-|proj-)?[A-Za-z0-9_-]{40,}/,
  },
  // Slack tokens: `xoxb-` / `xoxp-` / `xoxs-` + 10 or more
  // alphanumeric / dash characters.
  {
    id: 'secret_slack_token',
    category: 'secret',
    re: /\bxox[bps]-[A-Za-z0-9-]{10,}/,
  },
];

const INJECTION_PATTERNS = [
  // "ignore (all|any|previous|prior)* (instructions|prompts|rules)".
  // The qualifier group repeats zero or more times, so "ignore
  // instructions", "ignore previous instructions" and "ignore all previous
  // instructions" (two stacked qualifiers) all match, in any letter case.
  {
    id: 'injection_ignore_instructions',
    category: 'injection',
    re: /ignore (?:all |any |previous |prior )*(?:instructions?|prompts?|rules?)/i,
  },
  // "You are now [a/an/the] [up to three words] <role noun>" — a
  // role-override attempt. An explicit role noun is required so benign
  // sentences such as "you are now able to ship" do not match.
  {
    id: 'injection_role_override',
    category: 'injection',
    re: /you are now (?:a |an |the )?(?:[a-z]+ ){0,3}(?:assistant|chatbot|ai|bot|pirate|agent|expert|persona|model|gpt|claude)/i,
  },
  // Fake role tags: an opening or closing <system> / <assistant> tag.
  {
    id: 'injection_fake_role_tag',
    category: 'injection',
    re: /<\/?(?:system|assistant)>/i,
  },
  // "disregard the above" — common injection lead-in.
  {
    id: 'injection_disregard_above',
    category: 'injection',
    re: /disregard the above/i,
  },
];

// Secrets are checked before injections; the first matching pattern names
// the rejection.
const ALL_PATTERNS = [...SECRET_PATTERNS, ...INJECTION_PATTERNS];

// Frozen lists of pattern ids grouped by category, so callers can validate
// routing logic without depending on the internal pattern arrays.
export const POISON_GUARD_CATEGORIES = Object.freeze({
  SECRET_CATEGORIES: Object.freeze(SECRET_PATTERNS.map((p) => p.id)),
  INJECTION_CATEGORIES: Object.freeze(INJECTION_PATTERNS.map((p) => p.id)),
});

// Redaction parameters: how many characters of surrounding text to keep on
// each side of the match, and what replaces every masked span.
const REDACTION_CONTEXT_CHARS = 30;
const REDACTION_MASK = '***';

/**
 * Collect the [start, end) span of every non-empty match of every catalog
 * pattern in `text`.
 *
 * @param {string} text
 * @returns {Array<[number, number]>} spans in no particular order
 */
function findAllMatchSpans(text) {
  const spans = [];
  for (const { re } of ALL_PATTERNS) {
    // Scan with a private global copy so the catalog regexes stay stateless.
    const flags = re.flags.includes('g') ? re.flags : `${re.flags}g`;
    const scanner = new RegExp(re.source, flags);
    let hit = scanner.exec(text);
    while (hit !== null) {
      if (hit[0].length === 0) {
        scanner.lastIndex += 1; // never loop forever on an empty match
      } else {
        spans.push([hit.index, hit.index + hit[0].length]);
      }
      hit = scanner.exec(text);
    }
  }
  return spans;
}

/**
 * Build a bounded, log-safe excerpt around a match.
 *
 * The excerpt keeps up to REDACTION_CONTEXT_CHARS characters on each side
 * of the primary match and replaces the match with REDACTION_MASK. Every
 * span in `extraSpans` that overlaps that window is masked too, so a
 * second secret sitting next to the first one is not written out as
 * "context". Overlapping or touching spans collapse into a single mask.
 * A leading / trailing `...` marks text cut off by the window.
 *
 * @param {string} text - full input text
 * @param {number} matchStart - index of the primary match
 * @param {number} matchLength - length of the primary match
 * @param {Array<[number, number]>} [extraSpans] - further [start, end)
 *   spans to mask when they fall inside the window
 * @returns {string} the redacted excerpt
 */
function redactExcerpt(text, matchStart, matchLength, extraSpans = []) {
  const matchEnd = matchStart + matchLength;
  const ctxStart = Math.max(0, matchStart - REDACTION_CONTEXT_CHARS);
  const ctxEnd = Math.min(text.length, matchEnd + REDACTION_CONTEXT_CHARS);

  // Clip the extra spans to the window and drop the ones outside it; the
  // primary span is always kept.
  const clipped = [[matchStart, matchEnd]];
  for (const [start, end] of extraSpans) {
    const s = Math.max(start, ctxStart);
    const e = Math.min(end, ctxEnd);
    if (s < e) clipped.push([s, e]);
  }
  clipped.sort((a, b) => a[0] - b[0] || a[1] - b[1]);

  // Merge overlapping / touching spans so each run yields exactly one mask.
  const merged = [];
  for (const [s, e] of clipped) {
    const last = merged[merged.length - 1];
    if (last && s <= last[1]) {
      last[1] = Math.max(last[1], e);
    } else {
      merged.push([s, e]);
    }
  }

  let excerpt = ctxStart > 0 ? '...' : '';
  let cursor = ctxStart;
  for (const [s, e] of merged) {
    excerpt += text.slice(cursor, s) + REDACTION_MASK;
    cursor = e;
  }
  excerpt += text.slice(cursor, ctxEnd);
  if (ctxEnd < text.length) excerpt += '...';
  return excerpt;
}

/**
 * Check `text` against the secret and injection pattern catalog.
 *
 * @param {*} text - candidate content
 * @returns {{rejected: boolean, pattern_id: string | null, redacted_excerpt: string}}
 *   - non-string input is rejected with pattern_id 'schema' and an empty
 *     excerpt;
 *   - on a match, `pattern_id` is the id of the FIRST catalog pattern that
 *     matches and `redacted_excerpt` is a bounded excerpt in which every
 *     catalog match inside the context window is masked;
 *   - otherwise `{rejected: false, pattern_id: null, redacted_excerpt: ''}`.
 */
export function checkPoisonGuard(text) {
  if (typeof text !== 'string') {
    return {
      rejected: true,
      pattern_id: 'schema',
      redacted_excerpt: '',
    };
  }
  for (const { id, re } of ALL_PATTERNS) {
    const m = text.match(re);
    if (m) {
      return {
        rejected: true,
        pattern_id: id,
        // The full-catalog scan only runs on the rejection path.
        redacted_excerpt: redactExcerpt(
          text,
          m.index,
          m[0].length,
          findAllMatchSpans(text),
        ),
      };
    }
  }
  return {
    rejected: false,
    pattern_id: null,
    redacted_excerpt: '',
  };
}
|
|
208
|
+
|
|
209
|
+
// --- NDJSON logger (Task 24.6, design §6.7) -------------------------
|
|
210
|
+
//
|
|
211
|
+
// One line per rejection at <projectRoot>/context/.locks/
|
|
212
|
+
// poison-guard.log. Schema documented in design §6.7:
|
|
213
|
+
// {ts, pattern_id, source_file, source_line, action: "rejected",
|
|
214
|
+
// redacted_excerpt}
|
|
215
|
+
//
|
|
216
|
+
// The cleartext that triggered the rejection is INTENTIONALLY absent
|
|
217
|
+
// from this log line. The caller produces redacted_excerpt via
|
|
218
|
+
// checkPoisonGuard() and passes it in. Tests pin that no field
|
|
219
|
+
// named raw_text / unredacted / matched_text / original ever appears.
|
|
220
|
+
|
|
221
|
+
// Location of the rejection log, as path segments below the project root.
const POISON_GUARD_LOG_RELATIVE = ['context', '.locks', 'poison-guard.log'];

/**
 * Append one NDJSON line describing a rejection to
 * `<projectRoot>/context/.locks/poison-guard.log`, creating the directory
 * when it is missing.
 *
 * Only the fields listed below are written; the caller supplies an
 * already-redacted excerpt.
 *
 * @param {object} [entry]
 * @param {string} entry.projectRoot - project root the log lives under
 * @param {string} entry.ts - timestamp to record
 * @param {string} entry.pattern_id - id of the pattern that fired
 * @param {string} entry.source_file - where the rejected text came from
 * @param {number} entry.source_line - line within `source_file`
 * @param {string} entry.redacted_excerpt - masked excerpt
 * @returns {string} absolute or relative path of the log file, as joined
 *   from `projectRoot`
 */
export function logPoisonGuardRejection({
  projectRoot,
  ts,
  pattern_id,
  source_file,
  source_line,
  redacted_excerpt,
} = {}) {
  const logPath = join(projectRoot, ...POISON_GUARD_LOG_RELATIVE);
  const logDir = dirname(logPath);
  if (!existsSync(logDir)) {
    mkdirSync(logDir, { recursive: true });
  }
  // Key order is the on-disk field order of the NDJSON record.
  const record = JSON.stringify({
    ts,
    pattern_id,
    source_file,
    source_line,
    action: 'rejected',
    redacted_excerpt,
  });
  appendFileSync(logPath, record + '\n', 'utf8');
  return logPath;
}
|