clawmem 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -4
- package/CLAUDE.md +5 -4
- package/README.md +12 -6
- package/SKILL.md +10 -3
- package/package.json +1 -1
- package/src/clawmem.ts +99 -0
- package/src/hooks/decision-extractor.ts +5 -1
- package/src/memory.ts +12 -3
- package/src/normalize.ts +390 -0
- package/src/observer.ts +9 -3
package/AGENTS.md
CHANGED
|
@@ -354,6 +354,7 @@ Pin, snooze, and forget are **manual MCP tools** — not automated. The agent sh
|
|
|
354
354
|
- Do NOT pin everything — pin is for persistent high-priority items, not temporary boosting.
|
|
355
355
|
- Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it naturally.
|
|
356
356
|
- Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically. Only after bulk ingestion or when `intent_search` returns weak graph results.
|
|
357
|
+
- Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command (same category as `update`/`reindex`). Suggest it to the user when they mention old conversation exports, but let them run it. Bulk import has disk/embedding cost implications that need user consent.
|
|
357
358
|
|
|
358
359
|
## Tool Selection (one-liner)
|
|
359
360
|
|
|
@@ -435,16 +436,16 @@ compositeScore = (0.10 × searchScore + 0.70 × recencyScore + 0.20 × confidenc
|
|
|
435
436
|
|
|
436
437
|
| Content Type | Half-Life | Effect |
|
|
437
438
|
|--------------|-----------|--------|
|
|
438
|
-
| decision, hub | ∞ | Never decay |
|
|
439
|
+
| decision, preference, hub | ∞ | Never decay |
|
|
439
440
|
| antipattern | ∞ | Never decay — accumulated negative patterns persist |
|
|
440
441
|
| project | 120 days | Slow decay |
|
|
441
442
|
| research | 90 days | Moderate decay |
|
|
442
|
-
| note | 60 days | Default |
|
|
443
|
-
| progress | 45 days | Faster decay |
|
|
443
|
+
| problem, milestone, note | 60 days | Default |
|
|
444
|
+
| conversation, progress | 45 days | Faster decay |
|
|
444
445
|
| handoff | 30 days | Fast — recent matters most |
|
|
445
446
|
|
|
446
447
|
Half-lives extend up to 3× for frequently-accessed memories (access reinforcement decays over 90 days).
|
|
447
|
-
Attention decay: non-durable types (handoff, progress, note, project) lose 5% confidence per week without access. Decision/hub/research/antipattern are exempt.
|
|
448
|
+
Attention decay: non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access. Decision/preference/hub/research/antipattern are exempt.
|
|
448
449
|
|
|
449
450
|
## Indexing & Graph Building
|
|
450
451
|
|
package/CLAUDE.md
CHANGED
|
@@ -354,6 +354,7 @@ Pin, snooze, and forget are **manual MCP tools** — not automated. The agent sh
|
|
|
354
354
|
- Do NOT pin everything — pin is for persistent high-priority items, not temporary boosting.
|
|
355
355
|
- Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it naturally.
|
|
356
356
|
- Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically. Only after bulk ingestion or when `intent_search` returns weak graph results.
|
|
357
|
+
- Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command (same category as `update`/`reindex`). Suggest it to the user when they mention old conversation exports, but let them run it. Bulk import has disk/embedding cost implications that need user consent.
|
|
357
358
|
|
|
358
359
|
## Tool Selection (one-liner)
|
|
359
360
|
|
|
@@ -435,16 +436,16 @@ compositeScore = (0.10 × searchScore + 0.70 × recencyScore + 0.20 × confidenc
|
|
|
435
436
|
|
|
436
437
|
| Content Type | Half-Life | Effect |
|
|
437
438
|
|--------------|-----------|--------|
|
|
438
|
-
| decision, hub | ∞ | Never decay |
|
|
439
|
+
| decision, preference, hub | ∞ | Never decay |
|
|
439
440
|
| antipattern | ∞ | Never decay — accumulated negative patterns persist |
|
|
440
441
|
| project | 120 days | Slow decay |
|
|
441
442
|
| research | 90 days | Moderate decay |
|
|
442
|
-
| note | 60 days | Default |
|
|
443
|
-
| progress | 45 days | Faster decay |
|
|
443
|
+
| problem, milestone, note | 60 days | Default |
|
|
444
|
+
| conversation, progress | 45 days | Faster decay |
|
|
444
445
|
| handoff | 30 days | Fast — recent matters most |
|
|
445
446
|
|
|
446
447
|
Half-lives extend up to 3× for frequently-accessed memories (access reinforcement decays over 90 days).
|
|
447
|
-
Attention decay: non-durable types (handoff, progress, note, project) lose 5% confidence per week without access. Decision/hub/research/antipattern are exempt.
|
|
448
|
+
Attention decay: non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access. Decision/preference/hub/research/antipattern are exempt.
|
|
448
449
|
|
|
449
450
|
## Indexing & Graph Building
|
|
450
451
|
|
package/README.md
CHANGED
|
@@ -18,7 +18,8 @@ ClawMem turns your markdown notes, project docs, and research dumps into persist
|
|
|
18
18
|
|
|
19
19
|
- **Surfaces relevant context** on every prompt (context-surfacing hook)
|
|
20
20
|
- **Bootstraps sessions** with your profile, latest handoff, recent decisions, and stale notes
|
|
21
|
-
- **Captures decisions** from session transcripts using a local GGUF observer model
|
|
21
|
+
- **Captures decisions, preferences, milestones, and problems** from session transcripts using a local GGUF observer model
|
|
22
|
+
- **Imports conversation exports** from Claude Code, ChatGPT, Claude.ai, Slack, and plain text via `clawmem mine`
|
|
22
23
|
- **Generates handoffs** at session end so the next session can pick up where you left off
|
|
23
24
|
- **Learns what matters** via a feedback loop that boosts referenced notes and decays unused ones
|
|
24
25
|
- **Guards against prompt injection** in surfaced content
|
|
@@ -643,6 +644,7 @@ clawmem collection list List collections
|
|
|
643
644
|
clawmem collection remove <name> Remove a collection
|
|
644
645
|
|
|
645
646
|
clawmem update [--pull] [--embed] Incremental re-scan
|
|
647
|
+
clawmem mine <dir> [-c name] [--embed] Import conversation exports (Claude, ChatGPT, Slack)
|
|
646
648
|
clawmem embed [-f] Generate fragment embeddings
|
|
647
649
|
clawmem reindex [--force] Full re-index
|
|
648
650
|
clawmem watch File watcher daemon
|
|
@@ -759,7 +761,7 @@ Hooks installed by `clawmem setup hooks`:
|
|
|
759
761
|
| `postcompact-inject` | SessionStart | Re-injects authoritative context after compaction: precompact state + recent decisions + antipatterns + vault context (1200 token budget) |
|
|
760
762
|
| `curator-nudge` | SessionStart | Surfaces curator report actions, nudges when report is stale (>7 days) |
|
|
761
763
|
| `precompact-extract` | PreCompact | Extracts decisions, file paths, open questions before auto-compaction → writes `precompact-state.md` to auto-memory |
|
|
762
|
-
| `decision-extractor` | Stop | GGUF observer extracts structured decisions, infers causal links, detects contradictions with prior decisions |
|
|
764
|
+
| `decision-extractor` | Stop | GGUF observer extracts structured observations (decisions, preferences, milestones, problems, bugfixes, features, refactors, discoveries), infers causal links, detects contradictions with prior decisions |
|
|
763
765
|
| `handoff-generator` | Stop | GGUF observer generates rich handoff, regex fallback |
|
|
764
766
|
| `feedback-loop` | Stop | Silently boosts referenced notes, decays unused ones, records co-activation + usage relations between co-referenced docs, tracks utility signals (surfaced vs referenced ratio for lifecycle automation) |
|
|
765
767
|
|
|
@@ -813,15 +815,19 @@ For WHY and ENTITY queries, the search pipeline expands results through the memo
|
|
|
813
815
|
| Type | Half-life | Baseline | Notes |
|
|
814
816
|
|---|---|---|---|
|
|
815
817
|
| `decision` | ∞ | 0.85 | Never decays |
|
|
818
|
+
| `preference` | ∞ | 0.80 | Never decays — user preferences are durable facts |
|
|
816
819
|
| `hub` | ∞ | 0.80 | Never decays |
|
|
820
|
+
| `antipattern` | ∞ | 0.75 | Never decays — accumulated negative patterns persist |
|
|
821
|
+
| `problem` | 60 days | 0.75 | High priority but resolves over time |
|
|
817
822
|
| `research` | 90 days | 0.70 | |
|
|
823
|
+
| `milestone` | 60 days | 0.70 | Important at the time, fades as project moves forward |
|
|
818
824
|
| `project` | 120 days | 0.65 | |
|
|
819
825
|
| `handoff` | 30 days | 0.60 | Fast decay — most recent matters |
|
|
826
|
+
| `conversation` | 45 days | 0.55 | Imported chat exchanges |
|
|
820
827
|
| `progress` | 45 days | 0.50 | |
|
|
821
828
|
| `note` | 60 days | 0.50 | Default |
|
|
822
|
-
| `antipattern` | ∞ | 0.75 | Never decays — accumulated negative patterns persist |
|
|
823
829
|
|
|
824
|
-
Content types are inferred from frontmatter or file path patterns. Half-lives extend up to 3× for frequently-accessed memories (access reinforcement, decays over 90 days). Non-durable types (handoff, progress, note, project) lose 5% confidence per week without access (attention decay). Decision/hub/research/antipattern are exempt.
|
|
830
|
+
Content types are inferred from frontmatter or file path patterns. Half-lives extend up to 3× for frequently-accessed memories (access reinforcement, decays over 90 days). Non-durable types (handoff, progress, conversation, note, project) lose 5% confidence per week without access (attention decay). Decision/preference/hub/research/antipattern are exempt.
|
|
825
831
|
|
|
826
832
|
**Quality scoring:** Each document gets a `quality_score` (0.0–1.0) computed during indexing based on length, structure (headings, lists), decision/correction keywords, and frontmatter richness. Applied as `qualityMultiplier = 0.7 + 0.6 × qualityScore` (range: 0.7× penalty to 1.3× boost).
|
|
827
833
|
|
|
@@ -868,7 +874,7 @@ Documents are split into semantic fragments (sections, lists, code blocks, front
|
|
|
868
874
|
|
|
869
875
|
### Local Observer Agent
|
|
870
876
|
|
|
871
|
-
Uses the LLM server (shared with query expansion and intent classification) to extract structured observations from session transcripts:
|
|
877
|
+
Uses the LLM server (shared with query expansion and intent classification) to extract structured observations from session transcripts. Observation types: `decision`, `bugfix`, `feature`, `refactor`, `discovery`, `change`, `preference`, `milestone`, `problem`. Each observation includes title, facts, narrative, concepts, and files read/modified. Preferences, milestones, and problems get first-class content_type treatment with dedicated confidence baselines and half-lives instead of being flattened to generic "observation". Falls back to regex patterns if the model is unavailable.
|
|
872
878
|
|
|
873
879
|
### User Profile
|
|
874
880
|
|
|
@@ -943,7 +949,7 @@ title: "Document Title"
|
|
|
943
949
|
tags: [tag1, tag2]
|
|
944
950
|
domain: "infrastructure"
|
|
945
951
|
workstream: "project-name"
|
|
946
|
-
content_type: "decision" # decision|hub|research|project|handoff|progress|note
|
|
952
|
+
content_type: "decision" # decision|preference|hub|research|project|handoff|conversation|progress|note
|
|
947
953
|
review_by: "2026-03-01"
|
|
948
954
|
---
|
|
949
955
|
```
|
package/SKILL.md
CHANGED
|
@@ -442,12 +442,12 @@ compositeScore = (0.10 x searchScore + 0.70 x recencyScore + 0.20 x confidenceSc
|
|
|
442
442
|
|
|
443
443
|
| Content Type | Half-Life | Effect |
|
|
444
444
|
|--------------|-----------|--------|
|
|
445
|
-
| decision, hub | infinity | Never decay |
|
|
445
|
+
| decision, preference, hub | infinity | Never decay |
|
|
446
446
|
| antipattern | infinity | Never decay — accumulated negative patterns persist |
|
|
447
447
|
| project | 120 days | Slow decay |
|
|
448
448
|
| research | 90 days | Moderate decay |
|
|
449
|
-
| note | 60 days | Default |
|
|
450
|
-
| progress | 45 days | Faster decay |
|
|
449
|
+
| problem, milestone, note | 60 days | Default |
|
|
450
|
+
| conversation, progress | 45 days | Faster decay |
|
|
451
451
|
| handoff | 30 days | Fast — recent matters most |
|
|
452
452
|
|
|
453
453
|
Half-lives extend up to 3x for frequently-accessed memories (access reinforcement decays over 90 days).
|
|
@@ -566,6 +566,7 @@ When `decision-extractor` detects a new decision contradicting an old one, the o
|
|
|
566
566
|
- Do NOT pin everything — pin is for persistent high-priority items.
|
|
567
567
|
- Do NOT forget memories to "clean up" — let confidence decay and contradiction detection handle it.
|
|
568
568
|
- Do NOT run `build_graphs` after every reindex — A-MEM creates per-doc links automatically.
|
|
569
|
+
- Do NOT run `clawmem mine` autonomously — it is a bulk ingestion command. Suggest it to the user when they mention old conversation exports, but let them run it.
|
|
569
570
|
|
|
570
571
|
---
|
|
571
572
|
|
|
@@ -657,6 +658,12 @@ Symptom: reindex --force crashes with UNIQUE constraint
|
|
|
657
658
|
-> Force deactivates rows but UNIQUE(collection, path) doesn't discriminate by active flag.
|
|
658
659
|
-> Fixed: indexer.ts reactivates inactive rows instead of inserting.
|
|
659
660
|
|
|
661
|
+
Symptom: `clawmem update` crashes with "Binding expected string, TypedArray, boolean, number, bigint or null"
|
|
662
|
+
-> YAML frontmatter values like `title: 2023-09-27` or `title: true` are coerced by gray-matter
|
|
663
|
+
into Date objects or booleans. Bun's SQLite driver rejects these as bind parameters.
|
|
664
|
+
-> Fixed v0.4.2: `parseDocument()` runtime-checks all frontmatter fields via `str()` helper.
|
|
665
|
+
-> Affects: title, domain, workstream, content_type, review_by.
|
|
666
|
+
|
|
660
667
|
Symptom: CLI reindex/update falls back to node-llama-cpp
|
|
661
668
|
-> GPU env vars only in systemd drop-in, not in wrapper script.
|
|
662
669
|
-> Fixed: bin/clawmem wrapper exports CLAWMEM_EMBED_URL/LLM_URL/RERANK_URL defaults.
|
package/package.json
CHANGED
package/src/clawmem.ts
CHANGED
|
@@ -235,6 +235,101 @@ async function cmdUpdate(args: string[]) {
|
|
|
235
235
|
}
|
|
236
236
|
}
|
|
237
237
|
|
|
238
|
+
async function cmdMine(args: string[]) {
|
|
239
|
+
const { values, positionals } = parseArgs({
|
|
240
|
+
args,
|
|
241
|
+
options: {
|
|
242
|
+
collection: { type: "string", short: "c" },
|
|
243
|
+
embed: { type: "boolean", default: false },
|
|
244
|
+
"dry-run": { type: "boolean", default: false },
|
|
245
|
+
},
|
|
246
|
+
allowPositionals: true,
|
|
247
|
+
});
|
|
248
|
+
|
|
249
|
+
const dir = positionals[0];
|
|
250
|
+
if (!dir) die("Usage: clawmem mine <directory> [-c collection-name] [--embed] [--dry-run]");
|
|
251
|
+
const absDir = pathResolve(dir);
|
|
252
|
+
if (!existsSync(absDir)) die(`Directory not found: ${absDir}`);
|
|
253
|
+
|
|
254
|
+
const { scanConversationDir, normalizeFile, chunkConversation } = await import("./normalize.ts");
|
|
255
|
+
|
|
256
|
+
console.log(`${c.cyan}Scanning for conversation files${c.reset} in ${absDir}`);
|
|
257
|
+
const files = scanConversationDir(absDir);
|
|
258
|
+
if (files.length === 0) die("No conversation files found (.json, .jsonl, .txt, .md)");
|
|
259
|
+
console.log(` Found ${files.length} candidate files`);
|
|
260
|
+
|
|
261
|
+
// Normalize and chunk
|
|
262
|
+
let totalChunks = 0;
|
|
263
|
+
let totalConversations = 0;
|
|
264
|
+
const allChunks: { title: string; body: string; sourcePath: string; chunkIndex: number }[] = [];
|
|
265
|
+
|
|
266
|
+
for (const file of files) {
|
|
267
|
+
const conv = normalizeFile(file);
|
|
268
|
+
if (!conv) continue;
|
|
269
|
+
totalConversations++;
|
|
270
|
+
|
|
271
|
+
const chunks = chunkConversation(conv);
|
|
272
|
+
if (chunks.length === 0) continue;
|
|
273
|
+
|
|
274
|
+
console.log(` ${c.green}✓${c.reset} ${conv.source} (${conv.format}, ${conv.messages.length} messages → ${chunks.length} chunks)`);
|
|
275
|
+
for (const chunk of chunks) {
|
|
276
|
+
chunk.sourcePath = file.replace(absDir + "/", "");
|
|
277
|
+
}
|
|
278
|
+
allChunks.push(...chunks);
|
|
279
|
+
totalChunks += chunks.length;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
if (totalConversations === 0) die("No conversation files could be parsed");
|
|
283
|
+
console.log(`\n${c.bold}Parsed:${c.reset} ${totalConversations} conversations → ${totalChunks} exchange chunks`);
|
|
284
|
+
|
|
285
|
+
if (values["dry-run"]) {
|
|
286
|
+
console.log(`${c.yellow}Dry run — no changes made${c.reset}`);
|
|
287
|
+
return;
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
// Write chunks as markdown to a staging directory (outside source tree), then index
|
|
291
|
+
const collectionName = values.collection || "conversations";
|
|
292
|
+
const { tmpdir } = await import("os");
|
|
293
|
+
const stagingDir = pathResolve(tmpdir(), `clawmem-mine-${Date.now()}`);
|
|
294
|
+
mkdirSync(stagingDir, { recursive: true });
|
|
295
|
+
|
|
296
|
+
const { rmSync } = await import("fs");
|
|
297
|
+
try {
|
|
298
|
+
const writePromises: Promise<number>[] = [];
|
|
299
|
+
for (const chunk of allChunks) {
|
|
300
|
+
const safeSource = chunk.sourcePath.replace(/[\/\\]/g, "_").replace(/\.[^.]+$/, "");
|
|
301
|
+
const filename = `${safeSource}_${String(chunk.chunkIndex).padStart(4, "0")}.md`;
|
|
302
|
+
const esc = (s: string) => s.replace(/"/g, '\\"');
|
|
303
|
+
const frontmatter = [
|
|
304
|
+
"---",
|
|
305
|
+
`title: "${esc(chunk.title)}"`,
|
|
306
|
+
`content_type: conversation`,
|
|
307
|
+
`source: "${esc(chunk.sourcePath)}"`,
|
|
308
|
+
"---",
|
|
309
|
+
"",
|
|
310
|
+
chunk.body,
|
|
311
|
+
].join("\n");
|
|
312
|
+
writePromises.push(Bun.write(pathResolve(stagingDir, filename), frontmatter));
|
|
313
|
+
}
|
|
314
|
+
await Promise.all(writePromises);
|
|
315
|
+
|
|
316
|
+
// Index through existing pipeline
|
|
317
|
+
const s = getStore();
|
|
318
|
+
console.log(`\n${c.cyan}Indexing ${totalChunks} conversation chunks${c.reset} as collection '${collectionName}'`);
|
|
319
|
+
const stats = await indexCollection(s, collectionName, stagingDir, "**/*.md");
|
|
320
|
+
console.log(` ${c.green}+${stats.added}${c.reset} added, ${c.yellow}~${stats.updated}${c.reset} updated, ${c.dim}=${stats.unchanged}${c.reset} unchanged`);
|
|
321
|
+
|
|
322
|
+
if (values.embed) {
|
|
323
|
+
console.log();
|
|
324
|
+
await cmdEmbed([]);
|
|
325
|
+
} else {
|
|
326
|
+
console.log(`\nRun ${c.cyan}clawmem embed${c.reset} to generate embeddings for the imported conversations`);
|
|
327
|
+
}
|
|
328
|
+
} finally {
|
|
329
|
+
rmSync(stagingDir, { recursive: true, force: true });
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
|
|
238
333
|
async function cmdEmbed(args: string[]) {
|
|
239
334
|
const { values } = parseArgs({
|
|
240
335
|
args,
|
|
@@ -1695,6 +1790,9 @@ async function main() {
|
|
|
1695
1790
|
case "update":
|
|
1696
1791
|
await cmdUpdate(subArgs);
|
|
1697
1792
|
break;
|
|
1793
|
+
case "mine":
|
|
1794
|
+
await cmdMine(subArgs);
|
|
1795
|
+
break;
|
|
1698
1796
|
case "embed":
|
|
1699
1797
|
await cmdEmbed(subArgs);
|
|
1700
1798
|
break;
|
|
@@ -2289,6 +2387,7 @@ ${c.bold}Setup:${c.reset}
|
|
|
2289
2387
|
|
|
2290
2388
|
${c.bold}Indexing:${c.reset}
|
|
2291
2389
|
clawmem update [--pull] [--embed] Re-scan collections (--embed auto-embeds)
|
|
2390
|
+
clawmem mine <dir> [-c name] [--embed] Import conversation exports (Claude, ChatGPT, Slack)
|
|
2292
2391
|
clawmem embed [-f] Generate fragment embeddings
|
|
2293
2392
|
clawmem reindex [--force] [--enrich] Full re-index (--enrich: run entity extraction + links on all docs)
|
|
2294
2393
|
clawmem watch File watcher daemon
|
|
@@ -335,7 +335,11 @@ export async function decisionExtractor(
|
|
|
335
335
|
const doc = store.findActiveDocument("_clawmem", obsPath);
|
|
336
336
|
if (doc) {
|
|
337
337
|
store.updateDocumentMeta(doc.id, {
|
|
338
|
-
content_type: obs.type === "decision" ? "decision"
|
|
338
|
+
content_type: obs.type === "decision" ? "decision"
|
|
339
|
+
: obs.type === "preference" ? "preference"
|
|
340
|
+
: obs.type === "milestone" ? "milestone"
|
|
341
|
+
: obs.type === "problem" ? "problem"
|
|
342
|
+
: "observation",
|
|
339
343
|
confidence: 0.80,
|
|
340
344
|
});
|
|
341
345
|
store.updateObservationFields(obsPath, "_clawmem", {
|
package/src/memory.ts
CHANGED
|
@@ -12,9 +12,13 @@
|
|
|
12
12
|
export const HALF_LIVES: Record<string, number> = {
|
|
13
13
|
handoff: 30,
|
|
14
14
|
progress: 45,
|
|
15
|
+
conversation: 45,
|
|
16
|
+
problem: 60,
|
|
17
|
+
milestone: 60,
|
|
15
18
|
note: 60,
|
|
16
19
|
research: 90,
|
|
17
20
|
project: 120,
|
|
21
|
+
preference: Infinity,
|
|
18
22
|
decision: Infinity,
|
|
19
23
|
hub: Infinity,
|
|
20
24
|
};
|
|
@@ -25,10 +29,14 @@ export const HALF_LIVES: Record<string, number> = {
|
|
|
25
29
|
|
|
26
30
|
export const TYPE_BASELINES: Record<string, number> = {
|
|
27
31
|
decision: 0.85,
|
|
32
|
+
preference: 0.80,
|
|
28
33
|
hub: 0.80,
|
|
34
|
+
problem: 0.75,
|
|
29
35
|
research: 0.70,
|
|
36
|
+
milestone: 0.70,
|
|
30
37
|
project: 0.65,
|
|
31
38
|
handoff: 0.60,
|
|
39
|
+
conversation: 0.55,
|
|
32
40
|
progress: 0.50,
|
|
33
41
|
note: 0.50,
|
|
34
42
|
};
|
|
@@ -37,7 +45,7 @@ export const TYPE_BASELINES: Record<string, number> = {
|
|
|
37
45
|
// Content Type Inference
|
|
38
46
|
// =============================================================================
|
|
39
47
|
|
|
40
|
-
export type ContentType = "decision" | "hub" | "research" | "project" | "handoff" | "progress" | "note";
|
|
48
|
+
export type ContentType = "decision" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
|
|
41
49
|
|
|
42
50
|
export function inferContentType(path: string, explicitType?: string): ContentType {
|
|
43
51
|
if (explicitType && explicitType in TYPE_BASELINES) return explicitType as ContentType;
|
|
@@ -48,6 +56,7 @@ export function inferContentType(path: string, explicitType?: string): ContentTy
|
|
|
48
56
|
if (lower.includes("research") || lower.includes("investigation") || lower.includes("analysis")) return "research";
|
|
49
57
|
if (lower.includes("project") || lower.includes("epic") || lower.includes("initiative")) return "project";
|
|
50
58
|
if (lower.includes("handoff") || lower.includes("handover") || lower.includes("session")) return "handoff";
|
|
59
|
+
if (lower.includes("conversation") || lower.includes("convo") || lower.includes("chat") || lower.includes("transcript")) return "conversation";
|
|
51
60
|
if (lower.includes("progress") || lower.includes("status") || lower.includes("standup") || lower.includes("changelog")) return "progress";
|
|
52
61
|
return "note";
|
|
53
62
|
}
|
|
@@ -65,7 +74,7 @@ export type MemoryType = "episodic" | "semantic" | "procedural";
|
|
|
65
74
|
* - procedural: how-to, patterns, workflows (actionable)
|
|
66
75
|
*/
|
|
67
76
|
export function inferMemoryType(path: string, contentType: string, body?: string): MemoryType {
|
|
68
|
-
if (["handoff", "progress"].includes(contentType)) return "episodic";
|
|
77
|
+
if (["handoff", "progress", "conversation"].includes(contentType)) return "episodic";
|
|
69
78
|
if (["decision", "hub", "research"].includes(contentType)) return "semantic";
|
|
70
79
|
if (body && /\b(step\s+\d|workflow|recipe|how\s+to|procedure|runbook|playbook)\b/i.test(body)) return "procedural";
|
|
71
80
|
if (path.includes("sop") || path.includes("runbook") || path.includes("playbook")) return "procedural";
|
|
@@ -141,7 +150,7 @@ export function confidenceScore(
|
|
|
141
150
|
// Attention decay: reduce confidence if not accessed recently (5% per week)
|
|
142
151
|
// Only apply to episodic/progress content — skip for durable types (decision, hub, research)
|
|
143
152
|
// Also skip if last_accessed_at was backfilled from modified_at (no real access yet)
|
|
144
|
-
const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern"]);
|
|
153
|
+
const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern", "preference"]);
|
|
145
154
|
let attentionDecay = 1.0;
|
|
146
155
|
if (lastAccessedAt && !DECAY_EXEMPT_TYPES.has(contentType)) {
|
|
147
156
|
const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
|
package/src/normalize.ts
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* normalize.ts — Conversation format normalizer for ClawMem
|
|
3
|
+
*
|
|
4
|
+
* Converts chat export files into normalized markdown documents suitable for
|
|
5
|
+
* ClawMem's indexing pipeline. Supports:
|
|
6
|
+
* - Claude Code JSONL sessions
|
|
7
|
+
* - Claude.ai JSON exports (flat + privacy export)
|
|
8
|
+
* - ChatGPT conversations.json (mapping tree)
|
|
9
|
+
* - Slack JSON exports (DMs + channels)
|
|
10
|
+
* - Plain text with user/assistant markers
|
|
11
|
+
*
|
|
12
|
+
* Each exchange pair (user + assistant) becomes one markdown chunk.
|
|
13
|
+
* Inspired by MemPalace normalize.py, rewritten for TypeScript/Bun.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { readFileSync, readdirSync, statSync } from "fs";
|
|
17
|
+
import { basename, extname, join, relative } from "path";
|
|
18
|
+
|
|
19
|
+
// =============================================================================
|
|
20
|
+
// Types
|
|
21
|
+
// =============================================================================
|
|
22
|
+
|
|
23
|
+
export type Message = { role: "user" | "assistant"; content: string };
|
|
24
|
+
|
|
25
|
+
export type NormalizedConversation = {
|
|
26
|
+
source: string; // original filename
|
|
27
|
+
format: string; // detected format
|
|
28
|
+
messages: Message[]; // normalized messages
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
export type ConversationChunk = {
|
|
32
|
+
title: string; // "Exchange N" or extracted topic
|
|
33
|
+
body: string; // markdown body
|
|
34
|
+
sourcePath: string; // relative path of source file
|
|
35
|
+
chunkIndex: number;
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
// =============================================================================
|
|
39
|
+
// Format Detection & Normalization
|
|
40
|
+
// =============================================================================
|
|
41
|
+
|
|
42
|
+
const CONVO_EXTENSIONS = new Set([".txt", ".md", ".json", ".jsonl"]);
|
|
43
|
+
const SKIP_DIRS = new Set([".git", "node_modules", "__pycache__", ".venv", "venv", "dist", "build", ".next", ".mempalace", ".grepai", "tool-results"]);
|
|
44
|
+
|
|
45
|
+
export function normalizeFile(filepath: string): NormalizedConversation | null {
|
|
46
|
+
let content: string;
|
|
47
|
+
try {
|
|
48
|
+
content = readFileSync(filepath, "utf-8");
|
|
49
|
+
} catch {
|
|
50
|
+
return null;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
if (!content.trim()) return null;
|
|
54
|
+
|
|
55
|
+
const ext = extname(filepath).toLowerCase();
|
|
56
|
+
|
|
57
|
+
// Try JSONL formats first (Claude Code, Codex CLI)
|
|
58
|
+
if (ext === ".jsonl" || (content.trim().startsWith("{") && content.includes("\n{"))) {
|
|
59
|
+
const cc = tryClaudeCodeJsonl(content);
|
|
60
|
+
if (cc) return { source: basename(filepath), format: "claude-code", messages: cc };
|
|
61
|
+
|
|
62
|
+
const codex = tryCodexJsonl(content);
|
|
63
|
+
if (codex) return { source: basename(filepath), format: "codex-cli", messages: codex };
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// Try JSON formats
|
|
67
|
+
if (ext === ".json" || content.trim().startsWith("{") || content.trim().startsWith("[")) {
|
|
68
|
+
try {
|
|
69
|
+
const data = JSON.parse(content);
|
|
70
|
+
|
|
71
|
+
const claude = tryClaudeAiJson(data);
|
|
72
|
+
if (claude) return { source: basename(filepath), format: "claude-ai", messages: claude };
|
|
73
|
+
|
|
74
|
+
const chatgpt = tryChatGptJson(data);
|
|
75
|
+
if (chatgpt) return { source: basename(filepath), format: "chatgpt", messages: chatgpt };
|
|
76
|
+
|
|
77
|
+
const slack = trySlackJson(data);
|
|
78
|
+
if (slack) return { source: basename(filepath), format: "slack", messages: slack };
|
|
79
|
+
} catch {
|
|
80
|
+
// Not valid JSON
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Try plain text with user/assistant markers
|
|
85
|
+
const plain = tryPlainText(content);
|
|
86
|
+
if (plain) return { source: basename(filepath), format: "plain-text", messages: plain };
|
|
87
|
+
|
|
88
|
+
return null;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// =============================================================================
|
|
92
|
+
// Format Parsers
|
|
93
|
+
// =============================================================================
|
|
94
|
+
|
|
95
|
+
function tryClaudeCodeJsonl(content: string): Message[] | null {
|
|
96
|
+
const lines = content.trim().split("\n").filter(l => l.trim());
|
|
97
|
+
const messages: Message[] = [];
|
|
98
|
+
|
|
99
|
+
for (const line of lines) {
|
|
100
|
+
let entry: any;
|
|
101
|
+
try { entry = JSON.parse(line); } catch { continue; }
|
|
102
|
+
if (typeof entry !== "object" || !entry) continue;
|
|
103
|
+
|
|
104
|
+
const msgType = entry.type ?? "";
|
|
105
|
+
const message = entry.message ?? {};
|
|
106
|
+
|
|
107
|
+
if (msgType === "human" || msgType === "user") {
|
|
108
|
+
const text = extractContent(message.content);
|
|
109
|
+
if (text) messages.push({ role: "user", content: text });
|
|
110
|
+
} else if (msgType === "assistant") {
|
|
111
|
+
const text = extractContent(message.content);
|
|
112
|
+
if (text) messages.push({ role: "assistant", content: text });
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return messages.length >= 2 ? messages : null;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
function tryCodexJsonl(content: string): Message[] | null {
|
|
120
|
+
const lines = content.trim().split("\n").filter(l => l.trim());
|
|
121
|
+
const messages: Message[] = [];
|
|
122
|
+
let hasSessionMeta = false;
|
|
123
|
+
|
|
124
|
+
for (const line of lines) {
|
|
125
|
+
let entry: any;
|
|
126
|
+
try { entry = JSON.parse(line); } catch { continue; }
|
|
127
|
+
if (typeof entry !== "object" || !entry) continue;
|
|
128
|
+
|
|
129
|
+
if (entry.type === "session_meta") { hasSessionMeta = true; continue; }
|
|
130
|
+
if (entry.type !== "event_msg") continue;
|
|
131
|
+
|
|
132
|
+
const payload = entry.payload;
|
|
133
|
+
if (typeof payload !== "object" || !payload) continue;
|
|
134
|
+
|
|
135
|
+
const text = typeof payload.message === "string" ? payload.message.trim() : "";
|
|
136
|
+
if (!text) continue;
|
|
137
|
+
|
|
138
|
+
if (payload.type === "user_message") messages.push({ role: "user", content: text });
|
|
139
|
+
else if (payload.type === "agent_message") messages.push({ role: "assistant", content: text });
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return messages.length >= 2 && hasSessionMeta ? messages : null;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
function tryClaudeAiJson(data: any): Message[] | null {
|
|
146
|
+
// Privacy export: array of conversation objects with chat_messages
|
|
147
|
+
if (Array.isArray(data) && data.length > 0 && data[0]?.chat_messages) {
|
|
148
|
+
const messages: Message[] = [];
|
|
149
|
+
for (const convo of data) {
|
|
150
|
+
for (const item of convo.chat_messages ?? []) {
|
|
151
|
+
const role = item.role ?? "";
|
|
152
|
+
const text = extractContent(item.content);
|
|
153
|
+
if ((role === "user" || role === "human") && text) messages.push({ role: "user", content: text });
|
|
154
|
+
else if ((role === "assistant" || role === "ai") && text) messages.push({ role: "assistant", content: text });
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
return messages.length >= 2 ? messages : null;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// Flat messages list or wrapped in { messages: [...] }
|
|
161
|
+
let msgs = data;
|
|
162
|
+
if (typeof data === "object" && !Array.isArray(data)) {
|
|
163
|
+
msgs = data.messages ?? data.chat_messages ?? [];
|
|
164
|
+
}
|
|
165
|
+
if (!Array.isArray(msgs)) return null;
|
|
166
|
+
|
|
167
|
+
const messages: Message[] = [];
|
|
168
|
+
for (const item of msgs) {
|
|
169
|
+
if (typeof item !== "object" || !item) continue;
|
|
170
|
+
const role = item.role ?? "";
|
|
171
|
+
const text = extractContent(item.content);
|
|
172
|
+
if ((role === "user" || role === "human") && text) messages.push({ role: "user", content: text });
|
|
173
|
+
else if ((role === "assistant" || role === "ai") && text) messages.push({ role: "assistant", content: text });
|
|
174
|
+
}
|
|
175
|
+
return messages.length >= 2 ? messages : null;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
function tryChatGptJson(data: any): Message[] | null {
|
|
179
|
+
if (typeof data !== "object" || !data?.mapping) return null;
|
|
180
|
+
const mapping = data.mapping;
|
|
181
|
+
const messages: Message[] = [];
|
|
182
|
+
|
|
183
|
+
// Find root node (parent=null, no message)
|
|
184
|
+
let rootId: string | null = null;
|
|
185
|
+
let fallback: string | null = null;
|
|
186
|
+
for (const [nodeId, node] of Object.entries(mapping) as [string, any][]) {
|
|
187
|
+
if (node.parent === null) {
|
|
188
|
+
if (!node.message) { rootId = nodeId; break; }
|
|
189
|
+
else if (!fallback) fallback = nodeId;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
rootId = rootId ?? fallback;
|
|
193
|
+
if (!rootId) return null;
|
|
194
|
+
|
|
195
|
+
// Walk the tree
|
|
196
|
+
let currentId: string | null = rootId;
|
|
197
|
+
const visited = new Set<string>();
|
|
198
|
+
while (currentId && !visited.has(currentId)) {
|
|
199
|
+
visited.add(currentId);
|
|
200
|
+
const node = (mapping as any)[currentId];
|
|
201
|
+
if (node?.message) {
|
|
202
|
+
const role = node.message.author?.role ?? "";
|
|
203
|
+
const content = node.message.content;
|
|
204
|
+
const parts = content?.parts ?? [];
|
|
205
|
+
const text = parts.filter((p: any) => typeof p === "string").join(" ").trim();
|
|
206
|
+
if (role === "user" && text) messages.push({ role: "user", content: text });
|
|
207
|
+
else if (role === "assistant" && text) messages.push({ role: "assistant", content: text });
|
|
208
|
+
}
|
|
209
|
+
currentId = node?.children?.[0] ?? null;
|
|
210
|
+
}
|
|
211
|
+
return messages.length >= 2 ? messages : null;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
function trySlackJson(data: any): Message[] | null {
|
|
215
|
+
if (!Array.isArray(data)) return null;
|
|
216
|
+
|
|
217
|
+
// Count unique speakers — only support 2-party DMs
|
|
218
|
+
const speakers = new Set<string>();
|
|
219
|
+
for (const item of data) {
|
|
220
|
+
if (typeof item !== "object" || item?.type !== "message") continue;
|
|
221
|
+
const userId = item.user ?? item.username ?? "";
|
|
222
|
+
if (userId) speakers.add(userId);
|
|
223
|
+
if (speakers.size > 2) return null; // multi-person channel, unsupported
|
|
224
|
+
}
|
|
225
|
+
if (speakers.size < 2) return null;
|
|
226
|
+
|
|
227
|
+
const messages: Message[] = [];
|
|
228
|
+
const speakerList = [...speakers];
|
|
229
|
+
const roleMap: Record<string, "user" | "assistant"> = {
|
|
230
|
+
[speakerList[0]]: "user",
|
|
231
|
+
[speakerList[1]]: "assistant",
|
|
232
|
+
};
|
|
233
|
+
|
|
234
|
+
for (const item of data) {
|
|
235
|
+
if (typeof item !== "object" || item?.type !== "message") continue;
|
|
236
|
+
const userId = item.user ?? item.username ?? "";
|
|
237
|
+
const text = (item.text ?? "").trim();
|
|
238
|
+
if (!text || !roleMap[userId]) continue;
|
|
239
|
+
messages.push({ role: roleMap[userId], content: text });
|
|
240
|
+
}
|
|
241
|
+
return messages.length >= 2 ? messages : null;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
function tryPlainText(content: string): Message[] | null {
|
|
245
|
+
const messages: Message[] = [];
|
|
246
|
+
// Only match explicit role prefixes (User:, Human:, Assistant:, etc.)
|
|
247
|
+
// Do NOT match bare blockquotes (> ) — too many false positives with markdown
|
|
248
|
+
const lines = content.split("\n");
|
|
249
|
+
let currentRole: "user" | "assistant" | null = null;
|
|
250
|
+
let currentText: string[] = [];
|
|
251
|
+
|
|
252
|
+
for (const line of lines) {
|
|
253
|
+
const trimmed = line.trim();
|
|
254
|
+
let newRole: "user" | "assistant" | null = null;
|
|
255
|
+
|
|
256
|
+
if (/^(User|Human)\s*:\s*/i.test(trimmed)) {
|
|
257
|
+
newRole = "user";
|
|
258
|
+
} else if (/^(Assistant|AI|Claude|GPT|Bot)\s*:\s*/i.test(trimmed)) {
|
|
259
|
+
newRole = "assistant";
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
if (newRole) {
|
|
263
|
+
if (currentRole && currentText.length > 0) {
|
|
264
|
+
const text = currentText.join("\n").trim();
|
|
265
|
+
if (text) messages.push({ role: currentRole, content: text });
|
|
266
|
+
}
|
|
267
|
+
currentRole = newRole;
|
|
268
|
+
// Strip the role prefix
|
|
269
|
+
const cleaned = trimmed.replace(/^(User|Human|Assistant|AI|Claude|GPT|Bot)\s*:\s*/i, "");
|
|
270
|
+
currentText = cleaned ? [cleaned] : [];
|
|
271
|
+
} else if (currentRole) {
|
|
272
|
+
currentText.push(trimmed);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
// Flush last
|
|
277
|
+
if (currentRole && currentText.length > 0) {
|
|
278
|
+
const text = currentText.join("\n").trim();
|
|
279
|
+
if (text) messages.push({ role: currentRole, content: text });
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// Require at least 2 exchanges AND both roles present (prevents false positives)
|
|
283
|
+
const hasUser = messages.some(m => m.role === "user");
|
|
284
|
+
const hasAssistant = messages.some(m => m.role === "assistant");
|
|
285
|
+
return messages.length >= 4 && hasUser && hasAssistant ? messages : null;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// =============================================================================
|
|
289
|
+
// Content Extraction
|
|
290
|
+
// =============================================================================
|
|
291
|
+
|
|
292
|
+
function extractContent(content: any): string {
|
|
293
|
+
if (typeof content === "string") return content.trim();
|
|
294
|
+
if (Array.isArray(content)) {
|
|
295
|
+
return content
|
|
296
|
+
.map(item => {
|
|
297
|
+
if (typeof item === "string") return item;
|
|
298
|
+
if (typeof item === "object" && item?.type === "text") return item.text ?? "";
|
|
299
|
+
return "";
|
|
300
|
+
})
|
|
301
|
+
.join(" ")
|
|
302
|
+
.trim();
|
|
303
|
+
}
|
|
304
|
+
if (typeof content === "object" && content) return (content.text ?? "").trim();
|
|
305
|
+
return "";
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
// =============================================================================
|
|
309
|
+
// Chunking — Exchange Pairs
|
|
310
|
+
// =============================================================================
|
|
311
|
+
|
|
312
|
+
// Minimum rendered-markdown length (in chars) for an exchange to be kept
// as a chunk; shorter exchanges are dropped as noise.
const MIN_CHUNK_CHARS = 30;
|
|
313
|
+
|
|
314
|
+
export function chunkConversation(conv: NormalizedConversation): ConversationChunk[] {
|
|
315
|
+
const chunks: ConversationChunk[] = [];
|
|
316
|
+
const { messages, source } = conv;
|
|
317
|
+
|
|
318
|
+
for (let i = 0; i < messages.length; i++) {
|
|
319
|
+
if (messages[i].role !== "user") continue;
|
|
320
|
+
|
|
321
|
+
const userMsg = messages[i].content;
|
|
322
|
+
// Collect ALL consecutive assistant messages (handles split replies)
|
|
323
|
+
const assistantParts: string[] = [];
|
|
324
|
+
while (i + 1 < messages.length && messages[i + 1].role === "assistant") {
|
|
325
|
+
assistantParts.push(messages[i + 1].content);
|
|
326
|
+
i++;
|
|
327
|
+
}
|
|
328
|
+
const assistantMsg = assistantParts.join("\n\n");
|
|
329
|
+
|
|
330
|
+
// Build markdown chunk
|
|
331
|
+
const title = extractExchangeTitle(userMsg, chunks.length + 1);
|
|
332
|
+
const body = formatExchangeMarkdown(userMsg, assistantMsg);
|
|
333
|
+
|
|
334
|
+
if (body.length >= MIN_CHUNK_CHARS) {
|
|
335
|
+
chunks.push({
|
|
336
|
+
title,
|
|
337
|
+
body,
|
|
338
|
+
sourcePath: source,
|
|
339
|
+
chunkIndex: chunks.length,
|
|
340
|
+
});
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
return chunks;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
function extractExchangeTitle(userMessage: string, index: number): string {
|
|
348
|
+
// Use the first line/sentence of the user message, capped at 80 chars
|
|
349
|
+
const firstLine = userMessage.split("\n")[0].trim();
|
|
350
|
+
if (firstLine.length <= 80) return firstLine;
|
|
351
|
+
return firstLine.slice(0, 77) + "...";
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
function formatExchangeMarkdown(userMsg: string, assistantMsg: string): string {
|
|
355
|
+
const lines: string[] = [];
|
|
356
|
+
lines.push("**User:**", userMsg, "");
|
|
357
|
+
if (assistantMsg) {
|
|
358
|
+
lines.push("**Assistant:**", assistantMsg, "");
|
|
359
|
+
}
|
|
360
|
+
return lines.join("\n");
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// =============================================================================
|
|
364
|
+
// Directory Scanner
|
|
365
|
+
// =============================================================================
|
|
366
|
+
|
|
367
|
+
export function scanConversationDir(dir: string): string[] {
|
|
368
|
+
const files: string[] = [];
|
|
369
|
+
|
|
370
|
+
function walk(d: string) {
|
|
371
|
+
let entries: string[];
|
|
372
|
+
try { entries = readdirSync(d); } catch { return; }
|
|
373
|
+
|
|
374
|
+
for (const entry of entries) {
|
|
375
|
+
const fullPath = join(d, entry);
|
|
376
|
+
try {
|
|
377
|
+
const stat = statSync(fullPath);
|
|
378
|
+
if (stat.isDirectory()) {
|
|
379
|
+
if (!SKIP_DIRS.has(entry)) walk(fullPath);
|
|
380
|
+
} else if (stat.isFile()) {
|
|
381
|
+
const ext = extname(entry).toLowerCase();
|
|
382
|
+
if (CONVO_EXTENSIONS.has(ext)) files.push(fullPath);
|
|
383
|
+
}
|
|
384
|
+
} catch { continue; }
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
walk(dir);
|
|
389
|
+
return files;
|
|
390
|
+
}
|
package/src/observer.ts
CHANGED
|
@@ -15,7 +15,7 @@ import { MAX_LLM_GENERATE_TIMEOUT_MS } from "./limits.ts";
|
|
|
15
15
|
// =============================================================================
|
|
16
16
|
|
|
17
17
|
export type Observation = {
|
|
18
|
-
type: "decision" | "bugfix" | "feature" | "refactor" | "discovery" | "change";
|
|
18
|
+
type: "decision" | "bugfix" | "feature" | "refactor" | "discovery" | "change" | "preference" | "milestone" | "problem";
|
|
19
19
|
title: string;
|
|
20
20
|
facts: string[];
|
|
21
21
|
narrative: string;
|
|
@@ -51,7 +51,7 @@ const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding sessio
|
|
|
51
51
|
For each significant action, decision, or discovery, output an <observation> XML element.
|
|
52
52
|
|
|
53
53
|
<observation>
|
|
54
|
-
<type>one of: decision, bugfix, feature, refactor, discovery, change</type>
|
|
54
|
+
<type>one of: decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem</type>
|
|
55
55
|
<title>Brief descriptive title (max 80 chars)</title>
|
|
56
56
|
<facts>
|
|
57
57
|
<fact>Individual atomic fact</fact>
|
|
@@ -69,7 +69,12 @@ Rules:
|
|
|
69
69
|
- Each fact should be a standalone, atomic piece of information
|
|
70
70
|
- The narrative should explain WHY something was done, not just WHAT
|
|
71
71
|
- Only include files that were explicitly mentioned in the transcript
|
|
72
|
-
- If no significant observations, output nothing
|
|
72
|
+
- If no significant observations, output nothing
|
|
73
|
+
|
|
74
|
+
Type guidance:
|
|
75
|
+
- preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
|
|
76
|
+
- milestone: significant completion point, version release, deployment, or phase transition
|
|
77
|
+
- problem: persistent issue, recurring bug, architectural limitation, or unresolved blocker`;
|
|
73
78
|
|
|
74
79
|
const SUMMARY_SYSTEM_PROMPT = `You are a session summarizer. Analyze this coding session transcript and output a structured summary.
|
|
75
80
|
|
|
@@ -118,6 +123,7 @@ function prepareTranscript(messages: TranscriptMessage[]): string {
|
|
|
118
123
|
|
|
119
124
|
const VALID_OBSERVATION_TYPES = new Set([
|
|
120
125
|
"decision", "bugfix", "feature", "refactor", "discovery", "change",
|
|
126
|
+
"preference", "milestone", "problem",
|
|
121
127
|
]);
|
|
122
128
|
|
|
123
129
|
const VALID_CONCEPTS = new Set([
|