clawmem 0.4.2 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +24 -6
- package/CLAUDE.md +24 -6
- package/README.md +27 -12
- package/SKILL.md +23 -5
- package/package.json +1 -1
- package/src/clawmem.ts +197 -0
- package/src/hooks/decision-extractor.ts +97 -1
- package/src/hooks/session-bootstrap.ts +84 -29
- package/src/llm.ts +120 -16
- package/src/mcp.ts +148 -0
- package/src/memory.ts +12 -3
- package/src/normalize.ts +390 -0
- package/src/observer.ts +9 -3
- package/src/store.ts +120 -1
package/src/memory.ts
CHANGED
|
@@ -12,9 +12,13 @@
|
|
|
12
12
|
// Per-content-type decay half-life, in days: the number of days after which
// a document's time-based relevance halves. Infinity = durable knowledge
// that never decays.
export const HALF_LIVES: Record<string, number> = {
  handoff: 30,          // session handoffs go stale fastest
  progress: 45,
  conversation: 45,
  problem: 60,
  milestone: 60,
  note: 60,
  research: 90,
  project: 120,
  preference: Infinity, // user preferences are treated as permanent
  decision: Infinity,   // decisions remain authoritative until revised
  hub: Infinity,
};
|
|
@@ -25,10 +29,14 @@ export const HALF_LIVES: Record<string, number> = {
|
|
|
25
29
|
|
|
26
30
|
// Baseline confidence score (0-1) assigned per content type before any
// access/decay adjustments. Also doubles as the set of recognized explicit
// content types (see inferContentType's `explicitType in TYPE_BASELINES`).
export const TYPE_BASELINES: Record<string, number> = {
  decision: 0.85,     // highest-trust durable content
  preference: 0.80,
  hub: 0.80,
  problem: 0.75,
  research: 0.70,
  milestone: 0.70,
  project: 0.65,
  handoff: 0.60,
  conversation: 0.55,
  progress: 0.50,
  note: 0.50,         // default/fallback type
};
|
|
@@ -37,7 +45,7 @@ export const TYPE_BASELINES: Record<string, number> = {
|
|
|
37
45
|
// Content Type Inference
|
|
38
46
|
// =============================================================================
|
|
39
47
|
|
|
40
|
-
export type ContentType = "decision" | "hub" | "research" | "project" | "handoff" | "progress" | "note";
|
|
48
|
+
export type ContentType = "decision" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
|
|
41
49
|
|
|
42
50
|
export function inferContentType(path: string, explicitType?: string): ContentType {
|
|
43
51
|
if (explicitType && explicitType in TYPE_BASELINES) return explicitType as ContentType;
|
|
@@ -48,6 +56,7 @@ export function inferContentType(path: string, explicitType?: string): ContentTy
|
|
|
48
56
|
if (lower.includes("research") || lower.includes("investigation") || lower.includes("analysis")) return "research";
|
|
49
57
|
if (lower.includes("project") || lower.includes("epic") || lower.includes("initiative")) return "project";
|
|
50
58
|
if (lower.includes("handoff") || lower.includes("handover") || lower.includes("session")) return "handoff";
|
|
59
|
+
if (lower.includes("conversation") || lower.includes("convo") || lower.includes("chat") || lower.includes("transcript")) return "conversation";
|
|
51
60
|
if (lower.includes("progress") || lower.includes("status") || lower.includes("standup") || lower.includes("changelog")) return "progress";
|
|
52
61
|
return "note";
|
|
53
62
|
}
|
|
@@ -65,7 +74,7 @@ export type MemoryType = "episodic" | "semantic" | "procedural";
|
|
|
65
74
|
* - procedural: how-to, patterns, workflows (actionable)
|
|
66
75
|
*/
|
|
67
76
|
export function inferMemoryType(path: string, contentType: string, body?: string): MemoryType {
|
|
68
|
-
if (["handoff", "progress"].includes(contentType)) return "episodic";
|
|
77
|
+
if (["handoff", "progress", "conversation"].includes(contentType)) return "episodic";
|
|
69
78
|
if (["decision", "hub", "research"].includes(contentType)) return "semantic";
|
|
70
79
|
if (body && /\b(step\s+\d|workflow|recipe|how\s+to|procedure|runbook|playbook)\b/i.test(body)) return "procedural";
|
|
71
80
|
if (path.includes("sop") || path.includes("runbook") || path.includes("playbook")) return "procedural";
|
|
@@ -141,7 +150,7 @@ export function confidenceScore(
|
|
|
141
150
|
// Attention decay: reduce confidence if not accessed recently (5% per week)
|
|
142
151
|
// Only apply to episodic/progress content — skip for durable types (decision, hub, research)
|
|
143
152
|
// Also skip if last_accessed_at was backfilled from modified_at (no real access yet)
|
|
144
|
-
const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern"]);
|
|
153
|
+
const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern", "preference"]);
|
|
145
154
|
let attentionDecay = 1.0;
|
|
146
155
|
if (lastAccessedAt && !DECAY_EXEMPT_TYPES.has(contentType)) {
|
|
147
156
|
const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
|
package/src/normalize.ts
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* normalize.ts — Conversation format normalizer for ClawMem
|
|
3
|
+
*
|
|
4
|
+
* Converts chat export files into normalized markdown documents suitable for
|
|
5
|
+
* ClawMem's indexing pipeline. Supports:
|
|
6
|
+
* - Claude Code JSONL sessions
|
|
7
|
+
* - Claude.ai JSON exports (flat + privacy export)
|
|
8
|
+
* - ChatGPT conversations.json (mapping tree)
|
|
9
|
+
* - Slack JSON exports (DMs + channels)
|
|
10
|
+
* - Plain text with user/assistant markers
|
|
11
|
+
*
|
|
12
|
+
* Each exchange pair (user + assistant) becomes one markdown chunk.
|
|
13
|
+
* Inspired by MemPalace normalize.py, rewritten for TypeScript/Bun.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { readFileSync, readdirSync, statSync } from "fs";
|
|
17
|
+
import { basename, extname, join, relative } from "path";
|
|
18
|
+
|
|
19
|
+
// =============================================================================
|
|
20
|
+
// Types
|
|
21
|
+
// =============================================================================
|
|
22
|
+
|
|
23
|
+
export type Message = { role: "user" | "assistant"; content: string };
|
|
24
|
+
|
|
25
|
+
export type NormalizedConversation = {
|
|
26
|
+
source: string; // original filename
|
|
27
|
+
format: string; // detected format
|
|
28
|
+
messages: Message[]; // normalized messages
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
export type ConversationChunk = {
|
|
32
|
+
title: string; // "Exchange N" or extracted topic
|
|
33
|
+
body: string; // markdown body
|
|
34
|
+
sourcePath: string; // relative path of source file
|
|
35
|
+
chunkIndex: number;
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
// =============================================================================
|
|
39
|
+
// Format Detection & Normalization
|
|
40
|
+
// =============================================================================
|
|
41
|
+
|
|
42
|
+
const CONVO_EXTENSIONS = new Set([".txt", ".md", ".json", ".jsonl"]);
|
|
43
|
+
const SKIP_DIRS = new Set([".git", "node_modules", "__pycache__", ".venv", "venv", "dist", "build", ".next", ".mempalace", ".grepai", "tool-results"]);
|
|
44
|
+
|
|
45
|
+
export function normalizeFile(filepath: string): NormalizedConversation | null {
|
|
46
|
+
let content: string;
|
|
47
|
+
try {
|
|
48
|
+
content = readFileSync(filepath, "utf-8");
|
|
49
|
+
} catch {
|
|
50
|
+
return null;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
if (!content.trim()) return null;
|
|
54
|
+
|
|
55
|
+
const ext = extname(filepath).toLowerCase();
|
|
56
|
+
|
|
57
|
+
// Try JSONL formats first (Claude Code, Codex CLI)
|
|
58
|
+
if (ext === ".jsonl" || (content.trim().startsWith("{") && content.includes("\n{"))) {
|
|
59
|
+
const cc = tryClaudeCodeJsonl(content);
|
|
60
|
+
if (cc) return { source: basename(filepath), format: "claude-code", messages: cc };
|
|
61
|
+
|
|
62
|
+
const codex = tryCodexJsonl(content);
|
|
63
|
+
if (codex) return { source: basename(filepath), format: "codex-cli", messages: codex };
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
// Try JSON formats
|
|
67
|
+
if (ext === ".json" || content.trim().startsWith("{") || content.trim().startsWith("[")) {
|
|
68
|
+
try {
|
|
69
|
+
const data = JSON.parse(content);
|
|
70
|
+
|
|
71
|
+
const claude = tryClaudeAiJson(data);
|
|
72
|
+
if (claude) return { source: basename(filepath), format: "claude-ai", messages: claude };
|
|
73
|
+
|
|
74
|
+
const chatgpt = tryChatGptJson(data);
|
|
75
|
+
if (chatgpt) return { source: basename(filepath), format: "chatgpt", messages: chatgpt };
|
|
76
|
+
|
|
77
|
+
const slack = trySlackJson(data);
|
|
78
|
+
if (slack) return { source: basename(filepath), format: "slack", messages: slack };
|
|
79
|
+
} catch {
|
|
80
|
+
// Not valid JSON
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
// Try plain text with user/assistant markers
|
|
85
|
+
const plain = tryPlainText(content);
|
|
86
|
+
if (plain) return { source: basename(filepath), format: "plain-text", messages: plain };
|
|
87
|
+
|
|
88
|
+
return null;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
// =============================================================================
|
|
92
|
+
// Format Parsers
|
|
93
|
+
// =============================================================================
|
|
94
|
+
|
|
95
|
+
function tryClaudeCodeJsonl(content: string): Message[] | null {
|
|
96
|
+
const lines = content.trim().split("\n").filter(l => l.trim());
|
|
97
|
+
const messages: Message[] = [];
|
|
98
|
+
|
|
99
|
+
for (const line of lines) {
|
|
100
|
+
let entry: any;
|
|
101
|
+
try { entry = JSON.parse(line); } catch { continue; }
|
|
102
|
+
if (typeof entry !== "object" || !entry) continue;
|
|
103
|
+
|
|
104
|
+
const msgType = entry.type ?? "";
|
|
105
|
+
const message = entry.message ?? {};
|
|
106
|
+
|
|
107
|
+
if (msgType === "human" || msgType === "user") {
|
|
108
|
+
const text = extractContent(message.content);
|
|
109
|
+
if (text) messages.push({ role: "user", content: text });
|
|
110
|
+
} else if (msgType === "assistant") {
|
|
111
|
+
const text = extractContent(message.content);
|
|
112
|
+
if (text) messages.push({ role: "assistant", content: text });
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return messages.length >= 2 ? messages : null;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
function tryCodexJsonl(content: string): Message[] | null {
|
|
120
|
+
const lines = content.trim().split("\n").filter(l => l.trim());
|
|
121
|
+
const messages: Message[] = [];
|
|
122
|
+
let hasSessionMeta = false;
|
|
123
|
+
|
|
124
|
+
for (const line of lines) {
|
|
125
|
+
let entry: any;
|
|
126
|
+
try { entry = JSON.parse(line); } catch { continue; }
|
|
127
|
+
if (typeof entry !== "object" || !entry) continue;
|
|
128
|
+
|
|
129
|
+
if (entry.type === "session_meta") { hasSessionMeta = true; continue; }
|
|
130
|
+
if (entry.type !== "event_msg") continue;
|
|
131
|
+
|
|
132
|
+
const payload = entry.payload;
|
|
133
|
+
if (typeof payload !== "object" || !payload) continue;
|
|
134
|
+
|
|
135
|
+
const text = typeof payload.message === "string" ? payload.message.trim() : "";
|
|
136
|
+
if (!text) continue;
|
|
137
|
+
|
|
138
|
+
if (payload.type === "user_message") messages.push({ role: "user", content: text });
|
|
139
|
+
else if (payload.type === "agent_message") messages.push({ role: "assistant", content: text });
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
return messages.length >= 2 && hasSessionMeta ? messages : null;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
function tryClaudeAiJson(data: any): Message[] | null {
|
|
146
|
+
// Privacy export: array of conversation objects with chat_messages
|
|
147
|
+
if (Array.isArray(data) && data.length > 0 && data[0]?.chat_messages) {
|
|
148
|
+
const messages: Message[] = [];
|
|
149
|
+
for (const convo of data) {
|
|
150
|
+
for (const item of convo.chat_messages ?? []) {
|
|
151
|
+
const role = item.role ?? "";
|
|
152
|
+
const text = extractContent(item.content);
|
|
153
|
+
if ((role === "user" || role === "human") && text) messages.push({ role: "user", content: text });
|
|
154
|
+
else if ((role === "assistant" || role === "ai") && text) messages.push({ role: "assistant", content: text });
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
return messages.length >= 2 ? messages : null;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
// Flat messages list or wrapped in { messages: [...] }
|
|
161
|
+
let msgs = data;
|
|
162
|
+
if (typeof data === "object" && !Array.isArray(data)) {
|
|
163
|
+
msgs = data.messages ?? data.chat_messages ?? [];
|
|
164
|
+
}
|
|
165
|
+
if (!Array.isArray(msgs)) return null;
|
|
166
|
+
|
|
167
|
+
const messages: Message[] = [];
|
|
168
|
+
for (const item of msgs) {
|
|
169
|
+
if (typeof item !== "object" || !item) continue;
|
|
170
|
+
const role = item.role ?? "";
|
|
171
|
+
const text = extractContent(item.content);
|
|
172
|
+
if ((role === "user" || role === "human") && text) messages.push({ role: "user", content: text });
|
|
173
|
+
else if ((role === "assistant" || role === "ai") && text) messages.push({ role: "assistant", content: text });
|
|
174
|
+
}
|
|
175
|
+
return messages.length >= 2 ? messages : null;
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
function tryChatGptJson(data: any): Message[] | null {
|
|
179
|
+
if (typeof data !== "object" || !data?.mapping) return null;
|
|
180
|
+
const mapping = data.mapping;
|
|
181
|
+
const messages: Message[] = [];
|
|
182
|
+
|
|
183
|
+
// Find root node (parent=null, no message)
|
|
184
|
+
let rootId: string | null = null;
|
|
185
|
+
let fallback: string | null = null;
|
|
186
|
+
for (const [nodeId, node] of Object.entries(mapping) as [string, any][]) {
|
|
187
|
+
if (node.parent === null) {
|
|
188
|
+
if (!node.message) { rootId = nodeId; break; }
|
|
189
|
+
else if (!fallback) fallback = nodeId;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
rootId = rootId ?? fallback;
|
|
193
|
+
if (!rootId) return null;
|
|
194
|
+
|
|
195
|
+
// Walk the tree
|
|
196
|
+
let currentId: string | null = rootId;
|
|
197
|
+
const visited = new Set<string>();
|
|
198
|
+
while (currentId && !visited.has(currentId)) {
|
|
199
|
+
visited.add(currentId);
|
|
200
|
+
const node = (mapping as any)[currentId];
|
|
201
|
+
if (node?.message) {
|
|
202
|
+
const role = node.message.author?.role ?? "";
|
|
203
|
+
const content = node.message.content;
|
|
204
|
+
const parts = content?.parts ?? [];
|
|
205
|
+
const text = parts.filter((p: any) => typeof p === "string").join(" ").trim();
|
|
206
|
+
if (role === "user" && text) messages.push({ role: "user", content: text });
|
|
207
|
+
else if (role === "assistant" && text) messages.push({ role: "assistant", content: text });
|
|
208
|
+
}
|
|
209
|
+
currentId = node?.children?.[0] ?? null;
|
|
210
|
+
}
|
|
211
|
+
return messages.length >= 2 ? messages : null;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
function trySlackJson(data: any): Message[] | null {
|
|
215
|
+
if (!Array.isArray(data)) return null;
|
|
216
|
+
|
|
217
|
+
// Count unique speakers — only support 2-party DMs
|
|
218
|
+
const speakers = new Set<string>();
|
|
219
|
+
for (const item of data) {
|
|
220
|
+
if (typeof item !== "object" || item?.type !== "message") continue;
|
|
221
|
+
const userId = item.user ?? item.username ?? "";
|
|
222
|
+
if (userId) speakers.add(userId);
|
|
223
|
+
if (speakers.size > 2) return null; // multi-person channel, unsupported
|
|
224
|
+
}
|
|
225
|
+
if (speakers.size < 2) return null;
|
|
226
|
+
|
|
227
|
+
const messages: Message[] = [];
|
|
228
|
+
const speakerList = [...speakers];
|
|
229
|
+
const roleMap: Record<string, "user" | "assistant"> = {
|
|
230
|
+
[speakerList[0]]: "user",
|
|
231
|
+
[speakerList[1]]: "assistant",
|
|
232
|
+
};
|
|
233
|
+
|
|
234
|
+
for (const item of data) {
|
|
235
|
+
if (typeof item !== "object" || item?.type !== "message") continue;
|
|
236
|
+
const userId = item.user ?? item.username ?? "";
|
|
237
|
+
const text = (item.text ?? "").trim();
|
|
238
|
+
if (!text || !roleMap[userId]) continue;
|
|
239
|
+
messages.push({ role: roleMap[userId], content: text });
|
|
240
|
+
}
|
|
241
|
+
return messages.length >= 2 ? messages : null;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
function tryPlainText(content: string): Message[] | null {
|
|
245
|
+
const messages: Message[] = [];
|
|
246
|
+
// Only match explicit role prefixes (User:, Human:, Assistant:, etc.)
|
|
247
|
+
// Do NOT match bare blockquotes (> ) — too many false positives with markdown
|
|
248
|
+
const lines = content.split("\n");
|
|
249
|
+
let currentRole: "user" | "assistant" | null = null;
|
|
250
|
+
let currentText: string[] = [];
|
|
251
|
+
|
|
252
|
+
for (const line of lines) {
|
|
253
|
+
const trimmed = line.trim();
|
|
254
|
+
let newRole: "user" | "assistant" | null = null;
|
|
255
|
+
|
|
256
|
+
if (/^(User|Human)\s*:\s*/i.test(trimmed)) {
|
|
257
|
+
newRole = "user";
|
|
258
|
+
} else if (/^(Assistant|AI|Claude|GPT|Bot)\s*:\s*/i.test(trimmed)) {
|
|
259
|
+
newRole = "assistant";
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
if (newRole) {
|
|
263
|
+
if (currentRole && currentText.length > 0) {
|
|
264
|
+
const text = currentText.join("\n").trim();
|
|
265
|
+
if (text) messages.push({ role: currentRole, content: text });
|
|
266
|
+
}
|
|
267
|
+
currentRole = newRole;
|
|
268
|
+
// Strip the role prefix
|
|
269
|
+
const cleaned = trimmed.replace(/^(User|Human|Assistant|AI|Claude|GPT|Bot)\s*:\s*/i, "");
|
|
270
|
+
currentText = cleaned ? [cleaned] : [];
|
|
271
|
+
} else if (currentRole) {
|
|
272
|
+
currentText.push(trimmed);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
// Flush last
|
|
277
|
+
if (currentRole && currentText.length > 0) {
|
|
278
|
+
const text = currentText.join("\n").trim();
|
|
279
|
+
if (text) messages.push({ role: currentRole, content: text });
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
// Require at least 2 exchanges AND both roles present (prevents false positives)
|
|
283
|
+
const hasUser = messages.some(m => m.role === "user");
|
|
284
|
+
const hasAssistant = messages.some(m => m.role === "assistant");
|
|
285
|
+
return messages.length >= 4 && hasUser && hasAssistant ? messages : null;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
// =============================================================================
|
|
289
|
+
// Content Extraction
|
|
290
|
+
// =============================================================================
|
|
291
|
+
|
|
292
|
+
function extractContent(content: any): string {
|
|
293
|
+
if (typeof content === "string") return content.trim();
|
|
294
|
+
if (Array.isArray(content)) {
|
|
295
|
+
return content
|
|
296
|
+
.map(item => {
|
|
297
|
+
if (typeof item === "string") return item;
|
|
298
|
+
if (typeof item === "object" && item?.type === "text") return item.text ?? "";
|
|
299
|
+
return "";
|
|
300
|
+
})
|
|
301
|
+
.join(" ")
|
|
302
|
+
.trim();
|
|
303
|
+
}
|
|
304
|
+
if (typeof content === "object" && content) return (content.text ?? "").trim();
|
|
305
|
+
return "";
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
// =============================================================================
|
|
309
|
+
// Chunking — Exchange Pairs
|
|
310
|
+
// =============================================================================
|
|
311
|
+
|
|
312
|
+
const MIN_CHUNK_CHARS = 30;
|
|
313
|
+
|
|
314
|
+
export function chunkConversation(conv: NormalizedConversation): ConversationChunk[] {
|
|
315
|
+
const chunks: ConversationChunk[] = [];
|
|
316
|
+
const { messages, source } = conv;
|
|
317
|
+
|
|
318
|
+
for (let i = 0; i < messages.length; i++) {
|
|
319
|
+
if (messages[i].role !== "user") continue;
|
|
320
|
+
|
|
321
|
+
const userMsg = messages[i].content;
|
|
322
|
+
// Collect ALL consecutive assistant messages (handles split replies)
|
|
323
|
+
const assistantParts: string[] = [];
|
|
324
|
+
while (i + 1 < messages.length && messages[i + 1].role === "assistant") {
|
|
325
|
+
assistantParts.push(messages[i + 1].content);
|
|
326
|
+
i++;
|
|
327
|
+
}
|
|
328
|
+
const assistantMsg = assistantParts.join("\n\n");
|
|
329
|
+
|
|
330
|
+
// Build markdown chunk
|
|
331
|
+
const title = extractExchangeTitle(userMsg, chunks.length + 1);
|
|
332
|
+
const body = formatExchangeMarkdown(userMsg, assistantMsg);
|
|
333
|
+
|
|
334
|
+
if (body.length >= MIN_CHUNK_CHARS) {
|
|
335
|
+
chunks.push({
|
|
336
|
+
title,
|
|
337
|
+
body,
|
|
338
|
+
sourcePath: source,
|
|
339
|
+
chunkIndex: chunks.length,
|
|
340
|
+
});
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
return chunks;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
function extractExchangeTitle(userMessage: string, index: number): string {
|
|
348
|
+
// Use the first line/sentence of the user message, capped at 80 chars
|
|
349
|
+
const firstLine = userMessage.split("\n")[0].trim();
|
|
350
|
+
if (firstLine.length <= 80) return firstLine;
|
|
351
|
+
return firstLine.slice(0, 77) + "...";
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
function formatExchangeMarkdown(userMsg: string, assistantMsg: string): string {
|
|
355
|
+
const lines: string[] = [];
|
|
356
|
+
lines.push("**User:**", userMsg, "");
|
|
357
|
+
if (assistantMsg) {
|
|
358
|
+
lines.push("**Assistant:**", assistantMsg, "");
|
|
359
|
+
}
|
|
360
|
+
return lines.join("\n");
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
// =============================================================================
|
|
364
|
+
// Directory Scanner
|
|
365
|
+
// =============================================================================
|
|
366
|
+
|
|
367
|
+
export function scanConversationDir(dir: string): string[] {
|
|
368
|
+
const files: string[] = [];
|
|
369
|
+
|
|
370
|
+
function walk(d: string) {
|
|
371
|
+
let entries: string[];
|
|
372
|
+
try { entries = readdirSync(d); } catch { return; }
|
|
373
|
+
|
|
374
|
+
for (const entry of entries) {
|
|
375
|
+
const fullPath = join(d, entry);
|
|
376
|
+
try {
|
|
377
|
+
const stat = statSync(fullPath);
|
|
378
|
+
if (stat.isDirectory()) {
|
|
379
|
+
if (!SKIP_DIRS.has(entry)) walk(fullPath);
|
|
380
|
+
} else if (stat.isFile()) {
|
|
381
|
+
const ext = extname(entry).toLowerCase();
|
|
382
|
+
if (CONVO_EXTENSIONS.has(ext)) files.push(fullPath);
|
|
383
|
+
}
|
|
384
|
+
} catch { continue; }
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
walk(dir);
|
|
389
|
+
return files;
|
|
390
|
+
}
|
package/src/observer.ts
CHANGED
|
@@ -15,7 +15,7 @@ import { MAX_LLM_GENERATE_TIMEOUT_MS } from "./limits.ts";
|
|
|
15
15
|
// =============================================================================
|
|
16
16
|
|
|
17
17
|
export type Observation = {
|
|
18
|
-
type: "decision" | "bugfix" | "feature" | "refactor" | "discovery" | "change";
|
|
18
|
+
type: "decision" | "bugfix" | "feature" | "refactor" | "discovery" | "change" | "preference" | "milestone" | "problem";
|
|
19
19
|
title: string;
|
|
20
20
|
facts: string[];
|
|
21
21
|
narrative: string;
|
|
@@ -51,7 +51,7 @@ const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding sessio
|
|
|
51
51
|
For each significant action, decision, or discovery, output an <observation> XML element.
|
|
52
52
|
|
|
53
53
|
<observation>
|
|
54
|
-
<type>one of: decision, bugfix, feature, refactor, discovery, change</type>
|
|
54
|
+
<type>one of: decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem</type>
|
|
55
55
|
<title>Brief descriptive title (max 80 chars)</title>
|
|
56
56
|
<facts>
|
|
57
57
|
<fact>Individual atomic fact</fact>
|
|
@@ -69,7 +69,12 @@ Rules:
|
|
|
69
69
|
- Each fact should be a standalone, atomic piece of information
|
|
70
70
|
- The narrative should explain WHY something was done, not just WHAT
|
|
71
71
|
- Only include files that were explicitly mentioned in the transcript
|
|
72
|
-
- If no significant observations, output nothing
|
|
72
|
+
- If no significant observations, output nothing
|
|
73
|
+
|
|
74
|
+
Type guidance:
|
|
75
|
+
- preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
|
|
76
|
+
- milestone: significant completion point, version release, deployment, or phase transition
|
|
77
|
+
- problem: persistent issue, recurring bug, architectural limitation, or unresolved blocker`;
|
|
73
78
|
|
|
74
79
|
const SUMMARY_SYSTEM_PROMPT = `You are a session summarizer. Analyze this coding session transcript and output a structured summary.
|
|
75
80
|
|
|
@@ -118,6 +123,7 @@ function prepareTranscript(messages: TranscriptMessage[]): string {
|
|
|
118
123
|
|
|
119
124
|
// Allowlist of observation <type> values — matches exactly the types the
// OBSERVATION_SYSTEM_PROMPT instructs the model to emit.
const VALID_OBSERVATION_TYPES = new Set([
  "decision", "bugfix", "feature", "refactor", "discovery", "change",
  "preference", "milestone", "problem",
]);
|
|
122
128
|
|
|
123
129
|
const VALID_CONCEPTS = new Set([
|
package/src/store.ts
CHANGED
|
@@ -708,6 +708,31 @@ function initializeDatabase(db: Database): void {
|
|
|
708
708
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_cooccurrences_a ON entity_cooccurrences(entity_a)`);
|
|
709
709
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_cooccurrences_b ON entity_cooccurrences(entity_b)`);
|
|
710
710
|
|
|
711
|
+
// SPO knowledge graph: temporal entity-relationship triples
|
|
712
|
+
db.exec(`
|
|
713
|
+
CREATE TABLE IF NOT EXISTS entity_triples (
|
|
714
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
715
|
+
subject_entity_id TEXT NOT NULL,
|
|
716
|
+
predicate TEXT NOT NULL,
|
|
717
|
+
object_entity_id TEXT,
|
|
718
|
+
object_literal TEXT,
|
|
719
|
+
valid_from TEXT,
|
|
720
|
+
valid_to TEXT,
|
|
721
|
+
confidence REAL DEFAULT 1.0,
|
|
722
|
+
source_doc_id INTEGER,
|
|
723
|
+
source_fact TEXT,
|
|
724
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
725
|
+
FOREIGN KEY (subject_entity_id) REFERENCES entity_nodes(entity_id),
|
|
726
|
+
FOREIGN KEY (object_entity_id) REFERENCES entity_nodes(entity_id),
|
|
727
|
+
FOREIGN KEY (source_doc_id) REFERENCES documents(id)
|
|
728
|
+
)
|
|
729
|
+
`);
|
|
730
|
+
|
|
731
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_subject ON entity_triples(subject_entity_id)`);
|
|
732
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_object ON entity_triples(object_entity_id)`);
|
|
733
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_predicate ON entity_triples(predicate)`);
|
|
734
|
+
db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_valid ON entity_triples(valid_from, valid_to)`);
|
|
735
|
+
|
|
711
736
|
// Entity FTS5 for fuzzy name lookup
|
|
712
737
|
db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS entities_fts USING fts5(entity_id, name, entity_type)`);
|
|
713
738
|
|
|
@@ -904,6 +929,12 @@ export type Store = {
|
|
|
904
929
|
searchEntities: (query: string, limit?: number) => { entity_id: string; name: string; type: string; mention_count: number; cooccurrence_count: number }[];
|
|
905
930
|
getEntityGraphNeighbors: (seedDocIds: number[], limit?: number) => { docId: number; score: number; viaEntity: string }[];
|
|
906
931
|
|
|
932
|
+
// SPO knowledge graph
|
|
933
|
+
addTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, options?: { validFrom?: string; validTo?: string; confidence?: number; sourceDocId?: number; sourceFact?: string }) => number;
|
|
934
|
+
invalidateTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, endedDate?: string) => number;
|
|
935
|
+
queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[];
|
|
936
|
+
getTripleStats: () => { totalTriples: number; currentFacts: number; expiredFacts: number; predicateTypes: string[] };
|
|
937
|
+
|
|
907
938
|
// Co-activation tracking
|
|
908
939
|
recordCoActivation: (paths: string[]) => void;
|
|
909
940
|
getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];
|
|
@@ -1070,6 +1101,93 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
|
|
|
1070
1101
|
searchEntities: (query: string, limit?: number) => searchEntities(db, query, limit),
|
|
1071
1102
|
getEntityGraphNeighbors: (seedDocIds: number[], limit?: number) => getEntityGraphNeighbors(db, seedDocIds, limit),
|
|
1072
1103
|
|
|
1104
|
+
// SPO knowledge graph
|
|
1105
|
+
// Insert an SPO triple into entity_triples, deduplicating against an
// identical still-current triple (valid_to IS NULL). Returns the row id of
// the existing or newly inserted triple. The object is either an entity
// reference (objectEntityId) or a literal (objectLiteral) — never both.
addTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, options?: { validFrom?: string; validTo?: string; confidence?: number; sourceDocId?: number; sourceFact?: string }) => {
  // Normalize the predicate to lowercase snake_case so lookups match
  // regardless of how callers spell it.
  const pred = predicate.toLowerCase().replace(/\s+/g, "_");
  const now = new Date().toISOString();
  // Match on whichever object column is populated; the other must be NULL.
  const objClause = objectEntityId
    ? "object_entity_id = ? AND object_literal IS NULL"
    : "object_entity_id IS NULL AND object_literal = ?";
  const objParam = objectEntityId ?? objectLiteral;
  // Dedupe: if the same current fact already exists, return it unchanged.
  const existing = db.prepare(
    `SELECT id FROM entity_triples WHERE subject_entity_id = ? AND predicate = ? AND ${objClause} AND valid_to IS NULL`
  ).get(subjectEntityId, pred, objParam) as { id: number } | null;
  if (existing) return existing.id;

  const result = db.prepare(`
    INSERT INTO entity_triples (subject_entity_id, predicate, object_entity_id, object_literal, valid_from, valid_to, confidence, source_doc_id, source_fact, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `).run(
    subjectEntityId, pred, objectEntityId, objectLiteral,
    options?.validFrom ?? null, options?.validTo ?? null,
    options?.confidence ?? 1.0, options?.sourceDocId ?? null,
    options?.sourceFact ?? null, now
  );
  return Number(result.lastInsertRowid);
},
|
|
1128
|
+
|
|
1129
|
+
// Retire a currently-open triple (valid_to IS NULL) by stamping valid_to.
// endedDate defaults to today's date (YYYY-MM-DD). Returns the number of
// rows updated — 0 means no matching open triple existed.
invalidateTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, endedDate?: string) => {
  // Same predicate normalization as addTriple so the lookup matches.
  const pred = predicate.toLowerCase().replace(/\s+/g, "_");
  const ended = endedDate || new Date().toISOString().slice(0, 10);
  // Object matched as entity reference XOR literal, mirroring addTriple.
  const objClause = objectEntityId
    ? "object_entity_id = ? AND object_literal IS NULL"
    : "object_entity_id IS NULL AND object_literal = ?";
  const objParam = objectEntityId ?? objectLiteral;
  const result = db.prepare(
    `UPDATE entity_triples SET valid_to = ? WHERE subject_entity_id = ? AND predicate = ? AND ${objClause} AND valid_to IS NULL`
  ).run(ended, subjectEntityId, pred, objParam);
  return result.changes;
},
|
|
1141
|
+
|
|
1142
|
+
// Query the SPO graph around one entity. Outgoing = triples where the
// entity is subject; incoming = where it is object. With `asOf`, only
// triples whose [valid_from, valid_to] interval covers that date are
// returned (NULL bounds are open-ended). Names are resolved via
// entity_nodes, falling back to the raw id/literal.
queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => {
  const direction = options?.direction ?? "both";
  const asOf = options?.asOf;
  const results: { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[] = [];

  if (direction === "outgoing" || direction === "both") {
    let query = `SELECT t.id, t.predicate, t.object_entity_id, t.object_literal, t.valid_from, t.valid_to, t.confidence,
      COALESCE(s.name, t.subject_entity_id) as sub_name, COALESCE(o.name, t.object_literal, t.object_entity_id) as obj_name
      FROM entity_triples t
      LEFT JOIN entity_nodes s ON t.subject_entity_id = s.entity_id
      LEFT JOIN entity_nodes o ON t.object_entity_id = o.entity_id
      WHERE t.subject_entity_id = ?`;
    const params: any[] = [entityId];
    if (asOf) {
      // Temporal filter: the fact must have been valid on the asOf date.
      query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)";
      params.push(asOf, asOf);
    }
    for (const row of db.prepare(query).all(...params) as any[]) {
      // current = still open-ended (no valid_to set).
      results.push({ id: row.id, direction: "outgoing", subject: row.sub_name, predicate: row.predicate, object: row.obj_name, validFrom: row.valid_from, validTo: row.valid_to, confidence: row.confidence, current: row.valid_to === null });
    }
  }

  if (direction === "incoming" || direction === "both") {
    let query = `SELECT t.id, t.predicate, t.valid_from, t.valid_to, t.confidence,
      COALESCE(s.name, t.subject_entity_id) as sub_name, COALESCE(o.name, t.object_literal, t.object_entity_id) as obj_name
      FROM entity_triples t
      LEFT JOIN entity_nodes s ON t.subject_entity_id = s.entity_id
      LEFT JOIN entity_nodes o ON t.object_entity_id = o.entity_id
      WHERE t.object_entity_id = ?`;
    const params: any[] = [entityId];
    if (asOf) {
      query += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)";
      params.push(asOf, asOf);
    }
    for (const row of db.prepare(query).all(...params) as any[]) {
      results.push({ id: row.id, direction: "incoming", subject: row.sub_name, predicate: row.predicate, object: row.obj_name, validFrom: row.valid_from, validTo: row.valid_to, confidence: row.confidence, current: row.valid_to === null });
    }
  }

  return results;
},
|
|
1183
|
+
|
|
1184
|
+
// Aggregate stats over the SPO graph: total triple count, current
// (valid_to IS NULL) vs expired facts, and the distinct predicate
// vocabulary in alphabetical order.
getTripleStats: () => {
  const total = (db.prepare("SELECT COUNT(*) as n FROM entity_triples").get() as any).n;
  const current = (db.prepare("SELECT COUNT(*) as n FROM entity_triples WHERE valid_to IS NULL").get() as any).n;
  const predicates = db.prepare("SELECT DISTINCT predicate FROM entity_triples ORDER BY predicate").all().map((r: any) => r.predicate);
  return { totalTriples: total, currentFacts: current, expiredFacts: total - current, predicateTypes: predicates };
},
|
|
1190
|
+
|
|
1073
1191
|
// Co-activation tracking
|
|
1074
1192
|
recordCoActivation: (paths: string[]) => {
|
|
1075
1193
|
if (paths.length < 2) return;
|
|
@@ -1333,6 +1451,7 @@ export type DocumentRow = {
|
|
|
1333
1451
|
confidence: number;
|
|
1334
1452
|
accessCount: number;
|
|
1335
1453
|
bodyLength: number;
|
|
1454
|
+
pinned: number;
|
|
1336
1455
|
};
|
|
1337
1456
|
|
|
1338
1457
|
// =============================================================================
|
|
@@ -3560,7 +3679,7 @@ function getDocumentsByTypeFn(db: Database, contentType: string, limit: number =
|
|
|
3560
3679
|
SELECT d.id, d.collection, d.path, d.title, d.hash, d.modified_at as modifiedAt,
|
|
3561
3680
|
d.domain, d.workstream, d.tags, d.content_type as contentType,
|
|
3562
3681
|
d.review_by as reviewBy, d.confidence, d.access_count as accessCount,
|
|
3563
|
-
LENGTH(c.doc) as bodyLength
|
|
3682
|
+
LENGTH(c.doc) as bodyLength, d.pinned
|
|
3564
3683
|
FROM documents d
|
|
3565
3684
|
JOIN content c ON c.hash = d.hash
|
|
3566
3685
|
WHERE d.active = 1 AND d.content_type = ?
|