clawmem 0.4.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/memory.ts CHANGED
@@ -12,9 +12,13 @@
12
12
  export const HALF_LIVES: Record<string, number> = {
13
13
  handoff: 30,
14
14
  progress: 45,
15
+ conversation: 45,
16
+ problem: 60,
17
+ milestone: 60,
15
18
  note: 60,
16
19
  research: 90,
17
20
  project: 120,
21
+ preference: Infinity,
18
22
  decision: Infinity,
19
23
  hub: Infinity,
20
24
  };
@@ -25,10 +29,14 @@ export const HALF_LIVES: Record<string, number> = {
25
29
 
26
30
  export const TYPE_BASELINES: Record<string, number> = {
27
31
  decision: 0.85,
32
+ preference: 0.80,
28
33
  hub: 0.80,
34
+ problem: 0.75,
29
35
  research: 0.70,
36
+ milestone: 0.70,
30
37
  project: 0.65,
31
38
  handoff: 0.60,
39
+ conversation: 0.55,
32
40
  progress: 0.50,
33
41
  note: 0.50,
34
42
  };
@@ -37,7 +45,7 @@ export const TYPE_BASELINES: Record<string, number> = {
37
45
  // Content Type Inference
38
46
  // =============================================================================
39
47
 
40
- export type ContentType = "decision" | "hub" | "research" | "project" | "handoff" | "progress" | "note";
48
+ export type ContentType = "decision" | "preference" | "hub" | "research" | "project" | "handoff" | "conversation" | "progress" | "milestone" | "problem" | "note";
41
49
 
42
50
  export function inferContentType(path: string, explicitType?: string): ContentType {
43
51
  if (explicitType && explicitType in TYPE_BASELINES) return explicitType as ContentType;
@@ -48,6 +56,7 @@ export function inferContentType(path: string, explicitType?: string): ContentTy
48
56
  if (lower.includes("research") || lower.includes("investigation") || lower.includes("analysis")) return "research";
49
57
  if (lower.includes("project") || lower.includes("epic") || lower.includes("initiative")) return "project";
50
58
  if (lower.includes("handoff") || lower.includes("handover") || lower.includes("session")) return "handoff";
59
+ if (lower.includes("conversation") || lower.includes("convo") || lower.includes("chat") || lower.includes("transcript")) return "conversation";
51
60
  if (lower.includes("progress") || lower.includes("status") || lower.includes("standup") || lower.includes("changelog")) return "progress";
52
61
  return "note";
53
62
  }
@@ -65,7 +74,7 @@ export type MemoryType = "episodic" | "semantic" | "procedural";
65
74
  * - procedural: how-to, patterns, workflows (actionable)
66
75
  */
67
76
  export function inferMemoryType(path: string, contentType: string, body?: string): MemoryType {
68
- if (["handoff", "progress"].includes(contentType)) return "episodic";
77
+ if (["handoff", "progress", "conversation"].includes(contentType)) return "episodic";
69
78
  if (["decision", "hub", "research"].includes(contentType)) return "semantic";
70
79
  if (body && /\b(step\s+\d|workflow|recipe|how\s+to|procedure|runbook|playbook)\b/i.test(body)) return "procedural";
71
80
  if (path.includes("sop") || path.includes("runbook") || path.includes("playbook")) return "procedural";
@@ -141,7 +150,7 @@ export function confidenceScore(
141
150
  // Attention decay: reduce confidence if not accessed recently (5% per week)
142
151
  // Only apply to episodic/progress content — skip for durable types (decision, hub, research)
143
152
  // Also skip if last_accessed_at was backfilled from modified_at (no real access yet)
144
- const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern"]);
153
+ const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern", "preference"]);
145
154
  let attentionDecay = 1.0;
146
155
  if (lastAccessedAt && !DECAY_EXEMPT_TYPES.has(contentType)) {
147
156
  const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
@@ -0,0 +1,390 @@
1
+ /**
2
+ * normalize.ts — Conversation format normalizer for ClawMem
3
+ *
4
+ * Converts chat export files into normalized markdown documents suitable for
5
+ * ClawMem's indexing pipeline. Supports:
6
+ * - Claude Code JSONL sessions
7
+ * - Claude.ai JSON exports (flat + privacy export)
8
+ * - ChatGPT conversations.json (mapping tree)
9
+ * - Slack JSON exports (DMs + channels)
10
+ * - Plain text with user/assistant markers
11
+ *
12
+ * Each exchange pair (user + assistant) becomes one markdown chunk.
13
+ * Inspired by MemPalace normalize.py, rewritten for TypeScript/Bun.
14
+ */
15
+
16
+ import { readFileSync, readdirSync, statSync } from "fs";
17
+ import { basename, extname, join, relative } from "path";
18
+
19
+ // =============================================================================
20
+ // Types
21
+ // =============================================================================
22
+
23
+ export type Message = { role: "user" | "assistant"; content: string };
24
+
25
+ export type NormalizedConversation = {
26
+ source: string; // original filename
27
+ format: string; // detected format
28
+ messages: Message[]; // normalized messages
29
+ };
30
+
31
+ export type ConversationChunk = {
32
+ title: string; // "Exchange N" or extracted topic
33
+ body: string; // markdown body
34
+ sourcePath: string; // relative path of source file
35
+ chunkIndex: number;
36
+ };
37
+
38
+ // =============================================================================
39
+ // Format Detection & Normalization
40
+ // =============================================================================
41
+
42
+ const CONVO_EXTENSIONS = new Set([".txt", ".md", ".json", ".jsonl"]);
43
+ const SKIP_DIRS = new Set([".git", "node_modules", "__pycache__", ".venv", "venv", "dist", "build", ".next", ".mempalace", ".grepai", "tool-results"]);
44
+
45
+ export function normalizeFile(filepath: string): NormalizedConversation | null {
46
+ let content: string;
47
+ try {
48
+ content = readFileSync(filepath, "utf-8");
49
+ } catch {
50
+ return null;
51
+ }
52
+
53
+ if (!content.trim()) return null;
54
+
55
+ const ext = extname(filepath).toLowerCase();
56
+
57
+ // Try JSONL formats first (Claude Code, Codex CLI)
58
+ if (ext === ".jsonl" || (content.trim().startsWith("{") && content.includes("\n{"))) {
59
+ const cc = tryClaudeCodeJsonl(content);
60
+ if (cc) return { source: basename(filepath), format: "claude-code", messages: cc };
61
+
62
+ const codex = tryCodexJsonl(content);
63
+ if (codex) return { source: basename(filepath), format: "codex-cli", messages: codex };
64
+ }
65
+
66
+ // Try JSON formats
67
+ if (ext === ".json" || content.trim().startsWith("{") || content.trim().startsWith("[")) {
68
+ try {
69
+ const data = JSON.parse(content);
70
+
71
+ const claude = tryClaudeAiJson(data);
72
+ if (claude) return { source: basename(filepath), format: "claude-ai", messages: claude };
73
+
74
+ const chatgpt = tryChatGptJson(data);
75
+ if (chatgpt) return { source: basename(filepath), format: "chatgpt", messages: chatgpt };
76
+
77
+ const slack = trySlackJson(data);
78
+ if (slack) return { source: basename(filepath), format: "slack", messages: slack };
79
+ } catch {
80
+ // Not valid JSON
81
+ }
82
+ }
83
+
84
+ // Try plain text with user/assistant markers
85
+ const plain = tryPlainText(content);
86
+ if (plain) return { source: basename(filepath), format: "plain-text", messages: plain };
87
+
88
+ return null;
89
+ }
90
+
91
+ // =============================================================================
92
+ // Format Parsers
93
+ // =============================================================================
94
+
95
+ function tryClaudeCodeJsonl(content: string): Message[] | null {
96
+ const lines = content.trim().split("\n").filter(l => l.trim());
97
+ const messages: Message[] = [];
98
+
99
+ for (const line of lines) {
100
+ let entry: any;
101
+ try { entry = JSON.parse(line); } catch { continue; }
102
+ if (typeof entry !== "object" || !entry) continue;
103
+
104
+ const msgType = entry.type ?? "";
105
+ const message = entry.message ?? {};
106
+
107
+ if (msgType === "human" || msgType === "user") {
108
+ const text = extractContent(message.content);
109
+ if (text) messages.push({ role: "user", content: text });
110
+ } else if (msgType === "assistant") {
111
+ const text = extractContent(message.content);
112
+ if (text) messages.push({ role: "assistant", content: text });
113
+ }
114
+ }
115
+
116
+ return messages.length >= 2 ? messages : null;
117
+ }
118
+
119
+ function tryCodexJsonl(content: string): Message[] | null {
120
+ const lines = content.trim().split("\n").filter(l => l.trim());
121
+ const messages: Message[] = [];
122
+ let hasSessionMeta = false;
123
+
124
+ for (const line of lines) {
125
+ let entry: any;
126
+ try { entry = JSON.parse(line); } catch { continue; }
127
+ if (typeof entry !== "object" || !entry) continue;
128
+
129
+ if (entry.type === "session_meta") { hasSessionMeta = true; continue; }
130
+ if (entry.type !== "event_msg") continue;
131
+
132
+ const payload = entry.payload;
133
+ if (typeof payload !== "object" || !payload) continue;
134
+
135
+ const text = typeof payload.message === "string" ? payload.message.trim() : "";
136
+ if (!text) continue;
137
+
138
+ if (payload.type === "user_message") messages.push({ role: "user", content: text });
139
+ else if (payload.type === "agent_message") messages.push({ role: "assistant", content: text });
140
+ }
141
+
142
+ return messages.length >= 2 && hasSessionMeta ? messages : null;
143
+ }
144
+
145
+ function tryClaudeAiJson(data: any): Message[] | null {
146
+ // Privacy export: array of conversation objects with chat_messages
147
+ if (Array.isArray(data) && data.length > 0 && data[0]?.chat_messages) {
148
+ const messages: Message[] = [];
149
+ for (const convo of data) {
150
+ for (const item of convo.chat_messages ?? []) {
151
+ const role = item.role ?? "";
152
+ const text = extractContent(item.content);
153
+ if ((role === "user" || role === "human") && text) messages.push({ role: "user", content: text });
154
+ else if ((role === "assistant" || role === "ai") && text) messages.push({ role: "assistant", content: text });
155
+ }
156
+ }
157
+ return messages.length >= 2 ? messages : null;
158
+ }
159
+
160
+ // Flat messages list or wrapped in { messages: [...] }
161
+ let msgs = data;
162
+ if (typeof data === "object" && !Array.isArray(data)) {
163
+ msgs = data.messages ?? data.chat_messages ?? [];
164
+ }
165
+ if (!Array.isArray(msgs)) return null;
166
+
167
+ const messages: Message[] = [];
168
+ for (const item of msgs) {
169
+ if (typeof item !== "object" || !item) continue;
170
+ const role = item.role ?? "";
171
+ const text = extractContent(item.content);
172
+ if ((role === "user" || role === "human") && text) messages.push({ role: "user", content: text });
173
+ else if ((role === "assistant" || role === "ai") && text) messages.push({ role: "assistant", content: text });
174
+ }
175
+ return messages.length >= 2 ? messages : null;
176
+ }
177
+
178
+ function tryChatGptJson(data: any): Message[] | null {
179
+ if (typeof data !== "object" || !data?.mapping) return null;
180
+ const mapping = data.mapping;
181
+ const messages: Message[] = [];
182
+
183
+ // Find root node (parent=null, no message)
184
+ let rootId: string | null = null;
185
+ let fallback: string | null = null;
186
+ for (const [nodeId, node] of Object.entries(mapping) as [string, any][]) {
187
+ if (node.parent === null) {
188
+ if (!node.message) { rootId = nodeId; break; }
189
+ else if (!fallback) fallback = nodeId;
190
+ }
191
+ }
192
+ rootId = rootId ?? fallback;
193
+ if (!rootId) return null;
194
+
195
+ // Walk the tree
196
+ let currentId: string | null = rootId;
197
+ const visited = new Set<string>();
198
+ while (currentId && !visited.has(currentId)) {
199
+ visited.add(currentId);
200
+ const node = (mapping as any)[currentId];
201
+ if (node?.message) {
202
+ const role = node.message.author?.role ?? "";
203
+ const content = node.message.content;
204
+ const parts = content?.parts ?? [];
205
+ const text = parts.filter((p: any) => typeof p === "string").join(" ").trim();
206
+ if (role === "user" && text) messages.push({ role: "user", content: text });
207
+ else if (role === "assistant" && text) messages.push({ role: "assistant", content: text });
208
+ }
209
+ currentId = node?.children?.[0] ?? null;
210
+ }
211
+ return messages.length >= 2 ? messages : null;
212
+ }
213
+
214
+ function trySlackJson(data: any): Message[] | null {
215
+ if (!Array.isArray(data)) return null;
216
+
217
+ // Count unique speakers — only support 2-party DMs
218
+ const speakers = new Set<string>();
219
+ for (const item of data) {
220
+ if (typeof item !== "object" || item?.type !== "message") continue;
221
+ const userId = item.user ?? item.username ?? "";
222
+ if (userId) speakers.add(userId);
223
+ if (speakers.size > 2) return null; // multi-person channel, unsupported
224
+ }
225
+ if (speakers.size < 2) return null;
226
+
227
+ const messages: Message[] = [];
228
+ const speakerList = [...speakers];
229
+ const roleMap: Record<string, "user" | "assistant"> = {
230
+ [speakerList[0]]: "user",
231
+ [speakerList[1]]: "assistant",
232
+ };
233
+
234
+ for (const item of data) {
235
+ if (typeof item !== "object" || item?.type !== "message") continue;
236
+ const userId = item.user ?? item.username ?? "";
237
+ const text = (item.text ?? "").trim();
238
+ if (!text || !roleMap[userId]) continue;
239
+ messages.push({ role: roleMap[userId], content: text });
240
+ }
241
+ return messages.length >= 2 ? messages : null;
242
+ }
243
+
244
+ function tryPlainText(content: string): Message[] | null {
245
+ const messages: Message[] = [];
246
+ // Only match explicit role prefixes (User:, Human:, Assistant:, etc.)
247
+ // Do NOT match bare blockquotes (> ) — too many false positives with markdown
248
+ const lines = content.split("\n");
249
+ let currentRole: "user" | "assistant" | null = null;
250
+ let currentText: string[] = [];
251
+
252
+ for (const line of lines) {
253
+ const trimmed = line.trim();
254
+ let newRole: "user" | "assistant" | null = null;
255
+
256
+ if (/^(User|Human)\s*:\s*/i.test(trimmed)) {
257
+ newRole = "user";
258
+ } else if (/^(Assistant|AI|Claude|GPT|Bot)\s*:\s*/i.test(trimmed)) {
259
+ newRole = "assistant";
260
+ }
261
+
262
+ if (newRole) {
263
+ if (currentRole && currentText.length > 0) {
264
+ const text = currentText.join("\n").trim();
265
+ if (text) messages.push({ role: currentRole, content: text });
266
+ }
267
+ currentRole = newRole;
268
+ // Strip the role prefix
269
+ const cleaned = trimmed.replace(/^(User|Human|Assistant|AI|Claude|GPT|Bot)\s*:\s*/i, "");
270
+ currentText = cleaned ? [cleaned] : [];
271
+ } else if (currentRole) {
272
+ currentText.push(trimmed);
273
+ }
274
+ }
275
+
276
+ // Flush last
277
+ if (currentRole && currentText.length > 0) {
278
+ const text = currentText.join("\n").trim();
279
+ if (text) messages.push({ role: currentRole, content: text });
280
+ }
281
+
282
+ // Require at least 2 exchanges AND both roles present (prevents false positives)
283
+ const hasUser = messages.some(m => m.role === "user");
284
+ const hasAssistant = messages.some(m => m.role === "assistant");
285
+ return messages.length >= 4 && hasUser && hasAssistant ? messages : null;
286
+ }
287
+
288
+ // =============================================================================
289
+ // Content Extraction
290
+ // =============================================================================
291
+
292
+ function extractContent(content: any): string {
293
+ if (typeof content === "string") return content.trim();
294
+ if (Array.isArray(content)) {
295
+ return content
296
+ .map(item => {
297
+ if (typeof item === "string") return item;
298
+ if (typeof item === "object" && item?.type === "text") return item.text ?? "";
299
+ return "";
300
+ })
301
+ .join(" ")
302
+ .trim();
303
+ }
304
+ if (typeof content === "object" && content) return (content.text ?? "").trim();
305
+ return "";
306
+ }
307
+
308
+ // =============================================================================
309
+ // Chunking — Exchange Pairs
310
+ // =============================================================================
311
+
312
+ const MIN_CHUNK_CHARS = 30;
313
+
314
+ export function chunkConversation(conv: NormalizedConversation): ConversationChunk[] {
315
+ const chunks: ConversationChunk[] = [];
316
+ const { messages, source } = conv;
317
+
318
+ for (let i = 0; i < messages.length; i++) {
319
+ if (messages[i].role !== "user") continue;
320
+
321
+ const userMsg = messages[i].content;
322
+ // Collect ALL consecutive assistant messages (handles split replies)
323
+ const assistantParts: string[] = [];
324
+ while (i + 1 < messages.length && messages[i + 1].role === "assistant") {
325
+ assistantParts.push(messages[i + 1].content);
326
+ i++;
327
+ }
328
+ const assistantMsg = assistantParts.join("\n\n");
329
+
330
+ // Build markdown chunk
331
+ const title = extractExchangeTitle(userMsg, chunks.length + 1);
332
+ const body = formatExchangeMarkdown(userMsg, assistantMsg);
333
+
334
+ if (body.length >= MIN_CHUNK_CHARS) {
335
+ chunks.push({
336
+ title,
337
+ body,
338
+ sourcePath: source,
339
+ chunkIndex: chunks.length,
340
+ });
341
+ }
342
+ }
343
+
344
+ return chunks;
345
+ }
346
+
347
+ function extractExchangeTitle(userMessage: string, index: number): string {
348
+ // Use the first line/sentence of the user message, capped at 80 chars
349
+ const firstLine = userMessage.split("\n")[0].trim();
350
+ if (firstLine.length <= 80) return firstLine;
351
+ return firstLine.slice(0, 77) + "...";
352
+ }
353
+
354
+ function formatExchangeMarkdown(userMsg: string, assistantMsg: string): string {
355
+ const lines: string[] = [];
356
+ lines.push("**User:**", userMsg, "");
357
+ if (assistantMsg) {
358
+ lines.push("**Assistant:**", assistantMsg, "");
359
+ }
360
+ return lines.join("\n");
361
+ }
362
+
363
+ // =============================================================================
364
+ // Directory Scanner
365
+ // =============================================================================
366
+
367
+ export function scanConversationDir(dir: string): string[] {
368
+ const files: string[] = [];
369
+
370
+ function walk(d: string) {
371
+ let entries: string[];
372
+ try { entries = readdirSync(d); } catch { return; }
373
+
374
+ for (const entry of entries) {
375
+ const fullPath = join(d, entry);
376
+ try {
377
+ const stat = statSync(fullPath);
378
+ if (stat.isDirectory()) {
379
+ if (!SKIP_DIRS.has(entry)) walk(fullPath);
380
+ } else if (stat.isFile()) {
381
+ const ext = extname(entry).toLowerCase();
382
+ if (CONVO_EXTENSIONS.has(ext)) files.push(fullPath);
383
+ }
384
+ } catch { continue; }
385
+ }
386
+ }
387
+
388
+ walk(dir);
389
+ return files;
390
+ }
package/src/observer.ts CHANGED
@@ -15,7 +15,7 @@ import { MAX_LLM_GENERATE_TIMEOUT_MS } from "./limits.ts";
15
15
  // =============================================================================
16
16
 
17
17
  export type Observation = {
18
- type: "decision" | "bugfix" | "feature" | "refactor" | "discovery" | "change";
18
+ type: "decision" | "bugfix" | "feature" | "refactor" | "discovery" | "change" | "preference" | "milestone" | "problem";
19
19
  title: string;
20
20
  facts: string[];
21
21
  narrative: string;
@@ -51,7 +51,7 @@ const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding sessio
51
51
  For each significant action, decision, or discovery, output an <observation> XML element.
52
52
 
53
53
  <observation>
54
- <type>one of: decision, bugfix, feature, refactor, discovery, change</type>
54
+ <type>one of: decision, bugfix, feature, refactor, discovery, change, preference, milestone, problem</type>
55
55
  <title>Brief descriptive title (max 80 chars)</title>
56
56
  <facts>
57
57
  <fact>Individual atomic fact</fact>
@@ -69,7 +69,12 @@ Rules:
69
69
  - Each fact should be a standalone, atomic piece of information
70
70
  - The narrative should explain WHY something was done, not just WHAT
71
71
  - Only include files that were explicitly mentioned in the transcript
72
- - If no significant observations, output nothing`;
72
+ - If no significant observations, output nothing
73
+
74
+ Type guidance:
75
+ - preference: user expresses a preference, habit, or way of working (e.g., "don't use subagents for this", "I prefer single PRs")
76
+ - milestone: significant completion point, version release, deployment, or phase transition
77
+ - problem: persistent issue, recurring bug, architectural limitation, or unresolved blocker`;
73
78
 
74
79
  const SUMMARY_SYSTEM_PROMPT = `You are a session summarizer. Analyze this coding session transcript and output a structured summary.
75
80
 
@@ -118,6 +123,7 @@ function prepareTranscript(messages: TranscriptMessage[]): string {
118
123
 
119
124
  const VALID_OBSERVATION_TYPES = new Set([
120
125
  "decision", "bugfix", "feature", "refactor", "discovery", "change",
126
+ "preference", "milestone", "problem",
121
127
  ]);
122
128
 
123
129
  const VALID_CONCEPTS = new Set([
package/src/store.ts CHANGED
@@ -708,6 +708,31 @@ function initializeDatabase(db: Database): void {
708
708
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_cooccurrences_a ON entity_cooccurrences(entity_a)`);
709
709
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_cooccurrences_b ON entity_cooccurrences(entity_b)`);
710
710
 
711
+ // SPO knowledge graph: temporal entity-relationship triples
712
+ db.exec(`
713
+ CREATE TABLE IF NOT EXISTS entity_triples (
714
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
715
+ subject_entity_id TEXT NOT NULL,
716
+ predicate TEXT NOT NULL,
717
+ object_entity_id TEXT,
718
+ object_literal TEXT,
719
+ valid_from TEXT,
720
+ valid_to TEXT,
721
+ confidence REAL DEFAULT 1.0,
722
+ source_doc_id INTEGER,
723
+ source_fact TEXT,
724
+ created_at TEXT DEFAULT (datetime('now')),
725
+ FOREIGN KEY (subject_entity_id) REFERENCES entity_nodes(entity_id),
726
+ FOREIGN KEY (object_entity_id) REFERENCES entity_nodes(entity_id),
727
+ FOREIGN KEY (source_doc_id) REFERENCES documents(id)
728
+ )
729
+ `);
730
+
731
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_subject ON entity_triples(subject_entity_id)`);
732
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_object ON entity_triples(object_entity_id)`);
733
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_predicate ON entity_triples(predicate)`);
734
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_triples_valid ON entity_triples(valid_from, valid_to)`);
735
+
711
736
  // Entity FTS5 for fuzzy name lookup
712
737
  db.exec(`CREATE VIRTUAL TABLE IF NOT EXISTS entities_fts USING fts5(entity_id, name, entity_type)`);
713
738
 
@@ -904,6 +929,12 @@ export type Store = {
904
929
  searchEntities: (query: string, limit?: number) => { entity_id: string; name: string; type: string; mention_count: number; cooccurrence_count: number }[];
905
930
  getEntityGraphNeighbors: (seedDocIds: number[], limit?: number) => { docId: number; score: number; viaEntity: string }[];
906
931
 
932
+ // SPO knowledge graph
933
+ addTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, options?: { validFrom?: string; validTo?: string; confidence?: number; sourceDocId?: number; sourceFact?: string }) => number;
934
+ invalidateTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, endedDate?: string) => number;
935
+ queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[];
936
+ getTripleStats: () => { totalTriples: number; currentFacts: number; expiredFacts: number; predicateTypes: string[] };
937
+
907
938
  // Co-activation tracking
908
939
  recordCoActivation: (paths: string[]) => void;
909
940
  getCoActivated: (path: string, limit?: number) => { path: string; count: number }[];
@@ -1070,6 +1101,93 @@ export function createStore(dbPath?: string, opts?: { readonly?: boolean; busyTi
1070
1101
  searchEntities: (query: string, limit?: number) => searchEntities(db, query, limit),
1071
1102
  getEntityGraphNeighbors: (seedDocIds: number[], limit?: number) => getEntityGraphNeighbors(db, seedDocIds, limit),
1072
1103
 
1104
+ // SPO knowledge graph
1105
+ addTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, options?: { validFrom?: string; validTo?: string; confidence?: number; sourceDocId?: number; sourceFact?: string }) => {
1106
+ const pred = predicate.toLowerCase().replace(/\s+/g, "_");
1107
+ const now = new Date().toISOString();
1108
+ const objClause = objectEntityId
1109
+ ? "object_entity_id = ? AND object_literal IS NULL"
1110
+ : "object_entity_id IS NULL AND object_literal = ?";
1111
+ const objParam = objectEntityId ?? objectLiteral;
1112
+ const existing = db.prepare(
1113
+ `SELECT id FROM entity_triples WHERE subject_entity_id = ? AND predicate = ? AND ${objClause} AND valid_to IS NULL`
1114
+ ).get(subjectEntityId, pred, objParam) as { id: number } | null;
1115
+ if (existing) return existing.id;
1116
+
1117
+ const result = db.prepare(`
1118
+ INSERT INTO entity_triples (subject_entity_id, predicate, object_entity_id, object_literal, valid_from, valid_to, confidence, source_doc_id, source_fact, created_at)
1119
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1120
+ `).run(
1121
+ subjectEntityId, pred, objectEntityId, objectLiteral,
1122
+ options?.validFrom ?? null, options?.validTo ?? null,
1123
+ options?.confidence ?? 1.0, options?.sourceDocId ?? null,
1124
+ options?.sourceFact ?? null, now
1125
+ );
1126
+ return Number(result.lastInsertRowid);
1127
+ },
1128
+
1129
// End the currently-valid triple(s) matching subject/predicate/object by
// stamping valid_to (defaults to today's date, YYYY-MM-DD). Returns the
// number of rows that were closed out.
invalidateTriple: (subjectEntityId: string, predicate: string, objectEntityId: string | null, objectLiteral: string | null, endedDate?: string) => {
  const normalizedPred = predicate.toLowerCase().replace(/\s+/g, "_");
  const effectiveEnd = endedDate || new Date().toISOString().slice(0, 10);
  const objectFilter = objectEntityId
    ? "object_entity_id = ? AND object_literal IS NULL"
    : "object_entity_id IS NULL AND object_literal = ?";
  const outcome = db.prepare(
    `UPDATE entity_triples SET valid_to = ? WHERE subject_entity_id = ? AND predicate = ? AND ${objectFilter} AND valid_to IS NULL`
  ).run(effectiveEnd, subjectEntityId, normalizedPred, objectEntityId ?? objectLiteral);
  return outcome.changes;
},
1141
+
1142
// Fetch triples touching an entity, in either or both directions.
// Entity ids are resolved to display names via entity_nodes; literal
// objects fall back to their raw text. When `asOf` is given, only triples
// whose [valid_from, valid_to] window contains that date are returned.
queryEntityTriples: (entityId: string, options?: { asOf?: string; direction?: "outgoing" | "incoming" | "both" }) => {
  const wanted = options?.direction ?? "both";
  const asOf = options?.asOf;
  const rows: { id: number; direction: string; subject: string; predicate: string; object: string; validFrom: string | null; validTo: string | null; confidence: number; current: boolean }[] = [];

  // Shared query body; only the anchor column differs per direction.
  const collect = (dir: "outgoing" | "incoming") => {
    const anchorColumn = dir === "outgoing" ? "t.subject_entity_id" : "t.object_entity_id";
    let sql = `SELECT t.id, t.predicate, t.valid_from, t.valid_to, t.confidence,
      COALESCE(s.name, t.subject_entity_id) as sub_name, COALESCE(o.name, t.object_literal, t.object_entity_id) as obj_name
      FROM entity_triples t
      LEFT JOIN entity_nodes s ON t.subject_entity_id = s.entity_id
      LEFT JOIN entity_nodes o ON t.object_entity_id = o.entity_id
      WHERE ${anchorColumn} = ?`;
    const args: any[] = [entityId];
    if (asOf) {
      sql += " AND (t.valid_from IS NULL OR t.valid_from <= ?) AND (t.valid_to IS NULL OR t.valid_to >= ?)";
      args.push(asOf, asOf);
    }
    for (const row of db.prepare(sql).all(...args) as any[]) {
      rows.push({
        id: row.id,
        direction: dir,
        subject: row.sub_name,
        predicate: row.predicate,
        object: row.obj_name,
        validFrom: row.valid_from,
        validTo: row.valid_to,
        confidence: row.confidence,
        current: row.valid_to === null,
      });
    }
  };

  if (wanted === "outgoing" || wanted === "both") collect("outgoing");
  if (wanted === "incoming" || wanted === "both") collect("incoming");
  return rows;
},
1183
+
1184
// Aggregate stats for the knowledge graph: total triple count, how many
// are still current (valid_to IS NULL), how many have been superseded,
// and the distinct predicate vocabulary in sorted order.
getTripleStats: () => {
  const totalTriples = (db.prepare("SELECT COUNT(*) as n FROM entity_triples").get() as any).n;
  const currentFacts = (db.prepare("SELECT COUNT(*) as n FROM entity_triples WHERE valid_to IS NULL").get() as any).n;
  const predicateTypes = db.prepare("SELECT DISTINCT predicate FROM entity_triples ORDER BY predicate").all().map((r: any) => r.predicate);
  return { totalTriples, currentFacts, expiredFacts: totalTriples - currentFacts, predicateTypes };
},
1190
+
1073
1191
  // Co-activation tracking
1074
1192
  recordCoActivation: (paths: string[]) => {
1075
1193
  if (paths.length < 2) return;
@@ -1333,6 +1451,7 @@ export type DocumentRow = {
1333
1451
  confidence: number;
1334
1452
  accessCount: number;
1335
1453
  bodyLength: number;
1454
+ pinned: number;
1336
1455
  };
1337
1456
 
1338
1457
  // =============================================================================
@@ -3560,7 +3679,7 @@ function getDocumentsByTypeFn(db: Database, contentType: string, limit: number =
3560
3679
  SELECT d.id, d.collection, d.path, d.title, d.hash, d.modified_at as modifiedAt,
3561
3680
  d.domain, d.workstream, d.tags, d.content_type as contentType,
3562
3681
  d.review_by as reviewBy, d.confidence, d.access_count as accessCount,
3563
- LENGTH(c.doc) as bodyLength
3682
+ LENGTH(c.doc) as bodyLength, d.pinned
3564
3683
  FROM documents d
3565
3684
  JOIN content c ON c.hash = d.hash
3566
3685
  WHERE d.active = 1 AND d.content_type = ?