clawmem 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/AGENTS.md +660 -0
  2. package/CLAUDE.md +660 -0
  3. package/LICENSE +21 -0
  4. package/README.md +993 -0
  5. package/SKILL.md +717 -0
  6. package/bin/clawmem +75 -0
  7. package/package.json +72 -0
  8. package/src/amem.ts +797 -0
  9. package/src/beads.ts +263 -0
  10. package/src/clawmem.ts +1849 -0
  11. package/src/collections.ts +405 -0
  12. package/src/config.ts +178 -0
  13. package/src/consolidation.ts +123 -0
  14. package/src/directory-context.ts +248 -0
  15. package/src/errors.ts +41 -0
  16. package/src/formatter.ts +427 -0
  17. package/src/graph-traversal.ts +247 -0
  18. package/src/hooks/context-surfacing.ts +317 -0
  19. package/src/hooks/curator-nudge.ts +89 -0
  20. package/src/hooks/decision-extractor.ts +639 -0
  21. package/src/hooks/feedback-loop.ts +214 -0
  22. package/src/hooks/handoff-generator.ts +345 -0
  23. package/src/hooks/postcompact-inject.ts +226 -0
  24. package/src/hooks/precompact-extract.ts +314 -0
  25. package/src/hooks/pretool-inject.ts +79 -0
  26. package/src/hooks/session-bootstrap.ts +324 -0
  27. package/src/hooks/staleness-check.ts +130 -0
  28. package/src/hooks.ts +367 -0
  29. package/src/indexer.ts +327 -0
  30. package/src/intent.ts +294 -0
  31. package/src/limits.ts +26 -0
  32. package/src/llm.ts +1175 -0
  33. package/src/mcp.ts +2138 -0
  34. package/src/memory.ts +336 -0
  35. package/src/mmr.ts +93 -0
  36. package/src/observer.ts +269 -0
  37. package/src/openclaw/engine.ts +283 -0
  38. package/src/openclaw/index.ts +221 -0
  39. package/src/openclaw/plugin.json +83 -0
  40. package/src/openclaw/shell.ts +207 -0
  41. package/src/openclaw/tools.ts +304 -0
  42. package/src/profile.ts +346 -0
  43. package/src/promptguard.ts +218 -0
  44. package/src/retrieval-gate.ts +106 -0
  45. package/src/search-utils.ts +127 -0
  46. package/src/server.ts +783 -0
  47. package/src/splitter.ts +325 -0
  48. package/src/store.ts +4062 -0
  49. package/src/validation.ts +67 -0
  50. package/src/watcher.ts +58 -0
package/src/memory.ts ADDED
@@ -0,0 +1,336 @@
1
/**
 * ClawMem Memory Module - SAME composite scoring layer
 *
 * Provides recency decay, confidence scoring, and composite scoring
 * that overlays on top of QMD's raw search results.
 */

// =============================================================================
// Content Type Half-Lives (days until score drops to 50%)
// =============================================================================

// Exponential-decay half-life per content type, in days: a memory's recency
// score halves every `halfLife` days since modification (see recencyScore).
// Infinity marks non-decaying types, whose recency score is pinned at 1.0.
export const HALF_LIVES: Record<string, number> = {
  handoff: 30,        // session handoffs go stale fastest
  progress: 45,       // status/progress notes
  note: 60,           // generic notes (60 is also the fallback for unknown types)
  research: 90,       // research/analysis documents
  project: 120,       // project-level documents
  decision: Infinity, // decisions are durable — never decay
  hub: Infinity,      // hub/MOC index pages — never decay
};
21
+
22
+ // =============================================================================
23
+ // Confidence Baselines by Content Type
24
+ // =============================================================================
25
+
26
+ export const TYPE_BASELINES: Record<string, number> = {
27
+ decision: 0.85,
28
+ hub: 0.80,
29
+ research: 0.70,
30
+ project: 0.65,
31
+ handoff: 0.60,
32
+ progress: 0.50,
33
+ note: 0.50,
34
+ };
35
+
36
+ // =============================================================================
37
+ // Content Type Inference
38
+ // =============================================================================
39
+
40
+ export type ContentType = "decision" | "hub" | "research" | "project" | "handoff" | "progress" | "note";
41
+
42
+ export function inferContentType(path: string, explicitType?: string): ContentType {
43
+ if (explicitType && explicitType in TYPE_BASELINES) return explicitType as ContentType;
44
+
45
+ const lower = path.toLowerCase();
46
+ if (lower.includes("decision") || lower.includes("adr/") || lower.includes("adr-")) return "decision";
47
+ if (lower.includes("hub") || lower.includes("moc") || lower.match(/\/index\.md$/)) return "hub";
48
+ if (lower.includes("research") || lower.includes("investigation") || lower.includes("analysis")) return "research";
49
+ if (lower.includes("project") || lower.includes("epic") || lower.includes("initiative")) return "project";
50
+ if (lower.includes("handoff") || lower.includes("handover") || lower.includes("session")) return "handoff";
51
+ if (lower.includes("progress") || lower.includes("status") || lower.includes("standup") || lower.includes("changelog")) return "progress";
52
+ return "note";
53
+ }
54
+
55
+ // =============================================================================
56
+ // Memory Type Classification (E10)
57
+ // =============================================================================
58
+
59
+ export type MemoryType = "episodic" | "semantic" | "procedural";
60
+
61
+ /**
62
+ * Infer memory type from content metadata.
63
+ * - episodic: session events, handoffs, progress (time-bound)
64
+ * - semantic: facts, decisions, knowledge (declarative)
65
+ * - procedural: how-to, patterns, workflows (actionable)
66
+ */
67
+ export function inferMemoryType(path: string, contentType: string, body?: string): MemoryType {
68
+ if (["handoff", "progress"].includes(contentType)) return "episodic";
69
+ if (["decision", "hub", "research"].includes(contentType)) return "semantic";
70
+ if (body && /\b(step\s+\d|workflow|recipe|how\s+to|procedure|runbook|playbook)\b/i.test(body)) return "procedural";
71
+ if (path.includes("sop") || path.includes("runbook") || path.includes("playbook")) return "procedural";
72
+ if (contentType === "antipattern") return "semantic";
73
+ return "semantic";
74
+ }
75
+
76
+ // =============================================================================
77
+ // Recency Score
78
+ // =============================================================================
79
+
80
+ /**
81
+ * Compute effective half-life adjusted by access frequency.
82
+ * Frequently accessed memories decay slower (up to 3x base half-life).
83
+ */
84
+ function effectiveHalfLife(
85
+ baseHalfLife: number,
86
+ accessCount: number,
87
+ lastAccessedAt?: Date | string | null,
88
+ now: Date = new Date()
89
+ ): number {
90
+ if (!isFinite(baseHalfLife) || accessCount <= 0) return baseHalfLife;
91
+
92
+ let freshness = 1.0;
93
+ if (lastAccessedAt) {
94
+ const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
95
+ if (!isNaN(lastAccess.getTime())) {
96
+ const daysSinceAccess = (now.getTime() - lastAccess.getTime()) / (1000 * 60 * 60 * 24);
97
+ freshness = Math.max(0, 1 - daysSinceAccess / 90);
98
+ }
99
+ }
100
+
101
+ const extension = baseHalfLife * 0.3 * Math.log1p(accessCount * freshness);
102
+ return Math.min(baseHalfLife * 3, baseHalfLife + extension);
103
+ }
104
+
105
+ export function recencyScore(
106
+ modifiedAt: Date | string,
107
+ contentType: string,
108
+ now: Date = new Date(),
109
+ accessCount: number = 0,
110
+ lastAccessedAt?: Date | string | null
111
+ ): number {
112
+ const baseHalfLife = HALF_LIVES[contentType] ?? 60;
113
+ if (!isFinite(baseHalfLife)) return 1.0;
114
+
115
+ const halfLife = effectiveHalfLife(baseHalfLife, accessCount, lastAccessedAt, now);
116
+
117
+ const modified = typeof modifiedAt === "string" ? new Date(modifiedAt) : modifiedAt;
118
+ if (isNaN(modified.getTime())) return 0.5;
119
+ const daysSince = (now.getTime() - modified.getTime()) / (1000 * 60 * 60 * 24);
120
+ if (daysSince <= 0) return 1.0;
121
+ const result = Math.pow(0.5, daysSince / halfLife);
122
+ return Number.isFinite(result) ? result : 0;
123
+ }
124
+
125
// =============================================================================
// Confidence Score
// =============================================================================

/**
 * Compute a confidence score in [0, 1] for a memory.
 *
 * Combines four signals multiplicatively, clamped to 1.0:
 *   - baseline: per-type prior from TYPE_BASELINES (0.5 for unknown types)
 *   - recency: decay via recencyScore() — note it is called WITHOUT
 *     accessCount/lastAccessedAt, so the access-extended half-life does not
 *     apply here, only the base decay
 *   - accessBoost: log2-scaled boost for frequently accessed memories,
 *     capped at 1.5x
 *   - attentionDecay: 5%-per-week penalty for memories not accessed recently,
 *     floored at 0.5 (reached after ~13.5 weeks); skipped for durable types
 *     and for timestamps that look backfilled
 */
export function confidenceScore(
  contentType: string,
  modifiedAt: Date | string,
  accessCount: number,
  now: Date = new Date(),
  lastAccessedAt?: Date | string | null
): number {
  const baseline = TYPE_BASELINES[contentType] ?? 0.5;
  const recency = recencyScore(modifiedAt, contentType, now);
  // Guard against NaN/negative access counts from upstream.
  const safeAccess = Number.isFinite(accessCount) && accessCount >= 0 ? accessCount : 0;
  const accessBoost = Math.min(1.5, 1 + Math.log2(1 + safeAccess) * 0.1);

  // Attention decay: reduce confidence if not accessed recently (5% per week)
  // Only apply to episodic/progress content — skip for durable types (decision, hub, research)
  // Also skip if last_accessed_at was backfilled from modified_at (no real access yet)
  const DECAY_EXEMPT_TYPES = new Set(["decision", "hub", "research", "antipattern"]);
  let attentionDecay = 1.0;
  if (lastAccessedAt && !DECAY_EXEMPT_TYPES.has(contentType)) {
    const lastAccess = typeof lastAccessedAt === "string" ? new Date(lastAccessedAt) : lastAccessedAt;
    const modified = typeof modifiedAt === "string" ? new Date(modifiedAt) : modifiedAt;
    if (!isNaN(lastAccess.getTime())) {
      // Skip decay if last_accessed_at == modified_at (backfilled, no real access)
      // NOTE(review): if modifiedAt is unparseable, getTime() is NaN, the
      // comparison below is false, and decay still applies — confirm intended.
      const isBackfilled = Math.abs(lastAccess.getTime() - modified.getTime()) < 1000;
      if (!isBackfilled) {
        const daysSinceAccess = (now.getTime() - lastAccess.getTime()) / (1000 * 60 * 60 * 24);
        if (daysSinceAccess > 0) {
          attentionDecay = Math.max(0.5, Math.pow(0.95, daysSinceAccess / 7));
        }
      }
    }
  }

  const result = Math.min(1.0, baseline * recency * accessBoost * attentionDecay);
  return Number.isFinite(result) ? result : 0;
}
164
+
165
+ // =============================================================================
166
+ // Composite Scoring
167
+ // =============================================================================
168
+
169
+ export type CompositeWeights = {
170
+ search: number;
171
+ recency: number;
172
+ confidence: number;
173
+ };
174
+
175
+ export const DEFAULT_WEIGHTS: CompositeWeights = { search: 0.5, recency: 0.25, confidence: 0.25 };
176
+ export const RECENCY_WEIGHTS: CompositeWeights = { search: 0.1, recency: 0.7, confidence: 0.2 };
177
+
178
+ const RECENCY_PATTERNS = [
179
+ /\brecent(ly)?\b/i,
180
+ /\blast\s+(session|time|week|month|few\s+days)\b/i,
181
+ /\bleft\s+off\b/i,
182
+ /\bwhere\s+(was|were)\s+(we|i)\b/i,
183
+ /\bpick\s+up\b/i,
184
+ /\bcontinue\b/i,
185
+ /\byesterday\b/i,
186
+ /\btoday\b/i,
187
+ /\bwhat\s+(was|were)\s+(we|i)\s+(doing|working)\b/i,
188
+ ];
189
+
190
+ export function hasRecencyIntent(query: string): boolean {
191
+ return RECENCY_PATTERNS.some(p => p.test(query));
192
+ }
193
+
194
+ export function compositeScore(
195
+ searchScore: number,
196
+ recency: number,
197
+ confidence: number,
198
+ weights: CompositeWeights = DEFAULT_WEIGHTS
199
+ ): number {
200
+ // Guard against NaN propagation
201
+ const s = Number.isFinite(searchScore) ? searchScore : 0;
202
+ const r = Number.isFinite(recency) ? recency : 0;
203
+ const c = Number.isFinite(confidence) ? confidence : 0;
204
+ const result = weights.search * s + weights.recency * r + weights.confidence * c;
205
+ return Number.isFinite(result) ? result : 0;
206
+ }
207
+
208
// =============================================================================
// Apply Composite Scoring to Search Results
// =============================================================================

// A raw search hit enriched with the metadata needed for composite scoring.
export type EnrichedResult = {
  filepath: string;        // canonical path ("clawmem://collection/path" form)
  displayPath: string;     // human-readable path ("collection/path" form)
  title: string;
  score: number;           // raw relevance from the underlying search engine
  body?: string;
  contentType: string;     // see ContentType / HALF_LIVES keys
  modifiedAt: string;
  accessCount: number;
  confidence: number;      // stored confidence; 0.5 means "unset default"
  qualityScore: number;    // 0..1 quality signal; 0.5 is neutral
  pinned: boolean;
  context: string | null;
  hash: string;
  docid: string;
  collectionName: string;
  bodyLength: number;      // used for length normalization during scoring
  source: "fts" | "vec";   // which index produced the hit
  chunkPos?: number;
  fragmentType?: string;
  fragmentLabel?: string;
  lastAccessedAt?: string | null;
  // Engram integration: frequency/evolution metadata
  duplicateCount: number;
  revisionCount: number;
};

// An EnrichedResult after composite scoring has been applied.
export type ScoredResult = EnrichedResult & {
  compositeScore: number;  // final blended + adjusted ranking score
  recencyScore: number;    // recency component, kept for diagnostics
};

// Returns co-access partners for a path: docs historically accessed together
// with it, each with an occurrence count.
export type CoActivationFn = (path: string) => { path: string; count: number }[];
245
+
246
/**
 * Overlay composite scoring on raw search results and return them re-ranked.
 *
 * Pipeline per result:
 *   1. composite = weighted blend of search score, recency, and confidence
 *      (recency-heavy weights when the query shows recency intent)
 *   2. multiplicative adjustments: quality, length normalization, and an
 *      Engram revision/duplicate frequency boost (capped at 10%)
 *   3. additive pin boost (+0.3, result capped at 1.0)
 * Then an optional co-activation boost (capped at 15%) for partners of the
 * top quartile, a sort by composite score, and — for recency-intent queries —
 * a re-sort that floats handoff/decision/progress types to the top.
 *
 * @param results - Enriched search hits (any order)
 * @param query - Original user query, used for recency-intent detection
 * @param coActivationFn - Optional lookup of docs co-accessed with a path
 */
export function applyCompositeScoring(
  results: EnrichedResult[],
  query: string,
  coActivationFn?: CoActivationFn
): ScoredResult[] {
  const weights = hasRecencyIntent(query) ? RECENCY_WEIGHTS : DEFAULT_WEIGHTS;
  const now = new Date();

  const scored = results.map(r => {
    const recency = recencyScore(r.modifiedAt, r.contentType, now, r.accessCount, r.lastAccessedAt);
    const computed = confidenceScore(r.contentType, r.modifiedAt, r.accessCount, now, r.lastAccessedAt);
    // Blend stored confidence (from contradiction lowering, feedback boosts) with computed.
    // Default stored=0.5 → 100% computed. Stored deviations shift the result proportionally.
    const storedConf = r.confidence ?? 0.5;
    const conf = storedConf === 0.5 ? computed : Math.min(1.0, computed * (storedConf / 0.5) * 0.5 + computed * 0.5);
    const composite = compositeScore(r.score, recency, conf, weights);

    // Quality multiplier: 0.5 default → 1.0x (no effect)
    // Range: 0.0 → 0.7x penalty, 1.0 → 1.3x boost
    const qualityMultiplier = 0.7 + 0.6 * (r.qualityScore ?? 0.5);
    let adjusted = composite * qualityMultiplier;

    // Length normalization: penalize verbose entries that dominate via keyword density.
    // anchor=500 chars. Per 1/(1+0.5*log2(len/500)): at anchor → 1.0x,
    // 1000 → ~0.67x, 2000 → 0.5x. Never boosts short docs (ratio floored at 1),
    // and the Math.max below floors the total penalty at 0.3x.
    const lenRatio = Math.log2(Math.max((r.bodyLength || 500) / 500, 1));
    const lenFactor = 1 / (1 + 0.5 * lenRatio);
    adjusted = Math.max(adjusted * 0.3, adjusted * lenFactor);

    // Engram integration: revision durability signal (Phase 3)
    // revision_count is weighted more heavily than duplicate_count (evolution vs noise).
    // Capped at 10% to prevent runaway amplification.
    const revisions = (r.revisionCount || 1) - 1;
    const duplicates = (r.duplicateCount || 1) - 1;
    const freqSignal = revisions * 2 + duplicates; // revisions weighted 2x
    const freqBoost = freqSignal > 0 ? Math.min(0.10, Math.log1p(freqSignal) * 0.03) : 0;
    adjusted *= (1 + freqBoost);

    // Pin boost: +0.3 additive, capped at 1.0
    if (r.pinned) {
      adjusted = Math.min(1.0, adjusted + 0.3);
    }

    return { ...r, compositeScore: adjusted, recencyScore: recency };
  });

  // Co-activation boost: docs frequently accessed alongside top results get a boost
  if (coActivationFn && scored.length > 1) {
    const topQuartile = Math.max(1, Math.floor(scored.length * 0.25));
    // Co-activations are recorded using displayPath format (collection/path),
    // but scored results use filepath format (clawmem://collection/path).
    // Normalize: strip clawmem:// prefix for lookup, match back via both formats.
    const stripPrefix = (p: string) => p.startsWith("clawmem://") ? p.slice(10) : p;
    const topDisplayPaths = new Set(scored.slice(0, topQuartile).map(r => stripPrefix(r.filepath)));
    const coActivatedCounts = new Map<string, number>();

    for (const topPath of topDisplayPaths) {
      const partners = coActivationFn(topPath);
      for (const p of partners) {
        // Only boost docs outside the top set; the top docs earned their rank.
        if (!topDisplayPaths.has(p.path)) {
          coActivatedCounts.set(p.path, (coActivatedCounts.get(p.path) || 0) + p.count);
        }
      }
    }

    if (coActivatedCounts.size > 0) {
      for (const r of scored) {
        const coCount = coActivatedCounts.get(stripPrefix(r.filepath));
        if (coCount) {
          // Boost capped at 15% to prevent runaway amplification
          r.compositeScore *= 1 + Math.min(coCount / 10, 0.15);
        }
      }
    }
  }

  // Sort by composite score descending
  scored.sort((a, b) => b.compositeScore - a.compositeScore);

  // Boost handoff/decision types when recency intent detected
  if (hasRecencyIntent(query)) {
    const priority = new Set<string>(["handoff", "decision", "progress"]);
    scored.sort((a, b) => {
      const aPriority = priority.has(a.contentType) ? 1 : 0;
      const bPriority = priority.has(b.contentType) ? 1 : 0;
      if (aPriority !== bPriority) return bPriority - aPriority;
      return b.compositeScore - a.compositeScore;
    });
  }

  return scored;
}
package/src/mmr.ts ADDED
@@ -0,0 +1,93 @@
1
+ /**
2
+ * MMR (Maximal Marginal Relevance) Diversity Filter
3
+ *
4
+ * Prevents top-k results from being dominated by near-duplicate entries.
5
+ * Uses text-based similarity (word overlap) — no vector lookups needed.
6
+ *
7
+ * Ported from memory-lancedb-pro's applyMMRDiversity(), adapted to use
8
+ * Jaccard similarity on word bigrams instead of cosine on vectors.
9
+ */
10
+
11
+ import type { ScoredResult } from "./memory.ts";
12
+
13
+ // =============================================================================
14
+ // Text Similarity
15
+ // =============================================================================
16
+
17
+ function extractBigrams(text: string): Set<string> {
18
+ const words = text.toLowerCase().replace(/[^\w\s]/g, "").split(/\s+/).filter(Boolean);
19
+ const bigrams = new Set<string>();
20
+ for (let i = 0; i < words.length - 1; i++) {
21
+ bigrams.add(`${words[i]} ${words[i + 1]}`);
22
+ }
23
+ // Also add individual words for short texts
24
+ for (const w of words) {
25
+ if (w.length > 3) bigrams.add(w);
26
+ }
27
+ return bigrams;
28
+ }
29
+
30
+ function jaccardSimilarity(a: Set<string>, b: Set<string>): number {
31
+ if (a.size === 0 && b.size === 0) return 1.0;
32
+ if (a.size === 0 || b.size === 0) return 0;
33
+ let intersection = 0;
34
+ const smaller = a.size <= b.size ? a : b;
35
+ const larger = a.size <= b.size ? b : a;
36
+ for (const item of smaller) {
37
+ if (larger.has(item)) intersection++;
38
+ }
39
+ return intersection / (a.size + b.size - intersection);
40
+ }
41
+
42
+ // =============================================================================
43
+ // MMR Diversity Filter
44
+ // =============================================================================
45
+
46
+ /**
47
+ * Greedily select results that are both relevant (high score) and diverse
48
+ * (low textual similarity to already-selected results).
49
+ *
50
+ * Results exceeding the similarity threshold against any selected result
51
+ * are demoted to the end rather than removed entirely.
52
+ *
53
+ * @param results - Pre-sorted by compositeScore descending
54
+ * @param similarityThreshold - Jaccard threshold above which results are demoted (default 0.6)
55
+ * @returns Reordered results with diverse items first, near-duplicates appended
56
+ */
57
+ export function applyMMRDiversity(
58
+ results: ScoredResult[],
59
+ similarityThreshold: number = 0.6
60
+ ): ScoredResult[] {
61
+ if (results.length <= 2) return results;
62
+
63
+ const bigramCache = new Map<string, Set<string>>();
64
+ function getBigrams(r: ScoredResult): Set<string> {
65
+ const key = r.filepath;
66
+ let cached = bigramCache.get(key);
67
+ if (!cached) {
68
+ cached = extractBigrams(`${r.title} ${r.body || ""}`);
69
+ bigramCache.set(key, cached);
70
+ }
71
+ return cached;
72
+ }
73
+
74
+ const selected: ScoredResult[] = [];
75
+ const deferred: ScoredResult[] = [];
76
+
77
+ for (const candidate of results) {
78
+ const candidateBigrams = getBigrams(candidate);
79
+
80
+ const tooSimilar = selected.some(s => {
81
+ const sim = jaccardSimilarity(getBigrams(s), candidateBigrams);
82
+ return sim > similarityThreshold;
83
+ });
84
+
85
+ if (tooSimilar) {
86
+ deferred.push(candidate);
87
+ } else {
88
+ selected.push(candidate);
89
+ }
90
+ }
91
+
92
+ return [...selected, ...deferred];
93
+ }
@@ -0,0 +1,269 @@
1
/**
 * Local Observer Agent - Structured observation extraction using local GGUF model
 *
 * Uses Qwen3-1.7B (already loaded for query expansion) with XML-formatted prompts
 * to extract structured observations and session summaries from transcripts.
 * Falls back gracefully when model is unavailable.
 */

import type { TranscriptMessage } from "./hooks.ts";
import { getDefaultLlamaCpp } from "./llm.ts";
import { MAX_LLM_GENERATE_TIMEOUT_MS } from "./limits.ts";

// =============================================================================
// Types
// =============================================================================

// One structured observation extracted from a session transcript.
// Mirrors the <observation> XML schema in OBSERVATION_SYSTEM_PROMPT.
export type Observation = {
  type: "decision" | "bugfix" | "feature" | "refactor" | "discovery" | "change";
  title: string;       // capped at 80 chars by the parser
  facts: string[];     // atomic standalone facts (parser drops facts < 5 chars)
  narrative: string;   // context/reasoning; may be ""
  concepts: string[];  // restricted to VALID_CONCEPTS, lowercased
  filesRead: string[];
  filesModified: string[];
};

// Structured end-of-session summary; mirrors the <summary> XML schema in
// SUMMARY_SYSTEM_PROMPT. Missing sections default to "None" / "Unknown".
export type SessionSummary = {
  request: string;
  investigated: string;
  learned: string;
  completed: string;
  nextSteps: string;
};
34
+
35
+ // =============================================================================
36
+ // Config
37
+ // =============================================================================
38
+
39
+ const MAX_TRANSCRIPT_MESSAGES = 100;
40
+ const MAX_USER_MSG_CHARS = 200;
41
+ const MAX_ASSISTANT_MSG_CHARS = 500;
42
+ const MAX_TRANSCRIPT_TOKENS = 2000;
43
+ const GENERATION_MAX_TOKENS = 2000;
44
+ const GENERATION_TEMPERATURE = 0.3;
45
+
46
// =============================================================================
// System Prompts
// =============================================================================

// Prompt for observation extraction. The XML schema here must match what
// parseObservationXml() expects, and the <type>/<concept> vocabularies must
// match VALID_OBSERVATION_TYPES / VALID_CONCEPTS.
const OBSERVATION_SYSTEM_PROMPT = `You are an observer analyzing a coding session transcript. Extract structured observations.
For each significant action, decision, or discovery, output an <observation> XML element.

<observation>
<type>one of: decision, bugfix, feature, refactor, discovery, change</type>
<title>Brief descriptive title (max 80 chars)</title>
<facts>
<fact>Individual atomic fact</fact>
</facts>
<narrative>2-3 sentences explaining context and reasoning</narrative>
<concepts>
<concept>one of: how-it-works, why-it-exists, what-changed, problem-solution, gotcha, pattern, trade-off</concept>
</concepts>
<files_read><file>path/to/file</file></files_read>
<files_modified><file>path/to/file</file></files_modified>
</observation>

Rules:
- Output 1-5 observations, focusing on the MOST significant events
- Each fact should be a standalone, atomic piece of information
- The narrative should explain WHY something was done, not just WHAT
- Only include files that were explicitly mentioned in the transcript
- If no significant observations, output nothing`;

// Prompt for session summarization; schema must match parseSummaryXml().
const SUMMARY_SYSTEM_PROMPT = `You are a session summarizer. Analyze this coding session transcript and output a structured summary.

<summary>
<request>What the user originally asked for (1-2 sentences)</request>
<investigated>What was explored or researched (1-2 sentences)</investigated>
<learned>Key insights or discoveries (1-2 sentences)</learned>
<completed>What was actually accomplished (1-2 sentences)</completed>
<next_steps>What should happen next (1-2 sentences)</next_steps>
</summary>

Rules:
- Be concise and specific
- Focus on outcomes, not process
- If a section has nothing relevant, write "None"`;
88
+
89
+ // =============================================================================
90
+ // Transcript Preparation
91
+ // =============================================================================
92
+
93
+ function prepareTranscript(messages: TranscriptMessage[]): string {
94
+ const recent = messages.slice(-MAX_TRANSCRIPT_MESSAGES);
95
+ const lines: string[] = [];
96
+ let charCount = 0;
97
+ const charBudget = MAX_TRANSCRIPT_TOKENS * 4; // ~4 chars per token
98
+
99
+ for (const msg of recent) {
100
+ if (charCount >= charBudget) break;
101
+
102
+ const maxChars = msg.role === "user" ? MAX_USER_MSG_CHARS : MAX_ASSISTANT_MSG_CHARS;
103
+ const content = msg.content.length > maxChars
104
+ ? msg.content.slice(0, maxChars) + "..."
105
+ : msg.content;
106
+
107
+ const line = `[${msg.role}]: ${content}`;
108
+ lines.push(line);
109
+ charCount += line.length;
110
+ }
111
+
112
+ return lines.join("\n");
113
+ }
114
+
115
// =============================================================================
// XML Parsers
// =============================================================================

// Whitelists used by the parsers — values outside these sets are rejected
// (types) or silently dropped (concepts). They must stay in sync with the
// vocabularies listed in OBSERVATION_SYSTEM_PROMPT.
const VALID_OBSERVATION_TYPES = new Set([
  "decision", "bugfix", "feature", "refactor", "discovery", "change",
]);

const VALID_CONCEPTS = new Set([
  "how-it-works", "why-it-exists", "what-changed", "problem-solution",
  "gotcha", "pattern", "trade-off",
]);
127
+
128
/**
 * Parse one <observation> XML fragment (the inner content, without the
 * wrapping <observation> tags) into a validated Observation.
 *
 * Returns null when the required <type>/<title> fields are missing or the
 * type is not in VALID_OBSERVATION_TYPES. Optional parts degrade gracefully:
 * facts shorter than 5 chars are dropped, unknown concepts are filtered out,
 * and a missing narrative becomes "".
 */
export function parseObservationXml(xml: string): Observation | null {
  // Dotall ("s") regexes so values may span multiple lines.
  const typeMatch = xml.match(/<type>\s*(.*?)\s*<\/type>/s);
  const titleMatch = xml.match(/<title>\s*(.*?)\s*<\/title>/s);
  const narrativeMatch = xml.match(/<narrative>\s*(.*?)\s*<\/narrative>/s);

  if (!typeMatch?.[1] || !titleMatch?.[1]) return null;

  const type = typeMatch[1].trim().toLowerCase();
  if (!VALID_OBSERVATION_TYPES.has(type)) return null;

  const facts = extractMultiple(xml, "fact");
  // Concepts are validated case-insensitively and normalized to lowercase.
  const concepts = extractMultiple(xml, "concept")
    .filter(c => VALID_CONCEPTS.has(c.toLowerCase()))
    .map(c => c.toLowerCase());
  // File lists are scoped to their parent element so <file> entries from
  // files_read and files_modified do not bleed into each other.
  const filesRead = extractMultiple(xml, "file", "files_read");
  const filesModified = extractMultiple(xml, "file", "files_modified");

  return {
    type: type as Observation["type"],
    title: titleMatch[1].trim().slice(0, 80), // enforce the 80-char title cap
    facts: facts.filter(f => f.length >= 5),  // drop trivially short facts
    narrative: narrativeMatch?.[1]?.trim() || "",
    concepts,
    filesRead,
    filesModified,
  };
}
155
+
156
+ export function parseSummaryXml(xml: string): SessionSummary | null {
157
+ const request = extractSingle(xml, "request");
158
+ const investigated = extractSingle(xml, "investigated");
159
+ const learned = extractSingle(xml, "learned");
160
+ const completed = extractSingle(xml, "completed");
161
+ const nextSteps = extractSingle(xml, "next_steps");
162
+
163
+ if (!request && !completed) return null;
164
+
165
+ return {
166
+ request: request || "Unknown",
167
+ investigated: investigated || "None",
168
+ learned: learned || "None",
169
+ completed: completed || "None",
170
+ nextSteps: nextSteps || "None",
171
+ };
172
+ }
173
+
174
+ function extractSingle(xml: string, tag: string): string | null {
175
+ const match = xml.match(new RegExp(`<${tag}>\\s*(.*?)\\s*</${tag}>`, "s"));
176
+ return match?.[1]?.trim() || null;
177
+ }
178
+
179
+ function extractMultiple(xml: string, tag: string, parentTag?: string): string[] {
180
+ let scope = xml;
181
+ if (parentTag) {
182
+ const parentMatch = xml.match(new RegExp(`<${parentTag}>([\\s\\S]*?)</${parentTag}>`, "s"));
183
+ if (!parentMatch?.[1]) return [];
184
+ scope = parentMatch[1];
185
+ }
186
+
187
+ const results: string[] = [];
188
+ const regex = new RegExp(`<${tag}>\\s*(.*?)\\s*</${tag}>`, "gs");
189
+ let match;
190
+ while ((match = regex.exec(scope)) !== null) {
191
+ const text = match[1]?.trim();
192
+ if (text) results.push(text);
193
+ }
194
+ return results;
195
+ }
196
+
197
+ // =============================================================================
198
+ // Core Extraction Functions
199
+ // =============================================================================
200
+
201
+ export async function extractObservations(
202
+ messages: TranscriptMessage[]
203
+ ): Promise<Observation[]> {
204
+ if (messages.length < 4) return [];
205
+
206
+ const transcript = prepareTranscript(messages);
207
+ const prompt = `${OBSERVATION_SYSTEM_PROMPT}\n\n--- TRANSCRIPT ---\n${transcript}\n--- END TRANSCRIPT ---\n\nExtract observations:`;
208
+
209
+ const controller = new AbortController();
210
+ const timer = setTimeout(() => controller.abort(), MAX_LLM_GENERATE_TIMEOUT_MS);
211
+ try {
212
+ const llm = getDefaultLlamaCpp();
213
+ const result = await llm.generate(prompt, {
214
+ maxTokens: GENERATION_MAX_TOKENS,
215
+ temperature: GENERATION_TEMPERATURE,
216
+ signal: controller.signal,
217
+ });
218
+
219
+ if (!result?.text) return [];
220
+
221
+ // Parse all <observation>...</observation> blocks
222
+ const observations: Observation[] = [];
223
+ const regex = /<observation>([\s\S]*?)<\/observation>/g;
224
+ let match;
225
+ while ((match = regex.exec(result.text)) !== null) {
226
+ const obs = parseObservationXml(match[1]!);
227
+ if (obs) observations.push(obs);
228
+ }
229
+
230
+ return observations;
231
+ } catch (err) {
232
+ console.error("Observer: observation extraction failed:", err);
233
+ return [];
234
+ } finally {
235
+ clearTimeout(timer);
236
+ }
237
+ }
238
+
239
+ export async function extractSummary(
240
+ messages: TranscriptMessage[]
241
+ ): Promise<SessionSummary | null> {
242
+ if (messages.length < 4) return null;
243
+
244
+ const transcript = prepareTranscript(messages);
245
+ const prompt = `${SUMMARY_SYSTEM_PROMPT}\n\n--- TRANSCRIPT ---\n${transcript}\n--- END TRANSCRIPT ---\n\nGenerate summary:`;
246
+
247
+ const controller = new AbortController();
248
+ const timer = setTimeout(() => controller.abort(), MAX_LLM_GENERATE_TIMEOUT_MS);
249
+ try {
250
+ const llm = getDefaultLlamaCpp();
251
+ const result = await llm.generate(prompt, {
252
+ maxTokens: 500,
253
+ temperature: GENERATION_TEMPERATURE,
254
+ signal: controller.signal,
255
+ });
256
+
257
+ if (!result?.text) return null;
258
+
259
+ const summaryMatch = result.text.match(/<summary>([\s\S]*?)<\/summary>/);
260
+ if (!summaryMatch?.[1]) return null;
261
+
262
+ return parseSummaryXml(summaryMatch[1]);
263
+ } catch (err) {
264
+ console.error("Observer: summary extraction failed:", err);
265
+ return null;
266
+ } finally {
267
+ clearTimeout(timer);
268
+ }
269
+ }