openclaw-mem 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -30,6 +30,19 @@ npm install
30
30
  npm install openclaw-mem
31
31
  ```
32
32
 
33
+ > ⚠️ **Important:** npm installation does NOT automatically prompt for API key configuration. You MUST manually configure your DeepSeek API key after installation. See [Configuration](#configuration) section below.
34
+
35
+ **After npm install, choose one of these methods:**
36
+
37
+ ```bash
38
+ # Method 1: Run the setup wizard
39
+ npx openclaw-mem-setup
40
+
41
+ # Method 2: Set environment variable directly
42
+ export DEEPSEEK_API_KEY="your-deepseek-api-key"
43
+ # Add this line to your ~/.bashrc or ~/.zshrc to persist
44
+ ```
45
+
33
46
  ## Quick Start
34
47
 
35
48
  1. **Install the hook** (see above)
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Backfill embeddings for existing observations.
4
+ * Run manually: node backfill-embeddings.js
5
+ *
6
+ * Finds all observations without embeddings and generates them
7
+ * in batches of 16 using the DeepSeek embeddings API.
8
+ */
9
+
10
+ import database from './database.js';
11
+ import { batchEmbeddings } from './gateway-llm.js';
12
+
13
+ const BATCH_SIZE = 16;
14
+
15
+ async function backfill() {
16
+ const totalObs = database.getStats().total_observations;
17
+ const existingEmbeddings = database.getEmbeddingCount();
18
+ console.log(`Total observations: ${totalObs}`);
19
+ console.log(`Existing embeddings: ${existingEmbeddings}`);
20
+ console.log(`Missing: ~${totalObs - existingEmbeddings}`);
21
+ console.log('');
22
+
23
+ let processed = 0;
24
+ let saved = 0;
25
+ let failed = 0;
26
+
27
+ while (true) {
28
+ const batch = database.getObservationsWithoutEmbeddings(BATCH_SIZE);
29
+ if (batch.length === 0) break;
30
+
31
+ // Build text for each observation
32
+ const texts = batch.map(obs => {
33
+ const parts = [obs.summary, obs.narrative].filter(Boolean);
34
+ return parts.join(' ').trim() || `Observation #${obs.id}`;
35
+ });
36
+
37
+ console.log(`Batch ${Math.floor(processed / BATCH_SIZE) + 1}: generating embeddings for ${batch.length} observations (IDs ${batch[0].id}-${batch[batch.length - 1].id})...`);
38
+
39
+ const embeddings = await batchEmbeddings(texts);
40
+
41
+ for (let i = 0; i < batch.length; i++) {
42
+ const obs = batch[i];
43
+ const embedding = embeddings[i];
44
+
45
+ if (embedding) {
46
+ const result = database.saveEmbedding(obs.id, embedding);
47
+ if (result.success) {
48
+ saved++;
49
+ } else {
50
+ failed++;
51
+ console.error(` Failed to save embedding for #${obs.id}: ${result.error}`);
52
+ }
53
+ } else {
54
+ failed++;
55
+ console.error(` No embedding returned for #${obs.id}`);
56
+ }
57
+ }
58
+
59
+ processed += batch.length;
60
+ console.log(` Progress: ${saved} saved, ${failed} failed, ${processed} processed`);
61
+
62
+ // Small delay between batches to avoid rate limiting
63
+ if (batch.length === BATCH_SIZE) {
64
+ await new Promise(r => setTimeout(r, 500));
65
+ }
66
+ }
67
+
68
+ console.log('');
69
+ console.log('=== Backfill Complete ===');
70
+ console.log(`Processed: ${processed}`);
71
+ console.log(`Saved: ${saved}`);
72
+ console.log(`Failed: ${failed}`);
73
+ console.log(`Total embeddings now: ${database.getEmbeddingCount()}`);
74
+ }
75
+
76
+ backfill().catch(err => {
77
+ console.error('Backfill failed:', err);
78
+ process.exit(1);
79
+ });
package/database.js CHANGED
@@ -7,6 +7,7 @@ import fs from 'node:fs';
7
7
  import path from 'node:path';
8
8
  import os from 'node:os';
9
9
  import Database from 'better-sqlite3';
10
+ import * as sqliteVec from 'sqlite-vec';
10
11
 
11
12
  const DATA_DIR = path.join(os.homedir(), '.openclaw-mem');
12
13
  const DB_PATH = path.join(DATA_DIR, 'memory.db');
@@ -20,6 +21,14 @@ if (!fs.existsSync(DATA_DIR)) {
20
21
  const db = new Database(DB_PATH);
21
22
  db.pragma('journal_mode = WAL');
22
23
 
24
+ // Load sqlite-vec extension for vector search
25
+ try {
26
+ sqliteVec.load(db);
27
+ console.log('[openclaw-mem] sqlite-vec extension loaded');
28
+ } catch (e) {
29
+ console.error('[openclaw-mem] Failed to load sqlite-vec:', e.message);
30
+ }
31
+
23
32
  // Create tables (base schema without new columns for backward compatibility)
24
33
  db.exec(`
25
34
  -- Sessions table
@@ -168,6 +177,28 @@ db.exec(`
168
177
  END;
169
178
  `);
170
179
 
180
+ // Create vec0 virtual table for vector embeddings
181
+ // Drop and recreate if dimension mismatch (migration from 768/1024 to 384)
182
+ try {
183
+ const vecInfo = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='observation_embeddings'`).get();
184
+ if (vecInfo && !vecInfo.sql.includes('float[384]')) {
185
+ console.log('[openclaw-mem] Recreating vec0 table with 384 dimensions...');
186
+ db.exec(`DROP TABLE IF EXISTS observation_embeddings`);
187
+ }
188
+ } catch (e) { /* table doesn't exist yet */ }
189
+
190
+ try {
191
+ db.exec(`
192
+ CREATE VIRTUAL TABLE IF NOT EXISTS observation_embeddings USING vec0(
193
+ observation_id INTEGER PRIMARY KEY,
194
+ embedding float[384]
195
+ );
196
+ `);
197
+ console.log('[openclaw-mem] observation_embeddings vec0 table ready');
198
+ } catch (e) {
199
+ console.error('[openclaw-mem] Failed to create vec0 table:', e.message);
200
+ }
201
+
171
202
  // Prepared statements
172
203
  const stmts = {
173
204
  // Sessions
@@ -261,8 +292,8 @@ const stmts = {
261
292
 
262
293
  // Summaries
263
294
  saveSummary: db.prepare(`
264
- INSERT INTO summaries (session_id, content, request, learned, completed, next_steps)
265
- VALUES (?, ?, ?, ?, ?, ?)
295
+ INSERT INTO summaries (session_id, content, request, investigated, learned, completed, next_steps)
296
+ VALUES (?, ?, ?, ?, ?, ?, ?)
266
297
  `),
267
298
 
268
299
  getRecentSummaries: db.prepare(`
@@ -290,6 +321,34 @@ const stmts = {
290
321
  LIMIT 1
291
322
  `),
292
323
 
324
+ // Embedding operations
325
+ saveEmbedding: db.prepare(`
326
+ INSERT OR REPLACE INTO observation_embeddings (observation_id, embedding)
327
+ VALUES (?, ?)
328
+ `),
329
+
330
+ searchByVector: db.prepare(`
331
+ SELECT observation_id, distance
332
+ FROM observation_embeddings
333
+ WHERE embedding MATCH ?
334
+ AND k = ?
335
+ ORDER BY distance
336
+ `),
337
+
338
+ getEmbeddingCount: db.prepare(`
339
+ SELECT COUNT(*) as count FROM observation_embeddings
340
+ `),
341
+
342
+ getObservationsWithoutEmbeddings: db.prepare(`
343
+ SELECT o.id, o.summary, o.narrative
344
+ FROM observations o
345
+ LEFT JOIN observation_embeddings oe ON o.id = oe.observation_id
346
+ WHERE oe.observation_id IS NULL
347
+ AND (o.summary IS NOT NULL OR o.narrative IS NOT NULL)
348
+ ORDER BY o.id
349
+ LIMIT ?
350
+ `),
351
+
293
352
  // Stats
294
353
  getStats: db.prepare(`
295
354
  SELECT
@@ -298,7 +357,8 @@ const stmts = {
298
357
  (SELECT COUNT(*) FROM summaries) as total_summaries,
299
358
  (SELECT COUNT(*) FROM user_prompts) as total_user_prompts,
300
359
  (SELECT SUM(tokens_discovery) FROM observations) as total_discovery_tokens,
301
- (SELECT SUM(tokens_read) FROM observations) as total_read_tokens
360
+ (SELECT SUM(tokens_read) FROM observations) as total_read_tokens,
361
+ (SELECT COUNT(*) FROM observation_embeddings) as total_embeddings
302
362
  `)
303
363
  };
304
364
 
@@ -489,8 +549,8 @@ export const database = {
489
549
  },
490
550
 
491
551
  // Summary operations
492
- saveSummary(sessionId, content, request = null, learned = null, completed = null, nextSteps = null) {
493
- const result = stmts.saveSummary.run(sessionId, content, request, learned, completed, nextSteps);
552
+ saveSummary(sessionId, content, request = null, investigated = null, learned = null, completed = null, nextSteps = null) {
553
+ const result = stmts.saveSummary.run(sessionId, content, request, investigated, learned, completed, nextSteps);
494
554
  return { success: true, id: result.lastInsertRowid };
495
555
  },
496
556
 
@@ -506,6 +566,51 @@ export const database = {
506
566
  return stmts.getSummaryBySessionKey.get(sessionKey);
507
567
  },
508
568
 
569
+ // Embedding operations
570
+ saveEmbedding(observationId, embedding) {
571
+ try {
572
+ // sqlite-vec expects Float32Array directly, not Buffer
573
+ const vec = embedding instanceof Float32Array
574
+ ? embedding
575
+ : new Float32Array(embedding);
576
+ stmts.saveEmbedding.run(BigInt(observationId), vec);
577
+ return { success: true };
578
+ } catch (err) {
579
+ console.error('[openclaw-mem] saveEmbedding error:', err.message);
580
+ return { success: false, error: err.message };
581
+ }
582
+ },
583
+
584
+ searchByVector(embedding, limit = 20) {
585
+ try {
586
+ const vec = embedding instanceof Float32Array
587
+ ? embedding
588
+ : new Float32Array(embedding);
589
+ const rows = stmts.searchByVector.all(vec, limit);
590
+ return rows;
591
+ } catch (err) {
592
+ console.error('[openclaw-mem] searchByVector error:', err.message);
593
+ return [];
594
+ }
595
+ },
596
+
597
+ getEmbeddingCount() {
598
+ try {
599
+ return stmts.getEmbeddingCount.get().count;
600
+ } catch {
601
+ return 0;
602
+ }
603
+ },
604
+
605
+ getObservationsWithoutEmbeddings(limit = 100) {
606
+ try {
607
+ return stmts.getObservationsWithoutEmbeddings.all(limit);
608
+ } catch (err) {
609
+ console.error('[openclaw-mem] getObservationsWithoutEmbeddings error:', err.message);
610
+ return [];
611
+ }
612
+ },
613
+
509
614
  // Stats
510
615
  getStats() {
511
616
  return stmts.getStats.get();
package/extractor.js CHANGED
@@ -1,19 +1,31 @@
1
1
  /**
2
2
  * OpenClaw-Mem LLM Extractor
3
- * Uses the local OpenClaw Gateway model to extract concepts and metadata
3
+ *
4
+ * Structured observation extraction inspired by claude-mem's observer agent pattern.
5
+ * Uses DeepSeek API to produce rich, searchable memory records.
4
6
  */
5
7
 
6
8
  import { callGatewayChat } from './gateway-llm.js';
7
9
 
8
- // Cache for extracted concepts (to avoid repeated API calls)
10
+ // ── Valid concept categories (fixed taxonomy for consistent search) ──
11
+ const VALID_CONCEPTS = [
12
+ 'how-it-works', // understanding mechanisms
13
+ 'why-it-exists', // purpose or rationale
14
+ 'what-changed', // modifications made
15
+ 'problem-solution', // issues and their fixes
16
+ 'gotcha', // traps or edge cases
17
+ 'pattern', // reusable approach
18
+ 'trade-off' // pros/cons of a decision
19
+ ];
20
+
21
+ // ── Cache ──
9
22
  const conceptCache = new Map();
10
23
  const CACHE_MAX_SIZE = 1000;
11
24
  const CACHE_TTL = 60 * 60 * 1000; // 1 hour
12
25
 
13
26
  function getCacheKey(text) {
14
- // Simple hash for cache key
15
27
  let hash = 0;
16
- const str = text.slice(0, 500); // Only hash first 500 chars
28
+ const str = text.slice(0, 500);
17
29
  for (let i = 0; i < str.length; i++) {
18
30
  const char = str.charCodeAt(i);
19
31
  hash = ((hash << 5) - hash) + char;
@@ -30,7 +42,6 @@ function cleanCache() {
30
42
  conceptCache.delete(key);
31
43
  }
32
44
  }
33
- // If still too large, remove oldest entries
34
45
  if (conceptCache.size > CACHE_MAX_SIZE) {
35
46
  const entries = [...conceptCache.entries()];
36
47
  entries.sort((a, b) => a[1].timestamp - b[1].timestamp);
@@ -43,17 +54,13 @@ function cleanCache() {
43
54
  }
44
55
 
45
56
  /**
46
- * Extract concepts/keywords from text using LLM
47
- * @param {string} text - The text to extract concepts from
48
- * @param {object} options - Options
49
- * @returns {Promise<string[]>} - Array of extracted concepts
57
+ * Extract concepts from text using LLM
50
58
  */
51
59
  export async function extractConcepts(text, options = {}) {
52
60
  if (!text || typeof text !== 'string' || text.trim().length < 10) {
53
61
  return [];
54
62
  }
55
63
 
56
- // Check cache first
57
64
  const cacheKey = getCacheKey(text);
58
65
  const cached = conceptCache.get(cacheKey);
59
66
  if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
@@ -61,123 +68,174 @@ export async function extractConcepts(text, options = {}) {
61
68
  }
62
69
 
63
70
  try {
64
- const content = await callGatewayChat([{
65
- role: 'user',
66
- content: `Extract 3-7 key concepts/topics from this text. Return ONLY a JSON array of strings, no explanation.
67
-
68
- Text: "${text.slice(0, 800)}"
71
+ const content = await callGatewayChat([
72
+ {
73
+ role: 'system',
74
+ content: `You are a knowledge classifier. Categorize the given text into 2-4 concept categories from this fixed list:
75
+ - how-it-works: understanding mechanisms or implementation details
76
+ - why-it-exists: purpose, rationale, or motivation
77
+ - what-changed: modifications, updates, or configuration changes
78
+ - problem-solution: issues encountered and their fixes
79
+ - gotcha: traps, edge cases, or surprising behavior
80
+ - pattern: reusable approaches or best practices
81
+ - trade-off: pros/cons analysis or design decisions
69
82
 
70
- JSON array:`
71
- }], { sessionKey: 'extract-concepts', temperature: 0.2, max_tokens: 200 });
83
+ Return ONLY a JSON array of matching categories. No explanation.`
84
+ },
85
+ {
86
+ role: 'user',
87
+ content: text.slice(0, 2000)
88
+ }
89
+ ], { sessionKey: 'extract-concepts', temperature: 0.1, max_tokens: 100 });
72
90
 
73
91
  if (!content) return [];
74
- // Parse JSON array from response
92
+
75
93
  let concepts = [];
76
94
  try {
77
- // Try to extract JSON array from response
78
95
  const match = content.match(/\[[\s\S]*?\]/);
79
96
  if (match) {
80
97
  concepts = JSON.parse(match[0]);
81
98
  }
82
99
  } catch (parseErr) {
83
- console.error('[openclaw-mem] Failed to parse LLM response:', parseErr.message);
100
+ console.error('[openclaw-mem] Failed to parse concepts response:', parseErr.message);
84
101
  return [];
85
102
  }
86
103
 
87
- // Validate and clean concepts
104
+ // Validate against fixed taxonomy
88
105
  concepts = concepts
89
- .filter(c => typeof c === 'string' && c.length > 1 && c.length < 50)
106
+ .filter(c => typeof c === 'string')
90
107
  .map(c => c.trim().toLowerCase())
91
- .slice(0, 7);
108
+ .filter(c => VALID_CONCEPTS.includes(c))
109
+ .slice(0, 4);
92
110
 
93
- // Cache the result
94
111
  cleanCache();
95
- conceptCache.set(cacheKey, {
96
- concepts,
97
- timestamp: Date.now()
98
- });
99
-
112
+ conceptCache.set(cacheKey, { concepts, timestamp: Date.now() });
100
113
  return concepts;
101
114
  } catch (err) {
102
- console.error('[openclaw-mem] LLM extraction error:', err.message);
115
+ console.error('[openclaw-mem] Concept extraction error:', err.message);
103
116
  return [];
104
117
  }
105
118
  }
106
119
 
107
120
  /**
108
- * Extract structured information from a tool call
109
- * @param {object} data - Tool call data
110
- * @returns {Promise<object>} - Extracted information
121
+ * Extract structured observation from a tool call
122
+ *
123
+ * Produces rich, searchable records with:
124
+ * - Accurate type classification
125
+ * - Descriptive title (short, action-oriented)
126
+ * - Detailed narrative (what happened, how it works, why it matters)
127
+ * - Structured facts (self-contained, grep-friendly)
128
+ * - Fixed concept categories
111
129
  */
112
130
  export async function extractFromToolCall(data) {
113
131
  const { tool_name, tool_input, tool_response, filesRead, filesModified } = data;
114
132
 
115
- // Build context for extraction
133
+ // Provide generous context (2000 chars each, not 300)
116
134
  const inputStr = typeof tool_input === 'string'
117
- ? tool_input.slice(0, 300)
118
- : JSON.stringify(tool_input).slice(0, 300);
135
+ ? tool_input.slice(0, 2000)
136
+ : JSON.stringify(tool_input, null, 0).slice(0, 2000);
119
137
 
120
138
  const responseStr = typeof tool_response === 'string'
121
- ? tool_response.slice(0, 300)
122
- : JSON.stringify(tool_response).slice(0, 300);
139
+ ? tool_response.slice(0, 2000)
140
+ : JSON.stringify(tool_response, null, 0).slice(0, 2000);
123
141
 
124
142
  try {
125
- const content = await callGatewayChat([{
126
- role: 'user',
127
- content: `Analyze this tool call and extract structured information. Return ONLY valid JSON.
143
+ const content = await callGatewayChat([
144
+ {
145
+ role: 'system',
146
+ content: `You are OpenClaw-Mem, a specialized observer that creates searchable memory records for FUTURE SESSIONS.
147
+
148
+ Your job: analyze a tool call and produce a structured observation capturing what was LEARNED, BUILT, FIXED, or CONFIGURED.
149
+
150
+ RULES:
151
+ - Record deliverables and capabilities, not process steps
152
+ - Use action verbs: implemented, fixed, deployed, configured, migrated, optimized, discovered, decided
153
+ - The "narrative" field is the most important: explain WHAT happened, HOW it works, and WHY it matters
154
+ - Facts must be self-contained statements (each fact should make sense without the others)
155
+ - Title should be a short noun phrase (3-10 words) capturing the core topic
156
+
157
+ TYPE DEFINITIONS (pick exactly one):
158
+ - bugfix: something was broken and is now fixed
159
+ - feature: new capability or functionality added
160
+ - refactor: code restructured without behavior change
161
+ - change: generic modification (docs, config, dependencies)
162
+ - discovery: learning about existing system, reading code, exploring
163
+ - decision: architectural or design choice with rationale
128
164
 
129
- Tool: ${tool_name}
165
+ CONCEPT CATEGORIES (pick 1-3):
166
+ - how-it-works: understanding mechanisms
167
+ - why-it-exists: purpose or rationale
168
+ - what-changed: modifications made
169
+ - problem-solution: issues and their fixes
170
+ - gotcha: traps or edge cases
171
+ - pattern: reusable approach
172
+ - trade-off: pros/cons of a decision
173
+
174
+ Return ONLY valid JSON, no markdown fences, no explanation.`
175
+ },
176
+ {
177
+ role: 'user',
178
+ content: `Tool: ${tool_name}
130
179
  Input: ${inputStr}
131
180
  Output: ${responseStr}
132
181
  Files read: ${filesRead?.join(', ') || 'none'}
133
182
  Files modified: ${filesModified?.join(', ') || 'none'}
134
183
 
135
- Return JSON with these fields:
184
+ Return JSON:
136
185
  {
137
- "type": "decision|bugfix|feature|refactor|discovery|testing|setup|other",
138
- "narrative": "One sentence describing what happened",
139
- "facts": ["fact1", "fact2"],
140
- "concepts": ["keyword1", "keyword2", "keyword3"]
141
- }
142
-
143
- JSON:`
144
- }], { sessionKey: 'extract-toolcall', temperature: 0.2, max_tokens: 300 });
186
+ "type": "one of: bugfix|feature|refactor|change|discovery|decision",
187
+ "title": "Short descriptive title (3-10 words)",
188
+ "narrative": "2-4 sentences: what was done, how it works, why it matters. Be specific and include key details.",
189
+ "facts": ["Self-contained fact 1", "Self-contained fact 2", "...up to 5"],
190
+ "concepts": ["category1", "category2"]
191
+ }`
192
+ }
193
+ ], { sessionKey: 'extract-toolcall', temperature: 0.2, max_tokens: 800 });
145
194
 
146
195
  if (!content) throw new Error('empty response');
147
196
 
148
- // Parse JSON from response
149
197
  const match = content.match(/\{[\s\S]*\}/);
150
198
  if (match) {
151
199
  const result = JSON.parse(match[0]);
200
+
201
+ // Validate type
202
+ const validTypes = ['bugfix', 'feature', 'refactor', 'change', 'discovery', 'decision'];
203
+ const type = validTypes.includes(result.type) ? result.type : 'discovery';
204
+
205
+ // Validate concepts against fixed taxonomy
206
+ const concepts = Array.isArray(result.concepts)
207
+ ? result.concepts.filter(c => VALID_CONCEPTS.includes(c)).slice(0, 3)
208
+ : [];
209
+
152
210
  return {
153
- type: result.type || 'other',
154
- narrative: result.narrative || '',
155
- facts: Array.isArray(result.facts) ? result.facts.slice(0, 5) : [],
156
- concepts: Array.isArray(result.concepts) ? result.concepts.slice(0, 7) : []
211
+ type,
212
+ title: (result.title || '').slice(0, 120),
213
+ narrative: (result.narrative || '').slice(0, 1000),
214
+ facts: Array.isArray(result.facts)
215
+ ? result.facts.filter(f => typeof f === 'string').slice(0, 5)
216
+ : [],
217
+ concepts: concepts.length > 0 ? concepts : ['how-it-works']
157
218
  };
158
219
  }
159
220
  } catch (err) {
160
221
  console.error('[openclaw-mem] Tool extraction error:', err.message);
161
222
  }
162
223
 
163
- // Return empty result on error
164
224
  return {
165
- type: 'other',
225
+ type: 'discovery',
226
+ title: '',
166
227
  narrative: '',
167
228
  facts: [],
168
- concepts: []
229
+ concepts: ['how-it-works']
169
230
  };
170
231
  }
171
232
 
172
233
  /**
173
234
  * Batch extract concepts from multiple texts
174
- * @param {string[]} texts - Array of texts to extract from
175
- * @returns {Promise<Map<string, string[]>>} - Map of text to concepts
176
235
  */
177
236
  export async function batchExtractConcepts(texts) {
178
237
  const results = new Map();
179
238
 
180
- // Filter out cached results first
181
239
  const uncached = [];
182
240
  for (const text of texts) {
183
241
  const cacheKey = getCacheKey(text);
@@ -189,7 +247,6 @@ export async function batchExtractConcepts(texts) {
189
247
  }
190
248
  }
191
249
 
192
- // Process uncached in batches
193
250
  const BATCH_SIZE = 5;
194
251
  for (let i = 0; i < uncached.length; i += BATCH_SIZE) {
195
252
  const batch = uncached.slice(i, i + BATCH_SIZE);
package/gateway-llm.js CHANGED
@@ -27,7 +27,7 @@ function truncateText(text, maxChars) {
27
27
  return text.slice(0, maxChars) + '…';
28
28
  }
29
29
 
30
- function formatTranscript(messages, maxChars = 8000) {
30
+ function formatTranscript(messages, maxChars = 12000) {
31
31
  const lines = [];
32
32
  for (const m of messages) {
33
33
  const role = (m.role || 'unknown').toUpperCase();
@@ -60,6 +60,7 @@ function normalizeSummaryFields(obj) {
60
60
  };
61
61
  return {
62
62
  request: pick('request'),
63
+ investigated: pick('investigated'),
63
64
  learned: pick('learned'),
64
65
  completed: pick('completed'),
65
66
  next_steps: pick('next_steps')
@@ -118,20 +119,37 @@ async function callGatewayChat(messages, options = {}) {
118
119
 
119
120
  export async function summarizeSession(messages, options = {}) {
120
121
  const { sessionKey = 'unknown' } = options;
121
- const transcript = formatTranscript(messages);
122
+ const transcript = formatTranscript(messages, 12000);
122
123
  if (!transcript) return null;
123
124
 
124
125
  const buildPrompts = (strict = false) => {
125
- const systemPrompt = [
126
- '你是一个对话总结助手。请用中文总结这段对话,返回一个 JSON 对象,包含以下字段:',
127
- '- request: 用户的主要问题或需求(一句话)',
128
- '- learned: 用户从对话中学到了什么',
129
- '- completed: 完成了什么任务或解答',
130
- '- next_steps: 建议的下一步行动',
131
- '只返回 JSON 对象,不要 markdown 代码块,不要其他内容。',
132
- strict ? '重要:只输出纯 JSON,不要任何额外文字。' : ''
133
- ].filter(Boolean).join('\n');
134
- const userPrompt = '对话记录:\n' + transcript + '\n\nJSON:';
126
+ const systemPrompt = `You are a session summarizer for an AI agent memory system. Your summaries help the agent recall past work in future sessions.
127
+
128
+ INSTRUCTIONS:
129
+ - Focus on OUTCOMES and DELIVERABLES, not conversational flow
130
+ - Use action verbs: implemented, fixed, configured, discovered, decided, explored
131
+ - Be specific: include file names, tool names, error messages, key decisions
132
+ - Write in the language the user used (Chinese if they spoke Chinese, English if English)
133
+
134
+ OUTPUT FORMAT: Return ONLY a valid JSON object with these fields:
135
+ {
136
+ "request": "What the user wanted to accomplish (1 sentence, specific)",
137
+ "investigated": "What was explored or researched to fulfill the request",
138
+ "learned": "Key technical insights, discoveries, or new understanding gained",
139
+ "completed": "Concrete deliverables: what was built, fixed, configured, or decided",
140
+ "next_steps": "Unfinished work or logical follow-up actions (null if fully completed)"
141
+ }
142
+
143
+ QUALITY GUIDELINES:
144
+ - "request" should capture the real goal, not just "user asked a question"
145
+ - "investigated" should list specific files read, APIs explored, architectures examined
146
+ - "learned" should contain reusable knowledge (not "learned how to do X" but the actual insight)
147
+ - "completed" should be a concrete outcome someone can verify
148
+ - "next_steps" should be actionable, not vague
149
+
150
+ ${strict ? 'CRITICAL: Output ONLY the JSON object. No markdown, no explanation, no code fences.' : ''}`;
151
+
152
+ const userPrompt = 'Session transcript:\n' + transcript + '\n\nJSON:';
135
153
  return [
136
154
  { role: 'system', content: systemPrompt },
137
155
  { role: 'user', content: userPrompt }
@@ -139,17 +157,94 @@ export async function summarizeSession(messages, options = {}) {
139
157
  };
140
158
 
141
159
  // First attempt
142
- let content = await callGatewayChat(buildPrompts(false), { sessionKey, temperature: 0.2, max_tokens: 300 });
160
+ let content = await callGatewayChat(buildPrompts(false), { sessionKey, temperature: 0.2, max_tokens: 600 });
143
161
  let parsed = parseSummaryJson(content || '');
144
162
  if (parsed) return normalizeSummaryFields(parsed);
145
163
 
146
164
  // Retry once with stricter instruction
147
- content = await callGatewayChat(buildPrompts(true), { sessionKey, temperature: 0.2, max_tokens: 300 });
165
+ content = await callGatewayChat(buildPrompts(true), { sessionKey, temperature: 0.1, max_tokens: 600 });
148
166
  parsed = parseSummaryJson(content || '');
149
167
  if (parsed) return normalizeSummaryFields(parsed);
150
168
 
151
169
  return null;
152
170
  }
153
171
 
172
+ // ============ Local Embedding Model (multilingual-e5-small) ============
173
+
174
+ const EMBEDDING_MODEL = 'Xenova/multilingual-e5-small';
175
+ const EMBEDDING_DIMS = 384;
176
+ const EMBEDDING_PREFIX = 'query: ';
177
+
178
+ // Singleton: lazily initialized embedding pipeline
179
+ let _extractorPromise = null;
180
+
181
+ function getExtractor() {
182
+ if (!_extractorPromise) {
183
+ _extractorPromise = (async () => {
184
+ try {
185
+ const { pipeline } = await import('@huggingface/transformers');
186
+ console.log('[openclaw-mem] Loading embedding model (first run downloads ~110MB)...');
187
+ const extractor = await pipeline('feature-extraction', EMBEDDING_MODEL);
188
+ console.log('[openclaw-mem] Embedding model loaded');
189
+ return extractor;
190
+ } catch (err) {
191
+ console.error('[openclaw-mem] Failed to load embedding model:', err.message);
192
+ _extractorPromise = null; // Allow retry
193
+ return null;
194
+ }
195
+ })();
196
+ }
197
+ return _extractorPromise;
198
+ }
199
+
200
+ /**
201
+ * Generate embedding vector for text using the local multilingual-e5-small model.
202
+ * Returns Float32Array of 384 dimensions (EMBEDDING_DIMS), or null on failure.
203
+ */
204
+ export async function callGatewayEmbeddings(text) {
205
+ try {
206
+ const extractor = await getExtractor();
207
+ if (!extractor) return null;
208
+
209
+ const input = EMBEDDING_PREFIX + text;
210
+ const output = await extractor(input, {
211
+ pooling: 'mean',
212
+ normalize: true,
213
+ });
214
+
215
+ return new Float32Array(output.data);
216
+ } catch (err) {
217
+ console.error('[openclaw-mem] Embedding generation error:', err.message);
218
+ return null;
219
+ }
220
+ }
221
+
222
+ /**
223
+ * Generate embeddings for multiple texts sequentially.
224
+ * Returns array of Float32Array, or null entries on failure.
225
+ */
226
+ export async function batchEmbeddings(texts) {
227
+ const extractor = await getExtractor();
228
+ if (!extractor) return texts.map(() => null);
229
+
230
+ const results = [];
231
+ for (const text of texts) {
232
+ try {
233
+ const input = EMBEDDING_PREFIX + text;
234
+ const output = await extractor(input, {
235
+ pooling: 'mean',
236
+ normalize: true,
237
+ });
238
+ results.push(new Float32Array(output.data));
239
+ } catch (err) {
240
+ console.error('[openclaw-mem] Batch embedding error:', err.message);
241
+ results.push(null);
242
+ }
243
+ }
244
+ return results;
245
+ }
246
+
247
+ export { EMBEDDING_DIMS };
248
+
154
249
  export const INTERNAL_SUMMARY_PREFIX = SUMMARY_SESSION_PREFIX;
155
250
  export { callGatewayChat };
package/handler.js CHANGED
@@ -14,11 +14,11 @@ import path from 'node:path';
14
14
  import os from 'node:os';
15
15
  import { fileURLToPath } from 'node:url';
16
16
  import { spawn } from 'node:child_process';
17
- import { summarizeSession, INTERNAL_SUMMARY_PREFIX } from './gateway-llm.js';
17
+ import { summarizeSession, INTERNAL_SUMMARY_PREFIX, callGatewayEmbeddings } from './gateway-llm.js';
18
18
 
19
19
  const __dirname = path.dirname(fileURLToPath(import.meta.url));
20
20
  console.log('[openclaw-mem] >>> HANDLER LOADED AT', new Date().toISOString(), '<<<');
21
- const USE_LLM_EXTRACTION = false;
21
+ const USE_LLM_EXTRACTION = true;
22
22
  const SUMMARY_MAX_MESSAGES = 200;
23
23
  const MCP_API_PORT = 18790;
24
24
 
@@ -255,102 +255,9 @@ async function handleAgentBootstrap(event) {
255
255
  console.log('[openclaw-mem] Sample has content:', !!sample.content);
256
256
  }
257
257
 
258
- // ============ NEW: Capture incoming messages to database ============
259
- // This ensures every message through gateway is captured, not just on /new
260
- // Messages can be in: event.messages (array), event.message, or event.context.userMessage
261
- let messagesToCapture = [];
262
-
263
- // ============ Capture messages from session file ============
264
- // At bootstrap time, the incoming message isn't in the event yet
265
- // But we can read the session file which contains previous messages
266
-
267
- // Construct session file path from sessionKey
268
- // Session files are stored at ~/.openclaw/agents/main/sessions/<sessionKey>.jsonl
269
- const agentId = event.context?.agentId || 'main';
270
- const sessionFile = path.join(os.homedir(), '.openclaw', 'agents', agentId, 'sessions', `${sessionKey}.jsonl`);
271
- console.log('[openclaw-mem] Constructed session file path:', sessionFile);
272
-
273
- // Check if session file exists
274
- let sessionFileExists = false;
275
- try {
276
- await fs.access(sessionFile);
277
- sessionFileExists = true;
278
- } catch {
279
- sessionFileExists = false;
280
- }
281
-
282
- if (sessionFileExists) {
283
- console.log('[openclaw-mem] Found session file:', sessionFile);
284
- try {
285
- const messages = await extractSessionContent(sessionFile, 50);
286
- if (messages && messages.length > 0) {
287
- console.log(`[openclaw-mem] Found ${messages.length} messages in session file`);
288
-
289
- // Get or create session for this sessionKey
290
- let dbSessionId = getOrCreateSessionForKey(sessionKey, workspaceDir);
291
-
292
- // Track which messages we've already saved (to avoid duplicates)
293
- const savedHashes = new Set();
294
- try {
295
- const existing = database.getRecentObservations(null, 100);
296
- for (const obs of existing) {
297
- // Content is stored in the 'result' field as JSON
298
- try {
299
- const result = JSON.parse(obs.result || '{}');
300
- if (result.content) {
301
- savedHashes.add(hashContent(result.content));
302
- }
303
- } catch {
304
- // If result isn't JSON, use summary
305
- if (obs.summary) {
306
- savedHashes.add(hashContent(obs.summary));
307
- }
308
- }
309
- }
310
- console.log(`[openclaw-mem] Loaded ${savedHashes.size} existing message hashes`);
311
- } catch (e) {
312
- console.log('[openclaw-mem] Could not check existing observations:', e.message);
313
- }
314
-
315
- let newCount = 0;
316
- for (const msg of messages) {
317
- const contentHash = hashContent(msg.content);
318
- if (savedHashes.has(contentHash)) {
319
- continue; // Skip already saved messages
320
- }
321
-
322
- const toolName = msg.role === 'assistant' ? 'AssistantMessage' : 'UserMessage';
323
- const summary = msg.content.slice(0, 100) + (msg.content.length > 100 ? '...' : '');
324
- database.saveObservation(
325
- dbSessionId,
326
- toolName,
327
- { role: msg.role, sessionKey },
328
- { content: msg.content },
329
- {
330
- summary,
331
- // Use full message text so FTS can index real topics
332
- concepts: msg.content,
333
- tokensDiscovery: estimateTokens(msg.content),
334
- tokensRead: estimateTokens(summary)
335
- }
336
- );
337
- savedHashes.add(contentHash);
338
- newCount++;
339
- }
340
-
341
- if (newCount > 0) {
342
- console.log(`[openclaw-mem] ✓ Saved ${newCount} new messages to database`);
343
- } else {
344
- console.log('[openclaw-mem] All messages already in database');
345
- }
346
- }
347
- } catch (err) {
348
- console.log('[openclaw-mem] Could not read session file:', err.message);
349
- }
350
- } else {
351
- console.log('[openclaw-mem] No session file found in context');
352
- }
353
- // ============ END: Capture messages ============
258
+ // Raw messages are no longer stored individually — session summaries capture the important bits.
259
+ // This eliminates noise from greetings and low-value messages.
260
+ console.log('[openclaw-mem] Skipping per-message capture (handled via session summary)');
354
261
 
355
262
  // Ensure API server is running
356
263
  await startApiServer();
@@ -488,30 +395,8 @@ async function handleCommandNew(event) {
488
395
 
489
396
  if (messages && messages.length > 0) {
490
397
  console.log(`[openclaw-mem] Extracted ${messages.length} messages from session`);
491
-
492
- // Save each message as an observation
493
- for (const msg of messages) {
494
- const toolName = msg.role === 'user' ? 'UserMessage' : 'AssistantMessage';
495
- const summary = msg.content.slice(0, 100) + (msg.content.length > 100 ? '...' : '');
496
-
497
- database.saveObservation(
498
- sessionId,
499
- toolName,
500
- { role: msg.role },
501
- { content: msg.content },
502
- {
503
- summary,
504
- // Use message body for concepts to keep topic search working
505
- concepts: msg.content,
506
- tokensDiscovery: estimateTokens(msg.content),
507
- tokensRead: estimateTokens(summary)
508
- }
509
- );
510
- }
511
-
512
- console.log('[openclaw-mem] Session saved successfully');
513
- console.log('[openclaw-mem] >>> CODE VERSION 2026-02-03-1622 <<<');
514
- console.log('[openclaw-mem] >>> STARTING AI SUMMARY <<<');
398
+ // Raw messages are no longer stored individually — only the AI summary matters.
399
+ console.log('[openclaw-mem] Generating AI summary...');
515
400
 
516
401
  // Generate AI summary using DeepSeek
517
402
  let aiSummary = null;
@@ -528,6 +413,7 @@ async function handleCommandNew(event) {
528
413
  sessionId,
529
414
  summaryContent,
530
415
  aiSummary.request,
416
+ aiSummary.investigated || null,
531
417
  aiSummary.learned,
532
418
  aiSummary.completed,
533
419
  aiSummary.next_steps
@@ -545,6 +431,7 @@ async function handleCommandNew(event) {
545
431
  `Session with ${messages.length} messages`,
546
432
  fallbackRequest,
547
433
  null,
434
+ null,
548
435
  fallbackCompleted ? `Discussed: ${fallbackCompleted}` : null,
549
436
  null
550
437
  );
@@ -558,76 +445,20 @@ async function handleCommandNew(event) {
558
445
 
559
446
  /**
560
447
  * Handle agent:response event
561
- * Capture assistant responses to database
448
+ * Skip storing raw assistant messages session summary at stop/new captures the important bits.
449
+ * This avoids noise from greetings, acknowledgments, and other low-value messages.
562
450
  */
563
451
  async function handleAgentResponse(event) {
564
- console.log('[openclaw-mem] Agent response event');
565
-
566
- if (!await loadModules()) return;
567
-
568
- const sessionKey = event.sessionKey || 'unknown';
569
- const response = event.response || event.message || event.content;
570
- const workspaceDir = event.context?.workspaceDir || path.join(os.homedir(), '.openclaw', 'workspace');
571
-
572
- if (response && typeof response === 'string' && response.trim()) {
573
- console.log('[openclaw-mem] Capturing assistant response:', response.slice(0, 50) + '...');
574
-
575
- let sessionId = getOrCreateSessionForKey(sessionKey, workspaceDir);
576
-
577
- const summary = response.slice(0, 100) + (response.length > 100 ? '...' : '');
578
- database.saveObservation(
579
- sessionId,
580
- 'AssistantMessage',
581
- { role: 'assistant', sessionKey },
582
- { content: response },
583
- {
584
- summary,
585
- // Keep full content in concepts column for better topic recall
586
- concepts: response,
587
- tokensDiscovery: estimateTokens(response),
588
- tokensRead: estimateTokens(summary)
589
- }
590
- );
591
- console.log('[openclaw-mem] ✓ Assistant response saved to database');
592
- }
452
+ console.log('[openclaw-mem] Agent response event (skipped — captured via session summary)');
593
453
  }
594
454
 
595
455
  /**
596
456
  * Handle message events
597
- * Alternative event type for capturing messages
457
+ * Skip storing raw messages session summary at stop/new captures the important bits.
458
+ * This avoids noise from greetings, acknowledgments, and other low-value messages.
598
459
  */
599
460
  async function handleMessage(event) {
600
- console.log('[openclaw-mem] Message event:', event.action || 'unknown');
601
-
602
- if (!await loadModules()) return;
603
-
604
- const sessionKey = event.sessionKey || 'unknown';
605
- const message = event.message || event.content || event.text;
606
- const role = event.role || event.action || 'user';
607
- const workspaceDir = event.context?.workspaceDir || path.join(os.homedir(), '.openclaw', 'workspace');
608
-
609
- if (message && typeof message === 'string' && message.trim() && !message.startsWith('/')) {
610
- console.log(`[openclaw-mem] Capturing ${role} message:`, message.slice(0, 50) + '...');
611
-
612
- let sessionId = getOrCreateSessionForKey(sessionKey, workspaceDir);
613
-
614
- const toolName = role === 'assistant' ? 'AssistantMessage' : 'UserMessage';
615
- const summary = message.slice(0, 100) + (message.length > 100 ? '...' : '');
616
- database.saveObservation(
617
- sessionId,
618
- toolName,
619
- { role, sessionKey },
620
- { content: message },
621
- {
622
- summary,
623
- // Index actual message text (not just role) for topic search
624
- concepts: message,
625
- tokensDiscovery: estimateTokens(message),
626
- tokensRead: estimateTokens(summary)
627
- }
628
- );
629
- console.log(`[openclaw-mem] ✓ ${role} message saved to database`);
630
- }
461
+ console.log('[openclaw-mem] Message event (skipped captured via session summary)');
631
462
  }
632
463
 
633
464
  /**
@@ -830,15 +661,19 @@ async function handleToolPost(event) {
830
661
  extractedNarrative = extracted.narrative || narrative;
831
662
  extractedFacts = extracted.facts;
832
663
  extractedConcepts = extracted.concepts?.join(', ') || extractedConcepts;
664
+ // Use LLM-generated title as summary if available
665
+ if (extracted.title) {
666
+ summary = extracted.title;
667
+ }
833
668
  }
834
- console.log(`[openclaw-mem] LLM extracted: type=${extractedType}, concepts=${extractedConcepts.slice(0, 50)}...`);
669
+ console.log(`[openclaw-mem] LLM extracted: type=${extractedType}, title=${summary.slice(0, 60)}, concepts=${extractedConcepts}`);
835
670
  } catch (err) {
836
671
  console.log(`[openclaw-mem] LLM extraction failed, using fallback: ${err.message}`);
837
672
  }
838
673
  }
839
674
 
840
675
  // Save observation with extended metadata
841
- database.saveObservation(
676
+ const saveResult = database.saveObservation(
842
677
  sessionId,
843
678
  toolName,
844
679
  toolInput,
@@ -849,7 +684,7 @@ async function handleToolPost(event) {
849
684
  tokensDiscovery: estimateTokens(responseStr),
850
685
  tokensRead: estimateTokens(summary),
851
686
  type: extractedType,
852
- narrative: extractedNarrative.slice(0, 500),
687
+ narrative: extractedNarrative.slice(0, 1000),
853
688
  facts: extractedFacts,
854
689
  filesRead: filesRead,
855
690
  filesModified: filesModified
@@ -857,6 +692,21 @@ async function handleToolPost(event) {
857
692
  );
858
693
 
859
694
  console.log(`[openclaw-mem] ✓ Tool ${toolName} recorded (type: ${extractedType})`);
695
+
696
+ // Fire-and-forget: generate embedding for the new observation
697
+ if (saveResult.success && saveResult.id) {
698
+ const embeddingText = [summary, extractedNarrative].filter(Boolean).join(' ').trim();
699
+ if (embeddingText.length > 10) {
700
+ callGatewayEmbeddings(embeddingText).then(embedding => {
701
+ if (embedding) {
702
+ database.saveEmbedding(Number(saveResult.id), embedding);
703
+ console.log(`[openclaw-mem] ✓ Embedding saved for observation #${saveResult.id}`);
704
+ }
705
+ }).catch(err => {
706
+ console.log(`[openclaw-mem] Embedding generation failed: ${err.message}`);
707
+ });
708
+ }
709
+ }
860
710
  }
861
711
 
862
712
  /**
@@ -898,40 +748,7 @@ async function handleUserPromptSubmit(event) {
898
748
  database.saveUserPrompt(sessionId, prompt);
899
749
  console.log(`[openclaw-mem] ✓ User prompt saved (${prompt.slice(0, 50)}...)`);
900
750
 
901
- // Also save as an observation for searchability
902
- const summary = prompt.slice(0, 100) + (prompt.length > 100 ? '...' : '');
903
-
904
- // Try LLM extraction for concepts
905
- let concepts = prompt;
906
- if (USE_LLM_EXTRACTION && extractor && extractor.extractConcepts) {
907
- try {
908
- const extracted = await extractor.extractConcepts(prompt);
909
- if (extracted && extracted.length > 0) {
910
- concepts = extracted.join(', ');
911
- }
912
- } catch (err) {
913
- console.log('[openclaw-mem] LLM extraction failed for prompt:', err.message);
914
- }
915
- }
916
-
917
- database.saveObservation(
918
- sessionId,
919
- 'UserPrompt',
920
- { prompt: prompt.slice(0, 500) },
921
- { recorded: true },
922
- {
923
- summary,
924
- concepts,
925
- tokensDiscovery: estimateTokens(prompt),
926
- tokensRead: estimateTokens(summary),
927
- type: 'user_input',
928
- narrative: `User asked: ${summary}`,
929
- facts: null,
930
- filesRead: null,
931
- filesModified: null
932
- }
933
- );
934
- console.log('[openclaw-mem] ✓ User prompt observation saved');
751
+ // User prompts are saved to user_prompts table only (no observation duplication).
935
752
  }
936
753
 
937
754
  /**
@@ -1014,6 +831,7 @@ async function handleAgentStop(event) {
1014
831
  sessionId,
1015
832
  summaryContent,
1016
833
  summary.request,
834
+ summary.investigated || null,
1017
835
  summary.learned,
1018
836
  summary.completed,
1019
837
  summary.next_steps
@@ -1031,7 +849,8 @@ async function handleAgentStop(event) {
1031
849
  sessionId,
1032
850
  summaryContent,
1033
851
  firstUserMsg,
1034
- '',
852
+ null,
853
+ null,
1035
854
  `Discussed: ${lastAssistant}`,
1036
855
  null
1037
856
  );
package/mcp-http-api.js CHANGED
@@ -9,6 +9,7 @@
9
9
 
10
10
  import http from 'http';
11
11
  import database from './database.js';
12
+ import { callGatewayEmbeddings } from './gateway-llm.js';
12
13
 
13
14
  const PORT = process.env.OPENCLAW_MEM_API_PORT || 18790;
14
15
 
@@ -90,9 +91,53 @@ function normalizeIds(input) {
90
91
  return ids;
91
92
  }
92
93
 
94
+ // ============ Hybrid Search ============
95
+
96
+ function mergeHybridResults(ftsResults, vectorResults, limit) {
97
+ let ftsMin = Infinity, ftsMax = -Infinity;
98
+ for (const r of ftsResults) {
99
+ const rank = Math.abs(r.rank ?? 0);
100
+ if (rank < ftsMin) ftsMin = rank;
101
+ if (rank > ftsMax) ftsMax = rank;
102
+ }
103
+ const ftsRange = ftsMax - ftsMin || 1;
104
+
105
+ const scoreMap = new Map();
106
+
107
+ for (const r of ftsResults) {
108
+ const rank = Math.abs(r.rank ?? 0);
109
+ const ftsScore = 1 - ((rank - ftsMin) / ftsRange);
110
+ scoreMap.set(r.id, { obs: r, ftsScore, vecScore: 0 });
111
+ }
112
+
113
+ for (const v of vectorResults) {
114
+ const vecScore = 1 - (v.distance ?? 0);
115
+ const existing = scoreMap.get(v.observation_id);
116
+ if (existing) {
117
+ existing.vecScore = vecScore;
118
+ } else {
119
+ const obs = database.getObservation(v.observation_id);
120
+ if (obs) {
121
+ scoreMap.set(v.observation_id, { obs, ftsScore: 0, vecScore });
122
+ }
123
+ }
124
+ }
125
+
126
+ const scored = [];
127
+ for (const [id, entry] of scoreMap) {
128
+ const { obs, ftsScore, vecScore } = entry;
129
+ const inBoth = ftsScore > 0 && vecScore > 0;
130
+ const combined = (0.4 * ftsScore) + (0.6 * vecScore) + (inBoth ? 0.2 : 0);
131
+ scored.push({ obs, combined });
132
+ }
133
+
134
+ scored.sort((a, b) => b.combined - a.combined);
135
+ return scored.slice(0, limit).map(s => s.obs);
136
+ }
137
+
93
138
  // ============ API 功能 ============
94
139
 
95
- function search(args = {}) {
140
+ async function search(args = {}) {
96
141
  const query = typeof args === 'string' ? args : (args.query || args.q || '*');
97
142
  const limit = args.limit ?? 30;
98
143
 
@@ -100,7 +145,25 @@ function search(args = {}) {
100
145
  if (query === '*' || !query) {
101
146
  results = database.getRecentObservations(null, limit);
102
147
  } else {
103
- results = database.searchObservations(query, limit);
148
+ // Hybrid search: FTS + vector
149
+ const ftsResults = database.searchObservations(query, limit * 2);
150
+
151
+ let vectorResults = [];
152
+ try {
153
+ const embedding = await callGatewayEmbeddings(query);
154
+ if (embedding) {
155
+ vectorResults = database.searchByVector(embedding, limit * 2);
156
+ }
157
+ } catch (err) {
158
+ console.error('[openclaw-mem-api] Vector search error:', err.message);
159
+ }
160
+
161
+ if (vectorResults.length > 0) {
162
+ results = mergeHybridResults(ftsResults, vectorResults, limit);
163
+ console.log(`[openclaw-mem-api] Hybrid: ${ftsResults.length} FTS + ${vectorResults.length} vector → ${results.length} merged`);
164
+ } else {
165
+ results = ftsResults.slice(0, limit);
166
+ }
104
167
  }
105
168
 
106
169
  // 按日期分组
@@ -229,7 +292,7 @@ const server = http.createServer((req, res) => {
229
292
 
230
293
  let body = '';
231
294
  req.on('data', chunk => body += chunk);
232
- req.on('end', () => {
295
+ req.on('end', async () => {
233
296
  // 处理未编码的中文 URL - 手动编码非 ASCII 字符
234
297
  let safeUrl = req.url;
235
298
  try {
@@ -278,7 +341,7 @@ const server = http.createServer((req, res) => {
278
341
  break;
279
342
 
280
343
  case '/search':
281
- result = search(args);
344
+ result = await search(args);
282
345
  break;
283
346
 
284
347
  case '/timeline':
@@ -342,6 +405,11 @@ curl -X POST "http://localhost:${PORT}/get_observations" -d '{"ids":[123,124]}'
342
405
  server.listen(PORT, '127.0.0.1', () => {
343
406
  console.log(`[openclaw-mem] HTTP API running on http://127.0.0.1:${PORT}`);
344
407
  console.log(`[openclaw-mem] Try: curl "http://127.0.0.1:${PORT}/help"`);
408
+
409
+ // Preload embedding model in background
410
+ callGatewayEmbeddings('warmup').then(() => {
411
+ console.log('[openclaw-mem] Embedding model preloaded for HTTP API');
412
+ }).catch(() => {});
345
413
  });
346
414
 
347
415
  // 优雅关闭
package/mcp-server.js CHANGED
@@ -13,6 +13,7 @@ import {
13
13
  ListToolsRequestSchema,
14
14
  } from '@modelcontextprotocol/sdk/types.js';
15
15
  import database from './database.js';
16
+ import { callGatewayEmbeddings } from './gateway-llm.js';
16
17
 
17
18
  // ============ 工具函数 ============
18
19
 
@@ -97,7 +98,57 @@ function normalizeIds(input) {
97
98
 
98
99
  // ============ 搜索功能 ============
99
100
 
100
- function search(args = {}) {
101
+ /**
102
+ * Hybrid search: merge FTS5 keyword results with vector KNN results.
103
+ * FTS results get fts_score (normalized 0-1), vector results get vec_score (1 - distance).
104
+ * Results found in both get a 0.2 intersection bonus.
105
+ */
106
+ function mergeHybridResults(ftsResults, vectorResults, limit) {
107
+ // Normalize FTS scores (rank is negative, lower is better)
108
+ let ftsMin = Infinity, ftsMax = -Infinity;
109
+ for (const r of ftsResults) {
110
+ const rank = Math.abs(r.rank ?? 0);
111
+ if (rank < ftsMin) ftsMin = rank;
112
+ if (rank > ftsMax) ftsMax = rank;
113
+ }
114
+ const ftsRange = ftsMax - ftsMin || 1;
115
+
116
+ const scoreMap = new Map(); // id -> { obs, ftsScore, vecScore }
117
+
118
+ for (const r of ftsResults) {
119
+ const rank = Math.abs(r.rank ?? 0);
120
+ const ftsScore = 1 - ((rank - ftsMin) / ftsRange); // normalize to 0-1, higher is better
121
+ scoreMap.set(r.id, { obs: r, ftsScore, vecScore: 0 });
122
+ }
123
+
124
+ for (const v of vectorResults) {
125
+ const vecScore = 1 - (v.distance ?? 0); // cosine distance -> similarity
126
+ const existing = scoreMap.get(v.observation_id);
127
+ if (existing) {
128
+ existing.vecScore = vecScore;
129
+ } else {
130
+ // Need to fetch the full observation for vector-only results
131
+ const obs = database.getObservation(v.observation_id);
132
+ if (obs) {
133
+ scoreMap.set(v.observation_id, { obs, ftsScore: 0, vecScore });
134
+ }
135
+ }
136
+ }
137
+
138
+ // Calculate combined scores
139
+ const scored = [];
140
+ for (const [id, entry] of scoreMap) {
141
+ const { obs, ftsScore, vecScore } = entry;
142
+ const inBoth = ftsScore > 0 && vecScore > 0;
143
+ const combined = (0.4 * ftsScore) + (0.6 * vecScore) + (inBoth ? 0.2 : 0);
144
+ scored.push({ obs, combined, ftsScore, vecScore });
145
+ }
146
+
147
+ scored.sort((a, b) => b.combined - a.combined);
148
+ return scored.slice(0, limit);
149
+ }
150
+
151
+ async function search(args = {}) {
101
152
  const query = typeof args === 'string' ? args : (args.query || args.q || '*');
102
153
  const limit = args.limit ?? args.maxResults ?? 30;
103
154
  const project = args.project || null;
@@ -108,11 +159,33 @@ function search(args = {}) {
108
159
  let results;
109
160
 
110
161
  if (query === '*' || !query) {
111
- // 获取最近的 observations
162
+ // 获取最近的 observations — no embedding needed for recent listing
112
163
  results = database.getRecentObservations(project, limit * 2);
113
164
  } else {
114
- // 搜索
115
- results = database.searchObservations(query, limit * 2);
165
+ // Hybrid search: FTS5 + vector KNN
166
+ const ftsResults = database.searchObservations(query, limit * 2);
167
+
168
+ // Try vector search in parallel
169
+ let vectorResults = [];
170
+ try {
171
+ const embedding = await callGatewayEmbeddings(query);
172
+ if (embedding) {
173
+ vectorResults = database.searchByVector(embedding, limit * 2);
174
+ }
175
+ } catch (err) {
176
+ console.error('[openclaw-mem-mcp] Vector search error:', err.message);
177
+ }
178
+
179
+ if (vectorResults.length > 0) {
180
+ // Merge hybrid results
181
+ const merged = mergeHybridResults(ftsResults, vectorResults, limit * 2);
182
+ results = merged.map(m => m.obs);
183
+ console.error(`[openclaw-mem-mcp] Hybrid search: ${ftsResults.length} FTS + ${vectorResults.length} vector → ${results.length} merged`);
184
+ } else {
185
+ // Fallback to FTS-only
186
+ results = ftsResults;
187
+ console.error(`[openclaw-mem-mcp] FTS-only search: ${results.length} results`);
188
+ }
116
189
  }
117
190
 
118
191
  // 过滤
@@ -475,7 +548,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
475
548
  break;
476
549
 
477
550
  case 'search':
478
- result = search(args || {});
551
+ result = await search(args || {});
479
552
  break;
480
553
 
481
554
  case 'timeline':
@@ -517,6 +590,13 @@ async function main() {
517
590
  const transport = new StdioServerTransport();
518
591
  await server.connect(transport);
519
592
  console.error('[openclaw-mem-mcp] MCP Server started (stdio)');
593
+
594
+ // Preload embedding model in background so first search doesn't timeout
595
+ callGatewayEmbeddings('warmup').then(() => {
596
+ console.error('[openclaw-mem-mcp] Embedding model preloaded');
597
+ }).catch(() => {
598
+ console.error('[openclaw-mem-mcp] Embedding model preload failed (will retry on first search)');
599
+ });
520
600
  }
521
601
 
522
602
  main().catch((error) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openclaw-mem",
3
- "version": "1.2.1",
3
+ "version": "1.3.0",
4
4
  "description": "Persistent memory system for OpenClaw - captures conversations, generates summaries, and injects context into new sessions",
5
5
  "type": "module",
6
6
  "main": "handler.js",
@@ -20,7 +20,8 @@
20
20
  "api:start": "nohup node mcp-http-api.js > ~/.openclaw-mem/logs/api.log 2>&1 &",
21
21
  "debug": "node debug-logger.js",
22
22
  "setup": "node setup.js",
23
- "postinstall": "node setup.js"
23
+ "postinstall": "node setup.js",
24
+ "backfill-embeddings": "node backfill-embeddings.js"
24
25
  },
25
26
  "keywords": [
26
27
  "openclaw",
@@ -63,13 +64,16 @@
63
64
  "session-watcher.js",
64
65
  "sync-recent.js",
65
66
  "setup.js",
67
+ "backfill-embeddings.js",
66
68
  "HOOK.md",
67
69
  "MCP.json",
68
70
  "README.md"
69
71
  ],
70
72
  "dependencies": {
73
+ "@huggingface/transformers": "^3.8.1",
71
74
  "@modelcontextprotocol/sdk": "^1.25.3",
72
- "better-sqlite3": "^11.0.0"
75
+ "better-sqlite3": "^12.6.2",
76
+ "sqlite-vec": "^0.1.7-alpha.2"
73
77
  },
74
78
  "devDependencies": {
75
79
  "vitest": "^2.0.0"