openclaw-mem 1.0.4 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/HOOK.md +125 -0
  2. package/LICENSE +1 -1
  3. package/MCP.json +11 -0
  4. package/README.md +158 -167
  5. package/backfill-embeddings.js +79 -0
  6. package/context-builder.js +703 -0
  7. package/database.js +625 -0
  8. package/debug-logger.js +280 -0
  9. package/extractor.js +268 -0
  10. package/gateway-llm.js +250 -0
  11. package/handler.js +941 -0
  12. package/mcp-http-api.js +424 -0
  13. package/mcp-server.js +605 -0
  14. package/mem-get.sh +24 -0
  15. package/mem-search.sh +17 -0
  16. package/monitor.js +112 -0
  17. package/package.json +58 -30
  18. package/realtime-monitor.js +371 -0
  19. package/session-watcher.js +192 -0
  20. package/setup.js +114 -0
  21. package/sync-recent.js +63 -0
  22. package/README_CN.md +0 -201
  23. package/bin/openclaw-mem.js +0 -117
  24. package/docs/locales/README_AR.md +0 -35
  25. package/docs/locales/README_DE.md +0 -35
  26. package/docs/locales/README_ES.md +0 -35
  27. package/docs/locales/README_FR.md +0 -35
  28. package/docs/locales/README_HE.md +0 -35
  29. package/docs/locales/README_HI.md +0 -35
  30. package/docs/locales/README_ID.md +0 -35
  31. package/docs/locales/README_IT.md +0 -35
  32. package/docs/locales/README_JA.md +0 -57
  33. package/docs/locales/README_KO.md +0 -35
  34. package/docs/locales/README_NL.md +0 -35
  35. package/docs/locales/README_PL.md +0 -35
  36. package/docs/locales/README_PT.md +0 -35
  37. package/docs/locales/README_RU.md +0 -35
  38. package/docs/locales/README_TH.md +0 -35
  39. package/docs/locales/README_TR.md +0 -35
  40. package/docs/locales/README_UK.md +0 -35
  41. package/docs/locales/README_VI.md +0 -35
  42. package/docs/logo.svg +0 -32
  43. package/lib/context-builder.js +0 -415
  44. package/lib/database.js +0 -309
  45. package/lib/handler.js +0 -494
  46. package/scripts/commands.js +0 -141
  47. package/scripts/init.js +0 -248
package/HOOK.md ADDED
@@ -0,0 +1,125 @@
1
+ ---
2
+ name: openclaw-mem
3
+ description: "Persistent memory system - saves session context and injects history into new sessions"
4
+ homepage: https://github.com/openclaw-mem
5
+ metadata:
6
+ {
7
+ "openclaw":
8
+ {
9
+ "emoji": "🧠",
10
+ "events": ["command:new", "gateway:startup", "agent:bootstrap", "agent:response", "agent:stop", "tool:post", "user:prompt", "message"],
11
+ "requires": { "config": ["workspace.dir"] },
12
+ "install": [{ "id": "local", "kind": "local", "label": "Local installation" }],
13
+ },
14
+ }
15
+ ---
16
+
17
+ # OpenClaw-Mem: Persistent Memory System
18
+
19
+ A claude-mem inspired memory system for OpenClaw that automatically captures tool usage, generates summaries, and injects relevant context into new sessions.
20
+
21
+ ## Features
22
+
23
+ - 🧠 **Persistent Memory** - Context survives across sessions
24
+ - 📊 **Progressive Disclosure** - Shows index first, fetch details on demand
25
+ - 🔍 **Hybrid Search** - Full-text + semantic search
26
+ - 🤖 **AI Compression** - Automatic summarization of observations
27
+ - ⚡ **Token Efficient** - Only loads what's relevant
28
+ - 📡 **Real-time Capture** - Records messages as they happen
29
+
30
+ ## Events Captured
31
+
32
+ - `gateway:startup` - Initialize memory system
33
+ - `agent:bootstrap` - Inject historical context
34
+ - `agent:response` - Capture assistant responses in real-time
35
+ - `agent:stop` - Save session summary
36
+ - `command:new` - Save session content before reset
37
+ - `tool:post` - Capture tool usage
38
+ - `user:prompt` - Capture user messages
39
+ - `message` - Capture all messages
40
+
41
+ ## Configuration
42
+
43
+ ```json
44
+ {
45
+ "hooks": {
46
+ "internal": {
47
+ "entries": {
48
+ "openclaw-mem": {
49
+ "enabled": true,
50
+ "observationLimit": 50,
51
+ "fullDetailCount": 5,
52
+ "compressWithLLM": true
53
+ }
54
+ }
55
+ }
56
+ }
57
+ }
58
+ ```
59
+
60
+ ## Storage
61
+
62
+ Data is stored in SQLite at `~/.openclaw-mem/memory.db`:
63
+ - `sessions` - Session records
64
+ - `observations` - Tool call observations
65
+ - `summaries` - Session summaries
66
+
67
+ ## Real-time Monitoring
68
+
69
+ ```bash
70
+ node ~/.openclaw/hooks/openclaw-mem/monitor.js
71
+ ```
72
+
73
+ ## Usage
74
+
75
+ The memory system works automatically. To search manually:
76
+
77
+ ```bash
78
+ # Search memory
79
+ openclaw memory search "authentication"
80
+
81
+ # View status
82
+ openclaw memory status
83
+ ```
84
+
85
+ ## MCP Server (Model Context Protocol)
86
+
87
+ OpenClaw-Mem provides an MCP server so the AI can query memories on demand:
88
+
89
+ ### MCP Tools
90
+
91
+ | Tool | Description |
92
+ |------|-------------|
93
+ | `__IMPORTANT` | Shows the 3-layer workflow instructions |
93
+ | `search` | Search the memory index |
94
+ | `timeline` | Get the context around an observation |
95
+ | `get_observations` | Fetch full details |
97
+
98
+ ### Starting the MCP Server
99
+
100
+ ```bash
101
+ # stdio mode (standard MCP)
102
+ node ~/.openclaw/hooks/openclaw-mem/mcp-server.js
103
+
104
+ # HTTP API mode (compatibility mode)
105
+ node ~/.openclaw/hooks/openclaw-mem/mcp-http-api.js
106
+ ```
107
+
108
+ ### HTTP API Endpoints
109
+
110
+ ```bash
111
+ # Search
112
+ curl "http://127.0.0.1:18790/search?query=database&limit=10"
113
+
114
+ # Timeline
115
+ curl "http://127.0.0.1:18790/timeline?anchor=123"
116
+
117
+ # Get details
118
+ curl -X POST "http://127.0.0.1:18790/get_observations" -d '{"ids":[123,124]}'
119
+ ```
120
+
121
+ ## Disabling
122
+
123
+ ```bash
124
+ openclaw hooks disable openclaw-mem
125
+ ```
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2024 OpenClaw Contributors
3
+ Copyright (c) 2026 Aaron
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
package/MCP.json ADDED
@@ -0,0 +1,11 @@
1
+ {
2
+ "mcpServers": {
3
+ "openclaw-mem-search": {
4
+ "type": "stdio",
5
+ "command": "node",
6
+ "args": ["mcp-server.js"],
7
+ "cwd": "${HOOK_ROOT}",
8
+ "description": "Memory search tools for OpenClaw-Mem"
9
+ }
10
+ }
11
+ }
package/README.md CHANGED
@@ -1,217 +1,208 @@
1
- <p align="center">
2
- <img src="docs/logo.svg" alt="OpenClaw-Mem Logo" width="300"/>
3
- </p>
4
-
5
- <h1 align="center">OpenClaw-Mem 🧠</h1>
6
-
7
- <p align="center">
8
- <strong>Give your OpenClaw AI agent persistent long-term memory</strong>
9
- </p>
10
-
11
- <p align="center">
12
- <a href="https://www.npmjs.com/package/openclaw-mem"><img src="https://img.shields.io/npm/v/openclaw-mem.svg" alt="npm version"/></a>
13
- <a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="License: MIT"/></a>
14
- <a href="https://www.npmjs.com/package/openclaw-mem"><img src="https://img.shields.io/npm/dm/openclaw-mem.svg" alt="npm downloads"/></a>
15
- </p>
16
-
17
- <p align="center">
18
- <a href="README_CN.md">🇨🇳 中文</a> •
19
- <a href="docs/locales/README_JA.md">🇯🇵 日本語</a> •
20
- <a href="docs/locales/README_KO.md">🇰🇷 한국어</a> •
21
- <a href="docs/locales/README_ES.md">🇪🇸 Español</a> •
22
- <a href="docs/locales/README_DE.md">🇩🇪 Deutsch</a> •
23
- <a href="docs/locales/README_FR.md">🇫🇷 Français</a> •
24
- <a href="docs/locales/README_PT.md">🇧🇷 Português</a>
25
- <br/>
26
- <a href="docs/locales/README_RU.md">🇷🇺 Русский</a> •
27
- <a href="docs/locales/README_AR.md">🇸🇦 العربية</a> •
28
- <a href="docs/locales/README_HE.md">🇮🇱 עברית</a> •
29
- <a href="docs/locales/README_TR.md">🇹🇷 Türkçe</a> •
30
- <a href="docs/locales/README_IT.md">🇮🇹 Italiano</a> •
31
- <a href="docs/locales/README_NL.md">🇳🇱 Nederlands</a> •
32
- <a href="docs/locales/README_PL.md">🇵🇱 Polski</a>
33
- <br/>
34
- <a href="docs/locales/README_UK.md">🇺🇦 Українська</a> •
35
- <a href="docs/locales/README_VI.md">🇻🇳 Tiếng Việt</a> •
36
- <a href="docs/locales/README_TH.md">🇹🇭 ไทย</a> •
37
- <a href="docs/locales/README_HI.md">🇮🇳 हिन्दी</a> •
38
- <a href="docs/locales/README_ID.md">🇮🇩 Indonesia</a>
39
- </p>
40
-
41
- <hr/>
42
-
43
- OpenClaw-Mem automatically captures your conversations and makes them searchable, allowing your AI assistant to remember what you've discussed across sessions.
44
-
45
- ## ✨ Features
46
-
47
- - **🔄 Automatic Memory Capture** - Conversations are saved automatically
48
- - **🔍 Full-Text Search** - Search through your entire conversation history
49
- - **📊 Progressive Disclosure** - Efficient token usage with layered context
50
- - **🎯 Topic Detection** - Automatically indexes discussions by topic
51
- - **💾 Local Storage** - All data stays on your machine (SQLite)
52
- - **⚡ Zero Config** - Works out of the box
53
-
54
- ## 🚀 Quick Start
1
+ # OpenClaw-Mem
55
2
 
56
- ```bash
57
- # Install and setup (one command!)
58
- npx openclaw-mem init
3
+ A persistent memory system for [OpenClaw](https://github.com/openclaw) that automatically captures conversations, generates summaries, and injects relevant context into new sessions.
59
4
 
60
- # Restart OpenClaw gateway
61
- openclaw gateway restart
62
- ```
5
+ ## Features
63
6
 
64
- That's it! Start chatting and your conversations will be remembered.
7
+ - **Persistent Memory** - Context survives across sessions
8
+ - **Progressive Disclosure** - Shows index first, fetch details on demand
9
+ - **Hybrid Search** - Full-text + LIKE search with CJK support
10
+ - **AI Compression** - Automatic summarization of observations
11
+ - **Token Efficient** - Only loads what's relevant
12
+ - **Real-time Capture** - Records messages as they happen
13
+ - **MCP Compatible** - Model Context Protocol server included
14
+ - **HTTP API** - REST API for memory queries
65
15
 
66
- ## 📖 Usage
16
+ ## Installation
67
17
 
68
- ### Recalling Memories in Chat
18
+ ### As OpenClaw Hook
69
19
 
70
- Ask your AI assistant:
71
- - "What did we discuss before?"
72
- - "What were we working on last time?"
73
- - "Remind me about the authentication issue"
20
+ ```bash
21
+ # Clone to OpenClaw hooks directory
22
+ git clone https://github.com/wenyupapa-sys/openclaw-mem.git ~/.openclaw/hooks/openclaw-mem
23
+ cd ~/.openclaw/hooks/openclaw-mem
24
+ npm install
25
+ ```
74
26
 
75
- ### CLI Commands
27
+ ### As npm Package
76
28
 
77
29
  ```bash
78
- # Check memory status
79
- npx openclaw-mem status
30
+ npm install openclaw-mem
31
+ ```
80
32
 
81
- # Search your memory
82
- npx openclaw-mem search "authentication"
83
- npx openclaw-mem search "AI memory"
33
+ > ⚠️ **Important:** npm installation does NOT automatically prompt for API key configuration. You MUST manually configure your DeepSeek API key after installation. See [Configuration](#configuration) section below.
84
34
 
85
- # Remove (keeps database)
86
- npx openclaw-mem uninstall
87
- ```
35
+ **After npm install, choose one of these methods:**
88
36
 
89
- ## 🏗️ How It Works
37
+ ```bash
38
+ # Method 1: Run the setup wizard
39
+ npx openclaw-mem-setup
90
40
 
41
+ # Method 2: Set environment variable directly
42
+ export DEEPSEEK_API_KEY="your-deepseek-api-key"
43
+ # Add this line to your ~/.bashrc or ~/.zshrc to persist
91
44
  ```
92
- +-------------+ +------------------+ +------------+
93
- | Telegram | --> | OpenClaw Gateway | --> | AI Agent |
94
- +-------------+ +------------------+ +------------+
95
- | |
96
- v v
97
- +--------------+ +------------+
98
- | openclaw-mem | | Read Tool |
99
- | hook | +------------+
100
- +--------------+ |
101
- | v
102
- +-------------------+----------------------+
103
- | |
104
- v v
105
- +---------------+ +------------------+
106
- | SQLite DB | | SESSION-MEMORY.md|
107
- | (persistent) | | (injected) |
108
- +---------------+ +------------------+
109
- ```
110
45
 
111
- ### Event Flow
46
+ ## Quick Start
47
+
48
+ 1. **Install the hook** (see above)
112
49
 
113
- 1. **`gateway:startup`** - Initializes the memory database
114
- 2. **`agent:bootstrap`** - Injects historical context into new sessions
115
- 3. **`command:new`** - Saves session summary when you start fresh
50
+ 2. **Run setup** - configure your DeepSeek API key (prompted automatically after install)
51
+ ```bash
52
+ # Or run manually later
53
+ npm run setup
54
+ # or
55
+ npx openclaw-mem-setup
56
+ ```
116
57
 
117
- ### Progressive Disclosure
58
+ 3. **Restart OpenClaw** to load the hook
118
59
 
119
- To optimize token usage, memories are organized in layers:
60
+ 4. **Start chatting** - conversations are automatically saved
120
61
 
121
- | Layer | Content | Tokens |
122
- |-------|---------|--------|
123
- | Index | Compact table of all observations | ~Low |
124
- | Topics | Key discussion summaries | ~Medium |
125
- | Details | Full content (on demand) | ~High |
62
+ 5. **Query memories** - ask "what did we discuss before?" and the AI will search the memory database
126
63
 
127
- ## 📁 File Locations
64
+ ## Events Captured
128
65
 
129
- | File | Location | Purpose |
130
- |------|----------|---------|
131
- | Database | `~/.openclaw-mem/memory.db` | Persistent storage |
132
- | Hook | `~/.openclaw/hooks/openclaw-mem/` | Event handlers |
133
- | Memory Context | `~/.openclaw/workspace/SESSION-MEMORY.md` | Injected context |
66
+ | Event | Description |
67
+ |-------|-------------|
68
+ | `gateway:startup` | Initialize memory system |
69
+ | `agent:bootstrap` | Inject historical context |
70
+ | `agent:response` | Capture assistant responses |
71
+ | `agent:stop` | Save session summary |
72
+ | `command:new` | Save session before reset |
73
+ | `tool:post` | Capture tool usage |
74
+ | `user:prompt` | Capture user messages |
134
75
 
135
- ## ⚙️ Configuration
76
+ ## API Reference
136
77
 
137
- ### Customize MEMORY.md
78
+ ### HTTP API (Port 18790)
138
79
 
139
- Add your preferences to `~/.openclaw/workspace/MEMORY.md`:
80
+ ```bash
81
+ # Search memories
82
+ curl -s -X POST "http://127.0.0.1:18790/search" \
83
+ -H "Content-Type: application/json" \
84
+ -d '{"query":"keyword","limit":10}'
85
+
86
+ # Get observation details
87
+ curl -s -X POST "http://127.0.0.1:18790/get_observations" \
88
+ -H "Content-Type: application/json" \
89
+ -d '{"ids":[123,124]}'
90
+
91
+ # Get timeline context
92
+ curl -s -X POST "http://127.0.0.1:18790/timeline" \
93
+ -H "Content-Type: application/json" \
94
+ -d '{"anchor":123}'
95
+
96
+ # Health check
97
+ curl "http://127.0.0.1:18790/health"
98
+ ```
140
99
 
141
- ```markdown
142
- ## User Preferences
143
- - Communication style: Technical and concise
144
- - Language: English
100
+ ### Shell Scripts
145
101
 
146
- ## Long-Term Context
147
- - Working on: AI memory systems
148
- - Interests: Machine learning, distributed systems
102
+ ```bash
103
+ # Search (handles CJK encoding automatically)
104
+ ~/.openclaw/hooks/openclaw-mem/mem-search.sh "关键词" 10
149
105
 
150
- ## Ongoing Focus
151
- - Current project: Building a chat application
106
+ # Get details
107
+ ~/.openclaw/hooks/openclaw-mem/mem-get.sh 123 124 125
152
108
  ```
153
109
 
154
- ### Dynamic Topic Detection
110
+ ### MCP Server
155
111
 
156
- The system automatically extracts topics from your actual conversations:
157
- - Concepts mentioned frequently are detected automatically
158
- - No hardcoded keywords - everything comes from your data
159
- - Topics evolve as your discussions change
112
+ ```bash
113
+ # Start MCP server (stdio mode)
114
+ node mcp-server.js
115
+ ```
160
116
 
161
- ## 🔧 Troubleshooting
117
+ MCP Tools:
118
+ - `search` - Search memory index
119
+ - `timeline` - Get context around an observation
120
+ - `get_observations` - Fetch full details
162
121
 
163
- ### Memory not being recalled?
122
+ ## Configuration
164
123
 
165
- 1. **Restart the gateway:**
166
- ```bash
167
- openclaw gateway restart
168
- ```
124
+ ### Environment Variables
169
125
 
170
- 2. **Check hook is installed:**
171
- ```bash
172
- ls ~/.openclaw/hooks/openclaw-mem/
173
- ```
126
+ ```bash
127
+ # Enables AI summarization (optional but recommended)
128
+ export DEEPSEEK_API_KEY="your-deepseek-api-key"
174
129
 
175
- 3. **Verify database has content:**
176
- ```bash
177
- npx openclaw-mem status
178
- ```
130
+ # Optional: Custom DeepSeek endpoint
131
+ export DEEPSEEK_BASE_URL="https://api.deepseek.com/v1"
132
+
133
+ # Optional: Custom model
134
+ export DEEPSEEK_MODEL="deepseek-chat"
135
+ ```
136
+
137
+ Get your DeepSeek API key at: https://platform.deepseek.com/
138
+
139
+ > **Note:** Without `DEEPSEEK_API_KEY`, the system will still work but won't generate AI summaries for sessions.
179
140
 
180
- ### Search not finding results?
141
+ ### OpenClaw Config
181
142
 
182
- - Try simpler search terms
183
- - Check for typos
184
- - Use keywords from the actual conversation
143
+ Add to your OpenClaw config:
185
144
 
186
- ### Database issues?
145
+ ```json
146
+ {
147
+ "hooks": {
148
+ "internal": {
149
+ "entries": {
150
+ "openclaw-mem": {
151
+ "enabled": true,
152
+ "observationLimit": 50,
153
+ "fullDetailCount": 5
154
+ }
155
+ }
156
+ }
157
+ }
158
+ }
159
+ ```
160
+
161
+ ## Storage
162
+
163
+ Data is stored in SQLite at `~/.openclaw-mem/memory.db`:
164
+
165
+ | Table | Description |
166
+ |-------|-------------|
167
+ | `sessions` | Session records |
168
+ | `observations` | Tool calls and messages |
169
+ | `summaries` | Session summaries |
170
+ | `user_prompts` | User inputs |
171
+
172
+ ## Development
187
173
 
188
174
  ```bash
189
- # Reset database (⚠️ deletes all memories)
190
- rm ~/.openclaw-mem/memory.db
191
- openclaw gateway restart
175
+ # Run tests
176
+ npm test
177
+
178
+ # Start HTTP API server
179
+ npm run api
180
+
181
+ # Start MCP server
182
+ npm run mcp
183
+
184
+ # Monitor real-time activity
185
+ node debug-logger.js
192
186
  ```
193
187
 
194
- ## 🤝 Contributing
188
+ ## 3-Layer Retrieval Workflow
189
+
190
+ For efficient token usage, use progressive disclosure:
195
191
 
196
- Contributions are welcome! Please feel free to submit a Pull Request.
192
+ 1. **Search** → Get index with IDs (~50-100 tokens/result)
193
+ 2. **Timeline** → Get context around interesting results
194
+ 3. **Get Observations** → Fetch full details ONLY for filtered IDs
197
195
 
198
- 1. Fork the repository
199
- 2. Create your feature branch (`git checkout -b feature/amazing-feature`)
200
- 3. Commit your changes (`git commit -m 'Add amazing feature'`)
201
- 4. Push to the branch (`git push origin feature/amazing-feature`)
202
- 5. Open a Pull Request
196
+ This approach saves ~30% tokens compared to fetching everything.
203
197
 
204
- ## 📄 License
198
+ ## License
205
199
 
206
- MIT License - see [LICENSE](LICENSE) for details.
200
+ MIT
207
201
 
208
- ## 🙏 Acknowledgments
202
+ ## Contributing
209
203
 
210
- - Inspired by [claude-mem](https://github.com/anthropics/claude-mem)
211
- - Built for [OpenClaw](https://openclaw.ai)
204
+ Pull requests welcome! Please ensure tests pass before submitting.
212
205
 
213
- ---
206
+ ## Credits
214
207
 
215
- <p align="center">
216
- Made with ❤️ for the AI community
217
- </p>
208
+ Inspired by [claude-mem](https://github.com/anthropics/claude-code) plugin architecture.
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Backfill embeddings for existing observations.
4
+ * Run manually: node backfill-embeddings.js
5
+ *
6
+ * Finds all observations without embeddings and generates them
7
+ * in batches of 16 using the DeepSeek embeddings API.
8
+ */
9
+
10
+ import database from './database.js';
11
+ import { batchEmbeddings } from './gateway-llm.js';
12
+
13
+ const BATCH_SIZE = 16;
14
+
15
+ async function backfill() {
16
+ const totalObs = database.getStats().total_observations;
17
+ const existingEmbeddings = database.getEmbeddingCount();
18
+ console.log(`Total observations: ${totalObs}`);
19
+ console.log(`Existing embeddings: ${existingEmbeddings}`);
20
+ console.log(`Missing: ~${totalObs - existingEmbeddings}`);
21
+ console.log('');
22
+
23
+ let processed = 0;
24
+ let saved = 0;
25
+ let failed = 0;
26
+
27
+ while (true) {
28
+ const batch = database.getObservationsWithoutEmbeddings(BATCH_SIZE);
29
+ if (batch.length === 0) break;
30
+
31
+ // Build text for each observation
32
+ const texts = batch.map(obs => {
33
+ const parts = [obs.summary, obs.narrative].filter(Boolean);
34
+ return parts.join(' ').trim() || `Observation #${obs.id}`;
35
+ });
36
+
37
+ console.log(`Batch ${Math.floor(processed / BATCH_SIZE) + 1}: generating embeddings for ${batch.length} observations (IDs ${batch[0].id}-${batch[batch.length - 1].id})...`);
38
+
39
+ const embeddings = await batchEmbeddings(texts);
40
+
41
+ for (let i = 0; i < batch.length; i++) {
42
+ const obs = batch[i];
43
+ const embedding = embeddings[i];
44
+
45
+ if (embedding) {
46
+ const result = database.saveEmbedding(obs.id, embedding);
47
+ if (result.success) {
48
+ saved++;
49
+ } else {
50
+ failed++;
51
+ console.error(` Failed to save embedding for #${obs.id}: ${result.error}`);
52
+ }
53
+ } else {
54
+ failed++;
55
+ console.error(` No embedding returned for #${obs.id}`);
56
+ }
57
+ }
58
+
59
+ processed += batch.length;
60
+ console.log(` Progress: ${saved} saved, ${failed} failed, ${processed} processed`);
61
+
62
+ // Small delay between batches to avoid rate limiting
63
+ if (batch.length === BATCH_SIZE) {
64
+ await new Promise(r => setTimeout(r, 500));
65
+ }
66
+ }
67
+
68
+ console.log('');
69
+ console.log('=== Backfill Complete ===');
70
+ console.log(`Processed: ${processed}`);
71
+ console.log(`Saved: ${saved}`);
72
+ console.log(`Failed: ${failed}`);
73
+ console.log(`Total embeddings now: ${database.getEmbeddingCount()}`);
74
+ }
75
+
76
/**
 * Script entry point: log the rejection reason and exit with status 1
 * when the backfill promise rejects.
 */
function reportBackfillFailure(error) {
  console.error('Backfill failed:', error);
  process.exit(1);
}

backfill().catch(reportBackfillFailure);