@icex-labs/openclaw-memory-engine 5.0.0 → 5.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -4
- package/lib/classifier.js +43 -2
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -210,6 +210,38 @@ Falls back to keyword-only without OpenAI key. Cost: ~$0.001/session.
|
|
|
210
210
|
|
|
211
211
|
---
|
|
212
212
|
|
|
213
|
+
## Classification (v5.0)
|
|
214
|
+
|
|
215
|
+
Entity and importance classification is **embedding-based** — no hardcoded keywords, works with any language.
|
|
216
|
+
|
|
217
|
+
```
|
|
218
|
+
With OPENAI_API_KEY (recommended):
|
|
219
|
+
15 entity anchors (health, finance, immigration, legal, vehicles, ...)
|
|
220
|
+
4 importance anchors (critical / high / medium / low)
|
|
221
|
+
Anchor embeddings computed once, cached to classifier-anchors.json
|
|
222
|
+
→ Language-agnostic: Japanese, French, Korean, Chinese, English all work
|
|
223
|
+
|
|
224
|
+
Without OPENAI_API_KEY (fallback):
|
|
225
|
+
Format-based heuristics:
|
|
226
|
+
Currency amounts ($, €, £, ¥, ₹) or large numbers (4+ digits) → finance (importance 7)
|
|
227
|
+
URLs/code → technology
|
|
228
|
+
Dates → importance 6
|
|
229
|
+
Short messages (< 30 chars) → low importance (capped at 3)
|
|
230
|
+
Long messages (> 200 chars) → importance at least 6
|
|
231
|
+
→ Basic but functional, no API cost
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
Real results on 2,751 records:
|
|
235
|
+
|
|
236
|
+
| Metric | Before (regex v4) | After (embedding v5) |
|
|
237
|
+
|--------|-------------------|---------------------|
|
|
238
|
+
| "general" entities | 45% | **29%** |
|
|
239
|
+
| flat importance=5 | 71% | **39%** |
|
|
240
|
+
| Languages supported | English + Chinese | **any** |
|
|
241
|
+
| Hardcoded keywords | 100+ | **zero** |
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
213
245
|
## Self-Healing
|
|
214
246
|
|
|
215
247
|
| Issue | Auto-fix |
|
|
@@ -217,8 +249,9 @@ Falls back to keyword-only without OpenAI key. Cost: ~$0.001/session.
|
|
|
217
249
|
| Missing embeddings | Batch backfill on restart (all workspaces) |
|
|
218
250
|
| Agent forgets to save | Hooks capture everything passively |
|
|
219
251
|
| Duplicate facts | 60s dedup + keyword overlap + weekly cron |
|
|
220
|
-
| Flat importance scores | `memory_quality`
|
|
221
|
-
| General entity labels |
|
|
252
|
+
| Flat importance scores | Embedding-based re-rating via `memory_quality` |
|
|
253
|
+
| General entity labels | Embedding-based re-classification via `memory_quality` |
|
|
254
|
+
| No API key | Format-based fallback classifier (basic but functional) |
|
|
222
255
|
|
|
223
256
|
---
|
|
224
257
|
|
|
@@ -284,8 +317,9 @@ memory-engine/
|
|
|
284
317
|
│ ├── backup.js # Export / import
|
|
285
318
|
│ ├── store-sqlite.js # SQLite backend (FTS5)
|
|
286
319
|
│ ├── dashboard.js # HTML dashboard generator
|
|
287
|
-
│ ├──
|
|
288
|
-
│
|
|
320
|
+
│ ├── classifier.js # Embedding-based entity + importance classification
|
|
321
|
+
│ ├── quality.js # Data quality pass (uses classifier)
|
|
322
|
+
│ └── auto-capture.js # Passive hooks: message → archival (uses classifier)
|
|
289
323
|
├── extras/
|
|
290
324
|
│ ├── memory-maintenance.sh
|
|
291
325
|
│ ├── migrate-legacy.mjs
|
package/lib/classifier.js
CHANGED
|
@@ -158,16 +158,57 @@ export async function classifyImportance(contentEmbedding, ws) {
|
|
|
158
158
|
return IMPORTANCE_SCORES[bestLevel] || 5;
|
|
159
159
|
}
|
|
160
160
|
|
|
161
|
+
/**
 * Lightweight fallback classifier — no embedding API needed.
 *
 * Uses format/symbol signals rather than vocabulary, so it does not depend
 * on the language the text is written in. Signals are checked in priority
 * order and the first match wins:
 *   1. currency amounts or large bare numbers → entity "finance", importance 7
 *   2. URLs, code fences, file paths, common source-file extensions → entity "technology"
 *   3. ISO dates (YYYY-MM-DD) or clock times (H:MM) → entity stays "general", importance 6
 *
 * Length adjustments are applied afterwards and can override the values above:
 *   - fewer than 30 characters → importance capped at 3
 *   - more than 200 characters → importance raised to at least 6
 *
 * @param {string} content - text to classify; null/undefined is treated as an empty string
 * @returns {{ entity: string, importance: number }}
 */
function fallbackClassify(content) {
  // Coerce defensively so a missing message degrades to "general / low"
  // instead of throwing on `.length`.
  const text = typeof content === "string" ? content : (content == null ? "" : String(content));

  let entity = "general";
  let importance = 5;

  // The bare "large number" signal must not fire on the year of an ISO date
  // (e.g. "2024" in "2024-01-15"); otherwise every dated message would be
  // labelled finance and the date branch below could never see an ISO date.
  const textWithoutIsoDates = text.replace(/\b\d{4}-\d{2}-\d{2}\b/g, " ");
  const hasCurrencyAmount = /[\$€£¥₹]\s*[\d,.]+/.test(text);
  const hasLargeNumber = /\b\d{4,}[\d,.]*\b/.test(textWithoutIsoDates);

  // Finance: currency symbols, large numbers
  if (hasCurrencyAmount || hasLargeNumber) {
    entity = "finance";
    importance = 7;
  }
  // Technology: URLs, code patterns, file paths
  else if (/https?:\/\/|```|\/\w+\/\w+|\.(js|py|ts|json|yaml|md)\b/i.test(text)) {
    entity = "technology";
  }
  // Dates with context → likely scheduling/planning
  else if (/\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}:\d{2}\b/.test(text)) {
    importance = 6;
  }

  // Short messages are less important (this cap also applies to finance hits)
  if (text.length < 30) importance = Math.min(importance, 3);
  // Long detailed messages are more important
  if (text.length > 200) importance = Math.max(importance, 6);

  return { entity, importance };
}
|
|
194
|
+
|
|
161
195
|
/**
|
|
162
196
|
* Full classification: entity + importance in one call.
|
|
163
|
-
*
|
|
197
|
+
* Uses embedding similarity when available, falls back to format-based heuristics.
|
|
164
198
|
* @param {string} content - text to classify
|
|
165
199
|
* @param {string} ws - workspace path
|
|
166
200
|
* @param {float[]} [existingEmbedding] - reuse if already computed
|
|
167
|
-
* @returns {Promise<{ entity: string, importance: number }>}
|
|
201
|
+
* @returns {Promise<{ entity: string, importance: number, embedding: float[]|null }>}
|
|
168
202
|
*/
|
|
169
203
|
export async function classify(content, ws, existingEmbedding = null) {
|
|
170
204
|
const emb = existingEmbedding || await getEmbedding(content);
|
|
205
|
+
|
|
206
|
+
// If no embedding available (no API key), use fallback
|
|
207
|
+
if (!emb) {
|
|
208
|
+
const fb = fallbackClassify(content);
|
|
209
|
+
return { entity: fb.entity, importance: fb.importance, embedding: null };
|
|
210
|
+
}
|
|
211
|
+
|
|
171
212
|
const [entity, importance] = await Promise.all([
|
|
172
213
|
classifyEntity(emb, ws),
|
|
173
214
|
classifyImportance(emb, ws),
|
package/package.json
CHANGED