@icex-labs/openclaw-memory-engine 5.0.0 → 5.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -210,6 +210,38 @@ Falls back to keyword-only without OpenAI key. Cost: ~$0.001/session.
210
210
 
211
211
  ---
212
212
 
213
+ ## Classification (v5.0)
214
+
215
+ Entity and importance classification is **embedding-based** — no hardcoded keywords, works with any language.
216
+
217
+ ```
218
+ With OPENAI_API_KEY (recommended):
219
+ 15 entity anchors (health, finance, immigration, legal, vehicles, ...)
220
+ 4 importance anchors (critical / high / medium / low)
221
+ Anchor embeddings computed once, cached to classifier-anchors.json
222
+ → Language-agnostic: Japanese, French, Korean, Chinese, English all work
223
+
224
+ Without OPENAI_API_KEY (fallback):
225
+ Format-based heuristics:
226
+ $amounts → finance (importance 7)
227
+ URLs/code → technology
228
+ Dates → importance 6
229
+ Short messages → low importance
230
+ Long messages → high importance
231
+ → Basic but functional, no API cost
232
+ ```
233
+
234
+ Real results on 2,751 records:
235
+
236
+ | Metric | Before (regex v4) | After (embedding v5) |
237
+ |--------|-------------------|---------------------|
238
+ | "general" entities | 45% | **29%** |
239
+ | flat importance=5 | 71% | **39%** |
240
+ | Languages supported | English + Chinese | **any** |
241
+ | Hardcoded keywords | 100+ | **zero** |
242
+
243
+ ---
244
+
213
245
  ## Self-Healing
214
246
 
215
247
  | Issue | Auto-fix |
@@ -217,8 +249,9 @@ Falls back to keyword-only without OpenAI key. Cost: ~$0.001/session.
217
249
  | Missing embeddings | Batch backfill on restart (all workspaces) |
218
250
  | Agent forgets to save | Hooks capture everything passively |
219
251
  | Duplicate facts | 60s dedup + keyword overlap + weekly cron |
220
- | Flat importance scores | `memory_quality` pass after migration |
221
- | General entity labels | `memory_quality` re-classifies with 50+ patterns |
252
+ | Flat importance scores | Embedding-based re-rating via `memory_quality` |
253
+ | General entity labels | Embedding-based re-classification via `memory_quality` |
254
+ | No API key | Format-based fallback classifier (basic but functional) |
222
255
 
223
256
  ---
224
257
 
@@ -284,8 +317,9 @@ memory-engine/
284
317
  │ ├── backup.js # Export / import
285
318
  │ ├── store-sqlite.js # SQLite backend (FTS5)
286
319
  │ ├── dashboard.js # HTML dashboard generator
287
- │ ├── quality.js # Data quality: entity + importance + graph
288
- └── auto-capture.js # Passive hooks: message archival
320
+ │ ├── classifier.js # Embedding-based entity + importance classification
321
+ │ ├── quality.js # Data quality pass (uses classifier)
322
+ │ └── auto-capture.js # Passive hooks: message → archival (uses classifier)
289
323
  ├── extras/
290
324
  │ ├── memory-maintenance.sh
291
325
  │ ├── migrate-legacy.mjs
package/lib/classifier.js CHANGED
@@ -158,16 +158,57 @@ export async function classifyImportance(contentEmbedding, ws) {
158
158
  return IMPORTANCE_SCORES[bestLevel] || 5;
159
159
  }
160
160
 
161
/**
 * Lightweight fallback classifier — no embedding API needed.
 *
 * Uses format/symbol signals that work across languages. The first matching
 * rule decides the entity and base importance:
 *   1. currency amounts or large bare numbers → "finance", importance 7
 *   2. URLs, code fences, file paths, common source-file extensions → "technology"
 *   3. ISO dates (YYYY-MM-DD) or clock times (H:MM) → "general", importance 6
 * A length adjustment is then applied on top: messages shorter than 30
 * characters are capped at importance 3, messages longer than 200 characters
 * are raised to at least importance 6.
 *
 * @param {string} content - text to classify; non-string values are coerced,
 *   null/undefined are treated as an empty string
 * @returns {{ entity: string, importance: number }}
 */
function fallbackClassify(content) {
  const text = typeof content === "string" ? content : String(content ?? "");

  let entity = "general";
  let importance = 5;

  // The year of an ISO date is a 4-digit run and would otherwise satisfy the
  // "large number" finance signal, so every dated message would be labelled
  // finance and the date rule below could never fire. Blank the dates out
  // before looking for bare numbers (currency-prefixed amounts are still
  // checked against the untouched text).
  const textWithoutIsoDates = text.replace(/\b\d{4}-\d{2}-\d{2}\b/g, " ");
  const hasCurrencyAmount = /[\$€£¥₹]\s*[\d,.]+/.test(text);
  const hasLargeNumber = /\b\d{4,}[\d,.]*\b/.test(textWithoutIsoDates);

  // Finance: currency symbols, large numbers
  if (hasCurrencyAmount || hasLargeNumber) {
    entity = "finance";
    importance = 7;
  }
  // Technology: URLs, code patterns, file paths
  else if (/https?:\/\/|```|\/\w+\/\w+|\.(js|py|ts|json|yaml|md)\b/i.test(text)) {
    entity = "technology";
  }
  // Dates with context → likely scheduling/planning
  else if (/\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}:\d{2}\b/.test(text)) {
    importance = 6;
  }

  // Short messages are less important
  if (text.length < 30) importance = Math.min(importance, 3);
  // Long detailed messages are more important
  if (text.length > 200) importance = Math.max(importance, 6);

  return { entity, importance };
}
194
+
161
195
  /**
162
196
  * Full classification: entity + importance in one call.
163
- * Reuses the same content embedding for both.
197
+ * Uses embedding similarity when available, falls back to format-based heuristics.
164
198
  * @param {string} content - text to classify
165
199
  * @param {string} ws - workspace path
166
200
  * @param {float[]} [existingEmbedding] - reuse if already computed
167
- * @returns {Promise<{ entity: string, importance: number }>}
201
+ * @returns {Promise<{ entity: string, importance: number, embedding: float[]|null }>}
168
202
  */
169
203
  export async function classify(content, ws, existingEmbedding = null) {
170
204
  const emb = existingEmbedding || await getEmbedding(content);
205
+
206
+ // If no embedding available (no API key), use fallback
207
+ if (!emb) {
208
+ const fb = fallbackClassify(content);
209
+ return { entity: fb.entity, importance: fb.importance, embedding: null };
210
+ }
211
+
171
212
  const [entity, importance] = await Promise.all([
172
213
  classifyEntity(emb, ws),
173
214
  classifyImportance(emb, ws),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@icex-labs/openclaw-memory-engine",
3
- "version": "5.0.0",
3
+ "version": "5.0.2",
4
4
  "description": "MemGPT-style hierarchical memory plugin for OpenClaw — core memory block + archival storage with semantic search",
5
5
  "type": "module",
6
6
  "main": "index.js",