npm - memory-braid - Versions diffs - 0.4.5 → 0.4.6 - Mend

memory-braid 0.4.5 → 0.4.6

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

package/README.md CHANGED Viewed

@@ -7,7 +7,7 @@ Memory Braid is an OpenClaw `kind: "memory"` plugin that augments local memory s
 - Hybrid recall: local memory + Mem0, merged with weighted RRF.
 - Capture-first Mem0 memory: plugin writes only captured memories to Mem0 (no markdown/session indexing).
 - Capture pipeline modes: `local`, `hybrid`, `ml`.
-- Optional entity extraction: multilingual NER with canonical `entity://...` URIs in memory metadata.
+- Optional entity extraction: local multilingual NER or OpenAI NER with canonical `entity://...` URIs in memory metadata.
 - Structured debug logs for troubleshooting and tuning.
 ## Breaking changes in 0.4.0
@@ -109,7 +109,7 @@ Note:
   - `fixCommand` (copy/paste command for that machine)
   - `pluginDir` (resolved extension directory when available)
-## Quick start: hybrid capture + multilingual NER
+## Quick start: hybrid capture + entity extraction
 Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
@@ -156,8 +156,9 @@ Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
   },
   "entityExtraction": {
     "enabled": true,
-    "provider": "multilingual_ner",
-    "model": "Xenova/bert-base-multilingual-cased-ner-hrl",
+    "provider": "openai",
+    "model": "gpt-4o-mini",
+    "timeoutMs": 2500,
     "minScore": 0.65,
     "maxEntitiesPerMemory": 8,
     "startup": {
@@ -171,6 +172,18 @@ Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
 }
 ```
+Local-model alternative (fully backward compatible):
+```json
+{
+  "entityExtraction": {
+    "enabled": true,
+    "provider": "multilingual_ner",
+    "model": "Xenova/bert-base-multilingual-cased-ner-hrl"
+  }
+}
+```
 Then restart:
 ```bash
@@ -186,7 +199,7 @@ openclaw plugins info memory-braid
 openclaw gateway status
 ```
-2. Trigger/inspect NER warmup:
+2. Trigger/inspect entity warmup:
 ```bash
 openclaw agent --agent main --message "/memorybraid warmup" --json
@@ -206,7 +219,7 @@ rg -n "memory_braid\\.startup|memory_braid\\.capture|memory_braid\\.entity|memor
 Expected events:
 - `memory_braid.startup`
-- `memory_braid.entity.model_load`
+- `memory_braid.entity.model_load` (local `multilingual_ner` provider only)
 - `memory_braid.entity.warmup`
 - `memory_braid.capture.extract`
 - `memory_braid.capture.ml` (for `capture.mode=hybrid|ml`)
@@ -436,8 +449,9 @@ Use this preset when:
       },
       "entityExtraction": {
         "enabled": true,
-        "provider": "multilingual_ner",
-        "model": "Xenova/bert-base-multilingual-cased-ner-hrl",
+        "provider": "openai",
+        "model": "gpt-4o-mini",
+        "timeoutMs": 2500,
         "minScore": 0.65,
         "maxEntitiesPerMemory": 8,
         "startup": {
@@ -505,7 +519,8 @@ Entity extraction defaults are:
 - `entityExtraction.enabled`: `false`
 - `entityExtraction.provider`: `"multilingual_ner"`
-- `entityExtraction.model`: `"Xenova/bert-base-multilingual-cased-ner-hrl"`
+- `entityExtraction.model`: `"Xenova/bert-base-multilingual-cased-ner-hrl"` (or `"gpt-4o-mini"` when `provider: "openai"` and model is unset)
+- `entityExtraction.timeoutMs`: `2500`
 - `entityExtraction.minScore`: `0.65`
 - `entityExtraction.maxEntitiesPerMemory`: `8`
 - `entityExtraction.startup.downloadOnStartup`: `true`
@@ -513,9 +528,9 @@ Entity extraction defaults are:
 When enabled:
-- Model cache/download path is `<OPENCLAW_STATE_DIR>/memory-braid/models/entity-extraction` (typically `~/.openclaw/memory-braid/models/entity-extraction`).
+- Local NER model cache/download path is `<OPENCLAW_STATE_DIR>/memory-braid/models/entity-extraction` (typically `~/.openclaw/memory-braid/models/entity-extraction`).
 - Captured memories get `metadata.entities` and `metadata.entityUris` (canonical IDs like `entity://person/john-doe`).
-- Startup can pre-download/warm the model (`downloadOnStartup: true`).
+- Startup warmup runs for both providers (`downloadOnStartup: true`).
 Warmup command:

package/openclaw.plugin.json CHANGED Viewed

@@ -72,13 +72,14 @@
           "enabled": { "type": "boolean", "default": false },
           "provider": {
             "type": "string",
-            "enum": ["multilingual_ner"],
+            "enum": ["multilingual_ner", "openai"],
             "default": "multilingual_ner"
           },
           "model": {
             "type": "string",
             "default": "Xenova/bert-base-multilingual-cased-ner-hrl"
           },
+          "timeoutMs": { "type": "integer", "minimum": 250, "maximum": 30000, "default": 2500 },
           "minScore": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.65 },
           "maxEntitiesPerMemory": { "type": "integer", "minimum": 1, "maximum": 50, "default": 8 },
           "startup": {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "memory-braid",
-  "version": "0.4.5",
+  "version": "0.4.6",
   "description": "OpenClaw memory plugin that augments local memory with Mem0 capture and recall.",
   "type": "module",
   "main": "./src/index.ts",

package/src/config.ts CHANGED Viewed

@@ -31,8 +31,9 @@ export type MemoryBraidConfig = {
   };
   entityExtraction: {
     enabled: boolean;
-    provider: "multilingual_ner";
+    provider: "multilingual_ner" | "openai";
     model: string;
+    timeoutMs: number;
     minScore: number;
     maxEntitiesPerMemory: number;
     startup: {
@@ -101,6 +102,7 @@ const DEFAULTS: MemoryBraidConfig = {
     enabled: false,
     provider: "multilingual_ner",
     model: "Xenova/bert-base-multilingual-cased-ner-hrl",
+    timeoutMs: 2500,
     minScore: 0.65,
     maxEntitiesPerMemory: 8,
     startup: {
@@ -184,6 +186,14 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
     rawCaptureMode === "local" || rawCaptureMode === "hybrid" || rawCaptureMode === "ml"
       ? rawCaptureMode
       : DEFAULTS.capture.mode;
+  const entityProvider = entityExtraction.provider === "openai" ? "openai" : "multilingual_ner";
+  const parsedEntityModel = asString(entityExtraction.model);
+  const entityModel =
+    entityProvider === "openai"
+      ? parsedEntityModel && parsedEntityModel !== DEFAULTS.entityExtraction.model
+        ? parsedEntityModel
+        : "gpt-4o-mini"
+      : parsedEntityModel ?? DEFAULTS.entityExtraction.model;
   return {
     enabled: asBoolean(root.enabled, DEFAULTS.enabled),
@@ -221,11 +231,14 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
     },
     entityExtraction: {
       enabled: asBoolean(entityExtraction.enabled, DEFAULTS.entityExtraction.enabled),
-      provider:
-        entityExtraction.provider === "multilingual_ner"
-          ? "multilingual_ner"
-          : DEFAULTS.entityExtraction.provider,
-      model: asString(entityExtraction.model) ?? DEFAULTS.entityExtraction.model,
+      provider: entityProvider,
+      model: entityModel,
+      timeoutMs: asInt(
+        entityExtraction.timeoutMs,
+        DEFAULTS.entityExtraction.timeoutMs,
+        250,
+        30_000,
+      ),
       minScore: asNumber(entityExtraction.minScore, DEFAULTS.entityExtraction.minScore, 0, 1),
       maxEntitiesPerMemory: asInt(
         entityExtraction.maxEntitiesPerMemory,

package/src/entities.ts CHANGED Viewed

@@ -15,6 +15,15 @@ type NerRecord = {
   end?: unknown;
 };
+type LlmEntityRecord = {
+  text?: unknown;
+  type?: unknown;
+  label?: unknown;
+  entity?: unknown;
+  entity_group?: unknown;
+  score?: unknown;
+};
 export type ExtractedEntity = {
   text: string;
   type: "person" | "organization" | "location" | "misc";
@@ -81,6 +90,44 @@ function normalizeEntityText(raw: unknown): string {
   return normalizeWhitespace(raw.replace(/^##/, "").replace(/^▁/, ""));
 }
+function clampScore(value: unknown, fallback = 0): number {
+  if (typeof value !== "number" || !Number.isFinite(value)) {
+    return Math.max(0, Math.min(1, fallback));
+  }
+  return Math.max(0, Math.min(1, value));
+}
+function parseJsonObjectArray(raw: string): Array<Record<string, unknown>> {
+  const attempts = [raw.trim()];
+  const fencedMatch = raw.match(/```(?:json)?\s*([\s\S]+?)\s*```/i);
+  if (fencedMatch?.[1]) {
+    attempts.push(fencedMatch[1].trim());
+  }
+  const firstBracket = raw.indexOf("[");
+  const lastBracket = raw.lastIndexOf("]");
+  if (firstBracket >= 0 && lastBracket > firstBracket) {
+    attempts.push(raw.slice(firstBracket, lastBracket + 1).trim());
+  }
+  for (const attempt of attempts) {
+    try {
+      const parsed = JSON.parse(attempt) as unknown;
+      if (!Array.isArray(parsed)) {
+        continue;
+      }
+      return parsed.filter((entry) => entry && typeof entry === "object") as Array<
+        Record<string, unknown>
+      >;
+    } catch {
+      continue;
+    }
+  }
+  return [];
+}
 type NormalizedEntityToken = {
   text: string;
   type: ExtractedEntity["type"];
@@ -213,6 +260,29 @@ function collapseAdjacentEntityTokens(
   return collapsed;
 }
+function dedupeAndLimitEntities(
+  entities: Array<Omit<ExtractedEntity, "canonicalUri">>,
+  maxEntities: number,
+): ExtractedEntity[] {
+  const deduped = new Map<string, ExtractedEntity>();
+  for (const entity of entities) {
+    const canonicalUri = buildCanonicalEntityUri(entity.type, entity.text);
+    const current = deduped.get(canonicalUri);
+    if (!current || entity.score > current.score) {
+      deduped.set(canonicalUri, {
+        text: entity.text,
+        type: entity.type,
+        score: entity.score,
+        canonicalUri,
+      });
+    }
+  }
+  return Array.from(deduped.values())
+    .sort((a, b) => b.score - a.score)
+    .slice(0, maxEntities);
+}
 type EntityExtractionOptions = {
   stateDir?: string;
 };
@@ -256,7 +326,10 @@ export class EntityExtractionManager {
       model: this.cfg.model,
       minScore: this.cfg.minScore,
       maxEntitiesPerMemory: this.cfg.maxEntitiesPerMemory,
-      cacheDir: resolveEntityModelCacheDir(this.stateDir),
+      cacheDir:
+        this.cfg.provider === "multilingual_ner"
+          ? resolveEntityModelCacheDir(this.stateDir)
+          : "n/a",
     };
   }
@@ -274,10 +347,14 @@ export class EntityExtractionManager {
     error?: string;
   }> {
     const startedAt = Date.now();
+    const cacheDir =
+      this.cfg.provider === "multilingual_ner"
+        ? resolveEntityModelCacheDir(this.stateDir)
+        : "n/a";
     if (!this.cfg.enabled) {
       return {
         ok: false,
-        cacheDir: resolveEntityModelCacheDir(this.stateDir),
+        cacheDir,
         model: this.cfg.model,
         entities: 0,
         durMs: Date.now() - startedAt,
@@ -285,29 +362,17 @@ export class EntityExtractionManager {
       };
     }
-    const pipeline = await this.ensurePipeline(params?.forceReload);
-    if (!pipeline) {
-      return {
-        ok: false,
-        cacheDir: resolveEntityModelCacheDir(this.stateDir),
-        model: this.cfg.model,
-        entities: 0,
-        durMs: Date.now() - startedAt,
-        error: "model_load_failed",
-      };
-    }
     try {
-      const entities = await this.extractWithPipeline({
-        pipeline,
+      const entities = await this.extractWithProvider({
         text: params?.text ?? this.cfg.startup.warmupText,
+        forceReload: params?.forceReload,
       });
       this.log.info("memory_braid.entity.warmup", {
         runId: params?.runId,
         reason: params?.reason ?? "manual",
         provider: this.cfg.provider,
         model: this.cfg.model,
-        cacheDir: resolveEntityModelCacheDir(this.stateDir),
+        cacheDir,
         entities: entities.length,
         entityTypes: summarizeEntityTypes(entities),
         sampleEntityUris: entities.slice(0, 5).map((entry) => entry.canonicalUri),
@@ -315,7 +380,7 @@ export class EntityExtractionManager {
       });
       return {
         ok: true,
-        cacheDir: resolveEntityModelCacheDir(this.stateDir),
+        cacheDir,
         model: this.cfg.model,
         entities: entities.length,
         durMs: Date.now() - startedAt,
@@ -327,12 +392,12 @@ export class EntityExtractionManager {
         reason: params?.reason ?? "manual",
         provider: this.cfg.provider,
         model: this.cfg.model,
-        cacheDir: resolveEntityModelCacheDir(this.stateDir),
+        cacheDir,
         error: message,
       });
       return {
         ok: false,
-        cacheDir: resolveEntityModelCacheDir(this.stateDir),
+        cacheDir,
         model: this.cfg.model,
         entities: 0,
         durMs: Date.now() - startedAt,
@@ -351,13 +416,8 @@ export class EntityExtractionManager {
       return [];
     }
-    const pipeline = await this.ensurePipeline();
-    if (!pipeline) {
-      return [];
-    }
     try {
-      const entities = await this.extractWithPipeline({ pipeline, text });
+      const entities = await this.extractWithProvider({ text });
       this.log.debug("memory_braid.entity.extract", {
         runId: params.runId,
         provider: this.cfg.provider,
@@ -378,11 +438,112 @@ export class EntityExtractionManager {
     }
   }
+  private async extractWithProvider(params: {
+    text: string;
+    forceReload?: boolean;
+  }): Promise<ExtractedEntity[]> {
+    if (this.cfg.provider === "openai") {
+      return this.extractWithOpenAi(params.text);
+    }
+    const pipeline = await this.ensurePipeline(params.forceReload);
+    if (!pipeline) {
+      throw new Error("model_load_failed");
+    }
+    return this.extractWithPipeline({ pipeline, text: params.text });
+  }
+  private async extractWithOpenAi(text: string): Promise<ExtractedEntity[]> {
+    const key = process.env.OPENAI_API_KEY?.trim();
+    if (!key) {
+      throw new Error("OPENAI_API_KEY is not set");
+    }
+    const controller = new AbortController();
+    const timer = setTimeout(() => controller.abort(), this.cfg.timeoutMs);
+    try {
+      const prompt = [
+        "Extract named entities from this text.",
+        "Return ONLY JSON array.",
+        "Each item: {text:string, type:string, score:number}.",
+        "type must be one of: person, organization, location, misc.",
+        "score must be between 0 and 1.",
+        "Do not include duplicates.",
+        text,
+      ].join("\n");
+      const response = await fetch("https://api.openai.com/v1/chat/completions", {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${key}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          model: this.cfg.model,
+          temperature: 0,
+          messages: [
+            {
+              role: "system",
+              content: "You return strict JSON only.",
+            },
+            {
+              role: "user",
+              content: prompt,
+            },
+          ],
+        }),
+        signal: controller.signal,
+      });
+      const data = (await response.json()) as {
+        error?: { message?: string };
+        choices?: Array<{ message?: { content?: string } }>;
+      };
+      if (!response.ok) {
+        throw new Error(data.error?.message ?? `OpenAI HTTP ${response.status}`);
+      }
+      const content = data.choices?.[0]?.message?.content ?? "";
+      const parsed = parseJsonObjectArray(content);
+      const normalized: Array<Omit<ExtractedEntity, "canonicalUri">> = [];
+      for (const row of parsed) {
+        const record = row as LlmEntityRecord;
+        const entityText = normalizeEntityText(record.text);
+        if (!entityText) {
+          continue;
+        }
+        const score = clampScore(record.score, 0.5);
+        if (score < this.cfg.minScore) {
+          continue;
+        }
+        const type = normalizeEntityType(
+          record.type ?? record.label ?? record.entity_group ?? record.entity,
+        );
+        normalized.push({
+          text: entityText,
+          type,
+          score,
+        });
+      }
+      return dedupeAndLimitEntities(normalized, this.cfg.maxEntitiesPerMemory);
+    } finally {
+      clearTimeout(timer);
+    }
+  }
   private async ensurePipeline(forceReload = false): Promise<NerPipeline | null> {
     if (!this.cfg.enabled) {
       return null;
     }
+    if (this.cfg.provider !== "multilingual_ner") {
+      return null;
+    }
     if (forceReload) {
       this.pipelinePromise = null;
     }
@@ -463,7 +624,7 @@ export class EntityExtractionManager {
       if (!entityText) {
         continue;
       }
-      const score = typeof record.score === "number" ? Math.max(0, Math.min(1, record.score)) : 0;
+      const score = clampScore(record.score);
       if (score < this.cfg.minScore) {
         continue;
       }
@@ -479,22 +640,13 @@ export class EntityExtractionManager {
     }
     const collapsed = collapseAdjacentEntityTokens(normalized, params.text);
-    const deduped = new Map<string, ExtractedEntity>();
-    for (const token of collapsed) {
-      const canonicalUri = buildCanonicalEntityUri(token.type, token.text);
-      const current = deduped.get(canonicalUri);
-      if (!current || token.score > current.score) {
-        deduped.set(canonicalUri, {
-          text: token.text,
-          type: token.type,
-          score: token.score,
-          canonicalUri,
-        });
-      }
-    }
-    return Array.from(deduped.values())
-      .sort((a, b) => b.score - a.score)
-      .slice(0, this.cfg.maxEntitiesPerMemory);
+    return dedupeAndLimitEntities(
+      collapsed.map((token) => ({
+        text: token.text,
+        type: token.type,
+        score: token.score,
+      })),
+      this.cfg.maxEntitiesPerMemory,
+    );
   }
 }