memory-braid 0.4.5 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -11
- package/openclaw.plugin.json +2 -1
- package/package.json +1 -1
- package/src/config.ts +19 -6
- package/src/entities.ts +196 -44
package/README.md
CHANGED
|
@@ -7,7 +7,7 @@ Memory Braid is an OpenClaw `kind: "memory"` plugin that augments local memory s
|
|
|
7
7
|
- Hybrid recall: local memory + Mem0, merged with weighted RRF.
|
|
8
8
|
- Capture-first Mem0 memory: plugin writes only captured memories to Mem0 (no markdown/session indexing).
|
|
9
9
|
- Capture pipeline modes: `local`, `hybrid`, `ml`.
|
|
10
|
-
- Optional entity extraction: multilingual NER with canonical `entity://...` URIs in memory metadata.
|
|
10
|
+
- Optional entity extraction: local multilingual NER or OpenAI NER with canonical `entity://...` URIs in memory metadata.
|
|
11
11
|
- Structured debug logs for troubleshooting and tuning.
|
|
12
12
|
|
|
13
13
|
## Breaking changes in 0.4.0
|
|
@@ -109,7 +109,7 @@ Note:
|
|
|
109
109
|
- `fixCommand` (copy/paste command for that machine)
|
|
110
110
|
- `pluginDir` (resolved extension directory when available)
|
|
111
111
|
|
|
112
|
-
## Quick start: hybrid capture +
|
|
112
|
+
## Quick start: hybrid capture + entity extraction
|
|
113
113
|
|
|
114
114
|
Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
|
|
115
115
|
|
|
@@ -156,8 +156,9 @@ Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
|
|
|
156
156
|
},
|
|
157
157
|
"entityExtraction": {
|
|
158
158
|
"enabled": true,
|
|
159
|
-
"provider": "multilingual_ner",
|
|
160
|
-
"model": "Xenova/bert-base-multilingual-cased-ner-hrl",
|
|
159
|
+
"provider": "openai",
|
|
160
|
+
"model": "gpt-4o-mini",
|
|
161
|
+
"timeoutMs": 2500,
|
|
161
162
|
"minScore": 0.65,
|
|
162
163
|
"maxEntitiesPerMemory": 8,
|
|
163
164
|
"startup": {
|
|
@@ -171,6 +172,18 @@ Add this under `plugins.entries["memory-braid"].config` in your OpenClaw config:
|
|
|
171
172
|
}
|
|
172
173
|
```
|
|
173
174
|
|
|
175
|
+
Local-model alternative (fully backward compatible):
|
|
176
|
+
|
|
177
|
+
```json
|
|
178
|
+
{
|
|
179
|
+
"entityExtraction": {
|
|
180
|
+
"enabled": true,
|
|
181
|
+
"provider": "multilingual_ner",
|
|
182
|
+
"model": "Xenova/bert-base-multilingual-cased-ner-hrl"
|
|
183
|
+
}
|
|
184
|
+
}
|
|
185
|
+
```
|
|
186
|
+
|
|
174
187
|
Then restart:
|
|
175
188
|
|
|
176
189
|
```bash
|
|
@@ -186,7 +199,7 @@ openclaw plugins info memory-braid
|
|
|
186
199
|
openclaw gateway status
|
|
187
200
|
```
|
|
188
201
|
|
|
189
|
-
2. Trigger/inspect
|
|
202
|
+
2. Trigger/inspect entity warmup:
|
|
190
203
|
|
|
191
204
|
```bash
|
|
192
205
|
openclaw agent --agent main --message "/memorybraid warmup" --json
|
|
@@ -206,7 +219,7 @@ rg -n "memory_braid\\.startup|memory_braid\\.capture|memory_braid\\.entity|memor
|
|
|
206
219
|
|
|
207
220
|
Expected events:
|
|
208
221
|
- `memory_braid.startup`
|
|
209
|
-
- `memory_braid.entity.model_load`
|
|
222
|
+
- `memory_braid.entity.model_load` (local `multilingual_ner` provider only)
|
|
210
223
|
- `memory_braid.entity.warmup`
|
|
211
224
|
- `memory_braid.capture.extract`
|
|
212
225
|
- `memory_braid.capture.ml` (for `capture.mode=hybrid|ml`)
|
|
@@ -436,8 +449,9 @@ Use this preset when:
|
|
|
436
449
|
},
|
|
437
450
|
"entityExtraction": {
|
|
438
451
|
"enabled": true,
|
|
439
|
-
"provider": "multilingual_ner",
|
|
440
|
-
"model": "Xenova/bert-base-multilingual-cased-ner-hrl",
|
|
452
|
+
"provider": "openai",
|
|
453
|
+
"model": "gpt-4o-mini",
|
|
454
|
+
"timeoutMs": 2500,
|
|
441
455
|
"minScore": 0.65,
|
|
442
456
|
"maxEntitiesPerMemory": 8,
|
|
443
457
|
"startup": {
|
|
@@ -505,7 +519,8 @@ Entity extraction defaults are:
|
|
|
505
519
|
|
|
506
520
|
- `entityExtraction.enabled`: `false`
|
|
507
521
|
- `entityExtraction.provider`: `"multilingual_ner"`
|
|
508
|
-
- `entityExtraction.model`: `"Xenova/bert-base-multilingual-cased-ner-hrl"`
|
|
522
|
+
- `entityExtraction.model`: `"Xenova/bert-base-multilingual-cased-ner-hrl"` (or `"gpt-4o-mini"` when `provider: "openai"` and model is unset)
|
|
523
|
+
- `entityExtraction.timeoutMs`: `2500`
|
|
509
524
|
- `entityExtraction.minScore`: `0.65`
|
|
510
525
|
- `entityExtraction.maxEntitiesPerMemory`: `8`
|
|
511
526
|
- `entityExtraction.startup.downloadOnStartup`: `true`
|
|
@@ -513,9 +528,9 @@ Entity extraction defaults are:
|
|
|
513
528
|
|
|
514
529
|
When enabled:
|
|
515
530
|
|
|
516
|
-
-
|
|
531
|
+
- Local NER model cache/download path is `<OPENCLAW_STATE_DIR>/memory-braid/models/entity-extraction` (typically `~/.openclaw/memory-braid/models/entity-extraction`).
|
|
517
532
|
- Captured memories get `metadata.entities` and `metadata.entityUris` (canonical IDs like `entity://person/john-doe`).
|
|
518
|
-
- Startup
|
|
533
|
+
- Startup warmup runs for both providers (`downloadOnStartup: true`).
|
|
519
534
|
|
|
520
535
|
Warmup command:
|
|
521
536
|
|
package/openclaw.plugin.json
CHANGED
|
@@ -72,13 +72,14 @@
|
|
|
72
72
|
"enabled": { "type": "boolean", "default": false },
|
|
73
73
|
"provider": {
|
|
74
74
|
"type": "string",
|
|
75
|
-
"enum": ["multilingual_ner"],
|
|
75
|
+
"enum": ["multilingual_ner", "openai"],
|
|
76
76
|
"default": "multilingual_ner"
|
|
77
77
|
},
|
|
78
78
|
"model": {
|
|
79
79
|
"type": "string",
|
|
80
80
|
"default": "Xenova/bert-base-multilingual-cased-ner-hrl"
|
|
81
81
|
},
|
|
82
|
+
"timeoutMs": { "type": "integer", "minimum": 250, "maximum": 30000, "default": 2500 },
|
|
82
83
|
"minScore": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.65 },
|
|
83
84
|
"maxEntitiesPerMemory": { "type": "integer", "minimum": 1, "maximum": 50, "default": 8 },
|
|
84
85
|
"startup": {
|
package/package.json
CHANGED
package/src/config.ts
CHANGED
|
@@ -31,8 +31,9 @@ export type MemoryBraidConfig = {
|
|
|
31
31
|
};
|
|
32
32
|
entityExtraction: {
|
|
33
33
|
enabled: boolean;
|
|
34
|
-
provider: "multilingual_ner";
|
|
34
|
+
provider: "multilingual_ner" | "openai";
|
|
35
35
|
model: string;
|
|
36
|
+
timeoutMs: number;
|
|
36
37
|
minScore: number;
|
|
37
38
|
maxEntitiesPerMemory: number;
|
|
38
39
|
startup: {
|
|
@@ -101,6 +102,7 @@ const DEFAULTS: MemoryBraidConfig = {
|
|
|
101
102
|
enabled: false,
|
|
102
103
|
provider: "multilingual_ner",
|
|
103
104
|
model: "Xenova/bert-base-multilingual-cased-ner-hrl",
|
|
105
|
+
timeoutMs: 2500,
|
|
104
106
|
minScore: 0.65,
|
|
105
107
|
maxEntitiesPerMemory: 8,
|
|
106
108
|
startup: {
|
|
@@ -184,6 +186,14 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
|
|
|
184
186
|
rawCaptureMode === "local" || rawCaptureMode === "hybrid" || rawCaptureMode === "ml"
|
|
185
187
|
? rawCaptureMode
|
|
186
188
|
: DEFAULTS.capture.mode;
|
|
189
|
+
const entityProvider = entityExtraction.provider === "openai" ? "openai" : "multilingual_ner";
|
|
190
|
+
const parsedEntityModel = asString(entityExtraction.model);
|
|
191
|
+
const entityModel =
|
|
192
|
+
entityProvider === "openai"
|
|
193
|
+
? parsedEntityModel && parsedEntityModel !== DEFAULTS.entityExtraction.model
|
|
194
|
+
? parsedEntityModel
|
|
195
|
+
: "gpt-4o-mini"
|
|
196
|
+
: parsedEntityModel ?? DEFAULTS.entityExtraction.model;
|
|
187
197
|
|
|
188
198
|
return {
|
|
189
199
|
enabled: asBoolean(root.enabled, DEFAULTS.enabled),
|
|
@@ -221,11 +231,14 @@ export function parseConfig(raw: unknown): MemoryBraidConfig {
|
|
|
221
231
|
},
|
|
222
232
|
entityExtraction: {
|
|
223
233
|
enabled: asBoolean(entityExtraction.enabled, DEFAULTS.entityExtraction.enabled),
|
|
224
|
-
provider:
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
234
|
+
provider: entityProvider,
|
|
235
|
+
model: entityModel,
|
|
236
|
+
timeoutMs: asInt(
|
|
237
|
+
entityExtraction.timeoutMs,
|
|
238
|
+
DEFAULTS.entityExtraction.timeoutMs,
|
|
239
|
+
250,
|
|
240
|
+
30_000,
|
|
241
|
+
),
|
|
229
242
|
minScore: asNumber(entityExtraction.minScore, DEFAULTS.entityExtraction.minScore, 0, 1),
|
|
230
243
|
maxEntitiesPerMemory: asInt(
|
|
231
244
|
entityExtraction.maxEntitiesPerMemory,
|
package/src/entities.ts
CHANGED
|
@@ -15,6 +15,15 @@ type NerRecord = {
|
|
|
15
15
|
end?: unknown;
|
|
16
16
|
};
|
|
17
17
|
|
|
18
|
+
type LlmEntityRecord = {
|
|
19
|
+
text?: unknown;
|
|
20
|
+
type?: unknown;
|
|
21
|
+
label?: unknown;
|
|
22
|
+
entity?: unknown;
|
|
23
|
+
entity_group?: unknown;
|
|
24
|
+
score?: unknown;
|
|
25
|
+
};
|
|
26
|
+
|
|
18
27
|
export type ExtractedEntity = {
|
|
19
28
|
text: string;
|
|
20
29
|
type: "person" | "organization" | "location" | "misc";
|
|
@@ -81,6 +90,44 @@ function normalizeEntityText(raw: unknown): string {
|
|
|
81
90
|
return normalizeWhitespace(raw.replace(/^##/, "").replace(/^▁/, ""));
|
|
82
91
|
}
|
|
83
92
|
|
|
93
|
+
function clampScore(value: unknown, fallback = 0): number {
|
|
94
|
+
if (typeof value !== "number" || !Number.isFinite(value)) {
|
|
95
|
+
return Math.max(0, Math.min(1, fallback));
|
|
96
|
+
}
|
|
97
|
+
return Math.max(0, Math.min(1, value));
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
function parseJsonObjectArray(raw: string): Array<Record<string, unknown>> {
|
|
101
|
+
const attempts = [raw.trim()];
|
|
102
|
+
|
|
103
|
+
const fencedMatch = raw.match(/```(?:json)?\s*([\s\S]+?)\s*```/i);
|
|
104
|
+
if (fencedMatch?.[1]) {
|
|
105
|
+
attempts.push(fencedMatch[1].trim());
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
const firstBracket = raw.indexOf("[");
|
|
109
|
+
const lastBracket = raw.lastIndexOf("]");
|
|
110
|
+
if (firstBracket >= 0 && lastBracket > firstBracket) {
|
|
111
|
+
attempts.push(raw.slice(firstBracket, lastBracket + 1).trim());
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
for (const attempt of attempts) {
|
|
115
|
+
try {
|
|
116
|
+
const parsed = JSON.parse(attempt) as unknown;
|
|
117
|
+
if (!Array.isArray(parsed)) {
|
|
118
|
+
continue;
|
|
119
|
+
}
|
|
120
|
+
return parsed.filter((entry) => entry && typeof entry === "object") as Array<
|
|
121
|
+
Record<string, unknown>
|
|
122
|
+
>;
|
|
123
|
+
} catch {
|
|
124
|
+
continue;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return [];
|
|
129
|
+
}
|
|
130
|
+
|
|
84
131
|
type NormalizedEntityToken = {
|
|
85
132
|
text: string;
|
|
86
133
|
type: ExtractedEntity["type"];
|
|
@@ -213,6 +260,29 @@ function collapseAdjacentEntityTokens(
|
|
|
213
260
|
return collapsed;
|
|
214
261
|
}
|
|
215
262
|
|
|
263
|
+
function dedupeAndLimitEntities(
|
|
264
|
+
entities: Array<Omit<ExtractedEntity, "canonicalUri">>,
|
|
265
|
+
maxEntities: number,
|
|
266
|
+
): ExtractedEntity[] {
|
|
267
|
+
const deduped = new Map<string, ExtractedEntity>();
|
|
268
|
+
for (const entity of entities) {
|
|
269
|
+
const canonicalUri = buildCanonicalEntityUri(entity.type, entity.text);
|
|
270
|
+
const current = deduped.get(canonicalUri);
|
|
271
|
+
if (!current || entity.score > current.score) {
|
|
272
|
+
deduped.set(canonicalUri, {
|
|
273
|
+
text: entity.text,
|
|
274
|
+
type: entity.type,
|
|
275
|
+
score: entity.score,
|
|
276
|
+
canonicalUri,
|
|
277
|
+
});
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
return Array.from(deduped.values())
|
|
282
|
+
.sort((a, b) => b.score - a.score)
|
|
283
|
+
.slice(0, maxEntities);
|
|
284
|
+
}
|
|
285
|
+
|
|
216
286
|
type EntityExtractionOptions = {
|
|
217
287
|
stateDir?: string;
|
|
218
288
|
};
|
|
@@ -256,7 +326,10 @@ export class EntityExtractionManager {
|
|
|
256
326
|
model: this.cfg.model,
|
|
257
327
|
minScore: this.cfg.minScore,
|
|
258
328
|
maxEntitiesPerMemory: this.cfg.maxEntitiesPerMemory,
|
|
259
|
-
cacheDir: resolveEntityModelCacheDir(this.stateDir),
|
|
329
|
+
cacheDir:
|
|
330
|
+
this.cfg.provider === "multilingual_ner"
|
|
331
|
+
? resolveEntityModelCacheDir(this.stateDir)
|
|
332
|
+
: "n/a",
|
|
260
333
|
};
|
|
261
334
|
}
|
|
262
335
|
|
|
@@ -274,10 +347,14 @@ export class EntityExtractionManager {
|
|
|
274
347
|
error?: string;
|
|
275
348
|
}> {
|
|
276
349
|
const startedAt = Date.now();
|
|
350
|
+
const cacheDir =
|
|
351
|
+
this.cfg.provider === "multilingual_ner"
|
|
352
|
+
? resolveEntityModelCacheDir(this.stateDir)
|
|
353
|
+
: "n/a";
|
|
277
354
|
if (!this.cfg.enabled) {
|
|
278
355
|
return {
|
|
279
356
|
ok: false,
|
|
280
|
-
cacheDir: resolveEntityModelCacheDir(this.stateDir),
|
|
357
|
+
cacheDir,
|
|
281
358
|
model: this.cfg.model,
|
|
282
359
|
entities: 0,
|
|
283
360
|
durMs: Date.now() - startedAt,
|
|
@@ -285,29 +362,17 @@ export class EntityExtractionManager {
|
|
|
285
362
|
};
|
|
286
363
|
}
|
|
287
364
|
|
|
288
|
-
const pipeline = await this.ensurePipeline(params?.forceReload);
|
|
289
|
-
if (!pipeline) {
|
|
290
|
-
return {
|
|
291
|
-
ok: false,
|
|
292
|
-
cacheDir: resolveEntityModelCacheDir(this.stateDir),
|
|
293
|
-
model: this.cfg.model,
|
|
294
|
-
entities: 0,
|
|
295
|
-
durMs: Date.now() - startedAt,
|
|
296
|
-
error: "model_load_failed",
|
|
297
|
-
};
|
|
298
|
-
}
|
|
299
|
-
|
|
300
365
|
try {
|
|
301
|
-
const entities = await this.extractWithPipeline({
|
|
302
|
-
pipeline,
|
|
366
|
+
const entities = await this.extractWithProvider({
|
|
303
367
|
text: params?.text ?? this.cfg.startup.warmupText,
|
|
368
|
+
forceReload: params?.forceReload,
|
|
304
369
|
});
|
|
305
370
|
this.log.info("memory_braid.entity.warmup", {
|
|
306
371
|
runId: params?.runId,
|
|
307
372
|
reason: params?.reason ?? "manual",
|
|
308
373
|
provider: this.cfg.provider,
|
|
309
374
|
model: this.cfg.model,
|
|
310
|
-
cacheDir: resolveEntityModelCacheDir(this.stateDir),
|
|
375
|
+
cacheDir,
|
|
311
376
|
entities: entities.length,
|
|
312
377
|
entityTypes: summarizeEntityTypes(entities),
|
|
313
378
|
sampleEntityUris: entities.slice(0, 5).map((entry) => entry.canonicalUri),
|
|
@@ -315,7 +380,7 @@ export class EntityExtractionManager {
|
|
|
315
380
|
});
|
|
316
381
|
return {
|
|
317
382
|
ok: true,
|
|
318
|
-
cacheDir: resolveEntityModelCacheDir(this.stateDir),
|
|
383
|
+
cacheDir,
|
|
319
384
|
model: this.cfg.model,
|
|
320
385
|
entities: entities.length,
|
|
321
386
|
durMs: Date.now() - startedAt,
|
|
@@ -327,12 +392,12 @@ export class EntityExtractionManager {
|
|
|
327
392
|
reason: params?.reason ?? "manual",
|
|
328
393
|
provider: this.cfg.provider,
|
|
329
394
|
model: this.cfg.model,
|
|
330
|
-
cacheDir: resolveEntityModelCacheDir(this.stateDir),
|
|
395
|
+
cacheDir,
|
|
331
396
|
error: message,
|
|
332
397
|
});
|
|
333
398
|
return {
|
|
334
399
|
ok: false,
|
|
335
|
-
cacheDir: resolveEntityModelCacheDir(this.stateDir),
|
|
400
|
+
cacheDir,
|
|
336
401
|
model: this.cfg.model,
|
|
337
402
|
entities: 0,
|
|
338
403
|
durMs: Date.now() - startedAt,
|
|
@@ -351,13 +416,8 @@ export class EntityExtractionManager {
|
|
|
351
416
|
return [];
|
|
352
417
|
}
|
|
353
418
|
|
|
354
|
-
const pipeline = await this.ensurePipeline();
|
|
355
|
-
if (!pipeline) {
|
|
356
|
-
return [];
|
|
357
|
-
}
|
|
358
|
-
|
|
359
419
|
try {
|
|
360
|
-
const entities = await this.extractWithPipeline({ pipeline, text });
|
|
420
|
+
const entities = await this.extractWithProvider({ text });
|
|
361
421
|
this.log.debug("memory_braid.entity.extract", {
|
|
362
422
|
runId: params.runId,
|
|
363
423
|
provider: this.cfg.provider,
|
|
@@ -378,11 +438,112 @@ export class EntityExtractionManager {
|
|
|
378
438
|
}
|
|
379
439
|
}
|
|
380
440
|
|
|
441
|
+
private async extractWithProvider(params: {
|
|
442
|
+
text: string;
|
|
443
|
+
forceReload?: boolean;
|
|
444
|
+
}): Promise<ExtractedEntity[]> {
|
|
445
|
+
if (this.cfg.provider === "openai") {
|
|
446
|
+
return this.extractWithOpenAi(params.text);
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
const pipeline = await this.ensurePipeline(params.forceReload);
|
|
450
|
+
if (!pipeline) {
|
|
451
|
+
throw new Error("model_load_failed");
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
return this.extractWithPipeline({ pipeline, text: params.text });
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
private async extractWithOpenAi(text: string): Promise<ExtractedEntity[]> {
|
|
458
|
+
const key = process.env.OPENAI_API_KEY?.trim();
|
|
459
|
+
if (!key) {
|
|
460
|
+
throw new Error("OPENAI_API_KEY is not set");
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
const controller = new AbortController();
|
|
464
|
+
const timer = setTimeout(() => controller.abort(), this.cfg.timeoutMs);
|
|
465
|
+
|
|
466
|
+
try {
|
|
467
|
+
const prompt = [
|
|
468
|
+
"Extract named entities from this text.",
|
|
469
|
+
"Return ONLY JSON array.",
|
|
470
|
+
"Each item: {text:string, type:string, score:number}.",
|
|
471
|
+
"type must be one of: person, organization, location, misc.",
|
|
472
|
+
"score must be between 0 and 1.",
|
|
473
|
+
"Do not include duplicates.",
|
|
474
|
+
text,
|
|
475
|
+
].join("\n");
|
|
476
|
+
|
|
477
|
+
const response = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
478
|
+
method: "POST",
|
|
479
|
+
headers: {
|
|
480
|
+
Authorization: `Bearer ${key}`,
|
|
481
|
+
"Content-Type": "application/json",
|
|
482
|
+
},
|
|
483
|
+
body: JSON.stringify({
|
|
484
|
+
model: this.cfg.model,
|
|
485
|
+
temperature: 0,
|
|
486
|
+
messages: [
|
|
487
|
+
{
|
|
488
|
+
role: "system",
|
|
489
|
+
content: "You return strict JSON only.",
|
|
490
|
+
},
|
|
491
|
+
{
|
|
492
|
+
role: "user",
|
|
493
|
+
content: prompt,
|
|
494
|
+
},
|
|
495
|
+
],
|
|
496
|
+
}),
|
|
497
|
+
signal: controller.signal,
|
|
498
|
+
});
|
|
499
|
+
const data = (await response.json()) as {
|
|
500
|
+
error?: { message?: string };
|
|
501
|
+
choices?: Array<{ message?: { content?: string } }>;
|
|
502
|
+
};
|
|
503
|
+
|
|
504
|
+
if (!response.ok) {
|
|
505
|
+
throw new Error(data.error?.message ?? `OpenAI HTTP ${response.status}`);
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
const content = data.choices?.[0]?.message?.content ?? "";
|
|
509
|
+
const parsed = parseJsonObjectArray(content);
|
|
510
|
+
|
|
511
|
+
const normalized: Array<Omit<ExtractedEntity, "canonicalUri">> = [];
|
|
512
|
+
for (const row of parsed) {
|
|
513
|
+
const record = row as LlmEntityRecord;
|
|
514
|
+
const entityText = normalizeEntityText(record.text);
|
|
515
|
+
if (!entityText) {
|
|
516
|
+
continue;
|
|
517
|
+
}
|
|
518
|
+
const score = clampScore(record.score, 0.5);
|
|
519
|
+
if (score < this.cfg.minScore) {
|
|
520
|
+
continue;
|
|
521
|
+
}
|
|
522
|
+
const type = normalizeEntityType(
|
|
523
|
+
record.type ?? record.label ?? record.entity_group ?? record.entity,
|
|
524
|
+
);
|
|
525
|
+
normalized.push({
|
|
526
|
+
text: entityText,
|
|
527
|
+
type,
|
|
528
|
+
score,
|
|
529
|
+
});
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
return dedupeAndLimitEntities(normalized, this.cfg.maxEntitiesPerMemory);
|
|
533
|
+
} finally {
|
|
534
|
+
clearTimeout(timer);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
|
|
381
538
|
private async ensurePipeline(forceReload = false): Promise<NerPipeline | null> {
|
|
382
539
|
if (!this.cfg.enabled) {
|
|
383
540
|
return null;
|
|
384
541
|
}
|
|
385
542
|
|
|
543
|
+
if (this.cfg.provider !== "multilingual_ner") {
|
|
544
|
+
return null;
|
|
545
|
+
}
|
|
546
|
+
|
|
386
547
|
if (forceReload) {
|
|
387
548
|
this.pipelinePromise = null;
|
|
388
549
|
}
|
|
@@ -463,7 +624,7 @@ export class EntityExtractionManager {
|
|
|
463
624
|
if (!entityText) {
|
|
464
625
|
continue;
|
|
465
626
|
}
|
|
466
|
-
const score =
|
|
627
|
+
const score = clampScore(record.score);
|
|
467
628
|
if (score < this.cfg.minScore) {
|
|
468
629
|
continue;
|
|
469
630
|
}
|
|
@@ -479,22 +640,13 @@ export class EntityExtractionManager {
|
|
|
479
640
|
}
|
|
480
641
|
|
|
481
642
|
const collapsed = collapseAdjacentEntityTokens(normalized, params.text);
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
score: token.score,
|
|
491
|
-
canonicalUri,
|
|
492
|
-
});
|
|
493
|
-
}
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
return Array.from(deduped.values())
|
|
497
|
-
.sort((a, b) => b.score - a.score)
|
|
498
|
-
.slice(0, this.cfg.maxEntitiesPerMemory);
|
|
643
|
+
return dedupeAndLimitEntities(
|
|
644
|
+
collapsed.map((token) => ({
|
|
645
|
+
text: token.text,
|
|
646
|
+
type: token.type,
|
|
647
|
+
score: token.score,
|
|
648
|
+
})),
|
|
649
|
+
this.cfg.maxEntitiesPerMemory,
|
|
650
|
+
);
|
|
499
651
|
}
|
|
500
652
|
}
|