@psiclawops/hypermem 0.9.6 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -0
- package/INSTALL.md +29 -9
- package/README.md +5 -1
- package/assets/default-config.json +20 -5
- package/assets/runtime-validation-fixture.json +123 -0
- package/bin/hypermem-cleanup.mjs +334 -0
- package/bin/hypermem-doctor.mjs +71 -0
- package/bin/hypermem-validate-runtime.mjs +282 -0
- package/dist/compositor.d.ts +43 -5
- package/dist/compositor.d.ts.map +1 -1
- package/dist/compositor.js +802 -30
- package/dist/entity-bridge-backfill.d.ts +66 -0
- package/dist/entity-bridge-backfill.d.ts.map +1 -0
- package/dist/entity-bridge-backfill.js +145 -0
- package/dist/entity-bridge-store.d.ts +164 -0
- package/dist/entity-bridge-store.d.ts.map +1 -0
- package/dist/entity-bridge-store.js +488 -0
- package/dist/entity-extractor.d.ts +124 -0
- package/dist/entity-extractor.d.ts.map +1 -0
- package/dist/entity-extractor.js +382 -0
- package/dist/entity-ppr.d.ts +55 -0
- package/dist/entity-ppr.d.ts.map +1 -0
- package/dist/entity-ppr.js +180 -0
- package/dist/hybrid-retrieval.d.ts +27 -0
- package/dist/hybrid-retrieval.d.ts.map +1 -1
- package/dist/hybrid-retrieval.js +26 -1
- package/dist/index.d.ts +19 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +63 -13
- package/dist/message-store.d.ts +36 -0
- package/dist/message-store.d.ts.map +1 -1
- package/dist/message-store.js +155 -1
- package/dist/open-domain.d.ts +13 -4
- package/dist/open-domain.d.ts.map +1 -1
- package/dist/open-domain.js +222 -20
- package/dist/profiles.js +13 -13
- package/dist/question-shape.d.ts +73 -0
- package/dist/question-shape.d.ts.map +1 -0
- package/dist/question-shape.js +230 -0
- package/dist/schema.d.ts +1 -1
- package/dist/schema.d.ts.map +1 -1
- package/dist/schema.js +92 -1
- package/dist/topic-detector.d.ts.map +1 -1
- package/dist/topic-detector.js +22 -9
- package/dist/types.d.ts +176 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/vector-store.d.ts +6 -0
- package/dist/vector-store.d.ts.map +1 -1
- package/dist/vector-store.js +3 -0
- package/docs/DIAGNOSTICS.md +47 -0
- package/docs/INTEGRATION_VALIDATION.md +24 -4
- package/docs/TUNING.md +21 -21
- package/memory-plugin/dist/index.d.ts +3 -3
- package/memory-plugin/dist/index.js +4 -2
- package/memory-plugin/openclaw.plugin.json +5 -0
- package/memory-plugin/package.json +10 -6
- package/package.json +22 -5
- package/plugin/dist/index.d.ts +3 -3
- package/plugin/dist/index.d.ts.map +1 -1
- package/plugin/dist/index.js +115 -13
- package/plugin/dist/index.js.map +1 -1
- package/plugin/package.json +10 -6
- package/scripts/install-runtime.mjs +4 -1
|
@@ -0,0 +1,488 @@
|
|
|
1
|
+
/**
|
|
2
|
+
 * entity-bridge-store.ts — Sprint B
|
|
3
|
+
*
|
|
4
|
+
* Metadata-only CRUD/query helpers for the entity/facet bridge tables.
|
|
5
|
+
*
|
|
6
|
+
* Hard rule: this module never logs message content. Mention rows store
|
|
7
|
+
* a `match_term` (the surface form of the matched span) and offsets, but
|
|
8
|
+
* full message text remains in the `messages` table only.
|
|
9
|
+
*
|
|
10
|
+
* Tables (created by schema v12 migration):
|
|
11
|
+
* - memory_entities (agent_id, entity_key, display_name, ...)
|
|
12
|
+
* - memory_facets (agent_id, facet_key, ...)
|
|
13
|
+
* - message_entity_mentions (message_id, entity_id, ...)
|
|
14
|
+
* - message_facet_mentions (message_id, facet_id, ...)
|
|
15
|
+
* - entity_bridge_message_index (message_id, entity_count, facet_count, status, ...)
|
|
16
|
+
*
|
|
17
|
+
* The store is created on-demand. If the v12 tables are absent (older DB),
|
|
18
|
+
* `tablesExist()` returns false and all writes/reads are no-ops/empty.
|
|
19
|
+
*/
|
|
20
|
+
/**
 * Every table the bridge store needs. `EntityBridgeStore.tablesExist()`
 * reports true only when all of these names are present in `sqlite_master`.
 */
const REQUIRED_TABLES = [
    // Per-agent dictionaries.
    'memory_entities', 'memory_facets',
    // Per-message mention rows.
    'message_entity_mentions', 'message_facet_mentions',
    // Per-message index status / counts.
    'entity_bridge_message_index',
];
|
|
27
|
+
/**
 * Current time rendered by `Date.prototype.toISOString()`
 * (e.g. `2024-01-01T00:00:00.000Z`). Used as the timestamp for bridge rows.
 */
function nowIso() {
    const current = new Date();
    return current.toISOString();
}
|
|
30
|
+
export class EntityBridgeStore {
|
|
31
|
+
db;
|
|
32
|
+
_tablesChecked = false;
|
|
33
|
+
_tablesExist = false;
|
|
34
|
+
// Lazy prepared statements. Only created when tables exist.
|
|
35
|
+
_stmtUpsertEntity;
|
|
36
|
+
_stmtTouchEntity;
|
|
37
|
+
_stmtUpsertFacet;
|
|
38
|
+
_stmtTouchFacet;
|
|
39
|
+
_stmtInsertEntityMention;
|
|
40
|
+
_stmtInsertFacetMention;
|
|
41
|
+
_stmtUpsertIndex;
|
|
42
|
+
_stmtGetIndex;
|
|
43
|
+
constructor(db) {
|
|
44
|
+
this.db = db;
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Check whether all v12 bridge tables exist in this DB.
|
|
48
|
+
* Cached after the first call. Cheap when cached.
|
|
49
|
+
*/
|
|
50
|
+
tablesExist() {
|
|
51
|
+
if (this._tablesChecked)
|
|
52
|
+
return this._tablesExist;
|
|
53
|
+
this._tablesChecked = true;
|
|
54
|
+
try {
|
|
55
|
+
const stmt = this.db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name IN (${REQUIRED_TABLES.map(() => '?').join(',')})`);
|
|
56
|
+
const rows = stmt.all(...REQUIRED_TABLES);
|
|
57
|
+
this._tablesExist = rows.length === REQUIRED_TABLES.length;
|
|
58
|
+
}
|
|
59
|
+
catch {
|
|
60
|
+
this._tablesExist = false;
|
|
61
|
+
}
|
|
62
|
+
return this._tablesExist;
|
|
63
|
+
}
|
|
64
|
+
// \u2500\u2500 Index state queries \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
|
65
|
+
getIndexState(messageId) {
|
|
66
|
+
if (!this.tablesExist())
|
|
67
|
+
return { exists: false };
|
|
68
|
+
if (!this._stmtGetIndex) {
|
|
69
|
+
this._stmtGetIndex = this.db.prepare('SELECT entity_count, facet_count, indexed_at, source, status, last_error FROM entity_bridge_message_index WHERE message_id = ?');
|
|
70
|
+
}
|
|
71
|
+
const row = this._stmtGetIndex.get(messageId);
|
|
72
|
+
if (!row)
|
|
73
|
+
return { exists: false };
|
|
74
|
+
return {
|
|
75
|
+
exists: true,
|
|
76
|
+
status: row.status,
|
|
77
|
+
source: row.source,
|
|
78
|
+
entityCount: row.entity_count,
|
|
79
|
+
facetCount: row.facet_count,
|
|
80
|
+
indexedAt: row.indexed_at,
|
|
81
|
+
lastError: row.last_error,
|
|
82
|
+
};
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Watermark diagnostics: counts of indexed/failed/zero-mention messages
|
|
86
|
+
* and the highest indexed message id, scoped to a single agent.
|
|
87
|
+
*/
|
|
88
|
+
getWatermarkDiagnostics(agentId) {
|
|
89
|
+
const empty = {
|
|
90
|
+
totalMessages: 0,
|
|
91
|
+
indexedMessages: 0,
|
|
92
|
+
failedMessages: 0,
|
|
93
|
+
zeroMentionMessages: 0,
|
|
94
|
+
highestIndexedMessageId: null,
|
|
95
|
+
};
|
|
96
|
+
if (!this.tablesExist())
|
|
97
|
+
return empty;
|
|
98
|
+
try {
|
|
99
|
+
const total = this.db.prepare('SELECT COUNT(*) AS c FROM messages WHERE agent_id = ?').get(agentId)?.c ?? 0;
|
|
100
|
+
const indexedRow = this.db.prepare(`SELECT COUNT(*) AS c, MAX(message_id) AS hi
|
|
101
|
+
FROM entity_bridge_message_index
|
|
102
|
+
WHERE agent_id = ? AND status = 'ok'`).get(agentId);
|
|
103
|
+
const failed = this.db.prepare(`SELECT COUNT(*) AS c FROM entity_bridge_message_index
|
|
104
|
+
WHERE agent_id = ? AND status != 'ok'`).get(agentId)?.c ?? 0;
|
|
105
|
+
const zero = this.db.prepare(`SELECT COUNT(*) AS c FROM entity_bridge_message_index
|
|
106
|
+
WHERE agent_id = ? AND status = 'ok' AND entity_count = 0 AND facet_count = 0`).get(agentId)?.c ?? 0;
|
|
107
|
+
return {
|
|
108
|
+
totalMessages: total,
|
|
109
|
+
indexedMessages: indexedRow?.c ?? 0,
|
|
110
|
+
failedMessages: failed,
|
|
111
|
+
zeroMentionMessages: zero,
|
|
112
|
+
highestIndexedMessageId: indexedRow?.hi ?? null,
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
catch {
|
|
116
|
+
return empty;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
// \u2500\u2500 Mention writes \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
|
120
|
+
/**
|
|
121
|
+
* Record entity/facet mentions for a single message. Always writes a
|
|
122
|
+
* row into `entity_bridge_message_index` even when there are zero mentions
|
|
123
|
+
* so that callers can distinguish "never indexed" from "indexed, no mentions".
|
|
124
|
+
*
|
|
125
|
+
* Wraps all writes in a single transaction. Returns whether any write
|
|
126
|
+
* occurred. On failure, records a 'failed' index row when possible and
|
|
127
|
+
* rethrows the underlying error so the caller can decide whether to surface.
|
|
128
|
+
*/
|
|
129
|
+
recordMentions(input) {
|
|
130
|
+
if (!this.tablesExist())
|
|
131
|
+
return { wrote: false, entityCount: 0, facetCount: 0 };
|
|
132
|
+
const source = input.source ?? 'live';
|
|
133
|
+
const ts = nowIso();
|
|
134
|
+
const begin = this.db.prepare('BEGIN');
|
|
135
|
+
const commit = this.db.prepare('COMMIT');
|
|
136
|
+
const rollback = this.db.prepare('ROLLBACK');
|
|
137
|
+
begin.run();
|
|
138
|
+
try {
|
|
139
|
+
this.ensureUpsertStmts();
|
|
140
|
+
const entityIds = new Map();
|
|
141
|
+
for (const ent of input.mentions.entities) {
|
|
142
|
+
const id = this.upsertEntityRow(input.agentId, ent, ts);
|
|
143
|
+
if (id != null) {
|
|
144
|
+
entityIds.set(ent.key, id);
|
|
145
|
+
this._stmtInsertEntityMention.run(input.messageId, id, input.agentId, input.threadRef ?? null, ent.surface, ent.start, ent.end, ts);
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
const facetIds = new Map();
|
|
149
|
+
for (const fac of input.mentions.facets) {
|
|
150
|
+
const id = this.upsertFacetRow(input.agentId, fac, ts);
|
|
151
|
+
if (id != null) {
|
|
152
|
+
facetIds.set(fac.key, id);
|
|
153
|
+
this._stmtInsertFacetMention.run(input.messageId, id, input.agentId, input.threadRef ?? null, fac.term, fac.start, fac.end, ts);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
this._stmtUpsertIndex.run(input.messageId, input.agentId, input.threadRef ?? null, entityIds.size, facetIds.size, ts, source, 'ok', null);
|
|
157
|
+
commit.run();
|
|
158
|
+
return { wrote: true, entityCount: entityIds.size, facetCount: facetIds.size };
|
|
159
|
+
}
|
|
160
|
+
catch (err) {
|
|
161
|
+
try {
|
|
162
|
+
rollback.run();
|
|
163
|
+
}
|
|
164
|
+
catch { /* swallow */ }
|
|
165
|
+
// Best-effort failure marker. Use a separate transaction so the
|
|
166
|
+
// failure record itself does not get rolled back.
|
|
167
|
+
try {
|
|
168
|
+
this.ensureUpsertStmts();
|
|
169
|
+
this._stmtUpsertIndex.run(input.messageId, input.agentId, input.threadRef ?? null, 0, 0, ts, source, 'failed', summarizeError(err));
|
|
170
|
+
this.recordIndexFailureEvent(input.messageId, input.agentId, source, err, ts);
|
|
171
|
+
}
|
|
172
|
+
catch { /* swallow */ }
|
|
173
|
+
throw err;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Emit a metadata-only failure marker without attempting any mention writes.
|
|
178
|
+
* Used by callers (e.g. message-store live indexing) when extraction itself
|
|
179
|
+
* threw before reaching the store, or to record an index attempt that never
|
|
180
|
+
* produced mentions due to disabled tables.
|
|
181
|
+
*/
|
|
182
|
+
recordIndexFailure(input) {
|
|
183
|
+
if (!this.tablesExist())
|
|
184
|
+
return false;
|
|
185
|
+
const ts = nowIso();
|
|
186
|
+
try {
|
|
187
|
+
this.ensureUpsertStmts();
|
|
188
|
+
const source = input.source ?? 'live';
|
|
189
|
+
this._stmtUpsertIndex.run(input.messageId, input.agentId, input.threadRef ?? null, 0, 0, ts, source, 'failed', summarizeError(input.error));
|
|
190
|
+
this.recordIndexFailureEvent(input.messageId, input.agentId, source, input.error, ts);
|
|
191
|
+
return true;
|
|
192
|
+
}
|
|
193
|
+
catch {
|
|
194
|
+
return false;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
recordIndexFailureEvent(messageId, agentId, source, error, ts) {
|
|
198
|
+
try {
|
|
199
|
+
this.db.prepare(`INSERT INTO index_events (agent_id, event_type, target_table, target_id, details, created_at)
|
|
200
|
+
VALUES (?, ?, ?, ?, ?, ?)`).run(agentId, 'entity_bridge_index_failed', 'messages', messageId, JSON.stringify({ source, error_class: errorClass(error) }), ts);
|
|
201
|
+
}
|
|
202
|
+
catch {
|
|
203
|
+
// Failure telemetry is best-effort; never let it affect message writes.
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
// \u2500\u2500 Candidate / graph reads (used by PPR lane) \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
|
207
|
+
/**
|
|
208
|
+
* Look up the internal entity rows for a list of normalized entity keys.
|
|
209
|
+
* Missing keys are silently dropped.
|
|
210
|
+
*/
|
|
211
|
+
lookupEntityIds(agentId, keys) {
|
|
212
|
+
const out = new Map();
|
|
213
|
+
if (!this.tablesExist() || keys.length === 0)
|
|
214
|
+
return out;
|
|
215
|
+
const placeholders = keys.map(() => '?').join(',');
|
|
216
|
+
const rows = this.db.prepare(`SELECT id, entity_key FROM memory_entities WHERE agent_id = ? AND entity_key IN (${placeholders})`).all(agentId, ...keys);
|
|
217
|
+
for (const r of rows)
|
|
218
|
+
out.set(r.entity_key, r.id);
|
|
219
|
+
return out;
|
|
220
|
+
}
|
|
221
|
+
lookupFacetIds(agentId, keys) {
|
|
222
|
+
const out = new Map();
|
|
223
|
+
if (!this.tablesExist() || keys.length === 0)
|
|
224
|
+
return out;
|
|
225
|
+
const placeholders = keys.map(() => '?').join(',');
|
|
226
|
+
const rows = this.db.prepare(`SELECT id, facet_key FROM memory_facets WHERE agent_id = ? AND facet_key IN (${placeholders})`).all(agentId, ...keys);
|
|
227
|
+
for (const r of rows)
|
|
228
|
+
out.set(r.facet_key, r.id);
|
|
229
|
+
return out;
|
|
230
|
+
}
|
|
231
|
+
/**
|
|
232
|
+
* Build a metadata-only graph snapshot for PPR. Bounded by node/edge caps.
|
|
233
|
+
*
|
|
234
|
+
* Algorithm:
|
|
235
|
+
* - Resolve seed entity/facet keys to internal ids.
|
|
236
|
+
* - For each seed, fetch up to `perSeedMessageLimit` mention rows.
|
|
237
|
+
* - Aggregate into message \u2194 entity \u2194 facet adjacency lists, capped.
|
|
238
|
+
*/
|
|
239
|
+
buildGraphSnapshot(opts) {
|
|
240
|
+
const empty = {
|
|
241
|
+
entityMessages: new Map(),
|
|
242
|
+
facetMessages: new Map(),
|
|
243
|
+
messageEntities: new Map(),
|
|
244
|
+
messageFacets: new Map(),
|
|
245
|
+
diagnostics: { nodeCount: 0, edgeCount: 0, seedExpanded: 0, nodesCapped: false, edgesCapped: false },
|
|
246
|
+
};
|
|
247
|
+
if (!this.tablesExist())
|
|
248
|
+
return empty;
|
|
249
|
+
if (opts.seedEntityKeys.length === 0 && opts.seedFacetKeys.length === 0)
|
|
250
|
+
return empty;
|
|
251
|
+
const entityIds = this.lookupEntityIds(opts.agentId, opts.seedEntityKeys);
|
|
252
|
+
const facetIds = this.lookupFacetIds(opts.agentId, opts.seedFacetKeys);
|
|
253
|
+
if (entityIds.size === 0 && facetIds.size === 0)
|
|
254
|
+
return empty;
|
|
255
|
+
const entityMessages = new Map();
|
|
256
|
+
const facetMessages = new Map();
|
|
257
|
+
const messageIds = new Set();
|
|
258
|
+
let edgeCount = 0;
|
|
259
|
+
let nodesCapped = false;
|
|
260
|
+
let edgesCapped = false;
|
|
261
|
+
let seedExpanded = 0;
|
|
262
|
+
const perSeed = Math.max(1, Math.min(2000, opts.perSeedMessageLimit));
|
|
263
|
+
// 1) Pull message ids per seed entity.
|
|
264
|
+
for (const [key, id] of entityIds) {
|
|
265
|
+
if (edgeCount >= opts.maxEdges) {
|
|
266
|
+
edgesCapped = true;
|
|
267
|
+
break;
|
|
268
|
+
}
|
|
269
|
+
const rows = this.db.prepare(`SELECT message_id FROM message_entity_mentions
|
|
270
|
+
WHERE agent_id = ? AND entity_id = ?
|
|
271
|
+
ORDER BY id DESC LIMIT ?`).all(opts.agentId, id, perSeed);
|
|
272
|
+
const ids = rows.map(r => r.message_id);
|
|
273
|
+
entityMessages.set(key, ids);
|
|
274
|
+
seedExpanded++;
|
|
275
|
+
for (const mid of ids) {
|
|
276
|
+
messageIds.add(mid);
|
|
277
|
+
edgeCount++;
|
|
278
|
+
if (messageIds.size >= opts.maxNodes) {
|
|
279
|
+
nodesCapped = true;
|
|
280
|
+
break;
|
|
281
|
+
}
|
|
282
|
+
if (edgeCount >= opts.maxEdges) {
|
|
283
|
+
edgesCapped = true;
|
|
284
|
+
break;
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
if (nodesCapped || edgesCapped)
|
|
288
|
+
break;
|
|
289
|
+
}
|
|
290
|
+
// 2) Pull message ids per seed facet.
|
|
291
|
+
if (!nodesCapped && !edgesCapped) {
|
|
292
|
+
for (const [key, id] of facetIds) {
|
|
293
|
+
if (edgeCount >= opts.maxEdges) {
|
|
294
|
+
edgesCapped = true;
|
|
295
|
+
break;
|
|
296
|
+
}
|
|
297
|
+
const rows = this.db.prepare(`SELECT message_id FROM message_facet_mentions
|
|
298
|
+
WHERE agent_id = ? AND facet_id = ?
|
|
299
|
+
ORDER BY id DESC LIMIT ?`).all(opts.agentId, id, perSeed);
|
|
300
|
+
const ids = rows.map(r => r.message_id);
|
|
301
|
+
facetMessages.set(key, ids);
|
|
302
|
+
seedExpanded++;
|
|
303
|
+
for (const mid of ids) {
|
|
304
|
+
messageIds.add(mid);
|
|
305
|
+
edgeCount++;
|
|
306
|
+
if (messageIds.size >= opts.maxNodes) {
|
|
307
|
+
nodesCapped = true;
|
|
308
|
+
break;
|
|
309
|
+
}
|
|
310
|
+
if (edgeCount >= opts.maxEdges) {
|
|
311
|
+
edgesCapped = true;
|
|
312
|
+
break;
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
if (nodesCapped || edgesCapped)
|
|
316
|
+
break;
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
// 3) For all messages collected, fetch all entity/facet adjacencies.
|
|
320
|
+
const messageEntities = new Map();
|
|
321
|
+
const messageFacets = new Map();
|
|
322
|
+
if (messageIds.size > 0) {
|
|
323
|
+
const idList = [...messageIds];
|
|
324
|
+
const placeholders = idList.map(() => '?').join(',');
|
|
325
|
+
const entRows = this.db.prepare(`SELECT m.message_id, e.entity_key
|
|
326
|
+
FROM message_entity_mentions m
|
|
327
|
+
JOIN memory_entities e ON e.id = m.entity_id
|
|
328
|
+
WHERE m.agent_id = ? AND m.message_id IN (${placeholders})`).all(opts.agentId, ...idList);
|
|
329
|
+
for (const r of entRows) {
|
|
330
|
+
if (edgeCount >= opts.maxEdges) {
|
|
331
|
+
edgesCapped = true;
|
|
332
|
+
break;
|
|
333
|
+
}
|
|
334
|
+
const list = messageEntities.get(r.message_id) ?? [];
|
|
335
|
+
if (!list.includes(r.entity_key)) {
|
|
336
|
+
list.push(r.entity_key);
|
|
337
|
+
edgeCount++;
|
|
338
|
+
}
|
|
339
|
+
messageEntities.set(r.message_id, list);
|
|
340
|
+
}
|
|
341
|
+
if (!edgesCapped) {
|
|
342
|
+
const facRows = this.db.prepare(`SELECT m.message_id, f.facet_key
|
|
343
|
+
FROM message_facet_mentions m
|
|
344
|
+
JOIN memory_facets f ON f.id = m.facet_id
|
|
345
|
+
WHERE m.agent_id = ? AND m.message_id IN (${placeholders})`).all(opts.agentId, ...idList);
|
|
346
|
+
for (const r of facRows) {
|
|
347
|
+
if (edgeCount >= opts.maxEdges) {
|
|
348
|
+
edgesCapped = true;
|
|
349
|
+
break;
|
|
350
|
+
}
|
|
351
|
+
const list = messageFacets.get(r.message_id) ?? [];
|
|
352
|
+
if (!list.includes(r.facet_key)) {
|
|
353
|
+
list.push(r.facet_key);
|
|
354
|
+
edgeCount++;
|
|
355
|
+
}
|
|
356
|
+
messageFacets.set(r.message_id, list);
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
return {
|
|
361
|
+
entityMessages,
|
|
362
|
+
facetMessages,
|
|
363
|
+
messageEntities,
|
|
364
|
+
messageFacets,
|
|
365
|
+
diagnostics: {
|
|
366
|
+
nodeCount: messageIds.size + entityMessages.size + facetMessages.size,
|
|
367
|
+
edgeCount,
|
|
368
|
+
seedExpanded,
|
|
369
|
+
nodesCapped,
|
|
370
|
+
edgesCapped,
|
|
371
|
+
},
|
|
372
|
+
};
|
|
373
|
+
}
|
|
374
|
+
/**
|
|
375
|
+
* Resolve a list of message ids into candidate rows joined back to
|
|
376
|
+
* `messages` for compose-lane consumption. Returns the matched entity/facet
|
|
377
|
+
* keys per message as a side-channel diagnostic.
|
|
378
|
+
*
|
|
379
|
+
* Caller is expected to gate further rendering on whether a message's
|
|
380
|
+
* full text should be hydrated.
|
|
381
|
+
*/
|
|
382
|
+
fetchCandidates(opts) {
|
|
383
|
+
if (!this.tablesExist() || opts.messageIds.length === 0)
|
|
384
|
+
return [];
|
|
385
|
+
const placeholders = opts.messageIds.map(() => '?').join(',');
|
|
386
|
+
const rows = this.db.prepare(`SELECT id, conversation_id FROM messages WHERE agent_id = ? AND id IN (${placeholders})`).all(opts.agentId, ...opts.messageIds);
|
|
387
|
+
const meta = new Map();
|
|
388
|
+
for (const r of rows)
|
|
389
|
+
meta.set(r.id, { threadRef: r.conversation_id });
|
|
390
|
+
const entByMsg = new Map();
|
|
391
|
+
const facByMsg = new Map();
|
|
392
|
+
const entRows = this.db.prepare(`SELECT m.message_id, e.entity_key
|
|
393
|
+
FROM message_entity_mentions m JOIN memory_entities e ON e.id = m.entity_id
|
|
394
|
+
WHERE m.agent_id = ? AND m.message_id IN (${placeholders})`).all(opts.agentId, ...opts.messageIds);
|
|
395
|
+
for (const r of entRows) {
|
|
396
|
+
const list = entByMsg.get(r.message_id) ?? [];
|
|
397
|
+
if (!list.includes(r.entity_key))
|
|
398
|
+
list.push(r.entity_key);
|
|
399
|
+
entByMsg.set(r.message_id, list);
|
|
400
|
+
}
|
|
401
|
+
const facRows = this.db.prepare(`SELECT m.message_id, f.facet_key
|
|
402
|
+
FROM message_facet_mentions m JOIN memory_facets f ON f.id = m.facet_id
|
|
403
|
+
WHERE m.agent_id = ? AND m.message_id IN (${placeholders})`).all(opts.agentId, ...opts.messageIds);
|
|
404
|
+
for (const r of facRows) {
|
|
405
|
+
const list = facByMsg.get(r.message_id) ?? [];
|
|
406
|
+
if (!list.includes(r.facet_key))
|
|
407
|
+
list.push(r.facet_key);
|
|
408
|
+
facByMsg.set(r.message_id, list);
|
|
409
|
+
}
|
|
410
|
+
const out = [];
|
|
411
|
+
for (const id of opts.messageIds) {
|
|
412
|
+
const m = meta.get(id);
|
|
413
|
+
if (!m)
|
|
414
|
+
continue;
|
|
415
|
+
out.push({
|
|
416
|
+
messageId: id,
|
|
417
|
+
threadRef: m.threadRef,
|
|
418
|
+
matchedEntities: entByMsg.get(id) ?? [],
|
|
419
|
+
matchedFacets: facByMsg.get(id) ?? [],
|
|
420
|
+
});
|
|
421
|
+
}
|
|
422
|
+
return out;
|
|
423
|
+
}
|
|
424
|
+
// \u2500\u2500 Internals \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
|
425
|
+
ensureUpsertStmts() {
|
|
426
|
+
if (this._stmtUpsertEntity)
|
|
427
|
+
return;
|
|
428
|
+
this._stmtUpsertEntity = this.db.prepare(`INSERT INTO memory_entities (agent_id, entity_key, display_name, first_seen_at, last_seen_at, mention_count)
|
|
429
|
+
VALUES (?, ?, ?, ?, ?, 1)
|
|
430
|
+
ON CONFLICT(agent_id, entity_key) DO UPDATE SET
|
|
431
|
+
display_name = COALESCE(memory_entities.display_name, excluded.display_name),
|
|
432
|
+
last_seen_at = excluded.last_seen_at,
|
|
433
|
+
mention_count = memory_entities.mention_count + 1`);
|
|
434
|
+
this._stmtTouchEntity = this.db.prepare('SELECT id FROM memory_entities WHERE agent_id = ? AND entity_key = ?');
|
|
435
|
+
this._stmtUpsertFacet = this.db.prepare(`INSERT INTO memory_facets (agent_id, facet_key, first_seen_at, last_seen_at, mention_count)
|
|
436
|
+
VALUES (?, ?, ?, ?, 1)
|
|
437
|
+
ON CONFLICT(agent_id, facet_key) DO UPDATE SET
|
|
438
|
+
last_seen_at = excluded.last_seen_at,
|
|
439
|
+
mention_count = memory_facets.mention_count + 1`);
|
|
440
|
+
this._stmtTouchFacet = this.db.prepare('SELECT id FROM memory_facets WHERE agent_id = ? AND facet_key = ?');
|
|
441
|
+
this._stmtInsertEntityMention = this.db.prepare(`INSERT INTO message_entity_mentions
|
|
442
|
+
(message_id, entity_id, agent_id, conversation_id, match_term, start_offset, end_offset, created_at)
|
|
443
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
|
|
444
|
+
this._stmtInsertFacetMention = this.db.prepare(`INSERT INTO message_facet_mentions
|
|
445
|
+
(message_id, facet_id, agent_id, conversation_id, match_term, start_offset, end_offset, created_at)
|
|
446
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`);
|
|
447
|
+
this._stmtUpsertIndex = this.db.prepare(`INSERT INTO entity_bridge_message_index
|
|
448
|
+
(message_id, agent_id, conversation_id, entity_count, facet_count, indexed_at, source, status, last_error)
|
|
449
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
450
|
+
ON CONFLICT(message_id) DO UPDATE SET
|
|
451
|
+
entity_count = excluded.entity_count,
|
|
452
|
+
facet_count = excluded.facet_count,
|
|
453
|
+
indexed_at = excluded.indexed_at,
|
|
454
|
+
source = excluded.source,
|
|
455
|
+
status = excluded.status,
|
|
456
|
+
last_error = excluded.last_error`);
|
|
457
|
+
}
|
|
458
|
+
upsertEntityRow(agentId, mention, ts) {
|
|
459
|
+
this._stmtUpsertEntity.run(agentId, mention.key, mention.surface, ts, ts);
|
|
460
|
+
const row = this._stmtTouchEntity.get(agentId, mention.key);
|
|
461
|
+
return row?.id ?? null;
|
|
462
|
+
}
|
|
463
|
+
upsertFacetRow(agentId, mention, ts) {
|
|
464
|
+
this._stmtUpsertFacet.run(agentId, mention.key, ts, ts);
|
|
465
|
+
const row = this._stmtTouchFacet.get(agentId, mention.key);
|
|
466
|
+
return row?.id ?? null;
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
/**
 * Classify a thrown value for failure telemetry without exposing its message.
 *
 * @param err Any thrown value.
 * @returns the error's `name` for `Error` instances ('Error' when the name is
 *          empty), 'unknown' for null/undefined, otherwise the `typeof` tag.
 */
function errorClass(err) {
    // `typeof` never yields an empty string, so a `|| 'unknown'` fallback on it
    // can never fire; handle the "nothing thrown" case explicitly instead.
    // This also avoids reporting null as 'object'.
    if (err == null)
        return 'unknown';
    if (err instanceof Error)
        return err.name || 'Error';
    return typeof err;
}
|
|
474
|
+
/**
 * Produce a short description of a thrown value for the index row's
 * `last_error` column.
 *
 * @param err Any thrown value.
 * @returns 'unknown' for falsy input; for `Error` instances the message
 *          (falling back to the name, then 'error'); otherwise `String(err)`,
 *          or 'error' when stringification throws. Capped at 200 characters.
 */
function summarizeError(err) {
    // Cap length so we never bloat the index row.
    const maxLength = 200;
    if (!err) {
        return 'unknown';
    }
    let text;
    if (err instanceof Error) {
        text = err.message || err.name || 'error';
    }
    else {
        try {
            text = String(err);
        }
        catch {
            return 'error';
        }
    }
    return text.slice(0, maxLength);
}
|
|
488
|
+
//# sourceMappingURL=entity-bridge-store.js.map
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* entity-extractor.ts — Entity and facet tagger
|
|
3
|
+
*
|
|
4
|
+
* Sprint A: compose-time annotation only.
|
|
5
|
+
* Sprint B: also used at ingest time. Adds shared key normalization
|
|
6
|
+
* (`normalizeEntityKey`, `normalizeFacetKey`) and an ingest-friendly
|
|
7
|
+
* `extractEntityFacetMentions(text)` returning cheap (start,end) offsets
|
|
8
|
+
* suitable for storing in `message_entity_mentions` / `message_facet_mentions`.
|
|
9
|
+
*
|
|
10
|
+
* Exported symbols:
|
|
11
|
+
* extractEntitiesFromText(text, knownEntities?) → ExtractedTextEntities
|
|
12
|
+
* annotateRecallGroups(...) → AnnotatedGroup[]
|
|
13
|
+
* formatStructuredHandoffBlock(...) → structured block
|
|
14
|
+
* buildStructuredHandoffInstruction(...) → string
|
|
15
|
+
* normalizeEntityKey(token) → string (Sprint B)
|
|
16
|
+
* normalizeFacetKey(token) → string (Sprint B)
|
|
17
|
+
* extractEntityFacetMentions(text) → EntityFacetMentions (Sprint B)
|
|
18
|
+
*/
|
|
19
|
+
/** A single entity occurrence located in a source string. */
export interface EntityMention {
    /** Canonical entity key: the normalized (lowercase, trimmed) form. */
    key: string;
    /** The text exactly as it was written in the source. */
    surface: string;
    /** Offset of the first matched character in the source string (inclusive). */
    start: number;
    /** Offset just past the last matched character in the source string (exclusive). */
    end: number;
}
|
|
29
|
+
/** A single facet occurrence located in a source string. */
export interface FacetMention {
    /** Key of the facet group the term belongs to (e.g. 'job', 'death', 'venue'). */
    key: string;
    /** The raw term whose presence triggered the match. */
    term: string;
    /** Offset of the first matched character in the source string (inclusive). */
    start: number;
    /** Offset just past the last matched character in the source string (exclusive). */
    end: number;
}
|
|
39
|
+
/** Entity and facet mentions found in one text block. */
export interface EntityFacetMentions {
    /** Entity occurrences, each with its offsets. */
    entities: Array<EntityMention>;
    /** Facet occurrences, each with its offsets. */
    facets: Array<FacetMention>;
}
|
|
43
|
+
/** Offset-free result of scanning a text block for entities and facets. */
export interface ExtractedTextEntities {
    /** Normalized entity tokens present in the text. */
    entities: Array<string>;
    /** Names of the facet groups that matched in the text. */
    facets: Array<string>;
    /** The raw facet terms that matched in the text. */
    facetTerms: Array<string>;
}
|
|
51
|
+
/** One recall group together with the query entities/facets found in it. */
export interface AnnotatedGroup {
    /** Identifier of the source group (e.g. a conversation id). */
    groupId: string;
    /** The group's original content lines. */
    lines: Array<string>;
    /** Query entity tokens that occur in this group's content. */
    matchedEntities: Array<string>;
    /** Query facet group names that occur in this group's content. */
    matchedFacets: Array<string>;
    /** Raw facet terms that matched in this group. */
    matchedFacetTerms: Array<string>;
    /** True when the group contains at least one query entity or facet. */
    isRelevant: boolean;
}
|
|
65
|
+
/**
 * Scan a block of text and return the entity and facet tokens it contains.
 *
 * @param text Text content to scan.
 * @param knownEntities Optional tokens to bias the scan toward; useful for
 *        narrowing to query-relevant entities during structured handoff.
 * @returns the entities, facet groups and raw facet terms that were found.
 */
export declare function extractEntitiesFromText(
    text: string,
    knownEntities?: string[],
): ExtractedTextEntities;
|
|
71
|
+
/**
 * Split raw recall content (already grouped by conversation_id) into groups
 * and annotate each one with the query entities and facets that appear in
 * its content lines. This is the core of the Sprint A structured handoff.
 *
 * @param recallContent The `recall.content` string from
 *        `buildQueryMessageRecall()`, which uses
 *        `### Raw transcript group {id}` headers.
 * @param queryEntities Entity tokens taken from the query.
 * @param queryFacets Facet group names taken from the query.
 * @returns one annotated entry per group.
 */
export declare function annotateRecallGroups(
    recallContent: string,
    queryEntities: string[],
    queryFacets: string[],
): AnnotatedGroup[];
|
|
81
|
+
/**
 * Render annotated groups as structured evidence blocks for multi-hop handoff.
 *
 * Every group receives a header naming the query entities/facets it contains,
 * letting the reader see quickly which groups are evidence for each hop.
 *
 * Only the header changes, so the token cost is small relative to the raw
 * content. No content is dropped; the budget accounting done by
 * `buildQueryMessageRecall` has already been applied before this runs.
 *
 * @param groups Groups produced by `annotateRecallGroups`.
 * @param queryEntities Entity tokens taken from the query.
 * @param queryFacets Facet group names taken from the query.
 * @returns the formatted content plus entity-group and facet-group counts.
 */
export declare function formatStructuredHandoffBlock(
    groups: AnnotatedGroup[],
    queryEntities: string[],
    queryFacets: string[],
): {
    content: string;
    entityGroupCount: number;
    facetGroupCount: number;
};
|
|
96
|
+
/**
 * Turn an entity surface form into its canonical bridge key, the value used
 * as the join key in `memory_entities.entity_key`.
 *
 * Strategy: lowercase, collapse whitespace, strip leading/trailing
 * punctuation, and keep internal alphanumerics plus a small set of joiners
 * (-, _, .).
 *
 * @param token Entity surface form.
 * @returns the normalized key.
 */
export declare function normalizeEntityKey(
    token: string,
): string;
|
|
104
|
+
/**
 * Turn a facet group name into its canonical bridge key.
 *
 * Facet keys in QUESTION_SHAPE_FACETS are already lowercase identifiers, but
 * callers may pass raw terms; a raw term is snapped to its facet group.
 *
 * @param token Facet group name or raw facet term.
 * @returns the canonical facet key.
 */
export declare function normalizeFacetKey(
    token: string,
): string;
|
|
110
|
+
/**
 * Find entity and facet mentions in a text block and report each with cheap
 * (start, end) offsets, for ingest indexing into the entity/facet bridge
 * tables.
 *
 * Intended to be cheap and deterministic: it never calls a model and never
 * reads a DB. Whether the mentions get written is up to the caller.
 *
 * @param text Text block to scan.
 * @returns the entity and facet mentions found.
 */
export declare function extractEntityFacetMentions(
    text: string,
): EntityFacetMentions;
|
|
118
|
+
/**
 * Build the instruction preamble for structured handoff. When structured
 * handoff is active, this takes the place of the flat multi-hop instruction
 * string.
 *
 * @param entities Entity tokens to reference in the instruction.
 * @param facets Facet names to reference in the instruction.
 * @returns the instruction text.
 */
export declare function buildStructuredHandoffInstruction(
    entities: string[],
    facets: string[],
): string;
|
|
124
|
+
//# sourceMappingURL=entity-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"entity-extractor.d.ts","sourceRoot":"","sources":["../src/entity-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AASH,MAAM,WAAW,aAAa;IAC5B,kDAAkD;IAClD,GAAG,EAAE,MAAM,CAAC;IACZ,uDAAuD;IACvD,OAAO,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,iDAAiD;IACjD,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,YAAY;IAC3B,sDAAsD;IACtD,GAAG,EAAE,MAAM,CAAC;IACZ,+CAA+C;IAC/C,IAAI,EAAE,MAAM,CAAC;IACb,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;IACd,iDAAiD;IACjD,GAAG,EAAE,MAAM,CAAC;CACb;AAED,MAAM,WAAW,mBAAmB;IAClC,QAAQ,EAAE,aAAa,EAAE,CAAC;IAC1B,MAAM,EAAE,YAAY,EAAE,CAAC;CACxB;AAED,MAAM,WAAW,qBAAqB;IACpC,iDAAiD;IACjD,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,4CAA4C;IAC5C,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,0CAA0C;IAC1C,UAAU,EAAE,MAAM,EAAE,CAAC;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,yEAAyE;IACzE,OAAO,EAAE,MAAM,CAAC;IAChB,mCAAmC;IACnC,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,mEAAmE;IACnE,eAAe,EAAE,MAAM,EAAE,CAAC;IAC1B,uEAAuE;IACvE,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,4CAA4C;IAC5C,iBAAiB,EAAE,MAAM,EAAE,CAAC;IAC5B,uEAAuE;IACvE,UAAU,EAAE,OAAO,CAAC;CACrB;AA6BD;;;;GAIG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,aAAa,CAAC,EAAE,MAAM,EAAE,GACvB,qBAAqB,CAwDvB;AAID;;;;;;;;GAQG;AACH,wBAAgB,oBAAoB,CAClC,aAAa,EAAE,MAAM,EACrB,aAAa,EAAE,MAAM,EAAE,EACvB,WAAW,EAAE,MAAM,EAAE,GACpB,cAAc,EAAE,CAwDlB;AAID;;;;;;;;;GASG;AACH,wBAAgB,4BAA4B,CAC1C,MAAM,EAAE,cAAc,EAAE,EACxB,aAAa,EAAE,MAAM,EAAE,EACvB,WAAW,EAAE,MAAM,EAAE,GACpB;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,gBAAgB,EAAE,MAAM,CAAC;IAAC,eAAe,EAAE,MAAM,CAAA;CAAE,CAuCxE;AAID;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAOxD;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,CAavD;AA6BD;;;;;;GAMG;AACH,wBAAgB,0BAA0B,CAAC,IAAI,EAAE,MAAM,GAAG,mBAAmB,CA4E5E;AAED;;;;GAIG;AACH,wBAAgB,iCAAiC,CAC/C,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,EAAE,MAAM,EAAE,GACf,MAAM,CAsBR"}
|