@remnic/core 9.3.562 → 9.3.564
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-cli.js +40 -39
- package/dist/access-cli.js.map +1 -1
- package/dist/access-http.js +16 -16
- package/dist/access-mcp.js +13 -13
- package/dist/access-schema.js +3 -3
- package/dist/access-service.js +11 -11
- package/dist/active-recall.js +1 -1
- package/dist/adapters/index.js +4 -4
- package/dist/adapters/registry.js +2 -2
- package/dist/briefing.js +4 -4
- package/dist/causal-consolidation.js +5 -5
- package/dist/{chunk-I2K6KCVC.js → chunk-2FHLI4U6.js} +49 -49
- package/dist/chunk-3ONXXHQO.js +57 -0
- package/dist/chunk-3ONXXHQO.js.map +1 -0
- package/dist/{chunk-5GX5MUQ2.js → chunk-574MU2Y3.js} +3 -3
- package/dist/{chunk-65OLPXBU.js → chunk-5WB4C7KM.js} +6 -6
- package/dist/chunk-6PTSXBPE.js +483 -0
- package/dist/chunk-6PTSXBPE.js.map +1 -0
- package/dist/{chunk-Z56KAZQL.js → chunk-74VA26CT.js} +2 -2
- package/dist/{chunk-CC2ESOOG.js → chunk-7X7TBJRX.js} +2 -2
- package/dist/{chunk-O4M4WH6V.js → chunk-ARY5OOLG.js} +2 -2
- package/dist/{chunk-JBPKEARU.js → chunk-AU7Q3LSC.js} +4 -4
- package/dist/{chunk-PM3QHTFT.js → chunk-CF3ZF2YU.js} +3 -3
- package/dist/{chunk-SI3QCHWF.js → chunk-DARLGSFX.js} +5 -5
- package/dist/chunk-EWLQPEO6.js +308 -0
- package/dist/chunk-EWLQPEO6.js.map +1 -0
- package/dist/{chunk-FVCZINOF.js → chunk-FHBEL473.js} +2 -2
- package/dist/{chunk-7Q3RCKAQ.js → chunk-FXKPZ3H6.js} +2 -2
- package/dist/{chunk-5WLYNZPC.js → chunk-GBXGCFRH.js} +2 -2
- package/dist/{chunk-ILJXM3FV.js → chunk-HQO5EBUC.js} +10 -10
- package/dist/{chunk-FK556DDH.js → chunk-I4UNL747.js} +4 -4
- package/dist/{chunk-RLPIT4YI.js → chunk-IOTTZLFF.js} +38 -38
- package/dist/{chunk-TVZ6LKKS.js → chunk-IRFF6LSF.js} +8 -8
- package/dist/{chunk-M5T4Q2ZU.js → chunk-KGK2QKWL.js} +1 -1
- package/dist/chunk-KGK2QKWL.js.map +1 -0
- package/dist/{chunk-IPLYGWQF.js → chunk-KQAFEZQX.js} +5 -5
- package/dist/chunk-M46RYSMW.js +597 -0
- package/dist/chunk-M46RYSMW.js.map +1 -0
- package/dist/{chunk-KXULCVOC.js → chunk-M6I5Z4SR.js} +4 -2
- package/dist/chunk-M6I5Z4SR.js.map +1 -0
- package/dist/{chunk-JFN6K74Q.js → chunk-MQEIWDYW.js} +2 -2
- package/dist/{chunk-7H6CFEBJ.js → chunk-NZPF2SYV.js} +8 -1
- package/dist/{chunk-7H6CFEBJ.js.map → chunk-NZPF2SYV.js.map} +1 -1
- package/dist/{chunk-SML26KED.js → chunk-OB6353F7.js} +16 -12
- package/dist/chunk-OB6353F7.js.map +1 -0
- package/dist/{chunk-SOTR74FK.js → chunk-OPYFD6PD.js} +2 -2
- package/dist/{chunk-3C5RPJAX.js → chunk-OXJBNGBK.js} +2 -2
- package/dist/{chunk-BD5LHQWD.js → chunk-PPPZY2EU.js} +2 -2
- package/dist/{chunk-25BY3HHZ.js → chunk-SUTSSOYU.js} +2 -2
- package/dist/{chunk-KS7WO6EQ.js → chunk-VFB2G5YL.js} +20 -20
- package/dist/{chunk-BUUYY2H2.js → chunk-WP5OWVLZ.js} +4 -4
- package/dist/{chunk-6URPAY2D.js → chunk-XCAZF7KQ.js} +207 -53
- package/dist/chunk-XCAZF7KQ.js.map +1 -0
- package/dist/{chunk-S53PAX2V.js → chunk-XM7BYXT7.js} +2 -2
- package/dist/{chunk-FADZBOR4.js → chunk-XRWTAEZM.js} +2 -2
- package/dist/{chunk-E5OECWZ5.js → chunk-XT7XVA53.js} +2 -2
- package/dist/{chunk-R3PS27B4.js → chunk-Z4R6RI2N.js} +2 -2
- package/dist/cli.js +44 -43
- package/dist/compounding/engine.js +4 -4
- package/dist/config.js +1 -1
- package/dist/connectors/codex-materialize-runner.js +4 -4
- package/dist/connectors/index.js +4 -4
- package/dist/embedding-fallback.d.ts +12 -1
- package/dist/embedding-fallback.js +4 -1
- package/dist/entity-retrieval.js +4 -4
- package/dist/host-embedding-provider.d.ts +21 -0
- package/dist/host-embedding-provider.js +14 -0
- package/dist/host-embedding-provider.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +71 -63
- package/dist/index.js.map +1 -1
- package/dist/lcm/index.js +3 -3
- package/dist/maintenance/memory-governance.js +4 -4
- package/dist/maintenance/rebuild-memory-lifecycle-ledger.js +4 -4
- package/dist/maintenance/rebuild-memory-projection.js +5 -5
- package/dist/namespaces/migrate.js +14 -13
- package/dist/namespaces/search.js +9 -8
- package/dist/namespaces/storage.js +4 -4
- package/dist/operator-toolkit.js +17 -16
- package/dist/orchestrator.js +32 -31
- package/dist/recall-explain-renderer.js +3 -3
- package/dist/recall-xray-cli.js +4 -4
- package/dist/recall-xray-renderer.js +3 -3
- package/dist/recall-xray.js +2 -2
- package/dist/resume-bundles.js +2 -2
- package/dist/search/embed-helper.d.ts +48 -4
- package/dist/search/embed-helper.js +2 -1
- package/dist/search/factory.js +8 -7
- package/dist/search/index.d.ts +1 -0
- package/dist/search/index.js +12 -11
- package/dist/search/lancedb-backend.d.ts +11 -0
- package/dist/search/lancedb-backend.js +2 -2
- package/dist/search/meilisearch-backend.js +2 -2
- package/dist/search/orama-backend.d.ts +16 -0
- package/dist/search/orama-backend.js +2 -2
- package/dist/semantic-consolidation.js +5 -5
- package/dist/semantic-rule-promotion.js +4 -4
- package/dist/semantic-rule-verifier.js +4 -4
- package/dist/storage.js +3 -3
- package/dist/transfer/autodetect.js +1 -1
- package/dist/transfer/backup.js +1 -1
- package/dist/transfer/capsule-export.js +2 -2
- package/dist/transfer/types.d.ts +6 -6
- package/dist/types.d.ts +17 -0
- package/dist/types.js +1 -1
- package/dist/verified-recall.js +4 -4
- package/package.json +11 -1
- package/src/config.ts +18 -0
- package/src/embedding-fallback.ts +293 -61
- package/src/host-embedding-provider.ts +84 -0
- package/src/index.ts +7 -0
- package/src/namespaces/search.ts +9 -1
- package/src/qmd.test.ts +28 -0
- package/src/search/embed-helper.ts +319 -51
- package/src/search/factory.ts +6 -2
- package/src/search/lancedb-backend.ts +297 -41
- package/src/search/orama-backend.ts +418 -47
- package/src/types.ts +17 -0
- package/dist/chunk-6URPAY2D.js.map +0 -1
- package/dist/chunk-FUC4LZMD.js +0 -301
- package/dist/chunk-FUC4LZMD.js.map +0 -1
- package/dist/chunk-KXULCVOC.js.map +0 -1
- package/dist/chunk-M5T4Q2ZU.js.map +0 -1
- package/dist/chunk-ONPLNAPX.js +0 -133
- package/dist/chunk-ONPLNAPX.js.map +0 -1
- package/dist/chunk-QVJ4NWL2.js +0 -335
- package/dist/chunk-QVJ4NWL2.js.map +0 -1
- package/dist/chunk-SML26KED.js.map +0 -1
- /package/dist/{chunk-I2K6KCVC.js.map → chunk-2FHLI4U6.js.map} +0 -0
- /package/dist/{chunk-5GX5MUQ2.js.map → chunk-574MU2Y3.js.map} +0 -0
- /package/dist/{chunk-65OLPXBU.js.map → chunk-5WB4C7KM.js.map} +0 -0
- /package/dist/{chunk-Z56KAZQL.js.map → chunk-74VA26CT.js.map} +0 -0
- /package/dist/{chunk-CC2ESOOG.js.map → chunk-7X7TBJRX.js.map} +0 -0
- /package/dist/{chunk-O4M4WH6V.js.map → chunk-ARY5OOLG.js.map} +0 -0
- /package/dist/{chunk-JBPKEARU.js.map → chunk-AU7Q3LSC.js.map} +0 -0
- /package/dist/{chunk-PM3QHTFT.js.map → chunk-CF3ZF2YU.js.map} +0 -0
- /package/dist/{chunk-SI3QCHWF.js.map → chunk-DARLGSFX.js.map} +0 -0
- /package/dist/{chunk-FVCZINOF.js.map → chunk-FHBEL473.js.map} +0 -0
- /package/dist/{chunk-7Q3RCKAQ.js.map → chunk-FXKPZ3H6.js.map} +0 -0
- /package/dist/{chunk-5WLYNZPC.js.map → chunk-GBXGCFRH.js.map} +0 -0
- /package/dist/{chunk-ILJXM3FV.js.map → chunk-HQO5EBUC.js.map} +0 -0
- /package/dist/{chunk-FK556DDH.js.map → chunk-I4UNL747.js.map} +0 -0
- /package/dist/{chunk-RLPIT4YI.js.map → chunk-IOTTZLFF.js.map} +0 -0
- /package/dist/{chunk-TVZ6LKKS.js.map → chunk-IRFF6LSF.js.map} +0 -0
- /package/dist/{chunk-IPLYGWQF.js.map → chunk-KQAFEZQX.js.map} +0 -0
- /package/dist/{chunk-JFN6K74Q.js.map → chunk-MQEIWDYW.js.map} +0 -0
- /package/dist/{chunk-SOTR74FK.js.map → chunk-OPYFD6PD.js.map} +0 -0
- /package/dist/{chunk-3C5RPJAX.js.map → chunk-OXJBNGBK.js.map} +0 -0
- /package/dist/{chunk-BD5LHQWD.js.map → chunk-PPPZY2EU.js.map} +0 -0
- /package/dist/{chunk-25BY3HHZ.js.map → chunk-SUTSSOYU.js.map} +0 -0
- /package/dist/{chunk-KS7WO6EQ.js.map → chunk-VFB2G5YL.js.map} +0 -0
- /package/dist/{chunk-BUUYY2H2.js.map → chunk-WP5OWVLZ.js.map} +0 -0
- /package/dist/{chunk-S53PAX2V.js.map → chunk-XM7BYXT7.js.map} +0 -0
- /package/dist/{chunk-FADZBOR4.js.map → chunk-XRWTAEZM.js.map} +0 -0
- /package/dist/{chunk-E5OECWZ5.js.map → chunk-XT7XVA53.js.map} +0 -0
- /package/dist/{chunk-R3PS27B4.js.map → chunk-Z4R6RI2N.js.map} +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
|
-
import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
|
|
2
|
+
import { mkdir, readdir, readFile, rename, rm, writeFile } from "node:fs/promises";
|
|
3
3
|
import { log } from "../logger.js";
|
|
4
4
|
import type { SearchBackend, SearchExecutionOptions, SearchQueryOptions, SearchResult } from "./port.js";
|
|
5
|
-
import type { EmbedHelper } from "./embed-helper.js";
|
|
5
|
+
import type { EmbedHelper, EmbedProviderIdentity, EmbedWithProviderResult } from "./embed-helper.js";
|
|
6
6
|
import { scanMemoryDir } from "./document-scanner.js";
|
|
7
7
|
import { isSearchAborted, throwIfSearchAborted } from "./abort.js";
|
|
8
8
|
|
|
@@ -54,6 +54,10 @@ export class OramaBackend implements SearchBackend {
|
|
|
54
54
|
private db: any = null;
|
|
55
55
|
private oramaModule: any = null;
|
|
56
56
|
private persistModule: any = null;
|
|
57
|
+
private readonly vectorProviderCompatibility = new WeakMap<
|
|
58
|
+
object,
|
|
59
|
+
{ providerIdentity: EmbedProviderIdentity; compatible: boolean }
|
|
60
|
+
>();
|
|
57
61
|
|
|
58
62
|
constructor(opts: OramaBackendOptions) {
|
|
59
63
|
this.dbPath = opts.dbPath;
|
|
@@ -149,26 +153,44 @@ export class OramaBackend implements SearchBackend {
|
|
|
149
153
|
const db = await this.ensureDbForCollection(collection);
|
|
150
154
|
if (isSearchAborted(execution)) return;
|
|
151
155
|
if (!db) return;
|
|
152
|
-
const { search: oramaSearch, insert, remove, count } = this.oramaModule;
|
|
156
|
+
const { search: oramaSearch, insert, remove, count, getByID } = this.oramaModule;
|
|
153
157
|
|
|
154
158
|
const docs = await scanMemoryDir(this.memoryDir);
|
|
155
159
|
if (isSearchAborted(execution)) return;
|
|
156
160
|
const docMap = new Map(docs.map((d) => [d.docid, d]));
|
|
157
161
|
const { update: oramaUpdate } = this.oramaModule;
|
|
158
162
|
|
|
163
|
+
const embeddingProviderIdentity = this.embedHelper.getProviderIdentity();
|
|
164
|
+
let allRowsCompatible = !!embeddingProviderIdentity && docs.length > 0;
|
|
159
165
|
// Get existing docs to diff — map user doc ID → { internalId, vector }
|
|
160
|
-
const existingDocs = new Map<string, {
|
|
166
|
+
const existingDocs = new Map<string, {
|
|
167
|
+
internalId: string;
|
|
168
|
+
vector?: number[];
|
|
169
|
+
vectorProvider?: string;
|
|
170
|
+
}>();
|
|
161
171
|
const existingCount = await count(db);
|
|
162
172
|
if (existingCount > 0) {
|
|
163
|
-
const allHits = await oramaSearch(db, {
|
|
173
|
+
const allHits = await oramaSearch(db, {
|
|
174
|
+
term: "",
|
|
175
|
+
limit: existingCount + 100,
|
|
176
|
+
});
|
|
164
177
|
for (const hit of allHits.hits) {
|
|
165
178
|
if (isSearchAborted(execution)) return;
|
|
166
|
-
|
|
179
|
+
const storedDocument =
|
|
180
|
+
typeof getByID === "function"
|
|
181
|
+
? await getByID(db, hit.id)
|
|
182
|
+
: hit.document;
|
|
183
|
+
const document = storedDocument ?? hit.document ?? {};
|
|
184
|
+
if (!docMap.has(document.id)) {
|
|
167
185
|
await remove(db, hit.id);
|
|
168
186
|
} else {
|
|
169
|
-
existingDocs.set(
|
|
187
|
+
existingDocs.set(document.id, {
|
|
170
188
|
internalId: hit.id,
|
|
171
|
-
vector:
|
|
189
|
+
vector: this.normalizeStoredVector(document.vector) ?? undefined,
|
|
190
|
+
vectorProvider:
|
|
191
|
+
typeof document.vectorProvider === "string"
|
|
192
|
+
? document.vectorProvider
|
|
193
|
+
: undefined,
|
|
172
194
|
});
|
|
173
195
|
}
|
|
174
196
|
}
|
|
@@ -185,23 +207,46 @@ export class OramaBackend implements SearchBackend {
|
|
|
185
207
|
content: doc.content,
|
|
186
208
|
snippet: doc.snippet,
|
|
187
209
|
};
|
|
188
|
-
|
|
210
|
+
const preservesCompatibleProvider =
|
|
211
|
+
!!embeddingProviderIdentity &&
|
|
212
|
+
existing.vectorProvider === embeddingProviderIdentity;
|
|
213
|
+
if (preservesCompatibleProvider) {
|
|
214
|
+
if (this.isCompatibleStoredVector(existing.vector)) {
|
|
215
|
+
payload.vector = existing.vector;
|
|
216
|
+
payload.vectorProvider = existing.vectorProvider ?? "";
|
|
217
|
+
} else {
|
|
218
|
+
payload.vector = this.zeroVector();
|
|
219
|
+
payload.vectorProvider = "";
|
|
220
|
+
allRowsCompatible = false;
|
|
221
|
+
}
|
|
222
|
+
} else if (!embeddingProviderIdentity && this.isCompatibleStoredVector(existing.vector)) {
|
|
189
223
|
payload.vector = existing.vector;
|
|
224
|
+
payload.vectorProvider = existing.vectorProvider ?? "";
|
|
225
|
+
allRowsCompatible = false;
|
|
226
|
+
} else {
|
|
227
|
+
payload.vector = this.zeroVector();
|
|
228
|
+
payload.vectorProvider = "";
|
|
229
|
+
allRowsCompatible = false;
|
|
190
230
|
}
|
|
191
231
|
try {
|
|
192
232
|
await oramaUpdate(db, existing.internalId, payload);
|
|
193
233
|
} catch {
|
|
234
|
+
allRowsCompatible = false;
|
|
194
235
|
// Update failed — skip and continue with remaining docs
|
|
195
236
|
}
|
|
196
237
|
} else {
|
|
238
|
+
allRowsCompatible = false;
|
|
197
239
|
try {
|
|
198
240
|
await insert(db, {
|
|
199
241
|
id: doc.docid,
|
|
200
242
|
path: doc.path,
|
|
201
243
|
content: doc.content,
|
|
202
244
|
snippet: doc.snippet,
|
|
245
|
+
vector: this.zeroVector(),
|
|
246
|
+
vectorProvider: "",
|
|
203
247
|
});
|
|
204
248
|
} catch {
|
|
249
|
+
allRowsCompatible = false;
|
|
205
250
|
// Duplicate id edge case — skip
|
|
206
251
|
}
|
|
207
252
|
}
|
|
@@ -209,6 +254,11 @@ export class OramaBackend implements SearchBackend {
|
|
|
209
254
|
|
|
210
255
|
if (isSearchAborted(execution)) return;
|
|
211
256
|
await this.persistDbForCollection(db, collection);
|
|
257
|
+
this.rememberVectorProviderCompatibility(
|
|
258
|
+
db,
|
|
259
|
+
embeddingProviderIdentity,
|
|
260
|
+
allRowsCompatible,
|
|
261
|
+
);
|
|
212
262
|
}
|
|
213
263
|
|
|
214
264
|
async embed(): Promise<void> {
|
|
@@ -225,30 +275,80 @@ export class OramaBackend implements SearchBackend {
|
|
|
225
275
|
const existingCount = await count(db);
|
|
226
276
|
if (existingCount === 0) return;
|
|
227
277
|
|
|
228
|
-
|
|
278
|
+
const embeddingProviderIdentity = this.embedHelper.getProviderIdentity();
|
|
279
|
+
// Find docs without vectors or with vectors from a different provider.
|
|
229
280
|
const allHits = await oramaSearch(db, { term: "", limit: existingCount + 100 });
|
|
230
|
-
const needsEmbed = allHits.hits.filter((h: any) =>
|
|
231
|
-
|
|
232
|
-
|
|
281
|
+
const needsEmbed = allHits.hits.filter((h: any) => {
|
|
282
|
+
const vector = this.normalizeStoredVector(h.document?.vector);
|
|
283
|
+
return (
|
|
284
|
+
(embeddingProviderIdentity &&
|
|
285
|
+
h.document?.vectorProvider !== embeddingProviderIdentity) ||
|
|
286
|
+
!this.isCompatibleStoredVector(vector)
|
|
287
|
+
);
|
|
288
|
+
});
|
|
289
|
+
|
|
290
|
+
if (needsEmbed.length === 0) {
|
|
291
|
+
this.rememberVectorProviderCompatibility(db, embeddingProviderIdentity, true);
|
|
292
|
+
return;
|
|
293
|
+
}
|
|
233
294
|
|
|
234
|
-
|
|
235
|
-
|
|
295
|
+
let rowsToEmbed = needsEmbed;
|
|
296
|
+
let embedResult = await this.embedHelper.embedBatchWithProvider(
|
|
297
|
+
rowsToEmbed.map((h: any) => h.document.content as string),
|
|
298
|
+
);
|
|
299
|
+
if (!embedResult) return;
|
|
300
|
+
if (
|
|
301
|
+
embeddingProviderIdentity &&
|
|
302
|
+
embedResult.providerIdentity !== embeddingProviderIdentity
|
|
303
|
+
) {
|
|
304
|
+
const effectiveProviderIdentity = embedResult.providerIdentity;
|
|
305
|
+
const originalIds = new Set(rowsToEmbed.map((h: any) => h.id));
|
|
306
|
+
const effectiveNeedsEmbed = allHits.hits.filter((h: any) => {
|
|
307
|
+
const vector = this.normalizeStoredVector(h.document?.vector);
|
|
308
|
+
return (
|
|
309
|
+
h.document?.vectorProvider !== effectiveProviderIdentity ||
|
|
310
|
+
!this.isCompatibleStoredVector(vector)
|
|
311
|
+
);
|
|
312
|
+
});
|
|
313
|
+
const sameRows =
|
|
314
|
+
effectiveNeedsEmbed.length === rowsToEmbed.length &&
|
|
315
|
+
effectiveNeedsEmbed.every((h: any) => originalIds.has(h.id));
|
|
316
|
+
if (!sameRows) {
|
|
317
|
+
const effectiveTexts = effectiveNeedsEmbed.map((h: any) => h.document.content as string);
|
|
318
|
+
const effectiveEmbedResult = await this.embedHelper.embedBatchWithProvider(effectiveTexts);
|
|
319
|
+
if (effectiveEmbedResult) {
|
|
320
|
+
rowsToEmbed = effectiveNeedsEmbed;
|
|
321
|
+
embedResult = effectiveEmbedResult;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
const { vectors, providerIdentity } = embedResult;
|
|
236
326
|
|
|
237
|
-
|
|
327
|
+
let allEmbedded = true;
|
|
328
|
+
for (let i = 0; i < rowsToEmbed.length; i++) {
|
|
238
329
|
const vec = vectors[i];
|
|
239
|
-
if (!vec)
|
|
330
|
+
if (!this.isExpectedDimensionVector(vec)) {
|
|
331
|
+
allEmbedded = false;
|
|
332
|
+
continue;
|
|
333
|
+
}
|
|
240
334
|
// Orama update is remove+insert — must include all fields to avoid data loss
|
|
241
|
-
const doc =
|
|
242
|
-
await oramaUpdate(db,
|
|
335
|
+
const doc = rowsToEmbed[i].document;
|
|
336
|
+
await oramaUpdate(db, rowsToEmbed[i].id, {
|
|
243
337
|
id: doc.id,
|
|
244
338
|
path: doc.path,
|
|
245
339
|
content: doc.content,
|
|
246
340
|
snippet: doc.snippet,
|
|
247
341
|
vector: vec,
|
|
342
|
+
vectorProvider: providerIdentity,
|
|
248
343
|
});
|
|
249
344
|
}
|
|
250
345
|
|
|
251
346
|
await this.persistDbForCollection(db, collection);
|
|
347
|
+
if (allEmbedded) {
|
|
348
|
+
this.rememberVectorProviderCompatibility(db, providerIdentity, true);
|
|
349
|
+
} else {
|
|
350
|
+
this.rememberVectorProviderCompatibility(db, providerIdentity, false);
|
|
351
|
+
}
|
|
252
352
|
}
|
|
253
353
|
|
|
254
354
|
async ensureCollection(
|
|
@@ -277,25 +377,19 @@ export class OramaBackend implements SearchBackend {
|
|
|
277
377
|
await mkdir(this.dbPath, { recursive: true });
|
|
278
378
|
const filePath = this.dbFilePath(this.collection);
|
|
279
379
|
|
|
380
|
+
let raw: string;
|
|
280
381
|
try {
|
|
281
|
-
|
|
282
|
-
this.db = await this.persistModule.restore("json", raw);
|
|
283
|
-
return this.db;
|
|
382
|
+
raw = await readFile(filePath, "utf-8");
|
|
284
383
|
} catch {
|
|
285
384
|
// No existing DB — create fresh
|
|
385
|
+
this.db = await this.createDb();
|
|
386
|
+
return this.db;
|
|
286
387
|
}
|
|
287
388
|
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
content: "string",
|
|
293
|
-
snippet: "string",
|
|
294
|
-
};
|
|
295
|
-
if (this.embedHelper.isAvailable()) {
|
|
296
|
-
schema.vector = `vector[${this.embeddingDimension}]`;
|
|
297
|
-
}
|
|
298
|
-
this.db = await create({ schema });
|
|
389
|
+
this.db = await this.migrateLegacyVectorProviderSchema(
|
|
390
|
+
await this.persistModule.restore("json", raw),
|
|
391
|
+
this.collection,
|
|
392
|
+
);
|
|
299
393
|
return this.db;
|
|
300
394
|
}
|
|
301
395
|
|
|
@@ -307,31 +401,92 @@ export class OramaBackend implements SearchBackend {
|
|
|
307
401
|
await mkdir(this.dbPath, { recursive: true });
|
|
308
402
|
const filePath = this.dbFilePath(collection);
|
|
309
403
|
|
|
404
|
+
let raw: string;
|
|
310
405
|
try {
|
|
311
|
-
|
|
312
|
-
return await this.persistModule.restore("json", raw);
|
|
406
|
+
raw = await readFile(filePath, "utf-8");
|
|
313
407
|
} catch {
|
|
314
408
|
// No existing DB — create fresh
|
|
409
|
+
return await this.createDb();
|
|
315
410
|
}
|
|
316
411
|
|
|
412
|
+
return await this.migrateLegacyVectorProviderSchema(
|
|
413
|
+
await this.persistModule.restore("json", raw),
|
|
414
|
+
collection,
|
|
415
|
+
);
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
private async createDb(): Promise<any> {
|
|
317
419
|
const { create } = this.oramaModule;
|
|
318
420
|
const schema: Record<string, string> = {
|
|
319
421
|
id: "string",
|
|
320
422
|
path: "string",
|
|
321
423
|
content: "string",
|
|
322
424
|
snippet: "string",
|
|
425
|
+
vectorProvider: "string",
|
|
426
|
+
vector: `vector[${this.embeddingDimension}]`,
|
|
323
427
|
};
|
|
324
|
-
if (this.embedHelper.isAvailable()) {
|
|
325
|
-
schema.vector = `vector[${this.embeddingDimension}]`;
|
|
326
|
-
}
|
|
327
428
|
return await create({ schema });
|
|
328
429
|
}
|
|
329
430
|
|
|
431
|
+
private async migrateLegacyVectorProviderSchema(db: any, collection: string): Promise<any> {
|
|
432
|
+
const { search: oramaSearch, count, insert } = this.oramaModule;
|
|
433
|
+
const existingCount = await count(db);
|
|
434
|
+
if (existingCount === 0) {
|
|
435
|
+
const migrated = await this.createDb();
|
|
436
|
+
await this.persistDbForCollection(migrated, collection);
|
|
437
|
+
return migrated;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
const allHits = await oramaSearch(db, { term: "", limit: existingCount + 100 });
|
|
441
|
+
const hits = allHits.hits ?? [];
|
|
442
|
+
const needsMigration = hits.some((hit: any) =>
|
|
443
|
+
typeof hit.document?.vectorProvider !== "string"
|
|
444
|
+
);
|
|
445
|
+
if (!needsMigration) return db;
|
|
446
|
+
|
|
447
|
+
const migrated = await this.createDb();
|
|
448
|
+
for (const hit of hits) {
|
|
449
|
+
const doc = this.getStoredDocument(db, hit);
|
|
450
|
+
const vector = this.getStoredVector(db, hit, doc);
|
|
451
|
+
const payload: Record<string, unknown> = {
|
|
452
|
+
id: typeof doc.id === "string" && doc.id.length > 0 ? doc.id : String(hit.id),
|
|
453
|
+
path: typeof doc.path === "string" ? doc.path : "",
|
|
454
|
+
content: typeof doc.content === "string" ? doc.content : "",
|
|
455
|
+
snippet:
|
|
456
|
+
typeof doc.snippet === "string"
|
|
457
|
+
? doc.snippet
|
|
458
|
+
: typeof doc.content === "string"
|
|
459
|
+
? doc.content.slice(0, 200)
|
|
460
|
+
: "",
|
|
461
|
+
vectorProvider:
|
|
462
|
+
typeof doc.vectorProvider === "string" ? doc.vectorProvider : "",
|
|
463
|
+
};
|
|
464
|
+
if (vector) {
|
|
465
|
+
payload.vector = vector;
|
|
466
|
+
} else {
|
|
467
|
+
payload.vector = this.zeroVector();
|
|
468
|
+
}
|
|
469
|
+
await insert(migrated, payload);
|
|
470
|
+
}
|
|
471
|
+
await this.persistDbForCollection(migrated, collection);
|
|
472
|
+
return migrated;
|
|
473
|
+
}
|
|
474
|
+
|
|
330
475
|
private async persistDbForCollection(db: any, collection: string): Promise<void> {
|
|
331
476
|
const data = await this.persistModule.persist(db, "json");
|
|
332
477
|
const filePath = this.dbFilePath(collection);
|
|
333
478
|
await mkdir(path.dirname(filePath), { recursive: true });
|
|
334
|
-
|
|
479
|
+
const tempPath = path.join(
|
|
480
|
+
path.dirname(filePath),
|
|
481
|
+
`.${path.basename(filePath)}.${process.pid}.${Date.now()}.${Math.random().toString(16).slice(2)}.tmp`,
|
|
482
|
+
);
|
|
483
|
+
try {
|
|
484
|
+
await writeFile(tempPath, data, "utf-8");
|
|
485
|
+
await rename(tempPath, filePath);
|
|
486
|
+
} catch (err) {
|
|
487
|
+
await rm(tempPath, { force: true }).catch(() => undefined);
|
|
488
|
+
throw err;
|
|
489
|
+
}
|
|
335
490
|
}
|
|
336
491
|
|
|
337
492
|
private dbFilePath(collection: string): string {
|
|
@@ -353,8 +508,13 @@ export class OramaBackend implements SearchBackend {
|
|
|
353
508
|
try {
|
|
354
509
|
await this.ensureModules();
|
|
355
510
|
const raw = await readFile(filePath, "utf-8");
|
|
356
|
-
|
|
357
|
-
|
|
511
|
+
const collection = path.basename(filePath, ".msp");
|
|
512
|
+
return await this.migrateLegacyVectorProviderSchema(
|
|
513
|
+
await this.persistModule.restore("json", raw),
|
|
514
|
+
collection,
|
|
515
|
+
);
|
|
516
|
+
} catch (err) {
|
|
517
|
+
log.debug(`OramaBackend failed to load ${filePath}: ${err}`);
|
|
358
518
|
return null;
|
|
359
519
|
}
|
|
360
520
|
}
|
|
@@ -375,22 +535,22 @@ export class OramaBackend implements SearchBackend {
|
|
|
375
535
|
if (mode === "fulltext") {
|
|
376
536
|
searchParams = { term: query, limit };
|
|
377
537
|
} else if (mode === "vector") {
|
|
378
|
-
const
|
|
538
|
+
const embedResult = await this.resolveCompatibleQueryEmbedding(db, query, execution);
|
|
379
539
|
throwIfSearchAborted(execution, `OramaBackend ${mode} search aborted`);
|
|
380
|
-
if (!
|
|
540
|
+
if (!embedResult) {
|
|
381
541
|
// Fall back to fulltext if no embeddings available
|
|
382
542
|
searchParams = { term: query, limit };
|
|
383
543
|
} else {
|
|
384
|
-
searchParams = { mode: "vector", vector: { value:
|
|
544
|
+
searchParams = { mode: "vector", vector: { value: embedResult.vector, property: "vector" }, limit };
|
|
385
545
|
}
|
|
386
546
|
} else {
|
|
387
547
|
// hybrid
|
|
388
|
-
const
|
|
548
|
+
const embedResult = await this.resolveCompatibleQueryEmbedding(db, query, execution);
|
|
389
549
|
throwIfSearchAborted(execution, `OramaBackend ${mode} search aborted`);
|
|
390
|
-
if (!
|
|
550
|
+
if (!embedResult) {
|
|
391
551
|
searchParams = { term: query, limit };
|
|
392
552
|
} else {
|
|
393
|
-
searchParams = { mode: "hybrid", term: query, vector: { value:
|
|
553
|
+
searchParams = { mode: "hybrid", term: query, vector: { value: embedResult.vector, property: "vector" }, limit };
|
|
394
554
|
}
|
|
395
555
|
}
|
|
396
556
|
|
|
@@ -408,4 +568,215 @@ export class OramaBackend implements SearchBackend {
|
|
|
408
568
|
return [];
|
|
409
569
|
}
|
|
410
570
|
}
|
|
571
|
+
|
|
572
|
+
private async resolveCompatibleQueryEmbedding(
|
|
573
|
+
db: any,
|
|
574
|
+
query: string,
|
|
575
|
+
execution?: SearchExecutionOptions,
|
|
576
|
+
): Promise<EmbedWithProviderResult | null> {
|
|
577
|
+
const embedResult = await this.embedHelper.embedWithProvider(query, { signal: execution?.signal });
|
|
578
|
+
throwIfSearchAborted(execution, "OramaBackend query embedding aborted");
|
|
579
|
+
if (!embedResult || !this.isExpectedDimensionVector(embedResult.vector)) return null;
|
|
580
|
+
|
|
581
|
+
const storedProviderIdentity = await this.findCompatibleStoredVectorProvider(db, execution);
|
|
582
|
+
if (!storedProviderIdentity) {
|
|
583
|
+
this.rememberVectorProviderCompatibility(db, embedResult.providerIdentity, false);
|
|
584
|
+
return null;
|
|
585
|
+
}
|
|
586
|
+
if (storedProviderIdentity === embedResult.providerIdentity) return embedResult;
|
|
587
|
+
|
|
588
|
+
const fallbackEmbed = await this.embedQueryWithStoredFallbackProvider(query, storedProviderIdentity, execution);
|
|
589
|
+
throwIfSearchAborted(execution, "OramaBackend fallback query embedding aborted");
|
|
590
|
+
if (
|
|
591
|
+
fallbackEmbed &&
|
|
592
|
+
fallbackEmbed.providerIdentity === storedProviderIdentity &&
|
|
593
|
+
this.isExpectedDimensionVector(fallbackEmbed.vector)
|
|
594
|
+
) {
|
|
595
|
+
return fallbackEmbed;
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
this.rememberVectorProviderCompatibility(db, embedResult.providerIdentity, false);
|
|
599
|
+
return null;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
private async embedQueryWithStoredFallbackProvider(
|
|
603
|
+
query: string,
|
|
604
|
+
providerIdentity: EmbedProviderIdentity,
|
|
605
|
+
execution?: SearchExecutionOptions,
|
|
606
|
+
): Promise<EmbedWithProviderResult | null> {
|
|
607
|
+
const embedWithIdentity = (this.embedHelper as unknown as {
|
|
608
|
+
embedWithFallbackProviderIdentity?: (
|
|
609
|
+
text: string,
|
|
610
|
+
identity: EmbedProviderIdentity,
|
|
611
|
+
options?: { signal?: AbortSignal },
|
|
612
|
+
) => Promise<EmbedWithProviderResult | null>;
|
|
613
|
+
}).embedWithFallbackProviderIdentity;
|
|
614
|
+
if (typeof embedWithIdentity !== "function") return null;
|
|
615
|
+
return embedWithIdentity.call(this.embedHelper, query, providerIdentity, { signal: execution?.signal });
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
private async findCompatibleStoredVectorProvider(
|
|
619
|
+
db: any,
|
|
620
|
+
execution?: SearchExecutionOptions,
|
|
621
|
+
): Promise<EmbedProviderIdentity | null> {
|
|
622
|
+
const { search: oramaSearch, count } = this.oramaModule;
|
|
623
|
+
try {
|
|
624
|
+
const cached = this.vectorProviderCompatibility.get(db);
|
|
625
|
+
if (cached?.compatible) return cached.providerIdentity;
|
|
626
|
+
const existingCount = await count(db);
|
|
627
|
+
if (existingCount === 0) return null;
|
|
628
|
+
const allHits = await oramaSearch(db, {
|
|
629
|
+
term: "",
|
|
630
|
+
limit: existingCount + 100,
|
|
631
|
+
properties: ["vectorProvider"],
|
|
632
|
+
});
|
|
633
|
+
let providerIdentity: EmbedProviderIdentity | null = null;
|
|
634
|
+
let compatible = (allHits.hits ?? []).length > 0;
|
|
635
|
+
for (const hit of allHits.hits ?? []) {
|
|
636
|
+
throwIfSearchAborted(execution, "OramaBackend vector provider check aborted");
|
|
637
|
+
const doc = this.getStoredDocument(db, hit);
|
|
638
|
+
if (
|
|
639
|
+
typeof doc.vectorProvider !== "string" ||
|
|
640
|
+
doc.vectorProvider.length === 0 ||
|
|
641
|
+
!this.isCompatibleStoredVector(this.getStoredVector(db, hit, doc))
|
|
642
|
+
) {
|
|
643
|
+
compatible = false;
|
|
644
|
+
break;
|
|
645
|
+
}
|
|
646
|
+
if (providerIdentity && doc.vectorProvider !== providerIdentity) {
|
|
647
|
+
compatible = false;
|
|
648
|
+
break;
|
|
649
|
+
}
|
|
650
|
+
providerIdentity = doc.vectorProvider as EmbedProviderIdentity;
|
|
651
|
+
}
|
|
652
|
+
if (compatible && providerIdentity) {
|
|
653
|
+
this.vectorProviderCompatibility.set(db, {
|
|
654
|
+
providerIdentity,
|
|
655
|
+
compatible: true,
|
|
656
|
+
});
|
|
657
|
+
return providerIdentity;
|
|
658
|
+
}
|
|
659
|
+
return null;
|
|
660
|
+
} catch (err) {
|
|
661
|
+
if (isSearchAborted(execution)) throw err;
|
|
662
|
+
log.debug(`OramaBackend stored vector provider check failed: ${err}`);
|
|
663
|
+
return null;
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
|
|
667
|
+
private async dbHasCompatibleVectors(
|
|
668
|
+
db: any,
|
|
669
|
+
providerIdentity: EmbedProviderIdentity,
|
|
670
|
+
execution?: SearchExecutionOptions,
|
|
671
|
+
): Promise<boolean> {
|
|
672
|
+
const { search: oramaSearch, count } = this.oramaModule;
|
|
673
|
+
try {
|
|
674
|
+
const cached = this.vectorProviderCompatibility.get(db);
|
|
675
|
+
if (cached?.providerIdentity === providerIdentity) return cached.compatible;
|
|
676
|
+
const existingCount = await count(db);
|
|
677
|
+
if (existingCount === 0) return false;
|
|
678
|
+
const allHits = await oramaSearch(db, {
|
|
679
|
+
term: "",
|
|
680
|
+
limit: existingCount + 100,
|
|
681
|
+
properties: ["vectorProvider"],
|
|
682
|
+
});
|
|
683
|
+
let compatible = (allHits.hits ?? []).length > 0;
|
|
684
|
+
for (const hit of allHits.hits ?? []) {
|
|
685
|
+
throwIfSearchAborted(execution, "OramaBackend vector provider check aborted");
|
|
686
|
+
const doc = this.getStoredDocument(db, hit);
|
|
687
|
+
if (
|
|
688
|
+
doc.vectorProvider !== providerIdentity ||
|
|
689
|
+
!this.isCompatibleStoredVector(this.getStoredVector(db, hit, doc))
|
|
690
|
+
) {
|
|
691
|
+
compatible = false;
|
|
692
|
+
break;
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
this.vectorProviderCompatibility.set(db, { providerIdentity, compatible });
|
|
696
|
+
return compatible;
|
|
697
|
+
} catch (err) {
|
|
698
|
+
if (isSearchAborted(execution)) throw err;
|
|
699
|
+
log.debug(`OramaBackend vector provider check failed: ${err}`);
|
|
700
|
+
return false;
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
private rememberVectorProviderCompatibility(
|
|
705
|
+
db: unknown,
|
|
706
|
+
providerIdentity: EmbedProviderIdentity | null,
|
|
707
|
+
compatible: boolean,
|
|
708
|
+
): void {
|
|
709
|
+
if (!db || typeof db !== "object") return;
|
|
710
|
+
if (!providerIdentity) {
|
|
711
|
+
this.vectorProviderCompatibility.delete(db);
|
|
712
|
+
return;
|
|
713
|
+
}
|
|
714
|
+
this.vectorProviderCompatibility.set(db, { providerIdentity, compatible });
|
|
715
|
+
}
|
|
716
|
+
|
|
717
|
+
/**
 * Resolves the document record behind a search hit.
 *
 * Prefers the record held in the database's internal document store
 * (`db.data.docs.docs`, keyed by the stringified internal id); falls back to
 * the `document` carried on the hit itself, and finally to an empty object.
 *
 * @param db - Database handle whose internal structures are probed defensively.
 * @param hit - Search hit to resolve.
 * @returns The stored document, the hit's own document, or `{}`.
 */
private getStoredDocument(db: any, hit: any): Record<string, unknown> {
  const internalId = this.getInternalDocumentId(db, hit);
  if (internalId !== undefined && internalId !== null) {
    const stored = db?.data?.docs?.docs?.[String(internalId)];
    if (stored && typeof stored === "object") {
      return stored as Record<string, unknown>;
    }
  }

  if (hit?.document && typeof hit.document === "object") {
    return hit.document as Record<string, unknown>;
  }
  return {};
}
|
|
730
|
+
|
|
731
|
+
/**
 * Retrieves the embedding vector stored for a search hit.
 *
 * The vector on the document record wins when it normalizes successfully.
 * Otherwise the database's vector index
 * (`db.data.index.vectorIndexes.vector.node.vectors`) is queried by internal
 * id; when that entry is an array, its element at index 1 is taken as the
 * vector.
 *
 * @param db - Database handle whose internal structures are probed defensively.
 * @param hit - Search hit the vector belongs to.
 * @param doc - Document record already resolved for `hit`.
 * @returns The normalized vector, or `null` when none can be recovered.
 */
private getStoredVector(db: any, hit: any, doc: Record<string, unknown>): number[] | null {
  const fromDocument = this.normalizeStoredVector(doc.vector);
  if (fromDocument) return fromDocument;

  const internalId = this.getInternalDocumentId(db, hit);
  if (internalId === undefined || internalId === null) return null;

  const indexEntry = db?.data?.index?.vectorIndexes?.vector?.node?.vectors?.get?.(internalId);
  const candidate = Array.isArray(indexEntry) ? indexEntry[1] : indexEntry;
  return this.normalizeStoredVector(candidate);
}
|
|
740
|
+
|
|
741
|
+
/**
 * Maps a search hit's public id to the database's internal document id.
 *
 * The public id is read from `hit.id` when that is a string, otherwise from
 * `hit.document.id` when that is a string. The translation goes through
 * `db.internalDocumentIDStore.idToInternalId.get`.
 *
 * @param db - Database handle whose id store is probed defensively.
 * @param hit - Search hit to translate.
 * @returns Whatever the id store returns, or `undefined` when there is no
 *   usable (non-empty) public id or the store has no `get` function.
 */
private getInternalDocumentId(db: any, hit: any): unknown {
  let publicId: string | undefined;
  if (typeof hit?.id === "string") {
    publicId = hit.id;
  } else if (typeof hit?.document?.id === "string") {
    publicId = hit.document.id;
  }
  if (!publicId) return undefined;

  if (typeof db?.internalDocumentIDStore?.idToInternalId?.get !== "function") {
    return undefined;
  }
  return db.internalDocumentIDStore.idToInternalId.get(publicId);
}
|
|
752
|
+
|
|
753
|
+
/**
 * Type guard: true when `vector` is an array whose length equals the
 * configured embedding dimension. Element values are not inspected.
 */
private isExpectedDimensionVector(vector: number[] | null | undefined): vector is number[] {
  if (!Array.isArray(vector)) return false;
  return vector.length === this.embeddingDimension;
}
|
|
756
|
+
|
|
757
|
+
/**
 * Checks that a stored vector is usable for similarity search.
 *
 * A vector qualifies when it is an object that `Array.from` expands to exactly
 * `embeddingDimension` entries, every entry is a finite number, and at least
 * one entry is non-zero.
 *
 * @param vector - Candidate value read back from storage.
 * @returns `true` when the candidate meets all of the conditions above.
 */
private isCompatibleStoredVector(vector: unknown): vector is number[] {
  if (vector === null || typeof vector !== "object") return false;

  const entries = Array.from(vector as ArrayLike<number>);
  if (entries.length !== this.embeddingDimension) return false;

  let hasNonZero = false;
  for (const entry of entries) {
    if (!Number.isFinite(entry)) return false;
    if (entry !== 0) hasNonZero = true;
  }
  // An all-zero vector has the right shape but is rejected as unusable.
  return hasNonZero;
}
|
|
766
|
+
|
|
767
|
+
/**
 * Builds a fresh all-zero vector with `embeddingDimension` entries.
 */
private zeroVector(): number[] {
  // Array.from is kept (rather than `new Array(n)`) so the length is coerced
  // the same way for any dimension value.
  return Array.from<number>({ length: this.embeddingDimension }).fill(0);
}
|
|
770
|
+
|
|
771
|
+
/**
 * Converts a stored vector into a plain `number[]` of the expected dimension.
 *
 * Accepts plain arrays and typed arrays (any `ArrayBuffer` view other than
 * `DataView`). The result is returned only when it has exactly
 * `embeddingDimension` entries and every entry is a finite number.
 *
 * Entries must already be numeric (`number`, or `bigint` as produced by the
 * BigInt typed arrays). Other values are rejected rather than coerced:
 * `Number(null)`, `Number("")`, `Number(false)` and `Number([])` all yield `0`,
 * which would let a corrupted vector (for example one whose NaN/Infinity
 * entries were serialized to `null` by JSON) pass as valid. Holes in sparse
 * arrays are rejected for the same reason.
 *
 * @param vector - Raw value read from a document or vector index.
 * @returns A new numeric array, or `null` when the value is not a usable vector.
 */
private normalizeStoredVector(vector: unknown): number[] | null {
  let values: readonly unknown[] | null = null;
  if (Array.isArray(vector)) {
    values = vector;
  } else if (ArrayBuffer.isView(vector) && !(vector instanceof DataView)) {
    values = Array.from(vector as unknown as ArrayLike<unknown>);
  }
  if (!values || values.length !== this.embeddingDimension) return null;

  const normalized: number[] = [];
  for (const value of values) {
    let numeric: number;
    if (typeof value === "number") {
      numeric = value;
    } else if (typeof value === "bigint") {
      numeric = Number(value);
    } else {
      // null, undefined, strings, booleans, objects and array holes are not
      // vector components; never manufacture a 0 or 1 out of them.
      return null;
    }
    if (!Number.isFinite(numeric)) return null;
    normalized.push(numeric);
  }
  return normalized;
}
|
|
411
782
|
}
|
package/src/types.ts
CHANGED
|
@@ -585,6 +585,14 @@ export interface PluginConfig {
|
|
|
585
585
|
qmdGenerateModel?: string;
|
|
586
586
|
embeddingFallbackEnabled: boolean;
|
|
587
587
|
embeddingFallbackProvider: "auto" | "openai" | "local";
|
|
588
|
+
/**
|
|
589
|
+
* Host adapters may register an embedding provider scoped to this memoryDir.
|
|
590
|
+
* Core remains host-agnostic: when no provider is registered, the existing
|
|
591
|
+
* OpenAI/local fallback path is used.
|
|
592
|
+
*/
|
|
593
|
+
hostEmbeddingProviderEnabled: boolean;
|
|
594
|
+
hostEmbeddingProviderId?: string;
|
|
595
|
+
hostEmbeddingProviderModel?: string;
|
|
588
596
|
/**
|
|
589
597
|
* Optional model identifier for local embedding fallback requests.
|
|
590
598
|
*
|
|
@@ -816,6 +824,10 @@ export interface PluginConfig {
|
|
|
816
824
|
commandsListEnabled: boolean;
|
|
817
825
|
openclawToolsEnabled: boolean;
|
|
818
826
|
openclawToolSnippetMaxChars: number;
|
|
827
|
+
openclawMessageReceivedCaptureEnabled: boolean;
|
|
828
|
+
openclawReplyMetadataCaptureEnabled: boolean;
|
|
829
|
+
openclawReplyMetadataExtractionHintsEnabled: boolean;
|
|
830
|
+
openclawChannelEnvelopeCleaningEnabled: boolean;
|
|
819
831
|
sessionTogglesEnabled: boolean;
|
|
820
832
|
verboseRecallVisibility: boolean;
|
|
821
833
|
recallTranscriptsEnabled: boolean;
|
|
@@ -3066,6 +3078,11 @@ export interface TranscriptEntry {
|
|
|
3066
3078
|
metadata?: {
|
|
3067
3079
|
compactAfter?: boolean;
|
|
3068
3080
|
compactionId?: string | null;
|
|
3081
|
+
messageId?: string;
|
|
3082
|
+
threadId?: string;
|
|
3083
|
+
replyToId?: string;
|
|
3084
|
+
replyToBody?: string;
|
|
3085
|
+
replyToSender?: string;
|
|
3069
3086
|
};
|
|
3070
3087
|
}
|
|
3071
3088
|
|