@remnic/core 9.3.563 → 9.3.565
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-cli.js +40 -39
- package/dist/access-cli.js.map +1 -1
- package/dist/access-http.js +16 -16
- package/dist/access-mcp.js +13 -13
- package/dist/access-schema.js +3 -3
- package/dist/access-service.js +11 -11
- package/dist/active-recall.js +1 -1
- package/dist/adapters/index.js +4 -4
- package/dist/adapters/registry.js +2 -2
- package/dist/briefing.js +4 -4
- package/dist/causal-consolidation.js +5 -5
- package/dist/{chunk-I2K6KCVC.js → chunk-2FHLI4U6.js} +49 -49
- package/dist/chunk-3ONXXHQO.js +57 -0
- package/dist/chunk-3ONXXHQO.js.map +1 -0
- package/dist/{chunk-5GX5MUQ2.js → chunk-574MU2Y3.js} +3 -3
- package/dist/{chunk-65OLPXBU.js → chunk-5WB4C7KM.js} +6 -6
- package/dist/chunk-6PTSXBPE.js +483 -0
- package/dist/chunk-6PTSXBPE.js.map +1 -0
- package/dist/{chunk-Z56KAZQL.js → chunk-74VA26CT.js} +2 -2
- package/dist/{chunk-CC2ESOOG.js → chunk-7X7TBJRX.js} +2 -2
- package/dist/{chunk-O4M4WH6V.js → chunk-ARY5OOLG.js} +2 -2
- package/dist/{chunk-JBPKEARU.js → chunk-AU7Q3LSC.js} +4 -4
- package/dist/{chunk-PM3QHTFT.js → chunk-CF3ZF2YU.js} +3 -3
- package/dist/{chunk-SI3QCHWF.js → chunk-DARLGSFX.js} +5 -5
- package/dist/chunk-EWLQPEO6.js +308 -0
- package/dist/chunk-EWLQPEO6.js.map +1 -0
- package/dist/{chunk-FVCZINOF.js → chunk-FHBEL473.js} +2 -2
- package/dist/{chunk-7Q3RCKAQ.js → chunk-FXKPZ3H6.js} +2 -2
- package/dist/{chunk-5WLYNZPC.js → chunk-GBXGCFRH.js} +2 -2
- package/dist/{chunk-ILJXM3FV.js → chunk-HQO5EBUC.js} +10 -10
- package/dist/{chunk-FK556DDH.js → chunk-I4UNL747.js} +4 -4
- package/dist/{chunk-RLPIT4YI.js → chunk-IOTTZLFF.js} +38 -38
- package/dist/{chunk-TVZ6LKKS.js → chunk-IRFF6LSF.js} +8 -8
- package/dist/{chunk-M5T4Q2ZU.js → chunk-KGK2QKWL.js} +1 -1
- package/dist/chunk-KGK2QKWL.js.map +1 -0
- package/dist/{chunk-IPLYGWQF.js → chunk-KQAFEZQX.js} +5 -5
- package/dist/chunk-M46RYSMW.js +597 -0
- package/dist/chunk-M46RYSMW.js.map +1 -0
- package/dist/{chunk-KXULCVOC.js → chunk-M6I5Z4SR.js} +4 -2
- package/dist/chunk-M6I5Z4SR.js.map +1 -0
- package/dist/{chunk-JFN6K74Q.js → chunk-MQEIWDYW.js} +2 -2
- package/dist/{chunk-7H6CFEBJ.js → chunk-NZPF2SYV.js} +8 -1
- package/dist/{chunk-7H6CFEBJ.js.map → chunk-NZPF2SYV.js.map} +1 -1
- package/dist/{chunk-SML26KED.js → chunk-OB6353F7.js} +16 -12
- package/dist/chunk-OB6353F7.js.map +1 -0
- package/dist/{chunk-SOTR74FK.js → chunk-OPYFD6PD.js} +2 -2
- package/dist/{chunk-3C5RPJAX.js → chunk-OXJBNGBK.js} +2 -2
- package/dist/{chunk-BD5LHQWD.js → chunk-PPPZY2EU.js} +2 -2
- package/dist/{chunk-25BY3HHZ.js → chunk-SUTSSOYU.js} +2 -2
- package/dist/{chunk-KS7WO6EQ.js → chunk-VFB2G5YL.js} +20 -20
- package/dist/{chunk-BUUYY2H2.js → chunk-WP5OWVLZ.js} +4 -4
- package/dist/{chunk-6URPAY2D.js → chunk-XCAZF7KQ.js} +207 -53
- package/dist/chunk-XCAZF7KQ.js.map +1 -0
- package/dist/{chunk-S53PAX2V.js → chunk-XM7BYXT7.js} +2 -2
- package/dist/{chunk-FADZBOR4.js → chunk-XRWTAEZM.js} +2 -2
- package/dist/{chunk-E5OECWZ5.js → chunk-XT7XVA53.js} +2 -2
- package/dist/{chunk-R3PS27B4.js → chunk-Z4R6RI2N.js} +2 -2
- package/dist/cli.js +44 -43
- package/dist/compounding/engine.js +4 -4
- package/dist/config.js +1 -1
- package/dist/connectors/codex-materialize-runner.js +4 -4
- package/dist/connectors/index.js +4 -4
- package/dist/embedding-fallback.d.ts +12 -1
- package/dist/embedding-fallback.js +4 -1
- package/dist/entity-retrieval.js +4 -4
- package/dist/host-embedding-provider.d.ts +21 -0
- package/dist/host-embedding-provider.js +14 -0
- package/dist/host-embedding-provider.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +71 -63
- package/dist/index.js.map +1 -1
- package/dist/lcm/index.js +3 -3
- package/dist/maintenance/memory-governance.js +4 -4
- package/dist/maintenance/rebuild-memory-lifecycle-ledger.js +4 -4
- package/dist/maintenance/rebuild-memory-projection.js +5 -5
- package/dist/namespaces/migrate.js +14 -13
- package/dist/namespaces/search.js +9 -8
- package/dist/namespaces/storage.js +4 -4
- package/dist/operator-toolkit.js +17 -16
- package/dist/orchestrator.js +32 -31
- package/dist/recall-explain-renderer.js +3 -3
- package/dist/recall-xray-cli.js +4 -4
- package/dist/recall-xray-renderer.js +3 -3
- package/dist/recall-xray.js +2 -2
- package/dist/resume-bundles.js +2 -2
- package/dist/search/embed-helper.d.ts +48 -4
- package/dist/search/embed-helper.js +2 -1
- package/dist/search/factory.js +8 -7
- package/dist/search/index.d.ts +1 -0
- package/dist/search/index.js +12 -11
- package/dist/search/lancedb-backend.d.ts +11 -0
- package/dist/search/lancedb-backend.js +2 -2
- package/dist/search/meilisearch-backend.js +2 -2
- package/dist/search/orama-backend.d.ts +16 -0
- package/dist/search/orama-backend.js +2 -2
- package/dist/semantic-consolidation.js +5 -5
- package/dist/semantic-rule-promotion.js +4 -4
- package/dist/semantic-rule-verifier.js +4 -4
- package/dist/storage.js +3 -3
- package/dist/transfer/autodetect.js +1 -1
- package/dist/transfer/backup.js +1 -1
- package/dist/transfer/capsule-export.js +2 -2
- package/dist/transfer/types.d.ts +6 -6
- package/dist/types.d.ts +17 -0
- package/dist/types.js +1 -1
- package/dist/verified-recall.js +4 -4
- package/package.json +11 -1
- package/src/config.ts +18 -0
- package/src/embedding-fallback.ts +293 -61
- package/src/host-embedding-provider.ts +84 -0
- package/src/index.ts +7 -0
- package/src/namespaces/search.ts +9 -1
- package/src/qmd.test.ts +28 -0
- package/src/search/embed-helper.ts +319 -51
- package/src/search/factory.ts +6 -2
- package/src/search/lancedb-backend.ts +297 -41
- package/src/search/orama-backend.ts +418 -47
- package/src/types.ts +17 -0
- package/dist/chunk-6URPAY2D.js.map +0 -1
- package/dist/chunk-FUC4LZMD.js +0 -301
- package/dist/chunk-FUC4LZMD.js.map +0 -1
- package/dist/chunk-KXULCVOC.js.map +0 -1
- package/dist/chunk-M5T4Q2ZU.js.map +0 -1
- package/dist/chunk-ONPLNAPX.js +0 -133
- package/dist/chunk-ONPLNAPX.js.map +0 -1
- package/dist/chunk-QVJ4NWL2.js +0 -335
- package/dist/chunk-QVJ4NWL2.js.map +0 -1
- package/dist/chunk-SML26KED.js.map +0 -1
- /package/dist/{chunk-I2K6KCVC.js.map → chunk-2FHLI4U6.js.map} +0 -0
- /package/dist/{chunk-5GX5MUQ2.js.map → chunk-574MU2Y3.js.map} +0 -0
- /package/dist/{chunk-65OLPXBU.js.map → chunk-5WB4C7KM.js.map} +0 -0
- /package/dist/{chunk-Z56KAZQL.js.map → chunk-74VA26CT.js.map} +0 -0
- /package/dist/{chunk-CC2ESOOG.js.map → chunk-7X7TBJRX.js.map} +0 -0
- /package/dist/{chunk-O4M4WH6V.js.map → chunk-ARY5OOLG.js.map} +0 -0
- /package/dist/{chunk-JBPKEARU.js.map → chunk-AU7Q3LSC.js.map} +0 -0
- /package/dist/{chunk-PM3QHTFT.js.map → chunk-CF3ZF2YU.js.map} +0 -0
- /package/dist/{chunk-SI3QCHWF.js.map → chunk-DARLGSFX.js.map} +0 -0
- /package/dist/{chunk-FVCZINOF.js.map → chunk-FHBEL473.js.map} +0 -0
- /package/dist/{chunk-7Q3RCKAQ.js.map → chunk-FXKPZ3H6.js.map} +0 -0
- /package/dist/{chunk-5WLYNZPC.js.map → chunk-GBXGCFRH.js.map} +0 -0
- /package/dist/{chunk-ILJXM3FV.js.map → chunk-HQO5EBUC.js.map} +0 -0
- /package/dist/{chunk-FK556DDH.js.map → chunk-I4UNL747.js.map} +0 -0
- /package/dist/{chunk-RLPIT4YI.js.map → chunk-IOTTZLFF.js.map} +0 -0
- /package/dist/{chunk-TVZ6LKKS.js.map → chunk-IRFF6LSF.js.map} +0 -0
- /package/dist/{chunk-IPLYGWQF.js.map → chunk-KQAFEZQX.js.map} +0 -0
- /package/dist/{chunk-JFN6K74Q.js.map → chunk-MQEIWDYW.js.map} +0 -0
- /package/dist/{chunk-SOTR74FK.js.map → chunk-OPYFD6PD.js.map} +0 -0
- /package/dist/{chunk-3C5RPJAX.js.map → chunk-OXJBNGBK.js.map} +0 -0
- /package/dist/{chunk-BD5LHQWD.js.map → chunk-PPPZY2EU.js.map} +0 -0
- /package/dist/{chunk-25BY3HHZ.js.map → chunk-SUTSSOYU.js.map} +0 -0
- /package/dist/{chunk-KS7WO6EQ.js.map → chunk-VFB2G5YL.js.map} +0 -0
- /package/dist/{chunk-BUUYY2H2.js.map → chunk-WP5OWVLZ.js.map} +0 -0
- /package/dist/{chunk-S53PAX2V.js.map → chunk-XM7BYXT7.js.map} +0 -0
- /package/dist/{chunk-FADZBOR4.js.map → chunk-XRWTAEZM.js.map} +0 -0
- /package/dist/{chunk-E5OECWZ5.js.map → chunk-XT7XVA53.js.map} +0 -0
- /package/dist/{chunk-R3PS27B4.js.map → chunk-Z4R6RI2N.js.map} +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { log } from "../logger.js";
|
|
2
2
|
import type { SearchBackend, SearchExecutionOptions, SearchQueryOptions, SearchResult } from "./port.js";
|
|
3
|
-
import type { EmbedHelper } from "./embed-helper.js";
|
|
3
|
+
import type { EmbedHelper, EmbedProviderIdentity, EmbedWithProviderResult } from "./embed-helper.js";
|
|
4
4
|
import { scanMemoryDir } from "./document-scanner.js";
|
|
5
5
|
import { isSearchAborted, throwIfSearchAborted } from "./abort.js";
|
|
6
6
|
|
|
@@ -27,6 +27,10 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
27
27
|
private available = false;
|
|
28
28
|
private db: any = null;
|
|
29
29
|
private lanceModule: any = null;
|
|
30
|
+
private readonly vectorProviderCompatibility = new WeakMap<
|
|
31
|
+
object,
|
|
32
|
+
{ providerIdentity: EmbedProviderIdentity; compatible: boolean }
|
|
33
|
+
>();
|
|
30
34
|
|
|
31
35
|
constructor(opts: LanceDbBackendOptions) {
|
|
32
36
|
this.dbPath = opts.dbPath;
|
|
@@ -125,7 +129,7 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
125
129
|
|
|
126
130
|
async updateCollection(collection: string, execution?: SearchExecutionOptions): Promise<void> {
|
|
127
131
|
if (isSearchAborted(execution)) return;
|
|
128
|
-
|
|
132
|
+
let table = await this.ensureTableForCollection(collection);
|
|
129
133
|
if (isSearchAborted(execution)) return;
|
|
130
134
|
if (!table) return;
|
|
131
135
|
|
|
@@ -143,32 +147,68 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
143
147
|
return;
|
|
144
148
|
}
|
|
145
149
|
|
|
146
|
-
const
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
150
|
+
const embeddingProviderIdentity = this.embedHelper.getProviderIdentity();
|
|
151
|
+
const existingVectors = new Map<string, {
|
|
152
|
+
vector: number[];
|
|
153
|
+
providerIdentity?: string;
|
|
154
|
+
}>();
|
|
155
|
+
const vectorProviderColumnState = await this.tableVectorProviderColumnState(table);
|
|
156
|
+
if (vectorProviderColumnState === "missing") {
|
|
157
|
+
table = await this.recreateTableForCollection(collection);
|
|
158
|
+
if (isSearchAborted(execution)) return;
|
|
159
|
+
if (!table) return;
|
|
160
|
+
} else if (vectorProviderColumnState === "present") {
|
|
161
|
+
try {
|
|
162
|
+
const existingRows = await table.query().select(["docid", "vector", "vectorProvider"]).toArray();
|
|
163
|
+
for (const row of existingRows ?? []) {
|
|
164
|
+
if (isSearchAborted(execution)) return;
|
|
165
|
+
const docid = row.docid;
|
|
166
|
+
if (typeof docid !== "string") continue;
|
|
167
|
+
const vector = row.vector;
|
|
168
|
+
if (!vector || typeof vector !== "object") continue;
|
|
169
|
+
existingVectors.set(docid, {
|
|
170
|
+
vector: Array.from(vector as ArrayLike<number>),
|
|
171
|
+
providerIdentity: typeof row.vectorProvider === "string" ? row.vectorProvider : undefined,
|
|
172
|
+
});
|
|
173
|
+
}
|
|
174
|
+
} catch {
|
|
175
|
+
log.debug("LanceDbBackend skipped refresh after vector preservation failed");
|
|
176
|
+
return;
|
|
156
177
|
}
|
|
157
|
-
}
|
|
158
|
-
|
|
178
|
+
} else {
|
|
179
|
+
log.debug("LanceDbBackend skipped vector preservation after vectorProvider probe failed");
|
|
180
|
+
return;
|
|
159
181
|
}
|
|
160
182
|
|
|
161
|
-
const rows = docs.map((d) =>
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
183
|
+
const rows = docs.map((d) => {
|
|
184
|
+
const existing = existingVectors.get(d.docid);
|
|
185
|
+
const canPreserveVector =
|
|
186
|
+
existing &&
|
|
187
|
+
this.isCompatibleStoredVector(existing.vector) &&
|
|
188
|
+
(!embeddingProviderIdentity ||
|
|
189
|
+
existing.providerIdentity === embeddingProviderIdentity);
|
|
190
|
+
return {
|
|
191
|
+
docid: d.docid,
|
|
192
|
+
path: d.path,
|
|
193
|
+
content: d.content,
|
|
194
|
+
snippet: d.snippet,
|
|
195
|
+
vector: canPreserveVector
|
|
196
|
+
? existing.vector
|
|
197
|
+
: new Array(this.embeddingDimension).fill(0),
|
|
198
|
+
vectorProvider: canPreserveVector
|
|
199
|
+
? existing.providerIdentity ?? ""
|
|
200
|
+
: "",
|
|
201
|
+
};
|
|
202
|
+
});
|
|
168
203
|
|
|
169
204
|
try {
|
|
170
205
|
if (isSearchAborted(execution)) return;
|
|
171
206
|
await table.add(rows, { mode: "overwrite" });
|
|
207
|
+
this.rememberVectorProviderCompatibility(
|
|
208
|
+
table,
|
|
209
|
+
embeddingProviderIdentity,
|
|
210
|
+
rows.length > 0 && rows.every((row) => row.vectorProvider === embeddingProviderIdentity),
|
|
211
|
+
);
|
|
172
212
|
if (isSearchAborted(execution)) return;
|
|
173
213
|
// Create FTS index on content column
|
|
174
214
|
try {
|
|
@@ -193,25 +233,66 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
193
233
|
if (!table) return;
|
|
194
234
|
|
|
195
235
|
try {
|
|
196
|
-
const
|
|
236
|
+
const embeddingProviderIdentity = this.embedHelper.getProviderIdentity();
|
|
237
|
+
const allRows = await table.query().select(["docid", "content", "vector", "vectorProvider"]).toArray();
|
|
197
238
|
const needsEmbed = allRows.filter((row: any) => {
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
return arr.length === 0 || arr.every((v: number) => v === 0);
|
|
239
|
+
if (embeddingProviderIdentity && row.vectorProvider !== embeddingProviderIdentity) {
|
|
240
|
+
return true;
|
|
241
|
+
}
|
|
242
|
+
return !this.isCompatibleStoredVector(row.vector);
|
|
203
243
|
});
|
|
204
244
|
|
|
205
|
-
if (needsEmbed.length === 0)
|
|
245
|
+
if (needsEmbed.length === 0) {
|
|
246
|
+
this.rememberVectorProviderCompatibility(table, embeddingProviderIdentity, true);
|
|
247
|
+
return;
|
|
248
|
+
}
|
|
206
249
|
|
|
207
|
-
|
|
208
|
-
|
|
250
|
+
let rowsToEmbed = needsEmbed;
|
|
251
|
+
let embedResult = await this.embedHelper.embedBatchWithProvider(
|
|
252
|
+
rowsToEmbed.map((row: any) => row.content as string),
|
|
253
|
+
);
|
|
254
|
+
if (!embedResult) return;
|
|
255
|
+
if (
|
|
256
|
+
embeddingProviderIdentity &&
|
|
257
|
+
embedResult.providerIdentity !== embeddingProviderIdentity
|
|
258
|
+
) {
|
|
259
|
+
const effectiveProviderIdentity = embedResult.providerIdentity;
|
|
260
|
+
const originalDocids = new Set(rowsToEmbed.map((row: any) => row.docid));
|
|
261
|
+
const effectiveNeedsEmbed = allRows.filter((row: any) => (
|
|
262
|
+
row.vectorProvider !== effectiveProviderIdentity ||
|
|
263
|
+
!this.isCompatibleStoredVector(row.vector)
|
|
264
|
+
));
|
|
265
|
+
const sameRows =
|
|
266
|
+
effectiveNeedsEmbed.length === rowsToEmbed.length &&
|
|
267
|
+
effectiveNeedsEmbed.every((row: any) => originalDocids.has(row.docid));
|
|
268
|
+
if (!sameRows) {
|
|
269
|
+
const effectiveTexts = effectiveNeedsEmbed.map((row: any) => row.content as string);
|
|
270
|
+
const effectiveEmbedResult = await this.embedHelper.embedBatchWithProvider(effectiveTexts);
|
|
271
|
+
if (effectiveEmbedResult) {
|
|
272
|
+
rowsToEmbed = effectiveNeedsEmbed;
|
|
273
|
+
embedResult = effectiveEmbedResult;
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
}
|
|
277
|
+
const { vectors, providerIdentity } = embedResult;
|
|
209
278
|
|
|
210
|
-
|
|
279
|
+
let allEmbedded = true;
|
|
280
|
+
for (let i = 0; i < rowsToEmbed.length; i++) {
|
|
211
281
|
const vec = vectors[i];
|
|
212
|
-
if (!vec)
|
|
213
|
-
|
|
214
|
-
|
|
282
|
+
if (!this.isExpectedDimensionVector(vec)) {
|
|
283
|
+
allEmbedded = false;
|
|
284
|
+
continue;
|
|
285
|
+
}
|
|
286
|
+
const docid = rowsToEmbed[i].docid;
|
|
287
|
+
await table.update({
|
|
288
|
+
where: `docid = '${docid.replace(/'/g, "''")}'`,
|
|
289
|
+
values: { vector: vec, vectorProvider: providerIdentity },
|
|
290
|
+
});
|
|
291
|
+
}
|
|
292
|
+
if (allEmbedded) {
|
|
293
|
+
this.rememberVectorProviderCompatibility(table, providerIdentity, true);
|
|
294
|
+
} else {
|
|
295
|
+
this.rememberVectorProviderCompatibility(table, providerIdentity, false);
|
|
215
296
|
}
|
|
216
297
|
} catch (err) {
|
|
217
298
|
log.debug(`LanceDbBackend embed failed: ${err}`);
|
|
@@ -264,6 +345,7 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
264
345
|
content: "",
|
|
265
346
|
snippet: "",
|
|
266
347
|
vector: new Array(this.embeddingDimension).fill(0),
|
|
348
|
+
vectorProvider: "",
|
|
267
349
|
};
|
|
268
350
|
const newTable = await db.createTable(collection, [emptyRow]);
|
|
269
351
|
try {
|
|
@@ -279,6 +361,30 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
279
361
|
return newTable;
|
|
280
362
|
}
|
|
281
363
|
|
|
364
|
+
/**
 * Drops the table backing `collection` and builds it again.
 *
 * The drop is best-effort: any failure is ignored so that the creation step
 * below still gets a chance to run. When `collection` is this backend's own
 * collection, the cached `this.table` handle is cleared first so it is not
 * reused after the drop.
 *
 * @param collection Name of the collection whose table is rebuilt.
 * @returns Whatever `ensureTableForCollection` resolves to for `collection`.
 */
private async recreateTableForCollection(collection: string): Promise<any> {
  const connection = await this.ensureDb();
  try {
    await connection.dropTable(collection);
  } catch {
    // Best-effort legacy schema migration; table creation below may still recover.
  }
  const isOwnCollection = collection === this.collection;
  if (isOwnCollection) {
    this.table = null;
  }
  return this.ensureTableForCollection(collection);
}
|
|
374
|
+
|
|
375
|
+
/**
 * Probes whether `table` exposes a `vectorProvider` column.
 *
 * @param table Table handle to probe.
 * @returns `"present"` when selecting the column succeeds, `"missing"` when the
 *   failure is recognised by `isMissingVectorProviderColumnError`, and
 *   `"unknown"` for any other failure (which is logged at debug level).
 */
private async tableVectorProviderColumnState(table: any): Promise<"present" | "missing" | "unknown"> {
  try {
    // Only the success or failure of the projection matters here, so cap the
    // probe at one row instead of materialising the whole column.
    await table.query().select(["vectorProvider"]).limit(1).toArray();
    return "present";
  } catch (err) {
    if (isMissingVectorProviderColumnError(err)) {
      return "missing";
    }
    log.debug(`LanceDbBackend vectorProvider column probe failed: ${err}`);
    return "unknown";
  }
}
|
|
387
|
+
|
|
282
388
|
private async ensureTable(): Promise<any> {
|
|
283
389
|
if (this.table) return this.table;
|
|
284
390
|
|
|
@@ -297,6 +403,7 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
297
403
|
content: "",
|
|
298
404
|
snippet: "",
|
|
299
405
|
vector: new Array(this.embeddingDimension).fill(0),
|
|
406
|
+
vectorProvider: "",
|
|
300
407
|
};
|
|
301
408
|
this.table = await db.createTable(this.collection, [emptyRow]);
|
|
302
409
|
// Create FTS index on content column
|
|
@@ -330,23 +437,23 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
330
437
|
}
|
|
331
438
|
|
|
332
439
|
if (mode === "vector") {
|
|
333
|
-
const
|
|
440
|
+
const embedResult = await this.resolveCompatibleQueryEmbedding(table, query, execution);
|
|
334
441
|
throwIfSearchAborted(execution, `LanceDbBackend ${mode} search aborted`);
|
|
335
|
-
if (!
|
|
442
|
+
if (!embedResult) {
|
|
336
443
|
// Fall back to FTS
|
|
337
444
|
const results = await table.search(query, "fts").limit(limit).toArray();
|
|
338
445
|
throwIfSearchAborted(execution, `LanceDbBackend ${mode} search aborted`);
|
|
339
446
|
return this.mapRows(results);
|
|
340
447
|
}
|
|
341
|
-
const results = await table.search(
|
|
448
|
+
const results = await table.search(embedResult.vector).limit(limit).toArray();
|
|
342
449
|
throwIfSearchAborted(execution, `LanceDbBackend ${mode} search aborted`);
|
|
343
450
|
return this.mapRows(results);
|
|
344
451
|
}
|
|
345
452
|
|
|
346
453
|
// hybrid — try FTS+vector with RRF reranking
|
|
347
|
-
const
|
|
454
|
+
const embedResult = await this.resolveCompatibleQueryEmbedding(table, query, execution);
|
|
348
455
|
throwIfSearchAborted(execution, `LanceDbBackend ${mode} search aborted`);
|
|
349
|
-
if (!
|
|
456
|
+
if (!embedResult) {
|
|
350
457
|
const results = await table.search(query, "fts").limit(limit).toArray();
|
|
351
458
|
throwIfSearchAborted(execution, `LanceDbBackend ${mode} search aborted`);
|
|
352
459
|
return this.mapRows(results);
|
|
@@ -355,14 +462,14 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
355
462
|
try {
|
|
356
463
|
const results = await table
|
|
357
464
|
.search(query, "hybrid")
|
|
358
|
-
.vector(
|
|
465
|
+
.vector(embedResult.vector)
|
|
359
466
|
.limit(limit)
|
|
360
467
|
.toArray();
|
|
361
468
|
throwIfSearchAborted(execution, `LanceDbBackend ${mode} search aborted`);
|
|
362
469
|
return this.mapRows(results);
|
|
363
470
|
} catch {
|
|
364
471
|
// Hybrid may not be supported in all LanceDB versions — fall back to vector
|
|
365
|
-
const results = await table.search(
|
|
472
|
+
const results = await table.search(embedResult.vector).limit(limit).toArray();
|
|
366
473
|
throwIfSearchAborted(execution, `LanceDbBackend ${mode} search aborted`);
|
|
367
474
|
return this.mapRows(results);
|
|
368
475
|
}
|
|
@@ -372,6 +479,93 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
372
479
|
}
|
|
373
480
|
}
|
|
374
481
|
|
|
482
|
+
/**
 * Embeds `query` with a provider that matches the vectors stored in `table`.
 *
 * Steps:
 *  1. Embed with the helper's default path; give up (`null`) when that yields
 *     nothing or a vector whose length differs from `this.embeddingDimension`.
 *  2. Look up the single provider identity shared by the stored vectors.
 *  3. If it equals the primary embedding's provider, return the primary result.
 *  4. Otherwise try to embed through the stored provider's fallback path and
 *     return that result when its identity and dimension both match.
 *
 * Whenever step 2 finds no provider or step 4 fails, the table is recorded as
 * incompatible with the primary provider and `null` is returned.
 *
 * @param table Table whose stored vectors the query must be comparable with.
 * @param query Text to embed.
 * @param execution Optional execution options; its `signal` is forwarded to the
 *   embed calls and checked through `throwIfSearchAborted` after each of them.
 * @returns A compatible embedding, or `null` when none could be produced.
 */
private async resolveCompatibleQueryEmbedding(
  table: any,
  query: string,
  execution?: SearchExecutionOptions,
): Promise<EmbedWithProviderResult | null> {
  const primary = await this.embedHelper.embedWithProvider(query, { signal: execution?.signal });
  throwIfSearchAborted(execution, "LanceDbBackend query embedding aborted");
  if (!primary) return null;
  if (!this.isExpectedDimensionVector(primary.vector)) return null;

  const storedIdentity = await this.findCompatibleStoredVectorProvider(table, execution);
  if (storedIdentity) {
    if (storedIdentity === primary.providerIdentity) {
      return primary;
    }

    const viaStoredProvider = await this.embedQueryWithStoredFallbackProvider(query, storedIdentity, execution);
    throwIfSearchAborted(execution, "LanceDbBackend fallback query embedding aborted");
    const usable =
      viaStoredProvider &&
      viaStoredProvider.providerIdentity === storedIdentity &&
      this.isExpectedDimensionVector(viaStoredProvider.vector);
    if (usable) {
      return viaStoredProvider;
    }
  }

  // Either no single stored provider exists or the fallback could not match it.
  this.rememberVectorProviderCompatibility(table, primary.providerIdentity, false);
  return null;
}
|
|
511
|
+
|
|
512
|
+
/**
 * Embeds `query` through the helper's optional
 * `embedWithFallbackProviderIdentity` method, if the helper has one.
 *
 * The method is looked up dynamically because it is not part of the
 * `EmbedHelper` type imported by this file.
 *
 * @param query Text to embed.
 * @param providerIdentity Provider identity passed to the helper.
 * @param execution Optional execution options; its `signal` is forwarded.
 * @returns The helper's result, or `null` when the helper has no such method.
 */
private async embedQueryWithStoredFallbackProvider(
  query: string,
  providerIdentity: EmbedProviderIdentity,
  execution?: SearchExecutionOptions,
): Promise<EmbedWithProviderResult | null> {
  type FallbackEmbed = (
    text: string,
    identity: EmbedProviderIdentity,
    options?: { signal?: AbortSignal },
  ) => Promise<EmbedWithProviderResult | null>;

  const helper = this.embedHelper as unknown as { embedWithFallbackProviderIdentity?: FallbackEmbed };
  const fallbackEmbed = helper.embedWithFallbackProviderIdentity;
  if (typeof fallbackEmbed === "function") {
    // Invoke with the helper as receiver so the method keeps its own `this`.
    return fallbackEmbed.call(this.embedHelper, query, providerIdentity, { signal: execution?.signal });
  }
  return null;
}
|
|
527
|
+
|
|
528
|
+
/**
 * Finds the single provider identity shared by every stored vector in `table`.
 *
 * A cached positive entry in `vectorProviderCompatibility` short-circuits the
 * scan. Otherwise every row's `vector` and `vectorProvider` are read; the scan
 * succeeds only when the table has at least one row and every row carries the
 * same non-empty provider string together with a vector accepted by
 * `isCompatibleStoredVector`. Only a successful scan is cached.
 *
 * @param table Table to inspect.
 * @param execution Optional execution options, checked for abort on each row.
 * @returns The shared provider identity, or `null` when the rows are empty,
 *   mixed, unlabelled, hold unusable vectors, or the read fails.
 */
private async findCompatibleStoredVectorProvider(
  table: any,
  execution?: SearchExecutionOptions,
): Promise<EmbedProviderIdentity | null> {
  try {
    const cached = this.vectorProviderCompatibility.get(table);
    if (cached?.compatible) return cached.providerIdentity;

    // Normalise a nullish result up front; the previous code read `rows.length`
    // before applying its `?? []` guard, turning "no rows" into a logged failure.
    const rows = (await table.query().select(["vector", "vectorProvider"]).toArray()) ?? [];
    let providerIdentity: EmbedProviderIdentity | null = null;
    for (const row of rows) {
      throwIfSearchAborted(execution, "LanceDbBackend vector provider check aborted");
      const rowProvider = row.vectorProvider;
      if (
        typeof rowProvider !== "string" ||
        rowProvider.length === 0 ||
        !this.isCompatibleStoredVector(row.vector)
      ) {
        return null;
      }
      // A second, different provider means the table is mixed.
      if (providerIdentity && rowProvider !== providerIdentity) {
        return null;
      }
      providerIdentity = rowProvider as EmbedProviderIdentity;
    }

    // Still null here means the table had no rows at all.
    if (!providerIdentity) return null;

    this.vectorProviderCompatibility.set(table, {
      providerIdentity,
      compatible: true,
    });
    return providerIdentity;
  } catch (err) {
    // Abort errors must propagate; everything else degrades to "no provider".
    if (isSearchAborted(execution)) throw err;
    log.debug(`LanceDbBackend stored vector provider check failed: ${err}`);
    return null;
  }
}
|
|
568
|
+
|
|
375
569
|
private mapRows(rows: any[]): SearchResult[] {
|
|
376
570
|
return (rows ?? [])
|
|
377
571
|
.filter((row) => row.docid && row.docid !== "__placeholder__")
|
|
@@ -382,4 +576,66 @@ export class LanceDbBackend implements SearchBackend {
|
|
|
382
576
|
score: row._relevance_score ?? (row._distance != null ? 1 / (1 + (row._distance ?? 0)) : 0.5),
|
|
383
577
|
}));
|
|
384
578
|
}
|
|
579
|
+
|
|
580
|
+
/**
 * Reports whether every stored vector in `table` was produced by
 * `providerIdentity` and passes `isCompatibleStoredVector`.
 *
 * A cache entry for the same provider identity is returned as-is (positive or
 * negative). Otherwise the rows are scanned and the outcome is cached. A table
 * without rows counts as not compatible.
 *
 * NOTE(review): no caller of this method is visible in this diff; confirm it
 * is still needed.
 *
 * @param table Table to inspect.
 * @param providerIdentity Provider identity every row must carry.
 * @param execution Optional execution options, checked for abort on each row.
 * @returns `true` only when the table is non-empty and every row matches;
 *   `false` otherwise, including when the read fails.
 */
private async tableHasCompatibleVectors(
  table: any,
  providerIdentity: EmbedProviderIdentity,
  execution?: SearchExecutionOptions,
): Promise<boolean> {
  try {
    const cached = this.vectorProviderCompatibility.get(table);
    if (cached?.providerIdentity === providerIdentity) return cached.compatible;

    // Normalise a nullish result before touching `.length`; previously that
    // case threw into the catch below and the negative result was never cached.
    const rows = (await table.query().select(["vector", "vectorProvider"]).toArray()) ?? [];
    let compatible = rows.length > 0;
    for (const row of rows) {
      throwIfSearchAborted(execution, "LanceDbBackend vector provider check aborted");
      if (
        row.vectorProvider !== providerIdentity ||
        !this.isCompatibleStoredVector(row.vector)
      ) {
        compatible = false;
        break;
      }
    }
    this.vectorProviderCompatibility.set(table, { providerIdentity, compatible });
    return compatible;
  } catch (err) {
    // Abort errors must propagate; any other failure is treated as "not compatible".
    if (isSearchAborted(execution)) throw err;
    log.debug(`LanceDbBackend vector provider check failed: ${err}`);
    return false;
  }
}
|
|
608
|
+
|
|
609
|
+
/**
 * Updates the per-table compatibility cache.
 *
 * Values of `table` that are not objects are ignored, since the cache is a
 * `WeakMap` and can only be keyed by objects. A missing provider identity
 * removes the table's entry; otherwise the entry is overwritten.
 *
 * @param table Table handle used as the cache key.
 * @param providerIdentity Provider the verdict applies to, or `null` to clear.
 * @param compatible Whether the table's vectors match that provider.
 */
private rememberVectorProviderCompatibility(
  table: unknown,
  providerIdentity: EmbedProviderIdentity | null,
  compatible: boolean,
): void {
  if (typeof table !== "object" || table === null) return;

  if (providerIdentity) {
    this.vectorProviderCompatibility.set(table, { providerIdentity, compatible });
  } else {
    this.vectorProviderCompatibility.delete(table);
  }
}
|
|
621
|
+
|
|
622
|
+
/**
 * Type guard: `vector` is a real array whose length equals
 * `this.embeddingDimension`.
 *
 * @param vector Candidate embedding; may be `null` or `undefined`.
 * @returns `true` when the candidate is an array of the expected length.
 */
private isExpectedDimensionVector(vector: number[] | null | undefined): vector is number[] {
  if (!Array.isArray(vector)) return false;
  return vector.length === this.embeddingDimension;
}
|
|
625
|
+
|
|
626
|
+
/**
 * Checks that a stored vector can be used for similarity search.
 *
 * The value is copied with `Array.from` and accepted only when the copy has
 * `this.embeddingDimension` entries, all of them finite numbers, and at least
 * one of them non-zero.
 *
 * @param vector Raw `vector` cell read from a table row.
 * @returns `true` when the vector is usable, `false` otherwise.
 */
private isCompatibleStoredVector(vector: unknown): boolean {
  if (!vector || typeof vector !== "object") return false;

  const values = Array.from(vector as ArrayLike<number>);
  if (values.length !== this.embeddingDimension) return false;

  let sawNonZero = false;
  for (const value of values) {
    if (!Number.isFinite(value)) return false;
    if (value !== 0) sawNonZero = true;
  }
  return sawNonZero;
}
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
/**
 * Heuristically decides whether `err` says the `vectorProvider` column is
 * absent from a table.
 *
 * The message must mention `vectorProvider` as a whole word and also contain
 * one of the schema/missing-style keywords below (both case-insensitive).
 *
 * @param err Value caught from a failed table query; may be any thrown value.
 * @returns `true` when the message matches both patterns.
 */
function isMissingVectorProviderColumnError(err: unknown): boolean {
  let message: string;
  if (err instanceof Error) {
    message = err.message;
  } else if (
    typeof err === "object" &&
    err !== null &&
    typeof (err as { message?: unknown }).message === "string"
  ) {
    // Thrown values that are not `Error` instances but still carry a string
    // `message` would otherwise stringify to "[object Object]" and never match.
    message = (err as { message: string }).message;
  } else {
    message = String(err);
  }
  return /\bvectorProvider\b/i.test(message) &&
    /\b(column|field|schema|missing|not found|not exist|does not exist|unknown)\b/i.test(message);
}
|