@remnic/core 9.3.563 → 9.3.564
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-cli.js +40 -39
- package/dist/access-cli.js.map +1 -1
- package/dist/access-http.js +16 -16
- package/dist/access-mcp.js +13 -13
- package/dist/access-schema.js +3 -3
- package/dist/access-service.js +11 -11
- package/dist/active-recall.js +1 -1
- package/dist/adapters/index.js +4 -4
- package/dist/adapters/registry.js +2 -2
- package/dist/briefing.js +4 -4
- package/dist/causal-consolidation.js +5 -5
- package/dist/{chunk-I2K6KCVC.js → chunk-2FHLI4U6.js} +49 -49
- package/dist/chunk-3ONXXHQO.js +57 -0
- package/dist/chunk-3ONXXHQO.js.map +1 -0
- package/dist/{chunk-5GX5MUQ2.js → chunk-574MU2Y3.js} +3 -3
- package/dist/{chunk-65OLPXBU.js → chunk-5WB4C7KM.js} +6 -6
- package/dist/chunk-6PTSXBPE.js +483 -0
- package/dist/chunk-6PTSXBPE.js.map +1 -0
- package/dist/{chunk-Z56KAZQL.js → chunk-74VA26CT.js} +2 -2
- package/dist/{chunk-CC2ESOOG.js → chunk-7X7TBJRX.js} +2 -2
- package/dist/{chunk-O4M4WH6V.js → chunk-ARY5OOLG.js} +2 -2
- package/dist/{chunk-JBPKEARU.js → chunk-AU7Q3LSC.js} +4 -4
- package/dist/{chunk-PM3QHTFT.js → chunk-CF3ZF2YU.js} +3 -3
- package/dist/{chunk-SI3QCHWF.js → chunk-DARLGSFX.js} +5 -5
- package/dist/chunk-EWLQPEO6.js +308 -0
- package/dist/chunk-EWLQPEO6.js.map +1 -0
- package/dist/{chunk-FVCZINOF.js → chunk-FHBEL473.js} +2 -2
- package/dist/{chunk-7Q3RCKAQ.js → chunk-FXKPZ3H6.js} +2 -2
- package/dist/{chunk-5WLYNZPC.js → chunk-GBXGCFRH.js} +2 -2
- package/dist/{chunk-ILJXM3FV.js → chunk-HQO5EBUC.js} +10 -10
- package/dist/{chunk-FK556DDH.js → chunk-I4UNL747.js} +4 -4
- package/dist/{chunk-RLPIT4YI.js → chunk-IOTTZLFF.js} +38 -38
- package/dist/{chunk-TVZ6LKKS.js → chunk-IRFF6LSF.js} +8 -8
- package/dist/{chunk-M5T4Q2ZU.js → chunk-KGK2QKWL.js} +1 -1
- package/dist/chunk-KGK2QKWL.js.map +1 -0
- package/dist/{chunk-IPLYGWQF.js → chunk-KQAFEZQX.js} +5 -5
- package/dist/chunk-M46RYSMW.js +597 -0
- package/dist/chunk-M46RYSMW.js.map +1 -0
- package/dist/{chunk-KXULCVOC.js → chunk-M6I5Z4SR.js} +4 -2
- package/dist/chunk-M6I5Z4SR.js.map +1 -0
- package/dist/{chunk-JFN6K74Q.js → chunk-MQEIWDYW.js} +2 -2
- package/dist/{chunk-7H6CFEBJ.js → chunk-NZPF2SYV.js} +8 -1
- package/dist/{chunk-7H6CFEBJ.js.map → chunk-NZPF2SYV.js.map} +1 -1
- package/dist/{chunk-SML26KED.js → chunk-OB6353F7.js} +16 -12
- package/dist/chunk-OB6353F7.js.map +1 -0
- package/dist/{chunk-SOTR74FK.js → chunk-OPYFD6PD.js} +2 -2
- package/dist/{chunk-3C5RPJAX.js → chunk-OXJBNGBK.js} +2 -2
- package/dist/{chunk-BD5LHQWD.js → chunk-PPPZY2EU.js} +2 -2
- package/dist/{chunk-25BY3HHZ.js → chunk-SUTSSOYU.js} +2 -2
- package/dist/{chunk-KS7WO6EQ.js → chunk-VFB2G5YL.js} +20 -20
- package/dist/{chunk-BUUYY2H2.js → chunk-WP5OWVLZ.js} +4 -4
- package/dist/{chunk-6URPAY2D.js → chunk-XCAZF7KQ.js} +207 -53
- package/dist/chunk-XCAZF7KQ.js.map +1 -0
- package/dist/{chunk-S53PAX2V.js → chunk-XM7BYXT7.js} +2 -2
- package/dist/{chunk-FADZBOR4.js → chunk-XRWTAEZM.js} +2 -2
- package/dist/{chunk-E5OECWZ5.js → chunk-XT7XVA53.js} +2 -2
- package/dist/{chunk-R3PS27B4.js → chunk-Z4R6RI2N.js} +2 -2
- package/dist/cli.js +44 -43
- package/dist/compounding/engine.js +4 -4
- package/dist/config.js +1 -1
- package/dist/connectors/codex-materialize-runner.js +4 -4
- package/dist/connectors/index.js +4 -4
- package/dist/embedding-fallback.d.ts +12 -1
- package/dist/embedding-fallback.js +4 -1
- package/dist/entity-retrieval.js +4 -4
- package/dist/host-embedding-provider.d.ts +21 -0
- package/dist/host-embedding-provider.js +14 -0
- package/dist/host-embedding-provider.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +71 -63
- package/dist/index.js.map +1 -1
- package/dist/lcm/index.js +3 -3
- package/dist/maintenance/memory-governance.js +4 -4
- package/dist/maintenance/rebuild-memory-lifecycle-ledger.js +4 -4
- package/dist/maintenance/rebuild-memory-projection.js +5 -5
- package/dist/namespaces/migrate.js +14 -13
- package/dist/namespaces/search.js +9 -8
- package/dist/namespaces/storage.js +4 -4
- package/dist/operator-toolkit.js +17 -16
- package/dist/orchestrator.js +32 -31
- package/dist/recall-explain-renderer.js +3 -3
- package/dist/recall-xray-cli.js +4 -4
- package/dist/recall-xray-renderer.js +3 -3
- package/dist/recall-xray.js +2 -2
- package/dist/resume-bundles.js +2 -2
- package/dist/search/embed-helper.d.ts +48 -4
- package/dist/search/embed-helper.js +2 -1
- package/dist/search/factory.js +8 -7
- package/dist/search/index.d.ts +1 -0
- package/dist/search/index.js +12 -11
- package/dist/search/lancedb-backend.d.ts +11 -0
- package/dist/search/lancedb-backend.js +2 -2
- package/dist/search/meilisearch-backend.js +2 -2
- package/dist/search/orama-backend.d.ts +16 -0
- package/dist/search/orama-backend.js +2 -2
- package/dist/semantic-consolidation.js +5 -5
- package/dist/semantic-rule-promotion.js +4 -4
- package/dist/semantic-rule-verifier.js +4 -4
- package/dist/storage.js +3 -3
- package/dist/transfer/autodetect.js +1 -1
- package/dist/transfer/backup.js +1 -1
- package/dist/transfer/capsule-export.js +2 -2
- package/dist/transfer/types.d.ts +6 -6
- package/dist/types.d.ts +17 -0
- package/dist/types.js +1 -1
- package/dist/verified-recall.js +4 -4
- package/package.json +11 -1
- package/src/config.ts +18 -0
- package/src/embedding-fallback.ts +293 -61
- package/src/host-embedding-provider.ts +84 -0
- package/src/index.ts +7 -0
- package/src/namespaces/search.ts +9 -1
- package/src/qmd.test.ts +28 -0
- package/src/search/embed-helper.ts +319 -51
- package/src/search/factory.ts +6 -2
- package/src/search/lancedb-backend.ts +297 -41
- package/src/search/orama-backend.ts +418 -47
- package/src/types.ts +17 -0
- package/dist/chunk-6URPAY2D.js.map +0 -1
- package/dist/chunk-FUC4LZMD.js +0 -301
- package/dist/chunk-FUC4LZMD.js.map +0 -1
- package/dist/chunk-KXULCVOC.js.map +0 -1
- package/dist/chunk-M5T4Q2ZU.js.map +0 -1
- package/dist/chunk-ONPLNAPX.js +0 -133
- package/dist/chunk-ONPLNAPX.js.map +0 -1
- package/dist/chunk-QVJ4NWL2.js +0 -335
- package/dist/chunk-QVJ4NWL2.js.map +0 -1
- package/dist/chunk-SML26KED.js.map +0 -1
- /package/dist/{chunk-I2K6KCVC.js.map → chunk-2FHLI4U6.js.map} +0 -0
- /package/dist/{chunk-5GX5MUQ2.js.map → chunk-574MU2Y3.js.map} +0 -0
- /package/dist/{chunk-65OLPXBU.js.map → chunk-5WB4C7KM.js.map} +0 -0
- /package/dist/{chunk-Z56KAZQL.js.map → chunk-74VA26CT.js.map} +0 -0
- /package/dist/{chunk-CC2ESOOG.js.map → chunk-7X7TBJRX.js.map} +0 -0
- /package/dist/{chunk-O4M4WH6V.js.map → chunk-ARY5OOLG.js.map} +0 -0
- /package/dist/{chunk-JBPKEARU.js.map → chunk-AU7Q3LSC.js.map} +0 -0
- /package/dist/{chunk-PM3QHTFT.js.map → chunk-CF3ZF2YU.js.map} +0 -0
- /package/dist/{chunk-SI3QCHWF.js.map → chunk-DARLGSFX.js.map} +0 -0
- /package/dist/{chunk-FVCZINOF.js.map → chunk-FHBEL473.js.map} +0 -0
- /package/dist/{chunk-7Q3RCKAQ.js.map → chunk-FXKPZ3H6.js.map} +0 -0
- /package/dist/{chunk-5WLYNZPC.js.map → chunk-GBXGCFRH.js.map} +0 -0
- /package/dist/{chunk-ILJXM3FV.js.map → chunk-HQO5EBUC.js.map} +0 -0
- /package/dist/{chunk-FK556DDH.js.map → chunk-I4UNL747.js.map} +0 -0
- /package/dist/{chunk-RLPIT4YI.js.map → chunk-IOTTZLFF.js.map} +0 -0
- /package/dist/{chunk-TVZ6LKKS.js.map → chunk-IRFF6LSF.js.map} +0 -0
- /package/dist/{chunk-IPLYGWQF.js.map → chunk-KQAFEZQX.js.map} +0 -0
- /package/dist/{chunk-JFN6K74Q.js.map → chunk-MQEIWDYW.js.map} +0 -0
- /package/dist/{chunk-SOTR74FK.js.map → chunk-OPYFD6PD.js.map} +0 -0
- /package/dist/{chunk-3C5RPJAX.js.map → chunk-OXJBNGBK.js.map} +0 -0
- /package/dist/{chunk-BD5LHQWD.js.map → chunk-PPPZY2EU.js.map} +0 -0
- /package/dist/{chunk-25BY3HHZ.js.map → chunk-SUTSSOYU.js.map} +0 -0
- /package/dist/{chunk-KS7WO6EQ.js.map → chunk-VFB2G5YL.js.map} +0 -0
- /package/dist/{chunk-BUUYY2H2.js.map → chunk-WP5OWVLZ.js.map} +0 -0
- /package/dist/{chunk-S53PAX2V.js.map → chunk-XM7BYXT7.js.map} +0 -0
- /package/dist/{chunk-FADZBOR4.js.map → chunk-XRWTAEZM.js.map} +0 -0
- /package/dist/{chunk-E5OECWZ5.js.map → chunk-XT7XVA53.js.map} +0 -0
- /package/dist/{chunk-R3PS27B4.js.map → chunk-Z4R6RI2N.js.map} +0 -0
|
@@ -0,0 +1,597 @@
|
|
|
1
|
+
import {
|
|
2
|
+
isSearchAborted,
|
|
3
|
+
throwIfSearchAborted
|
|
4
|
+
} from "./chunk-CINZGPSJ.js";
|
|
5
|
+
import {
|
|
6
|
+
scanMemoryDir
|
|
7
|
+
} from "./chunk-Q4CAQGKQ.js";
|
|
8
|
+
import {
|
|
9
|
+
log
|
|
10
|
+
} from "./chunk-2ODBA7MQ.js";
|
|
11
|
+
|
|
12
|
+
// src/search/orama-backend.ts
|
|
13
|
+
import path from "path";
|
|
14
|
+
import { mkdir, readdir, readFile, rename, rm, writeFile } from "fs/promises";
|
|
15
|
+
// Collection names are interpolated into the on-disk file name (`<collection>.msp`), so only a
// conservative set is accepted: an ASCII letter or digit first, then letters, digits, ".", "_" or "-".
var ORAMA_COLLECTION_FILENAME_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._-]*$/;
|
|
16
|
+
/**
 * Report whether `child` is `parent` itself or is located beneath it.
 *
 * The decision is made on the relative path from `parent` to `child`: an empty
 * relative path means both are the same location, an absolute one means the
 * two cannot be related (e.g. different drives), and a leading `..` *segment*
 * means the child escapes the parent.
 *
 * @param {string} parent - Directory that should contain `child`.
 * @param {string} child - Path to test.
 * @returns {boolean} `true` when `child` equals or is nested under `parent`.
 */
function pathIsInside(parent, child) {
  const relative = path.relative(parent, child);
  if (relative === "") return true;
  if (path.isAbsolute(relative)) return false;
  // Only a complete ".." segment leaves the parent; an entry whose name merely
  // begins with two dots (e.g. "..hidden") is still inside it.
  return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}
|
|
20
|
+
/**
 * Build the absolute path of the `.msp` file that backs `collection`.
 *
 * @param {string} dbPath - Directory holding the collection files.
 * @param {string} collection - Collection name; must satisfy ORAMA_COLLECTION_FILENAME_PATTERN.
 * @returns {string} Absolute path of `<dbPath>/<collection>.msp`.
 * @throws {Error} When the name is rejected by the pattern or the resolved file is not inside `dbPath`.
 */
function resolveOramaCollectionDbFilePath(dbPath, collection) {
  const nameIsValid = ORAMA_COLLECTION_FILENAME_PATTERN.test(collection);
  if (!nameIsValid) {
    throw new Error(
      `Invalid Orama collection name ${JSON.stringify(collection)}. Collection names must match [A-Za-z0-9][A-Za-z0-9._-]*.`
    );
  }
  const baseDir = path.resolve(dbPath);
  const candidate = path.resolve(baseDir, `${collection}.msp`);
  // Second line of defence after the name check: the file must stay under baseDir.
  if (pathIsInside(baseDir, candidate)) {
    return candidate;
  }
  throw new Error(
    `Invalid Orama collection path for ${JSON.stringify(collection)}: resolved outside dbPath.`
  );
}
|
|
35
|
+
var OramaBackend = class {
|
|
36
|
+
dbPath;
|
|
37
|
+
collection;
|
|
38
|
+
embedHelper;
|
|
39
|
+
memoryDir;
|
|
40
|
+
embeddingDimension;
|
|
41
|
+
available = false;
|
|
42
|
+
db = null;
|
|
43
|
+
oramaModule = null;
|
|
44
|
+
persistModule = null;
|
|
45
|
+
vectorProviderCompatibility = /* @__PURE__ */ new WeakMap();
|
|
46
|
+
constructor(opts) {
|
|
47
|
+
this.dbPath = opts.dbPath;
|
|
48
|
+
this.collection = opts.collection;
|
|
49
|
+
this.embedHelper = opts.embedHelper;
|
|
50
|
+
this.memoryDir = opts.memoryDir;
|
|
51
|
+
this.embeddingDimension = opts.embeddingDimension;
|
|
52
|
+
}
|
|
53
|
+
async probe() {
|
|
54
|
+
try {
|
|
55
|
+
await this.ensureModules();
|
|
56
|
+
await this.ensureDb();
|
|
57
|
+
this.available = true;
|
|
58
|
+
return true;
|
|
59
|
+
} catch (err) {
|
|
60
|
+
log.debug(`OramaBackend probe failed: ${err}`);
|
|
61
|
+
this.available = false;
|
|
62
|
+
return false;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
isAvailable() {
|
|
66
|
+
return this.available;
|
|
67
|
+
}
|
|
68
|
+
debugStatus() {
|
|
69
|
+
return `backend=orama available=${this.available} dbPath=${this.dbPath}`;
|
|
70
|
+
}
|
|
71
|
+
async search(query, _collection, maxResults, _options, execution) {
|
|
72
|
+
return this.hybridSearch(query, _collection, maxResults, execution);
|
|
73
|
+
}
|
|
74
|
+
async searchGlobal(query, maxResults, execution) {
|
|
75
|
+
const limit = maxResults ?? 10;
|
|
76
|
+
if (!this.available) return [];
|
|
77
|
+
try {
|
|
78
|
+
throwIfSearchAborted(execution, "OramaBackend global search aborted");
|
|
79
|
+
const files = await this.listDbFiles();
|
|
80
|
+
const allResults = [];
|
|
81
|
+
for (const file of files) {
|
|
82
|
+
throwIfSearchAborted(execution, "OramaBackend global search aborted");
|
|
83
|
+
const db = await this.loadDbFromFile(file);
|
|
84
|
+
if (!db) continue;
|
|
85
|
+
const results = await this.searchDb(db, query, "hybrid", limit, execution);
|
|
86
|
+
allResults.push(...results);
|
|
87
|
+
}
|
|
88
|
+
allResults.sort((a, b) => b.score - a.score);
|
|
89
|
+
return allResults.slice(0, limit);
|
|
90
|
+
} catch (err) {
|
|
91
|
+
log.debug(`OramaBackend searchGlobal failed: ${err}`);
|
|
92
|
+
return [];
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
async bm25Search(query, collection, maxResults, execution) {
|
|
96
|
+
if (isSearchAborted(execution)) return [];
|
|
97
|
+
const db = await this.ensureDbForCollection(collection ?? this.collection);
|
|
98
|
+
if (isSearchAborted(execution)) return [];
|
|
99
|
+
if (!db) return [];
|
|
100
|
+
return this.searchDb(db, query, "fulltext", maxResults ?? 10, execution);
|
|
101
|
+
}
|
|
102
|
+
async vectorSearch(query, collection, maxResults, execution) {
|
|
103
|
+
if (isSearchAborted(execution)) return [];
|
|
104
|
+
const db = await this.ensureDbForCollection(collection ?? this.collection);
|
|
105
|
+
if (isSearchAborted(execution)) return [];
|
|
106
|
+
if (!db) return [];
|
|
107
|
+
return this.searchDb(db, query, "vector", maxResults ?? 10, execution);
|
|
108
|
+
}
|
|
109
|
+
async hybridSearch(query, collection, maxResults, execution) {
|
|
110
|
+
if (isSearchAborted(execution)) return [];
|
|
111
|
+
const db = await this.ensureDbForCollection(collection ?? this.collection);
|
|
112
|
+
if (isSearchAborted(execution)) return [];
|
|
113
|
+
if (!db) return [];
|
|
114
|
+
return this.searchDb(db, query, "hybrid", maxResults ?? 10, execution);
|
|
115
|
+
}
|
|
116
|
+
async update(execution) {
|
|
117
|
+
await this.updateCollection(this.collection, execution);
|
|
118
|
+
}
|
|
119
|
+
async updateCollection(collection, execution) {
|
|
120
|
+
if (isSearchAborted(execution)) return;
|
|
121
|
+
const db = await this.ensureDbForCollection(collection);
|
|
122
|
+
if (isSearchAborted(execution)) return;
|
|
123
|
+
if (!db) return;
|
|
124
|
+
const { search: oramaSearch, insert, remove, count, getByID } = this.oramaModule;
|
|
125
|
+
const docs = await scanMemoryDir(this.memoryDir);
|
|
126
|
+
if (isSearchAborted(execution)) return;
|
|
127
|
+
const docMap = new Map(docs.map((d) => [d.docid, d]));
|
|
128
|
+
const { update: oramaUpdate } = this.oramaModule;
|
|
129
|
+
const embeddingProviderIdentity = this.embedHelper.getProviderIdentity();
|
|
130
|
+
let allRowsCompatible = !!embeddingProviderIdentity && docs.length > 0;
|
|
131
|
+
const existingDocs = /* @__PURE__ */ new Map();
|
|
132
|
+
const existingCount = await count(db);
|
|
133
|
+
if (existingCount > 0) {
|
|
134
|
+
const allHits = await oramaSearch(db, {
|
|
135
|
+
term: "",
|
|
136
|
+
limit: existingCount + 100
|
|
137
|
+
});
|
|
138
|
+
for (const hit of allHits.hits) {
|
|
139
|
+
if (isSearchAborted(execution)) return;
|
|
140
|
+
const storedDocument = typeof getByID === "function" ? await getByID(db, hit.id) : hit.document;
|
|
141
|
+
const document = storedDocument ?? hit.document ?? {};
|
|
142
|
+
if (!docMap.has(document.id)) {
|
|
143
|
+
await remove(db, hit.id);
|
|
144
|
+
} else {
|
|
145
|
+
existingDocs.set(document.id, {
|
|
146
|
+
internalId: hit.id,
|
|
147
|
+
vector: this.normalizeStoredVector(document.vector) ?? void 0,
|
|
148
|
+
vectorProvider: typeof document.vectorProvider === "string" ? document.vectorProvider : void 0
|
|
149
|
+
});
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
for (const doc of docs) {
|
|
154
|
+
if (isSearchAborted(execution)) return;
|
|
155
|
+
const existing = existingDocs.get(doc.docid);
|
|
156
|
+
if (existing) {
|
|
157
|
+
const payload = {
|
|
158
|
+
id: doc.docid,
|
|
159
|
+
path: doc.path,
|
|
160
|
+
content: doc.content,
|
|
161
|
+
snippet: doc.snippet
|
|
162
|
+
};
|
|
163
|
+
const preservesCompatibleProvider = !!embeddingProviderIdentity && existing.vectorProvider === embeddingProviderIdentity;
|
|
164
|
+
if (preservesCompatibleProvider) {
|
|
165
|
+
if (this.isCompatibleStoredVector(existing.vector)) {
|
|
166
|
+
payload.vector = existing.vector;
|
|
167
|
+
payload.vectorProvider = existing.vectorProvider ?? "";
|
|
168
|
+
} else {
|
|
169
|
+
payload.vector = this.zeroVector();
|
|
170
|
+
payload.vectorProvider = "";
|
|
171
|
+
allRowsCompatible = false;
|
|
172
|
+
}
|
|
173
|
+
} else if (!embeddingProviderIdentity && this.isCompatibleStoredVector(existing.vector)) {
|
|
174
|
+
payload.vector = existing.vector;
|
|
175
|
+
payload.vectorProvider = existing.vectorProvider ?? "";
|
|
176
|
+
allRowsCompatible = false;
|
|
177
|
+
} else {
|
|
178
|
+
payload.vector = this.zeroVector();
|
|
179
|
+
payload.vectorProvider = "";
|
|
180
|
+
allRowsCompatible = false;
|
|
181
|
+
}
|
|
182
|
+
try {
|
|
183
|
+
await oramaUpdate(db, existing.internalId, payload);
|
|
184
|
+
} catch {
|
|
185
|
+
allRowsCompatible = false;
|
|
186
|
+
}
|
|
187
|
+
} else {
|
|
188
|
+
allRowsCompatible = false;
|
|
189
|
+
try {
|
|
190
|
+
await insert(db, {
|
|
191
|
+
id: doc.docid,
|
|
192
|
+
path: doc.path,
|
|
193
|
+
content: doc.content,
|
|
194
|
+
snippet: doc.snippet,
|
|
195
|
+
vector: this.zeroVector(),
|
|
196
|
+
vectorProvider: ""
|
|
197
|
+
});
|
|
198
|
+
} catch {
|
|
199
|
+
allRowsCompatible = false;
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
if (isSearchAborted(execution)) return;
|
|
204
|
+
await this.persistDbForCollection(db, collection);
|
|
205
|
+
this.rememberVectorProviderCompatibility(
|
|
206
|
+
db,
|
|
207
|
+
embeddingProviderIdentity,
|
|
208
|
+
allRowsCompatible
|
|
209
|
+
);
|
|
210
|
+
}
|
|
211
|
+
async embed() {
|
|
212
|
+
await this.embedCollection(this.collection);
|
|
213
|
+
}
|
|
214
|
+
async embedCollection(collection) {
|
|
215
|
+
if (!this.embedHelper.isAvailable()) return;
|
|
216
|
+
const db = await this.ensureDbForCollection(collection);
|
|
217
|
+
if (!db) return;
|
|
218
|
+
const { search: oramaSearch, update: oramaUpdate, count } = this.oramaModule;
|
|
219
|
+
const existingCount = await count(db);
|
|
220
|
+
if (existingCount === 0) return;
|
|
221
|
+
const embeddingProviderIdentity = this.embedHelper.getProviderIdentity();
|
|
222
|
+
const allHits = await oramaSearch(db, { term: "", limit: existingCount + 100 });
|
|
223
|
+
const needsEmbed = allHits.hits.filter((h) => {
|
|
224
|
+
const vector = this.normalizeStoredVector(h.document?.vector);
|
|
225
|
+
return embeddingProviderIdentity && h.document?.vectorProvider !== embeddingProviderIdentity || !this.isCompatibleStoredVector(vector);
|
|
226
|
+
});
|
|
227
|
+
if (needsEmbed.length === 0) {
|
|
228
|
+
this.rememberVectorProviderCompatibility(db, embeddingProviderIdentity, true);
|
|
229
|
+
return;
|
|
230
|
+
}
|
|
231
|
+
let rowsToEmbed = needsEmbed;
|
|
232
|
+
let embedResult = await this.embedHelper.embedBatchWithProvider(
|
|
233
|
+
rowsToEmbed.map((h) => h.document.content)
|
|
234
|
+
);
|
|
235
|
+
if (!embedResult) return;
|
|
236
|
+
if (embeddingProviderIdentity && embedResult.providerIdentity !== embeddingProviderIdentity) {
|
|
237
|
+
const effectiveProviderIdentity = embedResult.providerIdentity;
|
|
238
|
+
const originalIds = new Set(rowsToEmbed.map((h) => h.id));
|
|
239
|
+
const effectiveNeedsEmbed = allHits.hits.filter((h) => {
|
|
240
|
+
const vector = this.normalizeStoredVector(h.document?.vector);
|
|
241
|
+
return h.document?.vectorProvider !== effectiveProviderIdentity || !this.isCompatibleStoredVector(vector);
|
|
242
|
+
});
|
|
243
|
+
const sameRows = effectiveNeedsEmbed.length === rowsToEmbed.length && effectiveNeedsEmbed.every((h) => originalIds.has(h.id));
|
|
244
|
+
if (!sameRows) {
|
|
245
|
+
const effectiveTexts = effectiveNeedsEmbed.map((h) => h.document.content);
|
|
246
|
+
const effectiveEmbedResult = await this.embedHelper.embedBatchWithProvider(effectiveTexts);
|
|
247
|
+
if (effectiveEmbedResult) {
|
|
248
|
+
rowsToEmbed = effectiveNeedsEmbed;
|
|
249
|
+
embedResult = effectiveEmbedResult;
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
const { vectors, providerIdentity } = embedResult;
|
|
254
|
+
let allEmbedded = true;
|
|
255
|
+
for (let i = 0; i < rowsToEmbed.length; i++) {
|
|
256
|
+
const vec = vectors[i];
|
|
257
|
+
if (!this.isExpectedDimensionVector(vec)) {
|
|
258
|
+
allEmbedded = false;
|
|
259
|
+
continue;
|
|
260
|
+
}
|
|
261
|
+
const doc = rowsToEmbed[i].document;
|
|
262
|
+
await oramaUpdate(db, rowsToEmbed[i].id, {
|
|
263
|
+
id: doc.id,
|
|
264
|
+
path: doc.path,
|
|
265
|
+
content: doc.content,
|
|
266
|
+
snippet: doc.snippet,
|
|
267
|
+
vector: vec,
|
|
268
|
+
vectorProvider: providerIdentity
|
|
269
|
+
});
|
|
270
|
+
}
|
|
271
|
+
await this.persistDbForCollection(db, collection);
|
|
272
|
+
if (allEmbedded) {
|
|
273
|
+
this.rememberVectorProviderCompatibility(db, providerIdentity, true);
|
|
274
|
+
} else {
|
|
275
|
+
this.rememberVectorProviderCompatibility(db, providerIdentity, false);
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
async ensureCollection(_memoryDir, _execution) {
|
|
279
|
+
try {
|
|
280
|
+
await this.ensureModules();
|
|
281
|
+
await this.ensureDb();
|
|
282
|
+
return "present";
|
|
283
|
+
} catch {
|
|
284
|
+
return "missing";
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
async ensureModules() {
|
|
288
|
+
if (this.oramaModule && this.persistModule) return;
|
|
289
|
+
this.oramaModule = await import("@orama/orama");
|
|
290
|
+
this.persistModule = await import("@orama/plugin-data-persistence");
|
|
291
|
+
}
|
|
292
|
+
async ensureDb() {
|
|
293
|
+
if (this.db) return this.db;
|
|
294
|
+
await this.ensureModules();
|
|
295
|
+
await mkdir(this.dbPath, { recursive: true });
|
|
296
|
+
const filePath = this.dbFilePath(this.collection);
|
|
297
|
+
let raw;
|
|
298
|
+
try {
|
|
299
|
+
raw = await readFile(filePath, "utf-8");
|
|
300
|
+
} catch {
|
|
301
|
+
this.db = await this.createDb();
|
|
302
|
+
return this.db;
|
|
303
|
+
}
|
|
304
|
+
this.db = await this.migrateLegacyVectorProviderSchema(
|
|
305
|
+
await this.persistModule.restore("json", raw),
|
|
306
|
+
this.collection
|
|
307
|
+
);
|
|
308
|
+
return this.db;
|
|
309
|
+
}
|
|
310
|
+
async ensureDbForCollection(collection) {
|
|
311
|
+
if (collection === this.collection) return this.ensureDb();
|
|
312
|
+
await this.ensureModules();
|
|
313
|
+
await mkdir(this.dbPath, { recursive: true });
|
|
314
|
+
const filePath = this.dbFilePath(collection);
|
|
315
|
+
let raw;
|
|
316
|
+
try {
|
|
317
|
+
raw = await readFile(filePath, "utf-8");
|
|
318
|
+
} catch {
|
|
319
|
+
return await this.createDb();
|
|
320
|
+
}
|
|
321
|
+
return await this.migrateLegacyVectorProviderSchema(
|
|
322
|
+
await this.persistModule.restore("json", raw),
|
|
323
|
+
collection
|
|
324
|
+
);
|
|
325
|
+
}
|
|
326
|
+
async createDb() {
|
|
327
|
+
const { create } = this.oramaModule;
|
|
328
|
+
const schema = {
|
|
329
|
+
id: "string",
|
|
330
|
+
path: "string",
|
|
331
|
+
content: "string",
|
|
332
|
+
snippet: "string",
|
|
333
|
+
vectorProvider: "string",
|
|
334
|
+
vector: `vector[${this.embeddingDimension}]`
|
|
335
|
+
};
|
|
336
|
+
return await create({ schema });
|
|
337
|
+
}
|
|
338
|
+
async migrateLegacyVectorProviderSchema(db, collection) {
|
|
339
|
+
const { search: oramaSearch, count, insert } = this.oramaModule;
|
|
340
|
+
const existingCount = await count(db);
|
|
341
|
+
if (existingCount === 0) {
|
|
342
|
+
const migrated2 = await this.createDb();
|
|
343
|
+
await this.persistDbForCollection(migrated2, collection);
|
|
344
|
+
return migrated2;
|
|
345
|
+
}
|
|
346
|
+
const allHits = await oramaSearch(db, { term: "", limit: existingCount + 100 });
|
|
347
|
+
const hits = allHits.hits ?? [];
|
|
348
|
+
const needsMigration = hits.some(
|
|
349
|
+
(hit) => typeof hit.document?.vectorProvider !== "string"
|
|
350
|
+
);
|
|
351
|
+
if (!needsMigration) return db;
|
|
352
|
+
const migrated = await this.createDb();
|
|
353
|
+
for (const hit of hits) {
|
|
354
|
+
const doc = this.getStoredDocument(db, hit);
|
|
355
|
+
const vector = this.getStoredVector(db, hit, doc);
|
|
356
|
+
const payload = {
|
|
357
|
+
id: typeof doc.id === "string" && doc.id.length > 0 ? doc.id : String(hit.id),
|
|
358
|
+
path: typeof doc.path === "string" ? doc.path : "",
|
|
359
|
+
content: typeof doc.content === "string" ? doc.content : "",
|
|
360
|
+
snippet: typeof doc.snippet === "string" ? doc.snippet : typeof doc.content === "string" ? doc.content.slice(0, 200) : "",
|
|
361
|
+
vectorProvider: typeof doc.vectorProvider === "string" ? doc.vectorProvider : ""
|
|
362
|
+
};
|
|
363
|
+
if (vector) {
|
|
364
|
+
payload.vector = vector;
|
|
365
|
+
} else {
|
|
366
|
+
payload.vector = this.zeroVector();
|
|
367
|
+
}
|
|
368
|
+
await insert(migrated, payload);
|
|
369
|
+
}
|
|
370
|
+
await this.persistDbForCollection(migrated, collection);
|
|
371
|
+
return migrated;
|
|
372
|
+
}
|
|
373
|
+
async persistDbForCollection(db, collection) {
|
|
374
|
+
const data = await this.persistModule.persist(db, "json");
|
|
375
|
+
const filePath = this.dbFilePath(collection);
|
|
376
|
+
await mkdir(path.dirname(filePath), { recursive: true });
|
|
377
|
+
const tempPath = path.join(
|
|
378
|
+
path.dirname(filePath),
|
|
379
|
+
`.${path.basename(filePath)}.${process.pid}.${Date.now()}.${Math.random().toString(16).slice(2)}.tmp`
|
|
380
|
+
);
|
|
381
|
+
try {
|
|
382
|
+
await writeFile(tempPath, data, "utf-8");
|
|
383
|
+
await rename(tempPath, filePath);
|
|
384
|
+
} catch (err) {
|
|
385
|
+
await rm(tempPath, { force: true }).catch(() => void 0);
|
|
386
|
+
throw err;
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
dbFilePath(collection) {
|
|
390
|
+
return resolveOramaCollectionDbFilePath(this.dbPath, collection);
|
|
391
|
+
}
|
|
392
|
+
async listDbFiles() {
|
|
393
|
+
try {
|
|
394
|
+
const entries = await readdir(this.dbPath);
|
|
395
|
+
return entries.filter((e) => e.endsWith(".msp")).map((e) => path.join(this.dbPath, e));
|
|
396
|
+
} catch {
|
|
397
|
+
return [];
|
|
398
|
+
}
|
|
399
|
+
}
|
|
400
|
+
async loadDbFromFile(filePath) {
|
|
401
|
+
try {
|
|
402
|
+
await this.ensureModules();
|
|
403
|
+
const raw = await readFile(filePath, "utf-8");
|
|
404
|
+
const collection = path.basename(filePath, ".msp");
|
|
405
|
+
return await this.migrateLegacyVectorProviderSchema(
|
|
406
|
+
await this.persistModule.restore("json", raw),
|
|
407
|
+
collection
|
|
408
|
+
);
|
|
409
|
+
} catch (err) {
|
|
410
|
+
log.debug(`OramaBackend failed to load ${filePath}: ${err}`);
|
|
411
|
+
return null;
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
async searchDb(db, query, mode, limit, execution) {
|
|
415
|
+
const { search: oramaSearch } = this.oramaModule;
|
|
416
|
+
try {
|
|
417
|
+
throwIfSearchAborted(execution, `OramaBackend ${mode} search aborted`);
|
|
418
|
+
let searchParams;
|
|
419
|
+
if (mode === "fulltext") {
|
|
420
|
+
searchParams = { term: query, limit };
|
|
421
|
+
} else if (mode === "vector") {
|
|
422
|
+
const embedResult = await this.resolveCompatibleQueryEmbedding(db, query, execution);
|
|
423
|
+
throwIfSearchAborted(execution, `OramaBackend ${mode} search aborted`);
|
|
424
|
+
if (!embedResult) {
|
|
425
|
+
searchParams = { term: query, limit };
|
|
426
|
+
} else {
|
|
427
|
+
searchParams = { mode: "vector", vector: { value: embedResult.vector, property: "vector" }, limit };
|
|
428
|
+
}
|
|
429
|
+
} else {
|
|
430
|
+
const embedResult = await this.resolveCompatibleQueryEmbedding(db, query, execution);
|
|
431
|
+
throwIfSearchAborted(execution, `OramaBackend ${mode} search aborted`);
|
|
432
|
+
if (!embedResult) {
|
|
433
|
+
searchParams = { term: query, limit };
|
|
434
|
+
} else {
|
|
435
|
+
searchParams = { mode: "hybrid", term: query, vector: { value: embedResult.vector, property: "vector" }, limit };
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
throwIfSearchAborted(execution, `OramaBackend ${mode} search aborted`);
|
|
439
|
+
const result = await oramaSearch(db, searchParams);
|
|
440
|
+
throwIfSearchAborted(execution, `OramaBackend ${mode} search aborted`);
|
|
441
|
+
return (result.hits ?? []).map((hit) => ({
|
|
442
|
+
docid: hit.document?.id ?? "",
|
|
443
|
+
path: hit.document?.path ?? "",
|
|
444
|
+
snippet: hit.document?.snippet ?? hit.document?.content?.slice(0, 200) ?? "",
|
|
445
|
+
score: hit.score ?? 0
|
|
446
|
+
}));
|
|
447
|
+
} catch (err) {
|
|
448
|
+
log.debug(`OramaBackend search (${mode}) failed: ${err}`);
|
|
449
|
+
return [];
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
async resolveCompatibleQueryEmbedding(db, query, execution) {
|
|
453
|
+
const embedResult = await this.embedHelper.embedWithProvider(query, { signal: execution?.signal });
|
|
454
|
+
throwIfSearchAborted(execution, "OramaBackend query embedding aborted");
|
|
455
|
+
if (!embedResult || !this.isExpectedDimensionVector(embedResult.vector)) return null;
|
|
456
|
+
const storedProviderIdentity = await this.findCompatibleStoredVectorProvider(db, execution);
|
|
457
|
+
if (!storedProviderIdentity) {
|
|
458
|
+
this.rememberVectorProviderCompatibility(db, embedResult.providerIdentity, false);
|
|
459
|
+
return null;
|
|
460
|
+
}
|
|
461
|
+
if (storedProviderIdentity === embedResult.providerIdentity) return embedResult;
|
|
462
|
+
const fallbackEmbed = await this.embedQueryWithStoredFallbackProvider(query, storedProviderIdentity, execution);
|
|
463
|
+
throwIfSearchAborted(execution, "OramaBackend fallback query embedding aborted");
|
|
464
|
+
if (fallbackEmbed && fallbackEmbed.providerIdentity === storedProviderIdentity && this.isExpectedDimensionVector(fallbackEmbed.vector)) {
|
|
465
|
+
return fallbackEmbed;
|
|
466
|
+
}
|
|
467
|
+
this.rememberVectorProviderCompatibility(db, embedResult.providerIdentity, false);
|
|
468
|
+
return null;
|
|
469
|
+
}
|
|
470
|
+
async embedQueryWithStoredFallbackProvider(query, providerIdentity, execution) {
|
|
471
|
+
const embedWithIdentity = this.embedHelper.embedWithFallbackProviderIdentity;
|
|
472
|
+
if (typeof embedWithIdentity !== "function") return null;
|
|
473
|
+
return embedWithIdentity.call(this.embedHelper, query, providerIdentity, { signal: execution?.signal });
|
|
474
|
+
}
|
|
475
|
+
async findCompatibleStoredVectorProvider(db, execution) {
|
|
476
|
+
const { search: oramaSearch, count } = this.oramaModule;
|
|
477
|
+
try {
|
|
478
|
+
const cached = this.vectorProviderCompatibility.get(db);
|
|
479
|
+
if (cached?.compatible) return cached.providerIdentity;
|
|
480
|
+
const existingCount = await count(db);
|
|
481
|
+
if (existingCount === 0) return null;
|
|
482
|
+
const allHits = await oramaSearch(db, {
|
|
483
|
+
term: "",
|
|
484
|
+
limit: existingCount + 100,
|
|
485
|
+
properties: ["vectorProvider"]
|
|
486
|
+
});
|
|
487
|
+
let providerIdentity = null;
|
|
488
|
+
let compatible = (allHits.hits ?? []).length > 0;
|
|
489
|
+
for (const hit of allHits.hits ?? []) {
|
|
490
|
+
throwIfSearchAborted(execution, "OramaBackend vector provider check aborted");
|
|
491
|
+
const doc = this.getStoredDocument(db, hit);
|
|
492
|
+
if (typeof doc.vectorProvider !== "string" || doc.vectorProvider.length === 0 || !this.isCompatibleStoredVector(this.getStoredVector(db, hit, doc))) {
|
|
493
|
+
compatible = false;
|
|
494
|
+
break;
|
|
495
|
+
}
|
|
496
|
+
if (providerIdentity && doc.vectorProvider !== providerIdentity) {
|
|
497
|
+
compatible = false;
|
|
498
|
+
break;
|
|
499
|
+
}
|
|
500
|
+
providerIdentity = doc.vectorProvider;
|
|
501
|
+
}
|
|
502
|
+
if (compatible && providerIdentity) {
|
|
503
|
+
this.vectorProviderCompatibility.set(db, {
|
|
504
|
+
providerIdentity,
|
|
505
|
+
compatible: true
|
|
506
|
+
});
|
|
507
|
+
return providerIdentity;
|
|
508
|
+
}
|
|
509
|
+
return null;
|
|
510
|
+
} catch (err) {
|
|
511
|
+
if (isSearchAborted(execution)) throw err;
|
|
512
|
+
log.debug(`OramaBackend stored vector provider check failed: ${err}`);
|
|
513
|
+
return null;
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
async dbHasCompatibleVectors(db, providerIdentity, execution) {
|
|
517
|
+
const { search: oramaSearch, count } = this.oramaModule;
|
|
518
|
+
try {
|
|
519
|
+
const cached = this.vectorProviderCompatibility.get(db);
|
|
520
|
+
if (cached?.providerIdentity === providerIdentity) return cached.compatible;
|
|
521
|
+
const existingCount = await count(db);
|
|
522
|
+
if (existingCount === 0) return false;
|
|
523
|
+
const allHits = await oramaSearch(db, {
|
|
524
|
+
term: "",
|
|
525
|
+
limit: existingCount + 100,
|
|
526
|
+
properties: ["vectorProvider"]
|
|
527
|
+
});
|
|
528
|
+
let compatible = (allHits.hits ?? []).length > 0;
|
|
529
|
+
for (const hit of allHits.hits ?? []) {
|
|
530
|
+
throwIfSearchAborted(execution, "OramaBackend vector provider check aborted");
|
|
531
|
+
const doc = this.getStoredDocument(db, hit);
|
|
532
|
+
if (doc.vectorProvider !== providerIdentity || !this.isCompatibleStoredVector(this.getStoredVector(db, hit, doc))) {
|
|
533
|
+
compatible = false;
|
|
534
|
+
break;
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
this.vectorProviderCompatibility.set(db, { providerIdentity, compatible });
|
|
538
|
+
return compatible;
|
|
539
|
+
} catch (err) {
|
|
540
|
+
if (isSearchAborted(execution)) throw err;
|
|
541
|
+
log.debug(`OramaBackend vector provider check failed: ${err}`);
|
|
542
|
+
return false;
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
rememberVectorProviderCompatibility(db, providerIdentity, compatible) {
|
|
546
|
+
if (!db || typeof db !== "object") return;
|
|
547
|
+
if (!providerIdentity) {
|
|
548
|
+
this.vectorProviderCompatibility.delete(db);
|
|
549
|
+
return;
|
|
550
|
+
}
|
|
551
|
+
this.vectorProviderCompatibility.set(db, { providerIdentity, compatible });
|
|
552
|
+
}
|
|
553
|
+
getStoredDocument(db, hit) {
|
|
554
|
+
const internalId = this.getInternalDocumentId(db, hit);
|
|
555
|
+
const internalDoc = internalId !== void 0 && internalId !== null ? db?.data?.docs?.docs?.[String(internalId)] : void 0;
|
|
556
|
+
if (internalDoc && typeof internalDoc === "object") {
|
|
557
|
+
return internalDoc;
|
|
558
|
+
}
|
|
559
|
+
return hit?.document && typeof hit.document === "object" ? hit.document : {};
|
|
560
|
+
}
|
|
561
|
+
getStoredVector(db, hit, doc) {
|
|
562
|
+
const documentVector = this.normalizeStoredVector(doc.vector);
|
|
563
|
+
if (documentVector) return documentVector;
|
|
564
|
+
const internalId = this.getInternalDocumentId(db, hit);
|
|
565
|
+
if (internalId === void 0 || internalId === null) return null;
|
|
566
|
+
const vectorEntry = db?.data?.index?.vectorIndexes?.vector?.node?.vectors?.get?.(internalId);
|
|
567
|
+
const vector = Array.isArray(vectorEntry) ? vectorEntry[1] : vectorEntry;
|
|
568
|
+
return this.normalizeStoredVector(vector);
|
|
569
|
+
}
|
|
570
|
+
getInternalDocumentId(db, hit) {
|
|
571
|
+
const publicId = typeof hit?.id === "string" ? hit.id : typeof hit?.document?.id === "string" ? hit.document.id : void 0;
|
|
572
|
+
return publicId && typeof db?.internalDocumentIDStore?.idToInternalId?.get === "function" ? db.internalDocumentIDStore.idToInternalId.get(publicId) : void 0;
|
|
573
|
+
}
|
|
574
|
+
isExpectedDimensionVector(vector) {
|
|
575
|
+
return Array.isArray(vector) && vector.length === this.embeddingDimension;
|
|
576
|
+
}
|
|
577
|
+
isCompatibleStoredVector(vector) {
|
|
578
|
+
if (!vector || typeof vector !== "object") return false;
|
|
579
|
+
const arr = Array.from(vector);
|
|
580
|
+
return arr.length === this.embeddingDimension && arr.every((value) => Number.isFinite(value)) && arr.some((value) => value !== 0);
|
|
581
|
+
}
|
|
582
|
+
zeroVector() {
|
|
583
|
+
return Array.from({ length: this.embeddingDimension }, () => 0);
|
|
584
|
+
}
|
|
585
|
+
normalizeStoredVector(vector) {
|
|
586
|
+
const values = Array.isArray(vector) ? vector : ArrayBuffer.isView(vector) && !(vector instanceof DataView) ? Array.from(vector) : null;
|
|
587
|
+
if (!values || values.length !== this.embeddingDimension) return null;
|
|
588
|
+
const normalized = values.map((value) => Number(value));
|
|
589
|
+
return normalized.every((value) => Number.isFinite(value)) ? normalized : null;
|
|
590
|
+
}
|
|
591
|
+
};
|
|
592
|
+
|
|
593
|
+
export {
|
|
594
|
+
resolveOramaCollectionDbFilePath,
|
|
595
|
+
OramaBackend
|
|
596
|
+
};
|
|
597
|
+
//# sourceMappingURL=chunk-M46RYSMW.js.map
|