@yoch/frozenminisearch 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +1 -1
- package/dist/cjs/index.cjs +444 -260
- package/dist/es/index.d.ts +16 -1
- package/dist/es/index.js +444 -260
- package/package.json +1 -1
package/dist/es/index.js
CHANGED
|
@@ -2016,24 +2016,33 @@ function findSparseSlotByFieldId(fieldIds, start, end, fieldId) {
|
|
|
2016
2016
|
}
|
|
2017
2017
|
return -1;
|
|
2018
2018
|
}
|
|
2019
|
-
/**
|
|
2020
|
-
|
|
2019
|
+
/** Reusable scratch for {@link resolvePostingSlice} (scoring is synchronous). */
|
|
2020
|
+
const postingSliceScratch = { offset: 0, length: 0 };
|
|
2021
|
+
/**
|
|
2022
|
+
* Resolve one (termIndex, fieldId) posting run in flat buffers; writes into `out` without allocating.
|
|
2023
|
+
* @returns false when the slot is empty or missing
|
|
2024
|
+
*/
|
|
2025
|
+
function resolvePostingSlice(layout, termIndex, fieldId, out) {
|
|
2021
2026
|
if (layout.layout === 'dense') {
|
|
2022
2027
|
const base = termIndex * layout.fieldCount + fieldId;
|
|
2023
2028
|
const len = layout.denseLengths[base];
|
|
2024
2029
|
if (len === 0)
|
|
2025
|
-
return
|
|
2026
|
-
|
|
2030
|
+
return false;
|
|
2031
|
+
out.offset = layout.denseOffsets[base];
|
|
2032
|
+
out.length = len;
|
|
2033
|
+
return true;
|
|
2027
2034
|
}
|
|
2028
2035
|
const start = layout.sparseTermStarts[termIndex];
|
|
2029
2036
|
const end = layout.sparseTermStarts[termIndex + 1];
|
|
2030
2037
|
const slot = findSparseSlotByFieldId(layout.sparseFieldIds, start, end, fieldId);
|
|
2031
2038
|
if (slot < 0)
|
|
2032
|
-
return
|
|
2039
|
+
return false;
|
|
2033
2040
|
const len = layout.sparseLengths[slot];
|
|
2034
2041
|
if (len === 0)
|
|
2035
|
-
return
|
|
2036
|
-
|
|
2042
|
+
return false;
|
|
2043
|
+
out.offset = layout.sparseOffsets[slot];
|
|
2044
|
+
out.length = len;
|
|
2045
|
+
return true;
|
|
2037
2046
|
}
|
|
2038
2047
|
/**
|
|
2039
2048
|
* One flyweight wrapper for the lifetime of a frozen index. Call {@link bind} before each
|
|
@@ -2049,10 +2058,9 @@ function createFrozenFieldTermFlyweight(layout) {
|
|
|
2049
2058
|
return flyweight;
|
|
2050
2059
|
},
|
|
2051
2060
|
get(fieldId) {
|
|
2052
|
-
|
|
2053
|
-
if (slice == null)
|
|
2061
|
+
if (!resolvePostingSlice(layout, termIndex, fieldId, postingSliceScratch))
|
|
2054
2062
|
return undefined;
|
|
2055
|
-
return segment.rebind(
|
|
2063
|
+
return segment.rebind(postingSliceScratch.offset, postingSliceScratch.length);
|
|
2056
2064
|
},
|
|
2057
2065
|
};
|
|
2058
2066
|
return flyweight;
|
|
@@ -2071,10 +2079,9 @@ function collectDocIdsFromFrozenSegment(allDocIds, offset, length, context, docI
|
|
|
2071
2079
|
function collectDocIdsFromFrozenLayout(layout, termIndex, fieldBoosts, context, docIds, allowedDocs) {
|
|
2072
2080
|
const { fieldIds } = context;
|
|
2073
2081
|
for (const field of fieldBoosts.names) {
|
|
2074
|
-
|
|
2075
|
-
if (slice == null)
|
|
2082
|
+
if (!resolvePostingSlice(layout, termIndex, fieldIds[field], postingSliceScratch))
|
|
2076
2083
|
continue;
|
|
2077
|
-
collectDocIdsFromFrozenSegment(layout.allDocIds,
|
|
2084
|
+
collectDocIdsFromFrozenSegment(layout.allDocIds, postingSliceScratch.offset, postingSliceScratch.length, context, docIds, allowedDocs);
|
|
2078
2085
|
}
|
|
2079
2086
|
}
|
|
2080
2087
|
|
|
@@ -2228,22 +2235,387 @@ function collectFieldTermFreqsFromFieldInto(localFreqs, tokenScratch, tokenize,
|
|
|
2228
2235
|
tokenizeFieldInto(tokenScratch, tokenize, text, fieldName);
|
|
2229
2236
|
return collectFieldTermFreqsInto(localFreqs, tokenScratch, fieldName, processTerm);
|
|
2230
2237
|
}
|
|
2231
|
-
/** Same running average as {@link MiniSearch} private addFieldLength. */
|
|
2232
2238
|
function updateAvgFieldLength(avgFieldLength, fieldId, count, length) {
|
|
2233
2239
|
const averageFieldLength = avgFieldLength[fieldId] || 0;
|
|
2234
2240
|
const totalFieldLength = (averageFieldLength * count) + length;
|
|
2235
2241
|
avgFieldLength[fieldId] = totalFieldLength / (count + 1);
|
|
2236
2242
|
}
|
|
2237
|
-
|
|
2243
|
+
|
|
2244
|
+
function validateTreeShape(shape, termCount) {
|
|
2245
|
+
if (!Array.isArray(shape)) {
|
|
2246
|
+
throw invalidFrozenIndex('treeShape node must be an array');
|
|
2247
|
+
}
|
|
2248
|
+
for (const entry of shape) {
|
|
2249
|
+
if (!Array.isArray(entry) || entry.length !== 2) {
|
|
2250
|
+
throw invalidFrozenIndex('treeShape entry must be a [key, value] pair');
|
|
2251
|
+
}
|
|
2252
|
+
const [key, value] = entry;
|
|
2253
|
+
if (key === LEAF) {
|
|
2254
|
+
const idx = value;
|
|
2255
|
+
if (!Number.isInteger(idx) || idx < 0 || idx >= termCount) {
|
|
2256
|
+
throw invalidFrozenIndex(`treeShape leaf term index out of range: ${idx}`);
|
|
2257
|
+
}
|
|
2258
|
+
}
|
|
2259
|
+
else {
|
|
2260
|
+
validateTreeShape(value, termCount);
|
|
2261
|
+
}
|
|
2262
|
+
}
|
|
2263
|
+
}
|
|
2264
|
+
function termCountOf(snap) {
|
|
2265
|
+
return snap.postings.termCount;
|
|
2266
|
+
}
|
|
2267
|
+
/**
|
|
2268
|
+
* Numeric/structural invariants shared by both the decode path (untrusted binary)
|
|
2269
|
+
* and the build path (trusted internal code).
|
|
2270
|
+
*/
|
|
2271
|
+
function validateFrozenSnapshotNumeric(snap) {
|
|
2272
|
+
if (snap.fieldCount <= 0) {
|
|
2273
|
+
throw invalidFrozenIndex('fieldCount must be positive');
|
|
2274
|
+
}
|
|
2275
|
+
if (snap.nextId < 0 || snap.nextId >= 0xffffffff) {
|
|
2276
|
+
throw invalidFrozenIndex('nextId out of range');
|
|
2277
|
+
}
|
|
2278
|
+
if (snap.documentCount < 0 || snap.documentCount > snap.nextId) {
|
|
2279
|
+
throw invalidFrozenIndex('documentCount inconsistent with nextId');
|
|
2280
|
+
}
|
|
2281
|
+
if (snap.fieldLengthMatrix.length !== snap.nextId * snap.fieldCount) {
|
|
2282
|
+
throw invalidFrozenIndex('fieldLengthMatrix size mismatch');
|
|
2283
|
+
}
|
|
2284
|
+
if (snap.avgFieldLength.length !== snap.fieldCount) {
|
|
2285
|
+
throw invalidFrozenIndex('avgFieldLength size mismatch');
|
|
2286
|
+
}
|
|
2287
|
+
validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, detail => {
|
|
2288
|
+
throw invalidFrozenIndex(detail);
|
|
2289
|
+
});
|
|
2290
|
+
const indexedFields = Object.keys(snap.fieldIds);
|
|
2291
|
+
if (indexedFields.length !== snap.fieldCount) {
|
|
2292
|
+
throw invalidFrozenIndex('fieldIds count mismatch');
|
|
2293
|
+
}
|
|
2294
|
+
for (let f = 0; f < snap.fieldCount; f++) {
|
|
2295
|
+
const found = indexedFields.some(name => snap.fieldIds[name] === f);
|
|
2296
|
+
if (!found) {
|
|
2297
|
+
throw invalidFrozenIndex(`missing field id ${f}`);
|
|
2298
|
+
}
|
|
2299
|
+
}
|
|
2300
|
+
}
|
|
2301
|
+
function readFieldNamesSection(buf, fieldNamesOff, fieldCount, externalIdsOff) {
|
|
2302
|
+
const fieldNames = [];
|
|
2303
|
+
let o = fieldNamesOff;
|
|
2304
|
+
for (let f = 0; f < fieldCount; f++) {
|
|
2305
|
+
const { value, next } = readLengthPrefixedUtf8(buf, o);
|
|
2306
|
+
fieldNames.push(value);
|
|
2307
|
+
o = next;
|
|
2308
|
+
}
|
|
2309
|
+
if (o !== externalIdsOff) {
|
|
2310
|
+
throw invalidFrozenIndex('field names section size mismatch');
|
|
2311
|
+
}
|
|
2312
|
+
return fieldNames;
|
|
2313
|
+
}
|
|
2314
|
+
function readExternalIdsSection(buf, externalIdsOff, nextId, storedOff) {
|
|
2315
|
+
const externalIds = new Array(nextId);
|
|
2316
|
+
let o = externalIdsOff;
|
|
2317
|
+
for (let i = 0; i < nextId; i++) {
|
|
2318
|
+
const { value, next } = readExternalId(buf, o);
|
|
2319
|
+
externalIds[i] = value;
|
|
2320
|
+
o = next;
|
|
2321
|
+
}
|
|
2322
|
+
if (o !== storedOff) {
|
|
2323
|
+
throw invalidFrozenIndex('external ids section size mismatch');
|
|
2324
|
+
}
|
|
2325
|
+
return externalIds;
|
|
2326
|
+
}
|
|
2327
|
+
function readStoredFieldsSection(buf, storedOff, nextId, sectionEnd) {
|
|
2328
|
+
const storedFields = new Array(nextId);
|
|
2329
|
+
const tableEnd = storedOff + nextId * 4;
|
|
2330
|
+
if (tableEnd > sectionEnd) {
|
|
2331
|
+
throw invalidFrozenIndex('stored fields table out of bounds');
|
|
2332
|
+
}
|
|
2333
|
+
for (let i = 0; i < nextId; i++) {
|
|
2334
|
+
const rel = buf.readUInt32LE(storedOff + i * 4);
|
|
2335
|
+
if (rel === 0) {
|
|
2336
|
+
storedFields[i] = undefined;
|
|
2337
|
+
continue;
|
|
2338
|
+
}
|
|
2339
|
+
const entryOff = tableEnd + rel - 1;
|
|
2340
|
+
if (entryOff + 4 > sectionEnd) {
|
|
2341
|
+
throw invalidFrozenIndex('stored fields entry offset out of bounds');
|
|
2342
|
+
}
|
|
2343
|
+
const jsonLen = buf.readUInt32LE(entryOff);
|
|
2344
|
+
const jsonStart = entryOff + 4;
|
|
2345
|
+
const jsonEnd = jsonStart + jsonLen;
|
|
2346
|
+
if (jsonEnd > sectionEnd) {
|
|
2347
|
+
throw invalidFrozenIndex('stored fields JSON out of bounds');
|
|
2348
|
+
}
|
|
2349
|
+
storedFields[i] = JSON.parse(buf.toString('utf8', jsonStart, jsonEnd));
|
|
2350
|
+
}
|
|
2351
|
+
return storedFields;
|
|
2352
|
+
}
|
|
2353
|
+
/** Validate structural invariants of a decoded or assembled frozen snapshot. */
|
|
2354
|
+
function validateFrozenSnapshot(snap) {
|
|
2355
|
+
validateFrozenSnapshotNumeric(snap);
|
|
2356
|
+
const termCount = termCountOf(snap);
|
|
2357
|
+
if (snap.packedTermIndex != null) {
|
|
2358
|
+
validateFrozenTermIndexLeaves(snap.packedTermIndex, termCount);
|
|
2359
|
+
}
|
|
2360
|
+
else if (snap.termTree != null) {
|
|
2361
|
+
validateTermTreeLeaves(snap.termTree, termCount);
|
|
2362
|
+
}
|
|
2363
|
+
else {
|
|
2364
|
+
validateTreeShape(snap.treeShape, termCount);
|
|
2365
|
+
}
|
|
2366
|
+
}
|
|
2367
|
+
function fieldNamesFromFieldIds(fieldIds) {
|
|
2368
|
+
const names = Object.keys(fieldIds);
|
|
2369
|
+
names.sort((a, b) => fieldIds[a] - fieldIds[b]);
|
|
2370
|
+
return names;
|
|
2371
|
+
}
|
|
2372
|
+
/** Core with explicit {@link termCountOf} (no dictionary section). */
|
|
2373
|
+
function buildCoreSectionWithTermCount(snap) {
|
|
2374
|
+
const out = Buffer.alloc(16);
|
|
2375
|
+
out.writeUInt32LE(snap.documentCount, 0);
|
|
2376
|
+
out.writeUInt32LE(snap.nextId, 4);
|
|
2377
|
+
out.writeUInt32LE(snap.fieldCount, 8);
|
|
2378
|
+
out.writeUInt32LE(termCountOf(snap), 12);
|
|
2379
|
+
return out;
|
|
2380
|
+
}
|
|
2381
|
+
function buildFieldNamesSection(fieldNames) {
|
|
2382
|
+
const chunks = [];
|
|
2383
|
+
for (const name of fieldNames) {
|
|
2384
|
+
const body = Buffer.from(name, 'utf8');
|
|
2385
|
+
const header = Buffer.alloc(4);
|
|
2386
|
+
header.writeUInt32LE(body.length, 0);
|
|
2387
|
+
chunks.push(header, body);
|
|
2388
|
+
}
|
|
2389
|
+
return Buffer.concat(chunks);
|
|
2390
|
+
}
|
|
2391
|
+
function buildExternalIdsSection(externalIds, nextId) {
|
|
2392
|
+
const chunks = [];
|
|
2393
|
+
for (let i = 0; i < nextId; i++) {
|
|
2394
|
+
writeExternalId(chunks, externalIds[i]);
|
|
2395
|
+
}
|
|
2396
|
+
return Buffer.concat(chunks);
|
|
2397
|
+
}
|
|
2398
|
+
function buildStoredFieldsSection(storedFields, nextId) {
|
|
2399
|
+
const table = Buffer.alloc(nextId * 4);
|
|
2400
|
+
const heapChunks = [];
|
|
2401
|
+
let heapOff = 0;
|
|
2402
|
+
for (let i = 0; i < nextId; i++) {
|
|
2403
|
+
const row = storedFields[i];
|
|
2404
|
+
if (row == null) {
|
|
2405
|
+
table.writeUInt32LE(0, i * 4);
|
|
2406
|
+
continue;
|
|
2407
|
+
}
|
|
2408
|
+
table.writeUInt32LE(heapOff + 1, i * 4);
|
|
2409
|
+
const json = Buffer.from(JSON.stringify(row), 'utf8');
|
|
2410
|
+
const entry = Buffer.alloc(4 + json.length);
|
|
2411
|
+
entry.writeUInt32LE(json.length, 0);
|
|
2412
|
+
json.copy(entry, 4);
|
|
2413
|
+
heapChunks.push(entry);
|
|
2414
|
+
heapOff += entry.length;
|
|
2415
|
+
}
|
|
2416
|
+
return Buffer.concat([table, ...heapChunks]);
|
|
2417
|
+
}
|
|
2418
|
+
function validateTermTreeLeaves(tree, termCount) {
|
|
2419
|
+
for (const [key, val] of tree) {
|
|
2420
|
+
if (key === LEAF) {
|
|
2421
|
+
const idx = val;
|
|
2422
|
+
if (!Number.isInteger(idx) || idx < 0 || idx >= termCount) {
|
|
2423
|
+
throw invalidFrozenIndex(`term tree leaf index out of range: ${idx}`);
|
|
2424
|
+
}
|
|
2425
|
+
}
|
|
2426
|
+
else {
|
|
2427
|
+
validateTermTreeLeaves(val, termCount);
|
|
2428
|
+
}
|
|
2429
|
+
}
|
|
2430
|
+
}
|
|
2431
|
+
function deserializeTermIndexTree(shape) {
|
|
2432
|
+
const tree = new Map();
|
|
2433
|
+
for (const [key, value] of shape) {
|
|
2434
|
+
if (key === LEAF) {
|
|
2435
|
+
tree.set(LEAF, value);
|
|
2436
|
+
}
|
|
2437
|
+
else {
|
|
2438
|
+
tree.set(key, deserializeTermIndexTree(value));
|
|
2439
|
+
}
|
|
2440
|
+
}
|
|
2441
|
+
return tree;
|
|
2442
|
+
}
|
|
2443
|
+
|
|
2444
|
+
/**
|
|
2445
|
+
* Runtime stored fields. Single store field → one column (no per-doc Record at rest).
|
|
2446
|
+
* Wire format stays row JSON; encode/decode can skip intermediate row arrays when layout is known.
|
|
2447
|
+
*/
|
|
2448
|
+
function createStoredFieldsLayout(storeFields, capacity = 0) {
|
|
2238
2449
|
if (storeFields.length === 0)
|
|
2450
|
+
return { kind: 'none' };
|
|
2451
|
+
if (storeFields.length === 1) {
|
|
2452
|
+
return { kind: 'single', field: storeFields[0], values: new Array(capacity) };
|
|
2453
|
+
}
|
|
2454
|
+
return { kind: 'multi', rows: new Array(capacity) };
|
|
2455
|
+
}
|
|
2456
|
+
function writeStoredField(layout, shortId, storeFields, extractField, document) {
|
|
2457
|
+
if (layout.kind === 'none')
|
|
2458
|
+
return;
|
|
2459
|
+
if (layout.kind === 'single') {
|
|
2460
|
+
layout.values[shortId] = extractField(document, layout.field);
|
|
2461
|
+
return;
|
|
2462
|
+
}
|
|
2463
|
+
const row = {};
|
|
2464
|
+
for (const name of storeFields) {
|
|
2465
|
+
const value = extractField(document, name);
|
|
2466
|
+
if (value !== undefined)
|
|
2467
|
+
row[name] = value;
|
|
2468
|
+
}
|
|
2469
|
+
layout.rows[shortId] = row;
|
|
2470
|
+
}
|
|
2471
|
+
/** Materialize API/wire row for one document. */
|
|
2472
|
+
function readStoredFields(layout, shortId) {
|
|
2473
|
+
if (layout.kind === 'none')
|
|
2239
2474
|
return undefined;
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2475
|
+
if (layout.kind === 'multi')
|
|
2476
|
+
return layout.rows[shortId];
|
|
2477
|
+
const value = layout.values[shortId];
|
|
2478
|
+
if (value === undefined)
|
|
2479
|
+
return {};
|
|
2480
|
+
return { [layout.field]: value };
|
|
2481
|
+
}
|
|
2482
|
+
function resizeStoredFields(layout, length) {
|
|
2483
|
+
if (layout.kind === 'none')
|
|
2484
|
+
return layout;
|
|
2485
|
+
if (layout.kind === 'single') {
|
|
2486
|
+
return layout.values.length <= length
|
|
2487
|
+
? layout
|
|
2488
|
+
: { kind: 'single', field: layout.field, values: layout.values.slice(0, length) };
|
|
2489
|
+
}
|
|
2490
|
+
return layout.rows.length <= length
|
|
2491
|
+
? layout
|
|
2492
|
+
: { kind: 'multi', rows: layout.rows.slice(0, length) };
|
|
2493
|
+
}
|
|
2494
|
+
function cloneStoredFields(layout) {
|
|
2495
|
+
if (layout.kind === 'none')
|
|
2496
|
+
return layout;
|
|
2497
|
+
if (layout.kind === 'single') {
|
|
2498
|
+
return { kind: 'single', field: layout.field, values: layout.values.slice() };
|
|
2499
|
+
}
|
|
2500
|
+
return { kind: 'multi', rows: layout.rows.slice() };
|
|
2501
|
+
}
|
|
2502
|
+
/** Import from wire rows or lucaong snapshot. Empty storeFields + non-empty rows → multi (binary load without options). */
|
|
2503
|
+
function storedFieldsFromRows(rows, storeFields) {
|
|
2504
|
+
if (storeFields.length === 0) {
|
|
2505
|
+
const hasAny = rows.some(row => row != null && Object.keys(row).length > 0);
|
|
2506
|
+
return hasAny ? { kind: 'multi', rows } : { kind: 'none' };
|
|
2507
|
+
}
|
|
2508
|
+
if (storeFields.length === 1) {
|
|
2509
|
+
const field = storeFields[0];
|
|
2510
|
+
const values = rows.map(row => row === null || row === void 0 ? void 0 : row[field]);
|
|
2511
|
+
return { kind: 'single', field, values };
|
|
2512
|
+
}
|
|
2513
|
+
return { kind: 'multi', rows };
|
|
2514
|
+
}
|
|
2515
|
+
function storedFieldsJsonBytes(layout) {
|
|
2516
|
+
if (layout.kind === 'none')
|
|
2517
|
+
return 0;
|
|
2518
|
+
if (layout.kind === 'multi') {
|
|
2519
|
+
let total = 0;
|
|
2520
|
+
for (const row of layout.rows) {
|
|
2521
|
+
if (row != null)
|
|
2522
|
+
total += JSON.stringify(row).length;
|
|
2523
|
+
}
|
|
2524
|
+
return total;
|
|
2525
|
+
}
|
|
2526
|
+
let total = 0;
|
|
2527
|
+
const { field, values } = layout;
|
|
2528
|
+
for (let i = 0; i < values.length; i++) {
|
|
2529
|
+
const value = values[i];
|
|
2530
|
+
if (value !== undefined)
|
|
2531
|
+
total += JSON.stringify({ [field]: value }).length;
|
|
2245
2532
|
}
|
|
2246
|
-
return
|
|
2533
|
+
return total;
|
|
2534
|
+
}
|
|
2535
|
+
function storedFieldsSlotCount(layout) {
|
|
2536
|
+
if (layout.kind === 'none')
|
|
2537
|
+
return 0;
|
|
2538
|
+
return layout.kind === 'single' ? layout.values.length : layout.rows.length;
|
|
2539
|
+
}
|
|
2540
|
+
function appendStoredFieldJsonEntry(table, heapChunks, heapOffRef, docIndex, jsonUtf8) {
|
|
2541
|
+
table.writeUInt32LE(heapOffRef.value + 1, docIndex * 4);
|
|
2542
|
+
const entry = Buffer.alloc(4 + jsonUtf8.length);
|
|
2543
|
+
entry.writeUInt32LE(jsonUtf8.length, 0);
|
|
2544
|
+
jsonUtf8.copy(entry, 4);
|
|
2545
|
+
heapChunks.push(entry);
|
|
2546
|
+
heapOffRef.value += entry.length;
|
|
2547
|
+
}
|
|
2548
|
+
/** MSv5 StoredFields section from {@link StoredFieldsLayout} (no intermediate row array). */
|
|
2549
|
+
function buildStoredFieldsWireSection(layout, nextId) {
|
|
2550
|
+
if (layout.kind === 'multi') {
|
|
2551
|
+
const rows = layout.rows.length >= nextId
|
|
2552
|
+
? layout.rows
|
|
2553
|
+
: layout.rows.concat(new Array(nextId - layout.rows.length));
|
|
2554
|
+
return buildStoredFieldsSection(rows, nextId);
|
|
2555
|
+
}
|
|
2556
|
+
const table = Buffer.alloc(nextId * 4);
|
|
2557
|
+
if (layout.kind === 'none')
|
|
2558
|
+
return table;
|
|
2559
|
+
const heapChunks = [];
|
|
2560
|
+
const heapOffRef = { value: 0 };
|
|
2561
|
+
const { field, values } = layout;
|
|
2562
|
+
for (let i = 0; i < nextId; i++) {
|
|
2563
|
+
const value = values[i];
|
|
2564
|
+
if (value === undefined)
|
|
2565
|
+
continue;
|
|
2566
|
+
const jsonUtf8 = Buffer.from(JSON.stringify({ [field]: value }), 'utf8');
|
|
2567
|
+
appendStoredFieldJsonEntry(table, heapChunks, heapOffRef, i, jsonUtf8);
|
|
2568
|
+
}
|
|
2569
|
+
return heapChunks.length === 0 ? table : Buffer.concat([table, ...heapChunks]);
|
|
2570
|
+
}
|
|
2571
|
+
function storedFieldsTableEnd(storedOff, nextId, sectionEnd) {
|
|
2572
|
+
const tableEnd = storedOff + nextId * 4;
|
|
2573
|
+
if (tableEnd > sectionEnd) {
|
|
2574
|
+
throw invalidFrozenIndex('stored fields table out of bounds');
|
|
2575
|
+
}
|
|
2576
|
+
return tableEnd;
|
|
2577
|
+
}
|
|
2578
|
+
function readStoredFieldJsonAt(buf, tableEnd, sectionEnd, rel) {
|
|
2579
|
+
const entryOff = tableEnd + rel - 1;
|
|
2580
|
+
if (entryOff + 4 > sectionEnd) {
|
|
2581
|
+
throw invalidFrozenIndex('stored fields entry offset out of bounds');
|
|
2582
|
+
}
|
|
2583
|
+
const jsonLen = buf.readUInt32LE(entryOff);
|
|
2584
|
+
const jsonStart = entryOff + 4;
|
|
2585
|
+
const jsonEnd = jsonStart + jsonLen;
|
|
2586
|
+
if (jsonEnd > sectionEnd) {
|
|
2587
|
+
throw invalidFrozenIndex('stored fields JSON out of bounds');
|
|
2588
|
+
}
|
|
2589
|
+
return JSON.parse(buf.toString('utf8', jsonStart, jsonEnd));
|
|
2590
|
+
}
|
|
2591
|
+
/** MSv5 StoredFields section → layout (skips row materialization when storeFields hint allows). */
|
|
2592
|
+
function readStoredFieldsWireSection(buf, storedOff, nextId, sectionEnd, storeFields) {
|
|
2593
|
+
const tableEnd = storedFieldsTableEnd(storedOff, nextId, sectionEnd);
|
|
2594
|
+
if (storeFields.length === 1) {
|
|
2595
|
+
const field = storeFields[0];
|
|
2596
|
+
const values = new Array(nextId);
|
|
2597
|
+
for (let i = 0; i < nextId; i++) {
|
|
2598
|
+
const rel = buf.readUInt32LE(storedOff + i * 4);
|
|
2599
|
+
if (rel === 0)
|
|
2600
|
+
continue;
|
|
2601
|
+
const row = readStoredFieldJsonAt(buf, tableEnd, sectionEnd, rel);
|
|
2602
|
+
values[i] = row[field];
|
|
2603
|
+
}
|
|
2604
|
+
return { kind: 'single', field, values };
|
|
2605
|
+
}
|
|
2606
|
+
if (storeFields.length === 0) {
|
|
2607
|
+
let hasAny = false;
|
|
2608
|
+
for (let i = 0; i < nextId; i++) {
|
|
2609
|
+
if (buf.readUInt32LE(storedOff + i * 4) !== 0) {
|
|
2610
|
+
hasAny = true;
|
|
2611
|
+
break;
|
|
2612
|
+
}
|
|
2613
|
+
}
|
|
2614
|
+
if (!hasAny)
|
|
2615
|
+
return { kind: 'none' };
|
|
2616
|
+
}
|
|
2617
|
+
const rows = readStoredFieldsSection(buf, storedOff, nextId, sectionEnd);
|
|
2618
|
+
return storedFieldsFromRows(rows, storeFields);
|
|
2247
2619
|
}
|
|
2248
2620
|
|
|
2249
2621
|
const SUPPORTED_SERIALIZATION_VERSIONS = new Set([1, 2]);
|
|
@@ -2329,7 +2701,7 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2329
2701
|
let shortIdRemap = null;
|
|
2330
2702
|
const resolvedNextId = useDense ? documentCount : nextId;
|
|
2331
2703
|
const externalIds = new Array(resolvedNextId);
|
|
2332
|
-
const
|
|
2704
|
+
const storedFieldRows = new Array(externalIds.length);
|
|
2333
2705
|
if (useDense) {
|
|
2334
2706
|
shortIdRemap = new Uint32Array(nextId);
|
|
2335
2707
|
shortIdRemap.fill(DISCARDED_DOC_ID);
|
|
@@ -2341,7 +2713,7 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2341
2713
|
const shortIdStr = String(shortId);
|
|
2342
2714
|
shortIdRemap[shortId] = dense;
|
|
2343
2715
|
externalIds[dense] = snapshot.documentIds[shortIdStr];
|
|
2344
|
-
|
|
2716
|
+
storedFieldRows[dense] = snapshot.storedFields[shortIdStr];
|
|
2345
2717
|
dense++;
|
|
2346
2718
|
}
|
|
2347
2719
|
}
|
|
@@ -2349,7 +2721,7 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2349
2721
|
for (const [shortIdStr, id] of Object.entries(snapshot.documentIds)) {
|
|
2350
2722
|
const shortId = parseInt(shortIdStr, 10);
|
|
2351
2723
|
externalIds[shortId] = id;
|
|
2352
|
-
|
|
2724
|
+
storedFieldRows[shortId] = snapshot.storedFields[shortIdStr];
|
|
2353
2725
|
}
|
|
2354
2726
|
}
|
|
2355
2727
|
const idLookup = createIdToShortIdLookup(externalIds, resolvedNextId);
|
|
@@ -2372,6 +2744,7 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2372
2744
|
}
|
|
2373
2745
|
const searchableMap = buildSearchableMapFromSnapshot(snapshot);
|
|
2374
2746
|
const flat = buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, resolvedNextId, shortIdRemap);
|
|
2747
|
+
const storedFields = storedFieldsFromRows(storedFieldRows, opts.storeFields);
|
|
2375
2748
|
return {
|
|
2376
2749
|
options: opts,
|
|
2377
2750
|
documentCount,
|
|
@@ -2790,206 +3163,6 @@ function readMsv5GlobalFlags(buf) {
|
|
|
2790
3163
|
return buf.readUInt16LE(6);
|
|
2791
3164
|
}
|
|
2792
3165
|
|
|
2793
|
-
function validateTreeShape(shape, termCount) {
|
|
2794
|
-
if (!Array.isArray(shape)) {
|
|
2795
|
-
throw invalidFrozenIndex('treeShape node must be an array');
|
|
2796
|
-
}
|
|
2797
|
-
for (const entry of shape) {
|
|
2798
|
-
if (!Array.isArray(entry) || entry.length !== 2) {
|
|
2799
|
-
throw invalidFrozenIndex('treeShape entry must be a [key, value] pair');
|
|
2800
|
-
}
|
|
2801
|
-
const [key, value] = entry;
|
|
2802
|
-
if (key === LEAF) {
|
|
2803
|
-
const idx = value;
|
|
2804
|
-
if (!Number.isInteger(idx) || idx < 0 || idx >= termCount) {
|
|
2805
|
-
throw invalidFrozenIndex(`treeShape leaf term index out of range: ${idx}`);
|
|
2806
|
-
}
|
|
2807
|
-
}
|
|
2808
|
-
else {
|
|
2809
|
-
validateTreeShape(value, termCount);
|
|
2810
|
-
}
|
|
2811
|
-
}
|
|
2812
|
-
}
|
|
2813
|
-
function termCountOf(snap) {
|
|
2814
|
-
return snap.postings.termCount;
|
|
2815
|
-
}
|
|
2816
|
-
/**
|
|
2817
|
-
* Numeric/structural invariants shared by both the decode path (untrusted binary)
|
|
2818
|
-
* and the build path (trusted internal code).
|
|
2819
|
-
*/
|
|
2820
|
-
function validateFrozenSnapshotNumeric(snap) {
|
|
2821
|
-
if (snap.fieldCount <= 0) {
|
|
2822
|
-
throw invalidFrozenIndex('fieldCount must be positive');
|
|
2823
|
-
}
|
|
2824
|
-
if (snap.nextId < 0 || snap.nextId >= 0xffffffff) {
|
|
2825
|
-
throw invalidFrozenIndex('nextId out of range');
|
|
2826
|
-
}
|
|
2827
|
-
if (snap.documentCount < 0 || snap.documentCount > snap.nextId) {
|
|
2828
|
-
throw invalidFrozenIndex('documentCount inconsistent with nextId');
|
|
2829
|
-
}
|
|
2830
|
-
if (snap.fieldLengthMatrix.length !== snap.nextId * snap.fieldCount) {
|
|
2831
|
-
throw invalidFrozenIndex('fieldLengthMatrix size mismatch');
|
|
2832
|
-
}
|
|
2833
|
-
if (snap.avgFieldLength.length !== snap.fieldCount) {
|
|
2834
|
-
throw invalidFrozenIndex('avgFieldLength size mismatch');
|
|
2835
|
-
}
|
|
2836
|
-
validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, detail => {
|
|
2837
|
-
throw invalidFrozenIndex(detail);
|
|
2838
|
-
});
|
|
2839
|
-
const indexedFields = Object.keys(snap.fieldIds);
|
|
2840
|
-
if (indexedFields.length !== snap.fieldCount) {
|
|
2841
|
-
throw invalidFrozenIndex('fieldIds count mismatch');
|
|
2842
|
-
}
|
|
2843
|
-
for (let f = 0; f < snap.fieldCount; f++) {
|
|
2844
|
-
const found = indexedFields.some(name => snap.fieldIds[name] === f);
|
|
2845
|
-
if (!found) {
|
|
2846
|
-
throw invalidFrozenIndex(`missing field id ${f}`);
|
|
2847
|
-
}
|
|
2848
|
-
}
|
|
2849
|
-
}
|
|
2850
|
-
function readFieldNamesSection(buf, fieldNamesOff, fieldCount, externalIdsOff) {
|
|
2851
|
-
const fieldNames = [];
|
|
2852
|
-
let o = fieldNamesOff;
|
|
2853
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
2854
|
-
const { value, next } = readLengthPrefixedUtf8(buf, o);
|
|
2855
|
-
fieldNames.push(value);
|
|
2856
|
-
o = next;
|
|
2857
|
-
}
|
|
2858
|
-
if (o !== externalIdsOff) {
|
|
2859
|
-
throw invalidFrozenIndex('field names section size mismatch');
|
|
2860
|
-
}
|
|
2861
|
-
return fieldNames;
|
|
2862
|
-
}
|
|
2863
|
-
function readExternalIdsSection(buf, externalIdsOff, nextId, storedOff) {
|
|
2864
|
-
const externalIds = new Array(nextId);
|
|
2865
|
-
let o = externalIdsOff;
|
|
2866
|
-
for (let i = 0; i < nextId; i++) {
|
|
2867
|
-
const { value, next } = readExternalId(buf, o);
|
|
2868
|
-
externalIds[i] = value;
|
|
2869
|
-
o = next;
|
|
2870
|
-
}
|
|
2871
|
-
if (o !== storedOff) {
|
|
2872
|
-
throw invalidFrozenIndex('external ids section size mismatch');
|
|
2873
|
-
}
|
|
2874
|
-
return externalIds;
|
|
2875
|
-
}
|
|
2876
|
-
function readStoredFieldsSection(buf, storedOff, nextId, sectionEnd) {
|
|
2877
|
-
const storedFields = new Array(nextId);
|
|
2878
|
-
const tableEnd = storedOff + nextId * 4;
|
|
2879
|
-
if (tableEnd > sectionEnd) {
|
|
2880
|
-
throw invalidFrozenIndex('stored fields table out of bounds');
|
|
2881
|
-
}
|
|
2882
|
-
for (let i = 0; i < nextId; i++) {
|
|
2883
|
-
const rel = buf.readUInt32LE(storedOff + i * 4);
|
|
2884
|
-
if (rel === 0) {
|
|
2885
|
-
storedFields[i] = undefined;
|
|
2886
|
-
continue;
|
|
2887
|
-
}
|
|
2888
|
-
const entryOff = tableEnd + rel - 1;
|
|
2889
|
-
if (entryOff + 4 > sectionEnd) {
|
|
2890
|
-
throw invalidFrozenIndex('stored fields entry offset out of bounds');
|
|
2891
|
-
}
|
|
2892
|
-
const jsonLen = buf.readUInt32LE(entryOff);
|
|
2893
|
-
const jsonStart = entryOff + 4;
|
|
2894
|
-
const jsonEnd = jsonStart + jsonLen;
|
|
2895
|
-
if (jsonEnd > sectionEnd) {
|
|
2896
|
-
throw invalidFrozenIndex('stored fields JSON out of bounds');
|
|
2897
|
-
}
|
|
2898
|
-
storedFields[i] = JSON.parse(buf.toString('utf8', jsonStart, jsonEnd));
|
|
2899
|
-
}
|
|
2900
|
-
return storedFields;
|
|
2901
|
-
}
|
|
2902
|
-
/** Validate structural invariants of a decoded or assembled frozen snapshot. */
|
|
2903
|
-
function validateFrozenSnapshot(snap) {
|
|
2904
|
-
validateFrozenSnapshotNumeric(snap);
|
|
2905
|
-
const termCount = termCountOf(snap);
|
|
2906
|
-
if (snap.packedTermIndex != null) {
|
|
2907
|
-
validateFrozenTermIndexLeaves(snap.packedTermIndex, termCount);
|
|
2908
|
-
}
|
|
2909
|
-
else if (snap.termTree != null) {
|
|
2910
|
-
validateTermTreeLeaves(snap.termTree, termCount);
|
|
2911
|
-
}
|
|
2912
|
-
else {
|
|
2913
|
-
validateTreeShape(snap.treeShape, termCount);
|
|
2914
|
-
}
|
|
2915
|
-
}
|
|
2916
|
-
function fieldNamesFromFieldIds(fieldIds) {
|
|
2917
|
-
const names = Object.keys(fieldIds);
|
|
2918
|
-
names.sort((a, b) => fieldIds[a] - fieldIds[b]);
|
|
2919
|
-
return names;
|
|
2920
|
-
}
|
|
2921
|
-
/** Core with explicit {@link termCountOf} (no dictionary section). */
|
|
2922
|
-
function buildCoreSectionWithTermCount(snap) {
|
|
2923
|
-
const out = Buffer.alloc(16);
|
|
2924
|
-
out.writeUInt32LE(snap.documentCount, 0);
|
|
2925
|
-
out.writeUInt32LE(snap.nextId, 4);
|
|
2926
|
-
out.writeUInt32LE(snap.fieldCount, 8);
|
|
2927
|
-
out.writeUInt32LE(termCountOf(snap), 12);
|
|
2928
|
-
return out;
|
|
2929
|
-
}
|
|
2930
|
-
function buildFieldNamesSection(fieldNames) {
|
|
2931
|
-
const chunks = [];
|
|
2932
|
-
for (const name of fieldNames) {
|
|
2933
|
-
const body = Buffer.from(name, 'utf8');
|
|
2934
|
-
const header = Buffer.alloc(4);
|
|
2935
|
-
header.writeUInt32LE(body.length, 0);
|
|
2936
|
-
chunks.push(header, body);
|
|
2937
|
-
}
|
|
2938
|
-
return Buffer.concat(chunks);
|
|
2939
|
-
}
|
|
2940
|
-
function buildExternalIdsSection(externalIds, nextId) {
|
|
2941
|
-
const chunks = [];
|
|
2942
|
-
for (let i = 0; i < nextId; i++) {
|
|
2943
|
-
writeExternalId(chunks, externalIds[i]);
|
|
2944
|
-
}
|
|
2945
|
-
return Buffer.concat(chunks);
|
|
2946
|
-
}
|
|
2947
|
-
function buildStoredFieldsSection(storedFields, nextId) {
|
|
2948
|
-
const table = Buffer.alloc(nextId * 4);
|
|
2949
|
-
const heapChunks = [];
|
|
2950
|
-
let heapOff = 0;
|
|
2951
|
-
for (let i = 0; i < nextId; i++) {
|
|
2952
|
-
const row = storedFields[i];
|
|
2953
|
-
if (row == null) {
|
|
2954
|
-
table.writeUInt32LE(0, i * 4);
|
|
2955
|
-
continue;
|
|
2956
|
-
}
|
|
2957
|
-
table.writeUInt32LE(heapOff + 1, i * 4);
|
|
2958
|
-
const json = Buffer.from(JSON.stringify(row), 'utf8');
|
|
2959
|
-
const entry = Buffer.alloc(4 + json.length);
|
|
2960
|
-
entry.writeUInt32LE(json.length, 0);
|
|
2961
|
-
json.copy(entry, 4);
|
|
2962
|
-
heapChunks.push(entry);
|
|
2963
|
-
heapOff += entry.length;
|
|
2964
|
-
}
|
|
2965
|
-
return Buffer.concat([table, ...heapChunks]);
|
|
2966
|
-
}
|
|
2967
|
-
function validateTermTreeLeaves(tree, termCount) {
|
|
2968
|
-
for (const [key, val] of tree) {
|
|
2969
|
-
if (key === LEAF) {
|
|
2970
|
-
const idx = val;
|
|
2971
|
-
if (!Number.isInteger(idx) || idx < 0 || idx >= termCount) {
|
|
2972
|
-
throw invalidFrozenIndex(`term tree leaf index out of range: ${idx}`);
|
|
2973
|
-
}
|
|
2974
|
-
}
|
|
2975
|
-
else {
|
|
2976
|
-
validateTermTreeLeaves(val, termCount);
|
|
2977
|
-
}
|
|
2978
|
-
}
|
|
2979
|
-
}
|
|
2980
|
-
function deserializeTermIndexTree(shape) {
|
|
2981
|
-
const tree = new Map();
|
|
2982
|
-
for (const [key, value] of shape) {
|
|
2983
|
-
if (key === LEAF) {
|
|
2984
|
-
tree.set(LEAF, value);
|
|
2985
|
-
}
|
|
2986
|
-
else {
|
|
2987
|
-
tree.set(key, deserializeTermIndexTree(value));
|
|
2988
|
-
}
|
|
2989
|
-
}
|
|
2990
|
-
return tree;
|
|
2991
|
-
}
|
|
2992
|
-
|
|
2993
3166
|
/** Global wire flags for {@link FreqArray} width. */
|
|
2994
3167
|
function freqWireFlags(freqs) {
|
|
2995
3168
|
if (freqs instanceof Uint16Array)
|
|
@@ -3281,11 +3454,14 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
|
3281
3454
|
const flFlags = fieldLengthMatrixWireFlags(snap.fieldLengthMatrix);
|
|
3282
3455
|
const freqFlags = freqWireFlags(snap.postings.allFreqs);
|
|
3283
3456
|
const globalFlags = postingsWire.flags | flFlags | freqFlags;
|
|
3457
|
+
const storedFieldsSection = snap.storedFieldsLayout != null
|
|
3458
|
+
? buildStoredFieldsWireSection(snap.storedFieldsLayout, snap.nextId)
|
|
3459
|
+
: buildStoredFieldsSection(snap.storedFields, snap.nextId);
|
|
3284
3460
|
const rawSections = [
|
|
3285
3461
|
buildCoreSectionWithTermCount(snap),
|
|
3286
3462
|
buildFieldNamesSection(fieldNames),
|
|
3287
3463
|
buildExternalIdsSection(snap.externalIds, snap.nextId),
|
|
3288
|
-
|
|
3464
|
+
storedFieldsSection,
|
|
3289
3465
|
buildTermTreeSectionColumnar(packed),
|
|
3290
3466
|
bufferFromView(snap.avgFieldLength),
|
|
3291
3467
|
buildFieldLengthMatrixSection(snap.fieldLengthMatrix),
|
|
@@ -3309,11 +3485,14 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
|
3309
3485
|
const flFlags = fieldLengthMatrixWireFlags(snap.fieldLengthMatrix);
|
|
3310
3486
|
const freqFlags = freqWireFlags(snap.postings.allFreqs);
|
|
3311
3487
|
const globalFlags = postingsWire.flags | flFlags | freqFlags;
|
|
3488
|
+
const storedFieldsSection = snap.storedFieldsLayout != null
|
|
3489
|
+
? buildStoredFieldsWireSection(snap.storedFieldsLayout, snap.nextId)
|
|
3490
|
+
: buildStoredFieldsSection(snap.storedFields, snap.nextId);
|
|
3312
3491
|
const rawSections = [
|
|
3313
3492
|
buildCoreSectionWithTermCount(snap),
|
|
3314
3493
|
buildFieldNamesSection(fieldNames),
|
|
3315
3494
|
buildExternalIdsSection(snap.externalIds, snap.nextId),
|
|
3316
|
-
|
|
3495
|
+
storedFieldsSection,
|
|
3317
3496
|
buildTermTreeSectionColumnar(packed),
|
|
3318
3497
|
bufferFromView(snap.avgFieldLength),
|
|
3319
3498
|
buildFieldLengthMatrixSection(snap.fieldLengthMatrix),
|
|
@@ -3343,7 +3522,7 @@ function validateMsv5Container(buf) {
|
|
|
3343
3522
|
}
|
|
3344
3523
|
return { globalFlags, directory };
|
|
3345
3524
|
}
|
|
3346
|
-
function decodeMsv5Sections(globalFlags, sections) {
|
|
3525
|
+
function decodeMsv5Sections(globalFlags, sections, hints) {
|
|
3347
3526
|
const core = sections[0 /* Msv5SectionId.Core */];
|
|
3348
3527
|
if (core.length !== 16) {
|
|
3349
3528
|
throw invalidFrozenIndex('core section size mismatch');
|
|
@@ -3358,7 +3537,12 @@ function decodeMsv5Sections(globalFlags, sections) {
|
|
|
3358
3537
|
fieldIds[fieldNames[f]] = f;
|
|
3359
3538
|
}
|
|
3360
3539
|
const externalIds = readExternalIdsSection(sections[2 /* Msv5SectionId.ExternalIds */], 0, nextId, sections[2 /* Msv5SectionId.ExternalIds */].length);
|
|
3361
|
-
const
|
|
3540
|
+
const storedFieldsLayout = hints != null
|
|
3541
|
+
? readStoredFieldsWireSection(sections[3 /* Msv5SectionId.StoredFields */], 0, nextId, sections[3 /* Msv5SectionId.StoredFields */].length, hints.storeFields)
|
|
3542
|
+
: undefined;
|
|
3543
|
+
const storedFields = storedFieldsLayout != null
|
|
3544
|
+
? new Array(nextId)
|
|
3545
|
+
: readStoredFieldsSection(sections[3 /* Msv5SectionId.StoredFields */], 0, nextId, sections[3 /* Msv5SectionId.StoredFields */].length);
|
|
3362
3546
|
const packedTermIndex = readPackedTermTreeSectionColumnar(sections[4 /* Msv5SectionId.TermTree */], termCount);
|
|
3363
3547
|
const avgBuf = sections[5 /* Msv5SectionId.AvgFieldLength */];
|
|
3364
3548
|
const avgFieldLength = readFloat32Array(avgBuf, 0, avgBuf.length);
|
|
@@ -3376,6 +3560,7 @@ function decodeMsv5Sections(globalFlags, sections) {
|
|
|
3376
3560
|
avgFieldLength,
|
|
3377
3561
|
externalIds,
|
|
3378
3562
|
storedFields,
|
|
3563
|
+
storedFieldsLayout,
|
|
3379
3564
|
fieldLengthMatrix,
|
|
3380
3565
|
treeShape: [],
|
|
3381
3566
|
packedTermIndex,
|
|
@@ -3384,13 +3569,13 @@ function decodeMsv5Sections(globalFlags, sections) {
|
|
|
3384
3569
|
validateFrozenSnapshot(snap);
|
|
3385
3570
|
return snap;
|
|
3386
3571
|
}
|
|
3387
|
-
function decodeFrozenSnapshotMsv5(buf) {
|
|
3572
|
+
function decodeFrozenSnapshotMsv5(buf, hints) {
|
|
3388
3573
|
const { globalFlags, directory } = validateMsv5Container(buf);
|
|
3389
|
-
return decodeMsv5Sections(globalFlags, loadMsv5Sections(buf, directory));
|
|
3574
|
+
return decodeMsv5Sections(globalFlags, loadMsv5Sections(buf, directory), hints);
|
|
3390
3575
|
}
|
|
3391
|
-
async function decodeFrozenSnapshotMsv5Async(buf) {
|
|
3576
|
+
async function decodeFrozenSnapshotMsv5Async(buf, hints) {
|
|
3392
3577
|
const { globalFlags, directory } = validateMsv5Container(buf);
|
|
3393
|
-
return decodeMsv5Sections(globalFlags, await loadMsv5SectionsAsync(buf, directory));
|
|
3578
|
+
return decodeMsv5Sections(globalFlags, await loadMsv5SectionsAsync(buf, directory), hints);
|
|
3394
3579
|
}
|
|
3395
3580
|
|
|
3396
3581
|
/** Encode a frozen snapshot as a binary buffer. */
|
|
@@ -3404,12 +3589,12 @@ function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
|
|
|
3404
3589
|
|
|
3405
3590
|
const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
|
|
3406
3591
|
/** Decode a frozen binary snapshot buffer. */
|
|
3407
|
-
function decodeFrozenSnapshot(buf) {
|
|
3592
|
+
function decodeFrozenSnapshot(buf, hints) {
|
|
3408
3593
|
assertBufferLength(buf, 8);
|
|
3409
3594
|
const magic = buf.toString('ascii', 0, 4);
|
|
3410
3595
|
const version = buf.readUInt16LE(4);
|
|
3411
3596
|
if (isMsv5Buffer(buf) && version === 5) {
|
|
3412
|
-
return decodeFrozenSnapshotMsv5(buf);
|
|
3597
|
+
return decodeFrozenSnapshotMsv5(buf, hints);
|
|
3413
3598
|
}
|
|
3414
3599
|
if (LEGACY_MAGICS.has(magic)) {
|
|
3415
3600
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot; re-build with saveBinarySync() or from lucaong JSON');
|
|
@@ -3417,13 +3602,13 @@ function decodeFrozenSnapshot(buf) {
|
|
|
3417
3602
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot');
|
|
3418
3603
|
}
|
|
3419
3604
|
/** Async frozen snapshot decode (streaming zstd). */
|
|
3420
|
-
async function decodeFrozenSnapshotAsync(buf) {
|
|
3605
|
+
async function decodeFrozenSnapshotAsync(buf, hints) {
|
|
3421
3606
|
assertBufferLength(buf, 8);
|
|
3422
3607
|
const version = buf.readUInt16LE(4);
|
|
3423
3608
|
if (isMsv5Buffer(buf) && version === 5) {
|
|
3424
|
-
return decodeFrozenSnapshotMsv5Async(buf);
|
|
3609
|
+
return decodeFrozenSnapshotMsv5Async(buf, hints);
|
|
3425
3610
|
}
|
|
3426
|
-
return decodeFrozenSnapshot(buf);
|
|
3611
|
+
return decodeFrozenSnapshot(buf, hints);
|
|
3427
3612
|
}
|
|
3428
3613
|
|
|
3429
3614
|
const DEFAULT_CAPACITY = 16;
|
|
@@ -3685,14 +3870,13 @@ class FrozenIndexBuilder {
|
|
|
3685
3870
|
this._nextId = 0;
|
|
3686
3871
|
this._frozen = false;
|
|
3687
3872
|
const estimated = hints === null || hints === void 0 ? void 0 : hints.estimatedDocumentCount;
|
|
3873
|
+
this._storedFields = createStoredFieldsLayout(this._options.storeFields, estimated !== null && estimated !== void 0 ? estimated : 0);
|
|
3688
3874
|
if (estimated != null && estimated > 0) {
|
|
3689
3875
|
this._externalIds = new Array(estimated);
|
|
3690
|
-
this._storedFields = new Array(estimated);
|
|
3691
3876
|
this._fieldLengthData = new Array(estimated * this._fieldCount).fill(0);
|
|
3692
3877
|
}
|
|
3693
3878
|
else {
|
|
3694
3879
|
this._externalIds = [];
|
|
3695
|
-
this._storedFields = [];
|
|
3696
3880
|
this._fieldLengthData = [];
|
|
3697
3881
|
}
|
|
3698
3882
|
}
|
|
@@ -3715,7 +3899,7 @@ class FrozenIndexBuilder {
|
|
|
3715
3899
|
this._seenIds.add(id);
|
|
3716
3900
|
const shortId = this._nextId++;
|
|
3717
3901
|
this._externalIds[shortId] = id;
|
|
3718
|
-
this._storedFields
|
|
3902
|
+
writeStoredField(this._storedFields, shortId, storeFields, extractField, document);
|
|
3719
3903
|
const documentCount = shortId + 1;
|
|
3720
3904
|
for (const field of fields) {
|
|
3721
3905
|
const fieldValue = extractField(document, field);
|
|
@@ -3801,9 +3985,7 @@ class FrozenIndexBuilder {
|
|
|
3801
3985
|
const externalIds = this._externalIds.length > documentCount
|
|
3802
3986
|
? this._externalIds.slice(0, documentCount)
|
|
3803
3987
|
: this._externalIds;
|
|
3804
|
-
const storedFields = this._storedFields
|
|
3805
|
-
? this._storedFields.slice(0, documentCount)
|
|
3806
|
-
: this._storedFields;
|
|
3988
|
+
const storedFields = resizeStoredFields(this._storedFields, documentCount);
|
|
3807
3989
|
const idLookup = createIdToShortIdLookup(externalIds, documentCount);
|
|
3808
3990
|
return {
|
|
3809
3991
|
options: this._options,
|
|
@@ -4231,7 +4413,7 @@ function shallowCopyJsSnapshotFields(params) {
|
|
|
4231
4413
|
return {
|
|
4232
4414
|
fieldIds: { ...params.fieldIds },
|
|
4233
4415
|
options: shallowCopyOptions(params.options),
|
|
4234
|
-
storedFields: params.storedFields
|
|
4416
|
+
storedFields: cloneStoredFields(params.storedFields),
|
|
4235
4417
|
};
|
|
4236
4418
|
}
|
|
4237
4419
|
/**
|
|
@@ -4316,7 +4498,7 @@ class FrozenMiniSearch {
|
|
|
4316
4498
|
fieldIds: this._fieldIds,
|
|
4317
4499
|
getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
|
|
4318
4500
|
getExternalId: docId => this._externalIds[docId],
|
|
4319
|
-
getStoredFields: docId => this._storedFields
|
|
4501
|
+
getStoredFields: docId => readStoredFields(this._storedFields, docId),
|
|
4320
4502
|
};
|
|
4321
4503
|
this._queryEngineParams = {
|
|
4322
4504
|
fields: this._options.fields,
|
|
@@ -4328,7 +4510,7 @@ class FrozenMiniSearch {
|
|
|
4328
4510
|
const id = this._externalIds[shortId];
|
|
4329
4511
|
if (id === undefined)
|
|
4330
4512
|
continue;
|
|
4331
|
-
callback(shortId, id, this._storedFields
|
|
4513
|
+
callback(shortId, id, readStoredFields(this._storedFields, shortId));
|
|
4332
4514
|
}
|
|
4333
4515
|
}),
|
|
4334
4516
|
aggregateContext: this._aggregateContext,
|
|
@@ -4339,11 +4521,7 @@ class FrozenMiniSearch {
|
|
|
4339
4521
|
memoryBreakdown() {
|
|
4340
4522
|
const termCount = this.termCount;
|
|
4341
4523
|
const postingsStats = postingsTypedBytes(this._postings);
|
|
4342
|
-
|
|
4343
|
-
for (const row of this._storedFields) {
|
|
4344
|
-
if (row != null)
|
|
4345
|
-
storedJson += JSON.stringify(row).length;
|
|
4346
|
-
}
|
|
4524
|
+
const storedJson = storedFieldsJsonBytes(this._storedFields);
|
|
4347
4525
|
const radixEst = this._index.packedByteLength();
|
|
4348
4526
|
const idMapBytes = this._idLookup.mode === 'lazy-map' ? this._idLookup.mapEntryCount * 32 : 0;
|
|
4349
4527
|
const estimatedStructuredBytes = postingsStats.totalTypedBytes
|
|
@@ -4373,7 +4551,7 @@ class FrozenMiniSearch {
|
|
|
4373
4551
|
},
|
|
4374
4552
|
documents: {
|
|
4375
4553
|
externalIdsSlots: this._externalIds.length,
|
|
4376
|
-
storedFieldsSlots: this._storedFields
|
|
4554
|
+
storedFieldsSlots: storedFieldsSlotCount(this._storedFields),
|
|
4377
4555
|
idLookupMode: this._idLookup.mode,
|
|
4378
4556
|
idToShortIdEntries: this._idLookup.mapEntryCount,
|
|
4379
4557
|
fieldLengthMatrixBytes: this._fieldLengthMatrix.byteLength,
|
|
@@ -4388,10 +4566,10 @@ class FrozenMiniSearch {
|
|
|
4388
4566
|
}
|
|
4389
4567
|
getStoredFields(id) {
|
|
4390
4568
|
const shortId = this._idLookup.get(id);
|
|
4391
|
-
return shortId == null ? undefined : this._storedFields
|
|
4569
|
+
return shortId == null ? undefined : readStoredFields(this._storedFields, shortId);
|
|
4392
4570
|
}
|
|
4393
4571
|
search(query, searchOptions = {}) {
|
|
4394
|
-
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], docId => this._storedFields
|
|
4572
|
+
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], docId => readStoredFields(this._storedFields, docId));
|
|
4395
4573
|
}
|
|
4396
4574
|
autoSuggest(queryString, options = {}) {
|
|
4397
4575
|
const merged = { ...this._options.autoSuggestOptions, ...options };
|
|
@@ -4407,7 +4585,8 @@ class FrozenMiniSearch {
|
|
|
4407
4585
|
fieldNames: fieldNamesFromFieldIds(this._fieldIds),
|
|
4408
4586
|
avgFieldLength: this._avgFieldLength,
|
|
4409
4587
|
externalIds: this._externalIds,
|
|
4410
|
-
storedFields: this.
|
|
4588
|
+
storedFields: new Array(this._nextId),
|
|
4589
|
+
storedFieldsLayout: this._storedFields,
|
|
4411
4590
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4412
4591
|
treeShape: [],
|
|
4413
4592
|
postings: this._postings,
|
|
@@ -4423,7 +4602,8 @@ class FrozenMiniSearch {
|
|
|
4423
4602
|
fieldNames: fieldNamesFromFieldIds(this._fieldIds),
|
|
4424
4603
|
avgFieldLength: this._avgFieldLength,
|
|
4425
4604
|
externalIds: this._externalIds,
|
|
4426
|
-
storedFields: this.
|
|
4605
|
+
storedFields: new Array(this._nextId),
|
|
4606
|
+
storedFieldsLayout: this._storedFields,
|
|
4427
4607
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4428
4608
|
treeShape: [],
|
|
4429
4609
|
postings: this._postings,
|
|
@@ -4431,16 +4611,20 @@ class FrozenMiniSearch {
|
|
|
4431
4611
|
}
|
|
4432
4612
|
/** Load a frozen binary snapshot. */
|
|
4433
4613
|
static loadBinarySync(buffer, options = {}) {
|
|
4434
|
-
|
|
4614
|
+
var _a;
|
|
4615
|
+
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|
|
4616
|
+
const snap = decodeFrozenSnapshot(buffer, { storeFields });
|
|
4435
4617
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4436
4618
|
}
|
|
4437
4619
|
/** Load a frozen binary snapshot with streaming zstd decompression (bounded memory). */
|
|
4438
4620
|
static async loadBinaryAsync(buffer, options = {}) {
|
|
4439
|
-
|
|
4621
|
+
var _a;
|
|
4622
|
+
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|
|
4623
|
+
const snap = await decodeFrozenSnapshotAsync(buffer, { storeFields });
|
|
4440
4624
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4441
4625
|
}
|
|
4442
4626
|
static fromBinarySnapshot(snap, options) {
|
|
4443
|
-
var _a, _b;
|
|
4627
|
+
var _a, _b, _c;
|
|
4444
4628
|
const snapshotFields = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
4445
4629
|
if (options.fields != null) {
|
|
4446
4630
|
assertFieldsMatchSnapshot(options.fields, snap.fieldIds);
|
|
@@ -4468,7 +4652,7 @@ class FrozenMiniSearch {
|
|
|
4468
4652
|
fieldCount: snap.fieldCount,
|
|
4469
4653
|
externalIds: snap.externalIds,
|
|
4470
4654
|
idLookup,
|
|
4471
|
-
storedFields: snap.storedFields,
|
|
4655
|
+
storedFields: (_c = snap.storedFieldsLayout) !== null && _c !== void 0 ? _c : storedFieldsFromRows(snap.storedFields, opts.storeFields),
|
|
4472
4656
|
fieldLengthMatrix: snap.fieldLengthMatrix,
|
|
4473
4657
|
avgFieldLength: snap.avgFieldLength,
|
|
4474
4658
|
index,
|