@yoch/frozenminisearch 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/README.md +1 -1
- package/dist/cjs/index.cjs +444 -260
- package/dist/es/index.d.ts +16 -1
- package/dist/es/index.js +444 -260
- package/package.json +1 -1
package/dist/cjs/index.cjs
CHANGED
|
@@ -2020,24 +2020,33 @@ function findSparseSlotByFieldId(fieldIds, start, end, fieldId) {
|
|
|
2020
2020
|
}
|
|
2021
2021
|
return -1;
|
|
2022
2022
|
}
|
|
2023
|
-
/**
|
|
2024
|
-
|
|
2023
|
+
/** Reusable scratch for {@link resolvePostingSlice} (scoring is synchronous). */
|
|
2024
|
+
const postingSliceScratch = { offset: 0, length: 0 };
|
|
2025
|
+
/**
|
|
2026
|
+
* Resolve one (termIndex, fieldId) posting run in flat buffers; writes into `out` without allocating.
|
|
2027
|
+
* @returns false when the slot is empty or missing
|
|
2028
|
+
*/
|
|
2029
|
+
function resolvePostingSlice(layout, termIndex, fieldId, out) {
|
|
2025
2030
|
if (layout.layout === 'dense') {
|
|
2026
2031
|
const base = termIndex * layout.fieldCount + fieldId;
|
|
2027
2032
|
const len = layout.denseLengths[base];
|
|
2028
2033
|
if (len === 0)
|
|
2029
|
-
return
|
|
2030
|
-
|
|
2034
|
+
return false;
|
|
2035
|
+
out.offset = layout.denseOffsets[base];
|
|
2036
|
+
out.length = len;
|
|
2037
|
+
return true;
|
|
2031
2038
|
}
|
|
2032
2039
|
const start = layout.sparseTermStarts[termIndex];
|
|
2033
2040
|
const end = layout.sparseTermStarts[termIndex + 1];
|
|
2034
2041
|
const slot = findSparseSlotByFieldId(layout.sparseFieldIds, start, end, fieldId);
|
|
2035
2042
|
if (slot < 0)
|
|
2036
|
-
return
|
|
2043
|
+
return false;
|
|
2037
2044
|
const len = layout.sparseLengths[slot];
|
|
2038
2045
|
if (len === 0)
|
|
2039
|
-
return
|
|
2040
|
-
|
|
2046
|
+
return false;
|
|
2047
|
+
out.offset = layout.sparseOffsets[slot];
|
|
2048
|
+
out.length = len;
|
|
2049
|
+
return true;
|
|
2041
2050
|
}
|
|
2042
2051
|
/**
|
|
2043
2052
|
* One flyweight wrapper for the lifetime of a frozen index. Call {@link bind} before each
|
|
@@ -2053,10 +2062,9 @@ function createFrozenFieldTermFlyweight(layout) {
|
|
|
2053
2062
|
return flyweight;
|
|
2054
2063
|
},
|
|
2055
2064
|
get(fieldId) {
|
|
2056
|
-
|
|
2057
|
-
if (slice == null)
|
|
2065
|
+
if (!resolvePostingSlice(layout, termIndex, fieldId, postingSliceScratch))
|
|
2058
2066
|
return undefined;
|
|
2059
|
-
return segment.rebind(
|
|
2067
|
+
return segment.rebind(postingSliceScratch.offset, postingSliceScratch.length);
|
|
2060
2068
|
},
|
|
2061
2069
|
};
|
|
2062
2070
|
return flyweight;
|
|
@@ -2075,10 +2083,9 @@ function collectDocIdsFromFrozenSegment(allDocIds, offset, length, context, docI
|
|
|
2075
2083
|
function collectDocIdsFromFrozenLayout(layout, termIndex, fieldBoosts, context, docIds, allowedDocs) {
|
|
2076
2084
|
const { fieldIds } = context;
|
|
2077
2085
|
for (const field of fieldBoosts.names) {
|
|
2078
|
-
|
|
2079
|
-
if (slice == null)
|
|
2086
|
+
if (!resolvePostingSlice(layout, termIndex, fieldIds[field], postingSliceScratch))
|
|
2080
2087
|
continue;
|
|
2081
|
-
collectDocIdsFromFrozenSegment(layout.allDocIds,
|
|
2088
|
+
collectDocIdsFromFrozenSegment(layout.allDocIds, postingSliceScratch.offset, postingSliceScratch.length, context, docIds, allowedDocs);
|
|
2082
2089
|
}
|
|
2083
2090
|
}
|
|
2084
2091
|
|
|
@@ -2232,22 +2239,387 @@ function collectFieldTermFreqsFromFieldInto(localFreqs, tokenScratch, tokenize,
|
|
|
2232
2239
|
tokenizeFieldInto(tokenScratch, tokenize, text, fieldName);
|
|
2233
2240
|
return collectFieldTermFreqsInto(localFreqs, tokenScratch, fieldName, processTerm);
|
|
2234
2241
|
}
|
|
2235
|
-
/** Same running average as {@link MiniSearch} private addFieldLength. */
|
|
2236
2242
|
function updateAvgFieldLength(avgFieldLength, fieldId, count, length) {
|
|
2237
2243
|
const averageFieldLength = avgFieldLength[fieldId] || 0;
|
|
2238
2244
|
const totalFieldLength = (averageFieldLength * count) + length;
|
|
2239
2245
|
avgFieldLength[fieldId] = totalFieldLength / (count + 1);
|
|
2240
2246
|
}
|
|
2241
|
-
|
|
2247
|
+
|
|
2248
|
+
function validateTreeShape(shape, termCount) {
|
|
2249
|
+
if (!Array.isArray(shape)) {
|
|
2250
|
+
throw invalidFrozenIndex('treeShape node must be an array');
|
|
2251
|
+
}
|
|
2252
|
+
for (const entry of shape) {
|
|
2253
|
+
if (!Array.isArray(entry) || entry.length !== 2) {
|
|
2254
|
+
throw invalidFrozenIndex('treeShape entry must be a [key, value] pair');
|
|
2255
|
+
}
|
|
2256
|
+
const [key, value] = entry;
|
|
2257
|
+
if (key === LEAF) {
|
|
2258
|
+
const idx = value;
|
|
2259
|
+
if (!Number.isInteger(idx) || idx < 0 || idx >= termCount) {
|
|
2260
|
+
throw invalidFrozenIndex(`treeShape leaf term index out of range: ${idx}`);
|
|
2261
|
+
}
|
|
2262
|
+
}
|
|
2263
|
+
else {
|
|
2264
|
+
validateTreeShape(value, termCount);
|
|
2265
|
+
}
|
|
2266
|
+
}
|
|
2267
|
+
}
|
|
2268
|
+
function termCountOf(snap) {
|
|
2269
|
+
return snap.postings.termCount;
|
|
2270
|
+
}
|
|
2271
|
+
/**
|
|
2272
|
+
* Numeric/structural invariants shared by both the decode path (untrusted binary)
|
|
2273
|
+
* and the build path (trusted internal code).
|
|
2274
|
+
*/
|
|
2275
|
+
function validateFrozenSnapshotNumeric(snap) {
|
|
2276
|
+
if (snap.fieldCount <= 0) {
|
|
2277
|
+
throw invalidFrozenIndex('fieldCount must be positive');
|
|
2278
|
+
}
|
|
2279
|
+
if (snap.nextId < 0 || snap.nextId >= 0xffffffff) {
|
|
2280
|
+
throw invalidFrozenIndex('nextId out of range');
|
|
2281
|
+
}
|
|
2282
|
+
if (snap.documentCount < 0 || snap.documentCount > snap.nextId) {
|
|
2283
|
+
throw invalidFrozenIndex('documentCount inconsistent with nextId');
|
|
2284
|
+
}
|
|
2285
|
+
if (snap.fieldLengthMatrix.length !== snap.nextId * snap.fieldCount) {
|
|
2286
|
+
throw invalidFrozenIndex('fieldLengthMatrix size mismatch');
|
|
2287
|
+
}
|
|
2288
|
+
if (snap.avgFieldLength.length !== snap.fieldCount) {
|
|
2289
|
+
throw invalidFrozenIndex('avgFieldLength size mismatch');
|
|
2290
|
+
}
|
|
2291
|
+
validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, detail => {
|
|
2292
|
+
throw invalidFrozenIndex(detail);
|
|
2293
|
+
});
|
|
2294
|
+
const indexedFields = Object.keys(snap.fieldIds);
|
|
2295
|
+
if (indexedFields.length !== snap.fieldCount) {
|
|
2296
|
+
throw invalidFrozenIndex('fieldIds count mismatch');
|
|
2297
|
+
}
|
|
2298
|
+
for (let f = 0; f < snap.fieldCount; f++) {
|
|
2299
|
+
const found = indexedFields.some(name => snap.fieldIds[name] === f);
|
|
2300
|
+
if (!found) {
|
|
2301
|
+
throw invalidFrozenIndex(`missing field id ${f}`);
|
|
2302
|
+
}
|
|
2303
|
+
}
|
|
2304
|
+
}
|
|
2305
|
+
function readFieldNamesSection(buf, fieldNamesOff, fieldCount, externalIdsOff) {
|
|
2306
|
+
const fieldNames = [];
|
|
2307
|
+
let o = fieldNamesOff;
|
|
2308
|
+
for (let f = 0; f < fieldCount; f++) {
|
|
2309
|
+
const { value, next } = readLengthPrefixedUtf8(buf, o);
|
|
2310
|
+
fieldNames.push(value);
|
|
2311
|
+
o = next;
|
|
2312
|
+
}
|
|
2313
|
+
if (o !== externalIdsOff) {
|
|
2314
|
+
throw invalidFrozenIndex('field names section size mismatch');
|
|
2315
|
+
}
|
|
2316
|
+
return fieldNames;
|
|
2317
|
+
}
|
|
2318
|
+
function readExternalIdsSection(buf, externalIdsOff, nextId, storedOff) {
|
|
2319
|
+
const externalIds = new Array(nextId);
|
|
2320
|
+
let o = externalIdsOff;
|
|
2321
|
+
for (let i = 0; i < nextId; i++) {
|
|
2322
|
+
const { value, next } = readExternalId(buf, o);
|
|
2323
|
+
externalIds[i] = value;
|
|
2324
|
+
o = next;
|
|
2325
|
+
}
|
|
2326
|
+
if (o !== storedOff) {
|
|
2327
|
+
throw invalidFrozenIndex('external ids section size mismatch');
|
|
2328
|
+
}
|
|
2329
|
+
return externalIds;
|
|
2330
|
+
}
|
|
2331
|
+
function readStoredFieldsSection(buf, storedOff, nextId, sectionEnd) {
|
|
2332
|
+
const storedFields = new Array(nextId);
|
|
2333
|
+
const tableEnd = storedOff + nextId * 4;
|
|
2334
|
+
if (tableEnd > sectionEnd) {
|
|
2335
|
+
throw invalidFrozenIndex('stored fields table out of bounds');
|
|
2336
|
+
}
|
|
2337
|
+
for (let i = 0; i < nextId; i++) {
|
|
2338
|
+
const rel = buf.readUInt32LE(storedOff + i * 4);
|
|
2339
|
+
if (rel === 0) {
|
|
2340
|
+
storedFields[i] = undefined;
|
|
2341
|
+
continue;
|
|
2342
|
+
}
|
|
2343
|
+
const entryOff = tableEnd + rel - 1;
|
|
2344
|
+
if (entryOff + 4 > sectionEnd) {
|
|
2345
|
+
throw invalidFrozenIndex('stored fields entry offset out of bounds');
|
|
2346
|
+
}
|
|
2347
|
+
const jsonLen = buf.readUInt32LE(entryOff);
|
|
2348
|
+
const jsonStart = entryOff + 4;
|
|
2349
|
+
const jsonEnd = jsonStart + jsonLen;
|
|
2350
|
+
if (jsonEnd > sectionEnd) {
|
|
2351
|
+
throw invalidFrozenIndex('stored fields JSON out of bounds');
|
|
2352
|
+
}
|
|
2353
|
+
storedFields[i] = JSON.parse(buf.toString('utf8', jsonStart, jsonEnd));
|
|
2354
|
+
}
|
|
2355
|
+
return storedFields;
|
|
2356
|
+
}
|
|
2357
|
+
/** Validate structural invariants of a decoded or assembled frozen snapshot. */
|
|
2358
|
+
function validateFrozenSnapshot(snap) {
|
|
2359
|
+
validateFrozenSnapshotNumeric(snap);
|
|
2360
|
+
const termCount = termCountOf(snap);
|
|
2361
|
+
if (snap.packedTermIndex != null) {
|
|
2362
|
+
validateFrozenTermIndexLeaves(snap.packedTermIndex, termCount);
|
|
2363
|
+
}
|
|
2364
|
+
else if (snap.termTree != null) {
|
|
2365
|
+
validateTermTreeLeaves(snap.termTree, termCount);
|
|
2366
|
+
}
|
|
2367
|
+
else {
|
|
2368
|
+
validateTreeShape(snap.treeShape, termCount);
|
|
2369
|
+
}
|
|
2370
|
+
}
|
|
2371
|
+
function fieldNamesFromFieldIds(fieldIds) {
|
|
2372
|
+
const names = Object.keys(fieldIds);
|
|
2373
|
+
names.sort((a, b) => fieldIds[a] - fieldIds[b]);
|
|
2374
|
+
return names;
|
|
2375
|
+
}
|
|
2376
|
+
/** Core with explicit {@link termCountOf} (no dictionary section). */
|
|
2377
|
+
function buildCoreSectionWithTermCount(snap) {
|
|
2378
|
+
const out = Buffer.alloc(16);
|
|
2379
|
+
out.writeUInt32LE(snap.documentCount, 0);
|
|
2380
|
+
out.writeUInt32LE(snap.nextId, 4);
|
|
2381
|
+
out.writeUInt32LE(snap.fieldCount, 8);
|
|
2382
|
+
out.writeUInt32LE(termCountOf(snap), 12);
|
|
2383
|
+
return out;
|
|
2384
|
+
}
|
|
2385
|
+
function buildFieldNamesSection(fieldNames) {
|
|
2386
|
+
const chunks = [];
|
|
2387
|
+
for (const name of fieldNames) {
|
|
2388
|
+
const body = Buffer.from(name, 'utf8');
|
|
2389
|
+
const header = Buffer.alloc(4);
|
|
2390
|
+
header.writeUInt32LE(body.length, 0);
|
|
2391
|
+
chunks.push(header, body);
|
|
2392
|
+
}
|
|
2393
|
+
return Buffer.concat(chunks);
|
|
2394
|
+
}
|
|
2395
|
+
function buildExternalIdsSection(externalIds, nextId) {
|
|
2396
|
+
const chunks = [];
|
|
2397
|
+
for (let i = 0; i < nextId; i++) {
|
|
2398
|
+
writeExternalId(chunks, externalIds[i]);
|
|
2399
|
+
}
|
|
2400
|
+
return Buffer.concat(chunks);
|
|
2401
|
+
}
|
|
2402
|
+
function buildStoredFieldsSection(storedFields, nextId) {
|
|
2403
|
+
const table = Buffer.alloc(nextId * 4);
|
|
2404
|
+
const heapChunks = [];
|
|
2405
|
+
let heapOff = 0;
|
|
2406
|
+
for (let i = 0; i < nextId; i++) {
|
|
2407
|
+
const row = storedFields[i];
|
|
2408
|
+
if (row == null) {
|
|
2409
|
+
table.writeUInt32LE(0, i * 4);
|
|
2410
|
+
continue;
|
|
2411
|
+
}
|
|
2412
|
+
table.writeUInt32LE(heapOff + 1, i * 4);
|
|
2413
|
+
const json = Buffer.from(JSON.stringify(row), 'utf8');
|
|
2414
|
+
const entry = Buffer.alloc(4 + json.length);
|
|
2415
|
+
entry.writeUInt32LE(json.length, 0);
|
|
2416
|
+
json.copy(entry, 4);
|
|
2417
|
+
heapChunks.push(entry);
|
|
2418
|
+
heapOff += entry.length;
|
|
2419
|
+
}
|
|
2420
|
+
return Buffer.concat([table, ...heapChunks]);
|
|
2421
|
+
}
|
|
2422
|
+
function validateTermTreeLeaves(tree, termCount) {
|
|
2423
|
+
for (const [key, val] of tree) {
|
|
2424
|
+
if (key === LEAF) {
|
|
2425
|
+
const idx = val;
|
|
2426
|
+
if (!Number.isInteger(idx) || idx < 0 || idx >= termCount) {
|
|
2427
|
+
throw invalidFrozenIndex(`term tree leaf index out of range: ${idx}`);
|
|
2428
|
+
}
|
|
2429
|
+
}
|
|
2430
|
+
else {
|
|
2431
|
+
validateTermTreeLeaves(val, termCount);
|
|
2432
|
+
}
|
|
2433
|
+
}
|
|
2434
|
+
}
|
|
2435
|
+
function deserializeTermIndexTree(shape) {
|
|
2436
|
+
const tree = new Map();
|
|
2437
|
+
for (const [key, value] of shape) {
|
|
2438
|
+
if (key === LEAF) {
|
|
2439
|
+
tree.set(LEAF, value);
|
|
2440
|
+
}
|
|
2441
|
+
else {
|
|
2442
|
+
tree.set(key, deserializeTermIndexTree(value));
|
|
2443
|
+
}
|
|
2444
|
+
}
|
|
2445
|
+
return tree;
|
|
2446
|
+
}
|
|
2447
|
+
|
|
2448
|
+
/**
|
|
2449
|
+
* Runtime stored fields. Single store field → one column (no per-doc Record at rest).
|
|
2450
|
+
* Wire format stays row JSON; encode/decode can skip intermediate row arrays when layout is known.
|
|
2451
|
+
*/
|
|
2452
|
+
function createStoredFieldsLayout(storeFields, capacity = 0) {
|
|
2242
2453
|
if (storeFields.length === 0)
|
|
2454
|
+
return { kind: 'none' };
|
|
2455
|
+
if (storeFields.length === 1) {
|
|
2456
|
+
return { kind: 'single', field: storeFields[0], values: new Array(capacity) };
|
|
2457
|
+
}
|
|
2458
|
+
return { kind: 'multi', rows: new Array(capacity) };
|
|
2459
|
+
}
|
|
2460
|
+
function writeStoredField(layout, shortId, storeFields, extractField, document) {
|
|
2461
|
+
if (layout.kind === 'none')
|
|
2462
|
+
return;
|
|
2463
|
+
if (layout.kind === 'single') {
|
|
2464
|
+
layout.values[shortId] = extractField(document, layout.field);
|
|
2465
|
+
return;
|
|
2466
|
+
}
|
|
2467
|
+
const row = {};
|
|
2468
|
+
for (const name of storeFields) {
|
|
2469
|
+
const value = extractField(document, name);
|
|
2470
|
+
if (value !== undefined)
|
|
2471
|
+
row[name] = value;
|
|
2472
|
+
}
|
|
2473
|
+
layout.rows[shortId] = row;
|
|
2474
|
+
}
|
|
2475
|
+
/** Materialize API/wire row for one document. */
|
|
2476
|
+
function readStoredFields(layout, shortId) {
|
|
2477
|
+
if (layout.kind === 'none')
|
|
2243
2478
|
return undefined;
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
|
|
2479
|
+
if (layout.kind === 'multi')
|
|
2480
|
+
return layout.rows[shortId];
|
|
2481
|
+
const value = layout.values[shortId];
|
|
2482
|
+
if (value === undefined)
|
|
2483
|
+
return {};
|
|
2484
|
+
return { [layout.field]: value };
|
|
2485
|
+
}
|
|
2486
|
+
function resizeStoredFields(layout, length) {
|
|
2487
|
+
if (layout.kind === 'none')
|
|
2488
|
+
return layout;
|
|
2489
|
+
if (layout.kind === 'single') {
|
|
2490
|
+
return layout.values.length <= length
|
|
2491
|
+
? layout
|
|
2492
|
+
: { kind: 'single', field: layout.field, values: layout.values.slice(0, length) };
|
|
2493
|
+
}
|
|
2494
|
+
return layout.rows.length <= length
|
|
2495
|
+
? layout
|
|
2496
|
+
: { kind: 'multi', rows: layout.rows.slice(0, length) };
|
|
2497
|
+
}
|
|
2498
|
+
function cloneStoredFields(layout) {
|
|
2499
|
+
if (layout.kind === 'none')
|
|
2500
|
+
return layout;
|
|
2501
|
+
if (layout.kind === 'single') {
|
|
2502
|
+
return { kind: 'single', field: layout.field, values: layout.values.slice() };
|
|
2503
|
+
}
|
|
2504
|
+
return { kind: 'multi', rows: layout.rows.slice() };
|
|
2505
|
+
}
|
|
2506
|
+
/** Import from wire rows or lucaong snapshot. Empty storeFields + non-empty rows → multi (binary load without options). */
|
|
2507
|
+
function storedFieldsFromRows(rows, storeFields) {
|
|
2508
|
+
if (storeFields.length === 0) {
|
|
2509
|
+
const hasAny = rows.some(row => row != null && Object.keys(row).length > 0);
|
|
2510
|
+
return hasAny ? { kind: 'multi', rows } : { kind: 'none' };
|
|
2511
|
+
}
|
|
2512
|
+
if (storeFields.length === 1) {
|
|
2513
|
+
const field = storeFields[0];
|
|
2514
|
+
const values = rows.map(row => row === null || row === void 0 ? void 0 : row[field]);
|
|
2515
|
+
return { kind: 'single', field, values };
|
|
2516
|
+
}
|
|
2517
|
+
return { kind: 'multi', rows };
|
|
2518
|
+
}
|
|
2519
|
+
function storedFieldsJsonBytes(layout) {
|
|
2520
|
+
if (layout.kind === 'none')
|
|
2521
|
+
return 0;
|
|
2522
|
+
if (layout.kind === 'multi') {
|
|
2523
|
+
let total = 0;
|
|
2524
|
+
for (const row of layout.rows) {
|
|
2525
|
+
if (row != null)
|
|
2526
|
+
total += JSON.stringify(row).length;
|
|
2527
|
+
}
|
|
2528
|
+
return total;
|
|
2529
|
+
}
|
|
2530
|
+
let total = 0;
|
|
2531
|
+
const { field, values } = layout;
|
|
2532
|
+
for (let i = 0; i < values.length; i++) {
|
|
2533
|
+
const value = values[i];
|
|
2534
|
+
if (value !== undefined)
|
|
2535
|
+
total += JSON.stringify({ [field]: value }).length;
|
|
2249
2536
|
}
|
|
2250
|
-
return
|
|
2537
|
+
return total;
|
|
2538
|
+
}
|
|
2539
|
+
function storedFieldsSlotCount(layout) {
|
|
2540
|
+
if (layout.kind === 'none')
|
|
2541
|
+
return 0;
|
|
2542
|
+
return layout.kind === 'single' ? layout.values.length : layout.rows.length;
|
|
2543
|
+
}
|
|
2544
|
+
function appendStoredFieldJsonEntry(table, heapChunks, heapOffRef, docIndex, jsonUtf8) {
|
|
2545
|
+
table.writeUInt32LE(heapOffRef.value + 1, docIndex * 4);
|
|
2546
|
+
const entry = Buffer.alloc(4 + jsonUtf8.length);
|
|
2547
|
+
entry.writeUInt32LE(jsonUtf8.length, 0);
|
|
2548
|
+
jsonUtf8.copy(entry, 4);
|
|
2549
|
+
heapChunks.push(entry);
|
|
2550
|
+
heapOffRef.value += entry.length;
|
|
2551
|
+
}
|
|
2552
|
+
/** MSv5 StoredFields section from {@link StoredFieldsLayout} (no intermediate row array). */
|
|
2553
|
+
function buildStoredFieldsWireSection(layout, nextId) {
|
|
2554
|
+
if (layout.kind === 'multi') {
|
|
2555
|
+
const rows = layout.rows.length >= nextId
|
|
2556
|
+
? layout.rows
|
|
2557
|
+
: layout.rows.concat(new Array(nextId - layout.rows.length));
|
|
2558
|
+
return buildStoredFieldsSection(rows, nextId);
|
|
2559
|
+
}
|
|
2560
|
+
const table = Buffer.alloc(nextId * 4);
|
|
2561
|
+
if (layout.kind === 'none')
|
|
2562
|
+
return table;
|
|
2563
|
+
const heapChunks = [];
|
|
2564
|
+
const heapOffRef = { value: 0 };
|
|
2565
|
+
const { field, values } = layout;
|
|
2566
|
+
for (let i = 0; i < nextId; i++) {
|
|
2567
|
+
const value = values[i];
|
|
2568
|
+
if (value === undefined)
|
|
2569
|
+
continue;
|
|
2570
|
+
const jsonUtf8 = Buffer.from(JSON.stringify({ [field]: value }), 'utf8');
|
|
2571
|
+
appendStoredFieldJsonEntry(table, heapChunks, heapOffRef, i, jsonUtf8);
|
|
2572
|
+
}
|
|
2573
|
+
return heapChunks.length === 0 ? table : Buffer.concat([table, ...heapChunks]);
|
|
2574
|
+
}
|
|
2575
|
+
function storedFieldsTableEnd(storedOff, nextId, sectionEnd) {
|
|
2576
|
+
const tableEnd = storedOff + nextId * 4;
|
|
2577
|
+
if (tableEnd > sectionEnd) {
|
|
2578
|
+
throw invalidFrozenIndex('stored fields table out of bounds');
|
|
2579
|
+
}
|
|
2580
|
+
return tableEnd;
|
|
2581
|
+
}
|
|
2582
|
+
function readStoredFieldJsonAt(buf, tableEnd, sectionEnd, rel) {
|
|
2583
|
+
const entryOff = tableEnd + rel - 1;
|
|
2584
|
+
if (entryOff + 4 > sectionEnd) {
|
|
2585
|
+
throw invalidFrozenIndex('stored fields entry offset out of bounds');
|
|
2586
|
+
}
|
|
2587
|
+
const jsonLen = buf.readUInt32LE(entryOff);
|
|
2588
|
+
const jsonStart = entryOff + 4;
|
|
2589
|
+
const jsonEnd = jsonStart + jsonLen;
|
|
2590
|
+
if (jsonEnd > sectionEnd) {
|
|
2591
|
+
throw invalidFrozenIndex('stored fields JSON out of bounds');
|
|
2592
|
+
}
|
|
2593
|
+
return JSON.parse(buf.toString('utf8', jsonStart, jsonEnd));
|
|
2594
|
+
}
|
|
2595
|
+
/** MSv5 StoredFields section → layout (skips row materialization when storeFields hint allows). */
|
|
2596
|
+
function readStoredFieldsWireSection(buf, storedOff, nextId, sectionEnd, storeFields) {
|
|
2597
|
+
const tableEnd = storedFieldsTableEnd(storedOff, nextId, sectionEnd);
|
|
2598
|
+
if (storeFields.length === 1) {
|
|
2599
|
+
const field = storeFields[0];
|
|
2600
|
+
const values = new Array(nextId);
|
|
2601
|
+
for (let i = 0; i < nextId; i++) {
|
|
2602
|
+
const rel = buf.readUInt32LE(storedOff + i * 4);
|
|
2603
|
+
if (rel === 0)
|
|
2604
|
+
continue;
|
|
2605
|
+
const row = readStoredFieldJsonAt(buf, tableEnd, sectionEnd, rel);
|
|
2606
|
+
values[i] = row[field];
|
|
2607
|
+
}
|
|
2608
|
+
return { kind: 'single', field, values };
|
|
2609
|
+
}
|
|
2610
|
+
if (storeFields.length === 0) {
|
|
2611
|
+
let hasAny = false;
|
|
2612
|
+
for (let i = 0; i < nextId; i++) {
|
|
2613
|
+
if (buf.readUInt32LE(storedOff + i * 4) !== 0) {
|
|
2614
|
+
hasAny = true;
|
|
2615
|
+
break;
|
|
2616
|
+
}
|
|
2617
|
+
}
|
|
2618
|
+
if (!hasAny)
|
|
2619
|
+
return { kind: 'none' };
|
|
2620
|
+
}
|
|
2621
|
+
const rows = readStoredFieldsSection(buf, storedOff, nextId, sectionEnd);
|
|
2622
|
+
return storedFieldsFromRows(rows, storeFields);
|
|
2251
2623
|
}
|
|
2252
2624
|
|
|
2253
2625
|
const SUPPORTED_SERIALIZATION_VERSIONS = new Set([1, 2]);
|
|
@@ -2333,7 +2705,7 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2333
2705
|
let shortIdRemap = null;
|
|
2334
2706
|
const resolvedNextId = useDense ? documentCount : nextId;
|
|
2335
2707
|
const externalIds = new Array(resolvedNextId);
|
|
2336
|
-
const
|
|
2708
|
+
const storedFieldRows = new Array(externalIds.length);
|
|
2337
2709
|
if (useDense) {
|
|
2338
2710
|
shortIdRemap = new Uint32Array(nextId);
|
|
2339
2711
|
shortIdRemap.fill(DISCARDED_DOC_ID);
|
|
@@ -2345,7 +2717,7 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2345
2717
|
const shortIdStr = String(shortId);
|
|
2346
2718
|
shortIdRemap[shortId] = dense;
|
|
2347
2719
|
externalIds[dense] = snapshot.documentIds[shortIdStr];
|
|
2348
|
-
|
|
2720
|
+
storedFieldRows[dense] = snapshot.storedFields[shortIdStr];
|
|
2349
2721
|
dense++;
|
|
2350
2722
|
}
|
|
2351
2723
|
}
|
|
@@ -2353,7 +2725,7 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2353
2725
|
for (const [shortIdStr, id] of Object.entries(snapshot.documentIds)) {
|
|
2354
2726
|
const shortId = parseInt(shortIdStr, 10);
|
|
2355
2727
|
externalIds[shortId] = id;
|
|
2356
|
-
|
|
2728
|
+
storedFieldRows[shortId] = snapshot.storedFields[shortIdStr];
|
|
2357
2729
|
}
|
|
2358
2730
|
}
|
|
2359
2731
|
const idLookup = createIdToShortIdLookup(externalIds, resolvedNextId);
|
|
@@ -2376,6 +2748,7 @@ function buildFrozenAssembleParamsFromMiniSearchSnapshot(snapshot, options) {
|
|
|
2376
2748
|
}
|
|
2377
2749
|
const searchableMap = buildSearchableMapFromSnapshot(snapshot);
|
|
2378
2750
|
const flat = buildFlatPostingsFromSearchableMap(searchableMap, fieldCount, resolvedNextId, shortIdRemap);
|
|
2751
|
+
const storedFields = storedFieldsFromRows(storedFieldRows, opts.storeFields);
|
|
2379
2752
|
return {
|
|
2380
2753
|
options: opts,
|
|
2381
2754
|
documentCount,
|
|
@@ -2794,206 +3167,6 @@ function readMsv5GlobalFlags(buf) {
|
|
|
2794
3167
|
return buf.readUInt16LE(6);
|
|
2795
3168
|
}
|
|
2796
3169
|
|
|
2797
|
-
function validateTreeShape(shape, termCount) {
|
|
2798
|
-
if (!Array.isArray(shape)) {
|
|
2799
|
-
throw invalidFrozenIndex('treeShape node must be an array');
|
|
2800
|
-
}
|
|
2801
|
-
for (const entry of shape) {
|
|
2802
|
-
if (!Array.isArray(entry) || entry.length !== 2) {
|
|
2803
|
-
throw invalidFrozenIndex('treeShape entry must be a [key, value] pair');
|
|
2804
|
-
}
|
|
2805
|
-
const [key, value] = entry;
|
|
2806
|
-
if (key === LEAF) {
|
|
2807
|
-
const idx = value;
|
|
2808
|
-
if (!Number.isInteger(idx) || idx < 0 || idx >= termCount) {
|
|
2809
|
-
throw invalidFrozenIndex(`treeShape leaf term index out of range: ${idx}`);
|
|
2810
|
-
}
|
|
2811
|
-
}
|
|
2812
|
-
else {
|
|
2813
|
-
validateTreeShape(value, termCount);
|
|
2814
|
-
}
|
|
2815
|
-
}
|
|
2816
|
-
}
|
|
2817
|
-
function termCountOf(snap) {
|
|
2818
|
-
return snap.postings.termCount;
|
|
2819
|
-
}
|
|
2820
|
-
/**
|
|
2821
|
-
* Numeric/structural invariants shared by both the decode path (untrusted binary)
|
|
2822
|
-
* and the build path (trusted internal code).
|
|
2823
|
-
*/
|
|
2824
|
-
function validateFrozenSnapshotNumeric(snap) {
|
|
2825
|
-
if (snap.fieldCount <= 0) {
|
|
2826
|
-
throw invalidFrozenIndex('fieldCount must be positive');
|
|
2827
|
-
}
|
|
2828
|
-
if (snap.nextId < 0 || snap.nextId >= 0xffffffff) {
|
|
2829
|
-
throw invalidFrozenIndex('nextId out of range');
|
|
2830
|
-
}
|
|
2831
|
-
if (snap.documentCount < 0 || snap.documentCount > snap.nextId) {
|
|
2832
|
-
throw invalidFrozenIndex('documentCount inconsistent with nextId');
|
|
2833
|
-
}
|
|
2834
|
-
if (snap.fieldLengthMatrix.length !== snap.nextId * snap.fieldCount) {
|
|
2835
|
-
throw invalidFrozenIndex('fieldLengthMatrix size mismatch');
|
|
2836
|
-
}
|
|
2837
|
-
if (snap.avgFieldLength.length !== snap.fieldCount) {
|
|
2838
|
-
throw invalidFrozenIndex('avgFieldLength size mismatch');
|
|
2839
|
-
}
|
|
2840
|
-
validateFrozenPostingsLayout(snap.postings, snap.documentCount, snap.nextId, detail => {
|
|
2841
|
-
throw invalidFrozenIndex(detail);
|
|
2842
|
-
});
|
|
2843
|
-
const indexedFields = Object.keys(snap.fieldIds);
|
|
2844
|
-
if (indexedFields.length !== snap.fieldCount) {
|
|
2845
|
-
throw invalidFrozenIndex('fieldIds count mismatch');
|
|
2846
|
-
}
|
|
2847
|
-
for (let f = 0; f < snap.fieldCount; f++) {
|
|
2848
|
-
const found = indexedFields.some(name => snap.fieldIds[name] === f);
|
|
2849
|
-
if (!found) {
|
|
2850
|
-
throw invalidFrozenIndex(`missing field id ${f}`);
|
|
2851
|
-
}
|
|
2852
|
-
}
|
|
2853
|
-
}
|
|
2854
|
-
function readFieldNamesSection(buf, fieldNamesOff, fieldCount, externalIdsOff) {
|
|
2855
|
-
const fieldNames = [];
|
|
2856
|
-
let o = fieldNamesOff;
|
|
2857
|
-
for (let f = 0; f < fieldCount; f++) {
|
|
2858
|
-
const { value, next } = readLengthPrefixedUtf8(buf, o);
|
|
2859
|
-
fieldNames.push(value);
|
|
2860
|
-
o = next;
|
|
2861
|
-
}
|
|
2862
|
-
if (o !== externalIdsOff) {
|
|
2863
|
-
throw invalidFrozenIndex('field names section size mismatch');
|
|
2864
|
-
}
|
|
2865
|
-
return fieldNames;
|
|
2866
|
-
}
|
|
2867
|
-
function readExternalIdsSection(buf, externalIdsOff, nextId, storedOff) {
|
|
2868
|
-
const externalIds = new Array(nextId);
|
|
2869
|
-
let o = externalIdsOff;
|
|
2870
|
-
for (let i = 0; i < nextId; i++) {
|
|
2871
|
-
const { value, next } = readExternalId(buf, o);
|
|
2872
|
-
externalIds[i] = value;
|
|
2873
|
-
o = next;
|
|
2874
|
-
}
|
|
2875
|
-
if (o !== storedOff) {
|
|
2876
|
-
throw invalidFrozenIndex('external ids section size mismatch');
|
|
2877
|
-
}
|
|
2878
|
-
return externalIds;
|
|
2879
|
-
}
|
|
2880
|
-
function readStoredFieldsSection(buf, storedOff, nextId, sectionEnd) {
|
|
2881
|
-
const storedFields = new Array(nextId);
|
|
2882
|
-
const tableEnd = storedOff + nextId * 4;
|
|
2883
|
-
if (tableEnd > sectionEnd) {
|
|
2884
|
-
throw invalidFrozenIndex('stored fields table out of bounds');
|
|
2885
|
-
}
|
|
2886
|
-
for (let i = 0; i < nextId; i++) {
|
|
2887
|
-
const rel = buf.readUInt32LE(storedOff + i * 4);
|
|
2888
|
-
if (rel === 0) {
|
|
2889
|
-
storedFields[i] = undefined;
|
|
2890
|
-
continue;
|
|
2891
|
-
}
|
|
2892
|
-
const entryOff = tableEnd + rel - 1;
|
|
2893
|
-
if (entryOff + 4 > sectionEnd) {
|
|
2894
|
-
throw invalidFrozenIndex('stored fields entry offset out of bounds');
|
|
2895
|
-
}
|
|
2896
|
-
const jsonLen = buf.readUInt32LE(entryOff);
|
|
2897
|
-
const jsonStart = entryOff + 4;
|
|
2898
|
-
const jsonEnd = jsonStart + jsonLen;
|
|
2899
|
-
if (jsonEnd > sectionEnd) {
|
|
2900
|
-
throw invalidFrozenIndex('stored fields JSON out of bounds');
|
|
2901
|
-
}
|
|
2902
|
-
storedFields[i] = JSON.parse(buf.toString('utf8', jsonStart, jsonEnd));
|
|
2903
|
-
}
|
|
2904
|
-
return storedFields;
|
|
2905
|
-
}
|
|
2906
|
-
/** Validate structural invariants of a decoded or assembled frozen snapshot. */
|
|
2907
|
-
function validateFrozenSnapshot(snap) {
|
|
2908
|
-
validateFrozenSnapshotNumeric(snap);
|
|
2909
|
-
const termCount = termCountOf(snap);
|
|
2910
|
-
if (snap.packedTermIndex != null) {
|
|
2911
|
-
validateFrozenTermIndexLeaves(snap.packedTermIndex, termCount);
|
|
2912
|
-
}
|
|
2913
|
-
else if (snap.termTree != null) {
|
|
2914
|
-
validateTermTreeLeaves(snap.termTree, termCount);
|
|
2915
|
-
}
|
|
2916
|
-
else {
|
|
2917
|
-
validateTreeShape(snap.treeShape, termCount);
|
|
2918
|
-
}
|
|
2919
|
-
}
|
|
2920
|
-
function fieldNamesFromFieldIds(fieldIds) {
|
|
2921
|
-
const names = Object.keys(fieldIds);
|
|
2922
|
-
names.sort((a, b) => fieldIds[a] - fieldIds[b]);
|
|
2923
|
-
return names;
|
|
2924
|
-
}
|
|
2925
|
-
/** Core with explicit {@link termCountOf} (no dictionary section). */
|
|
2926
|
-
function buildCoreSectionWithTermCount(snap) {
|
|
2927
|
-
const out = Buffer.alloc(16);
|
|
2928
|
-
out.writeUInt32LE(snap.documentCount, 0);
|
|
2929
|
-
out.writeUInt32LE(snap.nextId, 4);
|
|
2930
|
-
out.writeUInt32LE(snap.fieldCount, 8);
|
|
2931
|
-
out.writeUInt32LE(termCountOf(snap), 12);
|
|
2932
|
-
return out;
|
|
2933
|
-
}
|
|
2934
|
-
function buildFieldNamesSection(fieldNames) {
|
|
2935
|
-
const chunks = [];
|
|
2936
|
-
for (const name of fieldNames) {
|
|
2937
|
-
const body = Buffer.from(name, 'utf8');
|
|
2938
|
-
const header = Buffer.alloc(4);
|
|
2939
|
-
header.writeUInt32LE(body.length, 0);
|
|
2940
|
-
chunks.push(header, body);
|
|
2941
|
-
}
|
|
2942
|
-
return Buffer.concat(chunks);
|
|
2943
|
-
}
|
|
2944
|
-
function buildExternalIdsSection(externalIds, nextId) {
|
|
2945
|
-
const chunks = [];
|
|
2946
|
-
for (let i = 0; i < nextId; i++) {
|
|
2947
|
-
writeExternalId(chunks, externalIds[i]);
|
|
2948
|
-
}
|
|
2949
|
-
return Buffer.concat(chunks);
|
|
2950
|
-
}
|
|
2951
|
-
function buildStoredFieldsSection(storedFields, nextId) {
|
|
2952
|
-
const table = Buffer.alloc(nextId * 4);
|
|
2953
|
-
const heapChunks = [];
|
|
2954
|
-
let heapOff = 0;
|
|
2955
|
-
for (let i = 0; i < nextId; i++) {
|
|
2956
|
-
const row = storedFields[i];
|
|
2957
|
-
if (row == null) {
|
|
2958
|
-
table.writeUInt32LE(0, i * 4);
|
|
2959
|
-
continue;
|
|
2960
|
-
}
|
|
2961
|
-
table.writeUInt32LE(heapOff + 1, i * 4);
|
|
2962
|
-
const json = Buffer.from(JSON.stringify(row), 'utf8');
|
|
2963
|
-
const entry = Buffer.alloc(4 + json.length);
|
|
2964
|
-
entry.writeUInt32LE(json.length, 0);
|
|
2965
|
-
json.copy(entry, 4);
|
|
2966
|
-
heapChunks.push(entry);
|
|
2967
|
-
heapOff += entry.length;
|
|
2968
|
-
}
|
|
2969
|
-
return Buffer.concat([table, ...heapChunks]);
|
|
2970
|
-
}
|
|
2971
|
-
function validateTermTreeLeaves(tree, termCount) {
|
|
2972
|
-
for (const [key, val] of tree) {
|
|
2973
|
-
if (key === LEAF) {
|
|
2974
|
-
const idx = val;
|
|
2975
|
-
if (!Number.isInteger(idx) || idx < 0 || idx >= termCount) {
|
|
2976
|
-
throw invalidFrozenIndex(`term tree leaf index out of range: ${idx}`);
|
|
2977
|
-
}
|
|
2978
|
-
}
|
|
2979
|
-
else {
|
|
2980
|
-
validateTermTreeLeaves(val, termCount);
|
|
2981
|
-
}
|
|
2982
|
-
}
|
|
2983
|
-
}
|
|
2984
|
-
function deserializeTermIndexTree(shape) {
|
|
2985
|
-
const tree = new Map();
|
|
2986
|
-
for (const [key, value] of shape) {
|
|
2987
|
-
if (key === LEAF) {
|
|
2988
|
-
tree.set(LEAF, value);
|
|
2989
|
-
}
|
|
2990
|
-
else {
|
|
2991
|
-
tree.set(key, deserializeTermIndexTree(value));
|
|
2992
|
-
}
|
|
2993
|
-
}
|
|
2994
|
-
return tree;
|
|
2995
|
-
}
|
|
2996
|
-
|
|
2997
3170
|
/** Global wire flags for {@link FreqArray} width. */
|
|
2998
3171
|
function freqWireFlags(freqs) {
|
|
2999
3172
|
if (freqs instanceof Uint16Array)
|
|
@@ -3285,11 +3458,14 @@ function encodeFrozenSnapshotMsv5(snap, termTree, packedTermIndex) {
|
|
|
3285
3458
|
const flFlags = fieldLengthMatrixWireFlags(snap.fieldLengthMatrix);
|
|
3286
3459
|
const freqFlags = freqWireFlags(snap.postings.allFreqs);
|
|
3287
3460
|
const globalFlags = postingsWire.flags | flFlags | freqFlags;
|
|
3461
|
+
const storedFieldsSection = snap.storedFieldsLayout != null
|
|
3462
|
+
? buildStoredFieldsWireSection(snap.storedFieldsLayout, snap.nextId)
|
|
3463
|
+
: buildStoredFieldsSection(snap.storedFields, snap.nextId);
|
|
3288
3464
|
const rawSections = [
|
|
3289
3465
|
buildCoreSectionWithTermCount(snap),
|
|
3290
3466
|
buildFieldNamesSection(fieldNames),
|
|
3291
3467
|
buildExternalIdsSection(snap.externalIds, snap.nextId),
|
|
3292
|
-
|
|
3468
|
+
storedFieldsSection,
|
|
3293
3469
|
buildTermTreeSectionColumnar(packed),
|
|
3294
3470
|
bufferFromView(snap.avgFieldLength),
|
|
3295
3471
|
buildFieldLengthMatrixSection(snap.fieldLengthMatrix),
|
|
@@ -3313,11 +3489,14 @@ async function encodeFrozenSnapshotMsv5Async(snap, termTree, packedTermIndex) {
|
|
|
3313
3489
|
const flFlags = fieldLengthMatrixWireFlags(snap.fieldLengthMatrix);
|
|
3314
3490
|
const freqFlags = freqWireFlags(snap.postings.allFreqs);
|
|
3315
3491
|
const globalFlags = postingsWire.flags | flFlags | freqFlags;
|
|
3492
|
+
const storedFieldsSection = snap.storedFieldsLayout != null
|
|
3493
|
+
? buildStoredFieldsWireSection(snap.storedFieldsLayout, snap.nextId)
|
|
3494
|
+
: buildStoredFieldsSection(snap.storedFields, snap.nextId);
|
|
3316
3495
|
const rawSections = [
|
|
3317
3496
|
buildCoreSectionWithTermCount(snap),
|
|
3318
3497
|
buildFieldNamesSection(fieldNames),
|
|
3319
3498
|
buildExternalIdsSection(snap.externalIds, snap.nextId),
|
|
3320
|
-
|
|
3499
|
+
storedFieldsSection,
|
|
3321
3500
|
buildTermTreeSectionColumnar(packed),
|
|
3322
3501
|
bufferFromView(snap.avgFieldLength),
|
|
3323
3502
|
buildFieldLengthMatrixSection(snap.fieldLengthMatrix),
|
|
@@ -3347,7 +3526,7 @@ function validateMsv5Container(buf) {
|
|
|
3347
3526
|
}
|
|
3348
3527
|
return { globalFlags, directory };
|
|
3349
3528
|
}
|
|
3350
|
-
function decodeMsv5Sections(globalFlags, sections) {
|
|
3529
|
+
function decodeMsv5Sections(globalFlags, sections, hints) {
|
|
3351
3530
|
const core = sections[0 /* Msv5SectionId.Core */];
|
|
3352
3531
|
if (core.length !== 16) {
|
|
3353
3532
|
throw invalidFrozenIndex('core section size mismatch');
|
|
@@ -3362,7 +3541,12 @@ function decodeMsv5Sections(globalFlags, sections) {
|
|
|
3362
3541
|
fieldIds[fieldNames[f]] = f;
|
|
3363
3542
|
}
|
|
3364
3543
|
const externalIds = readExternalIdsSection(sections[2 /* Msv5SectionId.ExternalIds */], 0, nextId, sections[2 /* Msv5SectionId.ExternalIds */].length);
|
|
3365
|
-
const
|
|
3544
|
+
const storedFieldsLayout = hints != null
|
|
3545
|
+
? readStoredFieldsWireSection(sections[3 /* Msv5SectionId.StoredFields */], 0, nextId, sections[3 /* Msv5SectionId.StoredFields */].length, hints.storeFields)
|
|
3546
|
+
: undefined;
|
|
3547
|
+
const storedFields = storedFieldsLayout != null
|
|
3548
|
+
? new Array(nextId)
|
|
3549
|
+
: readStoredFieldsSection(sections[3 /* Msv5SectionId.StoredFields */], 0, nextId, sections[3 /* Msv5SectionId.StoredFields */].length);
|
|
3366
3550
|
const packedTermIndex = readPackedTermTreeSectionColumnar(sections[4 /* Msv5SectionId.TermTree */], termCount);
|
|
3367
3551
|
const avgBuf = sections[5 /* Msv5SectionId.AvgFieldLength */];
|
|
3368
3552
|
const avgFieldLength = readFloat32Array(avgBuf, 0, avgBuf.length);
|
|
@@ -3380,6 +3564,7 @@ function decodeMsv5Sections(globalFlags, sections) {
|
|
|
3380
3564
|
avgFieldLength,
|
|
3381
3565
|
externalIds,
|
|
3382
3566
|
storedFields,
|
|
3567
|
+
storedFieldsLayout,
|
|
3383
3568
|
fieldLengthMatrix,
|
|
3384
3569
|
treeShape: [],
|
|
3385
3570
|
packedTermIndex,
|
|
@@ -3388,13 +3573,13 @@ function decodeMsv5Sections(globalFlags, sections) {
|
|
|
3388
3573
|
validateFrozenSnapshot(snap);
|
|
3389
3574
|
return snap;
|
|
3390
3575
|
}
|
|
3391
|
-
function decodeFrozenSnapshotMsv5(buf) {
|
|
3576
|
+
function decodeFrozenSnapshotMsv5(buf, hints) {
|
|
3392
3577
|
const { globalFlags, directory } = validateMsv5Container(buf);
|
|
3393
|
-
return decodeMsv5Sections(globalFlags, loadMsv5Sections(buf, directory));
|
|
3578
|
+
return decodeMsv5Sections(globalFlags, loadMsv5Sections(buf, directory), hints);
|
|
3394
3579
|
}
|
|
3395
|
-
async function decodeFrozenSnapshotMsv5Async(buf) {
|
|
3580
|
+
async function decodeFrozenSnapshotMsv5Async(buf, hints) {
|
|
3396
3581
|
const { globalFlags, directory } = validateMsv5Container(buf);
|
|
3397
|
-
return decodeMsv5Sections(globalFlags, await loadMsv5SectionsAsync(buf, directory));
|
|
3582
|
+
return decodeMsv5Sections(globalFlags, await loadMsv5SectionsAsync(buf, directory), hints);
|
|
3398
3583
|
}
|
|
3399
3584
|
|
|
3400
3585
|
/** Encode a frozen snapshot as a binary buffer. */
|
|
@@ -3408,12 +3593,12 @@ function encodeFrozenSnapshotAsync(snap, termTree, packedTermIndex) {
|
|
|
3408
3593
|
|
|
3409
3594
|
const LEGACY_MAGICS = new Set(['MSv1', 'MSv2', 'MSv3', 'MSv4']);
|
|
3410
3595
|
/** Decode a frozen binary snapshot buffer. */
|
|
3411
|
-
function decodeFrozenSnapshot(buf) {
|
|
3596
|
+
function decodeFrozenSnapshot(buf, hints) {
|
|
3412
3597
|
assertBufferLength(buf, 8);
|
|
3413
3598
|
const magic = buf.toString('ascii', 0, 4);
|
|
3414
3599
|
const version = buf.readUInt16LE(4);
|
|
3415
3600
|
if (isMsv5Buffer(buf) && version === 5) {
|
|
3416
|
-
return decodeFrozenSnapshotMsv5(buf);
|
|
3601
|
+
return decodeFrozenSnapshotMsv5(buf, hints);
|
|
3417
3602
|
}
|
|
3418
3603
|
if (LEGACY_MAGICS.has(magic)) {
|
|
3419
3604
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot; re-build with saveBinarySync() or from lucaong JSON');
|
|
@@ -3421,13 +3606,13 @@ function decodeFrozenSnapshot(buf) {
|
|
|
3421
3606
|
throw invalidFrozenIndex('Unsupported frozen binary snapshot');
|
|
3422
3607
|
}
|
|
3423
3608
|
/** Async frozen snapshot decode (streaming zstd). */
|
|
3424
|
-
async function decodeFrozenSnapshotAsync(buf) {
|
|
3609
|
+
async function decodeFrozenSnapshotAsync(buf, hints) {
|
|
3425
3610
|
assertBufferLength(buf, 8);
|
|
3426
3611
|
const version = buf.readUInt16LE(4);
|
|
3427
3612
|
if (isMsv5Buffer(buf) && version === 5) {
|
|
3428
|
-
return decodeFrozenSnapshotMsv5Async(buf);
|
|
3613
|
+
return decodeFrozenSnapshotMsv5Async(buf, hints);
|
|
3429
3614
|
}
|
|
3430
|
-
return decodeFrozenSnapshot(buf);
|
|
3615
|
+
return decodeFrozenSnapshot(buf, hints);
|
|
3431
3616
|
}
|
|
3432
3617
|
|
|
3433
3618
|
const DEFAULT_CAPACITY = 16;
|
|
@@ -3689,14 +3874,13 @@ class FrozenIndexBuilder {
|
|
|
3689
3874
|
this._nextId = 0;
|
|
3690
3875
|
this._frozen = false;
|
|
3691
3876
|
const estimated = hints === null || hints === void 0 ? void 0 : hints.estimatedDocumentCount;
|
|
3877
|
+
this._storedFields = createStoredFieldsLayout(this._options.storeFields, estimated !== null && estimated !== void 0 ? estimated : 0);
|
|
3692
3878
|
if (estimated != null && estimated > 0) {
|
|
3693
3879
|
this._externalIds = new Array(estimated);
|
|
3694
|
-
this._storedFields = new Array(estimated);
|
|
3695
3880
|
this._fieldLengthData = new Array(estimated * this._fieldCount).fill(0);
|
|
3696
3881
|
}
|
|
3697
3882
|
else {
|
|
3698
3883
|
this._externalIds = [];
|
|
3699
|
-
this._storedFields = [];
|
|
3700
3884
|
this._fieldLengthData = [];
|
|
3701
3885
|
}
|
|
3702
3886
|
}
|
|
@@ -3719,7 +3903,7 @@ class FrozenIndexBuilder {
|
|
|
3719
3903
|
this._seenIds.add(id);
|
|
3720
3904
|
const shortId = this._nextId++;
|
|
3721
3905
|
this._externalIds[shortId] = id;
|
|
3722
|
-
this._storedFields
|
|
3906
|
+
writeStoredField(this._storedFields, shortId, storeFields, extractField, document);
|
|
3723
3907
|
const documentCount = shortId + 1;
|
|
3724
3908
|
for (const field of fields) {
|
|
3725
3909
|
const fieldValue = extractField(document, field);
|
|
@@ -3805,9 +3989,7 @@ class FrozenIndexBuilder {
|
|
|
3805
3989
|
const externalIds = this._externalIds.length > documentCount
|
|
3806
3990
|
? this._externalIds.slice(0, documentCount)
|
|
3807
3991
|
: this._externalIds;
|
|
3808
|
-
const storedFields = this._storedFields
|
|
3809
|
-
? this._storedFields.slice(0, documentCount)
|
|
3810
|
-
: this._storedFields;
|
|
3992
|
+
const storedFields = resizeStoredFields(this._storedFields, documentCount);
|
|
3811
3993
|
const idLookup = createIdToShortIdLookup(externalIds, documentCount);
|
|
3812
3994
|
return {
|
|
3813
3995
|
options: this._options,
|
|
@@ -4235,7 +4417,7 @@ function shallowCopyJsSnapshotFields(params) {
|
|
|
4235
4417
|
return {
|
|
4236
4418
|
fieldIds: { ...params.fieldIds },
|
|
4237
4419
|
options: shallowCopyOptions(params.options),
|
|
4238
|
-
storedFields: params.storedFields
|
|
4420
|
+
storedFields: cloneStoredFields(params.storedFields),
|
|
4239
4421
|
};
|
|
4240
4422
|
}
|
|
4241
4423
|
/**
|
|
@@ -4320,7 +4502,7 @@ class FrozenMiniSearch {
|
|
|
4320
4502
|
fieldIds: this._fieldIds,
|
|
4321
4503
|
getFieldLength: (docId, fieldId) => this.getFieldLength(docId, fieldId),
|
|
4322
4504
|
getExternalId: docId => this._externalIds[docId],
|
|
4323
|
-
getStoredFields: docId => this._storedFields
|
|
4505
|
+
getStoredFields: docId => readStoredFields(this._storedFields, docId),
|
|
4324
4506
|
};
|
|
4325
4507
|
this._queryEngineParams = {
|
|
4326
4508
|
fields: this._options.fields,
|
|
@@ -4332,7 +4514,7 @@ class FrozenMiniSearch {
|
|
|
4332
4514
|
const id = this._externalIds[shortId];
|
|
4333
4515
|
if (id === undefined)
|
|
4334
4516
|
continue;
|
|
4335
|
-
callback(shortId, id, this._storedFields
|
|
4517
|
+
callback(shortId, id, readStoredFields(this._storedFields, shortId));
|
|
4336
4518
|
}
|
|
4337
4519
|
}),
|
|
4338
4520
|
aggregateContext: this._aggregateContext,
|
|
@@ -4343,11 +4525,7 @@ class FrozenMiniSearch {
|
|
|
4343
4525
|
memoryBreakdown() {
|
|
4344
4526
|
const termCount = this.termCount;
|
|
4345
4527
|
const postingsStats = postingsTypedBytes(this._postings);
|
|
4346
|
-
|
|
4347
|
-
for (const row of this._storedFields) {
|
|
4348
|
-
if (row != null)
|
|
4349
|
-
storedJson += JSON.stringify(row).length;
|
|
4350
|
-
}
|
|
4528
|
+
const storedJson = storedFieldsJsonBytes(this._storedFields);
|
|
4351
4529
|
const radixEst = this._index.packedByteLength();
|
|
4352
4530
|
const idMapBytes = this._idLookup.mode === 'lazy-map' ? this._idLookup.mapEntryCount * 32 : 0;
|
|
4353
4531
|
const estimatedStructuredBytes = postingsStats.totalTypedBytes
|
|
@@ -4377,7 +4555,7 @@ class FrozenMiniSearch {
|
|
|
4377
4555
|
},
|
|
4378
4556
|
documents: {
|
|
4379
4557
|
externalIdsSlots: this._externalIds.length,
|
|
4380
|
-
storedFieldsSlots: this._storedFields
|
|
4558
|
+
storedFieldsSlots: storedFieldsSlotCount(this._storedFields),
|
|
4381
4559
|
idLookupMode: this._idLookup.mode,
|
|
4382
4560
|
idToShortIdEntries: this._idLookup.mapEntryCount,
|
|
4383
4561
|
fieldLengthMatrixBytes: this._fieldLengthMatrix.byteLength,
|
|
@@ -4392,10 +4570,10 @@ class FrozenMiniSearch {
|
|
|
4392
4570
|
}
|
|
4393
4571
|
getStoredFields(id) {
|
|
4394
4572
|
const shortId = this._idLookup.get(id);
|
|
4395
|
-
return shortId == null ? undefined : this._storedFields
|
|
4573
|
+
return shortId == null ? undefined : readStoredFields(this._storedFields, shortId);
|
|
4396
4574
|
}
|
|
4397
4575
|
search(query, searchOptions = {}) {
|
|
4398
|
-
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], docId => this._storedFields
|
|
4576
|
+
return finalizeRawSearchResults(this.executeQuery(query, searchOptions), query, searchOptions, this._options.searchOptions, docId => this._externalIds[docId], docId => readStoredFields(this._storedFields, docId));
|
|
4399
4577
|
}
|
|
4400
4578
|
autoSuggest(queryString, options = {}) {
|
|
4401
4579
|
const merged = { ...this._options.autoSuggestOptions, ...options };
|
|
@@ -4411,7 +4589,8 @@ class FrozenMiniSearch {
|
|
|
4411
4589
|
fieldNames: fieldNamesFromFieldIds(this._fieldIds),
|
|
4412
4590
|
avgFieldLength: this._avgFieldLength,
|
|
4413
4591
|
externalIds: this._externalIds,
|
|
4414
|
-
storedFields: this.
|
|
4592
|
+
storedFields: new Array(this._nextId),
|
|
4593
|
+
storedFieldsLayout: this._storedFields,
|
|
4415
4594
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4416
4595
|
treeShape: [],
|
|
4417
4596
|
postings: this._postings,
|
|
@@ -4427,7 +4606,8 @@ class FrozenMiniSearch {
|
|
|
4427
4606
|
fieldNames: fieldNamesFromFieldIds(this._fieldIds),
|
|
4428
4607
|
avgFieldLength: this._avgFieldLength,
|
|
4429
4608
|
externalIds: this._externalIds,
|
|
4430
|
-
storedFields: this.
|
|
4609
|
+
storedFields: new Array(this._nextId),
|
|
4610
|
+
storedFieldsLayout: this._storedFields,
|
|
4431
4611
|
fieldLengthMatrix: fieldLengthMatrixForWire(this._fieldLengthMatrix),
|
|
4432
4612
|
treeShape: [],
|
|
4433
4613
|
postings: this._postings,
|
|
@@ -4435,16 +4615,20 @@ class FrozenMiniSearch {
|
|
|
4435
4615
|
}
|
|
4436
4616
|
/** Load a frozen binary snapshot. */
|
|
4437
4617
|
static loadBinarySync(buffer, options = {}) {
|
|
4438
|
-
|
|
4618
|
+
var _a;
|
|
4619
|
+
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|
|
4620
|
+
const snap = decodeFrozenSnapshot(buffer, { storeFields });
|
|
4439
4621
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4440
4622
|
}
|
|
4441
4623
|
/** Load a frozen binary snapshot with streaming zstd decompression (bounded memory). */
|
|
4442
4624
|
static async loadBinaryAsync(buffer, options = {}) {
|
|
4443
|
-
|
|
4625
|
+
var _a;
|
|
4626
|
+
const storeFields = (_a = options.storeFields) !== null && _a !== void 0 ? _a : defaultFrozenLoadOptions.storeFields;
|
|
4627
|
+
const snap = await decodeFrozenSnapshotAsync(buffer, { storeFields });
|
|
4444
4628
|
return FrozenMiniSearch.fromBinarySnapshot(snap, options);
|
|
4445
4629
|
}
|
|
4446
4630
|
static fromBinarySnapshot(snap, options) {
|
|
4447
|
-
var _a, _b;
|
|
4631
|
+
var _a, _b, _c;
|
|
4448
4632
|
const snapshotFields = (_a = snap.fieldNames) !== null && _a !== void 0 ? _a : fieldNamesFromFieldIds(snap.fieldIds);
|
|
4449
4633
|
if (options.fields != null) {
|
|
4450
4634
|
assertFieldsMatchSnapshot(options.fields, snap.fieldIds);
|
|
@@ -4472,7 +4656,7 @@ class FrozenMiniSearch {
|
|
|
4472
4656
|
fieldCount: snap.fieldCount,
|
|
4473
4657
|
externalIds: snap.externalIds,
|
|
4474
4658
|
idLookup,
|
|
4475
|
-
storedFields: snap.storedFields,
|
|
4659
|
+
storedFields: (_c = snap.storedFieldsLayout) !== null && _c !== void 0 ? _c : storedFieldsFromRows(snap.storedFields, opts.storeFields),
|
|
4476
4660
|
fieldLengthMatrix: snap.fieldLengthMatrix,
|
|
4477
4661
|
avgFieldLength: snap.avgFieldLength,
|
|
4478
4662
|
index,
|