@yoch/minisearch 8.0.0-beta.2 → 8.0.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +68 -6
- package/dist/cjs/index.cjs +156 -90
- package/dist/es/index.d.ts +42 -1
- package/dist/es/index.js +154 -91
- package/package.json +13 -13
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,20 @@
|
|
|
2
2
|
|
|
3
3
|
`MiniSearch` follows [semantic versioning](https://semver.org/spec/v2.0.0.html).
|
|
4
4
|
|
|
5
|
+
## v8.0.0-beta.3
|
|
6
|
+
|
|
7
|
+
Incremental frozen index construction without a temporary `documents[]` array.
|
|
8
|
+
|
|
9
|
+
- Add `FrozenIndexBuilder` and `createFrozenIndexBuilder(options, hints?)` with `.add(doc)`
|
|
10
|
+
and optional `estimatedDocumentCount` pre-sizing
|
|
11
|
+
- Add `freezeFrozenIndexBuilder(builder)` to finalize into `FrozenMiniSearch` (avoids a
|
|
12
|
+
circular import between build and assembly modules)
|
|
13
|
+
- Add `FrozenMiniSearch.fromAsyncIterable(iterable, options)` for async document streams
|
|
14
|
+
(e.g. CSV parsers)
|
|
15
|
+
- Refactor `buildFrozenParamsFromDocuments` to use the builder internally (same output)
|
|
16
|
+
- Trim per-document arrays when `estimatedDocumentCount` exceeds the actual document count
|
|
17
|
+
- Export `FrozenIndexBuilderHints` type
|
|
18
|
+
|
|
5
19
|
## v8.0.0-beta.2
|
|
6
20
|
|
|
7
21
|
Consolidated beta on npm. Supersedes `8.0.0-beta.0` and `8.0.0-beta.1` (unpublished).
|
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
**In-memory full-text search for Node.js** — a fork of [MiniSearch](https://github.com/lucaong/minisearch) by [Luca Ongaro](https://github.com/lucaong), extended for **production serving**: smaller indexes, faster loads, and a read-only fast path.
|
|
4
4
|
|
|
5
|
-
> **Current release:** `8.0.0-beta.2`
|
|
5
|
+
> **Current release:** `8.0.0-beta.3` · install with `npm install @yoch/minisearch`
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
@@ -74,17 +74,73 @@ const { FrozenMiniSearch } = require('@yoch/minisearch')
|
|
|
74
74
|
|------|-----|
|
|
75
75
|
| Live index that changes over time | `MiniSearch` → `freeze()` when you need read-only serving |
|
|
76
76
|
| Fixed corpus, build frozen directly | **`FrozenMiniSearch.fromDocuments(documents, options)`** |
|
|
77
|
+
| Build doc-by-doc (no `documents[]` buffer) | **`createFrozenIndexBuilder(options)`** → `.add(doc)` → **`freezeFrozenIndexBuilder(builder)`** |
|
|
78
|
+
| Async stream of documents | **`FrozenMiniSearch.fromAsyncIterable(iterable, options)`** |
|
|
77
79
|
| Load a snapshot from disk | `FrozenMiniSearch.loadBinary(buffer, options)` |
|
|
78
80
|
| Custom assembly pipeline | `buildFrozenFromDocuments`, `assembleFrozen`, `freezeFromMiniSearch` |
|
|
79
81
|
|
|
80
82
|
`fromDocuments` produces the same search ranking as `new MiniSearch(opts).addAll(docs).freeze()` when given the same corpus and options (`fields`, `tokenize`, `processTerm`, …). Frozen indexes are read-only: they do not support `add` / `remove`.
|
|
81
83
|
|
|
84
|
+
**External corpus (e.g. lookup by id after search):** keep full rows in your own store (`dataCache`, DB, etc.) and use minimal `storeFields` (often `['id']` only) so the frozen index does not duplicate payload text:
|
|
85
|
+
|
|
86
|
+
```javascript
|
|
87
|
+
import { createFrozenIndexBuilder, freezeFrozenIndexBuilder } from '@yoch/minisearch'
|
|
88
|
+
|
|
89
|
+
function buildFrozenIndexFromRows (rows, options) {
|
|
90
|
+
const builder = createFrozenIndexBuilder(options, {
|
|
91
|
+
estimatedDocumentCount: rows.length
|
|
92
|
+
})
|
|
93
|
+
for (let i = 0; i < rows.length; i++) {
|
|
94
|
+
builder.add(buildIndexDocument(rows[i], i))
|
|
95
|
+
}
|
|
96
|
+
return freezeFrozenIndexBuilder(builder)
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
// After search: enrich from your store — frozen.getStoredFields(res.id) or dataCache[type][res.id]
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
**Async stream** (no intermediate array; documents are indexed as they arrive):
|
|
103
|
+
|
|
104
|
+
```javascript
|
|
105
|
+
import { createReadStream } from 'node:fs'
|
|
106
|
+
import { parse } from 'csv-parse'
|
|
107
|
+
import { FrozenMiniSearch } from '@yoch/minisearch'
|
|
108
|
+
|
|
109
|
+
async function buildFromCsv (path, options) {
|
|
110
|
+
async function * documents () {
|
|
111
|
+
const parser = createReadStream(path).pipe(parse({ columns: true }))
|
|
112
|
+
for await (const row of parser) {
|
|
113
|
+
yield { id: row.cis, denomination: row.denomination, /* … */ }
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
return FrozenMiniSearch.fromAsyncIterable(documents(), options)
|
|
117
|
+
}
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
For a **sync** iterable (`for...of` on an array or generator), use the builder directly:
|
|
121
|
+
|
|
122
|
+
```javascript
|
|
123
|
+
import { createFrozenIndexBuilder, freezeFrozenIndexBuilder } from '@yoch/minisearch'
|
|
124
|
+
|
|
125
|
+
const builder = createFrozenIndexBuilder(options)
|
|
126
|
+
for (const doc of documentGenerator()) {
|
|
127
|
+
builder.add(doc)
|
|
128
|
+
}
|
|
129
|
+
const frozen = freezeFrozenIndexBuilder(builder)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
`estimatedDocumentCount` in the second argument to `createFrozenIndexBuilder` pre-allocates
|
|
133
|
+
per-document arrays when the final size is known; internal buffers are trimmed to the actual
|
|
134
|
+
count on freeze if the hint was too large.
|
|
135
|
+
|
|
82
136
|
---
|
|
83
137
|
|
|
84
138
|
## FrozenMiniSearch in a bit more detail
|
|
85
139
|
|
|
86
140
|
- **`freeze()`** — snapshot a mutable index into compact typed postings + a radix tree keyed by term index.
|
|
87
141
|
- **`fromDocuments()`** — build that structure in one pass (skips nested `Map` postings and radix cloning at freeze time).
|
|
142
|
+
- **`createFrozenIndexBuilder()`** — same output without a temporary `documents[]` array; finalize with `freezeFrozenIndexBuilder(builder)` (or `assembleFrozen(builder.freezeParams())` for custom assembly).
|
|
143
|
+
- **`fromAsyncIterable()`** — async document stream (e.g. CSV parser) into a frozen index; equivalent to builder + `for await` + `freezeFrozenIndexBuilder`.
|
|
88
144
|
- **`saveBinary()` / `loadBinary()`** — MSv2 on write, MSv1 still readable; pass the same `fields` (and custom `tokenize` / `processTerm` if used at build time).
|
|
89
145
|
- **Term frequencies** — stored as `Uint8` and clamped at 255 per doc/term; the clamp only affects scores for extreme term repetition.
|
|
90
146
|
- **`frozenMemoryBreakdown()`** — introspect postings, radix tree, and stored-field footprint.
|
|
@@ -94,6 +150,10 @@ Advanced exports:
|
|
|
94
150
|
```javascript
|
|
95
151
|
import {
|
|
96
152
|
FrozenMiniSearch,
|
|
153
|
+
createFrozenIndexBuilder,
|
|
154
|
+
freezeFrozenIndexBuilder,
|
|
155
|
+
FrozenIndexBuilder,
|
|
156
|
+
type FrozenIndexBuilderHints,
|
|
97
157
|
buildFrozenFromDocuments,
|
|
98
158
|
assembleFrozen,
|
|
99
159
|
freezeFromMiniSearch,
|
|
@@ -124,8 +184,8 @@ TypeScript definitions: `dist/es/index.d.ts`.
|
|
|
124
184
|
Reproducible comparisons (heap, load time, search latency) live under [`benchmarks/`](benchmarks/README.md):
|
|
125
185
|
|
|
126
186
|
```bash
|
|
127
|
-
|
|
128
|
-
|
|
187
|
+
npm run benchmark:compare # terminal report
|
|
188
|
+
npm run benchmark:diff # vs versioned baseline
|
|
129
189
|
```
|
|
130
190
|
|
|
131
191
|
---
|
|
@@ -133,11 +193,13 @@ yarn benchmark:diff # vs versioned baseline
|
|
|
133
193
|
## Development
|
|
134
194
|
|
|
135
195
|
```bash
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
196
|
+
npm install
|
|
197
|
+
npm test
|
|
198
|
+
npm run build
|
|
139
199
|
```
|
|
140
200
|
|
|
201
|
+
Use `npm run` for scripts (Yarn 1.x on Node 22 prints `url.parse` deprecation noise when invoking `yarn test` / `yarn build`).
|
|
202
|
+
|
|
141
203
|
**Requirements:** a Node.js version with **ES2018+** support (the build uses `for await`). No browser UMD/CDN build in this fork (Node-only ESM + CJS).
|
|
142
204
|
|
|
143
205
|
---
|
package/dist/cjs/index.cjs
CHANGED
|
@@ -1085,41 +1085,41 @@ function saveStoredFieldsForDocument(storeFields, extractField, document) {
|
|
|
1085
1085
|
return documentFields;
|
|
1086
1086
|
}
|
|
1087
1087
|
|
|
1088
|
-
function getOrCreateTermIndex(
|
|
1089
|
-
const existing =
|
|
1088
|
+
function getOrCreateTermIndex(state, index, term) {
|
|
1089
|
+
const existing = index.get(term);
|
|
1090
1090
|
if (existing != null)
|
|
1091
1091
|
return existing;
|
|
1092
|
-
const ti =
|
|
1093
|
-
|
|
1094
|
-
|
|
1092
|
+
const ti = state.terms.length;
|
|
1093
|
+
state.terms.push(term);
|
|
1094
|
+
index.set(term, ti);
|
|
1095
1095
|
return ti;
|
|
1096
1096
|
}
|
|
1097
|
-
function appendPosting(
|
|
1098
|
-
const slot = termIndex *
|
|
1099
|
-
let docIds =
|
|
1100
|
-
let freqs =
|
|
1097
|
+
function appendPosting(state, termIndex, fieldId, docId, freq) {
|
|
1098
|
+
const slot = termIndex * state.fieldCount + fieldId;
|
|
1099
|
+
let docIds = state.postingsDocIds[slot];
|
|
1100
|
+
let freqs = state.postingsFreqs[slot];
|
|
1101
1101
|
if (docIds == null) {
|
|
1102
1102
|
docIds = [];
|
|
1103
1103
|
freqs = [];
|
|
1104
|
-
|
|
1105
|
-
|
|
1104
|
+
state.postingsDocIds[slot] = docIds;
|
|
1105
|
+
state.postingsFreqs[slot] = freqs;
|
|
1106
1106
|
}
|
|
1107
1107
|
docIds.push(docId);
|
|
1108
1108
|
freqs.push(clampFreq(freq));
|
|
1109
1109
|
}
|
|
1110
|
-
function finalizeFlatPostings(
|
|
1111
|
-
const termCount =
|
|
1112
|
-
const slotCount = termCount *
|
|
1110
|
+
function finalizeFlatPostings(state) {
|
|
1111
|
+
const termCount = state.terms.length;
|
|
1112
|
+
const slotCount = termCount * state.fieldCount;
|
|
1113
1113
|
const postingsOffsets = new Uint32Array(slotCount);
|
|
1114
1114
|
const postingsLengths = new Uint32Array(slotCount);
|
|
1115
1115
|
const docScratch = [];
|
|
1116
1116
|
const freqScratch = [];
|
|
1117
1117
|
for (let ti = 0; ti < termCount; ti++) {
|
|
1118
|
-
const base = ti *
|
|
1119
|
-
for (let f = 0; f <
|
|
1118
|
+
const base = ti * state.fieldCount;
|
|
1119
|
+
for (let f = 0; f < state.fieldCount; f++) {
|
|
1120
1120
|
const offset = docScratch.length;
|
|
1121
|
-
const docIds =
|
|
1122
|
-
const freqs =
|
|
1121
|
+
const docIds = state.postingsDocIds[base + f];
|
|
1122
|
+
const freqs = state.postingsFreqs[base + f];
|
|
1123
1123
|
if (docIds == null || docIds.length === 0) {
|
|
1124
1124
|
postingsOffsets[base + f] = offset;
|
|
1125
1125
|
postingsLengths[base + f] = 0;
|
|
@@ -1140,84 +1140,132 @@ function finalizeFlatPostings(builder) {
|
|
|
1140
1140
|
allFreqs: new Uint8Array(freqScratch)
|
|
1141
1141
|
};
|
|
1142
1142
|
}
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
const
|
|
1158
|
-
if (
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
const ti = getOrCreateTermIndex(builder, term);
|
|
1168
|
-
appendPosting(builder, ti, fieldId, shortId, freq);
|
|
1143
|
+
/** Incremental builder for {@link FrozenMiniSearch} without materializing a full `documents[]` array. */
|
|
1144
|
+
class FrozenIndexBuilder {
|
|
1145
|
+
constructor(options, hints) {
|
|
1146
|
+
this._options = resolveIndexingOptions(options);
|
|
1147
|
+
this._fieldIds = buildFieldIds(this._options.fields);
|
|
1148
|
+
this._fieldCount = this._options.fields.length;
|
|
1149
|
+
this._index = new SearchableMap();
|
|
1150
|
+
this._terms = [];
|
|
1151
|
+
this._postingsDocIds = [];
|
|
1152
|
+
this._postingsFreqs = [];
|
|
1153
|
+
this._idToShortId = new Map();
|
|
1154
|
+
this._avgFieldLength = [];
|
|
1155
|
+
this._nextId = 0;
|
|
1156
|
+
this._frozen = false;
|
|
1157
|
+
const estimated = hints === null || hints === void 0 ? void 0 : hints.estimatedDocumentCount;
|
|
1158
|
+
if (estimated != null && estimated > 0) {
|
|
1159
|
+
this._externalIds = new Array(estimated);
|
|
1160
|
+
this._storedFields = new Array(estimated);
|
|
1161
|
+
this._fieldLengthData = new Array(estimated * this._fieldCount).fill(0);
|
|
1162
|
+
}
|
|
1163
|
+
else {
|
|
1164
|
+
this._externalIds = [];
|
|
1165
|
+
this._storedFields = [];
|
|
1166
|
+
this._fieldLengthData = [];
|
|
1169
1167
|
}
|
|
1168
|
+
this._postingsState = {
|
|
1169
|
+
fieldCount: this._fieldCount,
|
|
1170
|
+
terms: this._terms,
|
|
1171
|
+
postingsDocIds: this._postingsDocIds,
|
|
1172
|
+
postingsFreqs: this._postingsFreqs
|
|
1173
|
+
};
|
|
1174
|
+
}
|
|
1175
|
+
/** Number of documents indexed so far (not yet frozen). */
|
|
1176
|
+
get documentCount() {
|
|
1177
|
+
return this._nextId;
|
|
1178
|
+
}
|
|
1179
|
+
add(document) {
|
|
1180
|
+
if (this._frozen) {
|
|
1181
|
+
throw new Error('FrozenIndexBuilder: cannot add after freezeParams()');
|
|
1182
|
+
}
|
|
1183
|
+
const { extractField, stringifyField, tokenize, processTerm, fields, idField, storeFields } = this._options;
|
|
1184
|
+
const id = extractField(document, idField);
|
|
1185
|
+
if (id == null) {
|
|
1186
|
+
throw new Error(`MiniSearch: document does not have ID field "${idField}"`);
|
|
1187
|
+
}
|
|
1188
|
+
if (this._idToShortId.has(id)) {
|
|
1189
|
+
throw new Error(`MiniSearch: duplicate ID ${id}`);
|
|
1190
|
+
}
|
|
1191
|
+
const shortId = this._nextId++;
|
|
1192
|
+
this._idToShortId.set(id, shortId);
|
|
1193
|
+
this._externalIds[shortId] = id;
|
|
1194
|
+
this._storedFields[shortId] = saveStoredFieldsForDocument(storeFields, extractField, document);
|
|
1195
|
+
const documentCount = shortId + 1;
|
|
1196
|
+
for (const field of fields) {
|
|
1197
|
+
const fieldValue = extractField(document, field);
|
|
1198
|
+
if (fieldValue == null)
|
|
1199
|
+
continue;
|
|
1200
|
+
const tokens = tokenize(stringifyField(fieldValue, field), field);
|
|
1201
|
+
const fieldId = this._fieldIds[field];
|
|
1202
|
+
const uniqueTerms = new Set(tokens).size;
|
|
1203
|
+
const localFreqs = collectFieldTermFreqs(tokens, field, processTerm);
|
|
1204
|
+
this._fieldLengthData[shortId * this._fieldCount + fieldId] = uniqueTerms;
|
|
1205
|
+
updateAvgFieldLength(this._avgFieldLength, fieldId, documentCount - 1, uniqueTerms);
|
|
1206
|
+
for (const [term, freq] of localFreqs) {
|
|
1207
|
+
const ti = getOrCreateTermIndex(this._postingsState, this._index, term);
|
|
1208
|
+
appendPosting(this._postingsState, ti, fieldId, shortId, freq);
|
|
1209
|
+
}
|
|
1210
|
+
}
|
|
1211
|
+
}
|
|
1212
|
+
/**
|
|
1213
|
+
* Finalize this builder into assembly params. Call {@link assembleFrozen} or
|
|
1214
|
+
* {@link freezeFrozenIndexBuilder} to obtain a {@link FrozenMiniSearch} instance.
|
|
1215
|
+
*/
|
|
1216
|
+
freezeParams() {
|
|
1217
|
+
var _a;
|
|
1218
|
+
if (this._frozen) {
|
|
1219
|
+
throw new Error('FrozenIndexBuilder: freezeParams() already called');
|
|
1220
|
+
}
|
|
1221
|
+
this._frozen = true;
|
|
1222
|
+
const documentCount = this._nextId;
|
|
1223
|
+
const flat = finalizeFlatPostings(this._postingsState);
|
|
1224
|
+
const avgFieldLength = new Float32Array(this._fieldCount);
|
|
1225
|
+
for (let f = 0; f < this._fieldCount; f++) {
|
|
1226
|
+
avgFieldLength[f] = (_a = this._avgFieldLength[f]) !== null && _a !== void 0 ? _a : 0;
|
|
1227
|
+
}
|
|
1228
|
+
// Ensure exact size regardless of over- or under-estimated documentCount.
|
|
1229
|
+
this._fieldLengthData.length = documentCount * this._fieldCount;
|
|
1230
|
+
// Trim per-document arrays to actual count when estimatedDocumentCount was too large.
|
|
1231
|
+
const externalIds = this._externalIds.length > documentCount
|
|
1232
|
+
? this._externalIds.slice(0, documentCount)
|
|
1233
|
+
: this._externalIds;
|
|
1234
|
+
const storedFields = this._storedFields.length > documentCount
|
|
1235
|
+
? this._storedFields.slice(0, documentCount)
|
|
1236
|
+
: this._storedFields;
|
|
1237
|
+
return {
|
|
1238
|
+
options: this._options,
|
|
1239
|
+
documentCount,
|
|
1240
|
+
nextId: documentCount,
|
|
1241
|
+
fieldIds: this._fieldIds,
|
|
1242
|
+
fieldCount: this._fieldCount,
|
|
1243
|
+
externalIds,
|
|
1244
|
+
idToShortId: this._idToShortId,
|
|
1245
|
+
storedFields,
|
|
1246
|
+
fieldLengthMatrix: new Uint32Array(this._fieldLengthData),
|
|
1247
|
+
avgFieldLength,
|
|
1248
|
+
index: this._index,
|
|
1249
|
+
terms: this._terms,
|
|
1250
|
+
postingsOffsets: flat.postingsOffsets,
|
|
1251
|
+
postingsLengths: flat.postingsLengths,
|
|
1252
|
+
allDocIds: flat.allDocIds,
|
|
1253
|
+
allFreqs: flat.allFreqs
|
|
1254
|
+
};
|
|
1170
1255
|
}
|
|
1171
1256
|
}
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
return
|
|
1175
|
-
options,
|
|
1176
|
-
fieldIds: buildFieldIds(options.fields),
|
|
1177
|
-
fieldCount,
|
|
1178
|
-
documentCount,
|
|
1179
|
-
index: new SearchableMap(),
|
|
1180
|
-
terms: [],
|
|
1181
|
-
postingsDocIds: [],
|
|
1182
|
-
postingsFreqs: [],
|
|
1183
|
-
externalIds: new Array(documentCount),
|
|
1184
|
-
idToShortId: new Map(),
|
|
1185
|
-
storedFields: new Array(documentCount),
|
|
1186
|
-
fieldLengthMatrix: new Uint32Array(documentCount * fieldCount),
|
|
1187
|
-
avgFieldLength: []
|
|
1188
|
-
};
|
|
1257
|
+
/** Create an incremental builder for {@link FrozenMiniSearch}. */
|
|
1258
|
+
function createFrozenIndexBuilder(options, hints) {
|
|
1259
|
+
return new FrozenIndexBuilder(options, hints);
|
|
1189
1260
|
}
|
|
1190
1261
|
function buildFrozenParamsFromDocuments(documents, options) {
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
indexDocument(builder, documents[d], d);
|
|
1197
|
-
}
|
|
1198
|
-
const flat = finalizeFlatPostings(builder);
|
|
1199
|
-
const avgFieldLength = new Float32Array(builder.fieldCount);
|
|
1200
|
-
for (let f = 0; f < builder.fieldCount; f++) {
|
|
1201
|
-
avgFieldLength[f] = (_a = builder.avgFieldLength[f]) !== null && _a !== void 0 ? _a : 0;
|
|
1262
|
+
const builder = createFrozenIndexBuilder(options, {
|
|
1263
|
+
estimatedDocumentCount: documents.length
|
|
1264
|
+
});
|
|
1265
|
+
for (let d = 0; d < documents.length; d++) {
|
|
1266
|
+
builder.add(documents[d]);
|
|
1202
1267
|
}
|
|
1203
|
-
return
|
|
1204
|
-
options: resolved,
|
|
1205
|
-
documentCount,
|
|
1206
|
-
nextId: documentCount,
|
|
1207
|
-
fieldIds: builder.fieldIds,
|
|
1208
|
-
fieldCount: builder.fieldCount,
|
|
1209
|
-
externalIds: builder.externalIds,
|
|
1210
|
-
idToShortId: builder.idToShortId,
|
|
1211
|
-
storedFields: builder.storedFields,
|
|
1212
|
-
fieldLengthMatrix: builder.fieldLengthMatrix,
|
|
1213
|
-
avgFieldLength,
|
|
1214
|
-
index: builder.index,
|
|
1215
|
-
terms: builder.terms,
|
|
1216
|
-
postingsOffsets: flat.postingsOffsets,
|
|
1217
|
-
postingsLengths: flat.postingsLengths,
|
|
1218
|
-
allDocIds: flat.allDocIds,
|
|
1219
|
-
allFreqs: flat.allFreqs
|
|
1220
|
-
};
|
|
1268
|
+
return builder.freezeParams();
|
|
1221
1269
|
}
|
|
1222
1270
|
|
|
1223
1271
|
/** Shared wildcard query symbol for MiniSearch and FrozenMiniSearch */
|
|
@@ -1370,6 +1418,10 @@ function freezeFromMiniSearch(source) {
|
|
|
1370
1418
|
function buildFrozenFromDocuments(documents, options) {
|
|
1371
1419
|
return assembleFrozen(buildFrozenParamsFromDocuments(documents, options));
|
|
1372
1420
|
}
|
|
1421
|
+
/** Finalize a {@link FrozenIndexBuilder} into a read-only index. */
|
|
1422
|
+
function freezeFrozenIndexBuilder(builder) {
|
|
1423
|
+
return assembleFrozen(builder.freezeParams());
|
|
1424
|
+
}
|
|
1373
1425
|
class FrozenMiniSearch {
|
|
1374
1426
|
constructor(params) {
|
|
1375
1427
|
this._options = params.options;
|
|
@@ -1560,6 +1612,17 @@ class FrozenMiniSearch {
|
|
|
1560
1612
|
static fromDocuments(documents, options) {
|
|
1561
1613
|
return buildFrozenFromDocuments(documents, options);
|
|
1562
1614
|
}
|
|
1615
|
+
/**
|
|
1616
|
+
* Build a read-only index from an async stream of documents (e.g. CSV parser).
|
|
1617
|
+
* For sync iterables, use {@link createFrozenIndexBuilder} with `for...of` instead.
|
|
1618
|
+
*/
|
|
1619
|
+
static async fromAsyncIterable(iterable, options) {
|
|
1620
|
+
const builder = createFrozenIndexBuilder(options);
|
|
1621
|
+
for await (const document of iterable) {
|
|
1622
|
+
builder.add(document);
|
|
1623
|
+
}
|
|
1624
|
+
return freezeFrozenIndexBuilder(builder);
|
|
1625
|
+
}
|
|
1563
1626
|
getFieldLength(docId, fieldId) {
|
|
1564
1627
|
var _a;
|
|
1565
1628
|
return (_a = this._fieldLengthMatrix[docId * this._fieldCount + fieldId]) !== null && _a !== void 0 ? _a : 0;
|
|
@@ -2952,10 +3015,13 @@ const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
|
|
|
2952
3015
|
|
|
2953
3016
|
exports.AND = AND;
|
|
2954
3017
|
exports.AND_NOT = AND_NOT;
|
|
3018
|
+
exports.FrozenIndexBuilder = FrozenIndexBuilder;
|
|
2955
3019
|
exports.FrozenMiniSearch = FrozenMiniSearch;
|
|
2956
3020
|
exports.OR = OR;
|
|
2957
3021
|
exports.assembleFrozen = assembleFrozen;
|
|
2958
3022
|
exports.buildFrozenFromDocuments = buildFrozenFromDocuments;
|
|
3023
|
+
exports.createFrozenIndexBuilder = createFrozenIndexBuilder;
|
|
2959
3024
|
exports.default = MiniSearch;
|
|
2960
3025
|
exports.freezeFromMiniSearch = freezeFromMiniSearch;
|
|
3026
|
+
exports.freezeFrozenIndexBuilder = freezeFrozenIndexBuilder;
|
|
2961
3027
|
exports.frozenMemoryBreakdown = frozenMemoryBreakdown;
|
package/dist/es/index.d.ts
CHANGED
|
@@ -254,6 +254,40 @@ declare class SearchableMap<T = any> {
|
|
|
254
254
|
}): SearchableMap<any>;
|
|
255
255
|
}
|
|
256
256
|
|
|
257
|
+
interface FrozenIndexBuilderHints {
|
|
258
|
+
/** Pre-size per-document arrays when the final document count is known. */
|
|
259
|
+
estimatedDocumentCount?: number;
|
|
260
|
+
}
|
|
261
|
+
/** Incremental builder for {@link FrozenMiniSearch} without materializing a full `documents[]` array. */
|
|
262
|
+
declare class FrozenIndexBuilder<T> {
|
|
263
|
+
private readonly _options;
|
|
264
|
+
private readonly _fieldIds;
|
|
265
|
+
private readonly _fieldCount;
|
|
266
|
+
private readonly _index;
|
|
267
|
+
private readonly _terms;
|
|
268
|
+
private readonly _postingsDocIds;
|
|
269
|
+
private readonly _postingsFreqs;
|
|
270
|
+
private readonly _externalIds;
|
|
271
|
+
private readonly _idToShortId;
|
|
272
|
+
private readonly _storedFields;
|
|
273
|
+
private readonly _fieldLengthData;
|
|
274
|
+
private readonly _avgFieldLength;
|
|
275
|
+
private readonly _postingsState;
|
|
276
|
+
private _nextId;
|
|
277
|
+
private _frozen;
|
|
278
|
+
constructor(options: Options<T>, hints?: FrozenIndexBuilderHints);
|
|
279
|
+
/** Number of documents indexed so far (not yet frozen). */
|
|
280
|
+
get documentCount(): number;
|
|
281
|
+
add(document: T): void;
|
|
282
|
+
/**
|
|
283
|
+
* Finalize this builder into assembly params. Call {@link assembleFrozen} or
|
|
284
|
+
* {@link freezeFrozenIndexBuilder} to obtain a {@link FrozenMiniSearch} instance.
|
|
285
|
+
*/
|
|
286
|
+
freezeParams(): FrozenAssembleParams<T>;
|
|
287
|
+
}
|
|
288
|
+
/** Create an incremental builder for {@link FrozenMiniSearch}. */
|
|
289
|
+
declare function createFrozenIndexBuilder<T>(options: Options<T>, hints?: FrozenIndexBuilderHints): FrozenIndexBuilder<T>;
|
|
290
|
+
|
|
257
291
|
/** Shared wildcard query symbol for MiniSearch and FrozenMiniSearch */
|
|
258
292
|
declare const WILDCARD_QUERY: unique symbol;
|
|
259
293
|
|
|
@@ -345,6 +379,8 @@ interface FrozenAssembleParams<T = any> {
|
|
|
345
379
|
declare function assembleFrozen<T>(params: FrozenAssembleParams<T>): FrozenMiniSearch<T>;
|
|
346
380
|
declare function freezeFromMiniSearch<T>(source: FreezeSource<T>): FrozenMiniSearch<T>;
|
|
347
381
|
declare function buildFrozenFromDocuments<T>(documents: readonly T[], options: Options<T>): FrozenMiniSearch<T>;
|
|
382
|
+
/** Finalize a {@link FrozenIndexBuilder} into a read-only index. */
|
|
383
|
+
declare function freezeFrozenIndexBuilder<T>(builder: FrozenIndexBuilder<T>): FrozenMiniSearch<T>;
|
|
348
384
|
declare class FrozenMiniSearch<T = any> {
|
|
349
385
|
private readonly _options;
|
|
350
386
|
private readonly _index;
|
|
@@ -408,6 +444,11 @@ declare class FrozenMiniSearch<T = any> {
|
|
|
408
444
|
* incremental updates before freezing.
|
|
409
445
|
*/
|
|
410
446
|
static fromDocuments<T>(documents: readonly T[], options: Options<T>): FrozenMiniSearch<T>;
|
|
447
|
+
/**
|
|
448
|
+
* Build a read-only index from an async stream of documents (e.g. CSV parser).
|
|
449
|
+
* For sync iterables, use {@link createFrozenIndexBuilder} with `for...of` instead.
|
|
450
|
+
*/
|
|
451
|
+
static fromAsyncIterable<T>(iterable: AsyncIterable<T>, options: Options<T>): Promise<FrozenMiniSearch<T>>;
|
|
411
452
|
private getFieldLength;
|
|
412
453
|
private fieldTermDataFor;
|
|
413
454
|
private aggregateContext;
|
|
@@ -1632,4 +1673,4 @@ interface SerializedIndexEntry {
|
|
|
1632
1673
|
[key: string]: number;
|
|
1633
1674
|
}
|
|
1634
1675
|
|
|
1635
|
-
export { AND, AND_NOT, type AsPlainObject, type AutoVacuumOptions, type BM25Params, type CombinationOperator, type FrozenAssembleParams, type FrozenMemoryBreakdown, FrozenMiniSearch, type LowercaseCombinationOperator, type MatchInfo, OR, type Options, type Query, type QueryCombination, type SearchOptions, type SearchResult, type Suggestion, type VacuumConditions, type VacuumOptions, type Wildcard, assembleFrozen, buildFrozenFromDocuments, MiniSearch as default, freezeFromMiniSearch, frozenMemoryBreakdown };
|
|
1676
|
+
export { AND, AND_NOT, type AsPlainObject, type AutoVacuumOptions, type BM25Params, type CombinationOperator, type FrozenAssembleParams, FrozenIndexBuilder, type FrozenIndexBuilderHints, type FrozenMemoryBreakdown, FrozenMiniSearch, type LowercaseCombinationOperator, type MatchInfo, OR, type Options, type Query, type QueryCombination, type SearchOptions, type SearchResult, type Suggestion, type VacuumConditions, type VacuumOptions, type Wildcard, assembleFrozen, buildFrozenFromDocuments, createFrozenIndexBuilder, MiniSearch as default, freezeFromMiniSearch, freezeFrozenIndexBuilder, frozenMemoryBreakdown };
|
package/dist/es/index.js
CHANGED
|
@@ -1081,41 +1081,41 @@ function saveStoredFieldsForDocument(storeFields, extractField, document) {
|
|
|
1081
1081
|
return documentFields;
|
|
1082
1082
|
}
|
|
1083
1083
|
|
|
1084
|
-
function getOrCreateTermIndex(
|
|
1085
|
-
const existing =
|
|
1084
|
+
function getOrCreateTermIndex(state, index, term) {
|
|
1085
|
+
const existing = index.get(term);
|
|
1086
1086
|
if (existing != null)
|
|
1087
1087
|
return existing;
|
|
1088
|
-
const ti =
|
|
1089
|
-
|
|
1090
|
-
|
|
1088
|
+
const ti = state.terms.length;
|
|
1089
|
+
state.terms.push(term);
|
|
1090
|
+
index.set(term, ti);
|
|
1091
1091
|
return ti;
|
|
1092
1092
|
}
|
|
1093
|
-
function appendPosting(
|
|
1094
|
-
const slot = termIndex *
|
|
1095
|
-
let docIds =
|
|
1096
|
-
let freqs =
|
|
1093
|
+
function appendPosting(state, termIndex, fieldId, docId, freq) {
|
|
1094
|
+
const slot = termIndex * state.fieldCount + fieldId;
|
|
1095
|
+
let docIds = state.postingsDocIds[slot];
|
|
1096
|
+
let freqs = state.postingsFreqs[slot];
|
|
1097
1097
|
if (docIds == null) {
|
|
1098
1098
|
docIds = [];
|
|
1099
1099
|
freqs = [];
|
|
1100
|
-
|
|
1101
|
-
|
|
1100
|
+
state.postingsDocIds[slot] = docIds;
|
|
1101
|
+
state.postingsFreqs[slot] = freqs;
|
|
1102
1102
|
}
|
|
1103
1103
|
docIds.push(docId);
|
|
1104
1104
|
freqs.push(clampFreq(freq));
|
|
1105
1105
|
}
|
|
1106
|
-
function finalizeFlatPostings(
|
|
1107
|
-
const termCount =
|
|
1108
|
-
const slotCount = termCount *
|
|
1106
|
+
function finalizeFlatPostings(state) {
|
|
1107
|
+
const termCount = state.terms.length;
|
|
1108
|
+
const slotCount = termCount * state.fieldCount;
|
|
1109
1109
|
const postingsOffsets = new Uint32Array(slotCount);
|
|
1110
1110
|
const postingsLengths = new Uint32Array(slotCount);
|
|
1111
1111
|
const docScratch = [];
|
|
1112
1112
|
const freqScratch = [];
|
|
1113
1113
|
for (let ti = 0; ti < termCount; ti++) {
|
|
1114
|
-
const base = ti *
|
|
1115
|
-
for (let f = 0; f <
|
|
1114
|
+
const base = ti * state.fieldCount;
|
|
1115
|
+
for (let f = 0; f < state.fieldCount; f++) {
|
|
1116
1116
|
const offset = docScratch.length;
|
|
1117
|
-
const docIds =
|
|
1118
|
-
const freqs =
|
|
1117
|
+
const docIds = state.postingsDocIds[base + f];
|
|
1118
|
+
const freqs = state.postingsFreqs[base + f];
|
|
1119
1119
|
if (docIds == null || docIds.length === 0) {
|
|
1120
1120
|
postingsOffsets[base + f] = offset;
|
|
1121
1121
|
postingsLengths[base + f] = 0;
|
|
@@ -1136,84 +1136,132 @@ function finalizeFlatPostings(builder) {
|
|
|
1136
1136
|
allFreqs: new Uint8Array(freqScratch)
|
|
1137
1137
|
};
|
|
1138
1138
|
}
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
const
|
|
1154
|
-
if (
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
const ti = getOrCreateTermIndex(builder, term);
|
|
1164
|
-
appendPosting(builder, ti, fieldId, shortId, freq);
|
|
1139
|
+
/** Incremental builder for {@link FrozenMiniSearch} without materializing a full `documents[]` array. */
|
|
1140
|
+
class FrozenIndexBuilder {
|
|
1141
|
+
constructor(options, hints) {
|
|
1142
|
+
this._options = resolveIndexingOptions(options);
|
|
1143
|
+
this._fieldIds = buildFieldIds(this._options.fields);
|
|
1144
|
+
this._fieldCount = this._options.fields.length;
|
|
1145
|
+
this._index = new SearchableMap();
|
|
1146
|
+
this._terms = [];
|
|
1147
|
+
this._postingsDocIds = [];
|
|
1148
|
+
this._postingsFreqs = [];
|
|
1149
|
+
this._idToShortId = new Map();
|
|
1150
|
+
this._avgFieldLength = [];
|
|
1151
|
+
this._nextId = 0;
|
|
1152
|
+
this._frozen = false;
|
|
1153
|
+
const estimated = hints === null || hints === void 0 ? void 0 : hints.estimatedDocumentCount;
|
|
1154
|
+
if (estimated != null && estimated > 0) {
|
|
1155
|
+
this._externalIds = new Array(estimated);
|
|
1156
|
+
this._storedFields = new Array(estimated);
|
|
1157
|
+
this._fieldLengthData = new Array(estimated * this._fieldCount).fill(0);
|
|
1158
|
+
}
|
|
1159
|
+
else {
|
|
1160
|
+
this._externalIds = [];
|
|
1161
|
+
this._storedFields = [];
|
|
1162
|
+
this._fieldLengthData = [];
|
|
1165
1163
|
}
|
|
1164
|
+
this._postingsState = {
|
|
1165
|
+
fieldCount: this._fieldCount,
|
|
1166
|
+
terms: this._terms,
|
|
1167
|
+
postingsDocIds: this._postingsDocIds,
|
|
1168
|
+
postingsFreqs: this._postingsFreqs
|
|
1169
|
+
};
|
|
1170
|
+
}
|
|
1171
|
+
/** Number of documents indexed so far (not yet frozen). */
|
|
1172
|
+
get documentCount() {
|
|
1173
|
+
return this._nextId;
|
|
1174
|
+
}
|
|
1175
|
+
add(document) {
|
|
1176
|
+
if (this._frozen) {
|
|
1177
|
+
throw new Error('FrozenIndexBuilder: cannot add after freezeParams()');
|
|
1178
|
+
}
|
|
1179
|
+
const { extractField, stringifyField, tokenize, processTerm, fields, idField, storeFields } = this._options;
|
|
1180
|
+
const id = extractField(document, idField);
|
|
1181
|
+
if (id == null) {
|
|
1182
|
+
throw new Error(`MiniSearch: document does not have ID field "${idField}"`);
|
|
1183
|
+
}
|
|
1184
|
+
if (this._idToShortId.has(id)) {
|
|
1185
|
+
throw new Error(`MiniSearch: duplicate ID ${id}`);
|
|
1186
|
+
}
|
|
1187
|
+
const shortId = this._nextId++;
|
|
1188
|
+
this._idToShortId.set(id, shortId);
|
|
1189
|
+
this._externalIds[shortId] = id;
|
|
1190
|
+
this._storedFields[shortId] = saveStoredFieldsForDocument(storeFields, extractField, document);
|
|
1191
|
+
const documentCount = shortId + 1;
|
|
1192
|
+
for (const field of fields) {
|
|
1193
|
+
const fieldValue = extractField(document, field);
|
|
1194
|
+
if (fieldValue == null)
|
|
1195
|
+
continue;
|
|
1196
|
+
const tokens = tokenize(stringifyField(fieldValue, field), field);
|
|
1197
|
+
const fieldId = this._fieldIds[field];
|
|
1198
|
+
const uniqueTerms = new Set(tokens).size;
|
|
1199
|
+
const localFreqs = collectFieldTermFreqs(tokens, field, processTerm);
|
|
1200
|
+
this._fieldLengthData[shortId * this._fieldCount + fieldId] = uniqueTerms;
|
|
1201
|
+
updateAvgFieldLength(this._avgFieldLength, fieldId, documentCount - 1, uniqueTerms);
|
|
1202
|
+
for (const [term, freq] of localFreqs) {
|
|
1203
|
+
const ti = getOrCreateTermIndex(this._postingsState, this._index, term);
|
|
1204
|
+
appendPosting(this._postingsState, ti, fieldId, shortId, freq);
|
|
1205
|
+
}
|
|
1206
|
+
}
|
|
1207
|
+
}
|
|
1208
|
+
/**
|
|
1209
|
+
* Finalize this builder into assembly params. Call {@link assembleFrozen} or
|
|
1210
|
+
* {@link freezeFrozenIndexBuilder} to obtain a {@link FrozenMiniSearch} instance.
|
|
1211
|
+
*/
|
|
1212
|
+
freezeParams() {
|
|
1213
|
+
var _a;
|
|
1214
|
+
if (this._frozen) {
|
|
1215
|
+
throw new Error('FrozenIndexBuilder: freezeParams() already called');
|
|
1216
|
+
}
|
|
1217
|
+
this._frozen = true;
|
|
1218
|
+
const documentCount = this._nextId;
|
|
1219
|
+
const flat = finalizeFlatPostings(this._postingsState);
|
|
1220
|
+
const avgFieldLength = new Float32Array(this._fieldCount);
|
|
1221
|
+
for (let f = 0; f < this._fieldCount; f++) {
|
|
1222
|
+
avgFieldLength[f] = (_a = this._avgFieldLength[f]) !== null && _a !== void 0 ? _a : 0;
|
|
1223
|
+
}
|
|
1224
|
+
// Ensure exact size regardless of over- or under-estimated documentCount.
|
|
1225
|
+
this._fieldLengthData.length = documentCount * this._fieldCount;
|
|
1226
|
+
// Trim per-document arrays to actual count when estimatedDocumentCount was too large.
|
|
1227
|
+
const externalIds = this._externalIds.length > documentCount
|
|
1228
|
+
? this._externalIds.slice(0, documentCount)
|
|
1229
|
+
: this._externalIds;
|
|
1230
|
+
const storedFields = this._storedFields.length > documentCount
|
|
1231
|
+
? this._storedFields.slice(0, documentCount)
|
|
1232
|
+
: this._storedFields;
|
|
1233
|
+
return {
|
|
1234
|
+
options: this._options,
|
|
1235
|
+
documentCount,
|
|
1236
|
+
nextId: documentCount,
|
|
1237
|
+
fieldIds: this._fieldIds,
|
|
1238
|
+
fieldCount: this._fieldCount,
|
|
1239
|
+
externalIds,
|
|
1240
|
+
idToShortId: this._idToShortId,
|
|
1241
|
+
storedFields,
|
|
1242
|
+
fieldLengthMatrix: new Uint32Array(this._fieldLengthData),
|
|
1243
|
+
avgFieldLength,
|
|
1244
|
+
index: this._index,
|
|
1245
|
+
terms: this._terms,
|
|
1246
|
+
postingsOffsets: flat.postingsOffsets,
|
|
1247
|
+
postingsLengths: flat.postingsLengths,
|
|
1248
|
+
allDocIds: flat.allDocIds,
|
|
1249
|
+
allFreqs: flat.allFreqs
|
|
1250
|
+
};
|
|
1166
1251
|
}
|
|
1167
1252
|
}
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
return {
|
|
1171
|
-
options,
|
|
1172
|
-
fieldIds: buildFieldIds(options.fields),
|
|
1173
|
-
fieldCount,
|
|
1174
|
-
documentCount,
|
|
1175
|
-
index: new SearchableMap(),
|
|
1176
|
-
terms: [],
|
|
1177
|
-
postingsDocIds: [],
|
|
1178
|
-
postingsFreqs: [],
|
|
1179
|
-
externalIds: new Array(documentCount),
|
|
1180
|
-
idToShortId: new Map(),
|
|
1181
|
-
storedFields: new Array(documentCount),
|
|
1182
|
-
fieldLengthMatrix: new Uint32Array(documentCount * fieldCount),
|
|
1183
|
-
avgFieldLength: []
|
|
1184
|
-
};
|
|
1253
|
+
/**
 * Factory for an incremental {@link FrozenMiniSearch} builder.
 *
 * @param options Index options, forwarded to the builder constructor.
 * @param hints Optional sizing hints, forwarded unchanged.
 * @returns A new `FrozenIndexBuilder`.
 */
function createFrozenIndexBuilder(options, hints) {
    const builder = new FrozenIndexBuilder(options, hints);
    return builder;
}
|
|
1186
1257
|
/**
 * Build frozen-index assembly params from an in-memory list of documents.
 * The builder is pre-sized with `documents.length`, every document is added
 * in index order, and the builder is then finalized.
 *
 * @param documents Indexable list of documents (read via `length` and `[i]`).
 * @param options Index options forwarded to the builder.
 * @returns The params object produced by `freezeParams()`.
 */
function buildFrozenParamsFromDocuments(documents, options) {
    const hints = { estimatedDocumentCount: documents.length };
    const builder = createFrozenIndexBuilder(options, hints);
    let position = 0;
    while (position < documents.length) {
        builder.add(documents[position]);
        position += 1;
    }
    return builder.freezeParams();
}
|
|
1218
1266
|
|
|
1219
1267
|
/** Shared wildcard query symbol for MiniSearch and FrozenMiniSearch */
|
|
@@ -1366,6 +1414,10 @@ function freezeFromMiniSearch(source) {
|
|
|
1366
1414
|
/**
 * Build a frozen index from a list of documents: compute the assembly
 * params, then hand them to `assembleFrozen`.
 */
function buildFrozenFromDocuments(documents, options) {
    const params = buildFrozenParamsFromDocuments(documents, options);
    return assembleFrozen(params);
}
|
|
1417
|
+
/**
 * Turn a {@link FrozenIndexBuilder} into a read-only index by freezing its
 * params and passing them to `assembleFrozen`.
 *
 * @param builder Builder to finalize; its `freezeParams()` is invoked here.
 */
function freezeFrozenIndexBuilder(builder) {
    const params = builder.freezeParams();
    return assembleFrozen(params);
}
|
|
1369
1421
|
class FrozenMiniSearch {
|
|
1370
1422
|
constructor(params) {
|
|
1371
1423
|
this._options = params.options;
|
|
@@ -1556,6 +1608,17 @@ class FrozenMiniSearch {
|
|
|
1556
1608
|
static fromDocuments(documents, options) {
|
|
1557
1609
|
return buildFrozenFromDocuments(documents, options);
|
|
1558
1610
|
}
|
|
1611
|
+
/**
|
|
1612
|
+
* Build a read-only index from an async stream of documents (e.g. CSV parser).
|
|
1613
|
+
* For sync iterables, use {@link createFrozenIndexBuilder} with `for...of` instead.
|
|
1614
|
+
*/
|
|
1615
|
+
static async fromAsyncIterable(iterable, options) {
|
|
1616
|
+
const builder = createFrozenIndexBuilder(options);
|
|
1617
|
+
for await (const document of iterable) {
|
|
1618
|
+
builder.add(document);
|
|
1619
|
+
}
|
|
1620
|
+
return freezeFrozenIndexBuilder(builder);
|
|
1621
|
+
}
|
|
1559
1622
|
getFieldLength(docId, fieldId) {
|
|
1560
1623
|
var _a;
|
|
1561
1624
|
return (_a = this._fieldLengthMatrix[docId * this._fieldCount + fieldId]) !== null && _a !== void 0 ? _a : 0;
|
|
@@ -2946,4 +3009,4 @@ const objectToNumericMapAsync = async (object) => {
|
|
|
2946
3009
|
};
|
|
2947
3010
|
/** Return a promise that resolves (with no value) once a `setTimeout` of `ms` milliseconds fires. */
const wait = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
});
|
|
2948
3011
|
|
|
2949
|
-
export { AND, AND_NOT, FrozenMiniSearch, OR, assembleFrozen, buildFrozenFromDocuments, MiniSearch as default, freezeFromMiniSearch, frozenMemoryBreakdown };
|
|
3012
|
+
export { AND, AND_NOT, FrozenIndexBuilder, FrozenMiniSearch, OR, assembleFrozen, buildFrozenFromDocuments, createFrozenIndexBuilder, MiniSearch as default, freezeFromMiniSearch, freezeFrozenIndexBuilder, frozenMemoryBreakdown };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@yoch/minisearch",
|
|
3
|
-
"version": "8.0.0-beta.2",
|
|
3
|
+
"version": "8.0.0-beta.3",
|
|
4
4
|
"description": "Node.js full-text search with FrozenMiniSearch and binary index snapshots",
|
|
5
5
|
"main": "dist/cjs/index.cjs",
|
|
6
6
|
"module": "dist/es/index.js",
|
|
@@ -94,23 +94,23 @@
|
|
|
94
94
|
"test": "jest",
|
|
95
95
|
"test-watch": "jest --watch",
|
|
96
96
|
"coverage": "jest --coverage",
|
|
97
|
-
"benchmark": "
|
|
98
|
-
"benchmark:baseline": "
|
|
99
|
-
"benchmark:compare": "
|
|
100
|
-
"benchmark:record": "
|
|
101
|
-
"benchmark:record:reference": "
|
|
102
|
-
"benchmark:diff": "
|
|
97
|
+
"benchmark": "npm run build-benchmark && NODE_ENV=production node --expose-gc benchmarks/dist/index.cjs",
|
|
98
|
+
"benchmark:baseline": "npm run build && node --expose-gc benchmarks/baseline.js",
|
|
99
|
+
"benchmark:compare": "npm run build && node --expose-gc benchmarks/compare.js",
|
|
100
|
+
"benchmark:record": "npm run build && node --expose-gc benchmarks/captureBaseline.js",
|
|
101
|
+
"benchmark:record:reference": "npm run build && node --expose-gc benchmarks/captureBaseline.js --reference",
|
|
102
|
+
"benchmark:diff": "npm run build && node --expose-gc benchmarks/diffBaseline.js",
|
|
103
103
|
"benchmark:diff:latest": "node --expose-gc benchmarks/diffBaseline.js --latest",
|
|
104
|
-
"benchmark:baseline:update": "
|
|
105
|
-
"build-benchmark": "BENCHMARKS=true
|
|
106
|
-
"build": "
|
|
104
|
+
"benchmark:baseline:update": "npm run benchmark:record:reference",
|
|
105
|
+
"build-benchmark": "BENCHMARKS=true npm run build",
|
|
106
|
+
"build": "npm run clean-build && NODE_ENV=production rollup -c && node scripts/postbuild-cjs.cjs",
|
|
107
107
|
"clean-build": "rm -rf dist",
|
|
108
|
-
"build-minified": "MINIFY=true
|
|
109
|
-
"build-docs": "typedoc --options typedoc.json &&
|
|
108
|
+
"build-minified": "MINIFY=true npm run build",
|
|
109
|
+
"build-docs": "typedoc --options typedoc.json && npm run build-demo",
|
|
110
110
|
"build-demo": "mkdir -p ./docs/demo && cp -r ./examples/plain_js/. ./docs/demo",
|
|
111
111
|
"lint": "eslint 'src/**/*.{js,ts}'",
|
|
112
112
|
"lintfix": "eslint --fix 'src/**/*.{js,ts}'",
|
|
113
|
-
"prepublishOnly": "
|
|
113
|
+
"prepublishOnly": "npm test && npm run build"
|
|
114
114
|
},
|
|
115
115
|
"sideEffects": false
|
|
116
116
|
}
|