@tinacms/search 1.1.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/client/index.d.ts +10 -13
- package/dist/fuzzy/cache.d.ts +11 -0
- package/dist/fuzzy/distance.d.ts +8 -0
- package/dist/fuzzy/index.d.ts +4 -0
- package/dist/fuzzy/types.d.ts +20 -0
- package/dist/fuzzy-search-wrapper.d.ts +23 -0
- package/dist/index-client.d.ts +25 -15
- package/dist/index-client.js +141 -134
- package/dist/index.d.ts +8 -3
- package/dist/index.js +508 -163
- package/dist/indexer/index.d.ts +1 -0
- package/dist/indexer/utils.d.ts +1 -1
- package/dist/pagination.d.ts +16 -0
- package/dist/types.d.ts +53 -11
- package/package.json +5 -5
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
|
-
import
|
|
2
|
+
import createSearchIndex2 from "search-index";
|
|
3
3
|
|
|
4
4
|
// src/indexer/index.ts
|
|
5
5
|
import {
|
|
@@ -13,137 +13,137 @@ import {
|
|
|
13
13
|
|
|
14
14
|
// src/indexer/utils.ts
|
|
15
15
|
import * as sw from "stopword";
|
|
16
|
+
var INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
|
|
16
17
|
/**
 * Collects tokens until a total character budget is reached.
 *
 * Only the characters of the tokens themselves count toward `limit`; the
 * single-space separators added by `toString()` are not counted.
 */
var StringBuilder = class {
  buffer = [];
  limit;
  length = 0;
  /**
   * @param {number} limit Maximum combined length of all appended tokens.
   */
  constructor(limit) {
    this.limit = limit;
  }
  /**
   * Appends `str` unless doing so would push the total past the limit.
   *
   * @param {string} str Token to append.
   * @returns {boolean} `true` when the token was rejected because it does not
   *   fit (callers treat this as "stop"), `false` when it was stored.
   */
  append(str) {
    if (this.length + str.length > this.limit) return true;
    this.buffer.push(str);
    this.length += str.length;
    // The guard above guarantees length <= limit at this point, so the
    // builder can never report "full" after a successful append.
    return false;
  }
  /**
   * @returns {string} The stored tokens joined with a single space.
   */
  toString() {
    return this.buffer.join(" ");
  }
};
|
|
40
|
-
var
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
34
|
+
/**
 * Splits a string on runs of whitespace, periods and commas and lower-cases
 * each piece. Empty pieces (from leading/trailing separators) are dropped.
 *
 * @param {string} str Text to tokenize.
 * @returns {string[]} Lower-cased, non-empty tokens in input order.
 */
var tokenizeString = (str) => {
  const tokens = [];
  for (const piece of str.split(/[\s\.,]+/)) {
    if (piece) tokens.push(piece.toLowerCase());
  }
  return tokens;
};
|
|
37
|
+
/**
 * Walks a node tree depth-first and appends the tokens of every node whose
 * `type` is listed in `nodeTypes` to `builder`.
 *
 * Traversal stops completely as soon as the builder rejects a token, so the
 * collected text is always a prefix of the document's token stream (the same
 * stop-at-first-overflow behaviour the plain-text path uses). Previously only
 * the current node was abandoned and later siblings could still contribute
 * shorter tokens.
 *
 * @param {object|null|undefined} data Node with optional `type`, `text`,
 *   `value` and `children` properties.
 * @param {{append: (token: string) => boolean}} builder Receives the tokens;
 *   `append` returns `true` when the token did not fit.
 * @param {string[]} nodeTypes Node types whose text should be collected.
 * @returns {boolean} `true` when the builder is full and traversal stopped.
 */
var extractText = (data, builder, nodeTypes) => {
  if (!data) return false;
  if (nodeTypes.includes(data.type ?? "") && (data.text || data.value)) {
    for (const token of tokenizeString(data.text || data.value || "")) {
      if (builder.append(token)) return true;
    }
  }
  // Tolerate a `children` value that is not an array instead of throwing.
  const children = Array.isArray(data.children) ? data.children : [];
  // `some` short-circuits on the first child that reports a full builder.
  return children.some((child) => extractText(child, builder, nodeTypes));
};
|
|
55
|
-
var
|
|
47
|
+
/**
 * Derives a document path relative to its collection.
 *
 * Backslashes are converted to forward slashes, the first occurrence of
 * `collection.path` is removed, and one leading and/or trailing slash is
 * stripped.
 *
 * @param {string} path Document path.
 * @param {{path: string}} collection Collection the document belongs to.
 * @returns {string} The collection-relative path.
 */
var getRelativePath = (path, collection) => {
  const forwardSlashed = path.replace(/\\/g, "/");
  const withoutCollection = forwardSlashed.replace(collection.path, "");
  return withoutCollection.replace(/^\/|\/$/g, "");
};
|
|
58
|
-
var
|
|
59
|
-
return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
|
|
60
|
-
};
|
|
61
|
-
var processTextFieldValue = (value, maxLen) => {
|
|
50
|
+
/**
 * Tokenizes a plain string and keeps leading tokens until the builder
 * rejects one.
 *
 * @param {string} value Raw field value.
 * @param {number} maxLength Character budget handed to the StringBuilder.
 * @returns {string} The accepted tokens joined by the builder.
 */
var processTextField = (value, maxLength) => {
  const parts = tokenizeString(value);
  const collected = new StringBuilder(maxLength);
  // `some` stops at the first token the builder refuses, like a `break`.
  parts.some((part) => collected.append(part));
  return collected.toString();
};
|
|
58
|
+
/**
 * Flattens one rich-text value into an indexable string.
 *
 * Delegates to `extractText` with the module's `INDEXABLE_NODE_TYPES` list
 * and a fresh builder limited to `maxLength`.
 *
 * @param {object} value Root node of the rich-text tree.
 * @param {number} maxLength Character budget handed to the StringBuilder.
 * @returns {string} Whatever the builder collected.
 */
var processRichTextField = (value, maxLength) => {
  const collected = new StringBuilder(maxLength);
  extractText(value, collected, INDEXABLE_NODE_TYPES);
  return collected.toString();
};
|
|
63
|
+
/**
 * Prepares an object field for indexing by running
 * `processDocumentForIndexing` on it with `field` as the schema.
 *
 * @param {object|object[]} data Field value; mapped element-wise when
 *   `field.list` is truthy.
 * @param {string} path Document path, forwarded unchanged.
 * @param {object} collection Owning collection, forwarded unchanged.
 * @param {number} textIndexLength Default text length limit, forwarded unchanged.
 * @param {object} field Schema definition of this object field.
 * @returns {object|object[]} The processed object, or an array of them.
 */
var processObjectField = (data, path, collection, textIndexLength, field) => {
  const indexOne = (obj) => processDocumentForIndexing(obj, path, collection, textIndexLength, field);
  return field.list ? data.map(indexOne) : indexOne(data);
};
|
|
77
|
+
/**
 * Applies `processTextField` to a string field.
 *
 * @param {string|string[]} data Field value; mapped element-wise when
 *   `isList` is truthy.
 * @param {number} maxLength Length limit passed to `processTextField`.
 * @param {boolean} isList Whether `data` is a list of strings.
 * @returns {string|string[]} Processed text, or an array of processed texts.
 */
var processStringField = (data, maxLength, isList) => {
  const shorten = (value) => processTextField(value, maxLength);
  return isList ? data.map(shorten) : shorten(data);
};
|
|
85
|
+
/**
 * Applies `processRichTextField` to a rich-text field.
 *
 * @param {object|object[]} data Field value; mapped element-wise when
 *   `isList` is truthy.
 * @param {number} maxLength Length limit passed to `processRichTextField`.
 * @param {boolean} isList Whether `data` is a list of rich-text values.
 * @returns {string|string[]} Flattened text, or an array of flattened texts.
 */
var processRichTextFieldData = (data, maxLength, isList) => {
  const flatten = (value) => processRichTextField(value, maxLength);
  return isList ? data.map(flatten) : flatten(data);
};
|
|
71
93
|
/**
 * Rewrites a document (or nested object) in place into its indexable form.
 *
 * At the top level (no `field`), `_id` and `_relativePath` are set from the
 * collection name and the collection-relative path. Then, for every schema
 * field: non-searchable fields are deleted, falsy values are left untouched,
 * and `object`, `string` and `rich-text` values are replaced by their
 * processed form. Values of any other type are kept as they are.
 *
 * @param {object} data Document or nested object; mutated and returned.
 * @param {string} path Document path.
 * @param {object} collection Collection providing `name`, `path` and `fields`.
 * @param {number} textIndexLength Default per-field text length limit.
 * @param {object} [field] Object-field schema when processing a nested value.
 * @returns {object} The same `data` object.
 */
var processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
  if (!field) {
    const relativePath = getRelativePath(path, collection);
    data["_id"] = `${collection.name}:${relativePath}`;
    data["_relativePath"] = relativePath;
  }
  const schemaFields = field?.fields || collection.fields || [];
  for (const f of schemaFields) {
    const key = f.name;
    if (!f.searchable) {
      delete data[key];
      continue;
    }
    const current = data[key];
    if (!current) continue;
    // A per-field limit overrides the indexer-wide default.
    const fieldMaxLength = f.maxSearchIndexFieldLength || textIndexLength;
    const isList = Boolean(f.list);
    if (f.type === "object") {
      data[key] = processObjectField(current, path, collection, textIndexLength, f);
    } else if (f.type === "string") {
      data[key] = processStringField(current, fieldMaxLength, isList);
    } else if (f.type === "rich-text") {
      data[key] = processRichTextFieldData(current, fieldMaxLength, isList);
    }
  }
  return data;
};
|
|
134
|
-
var
|
|
136
|
+
// Combined stopword lists already built, keyed by the comma-joined language keys.
var stopwordCache = {};
/**
 * Returns the combined stopword list for the given language keys.
 *
 * Lists are looked up on the `sw` (stopword) module namespace; keys with no
 * entry contribute nothing. Results are cached per key combination, so
 * repeated calls with the same keys return the same array instance.
 *
 * @param {string[]|undefined} keys Language keys, e.g. `["eng", "fra"]`.
 * @param {string[]} [defaultStopWords] Returned as-is when `keys` is falsy;
 *   defaults to `sw.eng`.
 * @returns {string[]} The stopwords to use.
 */
var lookupStopwords = (keys, defaultStopWords = sw.eng) => {
  if (!keys) return defaultStopWords;
  const cacheKey = keys.join(",");
  const cached = stopwordCache[cacheKey];
  if (cached) return cached;
  const stopwords = keys.reduce((all, key) => all.concat(sw[key] || []), []);
  stopwordCache[cacheKey] = stopwords;
  return stopwords;
};
|
|
149
149
|
|
|
@@ -161,6 +161,24 @@ var SearchIndexer = class {
|
|
|
161
161
|
this.batchSize = options.batchSize || 100;
|
|
162
162
|
this.textIndexLength = options.textIndexLength || 500;
|
|
163
163
|
}
|
|
164
|
+
createBatchProcessor() {
|
|
165
|
+
let batch = [];
|
|
166
|
+
return {
|
|
167
|
+
callback: async (item) => {
|
|
168
|
+
batch.push(item);
|
|
169
|
+
if (batch.length >= this.batchSize) {
|
|
170
|
+
await this.client.put(batch);
|
|
171
|
+
batch = [];
|
|
172
|
+
}
|
|
173
|
+
},
|
|
174
|
+
flush: async () => {
|
|
175
|
+
if (batch.length > 0) {
|
|
176
|
+
await this.client.put(batch);
|
|
177
|
+
batch = [];
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
};
|
|
181
|
+
}
|
|
164
182
|
makeIndexerCallback(itemCallback) {
|
|
165
183
|
return async (collection, contentPaths) => {
|
|
166
184
|
const templateInfo = this.schema.getTemplatesForCollectable(collection);
|
|
@@ -191,43 +209,25 @@ var SearchIndexer = class {
|
|
|
191
209
|
};
|
|
192
210
|
}
|
|
193
211
|
async indexContentByPaths(documentPaths) {
|
|
194
|
-
|
|
195
|
-
const itemCallback = async (item) => {
|
|
196
|
-
batch.push(item);
|
|
197
|
-
if (batch.length > this.batchSize) {
|
|
198
|
-
await this.client.put(batch);
|
|
199
|
-
batch = [];
|
|
200
|
-
}
|
|
201
|
-
};
|
|
212
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
202
213
|
await this.client.onStartIndexing?.();
|
|
203
214
|
await scanContentByPaths(
|
|
204
215
|
this.schema,
|
|
205
216
|
documentPaths,
|
|
206
|
-
this.makeIndexerCallback(
|
|
217
|
+
this.makeIndexerCallback(callback)
|
|
207
218
|
);
|
|
208
|
-
|
|
209
|
-
await this.client.put(batch);
|
|
210
|
-
}
|
|
219
|
+
await flush();
|
|
211
220
|
await this.client.onFinishIndexing?.();
|
|
212
221
|
}
|
|
213
222
|
async indexAllContent() {
|
|
223
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
214
224
|
await this.client.onStartIndexing?.();
|
|
215
|
-
let batch = [];
|
|
216
|
-
const itemCallback = async (item) => {
|
|
217
|
-
batch.push(item);
|
|
218
|
-
if (batch.length > this.batchSize) {
|
|
219
|
-
await this.client.put(batch);
|
|
220
|
-
batch = [];
|
|
221
|
-
}
|
|
222
|
-
};
|
|
223
225
|
const warnings = await scanAllContent(
|
|
224
226
|
this.schema,
|
|
225
227
|
this.bridge,
|
|
226
|
-
this.makeIndexerCallback(
|
|
228
|
+
this.makeIndexerCallback(callback)
|
|
227
229
|
);
|
|
228
|
-
|
|
229
|
-
await this.client.put(batch);
|
|
230
|
-
}
|
|
230
|
+
await flush();
|
|
231
231
|
await this.client.onFinishIndexing?.();
|
|
232
232
|
return { warnings };
|
|
233
233
|
}
|
|
@@ -239,57 +239,387 @@ var SearchIndexer = class {
|
|
|
239
239
|
};
|
|
240
240
|
|
|
241
241
|
// src/client/index.ts
|
|
242
|
-
import
|
|
243
|
-
import
|
|
242
|
+
import * as sqliteLevelModule from "sqlite-level";
|
|
243
|
+
import createSearchIndex from "search-index";
|
|
244
244
|
import { MemoryLevel } from "memory-level";
|
|
245
|
+
|
|
246
|
+
// src/fuzzy/types.ts
|
|
247
|
+
// Defaults applied to every fuzzy search option the caller does not set.
var DEFAULT_FUZZY_OPTIONS = {
  maxDistance: 2,
  minSimilarity: 0.6,
  maxTermExpansions: 10,
  useTranspositions: true,
  caseSensitive: false,
  useNgramFilter: true,
  ngramSize: 2,
  minNgramOverlap: 0.2
};
// Restricts `v` to the inclusive range [min, max].
var clamp = (v, min, max) => Math.min(Math.max(v, min), max);
/**
 * Merges caller options over the defaults and clamps the numeric ones to
 * their supported ranges.
 *
 * A numeric option that does not coerce to a finite number (for example an
 * explicit `undefined` or `NaN`) falls back to its default instead of
 * propagating `NaN` into the distance and similarity comparisons.
 *
 * @param {object} [options] Partial fuzzy options.
 * @returns {object} A complete options object with in-range numeric values.
 */
function normalizeFuzzyOptions(options = {}) {
  const o = { ...DEFAULT_FUZZY_OPTIONS, ...options };
  const bounded = (key, min, max) => {
    const numeric = Number(o[key]);
    return clamp(Number.isFinite(numeric) ? numeric : DEFAULT_FUZZY_OPTIONS[key], min, max);
  };
  return {
    ...o,
    maxDistance: bounded("maxDistance", 0, 10),
    minSimilarity: bounded("minSimilarity", 0, 1),
    maxTermExpansions: bounded("maxTermExpansions", 1, 100),
    minNgramOverlap: bounded("minNgramOverlap", 0, 1),
    ngramSize: bounded("ngramSize", 1, 5)
  };
}
|
|
269
|
+
|
|
270
|
+
// src/fuzzy/cache.ts
|
|
271
|
+
/**
 * Small least-recently-used cache for fuzzy match results.
 *
 * Entries are keyed by the JSON serialization of `{ query, options }` and
 * kept in a Map, whose insertion order doubles as the recency order: the
 * first key is the least recently used one.
 */
var FuzzyCache = class {
  cache;
  maxSize;
  /**
   * @param {number} [maxSize=100] Number of entries kept before eviction.
   */
  constructor(maxSize = 100) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }
  /**
   * @param {string} query Query text.
   * @param {object} options Options the results were computed with.
   * @returns {string} The cache key for this query/options pair.
   */
  getCacheKey(query, options) {
    return JSON.stringify({ query, options });
  }
  /**
   * Looks up an entry and marks it as most recently used.
   *
   * @param {string} query Query text.
   * @param {object} options Options the results were computed with.
   * @returns {*} The cached results, or `undefined` on a miss.
   */
  get(query, options) {
    const key = this.getCacheKey(query, options);
    const value = this.cache.get(key);
    if (value !== undefined) {
      // Re-insert so the key moves to the end of the Map's iteration order.
      this.cache.delete(key);
      this.cache.set(key, value);
    }
    return value;
  }
  /**
   * Stores results and marks the entry as most recently used.
   *
   * Overwriting an existing key never evicts another entry; only inserting
   * a new key into a full cache removes the least recently used one.
   *
   * @param {string} query Query text.
   * @param {object} options Options the results were computed with.
   * @param {*} results Value to cache.
   */
  set(query, options, results) {
    const key = this.getCacheKey(query, options);
    if (this.cache.has(key)) {
      // Map.set on an existing key keeps its old position, so delete first.
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(key, results);
  }
  /** Removes every entry. */
  clear() {
    this.cache.clear();
  }
  /** @returns {number} Number of cached entries. */
  get size() {
    return this.cache.size;
  }
};
|
|
305
|
+
|
|
306
|
+
// src/fuzzy/distance.ts
|
|
307
|
+
var PREFIX_MATCH_MIN_SIMILARITY = 0.8;
|
|
308
|
+
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions that
 * turn `str1` into `str2`.
 *
 * Only the previous and the current row of the dynamic-programming table
 * are kept.
 *
 * @param {string} str1 First string.
 * @param {string} str2 Second string.
 * @returns {number} The edit distance.
 */
function levenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;
  // Row 0: turning the empty prefix into the first j characters costs j.
  let previous = Array.from({ length: cols + 1 }, (_, j) => j);
  for (let i = 1; i <= rows; i++) {
    const current = [i];
    for (let j = 1; j <= cols; j++) {
      current[j] = str1[i - 1] === str2[j - 1]
        ? previous[j - 1]
        : Math.min(previous[j], current[j - 1], previous[j - 1]) + 1;
    }
    previous = current;
  }
  return previous[cols];
}
|
|
329
|
+
/**
 * Normalizes an edit distance into a similarity: `1 - distance / maxLength`,
 * where `maxLength` is the length of the longer string.
 *
 * @param {string} str1 First string.
 * @param {string} str2 Second string.
 * @param {boolean} [useTranspositions=false] Use the Damerau-Levenshtein
 *   distance instead of the plain Levenshtein distance.
 * @returns {number} The similarity; 1 when both strings are empty.
 */
function similarityScore(str1, str2, useTranspositions = false) {
  const measure = useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const distance = measure(str1, str2);
  const longest = Math.max(str1.length, str2.length);
  return longest === 0 ? 1 : 1 - distance / longest;
}
|
|
335
|
+
/**
 * Computes the Damerau-Levenshtein distance between two strings: like the
 * Levenshtein distance, but swapping two adjacent characters counts as a
 * single edit.
 *
 * The table is padded by one extra row and column holding a sentinel value
 * (`len1 + len2`), so transposition lookups that reach index 0 never win
 * the minimum.
 *
 * @param {string} str1 First string.
 * @param {string} str2 Second string.
 * @returns {number} The edit distance.
 */
function damerauLevenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;
  const sentinel = rows + cols;
  // Last (1-based) row in which each character of str1 was seen.
  const lastRowOfChar = new Map();
  const table = Array.from({ length: rows + 2 }, () => new Array(cols + 2).fill(0));
  table[0][0] = sentinel;
  for (let i = 0; i <= rows; i++) {
    table[i + 1][0] = sentinel;
    table[i + 1][1] = i;
  }
  for (let j = 0; j <= cols; j++) {
    table[0][j + 1] = sentinel;
    table[1][j + 1] = j;
  }
  for (let i = 1; i <= rows; i++) {
    const rowChar = str1[i - 1];
    // Last column in this row where the two characters matched.
    let lastMatchCol = 0;
    for (let j = 1; j <= cols; j++) {
      const colChar = str2[j - 1];
      const matchRow = lastRowOfChar.get(colChar) || 0;
      const matchCol = lastMatchCol;
      const isMatch = rowChar === colChar;
      if (isMatch) {
        lastMatchCol = j;
      }
      const substitution = table[i][j] + (isMatch ? 0 : 1);
      const insertion = table[i + 1][j] + 1;
      const deletion = table[i][j + 1] + 1;
      const transposition = table[matchRow][matchCol] + (i - matchRow - 1) + 1 + (j - matchCol - 1);
      table[i + 1][j + 1] = Math.min(substitution, insertion, deletion, transposition);
    }
    lastRowOfChar.set(rowChar, i);
  }
  return table[rows + 1][cols + 1];
}
|
|
371
|
+
/**
 * Collects the distinct substrings of length `n` of a string.
 *
 * @param {string} str Input string.
 * @param {number} [n=2] N-gram length.
 * @returns {Set<string>} The distinct n-grams; when `str` is shorter than
 *   `n`, a set containing only `str` itself.
 */
function getNgrams(str, n = 2) {
  if (str.length < n) {
    return new Set([str]);
  }
  const grams = new Set();
  let start = 0;
  while (start <= str.length - n) {
    grams.add(str.substring(start, start + n));
    start++;
  }
  return grams;
}
|
|
382
|
+
/**
 * Measures how many n-grams two sets share, relative to the smaller set.
 *
 * @param {Set<string>} ngrams1 First n-gram set.
 * @param {Set<string>} ngrams2 Second n-gram set.
 * @returns {number} Shared count divided by the smaller set's size; 0 when
 *   either set is empty.
 */
function ngramOverlap(ngrams1, ngrams2) {
  const smaller = Math.min(ngrams1.size, ngrams2.size);
  if (smaller === 0) return 0;
  const shared = [...ngrams1].filter((gram) => ngrams2.has(gram)).length;
  return shared / smaller;
}
|
|
391
|
+
/**
 * Finds dictionary terms that are close to `query`.
 *
 * For each string term: an optional n-gram overlap pre-filter rejects
 * clearly unrelated terms; terms that start with the query are accepted as
 * prefix matches with a similarity of at least
 * `PREFIX_MATCH_MIN_SIMILARITY`; every other term must be within
 * `maxDistance` edits and reach `minSimilarity`.
 *
 * The similarity of a non-prefix match is derived from the distance that
 * was already computed (`1 - distance / longerLength`, the same formula
 * `similarityScore` uses) instead of running the edit-distance algorithm a
 * second time for every candidate.
 *
 * @param {string} query Term to match.
 * @param {Iterable<*>} dictionary Candidate terms; non-strings and empty
 *   strings are skipped.
 * @param {object} [options] Fuzzy options, normalized with
 *   `normalizeFuzzyOptions`.
 * @returns {{term: string, distance: number, similarity: number}[]} Matches
 *   ordered by descending similarity (ties within 0.001 by ascending
 *   distance), truncated to `maxTermExpansions`.
 */
function findSimilarTerms(query, dictionary, options = {}) {
  const opts = normalizeFuzzyOptions(options);
  const normalizedQuery = opts.caseSensitive ? query : query.toLowerCase();
  if (normalizedQuery.length === 0) return [];
  const matches = [];
  const distanceFunc = opts.useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const queryNgrams = opts.useNgramFilter ? getNgrams(normalizedQuery, opts.ngramSize) : null;
  for (const term of dictionary) {
    if (typeof term !== "string" || term.length === 0) continue;
    const normalizedTerm = opts.caseSensitive ? term : term.toLowerCase();
    if (queryNgrams) {
      const termNgrams = getNgrams(normalizedTerm, opts.ngramSize);
      if (ngramOverlap(queryNgrams, termNgrams) < opts.minNgramOverlap) continue;
    }
    if (normalizedTerm.startsWith(normalizedQuery)) {
      const prefixSimilarity = normalizedQuery.length / normalizedTerm.length;
      matches.push({
        term,
        distance: normalizedTerm.length - normalizedQuery.length,
        similarity: Math.max(prefixSimilarity, PREFIX_MATCH_MIN_SIMILARITY)
      });
      continue;
    }
    const distance = distanceFunc(normalizedQuery, normalizedTerm);
    if (distance > opts.maxDistance) continue;
    // Both strings are non-empty here, so the divisor is never zero.
    const longest = Math.max(normalizedQuery.length, normalizedTerm.length);
    const similarity = 1 - distance / longest;
    if (similarity >= opts.minSimilarity) {
      matches.push({ term, distance, similarity });
    }
  }
  matches.sort((a, b) => {
    if (Math.abs(a.similarity - b.similarity) < 1e-3) {
      return a.distance - b.distance;
    }
    return b.similarity - a.similarity;
  });
  return matches.slice(0, opts.maxTermExpansions);
}
|
|
434
|
+
|
|
435
|
+
// src/pagination.ts
|
|
436
|
+
/**
 * Translates `{ limit, cursor }` into the `PAGE` option passed to the
 * search index query.
 *
 * The cursor is a zero-based page number encoded as a string. A missing,
 * non-numeric or negative cursor is treated as page 0 so that `NaN` or a
 * negative page number never reaches the search index.
 *
 * @param {{limit?: number, cursor?: string}} [options] Pagination input.
 * @returns {{PAGE?: {NUMBER: number, SIZE: number}}} An empty object when no
 *   limit is set, otherwise the page descriptor.
 */
function buildPageOptions(options = {}) {
  if (!options.limit) return {};
  const parsed = Number.parseInt(options.cursor, 10);
  const pageNumber = Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
  return {
    PAGE: {
      NUMBER: pageNumber,
      SIZE: options.limit
    }
  };
}
|
|
445
|
+
/**
 * Computes the previous/next page cursors for a result set.
 *
 * The cursor is a zero-based page number encoded as a string. A missing,
 * non-numeric or negative cursor is treated as page 0; previously a
 * non-numeric cursor produced `NaN`, which made every comparison false and
 * hid the next page even when more results existed.
 *
 * @param {number} total Total number of matching results.
 * @param {{limit?: number, cursor?: string}} [options] Pagination input.
 * @returns {{prevCursor: string|null, nextCursor: string|null}} Cursor
 *   strings, or `null` where there is no such page. Without a limit there is
 *   never a next page.
 */
function buildPaginationCursors(total, options = {}) {
  const parsed = Number.parseInt(options.cursor, 10);
  const currentPage = Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
  const pageSize = options.limit;
  const hasPreviousPage = currentPage > 0;
  const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
  return {
    prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
    nextCursor: hasNextPage ? (currentPage + 1).toString() : null
  };
}
|
|
455
|
+
|
|
456
|
+
// src/fuzzy-search-wrapper.ts
|
|
457
|
+
/**
 * Adds typo-tolerant querying on top of a search index.
 *
 * Each query term is expanded with similar terms taken from the index
 * dictionary, and the expanded terms are combined into an AND-of-ORs query.
 * Similar-term lookups are cached per term and option set.
 */
var FuzzySearchWrapper = class {
  cache;
  searchIndex;
  /**
   * @param {object} searchIndex Index exposing `DICTIONARY` and `QUERY`.
   * @param {number} [cacheSize=100] Capacity of the similar-term cache.
   */
  constructor(searchIndex, cacheSize = 100) {
    this.searchIndex = searchIndex;
    this.cache = new FuzzyCache(cacheSize);
  }
  /**
   * Reads the index dictionary, optionally restricted to one field.
   *
   * @param {string} [field] Field name to restrict the dictionary to.
   * @returns {Promise<string[]>} The string entries of the dictionary.
   */
  async getDictionary(field) {
    const token = field ? { FIELD: field } : void 0;
    const dictionary = await this.searchIndex.DICTIONARY(token);
    return dictionary.filter((entry) => typeof entry === "string");
  }
  /**
   * Finds dictionary terms similar to `query`, using the cache when possible.
   *
   * @param {string} query Single term to match.
   * @param {string} [field] Field to restrict the dictionary to.
   * @param {object} [options] Fuzzy options.
   * @returns {Promise<object[]>} Matches as returned by `findSimilarTerms`.
   */
  async findSimilar(query, field, options = {}) {
    const cacheKey = `${query}:${field || "all"}`;
    const cached = this.cache.get(cacheKey, options);
    if (cached) return cached;
    const dictionary = await this.getDictionary(field);
    const matches = findSimilarTerms(query, dictionary, options);
    this.cache.set(cacheKey, options, matches);
    return matches;
  }
  /**
   * Splits a query on spaces and expands every term with its similar terms.
   *
   * `matches` is built with `Object.fromEntries`, so a term such as
   * `__proto__` becomes an ordinary own property instead of replacing the
   * object's prototype.
   *
   * @param {string} query Space-separated query.
   * @param {object} [options] Fuzzy options.
   * @returns {Promise<{original: string[], expanded: string[], matches: object}>}
   *   The query terms, the de-duplicated union of terms and similar terms,
   *   and the matches per term (only for terms that have any).
   */
  async expandQuery(query, options = {}) {
    const opts = normalizeFuzzyOptions(options);
    const terms = query.split(" ").map((t) => t.trim()).filter((t) => t.length > 0);
    const expanded = [];
    const matchEntries = [];
    for (const term of terms) {
      const similarTerms = await this.findSimilar(term, void 0, opts);
      expanded.push(term);
      const loweredTerm = term.toLowerCase();
      for (const match of similarTerms) {
        if (match.term.toLowerCase() !== loweredTerm) expanded.push(match.term);
      }
      if (similarTerms.length > 0) {
        matchEntries.push([term, similarTerms]);
      }
    }
    return {
      original: terms,
      expanded: Array.from(new Set(expanded)),
      matches: Object.fromEntries(matchEntries)
    };
  }
  /**
   * Runs a fuzzy query against the index.
   *
   * @param {string} query Space-separated query.
   * @param {{limit?: number, cursor?: string, fuzzyOptions?: object}} [options]
   *   Pagination and fuzzy options.
   * @returns {Promise<{results: object[], total: number, prevCursor: string|null,
   *   nextCursor: string|null, fuzzyMatches: object}>} Page of results plus
   *   the similar terms that were used.
   */
  async query(query, options = {}) {
    const pageOptions = buildPageOptions(options);
    const expansion = await this.expandQuery(query, options.fuzzyOptions);
    // `expanded` is de-duplicated, so compare it with the de-duplicated
    // originals; otherwise a repeated query term can hide a real expansion.
    const distinctOriginals = new Set(expansion.original).size;
    let searchQuery;
    if (expansion.expanded.length === distinctOriginals) {
      searchQuery = { AND: expansion.original };
    } else {
      const queryGroups = expansion.original.map((originalTerm) => {
        // Own-property check: a term like "constructor" must not pick up an
        // inherited Object.prototype member.
        const termMatches = Object.prototype.hasOwnProperty.call(expansion.matches, originalTerm)
          ? expansion.matches[originalTerm]
          : [];
        return [originalTerm, ...termMatches.map((m) => m.term)];
      });
      searchQuery = queryGroups.length === 1 ? { OR: queryGroups[0] } : {
        AND: queryGroups.map(
          (group) => group.length === 1 ? group[0] : { OR: group }
        )
      };
    }
    const results = await this.searchIndex.QUERY(searchQuery, pageOptions);
    const total = results.RESULT_LENGTH || 0;
    return {
      results: results.RESULT || [],
      total,
      ...buildPaginationCursors(total, options),
      fuzzyMatches: expansion.matches
    };
  }
  /** Drops all cached similar-term lookups. */
  clearCache() {
    this.cache.clear();
  }
  /** @returns {number} Number of cached similar-term lookups. */
  getCacheSize() {
    return this.cache.size;
  }
};
|
|
545
|
+
|
|
546
|
+
// src/client/index.ts
|
|
245
547
|
import * as zlib from "node:zlib";
|
|
246
|
-
var
|
|
548
|
+
var SqliteLevel2 = sqliteLevelModule.default?.SqliteLevel ?? sqliteLevelModule.SqliteLevel;
|
|
247
549
|
var DEFAULT_TOKEN_SPLIT_REGEX = /[\p{L}\d_]+/gu;
|
|
248
550
|
/**
 * Search index client backed by an in-memory level store.
 *
 * `onStartIndexing` must be called before `put`, `del` or `query`; it
 * creates the search index and the fuzzy search wrapper around it.
 */
var LocalSearchIndexClient = class {
  searchIndex;
  memoryLevel;
  stopwords;
  tokenSplitRegex;
  fuzzySearchWrapper;
  /**
   * @param {{stopwordLanguages?: string[], tokenSplitRegex?: string}} options
   *   Stopword language keys and an optional token pattern (compiled with
   *   the `g` and `u` flags).
   */
  constructor(options) {
    this.memoryLevel = new MemoryLevel();
    this.stopwords = lookupStopwords(options.stopwordLanguages);
    this.tokenSplitRegex = options.tokenSplitRegex ? new RegExp(options.tokenSplitRegex, "gu") : DEFAULT_TOKEN_SPLIT_REGEX;
  }
  /** Creates the search index on top of the in-memory store. */
  async onStartIndexing() {
    const options = {
      db: this.memoryLevel,
      stopwords: this.stopwords,
      tokenSplitRegex: this.tokenSplitRegex
    };
    this.searchIndex = await createSearchIndex(
      options
    );
    this.fuzzySearchWrapper = new FuzzySearchWrapper(this.searchIndex);
  }
  /**
   * Adds or replaces documents in the index.
   * @param {object[]} docs Documents to index.
   * @throws {Error} When `onStartIndexing` has not been called.
   */
  async put(docs) {
    if (!this.searchIndex) {
      throw new Error("onStartIndexing must be called first");
    }
    await this.searchIndex.PUT(docs);
  }
  /**
   * Removes documents from the index.
   * @param {string[]} ids Ids of the documents to delete.
   * @throws {Error} When `onStartIndexing` has not been called.
   */
  async del(ids) {
    if (!this.searchIndex) {
      throw new Error("onStartIndexing must be called first");
    }
    await this.searchIndex.DELETE(ids);
  }
  /**
   * Queries the index; all space-separated terms must match.
   *
   * With `options.fuzzy` set, the query is delegated to the fuzzy search
   * wrapper instead.
   *
   * @param {string} query Space-separated query.
   * @param {{limit?: number, cursor?: string, fuzzy?: boolean, fuzzyOptions?: object}} [options]
   * @returns {Promise<object>} `results`, `total`, `prevCursor` and
   *   `nextCursor` (plus `fuzzyMatches` for fuzzy queries).
   * @throws {Error} When `onStartIndexing` has not been called.
   */
  async query(query, options) {
    if (!this.searchIndex) {
      throw new Error("onStartIndexing must be called first");
    }
    if (options?.fuzzy && this.fuzzySearchWrapper) {
      return this.fuzzySearchWrapper.query(query, {
        limit: options.limit,
        cursor: options.cursor,
        fuzzyOptions: options.fuzzyOptions
      });
    }
    const paging = {
      limit: options?.limit,
      cursor: options?.cursor
    };
    const terms = query.split(" ").filter((t) => t.trim().length > 0);
    // An empty query is sent as a single empty term.
    const queryObj = { AND: terms.length > 0 ? terms : [""] };
    const searchResults = await this.searchIndex.QUERY(
      queryObj,
      buildPageOptions(paging)
    );
    const total = searchResults.RESULT_LENGTH || 0;
    return {
      results: searchResults.RESULT || [],
      total,
      ...buildPaginationCursors(total, paging)
    };
  }
  /**
   * Copies every key/value pair of the in-memory store into a sqlite file.
   *
   * The sqlite handle is closed even when a write fails, so a failed export
   * does not leave the file open.
   *
   * @param {string} filename Path of the sqlite database to write.
   */
  async export(filename) {
    const sqliteLevel = new SqliteLevel2({ filename });
    try {
      for await (const [key, value] of this.memoryLevel.iterator()) {
        await sqliteLevel.put(key, value);
      }
    } finally {
      await sqliteLevel.close();
    }
  }
};
|
|
295
625
|
var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
|
|
@@ -302,49 +632,64 @@ var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
|
|
|
302
632
|
this.branch = options.branch;
|
|
303
633
|
this.indexerToken = options.indexerToken;
|
|
304
634
|
}
|
|
305
|
-
async
|
|
635
|
+
async getUploadUrl() {
|
|
306
636
|
const headers = new Headers();
|
|
307
|
-
headers.append("x-api-key", this.indexerToken || "
|
|
637
|
+
headers.append("x-api-key", this.indexerToken || "");
|
|
308
638
|
headers.append("Content-Type", "application/json");
|
|
309
|
-
|
|
639
|
+
const response = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
|
|
310
640
|
method: "GET",
|
|
311
641
|
headers
|
|
312
642
|
});
|
|
313
|
-
if (
|
|
314
|
-
|
|
315
|
-
try {
|
|
316
|
-
json = await res.json();
|
|
317
|
-
} catch (e) {
|
|
318
|
-
console.error("Failed to parse error response", e);
|
|
319
|
-
}
|
|
643
|
+
if (response.status !== 200) {
|
|
644
|
+
const errorBody = await response.json().catch(() => ({}));
|
|
320
645
|
throw new Error(
|
|
321
|
-
`Failed to get upload url. Status: ${
|
|
646
|
+
`Failed to get upload url. Status: ${response.status}${errorBody?.message ? ` - ${errorBody.message}` : ""}`
|
|
322
647
|
);
|
|
323
648
|
}
|
|
324
|
-
const { signedUrl } = await
|
|
325
|
-
|
|
649
|
+
const { signedUrl } = await response.json();
|
|
650
|
+
return signedUrl;
|
|
651
|
+
}
|
|
652
|
+
async serializeIndex() {
|
|
653
|
+
const sqliteLevel = new SqliteLevel2({ filename: ":memory:" });
|
|
326
654
|
const iterator = this.memoryLevel.iterator();
|
|
327
655
|
for await (const [key, value] of iterator) {
|
|
328
|
-
await
|
|
656
|
+
await sqliteLevel.put(key, value);
|
|
329
657
|
}
|
|
330
|
-
const buffer =
|
|
331
|
-
await
|
|
332
|
-
|
|
333
|
-
|
|
658
|
+
const buffer = sqliteLevel.db.serialize();
|
|
659
|
+
await sqliteLevel.close();
|
|
660
|
+
return zlib.gzipSync(buffer);
|
|
661
|
+
}
|
|
662
|
+
async uploadIndex(signedUrl, data) {
|
|
663
|
+
const response = await fetch(signedUrl, {
|
|
334
664
|
method: "PUT",
|
|
335
|
-
body:
|
|
665
|
+
body: data
|
|
336
666
|
});
|
|
337
|
-
if (
|
|
667
|
+
if (response.status !== 200) {
|
|
668
|
+
const errorText = await response.text();
|
|
338
669
|
throw new Error(
|
|
339
|
-
`Failed to upload search index. Status: ${
|
|
340
|
-
${
|
|
670
|
+
`Failed to upload search index. Status: ${response.status}
|
|
671
|
+
${errorText}`
|
|
341
672
|
);
|
|
342
673
|
}
|
|
343
674
|
}
|
|
675
|
+
async onFinishIndexing() {
|
|
676
|
+
const signedUrl = await this.getUploadUrl();
|
|
677
|
+
const indexData = await this.serializeIndex();
|
|
678
|
+
await this.uploadIndex(signedUrl, indexData);
|
|
679
|
+
}
|
|
344
680
|
};
|
|
345
681
|
export {
|
|
682
|
+
DEFAULT_FUZZY_OPTIONS,
|
|
683
|
+
FuzzyCache,
|
|
684
|
+
FuzzySearchWrapper,
|
|
346
685
|
LocalSearchIndexClient,
|
|
347
686
|
SearchIndexer,
|
|
348
687
|
TinaCMSSearchIndexClient,
|
|
349
|
-
|
|
688
|
+
buildPageOptions,
|
|
689
|
+
buildPaginationCursors,
|
|
690
|
+
createSearchIndex2 as createSearchIndex,
|
|
691
|
+
damerauLevenshteinDistance,
|
|
692
|
+
findSimilarTerms,
|
|
693
|
+
levenshteinDistance,
|
|
694
|
+
similarityScore
|
|
350
695
|
};
|