@tinacms/search 0.0.0-bf8b9b7-20251204000148 → 0.0.0-c19d29e-20251224001156
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/client/index.d.ts +10 -13
- package/dist/fuzzy/cache.d.ts +11 -0
- package/dist/fuzzy/distance.d.ts +8 -0
- package/dist/fuzzy/index.d.ts +4 -0
- package/dist/fuzzy/types.d.ts +19 -0
- package/dist/fuzzy-search-wrapper.d.ts +23 -0
- package/dist/index-client.d.ts +25 -15
- package/dist/index-client.js +141 -134
- package/dist/index.d.ts +8 -3
- package/dist/index.js +496 -163
- package/dist/indexer/index.d.ts +1 -0
- package/dist/indexer/utils.d.ts +1 -1
- package/dist/pagination.d.ts +16 -0
- package/dist/types.d.ts +51 -11
- package/package.json +6 -6
package/dist/index.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// src/index.ts
|
|
2
|
-
import
|
|
2
|
+
import createSearchIndex2 from "search-index";
|
|
3
3
|
|
|
4
4
|
// src/indexer/index.ts
|
|
5
5
|
import {
|
|
@@ -13,137 +13,137 @@ import {
|
|
|
13
13
|
|
|
14
14
|
// src/indexer/utils.ts
|
|
15
15
|
import * as sw from "stopword";
|
|
16
|
+
var INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
|
|
16
17
|
/**
 * Collects string tokens until a character budget is used up.
 *
 * `length` counts only the characters of the appended tokens; the single
 * spaces inserted by `toString()` are not counted against `limit`.
 */
var StringBuilder = class {
  buffer = [];
  limit;
  length = 0;
  /**
   * @param {number} limit - maximum total token characters to accept
   */
  constructor(limit) {
    this.limit = limit;
  }
  /**
   * Adds a token if it still fits into the budget.
   *
   * @param {string} str - token to add
   * @returns {boolean} true when the token was rejected because it would
   *   exceed the limit, false when it was stored
   */
  append(str) {
    const nextLength = this.length + str.length;
    if (nextLength > this.limit) {
      return true;
    }
    this.buffer.push(str);
    this.length = nextLength;
    return this.length > this.limit;
  }
  /**
   * @returns {string} the accepted tokens joined by single spaces
   */
  toString() {
    return this.buffer.join(" ");
  }
};
|
|
40
|
-
var
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
34
|
+
/**
 * Splits text on runs of whitespace, dots and commas and lower-cases
 * every piece, dropping empty pieces.
 *
 * @param {string} str - raw text
 * @returns {string[]} lower-cased, non-empty tokens in input order
 */
var tokenizeString = (str) => {
  const tokens = [];
  for (const piece of str.split(/[\s\.,]+/)) {
    const lowered = piece.toLowerCase();
    if (lowered) {
      tokens.push(lowered);
    }
  }
  return tokens;
};
|
|
37
|
+
/**
 * Recursively walks a rich-text node tree and appends the tokens of every
 * indexable node to `builder`.
 *
 * A node contributes text when its `type` is listed in `nodeTypes` and it
 * has a truthy `text` or `value`. When the builder rejects a token, the
 * current node (including its children) is abandoned; sibling nodes are
 * still visited by the caller's iteration.
 *
 * @param {object|null|undefined} data - node to process; falsy nodes are ignored
 * @param {{ append(token: string): boolean }} builder - token sink; `append`
 *   returns true once the token no longer fits
 * @param {string[]} nodeTypes - node types whose text should be indexed
 * @returns {void}
 */
var extractText = (data, builder, nodeTypes) => {
  if (!data) return;
  if (nodeTypes.includes(data.type ?? "") && (data.text || data.value)) {
    const tokens = tokenizeString(data.text || data.value || "");
    for (const token of tokens) {
      if (builder.append(token)) return;
    }
  }
  // `children` is only iterated when it actually offers forEach, so a
  // malformed node (e.g. a string or plain object) is skipped instead of
  // raising a TypeError.
  const children = data.children;
  if (typeof children?.forEach === "function") {
    children.forEach((child) => extractText(child, builder, nodeTypes));
  }
};
|
|
55
|
-
var
|
|
47
|
+
/**
 * Derives a document path relative to its collection.
 *
 * Backslashes are converted to forward slashes, the first occurrence of
 * `collection.path` is removed, and one leading and one trailing slash
 * are stripped.
 *
 * @param {string} path - document path
 * @param {{ path: string }} collection - collection owning the document
 * @returns {string} the relative path
 */
var getRelativePath = (path, collection) => {
  const posixPath = path.replace(/\\/g, "/");
  const withoutCollection = posixPath.replace(collection.path, "");
  return withoutCollection.replace(/^\/|\/$/g, "");
};
|
|
58
|
-
var
|
|
59
|
-
return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
|
|
60
|
-
};
|
|
61
|
-
var processTextFieldValue = (value, maxLen) => {
|
|
50
|
+
/**
 * Tokenizes a plain string and keeps tokens, in order, until the first
 * one that no longer fits into `maxLength` characters.
 *
 * @param {string} value - raw field value
 * @param {number} maxLength - character budget handed to the StringBuilder
 * @returns {string} accepted tokens joined by spaces
 */
var processTextField = (value, maxLength) => {
  const tokens = tokenizeString(value);
  const builder = new StringBuilder(maxLength);
  // `some` stops at the first token the builder rejects.
  tokens.some((token) => builder.append(token));
  return builder.toString();
};
|
|
58
|
+
/**
 * Flattens a rich-text tree into a space-separated token string limited
 * to `maxLength` characters of token text.
 *
 * @param {object} value - root node of the rich-text tree
 * @param {number} maxLength - character budget handed to the StringBuilder
 * @returns {string} the indexed text
 */
var processRichTextField = (value, maxLength) => {
  const textBuilder = new StringBuilder(maxLength);
  extractText(value, textBuilder, INDEXABLE_NODE_TYPES);
  const indexedText = textBuilder.toString();
  return indexedText;
};
|
|
63
|
+
/**
 * Prepares an object field for indexing by running
 * processDocumentForIndexing on it, or on each of its items when the
 * field is a list.
 *
 * @param {object|object[]} data - field value (array when `field.list` is truthy)
 * @param {string} path - path of the owning document
 * @param {object} collection - collection definition
 * @param {number} textIndexLength - default per-field text budget
 * @param {object} field - object field definition
 * @returns {object|object[]} the processed value(s)
 */
var processObjectField = (data, path, collection, textIndexLength, field) => {
  const indexOne = (obj) => processDocumentForIndexing(obj, path, collection, textIndexLength, field);
  return field.list ? data.map(indexOne) : indexOne(data);
};
|
|
77
|
+
/**
 * Applies processTextField to a string field, element-wise for lists.
 *
 * @param {string|string[]} data - field value (array when `isList`)
 * @param {number} maxLength - character budget per value
 * @param {boolean} isList - whether `data` is a list of strings
 * @returns {string|string[]} processed value(s)
 */
var processStringField = (data, maxLength, isList) => {
  const toIndexedText = (value) => processTextField(value, maxLength);
  if (!isList) {
    return toIndexedText(data);
  }
  return data.map(toIndexedText);
};
|
|
85
|
+
/**
 * Applies processRichTextField to a rich-text field, element-wise for lists.
 *
 * @param {object|object[]} data - field value (array when `isList`)
 * @param {number} maxLength - character budget per value
 * @param {boolean} isList - whether `data` is a list of rich-text trees
 * @returns {string|string[]} processed value(s)
 */
var processRichTextFieldData = (data, maxLength, isList) => {
  const toIndexedText = (value) => processRichTextField(value, maxLength);
  if (!isList) {
    return toIndexedText(data);
  }
  return data.map(toIndexedText);
};
|
|
71
93
|
/**
 * Rewrites a document (or nested object) in place into its indexable form.
 *
 * For a top-level call (`field` omitted) `_id` and `_relativePath` are
 * added. Every field taken from `field.fields`, else `collection.fields`,
 * is then visited: fields without a truthy `searchable` flag are deleted
 * from `data`, falsy values are left alone, and object / string /
 * rich-text values are replaced by their processed form. Other field
 * types are kept unchanged.
 *
 * @param {object} data - document or nested object; mutated and returned
 * @param {string} path - document path
 * @param {object} collection - collection definition (`name`, `path`, `fields`)
 * @param {number} textIndexLength - default per-field text budget
 * @param {object} [field] - enclosing object field when processing nested data
 * @returns {object} the same `data` object
 */
var processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
  if (!field) {
    const relativePath = getRelativePath(path, collection);
    data["_id"] = `${collection.name}:${relativePath}`;
    data["_relativePath"] = relativePath;
  }
  const fields = field?.fields || collection.fields || [];
  for (const f of fields) {
    if (!f.searchable) {
      delete data[f.name];
      continue;
    }
    if (!data[f.name]) {
      continue;
    }
    // A per-field override wins over the indexer-wide default.
    const fieldMaxLength = f.maxSearchIndexFieldLength || textIndexLength;
    const isList = Boolean(f.list);
    if (f.type === "object") {
      data[f.name] = processObjectField(data[f.name], path, collection, textIndexLength, f);
    } else if (f.type === "string") {
      data[f.name] = processStringField(data[f.name], fieldMaxLength, isList);
    } else if (f.type === "rich-text") {
      data[f.name] = processRichTextFieldData(data[f.name], fieldMaxLength, isList);
    }
  }
  return data;
};
|
|
134
|
-
var
|
|
136
|
+
// Memoized stopword lists, keyed by the comma-joined language keys.
var stopwordCache = {};
/**
 * Resolves the stopword list for a set of language keys.
 *
 * @param {string[]} [keys] - property names of the `stopword` module
 *   namespace; when falsy, `defaultStopWords` is returned
 * @param {string[]} [defaultStopWords=sw.eng] - list used when no keys are given
 * @returns {string[]} concatenated stopwords for `keys` (keys that resolve to
 *   a falsy value contribute nothing); the result is cached per key combination
 */
var lookupStopwords = (keys, defaultStopWords = sw.eng) => {
  if (!keys) {
    return defaultStopWords;
  }
  const cacheKey = keys.join(",");
  const cached = stopwordCache[cacheKey];
  if (cached) {
    return cached;
  }
  const stopwords = keys.flatMap((language) => sw[language] || []);
  stopwordCache[cacheKey] = stopwords;
  return stopwords;
};
|
|
149
149
|
|
|
@@ -161,6 +161,24 @@ var SearchIndexer = class {
|
|
|
161
161
|
this.batchSize = options.batchSize || 100;
|
|
162
162
|
this.textIndexLength = options.textIndexLength || 500;
|
|
163
163
|
}
|
|
164
|
+
createBatchProcessor() {
|
|
165
|
+
let batch = [];
|
|
166
|
+
return {
|
|
167
|
+
callback: async (item) => {
|
|
168
|
+
batch.push(item);
|
|
169
|
+
if (batch.length >= this.batchSize) {
|
|
170
|
+
await this.client.put(batch);
|
|
171
|
+
batch = [];
|
|
172
|
+
}
|
|
173
|
+
},
|
|
174
|
+
flush: async () => {
|
|
175
|
+
if (batch.length > 0) {
|
|
176
|
+
await this.client.put(batch);
|
|
177
|
+
batch = [];
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
};
|
|
181
|
+
}
|
|
164
182
|
makeIndexerCallback(itemCallback) {
|
|
165
183
|
return async (collection, contentPaths) => {
|
|
166
184
|
const templateInfo = this.schema.getTemplatesForCollectable(collection);
|
|
@@ -191,43 +209,25 @@ var SearchIndexer = class {
|
|
|
191
209
|
};
|
|
192
210
|
}
|
|
193
211
|
async indexContentByPaths(documentPaths) {
|
|
194
|
-
|
|
195
|
-
const itemCallback = async (item) => {
|
|
196
|
-
batch.push(item);
|
|
197
|
-
if (batch.length > this.batchSize) {
|
|
198
|
-
await this.client.put(batch);
|
|
199
|
-
batch = [];
|
|
200
|
-
}
|
|
201
|
-
};
|
|
212
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
202
213
|
await this.client.onStartIndexing?.();
|
|
203
214
|
await scanContentByPaths(
|
|
204
215
|
this.schema,
|
|
205
216
|
documentPaths,
|
|
206
|
-
this.makeIndexerCallback(
|
|
217
|
+
this.makeIndexerCallback(callback)
|
|
207
218
|
);
|
|
208
|
-
|
|
209
|
-
await this.client.put(batch);
|
|
210
|
-
}
|
|
219
|
+
await flush();
|
|
211
220
|
await this.client.onFinishIndexing?.();
|
|
212
221
|
}
|
|
213
222
|
async indexAllContent() {
|
|
223
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
214
224
|
await this.client.onStartIndexing?.();
|
|
215
|
-
let batch = [];
|
|
216
|
-
const itemCallback = async (item) => {
|
|
217
|
-
batch.push(item);
|
|
218
|
-
if (batch.length > this.batchSize) {
|
|
219
|
-
await this.client.put(batch);
|
|
220
|
-
batch = [];
|
|
221
|
-
}
|
|
222
|
-
};
|
|
223
225
|
const warnings = await scanAllContent(
|
|
224
226
|
this.schema,
|
|
225
227
|
this.bridge,
|
|
226
|
-
this.makeIndexerCallback(
|
|
228
|
+
this.makeIndexerCallback(callback)
|
|
227
229
|
);
|
|
228
|
-
|
|
229
|
-
await this.client.put(batch);
|
|
230
|
-
}
|
|
230
|
+
await flush();
|
|
231
231
|
await this.client.onFinishIndexing?.();
|
|
232
232
|
return { warnings };
|
|
233
233
|
}
|
|
@@ -239,57 +239,375 @@ var SearchIndexer = class {
|
|
|
239
239
|
};
|
|
240
240
|
|
|
241
241
|
// src/client/index.ts
|
|
242
|
-
import
|
|
243
|
-
import
|
|
242
|
+
import * as sqliteLevelModule from "sqlite-level";
|
|
243
|
+
import createSearchIndex from "search-index";
|
|
244
244
|
import { MemoryLevel } from "memory-level";
|
|
245
|
+
|
|
246
|
+
// src/fuzzy/types.ts
|
|
247
|
+
// Defaults merged underneath caller-supplied fuzzy options.
var DEFAULT_FUZZY_OPTIONS = {
  // Candidates with a larger edit distance are rejected (prefix matches excepted).
  maxDistance: 2,
  // Lowest similarity score a non-prefix candidate may have.
  minSimilarity: 0.6,
  // Upper bound on the number of returned matches.
  maxResults: 10,
  // Use the Damerau-Levenshtein distance instead of plain Levenshtein.
  useTranspositions: true,
  // When false, query and terms are lower-cased before comparison.
  caseSensitive: false,
  // Pre-filter candidates by n-gram overlap before computing distances.
  useNgramFilter: true,
  // Length of the n-grams used by the pre-filter.
  ngramSize: 2,
  // Minimum n-gram overlap ratio required to pass the pre-filter.
  minNgramOverlap: 0.2
};
|
|
257
|
+
|
|
258
|
+
// src/fuzzy/cache.ts
|
|
259
|
+
/**
 * Small least-recently-used cache for fuzzy lookups.
 *
 * Entries live in a Map, whose iteration order is insertion order, so the
 * first key is always the least recently used one.
 */
var FuzzyCache = class {
  cache;
  maxSize;
  /**
   * @param {number} [maxSize=100] - number of entries kept before eviction
   */
  constructor(maxSize = 100) {
    this.cache = /* @__PURE__ */ new Map();
    this.maxSize = maxSize;
  }
  /**
   * @param {string} query
   * @param {object} options
   * @returns {string} JSON string identifying the (query, options) pair
   */
  getCacheKey(query, options) {
    return JSON.stringify({ query, options });
  }
  /**
   * Looks up a cached result and marks it as most recently used.
   *
   * @param {string} query
   * @param {object} options
   * @returns {*} the cached value, or undefined on a miss
   */
  get(query, options) {
    const key = this.getCacheKey(query, options);
    const value = this.cache.get(key);
    if (value !== undefined) {
      // Re-insert so the entry moves to the most-recent end of the Map.
      this.cache.delete(key);
      this.cache.set(key, value);
    }
    return value;
  }
  /**
   * Stores a result, evicting the least recently used entry when full.
   *
   * @param {string} query
   * @param {object} options
   * @param {*} results - value to cache
   * @returns {void}
   */
  set(query, options, results) {
    const key = this.getCacheKey(query, options);
    // Remove an existing entry first: overwriting a key must refresh its
    // recency and must not push an unrelated entry out of a full cache.
    this.cache.delete(key);
    if (this.cache.size >= this.maxSize) {
      const firstKey = this.cache.keys().next().value;
      this.cache.delete(firstKey);
    }
    this.cache.set(key, results);
  }
  /** Removes every entry. */
  clear() {
    this.cache.clear();
  }
  /** @returns {number} number of cached entries */
  get size() {
    return this.cache.size;
  }
};
|
|
293
|
+
|
|
294
|
+
// src/fuzzy/distance.ts
|
|
295
|
+
var PREFIX_MATCH_MIN_SIMILARITY = 0.8;
|
|
296
|
+
/**
 * Computes the Levenshtein edit distance (insertions, deletions and
 * substitutions, each costing 1) between two strings, compared per
 * UTF-16 code unit.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} the edit distance
 */
function levenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;
  // Only the previous row of the DP table is needed to build the next one.
  let previous = Array.from({ length: cols + 1 }, (_, j) => j);
  for (let i = 1; i <= rows; i++) {
    const current = [i];
    for (let j = 1; j <= cols; j++) {
      current[j] = str1[i - 1] === str2[j - 1]
        ? previous[j - 1]
        : 1 + Math.min(previous[j], current[j - 1], previous[j - 1]);
    }
    previous = current;
  }
  return previous[cols];
}
|
|
317
|
+
/**
 * Normalizes an edit distance into a similarity score.
 *
 * @param {string} str1
 * @param {string} str2
 * @param {boolean} [useTranspositions=false] - use Damerau-Levenshtein
 *   instead of plain Levenshtein
 * @returns {number} `1 - distance / max(len1, len2)`, or 1 when both
 *   strings are empty
 */
function similarityScore(str1, str2, useTranspositions = false) {
  const measure = useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const distance = measure(str1, str2);
  const longest = Math.max(str1.length, str2.length);
  return longest === 0 ? 1 : 1 - distance / longest;
}
|
|
323
|
+
/**
 * Computes the Damerau-Levenshtein distance between two strings:
 * insertions, deletions, substitutions and transpositions of adjacent
 * characters, each costing 1.
 *
 * The table carries one extra sentinel row and column (index 0) filled
 * with `len1 + len2`, so the transposition lookup never wins when no
 * earlier match exists.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} the edit distance
 */
function damerauLevenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;
  const sentinel = rows + cols;
  // Last row (1-based index into str1) in which each character was seen.
  const lastRowOfChar = {};
  const table = Array.from({ length: rows + 2 }, () => new Array(cols + 2).fill(0));
  table[0][0] = sentinel;
  for (let i = 0; i <= rows; i++) {
    table[i + 1][0] = sentinel;
    table[i + 1][1] = i;
  }
  for (let j = 0; j <= cols; j++) {
    table[0][j + 1] = sentinel;
    table[1][j + 1] = j;
  }
  for (let i = 1; i <= rows; i++) {
    const rowChar = str1[i - 1];
    // Last column of this row where the two characters matched.
    let lastMatchCol = 0;
    for (let j = 1; j <= cols; j++) {
      const colChar = str2[j - 1];
      const matchRow = lastRowOfChar[colChar] || 0;
      const matchCol = lastMatchCol;
      const isMatch = rowChar === colChar;
      if (isMatch) {
        lastMatchCol = j;
      }
      const substitution = table[i][j] + (isMatch ? 0 : 1);
      const insertion = table[i + 1][j] + 1;
      const deletion = table[i][j + 1] + 1;
      const transposition = table[matchRow][matchCol] + (i - matchRow - 1) + 1 + (j - matchCol - 1);
      table[i + 1][j + 1] = Math.min(substitution, insertion, deletion, transposition);
    }
    lastRowOfChar[rowChar] = i;
  }
  return table[rows + 1][cols + 1];
}
|
|
359
|
+
/**
 * Collects the distinct character n-grams of a string.
 *
 * @param {string} str - input text
 * @param {number} [n=2] - n-gram length
 * @returns {Set<string>} all substrings of length `n`; when the string is
 *   shorter than `n`, a set holding just the string itself
 */
function getNgrams(str, n = 2) {
  if (str.length < n) {
    return new Set([str]);
  }
  const grams = /* @__PURE__ */ new Set();
  const lastStart = str.length - n;
  for (let start = 0; start <= lastStart; start++) {
    grams.add(str.substring(start, start + n));
  }
  return grams;
}
|
|
370
|
+
/**
 * Measures how much two n-gram sets overlap.
 *
 * @param {Set<string>} ngrams1
 * @param {Set<string>} ngrams2
 * @returns {number} the number of shared n-grams divided by the size of
 *   the smaller set, or 0 when either set is empty
 */
function ngramOverlap(ngrams1, ngrams2) {
  const smaller = Math.min(ngrams1.size, ngrams2.size);
  if (smaller === 0) {
    return 0;
  }
  let shared = 0;
  ngrams1.forEach((gram) => {
    if (ngrams2.has(gram)) {
      shared += 1;
    }
  });
  return shared / smaller;
}
|
|
379
|
+
/**
 * Finds dictionary terms that resemble `query`.
 *
 * Candidates are optionally pre-filtered by n-gram overlap. A term that
 * starts with the query is always accepted, with similarity
 * `max(queryLength / termLength, PREFIX_MATCH_MIN_SIMILARITY)`. Every other
 * term must be within `maxDistance` edits and reach `minSimilarity`.
 *
 * @param {string} query - term to look up
 * @param {Iterable<*>} dictionary - candidate terms; non-string and empty
 *   entries are skipped
 * @param {object} [options={}] - overrides for DEFAULT_FUZZY_OPTIONS
 * @returns {{ term: string, distance: number, similarity: number }[]}
 *   at most `maxResults` matches, best similarity first (near-ties broken
 *   by smaller distance)
 */
function findSimilarTerms(query, dictionary, options = {}) {
  const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
  const normalizedQuery = opts.caseSensitive ? query : query.toLowerCase();
  if (normalizedQuery.length === 0) return [];
  const matches = [];
  const distanceFunc = opts.useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const queryNgrams = opts.useNgramFilter ? getNgrams(normalizedQuery, opts.ngramSize) : null;
  for (const term of dictionary) {
    if (typeof term !== "string" || term.length === 0) continue;
    const normalizedTerm = opts.caseSensitive ? term : term.toLowerCase();
    if (queryNgrams) {
      const termNgrams = getNgrams(normalizedTerm, opts.ngramSize);
      const overlap = ngramOverlap(queryNgrams, termNgrams);
      if (overlap < opts.minNgramOverlap) continue;
    }
    if (normalizedTerm.startsWith(normalizedQuery)) {
      const prefixSimilarity = normalizedQuery.length / normalizedTerm.length;
      matches.push({
        term,
        distance: normalizedTerm.length - normalizedQuery.length,
        similarity: Math.max(prefixSimilarity, PREFIX_MATCH_MIN_SIMILARITY)
      });
      continue;
    }
    const distance = distanceFunc(normalizedQuery, normalizedTerm);
    if (distance > opts.maxDistance) continue;
    // Derive the score from the distance already computed instead of
    // running the O(n*m) distance algorithm a second time. The query is
    // non-empty here, so the divisor is never zero.
    const longest = Math.max(normalizedQuery.length, normalizedTerm.length);
    const similarity = 1 - distance / longest;
    if (similarity >= opts.minSimilarity) {
      matches.push({ term, distance, similarity });
    }
  }
  matches.sort((a, b) => {
    // Similarities closer than 0.001 count as a tie and fall back to distance.
    if (Math.abs(a.similarity - b.similarity) < 1e-3) {
      return a.distance - b.distance;
    }
    return b.similarity - a.similarity;
  });
  return matches.slice(0, opts.maxResults);
}
|
|
422
|
+
|
|
423
|
+
// src/pagination.ts
|
|
424
|
+
/**
 * Translates limit/cursor options into search-index paging options.
 *
 * @param {{ limit?: number, cursor?: string }} options - `cursor` is the
 *   zero-based page number as a string
 * @returns {{ PAGE?: { NUMBER: number, SIZE: number } }} an empty object
 *   when no limit is set, otherwise the page descriptor
 */
function buildPageOptions(options) {
  if (!options.limit) return {};
  const parsed = options.cursor ? Number.parseInt(options.cursor, 10) : 0;
  // A malformed or negative cursor falls back to the first page rather
  // than passing NaN or a negative page number to the search index.
  const pageNumber = Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
  return {
    PAGE: {
      NUMBER: pageNumber,
      SIZE: options.limit
    }
  };
}
|
|
433
|
+
/**
 * Computes the previous/next page cursors for a result set.
 *
 * @param {number} total - total number of matching results
 * @param {{ limit?: number, cursor?: string }} options - `cursor` is the
 *   zero-based page number as a string
 * @returns {{ prevCursor: string|null, nextCursor: string|null }} cursors
 *   as page-number strings; null when there is no such page. Without a
 *   limit there is never a next page.
 */
function buildPaginationCursors(total, options) {
  const parsed = options.cursor ? Number.parseInt(options.cursor, 10) : 0;
  // A malformed or negative cursor is treated as the first page so that
  // the comparisons below are not made against NaN.
  const currentPage = Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
  const pageSize = options.limit;
  const hasPreviousPage = currentPage > 0;
  const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
  return {
    prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
    nextCursor: hasNextPage ? (currentPage + 1).toString() : null
  };
}
|
|
443
|
+
|
|
444
|
+
// src/fuzzy-search-wrapper.ts
|
|
445
|
+
/**
 * Adds typo-tolerant querying on top of a search-index instance by
 * expanding every query term with similar terms from the index dictionary.
 */
var FuzzySearchWrapper = class {
  cache;
  searchIndex;
  /**
   * @param {object} searchIndex - search-index instance providing
   *   `DICTIONARY` and `QUERY`
   * @param {number} [cacheSize=100] - capacity of the similar-term cache
   */
  constructor(searchIndex, cacheSize = 100) {
    this.searchIndex = searchIndex;
    this.cache = new FuzzyCache(cacheSize);
  }
  /**
   * Loads the index dictionary, optionally restricted to one field.
   *
   * @param {string} [field] - field whose terms should be listed
   * @returns {Promise<string[]>} the string entries of the dictionary
   */
  async getDictionary(field) {
    const token = field ? { FIELD: field } : void 0;
    const dictionary = await this.searchIndex.DICTIONARY(token);
    return dictionary.filter((entry) => typeof entry === "string");
  }
  /**
   * Finds dictionary terms similar to `query`; results are cached per
   * query, field and options.
   *
   * @param {string} query - single term to look up
   * @param {string} [field] - restrict the dictionary to this field
   * @param {object} [options={}] - fuzzy matching options
   * @returns {Promise<{ term: string, distance: number, similarity: number }[]>}
   */
  async findSimilar(query, field, options = {}) {
    const cacheKey = `${query}:${field || "all"}`;
    const cached = this.cache.get(cacheKey, options);
    if (cached) return cached;
    const dictionary = await this.getDictionary(field);
    const matches = findSimilarTerms(query, dictionary, options);
    this.cache.set(cacheKey, options, matches);
    return matches;
  }
  /**
   * Splits a query on spaces and looks up similar terms for every term.
   *
   * @param {string} query - user query
   * @param {object} [options={}] - overrides for DEFAULT_FUZZY_OPTIONS
   * @returns {Promise<{ original: string[], expanded: string[], matches: object }>}
   *   `original` holds the query terms, `expanded` the de-duplicated union
   *   of terms and their alternatives, `matches` the similar terms per
   *   query term (only terms with at least one match are present)
   */
  async expandQuery(query, options = {}) {
    const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
    const terms = query.split(" ").map((t) => t.trim()).filter((t) => t.length > 0);
    const expanded = [];
    const matches = {};
    for (const term of terms) {
      const similarTerms = await this.findSimilar(term, void 0, opts);
      expanded.push(term);
      const similarValues = similarTerms.filter((m) => m.term.toLowerCase() !== term.toLowerCase()).map((m) => m.term);
      expanded.push(...similarValues);
      if (similarTerms.length > 0) {
        matches[term] = similarTerms;
      }
    }
    return {
      original: terms,
      expanded: Array.from(new Set(expanded)),
      matches
    };
  }
  /**
   * Runs a fuzzy query: every term may match itself or any of its similar
   * terms, and all terms must match.
   *
   * @param {string} query - user query
   * @param {{ limit?: number, cursor?: string, fuzzyOptions?: object }} [options={}]
   * @returns {Promise<{ results: object[], total: number, prevCursor: string|null,
   *   nextCursor: string|null, fuzzyMatches: object }>}
   */
  async query(query, options = {}) {
    const pageOptions = buildPageOptions(options);
    const expansion = await this.expandQuery(query, options.fuzzyOptions);
    // Only own properties count: a term such as "constructor" must not
    // resolve to a member inherited from Object.prototype.
    const matchesFor = (term) => Object.prototype.hasOwnProperty.call(expansion.matches, term) ? expansion.matches[term] : [];
    // Decide per term whether a real alternative exists. Comparing the
    // lengths of the de-duplicated `expanded` list and `original` is wrong
    // for repeated terms or terms that are alternatives of each other.
    const hasFuzzyAlternatives = expansion.original.some(
      (term) => matchesFor(term).some((m) => m.term.toLowerCase() !== term.toLowerCase())
    );
    let searchQuery;
    if (!hasFuzzyAlternatives) {
      searchQuery = { AND: expansion.original };
    } else {
      const queryGroups = expansion.original.map(
        (originalTerm) => [originalTerm, ...matchesFor(originalTerm).map((m) => m.term)]
      );
      searchQuery = queryGroups.length === 1 ? { OR: queryGroups[0] } : {
        AND: queryGroups.map(
          (group) => group.length === 1 ? group[0] : { OR: group }
        )
      };
    }
    const results = await this.searchIndex.QUERY(searchQuery, pageOptions);
    const total = results.RESULT_LENGTH || 0;
    const pagination = buildPaginationCursors(total, options);
    return {
      results: results.RESULT || [],
      total,
      ...pagination,
      fuzzyMatches: expansion.matches
    };
  }
  /** Drops all cached similar-term lookups. */
  clearCache() {
    this.cache.clear();
  }
  /** @returns {number} number of cached similar-term lookups */
  getCacheSize() {
    return this.cache.size;
  }
};
|
|
533
|
+
|
|
534
|
+
// src/client/index.ts
|
|
245
535
|
import * as zlib from "node:zlib";
|
|
246
|
-
var
|
|
536
|
+
var SqliteLevel2 = sqliteLevelModule.default?.SqliteLevel ?? sqliteLevelModule.SqliteLevel;
|
|
247
537
|
var DEFAULT_TOKEN_SPLIT_REGEX = /[\p{L}\d_]+/gu;
|
|
248
538
|
/**
 * Search client backed by an in-memory level store. The index can be
 * queried directly and exported to a SQLite file.
 */
var LocalSearchIndexClient = class {
  searchIndex;
  memoryLevel;
  stopwords;
  tokenSplitRegex;
  fuzzySearchWrapper;
  /**
   * @param {{ stopwordLanguages?: string[], tokenSplitRegex?: string }} options
   *   `tokenSplitRegex` is compiled with the "gu" flags; without it the
   *   default token regex is used
   */
  constructor(options) {
    this.memoryLevel = new MemoryLevel();
    this.stopwords = lookupStopwords(options.stopwordLanguages);
    this.tokenSplitRegex = options.tokenSplitRegex ? new RegExp(options.tokenSplitRegex, "gu") : DEFAULT_TOKEN_SPLIT_REGEX;
  }
  /**
   * Creates the search index on top of the in-memory store and the fuzzy
   * wrapper around it. Must run before `put`, `del` or `query`.
   *
   * @returns {Promise<void>}
   */
  async onStartIndexing() {
    const options = {
      db: this.memoryLevel,
      stopwords: this.stopwords,
      tokenSplitRegex: this.tokenSplitRegex
    };
    this.searchIndex = await createSearchIndex(
      options
    );
    this.fuzzySearchWrapper = new FuzzySearchWrapper(this.searchIndex);
  }
  /**
   * Adds documents to the index.
   *
   * @param {object[]} docs
   * @returns {Promise<void>}
   * @throws {Error} when `onStartIndexing` has not been called
   */
  async put(docs) {
    if (!this.searchIndex) {
      throw new Error("onStartIndexing must be called first");
    }
    await this.searchIndex.PUT(docs);
  }
  /**
   * Removes documents from the index.
   *
   * @param {string[]} ids
   * @returns {Promise<void>}
   * @throws {Error} when `onStartIndexing` has not been called
   */
  async del(ids) {
    if (!this.searchIndex) {
      throw new Error("onStartIndexing must be called first");
    }
    await this.searchIndex.DELETE(ids);
  }
  /**
   * Queries the index. With `options.fuzzy` the query is delegated to the
   * fuzzy wrapper; otherwise all space-separated terms must match exactly.
   *
   * @param {string} query
   * @param {{ limit?: number, cursor?: string, fuzzy?: boolean, fuzzyOptions?: object }} [options]
   * @returns {Promise<object>} `results`, `total`, `prevCursor`, `nextCursor`
   *   and, for fuzzy queries, `fuzzyMatches`
   * @throws {Error} when `onStartIndexing` has not been called
   */
  async query(query, options) {
    if (!this.searchIndex) {
      throw new Error("onStartIndexing must be called first");
    }
    if (options?.fuzzy && this.fuzzySearchWrapper) {
      return this.fuzzySearchWrapper.query(query, {
        limit: options.limit,
        cursor: options.cursor,
        fuzzyOptions: options.fuzzyOptions
      });
    }
    const searchIndexOptions = buildPageOptions({
      limit: options?.limit,
      cursor: options?.cursor
    });
    const terms = query.split(" ").filter((t) => t.trim().length > 0);
    // An empty query still sends a single empty term to the index.
    const queryObj = terms.length > 1 ? { AND: terms } : { AND: [terms[0] || ""] };
    const searchResults = await this.searchIndex.QUERY(
      queryObj,
      searchIndexOptions
    );
    const total = searchResults.RESULT_LENGTH || 0;
    const pagination = buildPaginationCursors(total, {
      limit: options?.limit,
      cursor: options?.cursor
    });
    return {
      results: searchResults.RESULT || [],
      total,
      ...pagination
    };
  }
  /**
   * Copies every key/value pair of the in-memory store into a SQLite
   * database file.
   *
   * @param {string} filename - target SQLite file
   * @returns {Promise<void>}
   */
  async export(filename) {
    const sqliteLevel = new SqliteLevel2({ filename });
    try {
      const iterator = this.memoryLevel.iterator();
      for await (const [key, value] of iterator) {
        await sqliteLevel.put(key, value);
      }
    } finally {
      // Release the database handle even when copying fails part-way.
      await sqliteLevel.close();
    }
  }
};
|
|
295
613
|
var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
|
|
@@ -302,49 +620,64 @@ var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
|
|
|
302
620
|
this.branch = options.branch;
|
|
303
621
|
this.indexerToken = options.indexerToken;
|
|
304
622
|
}
|
|
305
|
-
async
|
|
623
|
+
async getUploadUrl() {
|
|
306
624
|
const headers = new Headers();
|
|
307
|
-
headers.append("x-api-key", this.indexerToken || "
|
|
625
|
+
headers.append("x-api-key", this.indexerToken || "");
|
|
308
626
|
headers.append("Content-Type", "application/json");
|
|
309
|
-
|
|
627
|
+
const response = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
|
|
310
628
|
method: "GET",
|
|
311
629
|
headers
|
|
312
630
|
});
|
|
313
|
-
if (
|
|
314
|
-
|
|
315
|
-
try {
|
|
316
|
-
json = await res.json();
|
|
317
|
-
} catch (e) {
|
|
318
|
-
console.error("Failed to parse error response", e);
|
|
319
|
-
}
|
|
631
|
+
if (response.status !== 200) {
|
|
632
|
+
const errorBody = await response.json().catch(() => ({}));
|
|
320
633
|
throw new Error(
|
|
321
|
-
`Failed to get upload url. Status: ${
|
|
634
|
+
`Failed to get upload url. Status: ${response.status}${errorBody?.message ? ` - ${errorBody.message}` : ""}`
|
|
322
635
|
);
|
|
323
636
|
}
|
|
324
|
-
const { signedUrl } = await
|
|
325
|
-
|
|
637
|
+
const { signedUrl } = await response.json();
|
|
638
|
+
return signedUrl;
|
|
639
|
+
}
|
|
640
|
+
async serializeIndex() {
|
|
641
|
+
const sqliteLevel = new SqliteLevel2({ filename: ":memory:" });
|
|
326
642
|
const iterator = this.memoryLevel.iterator();
|
|
327
643
|
for await (const [key, value] of iterator) {
|
|
328
|
-
await
|
|
644
|
+
await sqliteLevel.put(key, value);
|
|
329
645
|
}
|
|
330
|
-
const buffer =
|
|
331
|
-
await
|
|
332
|
-
|
|
333
|
-
|
|
646
|
+
const buffer = sqliteLevel.db.serialize();
|
|
647
|
+
await sqliteLevel.close();
|
|
648
|
+
return zlib.gzipSync(buffer);
|
|
649
|
+
}
|
|
650
|
+
async uploadIndex(signedUrl, data) {
|
|
651
|
+
const response = await fetch(signedUrl, {
|
|
334
652
|
method: "PUT",
|
|
335
|
-
body:
|
|
653
|
+
body: data
|
|
336
654
|
});
|
|
337
|
-
if (
|
|
655
|
+
if (response.status !== 200) {
|
|
656
|
+
const errorText = await response.text();
|
|
338
657
|
throw new Error(
|
|
339
|
-
`Failed to upload search index. Status: ${
|
|
340
|
-
${
|
|
658
|
+
`Failed to upload search index. Status: ${response.status}
|
|
659
|
+
${errorText}`
|
|
341
660
|
);
|
|
342
661
|
}
|
|
343
662
|
}
|
|
663
|
+
async onFinishIndexing() {
|
|
664
|
+
const signedUrl = await this.getUploadUrl();
|
|
665
|
+
const indexData = await this.serializeIndex();
|
|
666
|
+
await this.uploadIndex(signedUrl, indexData);
|
|
667
|
+
}
|
|
344
668
|
};
|
|
345
669
|
export {
|
|
670
|
+
DEFAULT_FUZZY_OPTIONS,
|
|
671
|
+
FuzzyCache,
|
|
672
|
+
FuzzySearchWrapper,
|
|
346
673
|
LocalSearchIndexClient,
|
|
347
674
|
SearchIndexer,
|
|
348
675
|
TinaCMSSearchIndexClient,
|
|
349
|
-
|
|
676
|
+
buildPageOptions,
|
|
677
|
+
buildPaginationCursors,
|
|
678
|
+
createSearchIndex2 as createSearchIndex,
|
|
679
|
+
damerauLevenshteinDistance,
|
|
680
|
+
findSimilarTerms,
|
|
681
|
+
levenshteinDistance,
|
|
682
|
+
similarityScore
|
|
350
683
|
};
|