@tinacms/search 0.0.0-f608f48-20250617065117 → 0.0.0-f894432-20251221235528
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/client/index.d.ts +28 -13
- package/dist/fuzzy/cache.d.ts +11 -0
- package/dist/fuzzy/distance.d.ts +7 -0
- package/dist/fuzzy/index.d.ts +4 -0
- package/dist/fuzzy/types.d.ts +19 -0
- package/dist/fuzzy-search-wrapper.d.ts +46 -0
- package/dist/index-client.d.ts +30 -13
- package/dist/index-client.js +201 -205
- package/dist/index.d.ts +6 -1
- package/dist/index.js +538 -203
- package/dist/indexer/index.d.ts +1 -0
- package/dist/indexer/utils.d.ts +1 -1
- package/dist/pagination.d.ts +16 -0
- package/dist/types.d.ts +30 -11
- package/package.json +13 -14
- package/dist/index-client.mjs +0 -195
package/dist/index.js
CHANGED
|
@@ -1,181 +1,160 @@
|
|
|
1
|
-
var __create = Object.create;
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __export = (target, all) => {
|
|
8
|
-
for (var name in all)
|
|
9
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
-
};
|
|
11
|
-
var __copyProps = (to, from, except, desc) => {
|
|
12
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
-
for (let key of __getOwnPropNames(from))
|
|
14
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
-
}
|
|
17
|
-
return to;
|
|
18
|
-
};
|
|
19
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
-
mod
|
|
26
|
-
));
|
|
27
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
-
|
|
29
1
|
// src/index.ts
|
|
30
|
-
|
|
31
|
-
__export(index_exports, {
|
|
32
|
-
LocalSearchIndexClient: () => LocalSearchIndexClient,
|
|
33
|
-
SearchIndexer: () => SearchIndexer,
|
|
34
|
-
TinaCMSSearchIndexClient: () => TinaCMSSearchIndexClient,
|
|
35
|
-
si: () => import_search_index2.default
|
|
36
|
-
});
|
|
37
|
-
module.exports = __toCommonJS(index_exports);
|
|
38
|
-
var import_search_index2 = __toESM(require("search-index"));
|
|
2
|
+
import si2 from "search-index";
|
|
39
3
|
|
|
40
4
|
// src/indexer/index.ts
|
|
41
|
-
|
|
5
|
+
import {
|
|
6
|
+
loadAndParseWithAliases,
|
|
7
|
+
sequential,
|
|
8
|
+
scanAllContent,
|
|
9
|
+
scanContentByPaths,
|
|
10
|
+
transformDocument,
|
|
11
|
+
transformDocumentIntoPayload
|
|
12
|
+
} from "@tinacms/graphql";
|
|
42
13
|
|
|
43
14
|
// src/indexer/utils.ts
|
|
44
|
-
|
|
15
|
+
import * as sw from "stopword";
|
|
16
|
+
var INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
|
|
45
17
|
/**
 * Collects tokens up to a character budget and renders them as one
 * space-separated string.
 *
 * The budget applies to the string returned by `toString()`, so the single
 * space inserted between tokens is counted as well.
 */
var StringBuilder = class {
  // Tokens accepted so far, in insertion order.
  buffer = [];
  // Maximum length of the string produced by toString().
  limit;
  // Length of the string toString() would currently return (tokens + separators).
  length = 0;
  /**
   * @param {number} limit - maximum length of the joined output
   */
  constructor(limit) {
    this.limit = limit;
  }
  /**
   * Tries to add a token to the buffer.
   *
   * @param {string} str - token to append
   * @returns {boolean} `true` when the token was rejected because it would
   *   push the joined output past `limit`; `false` when it was stored.
   */
  append(str) {
    // Every token after the first costs one extra character for the " " join.
    const separatorLength = this.buffer.length > 0 ? 1 : 0;
    const nextLength = this.length + separatorLength + str.length;
    if (nextLength > this.limit) return true;
    this.buffer.push(str);
    this.length = nextLength;
    return false;
  }
  /**
   * @returns {string} accepted tokens joined by single spaces
   */
  toString() {
    return this.buffer.join(" ");
  }
};
|
|
67
|
-
var
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
34
|
+
/**
 * Splits a string on runs of whitespace, periods and commas and returns the
 * non-empty pieces in lower case.
 *
 * @param {string} str - text to tokenize
 * @returns {string[]} lower-cased tokens, in their original order
 */
var tokenizeString = (str) => {
  const tokens = [];
  for (const piece of str.split(/[\s\.,]+/)) {
    const lowered = piece.toLowerCase();
    // Leading/trailing separators produce empty pieces; drop them.
    if (lowered) tokens.push(lowered);
  }
  return tokens;
};
|
|
37
|
+
/**
 * Walks a rich-text node tree depth-first and appends the tokens of every
 * indexable node to `builder`.
 *
 * Traversal stops completely as soon as the builder rejects a token, so the
 * collected text is always a contiguous prefix of the document (previously
 * only the current node stopped and later siblings could still add tokens).
 *
 * @param {object|null|undefined} data - node with optional `type`, `text`,
 *   `value` and `children`
 * @param {{append: (token: string) => boolean}} builder - token sink; a
 *   `true` return from `append` means "full"
 * @param {string[]} nodeTypes - node types whose text should be indexed
 * @returns {boolean} `true` when the builder is full and the walk was aborted
 */
var extractText = (data, builder, nodeTypes) => {
  if (!data) return false;
  if (nodeTypes.includes(data.type ?? "") && (data.text || data.value)) {
    for (const token of tokenizeString(data.text || data.value || "")) {
      if (builder.append(token)) return true;
    }
  }
  // Guard against non-array `children` values instead of throwing on them.
  if (Array.isArray(data.children)) {
    for (const child of data.children) {
      if (extractText(child, builder, nodeTypes)) return true;
    }
  }
  return false;
};
|
|
82
|
-
var
|
|
47
|
+
/**
 * Derives a document path relative to its collection.
 *
 * Backslashes are converted to forward slashes, the first occurrence of
 * `collection.path` is removed, and one leading and one trailing slash are
 * stripped.
 *
 * @param {string} path - document path
 * @param {{path: string}} collection - collection the document belongs to
 * @returns {string} the relative path
 */
var getRelativePath = (path, collection) => {
  const posixPath = path.replace(/\\/g, "/");
  const withoutCollection = posixPath.replace(collection.path, "");
  return withoutCollection.replace(/^\/|\/$/g, "");
};
|
|
85
|
-
var
|
|
86
|
-
return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
|
|
87
|
-
};
|
|
88
|
-
var processTextFieldValue = (value, maxLen) => {
|
|
50
|
+
/**
 * Tokenizes a plain string value and keeps tokens until the builder reports
 * that the length budget is exhausted.
 *
 * @param {string} value - raw field value
 * @param {number} maxLength - budget handed to the StringBuilder
 * @returns {string} the accepted tokens as rendered by the builder
 */
var processTextField = (value, maxLength) => {
  const words = tokenizeString(value);
  const builder = new StringBuilder(maxLength);
  // `every` stops at the first token the builder rejects (append() === true).
  words.every((word) => !builder.append(word));
  return builder.toString();
};
|
|
58
|
+
/**
 * Flattens one rich-text value into an index string by feeding it through
 * `extractText` with the module's indexable node types.
 *
 * @param {object} value - root node of the rich-text value
 * @param {number} maxLength - budget handed to the StringBuilder
 * @returns {string} the text collected by the builder
 */
var processRichTextField = (value, maxLength) => {
  const collected = new StringBuilder(maxLength);
  extractText(value, collected, INDEXABLE_NODE_TYPES);
  return collected.toString();
};
|
|
63
|
+
/**
 * Prepares an object-typed field for indexing by recursing into
 * `processDocumentForIndexing` with the field definition as context.
 *
 * @param {object|object[]} data - field value; mapped element by element when
 *   `field.list` is truthy
 * @param {string} path - path of the owning document
 * @param {object} collection - collection definition
 * @param {number} textIndexLength - default text budget
 * @param {object} field - definition of the object field being processed
 * @returns {object|object[]} the processed value(s)
 */
var processObjectField = (data, path, collection, textIndexLength, field) => {
  const indexOne = (entry) => processDocumentForIndexing(entry, path, collection, textIndexLength, field);
  return field.list ? data.map(indexOne) : indexOne(data);
};
|
|
77
|
+
/**
 * Applies `processTextField` to a string field, element-wise for list fields.
 *
 * @param {string|string[]} data - field value
 * @param {number} maxLength - text budget per value
 * @param {boolean} isList - whether `data` is an array of values
 * @returns {string|string[]} processed value(s)
 */
var processStringField = (data, maxLength, isList) => {
  const shorten = (text) => processTextField(text, maxLength);
  return isList ? data.map(shorten) : shorten(data);
};
|
|
85
|
+
/**
 * Applies `processRichTextField` to a rich-text field, element-wise for list
 * fields.
 *
 * @param {object|object[]} data - field value
 * @param {number} maxLength - text budget per value
 * @param {boolean} isList - whether `data` is an array of values
 * @returns {string|string[]} processed value(s)
 */
var processRichTextFieldData = (data, maxLength, isList) => {
  const flatten = (node) => processRichTextField(node, maxLength);
  return isList ? data.map(flatten) : flatten(data);
};
|
|
98
93
|
/**
 * Rewrites a document (or nested object value) in place into its indexable
 * form and returns it.
 *
 * When called without `field` (top-level document) `_id` and `_relativePath`
 * are stamped onto `data`. Then every declared field is visited:
 * non-searchable fields are deleted, falsy values are left untouched, and
 * "object", "string" and "rich-text" values are replaced by their processed
 * form. Other field types are kept as they are.
 *
 * @param {object} data - document or object value; mutated
 * @param {string} path - path of the owning document
 * @param {object} collection - collection definition (`name`, `path`, `fields`)
 * @param {number} textIndexLength - default text budget per field
 * @param {object} [field] - enclosing object field when processing a nested value
 * @returns {object} the same `data` object
 */
var processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
  const isRootDocument = !field;
  if (isRootDocument) {
    const relativePath = getRelativePath(path, collection);
    data["_id"] = `${collection.name}:${relativePath}`;
    data["_relativePath"] = relativePath;
  }
  const schemaFields = field?.fields || collection.fields || [];
  for (const fieldDef of schemaFields) {
    const { name, type } = fieldDef;
    if (!fieldDef.searchable) {
      delete data[name];
      continue;
    }
    const current = data[name];
    if (!current) continue;
    // A per-field override wins over the indexer-wide default budget.
    const fieldMaxLength = fieldDef.maxSearchIndexFieldLength || textIndexLength;
    const isList = Boolean(fieldDef.list);
    if (type === "object") {
      data[name] = processObjectField(current, path, collection, textIndexLength, fieldDef);
    } else if (type === "string") {
      data[name] = processStringField(current, fieldMaxLength, isList);
    } else if (type === "rich-text") {
      data[name] = processRichTextFieldData(current, fieldMaxLength, isList);
    }
  }
  return data;
};
|
|
161
|
-
var
|
|
136
|
+
// Memoized stopword lists keyed by the comma-joined language keys.
var stopwordCache = {};
// Words that stay searchable even when a stopword list contains them.
var PRESERVED_WORDS = ["about"];
/**
 * Resolves the stopword list for the given language keys.
 *
 * The preserved words are removed from every returned list. The filtered list
 * is what gets cached, so repeated calls with the same keys return the same
 * content (previously the unfiltered list was cached and returned on a hit).
 *
 * @param {string[]} [keys] - keys into the `stopword` module (e.g. "eng");
 *   unknown keys contribute nothing
 * @param {string[]} [defaultStopWords] - list used when `keys` is not given
 * @returns {string[]} stopwords without the preserved words
 */
var lookupStopwords = (keys, defaultStopWords = sw.eng) => {
  const withoutPreserved = (words) => words.filter((word) => !PRESERVED_WORDS.includes(word));
  if (!keys) {
    return withoutPreserved(defaultStopWords);
  }
  const cacheKey = keys.join(",");
  if (!stopwordCache[cacheKey]) {
    stopwordCache[cacheKey] = withoutPreserved(keys.flatMap((key) => sw[key] || []));
  }
  return stopwordCache[cacheKey];
};
|
|
176
150
|
|
|
177
151
|
// src/indexer/index.ts
|
|
178
152
|
var SearchIndexer = class {
|
|
153
|
+
batchSize;
|
|
154
|
+
client;
|
|
155
|
+
bridge;
|
|
156
|
+
schema;
|
|
157
|
+
textIndexLength;
|
|
179
158
|
constructor(options) {
|
|
180
159
|
this.client = options.client;
|
|
181
160
|
this.bridge = options.bridge;
|
|
@@ -183,15 +162,33 @@ var SearchIndexer = class {
|
|
|
183
162
|
this.batchSize = options.batchSize || 100;
|
|
184
163
|
this.textIndexLength = options.textIndexLength || 500;
|
|
185
164
|
}
|
|
165
|
+
createBatchProcessor() {
|
|
166
|
+
let batch = [];
|
|
167
|
+
return {
|
|
168
|
+
callback: async (item) => {
|
|
169
|
+
batch.push(item);
|
|
170
|
+
if (batch.length >= this.batchSize) {
|
|
171
|
+
await this.client.put(batch);
|
|
172
|
+
batch = [];
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
flush: async () => {
|
|
176
|
+
if (batch.length > 0) {
|
|
177
|
+
await this.client.put(batch);
|
|
178
|
+
batch = [];
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
};
|
|
182
|
+
}
|
|
186
183
|
makeIndexerCallback(itemCallback) {
|
|
187
184
|
return async (collection, contentPaths) => {
|
|
188
185
|
const templateInfo = this.schema.getTemplatesForCollectable(collection);
|
|
189
|
-
await
|
|
190
|
-
const data = await
|
|
186
|
+
await sequential(contentPaths, async (path) => {
|
|
187
|
+
const data = await transformDocumentIntoPayload(
|
|
191
188
|
`${collection.path}/${path}`,
|
|
192
|
-
|
|
189
|
+
transformDocument(
|
|
193
190
|
path,
|
|
194
|
-
await
|
|
191
|
+
await loadAndParseWithAliases(
|
|
195
192
|
this.bridge,
|
|
196
193
|
path,
|
|
197
194
|
collection,
|
|
@@ -213,43 +210,25 @@ var SearchIndexer = class {
|
|
|
213
210
|
};
|
|
214
211
|
}
|
|
215
212
|
async indexContentByPaths(documentPaths) {
|
|
216
|
-
|
|
217
|
-
const itemCallback = async (item) => {
|
|
218
|
-
batch.push(item);
|
|
219
|
-
if (batch.length > this.batchSize) {
|
|
220
|
-
await this.client.put(batch);
|
|
221
|
-
batch = [];
|
|
222
|
-
}
|
|
223
|
-
};
|
|
213
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
224
214
|
await this.client.onStartIndexing?.();
|
|
225
|
-
await
|
|
215
|
+
await scanContentByPaths(
|
|
226
216
|
this.schema,
|
|
227
217
|
documentPaths,
|
|
228
|
-
this.makeIndexerCallback(
|
|
218
|
+
this.makeIndexerCallback(callback)
|
|
229
219
|
);
|
|
230
|
-
|
|
231
|
-
await this.client.put(batch);
|
|
232
|
-
}
|
|
220
|
+
await flush();
|
|
233
221
|
await this.client.onFinishIndexing?.();
|
|
234
222
|
}
|
|
235
223
|
async indexAllContent() {
|
|
224
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
236
225
|
await this.client.onStartIndexing?.();
|
|
237
|
-
|
|
238
|
-
const itemCallback = async (item) => {
|
|
239
|
-
batch.push(item);
|
|
240
|
-
if (batch.length > this.batchSize) {
|
|
241
|
-
await this.client.put(batch);
|
|
242
|
-
batch = [];
|
|
243
|
-
}
|
|
244
|
-
};
|
|
245
|
-
const warnings = await (0, import_graphql.scanAllContent)(
|
|
226
|
+
const warnings = await scanAllContent(
|
|
246
227
|
this.schema,
|
|
247
228
|
this.bridge,
|
|
248
|
-
this.makeIndexerCallback(
|
|
229
|
+
this.makeIndexerCallback(callback)
|
|
249
230
|
);
|
|
250
|
-
|
|
251
|
-
await this.client.put(batch);
|
|
252
|
-
}
|
|
231
|
+
await flush();
|
|
253
232
|
await this.client.onFinishIndexing?.();
|
|
254
233
|
return { warnings };
|
|
255
234
|
}
|
|
@@ -261,47 +240,385 @@ var SearchIndexer = class {
|
|
|
261
240
|
};
|
|
262
241
|
|
|
263
242
|
// src/client/index.ts
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
243
|
+
import * as sqliteLevelModule from "sqlite-level";
|
|
244
|
+
import si from "search-index";
|
|
245
|
+
import { MemoryLevel } from "memory-level";
|
|
246
|
+
|
|
247
|
+
// src/fuzzy/types.ts
|
|
248
|
+
/**
 * Defaults merged underneath caller-supplied fuzzy options.
 * Key order is kept stable because option objects are serialized into cache keys.
 */
var DEFAULT_FUZZY_OPTIONS = {
  // Candidates with a larger edit distance are discarded.
  maxDistance: 2,
  // Minimum similarity score (0..1) a non-prefix candidate must reach.
  minSimilarity: 0.6,
  // Upper bound on the number of matches returned per query term.
  maxResults: 10,
  // Use the Damerau-Levenshtein distance instead of plain Levenshtein.
  useTranspositions: true,
  // When false, query and dictionary terms are lower-cased before comparing.
  caseSensitive: false,
  // Skip candidates that share too few n-grams with the query.
  useNgramFilter: true,
  // Length of the n-grams used by the filter.
  ngramSize: 2,
  // Minimum n-gram overlap ratio a candidate needs to pass the filter.
  minNgramOverlap: 0.2
};
|
|
258
|
+
|
|
259
|
+
// src/fuzzy/cache.ts
|
|
260
|
+
/**
 * Small least-recently-used cache for fuzzy lookups, keyed by the JSON
 * serialization of `{ query, options }`.
 *
 * Recency is tracked through the insertion order of the backing Map: the
 * first key is the least recently used one.
 */
var FuzzyCache = class {
  cache;
  maxSize;
  /**
   * @param {number} [maxSize=100] - number of entries kept before eviction
   */
  constructor(maxSize = 100) {
    this.cache = /* @__PURE__ */ new Map();
    this.maxSize = maxSize;
  }
  /**
   * @param {string} query
   * @param {object} options
   * @returns {string} cache key for the pair
   */
  getCacheKey(query, options) {
    return JSON.stringify({ query, options });
  }
  /**
   * Looks up an entry and marks it as most recently used.
   *
   * @returns {any} the cached value, or `undefined` on a miss
   */
  get(query, options) {
    const key = this.getCacheKey(query, options);
    if (!this.cache.has(key)) return void 0;
    const value = this.cache.get(key);
    // Re-insert so the key moves to the "most recent" end of the Map.
    this.cache.delete(key);
    this.cache.set(key, value);
    return value;
  }
  /**
   * Stores an entry as most recently used, evicting the least recently used
   * entry only when a NEW key would exceed `maxSize`.
   */
  set(query, options, results) {
    const key = this.getCacheKey(query, options);
    if (this.cache.has(key)) {
      // Updating an existing key needs no eviction; delete to refresh recency.
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(key, results);
  }
  /** Removes every entry. */
  clear() {
    this.cache.clear();
  }
  /** @returns {number} number of cached entries */
  get size() {
    return this.cache.size;
  }
};
|
|
294
|
+
|
|
295
|
+
// src/fuzzy/distance.ts
|
|
296
|
+
/**
 * Computes the Levenshtein distance (insertions, deletions and substitutions,
 * each costing 1) between two strings, compared by UTF-16 code unit.
 *
 * Only the previous and the current row of the dynamic-programming table are
 * kept.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} the edit distance
 */
function levenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;
  // Row 0: turning "" into the first j characters of str2 takes j insertions.
  let previous = Array.from({ length: cols + 1 }, (_, j) => j);
  for (let i = 1; i <= rows; i++) {
    const current = [i];
    for (let j = 1; j <= cols; j++) {
      current[j] = str1[i - 1] === str2[j - 1]
        ? previous[j - 1]
        : 1 + Math.min(previous[j], current[j - 1], previous[j - 1]);
    }
    previous = current;
  }
  return previous[cols];
}
|
|
317
|
+
/**
 * Normalizes an edit distance into a similarity score: 1 minus the distance
 * divided by the length of the longer string.
 *
 * @param {string} str1
 * @param {string} str2
 * @param {boolean} [useTranspositions=false] - measure with
 *   `damerauLevenshteinDistance` instead of `levenshteinDistance`
 * @returns {number} the score; 1 when both strings are empty
 */
function similarityScore(str1, str2, useTranspositions = false) {
  const measure = useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const distance = measure(str1, str2);
  const longest = Math.max(str1.length, str2.length);
  return longest === 0 ? 1 : 1 - distance / longest;
}
|
|
323
|
+
/**
 * Computes the Damerau-Levenshtein distance between two strings: like the
 * Levenshtein distance, but swapping two adjacent characters also counts as
 * a single edit.
 *
 * The table carries one extra leading row and column filled with a sentinel
 * (`len1 + len2`, larger than any real distance) so the transposition lookup
 * can index position 0 when no earlier match exists.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} the edit distance
 */
function damerauLevenshteinDistance(str1, str2) {
  const len1 = str1.length;
  const len2 = str2.length;
  const sentinel = len1 + len2;
  // For each character: the last (1-based) row of str1 in which it occurred.
  const lastRowByChar = /* @__PURE__ */ new Map();
  const table = Array.from({ length: len1 + 2 }, () => new Array(len2 + 2).fill(0));
  table[0][0] = sentinel;
  for (let row = 0; row <= len1; row++) {
    table[row + 1][0] = sentinel;
    table[row + 1][1] = row;
  }
  for (let col = 0; col <= len2; col++) {
    table[0][col + 1] = sentinel;
    table[1][col + 1] = col;
  }
  for (let row = 1; row <= len1; row++) {
    const charA = str1[row - 1];
    // Last column of the current row in which charA matched str2.
    let lastMatchCol = 0;
    for (let col = 1; col <= len2; col++) {
      const charB = str2[col - 1];
      const prevRow = lastRowByChar.get(charB) ?? 0;
      // Must be read before a match in this column updates it.
      const prevCol = lastMatchCol;
      const isMatch = charA === charB;
      if (isMatch) lastMatchCol = col;
      const viaDiagonal = table[row][col] + (isMatch ? 0 : 1);
      const viaLeft = table[row + 1][col] + 1;
      const viaAbove = table[row][col + 1] + 1;
      const viaTransposition = table[prevRow][prevCol] + (row - prevRow - 1) + 1 + (col - prevCol - 1);
      table[row + 1][col + 1] = Math.min(viaDiagonal, viaLeft, viaAbove, viaTransposition);
    }
    lastRowByChar.set(charA, row);
  }
  return table[len1 + 1][len2 + 1];
}
|
|
359
|
+
/**
 * Returns the set of distinct substrings of length `n` contained in `str`.
 *
 * @param {string} str - source string
 * @param {number} [n=2] - n-gram length
 * @returns {Set<string>} the n-grams; a string shorter than `n` yields a set
 *   containing just the string itself
 */
function getNgrams(str, n = 2) {
  if (str.length < n) {
    return /* @__PURE__ */ new Set([str]);
  }
  const grams = /* @__PURE__ */ new Set();
  const lastStart = str.length - n;
  let start = 0;
  while (start <= lastStart) {
    grams.add(str.substring(start, start + n));
    start += 1;
  }
  return grams;
}
|
|
370
|
+
/**
 * Measures how many n-grams two sets share, relative to the smaller set.
 *
 * @param {Set<string>} ngrams1
 * @param {Set<string>} ngrams2
 * @returns {number} shared count divided by the smaller set's size; 0 when
 *   either set is empty
 */
function ngramOverlap(ngrams1, ngrams2) {
  const smallerSize = Math.min(ngrams1.size, ngrams2.size);
  if (smallerSize === 0) return 0;
  const shared = [...ngrams1].filter((gram) => ngrams2.has(gram)).length;
  return shared / smallerSize;
}
|
|
379
|
+
/**
 * Finds dictionary terms that resemble `query`.
 *
 * A term qualifies when it starts with the query (prefix match, scored at
 * least 0.8) or when it passes the optional n-gram pre-filter, lies within
 * `maxDistance` edits and reaches `minSimilarity`. Matches are ordered by
 * similarity (descending; near-ties by distance ascending) and capped at
 * `maxResults`.
 *
 * The prefix check runs before the n-gram filter so that queries shorter
 * than `ngramSize` can still match by prefix, and the edit distance is
 * computed once per candidate.
 *
 * @param {string} query - term to look up
 * @param {Iterable<any>} dictionary - candidate terms; non-string and empty
 *   entries are ignored
 * @param {object} [options] - overrides for DEFAULT_FUZZY_OPTIONS
 * @returns {{term: string, distance: number, similarity: number}[]}
 */
function findSimilarTerms(query, dictionary, options = {}) {
  const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
  const normalize = (text) => opts.caseSensitive ? text : text.toLowerCase();
  const normalizedQuery = normalize(query);
  if (normalizedQuery.length === 0) return [];
  const matches = [];
  const distanceFunc = opts.useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const queryNgrams = opts.useNgramFilter ? getNgrams(normalizedQuery, opts.ngramSize) : null;
  for (const term of dictionary) {
    if (typeof term !== "string" || term.length === 0) continue;
    const normalizedTerm = normalize(term);
    if (normalizedTerm.startsWith(normalizedQuery)) {
      const prefixSimilarity = normalizedQuery.length / normalizedTerm.length;
      matches.push({
        term,
        distance: normalizedTerm.length - normalizedQuery.length,
        similarity: Math.max(prefixSimilarity, 0.8)
      });
      continue;
    }
    // Cheap pre-filter: skip the distance computation for unrelated terms.
    if (queryNgrams) {
      const termNgrams = getNgrams(normalizedTerm, opts.ngramSize);
      if (ngramOverlap(queryNgrams, termNgrams) < opts.minNgramOverlap) continue;
    }
    const distance = distanceFunc(normalizedQuery, normalizedTerm);
    if (distance > opts.maxDistance) continue;
    // Both strings are non-empty here, so the divisor is never 0.
    const similarity = 1 - distance / Math.max(normalizedQuery.length, normalizedTerm.length);
    if (similarity >= opts.minSimilarity) {
      matches.push({ term, distance, similarity });
    }
  }
  matches.sort((a, b) => {
    if (Math.abs(a.similarity - b.similarity) < 1e-3) {
      return a.distance - b.distance;
    }
    return b.similarity - a.similarity;
  });
  return matches.slice(0, opts.maxResults);
}
|
|
422
|
+
|
|
423
|
+
// src/pagination.ts
|
|
424
|
+
/**
 * Translates `{ limit, cursor }` into the `PAGE` option object passed to the
 * search index query.
 *
 * The cursor is a page number encoded as a decimal string. A missing,
 * non-numeric or negative cursor selects the first page instead of producing
 * a NaN or negative page number.
 *
 * @param {{limit?: number, cursor?: string}} options
 * @returns {{PAGE?: {NUMBER: number, SIZE: number}}} `{}` when no limit is set
 */
function buildPageOptions(options) {
  if (!options.limit) return {};
  const requestedPage = options.cursor ? Number.parseInt(options.cursor, 10) : 0;
  const pageNumber = Number.isInteger(requestedPage) && requestedPage > 0 ? requestedPage : 0;
  return {
    PAGE: {
      NUMBER: pageNumber,
      SIZE: options.limit
    }
  };
}
|
|
433
|
+
/**
 * Computes the cursors for the pages around the current one.
 *
 * Cursors are page numbers encoded as decimal strings. A missing, non-numeric
 * or negative cursor is treated as the first page, so a malformed cursor
 * still yields a usable `nextCursor`.
 *
 * @param {number} total - total number of matching results
 * @param {{limit?: number, cursor?: string}} options
 * @returns {{prevCursor: string|null, nextCursor: string|null}} `null` marks
 *   a page that does not exist; without a limit there is never a next page
 */
function buildPaginationCursors(total, options) {
  const requestedPage = options.cursor ? Number.parseInt(options.cursor, 10) : 0;
  const currentPage = Number.isInteger(requestedPage) && requestedPage > 0 ? requestedPage : 0;
  const pageSize = options.limit;
  const hasPreviousPage = currentPage > 0;
  const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
  return {
    prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
    nextCursor: hasNextPage ? (currentPage + 1).toString() : null
  };
}
|
|
443
|
+
|
|
444
|
+
// src/fuzzy-search-wrapper.ts
|
|
445
|
+
/**
 * Adds typo-tolerant querying on top of a search index by expanding each
 * query term with similar terms from the index dictionary.
 */
var FuzzySearchWrapper = class {
  cache;
  searchIndex;
  /**
   * @param {object} searchIndex - index exposing `DICTIONARY` and `QUERY`
   * @param {number} [cacheSize=100] - capacity of the similar-term cache
   */
  constructor(searchIndex, cacheSize = 100) {
    this.searchIndex = searchIndex;
    this.cache = new FuzzyCache(cacheSize);
  }
  /**
   * Reads the index dictionary, optionally restricted to one field.
   *
   * @param {string} [field]
   * @returns {Promise<string[]>} the string entries of the dictionary
   */
  async getDictionary(field) {
    const token = field ? { FIELD: field } : void 0;
    const dictionary = await this.searchIndex.DICTIONARY(token);
    return dictionary.filter((entry) => typeof entry === "string");
  }
  /**
   * Returns dictionary terms similar to `query`, served from the cache when
   * the same query/field/options combination was looked up before.
   *
   * @returns {Promise<{term: string, distance: number, similarity: number}[]>}
   */
  async findSimilar(query, field, options = {}) {
    const cacheKey = `${query}:${field || "all"}`;
    const cached = this.cache.get(cacheKey, options);
    if (cached) return cached;
    const dictionary = await this.getDictionary(field);
    const matches = findSimilarTerms(query, dictionary, options);
    this.cache.set(cacheKey, options, matches);
    return matches;
  }
  /**
   * Splits the query on spaces and looks up similar terms for every term.
   *
   * @returns {Promise<{original: string[], expanded: string[], matches: object}>}
   *   `expanded` holds the original terms plus their variants without
   *   duplicates; `matches` maps a term to its similar-term records.
   */
  async expandQuery(query, options = {}) {
    const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
    const terms = query.split(" ").map((t) => t.trim()).filter((t) => t.length > 0);
    const expanded = [];
    const matches = {};
    for (const term of terms) {
      const similarTerms = await this.findSimilar(term, void 0, opts);
      expanded.push(term);
      const similarValues = similarTerms.filter((m) => m.term.toLowerCase() !== term.toLowerCase()).map((m) => m.term);
      expanded.push(...similarValues);
      if (similarTerms.length > 0) {
        // defineProperty always creates an own property, even for a term
        // such as "__proto__" that plain assignment would treat specially.
        Object.defineProperty(matches, term, {
          value: similarTerms,
          enumerable: true,
          writable: true,
          configurable: true
        });
      }
    }
    return {
      original: terms,
      expanded: Array.from(new Set(expanded)),
      matches
    };
  }
  /**
   * Runs a query against the index.
   *
   * Without `options.fuzzy` all terms are AND-ed as typed. With it, every
   * term is OR-ed with its similar terms and the groups are AND-ed; when no
   * term has a variant the plain AND query is used.
   *
   * @param {string} query - space-separated search terms
   * @param {{limit?: number, cursor?: string, fuzzy?: boolean, fuzzyOptions?: object}} [options]
   * @returns {Promise<object>} `results`, `total`, `prevCursor`, `nextCursor`
   *   and, for fuzzy queries, `fuzzyMatches`
   */
  async query(query, options = {}) {
    const pageOptions = buildPageOptions(options);
    if (!options.fuzzy) {
      return this.#runQuery({ AND: query.split(" ").filter((t) => t) }, pageOptions, options);
    }
    const expansion = await this.expandQuery(query, options.fuzzyOptions);
    // Only own properties count: a term like "constructor" must not resolve
    // to an inherited Object.prototype member.
    const matchesFor = (term) => Object.prototype.hasOwnProperty.call(expansion.matches, term) ? expansion.matches[term] : [];
    // Decide per term instead of comparing list lengths, which breaks for
    // repeated terms and for variants that equal another query term.
    const hasVariants = expansion.original.some(
      (term) => matchesFor(term).some((m) => m.term.toLowerCase() !== term.toLowerCase())
    );
    if (!hasVariants) {
      return this.#runQuery({ AND: expansion.original }, pageOptions, options, expansion.matches);
    }
    const queryGroups = expansion.original.map(
      (originalTerm) => [...new Set([originalTerm, ...matchesFor(originalTerm).map((m) => m.term)])]
    );
    const searchQuery = queryGroups.length === 1 ? { OR: queryGroups[0] } : {
      AND: queryGroups.map(
        (group) => group.length === 1 ? group[0] : { OR: group }
      )
    };
    return this.#runQuery(searchQuery, pageOptions, options, expansion.matches);
  }
  /** Drops all cached similar-term lookups. */
  clearCache() {
    this.cache.clear();
  }
  /** @returns {number} number of cached similar-term lookups */
  getCacheSize() {
    return this.cache.size;
  }
  // Executes the index query and shapes the paginated response.
  // `fuzzyMatches` is attached only when it is provided.
  async #runQuery(searchQuery, pageOptions, options, fuzzyMatches) {
    const raw = await this.searchIndex.QUERY(searchQuery, pageOptions);
    const total = raw.RESULT_LENGTH || 0;
    const response = {
      results: raw.RESULT || [],
      total,
      ...buildPaginationCursors(total, options)
    };
    if (fuzzyMatches !== void 0) {
      response.fuzzyMatches = fuzzyMatches;
    }
    return response;
  }
};
|
|
548
|
+
|
|
549
|
+
// src/client/index.ts
|
|
550
|
+
import * as zlib from "node:zlib";
|
|
551
|
+
var SqliteLevel2 = sqliteLevelModule.default?.SqliteLevel ?? sqliteLevelModule.SqliteLevel;
|
|
268
552
|
var DEFAULT_TOKEN_SPLIT_REGEX = /[\p{L}\d_]+/gu;
|
|
269
553
|
var LocalSearchIndexClient = class {
|
|
554
|
+
searchIndex;
|
|
555
|
+
memoryLevel;
|
|
556
|
+
stopwords;
|
|
557
|
+
tokenSplitRegex;
|
|
558
|
+
fuzzySearchWrapper;
|
|
270
559
|
constructor(options) {
|
|
271
|
-
this.memoryLevel = new
|
|
560
|
+
this.memoryLevel = new MemoryLevel();
|
|
272
561
|
this.stopwords = lookupStopwords(options.stopwordLanguages);
|
|
273
562
|
this.tokenSplitRegex = options.tokenSplitRegex ? new RegExp(options.tokenSplitRegex, "gu") : DEFAULT_TOKEN_SPLIT_REGEX;
|
|
274
563
|
}
|
|
275
564
|
async onStartIndexing() {
|
|
276
|
-
|
|
277
|
-
// @ts-ignore
|
|
565
|
+
const options = {
|
|
278
566
|
db: this.memoryLevel,
|
|
279
567
|
stopwords: this.stopwords,
|
|
280
568
|
tokenSplitRegex: this.tokenSplitRegex
|
|
281
|
-
}
|
|
569
|
+
};
|
|
570
|
+
this.searchIndex = await si(
|
|
571
|
+
options
|
|
572
|
+
);
|
|
573
|
+
this.fuzzySearchWrapper = new FuzzySearchWrapper(this.searchIndex);
|
|
282
574
|
}
|
|
283
575
|
async put(docs) {
|
|
284
576
|
if (!this.searchIndex) {
|
|
285
577
|
throw new Error("onStartIndexing must be called first");
|
|
286
578
|
}
|
|
287
|
-
|
|
579
|
+
await this.searchIndex.PUT(docs);
|
|
288
580
|
}
|
|
289
581
|
async del(ids) {
|
|
290
582
|
if (!this.searchIndex) {
|
|
291
583
|
throw new Error("onStartIndexing must be called first");
|
|
292
584
|
}
|
|
293
|
-
|
|
294
|
-
}
|
|
295
|
-
query(query, options) {
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
585
|
+
await this.searchIndex.DELETE(ids);
|
|
586
|
+
}
|
|
587
|
+
async query(query, options) {
|
|
588
|
+
if (!this.searchIndex) {
|
|
589
|
+
throw new Error("onStartIndexing must be called first");
|
|
590
|
+
}
|
|
591
|
+
if (options?.fuzzy && this.fuzzySearchWrapper) {
|
|
592
|
+
return this.fuzzySearchWrapper.query(query, {
|
|
593
|
+
limit: options.limit,
|
|
594
|
+
cursor: options.cursor,
|
|
595
|
+
fuzzy: true,
|
|
596
|
+
fuzzyOptions: options.fuzzyOptions
|
|
597
|
+
});
|
|
598
|
+
}
|
|
599
|
+
const searchIndexOptions = buildPageOptions({
|
|
600
|
+
limit: options?.limit,
|
|
601
|
+
cursor: options?.cursor
|
|
602
|
+
});
|
|
603
|
+
const terms = query.split(" ").filter((t) => t.trim().length > 0);
|
|
604
|
+
const queryObj = terms.length > 1 ? { AND: terms } : { AND: [terms[0] || ""] };
|
|
605
|
+
const searchResults = await this.searchIndex.QUERY(
|
|
606
|
+
queryObj,
|
|
607
|
+
searchIndexOptions
|
|
608
|
+
);
|
|
609
|
+
const total = searchResults.RESULT_LENGTH || 0;
|
|
610
|
+
const pagination = buildPaginationCursors(total, {
|
|
611
|
+
limit: options?.limit,
|
|
612
|
+
cursor: options?.cursor
|
|
301
613
|
});
|
|
614
|
+
return {
|
|
615
|
+
results: searchResults.RESULT || [],
|
|
616
|
+
total,
|
|
617
|
+
...pagination
|
|
618
|
+
};
|
|
302
619
|
}
|
|
303
620
|
async export(filename) {
|
|
304
|
-
const sqliteLevel = new
|
|
621
|
+
const sqliteLevel = new SqliteLevel2({ filename });
|
|
305
622
|
const iterator = this.memoryLevel.iterator();
|
|
306
623
|
for await (const [key, value] of iterator) {
|
|
307
624
|
await sqliteLevel.put(key, value);
|
|
@@ -310,55 +627,73 @@ var LocalSearchIndexClient = class {
|
|
|
310
627
|
}
|
|
311
628
|
};
|
|
312
629
|
/**
 * Search index client that, once indexing finishes, uploads the serialized
 * index to a signed URL obtained from the configured API.
 */
var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
  apiUrl;
  branch;
  indexerToken;
  /**
   * @param {object} options - base client options plus `apiUrl`, `branch`
   *   and `indexerToken`
   */
  constructor(options) {
    super(options);
    this.apiUrl = options.apiUrl;
    this.branch = options.branch;
    this.indexerToken = options.indexerToken;
  }
  /**
   * Requests the signed upload URL for the configured branch.
   *
   * @returns {Promise<string>} the signed URL
   * @throws {Error} when the API does not answer with status 200 or the
   *   response carries no `signedUrl`
   */
  async getUploadUrl() {
    const headers = new Headers();
    headers.append("x-api-key", this.indexerToken || "");
    headers.append("Content-Type", "application/json");
    const response = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
      method: "GET",
      headers
    });
    if (response.status !== 200) {
      // The error body is optional context; an unparsable body is not fatal.
      const errorBody = await response.json().catch(() => ({}));
      throw new Error(
        `Failed to get upload url. Status: ${response.status}${errorBody?.message ? ` - ${errorBody.message}` : ""}`
      );
    }
    const { signedUrl } = await response.json();
    if (!signedUrl) {
      // Fail here with a clear message instead of passing undefined to fetch.
      throw new Error("Failed to get upload url. Response did not include a signedUrl");
    }
    return signedUrl;
  }
  /**
   * Copies the in-memory index into a temporary in-memory SQLite database
   * and returns its gzip-compressed serialization.
   *
   * @returns {Promise<Buffer>} gzipped database image
   */
  async serializeIndex() {
    const sqliteLevel = new SqliteLevel2({ filename: ":memory:" });
    let serialized;
    try {
      const iterator = this.memoryLevel.iterator();
      for await (const [key, value] of iterator) {
        await sqliteLevel.put(key, value);
      }
      serialized = sqliteLevel.db.serialize();
    } finally {
      // Release the temporary database even when copying or serializing fails.
      await sqliteLevel.close();
    }
    return zlib.gzipSync(serialized);
  }
  /**
   * Uploads the serialized index with an HTTP PUT.
   *
   * @param {string} signedUrl - upload target
   * @param {Buffer} data - request body
   * @throws {Error} when the upload does not answer with status 200
   */
  async uploadIndex(signedUrl, data) {
    const response = await fetch(signedUrl, {
      method: "PUT",
      body: data
    });
    if (response.status !== 200) {
      const errorText = await response.text();
      throw new Error(
        `Failed to upload search index. Status: ${response.status}
${errorText}`
      );
    }
  }
  /**
   * Publishes the freshly built index: fetches the upload URL, serializes
   * the index and uploads it.
   */
  async onFinishIndexing() {
    const signedUrl = await this.getUploadUrl();
    const indexData = await this.serializeIndex();
    await this.uploadIndex(signedUrl, indexData);
  }
};
|
|
358
|
-
|
|
359
|
-
|
|
685
|
+
export {
|
|
686
|
+
DEFAULT_FUZZY_OPTIONS,
|
|
687
|
+
FuzzyCache,
|
|
688
|
+
FuzzySearchWrapper,
|
|
360
689
|
LocalSearchIndexClient,
|
|
361
690
|
SearchIndexer,
|
|
362
691
|
TinaCMSSearchIndexClient,
|
|
363
|
-
|
|
364
|
-
|
|
692
|
+
buildPageOptions,
|
|
693
|
+
buildPaginationCursors,
|
|
694
|
+
damerauLevenshteinDistance,
|
|
695
|
+
findSimilarTerms,
|
|
696
|
+
levenshteinDistance,
|
|
697
|
+
si2 as si,
|
|
698
|
+
similarityScore
|
|
699
|
+
};
|