@tinacms/search 0.0.0-ef282d9-20241024212433 → 0.0.0-f1cec43-20251216232909
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/client/index.d.ts +28 -13
- package/dist/fuzzy/cache.d.ts +11 -0
- package/dist/fuzzy/distance.d.ts +15 -0
- package/dist/fuzzy/index.d.ts +4 -0
- package/dist/fuzzy/types.d.ts +19 -0
- package/dist/fuzzy-search-wrapper.d.ts +46 -0
- package/dist/index-client.d.ts +30 -13
- package/dist/index-client.js +189 -205
- package/dist/index.d.ts +6 -1
- package/dist/index.js +545 -209
- package/dist/indexer/index.d.ts +1 -0
- package/dist/indexer/utils.d.ts +1 -1
- package/dist/pagination.d.ts +22 -0
- package/dist/types.d.ts +30 -11
- package/package.json +18 -19
- package/dist/index-client.mjs +0 -195
package/dist/index.js
CHANGED
|
@@ -1,179 +1,160 @@
|
|
|
1
|
-
var __create = Object.create;
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __export = (target, all) => {
|
|
8
|
-
for (var name in all)
|
|
9
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
-
};
|
|
11
|
-
var __copyProps = (to, from, except, desc) => {
|
|
12
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
-
for (let key of __getOwnPropNames(from))
|
|
14
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
-
}
|
|
17
|
-
return to;
|
|
18
|
-
};
|
|
19
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
21
|
-
mod
|
|
22
|
-
));
|
|
23
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
24
|
-
|
|
25
1
|
// src/index.ts
|
|
26
|
-
|
|
27
|
-
__export(src_exports, {
|
|
28
|
-
LocalSearchIndexClient: () => LocalSearchIndexClient,
|
|
29
|
-
SearchIndexer: () => SearchIndexer,
|
|
30
|
-
TinaCMSSearchIndexClient: () => TinaCMSSearchIndexClient,
|
|
31
|
-
si: () => import_search_index2.default
|
|
32
|
-
});
|
|
33
|
-
module.exports = __toCommonJS(src_exports);
|
|
34
|
-
var import_search_index2 = __toESM(require("search-index"));
|
|
2
|
+
import si2 from "search-index";
|
|
35
3
|
|
|
36
4
|
// src/indexer/index.ts
|
|
37
|
-
|
|
5
|
+
import {
|
|
6
|
+
loadAndParseWithAliases,
|
|
7
|
+
sequential,
|
|
8
|
+
scanAllContent,
|
|
9
|
+
scanContentByPaths,
|
|
10
|
+
transformDocument,
|
|
11
|
+
transformDocumentIntoPayload
|
|
12
|
+
} from "@tinacms/graphql";
|
|
38
13
|
|
|
39
14
|
// src/indexer/utils.ts
|
|
40
|
-
|
|
15
|
+
import * as sw from "stopword";
|
|
16
|
+
var INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
|
|
41
17
|
/**
 * Collects string tokens up to a total character budget.
 *
 * Only the characters of the tokens themselves count against `limit`; the
 * single spaces inserted by `toString()` are not counted.
 */
var StringBuilder = class {
  buffer = [];
  limit;
  length = 0;
  /**
   * @param {number} limit Maximum combined length of all appended tokens.
   */
  constructor(limit) {
    this.limit = limit;
  }
  /**
   * Appends `str` if it still fits within the budget.
   *
   * @param {string} str Token to append.
   * @returns {boolean} `true` when the token was rejected because it would
   *   push the total past `limit` (callers use this as a "stop" signal),
   *   `false` when it was stored.
   */
  append(str) {
    if (this.length + str.length > this.limit) return true;
    this.buffer.push(str);
    this.length += str.length;
    // The guard above guarantees length <= limit here, so the token fit.
    return false;
  }
  /**
   * @returns {string} All accepted tokens joined by a single space.
   */
  toString() {
    return this.buffer.join(" ");
  }
};
|
|
63
|
-
var
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
}
|
|
34
|
+
/**
 * Splits text on runs of whitespace, dots and commas and lower-cases every
 * piece; empty pieces are discarded.
 *
 * @param {string} str Raw text.
 * @returns {string[]} Lower-cased, non-empty tokens in source order.
 */
var tokenizeString = (str) => {
  const tokens = [];
  for (const piece of str.split(/[\s\.,]+/)) {
    const lowered = piece.toLowerCase();
    if (lowered) tokens.push(lowered);
  }
  return tokens;
};
|
|
37
|
+
/**
 * Walks a node tree depth-first and feeds the tokens of every indexable node
 * into `builder`.
 *
 * A node is indexable when its `type` is listed in `nodeTypes` and it has a
 * truthy `text` or `value`. As soon as the builder rejects a token, the
 * current node is abandoned (its children are not visited).
 *
 * @param {object} data Node with optional `type`, `text`, `value`, `children`.
 * @param {{append: (token: string) => boolean}} builder Token sink; `append`
 *   returns `true` when the token was rejected.
 * @param {string[]} nodeTypes Node types whose text should be collected.
 */
var extractText = (data, builder, nodeTypes) => {
  if (!data) return;
  const isIndexable = nodeTypes.includes(data.type ?? "");
  const content = isIndexable ? data.text || data.value : void 0;
  if (content) {
    const rejected = tokenizeString(content).some((token) => builder.append(token));
    if (rejected) return;
  }
  data.children?.forEach((child) => {
    extractText(child, builder, nodeTypes);
  });
};
|
|
80
|
-
var
|
|
47
|
+
/**
 * Derives a document path relative to its collection.
 *
 * Backslashes are converted to forward slashes, the first occurrence of
 * `collection.path` is removed, and one leading and one trailing slash are
 * stripped.
 *
 * @param {string} path Document path.
 * @param {{path: string}} collection Collection whose `path` is removed.
 * @returns {string} The collection-relative path.
 */
var getRelativePath = (path, collection) => {
  const forwardSlashed = path.replace(/\\/g, "/");
  const withoutCollection = forwardSlashed.replace(collection.path, "");
  return withoutCollection.replace(/^\/|\/$/g, "");
};
|
|
83
|
-
var
|
|
84
|
-
return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
|
|
85
|
-
};
|
|
86
|
-
var processTextFieldValue = (value, maxLen) => {
|
|
50
|
+
/**
 * Tokenizes a plain string and keeps tokens, in order, until the first one
 * that no longer fits into `maxLength`.
 *
 * @param {string} value Raw field value.
 * @param {number} maxLength Character budget handed to the StringBuilder.
 * @returns {string} Accepted tokens joined by spaces.
 */
var processTextField = (value, maxLength) => {
  const collected = new StringBuilder(maxLength);
  // `some` stops at the first rejected token, like a loop with `break`.
  tokenizeString(value).some((part) => collected.append(part));
  return collected.toString();
};
|
|
58
|
+
/**
 * Flattens a rich-text node tree into a space-separated token string,
 * collecting only nodes whose type is in INDEXABLE_NODE_TYPES.
 *
 * @param {object} value Root node of the rich-text tree.
 * @param {number} maxLength Character budget handed to the StringBuilder.
 * @returns {string} Collected tokens joined by spaces.
 */
var processRichTextField = (value, maxLength) => {
  const collected = new StringBuilder(maxLength);
  extractText(value, collected, INDEXABLE_NODE_TYPES);
  return collected.toString();
};
|
|
63
|
+
/**
 * Runs `processDocumentForIndexing` on an object field, element by element
 * when the field is a list.
 *
 * @param {object|object[]} data Field value (array when `field.list` is set).
 * @param {string} path Path of the owning document.
 * @param {object} collection Collection the document belongs to.
 * @param {number} textIndexLength Default per-field character budget.
 * @param {object} field Schema definition of the object field.
 * @returns {object|object[]} The processed value(s).
 */
var processObjectField = (data, path, collection, textIndexLength, field) => {
  const indexOne = (obj) => processDocumentForIndexing(obj, path, collection, textIndexLength, field);
  return field.list ? data.map((obj) => indexOne(obj)) : indexOne(data);
};
|
|
77
|
+
/**
 * Applies `processTextField` to a string field, element by element when the
 * field is a list.
 *
 * @param {string|string[]} data Field value.
 * @param {number} maxLength Character budget per value.
 * @param {boolean} isList Whether `data` is an array of strings.
 * @returns {string|string[]} The processed value(s).
 */
var processStringField = (data, maxLength, isList) => {
  if (!isList) {
    return processTextField(data, maxLength);
  }
  return data.map((entry) => processTextField(entry, maxLength));
};
|
|
85
|
+
/**
 * Applies `processRichTextField` to a rich-text field, element by element
 * when the field is a list.
 *
 * @param {object|object[]} data Field value.
 * @param {number} maxLength Character budget per value.
 * @param {boolean} isList Whether `data` is an array of rich-text trees.
 * @returns {string|string[]} The flattened text value(s).
 */
var processRichTextFieldData = (data, maxLength, isList) => {
  if (!isList) {
    return processRichTextField(data, maxLength);
  }
  return data.map((entry) => processRichTextField(entry, maxLength));
};
|
|
96
93
|
/**
 * Prepares a document (or nested object value) for the search index.
 *
 * The object is modified in place and also returned:
 * - at the top level (no `field`), `_id` and `_relativePath` are set;
 * - fields not marked `searchable` are deleted;
 * - searchable fields with a truthy value are reduced according to their
 *   type ("object", "string" or "rich-text"); other types are left untouched.
 *
 * @param {object} data Document or nested object value; mutated.
 * @param {string} path Document path.
 * @param {object} collection Collection definition (`name`, `path`, `fields`).
 * @param {number} textIndexLength Default per-field character budget.
 * @param {object} [field] Enclosing object field when processing nested data.
 * @returns {object} The same `data` object.
 */
var processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
  if (!field) {
    const relativePath = getRelativePath(path, collection);
    data["_id"] = `${collection.name}:${relativePath}`;
    data["_relativePath"] = relativePath;
  }
  const schemaFields = field?.fields || collection.fields || [];
  for (const f of schemaFields) {
    const fieldName = f.name;
    if (!f.searchable) {
      delete data[fieldName];
      continue;
    }
    const currentValue = data[fieldName];
    if (!currentValue) continue;
    // A per-field limit overrides the indexer-wide default.
    const fieldMaxLength = f.maxSearchIndexFieldLength || textIndexLength;
    const isList = Boolean(f.list);
    if (f.type === "object") {
      data[fieldName] = processObjectField(currentValue, path, collection, textIndexLength, f);
    } else if (f.type === "string") {
      data[fieldName] = processStringField(currentValue, fieldMaxLength, isList);
    } else if (f.type === "rich-text") {
      data[fieldName] = processRichTextFieldData(currentValue, fieldMaxLength, isList);
    }
  }
  return data;
};
|
|
159
|
-
var
|
|
136
|
+
// Memoized stopword lists keyed by the comma-joined language keys. A Map is
// used so keys such as "constructor" cannot resolve to inherited members.
var stopwordCache = /* @__PURE__ */ new Map();
// Words that must stay searchable even though stopword lists contain them.
var PRESERVED_WORDS = ["about"];
/**
 * Resolves the stopword list for the given language keys.
 *
 * PRESERVED_WORDS are removed from every returned list, including lists
 * served from the cache, so repeated calls with the same keys agree.
 *
 * @param {string[]} [keys] Property names on the `stopword` module namespace
 *   (`sw`), e.g. language codes. Unknown keys contribute no words.
 * @param {string[]} [defaultStopWords] List used when `keys` is not given;
 *   defaults to `sw.eng`.
 * @returns {string[]} Stopwords without the preserved words. For a given set
 *   of keys the same cached array instance is returned on every call.
 */
var lookupStopwords = (keys, defaultStopWords = sw.eng) => {
  const withoutPreserved = (words) => words.filter((word) => !PRESERVED_WORDS.includes(word));
  if (!keys) {
    return withoutPreserved(defaultStopWords);
  }
  const cacheKey = keys.join(",");
  let stopwords = stopwordCache.get(cacheKey);
  if (stopwords === void 0) {
    stopwords = withoutPreserved(keys.flatMap((key) => sw[key] || []));
    stopwordCache.set(cacheKey, stopwords);
  }
  return stopwords;
};
|
|
174
150
|
|
|
175
151
|
// src/indexer/index.ts
|
|
176
152
|
var SearchIndexer = class {
|
|
153
|
+
batchSize;
|
|
154
|
+
client;
|
|
155
|
+
bridge;
|
|
156
|
+
schema;
|
|
157
|
+
textIndexLength;
|
|
177
158
|
constructor(options) {
|
|
178
159
|
this.client = options.client;
|
|
179
160
|
this.bridge = options.bridge;
|
|
@@ -181,15 +162,33 @@ var SearchIndexer = class {
|
|
|
181
162
|
this.batchSize = options.batchSize || 100;
|
|
182
163
|
this.textIndexLength = options.textIndexLength || 500;
|
|
183
164
|
}
|
|
165
|
+
createBatchProcessor() {
|
|
166
|
+
let batch = [];
|
|
167
|
+
return {
|
|
168
|
+
callback: async (item) => {
|
|
169
|
+
batch.push(item);
|
|
170
|
+
if (batch.length >= this.batchSize) {
|
|
171
|
+
await this.client.put(batch);
|
|
172
|
+
batch = [];
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
flush: async () => {
|
|
176
|
+
if (batch.length > 0) {
|
|
177
|
+
await this.client.put(batch);
|
|
178
|
+
batch = [];
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
};
|
|
182
|
+
}
|
|
184
183
|
makeIndexerCallback(itemCallback) {
|
|
185
184
|
return async (collection, contentPaths) => {
|
|
186
185
|
const templateInfo = this.schema.getTemplatesForCollectable(collection);
|
|
187
|
-
await
|
|
188
|
-
const data = await
|
|
186
|
+
await sequential(contentPaths, async (path) => {
|
|
187
|
+
const data = await transformDocumentIntoPayload(
|
|
189
188
|
`${collection.path}/${path}`,
|
|
190
|
-
|
|
189
|
+
transformDocument(
|
|
191
190
|
path,
|
|
192
|
-
await
|
|
191
|
+
await loadAndParseWithAliases(
|
|
193
192
|
this.bridge,
|
|
194
193
|
path,
|
|
195
194
|
collection,
|
|
@@ -211,97 +210,416 @@ var SearchIndexer = class {
|
|
|
211
210
|
};
|
|
212
211
|
}
|
|
213
212
|
async indexContentByPaths(documentPaths) {
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
batch.push(item);
|
|
218
|
-
if (batch.length > this.batchSize) {
|
|
219
|
-
await this.client.put(batch);
|
|
220
|
-
batch = [];
|
|
221
|
-
}
|
|
222
|
-
};
|
|
223
|
-
await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
|
|
224
|
-
await (0, import_graphql.scanContentByPaths)(
|
|
213
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
214
|
+
await this.client.onStartIndexing?.();
|
|
215
|
+
await scanContentByPaths(
|
|
225
216
|
this.schema,
|
|
226
217
|
documentPaths,
|
|
227
|
-
this.makeIndexerCallback(
|
|
218
|
+
this.makeIndexerCallback(callback)
|
|
228
219
|
);
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
}
|
|
232
|
-
await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
|
|
220
|
+
await flush();
|
|
221
|
+
await this.client.onFinishIndexing?.();
|
|
233
222
|
}
|
|
234
223
|
async indexAllContent() {
|
|
235
|
-
|
|
236
|
-
await
|
|
237
|
-
|
|
238
|
-
const itemCallback = async (item) => {
|
|
239
|
-
batch.push(item);
|
|
240
|
-
if (batch.length > this.batchSize) {
|
|
241
|
-
await this.client.put(batch);
|
|
242
|
-
batch = [];
|
|
243
|
-
}
|
|
244
|
-
};
|
|
245
|
-
const warnings = await (0, import_graphql.scanAllContent)(
|
|
224
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
225
|
+
await this.client.onStartIndexing?.();
|
|
226
|
+
const warnings = await scanAllContent(
|
|
246
227
|
this.schema,
|
|
247
228
|
this.bridge,
|
|
248
|
-
this.makeIndexerCallback(
|
|
229
|
+
this.makeIndexerCallback(callback)
|
|
249
230
|
);
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
}
|
|
253
|
-
await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
|
|
231
|
+
await flush();
|
|
232
|
+
await this.client.onFinishIndexing?.();
|
|
254
233
|
return { warnings };
|
|
255
234
|
}
|
|
256
235
|
async deleteIndexContent(documentPaths) {
|
|
257
|
-
|
|
258
|
-
await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
|
|
236
|
+
await this.client.onStartIndexing?.();
|
|
259
237
|
await this.client.del(documentPaths);
|
|
260
|
-
await
|
|
238
|
+
await this.client.onFinishIndexing?.();
|
|
239
|
+
}
|
|
240
|
+
};
|
|
241
|
+
|
|
242
|
+
// src/client/index.ts
|
|
243
|
+
import * as sqliteLevelModule from "sqlite-level";
|
|
244
|
+
import si from "search-index";
|
|
245
|
+
import { MemoryLevel } from "memory-level";
|
|
246
|
+
|
|
247
|
+
// src/fuzzy/types.ts
|
|
248
|
+
/**
 * Defaults merged under caller-supplied options by `findSimilarTerms` and
 * `FuzzySearchWrapper.expandQuery`.
 */
var DEFAULT_FUZZY_OPTIONS = {
  // Non-prefix terms with an edit distance above this are skipped.
  maxDistance: 2,
  // Non-prefix terms need at least this similarity score to be kept.
  minSimilarity: 0.6,
  // Upper bound on the number of matches returned.
  maxResults: 10,
  // true selects damerauLevenshteinDistance, false levenshteinDistance.
  useTranspositions: true,
  // When false, query and dictionary terms are lower-cased before comparing.
  caseSensitive: false,
  // Enables the n-gram overlap pre-filter before computing distances.
  useNgramFilter: true,
  // n passed to getNgrams for the pre-filter.
  ngramSize: 2,
  // Terms whose n-gram overlap with the query is below this are skipped.
  minNgramOverlap: 0.2
};
|
|
258
|
+
|
|
259
|
+
// src/fuzzy/cache.ts
|
|
260
|
+
/**
 * Small least-recently-used cache for fuzzy lookup results.
 *
 * Entries are keyed by the JSON serialization of `{ query, options }`. The
 * backing Map's insertion order doubles as the recency order: the first key
 * is always the least recently used one.
 */
var FuzzyCache = class {
  cache;
  maxSize;
  /**
   * @param {number} [maxSize=100] Entry count at which the oldest entry is evicted.
   */
  constructor(maxSize = 100) {
    this.cache = /* @__PURE__ */ new Map();
    this.maxSize = maxSize;
  }
  /**
   * @param {string} query
   * @param {object} options
   * @returns {string} Cache key for the query/options pair.
   */
  getCacheKey(query, options) {
    return JSON.stringify({ query, options });
  }
  /**
   * Looks up an entry and marks it as most recently used.
   *
   * @param {string} query
   * @param {object} options
   * @returns {*} The cached results, or `undefined` on a miss.
   */
  get(query, options) {
    const key = this.getCacheKey(query, options);
    if (!this.cache.has(key)) return void 0;
    const value = this.cache.get(key);
    // Re-insert so the key moves to the most-recent end of the Map.
    this.cache.delete(key);
    this.cache.set(key, value);
    return value;
  }
  /**
   * Stores results, evicting the least recently used entry only when a new
   * key would exceed the capacity. Overwriting an existing key never evicts
   * another entry; it just refreshes that key's recency.
   *
   * @param {string} query
   * @param {object} options
   * @param {*} results
   */
  set(query, options, results) {
    const key = this.getCacheKey(query, options);
    if (this.cache.has(key)) {
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(key, results);
  }
  /** Removes every entry. */
  clear() {
    this.cache.clear();
  }
  /** @returns {number} Number of cached entries. */
  get size() {
    return this.cache.size;
  }
};
|
|
294
|
+
|
|
295
|
+
// src/fuzzy/distance.ts
|
|
296
|
+
/**
 * Computes the Levenshtein edit distance (insertions, deletions and
 * substitutions, each costing 1) between two strings.
 *
 * Characters are compared per UTF-16 code unit. Only two rows of the
 * dynamic-programming table are kept, so memory use is proportional to
 * `str2.length` rather than to the product of both lengths.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Minimum number of single-character edits.
 */
function levenshteinDistance(str1, str2) {
  if (str1 === str2) return 0;
  const len1 = str1.length;
  const len2 = str2.length;
  if (len1 === 0) return len2;
  if (len2 === 0) return len1;
  // previous[j] = distance between the processed prefix of str1 and str2[0..j).
  let previous = Array.from({ length: len2 + 1 }, (_, j) => j);
  let current = new Array(len2 + 1).fill(0);
  for (let i = 1; i <= len1; i++) {
    current[0] = i;
    for (let j = 1; j <= len2; j++) {
      current[j] = str1[i - 1] === str2[j - 1]
        ? previous[j - 1]
        : Math.min(previous[j], current[j - 1], previous[j - 1]) + 1;
    }
    // The finished row becomes the reference row for the next iteration.
    [previous, current] = [current, previous];
  }
  return previous[len2];
}
|
|
317
|
+
/**
 * Converts an edit distance into a similarity: 1 minus the distance divided
 * by the longer string's length.
 *
 * @param {string} str1
 * @param {string} str2
 * @param {boolean} [useTranspositions=false] Use damerauLevenshteinDistance
 *   instead of levenshteinDistance.
 * @returns {number} 1 for two empty strings, otherwise `1 - distance / maxLength`.
 */
function similarityScore(str1, str2, useTranspositions = false) {
  const measure = useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const distance = measure(str1, str2);
  const longest = Math.max(str1.length, str2.length);
  return longest === 0 ? 1 : 1 - distance / longest;
}
|
|
323
|
+
/**
 * Computes the Damerau-Levenshtein distance between two strings: insertions,
 * deletions, substitutions and transpositions of two characters, each
 * costing 1.
 *
 * The table carries one extra leading row and column filled with a sentinel
 * (`str1.length + str2.length`), so table coordinates are offset by one
 * relative to string positions.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} The edit distance.
 */
function damerauLevenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;
  const sentinel = rows + cols;
  // Last row (1-based position in str1) at which each character was seen.
  const lastRowOfChar = /* @__PURE__ */ new Map();
  const table = Array.from({ length: rows + 2 }, () => new Array(cols + 2).fill(0));
  table[0][0] = sentinel;
  for (let r = 0; r <= rows; r++) {
    table[r + 1][0] = sentinel;
    table[r + 1][1] = r;
  }
  for (let c = 0; c <= cols; c++) {
    table[0][c + 1] = sentinel;
    table[1][c + 1] = c;
  }
  for (let r = 1; r <= rows; r++) {
    const rowChar = str1[r - 1];
    // Last column in this row where the two strings matched.
    let lastMatchCol = 0;
    for (let c = 1; c <= cols; c++) {
      const colChar = str2[c - 1];
      const prevRow = lastRowOfChar.get(colChar) ?? 0;
      const prevCol = lastMatchCol;
      const isMatch = rowChar === colChar;
      if (isMatch) {
        lastMatchCol = c;
      }
      const substitution = table[r][c] + (isMatch ? 0 : 1);
      const insertion = table[r + 1][c] + 1;
      const deletion = table[r][c + 1] + 1;
      const transposition = table[prevRow][prevCol] + (r - prevRow - 1) + 1 + (c - prevCol - 1);
      table[r + 1][c + 1] = Math.min(substitution, insertion, deletion, transposition);
    }
    lastRowOfChar.set(rowChar, r);
  }
  return table[rows + 1][cols + 1];
}
|
|
359
|
+
/**
 * Builds the set of distinct substrings of length `n` contained in `str`.
 *
 * @param {string} str Source text.
 * @param {number} [n=2] Substring length.
 * @returns {Set<string>} The n-grams in first-occurrence order; a string
 *   shorter than `n` yields a set holding just the string itself.
 */
function getNgrams(str, n = 2) {
  if (str.length < n) {
    return /* @__PURE__ */ new Set([str]);
  }
  const grams = /* @__PURE__ */ new Set();
  const lastStart = str.length - n;
  let start = 0;
  while (start <= lastStart) {
    grams.add(str.substring(start, start + n));
    start += 1;
  }
  return grams;
}
|
|
370
|
+
/**
 * Measures how many n-grams two sets share, relative to the smaller set.
 *
 * @param {Set<string>} ngrams1
 * @param {Set<string>} ngrams2
 * @returns {number} Shared count divided by the smaller set's size; 0 when
 *   either set is empty.
 */
function ngramOverlap(ngrams1, ngrams2) {
  const nothingToCompare = ngrams1.size === 0 || ngrams2.size === 0;
  if (nothingToCompare) return 0;
  let shared = 0;
  for (const gram of ngrams1) {
    shared += ngrams2.has(gram) ? 1 : 0;
  }
  return shared / Math.min(ngrams1.size, ngrams2.size);
}
|
|
379
|
+
/**
 * Finds dictionary terms that resemble `query`.
 *
 * For each string term, in order:
 * 1. if the n-gram filter is on, terms whose overlap with the query is below
 *    `minNgramOverlap` are skipped;
 * 2. terms that start with the query are accepted immediately, with a
 *    similarity of at least 0.8 and the length difference as distance;
 * 3. otherwise the term must be within `maxDistance` edits and reach
 *    `minSimilarity`.
 *
 * Matches are ordered by descending similarity (ties within 1e-3 broken by
 * ascending distance) and truncated to `maxResults`.
 *
 * @param {string} query Term to look up.
 * @param {Iterable<*>} dictionary Candidate terms; non-strings and empty
 *   strings are ignored.
 * @param {object} [options] Overrides for DEFAULT_FUZZY_OPTIONS.
 * @returns {{term: string, distance: number, similarity: number}[]}
 */
function findSimilarTerms(query, dictionary, options = {}) {
  const settings = { ...DEFAULT_FUZZY_OPTIONS, ...options };
  const normalize = (text) => settings.caseSensitive ? text : text.toLowerCase();
  const needle = normalize(query);
  if (needle.length === 0) return [];
  const measure = settings.useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const needleNgrams = settings.useNgramFilter ? getNgrams(needle, settings.ngramSize) : null;
  const found = [];
  for (const term of dictionary) {
    if (typeof term !== "string" || term.length === 0) continue;
    const candidate = normalize(term);
    if (needleNgrams) {
      const candidateNgrams = getNgrams(candidate, settings.ngramSize);
      if (ngramOverlap(needleNgrams, candidateNgrams) < settings.minNgramOverlap) continue;
    }
    if (candidate.startsWith(needle)) {
      // Prefix matches get at least 0.8 similarity
      found.push({
        term,
        distance: candidate.length - needle.length,
        similarity: Math.max(needle.length / candidate.length, 0.8)
      });
      continue;
    }
    const distance = measure(needle, candidate);
    if (distance > settings.maxDistance) continue;
    const similarity = similarityScore(needle, candidate, settings.useTranspositions);
    if (similarity >= settings.minSimilarity) {
      found.push({ term, distance, similarity });
    }
  }
  found.sort((left, right) => {
    const nearlyEqual = Math.abs(left.similarity - right.similarity) < 1e-3;
    return nearlyEqual ? left.distance - right.distance : right.similarity - left.similarity;
  });
  return found.slice(0, settings.maxResults);
}
|
|
423
|
+
|
|
424
|
+
// src/pagination.ts
|
|
425
|
+
/**
 * Translates `limit`/`cursor` into the `PAGE` option passed to the search
 * index's QUERY call.
 *
 * @param {{limit?: number, cursor?: string}} options `cursor` is a page
 *   number encoded as a decimal string.
 * @returns {{PAGE?: {NUMBER: number, SIZE: number}}} An empty object when no
 *   limit is given. A missing, malformed or negative cursor selects page 0
 *   instead of producing a NaN or negative page number.
 */
function buildPageOptions(options) {
  if (!options.limit) return {};
  const parsed = options.cursor ? Number.parseInt(options.cursor, 10) : 0;
  const pageNumber = Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
  return {
    PAGE: {
      NUMBER: pageNumber,
      SIZE: options.limit
    }
  };
}
|
|
434
|
+
/**
 * Derives previous/next page cursors for a result set.
 *
 * @param {number} total Total number of matching results.
 * @param {{limit?: number, cursor?: string}} options `cursor` is the current
 *   page number as a decimal string; a missing, malformed or negative cursor
 *   is treated as page 0.
 * @returns {{prevCursor: string|null, nextCursor: string|null}} Page numbers
 *   as strings, or `null` where there is no such page. Without a limit there
 *   is never a next page.
 */
function buildPaginationCursors(total, options) {
  const parsed = options.cursor ? Number.parseInt(options.cursor, 10) : 0;
  const currentPage = Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
  const pageSize = options.limit;
  const hasPreviousPage = currentPage > 0;
  const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
  return {
    prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
    nextCursor: hasNextPage ? (currentPage + 1).toString() : null
  };
}
|
|
444
|
+
|
|
445
|
+
// src/fuzzy-search-wrapper.ts
|
|
446
|
+
/**
 * Adds typo-tolerant querying on top of a search index instance.
 *
 * The wrapped index must provide `DICTIONARY(token)` and
 * `QUERY(query, options)`. Similar-term lookups are memoized in a FuzzyCache.
 */
var FuzzySearchWrapper = class {
  cache;
  searchIndex;
  /**
   * @param {object} searchIndex Index exposing `DICTIONARY` and `QUERY`.
   * @param {number} [cacheSize=100] Capacity of the similar-term cache.
   */
  constructor(searchIndex, cacheSize = 100) {
    this.searchIndex = searchIndex;
    this.cache = new FuzzyCache(cacheSize);
  }
  /**
   * Reads the index dictionary, optionally restricted to one field.
   *
   * @param {string} [field]
   * @returns {Promise<string[]>} Dictionary entries that are strings.
   */
  async getDictionary(field) {
    const token = field ? { FIELD: field } : void 0;
    const dictionary = await this.searchIndex.DICTIONARY(token);
    return dictionary.filter((entry) => typeof entry === "string");
  }
  /**
   * Finds dictionary terms similar to `query`, using the cache when possible.
   *
   * @param {string} query
   * @param {string} [field] Restrict the dictionary to this field.
   * @param {object} [options] Fuzzy options passed to `findSimilarTerms`.
   * @returns {Promise<{term: string, distance: number, similarity: number}[]>}
   */
  async findSimilar(query, field, options = {}) {
    const cacheKey = `${query}:${field || "all"}`;
    const cached = this.cache.get(cacheKey, options);
    if (cached) return cached;
    const dictionary = await this.getDictionary(field);
    const matches = findSimilarTerms(query, dictionary, options);
    this.cache.set(cacheKey, options, matches);
    return matches;
  }
  /**
   * Splits a query into terms and looks up similar dictionary terms for each.
   *
   * @param {string} query Space-separated search terms.
   * @param {object} [options] Overrides for DEFAULT_FUZZY_OPTIONS.
   * @returns {Promise<{original: string[], expanded: string[], matches: object}>}
   *   `original` holds the query terms, `expanded` the de-duplicated union of
   *   terms and their alternatives, and `matches` maps each term that had any
   *   match to its match list.
   */
  async expandQuery(query, options = {}) {
    const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
    const terms = query.split(" ").map((t) => t.trim()).filter((t) => t.length > 0);
    const expanded = [];
    const matches = {};
    for (const term of terms) {
      const similarTerms = await this.findSimilar(term, void 0, opts);
      expanded.push(term);
      const similarValues = similarTerms.filter((m) => m.term.toLowerCase() !== term.toLowerCase()).map((m) => m.term);
      expanded.push(...similarValues);
      if (similarTerms.length > 0) {
        matches[term] = similarTerms;
      }
    }
    return {
      original: terms,
      expanded: Array.from(new Set(expanded)),
      matches
    };
  }
  /**
   * Runs a query against the index, optionally widening every term with its
   * fuzzy alternatives.
   *
   * @param {string} query Space-separated search terms.
   * @param {{limit?: number, cursor?: string, fuzzy?: boolean, fuzzyOptions?: object}} [options]
   * @returns {Promise<object>} `results`, `total`, `prevCursor`, `nextCursor`
   *   and, for fuzzy queries, `fuzzyMatches`.
   */
  async query(query, options = {}) {
    const pageOptions = buildPageOptions(options);
    // Shared result assembly for the exact and fuzzy paths.
    const run = async (searchQuery, fuzzyMatches) => {
      const response = await this.searchIndex.QUERY(searchQuery, pageOptions);
      const total = response.RESULT_LENGTH || 0;
      const page = {
        results: response.RESULT || [],
        total,
        ...buildPaginationCursors(total, options)
      };
      return fuzzyMatches === void 0 ? page : { ...page, fuzzyMatches };
    };
    if (!options.fuzzy) {
      return run({ AND: query.split(" ").filter((t) => t) });
    }
    const expansion = await this.expandQuery(query, options.fuzzyOptions);
    // Own-property lookup: a term such as "constructor" must not resolve to
    // a member inherited from Object.prototype.
    const matchesFor = (term) => Object.hasOwn(expansion.matches, term) ? expansion.matches[term] : [];
    // Decide per term whether a real alternative exists. Comparing list
    // lengths is not reliable, because the expanded list is de-duplicated
    // across terms.
    const hasAlternatives = expansion.original.some(
      (term) => matchesFor(term).some((m) => m.term.toLowerCase() !== term.toLowerCase())
    );
    if (!hasAlternatives) {
      return run({ AND: expansion.original }, expansion.matches);
    }
    const queryGroups = expansion.original.map(
      // The Set drops the exact match, which would otherwise repeat the term.
      (term) => [...new Set([term, ...matchesFor(term).map((m) => m.term)])]
    );
    const searchQuery = queryGroups.length === 1 ? { OR: queryGroups[0] } : {
      AND: queryGroups.map(
        (group) => group.length === 1 ? group[0] : { OR: group }
      )
    };
    return run(searchQuery, expansion.matches);
  }
  /** Empties the similar-term cache. */
  clearCache() {
    this.cache.clear();
  }
  /** @returns {number} Number of cached similar-term lookups. */
  getCacheSize() {
    return this.cache.size;
  }
};
|
|
263
549
|
|
|
264
550
|
// src/client/index.ts
|
|
265
|
-
|
|
266
|
-
var
|
|
267
|
-
var import_memory_level = require("memory-level");
|
|
268
|
-
var zlib = __toESM(require("zlib"));
|
|
551
|
+
import * as zlib from "node:zlib";
|
|
552
|
+
var SqliteLevel2 = sqliteLevelModule.default?.SqliteLevel ?? sqliteLevelModule.SqliteLevel;
|
|
269
553
|
var DEFAULT_TOKEN_SPLIT_REGEX = /[\p{L}\d_]+/gu;
|
|
270
554
|
var LocalSearchIndexClient = class {
|
|
555
|
+
searchIndex;
|
|
556
|
+
memoryLevel;
|
|
557
|
+
stopwords;
|
|
558
|
+
tokenSplitRegex;
|
|
559
|
+
fuzzySearchWrapper;
|
|
271
560
|
constructor(options) {
|
|
272
|
-
this.memoryLevel = new
|
|
561
|
+
this.memoryLevel = new MemoryLevel();
|
|
273
562
|
this.stopwords = lookupStopwords(options.stopwordLanguages);
|
|
274
563
|
this.tokenSplitRegex = options.tokenSplitRegex ? new RegExp(options.tokenSplitRegex, "gu") : DEFAULT_TOKEN_SPLIT_REGEX;
|
|
275
564
|
}
|
|
276
565
|
async onStartIndexing() {
|
|
277
|
-
|
|
566
|
+
const options = {
|
|
278
567
|
db: this.memoryLevel,
|
|
279
568
|
stopwords: this.stopwords,
|
|
280
569
|
tokenSplitRegex: this.tokenSplitRegex
|
|
281
|
-
}
|
|
570
|
+
};
|
|
571
|
+
this.searchIndex = await si(
|
|
572
|
+
options
|
|
573
|
+
);
|
|
574
|
+
this.fuzzySearchWrapper = new FuzzySearchWrapper(this.searchIndex);
|
|
282
575
|
}
|
|
283
576
|
async put(docs) {
|
|
284
577
|
if (!this.searchIndex) {
|
|
285
578
|
throw new Error("onStartIndexing must be called first");
|
|
286
579
|
}
|
|
287
|
-
|
|
580
|
+
await this.searchIndex.PUT(docs);
|
|
288
581
|
}
|
|
289
582
|
async del(ids) {
|
|
290
583
|
if (!this.searchIndex) {
|
|
291
584
|
throw new Error("onStartIndexing must be called first");
|
|
292
585
|
}
|
|
293
|
-
|
|
294
|
-
}
|
|
295
|
-
query(query, options) {
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
586
|
+
await this.searchIndex.DELETE(ids);
|
|
587
|
+
}
|
|
588
|
+
async query(query, options) {
|
|
589
|
+
if (!this.searchIndex) {
|
|
590
|
+
throw new Error("onStartIndexing must be called first");
|
|
591
|
+
}
|
|
592
|
+
if (options?.fuzzy && this.fuzzySearchWrapper) {
|
|
593
|
+
return this.fuzzySearchWrapper.query(query, {
|
|
594
|
+
limit: options.limit,
|
|
595
|
+
cursor: options.cursor,
|
|
596
|
+
fuzzy: true,
|
|
597
|
+
fuzzyOptions: options.fuzzyOptions
|
|
598
|
+
});
|
|
599
|
+
}
|
|
600
|
+
const searchIndexOptions = buildPageOptions({
|
|
601
|
+
limit: options?.limit,
|
|
602
|
+
cursor: options?.cursor
|
|
603
|
+
});
|
|
604
|
+
const terms = query.split(" ").filter((t) => t.trim().length > 0);
|
|
605
|
+
const queryObj = terms.length > 1 ? { AND: terms } : { AND: [terms[0] || ""] };
|
|
606
|
+
const searchResults = await this.searchIndex.QUERY(
|
|
607
|
+
queryObj,
|
|
608
|
+
searchIndexOptions
|
|
609
|
+
);
|
|
610
|
+
const total = searchResults.RESULT_LENGTH || 0;
|
|
611
|
+
const pagination = buildPaginationCursors(total, {
|
|
612
|
+
limit: options?.limit,
|
|
613
|
+
cursor: options?.cursor
|
|
301
614
|
});
|
|
615
|
+
return {
|
|
616
|
+
results: searchResults.RESULT || [],
|
|
617
|
+
total,
|
|
618
|
+
...pagination
|
|
619
|
+
};
|
|
302
620
|
}
|
|
303
621
|
async export(filename) {
|
|
304
|
-
const sqliteLevel = new
|
|
622
|
+
const sqliteLevel = new SqliteLevel2({ filename });
|
|
305
623
|
const iterator = this.memoryLevel.iterator();
|
|
306
624
|
for await (const [key, value] of iterator) {
|
|
307
625
|
await sqliteLevel.put(key, value);
|
|
@@ -310,55 +628,73 @@ var LocalSearchIndexClient = class {
|
|
|
310
628
|
}
|
|
311
629
|
};
|
|
312
630
|
/**
 * Search index client that uploads the finished index when indexing ends.
 *
 * `onFinishIndexing` requests a signed upload URL from the API, serializes
 * the in-memory index to a gzipped SQLite image and PUTs it to that URL.
 */
var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
  apiUrl;
  branch;
  indexerToken;
  /**
   * @param {object} options Options for LocalSearchIndexClient plus `apiUrl`,
   *   `branch` and `indexerToken`.
   */
  constructor(options) {
    super(options);
    this.apiUrl = options.apiUrl;
    this.branch = options.branch;
    this.indexerToken = options.indexerToken;
  }
  /**
   * Requests a signed upload URL for the configured branch.
   *
   * @returns {Promise<string>} The `signedUrl` field of the response.
   * @throws {Error} When the response status is not 200; the error message
   *   includes the response body's `message` if one can be parsed.
   */
  async getUploadUrl() {
    const headers = new Headers();
    headers.append("x-api-key", this.indexerToken || "");
    headers.append("Content-Type", "application/json");
    const response = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
      method: "GET",
      headers
    });
    if (response.status !== 200) {
      // The error body is optional; fall back to an empty object.
      const errorBody = await response.json().catch(() => ({}));
      throw new Error(
        `Failed to get upload url. Status: ${response.status}${errorBody?.message ? ` - ${errorBody.message}` : ""}`
      );
    }
    const { signedUrl } = await response.json();
    return signedUrl;
  }
  /**
   * Copies every entry of the in-memory store into an in-memory SQLite
   * database and returns its gzipped serialization.
   *
   * @returns {Promise<Buffer>} Gzipped database image.
   */
  async serializeIndex() {
    const sqliteLevel = new SqliteLevel2({ filename: ":memory:" });
    let buffer;
    try {
      for await (const [key, value] of this.memoryLevel.iterator()) {
        await sqliteLevel.put(key, value);
      }
      buffer = sqliteLevel.db.serialize();
    } finally {
      // Release the database handle even when copying or serializing fails.
      await sqliteLevel.close();
    }
    return zlib.gzipSync(buffer);
  }
  /**
   * Uploads the serialized index with an HTTP PUT.
   *
   * @param {string} signedUrl Destination URL.
   * @param {Buffer} data Request body.
   * @throws {Error} When the response status is not 200; the message carries
   *   the response text.
   */
  async uploadIndex(signedUrl, data) {
    const response = await fetch(signedUrl, {
      method: "PUT",
      body: data
    });
    if (response.status !== 200) {
      const errorText = await response.text();
      throw new Error(
        `Failed to upload search index. Status: ${response.status}\n${errorText}`
      );
    }
  }
  /** Serializes the index and uploads it to a freshly requested signed URL. */
  async onFinishIndexing() {
    const signedUrl = await this.getUploadUrl();
    const indexData = await this.serializeIndex();
    await this.uploadIndex(signedUrl, indexData);
  }
};
|
|
358
|
-
|
|
359
|
-
|
|
686
|
+
export {
|
|
687
|
+
DEFAULT_FUZZY_OPTIONS,
|
|
688
|
+
FuzzyCache,
|
|
689
|
+
FuzzySearchWrapper,
|
|
360
690
|
LocalSearchIndexClient,
|
|
361
691
|
SearchIndexer,
|
|
362
692
|
TinaCMSSearchIndexClient,
|
|
363
|
-
|
|
364
|
-
|
|
693
|
+
buildPageOptions,
|
|
694
|
+
buildPaginationCursors,
|
|
695
|
+
damerauLevenshteinDistance,
|
|
696
|
+
findSimilarTerms,
|
|
697
|
+
levenshteinDistance,
|
|
698
|
+
si2 as si,
|
|
699
|
+
similarityScore
|
|
700
|
+
};
|