@tinacms/search 0.0.0-c1132cd-20241024060747 → 0.0.0-c19d29e-20251224001156
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/client/index.d.ts +10 -13
- package/dist/fuzzy/cache.d.ts +11 -0
- package/dist/fuzzy/distance.d.ts +8 -0
- package/dist/fuzzy/index.d.ts +4 -0
- package/dist/fuzzy/types.d.ts +19 -0
- package/dist/fuzzy-search-wrapper.d.ts +23 -0
- package/dist/index-client.d.ts +25 -13
- package/dist/index-client.js +201 -206
- package/dist/index.d.ts +8 -3
- package/dist/index.js +530 -211
- package/dist/indexer/index.d.ts +1 -0
- package/dist/indexer/utils.d.ts +1 -1
- package/dist/pagination.d.ts +16 -0
- package/dist/types.d.ts +51 -11
- package/package.json +18 -19
- package/dist/index-client.mjs +0 -195
package/dist/index.js
CHANGED
|
@@ -1,179 +1,159 @@
|
|
|
1
|
-
var __create = Object.create;
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __export = (target, all) => {
|
|
8
|
-
for (var name in all)
|
|
9
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
-
};
|
|
11
|
-
var __copyProps = (to, from, except, desc) => {
|
|
12
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
-
for (let key of __getOwnPropNames(from))
|
|
14
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
-
}
|
|
17
|
-
return to;
|
|
18
|
-
};
|
|
19
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
21
|
-
mod
|
|
22
|
-
));
|
|
23
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
24
|
-
|
|
25
1
|
// src/index.ts
|
|
26
|
-
|
|
27
|
-
__export(src_exports, {
|
|
28
|
-
LocalSearchIndexClient: () => LocalSearchIndexClient,
|
|
29
|
-
SearchIndexer: () => SearchIndexer,
|
|
30
|
-
TinaCMSSearchIndexClient: () => TinaCMSSearchIndexClient,
|
|
31
|
-
si: () => import_search_index2.default
|
|
32
|
-
});
|
|
33
|
-
module.exports = __toCommonJS(src_exports);
|
|
34
|
-
var import_search_index2 = __toESM(require("search-index"));
|
|
2
|
+
import createSearchIndex2 from "search-index";
|
|
35
3
|
|
|
36
4
|
// src/indexer/index.ts
|
|
37
|
-
|
|
5
|
+
import {
|
|
6
|
+
loadAndParseWithAliases,
|
|
7
|
+
sequential,
|
|
8
|
+
scanAllContent,
|
|
9
|
+
scanContentByPaths,
|
|
10
|
+
transformDocument,
|
|
11
|
+
transformDocumentIntoPayload
|
|
12
|
+
} from "@tinacms/graphql";
|
|
38
13
|
|
|
39
14
|
// src/indexer/utils.ts
|
|
40
|
-
|
|
15
|
+
import * as sw from "stopword";
|
|
16
|
+
var INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
|
|
41
17
|
/**
 * Accumulates tokens until a character budget is exhausted.
 *
 * Only the characters of the tokens themselves count towards `limit`; the
 * single spaces inserted by `toString()` are not counted, so the joined
 * string can be longer than `limit` by up to one character per extra token.
 */
var StringBuilder = class {
  buffer = [];
  limit;
  length = 0;
  /**
   * @param {number} limit Maximum total number of token characters to keep.
   */
  constructor(limit) {
    this.limit = limit;
  }
  /**
   * Tries to add a token.
   *
   * @param {string} str Token to add.
   * @returns {boolean} `true` when the token did not fit (nothing was added),
   *   `false` when it was stored.
   */
  append(str) {
    if (this.length + str.length > this.limit) return true;
    this.buffer.push(str);
    this.length += str.length;
    // The guard above already proved the new length is within the limit, so
    // a second comparison here could never be true.
    return false;
  }
  /**
   * @returns {string} The stored tokens joined by single spaces.
   */
  toString() {
    return this.buffer.join(" ");
  }
};
|
|
63
|
-
var
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
}
|
|
34
|
+
/**
 * Splits a string on runs of whitespace, periods and commas and lower-cases
 * each piece. Empty pieces (from leading/trailing separators) are dropped.
 *
 * @param {string} str Text to tokenize.
 * @returns {string[]} Lower-cased, non-empty tokens in source order.
 */
var tokenizeString = (str) => {
  const tokens = [];
  for (const piece of str.split(/[\s\.,]+/)) {
    const lowered = piece.toLowerCase();
    if (lowered) tokens.push(lowered);
  }
  return tokens;
};
|
|
37
|
+
/**
 * Walks a node tree depth-first and appends the tokens of every node whose
 * `type` is listed in `nodeTypes` (reading `text`, falling back to `value`)
 * to `builder`.
 *
 * Traversal stops for the whole tree as soon as the builder reports that a
 * token did not fit, so no later (shorter) token can slip in after the budget
 * was hit. Nodes whose `children` is not an array are treated as leaves
 * instead of throwing.
 *
 * @param {object|null|undefined} data Node to visit.
 * @param {{append: (token: string) => boolean}} builder Token sink; `append`
 *   returns `true` when it is full.
 * @param {string[]} nodeTypes Node types whose text should be collected.
 * @returns {boolean} `true` when the builder became full during this visit.
 */
var extractText = (data, builder, nodeTypes) => {
  if (!data) return false;
  if (nodeTypes.includes(data.type ?? "") && (data.text || data.value)) {
    const tokens = tokenizeString(data.text || data.value || "");
    for (const token of tokens) {
      if (builder.append(token)) return true;
    }
  }
  if (Array.isArray(data.children)) {
    for (const child of data.children) {
      // Propagate "full" upwards so siblings are not visited needlessly.
      if (extractText(child, builder, nodeTypes)) return true;
    }
  }
  return false;
};
|
|
80
|
-
var
|
|
47
|
+
/**
 * Derives a document path relative to its collection: backslashes become
 * forward slashes, the first occurrence of `collection.path` is removed, and
 * one leading and one trailing slash are trimmed.
 *
 * @param {string} path Document path.
 * @param {{path: string}} collection Collection the document belongs to.
 * @returns {string} The trimmed path.
 */
var getRelativePath = (path, collection) => {
  const forwardSlashed = path.replace(/\\/g, "/");
  const withoutCollection = forwardSlashed.replace(collection.path, "");
  return withoutCollection.replace(/^\/|\/$/g, "");
};
|
|
83
|
-
var
|
|
84
|
-
return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
|
|
85
|
-
};
|
|
86
|
-
var processTextFieldValue = (value, maxLen) => {
|
|
50
|
+
/**
 * Tokenizes a plain string and keeps tokens, in order, until the first one
 * that the length-limited builder rejects.
 *
 * @param {string} value Raw field value.
 * @param {number} maxLength Character budget handed to the builder.
 * @returns {string} The kept tokens joined by the builder.
 */
var processTextField = (value, maxLength) => {
  const tokens = tokenizeString(value);
  const builder = new StringBuilder(maxLength);
  let index = 0;
  // Stop at the first rejected token, exactly like a `break` in a for-of loop.
  while (index < tokens.length && !builder.append(tokens[index])) {
    index += 1;
  }
  return builder.toString();
};
|
|
58
|
+
/**
 * Flattens one rich-text value into a space-joined token string by collecting
 * the text of its indexable nodes into a length-limited builder.
 *
 * @param {object} value Root node of the rich-text value.
 * @param {number} maxLength Character budget handed to the builder.
 * @returns {string} The collected text.
 */
var processRichTextField = (value, maxLength) => {
  const collected = new StringBuilder(maxLength);
  extractText(value, collected, INDEXABLE_NODE_TYPES);
  const flattened = collected.toString();
  return flattened;
};
|
|
63
|
+
/**
 * Prepares an object field for indexing by running the document processor on
 * the nested value, or on every element when the field is a list.
 *
 * @param {object|object[]} data Field value (an array when `field.list` is set).
 * @param {string} path Document path, passed through unchanged.
 * @param {object} collection Collection definition, passed through unchanged.
 * @param {number} textIndexLength Default per-field character budget.
 * @param {object} field Definition of the object field being processed.
 * @returns {object|object[]} The processed value(s).
 */
var processObjectField = (data, path, collection, textIndexLength, field) => {
  const indexNested = (nested) => processDocumentForIndexing(nested, path, collection, textIndexLength, field);
  return field.list ? data.map((obj) => indexNested(obj)) : indexNested(data);
};
|
|
77
|
+
/**
 * Prepares a string field for indexing, handling both scalar and list fields.
 *
 * @param {string|string[]} data Field value (an array when `isList` is true).
 * @param {number} maxLength Character budget per value.
 * @param {boolean} isList Whether the field holds a list of strings.
 * @returns {string|string[]} The processed value(s).
 */
var processStringField = (data, maxLength, isList) => {
  if (!isList) {
    return processTextField(data, maxLength);
  }
  const processed = data.map((value) => processTextField(value, maxLength));
  return processed;
};
|
|
85
|
+
/**
 * Prepares a rich-text field for indexing, handling both scalar and list
 * fields.
 *
 * @param {object|object[]} data Field value (an array when `isList` is true).
 * @param {number} maxLength Character budget per value.
 * @param {boolean} isList Whether the field holds a list of rich-text values.
 * @returns {string|string[]} The flattened text value(s).
 */
var processRichTextFieldData = (data, maxLength, isList) => {
  if (!isList) {
    return processRichTextField(data, maxLength);
  }
  const flattened = data.map((value) => processRichTextField(value, maxLength));
  return flattened;
};
|
|
96
93
|
/**
 * Rewrites a document (or nested object) in place into the shape stored in
 * the search index.
 *
 * For a top-level call (no `field`) it stamps `_id` and `_relativePath`.
 * It then walks the field definitions: non-searchable fields are deleted,
 * falsy values are left untouched, and object / string / rich-text values are
 * replaced by their processed form. Other field types are kept as they are.
 *
 * @param {object} data Document or nested object; mutated and returned.
 * @param {string} path Document path.
 * @param {object} collection Collection definition (`name`, `path`, `fields`).
 * @param {number} textIndexLength Default per-field character budget.
 * @param {object} [field] Object-field definition when processing a nested value.
 * @returns {object} The same `data` object.
 */
var processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
  const isTopLevel = !field;
  if (isTopLevel) {
    const relativePath = getRelativePath(path, collection);
    data["_id"] = `${collection.name}:${relativePath}`;
    data["_relativePath"] = relativePath;
  }
  const definitions = field?.fields || collection.fields || [];
  for (const definition of definitions) {
    const key = definition.name;
    if (!definition.searchable) {
      delete data[key];
      continue;
    }
    const current = data[key];
    if (!current) continue;
    // A per-field override wins over the indexer-wide default budget.
    const budget = definition.maxSearchIndexFieldLength || textIndexLength;
    const isList = Boolean(definition.list);
    const kind = definition.type;
    if (kind === "object") {
      // Nested objects receive the default budget; their own fields may override it.
      data[key] = processObjectField(current, path, collection, textIndexLength, definition);
    } else if (kind === "string") {
      data[key] = processStringField(current, budget, isList);
    } else if (kind === "rich-text") {
      data[key] = processRichTextFieldData(current, budget, isList);
    }
  }
  return data;
};
|
|
159
|
-
var
|
|
136
|
+
// Memo of merged stopword lists keyed by the comma-joined language keys.
// Created without a prototype so keys such as "constructor" cannot resolve to
// inherited Object members.
var stopwordCache = Object.create(null);
/**
 * Returns the merged stopword list for the given language keys.
 *
 * @param {string[]|undefined} keys Names of stopword lists exported by `sw`.
 *   When omitted, `defaultStopWords` is returned as-is.
 * @param {string[]} [defaultStopWords] Fallback list (defaults to `sw.eng`).
 * @returns {string[]} Concatenated stopwords; the same array instance is
 *   returned for repeated calls with the same keys.
 */
var lookupStopwords = (keys, defaultStopWords = sw.eng) => {
  if (!keys) {
    return defaultStopWords;
  }
  const cacheKey = keys.join(",");
  const cached = stopwordCache[cacheKey];
  if (cached) {
    return cached;
  }
  // Unknown keys and non-array members of `sw` (e.g. its helper functions)
  // contribute nothing instead of leaking into the list.
  const stopwords = keys.flatMap((key) => {
    const words = sw[key];
    return Array.isArray(words) ? words : [];
  });
  stopwordCache[cacheKey] = stopwords;
  return stopwords;
};
|
|
174
149
|
|
|
175
150
|
// src/indexer/index.ts
|
|
176
151
|
var SearchIndexer = class {
|
|
152
|
+
batchSize;
|
|
153
|
+
client;
|
|
154
|
+
bridge;
|
|
155
|
+
schema;
|
|
156
|
+
textIndexLength;
|
|
177
157
|
constructor(options) {
|
|
178
158
|
this.client = options.client;
|
|
179
159
|
this.bridge = options.bridge;
|
|
@@ -181,15 +161,33 @@ var SearchIndexer = class {
|
|
|
181
161
|
this.batchSize = options.batchSize || 100;
|
|
182
162
|
this.textIndexLength = options.textIndexLength || 500;
|
|
183
163
|
}
|
|
164
|
+
createBatchProcessor() {
|
|
165
|
+
let batch = [];
|
|
166
|
+
return {
|
|
167
|
+
callback: async (item) => {
|
|
168
|
+
batch.push(item);
|
|
169
|
+
if (batch.length >= this.batchSize) {
|
|
170
|
+
await this.client.put(batch);
|
|
171
|
+
batch = [];
|
|
172
|
+
}
|
|
173
|
+
},
|
|
174
|
+
flush: async () => {
|
|
175
|
+
if (batch.length > 0) {
|
|
176
|
+
await this.client.put(batch);
|
|
177
|
+
batch = [];
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
};
|
|
181
|
+
}
|
|
184
182
|
makeIndexerCallback(itemCallback) {
|
|
185
183
|
return async (collection, contentPaths) => {
|
|
186
184
|
const templateInfo = this.schema.getTemplatesForCollectable(collection);
|
|
187
|
-
await
|
|
188
|
-
const data = await
|
|
185
|
+
await sequential(contentPaths, async (path) => {
|
|
186
|
+
const data = await transformDocumentIntoPayload(
|
|
189
187
|
`${collection.path}/${path}`,
|
|
190
|
-
|
|
188
|
+
transformDocument(
|
|
191
189
|
path,
|
|
192
|
-
await
|
|
190
|
+
await loadAndParseWithAliases(
|
|
193
191
|
this.bridge,
|
|
194
192
|
path,
|
|
195
193
|
collection,
|
|
@@ -211,97 +209,400 @@ var SearchIndexer = class {
|
|
|
211
209
|
};
|
|
212
210
|
}
|
|
213
211
|
async indexContentByPaths(documentPaths) {
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
batch.push(item);
|
|
218
|
-
if (batch.length > this.batchSize) {
|
|
219
|
-
await this.client.put(batch);
|
|
220
|
-
batch = [];
|
|
221
|
-
}
|
|
222
|
-
};
|
|
223
|
-
await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
|
|
224
|
-
await (0, import_graphql.scanContentByPaths)(
|
|
212
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
213
|
+
await this.client.onStartIndexing?.();
|
|
214
|
+
await scanContentByPaths(
|
|
225
215
|
this.schema,
|
|
226
216
|
documentPaths,
|
|
227
|
-
this.makeIndexerCallback(
|
|
217
|
+
this.makeIndexerCallback(callback)
|
|
228
218
|
);
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
}
|
|
232
|
-
await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
|
|
219
|
+
await flush();
|
|
220
|
+
await this.client.onFinishIndexing?.();
|
|
233
221
|
}
|
|
234
222
|
async indexAllContent() {
|
|
235
|
-
|
|
236
|
-
await
|
|
237
|
-
|
|
238
|
-
const itemCallback = async (item) => {
|
|
239
|
-
batch.push(item);
|
|
240
|
-
if (batch.length > this.batchSize) {
|
|
241
|
-
await this.client.put(batch);
|
|
242
|
-
batch = [];
|
|
243
|
-
}
|
|
244
|
-
};
|
|
245
|
-
const warnings = await (0, import_graphql.scanAllContent)(
|
|
223
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
224
|
+
await this.client.onStartIndexing?.();
|
|
225
|
+
const warnings = await scanAllContent(
|
|
246
226
|
this.schema,
|
|
247
227
|
this.bridge,
|
|
248
|
-
this.makeIndexerCallback(
|
|
228
|
+
this.makeIndexerCallback(callback)
|
|
249
229
|
);
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
}
|
|
253
|
-
await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
|
|
230
|
+
await flush();
|
|
231
|
+
await this.client.onFinishIndexing?.();
|
|
254
232
|
return { warnings };
|
|
255
233
|
}
|
|
256
234
|
async deleteIndexContent(documentPaths) {
|
|
257
|
-
|
|
258
|
-
await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
|
|
235
|
+
await this.client.onStartIndexing?.();
|
|
259
236
|
await this.client.del(documentPaths);
|
|
260
|
-
await
|
|
237
|
+
await this.client.onFinishIndexing?.();
|
|
261
238
|
}
|
|
262
239
|
};
|
|
263
240
|
|
|
264
241
|
// src/client/index.ts
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
242
|
+
import * as sqliteLevelModule from "sqlite-level";
|
|
243
|
+
import createSearchIndex from "search-index";
|
|
244
|
+
import { MemoryLevel } from "memory-level";
|
|
245
|
+
|
|
246
|
+
// src/fuzzy/types.ts
|
|
247
|
+
// Default fuzzy-matching options. findSimilarTerms and
// FuzzySearchWrapper.expandQuery spread caller options over these values.
var DEFAULT_FUZZY_OPTIONS = {
  // Terms whose edit distance from the query exceeds this are dropped
  // (prefix matches are accepted before this check).
  maxDistance: 2,
  // Minimum similarity score a non-prefix candidate needs to be kept.
  minSimilarity: 0.6,
  // Maximum number of matches returned after sorting.
  maxResults: 10,
  // When true the Damerau-Levenshtein distance is used instead of plain Levenshtein.
  useTranspositions: true,
  // When false both the query and the terms are lower-cased before comparing.
  caseSensitive: false,
  // When true candidates are pre-filtered by n-gram overlap with the query.
  useNgramFilter: true,
  // Length of the n-grams used by the pre-filter.
  ngramSize: 2,
  // Minimum n-gram overlap ratio a candidate needs to pass the pre-filter.
  minNgramOverlap: 0.2
};
|
|
257
|
+
|
|
258
|
+
// src/fuzzy/cache.ts
|
|
259
|
+
/**
 * Small least-recently-used cache for fuzzy lookup results, keyed by the
 * JSON serialisation of `{ query, options }`.
 *
 * Recency is tracked through Map insertion order: the first key of the Map is
 * always the least recently used entry.
 */
var FuzzyCache = class {
  cache;
  maxSize;
  /**
   * @param {number} [maxSize=100] Number of entries kept before the least
   *   recently used one is evicted.
   */
  constructor(maxSize = 100) {
    this.cache = /* @__PURE__ */ new Map();
    this.maxSize = maxSize;
  }
  /**
   * @param {string} query
   * @param {object} options
   * @returns {string} Cache key; sensitive to the property order of `options`.
   */
  getCacheKey(query, options) {
    return JSON.stringify({ query, options });
  }
  /**
   * Looks up an entry and marks it as most recently used.
   *
   * @returns {*} The cached results, or `undefined` when absent.
   */
  get(query, options) {
    const key = this.getCacheKey(query, options);
    if (!this.cache.has(key)) return void 0;
    const value = this.cache.get(key);
    // Re-insert so the key moves to the "most recent" end of the Map.
    this.cache.delete(key);
    this.cache.set(key, value);
    return value;
  }
  /**
   * Stores results. Overwriting an existing key only refreshes it; an eviction
   * happens only when a new key would push the cache past `maxSize`.
   */
  set(query, options, results) {
    const key = this.getCacheKey(query, options);
    if (this.cache.has(key)) {
      // Map.set on an existing key keeps its old position, so drop it first.
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(key, results);
  }
  /** Removes every entry. */
  clear() {
    this.cache.clear();
  }
  /** @returns {number} Number of cached entries. */
  get size() {
    return this.cache.size;
  }
};
|
|
293
|
+
|
|
294
|
+
// src/fuzzy/distance.ts
|
|
295
|
+
var PREFIX_MATCH_MIN_SIMILARITY = 0.8;
|
|
296
|
+
/**
 * Computes the Levenshtein edit distance (insertions, deletions and
 * substitutions, each costing 1) between two strings, comparing them by
 * UTF-16 code unit.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Minimum number of single-character edits.
 */
function levenshteinDistance(str1, str2) {
  const rowCount = str1.length + 1;
  const colCount = str2.length + 1;
  // table[r][c] = distance between the first r chars of str1 and first c of str2.
  const table = Array.from({ length: rowCount }, (_, r) => {
    const row = new Array(colCount).fill(0);
    row[0] = r;
    return row;
  });
  for (let c = 0; c < colCount; c++) {
    table[0][c] = c;
  }
  for (let r = 1; r < rowCount; r++) {
    for (let c = 1; c < colCount; c++) {
      const diagonal = table[r - 1][c - 1];
      table[r][c] = str1[r - 1] === str2[c - 1]
        ? diagonal
        : 1 + Math.min(table[r - 1][c], table[r][c - 1], diagonal);
    }
  }
  return table[rowCount - 1][colCount - 1];
}
|
|
317
|
+
/**
 * Normalises an edit distance into a similarity score:
 * `1 - distance / max(len1, len2)`, with two empty strings scoring 1.
 *
 * @param {string} str1
 * @param {string} str2
 * @param {boolean} [useTranspositions=false] Use the Damerau-Levenshtein
 *   distance instead of plain Levenshtein.
 * @returns {number} Similarity score.
 */
function similarityScore(str1, str2, useTranspositions = false) {
  const measure = useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const distance = measure(str1, str2);
  const longest = Math.max(str1.length, str2.length);
  return longest === 0 ? 1 : 1 - distance / longest;
}
|
|
323
|
+
/**
 * Computes the Damerau-Levenshtein distance between two strings: like
 * Levenshtein, but swapping two adjacent characters also counts as one edit.
 *
 * The table carries one extra sentinel row and column (index 0) filled with
 * `len1 + len2`, so a transposition lookup for a character that has not been
 * seen yet can never win the minimum.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Minimum number of edits.
 */
function damerauLevenshteinDistance(str1, str2) {
  const rowCount = str1.length;
  const colCount = str2.length;
  const sentinel = rowCount + colCount;
  // Last row (1-based) in which each character of str1 has been seen so far.
  const lastRowOfChar = new Map();
  const table = Array.from({ length: rowCount + 2 }, () => new Array(colCount + 2).fill(0));
  table[0][0] = sentinel;
  for (let r = 0; r <= rowCount; r++) {
    table[r + 1][0] = sentinel;
    table[r + 1][1] = r;
  }
  for (let c = 0; c <= colCount; c++) {
    table[0][c + 1] = sentinel;
    table[1][c + 1] = c;
  }
  for (let r = 1; r <= rowCount; r++) {
    const rowChar = str1[r - 1];
    // Last column in this row where the two characters matched.
    let lastMatchCol = 0;
    for (let c = 1; c <= colCount; c++) {
      const colChar = str2[c - 1];
      const prevRow = lastRowOfChar.get(colChar) ?? 0;
      // Must be read before it is possibly updated for the current cell.
      const prevCol = lastMatchCol;
      const isMatch = rowChar === colChar;
      if (isMatch) {
        lastMatchCol = c;
      }
      const viaDiagonal = table[r][c] + (isMatch ? 0 : 1);
      const viaLeft = table[r + 1][c] + 1;
      const viaUp = table[r][c + 1] + 1;
      const viaTransposition = table[prevRow][prevCol] + (r - prevRow - 1) + 1 + (c - prevCol - 1);
      table[r + 1][c + 1] = Math.min(viaDiagonal, viaLeft, viaUp, viaTransposition);
    }
    lastRowOfChar.set(rowChar, r);
  }
  return table[rowCount + 1][colCount + 1];
}
|
|
359
|
+
/**
 * Collects the distinct substrings of length `n` of a string. A string
 * shorter than `n` yields a set containing just the string itself.
 *
 * @param {string} str
 * @param {number} [n=2] N-gram length.
 * @returns {Set<string>} The distinct n-grams.
 */
function getNgrams(str, n = 2) {
  if (str.length < n) {
    return /* @__PURE__ */ new Set([str]);
  }
  const grams = /* @__PURE__ */ new Set();
  let start = 0;
  while (start <= str.length - n) {
    grams.add(str.substring(start, start + n));
    start += 1;
  }
  return grams;
}
|
|
370
|
+
/**
 * Measures how much two n-gram sets overlap: the number of shared n-grams
 * divided by the size of the smaller set. Returns 0 when either set is empty.
 *
 * @param {Set<string>} ngrams1
 * @param {Set<string>} ngrams2
 * @returns {number} Overlap ratio.
 */
function ngramOverlap(ngrams1, ngrams2) {
  if (ngrams1.size === 0 || ngrams2.size === 0) {
    return 0;
  }
  const shared = [...ngrams1].filter((gram) => ngrams2.has(gram)).length;
  const smallerSize = Math.min(ngrams1.size, ngrams2.size);
  return shared / smallerSize;
}
|
|
379
|
+
/**
 * Finds dictionary terms that approximately match `query`.
 *
 * A term is accepted when it starts with the query (prefix match, scored at
 * least PREFIX_MATCH_MIN_SIMILARITY), or when its edit distance is within
 * `maxDistance` and its similarity is at least `minSimilarity`.
 *
 * The prefix check runs before the n-gram pre-filter, and the pre-filter is
 * skipped whenever the query or the term is shorter than `ngramSize`. A string
 * shorter than `ngramSize` has no real n-grams, so filtering on them would
 * reject every candidate (e.g. a one-letter query could never match "apple").
 *
 * @param {string} query Term typed by the user.
 * @param {Iterable<*>} dictionary Candidate terms; non-string and empty
 *   entries are ignored.
 * @param {object} [options] Overrides for DEFAULT_FUZZY_OPTIONS.
 * @returns {{term: string, distance: number, similarity: number}[]} Matches
 *   sorted by similarity (descending, ties broken by smaller distance) and
 *   truncated to `maxResults`.
 */
function findSimilarTerms(query, dictionary, options = {}) {
  const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
  const normalizedQuery = opts.caseSensitive ? query : query.toLowerCase();
  if (normalizedQuery.length === 0) return [];
  const matches = [];
  const distanceFunc = opts.useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const queryNgrams = opts.useNgramFilter && normalizedQuery.length >= opts.ngramSize
    ? getNgrams(normalizedQuery, opts.ngramSize)
    : null;
  for (const term of dictionary) {
    if (typeof term !== "string" || term.length === 0) continue;
    const normalizedTerm = opts.caseSensitive ? term : term.toLowerCase();
    if (normalizedTerm.startsWith(normalizedQuery)) {
      const prefixSimilarity = normalizedQuery.length / normalizedTerm.length;
      matches.push({
        term,
        distance: normalizedTerm.length - normalizedQuery.length,
        similarity: Math.max(prefixSimilarity, PREFIX_MATCH_MIN_SIMILARITY)
      });
      continue;
    }
    if (queryNgrams && normalizedTerm.length >= opts.ngramSize) {
      const termNgrams = getNgrams(normalizedTerm, opts.ngramSize);
      if (ngramOverlap(queryNgrams, termNgrams) < opts.minNgramOverlap) continue;
    }
    const distance = distanceFunc(normalizedQuery, normalizedTerm);
    if (distance > opts.maxDistance) continue;
    // Both strings are non-empty here, so the divisor is never zero. Deriving
    // the score from `distance` avoids running the edit-distance DP twice.
    const similarity = 1 - distance / Math.max(normalizedQuery.length, normalizedTerm.length);
    if (similarity >= opts.minSimilarity) {
      matches.push({ term, distance, similarity });
    }
  }
  matches.sort((a, b) => {
    // Treat near-equal similarities as ties and prefer the closer term.
    if (Math.abs(a.similarity - b.similarity) < 1e-3) {
      return a.distance - b.distance;
    }
    return b.similarity - a.similarity;
  });
  return matches.slice(0, opts.maxResults);
}
|
|
422
|
+
|
|
423
|
+
// src/pagination.ts
|
|
424
|
+
/**
 * Translates `{ limit, cursor }` into the `PAGE` option passed to the search
 * index query.
 *
 * The cursor is a zero-based page number encoded as a string. A cursor that
 * does not parse to a non-negative integer is treated as page 0 instead of
 * producing a NaN or negative page number.
 *
 * @param {{limit?: number, cursor?: string}} options
 * @returns {{PAGE?: {NUMBER: number, SIZE: number}}} Empty object when no
 *   limit is set (no pagination).
 */
function buildPageOptions(options) {
  if (!options.limit) return {};
  const requestedPage = options.cursor ? parseInt(options.cursor, 10) : 0;
  const pageNumber = Number.isInteger(requestedPage) && requestedPage > 0 ? requestedPage : 0;
  return {
    PAGE: {
      NUMBER: pageNumber,
      SIZE: options.limit
    }
  };
}
|
|
433
|
+
/**
 * Computes the previous/next page cursors for a result set.
 *
 * Cursors are zero-based page numbers encoded as strings. A cursor that does
 * not parse to a non-negative integer is treated as page 0, so a malformed
 * cursor still yields a usable `nextCursor`.
 *
 * @param {number} total Total number of matching results.
 * @param {{limit?: number, cursor?: string}} options
 * @returns {{prevCursor: string|null, nextCursor: string|null}} `null` marks
 *   the absence of a page in that direction; without a limit there is never a
 *   next page.
 */
function buildPaginationCursors(total, options) {
  const requestedPage = options.cursor ? parseInt(options.cursor, 10) : 0;
  const currentPage = Number.isInteger(requestedPage) && requestedPage > 0 ? requestedPage : 0;
  const pageSize = options.limit;
  const hasPreviousPage = currentPage > 0;
  const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
  return {
    prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
    nextCursor: hasNextPage ? (currentPage + 1).toString() : null
  };
}
|
|
443
|
+
|
|
444
|
+
// src/fuzzy-search-wrapper.ts
|
|
445
|
+
/**
 * Adds typo-tolerant querying on top of a search index: every query term is
 * expanded with similar terms from the index dictionary, and the expanded
 * query is then run against the index.
 */
var FuzzySearchWrapper = class {
  cache;
  searchIndex;
  /**
   * @param {object} searchIndex Index exposing `DICTIONARY` and `QUERY`.
   * @param {number} [cacheSize=100] Capacity of the similar-terms cache.
   */
  constructor(searchIndex, cacheSize = 100) {
    this.searchIndex = searchIndex;
    this.cache = new FuzzyCache(cacheSize);
  }
  /**
   * Reads the index dictionary, optionally restricted to one field.
   *
   * @param {string} [field]
   * @returns {Promise<string[]>} Only the string entries of the dictionary.
   */
  async getDictionary(field) {
    const token = field ? { FIELD: field } : void 0;
    const dictionary = await this.searchIndex.DICTIONARY(token);
    return dictionary.filter((entry) => typeof entry === "string");
  }
  /**
   * Finds dictionary terms similar to `query`, memoised per query/field/options.
   *
   * @param {string} query
   * @param {string} [field]
   * @param {object} [options] Fuzzy options.
   * @returns {Promise<{term: string, distance: number, similarity: number}[]>}
   */
  async findSimilar(query, field, options = {}) {
    const cacheKey = `${query}:${field || "all"}`;
    const cached = this.cache.get(cacheKey, options);
    if (cached) return cached;
    const dictionary = await this.getDictionary(field);
    const matches = findSimilarTerms(query, dictionary, options);
    this.cache.set(cacheKey, options, matches);
    return matches;
  }
  /**
   * Splits a query on spaces and looks up similar terms for each piece.
   *
   * @param {string} query
   * @param {object} [options] Fuzzy options, merged over the defaults.
   * @returns {Promise<{original: string[], expanded: string[], matches: object}>}
   *   `expanded` is de-duplicated; `matches` maps a term to its similar terms
   *   (only for terms that had at least one).
   */
  async expandQuery(query, options = {}) {
    const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
    const terms = query.split(" ").map((t) => t.trim()).filter((t) => t.length > 0);
    const expanded = [];
    const matches = {};
    for (const term of terms) {
      const similarTerms = await this.findSimilar(term, void 0, opts);
      expanded.push(term);
      const similarValues = similarTerms.filter((m) => m.term.toLowerCase() !== term.toLowerCase()).map((m) => m.term);
      expanded.push(...similarValues);
      if (similarTerms.length > 0) {
        matches[term] = similarTerms;
      }
    }
    return {
      original: terms,
      expanded: Array.from(new Set(expanded)),
      matches
    };
  }
  /**
   * Builds the index query for an expansion.
   *
   * Whether fuzzy alternatives exist is decided per term rather than by
   * comparing array lengths, so repeated terms ("foo foo") still get their
   * alternatives. Alternatives equal to the term itself are dropped.
   *
   * @param {{original: string[], matches: object}} expansion
   * @returns {object} `{ AND: terms }` when nothing was expanded, `{ OR: group }`
   *   for a single expanded term, otherwise an AND of terms / OR-groups.
   */
  buildSearchQuery(expansion) {
    const queryGroups = expansion.original.map((originalTerm) => {
      // Own-property check: a term such as "constructor" must not resolve to
      // an inherited member of the plain `matches` object.
      const found = Object.prototype.hasOwnProperty.call(expansion.matches, originalTerm)
        ? expansion.matches[originalTerm]
        : [];
      const lowered = originalTerm.toLowerCase();
      const alternatives = found.map((m) => m.term).filter((t) => t.toLowerCase() !== lowered);
      return [...new Set([originalTerm, ...alternatives])];
    });
    if (!queryGroups.some((group) => group.length > 1)) {
      return { AND: expansion.original };
    }
    if (queryGroups.length === 1) {
      return { OR: queryGroups[0] };
    }
    return {
      AND: queryGroups.map((group) => group.length === 1 ? group[0] : { OR: group })
    };
  }
  /**
   * Runs a fuzzy query against the index.
   *
   * @param {string} query Space-separated search terms.
   * @param {{limit?: number, cursor?: string, fuzzyOptions?: object}} [options]
   * @returns {Promise<{results: Array, total: number, prevCursor: string|null,
   *   nextCursor: string|null, fuzzyMatches: object}>}
   */
  async query(query, options = {}) {
    const pageOptions = buildPageOptions(options);
    const expansion = await this.expandQuery(query, options.fuzzyOptions);
    const results = await this.searchIndex.QUERY(
      this.buildSearchQuery(expansion),
      pageOptions
    );
    const total = results.RESULT_LENGTH || 0;
    const pagination = buildPaginationCursors(total, options);
    return {
      results: results.RESULT || [],
      total,
      ...pagination,
      fuzzyMatches: expansion.matches
    };
  }
  /** Drops every memoised similar-terms lookup. */
  clearCache() {
    this.cache.clear();
  }
  /** @returns {number} Number of memoised similar-terms lookups. */
  getCacheSize() {
    return this.cache.size;
  }
};
|
|
533
|
+
|
|
534
|
+
// src/client/index.ts
|
|
535
|
+
import * as zlib from "node:zlib";
|
|
536
|
+
var SqliteLevel2 = sqliteLevelModule.default?.SqliteLevel ?? sqliteLevelModule.SqliteLevel;
|
|
269
537
|
var DEFAULT_TOKEN_SPLIT_REGEX = /[\p{L}\d_]+/gu;
|
|
270
538
|
var LocalSearchIndexClient = class {
|
|
539
|
+
searchIndex;
|
|
540
|
+
memoryLevel;
|
|
541
|
+
stopwords;
|
|
542
|
+
tokenSplitRegex;
|
|
543
|
+
fuzzySearchWrapper;
|
|
271
544
|
constructor(options) {
|
|
272
|
-
this.memoryLevel = new
|
|
545
|
+
this.memoryLevel = new MemoryLevel();
|
|
273
546
|
this.stopwords = lookupStopwords(options.stopwordLanguages);
|
|
274
547
|
this.tokenSplitRegex = options.tokenSplitRegex ? new RegExp(options.tokenSplitRegex, "gu") : DEFAULT_TOKEN_SPLIT_REGEX;
|
|
275
548
|
}
|
|
276
549
|
async onStartIndexing() {
|
|
277
|
-
|
|
550
|
+
const options = {
|
|
278
551
|
db: this.memoryLevel,
|
|
279
552
|
stopwords: this.stopwords,
|
|
280
553
|
tokenSplitRegex: this.tokenSplitRegex
|
|
281
|
-
}
|
|
554
|
+
};
|
|
555
|
+
this.searchIndex = await createSearchIndex(
|
|
556
|
+
options
|
|
557
|
+
);
|
|
558
|
+
this.fuzzySearchWrapper = new FuzzySearchWrapper(this.searchIndex);
|
|
282
559
|
}
|
|
283
560
|
async put(docs) {
|
|
284
561
|
if (!this.searchIndex) {
|
|
285
562
|
throw new Error("onStartIndexing must be called first");
|
|
286
563
|
}
|
|
287
|
-
|
|
564
|
+
await this.searchIndex.PUT(docs);
|
|
288
565
|
}
|
|
289
566
|
async del(ids) {
|
|
290
567
|
if (!this.searchIndex) {
|
|
291
568
|
throw new Error("onStartIndexing must be called first");
|
|
292
569
|
}
|
|
293
|
-
|
|
294
|
-
}
|
|
295
|
-
query(query, options) {
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
570
|
+
await this.searchIndex.DELETE(ids);
|
|
571
|
+
}
|
|
572
|
+
async query(query, options) {
|
|
573
|
+
if (!this.searchIndex) {
|
|
574
|
+
throw new Error("onStartIndexing must be called first");
|
|
575
|
+
}
|
|
576
|
+
if (options?.fuzzy && this.fuzzySearchWrapper) {
|
|
577
|
+
return this.fuzzySearchWrapper.query(query, {
|
|
578
|
+
limit: options.limit,
|
|
579
|
+
cursor: options.cursor,
|
|
580
|
+
fuzzyOptions: options.fuzzyOptions
|
|
581
|
+
});
|
|
582
|
+
}
|
|
583
|
+
const searchIndexOptions = buildPageOptions({
|
|
584
|
+
limit: options?.limit,
|
|
585
|
+
cursor: options?.cursor
|
|
586
|
+
});
|
|
587
|
+
const terms = query.split(" ").filter((t) => t.trim().length > 0);
|
|
588
|
+
const queryObj = terms.length > 1 ? { AND: terms } : { AND: [terms[0] || ""] };
|
|
589
|
+
const searchResults = await this.searchIndex.QUERY(
|
|
590
|
+
queryObj,
|
|
591
|
+
searchIndexOptions
|
|
592
|
+
);
|
|
593
|
+
const total = searchResults.RESULT_LENGTH || 0;
|
|
594
|
+
const pagination = buildPaginationCursors(total, {
|
|
595
|
+
limit: options?.limit,
|
|
596
|
+
cursor: options?.cursor
|
|
301
597
|
});
|
|
598
|
+
return {
|
|
599
|
+
results: searchResults.RESULT || [],
|
|
600
|
+
total,
|
|
601
|
+
...pagination
|
|
602
|
+
};
|
|
302
603
|
}
|
|
303
604
|
async export(filename) {
|
|
304
|
-
const sqliteLevel = new
|
|
605
|
+
const sqliteLevel = new SqliteLevel2({ filename });
|
|
305
606
|
const iterator = this.memoryLevel.iterator();
|
|
306
607
|
for await (const [key, value] of iterator) {
|
|
307
608
|
await sqliteLevel.put(key, value);
|
|
@@ -310,55 +611,73 @@ var LocalSearchIndexClient = class {
|
|
|
310
611
|
}
|
|
311
612
|
};
|
|
312
613
|
/**
 * Search index client that builds the index locally (inherited from
 * LocalSearchIndexClient) and, when indexing finishes, uploads a gzipped
 * SQLite snapshot of it to a signed URL obtained from the configured API.
 */
var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
  apiUrl;
  branch;
  indexerToken;
  /**
   * @param {object} options Local client options plus `apiUrl`, `branch` and
   *   `indexerToken`.
   */
  constructor(options) {
    super(options);
    this.apiUrl = options.apiUrl;
    this.branch = options.branch;
    this.indexerToken = options.indexerToken;
  }
  /**
   * Requests the signed upload URL for the configured branch.
   *
   * @returns {Promise<string>} The signed URL.
   * @throws {Error} When the API does not answer with status 200.
   */
  async getUploadUrl() {
    const headers = new Headers();
    headers.append("x-api-key", this.indexerToken || "");
    headers.append("Content-Type", "application/json");
    const response = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
      method: "GET",
      headers
    });
    if (response.status !== 200) {
      // The error body is optional detail; an unparsable body must not hide the status.
      const errorBody = await response.json().catch(() => ({}));
      throw new Error(
        `Failed to get upload url. Status: ${response.status}${errorBody?.message ? ` - ${errorBody.message}` : ""}`
      );
    }
    const { signedUrl } = await response.json();
    return signedUrl;
  }
  /**
   * Copies the in-memory index into a temporary in-memory SQLite database and
   * returns its gzipped serialisation.
   *
   * The SQLite handle is closed in a `finally` block so it is released even
   * when copying or serialising fails.
   *
   * @returns {Promise<Buffer>} Gzipped database image.
   */
  async serializeIndex() {
    const sqliteLevel = new SqliteLevel2({ filename: ":memory:" });
    let buffer;
    try {
      const iterator = this.memoryLevel.iterator();
      for await (const [key, value] of iterator) {
        await sqliteLevel.put(key, value);
      }
      buffer = sqliteLevel.db.serialize();
    } finally {
      await sqliteLevel.close();
    }
    return zlib.gzipSync(buffer);
  }
  /**
   * Uploads the serialised index with an HTTP PUT.
   *
   * @param {string} signedUrl Destination URL.
   * @param {Buffer} data Gzipped index.
   * @throws {Error} When the upload does not answer with status 200.
   */
  async uploadIndex(signedUrl, data) {
    const response = await fetch(signedUrl, {
      method: "PUT",
      body: data
    });
    if (response.status !== 200) {
      const errorText = await response.text();
      throw new Error(
        `Failed to upload search index. Status: ${response.status}\n${errorText}`
      );
    }
  }
  /** Serialises the freshly built index and uploads it. */
  async onFinishIndexing() {
    const signedUrl = await this.getUploadUrl();
    const indexData = await this.serializeIndex();
    await this.uploadIndex(signedUrl, indexData);
  }
};
|
|
358
|
-
|
|
359
|
-
|
|
669
|
+
export {
|
|
670
|
+
DEFAULT_FUZZY_OPTIONS,
|
|
671
|
+
FuzzyCache,
|
|
672
|
+
FuzzySearchWrapper,
|
|
360
673
|
LocalSearchIndexClient,
|
|
361
674
|
SearchIndexer,
|
|
362
675
|
TinaCMSSearchIndexClient,
|
|
363
|
-
|
|
364
|
-
|
|
676
|
+
buildPageOptions,
|
|
677
|
+
buildPaginationCursors,
|
|
678
|
+
createSearchIndex2 as createSearchIndex,
|
|
679
|
+
damerauLevenshteinDistance,
|
|
680
|
+
findSimilarTerms,
|
|
681
|
+
levenshteinDistance,
|
|
682
|
+
similarityScore
|
|
683
|
+
};
|