@tinacms/search 0.0.0-c6915ea-20250421012527 → 0.0.0-c706b9f-20251222081038
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/client/index.d.ts +10 -13
- package/dist/fuzzy/cache.d.ts +11 -0
- package/dist/fuzzy/distance.d.ts +8 -0
- package/dist/fuzzy/index.d.ts +4 -0
- package/dist/fuzzy/types.d.ts +19 -0
- package/dist/fuzzy-search-wrapper.d.ts +23 -0
- package/dist/index-client.d.ts +28 -1
- package/dist/index-client.js +201 -206
- package/dist/index.d.ts +10 -1
- package/dist/index.js +530 -216
- package/dist/indexer/index.d.ts +1 -0
- package/dist/indexer/utils.d.ts +1 -1
- package/dist/pagination.d.ts +16 -0
- package/dist/types.d.ts +51 -11
- package/package.json +13 -14
- package/dist/index-client.mjs +0 -195
package/dist/index.js
CHANGED
|
@@ -1,183 +1,159 @@
|
|
|
1
|
-
var __create = Object.create;
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __export = (target, all) => {
|
|
8
|
-
for (var name in all)
|
|
9
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
-
};
|
|
11
|
-
var __copyProps = (to, from, except, desc) => {
|
|
12
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
-
for (let key of __getOwnPropNames(from))
|
|
14
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
-
}
|
|
17
|
-
return to;
|
|
18
|
-
};
|
|
19
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
-
mod
|
|
26
|
-
));
|
|
27
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
-
|
|
29
1
|
// src/index.ts
|
|
30
|
-
|
|
31
|
-
__export(index_exports, {
|
|
32
|
-
LocalSearchIndexClient: () => LocalSearchIndexClient,
|
|
33
|
-
SearchIndexer: () => SearchIndexer,
|
|
34
|
-
TinaCMSSearchIndexClient: () => TinaCMSSearchIndexClient,
|
|
35
|
-
si: () => import_search_index2.default
|
|
36
|
-
});
|
|
37
|
-
module.exports = __toCommonJS(index_exports);
|
|
38
|
-
var import_search_index2 = __toESM(require("search-index"));
|
|
2
|
+
import createSearchIndex2 from "search-index";
|
|
39
3
|
|
|
40
4
|
// src/indexer/index.ts
|
|
41
|
-
|
|
5
|
+
import {
|
|
6
|
+
loadAndParseWithAliases,
|
|
7
|
+
sequential,
|
|
8
|
+
scanAllContent,
|
|
9
|
+
scanContentByPaths,
|
|
10
|
+
transformDocument,
|
|
11
|
+
transformDocumentIntoPayload
|
|
12
|
+
} from "@tinacms/graphql";
|
|
42
13
|
|
|
43
14
|
// src/indexer/utils.ts
|
|
44
|
-
|
|
15
|
+
import * as sw from "stopword";
|
|
16
|
+
var INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
|
|
45
17
|
var StringBuilder = class {
|
|
18
|
+
buffer = [];
|
|
19
|
+
limit;
|
|
20
|
+
length = 0;
|
|
46
21
|
constructor(limit) {
|
|
47
|
-
this.length = 0;
|
|
48
|
-
this.buffer = [];
|
|
49
22
|
this.limit = limit;
|
|
50
23
|
}
|
|
51
24
|
append(str) {
|
|
52
|
-
if (this.length + str.length > this.limit)
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
this.length += str.length;
|
|
57
|
-
if (this.length > this.limit) {
|
|
58
|
-
return true;
|
|
59
|
-
}
|
|
60
|
-
return false;
|
|
61
|
-
}
|
|
25
|
+
if (this.length + str.length > this.limit) return true;
|
|
26
|
+
this.buffer.push(str);
|
|
27
|
+
this.length += str.length;
|
|
28
|
+
return this.length > this.limit;
|
|
62
29
|
}
|
|
63
30
|
toString() {
|
|
64
31
|
return this.buffer.join(" ");
|
|
65
32
|
}
|
|
66
33
|
};
|
|
67
|
-
var
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
}
|
|
34
|
+
/**
 * Break a string into lowercase tokens.
 *
 * The input is split on runs of whitespace, periods and commas; empty pieces
 * (for example from leading or trailing separators) are discarded.
 *
 * @param {string} str - Raw text to tokenize.
 * @returns {string[]} Lowercased, non-empty tokens in their original order.
 */
var tokenizeString = (str) => {
  const tokens = [];
  for (const piece of str.split(/[\s\.,]+/)) {
    const lowered = piece.toLowerCase();
    if (lowered) {
      tokens.push(lowered);
    }
  }
  return tokens;
};
|
|
37
|
+
var extractText = (data, builder, nodeTypes) => {
|
|
38
|
+
if (!data) return;
|
|
39
|
+
if (nodeTypes.includes(data.type ?? "") && (data.text || data.value)) {
|
|
40
|
+
const tokens = tokenizeString(data.text || data.value || "");
|
|
41
|
+
for (const token of tokens) {
|
|
42
|
+
if (builder.append(token)) return;
|
|
77
43
|
}
|
|
78
|
-
(_b = (_a = data.children) == null ? void 0 : _a.forEach) == null ? void 0 : _b.call(
|
|
79
|
-
_a,
|
|
80
|
-
(child) => extractText(child, acc, indexableNodeTypes)
|
|
81
|
-
);
|
|
82
44
|
}
|
|
45
|
+
data.children?.forEach((child) => extractText(child, builder, nodeTypes));
|
|
83
46
|
};
|
|
84
|
-
var
|
|
47
|
+
var getRelativePath = (path, collection) => {
|
|
85
48
|
return path.replace(/\\/g, "/").replace(collection.path, "").replace(/^\/|\/$/g, "");
|
|
86
49
|
};
|
|
87
|
-
var
|
|
88
|
-
return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
|
|
89
|
-
};
|
|
90
|
-
var processTextFieldValue = (value, maxLen) => {
|
|
50
|
+
var processTextField = (value, maxLength) => {
|
|
91
51
|
const tokens = tokenizeString(value);
|
|
92
|
-
const builder = new StringBuilder(
|
|
52
|
+
const builder = new StringBuilder(maxLength);
|
|
93
53
|
for (const part of tokens) {
|
|
94
|
-
if (builder.append(part))
|
|
95
|
-
break;
|
|
96
|
-
}
|
|
54
|
+
if (builder.append(part)) break;
|
|
97
55
|
}
|
|
98
56
|
return builder.toString();
|
|
99
57
|
};
|
|
58
|
+
/**
 * Flatten a rich-text node tree into a single length-limited string.
 *
 * Text is gathered by `extractText` from nodes whose type is listed in
 * `INDEXABLE_NODE_TYPES` and accumulated in a `StringBuilder` capped at
 * `maxLength`.
 *
 * @param {object} value - Root node of the rich-text tree.
 * @param {number} maxLength - Limit handed to the `StringBuilder`.
 * @returns {string} The collected text.
 */
var processRichTextField = (value, maxLength) => {
  const collected = new StringBuilder(maxLength);
  extractText(value, collected, INDEXABLE_NODE_TYPES);
  return collected.toString();
};
|
|
63
|
+
/**
 * Prepare an object-typed field for indexing by recursing into
 * `processDocumentForIndexing` with the field as the schema context.
 *
 * @param {object|object[]} data - The field value; an array when `field.list` is set.
 * @param {string} path - Document path, forwarded unchanged.
 * @param {object} collection - Owning collection, forwarded unchanged.
 * @param {number} textIndexLength - Default text length limit, forwarded unchanged.
 * @param {object} field - Field definition; `field.list` selects per-item processing.
 * @returns {object|object[]} The processed value (an array for list fields).
 */
var processObjectField = (data, path, collection, textIndexLength, field) => {
  const indexOne = (obj) =>
    processDocumentForIndexing(obj, path, collection, textIndexLength, field);
  return field.list ? data.map((obj) => indexOne(obj)) : indexOne(data);
};
|
|
77
|
+
/**
 * Run `processTextField` over a string field, item by item for list fields.
 *
 * @param {string|string[]} data - The field value; an array when `isList` is true.
 * @param {number} maxLength - Length limit passed to `processTextField`.
 * @param {boolean} isList - Whether `data` holds multiple values.
 * @returns {string|string[]} The processed value (an array for list fields).
 */
var processStringField = (data, maxLength, isList) => {
  if (!isList) {
    return processTextField(data, maxLength);
  }
  return data.map((entry) => processTextField(entry, maxLength));
};
|
|
85
|
+
/**
 * Run `processRichTextField` over a rich-text field, item by item for list fields.
 *
 * @param {object|object[]} data - The field value; an array when `isList` is true.
 * @param {number} maxLength - Length limit passed to `processRichTextField`.
 * @param {boolean} isList - Whether `data` holds multiple values.
 * @returns {string|string[]} The extracted text (an array for list fields).
 */
var processRichTextFieldData = (data, maxLength, isList) => {
  if (!isList) {
    return processRichTextField(data, maxLength);
  }
  return data.map((entry) => processRichTextField(entry, maxLength));
};
|
|
100
93
|
var processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
|
|
101
94
|
if (!field) {
|
|
102
|
-
const
|
|
103
|
-
data["_id"] = `${collection.name}:${
|
|
104
|
-
data["_relativePath"] =
|
|
95
|
+
const relativePath = getRelativePath(path, collection);
|
|
96
|
+
data["_id"] = `${collection.name}:${relativePath}`;
|
|
97
|
+
data["_relativePath"] = relativePath;
|
|
105
98
|
}
|
|
106
|
-
|
|
99
|
+
const fields = field?.fields || collection.fields || [];
|
|
100
|
+
for (const f of fields) {
|
|
107
101
|
if (!f.searchable) {
|
|
108
102
|
delete data[f.name];
|
|
109
103
|
continue;
|
|
110
104
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
);
|
|
139
|
-
} else {
|
|
140
|
-
data[f.name] = processTextFieldValue(
|
|
141
|
-
data[f.name],
|
|
142
|
-
fieldTextIndexLength
|
|
143
|
-
);
|
|
144
|
-
}
|
|
145
|
-
} else if (f.type === "rich-text") {
|
|
146
|
-
const fieldTextIndexLength = f.maxSearchIndexFieldLength || textIndexLength;
|
|
147
|
-
if (isList) {
|
|
148
|
-
data[f.name] = data[f.name].map((value) => {
|
|
149
|
-
const acc = new StringBuilder(fieldTextIndexLength);
|
|
150
|
-
extractText(value, acc, ["text", "code_block", "html"]);
|
|
151
|
-
return acc.toString();
|
|
152
|
-
});
|
|
153
|
-
} else {
|
|
154
|
-
const acc = new StringBuilder(fieldTextIndexLength);
|
|
155
|
-
extractText(data[f.name], acc, ["text", "code_block", "html"]);
|
|
156
|
-
data[f.name] = acc.toString();
|
|
157
|
-
}
|
|
158
|
-
}
|
|
105
|
+
if (!data[f.name]) continue;
|
|
106
|
+
const fieldMaxLength = f.maxSearchIndexFieldLength || textIndexLength;
|
|
107
|
+
const isList = Boolean(f.list);
|
|
108
|
+
switch (f.type) {
|
|
109
|
+
case "object":
|
|
110
|
+
data[f.name] = processObjectField(
|
|
111
|
+
data[f.name],
|
|
112
|
+
path,
|
|
113
|
+
collection,
|
|
114
|
+
textIndexLength,
|
|
115
|
+
f
|
|
116
|
+
);
|
|
117
|
+
break;
|
|
118
|
+
case "string":
|
|
119
|
+
data[f.name] = processStringField(
|
|
120
|
+
data[f.name],
|
|
121
|
+
fieldMaxLength,
|
|
122
|
+
isList
|
|
123
|
+
);
|
|
124
|
+
break;
|
|
125
|
+
case "rich-text":
|
|
126
|
+
data[f.name] = processRichTextFieldData(
|
|
127
|
+
data[f.name],
|
|
128
|
+
fieldMaxLength,
|
|
129
|
+
isList
|
|
130
|
+
);
|
|
131
|
+
break;
|
|
159
132
|
}
|
|
160
133
|
}
|
|
161
134
|
return data;
|
|
162
135
|
};
|
|
163
|
-
var
|
|
136
|
+
var stopwordCache = {};
|
|
164
137
|
var lookupStopwords = (keys, defaultStopWords = sw.eng) => {
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
if (memo[keys.join(",")]) {
|
|
168
|
-
return memo[keys.join(",")];
|
|
169
|
-
}
|
|
170
|
-
stopwords = [];
|
|
171
|
-
for (const key of keys) {
|
|
172
|
-
stopwords.push(...sw[key]);
|
|
173
|
-
}
|
|
174
|
-
memo[keys.join(",")] = stopwords;
|
|
138
|
+
if (!keys) {
|
|
139
|
+
return defaultStopWords;
|
|
175
140
|
}
|
|
141
|
+
const cacheKey = keys.join(",");
|
|
142
|
+
if (stopwordCache[cacheKey]) {
|
|
143
|
+
return stopwordCache[cacheKey];
|
|
144
|
+
}
|
|
145
|
+
const stopwords = keys.flatMap((key) => sw[key] || []);
|
|
146
|
+
stopwordCache[cacheKey] = stopwords;
|
|
176
147
|
return stopwords;
|
|
177
148
|
};
|
|
178
149
|
|
|
179
150
|
// src/indexer/index.ts
|
|
180
151
|
var SearchIndexer = class {
|
|
152
|
+
batchSize;
|
|
153
|
+
client;
|
|
154
|
+
bridge;
|
|
155
|
+
schema;
|
|
156
|
+
textIndexLength;
|
|
181
157
|
constructor(options) {
|
|
182
158
|
this.client = options.client;
|
|
183
159
|
this.bridge = options.bridge;
|
|
@@ -185,15 +161,33 @@ var SearchIndexer = class {
|
|
|
185
161
|
this.batchSize = options.batchSize || 100;
|
|
186
162
|
this.textIndexLength = options.textIndexLength || 500;
|
|
187
163
|
}
|
|
164
|
+
createBatchProcessor() {
|
|
165
|
+
let batch = [];
|
|
166
|
+
return {
|
|
167
|
+
callback: async (item) => {
|
|
168
|
+
batch.push(item);
|
|
169
|
+
if (batch.length >= this.batchSize) {
|
|
170
|
+
await this.client.put(batch);
|
|
171
|
+
batch = [];
|
|
172
|
+
}
|
|
173
|
+
},
|
|
174
|
+
flush: async () => {
|
|
175
|
+
if (batch.length > 0) {
|
|
176
|
+
await this.client.put(batch);
|
|
177
|
+
batch = [];
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
};
|
|
181
|
+
}
|
|
188
182
|
makeIndexerCallback(itemCallback) {
|
|
189
183
|
return async (collection, contentPaths) => {
|
|
190
184
|
const templateInfo = this.schema.getTemplatesForCollectable(collection);
|
|
191
|
-
await
|
|
192
|
-
const data = await
|
|
185
|
+
await sequential(contentPaths, async (path) => {
|
|
186
|
+
const data = await transformDocumentIntoPayload(
|
|
193
187
|
`${collection.path}/${path}`,
|
|
194
|
-
|
|
188
|
+
transformDocument(
|
|
195
189
|
path,
|
|
196
|
-
await
|
|
190
|
+
await loadAndParseWithAliases(
|
|
197
191
|
this.bridge,
|
|
198
192
|
path,
|
|
199
193
|
collection,
|
|
@@ -215,98 +209,400 @@ var SearchIndexer = class {
|
|
|
215
209
|
};
|
|
216
210
|
}
|
|
217
211
|
async indexContentByPaths(documentPaths) {
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
batch.push(item);
|
|
222
|
-
if (batch.length > this.batchSize) {
|
|
223
|
-
await this.client.put(batch);
|
|
224
|
-
batch = [];
|
|
225
|
-
}
|
|
226
|
-
};
|
|
227
|
-
await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
|
|
228
|
-
await (0, import_graphql.scanContentByPaths)(
|
|
212
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
213
|
+
await this.client.onStartIndexing?.();
|
|
214
|
+
await scanContentByPaths(
|
|
229
215
|
this.schema,
|
|
230
216
|
documentPaths,
|
|
231
|
-
this.makeIndexerCallback(
|
|
217
|
+
this.makeIndexerCallback(callback)
|
|
232
218
|
);
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
}
|
|
236
|
-
await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
|
|
219
|
+
await flush();
|
|
220
|
+
await this.client.onFinishIndexing?.();
|
|
237
221
|
}
|
|
238
222
|
async indexAllContent() {
|
|
239
|
-
|
|
240
|
-
await
|
|
241
|
-
|
|
242
|
-
const itemCallback = async (item) => {
|
|
243
|
-
batch.push(item);
|
|
244
|
-
if (batch.length > this.batchSize) {
|
|
245
|
-
await this.client.put(batch);
|
|
246
|
-
batch = [];
|
|
247
|
-
}
|
|
248
|
-
};
|
|
249
|
-
const warnings = await (0, import_graphql.scanAllContent)(
|
|
223
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
224
|
+
await this.client.onStartIndexing?.();
|
|
225
|
+
const warnings = await scanAllContent(
|
|
250
226
|
this.schema,
|
|
251
227
|
this.bridge,
|
|
252
|
-
this.makeIndexerCallback(
|
|
228
|
+
this.makeIndexerCallback(callback)
|
|
253
229
|
);
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
}
|
|
257
|
-
await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
|
|
230
|
+
await flush();
|
|
231
|
+
await this.client.onFinishIndexing?.();
|
|
258
232
|
return { warnings };
|
|
259
233
|
}
|
|
260
234
|
async deleteIndexContent(documentPaths) {
|
|
261
|
-
|
|
262
|
-
await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
|
|
235
|
+
await this.client.onStartIndexing?.();
|
|
263
236
|
await this.client.del(documentPaths);
|
|
264
|
-
await
|
|
237
|
+
await this.client.onFinishIndexing?.();
|
|
265
238
|
}
|
|
266
239
|
};
|
|
267
240
|
|
|
268
241
|
// src/client/index.ts
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
242
|
+
import * as sqliteLevelModule from "sqlite-level";
|
|
243
|
+
import createSearchIndex from "search-index";
|
|
244
|
+
import { MemoryLevel } from "memory-level";
|
|
245
|
+
|
|
246
|
+
// src/fuzzy/types.ts
|
|
247
|
+
// Baseline fuzzy-matching options. findSimilarTerms and
// FuzzySearchWrapper.expandQuery spread caller options over these defaults.
var DEFAULT_FUZZY_OPTIONS = {
  // Candidates whose edit distance from the query exceeds this are dropped.
  maxDistance: 2,
  // Candidates whose similarity score is below this are dropped.
  minSimilarity: 0.6,
  // Upper bound on the number of matches returned per query term.
  maxResults: 10,
  // true selects damerauLevenshteinDistance, false levenshteinDistance.
  useTranspositions: true,
  // When false, query and dictionary terms are lowercased before comparing.
  caseSensitive: false,
  // Enables the n-gram overlap pre-filter that runs before the distance check.
  useNgramFilter: true,
  // Length of the n-grams used by the pre-filter.
  ngramSize: 2,
  // Candidates with n-gram overlap below this are skipped by the pre-filter.
  minNgramOverlap: 0.2
};
|
|
257
|
+
|
|
258
|
+
// src/fuzzy/cache.ts
|
|
259
|
+
/**
 * Least-recently-used cache for fuzzy-match results.
 *
 * Entries are keyed by the JSON serialization of `{ query, options }`. A Map
 * keeps keys in insertion order, so the first key is always the least
 * recently used one and is the entry evicted when the cache is full.
 */
var FuzzyCache = class {
  cache;
  maxSize;
  /**
   * @param {number} [maxSize=100] - Entry count at which eviction starts.
   */
  constructor(maxSize = 100) {
    this.cache = new Map();
    this.maxSize = maxSize;
  }
  /**
   * Build the cache key for a query/options pair.
   *
   * @param {string} query
   * @param {object} options
   * @returns {string} JSON string combining both inputs.
   */
  getCacheKey(query, options) {
    return JSON.stringify({ query, options });
  }
  /**
   * Look up a cached value and mark it as most recently used.
   *
   * @param {string} query
   * @param {object} options
   * @returns {*} The cached value, or `undefined` on a miss.
   */
  get(query, options) {
    const key = this.getCacheKey(query, options);
    // Test for presence rather than truthiness so a falsy cached value
    // still gets its recency refreshed.
    if (!this.cache.has(key)) return undefined;
    const value = this.cache.get(key);
    // Re-insert to move the key to the most-recently-used end of the Map.
    this.cache.delete(key);
    this.cache.set(key, value);
    return value;
  }
  /**
   * Store a value, evicting the least recently used entry only when a new
   * key would push the cache past `maxSize`.
   *
   * @param {string} query
   * @param {object} options
   * @param {*} results - Value to cache.
   */
  set(query, options, results) {
    const key = this.getCacheKey(query, options);
    if (this.cache.has(key)) {
      // Overwriting does not grow the cache: drop the old slot so the key is
      // re-inserted as most recently used, and leave other entries alone.
      this.cache.delete(key);
    } else if (this.cache.size >= this.maxSize) {
      const firstKey = this.cache.keys().next().value;
      this.cache.delete(firstKey);
    }
    this.cache.set(key, results);
  }
  /** Remove every cached entry. */
  clear() {
    this.cache.clear();
  }
  /** @returns {number} Number of entries currently cached. */
  get size() {
    return this.cache.size;
  }
};
|
|
293
|
+
|
|
294
|
+
// src/fuzzy/distance.ts
|
|
295
|
+
var PREFIX_MATCH_MIN_SIMILARITY = 0.8;
|
|
296
|
+
/**
 * Compute the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn `str1` into `str2`.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} The edit distance.
 */
function levenshteinDistance(str1, str2) {
  const rowCount = str1.length + 1;
  const colCount = str2.length + 1;
  // table[r][c] holds the distance between the first r chars of str1 and
  // the first c chars of str2; column 0 is seeded with the row index.
  const table = Array.from({ length: rowCount }, (_, r) => {
    const row = new Array(colCount).fill(0);
    row[0] = r;
    return row;
  });
  for (let c = 0; c < colCount; c++) {
    table[0][c] = c;
  }
  for (let r = 1; r < rowCount; r++) {
    const sourceChar = str1[r - 1];
    for (let c = 1; c < colCount; c++) {
      table[r][c] =
        sourceChar === str2[c - 1]
          ? table[r - 1][c - 1]
          : Math.min(table[r - 1][c], table[r][c - 1], table[r - 1][c - 1]) + 1;
    }
  }
  return table[rowCount - 1][colCount - 1];
}
|
|
317
|
+
/**
 * Turn an edit distance into a similarity ratio: `1 - distance / maxLength`,
 * where `maxLength` is the length of the longer input.
 *
 * @param {string} str1
 * @param {string} str2
 * @param {boolean} [useTranspositions=false] - Use damerauLevenshteinDistance
 *   instead of levenshteinDistance.
 * @returns {number} The ratio; 1 when both strings are empty.
 */
function similarityScore(str1, str2, useTranspositions = false) {
  const measure = useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const distance = measure(str1, str2);
  const longest = Math.max(str1.length, str2.length);
  return longest === 0 ? 1 : 1 - distance / longest;
}
|
|
323
|
+
/**
 * Compute the Damerau-Levenshtein distance between two strings: like
 * Levenshtein, but swapping two adjacent characters counts as one edit.
 *
 * The table has one extra leading row and column, filled with a sentinel of
 * `len1 + len2`, so the transposition lookup can index "no previous match"
 * without a bounds check.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} The edit distance.
 */
function damerauLevenshteinDistance(str1, str2) {
  const sourceLen = str1.length;
  const targetLen = str2.length;
  const sentinel = sourceLen + targetLen;
  // Maps a character to the last 1-based row of str1 where it was seen.
  const lastRowByChar = new Map();
  const table = Array.from({ length: sourceLen + 2 }, () =>
    new Array(targetLen + 2).fill(0)
  );
  table[0][0] = sentinel;
  for (let r = 0; r <= sourceLen; r++) {
    table[r + 1][0] = sentinel;
    table[r + 1][1] = r;
  }
  for (let c = 0; c <= targetLen; c++) {
    table[0][c + 1] = sentinel;
    table[1][c + 1] = c;
  }
  for (let r = 1; r <= sourceLen; r++) {
    const sourceChar = str1[r - 1];
    // Last column in this row where the two strings matched.
    let matchCol = 0;
    for (let c = 1; c <= targetLen; c++) {
      const targetChar = str2[c - 1];
      const prevRow = lastRowByChar.get(targetChar) ?? 0;
      const prevCol = matchCol;
      const same = sourceChar === targetChar;
      if (same) {
        matchCol = c;
      }
      const substitute = table[r][c] + (same ? 0 : 1);
      const insert = table[r + 1][c] + 1;
      const remove = table[r][c + 1] + 1;
      // Cost of reaching here via a transposition with the earlier match.
      const transpose =
        table[prevRow][prevCol] + (r - prevRow - 1) + 1 + (c - prevCol - 1);
      table[r + 1][c + 1] = Math.min(substitute, insert, remove, transpose);
    }
    lastRowByChar.set(sourceChar, r);
  }
  return table[sourceLen + 1][targetLen + 1];
}
|
|
359
|
+
/**
 * Collect the distinct n-character substrings of a string.
 *
 * @param {string} str - Text to slice.
 * @param {number} [n=2] - Length of each n-gram.
 * @returns {Set<string>} The n-grams; when `str` is shorter than `n`, a set
 *   containing just `str` itself.
 */
function getNgrams(str, n = 2) {
  if (str.length < n) {
    return new Set([str]);
  }
  const grams = new Set();
  const lastStart = str.length - n;
  for (let start = 0; start <= lastStart; start++) {
    grams.add(str.substring(start, start + n));
  }
  return grams;
}
|
|
370
|
+
/**
 * Measure how much two n-gram sets overlap: the number of shared n-grams
 * divided by the size of the smaller set.
 *
 * @param {Set<string>} ngrams1
 * @param {Set<string>} ngrams2
 * @returns {number} Overlap ratio; 0 when either set is empty.
 */
function ngramOverlap(ngrams1, ngrams2) {
  if (ngrams1.size === 0 || ngrams2.size === 0) {
    return 0;
  }
  const shared = [...ngrams1].filter((gram) => ngrams2.has(gram)).length;
  return shared / Math.min(ngrams1.size, ngrams2.size);
}
|
|
379
|
+
/**
 * Find dictionary terms that approximately match a query string.
 *
 * Each candidate passes through, in order:
 *   1. an optional n-gram overlap pre-filter (`useNgramFilter`),
 *   2. a prefix check: terms starting with the query are always accepted,
 *      with a similarity of at least PREFIX_MATCH_MIN_SIMILARITY,
 *   3. an edit-distance check against `maxDistance` and `minSimilarity`.
 *
 * @param {string} query - Text to match.
 * @param {Iterable<*>} dictionary - Candidate terms; non-string and empty
 *   entries are ignored.
 * @param {object} [options] - Overrides merged over DEFAULT_FUZZY_OPTIONS.
 * @returns {Array<{term: string, distance: number, similarity: number}>}
 *   Matches ordered by descending similarity (ties within 1e-3 ordered by
 *   ascending distance), truncated to `maxResults`.
 */
function findSimilarTerms(query, dictionary, options = {}) {
  const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
  const normalizedQuery = opts.caseSensitive ? query : query.toLowerCase();
  if (normalizedQuery.length === 0) return [];
  const matches = [];
  const distanceFunc = opts.useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const queryNgrams = opts.useNgramFilter ? getNgrams(normalizedQuery, opts.ngramSize) : null;
  for (const term of dictionary) {
    if (typeof term !== "string" || term.length === 0) continue;
    const normalizedTerm = opts.caseSensitive ? term : term.toLowerCase();
    if (queryNgrams) {
      const termNgrams = getNgrams(normalizedTerm, opts.ngramSize);
      if (ngramOverlap(queryNgrams, termNgrams) < opts.minNgramOverlap) continue;
    }
    if (normalizedTerm.startsWith(normalizedQuery)) {
      const prefixSimilarity = normalizedQuery.length / normalizedTerm.length;
      matches.push({
        term,
        distance: normalizedTerm.length - normalizedQuery.length,
        similarity: Math.max(prefixSimilarity, PREFIX_MATCH_MIN_SIMILARITY)
      });
      continue;
    }
    const distance = distanceFunc(normalizedQuery, normalizedTerm);
    if (distance > opts.maxDistance) continue;
    // Derive the similarity from the distance already computed instead of
    // running the edit-distance table a second time. Both strings are
    // non-empty here, so the divisor is never zero.
    const similarity =
      1 - distance / Math.max(normalizedQuery.length, normalizedTerm.length);
    if (similarity >= opts.minSimilarity) {
      matches.push({ term, distance, similarity });
    }
  }
  matches.sort((a, b) => {
    // Treat near-equal similarities as ties and prefer the smaller distance.
    if (Math.abs(a.similarity - b.similarity) < 1e-3) {
      return a.distance - b.distance;
    }
    return b.similarity - a.similarity;
  });
  return matches.slice(0, opts.maxResults);
}
|
|
422
|
+
|
|
423
|
+
// src/pagination.ts
|
|
424
|
+
/**
 * Translate `{ limit, cursor }` into the `PAGE` option object passed to the
 * search index's QUERY call.
 *
 * The cursor is a zero-based page number encoded as a string. A missing,
 * non-numeric or negative cursor falls back to page 0 rather than sending
 * `NaN` or a negative page number.
 *
 * @param {{limit?: number, cursor?: string}} options
 * @returns {{PAGE?: {NUMBER: number, SIZE: number}}} Empty object when no
 *   limit is given.
 */
function buildPageOptions(options) {
  if (!options.limit) return {};
  const parsed = options.cursor ? Number.parseInt(options.cursor, 10) : 0;
  const pageNumber = Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
  return {
    PAGE: {
      NUMBER: pageNumber,
      SIZE: options.limit
    }
  };
}
|
|
433
|
+
/**
 * Compute previous/next page cursors for a result set.
 *
 * The cursor is a zero-based page number encoded as a string. A missing,
 * non-numeric or negative cursor is treated as page 0, so a malformed cursor
 * still yields a usable `nextCursor` instead of comparing against `NaN`.
 *
 * @param {number} total - Total number of matching results.
 * @param {{limit?: number, cursor?: string}} options
 * @returns {{prevCursor: string|null, nextCursor: string|null}} `null` marks
 *   the absence of a page in that direction; `nextCursor` is always `null`
 *   when no limit is given.
 */
function buildPaginationCursors(total, options) {
  const parsed = options.cursor ? Number.parseInt(options.cursor, 10) : 0;
  const currentPage = Number.isInteger(parsed) && parsed > 0 ? parsed : 0;
  const pageSize = options.limit;
  const hasPreviousPage = currentPage > 0;
  const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
  return {
    prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
    nextCursor: hasNextPage ? (currentPage + 1).toString() : null
  };
}
|
|
443
|
+
|
|
444
|
+
// src/fuzzy-search-wrapper.ts
|
|
445
|
+
/**
 * Adds typo-tolerant querying on top of a search-index instance.
 *
 * Each query term is expanded with similar terms taken from the index's own
 * dictionary, and the expanded terms are combined into a boolean query.
 * Dictionary lookups are memoized in a FuzzyCache.
 */
var FuzzySearchWrapper = class {
  cache;
  searchIndex;
  /**
   * @param {object} searchIndex - Index exposing `DICTIONARY` and `QUERY`.
   * @param {number} [cacheSize=100] - Capacity of the similar-term cache.
   */
  constructor(searchIndex, cacheSize = 100) {
    this.searchIndex = searchIndex;
    this.cache = new FuzzyCache(cacheSize);
  }
  /**
   * Fetch the index dictionary, optionally restricted to one field.
   *
   * @param {string} [field] - Field name to restrict to.
   * @returns {Promise<string[]>} Dictionary entries that are strings.
   */
  async getDictionary(field) {
    const token = field ? { FIELD: field } : void 0;
    const dictionary = await this.searchIndex.DICTIONARY(token);
    return dictionary.filter((entry) => typeof entry === "string");
  }
  /**
   * Find dictionary terms similar to `query`, using the cache when possible.
   *
   * @param {string} query - A single term.
   * @param {string} [field] - Restrict the dictionary to this field.
   * @param {object} [options] - Fuzzy options forwarded to findSimilarTerms.
   * @returns {Promise<Array<{term: string, distance: number, similarity: number}>>}
   */
  async findSimilar(query, field, options = {}) {
    const cacheKey = `${query}:${field || "all"}`;
    const cached = this.cache.get(cacheKey, options);
    if (cached) return cached;
    const dictionary = await this.getDictionary(field);
    const matches = findSimilarTerms(query, dictionary, options);
    this.cache.set(cacheKey, options, matches);
    return matches;
  }
  /**
   * Expand every space-separated term of `query` with its similar terms.
   *
   * @param {string} query - Raw query text.
   * @param {object} [options] - Overrides merged over DEFAULT_FUZZY_OPTIONS.
   * @returns {Promise<{original: string[], expanded: string[], matches: object}>}
   *   `original` is the list of query terms, `expanded` the de-duplicated
   *   union of terms and their alternatives, and `matches` maps each term
   *   that had at least one match to its match list.
   */
  async expandQuery(query, options = {}) {
    const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
    const terms = query.split(" ").map((t) => t.trim()).filter((t) => t.length > 0);
    const expanded = [];
    // Terms come from user input. Collect them in a Map and materialize with
    // Object.fromEntries so a term such as "__proto__" becomes an ordinary own
    // property instead of rewriting the result object's prototype.
    const matchesByTerm = new Map();
    for (const term of terms) {
      const similarTerms = await this.findSimilar(term, void 0, opts);
      expanded.push(term);
      const loweredTerm = term.toLowerCase();
      for (const match of similarTerms) {
        if (match.term.toLowerCase() !== loweredTerm) {
          expanded.push(match.term);
        }
      }
      if (similarTerms.length > 0) {
        matchesByTerm.set(term, similarTerms);
      }
    }
    return {
      original: terms,
      expanded: Array.from(new Set(expanded)),
      matches: Object.fromEntries(matchesByTerm)
    };
  }
  /**
   * Run a fuzzy query against the index.
   *
   * When expansion adds no alternatives, the original terms are ANDed. When
   * it does, each term becomes an OR group of itself plus its alternatives,
   * and the groups are ANDed together.
   *
   * @param {string} query - Raw query text.
   * @param {{limit?: number, cursor?: string, fuzzyOptions?: object}} [options]
   * @returns {Promise<{results: Array, total: number, prevCursor: string|null,
   *   nextCursor: string|null, fuzzyMatches: object}>}
   */
  async query(query, options = {}) {
    const pageOptions = buildPageOptions(options);
    const expansion = await this.expandQuery(query, options.fuzzyOptions);
    let searchQuery;
    if (expansion.expanded.length === expansion.original.length) {
      searchQuery = { AND: expansion.original };
    } else {
      const queryGroups = expansion.original.map((originalTerm) => {
        // Own-property check: a term such as "constructor" or "toString" must
        // not resolve to a member inherited from Object.prototype, which
        // would make the `.map` call below throw.
        const found = Object.prototype.hasOwnProperty.call(expansion.matches, originalTerm)
          ? expansion.matches[originalTerm]
          : [];
        return [originalTerm, ...found.map((m) => m.term)];
      });
      searchQuery = queryGroups.length === 1 ? { OR: queryGroups[0] } : {
        AND: queryGroups.map(
          (group) => group.length === 1 ? group[0] : { OR: group }
        )
      };
    }
    const results = await this.searchIndex.QUERY(searchQuery, pageOptions);
    const total = results.RESULT_LENGTH || 0;
    return {
      results: results.RESULT || [],
      total,
      ...buildPaginationCursors(total, options),
      fuzzyMatches: expansion.matches
    };
  }
  /** Drop all memoized similar-term lookups. */
  clearCache() {
    this.cache.clear();
  }
  /** @returns {number} Number of memoized similar-term lookups. */
  getCacheSize() {
    return this.cache.size;
  }
};
|
|
533
|
+
|
|
534
|
+
// src/client/index.ts
|
|
535
|
+
import * as zlib from "node:zlib";
|
|
536
|
+
var SqliteLevel2 = sqliteLevelModule.default?.SqliteLevel ?? sqliteLevelModule.SqliteLevel;
|
|
273
537
|
var DEFAULT_TOKEN_SPLIT_REGEX = /[\p{L}\d_]+/gu;
|
|
274
538
|
var LocalSearchIndexClient = class {
|
|
539
|
+
searchIndex;
|
|
540
|
+
memoryLevel;
|
|
541
|
+
stopwords;
|
|
542
|
+
tokenSplitRegex;
|
|
543
|
+
fuzzySearchWrapper;
|
|
275
544
|
constructor(options) {
|
|
276
|
-
this.memoryLevel = new
|
|
545
|
+
this.memoryLevel = new MemoryLevel();
|
|
277
546
|
this.stopwords = lookupStopwords(options.stopwordLanguages);
|
|
278
547
|
this.tokenSplitRegex = options.tokenSplitRegex ? new RegExp(options.tokenSplitRegex, "gu") : DEFAULT_TOKEN_SPLIT_REGEX;
|
|
279
548
|
}
|
|
280
549
|
async onStartIndexing() {
|
|
281
|
-
|
|
282
|
-
// @ts-ignore
|
|
550
|
+
const options = {
|
|
283
551
|
db: this.memoryLevel,
|
|
284
552
|
stopwords: this.stopwords,
|
|
285
553
|
tokenSplitRegex: this.tokenSplitRegex
|
|
286
|
-
}
|
|
554
|
+
};
|
|
555
|
+
this.searchIndex = await createSearchIndex(
|
|
556
|
+
options
|
|
557
|
+
);
|
|
558
|
+
this.fuzzySearchWrapper = new FuzzySearchWrapper(this.searchIndex);
|
|
287
559
|
}
|
|
288
560
|
async put(docs) {
|
|
289
561
|
if (!this.searchIndex) {
|
|
290
562
|
throw new Error("onStartIndexing must be called first");
|
|
291
563
|
}
|
|
292
|
-
|
|
564
|
+
await this.searchIndex.PUT(docs);
|
|
293
565
|
}
|
|
294
566
|
async del(ids) {
|
|
295
567
|
if (!this.searchIndex) {
|
|
296
568
|
throw new Error("onStartIndexing must be called first");
|
|
297
569
|
}
|
|
298
|
-
|
|
299
|
-
}
|
|
300
|
-
query(query, options) {
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
570
|
+
await this.searchIndex.DELETE(ids);
|
|
571
|
+
}
|
|
572
|
+
async query(query, options) {
|
|
573
|
+
if (!this.searchIndex) {
|
|
574
|
+
throw new Error("onStartIndexing must be called first");
|
|
575
|
+
}
|
|
576
|
+
if (options?.fuzzy && this.fuzzySearchWrapper) {
|
|
577
|
+
return this.fuzzySearchWrapper.query(query, {
|
|
578
|
+
limit: options.limit,
|
|
579
|
+
cursor: options.cursor,
|
|
580
|
+
fuzzyOptions: options.fuzzyOptions
|
|
581
|
+
});
|
|
582
|
+
}
|
|
583
|
+
const searchIndexOptions = buildPageOptions({
|
|
584
|
+
limit: options?.limit,
|
|
585
|
+
cursor: options?.cursor
|
|
586
|
+
});
|
|
587
|
+
const terms = query.split(" ").filter((t) => t.trim().length > 0);
|
|
588
|
+
const queryObj = terms.length > 1 ? { AND: terms } : { AND: [terms[0] || ""] };
|
|
589
|
+
const searchResults = await this.searchIndex.QUERY(
|
|
590
|
+
queryObj,
|
|
591
|
+
searchIndexOptions
|
|
592
|
+
);
|
|
593
|
+
const total = searchResults.RESULT_LENGTH || 0;
|
|
594
|
+
const pagination = buildPaginationCursors(total, {
|
|
595
|
+
limit: options?.limit,
|
|
596
|
+
cursor: options?.cursor
|
|
306
597
|
});
|
|
598
|
+
return {
|
|
599
|
+
results: searchResults.RESULT || [],
|
|
600
|
+
total,
|
|
601
|
+
...pagination
|
|
602
|
+
};
|
|
307
603
|
}
|
|
308
604
|
async export(filename) {
|
|
309
|
-
const sqliteLevel = new
|
|
605
|
+
const sqliteLevel = new SqliteLevel2({ filename });
|
|
310
606
|
const iterator = this.memoryLevel.iterator();
|
|
311
607
|
for await (const [key, value] of iterator) {
|
|
312
608
|
await sqliteLevel.put(key, value);
|
|
@@ -315,55 +611,73 @@ var LocalSearchIndexClient = class {
|
|
|
315
611
|
}
|
|
316
612
|
};
|
|
317
613
|
var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
|
|
614
|
+
apiUrl;
|
|
615
|
+
branch;
|
|
616
|
+
indexerToken;
|
|
318
617
|
constructor(options) {
|
|
319
618
|
super(options);
|
|
320
619
|
this.apiUrl = options.apiUrl;
|
|
321
620
|
this.branch = options.branch;
|
|
322
621
|
this.indexerToken = options.indexerToken;
|
|
323
622
|
}
|
|
324
|
-
async
|
|
623
|
+
async getUploadUrl() {
|
|
325
624
|
const headers = new Headers();
|
|
326
|
-
headers.append("x-api-key", this.indexerToken || "
|
|
625
|
+
headers.append("x-api-key", this.indexerToken || "");
|
|
327
626
|
headers.append("Content-Type", "application/json");
|
|
328
|
-
|
|
627
|
+
const response = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
|
|
329
628
|
method: "GET",
|
|
330
629
|
headers
|
|
331
630
|
});
|
|
332
|
-
if (
|
|
333
|
-
|
|
334
|
-
try {
|
|
335
|
-
json = await res.json();
|
|
336
|
-
} catch (e) {
|
|
337
|
-
console.error("Failed to parse error response", e);
|
|
338
|
-
}
|
|
631
|
+
if (response.status !== 200) {
|
|
632
|
+
const errorBody = await response.json().catch(() => ({}));
|
|
339
633
|
throw new Error(
|
|
340
|
-
`Failed to get upload url. Status: ${
|
|
634
|
+
`Failed to get upload url. Status: ${response.status}${errorBody?.message ? ` - ${errorBody.message}` : ""}`
|
|
341
635
|
);
|
|
342
636
|
}
|
|
343
|
-
const { signedUrl } = await
|
|
344
|
-
|
|
637
|
+
const { signedUrl } = await response.json();
|
|
638
|
+
return signedUrl;
|
|
639
|
+
}
|
|
640
|
+
async serializeIndex() {
|
|
641
|
+
const sqliteLevel = new SqliteLevel2({ filename: ":memory:" });
|
|
345
642
|
const iterator = this.memoryLevel.iterator();
|
|
346
643
|
for await (const [key, value] of iterator) {
|
|
347
644
|
await sqliteLevel.put(key, value);
|
|
348
645
|
}
|
|
349
646
|
const buffer = sqliteLevel.db.serialize();
|
|
350
647
|
await sqliteLevel.close();
|
|
351
|
-
|
|
648
|
+
return zlib.gzipSync(buffer);
|
|
649
|
+
}
|
|
650
|
+
async uploadIndex(signedUrl, data) {
|
|
651
|
+
const response = await fetch(signedUrl, {
|
|
352
652
|
method: "PUT",
|
|
353
|
-
body:
|
|
653
|
+
body: data
|
|
354
654
|
});
|
|
355
|
-
if (
|
|
655
|
+
if (response.status !== 200) {
|
|
656
|
+
const errorText = await response.text();
|
|
356
657
|
throw new Error(
|
|
357
|
-
`Failed to upload search index. Status: ${
|
|
358
|
-
${
|
|
658
|
+
`Failed to upload search index. Status: ${response.status}
|
|
659
|
+
${errorText}`
|
|
359
660
|
);
|
|
360
661
|
}
|
|
361
662
|
}
|
|
663
|
+
async onFinishIndexing() {
|
|
664
|
+
const signedUrl = await this.getUploadUrl();
|
|
665
|
+
const indexData = await this.serializeIndex();
|
|
666
|
+
await this.uploadIndex(signedUrl, indexData);
|
|
667
|
+
}
|
|
362
668
|
};
|
|
363
|
-
|
|
364
|
-
|
|
669
|
+
export {
|
|
670
|
+
DEFAULT_FUZZY_OPTIONS,
|
|
671
|
+
FuzzyCache,
|
|
672
|
+
FuzzySearchWrapper,
|
|
365
673
|
LocalSearchIndexClient,
|
|
366
674
|
SearchIndexer,
|
|
367
675
|
TinaCMSSearchIndexClient,
|
|
368
|
-
|
|
369
|
-
|
|
676
|
+
buildPageOptions,
|
|
677
|
+
buildPaginationCursors,
|
|
678
|
+
createSearchIndex2 as createSearchIndex,
|
|
679
|
+
damerauLevenshteinDistance,
|
|
680
|
+
findSimilarTerms,
|
|
681
|
+
levenshteinDistance,
|
|
682
|
+
similarityScore
|
|
683
|
+
};
|