@tinacms/search 0.0.0-ee8d9a3-20250429131017 → 0.0.0-f1cec43-20251216232909
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -0
- package/dist/client/index.d.ts +28 -13
- package/dist/fuzzy/cache.d.ts +11 -0
- package/dist/fuzzy/distance.d.ts +15 -0
- package/dist/fuzzy/index.d.ts +4 -0
- package/dist/fuzzy/types.d.ts +19 -0
- package/dist/fuzzy-search-wrapper.d.ts +46 -0
- package/dist/index-client.d.ts +30 -13
- package/dist/index-client.js +189 -206
- package/dist/index.d.ts +6 -1
- package/dist/index.js +545 -214
- package/dist/indexer/index.d.ts +1 -0
- package/dist/indexer/utils.d.ts +1 -1
- package/dist/pagination.d.ts +22 -0
- package/dist/types.d.ts +30 -11
- package/package.json +13 -14
- package/dist/index-client.js.map +0 -1
- package/dist/index-client.mjs +0 -196
- package/dist/index-client.mjs.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,183 +1,160 @@
|
|
|
1
|
-
var __create = Object.create;
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __export = (target, all) => {
|
|
8
|
-
for (var name in all)
|
|
9
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
-
};
|
|
11
|
-
var __copyProps = (to, from, except, desc) => {
|
|
12
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
-
for (let key of __getOwnPropNames(from))
|
|
14
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
-
}
|
|
17
|
-
return to;
|
|
18
|
-
};
|
|
19
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
-
mod
|
|
26
|
-
));
|
|
27
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
-
|
|
29
1
|
// src/index.ts
|
|
30
|
-
|
|
31
|
-
__export(index_exports, {
|
|
32
|
-
LocalSearchIndexClient: () => LocalSearchIndexClient,
|
|
33
|
-
SearchIndexer: () => SearchIndexer,
|
|
34
|
-
TinaCMSSearchIndexClient: () => TinaCMSSearchIndexClient,
|
|
35
|
-
si: () => import_search_index2.default
|
|
36
|
-
});
|
|
37
|
-
module.exports = __toCommonJS(index_exports);
|
|
38
|
-
var import_search_index2 = __toESM(require("search-index"));
|
|
2
|
+
import si2 from "search-index";
|
|
39
3
|
|
|
40
4
|
// src/indexer/index.ts
|
|
41
|
-
|
|
5
|
+
import {
|
|
6
|
+
loadAndParseWithAliases,
|
|
7
|
+
sequential,
|
|
8
|
+
scanAllContent,
|
|
9
|
+
scanContentByPaths,
|
|
10
|
+
transformDocument,
|
|
11
|
+
transformDocumentIntoPayload
|
|
12
|
+
} from "@tinacms/graphql";
|
|
42
13
|
|
|
43
14
|
// src/indexer/utils.ts
|
|
44
|
-
|
|
15
|
+
import * as sw from "stopword";
|
|
16
|
+
var INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
|
|
45
17
|
/**
 * Collects string tokens until a character budget is exhausted.
 *
 * The budget (`limit`) counts only the characters of the accepted tokens;
 * the single spaces inserted by `toString()` are not counted, so the joined
 * string can be longer than `limit`.
 */
var StringBuilder = class {
  /** Accepted tokens, in insertion order. */
  buffer = [];
  /** Maximum total number of token characters that may be accepted. */
  limit;
  /** Sum of the lengths of all accepted tokens. */
  length = 0;

  /**
   * @param {number} limit - Character budget for accepted tokens.
   */
  constructor(limit) {
    this.limit = limit;
  }

  /**
   * Tries to add a token.
   *
   * @param {string} str - Token to add.
   * @returns {boolean} `true` when the token did not fit (and was dropped),
   *   `false` when it was accepted.
   */
  append(str) {
    if (this.length + str.length > this.limit) return true;
    this.buffer.push(str);
    this.length += str.length;
    // The guard above already ensures the budget is not exceeded, so an
    // accepted token never reports "full".
    return false;
  }

  /**
   * @returns {string} All accepted tokens joined with a single space.
   */
  toString() {
    return this.buffer.join(" ");
  }
};
|
|
67
|
-
var
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
}
|
|
34
|
+
/**
 * Splits text into lower-cased tokens.
 *
 * Runs of whitespace, periods and commas act as separators; empty pieces
 * (for example from leading or trailing separators) are dropped.
 *
 * @param {string} str - Text to tokenize.
 * @returns {string[]} Lower-cased, non-empty tokens in source order.
 */
var tokenizeString = (str) => {
  const tokens = [];
  for (const piece of str.split(/[\s\.,]+/)) {
    if (piece) {
      tokens.push(piece.toLowerCase());
    }
  }
  return tokens;
};
|
|
37
|
+
/**
 * Walks a node tree and feeds the text of indexable nodes into `builder`.
 *
 * A node contributes when its `type` is listed in `nodeTypes` and it has a
 * truthy `text` (preferred) or `value`. Children are visited afterwards via
 * `children.forEach`.
 *
 * NOTE(review): when the builder rejects a token, only the walk of the
 * current node stops; the caller's `forEach` still visits the remaining
 * siblings.
 *
 * @param {object|null|undefined} data - Node to visit; falsy nodes are ignored.
 * @param {{ append(token: string): boolean }} builder - Token sink; `append`
 *   returns `true` when the token was rejected.
 * @param {string[]} nodeTypes - Node types whose text should be collected.
 * @returns {void}
 */
var extractText = (data, builder, nodeTypes) => {
  if (!data) {
    return;
  }

  const isIndexable = nodeTypes.includes(data.type ?? "");
  const content = data.text || data.value;
  if (isIndexable && content) {
    for (const token of tokenizeString(content)) {
      const rejected = builder.append(token);
      if (rejected) {
        return;
      }
    }
  }

  const { children } = data;
  if (children != null) {
    children.forEach((child) => extractText(child, builder, nodeTypes));
  }
};
|
|
84
|
-
var
|
|
47
|
+
/**
 * Derives a document path relative to its collection.
 *
 * Backslashes are converted to forward slashes, the first occurrence of
 * `collection.path` is removed, and one leading and one trailing slash are
 * stripped.
 *
 * @param {string} path - Document path.
 * @param {{ path: string }} collection - Collection whose `path` is removed.
 * @returns {string} The collection-relative path.
 */
var getRelativePath = (path, collection) => {
  const posixPath = path.replace(/\\/g, "/");
  const withoutCollection = posixPath.replace(collection.path, "");
  return withoutCollection.replace(/^\/|\/$/g, "");
};
|
|
87
|
-
var
|
|
88
|
-
return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
|
|
89
|
-
};
|
|
90
|
-
var processTextFieldValue = (value, maxLen) => {
|
|
50
|
+
/**
 * Tokenizes a plain string and keeps tokens until the length budget is hit.
 *
 * @param {string} value - Raw field value.
 * @param {number} maxLength - Character budget passed to `StringBuilder`.
 * @returns {string} Accepted tokens joined by single spaces.
 */
var processTextField = (value, maxLength) => {
  const tokens = tokenizeString(value);
  const collected = new StringBuilder(maxLength);
  // `some` stops at the first token the builder rejects.
  tokens.some((token) => collected.append(token));
  return collected.toString();
};
|
|
58
|
+
/**
 * Flattens a rich-text node tree into a space-joined token string, bounded
 * by `maxLength`.
 *
 * @param {object} value - Root node of the rich-text tree.
 * @param {number} maxLength - Character budget passed to `StringBuilder`.
 * @returns {string} Collected tokens joined by single spaces.
 */
var processRichTextField = (value, maxLength) => {
  const collected = new StringBuilder(maxLength);
  extractText(value, collected, INDEXABLE_NODE_TYPES);
  const flattened = collected.toString();
  return flattened;
};
|
|
63
|
+
/**
 * Prepares an object field (or a list of objects) for indexing by running
 * each object through `processDocumentForIndexing` with `field` as the
 * nested schema.
 *
 * @param {object|object[]} data - Field value; an array when `field.list` is truthy.
 * @param {string} path - Document path, forwarded unchanged.
 * @param {object} collection - Owning collection, forwarded unchanged.
 * @param {number} textIndexLength - Default text budget, forwarded unchanged.
 * @param {object} field - Schema of the object field.
 * @returns {object|object[]} The processed object(s).
 */
var processObjectField = (data, path, collection, textIndexLength, field) => {
  const indexOne = (obj) =>
    processDocumentForIndexing(obj, path, collection, textIndexLength, field);

  return field.list ? data.map(indexOne) : indexOne(data);
};
|
|
77
|
+
/**
 * Prepares a string field (or list of strings) for indexing.
 *
 * @param {string|string[]} data - Field value; an array when `isList` is true.
 * @param {number} maxLength - Character budget per string.
 * @param {boolean} isList - Whether `data` is a list of strings.
 * @returns {string|string[]} The processed string(s).
 */
var processStringField = (data, maxLength, isList) => {
  const truncate = (value) => processTextField(value, maxLength);
  return isList ? data.map(truncate) : truncate(data);
};
|
|
85
|
+
/**
 * Prepares a rich-text field (or list of rich-text values) for indexing.
 *
 * @param {object|object[]} data - Field value; an array when `isList` is true.
 * @param {number} maxLength - Character budget per rich-text value.
 * @param {boolean} isList - Whether `data` is a list of rich-text values.
 * @returns {string|string[]} The flattened text value(s).
 */
var processRichTextFieldData = (data, maxLength, isList) => {
  const flatten = (value) => processRichTextField(value, maxLength);
  return isList ? data.map(flatten) : flatten(data);
};
|
|
100
93
|
/**
 * Rewrites a document in place so that it only carries searchable,
 * length-bounded values.
 *
 * For a top-level call (no `field`), `_id` and `_relativePath` are set from
 * the collection name and the collection-relative path. Then, for every
 * schema field: non-searchable fields are deleted from `data`; falsy values
 * are left untouched; "object", "string" and "rich-text" values are replaced
 * by their processed form. Other field types are left as they are.
 *
 * @param {object} data - Document (or nested object) to process; mutated.
 * @param {string} path - Document path.
 * @param {object} collection - Collection the document belongs to.
 * @param {number} textIndexLength - Default per-field text budget.
 * @param {object} [field] - Schema of the nested object field being
 *   processed; omitted for the top-level document.
 * @returns {object} The same `data` object.
 */
var processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
  const isTopLevel = !field;
  if (isTopLevel) {
    const relativePath = getRelativePath(path, collection);
    data["_id"] = `${collection.name}:${relativePath}`;
    data["_relativePath"] = relativePath;
  }

  const schemaFields = field?.fields || collection.fields || [];
  for (const f of schemaFields) {
    if (!f.searchable) {
      delete data[f.name];
      continue;
    }

    const value = data[f.name];
    if (!value) {
      continue;
    }

    // A per-field budget overrides the indexer-wide default.
    const fieldMaxLength = f.maxSearchIndexFieldLength || textIndexLength;
    const isList = Boolean(f.list);

    if (f.type === "object") {
      data[f.name] = processObjectField(value, path, collection, textIndexLength, f);
    } else if (f.type === "string") {
      data[f.name] = processStringField(value, fieldMaxLength, isList);
    } else if (f.type === "rich-text") {
      data[f.name] = processRichTextFieldData(value, fieldMaxLength, isList);
    }
  }

  return data;
};
|
|
163
|
-
var
|
|
136
|
+
// Resolved stopword lists keyed by the comma-joined language keys. Created
// without a prototype so that no key can resolve to an inherited member.
var stopwordCache = Object.create(null);
// Words that are never treated as stopwords, whatever the language list says.
var PRESERVED_WORDS = ["about"];

/**
 * Resolves the stopword list for a set of language keys.
 *
 * Each key is looked up on the `stopword` module namespace (`sw`); unknown
 * keys contribute nothing. Words in `PRESERVED_WORDS` are always removed.
 * Results for a given key list are cached, and the cached list is the
 * filtered one, so repeated calls return the same content as the first call.
 *
 * @param {string[]} [keys] - Language keys; when omitted, `defaultStopWords`
 *   is used.
 * @param {string[]} [defaultStopWords=sw.eng] - List used when `keys` is
 *   not provided.
 * @returns {string[]} Stopwords without the preserved words.
 */
var lookupStopwords = (keys, defaultStopWords = sw.eng) => {
  const withoutPreserved = (words) =>
    words.filter((word) => !PRESERVED_WORDS.includes(word));

  if (!keys) {
    return withoutPreserved(defaultStopWords);
  }

  const cacheKey = keys.join(",");
  const cached = stopwordCache[cacheKey];
  if (cached) {
    return cached;
  }

  const stopwords = withoutPreserved(keys.flatMap((key) => sw[key] || []));
  stopwordCache[cacheKey] = stopwords;
  return stopwords;
};
|
|
178
150
|
|
|
179
151
|
// src/indexer/index.ts
|
|
180
152
|
var SearchIndexer = class {
|
|
153
|
+
batchSize;
|
|
154
|
+
client;
|
|
155
|
+
bridge;
|
|
156
|
+
schema;
|
|
157
|
+
textIndexLength;
|
|
181
158
|
constructor(options) {
|
|
182
159
|
this.client = options.client;
|
|
183
160
|
this.bridge = options.bridge;
|
|
@@ -185,15 +162,33 @@ var SearchIndexer = class {
|
|
|
185
162
|
this.batchSize = options.batchSize || 100;
|
|
186
163
|
this.textIndexLength = options.textIndexLength || 500;
|
|
187
164
|
}
|
|
165
|
+
createBatchProcessor() {
|
|
166
|
+
let batch = [];
|
|
167
|
+
return {
|
|
168
|
+
callback: async (item) => {
|
|
169
|
+
batch.push(item);
|
|
170
|
+
if (batch.length >= this.batchSize) {
|
|
171
|
+
await this.client.put(batch);
|
|
172
|
+
batch = [];
|
|
173
|
+
}
|
|
174
|
+
},
|
|
175
|
+
flush: async () => {
|
|
176
|
+
if (batch.length > 0) {
|
|
177
|
+
await this.client.put(batch);
|
|
178
|
+
batch = [];
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
};
|
|
182
|
+
}
|
|
188
183
|
makeIndexerCallback(itemCallback) {
|
|
189
184
|
return async (collection, contentPaths) => {
|
|
190
185
|
const templateInfo = this.schema.getTemplatesForCollectable(collection);
|
|
191
|
-
await
|
|
192
|
-
const data = await
|
|
186
|
+
await sequential(contentPaths, async (path) => {
|
|
187
|
+
const data = await transformDocumentIntoPayload(
|
|
193
188
|
`${collection.path}/${path}`,
|
|
194
|
-
|
|
189
|
+
transformDocument(
|
|
195
190
|
path,
|
|
196
|
-
await
|
|
191
|
+
await loadAndParseWithAliases(
|
|
197
192
|
this.bridge,
|
|
198
193
|
path,
|
|
199
194
|
collection,
|
|
@@ -215,98 +210,416 @@ var SearchIndexer = class {
|
|
|
215
210
|
};
|
|
216
211
|
}
|
|
217
212
|
async indexContentByPaths(documentPaths) {
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
batch.push(item);
|
|
222
|
-
if (batch.length > this.batchSize) {
|
|
223
|
-
await this.client.put(batch);
|
|
224
|
-
batch = [];
|
|
225
|
-
}
|
|
226
|
-
};
|
|
227
|
-
await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
|
|
228
|
-
await (0, import_graphql.scanContentByPaths)(
|
|
213
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
214
|
+
await this.client.onStartIndexing?.();
|
|
215
|
+
await scanContentByPaths(
|
|
229
216
|
this.schema,
|
|
230
217
|
documentPaths,
|
|
231
|
-
this.makeIndexerCallback(
|
|
218
|
+
this.makeIndexerCallback(callback)
|
|
232
219
|
);
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
}
|
|
236
|
-
await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
|
|
220
|
+
await flush();
|
|
221
|
+
await this.client.onFinishIndexing?.();
|
|
237
222
|
}
|
|
238
223
|
async indexAllContent() {
|
|
239
|
-
|
|
240
|
-
await
|
|
241
|
-
|
|
242
|
-
const itemCallback = async (item) => {
|
|
243
|
-
batch.push(item);
|
|
244
|
-
if (batch.length > this.batchSize) {
|
|
245
|
-
await this.client.put(batch);
|
|
246
|
-
batch = [];
|
|
247
|
-
}
|
|
248
|
-
};
|
|
249
|
-
const warnings = await (0, import_graphql.scanAllContent)(
|
|
224
|
+
const { callback, flush } = this.createBatchProcessor();
|
|
225
|
+
await this.client.onStartIndexing?.();
|
|
226
|
+
const warnings = await scanAllContent(
|
|
250
227
|
this.schema,
|
|
251
228
|
this.bridge,
|
|
252
|
-
this.makeIndexerCallback(
|
|
229
|
+
this.makeIndexerCallback(callback)
|
|
253
230
|
);
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
}
|
|
257
|
-
await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
|
|
231
|
+
await flush();
|
|
232
|
+
await this.client.onFinishIndexing?.();
|
|
258
233
|
return { warnings };
|
|
259
234
|
}
|
|
260
235
|
async deleteIndexContent(documentPaths) {
|
|
261
|
-
|
|
262
|
-
await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
|
|
236
|
+
await this.client.onStartIndexing?.();
|
|
263
237
|
await this.client.del(documentPaths);
|
|
264
|
-
await
|
|
238
|
+
await this.client.onFinishIndexing?.();
|
|
239
|
+
}
|
|
240
|
+
};
|
|
241
|
+
|
|
242
|
+
// src/client/index.ts
|
|
243
|
+
import * as sqliteLevelModule from "sqlite-level";
|
|
244
|
+
import si from "search-index";
|
|
245
|
+
import { MemoryLevel } from "memory-level";
|
|
246
|
+
|
|
247
|
+
// src/fuzzy/types.ts
|
|
248
|
+
/**
 * Defaults merged underneath caller-supplied fuzzy options.
 */
var DEFAULT_FUZZY_OPTIONS = {
  // Largest edit distance accepted for a non-prefix match.
  maxDistance: 2,
  // Smallest similarity score accepted for a non-prefix match.
  minSimilarity: 0.6,
  // Maximum number of matches returned per query term.
  maxResults: 10,
  // Use Damerau-Levenshtein (with transpositions) instead of Levenshtein.
  useTranspositions: true,
  // When false, query and dictionary terms are lower-cased before comparing.
  caseSensitive: false,
  // Pre-filter dictionary terms by n-gram overlap before measuring distance.
  useNgramFilter: true,
  // Length of the n-grams used by the pre-filter.
  ngramSize: 2,
  // Minimum n-gram overlap ratio a term needs to pass the pre-filter.
  minNgramOverlap: 0.2
};
|
|
258
|
+
|
|
259
|
+
// src/fuzzy/cache.ts
|
|
260
|
+
/**
 * Small least-recently-used cache for fuzzy lookups.
 *
 * Entries are keyed by the JSON serialisation of `{ query, options }`.
 * Recency is tracked through the insertion order of the underlying `Map`:
 * the first key is the least recently used one.
 */
var FuzzyCache = class {
  /** Backing store; iteration order is least- to most-recently used. */
  cache;
  /** Capacity above which the least recently used entry is evicted. */
  maxSize;

  /**
   * @param {number} [maxSize=100] - Maximum number of cached entries.
   */
  constructor(maxSize = 100) {
    this.cache = /* @__PURE__ */ new Map();
    this.maxSize = maxSize;
  }

  /**
   * Builds the cache key for a query/options pair.
   *
   * @param {string} query
   * @param {object} options
   * @returns {string}
   */
  getCacheKey(query, options) {
    return JSON.stringify({ query, options });
  }

  /**
   * Looks up a cached result and marks it as most recently used.
   *
   * @param {string} query
   * @param {object} options
   * @returns {*} The cached value, or `undefined` when there is none.
   */
  get(query, options) {
    const key = this.getCacheKey(query, options);
    if (!this.cache.has(key)) {
      return undefined;
    }
    const value = this.cache.get(key);
    // Re-insert so the key moves to the most-recently-used position.
    this.cache.delete(key);
    this.cache.set(key, value);
    return value;
  }

  /**
   * Stores a result as the most recently used entry. Another entry is
   * evicted only when a new key would exceed the capacity; overwriting an
   * existing key never evicts a different entry.
   *
   * @param {string} query
   * @param {object} options
   * @param {*} results - Value to cache.
   * @returns {void}
   */
  set(query, options, results) {
    const key = this.getCacheKey(query, options);
    // Drop any previous entry first so an overwrite refreshes recency and
    // does not count against the capacity check below.
    this.cache.delete(key);
    if (this.cache.size >= this.maxSize) {
      const oldestKey = this.cache.keys().next().value;
      this.cache.delete(oldestKey);
    }
    this.cache.set(key, results);
  }

  /** Removes every cached entry. */
  clear() {
    this.cache.clear();
  }

  /** @returns {number} Number of cached entries. */
  get size() {
    return this.cache.size;
  }
};
|
|
294
|
+
|
|
295
|
+
// src/fuzzy/distance.ts
|
|
296
|
+
/**
 * Levenshtein edit distance between two strings (insertions, deletions and
 * substitutions each cost 1), compared per UTF-16 code unit.
 *
 * Only two rows of the dynamic-programming table are kept, so memory use is
 * proportional to the shorter string instead of the product of both lengths.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} Minimum number of single-character edits.
 */
function levenshteinDistance(str1, str2) {
  if (str1 === str2) return 0;
  if (str1.length === 0) return str2.length;
  if (str2.length === 0) return str1.length;

  // The distance is symmetric, so put the shorter string on the column axis.
  const [rowStr, colStr] = str1.length >= str2.length ? [str1, str2] : [str2, str1];
  const width = colStr.length;

  let previous = Array.from({ length: width + 1 }, (_, j) => j);
  let current = new Array(width + 1).fill(0);

  for (let i = 1; i <= rowStr.length; i++) {
    current[0] = i;
    for (let j = 1; j <= width; j++) {
      if (rowStr[i - 1] === colStr[j - 1]) {
        current[j] = previous[j - 1];
      } else {
        current[j] = 1 + Math.min(previous[j], current[j - 1], previous[j - 1]);
      }
    }
    [previous, current] = [current, previous];
  }

  // After the final swap the last computed row lives in `previous`.
  return previous[width];
}
|
|
317
|
+
/**
 * Normalised similarity of two strings: 1 minus the edit distance divided
 * by the length of the longer string. Two empty strings score 1.
 *
 * @param {string} str1
 * @param {string} str2
 * @param {boolean} [useTranspositions=false] - Measure with
 *   `damerauLevenshteinDistance` instead of `levenshteinDistance`.
 * @returns {number} Similarity score.
 */
function similarityScore(str1, str2, useTranspositions = false) {
  const measure = useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
  const edits = measure(str1, str2);
  const longest = Math.max(str1.length, str2.length);
  return longest === 0 ? 1 : 1 - edits / longest;
}
|
|
323
|
+
/**
 * Edit distance that, besides insertions, deletions and substitutions, also
 * considers a transposition term based on the last row/column where the
 * involved characters matched.
 *
 * The table carries one extra border row and column (index 0) pre-filled
 * with a sentinel equal to `str1.length + str2.length`; real cells are
 * therefore shifted by one in both dimensions.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {number} The computed distance.
 */
function damerauLevenshteinDistance(str1, str2) {
  const rows = str1.length;
  const cols = str2.length;
  const sentinel = rows + cols;
  // For each character of str1 seen so far: the last (1-based) row it was on.
  const lastRowOfChar = new Map();
  const table = Array.from({ length: rows + 2 }, () => new Array(cols + 2).fill(0));

  table[0][0] = sentinel;
  for (let i = 0; i <= rows; i++) {
    table[i + 1][0] = sentinel;
    table[i + 1][1] = i;
  }
  for (let j = 0; j <= cols; j++) {
    table[0][j + 1] = sentinel;
    table[1][j + 1] = j;
  }

  for (let i = 1; i <= rows; i++) {
    const rowChar = str1[i - 1];
    // Last column in this row where the two characters were equal.
    let lastMatchCol = 0;

    for (let j = 1; j <= cols; j++) {
      const colChar = str2[j - 1];
      const matchRow = lastRowOfChar.get(colChar) ?? 0;
      const matchCol = lastMatchCol;
      const isSame = rowChar === colChar;
      if (isSame) {
        lastMatchCol = j;
      }

      const substitution = table[i][j] + (isSame ? 0 : 1);
      const insertion = table[i + 1][j] + 1;
      const deletion = table[i][j + 1] + 1;
      const transposition =
        table[matchRow][matchCol] + (i - matchRow - 1) + 1 + (j - matchCol - 1);

      table[i + 1][j + 1] = Math.min(substitution, insertion, deletion, transposition);
    }

    lastRowOfChar.set(rowChar, i);
  }

  return table[rows + 1][cols + 1];
}
|
|
359
|
+
/**
 * Returns the set of distinct substrings of length `n` in `str`.
 * A string shorter than `n` yields a set containing only the string itself.
 *
 * @param {string} str
 * @param {number} [n=2] - N-gram length.
 * @returns {Set<string>} Distinct n-grams in first-occurrence order.
 */
function getNgrams(str, n = 2) {
  if (str.length < n) {
    return /* @__PURE__ */ new Set([str]);
  }

  const grams = /* @__PURE__ */ new Set();
  const lastStart = str.length - n;
  for (let start = 0; start <= lastStart; start += 1) {
    grams.add(str.substring(start, start + n));
  }
  return grams;
}
|
|
370
|
+
/**
 * Overlap ratio of two n-gram sets: the number of shared n-grams divided by
 * the size of the smaller set. Returns 0 when either set is empty.
 *
 * @param {Set<string>} ngrams1
 * @param {Set<string>} ngrams2
 * @returns {number} Ratio between 0 and 1.
 */
function ngramOverlap(ngrams1, ngrams2) {
  const smallerSize = Math.min(ngrams1.size, ngrams2.size);
  if (smallerSize === 0) {
    return 0;
  }

  let shared = 0;
  ngrams1.forEach((gram) => {
    if (ngrams2.has(gram)) {
      shared += 1;
    }
  });
  return shared / smallerSize;
}
|
|
379
|
+
/**
 * Finds dictionary terms that are close to `query`.
 *
 * Per dictionary term, in order:
 *  1. non-string and empty entries are skipped;
 *  2. when the n-gram filter is on, terms whose n-gram overlap with the
 *     query is below `minNgramOverlap` are skipped;
 *  3. terms that start with the query are accepted as prefix matches with a
 *     similarity of at least 0.8 (not subject to `maxDistance` or
 *     `minSimilarity`);
 *  4. otherwise the edit distance is measured once and the term is accepted
 *     when it is within `maxDistance` and its similarity reaches
 *     `minSimilarity`.
 *
 * Matches are ordered by descending similarity (ties within 0.001 broken by
 * ascending distance) and truncated to `maxResults`.
 *
 * NOTE(review): because the n-gram filter runs before the prefix check, a
 * query shorter than `ngramSize` shares no n-gram with longer terms and so
 * never prefix-matches them while the filter is enabled — confirm this is
 * intended.
 *
 * @param {string} query - Term to look up.
 * @param {Iterable<*>} dictionary - Candidate terms.
 * @param {object} [options={}] - Overrides for `DEFAULT_FUZZY_OPTIONS`.
 * @returns {{ term: string, distance: number, similarity: number }[]}
 */
function findSimilarTerms(query, dictionary, options = {}) {
  const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
  const normalize = opts.caseSensitive ? (text) => text : (text) => text.toLowerCase();

  const normalizedQuery = normalize(query);
  if (normalizedQuery.length === 0) return [];

  const distanceFunc = opts.useTranspositions
    ? damerauLevenshteinDistance
    : levenshteinDistance;
  const queryNgrams = opts.useNgramFilter
    ? getNgrams(normalizedQuery, opts.ngramSize)
    : null;

  const matches = [];
  for (const term of dictionary) {
    if (typeof term !== "string" || term.length === 0) continue;
    const normalizedTerm = normalize(term);

    if (queryNgrams) {
      const termNgrams = getNgrams(normalizedTerm, opts.ngramSize);
      if (ngramOverlap(queryNgrams, termNgrams) < opts.minNgramOverlap) continue;
    }

    if (normalizedTerm.startsWith(normalizedQuery)) {
      const prefixSimilarity = normalizedQuery.length / normalizedTerm.length;
      matches.push({
        term,
        distance: normalizedTerm.length - normalizedQuery.length,
        // Prefix matches get at least 0.8 similarity
        similarity: Math.max(prefixSimilarity, 0.8)
      });
      continue;
    }

    const distance = distanceFunc(normalizedQuery, normalizedTerm);
    if (distance > opts.maxDistance) continue;

    // Same formula as similarityScore(), reusing the distance measured above
    // instead of running the edit-distance algorithm a second time. Both
    // strings are non-empty here, so the divisor is never 0.
    const longest = Math.max(normalizedQuery.length, normalizedTerm.length);
    const similarity = 1 - distance / longest;
    if (similarity >= opts.minSimilarity) {
      matches.push({ term, distance, similarity });
    }
  }

  matches.sort((a, b) => {
    if (Math.abs(a.similarity - b.similarity) < 1e-3) {
      return a.distance - b.distance;
    }
    return b.similarity - a.similarity;
  });
  return matches.slice(0, opts.maxResults);
}
|
|
423
|
+
|
|
424
|
+
// src/pagination.ts
|
|
425
|
+
/**
 * Parses a page cursor into a zero-based page number.
 * Missing, non-numeric and negative cursors all map to page 0.
 *
 * @param {string|undefined|null} cursor
 * @returns {number} A non-negative integer page number.
 */
function parsePageCursor(cursor) {
  const page = cursor ? Number.parseInt(cursor, 10) : 0;
  return Number.isNaN(page) || page < 0 ? 0 : page;
}

/**
 * Builds the paging options passed to the search index.
 *
 * @param {{ limit?: number, cursor?: string }} options
 * @returns {{ PAGE?: { NUMBER: number, SIZE: number } }} An empty object
 *   when no `limit` is given, otherwise a `PAGE` descriptor.
 */
function buildPageOptions(options) {
  if (!options.limit) return {};
  return {
    PAGE: {
      NUMBER: parsePageCursor(options.cursor),
      SIZE: options.limit
    }
  };
}

/**
 * Computes the previous/next cursors for the current page.
 *
 * @param {number} total - Total number of results across all pages.
 * @param {{ limit?: number, cursor?: string }} options
 * @returns {{ prevCursor: string|null, nextCursor: string|null }} Cursors
 *   as stringified page numbers; `null` when there is no such page. Without
 *   a `limit` there is never a next page.
 */
function buildPaginationCursors(total, options) {
  const currentPage = parsePageCursor(options.cursor);
  const pageSize = options.limit;
  const hasPreviousPage = currentPage > 0;
  const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
  return {
    prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
    nextCursor: hasNextPage ? (currentPage + 1).toString() : null
  };
}
|
|
444
|
+
|
|
445
|
+
// src/fuzzy-search-wrapper.ts
|
|
446
|
+
/**
 * Adds typo-tolerant querying on top of a search index.
 *
 * The wrapped index is used through two calls only: `DICTIONARY(token)` to
 * list indexed terms and `QUERY(query, pageOptions)` to run a search.
 */
var FuzzySearchWrapper = class {
  /** Cache of `findSimilar` results. */
  cache;
  /** The wrapped search index. */
  searchIndex;

  /**
   * @param {object} searchIndex - Index exposing `DICTIONARY` and `QUERY`.
   * @param {number} [cacheSize=100] - Capacity of the similar-terms cache.
   */
  constructor(searchIndex, cacheSize = 100) {
    this.searchIndex = searchIndex;
    this.cache = new FuzzyCache(cacheSize);
  }

  /**
   * Lists the indexed terms, optionally restricted to one field.
   *
   * @param {string} [field] - Field to restrict the dictionary to.
   * @returns {Promise<string[]>} Dictionary entries that are strings.
   */
  async getDictionary(field) {
    const token = field ? { FIELD: field } : void 0;
    const dictionary = await this.searchIndex.DICTIONARY(token);
    return dictionary.filter((entry) => typeof entry === "string");
  }

  /**
   * Finds dictionary terms similar to `query`, using the cache when possible.
   *
   * @param {string} query - Single term to look up.
   * @param {string} [field] - Field to restrict the dictionary to.
   * @param {object} [options={}] - Fuzzy options forwarded to `findSimilarTerms`.
   * @returns {Promise<object[]>} Matches as returned by `findSimilarTerms`.
   */
  async findSimilar(query, field, options = {}) {
    const cacheKey = `${query}:${field || "all"}`;
    const cached = this.cache.get(cacheKey, options);
    if (cached) return cached;
    const dictionary = await this.getDictionary(field);
    const matches = findSimilarTerms(query, dictionary, options);
    this.cache.set(cacheKey, options, matches);
    return matches;
  }

  /**
   * Splits a query on spaces and collects similar dictionary terms for each
   * term.
   *
   * @param {string} query - Space-separated query.
   * @param {object} [options={}] - Overrides for `DEFAULT_FUZZY_OPTIONS`.
   * @returns {Promise<{ original: string[], expanded: string[], matches: object }>}
   *   `original` holds the query terms as typed, `expanded` the distinct
   *   union of the terms and their alternatives, and `matches` maps each
   *   term that had at least one match to its match list.
   */
  async expandQuery(query, options = {}) {
    const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
    const terms = query.split(" ").map((t) => t.trim()).filter((t) => t.length > 0);
    const expanded = [];
    const matches = {};
    for (const term of terms) {
      const similarTerms = await this.findSimilar(term, void 0, opts);
      expanded.push(term);
      const similarValues = similarTerms
        .filter((m) => m.term.toLowerCase() !== term.toLowerCase())
        .map((m) => m.term);
      expanded.push(...similarValues);
      if (similarTerms.length > 0) {
        matches[term] = similarTerms;
      }
    }
    return {
      original: terms,
      expanded: Array.from(new Set(expanded)),
      matches
    };
  }

  /**
   * Runs a query, optionally expanding each term with fuzzy alternatives.
   *
   * Without `options.fuzzy` all terms are AND-ed as typed. With it, every
   * term becomes an OR group of itself and its similar terms, and the groups
   * are AND-ed together. When no term gained an alternative the plain AND
   * query is used.
   *
   * @param {string} query - Space-separated query.
   * @param {{ limit?: number, cursor?: string, fuzzy?: boolean, fuzzyOptions?: object }} [options={}]
   * @returns {Promise<object>} `{ results, total, prevCursor, nextCursor }`,
   *   plus `fuzzyMatches` for fuzzy queries.
   */
  async query(query, options = {}) {
    const pageOptions = buildPageOptions(options);

    // Runs the index query and shapes the response; shared by all branches.
    const runQuery = async (searchQuery, fuzzyMatches) => {
      const raw = await this.searchIndex.QUERY(searchQuery, pageOptions);
      const total = raw.RESULT_LENGTH || 0;
      const page = {
        results: raw.RESULT || [],
        total,
        ...buildPaginationCursors(total, options)
      };
      return fuzzyMatches === undefined ? page : { ...page, fuzzyMatches };
    };

    if (!options.fuzzy) {
      return runQuery({ AND: query.split(" ").filter((t) => t) });
    }

    const expansion = await this.expandQuery(query, options.fuzzyOptions);

    // `expanded` is de-duplicated, so compare it against the number of
    // distinct original terms; otherwise a repeated term could hide the
    // fact that alternatives were found.
    const distinctOriginalCount = new Set(expansion.original).size;
    if (expansion.expanded.length === distinctOriginalCount) {
      return runQuery({ AND: expansion.original }, expansion.matches);
    }

    const hasOwn = Object.prototype.hasOwnProperty;
    const queryGroups = expansion.original.map((originalTerm) => {
      // Own-property check: a term such as "constructor" must not resolve to
      // a member inherited from Object.prototype.
      const similarTerms = hasOwn.call(expansion.matches, originalTerm)
        ? expansion.matches[originalTerm].map((m) => m.term)
        : [];
      return [originalTerm, ...similarTerms];
    });
    const searchQuery = queryGroups.length === 1 ? { OR: queryGroups[0] } : {
      AND: queryGroups.map(
        (group) => group.length === 1 ? group[0] : { OR: group }
      )
    };
    return runQuery(searchQuery, expansion.matches);
  }

  /** Empties the similar-terms cache. */
  clearCache() {
    this.cache.clear();
  }

  /** @returns {number} Number of cached similar-term lookups. */
  getCacheSize() {
    return this.cache.size;
  }
};
|
|
267
549
|
|
|
268
550
|
// src/client/index.ts
|
|
269
|
-
|
|
270
|
-
var
|
|
271
|
-
var import_memory_level = require("memory-level");
|
|
272
|
-
var zlib = __toESM(require("zlib"));
|
|
551
|
+
import * as zlib from "node:zlib";
|
|
552
|
+
var SqliteLevel2 = sqliteLevelModule.default?.SqliteLevel ?? sqliteLevelModule.SqliteLevel;
|
|
273
553
|
var DEFAULT_TOKEN_SPLIT_REGEX = /[\p{L}\d_]+/gu;
|
|
274
554
|
var LocalSearchIndexClient = class {
|
|
555
|
+
searchIndex;
|
|
556
|
+
memoryLevel;
|
|
557
|
+
stopwords;
|
|
558
|
+
tokenSplitRegex;
|
|
559
|
+
fuzzySearchWrapper;
|
|
275
560
|
constructor(options) {
|
|
276
|
-
this.memoryLevel = new
|
|
561
|
+
this.memoryLevel = new MemoryLevel();
|
|
277
562
|
this.stopwords = lookupStopwords(options.stopwordLanguages);
|
|
278
563
|
this.tokenSplitRegex = options.tokenSplitRegex ? new RegExp(options.tokenSplitRegex, "gu") : DEFAULT_TOKEN_SPLIT_REGEX;
|
|
279
564
|
}
|
|
280
565
|
async onStartIndexing() {
|
|
281
|
-
|
|
282
|
-
// @ts-ignore
|
|
566
|
+
const options = {
|
|
283
567
|
db: this.memoryLevel,
|
|
284
568
|
stopwords: this.stopwords,
|
|
285
569
|
tokenSplitRegex: this.tokenSplitRegex
|
|
286
|
-
}
|
|
570
|
+
};
|
|
571
|
+
this.searchIndex = await si(
|
|
572
|
+
options
|
|
573
|
+
);
|
|
574
|
+
this.fuzzySearchWrapper = new FuzzySearchWrapper(this.searchIndex);
|
|
287
575
|
}
|
|
288
576
|
async put(docs) {
|
|
289
577
|
if (!this.searchIndex) {
|
|
290
578
|
throw new Error("onStartIndexing must be called first");
|
|
291
579
|
}
|
|
292
|
-
|
|
580
|
+
await this.searchIndex.PUT(docs);
|
|
293
581
|
}
|
|
294
582
|
async del(ids) {
|
|
295
583
|
if (!this.searchIndex) {
|
|
296
584
|
throw new Error("onStartIndexing must be called first");
|
|
297
585
|
}
|
|
298
|
-
|
|
299
|
-
}
|
|
300
|
-
query(query, options) {
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
586
|
+
await this.searchIndex.DELETE(ids);
|
|
587
|
+
}
|
|
588
|
+
async query(query, options) {
|
|
589
|
+
if (!this.searchIndex) {
|
|
590
|
+
throw new Error("onStartIndexing must be called first");
|
|
591
|
+
}
|
|
592
|
+
if (options?.fuzzy && this.fuzzySearchWrapper) {
|
|
593
|
+
return this.fuzzySearchWrapper.query(query, {
|
|
594
|
+
limit: options.limit,
|
|
595
|
+
cursor: options.cursor,
|
|
596
|
+
fuzzy: true,
|
|
597
|
+
fuzzyOptions: options.fuzzyOptions
|
|
598
|
+
});
|
|
599
|
+
}
|
|
600
|
+
const searchIndexOptions = buildPageOptions({
|
|
601
|
+
limit: options?.limit,
|
|
602
|
+
cursor: options?.cursor
|
|
603
|
+
});
|
|
604
|
+
const terms = query.split(" ").filter((t) => t.trim().length > 0);
|
|
605
|
+
const queryObj = terms.length > 1 ? { AND: terms } : { AND: [terms[0] || ""] };
|
|
606
|
+
const searchResults = await this.searchIndex.QUERY(
|
|
607
|
+
queryObj,
|
|
608
|
+
searchIndexOptions
|
|
609
|
+
);
|
|
610
|
+
const total = searchResults.RESULT_LENGTH || 0;
|
|
611
|
+
const pagination = buildPaginationCursors(total, {
|
|
612
|
+
limit: options?.limit,
|
|
613
|
+
cursor: options?.cursor
|
|
306
614
|
});
|
|
615
|
+
return {
|
|
616
|
+
results: searchResults.RESULT || [],
|
|
617
|
+
total,
|
|
618
|
+
...pagination
|
|
619
|
+
};
|
|
307
620
|
}
|
|
308
621
|
async export(filename) {
|
|
309
|
-
const sqliteLevel = new
|
|
622
|
+
const sqliteLevel = new SqliteLevel2({ filename });
|
|
310
623
|
const iterator = this.memoryLevel.iterator();
|
|
311
624
|
for await (const [key, value] of iterator) {
|
|
312
625
|
await sqliteLevel.put(key, value);
|
|
@@ -315,55 +628,73 @@ var LocalSearchIndexClient = class {
|
|
|
315
628
|
}
|
|
316
629
|
};
|
|
317
630
|
/**
 * Search index client that, once indexing finishes, uploads the index to a
 * remote endpoint: it requests a signed upload URL, serialises the in-memory
 * index into a gzipped SQLite image, and PUTs that image to the signed URL.
 */
var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
  /** Base URL of the upload API. */
  apiUrl;
  /** Branch name appended to the upload endpoint. */
  branch;
  /** Token sent in the `x-api-key` header. */
  indexerToken;

  /**
   * @param {object} options - Options for the base client plus `apiUrl`,
   *   `branch` and `indexerToken`.
   */
  constructor(options) {
    super(options);
    this.apiUrl = options.apiUrl;
    this.branch = options.branch;
    this.indexerToken = options.indexerToken;
  }

  /**
   * Requests a signed URL to upload the index to.
   *
   * @returns {Promise<string>} The signed upload URL.
   * @throws {Error} When the endpoint does not answer with status 200 or the
   *   response carries no `signedUrl`.
   */
  async getUploadUrl() {
    const headers = new Headers();
    headers.append("x-api-key", this.indexerToken || "");
    headers.append("Content-Type", "application/json");
    const response = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
      method: "GET",
      headers
    });
    if (response.status !== 200) {
      // The error body is optional detail; an unparsable body must not hide
      // the status code.
      const errorBody = await response.json().catch(() => ({}));
      throw new Error(
        `Failed to get upload url. Status: ${response.status}${errorBody?.message ? ` - ${errorBody.message}` : ""}`
      );
    }
    const { signedUrl } = await response.json();
    if (!signedUrl) {
      // Fail here with a clear message instead of passing `undefined` to fetch().
      throw new Error("Failed to get upload url. Response did not contain a signedUrl");
    }
    return signedUrl;
  }

  /**
   * Copies every entry of the in-memory index into a temporary in-memory
   * SQLite database and returns its gzipped serialisation.
   *
   * @returns {Promise<Buffer>} Gzipped SQLite image of the index.
   */
  async serializeIndex() {
    const sqliteLevel = new SqliteLevel2({ filename: ":memory:" });
    try {
      const iterator = this.memoryLevel.iterator();
      for await (const [key, value] of iterator) {
        await sqliteLevel.put(key, value);
      }
      const buffer = sqliteLevel.db.serialize();
      return zlib.gzipSync(buffer);
    } finally {
      // Release the temporary database even when copying or serialising fails.
      await sqliteLevel.close();
    }
  }

  /**
   * Uploads the serialised index.
   *
   * @param {string} signedUrl - URL returned by `getUploadUrl`.
   * @param {Buffer} data - Payload returned by `serializeIndex`.
   * @returns {Promise<void>}
   * @throws {Error} When the upload does not answer with status 200.
   */
  async uploadIndex(signedUrl, data) {
    const response = await fetch(signedUrl, {
      method: "PUT",
      body: data
    });
    if (response.status !== 200) {
      const errorText = await response.text();
      throw new Error(
        `Failed to upload search index. Status: ${response.status}
${errorText}`
      );
    }
  }

  /**
   * Uploads the freshly built index: get URL, serialise, upload.
   *
   * @returns {Promise<void>}
   */
  async onFinishIndexing() {
    const signedUrl = await this.getUploadUrl();
    const indexData = await this.serializeIndex();
    await this.uploadIndex(signedUrl, indexData);
  }
};
|
|
363
|
-
|
|
364
|
-
|
|
686
|
+
export {
|
|
687
|
+
DEFAULT_FUZZY_OPTIONS,
|
|
688
|
+
FuzzyCache,
|
|
689
|
+
FuzzySearchWrapper,
|
|
365
690
|
LocalSearchIndexClient,
|
|
366
691
|
SearchIndexer,
|
|
367
692
|
TinaCMSSearchIndexClient,
|
|
368
|
-
|
|
369
|
-
|
|
693
|
+
buildPageOptions,
|
|
694
|
+
buildPaginationCursors,
|
|
695
|
+
damerauLevenshteinDistance,
|
|
696
|
+
findSimilarTerms,
|
|
697
|
+
levenshteinDistance,
|
|
698
|
+
si2 as si,
|
|
699
|
+
similarityScore
|
|
700
|
+
};
|