@tinacms/search 0.0.0-f696c7d-20241104134240 → 0.0.0-f894432-20251221235528

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
package/dist/index.js CHANGED
@@ -1,179 +1,160 @@
- var __create = Object.create;
- var __defProp = Object.defineProperty;
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
- var __getOwnPropNames = Object.getOwnPropertyNames;
- var __getProtoOf = Object.getPrototypeOf;
- var __hasOwnProp = Object.prototype.hasOwnProperty;
- var __export = (target, all) => {
- for (var name in all)
- __defProp(target, name, { get: all[name], enumerable: true });
- };
- var __copyProps = (to, from, except, desc) => {
- if (from && typeof from === "object" || typeof from === "function") {
- for (let key of __getOwnPropNames(from))
- if (!__hasOwnProp.call(to, key) && key !== except)
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
- }
- return to;
- };
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
- mod
- ));
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
-
  // src/index.ts
- var src_exports = {};
- __export(src_exports, {
- LocalSearchIndexClient: () => LocalSearchIndexClient,
- SearchIndexer: () => SearchIndexer,
- TinaCMSSearchIndexClient: () => TinaCMSSearchIndexClient,
- si: () => import_search_index2.default
- });
- module.exports = __toCommonJS(src_exports);
- var import_search_index2 = __toESM(require("search-index"));
+ import si2 from "search-index";
 
  // src/indexer/index.ts
- var import_graphql = require("@tinacms/graphql");
+ import {
+ loadAndParseWithAliases,
+ sequential,
+ scanAllContent,
+ scanContentByPaths,
+ transformDocument,
+ transformDocumentIntoPayload
+ } from "@tinacms/graphql";
 
  // src/indexer/utils.ts
- var sw = __toESM(require("stopword"));
+ import * as sw from "stopword";
+ var INDEXABLE_NODE_TYPES = ["text", "code_block", "html"];
  var StringBuilder = class {
+ buffer = [];
+ limit;
+ length = 0;
  constructor(limit) {
- this.length = 0;
- this.buffer = [];
  this.limit = limit;
  }
  append(str) {
- if (this.length + str.length > this.limit) {
- return true;
- } else {
- this.buffer.push(str);
- this.length += str.length;
- if (this.length > this.limit) {
- return true;
- }
- return false;
- }
+ if (this.length + str.length > this.limit) return true;
+ this.buffer.push(str);
+ this.length += str.length;
+ return this.length > this.limit;
  }
  toString() {
  return this.buffer.join(" ");
  }
  };
- var extractText = (data, acc, indexableNodeTypes) => {
- var _a, _b;
- if (data) {
- if (indexableNodeTypes.indexOf(data.type) !== -1 && (data.text || data.value)) {
- const tokens = tokenizeString(data.text || data.value);
- for (const token of tokens) {
- if (acc.append(token)) {
- return;
- }
- }
+ var tokenizeString = (str) => {
+ return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
+ };
+ var extractText = (data, builder, nodeTypes) => {
+ if (!data) return;
+ if (nodeTypes.includes(data.type ?? "") && (data.text || data.value)) {
+ const tokens = tokenizeString(data.text || data.value || "");
+ for (const token of tokens) {
+ if (builder.append(token)) return;
  }
- (_b = (_a = data.children) == null ? void 0 : _a.forEach) == null ? void 0 : _b.call(
- _a,
- (child) => extractText(child, acc, indexableNodeTypes)
- );
  }
+ data.children?.forEach((child) => extractText(child, builder, nodeTypes));
  };
- var relativePath = (path, collection) => {
+ var getRelativePath = (path, collection) => {
  return path.replace(/\\/g, "/").replace(collection.path, "").replace(/^\/|\/$/g, "");
  };
- var tokenizeString = (str) => {
- return str.split(/[\s\.,]+/).map((s) => s.toLowerCase()).filter((s) => s);
- };
- var processTextFieldValue = (value, maxLen) => {
+ var processTextField = (value, maxLength) => {
  const tokens = tokenizeString(value);
- const builder = new StringBuilder(maxLen);
+ const builder = new StringBuilder(maxLength);
  for (const part of tokens) {
- if (builder.append(part)) {
- break;
- }
+ if (builder.append(part)) break;
  }
  return builder.toString();
  };
+ var processRichTextField = (value, maxLength) => {
+ const builder = new StringBuilder(maxLength);
+ extractText(value, builder, INDEXABLE_NODE_TYPES);
+ return builder.toString();
+ };
+ var processObjectField = (data, path, collection, textIndexLength, field) => {
+ if (field.list) {
+ return data.map(
+ (obj) => processDocumentForIndexing(obj, path, collection, textIndexLength, field)
+ );
+ }
+ return processDocumentForIndexing(
+ data,
+ path,
+ collection,
+ textIndexLength,
+ field
+ );
+ };
+ var processStringField = (data, maxLength, isList) => {
+ if (isList) {
+ return data.map(
+ (value) => processTextField(value, maxLength)
+ );
+ }
+ return processTextField(data, maxLength);
+ };
+ var processRichTextFieldData = (data, maxLength, isList) => {
+ if (isList) {
+ return data.map(
+ (value) => processRichTextField(value, maxLength)
+ );
+ }
+ return processRichTextField(data, maxLength);
+ };
  var processDocumentForIndexing = (data, path, collection, textIndexLength, field) => {
  if (!field) {
- const relPath = relativePath(path, collection);
- data["_id"] = `${collection.name}:${relPath}`;
- data["_relativePath"] = relPath;
+ const relativePath = getRelativePath(path, collection);
+ data["_id"] = `${collection.name}:${relativePath}`;
+ data["_relativePath"] = relativePath;
  }
- for (const f of (field == null ? void 0 : field.fields) || collection.fields || []) {
+ const fields = field?.fields || collection.fields || [];
+ for (const f of fields) {
  if (!f.searchable) {
  delete data[f.name];
  continue;
  }
- const isList = f.list;
- if (data[f.name]) {
- if (f.type === "object") {
- if (isList) {
- data[f.name] = data[f.name].map(
- (obj) => processDocumentForIndexing(
- obj,
- path,
- collection,
- textIndexLength,
- f
- )
- );
- } else {
- data[f.name] = processDocumentForIndexing(
- data[f.name],
- path,
- collection,
- textIndexLength,
- f
- );
- }
- } else if (f.type === "string") {
- const fieldTextIndexLength = f.maxSearchIndexFieldLength || textIndexLength;
- if (isList) {
- data[f.name] = data[f.name].map(
- (value) => processTextFieldValue(value, fieldTextIndexLength)
- );
- } else {
- data[f.name] = processTextFieldValue(
- data[f.name],
- fieldTextIndexLength
- );
- }
- } else if (f.type === "rich-text") {
- const fieldTextIndexLength = f.maxSearchIndexFieldLength || textIndexLength;
- if (isList) {
- data[f.name] = data[f.name].map((value) => {
- const acc = new StringBuilder(fieldTextIndexLength);
- extractText(value, acc, ["text", "code_block", "html"]);
- return acc.toString();
- });
- } else {
- const acc = new StringBuilder(fieldTextIndexLength);
- extractText(data[f.name], acc, ["text", "code_block", "html"]);
- data[f.name] = acc.toString();
- }
- }
+ if (!data[f.name]) continue;
+ const fieldMaxLength = f.maxSearchIndexFieldLength || textIndexLength;
+ const isList = Boolean(f.list);
+ switch (f.type) {
+ case "object":
+ data[f.name] = processObjectField(
+ data[f.name],
+ path,
+ collection,
+ textIndexLength,
+ f
+ );
+ break;
+ case "string":
+ data[f.name] = processStringField(
+ data[f.name],
+ fieldMaxLength,
+ isList
+ );
+ break;
+ case "rich-text":
+ data[f.name] = processRichTextFieldData(
+ data[f.name],
+ fieldMaxLength,
+ isList
+ );
+ break;
  }
  }
  return data;
  };
- var memo = {};
+ var stopwordCache = {};
+ var PRESERVED_WORDS = ["about"];
  var lookupStopwords = (keys, defaultStopWords = sw.eng) => {
  let stopwords = defaultStopWords;
  if (keys) {
- if (memo[keys.join(",")]) {
- return memo[keys.join(",")];
+ const cacheKey = keys.join(",");
+ if (stopwordCache[cacheKey]) {
+ return stopwordCache[cacheKey];
  }
- stopwords = [];
- for (const key of keys) {
- stopwords.push(...sw[key]);
- }
- memo[keys.join(",")] = stopwords;
+ stopwords = keys.flatMap((key) => sw[key] || []);
+ stopwordCache[cacheKey] = stopwords;
  }
- return stopwords;
+ return stopwords.filter((word) => !PRESERVED_WORDS.includes(word));
  };
 
  // src/indexer/index.ts
  var SearchIndexer = class {
+ batchSize;
+ client;
+ bridge;
+ schema;
+ textIndexLength;
  constructor(options) {
  this.client = options.client;
  this.bridge = options.bridge;
@@ -181,15 +162,33 @@ var SearchIndexer = class {
  this.batchSize = options.batchSize || 100;
  this.textIndexLength = options.textIndexLength || 500;
  }
+ createBatchProcessor() {
+ let batch = [];
+ return {
+ callback: async (item) => {
+ batch.push(item);
+ if (batch.length >= this.batchSize) {
+ await this.client.put(batch);
+ batch = [];
+ }
+ },
+ flush: async () => {
+ if (batch.length > 0) {
+ await this.client.put(batch);
+ batch = [];
+ }
+ }
+ };
+ }
  makeIndexerCallback(itemCallback) {
  return async (collection, contentPaths) => {
  const templateInfo = this.schema.getTemplatesForCollectable(collection);
- await (0, import_graphql.sequential)(contentPaths, async (path) => {
- const data = await (0, import_graphql.transformDocumentIntoPayload)(
+ await sequential(contentPaths, async (path) => {
+ const data = await transformDocumentIntoPayload(
  `${collection.path}/${path}`,
- (0, import_graphql.transformDocument)(
+ transformDocument(
  path,
- await (0, import_graphql.loadAndParseWithAliases)(
+ await loadAndParseWithAliases(
  this.bridge,
  path,
  collection,
@@ -211,97 +210,415 @@ var SearchIndexer = class {
  };
  }
  async indexContentByPaths(documentPaths) {
- var _a, _b, _c, _d;
- let batch = [];
- const itemCallback = async (item) => {
- batch.push(item);
- if (batch.length > this.batchSize) {
- await this.client.put(batch);
- batch = [];
- }
- };
- await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
- await (0, import_graphql.scanContentByPaths)(
+ const { callback, flush } = this.createBatchProcessor();
+ await this.client.onStartIndexing?.();
+ await scanContentByPaths(
  this.schema,
  documentPaths,
- this.makeIndexerCallback(itemCallback)
+ this.makeIndexerCallback(callback)
  );
- if (batch.length > 0) {
- await this.client.put(batch);
- }
- await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
+ await flush();
+ await this.client.onFinishIndexing?.();
  }
  async indexAllContent() {
- var _a, _b, _c, _d;
- await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
- let batch = [];
- const itemCallback = async (item) => {
- batch.push(item);
- if (batch.length > this.batchSize) {
- await this.client.put(batch);
- batch = [];
- }
- };
- const warnings = await (0, import_graphql.scanAllContent)(
+ const { callback, flush } = this.createBatchProcessor();
+ await this.client.onStartIndexing?.();
+ const warnings = await scanAllContent(
  this.schema,
  this.bridge,
- this.makeIndexerCallback(itemCallback)
+ this.makeIndexerCallback(callback)
  );
- if (batch.length > 0) {
- await this.client.put(batch);
- }
- await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
+ await flush();
+ await this.client.onFinishIndexing?.();
  return { warnings };
  }
  async deleteIndexContent(documentPaths) {
- var _a, _b, _c, _d;
- await ((_b = (_a = this.client).onStartIndexing) == null ? void 0 : _b.call(_a));
+ await this.client.onStartIndexing?.();
  await this.client.del(documentPaths);
- await ((_d = (_c = this.client).onFinishIndexing) == null ? void 0 : _d.call(_c));
+ await this.client.onFinishIndexing?.();
+ }
+ };
+
+ // src/client/index.ts
+ import * as sqliteLevelModule from "sqlite-level";
+ import si from "search-index";
+ import { MemoryLevel } from "memory-level";
+
+ // src/fuzzy/types.ts
+ var DEFAULT_FUZZY_OPTIONS = {
+ maxDistance: 2,
+ minSimilarity: 0.6,
+ maxResults: 10,
+ useTranspositions: true,
+ caseSensitive: false,
+ useNgramFilter: true,
+ ngramSize: 2,
+ minNgramOverlap: 0.2
+ };
+
+ // src/fuzzy/cache.ts
+ var FuzzyCache = class {
+ cache;
+ maxSize;
+ constructor(maxSize = 100) {
+ this.cache = /* @__PURE__ */ new Map();
+ this.maxSize = maxSize;
+ }
+ getCacheKey(query, options) {
+ return JSON.stringify({ query, options });
+ }
+ get(query, options) {
+ const key = this.getCacheKey(query, options);
+ const value = this.cache.get(key);
+ if (value) {
+ this.cache.delete(key);
+ this.cache.set(key, value);
+ }
+ return value;
+ }
+ set(query, options, results) {
+ const key = this.getCacheKey(query, options);
+ if (this.cache.size >= this.maxSize) {
+ const firstKey = this.cache.keys().next().value;
+ this.cache.delete(firstKey);
+ }
+ this.cache.set(key, results);
+ }
+ clear() {
+ this.cache.clear();
+ }
+ get size() {
+ return this.cache.size;
+ }
+ };
+
+ // src/fuzzy/distance.ts
+ function levenshteinDistance(str1, str2) {
+ const len1 = str1.length;
+ const len2 = str2.length;
+ const dp = Array(len1 + 1).fill(null).map(() => Array(len2 + 1).fill(0));
+ for (let i = 0; i <= len1; i++) dp[i][0] = i;
+ for (let j = 0; j <= len2; j++) dp[0][j] = j;
+ for (let i = 1; i <= len1; i++) {
+ for (let j = 1; j <= len2; j++) {
+ if (str1[i - 1] === str2[j - 1]) {
+ dp[i][j] = dp[i - 1][j - 1];
+ } else {
+ dp[i][j] = Math.min(
+ dp[i - 1][j] + 1,
+ dp[i][j - 1] + 1,
+ dp[i - 1][j - 1] + 1
+ );
+ }
+ }
+ }
+ return dp[len1][len2];
+ }
+ function similarityScore(str1, str2, useTranspositions = false) {
+ const distance = useTranspositions ? damerauLevenshteinDistance(str1, str2) : levenshteinDistance(str1, str2);
+ const maxLength = Math.max(str1.length, str2.length);
+ if (maxLength === 0) return 1;
+ return 1 - distance / maxLength;
+ }
+ function damerauLevenshteinDistance(str1, str2) {
+ const len1 = str1.length;
+ const len2 = str2.length;
+ const maxDist = len1 + len2;
+ const charLastPosition = {};
+ const dp = Array(len1 + 2).fill(null).map(() => Array(len2 + 2).fill(0));
+ dp[0][0] = maxDist;
+ for (let i = 0; i <= len1; i++) {
+ dp[i + 1][0] = maxDist;
+ dp[i + 1][1] = i;
+ }
+ for (let j = 0; j <= len2; j++) {
+ dp[0][j + 1] = maxDist;
+ dp[1][j + 1] = j;
+ }
+ for (let i = 1; i <= len1; i++) {
+ let lastMatchingCol = 0;
+ for (let j = 1; j <= len2; j++) {
+ const lastRowWithMatch = charLastPosition[str2[j - 1]] || 0;
+ const lastColWithMatch = lastMatchingCol;
+ let cost = 1;
+ if (str1[i - 1] === str2[j - 1]) {
+ cost = 0;
+ lastMatchingCol = j;
+ }
+ dp[i + 1][j + 1] = Math.min(
+ dp[i][j] + cost,
+ dp[i + 1][j] + 1,
+ dp[i][j + 1] + 1,
+ dp[lastRowWithMatch][lastColWithMatch] + (i - lastRowWithMatch - 1) + 1 + (j - lastColWithMatch - 1)
+ );
+ }
+ charLastPosition[str1[i - 1]] = i;
+ }
+ return dp[len1 + 1][len2 + 1];
+ }
+ function getNgrams(str, n = 2) {
+ const ngrams = /* @__PURE__ */ new Set();
+ if (str.length < n) {
+ ngrams.add(str);
+ return ngrams;
+ }
+ for (let i = 0; i <= str.length - n; i++) {
+ ngrams.add(str.substring(i, i + n));
+ }
+ return ngrams;
+ }
+ function ngramOverlap(ngrams1, ngrams2) {
+ if (ngrams1.size === 0 || ngrams2.size === 0) return 0;
+ let overlap = 0;
+ for (const ngram of ngrams1) {
+ if (ngrams2.has(ngram)) overlap++;
+ }
+ const minSize = Math.min(ngrams1.size, ngrams2.size);
+ return overlap / minSize;
+ }
+ function findSimilarTerms(query, dictionary, options = {}) {
+ const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
+ const normalizedQuery = opts.caseSensitive ? query : query.toLowerCase();
+ if (normalizedQuery.length === 0) return [];
+ const matches = [];
+ const distanceFunc = opts.useTranspositions ? damerauLevenshteinDistance : levenshteinDistance;
+ const queryNgrams = opts.useNgramFilter ? getNgrams(normalizedQuery, opts.ngramSize) : null;
+ for (const term of dictionary) {
+ if (typeof term !== "string" || term.length === 0) continue;
+ const normalizedTerm = opts.caseSensitive ? term : term.toLowerCase();
+ if (queryNgrams) {
+ const termNgrams = getNgrams(normalizedTerm, opts.ngramSize);
+ const overlap = ngramOverlap(queryNgrams, termNgrams);
+ if (overlap < opts.minNgramOverlap) continue;
+ }
+ if (normalizedTerm.startsWith(normalizedQuery)) {
+ const prefixSimilarity = normalizedQuery.length / normalizedTerm.length;
+ matches.push({
+ term,
+ distance: normalizedTerm.length - normalizedQuery.length,
+ similarity: Math.max(prefixSimilarity, 0.8)
+ });
+ continue;
+ }
+ const distance = distanceFunc(normalizedQuery, normalizedTerm);
+ if (distance > opts.maxDistance) continue;
+ const similarity = similarityScore(
+ normalizedQuery,
+ normalizedTerm,
+ opts.useTranspositions
+ );
+ if (similarity >= opts.minSimilarity) {
+ matches.push({ term, distance, similarity });
+ }
+ }
+ matches.sort((a, b) => {
+ if (Math.abs(a.similarity - b.similarity) < 1e-3) {
+ return a.distance - b.distance;
+ }
+ return b.similarity - a.similarity;
+ });
+ return matches.slice(0, opts.maxResults);
+ }
+
+ // src/pagination.ts
+ function buildPageOptions(options) {
+ if (!options.limit) return {};
+ return {
+ PAGE: {
+ NUMBER: options.cursor ? parseInt(options.cursor, 10) : 0,
+ SIZE: options.limit
+ }
+ };
+ }
+ function buildPaginationCursors(total, options) {
+ const currentPage = options.cursor ? parseInt(options.cursor, 10) : 0;
+ const pageSize = options.limit;
+ const hasPreviousPage = currentPage > 0;
+ const hasNextPage = pageSize ? total > (currentPage + 1) * pageSize : false;
+ return {
+ prevCursor: hasPreviousPage ? (currentPage - 1).toString() : null,
+ nextCursor: hasNextPage ? (currentPage + 1).toString() : null
+ };
+ }
+
+ // src/fuzzy-search-wrapper.ts
+ var FuzzySearchWrapper = class {
+ cache;
+ searchIndex;
+ constructor(searchIndex, cacheSize = 100) {
+ this.searchIndex = searchIndex;
+ this.cache = new FuzzyCache(cacheSize);
+ }
+ async getDictionary(field) {
+ const token = field ? { FIELD: field } : void 0;
+ const dictionary = await this.searchIndex.DICTIONARY(token);
+ return dictionary.filter((entry) => typeof entry === "string");
+ }
+ async findSimilar(query, field, options = {}) {
+ const cacheKey = `${query}:${field || "all"}`;
+ const cached = this.cache.get(cacheKey, options);
+ if (cached) return cached;
+ const dictionary = await this.getDictionary(field);
+ const matches = findSimilarTerms(query, dictionary, options);
+ this.cache.set(cacheKey, options, matches);
+ return matches;
+ }
+ async expandQuery(query, options = {}) {
+ const opts = { ...DEFAULT_FUZZY_OPTIONS, ...options };
+ const terms = query.split(" ").map((t) => t.trim()).filter((t) => t.length > 0);
+ const expanded = [];
+ const matches = {};
+ for (const term of terms) {
+ const similarTerms = await this.findSimilar(term, void 0, opts);
+ expanded.push(term);
+ const similarValues = similarTerms.filter((m) => m.term.toLowerCase() !== term.toLowerCase()).map((m) => m.term);
+ expanded.push(...similarValues);
+ if (similarTerms.length > 0) {
+ matches[term] = similarTerms;
+ }
+ }
+ return {
+ original: terms,
+ expanded: Array.from(new Set(expanded)),
+ matches
+ };
+ }
+ async query(query, options = {}) {
+ const pageOptions = buildPageOptions(options);
+ if (!options.fuzzy) {
+ const results2 = await this.searchIndex.QUERY(
+ { AND: query.split(" ").filter((t) => t) },
+ pageOptions
+ );
+ const pagination2 = buildPaginationCursors(
+ results2.RESULT_LENGTH || 0,
+ options
+ );
+ return {
+ results: results2.RESULT || [],
+ total: results2.RESULT_LENGTH || 0,
+ ...pagination2
+ };
+ }
+ const expansion = await this.expandQuery(query, options.fuzzyOptions);
+ if (expansion.expanded.length === expansion.original.length) {
+ const results2 = await this.searchIndex.QUERY(
+ { AND: expansion.original },
+ pageOptions
+ );
+ const pagination2 = buildPaginationCursors(
+ results2.RESULT_LENGTH || 0,
+ options
+ );
+ return {
+ results: results2.RESULT || [],
+ total: results2.RESULT_LENGTH || 0,
+ ...pagination2,
+ fuzzyMatches: expansion.matches
+ };
+ }
+ const queryGroups = expansion.original.map((originalTerm) => {
+ const similarTerms = expansion.matches[originalTerm]?.map((m) => m.term) || [];
+ return [originalTerm, ...similarTerms];
+ });
+ const searchQuery = queryGroups.length === 1 ? { OR: queryGroups[0] } : {
+ AND: queryGroups.map(
+ (group) => group.length === 1 ? group[0] : { OR: group }
+ )
+ };
+ const results = await this.searchIndex.QUERY(searchQuery, pageOptions);
+ const pagination = buildPaginationCursors(
+ results.RESULT_LENGTH || 0,
+ options
+ );
+ return {
+ results: results.RESULT || [],
+ total: results.RESULT_LENGTH || 0,
+ ...pagination,
+ fuzzyMatches: expansion.matches
+ };
+ }
+ clearCache() {
+ this.cache.clear();
+ }
+ getCacheSize() {
+ return this.cache.size;
  }
  };
 
  // src/client/index.ts
- var import_sqlite_level = require("sqlite-level");
- var import_search_index = __toESM(require("search-index"));
- var import_memory_level = require("memory-level");
- var zlib = __toESM(require("zlib"));
+ import * as zlib from "node:zlib";
+ var SqliteLevel2 = sqliteLevelModule.default?.SqliteLevel ?? sqliteLevelModule.SqliteLevel;
  var DEFAULT_TOKEN_SPLIT_REGEX = /[\p{L}\d_]+/gu;
  var LocalSearchIndexClient = class {
+ searchIndex;
+ memoryLevel;
+ stopwords;
+ tokenSplitRegex;
+ fuzzySearchWrapper;
  constructor(options) {
- this.memoryLevel = new import_memory_level.MemoryLevel();
+ this.memoryLevel = new MemoryLevel();
  this.stopwords = lookupStopwords(options.stopwordLanguages);
  this.tokenSplitRegex = options.tokenSplitRegex ? new RegExp(options.tokenSplitRegex, "gu") : DEFAULT_TOKEN_SPLIT_REGEX;
  }
  async onStartIndexing() {
- this.searchIndex = await (0, import_search_index.default)({
+ const options = {
  db: this.memoryLevel,
  stopwords: this.stopwords,
  tokenSplitRegex: this.tokenSplitRegex
- });
+ };
+ this.searchIndex = await si(
+ options
+ );
+ this.fuzzySearchWrapper = new FuzzySearchWrapper(this.searchIndex);
  }
  async put(docs) {
  if (!this.searchIndex) {
  throw new Error("onStartIndexing must be called first");
  }
- return this.searchIndex.PUT(docs);
+ await this.searchIndex.PUT(docs);
  }
  async del(ids) {
  if (!this.searchIndex) {
  throw new Error("onStartIndexing must be called first");
  }
- return this.searchIndex.DELETE(ids);
- }
- query(query, options) {
- return Promise.resolve({
- nextCursor: void 0,
- prevCursor: void 0,
- results: [],
- total: 0
+ await this.searchIndex.DELETE(ids);
+ }
+ async query(query, options) {
+ if (!this.searchIndex) {
+ throw new Error("onStartIndexing must be called first");
+ }
+ if (options?.fuzzy && this.fuzzySearchWrapper) {
+ return this.fuzzySearchWrapper.query(query, {
+ limit: options.limit,
+ cursor: options.cursor,
+ fuzzy: true,
+ fuzzyOptions: options.fuzzyOptions
+ });
+ }
+ const searchIndexOptions = buildPageOptions({
+ limit: options?.limit,
+ cursor: options?.cursor
+ });
+ const terms = query.split(" ").filter((t) => t.trim().length > 0);
+ const queryObj = terms.length > 1 ? { AND: terms } : { AND: [terms[0] || ""] };
+ const searchResults = await this.searchIndex.QUERY(
+ queryObj,
+ searchIndexOptions
+ );
+ const total = searchResults.RESULT_LENGTH || 0;
+ const pagination = buildPaginationCursors(total, {
+ limit: options?.limit,
+ cursor: options?.cursor
  });
+ return {
+ results: searchResults.RESULT || [],
+ total,
+ ...pagination
+ };
  }
  async export(filename) {
- const sqliteLevel = new import_sqlite_level.SqliteLevel({ filename });
+ const sqliteLevel = new SqliteLevel2({ filename });
  const iterator = this.memoryLevel.iterator();
  for await (const [key, value] of iterator) {
  await sqliteLevel.put(key, value);
@@ -310,55 +627,73 @@ var LocalSearchIndexClient = class {
  }
  };
  var TinaCMSSearchIndexClient = class extends LocalSearchIndexClient {
+ apiUrl;
+ branch;
+ indexerToken;
  constructor(options) {
  super(options);
  this.apiUrl = options.apiUrl;
  this.branch = options.branch;
  this.indexerToken = options.indexerToken;
  }
- async onFinishIndexing() {
+ async getUploadUrl() {
  const headers = new Headers();
- headers.append("x-api-key", this.indexerToken || "bogus");
+ headers.append("x-api-key", this.indexerToken || "");
  headers.append("Content-Type", "application/json");
- let res = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
+ const response = await fetch(`${this.apiUrl}/upload/${this.branch}`, {
  method: "GET",
  headers
  });
- if (res.status !== 200) {
- let json;
- try {
- json = await res.json();
- } catch (e) {
- console.error("Failed to parse error response", e);
- }
+ if (response.status !== 200) {
+ const errorBody = await response.json().catch(() => ({}));
  throw new Error(
- `Failed to get upload url. Status: ${res.status}${(json == null ? void 0 : json.message) ? ` - ${json.message}` : ``}`
+ `Failed to get upload url. Status: ${response.status}${errorBody?.message ? ` - ${errorBody.message}` : ""}`
  );
  }
- const { signedUrl } = await res.json();
- const sqliteLevel = new import_sqlite_level.SqliteLevel({ filename: ":memory:" });
+ const { signedUrl } = await response.json();
+ return signedUrl;
+ }
+ async serializeIndex() {
+ const sqliteLevel = new SqliteLevel2({ filename: ":memory:" });
  const iterator = this.memoryLevel.iterator();
  for await (const [key, value] of iterator) {
  await sqliteLevel.put(key, value);
  }
  const buffer = sqliteLevel.db.serialize();
  await sqliteLevel.close();
- res = await fetch(signedUrl, {
+ return zlib.gzipSync(buffer);
+ }
+ async uploadIndex(signedUrl, data) {
+ const response = await fetch(signedUrl, {
  method: "PUT",
- body: zlib.gzipSync(buffer)
+ body: data
  });
- if (res.status !== 200) {
+ if (response.status !== 200) {
+ const errorText = await response.text();
  throw new Error(
- `Failed to upload search index. Status: ${res.status}
- ${await res.text()}`
+ `Failed to upload search index. Status: ${response.status}
+ ${errorText}`
  );
  }
  }
+ async onFinishIndexing() {
+ const signedUrl = await this.getUploadUrl();
+ const indexData = await this.serializeIndex();
+ await this.uploadIndex(signedUrl, indexData);
+ }
  };
- // Annotate the CommonJS export names for ESM import in node:
- 0 && (module.exports = {
+ export {
+ DEFAULT_FUZZY_OPTIONS,
+ FuzzyCache,
+ FuzzySearchWrapper,
  LocalSearchIndexClient,
  SearchIndexer,
  TinaCMSSearchIndexClient,
- si
- });
+ buildPageOptions,
+ buildPaginationCursors,
+ damerauLevenshteinDistance,
+ findSimilarTerms,
+ levenshteinDistance,
+ si2 as si,
+ similarityScore
+ };
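
Usage sketch (not part of the published diff): based on the new ESM exports above, a consumer might exercise the fuzzy query path roughly as follows. Only the class and method names come from the code in this diff; the import specifier, document shape, query string, and option values are illustrative assumptions.

  // Hypothetical consumer code, assuming the package is imported as "@tinacms/search"
  import { LocalSearchIndexClient } from "@tinacms/search";

  const client = new LocalSearchIndexClient({ stopwordLanguages: ["eng"] }); // option name taken from the constructor above
  await client.onStartIndexing(); // builds the in-memory search-index and the fuzzy wrapper
  await client.put([{ _id: "post:hello-world.md", title: "Hello world" }]); // example document; field shape assumed
  const page = await client.query("helo wrld", { limit: 10, fuzzy: true }); // misspelled terms expanded via the new fuzzy path
  console.log(page.total, page.results, page.nextCursor);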