@86d-app/search 0.0.23 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/dist/modules/search/src/__tests__/admin-settings.test.js +262 -0
  2. package/dist/modules/search/src/__tests__/controllers.test.js +853 -0
  3. package/dist/modules/search/src/__tests__/embedding-provider.test.js +150 -0
  4. package/dist/modules/search/src/__tests__/endpoint-security.test.js +250 -0
  5. package/dist/modules/search/src/__tests__/meilisearch-provider.test.js +318 -0
  6. package/dist/modules/search/src/__tests__/service-impl.test.js +703 -0
  7. package/dist/modules/search/src/__tests__/store-endpoints.test.js +295 -0
  8. package/dist/{admin/components/index.d.ts → modules/search/src/admin/components/index.jsx} +0 -1
  9. package/dist/modules/search/src/admin/components/search-analytics.jsx +230 -0
  10. package/dist/modules/search/src/admin/endpoints/analytics.js +9 -0
  11. package/dist/modules/search/src/admin/endpoints/bulk-index.js +26 -0
  12. package/dist/modules/search/src/admin/endpoints/click-analytics.js +9 -0
  13. package/dist/modules/search/src/admin/endpoints/get-settings.js +97 -0
  14. package/dist/modules/search/src/admin/endpoints/index-manage.js +32 -0
  15. package/dist/modules/search/src/admin/endpoints/index.js +21 -0
  16. package/dist/modules/search/src/admin/endpoints/popular.js +11 -0
  17. package/dist/modules/search/src/admin/endpoints/synonyms.js +30 -0
  18. package/dist/modules/search/src/admin/endpoints/zero-results.js +11 -0
  19. package/dist/modules/search/src/embedding-provider.js +77 -0
  20. package/dist/modules/search/src/index.js +75 -0
  21. package/dist/modules/search/src/meilisearch-provider.js +138 -0
  22. package/dist/modules/search/src/schema.js +61 -0
  23. package/dist/modules/search/src/service-impl.js +770 -0
  24. package/dist/modules/search/src/service.js +1 -0
  25. package/dist/modules/search/src/store/components/_hooks.js +10 -0
  26. package/dist/modules/search/src/store/components/index.jsx +9 -0
  27. package/dist/modules/search/src/store/components/search-bar.jsx +91 -0
  28. package/dist/modules/search/src/store/components/search-page.jsx +17 -0
  29. package/dist/modules/search/src/store/components/search-results.jsx +51 -0
  30. package/dist/modules/search/src/store/endpoints/click.js +15 -0
  31. package/dist/modules/search/src/store/endpoints/index.js +12 -0
  32. package/dist/modules/search/src/store/endpoints/recent.js +18 -0
  33. package/dist/modules/search/src/store/endpoints/search.js +57 -0
  34. package/dist/modules/search/src/store/endpoints/store-search.js +33 -0
  35. package/dist/modules/search/src/store/endpoints/suggest.js +12 -0
  36. package/package.json +1 -1
  37. package/src/__tests__/admin-settings.test.ts +367 -0
  38. package/src/__tests__/store-endpoints.test.ts +392 -0
  39. package/src/admin/endpoints/get-settings.ts +77 -0
  40. package/dist/__tests__/controllers.test.d.ts +0 -2
  41. package/dist/__tests__/controllers.test.d.ts.map +0 -1
  42. package/dist/__tests__/embedding-provider.test.d.ts +0 -2
  43. package/dist/__tests__/embedding-provider.test.d.ts.map +0 -1
  44. package/dist/__tests__/endpoint-security.test.d.ts +0 -2
  45. package/dist/__tests__/endpoint-security.test.d.ts.map +0 -1
  46. package/dist/__tests__/meilisearch-provider.test.d.ts +0 -2
  47. package/dist/__tests__/meilisearch-provider.test.d.ts.map +0 -1
  48. package/dist/__tests__/service-impl.test.d.ts +0 -2
  49. package/dist/__tests__/service-impl.test.d.ts.map +0 -1
  50. package/dist/admin/components/index.d.ts.map +0 -1
  51. package/dist/admin/components/search-analytics.d.ts +0 -2
  52. package/dist/admin/components/search-analytics.d.ts.map +0 -1
  53. package/dist/admin/endpoints/analytics.d.ts +0 -15
  54. package/dist/admin/endpoints/analytics.d.ts.map +0 -1
  55. package/dist/admin/endpoints/bulk-index.d.ts +0 -20
  56. package/dist/admin/endpoints/bulk-index.d.ts.map +0 -1
  57. package/dist/admin/endpoints/click-analytics.d.ts +0 -7
  58. package/dist/admin/endpoints/click-analytics.d.ts.map +0 -1
  59. package/dist/admin/endpoints/get-settings.d.ts +0 -17
  60. package/dist/admin/endpoints/get-settings.d.ts.map +0 -1
  61. package/dist/admin/endpoints/index-manage.d.ts +0 -26
  62. package/dist/admin/endpoints/index-manage.d.ts.map +0 -1
  63. package/dist/admin/endpoints/index.d.ts +0 -125
  64. package/dist/admin/endpoints/index.d.ts.map +0 -1
  65. package/dist/admin/endpoints/popular.d.ts +0 -10
  66. package/dist/admin/endpoints/popular.d.ts.map +0 -1
  67. package/dist/admin/endpoints/synonyms.d.ts +0 -30
  68. package/dist/admin/endpoints/synonyms.d.ts.map +0 -1
  69. package/dist/admin/endpoints/zero-results.d.ts +0 -10
  70. package/dist/admin/endpoints/zero-results.d.ts.map +0 -1
  71. package/dist/embedding-provider.d.ts +0 -28
  72. package/dist/embedding-provider.d.ts.map +0 -1
  73. package/dist/index.d.ts +0 -23
  74. package/dist/index.d.ts.map +0 -1
  75. package/dist/meilisearch-provider.d.ts +0 -104
  76. package/dist/meilisearch-provider.d.ts.map +0 -1
  77. package/dist/schema.d.ts +0 -133
  78. package/dist/schema.d.ts.map +0 -1
  79. package/dist/service-impl.d.ts +0 -6
  80. package/dist/service-impl.d.ts.map +0 -1
  81. package/dist/service.d.ts +0 -127
  82. package/dist/service.d.ts.map +0 -1
  83. package/dist/store/components/_hooks.d.ts +0 -6
  84. package/dist/store/components/_hooks.d.ts.map +0 -1
  85. package/dist/store/components/index.d.ts +0 -10
  86. package/dist/store/components/index.d.ts.map +0 -1
  87. package/dist/store/components/search-bar.d.ts +0 -7
  88. package/dist/store/components/search-bar.d.ts.map +0 -1
  89. package/dist/store/components/search-page.d.ts +0 -4
  90. package/dist/store/components/search-page.d.ts.map +0 -1
  91. package/dist/store/components/search-results.d.ts +0 -9
  92. package/dist/store/components/search-results.d.ts.map +0 -1
  93. package/dist/store/endpoints/click.d.ts +0 -14
  94. package/dist/store/endpoints/click.d.ts.map +0 -1
  95. package/dist/store/endpoints/index.d.ts +0 -85
  96. package/dist/store/endpoints/index.d.ts.map +0 -1
  97. package/dist/store/endpoints/recent.d.ts +0 -15
  98. package/dist/store/endpoints/recent.d.ts.map +0 -1
  99. package/dist/store/endpoints/search.d.ts +0 -36
  100. package/dist/store/endpoints/search.d.ts.map +0 -1
  101. package/dist/store/endpoints/store-search.d.ts +0 -16
  102. package/dist/store/endpoints/store-search.d.ts.map +0 -1
  103. package/dist/store/endpoints/suggest.d.ts +0 -11
  104. package/dist/store/endpoints/suggest.d.ts.map +0 -1
@@ -0,0 +1,770 @@
1
+ import { cosineSimilarity } from "./embedding-provider";
2
/**
 * Canonicalize text for comparison: lower-case it, strip leading and
 * trailing whitespace, and collapse each internal whitespace run into a
 * single space.
 *
 * @param {string} text - Raw text.
 * @returns {string} The canonical form.
 */
function normalize(text) {
    const lowered = text.toLowerCase();
    const trimmed = lowered.trim();
    return trimmed.replace(/\s+/g, " ");
}
5
/**
 * Split text into normalized tokens.
 *
 * The text is normalized first, then cut on runs of whitespace, hyphens,
 * underscores, slashes, commas and periods. Empty fragments (produced by
 * leading or trailing separators) are dropped.
 *
 * @param {string} text - Raw text.
 * @returns {string[]} Non-empty tokens in their original order.
 */
function tokenize(text) {
    const fragments = normalize(text).split(/[\s\-_/,.]+/);
    const tokens = [];
    for (const fragment of fragments) {
        if (fragment.length > 0) {
            tokens.push(fragment);
        }
    }
    return tokens;
}
10
/**
 * Levenshtein (edit) distance between two strings.
 * Used for fuzzy matching and did-you-mean suggestions.
 *
 * Only two rows of the dynamic-programming table are kept at any time and
 * they are swapped after each pass. Characters are compared by string index.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Minimum number of insertions, deletions and
 *   substitutions needed to turn `a` into `b`.
 */
function levenshtein(a, b) {
    const lengthA = a.length;
    const lengthB = b.length;
    if (lengthA === 0) {
        return lengthB;
    }
    if (lengthB === 0) {
        return lengthA;
    }
    // Row 0 of the table: turning "" into b[0..col) costs `col` insertions.
    let previousRow = Array.from({ length: lengthB + 1 }, (_, col) => col);
    let currentRow = new Array(lengthB + 1);
    for (let row = 1; row <= lengthA; row++) {
        currentRow[0] = row;
        const charA = a[row - 1];
        for (let col = 1; col <= lengthB; col++) {
            const insertion = currentRow[col - 1] + 1;
            const deletion = previousRow[col] + 1;
            const substitution = previousRow[col - 1] + (charA === b[col - 1] ? 0 : 1);
            currentRow[col] = Math.min(insertion, deletion, substitution);
        }
        // The row just filled becomes "previous"; the old one is reused.
        const spare = previousRow;
        previousRow = currentRow;
        currentRow = spare;
    }
    return previousRow[lengthB];
}
39
/**
 * Maximum edit distance tolerated for a word of the given length.
 * Shorter words get less tolerance to avoid noisy matches.
 *
 * @param {number} wordLength - Length of the word being matched.
 * @returns {number} 0 for lengths up to 3, 1 for lengths 4-5, otherwise 2.
 */
function maxEditDistance(wordLength) {
    return wordLength <= 3 ? 0 : wordLength <= 5 ? 1 : 2;
}
50
/**
 * Decide whether `token` matches `target` within the edit-distance
 * tolerance allowed for the token's length.
 *
 * When the tolerance is zero only an exact match counts, so the distance
 * computation is skipped.
 *
 * @param {string} token - Query token; its length selects the tolerance.
 * @param {string} target - Candidate word to compare against.
 * @returns {boolean} True when the two are close enough.
 */
function fuzzyMatch(token, target) {
    const tolerance = maxEditDistance(token.length);
    return tolerance === 0
        ? token === target
        : levenshtein(token, target) <= tolerance;
}
59
/**
 * Highlight matching segments in text by wrapping them in <mark> tags.
 *
 * All terms are combined into one case-insensitive alternation and applied
 * in a single pass, so a term can never match inside a <mark> tag inserted
 * for another term. Terms are deduplicated, empty terms are ignored, and
 * longer terms are tried first so that "red shoes" wins over "red" where
 * both could match at the same position.
 *
 * The text itself is not HTML-escaped here; callers that render the result
 * as HTML are responsible for sanitizing it.
 *
 * @param {string} text - Text to highlight; returned unchanged when falsy.
 * @param {string[]} queryTokens - Query tokens; when empty, `text` is
 *   returned unchanged.
 * @param {Iterable<string>} expandedTerms - Additional terms to highlight.
 * @returns {string} `text` with every match wrapped in <mark>…</mark>.
 */
function highlightText(text, queryTokens, expandedTerms) {
    if (!text || queryTokens.length === 0)
        return text;
    const patterns = [...new Set([...queryTokens, ...expandedTerms])]
        // An empty alternative would match at every position.
        .filter((term) => term.length > 0)
        // Longest first: regex alternation takes the first alternative that matches.
        .sort((a, b) => b.length - a.length)
        // Escape regex metacharacters so terms are matched literally.
        .map((term) => term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
    if (patterns.length === 0)
        return text;
    const matcher = new RegExp(`(${patterns.join("|")})`, "gi");
    return text.replace(matcher, "<mark>$1</mark>");
}
74
/**
 * Compute a lexical relevance score for one indexed item.
 *
 * For every query token, the token itself and every expanded term earn
 * literal-match points: title exact 100 / prefix 50 / substring 25, body
 * substring 10, and per tag exact 30 / substring 15. When `fuzzy` is on,
 * the token additionally earns 15 per fuzzy-matching title word, 8 per
 * fuzzy-matching tag word, and 5 once if any body word fuzzy-matches —
 * each only where the token is not already a literal substring of that
 * title, tag or body.
 *
 * Note: expanded terms are scored once per query token, so their
 * contribution grows with the number of tokens in the query.
 *
 * @param {object} item - Indexed item; `title`, optional `body` and `tags` are read.
 * @param {string[]} queryTokens - Normalized query tokens.
 * @param {Iterable<string>} expandedTerms - Extra terms scored like tokens.
 * @param {boolean} fuzzy - Whether to add fuzzy-match points.
 * @returns {number} The accumulated score; 0 means no match.
 */
function scoreMatch(item, queryTokens, expandedTerms, fuzzy) {
    const title = normalize(item.title);
    const body = item.body ? normalize(item.body) : "";
    const tags = item.tags.map((rawTag) => normalize(rawTag));
    const titleWords = tokenize(item.title);

    // Points a single term earns from exact/prefix/substring matches.
    const literalPoints = (term) => {
        let points = 0;
        if (title === term) {
            points += 100;
        }
        else if (title.startsWith(term)) {
            points += 50;
        }
        else if (title.includes(term)) {
            points += 25;
        }
        if (body.includes(term)) {
            points += 10;
        }
        for (const tag of tags) {
            if (tag === term) {
                points += 30;
            }
            else if (tag.includes(term)) {
                points += 15;
            }
        }
        return points;
    };

    // Points a query token earns from fuzzy matches (lower weight than literal).
    const fuzzyPoints = (token) => {
        let points = 0;
        if (!title.includes(token)) {
            for (const word of titleWords) {
                if (fuzzyMatch(token, normalize(word))) {
                    points += 15;
                }
            }
        }
        for (const tag of tags) {
            if (tag.includes(token)) {
                continue;
            }
            for (const word of tokenize(tag)) {
                if (fuzzyMatch(token, word)) {
                    points += 8;
                }
            }
        }
        // Body counts at most once per token.
        if (body && !body.includes(token)) {
            if (tokenize(body).some((word) => fuzzyMatch(token, word))) {
                points += 5;
            }
        }
        return points;
    };

    let total = 0;
    for (const token of queryTokens) {
        total += literalPoints(token);
        for (const expanded of expandedTerms) {
            total += literalPoints(expanded);
        }
        if (fuzzy) {
            total += fuzzyPoints(token);
        }
    }
    return total;
}
138
/**
 * Convert a MeiliSearch facetDistribution into the facets shape returned by
 * search: `{ entityTypes: [{ type, count }], tags: [{ tag, count }] }`.
 *
 * Both lists are ordered by descending count; tags are capped at 20.
 *
 * @param {object|undefined|null} distribution - Facet distribution keyed by
 *   facet name (`entityType`, `tags`), each mapping value to count.
 * @returns {{entityTypes: object[], tags: object[]}} Facets; both lists are
 *   empty when no distribution is given.
 */
function meiliResultToFacets(distribution) {
    if (!distribution) {
        return { entityTypes: [], tags: [] };
    }
    const byCountDescending = (left, right) => right.count - left.count;

    const entityTypes = [];
    for (const [type, count] of Object.entries(distribution.entityType ?? {})) {
        entityTypes.push({ type, count });
    }
    entityTypes.sort(byCountDescending);

    const allTags = [];
    for (const [tag, count] of Object.entries(distribution.tags ?? {})) {
        allTags.push({ tag, count });
    }
    allTags.sort(byCountDescending);

    return { entityTypes, tags: allTags.slice(0, 20) };
}
153
/**
 * Build facet counts from scored results.
 *
 * Counts how many results fall under each entity type and each normalized
 * tag. Both lists are ordered by descending count; tags are capped at 20.
 *
 * @param {Iterable<{item: object}>} items - Results; each `item` supplies
 *   `entityType` and `tags`.
 * @returns {{entityTypes: object[], tags: object[]}} Facet lists of
 *   `{ type, count }` and `{ tag, count }`.
 */
function computeFacets(items) {
    const increment = (counter, key) => {
        counter.set(key, (counter.get(key) ?? 0) + 1);
    };
    const byCountDescending = (left, right) => right.count - left.count;

    const perType = new Map();
    const perTag = new Map();
    for (const { item } of items) {
        increment(perType, item.entityType);
        for (const rawTag of item.tags) {
            increment(perTag, normalize(rawTag));
        }
    }

    const entityTypes = [...perType.entries()]
        .map(([type, count]) => ({ type, count }))
        .sort(byCountDescending);
    const tags = [...perTag.entries()]
        .map(([tag, count]) => ({ tag, count }))
        .sort(byCountDescending)
        .slice(0, 20);
    return { entityTypes, tags };
}
173
/**
 * Return a sorted copy of the results; the input array is left untouched.
 *
 * Supported orders: "newest" / "oldest" (by `item.indexedAt`), "title_asc" /
 * "title_desc" (locale comparison of `item.title`). Any other value sorts by
 * descending `score`.
 *
 * @param {object[]} results - Entries of the form `{ item, score, ... }`.
 * @param {string} [sort] - Requested order.
 * @returns {object[]} A new, sorted array.
 */
function sortResults(results, sort) {
    const indexedTime = (entry) => new Date(entry.item.indexedAt).getTime();
    const byScoreDescending = (left, right) => right.score - left.score;
    const comparators = new Map([
        ["newest", (left, right) => indexedTime(right) - indexedTime(left)],
        ["oldest", (left, right) => indexedTime(left) - indexedTime(right)],
        ["title_asc", (left, right) => left.item.title.localeCompare(right.item.title)],
        ["title_desc", (left, right) => right.item.title.localeCompare(left.item.title)],
    ]);
    const compare = comparators.get(sort) ?? byScoreDescending;
    const ordered = [...results];
    ordered.sort(compare);
    return ordered;
}
196
/**
 * Build a did-you-mean suggestion by replacing each query token with the
 * closest known word.
 *
 * The vocabulary is every token found in the indexed titles followed by
 * every token found in the popular search terms. A token that is already in
 * the vocabulary is kept. Otherwise it is replaced by the vocabulary word
 * with the smallest edit distance within the tolerance for the token's
 * length; on ties the word encountered first wins.
 *
 * @param {string[]} queryTokens - Normalized query tokens.
 * @param {Iterable<string>} indexedTitles - Titles of indexed items.
 * @param {Iterable<string>} popularTerms - Previously searched terms.
 * @param {boolean} hasResults - When true no suggestion is produced.
 * @returns {string|undefined} The corrected query, or undefined when
 *   nothing would change or `hasResults` is true.
 */
function findDidYouMean(queryTokens, indexedTitles, popularTerms, hasResults) {
    if (hasResults) {
        return undefined;
    }
    const vocabulary = new Set();
    for (const phrase of [...indexedTitles, ...popularTerms]) {
        for (const word of tokenize(phrase)) {
            vocabulary.add(word);
        }
    }

    const corrected = queryTokens.map((token) => {
        // A known word needs no correction.
        if (vocabulary.has(token)) {
            return token;
        }
        const tolerance = maxEditDistance(token.length);
        if (tolerance === 0) {
            return token;
        }
        let closest = token;
        let closestDistance = Number.POSITIVE_INFINITY;
        for (const word of vocabulary) {
            const distance = levenshtein(token, word);
            if (distance <= tolerance && distance < closestDistance) {
                closestDistance = distance;
                closest = word;
            }
        }
        return closest;
    });

    const suggestion = corrected.join(" ");
    return suggestion === queryTokens.join(" ") ? undefined : suggestion;
}
240
+ export function createSearchController(data, embeddingProvider, meiliProvider) {
241
/**
 * Generate an embedding for an item and attach it to the item's metadata
 * under `__embedding`.
 *
 * The embedded text is the title, then the body (if any), then the
 * comma-joined tags (if any), joined with ". ". The item is modified in
 * place and returned. Without a provider, or when the provider throws or
 * returns a falsy value, the item is returned without an embedding so
 * lexical search keeps working.
 *
 * @param {object} item - Item being indexed; `title`, `body`, `tags` and
 *   `metadata` are read.
 * @returns {Promise<object>} The same item object.
 */
async function embedItem(item) {
    if (!embeddingProvider) {
        return item;
    }
    try {
        const bodyPart = item.body ? [item.body] : [];
        const tagPart = item.tags.length > 0 ? [item.tags.join(", ")] : [];
        const text = [item.title, ...bodyPart, ...tagPart].join(". ");
        const vector = await embeddingProvider.generateEmbedding(text);
        if (vector) {
            item.metadata = { ...item.metadata, __embedding: vector };
        }
    }
    catch {
        // Embedding is best-effort — lexical search still works
    }
    return item;
}
266
/**
 * Convert an indexed item into a MeiliSearch document.
 *
 * The item's `metadata` (and therefore any stored `__embedding`) is not
 * copied, which keeps the search index small. `indexedAt` is serialized as
 * an ISO-8601 string.
 *
 * @param {object} item - Indexed item; `indexedAt` must provide `toISOString()`.
 * @returns {object} The document to send to MeiliSearch.
 */
function toMeiliDocument(item) {
    const { id, entityType, entityId, title, body, tags, url, image, indexedAt } = item;
    return {
        id,
        entityType,
        entityId,
        title,
        body,
        tags,
        url,
        image,
        indexedAt: indexedAt.toISOString(),
    };
}
283
/**
 * Push items to MeiliSearch without waiting for the outcome.
 *
 * Does nothing when no MeiliSearch provider is configured or the list is
 * empty. A rejected request is ignored so that local search keeps working
 * as a fallback.
 *
 * @param {object[]} items - Indexed items to convert and send.
 * @returns {void}
 */
function syncToMeili(items) {
    const nothingToSync = !meiliProvider || items.length === 0;
    if (nothingToSync) {
        return;
    }
    const documents = items.map(toMeiliDocument);
    // Fire-and-forget: the promise is deliberately not awaited.
    void meiliProvider.addDocuments(documents).catch(() => { });
}
292
/**
 * Delete a document from MeiliSearch by ID without waiting for the outcome.
 * Does nothing when no provider is configured; a rejected request is ignored.
 *
 * @param {string} documentId - ID of the document to delete.
 * @returns {void}
 */
function removeFromMeili(documentId) {
    if (meiliProvider) {
        // Fire-and-forget: the promise is deliberately not awaited.
        void meiliProvider.deleteDocument(documentId).catch(() => { });
    }
}
300
+ return {
301
+ async indexItem(params) {
302
+ const existing = await data.findMany("searchIndex", {
303
+ where: {
304
+ entityType: params.entityType,
305
+ entityId: params.entityId,
306
+ },
307
+ take: 1,
308
+ });
309
+ const existingItems = existing;
310
+ const id = existingItems.length > 0 ? existingItems[0].id : crypto.randomUUID();
311
+ let item = {
312
+ id,
313
+ entityType: params.entityType,
314
+ entityId: params.entityId,
315
+ title: params.title,
316
+ body: params.body,
317
+ tags: params.tags ?? [],
318
+ url: params.url,
319
+ image: params.image,
320
+ metadata: params.metadata ?? {},
321
+ indexedAt: new Date(),
322
+ };
323
+ item = await embedItem(item);
324
+ await data.upsert("searchIndex", id, item);
325
+ syncToMeili([item]);
326
+ return item;
327
+ },
328
+ async bulkIndex(items) {
329
+ let indexed = 0;
330
+ let errors = 0;
331
+ // Batch generate embeddings for all items at once
332
+ if (embeddingProvider && items.length > 0) {
333
+ const texts = items.map((p) => {
334
+ const parts = [p.title];
335
+ if (p.body)
336
+ parts.push(p.body);
337
+ if (p.tags && p.tags.length > 0)
338
+ parts.push(p.tags.join(", "));
339
+ return parts.join(". ");
340
+ });
341
+ const embeddings = await embeddingProvider.generateEmbeddings(texts);
342
+ for (let i = 0; i < items.length; i++) {
343
+ if (embeddings[i]) {
344
+ items[i] = {
345
+ ...items[i],
346
+ metadata: {
347
+ ...items[i].metadata,
348
+ __embedding: embeddings[i],
349
+ },
350
+ };
351
+ }
352
+ }
353
+ }
354
+ const indexedItems = [];
355
+ for (const params of items) {
356
+ try {
357
+ const existing = await data.findMany("searchIndex", {
358
+ where: {
359
+ entityType: params.entityType,
360
+ entityId: params.entityId,
361
+ },
362
+ take: 1,
363
+ });
364
+ const existingItems = existing;
365
+ const id = existingItems.length > 0
366
+ ? existingItems[0].id
367
+ : crypto.randomUUID();
368
+ const item = {
369
+ id,
370
+ entityType: params.entityType,
371
+ entityId: params.entityId,
372
+ title: params.title,
373
+ body: params.body,
374
+ tags: params.tags ?? [],
375
+ url: params.url,
376
+ image: params.image,
377
+ metadata: params.metadata ?? {},
378
+ indexedAt: new Date(),
379
+ };
380
+ await data.upsert("searchIndex", id, item);
381
+ indexedItems.push(item);
382
+ indexed++;
383
+ }
384
+ catch {
385
+ errors++;
386
+ }
387
+ }
388
+ syncToMeili(indexedItems);
389
+ return { indexed, errors };
390
+ },
391
+ async removeFromIndex(entityType, entityId) {
392
+ const items = await data.findMany("searchIndex", {
393
+ where: { entityType, entityId },
394
+ });
395
+ const found = items;
396
+ if (found.length === 0)
397
+ return false;
398
+ for (const item of found) {
399
+ await data.delete("searchIndex", item.id);
400
+ removeFromMeili(item.id);
401
+ }
402
+ return true;
403
+ },
404
+ async search(query, options) {
405
+ const limit = options?.limit ?? 20;
406
+ const skip = options?.skip ?? 0;
407
+ const sort = options?.sort ?? "relevance";
408
+ const queryTokens = tokenize(query);
409
+ if (queryTokens.length === 0) {
410
+ return {
411
+ results: [],
412
+ total: 0,
413
+ facets: { entityTypes: [], tags: [] },
414
+ };
415
+ }
416
+ // ── MeiliSearch path: delegate search to dedicated engine ──
417
+ if (meiliProvider) {
418
+ try {
419
+ const meiliSort = sort === "newest"
420
+ ? ["indexedAt:desc"]
421
+ : sort === "oldest"
422
+ ? ["indexedAt:asc"]
423
+ : sort === "title_asc"
424
+ ? ["title:asc"]
425
+ : sort === "title_desc"
426
+ ? ["title:desc"]
427
+ : undefined;
428
+ const filters = [];
429
+ if (options?.entityType) {
430
+ filters.push(`entityType = "${options.entityType}"`);
431
+ }
432
+ if (options?.tags && options.tags.length > 0) {
433
+ const tagFilters = options.tags
434
+ .map((t) => `tags = "${t}"`)
435
+ .join(" OR ");
436
+ filters.push(`(${tagFilters})`);
437
+ }
438
+ const meiliResult = await meiliProvider.search(query, {
439
+ limit,
440
+ offset: skip,
441
+ filter: filters.length > 0 ? filters.join(" AND ") : undefined,
442
+ sort: meiliSort,
443
+ facets: ["entityType", "tags"],
444
+ attributesToHighlight: ["title", "body"],
445
+ highlightPreTag: "<mark>",
446
+ highlightPostTag: "</mark>",
447
+ showRankingScore: true,
448
+ });
449
+ const results = meiliResult.hits.map((hit) => ({
450
+ item: {
451
+ id: hit.id,
452
+ entityType: hit.entityType,
453
+ entityId: hit.entityId,
454
+ title: hit.title,
455
+ body: hit.body,
456
+ tags: hit.tags ?? [],
457
+ url: hit.url,
458
+ image: hit.image,
459
+ metadata: {},
460
+ indexedAt: new Date(hit.indexedAt),
461
+ },
462
+ score: (hit._rankingScore ?? 0.5) * 100,
463
+ highlights: {
464
+ title: hit._formatted?.title,
465
+ body: hit._formatted?.body,
466
+ },
467
+ }));
468
+ const facets = meiliResultToFacets(meiliResult.facetDistribution);
469
+ return {
470
+ results,
471
+ total: meiliResult.estimatedTotalHits ?? results.length,
472
+ facets,
473
+ };
474
+ }
475
+ catch {
476
+ // MeiliSearch unavailable — fall through to local search
477
+ }
478
+ }
479
+ // ── Local search path: lexical + semantic scoring ──
480
+ const fuzzy = options?.fuzzy ?? true;
481
+ // Load synonyms for query expansion
482
+ const allSynonyms = (await data.findMany("searchSynonym", {}));
483
+ const expandedTerms = new Set();
484
+ for (const syn of allSynonyms) {
485
+ const synTermNorm = normalize(syn.term);
486
+ for (const token of queryTokens) {
487
+ if (token === synTermNorm) {
488
+ for (const s of syn.synonyms) {
489
+ expandedTerms.add(normalize(s));
490
+ }
491
+ }
492
+ for (const s of syn.synonyms) {
493
+ if (normalize(s) === token) {
494
+ expandedTerms.add(synTermNorm);
495
+ }
496
+ }
497
+ }
498
+ }
499
+ const where = {};
500
+ if (options?.entityType) {
501
+ where.entityType = options.entityType;
502
+ }
503
+ const allItems = (await data.findMany("searchIndex", {
504
+ ...(Object.keys(where).length > 0 ? { where } : {}),
505
+ }));
506
+ // Generate query embedding for semantic search (if provider is available)
507
+ let queryEmbedding = null;
508
+ if (embeddingProvider) {
509
+ try {
510
+ queryEmbedding = await embeddingProvider.generateEmbedding(query);
511
+ }
512
+ catch {
513
+ // Semantic search is best-effort
514
+ }
515
+ }
516
+ // Score and rank — hybrid lexical + semantic scoring
517
+ const scored = [];
518
+ for (const item of allItems) {
519
+ const lexicalScore = scoreMatch(item, queryTokens, expandedTerms, fuzzy);
520
+ // Compute semantic score if embeddings are available
521
+ let semanticScore = 0;
522
+ if (queryEmbedding) {
523
+ const itemEmbedding = item.metadata?.__embedding;
524
+ if (Array.isArray(itemEmbedding) && itemEmbedding.length > 0) {
525
+ semanticScore = cosineSimilarity(queryEmbedding, itemEmbedding);
526
+ }
527
+ }
528
+ // Include items with lexical match OR strong semantic similarity
529
+ const semanticBoost = semanticScore * 80;
530
+ const score = lexicalScore + semanticBoost;
531
+ if (lexicalScore > 0 || semanticScore > 0.5) {
532
+ const highlights = {
533
+ title: highlightText(item.title, queryTokens, expandedTerms),
534
+ body: item.body
535
+ ? highlightText(item.body, queryTokens, expandedTerms)
536
+ : undefined,
537
+ };
538
+ scored.push({ item, score, highlights });
539
+ }
540
+ }
541
+ // Filter by tags if specified
542
+ let filtered = scored;
543
+ if (options?.tags && options.tags.length > 0) {
544
+ const filterTags = new Set(options.tags.map((t) => normalize(t)));
545
+ filtered = scored.filter((r) => r.item.tags.some((tag) => filterTags.has(normalize(tag))));
546
+ }
547
+ // Compute facets before pagination
548
+ const facets = computeFacets(filtered);
549
+ // Sort
550
+ const sorted = sortResults(filtered, sort);
551
+ const total = sorted.length;
552
+ const results = sorted.slice(skip, skip + limit);
553
+ // Did-you-mean suggestion for zero/low results
554
+ let didYouMean;
555
+ if (total === 0) {
556
+ const allTitles = allItems.map((i) => i.title);
557
+ const allQueries = (await data.findMany("searchQuery", {}));
558
+ const popularTermsList = allQueries
559
+ .filter((q) => q.resultCount > 0)
560
+ .map((q) => q.term);
561
+ didYouMean = findDidYouMean(queryTokens, allTitles, popularTermsList, false);
562
+ }
563
+ return { results, total, facets, didYouMean };
564
+ },
565
+ async suggest(prefix, limit = 10) {
566
+ const prefixNorm = normalize(prefix);
567
+ if (prefixNorm.length === 0)
568
+ return [];
569
+ const allQueries = (await data.findMany("searchQuery", {}));
570
+ const termCounts = new Map();
571
+ for (const q of allQueries) {
572
+ if (q.resultCount > 0 && q.normalizedTerm.startsWith(prefixNorm)) {
573
+ termCounts.set(q.term, (termCounts.get(q.term) ?? 0) + 1);
574
+ }
575
+ }
576
+ const allItems = (await data.findMany("searchIndex", {}));
577
+ const titleSuggestions = [];
578
+ for (const item of allItems) {
579
+ if (normalize(item.title).includes(prefixNorm)) {
580
+ titleSuggestions.push(item.title);
581
+ }
582
+ }
583
+ const popularTerms = Array.from(termCounts.entries())
584
+ .sort((a, b) => b[1] - a[1])
585
+ .map(([term]) => term);
586
+ const seen = new Set();
587
+ const suggestions = [];
588
+ for (const term of popularTerms) {
589
+ const norm = normalize(term);
590
+ if (!seen.has(norm)) {
591
+ seen.add(norm);
592
+ suggestions.push(term);
593
+ }
594
+ }
595
+ for (const title of titleSuggestions) {
596
+ const norm = normalize(title);
597
+ if (!seen.has(norm)) {
598
+ seen.add(norm);
599
+ suggestions.push(title);
600
+ }
601
+ }
602
+ return suggestions.slice(0, limit);
603
+ },
604
+ async recordQuery(term, resultCount, sessionId) {
605
+ const id = crypto.randomUUID();
606
+ const query = {
607
+ id,
608
+ term,
609
+ normalizedTerm: normalize(term),
610
+ resultCount,
611
+ sessionId,
612
+ searchedAt: new Date(),
613
+ };
614
+ await data.upsert("searchQuery", id, query);
615
+ return query;
616
+ },
617
+ async recordClick(params) {
618
+ const id = crypto.randomUUID();
619
+ const click = {
620
+ id,
621
+ queryId: params.queryId,
622
+ term: params.term,
623
+ entityType: params.entityType,
624
+ entityId: params.entityId,
625
+ position: params.position,
626
+ clickedAt: new Date(),
627
+ };
628
+ await data.upsert("searchClick", id, click);
629
+ return click;
630
+ },
631
+ async getRecentQueries(sessionId, limit = 10) {
632
+ const all = (await data.findMany("searchQuery", {
633
+ where: { sessionId },
634
+ }));
635
+ all.sort((a, b) => new Date(b.searchedAt).getTime() - new Date(a.searchedAt).getTime());
636
+ const seen = new Set();
637
+ const results = [];
638
+ for (const q of all) {
639
+ if (!seen.has(q.normalizedTerm)) {
640
+ seen.add(q.normalizedTerm);
641
+ results.push(q);
642
+ }
643
+ if (results.length >= limit)
644
+ break;
645
+ }
646
+ return results;
647
+ },
648
+ async getPopularTerms(limit = 20) {
649
+ const all = (await data.findMany("searchQuery", {}));
650
+ const termStats = new Map();
651
+ for (const q of all) {
652
+ const existing = termStats.get(q.normalizedTerm);
653
+ if (existing) {
654
+ existing.count += 1;
655
+ existing.totalResults += q.resultCount;
656
+ }
657
+ else {
658
+ termStats.set(q.normalizedTerm, {
659
+ term: q.term,
660
+ count: 1,
661
+ totalResults: q.resultCount,
662
+ });
663
+ }
664
+ }
665
+ return Array.from(termStats.values())
666
+ .map((s) => ({
667
+ term: s.term,
668
+ count: s.count,
669
+ avgResultCount: Math.round(s.totalResults / s.count),
670
+ }))
671
+ .sort((a, b) => b.count - a.count)
672
+ .slice(0, limit);
673
+ },
674
+ async getZeroResultQueries(limit = 20) {
675
+ const all = (await data.findMany("searchQuery", {}));
676
+ const termStats = new Map();
677
+ for (const q of all) {
678
+ if (q.resultCount === 0) {
679
+ const existing = termStats.get(q.normalizedTerm);
680
+ if (existing) {
681
+ existing.count += 1;
682
+ }
683
+ else {
684
+ termStats.set(q.normalizedTerm, {
685
+ term: q.term,
686
+ count: 1,
687
+ });
688
+ }
689
+ }
690
+ }
691
+ return Array.from(termStats.values())
692
+ .map((s) => ({
693
+ term: s.term,
694
+ count: s.count,
695
+ avgResultCount: 0,
696
+ }))
697
+ .sort((a, b) => b.count - a.count)
698
+ .slice(0, limit);
699
+ },
700
+ async getAnalytics() {
701
+ const allQueries = (await data.findMany("searchQuery", {}));
702
+ const allClicks = (await data.findMany("searchClick", {}));
703
+ if (allQueries.length === 0) {
704
+ return {
705
+ totalQueries: 0,
706
+ uniqueTerms: 0,
707
+ avgResultCount: 0,
708
+ zeroResultCount: 0,
709
+ zeroResultRate: 0,
710
+ clickThroughRate: 0,
711
+ avgClickPosition: 0,
712
+ };
713
+ }
714
+ const uniqueTerms = new Set(allQueries.map((q) => q.normalizedTerm));
715
+ const totalResults = allQueries.reduce((sum, q) => sum + q.resultCount, 0);
716
+ const zeroResultCount = allQueries.filter((q) => q.resultCount === 0).length;
717
+ // CTR: queries that led to at least one click
718
+ const clickedQueryIds = new Set(allClicks.map((c) => c.queryId));
719
+ const queriesWithResults = allQueries.filter((q) => q.resultCount > 0).length;
720
+ const clickThroughRate = queriesWithResults > 0
721
+ ? Math.round((clickedQueryIds.size / queriesWithResults) * 100)
722
+ : 0;
723
+ const avgClickPosition = allClicks.length > 0
724
+ ? Math.round((allClicks.reduce((sum, c) => sum + c.position, 0) /
725
+ allClicks.length) *
726
+ 10) / 10
727
+ : 0;
728
+ return {
729
+ totalQueries: allQueries.length,
730
+ uniqueTerms: uniqueTerms.size,
731
+ avgResultCount: Math.round(totalResults / allQueries.length),
732
+ zeroResultCount,
733
+ zeroResultRate: Math.round((zeroResultCount / allQueries.length) * 100),
734
+ clickThroughRate,
735
+ avgClickPosition,
736
+ };
737
+ },
738
+ async addSynonym(term, synonyms) {
739
+ const existing = await data.findMany("searchSynonym", {
740
+ where: { term: normalize(term) },
741
+ take: 1,
742
+ });
743
+ const existingItems = existing;
744
+ const id = existingItems.length > 0 ? existingItems[0].id : crypto.randomUUID();
745
+ const synonym = {
746
+ id,
747
+ term: normalize(term),
748
+ synonyms: synonyms.map((s) => s.trim()),
749
+ createdAt: existingItems.length > 0 ? existingItems[0].createdAt : new Date(),
750
+ };
751
+ await data.upsert("searchSynonym", id, synonym);
752
+ return synonym;
753
+ },
754
+ async removeSynonym(id) {
755
+ const existing = await data.get("searchSynonym", id);
756
+ if (!existing)
757
+ return false;
758
+ await data.delete("searchSynonym", id);
759
+ return true;
760
+ },
761
+ async listSynonyms() {
762
+ const all = (await data.findMany("searchSynonym", {}));
763
+ return all;
764
+ },
765
+ async getIndexCount() {
766
+ const all = await data.findMany("searchIndex", {});
767
+ return all.length;
768
+ },
769
+ };
770
+ }