@86d-app/search 0.0.4 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/.turbo/turbo-build.log +1 -0
  2. package/AGENTS.md +72 -0
  3. package/README.md +171 -28
  4. package/dist/__tests__/controllers.test.d.ts +2 -0
  5. package/dist/__tests__/controllers.test.d.ts.map +1 -0
  6. package/dist/__tests__/embedding-provider.test.d.ts +2 -0
  7. package/dist/__tests__/embedding-provider.test.d.ts.map +1 -0
  8. package/dist/__tests__/endpoint-security.test.d.ts +2 -0
  9. package/dist/__tests__/endpoint-security.test.d.ts.map +1 -0
  10. package/dist/__tests__/meilisearch-provider.test.d.ts +2 -0
  11. package/dist/__tests__/meilisearch-provider.test.d.ts.map +1 -0
  12. package/dist/__tests__/service-impl.test.d.ts +2 -0
  13. package/dist/__tests__/service-impl.test.d.ts.map +1 -0
  14. package/dist/admin/components/index.d.ts +2 -0
  15. package/dist/admin/components/index.d.ts.map +1 -0
  16. package/dist/admin/components/search-analytics.d.ts +2 -0
  17. package/dist/admin/components/search-analytics.d.ts.map +1 -0
  18. package/dist/admin/endpoints/analytics.d.ts +15 -0
  19. package/dist/admin/endpoints/analytics.d.ts.map +1 -0
  20. package/dist/admin/endpoints/bulk-index.d.ts +20 -0
  21. package/dist/admin/endpoints/bulk-index.d.ts.map +1 -0
  22. package/dist/admin/endpoints/click-analytics.d.ts +7 -0
  23. package/dist/admin/endpoints/click-analytics.d.ts.map +1 -0
  24. package/dist/admin/endpoints/get-settings.d.ts +17 -0
  25. package/dist/admin/endpoints/get-settings.d.ts.map +1 -0
  26. package/dist/admin/endpoints/index-manage.d.ts +26 -0
  27. package/dist/admin/endpoints/index-manage.d.ts.map +1 -0
  28. package/dist/admin/endpoints/index.d.ts +125 -0
  29. package/dist/admin/endpoints/index.d.ts.map +1 -0
  30. package/dist/admin/endpoints/popular.d.ts +10 -0
  31. package/dist/admin/endpoints/popular.d.ts.map +1 -0
  32. package/dist/admin/endpoints/synonyms.d.ts +30 -0
  33. package/dist/admin/endpoints/synonyms.d.ts.map +1 -0
  34. package/dist/admin/endpoints/zero-results.d.ts +10 -0
  35. package/dist/admin/endpoints/zero-results.d.ts.map +1 -0
  36. package/dist/embedding-provider.d.ts +28 -0
  37. package/dist/embedding-provider.d.ts.map +1 -0
  38. package/dist/index.d.ts +23 -0
  39. package/dist/index.d.ts.map +1 -0
  40. package/dist/meilisearch-provider.d.ts +104 -0
  41. package/dist/meilisearch-provider.d.ts.map +1 -0
  42. package/dist/schema.d.ts +133 -0
  43. package/dist/schema.d.ts.map +1 -0
  44. package/dist/service-impl.d.ts +6 -0
  45. package/dist/service-impl.d.ts.map +1 -0
  46. package/dist/service.d.ts +127 -0
  47. package/dist/service.d.ts.map +1 -0
  48. package/dist/store/components/_hooks.d.ts +6 -0
  49. package/dist/store/components/_hooks.d.ts.map +1 -0
  50. package/dist/store/components/index.d.ts +10 -0
  51. package/dist/store/components/index.d.ts.map +1 -0
  52. package/dist/store/components/search-bar.d.ts +7 -0
  53. package/dist/store/components/search-bar.d.ts.map +1 -0
  54. package/dist/store/components/search-page.d.ts +4 -0
  55. package/dist/store/components/search-page.d.ts.map +1 -0
  56. package/dist/store/components/search-results.d.ts +9 -0
  57. package/dist/store/components/search-results.d.ts.map +1 -0
  58. package/dist/store/endpoints/click.d.ts +14 -0
  59. package/dist/store/endpoints/click.d.ts.map +1 -0
  60. package/dist/store/endpoints/index.d.ts +85 -0
  61. package/dist/store/endpoints/index.d.ts.map +1 -0
  62. package/dist/store/endpoints/recent.d.ts +15 -0
  63. package/dist/store/endpoints/recent.d.ts.map +1 -0
  64. package/dist/store/endpoints/search.d.ts +36 -0
  65. package/dist/store/endpoints/search.d.ts.map +1 -0
  66. package/dist/store/endpoints/store-search.d.ts +16 -0
  67. package/dist/store/endpoints/store-search.d.ts.map +1 -0
  68. package/dist/store/endpoints/suggest.d.ts +11 -0
  69. package/dist/store/endpoints/suggest.d.ts.map +1 -0
  70. package/package.json +3 -3
  71. package/src/__tests__/controllers.test.ts +1026 -0
  72. package/src/__tests__/embedding-provider.test.ts +195 -0
  73. package/src/__tests__/endpoint-security.test.ts +300 -0
  74. package/src/__tests__/meilisearch-provider.test.ts +400 -0
  75. package/src/__tests__/service-impl.test.ts +341 -8
  76. package/src/admin/components/search-analytics.tsx +120 -0
  77. package/src/admin/endpoints/bulk-index.ts +34 -0
  78. package/src/admin/endpoints/click-analytics.ts +16 -0
  79. package/src/admin/endpoints/get-settings.ts +56 -0
  80. package/src/admin/endpoints/index-manage.ts +4 -1
  81. package/src/admin/endpoints/index.ts +6 -0
  82. package/src/admin/endpoints/synonyms.ts +1 -1
  83. package/src/embedding-provider.ts +99 -0
  84. package/src/index.ts +60 -4
  85. package/src/meilisearch-provider.ts +239 -0
  86. package/src/schema.ts +15 -0
  87. package/src/service-impl.ts +605 -34
  88. package/src/service.ts +60 -1
  89. package/src/store/endpoints/click.ts +21 -0
  90. package/src/store/endpoints/index.ts +2 -0
  91. package/src/store/endpoints/recent.ts +1 -1
  92. package/src/store/endpoints/search.ts +38 -10
  93. package/src/store/endpoints/store-search.ts +1 -1
  94. package/src/store/endpoints/suggest.ts +2 -2
  95. package/vitest.config.ts +2 -0
@@ -1,9 +1,19 @@
1
1
  import type { ModuleDataService } from "@86d-app/core";
2
+ import type { EmbeddingProvider } from "./embedding-provider";
3
+ import { cosineSimilarity } from "./embedding-provider";
2
4
  import type {
5
+ MeiliSearchDocument,
6
+ MeiliSearchProvider,
7
+ } from "./meilisearch-provider";
8
+ import type {
9
+ SearchClick,
3
10
  SearchController,
11
+ SearchFacets,
12
+ SearchHighlight,
4
13
  SearchIndexItem,
5
14
  SearchQuery,
6
15
  SearchResult,
16
+ SearchSortField,
7
17
  SearchSynonym,
8
18
  } from "./service";
9
19
 
@@ -17,15 +27,89 @@ function tokenize(text: string): string[] {
17
27
  .filter((t) => t.length > 0);
18
28
  }
19
29
 
30
/**
 * Levenshtein (edit) distance between two strings.
 * Used for fuzzy matching and did-you-mean suggestions.
 *
 * Strings are compared by UTF-16 code unit, exactly as `a[i]` indexing does.
 *
 * @param a - first string
 * @param b - second string
 * @returns the minimum number of single-unit insertions, deletions and
 *   substitutions needed to turn `a` into `b`
 */
function levenshtein(a: string, b: string): number {
  // Identical strings need no dynamic programming at all.
  if (a === b) return 0;

  // The distance is symmetric, so size the rows by the shorter string to keep
  // the working memory at O(min(m, n)).
  const [longer, shorter] = a.length >= b.length ? [a, b] : [b, a];
  const n = shorter.length;
  if (n === 0) return longer.length;

  // Two rolling rows (previous / current) of the DP table.
  let prev = Array.from({ length: n + 1 }, (_, j) => j);
  let curr = new Array<number>(n + 1).fill(0);

  for (let i = 1; i <= longer.length; i++) {
    curr[0] = i;
    const unit = longer[i - 1];
    for (let j = 1; j <= n; j++) {
      const cost = unit === shorter[j - 1] ? 0 : 1;
      curr[j] = Math.min(
        curr[j - 1] + 1, // insertion
        prev[j] + 1, // deletion
        prev[j - 1] + cost, // substitution
      );
    }
    // Reuse the old "previous" row as scratch space for the next iteration.
    [prev, curr] = [curr, prev];
  }

  return prev[n];
}
63
+
64
/**
 * Edit-distance budget for a word of the given length.
 * Words of up to 3 characters must match exactly, words of 4-5 characters
 * may differ by one edit, and anything longer by two — short words get less
 * slack so they do not produce noisy matches.
 */
function maxEditDistance(wordLength: number): number {
  return wordLength <= 3 ? 0 : wordLength <= 5 ? 1 : 2;
}
73
+
74
/**
 * Check if `token` fuzzy-matches `target` within the edit-distance tolerance
 * that `maxEditDistance` grants for the token's length.
 *
 * @param token - the (normalized) query token; its length sets the tolerance
 * @param target - the (normalized) word to compare against
 * @returns true when the two are equal or within the allowed edit distance
 */
function fuzzyMatch(token: string, target: string): boolean {
  if (token === target) return true;

  const maxDist = maxEditDistance(token.length);
  if (maxDist === 0) return false;

  // The edit distance can never be smaller than the length difference, so a
  // gap larger than the tolerance is rejected without running the DP.
  if (Math.abs(token.length - target.length) > maxDist) return false;

  return levenshtein(token, target) <= maxDist;
}
82
+
83
/**
 * Highlight matching segments in text by wrapping them in <mark> tags.
 *
 * All terms are matched in a single case-insensitive pass, longest term
 * first. Replacing term by term would re-scan the `<mark>` tags inserted by
 * earlier terms, producing nested or broken markup for duplicate terms,
 * overlapping terms, or terms such as "mark".
 *
 * NOTE(review): `text` is not HTML-escaped here; if callers render the result
 * as raw HTML the source text must be trusted or sanitized — confirm.
 *
 * @param text - the text to highlight
 * @param queryTokens - tokens of the user's query; when empty, `text` is
 *   returned unchanged
 * @param expandedTerms - additional terms (e.g. synonyms) to highlight
 * @returns `text` with every matched segment wrapped in `<mark>…</mark>`
 */
function highlightText(
  text: string,
  queryTokens: string[],
  expandedTerms: Set<string>,
): string {
  if (!text || queryTokens.length === 0) return text;

  // De-duplicate, drop empty terms (an empty alternative matches everywhere),
  // and prefer the longest alternative so "shoes" wins over "shoe".
  const terms = Array.from(new Set([...queryTokens, ...expandedTerms]))
    .filter((term) => term.length > 0)
    .sort((a, b) => b.length - a.length);
  if (terms.length === 0) return text;

  const pattern = terms
    .map((term) => term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");

  return text.replace(new RegExp(`(${pattern})`, "gi"), "<mark>$1</mark>");
}
101
+
20
102
  function scoreMatch(
21
103
  item: SearchIndexItem,
22
104
  queryTokens: string[],
23
105
  expandedTerms: Set<string>,
106
+ fuzzy: boolean,
24
107
  ): number {
25
108
  let score = 0;
26
109
  const titleLower = normalize(item.title);
27
110
  const bodyLower = item.body ? normalize(item.body) : "";
28
111
  const tagLower = item.tags.map((t) => normalize(t));
112
+ const titleTokens = tokenize(item.title);
29
113
 
30
114
  for (const token of queryTokens) {
31
115
  const allTerms = [token, ...expandedTerms];
@@ -53,17 +137,233 @@ function scoreMatch(
53
137
  }
54
138
  }
55
139
  }
140
+
141
+ // Fuzzy matching on title tokens (lower weight than exact)
142
+ if (fuzzy) {
143
+ for (const titleToken of titleTokens) {
144
+ const titleTokenNorm = normalize(titleToken);
145
+ if (!titleLower.includes(token) && fuzzyMatch(token, titleTokenNorm)) {
146
+ score += 15;
147
+ }
148
+ }
149
+ // Fuzzy on tags
150
+ for (const tag of tagLower) {
151
+ const tagTokens = tokenize(tag);
152
+ for (const tagToken of tagTokens) {
153
+ if (!tag.includes(token) && fuzzyMatch(token, tagToken)) {
154
+ score += 8;
155
+ }
156
+ }
157
+ }
158
+ // Fuzzy on body tokens
159
+ if (bodyLower && !bodyLower.includes(token)) {
160
+ const bodyTokens = tokenize(bodyLower);
161
+ for (const bodyToken of bodyTokens) {
162
+ if (fuzzyMatch(token, bodyToken)) {
163
+ score += 5;
164
+ break; // only count once per token per body
165
+ }
166
+ }
167
+ }
168
+ }
56
169
  }
57
170
 
58
171
  return score;
59
172
  }
60
173
 
174
/**
 * Convert MeiliSearch facetDistribution to our SearchFacets shape.
 *
 * Reads the `entityType` and `tags` entries of the distribution; each becomes
 * a list of `{ type | tag, count }` sorted by descending count.
 *
 * @param distribution - facet name → (facet value → hit count); when absent,
 *   empty facets are returned
 * @param maxTags - maximum number of tag facets to keep (defaults to 20)
 * @returns entity-type facets (uncapped) and the top `maxTags` tag facets
 */
function meiliResultToFacets(
  distribution?: Record<string, Record<string, number>>,
  maxTags = 20,
): SearchFacets {
  if (!distribution) return { entityTypes: [], tags: [] };

  const entityTypes = Object.entries(distribution.entityType ?? {})
    .map(([type, count]) => ({ type, count }))
    .sort((a, b) => b.count - a.count);

  const tags = Object.entries(distribution.tags ?? {})
    .map(([tag, count]) => ({ tag, count }))
    .sort((a, b) => b.count - a.count)
    // Clamp so a negative cap cannot turn into a "drop from the end" slice.
    .slice(0, Math.max(0, maxTags));

  return { entityTypes, tags };
}
193
+
194
/**
 * Build facet counts from a list of search results.
 *
 * Counts results per `entityType` and per normalized tag, then returns both
 * lists sorted by descending count.
 *
 * @param items - the results to aggregate (typically the full, unpaginated
 *   result set)
 * @param maxTags - maximum number of tag facets to keep (defaults to 20)
 * @returns entity-type facets (uncapped) and the top `maxTags` tag facets
 */
function computeFacets(items: SearchResult[], maxTags = 20): SearchFacets {
  const typeCounts = new Map<string, number>();
  const tagCounts = new Map<string, number>();

  for (const { item } of items) {
    typeCounts.set(item.entityType, (typeCounts.get(item.entityType) ?? 0) + 1);
    for (const tag of item.tags) {
      // Tags are counted by their normalized form so that variants which
      // normalize to the same string share one facet.
      const norm = normalize(tag);
      tagCounts.set(norm, (tagCounts.get(norm) ?? 0) + 1);
    }
  }

  return {
    entityTypes: Array.from(typeCounts, ([type, count]) => ({ type, count })).sort(
      (a, b) => b.count - a.count,
    ),
    tags: Array.from(tagCounts, ([tag, count]) => ({ tag, count }))
      .sort((a, b) => b.count - a.count)
      // Clamp so a negative cap cannot turn into a "drop from the end" slice.
      .slice(0, Math.max(0, maxTags)),
  };
}
216
+
217
/**
 * Return a sorted copy of `results`; the input array is never mutated.
 *
 * - "newest" / "oldest": by `item.indexedAt`. Each timestamp is parsed once
 *   up front, and results whose `indexedAt` cannot be parsed are placed last
 *   so the comparator never returns NaN.
 * - "title_asc" / "title_desc": by `item.title` using `localeCompare`.
 * - anything else (including "relevance"): by descending `score`.
 *
 * @param results - the results to order
 * @param sort - the requested sort field
 * @returns a new array containing the same result objects in sorted order
 */
function sortResults(
  results: SearchResult[],
  sort: SearchSortField,
): SearchResult[] {
  switch (sort) {
    case "newest":
    case "oldest": {
      const direction = sort === "newest" ? -1 : 1;
      const keyed = results.map((result) => {
        const time = new Date(result.item.indexedAt).getTime();
        return { result, time: Number.isNaN(time) ? null : time };
      });
      keyed.sort((a, b) => {
        if (a.time === null || b.time === null) {
          if (a.time === b.time) return 0; // both unparsable: keep input order
          return a.time === null ? 1 : -1; // unparsable dates go last
        }
        return direction * (a.time - b.time);
      });
      return keyed.map((entry) => entry.result);
    }
    case "title_asc":
      return [...results].sort((a, b) =>
        a.item.title.localeCompare(b.item.title),
      );
    case "title_desc":
      return [...results].sort((a, b) =>
        b.item.title.localeCompare(a.item.title),
      );
    default:
      return [...results].sort((a, b) => b.score - a.score);
  }
}
249
+
250
/**
 * Find the closest known term to the query for did-you-mean suggestions.
 * Candidate words are the tokens of the indexed titles and of the popular
 * search terms.
 *
 * Each query token is kept as-is when it is itself a known candidate or when
 * its length allows no edits; otherwise it is replaced by the first candidate
 * with the smallest edit distance within the `maxEditDistance` tolerance.
 *
 * @param queryTokens - tokens of the user's query
 * @param indexedTitles - titles of indexed items
 * @param popularTerms - previously searched terms
 * @param hasResults - when true no suggestion is made
 * @returns the corrected query, or undefined when nothing would change
 */
function findDidYouMean(
  queryTokens: string[],
  indexedTitles: string[],
  popularTerms: string[],
  hasResults: boolean,
): string | undefined {
  // Only suggest corrections when the search found nothing.
  if (hasResults) return undefined;

  const candidates = new Set<string>();
  for (const phrase of [...indexedTitles, ...popularTerms]) {
    for (const word of tokenize(phrase)) {
      candidates.add(word);
    }
  }

  const corrections = queryTokens.map((token) => {
    // The tolerance depends only on the token, so compute it once per token.
    const maxDist = maxEditDistance(token.length);
    if (maxDist === 0 || candidates.has(token)) return token;

    let bestMatch = token;
    let bestDist = Number.POSITIVE_INFINITY;
    for (const candidate of candidates) {
      // Edit distance >= length difference, so these can never qualify.
      if (Math.abs(candidate.length - token.length) > maxDist) continue;

      const dist = levenshtein(token, candidate);
      if (dist <= maxDist && dist < bestDist) {
        bestDist = dist;
        bestMatch = candidate;
        // Exact hits were handled above, so 1 is the best possible distance.
        if (dist === 1) break;
      }
    }
    return bestMatch;
  });

  const suggestion = corrections.join(" ");
  return suggestion === queryTokens.join(" ") ? undefined : suggestion;
}
302
+
61
303
  export function createSearchController(
62
304
  data: ModuleDataService,
305
+ embeddingProvider?: EmbeddingProvider,
306
+ meiliProvider?: MeiliSearchProvider,
63
307
  ): SearchController {
308
/**
 * Attach an embedding vector to an index item, when a provider is configured.
 * The text sent to the provider is the title, then the body (if any), then
 * the comma-joined tags (if any), joined with ". ". A returned embedding is
 * stored on the item under `metadata.__embedding`.
 * Provider errors are swallowed on purpose: without an embedding the item is
 * still searchable lexically.
 */
async function embedItem(item: SearchIndexItem): Promise<SearchIndexItem> {
  if (embeddingProvider) {
    try {
      const segments = [
        item.title,
        ...(item.body ? [item.body] : []),
        ...(item.tags.length > 0 ? [item.tags.join(", ")] : []),
      ];
      const vector = await embeddingProvider.generateEmbedding(
        segments.join(". "),
      );
      if (vector) {
        item.metadata = { ...item.metadata, __embedding: vector };
      }
    } catch {
      // Best-effort only — the item is returned without an embedding.
    }
  }
  return item;
}
329
+
330
/**
 * Convert a SearchIndexItem to a MeiliSearch document.
 *
 * `metadata` is left out entirely (and with it the `__embedding` vector) to
 * avoid bloating the search index. `indexedAt` is serialized as an ISO-8601
 * string; it is passed through `new Date(...)` first so that a value which is
 * already a date string is handled as well as a `Date` instance.
 *
 * @param item - the index item to convert
 * @returns the document to send to MeiliSearch
 */
function toMeiliDocument(item: SearchIndexItem): MeiliSearchDocument {
  const { id, entityType, entityId, title, body, tags, url, image } = item;
  return {
    id,
    entityType,
    entityId,
    title,
    body,
    tags,
    url,
    image,
    indexedAt: new Date(item.indexedAt).toISOString(),
  };
}
347
+
348
/**
 * Sync documents to MeiliSearch. Fire-and-forget — failures are silent
 * so local search still works as a fallback.
 *
 * Both asynchronous rejections and synchronous throws (from document
 * conversion or from the provider call itself) are swallowed, so a MeiliSearch
 * problem can never fail an indexing call whose local write already succeeded.
 *
 * @param items - the items to push; nothing happens for an empty list or when
 *   no MeiliSearch provider is configured
 */
function syncToMeili(items: SearchIndexItem[]): void {
  if (!meiliProvider || items.length === 0) return;
  try {
    void meiliProvider.addDocuments(items.map(toMeiliDocument)).catch(() => {});
  } catch {
    // Synchronous failure — ignored for the same reason as a rejection.
  }
}
356
+
357
/**
 * Remove a document from MeiliSearch by ID. Fire-and-forget.
 *
 * Both asynchronous rejections and synchronous throws from the provider are
 * swallowed so that a MeiliSearch problem cannot fail the caller.
 *
 * @param documentId - ID of the document to delete; nothing happens when no
 *   MeiliSearch provider is configured
 */
function removeFromMeili(documentId: string): void {
  if (!meiliProvider) return;
  try {
    void meiliProvider.deleteDocument(documentId).catch(() => {});
  } catch {
    // Synchronous failure — ignored for the same reason as a rejection.
  }
}
364
+
64
365
  return {
65
366
  async indexItem(params) {
66
- // Check if already indexed — update if so
67
367
  const existing = await data.findMany("searchIndex", {
68
368
  where: {
69
369
  entityType: params.entityType,
@@ -75,7 +375,7 @@ export function createSearchController(
75
375
 
76
376
  const id =
77
377
  existingItems.length > 0 ? existingItems[0].id : crypto.randomUUID();
78
- const item: SearchIndexItem = {
378
+ let item: SearchIndexItem = {
79
379
  id,
80
380
  entityType: params.entityType,
81
381
  entityId: params.entityId,
@@ -87,11 +387,86 @@ export function createSearchController(
87
387
  metadata: params.metadata ?? {},
88
388
  indexedAt: new Date(),
89
389
  };
90
- // biome-ignore lint/suspicious/noExplicitAny: ModuleDataService requires any
91
- await data.upsert("searchIndex", id, item as Record<string, any>);
390
+ item = await embedItem(item);
391
+ await data.upsert(
392
+ "searchIndex",
393
+ id,
394
+ item as unknown as Record<string, string>,
395
+ );
396
+ syncToMeili([item]);
92
397
  return item;
93
398
  },
94
399
 
400
/**
 * Index many items in one call.
 *
 * When an embedding provider is configured, embeddings for all items are
 * requested in a single batch. Embeddings are best-effort: if the batch
 * request fails, the items are still indexed without embeddings. The
 * caller's `items` array and its elements are not modified.
 *
 * Each item is then upserted individually (reusing the ID of an existing
 * record with the same entityType/entityId); a failure on one item is
 * counted and does not stop the rest. Successfully stored items are pushed
 * to MeiliSearch afterwards.
 *
 * @returns how many items were stored (`indexed`) and how many failed
 *   (`errors`)
 */
async bulkIndex(items) {
  let indexed = 0;
  let errors = 0;

  // Batch generate embeddings for all items at once
  let embeddings: ReadonlyArray<unknown> = [];
  if (embeddingProvider && items.length > 0) {
    const texts = items.map((p) => {
      const parts = [p.title];
      if (p.body) parts.push(p.body);
      if (p.tags && p.tags.length > 0) parts.push(p.tags.join(", "));
      return parts.join(". ");
    });
    try {
      embeddings = await embeddingProvider.generateEmbeddings(texts);
    } catch {
      // Embedding is best-effort — lexical search still works
    }
  }

  const indexedItems: SearchIndexItem[] = [];
  for (const [i, params] of items.entries()) {
    try {
      const existing = await data.findMany("searchIndex", {
        where: {
          entityType: params.entityType,
          entityId: params.entityId,
        },
        take: 1,
      });
      const existingItems = existing as unknown as SearchIndexItem[];
      const id =
        existingItems.length > 0 ? existingItems[0].id : crypto.randomUUID();

      // Attach the embedding on a copy of the metadata rather than writing
      // back into the caller's input.
      const embedding = embeddings[i];
      const metadata = embedding
        ? { ...params.metadata, __embedding: embedding }
        : (params.metadata ?? {});

      const item: SearchIndexItem = {
        id,
        entityType: params.entityType,
        entityId: params.entityId,
        title: params.title,
        body: params.body,
        tags: params.tags ?? [],
        url: params.url,
        image: params.image,
        metadata,
        indexedAt: new Date(),
      };
      await data.upsert(
        "searchIndex",
        id,
        item as unknown as Record<string, string>,
      );
      indexedItems.push(item);
      indexed++;
    } catch {
      errors++;
    }
  }

  syncToMeili(indexedItems);
  return { indexed, errors };
},
469
+
95
470
  async removeFromIndex(entityType, entityId) {
96
471
  const items = await data.findMany("searchIndex", {
97
472
  where: { entityType, entityId },
@@ -100,6 +475,7 @@ export function createSearchController(
100
475
  if (found.length === 0) return false;
101
476
  for (const item of found) {
102
477
  await data.delete("searchIndex", item.id);
478
+ removeFromMeili(item.id);
103
479
  }
104
480
  return true;
105
481
  },
@@ -107,12 +483,89 @@ export function createSearchController(
107
483
  async search(query, options) {
108
484
  const limit = options?.limit ?? 20;
109
485
  const skip = options?.skip ?? 0;
486
+ const sort = options?.sort ?? "relevance";
110
487
  const queryTokens = tokenize(query);
111
488
 
112
489
  if (queryTokens.length === 0) {
113
- return { results: [], total: 0 };
490
+ return {
491
+ results: [],
492
+ total: 0,
493
+ facets: { entityTypes: [], tags: [] },
494
+ };
495
+ }
496
+
497
+ // ── MeiliSearch path: delegate search to dedicated engine ──
498
+ if (meiliProvider) {
499
+ try {
500
+ const meiliSort: string[] | undefined =
501
+ sort === "newest"
502
+ ? ["indexedAt:desc"]
503
+ : sort === "oldest"
504
+ ? ["indexedAt:asc"]
505
+ : sort === "title_asc"
506
+ ? ["title:asc"]
507
+ : sort === "title_desc"
508
+ ? ["title:desc"]
509
+ : undefined;
510
+
511
+ const filters: string[] = [];
512
+ if (options?.entityType) {
513
+ filters.push(`entityType = "${options.entityType}"`);
514
+ }
515
+ if (options?.tags && options.tags.length > 0) {
516
+ const tagFilters = options.tags
517
+ .map((t) => `tags = "${t}"`)
518
+ .join(" OR ");
519
+ filters.push(`(${tagFilters})`);
520
+ }
521
+
522
+ const meiliResult = await meiliProvider.search(query, {
523
+ limit,
524
+ offset: skip,
525
+ filter: filters.length > 0 ? filters.join(" AND ") : undefined,
526
+ sort: meiliSort,
527
+ facets: ["entityType", "tags"],
528
+ attributesToHighlight: ["title", "body"],
529
+ highlightPreTag: "<mark>",
530
+ highlightPostTag: "</mark>",
531
+ showRankingScore: true,
532
+ });
533
+
534
+ const results: SearchResult[] = meiliResult.hits.map((hit) => ({
535
+ item: {
536
+ id: hit.id,
537
+ entityType: hit.entityType,
538
+ entityId: hit.entityId,
539
+ title: hit.title,
540
+ body: hit.body,
541
+ tags: hit.tags ?? [],
542
+ url: hit.url,
543
+ image: hit.image,
544
+ metadata: {},
545
+ indexedAt: new Date(hit.indexedAt),
546
+ },
547
+ score: (hit._rankingScore ?? 0.5) * 100,
548
+ highlights: {
549
+ title: hit._formatted?.title,
550
+ body: hit._formatted?.body,
551
+ },
552
+ }));
553
+
554
+ const facets = meiliResultToFacets(meiliResult.facetDistribution);
555
+
556
+ return {
557
+ results,
558
+ total: meiliResult.estimatedTotalHits ?? results.length,
559
+ facets,
560
+ };
561
+ } catch {
562
+ // MeiliSearch unavailable — fall through to local search
563
+ }
114
564
  }
115
565
 
566
+ // ── Local search path: lexical + semantic scoring ──
567
+ const fuzzy = options?.fuzzy ?? true;
568
+
116
569
  // Load synonyms for query expansion
117
570
  const allSynonyms = (await data.findMany(
118
571
  "searchSynonym",
@@ -127,7 +580,6 @@ export function createSearchController(
127
580
  expandedTerms.add(normalize(s));
128
581
  }
129
582
  }
130
- // Also reverse: if a query token matches a synonym, expand to the term
131
583
  for (const s of syn.synonyms) {
132
584
  if (normalize(s) === token) {
133
585
  expandedTerms.add(synTermNorm);
@@ -136,8 +588,7 @@ export function createSearchController(
136
588
  }
137
589
  }
138
590
 
139
- // biome-ignore lint/suspicious/noExplicitAny: JSONB where filter
140
- const where: Record<string, any> = {};
591
+ const where: Record<string, string> = {};
141
592
  if (options?.entityType) {
142
593
  where.entityType = options.entityType;
143
594
  }
@@ -146,33 +597,99 @@ export function createSearchController(
146
597
  ...(Object.keys(where).length > 0 ? { where } : {}),
147
598
  })) as unknown as SearchIndexItem[];
148
599
 
149
- // Score and rank
600
+ // Generate query embedding for semantic search (if provider is available)
601
+ let queryEmbedding: number[] | null = null;
602
+ if (embeddingProvider) {
603
+ try {
604
+ queryEmbedding = await embeddingProvider.generateEmbedding(query);
605
+ } catch {
606
+ // Semantic search is best-effort
607
+ }
608
+ }
609
+
610
+ // Score and rank — hybrid lexical + semantic scoring
150
611
  const scored: SearchResult[] = [];
151
612
  for (const item of allItems) {
152
- const score = scoreMatch(item, queryTokens, expandedTerms);
153
- if (score > 0) {
154
- scored.push({ item, score });
613
+ const lexicalScore = scoreMatch(
614
+ item,
615
+ queryTokens,
616
+ expandedTerms,
617
+ fuzzy,
618
+ );
619
+
620
+ // Compute semantic score if embeddings are available
621
+ let semanticScore = 0;
622
+ if (queryEmbedding) {
623
+ const itemEmbedding = item.metadata?.__embedding as
624
+ | number[]
625
+ | undefined;
626
+ if (Array.isArray(itemEmbedding) && itemEmbedding.length > 0) {
627
+ semanticScore = cosineSimilarity(queryEmbedding, itemEmbedding);
628
+ }
629
+ }
630
+
631
+ // Include items with lexical match OR strong semantic similarity
632
+ const semanticBoost = semanticScore * 80;
633
+ const score = lexicalScore + semanticBoost;
634
+ if (lexicalScore > 0 || semanticScore > 0.5) {
635
+ const highlights: SearchHighlight = {
636
+ title: highlightText(item.title, queryTokens, expandedTerms),
637
+ body: item.body
638
+ ? highlightText(item.body, queryTokens, expandedTerms)
639
+ : undefined,
640
+ };
641
+ scored.push({ item, score, highlights });
155
642
  }
156
643
  }
157
644
 
158
- scored.sort((a, b) => b.score - a.score);
159
- const total = scored.length;
160
- const results = scored.slice(skip, skip + limit);
645
+ // Filter by tags if specified
646
+ let filtered = scored;
647
+ if (options?.tags && options.tags.length > 0) {
648
+ const filterTags = new Set(options.tags.map((t) => normalize(t)));
649
+ filtered = scored.filter((r) =>
650
+ r.item.tags.some((tag) => filterTags.has(normalize(tag))),
651
+ );
652
+ }
653
+
654
+ // Compute facets before pagination
655
+ const facets = computeFacets(filtered);
656
+
657
+ // Sort
658
+ const sorted = sortResults(filtered, sort);
659
+ const total = sorted.length;
660
+ const results = sorted.slice(skip, skip + limit);
661
+
662
+ // Did-you-mean suggestion for zero/low results
663
+ let didYouMean: string | undefined;
664
+ if (total === 0) {
665
+ const allTitles = allItems.map((i) => i.title);
666
+ const allQueries = (await data.findMany(
667
+ "searchQuery",
668
+ {},
669
+ )) as unknown as SearchQuery[];
670
+ const popularTermsList = allQueries
671
+ .filter((q) => q.resultCount > 0)
672
+ .map((q) => q.term);
673
+ didYouMean = findDidYouMean(
674
+ queryTokens,
675
+ allTitles,
676
+ popularTermsList,
677
+ false,
678
+ );
679
+ }
161
680
 
162
- return { results, total };
681
+ return { results, total, facets, didYouMean };
163
682
  },
164
683
 
165
684
  async suggest(prefix, limit = 10) {
166
685
  const prefixNorm = normalize(prefix);
167
686
  if (prefixNorm.length === 0) return [];
168
687
 
169
- // Combine popular terms + index titles
170
688
  const allQueries = (await data.findMany(
171
689
  "searchQuery",
172
690
  {},
173
691
  )) as unknown as SearchQuery[];
174
692
 
175
- // Count query frequency
176
693
  const termCounts = new Map<string, number>();
177
694
  for (const q of allQueries) {
178
695
  if (q.resultCount > 0 && q.normalizedTerm.startsWith(prefixNorm)) {
@@ -180,7 +697,6 @@ export function createSearchController(
180
697
  }
181
698
  }
182
699
 
183
- // Also match index titles
184
700
  const allItems = (await data.findMany(
185
701
  "searchIndex",
186
702
  {},
@@ -192,7 +708,6 @@ export function createSearchController(
192
708
  }
193
709
  }
194
710
 
195
- // Merge: popular terms first, then title suggestions
196
711
  const popularTerms = Array.from(termCounts.entries())
197
712
  .sort((a, b) => b[1] - a[1])
198
713
  .map(([term]) => term);
@@ -227,17 +742,38 @@ export function createSearchController(
227
742
  sessionId,
228
743
  searchedAt: new Date(),
229
744
  };
230
- // biome-ignore lint/suspicious/noExplicitAny: ModuleDataService requires any
231
- await data.upsert("searchQuery", id, query as Record<string, any>);
745
+ await data.upsert(
746
+ "searchQuery",
747
+ id,
748
+ query as unknown as Record<string, string>,
749
+ );
232
750
  return query;
233
751
  },
234
752
 
753
/**
 * Persist a click on a search result.
 * A new record with a random UUID and the current time as `clickedAt` is
 * written to the "searchClick" collection and returned.
 */
async recordClick(params) {
  const { queryId, term, entityType, entityId, position } = params;
  const click: SearchClick = {
    id: crypto.randomUUID(),
    queryId,
    term,
    entityType,
    entityId,
    position,
    clickedAt: new Date(),
  };
  const record = click as unknown as Record<string, string>;
  await data.upsert("searchClick", click.id, record);
  return click;
},
771
+
235
772
  async getRecentQueries(sessionId, limit = 10) {
236
773
  const all = (await data.findMany("searchQuery", {
237
774
  where: { sessionId },
238
775
  })) as unknown as SearchQuery[];
239
776
 
240
- // Sort by date desc, deduplicate by normalized term
241
777
  all.sort(
242
778
  (a, b) =>
243
779
  new Date(b.searchedAt).getTime() - new Date(a.searchedAt).getTime(),
@@ -322,36 +858,68 @@ export function createSearchController(
322
858
  },
323
859
 
324
860
/**
 * Aggregate search analytics over all recorded queries and clicks.
 *
 * - `avgResultCount`, `zeroResultRate` and `clickThroughRate` are rounded to
 *   whole numbers; the two rates are percentages.
 * - `clickThroughRate` is the number of distinct clicked query IDs divided by
 *   the number of queries that returned results. Clicks without a query ID
 *   are ignored and the value is capped at 100, so clicks that do not map to
 *   a counted query cannot push the rate above 100%.
 * - `avgClickPosition` is the mean `position` of all clicks, rounded to one
 *   decimal place.
 *
 * All values are 0 when no queries have been recorded.
 */
async getAnalytics() {
  // The two reads are independent, so issue them together.
  const [allQueries, allClicks] = (await Promise.all([
    data.findMany("searchQuery", {}),
    data.findMany("searchClick", {}),
  ])) as unknown as [SearchQuery[], SearchClick[]];

  if (allQueries.length === 0) {
    return {
      totalQueries: 0,
      uniqueTerms: 0,
      avgResultCount: 0,
      zeroResultCount: 0,
      zeroResultRate: 0,
      clickThroughRate: 0,
      avgClickPosition: 0,
    };
  }

  const uniqueTerms = new Set(allQueries.map((q) => q.normalizedTerm));
  const totalResults = allQueries.reduce((sum, q) => sum + q.resultCount, 0);
  const zeroResultCount = allQueries.filter(
    (q) => q.resultCount === 0,
  ).length;

  // CTR: queries that led to at least one click
  const clickedQueryIds = new Set(
    allClicks.map((c) => c.queryId).filter((queryId) => Boolean(queryId)),
  );
  const queriesWithResults = allQueries.filter(
    (q) => q.resultCount > 0,
  ).length;
  const clickThroughRate =
    queriesWithResults > 0
      ? Math.min(
          100,
          Math.round((clickedQueryIds.size / queriesWithResults) * 100),
        )
      : 0;

  const avgClickPosition =
    allClicks.length > 0
      ? Math.round(
          (allClicks.reduce((sum, c) => sum + c.position, 0) /
            allClicks.length) *
            10,
        ) / 10
      : 0;

  return {
    totalQueries: allQueries.length,
    uniqueTerms: uniqueTerms.size,
    avgResultCount: Math.round(totalResults / allQueries.length),
    zeroResultCount,
    zeroResultRate: Math.round((zeroResultCount / allQueries.length) * 100),
    clickThroughRate,
    avgClickPosition,
  };
},
352
921
 
353
922
  async addSynonym(term, synonyms) {
354
- // Check if synonym for this term already exists
355
923
  const existing = await data.findMany("searchSynonym", {
356
924
  where: { term: normalize(term) },
357
925
  take: 1,
@@ -367,8 +935,11 @@ export function createSearchController(
367
935
  createdAt:
368
936
  existingItems.length > 0 ? existingItems[0].createdAt : new Date(),
369
937
  };
370
- // biome-ignore lint/suspicious/noExplicitAny: ModuleDataService requires any
371
- await data.upsert("searchSynonym", id, synonym as Record<string, any>);
938
+ await data.upsert(
939
+ "searchSynonym",
940
+ id,
941
+ synonym as unknown as Record<string, string>,
942
+ );
372
943
  return synonym;
373
944
  },
374
945