@86d-app/search 0.0.4 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -0
- package/AGENTS.md +72 -0
- package/README.md +171 -28
- package/dist/__tests__/controllers.test.d.ts +2 -0
- package/dist/__tests__/controllers.test.d.ts.map +1 -0
- package/dist/__tests__/embedding-provider.test.d.ts +2 -0
- package/dist/__tests__/embedding-provider.test.d.ts.map +1 -0
- package/dist/__tests__/endpoint-security.test.d.ts +2 -0
- package/dist/__tests__/endpoint-security.test.d.ts.map +1 -0
- package/dist/__tests__/meilisearch-provider.test.d.ts +2 -0
- package/dist/__tests__/meilisearch-provider.test.d.ts.map +1 -0
- package/dist/__tests__/service-impl.test.d.ts +2 -0
- package/dist/__tests__/service-impl.test.d.ts.map +1 -0
- package/dist/admin/components/index.d.ts +2 -0
- package/dist/admin/components/index.d.ts.map +1 -0
- package/dist/admin/components/search-analytics.d.ts +2 -0
- package/dist/admin/components/search-analytics.d.ts.map +1 -0
- package/dist/admin/endpoints/analytics.d.ts +15 -0
- package/dist/admin/endpoints/analytics.d.ts.map +1 -0
- package/dist/admin/endpoints/bulk-index.d.ts +20 -0
- package/dist/admin/endpoints/bulk-index.d.ts.map +1 -0
- package/dist/admin/endpoints/click-analytics.d.ts +7 -0
- package/dist/admin/endpoints/click-analytics.d.ts.map +1 -0
- package/dist/admin/endpoints/get-settings.d.ts +17 -0
- package/dist/admin/endpoints/get-settings.d.ts.map +1 -0
- package/dist/admin/endpoints/index-manage.d.ts +26 -0
- package/dist/admin/endpoints/index-manage.d.ts.map +1 -0
- package/dist/admin/endpoints/index.d.ts +125 -0
- package/dist/admin/endpoints/index.d.ts.map +1 -0
- package/dist/admin/endpoints/popular.d.ts +10 -0
- package/dist/admin/endpoints/popular.d.ts.map +1 -0
- package/dist/admin/endpoints/synonyms.d.ts +30 -0
- package/dist/admin/endpoints/synonyms.d.ts.map +1 -0
- package/dist/admin/endpoints/zero-results.d.ts +10 -0
- package/dist/admin/endpoints/zero-results.d.ts.map +1 -0
- package/dist/embedding-provider.d.ts +28 -0
- package/dist/embedding-provider.d.ts.map +1 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/meilisearch-provider.d.ts +104 -0
- package/dist/meilisearch-provider.d.ts.map +1 -0
- package/dist/schema.d.ts +133 -0
- package/dist/schema.d.ts.map +1 -0
- package/dist/service-impl.d.ts +6 -0
- package/dist/service-impl.d.ts.map +1 -0
- package/dist/service.d.ts +127 -0
- package/dist/service.d.ts.map +1 -0
- package/dist/store/components/_hooks.d.ts +6 -0
- package/dist/store/components/_hooks.d.ts.map +1 -0
- package/dist/store/components/index.d.ts +10 -0
- package/dist/store/components/index.d.ts.map +1 -0
- package/dist/store/components/search-bar.d.ts +7 -0
- package/dist/store/components/search-bar.d.ts.map +1 -0
- package/dist/store/components/search-page.d.ts +4 -0
- package/dist/store/components/search-page.d.ts.map +1 -0
- package/dist/store/components/search-results.d.ts +9 -0
- package/dist/store/components/search-results.d.ts.map +1 -0
- package/dist/store/endpoints/click.d.ts +14 -0
- package/dist/store/endpoints/click.d.ts.map +1 -0
- package/dist/store/endpoints/index.d.ts +85 -0
- package/dist/store/endpoints/index.d.ts.map +1 -0
- package/dist/store/endpoints/recent.d.ts +15 -0
- package/dist/store/endpoints/recent.d.ts.map +1 -0
- package/dist/store/endpoints/search.d.ts +36 -0
- package/dist/store/endpoints/search.d.ts.map +1 -0
- package/dist/store/endpoints/store-search.d.ts +16 -0
- package/dist/store/endpoints/store-search.d.ts.map +1 -0
- package/dist/store/endpoints/suggest.d.ts +11 -0
- package/dist/store/endpoints/suggest.d.ts.map +1 -0
- package/package.json +3 -3
- package/src/__tests__/controllers.test.ts +1026 -0
- package/src/__tests__/embedding-provider.test.ts +195 -0
- package/src/__tests__/endpoint-security.test.ts +300 -0
- package/src/__tests__/meilisearch-provider.test.ts +400 -0
- package/src/__tests__/service-impl.test.ts +341 -8
- package/src/admin/components/search-analytics.tsx +120 -0
- package/src/admin/endpoints/bulk-index.ts +34 -0
- package/src/admin/endpoints/click-analytics.ts +16 -0
- package/src/admin/endpoints/get-settings.ts +56 -0
- package/src/admin/endpoints/index-manage.ts +4 -1
- package/src/admin/endpoints/index.ts +6 -0
- package/src/admin/endpoints/synonyms.ts +1 -1
- package/src/embedding-provider.ts +99 -0
- package/src/index.ts +60 -4
- package/src/meilisearch-provider.ts +239 -0
- package/src/schema.ts +15 -0
- package/src/service-impl.ts +605 -34
- package/src/service.ts +60 -1
- package/src/store/endpoints/click.ts +21 -0
- package/src/store/endpoints/index.ts +2 -0
- package/src/store/endpoints/recent.ts +1 -1
- package/src/store/endpoints/search.ts +38 -10
- package/src/store/endpoints/store-search.ts +1 -1
- package/src/store/endpoints/suggest.ts +2 -2
- package/vitest.config.ts +2 -0
package/src/service-impl.ts
CHANGED
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
import type { ModuleDataService } from "@86d-app/core";
|
|
2
|
+
import type { EmbeddingProvider } from "./embedding-provider";
|
|
3
|
+
import { cosineSimilarity } from "./embedding-provider";
|
|
2
4
|
import type {
|
|
5
|
+
MeiliSearchDocument,
|
|
6
|
+
MeiliSearchProvider,
|
|
7
|
+
} from "./meilisearch-provider";
|
|
8
|
+
import type {
|
|
9
|
+
SearchClick,
|
|
3
10
|
SearchController,
|
|
11
|
+
SearchFacets,
|
|
12
|
+
SearchHighlight,
|
|
4
13
|
SearchIndexItem,
|
|
5
14
|
SearchQuery,
|
|
6
15
|
SearchResult,
|
|
16
|
+
SearchSortField,
|
|
7
17
|
SearchSynonym,
|
|
8
18
|
} from "./service";
|
|
9
19
|
|
|
@@ -17,15 +27,89 @@ function tokenize(text: string): string[] {
|
|
|
17
27
|
.filter((t) => t.length > 0);
|
|
18
28
|
}
|
|
19
29
|
|
|
30
|
+
/**
 * Levenshtein (edit) distance between two strings, measured in UTF-16 code
 * units. Used for fuzzy matching and did-you-mean suggestions.
 *
 * Implemented with two rolling rows of the dynamic-programming table. Because
 * the distance is symmetric, the shorter string is placed on the column axis
 * so the rows are only `min(a.length, b.length) + 1` entries long.
 *
 * @param a First string.
 * @param b Second string.
 * @returns Minimum number of single-unit insertions, deletions and
 *   substitutions needed to turn `a` into `b`.
 */
function levenshtein(a: string, b: string): number {
  // Fast paths: identical strings and empty operands need no table.
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  // Keep the shorter string on the column axis to minimise row size.
  const [rowStr, colStr] = a.length >= b.length ? [a, b] : [b, a];
  const cols = colStr.length;

  // prev = distances for the previous row, curr = row being filled.
  // Both are fully initialised so no slot is ever read as undefined.
  let prev = Array.from({ length: cols + 1 }, (_, j) => j);
  let curr = new Array<number>(cols + 1).fill(0);

  for (let i = 1; i <= rowStr.length; i++) {
    curr[0] = i;
    for (let j = 1; j <= cols; j++) {
      const cost = rowStr[i - 1] === colStr[j - 1] ? 0 : 1;
      curr[j] = Math.min(
        curr[j - 1] + 1, // insertion
        prev[j] + 1, // deletion
        prev[j - 1] + cost, // substitution
      );
    }
    // Reuse the old row's storage for the next iteration.
    [prev, curr] = [curr, prev];
  }

  // After the final swap the last completed row lives in `prev`.
  return prev[cols];
}
|
|
63
|
+
|
|
64
|
+
/**
 * Edit-distance tolerance for a word of the given length.
 *
 * Words of up to 3 characters must match exactly (tolerance 0), words of
 * 4-5 characters may differ by one edit, and anything longer by two. Short
 * words get less slack so they do not produce noisy matches.
 *
 * @param wordLength Length of the word being matched.
 * @returns Maximum allowed edit distance: 0, 1 or 2.
 */
function maxEditDistance(wordLength: number): number {
  return wordLength <= 3 ? 0 : wordLength <= 5 ? 1 : 2;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Check whether `token` matches `target` within the edit-distance tolerance
 * that `maxEditDistance` assigns to the token's length.
 *
 * Cheap checks run first so the quadratic distance computation is only done
 * when it can actually change the answer.
 *
 * @param token Query token; its length decides the tolerance.
 * @param target Candidate word to compare against.
 * @returns `true` when the two are equal or within the allowed distance.
 */
function fuzzyMatch(token: string, target: string): boolean {
  // Equal strings match under every tolerance.
  if (token === target) return true;

  const maxDist = maxEditDistance(token.length);
  // Zero tolerance means exact match only, and equality was ruled out above.
  if (maxDist === 0) return false;

  // The edit distance is never smaller than the length difference, so a gap
  // wider than the tolerance can be rejected without computing it.
  if (Math.abs(token.length - target.length) > maxDist) return false;

  return levenshtein(token, target) <= maxDist;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Highlight matching segments in text by wrapping them in <mark> tags.
 *
 * All terms are combined into one case-insensitive alternation and applied in
 * a single pass. Replacing term by term would let a later term match inside a
 * `<mark>` tag inserted for an earlier one (e.g. the term "mark") and corrupt
 * the markup. Longer terms are tried first so "shoes" wins over "shoe".
 *
 * NOTE(review): `text` is not HTML-escaped here; callers that render the
 * result as HTML are responsible for the safety of the underlying text.
 *
 * @param text Text to annotate.
 * @param queryTokens Tokens from the user's query; if empty, `text` is
 *   returned unchanged.
 * @param expandedTerms Additional terms (e.g. synonyms) to highlight.
 * @returns `text` with every occurrence of a term wrapped in `<mark>…</mark>`.
 */
function highlightText(
  text: string,
  queryTokens: string[],
  expandedTerms: Set<string>,
): string {
  if (!text || queryTokens.length === 0) return text;

  // De-duplicate, drop empty terms (an empty alternative would match at
  // every position), and order longest-first for the alternation.
  const terms = Array.from(new Set([...queryTokens, ...expandedTerms]))
    .filter((term) => term.length > 0)
    .sort((a, b) => b.length - a.length);
  if (terms.length === 0) return text;

  // Escape regex metacharacters so terms are matched literally.
  const pattern = terms
    .map((term) => term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");

  // "$&" re-inserts the matched text, preserving its original casing.
  return text.replace(new RegExp(pattern, "gi"), "<mark>$&</mark>");
}
|
|
101
|
+
|
|
20
102
|
function scoreMatch(
|
|
21
103
|
item: SearchIndexItem,
|
|
22
104
|
queryTokens: string[],
|
|
23
105
|
expandedTerms: Set<string>,
|
|
106
|
+
fuzzy: boolean,
|
|
24
107
|
): number {
|
|
25
108
|
let score = 0;
|
|
26
109
|
const titleLower = normalize(item.title);
|
|
27
110
|
const bodyLower = item.body ? normalize(item.body) : "";
|
|
28
111
|
const tagLower = item.tags.map((t) => normalize(t));
|
|
112
|
+
const titleTokens = tokenize(item.title);
|
|
29
113
|
|
|
30
114
|
for (const token of queryTokens) {
|
|
31
115
|
const allTerms = [token, ...expandedTerms];
|
|
@@ -53,17 +137,233 @@ function scoreMatch(
|
|
|
53
137
|
}
|
|
54
138
|
}
|
|
55
139
|
}
|
|
140
|
+
|
|
141
|
+
// Fuzzy matching on title tokens (lower weight than exact)
|
|
142
|
+
if (fuzzy) {
|
|
143
|
+
for (const titleToken of titleTokens) {
|
|
144
|
+
const titleTokenNorm = normalize(titleToken);
|
|
145
|
+
if (!titleLower.includes(token) && fuzzyMatch(token, titleTokenNorm)) {
|
|
146
|
+
score += 15;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
// Fuzzy on tags
|
|
150
|
+
for (const tag of tagLower) {
|
|
151
|
+
const tagTokens = tokenize(tag);
|
|
152
|
+
for (const tagToken of tagTokens) {
|
|
153
|
+
if (!tag.includes(token) && fuzzyMatch(token, tagToken)) {
|
|
154
|
+
score += 8;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
// Fuzzy on body tokens
|
|
159
|
+
if (bodyLower && !bodyLower.includes(token)) {
|
|
160
|
+
const bodyTokens = tokenize(bodyLower);
|
|
161
|
+
for (const bodyToken of bodyTokens) {
|
|
162
|
+
if (fuzzyMatch(token, bodyToken)) {
|
|
163
|
+
score += 5;
|
|
164
|
+
break; // only count once per token per body
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
56
169
|
}
|
|
57
170
|
|
|
58
171
|
return score;
|
|
59
172
|
}
|
|
60
173
|
|
|
174
|
+
/**
 * Translate a MeiliSearch `facetDistribution` into the SearchFacets shape.
 *
 * Reads the `entityType` and `tags` distributions (either may be absent),
 * orders each by descending count, and keeps at most the top 20 tags.
 *
 * @param distribution Facet name -> (facet value -> hit count); optional.
 * @returns Facets with empty lists when no distribution is supplied.
 */
function meiliResultToFacets(
  distribution?: Record<string, Record<string, number>>,
): SearchFacets {
  if (!distribution) {
    return { entityTypes: [], tags: [] };
  }

  const byCountDesc = (x: { count: number }, y: { count: number }): number =>
    y.count - x.count;

  const entityTypes: { type: string; count: number }[] = [];
  for (const [type, count] of Object.entries(distribution.entityType ?? {})) {
    entityTypes.push({ type, count });
  }
  entityTypes.sort(byCountDesc);

  const rankedTags: { tag: string; count: number }[] = [];
  for (const [tag, count] of Object.entries(distribution.tags ?? {})) {
    rankedTags.push({ tag, count });
  }
  rankedTags.sort(byCountDesc);

  return { entityTypes, tags: rankedTags.slice(0, 20) };
}
|
|
193
|
+
|
|
194
|
+
/**
 * Build facet counts from a list of search results.
 *
 * Counts results per entity type and per normalized tag, orders both lists
 * by descending count, and keeps at most the top 20 tags.
 *
 * @param items Results to aggregate.
 * @returns Entity-type and tag facets for those results.
 */
function computeFacets(items: SearchResult[]): SearchFacets {
  const perType = new Map<string, number>();
  const perTag = new Map<string, number>();

  const bump = (counts: Map<string, number>, key: string): void => {
    counts.set(key, (counts.get(key) ?? 0) + 1);
  };

  for (const result of items) {
    const entry = result.item;
    bump(perType, entry.entityType);
    for (const rawTag of entry.tags) {
      bump(perTag, normalize(rawTag));
    }
  }

  const entityTypes = [...perType.entries()]
    .map(([type, count]) => ({ type, count }))
    .sort((x, y) => y.count - x.count);

  const rankedTags = [...perTag.entries()]
    .map(([tag, count]) => ({ tag, count }))
    .sort((x, y) => y.count - x.count);

  return { entityTypes, tags: rankedTags.slice(0, 20) };
}
|
|
216
|
+
|
|
217
|
+
/**
 * Return a sorted copy of `results`; the input array is left untouched.
 *
 * - "newest" / "oldest": by `item.indexedAt`, descending / ascending.
 * - "title_asc" / "title_desc": by `item.title` using `localeCompare`.
 * - anything else: by `score`, highest first.
 *
 * @param results Results to order.
 * @param sort Requested ordering.
 * @returns A new array in the requested order.
 */
function sortResults(
  results: SearchResult[],
  sort: SearchSortField,
): SearchResult[] {
  const timestamp = (r: SearchResult): number =>
    new Date(r.item.indexedAt).getTime();

  let compare: (a: SearchResult, b: SearchResult) => number;
  if (sort === "newest") {
    compare = (a, b) => timestamp(b) - timestamp(a);
  } else if (sort === "oldest") {
    compare = (a, b) => timestamp(a) - timestamp(b);
  } else if (sort === "title_asc") {
    compare = (a, b) => a.item.title.localeCompare(b.item.title);
  } else if (sort === "title_desc") {
    compare = (a, b) => b.item.title.localeCompare(a.item.title);
  } else {
    compare = (a, b) => b.score - a.score;
  }

  return [...results].sort(compare);
}
|
|
249
|
+
|
|
250
|
+
/**
 * Build a did-you-mean suggestion by replacing each query token with the
 * closest known token within its edit-distance tolerance.
 *
 * Candidate tokens come from tokenizing the indexed titles and the popular
 * search terms. A query token that is itself a candidate, or that is too
 * short to tolerate any edit, is kept unchanged. Among equally close
 * candidates the first one encountered wins.
 *
 * @param queryTokens Tokenized user query.
 * @param indexedTitles Titles of indexed items.
 * @param popularTerms Previously searched terms to draw candidates from.
 * @param hasResults When `true`, no suggestion is produced.
 * @returns The corrected query, or `undefined` when there is nothing to
 *   suggest (results exist, or no token could be corrected).
 */
function findDidYouMean(
  queryTokens: string[],
  indexedTitles: string[],
  popularTerms: string[],
  hasResults: boolean,
): string | undefined {
  // Suggestions are only offered when the search came back empty.
  if (hasResults) return undefined;

  const candidates = new Set<string>();
  for (const source of [...indexedTitles, ...popularTerms]) {
    for (const token of tokenize(source)) {
      candidates.add(token);
    }
  }

  const corrections = queryTokens.map((token) => {
    // Tolerance depends only on the token, so compute it once per token.
    const tolerance = maxEditDistance(token.length);

    // No correction is possible (zero tolerance) or needed (exact hit).
    if (tolerance === 0 || candidates.has(token)) return token;

    let bestMatch = token;
    let bestDist = Number.POSITIVE_INFINITY;
    for (const candidate of candidates) {
      // The distance is at least the length gap; skip hopeless candidates.
      if (Math.abs(candidate.length - token.length) > tolerance) continue;

      const dist = levenshtein(token, candidate);
      if (dist <= tolerance && dist < bestDist) {
        bestDist = dist;
        bestMatch = candidate;
        // Exact matches were excluded above, so 1 is the best possible.
        if (dist === 1) break;
      }
    }
    return bestMatch;
  });

  const suggestion = corrections.join(" ");
  return suggestion === queryTokens.join(" ") ? undefined : suggestion;
}
|
|
302
|
+
|
|
61
303
|
export function createSearchController(
|
|
62
304
|
data: ModuleDataService,
|
|
305
|
+
embeddingProvider?: EmbeddingProvider,
|
|
306
|
+
meiliProvider?: MeiliSearchProvider,
|
|
63
307
|
): SearchController {
|
|
308
|
+
/**
 * Attach an embedding to an item when an embedding provider is configured.
 *
 * The text sent to the provider is the title, then the body (if any), then
 * the comma-joined tags (if any), joined with ". ". A returned embedding is
 * stored on the item under `metadata.__embedding`. Any error is swallowed on
 * purpose: embeddings are best-effort and lexical search works without them.
 *
 * @param item Item to enrich; its `metadata` is updated in place.
 * @returns The same item.
 */
async function embedItem(item: SearchIndexItem): Promise<SearchIndexItem> {
  if (embeddingProvider) {
    try {
      const text = [
        item.title,
        ...(item.body ? [item.body] : []),
        ...(item.tags.length > 0 ? [item.tags.join(", ")] : []),
      ].join(". ");

      const vector = await embeddingProvider.generateEmbedding(text);
      if (vector) {
        item.metadata = { ...item.metadata, __embedding: vector };
      }
    } catch {
      // Deliberately ignored — the item is still indexed without a vector.
    }
  }
  return item;
}
|
|
329
|
+
|
|
330
|
+
/**
 * Project a SearchIndexItem onto the document shape sent to MeiliSearch.
 *
 * `metadata` (and with it any stored `__embedding`) is intentionally left
 * out to keep the remote index small; `indexedAt` is serialized as an ISO
 * string.
 *
 * @param item Locally indexed item.
 * @returns The MeiliSearch document for that item.
 */
function toMeiliDocument(item: SearchIndexItem): MeiliSearchDocument {
  const {
    id,
    entityType,
    entityId,
    title,
    body,
    tags,
    url,
    image,
    indexedAt,
  } = item;

  return {
    id,
    entityType,
    entityId,
    title,
    body,
    tags,
    url,
    image,
    indexedAt: indexedAt.toISOString(),
  };
}
|
|
347
|
+
|
|
348
|
+
/**
 * Push items to MeiliSearch without waiting for the result.
 *
 * Does nothing when no MeiliSearch provider is configured or the list is
 * empty. Rejections are swallowed so the local index remains usable as a
 * fallback.
 *
 * @param items Items to send.
 */
function syncToMeili(items: SearchIndexItem[]): void {
  if (meiliProvider && items.length > 0) {
    const documents = items.map((item) => toMeiliDocument(item));
    void meiliProvider.addDocuments(documents).catch(() => {});
  }
}
|
|
356
|
+
|
|
357
|
+
/**
 * Delete one document from MeiliSearch without waiting for the result.
 *
 * A no-op when no MeiliSearch provider is configured; rejections are
 * swallowed.
 *
 * @param documentId ID of the document to remove.
 */
function removeFromMeili(documentId: string): void {
  void meiliProvider?.deleteDocument(documentId).catch(() => {});
}
|
|
364
|
+
|
|
64
365
|
return {
|
|
65
366
|
async indexItem(params) {
|
|
66
|
-
// Check if already indexed — update if so
|
|
67
367
|
const existing = await data.findMany("searchIndex", {
|
|
68
368
|
where: {
|
|
69
369
|
entityType: params.entityType,
|
|
@@ -75,7 +375,7 @@ export function createSearchController(
|
|
|
75
375
|
|
|
76
376
|
const id =
|
|
77
377
|
existingItems.length > 0 ? existingItems[0].id : crypto.randomUUID();
|
|
78
|
-
|
|
378
|
+
let item: SearchIndexItem = {
|
|
79
379
|
id,
|
|
80
380
|
entityType: params.entityType,
|
|
81
381
|
entityId: params.entityId,
|
|
@@ -87,11 +387,86 @@ export function createSearchController(
|
|
|
87
387
|
metadata: params.metadata ?? {},
|
|
88
388
|
indexedAt: new Date(),
|
|
89
389
|
};
|
|
90
|
-
|
|
91
|
-
await data.upsert(
|
|
390
|
+
item = await embedItem(item);
|
|
391
|
+
await data.upsert(
|
|
392
|
+
"searchIndex",
|
|
393
|
+
id,
|
|
394
|
+
item as unknown as Record<string, string>,
|
|
395
|
+
);
|
|
396
|
+
syncToMeili([item]);
|
|
92
397
|
return item;
|
|
93
398
|
},
|
|
94
399
|
|
|
400
|
+
/**
 * Index many items in one call.
 *
 * When an embedding provider is configured, embeddings for all items are
 * requested in a single batch and stored under `metadata.__embedding`. Each
 * item is then upserted individually (reusing the ID of an existing entry
 * with the same entityType/entityId), and the successfully stored items are
 * pushed to MeiliSearch in one fire-and-forget call.
 *
 * The caller's `items` array is never modified. Embedding generation is
 * best-effort: if the provider throws, items are indexed without vectors.
 *
 * @param items Items to index.
 * @returns Number of items stored (`indexed`) and number that failed
 *   (`errors`).
 */
async bulkIndex(items) {
  let indexed = 0;
  let errors = 0;

  // Shallow copy so enriched entries do not overwrite the caller's array.
  const pending = [...items];

  // Batch generate embeddings for all items at once
  if (embeddingProvider && pending.length > 0) {
    try {
      const texts = pending.map((p) => {
        const parts = [p.title];
        if (p.body) parts.push(p.body);
        if (p.tags && p.tags.length > 0) parts.push(p.tags.join(", "));
        return parts.join(". ");
      });
      const embeddings = await embeddingProvider.generateEmbeddings(texts);
      for (let i = 0; i < pending.length; i++) {
        if (embeddings[i]) {
          pending[i] = {
            ...pending[i],
            metadata: {
              ...pending[i].metadata,
              __embedding: embeddings[i],
            },
          };
        }
      }
    } catch {
      // Embedding is best-effort — lexical search still works
    }
  }

  const indexedItems: SearchIndexItem[] = [];
  for (const params of pending) {
    try {
      // Reuse the existing entry's ID so re-indexing updates in place.
      const existing = await data.findMany("searchIndex", {
        where: {
          entityType: params.entityType,
          entityId: params.entityId,
        },
        take: 1,
      });
      const existingItems = existing as unknown as SearchIndexItem[];
      const id =
        existingItems.length > 0
          ? existingItems[0].id
          : crypto.randomUUID();

      const item: SearchIndexItem = {
        id,
        entityType: params.entityType,
        entityId: params.entityId,
        title: params.title,
        body: params.body,
        tags: params.tags ?? [],
        url: params.url,
        image: params.image,
        metadata: params.metadata ?? {},
        indexedAt: new Date(),
      };
      await data.upsert(
        "searchIndex",
        id,
        item as unknown as Record<string, string>,
      );
      indexedItems.push(item);
      indexed++;
    } catch {
      // A failure on one item must not stop the rest of the batch.
      errors++;
    }
  }

  syncToMeili(indexedItems);
  return { indexed, errors };
},
|
|
469
|
+
|
|
95
470
|
async removeFromIndex(entityType, entityId) {
|
|
96
471
|
const items = await data.findMany("searchIndex", {
|
|
97
472
|
where: { entityType, entityId },
|
|
@@ -100,6 +475,7 @@ export function createSearchController(
|
|
|
100
475
|
if (found.length === 0) return false;
|
|
101
476
|
for (const item of found) {
|
|
102
477
|
await data.delete("searchIndex", item.id);
|
|
478
|
+
removeFromMeili(item.id);
|
|
103
479
|
}
|
|
104
480
|
return true;
|
|
105
481
|
},
|
|
@@ -107,12 +483,89 @@ export function createSearchController(
|
|
|
107
483
|
async search(query, options) {
|
|
108
484
|
const limit = options?.limit ?? 20;
|
|
109
485
|
const skip = options?.skip ?? 0;
|
|
486
|
+
const sort = options?.sort ?? "relevance";
|
|
110
487
|
const queryTokens = tokenize(query);
|
|
111
488
|
|
|
112
489
|
if (queryTokens.length === 0) {
|
|
113
|
-
return {
|
|
490
|
+
return {
|
|
491
|
+
results: [],
|
|
492
|
+
total: 0,
|
|
493
|
+
facets: { entityTypes: [], tags: [] },
|
|
494
|
+
};
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
// ── MeiliSearch path: delegate search to dedicated engine ──
|
|
498
|
+
if (meiliProvider) {
|
|
499
|
+
try {
|
|
500
|
+
const meiliSort: string[] | undefined =
|
|
501
|
+
sort === "newest"
|
|
502
|
+
? ["indexedAt:desc"]
|
|
503
|
+
: sort === "oldest"
|
|
504
|
+
? ["indexedAt:asc"]
|
|
505
|
+
: sort === "title_asc"
|
|
506
|
+
? ["title:asc"]
|
|
507
|
+
: sort === "title_desc"
|
|
508
|
+
? ["title:desc"]
|
|
509
|
+
: undefined;
|
|
510
|
+
|
|
511
|
+
const filters: string[] = [];
|
|
512
|
+
if (options?.entityType) {
|
|
513
|
+
filters.push(`entityType = "${options.entityType}"`);
|
|
514
|
+
}
|
|
515
|
+
if (options?.tags && options.tags.length > 0) {
|
|
516
|
+
const tagFilters = options.tags
|
|
517
|
+
.map((t) => `tags = "${t}"`)
|
|
518
|
+
.join(" OR ");
|
|
519
|
+
filters.push(`(${tagFilters})`);
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
const meiliResult = await meiliProvider.search(query, {
|
|
523
|
+
limit,
|
|
524
|
+
offset: skip,
|
|
525
|
+
filter: filters.length > 0 ? filters.join(" AND ") : undefined,
|
|
526
|
+
sort: meiliSort,
|
|
527
|
+
facets: ["entityType", "tags"],
|
|
528
|
+
attributesToHighlight: ["title", "body"],
|
|
529
|
+
highlightPreTag: "<mark>",
|
|
530
|
+
highlightPostTag: "</mark>",
|
|
531
|
+
showRankingScore: true,
|
|
532
|
+
});
|
|
533
|
+
|
|
534
|
+
const results: SearchResult[] = meiliResult.hits.map((hit) => ({
|
|
535
|
+
item: {
|
|
536
|
+
id: hit.id,
|
|
537
|
+
entityType: hit.entityType,
|
|
538
|
+
entityId: hit.entityId,
|
|
539
|
+
title: hit.title,
|
|
540
|
+
body: hit.body,
|
|
541
|
+
tags: hit.tags ?? [],
|
|
542
|
+
url: hit.url,
|
|
543
|
+
image: hit.image,
|
|
544
|
+
metadata: {},
|
|
545
|
+
indexedAt: new Date(hit.indexedAt),
|
|
546
|
+
},
|
|
547
|
+
score: (hit._rankingScore ?? 0.5) * 100,
|
|
548
|
+
highlights: {
|
|
549
|
+
title: hit._formatted?.title,
|
|
550
|
+
body: hit._formatted?.body,
|
|
551
|
+
},
|
|
552
|
+
}));
|
|
553
|
+
|
|
554
|
+
const facets = meiliResultToFacets(meiliResult.facetDistribution);
|
|
555
|
+
|
|
556
|
+
return {
|
|
557
|
+
results,
|
|
558
|
+
total: meiliResult.estimatedTotalHits ?? results.length,
|
|
559
|
+
facets,
|
|
560
|
+
};
|
|
561
|
+
} catch {
|
|
562
|
+
// MeiliSearch unavailable — fall through to local search
|
|
563
|
+
}
|
|
114
564
|
}
|
|
115
565
|
|
|
566
|
+
// ── Local search path: lexical + semantic scoring ──
|
|
567
|
+
const fuzzy = options?.fuzzy ?? true;
|
|
568
|
+
|
|
116
569
|
// Load synonyms for query expansion
|
|
117
570
|
const allSynonyms = (await data.findMany(
|
|
118
571
|
"searchSynonym",
|
|
@@ -127,7 +580,6 @@ export function createSearchController(
|
|
|
127
580
|
expandedTerms.add(normalize(s));
|
|
128
581
|
}
|
|
129
582
|
}
|
|
130
|
-
// Also reverse: if a query token matches a synonym, expand to the term
|
|
131
583
|
for (const s of syn.synonyms) {
|
|
132
584
|
if (normalize(s) === token) {
|
|
133
585
|
expandedTerms.add(synTermNorm);
|
|
@@ -136,8 +588,7 @@ export function createSearchController(
|
|
|
136
588
|
}
|
|
137
589
|
}
|
|
138
590
|
|
|
139
|
-
|
|
140
|
-
const where: Record<string, any> = {};
|
|
591
|
+
const where: Record<string, string> = {};
|
|
141
592
|
if (options?.entityType) {
|
|
142
593
|
where.entityType = options.entityType;
|
|
143
594
|
}
|
|
@@ -146,33 +597,99 @@ export function createSearchController(
|
|
|
146
597
|
...(Object.keys(where).length > 0 ? { where } : {}),
|
|
147
598
|
})) as unknown as SearchIndexItem[];
|
|
148
599
|
|
|
149
|
-
//
|
|
600
|
+
// Generate query embedding for semantic search (if provider is available)
|
|
601
|
+
let queryEmbedding: number[] | null = null;
|
|
602
|
+
if (embeddingProvider) {
|
|
603
|
+
try {
|
|
604
|
+
queryEmbedding = await embeddingProvider.generateEmbedding(query);
|
|
605
|
+
} catch {
|
|
606
|
+
// Semantic search is best-effort
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
// Score and rank — hybrid lexical + semantic scoring
|
|
150
611
|
const scored: SearchResult[] = [];
|
|
151
612
|
for (const item of allItems) {
|
|
152
|
-
const
|
|
153
|
-
|
|
154
|
-
|
|
613
|
+
const lexicalScore = scoreMatch(
|
|
614
|
+
item,
|
|
615
|
+
queryTokens,
|
|
616
|
+
expandedTerms,
|
|
617
|
+
fuzzy,
|
|
618
|
+
);
|
|
619
|
+
|
|
620
|
+
// Compute semantic score if embeddings are available
|
|
621
|
+
let semanticScore = 0;
|
|
622
|
+
if (queryEmbedding) {
|
|
623
|
+
const itemEmbedding = item.metadata?.__embedding as
|
|
624
|
+
| number[]
|
|
625
|
+
| undefined;
|
|
626
|
+
if (Array.isArray(itemEmbedding) && itemEmbedding.length > 0) {
|
|
627
|
+
semanticScore = cosineSimilarity(queryEmbedding, itemEmbedding);
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
// Include items with lexical match OR strong semantic similarity
|
|
632
|
+
const semanticBoost = semanticScore * 80;
|
|
633
|
+
const score = lexicalScore + semanticBoost;
|
|
634
|
+
if (lexicalScore > 0 || semanticScore > 0.5) {
|
|
635
|
+
const highlights: SearchHighlight = {
|
|
636
|
+
title: highlightText(item.title, queryTokens, expandedTerms),
|
|
637
|
+
body: item.body
|
|
638
|
+
? highlightText(item.body, queryTokens, expandedTerms)
|
|
639
|
+
: undefined,
|
|
640
|
+
};
|
|
641
|
+
scored.push({ item, score, highlights });
|
|
155
642
|
}
|
|
156
643
|
}
|
|
157
644
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
645
|
+
// Filter by tags if specified
|
|
646
|
+
let filtered = scored;
|
|
647
|
+
if (options?.tags && options.tags.length > 0) {
|
|
648
|
+
const filterTags = new Set(options.tags.map((t) => normalize(t)));
|
|
649
|
+
filtered = scored.filter((r) =>
|
|
650
|
+
r.item.tags.some((tag) => filterTags.has(normalize(tag))),
|
|
651
|
+
);
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
// Compute facets before pagination
|
|
655
|
+
const facets = computeFacets(filtered);
|
|
656
|
+
|
|
657
|
+
// Sort
|
|
658
|
+
const sorted = sortResults(filtered, sort);
|
|
659
|
+
const total = sorted.length;
|
|
660
|
+
const results = sorted.slice(skip, skip + limit);
|
|
661
|
+
|
|
662
|
+
// Did-you-mean suggestion for zero/low results
|
|
663
|
+
let didYouMean: string | undefined;
|
|
664
|
+
if (total === 0) {
|
|
665
|
+
const allTitles = allItems.map((i) => i.title);
|
|
666
|
+
const allQueries = (await data.findMany(
|
|
667
|
+
"searchQuery",
|
|
668
|
+
{},
|
|
669
|
+
)) as unknown as SearchQuery[];
|
|
670
|
+
const popularTermsList = allQueries
|
|
671
|
+
.filter((q) => q.resultCount > 0)
|
|
672
|
+
.map((q) => q.term);
|
|
673
|
+
didYouMean = findDidYouMean(
|
|
674
|
+
queryTokens,
|
|
675
|
+
allTitles,
|
|
676
|
+
popularTermsList,
|
|
677
|
+
false,
|
|
678
|
+
);
|
|
679
|
+
}
|
|
161
680
|
|
|
162
|
-
return { results, total };
|
|
681
|
+
return { results, total, facets, didYouMean };
|
|
163
682
|
},
|
|
164
683
|
|
|
165
684
|
async suggest(prefix, limit = 10) {
|
|
166
685
|
const prefixNorm = normalize(prefix);
|
|
167
686
|
if (prefixNorm.length === 0) return [];
|
|
168
687
|
|
|
169
|
-
// Combine popular terms + index titles
|
|
170
688
|
const allQueries = (await data.findMany(
|
|
171
689
|
"searchQuery",
|
|
172
690
|
{},
|
|
173
691
|
)) as unknown as SearchQuery[];
|
|
174
692
|
|
|
175
|
-
// Count query frequency
|
|
176
693
|
const termCounts = new Map<string, number>();
|
|
177
694
|
for (const q of allQueries) {
|
|
178
695
|
if (q.resultCount > 0 && q.normalizedTerm.startsWith(prefixNorm)) {
|
|
@@ -180,7 +697,6 @@ export function createSearchController(
|
|
|
180
697
|
}
|
|
181
698
|
}
|
|
182
699
|
|
|
183
|
-
// Also match index titles
|
|
184
700
|
const allItems = (await data.findMany(
|
|
185
701
|
"searchIndex",
|
|
186
702
|
{},
|
|
@@ -192,7 +708,6 @@ export function createSearchController(
|
|
|
192
708
|
}
|
|
193
709
|
}
|
|
194
710
|
|
|
195
|
-
// Merge: popular terms first, then title suggestions
|
|
196
711
|
const popularTerms = Array.from(termCounts.entries())
|
|
197
712
|
.sort((a, b) => b[1] - a[1])
|
|
198
713
|
.map(([term]) => term);
|
|
@@ -227,17 +742,38 @@ export function createSearchController(
|
|
|
227
742
|
sessionId,
|
|
228
743
|
searchedAt: new Date(),
|
|
229
744
|
};
|
|
230
|
-
|
|
231
|
-
|
|
745
|
+
await data.upsert(
|
|
746
|
+
"searchQuery",
|
|
747
|
+
id,
|
|
748
|
+
query as unknown as Record<string, string>,
|
|
749
|
+
);
|
|
232
750
|
return query;
|
|
233
751
|
},
|
|
234
752
|
|
|
753
|
+
/**
 * Persist a click on a search result.
 *
 * Creates a SearchClick with a fresh UUID and the current time, stores it in
 * the "searchClick" collection, and returns it.
 *
 * @param params Query ID, term, clicked entity and result position.
 * @returns The stored click record.
 */
async recordClick(params) {
  const { queryId, term, entityType, entityId, position } = params;

  const click: SearchClick = {
    id: crypto.randomUUID(),
    queryId,
    term,
    entityType,
    entityId,
    position,
    clickedAt: new Date(),
  };

  await data.upsert(
    "searchClick",
    click.id,
    click as unknown as Record<string, string>,
  );
  return click;
},
|
|
771
|
+
|
|
235
772
|
async getRecentQueries(sessionId, limit = 10) {
|
|
236
773
|
const all = (await data.findMany("searchQuery", {
|
|
237
774
|
where: { sessionId },
|
|
238
775
|
})) as unknown as SearchQuery[];
|
|
239
776
|
|
|
240
|
-
// Sort by date desc, deduplicate by normalized term
|
|
241
777
|
all.sort(
|
|
242
778
|
(a, b) =>
|
|
243
779
|
new Date(b.searchedAt).getTime() - new Date(a.searchedAt).getTime(),
|
|
@@ -322,36 +858,68 @@ export function createSearchController(
|
|
|
322
858
|
},
|
|
323
859
|
|
|
324
860
|
/**
 * Aggregate search analytics over all recorded queries and clicks.
 *
 * Returns all-zero metrics when no queries have been recorded. Otherwise:
 * - `avgResultCount`: rounded mean result count per query.
 * - `zeroResultRate`: rounded percentage of queries with no results.
 * - `clickThroughRate`: rounded percentage of distinct clicked query IDs
 *   relative to queries that had results.
 * - `avgClickPosition`: mean click position, rounded to one decimal.
 *
 * NOTE(review): the click-through numerator counts every distinct `queryId`
 * seen in clicks, whether or not it belongs to a recorded query with
 * results — confirm the rate cannot exceed 100 in practice.
 */
async getAnalytics() {
  const queries = (await data.findMany(
    "searchQuery",
    {},
  )) as unknown as SearchQuery[];

  const clicks = (await data.findMany(
    "searchClick",
    {},
  )) as unknown as SearchClick[];

  const queryCount = queries.length;
  if (queryCount === 0) {
    return {
      totalQueries: 0,
      uniqueTerms: 0,
      avgResultCount: 0,
      zeroResultCount: 0,
      zeroResultRate: 0,
      clickThroughRate: 0,
      avgClickPosition: 0,
    };
  }

  // One pass over the queries gathers every per-query statistic.
  const distinctTerms = new Set<SearchQuery["normalizedTerm"]>();
  let resultSum = 0;
  let zeroResultCount = 0;
  let withResults = 0;
  for (const q of queries) {
    distinctTerms.add(q.normalizedTerm);
    resultSum += q.resultCount;
    if (q.resultCount === 0) zeroResultCount++;
    if (q.resultCount > 0) withResults++;
  }

  // CTR: distinct clicked query IDs over queries that returned something.
  const clickedQueryIds = new Set(clicks.map((c) => c.queryId));
  let clickThroughRate = 0;
  if (withResults > 0) {
    clickThroughRate = Math.round((clickedQueryIds.size / withResults) * 100);
  }

  let avgClickPosition = 0;
  if (clicks.length > 0) {
    let positionSum = 0;
    for (const c of clicks) positionSum += c.position;
    // Scale by 10 before rounding to keep one decimal place.
    avgClickPosition = Math.round((positionSum / clicks.length) * 10) / 10;
  }

  return {
    totalQueries: queryCount,
    uniqueTerms: distinctTerms.size,
    avgResultCount: Math.round(resultSum / queryCount),
    zeroResultCount,
    zeroResultRate: Math.round((zeroResultCount / queryCount) * 100),
    clickThroughRate,
    avgClickPosition,
  };
},
|
|
352
921
|
|
|
353
922
|
async addSynonym(term, synonyms) {
|
|
354
|
-
// Check if synonym for this term already exists
|
|
355
923
|
const existing = await data.findMany("searchSynonym", {
|
|
356
924
|
where: { term: normalize(term) },
|
|
357
925
|
take: 1,
|
|
@@ -367,8 +935,11 @@ export function createSearchController(
|
|
|
367
935
|
createdAt:
|
|
368
936
|
existingItems.length > 0 ? existingItems[0].createdAt : new Date(),
|
|
369
937
|
};
|
|
370
|
-
|
|
371
|
-
|
|
938
|
+
await data.upsert(
|
|
939
|
+
"searchSynonym",
|
|
940
|
+
id,
|
|
941
|
+
synonym as unknown as Record<string, string>,
|
|
942
|
+
);
|
|
372
943
|
return synonym;
|
|
373
944
|
},
|
|
374
945
|
|