@86d-app/search 0.0.23 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/modules/search/src/__tests__/admin-settings.test.js +262 -0
- package/dist/modules/search/src/__tests__/controllers.test.js +853 -0
- package/dist/modules/search/src/__tests__/embedding-provider.test.js +150 -0
- package/dist/modules/search/src/__tests__/endpoint-security.test.js +250 -0
- package/dist/modules/search/src/__tests__/meilisearch-provider.test.js +318 -0
- package/dist/modules/search/src/__tests__/service-impl.test.js +703 -0
- package/dist/modules/search/src/__tests__/store-endpoints.test.js +295 -0
- package/dist/{admin/components/index.d.ts → modules/search/src/admin/components/index.jsx} +0 -1
- package/dist/modules/search/src/admin/components/search-analytics.jsx +230 -0
- package/dist/modules/search/src/admin/endpoints/analytics.js +9 -0
- package/dist/modules/search/src/admin/endpoints/bulk-index.js +26 -0
- package/dist/modules/search/src/admin/endpoints/click-analytics.js +9 -0
- package/dist/modules/search/src/admin/endpoints/get-settings.js +97 -0
- package/dist/modules/search/src/admin/endpoints/index-manage.js +32 -0
- package/dist/modules/search/src/admin/endpoints/index.js +21 -0
- package/dist/modules/search/src/admin/endpoints/popular.js +11 -0
- package/dist/modules/search/src/admin/endpoints/synonyms.js +30 -0
- package/dist/modules/search/src/admin/endpoints/zero-results.js +11 -0
- package/dist/modules/search/src/embedding-provider.js +77 -0
- package/dist/modules/search/src/index.js +75 -0
- package/dist/modules/search/src/meilisearch-provider.js +138 -0
- package/dist/modules/search/src/schema.js +61 -0
- package/dist/modules/search/src/service-impl.js +770 -0
- package/dist/modules/search/src/service.js +1 -0
- package/dist/modules/search/src/store/components/_hooks.js +10 -0
- package/dist/modules/search/src/store/components/index.jsx +9 -0
- package/dist/modules/search/src/store/components/search-bar.jsx +91 -0
- package/dist/modules/search/src/store/components/search-page.jsx +17 -0
- package/dist/modules/search/src/store/components/search-results.jsx +51 -0
- package/dist/modules/search/src/store/endpoints/click.js +15 -0
- package/dist/modules/search/src/store/endpoints/index.js +12 -0
- package/dist/modules/search/src/store/endpoints/recent.js +18 -0
- package/dist/modules/search/src/store/endpoints/search.js +57 -0
- package/dist/modules/search/src/store/endpoints/store-search.js +33 -0
- package/dist/modules/search/src/store/endpoints/suggest.js +12 -0
- package/package.json +1 -1
- package/src/__tests__/admin-settings.test.ts +367 -0
- package/src/__tests__/store-endpoints.test.ts +392 -0
- package/src/admin/endpoints/get-settings.ts +77 -0
- package/dist/__tests__/controllers.test.d.ts +0 -2
- package/dist/__tests__/controllers.test.d.ts.map +0 -1
- package/dist/__tests__/embedding-provider.test.d.ts +0 -2
- package/dist/__tests__/embedding-provider.test.d.ts.map +0 -1
- package/dist/__tests__/endpoint-security.test.d.ts +0 -2
- package/dist/__tests__/endpoint-security.test.d.ts.map +0 -1
- package/dist/__tests__/meilisearch-provider.test.d.ts +0 -2
- package/dist/__tests__/meilisearch-provider.test.d.ts.map +0 -1
- package/dist/__tests__/service-impl.test.d.ts +0 -2
- package/dist/__tests__/service-impl.test.d.ts.map +0 -1
- package/dist/admin/components/index.d.ts.map +0 -1
- package/dist/admin/components/search-analytics.d.ts +0 -2
- package/dist/admin/components/search-analytics.d.ts.map +0 -1
- package/dist/admin/endpoints/analytics.d.ts +0 -15
- package/dist/admin/endpoints/analytics.d.ts.map +0 -1
- package/dist/admin/endpoints/bulk-index.d.ts +0 -20
- package/dist/admin/endpoints/bulk-index.d.ts.map +0 -1
- package/dist/admin/endpoints/click-analytics.d.ts +0 -7
- package/dist/admin/endpoints/click-analytics.d.ts.map +0 -1
- package/dist/admin/endpoints/get-settings.d.ts +0 -17
- package/dist/admin/endpoints/get-settings.d.ts.map +0 -1
- package/dist/admin/endpoints/index-manage.d.ts +0 -26
- package/dist/admin/endpoints/index-manage.d.ts.map +0 -1
- package/dist/admin/endpoints/index.d.ts +0 -125
- package/dist/admin/endpoints/index.d.ts.map +0 -1
- package/dist/admin/endpoints/popular.d.ts +0 -10
- package/dist/admin/endpoints/popular.d.ts.map +0 -1
- package/dist/admin/endpoints/synonyms.d.ts +0 -30
- package/dist/admin/endpoints/synonyms.d.ts.map +0 -1
- package/dist/admin/endpoints/zero-results.d.ts +0 -10
- package/dist/admin/endpoints/zero-results.d.ts.map +0 -1
- package/dist/embedding-provider.d.ts +0 -28
- package/dist/embedding-provider.d.ts.map +0 -1
- package/dist/index.d.ts +0 -23
- package/dist/index.d.ts.map +0 -1
- package/dist/meilisearch-provider.d.ts +0 -104
- package/dist/meilisearch-provider.d.ts.map +0 -1
- package/dist/schema.d.ts +0 -133
- package/dist/schema.d.ts.map +0 -1
- package/dist/service-impl.d.ts +0 -6
- package/dist/service-impl.d.ts.map +0 -1
- package/dist/service.d.ts +0 -127
- package/dist/service.d.ts.map +0 -1
- package/dist/store/components/_hooks.d.ts +0 -6
- package/dist/store/components/_hooks.d.ts.map +0 -1
- package/dist/store/components/index.d.ts +0 -10
- package/dist/store/components/index.d.ts.map +0 -1
- package/dist/store/components/search-bar.d.ts +0 -7
- package/dist/store/components/search-bar.d.ts.map +0 -1
- package/dist/store/components/search-page.d.ts +0 -4
- package/dist/store/components/search-page.d.ts.map +0 -1
- package/dist/store/components/search-results.d.ts +0 -9
- package/dist/store/components/search-results.d.ts.map +0 -1
- package/dist/store/endpoints/click.d.ts +0 -14
- package/dist/store/endpoints/click.d.ts.map +0 -1
- package/dist/store/endpoints/index.d.ts +0 -85
- package/dist/store/endpoints/index.d.ts.map +0 -1
- package/dist/store/endpoints/recent.d.ts +0 -15
- package/dist/store/endpoints/recent.d.ts.map +0 -1
- package/dist/store/endpoints/search.d.ts +0 -36
- package/dist/store/endpoints/search.d.ts.map +0 -1
- package/dist/store/endpoints/store-search.d.ts +0 -16
- package/dist/store/endpoints/store-search.d.ts.map +0 -1
- package/dist/store/endpoints/suggest.d.ts +0 -11
- package/dist/store/endpoints/suggest.d.ts.map +0 -1
|
@@ -0,0 +1,770 @@
|
|
|
1
|
+
import { cosineSimilarity } from "./embedding-provider";
|
|
2
|
+
/**
 * Canonicalize text for comparison: lowercase it, strip surrounding
 * whitespace, and collapse every internal whitespace run to one space.
 *
 * @param {string} text - Raw text.
 * @returns {string} The canonical form.
 */
function normalize(text) {
  const lowered = text.toLowerCase();
  const trimmed = lowered.trim();
  return trimmed.replace(/\s+/g, " ");
}
|
|
5
|
+
/**
 * Split text into normalized tokens. Whitespace, hyphens, underscores,
 * slashes, commas and periods all act as separators; empty pieces are dropped.
 *
 * @param {string} text - Raw text.
 * @returns {string[]} Non-empty, normalized tokens in order of appearance.
 */
function tokenize(text) {
  const separators = /[\s\-_/,.]+/;
  const pieces = normalize(text).split(separators);
  return pieces.filter((piece) => piece !== "");
}
|
|
10
|
+
/**
 * Levenshtein (edit) distance between two strings, compared per UTF-16 code
 * unit. Used for fuzzy matching and did-you-mean suggestions.
 *
 * Only two rows of the dynamic-programming table are kept alive and they are
 * swapped after each outer iteration.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} Minimum number of insertions, deletions and substitutions.
 */
function levenshtein(a, b) {
  const lenA = a.length;
  const lenB = b.length;
  if (lenA === 0) {
    return lenB;
  }
  if (lenB === 0) {
    return lenA;
  }
  // Row 0: turning an empty prefix of `a` into b[0..col) costs `col` insertions.
  let previousRow = Array.from({ length: lenB + 1 }, (_, col) => col);
  let currentRow = new Array(lenB + 1);
  for (let row = 1; row <= lenA; row++) {
    const charA = a[row - 1];
    currentRow[0] = row;
    for (let col = 1; col <= lenB; col++) {
      const insertion = currentRow[col - 1] + 1;
      const deletion = previousRow[col] + 1;
      const substitution = previousRow[col - 1] + (charA === b[col - 1] ? 0 : 1);
      currentRow[col] = Math.min(insertion, deletion, substitution);
    }
    // Recycle the older row as scratch space for the next iteration.
    const recycled = previousRow;
    previousRow = currentRow;
    currentRow = recycled;
  }
  return previousRow[lenB];
}
|
|
39
|
+
/**
 * Edit-distance tolerance for a word of the given length. Shorter words get
 * less tolerance so that fuzzy matching does not produce noisy hits.
 *
 * @param {number} wordLength - Length of the word being matched.
 * @returns {number} 0 for lengths up to 3, 1 for lengths 4-5, otherwise 2.
 */
function maxEditDistance(wordLength) {
  return wordLength <= 3 ? 0 : wordLength <= 5 ? 1 : 2;
}
|
|
50
|
+
/**
 * Check whether `token` fuzzy-matches `target` within the edit-distance
 * tolerance derived from the token's length.
 *
 * @param {string} token - Query token; its length decides the tolerance.
 * @param {string} target - Candidate token to compare against.
 * @returns {boolean} True when the two are within tolerance (exact equality
 *   is required when the tolerance is 0).
 */
function fuzzyMatch(token, target) {
  const maxDist = maxEditDistance(token.length);
  if (maxDist === 0) {
    return token === target;
  }
  // The edit distance is never smaller than the length difference, so a
  // larger gap can be rejected without filling the distance table.
  if (Math.abs(token.length - target.length) > maxDist) {
    return false;
  }
  return levenshtein(token, target) <= maxDist;
}
|
|
59
|
+
/**
 * Highlight matching segments in text by wrapping them in <mark> tags.
 *
 * All terms are matched in a single case-insensitive pass. Replacing term by
 * term would let a later term (for example "mark") match inside the tags
 * inserted for an earlier one and corrupt the markup.
 *
 * @param {string} text - Text to highlight; returned unchanged when falsy.
 * @param {string[]} queryTokens - Query tokens; when empty nothing is highlighted.
 * @param {Iterable<string>} [expandedTerms] - Additional (synonym) terms.
 * @returns {string} The text with every term occurrence wrapped in <mark>.
 */
function highlightText(text, queryTokens, expandedTerms) {
  if (!text || queryTokens.length === 0) {
    return text;
  }
  const uniqueTerms = new Set([...queryTokens, ...(expandedTerms ?? [])]);
  const terms = [...uniqueTerms]
    // An empty term would produce a pattern that matches at every position.
    .filter((term) => typeof term === "string" && term.length > 0)
    // Longest first so the alternation prefers "redwood" over "red".
    .sort((a, b) => b.length - a.length);
  if (terms.length === 0) {
    return text;
  }
  const pattern = terms
    .map((term) => term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|");
  return text.replace(new RegExp(`(${pattern})`, "gi"), "<mark>$1</mark>");
}
|
|
74
|
+
/**
 * Compute the lexical relevance score of an indexed item for a query.
 *
 * For every query token, the token itself and every expanded (synonym) term
 * are scored against the title (exact 100, prefix 50, substring 25), the body
 * (substring 10) and each tag (exact 30, substring 15). When `fuzzy` is set,
 * tokens that do not appear literally also earn points for near matches:
 * 15 per title token, 8 per tag token, and 5 once per body.
 *
 * @param {{title: string, body?: string, tags?: string[]}} item - Indexed item.
 * @param {string[]} queryTokens - Normalized query tokens.
 * @param {Iterable<string>} expandedTerms - Normalized synonym terms.
 * @param {boolean} fuzzy - Whether to add fuzzy-match points.
 * @returns {number} Score; 0 means no lexical match.
 */
function scoreMatch(item, queryTokens, expandedTerms, fuzzy) {
  const titleLower = normalize(item.title);
  const bodyLower = item.body ? normalize(item.body) : "";
  const tagsLower = (item.tags ?? []).map((tag) => normalize(tag));
  // An empty term is a substring of every string and would match every item.
  const synonymTerms = [...expandedTerms].filter((term) => term.length > 0);
  // Tokenize once per item instead of once per query token. Tokens returned
  // by tokenize() are already normalized.
  const titleTokens = fuzzy ? tokenize(item.title) : [];
  const tagTokenLists = fuzzy ? tagsLower.map((tag) => tokenize(tag)) : [];
  let bodyTokens = null; // filled lazily; most queries never need it
  let score = 0;
  for (const token of queryTokens) {
    for (const term of [token, ...synonymTerms]) {
      // Exact title match is highest value
      if (titleLower === term) {
        score += 100;
      } else if (titleLower.startsWith(term)) {
        score += 50;
      } else if (titleLower.includes(term)) {
        score += 25;
      }
      // Body match
      if (bodyLower.includes(term)) {
        score += 10;
      }
      // Tag match
      for (const tag of tagsLower) {
        if (tag === term) {
          score += 30;
        } else if (tag.includes(term)) {
          score += 15;
        }
      }
    }
    if (!fuzzy) {
      continue;
    }
    // Fuzzy matching on title tokens (lower weight than exact)
    if (!titleLower.includes(token)) {
      for (const titleToken of titleTokens) {
        if (fuzzyMatch(token, titleToken)) {
          score += 15;
        }
      }
    }
    // Fuzzy on tags
    for (let i = 0; i < tagsLower.length; i++) {
      if (tagsLower[i].includes(token)) {
        continue;
      }
      for (const tagToken of tagTokenLists[i]) {
        if (fuzzyMatch(token, tagToken)) {
          score += 8;
        }
      }
    }
    // Fuzzy on body tokens; counted at most once per query token
    if (bodyLower && !bodyLower.includes(token)) {
      if (bodyTokens === null) {
        bodyTokens = tokenize(bodyLower);
      }
      if (bodyTokens.some((bodyToken) => fuzzyMatch(token, bodyToken))) {
        score += 5;
      }
    }
  }
  return score;
}
|
|
138
|
+
/**
 * Convert a MeiliSearch facetDistribution into the SearchFacets shape.
 *
 * @param {{entityType?: Object<string, number>, tags?: Object<string, number>}} [distribution]
 *   Facet counts keyed by facet value.
 * @returns {{entityTypes: {type: string, count: number}[], tags: {tag: string, count: number}[]}}
 *   Facets ordered by descending count; tags are limited to the first 20.
 */
function meiliResultToFacets(distribution) {
  // Turn a { value: count } record into rows sorted by count, highest first.
  const rank = (counts, label) => {
    const rows = [];
    for (const [value, count] of Object.entries(counts ?? {})) {
      rows.push({ [label]: value, count });
    }
    return rows.sort((x, y) => y.count - x.count);
  };
  if (!distribution) {
    return { entityTypes: [], tags: [] };
  }
  return {
    entityTypes: rank(distribution.entityType, "type"),
    tags: rank(distribution.tags, "tag").slice(0, 20),
  };
}
|
|
153
|
+
/**
 * Count entity types and normalized tags across a set of scored results.
 *
 * @param {{item: {entityType: string, tags: string[]}}[]} items - Scored results.
 * @returns {{entityTypes: {type: string, count: number}[], tags: {tag: string, count: number}[]}}
 *   Facets ordered by descending count; tags are limited to the first 20.
 */
function computeFacets(items) {
  const bump = (counter, key) => {
    counter.set(key, (counter.get(key) ?? 0) + 1);
  };
  const byCountDesc = (x, y) => y.count - x.count;
  const perType = new Map();
  const perTag = new Map();
  items.forEach(({ item }) => {
    bump(perType, item.entityType);
    item.tags.forEach((tag) => bump(perTag, normalize(tag)));
  });
  const entityTypes = [...perType].map(([type, count]) => ({ type, count }));
  entityTypes.sort(byCountDesc);
  const tags = [...perTag].map(([tag, count]) => ({ tag, count }));
  tags.sort(byCountDesc);
  return { entityTypes, tags: tags.slice(0, 20) };
}
|
|
173
|
+
/**
 * Return a sorted copy of the scored results; the input array is not modified.
 *
 * @param {{item: {indexedAt: Date|string, title: string}, score: number}[]} results
 *   Scored results.
 * @param {string} sort - "newest", "oldest", "title_asc" or "title_desc";
 *   any other value sorts by descending score.
 * @returns {Array} The sorted copy.
 */
function sortResults(results, sort) {
  const indexedTime = (entry) => new Date(entry.item.indexedAt).getTime();
  const comparators = new Map([
    ["newest", (a, b) => indexedTime(b) - indexedTime(a)],
    ["oldest", (a, b) => indexedTime(a) - indexedTime(b)],
    ["title_asc", (a, b) => a.item.title.localeCompare(b.item.title)],
    ["title_desc", (a, b) => b.item.title.localeCompare(a.item.title)],
  ]);
  const byScoreDesc = (a, b) => b.score - a.score;
  const compare = comparators.get(sort) ?? byScoreDesc;
  return [...results].sort(compare);
}
|
|
196
|
+
/**
 * Find the closest known terms to the query for a did-you-mean suggestion.
 * Candidates are the tokens of indexed titles and of popular search terms.
 *
 * Each query token is kept when it is a known candidate or too short to be
 * corrected; otherwise it is replaced by the candidate with the smallest edit
 * distance within tolerance (the first one encountered wins ties).
 *
 * @param {string[]} queryTokens - Normalized query tokens.
 * @param {Iterable<string>} indexedTitles - Titles of indexed items.
 * @param {Iterable<string>} popularTerms - Previously searched terms.
 * @param {boolean} hasResults - When true no suggestion is produced.
 * @returns {string|undefined} The corrected query, or undefined when there is
 *   nothing to suggest.
 */
function findDidYouMean(queryTokens, indexedTitles, popularTerms, hasResults) {
  // Only suggest corrections when the search produced nothing
  if (hasResults) {
    return undefined;
  }
  const candidates = new Set();
  for (const title of indexedTitles) {
    for (const token of tokenize(title)) {
      candidates.add(token);
    }
  }
  for (const term of popularTerms) {
    for (const token of tokenize(term)) {
      candidates.add(token);
    }
  }
  const corrections = queryTokens.map((token) => {
    const maxDist = maxEditDistance(token.length);
    // Nothing to correct: no tolerance for this length, or already a known term.
    if (maxDist === 0 || candidates.has(token)) {
      return token;
    }
    let bestMatch = token;
    let bestDist = Number.POSITIVE_INFINITY;
    for (const candidate of candidates) {
      // The edit distance is at least the length difference.
      if (Math.abs(candidate.length - token.length) > maxDist) {
        continue;
      }
      const dist = levenshtein(token, candidate);
      if (dist <= maxDist && dist < bestDist) {
        bestDist = dist;
        bestMatch = candidate;
      }
    }
    return bestMatch;
  });
  const suggestion = corrections.join(" ");
  const original = queryTokens.join(" ");
  return suggestion === original ? undefined : suggestion;
}
|
|
240
|
+
/**
 * Create the search controller: indexing, querying, suggestions, query/click
 * analytics and synonym management on top of a generic data store.
 *
 * @param data - Store exposing async `findMany(entity, options)`,
 *   `get(entity, id)`, `upsert(entity, id, value)` and `delete(entity, id)`.
 * @param [embeddingProvider] - Optional provider exposing `generateEmbedding(text)`
 *   and `generateEmbeddings(texts)`; enables semantic scoring in local search.
 * @param [meiliProvider] - Optional MeiliSearch wrapper exposing `addDocuments`,
 *   `deleteDocument` and `search`; when present, searches are delegated to it
 *   and local search is the fallback.
 * @returns The controller object.
 */
export function createSearchController(data, embeddingProvider, meiliProvider) {
  /**
   * Combine title + body + tags into the single text that gets embedded.
   */
  function buildEmbeddingText(source) {
    const parts = [source.title];
    if (source.body) {
      parts.push(source.body);
    }
    if (source.tags && source.tags.length > 0) {
      parts.push(source.tags.join(", "));
    }
    return parts.join(". ");
  }
  /**
   * Escape a value for use inside a double-quoted MeiliSearch filter string,
   * so that a quote in the value cannot terminate the literal and inject
   * additional filter clauses.
   */
  function quoteFilterValue(value) {
    const escaped = String(value).replace(/\\/g, "\\\\").replace(/"/g, '\\"');
    return `"${escaped}"`;
  }
  /**
   * Build the index record for the given params, reusing the id of an
   * existing record for the same entity so that re-indexing overwrites it.
   */
  async function buildIndexItem(params) {
    const existing = await data.findMany("searchIndex", {
      where: {
        entityType: params.entityType,
        entityId: params.entityId,
      },
      take: 1,
    });
    const id = existing.length > 0 ? existing[0].id : crypto.randomUUID();
    return {
      id,
      entityType: params.entityType,
      entityId: params.entityId,
      title: params.title,
      body: params.body,
      tags: params.tags ?? [],
      url: params.url,
      image: params.image,
      metadata: params.metadata ?? {},
      indexedAt: new Date(),
    };
  }
  /**
   * Generate and store an embedding for an indexed item.
   * Failures are silent — semantic search degrades gracefully.
   */
  async function embedItem(item) {
    if (!embeddingProvider) {
      return item;
    }
    try {
      const embedding = await embeddingProvider.generateEmbedding(buildEmbeddingText(item));
      if (embedding) {
        item.metadata = { ...item.metadata, __embedding: embedding };
      }
    } catch {
      // Embedding is best-effort — lexical search still works
    }
    return item;
  }
  /**
   * Convert a SearchIndexItem to a MeiliSearch document.
   * Metadata (including __embedding) is left out to avoid bloating the index.
   */
  function toMeiliDocument(item) {
    return {
      id: item.id,
      entityType: item.entityType,
      entityId: item.entityId,
      title: item.title,
      body: item.body,
      tags: item.tags,
      url: item.url,
      image: item.image,
      indexedAt: item.indexedAt.toISOString(),
    };
  }
  /**
   * Sync documents to MeiliSearch. Fire-and-forget — failures are silent
   * so local search still works as a fallback.
   */
  function syncToMeili(items) {
    if (!meiliProvider || items.length === 0) {
      return;
    }
    void meiliProvider.addDocuments(items.map(toMeiliDocument)).catch(() => { });
  }
  /**
   * Remove a document from MeiliSearch by ID. Fire-and-forget.
   */
  function removeFromMeili(documentId) {
    if (!meiliProvider) {
      return;
    }
    void meiliProvider.deleteDocument(documentId).catch(() => { });
  }
  /**
   * Map a sort option to MeiliSearch sort rules; undefined means relevance.
   */
  function toMeiliSort(sort) {
    switch (sort) {
      case "newest":
        return ["indexedAt:desc"];
      case "oldest":
        return ["indexedAt:asc"];
      case "title_asc":
        return ["title:asc"];
      case "title_desc":
        return ["title:desc"];
      default:
        return undefined;
    }
  }
  return {
    /**
     * Index (or re-index) a single entity and return the stored record.
     */
    async indexItem(params) {
      const item = await embedItem(await buildIndexItem(params));
      await data.upsert("searchIndex", item.id, item);
      syncToMeili([item]);
      return item;
    },
    /**
     * Index many entities. Items are stored one by one; a failure on one item
     * is counted in `errors` and does not stop the rest.
     *
     * @returns {Promise<{indexed: number, errors: number}>}
     */
    async bulkIndex(items) {
      let indexed = 0;
      let errors = 0;
      // Batch generate embeddings for all items at once. Like indexItem,
      // this is best-effort: a provider failure must not abort indexing.
      let embeddings = [];
      if (embeddingProvider && items.length > 0) {
        try {
          const generated = await embeddingProvider.generateEmbeddings(items.map(buildEmbeddingText));
          embeddings = generated ?? [];
        } catch {
          // Embedding is best-effort — lexical search still works
        }
      }
      const indexedItems = [];
      for (let i = 0; i < items.length; i++) {
        try {
          const item = await buildIndexItem(items[i]);
          if (embeddings[i]) {
            // Copy the metadata so the caller's input objects stay untouched.
            item.metadata = { ...item.metadata, __embedding: embeddings[i] };
          }
          await data.upsert("searchIndex", item.id, item);
          indexedItems.push(item);
          indexed++;
        } catch {
          errors++;
        }
      }
      syncToMeili(indexedItems);
      return { indexed, errors };
    },
    /**
     * Remove every index record of an entity.
     *
     * @returns {Promise<boolean>} False when nothing was indexed for it.
     */
    async removeFromIndex(entityType, entityId) {
      const found = await data.findMany("searchIndex", {
        where: { entityType, entityId },
      });
      if (found.length === 0) {
        return false;
      }
      for (const item of found) {
        await data.delete("searchIndex", item.id);
        removeFromMeili(item.id);
      }
      return true;
    },
    /**
     * Search the index.
     *
     * Options read here: `limit` (default 20), `skip` (default 0), `sort`
     * (default "relevance"), `entityType`, `tags` and `fuzzy` (default true,
     * local path only).
     *
     * @returns {Promise<{results: Array, total: number, facets: object, didYouMean?: string}>}
     */
    async search(query, options) {
      const limit = options?.limit ?? 20;
      const skip = options?.skip ?? 0;
      const sort = options?.sort ?? "relevance";
      const queryTokens = tokenize(query);
      if (queryTokens.length === 0) {
        return {
          results: [],
          total: 0,
          facets: { entityTypes: [], tags: [] },
        };
      }
      // ── MeiliSearch path: delegate search to dedicated engine ──
      if (meiliProvider) {
        try {
          const filters = [];
          if (options?.entityType) {
            filters.push(`entityType = ${quoteFilterValue(options.entityType)}`);
          }
          if (options?.tags && options.tags.length > 0) {
            const tagFilters = options.tags
              .map((t) => `tags = ${quoteFilterValue(t)}`)
              .join(" OR ");
            filters.push(`(${tagFilters})`);
          }
          const meiliResult = await meiliProvider.search(query, {
            limit,
            offset: skip,
            filter: filters.length > 0 ? filters.join(" AND ") : undefined,
            sort: toMeiliSort(sort),
            facets: ["entityType", "tags"],
            attributesToHighlight: ["title", "body"],
            highlightPreTag: "<mark>",
            highlightPostTag: "</mark>",
            showRankingScore: true,
          });
          const results = meiliResult.hits.map((hit) => ({
            item: {
              id: hit.id,
              entityType: hit.entityType,
              entityId: hit.entityId,
              title: hit.title,
              body: hit.body,
              tags: hit.tags ?? [],
              url: hit.url,
              image: hit.image,
              metadata: {},
              indexedAt: new Date(hit.indexedAt),
            },
            score: (hit._rankingScore ?? 0.5) * 100,
            highlights: {
              title: hit._formatted?.title,
              body: hit._formatted?.body,
            },
          }));
          const facets = meiliResultToFacets(meiliResult.facetDistribution);
          return {
            results,
            total: meiliResult.estimatedTotalHits ?? results.length,
            facets,
          };
        } catch {
          // MeiliSearch unavailable — fall through to local search
        }
      }
      // ── Local search path: lexical + semantic scoring ──
      const fuzzy = options?.fuzzy ?? true;
      // Load synonyms for query expansion (applied in both directions)
      const allSynonyms = await data.findMany("searchSynonym", {});
      const expandedTerms = new Set();
      for (const syn of allSynonyms) {
        const synTermNorm = normalize(syn.term);
        for (const token of queryTokens) {
          if (token === synTermNorm) {
            for (const s of syn.synonyms) {
              expandedTerms.add(normalize(s));
            }
          }
          for (const s of syn.synonyms) {
            if (normalize(s) === token) {
              expandedTerms.add(synTermNorm);
            }
          }
        }
      }
      const where = {};
      if (options?.entityType) {
        where.entityType = options.entityType;
      }
      const allItems = await data.findMany("searchIndex", {
        ...(Object.keys(where).length > 0 ? { where } : {}),
      });
      // Generate query embedding for semantic search (if provider is available)
      let queryEmbedding = null;
      if (embeddingProvider) {
        try {
          queryEmbedding = await embeddingProvider.generateEmbedding(query);
        } catch {
          // Semantic search is best-effort
        }
      }
      // Score and rank — hybrid lexical + semantic scoring
      const scored = [];
      for (const item of allItems) {
        const lexicalScore = scoreMatch(item, queryTokens, expandedTerms, fuzzy);
        // Compute semantic score if embeddings are available
        let semanticScore = 0;
        if (queryEmbedding) {
          const itemEmbedding = item.metadata?.__embedding;
          if (Array.isArray(itemEmbedding) && itemEmbedding.length > 0) {
            semanticScore = cosineSimilarity(queryEmbedding, itemEmbedding);
          }
        }
        // Include items with lexical match OR strong semantic similarity
        const semanticBoost = semanticScore * 80;
        const score = lexicalScore + semanticBoost;
        if (lexicalScore > 0 || semanticScore > 0.5) {
          const highlights = {
            title: highlightText(item.title, queryTokens, expandedTerms),
            body: item.body
              ? highlightText(item.body, queryTokens, expandedTerms)
              : undefined,
          };
          scored.push({ item, score, highlights });
        }
      }
      // Filter by tags if specified
      let filtered = scored;
      if (options?.tags && options.tags.length > 0) {
        const filterTags = new Set(options.tags.map((t) => normalize(t)));
        filtered = scored.filter((r) => r.item.tags.some((tag) => filterTags.has(normalize(tag))));
      }
      // Compute facets before pagination
      const facets = computeFacets(filtered);
      // Sort
      const sorted = sortResults(filtered, sort);
      const total = sorted.length;
      const results = sorted.slice(skip, skip + limit);
      // Did-you-mean suggestion when nothing matched
      let didYouMean;
      if (total === 0) {
        const allTitles = allItems.map((i) => i.title);
        const allQueries = await data.findMany("searchQuery", {});
        const popularTermsList = allQueries
          .filter((q) => q.resultCount > 0)
          .map((q) => q.term);
        didYouMean = findDidYouMean(queryTokens, allTitles, popularTermsList, false);
      }
      return { results, total, facets, didYouMean };
    },
    /**
     * Autocomplete suggestions for a prefix: previously searched terms that
     * returned results (most frequent first), followed by indexed titles
     * containing the prefix. Duplicates are removed by normalized form.
     */
    async suggest(prefix, limit = 10) {
      const prefixNorm = normalize(prefix);
      if (prefixNorm.length === 0) {
        return [];
      }
      const allQueries = await data.findMany("searchQuery", {});
      const termCounts = new Map();
      for (const q of allQueries) {
        if (q.resultCount > 0 && q.normalizedTerm.startsWith(prefixNorm)) {
          termCounts.set(q.term, (termCounts.get(q.term) ?? 0) + 1);
        }
      }
      const allItems = await data.findMany("searchIndex", {});
      const titleSuggestions = [];
      for (const item of allItems) {
        if (normalize(item.title).includes(prefixNorm)) {
          titleSuggestions.push(item.title);
        }
      }
      const popularTerms = Array.from(termCounts.entries())
        .sort((a, b) => b[1] - a[1])
        .map(([term]) => term);
      const seen = new Set();
      const suggestions = [];
      for (const candidate of [...popularTerms, ...titleSuggestions]) {
        const norm = normalize(candidate);
        if (!seen.has(norm)) {
          seen.add(norm);
          suggestions.push(candidate);
        }
      }
      return suggestions.slice(0, limit);
    },
    /**
     * Store a search query record and return it.
     */
    async recordQuery(term, resultCount, sessionId) {
      const id = crypto.randomUUID();
      const query = {
        id,
        term,
        normalizedTerm: normalize(term),
        resultCount,
        sessionId,
        searchedAt: new Date(),
      };
      await data.upsert("searchQuery", id, query);
      return query;
    },
    /**
     * Store a result-click record and return it.
     */
    async recordClick(params) {
      const id = crypto.randomUUID();
      const click = {
        id,
        queryId: params.queryId,
        term: params.term,
        entityType: params.entityType,
        entityId: params.entityId,
        position: params.position,
        clickedAt: new Date(),
      };
      await data.upsert("searchClick", id, click);
      return click;
    },
    /**
     * Most recent queries of a session, newest first, one per normalized term.
     *
     * @returns {Promise<Array>} At most `limit` query records; empty when
     *   `limit` is zero or negative.
     */
    async getRecentQueries(sessionId, limit = 10) {
      if (limit <= 0) {
        return [];
      }
      const all = await data.findMany("searchQuery", {
        where: { sessionId },
      });
      // Sort a copy so the array handed out by the store is not reordered.
      const newestFirst = [...all].sort((a, b) => new Date(b.searchedAt).getTime() - new Date(a.searchedAt).getTime());
      const seen = new Set();
      const results = [];
      for (const q of newestFirst) {
        if (!seen.has(q.normalizedTerm)) {
          seen.add(q.normalizedTerm);
          results.push(q);
        }
        if (results.length >= limit) {
          break;
        }
      }
      return results;
    },
    /**
     * Most frequently searched terms, grouped by normalized term, with the
     * rounded average result count of each.
     */
    async getPopularTerms(limit = 20) {
      const all = await data.findMany("searchQuery", {});
      const termStats = new Map();
      for (const q of all) {
        const existing = termStats.get(q.normalizedTerm);
        if (existing) {
          existing.count += 1;
          existing.totalResults += q.resultCount;
        } else {
          termStats.set(q.normalizedTerm, {
            term: q.term,
            count: 1,
            totalResults: q.resultCount,
          });
        }
      }
      return Array.from(termStats.values())
        .map((s) => ({
          term: s.term,
          count: s.count,
          avgResultCount: Math.round(s.totalResults / s.count),
        }))
        .sort((a, b) => b.count - a.count)
        .slice(0, limit);
    },
    /**
     * Most frequent queries that returned no results, grouped by normalized term.
     */
    async getZeroResultQueries(limit = 20) {
      const all = await data.findMany("searchQuery", {});
      const termStats = new Map();
      for (const q of all) {
        if (q.resultCount !== 0) {
          continue;
        }
        const existing = termStats.get(q.normalizedTerm);
        if (existing) {
          existing.count += 1;
        } else {
          termStats.set(q.normalizedTerm, {
            term: q.term,
            count: 1,
          });
        }
      }
      return Array.from(termStats.values())
        .map((s) => ({
          term: s.term,
          count: s.count,
          avgResultCount: 0,
        }))
        .sort((a, b) => b.count - a.count)
        .slice(0, limit);
    },
    /**
     * Aggregate statistics over all recorded queries and clicks. Rates are
     * whole percentages; the average click position has one decimal.
     */
    async getAnalytics() {
      const allQueries = await data.findMany("searchQuery", {});
      const allClicks = await data.findMany("searchClick", {});
      if (allQueries.length === 0) {
        return {
          totalQueries: 0,
          uniqueTerms: 0,
          avgResultCount: 0,
          zeroResultCount: 0,
          zeroResultRate: 0,
          clickThroughRate: 0,
          avgClickPosition: 0,
        };
      }
      const uniqueTerms = new Set(allQueries.map((q) => q.normalizedTerm));
      const totalResults = allQueries.reduce((sum, q) => sum + q.resultCount, 0);
      const zeroResultCount = allQueries.filter((q) => q.resultCount === 0).length;
      // CTR: distinct clicked query ids relative to queries that had results
      const clickedQueryIds = new Set(allClicks.map((c) => c.queryId));
      const queriesWithResults = allQueries.filter((q) => q.resultCount > 0).length;
      const clickThroughRate = queriesWithResults > 0
        ? Math.round((clickedQueryIds.size / queriesWithResults) * 100)
        : 0;
      const avgClickPosition = allClicks.length > 0
        ? Math.round((allClicks.reduce((sum, c) => sum + c.position, 0) /
          allClicks.length) *
          10) / 10
        : 0;
      return {
        totalQueries: allQueries.length,
        uniqueTerms: uniqueTerms.size,
        avgResultCount: Math.round(totalResults / allQueries.length),
        zeroResultCount,
        zeroResultRate: Math.round((zeroResultCount / allQueries.length) * 100),
        clickThroughRate,
        avgClickPosition,
      };
    },
    /**
     * Create or replace the synonym list of a term. The term is stored
     * normalized; an existing record keeps its id and creation date.
     */
    async addSynonym(term, synonyms) {
      const termNorm = normalize(term);
      const existing = await data.findMany("searchSynonym", {
        where: { term: termNorm },
        take: 1,
      });
      const previous = existing.length > 0 ? existing[0] : undefined;
      const id = previous ? previous.id : crypto.randomUUID();
      const synonym = {
        id,
        term: termNorm,
        synonyms: synonyms.map((s) => s.trim()),
        createdAt: previous ? previous.createdAt : new Date(),
      };
      await data.upsert("searchSynonym", id, synonym);
      return synonym;
    },
    /**
     * Delete a synonym record by id.
     *
     * @returns {Promise<boolean>} False when no such record exists.
     */
    async removeSynonym(id) {
      const existing = await data.get("searchSynonym", id);
      if (!existing) {
        return false;
      }
      await data.delete("searchSynonym", id);
      return true;
    },
    /**
     * List all synonym records.
     */
    async listSynonyms() {
      return await data.findMany("searchSynonym", {});
    },
    /**
     * Number of records in the search index.
     */
    async getIndexCount() {
      const all = await data.findMany("searchIndex", {});
      return all.length;
    },
  };
}
|