nodebb-plugin-search-agent 0.0.4 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/controllers.js +24 -0
- package/lib/searchHandler.js +117 -26
- package/library.js +10 -0
- package/package.json +2 -2
- package/plugin.json +1 -1
- package/public/lib/acp-main.js +33 -0
- package/public/lib/main.js +59 -43
- package/services/syncService.js +13 -2
- package/services/vectorSearchService.js +15 -4
- package/services/vectorStore.js +18 -1
- package/templates/admin/plugins/search-agent.tpl +17 -0
package/lib/controllers.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const { searchTopics, getSettings } = require('./searchHandler');
|
|
4
|
+
const { clearAllEmbeddings } = require('../services/vectorStore');
|
|
5
|
+
const { invalidateIndex } = require('../services/vectorSearchService');
|
|
6
|
+
const { startSync } = require('../services/syncService');
|
|
4
7
|
|
|
5
8
|
const controllers = {};
|
|
6
9
|
|
|
@@ -66,4 +69,25 @@ controllers.getConfig = async function (req, res, helpers) {
|
|
|
66
69
|
}
|
|
67
70
|
};
|
|
68
71
|
|
|
72
|
+
/**
 * POST /api/v3/plugins/search-agent/embeddings/resync
 *
 * Admin-only maintenance endpoint. Deletes every stored embedding,
 * invalidates the vector search index, and starts a sync without
 * waiting for it to finish. Responds 200 with a summary message, or 500 if
 * any of those steps throws.
 *
 * @param {object} req - Request; `req.uid` is written to the audit log line.
 * @param {object} res - Response handed to `helpers.formatApiResponse`.
 * @param {object} helpers - Must expose `formatApiResponse(status, res, payload)`.
 * @returns {Promise<void>}
 */
controllers.clearAndResync = async function (req, res, helpers) {
	try {
		// Order matters: wipe storage first, then drop the stale index, then re-sync.
		const removedCount = await clearAllEmbeddings();
		invalidateIndex();
		startSync();

		const logger = require.main.require('winston');
		logger.info(
			`[search-agent] clearAndResync: deleted ${removedCount} embedding(s); re-sync started by uid ${req.uid}`
		);

		const payload = {
			message: `Cleared ${removedCount} embedding(s). Full re-index is running in the background.`,
		};
		helpers.formatApiResponse(200, res, payload);
	} catch (err) {
		const logger = require.main.require('winston');
		logger.error(`[search-agent] clearAndResync error: ${err.message}`);
		// Only a generic message is sent to the client; details stay in the server log.
		const publicError = new Error('Failed to clear or re-sync embeddings.');
		helpers.formatApiResponse(500, res, publicError);
	}
};
|
|
92
|
+
|
|
69
93
|
module.exports = controllers;
|
package/lib/searchHandler.js
CHANGED
|
@@ -122,23 +122,56 @@ function callOpenAI(apiKey, model, messages) {
|
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
/**
 * HyDE (hypothetical document embedding): ask the chat model to write a short,
 * realistic forum reply that answers the query, and return that text so it can
 * be embedded in place of the raw query.
 *
 * Fallback behaviour:
 * - When the model response carries no usable text (no choices, missing
 *   message, non-string / empty / whitespace-only content) the original
 *   queryText is returned.
 * - Errors thrown by callOpenAI itself are NOT caught here; they propagate so
 *   the caller can log them and decide how to fall back.
 *
 * @param {string} queryText - The user's raw search query.
 * @param {string} apiKey - API key forwarded to callOpenAI.
 * @param {string} model - Chat model name forwarded to callOpenAI.
 * @returns {Promise<string>} Trimmed hypothetical post, or queryText when none was produced.
 */
async function expandQueryWithHyDE(queryText, apiKey, model) {
	const response = await callOpenAI(apiKey, model, [
		{
			role: 'system',
			// Hebrew prompt: act as a forum member and write a short, realistic reply
			// post that directly answers the search question — post body only, no
			// greetings, meta remarks, or subject line.
			content:
				'אתה חבר בפורום. בהינתן שאלת חיפוש, כתוב פוסט תגובה קצר וריאליסטי בפורום שעונה ישירות על השאלה. ' +
				'כתוב רק את תוכן הפוסט — ללא ברכות, הערות מטא, או שורת נושא.',
		},
		{ role: 'user', content: queryText },
	]);

	// Walk the response defensively: a reply with no choices/message must fall
	// back to the raw query instead of throwing a TypeError.
	const firstChoice = ((response || {}).choices || [])[0] || {};
	const text = (firstChoice.message || {}).content;
	const expanded = typeof text === 'string' ? text.trim() : '';
	return expanded || queryText;
}
|
|
147
|
+
|
|
148
|
+
/**
|
|
149
|
+
* Send candidates to OpenAI for independent per-topic relevance scoring.
|
|
150
|
+
* Each topic is rated 0-10 separately; topics scoring < 7 are excluded.
|
|
151
|
+
* This is more reliable than asking GPT to rank a list, because each topic
|
|
152
|
+
* is evaluated on its own merits rather than relative to the others.
|
|
153
|
+
* @param {object} [snippetByTid] - Map of tid → main post content snippet
|
|
127
154
|
*/
|
|
128
|
-
async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxResults) {
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
.
|
|
155
|
+
async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxResults, snippetByTid = {}) {
|
|
156
|
+
console.log('Re-ranking with AI:', { queryText, candidates: candidates.map(c => ({ tid: c.tid, title: (topicMap[String(c.tid)] || {}).title })) });
|
|
157
|
+
const candidateList = candidates
|
|
158
|
+
.map((c) => {
|
|
159
|
+
const title = (topicMap[String(c.tid)] || {}).title || '';
|
|
160
|
+
const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
|
|
161
|
+
const snippet = raw.length > 0 ? `\n תוכן: "${raw.slice(0, 500)}"` : '';
|
|
162
|
+
return `[tid:${c.tid}] ${title}${snippet}`;
|
|
163
|
+
})
|
|
164
|
+
.join('\n\n');
|
|
132
165
|
|
|
133
166
|
const systemPrompt =
|
|
134
|
-
'
|
|
135
|
-
'
|
|
136
|
-
'
|
|
137
|
-
'
|
|
138
|
-
'
|
|
167
|
+
'אתה מסנן חיפוש פורום מחמיר. ' +
|
|
168
|
+
'לכל נושא ברשימה, דרג את הרלוונטיות שלו לשאלת המשתמש בסקלה 0-10: ' +
|
|
169
|
+
'10 = עונה ישירות ובאופן מלא. 7-9 = עונה על חלק משמעותי. 0-6 = לא רלוונטי. ' +
|
|
170
|
+
'החזר אך ורק JSON תקני במבנה: {"tid": ציון, ...} — לדוגמה: {"42": 9, "15": 3}. ' +
|
|
171
|
+
'אין להוסיף הסברים, טקסט נוסף, או עיצוב מחוץ ל-JSON.';
|
|
139
172
|
|
|
140
173
|
const userMessage =
|
|
141
|
-
|
|
174
|
+
`שאלת המשתמש: "${queryText}"\n\nנושאים:\n${candidateList}`;
|
|
142
175
|
|
|
143
176
|
const response = await callOpenAI(apiKey, model, [
|
|
144
177
|
{ role: 'system', content: systemPrompt },
|
|
@@ -147,19 +180,21 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
|
|
|
147
180
|
|
|
148
181
|
const content = (response.choices[0].message.content || '').trim();
|
|
149
182
|
|
|
150
|
-
//
|
|
151
|
-
const match = content.match(/\[
|
|
183
|
+
// Extract the JSON object from the response
|
|
184
|
+
const match = content.match(/\{[^}]*\}/);
|
|
152
185
|
if (!match) {
|
|
153
|
-
throw new Error(`Unexpected AI response
|
|
186
|
+
throw new Error(`Unexpected AI scoring response: ${content.slice(0, 100)}`);
|
|
154
187
|
}
|
|
155
188
|
|
|
156
|
-
const
|
|
157
|
-
const candidateByTid = Object.fromEntries(candidates.map(c => [c.tid, c]));
|
|
189
|
+
const scores = JSON.parse(match[0]);
|
|
190
|
+
const candidateByTid = Object.fromEntries(candidates.map(c => [String(c.tid), c]));
|
|
158
191
|
|
|
159
|
-
return
|
|
160
|
-
.
|
|
161
|
-
.
|
|
162
|
-
.slice(0, maxResults)
|
|
192
|
+
return Object.entries(scores)
|
|
193
|
+
.filter(([, score]) => Number(score) >= 7)
|
|
194
|
+
.sort(([, a], [, b]) => Number(b) - Number(a))
|
|
195
|
+
.slice(0, maxResults)
|
|
196
|
+
.map(([tid]) => candidateByTid[tid])
|
|
197
|
+
.filter(Boolean);
|
|
163
198
|
}
|
|
164
199
|
|
|
165
200
|
// ─── Public API ───────────────────────────────────────────────────────────────
|
|
@@ -178,24 +213,80 @@ async function searchTopics(queryText) {
|
|
|
178
213
|
// ── Semantic search (primary) ────────────────────────────────────────────
|
|
179
214
|
try {
|
|
180
215
|
const { search: vectorSearch } = require('../services/vectorSearchService');
|
|
181
|
-
const
|
|
216
|
+
const useAI = settings.aiEnabled && settings.openaiApiKey;
|
|
217
|
+
|
|
218
|
+
// HyDE: replace the short raw query with a hypothetical answer so the
|
|
219
|
+
// embedding matches post content more closely.
|
|
220
|
+
let embeddingQuery = queryText;
|
|
221
|
+
if (useAI) {
|
|
222
|
+
try {
|
|
223
|
+
embeddingQuery = await expandQueryWithHyDE(
|
|
224
|
+
queryText, settings.openaiApiKey, settings.openaiModel
|
|
225
|
+
);
|
|
226
|
+
winston.verbose(`[search-agent] HyDE expanded query (${embeddingQuery.length} chars)`);
|
|
227
|
+
} catch (hydeErr) {
|
|
228
|
+
winston.warn(`[search-agent] HyDE expansion failed, using raw query: ${hydeErr.message}`);
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
// Request more candidates when AI will re-rank them.
|
|
233
|
+
const vectorLimit = useAI ? settings.aiCandidates : settings.maxResults;
|
|
234
|
+
const vectorResults = await vectorSearch(embeddingQuery, vectorLimit);
|
|
182
235
|
|
|
183
236
|
if (vectorResults.length > 0) {
|
|
184
237
|
const Topics = require.main.require('./src/topics');
|
|
238
|
+
const Posts = require.main.require('./src/posts');
|
|
185
239
|
const tids = [...new Set(vectorResults.map(r => r.topic_id))];
|
|
186
|
-
const topics = await Topics.getTopicsFields(tids, ['tid', 'title', 'slug', 'deleted']);
|
|
240
|
+
const topics = await Topics.getTopicsFields(tids, ['tid', 'title', 'slug', 'deleted', 'mainPid']);
|
|
187
241
|
const topicByTid = Object.fromEntries(
|
|
188
242
|
topics.filter(t => t && t.tid && !t.deleted).map(t => [String(t.tid), t])
|
|
189
243
|
);
|
|
190
244
|
|
|
191
|
-
|
|
245
|
+
// Build snippet map: prefer the main post body (which describes what the topic is about).
|
|
246
|
+
// Fall back to the best vector-matched post content if no main post is available.
|
|
247
|
+
const fallbackSnippetByTid = {};
|
|
248
|
+
for (const r of vectorResults) {
|
|
249
|
+
const key = String(r.topic_id);
|
|
250
|
+
if (!fallbackSnippetByTid[key]) fallbackSnippetByTid[key] = r.content;
|
|
251
|
+
}
|
|
252
|
+
const snippetByTid = { ...fallbackSnippetByTid };
|
|
253
|
+
const topicsWithMainPid = topics.filter(t => t && t.tid && !t.deleted && t.mainPid);
|
|
254
|
+
if (topicsWithMainPid.length > 0) {
|
|
255
|
+
const mainContents = await Posts.getPostsFields(
|
|
256
|
+
topicsWithMainPid.map(t => t.mainPid),
|
|
257
|
+
['pid', 'content']
|
|
258
|
+
);
|
|
259
|
+
for (let i = 0; i < topicsWithMainPid.length; i++) {
|
|
260
|
+
const content = mainContents[i] && mainContents[i].content;
|
|
261
|
+
if (content) snippetByTid[String(topicsWithMainPid[i].tid)] = content;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
let results = vectorResults
|
|
192
266
|
.filter(r => topicByTid[String(r.topic_id)])
|
|
193
267
|
.map(r => {
|
|
194
268
|
const t = topicByTid[String(r.topic_id)];
|
|
195
269
|
return { tid: t.tid, title: t.title, url: `/topic/${t.slug || t.tid}` };
|
|
196
270
|
})
|
|
197
|
-
.filter((r, i, arr) => arr.findIndex(x => x.tid === r.tid) === i) // dedupe by tid
|
|
198
|
-
|
|
271
|
+
.filter((r, i, arr) => arr.findIndex(x => x.tid === r.tid) === i); // dedupe by tid
|
|
272
|
+
|
|
273
|
+
console.log('Vector search results before AI re-rank:', results);
|
|
274
|
+
// AI re-ranking: pass both titles AND content snippets so GPT can judge relevance.
|
|
275
|
+
if (useAI && results.length > 0) {
|
|
276
|
+
try {
|
|
277
|
+
results = await reRankWithAI(
|
|
278
|
+
queryText, results, topicByTid,
|
|
279
|
+
settings.openaiApiKey, settings.openaiModel, settings.maxResults,
|
|
280
|
+
snippetByTid
|
|
281
|
+
);
|
|
282
|
+
winston.info(`[search-agent] AI re-ranked vector results to ${results.length} result(s).`);
|
|
283
|
+
} catch (rankErr) {
|
|
284
|
+
winston.warn(`[search-agent] AI re-rank of vector results failed, using raw order: ${rankErr.message}`);
|
|
285
|
+
results = results.slice(0, settings.maxResults);
|
|
286
|
+
}
|
|
287
|
+
} else {
|
|
288
|
+
results = results.slice(0, settings.maxResults);
|
|
289
|
+
}
|
|
199
290
|
|
|
200
291
|
if (results.length > 0) {
|
|
201
292
|
winston.info(`[search-agent] Semantic search returned ${results.length} results for "${queryText}".`);
|
package/library.js
CHANGED
|
@@ -91,6 +91,16 @@ plugin.addRoutes = async ({ router, middleware, helpers }) => {
|
|
|
91
91
|
}
|
|
92
92
|
);
|
|
93
93
|
winston.info('[search-agent] API route registered: POST /api/v3/plugins/search-agent/cache/invalidate');
|
|
94
|
+
|
|
95
|
+
// Clear all embeddings and re-index (admin only)
|
|
96
|
+
routeHelpers.setupApiRoute(
|
|
97
|
+
router,
|
|
98
|
+
'post',
|
|
99
|
+
'/search-agent/embeddings/resync',
|
|
100
|
+
[middleware.ensureLoggedIn, middleware.admin.checkPrivileges],
|
|
101
|
+
(req, res) => controllers.clearAndResync(req, res, helpers)
|
|
102
|
+
);
|
|
103
|
+
winston.info('[search-agent] API route registered: POST /api/v3/plugins/search-agent/embeddings/resync');
|
|
94
104
|
};
|
|
95
105
|
|
|
96
106
|
/**
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "nodebb-plugin-search-agent",
|
|
3
|
-
"version": "0.0.4",
|
|
3
|
+
"version": "0.0.6",
|
|
4
4
|
"description": "NodeBB plugin that adds a floating chat assistant to help users find relevant forum topics using TF-IDF text similarity",
|
|
5
5
|
"main": "library.js",
|
|
6
6
|
"author": "Racheli Bayfus",
|
|
@@ -37,7 +37,7 @@
|
|
|
37
37
|
},
|
|
38
38
|
"license": "MIT",
|
|
39
39
|
"bugs": {
|
|
40
|
-
"url": "https://github.com/
|
|
40
|
+
"url": "https://github.com/racheliK9201/nodebb-plugin-search-agent/issues"
|
|
41
41
|
},
|
|
42
42
|
"readmeFilename": "README.md",
|
|
43
43
|
"nbbpm": {
|
package/plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "nodebb-plugin-search-agent",
|
|
3
3
|
"name": "Search Agent",
|
|
4
4
|
"description": "Floating chat assistant that finds relevant forum topics using TF-IDF similarity",
|
|
5
|
-
"url": "https://github.com/
|
|
5
|
+
"url": "https://github.com/racheliK9201/nodebb-plugin-search-agent",
|
|
6
6
|
"library": "./library.js",
|
|
7
7
|
"hooks": [
|
|
8
8
|
{ "hook": "static:app.load", "method": "init" },
|
package/public/lib/acp-main.js
CHANGED
|
@@ -33,6 +33,39 @@ $(document).ready(function () {
|
|
|
33
33
|
console.log('[search-agent] setupAdminPage: loading settings into form');
|
|
34
34
|
Settings.load('search-agent', $('form.search-agent-settings'));
|
|
35
35
|
});
|
|
36
|
+
|
|
37
|
+
// Re-index button: after a confirmation prompt, POSTs to the resync endpoint
// and reports the outcome next to the button and as an alert.
$('#btn-resync-embeddings').on('click', function () {
	const $btn = $(this);
	const $status = $('#resync-status');

	// Show a status line. The message comes from the HTTP response, so it is
	// inserted as text (never parsed as HTML) to avoid markup injection.
	function showStatus(cssClass, msg) {
		$status.empty().append($('<span>').addClass(cssClass).text(msg));
	}

	if (!confirm('This will delete ALL stored embeddings and regenerate them. Continue?')) {
		return;
	}

	// Disable the button while the request is in flight to prevent double submits.
	$btn.prop('disabled', true).text('Working…');
	$status.text('');

	$.ajax({
		url: config.relative_path + '/api/v3/plugins/search-agent/embeddings/resync',
		type: 'POST',
		headers: { 'x-csrf-token': config.csrf_token },
		success: function (data) {
			const msg = (data && data.response && data.response.message) || 'Re-index started.';
			showStatus('text-success', msg);
			app.alertSuccess(msg);
		},
		error: function (xhr) {
			const msg = (xhr.responseJSON && xhr.responseJSON.status && xhr.responseJSON.status.message)
				|| 'Re-index failed.';
			showStatus('text-danger', msg);
			app.alertError(msg);
		},
		complete: function () {
			// Runs after success or error: restore the button either way.
			$btn.prop('disabled', false).text('Clear & Re-index');
		},
	});
});
|
|
36
69
|
}
|
|
37
70
|
});
|
|
38
71
|
|
package/public/lib/main.js
CHANGED
|
@@ -119,63 +119,79 @@ function buildPanelHtml() {
|
|
|
119
119
|
|
|
120
120
|
// ─── Mount ────────────────────────────────────────────────────────────────────
|
|
121
121
|
|
|
122
|
+
// True while a mount is waiting on translation; prevents concurrent double mounts.
let _mounting = false;

/**
 * Inject the floating action button (FAB) and chat panel into the page and
 * wire up their event handlers.
 *
 * The widget HTML is passed through `translator.translate` first. If that
 * call throws, rejects, or does not return a thenable, the untranslated HTML
 * is mounted instead so the widget still appears. The call is a no-op when the
 * widget is already in the DOM or another mount is still in progress.
 *
 * @param {object} deps
 * @param {object} deps.api - Forwarded unchanged to submitQuery.
 * @param {object} deps.translator - Must expose `translate(html)`.
 * @returns {void} Mounting completes asynchronously after translation settles.
 */
function mountSearchAgent({ api, translator }) {
	console.log('[search-agent] mountSearchAgent: injecting FAB and chat panel into DOM');
	// Avoid double-mounting if somehow called twice or concurrently
	if (document.getElementById('search-agent-fab') || _mounting) {
		console.log('[search-agent] mountSearchAgent: widget already mounted or mount in progress, skipping');
		return;
	}

	const wrapper = document.createElement('div');
	wrapper.className = 'search-agent-wrapper';

	// Build the markup before taking the guard so a failure here cannot leave
	// the guard set.
	const rawHtml = buildFabHtml() + buildPanelHtml();
	_mounting = true;

	// Translate [[search-agent:key]] markers before inserting into the DOM.
	// A synchronous throw is converted into a rejection so it takes the same
	// raw-HTML fallback path as an asynchronous failure (e.g. language-file 404)
	// instead of leaving `_mounting` stuck at true.
	let translation;
	try {
		translation = translator.translate(rawHtml);
	} catch (err) {
		translation = Promise.reject(err);
	}
	const isThenable = Boolean(translation) && typeof translation.then === 'function';

	// Promise.resolve() adopts foreign thenables, guaranteeing `.catch` exists.
	Promise.resolve(isThenable ? translation : rawHtml)
		.catch(function (err) {
			console.warn('[search-agent] mountSearchAgent: translation failed, using raw HTML', err);
			return rawHtml;
		})
		.then(function (translatedHtml) {
			// Release the guard first: from here on the DOM check above takes over.
			_mounting = false;
			wrapper.innerHTML = translatedHtml;
			document.body.appendChild(wrapper);

			const fab = document.getElementById('search-agent-fab');
			const panel = document.getElementById('search-agent-panel');
			const closeBtn = document.getElementById('search-agent-close');
			const input = document.getElementById('search-agent-input');
			const sendBtn = document.getElementById('search-agent-send');
			const messages = document.getElementById('search-agent-messages');

			// ── Toggle panel ──────────────────────────────────────────────────────────
			fab.addEventListener('click', () => {
				// isOpen is the state BEFORE this click; the panel flips to the opposite.
				const isOpen = !panel.hidden;
				panel.hidden = isOpen;
				fab.classList.toggle('search-agent-fab--active', !isOpen);
				console.log(`[search-agent] FAB clicked — panel is now ${isOpen ? 'closed' : 'open'}`);
				if (!isOpen) {
					input.focus();
				}
			});

			closeBtn.addEventListener('click', () => {
				panel.hidden = true;
				fab.classList.remove('search-agent-fab--active');
			});

			// Close on Escape
			document.addEventListener('keydown', (e) => {
				if (e.key === 'Escape' && !panel.hidden) {
					panel.hidden = true;
					fab.classList.remove('search-agent-fab--active');
				}
			});

			// ── Submit on Enter or button click ───────────────────────────────────────
			sendBtn.addEventListener('click', () => submitQuery({ api, translator, input, messages, sendBtn }));

			input.addEventListener('keydown', (e) => {
				// Shift+Enter is left alone; plain Enter submits.
				if (e.key === 'Enter' && !e.shiftKey) {
					e.preventDefault();
					submitQuery({ api, translator, input, messages, sendBtn });
				}
			});
		})
		.catch(function (err) {
			// Surface wiring failures (e.g. an expected element is missing) instead
			// of leaving an unhandled rejection.
			_mounting = false;
			console.error('[search-agent] mountSearchAgent: failed to mount widget', err);
		});
}
|
|
180
196
|
|
|
181
197
|
// ─── Chat logic ───────────────────────────────────────────────────────────────
|
package/services/syncService.js
CHANGED
|
@@ -108,12 +108,23 @@ async function runSync() {
|
|
|
108
108
|
continue;
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
+
// Fetch topic titles so embeddings carry the topic context
|
|
112
|
+
const Topics = require.main.require('./src/topics');
|
|
113
|
+
const uniqueTids = [...new Set(posts.map(p => parseInt(p.tid, 10)))];
|
|
114
|
+
const topicFields = await Topics.getTopicsFields(uniqueTids, ['tid', 'title']);
|
|
115
|
+
const titleByTid = Object.fromEntries(
|
|
116
|
+
topicFields.filter(t => t && t.tid).map(t => [String(t.tid), t.title || ''])
|
|
117
|
+
);
|
|
118
|
+
|
|
111
119
|
// ------------------------------------------------------------------
|
|
112
|
-
// 3. Generate embeddings for this sub-batch
|
|
120
|
+
// 3. Generate embeddings for this sub-batch (title + content for context)
|
|
113
121
|
// ------------------------------------------------------------------
|
|
114
122
|
let vectors;
|
|
115
123
|
try {
|
|
116
|
-
vectors = await embedBatch(posts.map(p =>
|
|
124
|
+
vectors = await embedBatch(posts.map((p) => {
|
|
125
|
+
const title = titleByTid[String(p.tid)] || '';
|
|
126
|
+
return title ? `${title}\n\n${p.content}` : p.content;
|
|
127
|
+
}));
|
|
117
128
|
} catch (err) {
|
|
118
129
|
winston().error(`[search-agent] syncService: failed to generate embeddings (offset ${offset}): ${err.message}`);
|
|
119
130
|
totalErrors++;
|
|
package/services/vectorSearchService.js
CHANGED
|
@@ -8,7 +8,12 @@ function winston() {
|
|
|
8
8
|
return require.main.require('winston');
|
|
9
9
|
}
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
// Fetch this many candidates from Orama — cast a wide net so the AI has enough to choose from
|
|
12
|
+
const TOP_K = 20;
|
|
13
|
+
// Absolute minimum cosine similarity — only filters pure noise (near-zero similarity).
|
|
14
|
+
// Do NOT raise this: the relevant result often scores lower than irrelevant ones.
|
|
15
|
+
// The AI re-ranker (which reads content) is the precision gate, not this floor.
|
|
16
|
+
const MIN_SCORE = 0.10;
|
|
12
17
|
// Rebuild the Orama index after this interval (mirrors TF-IDF cache TTL)
|
|
13
18
|
const INDEX_TTL_MS = 5 * 60 * 1000;
|
|
14
19
|
|
|
@@ -84,7 +89,7 @@ function invalidateIndex() {
|
|
|
84
89
|
* @returns {Promise<Array<{ topic_id: number, post_id: number, content: string, score: number }>>}
|
|
85
90
|
* Top results sorted by cosine similarity descending.
|
|
86
91
|
*/
|
|
87
|
-
async function search(query) {
|
|
92
|
+
async function search(query, limit = TOP_K) {
|
|
88
93
|
if (typeof query !== 'string' || query.trim() === '') {
|
|
89
94
|
throw new Error('search() requires a non-empty query string');
|
|
90
95
|
}
|
|
@@ -99,13 +104,19 @@ async function search(query) {
|
|
|
99
104
|
const results = await oramaSearch(db, {
|
|
100
105
|
mode: 'vector',
|
|
101
106
|
vector: { value: queryEmbedding, property: 'embedding' },
|
|
102
|
-
limit
|
|
107
|
+
limit,
|
|
108
|
+
similarity: 0.1,
|
|
103
109
|
includeVectors: false,
|
|
104
110
|
});
|
|
105
111
|
|
|
106
112
|
winston().verbose(`[search-agent] vectorSearchService: Orama returned ${results.hits.length} hit(s)`);
|
|
107
113
|
|
|
108
|
-
|
|
114
|
+
const filtered = results.hits.filter(hit => hit.score >= MIN_SCORE);
|
|
115
|
+
winston().verbose(
|
|
116
|
+
`[search-agent] vectorSearchService: ${filtered.length}/${results.hits.length} hit(s) passed noise floor (MIN_SCORE=${MIN_SCORE})`
|
|
117
|
+
);
|
|
118
|
+
|
|
119
|
+
return filtered.map(hit => ({
|
|
109
120
|
topic_id: hit.document.topic_id,
|
|
110
121
|
post_id: hit.document.post_id,
|
|
111
122
|
content: hit.document.content,
|
package/services/vectorStore.js
CHANGED
|
@@ -134,4 +134,21 @@ async function getMissingEmbeddings(postIds) {
|
|
|
134
134
|
return missing;
|
|
135
135
|
}
|
|
136
136
|
|
|
137
|
-
|
|
137
|
+
/**
 * Remove every embedding document from the collection and reset the
 * module's in-memory cache state (`_cache`, `_cachePromise`).
 * Intended to run before a full re-index.
 *
 * @returns {Promise<number>} How many documents the delete reported as removed.
 */
async function clearAllEmbeddings() {
	winston().info('[search-agent] vectorStore: clearing ALL embeddings from database…');

	await ensureIndexes();
	// An empty filter matches every document in the collection.
	const outcome = await getCollection().deleteMany({});

	// Drop cached data so later reads do not serve the deleted embeddings.
	_cache = [];
	_cachePromise = null;

	const removed = outcome.deletedCount;
	winston().info(`[search-agent] vectorStore: deleted ${removed} embedding(s)`);
	return removed;
}
|
|
153
|
+
|
|
154
|
+
module.exports = { saveEmbedding, getAllEmbeddings, findByPostId, getMissingEmbeddings, clearAllEmbeddings };
|
|
package/templates/admin/plugins/search-agent.tpl
CHANGED
|
@@ -181,6 +181,23 @@
|
|
|
181
181
|
</button>
|
|
182
182
|
</div>
|
|
183
183
|
</div>
|
|
184
|
+
|
|
185
|
+
<div class="card mt-3">
|
|
186
|
+
<div class="card-header">Re-index Embeddings</div>
|
|
187
|
+
<div class="card-body">
|
|
188
|
+
<p class="card-text small">
|
|
189
|
+
Delete all stored embeddings and re-generate them with the
|
|
190
|
+
current strategy (title + post content).
|
|
191
|
+
The re-index runs in the background — search continues working
|
|
192
|
+
via TF-IDF until it finishes.
|
|
193
|
+
</p>
|
|
194
|
+
<button id="btn-resync-embeddings" class="btn btn-danger btn-sm fw-semibold">
|
|
195
|
+
Clear & Re-index
|
|
196
|
+
</button>
|
|
197
|
+
<div id="resync-status" class="mt-2 small"></div>
|
|
198
|
+
</div>
|
|
199
|
+
</div>
|
|
200
|
+
</div>
|
|
184
201
|
</div>
|
|
185
202
|
</div>
|
|
186
203
|
</div>
|