nodebb-plugin-search-agent 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,9 @@
1
1
  'use strict';
2
2
 
3
3
  const { searchTopics, getSettings } = require('./searchHandler');
4
+ const { clearAllEmbeddings } = require('../services/vectorStore');
5
+ const { invalidateIndex } = require('../services/vectorSearchService');
6
+ const { startSync } = require('../services/syncService');
4
7
 
5
8
  const controllers = {};
6
9
 
@@ -66,4 +69,25 @@ controllers.getConfig = async function (req, res, helpers) {
66
69
  }
67
70
  };
68
71
 
72
+ /**
73
+ * POST /api/v3/plugins/search-agent/embeddings/resync
74
+ * Admin-only: wipes all stored embeddings and kicks off a full re-index.
75
+ */
76
+ controllers.clearAndResync = async function (req, res, helpers) {
77
+ try {
78
+ const deleted = await clearAllEmbeddings();
79
+ invalidateIndex();
80
+ startSync();
81
+ require.main.require('winston').info(
82
+ `[search-agent] clearAndResync: deleted ${deleted} embedding(s); re-sync started by uid ${req.uid}`
83
+ );
84
+ helpers.formatApiResponse(200, res, {
85
+ message: `Cleared ${deleted} embedding(s). Full re-index is running in the background.`,
86
+ });
87
+ } catch (err) {
88
+ require.main.require('winston').error(`[search-agent] clearAndResync error: ${err.message}`);
89
+ helpers.formatApiResponse(500, res, new Error('Failed to clear or re-sync embeddings.'));
90
+ }
91
+ };
92
+
69
93
  module.exports = controllers;
@@ -122,23 +122,56 @@ function callOpenAI(apiKey, model, messages) {
122
122
  }
123
123
 
124
124
  /**
125
- * Send TF-IDF candidates to OpenAI and ask it to pick the most relevant ones,
126
- * ordered by relevance. Falls back to original TF-IDF order on any error.
125
+ * HyDE: generate a hypothetical forum post that would answer the query.
126
+ * Embedding this richer text instead of the raw query dramatically improves
127
+ * cosine similarity against actual post embeddings.
128
+ * Falls back to the original queryText on any error.
129
+ *
130
+ * @param {string} queryText
131
+ * @param {string} apiKey
132
+ * @param {string} model
133
+ * @returns {Promise<string>}
134
+ */
135
+ async function expandQueryWithHyDE(queryText, apiKey, model) {
136
+ const response = await callOpenAI(apiKey, model, [
137
+ {
138
+ role: 'system',
139
+ content:
140
+ 'אתה חבר בפורום. בהינתן שאלת חיפוש, כתוב פוסט תגובה קצר וריאליסטי בפורום שעונה ישירות על השאלה. ' +
141
+ 'כתוב רק את תוכן הפוסט — ללא ברכות, הערות מטא, או שורת נושא.',
142
+ },
143
+ { role: 'user', content: queryText },
144
+ ]);
145
+ return (response.choices[0].message.content || '').trim() || queryText;
146
+ }
147
+
148
+ /**
149
+ * Send candidates to OpenAI for independent per-topic relevance scoring.
150
+ * Each topic is rated 0-10 separately; topics scoring < 7 are excluded.
151
+ * This is more reliable than asking GPT to rank a list, because each topic
152
+ * is evaluated on its own merits rather than relative to the others.
153
+ * @param {object} [snippetByTid] - Map of tid → main post content snippet
127
154
  */
128
- async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxResults) {
129
- const candidateList = candidates
130
- .map((c, i) => `${i + 1}. [tid:${c.tid}] ${(topicMap[String(c.tid)] || {}).title || ''}`)
131
- .join('\n');
155
+ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxResults, snippetByTid = {}) {
156
+ console.log('Re-ranking with AI:', { queryText, candidates: candidates.map(c => ({ tid: c.tid, title: (topicMap[String(c.tid)] || {}).title })) });
157
+ const candidateList = candidates
158
+ .map((c) => {
159
+ const title = (topicMap[String(c.tid)] || {}).title || '';
160
+ const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/\s+/g, ' ').trim();
161
+ const snippet = raw.length > 0 ? `\n תוכן: "${raw.slice(0, 500)}"` : '';
162
+ return `[tid:${c.tid}] ${title}${snippet}`;
163
+ })
164
+ .join('\n\n');
132
165
 
133
166
  const systemPrompt =
134
- 'You are a forum search assistant. ' +
135
- 'Given a user question and a numbered list of forum topic titles, ' +
136
- 'respond with ONLY a JSON array of the tid values (integers after "tid:") ' +
137
- 'for the topics that actually answer the question, ordered from most to least relevant. ' +
138
- 'Include only truly relevant topics. Example response: [12, 5, 33]';
167
+ 'אתה מסנן חיפוש פורום מחמיר. ' +
168
+ 'לכל נושא ברשימה, דרג את הרלוונטיות שלו לשאלת המשתמש בסקלה 0-10: ' +
169
+ '10 = עונה ישירות ובאופן מלא. 7-9 = עונה על חלק משמעותי. 0-6 = לא רלוונטי. ' +
170
+ 'החזר אך ורק JSON תקני במבנה: {"tid": ציון, ...} לדוגמה: {"42": 9, "15": 3}. ' +
171
+ 'אין להוסיף הסברים, טקסט נוסף, או עיצוב מחוץ ל-JSON.';
139
172
 
140
173
  const userMessage =
141
- `User question: "${queryText}"\n\nForum topics:\n${candidateList}`;
174
+ `שאלת המשתמש: "${queryText}"\n\nנושאים:\n${candidateList}`;
142
175
 
143
176
  const response = await callOpenAI(apiKey, model, [
144
177
  { role: 'system', content: systemPrompt },
@@ -147,19 +180,21 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
147
180
 
148
181
  const content = (response.choices[0].message.content || '').trim();
149
182
 
150
- // Robustly extract a JSON integer array from the response
151
- const match = content.match(/\[[\d,\s]+\]/);
183
+ // Extract the JSON object from the response
184
+ const match = content.match(/\{[^}]*\}/);
152
185
  if (!match) {
153
- throw new Error(`Unexpected AI response format: ${content.slice(0, 100)}`);
186
+ throw new Error(`Unexpected AI scoring response: ${content.slice(0, 100)}`);
154
187
  }
155
188
 
156
- const rankedTids = JSON.parse(match[0]);
157
- const candidateByTid = Object.fromEntries(candidates.map(c => [c.tid, c]));
189
+ const scores = JSON.parse(match[0]);
190
+ const candidateByTid = Object.fromEntries(candidates.map(c => [String(c.tid), c]));
158
191
 
159
- return rankedTids
160
- .map(tid => candidateByTid[tid])
161
- .filter(Boolean)
162
- .slice(0, maxResults);
192
+ return Object.entries(scores)
193
+ .filter(([, score]) => Number(score) >= 7)
194
+ .sort(([, a], [, b]) => Number(b) - Number(a))
195
+ .slice(0, maxResults)
196
+ .map(([tid]) => candidateByTid[tid])
197
+ .filter(Boolean);
163
198
  }
164
199
 
165
200
  // ─── Public API ───────────────────────────────────────────────────────────────
@@ -178,24 +213,80 @@ async function searchTopics(queryText) {
178
213
  // ── Semantic search (primary) ────────────────────────────────────────────
179
214
  try {
180
215
  const { search: vectorSearch } = require('../services/vectorSearchService');
181
- const vectorResults = await vectorSearch(queryText);
216
+ const useAI = settings.aiEnabled && settings.openaiApiKey;
217
+
218
+ // HyDE: replace the short raw query with a hypothetical answer so the
219
+ // embedding matches post content more closely.
220
+ let embeddingQuery = queryText;
221
+ if (useAI) {
222
+ try {
223
+ embeddingQuery = await expandQueryWithHyDE(
224
+ queryText, settings.openaiApiKey, settings.openaiModel
225
+ );
226
+ winston.verbose(`[search-agent] HyDE expanded query (${embeddingQuery.length} chars)`);
227
+ } catch (hydeErr) {
228
+ winston.warn(`[search-agent] HyDE expansion failed, using raw query: ${hydeErr.message}`);
229
+ }
230
+ }
231
+
232
+ // Request more candidates when AI will re-rank them.
233
+ const vectorLimit = useAI ? settings.aiCandidates : settings.maxResults;
234
+ const vectorResults = await vectorSearch(embeddingQuery, vectorLimit);
182
235
 
183
236
  if (vectorResults.length > 0) {
184
237
  const Topics = require.main.require('./src/topics');
238
+ const Posts = require.main.require('./src/posts');
185
239
  const tids = [...new Set(vectorResults.map(r => r.topic_id))];
186
- const topics = await Topics.getTopicsFields(tids, ['tid', 'title', 'slug', 'deleted']);
240
+ const topics = await Topics.getTopicsFields(tids, ['tid', 'title', 'slug', 'deleted', 'mainPid']);
187
241
  const topicByTid = Object.fromEntries(
188
242
  topics.filter(t => t && t.tid && !t.deleted).map(t => [String(t.tid), t])
189
243
  );
190
244
 
191
- const results = vectorResults
245
+ // Build snippet map: prefer the main post body (which describes what the topic is about).
246
+ // Fall back to the best vector-matched post content if no main post is available.
247
+ const fallbackSnippetByTid = {};
248
+ for (const r of vectorResults) {
249
+ const key = String(r.topic_id);
250
+ if (!fallbackSnippetByTid[key]) fallbackSnippetByTid[key] = r.content;
251
+ }
252
+ const snippetByTid = { ...fallbackSnippetByTid };
253
+ const topicsWithMainPid = topics.filter(t => t && t.tid && !t.deleted && t.mainPid);
254
+ if (topicsWithMainPid.length > 0) {
255
+ const mainContents = await Posts.getPostsFields(
256
+ topicsWithMainPid.map(t => t.mainPid),
257
+ ['pid', 'content']
258
+ );
259
+ for (let i = 0; i < topicsWithMainPid.length; i++) {
260
+ const content = mainContents[i] && mainContents[i].content;
261
+ if (content) snippetByTid[String(topicsWithMainPid[i].tid)] = content;
262
+ }
263
+ }
264
+
265
+ let results = vectorResults
192
266
  .filter(r => topicByTid[String(r.topic_id)])
193
267
  .map(r => {
194
268
  const t = topicByTid[String(r.topic_id)];
195
269
  return { tid: t.tid, title: t.title, url: `/topic/${t.slug || t.tid}` };
196
270
  })
197
- .filter((r, i, arr) => arr.findIndex(x => x.tid === r.tid) === i) // dedupe by tid
198
- .slice(0, settings.maxResults);
271
+ .filter((r, i, arr) => arr.findIndex(x => x.tid === r.tid) === i); // dedupe by tid
272
+
273
+ console.log('Vector search results before AI re-rank:', results);
274
+ // AI re-ranking: pass both titles AND content snippets so GPT can judge relevance.
275
+ if (useAI && results.length > 0) {
276
+ try {
277
+ results = await reRankWithAI(
278
+ queryText, results, topicByTid,
279
+ settings.openaiApiKey, settings.openaiModel, settings.maxResults,
280
+ snippetByTid
281
+ );
282
+ winston.info(`[search-agent] AI re-ranked vector results to ${results.length} result(s).`);
283
+ } catch (rankErr) {
284
+ winston.warn(`[search-agent] AI re-rank of vector results failed, using raw order: ${rankErr.message}`);
285
+ results = results.slice(0, settings.maxResults);
286
+ }
287
+ } else {
288
+ results = results.slice(0, settings.maxResults);
289
+ }
199
290
 
200
291
  if (results.length > 0) {
201
292
  winston.info(`[search-agent] Semantic search returned ${results.length} results for "${queryText}".`);
package/library.js CHANGED
@@ -91,6 +91,16 @@ plugin.addRoutes = async ({ router, middleware, helpers }) => {
91
91
  }
92
92
  );
93
93
  winston.info('[search-agent] API route registered: POST /api/v3/plugins/search-agent/cache/invalidate');
94
+
95
+ // Clear all embeddings and re-index (admin only)
96
+ routeHelpers.setupApiRoute(
97
+ router,
98
+ 'post',
99
+ '/search-agent/embeddings/resync',
100
+ [middleware.ensureLoggedIn, middleware.admin.checkPrivileges],
101
+ (req, res) => controllers.clearAndResync(req, res, helpers)
102
+ );
103
+ winston.info('[search-agent] API route registered: POST /api/v3/plugins/search-agent/embeddings/resync');
94
104
  };
95
105
 
96
106
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nodebb-plugin-search-agent",
3
- "version": "0.0.4",
3
+ "version": "0.0.6",
4
4
  "description": "NodeBB plugin that adds a floating chat assistant to help users find relevant forum topics using TF-IDF text similarity",
5
5
  "main": "library.js",
6
6
  "author": "Racheli Bayfus",
@@ -37,7 +37,7 @@
37
37
  },
38
38
  "license": "MIT",
39
39
  "bugs": {
40
- "url": "https://github.com/nodebb/nodebb-plugin-search-agent/issues"
40
+ "url": "https://github.com/racheliK9201/nodebb-plugin-search-agent/issues"
41
41
  },
42
42
  "readmeFilename": "README.md",
43
43
  "nbbpm": {
package/plugin.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "id": "nodebb-plugin-search-agent",
3
3
  "name": "Search Agent",
4
4
  "description": "Floating chat assistant that finds relevant forum topics using TF-IDF similarity",
5
- "url": "https://github.com/NodeBB/nodebb-plugin-search-agent",
5
+ "url": "https://github.com/racheliK9201/nodebb-plugin-search-agent",
6
6
  "library": "./library.js",
7
7
  "hooks": [
8
8
  { "hook": "static:app.load", "method": "init" },
@@ -33,6 +33,39 @@ $(document).ready(function () {
33
33
  console.log('[search-agent] setupAdminPage: loading settings into form');
34
34
  Settings.load('search-agent', $('form.search-agent-settings'));
35
35
  });
36
+
37
+ // Re-index button
38
+ $('#btn-resync-embeddings').on('click', function () {
39
+ const $btn = $(this);
40
+ const $status = $('#resync-status');
41
+
42
+ if (!confirm('This will delete ALL stored embeddings and regenerate them. Continue?')) {
43
+ return;
44
+ }
45
+
46
+ $btn.prop('disabled', true).text('Working…');
47
+ $status.text('');
48
+
49
+ $.ajax({
50
+ url: config.relative_path + '/api/v3/plugins/search-agent/embeddings/resync',
51
+ type: 'POST',
52
+ headers: { 'x-csrf-token': config.csrf_token },
53
+ success: function (data) {
54
+ const msg = (data && data.response && data.response.message) || 'Re-index started.';
55
+ $status.html('<span class="text-success">' + msg + '</span>');
56
+ app.alertSuccess(msg);
57
+ },
58
+ error: function (xhr) {
59
+ const msg = (xhr.responseJSON && xhr.responseJSON.status && xhr.responseJSON.status.message)
60
+ || 'Re-index failed.';
61
+ $status.html('<span class="text-danger">' + msg + '</span>');
62
+ app.alertError(msg);
63
+ },
64
+ complete: function () {
65
+ $btn.prop('disabled', false).text('Clear & Re-index');
66
+ },
67
+ });
68
+ });
36
69
  }
37
70
  });
38
71
 
@@ -119,63 +119,79 @@ function buildPanelHtml() {
119
119
 
120
120
  // ─── Mount ────────────────────────────────────────────────────────────────────
121
121
 
122
+ let _mounting = false;
123
+
122
124
  function mountSearchAgent({ api, translator }) {
123
125
  console.log('[search-agent] mountSearchAgent: injecting FAB and chat panel into DOM');
124
- // Avoid double-mounting if somehow called twice
125
- if (document.getElementById('search-agent-fab')) {
126
- console.log('[search-agent] mountSearchAgent: widget already mounted, skipping');
126
+ // Avoid double-mounting if somehow called twice or concurrently
127
+ if (document.getElementById('search-agent-fab') || _mounting) {
128
+ console.log('[search-agent] mountSearchAgent: widget already mounted or mount in progress, skipping');
127
129
  return;
128
130
  }
129
131
 
132
+ _mounting = true;
130
133
  const wrapper = document.createElement('div');
131
134
  wrapper.className = 'search-agent-wrapper';
132
135
 
133
- // Translate [[search-agent:key]] markers before inserting into the DOM
134
- translator.translate(buildFabHtml() + buildPanelHtml(), function (translatedHtml) {
135
- wrapper.innerHTML = translatedHtml;
136
- document.body.appendChild(wrapper);
137
-
138
- const fab = document.getElementById('search-agent-fab');
139
- const panel = document.getElementById('search-agent-panel');
140
- const closeBtn = document.getElementById('search-agent-close');
141
- const input = document.getElementById('search-agent-input');
142
- const sendBtn = document.getElementById('search-agent-send');
143
- const messages = document.getElementById('search-agent-messages');
144
-
145
- // ── Toggle panel ──────────────────────────────────────────────────────────
146
- fab.addEventListener('click', () => {
147
- const isOpen = !panel.hidden;
148
- panel.hidden = isOpen;
149
- fab.classList.toggle('search-agent-fab--active', !isOpen);
150
- console.log(`[search-agent] FAB clicked — panel is now ${isOpen ? 'closed' : 'open'}`);
151
- if (!isOpen) {
152
- input.focus();
153
- }
154
- });
155
-
156
- closeBtn.addEventListener('click', () => {
157
- panel.hidden = true;
158
- fab.classList.remove('search-agent-fab--active');
159
- });
136
+ // Translate [[search-agent:key]] markers before inserting into the DOM.
137
+ // Use Promise style so a translation failure (e.g. language-file 404)
138
+ // still falls back to raw HTML rather than silently never mounting.
139
+ const rawHtml = buildFabHtml() + buildPanelHtml();
140
+ const translatePromise = translator.translate(rawHtml);
141
+ (translatePromise && typeof translatePromise.then === 'function'
142
+ ? translatePromise
143
+ : Promise.resolve(rawHtml)
144
+ )
145
+ .catch(function (err) {
146
+ console.warn('[search-agent] mountSearchAgent: translation failed, using raw HTML', err);
147
+ return rawHtml;
148
+ })
149
+ .then(function (translatedHtml) {
150
+ wrapper.innerHTML = translatedHtml;
151
+ _mounting = false;
152
+ document.body.appendChild(wrapper);
153
+
154
+ const fab = document.getElementById('search-agent-fab');
155
+ const panel = document.getElementById('search-agent-panel');
156
+ const closeBtn = document.getElementById('search-agent-close');
157
+ const input = document.getElementById('search-agent-input');
158
+ const sendBtn = document.getElementById('search-agent-send');
159
+ const messages = document.getElementById('search-agent-messages');
160
+
161
+ // ── Toggle panel ──────────────────────────────────────────────────────────
162
+ fab.addEventListener('click', () => {
163
+ const isOpen = !panel.hidden;
164
+ panel.hidden = isOpen;
165
+ fab.classList.toggle('search-agent-fab--active', !isOpen);
166
+ console.log(`[search-agent] FAB clicked — panel is now ${isOpen ? 'closed' : 'open'}`);
167
+ if (!isOpen) {
168
+ input.focus();
169
+ }
170
+ });
160
171
 
161
- // Close on Escape
162
- document.addEventListener('keydown', (e) => {
163
- if (e.key === 'Escape' && !panel.hidden) {
172
+ closeBtn.addEventListener('click', () => {
164
173
  panel.hidden = true;
165
174
  fab.classList.remove('search-agent-fab--active');
166
- }
167
- });
175
+ });
176
+
177
+ // Close on Escape
178
+ document.addEventListener('keydown', (e) => {
179
+ if (e.key === 'Escape' && !panel.hidden) {
180
+ panel.hidden = true;
181
+ fab.classList.remove('search-agent-fab--active');
182
+ }
183
+ });
168
184
 
169
- // ── Submit on Enter or button click ───────────────────────────────────────
170
- sendBtn.addEventListener('click', () => submitQuery({ api, translator, input, messages, sendBtn }));
185
+ // ── Submit on Enter or button click ───────────────────────────────────────
186
+ sendBtn.addEventListener('click', () => submitQuery({ api, translator, input, messages, sendBtn }));
171
187
 
172
- input.addEventListener('keydown', (e) => {
173
- if (e.key === 'Enter' && !e.shiftKey) {
174
- e.preventDefault();
175
- submitQuery({ api, translator, input, messages, sendBtn });
176
- }
188
+ input.addEventListener('keydown', (e) => {
189
+ if (e.key === 'Enter' && !e.shiftKey) {
190
+ e.preventDefault();
191
+ submitQuery({ api, translator, input, messages, sendBtn });
192
+ }
193
+ });
177
194
  });
178
- });
179
195
  }
180
196
 
181
197
  // ─── Chat logic ───────────────────────────────────────────────────────────────
@@ -108,12 +108,23 @@ async function runSync() {
108
108
  continue;
109
109
  }
110
110
 
111
+ // Fetch topic titles so embeddings carry the topic context
112
+ const Topics = require.main.require('./src/topics');
113
+ const uniqueTids = [...new Set(posts.map(p => parseInt(p.tid, 10)))];
114
+ const topicFields = await Topics.getTopicsFields(uniqueTids, ['tid', 'title']);
115
+ const titleByTid = Object.fromEntries(
116
+ topicFields.filter(t => t && t.tid).map(t => [String(t.tid), t.title || ''])
117
+ );
118
+
111
119
  // ------------------------------------------------------------------
112
- // 3. Generate embeddings for this sub-batch
120
+ // 3. Generate embeddings for this sub-batch (title + content for context)
113
121
  // ------------------------------------------------------------------
114
122
  let vectors;
115
123
  try {
116
- vectors = await embedBatch(posts.map(p => p.content));
124
+ vectors = await embedBatch(posts.map((p) => {
125
+ const title = titleByTid[String(p.tid)] || '';
126
+ return title ? `${title}\n\n${p.content}` : p.content;
127
+ }));
117
128
  } catch (err) {
118
129
  winston().error(`[search-agent] syncService: failed to generate embeddings (offset ${offset}): ${err.message}`);
119
130
  totalErrors++;
@@ -8,7 +8,12 @@ function winston() {
8
8
  return require.main.require('winston');
9
9
  }
10
10
 
11
- const TOP_K = 10;
11
+ // Fetch this many candidates from Orama — cast a wide net so the AI has enough to choose from
12
+ const TOP_K = 20;
13
+ // Absolute minimum cosine similarity — only filters pure noise (near-zero similarity).
14
+ // Do NOT raise this: the relevant result often scores lower than irrelevant ones.
15
+ // The AI re-ranker (which reads content) is the precision gate, not this floor.
16
+ const MIN_SCORE = 0.10;
12
17
  // Rebuild the Orama index after this interval (mirrors TF-IDF cache TTL)
13
18
  const INDEX_TTL_MS = 5 * 60 * 1000;
14
19
 
@@ -84,7 +89,7 @@ function invalidateIndex() {
84
89
  * @returns {Promise<Array<{ topic_id: number, post_id: number, content: string, score: number }>>}
85
90
  * Top results sorted by cosine similarity descending.
86
91
  */
87
- async function search(query) {
92
+ async function search(query, limit = TOP_K) {
88
93
  if (typeof query !== 'string' || query.trim() === '') {
89
94
  throw new Error('search() requires a non-empty query string');
90
95
  }
@@ -99,13 +104,19 @@ async function search(query) {
99
104
  const results = await oramaSearch(db, {
100
105
  mode: 'vector',
101
106
  vector: { value: queryEmbedding, property: 'embedding' },
102
- limit: TOP_K,
107
+ limit,
108
+ similarity: 0.1,
103
109
  includeVectors: false,
104
110
  });
105
111
 
106
112
  winston().verbose(`[search-agent] vectorSearchService: Orama returned ${results.hits.length} hit(s)`);
107
113
 
108
- return results.hits.map(hit => ({
114
+ const filtered = results.hits.filter(hit => hit.score >= MIN_SCORE);
115
+ winston().verbose(
116
+ `[search-agent] vectorSearchService: ${filtered.length}/${results.hits.length} hit(s) passed noise floor (MIN_SCORE=${MIN_SCORE})`
117
+ );
118
+
119
+ return filtered.map(hit => ({
109
120
  topic_id: hit.document.topic_id,
110
121
  post_id: hit.document.post_id,
111
122
  content: hit.document.content,
@@ -134,4 +134,21 @@ async function getMissingEmbeddings(postIds) {
134
134
  return missing;
135
135
  }
136
136
 
137
- module.exports = { saveEmbedding, getAllEmbeddings, findByPostId, getMissingEmbeddings };
137
+ /**
138
+ * Delete every stored embedding and reset the in-memory cache.
139
+ * Call this before a full re-index.
140
+ *
141
+ * @returns {Promise<number>} Number of documents deleted
142
+ */
143
+ async function clearAllEmbeddings() {
144
+ winston().info('[search-agent] vectorStore: clearing ALL embeddings from database…');
145
+ await ensureIndexes();
146
+ const col = getCollection();
147
+ const result = await col.deleteMany({});
148
+ _cache = [];
149
+ _cachePromise = null;
150
+ winston().info(`[search-agent] vectorStore: deleted ${result.deletedCount} embedding(s)`);
151
+ return result.deletedCount;
152
+ }
153
+
154
+ module.exports = { saveEmbedding, getAllEmbeddings, findByPostId, getMissingEmbeddings, clearAllEmbeddings };
@@ -181,6 +181,23 @@
181
181
  </button>
182
182
  </div>
183
183
  </div>
184
+
185
+ <div class="card mt-3">
186
+ <div class="card-header">Re-index Embeddings</div>
187
+ <div class="card-body">
188
+ <p class="card-text small">
189
+ Delete all stored embeddings and re-generate them with the
190
+ current strategy (title&nbsp;+ post content).
191
+ The re-index runs in the background — search continues working
192
+ via TF-IDF until it finishes.
193
+ </p>
194
+ <button id="btn-resync-embeddings" class="btn btn-danger btn-sm fw-semibold">
195
+ Clear &amp; Re-index
196
+ </button>
197
+ <div id="resync-status" class="mt-2 small"></div>
198
+ </div>
199
+ </div>
200
+ </div>
184
201
  </div>
185
202
  </div>
186
203
  </div>