nodebb-plugin-search-agent 0.0.92 → 0.0.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,10 +10,42 @@ let cachedTopicMap = null;
10
10
  let cacheTs = 0;
11
11
  const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
12
12
 
13
+ // ─── Search result cache ──────────────────────────────────────────────────────
14
+ // Caches final search results by normalised query string.
15
+ // Saves all AI calls for repeated queries within the TTL window.
16
+
17
// Maps a normalised query string to an entry of the form { results, ts }.
+ const _searchCache = new Map();
18
// Maximum age of an entry; a lookup treats anything older as a miss.
+ const SEARCH_CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes
19
// Entry limit; an insert that pushes the map above this removes its first key.
+ const SEARCH_CACHE_MAX = 200;
20
+
21
/**
 * Builds the cache key for a query: trimmed, lower-cased, and with every run
 * of whitespace collapsed to a single space.
 *
 * Non-string input is coerced instead of throwing, so a missing query cannot
 * crash the cache lookup: null and undefined become the empty string, anything
 * else goes through String().
 *
 * @param {*} q - Raw query text.
 * @returns {string} Normalised key.
 */
function _normalizeQuery(q) {
  // `== null` deliberately matches both null and undefined.
  const text = q == null ? '' : String(q);
  return text.trim().toLowerCase().replace(/\s+/g, ' ');
}
24
+
25
/**
 * Looks up cached search results for a query.
 *
 * The key is the normalised form of `queryText`. An entry counts as fresh
 * while its age is below SEARCH_CACHE_TTL_MS; a stale entry is removed from
 * the map as a side effect of the lookup.
 *
 * On a hit the results array is returned as a shallow copy, so a caller that
 * sorts, splices or pushes on the returned list cannot alter what later hits
 * receive. The element objects themselves are still shared.
 *
 * @param {string} queryText - Raw query text.
 * @returns {Array|null} Copy of the cached results, or null on a miss.
 */
function _getSearchCache(queryText) {
  const key = _normalizeQuery(queryText);
  const entry = _searchCache.get(key);
  if (!entry) {
    return null;
  }

  const isFresh = (Date.now() - entry.ts) < SEARCH_CACHE_TTL_MS;
  if (!isFresh) {
    _searchCache.delete(key);
    return null;
  }

  return Array.isArray(entry.results) ? entry.results.slice() : entry.results;
}
34
+
35
/**
 * Stores the results for a query under its normalised key, stamped with the
 * current time.
 *
 * Eviction relies on Map iteration order (insertion order). Map.set on a key
 * that already exists keeps its original position, so the key is deleted
 * first: a refreshed entry then moves to the end and is not mistaken for the
 * oldest one when the size limit is exceeded.
 *
 * @param {string} queryText - Raw query text.
 * @param {Array} results - Final search results to remember.
 * @returns {void}
 */
function _setSearchCache(queryText, results) {
  const key = _normalizeQuery(queryText);

  // Re-insert rather than overwrite so insertion order tracks freshness.
  _searchCache.delete(key);
  _searchCache.set(key, { results, ts: Date.now() });

  if (_searchCache.size > SEARCH_CACHE_MAX) {
    // The first key in iteration order is the least recently written entry.
    _searchCache.delete(_searchCache.keys().next().value);
  }
}
43
+
13
44
  function invalidateCache() {
14
45
  cachedIndex = null;
15
46
  cachedTopicMap = null;
16
47
  cacheTs = 0;
48
+ _searchCache.clear();
17
49
  require.main.require('winston').info('[search-agent] Topic index cache invalidated.');
18
50
  }
19
51
 
@@ -30,6 +62,8 @@ async function getSettings() {
30
62
  openaiModel: (raw.openaiModel || 'gpt-4o-mini').trim(),
31
63
  // How many TF-IDF candidates to send to AI for re-ranking
32
64
  aiCandidates: Math.min(100, Math.max(5, parseInt(raw.aiCandidates, 10) || 30)),
65
+ // HyDE: generate a hypothetical answer before embedding — improves recall but costs one extra LLM call per search
66
+ hydeEnabled: raw.hydeEnabled === 'on',
33
67
  // Visibility: 'all' = all logged-in users, 'admins' = administrators only
34
68
  visibleTo: raw.visibleTo || 'all',
35
69
  // Whether guests (non-logged-in users) may use the widget
@@ -163,25 +197,32 @@ async function expandQueryWithHyDE(queryText, apiKey, model) {
163
197
  */
164
198
  async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxResults, snippetByTid = {}) {
165
199
  console.log('Re-ranking with AI:', { queryText, candidates: candidates.map(c => ({ tid: c.tid, title: (topicMap[String(c.tid)] || {}).title })) });
166
- const candidateList = candidates
167
- .map((c) => {
168
- const title = (topicMap[String(c.tid)] || {}).title || '';
169
- const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/[ \t]+/g, ' ').trim();
170
- const snippet = raw.length > 0 ? `\n תוכן: "${raw.slice(0, 1500)}"` : '';
171
- return `[tid:${c.tid}] ${title}${snippet}`;
172
- })
173
- .join('\n\n');
200
+
201
+
202
+ // Embed the query and all candidate post snippets
203
+ const { embed, embedBatch } = require('../services/embeddingService');
204
+ const queryEmbedding = await embed(queryText);
205
+ const postSnippets = candidates.map((c) => {
206
+ const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/[ \t]+/g, ' ').trim();
207
+ return raw.slice(0, 1500);
208
+ });
209
+ const postEmbeddings = await embedBatch(postSnippets);
210
+
211
+ // Format: [tid:..., embedding: [v1, v2, ...]]
212
+ const candidateList = candidates.map((c, i) => {
213
+ return `[tid:${c.tid}]\nembedding: [${postEmbeddings[i].slice(0, 8).map(x => x.toFixed(4)).join(', ')} ...]`;
214
+ }).join('\n\n');
174
215
 
175
216
  const systemPrompt =
176
217
  'אתה מסנן חיפוש פורום מחמיר. ' +
177
- 'לכל נושא ברשימה, דרג את הרלוונטיות שלו לשאלת המשתמש בסקלה 0-10: ' +
218
+ 'לכל מועמד ברשימה, דרג את הרלוונטיות של embedding הפוסט לembedding של השאלה בסקלה 0-10: ' +
178
219
  '10 = עונה ישירות ובאופן מלא. 7-9 = עונה על חלק משמעותי. 0-6 = לא רלוונטי. ' +
179
220
  'החזר אך ורק JSON תקני במבנה: {"tid": ציון, ...} — לדוגמה: {"42": 9, "15": 3}. ' +
180
221
  'אין להוסיף הסברים, טקסט נוסף, או עיצוב מחוץ ל-JSON.'+
181
- 'הוסף שדה נוסף "scoreExplanation" עם משפט קצר שמסביר למה נושא עם ציון נמוך לא רלוונטי, כדי שנוכל להבין את שיקול הדעת של המודל.';
222
+ 'הוסף שדה נוסף "scoreExplanation" עם משפט קצר שמסביר לפי מה נעשה הדירוג.';
182
223
 
183
224
  const userMessage =
184
- `שאלת המשתמש: "${queryText}"\n\nנושאים:\n${candidateList}`;
225
+ `embedding של שאלת המשתמש: [${queryEmbedding.slice(0, 8).map(x => x.toFixed(4)).join(', ')} ...]\n\nפוסטים:\n${candidateList}`;
185
226
 
186
227
  const response = await callOpenAI(apiKey, model, [
187
228
  { role: 'system', content: systemPrompt },
@@ -220,6 +261,14 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
220
261
  */
221
262
  async function searchTopics(queryText) {
222
263
  const winston = require.main.require('winston');
264
+
265
+ // ── Search result cache ───────────────────────────────────────────────────
266
+ const cachedResults = _getSearchCache(queryText);
267
+ if (cachedResults) {
268
+ winston.verbose(`[search-agent] Search cache hit for "${queryText}" (${cachedResults.length} results)`);
269
+ return cachedResults;
270
+ }
271
+
223
272
  const settings = await getSettings();
224
273
 
225
274
  // ── Semantic search (primary) ────────────────────────────────────────────
@@ -230,7 +279,7 @@ async function searchTopics(queryText) {
230
279
  // HyDE: replace the short raw query with a hypothetical answer so the
231
280
  // embedding matches post content more closely.
232
281
  let embeddingQuery = queryText;
233
- if (useAI) {
282
+ if (useAI && settings.hydeEnabled) {
234
283
  try {
235
284
  embeddingQuery = await expandQueryWithHyDE(
236
285
  queryText, settings.openaiApiKey, settings.openaiModel
@@ -314,6 +363,7 @@ async function searchTopics(queryText) {
314
363
 
315
364
  if (results.length > 0) {
316
365
  winston.info(`[search-agent] Semantic search returned ${results.length} results for "${queryText}".`);
366
+ _setSearchCache(queryText, results);
317
367
  return results;
318
368
  }
319
369
  }
@@ -369,6 +419,7 @@ async function searchTopics(queryText) {
369
419
  url: `/topic/${(topicMap[String(r.tid)] || {}).slug || r.tid}`,
370
420
  }));
371
421
  winston.info(`[search-agent] Final results: ${JSON.stringify(results.map(r => r.title))}`);
422
+ _setSearchCache(queryText, results);
372
423
  return results;
373
424
  }
374
425
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nodebb-plugin-search-agent",
3
- "version": "0.0.92",
3
+ "version": "0.0.93",
4
4
  "description": "NodeBB plugin that adds a floating chat assistant to help users find relevant forum topics using TF-IDF text similarity",
5
5
  "main": "library.js",
6
6
  "author": "Racheli Bayfus",
@@ -16,6 +16,12 @@ const RETRY_DELAY_MS = 500;
16
16
  // Using 1.5 chars/token worst-case: 8000 tokens × 1.5 = 12 000 chars — gives a safe margin.
17
17
  const MAX_CHARS = 12000;
18
18
 
19
+ // ─── Embedding cache ──────────────────────────────────────────────────────────
20
+ // Avoids calling the embeddings API again for text this process has already embedded; entries have no TTL and are only dropped when the size cap is reached.
21
+ // HyDE output varies, so the biggest wins come from repeated identical queries.
22
// Keyed by the truncated input text; each value is the embedding stored for that text.
+ const _embedCache = new Map();
23
// Entry limit: embed() removes the first key in the map once the size reaches this value.
+ const EMBED_CACHE_MAX = 500;
24
+
19
25
  function truncate(text) {
20
26
  return text.length > MAX_CHARS ? text.slice(0, MAX_CHARS) : text;
21
27
  }
@@ -104,10 +110,23 @@ async function embed(text) {
104
110
  }
105
111
 
106
112
  const safe = truncate(text);
113
+
114
+ if (_embedCache.has(safe)) {
115
+ winston().verbose('[search-agent] embeddingService: embedding cache hit');
116
+ return _embedCache.get(safe);
117
+ }
118
+
107
119
  winston().verbose(`[search-agent] embeddingService: generating embedding for text (${safe.length} chars)`);
108
120
  const response = await withRetry(() => requestEmbeddings(apiKey, safe));
109
121
  winston().verbose('[search-agent] embeddingService: embedding generated successfully');
110
- return response.data[0].embedding;
122
+ const embedding = response.data[0].embedding;
123
+
124
+ if (_embedCache.size >= EMBED_CACHE_MAX) {
125
+ _embedCache.delete(_embedCache.keys().next().value);
126
+ }
127
+ _embedCache.set(safe, embedding);
128
+
129
+ return embedding;
111
130
  }
112
131
 
113
132
  /**
@@ -73,6 +73,18 @@
73
73
  </label>
74
74
  </div>
75
75
 
76
+ <div class="form-check form-switch mb-3">
77
+ <input type="checkbox" class="form-check-input" id="hydeEnabled" name="hydeEnabled">
78
+ <label for="hydeEnabled" class="form-check-label">
79
+ Enable HyDE query expansion
80
+ </label>
81
+ <div class="form-text">
82
+ Generates a hypothetical forum post from the query before embedding — improves
83
+ recall for vague queries but adds <strong>one extra LLM call per search</strong>.
84
+ Disable to cut AI costs roughly in half.
85
+ </div>
86
+ </div>
87
+
76
88
  <div class="mb-3">
77
89
  <label class="form-label" for="openaiApiKey">OpenAI API Key</label>
78
90
  <input