nodebb-plugin-search-agent 0.0.91 → 0.0.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,10 +10,42 @@ let cachedTopicMap = null;
10
10
  let cacheTs = 0;
11
11
  const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
12
12
 
13
+ // ─── Search result cache ──────────────────────────────────────────────────────
14
+ // Caches final search results by normalised query string.
15
+ // Avoids repeat AI calls when an identical (normalised) query arrives within the TTL window.
16
+
17
+ const _searchCache = new Map();
18
+ const SEARCH_CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes
19
+ const SEARCH_CACHE_MAX = 200;
20
+
21
+ function _normalizeQuery(q) {
22
+ return q.trim().toLowerCase().replace(/\s+/g, ' ');
23
+ }
24
+
25
+ function _getSearchCache(queryText) {
26
+ const key = _normalizeQuery(queryText);
27
+ const entry = _searchCache.get(key);
28
+ if (entry && (Date.now() - entry.ts) < SEARCH_CACHE_TTL_MS) {
29
+ return entry.results;
30
+ }
31
+ _searchCache.delete(key);
32
+ return null;
33
+ }
34
+
35
+ function _setSearchCache(queryText, results) {
36
+ const key = _normalizeQuery(queryText);
37
+ _searchCache.set(key, { results, ts: Date.now() });
38
+ if (_searchCache.size > SEARCH_CACHE_MAX) {
39
+ // Evict the oldest (first-inserted) entry — a Map iterates its keys in insertion order
40
+ _searchCache.delete(_searchCache.keys().next().value);
41
+ }
42
+ }
43
+
13
44
  function invalidateCache() {
14
45
  cachedIndex = null;
15
46
  cachedTopicMap = null;
16
47
  cacheTs = 0;
48
+ _searchCache.clear();
17
49
  require.main.require('winston').info('[search-agent] Topic index cache invalidated.');
18
50
  }
19
51
 
@@ -30,6 +62,8 @@ async function getSettings() {
30
62
  openaiModel: (raw.openaiModel || 'gpt-4o-mini').trim(),
31
63
  // How many TF-IDF candidates to send to AI for re-ranking
32
64
  aiCandidates: Math.min(100, Math.max(5, parseInt(raw.aiCandidates, 10) || 30)),
65
+ // HyDE: generate a hypothetical answer before embedding — improves recall but costs one extra LLM call per search
66
+ hydeEnabled: raw.hydeEnabled === 'on',
33
67
  // Visibility: 'all' = all logged-in users, 'admins' = administrators only
34
68
  visibleTo: raw.visibleTo || 'all',
35
69
  // Whether guests (non-logged-in users) may use the widget
@@ -163,31 +197,40 @@ async function expandQueryWithHyDE(queryText, apiKey, model) {
163
197
  */
164
198
  async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxResults, snippetByTid = {}) {
165
199
  console.log('Re-ranking with AI:', { queryText, candidates: candidates.map(c => ({ tid: c.tid, title: (topicMap[String(c.tid)] || {}).title })) });
166
- const candidateList = candidates
167
- .map((c) => {
168
- const title = (topicMap[String(c.tid)] || {}).title || '';
169
- const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/[ \t]+/g, ' ').trim();
170
- const snippet = raw.length > 0 ? `\n תוכן: "${raw.slice(0, 1500)}"` : '';
171
- return `[tid:${c.tid}] ${title}${snippet}`;
172
- })
173
- .join('\n\n');
200
+
201
+
202
+ // Embed the query and all candidate post snippets
203
+ const { embed, embedBatch } = require('../services/embeddingService');
204
+ const queryEmbedding = await embed(queryText);
205
+ const postSnippets = candidates.map((c) => {
206
+ const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/[ \t]+/g, ' ').trim();
207
+ return raw.slice(0, 1500);
208
+ });
209
+ const postEmbeddings = await embedBatch(postSnippets);
210
+
211
+ // Format: [tid:..., embedding: [v1, v2, ...]]
212
+ const candidateList = candidates.map((c, i) => {
213
+ return `[tid:${c.tid}]\nembedding: [${postEmbeddings[i].slice(0, 8).map(x => x.toFixed(4)).join(', ')} ...]`;
214
+ }).join('\n\n');
174
215
 
175
216
  const systemPrompt =
176
217
  'אתה מסנן חיפוש פורום מחמיר. ' +
177
- 'לכל נושא ברשימה, דרג את הרלוונטיות שלו לשאלת המשתמש בסקלה 0-10: ' +
218
+ 'לכל מועמד ברשימה, דרג את הרלוונטיות של embedding הפוסט לembedding של השאלה בסקלה 0-10: ' +
178
219
  '10 = עונה ישירות ובאופן מלא. 7-9 = עונה על חלק משמעותי. 0-6 = לא רלוונטי. ' +
179
220
  'החזר אך ורק JSON תקני במבנה: {"tid": ציון, ...} — לדוגמה: {"42": 9, "15": 3}. ' +
180
221
  'אין להוסיף הסברים, טקסט נוסף, או עיצוב מחוץ ל-JSON.'+
181
- 'הוסף שדה נוסף "scoreExplanation" עם משפט קצר שמסביר למה נושא עם ציון נמוך לא רלוונטי, כדי שנוכל להבין את שיקול הדעת של המודל.';
222
+ 'הוסף שדה נוסף "scoreExplanation" עם משפט קצר שמסביר לפי מה נעשה הדירוג.';
182
223
 
183
224
  const userMessage =
184
- `שאלת המשתמש: "${queryText}"\n\nנושאים:\n${candidateList}`;
225
+ `embedding של שאלת המשתמש: [${queryEmbedding.slice(0, 8).map(x => x.toFixed(4)).join(', ')} ...]\n\nפוסטים:\n${candidateList}`;
185
226
 
186
227
  const response = await callOpenAI(apiKey, model, [
187
228
  { role: 'system', content: systemPrompt },
188
229
  { role: 'user', content: userMessage },
189
230
  ]);
190
231
 
232
+ console.log('AI scoring response:', response.choices[0].message.content);
233
+
191
234
  const content = (response.choices[0].message.content || '').trim();
192
235
 
193
236
  // Extract the JSON object from the response
@@ -218,6 +261,14 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
218
261
  */
219
262
  async function searchTopics(queryText) {
220
263
  const winston = require.main.require('winston');
264
+
265
+ // ── Search result cache ───────────────────────────────────────────────────
266
+ const cachedResults = _getSearchCache(queryText);
267
+ if (cachedResults) {
268
+ winston.verbose(`[search-agent] Search cache hit for "${queryText}" (${cachedResults.length} results)`);
269
+ return cachedResults;
270
+ }
271
+
221
272
  const settings = await getSettings();
222
273
 
223
274
  // ── Semantic search (primary) ────────────────────────────────────────────
@@ -228,7 +279,7 @@ async function searchTopics(queryText) {
228
279
  // HyDE: replace the short raw query with a hypothetical answer so the
229
280
  // embedding matches post content more closely.
230
281
  let embeddingQuery = queryText;
231
- if (useAI) {
282
+ if (useAI && settings.hydeEnabled) {
232
283
  try {
233
284
  embeddingQuery = await expandQueryWithHyDE(
234
285
  queryText, settings.openaiApiKey, settings.openaiModel
@@ -312,6 +363,7 @@ async function searchTopics(queryText) {
312
363
 
313
364
  if (results.length > 0) {
314
365
  winston.info(`[search-agent] Semantic search returned ${results.length} results for "${queryText}".`);
366
+ _setSearchCache(queryText, results);
315
367
  return results;
316
368
  }
317
369
  }
@@ -367,6 +419,7 @@ async function searchTopics(queryText) {
367
419
  url: `/topic/${(topicMap[String(r.tid)] || {}).slug || r.tid}`,
368
420
  }));
369
421
  winston.info(`[search-agent] Final results: ${JSON.stringify(results.map(r => r.title))}`);
422
+ _setSearchCache(queryText, results);
370
423
  return results;
371
424
  }
372
425
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nodebb-plugin-search-agent",
3
- "version": "0.0.91",
3
+ "version": "0.0.93",
4
4
  "description": "NodeBB plugin that adds a floating chat assistant to help users find relevant forum topics using TF-IDF text similarity",
5
5
  "main": "library.js",
6
6
  "author": "Racheli Bayfus",
@@ -11,8 +11,16 @@ const OPENAI_EMBEDDINGS_PATH = '/v1/embeddings';
11
11
  const EMBEDDING_MODEL = 'text-embedding-3-small';
12
12
  const MAX_RETRIES = 3;
13
13
  const RETRY_DELAY_MS = 500;
14
- // text-embedding-3-small supports 8 192 tokens; ~4 chars/token → cap at 30 000 chars (~7 500 tokens)
15
- const MAX_CHARS = 30000;
14
+ // text-embedding-3-small supports 8,192 input tokens.
15
+ // Hebrew/non-ASCII text tokenizes at ~1.5–2 chars/token (UTF-8 multibyte).
16
+ // Budgeting 8,000 tokens × 1.5 chars/token ≈ 12,000 chars keeps headroom under the 8,192-token limit.
17
+ const MAX_CHARS = 12000;
18
+
19
+ // ─── Embedding cache ──────────────────────────────────────────────────────────
20
+ // Avoids calling the embeddings API for the same text within a session.
21
+ // HyDE output varies, so the biggest wins come from repeated identical queries.
22
+ const _embedCache = new Map();
23
+ const EMBED_CACHE_MAX = 500;
16
24
 
17
25
  function truncate(text) {
18
26
  return text.length > MAX_CHARS ? text.slice(0, MAX_CHARS) : text;
@@ -102,10 +110,23 @@ async function embed(text) {
102
110
  }
103
111
 
104
112
  const safe = truncate(text);
113
+
114
+ if (_embedCache.has(safe)) {
115
+ winston().verbose('[search-agent] embeddingService: embedding cache hit');
116
+ return _embedCache.get(safe);
117
+ }
118
+
105
119
  winston().verbose(`[search-agent] embeddingService: generating embedding for text (${safe.length} chars)`);
106
120
  const response = await withRetry(() => requestEmbeddings(apiKey, safe));
107
121
  winston().verbose('[search-agent] embeddingService: embedding generated successfully');
108
- return response.data[0].embedding;
122
+ const embedding = response.data[0].embedding;
123
+
124
+ if (_embedCache.size >= EMBED_CACHE_MAX) {
125
+ _embedCache.delete(_embedCache.keys().next().value);
126
+ }
127
+ _embedCache.set(safe, embedding);
128
+
129
+ return embedding;
109
130
  }
110
131
 
111
132
  /**
@@ -73,6 +73,18 @@
73
73
  </label>
74
74
  </div>
75
75
 
76
+ <div class="form-check form-switch mb-3">
77
+ <input type="checkbox" class="form-check-input" id="hydeEnabled" name="hydeEnabled">
78
+ <label for="hydeEnabled" class="form-check-label">
79
+ Enable HyDE query expansion
80
+ </label>
81
+ <div class="form-text">
82
+ Generates a hypothetical forum post from the query before embedding — improves
83
+ recall for vague queries but adds <strong>one extra LLM call per search</strong>.
84
+ Disable to cut AI costs roughly in half.
85
+ </div>
86
+ </div>
87
+
76
88
  <div class="mb-3">
77
89
  <label class="form-label" for="openaiApiKey">OpenAI API Key</label>
78
90
  <input