nodebb-plugin-search-agent 0.0.92 → 0.0.931

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,9 @@
1
// The directive must be the first statement of the file: a 'use strict' string
// that appears after other code is an ordinary expression statement and does
// not enable strict mode.
'use strict';

// ─── Token estimation helper ───────────────────────────────────────────────
/**
 * Rough token-count estimate, used for the token-usage log line.
 *
 * The default of 4 characters per token suits English text. Denser scripts
 * such as Hebrew are closer to 2 characters per token, so the default
 * under-counts for them; pass a smaller `charsPerToken` when that matters.
 *
 * @param {string} str - Text to measure; null/undefined are treated as empty.
 * @param {number} [charsPerToken=4] - Average characters per token. Values
 *   that are not positive numbers fall back to 4.
 * @returns {number} Estimated number of tokens (0 for empty input).
 */
function estimateTokens(str, charsPerToken = 4) {
  if (str == null) {
    return 0;
  }
  const ratio = charsPerToken > 0 ? charsPerToken : 4;
  return Math.ceil(String(str).length / ratio);
}
2
8
 
3
9
  const https = require('https');
@@ -10,10 +16,42 @@ let cachedTopicMap = null;
10
16
  let cacheTs = 0;
11
17
  const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
12
18
 
19
// ─── Search result cache ──────────────────────────────────────────────────────
// Caches final search results by normalised query string.
// Saves all AI calls for repeated queries within the TTL window.

// Map iteration order is insertion order, so the first key is the oldest entry.
const _searchCache = new Map();
const SEARCH_CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes
const SEARCH_CACHE_MAX = 200;

/**
 * Builds the cache key for a query: trimmed, lower-cased, with every run of
 * whitespace collapsed to a single space.
 *
 * @param {*} q - Raw query. Non-strings are stringified; null/undefined
 *   become the empty string so the cache layer never throws on bad input.
 * @returns {string} Normalised key.
 */
function _normalizeQuery(q) {
  const text = q == null ? '' : String(q);
  return text.trim().toLowerCase().replace(/\s+/g, ' ');
}

/**
 * Looks up cached results for a query.
 *
 * @param {string} queryText - Raw user query.
 * @returns {*} The stored results when an entry exists and is younger than
 *   SEARCH_CACHE_TTL_MS, otherwise null. A stale entry is removed.
 */
function _getSearchCache(queryText) {
  const key = _normalizeQuery(queryText);
  const entry = _searchCache.get(key);
  const isFresh = Boolean(entry) && (Date.now() - entry.ts) < SEARCH_CACHE_TTL_MS;
  if (!isFresh) {
    _searchCache.delete(key);
    return null;
  }
  return entry.results;
}

/**
 * Stores results for a query and keeps the cache within SEARCH_CACHE_MAX.
 *
 * @param {string} queryText - Raw user query.
 * @param {*} results - Value to hand back on later hits.
 */
function _setSearchCache(queryText, results) {
  const key = _normalizeQuery(queryText);
  // Map.set on an existing key keeps its original position, so remove it
  // first. Otherwise a just-refreshed entry could still be evicted as oldest.
  _searchCache.delete(key);
  _searchCache.set(key, { results, ts: Date.now() });
  while (_searchCache.size > SEARCH_CACHE_MAX) {
    // Evict the oldest entry
    _searchCache.delete(_searchCache.keys().next().value);
  }
}
49
+
13
50
  function invalidateCache() {
14
51
  cachedIndex = null;
15
52
  cachedTopicMap = null;
16
53
  cacheTs = 0;
54
+ _searchCache.clear();
17
55
  require.main.require('winston').info('[search-agent] Topic index cache invalidated.');
18
56
  }
19
57
 
@@ -30,6 +68,8 @@ async function getSettings() {
30
68
  openaiModel: (raw.openaiModel || 'gpt-4o-mini').trim(),
31
69
  // How many TF-IDF candidates to send to AI for re-ranking
32
70
  aiCandidates: Math.min(100, Math.max(5, parseInt(raw.aiCandidates, 10) || 30)),
71
+ // HyDE: generate a hypothetical answer before embedding — improves recall but costs one extra LLM call per search
72
+ hydeEnabled: raw.hydeEnabled === 'on',
33
73
  // Visibility: 'all' = all logged-in users, 'admins' = administrators only
34
74
  visibleTo: raw.visibleTo || 'all',
35
75
  // Whether guests (non-logged-in users) may use the widget
@@ -163,25 +203,40 @@ async function expandQueryWithHyDE(queryText, apiKey, model) {
163
203
  */
164
204
  async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxResults, snippetByTid = {}) {
165
205
  console.log('Re-ranking with AI:', { queryText, candidates: candidates.map(c => ({ tid: c.tid, title: (topicMap[String(c.tid)] || {}).title })) });
166
- const candidateList = candidates
167
- .map((c) => {
168
- const title = (topicMap[String(c.tid)] || {}).title || '';
169
- const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/[ \t]+/g, ' ').trim();
170
- const snippet = raw.length > 0 ? `\n תוכן: "${raw.slice(0, 1500)}"` : '';
171
- return `[tid:${c.tid}] ${title}${snippet}`;
172
- })
173
- .join('\n\n');
206
+
207
+
208
+ // Embed the query and all candidate post snippets
209
+
210
+ const { embed, embedBatch } = require('../services/embeddingService');
211
+ const queryEmbedding = await embed(queryText);
212
+ const postSnippets = candidates.map((c) => {
213
+ const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/[ \t]+/g, ' ').trim();
214
+ return raw.slice(0, 1500);
215
+ });
216
+ const postEmbeddings = await embedBatch(postSnippets);
217
+
218
+ // Format: [tid:..., embedding: [v1, v2, ...]]
219
+ const candidateList = candidates.map((c, i) => {
220
+ return `[tid:${c.tid}]\nembedding: [${postEmbeddings[i].slice(0, 8).map(x => x.toFixed(4)).join(', ')} ...]`;
221
+ }).join('\n\n');
174
222
 
175
223
  const systemPrompt =
176
224
  'אתה מסנן חיפוש פורום מחמיר. ' +
177
- 'לכל נושא ברשימה, דרג את הרלוונטיות שלו לשאלת המשתמש בסקלה 0-10: ' +
225
+ 'לכל מועמד ברשימה, דרג את הרלוונטיות של embedding הפוסט לembedding של השאלה בסקלה 0-10: ' +
178
226
  '10 = עונה ישירות ובאופן מלא. 7-9 = עונה על חלק משמעותי. 0-6 = לא רלוונטי. ' +
179
227
  'החזר אך ורק JSON תקני במבנה: {"tid": ציון, ...} — לדוגמה: {"42": 9, "15": 3}. ' +
180
228
  'אין להוסיף הסברים, טקסט נוסף, או עיצוב מחוץ ל-JSON.'+
181
- 'הוסף שדה נוסף "scoreExplanation" עם משפט קצר שמסביר למה נושא עם ציון נמוך לא רלוונטי, כדי שנוכל להבין את שיקול הדעת של המודל.';
229
+ 'הוסף שדה נוסף "scoreExplanation" עם משפט קצר שמסביר לפי מה נעשה הדירוג.';
182
230
 
183
231
  const userMessage =
184
- `שאלת המשתמש: "${queryText}"\n\nנושאים:\n${candidateList}`;
232
+ `embedding של שאלת המשתמש: [${queryEmbedding.slice(0, 8).map(x => x.toFixed(4)).join(', ')} ...]\n\nפוסטים:\n${candidateList}`;
233
+
234
+ // --- Token count logging ---
235
+ const totalEmbeddingChars = queryText.length + postSnippets.reduce((sum, s) => sum + s.length, 0);
236
+ const embeddingTokens = estimateTokens(queryText) + postSnippets.reduce((sum, s) => sum + estimateTokens(s), 0);
237
+ const llmPromptTokens = estimateTokens(systemPrompt) + estimateTokens(userMessage);
238
+ const winston = require.main.require('winston');
239
+ winston.info(`[search-agent] Token usage: embedding API ≈ ${embeddingTokens} tokens, LLM prompt ≈ ${llmPromptTokens} tokens (for this search)`);
185
240
 
186
241
  const response = await callOpenAI(apiKey, model, [
187
242
  { role: 'system', content: systemPrompt },
@@ -220,6 +275,14 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
220
275
  */
221
276
  async function searchTopics(queryText) {
222
277
  const winston = require.main.require('winston');
278
+
279
+ // ── Search result cache ───────────────────────────────────────────────────
280
+ const cachedResults = _getSearchCache(queryText);
281
+ if (cachedResults) {
282
+ winston.verbose(`[search-agent] Search cache hit for "${queryText}" (${cachedResults.length} results)`);
283
+ return cachedResults;
284
+ }
285
+
223
286
  const settings = await getSettings();
224
287
 
225
288
  // ── Semantic search (primary) ────────────────────────────────────────────
@@ -230,7 +293,7 @@ async function searchTopics(queryText) {
230
293
  // HyDE: replace the short raw query with a hypothetical answer so the
231
294
  // embedding matches post content more closely.
232
295
  let embeddingQuery = queryText;
233
- if (useAI) {
296
+ if (useAI && settings.hydeEnabled) {
234
297
  try {
235
298
  embeddingQuery = await expandQueryWithHyDE(
236
299
  queryText, settings.openaiApiKey, settings.openaiModel
@@ -314,6 +377,7 @@ async function searchTopics(queryText) {
314
377
 
315
378
  if (results.length > 0) {
316
379
  winston.info(`[search-agent] Semantic search returned ${results.length} results for "${queryText}".`);
380
+ _setSearchCache(queryText, results);
317
381
  return results;
318
382
  }
319
383
  }
@@ -369,6 +433,7 @@ async function searchTopics(queryText) {
369
433
  url: `/topic/${(topicMap[String(r.tid)] || {}).slug || r.tid}`,
370
434
  }));
371
435
  winston.info(`[search-agent] Final results: ${JSON.stringify(results.map(r => r.title))}`);
436
+ _setSearchCache(queryText, results);
372
437
  return results;
373
438
  }
374
439
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nodebb-plugin-search-agent",
3
- "version": "0.0.92",
3
+ "version": "0.0.931",
4
4
  "description": "NodeBB plugin that adds a floating chat assistant to help users find relevant forum topics using TF-IDF text similarity",
5
5
  "main": "library.js",
6
6
  "author": "Racheli Bayfus",
@@ -16,6 +16,12 @@ const RETRY_DELAY_MS = 500;
16
16
  // Using 1.5 chars/token worst-case: 8000 tokens × 1.5 = 12 000 chars — gives a safe margin.
17
17
  const MAX_CHARS = 12000;
18
18
 
19
+ // ─── Embedding cache ──────────────────────────────────────────────────────────
20
+ // Avoids calling the embeddings API for the same text within a session.
21
+ // HyDE output varies, so the biggest wins come from repeated identical queries.
22
+ const _embedCache = new Map();
23
+ const EMBED_CACHE_MAX = 500;
24
+
19
25
  function truncate(text) {
20
26
  return text.length > MAX_CHARS ? text.slice(0, MAX_CHARS) : text;
21
27
  }
@@ -104,10 +110,23 @@ async function embed(text) {
104
110
  }
105
111
 
106
112
  const safe = truncate(text);
113
+
114
+ if (_embedCache.has(safe)) {
115
+ winston().verbose('[search-agent] embeddingService: embedding cache hit');
116
+ return _embedCache.get(safe);
117
+ }
118
+
107
119
  winston().verbose(`[search-agent] embeddingService: generating embedding for text (${safe.length} chars)`);
108
120
  const response = await withRetry(() => requestEmbeddings(apiKey, safe));
109
121
  winston().verbose('[search-agent] embeddingService: embedding generated successfully');
110
- return response.data[0].embedding;
122
+ const embedding = response.data[0].embedding;
123
+
124
+ if (_embedCache.size >= EMBED_CACHE_MAX) {
125
+ _embedCache.delete(_embedCache.keys().next().value);
126
+ }
127
+ _embedCache.set(safe, embedding);
128
+
129
+ return embedding;
111
130
  }
112
131
 
113
132
  /**
@@ -73,6 +73,18 @@
73
73
  </label>
74
74
  </div>
75
75
 
76
+ <div class="form-check form-switch mb-3">
77
+ <input type="checkbox" class="form-check-input" id="hydeEnabled" name="hydeEnabled">
78
+ <label for="hydeEnabled" class="form-check-label">
79
+ Enable HyDE query expansion
80
+ </label>
81
+ <div class="form-text">
82
+ Generates a hypothetical forum post from the query before embedding — improves
83
+ recall for vague queries but adds <strong>one extra LLM call per search</strong>.
84
+ Disable to cut AI costs roughly in half.
85
+ </div>
86
+ </div>
87
+
76
88
  <div class="mb-3">
77
89
  <label class="form-label" for="openaiApiKey">OpenAI API Key</label>
78
90
  <input