npm - nodebb-plugin-search-agent - Versions diffs - 0.0.92 → 0.0.931 - Mend

nodebb-plugin-search-agent 0.0.92 → 0.0.931

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/lib/searchHandler.js +77 -12
package/package.json +1 -1
package/services/embeddingService.js +20 -1
package/templates/admin/plugins/search-agent.tpl +12 -0

package/lib/searchHandler.js CHANGED Viewed

@@ -1,3 +1,9 @@
+// ─── Token estimation helper: rough token count derived from string length ───
+function estimateTokens(str) { // str: text to size; returns an integer estimate (0 for an empty string)
+	// Heuristic of ~4 chars/token (English). At ~2 chars/token (the figure cited for Hebrew) this yields about half the real count, so treat it as a lower bound, not a safe upper one.
+	return Math.ceil(str.length / 4); // .length counts UTF-16 code units; throws if str is null/undefined
+} // NOTE(review): this hunk inserts the function above the file's 'use strict' line; the directive only takes effect as the first statement — confirm placement
 'use strict';
 const https = require('https');
@@ -10,10 +16,42 @@ let cachedTopicMap = null;
 let cacheTs = 0;
 const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
+// ─── Search result cache ──────────────────────────────────────────────────────
+// In-memory cache of final search results, keyed by the normalised query string.
+// A repeated query inside the TTL window is answered from here, skipping all AI calls.
+const _searchCache = new Map(); // normalised query -> { results, ts }, where ts is Date.now() at insert
+const SEARCH_CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes; older entries are treated as misses
+const SEARCH_CACHE_MAX = 200; // max entries kept; the first-inserted key is evicted beyond this
+function _normalizeQuery(q) { // Builds the cache key for a query; assumes q is a string (q.trim() throws otherwise)
+	return q.trim().toLowerCase().replace(/\s+/g, ' '); // strip both ends, lowercase, collapse each whitespace run to one space
+}
+function _getSearchCache(queryText) { // Returns the cached results for queryText, or null when absent or expired
+	const key = _normalizeQuery(queryText);
+	const entry = _searchCache.get(key);
+	if (entry && (Date.now() - entry.ts) < SEARCH_CACHE_TTL_MS) { // a hit only while the entry is younger than the TTL
+		return entry.results; // the stored value itself, not a copy
+	}
+	_searchCache.delete(key); // drop the expired entry; a no-op when the key was never cached
+	return null;
+}
+function _setSearchCache(queryText, results) { // Stores results under the normalised query, stamped with the current time
+	const key = _normalizeQuery(queryText);
+	_searchCache.set(key, { results, ts: Date.now() });
+	if (_searchCache.size > SEARCH_CACHE_MAX) { // checked after the insert, so the map never stays above SEARCH_CACHE_MAX
+		// Evict the first key in insertion order. Map.set on an existing key keeps its original position, so this is the oldest-inserted key, not necessarily the one with the oldest ts.
+		_searchCache.delete(_searchCache.keys().next().value);
+	}
+}
 function invalidateCache() { // Resets the cached topic index/map and, as of this change, the search result cache
 	cachedIndex = null;
 	cachedTopicMap = null;
 	cacheTs = 0; // presumably makes the next TTL check treat the index as stale — the check itself is not visible here
+	_searchCache.clear(); // also empty the per-query result cache
 	require.main.require('winston').info('[search-agent] Topic index cache invalidated.');
 }
@@ -30,6 +68,8 @@ async function getSettings() {
 		openaiModel: (raw.openaiModel || 'gpt-4o-mini').trim(),
 		// How many TF-IDF candidates to send to AI for re-ranking
 		aiCandidates: Math.min(100, Math.max(5, parseInt(raw.aiCandidates, 10) || 30)),
+		// HyDE: generate a hypothetical answer before embedding — improves recall but costs one extra LLM call per search
+		hydeEnabled: raw.hydeEnabled === 'on',
 		// Visibility: 'all' = all logged-in users, 'admins' = administrators only
 		visibleTo: raw.visibleTo || 'all',
 		// Whether guests (non-logged-in users) may use the widget
@@ -163,25 +203,40 @@ async function expandQueryWithHyDE(queryText, apiKey, model) {
  */
 async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxResults, snippetByTid = {}) {
 	console.log('Re-ranking with AI:', { queryText, candidates: candidates.map(c => ({ tid: c.tid, title: (topicMap[String(c.tid)] || {}).title })) });
-    const candidateList = candidates
-		.map((c) => {
-			const title = (topicMap[String(c.tid)] || {}).title || '';
-			const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/[ \t]+/g, ' ').trim();
-			const snippet = raw.length > 0 ? `\n   תוכן: "${raw.slice(0, 1500)}"` : '';
-			return `[tid:${c.tid}] ${title}${snippet}`;
-		})
-		.join('\n\n');
+	// Embed the query and all candidate post snippets
+	const { embed, embedBatch } = require('../services/embeddingService');
+	const queryEmbedding = await embed(queryText);
+	const postSnippets = candidates.map((c) => {
+		const raw = (snippetByTid[String(c.tid)] || '').replace(/<[^>]*>/g, ' ').replace(/[ \t]+/g, ' ').trim();
+		return raw.slice(0, 1500);
+	});
+	const postEmbeddings = await embedBatch(postSnippets);
+	// Format: [tid:..., embedding: [v1, v2, ...]]
+	const candidateList = candidates.map((c, i) => {
+		return `[tid:${c.tid}]\nembedding: [${postEmbeddings[i].slice(0, 8).map(x => x.toFixed(4)).join(', ')} ...]`;
+	}).join('\n\n');
 	const systemPrompt =
 		'אתה מסנן חיפוש פורום מחמיר. ' +
-		'לכל נושא ברשימה, דרג את הרלוונטיות שלו לשאלת המשתמש בסקלה 0-10: ' +
+		'לכל מועמד ברשימה, דרג את הרלוונטיות של embedding הפוסט לembedding של השאלה בסקלה 0-10: ' +
 		'10 = עונה ישירות ובאופן מלא. 7-9 = עונה על חלק משמעותי. 0-6 = לא רלוונטי. ' +
 		'החזר אך ורק JSON תקני במבנה: {"tid": ציון, ...} — לדוגמה: {"42": 9, "15": 3}. ' +
 		'אין להוסיף הסברים, טקסט נוסף, או עיצוב מחוץ ל-JSON.'+
-        'הוסף שדה נוסף "scoreExplanation" עם משפט קצר שמסביר למה נושא עם ציון נמוך לא רלוונטי, כדי שנוכל להבין את שיקול הדעת של המודל.';
+		'הוסף שדה נוסף "scoreExplanation" עם משפט קצר שמסביר לפי מה נעשה הדירוג.';
 	const userMessage =
-		`שאלת המשתמש: "${queryText}"\n\nנושאים:\n${candidateList}`;
+		`embedding של שאלת המשתמש: [${queryEmbedding.slice(0, 8).map(x => x.toFixed(4)).join(', ')} ...]\n\nפוסטים:\n${candidateList}`;
+	// --- Token count logging ---
+	const totalEmbeddingChars = queryText.length + postSnippets.reduce((sum, s) => sum + s.length, 0);
+	const embeddingTokens = estimateTokens(queryText) + postSnippets.reduce((sum, s) => sum + estimateTokens(s), 0);
+	const llmPromptTokens = estimateTokens(systemPrompt) + estimateTokens(userMessage);
+	const winston = require.main.require('winston');
+	winston.info(`[search-agent] Token usage: embedding API ≈ ${embeddingTokens} tokens, LLM prompt ≈ ${llmPromptTokens} tokens (for this search)`);
 	const response = await callOpenAI(apiKey, model, [
 		{ role: 'system', content: systemPrompt },
@@ -220,6 +275,14 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
  */
 async function searchTopics(queryText) {
 	const winston = require.main.require('winston');
+	// ── Search result cache ───────────────────────────────────────────────────
+	const cachedResults = _getSearchCache(queryText);
+	if (cachedResults) {
+		winston.verbose(`[search-agent] Search cache hit for "${queryText}" (${cachedResults.length} results)`);
+		return cachedResults;
+	}
 	const settings = await getSettings();
 	// ── Semantic search (primary) ────────────────────────────────────────────
@@ -230,7 +293,7 @@ async function searchTopics(queryText) {
 		// HyDE: replace the short raw query with a hypothetical answer so the
 		// embedding matches post content more closely.
 		let embeddingQuery = queryText;
-		if (useAI) {
+		if (useAI && settings.hydeEnabled) {
 			try {
 				embeddingQuery = await expandQueryWithHyDE(
 					queryText, settings.openaiApiKey, settings.openaiModel
@@ -314,6 +377,7 @@ async function searchTopics(queryText) {
 			if (results.length > 0) {
 				winston.info(`[search-agent] Semantic search returned ${results.length} results for "${queryText}".`);
+				_setSearchCache(queryText, results);
 				return results;
 			}
 		}
@@ -369,6 +433,7 @@ async function searchTopics(queryText) {
 		url: `/topic/${(topicMap[String(r.tid)] || {}).slug || r.tid}`,
 	}));
 	winston.info(`[search-agent] Final results: ${JSON.stringify(results.map(r => r.title))}`);
+	_setSearchCache(queryText, results);
 	return results;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nodebb-plugin-search-agent",
-  "version": "0.0.92",
+  "version": "0.0.931",
   "description": "NodeBB plugin that adds a floating chat assistant to help users find relevant forum topics using TF-IDF text similarity",
   "main": "library.js",
   "author": "Racheli Bayfus",

package/services/embeddingService.js CHANGED Viewed

@@ -16,6 +16,12 @@ const RETRY_DELAY_MS = 500;
 // Using 1.5 chars/token worst-case: 8000 tokens × 1.5 = 12 000 chars — gives a safe margin.
 const MAX_CHARS = 12000;
+// ─── Embedding cache ──────────────────────────────────────────────────────────
+// In-memory cache that avoids calling the embeddings API twice for the same text.
+// HyDE output varies between calls, so the biggest wins come from repeated identical queries.
+const _embedCache = new Map(); // truncated input text -> embedding returned by the API
+const EMBED_CACHE_MAX = 500; // size cap; embed() evicts the first-inserted key before adding beyond this
 function truncate(text) { // Caps text at MAX_CHARS; shorter input is returned unchanged
 	return text.length > MAX_CHARS ? text.slice(0, MAX_CHARS) : text; // length and slice both count UTF-16 code units
 }
@@ -104,10 +110,23 @@ async function embed(text) {
 	}
 	const safe = truncate(text);
+	if (_embedCache.has(safe)) {
+		winston().verbose('[search-agent] embeddingService: embedding cache hit');
+		return _embedCache.get(safe);
+	}
 	winston().verbose(`[search-agent] embeddingService: generating embedding for text (${safe.length} chars)`);
 	const response = await withRetry(() => requestEmbeddings(apiKey, safe));
 	winston().verbose('[search-agent] embeddingService: embedding generated successfully');
-	return response.data[0].embedding;
+	const embedding = response.data[0].embedding;
+	if (_embedCache.size >= EMBED_CACHE_MAX) {
+		_embedCache.delete(_embedCache.keys().next().value);
+	}
+	_embedCache.set(safe, embedding);
+	return embedding;
 }
 /**

package/templates/admin/plugins/search-agent.tpl CHANGED Viewed

@@ -73,6 +73,18 @@
 						</label>
 					</div>
+					<div class="form-check form-switch mb-3">
+						<input type="checkbox" class="form-check-input" id="hydeEnabled" name="hydeEnabled">
+						<label for="hydeEnabled" class="form-check-label">
+							Enable HyDE query expansion
+						</label>
+						<div class="form-text">
+							Generates a hypothetical forum post from the query before embedding — improves
+							recall for vague queries but adds <strong>one extra LLM call per search</strong>.
+							Disable to cut AI costs roughly in half.
+						</div>
+					</div>
 					<div class="mb-3">
 						<label class="form-label" for="openaiApiKey">OpenAI API Key</label>
 						<input