npm - nodebb-plugin-search-agent - Versions diffs - 0.0.9 → 0.0.92 - Mend

nodebb-plugin-search-agent 0.0.9 → 0.0.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/lib/searchHandler.js +2 -0
package/package.json +1 -1
package/services/embeddingService.js +14 -4

package/lib/searchHandler.js CHANGED Viewed

@@ -188,6 +188,8 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
 		{ role: 'user', content: userMessage },
 	]);
+    console.log('AI scoring response:', response.choices[0].message.content);
 	const content = (response.choices[0].message.content || '').trim();
 	// Extract the JSON object from the response

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "nodebb-plugin-search-agent",
-  "version": "0.0.9",
+  "version": "0.0.92",
   "description": "NodeBB plugin that adds a floating chat assistant to help users find relevant forum topics using TF-IDF text similarity",
   "main": "library.js",
   "author": "Racheli Bayfus",

package/services/embeddingService.js CHANGED Viewed

@@ -11,6 +11,14 @@ const OPENAI_EMBEDDINGS_PATH = '/v1/embeddings';
 const EMBEDDING_MODEL = 'text-embedding-3-small';
 const MAX_RETRIES = 3;
 const RETRY_DELAY_MS = 500;
+// text-embedding-3-small supports 8 192 tokens.
+// Hebrew/non-ASCII text tokenizes at ~1.5–2 chars/token (UTF-8 multibyte).
+// Using 1.5 chars/token worst-case: 8000 tokens × 1.5 = 12 000 chars — gives a safe margin.
+const MAX_CHARS = 12000;
+function truncate(text) {
+	return text.length > MAX_CHARS ? text.slice(0, MAX_CHARS) : text;
+}
 /**
  * Performs an HTTPS POST request to the OpenAI embeddings endpoint.
@@ -95,8 +103,9 @@ async function embed(text) {
 		throw new Error('OPENAI_API_KEY environment variable is not set');
 	}
-	winston().verbose(`[search-agent] embeddingService: generating embedding for text (${text.length} chars)`);
-	const response = await withRetry(() => requestEmbeddings(apiKey, text));
+	const safe = truncate(text);
+	winston().verbose(`[search-agent] embeddingService: generating embedding for text (${safe.length} chars)`);
+	const response = await withRetry(() => requestEmbeddings(apiKey, safe));
 	winston().verbose('[search-agent] embeddingService: embedding generated successfully');
 	return response.data[0].embedding;
 }
@@ -122,8 +131,9 @@ async function embedBatch(texts) {
 		throw new Error('OPENAI_API_KEY environment variable is not set');
 	}
-	winston().verbose(`[search-agent] embeddingService: generating batch embeddings for ${texts.length} text(s)`);
-	const response = await withRetry(() => requestEmbeddings(apiKey, texts));
+	const safeTexts = texts.map(truncate);
+	winston().verbose(`[search-agent] embeddingService: generating batch embeddings for ${safeTexts.length} text(s)`);
+	const response = await withRetry(() => requestEmbeddings(apiKey, safeTexts));
 	winston().verbose(`[search-agent] embeddingService: batch embeddings generated successfully (${texts.length} vector(s))`);
 	// OpenAI returns items sorted by index field, but sort explicitly to be safe