nodebb-plugin-search-agent 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,42 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Computes the cosine similarity between two numeric vectors.
5
+ * Handles mismatched lengths by using the shorter vector's dimension.
6
+ * Returns 0 if either vector has zero magnitude.
7
+ *
8
+ * @param {number[]} a
9
+ * @param {number[]} b
10
+ * @returns {number} similarity in [-1, 1]
11
+ */
12
+ function cosineSimilarity(a, b) {
13
+ const len = Math.min(a.length, b.length);
14
+ let dot = 0;
15
+ let magA = 0;
16
+ let magB = 0;
17
+
18
+ for (let i = 0; i < len; i++) {
19
+ dot += a[i] * b[i];
20
+ magA += a[i] * a[i];
21
+ magB += b[i] * b[i];
22
+ }
23
+
24
+ const denom = Math.sqrt(magA) * Math.sqrt(magB);
25
+ return denom === 0 ? 0 : dot / denom;
26
+ }
27
+
28
+ /**
29
+ * Ranks items by cosine similarity to a query embedding.
30
+ * Each item must have an `embedding` property (number[]).
31
+ *
32
+ * @param {number[]} queryEmbedding
33
+ * @param {Array<{embedding: number[], [key: string]: any}>} items
34
+ * @returns {Array<{item: object, score: number}>} sorted descending by score
35
+ */
36
+ function rankBySimilarity(queryEmbedding, items) {
37
+ return items
38
+ .map(item => ({ item, score: cosineSimilarity(queryEmbedding, item.embedding) }))
39
+ .sort((a, b) => b.score - a.score);
40
+ }
41
+
42
+ module.exports = { cosineSimilarity, rankBySimilarity };
@@ -166,7 +166,7 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
166
166
 
167
167
  /**
168
168
  * Search forum topics for the given query text.
169
- * Uses TF-IDF for candidate selection and optionally OpenAI for re-ranking.
169
+ * Tries semantic (vector) search first; falls back to TF-IDF on failure or empty results.
170
170
  *
171
171
  * @param {string} queryText
172
172
  * @returns {Promise<{ tid: number|string, title: string, url: string }[]>}
@@ -174,6 +174,41 @@ async function reRankWithAI(queryText, candidates, topicMap, apiKey, model, maxR
174
174
  async function searchTopics(queryText) {
175
175
  const winston = require.main.require('winston');
176
176
  const settings = await getSettings();
177
+
178
+ // ── Semantic search (primary) ────────────────────────────────────────────
179
+ try {
180
+ const { search: vectorSearch } = require('../services/vectorSearchService');
181
+ const vectorResults = await vectorSearch(queryText);
182
+
183
+ if (vectorResults.length > 0) {
184
+ const Topics = require.main.require('./src/topics');
185
+ const tids = [...new Set(vectorResults.map(r => r.topic_id))];
186
+ const topics = await Topics.getTopicsFields(tids, ['tid', 'title', 'slug', 'deleted']);
187
+ const topicByTid = Object.fromEntries(
188
+ topics.filter(t => t && t.tid && !t.deleted).map(t => [String(t.tid), t])
189
+ );
190
+
191
+ const results = vectorResults
192
+ .filter(r => topicByTid[String(r.topic_id)])
193
+ .map(r => {
194
+ const t = topicByTid[String(r.topic_id)];
195
+ return { tid: t.tid, title: t.title, url: `/topic/${t.slug || t.tid}` };
196
+ })
197
+ .filter((r, i, arr) => arr.findIndex(x => x.tid === r.tid) === i) // dedupe by tid
198
+ .slice(0, settings.maxResults);
199
+
200
+ if (results.length > 0) {
201
+ winston.info(`[search-agent] Semantic search returned ${results.length} results for "${queryText}".`);
202
+ return results;
203
+ }
204
+ }
205
+
206
+ winston.info(`[search-agent] Semantic search returned no results for "${queryText}", falling back to TF-IDF.`);
207
+ } catch (err) {
208
+ winston.warn(`[search-agent] Semantic search failed, falling back to TF-IDF: ${err.message}`);
209
+ }
210
+
211
+ // ── TF-IDF fallback (original logic) ─────────────────────────────────────
177
212
  const { index, topicMap } = await getIndex(settings.topicLimit);
178
213
 
179
214
  winston.info(`[search-agent] Query: "${queryText}" | index size: ${index.length} topics | aiEnabled: ${settings.aiEnabled && !!settings.openaiApiKey}`);
package/library.js CHANGED
@@ -4,6 +4,8 @@ const winston = require.main.require('winston');
4
4
 
5
5
  const controllers = require('./lib/controllers');
6
6
  const { invalidateCache } = require('./lib/searchHandler');
7
+ const { startSync } = require('./services/syncService');
8
+ const { indexPost } = require('./services/realtimeService');
7
9
 
8
10
  const routeHelpers = require.main.require('./src/routes/helpers');
9
11
 
@@ -14,7 +16,7 @@ const plugin = {};
14
16
  * Register admin page route and set up cache-invalidation on new topic creation.
15
17
  */
16
18
  plugin.init = async (params) => {
17
- console.log('[search-agent] plugin.init called');
19
+ winston.info('[search-agent] plugin.init called');
18
20
  const { router } = params;
19
21
 
20
22
  // Admin settings page
@@ -24,7 +26,19 @@ plugin.init = async (params) => {
24
26
  controllers.renderAdminPage
25
27
  );
26
28
 
27
- console.log('[search-agent] Admin route registered: /admin/plugins/search-agent');
29
+ winston.info('[search-agent] Admin route registered: /admin/plugins/search-agent');
30
+
31
+ // Start initial embedding sync in the background — does not block NodeBB startup.
32
+ winston.info('[search-agent] Starting initial embedding sync…');
33
+ startSync().catch(err => winston.warn(`[search-agent] Initial sync failed: ${err.message}`));
34
+
35
+ // Re-sync every 10 minutes to pick up new posts.
36
+ const RESYNC_INTERVAL_MS = 10 * 60 * 1000;
37
+ setInterval(() => {
38
+ winston.info('[search-agent] Running scheduled embedding re-sync…');
39
+ startSync().catch(err => winston.warn(`[search-agent] Scheduled re-sync failed: ${err.message}`));
40
+ }, RESYNC_INTERVAL_MS).unref();
41
+
28
42
  winston.info('[plugins/search-agent] Initialised.');
29
43
  };
30
44
 
@@ -40,7 +54,7 @@ plugin.init = async (params) => {
40
54
  * To allow guests as well, remove the middleware array below.
41
55
  */
42
56
  plugin.addRoutes = async ({ router, middleware, helpers }) => {
43
- console.log('[search-agent] plugin.addRoutes called — registering API routes');
57
+ winston.info('[search-agent] plugin.addRoutes called — registering API routes');
44
58
  const middlewares = [
45
59
  middleware.ensureLoggedIn,
46
60
  ];
@@ -52,7 +66,7 @@ plugin.addRoutes = async ({ router, middleware, helpers }) => {
52
66
  middlewares,
53
67
  (req, res) => controllers.handleQuery(req, res, helpers)
54
68
  );
55
- console.log('[search-agent] API route registered: POST /api/v3/plugins/search-agent/query');
69
+ winston.info('[search-agent] API route registered: POST /api/v3/plugins/search-agent/query');
56
70
 
57
71
  // Public config endpoint — no auth required so guests can read the visibility setting
58
72
  routeHelpers.setupApiRoute(
@@ -62,7 +76,7 @@ plugin.addRoutes = async ({ router, middleware, helpers }) => {
62
76
  [],
63
77
  (req, res) => controllers.getConfig(req, res, helpers)
64
78
  );
65
- console.log('[search-agent] API route registered: GET /api/v3/plugins/search-agent/config');
79
+ winston.info('[search-agent] API route registered: GET /api/v3/plugins/search-agent/config');
66
80
 
67
81
  // Lightweight cache-bust endpoint (admin only)
68
82
  routeHelpers.setupApiRoute(
@@ -71,12 +85,34 @@ plugin.addRoutes = async ({ router, middleware, helpers }) => {
71
85
  '/search-agent/cache/invalidate',
72
86
  [middleware.ensureLoggedIn, middleware.admin.checkPrivileges],
73
87
  (req, res) => {
74
- console.log('[search-agent] Cache invalidation requested by uid:', req.uid);
88
+ winston.info(`[search-agent] Cache invalidation requested by uid: ${req.uid}`);
75
89
  invalidateCache();
76
90
  helpers.formatApiResponse(200, res, { message: 'Cache invalidated.' });
77
91
  }
78
92
  );
79
- console.log('[search-agent] API route registered: POST /api/v3/plugins/search-agent/cache/invalidate');
93
+ winston.info('[search-agent] API route registered: POST /api/v3/plugins/search-agent/cache/invalidate');
94
+ };
95
+
96
+ /**
97
+ * action:post.save
98
+ * Index a newly created post immediately after it is persisted.
99
+ * Non-blocking — errors are caught and logged without affecting the caller.
100
+ */
101
+ plugin.onPostSave = ({ post }) => {
102
+ indexPost(post).catch(err =>
103
+ winston.warn(`[search-agent] realtimeService: failed to index new post pid ${post && post.pid}: ${err.message}`)
104
+ );
105
+ };
106
+
107
+ /**
108
+ * action:post.edit
109
+ * Re-index a post whenever its content is edited.
110
+ * Non-blocking — errors are caught and logged without affecting the caller.
111
+ */
112
+ plugin.onPostEdit = ({ post }) => {
113
+ indexPost(post).catch(err =>
114
+ winston.warn(`[search-agent] realtimeService: failed to re-index edited post pid ${post && post.pid}: ${err.message}`)
115
+ );
80
116
  };
81
117
 
82
118
  /**
@@ -84,7 +120,7 @@ plugin.addRoutes = async ({ router, middleware, helpers }) => {
84
120
  * Add the plugin entry to the ACP sidebar.
85
121
  */
86
122
  plugin.addAdminNavigation = (header) => {
87
- console.log('[search-agent] plugin.addAdminNavigation called — adding ACP sidebar entry');
123
+ winston.info('[search-agent] plugin.addAdminNavigation called — adding ACP sidebar entry');
88
124
  header.plugins.push({
89
125
  route: '/plugins/search-agent',
90
126
  icon: 'fa-comments',
package/package.json CHANGED
@@ -1,15 +1,17 @@
1
1
  {
2
2
  "name": "nodebb-plugin-search-agent",
3
- "version": "0.0.2",
3
+ "version": "0.0.4",
4
4
  "description": "NodeBB plugin that adds a floating chat assistant to help users find relevant forum topics using TF-IDF text similarity",
5
5
  "main": "library.js",
6
6
  "author": "Racheli Bayfus",
7
+ "private": false,
7
8
  "repository": {
8
9
  "type": "git",
9
10
  "url": "git+https://github.com/racheliK9201/nodebb-plugin-search-agent.git"
10
11
  },
11
12
  "scripts": {
12
- "lint": "eslint ."
13
+ "lint": "eslint .",
14
+ "test:ai": "node test/runTests.js"
13
15
  },
14
16
  "keywords": [
15
17
  "nodebb",
@@ -48,5 +50,9 @@
48
50
  "eslint-config-nodebb": "2.0.1",
49
51
  "husky": "9.1.7",
50
52
  "lint-staged": "16.4.0"
53
+ },
54
+ "dependencies": {
55
+ "@orama/orama": "^3.1.18",
56
+ "winston": "^3.19.0"
51
57
  }
52
58
  }
package/plugin.json CHANGED
@@ -7,7 +7,9 @@
7
7
  "hooks": [
8
8
  { "hook": "static:app.load", "method": "init" },
9
9
  { "hook": "static:api.routes", "method": "addRoutes" },
10
- { "hook": "filter:admin.header.build", "method": "addAdminNavigation" }
10
+ { "hook": "filter:admin.header.build", "method": "addAdminNavigation" },
11
+ { "hook": "action:post.save", "method": "onPostSave" },
12
+ { "hook": "action:post.edit", "method": "onPostEdit" }
11
13
  ],
12
14
  "staticDirs": {
13
15
  "static": "./static"
@@ -0,0 +1,135 @@
1
+ 'use strict';
2
+
3
+ const https = require('https');
4
+
5
+ function winston() {
6
+ return require.main.require('winston');
7
+ }
8
+
9
+ const OPENAI_EMBEDDINGS_HOSTNAME = 'api.openai.com';
10
+ const OPENAI_EMBEDDINGS_PATH = '/v1/embeddings';
11
+ const EMBEDDING_MODEL = 'text-embedding-3-small';
12
+ const MAX_RETRIES = 3;
13
+ const RETRY_DELAY_MS = 500;
14
+
15
+ /**
16
+ * Performs an HTTPS POST request to the OpenAI embeddings endpoint.
17
+ * @param {string} apiKey
18
+ * @param {string|string[]} input - Single text or array of texts
19
+ * @returns {Promise<object>} Parsed JSON response body
20
+ */
21
+ function requestEmbeddings(apiKey, input) {
22
+ return new Promise((resolve, reject) => {
23
+ const body = JSON.stringify({ model: EMBEDDING_MODEL, input });
24
+ const options = {
25
+ hostname: OPENAI_EMBEDDINGS_HOSTNAME,
26
+ path: OPENAI_EMBEDDINGS_PATH,
27
+ method: 'POST',
28
+ headers: {
29
+ 'Content-Type': 'application/json',
30
+ 'Authorization': `Bearer ${apiKey}`,
31
+ 'Content-Length': Buffer.byteLength(body),
32
+ },
33
+ };
34
+
35
+ const req = https.request(options, (res) => {
36
+ const chunks = [];
37
+ res.on('data', chunk => chunks.push(chunk));
38
+ res.on('end', () => {
39
+ let parsed;
40
+ try {
41
+ parsed = JSON.parse(Buffer.concat(chunks).toString('utf8'));
42
+ } catch (e) {
43
+ return reject(new Error(`Failed to parse OpenAI response: ${e.message}`));
44
+ }
45
+
46
+ if (res.statusCode >= 400) {
47
+ const message = (parsed.error && parsed.error.message) || `HTTP ${res.statusCode}`;
48
+ return reject(new Error(`OpenAI API error: ${message}`));
49
+ }
50
+
51
+ resolve(parsed);
52
+ });
53
+ });
54
+
55
+ req.on('error', err => reject(new Error(`Network error calling OpenAI: ${err.message}`)));
56
+ req.write(body);
57
+ req.end();
58
+ });
59
+ }
60
+
61
+ /**
62
+ * Retries an async operation up to maxRetries times with exponential back-off.
63
+ * @param {Function} fn - Async function to retry
64
+ * @param {number} retries
65
+ * @returns {Promise<*>}
66
+ */
67
+ async function withRetry(fn, retries = MAX_RETRIES) {
68
+ let lastError;
69
+ for (let attempt = 1; attempt <= retries; attempt++) {
70
+ try {
71
+ return await fn();
72
+ } catch (err) {
73
+ lastError = err;
74
+ if (attempt < retries) {
75
+ winston().warn(`[search-agent] embeddingService: attempt ${attempt} failed (${err.message}), retrying in ${RETRY_DELAY_MS * attempt} ms…`);
76
+ await new Promise(resolve => setTimeout(resolve, RETRY_DELAY_MS * attempt));
77
+ }
78
+ }
79
+ }
80
+ throw lastError;
81
+ }
82
+
83
+ /**
84
+ * Converts a single text string into an embedding vector.
85
+ * @param {string} text
86
+ * @returns {Promise<number[]>}
87
+ */
88
+ async function embed(text) {
89
+ if (typeof text !== 'string' || text.trim() === '') {
90
+ throw new Error('embed() requires a non-empty string');
91
+ }
92
+
93
+ const apiKey = process.env.OPENAI_API_KEY;
94
+ if (!apiKey) {
95
+ throw new Error('OPENAI_API_KEY environment variable is not set');
96
+ }
97
+
98
+ winston().verbose(`[search-agent] embeddingService: generating embedding for text (${text.length} chars)`);
99
+ const response = await withRetry(() => requestEmbeddings(apiKey, text));
100
+ winston().verbose('[search-agent] embeddingService: embedding generated successfully');
101
+ return response.data[0].embedding;
102
+ }
103
+
104
+ /**
105
+ * Converts an array of text strings into an array of embedding vectors.
106
+ * Texts are sent in a single batched API request.
107
+ * @param {string[]} texts
108
+ * @returns {Promise<number[][]>}
109
+ */
110
+ async function embedBatch(texts) {
111
+ if (!Array.isArray(texts) || texts.length === 0) {
112
+ throw new Error('embedBatch() requires a non-empty array of strings');
113
+ }
114
+
115
+ const invalid = texts.findIndex(t => typeof t !== 'string' || t.trim() === '');
116
+ if (invalid !== -1) {
117
+ throw new Error(`embedBatch() received an empty or non-string value at index ${invalid}`);
118
+ }
119
+
120
+ const apiKey = process.env.OPENAI_API_KEY;
121
+ if (!apiKey) {
122
+ throw new Error('OPENAI_API_KEY environment variable is not set');
123
+ }
124
+
125
+ winston().verbose(`[search-agent] embeddingService: generating batch embeddings for ${texts.length} text(s)`);
126
+ const response = await withRetry(() => requestEmbeddings(apiKey, texts));
127
+ winston().verbose(`[search-agent] embeddingService: batch embeddings generated successfully (${texts.length} vector(s))`);
128
+
129
+ // OpenAI returns items sorted by index field, but sort explicitly to be safe
130
+ return response.data
131
+ .sort((a, b) => a.index - b.index)
132
+ .map(item => item.embedding);
133
+ }
134
+
135
+ module.exports = { embed, embedBatch };
@@ -0,0 +1,42 @@
1
+ 'use strict';
2
+
3
+ const { embed } = require('./embeddingService');
4
+ const { saveEmbedding } = require('./vectorStore');
5
+
6
+ function winston() {
7
+ return require.main.require('winston');
8
+ }
9
+
10
+ /**
11
+ * Generate and persist an embedding for a single post.
12
+ * Safe to call fire-and-forget — all errors are caught and logged.
13
+ *
14
+ * @param {{ pid: number|string, tid: number|string, content: string, deleted?: boolean }} postData
15
+ * @returns {Promise<void>}
16
+ */
17
+ async function indexPost(postData) {
18
+ if (!postData || postData.deleted) {
19
+ return;
20
+ }
21
+
22
+ const pid = parseInt(postData.pid, 10);
23
+ const tid = parseInt(postData.tid, 10);
24
+ const content = postData.content;
25
+
26
+ if (Number.isNaN(pid) || Number.isNaN(tid)) {
27
+ winston().warn('[search-agent] realtimeService: skipping post with invalid pid/tid');
28
+ return;
29
+ }
30
+
31
+ if (typeof content !== 'string' || content.trim() === '') {
32
+ return;
33
+ }
34
+
35
+ const vector = await embed(content);
36
+
37
+ await saveEmbedding({ post_id: pid, topic_id: tid, content, embedding: vector });
38
+
39
+ winston().verbose(`[search-agent] realtimeService: indexed pid ${pid}`);
40
+ }
41
+
42
+ module.exports = { indexPost };
@@ -0,0 +1,175 @@
1
+ 'use strict';
2
+
3
+ const { embedBatch } = require('./embeddingService');
4
+ const { saveEmbedding, getMissingEmbeddings } = require('./vectorStore');
5
+
6
+ const BATCH_SIZE = 25;
7
+ // Brief pause between batches to yield the event loop and avoid I/O starvation
8
+ const INTER_BATCH_DELAY_MS = 200;
9
+
10
+ let _syncRunning = false;
11
+
12
+ function sleep(ms) {
13
+ return new Promise(resolve => setTimeout(resolve, ms));
14
+ }
15
+
16
+ function winston() {
17
+ return require.main.require('winston');
18
+ }
19
+
20
+ /**
21
+ * Fetch every post ID from the NodeBB sorted set, ordered newest-first.
22
+ * @returns {Promise<number[]>}
23
+ */
24
+ async function fetchAllPostIds() {
25
+ const db = require.main.require('./src/database');
26
+ const raw = await db.getSortedSetRevRange('posts:pid', 0, -1);
27
+ return (raw || []).map(id => parseInt(id, 10)).filter(n => !Number.isNaN(n));
28
+ }
29
+
30
+ /**
31
+ * Fetch selected fields for a list of post IDs.
32
+ * @param {number[]} pids
33
+ * @returns {Promise<Array<{pid: number, tid: number, content: string, deleted: boolean}>>}
34
+ */
35
+ async function fetchPostFields(pids) {
36
+ const Posts = require.main.require('./src/posts');
37
+ return Posts.getPostsFields(pids, ['pid', 'tid', 'content', 'deleted']);
38
+ }
39
+
40
+ /**
41
+ * Core sync loop. Iterates all posts in batches, skips already-embedded ones,
42
+ * generates embeddings for the rest, and persists them.
43
+ */
44
+ async function runSync() {
45
+ if (_syncRunning) {
46
+ winston().warn('[search-agent] syncService: sync already in progress, skipping duplicate start.');
47
+ return;
48
+ }
49
+ _syncRunning = true;
50
+ winston().info('[search-agent] syncService: starting embedding sync…');
51
+
52
+ try {
53
+ const allPids = await fetchAllPostIds();
54
+
55
+ if (allPids.length === 0) {
56
+ winston().info('[search-agent] syncService: no posts found — sync complete.');
57
+ return;
58
+ }
59
+
60
+ winston().info(`[search-agent] syncService: ${allPids.length} post(s) to scan.`);
61
+
62
+ let totalChecked = 0;
63
+ let totalEmbedded = 0;
64
+ let totalErrors = 0;
65
+
66
+ for (let offset = 0; offset < allPids.length; offset += BATCH_SIZE) {
67
+ const batchPids = allPids.slice(offset, offset + BATCH_SIZE);
68
+
69
+ // Yield between batches so other async work can proceed
70
+ await sleep(INTER_BATCH_DELAY_MS);
71
+
72
+ // ------------------------------------------------------------------
73
+ // 1. Find which PIDs in this batch still lack an embedding
74
+ // ------------------------------------------------------------------
75
+ let missingPids;
76
+ try {
77
+ missingPids = await getMissingEmbeddings(batchPids);
78
+ } catch (err) {
79
+ winston().error(`[search-agent] syncService: failed to query missing embeddings (offset ${offset}): ${err.message}`);
80
+ totalErrors++;
81
+ continue;
82
+ }
83
+
84
+ totalChecked += batchPids.length;
85
+
86
+ if (missingPids.length === 0) {
87
+ continue; // all already embedded
88
+ }
89
+
90
+ // ------------------------------------------------------------------
91
+ // 2. Fetch post content for the missing PIDs
92
+ // ------------------------------------------------------------------
93
+ let rawPosts;
94
+ try {
95
+ rawPosts = await fetchPostFields(missingPids);
96
+ } catch (err) {
97
+ winston().error(`[search-agent] syncService: failed to fetch post fields (offset ${offset}): ${err.message}`);
98
+ totalErrors++;
99
+ continue;
100
+ }
101
+
102
+ // Drop deleted posts and posts with no usable content
103
+ const posts = rawPosts.filter(
104
+ p => p && !p.deleted && typeof p.content === 'string' && p.content.trim() !== ''
105
+ );
106
+
107
+ if (posts.length === 0) {
108
+ continue;
109
+ }
110
+
111
+ // ------------------------------------------------------------------
112
+ // 3. Generate embeddings for this sub-batch
113
+ // ------------------------------------------------------------------
114
+ let vectors;
115
+ try {
116
+ vectors = await embedBatch(posts.map(p => p.content));
117
+ } catch (err) {
118
+ winston().error(`[search-agent] syncService: failed to generate embeddings (offset ${offset}): ${err.message}`);
119
+ totalErrors++;
120
+ continue;
121
+ }
122
+
123
+ // ------------------------------------------------------------------
124
+ // 4. Persist each embedding — failures are isolated per document
125
+ // ------------------------------------------------------------------
126
+ for (let i = 0; i < posts.length; i++) {
127
+ const post = posts[i];
128
+ try {
129
+ await saveEmbedding({
130
+ post_id: parseInt(post.pid, 10),
131
+ topic_id: parseInt(post.tid, 10),
132
+ content: post.content,
133
+ embedding: vectors[i],
134
+ });
135
+ totalEmbedded++;
136
+ } catch (err) {
137
+ winston().error(`[search-agent] syncService: failed to save embedding for pid ${post.pid}: ${err.message}`);
138
+ totalErrors++;
139
+ }
140
+ }
141
+
142
+ winston().info(
143
+ `[search-agent] syncService: progress — ${totalChecked}/${allPids.length} checked, ` +
144
+ `${totalEmbedded} newly embedded, ${totalErrors} error(s).`
145
+ );
146
+ }
147
+
148
+ winston().info(
149
+ `[search-agent] syncService: sync complete — ` +
150
+ `${totalEmbedded} embedding(s) created, ${totalErrors} error(s).`
151
+ );
152
+ } catch (err) {
153
+ winston().error(`[search-agent] syncService: unexpected error — ${err.message}`);
154
+ } finally {
155
+ _syncRunning = false;
156
+ }
157
+ }
158
+
159
+ /**
160
+ * Kick off the embedding sync as a non-blocking background job.
161
+ *
162
+ * Safe to call multiple times — if a sync is already running the new call is
163
+ * silently dropped. Returns immediately so it never delays server startup.
164
+ */
165
+ function startSync() {
166
+ setImmediate(() => {
167
+ runSync().catch(err => {
168
+ require.main.require('winston').error(
169
+ `[search-agent] syncService: unhandled rejection in runSync — ${err.message}`
170
+ );
171
+ });
172
+ });
173
+ }
174
+
175
+ module.exports = { startSync };
@@ -0,0 +1,116 @@
1
+ 'use strict';
2
+
3
+ const { create, insertMultiple, search: oramaSearch } = require('@orama/orama');
4
+ const { embed } = require('./embeddingService');
5
+ const { getAllEmbeddings } = require('./vectorStore');
6
+
7
+ function winston() {
8
+ return require.main.require('winston');
9
+ }
10
+
11
+ const TOP_K = 10;
12
+ // Rebuild the Orama index after this interval (mirrors TF-IDF cache TTL)
13
+ const INDEX_TTL_MS = 5 * 60 * 1000;
14
+
15
+ let _db = null;
16
+ let _dbTs = 0;
17
+ let _buildPromise = null;
18
+
19
+ async function buildIndex() {
20
+ const storedEmbeddings = await getAllEmbeddings();
21
+
22
+ // Detect dimension from data; fall back to 1536 (text-embedding-3-small default)
23
+ const dimensions = storedEmbeddings.length > 0
24
+ ? storedEmbeddings[0].embedding.length
25
+ : 1536;
26
+
27
+ const db = await create({
28
+ schema: {
29
+ post_id: 'number',
30
+ topic_id: 'number',
31
+ content: 'string',
32
+ embedding: `vector[${dimensions}]`,
33
+ },
34
+ });
35
+
36
+ if (storedEmbeddings.length > 0) {
37
+ await insertMultiple(db, storedEmbeddings.map(e => ({
38
+ id: String(e.post_id),
39
+ post_id: e.post_id,
40
+ topic_id: e.topic_id,
41
+ content: e.content,
42
+ embedding: e.embedding,
43
+ })));
44
+ }
45
+
46
+ winston().info(`[search-agent] vectorSearchService: Orama index built with ${storedEmbeddings.length} document(s)`);
47
+ return db;
48
+ }
49
+
50
+ async function getDb() {
51
+ const now = Date.now();
52
+ if (_db && (now - _dbTs) < INDEX_TTL_MS) {
53
+ return _db;
54
+ }
55
+
56
+ if (_buildPromise) {
57
+ return _buildPromise;
58
+ }
59
+
60
+ _buildPromise = buildIndex().then((db) => {
61
+ _db = db;
62
+ _dbTs = Date.now();
63
+ _buildPromise = null;
64
+ return db;
65
+ }).catch((err) => {
66
+ _buildPromise = null;
67
+ throw err;
68
+ });
69
+
70
+ return _buildPromise;
71
+ }
72
+
73
+ /** Invalidate the in-memory Orama index (e.g. after new embeddings are saved). */
74
+ function invalidateIndex() {
75
+ _db = null;
76
+ _dbTs = 0;
77
+ winston().info('[search-agent] vectorSearchService: Orama index invalidated');
78
+ }
79
+
80
+ /**
81
+ * Performs semantic search against stored post embeddings using Orama vector search.
82
+ *
83
+ * @param {string} query - The search query string.
84
+ * @returns {Promise<Array<{ topic_id: number, post_id: number, content: string, score: number }>>}
85
+ * Top results sorted by cosine similarity descending.
86
+ */
87
+ async function search(query) {
88
+ if (typeof query !== 'string' || query.trim() === '') {
89
+ throw new Error('search() requires a non-empty query string');
90
+ }
91
+
92
+ winston().verbose(`[search-agent] vectorSearchService: running Orama vector search for "${query.trim()}"`);
93
+
94
+ const [queryEmbedding, db] = await Promise.all([
95
+ embed(query),
96
+ getDb(),
97
+ ]);
98
+
99
+ const results = await oramaSearch(db, {
100
+ mode: 'vector',
101
+ vector: { value: queryEmbedding, property: 'embedding' },
102
+ limit: TOP_K,
103
+ includeVectors: false,
104
+ });
105
+
106
+ winston().verbose(`[search-agent] vectorSearchService: Orama returned ${results.hits.length} hit(s)`);
107
+
108
+ return results.hits.map(hit => ({
109
+ topic_id: hit.document.topic_id,
110
+ post_id: hit.document.post_id,
111
+ content: hit.document.content,
112
+ score: hit.score,
113
+ }));
114
+ }
115
+
116
+ module.exports = { search, invalidateIndex };
@@ -0,0 +1,137 @@
1
+ 'use strict';
2
+
3
+ const COLLECTION = 'plugin_ai_embeddings';
4
+ // Maximum number of embeddings held in memory. Older posts (lower post_id) are
5
+ // dropped first when this ceiling is reached on initial load.
6
+ const MAX_DATASET_SIZE = 50_000;
7
+
8
+ function getCollection() {
9
+ const db = require.main.require('./src/database');
10
+ return db.client.collection(COLLECTION);
11
+ }
12
+
13
+ function winston() {
14
+ return require.main.require('winston');
15
+ }
16
+
17
+ // Promise-based singleton so concurrent callers share one init operation
18
+ let _ensureIndexes = null;
19
+
20
+ // In-memory cache of loaded embeddings (null = not yet populated)
21
+ let _cache = null;
22
+ // Inflight load promise shared by concurrent first-callers
23
+ let _cachePromise = null;
24
+
25
+ function ensureIndexes() {
26
+ if (!_ensureIndexes) {
27
+ _ensureIndexes = (async () => {
28
+ const col = getCollection();
29
+ await col.createIndex({ post_id: 1 }, { unique: true });
30
+ await col.createIndex({ topic_id: 1 });
31
+ })();
32
+ }
33
+ return _ensureIndexes;
34
+ }
35
+
36
+ /**
37
+ * Upsert an embedding document for a post. Safe to call repeatedly — no duplicates.
38
+ * Also updates the in-memory cache if it is already loaded.
39
+ *
40
+ * @param {{ post_id: number, topic_id: number, content: string, embedding: number[] }} post
41
+ * @returns {Promise<void>}
42
+ */
43
+ async function saveEmbedding(post) {
44
+ const { post_id, topic_id, content, embedding } = post;
45
+ winston().verbose(`[search-agent] vectorStore: upserting embedding for post_id ${post_id}`);
46
+ await ensureIndexes();
47
+ const col = getCollection();
48
+ await col.updateOne(
49
+ { post_id },
50
+ { $set: { post_id, topic_id, content, embedding } },
51
+ { upsert: true }
52
+ );
53
+
54
+ // Keep in-memory cache consistent without requiring a full reload
55
+ if (_cache !== null) {
56
+ const entry = { post_id, topic_id, content, embedding };
57
+ const idx = _cache.findIndex(e => e.post_id === post_id);
58
+ if (idx !== -1) {
59
+ _cache[idx] = entry;
60
+ } else {
61
+ // Prepend so newest posts stay at the front (mirrors load order)
62
+ _cache.unshift(entry);
63
+ if (_cache.length > MAX_DATASET_SIZE) {
64
+ _cache.length = MAX_DATASET_SIZE;
65
+ }
66
+ }
67
+ }
68
+ }
69
+
70
+ /**
71
+ * Retrieve all stored embeddings from the in-memory cache.
72
+ * On first call the cache is populated from MongoDB (up to MAX_DATASET_SIZE
73
+ * documents, newest posts first). Subsequent calls are served from memory.
74
+ *
75
+ * @returns {Promise<Array<{ post_id: number, topic_id: number, content: string, embedding: number[] }>>}
76
+ */
77
+ async function getAllEmbeddings() {
78
+ if (_cache !== null) {
79
+ winston().verbose(`[search-agent] vectorStore: cache hit — returning ${_cache.length} embedding(s) from memory`);
80
+ return _cache;
81
+ }
82
+
83
+ if (!_cachePromise) {
84
+ _cachePromise = (async () => {
85
+ winston().info('[search-agent] vectorStore: loading embeddings from database…');
86
+ await ensureIndexes();
87
+ const col = getCollection();
88
+ _cache = await col
89
+ .find({}, { projection: { _id: 0 } })
90
+ .sort({ post_id: -1 })
91
+ .limit(MAX_DATASET_SIZE)
92
+ .toArray();
93
+ _cachePromise = null;
94
+ winston().info(`[search-agent] vectorStore: loaded ${_cache.length} embedding(s) from database`);
95
+ return _cache;
96
+ })();
97
+ }
98
+
99
+ return _cachePromise;
100
+ }
101
+
102
+ /**
103
+ * Find a single embedding by post ID.
104
+ *
105
+ * @param {number} post_id
106
+ * @returns {Promise<{ post_id: number, topic_id: number, content: string, embedding: number[] } | null>}
107
+ */
108
+ async function findByPostId(post_id) {
109
+ winston().verbose(`[search-agent] vectorStore: looking up embedding for post_id ${post_id}`);
110
+ await ensureIndexes();
111
+ const col = getCollection();
112
+ return col.findOne({ post_id }, { projection: { _id: 0 } });
113
+ }
114
+
115
+ /**
116
+ * Given an array of post IDs, return the subset that have no stored embedding yet.
117
+ *
118
+ * @param {number[]} postIds
119
+ * @returns {Promise<number[]>}
120
+ */
121
+ async function getMissingEmbeddings(postIds) {
122
+ if (!Array.isArray(postIds) || postIds.length === 0) {
123
+ return [];
124
+ }
125
+ winston().verbose(`[search-agent] vectorStore: checking ${postIds.length} post ID(s) for missing embeddings`);
126
+ await ensureIndexes();
127
+ const col = getCollection();
128
+ const existing = await col
129
+ .find({ post_id: { $in: postIds } }, { projection: { _id: 0, post_id: 1 } })
130
+ .toArray();
131
+ const found = new Set(existing.map(doc => doc.post_id));
132
+ const missing = postIds.filter(id => !found.has(id));
133
+ winston().verbose(`[search-agent] vectorStore: ${missing.length} of ${postIds.length} post(s) missing embeddings`);
134
+ return missing;
135
+ }
136
+
137
+ module.exports = { saveEmbedding, getAllEmbeddings, findByPostId, getMissingEmbeddings };
@@ -0,0 +1,71 @@
1
'use strict';

// ─── Stub NodeBB internals ───────────────────────────────────────────────────
// Services call require.main.require('./src/database') etc. which resolves
// relative to this file when running standalone. Intercept those calls and
// return lightweight in-memory fakes so no NodeBB installation is needed.

// Backing storage for the fake Mongo collection, keyed by post_id.
const inMemoryStore = new Map();

/**
 * Build a minimal in-memory stand-in for a MongoDB collection, supporting
 * only the calls the vector store makes: createIndex, updateOne (keyed by
 * the filter's post_id), find (with sort/limit/toArray chaining) and findOne.
 *
 * @returns {object} fake collection object
 */
function makeCollection() {
  // Apply the only filter shape the services use: { post_id: { $in: [...] } }.
  // Any other filter (including {}) matches every stored document.
  const applyFilter = (filter) => {
    const docs = [...inMemoryStore.values()];
    if (filter && filter.post_id && filter.post_id.$in) {
      return docs.filter(doc => filter.post_id.$in.includes(doc.post_id));
    }
    return docs;
  };

  return {
    // Index creation is a no-op for the in-memory fake.
    createIndex: async () => {},
    // Upsert-style write: store the $set payload under the filter's post_id.
    updateOne: async (filter, update) => {
      inMemoryStore.set(filter.post_id, update.$set || {});
    },
    // Cursor-like chain: sort() is ignored, limit(n) caps the result count.
    find: (filter) => {
      const matched = applyFilter(filter);
      const chain = {
        _n: null,
        sort: () => chain,
        limit: (n) => {
          chain._n = n;
          return chain;
        },
        toArray: async () => (chain._n != null ? matched.slice(0, chain._n) : matched),
      };
      return chain;
    },
    findOne: async (filter) => inMemoryStore.get(filter.post_id) || null,
  };
}
33
+
34
// Minimal winston replacement: route each log level to the console with a
// matching prefix so standalone test output stays readable.
const winstonStub = {
  info(...args) {
    console.log('[info]', ...args);
  },
  warn(...args) {
    console.warn('[warn]', ...args);
  },
  error(...args) {
    console.error('[error]', ...args);
  },
  verbose(...args) {
    console.log('[verbose]', ...args);
  },
};
40
+
41
// Map of module IDs (as passed to require.main.require) to in-memory fakes.
const stubs = {
  './src/database': {
    client: { collection: makeCollection },
    getSortedSetRevRange: async () => [],
  },
  './src/posts': { getPostsFields: async () => [] },
  './src/topics': { getTopicsFields: async () => [], getTopicField: async () => '' },
  './src/meta': { settings: { get: async () => ({}) } },
  './src/user': { isAdministrator: async () => false },
  'winston': winstonStub,
};

// Patch require.main.require so stubbed IDs resolve to the fakes above and
// everything else falls through to the real resolver.
// Use Object.hasOwn rather than a truthy `stubs[id]` check: inherited
// Object.prototype keys ('constructor', 'toString', …) are truthy and would
// otherwise be returned as bogus stubs instead of being resolved normally.
const _origRequire = require.main.require.bind(require.main);
require.main.require = (id) => (Object.hasOwn(stubs, id) ? stubs[id] : _origRequire(id));
55
+
56
// ─── Run tests ───────────────────────────────────────────────────────────────

// Run every suite sequentially; testCosine is synchronous, the rest are async.
// NOTE(review): the ordering looks intentional (store populated before
// sync/search run) — confirm before reordering.
(async () => {
  try {
    await require('./testEmbedding')();
    await require('./testVectorStore')();
    require('./testCosine')();
    await require('./testSync')();
    await require('./testSearch')();

    console.log("\n🎉 ALL TESTS PASSED");
  } catch (err) {
    // Log the whole error object, not just err.message, so the stack trace
    // (and any non-Error throw) is visible when diagnosing a failure.
    console.error("\n❌ TEST FAILED:", err);
    process.exit(1);
  }
})();
@@ -0,0 +1,15 @@
1
const { cosineSimilarity } = require('../lib/cosineSimilarity');

/**
 * Unit test for cosineSimilarity. Verifies the exact expected scores for
 * parallel, orthogonal and opposite vectors (within a tolerance), plus the
 * documented contract that a zero-magnitude vector yields exactly 0.
 * Throws on any failure.
 */
function testCosine() {
  console.log("Testing cosine similarity...");

  // Tolerance for floating-point comparisons.
  const EPS = 1e-9;

  const a = cosineSimilarity([1, 0], [1, 0]);
  const b = cosineSimilarity([1, 0], [0, 1]);
  const c = cosineSimilarity([1, 0], [-1, 0]);

  // Identical vectors must score ~1 (the original loose `a < 0.9` check
  // would accept any near-parallel result; pin it to 1 within EPS).
  if (Math.abs(a - 1) > EPS) throw new Error("Expected high similarity");
  // Orthogonal vectors must score ~0.
  if (Math.abs(b) > EPS) throw new Error("Expected low similarity");
  // Opposite vectors must score ~-1.
  if (Math.abs(c + 1) > EPS) throw new Error("Expected opposite similarity of -1");

  // Documented contract: zero-magnitude input returns exactly 0, not NaN.
  if (cosineSimilarity([0, 0], [1, 2]) !== 0) {
    throw new Error("Expected 0 for zero-magnitude vector");
  }

  console.log("✅ Cosine OK");
}

module.exports = testCosine;
@@ -0,0 +1,14 @@
1
const { embed } = require('../services/embeddingService');

/**
 * Smoke test for the embedding service: embedding a sample query must
 * produce a non-empty array. Throws on failure.
 */
async function testEmbedding() {
  console.log("Testing embedding...");

  const result = await embed("How to install NodeBB plugin?");

  if (Array.isArray(result) === false) {
    throw new Error("Not an array");
  }
  if (result.length === 0) {
    throw new Error("Empty embedding");
  }

  console.log("✅ Embedding OK. Length:", result.length);
}

module.exports = testEmbedding;
@@ -0,0 +1,16 @@
1
const { search } = require('../services/vectorSearchService');

/**
 * Smoke test for semantic search: the result must be an array. An empty
 * result is tolerated (logged as a warning) because the database may
 * legitimately contain no embeddings yet. Throws only on a non-array result.
 */
async function testSearch() {
  console.log("Testing semantic search...");

  const results = await search("How to install plugin?");

  if (!Array.isArray(results)) {
    throw new Error("Invalid result");
  }
  if (results.length === 0) {
    console.warn("⚠️ No results (may be expected if DB empty)");
  }

  console.log("✅ Search OK", results.length);
}

module.exports = testSearch;
@@ -0,0 +1,11 @@
1
const { startSync } = require('../services/syncService');

/**
 * Smoke test for the sync service: startSync must complete without throwing.
 * Actual indexing results are only observable in the log output.
 */
async function testSync() {
  console.log("Testing sync...");
  await startSync();
  console.log("✅ Sync executed (check logs for actual indexing)");
}

module.exports = testSync;
@@ -0,0 +1,20 @@
1
const store = require('../services/vectorStore');

/**
 * Smoke test for the vector store: persist one sample embedding, then
 * confirm getAllEmbeddings returns at least one document. Throws on failure.
 */
async function testVectorStore() {
  console.log("Testing vector store...");

  const sample = {
    post_id: 999,
    topic_id: 1,
    content: "test content",
    embedding: [0.1, 0.2, 0.3],
  };
  await store.saveEmbedding(sample);

  const results = await store.getAllEmbeddings(5);
  if (results.length === 0) {
    throw new Error("No data returned");
  }

  console.log("✅ Vector store OK");
}

module.exports = testVectorStore;