@o-lang/semantic-doc-search 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@o-lang/semantic-doc-search",
-   "version": "1.0.8",
+   "version": "1.0.10",
    "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
    "main": "src/index.js",
    "type": "commonjs",
@@ -23,6 +23,7 @@
    "node-stream-zip": "*",
    "openai": "^4.3.1",
    "pdf-parse": "^1.1.1",
+   "pg": "^8.16.3",
    "pinecone-client": "^1.0.0",
    "readline": "^1.3.0",
    "redis": "^5.2.0"
package/src/adapters/pgvectorAdapter.js ADDED
@@ -0,0 +1,74 @@
+ // src/adapters/pgvectorAdapter.js
+ const { Pool } = require("pg");
+
+ class PgVectorAdapter {
+   constructor(config = {}) {
+     this.pool = new Pool({
+       connectionString: config.POSTGRES_URL || process.env.POSTGRES_URL,
+       host: config.DB_HOST || process.env.DB_HOST,
+       port: config.DB_PORT || process.env.DB_PORT || 5432,
+       user: config.DB_USER || process.env.DB_USER,
+       password: config.DB_PASSWORD || process.env.DB_PASSWORD,
+       database: config.DB_NAME || process.env.DB_NAME || 'olang',
+     });
+
+     // Test connection
+     this.testConnection();
+   }
+
+   async testConnection() {
+     try {
+       await this.pool.query('SELECT 1');
+       console.log('✅ PgVector adapter connected successfully');
+     } catch (error) {
+       console.error('❌ PgVector connection failed:', error.message);
+       throw error;
+     }
+   }
+
+   async upsert({ id, vector, content, source, metadata = {} }) {
+     // Convert vector to proper format for pgvector
+     const vectorStr = `[${vector.join(',')}]`;
+
+     const query = `
+       INSERT INTO doc_embeddings (id, embedding, content, source, metadata, updated_at)
+       VALUES ($1, $2::vector, $3, $4, $5, NOW())
+       ON CONFLICT (id)
+       DO UPDATE SET
+         embedding = $2::vector,
+         content = $3,
+         source = $4,
+         metadata = $5,
+         updated_at = NOW()`;
+
+     await this.pool.query(query, [id, vectorStr, content, source, JSON.stringify(metadata)]);
+   }
+
+   async query(vector, topK = 5) {
+     const vectorStr = `[${vector.join(',')}]`;
+
+     const query = `
+       SELECT id, content, source, metadata,
+              1 - (embedding <=> $1::vector) AS score
+       FROM doc_embeddings
+       ORDER BY embedding <=> $1::vector
+       LIMIT $2`;
+
+     const res = await this.pool.query(query, [vectorStr, topK]);
+
+     // Return in the format your resolver expects
+     return res.rows.map(row => ({
+       id: row.id,
+       content: row.content,
+       source: row.source,
+       metadata: row.metadata,
+       score: parseFloat(row.score)
+     }));
+   }
+
+   async close() {
+     await this.pool.end();
+   }
+ }
+
+ module.exports = PgVectorAdapter;
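
Note: the adapter above expects a doc_embeddings table with a pgvector embedding column to exist already; no migration ships with the package. The sketch below is a minimal, hypothetical one-off setup script. The table and column names come from the upsert()/query() SQL above; the vector(384) dimension is an assumption and must match whatever embedder is actually used.

// setup-pgvector.js — hypothetical setup sketch, not part of the package
const { Pool } = require("pg");

async function main() {
  const pool = new Pool({ connectionString: process.env.POSTGRES_URL });
  // The pgvector extension must be available on the server.
  await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
  // Columns mirror what PgVectorAdapter reads and writes; 384 dims is an assumption.
  await pool.query(`
    CREATE TABLE IF NOT EXISTS doc_embeddings (
      id         TEXT PRIMARY KEY,
      embedding  vector(384),
      content    TEXT,
      source     TEXT,
      metadata   JSONB,
      updated_at TIMESTAMPTZ DEFAULT NOW()
    )`);
  await pool.end();
}

main().catch(err => { console.error(err); process.exit(1); });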
package/src/index.js CHANGED
@@ -7,6 +7,7 @@ const { chunkText } = require("./utils/chunker.js");
  const { extractKeywords } = require("./utils/extractText.js");
  const { cosine } = require("./utils/similarity.js");
  const { highlightMatches } = require("./utils/highlight.js");
+ const PgVectorAdapter = require("./adapters/pgvectorAdapter.js"); // ✅ Properly imported

  const CACHE_PATH = path.join(process.cwd(), "embeddings.json");

@@ -33,7 +34,7 @@ function saveCache(cache) {
    } catch {}
  }

- // ✅ UNIVERSAL DATABASE ADAPTER (NEW - Keep your existing imports!)
+ // ✅ UNIVERSAL DATABASE ADAPTER (Your existing SQL-based adapter)
  class DatabaseAdapter {
    constructor() {
      this.initialized = false;
@@ -42,7 +43,6 @@ class DatabaseAdapter {
    async initialize(context) {
      if (this.initialized) return;

-     // Initialize based on context configuration
      if (context.db_type === 'mongodb' || context.MONGO_URI) {
        await this.initMongo(context);
      } else if (context.db_type === 'sqlite' || context.db_path) {
@@ -84,7 +84,7 @@ class DatabaseAdapter {
      return await db.collection(collectionName).find(filter, { projection }).toArray();
    }

-   // PostgreSQL Support
+   // PostgreSQL Support (Traditional SQL)
    async initPostgres(context) {
      const { Pool } = require('pg');
      const poolConfig = {
@@ -95,7 +95,6 @@ class DatabaseAdapter {
        password: context.DB_PASSWORD,
        database: context.DB_NAME || 'olang'
      };
-     // Remove undefined/null values
      Object.keys(poolConfig).forEach(key => {
        if (poolConfig[key] === undefined || poolConfig[key] === null) {
          delete poolConfig[key];
@@ -110,7 +109,7 @@ class DatabaseAdapter {
      return result.rows;
    }

-   // Universal Query Method
+   // Universal Query Method (Traditional SQL-based)
    async queryDocuments(context) {
      const {
        db_type,
@@ -158,7 +157,6 @@ class DatabaseAdapter {
        try {
          filter = JSON.parse(doc_filter);
        } catch {
-         // Text search fallback
          filter = { $text: { $search: doc_filter } };
        }
      } else if (typeof doc_filter === 'object' && Object.keys(doc_filter).length > 0) {
@@ -180,7 +178,6 @@ class DatabaseAdapter {
        doc_params = []
      } = context;

-     // Parse doc_params from string if needed
      let params = doc_params;
      if (typeof doc_params === 'string') {
        try {
@@ -212,10 +209,10 @@ class DatabaseAdapter {
    }
  }

- // ✅ LOAD DOCUMENTS FROM DATABASE (if configured)
+ // ✅ LOAD DOCUMENTS FROM DATABASE (SQL-based)
  async function loadDocumentsFromDatabase(context) {
    if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL) {
-     return null; // No database configured
+     return null;
    }

    const dbAdapter = new DatabaseAdapter();
@@ -232,13 +229,11 @@ async function loadDocumentsFromDatabase(context) {
  async function loadAllDocuments(context) {
    const documents = [];

-   // 1. Load from database first (if configured)
    const dbDocs = await loadDocumentsFromDatabase(context);
    if (dbDocs) {
      documents.push(...dbDocs);
    }

-   // 2. Load from file system (existing behavior)
    const baseDir = context.doc_root
      ? safeResolve(process.cwd(), context.doc_root)
      : path.join(process.cwd(), "docs");
@@ -262,8 +257,79 @@ async function loadAllDocuments(context) {
    return documents;
  }

- // ✅ MAIN SEARCH FUNCTION (Your existing logic + universal docs)
- async function performDocQA(query, context = {}) {
+ // ✅ PGVECTOR SEARCH FUNCTION (Using imported adapter)
+ async function performPgVectorSearch(query, context = {}) {
+   const options = context.options || {};
+   const topK = options.topK || 5;
+
+   // Check for POSTGRES_URL in context or environment
+   const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
+   if (!postgresUrl) {
+     return {
+       text: "POSTGRES_URL not configured for pgvector search",
+       meta: { method: "error" }
+     };
+   }
+
+   const embedder = new LocalEmbedding();
+   const pgVectorAdapter = new PgVectorAdapter({
+     POSTGRES_URL: postgresUrl,
+     DB_HOST: context.DB_HOST,
+     DB_PORT: context.DB_PORT,
+     DB_USER: context.DB_USER,
+     DB_PASSWORD: context.DB_PASSWORD,
+     DB_NAME: context.DB_NAME,
+   });
+
+   try {
+     const queryVector = await embedder.embed(query);
+     const docs = await pgVectorAdapter.query(queryVector, topK);
+
+     if (docs.length === 0) {
+       return {
+         text: `No relevant documents found for: "${query}"`,
+         meta: { method: "pgvector-no-results" }
+       };
+     }
+
+     // Use first document as context (or combine multiple)
+     const contextText = docs.map((doc, i) => `(${i + 1}) ${doc.content}`).join("\n\n");
+
+     if (options.provider && options.provider !== "local") {
+       const llm = createLLM({
+         provider: options.provider,
+         openaiApiKey: options.openaiApiKey,
+         groqApiKey: options.groqApiKey,
+         anthropicApiKey: options.anthropicApiKey,
+       });
+
+       const prompt = `Answer the question using the context below.\n\nContext:\n${contextText}\n\nQuestion: ${query}`;
+       const resp = await llm.generate({ prompt: prompt, model: options.model });
+
+       return {
+         text: resp.text,
+         meta: {
+           method: "pgvector-rag",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     } else {
+       // Return raw context without LLM
+       return {
+         text: contextText,
+         meta: {
+           method: "pgvector-retrieval-only",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     }
+   } finally {
+     await pgVectorAdapter.close();
+   }
+ }
+
+ // ✅ MAIN SEARCH FUNCTION (Your existing hybrid logic)
+ async function performHybridDocQA(query, context = {}) {
    const { doc_root, stream = false } = context;
    const options = context.options || {};
    const CHUNK_SIZE = options.chunkSize || 1200;
@@ -276,7 +342,6 @@ async function performDocQA(query, context = {}) {
      return { text: "Missing required input: query" };
    }

-   // Load documents from both database and files
    const allDocs = await loadAllDocuments(context);
    if (!allDocs || !allDocs.length) {
      return { text: "No documents available." };
@@ -396,7 +461,23 @@ async function performDocQA(query, context = {}) {
    };
  }

- // ✅ O-Lang Resolver Interface (Your existing interface - converted to CommonJS)
+ // ✅ SMART ROUTER - Auto-select search method based on context
+ async function performDocQA(query, context = {}) {
+   // 🔍 AUTO-DETECT MODE BASED ON CONTEXT
+
+   // Mode 1: pgvector mode (if PostgreSQL URL provided in context or env)
+   const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
+   if (postgresUrl) {
+     console.log('🔍 Using pgvector search mode');
+     return await performPgVectorSearch(query, context);
+   }
+
+   // Mode 2: Traditional hybrid search (files + databases)
+   console.log('🔍 Using hybrid file/DB search mode');
+   return await performHybridDocQA(query, context);
+ }
+
+ // ✅ O-Lang Resolver Interface
  async function docSearchResolver(action, context) {
    if (action.startsWith('Ask doc-search ')) {
      const match = action.match(/"(.*)"|'(.*)'/);
@@ -406,8 +487,5 @@ async function docSearchResolver(action, context) {
      return undefined;
  }

- // ✅ Resolver name matches package name: @o-lang/doc-search → doc-search
  docSearchResolver.resolverName = 'doc-search';
-
- // ✅ COMMONJS EXPORT (this is the key change)
  module.exports = docSearchResolver;
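
Note: the existing search path is renamed to performHybridDocQA, and the new performDocQA router selects pgvector whenever POSTGRES_URL is set in the context or environment. The sketch below is a hypothetical caller: the 'Ask doc-search "…"' action format, the context keys, and the meta.method strings come from the code above, while the query text and the assumption that the resolver returns performDocQA's result object (its return statement is not shown in this diff) are illustrative.

// Hypothetical caller sketch — not an official example from the package.
const docSearch = require("@o-lang/semantic-doc-search");

(async () => {
  // With POSTGRES_URL available, the router takes the pgvector path.
  const withPg = await docSearch('Ask doc-search "How do I configure retries?"', {
    POSTGRES_URL: process.env.POSTGRES_URL,
    options: { topK: 5 } // retrieval only; set options.provider/model for an LLM answer
  });
  console.log(withPg && withPg.meta.method); // e.g. "pgvector-retrieval-only" or "pgvector-rag"

  // Without a PostgreSQL URL, the original hybrid file/DB search runs instead.
  const hybrid = await docSearch('Ask doc-search "How do I configure retries?"', {
    doc_root: "docs"
  });
  console.log(hybrid && hybrid.meta);
})();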
package/src/services/docQA.js ADDED
@@ -0,0 +1,70 @@
+ // src/services/docQA.js
+ /**
+  * Generic RAG service that works with any vector store adapter
+  */
+ async function performDocQA({
+   query,
+   vectorStore,
+   embedder,
+   llm,
+   topK = 5,
+   useLLM = true
+ }) {
+   try {
+     const queryVector = await embedder.embed(query);
+     const docs = await vectorStore.query(queryVector, topK);
+
+     if (docs.length === 0) {
+       return {
+         text: `No relevant documents found for: "${query}"`,
+         meta: { method: "no_documents" }
+       };
+     }
+
+     const context = docs
+       .map((d, i) => `(${i + 1}) ${d.content}`)
+       .join("\n\n");
+
+     if (!useLLM) {
+       // Return raw context without LLM
+       return {
+         text: context,
+         meta: {
+           method: "vector-retrieval-only",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     }
+
+     // Use LLM to generate answer
+     const prompt = `Answer the question using the context below.\n\nContext:\n${context}\n\nQuestion: ${query}`;
+
+     if (llm && typeof llm.generate === 'function') {
+       const response = await llm.generate({ prompt });
+       return {
+         text: response.text,
+         meta: {
+           method: "rag-with-llm",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     } else {
+       // Fallback to raw context if no LLM
+       return {
+         text: context,
+         meta: {
+           method: "vector-retrieval-only",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     }
+   } catch (error) {
+     console.error('RAG service error:', error);
+     return {
+       text: `Error processing query: ${error.message}`,
+       meta: { method: "error", error: error.message }
+     };
+   }
+ }
+
+ module.exports = { performDocQA };
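
Note: this generic service is not referenced by src/index.js anywhere in this diff; it only depends on the duck-typed interfaces it calls (vectorStore.query(vector, topK), embedder.embed(text), llm.generate({ prompt })). The sketch below shows one way it could be wired to the new PgVectorAdapter; the embedder argument (any object exposing embed(text) → number[]) and the answer() helper are hypothetical.

// Hypothetical wiring sketch under the assumptions above.
const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
const { performDocQA } = require("./services/docQA.js");

async function answer(question, embedder) {
  const store = new PgVectorAdapter({ POSTGRES_URL: process.env.POSTGRES_URL });
  try {
    // useLLM: false skips generation and returns the retrieved context
    // with meta.method === "vector-retrieval-only".
    return await performDocQA({
      query: question,
      vectorStore: store,
      embedder,
      topK: 3,
      useLLM: false
    });
  } finally {
    await store.close();
  }
}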