@o-lang/semantic-doc-search 1.0.8 → 1.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@o-lang/semantic-doc-search",
-   "version": "1.0.8",
+   "version": "1.0.10",
    "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
    "main": "src/index.js",
    "type": "commonjs",
@@ -23,6 +23,7 @@
    "node-stream-zip": "*",
    "openai": "^4.3.1",
    "pdf-parse": "^1.1.1",
+   "pg": "^8.16.3",
    "pinecone-client": "^1.0.0",
    "readline": "^1.3.0",
    "redis": "^5.2.0"
package/src/adapters/pgvectorAdapter.js ADDED
@@ -0,0 +1,74 @@
+ // src/adapters/pgvectorAdapter.js
+ const { Pool } = require("pg");
+
+ class PgVectorAdapter {
+   constructor(config = {}) {
+     this.pool = new Pool({
+       connectionString: config.POSTGRES_URL || process.env.POSTGRES_URL,
+       host: config.DB_HOST || process.env.DB_HOST,
+       port: config.DB_PORT || process.env.DB_PORT || 5432,
+       user: config.DB_USER || process.env.DB_USER,
+       password: config.DB_PASSWORD || process.env.DB_PASSWORD,
+       database: config.DB_NAME || process.env.DB_NAME || 'olang',
+     });
+
+     // Test connection
+     this.testConnection();
+   }
+
+   async testConnection() {
+     try {
+       await this.pool.query('SELECT 1');
+       console.log('✅ PgVector adapter connected successfully');
+     } catch (error) {
+       console.error('❌ PgVector connection failed:', error.message);
+       throw error;
+     }
+   }
+
+   async upsert({ id, vector, content, source, metadata = {} }) {
+     // Convert vector to proper format for pgvector
+     const vectorStr = `[${vector.join(',')}]`;
+
+     const query = `
+       INSERT INTO doc_embeddings (id, embedding, content, source, metadata, updated_at)
+       VALUES ($1, $2::vector, $3, $4, $5, NOW())
+       ON CONFLICT (id)
+       DO UPDATE SET
+         embedding = $2::vector,
+         content = $3,
+         source = $4,
+         metadata = $5,
+         updated_at = NOW()`;
+
+     await this.pool.query(query, [id, vectorStr, content, source, JSON.stringify(metadata)]);
+   }
+
+   async query(vector, topK = 5) {
+     const vectorStr = `[${vector.join(',')}]`;
+
+     const query = `
+       SELECT id, content, source, metadata,
+              1 - (embedding <=> $1::vector) AS score
+       FROM doc_embeddings
+       ORDER BY embedding <=> $1::vector
+       LIMIT $2`;
+
+     const res = await this.pool.query(query, [vectorStr, topK]);
+
+     // Return in the format your resolver expects
+     return res.rows.map(row => ({
+       id: row.id,
+       content: row.content,
+       source: row.source,
+       metadata: row.metadata,
+       score: parseFloat(row.score)
+     }));
+   }
+
+   async close() {
+     await this.pool.end();
+   }
+ }
+
+ module.exports = PgVectorAdapter;
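
Note: the adapter above expects a doc_embeddings table with a pgvector embedding column to exist already; no migration ships with the package. The sketch below is a minimal, hypothetical one-off setup script. The table and column names come from the upsert()/query() SQL above; the vector(384) dimension is an assumption and must match whatever embedder is actually used.

// setup-pgvector.js — hypothetical setup sketch, not part of the package
const { Pool } = require("pg");

async function main() {
  const pool = new Pool({ connectionString: process.env.POSTGRES_URL });
  // The pgvector extension must be available on the server.
  await pool.query("CREATE EXTENSION IF NOT EXISTS vector");
  // Columns mirror what PgVectorAdapter reads and writes; 384 dims is an assumption.
  await pool.query(`
    CREATE TABLE IF NOT EXISTS doc_embeddings (
      id         TEXT PRIMARY KEY,
      embedding  vector(384),
      content    TEXT,
      source     TEXT,
      metadata   JSONB,
      updated_at TIMESTAMPTZ DEFAULT NOW()
    )`);
  await pool.end();
}

main().catch(err => { console.error(err); process.exit(1); });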
package/src/index.js CHANGED
@@ -7,6 +7,7 @@ const { chunkText } = require("./utils/chunker.js");
  const { extractKeywords } = require("./utils/extractText.js");
  const { cosine } = require("./utils/similarity.js");
  const { highlightMatches } = require("./utils/highlight.js");
+ const PgVectorAdapter = require("./adapters/pgvectorAdapter.js"); // ✅ Properly imported

  const CACHE_PATH = path.join(process.cwd(), "embeddings.json");

@@ -33,7 +34,7 @@ function saveCache(cache) {
    } catch {}
  }

- // ✅ UNIVERSAL DATABASE ADAPTER (NEW - Keep your existing imports!)
+ // ✅ UNIVERSAL DATABASE ADAPTER (Your existing SQL-based adapter)
  class DatabaseAdapter {
    constructor() {
      this.initialized = false;
@@ -42,7 +43,6 @@ class DatabaseAdapter {
    async initialize(context) {
      if (this.initialized) return;

-     // Initialize based on context configuration
      if (context.db_type === 'mongodb' || context.MONGO_URI) {
        await this.initMongo(context);
      } else if (context.db_type === 'sqlite' || context.db_path) {
@@ -84,7 +84,7 @@ class DatabaseAdapter {
      return await db.collection(collectionName).find(filter, { projection }).toArray();
    }

-   // PostgreSQL Support
+   // PostgreSQL Support (Traditional SQL)
    async initPostgres(context) {
      const { Pool } = require('pg');
      const poolConfig = {
@@ -95,7 +95,6 @@ class DatabaseAdapter {
        password: context.DB_PASSWORD,
        database: context.DB_NAME || 'olang'
      };
-     // Remove undefined/null values
      Object.keys(poolConfig).forEach(key => {
        if (poolConfig[key] === undefined || poolConfig[key] === null) {
          delete poolConfig[key];
@@ -110,7 +109,7 @@ class DatabaseAdapter {
      return result.rows;
    }

-   // Universal Query Method
+   // Universal Query Method (Traditional SQL-based)
    async queryDocuments(context) {
      const {
        db_type,
@@ -158,7 +157,6 @@ class DatabaseAdapter {
        try {
          filter = JSON.parse(doc_filter);
        } catch {
-         // Text search fallback
          filter = { $text: { $search: doc_filter } };
        }
      } else if (typeof doc_filter === 'object' && Object.keys(doc_filter).length > 0) {
@@ -180,7 +178,6 @@ class DatabaseAdapter {
        doc_params = []
      } = context;

-     // Parse doc_params from string if needed
      let params = doc_params;
      if (typeof doc_params === 'string') {
        try {
@@ -212,10 +209,10 @@ class DatabaseAdapter {
    }
  }

- // ✅ LOAD DOCUMENTS FROM DATABASE (if configured)
+ // ✅ LOAD DOCUMENTS FROM DATABASE (SQL-based)
  async function loadDocumentsFromDatabase(context) {
    if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL) {
-     return null; // No database configured
+     return null;
    }

    const dbAdapter = new DatabaseAdapter();
@@ -232,13 +229,11 @@ async function loadDocumentsFromDatabase(context) {
  async function loadAllDocuments(context) {
    const documents = [];

-   // 1. Load from database first (if configured)
    const dbDocs = await loadDocumentsFromDatabase(context);
    if (dbDocs) {
      documents.push(...dbDocs);
    }

-   // 2. Load from file system (existing behavior)
    const baseDir = context.doc_root
      ? safeResolve(process.cwd(), context.doc_root)
      : path.join(process.cwd(), "docs");
@@ -262,8 +257,79 @@ async function loadAllDocuments(context) {
    return documents;
  }

- // ✅ MAIN SEARCH FUNCTION (Your existing logic + universal docs)
- async function performDocQA(query, context = {}) {
+ // ✅ PGVECTOR SEARCH FUNCTION (Using imported adapter)
+ async function performPgVectorSearch(query, context = {}) {
+   const options = context.options || {};
+   const topK = options.topK || 5;
+
+   // Check for POSTGRES_URL in context or environment
+   const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
+   if (!postgresUrl) {
+     return {
+       text: "POSTGRES_URL not configured for pgvector search",
+       meta: { method: "error" }
+     };
+   }
+
+   const embedder = new LocalEmbedding();
+   const pgVectorAdapter = new PgVectorAdapter({
+     POSTGRES_URL: postgresUrl,
+     DB_HOST: context.DB_HOST,
+     DB_PORT: context.DB_PORT,
+     DB_USER: context.DB_USER,
+     DB_PASSWORD: context.DB_PASSWORD,
+     DB_NAME: context.DB_NAME,
+   });
+
+   try {
+     const queryVector = await embedder.embed(query);
+     const docs = await pgVectorAdapter.query(queryVector, topK);
+
+     if (docs.length === 0) {
+       return {
+         text: `No relevant documents found for: "${query}"`,
+         meta: { method: "pgvector-no-results" }
+       };
+     }
+
+     // Use first document as context (or combine multiple)
+     const contextText = docs.map((doc, i) => `(${i + 1}) ${doc.content}`).join("\n\n");
+
+     if (options.provider && options.provider !== "local") {
+       const llm = createLLM({
+         provider: options.provider,
+         openaiApiKey: options.openaiApiKey,
+         groqApiKey: options.groqApiKey,
+         anthropicApiKey: options.anthropicApiKey,
+       });
+
+       const prompt = `Answer the question using the context below.\n\nContext:\n${contextText}\n\nQuestion: ${query}`;
+       const resp = await llm.generate({ prompt: prompt, model: options.model });
+
+       return {
+         text: resp.text,
+         meta: {
+           method: "pgvector-rag",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     } else {
+       // Return raw context without LLM
+       return {
+         text: contextText,
+         meta: {
+           method: "pgvector-retrieval-only",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     }
+   } finally {
+     await pgVectorAdapter.close();
+   }
+ }
+
+ // ✅ MAIN SEARCH FUNCTION (Your existing hybrid logic)
+ async function performHybridDocQA(query, context = {}) {
    const { doc_root, stream = false } = context;
    const options = context.options || {};
    const CHUNK_SIZE = options.chunkSize || 1200;
@@ -276,7 +342,6 @@ async function performDocQA(query, context = {}) {
      return { text: "Missing required input: query" };
    }

-   // Load documents from both database and files
    const allDocs = await loadAllDocuments(context);
    if (!allDocs || !allDocs.length) {
      return { text: "No documents available." };
@@ -396,7 +461,23 @@ async function performDocQA(query, context = {}) {
    };
  }

- // ✅ O-Lang Resolver Interface (Your existing interface - converted to CommonJS)
+ // ✅ SMART ROUTER - Auto-select search method based on context
+ async function performDocQA(query, context = {}) {
+   // 🔍 AUTO-DETECT MODE BASED ON CONTEXT
+
+   // Mode 1: pgvector mode (if PostgreSQL URL provided in context or env)
+   const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
+   if (postgresUrl) {
+     console.log('🔍 Using pgvector search mode');
+     return await performPgVectorSearch(query, context);
+   }
+
+   // Mode 2: Traditional hybrid search (files + databases)
+   console.log('🔍 Using hybrid file/DB search mode');
+   return await performHybridDocQA(query, context);
+ }
+
+ // ✅ O-Lang Resolver Interface
  async function docSearchResolver(action, context) {
    if (action.startsWith('Ask doc-search ')) {
      const match = action.match(/"(.*)"|'(.*)'/);
@@ -406,8 +487,5 @@ async function docSearchResolver(action, context) {
      return undefined;
  }

- // ✅ Resolver name matches package name: @o-lang/doc-search → doc-search
  docSearchResolver.resolverName = 'doc-search';
-
- // ✅ COMMONJS EXPORT (this is the key change)
  module.exports = docSearchResolver;
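
Note: the existing search path is renamed to performHybridDocQA, and the new performDocQA router selects pgvector whenever POSTGRES_URL is set in the context or environment. The sketch below is a hypothetical caller: the 'Ask doc-search "…"' action format, the context keys, and the meta.method strings come from the code above, while the query text and the assumption that the resolver returns performDocQA's result object (its return statement is not shown in this diff) are illustrative.

// Hypothetical caller sketch — not an official example from the package.
const docSearch = require("@o-lang/semantic-doc-search");

(async () => {
  // With POSTGRES_URL available, the router takes the pgvector path.
  const withPg = await docSearch('Ask doc-search "How do I configure retries?"', {
    POSTGRES_URL: process.env.POSTGRES_URL,
    options: { topK: 5 } // retrieval only; set options.provider/model for an LLM answer
  });
  console.log(withPg && withPg.meta.method); // e.g. "pgvector-retrieval-only" or "pgvector-rag"

  // Without a PostgreSQL URL, the original hybrid file/DB search runs instead.
  const hybrid = await docSearch('Ask doc-search "How do I configure retries?"', {
    doc_root: "docs"
  });
  console.log(hybrid && hybrid.meta);
})();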
package/src/services/docQA.js ADDED
@@ -0,0 +1,70 @@
+ // src/services/docQA.js
+ /**
+  * Generic RAG service that works with any vector store adapter
+  */
+ async function performDocQA({
+   query,
+   vectorStore,
+   embedder,
+   llm,
+   topK = 5,
+   useLLM = true
+ }) {
+   try {
+     const queryVector = await embedder.embed(query);
+     const docs = await vectorStore.query(queryVector, topK);
+
+     if (docs.length === 0) {
+       return {
+         text: `No relevant documents found for: "${query}"`,
+         meta: { method: "no_documents" }
+       };
+     }
+
+     const context = docs
+       .map((d, i) => `(${i + 1}) ${d.content}`)
+       .join("\n\n");
+
+     if (!useLLM) {
+       // Return raw context without LLM
+       return {
+         text: context,
+         meta: {
+           method: "vector-retrieval-only",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     }
+
+     // Use LLM to generate answer
+     const prompt = `Answer the question using the context below.\n\nContext:\n${context}\n\nQuestion: ${query}`;
+
+     if (llm && typeof llm.generate === 'function') {
+       const response = await llm.generate({ prompt });
+       return {
+         text: response.text,
+         meta: {
+           method: "rag-with-llm",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     } else {
+       // Fallback to raw context if no LLM
+       return {
+         text: context,
+         meta: {
+           method: "vector-retrieval-only",
+           sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
+         }
+       };
+     }
+   } catch (error) {
+     console.error('RAG service error:', error);
+     return {
+       text: `Error processing query: ${error.message}`,
+       meta: { method: "error", error: error.message }
+     };
+   }
+ }
+
+ module.exports = { performDocQA };
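
Note: this generic service is not referenced by src/index.js anywhere in this diff; it only depends on the duck-typed interfaces it calls (vectorStore.query(vector, topK), embedder.embed(text), llm.generate({ prompt })). The sketch below shows one way it could be wired to the new PgVectorAdapter; the embedder argument (any object exposing embed(text) → number[]) and the answer() helper are hypothetical.

// Hypothetical wiring sketch under the assumptions above.
const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
const { performDocQA } = require("./services/docQA.js");

async function answer(question, embedder) {
  const store = new PgVectorAdapter({ POSTGRES_URL: process.env.POSTGRES_URL });
  try {
    // useLLM: false skips generation and returns the retrieved context
    // with meta.method === "vector-retrieval-only".
    return await performDocQA({
      query: question,
      vectorStore: store,
      embedder,
      topK: 3,
      useLLM: false
    });
  } finally {
    await store.close();
  }
}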