@o-lang/semantic-doc-search 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/index.js +256 -27
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@o-lang/semantic-doc-search",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
package/src/index.js CHANGED
@@ -33,6 +33,236 @@ function saveCache(cache) {
33
33
  } catch {}
34
34
  }
35
35
 
36
+ // ✅ UNIVERSAL DATABASE ADAPTER (NEW - Keep your existing imports!)
37
+ class DatabaseAdapter {
38
+ constructor() {
39
+ this.initialized = false;
40
+ }
41
+
42
+ async initialize(context) {
43
+ if (this.initialized) return;
44
+
45
+ // Initialize based on context configuration
46
+ if (context.db_type === 'mongodb' || context.MONGO_URI) {
47
+ await this.initMongo(context);
48
+ } else if (context.db_type === 'sqlite' || context.db_path) {
49
+ await this.initSQLite(context);
50
+ } else if (context.db_type === 'postgres' || context.POSTGRES_URL) {
51
+ await this.initPostgres(context);
52
+ }
53
+ this.initialized = true;
54
+ }
55
+
56
+ // SQLite Support
57
+ async initSQLite(context) {
58
+ const Database = require('better-sqlite3');
59
+ const dbPath = context.db_path || './database.db';
60
+ const dbDir = path.dirname(path.resolve(dbPath));
61
+ if (!fs.existsSync(dbDir)) {
62
+ throw new Error(`SQLite database directory not found: ${dbDir}`);
63
+ }
64
+ this.sqliteClient = new Database(dbPath, { readonly: true });
65
+ }
66
+
67
+ async querySQLite(query, params = []) {
68
+ if (!this.sqliteClient) throw new Error('SQLite client not initialized');
69
+ const stmt = this.sqliteClient.prepare(query);
70
+ return stmt.all(...params);
71
+ }
72
+
73
+ // MongoDB Support
74
+ async initMongo(context) {
75
+ const { MongoClient } = require('mongodb');
76
+ const uri = context.MONGO_URI || `mongodb://localhost:27017/${context.db_name || 'olang'}`;
77
+ this.mongoClient = new MongoClient(uri);
78
+ await this.mongoClient.connect();
79
+ }
80
+
81
+ async queryMongo(collectionName, filter = {}, projection = {}) {
82
+ if (!this.mongoClient) throw new Error('MongoDB client not initialized');
83
+ const db = this.mongoClient.db(process.env.DB_NAME || context.db_name || 'olang');
84
+ return await db.collection(collectionName).find(filter, { projection }).toArray();
85
+ }
86
+
87
+ // PostgreSQL Support
88
+ async initPostgres(context) {
89
+ const { Pool } = require('pg');
90
+ const poolConfig = {
91
+ connectionString: context.POSTGRES_URL,
92
+ host: context.DB_HOST || 'localhost',
93
+ port: parseInt(context.DB_PORT) || 5432,
94
+ user: context.DB_USER,
95
+ password: context.DB_PASSWORD,
96
+ database: context.DB_NAME || 'olang'
97
+ };
98
+ // Remove undefined/null values
99
+ Object.keys(poolConfig).forEach(key => {
100
+ if (poolConfig[key] === undefined || poolConfig[key] === null) {
101
+ delete poolConfig[key];
102
+ }
103
+ });
104
+ this.postgresClient = new Pool(poolConfig);
105
+ }
106
+
107
+ async queryPostgres(query, params = []) {
108
+ if (!this.postgresClient) throw new Error('PostgreSQL client not initialized');
109
+ const result = await this.postgresClient.query(query, params);
110
+ return result.rows;
111
+ }
112
+
113
+ // Universal Query Method
114
+ async queryDocuments(context) {
115
+ const {
116
+ db_type,
117
+ db_table = 'documents',
118
+ db_content_column = 'content',
119
+ db_id_column = 'id'
120
+ } = context;
121
+
122
+ if (db_type === 'mongodb' || context.MONGO_URI) {
123
+ const mongoQuery = this.buildMongoQuery(context);
124
+ const results = await this.queryMongo(db_table, mongoQuery.filter, mongoQuery.projection);
125
+ return results.map(doc => ({
126
+ id: doc._id?.toString() || doc.id || doc[db_id_column],
127
+ content: doc[db_content_column] || doc.content || doc.text || '',
128
+ source: `mongodb:${db_table}`
129
+ }));
130
+ }
131
+ else if (db_type === 'sqlite' || context.db_path) {
132
+ const sqliteQuery = this.buildSqlQuery(context, 'sqlite');
133
+ const results = await this.querySQLite(sqliteQuery.sql, sqliteQuery.params);
134
+ return results.map(row => ({
135
+ id: row[db_id_column],
136
+ content: row[db_content_column],
137
+ source: `sqlite:${db_table}`
138
+ }));
139
+ }
140
+ else if (db_type === 'postgres' || context.POSTGRES_URL) {
141
+ const postgresQuery = this.buildSqlQuery(context, 'postgres');
142
+ const results = await this.queryPostgres(postgresQuery.sql, postgresQuery.params);
143
+ return results.map(row => ({
144
+ id: row[db_id_column],
145
+ content: row[db_content_column],
146
+ source: `postgres:${db_table}`
147
+ }));
148
+ }
149
+
150
+ return [];
151
+ }
152
+
153
+ buildMongoQuery(context) {
154
+ const { doc_filter = {}, doc_projection = {} } = context;
155
+
156
+ let filter = {};
157
+ if (typeof doc_filter === 'string') {
158
+ try {
159
+ filter = JSON.parse(doc_filter);
160
+ } catch {
161
+ // Text search fallback
162
+ filter = { $text: { $search: doc_filter } };
163
+ }
164
+ } else if (typeof doc_filter === 'object' && Object.keys(doc_filter).length > 0) {
165
+ filter = doc_filter;
166
+ }
167
+
168
+ const projection = typeof doc_projection === 'string'
169
+ ? JSON.parse(doc_projection)
170
+ : doc_projection;
171
+
172
+ return { filter, projection };
173
+ }
174
+
175
+ buildSqlQuery(context, dialect) {
176
+ const {
177
+ db_content_column = 'content',
178
+ db_id_column = 'id',
179
+ doc_where = '1=1',
180
+ doc_params = []
181
+ } = context;
182
+
183
+ // Parse doc_params from string if needed
184
+ let params = doc_params;
185
+ if (typeof doc_params === 'string') {
186
+ try {
187
+ params = JSON.parse(doc_params);
188
+ } catch {
189
+ params = [doc_params];
190
+ }
191
+ }
192
+
193
+ const table = context.db_table || 'documents';
194
+ const sql = `SELECT ${db_id_column}, ${db_content_column} FROM ${table} WHERE ${doc_where}`;
195
+ return { sql, params };
196
+ }
197
+
198
+ async close() {
199
+ if (this.sqliteClient) {
200
+ try { this.sqliteClient.close(); } catch {}
201
+ this.sqliteClient = null;
202
+ }
203
+ if (this.mongoClient) {
204
+ try { await this.mongoClient.close(); } catch {}
205
+ this.mongoClient = null;
206
+ }
207
+ if (this.postgresClient) {
208
+ try { await this.postgresClient.end(); } catch {}
209
+ this.postgresClient = null;
210
+ }
211
+ this.initialized = false;
212
+ }
213
+ }
214
+
215
+ // ✅ LOAD DOCUMENTS FROM DATABASE (if configured)
216
+ async function loadDocumentsFromDatabase(context) {
217
+ if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL) {
218
+ return null; // No database configured
219
+ }
220
+
221
+ const dbAdapter = new DatabaseAdapter();
222
+ try {
223
+ await dbAdapter.initialize(context);
224
+ return await dbAdapter.queryDocuments(context);
225
+ } catch (error) {
226
+ console.error('🗃️ [doc-search] Database load error:', error.message);
227
+ return null;
228
+ }
229
+ }
230
+
231
+ // ✅ LOAD ALL DOCUMENTS (Database + Files)
232
+ async function loadAllDocuments(context) {
233
+ const documents = [];
234
+
235
+ // 1. Load from database first (if configured)
236
+ const dbDocs = await loadDocumentsFromDatabase(context);
237
+ if (dbDocs) {
238
+ documents.push(...dbDocs);
239
+ }
240
+
241
+ // 2. Load from file system (existing behavior)
242
+ const baseDir = context.doc_root
243
+ ? safeResolve(process.cwd(), context.doc_root)
244
+ : path.join(process.cwd(), "docs");
245
+
246
+ if (fs.existsSync(baseDir)) {
247
+ const files = fs.readdirSync(baseDir).filter(f => f.endsWith(".txt") || f.endsWith(".md"));
248
+ for (const file of files) {
249
+ try {
250
+ const content = fs.readFileSync(path.join(baseDir, file), "utf8");
251
+ documents.push({
252
+ id: file,
253
+ content: content,
254
+ source: `file:${file}`
255
+ });
256
+ } catch (error) {
257
+ console.warn(`⚠️ [doc-search] Failed to read file ${file}: ${error.message}`);
258
+ }
259
+ }
260
+ }
261
+
262
+ return documents;
263
+ }
264
+
265
+ // ✅ MAIN SEARCH FUNCTION (Your existing logic + universal docs)
36
266
  async function performDocQA(query, context = {}) {
37
267
  const { doc_root, stream = false } = context;
38
268
  const options = context.options || {};
@@ -46,25 +276,20 @@ async function performDocQA(query, context = {}) {
46
276
  return { text: "Missing required input: query" };
47
277
  }
48
278
 
49
- const baseDir = doc_root
50
- ? safeResolve(process.cwd(), doc_root)
51
- : path.join(process.cwd(), "docs");
52
-
53
- if (!fs.existsSync(baseDir)) {
54
- return { text: `Document directory not found: ${baseDir}` };
55
- }
56
-
57
- const files = fs.readdirSync(baseDir).filter(f => f.endsWith(".txt") || f.endsWith(".md"));
58
- if (!files.length) {
279
+ // Load documents from both database and files
280
+ const allDocs = await loadAllDocuments(context);
281
+ if (!allDocs || !allDocs.length) {
59
282
  return { text: "No documents available." };
60
283
  }
61
284
 
62
285
  const qLower = query.toLowerCase().trim();
63
- const exact = files.find(f => path.basename(f, path.extname(f)).toLowerCase() === qLower);
64
- if (exact) {
286
+ const exactMatch = allDocs.find(doc =>
287
+ path.basename(doc.id || '', path.extname(doc.id || '')).toLowerCase() === qLower
288
+ );
289
+ if (exactMatch) {
65
290
  return {
66
- text: fs.readFileSync(path.join(baseDir, exact), "utf8"),
67
- meta: { file: exact, method: "exact-filename" }
291
+ text: exactMatch.content,
292
+ meta: { file: exactMatch.id, method: "exact-filename" }
68
293
  };
69
294
  }
70
295
 
@@ -72,13 +297,12 @@ async function performDocQA(query, context = {}) {
72
297
  const docs = [];
73
298
  const localEmbedder = new LocalEmbedding();
74
299
 
75
- for (const file of files) {
76
- const raw = fs.readFileSync(path.join(baseDir, file), "utf8");
77
- const chunks = chunkText(raw, CHUNK_SIZE, OVERLAP);
300
+ for (const doc of allDocs) {
301
+ const chunks = chunkText(doc.content, CHUNK_SIZE, OVERLAP);
78
302
  const chunkObjs = [];
79
303
 
80
304
  for (let i = 0; i < chunks.length; i++) {
81
- const key = `${file}::chunk::${i}`;
305
+ const key = `${doc.id}::chunk::${i}`;
82
306
  let emb = cache[key];
83
307
  if (!emb) {
84
308
  try {
@@ -91,7 +315,7 @@ async function performDocQA(query, context = {}) {
91
315
  }
92
316
  chunkObjs.push({ index: i, text: chunks[i], emb });
93
317
  }
94
- docs.push({ file, raw, chunks: chunkObjs });
318
+ docs.push({ file: doc.id, raw: doc.content, chunks: chunkObjs, source: doc.source });
95
319
  }
96
320
 
97
321
  let queryEmb = null;
@@ -117,20 +341,20 @@ async function performDocQA(query, context = {}) {
117
341
  bestChunk = { ...ch, semScore, lexScore, hybrid };
118
342
  }
119
343
  }
120
- return { file: doc.file, score: bestHybrid, bestChunk };
344
+ return { file: doc.file, score: bestHybrid, bestChunk, source: doc.source };
121
345
  });
122
346
 
123
347
  fileScores.sort((a, b) => b.score - a.score);
124
348
  const best = fileScores[0];
125
349
 
126
350
  if (!best || best.score < MIN_SCORE) {
127
- for (const file of files) {
128
- const text = fs.readFileSync(path.join(baseDir, file), "utf8").toLowerCase();
351
+ for (const doc of allDocs) {
352
+ const text = doc.content.toLowerCase();
129
353
  if (keywords.some(k => text.includes(k))) {
130
354
  const snippetIndex = text.indexOf(keywords.find(k => text.includes(k)));
131
355
  const start = Math.max(0, snippetIndex - 200);
132
356
  const snippet = text.slice(start, Math.min(text.length, snippetIndex + 400));
133
- return { text: snippet, meta: { file, method: "lexical-fallback" } };
357
+ return { text: snippet, meta: { file: doc.id, method: "lexical-fallback", source: doc.source } };
134
358
  }
135
359
  }
136
360
  return { text: `No document found matching: "${query}"` };
@@ -150,24 +374,29 @@ async function performDocQA(query, context = {}) {
150
374
  await llm.stream({ prompt: snippet, model, onToken: context.onToken });
151
375
  return {
152
376
  text: snippet,
153
- meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic-stream" }
377
+ meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic-stream", source: best.source }
154
378
  };
155
379
  } else {
156
380
  const resp = await llm.generate({ prompt: snippet, model });
157
381
  return {
158
382
  text: resp.text,
159
- meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic" }
383
+ meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic", source: best.source }
160
384
  };
161
385
  }
162
386
  }
163
387
 
164
388
  return {
165
389
  text: snippet,
166
- meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic" }
390
+ meta: {
391
+ file: best.file,
392
+ chunkIndex: best.bestChunk.index,
393
+ method: "hybrid-semantic",
394
+ source: best.source
395
+ }
167
396
  };
168
397
  }
169
398
 
170
- // ✅ O-Lang Resolver Interface
399
+ // ✅ O-Lang Resolver Interface (Your existing interface)
171
400
  export default async function docSearchResolver(action, context) {
172
401
  if (action.startsWith('Ask doc-search ')) {
173
402
  const match = action.match(/"(.*)"|'(.*)'/);