@o-lang/semantic-doc-search 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +256 -27
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -33,6 +33,236 @@ function saveCache(cache) {
|
|
|
33
33
|
} catch {}
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
+
// ✅ UNIVERSAL DATABASE ADAPTER (NEW - Keep your existing imports!)
|
|
37
|
+
/**
 * Adapter that loads documents from MongoDB, SQLite, or PostgreSQL.
 *
 * The backend is selected from the context object: an explicit `db_type`
 * ('mongodb' | 'sqlite' | 'postgres') or the presence of `MONGO_URI`,
 * `db_path`, or `POSTGRES_URL`, checked in that order.
 *
 * NOTE(review): the drivers are loaded lazily with `require`, while the
 * visible part of this file also uses `export default`; this assumes a
 * `require` binding is in scope for the module — confirm.
 */
class DatabaseAdapter {
  constructor() {
    this.initialized = false;
    // Captured by initMongo() so queryMongo() can pick the database without
    // needing the context object.
    this.mongoDbName = undefined;
  }

  /**
   * Decide which backend a context refers to.
   * Single source of truth for the precedence used by both initialize() and
   * queryDocuments(): mongodb, then sqlite, then postgres.
   * @param {object} context - resolver context
   * @returns {'mongodb'|'sqlite'|'postgres'|null} null when nothing is configured
   */
  resolveBackend(context) {
    if (context.db_type === 'mongodb' || context.MONGO_URI) return 'mongodb';
    if (context.db_type === 'sqlite' || context.db_path) return 'sqlite';
    if (context.db_type === 'postgres' || context.POSTGRES_URL) return 'postgres';
    return null;
  }

  /**
   * Connect to the backend selected by the context. Calling it again on an
   * already-initialized adapter is a no-op.
   * @param {object} context - resolver context
   */
  async initialize(context) {
    if (this.initialized) return;

    switch (this.resolveBackend(context)) {
      case 'mongodb':
        await this.initMongo(context);
        break;
      case 'sqlite':
        await this.initSQLite(context);
        break;
      case 'postgres':
        await this.initPostgres(context);
        break;
      default:
        break;
    }
    this.initialized = true;
  }

  // SQLite Support

  /**
   * Open the SQLite file in read-only mode.
   * @param {object} context - uses `db_path` (default './database.db')
   * @throws {Error} when the directory containing the database does not exist
   */
  async initSQLite(context) {
    const Database = require('better-sqlite3');
    const dbPath = context.db_path || './database.db';
    const dbDir = path.dirname(path.resolve(dbPath));
    if (!fs.existsSync(dbDir)) {
      throw new Error(`SQLite database directory not found: ${dbDir}`);
    }
    this.sqliteClient = new Database(dbPath, { readonly: true });
  }

  /**
   * Run a SQL statement against SQLite and return all rows.
   * @param {string} query - SQL text
   * @param {Array} [params] - positional bind values
   * @returns {Promise<Array<object>>}
   * @throws {Error} when initSQLite() has not run
   */
  async querySQLite(query, params = []) {
    if (!this.sqliteClient) throw new Error('SQLite client not initialized');
    const stmt = this.sqliteClient.prepare(query);
    return stmt.all(...params);
  }

  // MongoDB Support

  /**
   * Connect to MongoDB using `MONGO_URI`, or a localhost URI built from
   * `db_name` (default 'olang').
   * @param {object} context - uses `MONGO_URI` and `db_name`
   */
  async initMongo(context) {
    const { MongoClient } = require('mongodb');
    const uri = context.MONGO_URI || `mongodb://localhost:27017/${context.db_name || 'olang'}`;
    this.mongoDbName = context.db_name;
    this.mongoClient = new MongoClient(uri);
    await this.mongoClient.connect();
  }

  /**
   * Fetch documents from a MongoDB collection.
   * The database name is `process.env.DB_NAME`, else the `db_name` captured
   * by initMongo(), else 'olang'.
   * @param {string} collectionName
   * @param {object} [filter]
   * @param {object} [projection]
   * @returns {Promise<Array<object>>}
   * @throws {Error} when initMongo() has not run
   */
  async queryMongo(collectionName, filter = {}, projection = {}) {
    if (!this.mongoClient) throw new Error('MongoDB client not initialized');
    // The previous implementation read an undeclared `context` here and threw
    // a ReferenceError whenever DB_NAME was not set in the environment.
    const dbName = process.env.DB_NAME || this.mongoDbName || 'olang';
    const db = this.mongoClient.db(dbName);
    return await db.collection(collectionName).find(filter, { projection }).toArray();
  }

  // PostgreSQL Support

  /**
   * Create a PostgreSQL connection pool from the context.
   * @param {object} context - uses `POSTGRES_URL`, `DB_HOST`, `DB_PORT`,
   *   `DB_USER`, `DB_PASSWORD`, `DB_NAME`
   */
  async initPostgres(context) {
    const { Pool } = require('pg');
    const candidateConfig = {
      connectionString: context.POSTGRES_URL,
      host: context.DB_HOST || 'localhost',
      port: Number.parseInt(context.DB_PORT, 10) || 5432,
      user: context.DB_USER,
      password: context.DB_PASSWORD,
      database: context.DB_NAME || 'olang'
    };
    // Drop unset options so they are not passed to the Pool at all.
    const poolConfig = Object.fromEntries(
      Object.entries(candidateConfig).filter(([, value]) => value !== undefined && value !== null)
    );
    this.postgresClient = new Pool(poolConfig);
  }

  /**
   * Run a SQL statement against PostgreSQL and return the rows.
   * @param {string} query - SQL text
   * @param {Array} [params] - positional bind values
   * @returns {Promise<Array<object>>}
   * @throws {Error} when initPostgres() has not run
   */
  async queryPostgres(query, params = []) {
    if (!this.postgresClient) throw new Error('PostgreSQL client not initialized');
    const result = await this.postgresClient.query(query, params);
    return result.rows;
  }

  // Universal Query Method

  /**
   * Load documents from the configured backend in a common shape.
   * @param {object} context - uses `db_table` (default 'documents'),
   *   `db_content_column` (default 'content'), `db_id_column` (default 'id')
   *   plus the filter options read by buildMongoQuery()/buildSqlQuery()
   * @returns {Promise<Array<{id: *, content: *, source: string}>>} empty array
   *   when no backend is configured
   */
  async queryDocuments(context) {
    const {
      db_table = 'documents',
      db_content_column = 'content',
      db_id_column = 'id'
    } = context;

    const backend = this.resolveBackend(context);

    if (backend === 'mongodb') {
      const { filter, projection } = this.buildMongoQuery(context);
      const results = await this.queryMongo(db_table, filter, projection);
      return results.map(doc => ({
        id: doc._id?.toString() || doc.id || doc[db_id_column],
        content: doc[db_content_column] || doc.content || doc.text || '',
        source: `mongodb:${db_table}`
      }));
    }

    if (backend === 'sqlite' || backend === 'postgres') {
      const { sql, params } = this.buildSqlQuery(context, backend);
      const rows = backend === 'sqlite'
        ? await this.querySQLite(sql, params)
        : await this.queryPostgres(sql, params);
      return rows.map(row => ({
        id: row[db_id_column],
        // NULL columns become '' so SQL rows match the MongoDB branch, which
        // already falls back to an empty string.
        content: row[db_content_column] ?? '',
        source: `${backend}:${db_table}`
      }));
    }

    return [];
  }

  /**
   * True for non-null, non-array objects.
   * @param {*} value
   * @returns {boolean}
   */
  static isPlainObject(value) {
    return value !== null && typeof value === 'object' && !Array.isArray(value);
  }

  /**
   * Build the MongoDB filter and projection from the context.
   * A string `doc_filter` is parsed as JSON; when it is not valid JSON, or
   * the JSON is not an object, it is used as a `$text` search phrase.
   * @param {object} context - uses `doc_filter` and `doc_projection`
   * @returns {{filter: object, projection: object}}
   * @throws {SyntaxError} when a string `doc_projection` is not valid JSON
   */
  buildMongoQuery(context) {
    const { doc_filter = {}, doc_projection = {} } = context;

    let filter = {};
    if (typeof doc_filter === 'string') {
      let parsed;
      try {
        parsed = JSON.parse(doc_filter);
      } catch {
        parsed = undefined;
      }
      // Text search fallback (also covers JSON scalars/arrays such as "42",
      // which are not usable as a filter document).
      filter = DatabaseAdapter.isPlainObject(parsed)
        ? parsed
        : { $text: { $search: doc_filter } };
    } else if (
      doc_filter !== null && // destructuring defaults do not replace an explicit null
      typeof doc_filter === 'object' &&
      Object.keys(doc_filter).length > 0
    ) {
      filter = doc_filter;
    }

    const projection = typeof doc_projection === 'string'
      ? JSON.parse(doc_projection)
      : doc_projection;

    return { filter, projection };
  }

  /**
   * Reject anything that is not a bare SQL identifier. Table and column names
   * cannot be bound as parameters, so they are validated before being
   * interpolated into the statement.
   * @param {*} value - candidate identifier
   * @param {string} label - option name used in the error message
   * @param {boolean} [allowQualified] - accept one `schema.name` qualifier
   * @returns {string} the validated identifier
   * @throws {Error} when the value is not a plain identifier
   */
  static assertSqlIdentifier(value, label, allowQualified = false) {
    const pattern = allowQualified
      ? /^[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)?$/
      : /^[A-Za-z_][A-Za-z0-9_]*$/;
    if (typeof value !== 'string' || !pattern.test(value)) {
      throw new Error(`Invalid SQL identifier for ${label}: ${String(value)}`);
    }
    return value;
  }

  /**
   * Build the SELECT statement used for SQLite and PostgreSQL.
   *
   * `doc_where` is inserted verbatim as the WHERE clause, so it must come
   * from trusted configuration; values belong in `doc_params`, using the
   * placeholder syntax of the target database.
   *
   * @param {object} context - uses `db_table`, `db_id_column`,
   *   `db_content_column`, `doc_where` (default '1=1'), `doc_params`
   * @param {string} dialect - 'sqlite' or 'postgres'; currently not used when
   *   building the statement
   * @returns {{sql: string, params: Array}}
   * @throws {Error} when a table or column name is not a plain identifier
   */
  buildSqlQuery(context, dialect) {
    const {
      db_content_column = 'content',
      db_id_column = 'id',
      doc_where = '1=1',
      doc_params = []
    } = context;

    // Parse doc_params from string if needed
    let params = doc_params;
    if (typeof doc_params === 'string') {
      try {
        params = JSON.parse(doc_params);
      } catch {
        params = [doc_params];
      }
    }
    // A JSON scalar (e.g. "5") or a missing value must still be bindable:
    // both drivers expect an array of positional values.
    if (!Array.isArray(params)) {
      params = params === undefined || params === null ? [] : [params];
    }

    const table = DatabaseAdapter.assertSqlIdentifier(
      context.db_table || 'documents',
      'db_table',
      true
    );
    const idColumn = DatabaseAdapter.assertSqlIdentifier(db_id_column, 'db_id_column');
    const contentColumn = DatabaseAdapter.assertSqlIdentifier(db_content_column, 'db_content_column');

    const sql = `SELECT ${idColumn}, ${contentColumn} FROM ${table} WHERE ${doc_where}`;
    return { sql, params };
  }

  /**
   * Release every open client. Errors raised while closing are ignored on
   * purpose (best-effort cleanup); the adapter can be initialized again
   * afterwards.
   */
  async close() {
    if (this.sqliteClient) {
      try { this.sqliteClient.close(); } catch {}
      this.sqliteClient = null;
    }
    if (this.mongoClient) {
      try { await this.mongoClient.close(); } catch {}
      this.mongoClient = null;
    }
    if (this.postgresClient) {
      try { await this.postgresClient.end(); } catch {}
      this.postgresClient = null;
    }
    this.mongoDbName = undefined;
    this.initialized = false;
  }
}
|
|
214
|
+
|
|
215
|
+
// ✅ LOAD DOCUMENTS FROM DATABASE (if configured)
|
|
216
|
+
/**
 * Load documents from the database described by the context, if any.
 *
 * A database counts as configured when the context has `db_type`, `db_path`,
 * `MONGO_URI`, or `POSTGRES_URL`.
 *
 * @param {object} context - resolver context
 * @returns {Promise<Array<object>|null>} the documents, or null when no
 *   database is configured or the load failed (the failure is logged)
 */
async function loadDocumentsFromDatabase(context) {
  const hasDatabaseConfig = Boolean(
    context.db_type || context.db_path || context.MONGO_URI || context.POSTGRES_URL
  );
  if (!hasDatabaseConfig) {
    return null; // No database configured
  }

  const dbAdapter = new DatabaseAdapter();
  try {
    await dbAdapter.initialize(context);
    return await dbAdapter.queryDocuments(context);
  } catch (error) {
    console.error('🗃️ [doc-search] Database load error:', error.message);
    return null;
  } finally {
    // A fresh adapter is created per call, so its connections must be released
    // here; otherwise every search leaks a client/pool and open handles can
    // keep the process alive.
    await dbAdapter.close();
  }
}
|
|
230
|
+
|
|
231
|
+
// ✅ LOAD ALL DOCUMENTS (Database + Files)
|
|
232
|
+
/**
 * Collect every searchable document: database rows first (when a database is
 * configured), then the `.txt` / `.md` files found directly inside the
 * document directory.
 *
 * The directory is `context.doc_root` resolved against the current working
 * directory via safeResolve(), or `<cwd>/docs` when `doc_root` is not set.
 *
 * @param {object} context - resolver context
 * @returns {Promise<Array<{id: *, content: *, source: string}>>}
 */
async function loadAllDocuments(context) {
  const documents = [];

  // 1. Load from database first (if configured)
  const dbDocs = await loadDocumentsFromDatabase(context);
  if (dbDocs) {
    // Appended one by one: spreading a large result set into push() can
    // exceed the engine's argument limit and throw a RangeError.
    for (const dbDoc of dbDocs) {
      documents.push(dbDoc);
    }
  }

  // 2. Load from file system (existing behavior)
  const baseDir = context.doc_root
    ? safeResolve(process.cwd(), context.doc_root)
    : path.join(process.cwd(), "docs");

  let files = [];
  try {
    files = fs.readdirSync(baseDir).filter(f => f.endsWith(".txt") || f.endsWith(".md"));
  } catch (error) {
    // A missing directory is the normal "no file documents" case. Anything
    // else (not a directory, no permission) is reported, but must not discard
    // the database documents already collected.
    if (error.code !== "ENOENT") {
      console.warn(`⚠️ [doc-search] Failed to list documents in ${baseDir}: ${error.message}`);
    }
  }

  for (const file of files) {
    try {
      const content = fs.readFileSync(path.join(baseDir, file), "utf8");
      documents.push({
        id: file,
        content,
        source: `file:${file}`
      });
    } catch (error) {
      console.warn(`⚠️ [doc-search] Failed to read file ${file}: ${error.message}`);
    }
  }

  return documents;
}
|
|
264
|
+
|
|
265
|
+
// ✅ MAIN SEARCH FUNCTION (Your existing logic + universal docs)
|
|
36
266
|
async function performDocQA(query, context = {}) {
|
|
37
267
|
const { doc_root, stream = false } = context;
|
|
38
268
|
const options = context.options || {};
|
|
@@ -46,25 +276,20 @@ async function performDocQA(query, context = {}) {
|
|
|
46
276
|
return { text: "Missing required input: query" };
|
|
47
277
|
}
|
|
48
278
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
if (!fs.existsSync(baseDir)) {
|
|
54
|
-
return { text: `Document directory not found: ${baseDir}` };
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
const files = fs.readdirSync(baseDir).filter(f => f.endsWith(".txt") || f.endsWith(".md"));
|
|
58
|
-
if (!files.length) {
|
|
279
|
+
// Load documents from both database and files
|
|
280
|
+
const allDocs = await loadAllDocuments(context);
|
|
281
|
+
if (!allDocs || !allDocs.length) {
|
|
59
282
|
return { text: "No documents available." };
|
|
60
283
|
}
|
|
61
284
|
|
|
62
285
|
const qLower = query.toLowerCase().trim();
|
|
63
|
-
const
|
|
64
|
-
|
|
286
|
+
const exactMatch = allDocs.find(doc =>
|
|
287
|
+
path.basename(doc.id || '', path.extname(doc.id || '')).toLowerCase() === qLower
|
|
288
|
+
);
|
|
289
|
+
if (exactMatch) {
|
|
65
290
|
return {
|
|
66
|
-
text:
|
|
67
|
-
meta: { file:
|
|
291
|
+
text: exactMatch.content,
|
|
292
|
+
meta: { file: exactMatch.id, method: "exact-filename" }
|
|
68
293
|
};
|
|
69
294
|
}
|
|
70
295
|
|
|
@@ -72,13 +297,12 @@ async function performDocQA(query, context = {}) {
|
|
|
72
297
|
const docs = [];
|
|
73
298
|
const localEmbedder = new LocalEmbedding();
|
|
74
299
|
|
|
75
|
-
for (const
|
|
76
|
-
const
|
|
77
|
-
const chunks = chunkText(raw, CHUNK_SIZE, OVERLAP);
|
|
300
|
+
for (const doc of allDocs) {
|
|
301
|
+
const chunks = chunkText(doc.content, CHUNK_SIZE, OVERLAP);
|
|
78
302
|
const chunkObjs = [];
|
|
79
303
|
|
|
80
304
|
for (let i = 0; i < chunks.length; i++) {
|
|
81
|
-
const key = `${
|
|
305
|
+
const key = `${doc.id}::chunk::${i}`;
|
|
82
306
|
let emb = cache[key];
|
|
83
307
|
if (!emb) {
|
|
84
308
|
try {
|
|
@@ -91,7 +315,7 @@ async function performDocQA(query, context = {}) {
|
|
|
91
315
|
}
|
|
92
316
|
chunkObjs.push({ index: i, text: chunks[i], emb });
|
|
93
317
|
}
|
|
94
|
-
docs.push({ file, raw, chunks: chunkObjs });
|
|
318
|
+
docs.push({ file: doc.id, raw: doc.content, chunks: chunkObjs, source: doc.source });
|
|
95
319
|
}
|
|
96
320
|
|
|
97
321
|
let queryEmb = null;
|
|
@@ -117,20 +341,20 @@ async function performDocQA(query, context = {}) {
|
|
|
117
341
|
bestChunk = { ...ch, semScore, lexScore, hybrid };
|
|
118
342
|
}
|
|
119
343
|
}
|
|
120
|
-
return { file: doc.file, score: bestHybrid, bestChunk };
|
|
344
|
+
return { file: doc.file, score: bestHybrid, bestChunk, source: doc.source };
|
|
121
345
|
});
|
|
122
346
|
|
|
123
347
|
fileScores.sort((a, b) => b.score - a.score);
|
|
124
348
|
const best = fileScores[0];
|
|
125
349
|
|
|
126
350
|
if (!best || best.score < MIN_SCORE) {
|
|
127
|
-
for (const
|
|
128
|
-
const text =
|
|
351
|
+
for (const doc of allDocs) {
|
|
352
|
+
const text = doc.content.toLowerCase();
|
|
129
353
|
if (keywords.some(k => text.includes(k))) {
|
|
130
354
|
const snippetIndex = text.indexOf(keywords.find(k => text.includes(k)));
|
|
131
355
|
const start = Math.max(0, snippetIndex - 200);
|
|
132
356
|
const snippet = text.slice(start, Math.min(text.length, snippetIndex + 400));
|
|
133
|
-
return { text: snippet, meta: { file, method: "lexical-fallback" } };
|
|
357
|
+
return { text: snippet, meta: { file: doc.id, method: "lexical-fallback", source: doc.source } };
|
|
134
358
|
}
|
|
135
359
|
}
|
|
136
360
|
return { text: `No document found matching: "${query}"` };
|
|
@@ -150,24 +374,29 @@ async function performDocQA(query, context = {}) {
|
|
|
150
374
|
await llm.stream({ prompt: snippet, model, onToken: context.onToken });
|
|
151
375
|
return {
|
|
152
376
|
text: snippet,
|
|
153
|
-
meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic-stream" }
|
|
377
|
+
meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic-stream", source: best.source }
|
|
154
378
|
};
|
|
155
379
|
} else {
|
|
156
380
|
const resp = await llm.generate({ prompt: snippet, model });
|
|
157
381
|
return {
|
|
158
382
|
text: resp.text,
|
|
159
|
-
meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic" }
|
|
383
|
+
meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic", source: best.source }
|
|
160
384
|
};
|
|
161
385
|
}
|
|
162
386
|
}
|
|
163
387
|
|
|
164
388
|
return {
|
|
165
389
|
text: snippet,
|
|
166
|
-
meta: {
|
|
390
|
+
meta: {
|
|
391
|
+
file: best.file,
|
|
392
|
+
chunkIndex: best.bestChunk.index,
|
|
393
|
+
method: "hybrid-semantic",
|
|
394
|
+
source: best.source
|
|
395
|
+
}
|
|
167
396
|
};
|
|
168
397
|
}
|
|
169
398
|
|
|
170
|
-
// ✅ O-Lang Resolver Interface
|
|
399
|
+
// ✅ O-Lang Resolver Interface (Your existing interface)
|
|
171
400
|
export default async function docSearchResolver(action, context) {
|
|
172
401
|
if (action.startsWith('Ask doc-search ')) {
|
|
173
402
|
const match = action.match(/"(.*)"|'(.*)'/);
|