@o-lang/semantic-doc-search 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/index.js +51 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@o-lang/semantic-doc-search",
|
|
3
|
-
"version": "1.0.9",
|
|
3
|
+
"version": "1.0.11",
|
|
4
4
|
"description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"type": "commonjs",
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
"node-stream-zip": "*",
|
|
24
24
|
"openai": "^4.3.1",
|
|
25
25
|
"pdf-parse": "^1.1.1",
|
|
26
|
+
"pg": "^8.16.3",
|
|
26
27
|
"pinecone-client": "^1.0.0",
|
|
27
28
|
"readline": "^1.3.0",
|
|
28
29
|
"redis": "^5.2.0"
|
package/src/index.js
CHANGED
|
@@ -257,12 +257,50 @@ async function loadAllDocuments(context) {
|
|
|
257
257
|
return documents;
|
|
258
258
|
}
|
|
259
259
|
|
|
260
|
-
//
|
|
260
|
+
// 🔥 AUTO-MIGRATION HELPER FUNCTIONS
|
|
261
|
+
/**
 * Reports whether the `doc_embeddings` table already contains at least one row.
 *
 * This is a best-effort probe: it never throws. Any failure is reported as
 * "no data" so the caller can decide to run the first-time migration.
 *
 * @param {{ pool: { query: (sql: string) => Promise<{ rows: Array<{ count: string | number }> }> } }} pgVectorAdapter
 *   Adapter exposing a `pool` whose `query` resolves to a result with `rows`.
 * @returns {Promise<boolean>} `true` when the row count is greater than zero,
 *   `false` when the table is empty or the check could not be completed.
 */
async function checkPgVectorHasData(pgVectorAdapter) {
  try {
    const result = await pgVectorAdapter.pool.query('SELECT COUNT(*) FROM doc_embeddings');
    // The count is parsed explicitly as base 10 rather than relying on parseInt's default.
    return Number.parseInt(result.rows[0].count, 10) > 0;
  } catch (error) {
    // 42P01 is PostgreSQL's "undefined_table" code: the expected first-run case,
    // so it stays quiet. Anything else (connection refused, auth failure,
    // malformed result) is still treated as empty, but is surfaced so the
    // operator is not left guessing why a migration was attempted.
    const code = error ? error.code : undefined;
    if (code !== '42P01') {
      const reason = error && error.message ? error.message : String(error);
      console.warn(`⚠️ Could not check pgvector for existing data: ${reason}`);
    }
    return false;
  }
}
|
|
270
|
+
|
|
271
|
+
/**
 * Embeds each `.txt` / `.md` file found directly inside the docs directory
 * and upserts it into pgvector.
 *
 * The directory is obtained from `safeResolve(process.cwd(), docRoot)`
 * (NOTE(review): `safeResolve` is defined elsewhere in this file — presumably
 * it confines the path to the working directory; confirm). If the directory
 * does not exist the function logs a message and returns without doing any
 * work. Files are processed one at a time; a failure on one file is logged as
 * a warning and does not stop the remaining files.
 *
 * @param {string} docRoot - Docs directory, passed to `safeResolve` together with `process.cwd()`.
 * @param {{ upsert: Function }} pgVectorAdapter - Receives `{ id, vector, content, source }` per file.
 * @param {{ embed: Function }} embedder - `embed(content)` is awaited to produce the vector.
 * @returns {Promise<void>}
 */
async function migrateDocumentsToPgVector(docRoot, pgVectorAdapter, embedder) {
  const docsDir = safeResolve(process.cwd(), docRoot);

  const docsDirExists = fs.existsSync(docsDir);
  if (!docsDirExists) {
    console.log('📁 No docs directory found, skipping migration');
    return;
  }

  // Only direct children are listed; subdirectories are not traversed.
  const isMigratable = (name) => [".txt", ".md"].some((ext) => name.endsWith(ext));
  const docNames = fs.readdirSync(docsDir).filter(isMigratable);
  console.log(`🔄 Migrating ${docNames.length} documents to pgvector...`);

  for (const docName of docNames) {
    try {
      const docPath = path.join(docsDir, docName);
      const content = fs.readFileSync(docPath, "utf8");
      const vector = await embedder.embed(content);

      // The file name doubles as the record id.
      const record = {
        id: docName,
        vector,
        content,
        source: `file:${docName}`,
      };
      await pgVectorAdapter.upsert(record);

      console.log(`✅ Migrated ${docName}`);
    } catch (err) {
      // Best-effort: report the failure and move on to the next file.
      console.warn(`⚠️ Failed to migrate ${docName}: ${err.message}`);
    }
  }
}
|
|
298
|
+
|
|
299
|
+
// ✅ PGVECTOR SEARCH FUNCTION WITH AUTO-MIGRATION
|
|
261
300
|
async function performPgVectorSearch(query, context = {}) {
|
|
262
301
|
const options = context.options || {};
|
|
263
302
|
const topK = options.topK || 5;
|
|
264
303
|
|
|
265
|
-
// Check for POSTGRES_URL in context or environment
|
|
266
304
|
const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
|
|
267
305
|
if (!postgresUrl) {
|
|
268
306
|
return {
|
|
@@ -282,6 +320,16 @@ async function performPgVectorSearch(query, context = {}) {
|
|
|
282
320
|
});
|
|
283
321
|
|
|
284
322
|
try {
|
|
323
|
+
// 🔥 AUTO-MIGRATION LOGIC
|
|
324
|
+
if (context.migrate_on_demand && context.doc_root) {
|
|
325
|
+
const hasData = await checkPgVectorHasData(pgVectorAdapter);
|
|
326
|
+
if (!hasData) {
|
|
327
|
+
console.log('🔄 Auto-migrating documents to pgvector (first run)...');
|
|
328
|
+
await migrateDocumentsToPgVector(context.doc_root, pgVectorAdapter, embedder);
|
|
329
|
+
console.log('✅ Migration completed');
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
|
|
285
333
|
const queryVector = await embedder.embed(query);
|
|
286
334
|
const docs = await pgVectorAdapter.query(queryVector, topK);
|
|
287
335
|
|
|
@@ -465,7 +513,7 @@ async function performHybridDocQA(query, context = {}) {
|
|
|
465
513
|
async function performDocQA(query, context = {}) {
|
|
466
514
|
// 🔍 AUTO-DETECT MODE BASED ON CONTEXT
|
|
467
515
|
|
|
468
|
-
// Mode 1: pgvector mode (if PostgreSQL URL provided
|
|
516
|
+
// Mode 1: pgvector mode (if PostgreSQL URL provided)
|
|
469
517
|
const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
|
|
470
518
|
if (postgresUrl) {
|
|
471
519
|
console.log('🔍 Using pgvector search mode');
|