@o-lang/semantic-doc-search 1.0.10 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@o-lang/semantic-doc-search",
3
- "version": "1.0.10",
3
+ "version": "1.0.12",
4
4
  "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
5
5
  "main": "src/index.js",
6
6
  "type": "commonjs",
@@ -11,57 +11,44 @@ class PgVectorAdapter {
11
11
  password: config.DB_PASSWORD || process.env.DB_PASSWORD,
12
12
  database: config.DB_NAME || process.env.DB_NAME || 'olang',
13
13
  });
14
-
15
- // Test connection
16
- this.testConnection();
17
14
  }
18
15
 
19
- async testConnection() {
20
- try {
21
- await this.pool.query('SELECT 1');
22
- console.log('✅ PgVector adapter connected successfully');
23
- } catch (error) {
24
- console.error('❌ PgVector connection failed:', error.message);
25
- throw error;
26
- }
16
+ // Convert JavaScript array to PostgreSQL array format
17
+ // [1.0, 2.0, 3.0] -> {1.0,2.0,3.0}
18
+ arrayToPgArray(arr) {
19
+ return `{${arr.join(',')}}`;
27
20
  }
28
21
 
29
22
  async upsert({ id, vector, content, source, metadata = {} }) {
30
- // Convert vector to proper format for pgvector
31
- const vectorStr = `[${vector.join(',')}]`;
32
-
33
- const query = `
34
- INSERT INTO doc_embeddings (id, embedding, content, source, metadata, updated_at)
35
- VALUES ($1, $2::vector, $3, $4, $5, NOW())
36
- ON CONFLICT (id)
37
- DO UPDATE SET
38
- embedding = $2::vector,
39
- content = $3,
40
- source = $4,
41
- metadata = $5,
42
- updated_at = NOW()`;
23
+ // Convert to PostgreSQL array format (NOT JSON)
24
+ const pgVector = this.arrayToPgArray(vector);
43
25
 
44
- await this.pool.query(query, [id, vectorStr, content, source, JSON.stringify(metadata)]);
26
+ await this.pool.query(
27
+ `INSERT INTO doc_embeddings (id, embedding, content, source, metadata)
28
+ VALUES ($1, $2::vector, $3, $4, $5::jsonb)
29
+ ON CONFLICT (id) DO UPDATE
30
+ SET embedding = $2::vector, content = $3, source = $4, metadata = $5::jsonb, updated_at = NOW()`,
31
+ [id, pgVector, content, source, JSON.stringify(metadata)]
32
+ );
45
33
  }
46
34
 
47
35
  async query(vector, topK = 5) {
48
- const vectorStr = `[${vector.join(',')}]`;
49
-
50
- const query = `
51
- SELECT id, content, source, metadata,
52
- 1 - (embedding <=> $1::vector) AS score
53
- FROM doc_embeddings
54
- ORDER BY embedding <=> $1::vector
55
- LIMIT $2`;
36
+ const pgVector = this.arrayToPgArray(vector);
56
37
 
57
- const res = await this.pool.query(query, [vectorStr, topK]);
38
+ const res = await this.pool.query(
39
+ `SELECT id, content, source, metadata,
40
+ 1 - (embedding <=> $1::vector) AS score
41
+ FROM doc_embeddings
42
+ ORDER BY embedding <=> $1::vector
43
+ LIMIT $2`,
44
+ [pgVector, topK]
45
+ );
58
46
 
59
- // Return in the format your resolver expects
60
47
  return res.rows.map(row => ({
61
48
  id: row.id,
62
49
  content: row.content,
63
50
  source: row.source,
64
- metadata: row.metadata,
51
+ meta: row.metadata,
65
52
  score: parseFloat(row.score)
66
53
  }));
67
54
  }
package/src/index.js CHANGED
@@ -257,12 +257,50 @@ async function loadAllDocuments(context) {
257
257
  return documents;
258
258
  }
259
259
 
260
- // PGVECTOR SEARCH FUNCTION (Using imported adapter)
260
+ // 🔥 AUTO-MIGRATION HELPER FUNCTIONS
261
/**
 * Reports whether the `doc_embeddings` table currently holds at least one row.
 *
 * This is a best-effort probe: any failure of the count query yields `false`
 * rather than an exception. A missing table (SQLSTATE 42P01) is the expected
 * "nothing migrated yet" case and stays silent; every other failure is logged
 * so that a connection or permission problem is not mistaken for an empty store.
 *
 * @param {{ pool: { query: Function } }} pgVectorAdapter - Adapter exposing a
 *   `pool.query(sql)` that resolves to `{ rows: [{ count }] }`.
 * @returns {Promise<boolean>} `true` when the row count is greater than zero.
 */
async function checkPgVectorHasData(pgVectorAdapter) {
  try {
    const result = await pgVectorAdapter.pool.query('SELECT COUNT(*) FROM doc_embeddings');
    // COUNT(*) arrives as a string; parse it explicitly as base 10.
    return Number.parseInt(result.rows[0].count, 10) > 0;
  } catch (error) {
    const isMissingTable = Boolean(error) && error.code === '42P01';
    if (!isMissingTable) {
      const reason = error instanceof Error ? error.message : String(error);
      console.warn(`⚠️ Could not read doc_embeddings row count, treating as empty: ${reason}`);
    }
    return false;
  }
}
270
+
271
/**
 * Embeds every `.txt` / `.md` file found directly inside `docRoot` and upserts
 * it into pgvector, using the file name as the row id and `file:<name>` as the
 * source.
 *
 * Files are processed one at a time. A failure on one file is logged and does
 * not stop the remaining files. Sub-directories are not traversed, and
 * directory entries whose names happen to end in `.txt` / `.md` are skipped.
 *
 * @param {string} docRoot - Documents directory, resolved against
 *   `process.cwd()` through `safeResolve`.
 * @param {{ upsert: Function }} pgVectorAdapter - Target store.
 * @param {{ embed: Function }} embedder - `embed(text)` resolves to the vector
 *   handed to `upsert`.
 * @returns {Promise<{migrated: number, failed: number}>} Counts of files that
 *   were stored and files that raised an error (both 0 when the directory
 *   does not exist).
 */
async function migrateDocumentsToPgVector(docRoot, pgVectorAdapter, embedder) {
  const summary = { migrated: 0, failed: 0 };

  const baseDir = safeResolve(process.cwd(), docRoot);
  if (!fs.existsSync(baseDir)) {
    console.log('📁 No docs directory found, skipping migration');
    return summary;
  }

  // Async directory listing and reads keep the event loop free while a
  // potentially large document set is being imported.
  const entries = await fs.promises.readdir(baseDir, { withFileTypes: true });
  const files = entries
    .filter((entry) => !entry.isDirectory())
    .map((entry) => entry.name)
    .filter((name) => name.endsWith(".txt") || name.endsWith(".md"));
  console.log(`🔄 Migrating ${files.length} documents to pgvector...`);

  for (const file of files) {
    try {
      const content = await fs.promises.readFile(path.join(baseDir, file), "utf8");
      const vector = await embedder.embed(content);

      await pgVectorAdapter.upsert({
        id: file,
        vector,
        content,
        source: `file:${file}`
      });
      summary.migrated += 1;
      console.log(`✅ Migrated ${file}`);
    } catch (error) {
      summary.failed += 1;
      console.warn(`⚠️ Failed to migrate ${file}: ${error.message}`);
    }
  }

  if (summary.failed > 0) {
    console.warn(`⚠️ ${summary.failed} of ${files.length} documents could not be migrated`);
  }
  return summary;
}
298
+
299
+ // ✅ PGVECTOR SEARCH FUNCTION WITH AUTO-MIGRATION
261
300
  async function performPgVectorSearch(query, context = {}) {
262
301
  const options = context.options || {};
263
302
  const topK = options.topK || 5;
264
303
 
265
- // Check for POSTGRES_URL in context or environment
266
304
  const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
267
305
  if (!postgresUrl) {
268
306
  return {
@@ -282,6 +320,16 @@ async function performPgVectorSearch(query, context = {}) {
282
320
  });
283
321
 
284
322
  try {
323
+ // 🔥 AUTO-MIGRATION LOGIC
324
+ if (context.migrate_on_demand && context.doc_root) {
325
+ const hasData = await checkPgVectorHasData(pgVectorAdapter);
326
+ if (!hasData) {
327
+ console.log('🔄 Auto-migrating documents to pgvector (first run)...');
328
+ await migrateDocumentsToPgVector(context.doc_root, pgVectorAdapter, embedder);
329
+ console.log('✅ Migration completed');
330
+ }
331
+ }
332
+
285
333
  const queryVector = await embedder.embed(query);
286
334
  const docs = await pgVectorAdapter.query(queryVector, topK);
287
335
 
@@ -465,7 +513,7 @@ async function performHybridDocQA(query, context = {}) {
465
513
  async function performDocQA(query, context = {}) {
466
514
  // 🔍 AUTO-DETECT MODE BASED ON CONTEXT
467
515
 
468
- // Mode 1: pgvector mode (if PostgreSQL URL provided in context or env)
516
+ // Mode 1: pgvector mode (if PostgreSQL URL provided)
469
517
  const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
470
518
  if (postgresUrl) {
471
519
  console.log('🔍 Using pgvector search mode');