@o-lang/semantic-doc-search 1.0.18 → 1.0.21

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@o-lang/semantic-doc-search",
-  "version": "1.0.18",
+  "version": "1.0.21",
   "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
   "main": "src/index.js",
   "type": "commonjs",
@@ -1,31 +1,30 @@
-const PgVectorAdapter = require("./pgvectorAdapter");
-const PineconeAdapter = require("./pineconeAdapter");
-const RedisAdapter = require("./redisAdapter");
-const InMemoryAdapter = require("./inMemoryAdapter");
-
 class VectorRouter {
   static create(config = {}) {
-    const backend = config.backend;
-
-    if (!backend) {
-      throw new Error("Vector backend not specified");
-    }
+    const backend = config.backend || "pgvector";
 
     switch (backend) {
-      case "pgvector":
+      case "pgvector": {
+        const PgVectorAdapter = require("./pgvectorAdapter");
         return new PgVectorAdapter(config);
+      }
 
-      case "pinecone":
-        return new PineconeAdapter(config);
+      case "memory": {
+        const InMemoryAdapter = require("./inMemoryAdapter");
+        return new InMemoryAdapter(config);
+      }
 
-      case "redis":
+      case "redis": {
+        const RedisAdapter = require("./redisAdapter");
         return new RedisAdapter(config);
+      }
 
-      case "memory":
-        return new InMemoryAdapter(config);
+      case "pinecone": {
+        const PineconeAdapter = require("./pineconeAdapter");
+        return new PineconeAdapter(config);
+      }
 
       default:
-        throw new Error(`Unsupported vector backend: ${backend}`);
+        throw new Error(`Unknown vector backend: ${backend}`);
     }
   }
 }
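The rewritten router lazy-loads each adapter inside its own case, so an uninstalled driver (for example the Pinecone SDK) only throws if that backend is actually requested, and an unset backend now falls back to "pgvector" instead of throwing. A minimal usage sketch, assuming the module resolves from ./vectorRouter (this hunk's file path isn't shown in the diff) and using a made-up backend name for the error case:

// Hypothetical usage; the require path is an assumption.
const VectorRouter = require("./vectorRouter");

const store = VectorRouter.create({ backend: "memory", dimension: 384 });
// VectorRouter.create() with no backend now defaults to "pgvector";
// an unrecognized name throws, e.g.:
// VectorRouter.create({ backend: "qdrant" })  // Error: Unknown vector backend: qdrant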
package/src/index.js CHANGED
@@ -1,4 +1,3 @@
-// doc-search.js
 const fs = require("fs");
 const path = require("path");
 const { createLLM } = require("./llm/router.js");
@@ -20,23 +19,36 @@ function safeResolve(base, userPath) {
 
 function loadCache() {
   try {
-    if (fs.existsSync(CACHE_PATH)) return JSON.parse(fs.readFileSync(CACHE_PATH, "utf8")) || {};
+    if (fs.existsSync(CACHE_PATH)) {
+      return JSON.parse(fs.readFileSync(CACHE_PATH, "utf8")) || {};
+    }
   } catch {}
   return {};
 }
 
 function saveCache(cache) {
-  try { fs.writeFileSync(CACHE_PATH, JSON.stringify(cache, null, 2)); } catch {}
+  try {
+    fs.writeFileSync(CACHE_PATH, JSON.stringify(cache, null, 2));
+  } catch {}
 }
 
 // ------------------- DATABASE ADAPTER -------------------
 class DatabaseAdapter {
-  constructor() { this.initialized = false; }
+  constructor() {
+    this.initialized = false;
+  }
+
   async initialize(context) {
     if (this.initialized) return;
-    if (context.db_type === "mongodb" || context.MONGO_URI) await this.initMongo(context);
-    else if (context.db_type === "sqlite" || context.db_path) await this.initSQLite(context);
-    else if (context.db_type === "postgres" || context.POSTGRES_URL) await this.initPostgres(context);
+
+    if (context.db_type === "mongodb" || context.MONGO_URI) {
+      await this.initMongo(context);
+    } else if (context.db_type === "sqlite" || context.db_path) {
+      await this.initSQLite(context);
+    } else if (context.db_type === "postgres" || context.POSTGRES_URL) {
+      await this.initPostgres(context);
+    }
+
     this.initialized = true;
   }
 
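In both versions, initialize() selects a backend from either an explicit db_type or the mere presence of a connection setting (MONGO_URI, db_path, POSTGRES_URL). A sketch of two equivalent ways to trigger a branch; `adapter` stands for a DatabaseAdapter instance and the values are made up:

// Illustrative contexts; key names come from the code above.
await adapter.initialize({ db_type: "sqlite", db_path: "./data/docs.db" });
// or, inferred from the connection string alone:
await adapter.initialize({ MONGO_URI: "mongodb://localhost:27017" });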
@@ -63,8 +75,8 @@ class DatabaseAdapter {
 
   async queryMongo(collectionName, filter = {}, projection = {}) {
     if (!this.mongoClient) throw new Error("MongoDB client not initialized");
-    const db = this.mongoClient.db(process.env.DB_NAME || context.db_name || "olang");
-    return await db.collection(collectionName).find(filter, { projection }).toArray();
+    const db = this.mongoClient.db(process.env.DB_NAME || "olang");
+    return db.collection(collectionName).find(filter, { projection }).toArray();
   }
 
   async initPostgres(context) {
@@ -78,7 +90,7 @@ class DatabaseAdapter {
       database: context.DB_NAME || "olang",
     };
     Object.keys(poolConfig).forEach((k) => {
-      if (poolConfig[k] === undefined || poolConfig[k] === null) delete poolConfig[k];
+      if (poolConfig[k] == null) delete poolConfig[k];
    });
     this.postgresClient = new Pool(poolConfig);
   }
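The loose == null test is an exact shorthand for the pair of strict checks it replaces: it is true only for null and undefined, never for 0, "" or false, so the pool-config pruning behaves identically. A quick demonstration:

// `x == null` matches exactly null and undefined:
[undefined, null, 0, "", false].filter((x) => x == null);                     // [undefined, null]
[undefined, null, 0, "", false].filter((x) => x === undefined || x === null); // same result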
@@ -90,16 +102,24 @@ class DatabaseAdapter {
   }
 
   async queryDocuments(context) {
-    const { db_type, db_table = "documents", db_content_column = "content", db_id_column = "id" } = context;
+    const {
+      db_type,
+      db_table = "documents",
+      db_content_column = "content",
+      db_id_column = "id",
+    } = context;
+
     if (db_type === "mongodb" || context.MONGO_URI) {
       const { filter, projection } = this.buildMongoQuery(context);
       const results = await this.queryMongo(db_table, filter, projection);
       return results.map((doc) => ({
-        id: doc._id?.toString() || doc.id || doc[db_id_column],
-        content: doc[db_content_column] || doc.content || doc.text || "",
+        id: doc._id?.toString() || doc[db_id_column],
+        content: doc[db_content_column] || "",
         source: `mongodb:${db_table}`,
       }));
-    } else if (db_type === "sqlite" || context.db_path) {
+    }
+
+    if (db_type === "sqlite" || context.db_path) {
       const { sql, params } = this.buildSqlQuery(context);
       const results = await this.querySQLite(sql, params);
       return results.map((row) => ({
@@ -107,7 +127,9 @@ class DatabaseAdapter {
         content: row[db_content_column],
         source: `sqlite:${db_table}`,
       }));
-    } else if (db_type === "postgres" || context.POSTGRES_URL) {
+    }
+
+    if (db_type === "postgres" || context.POSTGRES_URL) {
       const { sql, params } = this.buildSqlQuery(context);
       const results = await this.queryPostgres(sql, params);
       return results.map((row) => ({
@@ -116,111 +138,131 @@ class DatabaseAdapter {
         source: `postgres:${db_table}`,
       }));
     }
+
     return [];
   }
 
   buildMongoQuery(context) {
-    const { doc_filter = {}, doc_projection = {} } = context;
     let filter = {};
-    if (typeof doc_filter === "string") {
-      try { filter = JSON.parse(doc_filter); } catch { filter = { $text: { $search: doc_filter } }; }
-    } else if (typeof doc_filter === "object" && Object.keys(doc_filter).length > 0) filter = doc_filter;
-    const projection = typeof doc_projection === "string" ? JSON.parse(doc_projection) : doc_projection;
-    return { filter, projection };
+    if (typeof context.doc_filter === "string") {
+      try {
+        filter = JSON.parse(context.doc_filter);
+      } catch {
+        filter = { $text: { $search: context.doc_filter } };
+      }
+    }
+    return { filter, projection: {} };
   }
 
   buildSqlQuery(context) {
-    const { db_content_column = "content", db_id_column = "id", doc_where = "1=1", doc_params = [] } = context;
-    let params = doc_params;
-    if (typeof doc_params === "string") {
-      try { params = JSON.parse(doc_params); } catch { params = [doc_params]; }
-    }
     const table = context.db_table || "documents";
-    const sql = `SELECT ${db_id_column}, ${db_content_column} FROM ${table} WHERE ${doc_where}`;
-    return { sql, params };
-  }
-
-  async close() {
-    if (this.sqliteClient) { try { this.sqliteClient.close(); } catch {} this.sqliteClient = null; }
-    if (this.mongoClient) { try { await this.mongoClient.close(); } catch {} this.mongoClient = null; }
-    if (this.postgresClient) { try { await this.postgresClient.end(); } catch {} this.postgresClient = null; }
-    this.initialized = false;
+    const where = context.doc_where || "1=1";
+    return {
+      sql: `SELECT * FROM ${table} WHERE ${where}`,
+      params: [],
+    };
   }
 }
 
 // ------------------- DOCUMENT LOADING -------------------
-async function loadDocumentsFromDatabase(context) {
-  if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL) return null;
-  const dbAdapter = new DatabaseAdapter();
-  try { await dbAdapter.initialize(context); return await dbAdapter.queryDocuments(context); } catch (e) { console.error("🗃️ [doc-search] Database load error:", e.message); return null; }
-}
-
 async function loadAllDocuments(context) {
-  const documents = [];
-  const dbDocs = await loadDocumentsFromDatabase(context);
-  if (dbDocs) documents.push(...dbDocs);
+  const docs = [];
+  const db = new DatabaseAdapter();
+
+  try {
+    await db.initialize(context);
+    docs.push(...(await db.queryDocuments(context)));
+  } catch {}
+
+  const baseDir = context.doc_root
+    ? safeResolve(process.cwd(), context.doc_root)
+    : path.join(process.cwd(), "docs");
 
-  const baseDir = context.doc_root ? safeResolve(process.cwd(), context.doc_root) : path.join(process.cwd(), "docs");
   if (fs.existsSync(baseDir)) {
     const files = fs.readdirSync(baseDir).filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
-    for (const file of files) {
-      try {
-        const content = fs.readFileSync(path.join(baseDir, file), "utf8");
-        documents.push({ id: file, content, source: `file:${file}` });
-      } catch (e) { console.warn(`⚠️ [doc-search] Failed to read file ${file}: ${e.message}`); }
+    for (const f of files) {
+      const content = fs.readFileSync(path.join(baseDir, f), "utf8");
+      docs.push({ id: f, content, source: `file:${f}` });
     }
   }
-  return documents;
-}
 
-// ------------------- VECTOR MIGRATION -------------------
-async function checkPgVectorHasData(pgVectorAdapter) {
-  try { const result = await pgVectorAdapter.pool.query("SELECT COUNT(*) FROM doc_embeddings"); return parseInt(result.rows[0].count) > 0; } catch { return false; }
+  return docs;
 }
 
-async function migrateDocumentsToPgVector(docRoot, pgVectorAdapter, embedder) {
-  const baseDir = safeResolve(process.cwd(), docRoot);
-  if (!fs.existsSync(baseDir)) { console.log("📁 No docs directory found, skipping migration"); return; }
-  const files = fs.readdirSync(baseDir).filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
-  console.log(`🔄 Migrating ${files.length} documents to pgvector...`);
-  for (const file of files) {
-    try {
-      const content = fs.readFileSync(path.join(baseDir, file), "utf8");
-      const vector = await embedder.embed(content);
-      await pgVectorAdapter.upsert({ id: file, vector, content, source: `file:${file}` });
-      console.log(`✅ Migrated ${file}`);
-    } catch (e) { console.warn(`⚠️ Failed to migrate ${file}: ${e.message}`); }
+// ------------------- HYBRID SEARCH (FIXED & DEFINED) -------------------
+async function performHybridDocQA(query, context = {}) {
+  const cache = loadCache();
+  const embedder = new LocalEmbedding({ dimension: 384 });
+
+  const router = VectorRouter.create({
+    backend: context.vectorBackend || "memory",
+    dimension: 384,
+    ...context,
+  });
+
+  const documents = await loadAllDocuments(context);
+  if (!documents.length) return { text: "", meta: {} };
+
+  for (const doc of documents) {
+    if (!cache[doc.id]) {
+      const chunks = chunkText(doc.content, 500);
+      cache[doc.id] = [];
+      for (const chunk of chunks) {
+        const vector = await embedder.embed(chunk);
+        await router.upsert({
+          id: `${doc.id}:${cache[doc.id].length}`,
+          vector,
+          text: chunk,
+          source: doc.source,
+        });
+        cache[doc.id].push(vector);
+      }
+    }
   }
+
+  saveCache(cache);
+
+  const queryVector = await embedder.embed(query);
+  const results = await router.search({ embedding: queryVector, topK: 5 });
+
+  return {
+    text: highlightMatches(
+      results.map((r) => r.text).join("\n\n"),
+      extractKeywords(query)
+    ),
+    meta: { matches: results.length },
+  };
 }
 
-// ------------------- VECTOR SEARCH (AUTO SWITCH) -------------------
-async function performVectorQA(query, context = {}) {
-  const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
-  const vectorBackend = context.vectorBackend;
-
-  if (postgresUrl) {
-    return await performPgVectorSearch(query, context);
-  } else if (vectorBackend) {
-    return await performVectorSearch(query, context);
-  } else {
-    return await performHybridDocQA(query, context);
-  }
+// ------------------- PGVECTOR SEARCH -------------------
+async function performPgVectorSearch(query, context = {}) {
+  const adapter = new PgVectorAdapter({ POSTGRES_URL: context.POSTGRES_URL });
+  const embedder = new LocalEmbedding({ dimension: 384 });
+  const vector = await embedder.embed(query);
+  const results = await adapter.search(vector, 5);
+  return {
+    text: results.map((r) => r.content).join("\n\n"),
+    meta: { matches: results.length },
+  };
 }
 
-// ------------------- HYBRID + VECTOR SEARCH FUNCTIONS -------------------
-// [Keep performPgVectorSearch, performHybridDocQA, loadAllDocuments, chunking, cache logic identical to previous full file]
+// ------------------- ROUTER -------------------
+async function performVectorQA(query, context = {}) {
+  if (context.POSTGRES_URL) return performPgVectorSearch(query, context);
+  return performHybridDocQA(query, context);
+}
 
 async function performDocQA(query, context = {}) {
-  return await performVectorQA(query, context);
+  return performVectorQA(query, context);
 }
 
+// ------------------- RESOLVER -------------------
 async function docSearchResolver(action, context) {
-  if (action.startsWith("Ask doc-search ")) {
+  if (action.startsWith("Ask doc-search")) {
     const match = action.match(/"(.*)"|'(.*)'/);
-    const query = match ? match[1] || match[2] : action.replace(/^Ask doc-search\s+/, "").trim();
-    return await performDocQA(query, context);
+    const query = match ? match[1] || match[2] : action.replace("Ask doc-search", "").trim();
+    return performDocQA(query, context);
   }
-  return undefined;
 }
 
 docSearchResolver.resolverName = "doc-search";
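For orientation, a sketch of invoking the resolver as defined above: with no POSTGRES_URL in the context, performVectorQA routes to the hybrid path, and the quoted query is extracted by the regex shown. The concrete values here are invented:

// Hypothetical call; context keys mirror the ones this file reads.
const result = await docSearchResolver(
  'Ask doc-search "how do I enable pgvector?"',
  { vectorBackend: "memory", doc_root: "docs" }
);
// result ≈ { text: "<highlighted passages>", meta: { matches: <hits, at most topK = 5> } }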