@o-lang/semantic-doc-search 1.0.21 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/index.js +85 -93
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@o-lang/semantic-doc-search",
3
- "version": "1.0.21",
3
+ "version": "1.0.22",
4
4
  "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
5
5
  "main": "src/index.js",
6
6
  "type": "commonjs",
package/src/index.js CHANGED
@@ -13,7 +13,9 @@ const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
13
13
 
14
14
  function safeResolve(base, userPath) {
15
15
  const resolved = path.resolve(base, userPath);
16
- if (!resolved.startsWith(path.resolve(base))) throw new Error("Path traversal detected");
16
+ if (!resolved.startsWith(path.resolve(base))) {
17
+ throw new Error("Path traversal detected");
18
+ }
17
19
  return resolved;
18
20
  }
19
21
 
@@ -32,7 +34,8 @@ function saveCache(cache) {
32
34
  } catch {}
33
35
  }
34
36
 
35
- // ------------------- DATABASE ADAPTER -------------------
37
+ /* ---------------- DATABASE ADAPTER ---------------- */
38
+
36
39
  class DatabaseAdapter {
37
40
  constructor() {
38
41
  this.initialized = false;
@@ -56,115 +59,85 @@ class DatabaseAdapter {
56
59
  const Database = require("better-sqlite3");
57
60
  const dbPath = context.db_path || "./database.db";
58
61
  const dbDir = path.dirname(path.resolve(dbPath));
59
- if (!fs.existsSync(dbDir)) throw new Error(`SQLite database directory not found: ${dbDir}`);
62
+ if (!fs.existsSync(dbDir)) {
63
+ throw new Error(`SQLite database directory not found: ${dbDir}`);
64
+ }
60
65
  this.sqliteClient = new Database(dbPath, { readonly: true });
61
66
  }
62
67
 
63
68
  async querySQLite(query, params = []) {
64
- if (!this.sqliteClient) throw new Error("SQLite client not initialized");
65
69
  const stmt = this.sqliteClient.prepare(query);
66
70
  return stmt.all(...params);
67
71
  }
68
72
 
69
73
  async initMongo(context) {
70
74
  const { MongoClient } = require("mongodb");
71
- const uri = context.MONGO_URI || `mongodb://localhost:27017/${context.db_name || "olang"}`;
75
+ const uri =
76
+ context.MONGO_URI ||
77
+ `mongodb://localhost:27017/${context.db_name || "olang"}`;
72
78
  this.mongoClient = new MongoClient(uri);
73
79
  await this.mongoClient.connect();
74
80
  }
75
81
 
76
82
  async queryMongo(collectionName, filter = {}, projection = {}) {
77
- if (!this.mongoClient) throw new Error("MongoDB client not initialized");
78
83
  const db = this.mongoClient.db(process.env.DB_NAME || "olang");
79
84
  return db.collection(collectionName).find(filter, { projection }).toArray();
80
85
  }
81
86
 
82
87
  async initPostgres(context) {
83
88
  const { Pool } = require("pg");
84
- const poolConfig = {
89
+ this.postgresClient = new Pool({
85
90
  connectionString: context.POSTGRES_URL,
86
- host: context.DB_HOST || "localhost",
87
- port: parseInt(context.DB_PORT) || 5432,
88
- user: context.DB_USER,
89
- password: context.DB_PASSWORD,
90
- database: context.DB_NAME || "olang",
91
- };
92
- Object.keys(poolConfig).forEach((k) => {
93
- if (poolConfig[k] == null) delete poolConfig[k];
94
91
  });
95
- this.postgresClient = new Pool(poolConfig);
96
92
  }
97
93
 
98
94
  async queryPostgres(query, params = []) {
99
- if (!this.postgresClient) throw new Error("PostgreSQL client not initialized");
100
95
  const result = await this.postgresClient.query(query, params);
101
96
  return result.rows;
102
97
  }
103
98
 
104
99
  async queryDocuments(context) {
105
- const {
106
- db_type,
107
- db_table = "documents",
108
- db_content_column = "content",
109
- db_id_column = "id",
110
- } = context;
111
-
112
- if (db_type === "mongodb" || context.MONGO_URI) {
113
- const { filter, projection } = this.buildMongoQuery(context);
114
- const results = await this.queryMongo(db_table, filter, projection);
115
- return results.map((doc) => ({
116
- id: doc._id?.toString() || doc[db_id_column],
117
- content: doc[db_content_column] || "",
118
- source: `mongodb:${db_table}`,
100
+ const table = context.db_table || "documents";
101
+ const contentCol = context.db_content_column || "content";
102
+ const idCol = context.db_id_column || "id";
103
+
104
+ if (context.MONGO_URI) {
105
+ const rows = await this.queryMongo(table);
106
+ return rows.map((r) => ({
107
+ id: r._id?.toString(),
108
+ content: r[contentCol] || "",
109
+ source: `mongodb:${table}`,
119
110
  }));
120
111
  }
121
112
 
122
- if (db_type === "sqlite" || context.db_path) {
123
- const { sql, params } = this.buildSqlQuery(context);
124
- const results = await this.querySQLite(sql, params);
125
- return results.map((row) => ({
126
- id: row[db_id_column],
127
- content: row[db_content_column],
128
- source: `sqlite:${db_table}`,
113
+ if (context.db_path) {
114
+ const rows = await this.querySQLite(
115
+ `SELECT ${idCol}, ${contentCol} FROM ${table}`
116
+ );
117
+ return rows.map((r) => ({
118
+ id: r[idCol],
119
+ content: r[contentCol],
120
+ source: `sqlite:${table}`,
129
121
  }));
130
122
  }
131
123
 
132
- if (db_type === "postgres" || context.POSTGRES_URL) {
133
- const { sql, params } = this.buildSqlQuery(context);
134
- const results = await this.queryPostgres(sql, params);
135
- return results.map((row) => ({
136
- id: row[db_id_column],
137
- content: row[db_content_column],
138
- source: `postgres:${db_table}`,
124
+ if (context.POSTGRES_URL) {
125
+ const rows = await this.queryPostgres(
126
+ `SELECT ${idCol}, ${contentCol} FROM ${table}`
127
+ );
128
+ return rows.map((r) => ({
129
+ id: r[idCol],
130
+ content: r[contentCol],
131
+ source: `postgres:${table}`,
139
132
  }));
140
133
  }
141
134
 
142
135
  return [];
143
136
  }
144
-
145
- buildMongoQuery(context) {
146
- let filter = {};
147
- if (typeof context.doc_filter === "string") {
148
- try {
149
- filter = JSON.parse(context.doc_filter);
150
- } catch {
151
- filter = { $text: { $search: context.doc_filter } };
152
- }
153
- }
154
- return { filter, projection: {} };
155
- }
156
-
157
- buildSqlQuery(context) {
158
- const table = context.db_table || "documents";
159
- const where = context.doc_where || "1=1";
160
- return {
161
- sql: `SELECT * FROM ${table} WHERE ${where}`,
162
- params: [],
163
- };
164
- }
165
137
  }
166
138
 
167
- // ------------------- DOCUMENT LOADING -------------------
139
+ /* ---------------- DOCUMENT LOADING ---------------- */
140
+
168
141
  async function loadAllDocuments(context) {
169
142
  const docs = [];
170
143
  const db = new DatabaseAdapter();
@@ -179,43 +152,52 @@ async function loadAllDocuments(context) {
179
152
  : path.join(process.cwd(), "docs");
180
153
 
181
154
  if (fs.existsSync(baseDir)) {
182
- const files = fs.readdirSync(baseDir).filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
155
+ const files = fs
156
+ .readdirSync(baseDir)
157
+ .filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
158
+
183
159
  for (const f of files) {
184
- const content = fs.readFileSync(path.join(baseDir, f), "utf8");
185
- docs.push({ id: f, content, source: `file:${f}` });
160
+ docs.push({
161
+ id: f,
162
+ content: fs.readFileSync(path.join(baseDir, f), "utf8"),
163
+ source: `file:${f}`,
164
+ });
186
165
  }
187
166
  }
188
167
 
189
168
  return docs;
190
169
  }
191
170
 
192
- // ------------------- HYBRID SEARCH (FIXED & DEFINED) -------------------
171
+ /* ---------------- HYBRID VECTOR SEARCH ---------------- */
172
+
193
173
  async function performHybridDocQA(query, context = {}) {
194
174
  const cache = loadCache();
195
175
  const embedder = new LocalEmbedding({ dimension: 384 });
196
176
 
197
- const router = VectorRouter.create({
177
+ const vectorStore = VectorRouter.create({
198
178
  backend: context.vectorBackend || "memory",
199
179
  dimension: 384,
200
180
  ...context,
201
181
  });
202
182
 
203
183
  const documents = await loadAllDocuments(context);
204
- if (!documents.length) return { text: "", meta: {} };
184
+ if (!documents.length) {
185
+ return { text: "", meta: {} };
186
+ }
205
187
 
206
188
  for (const doc of documents) {
207
189
  if (!cache[doc.id]) {
190
+ cache[doc.id] = true;
208
191
  const chunks = chunkText(doc.content, 500);
209
- cache[doc.id] = [];
210
- for (const chunk of chunks) {
211
- const vector = await embedder.embed(chunk);
212
- await router.upsert({
213
- id: `${doc.id}:${cache[doc.id].length}`,
192
+
193
+ for (let i = 0; i < chunks.length; i++) {
194
+ const vector = await embedder.embed(chunks[i]);
195
+ await vectorStore.upsert({
196
+ id: `${doc.id}:${i}`,
214
197
  vector,
215
- text: chunk,
198
+ content: chunks[i],
216
199
  source: doc.source,
217
200
  });
218
- cache[doc.id].push(vector);
219
201
  }
220
202
  }
221
203
  }
@@ -223,44 +205,54 @@ async function performHybridDocQA(query, context = {}) {
223
205
  saveCache(cache);
224
206
 
225
207
  const queryVector = await embedder.embed(query);
226
- const results = await router.search({ embedding: queryVector, topK: 5 });
208
+ const results = await vectorStore.query(queryVector, 5);
227
209
 
228
210
  return {
229
211
  text: highlightMatches(
230
- results.map((r) => r.text).join("\n\n"),
212
+ results.map((r) => r.content).join("\n\n"),
231
213
  extractKeywords(query)
232
214
  ),
233
215
  meta: { matches: results.length },
234
216
  };
235
217
  }
236
218
 
237
- // ------------------- PGVECTOR SEARCH -------------------
219
+ /* ---------------- PGVECTOR SEARCH ---------------- */
220
+
238
221
  async function performPgVectorSearch(query, context = {}) {
239
- const adapter = new PgVectorAdapter({ POSTGRES_URL: context.POSTGRES_URL });
222
+ const adapter = new PgVectorAdapter({
223
+ POSTGRES_URL: context.POSTGRES_URL,
224
+ });
225
+
240
226
  const embedder = new LocalEmbedding({ dimension: 384 });
241
227
  const vector = await embedder.embed(query);
242
- const results = await adapter.search(vector, 5);
228
+ const results = await adapter.query(vector, 5);
229
+
230
+ await adapter.close();
231
+
243
232
  return {
244
233
  text: results.map((r) => r.content).join("\n\n"),
245
234
  meta: { matches: results.length },
246
235
  };
247
236
  }
248
237
 
249
- // ------------------- ROUTER -------------------
250
- async function performVectorQA(query, context = {}) {
251
- if (context.POSTGRES_URL) return performPgVectorSearch(query, context);
252
- return performHybridDocQA(query, context);
253
- }
238
+ /* ---------------- ROUTER ---------------- */
254
239
 
255
240
  async function performDocQA(query, context = {}) {
256
- return performVectorQA(query, context);
241
+ if (context.POSTGRES_URL) {
242
+ return performPgVectorSearch(query, context);
243
+ }
244
+ return performHybridDocQA(query, context);
257
245
  }
258
246
 
259
- // ------------------- RESOLVER -------------------
247
+ /* ---------------- O-LANG RESOLVER ---------------- */
248
+
260
249
  async function docSearchResolver(action, context) {
261
250
  if (action.startsWith("Ask doc-search")) {
262
251
  const match = action.match(/"(.*)"|'(.*)'/);
263
- const query = match ? match[1] || match[2] : action.replace("Ask doc-search", "").trim();
252
+ const query = match
253
+ ? match[1] || match[2]
254
+ : action.replace("Ask doc-search", "").trim();
255
+
264
256
  return performDocQA(query, context);
265
257
  }
266
258
  }