@o-lang/semantic-doc-search 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/package.json +1 -1
  2. package/src/index.js +94 -130
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@o-lang/semantic-doc-search",
3
- "version": "1.0.21",
3
+ "version": "1.0.23",
4
4
  "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
5
5
  "main": "src/index.js",
6
6
  "type": "commonjs",
package/src/index.js CHANGED
@@ -1,19 +1,21 @@
1
1
  const fs = require("fs");
2
2
  const path = require("path");
3
- const { createLLM } = require("./llm/router.js");
4
3
  const { LocalEmbedding } = require("./embeddings/local.js");
5
4
  const { chunkText } = require("./utils/chunker.js");
6
5
  const { extractKeywords } = require("./utils/extractText.js");
7
- const { cosine } = require("./utils/similarity.js");
8
6
  const { highlightMatches } = require("./utils/highlight.js");
9
- const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
10
7
  const VectorRouter = require("./adapters/vectorRouter");
8
+ const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
11
9
 
12
10
  const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
13
11
 
12
+ /* ---------------- UTIL ---------------- */
13
+
14
14
  function safeResolve(base, userPath) {
15
15
  const resolved = path.resolve(base, userPath);
16
- if (!resolved.startsWith(path.resolve(base))) throw new Error("Path traversal detected");
16
+ if (!resolved.startsWith(path.resolve(base))) {
17
+ throw new Error("Path traversal detected");
18
+ }
17
19
  return resolved;
18
20
  }
19
21
 
@@ -32,7 +34,8 @@ function saveCache(cache) {
32
34
  } catch {}
33
35
  }
34
36
 
35
- // ------------------- DATABASE ADAPTER -------------------
37
+ /* ---------------- DATABASE ADAPTER ---------------- */
38
+
36
39
  class DatabaseAdapter {
37
40
  constructor() {
38
41
  this.initialized = false;
@@ -41,13 +44,9 @@ class DatabaseAdapter {
41
44
  async initialize(context) {
42
45
  if (this.initialized) return;
43
46
 
44
- if (context.db_type === "mongodb" || context.MONGO_URI) {
45
- await this.initMongo(context);
46
- } else if (context.db_type === "sqlite" || context.db_path) {
47
- await this.initSQLite(context);
48
- } else if (context.db_type === "postgres" || context.POSTGRES_URL) {
49
- await this.initPostgres(context);
50
- }
47
+ if (context.MONGO_URI) await this.initMongo(context);
48
+ else if (context.db_path) await this.initSQLite(context);
49
+ else if (context.POSTGRES_URL) await this.initPostgres(context);
51
50
 
52
51
  this.initialized = true;
53
52
  }
@@ -55,120 +54,67 @@ class DatabaseAdapter {
55
54
  async initSQLite(context) {
56
55
  const Database = require("better-sqlite3");
57
56
  const dbPath = context.db_path || "./database.db";
58
- const dbDir = path.dirname(path.resolve(dbPath));
59
- if (!fs.existsSync(dbDir)) throw new Error(`SQLite database directory not found: ${dbDir}`);
60
- this.sqliteClient = new Database(dbPath, { readonly: true });
61
- }
62
-
63
- async querySQLite(query, params = []) {
64
- if (!this.sqliteClient) throw new Error("SQLite client not initialized");
65
- const stmt = this.sqliteClient.prepare(query);
66
- return stmt.all(...params);
57
+ this.sqlite = new Database(dbPath, { readonly: true });
67
58
  }
68
59
 
69
60
  async initMongo(context) {
70
61
  const { MongoClient } = require("mongodb");
71
- const uri = context.MONGO_URI || `mongodb://localhost:27017/${context.db_name || "olang"}`;
72
- this.mongoClient = new MongoClient(uri);
73
- await this.mongoClient.connect();
74
- }
75
-
76
- async queryMongo(collectionName, filter = {}, projection = {}) {
77
- if (!this.mongoClient) throw new Error("MongoDB client not initialized");
78
- const db = this.mongoClient.db(process.env.DB_NAME || "olang");
79
- return db.collection(collectionName).find(filter, { projection }).toArray();
62
+ const uri = context.MONGO_URI;
63
+ this.mongo = new MongoClient(uri);
64
+ await this.mongo.connect();
80
65
  }
81
66
 
82
67
  async initPostgres(context) {
83
68
  const { Pool } = require("pg");
84
- const poolConfig = {
85
- connectionString: context.POSTGRES_URL,
86
- host: context.DB_HOST || "localhost",
87
- port: parseInt(context.DB_PORT) || 5432,
88
- user: context.DB_USER,
89
- password: context.DB_PASSWORD,
90
- database: context.DB_NAME || "olang",
91
- };
92
- Object.keys(poolConfig).forEach((k) => {
93
- if (poolConfig[k] == null) delete poolConfig[k];
94
- });
95
- this.postgresClient = new Pool(poolConfig);
96
- }
97
-
98
- async queryPostgres(query, params = []) {
99
- if (!this.postgresClient) throw new Error("PostgreSQL client not initialized");
100
- const result = await this.postgresClient.query(query, params);
101
- return result.rows;
69
+ this.pg = new Pool({ connectionString: context.POSTGRES_URL });
102
70
  }
103
71
 
104
72
  async queryDocuments(context) {
105
- const {
106
- db_type,
107
- db_table = "documents",
108
- db_content_column = "content",
109
- db_id_column = "id",
110
- } = context;
111
-
112
- if (db_type === "mongodb" || context.MONGO_URI) {
113
- const { filter, projection } = this.buildMongoQuery(context);
114
- const results = await this.queryMongo(db_table, filter, projection);
115
- return results.map((doc) => ({
116
- id: doc._id?.toString() || doc[db_id_column],
117
- content: doc[db_content_column] || "",
118
- source: `mongodb:${db_table}`,
73
+ const table = context.db_table || "documents";
74
+ const idCol = context.db_id_column || "id";
75
+ const contentCol = context.db_content_column || "content";
76
+
77
+ if (this.mongo) {
78
+ const rows = await this.mongo.db().collection(table).find({}).toArray();
79
+ return rows.map(r => ({
80
+ id: r._id.toString(),
81
+ content: r[contentCol] || "",
82
+ source: `mongodb:${table}`,
119
83
  }));
120
84
  }
121
85
 
122
- if (db_type === "sqlite" || context.db_path) {
123
- const { sql, params } = this.buildSqlQuery(context);
124
- const results = await this.querySQLite(sql, params);
125
- return results.map((row) => ({
126
- id: row[db_id_column],
127
- content: row[db_content_column],
128
- source: `sqlite:${db_table}`,
86
+ if (this.sqlite) {
87
+ const rows = this.sqlite
88
+ .prepare(`SELECT ${idCol}, ${contentCol} FROM ${table}`)
89
+ .all();
90
+ return rows.map(r => ({
91
+ id: r[idCol],
92
+ content: r[contentCol],
93
+ source: `sqlite:${table}`,
129
94
  }));
130
95
  }
131
96
 
132
- if (db_type === "postgres" || context.POSTGRES_URL) {
133
- const { sql, params } = this.buildSqlQuery(context);
134
- const results = await this.queryPostgres(sql, params);
135
- return results.map((row) => ({
136
- id: row[db_id_column],
137
- content: row[db_content_column],
138
- source: `postgres:${db_table}`,
97
+ if (this.pg) {
98
+ const res = await this.pg.query(
99
+ `SELECT ${idCol}, ${contentCol} FROM ${table}`
100
+ );
101
+ return res.rows.map(r => ({
102
+ id: r[idCol],
103
+ content: r[contentCol],
104
+ source: `postgres:${table}`,
139
105
  }));
140
106
  }
141
107
 
142
108
  return [];
143
109
  }
144
-
145
- buildMongoQuery(context) {
146
- let filter = {};
147
- if (typeof context.doc_filter === "string") {
148
- try {
149
- filter = JSON.parse(context.doc_filter);
150
- } catch {
151
- filter = { $text: { $search: context.doc_filter } };
152
- }
153
- }
154
- return { filter, projection: {} };
155
- }
156
-
157
- buildSqlQuery(context) {
158
- const table = context.db_table || "documents";
159
- const where = context.doc_where || "1=1";
160
- return {
161
- sql: `SELECT * FROM ${table} WHERE ${where}`,
162
- params: [],
163
- };
164
- }
165
110
  }
166
111
 
167
- // ------------------- DOCUMENT LOADING -------------------
112
+ /* ---------------- DOCUMENT LOADING ---------------- */
113
+
168
114
  async function loadAllDocuments(context) {
169
115
  const docs = [];
170
- const db = new DatabaseAdapter();
171
116
 
117
+ const db = new DatabaseAdapter();
172
118
  try {
173
119
  await db.initialize(context);
174
120
  docs.push(...(await db.queryDocuments(context)));
@@ -179,22 +125,29 @@ async function loadAllDocuments(context) {
179
125
  : path.join(process.cwd(), "docs");
180
126
 
181
127
  if (fs.existsSync(baseDir)) {
182
- const files = fs.readdirSync(baseDir).filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
183
- for (const f of files) {
184
- const content = fs.readFileSync(path.join(baseDir, f), "utf8");
185
- docs.push({ id: f, content, source: `file:${f}` });
128
+ const files = fs.readdirSync(baseDir).filter(f =>
129
+ f.endsWith(".txt") || f.endsWith(".md")
130
+ );
131
+
132
+ for (const file of files) {
133
+ docs.push({
134
+ id: file,
135
+ content: fs.readFileSync(path.join(baseDir, file), "utf8"),
136
+ source: `file:${file}`,
137
+ });
186
138
  }
187
139
  }
188
140
 
189
141
  return docs;
190
142
  }
191
143
 
192
- // ------------------- HYBRID SEARCH (FIXED & DEFINED) -------------------
193
- async function performHybridDocQA(query, context = {}) {
144
+ /* ---------------- HYBRID VECTOR SEARCH ---------------- */
145
+
146
+ async function performHybridDocQA(query, context) {
194
147
  const cache = loadCache();
195
148
  const embedder = new LocalEmbedding({ dimension: 384 });
196
149
 
197
- const router = VectorRouter.create({
150
+ const store = VectorRouter.create({
198
151
  backend: context.vectorBackend || "memory",
199
152
  dimension: 384,
200
153
  ...context,
@@ -205,17 +158,17 @@ async function performHybridDocQA(query, context = {}) {
205
158
 
206
159
  for (const doc of documents) {
207
160
  if (!cache[doc.id]) {
161
+ cache[doc.id] = true;
208
162
  const chunks = chunkText(doc.content, 500);
209
- cache[doc.id] = [];
210
- for (const chunk of chunks) {
211
- const vector = await embedder.embed(chunk);
212
- await router.upsert({
213
- id: `${doc.id}:${cache[doc.id].length}`,
163
+
164
+ for (let i = 0; i < chunks.length; i++) {
165
+ const vector = await embedder.embed(chunks[i]);
166
+ await store.upsert({
167
+ id: `${doc.id}:${i}`,
214
168
  vector,
215
- text: chunk,
169
+ content: chunks[i],
216
170
  source: doc.source,
217
171
  });
218
- cache[doc.id].push(vector);
219
172
  }
220
173
  }
221
174
  }
@@ -223,47 +176,58 @@ async function performHybridDocQA(query, context = {}) {
223
176
  saveCache(cache);
224
177
 
225
178
  const queryVector = await embedder.embed(query);
226
- const results = await router.search({ embedding: queryVector, topK: 5 });
179
+ const results = await store.search({
180
+ embedding: queryVector,
181
+ topK: 5,
182
+ });
227
183
 
228
184
  return {
229
185
  text: highlightMatches(
230
- results.map((r) => r.text).join("\n\n"),
186
+ results.map(r => r.content).join("\n\n"),
231
187
  extractKeywords(query)
232
188
  ),
233
189
  meta: { matches: results.length },
234
190
  };
235
191
  }
236
192
 
237
- // ------------------- PGVECTOR SEARCH -------------------
238
- async function performPgVectorSearch(query, context = {}) {
193
+ /* ---------------- PGVECTOR SEARCH ---------------- */
194
+
195
+ async function performPgVectorSearch(query, context) {
239
196
  const adapter = new PgVectorAdapter({ POSTGRES_URL: context.POSTGRES_URL });
240
197
  const embedder = new LocalEmbedding({ dimension: 384 });
198
+
241
199
  const vector = await embedder.embed(query);
242
200
  const results = await adapter.search(vector, 5);
201
+ await adapter.close();
202
+
243
203
  return {
244
- text: results.map((r) => r.content).join("\n\n"),
204
+ text: results.map(r => r.content).join("\n\n"),
245
205
  meta: { matches: results.length },
246
206
  };
247
207
  }
248
208
 
249
- // ------------------- ROUTER -------------------
250
- async function performVectorQA(query, context = {}) {
251
- if (context.POSTGRES_URL) return performPgVectorSearch(query, context);
209
+ /* ---------------- ROUTER ---------------- */
210
+
211
+ async function performDocQA(query, context) {
212
+ if (context.POSTGRES_URL) {
213
+ return performPgVectorSearch(query, context);
214
+ }
252
215
  return performHybridDocQA(query, context);
253
216
  }
254
217
 
255
- async function performDocQA(query, context = {}) {
256
- return performVectorQA(query, context);
257
- }
218
+ /* ---------------- O-LANG RESOLVER ---------------- */
258
219
 
259
- // ------------------- RESOLVER -------------------
260
220
  async function docSearchResolver(action, context) {
261
- if (action.startsWith("Ask doc-search")) {
262
- const match = action.match(/"(.*)"|'(.*)'/);
263
- const query = match ? match[1] || match[2] : action.replace("Ask doc-search", "").trim();
264
- return performDocQA(query, context);
265
- }
221
+ if (!action.startsWith("Ask doc-search")) return;
222
+
223
+ const match = action.match(/"(.*)"|'(.*)'/);
224
+ const query = match
225
+ ? match[1] || match[2]
226
+ : action.replace("Ask doc-search", "").trim();
227
+
228
+ return performDocQA(query, context);
266
229
  }
267
230
 
268
231
  docSearchResolver.resolverName = "doc-search";
269
232
  module.exports = docSearchResolver;
233
+