@o-lang/semantic-doc-search 1.0.22 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/index.js +55 -83
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@o-lang/semantic-doc-search",
3
- "version": "1.0.22",
3
+ "version": "1.0.23",
4
4
  "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
5
5
  "main": "src/index.js",
6
6
  "type": "commonjs",
package/src/index.js CHANGED
@@ -1,16 +1,16 @@
1
1
  const fs = require("fs");
2
2
  const path = require("path");
3
- const { createLLM } = require("./llm/router.js");
4
3
  const { LocalEmbedding } = require("./embeddings/local.js");
5
4
  const { chunkText } = require("./utils/chunker.js");
6
5
  const { extractKeywords } = require("./utils/extractText.js");
7
- const { cosine } = require("./utils/similarity.js");
8
6
  const { highlightMatches } = require("./utils/highlight.js");
9
- const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
10
7
  const VectorRouter = require("./adapters/vectorRouter");
8
+ const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
11
9
 
12
10
  const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
13
11
 
12
+ /* ---------------- UTIL ---------------- */
13
+
14
14
  function safeResolve(base, userPath) {
15
15
  const resolved = path.resolve(base, userPath);
16
16
  if (!resolved.startsWith(path.resolve(base))) {
@@ -44,13 +44,9 @@ class DatabaseAdapter {
44
44
  async initialize(context) {
45
45
  if (this.initialized) return;
46
46
 
47
- if (context.db_type === "mongodb" || context.MONGO_URI) {
48
- await this.initMongo(context);
49
- } else if (context.db_type === "sqlite" || context.db_path) {
50
- await this.initSQLite(context);
51
- } else if (context.db_type === "postgres" || context.POSTGRES_URL) {
52
- await this.initPostgres(context);
53
- }
47
+ if (context.MONGO_URI) await this.initMongo(context);
48
+ else if (context.db_path) await this.initSQLite(context);
49
+ else if (context.POSTGRES_URL) await this.initPostgres(context);
54
50
 
55
51
  this.initialized = true;
56
52
  }
@@ -58,74 +54,51 @@ class DatabaseAdapter {
58
54
  async initSQLite(context) {
59
55
  const Database = require("better-sqlite3");
60
56
  const dbPath = context.db_path || "./database.db";
61
- const dbDir = path.dirname(path.resolve(dbPath));
62
- if (!fs.existsSync(dbDir)) {
63
- throw new Error(`SQLite database directory not found: ${dbDir}`);
64
- }
65
- this.sqliteClient = new Database(dbPath, { readonly: true });
66
- }
67
-
68
- async querySQLite(query, params = []) {
69
- const stmt = this.sqliteClient.prepare(query);
70
- return stmt.all(...params);
57
+ this.sqlite = new Database(dbPath, { readonly: true });
71
58
  }
72
59
 
73
60
  async initMongo(context) {
74
61
  const { MongoClient } = require("mongodb");
75
- const uri =
76
- context.MONGO_URI ||
77
- `mongodb://localhost:27017/${context.db_name || "olang"}`;
78
- this.mongoClient = new MongoClient(uri);
79
- await this.mongoClient.connect();
80
- }
81
-
82
- async queryMongo(collectionName, filter = {}, projection = {}) {
83
- const db = this.mongoClient.db(process.env.DB_NAME || "olang");
84
- return db.collection(collectionName).find(filter, { projection }).toArray();
62
+ const uri = context.MONGO_URI;
63
+ this.mongo = new MongoClient(uri);
64
+ await this.mongo.connect();
85
65
  }
86
66
 
87
67
  async initPostgres(context) {
88
68
  const { Pool } = require("pg");
89
- this.postgresClient = new Pool({
90
- connectionString: context.POSTGRES_URL,
91
- });
92
- }
93
-
94
- async queryPostgres(query, params = []) {
95
- const result = await this.postgresClient.query(query, params);
96
- return result.rows;
69
+ this.pg = new Pool({ connectionString: context.POSTGRES_URL });
97
70
  }
98
71
 
99
72
  async queryDocuments(context) {
100
73
  const table = context.db_table || "documents";
101
- const contentCol = context.db_content_column || "content";
102
74
  const idCol = context.db_id_column || "id";
75
+ const contentCol = context.db_content_column || "content";
103
76
 
104
- if (context.MONGO_URI) {
105
- const rows = await this.queryMongo(table);
106
- return rows.map((r) => ({
107
- id: r._id?.toString(),
77
+ if (this.mongo) {
78
+ const rows = await this.mongo.db().collection(table).find({}).toArray();
79
+ return rows.map(r => ({
80
+ id: r._id.toString(),
108
81
  content: r[contentCol] || "",
109
82
  source: `mongodb:${table}`,
110
83
  }));
111
84
  }
112
85
 
113
- if (context.db_path) {
114
- const rows = await this.querySQLite(
115
- `SELECT ${idCol}, ${contentCol} FROM ${table}`
116
- );
117
- return rows.map((r) => ({
86
+ if (this.sqlite) {
87
+ const rows = this.sqlite
88
+ .prepare(`SELECT ${idCol}, ${contentCol} FROM ${table}`)
89
+ .all();
90
+ return rows.map(r => ({
118
91
  id: r[idCol],
119
92
  content: r[contentCol],
120
93
  source: `sqlite:${table}`,
121
94
  }));
122
95
  }
123
96
 
124
- if (context.POSTGRES_URL) {
125
- const rows = await this.queryPostgres(
97
+ if (this.pg) {
98
+ const res = await this.pg.query(
126
99
  `SELECT ${idCol}, ${contentCol} FROM ${table}`
127
100
  );
128
- return rows.map((r) => ({
101
+ return res.rows.map(r => ({
129
102
  id: r[idCol],
130
103
  content: r[contentCol],
131
104
  source: `postgres:${table}`,
@@ -140,8 +113,8 @@ class DatabaseAdapter {
140
113
 
141
114
  async function loadAllDocuments(context) {
142
115
  const docs = [];
143
- const db = new DatabaseAdapter();
144
116
 
117
+ const db = new DatabaseAdapter();
145
118
  try {
146
119
  await db.initialize(context);
147
120
  docs.push(...(await db.queryDocuments(context)));
@@ -152,15 +125,15 @@ async function loadAllDocuments(context) {
152
125
  : path.join(process.cwd(), "docs");
153
126
 
154
127
  if (fs.existsSync(baseDir)) {
155
- const files = fs
156
- .readdirSync(baseDir)
157
- .filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
128
+ const files = fs.readdirSync(baseDir).filter(f =>
129
+ f.endsWith(".txt") || f.endsWith(".md")
130
+ );
158
131
 
159
- for (const f of files) {
132
+ for (const file of files) {
160
133
  docs.push({
161
- id: f,
162
- content: fs.readFileSync(path.join(baseDir, f), "utf8"),
163
- source: `file:${f}`,
134
+ id: file,
135
+ content: fs.readFileSync(path.join(baseDir, file), "utf8"),
136
+ source: `file:${file}`,
164
137
  });
165
138
  }
166
139
  }
@@ -170,20 +143,18 @@ async function loadAllDocuments(context) {
170
143
 
171
144
  /* ---------------- HYBRID VECTOR SEARCH ---------------- */
172
145
 
173
- async function performHybridDocQA(query, context = {}) {
146
+ async function performHybridDocQA(query, context) {
174
147
  const cache = loadCache();
175
148
  const embedder = new LocalEmbedding({ dimension: 384 });
176
149
 
177
- const vectorStore = VectorRouter.create({
150
+ const store = VectorRouter.create({
178
151
  backend: context.vectorBackend || "memory",
179
152
  dimension: 384,
180
153
  ...context,
181
154
  });
182
155
 
183
156
  const documents = await loadAllDocuments(context);
184
- if (!documents.length) {
185
- return { text: "", meta: {} };
186
- }
157
+ if (!documents.length) return { text: "", meta: {} };
187
158
 
188
159
  for (const doc of documents) {
189
160
  if (!cache[doc.id]) {
@@ -192,7 +163,7 @@ async function performHybridDocQA(query, context = {}) {
192
163
 
193
164
  for (let i = 0; i < chunks.length; i++) {
194
165
  const vector = await embedder.embed(chunks[i]);
195
- await vectorStore.upsert({
166
+ await store.upsert({
196
167
  id: `${doc.id}:${i}`,
197
168
  vector,
198
169
  content: chunks[i],
@@ -205,11 +176,14 @@ async function performHybridDocQA(query, context = {}) {
205
176
  saveCache(cache);
206
177
 
207
178
  const queryVector = await embedder.embed(query);
208
- const results = await vectorStore.query(queryVector, 5);
179
+ const results = await store.search({
180
+ embedding: queryVector,
181
+ topK: 5,
182
+ });
209
183
 
210
184
  return {
211
185
  text: highlightMatches(
212
- results.map((r) => r.content).join("\n\n"),
186
+ results.map(r => r.content).join("\n\n"),
213
187
  extractKeywords(query)
214
188
  ),
215
189
  meta: { matches: results.length },
@@ -218,26 +192,23 @@ async function performHybridDocQA(query, context = {}) {
218
192
 
219
193
  /* ---------------- PGVECTOR SEARCH ---------------- */
220
194
 
221
- async function performPgVectorSearch(query, context = {}) {
222
- const adapter = new PgVectorAdapter({
223
- POSTGRES_URL: context.POSTGRES_URL,
224
- });
225
-
195
+ async function performPgVectorSearch(query, context) {
196
+ const adapter = new PgVectorAdapter({ POSTGRES_URL: context.POSTGRES_URL });
226
197
  const embedder = new LocalEmbedding({ dimension: 384 });
227
- const vector = await embedder.embed(query);
228
- const results = await adapter.query(vector, 5);
229
198
 
199
+ const vector = await embedder.embed(query);
200
+ const results = await adapter.search(vector, 5);
230
201
  await adapter.close();
231
202
 
232
203
  return {
233
- text: results.map((r) => r.content).join("\n\n"),
204
+ text: results.map(r => r.content).join("\n\n"),
234
205
  meta: { matches: results.length },
235
206
  };
236
207
  }
237
208
 
238
209
  /* ---------------- ROUTER ---------------- */
239
210
 
240
- async function performDocQA(query, context = {}) {
211
+ async function performDocQA(query, context) {
241
212
  if (context.POSTGRES_URL) {
242
213
  return performPgVectorSearch(query, context);
243
214
  }
@@ -247,15 +218,16 @@ async function performDocQA(query, context = {}) {
247
218
  /* ---------------- O-LANG RESOLVER ---------------- */
248
219
 
249
220
  async function docSearchResolver(action, context) {
250
- if (action.startsWith("Ask doc-search")) {
251
- const match = action.match(/"(.*)"|'(.*)'/);
252
- const query = match
253
- ? match[1] || match[2]
254
- : action.replace("Ask doc-search", "").trim();
221
+ if (!action.startsWith("Ask doc-search")) return;
255
222
 
256
- return performDocQA(query, context);
257
- }
223
+ const match = action.match(/"(.*)"|'(.*)'/);
224
+ const query = match
225
+ ? match[1] || match[2]
226
+ : action.replace("Ask doc-search", "").trim();
227
+
228
+ return performDocQA(query, context);
258
229
  }
259
230
 
260
231
  docSearchResolver.resolverName = "doc-search";
261
232
  module.exports = docSearchResolver;
233
+