@o-lang/semantic-doc-search 1.0.18 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/adapters/vectorRouter.js +16 -17
- package/src/index.js +128 -86
package/package.json
CHANGED
|
@@ -1,31 +1,30 @@
|
|
|
1
|
-
const PgVectorAdapter = require("./pgvectorAdapter");
|
|
2
|
-
const PineconeAdapter = require("./pineconeAdapter");
|
|
3
|
-
const RedisAdapter = require("./redisAdapter");
|
|
4
|
-
const InMemoryAdapter = require("./inMemoryAdapter");
|
|
5
|
-
|
|
6
1
|
class VectorRouter {
|
|
7
2
|
static create(config = {}) {
|
|
8
|
-
const backend = config.backend;
|
|
9
|
-
|
|
10
|
-
if (!backend) {
|
|
11
|
-
throw new Error("Vector backend not specified");
|
|
12
|
-
}
|
|
3
|
+
const backend = config.backend || "pgvector";
|
|
13
4
|
|
|
14
5
|
switch (backend) {
|
|
15
|
-
case "pgvector":
|
|
6
|
+
case "pgvector": {
|
|
7
|
+
const PgVectorAdapter = require("./pgvectorAdapter");
|
|
16
8
|
return new PgVectorAdapter(config);
|
|
9
|
+
}
|
|
17
10
|
|
|
18
|
-
case "
|
|
19
|
-
|
|
11
|
+
case "memory": {
|
|
12
|
+
const InMemoryAdapter = require("./inMemoryAdapter");
|
|
13
|
+
return new InMemoryAdapter(config);
|
|
14
|
+
}
|
|
20
15
|
|
|
21
|
-
case "redis":
|
|
16
|
+
case "redis": {
|
|
17
|
+
const RedisAdapter = require("./redisAdapter");
|
|
22
18
|
return new RedisAdapter(config);
|
|
19
|
+
}
|
|
23
20
|
|
|
24
|
-
case "
|
|
25
|
-
|
|
21
|
+
case "pinecone": {
|
|
22
|
+
const PineconeAdapter = require("./pineconeAdapter");
|
|
23
|
+
return new PineconeAdapter(config);
|
|
24
|
+
}
|
|
26
25
|
|
|
27
26
|
default:
|
|
28
|
-
throw new Error(`
|
|
27
|
+
throw new Error(`Unknown vector backend: ${backend}`);
|
|
29
28
|
}
|
|
30
29
|
}
|
|
31
30
|
}
|
package/src/index.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
// doc-search.js
|
|
2
1
|
const fs = require("fs");
|
|
3
2
|
const path = require("path");
|
|
4
3
|
const { createLLM } = require("./llm/router.js");
|
|
@@ -20,23 +19,36 @@ function safeResolve(base, userPath) {
|
|
|
20
19
|
|
|
21
20
|
function loadCache() {
|
|
22
21
|
try {
|
|
23
|
-
if (fs.existsSync(CACHE_PATH))
|
|
22
|
+
if (fs.existsSync(CACHE_PATH)) {
|
|
23
|
+
return JSON.parse(fs.readFileSync(CACHE_PATH, "utf8")) || {};
|
|
24
|
+
}
|
|
24
25
|
} catch {}
|
|
25
26
|
return {};
|
|
26
27
|
}
|
|
27
28
|
|
|
28
29
|
function saveCache(cache) {
|
|
29
|
-
try {
|
|
30
|
+
try {
|
|
31
|
+
fs.writeFileSync(CACHE_PATH, JSON.stringify(cache, null, 2));
|
|
32
|
+
} catch {}
|
|
30
33
|
}
|
|
31
34
|
|
|
32
35
|
// ------------------- DATABASE ADAPTER -------------------
|
|
33
36
|
class DatabaseAdapter {
|
|
34
|
-
constructor() {
|
|
37
|
+
constructor() {
|
|
38
|
+
this.initialized = false;
|
|
39
|
+
}
|
|
40
|
+
|
|
35
41
|
async initialize(context) {
|
|
36
42
|
if (this.initialized) return;
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
43
|
+
|
|
44
|
+
if (context.db_type === "mongodb" || context.MONGO_URI) {
|
|
45
|
+
await this.initMongo(context);
|
|
46
|
+
} else if (context.db_type === "sqlite" || context.db_path) {
|
|
47
|
+
await this.initSQLite(context);
|
|
48
|
+
} else if (context.db_type === "postgres" || context.POSTGRES_URL) {
|
|
49
|
+
await this.initPostgres(context);
|
|
50
|
+
}
|
|
51
|
+
|
|
40
52
|
this.initialized = true;
|
|
41
53
|
}
|
|
42
54
|
|
|
@@ -63,8 +75,8 @@ class DatabaseAdapter {
|
|
|
63
75
|
|
|
64
76
|
async queryMongo(collectionName, filter = {}, projection = {}) {
|
|
65
77
|
if (!this.mongoClient) throw new Error("MongoDB client not initialized");
|
|
66
|
-
const db = this.mongoClient.db(process.env.DB_NAME ||
|
|
67
|
-
return
|
|
78
|
+
const db = this.mongoClient.db(process.env.DB_NAME || "olang");
|
|
79
|
+
return db.collection(collectionName).find(filter, { projection }).toArray();
|
|
68
80
|
}
|
|
69
81
|
|
|
70
82
|
async initPostgres(context) {
|
|
@@ -78,7 +90,7 @@ class DatabaseAdapter {
|
|
|
78
90
|
database: context.DB_NAME || "olang",
|
|
79
91
|
};
|
|
80
92
|
Object.keys(poolConfig).forEach((k) => {
|
|
81
|
-
if (poolConfig[k]
|
|
93
|
+
if (poolConfig[k] == null) delete poolConfig[k];
|
|
82
94
|
});
|
|
83
95
|
this.postgresClient = new Pool(poolConfig);
|
|
84
96
|
}
|
|
@@ -90,16 +102,24 @@ class DatabaseAdapter {
|
|
|
90
102
|
}
|
|
91
103
|
|
|
92
104
|
async queryDocuments(context) {
|
|
93
|
-
const {
|
|
105
|
+
const {
|
|
106
|
+
db_type,
|
|
107
|
+
db_table = "documents",
|
|
108
|
+
db_content_column = "content",
|
|
109
|
+
db_id_column = "id",
|
|
110
|
+
} = context;
|
|
111
|
+
|
|
94
112
|
if (db_type === "mongodb" || context.MONGO_URI) {
|
|
95
113
|
const { filter, projection } = this.buildMongoQuery(context);
|
|
96
114
|
const results = await this.queryMongo(db_table, filter, projection);
|
|
97
115
|
return results.map((doc) => ({
|
|
98
|
-
id: doc._id?.toString() || doc
|
|
99
|
-
content: doc[db_content_column] ||
|
|
116
|
+
id: doc._id?.toString() || doc[db_id_column],
|
|
117
|
+
content: doc[db_content_column] || "",
|
|
100
118
|
source: `mongodb:${db_table}`,
|
|
101
119
|
}));
|
|
102
|
-
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
if (db_type === "sqlite" || context.db_path) {
|
|
103
123
|
const { sql, params } = this.buildSqlQuery(context);
|
|
104
124
|
const results = await this.querySQLite(sql, params);
|
|
105
125
|
return results.map((row) => ({
|
|
@@ -107,7 +127,9 @@ class DatabaseAdapter {
|
|
|
107
127
|
content: row[db_content_column],
|
|
108
128
|
source: `sqlite:${db_table}`,
|
|
109
129
|
}));
|
|
110
|
-
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
if (db_type === "postgres" || context.POSTGRES_URL) {
|
|
111
133
|
const { sql, params } = this.buildSqlQuery(context);
|
|
112
134
|
const results = await this.queryPostgres(sql, params);
|
|
113
135
|
return results.map((row) => ({
|
|
@@ -116,111 +138,131 @@ class DatabaseAdapter {
|
|
|
116
138
|
source: `postgres:${db_table}`,
|
|
117
139
|
}));
|
|
118
140
|
}
|
|
141
|
+
|
|
119
142
|
return [];
|
|
120
143
|
}
|
|
121
144
|
|
|
122
145
|
buildMongoQuery(context) {
|
|
123
|
-
const { doc_filter = {}, doc_projection = {} } = context;
|
|
124
146
|
let filter = {};
|
|
125
|
-
if (typeof doc_filter === "string") {
|
|
126
|
-
try {
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
147
|
+
if (typeof context.doc_filter === "string") {
|
|
148
|
+
try {
|
|
149
|
+
filter = JSON.parse(context.doc_filter);
|
|
150
|
+
} catch {
|
|
151
|
+
filter = { $text: { $search: context.doc_filter } };
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
return { filter, projection: {} };
|
|
130
155
|
}
|
|
131
156
|
|
|
132
157
|
buildSqlQuery(context) {
|
|
133
|
-
const { db_content_column = "content", db_id_column = "id", doc_where = "1=1", doc_params = [] } = context;
|
|
134
|
-
let params = doc_params;
|
|
135
|
-
if (typeof doc_params === "string") {
|
|
136
|
-
try { params = JSON.parse(doc_params); } catch { params = [doc_params]; }
|
|
137
|
-
}
|
|
138
158
|
const table = context.db_table || "documents";
|
|
139
|
-
const
|
|
140
|
-
return {
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if (this.sqliteClient) { try { this.sqliteClient.close(); } catch {} this.sqliteClient = null; }
|
|
145
|
-
if (this.mongoClient) { try { await this.mongoClient.close(); } catch {} this.mongoClient = null; }
|
|
146
|
-
if (this.postgresClient) { try { await this.postgresClient.end(); } catch {} this.postgresClient = null; }
|
|
147
|
-
this.initialized = false;
|
|
159
|
+
const where = context.doc_where || "1=1";
|
|
160
|
+
return {
|
|
161
|
+
sql: `SELECT * FROM ${table} WHERE ${where}`,
|
|
162
|
+
params: [],
|
|
163
|
+
};
|
|
148
164
|
}
|
|
149
165
|
}
|
|
150
166
|
|
|
151
167
|
// ------------------- DOCUMENT LOADING -------------------
|
|
152
|
-
async function loadDocumentsFromDatabase(context) {
|
|
153
|
-
if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL) return null;
|
|
154
|
-
const dbAdapter = new DatabaseAdapter();
|
|
155
|
-
try { await dbAdapter.initialize(context); return await dbAdapter.queryDocuments(context); } catch (e) { console.error("🗃️ [doc-search] Database load error:", e.message); return null; }
|
|
156
|
-
}
|
|
157
|
-
|
|
158
168
|
async function loadAllDocuments(context) {
|
|
159
|
-
const
|
|
160
|
-
const
|
|
161
|
-
|
|
169
|
+
const docs = [];
|
|
170
|
+
const db = new DatabaseAdapter();
|
|
171
|
+
|
|
172
|
+
try {
|
|
173
|
+
await db.initialize(context);
|
|
174
|
+
docs.push(...(await db.queryDocuments(context)));
|
|
175
|
+
} catch {}
|
|
176
|
+
|
|
177
|
+
const baseDir = context.doc_root
|
|
178
|
+
? safeResolve(process.cwd(), context.doc_root)
|
|
179
|
+
: path.join(process.cwd(), "docs");
|
|
162
180
|
|
|
163
|
-
const baseDir = context.doc_root ? safeResolve(process.cwd(), context.doc_root) : path.join(process.cwd(), "docs");
|
|
164
181
|
if (fs.existsSync(baseDir)) {
|
|
165
182
|
const files = fs.readdirSync(baseDir).filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
|
|
166
|
-
for (const
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
documents.push({ id: file, content, source: `file:${file}` });
|
|
170
|
-
} catch (e) { console.warn(`⚠️ [doc-search] Failed to read file ${file}: ${e.message}`); }
|
|
183
|
+
for (const f of files) {
|
|
184
|
+
const content = fs.readFileSync(path.join(baseDir, f), "utf8");
|
|
185
|
+
docs.push({ id: f, content, source: `file:${f}` });
|
|
171
186
|
}
|
|
172
187
|
}
|
|
173
|
-
return documents;
|
|
174
|
-
}
|
|
175
188
|
|
|
176
|
-
|
|
177
|
-
async function checkPgVectorHasData(pgVectorAdapter) {
|
|
178
|
-
try { const result = await pgVectorAdapter.pool.query("SELECT COUNT(*) FROM doc_embeddings"); return parseInt(result.rows[0].count) > 0; } catch { return false; }
|
|
189
|
+
return docs;
|
|
179
190
|
}
|
|
180
191
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
const
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
192
|
+
// ------------------- HYBRID SEARCH (FIXED & DEFINED) -------------------
|
|
193
|
+
async function performHybridDocQA(query, context = {}) {
|
|
194
|
+
const cache = loadCache();
|
|
195
|
+
const embedder = new LocalEmbedding({ dimension: 384 });
|
|
196
|
+
|
|
197
|
+
const router = VectorRouter.create({
|
|
198
|
+
backend: context.vectorBackend || "memory",
|
|
199
|
+
dimension: 384,
|
|
200
|
+
...context,
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
const documents = await loadAllDocuments(context);
|
|
204
|
+
if (!documents.length) return { text: "", meta: {} };
|
|
205
|
+
|
|
206
|
+
for (const doc of documents) {
|
|
207
|
+
if (!cache[doc.id]) {
|
|
208
|
+
const chunks = chunkText(doc.content, 500);
|
|
209
|
+
cache[doc.id] = [];
|
|
210
|
+
for (const chunk of chunks) {
|
|
211
|
+
const vector = await embedder.embed(chunk);
|
|
212
|
+
await router.upsert({
|
|
213
|
+
id: `${doc.id}:${cache[doc.id].length}`,
|
|
214
|
+
vector,
|
|
215
|
+
text: chunk,
|
|
216
|
+
source: doc.source,
|
|
217
|
+
});
|
|
218
|
+
cache[doc.id].push(vector);
|
|
219
|
+
}
|
|
220
|
+
}
|
|
193
221
|
}
|
|
222
|
+
|
|
223
|
+
saveCache(cache);
|
|
224
|
+
|
|
225
|
+
const queryVector = await embedder.embed(query);
|
|
226
|
+
const results = await router.search({ embedding: queryVector, topK: 5 });
|
|
227
|
+
|
|
228
|
+
return {
|
|
229
|
+
text: highlightMatches(
|
|
230
|
+
results.map((r) => r.text).join("\n\n"),
|
|
231
|
+
extractKeywords(query)
|
|
232
|
+
),
|
|
233
|
+
meta: { matches: results.length },
|
|
234
|
+
};
|
|
194
235
|
}
|
|
195
236
|
|
|
196
|
-
// -------------------
|
|
197
|
-
async function
|
|
198
|
-
const
|
|
199
|
-
const
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
}
|
|
206
|
-
return await performHybridDocQA(query, context);
|
|
207
|
-
}
|
|
237
|
+
// ------------------- PGVECTOR SEARCH -------------------
|
|
238
|
+
async function performPgVectorSearch(query, context = {}) {
|
|
239
|
+
const adapter = new PgVectorAdapter({ POSTGRES_URL: context.POSTGRES_URL });
|
|
240
|
+
const embedder = new LocalEmbedding({ dimension: 384 });
|
|
241
|
+
const vector = await embedder.embed(query);
|
|
242
|
+
const results = await adapter.search(vector, 5);
|
|
243
|
+
return {
|
|
244
|
+
text: results.map((r) => r.content).join("\n\n"),
|
|
245
|
+
meta: { matches: results.length },
|
|
246
|
+
};
|
|
208
247
|
}
|
|
209
248
|
|
|
210
|
-
// -------------------
|
|
211
|
-
|
|
249
|
+
// ------------------- ROUTER -------------------
|
|
250
|
+
async function performVectorQA(query, context = {}) {
|
|
251
|
+
if (context.POSTGRES_URL) return performPgVectorSearch(query, context);
|
|
252
|
+
return performHybridDocQA(query, context);
|
|
253
|
+
}
|
|
212
254
|
|
|
213
255
|
async function performDocQA(query, context = {}) {
|
|
214
|
-
return
|
|
256
|
+
return performVectorQA(query, context);
|
|
215
257
|
}
|
|
216
258
|
|
|
259
|
+
// ------------------- RESOLVER -------------------
|
|
217
260
|
async function docSearchResolver(action, context) {
|
|
218
|
-
if (action.startsWith("Ask doc-search
|
|
261
|
+
if (action.startsWith("Ask doc-search")) {
|
|
219
262
|
const match = action.match(/"(.*)"|'(.*)'/);
|
|
220
|
-
const query = match ? match[1] || match[2] : action.replace(
|
|
221
|
-
return
|
|
263
|
+
const query = match ? match[1] || match[2] : action.replace("Ask doc-search", "").trim();
|
|
264
|
+
return performDocQA(query, context);
|
|
222
265
|
}
|
|
223
|
-
return undefined;
|
|
224
266
|
}
|
|
225
267
|
|
|
226
268
|
docSearchResolver.resolverName = "doc-search";
|