@o-lang/semantic-doc-search 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +94 -130
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
const fs = require("fs");
|
|
2
2
|
const path = require("path");
|
|
3
|
-
const { createLLM } = require("./llm/router.js");
|
|
4
3
|
const { LocalEmbedding } = require("./embeddings/local.js");
|
|
5
4
|
const { chunkText } = require("./utils/chunker.js");
|
|
6
5
|
const { extractKeywords } = require("./utils/extractText.js");
|
|
7
|
-
const { cosine } = require("./utils/similarity.js");
|
|
8
6
|
const { highlightMatches } = require("./utils/highlight.js");
|
|
9
|
-
const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
|
|
10
7
|
const VectorRouter = require("./adapters/vectorRouter");
|
|
8
|
+
const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
|
|
11
9
|
|
|
12
10
|
const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
|
|
13
11
|
|
|
12
|
+
/* ---------------- UTIL ---------------- */
|
|
13
|
+
|
|
14
14
|
/**
 * Resolve `userPath` against `base` and refuse any result that lies outside
 * of `base`.
 *
 * Containment is decided from the relative path between the two resolved
 * locations rather than a raw string-prefix test, because a prefix test
 * wrongly accepts sibling directories that merely share a prefix
 * (e.g. base `/data/docs` vs. `/data/docs-private`).
 *
 * @param {string} base - Directory the result must stay inside.
 * @param {string} userPath - Caller-supplied path, relative or absolute.
 * @returns {string} The absolute, resolved path.
 * @throws {Error} "Path traversal detected" when the result escapes `base`.
 */
function safeResolve(base, userPath) {
  const root = path.resolve(base);
  const resolved = path.resolve(root, userPath);
  const relative = path.relative(root, resolved);

  // `relative` starts with a ".." segment when `resolved` is above/beside
  // `root`; it is absolute when the two live on different roots (Windows drives).
  const escapesRoot =
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative);

  if (escapesRoot) {
    throw new Error("Path traversal detected");
  }
  return resolved;
}
|
|
19
21
|
|
|
@@ -32,7 +34,8 @@ function saveCache(cache) {
|
|
|
32
34
|
} catch {}
|
|
33
35
|
}
|
|
34
36
|
|
|
35
|
-
|
|
37
|
+
/* ---------------- DATABASE ADAPTER ---------------- */
|
|
38
|
+
|
|
36
39
|
class DatabaseAdapter {
|
|
37
40
|
constructor() {
|
|
38
41
|
this.initialized = false;
|
|
@@ -41,13 +44,9 @@ class DatabaseAdapter {
|
|
|
41
44
|
async initialize(context) {
|
|
42
45
|
if (this.initialized) return;
|
|
43
46
|
|
|
44
|
-
if (context.
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
await this.initSQLite(context);
|
|
48
|
-
} else if (context.db_type === "postgres" || context.POSTGRES_URL) {
|
|
49
|
-
await this.initPostgres(context);
|
|
50
|
-
}
|
|
47
|
+
if (context.MONGO_URI) await this.initMongo(context);
|
|
48
|
+
else if (context.db_path) await this.initSQLite(context);
|
|
49
|
+
else if (context.POSTGRES_URL) await this.initPostgres(context);
|
|
51
50
|
|
|
52
51
|
this.initialized = true;
|
|
53
52
|
}
|
|
@@ -55,120 +54,67 @@ class DatabaseAdapter {
|
|
|
55
54
|
async initSQLite(context) {
|
|
56
55
|
const Database = require("better-sqlite3");
|
|
57
56
|
const dbPath = context.db_path || "./database.db";
|
|
58
|
-
|
|
59
|
-
if (!fs.existsSync(dbDir)) throw new Error(`SQLite database directory not found: ${dbDir}`);
|
|
60
|
-
this.sqliteClient = new Database(dbPath, { readonly: true });
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
async querySQLite(query, params = []) {
|
|
64
|
-
if (!this.sqliteClient) throw new Error("SQLite client not initialized");
|
|
65
|
-
const stmt = this.sqliteClient.prepare(query);
|
|
66
|
-
return stmt.all(...params);
|
|
57
|
+
this.sqlite = new Database(dbPath, { readonly: true });
|
|
67
58
|
}
|
|
68
59
|
|
|
69
60
|
async initMongo(context) {
|
|
70
61
|
const { MongoClient } = require("mongodb");
|
|
71
|
-
const uri = context.MONGO_URI
|
|
72
|
-
this.
|
|
73
|
-
await this.
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
async queryMongo(collectionName, filter = {}, projection = {}) {
|
|
77
|
-
if (!this.mongoClient) throw new Error("MongoDB client not initialized");
|
|
78
|
-
const db = this.mongoClient.db(process.env.DB_NAME || "olang");
|
|
79
|
-
return db.collection(collectionName).find(filter, { projection }).toArray();
|
|
62
|
+
const uri = context.MONGO_URI;
|
|
63
|
+
this.mongo = new MongoClient(uri);
|
|
64
|
+
await this.mongo.connect();
|
|
80
65
|
}
|
|
81
66
|
|
|
82
67
|
async initPostgres(context) {
|
|
83
68
|
const { Pool } = require("pg");
|
|
84
|
-
|
|
85
|
-
connectionString: context.POSTGRES_URL,
|
|
86
|
-
host: context.DB_HOST || "localhost",
|
|
87
|
-
port: parseInt(context.DB_PORT) || 5432,
|
|
88
|
-
user: context.DB_USER,
|
|
89
|
-
password: context.DB_PASSWORD,
|
|
90
|
-
database: context.DB_NAME || "olang",
|
|
91
|
-
};
|
|
92
|
-
Object.keys(poolConfig).forEach((k) => {
|
|
93
|
-
if (poolConfig[k] == null) delete poolConfig[k];
|
|
94
|
-
});
|
|
95
|
-
this.postgresClient = new Pool(poolConfig);
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
async queryPostgres(query, params = []) {
|
|
99
|
-
if (!this.postgresClient) throw new Error("PostgreSQL client not initialized");
|
|
100
|
-
const result = await this.postgresClient.query(query, params);
|
|
101
|
-
return result.rows;
|
|
69
|
+
this.pg = new Pool({ connectionString: context.POSTGRES_URL });
|
|
102
70
|
}
|
|
103
71
|
|
|
104
72
|
async queryDocuments(context) {
|
|
105
|
-
const
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
return results.map((doc) => ({
|
|
116
|
-
id: doc._id?.toString() || doc[db_id_column],
|
|
117
|
-
content: doc[db_content_column] || "",
|
|
118
|
-
source: `mongodb:${db_table}`,
|
|
73
|
+
const table = context.db_table || "documents";
|
|
74
|
+
const idCol = context.db_id_column || "id";
|
|
75
|
+
const contentCol = context.db_content_column || "content";
|
|
76
|
+
|
|
77
|
+
if (this.mongo) {
|
|
78
|
+
const rows = await this.mongo.db().collection(table).find({}).toArray();
|
|
79
|
+
return rows.map(r => ({
|
|
80
|
+
id: r._id.toString(),
|
|
81
|
+
content: r[contentCol] || "",
|
|
82
|
+
source: `mongodb:${table}`,
|
|
119
83
|
}));
|
|
120
84
|
}
|
|
121
85
|
|
|
122
|
-
if (
|
|
123
|
-
const
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
86
|
+
if (this.sqlite) {
|
|
87
|
+
const rows = this.sqlite
|
|
88
|
+
.prepare(`SELECT ${idCol}, ${contentCol} FROM ${table}`)
|
|
89
|
+
.all();
|
|
90
|
+
return rows.map(r => ({
|
|
91
|
+
id: r[idCol],
|
|
92
|
+
content: r[contentCol],
|
|
93
|
+
source: `sqlite:${table}`,
|
|
129
94
|
}));
|
|
130
95
|
}
|
|
131
96
|
|
|
132
|
-
if (
|
|
133
|
-
const
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
97
|
+
if (this.pg) {
|
|
98
|
+
const res = await this.pg.query(
|
|
99
|
+
`SELECT ${idCol}, ${contentCol} FROM ${table}`
|
|
100
|
+
);
|
|
101
|
+
return res.rows.map(r => ({
|
|
102
|
+
id: r[idCol],
|
|
103
|
+
content: r[contentCol],
|
|
104
|
+
source: `postgres:${table}`,
|
|
139
105
|
}));
|
|
140
106
|
}
|
|
141
107
|
|
|
142
108
|
return [];
|
|
143
109
|
}
|
|
144
|
-
|
|
145
|
-
buildMongoQuery(context) {
|
|
146
|
-
let filter = {};
|
|
147
|
-
if (typeof context.doc_filter === "string") {
|
|
148
|
-
try {
|
|
149
|
-
filter = JSON.parse(context.doc_filter);
|
|
150
|
-
} catch {
|
|
151
|
-
filter = { $text: { $search: context.doc_filter } };
|
|
152
|
-
}
|
|
153
|
-
}
|
|
154
|
-
return { filter, projection: {} };
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
buildSqlQuery(context) {
|
|
158
|
-
const table = context.db_table || "documents";
|
|
159
|
-
const where = context.doc_where || "1=1";
|
|
160
|
-
return {
|
|
161
|
-
sql: `SELECT * FROM ${table} WHERE ${where}`,
|
|
162
|
-
params: [],
|
|
163
|
-
};
|
|
164
|
-
}
|
|
165
110
|
}
|
|
166
111
|
|
|
167
|
-
|
|
112
|
+
/* ---------------- DOCUMENT LOADING ---------------- */
|
|
113
|
+
|
|
168
114
|
async function loadAllDocuments(context) {
|
|
169
115
|
const docs = [];
|
|
170
|
-
const db = new DatabaseAdapter();
|
|
171
116
|
|
|
117
|
+
const db = new DatabaseAdapter();
|
|
172
118
|
try {
|
|
173
119
|
await db.initialize(context);
|
|
174
120
|
docs.push(...(await db.queryDocuments(context)));
|
|
@@ -179,22 +125,29 @@ async function loadAllDocuments(context) {
|
|
|
179
125
|
: path.join(process.cwd(), "docs");
|
|
180
126
|
|
|
181
127
|
if (fs.existsSync(baseDir)) {
|
|
182
|
-
const files = fs.readdirSync(baseDir).filter(
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
128
|
+
const files = fs.readdirSync(baseDir).filter(f =>
|
|
129
|
+
f.endsWith(".txt") || f.endsWith(".md")
|
|
130
|
+
);
|
|
131
|
+
|
|
132
|
+
for (const file of files) {
|
|
133
|
+
docs.push({
|
|
134
|
+
id: file,
|
|
135
|
+
content: fs.readFileSync(path.join(baseDir, file), "utf8"),
|
|
136
|
+
source: `file:${file}`,
|
|
137
|
+
});
|
|
186
138
|
}
|
|
187
139
|
}
|
|
188
140
|
|
|
189
141
|
return docs;
|
|
190
142
|
}
|
|
191
143
|
|
|
192
|
-
|
|
193
|
-
|
|
144
|
+
/* ---------------- HYBRID VECTOR SEARCH ---------------- */
|
|
145
|
+
|
|
146
|
+
async function performHybridDocQA(query, context) {
|
|
194
147
|
const cache = loadCache();
|
|
195
148
|
const embedder = new LocalEmbedding({ dimension: 384 });
|
|
196
149
|
|
|
197
|
-
const
|
|
150
|
+
const store = VectorRouter.create({
|
|
198
151
|
backend: context.vectorBackend || "memory",
|
|
199
152
|
dimension: 384,
|
|
200
153
|
...context,
|
|
@@ -205,17 +158,17 @@ async function performHybridDocQA(query, context = {}) {
|
|
|
205
158
|
|
|
206
159
|
for (const doc of documents) {
|
|
207
160
|
if (!cache[doc.id]) {
|
|
161
|
+
cache[doc.id] = true;
|
|
208
162
|
const chunks = chunkText(doc.content, 500);
|
|
209
|
-
|
|
210
|
-
for (
|
|
211
|
-
const vector = await embedder.embed(
|
|
212
|
-
await
|
|
213
|
-
id: `${doc.id}:${
|
|
163
|
+
|
|
164
|
+
for (let i = 0; i < chunks.length; i++) {
|
|
165
|
+
const vector = await embedder.embed(chunks[i]);
|
|
166
|
+
await store.upsert({
|
|
167
|
+
id: `${doc.id}:${i}`,
|
|
214
168
|
vector,
|
|
215
|
-
|
|
169
|
+
content: chunks[i],
|
|
216
170
|
source: doc.source,
|
|
217
171
|
});
|
|
218
|
-
cache[doc.id].push(vector);
|
|
219
172
|
}
|
|
220
173
|
}
|
|
221
174
|
}
|
|
@@ -223,47 +176,58 @@ async function performHybridDocQA(query, context = {}) {
|
|
|
223
176
|
saveCache(cache);
|
|
224
177
|
|
|
225
178
|
const queryVector = await embedder.embed(query);
|
|
226
|
-
const results = await
|
|
179
|
+
const results = await store.search({
|
|
180
|
+
embedding: queryVector,
|
|
181
|
+
topK: 5,
|
|
182
|
+
});
|
|
227
183
|
|
|
228
184
|
return {
|
|
229
185
|
text: highlightMatches(
|
|
230
|
-
results.map(
|
|
186
|
+
results.map(r => r.content).join("\n\n"),
|
|
231
187
|
extractKeywords(query)
|
|
232
188
|
),
|
|
233
189
|
meta: { matches: results.length },
|
|
234
190
|
};
|
|
235
191
|
}
|
|
236
192
|
|
|
237
|
-
|
|
238
|
-
|
|
193
|
+
/* ---------------- PGVECTOR SEARCH ---------------- */
|
|
194
|
+
|
|
195
|
+
/**
 * Answer `query` through the pgvector adapter.
 *
 * The query is embedded with a 384-dimension `LocalEmbedding`, handed to
 * `PgVectorAdapter#search` together with the value 5 (presumably the result
 * limit — confirm against the adapter), and the `content` of each hit is
 * joined with blank lines.
 *
 * @param {string} query - Text to search for.
 * @param {object} context - Must provide `POSTGRES_URL`.
 * @returns {Promise<{text: string, meta: {matches: number}}>}
 */
async function performPgVectorSearch(query, context) {
  const adapter = new PgVectorAdapter({ POSTGRES_URL: context.POSTGRES_URL });

  let results;
  try {
    const embedder = new LocalEmbedding({ dimension: 384 });
    const vector = await embedder.embed(query);
    results = await adapter.search(vector, 5);
  } finally {
    // Release the adapter even when embedding or searching fails.
    await adapter.close();
  }

  return {
    text: results.map((r) => r.content).join("\n\n"),
    meta: { matches: results.length },
  };
}
|
|
248
208
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
209
|
+
/* ---------------- ROUTER ---------------- */
|
|
210
|
+
|
|
211
|
+
/**
 * Route a document question to the right search implementation.
 *
 * When `context.POSTGRES_URL` is set the query goes to
 * `performPgVectorSearch`; otherwise it goes to `performHybridDocQA`.
 *
 * @param {string} query - Text to search for.
 * @param {object} [context={}] - Resolver context; defaults to an empty
 *   object so an omitted context does not throw on property access.
 * @returns {Promise<*>} Whatever the selected search function resolves to.
 */
async function performDocQA(query, context = {}) {
  if (context.POSTGRES_URL) {
    return performPgVectorSearch(query, context);
  }
  return performHybridDocQA(query, context);
}
|
|
254
217
|
|
|
255
|
-
|
|
256
|
-
return performVectorQA(query, context);
|
|
257
|
-
}
|
|
218
|
+
/* ---------------- O-LANG RESOLVER ---------------- */
|
|
258
219
|
|
|
259
|
-
// ------------------- RESOLVER -------------------
|
|
260
220
|
/**
 * Resolver entry point for "Ask doc-search ..." actions.
 *
 * The query is the text inside the first double- or single-quoted span of
 * the action (the match is greedy, running to the last matching quote);
 * when there is no quoted span, it is the action with the
 * "Ask doc-search" prefix removed and trimmed.
 *
 * @param {string} action - Action text.
 * @param {object} context - Resolver context, forwarded to `performDocQA`.
 * @returns {Promise<*|undefined>} The search result, or `undefined` when the
 *   action is not a string starting with "Ask doc-search".
 */
async function docSearchResolver(action, context) {
  if (typeof action !== "string" || !action.startsWith("Ask doc-search")) {
    return;
  }

  const match = action.match(/"(.*)"|'(.*)'/);
  // `??` rather than `||`: an empty quoted query ("") must stay an empty
  // string instead of falling through to the unmatched (undefined) group.
  const query = match
    ? match[1] ?? match[2]
    : action.replace("Ask doc-search", "").trim();

  return performDocQA(query, context);
}
|
|
267
230
|
|
|
268
231
|
docSearchResolver.resolverName = "doc-search";
|
|
269
232
|
module.exports = docSearchResolver;
|
|
233
|
+
|