@o-lang/semantic-doc-search 1.0.21 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +85 -93
package/package.json
CHANGED
package/src/index.js
CHANGED
@@ -13,7 +13,9 @@ const CACHE_PATH = path.join(process.cwd(), "embeddings.json");

 function safeResolve(base, userPath) {
   const resolved = path.resolve(base, userPath);
-  if (!resolved.startsWith(path.resolve(base)))
+  if (!resolved.startsWith(path.resolve(base))) {
+    throw new Error("Path traversal detected");
+  }
   return resolved;
 }

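The braces added to safeResolve change its behavior: in 1.0.21 the un-braced `if` captured the following `return resolved;`, so an escaping path was never rejected. A minimal usage sketch, illustrative only (the diff does not show safeResolve being exported):

// Hypothetical direct calls to the module-internal helper.
safeResolve("/srv/docs", "guide.md");      // -> "/srv/docs/guide.md"
safeResolve("/srv/docs", "../etc/passwd"); // 1.0.22: throws Error("Path traversal detected")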
@@ -32,7 +34,8 @@ function saveCache(cache) {
   } catch {}
 }

-
+/* ---------------- DATABASE ADAPTER ---------------- */
+
 class DatabaseAdapter {
   constructor() {
     this.initialized = false;
@@ -56,115 +59,85 @@ class DatabaseAdapter {
     const Database = require("better-sqlite3");
     const dbPath = context.db_path || "./database.db";
     const dbDir = path.dirname(path.resolve(dbPath));
-    if (!fs.existsSync(dbDir))
+    if (!fs.existsSync(dbDir)) {
+      throw new Error(`SQLite database directory not found: ${dbDir}`);
+    }
     this.sqliteClient = new Database(dbPath, { readonly: true });
   }

   async querySQLite(query, params = []) {
-    if (!this.sqliteClient) throw new Error("SQLite client not initialized");
     const stmt = this.sqliteClient.prepare(query);
     return stmt.all(...params);
   }

   async initMongo(context) {
     const { MongoClient } = require("mongodb");
-    const uri =
+    const uri =
+      context.MONGO_URI ||
+      `mongodb://localhost:27017/${context.db_name || "olang"}`;
     this.mongoClient = new MongoClient(uri);
     await this.mongoClient.connect();
   }

   async queryMongo(collectionName, filter = {}, projection = {}) {
-    if (!this.mongoClient) throw new Error("MongoDB client not initialized");
     const db = this.mongoClient.db(process.env.DB_NAME || "olang");
     return db.collection(collectionName).find(filter, { projection }).toArray();
   }

   async initPostgres(context) {
     const { Pool } = require("pg");
-
+    this.postgresClient = new Pool({
       connectionString: context.POSTGRES_URL,
-      host: context.DB_HOST || "localhost",
-      port: parseInt(context.DB_PORT) || 5432,
-      user: context.DB_USER,
-      password: context.DB_PASSWORD,
-      database: context.DB_NAME || "olang",
-    };
-    Object.keys(poolConfig).forEach((k) => {
-      if (poolConfig[k] == null) delete poolConfig[k];
     });
-    this.postgresClient = new Pool(poolConfig);
   }

   async queryPostgres(query, params = []) {
-    if (!this.postgresClient) throw new Error("PostgreSQL client not initialized");
     const result = await this.postgresClient.query(query, params);
     return result.rows;
   }

   async queryDocuments(context) {
-    const
-
-
-
-
-
-
-
-
-
-    return results.map((doc) => ({
-      id: doc._id?.toString() || doc[db_id_column],
-      content: doc[db_content_column] || "",
-      source: `mongodb:${db_table}`,
+    const table = context.db_table || "documents";
+    const contentCol = context.db_content_column || "content";
+    const idCol = context.db_id_column || "id";
+
+    if (context.MONGO_URI) {
+      const rows = await this.queryMongo(table);
+      return rows.map((r) => ({
+        id: r._id?.toString(),
+        content: r[contentCol] || "",
+        source: `mongodb:${table}`,
       }));
     }

-    if (
-    const
-
-
-
-
+    if (context.db_path) {
+      const rows = await this.querySQLite(
+        `SELECT ${idCol}, ${contentCol} FROM ${table}`
+      );
+      return rows.map((r) => ({
+        id: r[idCol],
+        content: r[contentCol],
+        source: `sqlite:${table}`,
       }));
     }

-    if (
-    const
-
-
-
-
+    if (context.POSTGRES_URL) {
+      const rows = await this.queryPostgres(
+        `SELECT ${idCol}, ${contentCol} FROM ${table}`
+      );
+      return rows.map((r) => ({
+        id: r[idCol],
+        content: r[contentCol],
+        source: `postgres:${table}`,
       }));
     }

     return [];
   }
-
-  buildMongoQuery(context) {
-    let filter = {};
-    if (typeof context.doc_filter === "string") {
-      try {
-        filter = JSON.parse(context.doc_filter);
-      } catch {
-        filter = { $text: { $search: context.doc_filter } };
-      }
-    }
-    return { filter, projection: {} };
-  }
-
-  buildSqlQuery(context) {
-    const table = context.db_table || "documents";
-    const where = context.doc_where || "1=1";
-    return {
-      sql: `SELECT * FROM ${table} WHERE ${where}`,
-      params: [],
-    };
-  }
 }

-
+/* ---------------- DOCUMENT LOADING ---------------- */
+
 async function loadAllDocuments(context) {
   const docs = [];
   const db = new DatabaseAdapter();
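The rewritten queryDocuments picks its backend from whichever connection key is present on the o-lang context, checking Mongo, then SQLite, then Postgres. A sketch of the keys and defaults it reads, taken from the added lines above (how the adapter's init methods are invoked is outside this hunk and not shown here):

// Illustrative context for DatabaseAdapter.queryDocuments() in 1.0.22.
const context = {
  MONGO_URI: "mongodb://localhost:27017/olang",    // -> queryMongo(table), source "mongodb:<table>"
  // db_path: "./database.db",                     // -> querySQLite(...),  source "sqlite:<table>"
  // POSTGRES_URL: "postgres://user:pass@host/db", // -> queryPostgres(...), source "postgres:<table>"
  db_table: "documents",        // default "documents"
  db_content_column: "content", // default "content"
  db_id_column: "id",           // default "id"
};
// With none of the three connection keys set, queryDocuments() returns [].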
@@ -179,43 +152,52 @@ async function loadAllDocuments(context) {
     : path.join(process.cwd(), "docs");

   if (fs.existsSync(baseDir)) {
-    const files = fs
+    const files = fs
+      .readdirSync(baseDir)
+      .filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
+
     for (const f of files) {
-
-
+      docs.push({
+        id: f,
+        content: fs.readFileSync(path.join(baseDir, f), "utf8"),
+        source: `file:${f}`,
+      });
     }
   }

   return docs;
 }

-
+/* ---------------- HYBRID VECTOR SEARCH ---------------- */
+
 async function performHybridDocQA(query, context = {}) {
   const cache = loadCache();
   const embedder = new LocalEmbedding({ dimension: 384 });

-  const
+  const vectorStore = VectorRouter.create({
     backend: context.vectorBackend || "memory",
     dimension: 384,
     ...context,
   });

   const documents = await loadAllDocuments(context);
-  if (!documents.length)
+  if (!documents.length) {
+    return { text: "", meta: {} };
+  }

   for (const doc of documents) {
     if (!cache[doc.id]) {
+      cache[doc.id] = true;
       const chunks = chunkText(doc.content, 500);
-
-      for (
-      const vector = await embedder.embed(
-      await
-      id: `${doc.id}:${
+
+      for (let i = 0; i < chunks.length; i++) {
+        const vector = await embedder.embed(chunks[i]);
+        await vectorStore.upsert({
+          id: `${doc.id}:${i}`,
           vector,
-
+          content: chunks[i],
           source: doc.source,
         });
-      cache[doc.id].push(vector);
       }
     }
   }
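For reference, the payload each chunk is upserted with after this hunk, reconstructed from the added lines (a sketch of the stored record, not a new public API; doc, i, chunks, and vector are the loop variables above):

// performHybridDocQA() now marks cache[doc.id] = true and upserts one record
// per chunk from chunkText(doc.content, 500) for every previously unseen document.
const record = {
  id: `${doc.id}:${i}`,  // document id plus chunk index
  vector,                // 384-dimension LocalEmbedding of chunks[i]
  content: chunks[i],    // chunk text now stored alongside the vector (new in 1.0.22)
  source: doc.source,    // e.g. "file:notes.md" or "sqlite:documents"
};
// 1.0.21 instead pushed vectors into cache[doc.id]; the cache is now only a
// seen-flag per document id.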
@@ -223,44 +205,54 @@ async function performHybridDocQA(query, context = {}) {
   saveCache(cache);

   const queryVector = await embedder.embed(query);
-  const results = await
+  const results = await vectorStore.query(queryVector, 5);

   return {
     text: highlightMatches(
-      results.map((r) => r.
+      results.map((r) => r.content).join("\n\n"),
       extractKeywords(query)
     ),
     meta: { matches: results.length },
   };
 }

-
+/* ---------------- PGVECTOR SEARCH ---------------- */
+
 async function performPgVectorSearch(query, context = {}) {
-  const adapter = new PgVectorAdapter({
+  const adapter = new PgVectorAdapter({
+    POSTGRES_URL: context.POSTGRES_URL,
+  });
+
   const embedder = new LocalEmbedding({ dimension: 384 });
   const vector = await embedder.embed(query);
-  const results = await adapter.
+  const results = await adapter.query(vector, 5);
+
+  await adapter.close();
+
   return {
     text: results.map((r) => r.content).join("\n\n"),
     meta: { matches: results.length },
   };
 }

-
-async function performVectorQA(query, context = {}) {
-  if (context.POSTGRES_URL) return performPgVectorSearch(query, context);
-  return performHybridDocQA(query, context);
-}
+/* ---------------- ROUTER ---------------- */

 async function performDocQA(query, context = {}) {
-
+  if (context.POSTGRES_URL) {
+    return performPgVectorSearch(query, context);
+  }
+  return performHybridDocQA(query, context);
 }

-
+/* ---------------- O-LANG RESOLVER ---------------- */
+
 async function docSearchResolver(action, context) {
   if (action.startsWith("Ask doc-search")) {
     const match = action.match(/"(.*)"|'(.*)'/);
-    const query = match
+    const query = match
+      ? match[1] || match[2]
+      : action.replace("Ask doc-search", "").trim();
+
     return performDocQA(query, context);
   }
 }