@o-lang/semantic-doc-search 1.0.20 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +161 -127
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
// doc-search.js
|
|
2
1
|
const fs = require("fs");
|
|
3
2
|
const path = require("path");
|
|
4
3
|
const { createLLM } = require("./llm/router.js");
|
|
@@ -14,29 +13,45 @@ const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
|
|
|
14
13
|
|
|
15
14
|
/**
 * Resolve `userPath` against `base` and reject any result outside `base`.
 *
 * @param {string} base - Directory the result must stay inside.
 * @param {string} userPath - Caller-supplied relative or absolute path.
 * @returns {string} Absolute resolved path (may equal `base` itself).
 * @throws {Error} "Path traversal detected" when the result escapes `base`.
 */
function safeResolve(base, userPath) {
  const root = path.resolve(base);
  const resolved = path.resolve(root, userPath);

  // A plain string-prefix test would accept siblings such as "/base-evil"
  // for base "/base"; comparing the relative path respects segment borders.
  const relative = path.relative(root, resolved);
  const escapesRoot =
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative); // e.g. a different drive on Windows

  if (escapesRoot) {
    throw new Error("Path traversal detected");
  }
  return resolved;
}
|
|
20
21
|
|
|
21
22
|
/**
 * Read the embedding cache stored at CACHE_PATH.
 *
 * @returns {Object} The parsed cache object, or an empty object when the
 *   file is missing, unreadable, not valid JSON, or not a plain object.
 */
function loadCache() {
  if (!fs.existsSync(CACHE_PATH)) {
    return {};
  }

  try {
    const parsed = JSON.parse(fs.readFileSync(CACHE_PATH, "utf8"));
    // Only a plain object can act as an id -> flag map; arrays and
    // primitives would be handed back as-is by a bare truthiness check.
    const isPlainObject =
      parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    if (isPlainObject) {
      return parsed;
    }
  } catch (err) {
    // The cache is best-effort: report the problem and start empty.
    console.warn(`[doc-search] Ignoring unreadable embedding cache: ${err.message}`);
  }
  return {};
}
|
|
27
30
|
|
|
28
31
|
/**
 * Write the embedding cache to CACHE_PATH as pretty-printed JSON.
 * Failures are reported but never thrown, because the cache is optional.
 *
 * @param {Object} cache - Cache object to serialize.
 * @returns {void}
 */
function saveCache(cache) {
  try {
    fs.writeFileSync(CACHE_PATH, JSON.stringify(cache, null, 2));
  } catch (err) {
    // Stay best-effort, but leave a trace instead of failing silently.
    console.warn(`[doc-search] Could not persist embedding cache: ${err.message}`);
  }
}
|
|
31
36
|
|
|
32
|
-
|
|
37
|
+
/* ---------------- DATABASE ADAPTER ---------------- */
|
|
38
|
+
|
|
33
39
|
class DatabaseAdapter {
|
|
34
|
-
constructor() {
|
|
40
|
+
constructor() {
|
|
41
|
+
this.initialized = false;
|
|
42
|
+
}
|
|
43
|
+
|
|
35
44
|
async initialize(context) {
|
|
36
45
|
if (this.initialized) return;
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
46
|
+
|
|
47
|
+
if (context.db_type === "mongodb" || context.MONGO_URI) {
|
|
48
|
+
await this.initMongo(context);
|
|
49
|
+
} else if (context.db_type === "sqlite" || context.db_path) {
|
|
50
|
+
await this.initSQLite(context);
|
|
51
|
+
} else if (context.db_type === "postgres" || context.POSTGRES_URL) {
|
|
52
|
+
await this.initPostgres(context);
|
|
53
|
+
}
|
|
54
|
+
|
|
40
55
|
this.initialized = true;
|
|
41
56
|
}
|
|
42
57
|
|
|
@@ -44,183 +59,202 @@ class DatabaseAdapter {
|
|
|
44
59
|
const Database = require("better-sqlite3");
|
|
45
60
|
const dbPath = context.db_path || "./database.db";
|
|
46
61
|
const dbDir = path.dirname(path.resolve(dbPath));
|
|
47
|
-
if (!fs.existsSync(dbDir))
|
|
62
|
+
if (!fs.existsSync(dbDir)) {
|
|
63
|
+
throw new Error(`SQLite database directory not found: ${dbDir}`);
|
|
64
|
+
}
|
|
48
65
|
this.sqliteClient = new Database(dbPath, { readonly: true });
|
|
49
66
|
}
|
|
50
67
|
|
|
51
68
|
async querySQLite(query, params = []) {
|
|
52
|
-
if (!this.sqliteClient) throw new Error("SQLite client not initialized");
|
|
53
69
|
const stmt = this.sqliteClient.prepare(query);
|
|
54
70
|
return stmt.all(...params);
|
|
55
71
|
}
|
|
56
72
|
|
|
57
73
|
async initMongo(context) {
|
|
58
74
|
const { MongoClient } = require("mongodb");
|
|
59
|
-
const uri =
|
|
75
|
+
const uri =
|
|
76
|
+
context.MONGO_URI ||
|
|
77
|
+
`mongodb://localhost:27017/${context.db_name || "olang"}`;
|
|
60
78
|
this.mongoClient = new MongoClient(uri);
|
|
61
79
|
await this.mongoClient.connect();
|
|
62
80
|
}
|
|
63
81
|
|
|
64
82
|
async queryMongo(collectionName, filter = {}, projection = {}) {
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
return await db.collection(collectionName).find(filter, { projection }).toArray();
|
|
83
|
+
const db = this.mongoClient.db(process.env.DB_NAME || "olang");
|
|
84
|
+
return db.collection(collectionName).find(filter, { projection }).toArray();
|
|
68
85
|
}
|
|
69
86
|
|
|
70
87
|
async initPostgres(context) {
|
|
71
88
|
const { Pool } = require("pg");
|
|
72
|
-
|
|
89
|
+
this.postgresClient = new Pool({
|
|
73
90
|
connectionString: context.POSTGRES_URL,
|
|
74
|
-
host: context.DB_HOST || "localhost",
|
|
75
|
-
port: parseInt(context.DB_PORT) || 5432,
|
|
76
|
-
user: context.DB_USER,
|
|
77
|
-
password: context.DB_PASSWORD,
|
|
78
|
-
database: context.DB_NAME || "olang",
|
|
79
|
-
};
|
|
80
|
-
Object.keys(poolConfig).forEach((k) => {
|
|
81
|
-
if (poolConfig[k] === undefined || poolConfig[k] === null) delete poolConfig[k];
|
|
82
91
|
});
|
|
83
|
-
this.postgresClient = new Pool(poolConfig);
|
|
84
92
|
}
|
|
85
93
|
|
|
86
94
|
async queryPostgres(query, params = []) {
|
|
87
|
-
if (!this.postgresClient) throw new Error("PostgreSQL client not initialized");
|
|
88
95
|
const result = await this.postgresClient.query(query, params);
|
|
89
96
|
return result.rows;
|
|
90
97
|
}
|
|
91
98
|
|
|
92
99
|
async queryDocuments(context) {
|
|
93
|
-
const
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
const { sql, params } = this.buildSqlQuery(context);
|
|
104
|
-
const results = await this.querySQLite(sql, params);
|
|
105
|
-
return results.map((row) => ({
|
|
106
|
-
id: row[db_id_column],
|
|
107
|
-
content: row[db_content_column],
|
|
108
|
-
source: `sqlite:${db_table}`,
|
|
109
|
-
}));
|
|
110
|
-
} else if (db_type === "postgres" || context.POSTGRES_URL) {
|
|
111
|
-
const { sql, params } = this.buildSqlQuery(context);
|
|
112
|
-
const results = await this.queryPostgres(sql, params);
|
|
113
|
-
return results.map((row) => ({
|
|
114
|
-
id: row[db_id_column],
|
|
115
|
-
content: row[db_content_column],
|
|
116
|
-
source: `postgres:${db_table}`,
|
|
100
|
+
const table = context.db_table || "documents";
|
|
101
|
+
const contentCol = context.db_content_column || "content";
|
|
102
|
+
const idCol = context.db_id_column || "id";
|
|
103
|
+
|
|
104
|
+
if (context.MONGO_URI) {
|
|
105
|
+
const rows = await this.queryMongo(table);
|
|
106
|
+
return rows.map((r) => ({
|
|
107
|
+
id: r._id?.toString(),
|
|
108
|
+
content: r[contentCol] || "",
|
|
109
|
+
source: `mongodb:${table}`,
|
|
117
110
|
}));
|
|
118
111
|
}
|
|
119
|
-
return [];
|
|
120
|
-
}
|
|
121
112
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
113
|
+
if (context.db_path) {
|
|
114
|
+
const rows = await this.querySQLite(
|
|
115
|
+
`SELECT ${idCol}, ${contentCol} FROM ${table}`
|
|
116
|
+
);
|
|
117
|
+
return rows.map((r) => ({
|
|
118
|
+
id: r[idCol],
|
|
119
|
+
content: r[contentCol],
|
|
120
|
+
source: `sqlite:${table}`,
|
|
121
|
+
}));
|
|
122
|
+
}
|
|
131
123
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
124
|
+
if (context.POSTGRES_URL) {
|
|
125
|
+
const rows = await this.queryPostgres(
|
|
126
|
+
`SELECT ${idCol}, ${contentCol} FROM ${table}`
|
|
127
|
+
);
|
|
128
|
+
return rows.map((r) => ({
|
|
129
|
+
id: r[idCol],
|
|
130
|
+
content: r[contentCol],
|
|
131
|
+
source: `postgres:${table}`,
|
|
132
|
+
}));
|
|
137
133
|
}
|
|
138
|
-
const table = context.db_table || "documents";
|
|
139
|
-
const sql = `SELECT ${db_id_column}, ${db_content_column} FROM ${table} WHERE ${doc_where}`;
|
|
140
|
-
return { sql, params };
|
|
141
|
-
}
|
|
142
134
|
|
|
143
|
-
|
|
144
|
-
if (this.sqliteClient) { try { this.sqliteClient.close(); } catch {} this.sqliteClient = null; }
|
|
145
|
-
if (this.mongoClient) { try { await this.mongoClient.close(); } catch {} this.mongoClient = null; }
|
|
146
|
-
if (this.postgresClient) { try { await this.postgresClient.end(); } catch {} this.postgresClient = null; }
|
|
147
|
-
this.initialized = false;
|
|
135
|
+
return [];
|
|
148
136
|
}
|
|
149
137
|
}
|
|
150
138
|
|
|
151
|
-
|
|
152
|
-
async function loadDocumentsFromDatabase(context) {
|
|
153
|
-
if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL) return null;
|
|
154
|
-
const dbAdapter = new DatabaseAdapter();
|
|
155
|
-
try { await dbAdapter.initialize(context); return await dbAdapter.queryDocuments(context); } catch (e) { console.error("🗃️ [doc-search] Database load error:", e.message); return null; }
|
|
156
|
-
}
|
|
139
|
+
/* ---------------- DOCUMENT LOADING ---------------- */
|
|
157
140
|
|
|
158
141
|
/**
 * Close whichever database clients the adapter opened.
 * Every close is best-effort so one failure cannot block the others.
 */
async function closeDatabaseClients(db) {
  const closers = [
    () => db.sqliteClient?.close(),
    () => db.mongoClient?.close(),
    () => db.postgresClient?.end(),
  ];
  for (const close of closers) {
    try {
      await close();
    } catch {
      // Nothing useful can be done about a failed close here.
    }
  }
}

/**
 * Collect documents from the configured database (if any) and from the
 * `.txt` / `.md` files directly inside the document directory.
 *
 * The directory is `context.doc_root` resolved safely under the current
 * working directory, or `<cwd>/docs` when `doc_root` is not set.
 *
 * @param {Object} context - Database settings plus optional doc_root.
 * @returns {Promise<Array<{id: *, content: string, source: string}>>}
 *   Database records first, then file records.
 * @throws {Error} When `doc_root` escapes the working directory.
 */
async function loadAllDocuments(context) {
  const docs = [];
  const db = new DatabaseAdapter();

  try {
    await db.initialize(context);
    // A loop avoids the argument-count limit that push(...rows) can hit.
    for (const row of await db.queryDocuments(context)) {
      docs.push(row);
    }
  } catch (err) {
    // The database is optional; report the failure and fall back to files.
    console.error("[doc-search] Database load error:", err.message);
  } finally {
    // The adapter is local to this call, so its connections are released.
    await closeDatabaseClients(db);
  }

  const baseDir = context.doc_root
    ? safeResolve(process.cwd(), context.doc_root)
    : path.join(process.cwd(), "docs");

  if (fs.existsSync(baseDir)) {
    for (const name of fs.readdirSync(baseDir)) {
      if (!name.endsWith(".txt") && !name.endsWith(".md")) continue;

      const fullPath = path.join(baseDir, name);
      // Skip directories that merely look like documents (e.g. "notes.md/").
      if (!fs.statSync(fullPath).isFile()) continue;

      docs.push({
        id: name,
        content: fs.readFileSync(fullPath, "utf8"),
        source: `file:${name}`,
      });
    }
  }

  return docs;
}
|
|
180
170
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
const
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
171
|
+
/* ---------------- HYBRID VECTOR SEARCH ---------------- */
|
|
172
|
+
|
|
173
|
+
/**
 * Answer `query` by embedding all loaded documents into a vector store and
 * returning the top five matching chunks, with keywords highlighted.
 *
 * @param {string} query - Natural-language question.
 * @param {Object} [context={}] - Reads vectorBackend (default "memory");
 *   the whole context is also forwarded to the vector store factory and
 *   to document loading.
 * @returns {Promise<{text: string, meta: Object}>} Joined matching chunks
 *   and `{ matches }`, or `{ text: "", meta: {} }` when no documents exist.
 */
async function performHybridDocQA(query, context = {}) {
  const cache = loadCache();
  const embedder = new LocalEmbedding({ dimension: 384 });

  const backend = context.vectorBackend || "memory";
  const vectorStore = VectorRouter.create({
    backend,
    dimension: 384,
    ...context,
  });

  const documents = await loadAllDocuments(context);
  if (!documents.length) {
    return { text: "", meta: {} };
  }

  // The on-disk cache only records that a document id was embedded once.
  // A "memory" store is created empty on every call, so skipping cached
  // ids there would leave nothing to search.
  // NOTE(review): assumes non-memory backends persist vectors between
  // runs, and that `context` does not override `backend` via the spread
  // above; confirm against VectorRouter.
  const storeIsEphemeral = backend === "memory";

  for (const doc of documents) {
    if (!storeIsEphemeral && cache[doc.id]) continue;

    const chunks = chunkText(doc.content, 500);
    for (let i = 0; i < chunks.length; i++) {
      const vector = await embedder.embed(chunks[i]);
      await vectorStore.upsert({
        id: `${doc.id}:${i}`,
        vector,
        content: chunks[i],
        source: doc.source,
      });
    }

    // Mark as cached only after every chunk was stored successfully.
    cache[doc.id] = true;
  }

  saveCache(cache);

  const queryVector = await embedder.embed(query);
  const results = await vectorStore.query(queryVector, 5);

  return {
    text: highlightMatches(
      results.map((r) => r.content).join("\n\n"),
      extractKeywords(query)
    ),
    meta: { matches: results.length },
  };
}
|
|
218
|
+
|
|
219
|
+
/* ---------------- PGVECTOR SEARCH ---------------- */
|
|
220
|
+
|
|
221
|
+
/**
 * Answer `query` from the pgvector store: embed the query, fetch the five
 * nearest entries and join their content.
 *
 * @param {string} query - Natural-language question.
 * @param {Object} [context={}] - Reads POSTGRES_URL for the adapter.
 * @returns {Promise<{text: string, meta: {matches: number}}>}
 * @throws Whatever embedding or the adapter query rejects with.
 */
async function performPgVectorSearch(query, context = {}) {
  const adapter = new PgVectorAdapter({
    POSTGRES_URL: context.POSTGRES_URL,
  });

  try {
    const embedder = new LocalEmbedding({ dimension: 384 });
    const vector = await embedder.embed(query);
    const results = await adapter.query(vector, 5);

    return {
      text: results.map((r) => r.content).join("\n\n"),
      meta: { matches: results.length },
    };
  } finally {
    // Release the adapter even when embedding or the query fails.
    await adapter.close();
  }
}
|
|
209
237
|
|
|
210
|
-
|
|
211
|
-
// [Keep performPgVectorSearch, performHybridDocQA, loadAllDocuments, chunking, cache logic identical to previous full file]
|
|
238
|
+
/* ---------------- ROUTER ---------------- */
|
|
212
239
|
|
|
213
240
|
/**
 * Route a question to the matching search strategy: pgvector search when
 * `context.POSTGRES_URL` is set, hybrid document search otherwise.
 *
 * @param {string} query - Natural-language question.
 * @param {Object} [context={}] - Search settings, forwarded unchanged.
 * @returns {Promise<{text: string, meta: Object}>} The strategy's result.
 */
async function performDocQA(query, context = {}) {
  const search = context.POSTGRES_URL
    ? performPgVectorSearch
    : performHybridDocQA;
  return search(query, context);
}
|
|
216
246
|
|
|
247
|
+
/* ---------------- O-LANG RESOLVER ---------------- */
|
|
248
|
+
|
|
217
249
|
/**
 * Resolver entry point: handles actions that start with "Ask doc-search".
 *
 * The query is the first quoted span in the action (double or single
 * quotes); without quotes it is the trimmed remainder after the prefix.
 *
 * @param {string} action - Action text.
 * @param {Object} context - Settings forwarded to `performDocQA`.
 * @returns {Promise<Object|undefined>} The search result, or `undefined`
 *   when the action is not addressed to this resolver.
 */
async function docSearchResolver(action, context) {
  const PREFIX = "Ask doc-search";
  if (typeof action !== "string" || !action.startsWith(PREFIX)) {
    return undefined;
  }

  const match = action.match(/"(.*)"|'(.*)'/);
  // `??` keeps an explicitly empty quoted query ("") as a string; `||`
  // would fall through to the unmatched group and yield undefined.
  const query = match
    ? match[1] ?? match[2]
    : action.replace(PREFIX, "").trim();

  return performDocQA(query, context);
}
|
|
225
259
|
|
|
226
260
|
docSearchResolver.resolverName = "doc-search";
|