@o-lang/semantic-doc-search 1.0.22 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +55 -83
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
const fs = require("fs");
|
|
2
2
|
const path = require("path");
|
|
3
|
-
const { createLLM } = require("./llm/router.js");
|
|
4
3
|
const { LocalEmbedding } = require("./embeddings/local.js");
|
|
5
4
|
const { chunkText } = require("./utils/chunker.js");
|
|
6
5
|
const { extractKeywords } = require("./utils/extractText.js");
|
|
7
|
-
const { cosine } = require("./utils/similarity.js");
|
|
8
6
|
const { highlightMatches } = require("./utils/highlight.js");
|
|
9
|
-
const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
|
|
10
7
|
const VectorRouter = require("./adapters/vectorRouter");
|
|
8
|
+
const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
|
|
11
9
|
|
|
12
10
|
const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
|
|
13
11
|
|
|
12
|
+
/* ---------------- UTIL ---------------- */
|
|
13
|
+
|
|
14
14
|
function safeResolve(base, userPath) {
|
|
15
15
|
const resolved = path.resolve(base, userPath);
|
|
16
16
|
if (!resolved.startsWith(path.resolve(base))) {
|
|
@@ -44,13 +44,9 @@ class DatabaseAdapter {
|
|
|
44
44
|
async initialize(context) {
|
|
45
45
|
if (this.initialized) return;
|
|
46
46
|
|
|
47
|
-
if (context.
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
await this.initSQLite(context);
|
|
51
|
-
} else if (context.db_type === "postgres" || context.POSTGRES_URL) {
|
|
52
|
-
await this.initPostgres(context);
|
|
53
|
-
}
|
|
47
|
+
if (context.MONGO_URI) await this.initMongo(context);
|
|
48
|
+
else if (context.db_path) await this.initSQLite(context);
|
|
49
|
+
else if (context.POSTGRES_URL) await this.initPostgres(context);
|
|
54
50
|
|
|
55
51
|
this.initialized = true;
|
|
56
52
|
}
|
|
@@ -58,74 +54,51 @@ class DatabaseAdapter {
|
|
|
58
54
|
async initSQLite(context) {
|
|
59
55
|
const Database = require("better-sqlite3");
|
|
60
56
|
const dbPath = context.db_path || "./database.db";
|
|
61
|
-
|
|
62
|
-
if (!fs.existsSync(dbDir)) {
|
|
63
|
-
throw new Error(`SQLite database directory not found: ${dbDir}`);
|
|
64
|
-
}
|
|
65
|
-
this.sqliteClient = new Database(dbPath, { readonly: true });
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
async querySQLite(query, params = []) {
|
|
69
|
-
const stmt = this.sqliteClient.prepare(query);
|
|
70
|
-
return stmt.all(...params);
|
|
57
|
+
this.sqlite = new Database(dbPath, { readonly: true });
|
|
71
58
|
}
|
|
72
59
|
|
|
73
60
|
async initMongo(context) {
|
|
74
61
|
const { MongoClient } = require("mongodb");
|
|
75
|
-
const uri =
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
this.mongoClient = new MongoClient(uri);
|
|
79
|
-
await this.mongoClient.connect();
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
async queryMongo(collectionName, filter = {}, projection = {}) {
|
|
83
|
-
const db = this.mongoClient.db(process.env.DB_NAME || "olang");
|
|
84
|
-
return db.collection(collectionName).find(filter, { projection }).toArray();
|
|
62
|
+
const uri = context.MONGO_URI;
|
|
63
|
+
this.mongo = new MongoClient(uri);
|
|
64
|
+
await this.mongo.connect();
|
|
85
65
|
}
|
|
86
66
|
|
|
87
67
|
async initPostgres(context) {
|
|
88
68
|
const { Pool } = require("pg");
|
|
89
|
-
this.
|
|
90
|
-
connectionString: context.POSTGRES_URL,
|
|
91
|
-
});
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
async queryPostgres(query, params = []) {
|
|
95
|
-
const result = await this.postgresClient.query(query, params);
|
|
96
|
-
return result.rows;
|
|
69
|
+
this.pg = new Pool({ connectionString: context.POSTGRES_URL });
|
|
97
70
|
}
|
|
98
71
|
|
|
99
72
|
async queryDocuments(context) {
|
|
100
73
|
const table = context.db_table || "documents";
|
|
101
|
-
const contentCol = context.db_content_column || "content";
|
|
102
74
|
const idCol = context.db_id_column || "id";
|
|
75
|
+
const contentCol = context.db_content_column || "content";
|
|
103
76
|
|
|
104
|
-
if (
|
|
105
|
-
const rows = await this.
|
|
106
|
-
return rows.map(
|
|
107
|
-
id: r._id
|
|
77
|
+
if (this.mongo) {
|
|
78
|
+
const rows = await this.mongo.db().collection(table).find({}).toArray();
|
|
79
|
+
return rows.map(r => ({
|
|
80
|
+
id: r._id.toString(),
|
|
108
81
|
content: r[contentCol] || "",
|
|
109
82
|
source: `mongodb:${table}`,
|
|
110
83
|
}));
|
|
111
84
|
}
|
|
112
85
|
|
|
113
|
-
if (
|
|
114
|
-
const rows =
|
|
115
|
-
`SELECT ${idCol}, ${contentCol} FROM ${table}`
|
|
116
|
-
|
|
117
|
-
return rows.map(
|
|
86
|
+
if (this.sqlite) {
|
|
87
|
+
const rows = this.sqlite
|
|
88
|
+
.prepare(`SELECT ${idCol}, ${contentCol} FROM ${table}`)
|
|
89
|
+
.all();
|
|
90
|
+
return rows.map(r => ({
|
|
118
91
|
id: r[idCol],
|
|
119
92
|
content: r[contentCol],
|
|
120
93
|
source: `sqlite:${table}`,
|
|
121
94
|
}));
|
|
122
95
|
}
|
|
123
96
|
|
|
124
|
-
if (
|
|
125
|
-
const
|
|
97
|
+
if (this.pg) {
|
|
98
|
+
const res = await this.pg.query(
|
|
126
99
|
`SELECT ${idCol}, ${contentCol} FROM ${table}`
|
|
127
100
|
);
|
|
128
|
-
return rows.map(
|
|
101
|
+
return res.rows.map(r => ({
|
|
129
102
|
id: r[idCol],
|
|
130
103
|
content: r[contentCol],
|
|
131
104
|
source: `postgres:${table}`,
|
|
@@ -140,8 +113,8 @@ class DatabaseAdapter {
|
|
|
140
113
|
|
|
141
114
|
async function loadAllDocuments(context) {
|
|
142
115
|
const docs = [];
|
|
143
|
-
const db = new DatabaseAdapter();
|
|
144
116
|
|
|
117
|
+
const db = new DatabaseAdapter();
|
|
145
118
|
try {
|
|
146
119
|
await db.initialize(context);
|
|
147
120
|
docs.push(...(await db.queryDocuments(context)));
|
|
@@ -152,15 +125,15 @@ async function loadAllDocuments(context) {
|
|
|
152
125
|
: path.join(process.cwd(), "docs");
|
|
153
126
|
|
|
154
127
|
if (fs.existsSync(baseDir)) {
|
|
155
|
-
const files = fs
|
|
156
|
-
.
|
|
157
|
-
|
|
128
|
+
const files = fs.readdirSync(baseDir).filter(f =>
|
|
129
|
+
f.endsWith(".txt") || f.endsWith(".md")
|
|
130
|
+
);
|
|
158
131
|
|
|
159
|
-
for (const
|
|
132
|
+
for (const file of files) {
|
|
160
133
|
docs.push({
|
|
161
|
-
id:
|
|
162
|
-
content: fs.readFileSync(path.join(baseDir,
|
|
163
|
-
source: `file:${
|
|
134
|
+
id: file,
|
|
135
|
+
content: fs.readFileSync(path.join(baseDir, file), "utf8"),
|
|
136
|
+
source: `file:${file}`,
|
|
164
137
|
});
|
|
165
138
|
}
|
|
166
139
|
}
|
|
@@ -170,20 +143,18 @@ async function loadAllDocuments(context) {
|
|
|
170
143
|
|
|
171
144
|
/* ---------------- HYBRID VECTOR SEARCH ---------------- */
|
|
172
145
|
|
|
173
|
-
async function performHybridDocQA(query, context
|
|
146
|
+
async function performHybridDocQA(query, context) {
|
|
174
147
|
const cache = loadCache();
|
|
175
148
|
const embedder = new LocalEmbedding({ dimension: 384 });
|
|
176
149
|
|
|
177
|
-
const
|
|
150
|
+
const store = VectorRouter.create({
|
|
178
151
|
backend: context.vectorBackend || "memory",
|
|
179
152
|
dimension: 384,
|
|
180
153
|
...context,
|
|
181
154
|
});
|
|
182
155
|
|
|
183
156
|
const documents = await loadAllDocuments(context);
|
|
184
|
-
if (!documents.length) {
|
|
185
|
-
return { text: "", meta: {} };
|
|
186
|
-
}
|
|
157
|
+
if (!documents.length) return { text: "", meta: {} };
|
|
187
158
|
|
|
188
159
|
for (const doc of documents) {
|
|
189
160
|
if (!cache[doc.id]) {
|
|
@@ -192,7 +163,7 @@ async function performHybridDocQA(query, context = {}) {
|
|
|
192
163
|
|
|
193
164
|
for (let i = 0; i < chunks.length; i++) {
|
|
194
165
|
const vector = await embedder.embed(chunks[i]);
|
|
195
|
-
await
|
|
166
|
+
await store.upsert({
|
|
196
167
|
id: `${doc.id}:${i}`,
|
|
197
168
|
vector,
|
|
198
169
|
content: chunks[i],
|
|
@@ -205,11 +176,14 @@ async function performHybridDocQA(query, context = {}) {
|
|
|
205
176
|
saveCache(cache);
|
|
206
177
|
|
|
207
178
|
const queryVector = await embedder.embed(query);
|
|
208
|
-
const results = await
|
|
179
|
+
const results = await store.search({
|
|
180
|
+
embedding: queryVector,
|
|
181
|
+
topK: 5,
|
|
182
|
+
});
|
|
209
183
|
|
|
210
184
|
return {
|
|
211
185
|
text: highlightMatches(
|
|
212
|
-
results.map(
|
|
186
|
+
results.map(r => r.content).join("\n\n"),
|
|
213
187
|
extractKeywords(query)
|
|
214
188
|
),
|
|
215
189
|
meta: { matches: results.length },
|
|
@@ -218,26 +192,23 @@ async function performHybridDocQA(query, context = {}) {
|
|
|
218
192
|
|
|
219
193
|
/* ---------------- PGVECTOR SEARCH ---------------- */
|
|
220
194
|
|
|
221
|
-
async function performPgVectorSearch(query, context
|
|
222
|
-
const adapter = new PgVectorAdapter({
|
|
223
|
-
POSTGRES_URL: context.POSTGRES_URL,
|
|
224
|
-
});
|
|
225
|
-
|
|
195
|
+
async function performPgVectorSearch(query, context) {
|
|
196
|
+
const adapter = new PgVectorAdapter({ POSTGRES_URL: context.POSTGRES_URL });
|
|
226
197
|
const embedder = new LocalEmbedding({ dimension: 384 });
|
|
227
|
-
const vector = await embedder.embed(query);
|
|
228
|
-
const results = await adapter.query(vector, 5);
|
|
229
198
|
|
|
199
|
+
const vector = await embedder.embed(query);
|
|
200
|
+
const results = await adapter.search(vector, 5);
|
|
230
201
|
await adapter.close();
|
|
231
202
|
|
|
232
203
|
return {
|
|
233
|
-
text: results.map(
|
|
204
|
+
text: results.map(r => r.content).join("\n\n"),
|
|
234
205
|
meta: { matches: results.length },
|
|
235
206
|
};
|
|
236
207
|
}
|
|
237
208
|
|
|
238
209
|
/* ---------------- ROUTER ---------------- */
|
|
239
210
|
|
|
240
|
-
async function performDocQA(query, context
|
|
211
|
+
async function performDocQA(query, context) {
|
|
241
212
|
if (context.POSTGRES_URL) {
|
|
242
213
|
return performPgVectorSearch(query, context);
|
|
243
214
|
}
|
|
@@ -247,15 +218,16 @@ async function performDocQA(query, context = {}) {
|
|
|
247
218
|
/* ---------------- O-LANG RESOLVER ---------------- */
|
|
248
219
|
|
|
249
220
|
async function docSearchResolver(action, context) {
|
|
250
|
-
if (action.startsWith("Ask doc-search"))
|
|
251
|
-
const match = action.match(/"(.*)"|'(.*)'/);
|
|
252
|
-
const query = match
|
|
253
|
-
? match[1] || match[2]
|
|
254
|
-
: action.replace("Ask doc-search", "").trim();
|
|
221
|
+
if (!action.startsWith("Ask doc-search")) return;
|
|
255
222
|
|
|
256
|
-
|
|
257
|
-
|
|
223
|
+
const match = action.match(/"(.*)"|'(.*)'/);
|
|
224
|
+
const query = match
|
|
225
|
+
? match[1] || match[2]
|
|
226
|
+
: action.replace("Ask doc-search", "").trim();
|
|
227
|
+
|
|
228
|
+
return performDocQA(query, context);
|
|
258
229
|
}
|
|
259
230
|
|
|
260
231
|
docSearchResolver.resolverName = "doc-search";
|
|
261
232
|
module.exports = docSearchResolver;
|
|
233
|
+
|