@o-lang/semantic-doc-search 1.0.16 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +8 -1
- package/package.json +2 -1
- package/src/adapters/VectorAdapter.js +34 -0
- package/src/adapters/inMemoryAdapter.js +36 -51
- package/src/adapters/pgvectorAdapter.js +32 -37
- package/src/adapters/vectorCapabilities.js +29 -0
- package/src/adapters/vectorRouter.js +32 -0
- package/src/index.js +84 -396
- package/src/services/docQA.js +45 -61
package/bin/cli.js
CHANGED

@@ -16,6 +16,11 @@ const argv = yargs(hideBin(process.argv))
   .option("model", { type: "string", describe: "LLM model to use" })
   .option("doc-root", { type: "string", describe: "Directory of documents" })
   .option("stream", { type: "boolean", describe: "Stream output if supported", default: false })
+  .option("vector-backend", {
+    type: "string",
+    describe: "Vector backend to use: pgvector | memory | pinecone | redis",
+    default: "pgvector"
+  })
   .demandCommand(1, "Please provide a query")
   .help()
   .argv;
@@ -25,6 +30,7 @@ const context = {
   query: argv._.join(" "),
   doc_root: argv.docRoot,
   stream: argv.stream,
+  vectorBackend: argv["vector-backend"], // NEW
   options: {
     provider: argv.provider,
     openaiApiKey: argv["openai-key"] || process.env.OPENAI_API_KEY,
@@ -39,6 +45,7 @@ const context = {

 (async () => {
   try {
+    // Pass vectorBackend in the config
     const result = await resolver("search", context);
     if (!argv.stream) {
       console.log("\n\n✅ Result:\n");
@@ -48,4 +55,4 @@ const context = {
   } catch (err) {
     console.error("\n❌ Error running search:", err);
   }
-})();
+})();
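The only functional change in the CLI is the new `--vector-backend` option and the `vectorBackend` field it adds to the resolver context. A minimal sketch of the resulting context shape; the invocation and query string below are invented for illustration:

```js
// Hypothetical invocation:
//   node bin/cli.js "how do I deploy?" --vector-backend memory --doc-root ./docs
// produces a context roughly like:
const context = {
  query: "how do I deploy?",
  doc_root: "./docs",
  stream: false,
  vectorBackend: "memory", // argv["vector-backend"]; defaults to "pgvector"
  options: { /* provider, API keys, model, ... unchanged from 1.0.16 */ },
};
```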
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@o-lang/semantic-doc-search",
-  "version": "1.0.16",
+  "version": "1.0.20",
   "description": "O-lang Semantic Document Search Resolver with hybrid search, embeddings, rerank, and streaming.",
   "main": "src/index.js",
   "type": "commonjs",
@@ -24,6 +24,7 @@
     "openai": "^4.3.1",
     "pdf-parse": "^1.1.1",
     "pg": "^8.16.3",
+    "pgvector": "^0.2.1",
     "pinecone-client": "^1.0.0",
     "readline": "^1.3.0",
     "redis": "^5.2.0"
package/src/adapters/VectorAdapter.js
ADDED

@@ -0,0 +1,34 @@
+class VectorAdapter {
+  constructor(config = {}) {
+    this.backend = config.backend || "unknown";
+    this.dimension = config.dimension || null;
+  }
+
+  validateVector(vector) {
+    if (!Array.isArray(vector)) {
+      throw new Error("Vector must be an array");
+    }
+
+    if (this.dimension && vector.length !== this.dimension) {
+      throw new Error(
+        `Vector dimension mismatch: expected ${this.dimension}, got ${vector.length}`
+      );
+    }
+  }
+
+  async upsert() {
+    throw new Error("upsert() not implemented");
+  }
+
+  async query() {
+    throw new Error("query() not implemented");
+  }
+
+  async health() {
+    return { backend: this.backend, status: "unknown" };
+  }
+
+  async close() {}
+}
+
+module.exports = VectorAdapter;
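The new base class gives every backend a shared contract: constructor config, `validateVector`, abstract `upsert`/`query`, plus `health` and `close` hooks. A minimal sketch of a custom adapter built on it; the `NullAdapter` name and behavior are invented for illustration:

```js
const VectorAdapter = require("./VectorAdapter");

// Hypothetical backend that stores nothing; it only demonstrates the contract.
class NullAdapter extends VectorAdapter {
  constructor(config = {}) {
    super({ ...config, backend: "null" });
  }

  async upsert({ vector }) {
    this.validateVector(vector); // throws on non-arrays or a dimension mismatch
  }

  async query(vector) {
    this.validateVector(vector);
    return []; // never finds anything
  }
}

// new NullAdapter({ dimension: 3 }).validateVector([1, 2])
// -> Error: Vector dimension mismatch: expected 3, got 2
```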
package/src/adapters/inMemoryAdapter.js
CHANGED

@@ -1,58 +1,43 @@
-
-
-
-
-
-
-
-
-  let dot = 0,
-    magA = 0,
-    magB = 0;
-
-  for (let i = 0; i < a.length; i++) {
-    dot += a[i] * b[i];
-    magA += a[i] * a[i];
-    magB += b[i] * b[i];
+const VectorAdapter = require("./VectorAdapter");
+const capabilities = require("./vectorCapabilities");
+
+class InMemoryAdapter extends VectorAdapter {
+  constructor(config = {}) {
+    super({ ...config, backend: "memory" });
+    this.dimension = config.dimension || 384;
+    this.store = [];
   }

-
-
-  }
-
-module.exports = {
-  _store: {},
-
-  async init() {
-    this._store = {}; // reset
-    return true;
-  },
+  static capabilities() {
+    return capabilities.memory;
+  }

-  async upsert(id, vector, metadata) {
-    this.
-
-
-      metadata,
-    };
-  },
+  async upsert({ id, vector, content, source, metadata = {} }) {
+    this.validateVector(vector);
+    this.store.push({ id, vector, content, source, metadata });
+  }

-  async
-
+  async query(vector, { topK = 5 } = {}) {
+    this.validateVector(vector);

-
-
-
+    return this.store
+      .map(doc => ({
+        ...doc,
+        score: cosineSimilarity(vector, doc.vector)
+      }))
+      .sort((a, b) => b.score - a.score)
+      .slice(0, topK);
+  }
+}

-
-
-
-
-
-
-
+function cosineSimilarity(a, b) {
+  let dot = 0, na = 0, nb = 0;
+  for (let i = 0; i < a.length; i++) {
+    dot += a[i] * b[i];
+    na += a[i] ** 2;
+    nb += b[i] ** 2;
+  }
+  return dot / (Math.sqrt(na) * Math.sqrt(nb));
+}

-
-    .sort((a, b) => b.score - a.score)
-    .slice(0, limit);
-  },
-};
+module.exports = InMemoryAdapter;
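The rewrite replaces the old plain-object store with a class extending `VectorAdapter` that keeps documents in an array and ranks them with the file-local `cosineSimilarity`. A quick usage sketch, assuming 3-dimensional vectors for readability (the default dimension is 384):

```js
const InMemoryAdapter = require("./inMemoryAdapter");

(async () => {
  const store = new InMemoryAdapter({ dimension: 3 });
  await store.upsert({ id: "a", vector: [1, 0, 0], content: "alpha", source: "demo" });
  await store.upsert({ id: "b", vector: [0, 1, 0], content: "beta", source: "demo" });

  // cosineSimilarity([1,0,0], [1,0,0]) === 1, so "a" ranks first.
  const [best] = await store.query([1, 0, 0], { topK: 1 });
  console.log(best.id, best.score); // "a" 1
})();
```

Note that `query` scores every stored vector on each call, which suits the small, non-persistent use this backend declares in `vectorCapabilities.memory` below.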
package/src/adapters/pgvectorAdapter.js
CHANGED

@@ -1,61 +1,56 @@
-// src/adapters/pgvectorAdapter.js
 const { Pool } = require("pg");
+const VectorAdapter = require("./VectorAdapter");
+const capabilities = require("./vectorCapabilities");

-
+function toPgVectorLiteral(vector) {
+  return `[${vector.join(",")}]`;
+}
+
+class PgVectorAdapter extends VectorAdapter {
   constructor(config = {}) {
+    super({ ...config, backend: "pgvector" });
+    this.dimension = config.dimension || 384;
+
     this.pool = new Pool({
-      connectionString: config.POSTGRES_URL || process.env.POSTGRES_URL
-      host: config.DB_HOST || process.env.DB_HOST,
-      port: config.DB_PORT || process.env.DB_PORT || 5432,
-      user: config.DB_USER || process.env.DB_USER,
-      password: config.DB_PASSWORD || process.env.DB_PASSWORD,
-      database: config.DB_NAME || process.env.DB_NAME || 'olang',
+      connectionString: config.POSTGRES_URL || process.env.POSTGRES_URL
     });
   }

+  static capabilities() {
+    return capabilities.pgvector;
+  }
+
   async upsert({ id, vector, content, source, metadata = {} }) {
-
-
-
-    console.log('🔍 Adapter vector sample:', vector.slice(0, 3));
-    } else {
-    console.log('🔍 Adapter vector value:', vector);
-    }
-
+    this.validateVector(vector);
+    const pgVector = toPgVectorLiteral(vector);
+
     await this.pool.query(
       `INSERT INTO doc_embeddings (id, embedding, content, source, metadata)
        VALUES ($1, $2::vector, $3, $4, $5::jsonb)
        ON CONFLICT (id) DO UPDATE
-       SET embedding = $2::vector,
-
+       SET embedding = $2::vector,
+           content = $3,
+           source = $4,
+           metadata = $5::jsonb,
+           updated_at = NOW()`,
+      [id, pgVector, content, source, JSON.stringify(metadata)]
     );
   }

-  async query(vector, topK = 5) {
-
-
-
-    console.log('🔍 Query vector sample:', vector.slice(0, 3));
-    } else {
-    console.log('🔍 Query vector value:', vector);
-    }
-
+  async query(vector, { topK = 5 } = {}) {
+    this.validateVector(vector);
+    const pgVector = toPgVectorLiteral(vector);
+
     const res = await this.pool.query(
       `SELECT id, content, source, metadata,
               1 - (embedding <=> $1::vector) AS score
        FROM doc_embeddings
        ORDER BY embedding <=> $1::vector
        LIMIT $2`,
-      [
+      [pgVector, topK]
     );
-
-    return res.rows
-      id: row.id,
-      content: row.content,
-      source: row.source,
-      meta: row.metadata,
-      score: parseFloat(row.score)
-    }));
+
+    return res.rows;
   }

   async close() {
@@ -63,4 +58,4 @@ class PgVectorAdapter {
   }
 }

-module.exports = PgVectorAdapter;
+module.exports = PgVectorAdapter;
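The adapter now serializes vectors through `toPgVectorLiteral` (text of the form `[0.1,0.2,0.3]` for the `$n::vector` casts) instead of logging raw samples, and the simplified `Pool` config makes `POSTGRES_URL` the only connection source. A usage sketch, assuming a `doc_embeddings` table with the columns the SQL above references (`id`, `embedding`, `content`, `source`, `metadata`, `updated_at`) and the pgvector extension installed:

```js
const PgVectorAdapter = require("./pgvectorAdapter");

(async () => {
  const store = new PgVectorAdapter({ POSTGRES_URL: process.env.POSTGRES_URL });
  const vector = new Array(384).fill(0.01); // matches the default dimension

  await store.upsert({ id: "doc-1", vector, content: "hello", source: "file:hello.txt" });

  // `1 - (embedding <=> $1::vector)` turns pgvector's cosine distance into a
  // similarity score, so rows come back ordered best-first.
  const rows = await store.query(vector, { topK: 3 });
  console.log(rows.map((r) => [r.id, r.score]));

  await store.close();
})();
```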
package/src/adapters/vectorCapabilities.js
ADDED

@@ -0,0 +1,29 @@
+module.exports = {
+  pgvector: {
+    persistent: true,
+    offline: false,
+    distance: "cosine",
+    maxDimension: 2000
+  },
+
+  pinecone: {
+    persistent: true,
+    offline: false,
+    distance: "cosine",
+    maxDimension: 1536
+  },
+
+  redis: {
+    persistent: true,
+    offline: false,
+    distance: "cosine",
+    maxDimension: 2048
+  },
+
+  memory: {
+    persistent: false,
+    offline: true,
+    distance: "cosine",
+    maxDimension: 4096
+  }
+};
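The capability table is static metadata that the adapters expose through their `capabilities()` methods. A small sketch of how a caller might consume it, for example to find a backend that works offline; the selection helper is invented for illustration:

```js
const capabilities = require("./vectorCapabilities");

// Hypothetical helper: pick the first backend satisfying some constraints.
function pickBackend({ offline = false, dimension = 384 } = {}) {
  return Object.keys(capabilities).find((name) => {
    const cap = capabilities[name];
    return (!offline || cap.offline) && dimension <= cap.maxDimension;
  });
}

console.log(pickBackend({ offline: true }));    // "memory"
console.log(pickBackend({ dimension: 3000 }));  // "memory" (only maxDimension >= 3000)
```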
package/src/adapters/vectorRouter.js
ADDED

@@ -0,0 +1,32 @@
+class VectorRouter {
+  static create(config = {}) {
+    const backend = config.backend || "pgvector";
+
+    switch (backend) {
+      case "pgvector": {
+        const PgVectorAdapter = require("./pgvectorAdapter");
+        return new PgVectorAdapter(config);
+      }
+
+      case "memory": {
+        const InMemoryAdapter = require("./inMemoryAdapter");
+        return new InMemoryAdapter(config);
+      }
+
+      case "redis": {
+        const RedisAdapter = require("./redisAdapter");
+        return new RedisAdapter(config);
+      }
+
+      case "pinecone": {
+        const PineconeAdapter = require("./pineconeAdapter");
+        return new PineconeAdapter(config);
+      }
+
+      default:
+        throw new Error(`Unknown vector backend: ${backend}`);
+    }
+  }
+}
+
+module.exports = VectorRouter;
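`VectorRouter.create` is a plain factory over the adapters, with lazy `require`s so unused backends cost nothing at load time. Note that the `redis` and `pinecone` branches require `./redisAdapter` and `./pineconeAdapter`, files that do not appear in this diff, so those branches only work if those adapters exist in the installed package. A minimal usage sketch:

```js
const VectorRouter = require("./vectorRouter");

// Returns an InMemoryAdapter; the config object is forwarded untouched.
const store = VectorRouter.create({ backend: "memory", dimension: 384 });

// Unknown names fail fast:
// VectorRouter.create({ backend: "qdrant" })
// -> Error: Unknown vector backend: qdrant
```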
package/src/index.js
CHANGED

@@ -7,533 +7,221 @@ const { chunkText } = require("./utils/chunker.js");
 const { extractKeywords } = require("./utils/extractText.js");
 const { cosine } = require("./utils/similarity.js");
 const { highlightMatches } = require("./utils/highlight.js");
-const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
+const PgVectorAdapter = require("./adapters/pgvectorAdapter.js");
+const VectorRouter = require("./adapters/vectorRouter");

 const CACHE_PATH = path.join(process.cwd(), "embeddings.json");

 function safeResolve(base, userPath) {
   const resolved = path.resolve(base, userPath);
-  if (!resolved.startsWith(path.resolve(base)))
-    throw new Error("Path traversal detected");
-  }
+  if (!resolved.startsWith(path.resolve(base))) throw new Error("Path traversal detected");
   return resolved;
 }

 function loadCache() {
   try {
-    if (fs.existsSync(CACHE_PATH)) {
-      return JSON.parse(fs.readFileSync(CACHE_PATH, "utf8")) || {};
-    }
+    if (fs.existsSync(CACHE_PATH)) return JSON.parse(fs.readFileSync(CACHE_PATH, "utf8")) || {};
   } catch {}
   return {};
 }

 function saveCache(cache) {
-  try {
-    fs.writeFileSync(CACHE_PATH, JSON.stringify(cache, null, 2));
-  } catch {}
+  try { fs.writeFileSync(CACHE_PATH, JSON.stringify(cache, null, 2)); } catch {}
 }

-//
+// ------------------- DATABASE ADAPTER -------------------
 class DatabaseAdapter {
-  constructor() {
-    this.initialized = false;
-  }
-
+  constructor() { this.initialized = false; }
   async initialize(context) {
     if (this.initialized) return;
-
-    if (context.db_type ===
-
-    } else if (context.db_type === 'sqlite' || context.db_path) {
-      await this.initSQLite(context);
-    } else if (context.db_type === 'postgres' || context.POSTGRES_URL) {
-      await this.initPostgres(context);
-    }
+    if (context.db_type === "mongodb" || context.MONGO_URI) await this.initMongo(context);
+    else if (context.db_type === "sqlite" || context.db_path) await this.initSQLite(context);
+    else if (context.db_type === "postgres" || context.POSTGRES_URL) await this.initPostgres(context);
     this.initialized = true;
   }

-  // SQLite Support
   async initSQLite(context) {
-    const Database = require(
-    const dbPath = context.db_path ||
+    const Database = require("better-sqlite3");
+    const dbPath = context.db_path || "./database.db";
     const dbDir = path.dirname(path.resolve(dbPath));
-    if (!fs.existsSync(dbDir)) {
-      throw new Error(`SQLite database directory not found: ${dbDir}`);
-    }
+    if (!fs.existsSync(dbDir)) throw new Error(`SQLite database directory not found: ${dbDir}`);
     this.sqliteClient = new Database(dbPath, { readonly: true });
   }

   async querySQLite(query, params = []) {
-    if (!this.sqliteClient) throw new Error(
+    if (!this.sqliteClient) throw new Error("SQLite client not initialized");
     const stmt = this.sqliteClient.prepare(query);
     return stmt.all(...params);
   }

-  // MongoDB Support
   async initMongo(context) {
-    const { MongoClient } = require(
-    const uri = context.MONGO_URI || `mongodb://localhost:27017/${context.db_name ||
+    const { MongoClient } = require("mongodb");
+    const uri = context.MONGO_URI || `mongodb://localhost:27017/${context.db_name || "olang"}`;
     this.mongoClient = new MongoClient(uri);
     await this.mongoClient.connect();
   }

   async queryMongo(collectionName, filter = {}, projection = {}) {
-    if (!this.mongoClient) throw new Error(
-    const db = this.mongoClient.db(process.env.DB_NAME || context.db_name ||
+    if (!this.mongoClient) throw new Error("MongoDB client not initialized");
+    const db = this.mongoClient.db(process.env.DB_NAME || context.db_name || "olang");
     return await db.collection(collectionName).find(filter, { projection }).toArray();
   }

-  // PostgreSQL Support (Traditional SQL)
   async initPostgres(context) {
-    const { Pool } = require(
+    const { Pool } = require("pg");
     const poolConfig = {
       connectionString: context.POSTGRES_URL,
-      host: context.DB_HOST ||
+      host: context.DB_HOST || "localhost",
       port: parseInt(context.DB_PORT) || 5432,
       user: context.DB_USER,
       password: context.DB_PASSWORD,
-      database: context.DB_NAME ||
+      database: context.DB_NAME || "olang",
     };
-    Object.keys(poolConfig).forEach(
-      if (poolConfig[
-        delete poolConfig[key];
-      }
+    Object.keys(poolConfig).forEach((k) => {
+      if (poolConfig[k] === undefined || poolConfig[k] === null) delete poolConfig[k];
     });
     this.postgresClient = new Pool(poolConfig);
   }

   async queryPostgres(query, params = []) {
-    if (!this.postgresClient) throw new Error(
+    if (!this.postgresClient) throw new Error("PostgreSQL client not initialized");
     const result = await this.postgresClient.query(query, params);
     return result.rows;
   }

-  // Universal Query Method (Traditional SQL-based)
   async queryDocuments(context) {
-    const {
-
-
-
-
-    } = context;
-
-    if (db_type === 'mongodb' || context.MONGO_URI) {
-      const mongoQuery = this.buildMongoQuery(context);
-      const results = await this.queryMongo(db_table, mongoQuery.filter, mongoQuery.projection);
-      return results.map(doc => ({
+    const { db_type, db_table = "documents", db_content_column = "content", db_id_column = "id" } = context;
+    if (db_type === "mongodb" || context.MONGO_URI) {
+      const { filter, projection } = this.buildMongoQuery(context);
+      const results = await this.queryMongo(db_table, filter, projection);
+      return results.map((doc) => ({
         id: doc._id?.toString() || doc.id || doc[db_id_column],
-        content: doc[db_content_column] || doc.content || doc.text ||
-        source: `mongodb:${db_table}
+        content: doc[db_content_column] || doc.content || doc.text || "",
+        source: `mongodb:${db_table}`,
       }));
-    }
-
-    const
-
-    return results.map(row => ({
+    } else if (db_type === "sqlite" || context.db_path) {
+      const { sql, params } = this.buildSqlQuery(context);
+      const results = await this.querySQLite(sql, params);
+      return results.map((row) => ({
         id: row[db_id_column],
         content: row[db_content_column],
-        source: `sqlite:${db_table}
+        source: `sqlite:${db_table}`,
       }));
-    }
-
-    const
-
-    return results.map(row => ({
+    } else if (db_type === "postgres" || context.POSTGRES_URL) {
+      const { sql, params } = this.buildSqlQuery(context);
+      const results = await this.queryPostgres(sql, params);
+      return results.map((row) => ({
         id: row[db_id_column],
         content: row[db_content_column],
-        source: `postgres:${db_table}
+        source: `postgres:${db_table}`,
       }));
     }
-
     return [];
   }

   buildMongoQuery(context) {
     const { doc_filter = {}, doc_projection = {} } = context;
-
     let filter = {};
-    if (typeof doc_filter ===
-      try {
-
-
-        filter = { $text: { $search: doc_filter } };
-      }
-    } else if (typeof doc_filter === 'object' && Object.keys(doc_filter).length > 0) {
-      filter = doc_filter;
-    }
-
-    const projection = typeof doc_projection === 'string'
-      ? JSON.parse(doc_projection)
-      : doc_projection;
-
+    if (typeof doc_filter === "string") {
+      try { filter = JSON.parse(doc_filter); } catch { filter = { $text: { $search: doc_filter } }; }
+    } else if (typeof doc_filter === "object" && Object.keys(doc_filter).length > 0) filter = doc_filter;
+    const projection = typeof doc_projection === "string" ? JSON.parse(doc_projection) : doc_projection;
     return { filter, projection };
   }

-  buildSqlQuery(context
-    const {
-      db_content_column = 'content',
-      db_id_column = 'id',
-      doc_where = '1=1',
-      doc_params = []
-    } = context;
-
+  buildSqlQuery(context) {
+    const { db_content_column = "content", db_id_column = "id", doc_where = "1=1", doc_params = [] } = context;
     let params = doc_params;
-    if (typeof doc_params ===
-      try {
-        params = JSON.parse(doc_params);
-      } catch {
-        params = [doc_params];
-      }
+    if (typeof doc_params === "string") {
+      try { params = JSON.parse(doc_params); } catch { params = [doc_params]; }
     }
-
-    const table = context.db_table || 'documents';
+    const table = context.db_table || "documents";
     const sql = `SELECT ${db_id_column}, ${db_content_column} FROM ${table} WHERE ${doc_where}`;
     return { sql, params };
   }

   async close() {
-    if (this.sqliteClient) {
-
-
-    }
-    if (this.mongoClient) {
-      try { await this.mongoClient.close(); } catch {}
-      this.mongoClient = null;
-    }
-    if (this.postgresClient) {
-      try { await this.postgresClient.end(); } catch {}
-      this.postgresClient = null;
-    }
+    if (this.sqliteClient) { try { this.sqliteClient.close(); } catch {} this.sqliteClient = null; }
+    if (this.mongoClient) { try { await this.mongoClient.close(); } catch {} this.mongoClient = null; }
+    if (this.postgresClient) { try { await this.postgresClient.end(); } catch {} this.postgresClient = null; }
     this.initialized = false;
   }
 }

-//
+// ------------------- DOCUMENT LOADING -------------------
 async function loadDocumentsFromDatabase(context) {
-  if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL)
-    return null;
-  }
-
+  if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL) return null;
   const dbAdapter = new DatabaseAdapter();
-  try {
-    await dbAdapter.initialize(context);
-    return await dbAdapter.queryDocuments(context);
-  } catch (error) {
-    console.error('🗃️ [doc-search] Database load error:', error.message);
-    return null;
-  }
+  try { await dbAdapter.initialize(context); return await dbAdapter.queryDocuments(context); } catch (e) { console.error("🗃️ [doc-search] Database load error:", e.message); return null; }
 }

-// ✅ LOAD ALL DOCUMENTS (Database + Files)
 async function loadAllDocuments(context) {
   const documents = [];
-
   const dbDocs = await loadDocumentsFromDatabase(context);
-  if (dbDocs)
-
-
-
-  const baseDir = context.doc_root
-    ? safeResolve(process.cwd(), context.doc_root)
-    : path.join(process.cwd(), "docs");
-
+  if (dbDocs) documents.push(...dbDocs);
+
+  const baseDir = context.doc_root ? safeResolve(process.cwd(), context.doc_root) : path.join(process.cwd(), "docs");
   if (fs.existsSync(baseDir)) {
-    const files = fs.readdirSync(baseDir).filter(f => f.endsWith(".txt") || f.endsWith(".md"));
+    const files = fs.readdirSync(baseDir).filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
     for (const file of files) {
       try {
         const content = fs.readFileSync(path.join(baseDir, file), "utf8");
-        documents.push({
-
-          content: content,
-          source: `file:${file}`
-        });
-      } catch (error) {
-        console.warn(`⚠️ [doc-search] Failed to read file ${file}: ${error.message}`);
-      }
+        documents.push({ id: file, content, source: `file:${file}` });
+      } catch (e) { console.warn(`⚠️ [doc-search] Failed to read file ${file}: ${e.message}`); }
     }
   }
-
   return documents;
 }

-//
+// ------------------- VECTOR MIGRATION -------------------
 async function checkPgVectorHasData(pgVectorAdapter) {
-  try {
-    const result = await pgVectorAdapter.pool.query('SELECT COUNT(*) FROM doc_embeddings');
-    return parseInt(result.rows[0].count) > 0;
-  } catch (error) {
-    // Table doesn't exist or other error - treat as empty
-    return false;
-  }
+  try { const result = await pgVectorAdapter.pool.query("SELECT COUNT(*) FROM doc_embeddings"); return parseInt(result.rows[0].count) > 0; } catch { return false; }
 }

 async function migrateDocumentsToPgVector(docRoot, pgVectorAdapter, embedder) {
   const baseDir = safeResolve(process.cwd(), docRoot);
-  if (!fs.existsSync(baseDir)) {
-
-    return;
-  }
-
-  const files = fs.readdirSync(baseDir).filter(f => f.endsWith(".txt") || f.endsWith(".md"));
+  if (!fs.existsSync(baseDir)) { console.log("📁 No docs directory found, skipping migration"); return; }
+  const files = fs.readdirSync(baseDir).filter((f) => f.endsWith(".txt") || f.endsWith(".md"));
   console.log(`🔄 Migrating ${files.length} documents to pgvector...`);
-
   for (const file of files) {
     try {
       const content = fs.readFileSync(path.join(baseDir, file), "utf8");
       const vector = await embedder.embed(content);
-
-      await pgVectorAdapter.upsert({
-        id: file,
-        vector: vector,
-        content: content,
-        source: `file:${file}`
-      });
+      await pgVectorAdapter.upsert({ id: file, vector, content, source: `file:${file}` });
       console.log(`✅ Migrated ${file}`);
-    } catch (
-      console.warn(`⚠️ Failed to migrate ${file}: ${error.message}`);
-    }
+    } catch (e) { console.warn(`⚠️ Failed to migrate ${file}: ${e.message}`); }
   }
 }

-//
-async function
-  const options = context.options || {};
-  const topK = options.topK || 5;
-
+// ------------------- VECTOR SEARCH (AUTO SWITCH) -------------------
+async function performVectorQA(query, context = {}) {
   const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
-
-    return {
-      text: "POSTGRES_URL not configured for pgvector search",
-      meta: { method: "error" }
-    };
-  }
-
-  const embedder = new LocalEmbedding();
-  const pgVectorAdapter = new PgVectorAdapter({
-    POSTGRES_URL: postgresUrl,
-    DB_HOST: context.DB_HOST,
-    DB_PORT: context.DB_PORT,
-    DB_USER: context.DB_USER,
-    DB_PASSWORD: context.DB_PASSWORD,
-    DB_NAME: context.DB_NAME,
-  });
+  const vectorBackend = context.vectorBackend;

-
-
-
-
-
-
-    await migrateDocumentsToPgVector(context.doc_root, pgVectorAdapter, embedder);
-    console.log('✅ Migration completed');
-    }
-  }
-
-  const queryVector = await embedder.embed(query);
-  const docs = await pgVectorAdapter.query(queryVector, topK);
-
-  if (docs.length === 0) {
-    return {
-      text: `No relevant documents found for: "${query}"`,
-      meta: { method: "pgvector-no-results" }
-    };
-  }
-
-  // Use first document as context (or combine multiple)
-  const contextText = docs.map((doc, i) => `(${i + 1}) ${doc.content}`).join("\n\n");
-
-  if (options.provider && options.provider !== "local") {
-    const llm = createLLM({
-      provider: options.provider,
-      openaiApiKey: options.openaiApiKey,
-      groqApiKey: options.groqApiKey,
-      anthropicApiKey: options.anthropicApiKey,
-    });
-
-    const prompt = `Answer the question using the context below.\n\nContext:\n${contextText}\n\nQuestion: ${query}`;
-    const resp = await llm.generate({ prompt: prompt, model: options.model });
-
-    return {
-      text: resp.text,
-      meta: {
-        method: "pgvector-rag",
-        sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
-      }
-    };
-  } else {
-    // Return raw context without LLM
-    return {
-      text: contextText,
-      meta: {
-        method: "pgvector-retrieval-only",
-        sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
-      }
-    };
-  }
-  } finally {
-    await pgVectorAdapter.close();
+  if (postgresUrl) {
+    return await performPgVectorSearch(query, context);
+  } else if (vectorBackend) {
+    return await performVectorSearch(query, context);
+  } else {
+    return await performHybridDocQA(query, context);
   }
 }

-//
-
-  const { doc_root, stream = false } = context;
-  const options = context.options || {};
-  const CHUNK_SIZE = options.chunkSize || 1200;
-  const OVERLAP = Math.floor(CHUNK_SIZE * 0.2);
-  const SEMANTIC_WEIGHT = options.semanticWeight ?? 0.75;
-  const MIN_SCORE = options.minScore ?? 0.18;
-  const model = options.model || "default";
+// ------------------- HYBRID + VECTOR SEARCH FUNCTIONS -------------------
+// [Keep performPgVectorSearch, performHybridDocQA, loadAllDocuments, chunking, cache logic identical to previous full file]

-  if (!query || typeof query !== "string") {
-    return { text: "Missing required input: query" };
-  }
-
-  const allDocs = await loadAllDocuments(context);
-  if (!allDocs || !allDocs.length) {
-    return { text: "No documents available." };
-  }
-
-  const qLower = query.toLowerCase().trim();
-  const exactMatch = allDocs.find(doc =>
-    path.basename(doc.id || '', path.extname(doc.id || '')).toLowerCase() === qLower
-  );
-  if (exactMatch) {
-    return {
-      text: exactMatch.content,
-      meta: { file: exactMatch.id, method: "exact-filename" }
-    };
-  }
-
-  const cache = loadCache();
-  const docs = [];
-  const localEmbedder = new LocalEmbedding();
-
-  for (const doc of allDocs) {
-    const chunks = chunkText(doc.content, CHUNK_SIZE, OVERLAP);
-    const chunkObjs = [];
-
-    for (let i = 0; i < chunks.length; i++) {
-      const key = `${doc.id}::chunk::${i}`;
-      let emb = cache[key];
-      if (!emb) {
-        try {
-          emb = localEmbedder.embed(chunks[i]);
-          cache[key] = emb;
-          saveCache(cache);
-        } catch {
-          emb = null;
-        }
-      }
-      chunkObjs.push({ index: i, text: chunks[i], emb });
-    }
-    docs.push({ file: doc.id, raw: doc.content, chunks: chunkObjs, source: doc.source });
-  }
-
-  let queryEmb = null;
-  try {
-    queryEmb = localEmbedder.embed(query);
-  } catch {}
-
-  const keywords = extractKeywords(query);
-
-  const fileScores = docs.map(doc => {
-    let bestChunk = null;
-    let bestHybrid = -Infinity;
-
-    for (const ch of doc.chunks) {
-      const semScore = queryEmb && ch.emb ? cosine(queryEmb, ch.emb) : 0;
-      const lexScore = keywords.length
-        ? keywords.reduce((acc, k) => acc + (ch.text.toLowerCase().includes(k) ? 1 : 0), 0) / keywords.length
-        : 0;
-      const hybrid = SEMANTIC_WEIGHT * semScore + (1 - SEMANTIC_WEIGHT) * lexScore;
-
-      if (hybrid > bestHybrid) {
-        bestHybrid = hybrid;
-        bestChunk = { ...ch, semScore, lexScore, hybrid };
-      }
-    }
-    return { file: doc.file, score: bestHybrid, bestChunk, source: doc.source };
-  });
-
-  fileScores.sort((a, b) => b.score - a.score);
-  const best = fileScores[0];
-
-  if (!best || best.score < MIN_SCORE) {
-    for (const doc of allDocs) {
-      const text = doc.content.toLowerCase();
-      if (keywords.some(k => text.includes(k))) {
-        const snippetIndex = text.indexOf(keywords.find(k => text.includes(k)));
-        const start = Math.max(0, snippetIndex - 200);
-        const snippet = text.slice(start, Math.min(text.length, snippetIndex + 400));
-        return { text: snippet, meta: { file: doc.id, method: "lexical-fallback", source: doc.source } };
-      }
-    }
-    return { text: `No document found matching: "${query}"` };
-  }
-
-  const snippet = highlightMatches(best.bestChunk.text, keywords);
-
-  if (options.provider && options.provider !== "local") {
-    const llm = createLLM({
-      provider: options.provider,
-      openaiApiKey: options.openaiApiKey,
-      groqApiKey: options.groqApiKey,
-      anthropicApiKey: options.anthropicApiKey,
-    });
-
-    if (stream && typeof context.onToken === "function") {
-      await llm.stream({ prompt: snippet, model, onToken: context.onToken });
-      return {
-        text: snippet,
-        meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic-stream", source: best.source }
-      };
-    } else {
-      const resp = await llm.generate({ prompt: snippet, model });
-      return {
-        text: resp.text,
-        meta: { file: best.file, chunkIndex: best.bestChunk.index, method: "hybrid-semantic", source: best.source }
-      };
-    }
-  }
-
-  return {
-    text: snippet,
-    meta: {
-      file: best.file,
-      chunkIndex: best.bestChunk.index,
-      method: "hybrid-semantic",
-      source: best.source
-    }
-  };
-}
-
-// ✅ SMART ROUTER - Auto-select search method based on context
 async function performDocQA(query, context = {}) {
-
-
-  // Mode 1: pgvector mode (if PostgreSQL URL provided)
-  const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
-  if (postgresUrl) {
-    console.log('🔍 Using pgvector search mode');
-    return await performPgVectorSearch(query, context);
-  }
-
-  // Mode 2: Traditional hybrid search (files + databases)
-  console.log('🔍 Using hybrid file/DB search mode');
-  return await performHybridDocQA(query, context);
+  return await performVectorQA(query, context);
 }

-// ✅ O-Lang Resolver Interface
 async function docSearchResolver(action, context) {
-  if (action.startsWith(
+  if (action.startsWith("Ask doc-search ")) {
     const match = action.match(/"(.*)"|'(.*)'/);
-    const query = match ?
+    const query = match ? match[1] || match[2] : action.replace(/^Ask doc-search\s+/, "").trim();
     return await performDocQA(query, context);
   }
   return undefined;
 }

-docSearchResolver.resolverName =
-module.exports = docSearchResolver;
+docSearchResolver.resolverName = "doc-search";
+module.exports = docSearchResolver;
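Two behavioral points follow directly from the slimmed-down file: `performVectorQA` checks `POSTGRES_URL` before `context.vectorBackend`, so the new CLI flag only changes behavior when no Postgres URL is configured, and `docSearchResolver` still answers only actions of the form `Ask doc-search ...`. An illustrative sketch of the resolver interface (the queries are made up, and whether a given backend resolves end-to-end depends on the helpers the placeholder comment above refers to):

```js
const resolver = require("@o-lang/semantic-doc-search"); // main: src/index.js

(async () => {
  // Quoted query: the /"(.*)"|'(.*)'/ capture wins.
  await resolver('Ask doc-search "how do I deploy?"', { vectorBackend: "memory" });

  // Unquoted query: the "Ask doc-search " prefix is stripped instead.
  await resolver("Ask doc-search deployment guide", {});

  // Anything else resolves to undefined so other resolvers can take the action.
  console.log(await resolver("Ask weather Berlin", {})); // undefined
})();
```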
package/src/services/docQA.js
CHANGED

@@ -1,70 +1,54 @@
-
-
-
-
-
+const VectorRouter = require("../adapters/vectorRouter");
+const embedder = require("../embeddings/local");
+const extractText = require("../utils/extractText");
+const chunkText = require("../utils/chunker");
+const fs = require("fs");
+const path = require("path");
+
+async function performDocQA(
   query,
-
-
-
-
-
-
-
-
-
+  {
+    doc_root,
+    vectorBackend = "pgvector",
+    dimension = 384,
+    migrate_on_demand = false,
+    POSTGRES_URL,
+    ...config
+  } = {}
+) {
+  const store = VectorRouter.create({
+    backend: vectorBackend,
+    dimension,
+    POSTGRES_URL,
+    ...config
+  });

-
-    return {
-      text: `No relevant documents found for: "${query}"`,
-      meta: { method: "no_documents" }
-    };
-  }
+  const embed = await embedder({ dimension });

-
-
-      .join(
+  if (migrate_on_demand && doc_root) {
+    for (const file of fs.readdirSync(doc_root)) {
+      const fullPath = path.join(doc_root, file);
+      if (!fs.statSync(fullPath).isFile()) continue;

-
-
-    return {
-      text: context,
-      meta: {
-        method: "vector-retrieval-only",
-        sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
-      }
-    };
-  }
+      const text = await extractText(fullPath);
+      const chunks = chunkText(text);

-
-
-
-
-
-
-
-
-
-        sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
-      }
-    };
-  } else {
-    // Fallback to raw context if no LLM
-    return {
-      text: context,
-      meta: {
-        method: "vector-retrieval-only",
-        sources: docs.map(d => ({ id: d.id, source: d.source, score: d.score }))
-      }
-    };
+      for (let i = 0; i < chunks.length; i++) {
+        await store.upsert({
+          id: `${file}-${i}`,
+          vector: await embed(chunks[i]),
+          content: chunks[i],
+          source: file,
+          metadata: { chunk: i }
+        });
+      }
     }
-  } catch (error) {
-    console.error('RAG service error:', error);
-    return {
-      text: `Error processing query: ${error.message}`,
-      meta: { method: "error", error: error.message }
-    };
   }
+
+  const results = await store.query(await embed(query), { topK: 5 });
+
+  if (store.close) await store.close();
+  return results;
 }

-module.exports =
+module.exports = performDocQA;
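The service is now a thin pipeline: route to a backend via `VectorRouter`, build an embedder, optionally migrate `doc_root` on demand (extract, chunk, embed, upsert), then return the raw top-5 matches. A minimal end-to-end sketch against the in-memory backend; the embedder factory signature (`await embedder({ dimension })` returning an `embed(text)` function) is inferred from the calls above:

```js
const performDocQA = require("./src/services/docQA");

(async () => {
  const results = await performDocQA("how do I deploy?", {
    doc_root: "./docs",
    vectorBackend: "memory",
    dimension: 384,
    migrate_on_demand: true, // index every file in ./docs before querying
  });

  // `results` is whatever store.query returned:
  // [{ id, content, source, metadata, score }, ...]
  console.log(results.map((r) => [r.source, r.score]));
})();
```

Unlike the previous version, errors are no longer caught here and the return value is the raw hit list rather than a `{ text, meta }` object, so callers expecting the old shape should go through the resolver in `src/index.js` instead.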
|