@o-lang/semantic-doc-search 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +4 -44
- package/package.json +1 -1
- package/src/adapters/pgvectorAdapter.js +74 -0
- package/src/embeddings/anthropic.js +4 -3
- package/src/embeddings/groq.js +4 -3
- package/src/embeddings/openai.js +4 -3
- package/src/index.js +96 -18
- package/src/rerank/cohere.js +4 -3
- package/src/rerank/groqRerank.js +4 -3
- package/src/rerank/local.js +4 -3
- package/src/services/docQA.js +70 -0
- package/src/utils/fileLoader.js +6 -5
package/bin/cli.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
2
|
+
const yargs = require("yargs");
|
|
3
|
+
const { hideBin } = require("yargs/helpers");
|
|
4
|
+
const resolver = require("../src/index.js");
|
|
5
5
|
|
|
6
6
|
const argv = yargs(hideBin(process.argv))
|
|
7
7
|
.usage("Usage: $0 <query> [options]")
|
|
@@ -48,44 +48,4 @@ const context = {
|
|
|
48
48
|
} catch (err) {
|
|
49
49
|
console.error("\n❌ Error running search:", err);
|
|
50
50
|
}
|
|
51
|
-
})();
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
// console.error("❌ Please provide a query string.");
|
|
55
|
-
// process.exit(1);
|
|
56
|
-
// }
|
|
57
|
-
|
|
58
|
-
// const stream = argv.stream || false;
|
|
59
|
-
// const provider = argv.provider || "openai";
|
|
60
|
-
|
|
61
|
-
// // Resolve doc_root if given
|
|
62
|
-
// const doc_root = argv.doc_root
|
|
63
|
-
// ? path.resolve(__dirname, "..", argv.doc_root)
|
|
64
|
-
// : undefined;
|
|
65
|
-
|
|
66
|
-
// // Optional runtime API keys (users pass env variables)
|
|
67
|
-
// const openaiApiKey = process.env.OPENAI_API_KEY;
|
|
68
|
-
// const groqApiKey = process.env.GROQ_API_KEY;
|
|
69
|
-
// const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
|
|
70
|
-
|
|
71
|
-
// (async () => {
|
|
72
|
-
// try {
|
|
73
|
-
// const result = await resolver("search", {
|
|
74
|
-
// query,
|
|
75
|
-
// stream,
|
|
76
|
-
// doc_root,
|
|
77
|
-
// options: { provider, openaiApiKey, groqApiKey, anthropicApiKey },
|
|
78
|
-
// onToken: token => {
|
|
79
|
-
// if (stream) process.stdout.write(token);
|
|
80
|
-
// },
|
|
81
|
-
// });
|
|
82
|
-
|
|
83
|
-
// if (!stream) {
|
|
84
|
-
// console.log("\n\n✅ Result:\n");
|
|
85
|
-
// console.log(result.text);
|
|
86
|
-
// console.log("\nMeta:", result.meta);
|
|
87
|
-
// }
|
|
88
|
-
// } catch (err) {
|
|
89
|
-
// console.error("❌ Error running search:", err);
|
|
90
|
-
// }
|
|
91
|
-
// })();
|
|
51
|
+
})();
|
package/package.json
CHANGED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
// src/adapters/pgvectorAdapter.js
|
|
2
|
+
const { Pool } = require("pg");
|
|
3
|
+
|
|
4
|
+
/**
 * Formats an embedding as a pgvector text literal, e.g. `[0.1,0.2,0.3]`.
 *
 * Plain arrays and typed arrays (such as Float32Array) are accepted. Every
 * component must be a finite number so that a malformed embedding fails here
 * with a clear message instead of as an opaque database cast error.
 *
 * @param {number[]|ArrayLike<number>} vector - Embedding components.
 * @returns {string} The bracketed, comma-separated literal.
 * @throws {TypeError} If the vector is empty, not array-like, or contains a non-finite value.
 */
function toVectorLiteral(vector) {
  const isTypedArray = ArrayBuffer.isView(vector) && !(vector instanceof DataView);
  if (!Array.isArray(vector) && !isTypedArray) {
    throw new TypeError("vector must be an array of finite numbers");
  }

  const components = Array.from(vector);
  if (components.length === 0) {
    throw new TypeError("vector must not be empty");
  }
  for (const component of components) {
    if (typeof component !== "number" || !Number.isFinite(component)) {
      throw new TypeError("vector must contain only finite numbers");
    }
  }

  return `[${components.join(",")}]`;
}

/**
 * Vector store backed by PostgreSQL with the pgvector extension.
 *
 * Reads and writes the `doc_embeddings` table (columns used here: id,
 * embedding, content, source, metadata, updated_at).
 */
class PgVectorAdapter {
  /**
   * Creates the connection pool and starts a connectivity probe.
   *
   * Each setting is taken from `config` first, then from the environment
   * variable of the same name.
   *
   * @param {object} [config]
   * @param {string} [config.POSTGRES_URL] - Connection string.
   * @param {string} [config.DB_HOST]
   * @param {string|number} [config.DB_PORT] - Defaults to 5432.
   * @param {string} [config.DB_USER]
   * @param {string} [config.DB_PASSWORD]
   * @param {string} [config.DB_NAME] - Defaults to 'olang'.
   */
  constructor(config = {}) {
    this.pool = new Pool({
      connectionString: config.POSTGRES_URL || process.env.POSTGRES_URL,
      host: config.DB_HOST || process.env.DB_HOST,
      port: config.DB_PORT || process.env.DB_PORT || 5432,
      user: config.DB_USER || process.env.DB_USER,
      password: config.DB_PASSWORD || process.env.DB_PASSWORD,
      database: config.DB_NAME || process.env.DB_NAME || 'olang',
    });

    // A constructor cannot await, so the probe runs in the background.
    // Callers that need to know the outcome can `await adapter.ready`.
    this.ready = this.testConnection();
    // testConnection() rethrows after logging. Without a handler that
    // rejection would be unhandled, which terminates recent Node versions.
    // The failure is still logged and still observable through `ready`.
    this.ready.catch(() => {});
  }

  /**
   * Runs `SELECT 1` to verify the pool can reach the database.
   *
   * @returns {Promise<void>}
   * @throws The underlying pool error, after logging its message.
   */
  async testConnection() {
    try {
      await this.pool.query('SELECT 1');
      console.log('✅ PgVector adapter connected successfully');
    } catch (error) {
      console.error('❌ PgVector connection failed:', error.message);
      throw error;
    }
  }

  /**
   * Inserts a document embedding, or overwrites the row with the same id.
   *
   * @param {object} doc
   * @param {string|number} doc.id - Conflict key.
   * @param {number[]|ArrayLike<number>} doc.vector - Embedding to store.
   * @param {string} doc.content
   * @param {string} doc.source
   * @param {object} [doc.metadata] - Serialized with JSON.stringify.
   * @returns {Promise<void>}
   * @throws {TypeError} If the vector is malformed.
   */
  async upsert({ id, vector, content, source, metadata = {} }) {
    const vectorStr = toVectorLiteral(vector);

    const query = `
      INSERT INTO doc_embeddings (id, embedding, content, source, metadata, updated_at)
      VALUES ($1, $2::vector, $3, $4, $5, NOW())
      ON CONFLICT (id)
      DO UPDATE SET
        embedding = $2::vector,
        content = $3,
        source = $4,
        metadata = $5,
        updated_at = NOW()`;

    await this.pool.query(query, [id, vectorStr, content, source, JSON.stringify(metadata)]);
  }

  /**
   * Returns the `topK` rows closest to `vector`.
   *
   * Rows are ordered by the pgvector `<=>` distance and `score` is reported
   * as `1 - distance`, so larger scores are closer matches.
   *
   * @param {number[]|ArrayLike<number>} vector - Query embedding.
   * @param {number} [topK=5] - Maximum number of rows (non-negative).
   * @returns {Promise<Array<{id: *, content: *, source: *, metadata: *, score: number}>>}
   * @throws {TypeError} If the vector is malformed.
   * @throws {RangeError} If topK is not a non-negative number.
   */
  async query(vector, topK = 5) {
    const vectorStr = toVectorLiteral(vector);

    const limit = Math.trunc(Number(topK));
    if (!Number.isFinite(limit) || limit < 0) {
      throw new RangeError("topK must be a non-negative integer");
    }

    const query = `
      SELECT id, content, source, metadata,
             1 - (embedding <=> $1::vector) AS score
      FROM doc_embeddings
      ORDER BY embedding <=> $1::vector
      LIMIT $2`;

    const res = await this.pool.query(query, [vectorStr, limit]);

    // The driver may hand the computed score back as a string; normalise it.
    return res.rows.map((row) => ({
      id: row.id,
      content: row.content,
      source: row.source,
      metadata: row.metadata,
      score: Number.parseFloat(row.score),
    }));
  }

  /**
   * Shuts down the connection pool.
   *
   * @returns {Promise<void>}
   */
  async close() {
    await this.pool.end();
  }
}
|
|
73
|
+
|
|
74
|
+
module.exports = PgVectorAdapter;
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
// src/embeddings/anthropic.js
|
|
2
|
-
|
|
3
|
-
import Anthropic from "@anthropic-ai/sdk";
|
|
2
|
+
const Anthropic = require("@anthropic-ai/sdk");
|
|
4
3
|
|
|
5
4
|
/**
|
|
6
5
|
* AnthropicEmbedding
|
|
7
6
|
* Generates vector embeddings using Claude embeddings API.
|
|
8
7
|
*/
|
|
9
|
-
|
|
8
|
+
class AnthropicEmbedding {
|
|
10
9
|
constructor(apiKey = process.env.ANTHROPIC_API_KEY) {
|
|
11
10
|
if (!apiKey) throw new Error("Missing ANTHROPIC_API_KEY");
|
|
12
11
|
this.client = new Anthropic({ apiKey });
|
|
@@ -43,3 +42,5 @@ export class AnthropicEmbedding {
|
|
|
43
42
|
return res.data.map(item => item.embedding);
|
|
44
43
|
}
|
|
45
44
|
}
|
|
45
|
+
|
|
46
|
+
module.exports = { AnthropicEmbedding };
|
package/src/embeddings/groq.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
// src/embeddings/groq.js
|
|
2
|
-
|
|
3
|
-
import Groq from "groq-sdk";
|
|
2
|
+
const Groq = require("groq-sdk");
|
|
4
3
|
|
|
5
4
|
/**
|
|
6
5
|
* GroqEmbedding
|
|
@@ -8,7 +7,7 @@ import Groq from "groq-sdk";
|
|
|
8
7
|
*
|
|
9
8
|
* Default model: nomic-embed-text
|
|
10
9
|
*/
|
|
11
|
-
|
|
10
|
+
class GroqEmbedding {
|
|
12
11
|
constructor(apiKey = process.env.GROQ_API_KEY) {
|
|
13
12
|
if (!apiKey) throw new Error("Missing GROQ_API_KEY");
|
|
14
13
|
this.client = new Groq({ apiKey });
|
|
@@ -45,3 +44,5 @@ export class GroqEmbedding {
|
|
|
45
44
|
return res.data.map(item => item.embedding);
|
|
46
45
|
}
|
|
47
46
|
}
|
|
47
|
+
|
|
48
|
+
module.exports = { GroqEmbedding };
|
package/src/embeddings/openai.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
// src/embeddings/openai.js
|
|
2
|
-
|
|
3
|
-
import OpenAI from "openai";
|
|
2
|
+
const OpenAI = require("openai");
|
|
4
3
|
|
|
5
4
|
/**
|
|
6
5
|
* OpenAIEmbedding
|
|
@@ -8,7 +7,7 @@ import OpenAI from "openai";
|
|
|
8
7
|
*
|
|
9
8
|
* Default model: text-embedding-3-large (best for RAG)
|
|
10
9
|
*/
|
|
11
|
-
|
|
10
|
+
class OpenAIEmbedding {
|
|
12
11
|
constructor(apiKey = process.env.OPENAI_API_KEY) {
|
|
13
12
|
if (!apiKey) throw new Error("Missing OPENAI_API_KEY");
|
|
14
13
|
this.client = new OpenAI({ apiKey });
|
|
@@ -45,3 +44,5 @@ export class OpenAIEmbedding {
|
|
|
45
44
|
return res.data.map(item => item.embedding);
|
|
46
45
|
}
|
|
47
46
|
}
|
|
47
|
+
|
|
48
|
+
module.exports = { OpenAIEmbedding };
|
package/src/index.js
CHANGED
|
@@ -7,6 +7,7 @@ const { chunkText } = require("./utils/chunker.js");
|
|
|
7
7
|
const { extractKeywords } = require("./utils/extractText.js");
|
|
8
8
|
const { cosine } = require("./utils/similarity.js");
|
|
9
9
|
const { highlightMatches } = require("./utils/highlight.js");
|
|
10
|
+
const PgVectorAdapter = require("./adapters/pgvectorAdapter.js"); // ✅ Properly imported
|
|
10
11
|
|
|
11
12
|
const CACHE_PATH = path.join(process.cwd(), "embeddings.json");
|
|
12
13
|
|
|
@@ -33,7 +34,7 @@ function saveCache(cache) {
|
|
|
33
34
|
} catch {}
|
|
34
35
|
}
|
|
35
36
|
|
|
36
|
-
// ✅ UNIVERSAL DATABASE ADAPTER (
|
|
37
|
+
// ✅ UNIVERSAL DATABASE ADAPTER (Your existing SQL-based adapter)
|
|
37
38
|
class DatabaseAdapter {
|
|
38
39
|
constructor() {
|
|
39
40
|
this.initialized = false;
|
|
@@ -42,7 +43,6 @@ class DatabaseAdapter {
|
|
|
42
43
|
async initialize(context) {
|
|
43
44
|
if (this.initialized) return;
|
|
44
45
|
|
|
45
|
-
// Initialize based on context configuration
|
|
46
46
|
if (context.db_type === 'mongodb' || context.MONGO_URI) {
|
|
47
47
|
await this.initMongo(context);
|
|
48
48
|
} else if (context.db_type === 'sqlite' || context.db_path) {
|
|
@@ -84,7 +84,7 @@ class DatabaseAdapter {
|
|
|
84
84
|
return await db.collection(collectionName).find(filter, { projection }).toArray();
|
|
85
85
|
}
|
|
86
86
|
|
|
87
|
-
// PostgreSQL Support
|
|
87
|
+
// PostgreSQL Support (Traditional SQL)
|
|
88
88
|
async initPostgres(context) {
|
|
89
89
|
const { Pool } = require('pg');
|
|
90
90
|
const poolConfig = {
|
|
@@ -95,7 +95,6 @@ class DatabaseAdapter {
|
|
|
95
95
|
password: context.DB_PASSWORD,
|
|
96
96
|
database: context.DB_NAME || 'olang'
|
|
97
97
|
};
|
|
98
|
-
// Remove undefined/null values
|
|
99
98
|
Object.keys(poolConfig).forEach(key => {
|
|
100
99
|
if (poolConfig[key] === undefined || poolConfig[key] === null) {
|
|
101
100
|
delete poolConfig[key];
|
|
@@ -110,7 +109,7 @@ class DatabaseAdapter {
|
|
|
110
109
|
return result.rows;
|
|
111
110
|
}
|
|
112
111
|
|
|
113
|
-
// Universal Query Method
|
|
112
|
+
// Universal Query Method (Traditional SQL-based)
|
|
114
113
|
async queryDocuments(context) {
|
|
115
114
|
const {
|
|
116
115
|
db_type,
|
|
@@ -158,7 +157,6 @@ class DatabaseAdapter {
|
|
|
158
157
|
try {
|
|
159
158
|
filter = JSON.parse(doc_filter);
|
|
160
159
|
} catch {
|
|
161
|
-
// Text search fallback
|
|
162
160
|
filter = { $text: { $search: doc_filter } };
|
|
163
161
|
}
|
|
164
162
|
} else if (typeof doc_filter === 'object' && Object.keys(doc_filter).length > 0) {
|
|
@@ -180,7 +178,6 @@ class DatabaseAdapter {
|
|
|
180
178
|
doc_params = []
|
|
181
179
|
} = context;
|
|
182
180
|
|
|
183
|
-
// Parse doc_params from string if needed
|
|
184
181
|
let params = doc_params;
|
|
185
182
|
if (typeof doc_params === 'string') {
|
|
186
183
|
try {
|
|
@@ -212,10 +209,10 @@ class DatabaseAdapter {
|
|
|
212
209
|
}
|
|
213
210
|
}
|
|
214
211
|
|
|
215
|
-
// ✅ LOAD DOCUMENTS FROM DATABASE (
|
|
212
|
+
// ✅ LOAD DOCUMENTS FROM DATABASE (SQL-based)
|
|
216
213
|
async function loadDocumentsFromDatabase(context) {
|
|
217
214
|
if (!context.db_type && !context.db_path && !context.MONGO_URI && !context.POSTGRES_URL) {
|
|
218
|
-
return null;
|
|
215
|
+
return null;
|
|
219
216
|
}
|
|
220
217
|
|
|
221
218
|
const dbAdapter = new DatabaseAdapter();
|
|
@@ -232,13 +229,11 @@ async function loadDocumentsFromDatabase(context) {
|
|
|
232
229
|
async function loadAllDocuments(context) {
|
|
233
230
|
const documents = [];
|
|
234
231
|
|
|
235
|
-
// 1. Load from database first (if configured)
|
|
236
232
|
const dbDocs = await loadDocumentsFromDatabase(context);
|
|
237
233
|
if (dbDocs) {
|
|
238
234
|
documents.push(...dbDocs);
|
|
239
235
|
}
|
|
240
236
|
|
|
241
|
-
// 2. Load from file system (existing behavior)
|
|
242
237
|
const baseDir = context.doc_root
|
|
243
238
|
? safeResolve(process.cwd(), context.doc_root)
|
|
244
239
|
: path.join(process.cwd(), "docs");
|
|
@@ -262,8 +257,79 @@ async function loadAllDocuments(context) {
|
|
|
262
257
|
return documents;
|
|
263
258
|
}
|
|
264
259
|
|
|
265
|
-
// ✅
|
|
266
|
-
async function
|
|
260
|
+
// ✅ PGVECTOR SEARCH FUNCTION (Using imported adapter)
|
|
261
|
+
/**
 * Answers `query` from the pgvector store: embeds the query, retrieves the
 * closest documents and either returns them as numbered context or, when a
 * non-local provider is configured, asks that LLM to answer from them.
 *
 * @param {string} query - The user's question.
 * @param {object} [context]
 * @param {string} [context.POSTGRES_URL] - Falls back to process.env.POSTGRES_URL.
 * @param {object} [context.options] - topK, provider, model and provider API keys.
 * @returns {Promise<{text: string, meta: object}>} `meta.method` is one of
 *   "error", "pgvector-no-results", "pgvector-rag" or "pgvector-retrieval-only".
 */
async function performPgVectorSearch(query, context = {}) {
  const options = context.options || {};
  const topK = options.topK || 5;

  // Reject a blank query before opening a pool or embedding anything,
  // using the same message as the hybrid search path.
  if (typeof query !== "string" || query.trim() === "") {
    return {
      text: "Missing required input: query",
      meta: { method: "error" }
    };
  }

  // Check for POSTGRES_URL in context or environment
  const postgresUrl = context.POSTGRES_URL || process.env.POSTGRES_URL;
  if (!postgresUrl) {
    return {
      text: "POSTGRES_URL not configured for pgvector search",
      meta: { method: "error" }
    };
  }

  const embedder = new LocalEmbedding();
  const pgVectorAdapter = new PgVectorAdapter({
    POSTGRES_URL: postgresUrl,
    DB_HOST: context.DB_HOST,
    DB_PORT: context.DB_PORT,
    DB_USER: context.DB_USER,
    DB_PASSWORD: context.DB_PASSWORD,
    DB_NAME: context.DB_NAME,
  });

  try {
    const queryVector = await embedder.embed(query);
    const docs = await pgVectorAdapter.query(queryVector, topK);

    if (docs.length === 0) {
      return {
        text: `No relevant documents found for: "${query}"`,
        meta: { method: "pgvector-no-results" }
      };
    }

    // Combine every retrieved document into one numbered context block.
    const contextText = docs.map((doc, i) => `(${i + 1}) ${doc.content}`).join("\n\n");
    const sources = docs.map(({ id, source, score }) => ({ id, source, score }));

    // Without a remote provider, return the retrieved context as-is.
    if (!options.provider || options.provider === "local") {
      return {
        text: contextText,
        meta: { method: "pgvector-retrieval-only", sources }
      };
    }

    const llm = createLLM({
      provider: options.provider,
      openaiApiKey: options.openaiApiKey,
      groqApiKey: options.groqApiKey,
      anthropicApiKey: options.anthropicApiKey,
    });

    const prompt = `Answer the question using the context below.\n\nContext:\n${contextText}\n\nQuestion: ${query}`;
    const resp = await llm.generate({ prompt, model: options.model });

    return {
      text: resp.text,
      meta: { method: "pgvector-rag", sources }
    };
  } finally {
    // The adapter is created per call, so always release its pool.
    await pgVectorAdapter.close();
  }
}
|
|
330
|
+
|
|
331
|
+
// ✅ MAIN SEARCH FUNCTION (Your existing hybrid logic)
|
|
332
|
+
async function performHybridDocQA(query, context = {}) {
|
|
267
333
|
const { doc_root, stream = false } = context;
|
|
268
334
|
const options = context.options || {};
|
|
269
335
|
const CHUNK_SIZE = options.chunkSize || 1200;
|
|
@@ -276,7 +342,6 @@ async function performDocQA(query, context = {}) {
|
|
|
276
342
|
return { text: "Missing required input: query" };
|
|
277
343
|
}
|
|
278
344
|
|
|
279
|
-
// Load documents from both database and files
|
|
280
345
|
const allDocs = await loadAllDocuments(context);
|
|
281
346
|
if (!allDocs || !allDocs.length) {
|
|
282
347
|
return { text: "No documents available." };
|
|
@@ -396,7 +461,23 @@ async function performDocQA(query, context = {}) {
|
|
|
396
461
|
};
|
|
397
462
|
}
|
|
398
463
|
|
|
399
|
-
// ✅
|
|
464
|
+
// ✅ SMART ROUTER - Auto-select search method based on context
|
|
465
|
+
/**
 * Entry point for document QA: picks the search backend from the context.
 *
 * A PostgreSQL URL (in `context` or the environment) selects pgvector
 * search; otherwise the hybrid file/database search is used.
 *
 * NOTE(review): a POSTGRES_URL also satisfies the guard in
 * loadDocumentsFromDatabase, so the SQL-based Postgres path looks
 * unreachable through this router - confirm that is intended.
 *
 * @param {string} query - The user's question.
 * @param {object} [context] - Resolver context, forwarded unchanged.
 * @returns {Promise<object>} Whatever the selected search function returns.
 */
async function performDocQA(query, context = {}) {
  const pgVectorConfigured = Boolean(context.POSTGRES_URL || process.env.POSTGRES_URL);

  if (!pgVectorConfigured) {
    // Default: traditional hybrid search over files and databases.
    console.log('🔍 Using hybrid file/DB search mode');
    return await performHybridDocQA(query, context);
  }

  // A PostgreSQL URL is available, so search the pgvector store.
  console.log('🔍 Using pgvector search mode');
  return await performPgVectorSearch(query, context);
}
|
|
479
|
+
|
|
480
|
+
// ✅ O-Lang Resolver Interface
|
|
400
481
|
async function docSearchResolver(action, context) {
|
|
401
482
|
if (action.startsWith('Ask doc-search ')) {
|
|
402
483
|
const match = action.match(/"(.*)"|'(.*)'/);
|
|
@@ -406,8 +487,5 @@ async function docSearchResolver(action, context) {
|
|
|
406
487
|
return undefined;
|
|
407
488
|
}
|
|
408
489
|
|
|
409
|
-
// ✅ Resolver name matches package name: @o-lang/doc-search → doc-search
|
|
410
490
|
docSearchResolver.resolverName = 'doc-search';
|
|
411
|
-
|
|
412
|
-
// ✅ COMMONJS EXPORT (this is the key change)
|
|
413
491
|
module.exports = docSearchResolver;
|
package/src/rerank/cohere.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
// src/rerank/cohere.js
|
|
2
|
-
|
|
3
|
-
import Cohere from "cohere-ai";
|
|
2
|
+
const Cohere = require("cohere-ai");
|
|
4
3
|
|
|
5
4
|
/**
|
|
6
5
|
* CohereReranker
|
|
7
6
|
* Uses Cohere Rerank API to reorder candidate documents/snippets
|
|
8
7
|
*/
|
|
9
|
-
|
|
8
|
+
class CohereReranker {
|
|
10
9
|
constructor(apiKey = process.env.COHERE_API_KEY) {
|
|
11
10
|
if (!apiKey) throw new Error("Missing COHERE_API_KEY");
|
|
12
11
|
Cohere.init(apiKey);
|
|
@@ -37,3 +36,5 @@ export class CohereReranker {
|
|
|
37
36
|
return ranked;
|
|
38
37
|
}
|
|
39
38
|
}
|
|
39
|
+
|
|
40
|
+
module.exports = { CohereReranker };
|
package/src/rerank/groqRerank.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
// src/rerank/groqRerank.js
|
|
2
|
-
|
|
3
|
-
import Groq from "groq-sdk";
|
|
2
|
+
const Groq = require("groq-sdk");
|
|
4
3
|
|
|
5
4
|
/**
|
|
6
5
|
* GroqReranker
|
|
7
6
|
* Uses Groq LLMs to rerank candidate documents/snippets given a query
|
|
8
7
|
*/
|
|
9
|
-
|
|
8
|
+
class GroqReranker {
|
|
10
9
|
constructor(apiKey = process.env.GROQ_API_KEY) {
|
|
11
10
|
if (!apiKey) throw new Error("Missing GROQ_API_KEY");
|
|
12
11
|
this.client = new Groq({ apiKey });
|
|
@@ -48,3 +47,5 @@ Return JSON array: [{"text": "...", "score": 0.95}, ...]
|
|
|
48
47
|
}
|
|
49
48
|
}
|
|
50
49
|
}
|
|
50
|
+
|
|
51
|
+
module.exports = { GroqReranker };
|
package/src/rerank/local.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
// src/rerank/local.js
|
|
2
|
-
|
|
3
|
-
import { cosine } from "../utils/similarity.js";
|
|
2
|
+
const { cosine } = require("../utils/similarity.js");
|
|
4
3
|
|
|
5
4
|
/**
|
|
6
5
|
* LocalReranker
|
|
7
6
|
* Simple fallback reranker using keyword overlap + cosine similarity
|
|
8
7
|
*/
|
|
9
|
-
|
|
8
|
+
class LocalReranker {
|
|
10
9
|
constructor() {}
|
|
11
10
|
|
|
12
11
|
/**
|
|
@@ -41,3 +40,5 @@ export class LocalReranker {
|
|
|
41
40
|
return results;
|
|
42
41
|
}
|
|
43
42
|
}
|
|
43
|
+
|
|
44
|
+
module.exports = { LocalReranker };
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
// src/services/docQA.js
|
|
2
|
+
/**
 * Generic RAG service that works with any vector store adapter.
 *
 * Embeds the query, retrieves the closest documents and either returns them
 * as numbered context or asks the LLM to answer from that context. Failures
 * are logged and reported in the result rather than thrown.
 *
 * @param {object} [params]
 * @param {string} params.query - The user's question.
 * @param {{query: Function}} params.vectorStore - Called as `query(vector, topK)`.
 * @param {{embed: Function}} params.embedder - Called as `embed(query)`.
 * @param {{generate: Function}} [params.llm] - Called as `generate({ prompt })`;
 *   its result's `text` becomes the answer.
 * @param {number} [params.topK=5] - Number of documents to retrieve.
 * @param {boolean} [params.useLLM=true] - When false, skip generation.
 * @returns {Promise<{text: string, meta: object}>} `meta.method` is one of
 *   "error", "no_documents", "vector-retrieval-only" or "rag-with-llm".
 */
async function performDocQA({
  query,
  vectorStore,
  embedder,
  llm,
  topK = 5,
  useLLM = true
} = {}) {
  // A blank query has nothing to embed; report it instead of searching.
  if (typeof query !== "string" || query.trim() === "") {
    const message = "Missing required input: query";
    return { text: message, meta: { method: "error", error: message } };
  }

  try {
    const queryVector = await embedder.embed(query);
    const results = await vectorStore.query(queryVector, topK);
    // Treat a store that returns nothing (null/undefined) as "no matches".
    const docs = Array.isArray(results) ? results : [];

    if (docs.length === 0) {
      return {
        text: `No relevant documents found for: "${query}"`,
        meta: { method: "no_documents" }
      };
    }

    const context = docs
      .map((d, i) => `(${i + 1}) ${d.content}`)
      .join("\n\n");
    const sources = docs.map(({ id, source, score }) => ({ id, source, score }));

    // Return raw context when generation is disabled or no usable LLM was given.
    const canGenerate = useLLM && Boolean(llm) && typeof llm.generate === "function";
    if (!canGenerate) {
      return {
        text: context,
        meta: { method: "vector-retrieval-only", sources }
      };
    }

    const prompt = `Answer the question using the context below.\n\nContext:\n${context}\n\nQuestion: ${query}`;
    const response = await llm.generate({ prompt });

    return {
      text: response.text,
      meta: { method: "rag-with-llm", sources }
    };
  } catch (error) {
    console.error('RAG service error:', error);
    // Non-Error values can be thrown too; make sure a message is reported.
    const message = error instanceof Error ? error.message : String(error);
    return {
      text: `Error processing query: ${message}`,
      meta: { method: "error", error: message }
    };
  }
}
|
|
69
|
+
|
|
70
|
+
module.exports = { performDocQA };
|
package/src/utils/fileLoader.js
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
// src/utils/fileLoader.js
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
import { extractTextFromFile } from "./extractText.js";
|
|
2
|
+
const fs = require("fs");
|
|
3
|
+
const path = require("path");
|
|
4
|
+
const { extractTextFromFile } = require("./extractText.js");
|
|
6
5
|
|
|
7
6
|
/**
|
|
8
7
|
* loadDocuments
|
|
@@ -13,7 +12,7 @@ import { extractTextFromFile } from "./extractText.js";
|
|
|
13
12
|
* @param {string[]} exts - array of supported file extensions
|
|
14
13
|
* @returns {Promise<Array<{ filePath: string, text: string }>>}
|
|
15
14
|
*/
|
|
16
|
-
|
|
15
|
+
async function loadDocuments(dirPath, exts = [".txt", ".md", ".pdf", ".html", ".docx"]) {
|
|
17
16
|
if (!fs.existsSync(dirPath)) return [];
|
|
18
17
|
|
|
19
18
|
const files = fs.readdirSync(dirPath, { withFileTypes: true });
|
|
@@ -37,3 +36,5 @@ export async function loadDocuments(dirPath, exts = [".txt", ".md", ".pdf", ".ht
|
|
|
37
36
|
|
|
38
37
|
return docs;
|
|
39
38
|
}
|
|
39
|
+
|
|
40
|
+
module.exports = { loadDocuments };
|