vectra-js 0.9.7 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/FUNDING.yml +4 -0
- package/.github/workflows/npm-publish.yml +3 -4
- package/README.md +392 -538
- package/RELEASE_NOTES.md +15 -0
- package/docs/assets/vectraArch.png +0 -0
- package/package.json +3 -3
- package/src/backends/openrouter.js +2 -2
- package/src/config.js +1 -1
- package/src/core.js +172 -130
- package/src/observability.js +0 -6
- package/src/processor.js +1 -1
- package/src/webconfig_server.js +1 -1
package/RELEASE_NOTES.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Vectra-js 0.9.7-beta Release Notes
|
|
2
|
+
|
|
3
|
+
## New Features
|
|
4
|
+
* **Native PostgreSQL Support**: Added `PostgresVectorStore` for direct PostgreSQL vector operations without Prisma dependency.
|
|
5
|
+
* **Enhanced Validation**: Integrated Zod for robust configuration schema validation.
|
|
6
|
+
* **Observability**: Added SQLite-based logging for better telemetry and debugging.
|
|
7
|
+
|
|
8
|
+
## Improvements
|
|
9
|
+
* **Code Quality**: Refactored core logic to reduce magic numbers and improve maintainability (SonarCloud fixes).
|
|
10
|
+
* **Linting**: Migrated to ESLint flat config and enforced stricter code style (no-var, prefer-const).
|
|
11
|
+
* **CLI**: Improved stability and error handling in CLI commands.
|
|
12
|
+
|
|
13
|
+
## Fixes
|
|
14
|
+
* Fixed potential unhandled promise rejections in observability logger.
|
|
15
|
+
* Fixed console log noise in production builds.
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vectra-js",
|
|
3
|
-
"version": "0.9.7",
|
|
3
|
+
"version": "0.9.8",
|
|
4
4
|
"description": "A production-ready, provider-agnostic Node.js SDK for End-to-End RAG pipelines.",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
7
7
|
"test": "echo \"Error: no test specified\" && exit 1",
|
|
8
8
|
"prisma:generate": "prisma generate",
|
|
9
|
-
"lint": "eslint .
|
|
10
|
-
"lint:fix": "eslint . --
|
|
9
|
+
"lint": "eslint .",
|
|
10
|
+
"lint:fix": "eslint . --fix"
|
|
11
11
|
},
|
|
12
12
|
"bin": {
|
|
13
13
|
"vectra": "bin/vectra.js"
|
|
@@ -15,8 +15,8 @@ class OpenRouterBackend {
|
|
|
15
15
|
});
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
async embedDocuments(
|
|
19
|
-
async embedQuery(
|
|
18
|
+
async embedDocuments(_) { throw new Error('OpenRouter does not support embeddings via this SDK.'); }
|
|
19
|
+
async embedQuery(_) { throw new Error('OpenRouter does not support embeddings via this SDK.'); }
|
|
20
20
|
|
|
21
21
|
async generate(prompt, sys) {
|
|
22
22
|
const msgs = [];
|
package/src/config.js
CHANGED
|
@@ -113,7 +113,7 @@ const RAGConfigSchema = z.object({
|
|
|
113
113
|
generation: z.object({ structuredOutput: z.enum(['none','citations']).default('none'), outputFormat: z.enum(['text','json']).default('text') }).optional(),
|
|
114
114
|
prompts: z.object({ query: z.string().optional(), reranking: z.string().optional() }).optional(),
|
|
115
115
|
tracing: z.object({ enable: z.boolean().default(false) }).optional(),
|
|
116
|
-
callbacks: z.array(z.custom((
|
|
116
|
+
callbacks: z.array(z.custom((_) => true)).optional(),
|
|
117
117
|
observability: z.object({
|
|
118
118
|
enabled: z.boolean().default(false),
|
|
119
119
|
sqlitePath: z.string().default('vectra-observability.db'),
|
package/src/core.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
2
|
const path = require('path');
|
|
3
|
-
const { RAGConfigSchema, ProviderType,
|
|
3
|
+
const { RAGConfigSchema, ProviderType, RetrievalStrategy } = require('./config');
|
|
4
4
|
const crypto = require('crypto');
|
|
5
5
|
const { DocumentProcessor } = require('./processor');
|
|
6
6
|
const { OpenAIBackend } = require('./backends/openai');
|
|
@@ -20,6 +20,18 @@ const { v5: uuidv5 } = require('uuid');
|
|
|
20
20
|
const { v4: uuidv4 } = require('uuid');
|
|
21
21
|
const SQLiteLogger = require('./observability');
|
|
22
22
|
|
|
23
|
+
const DEFAULT_TOKEN_BUDGET = 2048;
|
|
24
|
+
const DEFAULT_PREFER_SUMMARY_BELOW = 1024;
|
|
25
|
+
const DEFAULT_SUMMARY_LENGTH = 800;
|
|
26
|
+
const DEFAULT_CHUNK_LENGTH = 1200;
|
|
27
|
+
const DEFAULT_FALLBACK_SUMMARY_LENGTH = 300;
|
|
28
|
+
const DEFAULT_KEYWORD_COUNT = 10;
|
|
29
|
+
const DEFAULT_MEMORY_MESSAGES = 20;
|
|
30
|
+
const DEFAULT_CONCURRENCY_LIMIT = 5;
|
|
31
|
+
const DEFAULT_RETRY_ATTEMPTS = 3;
|
|
32
|
+
const DEFAULT_INITIAL_RETRY_DELAY = 500;
|
|
33
|
+
const DEFAULT_MAX_RETRY_DELAY = 4000;
|
|
34
|
+
|
|
23
35
|
class VectraClient {
|
|
24
36
|
constructor(config) {
|
|
25
37
|
const parsed = RAGConfigSchema.parse(config);
|
|
@@ -50,7 +62,7 @@ class VectraClient {
|
|
|
50
62
|
this.vectorStore = this.createVectorStore(this.config.database);
|
|
51
63
|
this._embeddingCache = new Map();
|
|
52
64
|
this._metadataEnrichmentEnabled = !!(this.config.metadata && this.config.metadata.enrichment);
|
|
53
|
-
const mm = this.config.memory?.maxMessages || 20;
|
|
65
|
+
const mm = this.config.memory?.maxMessages || DEFAULT_MEMORY_MESSAGES;
|
|
54
66
|
if (this.config.memory && this.config.memory.enabled) {
|
|
55
67
|
if (this.config.memory.type === 'in-memory') {
|
|
56
68
|
this.history = new InMemoryHistory(mm);
|
|
@@ -129,14 +141,149 @@ class VectraClient {
|
|
|
129
141
|
const words = c.toLowerCase().replace(/[^a-z0-9\s]/g, ' ').split(/\s+/).filter(w => w.length > 3);
|
|
130
142
|
const freq = {};
|
|
131
143
|
for (const w of words) freq[w] = (freq[w] || 0) + 1;
|
|
132
|
-
const top = Object.entries(freq).sort((a,b)=>b[1]-a[1]).slice(0,10).map(([w])=>w);
|
|
133
|
-
const summary = c.slice(0, 300);
|
|
144
|
+
const top = Object.entries(freq).sort((a,b)=>b[1]-a[1]).slice(0,DEFAULT_KEYWORD_COUNT).map(([w])=>w);
|
|
145
|
+
const summary = c.slice(0, DEFAULT_FALLBACK_SUMMARY_LENGTH);
|
|
134
146
|
enriched.push({ summary, keywords: top, hypothetical_questions: [] });
|
|
135
147
|
}
|
|
136
148
|
}
|
|
137
149
|
return enriched;
|
|
138
150
|
}
|
|
139
151
|
|
|
152
|
+
async _batchEmbedChunks(toEmbed, mapIndex, hashes) {
|
|
153
|
+
const newEmbeds = [];
|
|
154
|
+
if (toEmbed.length > 0) {
|
|
155
|
+
const enabled = !!(this.config.ingestion && this.config.ingestion.rateLimitEnabled);
|
|
156
|
+
const defaultLimit = (this.config.ingestion && typeof this.config.ingestion.concurrencyLimit === 'number') ? this.config.ingestion.concurrencyLimit : DEFAULT_CONCURRENCY_LIMIT;
|
|
157
|
+
const limit = enabled ? defaultLimit : toEmbed.length;
|
|
158
|
+
const batches = [];
|
|
159
|
+
for (let i = 0; i < toEmbed.length; i += limit) batches.push(toEmbed.slice(i, i + limit));
|
|
160
|
+
for (const batch of batches) {
|
|
161
|
+
let attempt = 0; let delay = DEFAULT_INITIAL_RETRY_DELAY;
|
|
162
|
+
while (true) {
|
|
163
|
+
try {
|
|
164
|
+
const out = await this.embedder.embedDocuments(batch);
|
|
165
|
+
newEmbeds.push(...out);
|
|
166
|
+
break;
|
|
167
|
+
} catch (err) {
|
|
168
|
+
attempt++;
|
|
169
|
+
if (attempt >= DEFAULT_RETRY_ATTEMPTS) throw err;
|
|
170
|
+
await new Promise(r => setTimeout(r, delay));
|
|
171
|
+
delay = Math.min(DEFAULT_MAX_RETRY_DELAY, delay * 2);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
newEmbeds.forEach((vec, j) => {
|
|
176
|
+
const h = hashes[mapIndex[j]];
|
|
177
|
+
this._embeddingCache.set(h, vec);
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
async _processDirectory(filePath) {
|
|
183
|
+
const files = await fs.promises.readdir(filePath);
|
|
184
|
+
const summary = { processed: 0, succeeded: 0, failed: 0, errors: [] };
|
|
185
|
+
for (const file of files) {
|
|
186
|
+
const full = path.join(filePath, file);
|
|
187
|
+
if (this._isTemporaryFile(full)) continue;
|
|
188
|
+
summary.processed++;
|
|
189
|
+
try {
|
|
190
|
+
await this.ingestDocuments(full);
|
|
191
|
+
summary.succeeded++;
|
|
192
|
+
} catch (err) {
|
|
193
|
+
summary.failed++;
|
|
194
|
+
summary.errors.push({ file: full, message: err?.message || String(err) });
|
|
195
|
+
this.trigger('onError', err);
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
this.trigger('onIngestSummary', summary);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
async _validateFile(filePath, stats) {
|
|
202
|
+
const absPath = path.resolve(filePath);
|
|
203
|
+
const size = stats.size || 0;
|
|
204
|
+
const mtime = Math.floor(stats.mtimeMs || Date.now());
|
|
205
|
+
const md5 = crypto.createHash('md5');
|
|
206
|
+
const sha = crypto.createHash('sha256');
|
|
207
|
+
await new Promise((resolve, reject) => {
|
|
208
|
+
const s = fs.createReadStream(filePath);
|
|
209
|
+
s.on('data', (chunk) => { md5.update(chunk); sha.update(chunk); });
|
|
210
|
+
s.on('error', reject);
|
|
211
|
+
s.on('end', resolve);
|
|
212
|
+
});
|
|
213
|
+
const fileMD5 = md5.digest('hex');
|
|
214
|
+
const fileSHA256 = sha.digest('hex');
|
|
215
|
+
return { absolutePath: absPath, fileMD5, fileSHA256, fileSize: size, lastModified: mtime, timestamp: Date.now() };
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
async _prepareDocuments(filePath, rawText, chunks, embeddings, hashes, validation) {
|
|
219
|
+
const metas = this.processor.computeChunkMetadata(filePath, rawText, chunks);
|
|
220
|
+
const idNamespace = uuidv5('vectra-js', uuidv5.DNS);
|
|
221
|
+
let documents = chunks.map((content, i) => ({
|
|
222
|
+
id: uuidv5(`${validation.fileSHA256}:${i}`, idNamespace),
|
|
223
|
+
content,
|
|
224
|
+
embedding: embeddings[i],
|
|
225
|
+
metadata: {
|
|
226
|
+
docId: uuidv5(`${validation.fileSHA256}:${i}`, idNamespace),
|
|
227
|
+
source: filePath,
|
|
228
|
+
absolutePath: validation.absolutePath,
|
|
229
|
+
fileMD5: validation.fileMD5,
|
|
230
|
+
fileSHA256: validation.fileSHA256,
|
|
231
|
+
fileSize: validation.fileSize,
|
|
232
|
+
lastModified: validation.lastModified,
|
|
233
|
+
chunkIndex: i,
|
|
234
|
+
sha256: hashes[i],
|
|
235
|
+
fileType: metas[i]?.fileType,
|
|
236
|
+
docTitle: metas[i]?.docTitle,
|
|
237
|
+
pageFrom: metas[i]?.pageFrom,
|
|
238
|
+
pageTo: metas[i]?.pageTo,
|
|
239
|
+
section: metas[i]?.section
|
|
240
|
+
}
|
|
241
|
+
}));
|
|
242
|
+
|
|
243
|
+
if (this._metadataEnrichmentEnabled) {
|
|
244
|
+
const extra = await this._enrichChunkMetadata(chunks);
|
|
245
|
+
documents = documents.map((d, i) => ({
|
|
246
|
+
...d,
|
|
247
|
+
metadata: {
|
|
248
|
+
...d.metadata,
|
|
249
|
+
summary: extra[i]?.summary,
|
|
250
|
+
keywords: extra[i]?.keywords,
|
|
251
|
+
hypothetical_questions: extra[i]?.hypothetical_questions,
|
|
252
|
+
}
|
|
253
|
+
}));
|
|
254
|
+
}
|
|
255
|
+
return documents;
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
async _storeDocuments(documents, mode, absPath) {
|
|
259
|
+
if (this.vectorStore && typeof this.vectorStore.ensureIndexes === 'function') {
|
|
260
|
+
try { await this.vectorStore.ensureIndexes(); } catch (_) {}
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
if (mode === 'replace' && this.vectorStore && typeof this.vectorStore.deleteDocuments === 'function') {
|
|
264
|
+
try {
|
|
265
|
+
await this.vectorStore.deleteDocuments({ filter: { absolutePath: absPath } });
|
|
266
|
+
} catch (_) {}
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
let attempt = 0; let delay = DEFAULT_INITIAL_RETRY_DELAY;
|
|
270
|
+
while (true) {
|
|
271
|
+
try {
|
|
272
|
+
if (mode === 'replace' && this.vectorStore && typeof this.vectorStore.upsertDocuments === 'function') {
|
|
273
|
+
await this.vectorStore.upsertDocuments(documents);
|
|
274
|
+
} else {
|
|
275
|
+
await this.vectorStore.addDocuments(documents);
|
|
276
|
+
}
|
|
277
|
+
break;
|
|
278
|
+
} catch (err) {
|
|
279
|
+
attempt++;
|
|
280
|
+
if (attempt >= DEFAULT_RETRY_ATTEMPTS) throw err;
|
|
281
|
+
await new Promise(r => setTimeout(r, delay));
|
|
282
|
+
delay = Math.min(DEFAULT_MAX_RETRY_DELAY, delay * 2);
|
|
283
|
+
}
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
140
287
|
async ingestDocuments(filePath) {
|
|
141
288
|
const traceId = uuidv4();
|
|
142
289
|
const rootSpanId = uuidv4();
|
|
@@ -148,58 +295,33 @@ class VectraClient {
|
|
|
148
295
|
const stats = await fs.promises.stat(filePath);
|
|
149
296
|
|
|
150
297
|
if (stats.isDirectory()) {
|
|
151
|
-
|
|
152
|
-
const summary = { processed: 0, succeeded: 0, failed: 0, errors: [] };
|
|
153
|
-
for (const file of files) {
|
|
154
|
-
const full = path.join(filePath, file);
|
|
155
|
-
if (this._isTemporaryFile(full)) continue;
|
|
156
|
-
summary.processed++;
|
|
157
|
-
try {
|
|
158
|
-
await this.ingestDocuments(full);
|
|
159
|
-
summary.succeeded++;
|
|
160
|
-
} catch (err) {
|
|
161
|
-
summary.failed++;
|
|
162
|
-
summary.errors.push({ file: full, message: err?.message || String(err) });
|
|
163
|
-
this.trigger('onError', err);
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
this.trigger('onIngestSummary', summary);
|
|
298
|
+
await this._processDirectory(filePath);
|
|
167
299
|
return;
|
|
168
300
|
}
|
|
169
301
|
|
|
170
302
|
const t0 = Date.now();
|
|
171
303
|
this.trigger('onIngestStart', filePath);
|
|
172
|
-
|
|
173
|
-
const
|
|
174
|
-
const mtime = Math.floor(stats.mtimeMs || Date.now());
|
|
175
|
-
const md5 = crypto.createHash('md5');
|
|
176
|
-
const sha = crypto.createHash('sha256');
|
|
177
|
-
await new Promise((resolve, reject) => {
|
|
178
|
-
const s = fs.createReadStream(filePath);
|
|
179
|
-
s.on('data', (chunk) => { md5.update(chunk); sha.update(chunk); });
|
|
180
|
-
s.on('error', reject);
|
|
181
|
-
s.on('end', resolve);
|
|
182
|
-
});
|
|
183
|
-
const fileMD5 = md5.digest('hex');
|
|
184
|
-
const fileSHA256 = sha.digest('hex');
|
|
185
|
-
const validation = { absolutePath: absPath, fileMD5, fileSHA256, fileSize: size, lastModified: mtime, timestamp: Date.now() };
|
|
304
|
+
|
|
305
|
+
const validation = await this._validateFile(filePath, stats);
|
|
186
306
|
this.trigger('onPreIngestionValidation', validation);
|
|
307
|
+
|
|
187
308
|
const mode = (this.config.ingestion && this.config.ingestion.mode) ? this.config.ingestion.mode : 'skip';
|
|
188
309
|
let exists = false;
|
|
189
310
|
if (this.vectorStore && typeof this.vectorStore.fileExists === 'function') {
|
|
190
|
-
try { exists = await this.vectorStore.fileExists(fileSHA256,
|
|
311
|
+
try { exists = await this.vectorStore.fileExists(validation.fileSHA256, validation.fileSize, validation.lastModified); } catch { exists = false; }
|
|
191
312
|
}
|
|
192
313
|
if (mode === 'skip' && exists) {
|
|
193
314
|
this.trigger('onIngestSkipped', validation);
|
|
194
315
|
return;
|
|
195
316
|
}
|
|
317
|
+
|
|
196
318
|
const rawText = await this.processor.loadDocument(filePath);
|
|
197
319
|
|
|
198
320
|
this.trigger('onChunkingStart', this.config.chunking.strategy);
|
|
199
321
|
const chunks = await this.processor.process(rawText);
|
|
200
322
|
|
|
201
323
|
this.trigger('onEmbeddingStart', chunks.length);
|
|
202
|
-
|
|
324
|
+
|
|
203
325
|
const hashes = chunks.map(c => crypto.createHash('sha256').update(c).digest('hex'));
|
|
204
326
|
const toEmbed = [];
|
|
205
327
|
const mapIndex = [];
|
|
@@ -208,104 +330,24 @@ class VectraClient {
|
|
|
208
330
|
toEmbed.push(chunks[i]);
|
|
209
331
|
mapIndex.push(i);
|
|
210
332
|
});
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
const defaultLimit = (this.config.ingestion && typeof this.config.ingestion.concurrencyLimit === 'number') ? this.config.ingestion.concurrencyLimit : 5;
|
|
215
|
-
const limit = enabled ? defaultLimit : toEmbed.length;
|
|
216
|
-
const batches = [];
|
|
217
|
-
for (let i = 0; i < toEmbed.length; i += limit) batches.push(toEmbed.slice(i, i + limit));
|
|
218
|
-
for (const batch of batches) {
|
|
219
|
-
let attempt = 0; let delay = 500;
|
|
220
|
-
while (true) {
|
|
221
|
-
try {
|
|
222
|
-
const out = await this.embedder.embedDocuments(batch);
|
|
223
|
-
newEmbeds.push(...out);
|
|
224
|
-
break;
|
|
225
|
-
} catch (err) {
|
|
226
|
-
attempt++;
|
|
227
|
-
if (attempt >= 3) throw err;
|
|
228
|
-
await new Promise(r => setTimeout(r, delay));
|
|
229
|
-
delay = Math.min(4000, delay * 2);
|
|
230
|
-
}
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
newEmbeds.forEach((vec, j) => {
|
|
234
|
-
const h = hashes[mapIndex[j]];
|
|
235
|
-
this._embeddingCache.set(h, vec);
|
|
236
|
-
});
|
|
237
|
-
}
|
|
333
|
+
|
|
334
|
+
await this._batchEmbedChunks(toEmbed, mapIndex, hashes);
|
|
335
|
+
|
|
238
336
|
const embeddings = hashes.map((h) => this._embeddingCache.get(h));
|
|
239
337
|
|
|
240
|
-
const
|
|
241
|
-
|
|
242
|
-
let documents = chunks.map((content, i) => ({
|
|
243
|
-
id: uuidv5(`${fileSHA256}:${i}`, idNamespace),
|
|
244
|
-
content,
|
|
245
|
-
embedding: embeddings[i],
|
|
246
|
-
metadata: {
|
|
247
|
-
docId: uuidv5(`${fileSHA256}:${i}`, idNamespace),
|
|
248
|
-
source: filePath,
|
|
249
|
-
absolutePath: absPath,
|
|
250
|
-
fileMD5,
|
|
251
|
-
fileSHA256,
|
|
252
|
-
fileSize: size,
|
|
253
|
-
lastModified: mtime,
|
|
254
|
-
chunkIndex: i,
|
|
255
|
-
sha256: hashes[i],
|
|
256
|
-
fileType: metas[i]?.fileType,
|
|
257
|
-
docTitle: metas[i]?.docTitle,
|
|
258
|
-
pageFrom: metas[i]?.pageFrom,
|
|
259
|
-
pageTo: metas[i]?.pageTo,
|
|
260
|
-
section: metas[i]?.section
|
|
261
|
-
}
|
|
262
|
-
}));
|
|
263
|
-
|
|
264
|
-
if (this._metadataEnrichmentEnabled) {
|
|
265
|
-
const extra = await this._enrichChunkMetadata(chunks);
|
|
266
|
-
documents = documents.map((d, i) => ({
|
|
267
|
-
...d,
|
|
268
|
-
metadata: {
|
|
269
|
-
...d.metadata,
|
|
270
|
-
summary: extra[i]?.summary,
|
|
271
|
-
keywords: extra[i]?.keywords,
|
|
272
|
-
hypothetical_questions: extra[i]?.hypothetical_questions,
|
|
273
|
-
}
|
|
274
|
-
}));
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
if (this.vectorStore && typeof this.vectorStore.ensureIndexes === 'function') {
|
|
278
|
-
try { await this.vectorStore.ensureIndexes(); } catch (_) {}
|
|
279
|
-
}
|
|
338
|
+
const documents = await this._prepareDocuments(filePath, rawText, chunks, embeddings, hashes, validation);
|
|
339
|
+
|
|
280
340
|
let existsServer = false;
|
|
281
341
|
if (this.vectorStore && typeof this.vectorStore.fileExists === 'function') {
|
|
282
|
-
try { existsServer = await this.vectorStore.fileExists(fileSHA256,
|
|
342
|
+
try { existsServer = await this.vectorStore.fileExists(validation.fileSHA256, validation.fileSize, validation.lastModified); } catch { existsServer = false; }
|
|
283
343
|
}
|
|
284
344
|
if (mode === 'skip' && existsServer) {
|
|
285
345
|
this.trigger('onIngestSkipped', validation);
|
|
286
346
|
return;
|
|
287
347
|
}
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
} catch (_) {}
|
|
292
|
-
}
|
|
293
|
-
let attempt = 0; let delay = 500;
|
|
294
|
-
while (true) {
|
|
295
|
-
try {
|
|
296
|
-
if (mode === 'replace' && this.vectorStore && typeof this.vectorStore.upsertDocuments === 'function') {
|
|
297
|
-
await this.vectorStore.upsertDocuments(documents);
|
|
298
|
-
} else {
|
|
299
|
-
await this.vectorStore.addDocuments(documents);
|
|
300
|
-
}
|
|
301
|
-
break;
|
|
302
|
-
} catch (err) {
|
|
303
|
-
attempt++;
|
|
304
|
-
if (attempt >= 3) throw err;
|
|
305
|
-
await new Promise(r => setTimeout(r, delay));
|
|
306
|
-
delay = Math.min(4000, delay * 2);
|
|
307
|
-
}
|
|
308
|
-
}
|
|
348
|
+
|
|
349
|
+
await this._storeDocuments(documents, mode, validation.absolutePath);
|
|
350
|
+
|
|
309
351
|
const durationMs = Date.now() - t0;
|
|
310
352
|
this.trigger('onIngestEnd', filePath, chunks.length, durationMs);
|
|
311
353
|
|
|
@@ -317,7 +359,7 @@ class VectraClient {
|
|
|
317
359
|
endTime: Date.now(),
|
|
318
360
|
input: { filePath },
|
|
319
361
|
output: { chunks: chunks.length, durationMs },
|
|
320
|
-
attributes: { fileSize:
|
|
362
|
+
attributes: { fileSize: validation.fileSize },
|
|
321
363
|
provider,
|
|
322
364
|
modelName
|
|
323
365
|
});
|
|
@@ -395,16 +437,16 @@ class VectraClient {
|
|
|
395
437
|
}
|
|
396
438
|
|
|
397
439
|
buildContextParts(docs, query) {
|
|
398
|
-
const budget = (this.config.queryPlanning && this.config.queryPlanning.tokenBudget) ? this.config.queryPlanning.tokenBudget : 2048;
|
|
399
|
-
const preferSumm = (this.config.queryPlanning && this.config.queryPlanning.preferSummariesBelow) ? this.config.queryPlanning.preferSummariesBelow : 1024;
|
|
440
|
+
const budget = (this.config.queryPlanning && this.config.queryPlanning.tokenBudget) ? this.config.queryPlanning.tokenBudget : DEFAULT_TOKEN_BUDGET;
|
|
441
|
+
const preferSumm = (this.config.queryPlanning && this.config.queryPlanning.preferSummariesBelow) ? this.config.queryPlanning.preferSummariesBelow : DEFAULT_PREFER_SUMMARY_BELOW;
|
|
400
442
|
const parts = [];
|
|
401
443
|
let used = 0;
|
|
402
444
|
for (const d of docs) {
|
|
403
445
|
const t = d.metadata?.docTitle || '';
|
|
404
446
|
const sec = d.metadata?.section || '';
|
|
405
447
|
const pages = (d.metadata?.pageFrom && d.metadata?.pageTo) ? `pages ${d.metadata.pageFrom}-${d.metadata.pageTo}` : '';
|
|
406
|
-
const sum = d.metadata?.summary ? d.metadata.summary : d.content.slice(0, 800);
|
|
407
|
-
const chosen = (this.tokenEstimate(sum) <= preferSumm) ? sum : d.content.slice(0, 1200);
|
|
448
|
+
const sum = d.metadata?.summary ? d.metadata.summary : d.content.slice(0, DEFAULT_SUMMARY_LENGTH);
|
|
449
|
+
const chosen = (this.tokenEstimate(sum) <= preferSumm) ? sum : d.content.slice(0, DEFAULT_CHUNK_LENGTH);
|
|
408
450
|
const part = `${t} ${sec} ${pages}\n${chosen}`;
|
|
409
451
|
const est = this.tokenEstimate(part);
|
|
410
452
|
if (used + est > budget) break;
|
package/src/observability.js
CHANGED
|
@@ -17,26 +17,20 @@ class SQLiteLogger {
|
|
|
17
17
|
const dbPath = path.isAbsolute(rawPath) ? rawPath : path.resolve(process.cwd(), rawPath);
|
|
18
18
|
// Ensure directory exists
|
|
19
19
|
const dbDir = path.dirname(dbPath);
|
|
20
|
-
console.log(`[SQLiteLogger] dbPath: ${dbPath}, dbDir: ${dbDir}`);
|
|
21
20
|
|
|
22
21
|
const fs = require('fs');
|
|
23
22
|
if (!fs.existsSync(dbDir)) {
|
|
24
|
-
console.log(`[SQLiteLogger] Creating directory: ${dbDir}`);
|
|
25
23
|
fs.mkdirSync(dbDir, { recursive: true });
|
|
26
|
-
} else {
|
|
27
|
-
console.log(`[SQLiteLogger] Directory exists: ${dbDir}`);
|
|
28
24
|
}
|
|
29
25
|
|
|
30
26
|
const sqlite3 = require('sqlite3').verbose();
|
|
31
27
|
this.db = new sqlite3.Database(dbPath, (err) => {
|
|
32
28
|
if (err) {
|
|
33
|
-
console.error('Failed to connect to SQLite database:', err);
|
|
34
29
|
throw err;
|
|
35
30
|
}
|
|
36
31
|
});
|
|
37
32
|
this.initializeSchema();
|
|
38
33
|
} catch (error) {
|
|
39
|
-
console.error('Failed to initialize SQLite logger:', error);
|
|
40
34
|
throw error;
|
|
41
35
|
}
|
|
42
36
|
}
|
package/src/processor.js
CHANGED
package/src/webconfig_server.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
const http = require('http');
|
|
2
2
|
const fs = require('fs');
|
|
3
3
|
const path = require('path');
|
|
4
|
-
const {
|
|
4
|
+
const { ProviderType, ChunkingStrategy, RetrievalStrategy } = require('./config');
|
|
5
5
|
const sqlite3 = require('sqlite3').verbose();
|
|
6
6
|
|
|
7
7
|
|