vectra-js 0.9.8 → 0.9.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -40,11 +40,12 @@ If you find this project useful, consider supporting it:<br>
40
40
  * [WebConfig (Config Generator UI)](#webconfig-config-generator-ui)
41
41
  * [Observability Dashboard](#observability-dashboard)
42
42
  * [13. Observability & Callbacks](#13-observability--callbacks)
43
- * [14. Database Schemas & Indexing](#14-database-schemas--indexing)
44
- * [15. Extending Vectra](#15-extending-vectra)
45
- * [16. Architecture Overview](#16-architecture-overview)
46
- * [17. Development & Contribution Guide](#17-development--contribution-guide)
47
- * [18. Production Best Practices](#18-production-best-practices)
43
+ * [14. Telemetry](#14-telemetry)
44
+ * [15. Database Schemas & Indexing](#15-database-schemas--indexing)
45
+ * [16. Extending Vectra](#16-extending-vectra)
46
+ * [17. Architecture Overview](#17-architecture-overview)
47
+ * [18. Development & Contribution Guide](#18-development--contribution-guide)
48
+ * [19. Production Best Practices](#19-production-best-practices)
48
49
 
49
50
  ---
50
51
 
@@ -127,15 +128,19 @@ Every major subsystem (providers, vector stores, callbacks) is interface‑drive
127
128
  ### Library
128
129
 
129
130
  ```bash
130
- npm install vectra-js @prisma/client
131
+ npm install vectra-js
131
132
  # or
132
- pnpm add vectra-js @prisma/client
133
+ pnpm add vectra-js
133
134
  ```
134
135
 
135
- Optional backends:
136
+ Backends:
136
137
 
137
138
  ```bash
138
- npm install chromadb
139
+ npm install pg # https://node-postgres.com/
140
+ npm install @prisma/client # https://prisma.io/docs
141
+ npm install chromadb # https://docs.trychroma.com/
142
+ npm install @qdrant/js-client-rest # https://qdrant.tech/documentation/
143
+ npm install @zilliz/milvus2-sdk-node # https://milvus.io/docs/
139
144
  ```
140
145
 
141
146
  ### CLI
@@ -152,6 +157,11 @@ pnpm add -g vectra-js
152
157
 
153
158
  ```js
154
159
  const { VectraClient, ProviderType } = require('vectra-js');
160
+ const { Pool } = require('pg');
161
+
162
+ const pool = new Pool({
163
+ connectionString: process.env.DATABASE_URL
164
+ });
155
165
 
156
166
  const client = new VectraClient({
157
167
  embedding: {
@@ -162,12 +172,13 @@ const client = new VectraClient({
162
172
  llm: {
163
173
  provider: ProviderType.GEMINI,
164
174
  apiKey: process.env.GOOGLE_API_KEY,
165
- modelName: 'gemini-1.5-pro-latest'
175
+ modelName: 'gemini-2.5-flash'
166
176
  },
167
177
  database: {
168
- type: 'prisma',
169
- clientInstance: prisma,
170
- tableName: 'Document'
178
+ type: 'postgres',
179
+ clientInstance: pool,
180
+ tableName: 'document',
181
+ columnMap: { 'content': 'content', 'metadata': 'metadata', 'vector': 'vector' }
171
182
  }
172
183
  });
173
184
 
@@ -240,7 +251,7 @@ Use `dimensions` when using pgvector to avoid runtime mismatches.
240
251
  llm: {
241
252
  provider: ProviderType.GEMINI,
242
253
  apiKey: process.env.GOOGLE_API_KEY,
243
- modelName: 'gemini-1.5-pro-latest',
254
+ modelName: 'gemini-2.5-flash',
244
255
  temperature: 0.3,
245
256
  maxTokens: 1024
246
257
  }
@@ -257,15 +268,54 @@ Used for:
257
268
 
258
269
  ### Database
259
270
 
271
+ Supports Prisma, Postgres (native), Chroma, Qdrant, Milvus.
272
+
260
273
  ```js
274
+ // PostgreSQL (native pg)
275
+ database: {
276
+ type: 'postgres',
277
+ clientInstance: pool, // new Pool(...)
278
+ tableName: 'document',
279
+ columnMap: { content: 'content', metadata: 'metadata', vector: 'vector' }
280
+ }
281
+ ```
282
+
283
+ ```js
284
+ // Prisma
261
285
  database: {
262
286
  type: 'prisma',
263
287
  clientInstance: prisma,
264
- tableName: 'Document'
288
+ tableName: 'Document',
289
+ columnMap: { content: 'content', metadata: 'metadata', vector: 'embedding' }
290
+ }
291
+ ```
292
+
293
+ ```js
294
+ // ChromaDB
295
+ database: {
296
+ type: 'chroma',
297
+ clientInstance: chromaClient,
298
+ collectionName: 'rag_collection'
265
299
  }
266
300
  ```
267
301
 
268
- Supports Prisma, Chroma, Qdrant, Milvus.
302
+ ```js
303
+ // Qdrant
304
+ database: {
305
+ type: 'qdrant',
306
+ clientInstance: qdrantClient,
307
+ collectionName: 'rag_collection'
308
+ }
309
+ ```
310
+
311
+ ```js
312
+ // Milvus
313
+ database: {
314
+ type: 'milvus',
315
+ clientInstance: milvusClient,
316
+ collectionName: 'rag_collection'
317
+ }
318
+ ```
269
319
 
270
320
  ---
271
321
 
@@ -324,6 +374,38 @@ memory: { enabled: true, type: 'in-memory', maxMessages: 20 }
324
374
 
325
375
  Redis and Postgres are supported.
326
376
 
377
+ ```js
378
+ // Redis
379
+ memory: {
380
+ enabled: true,
381
+ type: 'redis',
382
+ maxMessages: 20,
383
+ redis: {
384
+ clientInstance: redisClient,
385
+ keyPrefix: 'vectra:chat:'
386
+ }
387
+ }
388
+ ```
389
+
390
+ ```js
391
+ // Postgres
392
+ memory: {
393
+ enabled: true,
394
+ type: 'postgres',
395
+ maxMessages: 20,
396
+ postgres: {
397
+ clientInstance: pool, // pg Pool
398
+ tableName: 'ChatMessage',
399
+ columnMap: {
400
+ sessionId: 'sessionId',
401
+ role: 'role',
402
+ content: 'content',
403
+ createdAt: 'createdAt'
404
+ }
405
+ }
406
+ }
407
+ ```
408
+
327
409
  ---
328
410
 
329
411
  ### Observability
@@ -455,7 +537,48 @@ Lifecycle hooks:
455
537
 
456
538
  ---
457
539
 
458
- ## 14. Database Schemas & Indexing
540
+ ## 14. Telemetry
541
+
542
+ Vectra collects anonymous usage data to help us improve the SDK, prioritize features, and detect broken versions.
543
+
544
+ ### What we track
545
+
546
+ * **Identity**: A random UUID (`distinct_id`) stored locally in `~/.vectra/telemetry.json`. **No PII, emails, IPs, or hostnames.**
547
+ * **Events**:
548
+ * `sdk_initialized`: Config shape (providers used), OS/Runtime version, session type (api/cli/chat).
549
+ * `ingest_started/completed`: Source type (file or directory), file extension, chunking strategy, metadata-enrichment flag, cached-embeddings flag, duration bucket, chunk count bucket.
550
+ * `query_executed`: Retrieval strategy, query mode (rag), reranking/streaming/memory flags, result count.
551
+ * `feature_used`: WebConfig/Dashboard usage.
552
+ * `evaluation_run`: Dataset size bucket.
553
+ * `error_occurred`: Error type and stage (no stack traces).
554
+ * `cli_command_used`: Command name and flags.
555
+
556
+ ### Why we track it
557
+
558
+ * **Detect broken versions**: Spikes in `error_occurred` help us find bugs.
559
+ * **Measure adoption**: Helps us understand which providers (OpenAI vs Gemini) and vector stores are most popular.
560
+ * **Drop support safely**: We can see if anyone is still using Node 18 before dropping it.
561
+
562
+ ### How to opt-out
563
+
564
+ Telemetry is **enabled by default**. To disable it:
565
+
566
+ **Option 1: Config**
567
+
568
+ ```js
569
+ const client = new VectraClient({
570
+ // ...
571
+ telemetry: { enabled: false }
572
+ });
573
+ ```
574
+
575
+ **Option 2: Environment Variable**
576
+
577
+ Set `VECTRA_TELEMETRY_DISABLED=1` or `DO_NOT_TRACK=1`.
578
+
579
+ ---
580
+
581
+ ## 15. Database Schemas & Indexing
459
582
 
460
583
  ```prisma
461
584
  model Document {
@@ -469,7 +592,7 @@ model Document {
469
592
 
470
593
  ---
471
594
 
472
- ## 15. Extending Vectra
595
+ ## 16. Extending Vectra
473
596
 
474
597
  ### Custom Vector Store
475
598
 
@@ -482,7 +605,7 @@ class MyStore extends VectorStore {
482
605
 
483
606
  ---
484
607
 
485
- ## 16. Architecture Overview
608
+ ## 17. Architecture Overview
486
609
 
487
610
  * `VectraClient`: orchestrator
488
611
  * Typed config schema
@@ -491,7 +614,7 @@ class MyStore extends VectorStore {
491
614
 
492
615
  ---
493
616
 
494
- ## 17. Development & Contribution Guide
617
+ ## 18. Development & Contribution Guide
495
618
 
496
619
  * Node.js 18+
497
620
  * pnpm recommended
@@ -499,7 +622,7 @@ class MyStore extends VectorStore {
499
622
 
500
623
  ---
501
624
 
502
- ## 18. Production Best Practices
625
+ ## 19. Production Best Practices
503
626
 
504
627
  * Match embedding dimensions to pgvector
505
628
  * Prefer HYBRID retrieval
package/bin/vectra.js CHANGED
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  const { start: startWebConfig } = require('../src/webconfig_server');
3
+ const telemetry = require('../src/telemetry');
3
4
  const fs = require('fs');
4
5
  const path = require('path');
5
6
 
@@ -25,31 +26,49 @@ async function run() {
25
26
  target = arg;
26
27
  }
27
28
  }
29
+
30
+ // Load config for telemetry init if possible
31
+ let cfg = null;
32
+ try {
33
+ const p = configPath ? path.resolve(configPath) : path.join(process.cwd(), 'vectra-config.json');
34
+ if (fs.existsSync(p)) cfg = JSON.parse(fs.readFileSync(p, 'utf-8'));
35
+ } catch (_) {}
36
+
37
+ telemetry.init(cfg || {});
38
+ if (cmd) {
39
+ telemetry.track('cli_command_used', {
40
+ command: cmd,
41
+ flags: stream ? ['--stream'] : []
42
+ });
43
+ }
28
44
 
29
45
  if (cmd === 'webconfig') {
30
46
  const cfgPath = configPath || path.join(process.cwd(), 'vectra-config.json');
31
47
  startWebConfig(cfgPath, 'webconfig');
48
+ await telemetry.flush();
32
49
  return;
33
50
  }
34
51
 
35
52
  if (cmd === 'dashboard') {
36
53
  const cfgPath = configPath || path.join(process.cwd(), 'vectra-config.json');
37
54
  startWebConfig(cfgPath, 'dashboard');
55
+ await telemetry.flush();
38
56
  return;
39
57
  }
40
58
 
41
59
  if (!cmd || (!target && cmd !== 'webconfig' && cmd !== 'dashboard')) {
42
60
  console.error('Usage: vectra <ingest|query|webconfig|dashboard> <path|text> [--config=path] [--stream]');
61
+ await telemetry.flush();
43
62
  process.exit(1);
44
63
  }
45
64
 
46
65
  // Lazy load VectraClient to avoid overhead when just running help or webconfig
47
66
  const { VectraClient } = require('..');
48
67
 
49
- let cfg = null;
50
- if (configPath) {
68
+ // Re-load config if we just did a quick check earlier
69
+ if (configPath && !cfg) {
51
70
  cfg = JSON.parse(fs.readFileSync(path.resolve(configPath), 'utf-8'));
52
- } else {
71
+ } else if (!cfg) {
53
72
  // Fallback to test config if exists, or null
54
73
  try {
55
74
  cfg = require(path.resolve(process.cwd(), 'nodejs-test/index.js')).config;
@@ -57,7 +76,11 @@ async function run() {
57
76
  cfg = null;
58
77
  }
59
78
  }
60
-
79
+
80
+ // VectraClient will re-init telemetry but that's fine (idempotent)
81
+ if (cfg) {
82
+ cfg.sessionType = 'cli';
83
+ }
61
84
  const client = new VectraClient(cfg);
62
85
  if (cmd === 'ingest') {
63
86
  await client.ingestDocuments(path.resolve(process.cwd(), target));
@@ -75,8 +98,14 @@ async function run() {
75
98
  }
76
99
  } else {
77
100
  console.error('Unknown command');
101
+ await telemetry.flush();
78
102
  process.exit(1);
79
103
  }
104
+ await telemetry.flush();
80
105
  }
81
106
 
82
- run().catch(e => { console.error(e && e.message ? e.message : String(e)); process.exit(1); });
107
+ run().catch(async e => {
108
+ console.error(e && e.message ? e.message : String(e));
109
+ try { await telemetry.flush(); } catch {}
110
+ process.exit(1);
111
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vectra-js",
3
- "version": "0.9.8",
3
+ "version": "0.9.12",
4
4
  "description": "A production-ready, provider-agnostic Node.js SDK for End-to-End RAG pipelines.",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -34,7 +34,7 @@
34
34
  "author": "Abhishek N",
35
35
  "license": "GPL-3.0",
36
36
  "dependencies": {
37
- "@anthropic-ai/sdk": "^0.20.9",
37
+ "@anthropic-ai/sdk": "^0.71.2",
38
38
  "@google/genai": "^1.34.0",
39
39
  "dotenv": "^16.6.1",
40
40
  "mammoth": "^1.11.0",
@@ -45,6 +45,28 @@ class MilvusVectorStore extends VectorStore {
45
45
  return rows.map((r) => ({ id: r.id, content: r.content || '', metadata: r.metadata ? JSON.parse(r.metadata) : {} }));
46
46
  }
47
47
 
48
+ async fileExists(sha256, size, lastModified) {
49
+ if (typeof this.client.query !== 'function') return false;
50
+ try {
51
+ const expr = '';
52
+ const res = await this.client.query({
53
+ collection_name: this.collection,
54
+ expr,
55
+ output_fields: ['content', 'metadata'],
56
+ limit: 1
57
+ });
58
+ const rows = Array.isArray(res) ? res : (res?.data || res?.results || []);
59
+ return rows.some((r) => {
60
+ try {
61
+ const m = r.metadata ? JSON.parse(r.metadata) : {};
62
+ return m.fileSHA256 === sha256 && m.fileSize === size && m.lastModified === lastModified;
63
+ } catch (_) { return false; }
64
+ });
65
+ } catch (_) {
66
+ return false;
67
+ }
68
+ }
69
+
48
70
  async deleteDocuments({ ids = null, filter = null } = {}) {
49
71
  if (typeof this.client.delete !== 'function') throw new Error('deleteDocuments is not supported for this Milvus client');
50
72
  if (Array.isArray(ids) && ids.length > 0) {
@@ -24,9 +24,11 @@ class PostgresVectorStore extends VectorStore {
24
24
  const tableName = config.tableName || 'document';
25
25
  const columnMap = config.columnMap || {};
26
26
  this._table = quoteTableName(tableName, 'tableName');
27
+ this._tableBase = tableName.split('.').pop();
27
28
  this._cContent = quoteIdentifier(columnMap.content || 'content', 'columnMap.content');
28
29
  this._cMeta = quoteIdentifier(columnMap.metadata || 'metadata', 'columnMap.metadata');
29
30
  this._cVec = quoteIdentifier(columnMap.vector || 'vector', 'columnMap.vector');
31
+ this._cCreatedAt = '"createdAt"';
30
32
 
31
33
  // We expect config.clientInstance to be a pg.Pool or pg.Client
32
34
  if (!this.config.clientInstance) {
@@ -45,7 +47,34 @@ class PostgresVectorStore extends VectorStore {
45
47
  // Enable pgvector extension
46
48
  await this.client.query('CREATE EXTENSION IF NOT EXISTS vector');
47
49
 
48
- // Create table if not exists
50
+ // Detect existing column type to avoid malformed array issues
51
+ try {
52
+ const typeCheck = await this.client.query(
53
+ `SELECT data_type, udt_name
54
+ FROM information_schema.columns
55
+ WHERE table_name = $1 AND column_name = $2`,
56
+ [this._tableBase, this._cVec.replace(/"/g, '')]
57
+ );
58
+ const row = typeCheck.rows[0];
59
+ if (row) {
60
+ const isPgVector = row.udt_name === 'vector';
61
+ const isArray = row.data_type && row.data_type.toLowerCase().includes('array');
62
+ if (isArray && !isPgVector) {
63
+ throw new Error(
64
+ 'Postgres schema mismatch: vector column is double precision[] (array). ' +
65
+ 'Use pgvector type: vector(<dimensions>). ' +
66
+ 'Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(1536);'
67
+ );
68
+ }
69
+ }
70
+ } catch (e) {
71
+ // Only throw if we explicitly detected array type; otherwise continue
72
+ if (String(e.message || e).includes('schema mismatch')) {
73
+ throw e;
74
+ }
75
+ }
76
+
77
+ // Create table if not exists (best-effort)
49
78
  // Note: We need to know vector dimensions. We'll try to guess or use default 1536
50
79
  // If embedding dimensions are provided in config, use them
51
80
  // But store config usually doesn't have embedding config directly unless passed down
@@ -65,6 +94,47 @@ class PostgresVectorStore extends VectorStore {
65
94
  `;
66
95
  await this.client.query(createTableQuery);
67
96
 
97
+ // Ensure required columns exist (non-destructive)
98
+ try {
99
+ const res = await this.client.query(
100
+ `SELECT column_name, data_type, udt_name
101
+ FROM information_schema.columns
102
+ WHERE table_name = $1`,
103
+ [this._tableBase]
104
+ );
105
+ const cols = new Map(res.rows.map(r => [r.column_name, r]));
106
+ const contentCol = this._cContent.replace(/"/g, '');
107
+ const metaCol = this._cMeta.replace(/"/g, '');
108
+ const vecCol = this._cVec.replace(/"/g, '');
109
+ const createdAtCol = this._cCreatedAt.replace(/"/g, '');
110
+
111
+ if (!cols.has(contentCol)) {
112
+ await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
113
+ }
114
+ if (!cols.has(metaCol)) {
115
+ await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
116
+ }
117
+ if (!cols.has(vecCol)) {
118
+ await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
119
+ } else {
120
+ const vinfo = cols.get(vecCol);
121
+ const isPgVector = vinfo && vinfo.udt_name === 'vector';
122
+ const isArray = vinfo && vinfo.data_type && vinfo.data_type.toLowerCase().includes('array');
123
+ if (isArray && !isPgVector) {
124
+ throw new Error(
125
+ 'Postgres schema mismatch: vector column is double precision[] (array). ' +
126
+ 'Use pgvector type: vector(' + dim + '). ' +
127
+ 'Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(' + dim + ');'
128
+ );
129
+ }
130
+ }
131
+ if (!cols.has(createdAtCol)) {
132
+ await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cCreatedAt} TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
133
+ }
134
+ } catch (_) {
135
+ // best-effort; ignore
136
+ }
137
+
68
138
  // Create HNSW index for faster search
69
139
  // checking if index exists is hard in raw sql cross-version,
70
140
  // simpler to CREATE INDEX IF NOT EXISTS which pg supports in recent versions
@@ -72,12 +142,17 @@ class PostgresVectorStore extends VectorStore {
72
142
  try {
73
143
  await this.client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING hnsw (${this._cVec} vector_cosine_ops)`);
74
144
  } catch (e) {
75
- console.warn('Could not create vector index (might be fine if not supported):', e.message);
145
+ // Fallback to ivfflat when hnsw not supported
146
+ try {
147
+ await this.client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops)`);
148
+ } catch (e2) {
149
+ console.warn('Could not create vector index (might be fine if not supported):', e.message);
150
+ }
76
151
  }
77
152
  }
78
153
 
79
154
  async addDocuments(docs) {
80
- const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW())`;
155
+ const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW()) ON CONFLICT ("id") DO NOTHING`;
81
156
 
82
157
  for (const doc of docs) {
83
158
  const id = doc.id || uuidv4();
@@ -186,6 +261,22 @@ class PostgresVectorStore extends VectorStore {
186
261
 
187
262
  return Object.values(combined).sort((a, b) => b.score - a.score).slice(0, limit);
188
263
  }
264
+
265
+ async fileExists(sha256, size, lastModified) {
266
+ try {
267
+ const q = `
268
+ SELECT 1
269
+ FROM ${this._table}
270
+ WHERE ${this._cMeta} @> $1
271
+ LIMIT 1
272
+ `;
273
+ const metaFilter = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });
274
+ const res = await this.client.query(q, [metaFilter]);
275
+ return res.rowCount > 0;
276
+ } catch (_) {
277
+ return false;
278
+ }
279
+ }
189
280
  }
190
281
 
191
282
  module.exports = { PostgresVectorStore };
@@ -35,7 +35,7 @@ class PrismaVectorStore extends VectorStore {
35
35
  }
36
36
  async addDocuments(docs) {
37
37
  const { clientInstance } = this.config;
38
- const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4::vector, NOW())`;
38
+ const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4::vector, NOW()) ON CONFLICT ("id") DO NOTHING`;
39
39
  for (const doc of docs) {
40
40
  const id = doc.id || uuidv4();
41
41
  const vec = JSON.stringify(this.normalizeVector(doc.embedding));
@@ -108,6 +108,7 @@ class PrismaVectorStore extends VectorStore {
108
108
  const idxFts = `"${base}_content_fts_gin"`;
109
109
  try {
110
110
  await clientInstance.$executeRawUnsafe('CREATE EXTENSION IF NOT EXISTS vector');
111
+ await this._ensureColumns();
111
112
  await clientInstance.$executeRawUnsafe(`CREATE INDEX IF NOT EXISTS ${idxVec} ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops) WITH (lists = 100);`);
112
113
  await clientInstance.$executeRawUnsafe(`CREATE INDEX IF NOT EXISTS ${idxFts} ON ${this._table} USING GIN (to_tsvector('english', ${this._cContent}));`);
113
114
  } catch (e) {
@@ -115,6 +116,54 @@ class PrismaVectorStore extends VectorStore {
115
116
  }
116
117
  }
117
118
 
119
+ async _ensureColumns() {
120
+ const { clientInstance } = this.config;
121
+ const dim = 1536;
122
+ const createTableQuery = `
123
+ CREATE TABLE IF NOT EXISTS ${this._table} (
124
+ "id" TEXT PRIMARY KEY,
125
+ ${this._cContent} TEXT,
126
+ ${this._cMeta} JSONB,
127
+ ${this._cVec} vector(${dim}),
128
+ "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()
129
+ )
130
+ `;
131
+ await clientInstance.$executeRawUnsafe(createTableQuery);
132
+ try {
133
+ const res = await clientInstance.$queryRawUnsafe(
134
+ `SELECT column_name, data_type, udt_name FROM information_schema.columns WHERE table_name = $1`,
135
+ this._tableBase
136
+ );
137
+ const cols = new Map(res.map(r => [r.column_name, r]));
138
+ const contentCol = this._cContent.replace(/"/g, '');
139
+ const metaCol = this._cMeta.replace(/"/g, '');
140
+ const vecCol = this._cVec.replace(/"/g, '');
141
+ const createdAtCol = 'createdAt';
142
+ if (!cols.has(contentCol)) {
143
+ await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
144
+ }
145
+ if (!cols.has(metaCol)) {
146
+ await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
147
+ }
148
+ if (!cols.has(vecCol)) {
149
+ await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
150
+ } else {
151
+ const vinfo = cols.get(vecCol);
152
+ const isPgVector = vinfo && vinfo.udt_name === 'vector';
153
+ const isArray = vinfo && vinfo.data_type && vinfo.data_type.toLowerCase().includes('array');
154
+ if (isArray && !isPgVector) {
155
+ throw new Error(
156
+ 'Postgres schema mismatch: vector column is double precision[] (array). Use pgvector type: vector(' + dim + '). Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(' + dim + ');'
157
+ );
158
+ }
159
+ }
160
+ if (!cols.has(createdAtCol)) {
161
+ await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
162
+ }
163
+ } catch (_) {
164
+ }
165
+ }
166
+
118
167
  async fileExists(sha256, size, lastModified) {
119
168
  const { clientInstance } = this.config;
120
169
  const payload = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });
@@ -52,6 +52,16 @@ class QdrantVectorStore extends VectorStore {
52
52
  }
53
53
  return out;
54
54
  }
55
+ async fileExists(sha256, size, lastModified) {
56
+ const filter = this.normalizeFilter({ fileSHA256: sha256, fileSize: size, lastModified });
57
+ try {
58
+ const res = await this.client.scroll(this.collection, { limit: 1, filter });
59
+ const points = res?.points || res?.result?.points || [];
60
+ return points.length > 0;
61
+ } catch (_) {
62
+ return false;
63
+ }
64
+ }
55
65
  async deleteDocuments({ ids = null, filter = null } = {}) {
56
66
  if (typeof this.client.delete !== 'function') throw new Error('deleteDocuments is not supported for this Qdrant client');
57
67
  if (Array.isArray(ids) && ids.length > 0) {
package/src/config.js CHANGED
@@ -87,8 +87,12 @@ const RAGConfigSchema = z.object({
87
87
  chunking: ChunkingConfigSchema.default({}),
88
88
  retrieval: RetrievalConfigSchema.default({}),
89
89
  reranking: RerankingConfigSchema.default({}),
90
+ sessionType: z.enum(['cli', 'api', 'chat']).default('api'),
90
91
  metadata: z.object({ enrichment: z.boolean().default(false) }).optional(),
91
92
  ingestion: z.object({ rateLimitEnabled: z.boolean().default(false), concurrencyLimit: z.number().default(5) }).optional(),
93
+ telemetry: z.object({
94
+ enabled: z.boolean().default(true),
95
+ }).default({ enabled: true }),
92
96
  memory: z.object({
93
97
  enabled: z.boolean().default(false),
94
98
  type: z.enum(['in-memory','redis','postgres']).default('in-memory'),
package/src/core.js CHANGED
@@ -19,6 +19,7 @@ const { OllamaBackend } = require('./backends/ollama');
19
19
  const { v5: uuidv5 } = require('uuid');
20
20
  const { v4: uuidv4 } = require('uuid');
21
21
  const SQLiteLogger = require('./observability');
22
+ const telemetry = require('./telemetry');
22
23
 
23
24
  const DEFAULT_TOKEN_BUDGET = 2048;
24
25
  const DEFAULT_PREFER_SUMMARY_BELOW = 1024;
@@ -37,6 +38,17 @@ class VectraClient {
37
38
  const parsed = RAGConfigSchema.parse(config);
38
39
  this.config = parsed;
39
40
  this.callbacks = config.callbacks || [];
41
+
42
+ // Initialize telemetry
43
+ telemetry.init(this.config);
44
+ telemetry.track('sdk_initialized', {
45
+ vector_store: this.config.database.type,
46
+ embedding_provider: this.config.embedding.provider,
47
+ llm_provider: this.config.llm.provider,
48
+ observability_enabled: !!(this.config.observability && this.config.observability.enabled),
49
+ memory_enabled: !!(this.config.memory && this.config.memory.enabled),
50
+ session_type: this.config.sessionType
51
+ });
40
52
 
41
53
  // Initialize observability
42
54
  this.logger = (this.config.observability && this.config.observability.enabled)
@@ -294,6 +306,13 @@ class VectraClient {
294
306
  try {
295
307
  const stats = await fs.promises.stat(filePath);
296
308
 
309
+ telemetry.track('ingest_started', {
310
+ source_type: stats.isDirectory() ? 'directory' : 'file',
311
+ file_types: stats.isDirectory() ? [] : [path.extname(filePath).replace('.', '')],
312
+ chunking_strategy: this.config.chunking.strategy,
313
+ metadata_enrichment: this._metadataEnrichmentEnabled
314
+ });
315
+
297
316
  if (stats.isDirectory()) {
298
317
  await this._processDirectory(filePath);
299
318
  return;
@@ -351,6 +370,15 @@ class VectraClient {
351
370
  const durationMs = Date.now() - t0;
352
371
  this.trigger('onIngestEnd', filePath, chunks.length, durationMs);
353
372
 
373
+ const chunkCountBucket = chunks.length < 50 ? '1-50' : chunks.length < 200 ? '50-200' : '200+';
374
+ const durationBucket = durationMs < 1000 ? '0-1s' : durationMs < 5000 ? '1-5s' : '5s+';
375
+
376
+ telemetry.track('ingest_completed', {
377
+ chunk_count_bucket: chunkCountBucket,
378
+ duration_ms_bucket: durationBucket,
379
+ cached_embeddings: false
380
+ });
381
+
354
382
  this.logger.logTrace({
355
383
  traceId,
356
384
  spanId: rootSpanId,
@@ -366,6 +394,10 @@ class VectraClient {
366
394
  this.logger.logMetric({ name: 'ingest_latency', value: durationMs, tags: { type: 'single_file' } });
367
395
 
368
396
  } catch (e) {
397
+ telemetry.track('error_occurred', {
398
+ stage: 'ingestion',
399
+ error_type: e.name || 'unknown'
400
+ });
369
401
  this.trigger('onError', e);
370
402
  this.logger.logTrace({
371
403
  traceId,
@@ -604,6 +636,15 @@ class VectraClient {
604
636
  const retrievalMs = Date.now() - tRetrieval;
605
637
  this.trigger('onRetrievalEnd', docs.length, retrievalMs);
606
638
 
639
+ telemetry.track('query_executed', {
640
+ query_mode: 'rag',
641
+ retrieval_strategy: strategy,
642
+ reranking_enabled: !!(this.config.reranking && this.config.reranking.enabled),
643
+ streaming: stream,
644
+ memory_used: !!(this.history && sessionId),
645
+ result_count: docs.length
646
+ });
647
+
607
648
  this.logger.logTrace({
608
649
  traceId,
609
650
  spanId: uuidv4(),
@@ -801,6 +842,10 @@ class VectraClient {
801
842
  return { answer, sources: docs.map(d => d.metadata) };
802
843
  }
803
844
  } catch (e) {
845
+ telemetry.track('error_occurred', {
846
+ stage: 'retrieval_or_generation',
847
+ error_type: e.name || 'unknown'
848
+ });
804
849
  this.trigger('onError', e);
805
850
  this.logger.logTrace({
806
851
  traceId,
@@ -819,6 +864,11 @@ class VectraClient {
819
864
  }
820
865
 
821
866
  async evaluate(testSet) {
867
+ const bucket = testSet.length < 5 ? '1-5' : testSet.length < 20 ? '5-20' : '20+';
868
+ telemetry.track('evaluation_run', {
869
+ dataset_size_bucket: bucket
870
+ });
871
+
822
872
  const report = [];
823
873
  for (const item of testSet) {
824
874
  const res = await this.queryRAG(item.question);
@@ -0,0 +1,145 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const os = require('os');
4
+ const { v4: uuidv4 } = require('uuid');
5
+ const packageJson = require('../package.json');
6
+
7
+ const TELEMETRY_DIR = path.join(os.homedir(), '.vectra');
8
+ const TELEMETRY_FILE = path.join(TELEMETRY_DIR, 'telemetry.json');
9
+
10
+ const BATCH_SIZE = 10;
11
+ const FLUSH_INTERVAL_MS = 60_000;
12
+
13
+ const API_ENDPOINT =
14
+ process.env.VECTRA_TELEMETRY_ENDPOINT ||
15
+ 'https://thwcefdrkimerqztvfjj.supabase.co/functions/v1/vectra-collect';
16
+
17
+
18
/**
 * Process-wide collector for anonymous usage events.
 *
 * Events are queued in memory and POSTed as a JSON batch to API_ENDPOINT,
 * either when BATCH_SIZE events have accumulated, on the periodic flush
 * timer, or when `flush()` / `shutdown()` is called. Failed batches are
 * dropped rather than retried.
 */
class TelemetryManager {
  constructor() {
    this.distinctId = null;
    this.queue = [];
    this.timer = null;
    this.enabled = true;
    this.initialized = false;

    // Attached to every event.
    this.globalProperties = {
      sdk: 'vectra-node',
      sdk_version: packageJson.version,
      language: 'node',
      runtime: `node-${process.version}`,
      os: process.platform,
      ci: !!process.env.CI,
      telemetry_version: 1,
    };
  }

  /**
   * Enables collection unless the caller or the environment opted out.
   *
   * Safe to call repeatedly. An explicit opt-out is honoured on every call,
   * including after an earlier call already enabled telemetry, so a config
   * with `telemetry.enabled: false` always wins. Once disabled, telemetry
   * stays disabled for the lifetime of the process.
   *
   * @param {object} [config] - Parsed or raw config; only `telemetry.enabled` is read.
   */
  init(config = {}) {
    const optedOut =
      config?.telemetry?.enabled === false ||
      process.env.VECTRA_TELEMETRY_DISABLED === '1' ||
      process.env.DO_NOT_TRACK === '1';

    if (optedOut) {
      this._disable();
      return;
    }

    if (this.initialized || !this.enabled) return;

    this._loadIdentity();
    this.initialized = true;
    // _loadIdentity() disables telemetry when the identity file is unusable.
    if (this.enabled) this._startFlushTimer();
  }

  /** Turns collection off, drops queued events and stops the flush timer. */
  _disable() {
    this.enabled = false;
    this.queue.length = 0;
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Loads the anonymous id from TELEMETRY_FILE, creating the directory and
   * file with a fresh `anon_<uuid>` when none exists. Any filesystem or
   * parse failure disables telemetry instead of throwing.
   */
  _loadIdentity() {
    try {
      if (!fs.existsSync(TELEMETRY_DIR)) {
        fs.mkdirSync(TELEMETRY_DIR, { recursive: true });
      }

      if (fs.existsSync(TELEMETRY_FILE)) {
        const data = JSON.parse(fs.readFileSync(TELEMETRY_FILE, 'utf8'));
        if (data.distinct_id) {
          this.distinctId = data.distinct_id;
          return;
        }
      }

      this.distinctId = `anon_${uuidv4()}`;
      fs.writeFileSync(
        TELEMETRY_FILE,
        JSON.stringify({ distinct_id: this.distinctId }, null, 2)
      );
    } catch {
      this.enabled = false;
    }
  }

  /**
   * Queues one event; a no-op when telemetry is disabled or no identity is
   * loaded. Schedules a flush once the queue reaches BATCH_SIZE.
   *
   * @param {string} event - Event name.
   * @param {object} [properties] - Event-specific properties, merged over the global ones.
   */
  track(event, properties = {}) {
    if (!this.enabled || !this.distinctId) return;

    this.queue.push({
      event,
      distinct_id: this.distinctId,
      timestamp: new Date().toISOString(),
      properties: {
        ...this.globalProperties,
        ...properties,
      },
    });

    if (this.queue.length >= BATCH_SIZE) {
      setImmediate(() => this.flush());
    }
  }

  /**
   * Sends every queued event in one request. Never rejects: network errors
   * are swallowed (and logged only when VECTRA_TELEMETRY_DEBUG is set), and
   * the batch is dropped.
   *
   * @returns {Promise<void>}
   */
  async flush() {
    if (!this.enabled || this.queue.length === 0) return;

    // Take ownership of the current queue so concurrent flushes don't resend it.
    const batch = this.queue.splice(0, this.queue.length);

    if (!global.fetch) {
      if (process.env.VECTRA_TELEMETRY_DEBUG) {
        console.log('Telemetry batch (debug):', batch);
      }
      return;
    }

    try {
      await fetch(API_ENDPOINT, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json'
        },
        body: JSON.stringify(batch),
        signal: AbortSignal.timeout(6000),
      });

      if (process.env.VECTRA_TELEMETRY_DEBUG) {
        console.log('Telemetry batch flushed');
      }
    } catch (err) {
      if (process.env.VECTRA_TELEMETRY_DEBUG) {
        console.error('Telemetry flush failed:', err);
      }
      // Drop on error (OSS-safe choice)
    }
  }

  /** (Re)starts the periodic flush. */
  _startFlushTimer() {
    if (this.timer) clearInterval(this.timer);
    this.timer = setInterval(() => this.flush(), FLUSH_INTERVAL_MS);
    // Telemetry must never keep the host process alive: without unref() a
    // CLI run or short script would hang on this interval after finishing.
    if (typeof this.timer.unref === 'function') this.timer.unref();
  }

  /**
   * Stops the periodic flush and sends whatever is still queued.
   *
   * @returns {Promise<void>}
   */
  shutdown() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    return this.flush();
  }
}
144
+
145
+ module.exports = new TelemetryManager();
@@ -2,6 +2,7 @@ const http = require('http');
2
2
  const fs = require('fs');
3
3
  const path = require('path');
4
4
  const { ProviderType, ChunkingStrategy, RetrievalStrategy } = require('./config');
5
+ const telemetry = require('./telemetry');
5
6
  const sqlite3 = require('sqlite3').verbose();
6
7
 
7
8
 
@@ -91,6 +92,14 @@ function serveStatic(res, filePath, contentType) {
91
92
  function start(configPath, mode = 'webconfig', port = 8766, openInBrowser = true) {
92
93
  const absConfigPath = path.resolve(configPath);
93
94
 
95
+ // Init telemetry
96
+ let cfg = {};
97
+ try {
98
+ if (fs.existsSync(absConfigPath)) cfg = JSON.parse(fs.readFileSync(absConfigPath, 'utf-8'));
99
+ } catch (_) {}
100
+ telemetry.init(cfg);
101
+ telemetry.track('feature_used', { feature: mode }); // mode is 'webconfig' or 'dashboard'
102
+
94
103
  const createServer = (currentPort) => {
95
104
  const server = http.createServer((req, res) => {
96
105
  const sendJson = (status, obj) => {