npm - vectra-js - Versions diffs - 0.9.8 → 0.9.12 - Mend

vectra-js 0.9.8 → 0.9.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/README.md +144 -21
package/bin/vectra.js +34 -5
package/package.json +2 -2
package/src/backends/milvus_store.js +22 -0
package/src/backends/postgres_store.js +94 -3
package/src/backends/prisma_store.js +50 -1
package/src/backends/qdrant_store.js +10 -0
package/src/config.js +4 -0
package/src/core.js +50 -0
package/src/telemetry.js +145 -0
package/src/webconfig_server.js +9 -0

package/README.md CHANGED Viewed

@@ -40,11 +40,12 @@ If you find this project useful, consider supporting it:<br>
   * [WebConfig (Config Generator UI)](#webconfig-config-generator-ui)
   * [Observability Dashboard](#observability-dashboard)
 * [13. Observability & Callbacks](#13-observability--callbacks)
-* [14. Database Schemas & Indexing](#14-database-schemas--indexing)
-* [15. Extending Vectra](#15-extending-vectra)
-* [16. Architecture Overview](#16-architecture-overview)
-* [17. Development & Contribution Guide](#17-development--contribution-guide)
-* [18. Production Best Practices](#18-production-best-practices)
+* [14. Telemetry](#14-telemetry)
+* [15. Database Schemas & Indexing](#15-database-schemas--indexing)
+* [16. Extending Vectra](#16-extending-vectra)
+* [17. Architecture Overview](#17-architecture-overview)
+* [18. Development & Contribution Guide](#18-development--contribution-guide)
+* [19. Production Best Practices](#19-production-best-practices)
 ---
@@ -127,15 +128,19 @@ Every major subsystem (providers, vector stores, callbacks) is interface‑drive
 ### Library
 ```bash
-npm install vectra-js @prisma/client
+npm install vectra-js
 # or
-pnpm add vectra-js @prisma/client
+pnpm add vectra-js
 ```
-Optional backends:
+Backends:
 ```bash
-npm install chromadb
+npm install pg                    # https://node-postgres.com/
+npm install @prisma/client       # https://prisma.io/docs
+npm install chromadb              # https://docs.trychroma.com/
+npm install qdrant-client         # https://qdrant.tech/documentation/
+npm install @zilliz/milvus2-sdk-node  # https://milvus.io/docs/
 ```
 ### CLI
@@ -152,6 +157,11 @@ pnpm add -g vectra-js
 ```js
 const { VectraClient, ProviderType } = require('vectra-js');
+const { Pool } = require('pg');
+const pool = new Pool({
+  connectionString: process.env.DATABASE_URL
+});
 const client = new VectraClient({
   embedding: {
@@ -162,12 +172,13 @@ const client = new VectraClient({
   llm: {
     provider: ProviderType.GEMINI,
     apiKey: process.env.GOOGLE_API_KEY,
-    modelName: 'gemini-1.5-pro-latest'
+    modelName: 'gemini-2.5-flash'
   },
   database: {
-    type: 'prisma',
-    clientInstance: prisma,
-    tableName: 'Document'
+    type: 'postgres',
+    clientInstance: pool,
+    tableName: 'document',
+    columnMap: { 'content': 'content', 'metadata': 'metadata', 'vector': 'vector' }
   }
 });
@@ -240,7 +251,7 @@ Use `dimensions` when using pgvector to avoid runtime mismatches.
 llm: {
   provider: ProviderType.GEMINI,
   apiKey: process.env.GOOGLE_API_KEY,
-  modelName: 'gemini-1.5-pro-latest',
+  modelName: 'gemini-2.5-flash',
   temperature: 0.3,
   maxTokens: 1024
 }
@@ -257,15 +268,54 @@ Used for:
 ### Database
+Supports Prisma, Postgres (native), Chroma, Qdrant, Milvus.
 ```js
+// PostgreSQL (native pg)
+database: {
+  type: 'postgres',
+  clientInstance: pool, // new Pool(...)
+  tableName: 'document',
+  columnMap: { content: 'content', metadata: 'metadata', vector: 'vector' }
+}
+```
+```js
+// Prisma
 database: {
   type: 'prisma',
   clientInstance: prisma,
-  tableName: 'Document'
+  tableName: 'Document',
+  columnMap: { content: 'content', metadata: 'metadata', vector: 'embedding' }
+}
+```
+```js
+// ChromaDB
+database: {
+  type: 'chroma',
+  clientInstance: chromaClient,
+  collectionName: 'rag_collection'
 }
 ```
-Supports Prisma, Chroma, Qdrant, Milvus.
+```js
+// Qdrant
+database: {
+  type: 'qdrant',
+  clientInstance: qdrantClient,
+  collectionName: 'rag_collection'
+}
+```
+```js
+// Milvus
+database: {
+  type: 'milvus',
+  clientInstance: milvusClient,
+  collectionName: 'rag_collection'
+}
+```
 ---
@@ -324,6 +374,38 @@ memory: { enabled: true, type: 'in-memory', maxMessages: 20 }
 Redis and Postgres are supported.
+```js
+// Redis
+memory: {
+  enabled: true,
+  type: 'redis',
+  maxMessages: 20,
+  redis: {
+    clientInstance: redisClient,
+    keyPrefix: 'vectra:chat:'
+  }
+}
+```
+```js
+// Postgres
+memory: {
+  enabled: true,
+  type: 'postgres',
+  maxMessages: 20,
+  postgres: {
+    clientInstance: pool, // pg Pool
+    tableName: 'ChatMessage',
+    columnMap: {
+      sessionId: 'sessionId',
+      role: 'role',
+      content: 'content',
+      createdAt: 'createdAt'
+    }
+  }
+}
+```
 ---
 ### Observability
@@ -455,7 +537,48 @@ Lifecycle hooks:
 ---
-## 14. Database Schemas & Indexing
+## 14. Telemetry
+Vectra collects anonymous usage data to help us improve the SDK, prioritize features, and detect broken versions.
+### What we track
+* **Identity**: A random UUID (`distinct_id`) stored locally in `~/.vectra/telemetry.json`. **No PII, emails, IPs, or hostnames.**
+* **Events**:
+    * `sdk_initialized`: Config shape (providers used), OS/Runtime version, session type (api/cli/chat).
+    * `ingest_started/completed`: Source type, chunking strategy, duration bucket, chunk count bucket.
+    * `query_executed`: Retrieval strategy, query mode (rag), result count, latency bucket.
+    * `feature_used`: WebConfig/Dashboard usage.
+    * `evaluation_run`: Dataset size bucket.
+    * `error_occurred`: Error type and stage (no stack traces).
+    * `cli_command_used`: Command name and flags.
+### Why we track it
+* **Detect broken versions**: Spikes in `error_occurred` help us find bugs.
+* **Measure adoption**: Helps us understand which providers (OpenAI vs Gemini) and vector stores are most popular.
+* **Drop support safely**: We can see if anyone is still using Node 18 before dropping it.
+### How to opt-out
+Telemetry is **enabled by default**. To disable it:
+**Option 1: Config**
+```js
+const client = new VectraClient({
+  // ...
+  telemetry: { enabled: false }
+});
+```
+**Option 2: Environment Variable**
+Set `VECTRA_TELEMETRY_DISABLED=1` or `DO_NOT_TRACK=1`.
+---
+## 15. Database Schemas & Indexing
 ```prisma
 model Document {
@@ -469,7 +592,7 @@ model Document {
 ---
-## 15. Extending Vectra
+## 16. Extending Vectra
 ### Custom Vector Store
@@ -482,7 +605,7 @@ class MyStore extends VectorStore {
 ---
-## 16. Architecture Overview
+## 17. Architecture Overview
 * `VectraClient`: orchestrator
 * Typed config schema
@@ -491,7 +614,7 @@ class MyStore extends VectorStore {
 ---
-## 17. Development & Contribution Guide
+## 18. Development & Contribution Guide
 * Node.js 18+
 * pnpm recommended
@@ -499,7 +622,7 @@ class MyStore extends VectorStore {
 ---
-## 18. Production Best Practices
+## 19. Production Best Practices
 * Match embedding dimensions to pgvector
 * Prefer HYBRID retrieval

package/bin/vectra.js CHANGED Viewed

@@ -1,5 +1,6 @@
 #!/usr/bin/env node
 const { start: startWebConfig } = require('../src/webconfig_server');
+const telemetry = require('../src/telemetry');
 const fs = require('fs');
 const path = require('path');
@@ -25,31 +26,49 @@ async function run() {
       target = arg;
     }
   }
+  // Load config for telemetry init if possible
+  let cfg = null;
+  try {
+      const p = configPath ? path.resolve(configPath) : path.join(process.cwd(), 'vectra-config.json');
+      if (fs.existsSync(p)) cfg = JSON.parse(fs.readFileSync(p, 'utf-8'));
+  } catch (_) {}
+  telemetry.init(cfg || {});
+  if (cmd) {
+      telemetry.track('cli_command_used', {
+          command: cmd,
+          flags: stream ? ['--stream'] : []
+      });
+  }
   if (cmd === 'webconfig') {
       const cfgPath = configPath || path.join(process.cwd(), 'vectra-config.json');
       startWebConfig(cfgPath, 'webconfig');
+      await telemetry.flush();
       return;
   }
   if (cmd === 'dashboard') {
       const cfgPath = configPath || path.join(process.cwd(), 'vectra-config.json');
       startWebConfig(cfgPath, 'dashboard');
+      await telemetry.flush();
       return;
   }
   if (!cmd || (!target && cmd !== 'webconfig' && cmd !== 'dashboard')) {
     console.error('Usage: vectra <ingest|query|webconfig|dashboard> <path|text> [--config=path] [--stream]');
+    await telemetry.flush();
     process.exit(1);
   }
   // Lazy load VectraClient to avoid overhead when just running help or webconfig
   const { VectraClient } = require('..');
-  let cfg = null;
-  if (configPath) {
+  // Re-load config if we just did a quick check earlier
+  if (configPath && !cfg) {
     cfg = JSON.parse(fs.readFileSync(path.resolve(configPath), 'utf-8'));
-  } else {
+  } else if (!cfg) {
     // Fallback to test config if exists, or null
     try {
       cfg = require(path.resolve(process.cwd(), 'nodejs-test/index.js')).config;
@@ -57,7 +76,11 @@ async function run() {
       cfg = null;
     }
   }
+  // VectraClient will re-init telemetry but that's fine (idempotent)
+  if (cfg) {
+      cfg.sessionType = 'cli';
+  }
   const client = new VectraClient(cfg);
   if (cmd === 'ingest') {
     await client.ingestDocuments(path.resolve(process.cwd(), target));
@@ -75,8 +98,14 @@ async function run() {
     }
   } else {
     console.error('Unknown command');
+    await telemetry.flush();
     process.exit(1);
   }
+  await telemetry.flush();
 }
-run().catch(e => { console.error(e && e.message ? e.message : String(e)); process.exit(1); });
+run().catch(async e => {
+    console.error(e && e.message ? e.message : String(e));
+    try { await telemetry.flush(); } catch {}
+    process.exit(1);
+});

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "vectra-js",
-  "version": "0.9.8",
+  "version": "0.9.12",
   "description": "A production-ready, provider-agnostic Node.js SDK for End-to-End RAG pipelines.",
   "main": "index.js",
   "scripts": {
@@ -34,7 +34,7 @@
   "author": "Abhishek N",
   "license": "GPL-3.0",
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.20.9",
+    "@anthropic-ai/sdk": "^0.71.2",
     "@google/genai": "^1.34.0",
     "dotenv": "^16.6.1",
     "mammoth": "^1.11.0",

package/src/backends/milvus_store.js CHANGED Viewed

@@ -45,6 +45,28 @@ class MilvusVectorStore extends VectorStore {
     return rows.map((r) => ({ id: r.id, content: r.content || '', metadata: r.metadata ? JSON.parse(r.metadata) : {} }));
   }
+  async fileExists(sha256, size, lastModified) {
+    if (typeof this.client.query !== 'function') return false;
+    try {
+      const expr = '';
+      const res = await this.client.query({
+        collection_name: this.collection,
+        expr,
+        output_fields: ['content', 'metadata'],
+        limit: 1
+      });
+      const rows = Array.isArray(res) ? res : (res?.data || res?.results || []);
+      return rows.some((r) => {
+        try {
+          const m = r.metadata ? JSON.parse(r.metadata) : {};
+          return m.fileSHA256 === sha256 && m.fileSize === size && m.lastModified === lastModified;
+        } catch (_) { return false; }
+      });
+    } catch (_) {
+      return false;
+    }
+  }
   async deleteDocuments({ ids = null, filter = null } = {}) {
     if (typeof this.client.delete !== 'function') throw new Error('deleteDocuments is not supported for this Milvus client');
     if (Array.isArray(ids) && ids.length > 0) {

package/src/backends/postgres_store.js CHANGED Viewed

@@ -24,9 +24,11 @@ class PostgresVectorStore extends VectorStore {
     const tableName = config.tableName || 'document';
     const columnMap = config.columnMap || {};
     this._table = quoteTableName(tableName, 'tableName');
+    this._tableBase = tableName.split('.').pop();
     this._cContent = quoteIdentifier(columnMap.content || 'content', 'columnMap.content');
     this._cMeta = quoteIdentifier(columnMap.metadata || 'metadata', 'columnMap.metadata');
     this._cVec = quoteIdentifier(columnMap.vector || 'vector', 'columnMap.vector');
+    this._cCreatedAt = '"createdAt"';
     // We expect config.clientInstance to be a pg.Pool or pg.Client
     if (!this.config.clientInstance) {
@@ -45,7 +47,34 @@ class PostgresVectorStore extends VectorStore {
     // Enable pgvector extension
     await this.client.query('CREATE EXTENSION IF NOT EXISTS vector');
-    // Create table if not exists
+    // Detect existing column type to avoid malformed array issues
+    try {
+      const typeCheck = await this.client.query(
+        `SELECT data_type, udt_name
+         FROM information_schema.columns
+         WHERE table_name = $1 AND column_name = $2`,
+        [this._tableBase, this._cVec.replace(/"/g, '')]
+      );
+      const row = typeCheck.rows[0];
+      if (row) {
+        const isPgVector = row.udt_name === 'vector';
+        const isArray = row.data_type && row.data_type.toLowerCase().includes('array');
+        if (isArray && !isPgVector) {
+          throw new Error(
+            'Postgres schema mismatch: vector column is double precision[] (array). ' +
+            'Use pgvector type: vector(<dimensions>). ' +
+            'Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(1536);'
+          );
+        }
+      }
+    } catch (e) {
+      // Only throw if we explicitly detected array type; otherwise continue
+      if (String(e.message || e).includes('schema mismatch')) {
+        throw e;
+      }
+    }
+    // Create table if not exists (best-effort)
     // Note: We need to know vector dimensions. We'll try to guess or use default 1536
     // If embedding dimensions are provided in config, use them
     // But store config usually doesn't have embedding config directly unless passed down
@@ -65,6 +94,47 @@ class PostgresVectorStore extends VectorStore {
     `;
     await this.client.query(createTableQuery);
+    // Ensure required columns exist (non-destructive)
+    try {
+      const res = await this.client.query(
+        `SELECT column_name, data_type, udt_name
+         FROM information_schema.columns
+         WHERE table_name = $1`,
+        [this._tableBase]
+      );
+      const cols = new Map(res.rows.map(r => [r.column_name, r]));
+      const contentCol = this._cContent.replace(/"/g, '');
+      const metaCol = this._cMeta.replace(/"/g, '');
+      const vecCol = this._cVec.replace(/"/g, '');
+      const createdAtCol = this._cCreatedAt.replace(/"/g, '');
+      if (!cols.has(contentCol)) {
+        await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
+      }
+      if (!cols.has(metaCol)) {
+        await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
+      }
+      if (!cols.has(vecCol)) {
+        await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
+      } else {
+        const vinfo = cols.get(vecCol);
+        const isPgVector = vinfo && vinfo.udt_name === 'vector';
+        const isArray = vinfo && vinfo.data_type && vinfo.data_type.toLowerCase().includes('array');
+        if (isArray && !isPgVector) {
+          throw new Error(
+            'Postgres schema mismatch: vector column is double precision[] (array). ' +
+            'Use pgvector type: vector(' + dim + '). ' +
+            'Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(' + dim + ');'
+          );
+        }
+      }
+      if (!cols.has(createdAtCol)) {
+        await this.client.query(`ALTER TABLE ${this._table} ADD COLUMN ${this._cCreatedAt} TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
+      }
+    } catch (_) {
+      // best-effort; ignore
+    }
     // Create HNSW index for faster search
     // checking if index exists is hard in raw sql cross-version,
     // simpler to CREATE INDEX IF NOT EXISTS which pg supports in recent versions
@@ -72,12 +142,17 @@ class PostgresVectorStore extends VectorStore {
     try {
         await this.client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING hnsw (${this._cVec} vector_cosine_ops)`);
     } catch (e) {
-        console.warn('Could not create vector index (might be fine if not supported):', e.message);
+        // Fallback to ivfflat when hnsw not supported
+        try {
+          await this.client.query(`CREATE INDEX IF NOT EXISTS "${this._table.replace(/"/g, '')}_vec_idx" ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops)`);
+        } catch (e2) {
+          console.warn('Could not create vector index (might be fine if not supported):', e.message);
+        }
     }
   }
   async addDocuments(docs) {
-    const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW())`;
+    const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4, NOW()) ON CONFLICT ("id") DO NOTHING`;
     for (const doc of docs) {
       const id = doc.id || uuidv4();
@@ -186,6 +261,22 @@ class PostgresVectorStore extends VectorStore {
     return Object.values(combined).sort((a, b) => b.score - a.score).slice(0, limit);
   }
+  async fileExists(sha256, size, lastModified) {
+    try {
+      const q = `
+        SELECT 1
+        FROM ${this._table}
+        WHERE ${this._cMeta} @> $1
+        LIMIT 1
+      `;
+      const metaFilter = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });
+      const res = await this.client.query(q, [metaFilter]);
+      return res.rowCount > 0;
+    } catch (_) {
+      return false;
+    }
+  }
 }
 module.exports = { PostgresVectorStore };

package/src/backends/prisma_store.js CHANGED Viewed

@@ -35,7 +35,7 @@ class PrismaVectorStore extends VectorStore {
   }
   async addDocuments(docs) {
     const { clientInstance } = this.config;
-    const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4::vector, NOW())`;
+    const q = `INSERT INTO ${this._table} ("id", ${this._cContent}, ${this._cMeta}, ${this._cVec}, "createdAt") VALUES ($1, $2, $3, $4::vector, NOW()) ON CONFLICT ("id") DO NOTHING`;
     for (const doc of docs) {
         const id = doc.id || uuidv4();
         const vec = JSON.stringify(this.normalizeVector(doc.embedding));
@@ -108,6 +108,7 @@ class PrismaVectorStore extends VectorStore {
     const idxFts = `"${base}_content_fts_gin"`;
     try {
       await clientInstance.$executeRawUnsafe('CREATE EXTENSION IF NOT EXISTS vector');
+      await this._ensureColumns();
       await clientInstance.$executeRawUnsafe(`CREATE INDEX IF NOT EXISTS ${idxVec} ON ${this._table} USING ivfflat (${this._cVec} vector_cosine_ops) WITH (lists = 100);`);
       await clientInstance.$executeRawUnsafe(`CREATE INDEX IF NOT EXISTS ${idxFts} ON ${this._table} USING GIN (to_tsvector('english', ${this._cContent}));`);
     } catch (e) {
@@ -115,6 +116,54 @@ class PrismaVectorStore extends VectorStore {
     }
   }
+  async _ensureColumns() {
+    const { clientInstance } = this.config;
+    const dim = 1536;
+    const createTableQuery = `
+      CREATE TABLE IF NOT EXISTS ${this._table} (
+        "id" TEXT PRIMARY KEY,
+        ${this._cContent} TEXT,
+        ${this._cMeta} JSONB,
+        ${this._cVec} vector(${dim}),
+        "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+      )
+    `;
+    await clientInstance.$executeRawUnsafe(createTableQuery);
+    try {
+      const res = await clientInstance.$queryRawUnsafe(
+        `SELECT column_name, data_type, udt_name FROM information_schema.columns WHERE table_name = $1`,
+        this._tableBase
+      );
+      const cols = new Map(res.map(r => [r.column_name, r]));
+      const contentCol = this._cContent.replace(/"/g, '');
+      const metaCol = this._cMeta.replace(/"/g, '');
+      const vecCol = this._cVec.replace(/"/g, '');
+      const createdAtCol = 'createdAt';
+      if (!cols.has(contentCol)) {
+        await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cContent} TEXT`);
+      }
+      if (!cols.has(metaCol)) {
+        await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cMeta} JSONB`);
+      }
+      if (!cols.has(vecCol)) {
+        await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN ${this._cVec} vector(${dim})`);
+      } else {
+        const vinfo = cols.get(vecCol);
+        const isPgVector = vinfo && vinfo.udt_name === 'vector';
+        const isArray = vinfo && vinfo.data_type && vinfo.data_type.toLowerCase().includes('array');
+        if (isArray && !isPgVector) {
+          throw new Error(
+            'Postgres schema mismatch: vector column is double precision[] (array). Use pgvector type: vector(' + dim + '). Example: ALTER TABLE ' + this._table + ' ALTER COLUMN ' + this._cVec + ' TYPE vector(' + dim + ');'
+          );
+        }
+      }
+      if (!cols.has(createdAtCol)) {
+        await clientInstance.$executeRawUnsafe(`ALTER TABLE ${this._table} ADD COLUMN "createdAt" TIMESTAMP WITH TIME ZONE DEFAULT NOW()`);
+      }
+    } catch (_) {
+    }
+  }
   async fileExists(sha256, size, lastModified) {
     const { clientInstance } = this.config;
     const payload = JSON.stringify({ fileSHA256: sha256, fileSize: size, lastModified });

package/src/backends/qdrant_store.js CHANGED Viewed

@@ -52,6 +52,16 @@ class QdrantVectorStore extends VectorStore {
     }
     return out;
   }
+  async fileExists(sha256, size, lastModified) {
+    const filter = this.normalizeFilter({ fileSHA256: sha256, fileSize: size, lastModified });
+    try {
+      const res = await this.client.scroll(this.collection, { limit: 1, filter });
+      const points = res?.points || res?.result?.points || [];
+      return points.length > 0;
+    } catch (_) {
+      return false;
+    }
+  }
   async deleteDocuments({ ids = null, filter = null } = {}) {
     if (typeof this.client.delete !== 'function') throw new Error('deleteDocuments is not supported for this Qdrant client');
     if (Array.isArray(ids) && ids.length > 0) {

package/src/config.js CHANGED Viewed

@@ -87,8 +87,12 @@ const RAGConfigSchema = z.object({
   chunking: ChunkingConfigSchema.default({}),
   retrieval: RetrievalConfigSchema.default({}),
   reranking: RerankingConfigSchema.default({}),
+  sessionType: z.enum(['cli', 'api', 'chat']).default('api'),
   metadata: z.object({ enrichment: z.boolean().default(false) }).optional(),
   ingestion: z.object({ rateLimitEnabled: z.boolean().default(false), concurrencyLimit: z.number().default(5) }).optional(),
+  telemetry: z.object({
+    enabled: z.boolean().default(true),
+  }).default({ enabled: true }),
   memory: z.object({
     enabled: z.boolean().default(false),
     type: z.enum(['in-memory','redis','postgres']).default('in-memory'),

package/src/core.js CHANGED Viewed

@@ -19,6 +19,7 @@ const { OllamaBackend } = require('./backends/ollama');
 const { v5: uuidv5 } = require('uuid');
 const { v4: uuidv4 } = require('uuid');
 const SQLiteLogger = require('./observability');
+const telemetry = require('./telemetry');
 const DEFAULT_TOKEN_BUDGET = 2048;
 const DEFAULT_PREFER_SUMMARY_BELOW = 1024;
@@ -37,6 +38,17 @@ class VectraClient {
     const parsed = RAGConfigSchema.parse(config);
     this.config = parsed;
     this.callbacks = config.callbacks || [];
+    // Initialize telemetry
+    telemetry.init(this.config);
+    telemetry.track('sdk_initialized', {
+      vector_store: this.config.database.type,
+      embedding_provider: this.config.embedding.provider,
+      llm_provider: this.config.llm.provider,
+      observability_enabled: !!(this.config.observability && this.config.observability.enabled),
+      memory_enabled: !!(this.config.memory && this.config.memory.enabled),
+      session_type: this.config.sessionType
+    });
     // Initialize observability
     this.logger = (this.config.observability && this.config.observability.enabled)
@@ -294,6 +306,13 @@ class VectraClient {
     try {
       const stats = await fs.promises.stat(filePath);
+      telemetry.track('ingest_started', {
+        source_type: stats.isDirectory() ? 'directory' : 'file',
+        file_types: stats.isDirectory() ? [] : [path.extname(filePath).replace('.', '')],
+        chunking_strategy: this.config.chunking.strategy,
+        metadata_enrichment: this._metadataEnrichmentEnabled
+      });
       if (stats.isDirectory()) {
         await this._processDirectory(filePath);
         return;
@@ -351,6 +370,15 @@ class VectraClient {
       const durationMs = Date.now() - t0;
       this.trigger('onIngestEnd', filePath, chunks.length, durationMs);
+      const chunkCountBucket = chunks.length < 50 ? '1-50' : chunks.length < 200 ? '50-200' : '200+';
+      const durationBucket = durationMs < 1000 ? '0-1s' : durationMs < 5000 ? '1-5s' : '5s+';
+      telemetry.track('ingest_completed', {
+        chunk_count_bucket: chunkCountBucket,
+        duration_ms_bucket: durationBucket,
+        cached_embeddings: false
+      });
       this.logger.logTrace({
         traceId,
         spanId: rootSpanId,
@@ -366,6 +394,10 @@ class VectraClient {
       this.logger.logMetric({ name: 'ingest_latency', value: durationMs, tags: { type: 'single_file' } });
     } catch (e) {
+      telemetry.track('error_occurred', {
+        stage: 'ingestion',
+        error_type: e.name || 'unknown'
+      });
       this.trigger('onError', e);
       this.logger.logTrace({
         traceId,
@@ -604,6 +636,15 @@ class VectraClient {
         const retrievalMs = Date.now() - tRetrieval;
         this.trigger('onRetrievalEnd', docs.length, retrievalMs);
+        telemetry.track('query_executed', {
+           query_mode: 'rag',
+           retrieval_strategy: strategy,
+           reranking_enabled: !!(this.config.reranking && this.config.reranking.enabled),
+           streaming: stream,
+           memory_used: !!(this.history && sessionId),
+           result_count: docs.length
+        });
         this.logger.logTrace({
             traceId,
             spanId: uuidv4(),
@@ -801,6 +842,10 @@ class VectraClient {
             return { answer, sources: docs.map(d => d.metadata) };
         }
     } catch (e) {
+      telemetry.track('error_occurred', {
+        stage: 'retrieval_or_generation',
+        error_type: e.name || 'unknown'
+      });
       this.trigger('onError', e);
       this.logger.logTrace({
         traceId,
@@ -819,6 +864,11 @@ class VectraClient {
   }
   async evaluate(testSet) {
+    const bucket = testSet.length < 5 ? '1-5' : testSet.length < 20 ? '5-20' : '20+';
+    telemetry.track('evaluation_run', {
+      dataset_size_bucket: bucket
+    });
     const report = [];
     for (const item of testSet) {
       const res = await this.queryRAG(item.question);

package/src/telemetry.js ADDED Viewed

@@ -0,0 +1,145 @@
+const fs = require('fs');
+const path = require('path');
+const os = require('os');
+const { v4: uuidv4 } = require('uuid');
+const packageJson = require('../package.json');
+// Directory under the user's home directory that holds the telemetry identity file.
+const TELEMETRY_DIR = path.join(os.homedir(), '.vectra');
+// JSON file in which the anonymous `distinct_id` is persisted between runs.
+const TELEMETRY_FILE = path.join(TELEMETRY_DIR, 'telemetry.json');
+// Queue length at which track() schedules an immediate flush.
+const BATCH_SIZE = 10;
+// Period of the background flush timer, in milliseconds.
+const FLUSH_INTERVAL_MS = 60_000;
+// Collection endpoint; the VECTRA_TELEMETRY_ENDPOINT environment variable
+// takes precedence over the built-in default URL.
+const API_ENDPOINT =
+  process.env.VECTRA_TELEMETRY_ENDPOINT ||
+  'https://thwcefdrkimerqztvfjj.supabase.co/functions/v1/vectra-collect';
+class TelemetryManager {
+  constructor() {
+    this.distinctId = null;
+    this.queue = [];
+    this.timer = null;
+    this.enabled = true;
+    this.initialized = false;
+    this.globalProperties = {
+      sdk: 'vectra-node',
+      sdk_version: packageJson.version,
+      language: 'node',
+      runtime: `node-${process.version}`,
+      os: process.platform,
+      ci: !!process.env.CI,
+      telemetry_version: 1,
+    };
+  }
+  init(config = {}) {
+    if (this.initialized) return;
+    if (config.telemetry?.enabled === false) {
+      this.enabled = false;
+      return;
+    }
+    if (
+      process.env.VECTRA_TELEMETRY_DISABLED === '1' ||
+      process.env.DO_NOT_TRACK === '1'
+    ) {
+      this.enabled = false;
+      return;
+    }
+    this._loadIdentity();
+    this._startFlushTimer();
+    this.initialized = true;
+  }
+  _loadIdentity() {
+    try {
+      if (!fs.existsSync(TELEMETRY_DIR)) {
+        fs.mkdirSync(TELEMETRY_DIR, { recursive: true });
+      }
+      if (fs.existsSync(TELEMETRY_FILE)) {
+        const data = JSON.parse(fs.readFileSync(TELEMETRY_FILE, 'utf8'));
+        if (data.distinct_id) {
+          this.distinctId = data.distinct_id;
+          return;
+        }
+      }
+      this.distinctId = `anon_${uuidv4()}`;
+      fs.writeFileSync(
+        TELEMETRY_FILE,
+        JSON.stringify({ distinct_id: this.distinctId }, null, 2)
+      );
+    } catch {
+      this.enabled = false;
+    }
+  }
+  track(event, properties = {}) {
+  if (!this.enabled || !this.distinctId) return;
+  this.queue.push({
+    event,
+    distinct_id: this.distinctId,
+    timestamp: new Date().toISOString(),
+    properties: {
+      ...this.globalProperties,
+      ...properties,
+    },
+  });
+  if (this.queue.length >= BATCH_SIZE) {
+    setImmediate(() => this.flush());
+  }
+}
+  async flush() {
+    if (!this.enabled || this.queue.length === 0) return;
+    const batch = this.queue.splice(0, this.queue.length);
+    if (!global.fetch) {
+      if (process.env.VECTRA_TELEMETRY_DEBUG) {
+        console.log('Telemetry batch (debug):', batch);
+      }
+      return;
+    }
+    try {
+      await fetch(API_ENDPOINT, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json'
+        },
+        body: JSON.stringify(batch),
+        signal: AbortSignal.timeout(6000),
+      });
+      if (process.env.VECTRA_TELEMETRY_DEBUG) {
+        console.log('Telemetry batch flushed');
+      }
+    } catch (err) {
+      if (process.env.VECTRA_TELEMETRY_DEBUG) {
+        console.error('Telemetry flush failed:', err);
+      }
+      // Drop on error (OSS-safe choice)
+    }
+  }
+  _startFlushTimer() {
+    if (this.timer) clearInterval(this.timer);
+    this.timer = setInterval(() => this.flush(), FLUSH_INTERVAL_MS);
+  }
+  shutdown() {
+    if (this.timer) clearInterval(this.timer);
+    return this.flush();
+  }
+}
+module.exports = new TelemetryManager();

package/src/webconfig_server.js CHANGED Viewed

@@ -2,6 +2,7 @@ const http = require('http');
 const fs = require('fs');
 const path = require('path');
 const { ProviderType, ChunkingStrategy, RetrievalStrategy } = require('./config');
+const telemetry = require('./telemetry');
 const sqlite3 = require('sqlite3').verbose();
@@ -91,6 +92,14 @@ function serveStatic(res, filePath, contentType) {
 function start(configPath, mode = 'webconfig', port = 8766, openInBrowser = true) {
   const absConfigPath = path.resolve(configPath);
+  // Init telemetry
+  let cfg = {};
+  try {
+      if (fs.existsSync(absConfigPath)) cfg = JSON.parse(fs.readFileSync(absConfigPath, 'utf-8'));
+  } catch (_) {}
+  telemetry.init(cfg);
+  telemetry.track('feature_used', { feature: mode }); // mode is 'webconfig' or 'dashboard'
   const createServer = (currentPort) => {
     const server = http.createServer((req, res) => {
       const sendJson = (status, obj) => {