@yesvara/svara 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -180,6 +180,7 @@ var Chunker = class {
180
180
  id: this.chunkId(document.id, index),
181
181
  documentId: document.id,
182
182
  content: content.trim(),
183
+ source: document.source,
183
184
  index,
184
185
  metadata: {
185
186
  ...document.metadata,
@@ -255,7 +256,258 @@ var Chunker = class {
255
256
  }
256
257
  };
257
258
 
259
+ // src/database/sqlite.ts
260
+ import path2 from "path";
261
+ import fs2 from "fs";
262
+
263
+ // src/database/schema.ts
264
// Version stamp stored under the 'schema_version' key of svara_meta the first
// time SvaraDB.migrate() runs against a database.
var SCHEMA_VERSION = 1;
// DDL executed in one batch by SvaraDB.migrate(). Every statement uses
// IF NOT EXISTS, so re-running it against an already-initialised database
// leaves existing tables and indexes untouched.
// NOTE(review): the column defaults rely on SQLite's unixepoch() function
// (introduced in SQLite 3.38.0) — confirm the bundled SQLite is new enough.
var CREATE_TABLES_SQL = `
  -- Schema version tracking
  CREATE TABLE IF NOT EXISTS svara_meta (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL
  );

  -- Conversation history persistence
  CREATE TABLE IF NOT EXISTS svara_messages (
    id TEXT PRIMARY KEY,
    session_id TEXT NOT NULL,
    role TEXT NOT NULL CHECK(role IN ('user', 'assistant', 'system', 'tool')),
    content TEXT NOT NULL,
    tool_call_id TEXT,
    created_at INTEGER NOT NULL DEFAULT (unixepoch())
  );

  CREATE INDEX IF NOT EXISTS idx_messages_session
  ON svara_messages (session_id, created_at);

  -- User registry
  CREATE TABLE IF NOT EXISTS svara_users (
    id TEXT PRIMARY KEY,
    email TEXT,
    display_name TEXT,
    first_seen INTEGER NOT NULL DEFAULT (unixepoch()),
    last_seen INTEGER NOT NULL DEFAULT (unixepoch()),
    metadata TEXT DEFAULT '{}'
  );

  CREATE INDEX IF NOT EXISTS idx_users_email
  ON svara_users (email);

  -- Session metadata
  CREATE TABLE IF NOT EXISTS svara_sessions (
    id TEXT PRIMARY KEY,
    user_id TEXT NOT NULL,
    channel TEXT NOT NULL,
    created_at INTEGER NOT NULL DEFAULT (unixepoch()),
    updated_at INTEGER NOT NULL DEFAULT (unixepoch()),
    metadata TEXT DEFAULT '{}',
    FOREIGN KEY (user_id) REFERENCES svara_users(id)
  );

  CREATE INDEX IF NOT EXISTS idx_sessions_user
  ON svara_sessions (user_id);

  -- Vector store chunks for RAG (per agent)
  CREATE TABLE IF NOT EXISTS svara_chunks (
    id TEXT PRIMARY KEY,
    agent_name TEXT NOT NULL, -- Separate RAG per agent
    document_id TEXT NOT NULL,
    content TEXT NOT NULL,
    content_hash TEXT NOT NULL, -- MD5 hash of content for deduplication
    chunk_index INTEGER NOT NULL,
    embedding TEXT, -- stored as JSON string of float array
    source TEXT NOT NULL,
    metadata TEXT DEFAULT '{}',
    created_at INTEGER NOT NULL DEFAULT (unixepoch())
  );

  CREATE INDEX IF NOT EXISTS idx_chunks_agent
  ON svara_chunks (agent_name);

  CREATE INDEX IF NOT EXISTS idx_chunks_agent_document
  ON svara_chunks (agent_name, document_id);

  CREATE INDEX IF NOT EXISTS idx_chunks_content_hash
  ON svara_chunks (content_hash);

  -- Document registry
  CREATE TABLE IF NOT EXISTS svara_documents (
    id TEXT PRIMARY KEY,
    source TEXT NOT NULL UNIQUE,
    type TEXT NOT NULL,
    size INTEGER,
    hash TEXT,
    indexed_at INTEGER NOT NULL DEFAULT (unixepoch()),
    metadata TEXT DEFAULT '{}'
  );

  -- Key-value store for arbitrary agent state
  CREATE TABLE IF NOT EXISTS svara_kv (
    key TEXT PRIMARY KEY,
    value TEXT NOT NULL,
    expires_at INTEGER, -- unix timestamp, NULL = no expiry
    updated_at INTEGER NOT NULL DEFAULT (unixepoch())
  );
`;
// Seeds svara_meta with two rows. Takes two positional parameters, in order:
// the schema version and the creation timestamp (SvaraDB.migrate() passes
// String(SCHEMA_VERSION) and an ISO-8601 date string).
var INSERT_META_SQL = `
  INSERT OR REPLACE INTO svara_meta (key, value)
  VALUES ('schema_version', ?), ('created_at', ?);
`;
358
+
359
+ // src/database/sqlite.ts
360
+ var KVStore = class {
361
+ constructor(db) {
362
+ this.db = db;
363
+ }
364
+ db;
365
+ /** Set a key-value pair, with optional TTL in seconds. */
366
+ set(key, value, ttlSeconds) {
367
+ const expiresAt = ttlSeconds ? Math.floor(Date.now() / 1e3) + ttlSeconds : null;
368
+ this.db.prepare(`
369
+ INSERT OR REPLACE INTO svara_kv (key, value, expires_at, updated_at)
370
+ VALUES (?, ?, ?, unixepoch())
371
+ `).run(key, JSON.stringify(value), expiresAt);
372
+ }
373
+ /** Get a value by key. Returns undefined if not found or expired. */
374
+ get(key) {
375
+ const row = this.db.prepare(`
376
+ SELECT value, expires_at FROM svara_kv
377
+ WHERE key = ? AND (expires_at IS NULL OR expires_at > unixepoch())
378
+ `).get(key);
379
+ if (!row) return void 0;
380
+ return JSON.parse(row.value);
381
+ }
382
+ /** Delete a key. */
383
+ delete(key) {
384
+ this.db.prepare("DELETE FROM svara_kv WHERE key = ?").run(key);
385
+ }
386
+ /** Check if a key exists and is not expired. */
387
+ has(key) {
388
+ return this.get(key) !== void 0;
389
+ }
390
+ /** Get all keys matching a prefix. */
391
+ keys(prefix = "") {
392
+ const rows = this.db.prepare(`
393
+ SELECT key FROM svara_kv
394
+ WHERE key LIKE ? AND (expires_at IS NULL OR expires_at > unixepoch())
395
+ `).all(`${prefix}%`);
396
+ return rows.map((r) => r.key);
397
+ }
398
+ };
399
/**
 * Thin wrapper around a better-sqlite3 connection that creates the Svara
 * schema on open and exposes query helpers, message persistence and a
 * key-value store (`kv`).
 */
var SvaraDB = class {
  db;
  kv;
  /**
   * Open (and if needed create) the database, apply pragmas and ensure the
   * schema exists.
   *
   * @param dbPath File path, or ":memory:" (default) for an in-memory
   *   database. Missing parent directories of a file path are created.
   * @throws {Error} If better-sqlite3 cannot be loaded, or if opening or
   *   initialising the database fails.
   */
  constructor(dbPath = ":memory:") {
    if (dbPath !== ":memory:") {
      fs2.mkdirSync(path2.dirname(path2.resolve(dbPath)), { recursive: true });
    }
    this.db = this.openDatabase(dbPath);
    try {
      this.configure();
      this.migrate();
    } catch (err) {
      // Do not leak the open handle when setup fails; the setup error is the
      // one worth reporting, so a secondary close failure is ignored.
      try {
        this.db.close();
      } catch {
      }
      throw err;
    }
    this.kv = new KVStore(this.db);
  }
  // ─── Query Helpers ────────────────────────────────────────────────────────
  /**
   * Run a SELECT and return all matching rows.
   *
   * @param sql Statement text with `?` placeholders.
   * @param params Positional values bound to the placeholders.
   */
  query(sql, params = []) {
    return this.db.prepare(sql).all(...params);
  }
  /**
   * Run a SELECT and return the first matching row (undefined when the
   * statement yields no rows).
   */
  queryOne(sql, params = []) {
    return this.db.prepare(sql).get(...params);
  }
  /**
   * Run an INSERT/UPDATE/DELETE. Returns affected row count.
   */
  run(sql, params = []) {
    return this.db.prepare(sql).run(...params).changes;
  }
  /**
   * Execute raw SQL (for DDL, migrations, etc.).
   */
  exec(sql) {
    this.db.exec(sql);
  }
  /**
   * Run multiple operations in a single transaction and return whatever
   * `fn` returns.
   *
   * @example
   * db.transaction(() => {
   *   db.run('INSERT INTO orders ...', [...]);
   *   db.run('UPDATE inventory ...', [...]);
   * });
   */
  transaction(fn) {
    return this.db.transaction(fn)();
  }
  /**
   * Close the database connection.
   */
  close() {
    this.db.close();
  }
  // ─── Internal Message Storage ─────────────────────────────────────────────
  /**
   * Insert a message, replacing any existing row with the same id.
   *
   * @param params `{ id, sessionId, role, content, toolCallId? }`; a missing
   *   `toolCallId` is stored as NULL.
   */
  saveMessage(params) {
    this.db.prepare(`
      INSERT OR REPLACE INTO svara_messages (id, session_id, role, content, tool_call_id)
      VALUES (?, ?, ?, ?, ?)
    `).run(
      params.id,
      params.sessionId,
      params.role,
      params.content,
      params.toolCallId ?? null
    );
  }
  /**
   * Return up to `limit` messages of a session, oldest first.
   *
   * `created_at` has one-second resolution, so `rowid` is used as a
   * tie-breaker to keep messages written within the same second in
   * insertion order.
   *
   * NOTE(review): the limit keeps the OLDEST rows; if callers expect the
   * most recent `limit` messages of a long session, this needs revisiting.
   */
  getMessages(sessionId, limit = 50) {
    return this.db.prepare(`
      SELECT id, role, content, tool_call_id, created_at
      FROM svara_messages
      WHERE session_id = ?
      ORDER BY created_at ASC, rowid ASC
      LIMIT ?
    `).all(sessionId, limit);
  }
  /** Delete every stored message of the given session. */
  clearSession(sessionId) {
    this.db.prepare("DELETE FROM svara_messages WHERE session_id = ?").run(sessionId);
  }
  // ─── Private Setup ────────────────────────────────────────────────────────
  /**
   * Load better-sqlite3 lazily and open `dbPath`.
   *
   * Only a failure to load the package is translated into the install hint
   * (with the underlying error attached as `cause`); errors raised while
   * opening the database itself propagate unchanged so they are not
   * misreported as a missing dependency.
   */
  openDatabase(dbPath) {
    let Database;
    try {
      Database = __require("better-sqlite3");
    } catch (err) {
      throw Object.assign(
        new Error(
          '[SvaraJS] Database requires the "better-sqlite3" package.\nRun: npm install better-sqlite3'
        ),
        { cause: err }
      );
    }
    return new Database(dbPath);
  }
  /** Apply connection pragmas: WAL journal, NORMAL sync, foreign keys on. */
  configure() {
    this.db.pragma("journal_mode = WAL");
    this.db.pragma("synchronous = NORMAL");
    this.db.pragma("foreign_keys = ON");
  }
  /**
   * Create any missing tables/indexes and, on a fresh database, record the
   * schema version and creation time in svara_meta. An existing
   * 'schema_version' row is left as is; no upgrade logic runs here.
   */
  migrate() {
    this.db.exec(CREATE_TABLES_SQL);
    const meta = this.db.prepare(
      "SELECT value FROM svara_meta WHERE key = 'schema_version'"
    ).get();
    if (!meta) {
      this.db.prepare(INSERT_META_SQL).run(
        String(SCHEMA_VERSION),
        (/* @__PURE__ */ new Date()).toISOString()
      );
    }
  }
};
508
+
258
509
  // src/rag/retriever.ts
510
+ import crypto3 from "crypto";
259
511
  var OpenAIEmbeddings = class {
260
512
  client;
261
513
  model;
@@ -310,25 +562,95 @@ var OllamaEmbeddings = class {
310
562
  return data.embedding;
311
563
  }
312
564
  };
313
- var InMemoryVectorStore = class {
314
- entries = [];
315
- add(chunk, embedding) {
316
- const existing = this.entries.findIndex((e) => e.chunk.id === chunk.id);
317
- if (existing >= 0) {
318
- this.entries[existing] = { chunk, embedding };
319
- } else {
320
- this.entries.push({ chunk, embedding });
321
- }
322
- }
323
- search(queryEmbedding, topK, threshold = 0) {
324
- const scored = this.entries.map((entry) => ({
325
- chunk: entry.chunk,
326
- score: cosineSimilarity(queryEmbedding, entry.embedding)
327
- }));
328
- return scored.filter((s) => s.score >= threshold).sort((a, b) => b.score - a.score).slice(0, topK).map((s) => s.chunk);
565
/** Base class for vector stores; supplies the shared content fingerprint. */
var VectorStore = class {
  /**
   * Fingerprint chunk text for duplicate detection.
   *
   * @param content Text to hash.
   * @returns Lower-case hexadecimal MD5 digest of `content`.
   */
  contentHash(content) {
    const hasher = crypto3.createHash("md5");
    hasher.update(content);
    return hasher.digest("hex");
  }
};
570
/**
 * Vector store persisted in the `svara_chunks` table, scoped to one agent.
 *
 * Embeddings are stored as JSON text and similarity is computed in process
 * with cosineSimilarity over every stored row of the agent.
 */
var PersistentVectorStore = class extends VectorStore {
  /**
   * @param db Handle exposing `query(sql, params)` and `run(sql, params)`
   *   (a SvaraDB instance).
   * @param agentName Value written to and filtered on `agent_name`.
   */
  constructor(db, agentName) {
    super();
    this.db = db;
    this.agentName = agentName;
  }
  db;
  agentName;
  /**
   * Store a chunk and its embedding.
   *
   * If this agent already has a row with the same content hash, the chunk is
   * skipped (and a log line is written). Otherwise the row is inserted,
   * replacing any existing row with the same chunk id.
   *
   * @param chunk Chunk to persist; a missing `metadata` is stored as `{}`.
   * @param embedding Numeric vector for `chunk.content`.
   */
  async add(chunk, embedding) {
    const contentHash = this.contentHash(chunk.content);
    const existing = this.db.query(
      "SELECT id FROM svara_chunks WHERE agent_name = ? AND content_hash = ?",
      [this.agentName, contentHash]
    );
    if (existing.length > 0) {
      console.log(`[SvaraJS:RAG] Duplicate content detected for ${this.agentName}, skipping chunk ${chunk.id}`);
      return;
    }
    this.db.run(
      `INSERT OR REPLACE INTO svara_chunks
      (id, agent_name, document_id, content, content_hash, chunk_index, embedding, source, metadata)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        chunk.id,
        this.agentName,
        chunk.documentId,
        chunk.content,
        contentHash,
        chunk.index,
        JSON.stringify(embedding),
        chunk.source,
        // JSON.stringify(undefined) is undefined, which cannot be bound.
        JSON.stringify(chunk.metadata ?? {})
      ]
    );
  }
  /**
   * Return the `topK` most similar chunks with score >= `threshold`,
   * best match first.
   */
  async search(queryEmbedding, topK, threshold = 0) {
    const scored = await this.searchWithScores(queryEmbedding, topK, threshold);
    return scored.map((s) => s.chunk);
  }
  /**
   * Same ranking as `search`, but each result is `{ chunk, score }` where
   * `score` is the cosine similarity to `queryEmbedding`.
   *
   * Rows whose embedding column is NULL (the schema allows it) cannot be
   * scored and are skipped.
   */
  async searchWithScores(queryEmbedding, topK, threshold = 0) {
    // ORDER BY is kept so rows with equal scores come back in a stable order.
    const rows = this.db.query(
      "SELECT id, document_id, content, chunk_index, embedding, source, metadata FROM svara_chunks WHERE agent_name = ? ORDER BY id DESC",
      [this.agentName]
    );
    const scored = [];
    for (const row of rows) {
      if (row.embedding == null) continue;
      scored.push({
        chunk: this.rowToChunk(row),
        score: cosineSimilarity(queryEmbedding, JSON.parse(row.embedding))
      });
    }
    return scored.filter((s) => s.score >= threshold).sort((a, b) => b.score - a.score).slice(0, topK);
  }
  /** Number of chunks stored for this agent. */
  async size() {
    const result = this.db.query(
      "SELECT COUNT(*) as count FROM svara_chunks WHERE agent_name = ?",
      [this.agentName]
    );
    return result[0]?.count ?? 0;
  }
  /** Map a svara_chunks row to a chunk object; NULL metadata becomes `{}`. */
  rowToChunk(row) {
    return {
      id: row.id,
      documentId: row.document_id,
      content: row.content,
      index: row.chunk_index,
      source: row.source,
      metadata: JSON.parse(row.metadata ?? "{}")
    };
  }
};
334
656
  var VectorRetriever = class {
@@ -337,13 +659,17 @@ var VectorRetriever = class {
337
659
  loader;
338
660
  chunker;
339
661
  config;
340
- constructor() {
341
- this.store = new InMemoryVectorStore();
662
+ db;
663
+ agentName;
664
+ constructor(agentName, db) {
665
+ this.agentName = agentName;
342
666
  this.loader = new DocumentLoader();
343
667
  this.chunker = new Chunker();
668
+ this.db = db || new SvaraDB("./data/svara.db");
344
669
  }
345
670
  async init(config) {
346
671
  this.config = config;
672
+ this.store = new PersistentVectorStore(this.db, this.agentName);
347
673
  if (config.chunking) {
348
674
  this.chunker = new Chunker({
349
675
  strategy: config.chunking.strategy ?? "sentence",
@@ -372,15 +698,17 @@ var VectorRetriever = class {
372
698
  console.log(`[SvaraJS:RAG] Embedding ${chunks.length} chunk(s)...`);
373
699
  const embeddings = await this.embedder.embed(chunks.map((c) => c.content));
374
700
  for (let i = 0; i < chunks.length; i++) {
375
- this.store.add(chunks[i], embeddings[i]);
701
+ await this.store.add(chunks[i], embeddings[i]);
376
702
  }
377
- console.log(`[SvaraJS:RAG] Vector store now has ${this.store.size} chunk(s).`);
703
+ const size = await this.store.size();
704
+ console.log(`[SvaraJS:RAG] Vector store now has ${size} chunk(s).`);
378
705
  }
379
706
  async retrieve(query, topK = 5) {
380
- if (this.store.size === 0) return "";
707
+ const size = await this.store.size();
708
+ if (size === 0) return "";
381
709
  const queryEmbedding = await this.embedder.embedOne(query);
382
710
  const threshold = this.config.retrieval?.threshold ?? 0.3;
383
- const chunks = this.store.search(queryEmbedding, topK, threshold);
711
+ const chunks = await this.store.search(queryEmbedding, topK, threshold);
384
712
  if (!chunks.length) return "";
385
713
  return chunks.map((chunk, i) => `[Context ${i + 1}]
386
714
  Source: ${String(chunk.metadata.filename ?? chunk.documentId)}
@@ -389,11 +717,11 @@ ${chunk.content}`).join("\n\n---\n\n");
389
717
  async retrieveChunks(query, topK = 5) {
390
718
  const queryEmbedding = await this.embedder.embedOne(query);
391
719
  const threshold = this.config.retrieval?.threshold ?? 0.3;
392
- const chunks = this.store.search(queryEmbedding, topK, threshold);
720
+ const chunksWithScores = await this.store.searchWithScores(queryEmbedding, topK, threshold);
393
721
  return {
394
- chunks,
722
+ chunks: chunksWithScores,
395
723
  query,
396
- totalFound: chunks.length
724
+ totalFound: chunksWithScores.length
397
725
  };
398
726
  }
399
727
  };
@@ -412,6 +740,7 @@ function cosineSimilarity(a, b) {
412
740
  }
413
741
 
414
742
  export {
743
+ SvaraDB,
415
744
  DocumentLoader,
416
745
  Chunker,
417
746
  VectorRetriever
package/dist/index.d.mts CHANGED
@@ -141,6 +141,11 @@ interface AgentRunOptions {
141
141
  userId?: string;
142
142
  metadata?: Record<string, unknown>;
143
143
  }
144
/**
 * One entry of `AgentRunResult.retrievedDocuments`.
 *
 * NOTE(review): the code that fills these objects is not part of this
 * declaration file; the field notes below are inferred from the names and
 * should be confirmed against the implementation.
 */
interface RetrievedDocument {
  /** Text excerpt (presumably taken from the retrieved chunk). */
  excerpt: string;
  /** Numeric score (presumably the retrieval similarity). */
  score: number;
  /** Source identifier (presumably the originating document's source). */
  source: string;
}
144
149
  interface AgentRunResult {
145
150
  response: string;
146
151
  sessionId: string;
@@ -148,6 +153,7 @@ interface AgentRunResult {
148
153
  iterations: number;
149
154
  usage: TokenUsage;
150
155
  duration: number;
156
+ retrievedDocuments?: RetrievedDocument[];
151
157
  }
152
158
  type DocumentType = 'text' | 'markdown' | 'pdf' | 'html' | 'json' | 'docx';
153
159
  interface Document {
@@ -166,6 +172,7 @@ interface DocumentChunk {
166
172
  id: string;
167
173
  documentId: string;
168
174
  content: string;
175
+ source: string;
169
176
  index: number;
170
177
  metadata: {
171
178
  filename: string;
@@ -192,8 +199,12 @@ interface RAGConfig {
192
199
  threshold?: number;
193
200
  };
194
201
  }
202
/**
 * A document chunk paired with a numeric score.
 *
 * Element type of `RetrievedContext.chunks`.
 */
interface ChunkWithScore {
  /** Score attached to `chunk`. */
  score: number;
  /** The chunk the score refers to. */
  chunk: DocumentChunk;
}
195
206
  interface RetrievedContext {
196
- chunks: DocumentChunk[];
207
+ chunks: ChunkWithScore[];
197
208
  query: string;
198
209
  totalFound: number;
199
210
  }
@@ -432,8 +443,10 @@ declare class SvaraAgent extends EventEmitter {
432
443
  private readonly verbose;
433
444
  private channels;
434
445
  private knowledgeBase;
446
+ private retriever;
435
447
  private knowledgePaths;
436
448
  private isStarted;
449
+ private db;
437
450
  constructor(config: AgentConfig);
438
451
  /**
439
452
  * Send a message and get a reply. The simplest way to use an agent.
@@ -523,6 +536,7 @@ declare class SvaraAgent extends EventEmitter {
523
536
  * agent.addKnowledge('./new-policies.pdf');
524
537
  */
525
538
  addKnowledge(paths: string | string[]): Promise<void>;
539
+ private trackUserAndSession;
526
540
  /**
527
541
  * Receives a raw incoming message from a channel and processes it.
528
542
  * Called by channel handlers — not typically used directly.
@@ -910,7 +924,9 @@ declare class VectorRetriever implements RAGRetriever {
910
924
  private loader;
911
925
  private chunker;
912
926
  private config;
913
- constructor();
927
+ private db;
928
+ private agentName;
929
+ constructor(agentName: string, db?: SvaraDB);
914
930
  init(config: RAGConfig): Promise<void>;
915
931
  addDocuments(filePaths: string[]): Promise<void>;
916
932
  retrieve(query: string, topK?: number): Promise<string>;
package/dist/index.d.ts CHANGED
@@ -141,6 +141,11 @@ interface AgentRunOptions {
141
141
  userId?: string;
142
142
  metadata?: Record<string, unknown>;
143
143
  }
144
/**
 * One entry of `AgentRunResult.retrievedDocuments`.
 *
 * NOTE(review): the code that fills these objects is not part of this
 * declaration file; the field notes below are inferred from the names and
 * should be confirmed against the implementation.
 */
interface RetrievedDocument {
  /** Text excerpt (presumably taken from the retrieved chunk). */
  excerpt: string;
  /** Numeric score (presumably the retrieval similarity). */
  score: number;
  /** Source identifier (presumably the originating document's source). */
  source: string;
}
144
149
  interface AgentRunResult {
145
150
  response: string;
146
151
  sessionId: string;
@@ -148,6 +153,7 @@ interface AgentRunResult {
148
153
  iterations: number;
149
154
  usage: TokenUsage;
150
155
  duration: number;
156
+ retrievedDocuments?: RetrievedDocument[];
151
157
  }
152
158
  type DocumentType = 'text' | 'markdown' | 'pdf' | 'html' | 'json' | 'docx';
153
159
  interface Document {
@@ -166,6 +172,7 @@ interface DocumentChunk {
166
172
  id: string;
167
173
  documentId: string;
168
174
  content: string;
175
+ source: string;
169
176
  index: number;
170
177
  metadata: {
171
178
  filename: string;
@@ -192,8 +199,12 @@ interface RAGConfig {
192
199
  threshold?: number;
193
200
  };
194
201
  }
202
/**
 * A document chunk paired with a numeric score.
 *
 * Element type of `RetrievedContext.chunks`.
 */
interface ChunkWithScore {
  /** Score attached to `chunk`. */
  score: number;
  /** The chunk the score refers to. */
  chunk: DocumentChunk;
}
195
206
  interface RetrievedContext {
196
- chunks: DocumentChunk[];
207
+ chunks: ChunkWithScore[];
197
208
  query: string;
198
209
  totalFound: number;
199
210
  }
@@ -432,8 +443,10 @@ declare class SvaraAgent extends EventEmitter {
432
443
  private readonly verbose;
433
444
  private channels;
434
445
  private knowledgeBase;
446
+ private retriever;
435
447
  private knowledgePaths;
436
448
  private isStarted;
449
+ private db;
437
450
  constructor(config: AgentConfig);
438
451
  /**
439
452
  * Send a message and get a reply. The simplest way to use an agent.
@@ -523,6 +536,7 @@ declare class SvaraAgent extends EventEmitter {
523
536
  * agent.addKnowledge('./new-policies.pdf');
524
537
  */
525
538
  addKnowledge(paths: string | string[]): Promise<void>;
539
+ private trackUserAndSession;
526
540
  /**
527
541
  * Receives a raw incoming message from a channel and processes it.
528
542
  * Called by channel handlers — not typically used directly.
@@ -910,7 +924,9 @@ declare class VectorRetriever implements RAGRetriever {
910
924
  private loader;
911
925
  private chunker;
912
926
  private config;
913
- constructor();
927
+ private db;
928
+ private agentName;
929
+ constructor(agentName: string, db?: SvaraDB);
914
930
  init(config: RAGConfig): Promise<void>;
915
931
  addDocuments(filePaths: string[]): Promise<void>;
916
932
  retrieve(query: string, topK?: number): Promise<string>;