@yesvara/svara 0.1.0 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -18,6 +18,8 @@ import type { RAGConfig, DocumentChunk, RetrievedContext } from '../core/types.j
18
18
  import type { RAGRetriever } from '../core/agent.js';
19
19
  import { DocumentLoader } from './loader.js';
20
20
  import { Chunker } from './chunker.js';
21
+ import { SvaraDB } from '../database/sqlite.js';
22
+ import crypto from 'crypto';
21
23
 
22
24
  // ─── Embedding Interface ──────────────────────────────────────────────────────
23
25
 
@@ -103,41 +105,142 @@ class OllamaEmbeddings implements EmbeddingProvider {
103
105
  }
104
106
  }
105
107
 
106
- // ─── In-Memory Vector Store ───────────────────────────────────────────────────
108
+ // ─── Vector Store (Persistent with SQLite) ────────────────────────────────────
107
109
 
108
- interface VectorEntry {
109
- chunk: DocumentChunk;
110
- embedding: number[];
110
+ abstract class VectorStore {
111
+ abstract add(chunk: DocumentChunk, embedding: number[]): Promise<void>;
112
+ abstract search(queryEmbedding: number[], topK: number, threshold?: number): Promise<DocumentChunk[]>;
113
+ abstract searchWithScores(queryEmbedding: number[], topK: number, threshold?: number): Promise<Array<{ chunk: DocumentChunk; score: number }>>;
114
+ abstract size(): Promise<number>;
115
+ protected contentHash(content: string): string {
116
+ return crypto.createHash('md5').update(content).digest('hex');
117
+ }
111
118
  }
112
119
 
113
- class InMemoryVectorStore {
114
- private entries: VectorEntry[] = [];
120
+ class PersistentVectorStore extends VectorStore {
121
+ constructor(private db: SvaraDB, private agentName: string) {
122
+ super();
123
+ }
124
+
125
+ async add(chunk: DocumentChunk, embedding: number[]): Promise<void> {
126
+ const contentHash = this.contentHash(chunk.content);
115
127
 
116
- add(chunk: DocumentChunk, embedding: number[]): void {
117
- // Replace if same chunk id
118
- const existing = this.entries.findIndex((e) => e.chunk.id === chunk.id);
119
- if (existing >= 0) {
120
- this.entries[existing] = { chunk, embedding };
121
- } else {
122
- this.entries.push({ chunk, embedding });
128
+ // Check if content already exists for this agent (deduplication per agent)
129
+ const existing = this.db.query(
130
+ 'SELECT id FROM svara_chunks WHERE agent_name = ? AND content_hash = ?',
131
+ [this.agentName, contentHash]
132
+ ) as Array<{ id: string }>;
133
+
134
+ if (existing.length > 0) {
135
+ console.log(`[SvaraJS:RAG] Duplicate content detected for ${this.agentName}, skipping chunk ${chunk.id}`);
136
+ return;
123
137
  }
138
+
139
+ // Store embedding as JSON string
140
+ const embeddingJson = JSON.stringify(embedding);
141
+
142
+ this.db.run(
143
+ `INSERT OR REPLACE INTO svara_chunks
144
+ (id, agent_name, document_id, content, content_hash, chunk_index, embedding, source, metadata)
145
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
146
+ [
147
+ chunk.id,
148
+ this.agentName,
149
+ chunk.documentId,
150
+ chunk.content,
151
+ contentHash,
152
+ chunk.index,
153
+ embeddingJson,
154
+ chunk.source,
155
+ JSON.stringify(chunk.metadata),
156
+ ]
157
+ );
124
158
  }
125
159
 
126
- search(queryEmbedding: number[], topK: number, threshold = 0): DocumentChunk[] {
127
- const scored = this.entries.map((entry) => ({
128
- chunk: entry.chunk,
129
- score: cosineSimilarity(queryEmbedding, entry.embedding),
130
- }));
160
+ async search(queryEmbedding: number[], topK: number, threshold = 0): Promise<DocumentChunk[]> {
161
+ // Retrieve chunks for this agent only
162
+ const rows = this.db.query(
163
+ 'SELECT id, document_id, content, chunk_index, embedding, source, metadata FROM svara_chunks WHERE agent_name = ? ORDER BY id DESC',
164
+ [this.agentName]
165
+ ) as Array<{
166
+ id: string;
167
+ document_id: string;
168
+ content: string;
169
+ chunk_index: number;
170
+ embedding: string;
171
+ source: string;
172
+ metadata: string;
173
+ }>;
174
+
175
+ // Score and sort in-memory (SQLite doesn't have vector similarity)
176
+ const scored = rows
177
+ .map((row) => {
178
+ const embedding = JSON.parse(row.embedding) as number[];
179
+ return {
180
+ chunk: {
181
+ id: row.id,
182
+ documentId: row.document_id,
183
+ content: row.content,
184
+ index: row.chunk_index,
185
+ source: row.source,
186
+ metadata: JSON.parse(row.metadata),
187
+ } as DocumentChunk,
188
+ score: cosineSimilarity(queryEmbedding, embedding),
189
+ };
190
+ })
191
+ .filter((s) => s.score >= threshold)
192
+ .sort((a, b) => b.score - a.score)
193
+ .slice(0, topK);
194
+
195
+ return scored.map((s) => s.chunk);
196
+ }
131
197
 
132
- return scored
198
+ async searchWithScores(queryEmbedding: number[], topK: number, threshold = 0): Promise<Array<{ chunk: DocumentChunk; score: number }>> {
199
+ // Retrieve chunks for this agent only
200
+ const rows = this.db.query(
201
+ 'SELECT id, document_id, content, chunk_index, embedding, source, metadata FROM svara_chunks WHERE agent_name = ? ORDER BY id DESC',
202
+ [this.agentName]
203
+ ) as Array<{
204
+ id: string;
205
+ document_id: string;
206
+ content: string;
207
+ chunk_index: number;
208
+ embedding: string;
209
+ source: string;
210
+ metadata: string;
211
+ }>;
212
+
213
+ // Score and sort in-memory (SQLite doesn't have vector similarity)
214
+ const scored = rows
215
+ .map((row) => {
216
+ const embedding = JSON.parse(row.embedding) as number[];
217
+ return {
218
+ chunk: {
219
+ id: row.id,
220
+ documentId: row.document_id,
221
+ content: row.content,
222
+ index: row.chunk_index,
223
+ source: row.source,
224
+ metadata: JSON.parse(row.metadata),
225
+ } as DocumentChunk,
226
+ score: cosineSimilarity(queryEmbedding, embedding),
227
+ };
228
+ })
133
229
  .filter((s) => s.score >= threshold)
134
230
  .sort((a, b) => b.score - a.score)
135
- .slice(0, topK)
136
- .map((s) => s.chunk);
231
+ .slice(0, topK);
232
+
233
+ return scored;
137
234
  }
138
235
 
139
- get size(): number {
140
- return this.entries.length;
236
+ async size(): Promise<number> {
237
+ const result = this.db.query(
238
+ 'SELECT COUNT(*) as count FROM svara_chunks WHERE agent_name = ?',
239
+ [this.agentName]
240
+ ) as Array<{
241
+ count: number;
242
+ }>;
243
+ return result[0]?.count ?? 0;
141
244
  }
142
245
  }
143
246
 
@@ -145,20 +248,26 @@ class InMemoryVectorStore {
145
248
 
146
249
  export class VectorRetriever implements RAGRetriever {
147
250
  private embedder!: EmbeddingProvider;
148
- private store: InMemoryVectorStore;
251
+ private store!: VectorStore;
149
252
  private loader: DocumentLoader;
150
253
  private chunker: Chunker;
151
254
  private config!: RAGConfig;
255
+ private db: SvaraDB;
256
+ private agentName: string;
152
257
 
153
- constructor() {
154
- this.store = new InMemoryVectorStore();
258
+ constructor(agentName: string, db?: SvaraDB) {
259
+ this.agentName = agentName;
155
260
  this.loader = new DocumentLoader();
156
261
  this.chunker = new Chunker();
262
+ this.db = db || new SvaraDB('./data/svara.db');
157
263
  }
158
264
 
159
265
  async init(config: RAGConfig): Promise<void> {
160
266
  this.config = config;
161
267
 
268
+ // Init vector store (persistent per agent)
269
+ this.store = new PersistentVectorStore(this.db, this.agentName);
270
+
162
271
  // Init chunker with config
163
272
  if (config.chunking) {
164
273
  this.chunker = new Chunker({
@@ -193,19 +302,21 @@ export class VectorRetriever implements RAGRetriever {
193
302
  const embeddings = await this.embedder.embed(chunks.map((c) => c.content));
194
303
 
195
304
  for (let i = 0; i < chunks.length; i++) {
196
- this.store.add(chunks[i], embeddings[i]);
305
+ await this.store.add(chunks[i], embeddings[i]);
197
306
  }
198
307
 
199
- console.log(`[SvaraJS:RAG] Vector store now has ${this.store.size} chunk(s).`);
308
+ const size = await this.store.size();
309
+ console.log(`[SvaraJS:RAG] Vector store now has ${size} chunk(s).`);
200
310
  }
201
311
 
202
312
  async retrieve(query: string, topK = 5): Promise<string> {
203
- if (this.store.size === 0) return '';
313
+ const size = await this.store.size();
314
+ if (size === 0) return '';
204
315
 
205
316
  const queryEmbedding = await this.embedder.embedOne(query);
206
317
  const threshold = this.config.retrieval?.threshold ?? 0.3;
207
318
 
208
- const chunks = this.store.search(queryEmbedding, topK, threshold);
319
+ const chunks = await this.store.search(queryEmbedding, topK, threshold);
209
320
 
210
321
  if (!chunks.length) return '';
211
322
 
@@ -218,12 +329,14 @@ export class VectorRetriever implements RAGRetriever {
218
329
  async retrieveChunks(query: string, topK = 5): Promise<RetrievedContext> {
219
330
  const queryEmbedding = await this.embedder.embedOne(query);
220
331
  const threshold = this.config.retrieval?.threshold ?? 0.3;
221
- const chunks = this.store.search(queryEmbedding, topK, threshold);
332
+
333
+ // Get chunks with scores from the store
334
+ const chunksWithScores = await this.store.searchWithScores(queryEmbedding, topK, threshold);
222
335
 
223
336
  return {
224
- chunks,
337
+ chunks: chunksWithScores,
225
338
  query,
226
- totalFound: chunks.length,
339
+ totalFound: chunksWithScores.length,
227
340
  };
228
341
  }
229
342
  }
package/svara@1.0.0 ADDED
File without changes
package/test-rag.ts ADDED
@@ -0,0 +1,20 @@
1
+ import 'dotenv/config';
2
+ import { SvaraApp, SvaraAgent } from './src/index.js';
3
+
4
+ async function main() {
5
+ const app = new SvaraApp({ cors: true });
6
+
7
+ const agent = new SvaraAgent({
8
+ name: 'TestAgent',
9
+ model: 'gpt-4o-mini',
10
+ knowledge: '/Users/920078/Documents/svara/contoh_folder_knowledge/**/*',
11
+ verbose: true,
12
+ });
13
+
14
+ await agent.start();
15
+ app.route('/chat', agent.handler());
16
+ app.listen(3000);
17
+ console.log('✓ Test server on port 3000');
18
+ }
19
+
20
+ main().catch(console.error);
package/tsx ADDED
File without changes