@yesvara/svara 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +233 -0
- package/README.md +211 -23
- package/SvaraJS.png +0 -0
- package/dist/{chunk-FEA5KIJN.mjs → chunk-CCNWHBEI.mjs} +108 -27
- package/dist/chunk-GA7LHPOF.mjs +257 -0
- package/dist/cli/index.js +490 -4
- package/dist/cli/index.mjs +35 -3
- package/dist/db-PEMUBXAR.mjs +190 -0
- package/dist/index.d.mts +18 -2
- package/dist/index.d.ts +18 -2
- package/dist/index.js +454 -269
- package/dist/index.mjs +83 -239
- package/dist/{retriever-4QY667XF.mjs → retriever-JYOGHA4F.mjs} +2 -1
- package/package.json +27 -15
- package/src/cli/commands/db.ts +267 -0
- package/src/cli/index.ts +74 -4
- package/src/core/agent.ts +89 -8
- package/src/core/types.ts +14 -1
- package/src/database/schema.ts +31 -6
- package/src/rag/chunker.ts +1 -0
- package/src/rag/retriever.ts +146 -33
- package/svara@1.0.0 +0 -0
- package/test-rag.ts +20 -0
- package/tsx +0 -0
package/src/rag/retriever.ts
CHANGED
|
@@ -18,6 +18,8 @@ import type { RAGConfig, DocumentChunk, RetrievedContext } from '../core/types.j
|
|
|
18
18
|
import type { RAGRetriever } from '../core/agent.js';
|
|
19
19
|
import { DocumentLoader } from './loader.js';
|
|
20
20
|
import { Chunker } from './chunker.js';
|
|
21
|
+
import { SvaraDB } from '../database/sqlite.js';
|
|
22
|
+
import crypto from 'crypto';
|
|
21
23
|
|
|
22
24
|
// ─── Embedding Interface ──────────────────────────────────────────────────────
|
|
23
25
|
|
|
@@ -103,41 +105,142 @@ class OllamaEmbeddings implements EmbeddingProvider {
|
|
|
103
105
|
}
|
|
104
106
|
}
|
|
105
107
|
|
|
106
|
-
// ───
|
|
108
|
+
// ─── Vector Store (Persistent with SQLite) ────────────────────────────────────
|
|
107
109
|
|
|
108
|
-
|
|
109
|
-
chunk: DocumentChunk
|
|
110
|
-
|
|
110
|
+
/**
 * Base contract for vector stores: implementations persist chunk embeddings
 * and answer similarity queries against them.
 */
abstract class VectorStore {
  /** Stores `chunk` together with its embedding vector. */
  abstract add(chunk: DocumentChunk, embedding: number[]): Promise<void>;

  /** Resolves to the chunks selected for `queryEmbedding`, without scores. */
  abstract search(
    queryEmbedding: number[],
    topK: number,
    threshold?: number,
  ): Promise<DocumentChunk[]>;

  /** Like `search`, but each returned chunk is paired with its score. */
  abstract searchWithScores(
    queryEmbedding: number[],
    topK: number,
    threshold?: number,
  ): Promise<Array<{ chunk: DocumentChunk; score: number }>>;

  /** Resolves to the number of stored chunks. */
  abstract size(): Promise<number>;

  /**
   * Returns the hex-encoded MD5 digest of `content`.
   *
   * @param content - Text to fingerprint.
   * @returns A 32-character lowercase hex string.
   */
  protected contentHash(content: string): string {
    const hasher = crypto.createHash('md5');
    hasher.update(content);
    return hasher.digest('hex');
  }
}
|
|
112
119
|
|
|
113
|
-
class
|
|
114
|
-
private
|
|
120
|
+
/**
 * Vector store backed by the `svara_chunks` table and scoped to one agent.
 *
 * Embeddings and metadata are written as JSON text. Similarity is computed in
 * process with `cosineSimilarity` after loading every row that belongs to the
 * agent, so query cost grows with the number of chunks stored for that agent.
 */
class PersistentVectorStore extends VectorStore {
  constructor(private db: SvaraDB, private agentName: string) {
    super();
  }

  /**
   * Inserts `chunk` and its embedding for this agent.
   *
   * If a row with the same content hash already exists for this agent the
   * chunk is skipped and a message is logged.
   *
   * @param chunk - Chunk to persist.
   * @param embedding - Embedding vector for `chunk.content`.
   */
  async add(chunk: DocumentChunk, embedding: number[]): Promise<void> {
    const contentHash = this.contentHash(chunk.content);

    // Check if content already exists for this agent (deduplication per agent)
    const existing = this.db.query(
      'SELECT id FROM svara_chunks WHERE agent_name = ? AND content_hash = ?',
      [this.agentName, contentHash]
    ) as Array<{ id: string }>;

    if (existing.length > 0) {
      console.log(`[SvaraJS:RAG] Duplicate content detected for ${this.agentName}, skipping chunk ${chunk.id}`);
      return;
    }

    // Store embedding as JSON string
    const embeddingJson = JSON.stringify(embedding);

    this.db.run(
      `INSERT OR REPLACE INTO svara_chunks
       (id, agent_name, document_id, content, content_hash, chunk_index, embedding, source, metadata)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        chunk.id,
        this.agentName,
        chunk.documentId,
        chunk.content,
        contentHash,
        chunk.index,
        embeddingJson,
        chunk.source,
        JSON.stringify(chunk.metadata),
      ]
    );
  }

  /**
   * Returns up to `topK` chunks whose similarity to `queryEmbedding` is at
   * least `threshold`, best match first.
   *
   * Delegates to `searchWithScores` so both methods share a single query and
   * ranking pipeline, then drops the scores.
   */
  async search(queryEmbedding: number[], topK: number, threshold = 0): Promise<DocumentChunk[]> {
    const scored = await this.searchWithScores(queryEmbedding, topK, threshold);
    return scored.map((s) => s.chunk);
  }

  /**
   * Returns up to `topK` chunks with their similarity scores, keeping only
   * scores that are at least `threshold`, sorted by descending score.
   *
   * @param queryEmbedding - Embedding of the query text.
   * @param topK - Maximum number of results.
   * @param threshold - Minimum score to keep (defaults to 0).
   */
  async searchWithScores(queryEmbedding: number[], topK: number, threshold = 0): Promise<Array<{ chunk: DocumentChunk; score: number }>> {
    // Retrieve chunks for this agent only
    const rows = this.db.query(
      'SELECT id, document_id, content, chunk_index, embedding, source, metadata FROM svara_chunks WHERE agent_name = ? ORDER BY id DESC',
      [this.agentName]
    ) as Array<{
      id: string;
      document_id: string;
      content: string;
      chunk_index: number;
      embedding: string;
      source: string;
      metadata: string;
    }>;

    // Score and sort in-memory (SQLite doesn't have vector similarity)
    const scored = rows
      .map((row) => {
        const embedding = JSON.parse(row.embedding) as number[];
        return {
          chunk: {
            id: row.id,
            documentId: row.document_id,
            content: row.content,
            index: row.chunk_index,
            source: row.source,
            metadata: JSON.parse(row.metadata),
          } as DocumentChunk,
          score: cosineSimilarity(queryEmbedding, embedding),
        };
      })
      .filter((s) => s.score >= threshold)
      .sort((a, b) => b.score - a.score)
      .slice(0, topK);

    return scored;
  }

  /** Resolves to the number of chunks stored for this agent. */
  async size(): Promise<number> {
    const result = this.db.query(
      'SELECT COUNT(*) as count FROM svara_chunks WHERE agent_name = ?',
      [this.agentName]
    ) as Array<{
      count: number;
    }>;
    return result[0]?.count ?? 0;
  }
}
|
|
143
246
|
|
|
@@ -145,20 +248,26 @@ class InMemoryVectorStore {
|
|
|
145
248
|
|
|
146
249
|
export class VectorRetriever implements RAGRetriever {
|
|
147
250
|
private embedder!: EmbeddingProvider;
|
|
148
|
-
private store
|
|
251
|
+
private store!: VectorStore;
|
|
149
252
|
private loader: DocumentLoader;
|
|
150
253
|
private chunker: Chunker;
|
|
151
254
|
private config!: RAGConfig;
|
|
255
|
+
private db: SvaraDB;
|
|
256
|
+
private agentName: string;
|
|
152
257
|
|
|
153
|
-
constructor() {
|
|
154
|
-
this.
|
|
258
|
+
constructor(agentName: string, db?: SvaraDB) {
|
|
259
|
+
this.agentName = agentName;
|
|
155
260
|
this.loader = new DocumentLoader();
|
|
156
261
|
this.chunker = new Chunker();
|
|
262
|
+
this.db = db || new SvaraDB('./data/svara.db');
|
|
157
263
|
}
|
|
158
264
|
|
|
159
265
|
async init(config: RAGConfig): Promise<void> {
|
|
160
266
|
this.config = config;
|
|
161
267
|
|
|
268
|
+
// Init vector store (persistent per agent)
|
|
269
|
+
this.store = new PersistentVectorStore(this.db, this.agentName);
|
|
270
|
+
|
|
162
271
|
// Init chunker with config
|
|
163
272
|
if (config.chunking) {
|
|
164
273
|
this.chunker = new Chunker({
|
|
@@ -193,19 +302,21 @@ export class VectorRetriever implements RAGRetriever {
|
|
|
193
302
|
const embeddings = await this.embedder.embed(chunks.map((c) => c.content));
|
|
194
303
|
|
|
195
304
|
for (let i = 0; i < chunks.length; i++) {
|
|
196
|
-
this.store.add(chunks[i], embeddings[i]);
|
|
305
|
+
await this.store.add(chunks[i], embeddings[i]);
|
|
197
306
|
}
|
|
198
307
|
|
|
199
|
-
|
|
308
|
+
const size = await this.store.size();
|
|
309
|
+
console.log(`[SvaraJS:RAG] Vector store now has ${size} chunk(s).`);
|
|
200
310
|
}
|
|
201
311
|
|
|
202
312
|
async retrieve(query: string, topK = 5): Promise<string> {
|
|
203
|
-
|
|
313
|
+
const size = await this.store.size();
|
|
314
|
+
if (size === 0) return '';
|
|
204
315
|
|
|
205
316
|
const queryEmbedding = await this.embedder.embedOne(query);
|
|
206
317
|
const threshold = this.config.retrieval?.threshold ?? 0.3;
|
|
207
318
|
|
|
208
|
-
const chunks = this.store.search(queryEmbedding, topK, threshold);
|
|
319
|
+
const chunks = await this.store.search(queryEmbedding, topK, threshold);
|
|
209
320
|
|
|
210
321
|
if (!chunks.length) return '';
|
|
211
322
|
|
|
@@ -218,12 +329,14 @@ export class VectorRetriever implements RAGRetriever {
|
|
|
218
329
|
async retrieveChunks(query: string, topK = 5): Promise<RetrievedContext> {
|
|
219
330
|
const queryEmbedding = await this.embedder.embedOne(query);
|
|
220
331
|
const threshold = this.config.retrieval?.threshold ?? 0.3;
|
|
221
|
-
|
|
332
|
+
|
|
333
|
+
// Get chunks with scores from the store
|
|
334
|
+
const chunksWithScores = await this.store.searchWithScores(queryEmbedding, topK, threshold);
|
|
222
335
|
|
|
223
336
|
return {
|
|
224
|
-
chunks,
|
|
337
|
+
chunks: chunksWithScores,
|
|
225
338
|
query,
|
|
226
|
-
totalFound:
|
|
339
|
+
totalFound: chunksWithScores.length,
|
|
227
340
|
};
|
|
228
341
|
}
|
|
229
342
|
}
|
package/svara@1.0.0
ADDED
|
File without changes
|
package/test-rag.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import 'dotenv/config';
|
|
2
|
+
import { SvaraApp, SvaraAgent } from './src/index.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Manual smoke test: starts a knowledge-backed agent and serves it at `/chat`
 * on port 3000.
 *
 * The knowledge glob is read from the `SVARA_TEST_KNOWLEDGE` environment
 * variable and falls back to a path relative to the working directory, so the
 * script does not depend on one developer's home directory.
 */
async function main() {
  const knowledgeGlob =
    process.env.SVARA_TEST_KNOWLEDGE ?? './contoh_folder_knowledge/**/*';

  const app = new SvaraApp({ cors: true });

  const agent = new SvaraAgent({
    name: 'TestAgent',
    model: 'gpt-4o-mini',
    knowledge: knowledgeGlob,
    verbose: true,
  });

  await agent.start();
  app.route('/chat', agent.handler());
  app.listen(3000);
  console.log('✓ Test server on port 3000');
}

main().catch((err: unknown) => {
  console.error(err);
  // Report startup failure to the caller instead of exiting with status 0.
  process.exitCode = 1;
});
|
package/tsx
ADDED
|
File without changes
|