@yesvara/svara 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +497 -0
- package/dist/chunk-CIESM3BP.mjs +33 -0
- package/dist/chunk-FEA5KIJN.mjs +418 -0
- package/dist/cli/index.d.mts +1 -0
- package/dist/cli/index.d.ts +1 -0
- package/dist/cli/index.js +328 -0
- package/dist/cli/index.mjs +39 -0
- package/dist/dev-OYGXXK2B.mjs +69 -0
- package/dist/index.d.mts +967 -0
- package/dist/index.d.ts +967 -0
- package/dist/index.js +1976 -0
- package/dist/index.mjs +1502 -0
- package/dist/new-7K4NIDZO.mjs +177 -0
- package/dist/retriever-4QY667XF.mjs +7 -0
- package/examples/01-basic/index.ts +26 -0
- package/examples/02-with-tools/index.ts +73 -0
- package/examples/03-rag-knowledge/index.ts +41 -0
- package/examples/04-multi-channel/index.ts +91 -0
- package/package.json +74 -0
- package/src/app/index.ts +176 -0
- package/src/channels/telegram.ts +122 -0
- package/src/channels/web.ts +118 -0
- package/src/channels/whatsapp.ts +161 -0
- package/src/cli/commands/dev.ts +87 -0
- package/src/cli/commands/new.ts +213 -0
- package/src/cli/index.ts +78 -0
- package/src/core/agent.ts +607 -0
- package/src/core/llm.ts +406 -0
- package/src/core/types.ts +183 -0
- package/src/database/schema.ts +79 -0
- package/src/database/sqlite.ts +239 -0
- package/src/index.ts +94 -0
- package/src/memory/context.ts +49 -0
- package/src/memory/conversation.ts +51 -0
- package/src/rag/chunker.ts +165 -0
- package/src/rag/loader.ts +216 -0
- package/src/rag/retriever.ts +248 -0
- package/src/tools/executor.ts +54 -0
- package/src/tools/index.ts +89 -0
- package/src/tools/registry.ts +44 -0
- package/src/types.ts +131 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__require
|
|
3
|
+
} from "./chunk-CIESM3BP.mjs";
|
|
4
|
+
|
|
5
|
+
// src/rag/loader.ts
|
|
6
|
+
import fs from "fs/promises";
|
|
7
|
+
import path from "path";
|
|
8
|
+
import crypto from "crypto";
|
|
9
|
+
// Loads plain-text formats verbatim — no parsing or transformation.
var TextFileLoader = class {
  extensions = [".txt", ".md", ".mdx", ".rst", ".csv", ".log"];

  /**
   * Read the file as UTF-8 text.
   * @param {string} filePath - Path to the file on disk.
   * @returns {Promise<string>} Raw file contents.
   */
  async load(filePath) {
    const contents = await fs.readFile(filePath, "utf-8");
    return contents;
  }
};
|
|
15
|
+
// Loads .json / .jsonl files and flattens them to plain text.
var JsonFileLoader = class {
  extensions = [".json", ".jsonl"];

  /**
   * Read a JSON document as text suitable for chunking.
   * - `.jsonl`: each non-empty line is parsed and its values joined with
   *   spaces; the lines are rejoined with newlines.
   * - `.json`: parsed and pretty-printed (2-space indent) to normalize it.
   * @param {string} filePath
   * @returns {Promise<string>}
   */
  async load(filePath) {
    const raw = await fs.readFile(filePath, "utf-8");
    if (path.extname(filePath) !== ".jsonl") {
      return JSON.stringify(JSON.parse(raw), null, 2);
    }
    const flattened = [];
    for (const line of raw.split("\n")) {
      if (!line) continue;
      flattened.push(Object.values(JSON.parse(line)).join(" "));
    }
    return flattened.join("\n");
  }
};
|
|
29
|
+
// Loads HTML files and strips markup down to the visible text.
var HtmlFileLoader = class {
  extensions = [".html", ".htm"];

  /**
   * Read an HTML file and return its text content: script/style blocks
   * are dropped, remaining tags become spaces, whitespace is collapsed.
   * @param {string} filePath
   * @returns {Promise<string>}
   */
  async load(filePath) {
    const html = await fs.readFile(filePath, "utf-8");
    const withoutScripts = html.replace(/<script[\s\S]*?<\/script>/gi, "");
    const withoutStyles = withoutScripts.replace(/<style[\s\S]*?<\/style>/gi, "");
    const textOnly = withoutStyles.replace(/<[^>]+>/g, " ");
    return textOnly.replace(/\s+/g, " ").trim();
  }
};
|
|
36
|
+
// Loads PDF files via the optional "pdf-parse" dependency.
var PdfFileLoader = class {
  extensions = [".pdf"];

  /**
   * Extract the text layer from a PDF file.
   * @param {string} filePath
   * @returns {Promise<string>} Extracted text.
   * @throws {Error} With an install hint if "pdf-parse" is not installed.
   *   Read/parse failures when the package IS installed propagate unchanged,
   *   so the real cause (e.g. a corrupt PDF) is not masked.
   */
  async load(filePath) {
    let pdfParse;
    try {
      // Guard only the module lookup. Previously the catch also wrapped the
      // read/parse, so any PDF error surfaced as "install pdf-parse".
      pdfParse = __require("pdf-parse");
    } catch {
      throw new Error(
        '[SvaraJS] PDF loading requires the "pdf-parse" package.\nRun: npm install pdf-parse'
      );
    }
    const buffer = await fs.readFile(filePath);
    const data = await pdfParse(buffer);
    return data.text;
  }
};
|
|
51
|
+
// Loads DOCX files via the optional "mammoth" dependency.
var DocxFileLoader = class {
  extensions = [".docx"];

  /**
   * Extract the raw text of a DOCX file.
   * @param {string} filePath
   * @returns {Promise<string>} Extracted text.
   * @throws {Error} With an install hint if "mammoth" is not installed.
   *   Extraction failures when the package IS installed propagate unchanged,
   *   so the real cause (e.g. a corrupt document) is not masked.
   */
  async load(filePath) {
    let mammoth;
    try {
      // Guard only the module lookup. Previously the catch also wrapped the
      // extraction, so any DOCX error surfaced as "install mammoth".
      mammoth = __require("mammoth");
    } catch {
      throw new Error(
        '[SvaraJS] DOCX loading requires the "mammoth" package.\nRun: npm install mammoth'
      );
    }
    const result = await mammoth.extractRawText({ path: filePath });
    return result.value;
  }
};
|
|
65
|
+
// Facade that routes a file to the appropriate loader by extension and
// wraps the loaded content in a Document record.
var DocumentLoader = class {
  loaders;
  extensionMap;

  constructor() {
    this.loaders = [
      new TextFileLoader(),
      new JsonFileLoader(),
      new HtmlFileLoader(),
      new PdfFileLoader(),
      new DocxFileLoader()
    ];
    // Index every supported extension to its loader for O(1) dispatch.
    this.extensionMap = new Map();
    for (const loader of this.loaders) {
      loader.extensions.forEach((ext) => this.extensionMap.set(ext, loader));
    }
  }

  /**
   * Load a single file into a Document.
   * @param {string} filePath
   * @returns {Promise<object>} Document with id, content, type, source, metadata.
   * @throws {Error} If the extension has no registered loader.
   */
  async load(filePath) {
    const ext = path.extname(filePath).toLowerCase();
    const loader = this.extensionMap.get(ext);
    if (loader === undefined) {
      const supported = [...this.extensionMap.keys()].join(", ");
      throw new Error(
        `[SvaraJS] Unsupported file type: "${ext}". Supported: ${supported}`
      );
    }
    const content = await loader.load(filePath);
    const stats = await fs.stat(filePath);
    const metadata = {
      filename: path.basename(filePath),
      extension: ext,
      size: stats.size,
      lastModified: stats.mtime.toISOString()
    };
    return {
      id: this.hashFile(filePath),
      content,
      type: this.detectType(ext),
      source: filePath,
      metadata
    };
  }

  /**
   * Load multiple files. Unreadable/unsupported files are skipped with a
   * console warning instead of aborting the batch.
   */
  async loadMany(filePaths) {
    const documents = [];
    for (const filePath of filePaths) {
      try {
        documents.push(await this.load(filePath));
      } catch (err) {
        console.warn(`[SvaraJS:RAG] Skipping "${filePath}": ${err.message}`);
      }
    }
    return documents;
  }

  /** Check if this loader supports a given file extension. */
  supports(filePath) {
    return this.extensionMap.has(path.extname(filePath).toLowerCase());
  }

  /** Map a file extension to a coarse document type label. */
  detectType(ext) {
    switch (ext) {
      case ".md":
      case ".mdx":
        return "markdown";
      case ".pdf":
        return "pdf";
      case ".html":
      case ".htm":
        return "html";
      case ".json":
      case ".jsonl":
        return "json";
      case ".docx":
        return "docx";
      default:
        // Includes ".txt" and any unrecognized extension.
        return "text";
    }
  }

  /** Stable document id: MD5 of the file path (not the contents). */
  hashFile(filePath) {
    return crypto.createHash("md5").update(filePath).digest("hex");
  }
};
|
|
147
|
+
|
|
148
|
+
// src/rag/chunker.ts
|
|
149
|
+
import crypto2 from "crypto";
|
|
150
|
+
// Splits documents into overlapping text chunks using one of three strategies.
var Chunker = class {
  options;

  /**
   * @param {{strategy?: "fixed"|"sentence"|"paragraph", size?: number, overlap?: number}} [options]
   *   `size` and `overlap` are measured in characters.
   */
  constructor(options = {}) {
    this.options = {
      strategy: options.strategy ?? "sentence",
      size: options.size ?? 2e3,
      overlap: options.overlap ?? 200
    };
  }

  /**
   * Split a document into overlapping chunks.
   * Returns chunk records carrying the document's metadata plus chunking info;
   * an empty array for blank documents.
   */
  chunk(document) {
    const text = document.content.trim();
    if (!text) return [];
    let texts;
    switch (this.options.strategy) {
      case "fixed":
        texts = this.fixedChunk(text);
        break;
      case "paragraph":
        texts = this.paragraphChunk(text);
        break;
      case "sentence":
      default:
        texts = this.sentenceChunk(text);
        break;
    }
    return texts.filter((t) => t.trim().length > 0).map((content, index) => ({
      id: this.chunkId(document.id, index),
      documentId: document.id,
      content: content.trim(),
      index,
      metadata: {
        ...document.metadata,
        chunkIndex: index,
        strategy: this.options.strategy,
        charCount: content.length
      }
    }));
  }

  /** Chunk multiple documents at once. */
  chunkMany(documents) {
    return documents.flatMap((doc) => this.chunk(doc));
  }

  // ─── Strategies ───────────────────────────────────────────────────────────
  /**
   * Split into fixed-size windows with overlap. Good for code and structured data.
   * Two fixes over the naive sliding window:
   * - the step is clamped to >= 1 so overlap >= size no longer loops forever;
   * - iteration stops once a window reaches end-of-text, so no redundant tail
   *   chunk (fully contained in the previous one) is emitted.
   */
  fixedChunk(text) {
    const { size, overlap } = this.options;
    const step = Math.max(1, size - overlap);
    const chunks = [];
    for (let start = 0; start < text.length; start += step) {
      const end = Math.min(start + size, text.length);
      chunks.push(text.slice(start, end));
      if (end === text.length) break;
    }
    return chunks;
  }

  /**
   * Split by sentences, grouping them until size limit.
   * Best for prose text — preserves natural reading units.
   */
  sentenceChunk(text) {
    const sentences = this.splitSentences(text);
    return this.groupBySize(sentences);
  }

  /**
   * Split by paragraphs (double newline), grouping small ones.
   * Best for documentation, articles, and manuals.
   */
  paragraphChunk(text) {
    const paragraphs = text.split(/\n{2,}/).map((p) => p.trim()).filter(Boolean);
    return this.groupBySize(paragraphs);
  }

  // ─── Helpers ──────────────────────────────────────────────────────────────
  /** Split on sentence-ending punctuation followed by a capital/quote/paren. */
  splitSentences(text) {
    return text.split(/(?<=[.!?])\s+(?=[A-Z"'(])/).map((s) => s.trim()).filter(Boolean);
  }

  /**
   * Greedily pack units (sentences/paragraphs) into chunks up to `size` chars,
   * carrying the last `overlap` chars of each finished chunk into the next.
   */
  groupBySize(units) {
    const { size, overlap } = this.options;
    const chunks = [];
    let current = "";
    let overlapBuffer = "";
    for (const unit of units) {
      if (current.length + unit.length + 1 > size && current.length > 0) {
        chunks.push(current);
        // Seed the next chunk with the previous chunk's tail for continuity.
        current = overlapBuffer + (overlapBuffer ? " " : "") + unit;
        overlapBuffer = "";
      } else {
        current += (current ? " " : "") + unit;
      }
      if (current.length > overlap) {
        overlapBuffer = current.slice(-overlap);
      } else {
        overlapBuffer = current;
      }
    }
    if (current.trim()) chunks.push(current);
    return chunks;
  }

  /** Deterministic chunk id: MD5 of "<documentId>:<index>". */
  chunkId(documentId, index) {
    return crypto2.createHash("md5").update(`${documentId}:${index}`).digest("hex");
  }
};
|
|
257
|
+
|
|
258
|
+
// src/rag/retriever.ts
|
|
259
|
+
// Embeddings backend using the optional "openai" SDK.
var OpenAIEmbeddings = class {
  client;
  model;

  /**
   * @param {string} [apiKey] - Falls back to the OPENAI_API_KEY env var.
   * @param {string} [model] - Embedding model name.
   * @throws {Error} If the optional "openai" package is not installed.
   */
  constructor(apiKey, model = "text-embedding-3-small") {
    this.model = model;
    let OpenAI;
    try {
      // Guard only the module lookup. Previously the catch also wrapped the
      // client construction, so a constructor failure was mis-reported as a
      // missing package.
      ({ default: OpenAI } = __require("openai"));
    } catch {
      throw new Error('[SvaraJS] OpenAI embeddings require the "openai" package.');
    }
    this.client = new OpenAI({ apiKey: apiKey ?? process.env.OPENAI_API_KEY });
  }

  /**
   * Embed texts in batches of 100 inputs per API request.
   * @param {string[]} texts
   * @returns {Promise<number[][]>} One embedding vector per input, in order.
   */
  async embed(texts) {
    const client = this.client;
    const BATCH_SIZE = 100;
    const results = [];
    for (let i = 0; i < texts.length; i += BATCH_SIZE) {
      const batch = texts.slice(i, i + BATCH_SIZE);
      const response = await client.embeddings.create({
        model: this.model,
        input: batch
      });
      results.push(...response.data.map((d) => d.embedding));
    }
    return results;
  }

  /** Embed a single text and return its vector. */
  async embedOne(text) {
    const [embedding] = await this.embed([text]);
    return embedding;
  }
};
|
|
290
|
+
// Embeddings backend talking to a local Ollama server's /api/embeddings endpoint.
var OllamaEmbeddings = class {
  baseURL;
  model;

  /**
   * @param {string} [model] - Ollama embedding model name.
   * @param {string} [baseURL] - Ollama server base URL.
   */
  constructor(model = "nomic-embed-text", baseURL = "http://localhost:11434") {
    this.model = model;
    this.baseURL = baseURL;
  }

  /** Embed each text with its own request, all in parallel. */
  async embed(texts) {
    const requests = texts.map((text) => this.embedOne(text));
    return Promise.all(requests);
  }

  /**
   * POST one prompt to Ollama and return its embedding vector.
   * @throws {Error} On a non-OK HTTP response.
   */
  async embedOne(text) {
    const payload = { model: this.model, prompt: text };
    const response = await fetch(`${this.baseURL}/api/embeddings`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      throw new Error(`[SvaraJS] Ollama embeddings failed: ${response.statusText}`);
    }
    const { embedding } = await response.json();
    return embedding;
  }
};
|
|
313
|
+
// Naive in-memory vector index with linear-scan cosine search.
var InMemoryVectorStore = class {
  entries = [];

  /** Insert a chunk+embedding pair; an entry with the same chunk id is replaced. */
  add(chunk, embedding) {
    const entry = { chunk, embedding };
    const at = this.entries.findIndex((e) => e.chunk.id === chunk.id);
    if (at === -1) {
      this.entries.push(entry);
    } else {
      this.entries[at] = entry;
    }
  }

  /**
   * Return up to `topK` chunks whose cosine similarity to the query
   * meets `threshold`, best match first.
   */
  search(queryEmbedding, topK, threshold = 0) {
    const ranked = [];
    for (const { chunk, embedding } of this.entries) {
      const score = cosineSimilarity(queryEmbedding, embedding);
      if (score >= threshold) ranked.push({ chunk, score });
    }
    ranked.sort((a, b) => b.score - a.score);
    return ranked.slice(0, topK).map((entry) => entry.chunk);
  }

  /** Number of stored chunks. */
  get size() {
    return this.entries.length;
  }
};
|
|
334
|
+
// Orchestrates the RAG pipeline: load -> chunk -> embed -> store -> search.
var VectorRetriever = class {
  embedder;
  store;
  loader;
  chunker;
  config;

  constructor() {
    this.store = new InMemoryVectorStore();
    this.loader = new DocumentLoader();
    this.chunker = new Chunker();
  }

  /**
   * Configure chunking and the embeddings backend. Must be called before
   * addDocuments / retrieve.
   * @throws {Error} On an unknown embeddings provider.
   */
  async init(config) {
    this.config = config;
    if (config.chunking) {
      this.chunker = new Chunker({
        strategy: config.chunking.strategy ?? "sentence",
        // Chunk sizes are configured in tokens; ~4 chars/token heuristic.
        size: config.chunking.size ? config.chunking.size * 4 : 2e3,
        overlap: config.chunking.overlap ? config.chunking.overlap * 4 : 200
      });
    }
    const emb = config.embeddings ?? { provider: "openai" };
    switch (emb.provider) {
      case "openai":
        this.embedder = new OpenAIEmbeddings(emb.apiKey, emb.model);
        break;
      case "ollama":
        this.embedder = new OllamaEmbeddings(emb.model);
        break;
      default:
        throw new Error(`[SvaraJS] Unknown embeddings provider: "${emb.provider}"`);
    }
  }

  /** Load, chunk, embed, and index the given files. */
  async addDocuments(filePaths) {
    const documents = await this.loader.loadMany(filePaths);
    if (!documents.length) return;
    const chunks = this.chunker.chunkMany(documents);
    if (!chunks.length) return;
    console.log(`[SvaraJS:RAG] Embedding ${chunks.length} chunk(s)...`);
    const embeddings = await this.embedder.embed(chunks.map((c) => c.content));
    for (let i = 0; i < chunks.length; i++) {
      this.store.add(chunks[i], embeddings[i]);
    }
    console.log(`[SvaraJS:RAG] Vector store now has ${this.store.size} chunk(s).`);
  }

  /**
   * Retrieve the best-matching chunks formatted as a context string,
   * or "" when nothing is indexed / nothing passes the threshold.
   */
  async retrieve(query, topK = 5) {
    if (this.store.size === 0) return "";
    const queryEmbedding = await this.embedder.embedOne(query);
    const threshold = this.config.retrieval?.threshold ?? 0.3;
    const chunks = this.store.search(queryEmbedding, topK, threshold);
    if (!chunks.length) return "";
    return chunks.map((chunk, i) => `[Context ${i + 1}]
Source: ${String(chunk.metadata.filename ?? chunk.documentId)}
${chunk.content}`).join("\n\n---\n\n");
  }

  /** Retrieve raw matching chunks plus the query and match count. */
  async retrieveChunks(query, topK = 5) {
    // Mirror retrieve(): short-circuit on an empty store so we never make a
    // pointless (possibly remote) embedding call — and never touch an
    // embedder that init() has not set up yet.
    if (this.store.size === 0) {
      return { chunks: [], query, totalFound: 0 };
    }
    const queryEmbedding = await this.embedder.embedOne(query);
    const threshold = this.config.retrieval?.threshold ?? 0.3;
    const chunks = this.store.search(queryEmbedding, topK, threshold);
    return {
      chunks,
      query,
      totalFound: chunks.length
    };
  }
};
|
|
400
|
+
/**
 * Cosine similarity between two equal-length numeric vectors.
 * Returns 0 for mismatched lengths or when either vector has zero norm.
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} Similarity in [-1, 1].
 */
function cosineSimilarity(a, b) {
  if (a.length !== b.length) return 0;
  let dot = 0;
  let sqA = 0;
  let sqB = 0;
  a.forEach((value, i) => {
    dot += value * b[i];
    sqA += value * value;
    sqB += b[i] * b[i];
  });
  const norm = Math.sqrt(sqA) * Math.sqrt(sqB);
  if (norm === 0) return 0;
  return dot / norm;
}
|
|
413
|
+
|
|
414
|
+
export {
|
|
415
|
+
DocumentLoader,
|
|
416
|
+
Chunker,
|
|
417
|
+
VectorRetriever
|
|
418
|
+
};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
#!/usr/bin/env node
|