@toolpack-sdk/knowledge 1.2.0-SNAPSHOT.04032026-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +220 -0
  2. package/dist/embedders/ollama.d.ts +15 -0
  3. package/dist/embedders/ollama.d.ts.map +1 -0
  4. package/dist/embedders/ollama.js +51 -0
  5. package/dist/embedders/ollama.js.map +1 -0
  6. package/dist/embedders/openai.d.ts +18 -0
  7. package/dist/embedders/openai.d.ts.map +1 -0
  8. package/dist/embedders/openai.js +63 -0
  9. package/dist/embedders/openai.js.map +1 -0
  10. package/dist/errors.d.ts +25 -0
  11. package/dist/errors.d.ts.map +1 -0
  12. package/dist/errors.js +58 -0
  13. package/dist/errors.js.map +1 -0
  14. package/dist/index.d.ts +14 -0
  15. package/dist/index.d.ts.map +1 -0
  16. package/dist/index.js +31 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/interfaces.d.ts +48 -0
  19. package/dist/interfaces.d.ts.map +1 -0
  20. package/dist/interfaces.js +3 -0
  21. package/dist/interfaces.js.map +1 -0
  22. package/dist/knowledge.d.ts +74 -0
  23. package/dist/knowledge.d.ts.map +1 -0
  24. package/dist/knowledge.js +120 -0
  25. package/dist/knowledge.js.map +1 -0
  26. package/dist/providers/memory.d.ts +16 -0
  27. package/dist/providers/memory.d.ts.map +1 -0
  28. package/dist/providers/memory.js +72 -0
  29. package/dist/providers/memory.js.map +1 -0
  30. package/dist/providers/persistent.d.ts +23 -0
  31. package/dist/providers/persistent.d.ts.map +1 -0
  32. package/dist/providers/persistent.js +162 -0
  33. package/dist/providers/persistent.js.map +1 -0
  34. package/dist/sources/markdown.d.ts +20 -0
  35. package/dist/sources/markdown.d.ts.map +1 -0
  36. package/dist/sources/markdown.js +196 -0
  37. package/dist/sources/markdown.js.map +1 -0
  38. package/dist/utils/chunking.d.ts +6 -0
  39. package/dist/utils/chunking.d.ts.map +1 -0
  40. package/dist/utils/chunking.js +86 -0
  41. package/dist/utils/chunking.js.map +1 -0
  42. package/dist/utils/cosine.d.ts +4 -0
  43. package/dist/utils/cosine.d.ts.map +1 -0
  44. package/dist/utils/cosine.js +52 -0
  45. package/dist/utils/cosine.js.map +1 -0
  46. package/package.json +39 -0
@@ -0,0 +1,196 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.MarkdownSource = void 0;
37
+ const fs = __importStar(require("fs/promises"));
38
+ const path = __importStar(require("path"));
39
+ const crypto = __importStar(require("crypto"));
40
+ const fast_glob_1 = require("fast-glob");
41
+ const errors_js_1 = require("../errors.js");
42
+ const chunking_js_1 = require("../utils/chunking.js");
43
/**
 * Knowledge source that reads Markdown files matched by a glob pattern and
 * turns each file into heading-aware chunks.
 *
 * Each chunk is `{ id, content, metadata }`. `metadata` is built from the
 * caller-supplied `options.metadata`, then the file's frontmatter, then the
 * fields computed here (`heading`, `hasCode`, `source`, `sourcePath`,
 * `chunkIndex`, `totalChunks`); later sources win on key collisions.
 */
class MarkdownSource {
    pattern;
    options;
    /**
     * @param {string} pattern - Glob pattern handed to fast-glob.
     * @param {object} [options]
     * @param {number} [options.maxChunkSize=2000] - Estimated-token size above which a section is split.
     * @param {number} [options.chunkOverlap=200] - Overlap (estimated tokens) between the pieces of a split section.
     * @param {number} [options.minChunkSize=100] - Sections estimated below this are merged into the previous chunk.
     * @param {string} [options.namespace='markdown'] - First segment of every chunk id.
     * @param {object} [options.metadata={}] - Extra metadata copied onto every chunk.
     */
    constructor(pattern, options = {}) {
        this.pattern = pattern;
        this.options = {
            maxChunkSize: options.maxChunkSize ?? 2000,
            chunkOverlap: options.chunkOverlap ?? 200,
            minChunkSize: options.minChunkSize ?? 100,
            namespace: options.namespace ?? 'markdown',
            metadata: options.metadata ?? {},
        };
    }
    /**
     * Async generator yielding every chunk of every matched file, file by file.
     *
     * @yields {{id: string, content: string, metadata: object}}
     * @throws {IngestionError} When reading or chunking a file fails; the
     *   original error is attached as `cause`.
     */
    async *load() {
        const files = await (0, fast_glob_1.glob)(this.pattern, { absolute: true });
        for (const file of files) {
            try {
                const content = await fs.readFile(file, 'utf-8');
                const chunks = this.chunkMarkdown(content, file);
                for (const chunk of chunks) {
                    yield chunk;
                }
            }
            catch (error) {
                // Non-Error throwables have no `.message`; fall back to their string form
                // instead of reporting "undefined".
                const reason = error?.message ?? String(error);
                const wrapped = new errors_js_1.IngestionError(`Failed to process file: ${reason}`, file);
                // Keep the underlying error (and its stack) reachable for debugging.
                if (wrapped.cause === undefined) {
                    wrapped.cause = error;
                }
                throw wrapped;
            }
        }
    }
    /**
     * Splits one Markdown document into chunks.
     *
     * Sections come from `parseHeadings`. A section whose estimated token count
     * is below `minChunkSize` is appended to the previous chunk (when there is
     * one); a section above `maxChunkSize` is split with `splitLargeChunk` and,
     * if `chunkOverlap > 0`, overlapped with `applyOverlap`.
     *
     * Note: `chunkIndex` counts chunks across the whole file, while
     * `totalChunks` is the number of pieces of the chunk's own section.
     *
     * @param {string} content - Raw file content, frontmatter included.
     * @param {string} filePath - Path used for `source`, `sourcePath` and the id.
     * @returns {Array<{id: string, content: string, metadata: object}>}
     */
    chunkMarkdown(content, filePath) {
        const frontmatter = this.extractFrontmatter(content);
        const contentWithoutFrontmatter = this.removeFrontmatter(content);
        const sections = this.parseHeadings(contentWithoutFrontmatter);
        const chunks = [];
        let chunkIndex = 0;
        for (const section of sections) {
            const hasCode = /```[\s\S]*?```/.test(section.content);
            const tokens = (0, chunking_js_1.estimateTokens)(section.content);
            if (tokens < this.options.minChunkSize && chunks.length > 0) {
                const lastChunk = chunks[chunks.length - 1];
                lastChunk.content += '\n\n' + section.content;
                if (hasCode) {
                    lastChunk.metadata.hasCode = true;
                }
                // The id embeds a hash of the content; recompute it so it still
                // describes the chunk after the merge.
                lastChunk.id = this.generateChunkId(filePath, lastChunk.content, lastChunk.metadata.chunkIndex);
                continue;
            }
            let sectionChunks;
            if (tokens > this.options.maxChunkSize) {
                sectionChunks = (0, chunking_js_1.splitLargeChunk)(section.content, this.options.maxChunkSize);
            }
            else {
                sectionChunks = [section.content];
            }
            if (this.options.chunkOverlap > 0 && sectionChunks.length > 1) {
                sectionChunks = (0, chunking_js_1.applyOverlap)(sectionChunks, this.options.chunkOverlap);
            }
            for (const chunkContent of sectionChunks) {
                chunks.push({
                    id: this.generateChunkId(filePath, chunkContent, chunkIndex),
                    content: chunkContent,
                    metadata: {
                        ...this.options.metadata,
                        ...frontmatter,
                        heading: section.heading,
                        hasCode,
                        source: path.basename(filePath),
                        sourcePath: filePath,
                        chunkIndex,
                        totalChunks: sectionChunks.length,
                    },
                });
                chunkIndex++;
            }
        }
        return chunks;
    }
    /**
     * Cuts Markdown into sections at ATX headings (`#` .. `######`).
     *
     * Each section carries the heading path leading to it (`['']` for text
     * before the first heading), its trimmed content (heading line included)
     * and the level of its innermost heading (0 when there is none).
     * Heading-looking lines inside fenced code blocks (``` or ~~~) are kept as
     * content, and both LF and CRLF line endings are accepted (output uses LF).
     *
     * @param {string} content - Markdown without frontmatter.
     * @returns {Array<{heading: string[], content: string, level: number}>}
     */
    parseHeadings(content) {
        // Split on CRLF as well: a trailing "\r" would stop the heading regex
        // from matching because "." and "$" do not cross line terminators.
        const lines = content.split(/\r?\n/);
        const sections = [];
        const headingStack = [];
        let currentContent = [];
        // Fence marker (e.g. "```" or "~~~~") of the code block we are inside, or null.
        let openFence = null;
        const flushSection = () => {
            if (currentContent.length === 0) {
                return;
            }
            const headingPath = headingStack.map(h => h.text);
            sections.push({
                heading: headingPath.length > 0 ? headingPath : [''],
                content: currentContent.join('\n').trim(),
                level: headingStack.length > 0 ? headingStack[headingStack.length - 1].level : 0,
            });
            currentContent = [];
        };
        for (const line of lines) {
            if (openFence === null) {
                // A backtick fence may not have further backticks on its line
                // (that would be inline code such as ```x```).
                const opening = line.match(/^ {0,3}(`{3,}(?=[^`]*$)|~{3,})/);
                if (opening) {
                    openFence = opening[1];
                }
            }
            else {
                const closing = line.match(/^ {0,3}(`{3,}|~{3,})\s*$/);
                if (closing && closing[1][0] === openFence[0] && closing[1].length >= openFence.length) {
                    openFence = null;
                }
            }
            // "# comment" lines inside a code block are code, not headings.
            const headingMatch = openFence === null ? line.match(/^(#{1,6})\s+(.+)$/) : null;
            if (headingMatch) {
                flushSection();
                const level = headingMatch[1].length;
                const text = headingMatch[2].trim();
                // Drop siblings and deeper headings so the stack is the path to this heading.
                while (headingStack.length > 0 && headingStack[headingStack.length - 1].level >= level) {
                    headingStack.pop();
                }
                headingStack.push({ level, text });
            }
            currentContent.push(line);
        }
        flushSection();
        return sections.filter(s => s.content.length > 0);
    }
    /**
     * Parses a leading `---` frontmatter block of simple `key: value` lines.
     *
     * Values are converted: `true`/`false` to booleans, numeric text to
     * numbers, `[a, b]` to an array of trimmed strings; anything else stays a
     * string. Lines that do not look like `word: value` are ignored. This is
     * not a YAML parser (no quoting, nesting or multi-line values).
     *
     * @param {string} content - Raw file content.
     * @returns {Object<string, unknown>} Parsed keys, or `{}` without frontmatter.
     */
    extractFrontmatter(content) {
        // Same delimiter pattern as removeFrontmatter so both always agree on
        // whether a frontmatter block exists.
        const frontmatterMatch = content.match(/^---[ \t]*\r?\n([\s\S]*?)\r?\n---[ \t]*(?:\r?\n|$)/);
        if (!frontmatterMatch) {
            return {};
        }
        const frontmatter = {};
        for (const line of frontmatterMatch[1].split(/\r?\n/)) {
            const match = line.match(/^(\w+):\s*(.+)$/);
            if (!match) {
                continue;
            }
            const key = match[1];
            // Assigning "__proto__" on a plain object would swap its prototype.
            if (key === '__proto__') {
                continue;
            }
            let value = match[2].trim();
            if (value === 'true') {
                value = true;
            }
            else if (value === 'false') {
                value = false;
            }
            else if (value !== '' && !Number.isNaN(Number(value))) {
                // The empty-string guard matters: Number('') is 0, which would
                // turn "key: " into the number 0.
                value = Number(value);
            }
            else if (value.startsWith('[') && value.endsWith(']')) {
                const inner = value.slice(1, -1);
                value = inner.trim() === '' ? [] : inner.split(',').map((v) => v.trim());
            }
            frontmatter[key] = value;
        }
        return frontmatter;
    }
    /**
     * Returns `content` without its leading frontmatter block, if any.
     * Handles CRLF line endings and a block that ends the file without a
     * trailing newline.
     *
     * @param {string} content - Raw file content.
     * @returns {string}
     */
    removeFrontmatter(content) {
        return content.replace(/^---[ \t]*\r?\n[\s\S]*?\r?\n---[ \t]*(?:\r?\n|$)/, '');
    }
    /**
     * Builds a chunk id of the form `namespace:filename:index:hash`, where
     * `filename` is the base name without extension and `hash` is the first
     * eight hex digits of the content's MD5 (used as a fingerprint only).
     *
     * @param {string} filePath
     * @param {string} content
     * @param {number} index
     * @returns {string}
     */
    generateChunkId(filePath, content, index) {
        const hash = crypto.createHash('md5').update(content).digest('hex').substring(0, 8);
        const filename = path.basename(filePath, path.extname(filePath));
        return `${this.options.namespace}:${filename}:${index}:${hash}`;
    }
}
195
+ exports.MarkdownSource = MarkdownSource;
196
+ //# sourceMappingURL=markdown.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/sources/markdown.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,2CAA6B;AAC7B,+CAAiC;AACjC,yCAAiC;AAEjC,4CAA8C;AAC9C,sDAAqF;AAgBrF,MAAa,cAAc;IAIf;IAHF,OAAO,CAAkC;IAEjD,YACU,OAAe,EACvB,UAAiC,EAAE;QAD3B,YAAO,GAAP,OAAO,CAAQ;QAGvB,IAAI,CAAC,OAAO,GAAG;YACb,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,IAAI;YAC1C,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,GAAG;YACzC,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,GAAG;YACzC,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,UAAU;YAC1C,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,EAAE;SACjC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,CAAC,IAAI;QACT,MAAM,KAAK,GAAG,MAAM,IAAA,gBAAI,EAAC,IAAI,CAAC,OAAO,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAE3D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;gBACjD,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;gBAEjD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBAC3B,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,IAAI,0BAAc,CAAC,2BAA4B,KAAe,CAAC,OAAO,EAAE,EAAE,IAAI,CAAC,CAAC;YACxF,CAAC;QACH,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,OAAe,EAAE,QAAgB;QACrD,MAAM,WAAW,GAAG,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,yBAAyB,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,yBAAyB,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,OAAO,GAAG,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACvD,MAAM,MAAM,GAAG,IAAA,4BAAc,EAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAE/C,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5D,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAC5C,SAAS,CAAC,OAAO,IAAI,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,SAAS,CAAC,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC;gBACpC,CAAC;gBACD,SAAS;YACX,CAAC;YAED,IAAI,aAAuB,CAAC;YAC5B,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC;gBACvC,aAAa,GAAG,IAAA,6BAAe,EAAC,OAAO,CAAC,OAAO,EAAE,IAA
I,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YAC9E,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACpC,CAAC;YAED,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9D,aAAa,GAAG,IAAA,0BAAY,EAAC,aAAa,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YACzE,CAAC;YAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,MAAM,YAAY,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;gBACtC,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,YAAY,EAAE,UAAU,CAAC,CAAC;gBAEzE,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,OAAO;oBACX,OAAO,EAAE,YAAY;oBACrB,QAAQ,EAAE;wBACR,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ;wBACxB,GAAG,WAAW;wBACd,OAAO,EAAE,OAAO,CAAC,OAAO;wBACxB,OAAO;wBACP,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;wBAC/B,UAAU,EAAE,QAAQ;wBACpB,UAAU;wBACV,WAAW,EAAE,aAAa,CAAC,MAAM;qBAClC;iBACF,CAAC,CAAC;gBAEH,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,aAAa,CAAC,OAAe;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,MAAM,QAAQ,GAAc,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAsC,EAAE,CAAC;QAC3D,IAAI,cAAc,GAAa,EAAE,CAAC;QAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;YAErD,IAAI,YAAY,EAAE,CAAC;gBACjB,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC9B,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBAClD,QAAQ,CAAC,IAAI,CAAC;wBACZ,OAAO,EAAE,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBACzD,OAAO,EAAE,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;wBACzC,KAAK,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;qBACjF,CAAC,CAAC;oBACH,cAAc,GAAG,EAAE,CAAC;gBACtB,CAAC;gBAED,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;gBACrC,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAEpC,OAAO,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,EAAE,CAAC;oBACvF,YAAY,CAAC,GAAG,EAAE,CAAC;gBACrB,CAAC;gBAED,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EA
AE,IAAI,EAAE,CAAC,CAAC;gBACnC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC5B,CAAC;iBAAM,CAAC;gBACN,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAClD,QAAQ,CAAC,IAAI,CAAC;gBACZ,OAAO,EAAE,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACzD,OAAO,EAAE,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACzC,KAAK,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aACjF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpD,CAAC;IAEO,kBAAkB,CAAC,OAAe;QACxC,MAAM,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QAChE,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,eAAe,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,WAAW,GAA4B,EAAE,CAAC;QAEhD,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YAC5C,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,IAAI,KAAK,GAAY,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAErC,IAAI,KAAK,KAAK,MAAM;oBAAE,KAAK,GAAG,IAAI,CAAC;qBAC9B,IAAI,KAAK,KAAK,OAAO;oBAAE,KAAK,GAAG,KAAK,CAAC;qBACrC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBAAE,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;qBACjD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnF,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACrE,CAAC;gBAED,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,iBAAiB,CAAC,OAAe;QACvC,OAAO,OAAO,CAAC,OAAO,CAAC,uBAAuB,EAAE,EAAE,CAAC,CAAC;IACtD,CAAC;IAEO,eAAe,CAAC,QAAgB,EAAE,OAAe,EAAE,KAAa;QACtE,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CA
AC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACpF,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QACjE,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;IAClE,CAAC;CACF;AAhLD,wCAgLC"}
@@ -0,0 +1,6 @@
1
+ export declare function estimateTokens(text: string): number;
2
+ export declare function splitByParagraphs(text: string, maxTokens: number): string[];
3
+ export declare function splitBySentences(text: string, maxTokens: number): string[];
4
+ export declare function applyOverlap(chunks: string[], overlapTokens: number): string[];
5
+ export declare function splitLargeChunk(text: string, maxTokens: number): string[];
6
+ //# sourceMappingURL=chunking.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunking.d.ts","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":"AAAA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CAsB3E;AAED,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CAsB1E;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,aAAa,EAAE,MAAM,GAAG,MAAM,EAAE,CAsB9E;AAED,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CAmBzE"}
@@ -0,0 +1,86 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.estimateTokens = estimateTokens;
4
+ exports.splitByParagraphs = splitByParagraphs;
5
+ exports.splitBySentences = splitBySentences;
6
+ exports.applyOverlap = applyOverlap;
7
+ exports.splitLargeChunk = splitLargeChunk;
8
/**
 * Estimates the token count of `text` as its length divided by four,
 * rounded up.
 *
 * @param {string} text
 * @returns {number}
 */
function estimateTokens(text) {
    const charCount = text.length;
    const estimate = charCount / 4;
    return Math.ceil(estimate);
}
11
/**
 * Greedily packs the paragraphs of `text` into chunks whose estimated token
 * count stays within `maxTokens`.
 *
 * Paragraphs are separated by one or more blank lines; a line containing only
 * whitespace counts as blank, and CRLF line endings are accepted. Paragraphs
 * packed into the same chunk are re-joined with a single blank line. A single
 * paragraph that is itself larger than `maxTokens` is emitted as its own
 * (oversized) chunk.
 *
 * @param {string} text
 * @param {number} maxTokens
 * @returns {string[]} Trimmed, non-empty chunks.
 */
function splitByParagraphs(text, maxTokens) {
    // "\s*" lets whitespace-only lines act as separators while backtracking
    // leaves the indentation of the following paragraph untouched.
    const paragraphs = text.split(/\r?\n\s*\n/);
    const chunks = [];
    let currentChunk = '';
    for (const paragraph of paragraphs) {
        // Leading/trailing separators produce empty pieces; skipping them also
        // guarantees no empty chunk is ever emitted.
        if (paragraph.trim() === '') {
            continue;
        }
        const paragraphTokens = estimateTokens(paragraph);
        const currentTokens = estimateTokens(currentChunk);
        if (currentTokens + paragraphTokens > maxTokens && currentChunk) {
            chunks.push(currentChunk.trim());
            currentChunk = paragraph;
        }
        else {
            currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
        }
    }
    if (currentChunk) {
        chunks.push(currentChunk.trim());
    }
    return chunks;
}
31
/**
 * Greedily packs the sentences of `text` into chunks whose estimated token
 * count stays within `maxTokens`.
 *
 * A sentence is a run of text ending in one or more of `.`, `!`, `?`; trailing
 * text without terminal punctuation is kept as a final sentence rather than
 * dropped. Sentences keep the whitespace that preceded them in `text`, so
 * packing them back together reproduces the original spacing. A single
 * sentence larger than `maxTokens` is emitted as its own (oversized) chunk.
 *
 * @param {string} text
 * @param {number} maxTokens
 * @returns {string[]} Trimmed, non-empty chunks.
 */
function splitBySentences(text, maxTokens) {
    // "(?:[.!?]+|$)" also captures a last fragment that has no terminator.
    const sentences = (text.match(/[^.!?]+(?:[.!?]+|$)/g) || [text])
        .filter((sentence) => sentence.trim().length > 0);
    const chunks = [];
    let currentChunk = '';
    for (const sentence of sentences) {
        const sentenceTokens = estimateTokens(sentence);
        const currentTokens = estimateTokens(currentChunk);
        if (currentTokens + sentenceTokens > maxTokens && currentChunk) {
            chunks.push(currentChunk.trim());
            currentChunk = sentence;
        }
        else {
            // Each sentence already starts with its original separator, so no
            // extra space is inserted.
            currentChunk += sentence;
        }
    }
    if (currentChunk) {
        chunks.push(currentChunk.trim());
    }
    return chunks;
}
51
/**
 * Prefixes every chunk after the first with the last words of the chunk
 * before it, so neighbouring chunks share context.
 *
 * The number of words carried over is `ceil(overlapTokens / 4)`. Overlap is
 * taken from the original (un-overlapped) previous chunk and joined with
 * single spaces. The input array is returned unchanged when there is nothing
 * to overlap (at most one chunk, or `overlapTokens` not a positive number).
 *
 * @param {string[]} chunks
 * @param {number} overlapTokens
 * @returns {string[]}
 */
function applyOverlap(chunks, overlapTokens) {
    // "!(x > 0)" also rejects negatives and NaN, which would otherwise make
    // slice(-0) / slice(NaN) copy the entire previous chunk.
    if (chunks.length <= 1 || !(overlapTokens > 0)) {
        return chunks;
    }
    const overlapWords = Math.ceil(overlapTokens / 4);
    return chunks.map((chunk, i) => {
        if (i === 0) {
            return chunk;
        }
        // Drop empty pieces produced by leading/trailing whitespace.
        const words = chunks[i - 1].split(/\s+/).filter(Boolean);
        const overlap = words.slice(-overlapWords).join(' ');
        // Avoid a stray leading space when the previous chunk had no words.
        return overlap ? `${overlap} ${chunk}` : chunk;
    });
}
69
/**
 * Packs whitespace-delimited words of `text` into chunks whose estimated
 * token count stays within `maxTokens`. Words keep the whitespace that
 * preceded them, so formatting inside a chunk is preserved. A single word
 * larger than `maxTokens` is emitted on its own.
 */
function splitByWordBoundaries(text, maxTokens) {
    const pieces = text.match(/\s*\S+/g) || [text];
    const chunks = [];
    let current = '';
    for (const piece of pieces) {
        if (current && estimateTokens(current + piece) > maxTokens) {
            chunks.push(current.trim());
            current = piece;
        }
        else {
            current += piece;
        }
    }
    if (current.trim()) {
        chunks.push(current.trim());
    }
    return chunks;
}
/**
 * Splits `text` into chunks of at most `maxTokens` estimated tokens, using
 * the coarsest boundary that works: paragraphs first, then sentences for
 * paragraphs that are still too large, then word boundaries for sentences
 * (or punctuation-free runs) that are still too large.
 *
 * Text already within the limit is returned as a single-element array.
 * Only a single whitespace-free word longer than the limit can still
 * produce an oversized chunk.
 *
 * @param {string} text
 * @param {number} maxTokens
 * @returns {string[]}
 */
function splitLargeChunk(text, maxTokens) {
    if (estimateTokens(text) <= maxTokens) {
        return [text];
    }
    const finalChunks = [];
    for (const paragraphChunk of splitByParagraphs(text, maxTokens)) {
        if (estimateTokens(paragraphChunk) <= maxTokens) {
            finalChunks.push(paragraphChunk);
            continue;
        }
        for (const sentenceChunk of splitBySentences(paragraphChunk, maxTokens)) {
            if (estimateTokens(sentenceChunk) <= maxTokens) {
                finalChunks.push(sentenceChunk);
            }
            else {
                // Last resort: one sentence alone exceeds the limit.
                finalChunks.push(...splitByWordBoundaries(sentenceChunk, maxTokens));
            }
        }
    }
    return finalChunks;
}
86
+ //# sourceMappingURL=chunking.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunking.js","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":";;AAAA,wCAEC;AAED,8CAsBC;AAED,4CAsBC;AAED,oCAsBC;AAED,0CAmBC;AA/FD,SAAgB,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED,SAAgB,iBAAiB,CAAC,IAAY,EAAE,SAAiB;IAC/D,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,eAAe,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAClD,MAAM,aAAa,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;QAEnD,IAAI,aAAa,GAAG,eAAe,GAAG,SAAS,IAAI,YAAY,EAAE,CAAC;YAChE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YACjC,YAAY,GAAG,SAAS,CAAC;QAC3B,CAAC;aAAM,CAAC;YACN,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC;QAC3D,CAAC;IACH,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAgB,gBAAgB,CAAC,IAAY,EAAE,SAAiB;IAC9D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,cAAc,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,aAAa,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;QAEnD,IAAI,aAAa,GAAG,cAAc,GAAG,SAAS,IAAI,YAAY,EAAE,CAAC;YAC/D,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YACjC,YAAY,GAAG,QAAQ,CAAC;QAC1B,CAAC;aAAM,CAAC;YACN,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC;QACvD,CAAC;IACH,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAgB,YAAY,CAAC,MAAgB,EAAE,aAAqB;IAClE,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,aAAa,KAAK,CAAC,EAAE,CAAC;QAC9C,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,MAAM,gBAAgB,GAAa,EAAE,CAAC;IAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,IAAI,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAEtB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACV
,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAChC,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC;YAClD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrD,KAAK,GAAG,OAAO,GAAG,GAAG,GAAG,KAAK,CAAC;QAChC,CAAC;QAED,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,SAAgB,eAAe,CAAC,IAAY,EAAE,SAAiB;IAC7D,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAEpC,IAAI,MAAM,IAAI,SAAS,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAED,MAAM,eAAe,GAAG,iBAAiB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IAE3D,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;QACpC,IAAI,cAAc,CAAC,KAAK,CAAC,GAAG,SAAS,EAAE,CAAC;YACtC,WAAW,CAAC,IAAI,CAAC,GAAG,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC;QAC1D,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC"}
@@ -0,0 +1,4 @@
1
+ import { MetadataFilter } from '../interfaces.js';
2
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
3
+ export declare function matchesFilter(metadata: Record<string, unknown>, filter?: MetadataFilter): boolean;
4
+ //# sourceMappingURL=cosine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosine.d.ts","sourceRoot":"","sources":["../../src/utils/cosine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAElD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAsBjE;AAED,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAuBjG"}
@@ -0,0 +1,52 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.cosineSimilarity = cosineSimilarity;
4
+ exports.matchesFilter = matchesFilter;
5
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {number[]} a
 * @param {number[]} b
 * @returns {number} `dot(a, b) / (|a| * |b|)`, or 0 when either norm is 0.
 * @throws {Error} When the vectors differ in length.
 */
function cosineSimilarity(a, b) {
    const dims = a.length;
    if (dims !== b.length) {
        throw new Error('Vectors must have same dimensions');
    }
    let dot = 0;
    let sumSquaresA = 0;
    let sumSquaresB = 0;
    let idx = 0;
    while (idx < dims) {
        const x = a[idx];
        const y = b[idx];
        dot += x * y;
        sumSquaresA += x * x;
        sumSquaresB += y * y;
        idx += 1;
    }
    const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
    // Zero-length vector on either side: similarity is defined as 0 here.
    return magnitude === 0 ? 0 : dot / magnitude;
}
23
/**
 * Tests a metadata record against a filter.
 *
 * Every key of `filter` must be satisfied. A plain value requires strict
 * equality with `metadata[key]`. An object value is read as a set of
 * operators, all of which must hold:
 *   - `$in`: `metadata[key]` must be an element of the given array;
 *   - `$gt`: `metadata[key]` must be a number greater than the operand;
 *   - `$lt`: `metadata[key]` must be a number less than the operand.
 * Operators combine, so `{ $gt: 1, $lt: 5 }` expresses a range. An object with
 * none of these operators imposes no constraint.
 *
 * @param {Object<string, unknown>} metadata
 * @param {object} [filter] - When omitted, everything matches.
 * @returns {boolean}
 */
function matchesFilter(metadata, filter) {
    if (!filter) {
        return true;
    }
    for (const [key, condition] of Object.entries(filter)) {
        const metaValue = metadata[key];
        const isOperatorObject = typeof condition === 'object' && condition !== null && !Array.isArray(condition);
        if (!isOperatorObject) {
            if (metaValue !== condition) {
                return false;
            }
            continue;
        }
        // Check each operator independently so several can be combined on one key.
        if ('$in' in condition && !condition.$in.includes(metaValue)) {
            return false;
        }
        if ('$gt' in condition && (typeof metaValue !== 'number' || metaValue <= condition.$gt)) {
            return false;
        }
        if ('$lt' in condition && (typeof metaValue !== 'number' || metaValue >= condition.$lt)) {
            return false;
        }
    }
    return true;
}
52
+ //# sourceMappingURL=cosine.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cosine.js","sourceRoot":"","sources":["../../src/utils/cosine.ts"],"names":[],"mappings":";;AAEA,4CAsBC;AAED,sCAuBC;AA/CD,SAAgB,gBAAgB,CAAC,CAAW,EAAE,CAAW;IACvD,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,UAAU,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1B,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAExD,IAAI,WAAW,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,CAAC,CAAC;IACX,CAAC;IAED,OAAO,UAAU,GAAG,WAAW,CAAC;AAClC,CAAC;AAED,SAAgB,aAAa,CAAC,QAAiC,EAAE,MAAuB;IACtF,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAClD,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;QAEhC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;YACzE,IAAI,KAAK,IAAI,KAAK,EAAE,CAAC;gBACnB,MAAM,OAAO,GAAI,KAA4B,CAAC,GAAG,CAAC;gBAClD,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC;oBAAE,OAAO,KAAK,CAAC;YACjD,CAAC;iBAAM,IAAI,KAAK,IAAI,KAAK,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAI,KAAyB,CAAC,GAAG,CAAC;gBAC/C,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,IAAI,OAAO;oBAAE,OAAO,KAAK,CAAC;YAC1E,CAAC;iBAAM,IAAI,KAAK,IAAI,KAAK,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAI,KAAyB,CAAC,GAAG,CAAC;gBAC/C,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,IAAI,OAAO;oBAAE,OAAO,KAAK,CAAC;YAC1E,CAAC;QACH,CAAC;aAAM,CAAC;YACN,IAAI,SAAS,KAAK,KAAK;gBAAE,OAAO,KAAK,CAAC;QACxC,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC"}
package/package.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "name": "@toolpack-sdk/knowledge",
3
+ "version": "1.2.0-SNAPSHOT.04032026-2",
4
+ "description": "RAG (Retrieval-Augmented Generation) package for Toolpack SDK",
5
+ "main": "dist/index.js",
6
+ "types": "dist/index.d.ts",
7
+ "files": [
8
+ "dist",
9
+ "README.md"
10
+ ],
11
+ "scripts": {
12
+ "build": "rimraf dist && tsc",
13
+ "build:dev": "rimraf dist && tsc",
14
+ "test": "vitest run",
15
+ "test:watch": "vitest"
16
+ },
17
+ "keywords": [
18
+ "rag",
19
+ "retrieval",
20
+ "embeddings",
21
+ "vector-search",
22
+ "knowledge-base",
23
+ "ai"
24
+ ],
25
+ "author": "Sajeer (https://sajeerzeji.com)",
26
+ "license": "Apache-2.0",
27
+ "dependencies": {
28
+ "better-sqlite3": "^12.6.2",
29
+ "fast-glob": "^3.3.3",
30
+ "openai": "^6.18.0"
31
+ },
32
+ "devDependencies": {
33
+ "@types/better-sqlite3": "^7.6.13",
34
+ "@types/node": "^25.3.2",
35
+ "typescript": "^5.9.3",
36
+ "vitest": "^4.0.18",
37
+ "rimraf": "^5.0.0"
38
+ }
39
+ }