toolpack-sdk 1.0.0 → 1.1.0-SNAPSHOT

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. package/README.md +115 -4
  2. package/dist/client/index.d.ts +1 -0
  3. package/dist/client/index.d.ts.map +1 -1
  4. package/dist/client/index.js +82 -79
  5. package/dist/client/index.js.map +1 -1
  6. package/dist/index.d.ts +1 -0
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +1 -0
  9. package/dist/index.js.map +1 -1
  10. package/dist/knowledge/embedders/gemini-embedder.d.ts +21 -0
  11. package/dist/knowledge/embedders/gemini-embedder.d.ts.map +1 -0
  12. package/dist/knowledge/embedders/gemini-embedder.js +93 -0
  13. package/dist/knowledge/embedders/gemini-embedder.js.map +1 -0
  14. package/dist/knowledge/embedders/ollama-embedder.d.ts +12 -0
  15. package/dist/knowledge/embedders/ollama-embedder.d.ts.map +1 -0
  16. package/dist/knowledge/embedders/ollama-embedder.js +68 -0
  17. package/dist/knowledge/embedders/ollama-embedder.js.map +1 -0
  18. package/dist/knowledge/embedders/openai-embedder.d.ts +14 -0
  19. package/dist/knowledge/embedders/openai-embedder.d.ts.map +1 -0
  20. package/dist/knowledge/embedders/openai-embedder.js +94 -0
  21. package/dist/knowledge/embedders/openai-embedder.js.map +1 -0
  22. package/dist/knowledge/errors.d.ts +22 -0
  23. package/dist/knowledge/errors.d.ts.map +1 -0
  24. package/dist/knowledge/errors.js +51 -0
  25. package/dist/knowledge/errors.js.map +1 -0
  26. package/dist/knowledge/index.d.ts +12 -0
  27. package/dist/knowledge/index.d.ts.map +1 -0
  28. package/dist/knowledge/index.js +26 -0
  29. package/dist/knowledge/index.js.map +1 -0
  30. package/dist/knowledge/knowledge.d.ts +38 -0
  31. package/dist/knowledge/knowledge.d.ts.map +1 -0
  32. package/dist/knowledge/knowledge.js +287 -0
  33. package/dist/knowledge/knowledge.js.map +1 -0
  34. package/dist/knowledge/providers/memory-provider.d.ts +15 -0
  35. package/dist/knowledge/providers/memory-provider.d.ts.map +1 -0
  36. package/dist/knowledge/providers/memory-provider.js +113 -0
  37. package/dist/knowledge/providers/memory-provider.js.map +1 -0
  38. package/dist/knowledge/sources/json-source.d.ts +18 -0
  39. package/dist/knowledge/sources/json-source.d.ts.map +1 -0
  40. package/dist/knowledge/sources/json-source.js +224 -0
  41. package/dist/knowledge/sources/json-source.js.map +1 -0
  42. package/dist/knowledge/sources/markdown-source.d.ts +27 -0
  43. package/dist/knowledge/sources/markdown-source.d.ts.map +1 -0
  44. package/dist/knowledge/sources/markdown-source.js +410 -0
  45. package/dist/knowledge/sources/markdown-source.js.map +1 -0
  46. package/dist/knowledge/sources/sqlite-text-source.d.ts +18 -0
  47. package/dist/knowledge/sources/sqlite-text-source.d.ts.map +1 -0
  48. package/dist/knowledge/sources/sqlite-text-source.js +201 -0
  49. package/dist/knowledge/sources/sqlite-text-source.js.map +1 -0
  50. package/dist/knowledge/types.d.ts +130 -0
  51. package/dist/knowledge/types.d.ts.map +1 -0
  52. package/dist/knowledge/types.js +3 -0
  53. package/dist/knowledge/types.js.map +1 -0
  54. package/dist/mcp/client.js +1 -1
  55. package/dist/mcp/client.js.map +1 -1
  56. package/dist/providers/anthropic/index.js +13 -13
  57. package/dist/providers/anthropic/index.js.map +1 -1
  58. package/dist/providers/config.d.ts +0 -2
  59. package/dist/providers/config.d.ts.map +1 -1
  60. package/dist/providers/config.js.map +1 -1
  61. package/dist/providers/gemini/index.js +10 -10
  62. package/dist/providers/gemini/index.js.map +1 -1
  63. package/dist/providers/ollama/adapter.d.ts.map +1 -1
  64. package/dist/providers/ollama/adapter.js +14 -18
  65. package/dist/providers/ollama/adapter.js.map +1 -1
  66. package/dist/providers/ollama/slm-healer.js +7 -7
  67. package/dist/providers/ollama/slm-healer.js.map +1 -1
  68. package/dist/providers/openai/index.d.ts.map +1 -1
  69. package/dist/providers/openai/index.js +15 -21
  70. package/dist/providers/openai/index.js.map +1 -1
  71. package/dist/providers/provider-logger.d.ts +17 -9
  72. package/dist/providers/provider-logger.d.ts.map +1 -1
  73. package/dist/providers/provider-logger.js +68 -26
  74. package/dist/providers/provider-logger.js.map +1 -1
  75. package/dist/toolpack.d.ts +12 -0
  76. package/dist/toolpack.d.ts.map +1 -1
  77. package/dist/toolpack.js +36 -34
  78. package/dist/toolpack.js.map +1 -1
  79. package/dist/tools/cloud-tools/tools/deploy/index.d.ts.map +1 -1
  80. package/dist/tools/cloud-tools/tools/deploy/index.js +2 -0
  81. package/dist/tools/cloud-tools/tools/deploy/index.js.map +1 -1
  82. package/dist/tools/cloud-tools/tools/list/index.d.ts.map +1 -1
  83. package/dist/tools/cloud-tools/tools/list/index.js +2 -0
  84. package/dist/tools/cloud-tools/tools/list/index.js.map +1 -1
  85. package/dist/tools/coding-tools/tools/find-references/index.d.ts.map +1 -1
  86. package/dist/tools/coding-tools/tools/find-references/index.js +2 -0
  87. package/dist/tools/coding-tools/tools/find-references/index.js.map +1 -1
  88. package/dist/tools/coding-tools/tools/find-symbol/index.d.ts.map +1 -1
  89. package/dist/tools/coding-tools/tools/find-symbol/index.js +2 -0
  90. package/dist/tools/coding-tools/tools/find-symbol/index.js.map +1 -1
  91. package/dist/tools/coding-tools/tools/get-exports/index.d.ts.map +1 -1
  92. package/dist/tools/coding-tools/tools/get-exports/index.js +2 -0
  93. package/dist/tools/coding-tools/tools/get-exports/index.js.map +1 -1
  94. package/dist/tools/coding-tools/tools/get-imports/index.d.ts.map +1 -1
  95. package/dist/tools/coding-tools/tools/get-imports/index.js +2 -0
  96. package/dist/tools/coding-tools/tools/get-imports/index.js.map +1 -1
  97. package/dist/tools/coding-tools/tools/get-outline/index.d.ts.map +1 -1
  98. package/dist/tools/coding-tools/tools/get-outline/index.js +2 -0
  99. package/dist/tools/coding-tools/tools/get-outline/index.js.map +1 -1
  100. package/dist/tools/coding-tools/tools/get-symbols/index.d.ts.map +1 -1
  101. package/dist/tools/coding-tools/tools/get-symbols/index.js +2 -0
  102. package/dist/tools/coding-tools/tools/get-symbols/index.js.map +1 -1
  103. package/dist/tools/config-loader.d.ts +13 -0
  104. package/dist/tools/config-loader.d.ts.map +1 -1
  105. package/dist/tools/config-loader.js +20 -8
  106. package/dist/tools/config-loader.js.map +1 -1
  107. package/dist/tools/create-tool-project.d.ts.map +1 -1
  108. package/dist/tools/create-tool-project.js +2 -1
  109. package/dist/tools/create-tool-project.js.map +1 -1
  110. package/dist/tools/db-tools/tools/query/index.d.ts.map +1 -1
  111. package/dist/tools/db-tools/tools/query/index.js +2 -0
  112. package/dist/tools/db-tools/tools/query/index.js.map +1 -1
  113. package/dist/tools/diff-tools/tools/apply/index.d.ts.map +1 -1
  114. package/dist/tools/diff-tools/tools/apply/index.js +2 -0
  115. package/dist/tools/diff-tools/tools/apply/index.js.map +1 -1
  116. package/dist/tools/diff-tools/tools/create/index.d.ts.map +1 -1
  117. package/dist/tools/diff-tools/tools/create/index.js +2 -0
  118. package/dist/tools/diff-tools/tools/create/index.js.map +1 -1
  119. package/dist/tools/exec-tools/tools/run/index.d.ts.map +1 -1
  120. package/dist/tools/exec-tools/tools/run/index.js +2 -0
  121. package/dist/tools/exec-tools/tools/run/index.js.map +1 -1
  122. package/dist/tools/exec-tools/tools/run-background/index.d.ts.map +1 -1
  123. package/dist/tools/exec-tools/tools/run-background/index.js +5 -0
  124. package/dist/tools/exec-tools/tools/run-background/index.js.map +1 -1
  125. package/dist/tools/exec-tools/tools/run-shell/index.d.ts.map +1 -1
  126. package/dist/tools/exec-tools/tools/run-shell/index.js +2 -0
  127. package/dist/tools/exec-tools/tools/run-shell/index.js.map +1 -1
  128. package/dist/tools/fs-tools/tools/delete-file/index.d.ts.map +1 -1
  129. package/dist/tools/fs-tools/tools/delete-file/index.js +2 -0
  130. package/dist/tools/fs-tools/tools/delete-file/index.js.map +1 -1
  131. package/dist/tools/fs-tools/tools/read-file/index.d.ts.map +1 -1
  132. package/dist/tools/fs-tools/tools/read-file/index.js +2 -0
  133. package/dist/tools/fs-tools/tools/read-file/index.js.map +1 -1
  134. package/dist/tools/fs-tools/tools/search/index.d.ts.map +1 -1
  135. package/dist/tools/fs-tools/tools/search/index.js +2 -0
  136. package/dist/tools/fs-tools/tools/search/index.js.map +1 -1
  137. package/dist/tools/fs-tools/tools/write-file/index.d.ts.map +1 -1
  138. package/dist/tools/fs-tools/tools/write-file/index.js +2 -0
  139. package/dist/tools/fs-tools/tools/write-file/index.js.map +1 -1
  140. package/dist/tools/http-tools/tools/delete/index.d.ts.map +1 -1
  141. package/dist/tools/http-tools/tools/delete/index.js +2 -0
  142. package/dist/tools/http-tools/tools/delete/index.js.map +1 -1
  143. package/dist/tools/http-tools/tools/download/index.d.ts.map +1 -1
  144. package/dist/tools/http-tools/tools/download/index.js +2 -0
  145. package/dist/tools/http-tools/tools/download/index.js.map +1 -1
  146. package/dist/tools/http-tools/tools/get/index.d.ts.map +1 -1
  147. package/dist/tools/http-tools/tools/get/index.js +2 -0
  148. package/dist/tools/http-tools/tools/get/index.js.map +1 -1
  149. package/dist/tools/http-tools/tools/post/index.d.ts.map +1 -1
  150. package/dist/tools/http-tools/tools/post/index.js +2 -0
  151. package/dist/tools/http-tools/tools/post/index.js.map +1 -1
  152. package/dist/tools/http-tools/tools/put/index.d.ts.map +1 -1
  153. package/dist/tools/http-tools/tools/put/index.js +2 -0
  154. package/dist/tools/http-tools/tools/put/index.js.map +1 -1
  155. package/dist/tools/index.d.ts +1 -1
  156. package/dist/tools/index.d.ts.map +1 -1
  157. package/dist/tools/index.js +3 -2
  158. package/dist/tools/index.js.map +1 -1
  159. package/dist/tools/system-tools/tools/cwd/index.d.ts.map +1 -1
  160. package/dist/tools/system-tools/tools/cwd/index.js +2 -0
  161. package/dist/tools/system-tools/tools/cwd/index.js.map +1 -1
  162. package/dist/tools/system-tools/tools/env/index.d.ts.map +1 -1
  163. package/dist/tools/system-tools/tools/env/index.js +2 -0
  164. package/dist/tools/system-tools/tools/env/index.js.map +1 -1
  165. package/dist/tools/system-tools/tools/info/index.d.ts.map +1 -1
  166. package/dist/tools/system-tools/tools/info/index.js +2 -0
  167. package/dist/tools/system-tools/tools/info/index.js.map +1 -1
  168. package/dist/tools/types.d.ts +1 -0
  169. package/dist/tools/types.d.ts.map +1 -1
  170. package/dist/tools/types.js +1 -0
  171. package/dist/tools/types.js.map +1 -1
  172. package/dist/tools/web-tools/tools/fetch/index.d.ts.map +1 -1
  173. package/dist/tools/web-tools/tools/fetch/index.js +2 -0
  174. package/dist/tools/web-tools/tools/fetch/index.js.map +1 -1
  175. package/dist/tools/web-tools/tools/scrape/index.d.ts.map +1 -1
  176. package/dist/tools/web-tools/tools/scrape/index.js +2 -0
  177. package/dist/tools/web-tools/tools/scrape/index.js.map +1 -1
  178. package/dist/tools/web-tools/tools/search/index.d.ts.map +1 -1
  179. package/dist/tools/web-tools/tools/search/index.js +44 -7
  180. package/dist/tools/web-tools/tools/search/index.js.map +1 -1
  181. package/dist/tools/web-tools/tools/search/schema.js +1 -1
  182. package/dist/tools/web-tools/tools/search/schema.js.map +1 -1
  183. package/dist/workflows/planning/planner.d.ts.map +1 -1
  184. package/dist/workflows/planning/planner.js +16 -2
  185. package/dist/workflows/planning/planner.js.map +1 -1
  186. package/dist/workflows/steps/step-executor.d.ts.map +1 -1
  187. package/dist/workflows/steps/step-executor.js +17 -5
  188. package/dist/workflows/steps/step-executor.js.map +1 -1
  189. package/dist/workflows/workflow-executor.d.ts.map +1 -1
  190. package/dist/workflows/workflow-executor.js +32 -2
  191. package/dist/workflows/workflow-executor.js.map +1 -1
  192. package/package.json +4 -4
@@ -0,0 +1,224 @@
1
+ "use strict";
2
// Presumably compiler-emitted CommonJS interop helpers (the source map for this file points at a .ts source) — confirm before hand-editing.
// __createBinding(o, m, k, k2): exposes m[k] on o under the name k2 (k2 defaults to k).
// An already-present this.__createBinding is reused. Otherwise m's own descriptor for k is used,
// except that a live getter returning m[k] is substituted when there is no descriptor, when the
// descriptor is an accessor and m is not flagged __esModule, or when it is a data property that is
// writable or configurable. Without Object.create the value is copied by plain assignment.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// __setModuleDefault(o, v): stores v as the enumerable "default" property of o
// (plain assignment when Object.create is unavailable).
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// __importStar(mod): returns mod untouched when mod.__esModule is truthy. Otherwise it builds a
// fresh object that re-exposes every own property name of mod except "default" through
// __createBinding, and sets mod itself as that object's "default".
+ var __importStar = (this && this.__importStar) || (function () {
19
// ownKeys overwrites itself on its first call with Object.getOwnPropertyNames, or with a
// for...in + hasOwnProperty fallback when that function is missing, then delegates to it.
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.JSONSource = void 0;
37
+ const fs = __importStar(require("fs"));
38
+ const path = __importStar(require("path"));
39
+ const crypto = __importStar(require("crypto"));
40
+ const errors_js_1 = require("../errors.js");
41
/**
 * Knowledge source that reads one JSON file and turns its contents into chunks.
 *
 * Options read by this class:
 * - `watch`: when truthy, `watch()` monitors the file for changes.
 * - `chunkBy`: `'item'` (default) or a path expression starting with `$.`.
 * - `contentFields`: dotted field paths whose values form the chunk content.
 * - `metadataFields`: dotted field paths copied into the chunk metadata.
 * - `metadata`: extra metadata merged into every chunk.
 * - `namespace`: prefix of the generated chunk id (default `'json'`).
 */
class JSONSource {
    filePath;
    options;
    watcher = null;
    lastHash = null;

    /**
     * @param {string} filePath Path of the JSON file; stored as an absolute path.
     * @param {object} [options] See the class description for the keys that are read.
     */
    constructor(filePath, options = {}) {
        this.filePath = path.resolve(filePath);
        this.options = options;
    }

    /**
     * Reads and parses the file, then yields one chunk per extracted item.
     *
     * @returns {AsyncGenerator<{id: string, content: string, metadata: object}>}
     * @throws {IngestionError} When the file cannot be read, parsed or chunked.
     */
    async *load() {
        let chunks;
        try {
            const content = await fs.promises.readFile(this.filePath, 'utf-8');
            this.lastHash = this.hashContent(content);
            chunks = this.extractChunks(JSON.parse(content));
        }
        catch (error) {
            throw new errors_js_1.IngestionError(`Failed to parse JSON file: ${error.message}`, this.filePath, error);
        }
        // Yield outside the try block so that an error thrown into the generator
        // by its consumer is not re-labelled as a parse failure.
        for (const chunk of chunks) {
            yield chunk;
        }
    }

    /**
     * Watches the file and yields `{ type: 'update', chunk }` for every chunk of
     * each new file version. Yields nothing when `options.watch` is falsy.
     *
     * The iteration ends once the watcher is closed (for example through
     * `stop()`), and the watcher is always released when the consumer stops
     * iterating.
     *
     * @returns {AsyncGenerator<{type: 'update', chunk: object}>}
     * @throws {IngestionError} When the underlying file watcher reports an error.
     */
    async *watch() {
        if (!this.options.watch) {
            return;
        }
        const updateQueue = [];
        let wake = null;
        let closed = false;
        let watchError = null;

        // Resumes the consumer loop if it is currently parked on an empty queue.
        const notify = () => {
            if (wake) {
                const resume = wake;
                wake = null;
                resume();
            }
        };

        const processChange = async () => {
            try {
                const content = await fs.promises.readFile(this.filePath, 'utf-8');
                const newHash = this.hashContent(content);
                if (closed || newHash === this.lastHash) {
                    return;
                }
                const chunks = this.extractChunks(JSON.parse(content));
                // Remember the hash only after a successful parse; a half-written
                // file must not make the finished file look like a new version.
                this.lastHash = newHash;
                for (const chunk of chunks) {
                    updateQueue.push({ type: 'update', chunk });
                }
                notify();
            }
            catch {
                // Best effort: the file might be in the middle of being written.
                // The next change event retries.
            }
        };

        const watcher = fs.watch(this.filePath, (eventType) => {
            if (eventType === 'change') {
                // processChange handles its own errors, so it is safe to not await it.
                void processChange();
            }
        });
        watcher.once('close', () => {
            closed = true;
            notify();
        });
        watcher.once('error', (error) => {
            watchError = error;
            closed = true;
            notify();
        });
        this.watcher = watcher;

        try {
            while (!closed) {
                if (updateQueue.length > 0) {
                    yield updateQueue.shift();
                }
                else {
                    await new Promise((resolve) => {
                        wake = resolve;
                    });
                }
            }
        }
        finally {
            // Also reached when the consumer breaks out of `for await`.
            watcher.close();
            if (this.watcher === watcher) {
                this.watcher = null;
            }
        }
        if (watchError) {
            throw new errors_js_1.IngestionError(`File watcher failed: ${watchError.message}`, this.filePath, watchError);
        }
    }

    /**
     * Closes the active file watcher, if any. Closing it also ends a running
     * `watch()` iteration.
     */
    stop() {
        if (this.watcher) {
            this.watcher.close();
            this.watcher = null;
        }
    }

    /**
     * Selects the items to chunk and converts each of them into a chunk.
     *
     * @param {*} data Parsed JSON document.
     * @returns {Array<{id: string, content: string, metadata: object}>}
     */
    extractChunks(data) {
        const chunkBy = this.options.chunkBy ?? 'item';
        const contentFields = this.options.contentFields ?? [];
        const metadataFields = this.options.metadataFields ?? [];
        let items;
        if (chunkBy !== 'item' && chunkBy.startsWith('$.')) {
            items = this.evaluateJSONPath(data, chunkBy);
        }
        else {
            // 'item' and any unrecognised value: one chunk per array element,
            // or a single chunk for a non-array document.
            items = Array.isArray(data) ? data : [data];
        }
        return items.map((item, index) => this.itemToChunk(item, index, contentFields, metadataFields));
    }

    /**
     * Evaluates a small path-expression subset: `$.a.b`, `[*]` wildcards and
     * `[key]` bracket segments. Only own properties are followed.
     *
     * @param {*} data Root value.
     * @param {string} pathExpr Expression starting with `$.`.
     * @returns {Array<*>} Every value the expression selects.
     */
    evaluateJSONPath(data, pathExpr) {
        const expr = pathExpr.slice(2);
        const segments = expr.split(/(?=\[)|\./).filter(Boolean);
        let current = [data];
        for (const segment of segments) {
            const next = [];
            for (const item of current) {
                if (item === null || typeof item !== 'object') {
                    continue;
                }
                if (segment === '[*]') {
                    next.push(...(Array.isArray(item) ? item : Object.values(item)));
                    continue;
                }
                const key = segment.replace(/^\[|\]$/g, '');
                // Own properties only, so `$.constructor` or `$.toString` cannot
                // select members inherited from Object.prototype.
                if (Object.prototype.hasOwnProperty.call(item, key)) {
                    next.push(item[key]);
                }
            }
            current = next;
        }
        return current;
    }

    /**
     * Builds the chunk for one item.
     *
     * Content is the `contentFields` values joined by blank lines, or the
     * flattened item when no content fields are configured. The id has the form
     * `<namespace>:<file name>:<first 8 hex chars of the content MD5>:<index>`.
     *
     * @param {*} item Item to convert.
     * @param {number} index Position of the item among the selected items.
     * @param {string[]} contentFields Dotted field paths used for the content.
     * @param {string[]} metadataFields Dotted field paths copied to the metadata.
     * @returns {{id: string, content: string, metadata: object}}
     */
    itemToChunk(item, index, contentFields, metadataFields) {
        let content;
        if (contentFields.length > 0) {
            content = contentFields
                .map((field) => this.getNestedValue(item, field))
                .filter((value) => value !== undefined && value !== null)
                .map((value) => String(value))
                .join('\n\n');
        }
        else {
            content = this.flattenToContent(item);
        }
        const metadata = {
            ...(this.options.metadata ?? {}),
            source: this.filePath,
            index,
        };
        for (const field of metadataFields) {
            const value = this.getNestedValue(item, field);
            if (value !== undefined) {
                metadata[field] = value;
            }
        }
        const namespace = this.options.namespace ?? 'json';
        const hash = crypto.createHash('md5').update(content).digest('hex').slice(0, 8);
        const id = `${namespace}:${path.basename(this.filePath)}:${hash}:${index}`;
        return { id, content, metadata };
    }

    /**
     * Resolves a dotted path such as `a.b.c` against `obj`.
     *
     * @param {*} obj Value to read from.
     * @param {string} fieldPath Dot-separated property names.
     * @returns {*} The value found, or `undefined` when a step is missing.
     */
    getNestedValue(obj, fieldPath) {
        let current = obj;
        for (const part of fieldPath.split('.')) {
            // Only descend into own properties of objects and arrays; primitives
            // and prototype members (`length` of a string, `toString`) are not data.
            if (current === null ||
                typeof current !== 'object' ||
                !Object.prototype.hasOwnProperty.call(current, part)) {
                return undefined;
            }
            current = current[part];
        }
        return current;
    }

    /**
     * Renders a value as `path: value` lines, one per leaf, skipping null and
     * undefined leaves.
     *
     * @param {*} obj Value to render.
     * @param {string} [prefix] Path accumulated so far.
     * @returns {string}
     */
    flattenToContent(obj, prefix = '') {
        if (obj === null || obj === undefined) {
            return '';
        }
        if (typeof obj !== 'object') {
            return prefix ? `${prefix}: ${obj}` : String(obj);
        }
        const lines = Array.isArray(obj)
            ? obj.map((item, i) => this.flattenToContent(item, prefix ? `${prefix}[${i}]` : `[${i}]`))
            : Object.entries(obj).map(([key, value]) => this.flattenToContent(value, prefix ? `${prefix}.${key}` : key));
        return lines.filter(Boolean).join('\n');
    }

    /**
     * @param {string} content Raw file content.
     * @returns {string} Hex MD5 digest, used only to detect content changes.
     */
    hashContent(content) {
        return crypto.createHash('md5').update(content).digest('hex');
    }
}
223
+ exports.JSONSource = JSONSource;
224
+ //# sourceMappingURL=json-source.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-source.js","sourceRoot":"","sources":["../../../src/knowledge/sources/json-source.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,+CAAiC;AAEjC,4CAA8C;AAE9C,MAAa,UAAU;IACb,QAAQ,CAAS;IACjB,OAAO,CAAoB;IAC3B,OAAO,GAAwB,IAAI,CAAC;IACpC,QAAQ,GAAkB,IAAI,CAAC;IAEvC,YAAY,QAAgB,EAAE,UAA6B,EAAE;QAC3D,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACvC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,CAAC,IAAI;QACT,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YACnE,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAEjC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;YACxC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,0BAAc,CACtB,8BAA+B,KAAe,CAAC,OAAO,EAAE,EACxD,IAAI,CAAC,QAAQ,EACb,KAAc,CACf,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,CAAC,KAAK;QACV,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACxB,OAAO;QACT,CAAC;QAED,MAAM,WAAW,GAAkB,EAAE,CAAC;QACtC,IAAI,WAAW,GAA0D,IAAI,CAAC;QAE9E,MAAM,aAAa,GAAG,KAAK,IAAI,EAAE;YAC/B,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnE,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;gBAE1C,IAAI,OAAO,KAAK,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAC9B,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;oBACxB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;oBACjC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;oBAExC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;wBAC3B,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;oBAC9C,CAAC;oBAED,IAAI,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC1C,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,EAAG,CAAC;wBACpC,MAAM,OAAO,GAAG,WAAW,CAAC;wBAC5B,WAAW,GAAG,IAAI,CAAC;wBACnB,OAAO,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;oBAC1C,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,+CAA+C;YACjD,CAAC;QACH,CAAC,CAAC;QA
EF,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,SAAS,EAAE,EAAE;YACnD,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;gBAC3B,aAAa,EAAE,CAAC;YAClB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO,IAAI,EAAE,CAAC;YACZ,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,MAAM,WAAW,CAAC,KAAK,EAAG,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,MAAM,MAAM,IAAI,OAAO,CAAc,CAAC,OAAO,EAAE,EAAE;oBAC/C,WAAW,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAClD,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI;QACF,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,IAAS;QAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,MAAM,CAAC;QAC/C,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,IAAI,EAAE,CAAC;QACvD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,IAAI,EAAE,CAAC;QAEzD,IAAI,KAAY,CAAC;QAEjB,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;YACvB,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC9C,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACpC,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC9C,CAAC;QAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC,CAAC;IAClG,CAAC;IAEO,gBAAgB,CAAC,IAAS,EAAE,QAAgB;QAClD,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAEzD,IAAI,OAAO,GAAU,CAAC,IAAI,CAAC,CAAC;QAE5B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAU,EAAE,CAAC;YAEvB,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;gBAC3B,IAAI,OAAO,KAAK,KAAK,EAAE,CAAC;oBACtB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;wBACxB,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;wBACrD,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;oBACpC,C
AAC;gBACH,CAAC;qBAAM,CAAC;oBACN,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;oBAC5C,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;wBACpD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACvB,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,WAAW,CACjB,IAAS,EACT,KAAa,EACb,aAAuB,EACvB,cAAwB;QAExB,IAAI,OAAe,CAAC;QAEpB,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,YAAY,GAAG,aAAa;iBAC/B,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;iBAChD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,IAAI,CAAC;iBAC5C,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACzB,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,QAAQ,GAAwB;YACpC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;YAChC,MAAM,EAAE,IAAI,CAAC,QAAQ;YACrB,KAAK;SACN,CAAC;QAEF,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;gBACnC,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;gBAC/C,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;oBACxB,QAAQ,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;gBAC1B,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC;QACnD,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAChF,MAAM,EAAE,GAAG,GAAG,SAAS,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;QAE3E,OAAO,EAAE,EAAE,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IACnC,CAAC;IAEO,cAAc,CAAC,GAAQ,EAAE,IAAY;QAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,OAAO,GAAG,GAAG,CAAC;QAElB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;gBAC9C,OAAO,SAAS,CAAC;YACnB,CAAC;YACD,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,gBAAgB,CAAC,GAAQ,EAAE,SAAiB,EAAE;QACpD,IAAI,GAAG,KAAK,IAAI,I
AAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACtC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC5B,OAAO,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,GAAG;iBACP,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;iBACpF,MAAM,CAAC,OAAO,CAAC;iBACf,IAAI,CAAC,IAAI,CAAC,CAAC;QAChB,CAAC;QAED,OAAO,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC;aACvB,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;YACpB,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACpD,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,CAAC,CAAC;aACD,MAAM,CAAC,OAAO,CAAC;aACf,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAEO,WAAW,CAAC,OAAe;QACjC,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAChE,CAAC;CACF;AA1ND,gCA0NC"}
@@ -0,0 +1,27 @@
1
+ import type { KnowledgeSource, Chunk, ChunkUpdate, MarkdownSourceOptions } from '../types.js';
2
// Type declaration of MarkdownSource, a KnowledgeSource implementation.
// Only signatures are visible in this declaration; notes about behavior below are
// inferred from member names and must be confirmed against the implementation.
+ export declare class MarkdownSource implements KnowledgeSource {
3
+ private pattern;
4
+ private options;
5
+ private watcher;
6
+ private fileHashes;
7
// Takes a pattern string and an optional MarkdownSourceOptions object.
+ constructor(pattern: string, options?: MarkdownSourceOptions);
8
// Async iterable of Chunk values.
+ load(): AsyncIterable<Chunk>;
9
// Async iterable of ChunkUpdate values.
+ watch(): AsyncIterable<ChunkUpdate>;
10
// Returns nothing; presumably ends an active watch — TODO confirm.
+ stop(): void;
11
// Private helpers; their parameter and return types are not emitted in the declaration.
+ private parseMarkdown;
12
+ private extractFrontmatter;
13
+ private splitByHeadings;
14
+ private splitLargeSection;
15
+ private getOverlapText;
16
+ private splitIntoSentences;
17
+ private estimateTokens;
18
+ private extractTags;
19
+ private extractWikilinks;
20
+ private generateChunkId;
21
+ private getRelativePath;
22
+ private getBaseDir;
23
+ private matchesPattern;
24
+ private hashContent;
25
+ private parseFrontmatterValue;
26
+ }
27
+ //# sourceMappingURL=markdown-source.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"markdown-source.d.ts","sourceRoot":"","sources":["../../../src/knowledge/sources/markdown-source.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,WAAW,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AAiB9F,qBAAa,cAAe,YAAW,eAAe;IACpD,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA6B;IAC5C,OAAO,CAAC,UAAU,CAAkC;gBAExC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,qBAA0B;IAezD,IAAI,IAAI,aAAa,CAAC,KAAK,CAAC;IAuB5B,KAAK,IAAI,aAAa,CAAC,WAAW,CAAC;IA8E1C,IAAI,IAAI,IAAI;IAOZ,OAAO,CAAC,aAAa;IAqCrB,OAAO,CAAC,kBAAkB;IAwB1B,OAAO,CAAC,eAAe;IAmEvB,OAAO,CAAC,iBAAiB;IAqEzB,OAAO,CAAC,cAAc;IAiBtB,OAAO,CAAC,kBAAkB;IAI1B,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,gBAAgB;IAYxB,OAAO,CAAC,eAAe;IAOvB,OAAO,CAAC,eAAe;IAKvB,OAAO,CAAC,UAAU;IAKlB,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,WAAW;IAInB,OAAO,CAAC,qBAAqB;CAsB9B"}
@@ -0,0 +1,410 @@
1
+ "use strict";
2
// Module-interop helper: reuses `this.__createBinding` when one is already
// defined, otherwise installs one of two fallbacks.
// Purpose: expose `m[k]` on `o` under the name `k2` (`k2` defaults to `k`).
//  - When Object.create exists, the property is defined via
//    Object.defineProperty. The existing descriptor of `m[k]` is reused unless
//    it is missing, is an accessor on a non-`__esModule` object, or is a
//    writable/configurable data property; in those cases an enumerable getter
//    that reads `m[k]` on each access is used instead.
//  - Otherwise the current value is copied with a plain assignment.
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
// Module-interop helper: reuses `this.__setModuleDefault` when already defined.
// Purpose: store `v` as the "default" property of `o` - as an enumerable
// property via Object.defineProperty when Object.create exists, otherwise by
// plain assignment.
+ var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
+ Object.defineProperty(o, "default", { enumerable: true, value: v });
15
+ }) : function(o, v) {
16
+ o["default"] = v;
17
+ });
18
// Module-interop helper: reuses `this.__importStar` when already defined.
// Purpose: build a namespace-style object for a required module.
//  - A module flagged `__esModule` is returned unchanged.
//  - Otherwise a fresh object is returned that re-exposes every own property
//    name of `mod` except "default" (through __createBinding) and carries
//    `mod` itself as its "default" property (through __setModuleDefault).
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames,
// or with a for-in/hasOwnProperty fallback when that is unavailable.
+ var __importStar = (this && this.__importStar) || (function () {
19
+ var ownKeys = function(o) {
20
+ ownKeys = Object.getOwnPropertyNames || function (o) {
21
+ var ar = [];
22
+ for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
+ return ar;
24
+ };
25
+ return ownKeys(o);
26
+ };
27
+ return function (mod) {
28
+ if (mod && mod.__esModule) return mod;
29
+ var result = {};
30
+ if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
+ __setModuleDefault(result, mod);
32
+ return result;
33
+ };
34
+ })();
35
// Module-interop helper: reuses `this.__importDefault` when already defined.
// Purpose: return `mod` unchanged when it is flagged `__esModule`; otherwise
// wrap it as `{ default: mod }` so callers can always read `.default`.
+ var __importDefault = (this && this.__importDefault) || function (mod) {
36
+ return (mod && mod.__esModule) ? mod : { "default": mod };
37
+ };
38
+ Object.defineProperty(exports, "__esModule", { value: true });
39
+ exports.MarkdownSource = void 0;
40
+ const fs = __importStar(require("fs"));
41
+ const path = __importStar(require("path"));
42
+ const crypto = __importStar(require("crypto"));
43
+ const fast_glob_1 = __importDefault(require("fast-glob"));
44
+ const errors_js_1 = require("../errors.js");
45
const DEFAULT_MAX_CHUNK_SIZE = 2000;
const DEFAULT_CHUNK_OVERLAP = 200;
const DEFAULT_MIN_CHUNK_SIZE = 100;

/**
 * Removes one pair of matching quotes that wraps the whole string.
 * Unbalanced or interior quotes are left untouched, so `say "hi"` stays intact.
 */
function stripMatchingQuotes(text) {
    const first = text[0];
    const wrapped = text.length >= 2
        && (first === '"' || first === "'")
        && text[text.length - 1] === first;
    return wrapped ? text.slice(1, -1) : text;
}

/**
 * Unions a list declared in metadata with values extracted from a chunk,
 * keeping first-seen order. A non-empty string counts as a one-item list;
 * any other non-array declared value is ignored.
 */
function mergeUnique(declared, extracted) {
    let declaredList = [];
    if (Array.isArray(declared)) {
        declaredList = declared;
    }
    else if (typeof declared === 'string' && declared !== '') {
        declaredList = [declared];
    }
    return [...new Set([...declaredList, ...extracted])];
}

/**
 * Knowledge source that reads markdown files matching a glob pattern and
 * turns them into chunks: front matter becomes metadata, the body is split
 * at headings, and oversized sections are split further by paragraph and
 * sentence. `watch()` optionally streams add/update/delete events.
 */
class MarkdownSource {
    pattern;
    options;
    watcher = null;
    fileHashes = new Map();

    /**
     * @param {string} pattern Glob pattern, a single `.md` path, or a directory
     *   (a value without `*`, `?` or a `.md` suffix gets `**\/*.md` appended).
     * @param {object} [options] Chunking/watch options; unset or undefined
     *   size options fall back to the module defaults.
     */
    constructor(pattern, options = {}) {
        const looksLikeDirectory = !pattern.includes('*') && !pattern.includes('?') && !pattern.endsWith('.md');
        if (looksLikeDirectory) {
            this.pattern = pattern.endsWith('/') ? `${pattern}**/*.md` : `${pattern}/**/*.md`;
        }
        else {
            this.pattern = pattern;
        }
        // `??` instead of spread-over-defaults: an explicit `undefined` must not
        // wipe a default, otherwise every size comparison silently becomes false.
        this.options = {
            ...options,
            maxChunkSize: options?.maxChunkSize ?? DEFAULT_MAX_CHUNK_SIZE,
            chunkOverlap: options?.chunkOverlap ?? DEFAULT_CHUNK_OVERLAP,
            minChunkSize: options?.minChunkSize ?? DEFAULT_MIN_CHUNK_SIZE,
        };
    }

    /**
     * Reads every file matching the pattern and yields its chunks, recording a
     * content hash per file for later change detection in `watch()`.
     * @throws IngestionError when a file cannot be read or parsed.
     */
    async *load() {
        const files = await (0, fast_glob_1.default)(this.pattern, { absolute: true });
        for (const filePath of files) {
            let chunks;
            try {
                const content = await fs.promises.readFile(filePath, 'utf-8');
                this.fileHashes.set(filePath, this.hashContent(content));
                chunks = this.parseMarkdown(filePath, content);
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                throw new errors_js_1.IngestionError(`Failed to parse markdown file: ${reason}`, filePath, error);
            }
            // Yield outside the try so errors thrown into the generator by the
            // consumer are not mislabelled as ingestion failures.
            yield* chunks;
        }
    }

    /**
     * Streams chunk updates for `.md` files under the pattern's base directory.
     * Yields nothing unless `options.watch` is truthy. The stream ends after
     * `stop()` (queued updates are delivered first) and rejects with the
     * watcher's error if the underlying fs watcher fails.
     *
     * NOTE(review): an 'update' re-emits the file's current chunks but does not
     * emit deletes for chunks whose ids changed; the delete event on unlink
     * carries a placeholder id plus the file's relative `metadata.source`.
     */
    async *watch() {
        if (!this.options.watch) {
            return;
        }
        // Resolve once so watcher paths are absolute, like the keys load() stores.
        const baseDir = path.resolve(this.getBaseDir());
        const updateQueue = [];
        let wake = null;
        let failure = null;
        const notify = () => {
            const resume = wake;
            wake = null;
            resume?.();
        };
        const fileExists = async (target) => {
            try {
                await fs.promises.access(target, fs.constants.F_OK);
                return true;
            }
            catch {
                return false;
            }
        };
        const processFile = async (absolutePath, eventType) => {
            if (!this.matchesPattern(absolutePath)) {
                return;
            }
            if (eventType === 'unlink') {
                // Only files we have seen before produce a delete event.
                if (this.fileHashes.delete(absolutePath)) {
                    updateQueue.push({
                        type: 'delete',
                        chunk: {
                            id: this.generateChunkId(absolutePath, '', 0),
                            content: '',
                            // Relative, matching the `source` stored on regular chunks.
                            metadata: { source: this.getRelativePath(absolutePath) },
                        },
                    });
                }
                return;
            }
            try {
                const content = await fs.promises.readFile(absolutePath, 'utf-8');
                const newHash = this.hashContent(content);
                const oldHash = this.fileHashes.get(absolutePath);
                if (oldHash === newHash) {
                    return;
                }
                this.fileHashes.set(absolutePath, newHash);
                const updateType = oldHash ? 'update' : 'add';
                for (const chunk of this.parseMarkdown(absolutePath, content)) {
                    updateQueue.push({ type: updateType, chunk });
                }
            }
            catch {
                // Best effort: the file might have been deleted between event and read.
            }
        };
        const handleEvent = async (eventType, filename) => {
            try {
                if (typeof filename !== 'string' || !filename.endsWith('.md')) {
                    return;
                }
                const absolutePath = path.resolve(baseDir, filename);
                let kind = 'change';
                if (eventType === 'rename') {
                    // 'rename' covers both creation and removal; probe to tell them apart.
                    kind = (await fileExists(absolutePath)) ? 'add' : 'unlink';
                }
                await processFile(absolutePath, kind);
            }
            catch (error) {
                failure = error;
            }
            finally {
                notify();
            }
        };
        const watcher = fs.watch(baseDir, { recursive: true }, (eventType, filename) => {
            // handleEvent never rejects; failures are surfaced through `failure`.
            void handleEvent(eventType, filename);
        });
        this.watcher = watcher;
        watcher.on('error', (error) => {
            failure = error;
            notify();
        });
        // stop() closes the watcher; wake the loop so the stream can finish.
        watcher.on('close', notify);
        try {
            while (true) {
                if (updateQueue.length > 0) {
                    yield updateQueue.shift();
                    continue;
                }
                if (failure) {
                    throw failure;
                }
                if (this.watcher !== watcher) {
                    return;
                }
                await new Promise((resolve) => {
                    wake = resolve;
                });
            }
        }
        finally {
            // Also runs when the consumer breaks out of `for await`.
            watcher.close();
            if (this.watcher === watcher) {
                this.watcher = null;
            }
        }
    }

    /** Closes the active file watcher, if any; a running `watch()` stream then ends. */
    stop() {
        if (this.watcher) {
            this.watcher.close();
            this.watcher = null;
        }
    }

    /**
     * Converts one markdown document into chunks.
     * `chunkIndex` and `totalChunks` are both counted across the whole file.
     * Inline tags/wikilinks are merged with lists declared in front matter or
     * `options.metadata` rather than replacing them.
     * @param {string} filePath Path used for ids and the relative `source`.
     * @param {string} content Raw file text.
     * @returns {Array<object>} Chunks with `id`, `content` and `metadata`.
     */
    parseMarkdown(filePath, content) {
        const { frontmatter, body } = this.extractFrontmatter(content);
        const pieces = this.splitByHeadings(body).flatMap((section) => this.splitLargeSection(section));
        const relativePath = this.getRelativePath(filePath);
        const baseMetadata = { ...frontmatter, ...(this.options.metadata ?? {}) };
        return pieces.map((piece, chunkIndex) => {
            const metadata = {
                ...baseMetadata,
                heading: piece.headings,
                hasCode: piece.hasCode,
                source: relativePath,
                chunkIndex,
                totalChunks: pieces.length,
            };
            const { tags } = this.extractTags(piece.content);
            if (tags) {
                metadata.tags = mergeUnique(baseMetadata.tags, tags);
            }
            const { links } = this.extractWikilinks(piece.content);
            if (links) {
                metadata.links = mergeUnique(baseMetadata.links, links);
            }
            return {
                id: this.generateChunkId(filePath, piece.content, chunkIndex),
                content: piece.content,
                metadata,
            };
        });
    }

    /**
     * Splits a leading `---` front-matter block from the document body.
     * Handles LF and CRLF line endings, an optional BOM, an empty block and a
     * closing delimiter at end of file. Only flat `key: value` lines are read.
     * @returns {{frontmatter: object, body: string}}
     */
    extractFrontmatter(content) {
        const match = content.match(/^\uFEFF?---[ \t]*\r?\n(?:([\s\S]*?)\r?\n)?---[ \t]*(?:\r?\n|$)/);
        if (!match) {
            return { frontmatter: {}, body: content };
        }
        const frontmatter = {};
        for (const line of (match[1] ?? '').split(/\r?\n/)) {
            const colonIndex = line.indexOf(':');
            if (colonIndex <= 0) {
                continue;
            }
            const key = line.slice(0, colonIndex).trim();
            // Assigning `__proto__` would swap the object's prototype instead of adding a key.
            if (key === '' || key === '__proto__') {
                continue;
            }
            frontmatter[key] = this.parseFrontmatterValue(line.slice(colonIndex + 1).trim());
        }
        return { frontmatter, body: content.slice(match[0].length) };
    }

    /**
     * Splits a body into sections at ATX headings (`#` to `######`) outside
     * fenced code blocks. Each section records its heading trail; the heading
     * line itself is not part of the section content.
     * @returns {Array<{headings: string[], content: string, hasCode: boolean, frontmatter: object}>}
     */
    splitByHeadings(content) {
        const sections = [];
        // Indexed by heading level - 1. Holes are kept while scanning so that
        // skipped levels (e.g. `#` followed by `###`) do not shift later siblings.
        const headingByLevel = [];
        let buffer = [];
        let inCodeBlock = false;
        const flush = () => {
            const text = buffer.join('\n').trim();
            if (text) {
                sections.push({
                    headings: headingByLevel.filter(Boolean),
                    content: text,
                    hasCode: buffer.some((l) => l.startsWith('```')),
                    frontmatter: {},
                });
            }
            buffer = [];
        };
        // Splitting on \r?\n keeps CRLF files from leaving a trailing \r that
        // would stop the heading regex from matching.
        for (const line of content.split(/\r?\n/)) {
            if (line.startsWith('```')) {
                inCodeBlock = !inCodeBlock;
                buffer.push(line);
                continue;
            }
            if (inCodeBlock || !/^#{1,6}\s/.test(line)) {
                buffer.push(line);
                continue;
            }
            flush();
            const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
            if (headingMatch) {
                const level = headingMatch[1].length;
                headingByLevel.length = level - 1; // drop this level and anything deeper
                headingByLevel[level - 1] = headingMatch[2].trim();
            }
        }
        flush();
        if (sections.length === 0 && content.trim()) {
            // Nothing but heading lines: keep the raw text as a single section.
            sections.push({
                headings: [],
                content: content.trim(),
                hasCode: content.includes('```'),
                frontmatter: {},
            });
        }
        return sections;
    }

    /**
     * Splits a section whose estimated size exceeds `maxChunkSize` into
     * smaller pieces: paragraph by paragraph with `chunkOverlap` carry-over,
     * and sentence by sentence for paragraphs that are themselves too large.
     * A trailing piece below `minChunkSize` is appended to the previous piece.
     * A single sentence larger than `maxChunkSize` is kept whole.
     * @returns {Array<object>} Copies of `section` with narrowed `content`.
     */
    splitLargeSection(section) {
        const { maxChunkSize: maxSize, minChunkSize: minSize, chunkOverlap: overlap } = this.options;
        if (this.estimateTokens(section.content) <= maxSize) {
            return [section];
        }
        const chunks = [];
        let parts = []; // paragraph-level pieces of the chunk being built
        let partsTokens = 0;
        let carried = 0; // leading entries of `parts` repeated from the previous chunk
        const emit = (pieces) => {
            chunks.push({ ...section, content: pieces.join('\n\n') });
        };
        for (const paragraph of section.content.split(/\n\n+/)) {
            const paragraphTokens = this.estimateTokens(paragraph);
            if (partsTokens + paragraphTokens > maxSize && parts.length > 0) {
                emit(parts);
                const overlapText = this.getOverlapText(parts, overlap);
                parts = overlapText ? [overlapText] : [];
                carried = parts.length;
                partsTokens = this.estimateTokens(parts.join('\n\n'));
            }
            if (paragraphTokens <= maxSize) {
                parts.push(paragraph);
                partsTokens += paragraphTokens;
                continue;
            }
            // Oversized paragraph: pack whole sentences, re-joined with spaces so
            // they stay one paragraph instead of turning into paragraph breaks.
            let sentences = [];
            let sentencesTokens = 0;
            for (const sentence of this.splitIntoSentences(paragraph)) {
                const sentenceTokens = this.estimateTokens(sentence);
                const isFull = partsTokens + sentencesTokens + sentenceTokens > maxSize;
                if (isFull && (parts.length > 0 || sentences.length > 0)) {
                    // A buffer holding only carried overlap has no new text; drop it
                    // rather than emitting a chunk that merely repeats the previous one.
                    if (sentences.length > 0) {
                        emit([...parts, sentences.join(' ')]);
                    }
                    else if (parts.length > carried) {
                        emit(parts);
                    }
                    parts = [];
                    partsTokens = 0;
                    carried = 0;
                    sentences = [];
                    sentencesTokens = 0;
                }
                sentences.push(sentence);
                sentencesTokens += sentenceTokens;
            }
            if (sentences.length > 0) {
                parts.push(sentences.join(' '));
                partsTokens += sentencesTokens;
            }
        }
        if (parts.length > 0) {
            const content = parts.join('\n\n');
            if (chunks.length === 0 || this.estimateTokens(content) >= minSize) {
                emit(parts);
            }
            else {
                // The previous chunk already ends with the carried overlap text,
                // so append only what is new.
                const fresh = parts.slice(carried).join('\n\n');
                if (fresh) {
                    chunks[chunks.length - 1].content += `\n\n${fresh}`;
                }
            }
        }
        return chunks;
    }

    /**
     * Collects whole trailing entries of `chunks` (walking backwards) while
     * their combined estimate stays within `targetTokens`.
     * @returns {string} The selected entries joined by blank lines, or ''.
     */
    getOverlapText(chunks, targetTokens) {
        const result = [];
        let tokens = 0;
        for (let i = chunks.length - 1; i >= 0 && tokens < targetTokens; i--) {
            const chunkTokens = this.estimateTokens(chunks[i]);
            if (tokens + chunkTokens > targetTokens) {
                break;
            }
            result.unshift(chunks[i]);
            tokens += chunkTokens;
        }
        return result.join('\n\n');
    }

    /** Splits text after `.`, `!` or `?` followed by whitespace. */
    splitIntoSentences(text) {
        return text.split(/(?<=[.!?])\s+/).filter(Boolean);
    }

    /** Rough size estimate: one token per four characters, rounded up. */
    estimateTokens(text) {
        return Math.ceil(text.length / 4);
    }

    /**
     * Finds inline `#tags`: the `#` must start the text or follow whitespace,
     * and the tag needs at least one letter or underscore, so `#123` and URL
     * fragments such as `page.html#section` are not reported.
     * @returns {{tags?: string[]}} Unique tags without the `#`, or `{}`.
     */
    extractTags(content) {
        const found = [...content.matchAll(/(?<!\S)#(?=[\w-]*[A-Za-z_])([\w-]+)/g)].map((m) => m[1]);
        return found.length > 0 ? { tags: [...new Set(found)] } : {};
    }

    /**
     * Finds `[[target]]` and `[[target|alias]]` wikilinks.
     * @returns {{links?: string[]}} Unique targets, or `{}` when there are none.
     */
    extractWikilinks(content) {
        const targets = [...content.matchAll(/\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g)].map((m) => m[1]);
        return targets.length > 0 ? { links: [...new Set(targets)] } : {};
    }

    /**
     * Builds `<namespace>:<relative path>:<8-char content hash>:<index>`.
     * MD5 is used as a content fingerprint only, not for security.
     */
    generateChunkId(filePath, content, index) {
        const namespace = this.options.namespace ?? 'default';
        const relativePath = this.getRelativePath(filePath);
        const hash = crypto.createHash('md5').update(content).digest('hex').slice(0, 8);
        return `${namespace}:${relativePath}:${hash}:${index}`;
    }

    /** Path of `filePath` relative to the pattern's base directory. */
    getRelativePath(filePath) {
        return path.relative(this.getBaseDir(), filePath);
    }

    /**
     * Directory portion of the pattern that precedes the first glob character
     * (`*`, `?`, `[`, `{`), cut at a path-separator boundary. A pattern without
     * glob characters is treated as a file path and its directory is returned.
     */
    getBaseDir() {
        const firstGlob = this.pattern.search(/[*?[{]/);
        if (firstGlob === -1) {
            return path.dirname(this.pattern);
        }
        const lastSlash = this.pattern.slice(0, firstGlob).lastIndexOf('/');
        if (lastSlash === -1) {
            return '.';
        }
        return lastSlash === 0 ? '/' : this.pattern.slice(0, lastSlash);
    }

    /**
     * NOTE(review): only checks the `.md` extension; the glob pattern itself is
     * not evaluated, so watched files outside a narrower pattern still match.
     */
    matchesPattern(filePath) {
        return filePath.endsWith('.md');
    }

    /** MD5 hex digest of the content, used for change detection only. */
    hashContent(content) {
        return crypto.createHash('md5').update(content).digest('hex');
    }

    /**
     * Parses a scalar or inline-list front-matter value.
     * `[a, "b"]` becomes an array of strings (`[]` becomes an empty array),
     * `true`/`false` become booleans, numeric text becomes a number and
     * anything else a string with one pair of wrapping quotes removed.
     * Commas inside quoted list items are not supported.
     */
    parseFrontmatterValue(value) {
        if (value.startsWith('[') && value.endsWith(']')) {
            const inner = value.slice(1, -1).trim();
            if (inner === '') {
                return [];
            }
            return inner.split(',').map((segment) => stripMatchingQuotes(segment.trim()));
        }
        if (value === 'true') {
            return true;
        }
        if (value === 'false') {
            return false;
        }
        if (value !== '' && !Number.isNaN(Number(value))) {
            return Number(value);
        }
        return stripMatchingQuotes(value);
    }
}
409
+ exports.MarkdownSource = MarkdownSource;
410
+ //# sourceMappingURL=markdown-source.js.map