opc-agent 4.1.0 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +20 -20
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +14 -14
  3. package/.github/PULL_REQUEST_TEMPLATE.md +13 -13
  4. package/CHANGELOG.md +48 -48
  5. package/CONTRIBUTING.md +36 -36
  6. package/README.zh-CN.md +497 -497
  7. package/dist/channels/wechat.js +6 -6
  8. package/dist/deploy/index.js +56 -56
  9. package/dist/studio/server.js +30 -1
  10. package/dist/studio-ui/index.html +230 -10
  11. package/dist/ui/components.js +105 -105
  12. package/examples/README.md +22 -22
  13. package/examples/basic-agent.ts +90 -90
  14. package/examples/brain-integration.ts +71 -71
  15. package/examples/multi-channel.ts +74 -74
  16. package/fix-sidebar.mjs +188 -188
  17. package/install.ps1 +154 -154
  18. package/install.sh +164 -164
  19. package/package.json +1 -1
  20. package/scripts/install.ps1 +31 -31
  21. package/scripts/install.sh +40 -40
  22. package/serve-studio.js +13 -13
  23. package/serve-test.js +25 -25
  24. package/src/channels/dingtalk.ts +46 -46
  25. package/src/channels/email.ts +351 -351
  26. package/src/channels/feishu.ts +349 -349
  27. package/src/channels/googlechat.ts +42 -42
  28. package/src/channels/imessage.ts +31 -31
  29. package/src/channels/irc.ts +82 -82
  30. package/src/channels/line.ts +32 -32
  31. package/src/channels/matrix.ts +33 -33
  32. package/src/channels/mattermost.ts +57 -57
  33. package/src/channels/msteams.ts +32 -32
  34. package/src/channels/nostr.ts +32 -32
  35. package/src/channels/qq.ts +33 -33
  36. package/src/channels/signal.ts +32 -32
  37. package/src/channels/sms.ts +33 -33
  38. package/src/channels/telegram.ts +616 -616
  39. package/src/channels/twitch.ts +65 -65
  40. package/src/channels/voice-call.ts +100 -100
  41. package/src/channels/websocket.ts +399 -399
  42. package/src/channels/wechat.ts +329 -329
  43. package/src/channels/whatsapp.ts +32 -32
  44. package/src/cli/chat.ts +99 -99
  45. package/src/cli/setup.ts +314 -314
  46. package/src/core/agent.ts +476 -476
  47. package/src/core/api-server.ts +277 -277
  48. package/src/core/audio.ts +98 -98
  49. package/src/core/collaboration.ts +275 -275
  50. package/src/core/context-discovery.ts +85 -85
  51. package/src/core/context-refs.ts +140 -140
  52. package/src/core/gateway.ts +106 -106
  53. package/src/core/heartbeat.ts +51 -51
  54. package/src/core/hooks.ts +105 -105
  55. package/src/core/ide-bridge.ts +133 -133
  56. package/src/core/node-network.ts +86 -86
  57. package/src/core/profiles.ts +122 -122
  58. package/src/core/scheduler.ts +187 -187
  59. package/src/core/session-manager.ts +137 -137
  60. package/src/core/subagent.ts +98 -98
  61. package/src/core/vision.ts +180 -180
  62. package/src/core/workflow-graph.ts +365 -365
  63. package/src/daemon.ts +96 -96
  64. package/src/deploy/index.ts +255 -255
  65. package/src/doctor.ts +156 -156
  66. package/src/eval/index.ts +211 -211
  67. package/src/eval/suites/basic.json +16 -16
  68. package/src/eval/suites/memory.json +12 -12
  69. package/src/eval/suites/safety.json +14 -14
  70. package/src/hub/brain-seed.ts +54 -54
  71. package/src/hub/client.ts +60 -60
  72. package/src/mcp/servers/calculator-mcp.ts +65 -65
  73. package/src/mcp/servers/crypto-mcp.ts +73 -73
  74. package/src/mcp/servers/database-mcp.ts +72 -72
  75. package/src/mcp/servers/datetime-mcp.ts +69 -69
  76. package/src/mcp/servers/filesystem.ts +66 -66
  77. package/src/mcp/servers/github-mcp.ts +58 -58
  78. package/src/mcp/servers/index.ts +63 -63
  79. package/src/mcp/servers/json-mcp.ts +102 -102
  80. package/src/mcp/servers/memory-mcp.ts +56 -56
  81. package/src/mcp/servers/regex-mcp.ts +53 -53
  82. package/src/mcp/servers/web-mcp.ts +49 -49
  83. package/src/memory/context-compressor.ts +189 -189
  84. package/src/memory/seed-loader.ts +212 -212
  85. package/src/memory/user-profiler.ts +215 -215
  86. package/src/plugins/content-filter.ts +23 -23
  87. package/src/plugins/logger.ts +18 -18
  88. package/src/plugins/rate-limiter.ts +38 -38
  89. package/src/protocols/a2a/client.ts +132 -132
  90. package/src/protocols/a2a/index.ts +8 -8
  91. package/src/protocols/a2a/server.ts +333 -333
  92. package/src/protocols/a2a/types.ts +88 -88
  93. package/src/protocols/a2a/utils.ts +50 -50
  94. package/src/protocols/agui/client.ts +83 -83
  95. package/src/protocols/agui/index.ts +4 -4
  96. package/src/protocols/agui/server.ts +218 -218
  97. package/src/protocols/agui/types.ts +153 -153
  98. package/src/protocols/index.ts +2 -2
  99. package/src/protocols/mcp/agent-tools.ts +134 -134
  100. package/src/protocols/mcp/index.ts +8 -8
  101. package/src/protocols/mcp/server.ts +262 -262
  102. package/src/protocols/mcp/types.ts +69 -69
  103. package/src/providers/index.ts +632 -632
  104. package/src/publish/index.ts +376 -376
  105. package/src/scheduler/cron-engine.ts +191 -191
  106. package/src/scheduler/index.ts +2 -2
  107. package/src/schema/oad.ts +217 -217
  108. package/src/security/approval.ts +131 -131
  109. package/src/security/approvals.ts +143 -143
  110. package/src/security/elevated.ts +105 -105
  111. package/src/security/guardrails.ts +248 -248
  112. package/src/security/index.ts +9 -9
  113. package/src/security/keys.ts +87 -87
  114. package/src/security/secrets.ts +129 -129
  115. package/src/skills/builtin/index.ts +408 -408
  116. package/src/skills/marketplace.ts +113 -113
  117. package/src/skills/types.ts +42 -42
  118. package/src/studio/server.ts +31 -1
  119. package/src/studio/templates-data.ts +178 -178
  120. package/src/studio-ui/index.html +230 -10
  121. package/src/telemetry/index.ts +324 -324
  122. package/src/tools/builtin/browser.ts +299 -299
  123. package/src/tools/builtin/datetime.ts +41 -41
  124. package/src/tools/builtin/file.ts +107 -107
  125. package/src/tools/builtin/home-assistant.ts +116 -116
  126. package/src/tools/builtin/rl-tools.ts +243 -243
  127. package/src/tools/builtin/shell.ts +43 -43
  128. package/src/tools/builtin/vision.ts +64 -64
  129. package/src/tools/builtin/web-search.ts +126 -126
  130. package/src/tools/builtin/web.ts +35 -35
  131. package/src/tools/document-processor.ts +213 -213
  132. package/src/tools/image-generator.ts +150 -150
  133. package/src/tools/integrations/calendar.ts +73 -73
  134. package/src/tools/integrations/code-exec.ts +39 -39
  135. package/src/tools/integrations/csv-analyzer.ts +92 -92
  136. package/src/tools/integrations/database.ts +44 -44
  137. package/src/tools/integrations/email-send.ts +76 -76
  138. package/src/tools/integrations/git-tool.ts +42 -42
  139. package/src/tools/integrations/github-tool.ts +76 -76
  140. package/src/tools/integrations/image-gen.ts +56 -56
  141. package/src/tools/integrations/index.ts +92 -92
  142. package/src/tools/integrations/jira.ts +83 -83
  143. package/src/tools/integrations/notion.ts +71 -71
  144. package/src/tools/integrations/npm-tool.ts +48 -48
  145. package/src/tools/integrations/pdf-reader.ts +58 -58
  146. package/src/tools/integrations/slack.ts +65 -65
  147. package/src/tools/integrations/summarizer.ts +49 -49
  148. package/src/tools/integrations/translator.ts +48 -48
  149. package/src/tools/integrations/trello.ts +60 -60
  150. package/src/tools/integrations/vector-search.ts +42 -42
  151. package/src/tools/integrations/web-scraper.ts +47 -47
  152. package/src/tools/integrations/web-search.ts +58 -58
  153. package/src/tools/integrations/webhook.ts +38 -38
  154. package/src/tools/mcp-client.ts +131 -131
  155. package/src/tools/web-scraper.ts +179 -179
  156. package/src/tools/web-search.ts +180 -180
  157. package/src/ui/components.ts +127 -127
  158. package/srv-out.txt +1 -1
  159. package/templates/ecommerce-assistant/README.md +45 -45
  160. package/templates/ecommerce-assistant/oad.yaml +47 -47
  161. package/templates/tech-support/README.md +43 -43
  162. package/templates/tech-support/oad.yaml +45 -45
  163. package/test-agent/Dockerfile +9 -9
  164. package/test-agent/README.md +50 -50
  165. package/test-agent/agent.yaml +23 -23
  166. package/test-agent/docker-compose.yml +11 -11
  167. package/test-agent/oad.yaml +31 -31
  168. package/test-agent/package-lock.json +1492 -1492
  169. package/test-agent/package.json +17 -17
  170. package/test-agent/src/index.ts +24 -24
  171. package/test-agent/src/skills/echo.ts +15 -15
  172. package/test-agent/tsconfig.json +24 -24
  173. package/test-full.js +43 -43
  174. package/test-sidebar.js +22 -22
  175. package/test-studio3.js +75 -75
  176. package/test-studio4.js +41 -41
  177. package/tests/a2a-protocol.test.ts +285 -285
  178. package/tests/agui-protocol.test.ts +246 -246
  179. package/tests/api-server.test.ts +148 -148
  180. package/tests/approvals.test.ts +89 -89
  181. package/tests/audio.test.ts +40 -40
  182. package/tests/brain-seed-extended.test.ts +490 -490
  183. package/tests/brain-seed.test.ts +239 -239
  184. package/tests/browser.test.ts +179 -179
  185. package/tests/channels/discord.test.ts +79 -79
  186. package/tests/channels/email.test.ts +148 -148
  187. package/tests/channels/feishu.test.ts +123 -123
  188. package/tests/channels/telegram.test.ts +129 -129
  189. package/tests/channels/websocket.test.ts +53 -53
  190. package/tests/channels/wechat.test.ts +170 -170
  191. package/tests/channels-extra.test.ts +45 -45
  192. package/tests/chat-cli.test.ts +160 -160
  193. package/tests/cli.test.ts +46 -46
  194. package/tests/context-compressor.test.ts +172 -172
  195. package/tests/context-refs.test.ts +121 -121
  196. package/tests/cron-engine.test.ts +101 -101
  197. package/tests/daemon.test.ts +135 -135
  198. package/tests/deepbrain-wire.test.ts +234 -234
  199. package/tests/deploy-and-dag.test.ts +196 -196
  200. package/tests/doctor.test.ts +38 -38
  201. package/tests/document-processor.test.ts +69 -69
  202. package/tests/e2e-nocode.test.ts +442 -442
  203. package/tests/elevated.test.ts +69 -69
  204. package/tests/eval.test.ts +173 -173
  205. package/tests/gateway.test.ts +63 -63
  206. package/tests/guardrails.test.ts +177 -177
  207. package/tests/home-assistant.test.ts +40 -40
  208. package/tests/hooks.test.ts +79 -79
  209. package/tests/ide-bridge.test.ts +38 -38
  210. package/tests/image-generator.test.ts +84 -84
  211. package/tests/init-role.test.ts +124 -124
  212. package/tests/integrations.test.ts +249 -249
  213. package/tests/mcp-client.test.ts +92 -92
  214. package/tests/mcp-server.test.ts +178 -178
  215. package/tests/mcp-servers.test.ts +260 -260
  216. package/tests/node-network.test.ts +74 -74
  217. package/tests/plugin-a2a-enhanced.test.ts +230 -230
  218. package/tests/profiles.test.ts +61 -61
  219. package/tests/publish.test.ts +231 -231
  220. package/tests/rl-tools.test.ts +93 -93
  221. package/tests/sandbox-manager.test.ts +46 -46
  222. package/tests/scheduler.test.ts +200 -200
  223. package/tests/secrets.test.ts +107 -107
  224. package/tests/security-enhanced.test.ts +233 -233
  225. package/tests/settings-api.test.ts +148 -148
  226. package/tests/setup.test.ts +73 -73
  227. package/tests/subagent.test.ts +193 -193
  228. package/tests/telegram-discord.test.ts +60 -60
  229. package/tests/telemetry.test.ts +186 -186
  230. package/tests/user-profiler.test.ts +169 -169
  231. package/tests/v090-features.test.ts +254 -254
  232. package/tests/vision.test.ts +61 -61
  233. package/tests/voice-call.test.ts +47 -47
  234. package/tests/voice-enhanced.test.ts +169 -169
  235. package/tests/voice-interaction.test.ts +38 -38
  236. package/tests/web-search.test.ts +155 -155
  237. package/tests/workflow-graph.test.ts +279 -279
  238. package/tutorial/customer-service-agent/README.md +612 -612
  239. package/tutorial/customer-service-agent/SOUL.md +26 -26
  240. package/tutorial/customer-service-agent/agent.yaml +63 -63
  241. package/tutorial/customer-service-agent/package.json +19 -19
  242. package/tutorial/customer-service-agent/src/index.ts +69 -69
  243. package/tutorial/customer-service-agent/src/skills/faq.ts +27 -27
  244. package/tutorial/customer-service-agent/src/skills/ticket.ts +22 -22
  245. package/tutorial/customer-service-agent/tsconfig.json +14 -14
@@ -1,213 +1,213 @@
1
- /**
2
- * Document Processor - Parse and chunk documents for knowledge learning
3
- * Supports: PDF, TXT, MD, DOCX, CSV, JSON
4
- */
5
-
6
- export interface DocumentChunk {
7
- title: string;
8
- content: string;
9
- metadata: {
10
- source: string;
11
- format: string;
12
- chunkIndex: number;
13
- totalChunks?: number;
14
- page?: number;
15
- };
16
- }
17
-
18
- export interface ProcessedDocument {
19
- id: string;
20
- filename: string;
21
- format: string;
22
- size: number;
23
- chunks: DocumentChunk[];
24
- processedAt: string;
25
- }
26
-
27
- const MAX_FILE_SIZE = 50 * 1024 * 1024; // 50MB
28
- const CHUNK_TARGET_CHARS = 2000; // ~500 tokens
29
- const CHUNK_MAX_CHARS = 4000; // ~1000 tokens
30
-
31
- export class DocumentProcessor {
32
- /**
33
- * Process a file buffer into chunks
34
- */
35
- async process(buffer: Buffer, filename: string): Promise<ProcessedDocument> {
36
- if (buffer.length > MAX_FILE_SIZE) {
37
- throw new Error(`File too large: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (max 50MB)`);
38
- }
39
-
40
- const ext = filename.split('.').pop()?.toLowerCase() || '';
41
- let rawText: string;
42
-
43
- switch (ext) {
44
- case 'pdf':
45
- rawText = await this.parsePDF(buffer);
46
- break;
47
- case 'docx':
48
- rawText = await this.parseDOCX(buffer);
49
- break;
50
- case 'csv':
51
- rawText = this.parseCSV(buffer.toString('utf-8'));
52
- break;
53
- case 'json':
54
- rawText = this.parseJSON(buffer.toString('utf-8'));
55
- break;
56
- case 'txt':
57
- case 'md':
58
- case 'markdown':
59
- rawText = buffer.toString('utf-8');
60
- break;
61
- default:
62
- // Try as plain text
63
- rawText = buffer.toString('utf-8');
64
- }
65
-
66
- const chunks = this.chunkText(rawText, filename, ext);
67
-
68
- return {
69
- id: `doc-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
70
- filename,
71
- format: ext,
72
- size: buffer.length,
73
- chunks,
74
- processedAt: new Date().toISOString(),
75
- };
76
- }
77
-
78
- private async parsePDF(buffer: Buffer): Promise<string> {
79
- try {
80
- const pdfParse = require('pdf-parse');
81
- const data = await pdfParse(buffer);
82
- return data.text || '';
83
- } catch (e: any) {
84
- throw new Error(`PDF parse failed: ${e.message}`);
85
- }
86
- }
87
-
88
- private async parseDOCX(buffer: Buffer): Promise<string> {
89
- try {
90
- const mammoth = require('mammoth');
91
- const result = await mammoth.extractRawText({ buffer });
92
- return result.value || '';
93
- } catch (e: any) {
94
- throw new Error(`DOCX parse failed: ${e.message}`);
95
- }
96
- }
97
-
98
- private parseCSV(text: string): string {
99
- const lines = text.split('\n').filter(l => l.trim());
100
- if (lines.length === 0) return '';
101
-
102
- const headers = lines[0].split(',').map(h => h.trim().replace(/^"|"$/g, ''));
103
- const rows = lines.slice(1);
104
-
105
- // Convert CSV to readable text
106
- return rows.map((row, i) => {
107
- const values = this.parseCSVLine(row);
108
- const pairs = headers.map((h, j) => `${h}: ${values[j] || ''}`);
109
- return `Record ${i + 1}:\n${pairs.join('\n')}`;
110
- }).join('\n\n');
111
- }
112
-
113
- private parseCSVLine(line: string): string[] {
114
- const result: string[] = [];
115
- let current = '';
116
- let inQuotes = false;
117
- for (const ch of line) {
118
- if (ch === '"') { inQuotes = !inQuotes; }
119
- else if (ch === ',' && !inQuotes) { result.push(current.trim()); current = ''; }
120
- else { current += ch; }
121
- }
122
- result.push(current.trim());
123
- return result;
124
- }
125
-
126
- private parseJSON(text: string): string {
127
- try {
128
- const data = JSON.parse(text);
129
- if (Array.isArray(data)) {
130
- return data.map((item, i) => `Item ${i + 1}:\n${JSON.stringify(item, null, 2)}`).join('\n\n');
131
- }
132
- return JSON.stringify(data, null, 2);
133
- } catch {
134
- return text;
135
- }
136
- }
137
-
138
- /**
139
- * Smart chunking: split by headings/paragraphs, respecting size limits
140
- */
141
- private chunkText(text: string, filename: string, format: string): DocumentChunk[] {
142
- if (!text.trim()) return [];
143
-
144
- // Split by markdown headings or double newlines
145
- const sections = text.split(/\n(?=#{1,3}\s)|(?:\n\s*\n)/).filter(s => s.trim());
146
- const chunks: DocumentChunk[] = [];
147
- let currentChunk = '';
148
- let currentTitle = filename;
149
-
150
- for (const section of sections) {
151
- const headingMatch = section.match(/^(#{1,3})\s+(.+)/);
152
- if (headingMatch) {
153
- currentTitle = headingMatch[2].trim();
154
- }
155
-
156
- if (currentChunk.length + section.length > CHUNK_MAX_CHARS && currentChunk.length > 0) {
157
- chunks.push({
158
- title: currentTitle,
159
- content: currentChunk.trim(),
160
- metadata: { source: filename, format, chunkIndex: chunks.length },
161
- });
162
- currentChunk = '';
163
- }
164
-
165
- currentChunk += section + '\n\n';
166
-
167
- if (currentChunk.length >= CHUNK_TARGET_CHARS) {
168
- chunks.push({
169
- title: currentTitle,
170
- content: currentChunk.trim(),
171
- metadata: { source: filename, format, chunkIndex: chunks.length },
172
- });
173
- currentChunk = '';
174
- }
175
- }
176
-
177
- if (currentChunk.trim()) {
178
- chunks.push({
179
- title: currentTitle,
180
- content: currentChunk.trim(),
181
- metadata: { source: filename, format, chunkIndex: chunks.length },
182
- });
183
- }
184
-
185
- // If we got no chunks from section splitting (e.g. dense text), force-split
186
- if (chunks.length === 0 && text.trim()) {
187
- const words = text.split(/\s+/);
188
- let buf = '';
189
- for (const w of words) {
190
- if (buf.length + w.length + 1 > CHUNK_MAX_CHARS && buf) {
191
- chunks.push({
192
- title: filename,
193
- content: buf.trim(),
194
- metadata: { source: filename, format, chunkIndex: chunks.length },
195
- });
196
- buf = '';
197
- }
198
- buf += w + ' ';
199
- }
200
- if (buf.trim()) {
201
- chunks.push({
202
- title: filename,
203
- content: buf.trim(),
204
- metadata: { source: filename, format, chunkIndex: chunks.length },
205
- });
206
- }
207
- }
208
-
209
- // Set totalChunks
210
- for (const c of chunks) c.metadata.totalChunks = chunks.length;
211
- return chunks;
212
- }
213
- }
/**
 * Document Processor - Parse and chunk documents for knowledge learning
 * Supports: PDF, TXT, MD, DOCX, CSV, JSON
 */

/** One slice of a processed document together with its provenance. */
export interface DocumentChunk {
  /** Latest markdown heading (levels 1-3) seen up to the end of this chunk, or the filename when none was seen. */
  title: string;
  /** Trimmed chunk text; never longer than CHUNK_MAX_CHARS. */
  content: string;
  metadata: {
    /** Filename the chunk was extracted from. */
    source: string;
    /** Lower-cased file extension ('' when the file has none). */
    format: string;
    /** Zero-based position of the chunk within its document. */
    chunkIndex: number;
    /** Number of chunks in the document; filled in once chunking has finished. */
    totalChunks?: number;
    /** Not populated by this module. */
    page?: number;
  };
}

/** Result of DocumentProcessor.process(). */
export interface ProcessedDocument {
  /** `doc-<epoch millis>-<random base36 suffix>`; random, not derived from the content. */
  id: string;
  filename: string;
  /** Lower-cased file extension ('' when the file has none). */
  format: string;
  /** Size of the input buffer in bytes. */
  size: number;
  chunks: DocumentChunk[];
  /** ISO-8601 timestamp taken when processing finished. */
  processedAt: string;
}

const MAX_FILE_SIZE = 50 * 1024 * 1024; // 50MB
const CHUNK_TARGET_CHARS = 2000; // ~500 tokens
const CHUNK_MAX_CHARS = 4000; // ~1000 tokens

/** Section boundary: a newline directly before a level 1-3 markdown heading, or a blank line. */
const SECTION_BOUNDARY = /\n(?=#{1,3}\s)|(?:\n\s*\n)/;
/** Level 1-3 markdown heading at the very start of a section; group 2 is the heading text. */
const SECTION_HEADING = /^(#{1,3})\s+(.+)/;
const WHITESPACE_CHAR = /\s/;

/** Best-effort message for anything a `catch` clause can receive. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

export class DocumentProcessor {
  /**
   * Process a file buffer into chunks.
   *
   * The parser is chosen from the filename extension; unknown extensions are
   * decoded as UTF-8 plain text.
   *
   * @param buffer Raw file contents (at most 50MB).
   * @param filename Name used for format detection and as the fallback chunk title.
   * @returns The chunked document with a freshly generated id.
   * @throws Error when the buffer exceeds 50MB or PDF/DOCX extraction fails.
   */
  async process(buffer: Buffer, filename: string): Promise<ProcessedDocument> {
    if (buffer.length > MAX_FILE_SIZE) {
      throw new Error(`File too large: ${(buffer.length / 1024 / 1024).toFixed(1)}MB (max 50MB)`);
    }

    const ext = this.detectFormat(filename);
    let rawText: string;

    switch (ext) {
      case 'pdf':
        rawText = await this.parsePDF(buffer);
        break;
      case 'docx':
        rawText = await this.parseDOCX(buffer);
        break;
      case 'csv':
        rawText = this.parseCSV(this.decodeText(buffer));
        break;
      case 'json':
        rawText = this.parseJSON(this.decodeText(buffer));
        break;
      default:
        // txt / md / markdown and every unknown extension: treat as plain text.
        rawText = this.decodeText(buffer);
    }

    const chunks = this.chunkText(rawText, filename, ext);

    return {
      id: `doc-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
      filename,
      format: ext,
      size: buffer.length,
      chunks,
      processedAt: new Date().toISOString(),
    };
  }

  /**
   * Lower-cased extension of the last path component, or '' when it has no dot.
   * Only the basename is inspected so a dot in a directory name is not
   * mistaken for an extension.
   */
  private detectFormat(filename: string): string {
    const lastSeparator = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
    const base = filename.slice(lastSeparator + 1);
    const dot = base.lastIndexOf('.');
    return dot >= 0 ? base.slice(dot + 1).toLowerCase() : '';
  }

  /** Decode as UTF-8 and drop a leading byte-order mark, which JSON.parse would reject. */
  private decodeText(buffer: Buffer): string {
    const text = buffer.toString('utf-8');
    return text.charCodeAt(0) === 0xfeff ? text.slice(1) : text;
  }

  /**
   * Extract text from a PDF using the `pdf-parse` package.
   * The package is required lazily, so a missing module also surfaces as
   * "PDF parse failed".
   */
  private async parsePDF(buffer: Buffer): Promise<string> {
    try {
      const pdfParse = require('pdf-parse');
      const data = await pdfParse(buffer);
      return data.text || '';
    } catch (e: unknown) {
      throw new Error(`PDF parse failed: ${describeError(e)}`);
    }
  }

  /**
   * Extract raw text from a DOCX file using the `mammoth` package.
   * The package is required lazily, so a missing module also surfaces as
   * "DOCX parse failed".
   */
  private async parseDOCX(buffer: Buffer): Promise<string> {
    try {
      const mammoth = require('mammoth');
      const result = await mammoth.extractRawText({ buffer });
      return result.value || '';
    } catch (e: unknown) {
      throw new Error(`DOCX parse failed: ${describeError(e)}`);
    }
  }

  /**
   * Convert CSV into readable "Record N:" blocks of `header: value` lines.
   *
   * The first non-blank line is the header. Header and data rows go through
   * the same quote-aware splitter so their columns stay aligned. Values
   * beyond the header count are dropped; missing values render as empty.
   * Quoted fields spanning several lines are not supported.
   */
  private parseCSV(text: string): string {
    const [headerLine, ...rows] = text.split(/\r?\n/).filter(l => l.trim());
    if (headerLine === undefined) return '';

    const headers = this.parseCSVLine(headerLine);

    return rows.map((row, i) => {
      const values = this.parseCSVLine(row);
      const pairs = headers.map((h, j) => `${h}: ${values[j] ?? ''}`);
      return `Record ${i + 1}:\n${pairs.join('\n')}`;
    }).join('\n\n');
  }

  /**
   * Split one CSV line into trimmed field values.
   * Commas inside double quotes do not separate fields, and a doubled quote
   * inside a quoted field yields one literal quote character.
   */
  private parseCSVLine(line: string): string[] {
    const result: string[] = [];
    let current = '';
    let inQuotes = false;
    for (let i = 0; i < line.length; i++) {
      const ch = line[i];
      if (ch === '"') {
        if (inQuotes && line[i + 1] === '"') {
          // Escaped quote: keep one and skip its twin.
          current += '"';
          i++;
        } else {
          inQuotes = !inQuotes;
        }
      } else if (ch === ',' && !inQuotes) {
        result.push(current.trim());
        current = '';
      } else {
        current += ch;
      }
    }
    result.push(current.trim());
    return result;
  }

  /**
   * Pretty-print JSON for chunking. Top-level arrays become "Item N:" blocks
   * separated by blank lines; text that is not valid JSON is returned unchanged.
   */
  private parseJSON(text: string): string {
    try {
      const data: unknown = JSON.parse(text);
      if (Array.isArray(data)) {
        return data.map((item, i) => `Item ${i + 1}:\n${JSON.stringify(item, null, 2)}`).join('\n\n');
      }
      return JSON.stringify(data, null, 2);
    } catch {
      return text;
    }
  }

  /**
   * Smart chunking: split by headings/paragraphs, respecting size limits.
   *
   * Sections are accumulated until the chunk reaches CHUNK_TARGET_CHARS, and a
   * chunk is closed early when the next piece would push it past
   * CHUNK_MAX_CHARS. Sections that are themselves longer than the maximum
   * (dense text without blank lines) are first broken up at whitespace.
   *
   * @returns Chunks in document order; empty when the text is blank.
   */
  private chunkText(text: string, filename: string, format: string): DocumentChunk[] {
    if (!text.trim()) return [];

    const sections = text.split(SECTION_BOUNDARY).filter(s => s.trim());
    const chunks: DocumentChunk[] = [];
    let currentChunk = '';
    let currentTitle = filename;

    const flush = (): void => {
      const content = currentChunk.trim();
      if (content) {
        chunks.push({
          title: currentTitle,
          content,
          metadata: { source: filename, format, chunkIndex: chunks.length },
        });
      }
      currentChunk = '';
    };

    for (const section of sections) {
      const heading = SECTION_HEADING.exec(section)?.[2];
      const pieces = section.length > CHUNK_MAX_CHARS ? this.splitOversized(section) : [section];

      pieces.forEach((piece, pieceIndex) => {
        // Close the previous chunk BEFORE adopting this section's heading,
        // otherwise the earlier content would be filed under the next title.
        if (currentChunk.length > 0 && currentChunk.length + piece.length > CHUNK_MAX_CHARS) {
          flush();
        }
        // Only the first piece can carry the section's heading.
        if (pieceIndex === 0 && heading !== undefined) {
          currentTitle = heading.trim();
        }

        currentChunk += piece + '\n\n';

        if (currentChunk.length >= CHUNK_TARGET_CHARS) {
          flush();
        }
      });
    }
    flush();

    // Set totalChunks
    for (const c of chunks) c.metadata.totalChunks = chunks.length;
    return chunks;
  }

  /**
   * Break a section longer than CHUNK_MAX_CHARS into pieces that fit.
   * Cuts at the last whitespace within the limit; a run with no whitespace
   * at all is cut at the limit (never between the halves of a surrogate pair).
   */
  private splitOversized(section: string): string[] {
    const pieces: string[] = [];
    let rest = section.trim();

    while (rest.length > CHUNK_MAX_CHARS) {
      let cut = -1;
      for (let i = CHUNK_MAX_CHARS; i > 0; i--) {
        if (WHITESPACE_CHAR.test(rest.charAt(i))) {
          cut = i;
          break;
        }
      }
      if (cut === -1) {
        cut = CHUNK_MAX_CHARS;
        const prev = rest.charCodeAt(cut - 1);
        if (prev >= 0xd800 && prev <= 0xdbff) cut -= 1;
      }
      pieces.push(rest.slice(0, cut).trimEnd());
      rest = rest.slice(cut).trimStart();
    }

    if (rest) pieces.push(rest);
    return pieces;
  }
}