@juspay/neurolink 9.1.1 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +54 -7
  3. package/dist/agent/directTools.d.ts +3 -3
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/image-gen/ImageGenService.d.ts +143 -0
  6. package/dist/image-gen/ImageGenService.js +345 -0
  7. package/dist/image-gen/imageGenTools.d.ts +126 -0
  8. package/dist/image-gen/imageGenTools.js +304 -0
  9. package/dist/image-gen/index.d.ts +46 -0
  10. package/dist/image-gen/index.js +48 -0
  11. package/dist/image-gen/types.d.ts +237 -0
  12. package/dist/image-gen/types.js +24 -0
  13. package/dist/lib/agent/directTools.d.ts +3 -3
  14. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  15. package/dist/lib/image-gen/ImageGenService.js +346 -0
  16. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  17. package/dist/lib/image-gen/imageGenTools.js +305 -0
  18. package/dist/lib/image-gen/index.d.ts +46 -0
  19. package/dist/lib/image-gen/index.js +49 -0
  20. package/dist/lib/image-gen/types.d.ts +237 -0
  21. package/dist/lib/image-gen/types.js +25 -0
  22. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  23. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  24. package/dist/lib/processors/base/index.d.ts +14 -0
  25. package/dist/lib/processors/base/index.js +20 -0
  26. package/dist/lib/processors/base/types.d.ts +593 -0
  27. package/dist/lib/processors/base/types.js +77 -0
  28. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  29. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  30. package/dist/lib/processors/cli/index.d.ts +37 -0
  31. package/dist/lib/processors/cli/index.js +50 -0
  32. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  33. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  34. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  35. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  36. package/dist/lib/processors/code/index.d.ts +44 -0
  37. package/dist/lib/processors/code/index.js +61 -0
  38. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  39. package/dist/lib/processors/config/fileTypes.js +521 -0
  40. package/dist/lib/processors/config/index.d.ts +32 -0
  41. package/dist/lib/processors/config/index.js +93 -0
  42. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  43. package/dist/lib/processors/config/languageMap.js +411 -0
  44. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  45. package/dist/lib/processors/config/mimeTypes.js +339 -0
  46. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  47. package/dist/lib/processors/config/sizeLimits.js +247 -0
  48. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  49. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  50. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  51. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  52. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  53. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  54. package/dist/lib/processors/data/index.d.ts +49 -0
  55. package/dist/lib/processors/data/index.js +77 -0
  56. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  57. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  58. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  59. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  60. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  61. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  62. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  63. package/dist/lib/processors/document/WordProcessor.js +354 -0
  64. package/dist/lib/processors/document/index.d.ts +54 -0
  65. package/dist/lib/processors/document/index.js +91 -0
  66. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  67. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  68. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  69. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  70. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  71. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  72. package/dist/lib/processors/errors/index.d.ts +46 -0
  73. package/dist/lib/processors/errors/index.js +50 -0
  74. package/dist/lib/processors/index.d.ts +76 -0
  75. package/dist/lib/processors/index.js +113 -0
  76. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  77. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  78. package/dist/lib/processors/integration/index.d.ts +42 -0
  79. package/dist/lib/processors/integration/index.js +45 -0
  80. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  81. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  82. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  83. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  84. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  85. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  86. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  87. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  88. package/dist/lib/processors/markup/index.d.ts +66 -0
  89. package/dist/lib/processors/markup/index.js +103 -0
  90. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  91. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  92. package/dist/lib/processors/registry/index.d.ts +12 -0
  93. package/dist/lib/processors/registry/index.js +17 -0
  94. package/dist/lib/processors/registry/types.d.ts +53 -0
  95. package/dist/lib/processors/registry/types.js +11 -0
  96. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  97. package/dist/lib/server/utils/validation.d.ts +6 -6
  98. package/dist/lib/types/fileTypes.d.ts +1 -1
  99. package/dist/lib/types/index.d.ts +25 -24
  100. package/dist/lib/types/index.js +21 -20
  101. package/dist/lib/types/modelTypes.d.ts +18 -18
  102. package/dist/lib/types/pptTypes.d.ts +14 -2
  103. package/dist/lib/types/pptTypes.js +16 -0
  104. package/dist/lib/utils/async/delay.d.ts +40 -0
  105. package/dist/lib/utils/async/delay.js +43 -0
  106. package/dist/lib/utils/async/index.d.ts +23 -0
  107. package/dist/lib/utils/async/index.js +24 -0
  108. package/dist/lib/utils/async/retry.d.ts +141 -0
  109. package/dist/lib/utils/async/retry.js +172 -0
  110. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  111. package/dist/lib/utils/async/withTimeout.js +97 -0
  112. package/dist/lib/utils/fileDetector.d.ts +7 -1
  113. package/dist/lib/utils/fileDetector.js +91 -18
  114. package/dist/lib/utils/json/extract.d.ts +103 -0
  115. package/dist/lib/utils/json/extract.js +249 -0
  116. package/dist/lib/utils/json/index.d.ts +36 -0
  117. package/dist/lib/utils/json/index.js +37 -0
  118. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  119. package/dist/lib/utils/json/safeParse.js +191 -0
  120. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  121. package/dist/lib/utils/messageBuilder.js +15 -7
  122. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  123. package/dist/lib/utils/sanitizers/filename.js +366 -0
  124. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  125. package/dist/lib/utils/sanitizers/html.js +326 -0
  126. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  127. package/dist/lib/utils/sanitizers/index.js +30 -0
  128. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  129. package/dist/lib/utils/sanitizers/svg.js +483 -0
  130. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  131. package/dist/processors/base/BaseFileProcessor.js +613 -0
  132. package/dist/processors/base/index.d.ts +14 -0
  133. package/dist/processors/base/index.js +19 -0
  134. package/dist/processors/base/types.d.ts +593 -0
  135. package/dist/processors/base/types.js +76 -0
  136. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  137. package/dist/processors/cli/fileProcessorCli.js +388 -0
  138. package/dist/processors/cli/index.d.ts +37 -0
  139. package/dist/processors/cli/index.js +49 -0
  140. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  141. package/dist/processors/code/ConfigProcessor.js +400 -0
  142. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  143. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  144. package/dist/processors/code/index.d.ts +44 -0
  145. package/dist/processors/code/index.js +60 -0
  146. package/dist/processors/config/fileTypes.d.ts +283 -0
  147. package/dist/processors/config/fileTypes.js +520 -0
  148. package/dist/processors/config/index.d.ts +32 -0
  149. package/dist/processors/config/index.js +92 -0
  150. package/dist/processors/config/languageMap.d.ts +66 -0
  151. package/dist/processors/config/languageMap.js +410 -0
  152. package/dist/processors/config/mimeTypes.d.ts +376 -0
  153. package/dist/processors/config/mimeTypes.js +338 -0
  154. package/dist/processors/config/sizeLimits.d.ts +194 -0
  155. package/dist/processors/config/sizeLimits.js +246 -0
  156. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  157. package/dist/processors/data/JsonProcessor.js +203 -0
  158. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  159. package/dist/processors/data/XmlProcessor.js +283 -0
  160. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  161. package/dist/processors/data/YamlProcessor.js +294 -0
  162. package/dist/processors/data/index.d.ts +49 -0
  163. package/dist/processors/data/index.js +76 -0
  164. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  165. package/dist/processors/document/ExcelProcessor.js +519 -0
  166. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  167. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  168. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  169. package/dist/processors/document/RtfProcessor.js +361 -0
  170. package/dist/processors/document/WordProcessor.d.ts +168 -0
  171. package/dist/processors/document/WordProcessor.js +353 -0
  172. package/dist/processors/document/index.d.ts +54 -0
  173. package/dist/processors/document/index.js +90 -0
  174. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  175. package/dist/processors/errors/FileErrorCode.js +255 -0
  176. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  177. package/dist/processors/errors/errorHelpers.js +378 -0
  178. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  179. package/dist/processors/errors/errorSerializer.js +507 -0
  180. package/dist/processors/errors/index.d.ts +46 -0
  181. package/dist/processors/errors/index.js +49 -0
  182. package/dist/processors/index.d.ts +76 -0
  183. package/dist/processors/index.js +112 -0
  184. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  185. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  186. package/dist/processors/integration/index.d.ts +42 -0
  187. package/dist/processors/integration/index.js +44 -0
  188. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  189. package/dist/processors/markup/HtmlProcessor.js +249 -0
  190. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  191. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  192. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  193. package/dist/processors/markup/SvgProcessor.js +240 -0
  194. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  195. package/dist/processors/markup/TextProcessor.js +188 -0
  196. package/dist/processors/markup/index.d.ts +66 -0
  197. package/dist/processors/markup/index.js +102 -0
  198. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  199. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  200. package/dist/processors/registry/index.d.ts +12 -0
  201. package/dist/processors/registry/index.js +16 -0
  202. package/dist/processors/registry/types.d.ts +53 -0
  203. package/dist/processors/registry/types.js +10 -0
  204. package/dist/server/utils/validation.d.ts +6 -6
  205. package/dist/types/fileTypes.d.ts +1 -1
  206. package/dist/types/index.d.ts +25 -24
  207. package/dist/types/index.js +21 -20
  208. package/dist/types/modelTypes.d.ts +10 -10
  209. package/dist/types/pptTypes.d.ts +14 -2
  210. package/dist/types/pptTypes.js +16 -0
  211. package/dist/utils/async/delay.d.ts +40 -0
  212. package/dist/utils/async/delay.js +42 -0
  213. package/dist/utils/async/index.d.ts +23 -0
  214. package/dist/utils/async/index.js +23 -0
  215. package/dist/utils/async/retry.d.ts +141 -0
  216. package/dist/utils/async/retry.js +171 -0
  217. package/dist/utils/async/withTimeout.d.ts +73 -0
  218. package/dist/utils/async/withTimeout.js +96 -0
  219. package/dist/utils/fileDetector.d.ts +7 -1
  220. package/dist/utils/fileDetector.js +91 -18
  221. package/dist/utils/json/extract.d.ts +103 -0
  222. package/dist/utils/json/extract.js +248 -0
  223. package/dist/utils/json/index.d.ts +36 -0
  224. package/dist/utils/json/index.js +36 -0
  225. package/dist/utils/json/safeParse.d.ts +137 -0
  226. package/dist/utils/json/safeParse.js +190 -0
  227. package/dist/utils/messageBuilder.d.ts +2 -2
  228. package/dist/utils/messageBuilder.js +15 -7
  229. package/dist/utils/sanitizers/filename.d.ts +137 -0
  230. package/dist/utils/sanitizers/filename.js +365 -0
  231. package/dist/utils/sanitizers/html.d.ts +170 -0
  232. package/dist/utils/sanitizers/html.js +325 -0
  233. package/dist/utils/sanitizers/index.d.ts +26 -0
  234. package/dist/utils/sanitizers/index.js +29 -0
  235. package/dist/utils/sanitizers/svg.d.ts +81 -0
  236. package/dist/utils/sanitizers/svg.js +482 -0
  237. package/package.json +2 -2
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Markdown File Processor
3
+ *
4
+ * Processes Markdown files with structure extraction and analysis.
5
+ * Markdown files are analyzed to extract metadata about their structure
6
+ * including headings, code blocks, and tables.
7
+ *
8
+ * Features:
9
+ * - Original content preservation
10
+ * - Line count calculation
11
+ * - Code block detection
12
+ * - Table detection
13
+ * - Heading extraction (all levels)
14
+ *
15
+ * @module processors/markup/MarkdownProcessor
16
+ *
17
+ * @example
18
+ * ```typescript
19
+ * import { markdownProcessor, processMarkdown, isMarkdownFile } from "./markup/MarkdownProcessor.js";
20
+ *
21
+ * // Check if file is Markdown
22
+ * if (isMarkdownFile(mimetype, filename)) {
23
+ * const result = await processMarkdown(fileInfo);
24
+ * if (result.success) {
25
+ * console.log('Line count:', result.data.lineCount);
26
+ * console.log('Headings:', result.data.headings);
27
+ * console.log('Has code blocks:', result.data.hasCodeBlocks);
28
+ * }
29
+ * }
30
+ * ```
31
+ */
32
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
33
+ import { MARKDOWN_EXTENSIONS, SIZE_LIMITS, TEXT_MIME_TYPES, } from "../config/index.js";
34
+ // =============================================================================
35
+ // CONSTANTS
36
+ // =============================================================================
37
+ /**
38
+ * Supported Markdown MIME types.
39
+ * Derived from the centralized TEXT_MIME_TYPES config.
40
+ */
41
+ const SUPPORTED_MARKDOWN_TYPES = [
42
+ TEXT_MIME_TYPES.MARKDOWN,
43
+ TEXT_MIME_TYPES.MARKDOWN_ALT,
44
+ ];
45
+ /**
46
+ * Supported Markdown file extensions.
47
+ * Derived from the centralized MARKDOWN_EXTENSIONS config.
48
+ */
49
+ const SUPPORTED_MARKDOWN_EXTENSIONS = MARKDOWN_EXTENSIONS;
50
+ /** Default timeout for Markdown processing (30 seconds) */
51
+ const MARKDOWN_TIMEOUT_MS = 30000;
52
+ // =============================================================================
53
+ // MARKDOWN PROCESSOR
54
+ // =============================================================================
55
/**
 * Markdown Processor - processes Markdown files with structure analysis.
 *
 * The processor keeps the original text untouched and derives lightweight
 * structural metadata from it: line count, headings, and whether the
 * document contains fenced code blocks or pipe tables.
 *
 * NOTE(review): the registry priority was previously documented here as 40
 * (before JSON at 50, before generic text at 110); the registration itself is
 * not visible in this file - confirm against the processor registry.
 *
 * @example
 * ```typescript
 * const processor = new MarkdownProcessor();
 *
 * const result = await processor.processFile({
 *   id: 'md-123',
 *   name: 'README.md',
 *   mimetype: 'text/markdown',
 *   size: 4096,
 *   url: 'https://example.com/README.md',
 * });
 *
 * if (result.success) {
 *   console.log('Headings:', result.data.headings);
 *   console.log('Has code blocks:', result.data.hasCodeBlocks);
 * }
 * ```
 */
export class MarkdownProcessor extends BaseFileProcessor {
    constructor() {
        super({
            maxSizeMB: SIZE_LIMITS.TEXT_MAX_MB,
            timeoutMs: MARKDOWN_TIMEOUT_MS,
            supportedMimeTypes: [...SUPPORTED_MARKDOWN_TYPES],
            supportedExtensions: [...SUPPORTED_MARKDOWN_EXTENSIONS],
            fileTypeName: "Markdown",
            defaultFilename: "document.md",
        });
    }
    /**
     * Validate downloaded Markdown file.
     * Markdown is very permissive - almost any text is valid - so only empty
     * files and content that looks binary are rejected.
     *
     * @param buffer - Downloaded file content
     * @param _fileInfo - Original file information (not consulted here)
     * @returns null if valid, error message if invalid
     */
    async validateDownloadedFile(buffer, _fileInfo) {
        if (buffer.length === 0) {
            return "Invalid Markdown - file is empty";
        }
        // A NUL character within the first 1000 decoded characters is treated
        // as a sign that the payload is binary rather than text.
        const content = buffer.toString("utf-8");
        const nullByteIndex = content.indexOf("\0");
        if (nullByteIndex !== -1 && nullByteIndex < 1000) {
            return "Invalid Markdown - appears to be binary file";
        }
        return null;
    }
    /**
     * Build processed Markdown result with structure analysis.
     *
     * Processing steps:
     * 1. Preserve original content
     * 2. Count lines (number of "\n"-separated segments)
     * 3. Walk the lines once, tracking fenced code blocks (``` or ~~~)
     * 4. Collect ATX headings (#..######) that are NOT inside a fence
     * 5. Detect pipe tables
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns Processed Markdown result
     */
    buildProcessedResult(buffer, fileInfo) {
        const content = buffer.toString("utf-8");
        const filename = this.getFilename(fileInfo);
        const lines = content.split("\n");
        const headings = [];
        // Marker string (e.g. "```" or "~~~~") of the fence currently open,
        // or null while outside any fenced code block.
        let openFence = null;
        let sawFence = false;
        for (const line of lines) {
            const fenceMatch = /^ {0,3}(`{3,}|~{3,})/.exec(line);
            if (openFence === null) {
                if (fenceMatch !== null) {
                    const marker = fenceMatch[1];
                    const info = line.slice(fenceMatch[0].length);
                    // A backtick fence whose info string contains a backtick is
                    // inline code (e.g. "```x``` text"), not a fence opener.
                    if (marker[0] !== "`" || !info.includes("`")) {
                        openFence = marker;
                        sawFence = true;
                        continue;
                    }
                }
                if (/^#{1,6}\s+/.test(line)) {
                    headings.push(line.replace(/^#+\s+/, "").trim());
                }
                continue;
            }
            // Inside a fence: "#" lines are code (shell comments, etc.), not
            // headings. Only a matching closing fence ends the block: same
            // character, at least as long, nothing but whitespace after it.
            if (fenceMatch !== null &&
                fenceMatch[1][0] === openFence[0] &&
                fenceMatch[1].length >= openFence.length &&
                line.slice(fenceMatch[0].length).trim() === "") {
                openFence = null;
            }
        }
        // Any triple backtick still counts (previous behaviour); tilde fences
        // found during the walk are now reported as code blocks as well.
        const hasCodeBlocks = sawFence || /```/.test(content);
        // Detect Markdown tables: a single line containing at least three
        // pipes, e.g. "| col1 | col2 |".
        const hasTables = /\|.*\|.*\|/.test(content);
        return {
            content,
            lineCount: lines.length,
            hasCodeBlocks,
            hasTables,
            headings,
            buffer,
            mimetype: fileInfo.mimetype || "text/markdown",
            size: fileInfo.size,
            filename,
        };
    }
}
156
+ // =============================================================================
157
+ // SINGLETON INSTANCE
158
+ // =============================================================================
159
+ /**
160
+ * Singleton Markdown processor instance.
161
+ * Use this for most processing needs.
162
+ *
163
+ * @example
164
+ * ```typescript
165
+ * import { markdownProcessor } from "./markup/MarkdownProcessor.js";
166
+ *
167
+ * const result = await markdownProcessor.processFile(fileInfo);
168
+ * ```
169
+ */
170
+ export const markdownProcessor = new MarkdownProcessor();
171
+ // =============================================================================
172
+ // HELPER FUNCTIONS
173
+ // =============================================================================
174
/**
 * Report whether a file should be treated as Markdown.
 *
 * The decision is delegated to the shared `markdownProcessor` instance via
 * its `isFileSupported(mimetype, filename)` method.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename (for extension-based detection)
 * @returns true if the file is a Markdown file
 *
 * @example
 * ```typescript
 * if (isMarkdownFile('text/markdown', 'README.md')) {
 *   // Handle as Markdown
 * }
 *
 * // Also works with just filename
 * if (isMarkdownFile('', 'CHANGELOG.markdown')) {
 *   // Handle as Markdown based on extension
 * }
 * ```
 */
export function isMarkdownFile(mimetype, filename) {
    const supported = markdownProcessor.isFileSupported(mimetype, filename);
    return supported;
}
196
/**
 * Check a Markdown file's size against the configured text limit.
 *
 * The limit is `SIZE_LIMITS.TEXT_MAX_MB`, converted from megabytes to bytes
 * (1 MB = 1024 * 1024 bytes). A size exactly equal to the limit is accepted.
 *
 * @param sizeBytes - File size in bytes
 * @returns true if size is within the allowed limit
 *
 * @example
 * ```typescript
 * if (!validateMarkdownSize(fileInfo.size)) {
 *   console.error('Markdown file is too large');
 * }
 * ```
 */
export function validateMarkdownSize(sizeBytes) {
    return sizeBytes <= SIZE_LIMITS.TEXT_MAX_MB * 1024 * 1024;
}
213
/**
 * Process one Markdown file through the shared `markdownProcessor` instance.
 *
 * This is a convenience wrapper: both arguments are forwarded unchanged to
 * `markdownProcessor.processFile`, and its result is returned as-is.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options (auth headers, timeout, retry config)
 * @returns Processing result with Markdown content and structure analysis
 *
 * @example
 * ```typescript
 * const result = await processMarkdown({
 *   id: 'md-123',
 *   name: 'README.md',
 *   mimetype: 'text/markdown',
 *   size: 4096,
 *   buffer: markdownBuffer,
 * });
 *
 * if (result.success) {
 *   console.log('Line count:', result.data.lineCount);
 *   console.log('Headings:', result.data.headings);
 *   if (result.data.hasCodeBlocks) {
 *     console.log('Document contains code examples');
 *   }
 * } else {
 *   console.error('Processing failed:', result.error.userMessage);
 * }
 * ```
 */
export async function processMarkdown(fileInfo, options) {
    const pendingResult = markdownProcessor.processFile(fileInfo, options);
    return pendingResult;
}
@@ -0,0 +1,156 @@
1
+ /**
2
+ * SVG File Processor
3
+ *
4
+ * Processes SVG files as TEXT content, not as images, because:
5
+ * 1. Most AI vision models don't support SVG format directly
6
+ * 2. SVG is XML-based and can be analyzed as code/markup
7
+ * 3. SVG can contain security risks (scripts, XSS vectors)
8
+ *
9
+ * Security: Uses OWASP-compliant allowlist-based SVG sanitization
10
+ *
11
+ * @module processors/markup/SvgProcessor
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * import { svgProcessor, processSvg, isSvgFile } from "./markup/SvgProcessor.js";
16
+ *
17
+ * // Check if file is SVG
18
+ * if (isSvgFile(mimetype, filename)) {
19
+ * const result = await processSvg(fileInfo);
20
+ * if (result.success) {
21
+ * console.log('Sanitized SVG:', result.data.textContent);
22
+ * if (result.data.securityWarnings.length > 0) {
23
+ * console.warn('Security warnings:', result.data.securityWarnings);
24
+ * }
25
+ * }
26
+ * }
27
+ * ```
28
+ */
29
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
30
+ import type { FileInfo, FileProcessingResult, ProcessOptions } from "../base/types.js";
31
+ export type { ProcessedSvg } from "../base/types.js";
32
+ import type { ProcessedSvg } from "../base/types.js";
33
+ /**
34
+ * SVG Processor - processes SVG as TEXT, not as image.
35
+ *
36
+ * Why text instead of image:
37
+ * 1. Most AI vision models don't support SVG format
38
+ * 2. SVG is XML-based and can be analyzed as markup
39
+ * 3. Security: SVG can contain scripts and XSS vectors
40
+ *
41
+ * Priority: 5 (before IMAGE at priority 10)
42
+ *
43
+ * @example
44
+ * ```typescript
45
+ * const processor = new SvgProcessor();
46
+ *
47
+ * const result = await processor.processFile({
48
+ * id: 'svg-123',
49
+ * name: 'diagram.svg',
50
+ * mimetype: 'image/svg+xml',
51
+ * size: 2048,
52
+ * url: 'https://example.com/diagram.svg',
53
+ * });
54
+ *
55
+ * if (result.success) {
56
+ * // Use sanitized SVG text content
57
+ * console.log(result.data.textContent);
58
+ * }
59
+ * ```
60
+ */
61
+ export declare class SvgProcessor extends BaseFileProcessor<ProcessedSvg> {
62
+ constructor();
63
+ /**
64
+ * Validate downloaded SVG file.
65
+ * Checks for valid XML structure (SVG must contain <svg> element).
66
+ *
67
+ * @param buffer - Downloaded file content
68
+ * @param _fileInfo - Original file information
69
+ * @returns null if valid, error message if invalid
70
+ */
71
+ protected validateDownloadedFile(buffer: Buffer, _fileInfo: FileInfo): Promise<string | null>;
72
+ /**
73
+ * Build processed SVG result with sanitized content.
74
+ * Applies security sanitization to remove potentially malicious content.
75
+ *
76
+ * @param buffer - Downloaded file content
77
+ * @param fileInfo - Original file information
78
+ * @returns Processed SVG result with sanitized text content
79
+ */
80
+ protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedSvg;
81
+ }
82
+ /**
83
+ * Singleton SVG processor instance.
84
+ * Use this for most processing needs.
85
+ *
86
+ * @example
87
+ * ```typescript
88
+ * import { svgProcessor } from "./markup/SvgProcessor.js";
89
+ *
90
+ * const result = await svgProcessor.processFile(fileInfo);
91
+ * ```
92
+ */
93
+ export declare const svgProcessor: SvgProcessor;
94
+ /**
95
+ * Check if a file is an SVG file.
96
+ *
97
+ * @param mimetype - MIME type of the file
98
+ * @param filename - Filename (for extension-based detection)
99
+ * @returns true if the file is an SVG
100
+ *
101
+ * @example
102
+ * ```typescript
103
+ * if (isSvgFile('image/svg+xml', 'diagram.svg')) {
104
+ * // Handle as SVG
105
+ * }
106
+ *
107
+ * // Also works with just filename
108
+ * if (isSvgFile('', 'icon.svg')) {
109
+ * // Handle as SVG based on extension
110
+ * }
111
+ * ```
112
+ */
113
+ export declare function isSvgFile(mimetype: string, filename: string): boolean;
114
+ /**
115
+ * Validate SVG file size against configured limit.
116
+ *
117
+ * @param sizeBytes - File size in bytes
118
+ * @returns true if size is within the allowed limit (5 MB)
119
+ *
120
+ * @example
121
+ * ```typescript
122
+ * if (!validateSvgSize(fileInfo.size)) {
123
+ * console.error('SVG file is too large');
124
+ * }
125
+ * ```
126
+ */
127
+ export declare function validateSvgSize(sizeBytes: number): boolean;
128
+ /**
129
+ * Process a single SVG file.
130
+ * Convenience function that uses the singleton processor.
131
+ *
132
+ * @param fileInfo - File information (can include URL or buffer)
133
+ * @param options - Optional processing options (auth headers, timeout, retry config)
134
+ * @returns Processing result with sanitized SVG text content
135
+ *
136
+ * @example
137
+ * ```typescript
138
+ * const result = await processSvg({
139
+ * id: 'svg-123',
140
+ * name: 'diagram.svg',
141
+ * mimetype: 'image/svg+xml',
142
+ * size: 2048,
143
+ * buffer: svgBuffer,
144
+ * });
145
+ *
146
+ * if (result.success) {
147
+ * console.log('Processed SVG:', result.data.textContent);
148
+ * if (result.data.sanitized) {
149
+ * console.log('Content was sanitized for security');
150
+ * }
151
+ * } else {
152
+ * console.error('Processing failed:', result.error.userMessage);
153
+ * }
154
+ * ```
155
+ */
156
+ export declare function processSvg(fileInfo: FileInfo, options?: ProcessOptions): Promise<FileProcessingResult<ProcessedSvg>>;
@@ -0,0 +1,240 @@
1
+ /**
2
+ * SVG File Processor
3
+ *
4
+ * Processes SVG files as TEXT content, not as images, because:
5
+ * 1. Most AI vision models don't support SVG format directly
6
+ * 2. SVG is XML-based and can be analyzed as code/markup
7
+ * 3. SVG can contain security risks (scripts, XSS vectors)
8
+ *
9
+ * Security: Uses OWASP-compliant allowlist-based SVG sanitization
10
+ *
11
+ * @module processors/markup/SvgProcessor
12
+ *
13
+ * @example
14
+ * ```typescript
15
+ * import { svgProcessor, processSvg, isSvgFile } from "./markup/SvgProcessor.js";
16
+ *
17
+ * // Check if file is SVG
18
+ * if (isSvgFile(mimetype, filename)) {
19
+ * const result = await processSvg(fileInfo);
20
+ * if (result.success) {
21
+ * console.log('Sanitized SVG:', result.data.textContent);
22
+ * if (result.data.securityWarnings.length > 0) {
23
+ * console.warn('Security warnings:', result.data.securityWarnings);
24
+ * }
25
+ * }
26
+ * }
27
+ * ```
28
+ */
29
+ import { isSvgContentSafe, sanitizeSvgContent, } from "../../utils/sanitizers/svg.js";
30
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
31
+ // =============================================================================
32
+ // CONSTANTS
33
+ // =============================================================================
34
+ /** MIME types treated as SVG; copied into the SvgProcessor config as supportedMimeTypes. */
35
+ const SUPPORTED_SVG_TYPES = ["image/svg+xml"];
36
+ /** File extensions treated as SVG; copied into the SvgProcessor config as supportedExtensions. */
37
+ const SUPPORTED_SVG_EXTENSIONS = [".svg"];
38
+ // =============================================================================
39
+ // SVG PROCESSOR
40
+ // =============================================================================
41
/**
 * SVG Processor - processes SVG as TEXT, not as image.
 *
 * Why text instead of image:
 * 1. Most AI vision models don't support SVG format
 * 2. SVG is XML-based and can be analyzed as markup
 * 3. Security: SVG can contain scripts and XSS vectors
 *
 * NOTE(review): earlier documentation gives this processor priority 5
 * (before IMAGE at priority 10); no priority is set in this class, so
 * confirm where the ordering is registered.
 *
 * @example
 * ```typescript
 * const processor = new SvgProcessor();
 *
 * const result = await processor.processFile({
 *   id: 'svg-123',
 *   name: 'diagram.svg',
 *   mimetype: 'image/svg+xml',
 *   size: 2048,
 *   url: 'https://example.com/diagram.svg',
 * });
 *
 * if (result.success) {
 *   // Use sanitized SVG text content
 *   console.log(result.data.textContent);
 * }
 * ```
 */
export class SvgProcessor extends BaseFileProcessor {
  /**
   * Configure the base processor for SVG: 5 MB size cap, 30 s timeout,
   * the SVG MIME type / extension lists, and a fallback filename.
   */
  constructor() {
    super({
      maxSizeMB: 5,
      timeoutMs: 30000,
      // Spread so the base class receives its own copies of the module-level lists.
      supportedMimeTypes: [...SUPPORTED_SVG_TYPES],
      supportedExtensions: [...SUPPORTED_SVG_EXTENSIONS],
      fileTypeName: "SVG",
      defaultFilename: "image.svg",
    });
  }

  /**
   * Validate downloaded SVG content.
   *
   * The content is accepted only when it contains an `<svg` tag
   * (case-insensitive). A leading `<?xml` declaration or `<!DOCTYPE` is not
   * sufficient on its own: HTML error pages and unrelated XML documents
   * start the same way and must be rejected.
   *
   * @param buffer - Downloaded file content
   * @param _fileInfo - Original file information (unused)
   * @returns null if valid, error message if invalid
   */
  async validateDownloadedFile(buffer, _fileInfo) {
    const normalized = buffer.toString("utf-8").trim().toLowerCase();
    if (normalized.includes("<svg")) {
      return null;
    }
    // No <svg> element at all: distinguish an HTML page (typically a
    // download error page) from other non-SVG content for a clearer message.
    if (normalized.includes("<html")) {
      return "Download failed - received HTML instead of SVG content";
    }
    return "Invalid SVG - missing <svg> element";
  }

  /**
   * Build processed SVG result with sanitized content.
   * Applies security sanitization to remove potentially malicious content.
   *
   * @param buffer - Downloaded file content
   * @param fileInfo - Original file information
   * @returns Object with the sanitized `textContent`, the `rawContent` (only
   *   when it differs from `textContent`), a `sanitized` flag,
   *   `securityWarnings`, the original `buffer`, and `mimetype` / `size` /
   *   `filename` metadata
   */
  buildProcessedResult(buffer, fileInfo) {
    const rawContent = buffer.toString("utf-8");
    const filename = this.getFilename(fileInfo);
    // Safety verdict is taken on the raw input, before any sanitization.
    const wasSafe = isSvgContentSafe(rawContent);
    // Declared before the try/catch so the catch branch can append to it.
    const securityWarnings = [];
    let textContent;
    try {
      textContent = sanitizeSvgContent(rawContent);
    } catch {
      // Fail closed: if sanitization throws, substitute a safe empty SVG
      // rather than attempting any regex-based cleanup of the input.
      textContent = '<svg xmlns="http://www.w3.org/2000/svg"></svg>';
      securityWarnings.push(
        "SVG sanitization failed - malformed content replaced with empty SVG for security",
      );
    }
    if (!wasSafe) {
      securityWarnings.push(
        "SVG contained potentially unsafe content that was sanitized",
      );
    }
    const contentWasModified = textContent !== rawContent;
    return {
      textContent,
      // Raw content is only kept when sanitization changed it (for debugging).
      rawContent: contentWasModified ? rawContent : undefined,
      sanitized: !wasSafe || contentWasModified,
      securityWarnings,
      buffer,
      // Empty or missing mimetype falls back to the SVG MIME type.
      mimetype: fileInfo.mimetype || "image/svg+xml",
      size: fileInfo.size,
      filename,
    };
  }
}
153
+ // =============================================================================
154
+ // SINGLETON INSTANCE
155
+ // =============================================================================
156
+ /**
157
+ * Shared SvgProcessor instance, created once when this module is loaded.
158
+ * The isSvgFile and processSvg helpers in this module delegate to it.
159
+ *
160
+ * @example
161
+ * ```typescript
162
+ * import { svgProcessor } from "./markup/SvgProcessor.js";
163
+ *
164
+ * const result = await svgProcessor.processFile(fileInfo);
165
+ * ```
166
+ */
167
+ export const svgProcessor = new SvgProcessor();
168
+ // =============================================================================
169
+ // HELPER FUNCTIONS
170
+ // =============================================================================
171
/**
 * Tell whether a file should be handled as SVG.
 *
 * Delegates to the shared `svgProcessor`'s `isFileSupported` check, passing
 * both arguments through unchanged.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename (for extension-based detection)
 * @returns whatever `svgProcessor.isFileSupported` reports for the pair
 *
 * @example
 * ```typescript
 * if (isSvgFile('image/svg+xml', 'diagram.svg')) {
 *   // Handle as SVG
 * }
 *
 * // Filename-only detection
 * if (isSvgFile('', 'icon.svg')) {
 *   // Handle as SVG based on extension
 * }
 * ```
 */
export function isSvgFile(mimetype, filename) {
  const recognised = svgProcessor.isFileSupported(mimetype, filename);
  return recognised;
}
193
/**
 * Validate an SVG file size against the 5 MB SVG size limit.
 *
 * The limit is a fixed value in this function (it matches the `maxSizeMB: 5`
 * passed to the processor's constructor; keep the two in sync).
 *
 * @param sizeBytes - File size in bytes
 * @returns true only if `sizeBytes` is a finite, non-negative number no
 *   larger than 5 MB (5 * 1024 * 1024 bytes); false for anything else,
 *   including negative values, NaN, +/-Infinity and non-numbers
 *
 * @example
 * ```typescript
 * if (!validateSvgSize(fileInfo.size)) {
 *   console.error('SVG file is too large');
 * }
 * ```
 */
export function validateSvgSize(sizeBytes) {
  const maxBytes = 5 * 1024 * 1024; // 5 MB
  // Number.isFinite rejects NaN, +/-Infinity and every non-number without
  // coercion, so null / undefined / numeric strings are never accepted.
  if (!Number.isFinite(sizeBytes) || sizeBytes < 0) {
    return false;
  }
  return sizeBytes <= maxBytes;
}
210
/**
 * Run one SVG file through the shared `svgProcessor`.
 *
 * Thin convenience wrapper: both arguments are handed to
 * `svgProcessor.processFile` as-is and its result is returned.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options (auth headers, timeout, retry config)
 * @returns Promise resolving to the result produced by `svgProcessor.processFile`
 *
 * @example
 * ```typescript
 * const result = await processSvg({
 *   id: 'svg-123',
 *   name: 'diagram.svg',
 *   mimetype: 'image/svg+xml',
 *   size: 2048,
 *   buffer: svgBuffer,
 * });
 *
 * if (result.success) {
 *   console.log('Processed SVG:', result.data.textContent);
 *   if (result.data.sanitized) {
 *     console.log('Content was sanitized for security');
 *   }
 * } else {
 *   console.error('Processing failed:', result.error.userMessage);
 * }
 * ```
 */
export async function processSvg(fileInfo, options) {
  const outcome = await svgProcessor.processFile(fileInfo, options);
  return outcome;
}