@juspay/neurolink 9.1.1 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +54 -7
  3. package/dist/agent/directTools.d.ts +3 -3
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/image-gen/ImageGenService.d.ts +143 -0
  6. package/dist/image-gen/ImageGenService.js +345 -0
  7. package/dist/image-gen/imageGenTools.d.ts +126 -0
  8. package/dist/image-gen/imageGenTools.js +304 -0
  9. package/dist/image-gen/index.d.ts +46 -0
  10. package/dist/image-gen/index.js +48 -0
  11. package/dist/image-gen/types.d.ts +237 -0
  12. package/dist/image-gen/types.js +24 -0
  13. package/dist/lib/agent/directTools.d.ts +3 -3
  14. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  15. package/dist/lib/image-gen/ImageGenService.js +346 -0
  16. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  17. package/dist/lib/image-gen/imageGenTools.js +305 -0
  18. package/dist/lib/image-gen/index.d.ts +46 -0
  19. package/dist/lib/image-gen/index.js +49 -0
  20. package/dist/lib/image-gen/types.d.ts +237 -0
  21. package/dist/lib/image-gen/types.js +25 -0
  22. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  23. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  24. package/dist/lib/processors/base/index.d.ts +14 -0
  25. package/dist/lib/processors/base/index.js +20 -0
  26. package/dist/lib/processors/base/types.d.ts +593 -0
  27. package/dist/lib/processors/base/types.js +77 -0
  28. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  29. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  30. package/dist/lib/processors/cli/index.d.ts +37 -0
  31. package/dist/lib/processors/cli/index.js +50 -0
  32. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  33. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  34. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  35. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  36. package/dist/lib/processors/code/index.d.ts +44 -0
  37. package/dist/lib/processors/code/index.js +61 -0
  38. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  39. package/dist/lib/processors/config/fileTypes.js +521 -0
  40. package/dist/lib/processors/config/index.d.ts +32 -0
  41. package/dist/lib/processors/config/index.js +93 -0
  42. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  43. package/dist/lib/processors/config/languageMap.js +411 -0
  44. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  45. package/dist/lib/processors/config/mimeTypes.js +339 -0
  46. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  47. package/dist/lib/processors/config/sizeLimits.js +247 -0
  48. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  49. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  50. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  51. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  52. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  53. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  54. package/dist/lib/processors/data/index.d.ts +49 -0
  55. package/dist/lib/processors/data/index.js +77 -0
  56. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  57. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  58. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  59. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  60. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  61. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  62. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  63. package/dist/lib/processors/document/WordProcessor.js +354 -0
  64. package/dist/lib/processors/document/index.d.ts +54 -0
  65. package/dist/lib/processors/document/index.js +91 -0
  66. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  67. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  68. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  69. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  70. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  71. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  72. package/dist/lib/processors/errors/index.d.ts +46 -0
  73. package/dist/lib/processors/errors/index.js +50 -0
  74. package/dist/lib/processors/index.d.ts +76 -0
  75. package/dist/lib/processors/index.js +113 -0
  76. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  77. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  78. package/dist/lib/processors/integration/index.d.ts +42 -0
  79. package/dist/lib/processors/integration/index.js +45 -0
  80. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  81. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  82. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  83. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  84. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  85. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  86. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  87. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  88. package/dist/lib/processors/markup/index.d.ts +66 -0
  89. package/dist/lib/processors/markup/index.js +103 -0
  90. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  91. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  92. package/dist/lib/processors/registry/index.d.ts +12 -0
  93. package/dist/lib/processors/registry/index.js +17 -0
  94. package/dist/lib/processors/registry/types.d.ts +53 -0
  95. package/dist/lib/processors/registry/types.js +11 -0
  96. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  97. package/dist/lib/server/utils/validation.d.ts +6 -6
  98. package/dist/lib/types/fileTypes.d.ts +1 -1
  99. package/dist/lib/types/index.d.ts +25 -24
  100. package/dist/lib/types/index.js +21 -20
  101. package/dist/lib/types/modelTypes.d.ts +18 -18
  102. package/dist/lib/types/pptTypes.d.ts +14 -2
  103. package/dist/lib/types/pptTypes.js +16 -0
  104. package/dist/lib/utils/async/delay.d.ts +40 -0
  105. package/dist/lib/utils/async/delay.js +43 -0
  106. package/dist/lib/utils/async/index.d.ts +23 -0
  107. package/dist/lib/utils/async/index.js +24 -0
  108. package/dist/lib/utils/async/retry.d.ts +141 -0
  109. package/dist/lib/utils/async/retry.js +172 -0
  110. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  111. package/dist/lib/utils/async/withTimeout.js +97 -0
  112. package/dist/lib/utils/fileDetector.d.ts +7 -1
  113. package/dist/lib/utils/fileDetector.js +91 -18
  114. package/dist/lib/utils/json/extract.d.ts +103 -0
  115. package/dist/lib/utils/json/extract.js +249 -0
  116. package/dist/lib/utils/json/index.d.ts +36 -0
  117. package/dist/lib/utils/json/index.js +37 -0
  118. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  119. package/dist/lib/utils/json/safeParse.js +191 -0
  120. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  121. package/dist/lib/utils/messageBuilder.js +15 -7
  122. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  123. package/dist/lib/utils/sanitizers/filename.js +366 -0
  124. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  125. package/dist/lib/utils/sanitizers/html.js +326 -0
  126. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  127. package/dist/lib/utils/sanitizers/index.js +30 -0
  128. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  129. package/dist/lib/utils/sanitizers/svg.js +483 -0
  130. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  131. package/dist/processors/base/BaseFileProcessor.js +613 -0
  132. package/dist/processors/base/index.d.ts +14 -0
  133. package/dist/processors/base/index.js +19 -0
  134. package/dist/processors/base/types.d.ts +593 -0
  135. package/dist/processors/base/types.js +76 -0
  136. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  137. package/dist/processors/cli/fileProcessorCli.js +388 -0
  138. package/dist/processors/cli/index.d.ts +37 -0
  139. package/dist/processors/cli/index.js +49 -0
  140. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  141. package/dist/processors/code/ConfigProcessor.js +400 -0
  142. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  143. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  144. package/dist/processors/code/index.d.ts +44 -0
  145. package/dist/processors/code/index.js +60 -0
  146. package/dist/processors/config/fileTypes.d.ts +283 -0
  147. package/dist/processors/config/fileTypes.js +520 -0
  148. package/dist/processors/config/index.d.ts +32 -0
  149. package/dist/processors/config/index.js +92 -0
  150. package/dist/processors/config/languageMap.d.ts +66 -0
  151. package/dist/processors/config/languageMap.js +410 -0
  152. package/dist/processors/config/mimeTypes.d.ts +376 -0
  153. package/dist/processors/config/mimeTypes.js +338 -0
  154. package/dist/processors/config/sizeLimits.d.ts +194 -0
  155. package/dist/processors/config/sizeLimits.js +246 -0
  156. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  157. package/dist/processors/data/JsonProcessor.js +203 -0
  158. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  159. package/dist/processors/data/XmlProcessor.js +283 -0
  160. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  161. package/dist/processors/data/YamlProcessor.js +294 -0
  162. package/dist/processors/data/index.d.ts +49 -0
  163. package/dist/processors/data/index.js +76 -0
  164. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  165. package/dist/processors/document/ExcelProcessor.js +519 -0
  166. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  167. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  168. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  169. package/dist/processors/document/RtfProcessor.js +361 -0
  170. package/dist/processors/document/WordProcessor.d.ts +168 -0
  171. package/dist/processors/document/WordProcessor.js +353 -0
  172. package/dist/processors/document/index.d.ts +54 -0
  173. package/dist/processors/document/index.js +90 -0
  174. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  175. package/dist/processors/errors/FileErrorCode.js +255 -0
  176. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  177. package/dist/processors/errors/errorHelpers.js +378 -0
  178. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  179. package/dist/processors/errors/errorSerializer.js +507 -0
  180. package/dist/processors/errors/index.d.ts +46 -0
  181. package/dist/processors/errors/index.js +49 -0
  182. package/dist/processors/index.d.ts +76 -0
  183. package/dist/processors/index.js +112 -0
  184. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  185. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  186. package/dist/processors/integration/index.d.ts +42 -0
  187. package/dist/processors/integration/index.js +44 -0
  188. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  189. package/dist/processors/markup/HtmlProcessor.js +249 -0
  190. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  191. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  192. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  193. package/dist/processors/markup/SvgProcessor.js +240 -0
  194. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  195. package/dist/processors/markup/TextProcessor.js +188 -0
  196. package/dist/processors/markup/index.d.ts +66 -0
  197. package/dist/processors/markup/index.js +102 -0
  198. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  199. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  200. package/dist/processors/registry/index.d.ts +12 -0
  201. package/dist/processors/registry/index.js +16 -0
  202. package/dist/processors/registry/types.d.ts +53 -0
  203. package/dist/processors/registry/types.js +10 -0
  204. package/dist/server/utils/validation.d.ts +6 -6
  205. package/dist/types/fileTypes.d.ts +1 -1
  206. package/dist/types/index.d.ts +25 -24
  207. package/dist/types/index.js +21 -20
  208. package/dist/types/modelTypes.d.ts +10 -10
  209. package/dist/types/pptTypes.d.ts +14 -2
  210. package/dist/types/pptTypes.js +16 -0
  211. package/dist/utils/async/delay.d.ts +40 -0
  212. package/dist/utils/async/delay.js +42 -0
  213. package/dist/utils/async/index.d.ts +23 -0
  214. package/dist/utils/async/index.js +23 -0
  215. package/dist/utils/async/retry.d.ts +141 -0
  216. package/dist/utils/async/retry.js +171 -0
  217. package/dist/utils/async/withTimeout.d.ts +73 -0
  218. package/dist/utils/async/withTimeout.js +96 -0
  219. package/dist/utils/fileDetector.d.ts +7 -1
  220. package/dist/utils/fileDetector.js +91 -18
  221. package/dist/utils/json/extract.d.ts +103 -0
  222. package/dist/utils/json/extract.js +248 -0
  223. package/dist/utils/json/index.d.ts +36 -0
  224. package/dist/utils/json/index.js +36 -0
  225. package/dist/utils/json/safeParse.d.ts +137 -0
  226. package/dist/utils/json/safeParse.js +190 -0
  227. package/dist/utils/messageBuilder.d.ts +2 -2
  228. package/dist/utils/messageBuilder.js +15 -7
  229. package/dist/utils/sanitizers/filename.d.ts +137 -0
  230. package/dist/utils/sanitizers/filename.js +365 -0
  231. package/dist/utils/sanitizers/html.d.ts +170 -0
  232. package/dist/utils/sanitizers/html.js +325 -0
  233. package/dist/utils/sanitizers/index.d.ts +26 -0
  234. package/dist/utils/sanitizers/index.js +29 -0
  235. package/dist/utils/sanitizers/svg.d.ts +81 -0
  236. package/dist/utils/sanitizers/svg.js +482 -0
  237. package/package.json +2 -2
@@ -0,0 +1,160 @@
1
+ /**
2
+ * XML Processing Utility
3
+ *
4
+ * Handles downloading, validating, and processing XML files securely.
5
+ *
6
+ * Security Notes:
7
+ * ---------------
8
+ * XML parsing can be vulnerable to XML External Entity (XXE) attacks:
9
+ *
10
+ * 1. **XXE Attacks**: DOCTYPE and ENTITY declarations can be exploited to:
11
+ * - Read local files on the server
12
+ * - Perform Server-Side Request Forgery (SSRF)
13
+ * - Cause Denial of Service via entity expansion
14
+ *
15
+ * 2. **Mitigation**: We reject XML files containing DOCTYPE or ENTITY declarations
16
+ * and disable entity processing in the parser.
17
+ *
18
+ * References:
19
+ * - https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing
20
+ * - https://cwe.mitre.org/data/definitions/611.html (XXE)
21
+ *
22
+ * @module processors/data/XmlProcessor
23
+ *
24
+ * @example
25
+ * ```typescript
26
+ * import { xmlProcessor, isXmlFile, processXml } from "./XmlProcessor.js";
27
+ *
28
+ * // Check if file is XML
29
+ * if (isXmlFile("application/xml", "data.xml")) {
30
+ * // Process the file
31
+ * const result = await processXml(fileInfo);
32
+ * if (result.success && result.data) {
33
+ * console.log("Root element:", result.data.rootElement);
34
+ * console.log("Parsed:", result.data.parsed);
35
+ * }
36
+ * }
37
+ * ```
38
+ */
39
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
40
+ import type { FileInfo, FileProcessingResult, OperationResult, ProcessOptions } from "../base/types.js";
41
+ export type { ProcessedXml } from "../base/types.js";
42
+ import type { ProcessedXml } from "../base/types.js";
43
+ /**
44
+ * XML file processor.
45
+ * Extends BaseFileProcessor with XML-specific parsing and validation.
46
+ *
47
+ * Features:
48
+ * - XXE protection (rejects DOCTYPE and ENTITY declarations)
49
+ * - Parses XML to JavaScript objects
50
+ * - Extracts root element name
51
+ *
52
+ * @example
53
+ * ```typescript
54
+ * const processor = new XmlProcessor();
55
+ *
56
+ * const result = await processor.processFile({
57
+ * id: "file-123",
58
+ * name: "data.xml",
59
+ * mimetype: "application/xml",
60
+ * size: 1024,
61
+ * buffer: xmlBuffer,
62
+ * });
63
+ *
64
+ * if (result.success && result.data?.valid) {
65
+ * console.log("Root element:", result.data.rootElement);
66
+ * }
67
+ * ```
68
+ */
69
+ export declare class XmlProcessor extends BaseFileProcessor<ProcessedXml> {
70
+ constructor();
71
+ /**
72
+ * Find the name of the first element start tag in the XML text using a lightweight text scan (no full parse).
73
+ *
74
+ * @param content - XML content string
75
+ * @returns Root element name, or undefined if no element start tag is found
76
+ */
77
+ private extractRootElement;
78
+ /**
79
+ * Case-insensitively search the XML text for `<!DOCTYPE` and `<!ENTITY` declarations (XXE attack vectors).
80
+ *
81
+ * @param content - XML content string
82
+ * @returns Object with boolean `hasDOCTYPE` and `hasENTITY` flags
83
+ */
84
+ private checkXxeVectors;
85
+ /**
86
+ * Parse XML content to a JavaScript object with fast-xml-parser, with entity processing disabled.
87
+ *
88
+ * @param content - XML content string
89
+ * @returns Parsed XML content (attributes are keyed with an `@_` prefix, text nodes under `#text`)
90
+ */
91
+ private parseXmlSecurely;
92
+ /**
93
+ * Validate that downloaded XML is safe and parseable, reporting failures as a structured result instead of throwing.
94
+ * Includes XXE protection by rejecting XML with DOCTYPE or ENTITY declarations (error code XXE_DETECTED).
95
+ * Any exception raised while decoding or parsing is reported with error code PARSING_FAILED.
96
+ *
97
+ * @param buffer - Downloaded file content (decoded as UTF-8)
98
+ * @param fileInfo - Original file information (its `name` is attached to XXE error details)
99
+ * @returns `{ success: true }` on success, otherwise `{ success: false, error }` with `code`, `message`, `userMessage` and `details`
100
+ */
101
+ protected validateDownloadedFileWithResult(buffer: Buffer, fileInfo: FileInfo): Promise<OperationResult<void>>;
102
+ /**
103
+ * Build processed XML result with parsed content; a parse failure is reported via `valid: false` and `errorMessage` rather than thrown.
104
+ *
105
+ * @param buffer - Downloaded file content (decoded as UTF-8)
106
+ * @param fileInfo - Original file information (supplies `mimetype`, `size` and the filename)
107
+ * @returns Processed XML result with raw `content`, `parsed` object, `rootElement`, and file metadata
108
+ */
109
+ protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedXml;
110
+ }
111
+ /** Shared module-level XmlProcessor instance; the isXmlFile() and processXml() helpers delegate to it. */
112
+ export declare const xmlProcessor: XmlProcessor;
113
+ /**
114
+ * Check if a file is an XML file by delegating to the shared processor's `isFileSupported(mimetype, filename)`.
115
+ * The processor is configured with MIME types `application/xml` / `text/xml` and the `.xml` extension.
116
+ * @param mimetype - MIME type of the file
117
+ * @param filename - Filename (for extension-based detection)
118
+ * @returns true if the shared XML processor reports the file as supported
119
+ *
120
+ * @example
121
+ * ```typescript
122
+ * if (isXmlFile("application/xml", "data.xml")) {
123
+ * // Process as XML
124
+ * }
125
+ * ```
126
+ */
127
+ export declare function isXmlFile(mimetype: string, filename: string): boolean;
128
+ /**
129
+ * Validate XML file size against the configured limit (`SIZE_LIMITS_MB.XML_MAX_MB`, converted to bytes as MB * 1024 * 1024).
130
+ *
131
+ * @param sizeBytes - File size in bytes
132
+ * @returns true if `sizeBytes` is less than or equal to the limit in bytes
133
+ */
134
+ export declare function validateXmlSize(sizeBytes: number): boolean;
135
+ /**
136
+ * Process a single XML file with XXE protection by delegating to the shared `xmlProcessor.processFile(fileInfo, options)`.
137
+ *
138
+ * @param fileInfo - File information (with URL or buffer)
139
+ * @param options - Optional processing options (auth headers, timeout, retry config), passed through unchanged
140
+ * @returns Promise of the processing result with parsed XML or error
141
+ *
142
+ * @example
143
+ * ```typescript
144
+ * const result = await processXml({
145
+ * id: "file-123",
146
+ * name: "data.xml",
147
+ * mimetype: "application/xml",
148
+ * size: 2048,
149
+ * url: "https://example.com/data.xml",
150
+ * }, {
151
+ * authHeaders: { "Authorization": "Bearer token" },
152
+ * });
153
+ *
154
+ * if (result.success && result.data) {
155
+ * console.log("Root:", result.data.rootElement);
156
+ * console.log("Parsed:", result.data.parsed);
157
+ * }
158
+ * ```
159
+ */
160
+ export declare function processXml(fileInfo: FileInfo, options?: ProcessOptions): Promise<FileProcessingResult<ProcessedXml>>;
@@ -0,0 +1,283 @@
1
+ /**
2
+ * XML Processing Utility
3
+ *
4
+ * Handles downloading, validating, and processing XML files securely.
5
+ *
6
+ * Security Notes:
7
+ * ---------------
8
+ * XML parsing can be vulnerable to XML External Entity (XXE) attacks:
9
+ *
10
+ * 1. **XXE Attacks**: DOCTYPE and ENTITY declarations can be exploited to:
11
+ * - Read local files on the server
12
+ * - Perform Server-Side Request Forgery (SSRF)
13
+ * - Cause Denial of Service via entity expansion
14
+ *
15
+ * 2. **Mitigation**: We reject XML files containing DOCTYPE or ENTITY declarations
16
+ * and disable entity processing in the parser.
17
+ *
18
+ * References:
19
+ * - https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing
20
+ * - https://cwe.mitre.org/data/definitions/611.html (XXE)
21
+ *
22
+ * @module processors/data/XmlProcessor
23
+ *
24
+ * @example
25
+ * ```typescript
26
+ * import { xmlProcessor, isXmlFile, processXml } from "./XmlProcessor.js";
27
+ *
28
+ * // Check if file is XML
29
+ * if (isXmlFile("application/xml", "data.xml")) {
30
+ * // Process the file
31
+ * const result = await processXml(fileInfo);
32
+ * if (result.success && result.data) {
33
+ * console.log("Root element:", result.data.rootElement);
34
+ * console.log("Parsed:", result.data.parsed);
35
+ * }
36
+ * }
37
+ * ```
38
+ */
39
+ import { createRequire } from "node:module";
40
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
41
+ import { SIZE_LIMITS_MB } from "../config/index.js";
42
+ import { createFileError, FileErrorCode } from "../errors/index.js";
43
+ const require = createRequire(import.meta.url);
44
+ // =============================================================================
45
+ // CONSTANTS
46
+ // =============================================================================
47
+ /** MIME types accepted as XML; passed to the base processor as `supportedMimeTypes`. */
48
+ const SUPPORTED_XML_TYPES = ["application/xml", "text/xml"];
49
+ /** File extensions accepted as XML; passed to the base processor as `supportedExtensions`. */
50
+ const SUPPORTED_XML_EXTENSIONS = [".xml"];
51
+ // =============================================================================
52
+ // XML PROCESSOR CLASS
53
+ // =============================================================================
54
+ /**
55
+ * XML file processor.
56
+ * Extends BaseFileProcessor with XML-specific parsing and validation.
57
+ *
58
+ * Features:
59
+ * - XXE protection (rejects DOCTYPE and ENTITY declarations)
60
+ * - Parses XML to JavaScript objects
61
+ * - Extracts root element name
62
+ *
63
+ * @example
64
+ * ```typescript
65
+ * const processor = new XmlProcessor();
66
+ *
67
+ * const result = await processor.processFile({
68
+ * id: "file-123",
69
+ * name: "data.xml",
70
+ * mimetype: "application/xml",
71
+ * size: 1024,
72
+ * buffer: xmlBuffer,
73
+ * });
74
+ *
75
+ * if (result.success && result.data?.valid) {
76
+ * console.log("Root element:", result.data.rootElement);
77
+ * }
78
+ * ```
79
+ */
80
export class XmlProcessor extends BaseFileProcessor {
    /**
     * Configure the base processor for XML: the size ceiling comes from
     * `SIZE_LIMITS_MB.XML_MAX_MB`, the timeout is 30000 ms, and the accepted
     * MIME types / extensions are the module-level XML lists.
     */
    constructor() {
        super({
            maxSizeMB: SIZE_LIMITS_MB.XML_MAX_MB,
            timeoutMs: 30000,
            supportedMimeTypes: SUPPORTED_XML_TYPES,
            supportedExtensions: SUPPORTED_XML_EXTENSIONS,
            fileTypeName: "XML",
            defaultFilename: "data.xml",
        });
    }
    /**
     * Extract the root element name from XML content.
     *
     * Scans the text left to right. Comments (`<!-- ... -->`) and processing
     * instructions (`<? ... ?>`, which covers the XML declaration) are
     * skipped as a whole so tag-like text inside them is never mistaken for
     * the root element. Any other `<` that does not begin an element name
     * (for example `<!DOCTYPE`) is stepped over and the scan continues.
     *
     * @param content - XML content string
     * @returns Root element name, or undefined if no element start tag is
     *          found (including when a comment or processing instruction is
     *          left unterminated)
     */
    extractRootElement(content) {
        // Sticky ("y") so every attempt is anchored at the "<" being examined.
        // A name starts with a letter, "_" or ":" and continues with letters,
        // digits, "_", ".", ":" or "-"; it must be followed by whitespace,
        // "/" or ">" to count as a start tag.
        const elementStart = /<([\p{L}_:][\p{L}\p{N}_.:-]*)(?=[\s/>])/uy;
        let cursor = 0;
        while (cursor < content.length) {
            const lt = content.indexOf("<", cursor);
            if (lt === -1) {
                return undefined;
            }
            if (content.startsWith("<!--", lt)) {
                const close = content.indexOf("-->", lt + 4);
                if (close === -1) {
                    return undefined;
                }
                cursor = close + 3;
                continue;
            }
            if (content.startsWith("<?", lt)) {
                const close = content.indexOf("?>", lt + 2);
                if (close === -1) {
                    return undefined;
                }
                cursor = close + 2;
                continue;
            }
            elementStart.lastIndex = lt;
            const match = elementStart.exec(content);
            if (match) {
                return match[1];
            }
            // Not an element start (e.g. "<!DOCTYPE" or a stray "<"); move on.
            cursor = lt + 1;
        }
        return undefined;
    }
    /**
     * Check if XML content contains XXE attack vectors.
     *
     * The check is a case-insensitive substring search for `<!doctype` and
     * `<!entity`; it does not parse the document.
     *
     * @param content - XML content string
     * @returns Object with boolean `hasDOCTYPE` and `hasENTITY` flags
     */
    checkXxeVectors(content) {
        const lower = content.toLowerCase();
        return {
            hasDOCTYPE: lower.includes("<!doctype"),
            hasENTITY: lower.includes("<!entity"),
        };
    }
    /**
     * Parse XML content to JavaScript object securely.
     *
     * Attributes are kept and keyed with an `@_` prefix, text nodes are stored
     * under `#text`, and attribute/tag values are type-coerced and trimmed.
     *
     * @param content - XML content string
     * @returns Parsed XML content
     */
    parseXmlSecurely(content) {
        // Load fast-xml-parser on first use through the module-level CommonJS
        // `require` (synchronous; not a dynamic `import()`).
        const { XMLParser } = require("fast-xml-parser");
        // NOTE(review): fast-xml-parser's parse() appears to be lenient unless
        // validation is requested through its second argument; confirm whether
        // callers relying on this method for validation need the strict mode.
        const parser = new XMLParser({
            ignoreAttributes: false,
            attributeNamePrefix: "@_",
            textNodeName: "#text",
            parseAttributeValue: true,
            parseTagValue: true,
            trimValues: true,
            // XXE Protection - explicitly disable entity processing
            processEntities: false,
            htmlEntities: false,
        });
        return parser.parse(content);
    }
    /**
     * Validate downloaded XML is parseable and safe with structured error result.
     * Includes XXE protection by rejecting XML with DOCTYPE or ENTITY declarations.
     * Failures are returned, never thrown: the error object carries the
     * `code`, `message`, `userMessage` and `details` produced by
     * `createFileError`.
     *
     * @param buffer - Downloaded file content (decoded as UTF-8)
     * @param fileInfo - Original file information
     * @returns `{ success: true, data: undefined }` when the content passes,
     *          otherwise `{ success: false, error }` with code `XXE_DETECTED`
     *          or `PARSING_FAILED`
     */
    async validateDownloadedFileWithResult(buffer, fileInfo) {
        try {
            const content = buffer.toString("utf-8");
            // XXE Protection: Check for potentially dangerous DOCTYPE/ENTITY declarations
            const { hasDOCTYPE, hasENTITY } = this.checkXxeVectors(content);
            if (hasDOCTYPE || hasENTITY) {
                const error = createFileError(FileErrorCode.XXE_DETECTED, {
                    hasDOCTYPE,
                    hasENTITY,
                    filename: fileInfo.name,
                });
                return {
                    success: false,
                    error: {
                        code: error.code,
                        message: error.message,
                        userMessage: error.userMessage,
                        details: error.details,
                    },
                };
            }
            // Parse to validate structure; the parsed value itself is discarded here.
            this.parseXmlSecurely(content);
            return { success: true, data: undefined };
        }
        catch (error) {
            // Anything thrown above is reported as a parsing failure; the
            // original Error (when it is one) is forwarded as the cause.
            const fileError = createFileError(FileErrorCode.PARSING_FAILED, { fileType: "XML" }, error instanceof Error ? error : undefined);
            return {
                success: false,
                error: {
                    code: fileError.code,
                    message: fileError.message,
                    userMessage: fileError.userMessage,
                    details: fileError.details,
                },
            };
        }
    }
    /**
     * Build processed XML result with parsed content.
     *
     * A parse failure does not throw: it is reported through `valid: false`
     * and `errorMessage`, with `parsed` left as `null`.
     *
     * @param buffer - Downloaded file content (decoded as UTF-8)
     * @param fileInfo - Original file information
     * @returns Processed XML result with the raw `content`, `parsed` object,
     *          `rootElement`, the original `buffer`, and file metadata
     */
    buildProcessedResult(buffer, fileInfo) {
        const content = buffer.toString("utf-8");
        let parsed = null;
        let valid = true;
        let errorMessage;
        // Extract root element
        const rootElement = this.extractRootElement(content);
        try {
            parsed = this.parseXmlSecurely(content);
        }
        catch (error) {
            // This shouldn't happen since we validate, but handle gracefully
            valid = false;
            errorMessage = error instanceof Error ? error.message : "Invalid XML";
        }
        return {
            content,
            parsed,
            valid,
            errorMessage,
            rootElement,
            buffer,
            // Fall back to the generic XML MIME type when none was supplied.
            mimetype: fileInfo.mimetype || "application/xml",
            size: fileInfo.size,
            filename: this.getFilename(fileInfo),
        };
    }
}
221
+ // =============================================================================
222
+ // SINGLETON INSTANCE
223
+ // =============================================================================
224
+ /** Shared module-level XmlProcessor instance; the isXmlFile() and processXml() helpers delegate to it. */
225
+ export const xmlProcessor = new XmlProcessor();
226
+ // =============================================================================
227
+ // UTILITY FUNCTIONS
228
+ // =============================================================================
229
+ /**
230
+ * Check if a file is an XML file based on MIME type or extension.
231
+ *
232
+ * @param mimetype - MIME type of the file
233
+ * @param filename - Filename (for extension-based detection)
234
+ * @returns true if the file is an XML file
235
+ *
236
+ * @example
237
+ * ```typescript
238
+ * if (isXmlFile("application/xml", "data.xml")) {
239
+ * // Process as XML
240
+ * }
241
+ * ```
242
+ */
243
export function isXmlFile(mimetype, filename) {
    // The shared processor owns the list of accepted MIME types and
    // extensions, so the decision is delegated to it unchanged.
    const supported = xmlProcessor.isFileSupported(mimetype, filename);
    return supported;
}
246
+ /**
247
+ * Validate XML file size against configured limit.
248
+ *
249
+ * @param sizeBytes - File size in bytes
250
+ * @returns true if size is within the limit
251
+ */
252
export function validateXmlSize(sizeBytes) {
    // The configured ceiling is expressed in megabytes; compare in bytes.
    // The limit is inclusive: a file exactly at the ceiling is accepted.
    const { XML_MAX_MB } = SIZE_LIMITS_MB;
    const limitInBytes = XML_MAX_MB * 1024 * 1024;
    return sizeBytes <= limitInBytes;
}
256
+ /**
257
+ * Process a single XML file with XXE protection.
258
+ *
259
+ * @param fileInfo - File information (with URL or buffer)
260
+ * @param options - Optional processing options (auth headers, timeout, retry config)
261
+ * @returns Processing result with parsed XML or error
262
+ *
263
+ * @example
264
+ * ```typescript
265
+ * const result = await processXml({
266
+ * id: "file-123",
267
+ * name: "data.xml",
268
+ * mimetype: "application/xml",
269
+ * size: 2048,
270
+ * url: "https://example.com/data.xml",
271
+ * }, {
272
+ * authHeaders: { "Authorization": "Bearer token" },
273
+ * });
274
+ *
275
+ * if (result.success && result.data) {
276
+ * console.log("Root:", result.data.rootElement);
277
+ * console.log("Parsed:", result.data.parsed);
278
+ * }
279
+ * ```
280
+ */
281
export function processXml(fileInfo, options) {
    // Convenience wrapper: hand both arguments to the shared processor and
    // return its promise as-is.
    const pending = xmlProcessor.processFile(fileInfo, options);
    return pending;
}
@@ -0,0 +1,163 @@
1
+ /**
2
+ * YAML Processing Utility
3
+ *
4
+ * Handles downloading, validating, and processing YAML files with security.
5
+ *
6
+ * Security Notes:
7
+ * ---------------
8
+ * YAML parsing can be vulnerable to various attacks if not configured securely:
9
+ *
10
+ * 1. **Code Execution via Custom Tags**: YAML supports custom tags like `!!python/object`,
11
+ * `!!ruby/object`, or `!!js/function` that can execute arbitrary code when parsed.
12
+ * We use the 'core' schema which only allows standard YAML types (strings, numbers,
13
+ * booleans, null, arrays, and objects) and explicitly check for dangerous tag patterns.
14
+ *
15
+ * 2. **Billion Laughs Attack (Entity Expansion)**: YAML supports anchors (&) and aliases (*)
16
+ * for referencing content. Malicious YAML can use nested aliases to create exponential
17
+ * expansion (e.g., 10 levels of 10x expansion = 10^10 entities from a small file).
18
+ * We limit `maxAliasCount` to 100 to prevent memory exhaustion.
19
+ *
20
+ * 3. **Denial of Service**: Large or deeply nested YAML files can exhaust memory/CPU.
21
+ * Size limits are enforced by the base processor's maxSizeMB configuration.
22
+ *
23
+ * References:
24
+ * - https://en.wikipedia.org/wiki/Billion_laughs_attack
25
+ * - https://cwe.mitre.org/data/definitions/502.html (Deserialization of Untrusted Data)
26
+ *
27
+ * @module processors/data/YamlProcessor
28
+ *
29
+ * @example
30
+ * ```typescript
31
+ * import { yamlProcessor, isYamlFile, processYaml } from "./YamlProcessor.js";
32
+ *
33
+ * // Check if file is YAML
34
+ * if (isYamlFile("application/x-yaml", "config.yaml")) {
35
+ * // Process the file
36
+ * const result = await processYaml(fileInfo);
37
+ * if (result.success && result.data) {
38
+ * console.log("Parsed YAML:", result.data.parsed);
39
+ * console.log("As JSON:", result.data.asJson);
40
+ * }
41
+ * }
42
+ * ```
43
+ */
44
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
45
+ import type { FileInfo, FileProcessingResult, OperationResult, ProcessOptions } from "../base/types.js";
46
+ export type { ProcessedYaml } from "../base/types.js";
47
+ import type { ProcessedYaml } from "../base/types.js";
48
+ /**
49
+ * YAML file processor (type declaration only; the implementation ships in the matching .js file).
50
+ * Extends BaseFileProcessor<ProcessedYaml> with YAML-specific parsing and validation.
51
+ *
52
+ * Documented as using a secure parsing configuration to prevent:
53
+ * - Code execution via custom tags (uses 'core' schema)
54
+ * - Billion laughs attack (limits alias count to 100)
55
+ * - Dangerous custom tag injection (explicit pattern checking)
56
+ *
57
+ * @example
58
+ * ```typescript
59
+ * const processor = new YamlProcessor();
60
+ *
61
+ * const result = await processor.processFile({
62
+ * id: "file-123",
63
+ * name: "config.yaml",
64
+ * mimetype: "application/x-yaml",
65
+ * size: 1024,
66
+ * buffer: yamlBuffer,
67
+ * });
68
+ *
69
+ * if (result.success && result.data?.valid) {
70
+ * console.log("As JSON:", result.data.asJson);
71
+ * }
72
+ * ```
73
+ */
74
+ export declare class YamlProcessor extends BaseFileProcessor<ProcessedYaml> {
75
+ constructor();
76
+ /**
77
+ * Collect the dangerous custom tags found in YAML content (private helper; its signature is not part of this declaration).
78
+ *
79
+ * @param content - Raw YAML content string
80
+ * @returns Array of detected dangerous tags (empty if none found)
81
+ */
82
+ private getDetectedDangerousTags;
83
+ /**
84
+ * Parse YAML content securely using a strict schema (private helper; its signature is not part of this declaration).
85
+ *
86
+ * Security measures:
87
+ * - 'core' schema: Only allows standard YAML types (string, number, boolean, null, array, object)
88
+ * - maxAliasCount: Limits alias expansion to prevent billion laughs attack
89
+ *
90
+ * @param content - Raw YAML content string
91
+ * @returns Parsed YAML content
92
+ */
93
+ private parseYamlSecurely;
94
+ /**
95
+ * Validate that downloaded YAML is parseable and safe, reporting the outcome as a structured result.
96
+ * Checks for dangerous custom tags and validates YAML syntax.
97
+ * Returns user-friendly error messages with actionable suggestions.
98
+ *
99
+ * @param buffer - Downloaded file content
100
+ * @param fileInfo - Original file information
101
+ * @returns Promise resolving to a success result or an error result
102
+ */
103
+ protected validateDownloadedFileWithResult(buffer: Buffer, fileInfo: FileInfo): Promise<OperationResult<void>>;
104
+ /**
105
+ * Build the processed YAML result from the downloaded content.
106
+ * Uses secure parsing configuration to prevent code execution attacks.
107
+ *
108
+ * @param buffer - Downloaded file content
109
+ * @param fileInfo - Original file information
110
+ * @returns Processed YAML result (returned directly, not wrapped in a Promise)
111
+ */
112
+ protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedYaml;
113
+ }
114
+ /** Shared module-level YamlProcessor instance; the class constructor is public, so this is a convenience instance rather than an enforced singleton */
115
+ export declare const yamlProcessor: YamlProcessor;
116
+ /**
117
+ * Check if a file is a YAML file based on MIME type or extension.
118
+ *
119
+ * @param mimetype - MIME type of the file
120
+ * @param filename - Filename (for extension-based detection)
121
+ * @returns true if the file is recognized as a YAML file, false otherwise
122
+ *
123
+ * @example
124
+ * ```typescript
125
+ * if (isYamlFile("application/x-yaml", "config.yaml")) {
126
+ * // Process as YAML
127
+ * }
128
+ * ```
129
+ */
130
+ export declare function isYamlFile(mimetype: string, filename: string): boolean;
131
+ /**
132
+ * Validate YAML file size against the configured limit, reporting the outcome as a boolean.
133
+ *
134
+ * @param sizeBytes - File size in bytes
135
+ * @returns true if size is within the limit (limit value and boundary handling are not visible in this declaration - TODO confirm against the implementation)
136
+ */
137
+ export declare function validateYamlSize(sizeBytes: number): boolean;
138
+ /**
139
+ * Process a single YAML file with security validation.
140
+ *
141
+ * @param fileInfo - File information (with URL or buffer)
142
+ * @param options - Processing options (auth headers, timeout, retry config); may be omitted
143
+ * @returns Promise resolving to the processing result, carrying the parsed YAML or an error
144
+ *
145
+ * @example
146
+ * ```typescript
147
+ * const result = await processYaml({
148
+ * id: "file-123",
149
+ * name: "config.yaml",
150
+ * mimetype: "application/x-yaml",
151
+ * size: 2048,
152
+ * url: "https://example.com/config.yaml",
153
+ * }, {
154
+ * authHeaders: { "Authorization": "Bearer token" },
155
+ * });
156
+ *
157
+ * if (result.success && result.data) {
158
+ * console.log("Parsed:", result.data.parsed);
159
+ * console.log("As JSON:", result.data.asJson);
160
+ * }
161
+ * ```
162
+ */
163
+ export declare function processYaml(fileInfo: FileInfo, options?: ProcessOptions): Promise<FileProcessingResult<ProcessedYaml>>;