@juspay/neurolink 9.1.1 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +54 -7
  3. package/dist/agent/directTools.d.ts +3 -3
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/image-gen/ImageGenService.d.ts +143 -0
  6. package/dist/image-gen/ImageGenService.js +345 -0
  7. package/dist/image-gen/imageGenTools.d.ts +126 -0
  8. package/dist/image-gen/imageGenTools.js +304 -0
  9. package/dist/image-gen/index.d.ts +46 -0
  10. package/dist/image-gen/index.js +48 -0
  11. package/dist/image-gen/types.d.ts +237 -0
  12. package/dist/image-gen/types.js +24 -0
  13. package/dist/lib/agent/directTools.d.ts +3 -3
  14. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  15. package/dist/lib/image-gen/ImageGenService.js +346 -0
  16. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  17. package/dist/lib/image-gen/imageGenTools.js +305 -0
  18. package/dist/lib/image-gen/index.d.ts +46 -0
  19. package/dist/lib/image-gen/index.js +49 -0
  20. package/dist/lib/image-gen/types.d.ts +237 -0
  21. package/dist/lib/image-gen/types.js +25 -0
  22. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  23. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  24. package/dist/lib/processors/base/index.d.ts +14 -0
  25. package/dist/lib/processors/base/index.js +20 -0
  26. package/dist/lib/processors/base/types.d.ts +593 -0
  27. package/dist/lib/processors/base/types.js +77 -0
  28. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  29. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  30. package/dist/lib/processors/cli/index.d.ts +37 -0
  31. package/dist/lib/processors/cli/index.js +50 -0
  32. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  33. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  34. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  35. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  36. package/dist/lib/processors/code/index.d.ts +44 -0
  37. package/dist/lib/processors/code/index.js +61 -0
  38. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  39. package/dist/lib/processors/config/fileTypes.js +521 -0
  40. package/dist/lib/processors/config/index.d.ts +32 -0
  41. package/dist/lib/processors/config/index.js +93 -0
  42. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  43. package/dist/lib/processors/config/languageMap.js +411 -0
  44. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  45. package/dist/lib/processors/config/mimeTypes.js +339 -0
  46. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  47. package/dist/lib/processors/config/sizeLimits.js +247 -0
  48. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  49. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  50. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  51. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  52. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  53. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  54. package/dist/lib/processors/data/index.d.ts +49 -0
  55. package/dist/lib/processors/data/index.js +77 -0
  56. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  57. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  58. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  59. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  60. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  61. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  62. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  63. package/dist/lib/processors/document/WordProcessor.js +354 -0
  64. package/dist/lib/processors/document/index.d.ts +54 -0
  65. package/dist/lib/processors/document/index.js +91 -0
  66. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  67. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  68. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  69. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  70. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  71. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  72. package/dist/lib/processors/errors/index.d.ts +46 -0
  73. package/dist/lib/processors/errors/index.js +50 -0
  74. package/dist/lib/processors/index.d.ts +76 -0
  75. package/dist/lib/processors/index.js +113 -0
  76. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  77. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  78. package/dist/lib/processors/integration/index.d.ts +42 -0
  79. package/dist/lib/processors/integration/index.js +45 -0
  80. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  81. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  82. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  83. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  84. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  85. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  86. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  87. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  88. package/dist/lib/processors/markup/index.d.ts +66 -0
  89. package/dist/lib/processors/markup/index.js +103 -0
  90. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  91. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  92. package/dist/lib/processors/registry/index.d.ts +12 -0
  93. package/dist/lib/processors/registry/index.js +17 -0
  94. package/dist/lib/processors/registry/types.d.ts +53 -0
  95. package/dist/lib/processors/registry/types.js +11 -0
  96. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  97. package/dist/lib/server/utils/validation.d.ts +6 -6
  98. package/dist/lib/types/fileTypes.d.ts +1 -1
  99. package/dist/lib/types/index.d.ts +25 -24
  100. package/dist/lib/types/index.js +21 -20
  101. package/dist/lib/types/modelTypes.d.ts +18 -18
  102. package/dist/lib/types/pptTypes.d.ts +14 -2
  103. package/dist/lib/types/pptTypes.js +16 -0
  104. package/dist/lib/utils/async/delay.d.ts +40 -0
  105. package/dist/lib/utils/async/delay.js +43 -0
  106. package/dist/lib/utils/async/index.d.ts +23 -0
  107. package/dist/lib/utils/async/index.js +24 -0
  108. package/dist/lib/utils/async/retry.d.ts +141 -0
  109. package/dist/lib/utils/async/retry.js +172 -0
  110. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  111. package/dist/lib/utils/async/withTimeout.js +97 -0
  112. package/dist/lib/utils/fileDetector.d.ts +7 -1
  113. package/dist/lib/utils/fileDetector.js +91 -18
  114. package/dist/lib/utils/json/extract.d.ts +103 -0
  115. package/dist/lib/utils/json/extract.js +249 -0
  116. package/dist/lib/utils/json/index.d.ts +36 -0
  117. package/dist/lib/utils/json/index.js +37 -0
  118. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  119. package/dist/lib/utils/json/safeParse.js +191 -0
  120. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  121. package/dist/lib/utils/messageBuilder.js +15 -7
  122. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  123. package/dist/lib/utils/sanitizers/filename.js +366 -0
  124. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  125. package/dist/lib/utils/sanitizers/html.js +326 -0
  126. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  127. package/dist/lib/utils/sanitizers/index.js +30 -0
  128. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  129. package/dist/lib/utils/sanitizers/svg.js +483 -0
  130. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  131. package/dist/processors/base/BaseFileProcessor.js +613 -0
  132. package/dist/processors/base/index.d.ts +14 -0
  133. package/dist/processors/base/index.js +19 -0
  134. package/dist/processors/base/types.d.ts +593 -0
  135. package/dist/processors/base/types.js +76 -0
  136. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  137. package/dist/processors/cli/fileProcessorCli.js +388 -0
  138. package/dist/processors/cli/index.d.ts +37 -0
  139. package/dist/processors/cli/index.js +49 -0
  140. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  141. package/dist/processors/code/ConfigProcessor.js +400 -0
  142. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  143. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  144. package/dist/processors/code/index.d.ts +44 -0
  145. package/dist/processors/code/index.js +60 -0
  146. package/dist/processors/config/fileTypes.d.ts +283 -0
  147. package/dist/processors/config/fileTypes.js +520 -0
  148. package/dist/processors/config/index.d.ts +32 -0
  149. package/dist/processors/config/index.js +92 -0
  150. package/dist/processors/config/languageMap.d.ts +66 -0
  151. package/dist/processors/config/languageMap.js +410 -0
  152. package/dist/processors/config/mimeTypes.d.ts +376 -0
  153. package/dist/processors/config/mimeTypes.js +338 -0
  154. package/dist/processors/config/sizeLimits.d.ts +194 -0
  155. package/dist/processors/config/sizeLimits.js +246 -0
  156. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  157. package/dist/processors/data/JsonProcessor.js +203 -0
  158. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  159. package/dist/processors/data/XmlProcessor.js +283 -0
  160. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  161. package/dist/processors/data/YamlProcessor.js +294 -0
  162. package/dist/processors/data/index.d.ts +49 -0
  163. package/dist/processors/data/index.js +76 -0
  164. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  165. package/dist/processors/document/ExcelProcessor.js +519 -0
  166. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  167. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  168. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  169. package/dist/processors/document/RtfProcessor.js +361 -0
  170. package/dist/processors/document/WordProcessor.d.ts +168 -0
  171. package/dist/processors/document/WordProcessor.js +353 -0
  172. package/dist/processors/document/index.d.ts +54 -0
  173. package/dist/processors/document/index.js +90 -0
  174. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  175. package/dist/processors/errors/FileErrorCode.js +255 -0
  176. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  177. package/dist/processors/errors/errorHelpers.js +378 -0
  178. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  179. package/dist/processors/errors/errorSerializer.js +507 -0
  180. package/dist/processors/errors/index.d.ts +46 -0
  181. package/dist/processors/errors/index.js +49 -0
  182. package/dist/processors/index.d.ts +76 -0
  183. package/dist/processors/index.js +112 -0
  184. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  185. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  186. package/dist/processors/integration/index.d.ts +42 -0
  187. package/dist/processors/integration/index.js +44 -0
  188. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  189. package/dist/processors/markup/HtmlProcessor.js +249 -0
  190. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  191. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  192. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  193. package/dist/processors/markup/SvgProcessor.js +240 -0
  194. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  195. package/dist/processors/markup/TextProcessor.js +188 -0
  196. package/dist/processors/markup/index.d.ts +66 -0
  197. package/dist/processors/markup/index.js +102 -0
  198. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  199. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  200. package/dist/processors/registry/index.d.ts +12 -0
  201. package/dist/processors/registry/index.js +16 -0
  202. package/dist/processors/registry/types.d.ts +53 -0
  203. package/dist/processors/registry/types.js +10 -0
  204. package/dist/server/utils/validation.d.ts +6 -6
  205. package/dist/types/fileTypes.d.ts +1 -1
  206. package/dist/types/index.d.ts +25 -24
  207. package/dist/types/index.js +21 -20
  208. package/dist/types/modelTypes.d.ts +10 -10
  209. package/dist/types/pptTypes.d.ts +14 -2
  210. package/dist/types/pptTypes.js +16 -0
  211. package/dist/utils/async/delay.d.ts +40 -0
  212. package/dist/utils/async/delay.js +42 -0
  213. package/dist/utils/async/index.d.ts +23 -0
  214. package/dist/utils/async/index.js +23 -0
  215. package/dist/utils/async/retry.d.ts +141 -0
  216. package/dist/utils/async/retry.js +171 -0
  217. package/dist/utils/async/withTimeout.d.ts +73 -0
  218. package/dist/utils/async/withTimeout.js +96 -0
  219. package/dist/utils/fileDetector.d.ts +7 -1
  220. package/dist/utils/fileDetector.js +91 -18
  221. package/dist/utils/json/extract.d.ts +103 -0
  222. package/dist/utils/json/extract.js +248 -0
  223. package/dist/utils/json/index.d.ts +36 -0
  224. package/dist/utils/json/index.js +36 -0
  225. package/dist/utils/json/safeParse.d.ts +137 -0
  226. package/dist/utils/json/safeParse.js +190 -0
  227. package/dist/utils/messageBuilder.d.ts +2 -2
  228. package/dist/utils/messageBuilder.js +15 -7
  229. package/dist/utils/sanitizers/filename.d.ts +137 -0
  230. package/dist/utils/sanitizers/filename.js +365 -0
  231. package/dist/utils/sanitizers/html.d.ts +170 -0
  232. package/dist/utils/sanitizers/html.js +325 -0
  233. package/dist/utils/sanitizers/index.d.ts +26 -0
  234. package/dist/utils/sanitizers/index.js +29 -0
  235. package/dist/utils/sanitizers/svg.d.ts +81 -0
  236. package/dist/utils/sanitizers/svg.js +482 -0
  237. package/package.json +2 -2
@@ -0,0 +1,294 @@
1
+ /**
2
+ * YAML Processing Utility
3
+ *
4
+ * Handles downloading, validating, and processing YAML files with security.
5
+ *
6
+ * Security Notes:
7
+ * ---------------
8
+ * YAML parsing can be vulnerable to various attacks if not configured securely:
9
+ *
10
+ * 1. **Code Execution via Custom Tags**: YAML supports custom tags like `!!python/object`,
11
+ * `!!ruby/object`, or `!!js/function` that can execute arbitrary code when parsed.
12
+ * We use the 'core' schema which only allows standard YAML types (strings, numbers,
13
+ * booleans, null, arrays, and objects) and explicitly check for dangerous tag patterns.
14
+ *
15
+ * 2. **Billion Laughs Attack (Entity Expansion)**: YAML supports anchors (&) and aliases (*)
16
+ * for referencing content. Malicious YAML can use nested aliases to create exponential
17
+ * expansion (e.g., 10 levels of 10x expansion = 10^10 entities from a small file).
18
+ * js-yaml provides no `maxAliasCount` option, so no alias cap is applied; the 'core' schema and the size limits are the only safeguards.
19
+ *
20
+ * 3. **Denial of Service**: Large or deeply nested YAML files can exhaust memory/CPU.
21
+ * Size limits are enforced by the base processor's maxSizeMB configuration.
22
+ *
23
+ * References:
24
+ * - https://en.wikipedia.org/wiki/Billion_laughs_attack
25
+ * - https://cwe.mitre.org/data/definitions/502.html (Deserialization of Untrusted Data)
26
+ *
27
+ * @module processors/data/YamlProcessor
28
+ *
29
+ * @example
30
+ * ```typescript
31
+ * import { yamlProcessor, isYamlFile, processYaml } from "./YamlProcessor.js";
32
+ *
33
+ * // Check if file is YAML
34
+ * if (isYamlFile("application/x-yaml", "config.yaml")) {
35
+ * // Process the file
36
+ * const result = await processYaml(fileInfo);
37
+ * if (result.success && result.data) {
38
+ * console.log("Parsed YAML:", result.data.parsed);
39
+ * console.log("As JSON:", result.data.asJson);
40
+ * }
41
+ * }
42
+ * ```
43
+ */
44
+ import { createRequire } from "node:module";
45
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
46
+ import { SIZE_LIMITS_MB } from "../config/index.js";
47
+ import { createFileError, FileErrorCode } from "../errors/index.js";
48
+ const require = createRequire(import.meta.url);
49
+ // =============================================================================
50
+ // CONSTANTS
51
+ // =============================================================================
52
+ /** MIME types accepted as YAML; passed to the base processor as supportedMimeTypes */
53
+ const SUPPORTED_YAML_TYPES = ["application/x-yaml", "text/yaml", "text/x-yaml"];
54
+ /** File extensions accepted as YAML; passed to the base processor as supportedExtensions */
55
+ const SUPPORTED_YAML_EXTENSIONS = [".yaml", ".yml"];
56
+ /**
57
+ * Dangerous YAML custom tags that can execute code.
58
+ * These patterns indicate potential security threats and should be rejected.
59
+ */
60
+ const YAML_DANGEROUS_TAGS = [
61
+ // Python code execution
62
+ "!!python/object",
63
+ "!!python/object/apply",
64
+ "!!python/object/new",
65
+ "!!python/name",
66
+ "!!python/module",
67
+ // Ruby code execution
68
+ "!!ruby/object",
69
+ "!!ruby/hash",
70
+ "!!ruby/struct",
71
+ "!!ruby/sym",
72
+ // JavaScript code execution
73
+ "!!js/function",
74
+ "!!js/undefined",
75
+ // Language-level prefixes: detection is a substring test, so an entry ending in "/" matches any tag in that namespace
76
+ "!!perl/",
77
+ "!!php/",
78
+ "!!java/",
79
+ ];
80
+ // =============================================================================
81
+ // YAML PROCESSOR CLASS
82
+ // =============================================================================
83
+ /**
84
+ * YAML file processor.
85
+ * Extends BaseFileProcessor with YAML-specific parsing and validation.
86
+ *
87
+ * Uses secure parsing configuration to prevent:
88
+ * - Code execution via custom tags (uses 'core' schema)
89
+ * - Billion laughs attack (mitigated only by the 'core' schema and size limits; js-yaml has no alias-count option)
90
+ * - Dangerous custom tag injection (explicit pattern checking)
91
+ *
92
+ * @example
93
+ * ```typescript
94
+ * const processor = new YamlProcessor();
95
+ *
96
+ * const result = await processor.processFile({
97
+ * id: "file-123",
98
+ * name: "config.yaml",
99
+ * mimetype: "application/x-yaml",
100
+ * size: 1024,
101
+ * buffer: yamlBuffer,
102
+ * });
103
+ *
104
+ * if (result.success && result.data?.valid) {
105
+ * console.log("As JSON:", result.data.asJson);
106
+ * }
107
+ * ```
108
+ */
109
+ export class YamlProcessor extends BaseFileProcessor {
110
+ constructor() {
111
+ super({
112
+ maxSizeMB: SIZE_LIMITS_MB.YAML_MAX_MB,
113
+ timeoutMs: 30000,
114
+ supportedMimeTypes: SUPPORTED_YAML_TYPES,
115
+ supportedExtensions: SUPPORTED_YAML_EXTENSIONS,
116
+ fileTypeName: "YAML",
117
+ defaultFilename: "config.yaml",
118
+ });
119
+ }
120
+ /**
121
+ * Get detected dangerous tags in YAML content (plain substring match, so tag text inside quoted strings or comments also counts).
122
+ *
123
+ * @param content - Raw YAML content string
124
+ * @returns Array of detected dangerous tags (empty if none found)
125
+ */
126
+ getDetectedDangerousTags(content) {
127
+ return YAML_DANGEROUS_TAGS.filter((pattern) => content.includes(pattern));
128
+ }
129
+ /**
130
+ * Parse YAML content securely using strict schema.
131
+ *
132
+ * Security measures:
133
+ * - 'core' schema: Only allows standard YAML types (string, number, boolean, null, array, object)
134
+ * - No alias-count cap: js-yaml has no maxAliasCount option, so only the schema is passed to load()
135
+ *
136
+ * @param content - Raw YAML content string
137
+ * @returns Parsed YAML content
138
+ */
139
+ parseYamlSecurely(content) {
140
+ // Load js-yaml synchronously through the module-level createRequire-based `require`
141
+ const yaml = require("js-yaml");
142
+ return yaml.load(content, {
143
+ schema: yaml.CORE_SCHEMA, // Only allow standard YAML types, no custom tags
144
+ // NOTE: no alias-expansion limit is configured in these options
145
+ // Note: js-yaml doesn't have maxAliasCount, but using CORE_SCHEMA + size limits provides protection
146
+ });
147
+ }
148
+ /**
149
+ * Validate downloaded YAML is parseable and safe with structured error result.
150
+ * Checks for dangerous custom tags and validates YAML syntax.
151
+ * Returns user-friendly error messages with actionable suggestions.
152
+ *
153
+ * @param buffer - Downloaded file content
154
+ * @param fileInfo - Original file information
155
+ * @returns Success result or error result
156
+ */
157
+ async validateDownloadedFileWithResult(buffer, fileInfo) {
158
+ try {
159
+ const content = buffer.toString("utf-8");
160
+ // Check for potentially dangerous YAML constructs before parsing
161
+ const detectedTags = this.getDetectedDangerousTags(content);
162
+ if (detectedTags.length > 0) {
163
+ const error = createFileError(FileErrorCode.CODE_EXECUTION_DETECTED, {
164
+ fileType: "YAML",
165
+ detectedTags: detectedTags.join(", "),
166
+ filename: fileInfo.name,
167
+ });
168
+ return {
169
+ success: false,
170
+ error: {
171
+ code: error.code,
172
+ message: error.message,
173
+ userMessage: error.userMessage,
174
+ details: error.details,
175
+ },
176
+ };
177
+ }
178
+ // Parse with secure configuration; the parsed value is discarded, only whether it throws matters
179
+ this.parseYamlSecurely(content);
180
+ return { success: true, data: undefined };
181
+ }
182
+ catch (error) {
183
+ const fileError = createFileError(FileErrorCode.PARSING_FAILED, { fileType: "YAML" }, error instanceof Error ? error : undefined);
184
+ return {
185
+ success: false,
186
+ error: {
187
+ code: fileError.code,
188
+ message: fileError.message,
189
+ userMessage: fileError.userMessage,
190
+ details: fileError.details,
191
+ },
192
+ };
193
+ }
194
+ }
195
+ /**
196
+ * Build processed YAML result with parsed content.
197
+ * Uses secure parsing configuration to prevent code execution attacks.
198
+ *
199
+ * @param buffer - Downloaded file content
200
+ * @param fileInfo - Original file information
201
+ * @returns Processed YAML result
202
+ */
203
+ buildProcessedResult(buffer, fileInfo) {
204
+ const content = buffer.toString("utf-8");
205
+ let parsed = null;
206
+ let valid = true;
207
+ let errorMessage;
208
+ let asJson = null;
209
+ try {
210
+ // Use secure parsing - validation already passed, but maintain consistent security
211
+ parsed = this.parseYamlSecurely(content);
212
+ asJson = JSON.stringify(parsed, null, 2);
213
+ }
214
+ catch (error) {
215
+ // Reached if parsing or JSON.stringify throws; the result is then reported with valid = false
216
+ valid = false;
217
+ errorMessage = error instanceof Error ? error.message : "Invalid YAML";
218
+ }
219
+ return {
220
+ content,
221
+ parsed,
222
+ valid,
223
+ errorMessage,
224
+ asJson,
225
+ buffer,
226
+ mimetype: fileInfo.mimetype || "application/x-yaml",
227
+ size: fileInfo.size,
228
+ filename: this.getFilename(fileInfo),
229
+ };
230
+ }
231
+ }
232
+ // =============================================================================
233
+ // SINGLETON INSTANCE
234
+ // =============================================================================
235
+ /** Singleton YAML processor instance, created at module load and used by isYamlFile and processYaml */
236
+ export const yamlProcessor = new YamlProcessor();
237
+ // =============================================================================
238
+ // UTILITY FUNCTIONS
239
+ // =============================================================================
240
+ /**
241
+ * Check if a file is a YAML file based on MIME type or extension (delegates to yamlProcessor.isFileSupported).
242
+ *
243
+ * @param mimetype - MIME type of the file
244
+ * @param filename - Filename (for extension-based detection)
245
+ * @returns true if the file is a YAML file
246
+ *
247
+ * @example
248
+ * ```typescript
249
+ * if (isYamlFile("application/x-yaml", "config.yaml")) {
250
+ * // Process as YAML
251
+ * }
252
+ * ```
253
+ */
254
+ export function isYamlFile(mimetype, filename) {
255
+ return yamlProcessor.isFileSupported(mimetype, filename);
256
+ }
257
+ /**
258
+ * Validate YAML file size against configured limit.
259
+ *
260
+ * @param sizeBytes - File size in bytes
261
+ * @returns true if sizeBytes is at most SIZE_LIMITS_MB.YAML_MAX_MB * 1024 * 1024 (a size exactly at the limit is accepted)
262
+ */
263
+ export function validateYamlSize(sizeBytes) {
264
+ const maxBytes = SIZE_LIMITS_MB.YAML_MAX_MB * 1024 * 1024;
265
+ return sizeBytes <= maxBytes;
266
+ }
267
+ /**
268
+ * Process a single YAML file with security validation by delegating to the shared yamlProcessor singleton.
269
+ *
270
+ * @param fileInfo - File information (with URL or buffer)
271
+ * @param options - Optional processing options (auth headers, timeout, retry config)
272
+ * @returns Processing result with parsed YAML or error
273
+ *
274
+ * @example
275
+ * ```typescript
276
+ * const result = await processYaml({
277
+ * id: "file-123",
278
+ * name: "config.yaml",
279
+ * mimetype: "application/x-yaml",
280
+ * size: 2048,
281
+ * url: "https://example.com/config.yaml",
282
+ * }, {
283
+ * authHeaders: { "Authorization": "Bearer token" },
284
+ * });
285
+ *
286
+ * if (result.success && result.data) {
287
+ * console.log("Parsed:", result.data.parsed);
288
+ * console.log("As JSON:", result.data.asJson);
289
+ * }
290
+ * ```
291
+ */
292
+ export function processYaml(fileInfo, options) {
293
+ return yamlProcessor.processFile(fileInfo, options);
294
+ }
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Data Processors Module
3
+ *
4
+ * Provides processors for structured data formats (JSON, YAML, XML).
5
+ * All processors include security validation and provide parsed content
6
+ * for easy integration with AI models.
7
+ *
8
+ * @module processors/data
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import {
13
+ * // JSON processing
14
+ * jsonProcessor,
15
+ * isJsonFile,
16
+ * processJson,
17
+ * type ProcessedJson,
18
+ *
19
+ * // YAML processing (with security)
20
+ * yamlProcessor,
21
+ * isYamlFile,
22
+ * processYaml,
23
+ * type ProcessedYaml,
24
+ *
25
+ * // XML processing (with XXE protection)
26
+ * xmlProcessor,
27
+ * isXmlFile,
28
+ * processXml,
29
+ * type ProcessedXml,
30
+ * } from "./data/index.js";
31
+ *
32
+ * // Auto-detect and process
33
+ * async function processDataFile(fileInfo: FileInfo) {
34
+ * if (isJsonFile(fileInfo.mimetype, fileInfo.name)) {
35
+ * return processJson(fileInfo);
36
+ * }
37
+ * if (isYamlFile(fileInfo.mimetype, fileInfo.name)) {
38
+ * return processYaml(fileInfo);
39
+ * }
40
+ * if (isXmlFile(fileInfo.mimetype, fileInfo.name)) {
41
+ * return processXml(fileInfo);
42
+ * }
43
+ * throw new Error("Unsupported data format");
44
+ * }
45
+ * ```
46
+ */
47
+ export { isJsonFile, JsonProcessor, jsonProcessor, type ProcessedJson, processJson, validateJsonSize, } from "./JsonProcessor.js";
48
+ export { isYamlFile, type ProcessedYaml, processYaml, validateYamlSize, YamlProcessor, yamlProcessor, } from "./YamlProcessor.js";
49
+ export { isXmlFile, type ProcessedXml, processXml, validateXmlSize, XmlProcessor, xmlProcessor, } from "./XmlProcessor.js";
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Data Processors Module
3
+ *
4
+ * Provides processors for structured data formats (JSON, YAML, XML).
5
+ * All processors include security validation and provide parsed content
6
+ * for easy integration with AI models.
7
+ *
8
+ * @module processors/data
9
+ *
10
+ * @example
11
+ * ```typescript
12
+ * import {
13
+ * // JSON processing
14
+ * jsonProcessor,
15
+ * isJsonFile,
16
+ * processJson,
17
+ * type ProcessedJson,
18
+ *
19
+ * // YAML processing (with security)
20
+ * yamlProcessor,
21
+ * isYamlFile,
22
+ * processYaml,
23
+ * type ProcessedYaml,
24
+ *
25
+ * // XML processing (with XXE protection)
26
+ * xmlProcessor,
27
+ * isXmlFile,
28
+ * processXml,
29
+ * type ProcessedXml,
30
+ * } from "./data/index.js";
31
+ *
32
+ * // Auto-detect and process
33
+ * async function processDataFile(fileInfo: FileInfo) {
34
+ * if (isJsonFile(fileInfo.mimetype, fileInfo.name)) {
35
+ * return processJson(fileInfo);
36
+ * }
37
+ * if (isYamlFile(fileInfo.mimetype, fileInfo.name)) {
38
+ * return processYaml(fileInfo);
39
+ * }
40
+ * if (isXmlFile(fileInfo.mimetype, fileInfo.name)) {
41
+ * return processXml(fileInfo);
42
+ * }
43
+ * throw new Error("Unsupported data format");
44
+ * }
45
+ * ```
46
+ */
47
+ // =============================================================================
48
+ // JSON PROCESSOR
49
+ // =============================================================================
50
+ export {
51
+ // Utility functions
52
+ isJsonFile,
53
+ // Class
54
+ JsonProcessor,
55
+ // Singleton
56
+ jsonProcessor, processJson, validateJsonSize, } from "./JsonProcessor.js";
57
+ // =============================================================================
58
+ // YAML PROCESSOR
59
+ // =============================================================================
60
+ export {
61
+ // Utility functions
62
+ isYamlFile, processYaml, validateYamlSize,
63
+ // Class
64
+ YamlProcessor,
65
+ // Singleton
66
+ yamlProcessor, } from "./YamlProcessor.js";
67
+ // =============================================================================
68
+ // XML PROCESSOR
69
+ // =============================================================================
70
+ export {
71
+ // Utility functions
72
+ isXmlFile, processXml, validateXmlSize,
73
+ // Class
74
+ XmlProcessor,
75
+ // Singleton
76
+ xmlProcessor, } from "./XmlProcessor.js";
@@ -0,0 +1,238 @@
1
+ /**
2
+ * Excel Processor
3
+ *
4
+ * Handles downloading, validating, and processing Excel files (.xlsx, .xls).
5
+ * Uses exceljs library for parsing with streaming support for large files.
6
+ *
7
+ * Key features:
8
+ * - Supports both .xlsx and legacy .xls formats
9
+ * - Extracts worksheet data with headers
10
+ * - Handles complex cell types (formulas, rich text, dates)
11
+ * - Respects configurable row and sheet limits
12
+ * - Provides truncation metadata when limits are exceeded
13
+ *
14
+ * @module processors/document/ExcelProcessor
15
+ *
16
+ * @example
17
+ * ```typescript
18
+ * import { excelProcessor, processExcel, isExcelFile } from "./ExcelProcessor.js";
19
+ *
20
+ * // Check if a file is an Excel file
21
+ * if (isExcelFile(fileInfo.mimetype, fileInfo.name)) {
22
+ * // Process the Excel file
23
+ * const result = await processExcel(fileInfo, {
24
+ * authHeaders: { Authorization: "Bearer token" },
25
+ * });
26
+ *
27
+ * if (result.success) {
28
+ * console.log(`Processed ${result.data.sheetCount} sheets`);
29
+ * console.log(`Total rows: ${result.data.totalRows}`);
30
+ *
31
+ * for (const sheet of result.data.worksheets) {
32
+ * console.log(`Sheet: ${sheet.name}, Rows: ${sheet.rowCount}`);
33
+ * }
34
+ * }
35
+ * }
36
+ * ```
37
+ */
38
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
39
+ import type { FileInfo, FileProcessingResult, ProcessOptions } from "../base/types.js";
40
+ export type { ExcelWorksheet, ProcessedExcel } from "../base/types.js";
41
+ import type { ProcessedExcel } from "../base/types.js";
42
+ /**
43
+ * Excel Processor - handles .xlsx and .xls files.
44
+ * Uses exceljs library for parsing with support for large files.
45
+ *
46
+ * Features:
47
+ * - ZIP format validation (XLSX files are ZIP archives)
48
+ * - Sheet count limiting (MAX_EXCEL_SHEETS)
49
+ * - Row count limiting per sheet (MAX_EXCEL_ROWS)
50
+ * - Cell type handling (text, numbers, formulas, dates, rich text)
51
+ *
52
+ * @example
53
+ * ```typescript
54
+ * const processor = new ExcelProcessor();
55
+ *
56
+ * // Process a file
57
+ * const result = await processor.processFile(fileInfo, {
58
+ * authHeaders: { Authorization: "Bearer token" },
59
+ * });
60
+ *
61
+ * if (result.success) {
62
+ * console.log(`Sheets: ${result.data.sheetCount}`);
63
+ * console.log(`Truncated: ${result.data.truncated}`);
64
+ * }
65
+ * ```
66
+ */
67
+ export declare class ExcelProcessor extends BaseFileProcessor<ProcessedExcel> {
68
+ constructor();
69
+ /**
70
+ * Validate downloaded Excel file has correct format.
71
+ * XLSX files are ZIP archives starting with the PK signature. NOTE(review): legacy .xls files are not ZIP archives - confirm how they are validated.
72
+ *
73
+ * @param buffer - Downloaded file content
74
+ * @param _fileInfo - Original file information (unused but required by interface)
75
+ * @returns Promise resolving to null if valid, or to an error message if invalid
76
+ */
77
+ protected validateDownloadedFile(buffer: Buffer, _fileInfo: FileInfo): Promise<string | null>;
78
+ /**
79
+ * Build processed result stub.
80
+ * Note: This is a synchronous stub - actual parsing happens in the processFile override.
81
+ *
82
+ * @param buffer - Downloaded file content
83
+ * @param fileInfo - Original file information
84
+ * @returns Empty ProcessedExcel structure (populated by processFile)
85
+ */
86
+ protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedExcel;
87
+ /**
88
+ * Override processFile for async Excel parsing with exceljs.
89
+ * This override is necessary because exceljs uses async parsing.
90
+ *
91
+ * @param fileInfo - File information (can include URL or buffer)
92
+ * @param options - Optional processing options (auth headers, timeout, etc.)
93
+ * @returns Promise resolving to a processing result with parsed Excel data or error
94
+ */
95
+ processFile(fileInfo: FileInfo, options?: ProcessOptions): Promise<FileProcessingResult<ProcessedExcel>>;
96
+ /**
97
+ * Parse Excel buffer into workbook using exceljs.
98
+ *
99
+ * @param buffer - Excel file content
100
+ * @returns Parsed ExcelJS Workbook
101
+ */
102
+ private parseWorkbook;
103
+ /**
104
+ * Extract worksheet data from workbook with row and sheet limits.
105
+ *
106
+ * @param workbook - Parsed ExcelJS Workbook
107
+ * @returns Extracted worksheets with truncation metadata
108
+ */
109
+ private extractWorksheets;
110
+ /**
111
+ * Convert an Excel cell value to a primitive type.
112
+ * Handles various cell types including formulas, rich text, and dates.
113
+ *
114
+ * @param cell - ExcelJS cell value (can be various types)
115
+ * @returns Primitive value (string, number, boolean, or null)
116
+ */
117
+ private getCellValue;
118
+ /**
119
+ * Extract text from rich text cell format.
120
+ * Rich text cells contain an array of text fragments with formatting.
121
+ *
122
+ * @param richText - Array of rich text fragments
123
+ * @returns Concatenated plain text
124
+ */
125
+ private extractRichText;
126
+ }
127
+ /**
128
+ * Singleton Excel processor instance (a shared ExcelProcessor).
129
+ * Use this for standard Excel processing operations.
130
+ *
131
+ * @example
132
+ * ```typescript
133
+ * import { excelProcessor } from "./ExcelProcessor.js";
134
+ *
135
+ * const result = await excelProcessor.processFile(fileInfo);
136
+ * ```
137
+ */
138
+ export declare const excelProcessor: ExcelProcessor;
139
+ /**
140
+ * Check if a file is an Excel file.
141
+ * Returns true when either the MIME type or the file extension matches.
142
+ *
143
+ * @param mimetype - MIME type of the file (an empty string is accepted; see the second example)
144
+ * @param filename - Filename (for extension-based detection)
145
+ * @returns true if the file is an Excel file
146
+ *
147
+ * @example
148
+ * ```typescript
149
+ * if (isExcelFile("application/vnd.ms-excel", "data.xls")) {
150
+ * // Process as Excel
151
+ * }
152
+ *
153
+ * if (isExcelFile("", "report.xlsx")) {
154
+ * // Also matches by extension
155
+ * }
156
+ * ```
157
+ */
158
+ export declare function isExcelFile(mimetype: string, filename: string): boolean;
159
+ /**
160
+ * Validate Excel file size against configured limit.
161
+ *
162
+ * @param sizeBytes - File size in bytes
163
+ * @returns true if size is within the Excel file limit
164
+ *
165
+ * @example
166
+ * ```typescript
167
+ * if (!validateExcelSize(fileInfo.size)) {
168
+ * console.error(`File too large: max ${getExcelMaxSizeMB()}MB`);
169
+ * }
170
+ * ```
171
+ */
172
+ export declare function validateExcelSize(sizeBytes: number): boolean;
173
+ /**
174
+ * Process a single Excel file.
175
+ * Convenience function that uses the singleton processor.
176
+ *
177
+ * @param fileInfo - File information (can include URL or buffer)
178
+ * @param options - Optional processing options (auth headers, timeout, etc.)
179
+ * @returns Promise resolving to a processing result with parsed Excel data or error
180
+ *
181
+ * @example
182
+ * ```typescript
183
+ * import { processExcel } from "./ExcelProcessor.js";
184
+ *
185
+ * const result = await processExcel(fileInfo, {
186
+ * authHeaders: { Authorization: "Bearer token" },
187
+ * timeout: 120000, // 2 minutes for large files
188
+ * });
189
+ *
190
+ * if (result.success) {
191
+ * const { worksheets, totalRows, truncated } = result.data;
192
+ * console.log(`Extracted ${totalRows} rows from ${worksheets.length} sheets`);
193
+ *
194
+ * if (truncated) {
195
+ * console.warn("Some data was truncated due to size limits");
196
+ * }
197
+ * } else {
198
+ * console.error(`Processing failed: ${result.error?.userMessage}`);
199
+ * }
200
+ * ```
201
+ */
202
+ export declare function processExcel(fileInfo: FileInfo, options?: ProcessOptions): Promise<FileProcessingResult<ProcessedExcel>>;
203
+ /**
204
+ * Get Excel max size in MB.
205
+ *
206
+ * @returns Maximum Excel file size in megabytes
207
+ *
208
+ * @example
209
+ * ```typescript
210
+ * const maxSize = getExcelMaxSizeMB(); // e.g. 10
211
+ * console.log(`Maximum Excel file size: ${maxSize}MB`);
212
+ * ```
213
+ */
214
+ export declare function getExcelMaxSizeMB(): number;
215
+ /**
216
+ * Get Excel max rows per sheet.
217
+ *
218
+ * @returns Maximum rows to process per worksheet
219
+ *
220
+ * @example
221
+ * ```typescript
222
+ * const maxRows = getExcelMaxRows(); // e.g. 5000
223
+ * console.log(`Maximum rows per sheet: ${maxRows}`);
224
+ * ```
225
+ */
226
+ export declare function getExcelMaxRows(): number;
227
+ /**
228
+ * Get Excel max sheets to process.
229
+ *
230
+ * @returns Maximum number of worksheets to process
231
+ *
232
+ * @example
233
+ * ```typescript
234
+ * const maxSheets = getExcelMaxSheets(); // e.g. 10
235
+ * console.log(`Maximum sheets to process: ${maxSheets}`);
236
+ * ```
237
+ */
238
+ export declare function getExcelMaxSheets(): number;