@juspay/neurolink 9.1.1 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +54 -7
  3. package/dist/agent/directTools.d.ts +3 -3
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/image-gen/ImageGenService.d.ts +143 -0
  6. package/dist/image-gen/ImageGenService.js +345 -0
  7. package/dist/image-gen/imageGenTools.d.ts +126 -0
  8. package/dist/image-gen/imageGenTools.js +304 -0
  9. package/dist/image-gen/index.d.ts +46 -0
  10. package/dist/image-gen/index.js +48 -0
  11. package/dist/image-gen/types.d.ts +237 -0
  12. package/dist/image-gen/types.js +24 -0
  13. package/dist/lib/agent/directTools.d.ts +3 -3
  14. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  15. package/dist/lib/image-gen/ImageGenService.js +346 -0
  16. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  17. package/dist/lib/image-gen/imageGenTools.js +305 -0
  18. package/dist/lib/image-gen/index.d.ts +46 -0
  19. package/dist/lib/image-gen/index.js +49 -0
  20. package/dist/lib/image-gen/types.d.ts +237 -0
  21. package/dist/lib/image-gen/types.js +25 -0
  22. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  23. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  24. package/dist/lib/processors/base/index.d.ts +14 -0
  25. package/dist/lib/processors/base/index.js +20 -0
  26. package/dist/lib/processors/base/types.d.ts +593 -0
  27. package/dist/lib/processors/base/types.js +77 -0
  28. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  29. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  30. package/dist/lib/processors/cli/index.d.ts +37 -0
  31. package/dist/lib/processors/cli/index.js +50 -0
  32. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  33. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  34. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  35. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  36. package/dist/lib/processors/code/index.d.ts +44 -0
  37. package/dist/lib/processors/code/index.js +61 -0
  38. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  39. package/dist/lib/processors/config/fileTypes.js +521 -0
  40. package/dist/lib/processors/config/index.d.ts +32 -0
  41. package/dist/lib/processors/config/index.js +93 -0
  42. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  43. package/dist/lib/processors/config/languageMap.js +411 -0
  44. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  45. package/dist/lib/processors/config/mimeTypes.js +339 -0
  46. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  47. package/dist/lib/processors/config/sizeLimits.js +247 -0
  48. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  49. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  50. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  51. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  52. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  53. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  54. package/dist/lib/processors/data/index.d.ts +49 -0
  55. package/dist/lib/processors/data/index.js +77 -0
  56. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  57. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  58. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  59. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  60. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  61. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  62. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  63. package/dist/lib/processors/document/WordProcessor.js +354 -0
  64. package/dist/lib/processors/document/index.d.ts +54 -0
  65. package/dist/lib/processors/document/index.js +91 -0
  66. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  67. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  68. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  69. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  70. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  71. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  72. package/dist/lib/processors/errors/index.d.ts +46 -0
  73. package/dist/lib/processors/errors/index.js +50 -0
  74. package/dist/lib/processors/index.d.ts +76 -0
  75. package/dist/lib/processors/index.js +113 -0
  76. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  77. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  78. package/dist/lib/processors/integration/index.d.ts +42 -0
  79. package/dist/lib/processors/integration/index.js +45 -0
  80. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  81. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  82. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  83. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  84. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  85. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  86. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  87. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  88. package/dist/lib/processors/markup/index.d.ts +66 -0
  89. package/dist/lib/processors/markup/index.js +103 -0
  90. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  91. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  92. package/dist/lib/processors/registry/index.d.ts +12 -0
  93. package/dist/lib/processors/registry/index.js +17 -0
  94. package/dist/lib/processors/registry/types.d.ts +53 -0
  95. package/dist/lib/processors/registry/types.js +11 -0
  96. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  97. package/dist/lib/server/utils/validation.d.ts +6 -6
  98. package/dist/lib/types/fileTypes.d.ts +1 -1
  99. package/dist/lib/types/index.d.ts +25 -24
  100. package/dist/lib/types/index.js +21 -20
  101. package/dist/lib/types/modelTypes.d.ts +18 -18
  102. package/dist/lib/types/pptTypes.d.ts +14 -2
  103. package/dist/lib/types/pptTypes.js +16 -0
  104. package/dist/lib/utils/async/delay.d.ts +40 -0
  105. package/dist/lib/utils/async/delay.js +43 -0
  106. package/dist/lib/utils/async/index.d.ts +23 -0
  107. package/dist/lib/utils/async/index.js +24 -0
  108. package/dist/lib/utils/async/retry.d.ts +141 -0
  109. package/dist/lib/utils/async/retry.js +172 -0
  110. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  111. package/dist/lib/utils/async/withTimeout.js +97 -0
  112. package/dist/lib/utils/fileDetector.d.ts +7 -1
  113. package/dist/lib/utils/fileDetector.js +91 -18
  114. package/dist/lib/utils/json/extract.d.ts +103 -0
  115. package/dist/lib/utils/json/extract.js +249 -0
  116. package/dist/lib/utils/json/index.d.ts +36 -0
  117. package/dist/lib/utils/json/index.js +37 -0
  118. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  119. package/dist/lib/utils/json/safeParse.js +191 -0
  120. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  121. package/dist/lib/utils/messageBuilder.js +15 -7
  122. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  123. package/dist/lib/utils/sanitizers/filename.js +366 -0
  124. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  125. package/dist/lib/utils/sanitizers/html.js +326 -0
  126. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  127. package/dist/lib/utils/sanitizers/index.js +30 -0
  128. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  129. package/dist/lib/utils/sanitizers/svg.js +483 -0
  130. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  131. package/dist/processors/base/BaseFileProcessor.js +613 -0
  132. package/dist/processors/base/index.d.ts +14 -0
  133. package/dist/processors/base/index.js +19 -0
  134. package/dist/processors/base/types.d.ts +593 -0
  135. package/dist/processors/base/types.js +76 -0
  136. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  137. package/dist/processors/cli/fileProcessorCli.js +388 -0
  138. package/dist/processors/cli/index.d.ts +37 -0
  139. package/dist/processors/cli/index.js +49 -0
  140. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  141. package/dist/processors/code/ConfigProcessor.js +400 -0
  142. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  143. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  144. package/dist/processors/code/index.d.ts +44 -0
  145. package/dist/processors/code/index.js +60 -0
  146. package/dist/processors/config/fileTypes.d.ts +283 -0
  147. package/dist/processors/config/fileTypes.js +520 -0
  148. package/dist/processors/config/index.d.ts +32 -0
  149. package/dist/processors/config/index.js +92 -0
  150. package/dist/processors/config/languageMap.d.ts +66 -0
  151. package/dist/processors/config/languageMap.js +410 -0
  152. package/dist/processors/config/mimeTypes.d.ts +376 -0
  153. package/dist/processors/config/mimeTypes.js +338 -0
  154. package/dist/processors/config/sizeLimits.d.ts +194 -0
  155. package/dist/processors/config/sizeLimits.js +246 -0
  156. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  157. package/dist/processors/data/JsonProcessor.js +203 -0
  158. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  159. package/dist/processors/data/XmlProcessor.js +283 -0
  160. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  161. package/dist/processors/data/YamlProcessor.js +294 -0
  162. package/dist/processors/data/index.d.ts +49 -0
  163. package/dist/processors/data/index.js +76 -0
  164. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  165. package/dist/processors/document/ExcelProcessor.js +519 -0
  166. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  167. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  168. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  169. package/dist/processors/document/RtfProcessor.js +361 -0
  170. package/dist/processors/document/WordProcessor.d.ts +168 -0
  171. package/dist/processors/document/WordProcessor.js +353 -0
  172. package/dist/processors/document/index.d.ts +54 -0
  173. package/dist/processors/document/index.js +90 -0
  174. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  175. package/dist/processors/errors/FileErrorCode.js +255 -0
  176. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  177. package/dist/processors/errors/errorHelpers.js +378 -0
  178. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  179. package/dist/processors/errors/errorSerializer.js +507 -0
  180. package/dist/processors/errors/index.d.ts +46 -0
  181. package/dist/processors/errors/index.js +49 -0
  182. package/dist/processors/index.d.ts +76 -0
  183. package/dist/processors/index.js +112 -0
  184. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  185. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  186. package/dist/processors/integration/index.d.ts +42 -0
  187. package/dist/processors/integration/index.js +44 -0
  188. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  189. package/dist/processors/markup/HtmlProcessor.js +249 -0
  190. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  191. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  192. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  193. package/dist/processors/markup/SvgProcessor.js +240 -0
  194. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  195. package/dist/processors/markup/TextProcessor.js +188 -0
  196. package/dist/processors/markup/index.d.ts +66 -0
  197. package/dist/processors/markup/index.js +102 -0
  198. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  199. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  200. package/dist/processors/registry/index.d.ts +12 -0
  201. package/dist/processors/registry/index.js +16 -0
  202. package/dist/processors/registry/types.d.ts +53 -0
  203. package/dist/processors/registry/types.js +10 -0
  204. package/dist/server/utils/validation.d.ts +6 -6
  205. package/dist/types/fileTypes.d.ts +1 -1
  206. package/dist/types/index.d.ts +25 -24
  207. package/dist/types/index.js +21 -20
  208. package/dist/types/modelTypes.d.ts +10 -10
  209. package/dist/types/pptTypes.d.ts +14 -2
  210. package/dist/types/pptTypes.js +16 -0
  211. package/dist/utils/async/delay.d.ts +40 -0
  212. package/dist/utils/async/delay.js +42 -0
  213. package/dist/utils/async/index.d.ts +23 -0
  214. package/dist/utils/async/index.js +23 -0
  215. package/dist/utils/async/retry.d.ts +141 -0
  216. package/dist/utils/async/retry.js +171 -0
  217. package/dist/utils/async/withTimeout.d.ts +73 -0
  218. package/dist/utils/async/withTimeout.js +96 -0
  219. package/dist/utils/fileDetector.d.ts +7 -1
  220. package/dist/utils/fileDetector.js +91 -18
  221. package/dist/utils/json/extract.d.ts +103 -0
  222. package/dist/utils/json/extract.js +248 -0
  223. package/dist/utils/json/index.d.ts +36 -0
  224. package/dist/utils/json/index.js +36 -0
  225. package/dist/utils/json/safeParse.d.ts +137 -0
  226. package/dist/utils/json/safeParse.js +190 -0
  227. package/dist/utils/messageBuilder.d.ts +2 -2
  228. package/dist/utils/messageBuilder.js +15 -7
  229. package/dist/utils/sanitizers/filename.d.ts +137 -0
  230. package/dist/utils/sanitizers/filename.js +365 -0
  231. package/dist/utils/sanitizers/html.d.ts +170 -0
  232. package/dist/utils/sanitizers/html.js +325 -0
  233. package/dist/utils/sanitizers/index.d.ts +26 -0
  234. package/dist/utils/sanitizers/index.js +29 -0
  235. package/dist/utils/sanitizers/svg.d.ts +81 -0
  236. package/dist/utils/sanitizers/svg.js +482 -0
  237. package/package.json +2 -2
@@ -0,0 +1,613 @@
1
+ /**
2
+ * Base File Processor Abstract Class
3
+ *
4
+ * Provides common functionality for downloading, validating, and processing files
5
+ * from any source (URLs, buffers, cloud storage, etc.)
6
+ *
7
+ * This class uses the Template Method pattern to provide a consistent processing
8
+ * pipeline while allowing subclasses to customize specific steps.
9
+ *
10
+ * Key features:
11
+ * - Support for both URL downloads and direct buffer input
12
+ * - Configurable retry with exponential backoff
13
+ * - Gzip decompression support
14
+ * - Structured error handling with user-friendly messages
15
+ * - File type validation by MIME type and extension
16
+ * - Size limit enforcement
17
+ *
18
+ * @module processors/base/BaseFileProcessor
19
+ *
20
+ * @example
21
+ * ```typescript
22
+ * class ImageProcessor extends BaseFileProcessor<ProcessedImage> {
23
+ * constructor() {
24
+ * super({
25
+ * maxSizeMB: 10,
26
+ * timeoutMs: 30000,
27
+ * supportedMimeTypes: ['image/jpeg', 'image/png'],
28
+ * supportedExtensions: ['.jpg', '.jpeg', '.png'],
29
+ * fileTypeName: 'image',
30
+ * defaultFilename: 'image.jpg',
31
+ * });
32
+ * }
33
+ *
34
+ * protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedImage {
35
+ * return {
36
+ * buffer,
37
+ * mimetype: fileInfo.mimetype,
38
+ * size: buffer.length,
39
+ * filename: this.getFilename(fileInfo),
40
+ * // ... additional image-specific fields
41
+ * };
42
+ * }
43
+ * }
44
+ * ```
45
+ */
46
+ import { promisify } from "util";
47
+ import { gunzip } from "zlib";
48
+ import { SIZE_LIMITS } from "../config/index.js";
49
+ import { createFileError, extractHttpStatus, FileErrorCode, isRetryableError, } from "../errors/index.js";
50
+ import { DEFAULT_RETRY_CONFIG } from "./types.js";
51
+ const gunzipAsync = promisify(gunzip);
52
/**
 * Abstract base class for file processors.
 *
 * Implements a fixed processing pipeline (validate -> obtain bytes -> enforce
 * size limit -> post-validate -> build result) and leaves the final step to
 * subclasses, which must provide `buildProcessedResult(buffer, fileInfo)`.
 * Every pipeline step reports failure through a `{ success: false, error }`
 * result object rather than by throwing.
 *
 * @typeParam T - The type of processed result, must extend ProcessedFileBase
 */
export class BaseFileProcessor {
    /** Processor configuration */
    config;

    /**
     * Creates a new file processor with the given configuration.
     *
     * @param config - Processor configuration (size limit, timeout, supported
     *   MIME types / extensions, display name, default filename)
     */
    constructor(config) {
        this.config = config;
    }

    /**
     * Get the processor configuration.
     * Gives external consumers access to the configuration object without
     * reaching into the field directly. The same object is returned (no copy).
     *
     * @returns The processor configuration
     */
    getConfig() {
        return this.config;
    }

    // ===========================================================================
    // PUBLIC API
    // ===========================================================================

    /**
     * Process a single file.
     * Main entry point - implements the Template Method pattern.
     *
     * Never throws: unexpected exceptions are converted into an UNKNOWN_ERROR
     * result.
     *
     * @param fileInfo - File information (must carry either `buffer` or `url`)
     * @param options - Optional processing options (auth headers, timeout, retry config)
     * @returns Processing result with success flag and either data or error
     *
     * @example
     * ```typescript
     * const result = await processor.processFile(fileInfo, {
     *   authHeaders: { 'Authorization': 'Bearer token' },
     *   timeout: 60000,
     * });
     *
     * if (result.success) {
     *   console.log('Processed:', result.data.filename);
     * } else {
     *   console.error('Failed:', result.error.userMessage);
     * }
     * ```
     */
    async processFile(fileInfo, options) {
        try {
            // Step 1: Validate file type and declared size
            const validationResult = this.validateFileWithResult(fileInfo);
            if (!validationResult.success) {
                return {
                    success: false,
                    error: validationResult.error,
                };
            }

            // Step 2: Get file buffer (from direct buffer or download from URL)
            let buffer;
            if (fileInfo.buffer) {
                // Direct buffer provided - skip download
                buffer = fileInfo.buffer;
            }
            else if (fileInfo.url) {
                const downloadResult = await this.downloadFileWithRetry(fileInfo, options);
                if (!downloadResult.success) {
                    return {
                        success: false,
                        error: downloadResult.error,
                    };
                }
                if (!downloadResult.data) {
                    return {
                        success: false,
                        error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                            reason: "Download succeeded but returned no data",
                        }),
                    };
                }
                buffer = downloadResult.data;
            }
            else {
                // No buffer or URL provided
                return {
                    success: false,
                    error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                        reason: "No buffer or URL provided for file",
                    }),
                };
            }

            // Step 3: Enforce the size limit on the bytes actually obtained.
            // The declared `fileInfo.size` may be missing or wrong, so this check
            // applies to direct buffers as well as downloads.
            const actualBytes = buffer.byteLength ?? buffer.length;
            if (typeof actualBytes === "number" && !this.validateFileSize(actualBytes)) {
                return {
                    success: false,
                    error: this.createError(FileErrorCode.FILE_TOO_LARGE, {
                        sizeMB: this.formatSizeMB(actualBytes),
                        maxMB: this.config.maxSizeMB,
                        type: this.config.fileTypeName,
                    }),
                };
            }

            // Step 4: Post-download validation (subclasses can override)
            const postValidationResult = await this.validateDownloadedFileWithResult(buffer, fileInfo);
            if (!postValidationResult.success) {
                return {
                    success: false,
                    error: postValidationResult.error,
                };
            }

            // Step 5: Build processed result using template method
            return await this.buildProcessedResultWithResult(buffer, fileInfo);
        }
        catch (error) {
            // Catch any unexpected errors so callers always receive a result object
            return {
                success: false,
                error: this.createError(FileErrorCode.UNKNOWN_ERROR, { error: error instanceof Error ? error.message : String(error) }, error instanceof Error ? error : undefined),
            };
        }
    }

    /**
     * Process multiple files sequentially and return a detailed summary.
     *
     * Every requested ID ends up in exactly one of `processedFiles`,
     * `failedFiles` or `skippedFiles`. A lookup that throws, a lookup that
     * returns nothing, and a processing result carrying neither data nor an
     * error are all recorded as failures instead of aborting the batch or
     * being dropped.
     *
     * @param fileIds - Array of file IDs to process
     * @param getFileInfo - Function to retrieve file info by ID
     * @param options - Optional processing options
     * @returns Summary with processed, failed, and skipped files
     *
     * @example
     * ```typescript
     * const summary = await processor.processFiles(
     *   ['file1', 'file2', 'file3'],
     *   async (id) => await fetchFileInfo(id),
     *   { authHeaders: { 'Authorization': 'Bearer token' } }
     * );
     *
     * console.log(`Success: ${summary.processedFiles.length}`);
     * console.log(`Failed: ${summary.failedFiles.length}`);
     * ```
     */
    async processFiles(fileIds, getFileInfo, options) {
        const results = [];
        const processedFiles = [];
        const failedFiles = [];
        const skippedFiles = [];
        const warnings = [];

        for (const fileId of fileIds) {
            let fileInfo;
            try {
                fileInfo = await getFileInfo(fileId);
            }
            catch (error) {
                // A failing lookup must not discard the files already processed
                failedFiles.push({
                    fileId,
                    filename: "unknown",
                    mimetype: "unknown",
                    size: 0,
                    error: this.createError(FileErrorCode.UNKNOWN_ERROR, { error: error instanceof Error ? error.message : String(error) }, error instanceof Error ? error : undefined),
                });
                continue;
            }
            if (!fileInfo) {
                failedFiles.push({
                    fileId,
                    filename: "unknown",
                    mimetype: "unknown",
                    size: 0,
                    error: this.createError(FileErrorCode.FILE_NOT_FOUND),
                });
                continue;
            }

            const result = await this.processFile(fileInfo, options);
            const entry = {
                fileId: fileInfo.id,
                filename: fileInfo.name || "unknown",
                mimetype: fileInfo.mimetype,
                size: fileInfo.size,
            };

            if (result.success && result.data) {
                results.push(result.data);
                processedFiles.push({
                    ...entry,
                    processorType: this.config.fileTypeName,
                });
            }
            else if (result.error?.code === FileErrorCode.UNSUPPORTED_TYPE) {
                // Unsupported type is a "skipped" case rather than a hard failure
                skippedFiles.push({
                    ...entry,
                    reason: result.error.message,
                });
            }
            else {
                failedFiles.push({
                    ...entry,
                    // A result without data and without an error still counts as a failure
                    error: result.error ??
                        this.createError(FileErrorCode.PROCESSING_FAILED, {
                            fileType: this.config.fileTypeName,
                        }),
                });
            }
        }

        return {
            totalFiles: fileIds.length,
            processedFiles,
            failedFiles,
            skippedFiles,
            warnings,
            results,
        };
    }

    /**
     * Check if a file is supported by this processor.
     * A file is supported when EITHER its MIME type OR its extension matches.
     *
     * @param mimetype - MIME type of the file
     * @param filename - Filename (for extension-based detection)
     * @returns true if the file type is supported
     *
     * @example
     * ```typescript
     * if (processor.isFileSupported('image/jpeg', 'photo.jpg')) {
     *   // Process the file
     * }
     * ```
     */
    isFileSupported(mimetype, filename) {
        return (this.isSupportedMimeType(mimetype) || this.isSupportedExtension(filename));
    }

    // ===========================================================================
    // PROTECTED METHODS - Can be overridden by subclasses
    // ===========================================================================

    /**
     * Validate downloaded file buffer.
     * Override for custom post-download validation (e.g., magic bytes).
     *
     * @param _buffer - Downloaded file content
     * @param _fileInfo - Original file information
     * @returns null if valid, error message if invalid
     */
    async validateDownloadedFile(_buffer, _fileInfo) {
        return null; // No validation by default
    }

    /**
     * Validate downloaded file buffer with structured error result.
     * Override for custom post-download validation with detailed errors.
     * The default implementation delegates to `validateDownloadedFile` and wraps
     * any returned message in an INVALID_FORMAT error.
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns Success result or error result
     */
    async validateDownloadedFileWithResult(buffer, fileInfo) {
        // Call the legacy validation method for backward compatibility
        const errorMessage = await this.validateDownloadedFile(buffer, fileInfo);
        if (errorMessage) {
            return {
                success: false,
                error: this.createError(FileErrorCode.INVALID_FORMAT, {
                    reason: errorMessage,
                }),
            };
        }
        return { success: true, data: undefined };
    }

    /**
     * Build processed result with structured error handling.
     * Calls the subclass-provided `buildProcessedResult` and converts anything it
     * throws into a PROCESSING_FAILED error result.
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns Success result with data or error result
     */
    async buildProcessedResultWithResult(buffer, fileInfo) {
        try {
            const result = await this.buildProcessedResult(buffer, fileInfo);
            return { success: true, data: result };
        }
        catch (error) {
            return {
                success: false,
                error: this.createError(FileErrorCode.PROCESSING_FAILED, { fileType: this.config.fileTypeName }, error instanceof Error ? error : undefined),
            };
        }
    }

    /**
     * Get filename with default fallback.
     *
     * @param fileInfo - File information
     * @returns `fileInfo.name`, or the configured default filename when the name is empty or missing
     */
    getFilename(fileInfo) {
        return fileInfo.name || this.config.defaultFilename;
    }

    // ===========================================================================
    // DOWNLOAD METHODS
    // ===========================================================================

    /**
     * Download file from URL with authentication.
     *
     * @param url - URL to download from
     * @param authHeaders - Optional authentication headers
     * @param timeout - Optional timeout override in milliseconds (defaults to `config.timeoutMs`)
     * @returns Downloaded file content as Buffer
     * @throws Error on a non-2xx status, an HTML response, a gzip decompression
     *   failure, or when the request is aborted by the timeout
     */
    async downloadFile(url, authHeaders, timeout) {
        // Note: We intentionally use AbortController + setTimeout here rather than the shared
        // withTimeout utility. AbortController.signal cancels the actual HTTP request via
        // fetch's signal option, while withTimeout only races promises and would leave
        // the fetch running in the background, consuming network resources.
        const controller = new AbortController();
        const effectiveTimeout = timeout ?? this.config.timeoutMs;
        const timeoutId = setTimeout(() => {
            controller.abort();
        }, effectiveTimeout);
        try {
            const headers = {
                ...authHeaders,
            };
            const response = await fetch(url, {
                headers,
                signal: controller.signal,
            });
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }
            // Reject HTML responses - likely an error page or redirect.
            // Media types are case-insensitive, so compare in lower case.
            const contentType = response.headers.get("Content-Type");
            if (contentType && contentType.toLowerCase().includes("text/html")) {
                throw new Error(`Received HTML response instead of file content (Content-Type: ${contentType}). This usually means the download URL returned an error page.`);
            }
            const arrayBuffer = await response.arrayBuffer();
            let buffer = Buffer.from(arrayBuffer);
            // Check for gzip encoding and decompress if needed
            // Only decompress if the data actually starts with gzip magic bytes (0x1f 0x8b)
            const contentEncoding = response.headers.get("Content-Encoding");
            const isActuallyGzipped = buffer.length >= 2 && buffer[0] === 0x1f && buffer[1] === 0x8b;
            if (contentEncoding?.toLowerCase().includes("gzip") &&
                isActuallyGzipped) {
                try {
                    buffer = Buffer.from(await gunzipAsync(buffer));
                }
                catch (gzipError) {
                    throw new Error(`Failed to decompress gzip response: ${gzipError instanceof Error ? gzipError.message : String(gzipError)}`);
                }
            }
            return buffer;
        }
        finally {
            // Always release the timer, whether the download succeeded or failed
            clearTimeout(timeoutId);
        }
    }

    /**
     * Download file with retry logic for transient failures.
     *
     * At least one attempt is always made, even if the supplied retry config has
     * a negative or missing `maxRetries`.
     *
     * @param fileInfo - File information with URL
     * @param options - Processing options including auth headers and retry config
     * @returns Success result with buffer or error result
     */
    async downloadFileWithRetry(fileInfo, options) {
        const url = fileInfo.url;
        if (!url) {
            return {
                success: false,
                error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                    reason: "No URL provided for download",
                }),
            };
        }
        const retryConfig = options?.retryConfig ?? DEFAULT_RETRY_CONFIG;
        // Clamp so the loop body runs at least once and `lastError` is always set
        const maxRetries = Math.max(0, Number(retryConfig.maxRetries) || 0);
        let lastError;
        for (let attempt = 0; attempt <= maxRetries; attempt++) {
            try {
                const buffer = await this.downloadFile(url, options?.authHeaders, options?.timeout);
                return { success: true, data: buffer };
            }
            catch (error) {
                lastError = error instanceof Error ? error : new Error(String(error));
                // Retry only while attempts remain and the error is considered transient
                const shouldRetry = attempt < maxRetries &&
                    (retryConfig.retryOn
                        ? retryConfig.retryOn(lastError)
                        : isRetryableError(lastError));
                if (!shouldRetry) {
                    break;
                }
                // Exponential backoff, capped at maxDelayMs
                const delay = Math.min(retryConfig.baseDelayMs * 2 ** attempt, retryConfig.maxDelayMs);
                await this.sleep(delay);
            }
        }
        // Classify the final error, reporting the timeout that was actually in effect
        return {
            success: false,
            error: this.classifyDownloadError(lastError, options?.timeout ?? this.config.timeoutMs),
        };
    }

    // ===========================================================================
    // VALIDATION METHODS
    // ===========================================================================

    /**
     * Validate file type and declared size with structured error result.
     *
     * The declared size is only checked when `fileInfo.size` is present; the
     * size of the actual bytes is enforced later in `processFile`.
     *
     * @param fileInfo - File information to validate
     * @returns Success result or error result
     */
    validateFileWithResult(fileInfo) {
        // Validate file type
        if (!this.isFileSupported(fileInfo.mimetype, fileInfo.name || "")) {
            return {
                success: false,
                error: this.createError(FileErrorCode.UNSUPPORTED_TYPE, {
                    format: fileInfo.mimetype || fileInfo.name?.split(".").pop() || "unknown",
                    supportedFormats: this.config.supportedMimeTypes.length > 0
                        ? this.config.supportedMimeTypes.join(", ")
                        : this.config.supportedExtensions.join(", "),
                    type: this.config.fileTypeName,
                }),
            };
        }
        // Validate declared size. An absent size is not "too large": comparing
        // undefined against the limit would otherwise fail and report "NaN" MB.
        if (fileInfo.size != null && !this.validateFileSize(fileInfo.size)) {
            const sizeMB = this.formatSizeMB(fileInfo.size);
            return {
                success: false,
                error: this.createError(FileErrorCode.FILE_TOO_LARGE, {
                    sizeMB,
                    maxMB: this.config.maxSizeMB,
                    type: this.config.fileTypeName,
                }),
            };
        }
        return { success: true, data: undefined };
    }

    /**
     * Validate file size against configured maximum.
     *
     * @param sizeBytes - File size in bytes
     * @returns true if size is within limits (inclusive of the limit itself)
     */
    validateFileSize(sizeBytes) {
        const maxBytes = this.config.maxSizeMB * 1024 * 1024;
        return sizeBytes <= maxBytes;
    }

    /**
     * Check if file matches supported MIME types.
     * Comparison is case-insensitive and ignores parameters, so
     * `Text/Plain; charset=utf-8` matches a configured `text/plain`.
     *
     * @param mimetype - MIME type to check
     * @returns true if MIME type is supported
     */
    isSupportedMimeType(mimetype) {
        if (!mimetype) {
            return false;
        }
        // Keep only the "type/subtype" part in front of any ";parameter"
        const essence = mimetype.split(";", 1)[0].trim().toLowerCase();
        return this.config.supportedMimeTypes.some((supported) => supported.toLowerCase() === essence);
    }

    /**
     * Check if file matches supported extensions.
     * Comparison is case-insensitive on both the filename and the configured
     * extensions.
     *
     * @param filename - Filename to check
     * @returns true if extension is supported
     */
    isSupportedExtension(filename) {
        if (!filename) {
            return false;
        }
        const lowerFilename = filename.toLowerCase();
        return this.config.supportedExtensions.some((ext) => lowerFilename.endsWith(ext.toLowerCase()));
    }

    // ===========================================================================
    // UTILITY METHODS
    // ===========================================================================

    /**
     * Format file size in MB with 2 decimal places.
     *
     * @param sizeBytes - Size in bytes
     * @returns Formatted size string
     */
    formatSizeMB(sizeBytes) {
        return (sizeBytes / (1024 * 1024)).toFixed(2);
    }

    /**
     * Create a structured file processing error.
     * Thin wrapper over `createFileError` so subclasses can override error creation.
     *
     * @param code - Error code
     * @param details - Additional error details
     * @param originalError - Original error that caused this
     * @returns Structured error object
     */
    createError(code, details, originalError) {
        return createFileError(code, details, originalError);
    }

    /**
     * Classify a download error into appropriate error code.
     *
     * @param error - The error to classify
     * @param timeoutMs - Timeout that was in effect for the failed download;
     *   defaults to the configured timeout
     * @returns Structured file processing error
     */
    classifyDownloadError(error, timeoutMs = this.config.timeoutMs) {
        if (error.name === "AbortError") {
            return this.createError(FileErrorCode.DOWNLOAD_TIMEOUT, { timeoutMs }, error);
        }
        if (error.message.includes("HTTP")) {
            const status = extractHttpStatus(error);
            if (status === 404) {
                return this.createError(FileErrorCode.FILE_NOT_FOUND, {}, error);
            }
            if (status === 401 || status === 403) {
                return this.createError(FileErrorCode.DOWNLOAD_AUTH_FAILED, { httpStatus: status }, error);
            }
            if (status === 429) {
                return this.createError(FileErrorCode.RATE_LIMITED, {}, error);
            }
            return this.createError(FileErrorCode.NETWORK_ERROR, { httpStatus: status }, error);
        }
        if (error.message.includes("decompress")) {
            return this.createError(FileErrorCode.DECOMPRESSION_FAILED, {}, error);
        }
        return this.createError(FileErrorCode.DOWNLOAD_FAILED, {}, error);
    }

    /**
     * Sleep for specified milliseconds.
     *
     * @param ms - Milliseconds to sleep
     * @returns Promise that resolves after the delay
     */
    sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }
}
578
+ // ===========================================================================
579
+ // UTILITY FUNCTIONS
580
+ // ===========================================================================
581
/**
 * Default download timeout for text files.
 *
 * The value is fixed because no environment-driven configuration is
 * available at this layer.
 *
 * @returns Timeout in milliseconds
 */
export function getDefaultTextTimeout() {
    const thirtySecondsInMs = 30 * 1000;
    return thirtySecondsInMs;
}
590
/**
 * Default download timeout for image files.
 *
 * @returns Timeout in milliseconds
 */
export function getDefaultImageTimeout() {
    const thirtySecondsInMs = 30 * 1000;
    return thirtySecondsInMs;
}
598
/**
 * Default maximum size for text files, read from the shared size-limit table.
 *
 * @returns Max size in megabytes
 */
export function getDefaultTextMaxSizeMB() {
    const { TEXT_MAX_MB: textLimitMB } = SIZE_LIMITS;
    return textLimitMB;
}
606
/**
 * Default maximum size for image files, read from the shared size-limit table.
 *
 * @returns Max size in megabytes
 */
export function getDefaultImageMaxSizeMB() {
    const { IMAGE_MAX_MB: imageLimitMB } = SIZE_LIMITS;
    return imageLimitMB;
}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Base File Processor Infrastructure
3
+ *
4
+ * Provides the foundation for building file processors in NeuroLink.
5
+ * This module contains:
6
+ * - Abstract base class for file processors (BaseFileProcessor)
7
+ * - ALL type definitions for file processing operations
8
+ * - Constants for defaults and priorities
9
+ *
10
+ * @module processors/base
11
+ */
12
+ export { BaseFileProcessor, getDefaultImageMaxSizeMB, getDefaultImageTimeout, getDefaultTextMaxSizeMB, getDefaultTextTimeout, } from "./BaseFileProcessor.js";
13
+ export type { BatchProcessingSummary, ErrorMessageTemplate, ExcelWorksheet, FailedFileInfo, FileErrorCode, FileInfo, FileProcessingError, FileProcessingResult, FileProcessorConfig, FileWarning, JsonTypeGuard, OperationResult, ProcessedConfig, ProcessedExcel, ProcessedFileBase, ProcessedFileInfo, ProcessedHtml, ProcessedJson, ProcessedMarkdown, ProcessedOpenDocument, ProcessedRtf, ProcessedSourceCode, ProcessedSvg, ProcessedText, ProcessedWord, ProcessedXml, ProcessedYaml, ProcessOptions, ProcessorInfo, ProcessorMatch, ProcessorPriorityKey, ProcessorPriorityValue, RegistryOptions, RegistryProcessResult, RetryConfig, SkippedFileInfo, UnsupportedFileError, } from "./types.js";
14
+ export { DEFAULT_IMAGE_MAX_SIZE_MB, DEFAULT_IMAGE_TIMEOUT_MS, DEFAULT_RETRY_CONFIG, DEFAULT_TEXT_MAX_SIZE_MB, DEFAULT_TEXT_TIMEOUT_MS, PROCESSOR_PRIORITIES, } from "./types.js";
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Base File Processor Infrastructure
3
+ *
4
+ * Provides the foundation for building file processors in NeuroLink.
5
+ * This module contains:
6
+ * - Abstract base class for file processors (BaseFileProcessor)
7
+ * - ALL type definitions for file processing operations
8
+ * - Constants for defaults and priorities
9
+ *
10
+ * @module processors/base
11
+ */
12
+ // =============================================================================
13
+ // BASE PROCESSOR CLASS
14
+ // =============================================================================
15
+ export { BaseFileProcessor, getDefaultImageMaxSizeMB, getDefaultImageTimeout, getDefaultTextMaxSizeMB, getDefaultTextTimeout, } from "./BaseFileProcessor.js";
16
+ // =============================================================================
17
+ // CONSTANTS
18
+ // =============================================================================
19
+ export { DEFAULT_IMAGE_MAX_SIZE_MB, DEFAULT_IMAGE_TIMEOUT_MS, DEFAULT_RETRY_CONFIG, DEFAULT_TEXT_MAX_SIZE_MB, DEFAULT_TEXT_TIMEOUT_MS, PROCESSOR_PRIORITIES, } from "./types.js";