@juspay/neurolink 9.1.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +54 -7
  3. package/dist/agent/directTools.d.ts +3 -3
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/image-gen/ImageGenService.d.ts +143 -0
  6. package/dist/image-gen/ImageGenService.js +345 -0
  7. package/dist/image-gen/imageGenTools.d.ts +126 -0
  8. package/dist/image-gen/imageGenTools.js +304 -0
  9. package/dist/image-gen/index.d.ts +46 -0
  10. package/dist/image-gen/index.js +48 -0
  11. package/dist/image-gen/types.d.ts +237 -0
  12. package/dist/image-gen/types.js +24 -0
  13. package/dist/lib/agent/directTools.d.ts +3 -3
  14. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  15. package/dist/lib/image-gen/ImageGenService.js +346 -0
  16. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  17. package/dist/lib/image-gen/imageGenTools.js +305 -0
  18. package/dist/lib/image-gen/index.d.ts +46 -0
  19. package/dist/lib/image-gen/index.js +49 -0
  20. package/dist/lib/image-gen/types.d.ts +237 -0
  21. package/dist/lib/image-gen/types.js +25 -0
  22. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  23. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  24. package/dist/lib/processors/base/index.d.ts +14 -0
  25. package/dist/lib/processors/base/index.js +20 -0
  26. package/dist/lib/processors/base/types.d.ts +593 -0
  27. package/dist/lib/processors/base/types.js +77 -0
  28. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  29. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  30. package/dist/lib/processors/cli/index.d.ts +37 -0
  31. package/dist/lib/processors/cli/index.js +50 -0
  32. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  33. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  34. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  35. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  36. package/dist/lib/processors/code/index.d.ts +44 -0
  37. package/dist/lib/processors/code/index.js +61 -0
  38. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  39. package/dist/lib/processors/config/fileTypes.js +521 -0
  40. package/dist/lib/processors/config/index.d.ts +32 -0
  41. package/dist/lib/processors/config/index.js +93 -0
  42. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  43. package/dist/lib/processors/config/languageMap.js +411 -0
  44. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  45. package/dist/lib/processors/config/mimeTypes.js +339 -0
  46. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  47. package/dist/lib/processors/config/sizeLimits.js +247 -0
  48. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  49. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  50. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  51. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  52. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  53. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  54. package/dist/lib/processors/data/index.d.ts +49 -0
  55. package/dist/lib/processors/data/index.js +77 -0
  56. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  57. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  58. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  59. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  60. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  61. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  62. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  63. package/dist/lib/processors/document/WordProcessor.js +354 -0
  64. package/dist/lib/processors/document/index.d.ts +54 -0
  65. package/dist/lib/processors/document/index.js +91 -0
  66. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  67. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  68. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  69. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  70. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  71. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  72. package/dist/lib/processors/errors/index.d.ts +46 -0
  73. package/dist/lib/processors/errors/index.js +50 -0
  74. package/dist/lib/processors/index.d.ts +76 -0
  75. package/dist/lib/processors/index.js +113 -0
  76. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  77. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  78. package/dist/lib/processors/integration/index.d.ts +42 -0
  79. package/dist/lib/processors/integration/index.js +45 -0
  80. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  81. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  82. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  83. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  84. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  85. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  86. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  87. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  88. package/dist/lib/processors/markup/index.d.ts +66 -0
  89. package/dist/lib/processors/markup/index.js +103 -0
  90. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  91. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  92. package/dist/lib/processors/registry/index.d.ts +12 -0
  93. package/dist/lib/processors/registry/index.js +17 -0
  94. package/dist/lib/processors/registry/types.d.ts +53 -0
  95. package/dist/lib/processors/registry/types.js +11 -0
  96. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  97. package/dist/lib/server/utils/validation.d.ts +6 -6
  98. package/dist/lib/types/fileTypes.d.ts +51 -1
  99. package/dist/lib/types/index.d.ts +25 -24
  100. package/dist/lib/types/index.js +21 -20
  101. package/dist/lib/types/modelTypes.d.ts +18 -18
  102. package/dist/lib/types/pptTypes.d.ts +14 -2
  103. package/dist/lib/types/pptTypes.js +16 -0
  104. package/dist/lib/utils/async/delay.d.ts +40 -0
  105. package/dist/lib/utils/async/delay.js +43 -0
  106. package/dist/lib/utils/async/index.d.ts +23 -0
  107. package/dist/lib/utils/async/index.js +24 -0
  108. package/dist/lib/utils/async/retry.d.ts +141 -0
  109. package/dist/lib/utils/async/retry.js +172 -0
  110. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  111. package/dist/lib/utils/async/withTimeout.js +97 -0
  112. package/dist/lib/utils/csvProcessor.js +442 -0
  113. package/dist/lib/utils/fileDetector.d.ts +7 -1
  114. package/dist/lib/utils/fileDetector.js +91 -18
  115. package/dist/lib/utils/json/extract.d.ts +103 -0
  116. package/dist/lib/utils/json/extract.js +249 -0
  117. package/dist/lib/utils/json/index.d.ts +36 -0
  118. package/dist/lib/utils/json/index.js +37 -0
  119. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  120. package/dist/lib/utils/json/safeParse.js +191 -0
  121. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  122. package/dist/lib/utils/messageBuilder.js +15 -7
  123. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  124. package/dist/lib/utils/sanitizers/filename.js +366 -0
  125. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  126. package/dist/lib/utils/sanitizers/html.js +326 -0
  127. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  128. package/dist/lib/utils/sanitizers/index.js +30 -0
  129. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  130. package/dist/lib/utils/sanitizers/svg.js +483 -0
  131. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  132. package/dist/processors/base/BaseFileProcessor.js +613 -0
  133. package/dist/processors/base/index.d.ts +14 -0
  134. package/dist/processors/base/index.js +19 -0
  135. package/dist/processors/base/types.d.ts +593 -0
  136. package/dist/processors/base/types.js +76 -0
  137. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  138. package/dist/processors/cli/fileProcessorCli.js +388 -0
  139. package/dist/processors/cli/index.d.ts +37 -0
  140. package/dist/processors/cli/index.js +49 -0
  141. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  142. package/dist/processors/code/ConfigProcessor.js +400 -0
  143. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  144. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  145. package/dist/processors/code/index.d.ts +44 -0
  146. package/dist/processors/code/index.js +60 -0
  147. package/dist/processors/config/fileTypes.d.ts +283 -0
  148. package/dist/processors/config/fileTypes.js +520 -0
  149. package/dist/processors/config/index.d.ts +32 -0
  150. package/dist/processors/config/index.js +92 -0
  151. package/dist/processors/config/languageMap.d.ts +66 -0
  152. package/dist/processors/config/languageMap.js +410 -0
  153. package/dist/processors/config/mimeTypes.d.ts +376 -0
  154. package/dist/processors/config/mimeTypes.js +338 -0
  155. package/dist/processors/config/sizeLimits.d.ts +194 -0
  156. package/dist/processors/config/sizeLimits.js +246 -0
  157. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  158. package/dist/processors/data/JsonProcessor.js +203 -0
  159. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  160. package/dist/processors/data/XmlProcessor.js +283 -0
  161. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  162. package/dist/processors/data/YamlProcessor.js +294 -0
  163. package/dist/processors/data/index.d.ts +49 -0
  164. package/dist/processors/data/index.js +76 -0
  165. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  166. package/dist/processors/document/ExcelProcessor.js +519 -0
  167. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  168. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  169. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  170. package/dist/processors/document/RtfProcessor.js +361 -0
  171. package/dist/processors/document/WordProcessor.d.ts +168 -0
  172. package/dist/processors/document/WordProcessor.js +353 -0
  173. package/dist/processors/document/index.d.ts +54 -0
  174. package/dist/processors/document/index.js +90 -0
  175. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  176. package/dist/processors/errors/FileErrorCode.js +255 -0
  177. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  178. package/dist/processors/errors/errorHelpers.js +378 -0
  179. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  180. package/dist/processors/errors/errorSerializer.js +507 -0
  181. package/dist/processors/errors/index.d.ts +46 -0
  182. package/dist/processors/errors/index.js +49 -0
  183. package/dist/processors/index.d.ts +76 -0
  184. package/dist/processors/index.js +112 -0
  185. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  186. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  187. package/dist/processors/integration/index.d.ts +42 -0
  188. package/dist/processors/integration/index.js +44 -0
  189. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  190. package/dist/processors/markup/HtmlProcessor.js +249 -0
  191. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  192. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  193. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  194. package/dist/processors/markup/SvgProcessor.js +240 -0
  195. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  196. package/dist/processors/markup/TextProcessor.js +188 -0
  197. package/dist/processors/markup/index.d.ts +66 -0
  198. package/dist/processors/markup/index.js +102 -0
  199. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  200. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  201. package/dist/processors/registry/index.d.ts +12 -0
  202. package/dist/processors/registry/index.js +16 -0
  203. package/dist/processors/registry/types.d.ts +53 -0
  204. package/dist/processors/registry/types.js +10 -0
  205. package/dist/server/utils/validation.d.ts +6 -6
  206. package/dist/types/fileTypes.d.ts +51 -1
  207. package/dist/types/index.d.ts +25 -24
  208. package/dist/types/index.js +21 -20
  209. package/dist/types/modelTypes.d.ts +10 -10
  210. package/dist/types/pptTypes.d.ts +14 -2
  211. package/dist/types/pptTypes.js +16 -0
  212. package/dist/utils/async/delay.d.ts +40 -0
  213. package/dist/utils/async/delay.js +42 -0
  214. package/dist/utils/async/index.d.ts +23 -0
  215. package/dist/utils/async/index.js +23 -0
  216. package/dist/utils/async/retry.d.ts +141 -0
  217. package/dist/utils/async/retry.js +171 -0
  218. package/dist/utils/async/withTimeout.d.ts +73 -0
  219. package/dist/utils/async/withTimeout.js +96 -0
  220. package/dist/utils/csvProcessor.js +442 -0
  221. package/dist/utils/fileDetector.d.ts +7 -1
  222. package/dist/utils/fileDetector.js +91 -18
  223. package/dist/utils/json/extract.d.ts +103 -0
  224. package/dist/utils/json/extract.js +248 -0
  225. package/dist/utils/json/index.d.ts +36 -0
  226. package/dist/utils/json/index.js +36 -0
  227. package/dist/utils/json/safeParse.d.ts +137 -0
  228. package/dist/utils/json/safeParse.js +190 -0
  229. package/dist/utils/messageBuilder.d.ts +2 -2
  230. package/dist/utils/messageBuilder.js +15 -7
  231. package/dist/utils/sanitizers/filename.d.ts +137 -0
  232. package/dist/utils/sanitizers/filename.js +365 -0
  233. package/dist/utils/sanitizers/html.d.ts +170 -0
  234. package/dist/utils/sanitizers/html.js +325 -0
  235. package/dist/utils/sanitizers/index.d.ts +26 -0
  236. package/dist/utils/sanitizers/index.js +29 -0
  237. package/dist/utils/sanitizers/svg.d.ts +81 -0
  238. package/dist/utils/sanitizers/svg.js +482 -0
  239. package/package.json +2 -2
@@ -0,0 +1,519 @@
1
+ /**
2
+ * Excel Processor
3
+ *
4
+ * Handles downloading, validating, and processing Excel files (.xlsx, .xls).
5
+ * Uses exceljs library for parsing with streaming support for large files.
6
+ *
7
+ * Key features:
8
+ * - Supports both .xlsx and legacy .xls formats
9
+ * - Extracts worksheet data with headers
10
+ * - Handles complex cell types (formulas, rich text, dates)
11
+ * - Respects configurable row and sheet limits
12
+ * - Provides truncation metadata when limits are exceeded
13
+ *
14
+ * @module processors/document/ExcelProcessor
15
+ *
16
+ * @example
17
+ * ```typescript
18
+ * import { excelProcessor, processExcel, isExcelFile } from "./ExcelProcessor.js";
19
+ *
20
+ * // Check if a file is an Excel file
21
+ * if (isExcelFile(fileInfo.mimetype, fileInfo.name)) {
22
+ * // Process the Excel file
23
+ * const result = await processExcel(fileInfo, {
24
+ * authHeaders: { Authorization: "Bearer token" },
25
+ * });
26
+ *
27
+ * if (result.success) {
28
+ * console.log(`Processed ${result.data.sheetCount} sheets`);
29
+ * console.log(`Total rows: ${result.data.totalRows}`);
30
+ *
31
+ * for (const sheet of result.data.worksheets) {
32
+ * console.log(`Sheet: ${sheet.name}, Rows: ${sheet.rowCount}`);
33
+ * }
34
+ * }
35
+ * }
36
+ * ```
37
+ */
38
+ import { Workbook } from "exceljs";
39
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
40
+ import { SIZE_LIMITS } from "../config/index.js";
41
+ import { FileErrorCode } from "../errors/index.js";
42
+ // =============================================================================
43
+ // CONSTANTS
44
+ // =============================================================================
45
+ /** MIME types treated as Excel; a copy of this list is passed to the ExcelProcessor constructor as supportedMimeTypes. */
46
+ const SUPPORTED_EXCEL_TYPES = [
47
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // .xlsx
48
+ "application/vnd.ms-excel", // .xls (legacy format; note that validateDownloadedFile only accepts content starting with the ZIP "PK" signature)
49
+ ];
50
+ /** File extensions treated as Excel; a copy of this list is passed to the ExcelProcessor constructor as supportedExtensions. */
51
+ const SUPPORTED_EXCEL_EXTENSIONS = [".xlsx", ".xls"];
52
+ // =============================================================================
53
+ // EXCEL PROCESSOR CLASS
54
+ // =============================================================================
/**
 * Excel Processor - parses spreadsheet workbooks with the exceljs XLSX loader.
 *
 * Both the .xlsx and .xls MIME types/extensions are registered so that type
 * detection routes such files to this processor. Only ZIP-based content
 * (the container format of .xlsx) passes {@link ExcelProcessor#validateDownloadedFile};
 * legacy binary .xls content is rejected there with a dedicated message because
 * the workbook is loaded through `workbook.xlsx.load`.
 *
 * Features:
 * - ZIP format validation (XLSX files are ZIP archives)
 * - Sheet count limiting (SIZE_LIMITS.EXCEL_MAX_SHEETS)
 * - Row count limiting per sheet (SIZE_LIMITS.EXCEL_MAX_ROWS)
 * - Cell type handling (text, numbers, formulas, dates, rich text, hyperlinks, errors)
 *
 * @example
 * ```typescript
 * const processor = new ExcelProcessor();
 *
 * // Process a file
 * const result = await processor.processFile(fileInfo, {
 *   authHeaders: { Authorization: "Bearer token" },
 * });
 *
 * if (result.success) {
 *   console.log(`Sheets: ${result.data.sheetCount}`);
 *   console.log(`Truncated: ${result.data.truncated}`);
 * }
 * ```
 */
export class ExcelProcessor extends BaseFileProcessor {
    /**
     * Configure the base processor with the Excel size limit, a 60 s timeout,
     * and copies of the supported MIME type / extension lists.
     */
    constructor() {
        super({
            maxSizeMB: SIZE_LIMITS.EXCEL_MAX_MB,
            timeoutMs: 60000, // Excel parsing can take longer than text files
            supportedMimeTypes: [...SUPPORTED_EXCEL_TYPES],
            supportedExtensions: [...SUPPORTED_EXCEL_EXTENSIONS],
            fileTypeName: "Excel",
            defaultFilename: "spreadsheet.xlsx",
        });
    }
    // ===========================================================================
    // VALIDATION
    // ===========================================================================
    /**
     * Validate that downloaded content looks like an XLSX (ZIP) file.
     *
     * The signature is compared byte-by-byte. Decoding with the "ascii"
     * encoding would clear the high bit of every byte, so e.g. 0xD0 0xCB would
     * be mistaken for "PK".
     *
     * @param buffer - Downloaded file content
     * @param _fileInfo - Original file information (unused but required by interface)
     * @returns null if valid, error message if invalid
     */
    async validateDownloadedFile(buffer, _fileInfo) {
        // Check minimum size
        if (buffer.length < 4) {
            return "Invalid Excel file - file too small";
        }
        // XLSX files are ZIP archives (PK signature: 0x50 0x4B)
        if (buffer[0] === 0x50 && buffer[1] === 0x4b) {
            return null;
        }
        // Legacy binary .xls files are OLE2 compound documents (D0 CF 11 E0 ...).
        // They match the advertised MIME type/extension but cannot be read by
        // the XLSX loader, so say so explicitly instead of "missing PK".
        if (buffer[0] === 0xd0 &&
            buffer[1] === 0xcf &&
            buffer[2] === 0x11 &&
            buffer[3] === 0xe0) {
            return "Invalid Excel file - legacy .xls (OLE2) format is not supported; convert the file to .xlsx";
        }
        // Provide a helpful error for a common failure: an HTML error/login page
        // returned instead of the file. Compared case-insensitively so that
        // "<!doctype html>" and "<HTML>" are recognised too.
        const preview = buffer.subarray(0, 100).toString("utf8").toLowerCase();
        if (preview.includes("<!doctype") || preview.includes("<html")) {
            return "Invalid Excel file - received HTML response instead of file content";
        }
        return "Invalid Excel file - not a valid XLSX format (missing PK signature)";
    }
    // ===========================================================================
    // PROCESSING
    // ===========================================================================
    /**
     * Build the result skeleton for a file: file-level metadata plus empty
     * worksheet/truncation fields. processFile() fills in the parsed fields.
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns ProcessedExcel structure with no worksheets
     */
    buildProcessedResult(buffer, fileInfo) {
        return {
            worksheets: [],
            buffer,
            mimetype: fileInfo.mimetype ||
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            size: fileInfo.size,
            filename: this.getFilename(fileInfo),
            sheetCount: 0,
            totalRows: 0,
            truncated: false,
            truncatedSheets: [],
        };
    }
    /**
     * Process an Excel file end to end: validate, obtain the bytes, check the
     * signature, parse with exceljs, and extract worksheet data.
     * Overridden here because exceljs parsing is asynchronous.
     *
     * Never rejects: any exception thrown along the way is converted into a
     * `{ success: false }` result carrying a PROCESSING_FAILED error.
     *
     * @param fileInfo - File information (can include URL or buffer)
     * @param options - Optional processing options (auth headers, timeout, etc.)
     * @returns Processing result with parsed Excel data or error
     */
    async processFile(fileInfo, options) {
        try {
            // Step 1: Validate file type and size
            const validationResult = this.validateFileWithResult(fileInfo);
            if (!validationResult.success) {
                return {
                    success: false,
                    error: validationResult.error,
                };
            }
            // Step 2: Get file buffer (from direct buffer or download from URL)
            let buffer;
            if (fileInfo.buffer) {
                buffer = fileInfo.buffer;
            }
            else if (fileInfo.url) {
                const downloadResult = await this.downloadFileWithRetry(fileInfo, options);
                if (!downloadResult.success) {
                    return {
                        success: false,
                        error: downloadResult.error,
                    };
                }
                if (!downloadResult.data) {
                    return {
                        success: false,
                        error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                            reason: "Download succeeded but returned no data",
                        }),
                    };
                }
                buffer = downloadResult.data;
            }
            else {
                return {
                    success: false,
                    error: this.createError(FileErrorCode.DOWNLOAD_FAILED, {
                        reason: "No buffer or URL provided for file",
                    }),
                };
            }
            // Step 3: Validate downloaded file (magic bytes check)
            const postValidationResult = await this.validateDownloadedFileWithResult(buffer, fileInfo);
            if (!postValidationResult.success) {
                return {
                    success: false,
                    error: postValidationResult.error,
                };
            }
            // Step 4: Parse Excel file asynchronously using exceljs
            const workbook = await this.parseWorkbook(buffer);
            // Step 5: Extract worksheet data with limits
            const { worksheets, truncated, truncatedSheets } = this.extractWorksheets(workbook);
            // Calculate total rows across all worksheets
            const totalRows = worksheets.reduce((sum, sheet) => sum + sheet.rowCount, 0);
            // Build final result: start from the metadata skeleton so the result
            // shape is defined in one place, then overwrite the parsed fields.
            return {
                success: true,
                data: {
                    ...this.buildProcessedResult(buffer, fileInfo),
                    worksheets,
                    sheetCount: worksheets.length,
                    totalRows,
                    truncated,
                    truncatedSheets,
                },
            };
        }
        catch (error) {
            return {
                success: false,
                error: this.createError(FileErrorCode.PROCESSING_FAILED, {
                    fileType: "Excel",
                    error: error instanceof Error ? error.message : String(error),
                }, error instanceof Error ? error : undefined),
            };
        }
    }
    // ===========================================================================
    // PRIVATE HELPER METHODS
    // ===========================================================================
    /**
     * Parse Excel buffer into workbook using exceljs.
     *
     * @param buffer - Excel file content
     * @returns Parsed ExcelJS Workbook
     */
    async parseWorkbook(buffer) {
        const workbook = new Workbook();
        // A Node Buffer may be a view into a larger ArrayBuffer, so copy
        // out exactly the bytes of this view before handing them to exceljs.
        const bytes = buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);
        await workbook.xlsx.load(bytes);
        return workbook;
    }
    /**
     * Extract worksheet data from workbook with row and sheet limits.
     *
     * - At most SIZE_LIMITS.EXCEL_MAX_SHEETS worksheets are returned.
     * - At most SIZE_LIMITS.EXCEL_MAX_ROWS rows are kept per worksheet; a sheet
     *   that exceeds the limit is listed in `truncatedSheets`.
     * - Every row is a dense array: cells without a value become `null`.
     * - `headers` is the stringified first populated row of the sheet.
     *
     * @param workbook - Parsed ExcelJS Workbook
     * @returns Extracted worksheets with truncation metadata
     */
    extractWorksheets(workbook) {
        const worksheets = [];
        const truncatedSheets = [];
        let truncated = false;
        const maxRows = SIZE_LIMITS.EXCEL_MAX_ROWS;
        const maxSheets = SIZE_LIMITS.EXCEL_MAX_SHEETS;
        for (const worksheet of workbook.worksheets) {
            // Check sheet limit (one entry is pushed per processed sheet)
            if (worksheets.length >= maxSheets) {
                truncated = true;
                break;
            }
            const rows = [];
            let headers = [];
            let hitLimit = false;
            // eachRow offers no early exit, so once the limit is hit the
            // callback simply returns immediately for the remaining rows.
            worksheet.eachRow((row) => {
                if (hitLimit) {
                    return;
                }
                // Check row limit
                if (rows.length >= maxRows) {
                    if (!truncatedSheets.includes(worksheet.name)) {
                        truncatedSheets.push(worksheet.name);
                    }
                    truncated = true;
                    hitLimit = true;
                    return;
                }
                // ExcelJS row.values is 1-indexed (index 0 is unused) and sparse:
                // cells without a value are holes. Array.from visits holes too,
                // whereas Array.prototype.map would skip them and leave holes in
                // the output row.
                const cleanRow = Array.from(row.values.slice(1), (cell) => this.getCellValue(cell));
                // Headers come from the first row that eachRow reports. Rows without
                // values are not reported, so this also works when the
                // sheet starts with blank rows.
                if (rows.length === 0) {
                    headers = cleanRow.map((v) => String(v ?? ""));
                }
                rows.push(cleanRow);
            });
            worksheets.push({
                name: worksheet.name,
                rows,
                headers,
                rowCount: rows.length,
                columnCount: headers.length || (rows[0]?.length ?? 0),
            });
        }
        return { worksheets, truncated, truncatedSheets };
    }
    /**
     * Convert an Excel cell value to a primitive type.
     *
     * Handled shapes, in order:
     * - null / undefined -> null
     * - string / number / boolean -> returned unchanged
     * - Date -> ISO 8601 string (null for an invalid Date)
     * - `{ result }` (formula with cached result) -> the converted result
     * - `{ richText }` -> concatenated plain text
     * - `{ text }` (e.g. hyperlink display text) -> the converted text
     * - `{ hyperlink }` without text -> the hyperlink target
     * - `{ error }` -> the error code as a string, e.g. "#N/A"
     * - `{ formula }` without cached result -> the formula prefixed with "="
     * - `{ sharedFormula }` without cached result -> null
     *
     * @param cell - ExcelJS cell value (can be various types)
     * @returns Primitive value (string, number, boolean, or null)
     */
    getCellValue(cell) {
        if (cell === null || cell === undefined) {
            return null;
        }
        // Handle primitive types directly
        if (typeof cell === "string" ||
            typeof cell === "number" ||
            typeof cell === "boolean") {
            return cell;
        }
        // Handle Date objects; toISOString() throws on an invalid Date, which
        // would otherwise fail the whole file because of a single cell.
        if (cell instanceof Date) {
            return Number.isNaN(cell.getTime()) ? null : cell.toISOString();
        }
        // Handle ExcelJS cell objects
        if (typeof cell === "object") {
            const cellObj = cell;
            // Formula result (prioritize result over formula string). Recursing
            // gives results the same treatment as plain values: dates become
            // ISO strings, error objects become their code, null stays null.
            if ("result" in cellObj && cellObj.result !== undefined) {
                return this.getCellValue(cellObj.result);
            }
            // Rich text
            if ("richText" in cellObj && Array.isArray(cellObj.richText)) {
                return this.extractRichText(cellObj.richText);
            }
            // Text value; the text of a hyperlink cell may itself be rich text,
            // so convert it rather than returning it raw.
            if ("text" in cellObj && cellObj.text !== undefined) {
                return this.getCellValue(cellObj.text);
            }
            // Hyperlink without display text: fall back to the target
            if ("hyperlink" in cellObj && cellObj.hyperlink) {
                return cellObj.hyperlink;
            }
            // Error value such as { error: '#N/A' }
            if ("error" in cellObj && cellObj.error !== undefined) {
                return String(cellObj.error);
            }
            // Formula that has no cached result: expose the formula text
            if (typeof cellObj.formula === "string") {
                return `=${cellObj.formula}`;
            }
            // Shared formula without cached result: only a reference to the
            // master cell is available, so there is no value to report.
            if ("sharedFormula" in cellObj) {
                return null;
            }
        }
        // Fallback for unrecognised shapes: convert to string
        return String(cell);
    }
    /**
     * Extract text from rich text cell format.
     * Rich text cells contain an array of text fragments with formatting.
     *
     * @param richText - Array of rich text fragments
     * @returns Concatenated plain text ("" when the input is not an array)
     */
    extractRichText(richText) {
        if (!Array.isArray(richText)) {
            return "";
        }
        return richText
            .map((rt) => {
            // Fragments without a usable `text` property contribute nothing
            if (typeof rt === "object" && rt !== null && "text" in rt) {
                return rt.text || "";
            }
            return "";
        })
            .join("");
    }
}
389
+ // =============================================================================
390
+ // SINGLETON INSTANCE
391
+ // =============================================================================
392
+ /**
393
+ * Singleton Excel processor instance.
394
+ * Use this for standard Excel processing operations.
+ * Created once when this module is loaded; the module-level helpers
+ * isExcelFile() and processExcel() delegate to this instance.
395
+ *
396
+ * @example
397
+ * ```typescript
398
+ * import { excelProcessor } from "./ExcelProcessor.js";
399
+ *
400
+ * const result = await excelProcessor.processFile(fileInfo);
401
+ * ```
402
+ */
403
+ export const excelProcessor = new ExcelProcessor();
404
+ // =============================================================================
405
+ // HELPER FUNCTIONS
406
+ // =============================================================================
/**
 * Report whether a file should be handled as an Excel spreadsheet.
 *
 * The decision is delegated to `isFileSupported` on the shared
 * `excelProcessor` instance, which is given both the MIME type and the
 * filename.
 *
 * @param mimetype - MIME type reported for the file (may be empty)
 * @param filename - Name of the file, used for extension-based detection
 * @returns true if the processor considers the file an Excel file
 *
 * @example
 * ```typescript
 * if (isExcelFile("application/vnd.ms-excel", "data.xls")) {
 *   // Process as Excel
 * }
 *
 * if (isExcelFile("", "report.xlsx")) {
 *   // Also matches by extension
 * }
 * ```
 */
export function isExcelFile(mimetype, filename) {
    const supported = excelProcessor.isFileSupported(mimetype, filename);
    return supported;
}
/**
 * Check a file size against the configured Excel size limit.
 *
 * The limit is SIZE_LIMITS.EXCEL_MAX_MB converted to bytes
 * (1 MB = 1024 * 1024 bytes); a size exactly at the limit is accepted.
 *
 * @param sizeBytes - File size in bytes
 * @returns true if the size does not exceed the Excel file limit
 *
 * @example
 * ```typescript
 * if (!validateExcelSize(fileInfo.size)) {
 *   console.error(`File too large: max ${SIZE_LIMITS.EXCEL_MAX_MB}MB`);
 * }
 * ```
 */
export function validateExcelSize(sizeBytes) {
    const { EXCEL_MAX_MB: limitMB } = SIZE_LIMITS;
    const limitBytes = limitMB * 1024 * 1024;
    return sizeBytes <= limitBytes;
}
/**
 * Process one Excel file through the shared `excelProcessor` instance.
 *
 * This is a thin convenience wrapper: both arguments are forwarded unchanged
 * to `excelProcessor.processFile` and its result is returned as-is.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options (auth headers, timeout, etc.)
 * @returns Processing result with parsed Excel data or error
 *
 * @example
 * ```typescript
 * import { processExcel } from "./ExcelProcessor.js";
 *
 * const result = await processExcel(fileInfo, {
 *   authHeaders: { Authorization: "Bearer token" },
 *   timeout: 120000, // 2 minutes for large files
 * });
 *
 * if (result.success) {
 *   const { worksheets, totalRows, truncated } = result.data;
 *   console.log(`Extracted ${totalRows} rows from ${worksheets.length} sheets`);
 *
 *   if (truncated) {
 *     console.warn("Some data was truncated due to size limits");
 *   }
 * } else {
 *   console.error(`Processing failed: ${result.error?.userMessage}`);
 * }
 * ```
 */
export async function processExcel(fileInfo, options) {
    const outcome = await excelProcessor.processFile(fileInfo, options);
    return outcome;
}
/**
 * Read the configured maximum Excel file size.
 *
 * @returns The value of SIZE_LIMITS.EXCEL_MAX_MB (megabytes)
 *
 * @example
 * ```typescript
 * const maxSize = getExcelMaxSizeMB();
 * console.log(`Maximum Excel file size: ${maxSize}MB`);
 * ```
 */
export function getExcelMaxSizeMB() {
    const { EXCEL_MAX_MB } = SIZE_LIMITS;
    return EXCEL_MAX_MB;
}
/**
 * Read the configured per-worksheet row limit.
 *
 * @returns The value of SIZE_LIMITS.EXCEL_MAX_ROWS (rows kept per worksheet)
 *
 * @example
 * ```typescript
 * const maxRows = getExcelMaxRows();
 * console.log(`Maximum rows per sheet: ${maxRows}`);
 * ```
 */
export function getExcelMaxRows() {
    const { EXCEL_MAX_ROWS } = SIZE_LIMITS;
    return EXCEL_MAX_ROWS;
}
/**
 * Read the configured worksheet-count limit.
 *
 * @returns The value of SIZE_LIMITS.EXCEL_MAX_SHEETS (worksheets processed per file)
 *
 * @example
 * ```typescript
 * const maxSheets = getExcelMaxSheets();
 * console.log(`Maximum sheets to process: ${maxSheets}`);
 * ```
 */
export function getExcelMaxSheets() {
    const { EXCEL_MAX_SHEETS } = SIZE_LIMITS;
    return EXCEL_MAX_SHEETS;
}
@@ -0,0 +1,69 @@
1
+ /**
2
+ * OpenDocument Processor
3
+ *
4
+ * Processes OpenDocument format files (.odt, .ods, .odp) by extracting
5
+ * text content from the internal XML structure.
6
+ *
7
+ * @module processors/document/OpenDocumentProcessor
8
+ */
9
+ import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
10
+ import type { FileInfo, FileProcessingResult, ProcessOptions } from "../base/types.js";
11
+ export type { ProcessedOpenDocument } from "../base/types.js";
12
+ import type { ProcessedOpenDocument } from "../base/types.js";
13
+ /**
14
+ * OpenDocument Processor - handles .odt, .ods, .odp files
15
+ *
16
+ * OpenDocument files are ZIP archives containing XML content.
17
+ * The main content is in content.xml within the archive.
18
+ *
19
+ * Priority: ~105 (between Word and Text)
+ *
+ * Specialises BaseFileProcessor with ProcessedOpenDocument as its result type.
+ * Only the declaration is visible here; the private members below are
+ * emitted without signatures, so their parameters are not documented.
20
+ */
21
+ export declare class OpenDocumentProcessor extends BaseFileProcessor<ProcessedOpenDocument> {
22
+ constructor();
23
+ /**
24
+ * Validate that the file is a valid ZIP archive (OpenDocument format)
+ *
+ * @param buffer - Downloaded file content
+ * @param _fileInfo - Original file information (underscore-prefixed; presumably unused - confirm in the implementation)
+ * @returns Promise of a string or null - presumably null when valid and an
+ * error message otherwise, as in ExcelProcessor.validateDownloadedFile (confirm)
25
+ */
26
+ protected validateDownloadedFile(buffer: Buffer, _fileInfo: FileInfo): Promise<string | null>;
27
+ /**
28
+ * Build the processed result by extracting content from the OpenDocument
+ *
+ * @param buffer - Downloaded file content
+ * @param fileInfo - Original file information
+ * @returns The ProcessedOpenDocument for this file (returned synchronously)
29
+ */
30
+ protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedOpenDocument;
31
+ /**
32
+ * Decode HTML entities in a single pass to prevent double-unescaping.
33
+ * Sequential replacement is vulnerable: "&amp;lt;" → "&lt;" → "<"
34
+ * Single-pass avoids this by replacing each entity exactly once.
35
+ */
36
+ private decodeHtmlEntities;
37
+ /**
38
+ * Extract text content from OpenDocument XML
39
+ */
40
+ private extractTextFromXml;
41
+ /**
42
+ * Detect the OpenDocument format from file extension
43
+ */
44
+ private detectFormat;
45
+ /**
46
+ * Get file extension from filename
47
+ */
48
+ private getExtension;
49
+ }
50
+ /**
51
+ * Singleton instance of OpenDocumentProcessor
+ *
+ * Exported as a constant so callers can share one processor instead of
+ * constructing their own.
52
+ */
53
+ export declare const openDocumentProcessor: OpenDocumentProcessor;
54
+ /**
55
+ * Check if a file is an OpenDocument file by MIME type or extension
+ *
+ * @param mimetype - MIME type of the file
+ * @param filename - Filename (for extension-based detection)
+ * @returns true if the file is recognised as an OpenDocument file
56
+ */
57
+ export declare function isOpenDocumentFile(mimetype: string, filename: string): boolean;
58
+ /**
59
+ * Validate OpenDocument file size against limits
+ *
+ * @param sizeBytes - File size in bytes
+ * @returns Whether the size passes validation - presumably true when within
+ * the limit reported by getOpenDocumentMaxSizeMB(); confirm in the implementation
60
+ */
61
+ export declare function validateOpenDocumentSize(sizeBytes: number): boolean;
62
+ /**
63
+ * Process an OpenDocument file
+ *
+ * @param fileInfo - Information about the file to process
+ * @param options - Optional processing options
+ * @returns Promise of a FileProcessingResult wrapping a ProcessedOpenDocument
64
+ */
65
+ export declare function processOpenDocument(fileInfo: FileInfo, options?: ProcessOptions): Promise<FileProcessingResult<ProcessedOpenDocument>>;
66
+ /**
67
+ * Get the maximum allowed OpenDocument file size in MB
+ *
+ * @returns Maximum OpenDocument file size in megabytes
68
+ */
69
+ export declare function getOpenDocumentMaxSizeMB(): number;