@juspay/neurolink 9.1.1 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. package/CHANGELOG.md +6 -0
  2. package/README.md +54 -7
  3. package/dist/agent/directTools.d.ts +3 -3
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/image-gen/ImageGenService.d.ts +143 -0
  6. package/dist/image-gen/ImageGenService.js +345 -0
  7. package/dist/image-gen/imageGenTools.d.ts +126 -0
  8. package/dist/image-gen/imageGenTools.js +304 -0
  9. package/dist/image-gen/index.d.ts +46 -0
  10. package/dist/image-gen/index.js +48 -0
  11. package/dist/image-gen/types.d.ts +237 -0
  12. package/dist/image-gen/types.js +24 -0
  13. package/dist/lib/agent/directTools.d.ts +3 -3
  14. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  15. package/dist/lib/image-gen/ImageGenService.js +346 -0
  16. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  17. package/dist/lib/image-gen/imageGenTools.js +305 -0
  18. package/dist/lib/image-gen/index.d.ts +46 -0
  19. package/dist/lib/image-gen/index.js +49 -0
  20. package/dist/lib/image-gen/types.d.ts +237 -0
  21. package/dist/lib/image-gen/types.js +25 -0
  22. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  23. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  24. package/dist/lib/processors/base/index.d.ts +14 -0
  25. package/dist/lib/processors/base/index.js +20 -0
  26. package/dist/lib/processors/base/types.d.ts +593 -0
  27. package/dist/lib/processors/base/types.js +77 -0
  28. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  29. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  30. package/dist/lib/processors/cli/index.d.ts +37 -0
  31. package/dist/lib/processors/cli/index.js +50 -0
  32. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  33. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  34. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  35. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  36. package/dist/lib/processors/code/index.d.ts +44 -0
  37. package/dist/lib/processors/code/index.js +61 -0
  38. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  39. package/dist/lib/processors/config/fileTypes.js +521 -0
  40. package/dist/lib/processors/config/index.d.ts +32 -0
  41. package/dist/lib/processors/config/index.js +93 -0
  42. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  43. package/dist/lib/processors/config/languageMap.js +411 -0
  44. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  45. package/dist/lib/processors/config/mimeTypes.js +339 -0
  46. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  47. package/dist/lib/processors/config/sizeLimits.js +247 -0
  48. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  49. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  50. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  51. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  52. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  53. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  54. package/dist/lib/processors/data/index.d.ts +49 -0
  55. package/dist/lib/processors/data/index.js +77 -0
  56. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  57. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  58. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  59. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  60. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  61. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  62. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  63. package/dist/lib/processors/document/WordProcessor.js +354 -0
  64. package/dist/lib/processors/document/index.d.ts +54 -0
  65. package/dist/lib/processors/document/index.js +91 -0
  66. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  67. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  68. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  69. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  70. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  71. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  72. package/dist/lib/processors/errors/index.d.ts +46 -0
  73. package/dist/lib/processors/errors/index.js +50 -0
  74. package/dist/lib/processors/index.d.ts +76 -0
  75. package/dist/lib/processors/index.js +113 -0
  76. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  77. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  78. package/dist/lib/processors/integration/index.d.ts +42 -0
  79. package/dist/lib/processors/integration/index.js +45 -0
  80. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  81. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  82. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  83. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  84. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  85. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  86. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  87. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  88. package/dist/lib/processors/markup/index.d.ts +66 -0
  89. package/dist/lib/processors/markup/index.js +103 -0
  90. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  91. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  92. package/dist/lib/processors/registry/index.d.ts +12 -0
  93. package/dist/lib/processors/registry/index.js +17 -0
  94. package/dist/lib/processors/registry/types.d.ts +53 -0
  95. package/dist/lib/processors/registry/types.js +11 -0
  96. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  97. package/dist/lib/server/utils/validation.d.ts +6 -6
  98. package/dist/lib/types/fileTypes.d.ts +1 -1
  99. package/dist/lib/types/index.d.ts +25 -24
  100. package/dist/lib/types/index.js +21 -20
  101. package/dist/lib/types/modelTypes.d.ts +18 -18
  102. package/dist/lib/types/pptTypes.d.ts +14 -2
  103. package/dist/lib/types/pptTypes.js +16 -0
  104. package/dist/lib/utils/async/delay.d.ts +40 -0
  105. package/dist/lib/utils/async/delay.js +43 -0
  106. package/dist/lib/utils/async/index.d.ts +23 -0
  107. package/dist/lib/utils/async/index.js +24 -0
  108. package/dist/lib/utils/async/retry.d.ts +141 -0
  109. package/dist/lib/utils/async/retry.js +172 -0
  110. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  111. package/dist/lib/utils/async/withTimeout.js +97 -0
  112. package/dist/lib/utils/fileDetector.d.ts +7 -1
  113. package/dist/lib/utils/fileDetector.js +91 -18
  114. package/dist/lib/utils/json/extract.d.ts +103 -0
  115. package/dist/lib/utils/json/extract.js +249 -0
  116. package/dist/lib/utils/json/index.d.ts +36 -0
  117. package/dist/lib/utils/json/index.js +37 -0
  118. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  119. package/dist/lib/utils/json/safeParse.js +191 -0
  120. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  121. package/dist/lib/utils/messageBuilder.js +15 -7
  122. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  123. package/dist/lib/utils/sanitizers/filename.js +366 -0
  124. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  125. package/dist/lib/utils/sanitizers/html.js +326 -0
  126. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  127. package/dist/lib/utils/sanitizers/index.js +30 -0
  128. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  129. package/dist/lib/utils/sanitizers/svg.js +483 -0
  130. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  131. package/dist/processors/base/BaseFileProcessor.js +613 -0
  132. package/dist/processors/base/index.d.ts +14 -0
  133. package/dist/processors/base/index.js +19 -0
  134. package/dist/processors/base/types.d.ts +593 -0
  135. package/dist/processors/base/types.js +76 -0
  136. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  137. package/dist/processors/cli/fileProcessorCli.js +388 -0
  138. package/dist/processors/cli/index.d.ts +37 -0
  139. package/dist/processors/cli/index.js +49 -0
  140. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  141. package/dist/processors/code/ConfigProcessor.js +400 -0
  142. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  143. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  144. package/dist/processors/code/index.d.ts +44 -0
  145. package/dist/processors/code/index.js +60 -0
  146. package/dist/processors/config/fileTypes.d.ts +283 -0
  147. package/dist/processors/config/fileTypes.js +520 -0
  148. package/dist/processors/config/index.d.ts +32 -0
  149. package/dist/processors/config/index.js +92 -0
  150. package/dist/processors/config/languageMap.d.ts +66 -0
  151. package/dist/processors/config/languageMap.js +410 -0
  152. package/dist/processors/config/mimeTypes.d.ts +376 -0
  153. package/dist/processors/config/mimeTypes.js +338 -0
  154. package/dist/processors/config/sizeLimits.d.ts +194 -0
  155. package/dist/processors/config/sizeLimits.js +246 -0
  156. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  157. package/dist/processors/data/JsonProcessor.js +203 -0
  158. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  159. package/dist/processors/data/XmlProcessor.js +283 -0
  160. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  161. package/dist/processors/data/YamlProcessor.js +294 -0
  162. package/dist/processors/data/index.d.ts +49 -0
  163. package/dist/processors/data/index.js +76 -0
  164. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  165. package/dist/processors/document/ExcelProcessor.js +519 -0
  166. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  167. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  168. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  169. package/dist/processors/document/RtfProcessor.js +361 -0
  170. package/dist/processors/document/WordProcessor.d.ts +168 -0
  171. package/dist/processors/document/WordProcessor.js +353 -0
  172. package/dist/processors/document/index.d.ts +54 -0
  173. package/dist/processors/document/index.js +90 -0
  174. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  175. package/dist/processors/errors/FileErrorCode.js +255 -0
  176. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  177. package/dist/processors/errors/errorHelpers.js +378 -0
  178. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  179. package/dist/processors/errors/errorSerializer.js +507 -0
  180. package/dist/processors/errors/index.d.ts +46 -0
  181. package/dist/processors/errors/index.js +49 -0
  182. package/dist/processors/index.d.ts +76 -0
  183. package/dist/processors/index.js +112 -0
  184. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  185. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  186. package/dist/processors/integration/index.d.ts +42 -0
  187. package/dist/processors/integration/index.js +44 -0
  188. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  189. package/dist/processors/markup/HtmlProcessor.js +249 -0
  190. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  191. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  192. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  193. package/dist/processors/markup/SvgProcessor.js +240 -0
  194. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  195. package/dist/processors/markup/TextProcessor.js +188 -0
  196. package/dist/processors/markup/index.d.ts +66 -0
  197. package/dist/processors/markup/index.js +102 -0
  198. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  199. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  200. package/dist/processors/registry/index.d.ts +12 -0
  201. package/dist/processors/registry/index.js +16 -0
  202. package/dist/processors/registry/types.d.ts +53 -0
  203. package/dist/processors/registry/types.js +10 -0
  204. package/dist/server/utils/validation.d.ts +6 -6
  205. package/dist/types/fileTypes.d.ts +1 -1
  206. package/dist/types/index.d.ts +25 -24
  207. package/dist/types/index.js +21 -20
  208. package/dist/types/modelTypes.d.ts +10 -10
  209. package/dist/types/pptTypes.d.ts +14 -2
  210. package/dist/types/pptTypes.js +16 -0
  211. package/dist/utils/async/delay.d.ts +40 -0
  212. package/dist/utils/async/delay.js +42 -0
  213. package/dist/utils/async/index.d.ts +23 -0
  214. package/dist/utils/async/index.js +23 -0
  215. package/dist/utils/async/retry.d.ts +141 -0
  216. package/dist/utils/async/retry.js +171 -0
  217. package/dist/utils/async/withTimeout.d.ts +73 -0
  218. package/dist/utils/async/withTimeout.js +96 -0
  219. package/dist/utils/fileDetector.d.ts +7 -1
  220. package/dist/utils/fileDetector.js +91 -18
  221. package/dist/utils/json/extract.d.ts +103 -0
  222. package/dist/utils/json/extract.js +248 -0
  223. package/dist/utils/json/index.d.ts +36 -0
  224. package/dist/utils/json/index.js +36 -0
  225. package/dist/utils/json/safeParse.d.ts +137 -0
  226. package/dist/utils/json/safeParse.js +190 -0
  227. package/dist/utils/messageBuilder.d.ts +2 -2
  228. package/dist/utils/messageBuilder.js +15 -7
  229. package/dist/utils/sanitizers/filename.d.ts +137 -0
  230. package/dist/utils/sanitizers/filename.js +365 -0
  231. package/dist/utils/sanitizers/html.d.ts +170 -0
  232. package/dist/utils/sanitizers/html.js +325 -0
  233. package/dist/utils/sanitizers/index.d.ts +26 -0
  234. package/dist/utils/sanitizers/index.js +29 -0
  235. package/dist/utils/sanitizers/svg.d.ts +81 -0
  236. package/dist/utils/sanitizers/svg.js +482 -0
  237. package/package.json +2 -2
@@ -3,11 +3,11 @@
3
3
  * Centralized file detection for all multimodal file types
4
4
  * Uses multi-strategy approach for reliable type identification
5
5
  */
6
- import { request, getGlobalDispatcher, interceptors } from "undici";
7
6
  import { readFile, stat } from "fs/promises";
8
- import { logger } from "./logger.js";
7
+ import { getGlobalDispatcher, interceptors, request } from "undici";
9
8
  import { CSVProcessor } from "./csvProcessor.js";
10
9
  import { ImageProcessor } from "./imageProcessor.js";
10
+ import { logger } from "./logger.js";
11
11
  import { PDFProcessor } from "./pdfProcessor.js";
12
12
  /**
13
13
  * Default retry configuration constants
@@ -106,7 +106,7 @@ async function withRetry(operation, options = {}) {
106
106
  throw error;
107
107
  }
108
108
  // Calculate exponential backoff delay
109
- const delay = retryDelay * Math.pow(2, attempt);
109
+ const delay = retryDelay * 2 ** attempt;
110
110
  logger.debug("Retrying network operation after transient error", {
111
111
  attempt: attempt + 1,
112
112
  maxRetries,
@@ -186,7 +186,7 @@ export class FileDetector {
186
186
  * @returns Processed file result with type and content
187
187
  */
188
188
  static async detectAndProcess(input, options) {
189
- const detection = await this.detect(input, options);
189
+ const detection = await FileDetector.detect(input, options);
190
190
  // FD-018: Comprehensive fallback parsing for extension-less files
191
191
  // When file detection returns "unknown" or doesn't match allowedTypes,
192
192
  // attempt parsing for each allowed type before failing. This handles cases like Slack
@@ -194,12 +194,12 @@ export class FileDetector {
194
194
  if (options?.allowedTypes &&
195
195
  !options.allowedTypes.includes(detection.type)) {
196
196
  // Try fallback parsing for both "unknown" types and when detection doesn't match allowed types
197
- const content = await this.loadContent(input, detection, options);
197
+ const content = await FileDetector.loadContent(input, detection, options);
198
198
  const errors = [];
199
199
  // Try each allowed type in order of specificity
200
200
  for (const allowedType of options.allowedTypes) {
201
201
  try {
202
- const result = await this.tryFallbackParsing(content, allowedType, options);
202
+ const result = await FileDetector.tryFallbackParsing(content, allowedType, options);
203
203
  if (result) {
204
204
  logger.info(`[FileDetector] ✅ ${allowedType.toUpperCase()} fallback successful`);
205
205
  return result;
@@ -214,10 +214,10 @@ export class FileDetector {
214
214
  // All fallbacks failed
215
215
  throw new Error(`File type detection failed and all fallback parsing attempts failed. Original detection: ${detection.type}. Attempted types: ${options.allowedTypes.join(", ")}. Errors: ${errors.join("; ")}`);
216
216
  }
217
- const content = await this.loadContent(input, detection, options);
217
+ const content = await FileDetector.loadContent(input, detection, options);
218
218
  // Extract CSV-specific options from FileDetectorOptions
219
219
  const csvOptions = options?.csvOptions;
220
- return await this.processFile(content, detection, csvOptions, options?.provider);
220
+ return await FileDetector.processFile(content, detection, csvOptions, options?.provider);
221
221
  }
222
222
  /**
223
223
  * Try fallback parsing for a specific file type
@@ -237,11 +237,11 @@ export class FileDetector {
237
237
  // Try text parsing - check if content is valid UTF-8 text
238
238
  const textContent = content.toString("utf-8");
239
239
  // Validate it's actually text (no null bytes, mostly printable)
240
- if (this.isValidText(textContent)) {
240
+ if (FileDetector.isValidText(textContent)) {
241
241
  return {
242
242
  type: "text",
243
243
  content: textContent,
244
- mimeType: this.guessTextMimeType(textContent),
244
+ mimeType: FileDetector.guessTextMimeType(textContent),
245
245
  metadata: {
246
246
  confidence: 70,
247
247
  size: content.length,
@@ -307,7 +307,7 @@ export class FileDetector {
307
307
  }
308
308
  }
309
309
  // Check for XML/HTML using stricter detection
310
- if (this.looksLikeXMLStrict(trimmed)) {
310
+ if (FileDetector.looksLikeXMLStrict(trimmed)) {
311
311
  const isHTML = trimmed.includes("<!DOCTYPE html") ||
312
312
  trimmed.toLowerCase().includes("<html") ||
313
313
  trimmed.includes("<head") ||
@@ -315,7 +315,7 @@ export class FileDetector {
315
315
  return isHTML ? "text/html" : "application/xml";
316
316
  }
317
317
  // Check for YAML using robust multi-indicator detection
318
- if (this.looksLikeYAMLStrict(trimmed)) {
318
+ if (FileDetector.looksLikeYAMLStrict(trimmed)) {
319
319
  return "application/yaml";
320
320
  }
321
321
  // Default to plain text
@@ -427,13 +427,13 @@ export class FileDetector {
427
427
  }
428
428
  switch (source) {
429
429
  case "url":
430
- return await this.loadFromURL(input, options);
430
+ return await FileDetector.loadFromURL(input, options);
431
431
  case "path":
432
- return await this.loadFromPath(input, options);
432
+ return await FileDetector.loadFromPath(input, options);
433
433
  case "buffer":
434
434
  return input;
435
435
  case "datauri":
436
- return this.loadFromDataURI(input);
436
+ return FileDetector.loadFromDataURI(input);
437
437
  default:
438
438
  throw new Error(`Unknown source: ${source}`);
439
439
  }
@@ -454,6 +454,10 @@ export class FileDetector {
454
454
  return await ImageProcessor.process(content);
455
455
  case "pdf":
456
456
  return await PDFProcessor.process(content, { provider });
457
+ case "svg":
458
+ // SVG is processed as text content (sanitized XML markup)
459
+ // AI providers don't support SVG as image format, so we extract text content
460
+ return await FileDetector.processSvgAsText(content, detection);
457
461
  case "text":
458
462
  return {
459
463
  type: "text",
@@ -465,12 +469,74 @@ export class FileDetector {
465
469
  throw new Error(`Unsupported file type: ${detection.type}`);
466
470
  }
467
471
  }
472
+ /**
473
+ * Process SVG file as text content
474
+ * Uses SvgProcessor for security sanitization (removes XSS vectors)
475
+ * Returns sanitized SVG markup as text for AI analysis
476
+ */
477
+ static async processSvgAsText(content, detection) {
478
+ try {
479
+ // Dynamic import to avoid circular dependencies
480
+ const { processSvg } = await import("../processors/markup/SvgProcessor.js");
481
+ const result = await processSvg({
482
+ id: "svg-file",
483
+ name: detection.metadata.filename || "image.svg",
484
+ mimetype: "image/svg+xml",
485
+ size: content.length,
486
+ buffer: content,
487
+ });
488
+ if (result.success && result.data) {
489
+ logger.info(`[FileDetector] SVG processed as text: ${detection.metadata.filename || "image.svg"}`);
490
+ return {
491
+ type: "svg",
492
+ content: result.data.textContent, // Sanitized SVG content
493
+ mimeType: "image/svg+xml",
494
+ metadata: {
495
+ confidence: detection.metadata.confidence,
496
+ size: content.length,
497
+ filename: detection.metadata.filename,
498
+ extension: detection.extension,
499
+ },
500
+ };
501
+ }
502
+ else {
503
+ // Fail closed: return safe empty SVG instead of raw unsanitized content
504
+ logger.warn(`[FileDetector] SVG processor failed, returning safe empty SVG: ${result.error?.userMessage}`);
505
+ return {
506
+ type: "svg",
507
+ content: '<svg xmlns="http://www.w3.org/2000/svg"></svg>',
508
+ mimeType: "image/svg+xml",
509
+ metadata: {
510
+ confidence: detection.metadata.confidence,
511
+ size: content.length,
512
+ filename: detection.metadata.filename,
513
+ extension: detection.extension,
514
+ },
515
+ };
516
+ }
517
+ }
518
+ catch (error) {
519
+ // Fail closed: return safe empty SVG instead of raw unsanitized content
520
+ logger.warn(`[FileDetector] SVG processor not available, returning safe empty SVG: ${error instanceof Error ? error.message : String(error)}`);
521
+ return {
522
+ type: "svg",
523
+ content: '<svg xmlns="http://www.w3.org/2000/svg"></svg>',
524
+ mimeType: "image/svg+xml",
525
+ metadata: {
526
+ confidence: detection.metadata.confidence,
527
+ size: content.length,
528
+ filename: detection.metadata.filename,
529
+ extension: detection.extension,
530
+ },
531
+ };
532
+ }
533
+ }
468
534
  /**
469
535
  * Load file from URL with automatic retry on transient network errors
470
536
  */
471
537
  static async loadFromURL(url, options) {
472
538
  const maxSize = options?.maxSize || 10 * 1024 * 1024;
473
- const timeout = options?.timeout || this.DEFAULT_NETWORK_TIMEOUT;
539
+ const timeout = options?.timeout || FileDetector.DEFAULT_NETWORK_TIMEOUT;
474
540
  const maxRetries = options?.maxRetries ?? DEFAULT_MAX_RETRIES;
475
541
  const retryDelay = options?.retryDelay ?? DEFAULT_RETRY_DELAY;
476
542
  return withRetry(async () => {
@@ -627,6 +693,11 @@ class MimeTypeStrategy {
627
693
  if (mime.includes("text/tab-separated-values")) {
628
694
  return "csv";
629
695
  }
696
+ // SVG is processed as text/markup, NOT as image
697
+ // Must check before generic image/ check
698
+ if (mime.includes("image/svg+xml")) {
699
+ return "svg";
700
+ }
630
701
  if (mime.includes("image/")) {
631
702
  return "image";
632
703
  }
@@ -675,7 +746,9 @@ class ExtensionStrategy {
675
746
  bmp: "image",
676
747
  tiff: "image",
677
748
  tif: "image",
678
- svg: "image",
749
+ // SVG is handled as text/markup, NOT as image
750
+ // AI providers don't support SVG format, so we process it as sanitized text
751
+ svg: "svg",
679
752
  avif: "image",
680
753
  pdf: "pdf",
681
754
  txt: "text",
@@ -865,7 +938,7 @@ class ContentHeuristicStrategy {
865
938
  // (data values like IDs, codes, numbers - not varied content)
866
939
  const lengths = lines.map((l) => l.length);
867
940
  const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
868
- const variance = lengths.reduce((sum, len) => sum + Math.pow(len - avgLength, 2), 0) /
941
+ const variance = lengths.reduce((sum, len) => sum + (len - avgLength) ** 2, 0) /
869
942
  lengths.length;
870
943
  const stdDev = Math.sqrt(variance);
871
944
  // Single-column CSVs can contain varied data (names, cities, emails, etc.)
@@ -0,0 +1,103 @@
1
+ /**
2
+ * JSON Extraction Utilities
3
+ *
4
+ * Utilities for extracting JSON from mixed text content.
5
+ * Particularly useful for parsing AI responses that contain JSON within prose.
6
+ */
7
+ /**
8
+ * Extract JSON string from text that may contain surrounding content.
9
+ *
10
+ * Searches for valid JSON in the following order:
11
+ * 1. Direct parse of the entire text
12
+ * 2. JSON within markdown code blocks (```json ... ``` or ``` ... ```)
13
+ * 3. JSON object pattern ({ ... })
14
+ * 4. JSON array pattern ([ ... ])
15
+ *
16
+ * @param text - Text that may contain JSON
17
+ * @returns Extracted JSON string or null if none found
18
+ *
19
+ * @example
20
+ * ```typescript
21
+ * const response = "Here's the data: {\"name\": \"test\"} Let me know if you need more.";
22
+ * const json = extractJsonStringFromText(response);
23
+ * // Returns: '{"name": "test"}'
24
+ * ```
25
+ */
26
+ export declare function extractJsonStringFromText(text: string): string | null;
27
+ /**
28
+ * Extract and parse JSON from mixed text content.
29
+ *
30
+ * Useful for parsing AI responses that contain JSON within prose.
31
+ * Combines extraction and parsing in one step.
32
+ *
33
+ * @param text - Text that may contain JSON
34
+ * @returns Parsed JSON value or null if not found/invalid
35
+ *
36
+ * @example
37
+ * ```typescript
38
+ * const response = `
39
+ * Here is your configuration:
40
+ * \`\`\`json
41
+ * {"theme": "dark", "fontSize": 14}
42
+ * \`\`\`
43
+ * Let me know if you need changes.
44
+ * `;
45
+ * const config = extractJsonFromText(response);
46
+ * // Returns: { theme: "dark", fontSize: 14 }
47
+ * ```
48
+ */
49
+ export declare function extractJsonFromText(text: string): unknown | null;
50
+ export type { JsonTypeGuard } from "../../processors/base/types.js";
51
+ import type { JsonTypeGuard } from "../../processors/base/types.js";
52
+ /**
53
+ * Parse JSON from text with optional type validation.
54
+ *
55
+ * Extracts JSON from text and optionally validates it against a type guard.
56
+ * Useful when you need type-safe parsing of AI responses.
57
+ *
58
+ * @param text - Text that may contain JSON
59
+ * @param validator - Optional type guard to validate the parsed result
60
+ * @returns Parsed and validated JSON or null if not found/invalid/fails validation
61
+ *
62
+ * @example
63
+ * ```typescript
64
+ * interface UserConfig {
65
+ * theme: string;
66
+ * fontSize: number;
67
+ * }
68
+ *
69
+ * function isUserConfig(obj: unknown): obj is UserConfig {
70
+ * return (
71
+ * typeof obj === 'object' &&
72
+ * obj !== null &&
73
+ * 'theme' in obj &&
74
+ * 'fontSize' in obj &&
75
+ * typeof (obj as UserConfig).theme === 'string' &&
76
+ * typeof (obj as UserConfig).fontSize === 'number'
77
+ * );
78
+ * }
79
+ *
80
+ * const config = parseJsonFromText<UserConfig>(aiResponse, isUserConfig);
81
+ * if (config) {
82
+ * // config is typed as UserConfig
83
+ * console.log(config.theme, config.fontSize);
84
+ * }
85
+ * ```
86
+ */
87
+ export declare function parseJsonFromText<T>(text: string, validator?: JsonTypeGuard<T>): T | null;
88
+ /**
89
+ * Extract all JSON objects/arrays from text.
90
+ *
91
+ * Useful when text contains multiple JSON blocks.
92
+ *
93
+ * @param text - Text that may contain multiple JSON values
94
+ * @returns Array of parsed JSON values
95
+ *
96
+ * @example
97
+ * ```typescript
98
+ * const text = 'First: {"a": 1} Second: {"b": 2}';
99
+ * const results = extractAllJsonFromText(text);
100
+ * // Returns: [{ a: 1 }, { b: 2 }]
101
+ * ```
102
+ */
103
+ export declare function extractAllJsonFromText(text: string): unknown[];
@@ -0,0 +1,249 @@
1
+ /**
2
+ * JSON Extraction Utilities
3
+ *
4
+ * Utilities for extracting JSON from mixed text content.
5
+ * Particularly useful for parsing AI responses that contain JSON within prose.
6
+ */
7
+ import { parseJsonOrNull } from "./safeParse.js";
8
/**
 * Report whether `candidate` is accepted by `JSON.parse`.
 *
 * @param candidate - String to test
 * @returns true when the string parses as JSON, false otherwise
 */
function isParseableJsonText(candidate) {
    try {
        JSON.parse(candidate);
        return true;
    }
    catch {
        // A parse failure is the expected "not JSON" signal here, not an error.
        return false;
    }
}
/**
 * Find the index of the bracket that closes the `{` or `[` located at `start`.
 *
 * Brackets inside double-quoted strings are ignored, and backslash escapes
 * inside strings are honoured, so `{"a": "}"}` is matched as a whole.
 *
 * @param source - Text being scanned
 * @param start - Index of the opening `{` or `[`
 * @returns Index of the matching closer, or -1 when the opener is never closed
 */
function findBalancedJsonEnd(source, start) {
    const opener = source[start];
    const closer = opener === "{" ? "}" : "]";
    let depth = 0;
    let inString = false;
    for (let i = start; i < source.length; i++) {
        const char = source[i];
        if (inString) {
            if (char === "\\") {
                // Skip the escaped character so \" does not end the string.
                i++;
            }
            else if (char === '"') {
                inString = false;
            }
            continue;
        }
        if (char === '"') {
            inString = true;
        }
        else if (char === opener) {
            depth++;
        }
        else if (char === closer) {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}
/**
 * Extract JSON string from text that may contain surrounding content.
 *
 * Searches for valid JSON in the following order:
 * 1. Direct parse of the entire text (returned unchanged, including whitespace)
 * 2. JSON within markdown code blocks (```json ... ``` or ``` ... ```);
 *    the first block whose trimmed content parses wins
 * 3. The first balanced `{ ... }` or `[ ... ]` span in the text that parses,
 *    whichever opener appears first
 *
 * @param text - Text that may contain JSON
 * @returns Extracted JSON string or null if none found (also null for non-string input)
 *
 * @example
 * ```typescript
 * const response = "Here's the data: {\"name\": \"test\"} Let me know if you need more.";
 * const json = extractJsonStringFromText(response);
 * // Returns: '{"name": "test"}'
 * ```
 */
export function extractJsonStringFromText(text) {
    // Guard: JSON.parse coerces non-strings (e.g. 5 -> 5), which would leak a
    // non-string return value, and undefined would throw on the string calls below.
    if (typeof text !== "string") {
        return null;
    }
    // Try direct parse first - maybe the whole text is valid JSON
    if (isParseableJsonText(text)) {
        return text;
    }
    // Try every fenced code block, not just the first one.
    for (const fence of text.matchAll(/```(?:json)?\s*([\s\S]*?)```/g)) {
        const fenced = fence[1].trim();
        if (fenced && isParseableJsonText(fenced)) {
            return fenced;
        }
    }
    // Scan for a balanced object/array. A lazy regex such as \{[\s\S]*?\} stops
    // at the first closer and therefore cannot match nested JSON or values that
    // contain braces inside strings; bracket balancing handles both.
    for (let start = 0; start < text.length; start++) {
        const char = text[start];
        if (char !== "{" && char !== "[") {
            continue;
        }
        const end = findBalancedJsonEnd(text, start);
        if (end === -1) {
            continue;
        }
        const candidate = text.slice(start, end + 1);
        if (isParseableJsonText(candidate)) {
            return candidate;
        }
        // Otherwise fall through: the next opener (possibly nested inside this
        // failed candidate) is tried on the following iterations.
    }
    return null;
}
66
/**
 * Locate a JSON value embedded in free-form text and return it parsed.
 *
 * This is the one-step combination of `extractJsonStringFromText` (find the
 * JSON substring) and `parseJsonOrNull` (turn it into a value). It is aimed at
 * AI responses where JSON is surrounded by prose or wrapped in a code fence.
 *
 * @param text - Text that may contain JSON
 * @returns The parsed JSON value, or null when no JSON substring is found
 *
 * @example
 * ```typescript
 * const response = `
 * Here is your configuration:
 * \`\`\`json
 * {"theme": "dark", "fontSize": 14}
 * \`\`\`
 * Let me know if you need changes.
 * `;
 * const config = extractJsonFromText(response);
 * // Returns: { theme: "dark", fontSize: 14 }
 * ```
 */
export function extractJsonFromText(text) {
    const extracted = extractJsonStringFromText(text);
    // Nothing extracted means there is nothing to parse.
    return extracted ? parseJsonOrNull(extracted) : null;
}
95
/**
 * Extract JSON from text and, if requested, check it with a type guard.
 *
 * The value comes from `extractJsonFromText`. When a `validator` is supplied
 * the value is returned only if the validator accepts it; when no validator
 * is supplied the value is returned as-is. A null extraction result is
 * returned as null without calling the validator.
 *
 * @param text - Text that may contain JSON
 * @param validator - Optional type guard to validate the parsed result
 * @returns The extracted value, or null if nothing was extracted or validation failed
 *
 * @example
 * ```typescript
 * interface UserConfig {
 *   theme: string;
 *   fontSize: number;
 * }
 *
 * function isUserConfig(obj: unknown): obj is UserConfig {
 *   return (
 *     typeof obj === 'object' &&
 *     obj !== null &&
 *     'theme' in obj &&
 *     'fontSize' in obj &&
 *     typeof (obj as UserConfig).theme === 'string' &&
 *     typeof (obj as UserConfig).fontSize === 'number'
 *   );
 * }
 *
 * const config = parseJsonFromText<UserConfig>(aiResponse, isUserConfig);
 * if (config) {
 *   // config is typed as UserConfig
 *   console.log(config.theme, config.fontSize);
 * }
 * ```
 */
export function parseJsonFromText(text, validator) {
    const value = extractJsonFromText(text);
    // Accept only a non-null value that either has no validator or passes it.
    if (value !== null && (!validator || validator(value))) {
        return value;
    }
    return null;
}
140
/**
 * Find the index of the `closeChar` that balances the `openChar` at `start`.
 *
 * Only the given bracket pair is counted. Characters between double quotes
 * are ignored, and a backslash causes the following character to be skipped.
 *
 * @param source - Text being scanned
 * @param start - Index of the opening bracket
 * @param openChar - Opening bracket character ("{" or "[")
 * @param closeChar - Matching closing bracket character ("}" or "]")
 * @returns Index of the balancing closer, or -1 if the brackets never balance
 */
function findMatchingCloseIndex(source, start, openChar, closeChar) {
    let depth = 0;
    let inString = false;
    let escapeNext = false;
    for (let i = start; i < source.length; i++) {
        const char = source[i];
        if (escapeNext) {
            escapeNext = false;
            continue;
        }
        if (char === "\\") {
            escapeNext = true;
            continue;
        }
        if (char === '"') {
            inString = !inString;
            continue;
        }
        if (inString) {
            continue;
        }
        if (char === openChar) {
            depth++;
        }
        else if (char === closeChar) {
            depth--;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}
/**
 * Extract all JSON objects/arrays from text.
 *
 * Useful when text contains multiple JSON blocks. Fenced code blocks
 * (```json ... ``` or ``` ... ```) are tried first; if at least one of them
 * parses, only the code-block values are returned. Otherwise the raw text is
 * scanned for balanced `{ ... }` / `[ ... ]` spans and every span that
 * parses is collected in order of appearance.
 *
 * @param text - Text that may contain multiple JSON values
 * @returns Array of parsed JSON values (empty when none are found or when
 *          `text` is not a non-empty string)
 *
 * @example
 * ```typescript
 * const text = 'First: {"a": 1} Second: {"b": 2}';
 * const results = extractAllJsonFromText(text);
 * // Returns: [{ a: 1 }, { b: 2 }]
 * ```
 */
export function extractAllJsonFromText(text) {
    // Guard: null/undefined previously crashed on `.length`.
    if (typeof text !== "string" || text.length === 0) {
        return [];
    }
    const results = [];
    // Extract from code blocks first
    for (const fence of text.matchAll(/```(?:json)?\s*([\s\S]*?)```/g)) {
        try {
            results.push(JSON.parse(fence[1].trim()));
        }
        catch {
            // Not valid JSON, skip this block
        }
    }
    // If we found code blocks, return those results
    if (results.length > 0) {
        return results;
    }
    // Otherwise scan for top-level objects and arrays. The next "{" and "["
    // positions are cached and refreshed only once the scan has moved past
    // them, so the text is not re-searched from scratch on every iteration.
    let objectStart = text.indexOf("{");
    let arrayStart = text.indexOf("[");
    while (objectStart !== -1 || arrayStart !== -1) {
        // Pick whichever opener comes first.
        const isObject = arrayStart === -1 || (objectStart !== -1 && objectStart < arrayStart);
        const start = isObject ? objectStart : arrayStart;
        const end = findMatchingCloseIndex(text, start, isObject ? "{" : "[", isObject ? "}" : "]");
        // Default: unbalanced or unparseable, so retry just past this opener
        // (a nested value inside the failed span may still be valid).
        let next = start + 1;
        if (end !== -1) {
            try {
                results.push(JSON.parse(text.slice(start, end + 1)));
                next = end + 1;
            }
            catch {
                // Balanced but not valid JSON; keep the default `next`.
            }
        }
        if (objectStart !== -1 && objectStart < next) {
            objectStart = text.indexOf("{", next);
        }
        if (arrayStart !== -1 && arrayStart < next) {
            arrayStart = text.indexOf("[", next);
        }
    }
    return results;
}
249
+ //# sourceMappingURL=extract.js.map
@@ -0,0 +1,36 @@
1
+ /**
2
+ * JSON Utilities
3
+ *
4
+ * Centralized JSON parsing, serialization, and extraction utilities.
5
+ * Provides safe operations that handle errors gracefully without throwing.
6
+ *
7
+ * @module json
8
+ *
9
+ * @example
10
+ * ```typescript
11
+ * import {
12
+ * safeParseJson,
13
+ * parseJsonOrNull,
14
+ * isValidJson,
15
+ * safeStringify,
16
+ * extractJsonFromText,
17
+ * parseJsonFromText,
18
+ * } from './utils/json/index.js';
19
+ *
20
+ * // Safe parsing with fallback
21
+ * const config = safeParseJson(userInput, { theme: 'light' });
22
+ *
23
+ * // Check if valid before parsing
24
+ * if (isValidJson(str)) {
25
+ * const data = JSON.parse(str);
26
+ * }
27
+ *
28
+ * // Handle circular references
29
+ * const json = safeStringify(complexObject, 2);
30
+ *
31
+ * // Extract JSON from AI responses
32
+ * const result = extractJsonFromText(aiResponse);
33
+ * ```
34
+ */
35
+ export * from "./extract.js";
36
+ export * from "./safeParse.js";
@@ -0,0 +1,37 @@
1
+ /**
2
+ * JSON Utilities
3
+ *
4
+ * Centralized JSON parsing, serialization, and extraction utilities.
5
+ * Provides safe operations that handle errors gracefully without throwing.
6
+ *
7
+ * @module json
8
+ *
9
+ * @example
10
+ * ```typescript
11
+ * import {
12
+ * safeParseJson,
13
+ * parseJsonOrNull,
14
+ * isValidJson,
15
+ * safeStringify,
16
+ * extractJsonFromText,
17
+ * parseJsonFromText,
18
+ * } from './utils/json/index.js';
19
+ *
20
+ * // Safe parsing with fallback
21
+ * const config = safeParseJson(userInput, { theme: 'light' });
22
+ *
23
+ * // Check if valid before parsing
24
+ * if (isValidJson(str)) {
25
+ * const data = JSON.parse(str);
26
+ * }
27
+ *
28
+ * // Handle circular references
29
+ * const json = safeStringify(complexObject, 2);
30
+ *
31
+ * // Extract JSON from AI responses
32
+ * const result = extractJsonFromText(aiResponse);
33
+ * ```
34
+ */
35
+ export * from "./extract.js";
36
+ export * from "./safeParse.js";
37
+ //# sourceMappingURL=index.js.map