@juspay/neurolink 9.1.0 → 9.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. package/CHANGELOG.md +12 -0
  2. package/README.md +54 -7
  3. package/dist/agent/directTools.d.ts +3 -3
  4. package/dist/cli/commands/config.d.ts +6 -6
  5. package/dist/image-gen/ImageGenService.d.ts +143 -0
  6. package/dist/image-gen/ImageGenService.js +345 -0
  7. package/dist/image-gen/imageGenTools.d.ts +126 -0
  8. package/dist/image-gen/imageGenTools.js +304 -0
  9. package/dist/image-gen/index.d.ts +46 -0
  10. package/dist/image-gen/index.js +48 -0
  11. package/dist/image-gen/types.d.ts +237 -0
  12. package/dist/image-gen/types.js +24 -0
  13. package/dist/lib/agent/directTools.d.ts +3 -3
  14. package/dist/lib/image-gen/ImageGenService.d.ts +143 -0
  15. package/dist/lib/image-gen/ImageGenService.js +346 -0
  16. package/dist/lib/image-gen/imageGenTools.d.ts +126 -0
  17. package/dist/lib/image-gen/imageGenTools.js +305 -0
  18. package/dist/lib/image-gen/index.d.ts +46 -0
  19. package/dist/lib/image-gen/index.js +49 -0
  20. package/dist/lib/image-gen/types.d.ts +237 -0
  21. package/dist/lib/image-gen/types.js +25 -0
  22. package/dist/lib/processors/base/BaseFileProcessor.d.ts +273 -0
  23. package/dist/lib/processors/base/BaseFileProcessor.js +614 -0
  24. package/dist/lib/processors/base/index.d.ts +14 -0
  25. package/dist/lib/processors/base/index.js +20 -0
  26. package/dist/lib/processors/base/types.d.ts +593 -0
  27. package/dist/lib/processors/base/types.js +77 -0
  28. package/dist/lib/processors/cli/fileProcessorCli.d.ts +163 -0
  29. package/dist/lib/processors/cli/fileProcessorCli.js +389 -0
  30. package/dist/lib/processors/cli/index.d.ts +37 -0
  31. package/dist/lib/processors/cli/index.js +50 -0
  32. package/dist/lib/processors/code/ConfigProcessor.d.ts +171 -0
  33. package/dist/lib/processors/code/ConfigProcessor.js +401 -0
  34. package/dist/lib/processors/code/SourceCodeProcessor.d.ts +174 -0
  35. package/dist/lib/processors/code/SourceCodeProcessor.js +305 -0
  36. package/dist/lib/processors/code/index.d.ts +44 -0
  37. package/dist/lib/processors/code/index.js +61 -0
  38. package/dist/lib/processors/config/fileTypes.d.ts +283 -0
  39. package/dist/lib/processors/config/fileTypes.js +521 -0
  40. package/dist/lib/processors/config/index.d.ts +32 -0
  41. package/dist/lib/processors/config/index.js +93 -0
  42. package/dist/lib/processors/config/languageMap.d.ts +66 -0
  43. package/dist/lib/processors/config/languageMap.js +411 -0
  44. package/dist/lib/processors/config/mimeTypes.d.ts +376 -0
  45. package/dist/lib/processors/config/mimeTypes.js +339 -0
  46. package/dist/lib/processors/config/sizeLimits.d.ts +194 -0
  47. package/dist/lib/processors/config/sizeLimits.js +247 -0
  48. package/dist/lib/processors/data/JsonProcessor.d.ts +122 -0
  49. package/dist/lib/processors/data/JsonProcessor.js +204 -0
  50. package/dist/lib/processors/data/XmlProcessor.d.ts +160 -0
  51. package/dist/lib/processors/data/XmlProcessor.js +284 -0
  52. package/dist/lib/processors/data/YamlProcessor.d.ts +163 -0
  53. package/dist/lib/processors/data/YamlProcessor.js +295 -0
  54. package/dist/lib/processors/data/index.d.ts +49 -0
  55. package/dist/lib/processors/data/index.js +77 -0
  56. package/dist/lib/processors/document/ExcelProcessor.d.ts +238 -0
  57. package/dist/lib/processors/document/ExcelProcessor.js +520 -0
  58. package/dist/lib/processors/document/OpenDocumentProcessor.d.ts +69 -0
  59. package/dist/lib/processors/document/OpenDocumentProcessor.js +211 -0
  60. package/dist/lib/processors/document/RtfProcessor.d.ts +152 -0
  61. package/dist/lib/processors/document/RtfProcessor.js +362 -0
  62. package/dist/lib/processors/document/WordProcessor.d.ts +168 -0
  63. package/dist/lib/processors/document/WordProcessor.js +354 -0
  64. package/dist/lib/processors/document/index.d.ts +54 -0
  65. package/dist/lib/processors/document/index.js +91 -0
  66. package/dist/lib/processors/errors/FileErrorCode.d.ts +98 -0
  67. package/dist/lib/processors/errors/FileErrorCode.js +256 -0
  68. package/dist/lib/processors/errors/errorHelpers.d.ts +151 -0
  69. package/dist/lib/processors/errors/errorHelpers.js +379 -0
  70. package/dist/lib/processors/errors/errorSerializer.d.ts +139 -0
  71. package/dist/lib/processors/errors/errorSerializer.js +508 -0
  72. package/dist/lib/processors/errors/index.d.ts +46 -0
  73. package/dist/lib/processors/errors/index.js +50 -0
  74. package/dist/lib/processors/index.d.ts +76 -0
  75. package/dist/lib/processors/index.js +113 -0
  76. package/dist/lib/processors/integration/FileProcessorIntegration.d.ts +244 -0
  77. package/dist/lib/processors/integration/FileProcessorIntegration.js +273 -0
  78. package/dist/lib/processors/integration/index.d.ts +42 -0
  79. package/dist/lib/processors/integration/index.js +45 -0
  80. package/dist/lib/processors/markup/HtmlProcessor.d.ts +169 -0
  81. package/dist/lib/processors/markup/HtmlProcessor.js +250 -0
  82. package/dist/lib/processors/markup/MarkdownProcessor.d.ts +165 -0
  83. package/dist/lib/processors/markup/MarkdownProcessor.js +245 -0
  84. package/dist/lib/processors/markup/SvgProcessor.d.ts +156 -0
  85. package/dist/lib/processors/markup/SvgProcessor.js +241 -0
  86. package/dist/lib/processors/markup/TextProcessor.d.ts +135 -0
  87. package/dist/lib/processors/markup/TextProcessor.js +189 -0
  88. package/dist/lib/processors/markup/index.d.ts +66 -0
  89. package/dist/lib/processors/markup/index.js +103 -0
  90. package/dist/lib/processors/registry/ProcessorRegistry.d.ts +334 -0
  91. package/dist/lib/processors/registry/ProcessorRegistry.js +609 -0
  92. package/dist/lib/processors/registry/index.d.ts +12 -0
  93. package/dist/lib/processors/registry/index.js +17 -0
  94. package/dist/lib/processors/registry/types.d.ts +53 -0
  95. package/dist/lib/processors/registry/types.js +11 -0
  96. package/dist/lib/providers/sagemaker/language-model.d.ts +2 -2
  97. package/dist/lib/server/utils/validation.d.ts +6 -6
  98. package/dist/lib/types/fileTypes.d.ts +51 -1
  99. package/dist/lib/types/index.d.ts +25 -24
  100. package/dist/lib/types/index.js +21 -20
  101. package/dist/lib/types/modelTypes.d.ts +18 -18
  102. package/dist/lib/types/pptTypes.d.ts +14 -2
  103. package/dist/lib/types/pptTypes.js +16 -0
  104. package/dist/lib/utils/async/delay.d.ts +40 -0
  105. package/dist/lib/utils/async/delay.js +43 -0
  106. package/dist/lib/utils/async/index.d.ts +23 -0
  107. package/dist/lib/utils/async/index.js +24 -0
  108. package/dist/lib/utils/async/retry.d.ts +141 -0
  109. package/dist/lib/utils/async/retry.js +172 -0
  110. package/dist/lib/utils/async/withTimeout.d.ts +73 -0
  111. package/dist/lib/utils/async/withTimeout.js +97 -0
  112. package/dist/lib/utils/csvProcessor.js +442 -0
  113. package/dist/lib/utils/fileDetector.d.ts +7 -1
  114. package/dist/lib/utils/fileDetector.js +91 -18
  115. package/dist/lib/utils/json/extract.d.ts +103 -0
  116. package/dist/lib/utils/json/extract.js +249 -0
  117. package/dist/lib/utils/json/index.d.ts +36 -0
  118. package/dist/lib/utils/json/index.js +37 -0
  119. package/dist/lib/utils/json/safeParse.d.ts +137 -0
  120. package/dist/lib/utils/json/safeParse.js +191 -0
  121. package/dist/lib/utils/messageBuilder.d.ts +2 -2
  122. package/dist/lib/utils/messageBuilder.js +15 -7
  123. package/dist/lib/utils/sanitizers/filename.d.ts +137 -0
  124. package/dist/lib/utils/sanitizers/filename.js +366 -0
  125. package/dist/lib/utils/sanitizers/html.d.ts +170 -0
  126. package/dist/lib/utils/sanitizers/html.js +326 -0
  127. package/dist/lib/utils/sanitizers/index.d.ts +26 -0
  128. package/dist/lib/utils/sanitizers/index.js +30 -0
  129. package/dist/lib/utils/sanitizers/svg.d.ts +81 -0
  130. package/dist/lib/utils/sanitizers/svg.js +483 -0
  131. package/dist/processors/base/BaseFileProcessor.d.ts +273 -0
  132. package/dist/processors/base/BaseFileProcessor.js +613 -0
  133. package/dist/processors/base/index.d.ts +14 -0
  134. package/dist/processors/base/index.js +19 -0
  135. package/dist/processors/base/types.d.ts +593 -0
  136. package/dist/processors/base/types.js +76 -0
  137. package/dist/processors/cli/fileProcessorCli.d.ts +163 -0
  138. package/dist/processors/cli/fileProcessorCli.js +388 -0
  139. package/dist/processors/cli/index.d.ts +37 -0
  140. package/dist/processors/cli/index.js +49 -0
  141. package/dist/processors/code/ConfigProcessor.d.ts +171 -0
  142. package/dist/processors/code/ConfigProcessor.js +400 -0
  143. package/dist/processors/code/SourceCodeProcessor.d.ts +174 -0
  144. package/dist/processors/code/SourceCodeProcessor.js +304 -0
  145. package/dist/processors/code/index.d.ts +44 -0
  146. package/dist/processors/code/index.js +60 -0
  147. package/dist/processors/config/fileTypes.d.ts +283 -0
  148. package/dist/processors/config/fileTypes.js +520 -0
  149. package/dist/processors/config/index.d.ts +32 -0
  150. package/dist/processors/config/index.js +92 -0
  151. package/dist/processors/config/languageMap.d.ts +66 -0
  152. package/dist/processors/config/languageMap.js +410 -0
  153. package/dist/processors/config/mimeTypes.d.ts +376 -0
  154. package/dist/processors/config/mimeTypes.js +338 -0
  155. package/dist/processors/config/sizeLimits.d.ts +194 -0
  156. package/dist/processors/config/sizeLimits.js +246 -0
  157. package/dist/processors/data/JsonProcessor.d.ts +122 -0
  158. package/dist/processors/data/JsonProcessor.js +203 -0
  159. package/dist/processors/data/XmlProcessor.d.ts +160 -0
  160. package/dist/processors/data/XmlProcessor.js +283 -0
  161. package/dist/processors/data/YamlProcessor.d.ts +163 -0
  162. package/dist/processors/data/YamlProcessor.js +294 -0
  163. package/dist/processors/data/index.d.ts +49 -0
  164. package/dist/processors/data/index.js +76 -0
  165. package/dist/processors/document/ExcelProcessor.d.ts +238 -0
  166. package/dist/processors/document/ExcelProcessor.js +519 -0
  167. package/dist/processors/document/OpenDocumentProcessor.d.ts +69 -0
  168. package/dist/processors/document/OpenDocumentProcessor.js +210 -0
  169. package/dist/processors/document/RtfProcessor.d.ts +152 -0
  170. package/dist/processors/document/RtfProcessor.js +361 -0
  171. package/dist/processors/document/WordProcessor.d.ts +168 -0
  172. package/dist/processors/document/WordProcessor.js +353 -0
  173. package/dist/processors/document/index.d.ts +54 -0
  174. package/dist/processors/document/index.js +90 -0
  175. package/dist/processors/errors/FileErrorCode.d.ts +98 -0
  176. package/dist/processors/errors/FileErrorCode.js +255 -0
  177. package/dist/processors/errors/errorHelpers.d.ts +151 -0
  178. package/dist/processors/errors/errorHelpers.js +378 -0
  179. package/dist/processors/errors/errorSerializer.d.ts +139 -0
  180. package/dist/processors/errors/errorSerializer.js +507 -0
  181. package/dist/processors/errors/index.d.ts +46 -0
  182. package/dist/processors/errors/index.js +49 -0
  183. package/dist/processors/index.d.ts +76 -0
  184. package/dist/processors/index.js +112 -0
  185. package/dist/processors/integration/FileProcessorIntegration.d.ts +244 -0
  186. package/dist/processors/integration/FileProcessorIntegration.js +272 -0
  187. package/dist/processors/integration/index.d.ts +42 -0
  188. package/dist/processors/integration/index.js +44 -0
  189. package/dist/processors/markup/HtmlProcessor.d.ts +169 -0
  190. package/dist/processors/markup/HtmlProcessor.js +249 -0
  191. package/dist/processors/markup/MarkdownProcessor.d.ts +165 -0
  192. package/dist/processors/markup/MarkdownProcessor.js +244 -0
  193. package/dist/processors/markup/SvgProcessor.d.ts +156 -0
  194. package/dist/processors/markup/SvgProcessor.js +240 -0
  195. package/dist/processors/markup/TextProcessor.d.ts +135 -0
  196. package/dist/processors/markup/TextProcessor.js +188 -0
  197. package/dist/processors/markup/index.d.ts +66 -0
  198. package/dist/processors/markup/index.js +102 -0
  199. package/dist/processors/registry/ProcessorRegistry.d.ts +334 -0
  200. package/dist/processors/registry/ProcessorRegistry.js +608 -0
  201. package/dist/processors/registry/index.d.ts +12 -0
  202. package/dist/processors/registry/index.js +16 -0
  203. package/dist/processors/registry/types.d.ts +53 -0
  204. package/dist/processors/registry/types.js +10 -0
  205. package/dist/server/utils/validation.d.ts +6 -6
  206. package/dist/types/fileTypes.d.ts +51 -1
  207. package/dist/types/index.d.ts +25 -24
  208. package/dist/types/index.js +21 -20
  209. package/dist/types/modelTypes.d.ts +10 -10
  210. package/dist/types/pptTypes.d.ts +14 -2
  211. package/dist/types/pptTypes.js +16 -0
  212. package/dist/utils/async/delay.d.ts +40 -0
  213. package/dist/utils/async/delay.js +42 -0
  214. package/dist/utils/async/index.d.ts +23 -0
  215. package/dist/utils/async/index.js +23 -0
  216. package/dist/utils/async/retry.d.ts +141 -0
  217. package/dist/utils/async/retry.js +171 -0
  218. package/dist/utils/async/withTimeout.d.ts +73 -0
  219. package/dist/utils/async/withTimeout.js +96 -0
  220. package/dist/utils/csvProcessor.js +442 -0
  221. package/dist/utils/fileDetector.d.ts +7 -1
  222. package/dist/utils/fileDetector.js +91 -18
  223. package/dist/utils/json/extract.d.ts +103 -0
  224. package/dist/utils/json/extract.js +248 -0
  225. package/dist/utils/json/index.d.ts +36 -0
  226. package/dist/utils/json/index.js +36 -0
  227. package/dist/utils/json/safeParse.d.ts +137 -0
  228. package/dist/utils/json/safeParse.js +190 -0
  229. package/dist/utils/messageBuilder.d.ts +2 -2
  230. package/dist/utils/messageBuilder.js +15 -7
  231. package/dist/utils/sanitizers/filename.d.ts +137 -0
  232. package/dist/utils/sanitizers/filename.js +365 -0
  233. package/dist/utils/sanitizers/html.d.ts +170 -0
  234. package/dist/utils/sanitizers/html.js +325 -0
  235. package/dist/utils/sanitizers/index.d.ts +26 -0
  236. package/dist/utils/sanitizers/index.js +29 -0
  237. package/dist/utils/sanitizers/svg.d.ts +81 -0
  238. package/dist/utils/sanitizers/svg.js +482 -0
  239. package/package.json +2 -2
@@ -6,6 +6,418 @@
6
6
  import csvParser from "csv-parser";
7
7
  import { Readable } from "stream";
8
8
  import { logger } from "./logger.js";
// ============================================================================
// Data Type Detection Patterns
// ============================================================================
/**
 * Date-only shapes, tried in array order. Matching is purely structural
 * (digit counts and separators); `format` is the label reported for a match.
 */
const DATE_PATTERNS = [
    { regex: /^\d{4}-\d{2}-\d{2}$/, format: "YYYY-MM-DD" },
    { regex: /^\d{2}\/\d{2}\/\d{4}$/, format: "MM/DD/YYYY" },
    { regex: /^\d{2}-\d{2}-\d{4}$/, format: "DD-MM-YYYY" },
    { regex: /^\d{2}\.\d{2}\.\d{4}$/, format: "DD.MM.YYYY" },
    { regex: /^\d{4}\/\d{2}\/\d{2}$/, format: "YYYY/MM/DD" },
];
/**
 * Date-plus-time shapes. These are anchored at the start only, so any
 * trailing text (fractional seconds, zone suffix, ...) is accepted.
 */
const DATETIME_PATTERNS = [
    { regex: /^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}/, format: "ISO8601" },
    { regex: /^\d{2}\/\d{2}\/\d{4} \d{2}:\d{2}/, format: "MM/DD/YYYY HH:mm" },
];
/** something@something.something with no whitespace and a single "@". */
const EMAIL_REGEX = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;
/** Starts with http://, https:// or www. (case-insensitive), no whitespace. */
const URL_REGEX = /^(https?:\/\/|www\.)[^\s]+$/i;
/** Optional minus sign followed by digits only. */
const INTEGER_REGEX = /^-?\d+$/;
/** Optional minus sign, digits, a dot, and at least one more digit. */
const FLOAT_REGEX = /^-?\d+\.\d+$/;
/** Lower-case tokens that are treated as boolean values. */
const BOOLEAN_VALUES = new Set([
    "true", "false",
    "yes", "no",
    "1", "0",
    "t", "f",
    "y", "n",
]);
// ============================================================================
// Column Name Validation
// ============================================================================
/**
 * Validate a column name and list everything that looks wrong with it.
 *
 * @param name - Column name as read from the header row. null/undefined are
 *   treated as blank; any other non-string is converted with String().
 * @returns Array of human-readable issue descriptions; empty when the name
 *   is acceptable. A blank name yields exactly one issue.
 */
function validateColumnName(name) {
    const raw = name === null || name === undefined ? "" : String(name);
    const trimmed = raw.trim();
    if (trimmed === "") {
        return ["Empty or blank column name"];
    }
    const issues = [];
    if (raw !== trimmed) {
        issues.push("Leading or trailing whitespace");
    }
    // Test the trimmed text: surrounding whitespace is already reported above
    // and must not hide a leading digit (e.g. " 2024 sales").
    if (/^\d/.test(trimmed)) {
        issues.push("Starts with a number");
    }
    if (/[^a-zA-Z0-9_\- ]/.test(raw)) {
        issues.push("Contains special characters");
    }
    if (raw.length > 64) {
        issues.push("Name exceeds 64 characters");
    }
    if (/\s{2,}/.test(raw)) {
        issues.push("Contains multiple consecutive spaces");
    }
    return issues;
}
// ============================================================================
// Data Type Detection
// ============================================================================
/**
 * Detect the data type of a single cell value.
 *
 * Checks run in a fixed order and the first match wins:
 * boolean, integer, float, email, url, datetime, date, then "string".
 *
 * @param value - Cell value. null/undefined count as empty; any other
 *   non-string is converted with String() before matching.
 * @returns One of "empty", "boolean", "integer", "float", "email", "url",
 *   "datetime", "date" or "string".
 */
function detectValueType(value) {
    if (value === null || value === undefined) {
        return "empty";
    }
    // Coerce first so a non-string cell cannot throw on .trim().
    const trimmed = String(value).trim();
    if (trimmed === "") {
        return "empty";
    }
    // Boolean goes before the numeric checks because "1" and "0" satisfy both.
    if (BOOLEAN_VALUES.has(trimmed.toLowerCase())) {
        return "boolean";
    }
    if (INTEGER_REGEX.test(trimmed)) {
        return "integer";
    }
    if (FLOAT_REGEX.test(trimmed)) {
        return "float";
    }
    if (EMAIL_REGEX.test(trimmed)) {
        return "email";
    }
    if (URL_REGEX.test(trimmed)) {
        return "url";
    }
    // Datetime goes before date because it is the more specific shape.
    if (DATETIME_PATTERNS.some((pattern) => pattern.regex.test(trimmed))) {
        return "datetime";
    }
    if (DATE_PATTERNS.some((pattern) => pattern.regex.test(trimmed))) {
        return "date";
    }
    return "string";
}
/**
 * Look up the format label for a date or datetime string.
 *
 * Datetime patterns are consulted before date-only patterns; the label of
 * the first pattern whose regex matches the trimmed value is returned.
 *
 * @param value - String to inspect.
 * @returns The matching pattern's `format` label, or undefined if none match.
 */
function detectDateFormat(value) {
    const candidate = value.trim();
    const hit = [...DATETIME_PATTERNS, ...DATE_PATTERNS].find(({ regex }) => regex.test(candidate));
    return hit?.format;
}
/**
 * Determine the predominant type of a column from its per-cell types.
 *
 * "empty" entries are ignored. When a column contains both "integer" and
 * "float" cells they are counted together as "number" before the most
 * common type is chosen, so a few stray non-numeric cells do not make an
 * otherwise numeric column look mixed.
 *
 * @param types - Per-cell type labels.
 * @returns `{ type, confidence }` where confidence is the rounded percentage
 *   of non-empty cells that have the winning type. `type` is "empty" when
 *   there are no non-empty cells, and "mixed" when several types are present
 *   and the winner covers fewer than 70% of the non-empty cells.
 */
function determineColumnType(types) {
    const nonEmpty = types.filter((t) => t !== "empty");
    if (nonEmpty.length === 0) {
        return { type: "empty", confidence: 100 };
    }
    // Count occurrences of each type.
    const typeCounts = new Map();
    for (const t of nonEmpty) {
        typeCounts.set(t, (typeCounts.get(t) ?? 0) + 1);
    }
    // Fold integer + float into a single "number" bucket when both occur.
    const integerCount = typeCounts.get("integer") ?? 0;
    const floatCount = typeCounts.get("float") ?? 0;
    if (integerCount > 0 && floatCount > 0) {
        typeCounts.delete("integer");
        typeCounts.delete("float");
        typeCounts.set("number", (typeCounts.get("number") ?? 0) + integerCount + floatCount);
    }
    // Find the most common type.
    let maxType = "string";
    let maxCount = 0;
    for (const [type, count] of typeCounts) {
        if (count > maxCount) {
            maxCount = count;
            maxType = type;
        }
    }
    const confidence = Math.round((maxCount / nonEmpty.length) * 100);
    // Low confidence with several competing types means the column is mixed.
    if (confidence < 70 && typeCounts.size > 1) {
        return { type: "mixed", confidence };
    }
    return { type: maxType, confidence };
}
/**
 * Analyze a single column and return its metadata.
 *
 * @param columnName - Header text of the column.
 * @param columnIndex - Position of the column.
 * @param values - Cell values of the column (strings; blank cells count as null).
 * @returns Object with `name`, `index`, `detectedType`, `typeConfidence`,
 *   `nullCount`, `uniqueCount` and `sampleValues` (up to 5 distinct non-empty
 *   values). `minValue` / `maxValue` / `avgValue` are added when numeric cells
 *   exist, `dateFormat` when a date or datetime cell was seen, and
 *   `nameIssues` when the column name fails validation.
 */
function analyzeColumn(columnName, columnIndex, values) {
    const types = [];
    const uniqueValues = new Set();
    // Positions and values of cells that are literally "0" or "1". They are
    // typed "boolean" cell-by-cell, but belong to the numbers when the column
    // also holds other integers or floats.
    const bitCells = [];
    let hasNumericCells = false;
    let nullCount = 0;
    let dateFormat;
    // Running statistics instead of Math.min(...arr) / Math.max(...arr):
    // spreading a very large array can exceed the engine's argument limit.
    let numericCount = 0;
    let numericSum = 0;
    let minValue;
    let maxValue;
    const recordNumber = (num) => {
        numericCount++;
        numericSum += num;
        minValue = minValue === undefined ? num : Math.min(minValue, num);
        maxValue = maxValue === undefined ? num : Math.max(maxValue, num);
    };
    for (const value of values) {
        const trimmed = value?.trim() ?? "";
        if (trimmed === "") {
            nullCount++;
            types.push("empty");
            continue;
        }
        uniqueValues.add(trimmed);
        const type = detectValueType(trimmed);
        if (type === "integer" || type === "float") {
            hasNumericCells = true;
            const num = Number.parseFloat(trimmed);
            if (!Number.isNaN(num)) {
                recordNumber(num);
            }
        }
        else if (type === "boolean" && (trimmed === "0" || trimmed === "1")) {
            bitCells.push({ position: types.length, num: Number(trimmed) });
        }
        types.push(type);
        // Remember the format of the first date-like cell.
        if ((type === "date" || type === "datetime") && !dateFormat) {
            dateFormat = detectDateFormat(trimmed);
        }
    }
    // In a numeric column, 0 and 1 are numbers rather than flags.
    if (hasNumericCells) {
        for (const { position, num } of bitCells) {
            types[position] = "integer";
            recordNumber(num);
        }
    }
    const { type: detectedType, confidence } = determineColumnType(types);
    const metadata = {
        name: columnName,
        index: columnIndex,
        detectedType,
        typeConfidence: confidence,
        nullCount,
        uniqueCount: uniqueValues.size,
        // Up to 5 distinct non-empty values, in first-seen order.
        sampleValues: Array.from(uniqueValues).slice(0, 5),
    };
    if (numericCount > 0) {
        metadata.minValue = minValue;
        metadata.maxValue = maxValue;
        // Average rounded to two decimals.
        metadata.avgValue = Math.round((numericSum / numericCount) * 100) / 100;
    }
    if (dateFormat) {
        metadata.dateFormat = dateFormat;
    }
    const nameIssues = validateColumnName(columnName);
    if (nameIssues.length > 0) {
        metadata.nameIssues = nameIssues;
    }
    return metadata;
}
/**
 * Turn per-column metadata into a flat list of data quality warnings.
 *
 * For every column, in order, the following checks run: empty-cell share
 * above 20%, reported name issues, mixed / low-confidence types, a single
 * repeated value across more than 10 rows, and an entirely empty column.
 *
 * @param columns - Column metadata objects (reads `name`, `nullCount`,
 *   `nameIssues`, `detectedType`, `typeConfidence`, `uniqueCount`).
 * @param totalRows - Number of rows the metadata was computed from.
 * @returns Warning objects with `column`, `type`, `message`, `severity`
 *   and, for some warning types, `affectedRows`.
 */
function generateDataQualityWarnings(columns, totalRows) {
    const collected = [];
    // Appends one warning; `affectedRows` is only attached when supplied.
    const report = (col, type, message, severity, affectedRows) => {
        const entry = { column: col.name, type, message, severity };
        if (affectedRows !== undefined) {
            entry.affectedRows = affectedRows;
        }
        collected.push(entry);
    };
    for (const col of columns) {
        const emptyShare = totalRows > 0 ? col.nullCount / totalRows : 0;
        if (emptyShare > 0.2) {
            const level = emptyShare > 0.5 ? "warning" : "info";
            report(col, "high_null_rate", `Column has ${Math.round(emptyShare * 100)}% empty/null values (${col.nullCount} of ${totalRows} rows)`, level, col.nullCount);
        }
        if (col.nameIssues && col.nameIssues.length > 0) {
            // A blank name is an error; any other naming issue is a warning.
            const level = col.name.trim() === "" ? "error" : "warning";
            report(col, "invalid_name", `Column name issues: ${col.nameIssues.join(", ")}`, level);
        }
        if (col.detectedType === "mixed" || col.typeConfidence < 70) {
            report(col, "mixed_types", `Column has inconsistent data types (${col.typeConfidence}% confidence for ${col.detectedType})`, "warning");
        }
        const isConstant = totalRows > 10 && col.uniqueCount === 1 && col.nullCount === 0;
        if (isConstant) {
            report(col, "duplicates", `All ${totalRows} rows have the same value`, "info", totalRows);
        }
        if (col.detectedType === "empty") {
            report(col, "empty_values", "Column is entirely empty", "warning", totalRows);
        }
    }
    return collected;
}
/**
 * Compute an overall data quality score between 0 and 100.
 *
 * Starting from 100, points are removed per warning (error 15, warning 8,
 * info 3), for the overall share of empty cells (up to 30 points), and for
 * an average type confidence below 80 (half a point per missing percent).
 *
 * @param columns - Column metadata (reads `nullCount` and `typeConfidence`).
 * @param warnings - Warnings whose `severity` drives the deductions.
 * @param totalRows - Number of analyzed rows.
 * @returns Score clamped to [0, 100]; 0 when there are no columns or rows.
 */
function calculateDataQualityScore(columns, warnings, totalRows) {
    if (columns.length === 0 || totalRows === 0) {
        return 0;
    }
    // Unknown severities cost nothing.
    const penaltyBySeverity = new Map([
        ["error", 15],
        ["warning", 8],
        ["info", 3],
    ]);
    let score = 100;
    for (const { severity } of warnings) {
        score -= penaltyBySeverity.get(severity) ?? 0;
    }
    // Share of empty cells across the whole table.
    const cellCount = columns.length * totalRows;
    const emptyCells = columns.reduce((sum, col) => sum + col.nullCount, 0);
    const emptyShare = cellCount > 0 ? emptyCells / cellCount : 0;
    score -= Math.round(emptyShare * 30);
    // Average type confidence across columns.
    const confidenceTotal = columns.reduce((sum, col) => sum + col.typeConfidence, 0);
    const meanConfidence = confidenceTotal / columns.length;
    if (meanConfidence < 80) {
        score -= Math.round((80 - meanConfidence) / 2);
    }
    return Math.max(0, Math.min(100, score));
}
/**
 * Analyze every column of parsed CSV rows.
 *
 * Column names are taken from the keys of the first row. Each cell is
 * converted to a string (null/undefined become "") before analysis.
 *
 * @param rows - Parsed rows as objects keyed by column name.
 * @returns `{ columnMetadata, dataQualityWarnings, dataQualityScore }`;
 *   empty lists and a score of 0 when there are no rows.
 */
function analyzeColumns(rows) {
    const rowTotal = rows.length;
    if (rowTotal === 0) {
        return {
            columnMetadata: [],
            dataQualityWarnings: [],
            dataQualityScore: 0,
        };
    }
    const columnMetadata = Object.keys(rows[0]).map((colName, position) => {
        const cells = rows.map((row) => String(row[colName] ?? ""));
        return analyzeColumn(colName, position, cells);
    });
    const dataQualityWarnings = generateDataQualityWarnings(columnMetadata, rowTotal);
    const dataQualityScore = calculateDataQualityScore(columnMetadata, dataQualityWarnings, rowTotal);
    return { columnMetadata, dataQualityWarnings, dataQualityScore };
}
/**
 * Detect if the first row appears to be a header row.
 *
 * Heuristics used:
 * 1. Header values should be text/string type (not numbers, dates, emails, etc.)
 * 2. Non-blank header values should be unique (case-insensitive). Blank cells
 *    are allowed and are ignored by this check, so trailing empty columns do
 *    not disqualify an otherwise valid header row.
 * 3. If data rows exist, headers should have a different type profile than data
 *
 * @param headerValues - The values from the first row (potential headers)
 * @param dataRows - Sample of data rows for comparison (optional)
 * @returns true if the first row appears to be headers
 */
function detectHasHeaders(headerValues, dataRows) {
    if (headerValues.length === 0) {
        return false;
    }
    // Blank cells carry no evidence either way, so work on the non-blank labels.
    const labels = headerValues.map((v) => v?.trim() ?? "").filter((label) => label !== "");
    if (labels.length === 0) {
        return false;
    }
    // Check 1: share of labels that look like plain text rather than data values.
    const textLikeCount = labels.filter((label) => detectValueType(label) === "string").length;
    const textRatio = textLikeCount / labels.length;
    // Check 2: non-blank labels must not repeat.
    const distinctLabels = new Set(labels.map((label) => label.toLowerCase()));
    const hasUniqueHeaders = distinctLabels.size === labels.length;
    // Check 3: compare with the first data row if available.
    if (dataRows && dataRows.length > 0) {
        const firstDataRow = Object.values(dataRows[0] || {}).map((v) => String(v ?? ""));
        let dataTextCount = 0;
        for (const value of firstDataRow) {
            if (detectValueType(value?.trim() ?? "") === "string") {
                dataTextCount++;
            }
        }
        const dataTextRatio = firstDataRow.length > 0 ? dataTextCount / firstDataRow.length : 0;
        // Headers mostly text while the data is clearly less text-like: likely headers.
        if (textRatio > 0.7 && dataTextRatio < textRatio - 0.2) {
            return true;
        }
    }
    // Default: at least 70% text-like labels and no repeated label.
    return textRatio >= 0.7 && hasUniqueHeaders;
}
9
421
  /**
10
422
  * Detect if first line is CSV metadata (not actual data/headers)
11
423
  * Common patterns:
@@ -106,6 +518,16 @@ export class CSVProcessor {
106
518
  columnCount: (limitedLines[0] || "").split(",").length,
107
519
  truncated: wasTruncated,
108
520
  });
521
+ // Parse a sample for enhanced metadata analysis (raw format still benefits from column analysis)
522
+ const sampleForAnalysis = await this.parseCSVString(limitedCSV, Math.min(rowCount, 500));
523
+ const { columnMetadata, dataQualityWarnings, dataQualityScore } = analyzeColumns(sampleForAnalysis);
524
+ // Log data quality summary
525
+ if (dataQualityWarnings.length > 0) {
526
+ logger.debug("[CSVProcessor] Data quality warnings detected", {
527
+ warningCount: dataQualityWarnings.length,
528
+ score: dataQualityScore,
529
+ });
530
+ }
109
531
  return {
110
532
  type: "csv",
111
533
  content: limitedCSV,
@@ -117,6 +539,11 @@ export class CSVProcessor {
117
539
  totalLines: limitedLines.length,
118
540
  columnCount: (limitedLines[0] || "").split(",").length,
119
541
  extension,
542
+ columnMetadata,
543
+ dataQualityWarnings,
544
+ dataQualityScore,
545
+ hasHeaders: detectHasHeaders((limitedLines[0] || "").split(","), undefined),
546
+ detectedDelimiter: ",",
120
547
  },
121
548
  };
122
549
  }
@@ -155,6 +582,15 @@ export class CSVProcessor {
155
582
  if (rowCount === 0) {
156
583
  logger.warn("[CSVProcessor] CSV file contains no data rows");
157
584
  }
585
+ // Perform enhanced column analysis
586
+ const { columnMetadata, dataQualityWarnings, dataQualityScore } = analyzeColumns(nonEmptyRows);
587
+ // Log data quality summary
588
+ if (dataQualityWarnings.length > 0) {
589
+ logger.debug("[CSVProcessor] Data quality warnings detected", {
590
+ warningCount: dataQualityWarnings.length,
591
+ score: dataQualityScore,
592
+ });
593
+ }
158
594
  // Format parsed data
159
595
  logger.debug(`[CSVProcessor] Converting ${rowCount} rows to ${formatStyle} format`);
160
596
  const formatted = this.formatForLLM(nonEmptyRows, formatStyle, includeHeaders);
@@ -164,6 +600,7 @@ export class CSVProcessor {
164
600
  columnCount,
165
601
  outputLength: formatted.length,
166
602
  hasEmptyColumns,
603
+ dataQualityScore,
167
604
  });
168
605
  return {
169
606
  type: "csv",
@@ -178,6 +615,11 @@ export class CSVProcessor {
178
615
  sampleData,
179
616
  hasEmptyColumns,
180
617
  extension,
618
+ columnMetadata,
619
+ dataQualityWarnings,
620
+ dataQualityScore,
621
+ hasHeaders: detectHasHeaders(columnNames, nonEmptyRows),
622
+ detectedDelimiter: ",",
181
623
  },
182
624
  };
183
625
  }
@@ -3,7 +3,7 @@
3
3
  * Centralized file detection for all multimodal file types
4
4
  * Uses multi-strategy approach for reliable type identification
5
5
  */
6
- import type { FileInput, FileProcessingResult, FileDetectorOptions } from "../types/fileTypes.js";
6
+ import type { FileDetectorOptions, FileInput, FileProcessingResult } from "../types/fileTypes.js";
7
7
  /**
8
8
  * Centralized file type detection and processing
9
9
  *
@@ -69,6 +69,12 @@ export declare class FileDetector {
69
69
  * Route to appropriate processor
70
70
  */
71
71
  private static processFile;
72
+ /**
73
+ * Process SVG file as text content
74
+ * Uses SvgProcessor for security sanitization (removes XSS vectors)
75
+ * Returns sanitized SVG markup as text for AI analysis
76
+ */
77
+ private static processSvgAsText;
72
78
  /**
73
79
  * Load file from URL with automatic retry on transient network errors
74
80
  */