@juspay/neurolink 8.19.0 → 8.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,29 @@ import { logger } from "./logger.js";
9
9
  import { CSVProcessor } from "./csvProcessor.js";
10
10
  import { ImageProcessor } from "./imageProcessor.js";
11
11
  import { PDFProcessor } from "./pdfProcessor.js";
12
/**
 * Check whether text is a complete, valid JSON object or array.
 *
 * A cheap bracket check (first/last non-whitespace character) runs first so
 * that JSON.parse is only attempted on plausible candidates; JSON.parse then
 * confirms the content really is JSON.
 *
 * @param {unknown} text - Candidate content. Anything that is not a string is
 *   reported as "not JSON" instead of throwing.
 * @returns {boolean} true only when the trimmed text starts/ends with matching
 *   `{}` or `[]` and parses as JSON.
 */
function hasJsonMarkers(text) {
    // Guard: calling .trim() on null/undefined/Buffer would throw a TypeError.
    if (typeof text !== "string") {
        return false;
    }
    const trimmed = text.trim();
    if (!trimmed) {
        return false;
    }
    const firstChar = trimmed[0];
    const lastChar = trimmed[trimmed.length - 1];
    const hasMatchingBrackets = (firstChar === "{" && lastChar === "}") ||
        (firstChar === "[" && lastChar === "]");
    if (!hasMatchingBrackets) {
        return false;
    }
    try {
        JSON.parse(trimmed);
        return true;
    }
    catch {
        // Bracketed but not parseable (e.g. "{not json}") is not JSON.
        return false;
    }
}
12
35
  /**
13
36
  * Format file size in human-readable units
14
37
  */
@@ -54,15 +77,199 @@ export class FileDetector {
54
77
  */
55
78
  static async detectAndProcess(input, options) {
56
79
  const detection = await this.detect(input, options);
80
+ // FD-018: Comprehensive fallback parsing for extension-less files
81
+ // When file detection returns "unknown" or doesn't match allowedTypes,
82
+ // attempt parsing for each allowed type before failing. This handles cases like Slack
83
+ // files named "file-1", "file-2" without extensions that could be CSV, JSON, or text.
57
84
  if (options?.allowedTypes &&
58
85
  !options.allowedTypes.includes(detection.type)) {
59
- throw new Error(`File type ${detection.type} not allowed. Allowed: ${options.allowedTypes.join(", ")}`);
86
+ // Try fallback parsing for both "unknown" types and when detection doesn't match allowed types
87
+ const content = await this.loadContent(input, detection, options);
88
+ const errors = [];
89
+ // Try each allowed type in order of specificity
90
+ for (const allowedType of options.allowedTypes) {
91
+ try {
92
+ const result = await this.tryFallbackParsing(content, allowedType, options);
93
+ if (result) {
94
+ logger.info(`[FileDetector] ✅ ${allowedType.toUpperCase()} fallback successful`);
95
+ return result;
96
+ }
97
+ }
98
+ catch (error) {
99
+ const errorMsg = error instanceof Error ? error.message : String(error);
100
+ errors.push(`${allowedType}: ${errorMsg}`);
101
+ logger.debug(`[FileDetector] ${allowedType} fallback failed: ${errorMsg}`);
102
+ }
103
+ }
104
+ // All fallbacks failed
105
+ throw new Error(`File type detection failed and all fallback parsing attempts failed. Original detection: ${detection.type}. Attempted types: ${options.allowedTypes.join(", ")}. Errors: ${errors.join("; ")}`);
60
106
  }
61
107
  const content = await this.loadContent(input, detection, options);
62
108
  // Extract CSV-specific options from FileDetectorOptions
63
109
  const csvOptions = options?.csvOptions;
64
110
  return await this.processFile(content, detection, csvOptions, options?.provider);
65
111
  }
112
+ /**
113
+ * Try fallback parsing for a specific file type
114
+ * Used when file detection returns "unknown" but we want to try parsing anyway
115
+ */
116
+ static async tryFallbackParsing(content, fileType, options) {
117
+ logger.info(`[FileDetector] Attempting ${fileType.toUpperCase()} fallback parsing`);
118
+ switch (fileType) {
119
+ case "csv": {
120
+ // Try CSV parsing
121
+ const csvOptions = options?.csvOptions;
122
+ const result = await CSVProcessor.process(content, csvOptions);
123
+ logger.info(`[FileDetector] CSV fallback: ${result.metadata?.rowCount || 0} rows, ${result.metadata?.columnCount || 0} columns`);
124
+ return result;
125
+ }
126
+ case "text": {
127
+ // Try text parsing - check if content is valid UTF-8 text
128
+ const textContent = content.toString("utf-8");
129
+ // Validate it's actually text (no null bytes, mostly printable)
130
+ if (this.isValidText(textContent)) {
131
+ return {
132
+ type: "text",
133
+ content: textContent,
134
+ mimeType: this.guessTextMimeType(textContent),
135
+ metadata: {
136
+ confidence: 70,
137
+ size: content.length,
138
+ },
139
+ };
140
+ }
141
+ throw new Error("Content does not appear to be valid text");
142
+ }
143
+ case "image": {
144
+ // Image requires magic bytes - can't fallback without detection
145
+ throw new Error("Image type requires binary detection, cannot fallback parse");
146
+ }
147
+ case "pdf": {
148
+ // PDF requires magic bytes - can't fallback without detection
149
+ throw new Error("PDF type requires binary detection, cannot fallback parse");
150
+ }
151
+ case "audio": {
152
+ // Audio requires magic bytes - can't fallback without detection
153
+ throw new Error("Audio type requires binary detection, cannot fallback parse");
154
+ }
155
+ default:
156
+ return null;
157
+ }
158
+ }
159
+ /**
160
+ * Check if content is valid text (UTF-8, mostly printable)
161
+ */
162
+ static isValidText(content) {
163
+ // Check for null bytes which indicate binary content
164
+ if (content.includes("\0")) {
165
+ return false;
166
+ }
167
+ // Check if content has reasonable amount of printable characters
168
+ let printableCount = 0;
169
+ for (let i = 0; i < content.length; i++) {
170
+ const code = content.charCodeAt(i);
171
+ if ((code >= 32 && code < 127) || // ASCII printable
172
+ code === 9 || // Tab
173
+ code === 10 || // Newline
174
+ code === 13 || // Carriage return
175
+ code > 127 // Unicode (non-ASCII)
176
+ ) {
177
+ printableCount++;
178
+ }
179
+ }
180
+ // At least 90% should be printable
181
+ return printableCount / content.length >= 0.9;
182
+ }
183
+ /**
184
+ * Guess the MIME type for text content based on content patterns
185
+ */
186
+ static guessTextMimeType(content) {
187
+ const trimmed = content.trim();
188
+ // Check for JSON
189
+ if ((trimmed.startsWith("{") && trimmed.endsWith("}")) ||
190
+ (trimmed.startsWith("[") && trimmed.endsWith("]"))) {
191
+ try {
192
+ JSON.parse(trimmed);
193
+ return "application/json";
194
+ }
195
+ catch {
196
+ // Not valid JSON, continue checking
197
+ }
198
+ }
199
+ // Check for XML/HTML using stricter detection
200
+ if (this.looksLikeXMLStrict(trimmed)) {
201
+ const isHTML = trimmed.includes("<!DOCTYPE html") ||
202
+ trimmed.toLowerCase().includes("<html") ||
203
+ trimmed.includes("<head") ||
204
+ trimmed.includes("<body");
205
+ return isHTML ? "text/html" : "application/xml";
206
+ }
207
+ // Check for YAML using robust multi-indicator detection
208
+ if (this.looksLikeYAMLStrict(trimmed)) {
209
+ return "application/yaml";
210
+ }
211
+ // Default to plain text
212
+ return "text/plain";
213
+ }
214
+ /**
215
+ * Strict YAML detection for guessTextMimeType
216
+ * Similar to ContentHeuristicStrategy but requires at least 2 indicators
217
+ * to avoid false positives from simple key: value patterns
218
+ */
219
+ static looksLikeYAMLStrict(text) {
220
+ if (text.length === 0) {
221
+ return false;
222
+ }
223
+ const lines = text.split("\n");
224
+ // For single-line content, only --- or ... qualify as YAML
225
+ if (lines.length === 1) {
226
+ return text === "---" || text === "...";
227
+ }
228
+ // Collect YAML indicators (requires at least 2 for positive detection)
229
+ const indicators = [];
230
+ // Indicator 1: Document start marker (---)
231
+ indicators.push(text.startsWith("---"));
232
+ // Indicator 2: Document end marker (...)
233
+ indicators.push(/^\.\.\.$|[\n]\.\.\.$/.test(text));
234
+ // Indicator 3: YAML list items (- followed by space)
235
+ indicators.push(/^[\s]*-\s+[^-]/m.test(text));
236
+ // Indicator 4: Multiple key-value pairs (at least 2)
237
+ const keyValuePattern = /^[\s]*[a-zA-Z_][a-zA-Z0-9_-]*:\s*(.+)$/;
238
+ const keyValueMatches = lines.filter((line) => keyValuePattern.test(line)).length;
239
+ indicators.push(keyValueMatches >= 2);
240
+ // Require at least 2 indicators for confident YAML detection
241
+ const matchCount = indicators.filter(Boolean).length;
242
+ return matchCount >= 2;
243
+ }
244
+ /**
245
+ * Strict XML detection for guessTextMimeType
246
+ * Ensures content has proper XML declaration or valid tag structure with closing tags
247
+ * Prevents false positives from arbitrary content starting with <
248
+ */
249
+ static looksLikeXMLStrict(content) {
250
+ // XML declaration is a definitive marker
251
+ if (content.startsWith("<?xml")) {
252
+ return true;
253
+ }
254
+ // Must start with < for XML/HTML
255
+ if (!content.startsWith("<")) {
256
+ return false;
257
+ }
258
+ // Check for HTML DOCTYPE declaration
259
+ if (content.includes("<!DOCTYPE html")) {
260
+ return true;
261
+ }
262
+ // Must have valid opening tag structure: <tagname
263
+ // Not just any < character like "< something"
264
+ const hasValidOpeningTag = /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?>/;
265
+ if (!hasValidOpeningTag.test(content)) {
266
+ return false;
267
+ }
268
+ // Must have at least one closing tag or self-closing tag to be valid XML/HTML
269
+ const hasClosingTag = /<\/[a-zA-Z][a-zA-Z0-9-]*>/.test(content);
270
+ const hasSelfClosingTag = /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?\s*\/\s*>/.test(content);
271
+ return hasClosingTag || hasSelfClosingTag;
272
+ }
66
273
  /**
67
274
  * Detect file type using multi-strategy approach
68
275
  * Stops at first strategy with confidence >= threshold (default: 80%)
@@ -136,7 +343,7 @@ export class FileDetector {
136
343
  return {
137
344
  type: "text",
138
345
  content: content.toString("utf-8"),
139
- mimeType: "text/plain",
346
+ mimeType: detection.mimeType || "text/plain",
140
347
  metadata: detection.metadata,
141
348
  };
142
349
  default:
@@ -354,6 +561,16 @@ class ExtensionStrategy {
354
561
  pdf: "pdf",
355
562
  txt: "text",
356
563
  md: "text",
564
+ json: "text",
565
+ xml: "text",
566
+ yaml: "text",
567
+ yml: "text",
568
+ html: "text",
569
+ htm: "text",
570
+ log: "text",
571
+ conf: "text",
572
+ cfg: "text",
573
+ ini: "text",
357
574
  };
358
575
  const type = typeMap[ext.toLowerCase()];
359
576
  return {
@@ -402,6 +619,16 @@ class ExtensionStrategy {
402
619
  pdf: "application/pdf",
403
620
  txt: "text/plain",
404
621
  md: "text/markdown",
622
+ json: "application/json",
623
+ xml: "application/xml",
624
+ yaml: "application/yaml",
625
+ yml: "application/yaml",
626
+ html: "text/html",
627
+ htm: "text/html",
628
+ log: "text/plain",
629
+ conf: "text/plain",
630
+ cfg: "text/plain",
631
+ ini: "text/plain",
405
632
  };
406
633
  return mimeMap[ext.toLowerCase()] || "application/octet-stream";
407
634
  }
@@ -421,27 +648,223 @@ class ExtensionStrategy {
421
648
  */
422
649
  class ContentHeuristicStrategy {
423
650
  async detect(input) {
424
- if (!Buffer.isBuffer(input)) {
651
+ let buffer;
652
+ if (Buffer.isBuffer(input)) {
653
+ buffer = input;
654
+ }
655
+ else if (typeof input === "string") {
656
+ // Try to load from file path or data URI
657
+ if (input.startsWith("data:")) {
658
+ // Data URI
659
+ const match = input.match(/^data:([^;]+);base64,(.+)$/);
660
+ if (!match) {
661
+ return this.unknown();
662
+ }
663
+ buffer = Buffer.from(match[2], "base64");
664
+ }
665
+ else if (input.startsWith("http://") || input.startsWith("https://")) {
666
+ // URL - can't analyze without making HTTP request in ContentHeuristic
667
+ return this.unknown();
668
+ }
669
+ else {
670
+ // File path - try to load it
671
+ try {
672
+ buffer = await readFile(input);
673
+ }
674
+ catch {
675
+ return this.unknown();
676
+ }
677
+ }
678
+ }
679
+ else {
425
680
  return this.unknown();
426
681
  }
427
- const sample = input.toString("utf-8", 0, Math.min(1000, input.length));
682
+ const sample = buffer.toString("utf-8", 0, Math.min(2000, buffer.length));
683
+ // Check for JSON first (more specific than CSV)
684
+ if (this.looksLikeJSON(sample)) {
685
+ return this.result("text", "application/json", 75);
686
+ }
687
+ // Check CSV after JSON (CSV is more generic)
428
688
  if (this.looksLikeCSV(sample)) {
429
689
  return this.result("csv", "text/csv", 75);
430
690
  }
691
+ // Check for XML/HTML
692
+ if (this.looksLikeXML(sample)) {
693
+ const isHTML = sample.includes("<!DOCTYPE html") || sample.includes("<html");
694
+ return this.result("text", isHTML ? "text/html" : "application/xml", 70);
695
+ }
696
+ // Check for YAML
697
+ if (this.looksLikeYAML(sample)) {
698
+ return this.result("text", "application/yaml", 70);
699
+ }
700
+ // Check for plain text (if mostly printable characters)
701
+ if (this.looksLikeText(sample)) {
702
+ return this.result("text", "text/plain", 60);
703
+ }
431
704
  return this.unknown();
432
705
  }
433
706
  looksLikeCSV(text) {
434
- const lines = text.split("\n").slice(0, 5);
707
+ const lines = text.trim().split("\n");
435
708
  if (lines.length < 2) {
436
709
  return false;
437
710
  }
438
- const hasCommas = lines.every((line) => line.includes(","));
439
- if (!hasCommas) {
711
+ // Detect delimiter from first line
712
+ const firstLine = lines[0];
713
+ const delimiters = [",", ";", "\t", "|"];
714
+ const delimiter = delimiters.find((d) => firstLine.includes(d));
715
+ // Single-column CSV check (no delimiter)
716
+ if (!delimiter) {
717
+ // Exclude content that looks like other structured formats
718
+ // YAML indicators
719
+ if (text.startsWith("---") ||
720
+ /^[\s]*-\s+/m.test(text) ||
721
+ /^[\s]*[a-zA-Z_][a-zA-Z0-9_-]*:\s*/m.test(text)) {
722
+ return false;
723
+ }
724
+ // XML/HTML indicators
725
+ if (text.startsWith("<") || text.includes("<?xml")) {
726
+ return false;
727
+ }
728
+ // JSON indicators
729
+ if ((text.startsWith("{") && text.includes("}")) ||
730
+ (text.startsWith("[") && text.includes("]"))) {
731
+ return false;
732
+ }
733
+ // Exclude prose/sentences (look for sentence patterns)
734
+ // Check for multiple words per line (prose indicator)
735
+ const hasProsePattern = lines.some((line) => {
736
+ const words = line.trim().split(/\s+/);
737
+ return words.length > 4; // More than 4 words suggests prose, not data
738
+ });
739
+ if (hasProsePattern) {
740
+ return false;
741
+ }
742
+ // Check for consistent line structure (not binary, reasonable lengths)
743
+ const hasReasonableLengths = lines.every((l) => l.length > 0 && l.length < 1000);
744
+ const noBinaryChars = !text.includes("\0");
745
+ // Single-column CSVs should have VERY uniform line lengths
746
+ // (data values like IDs, codes, numbers - not varied content)
747
+ const lengths = lines.map((l) => l.length);
748
+ const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
749
+ const variance = lengths.reduce((sum, len) => sum + Math.pow(len - avgLength, 2), 0) /
750
+ lengths.length;
751
+ const stdDev = Math.sqrt(variance);
752
+ // Single-column CSVs can contain varied data (names, cities, emails, etc.)
753
+ // but should still show some consistency compared to random text
754
+ const hasUniformLengths = stdDev / avgLength < 0.75;
755
+ return hasReasonableLengths && noBinaryChars && hasUniformLengths;
756
+ }
757
+ // Count delimiters per line and check consistency
758
+ const delimRegex = delimiter === "|" ? /\|/g : new RegExp(delimiter, "g");
759
+ const counts = lines.map((line) => (line.match(delimRegex) || []).length);
760
+ const firstCount = counts[0];
761
+ const consistentLines = counts.filter((c) => c === firstCount).length;
762
+ return consistentLines / lines.length >= 0.8;
763
+ }
764
+ looksLikeJSON(text) {
765
+ // hasJsonMarkers now does full validation including JSON.parse
766
+ return hasJsonMarkers(text);
767
+ }
768
+ looksLikeXML(text) {
769
+ const trimmed = text.trim();
770
+ // XML declaration is a definitive marker
771
+ if (trimmed.startsWith("<?xml")) {
772
+ return true;
773
+ }
774
+ // Check for HTML DOCTYPE or tags
775
+ if (trimmed.includes("<!DOCTYPE html") ||
776
+ trimmed.toLowerCase().includes("<html")) {
777
+ return true;
778
+ }
779
+ // Strict validation for arbitrary content starting with <:
780
+ // Must have proper tag structure with at least one closing tag
781
+ if (!trimmed.startsWith("<")) {
782
+ return false;
783
+ }
784
+ // Must have valid opening tag structure: <tagname followed by space or >
785
+ // Not just any < character
786
+ const hasValidOpeningTag = /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?>/;
787
+ if (!hasValidOpeningTag.test(trimmed)) {
440
788
  return false;
441
789
  }
442
- const columnCounts = lines.map((line) => line.split(",").length);
443
- const uniqueCounts = new Set(columnCounts);
444
- return uniqueCounts.size === 1 && columnCounts[0] >= 2;
790
+ // Must have at least one closing tag or self-closing tag to be valid XML/HTML
791
+ const hasClosingTag = /<\/[a-zA-Z][a-zA-Z0-9-]*>/.test(trimmed);
792
+ const hasSelfClosingTag = /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?\s*\/\s*>/.test(trimmed);
793
+ return hasClosingTag || hasSelfClosingTag;
794
+ }
795
+ looksLikeYAML(text) {
796
+ const trimmed = text.trim();
797
+ if (trimmed.length === 0) {
798
+ return false;
799
+ }
800
+ // For single-line content, be very conservative about YAML detection
801
+ const lines = trimmed.split("\n");
802
+ if (lines.length === 1) {
803
+ // Single line can only be YAML if it's a document marker
804
+ return trimmed === "---" || trimmed === "...";
805
+ }
806
+ // Collect YAML indicators (requires at least 2 for positive detection)
807
+ const indicators = [];
808
+ // Indicator 1: Document start marker (---)
809
+ indicators.push(trimmed.startsWith("---"));
810
+ // Indicator 2: Document end marker (...) or appears within content
811
+ indicators.push(/^\.\.\.$|[\n]\.\.\.$/.test(trimmed));
812
+ // Indicator 3: YAML list items (- followed by space at line start)
813
+ indicators.push(/^[\s]*-\s+[^-]/m.test(trimmed));
814
+ // Indicator 4: Multiple key-value pairs (at least 2)
815
+ // Allow hyphens and underscores in keys, support nested keys
816
+ const keyValuePattern = /^[\s]*[a-zA-Z_][a-zA-Z0-9_-]*:\s*(.+)$/;
817
+ const keyValueMatches = lines.filter((line) => keyValuePattern.test(line)).length;
818
+ indicators.push(keyValueMatches >= 2);
819
+ // Indicator 5: Nested indentation pattern (common in YAML objects/lists)
820
+ let hasNesting = false;
821
+ const sampleLines = lines.slice(0, 10);
822
+ for (let i = 0; i < sampleLines.length - 1; i++) {
823
+ const currentLine = sampleLines[i].trim();
824
+ const nextLine = sampleLines[i + 1];
825
+ if (currentLine.length > 0 &&
826
+ nextLine.length > 0 &&
827
+ /[:-]$/.test(currentLine)) {
828
+ const currentIndent = sampleLines[i].match(/^[\s]*/)?.[0].length ?? 0;
829
+ const nextIndent = nextLine.match(/^[\s]*/)?.[0].length ?? 0;
830
+ if (nextIndent > currentIndent) {
831
+ hasNesting = true;
832
+ break;
833
+ }
834
+ }
835
+ }
836
+ indicators.push(hasNesting);
837
+ // Indicator 6: YAML comments (# followed by space)
838
+ indicators.push(/^\s*#\s+/m.test(trimmed));
839
+ // Indicator 7: List continuation (multiple items with - )
840
+ const listItemCount = lines.filter((line) => /^[\s]*-[\s]/.test(line)).length;
841
+ indicators.push(listItemCount >= 2);
842
+ // Indicator 8: Inline maps or complex structures
843
+ indicators.push(/{\s*[a-zA-Z_]/.test(trimmed) || /\[.*\]/.test(trimmed));
844
+ // Require at least 2 indicators for confident YAML detection
845
+ const matchCount = indicators.filter(Boolean).length;
846
+ return matchCount >= 2;
847
+ }
848
+ looksLikeText(text) {
849
+ // Check if content has null bytes (binary indicator)
850
+ if (text.includes("\0")) {
851
+ return false;
852
+ }
853
+ // Count printable characters
854
+ let printable = 0;
855
+ for (let i = 0; i < text.length; i++) {
856
+ const code = text.charCodeAt(i);
857
+ if ((code >= 32 && code < 127) || // ASCII printable
858
+ code === 9 || // Tab
859
+ code === 10 || // Newline
860
+ code === 13 || // Carriage return
861
+ code > 127 // Unicode
862
+ ) {
863
+ printable++;
864
+ }
865
+ }
866
+ // At least 85% should be printable for text
867
+ return printable / text.length >= 0.85;
445
868
  }
446
869
  result(type, mime, confidence) {
447
870
  return {
@@ -463,8 +463,12 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
463
463
  provider: provider,
464
464
  });
465
465
  if (Buffer.isBuffer(result.content)) {
466
- pdfFiles.push({ buffer: result.content, filename });
467
- logger.info(`[PDF] ✅ Queued for multimodal: ${filename}`);
466
+ pdfFiles.push({
467
+ buffer: result.content,
468
+ filename,
469
+ pageCount: result.metadata?.estimatedPages ?? null,
470
+ });
471
+ logger.info(`[PDF] ✅ Queued for multimodal: ${filename} (${result.metadata?.estimatedPages ?? "unknown"} pages)`);
468
472
  }
469
473
  }
470
474
  catch (error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@juspay/neurolink",
3
- "version": "8.19.0",
3
+ "version": "8.19.1",
4
4
  "description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 9 major providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
5
5
  "author": {
6
6
  "name": "Juspay Technologies",