@juspay/neurolink 8.19.0 → 8.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/adapters/providerImageAdapter.d.ts +12 -0
- package/dist/adapters/providerImageAdapter.js +30 -3
- package/dist/cli/loop/optionsSchema.js +4 -0
- package/dist/config/conversationMemory.d.ts +17 -1
- package/dist/config/conversationMemory.js +37 -10
- package/dist/core/baseProvider.js +23 -13
- package/dist/core/conversationMemoryFactory.js +0 -3
- package/dist/core/conversationMemoryInitializer.js +1 -9
- package/dist/core/conversationMemoryManager.d.ts +31 -8
- package/dist/core/conversationMemoryManager.js +174 -80
- package/dist/core/modules/GenerationHandler.d.ts +5 -0
- package/dist/core/modules/GenerationHandler.js +56 -9
- package/dist/core/redisConversationMemoryManager.d.ts +28 -13
- package/dist/core/redisConversationMemoryManager.js +211 -121
- package/dist/lib/adapters/providerImageAdapter.d.ts +12 -0
- package/dist/lib/adapters/providerImageAdapter.js +30 -3
- package/dist/lib/config/conversationMemory.d.ts +17 -1
- package/dist/lib/config/conversationMemory.js +37 -10
- package/dist/lib/core/baseProvider.js +23 -13
- package/dist/lib/core/conversationMemoryFactory.js +0 -3
- package/dist/lib/core/conversationMemoryInitializer.js +1 -9
- package/dist/lib/core/conversationMemoryManager.d.ts +31 -8
- package/dist/lib/core/conversationMemoryManager.js +174 -80
- package/dist/lib/core/modules/GenerationHandler.d.ts +5 -0
- package/dist/lib/core/modules/GenerationHandler.js +56 -9
- package/dist/lib/core/redisConversationMemoryManager.d.ts +28 -13
- package/dist/lib/core/redisConversationMemoryManager.js +211 -121
- package/dist/lib/mcp/servers/agent/directToolsServer.js +5 -0
- package/dist/lib/mcp/toolRegistry.js +5 -0
- package/dist/lib/neurolink.js +29 -22
- package/dist/lib/types/conversation.d.ts +58 -9
- package/dist/lib/types/generateTypes.d.ts +1 -0
- package/dist/lib/types/sdkTypes.d.ts +1 -1
- package/dist/lib/types/streamTypes.d.ts +1 -0
- package/dist/lib/utils/conversationMemory.d.ts +43 -1
- package/dist/lib/utils/conversationMemory.js +181 -5
- package/dist/lib/utils/conversationMemoryUtils.js +16 -1
- package/dist/lib/utils/fileDetector.d.ts +25 -0
- package/dist/lib/utils/fileDetector.js +433 -10
- package/dist/lib/utils/messageBuilder.js +6 -2
- package/dist/lib/utils/redis.js +0 -5
- package/dist/mcp/servers/agent/directToolsServer.js +5 -0
- package/dist/mcp/toolRegistry.js +5 -0
- package/dist/neurolink.js +29 -22
- package/dist/types/conversation.d.ts +58 -9
- package/dist/types/generateTypes.d.ts +1 -0
- package/dist/types/sdkTypes.d.ts +1 -1
- package/dist/types/streamTypes.d.ts +1 -0
- package/dist/utils/conversationMemory.d.ts +43 -1
- package/dist/utils/conversationMemory.js +181 -5
- package/dist/utils/conversationMemoryUtils.js +16 -1
- package/dist/utils/fileDetector.d.ts +25 -0
- package/dist/utils/fileDetector.js +433 -10
- package/dist/utils/messageBuilder.js +6 -2
- package/dist/utils/redis.js +0 -5
- package/package.json +1 -1
|
@@ -9,6 +9,29 @@ import { logger } from "./logger.js";
|
|
|
9
9
|
import { CSVProcessor } from "./csvProcessor.js";
|
|
10
10
|
import { ImageProcessor } from "./imageProcessor.js";
|
|
11
11
|
import { PDFProcessor } from "./pdfProcessor.js";
|
|
12
|
+
/**
|
|
13
|
+
* Check if text has JSON markers (starts with { or [ and ends with corresponding closing bracket)
|
|
14
|
+
*/
|
|
15
|
+
function hasJsonMarkers(text) {
|
|
16
|
+
const trimmed = text.trim();
|
|
17
|
+
if (!trimmed) {
|
|
18
|
+
return false;
|
|
19
|
+
}
|
|
20
|
+
const firstChar = trimmed[0];
|
|
21
|
+
const lastChar = trimmed[trimmed.length - 1];
|
|
22
|
+
const hasMatchingBrackets = (firstChar === "{" && lastChar === "}") ||
|
|
23
|
+
(firstChar === "[" && lastChar === "]");
|
|
24
|
+
if (!hasMatchingBrackets) {
|
|
25
|
+
return false;
|
|
26
|
+
}
|
|
27
|
+
try {
|
|
28
|
+
JSON.parse(trimmed);
|
|
29
|
+
return true;
|
|
30
|
+
}
|
|
31
|
+
catch {
|
|
32
|
+
return false;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
12
35
|
/**
|
|
13
36
|
* Format file size in human-readable units
|
|
14
37
|
*/
|
|
@@ -54,15 +77,199 @@ export class FileDetector {
|
|
|
54
77
|
*/
|
|
55
78
|
static async detectAndProcess(input, options) {
|
|
56
79
|
const detection = await this.detect(input, options);
|
|
80
|
+
// FD-018: Comprehensive fallback parsing for extension-less files
|
|
81
|
+
// When file detection returns "unknown" or doesn't match allowedTypes,
|
|
82
|
+
// attempt parsing for each allowed type before failing. This handles cases like Slack
|
|
83
|
+
// files named "file-1", "file-2" without extensions that could be CSV, JSON, or text.
|
|
57
84
|
if (options?.allowedTypes &&
|
|
58
85
|
!options.allowedTypes.includes(detection.type)) {
|
|
59
|
-
|
|
86
|
+
// Try fallback parsing for both "unknown" types and when detection doesn't match allowed types
|
|
87
|
+
const content = await this.loadContent(input, detection, options);
|
|
88
|
+
const errors = [];
|
|
89
|
+
// Try each allowed type in order of specificity
|
|
90
|
+
for (const allowedType of options.allowedTypes) {
|
|
91
|
+
try {
|
|
92
|
+
const result = await this.tryFallbackParsing(content, allowedType, options);
|
|
93
|
+
if (result) {
|
|
94
|
+
logger.info(`[FileDetector] ✅ ${allowedType.toUpperCase()} fallback successful`);
|
|
95
|
+
return result;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
catch (error) {
|
|
99
|
+
const errorMsg = error instanceof Error ? error.message : String(error);
|
|
100
|
+
errors.push(`${allowedType}: ${errorMsg}`);
|
|
101
|
+
logger.debug(`[FileDetector] ${allowedType} fallback failed: ${errorMsg}`);
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
// All fallbacks failed
|
|
105
|
+
throw new Error(`File type detection failed and all fallback parsing attempts failed. Original detection: ${detection.type}. Attempted types: ${options.allowedTypes.join(", ")}. Errors: ${errors.join("; ")}`);
|
|
60
106
|
}
|
|
61
107
|
const content = await this.loadContent(input, detection, options);
|
|
62
108
|
// Extract CSV-specific options from FileDetectorOptions
|
|
63
109
|
const csvOptions = options?.csvOptions;
|
|
64
110
|
return await this.processFile(content, detection, csvOptions, options?.provider);
|
|
65
111
|
}
|
|
112
|
+
/**
|
|
113
|
+
* Try fallback parsing for a specific file type
|
|
114
|
+
* Used when file detection returns "unknown" but we want to try parsing anyway
|
|
115
|
+
*/
|
|
116
|
+
static async tryFallbackParsing(content, fileType, options) {
|
|
117
|
+
logger.info(`[FileDetector] Attempting ${fileType.toUpperCase()} fallback parsing`);
|
|
118
|
+
switch (fileType) {
|
|
119
|
+
case "csv": {
|
|
120
|
+
// Try CSV parsing
|
|
121
|
+
const csvOptions = options?.csvOptions;
|
|
122
|
+
const result = await CSVProcessor.process(content, csvOptions);
|
|
123
|
+
logger.info(`[FileDetector] CSV fallback: ${result.metadata?.rowCount || 0} rows, ${result.metadata?.columnCount || 0} columns`);
|
|
124
|
+
return result;
|
|
125
|
+
}
|
|
126
|
+
case "text": {
|
|
127
|
+
// Try text parsing - check if content is valid UTF-8 text
|
|
128
|
+
const textContent = content.toString("utf-8");
|
|
129
|
+
// Validate it's actually text (no null bytes, mostly printable)
|
|
130
|
+
if (this.isValidText(textContent)) {
|
|
131
|
+
return {
|
|
132
|
+
type: "text",
|
|
133
|
+
content: textContent,
|
|
134
|
+
mimeType: this.guessTextMimeType(textContent),
|
|
135
|
+
metadata: {
|
|
136
|
+
confidence: 70,
|
|
137
|
+
size: content.length,
|
|
138
|
+
},
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
throw new Error("Content does not appear to be valid text");
|
|
142
|
+
}
|
|
143
|
+
case "image": {
|
|
144
|
+
// Image requires magic bytes - can't fallback without detection
|
|
145
|
+
throw new Error("Image type requires binary detection, cannot fallback parse");
|
|
146
|
+
}
|
|
147
|
+
case "pdf": {
|
|
148
|
+
// PDF requires magic bytes - can't fallback without detection
|
|
149
|
+
throw new Error("PDF type requires binary detection, cannot fallback parse");
|
|
150
|
+
}
|
|
151
|
+
case "audio": {
|
|
152
|
+
// Audio requires magic bytes - can't fallback without detection
|
|
153
|
+
throw new Error("Audio type requires binary detection, cannot fallback parse");
|
|
154
|
+
}
|
|
155
|
+
default:
|
|
156
|
+
return null;
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
/**
|
|
160
|
+
* Check if content is valid text (UTF-8, mostly printable)
|
|
161
|
+
*/
|
|
162
|
+
static isValidText(content) {
|
|
163
|
+
// Check for null bytes which indicate binary content
|
|
164
|
+
if (content.includes("\0")) {
|
|
165
|
+
return false;
|
|
166
|
+
}
|
|
167
|
+
// Check if content has reasonable amount of printable characters
|
|
168
|
+
let printableCount = 0;
|
|
169
|
+
for (let i = 0; i < content.length; i++) {
|
|
170
|
+
const code = content.charCodeAt(i);
|
|
171
|
+
if ((code >= 32 && code < 127) || // ASCII printable
|
|
172
|
+
code === 9 || // Tab
|
|
173
|
+
code === 10 || // Newline
|
|
174
|
+
code === 13 || // Carriage return
|
|
175
|
+
code > 127 // Unicode (non-ASCII)
|
|
176
|
+
) {
|
|
177
|
+
printableCount++;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
// At least 90% should be printable
|
|
181
|
+
return printableCount / content.length >= 0.9;
|
|
182
|
+
}
|
|
183
|
+
/**
|
|
184
|
+
* Guess the MIME type for text content based on content patterns
|
|
185
|
+
*/
|
|
186
|
+
static guessTextMimeType(content) {
|
|
187
|
+
const trimmed = content.trim();
|
|
188
|
+
// Check for JSON
|
|
189
|
+
if ((trimmed.startsWith("{") && trimmed.endsWith("}")) ||
|
|
190
|
+
(trimmed.startsWith("[") && trimmed.endsWith("]"))) {
|
|
191
|
+
try {
|
|
192
|
+
JSON.parse(trimmed);
|
|
193
|
+
return "application/json";
|
|
194
|
+
}
|
|
195
|
+
catch {
|
|
196
|
+
// Not valid JSON, continue checking
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
// Check for XML/HTML using stricter detection
|
|
200
|
+
if (this.looksLikeXMLStrict(trimmed)) {
|
|
201
|
+
const isHTML = trimmed.includes("<!DOCTYPE html") ||
|
|
202
|
+
trimmed.toLowerCase().includes("<html") ||
|
|
203
|
+
trimmed.includes("<head") ||
|
|
204
|
+
trimmed.includes("<body");
|
|
205
|
+
return isHTML ? "text/html" : "application/xml";
|
|
206
|
+
}
|
|
207
|
+
// Check for YAML using robust multi-indicator detection
|
|
208
|
+
if (this.looksLikeYAMLStrict(trimmed)) {
|
|
209
|
+
return "application/yaml";
|
|
210
|
+
}
|
|
211
|
+
// Default to plain text
|
|
212
|
+
return "text/plain";
|
|
213
|
+
}
|
|
214
|
+
/**
|
|
215
|
+
* Strict YAML detection for guessTextMimeType
|
|
216
|
+
* Similar to ContentHeuristicStrategy but requires at least 2 indicators
|
|
217
|
+
* to avoid false positives from simple key: value patterns
|
|
218
|
+
*/
|
|
219
|
+
static looksLikeYAMLStrict(text) {
|
|
220
|
+
if (text.length === 0) {
|
|
221
|
+
return false;
|
|
222
|
+
}
|
|
223
|
+
const lines = text.split("\n");
|
|
224
|
+
// For single-line content, only --- or ... qualify as YAML
|
|
225
|
+
if (lines.length === 1) {
|
|
226
|
+
return text === "---" || text === "...";
|
|
227
|
+
}
|
|
228
|
+
// Collect YAML indicators (requires at least 2 for positive detection)
|
|
229
|
+
const indicators = [];
|
|
230
|
+
// Indicator 1: Document start marker (---)
|
|
231
|
+
indicators.push(text.startsWith("---"));
|
|
232
|
+
// Indicator 2: Document end marker (...)
|
|
233
|
+
indicators.push(/^\.\.\.$|[\n]\.\.\.$/.test(text));
|
|
234
|
+
// Indicator 3: YAML list items (- followed by space)
|
|
235
|
+
indicators.push(/^[\s]*-\s+[^-]/m.test(text));
|
|
236
|
+
// Indicator 4: Multiple key-value pairs (at least 2)
|
|
237
|
+
const keyValuePattern = /^[\s]*[a-zA-Z_][a-zA-Z0-9_-]*:\s*(.+)$/;
|
|
238
|
+
const keyValueMatches = lines.filter((line) => keyValuePattern.test(line)).length;
|
|
239
|
+
indicators.push(keyValueMatches >= 2);
|
|
240
|
+
// Require at least 2 indicators for confident YAML detection
|
|
241
|
+
const matchCount = indicators.filter(Boolean).length;
|
|
242
|
+
return matchCount >= 2;
|
|
243
|
+
}
|
|
244
|
+
/**
|
|
245
|
+
* Strict XML detection for guessTextMimeType
|
|
246
|
+
* Ensures content has proper XML declaration or valid tag structure with closing tags
|
|
247
|
+
* Prevents false positives from arbitrary content starting with <
|
|
248
|
+
*/
|
|
249
|
+
static looksLikeXMLStrict(content) {
|
|
250
|
+
// XML declaration is a definitive marker
|
|
251
|
+
if (content.startsWith("<?xml")) {
|
|
252
|
+
return true;
|
|
253
|
+
}
|
|
254
|
+
// Must start with < for XML/HTML
|
|
255
|
+
if (!content.startsWith("<")) {
|
|
256
|
+
return false;
|
|
257
|
+
}
|
|
258
|
+
// Check for HTML DOCTYPE declaration
|
|
259
|
+
if (content.includes("<!DOCTYPE html")) {
|
|
260
|
+
return true;
|
|
261
|
+
}
|
|
262
|
+
// Must have valid opening tag structure: <tagname
|
|
263
|
+
// Not just any < character like "< something"
|
|
264
|
+
const hasValidOpeningTag = /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?>/;
|
|
265
|
+
if (!hasValidOpeningTag.test(content)) {
|
|
266
|
+
return false;
|
|
267
|
+
}
|
|
268
|
+
// Must have at least one closing tag or self-closing tag to be valid XML/HTML
|
|
269
|
+
const hasClosingTag = /<\/[a-zA-Z][a-zA-Z0-9-]*>/.test(content);
|
|
270
|
+
const hasSelfClosingTag = /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?\s*\/\s*>/.test(content);
|
|
271
|
+
return hasClosingTag || hasSelfClosingTag;
|
|
272
|
+
}
|
|
66
273
|
/**
|
|
67
274
|
* Detect file type using multi-strategy approach
|
|
68
275
|
* Stops at first strategy with confidence >= threshold (default: 80%)
|
|
@@ -136,7 +343,7 @@ export class FileDetector {
|
|
|
136
343
|
return {
|
|
137
344
|
type: "text",
|
|
138
345
|
content: content.toString("utf-8"),
|
|
139
|
-
mimeType: "text/plain",
|
|
346
|
+
mimeType: detection.mimeType || "text/plain",
|
|
140
347
|
metadata: detection.metadata,
|
|
141
348
|
};
|
|
142
349
|
default:
|
|
@@ -354,6 +561,16 @@ class ExtensionStrategy {
|
|
|
354
561
|
pdf: "pdf",
|
|
355
562
|
txt: "text",
|
|
356
563
|
md: "text",
|
|
564
|
+
json: "text",
|
|
565
|
+
xml: "text",
|
|
566
|
+
yaml: "text",
|
|
567
|
+
yml: "text",
|
|
568
|
+
html: "text",
|
|
569
|
+
htm: "text",
|
|
570
|
+
log: "text",
|
|
571
|
+
conf: "text",
|
|
572
|
+
cfg: "text",
|
|
573
|
+
ini: "text",
|
|
357
574
|
};
|
|
358
575
|
const type = typeMap[ext.toLowerCase()];
|
|
359
576
|
return {
|
|
@@ -402,6 +619,16 @@ class ExtensionStrategy {
|
|
|
402
619
|
pdf: "application/pdf",
|
|
403
620
|
txt: "text/plain",
|
|
404
621
|
md: "text/markdown",
|
|
622
|
+
json: "application/json",
|
|
623
|
+
xml: "application/xml",
|
|
624
|
+
yaml: "application/yaml",
|
|
625
|
+
yml: "application/yaml",
|
|
626
|
+
html: "text/html",
|
|
627
|
+
htm: "text/html",
|
|
628
|
+
log: "text/plain",
|
|
629
|
+
conf: "text/plain",
|
|
630
|
+
cfg: "text/plain",
|
|
631
|
+
ini: "text/plain",
|
|
405
632
|
};
|
|
406
633
|
return mimeMap[ext.toLowerCase()] || "application/octet-stream";
|
|
407
634
|
}
|
|
@@ -421,27 +648,223 @@ class ExtensionStrategy {
|
|
|
421
648
|
*/
|
|
422
649
|
class ContentHeuristicStrategy {
|
|
423
650
|
async detect(input) {
|
|
424
|
-
|
|
651
|
+
let buffer;
|
|
652
|
+
if (Buffer.isBuffer(input)) {
|
|
653
|
+
buffer = input;
|
|
654
|
+
}
|
|
655
|
+
else if (typeof input === "string") {
|
|
656
|
+
// Try to load from file path or data URI
|
|
657
|
+
if (input.startsWith("data:")) {
|
|
658
|
+
// Data URI
|
|
659
|
+
const match = input.match(/^data:([^;]+);base64,(.+)$/);
|
|
660
|
+
if (!match) {
|
|
661
|
+
return this.unknown();
|
|
662
|
+
}
|
|
663
|
+
buffer = Buffer.from(match[2], "base64");
|
|
664
|
+
}
|
|
665
|
+
else if (input.startsWith("http://") || input.startsWith("https://")) {
|
|
666
|
+
// URL - can't analyze without making HTTP request in ContentHeuristic
|
|
667
|
+
return this.unknown();
|
|
668
|
+
}
|
|
669
|
+
else {
|
|
670
|
+
// File path - try to load it
|
|
671
|
+
try {
|
|
672
|
+
buffer = await readFile(input);
|
|
673
|
+
}
|
|
674
|
+
catch {
|
|
675
|
+
return this.unknown();
|
|
676
|
+
}
|
|
677
|
+
}
|
|
678
|
+
}
|
|
679
|
+
else {
|
|
425
680
|
return this.unknown();
|
|
426
681
|
}
|
|
427
|
-
const sample =
|
|
682
|
+
const sample = buffer.toString("utf-8", 0, Math.min(2000, buffer.length));
|
|
683
|
+
// Check for JSON first (more specific than CSV)
|
|
684
|
+
if (this.looksLikeJSON(sample)) {
|
|
685
|
+
return this.result("text", "application/json", 75);
|
|
686
|
+
}
|
|
687
|
+
// Check CSV after JSON (CSV is more generic)
|
|
428
688
|
if (this.looksLikeCSV(sample)) {
|
|
429
689
|
return this.result("csv", "text/csv", 75);
|
|
430
690
|
}
|
|
691
|
+
// Check for XML/HTML
|
|
692
|
+
if (this.looksLikeXML(sample)) {
|
|
693
|
+
const isHTML = sample.includes("<!DOCTYPE html") || sample.includes("<html");
|
|
694
|
+
return this.result("text", isHTML ? "text/html" : "application/xml", 70);
|
|
695
|
+
}
|
|
696
|
+
// Check for YAML
|
|
697
|
+
if (this.looksLikeYAML(sample)) {
|
|
698
|
+
return this.result("text", "application/yaml", 70);
|
|
699
|
+
}
|
|
700
|
+
// Check for plain text (if mostly printable characters)
|
|
701
|
+
if (this.looksLikeText(sample)) {
|
|
702
|
+
return this.result("text", "text/plain", 60);
|
|
703
|
+
}
|
|
431
704
|
return this.unknown();
|
|
432
705
|
}
|
|
433
706
|
looksLikeCSV(text) {
|
|
434
|
-
const lines = text.split("\n")
|
|
707
|
+
const lines = text.trim().split("\n");
|
|
435
708
|
if (lines.length < 2) {
|
|
436
709
|
return false;
|
|
437
710
|
}
|
|
438
|
-
|
|
439
|
-
|
|
711
|
+
// Detect delimiter from first line
|
|
712
|
+
const firstLine = lines[0];
|
|
713
|
+
const delimiters = [",", ";", "\t", "|"];
|
|
714
|
+
const delimiter = delimiters.find((d) => firstLine.includes(d));
|
|
715
|
+
// Single-column CSV check (no delimiter)
|
|
716
|
+
if (!delimiter) {
|
|
717
|
+
// Exclude content that looks like other structured formats
|
|
718
|
+
// YAML indicators
|
|
719
|
+
if (text.startsWith("---") ||
|
|
720
|
+
/^[\s]*-\s+/m.test(text) ||
|
|
721
|
+
/^[\s]*[a-zA-Z_][a-zA-Z0-9_-]*:\s*/m.test(text)) {
|
|
722
|
+
return false;
|
|
723
|
+
}
|
|
724
|
+
// XML/HTML indicators
|
|
725
|
+
if (text.startsWith("<") || text.includes("<?xml")) {
|
|
726
|
+
return false;
|
|
727
|
+
}
|
|
728
|
+
// JSON indicators
|
|
729
|
+
if ((text.startsWith("{") && text.includes("}")) ||
|
|
730
|
+
(text.startsWith("[") && text.includes("]"))) {
|
|
731
|
+
return false;
|
|
732
|
+
}
|
|
733
|
+
// Exclude prose/sentences (look for sentence patterns)
|
|
734
|
+
// Check for multiple words per line (prose indicator)
|
|
735
|
+
const hasProsePattern = lines.some((line) => {
|
|
736
|
+
const words = line.trim().split(/\s+/);
|
|
737
|
+
return words.length > 4; // More than 4 words suggests prose, not data
|
|
738
|
+
});
|
|
739
|
+
if (hasProsePattern) {
|
|
740
|
+
return false;
|
|
741
|
+
}
|
|
742
|
+
// Check for consistent line structure (not binary, reasonable lengths)
|
|
743
|
+
const hasReasonableLengths = lines.every((l) => l.length > 0 && l.length < 1000);
|
|
744
|
+
const noBinaryChars = !text.includes("\0");
|
|
745
|
+
// Single-column CSVs should have VERY uniform line lengths
|
|
746
|
+
// (data values like IDs, codes, numbers - not varied content)
|
|
747
|
+
const lengths = lines.map((l) => l.length);
|
|
748
|
+
const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
|
|
749
|
+
const variance = lengths.reduce((sum, len) => sum + Math.pow(len - avgLength, 2), 0) /
|
|
750
|
+
lengths.length;
|
|
751
|
+
const stdDev = Math.sqrt(variance);
|
|
752
|
+
// Single-column CSVs can contain varied data (names, cities, emails, etc.)
|
|
753
|
+
// but should still show some consistency compared to random text
|
|
754
|
+
const hasUniformLengths = stdDev / avgLength < 0.75;
|
|
755
|
+
return hasReasonableLengths && noBinaryChars && hasUniformLengths;
|
|
756
|
+
}
|
|
757
|
+
// Count delimiters per line and check consistency
|
|
758
|
+
const delimRegex = delimiter === "|" ? /\|/g : new RegExp(delimiter, "g");
|
|
759
|
+
const counts = lines.map((line) => (line.match(delimRegex) || []).length);
|
|
760
|
+
const firstCount = counts[0];
|
|
761
|
+
const consistentLines = counts.filter((c) => c === firstCount).length;
|
|
762
|
+
return consistentLines / lines.length >= 0.8;
|
|
763
|
+
}
|
|
764
|
+
looksLikeJSON(text) {
|
|
765
|
+
// hasJsonMarkers now does full validation including JSON.parse
|
|
766
|
+
return hasJsonMarkers(text);
|
|
767
|
+
}
|
|
768
|
+
looksLikeXML(text) {
|
|
769
|
+
const trimmed = text.trim();
|
|
770
|
+
// XML declaration is a definitive marker
|
|
771
|
+
if (trimmed.startsWith("<?xml")) {
|
|
772
|
+
return true;
|
|
773
|
+
}
|
|
774
|
+
// Check for HTML DOCTYPE or tags
|
|
775
|
+
if (trimmed.includes("<!DOCTYPE html") ||
|
|
776
|
+
trimmed.toLowerCase().includes("<html")) {
|
|
777
|
+
return true;
|
|
778
|
+
}
|
|
779
|
+
// Strict validation for arbitrary content starting with <:
|
|
780
|
+
// Must have proper tag structure with at least one closing tag
|
|
781
|
+
if (!trimmed.startsWith("<")) {
|
|
782
|
+
return false;
|
|
783
|
+
}
|
|
784
|
+
// Must have valid opening tag structure: <tagname followed by space or >
|
|
785
|
+
// Not just any < character
|
|
786
|
+
const hasValidOpeningTag = /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?>/;
|
|
787
|
+
if (!hasValidOpeningTag.test(trimmed)) {
|
|
440
788
|
return false;
|
|
441
789
|
}
|
|
442
|
-
|
|
443
|
-
const
|
|
444
|
-
|
|
790
|
+
// Must have at least one closing tag or self-closing tag to be valid XML/HTML
|
|
791
|
+
const hasClosingTag = /<\/[a-zA-Z][a-zA-Z0-9-]*>/.test(trimmed);
|
|
792
|
+
const hasSelfClosingTag = /<[a-zA-Z][a-zA-Z0-9-]*(?:\s[^>]*)?\s*\/\s*>/.test(trimmed);
|
|
793
|
+
return hasClosingTag || hasSelfClosingTag;
|
|
794
|
+
}
|
|
795
|
+
looksLikeYAML(text) {
|
|
796
|
+
const trimmed = text.trim();
|
|
797
|
+
if (trimmed.length === 0) {
|
|
798
|
+
return false;
|
|
799
|
+
}
|
|
800
|
+
// For single-line content, be very conservative about YAML detection
|
|
801
|
+
const lines = trimmed.split("\n");
|
|
802
|
+
if (lines.length === 1) {
|
|
803
|
+
// Single line can only be YAML if it's a document marker
|
|
804
|
+
return trimmed === "---" || trimmed === "...";
|
|
805
|
+
}
|
|
806
|
+
// Collect YAML indicators (requires at least 2 for positive detection)
|
|
807
|
+
const indicators = [];
|
|
808
|
+
// Indicator 1: Document start marker (---)
|
|
809
|
+
indicators.push(trimmed.startsWith("---"));
|
|
810
|
+
// Indicator 2: Document end marker (...) or appears within content
|
|
811
|
+
indicators.push(/^\.\.\.$|[\n]\.\.\.$/.test(trimmed));
|
|
812
|
+
// Indicator 3: YAML list items (- followed by space at line start)
|
|
813
|
+
indicators.push(/^[\s]*-\s+[^-]/m.test(trimmed));
|
|
814
|
+
// Indicator 4: Multiple key-value pairs (at least 2)
|
|
815
|
+
// Allow hyphens and underscores in keys, support nested keys
|
|
816
|
+
const keyValuePattern = /^[\s]*[a-zA-Z_][a-zA-Z0-9_-]*:\s*(.+)$/;
|
|
817
|
+
const keyValueMatches = lines.filter((line) => keyValuePattern.test(line)).length;
|
|
818
|
+
indicators.push(keyValueMatches >= 2);
|
|
819
|
+
// Indicator 5: Nested indentation pattern (common in YAML objects/lists)
|
|
820
|
+
let hasNesting = false;
|
|
821
|
+
const sampleLines = lines.slice(0, 10);
|
|
822
|
+
for (let i = 0; i < sampleLines.length - 1; i++) {
|
|
823
|
+
const currentLine = sampleLines[i].trim();
|
|
824
|
+
const nextLine = sampleLines[i + 1];
|
|
825
|
+
if (currentLine.length > 0 &&
|
|
826
|
+
nextLine.length > 0 &&
|
|
827
|
+
/[:-]$/.test(currentLine)) {
|
|
828
|
+
const currentIndent = sampleLines[i].match(/^[\s]*/)?.[0].length ?? 0;
|
|
829
|
+
const nextIndent = nextLine.match(/^[\s]*/)?.[0].length ?? 0;
|
|
830
|
+
if (nextIndent > currentIndent) {
|
|
831
|
+
hasNesting = true;
|
|
832
|
+
break;
|
|
833
|
+
}
|
|
834
|
+
}
|
|
835
|
+
}
|
|
836
|
+
indicators.push(hasNesting);
|
|
837
|
+
// Indicator 6: YAML comments (# followed by space)
|
|
838
|
+
indicators.push(/^\s*#\s+/m.test(trimmed));
|
|
839
|
+
// Indicator 7: List continuation (multiple items with - )
|
|
840
|
+
const listItemCount = lines.filter((line) => /^[\s]*-[\s]/.test(line)).length;
|
|
841
|
+
indicators.push(listItemCount >= 2);
|
|
842
|
+
// Indicator 8: Inline maps or complex structures
|
|
843
|
+
indicators.push(/{\s*[a-zA-Z_]/.test(trimmed) || /\[.*\]/.test(trimmed));
|
|
844
|
+
// Require at least 2 indicators for confident YAML detection
|
|
845
|
+
const matchCount = indicators.filter(Boolean).length;
|
|
846
|
+
return matchCount >= 2;
|
|
847
|
+
}
|
|
848
|
+
looksLikeText(text) {
|
|
849
|
+
// Check if content has null bytes (binary indicator)
|
|
850
|
+
if (text.includes("\0")) {
|
|
851
|
+
return false;
|
|
852
|
+
}
|
|
853
|
+
// Count printable characters
|
|
854
|
+
let printable = 0;
|
|
855
|
+
for (let i = 0; i < text.length; i++) {
|
|
856
|
+
const code = text.charCodeAt(i);
|
|
857
|
+
if ((code >= 32 && code < 127) || // ASCII printable
|
|
858
|
+
code === 9 || // Tab
|
|
859
|
+
code === 10 || // Newline
|
|
860
|
+
code === 13 || // Carriage return
|
|
861
|
+
code > 127 // Unicode
|
|
862
|
+
) {
|
|
863
|
+
printable++;
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
// At least 85% should be printable for text
|
|
867
|
+
return printable / text.length >= 0.85;
|
|
445
868
|
}
|
|
446
869
|
result(type, mime, confidence) {
|
|
447
870
|
return {
|
|
@@ -463,8 +463,12 @@ export async function buildMultimodalMessagesArray(options, provider, model) {
|
|
|
463
463
|
provider: provider,
|
|
464
464
|
});
|
|
465
465
|
if (Buffer.isBuffer(result.content)) {
|
|
466
|
-
pdfFiles.push({
|
|
467
|
-
|
|
466
|
+
pdfFiles.push({
|
|
467
|
+
buffer: result.content,
|
|
468
|
+
filename,
|
|
469
|
+
pageCount: result.metadata?.estimatedPages ?? null,
|
|
470
|
+
});
|
|
471
|
+
logger.info(`[PDF] ✅ Queued for multimodal: ${filename} (${result.metadata?.estimatedPages ?? "unknown"} pages)`);
|
|
468
472
|
}
|
|
469
473
|
}
|
|
470
474
|
catch (error) {
|
package/dist/utils/redis.js
CHANGED
|
@@ -91,14 +91,9 @@ export function serializeConversation(conversation) {
|
|
|
91
91
|
*/
|
|
92
92
|
export function deserializeConversation(data) {
|
|
93
93
|
if (!data) {
|
|
94
|
-
logger.debug("[redisUtils] No conversation data to deserialize, returning null");
|
|
95
94
|
return null;
|
|
96
95
|
}
|
|
97
96
|
try {
|
|
98
|
-
logger.debug("[redisUtils] Deserializing conversation", {
|
|
99
|
-
dataLength: data.length,
|
|
100
|
-
dataPreview: data.substring(0, 100) + (data.length > 100 ? "..." : ""),
|
|
101
|
-
});
|
|
102
97
|
// Parse as unknown first, then validate before casting
|
|
103
98
|
const parsedData = JSON.parse(data);
|
|
104
99
|
// Check if the parsed data is an object with required properties
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@juspay/neurolink",
|
|
3
|
-
"version": "8.19.0",
|
|
3
|
+
"version": "8.20.0",
|
|
4
4
|
"description": "Universal AI Development Platform with working MCP integration, multi-provider support, and professional CLI. Built-in tools operational, 58+ external MCP servers discoverable. Connect to filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 9 major providers: OpenAI, Anthropic, Google AI, AWS Bedrock, Azure, Hugging Face, Ollama, and Mistral AI.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Juspay Technologies",
|