@juspay/neurolink 7.48.1 → 7.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.md +215 -16
- package/dist/agent/directTools.d.ts +55 -0
- package/dist/agent/directTools.js +266 -0
- package/dist/cli/factories/commandFactory.d.ts +2 -0
- package/dist/cli/factories/commandFactory.js +130 -16
- package/dist/cli/index.js +0 -0
- package/dist/cli/loop/conversationSelector.d.ts +45 -0
- package/dist/cli/loop/conversationSelector.js +222 -0
- package/dist/cli/loop/optionsSchema.d.ts +1 -1
- package/dist/cli/loop/session.d.ts +36 -8
- package/dist/cli/loop/session.js +257 -61
- package/dist/core/baseProvider.js +9 -2
- package/dist/core/evaluation.js +5 -2
- package/dist/factories/providerRegistry.js +2 -2
- package/dist/lib/agent/directTools.d.ts +55 -0
- package/dist/lib/agent/directTools.js +266 -0
- package/dist/lib/core/baseProvider.js +9 -2
- package/dist/lib/core/evaluation.js +5 -2
- package/dist/lib/factories/providerRegistry.js +2 -2
- package/dist/lib/mcp/factory.d.ts +2 -157
- package/dist/lib/mcp/flexibleToolValidator.d.ts +1 -5
- package/dist/lib/mcp/index.d.ts +3 -2
- package/dist/lib/mcp/mcpCircuitBreaker.d.ts +1 -75
- package/dist/lib/mcp/mcpClientFactory.d.ts +1 -20
- package/dist/lib/mcp/mcpClientFactory.js +1 -0
- package/dist/lib/mcp/registry.d.ts +3 -10
- package/dist/lib/mcp/servers/agent/directToolsServer.d.ts +1 -1
- package/dist/lib/mcp/servers/aiProviders/aiCoreServer.d.ts +1 -1
- package/dist/lib/mcp/servers/utilities/utilityServer.d.ts +1 -1
- package/dist/lib/mcp/toolDiscoveryService.d.ts +3 -84
- package/dist/lib/mcp/toolRegistry.d.ts +2 -24
- package/dist/lib/middleware/builtin/guardrails.d.ts +5 -16
- package/dist/lib/middleware/builtin/guardrails.js +44 -39
- package/dist/lib/middleware/utils/guardrailsUtils.d.ts +64 -0
- package/dist/lib/middleware/utils/guardrailsUtils.js +387 -0
- package/dist/lib/neurolink.d.ts +1 -1
- package/dist/lib/providers/anthropic.js +46 -3
- package/dist/lib/providers/azureOpenai.js +8 -2
- package/dist/lib/providers/googleAiStudio.js +8 -2
- package/dist/lib/providers/googleVertex.js +11 -2
- package/dist/lib/providers/huggingFace.js +1 -1
- package/dist/lib/providers/litellm.js +1 -1
- package/dist/lib/providers/mistral.js +1 -1
- package/dist/lib/providers/openAI.js +46 -3
- package/dist/lib/session/globalSessionState.d.ts +26 -0
- package/dist/lib/session/globalSessionState.js +49 -0
- package/dist/lib/types/cli.d.ts +28 -0
- package/dist/lib/types/content.d.ts +18 -5
- package/dist/lib/types/contextTypes.d.ts +1 -1
- package/dist/lib/types/conversation.d.ts +55 -4
- package/dist/lib/types/fileTypes.d.ts +65 -0
- package/dist/lib/types/fileTypes.js +4 -0
- package/dist/lib/types/generateTypes.d.ts +12 -0
- package/dist/lib/types/guardrails.d.ts +103 -0
- package/dist/lib/types/guardrails.js +1 -0
- package/dist/lib/types/index.d.ts +4 -2
- package/dist/lib/types/index.js +4 -0
- package/dist/lib/types/mcpTypes.d.ts +407 -14
- package/dist/lib/types/streamTypes.d.ts +7 -0
- package/dist/lib/types/tools.d.ts +132 -35
- package/dist/lib/utils/csvProcessor.d.ts +68 -0
- package/dist/lib/utils/csvProcessor.js +277 -0
- package/dist/lib/utils/fileDetector.d.ts +57 -0
- package/dist/lib/utils/fileDetector.js +457 -0
- package/dist/lib/utils/imageProcessor.d.ts +10 -0
- package/dist/lib/utils/imageProcessor.js +22 -0
- package/dist/lib/utils/loopUtils.d.ts +71 -0
- package/dist/lib/utils/loopUtils.js +262 -0
- package/dist/lib/utils/messageBuilder.d.ts +2 -1
- package/dist/lib/utils/messageBuilder.js +197 -2
- package/dist/lib/utils/optionsUtils.d.ts +1 -1
- package/dist/mcp/factory.d.ts +2 -157
- package/dist/mcp/flexibleToolValidator.d.ts +1 -5
- package/dist/mcp/index.d.ts +3 -2
- package/dist/mcp/mcpCircuitBreaker.d.ts +1 -75
- package/dist/mcp/mcpClientFactory.d.ts +1 -20
- package/dist/mcp/mcpClientFactory.js +1 -0
- package/dist/mcp/registry.d.ts +3 -10
- package/dist/mcp/servers/agent/directToolsServer.d.ts +1 -1
- package/dist/mcp/servers/aiProviders/aiCoreServer.d.ts +1 -1
- package/dist/mcp/servers/utilities/utilityServer.d.ts +1 -1
- package/dist/mcp/toolDiscoveryService.d.ts +3 -84
- package/dist/mcp/toolRegistry.d.ts +2 -24
- package/dist/middleware/builtin/guardrails.d.ts +5 -16
- package/dist/middleware/builtin/guardrails.js +44 -39
- package/dist/middleware/utils/guardrailsUtils.d.ts +64 -0
- package/dist/middleware/utils/guardrailsUtils.js +387 -0
- package/dist/neurolink.d.ts +1 -1
- package/dist/providers/anthropic.js +46 -3
- package/dist/providers/azureOpenai.js +8 -2
- package/dist/providers/googleAiStudio.js +8 -2
- package/dist/providers/googleVertex.js +11 -2
- package/dist/providers/huggingFace.js +1 -1
- package/dist/providers/litellm.js +1 -1
- package/dist/providers/mistral.js +1 -1
- package/dist/providers/openAI.js +46 -3
- package/dist/session/globalSessionState.d.ts +26 -0
- package/dist/session/globalSessionState.js +49 -0
- package/dist/types/cli.d.ts +28 -0
- package/dist/types/content.d.ts +18 -5
- package/dist/types/contextTypes.d.ts +1 -1
- package/dist/types/conversation.d.ts +55 -4
- package/dist/types/fileTypes.d.ts +65 -0
- package/dist/types/fileTypes.js +4 -0
- package/dist/types/generateTypes.d.ts +12 -0
- package/dist/types/guardrails.d.ts +103 -0
- package/dist/types/guardrails.js +1 -0
- package/dist/types/index.d.ts +4 -2
- package/dist/types/index.js +4 -0
- package/dist/types/mcpTypes.d.ts +407 -14
- package/dist/types/modelTypes.d.ts +6 -6
- package/dist/types/streamTypes.d.ts +7 -0
- package/dist/types/tools.d.ts +132 -35
- package/dist/utils/csvProcessor.d.ts +68 -0
- package/dist/utils/csvProcessor.js +277 -0
- package/dist/utils/fileDetector.d.ts +57 -0
- package/dist/utils/fileDetector.js +457 -0
- package/dist/utils/imageProcessor.d.ts +10 -0
- package/dist/utils/imageProcessor.js +22 -0
- package/dist/utils/loopUtils.d.ts +71 -0
- package/dist/utils/loopUtils.js +262 -0
- package/dist/utils/messageBuilder.d.ts +2 -1
- package/dist/utils/messageBuilder.js +197 -2
- package/dist/utils/optionsUtils.d.ts +1 -1
- package/package.json +9 -3
- package/dist/lib/mcp/contracts/mcpContract.d.ts +0 -106
- package/dist/lib/mcp/contracts/mcpContract.js +0 -5
- package/dist/mcp/contracts/mcpContract.d.ts +0 -106
- package/dist/mcp/contracts/mcpContract.js +0 -5
|
@@ -4,10 +4,10 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { z } from "zod";
|
|
6
6
|
import type { Result, JsonValue, ErrorInfo } from "./common.js";
|
|
7
|
+
import type { StandardRecord, ZodUnknownSchema } from "./typeAliases.js";
|
|
7
8
|
/**
|
|
8
9
|
* Commonly used Zod schema type aliases for cleaner type declarations
|
|
9
10
|
*/
|
|
10
|
-
import type { ZodUnknownSchema } from "./typeAliases.js";
|
|
11
11
|
export type { ZodUnknownSchema } from "./typeAliases.js";
|
|
12
12
|
export type ZodAnySchema = z.ZodSchema<unknown>;
|
|
13
13
|
export type ZodObjectSchema = z.ZodObject<z.ZodRawShape>;
|
|
@@ -19,148 +19,245 @@ export type ToolParameterSchema = ZodUnknownSchema | Record<string, JsonValue>;
|
|
|
19
19
|
/**
|
|
20
20
|
* Standard tool input parameters
|
|
21
21
|
*/
|
|
22
|
-
export
|
|
22
|
+
export type BaseToolArgs = {
|
|
23
23
|
[key: string]: JsonValue | undefined;
|
|
24
|
-
}
|
|
24
|
+
};
|
|
25
25
|
/**
|
|
26
26
|
* Tool execution arguments with specific common patterns
|
|
27
27
|
*/
|
|
28
|
-
export
|
|
28
|
+
export type ToolArgs = BaseToolArgs & {
|
|
29
29
|
input?: JsonValue;
|
|
30
30
|
data?: JsonValue;
|
|
31
31
|
options?: JsonValue;
|
|
32
|
-
}
|
|
32
|
+
};
|
|
33
|
+
/**
|
|
34
|
+
* Generic execution context for MCP operations
|
|
35
|
+
* Moved from src/lib/mcp/contracts/mcpContract.ts
|
|
36
|
+
*/
|
|
37
|
+
export type ExecutionContext<T = StandardRecord> = {
|
|
38
|
+
sessionId?: string;
|
|
39
|
+
userId?: string;
|
|
40
|
+
config?: T;
|
|
41
|
+
metadata?: StandardRecord;
|
|
42
|
+
cacheOptions?: CacheOptions;
|
|
43
|
+
fallbackOptions?: FallbackOptions;
|
|
44
|
+
timeoutMs?: number;
|
|
45
|
+
startTime?: number;
|
|
46
|
+
};
|
|
47
|
+
/**
|
|
48
|
+
* Cache configuration options
|
|
49
|
+
* Moved from src/lib/mcp/contracts/mcpContract.ts
|
|
50
|
+
*/
|
|
51
|
+
export type CacheOptions = {
|
|
52
|
+
enabled?: boolean;
|
|
53
|
+
ttlMs?: number;
|
|
54
|
+
strategy?: "memory" | "writeThrough" | "cacheAside";
|
|
55
|
+
};
|
|
56
|
+
/**
|
|
57
|
+
* Fallback configuration options
|
|
58
|
+
* Moved from src/lib/mcp/contracts/mcpContract.ts
|
|
59
|
+
*/
|
|
60
|
+
export type FallbackOptions = {
|
|
61
|
+
enabled?: boolean;
|
|
62
|
+
maxAttempts?: number;
|
|
63
|
+
delayMs?: number;
|
|
64
|
+
circuitBreaker?: boolean;
|
|
65
|
+
};
|
|
66
|
+
/**
|
|
67
|
+
* Tool information with extensibility
|
|
68
|
+
* Moved from src/lib/mcp/contracts/mcpContract.ts
|
|
69
|
+
*/
|
|
70
|
+
export type ToolInfo = {
|
|
71
|
+
name: string;
|
|
72
|
+
description?: string;
|
|
73
|
+
category?: string;
|
|
74
|
+
serverId?: string;
|
|
75
|
+
inputSchema?: StandardRecord;
|
|
76
|
+
outputSchema?: StandardRecord;
|
|
77
|
+
[key: string]: unknown;
|
|
78
|
+
};
|
|
79
|
+
/**
|
|
80
|
+
* Tool Implementation type for MCP tool registry
|
|
81
|
+
* Extracted from toolRegistry.ts for centralized type management
|
|
82
|
+
*/
|
|
83
|
+
export type ToolImplementation = {
|
|
84
|
+
execute: (params: unknown, context?: ExecutionContext) => Promise<unknown> | unknown;
|
|
85
|
+
description?: string;
|
|
86
|
+
inputSchema?: unknown;
|
|
87
|
+
outputSchema?: unknown;
|
|
88
|
+
category?: string;
|
|
89
|
+
permissions?: string[];
|
|
90
|
+
};
|
|
91
|
+
/**
|
|
92
|
+
* Tool execution options for enhanced control
|
|
93
|
+
* Extracted from toolRegistry.ts for centralized type management
|
|
94
|
+
*/
|
|
95
|
+
export type ToolExecutionOptions = {
|
|
96
|
+
timeout?: number;
|
|
97
|
+
retries?: number;
|
|
98
|
+
context?: unknown;
|
|
99
|
+
preferredSource?: string;
|
|
100
|
+
fallbackEnabled?: boolean;
|
|
101
|
+
validateBeforeExecution?: boolean;
|
|
102
|
+
timeoutMs?: number;
|
|
103
|
+
};
|
|
104
|
+
/**
|
|
105
|
+
* Tool execution result
|
|
106
|
+
* Moved from src/lib/mcp/contracts/mcpContract.ts
|
|
107
|
+
*/
|
|
108
|
+
export type ToolExecutionResult<T = unknown> = {
|
|
109
|
+
result: T;
|
|
110
|
+
context?: ExecutionContext;
|
|
111
|
+
performance?: {
|
|
112
|
+
duration: number;
|
|
113
|
+
tokensUsed?: number;
|
|
114
|
+
cost?: number;
|
|
115
|
+
};
|
|
116
|
+
validation?: ValidationResult;
|
|
117
|
+
cached?: boolean;
|
|
118
|
+
fallback?: boolean;
|
|
119
|
+
};
|
|
120
|
+
/**
|
|
121
|
+
* Validation result for runtime checks
|
|
122
|
+
* Moved from src/lib/mcp/contracts/mcpContract.ts
|
|
123
|
+
*/
|
|
124
|
+
export type ValidationResult = {
|
|
125
|
+
valid: boolean;
|
|
126
|
+
missing: string[];
|
|
127
|
+
warnings: string[];
|
|
128
|
+
recommendations: string[];
|
|
129
|
+
};
|
|
33
130
|
/**
|
|
34
131
|
* Tool execution metadata
|
|
35
132
|
*/
|
|
36
|
-
export
|
|
133
|
+
export type ToolExecutionMetadata = {
|
|
37
134
|
requestId?: string;
|
|
38
135
|
startTime?: number;
|
|
39
136
|
version?: string;
|
|
40
137
|
[key: string]: JsonValue | undefined;
|
|
41
|
-
}
|
|
138
|
+
};
|
|
42
139
|
/**
|
|
43
140
|
* Tool execution context
|
|
44
141
|
*/
|
|
45
|
-
export
|
|
142
|
+
export type ToolContext = {
|
|
46
143
|
sessionId?: string;
|
|
47
144
|
userId?: string;
|
|
48
145
|
aiProvider?: string;
|
|
49
146
|
metadata?: ToolExecutionMetadata;
|
|
50
|
-
}
|
|
147
|
+
};
|
|
51
148
|
/**
|
|
52
149
|
* Tool execution result metadata
|
|
53
150
|
*/
|
|
54
|
-
export
|
|
151
|
+
export type ToolResultMetadata = {
|
|
55
152
|
toolName?: string;
|
|
56
153
|
executionTime?: number;
|
|
57
154
|
timestamp?: number;
|
|
58
155
|
source?: string;
|
|
59
156
|
version?: string;
|
|
60
157
|
serverId?: string;
|
|
61
|
-
}
|
|
158
|
+
};
|
|
62
159
|
/**
|
|
63
160
|
* Tool execution result
|
|
64
161
|
*/
|
|
65
|
-
export
|
|
162
|
+
export type ToolResult<T = JsonValue> = Result<T, ErrorInfo> & {
|
|
66
163
|
success: boolean;
|
|
67
164
|
data?: T;
|
|
68
165
|
error?: ErrorInfo;
|
|
69
166
|
metadata?: ToolResultMetadata;
|
|
70
|
-
}
|
|
167
|
+
};
|
|
71
168
|
/**
|
|
72
169
|
* Tool metadata for registration
|
|
73
170
|
*/
|
|
74
|
-
export
|
|
171
|
+
export type ToolMetadata = {
|
|
75
172
|
category?: string;
|
|
76
173
|
version?: string;
|
|
77
174
|
author?: string;
|
|
78
175
|
tags?: string[];
|
|
79
176
|
documentation?: string;
|
|
80
177
|
[key: string]: JsonValue | undefined;
|
|
81
|
-
}
|
|
178
|
+
};
|
|
82
179
|
/**
|
|
83
|
-
* Tool definition
|
|
180
|
+
* Tool definition type
|
|
84
181
|
*/
|
|
85
|
-
export
|
|
182
|
+
export type ToolDefinition<TArgs = ToolArgs, TResult = JsonValue> = {
|
|
86
183
|
description: string;
|
|
87
184
|
parameters?: ToolParameterSchema;
|
|
88
185
|
metadata?: ToolMetadata;
|
|
89
186
|
execute: (params: TArgs, context?: ToolContext) => Promise<ToolResult<TResult>> | ToolResult<TResult>;
|
|
90
|
-
}
|
|
187
|
+
};
|
|
91
188
|
/**
|
|
92
|
-
* Simple tool
|
|
189
|
+
* Simple tool type (for SDK)
|
|
93
190
|
*/
|
|
94
|
-
export
|
|
191
|
+
export type SimpleTool<TArgs = ToolArgs, TResult = JsonValue> = {
|
|
95
192
|
description: string;
|
|
96
193
|
parameters?: ZodUnknownSchema;
|
|
97
194
|
metadata?: ToolMetadata;
|
|
98
195
|
execute: (params: TArgs, context?: ToolContext) => Promise<TResult>;
|
|
99
|
-
}
|
|
196
|
+
};
|
|
100
197
|
/**
|
|
101
198
|
* Tool registry entry
|
|
102
199
|
*/
|
|
103
|
-
export
|
|
200
|
+
export type ToolRegistryEntry = {
|
|
104
201
|
name: string;
|
|
105
202
|
description: string;
|
|
106
203
|
serverId?: string;
|
|
107
204
|
isImplemented?: boolean;
|
|
108
205
|
parameters?: ToolParameterSchema;
|
|
109
206
|
execute?: ToolDefinition["execute"];
|
|
110
|
-
}
|
|
207
|
+
};
|
|
111
208
|
/**
|
|
112
209
|
* Tool execution information
|
|
113
210
|
*/
|
|
114
|
-
export
|
|
211
|
+
export type ToolExecution = {
|
|
115
212
|
toolName: string;
|
|
116
213
|
params: ToolArgs;
|
|
117
214
|
result: ToolResult;
|
|
118
215
|
executionTime: number;
|
|
119
216
|
timestamp: number;
|
|
120
|
-
}
|
|
217
|
+
};
|
|
121
218
|
/**
|
|
122
219
|
* Available tool information
|
|
123
220
|
*/
|
|
124
|
-
export
|
|
221
|
+
export type AvailableTool = {
|
|
125
222
|
name: string;
|
|
126
223
|
description: string;
|
|
127
224
|
serverId?: string;
|
|
128
225
|
toolName?: string;
|
|
129
226
|
parameters?: ToolParameterSchema;
|
|
130
|
-
}
|
|
227
|
+
};
|
|
131
228
|
/**
|
|
132
229
|
* Tool validation options
|
|
133
230
|
*/
|
|
134
|
-
export
|
|
231
|
+
export type ToolValidationOptions = {
|
|
135
232
|
customValidator?: (toolName: string, params: ToolArgs) => boolean | Promise<boolean>;
|
|
136
233
|
validateSchema?: boolean;
|
|
137
234
|
allowUnknownProperties?: boolean;
|
|
138
|
-
}
|
|
235
|
+
};
|
|
139
236
|
/**
|
|
140
237
|
* Tool call information (for AI SDK integration)
|
|
141
238
|
*/
|
|
142
|
-
export
|
|
239
|
+
export type ToolCall = {
|
|
143
240
|
toolName: string;
|
|
144
241
|
parameters: ToolArgs;
|
|
145
242
|
id?: string;
|
|
146
|
-
}
|
|
243
|
+
};
|
|
147
244
|
/**
|
|
148
245
|
* AI SDK Tool Call format (from Vercel AI SDK)
|
|
149
246
|
*/
|
|
150
|
-
export
|
|
247
|
+
export type AiSdkToolCall = {
|
|
151
248
|
type: "tool-call";
|
|
152
249
|
toolCallId: string;
|
|
153
250
|
toolName: string;
|
|
154
251
|
params: ToolArgs;
|
|
155
|
-
}
|
|
252
|
+
};
|
|
156
253
|
/**
|
|
157
254
|
* Tool call result (for AI SDK integration)
|
|
158
255
|
*/
|
|
159
|
-
export
|
|
256
|
+
export type ToolCallResult = {
|
|
160
257
|
id?: string;
|
|
161
258
|
result: ToolResult;
|
|
162
259
|
formattedForAI: string;
|
|
163
|
-
}
|
|
260
|
+
};
|
|
164
261
|
/**
|
|
165
262
|
* Type guard for tool result
|
|
166
263
|
*/
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV Processing Utility
|
|
3
|
+
* Converts CSV files to LLM-friendly text formats
|
|
4
|
+
* Uses streaming for memory efficiency with large files
|
|
5
|
+
*/
|
|
6
|
+
import type { FileProcessingResult, CSVProcessorOptions } from "../types/fileTypes.js";
|
|
7
|
+
/**
 * CSV processor for converting CSV data to LLM-optimized formats
 *
 * Supports three output formats:
 * - raw: Original CSV text truncated to the row limit (RECOMMENDED for best LLM performance)
 * - json: JSON array format (best for structured data processing)
 * - markdown: Markdown table format (best for small datasets <100 rows)
 *
 * The json and markdown formats are parsed with csv-parser and then converted;
 * the raw format is sliced line by line without parsing.
 *
 * @example
 * ```typescript
 * const csvBuffer = Buffer.from('name,age\nAlice,30\nBob,25');
 * const result = await CSVProcessor.process(csvBuffer, {
 *   maxRows: 1000,
 *   formatStyle: 'raw'
 * });
 * console.log(result.content); // CSV text limited to maxRows data rows
 * ```
 */
export declare class CSVProcessor {
    /**
     * Process CSV Buffer to LLM-friendly format
     * Content already loaded by FileDetector
     *
     * @param content - CSV file as Buffer (decoded as UTF-8)
     * @param options - Processing options (row limit, output format, header inclusion)
     * @returns Formatted CSV data ready for LLM: raw CSV text by default,
     *          or JSON / Markdown when requested via `formatStyle`
     */
    static process(content: Buffer, options?: CSVProcessorOptions): Promise<FileProcessingResult>;
    /**
     * Parse CSV file from disk using streaming (memory efficient)
     *
     * @param filePath - Path to CSV file
     * @param maxRows - Maximum rows to parse (default: 1000, clamped to 1..10000)
     * @returns Array of row objects
     */
    static parseCSVFile(filePath: string, maxRows?: number): Promise<unknown[]>;
    /**
     * Parse CSV string to array of row objects
     * Exposed for use by tools that need direct CSV parsing
     *
     * @param csvString - CSV data as string
     * @param maxRows - Maximum rows to parse (default: 1000, clamped to 1..10000)
     * @returns Array of row objects
     */
    static parseCSVString(csvString: string, maxRows?: number): Promise<unknown[]>;
    /**
     * Format parsed CSV data for LLM consumption
     * Only used for JSON and Markdown formats (raw format handled separately)
     */
    private static formatForLLM;
    /**
     * Format as markdown table
     * Best for small datasets (<100 rows)
     */
    private static toMarkdownTable;
}
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSV Processing Utility
|
|
3
|
+
* Converts CSV files to LLM-friendly text formats
|
|
4
|
+
* Uses streaming for memory efficiency with large files
|
|
5
|
+
*/
|
|
6
|
+
import csvParser from "csv-parser";
|
|
7
|
+
import { Readable } from "stream";
|
|
8
|
+
import { logger } from "./logger.js";
|
|
9
|
+
/**
 * Decide whether the first line of a CSV is metadata rather than the header row.
 *
 * Heuristics, in order:
 * - An Excel separator hint such as "SEP=," is always metadata.
 * - Otherwise the first line is metadata when the second line contains at
 *   least one delimiter and the two lines disagree on delimiter count
 *   (this also covers a delimiter-free title line above real CSV data).
 *
 * Only commas outside double-quoted fields are counted, so a quoted value
 * that contains a comma (e.g. `Alice,"1 Main St, NY"`) does not make a valid
 * header row look like metadata.
 *
 * @param lines - Leading lines of the CSV; only the first two are inspected.
 * @returns true when the first line should be skipped before parsing.
 */
function isMetadataLine(lines) {
    if (!lines[0] || lines.length < 2) {
        return false;
    }
    const firstLine = lines[0].trim();
    const secondLine = lines[1].trim();
    if (/^sep=/i.test(firstLine)) {
        return true;
    }
    // Count field separators, ignoring commas that sit inside a quoted field.
    const countDelimiters = (line) => {
        let total = 0;
        let inQuotes = false;
        for (const ch of line) {
            if (ch === '"') {
                // An escaped quote ("") toggles twice, leaving the state unchanged.
                inQuotes = !inQuotes;
            }
            else if (ch === "," && !inQuotes) {
                total++;
            }
        }
        return total;
    };
    const firstCommaCount = countDelimiters(firstLine);
    const secondCommaCount = countDelimiters(secondLine);
    return secondCommaCount > 0 && firstCommaCount !== secondCommaCount;
}
|
|
35
|
+
/**
 * CSV processor for converting CSV data to LLM-optimized formats
 *
 * Supports three output formats:
 * - raw: Original CSV text truncated to the row limit (RECOMMENDED for best LLM performance)
 * - json: JSON array format (best for structured data processing)
 * - markdown: Markdown table format (best for small datasets <100 rows)
 *
 * The json and markdown formats are parsed with csv-parser and then converted;
 * the raw format is sliced line by line without parsing.
 *
 * @example
 * ```typescript
 * const csvBuffer = Buffer.from('name,age\nAlice,30\nBob,25');
 * const result = await CSVProcessor.process(csvBuffer, {
 *   maxRows: 1000,
 *   formatStyle: 'raw'
 * });
 * console.log(result.content); // CSV text limited to maxRows data rows
 * ```
 */
export class CSVProcessor {
    /**
     * Process CSV Buffer to LLM-friendly format
     * Content already loaded by FileDetector
     *
     * @param content - CSV file as Buffer (decoded as UTF-8)
     * @param options - Processing options: `maxRows` (default 1000, clamped to
     *   1..10000), `formatStyle` (default "raw"), `includeHeaders` (default true)
     * @returns Result object whose `content` is raw CSV text, JSON, or a
     *   Markdown table depending on `formatStyle`
     */
    static async process(content, options) {
        const { maxRows: rawMaxRows = 1000, formatStyle = "raw", includeHeaders = true, } = options || {};
        const maxRows = Math.max(1, Math.min(10000, rawMaxRows));
        const csvString = content.toString("utf-8");
        // For raw format, return original CSV with row limit (no parsing needed)
        // This preserves the exact original format which works best for LLMs
        if (formatStyle === "raw") {
            // NOTE: splitting on "\n" treats a quoted multi-line field as several rows.
            const lines = csvString.split("\n");
            // Skip metadata line if present, then take header + maxRows data rows
            const csvLines = isMetadataLine(lines) ? lines.slice(1) : lines;
            const limitedLines = csvLines.slice(0, 1 + maxRows); // header + data rows
            const limitedCSV = limitedLines.join("\n");
            // Count data rows only: skip the header and ignore blank lines, so the
            // empty segment produced by a trailing newline is not reported as a row.
            const countDataRows = (allLines) => allLines.slice(1).filter((line) => line.trim() !== "").length;
            const rowCount = countDataRows(limitedLines);
            const originalRowCount = countDataRows(csvLines);
            logger.debug(`[CSVProcessor] raw format: ${rowCount} rows (original: ${originalRowCount}) → ${limitedCSV.length} chars`, {
                formatStyle: "raw",
                originalSize: csvString.length,
                limitedSize: limitedCSV.length,
            });
            return {
                type: "csv",
                content: limitedCSV,
                mimeType: "text/csv",
                metadata: {
                    confidence: 100,
                    size: content.length,
                    rowCount,
                    columnCount: (limitedLines[0] || "").split(",").length,
                },
            };
        }
        // Parse CSV for JSON and Markdown formats only
        const rows = await this.parseCSVString(csvString, maxRows);
        // Extract metadata from parsed results
        const rowCount = rows.length;
        const columnNames = rows.length > 0 ? Object.keys(rows[0]) : [];
        const columnCount = columnNames.length;
        const hasEmptyColumns = columnNames.some((col) => !col || col.trim() === "");
        const sampleRows = rows.slice(0, 3);
        const sampleData = sampleRows.length > 0
            ? JSON.stringify(sampleRows, null, 2)
            : "No data rows";
        // Format parsed data
        const formatted = this.formatForLLM(rows, formatStyle, includeHeaders);
        logger.info(`[CSVProcessor] ${formatStyle} format: ${rowCount} rows × ${columnCount} columns → ${formatted.length} chars`, { rowCount, columnCount, columns: columnNames, hasEmptyColumns });
        return {
            type: "csv",
            content: formatted,
            mimeType: "text/csv",
            metadata: {
                confidence: 100,
                size: content.length,
                rowCount,
                columnCount,
                columnNames,
                sampleData,
                hasEmptyColumns,
            },
        };
    }
    /**
     * Internal helper: pipe `source` into `parser` and collect at most `limit` rows.
     *
     * Errors from either stream reject the promise; `pipe()` does not forward
     * source errors to the destination, so both streams are watched explicitly.
     *
     * @param source - Readable stream producing CSV text
     * @param parser - csv-parser transform stream
     * @param limit - Maximum number of rows to collect
     * @param failureMessage - Log prefix used when parsing fails
     * @returns Array of row objects (never longer than `limit`)
     */
    static collectRows(source, parser, limit, failureMessage) {
        return new Promise((resolve, reject) => {
            const rows = [];
            let settled = false;
            const abort = () => {
                source.destroy();
                parser.destroy();
            };
            const finish = () => {
                if (!settled) {
                    settled = true;
                    resolve(rows);
                }
            };
            const fail = (error) => {
                if (settled) {
                    return;
                }
                settled = true;
                logger.error(failureMessage, error);
                abort();
                reject(error);
            };
            source.on("error", fail);
            parser
                .on("data", (row) => {
                if (settled) {
                    // Rows still buffered after the limit was hit are ignored.
                    return;
                }
                rows.push(row);
                if (rows.length >= limit) {
                    logger.debug(`[CSVProcessor] Reached row limit ${limit}, stopping parse`);
                    abort();
                    finish();
                }
            })
                .on("end", finish)
                .on("error", fail);
            source.pipe(parser);
        });
    }
    /**
     * Parse CSV file from disk using streaming (memory efficient)
     *
     * The first two lines are peeked to detect a leading metadata line
     * (e.g. "SEP=,"). When one is found, csv-parser is told to skip it before
     * reading headers, so the real header row supplies the column names.
     *
     * @param filePath - Path to CSV file
     * @param maxRows - Maximum rows to parse (default: 1000, clamped to 1..10000)
     * @returns Array of row objects
     */
    static async parseCSVFile(filePath, maxRows = 1000) {
        const clampedMaxRows = Math.max(1, Math.min(10000, maxRows));
        const fs = await import("fs");
        // Read first 2 lines to detect metadata
        const firstLines = [];
        const fileHandle = await fs.promises.open(filePath, "r");
        try {
            const lineReader = fileHandle.createReadStream({ encoding: "utf-8" });
            await new Promise((resolve, reject) => {
                let buffer = "";
                lineReader.on("data", (chunk) => {
                    buffer += chunk.toString();
                    const lines = buffer.split("\n");
                    if (lines.length >= 2) {
                        // NOTE: the second line may be cut short at a chunk boundary.
                        firstLines.push(lines[0], lines[1]);
                        lineReader.destroy();
                        resolve();
                    }
                });
                lineReader.on("end", () => resolve());
                // Without this, a read failure would leave the promise pending forever.
                lineReader.on("error", reject);
            });
        }
        finally {
            // Always release the descriptor, including when the peek fails.
            await fileHandle.close();
        }
        const skipLines = isMetadataLine(firstLines) ? 1 : 0;
        const source = fs.createReadStream(filePath, { encoding: "utf-8" });
        // skipLines drops the metadata line *before* header detection.
        const parser = csvParser({ skipLines });
        return CSVProcessor.collectRows(source, parser, clampedMaxRows, "[CSVProcessor] File parsing failed:");
    }
    /**
     * Parse CSV string to array of row objects
     * Exposed for use by tools that need direct CSV parsing
     *
     * @param csvString - CSV data as string
     * @param maxRows - Maximum rows to parse (default: 1000, clamped to 1..10000)
     * @returns Array of row objects
     */
    static async parseCSVString(csvString, maxRows = 1000) {
        const clampedMaxRows = Math.max(1, Math.min(10000, maxRows));
        // Detect and skip metadata line
        const lines = csvString.split("\n");
        const csvData = isMetadataLine(lines) ? lines.slice(1).join("\n") : csvString;
        const source = Readable.from([csvData]);
        const parser = csvParser();
        return CSVProcessor.collectRows(source, parser, clampedMaxRows, "[CSVProcessor] Parsing failed:");
    }
    /**
     * Format parsed CSV data for LLM consumption
     * Only used for JSON and Markdown formats (raw format handled separately)
     *
     * @param rows - Parsed row objects
     * @param formatStyle - "json" for a JSON array; anything else yields a Markdown table
     * @param includeHeaders - Whether the Markdown table gets a header row
     * @returns Formatted text, or a fixed message when there are no rows
     */
    static formatForLLM(rows, formatStyle, includeHeaders) {
        if (rows.length === 0) {
            return "CSV file is empty or contains no data.";
        }
        if (formatStyle === "json") {
            return JSON.stringify(rows, null, 2);
        }
        return this.toMarkdownTable(rows, includeHeaders);
    }
    /**
     * Format as markdown table
     * Best for small datasets (<100 rows)
     *
     * Column order comes from the keys of the first row.
     *
     * @param rows - Parsed row objects
     * @param includeHeaders - Whether to emit the header and separator lines
     * @returns Markdown table text, or a fixed message when there are no rows
     */
    static toMarkdownTable(rows, includeHeaders) {
        if (rows.length === 0) {
            return "CSV file is empty or contains no data.";
        }
        const headers = Object.keys(rows[0]);
        // Escape backslashes, pipes, and sanitize newlines to keep rows intact
        const escapePipe = (str) => str.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
        let markdown = "";
        if (includeHeaders) {
            markdown = "| " + headers.map(escapePipe).join(" | ") + " |\n";
            markdown += "|" + headers.map(() => " --- ").join("|") + "|\n";
        }
        rows.forEach((row) => {
            markdown +=
                "| " +
                    headers
                        // `??` keeps falsy-but-meaningful values such as 0; only
                        // missing cells (null/undefined) become empty.
                        .map((h) => escapePipe(String(row[h] ?? "")))
                        .join(" | ") +
                    " |\n";
        });
        return markdown;
    }
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* File Type Detection Utility
|
|
3
|
+
* Centralized file detection for all multimodal file types
|
|
4
|
+
* Uses multi-strategy approach for reliable type identification
|
|
5
|
+
*/
|
|
6
|
+
import type { FileInput, FileProcessingResult, FileDetectorOptions } from "../types/fileTypes.js";
|
|
7
|
+
/**
 * Single entry point for identifying a file's type and turning it into a
 * processed result.
 *
 * @example
 * ```typescript
 * // Auto-detect and process any file
 * const result = await FileDetector.detectAndProcess("data.csv");
 * console.log(result.type); // 'csv'
 * ```
 */
export declare class FileDetector {
    /**
     * Detect the type of `input` and process it in a single call.
     *
     * Detection strategies, in the order they are documented to run:
     * 1. MagicBytesStrategy (95% confidence) - Binary file headers
     * 2. MimeTypeStrategy (85% confidence) - HTTP Content-Type for URLs
     * 3. ExtensionStrategy (70% confidence) - File extension
     * 4. ContentHeuristicStrategy (75% confidence) - Content analysis
     *
     * NOTE(review): the list is not sorted by confidence (extension at 70%
     * precedes content heuristics at 75%) - confirm against the implementation.
     *
     * @param input - File path, URL, Buffer, or data URI
     * @param options - Detection and processing options
     * @returns Processed file result with type and content
     */
    static detectAndProcess(input: FileInput, options?: FileDetectorOptions): Promise<FileProcessingResult>;
    /**
     * Multi-strategy type detection.
     * Stops at the first strategy whose confidence is >= the threshold (default: 80%).
     */
    private static detect;
    /**
     * Loads file content from whichever kind of source was supplied.
     */
    private static loadContent;
    /**
     * Dispatches the content to the processor matching its type.
     */
    private static processFile;
    /**
     * Loads a file from a URL.
     */
    private static loadFromURL;
    /**
     * Loads a file from a filesystem path.
     */
    private static loadFromPath;
    /**
     * Loads a file from a data URI.
     */
    private static loadFromDataURI;
}
|