@juspay/neurolink 7.48.0 → 7.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/CHANGELOG.md +15 -0
  2. package/README.md +177 -784
  3. package/dist/agent/directTools.d.ts +55 -0
  4. package/dist/agent/directTools.js +266 -0
  5. package/dist/cli/factories/commandFactory.d.ts +2 -0
  6. package/dist/cli/factories/commandFactory.js +130 -16
  7. package/dist/cli/index.js +0 -0
  8. package/dist/cli/loop/conversationSelector.d.ts +45 -0
  9. package/dist/cli/loop/conversationSelector.js +222 -0
  10. package/dist/cli/loop/optionsSchema.d.ts +1 -1
  11. package/dist/cli/loop/session.d.ts +36 -8
  12. package/dist/cli/loop/session.js +257 -61
  13. package/dist/core/baseProvider.js +9 -2
  14. package/dist/core/evaluation.js +5 -2
  15. package/dist/factories/providerRegistry.js +2 -2
  16. package/dist/lib/agent/directTools.d.ts +55 -0
  17. package/dist/lib/agent/directTools.js +266 -0
  18. package/dist/lib/core/baseProvider.js +9 -2
  19. package/dist/lib/core/evaluation.js +5 -2
  20. package/dist/lib/factories/providerRegistry.js +2 -2
  21. package/dist/lib/mcp/factory.d.ts +2 -157
  22. package/dist/lib/mcp/flexibleToolValidator.d.ts +1 -5
  23. package/dist/lib/mcp/index.d.ts +3 -2
  24. package/dist/lib/mcp/mcpCircuitBreaker.d.ts +1 -75
  25. package/dist/lib/mcp/mcpClientFactory.d.ts +1 -20
  26. package/dist/lib/mcp/mcpClientFactory.js +1 -0
  27. package/dist/lib/mcp/registry.d.ts +3 -10
  28. package/dist/lib/mcp/servers/agent/directToolsServer.d.ts +1 -1
  29. package/dist/lib/mcp/servers/aiProviders/aiCoreServer.d.ts +1 -1
  30. package/dist/lib/mcp/servers/utilities/utilityServer.d.ts +1 -1
  31. package/dist/lib/mcp/toolDiscoveryService.d.ts +3 -84
  32. package/dist/lib/mcp/toolRegistry.d.ts +2 -24
  33. package/dist/lib/middleware/builtin/guardrails.d.ts +5 -16
  34. package/dist/lib/middleware/builtin/guardrails.js +44 -39
  35. package/dist/lib/middleware/utils/guardrailsUtils.d.ts +64 -0
  36. package/dist/lib/middleware/utils/guardrailsUtils.js +387 -0
  37. package/dist/lib/neurolink.d.ts +1 -1
  38. package/dist/lib/providers/anthropic.js +46 -3
  39. package/dist/lib/providers/azureOpenai.js +8 -2
  40. package/dist/lib/providers/googleAiStudio.js +8 -2
  41. package/dist/lib/providers/googleVertex.js +11 -2
  42. package/dist/lib/providers/huggingFace.js +1 -1
  43. package/dist/lib/providers/litellm.js +1 -1
  44. package/dist/lib/providers/mistral.js +1 -1
  45. package/dist/lib/providers/openAI.js +46 -3
  46. package/dist/lib/providers/sagemaker/adaptive-semaphore.d.ts +1 -13
  47. package/dist/lib/providers/sagemaker/client.d.ts +1 -1
  48. package/dist/lib/providers/sagemaker/config.d.ts +1 -1
  49. package/dist/lib/providers/sagemaker/detection.d.ts +1 -1
  50. package/dist/lib/providers/sagemaker/errors.d.ts +1 -1
  51. package/dist/lib/providers/sagemaker/index.d.ts +1 -1
  52. package/dist/lib/providers/sagemaker/language-model.d.ts +1 -1
  53. package/dist/lib/providers/sagemaker/parsers.d.ts +1 -1
  54. package/dist/lib/providers/sagemaker/streaming.d.ts +1 -1
  55. package/dist/lib/providers/sagemaker/structured-parser.d.ts +1 -1
  56. package/dist/lib/session/globalSessionState.d.ts +26 -0
  57. package/dist/lib/session/globalSessionState.js +49 -0
  58. package/dist/lib/types/cli.d.ts +28 -0
  59. package/dist/lib/types/content.d.ts +18 -5
  60. package/dist/lib/types/contextTypes.d.ts +1 -1
  61. package/dist/lib/types/conversation.d.ts +55 -4
  62. package/dist/lib/types/fileTypes.d.ts +65 -0
  63. package/dist/lib/types/fileTypes.js +4 -0
  64. package/dist/lib/types/generateTypes.d.ts +12 -0
  65. package/dist/lib/types/guardrails.d.ts +103 -0
  66. package/dist/lib/types/guardrails.js +1 -0
  67. package/dist/lib/types/index.d.ts +4 -2
  68. package/dist/lib/types/index.js +4 -0
  69. package/dist/lib/types/mcpTypes.d.ts +407 -14
  70. package/dist/lib/types/providers.d.ts +469 -0
  71. package/dist/lib/types/streamTypes.d.ts +7 -0
  72. package/dist/lib/types/tools.d.ts +132 -35
  73. package/dist/lib/utils/csvProcessor.d.ts +68 -0
  74. package/dist/lib/utils/csvProcessor.js +277 -0
  75. package/dist/lib/utils/fileDetector.d.ts +57 -0
  76. package/dist/lib/utils/fileDetector.js +457 -0
  77. package/dist/lib/utils/imageProcessor.d.ts +10 -0
  78. package/dist/lib/utils/imageProcessor.js +22 -0
  79. package/dist/lib/utils/loopUtils.d.ts +71 -0
  80. package/dist/lib/utils/loopUtils.js +262 -0
  81. package/dist/lib/utils/messageBuilder.d.ts +2 -1
  82. package/dist/lib/utils/messageBuilder.js +197 -2
  83. package/dist/lib/utils/optionsUtils.d.ts +1 -1
  84. package/dist/mcp/factory.d.ts +2 -157
  85. package/dist/mcp/flexibleToolValidator.d.ts +1 -5
  86. package/dist/mcp/index.d.ts +3 -2
  87. package/dist/mcp/mcpCircuitBreaker.d.ts +1 -75
  88. package/dist/mcp/mcpClientFactory.d.ts +1 -20
  89. package/dist/mcp/mcpClientFactory.js +1 -0
  90. package/dist/mcp/registry.d.ts +3 -10
  91. package/dist/mcp/servers/agent/directToolsServer.d.ts +1 -1
  92. package/dist/mcp/servers/aiProviders/aiCoreServer.d.ts +1 -1
  93. package/dist/mcp/servers/utilities/utilityServer.d.ts +1 -1
  94. package/dist/mcp/toolDiscoveryService.d.ts +3 -84
  95. package/dist/mcp/toolRegistry.d.ts +2 -24
  96. package/dist/middleware/builtin/guardrails.d.ts +5 -16
  97. package/dist/middleware/builtin/guardrails.js +44 -39
  98. package/dist/middleware/utils/guardrailsUtils.d.ts +64 -0
  99. package/dist/middleware/utils/guardrailsUtils.js +387 -0
  100. package/dist/neurolink.d.ts +1 -1
  101. package/dist/providers/anthropic.js +46 -3
  102. package/dist/providers/azureOpenai.js +8 -2
  103. package/dist/providers/googleAiStudio.js +8 -2
  104. package/dist/providers/googleVertex.js +11 -2
  105. package/dist/providers/huggingFace.js +1 -1
  106. package/dist/providers/litellm.js +1 -1
  107. package/dist/providers/mistral.js +1 -1
  108. package/dist/providers/openAI.js +46 -3
  109. package/dist/providers/sagemaker/adaptive-semaphore.d.ts +1 -13
  110. package/dist/providers/sagemaker/client.d.ts +1 -1
  111. package/dist/providers/sagemaker/config.d.ts +1 -1
  112. package/dist/providers/sagemaker/detection.d.ts +1 -1
  113. package/dist/providers/sagemaker/errors.d.ts +1 -1
  114. package/dist/providers/sagemaker/index.d.ts +1 -1
  115. package/dist/providers/sagemaker/language-model.d.ts +3 -3
  116. package/dist/providers/sagemaker/parsers.d.ts +1 -1
  117. package/dist/providers/sagemaker/streaming.d.ts +1 -1
  118. package/dist/providers/sagemaker/structured-parser.d.ts +1 -1
  119. package/dist/session/globalSessionState.d.ts +26 -0
  120. package/dist/session/globalSessionState.js +49 -0
  121. package/dist/types/cli.d.ts +28 -0
  122. package/dist/types/content.d.ts +18 -5
  123. package/dist/types/contextTypes.d.ts +1 -1
  124. package/dist/types/conversation.d.ts +55 -4
  125. package/dist/types/fileTypes.d.ts +65 -0
  126. package/dist/types/fileTypes.js +4 -0
  127. package/dist/types/generateTypes.d.ts +12 -0
  128. package/dist/types/guardrails.d.ts +103 -0
  129. package/dist/types/guardrails.js +1 -0
  130. package/dist/types/index.d.ts +4 -2
  131. package/dist/types/index.js +4 -0
  132. package/dist/types/mcpTypes.d.ts +407 -14
  133. package/dist/types/modelTypes.d.ts +6 -6
  134. package/dist/types/providers.d.ts +469 -0
  135. package/dist/types/streamTypes.d.ts +7 -0
  136. package/dist/types/tools.d.ts +132 -35
  137. package/dist/utils/csvProcessor.d.ts +68 -0
  138. package/dist/utils/csvProcessor.js +277 -0
  139. package/dist/utils/fileDetector.d.ts +57 -0
  140. package/dist/utils/fileDetector.js +457 -0
  141. package/dist/utils/imageProcessor.d.ts +10 -0
  142. package/dist/utils/imageProcessor.js +22 -0
  143. package/dist/utils/loopUtils.d.ts +71 -0
  144. package/dist/utils/loopUtils.js +262 -0
  145. package/dist/utils/messageBuilder.d.ts +2 -1
  146. package/dist/utils/messageBuilder.js +197 -2
  147. package/dist/utils/optionsUtils.d.ts +1 -1
  148. package/package.json +9 -3
  149. package/dist/lib/mcp/contracts/mcpContract.d.ts +0 -106
  150. package/dist/lib/mcp/contracts/mcpContract.js +0 -5
  151. package/dist/lib/providers/sagemaker/types.d.ts +0 -456
  152. package/dist/lib/providers/sagemaker/types.js +0 -7
  153. package/dist/mcp/contracts/mcpContract.d.ts +0 -106
  154. package/dist/mcp/contracts/mcpContract.js +0 -5
  155. package/dist/providers/sagemaker/types.d.ts +0 -456
  156. package/dist/providers/sagemaker/types.js +0 -7
@@ -4,10 +4,10 @@
4
4
  */
5
5
  import { z } from "zod";
6
6
  import type { Result, JsonValue, ErrorInfo } from "./common.js";
7
+ import type { StandardRecord, ZodUnknownSchema } from "./typeAliases.js";
7
8
  /**
8
9
  * Commonly used Zod schema type aliases for cleaner type declarations
9
10
  */
10
- import type { ZodUnknownSchema } from "./typeAliases.js";
11
11
  export type { ZodUnknownSchema } from "./typeAliases.js";
12
12
  export type ZodAnySchema = z.ZodSchema<unknown>;
13
13
  export type ZodObjectSchema = z.ZodObject<z.ZodRawShape>;
@@ -19,148 +19,245 @@ export type ToolParameterSchema = ZodUnknownSchema | Record<string, JsonValue>;
19
19
  /**
20
20
  * Standard tool input parameters
21
21
  */
22
- export interface BaseToolArgs {
22
+ export type BaseToolArgs = {
23
23
  [key: string]: JsonValue | undefined;
24
- }
24
+ };
25
25
  /**
26
26
  * Tool execution arguments with specific common patterns
27
27
  */
28
- export interface ToolArgs extends BaseToolArgs {
28
+ export type ToolArgs = BaseToolArgs & {
29
29
  input?: JsonValue;
30
30
  data?: JsonValue;
31
31
  options?: JsonValue;
32
- }
32
+ };
33
+ /**
34
+ * Generic execution context for MCP operations
35
+ * Moved from src/lib/mcp/contracts/mcpContract.ts
36
+ */
37
+ export type ExecutionContext<T = StandardRecord> = {
38
+ sessionId?: string;
39
+ userId?: string;
40
+ config?: T;
41
+ metadata?: StandardRecord;
42
+ cacheOptions?: CacheOptions;
43
+ fallbackOptions?: FallbackOptions;
44
+ timeoutMs?: number;
45
+ startTime?: number;
46
+ };
47
+ /**
48
+ * Cache configuration options
49
+ * Moved from src/lib/mcp/contracts/mcpContract.ts
50
+ */
51
+ export type CacheOptions = {
52
+ enabled?: boolean;
53
+ ttlMs?: number;
54
+ strategy?: "memory" | "writeThrough" | "cacheAside";
55
+ };
56
+ /**
57
+ * Fallback configuration options
58
+ * Moved from src/lib/mcp/contracts/mcpContract.ts
59
+ */
60
+ export type FallbackOptions = {
61
+ enabled?: boolean;
62
+ maxAttempts?: number;
63
+ delayMs?: number;
64
+ circuitBreaker?: boolean;
65
+ };
66
+ /**
67
+ * Tool information with extensibility
68
+ * Moved from src/lib/mcp/contracts/mcpContract.ts
69
+ */
70
+ export type ToolInfo = {
71
+ name: string;
72
+ description?: string;
73
+ category?: string;
74
+ serverId?: string;
75
+ inputSchema?: StandardRecord;
76
+ outputSchema?: StandardRecord;
77
+ [key: string]: unknown;
78
+ };
79
+ /**
80
+ * Tool Implementation type for MCP tool registry
81
+ * Extracted from toolRegistry.ts for centralized type management
82
+ */
83
+ export type ToolImplementation = {
84
+ execute: (params: unknown, context?: ExecutionContext) => Promise<unknown> | unknown;
85
+ description?: string;
86
+ inputSchema?: unknown;
87
+ outputSchema?: unknown;
88
+ category?: string;
89
+ permissions?: string[];
90
+ };
91
+ /**
92
+ * Tool execution options for enhanced control
93
+ * Extracted from toolRegistry.ts for centralized type management
94
+ */
95
+ export type ToolExecutionOptions = {
96
+ timeout?: number;
97
+ retries?: number;
98
+ context?: unknown;
99
+ preferredSource?: string;
100
+ fallbackEnabled?: boolean;
101
+ validateBeforeExecution?: boolean;
102
+ timeoutMs?: number;
103
+ };
104
+ /**
105
+ * Tool execution result
106
+ * Moved from src/lib/mcp/contracts/mcpContract.ts
107
+ */
108
+ export type ToolExecutionResult<T = unknown> = {
109
+ result: T;
110
+ context?: ExecutionContext;
111
+ performance?: {
112
+ duration: number;
113
+ tokensUsed?: number;
114
+ cost?: number;
115
+ };
116
+ validation?: ValidationResult;
117
+ cached?: boolean;
118
+ fallback?: boolean;
119
+ };
120
+ /**
121
+ * Validation result for runtime checks
122
+ * Moved from src/lib/mcp/contracts/mcpContract.ts
123
+ */
124
+ export type ValidationResult = {
125
+ valid: boolean;
126
+ missing: string[];
127
+ warnings: string[];
128
+ recommendations: string[];
129
+ };
33
130
  /**
34
131
  * Tool execution metadata
35
132
  */
36
- export interface ToolExecutionMetadata {
133
+ export type ToolExecutionMetadata = {
37
134
  requestId?: string;
38
135
  startTime?: number;
39
136
  version?: string;
40
137
  [key: string]: JsonValue | undefined;
41
- }
138
+ };
42
139
  /**
43
140
  * Tool execution context
44
141
  */
45
- export interface ToolContext {
142
+ export type ToolContext = {
46
143
  sessionId?: string;
47
144
  userId?: string;
48
145
  aiProvider?: string;
49
146
  metadata?: ToolExecutionMetadata;
50
- }
147
+ };
51
148
  /**
52
149
  * Tool execution result metadata
53
150
  */
54
- export interface ToolResultMetadata {
151
+ export type ToolResultMetadata = {
55
152
  toolName?: string;
56
153
  executionTime?: number;
57
154
  timestamp?: number;
58
155
  source?: string;
59
156
  version?: string;
60
157
  serverId?: string;
61
- }
158
+ };
62
159
  /**
63
160
  * Tool execution result
64
161
  */
65
- export interface ToolResult<T = JsonValue> extends Result<T, ErrorInfo> {
162
+ export type ToolResult<T = JsonValue> = Result<T, ErrorInfo> & {
66
163
  success: boolean;
67
164
  data?: T;
68
165
  error?: ErrorInfo;
69
166
  metadata?: ToolResultMetadata;
70
- }
167
+ };
71
168
  /**
72
169
  * Tool metadata for registration
73
170
  */
74
- export interface ToolMetadata {
171
+ export type ToolMetadata = {
75
172
  category?: string;
76
173
  version?: string;
77
174
  author?: string;
78
175
  tags?: string[];
79
176
  documentation?: string;
80
177
  [key: string]: JsonValue | undefined;
81
- }
178
+ };
82
179
  /**
83
- * Tool definition interface
180
+ * Tool definition type
84
181
  */
85
- export interface ToolDefinition<TArgs = ToolArgs, TResult = JsonValue> {
182
+ export type ToolDefinition<TArgs = ToolArgs, TResult = JsonValue> = {
86
183
  description: string;
87
184
  parameters?: ToolParameterSchema;
88
185
  metadata?: ToolMetadata;
89
186
  execute: (params: TArgs, context?: ToolContext) => Promise<ToolResult<TResult>> | ToolResult<TResult>;
90
- }
187
+ };
91
188
  /**
92
- * Simple tool interface (for SDK)
189
+ * Simple tool type (for SDK)
93
190
  */
94
- export interface SimpleTool<TArgs = ToolArgs, TResult = JsonValue> {
191
+ export type SimpleTool<TArgs = ToolArgs, TResult = JsonValue> = {
95
192
  description: string;
96
193
  parameters?: ZodUnknownSchema;
97
194
  metadata?: ToolMetadata;
98
195
  execute: (params: TArgs, context?: ToolContext) => Promise<TResult>;
99
- }
196
+ };
100
197
  /**
101
198
  * Tool registry entry
102
199
  */
103
- export interface ToolRegistryEntry {
200
+ export type ToolRegistryEntry = {
104
201
  name: string;
105
202
  description: string;
106
203
  serverId?: string;
107
204
  isImplemented?: boolean;
108
205
  parameters?: ToolParameterSchema;
109
206
  execute?: ToolDefinition["execute"];
110
- }
207
+ };
111
208
  /**
112
209
  * Tool execution information
113
210
  */
114
- export interface ToolExecution {
211
+ export type ToolExecution = {
115
212
  toolName: string;
116
213
  params: ToolArgs;
117
214
  result: ToolResult;
118
215
  executionTime: number;
119
216
  timestamp: number;
120
- }
217
+ };
121
218
  /**
122
219
  * Available tool information
123
220
  */
124
- export interface AvailableTool {
221
+ export type AvailableTool = {
125
222
  name: string;
126
223
  description: string;
127
224
  serverId?: string;
128
225
  toolName?: string;
129
226
  parameters?: ToolParameterSchema;
130
- }
227
+ };
131
228
  /**
132
229
  * Tool validation options
133
230
  */
134
- export interface ToolValidationOptions {
231
+ export type ToolValidationOptions = {
135
232
  customValidator?: (toolName: string, params: ToolArgs) => boolean | Promise<boolean>;
136
233
  validateSchema?: boolean;
137
234
  allowUnknownProperties?: boolean;
138
- }
235
+ };
139
236
  /**
140
237
  * Tool call information (for AI SDK integration)
141
238
  */
142
- export interface ToolCall {
239
+ export type ToolCall = {
143
240
  toolName: string;
144
241
  parameters: ToolArgs;
145
242
  id?: string;
146
- }
243
+ };
147
244
  /**
148
245
  * AI SDK Tool Call format (from Vercel AI SDK)
149
246
  */
150
- export interface AiSdkToolCall {
247
+ export type AiSdkToolCall = {
151
248
  type: "tool-call";
152
249
  toolCallId: string;
153
250
  toolName: string;
154
251
  params: ToolArgs;
155
- }
252
+ };
156
253
  /**
157
254
  * Tool call result (for AI SDK integration)
158
255
  */
159
- export interface ToolCallResult {
256
+ export type ToolCallResult = {
160
257
  id?: string;
161
258
  result: ToolResult;
162
259
  formattedForAI: string;
163
- }
260
+ };
164
261
  /**
165
262
  * Type guard for tool result
166
263
  */
@@ -0,0 +1,68 @@
1
+ /**
2
+ * CSV Processing Utility
3
+ * Converts CSV files to LLM-friendly text formats
4
+ * Uses streaming for memory efficiency with large files
5
+ */
6
+ import type { FileProcessingResult, CSVProcessorOptions } from "../types/fileTypes.js";
7
/**
 * CSV processor for converting CSV data to LLM-optimized formats
 *
 * Supports three output formats:
 * - raw: Original CSV text, limited to the header plus `maxRows` lines
 *   (default; RECOMMENDED for best LLM performance). No parsing is performed.
 * - json: JSON array format (best for structured data processing)
 * - markdown: Markdown table format (best for small datasets <100 rows)
 *
 * Only the json and markdown formats are parsed with csv-parser; the raw
 * format is passed through as text.
 *
 * @example
 * ```typescript
 * const csvBuffer = Buffer.from('name,age\nAlice,30\nBob,25');
 * const result = await CSVProcessor.process(csvBuffer, {
 *   maxRows: 1000,
 *   formatStyle: 'raw'
 * });
 * console.log(result.content); // CSV text limited to header + maxRows lines
 * ```
 */
export declare class CSVProcessor {
    /**
     * Process a CSV Buffer into an LLM-friendly format.
     * The content is expected to be already loaded (e.g. by FileDetector).
     *
     * @param content - CSV file as Buffer (decoded as UTF-8)
     * @param options - Processing options; `maxRows` defaults to 1000 and is
     *   clamped to the range 1-10000, `formatStyle` defaults to "raw"
     * @returns Processing result whose `content` is raw CSV text, a JSON
     *   string, or a Markdown table, depending on `formatStyle`
     */
    static process(content: Buffer, options?: CSVProcessorOptions): Promise<FileProcessingResult>;
    /**
     * Parse a CSV file from disk using streaming (memory efficient).
     * The first two lines of the file are inspected to detect a leading
     * metadata line such as Excel's `SEP=,`.
     *
     * @param filePath - Path to CSV file
     * @param maxRows - Maximum rows to parse (default: 1000, clamped to 1-10000)
     * @returns Array of row objects
     */
    static parseCSVFile(filePath: string, maxRows?: number): Promise<unknown[]>;
    /**
     * Parse a CSV string into an array of row objects.
     * Exposed for use by tools that need direct CSV parsing.
     *
     * @param csvString - CSV data as string
     * @param maxRows - Maximum rows to parse (default: 1000, clamped to 1-10000)
     * @returns Array of row objects
     */
    static parseCSVString(csvString: string, maxRows?: number): Promise<unknown[]>;
    /**
     * Format parsed CSV data for LLM consumption.
     * Only used for JSON and Markdown formats (raw format is handled separately).
     */
    private static formatForLLM;
    /**
     * Format parsed rows as a Markdown table.
     * Best for small datasets (<100 rows).
     */
    private static toMarkdownTable;
}
@@ -0,0 +1,277 @@
1
+ /**
2
+ * CSV Processing Utility
3
+ * Converts CSV files to LLM-friendly text formats
4
+ * Uses streaming for memory efficiency with large files
5
+ */
6
+ import csvParser from "csv-parser";
7
+ import { Readable } from "stream";
8
+ import { logger } from "./logger.js";
9
/**
 * Detect whether the first line of a CSV is metadata rather than the header.
 *
 * Recognised patterns:
 * - Excel separator line: "SEP=,"
 * - A first line whose field-separator count differs from the second line's
 *   (including a first line with no separators at all)
 *
 * Commas inside double-quoted fields are NOT counted as separators. Without
 * this, a genuine header such as `name,address` followed by the row
 * `Alice,"1 Main St, NYC"` would be misclassified as metadata and dropped.
 *
 * @param {string[]} lines - The CSV split into lines; only the first two are inspected
 * @returns {boolean} true when the first line should be skipped as metadata
 */
function isMetadataLine(lines) {
    if (!lines[0] || lines.length < 2) {
        return false;
    }
    const firstLine = lines[0].trim();
    const secondLine = lines[1].trim();
    if (/^sep=/i.test(firstLine)) {
        return true;
    }
    // Count commas that act as field separators, i.e. those outside quotes.
    // An escaped quote ("") toggles the state twice, so it is handled for free.
    const countSeparators = (line) => {
        let insideQuotes = false;
        let separators = 0;
        for (const ch of line) {
            if (ch === '"') {
                insideQuotes = !insideQuotes;
            }
            else if (ch === "," && !insideQuotes) {
                separators++;
            }
        }
        return separators;
    };
    const firstCount = countSeparators(firstLine);
    const secondCount = countSeparators(secondLine);
    // Only a second line that looks like CSV can prove the first line is not.
    return secondCount > 0 && firstCount !== secondCount;
}
35
/**
 * CSV processor for converting CSV data to LLM-optimized formats
 *
 * Supports three output formats:
 * - raw: Original CSV text limited to header + maxRows lines (default;
 *   RECOMMENDED for best LLM performance). No parsing is performed.
 * - json: JSON array format (best for structured data processing)
 * - markdown: Markdown table format (best for small datasets <100 rows)
 *
 * Only the json and markdown formats are parsed with csv-parser.
 *
 * @example
 * ```typescript
 * const csvBuffer = Buffer.from('name,age\nAlice,30\nBob,25');
 * const result = await CSVProcessor.process(csvBuffer, {
 *   maxRows: 1000,
 *   formatStyle: 'raw'
 * });
 * console.log(result.content); // CSV text limited to header + maxRows lines
 * ```
 */
export class CSVProcessor {
    /**
     * Process CSV Buffer to LLM-friendly format
     * Content already loaded by FileDetector
     *
     * @param content - CSV file as Buffer (decoded as UTF-8)
     * @param options - Processing options (maxRows, formatStyle, includeHeaders)
     * @returns Result whose content is raw CSV text, a JSON string, or a
     *   Markdown table, depending on formatStyle
     */
    static async process(content, options) {
        const { maxRows: rawMaxRows = 1000, formatStyle = "raw", includeHeaders = true, } = options || {};
        const maxRows = CSVProcessor.clampMaxRows(rawMaxRows);
        const csvString = content.toString("utf-8");
        // For raw format, return original CSV with row limit (no parsing needed)
        // This preserves the exact original format which works best for LLMs
        if (formatStyle === "raw") {
            const lines = csvString.split("\n");
            const hasMetadataLine = isMetadataLine(lines);
            // Skip metadata line if present, then take header + maxRows data rows
            const csvLines = hasMetadataLine ? lines.slice(1) : lines;
            const limitedLines = csvLines.slice(0, 1 + maxRows);
            const limitedCSV = limitedLines.join("\n");
            // Blank lines (typically the one produced by a trailing newline)
            // are not data rows and must not inflate the reported counts.
            const countDataRows = (allLines) => allLines.slice(1).filter((line) => line.trim() !== "").length;
            const rowCount = countDataRows(limitedLines);
            const originalRowCount = countDataRows(csvLines);
            logger.debug(`[CSVProcessor] raw format: ${rowCount} rows (original: ${originalRowCount}) → ${limitedCSV.length} chars`, {
                formatStyle: "raw",
                originalSize: csvString.length,
                limitedSize: limitedCSV.length,
            });
            return {
                type: "csv",
                content: limitedCSV,
                mimeType: "text/csv",
                metadata: {
                    confidence: 100,
                    size: content.length,
                    rowCount,
                    columnCount: CSVProcessor.countColumns(limitedLines[0] || ""),
                },
            };
        }
        // Parse CSV for JSON and Markdown formats only
        const rows = await this.parseCSVString(csvString, maxRows);
        // Extract metadata from parsed results
        const rowCount = rows.length;
        const columnNames = rows.length > 0 ? Object.keys(rows[0]) : [];
        const columnCount = columnNames.length;
        const hasEmptyColumns = columnNames.some((col) => !col || col.trim() === "");
        const sampleRows = rows.slice(0, 3);
        const sampleData = sampleRows.length > 0
            ? JSON.stringify(sampleRows, null, 2)
            : "No data rows";
        // Format parsed data
        const formatted = this.formatForLLM(rows, formatStyle, includeHeaders);
        logger.info(`[CSVProcessor] ${formatStyle} format: ${rowCount} rows × ${columnCount} columns → ${formatted.length} chars`, { rowCount, columnCount, columns: columnNames, hasEmptyColumns });
        return {
            type: "csv",
            content: formatted,
            mimeType: "text/csv",
            metadata: {
                confidence: 100,
                size: content.length,
                rowCount,
                columnCount,
                columnNames,
                sampleData,
                hasEmptyColumns,
            },
        };
    }
    /**
     * Parse CSV file from disk using streaming (memory efficient)
     *
     * The first two lines are probed to detect a metadata line (e.g. "SEP=,").
     * When one is found it is skipped BEFORE header parsing via csv-parser's
     * `skipLines` option, so the real header row is used for column names.
     *
     * @param filePath - Path to CSV file
     * @param maxRows - Maximum rows to parse (default: 1000, clamped to 1-10000)
     * @returns Array of row objects
     */
    static async parseCSVFile(filePath, maxRows = 1000) {
        const clampedMaxRows = CSVProcessor.clampMaxRows(maxRows);
        const fs = await import("fs");
        // Read the first 2 lines to detect metadata
        const fileHandle = await fs.promises.open(filePath, "r");
        let firstLines = [];
        try {
            firstLines = await new Promise((resolve, reject) => {
                const lineReader = fileHandle.createReadStream({ encoding: "utf-8" });
                let buffered = "";
                lineReader.on("data", (chunk) => {
                    buffered += chunk.toString();
                    const lines = buffered.split("\n");
                    // Three segments guarantee the first two lines are complete;
                    // with only two, the second may be cut at a chunk boundary.
                    if (lines.length >= 3) {
                        lineReader.destroy();
                        resolve(lines.slice(0, 2));
                    }
                });
                // Short file: use whatever was read (at most two lines)
                lineReader.on("end", () => resolve(buffered.split("\n").slice(0, 2)));
                lineReader.on("error", reject);
            });
        }
        finally {
            // Closing an already-closed handle is a no-op, so this is safe even
            // when the stream has auto-closed it.
            await fileHandle.close();
        }
        const skipLines = isMetadataLine(firstLines) ? 1 : 0;
        const source = fs.createReadStream(filePath, { encoding: "utf-8" });
        return CSVProcessor.collectRows(source, csvParser({ skipLines }), clampedMaxRows, "[CSVProcessor] File parsing failed:");
    }
    /**
     * Parse CSV string to array of row objects
     * Exposed for use by tools that need direct CSV parsing
     *
     * @param csvString - CSV data as string
     * @param maxRows - Maximum rows to parse (default: 1000, clamped to 1-10000)
     * @returns Array of row objects
     */
    static async parseCSVString(csvString, maxRows = 1000) {
        const clampedMaxRows = CSVProcessor.clampMaxRows(maxRows);
        // Detect and skip metadata line
        const lines = csvString.split("\n");
        const csvData = isMetadataLine(lines) ? lines.slice(1).join("\n") : csvString;
        return CSVProcessor.collectRows(Readable.from([csvData]), csvParser(), clampedMaxRows, "[CSVProcessor] Parsing failed:");
    }
    /**
     * Normalise a requested row limit to an integer in the range 1-10000.
     * Non-numeric or non-finite input falls back to the default of 1000
     * instead of propagating NaN into slice()/comparison logic.
     */
    static clampMaxRows(maxRows) {
        const requested = Number(maxRows);
        const wholeRows = Number.isFinite(requested) ? Math.floor(requested) : 1000;
        return Math.max(1, Math.min(10000, wholeRows));
    }
    /**
     * Count the columns of a raw header line, ignoring commas that appear
     * inside double-quoted fields. A blank header has zero columns.
     */
    static countColumns(headerLine) {
        if (headerLine.trim() === "") {
            return 0;
        }
        let insideQuotes = false;
        let columns = 1;
        for (const ch of headerLine) {
            if (ch === '"') {
                insideQuotes = !insideQuotes;
            }
            else if (ch === "," && !insideQuotes) {
                columns++;
            }
        }
        return columns;
    }
    /**
     * Pipe `source` through `parser` and collect at most `limit` rows.
     * Settles exactly once: on reaching the limit, on end of input, or on the
     * first error from either stream. Both streams are destroyed on every path
     * so file descriptors are not leaked.
     */
    static collectRows(source, parser, limit, errorLabel) {
        return new Promise((resolve, reject) => {
            const rows = [];
            let settled = false;
            const finish = (error) => {
                if (settled) {
                    return;
                }
                settled = true;
                source.destroy();
                parser.destroy();
                if (error) {
                    logger.error(errorLabel, error);
                    reject(error);
                }
                else {
                    resolve(rows);
                }
            };
            // pipe() does not forward errors, so the source needs its own handler
            source.on("error", finish);
            parser
                .on("data", (row) => {
                // Rows already buffered by the parser may still arrive after abort
                if (settled) {
                    return;
                }
                rows.push(row);
                if (rows.length >= limit) {
                    logger.debug(`[CSVProcessor] Reached row limit ${limit}, stopping parse`);
                    finish();
                }
            })
                .on("end", () => finish())
                .on("error", finish);
            source.pipe(parser);
        });
    }
    /**
     * Format parsed CSV data for LLM consumption
     * Only used for JSON and Markdown formats (raw format handled separately)
     */
    static formatForLLM(rows, formatStyle, includeHeaders) {
        if (rows.length === 0) {
            return "CSV file is empty or contains no data.";
        }
        if (formatStyle === "json") {
            return JSON.stringify(rows, null, 2);
        }
        return this.toMarkdownTable(rows, includeHeaders);
    }
    /**
     * Format as markdown table
     * Best for small datasets (<100 rows)
     */
    static toMarkdownTable(rows, includeHeaders) {
        if (rows.length === 0) {
            return "CSV file is empty or contains no data.";
        }
        const headers = Object.keys(rows[0]);
        // Escape backslashes, pipes, and sanitize newlines to keep rows intact
        const escapePipe = (str) => str.replace(/\\/g, "\\\\").replace(/\|/g, "\\|").replace(/\r?\n/g, " ");
        let markdown = "";
        if (includeHeaders) {
            markdown = "| " + headers.map((header) => escapePipe(header)).join(" | ") + " |\n";
            markdown += "|" + headers.map(() => " --- ").join("|") + "|\n";
        }
        for (const row of rows) {
            // ?? keeps legitimate falsy cell values; only missing cells become ""
            const cells = headers.map((h) => escapePipe(String(row[h] ?? "")));
            markdown += "| " + cells.join(" | ") + " |\n";
        }
        return markdown;
    }
}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * File Type Detection Utility
3
+ * Centralized file detection for all multimodal file types
4
+ * Uses multi-strategy approach for reliable type identification
5
+ */
6
+ import type { FileInput, FileProcessingResult, FileDetectorOptions } from "../types/fileTypes.js";
7
/**
 * Centralized file type detection and processing
 *
 * All members other than `detectAndProcess` are private and are declared here
 * without signatures; their behavior is described only by the comments below.
 *
 * @example
 * ```typescript
 * // Auto-detect and process any file
 * const result = await FileDetector.detectAndProcess("data.csv");
 * console.log(result.type); // 'csv'
 * ```
 */
export declare class FileDetector {
    /**
     * Auto-detect file type and process in one call
     *
     * Runs detection strategies in priority order (the order is by priority,
     * not by confidence):
     * 1. MagicBytesStrategy (95% confidence) - Binary file headers
     * 2. MimeTypeStrategy (85% confidence) - HTTP Content-Type for URLs
     * 3. ExtensionStrategy (70% confidence) - File extension
     * 4. ContentHeuristicStrategy (75% confidence) - Content analysis
     *
     * @param input - File path, URL, Buffer, or data URI
     * @param options - Detection and processing options
     * @returns Processed file result with type and content
     */
    static detectAndProcess(input: FileInput, options?: FileDetectorOptions): Promise<FileProcessingResult>;
    /**
     * Detect file type using multi-strategy approach
     * Stops at first strategy with confidence >= threshold (default: 80%)
     *
     * With the confidences listed on `detectAndProcess`, only the first two
     * strategies (95%, 85%) can meet the default threshold.
     * NOTE(review): what is returned when no strategy reaches the threshold is
     * not visible from this declaration - confirm against the implementation.
     */
    private static detect;
    /**
     * Load file content from various sources
     * (presumably dispatches to the URL / path / data URI loaders below - verify)
     */
    private static loadContent;
    /**
     * Route to appropriate processor
     */
    private static processFile;
    /**
     * Load file from URL
     */
    private static loadFromURL;
    /**
     * Load file from filesystem path
     */
    private static loadFromPath;
    /**
     * Load file from data URI
     */
    private static loadFromDataURI;
}