@juspay/neurolink 8.6.0 → 8.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -0
- package/dist/cli/factories/commandFactory.d.ts +5 -0
- package/dist/cli/factories/commandFactory.js +96 -0
- package/dist/cli/utils/audioFileUtils.d.ts +70 -0
- package/dist/cli/utils/audioFileUtils.js +174 -0
- package/dist/lib/types/cli.d.ts +2 -0
- package/dist/lib/types/fileTypes.d.ts +13 -12
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +2 -0
- package/dist/lib/types/ttsTypes.d.ts +91 -0
- package/dist/lib/types/ttsTypes.js +58 -0
- package/dist/lib/utils/imageProcessor.d.ts +38 -5
- package/dist/lib/utils/imageProcessor.js +131 -7
- package/dist/lib/utils/pdfProcessor.js +24 -2
- package/dist/types/cli.d.ts +2 -0
- package/dist/types/fileTypes.d.ts +13 -12
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +2 -0
- package/dist/types/ttsTypes.d.ts +91 -0
- package/dist/types/ttsTypes.js +57 -0
- package/dist/utils/imageProcessor.d.ts +38 -5
- package/dist/utils/imageProcessor.js +131 -7
- package/dist/utils/pdfProcessor.js +24 -2
- package/package.json +7 -4
|
@@ -3,6 +3,57 @@
|
|
|
3
3
|
* Handles format conversion for different AI providers
|
|
4
4
|
*/
|
|
5
5
|
import { logger } from "./logger.js";
|
|
6
|
+
import { withRetry } from "./retryHandler.js";
|
|
7
|
+
import { SYSTEM_LIMITS } from "../core/constants.js";
|
|
8
|
+
/**
|
|
9
|
+
* Network error codes that should trigger a retry
|
|
10
|
+
*/
|
|
11
|
+
const RETRYABLE_ERROR_CODES = new Set([
|
|
12
|
+
"ECONNRESET",
|
|
13
|
+
"ENOTFOUND",
|
|
14
|
+
"ECONNREFUSED",
|
|
15
|
+
"ETIMEDOUT",
|
|
16
|
+
"ERR_NETWORK",
|
|
17
|
+
]);
|
|
18
|
+
/**
|
|
19
|
+
* Determines if an HTTP error is retryable based on status code
|
|
20
|
+
* Only network errors and certain HTTP status codes should be retried
|
|
21
|
+
* 4xx client errors like 404 (Not Found) and 403 (Forbidden) should NOT be retried
|
|
22
|
+
*
|
|
23
|
+
* @param error - The error to check
|
|
24
|
+
* @returns true if the error is retryable, false otherwise
|
|
25
|
+
*/
|
|
26
|
+
function isRetryableDownloadError(error) {
|
|
27
|
+
// Network-related errors should be retried
|
|
28
|
+
if (error && typeof error === "object") {
|
|
29
|
+
const errorCode = error.code;
|
|
30
|
+
const errorName = error.name;
|
|
31
|
+
if (RETRYABLE_ERROR_CODES.has(errorCode || "") ||
|
|
32
|
+
errorName === "AbortError") {
|
|
33
|
+
return true;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
// Check for HTTP status code in error message for retryable errors
|
|
37
|
+
// Only retry on 5xx server errors, 429 (Too Many Requests), and 408 (Request Timeout)
|
|
38
|
+
// Do NOT retry on 4xx client errors like 404 (Not Found) or 403 (Forbidden)
|
|
39
|
+
if (error instanceof Error) {
|
|
40
|
+
const message = error.message;
|
|
41
|
+
// Extract HTTP status from error message like "HTTP 503: Service Unavailable"
|
|
42
|
+
const statusMatch = message.match(/HTTP (\d{3}):/);
|
|
43
|
+
if (statusMatch) {
|
|
44
|
+
const status = parseInt(statusMatch[1], 10);
|
|
45
|
+
// Retry on 5xx server errors, 429 (rate limit), 408 (timeout)
|
|
46
|
+
return status >= 500 || status === 429 || status === 408;
|
|
47
|
+
}
|
|
48
|
+
// Check for timeout/network-related error messages
|
|
49
|
+
// Use more precise matching to avoid false positives like "No timeout specified"
|
|
50
|
+
if (/\b(request timed out|operation timed out|connection timed out|timed out)\b/i.test(message) ||
|
|
51
|
+
/\bnetwork (error|failure|unreachable|down)\b/i.test(message)) {
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
return false;
|
|
56
|
+
}
|
|
6
57
|
/**
|
|
7
58
|
* Image processor class for handling provider-specific image formatting
|
|
8
59
|
*/
|
|
@@ -16,9 +67,16 @@ export class ImageProcessor {
|
|
|
16
67
|
* @returns Processed image as data URI
|
|
17
68
|
*/
|
|
18
69
|
static async process(content, _options) {
|
|
70
|
+
// Validate content is non-empty before processing
|
|
71
|
+
if (content.length === 0) {
|
|
72
|
+
logger.error("Empty buffer provided");
|
|
73
|
+
throw new Error("Invalid image processing: buffer is empty");
|
|
74
|
+
}
|
|
19
75
|
const mediaType = this.detectImageType(content);
|
|
20
76
|
const base64 = content.toString("base64");
|
|
21
77
|
const dataUri = `data:${mediaType};base64,${base64}`;
|
|
78
|
+
// Validate output before returning
|
|
79
|
+
this.validateProcessOutput(dataUri, base64, mediaType);
|
|
22
80
|
return {
|
|
23
81
|
type: "image",
|
|
24
82
|
content: dataUri,
|
|
@@ -29,6 +87,37 @@ export class ImageProcessor {
|
|
|
29
87
|
},
|
|
30
88
|
};
|
|
31
89
|
}
|
|
90
|
+
/**
|
|
91
|
+
* Validate processed output meets required format
|
|
92
|
+
* Checks:
|
|
93
|
+
* - Base64 content is non-empty
|
|
94
|
+
* - Data URI format is valid (data:{mimeType};base64,{content})
|
|
95
|
+
* - MIME type is in the allowed list
|
|
96
|
+
* @param dataUri - The complete data URI string
|
|
97
|
+
* @param base64 - The base64-encoded content
|
|
98
|
+
* @param mediaType - The MIME type of the image
|
|
99
|
+
* @throws Error if any validation fails
|
|
100
|
+
*/
|
|
101
|
+
static validateProcessOutput(dataUri, base64, mediaType) {
|
|
102
|
+
// Validate base64 is non-empty (check first for better error message)
|
|
103
|
+
if (base64.length === 0) {
|
|
104
|
+
logger.error("Empty base64 content generated");
|
|
105
|
+
throw new Error("Invalid image processing: base64 content is empty");
|
|
106
|
+
}
|
|
107
|
+
// Validate data URI format with proper base64 character validation
|
|
108
|
+
// Base64 can only have 0, 1, or 2 padding characters at the end
|
|
109
|
+
const dataUriRegex = /^data:[^;]+;base64,[A-Za-z0-9+/]*={0,2}$/;
|
|
110
|
+
if (!dataUriRegex.test(dataUri)) {
|
|
111
|
+
logger.error("Invalid data URI format generated", { dataUri });
|
|
112
|
+
throw new Error("Invalid data URI format: must be data:{mimeType};base64,{content}");
|
|
113
|
+
}
|
|
114
|
+
// Defensive check: ensure detectImageType() returns valid MIME type
|
|
115
|
+
// This validation protects against future changes to detectImageType()
|
|
116
|
+
if (!this.validateImageFormat(mediaType)) {
|
|
117
|
+
logger.error("Invalid MIME type generated", { mediaType });
|
|
118
|
+
throw new Error(`Invalid MIME type: ${mediaType} is not in allowed list`);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
32
121
|
/**
|
|
33
122
|
* Process image for OpenAI (requires data URI format)
|
|
34
123
|
*/
|
|
@@ -434,14 +523,35 @@ export const imageUtils = {
|
|
|
434
523
|
}
|
|
435
524
|
},
|
|
436
525
|
/**
|
|
437
|
-
* Convert URL to base64 data URI by downloading the image
|
|
526
|
+
* Convert URL to base64 data URI by downloading the image.
|
|
527
|
+
* Implements retry logic with exponential backoff for network errors.
|
|
528
|
+
*
|
|
529
|
+
* Retries are performed for:
|
|
530
|
+
* - Network errors (ECONNRESET, ENOTFOUND, ECONNREFUSED, ETIMEDOUT, ERR_NETWORK, AbortError)
|
|
531
|
+
* - Server errors (5xx status codes)
|
|
532
|
+
* - Rate limiting (429 Too Many Requests)
|
|
533
|
+
* - Request timeouts (408 Request Timeout)
|
|
534
|
+
*
|
|
535
|
+
* Retries are NOT performed for:
|
|
536
|
+
* - Client errors (4xx status codes except 408, 429)
|
|
537
|
+
* - Invalid content type
|
|
538
|
+
* - Content size limit exceeded
|
|
539
|
+
* - Unsupported protocol
|
|
540
|
+
*
|
|
541
|
+
* @param url - The URL of the image to download
|
|
542
|
+
* @param options - Configuration options
|
|
543
|
+
* @param options.timeoutMs - Timeout for each download attempt (default: 15000ms)
|
|
544
|
+
* @param options.maxBytes - Maximum allowed file size (default: 10MB)
|
|
545
|
+
* @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
|
|
546
|
+
* @returns Promise<string> - Base64 data URI of the downloaded image
|
|
438
547
|
*/
|
|
439
|
-
urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024 } = {}) => {
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
548
|
+
urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024, maxAttempts = 3, } = {}) => {
|
|
549
|
+
// Basic protocol whitelist - fail fast, no retry needed
|
|
550
|
+
if (!/^https?:\/\//i.test(url)) {
|
|
551
|
+
throw new Error("Unsupported protocol");
|
|
552
|
+
}
|
|
553
|
+
// Perform the actual download with retry logic
|
|
554
|
+
const performDownload = async () => {
|
|
445
555
|
const controller = new AbortController();
|
|
446
556
|
const t = setTimeout(() => controller.abort(), timeoutMs);
|
|
447
557
|
try {
|
|
@@ -467,6 +577,20 @@ export const imageUtils = {
|
|
|
467
577
|
finally {
|
|
468
578
|
clearTimeout(t);
|
|
469
579
|
}
|
|
580
|
+
};
|
|
581
|
+
try {
|
|
582
|
+
return await withRetry(performDownload, {
|
|
583
|
+
maxAttempts,
|
|
584
|
+
initialDelay: SYSTEM_LIMITS.DEFAULT_INITIAL_DELAY,
|
|
585
|
+
backoffMultiplier: SYSTEM_LIMITS.DEFAULT_BACKOFF_MULTIPLIER,
|
|
586
|
+
maxDelay: SYSTEM_LIMITS.DEFAULT_MAX_DELAY,
|
|
587
|
+
retryCondition: isRetryableDownloadError,
|
|
588
|
+
onRetry: (attempt, error) => {
|
|
589
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
590
|
+
const attemptsLeft = maxAttempts - attempt;
|
|
591
|
+
logger.warn(`⚠️ Image download attempt ${attempt} failed for ${url}: ${message}. ${attemptsLeft} ${attemptsLeft === 1 ? "attempt" : "attempts"} remaining...`);
|
|
592
|
+
},
|
|
593
|
+
});
|
|
470
594
|
}
|
|
471
595
|
catch (error) {
|
|
472
596
|
throw new Error(`Failed to download and convert URL to base64: ${error instanceof Error ? error.message : "Unknown error"}`);
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { logger } from "./logger.js";
|
|
2
|
-
|
|
3
|
-
import
|
|
2
|
+
// Lazy-load pdfjs-dist to avoid DOMMatrix errors in Node.js server environment
|
|
3
|
+
// import * as pdfjs from "pdfjs-dist/legacy/build/pdf.mjs";
|
|
4
4
|
const PDF_PROVIDER_CONFIGS = {
|
|
5
5
|
anthropic: {
|
|
6
6
|
maxSizeMB: 5,
|
|
@@ -196,6 +196,28 @@ export class PDFProcessor {
|
|
|
196
196
|
}
|
|
197
197
|
}
|
|
198
198
|
static async convertPDFToImages(pdfBuffer, options) {
|
|
199
|
+
// Dynamic import canvas - only load when actually needed
|
|
200
|
+
let createCanvas;
|
|
201
|
+
try {
|
|
202
|
+
const canvasModule = await import("canvas");
|
|
203
|
+
createCanvas = canvasModule.createCanvas;
|
|
204
|
+
}
|
|
205
|
+
catch {
|
|
206
|
+
throw new Error("Canvas dependency not available. " +
|
|
207
|
+
"PDF-to-image conversion requires the 'canvas' package with native bindings. " +
|
|
208
|
+
"Install with: pnpm install canvas\n" +
|
|
209
|
+
"Note: This requires native build tools (Python, C++ compiler).");
|
|
210
|
+
}
|
|
211
|
+
// Dynamic import pdfjs - only load when actually needed to avoid DOMMatrix errors
|
|
212
|
+
let pdfjs;
|
|
213
|
+
try {
|
|
214
|
+
pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
|
|
215
|
+
}
|
|
216
|
+
catch {
|
|
217
|
+
throw new Error("pdfjs-dist dependency not available. " +
|
|
218
|
+
"PDF processing requires the 'pdfjs-dist' package. " +
|
|
219
|
+
"Install with: pnpm install pdfjs-dist");
|
|
220
|
+
}
|
|
199
221
|
const maxPages = options?.maxPages || 10;
|
|
200
222
|
const scale = options?.scale || 2.0;
|
|
201
223
|
const format = options?.format || "png";
|
package/dist/types/cli.d.ts
CHANGED
|
@@ -81,18 +81,7 @@ export type PDFProcessorOptions = {
|
|
|
81
81
|
bedrockApiMode?: "converse" | "invokeModel";
|
|
82
82
|
};
|
|
83
83
|
/**
|
|
84
|
-
*
|
|
85
|
-
*/
|
|
86
|
-
export type FileDetectorOptions = {
|
|
87
|
-
maxSize?: number;
|
|
88
|
-
timeout?: number;
|
|
89
|
-
allowedTypes?: FileType[];
|
|
90
|
-
csvOptions?: CSVProcessorOptions;
|
|
91
|
-
confidenceThreshold?: number;
|
|
92
|
-
provider?: string;
|
|
93
|
-
};
|
|
94
|
-
/**
|
|
95
|
-
* Audio processor options for transcription configuration
|
|
84
|
+
* Audio processor options
|
|
96
85
|
*/
|
|
97
86
|
export type AudioProcessorOptions = {
|
|
98
87
|
/** AI provider to use for transcription (e.g., 'openai', 'google', 'azure') */
|
|
@@ -108,6 +97,18 @@ export type AudioProcessorOptions = {
|
|
|
108
97
|
/** Maximum file size in megabytes */
|
|
109
98
|
maxSizeMB?: number;
|
|
110
99
|
};
|
|
100
|
+
/**
|
|
101
|
+
* File detector options
|
|
102
|
+
*/
|
|
103
|
+
export type FileDetectorOptions = {
|
|
104
|
+
maxSize?: number;
|
|
105
|
+
timeout?: number;
|
|
106
|
+
allowedTypes?: FileType[];
|
|
107
|
+
audioOptions?: AudioProcessorOptions;
|
|
108
|
+
csvOptions?: CSVProcessorOptions;
|
|
109
|
+
confidenceThreshold?: number;
|
|
110
|
+
provider?: string;
|
|
111
|
+
};
|
|
111
112
|
/**
|
|
112
113
|
* Google AI Studio Files API types
|
|
113
114
|
*/
|
package/dist/types/index.d.ts
CHANGED
package/dist/types/index.js
CHANGED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text-to-Speech (TTS) Type Definitions for NeuroLink
|
|
3
|
+
*
|
|
4
|
+
* This module defines types for TTS audio generation and output.
|
|
5
|
+
*
|
|
6
|
+
* @module types/ttsTypes
|
|
7
|
+
*/
|
|
8
|
+
/**
|
|
9
|
+
* Supported audio formats for TTS output
|
|
10
|
+
*/
|
|
11
|
+
export type AudioFormat = "mp3" | "wav" | "ogg" | "opus";
|
|
12
|
+
/**
|
|
13
|
+
* TTS quality settings
|
|
14
|
+
*/
|
|
15
|
+
export type TTSQuality = "standard" | "hd";
|
|
16
|
+
/**
|
|
17
|
+
* TTS configuration options
|
|
18
|
+
*/
|
|
19
|
+
export type TTSOptions = {
|
|
20
|
+
/** Enable TTS output */
|
|
21
|
+
enabled?: boolean;
|
|
22
|
+
/** Voice identifier (e.g., "en-US-Neural2-C") */
|
|
23
|
+
voice?: string;
|
|
24
|
+
/** Audio format (default: mp3) */
|
|
25
|
+
format?: AudioFormat;
|
|
26
|
+
/** Speaking rate 0.25-4.0 (default: 1.0) */
|
|
27
|
+
speed?: number;
|
|
28
|
+
/** Audio quality (default: standard) */
|
|
29
|
+
quality?: TTSQuality;
|
|
30
|
+
/** Output file path (optional) */
|
|
31
|
+
output?: string;
|
|
32
|
+
/** Auto-play audio after generation (default: false) */
|
|
33
|
+
play?: boolean;
|
|
34
|
+
};
|
|
35
|
+
/**
|
|
36
|
+
* TTS audio result returned from generation
|
|
37
|
+
*/
|
|
38
|
+
export type TTSResult = {
|
|
39
|
+
/** Audio data as Buffer */
|
|
40
|
+
buffer: Buffer;
|
|
41
|
+
/** Audio format */
|
|
42
|
+
format: AudioFormat;
|
|
43
|
+
/** Audio file size in bytes */
|
|
44
|
+
size: number;
|
|
45
|
+
/** Duration in seconds (if available) */
|
|
46
|
+
duration?: number;
|
|
47
|
+
/** Voice used for generation */
|
|
48
|
+
voice?: string;
|
|
49
|
+
/** Sample rate in Hz */
|
|
50
|
+
sampleRate?: number;
|
|
51
|
+
};
|
|
52
|
+
/**
|
|
53
|
+
* Result of saving audio to file
|
|
54
|
+
*/
|
|
55
|
+
export type AudioSaveResult = {
|
|
56
|
+
/** Whether the save was successful */
|
|
57
|
+
success: boolean;
|
|
58
|
+
/** Full path to the saved file */
|
|
59
|
+
path: string;
|
|
60
|
+
/** File size in bytes */
|
|
61
|
+
size: number;
|
|
62
|
+
/** Error message if failed */
|
|
63
|
+
error?: string;
|
|
64
|
+
};
|
|
65
|
+
/**
|
|
66
|
+
* TTS voice information
|
|
67
|
+
*/
|
|
68
|
+
export type TTSVoice = {
|
|
69
|
+
/** Voice identifier */
|
|
70
|
+
id: string;
|
|
71
|
+
/** Display name */
|
|
72
|
+
name: string;
|
|
73
|
+
/** Language code (e.g., "en-US") */
|
|
74
|
+
languageCode: string;
|
|
75
|
+
/** Gender */
|
|
76
|
+
gender: "male" | "female" | "neutral";
|
|
77
|
+
/** Voice type */
|
|
78
|
+
type: "neural" | "wavenet" | "standard";
|
|
79
|
+
};
|
|
80
|
+
/** Valid audio formats as an array for runtime validation */
|
|
81
|
+
export declare const VALID_AUDIO_FORMATS: readonly AudioFormat[];
|
|
82
|
+
/** Valid TTS quality levels as an array for runtime validation */
|
|
83
|
+
export declare const VALID_TTS_QUALITIES: readonly TTSQuality[];
|
|
84
|
+
/**
|
|
85
|
+
* Type guard to check if an object is a TTSResult
|
|
86
|
+
*/
|
|
87
|
+
export declare function isTTSResult(value: unknown): value is TTSResult;
|
|
88
|
+
/**
|
|
89
|
+
* Type guard to check if TTSOptions are valid
|
|
90
|
+
*/
|
|
91
|
+
export declare function isValidTTSOptions(options: unknown): options is TTSOptions;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text-to-Speech (TTS) Type Definitions for NeuroLink
|
|
3
|
+
*
|
|
4
|
+
* This module defines types for TTS audio generation and output.
|
|
5
|
+
*
|
|
6
|
+
* @module types/ttsTypes
|
|
7
|
+
*/
|
|
8
|
+
/** Valid audio formats as an array for runtime validation */
|
|
9
|
+
export const VALID_AUDIO_FORMATS = [
|
|
10
|
+
"mp3",
|
|
11
|
+
"wav",
|
|
12
|
+
"ogg",
|
|
13
|
+
"opus",
|
|
14
|
+
];
|
|
15
|
+
/** Valid TTS quality levels as an array for runtime validation */
|
|
16
|
+
export const VALID_TTS_QUALITIES = ["standard", "hd"];
|
|
17
|
+
/**
|
|
18
|
+
* Type guard to check if an object is a TTSResult
|
|
19
|
+
*/
|
|
20
|
+
export function isTTSResult(value) {
|
|
21
|
+
if (!value || typeof value !== "object") {
|
|
22
|
+
return false;
|
|
23
|
+
}
|
|
24
|
+
const obj = value;
|
|
25
|
+
return (Buffer.isBuffer(obj.buffer) &&
|
|
26
|
+
typeof obj.format === "string" &&
|
|
27
|
+
VALID_AUDIO_FORMATS.includes(obj.format) &&
|
|
28
|
+
typeof obj.size === "number" &&
|
|
29
|
+
obj.size >= 0);
|
|
30
|
+
}
|
|
31
|
+
/**
|
|
32
|
+
* Type guard to check if TTSOptions are valid
|
|
33
|
+
*/
|
|
34
|
+
export function isValidTTSOptions(options) {
|
|
35
|
+
if (!options || typeof options !== "object") {
|
|
36
|
+
return false;
|
|
37
|
+
}
|
|
38
|
+
const opts = options;
|
|
39
|
+
if (opts.speed !== undefined) {
|
|
40
|
+
if (typeof opts.speed !== "number" ||
|
|
41
|
+
opts.speed < 0.25 ||
|
|
42
|
+
opts.speed > 4.0) {
|
|
43
|
+
return false;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
if (opts.format !== undefined) {
|
|
47
|
+
if (!VALID_AUDIO_FORMATS.includes(opts.format)) {
|
|
48
|
+
return false;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
if (opts.quality !== undefined) {
|
|
52
|
+
if (!VALID_TTS_QUALITIES.includes(opts.quality)) {
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
return true;
|
|
57
|
+
}
|
|
@@ -17,6 +17,18 @@ export declare class ImageProcessor {
|
|
|
17
17
|
* @returns Processed image as data URI
|
|
18
18
|
*/
|
|
19
19
|
static process(content: Buffer, _options?: unknown): Promise<FileProcessingResult>;
|
|
20
|
+
/**
|
|
21
|
+
* Validate processed output meets required format
|
|
22
|
+
* Checks:
|
|
23
|
+
* - Base64 content is non-empty
|
|
24
|
+
* - Data URI format is valid (data:{mimeType};base64,{content})
|
|
25
|
+
* - MIME type is in the allowed list
|
|
26
|
+
* @param dataUri - The complete data URI string
|
|
27
|
+
* @param base64 - The base64-encoded content
|
|
28
|
+
* @param mediaType - The MIME type of the image
|
|
29
|
+
* @throws Error if any validation fails
|
|
30
|
+
*/
|
|
31
|
+
private static validateProcessOutput;
|
|
20
32
|
/**
|
|
21
33
|
* Process image for OpenAI (requires data URI format)
|
|
22
34
|
*/
|
|
@@ -104,11 +116,32 @@ export declare const imageUtils: {
|
|
|
104
116
|
*/
|
|
105
117
|
fileToBase64DataUri: (filePath: string, maxBytes?: number) => Promise<string>;
|
|
106
118
|
/**
|
|
107
|
-
* Convert URL to base64 data URI by downloading the image
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
119
|
+
* Convert URL to base64 data URI by downloading the image.
|
|
120
|
+
* Implements retry logic with exponential backoff for network errors.
|
|
121
|
+
*
|
|
122
|
+
* Retries are performed for:
|
|
123
|
+
* - Network errors (ECONNRESET, ENOTFOUND, ECONNREFUSED, ETIMEDOUT, ERR_NETWORK, AbortError)
|
|
124
|
+
* - Server errors (5xx status codes)
|
|
125
|
+
* - Rate limiting (429 Too Many Requests)
|
|
126
|
+
* - Request timeouts (408 Request Timeout)
|
|
127
|
+
*
|
|
128
|
+
* Retries are NOT performed for:
|
|
129
|
+
* - Client errors (4xx status codes except 408, 429)
|
|
130
|
+
* - Invalid content type
|
|
131
|
+
* - Content size limit exceeded
|
|
132
|
+
* - Unsupported protocol
|
|
133
|
+
*
|
|
134
|
+
* @param url - The URL of the image to download
|
|
135
|
+
* @param options - Configuration options
|
|
136
|
+
* @param options.timeoutMs - Timeout for each download attempt (default: 15000ms)
|
|
137
|
+
* @param options.maxBytes - Maximum allowed file size (default: 10MB)
|
|
138
|
+
* @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
|
|
139
|
+
* @returns Promise<string> - Base64 data URI of the downloaded image
|
|
140
|
+
*/
|
|
141
|
+
urlToBase64DataUri: (url: string, { timeoutMs, maxBytes, maxAttempts, }?: {
|
|
142
|
+
timeoutMs?: number;
|
|
143
|
+
maxBytes?: number;
|
|
144
|
+
maxAttempts?: number;
|
|
112
145
|
}) => Promise<string>;
|
|
113
146
|
/**
|
|
114
147
|
* Extract base64 data from data URI
|
|
@@ -3,6 +3,57 @@
|
|
|
3
3
|
* Handles format conversion for different AI providers
|
|
4
4
|
*/
|
|
5
5
|
import { logger } from "./logger.js";
|
|
6
|
+
import { withRetry } from "./retryHandler.js";
|
|
7
|
+
import { SYSTEM_LIMITS } from "../core/constants.js";
|
|
8
|
+
/**
|
|
9
|
+
* Network error codes that should trigger a retry
|
|
10
|
+
*/
|
|
11
|
+
const RETRYABLE_ERROR_CODES = new Set([
|
|
12
|
+
"ECONNRESET",
|
|
13
|
+
"ENOTFOUND",
|
|
14
|
+
"ECONNREFUSED",
|
|
15
|
+
"ETIMEDOUT",
|
|
16
|
+
"ERR_NETWORK",
|
|
17
|
+
]);
|
|
18
|
+
/**
|
|
19
|
+
* Determines if an HTTP error is retryable based on status code
|
|
20
|
+
* Only network errors and certain HTTP status codes should be retried
|
|
21
|
+
* 4xx client errors like 404 (Not Found) and 403 (Forbidden) should NOT be retried
|
|
22
|
+
*
|
|
23
|
+
* @param error - The error to check
|
|
24
|
+
* @returns true if the error is retryable, false otherwise
|
|
25
|
+
*/
|
|
26
|
+
function isRetryableDownloadError(error) {
|
|
27
|
+
// Network-related errors should be retried
|
|
28
|
+
if (error && typeof error === "object") {
|
|
29
|
+
const errorCode = error.code;
|
|
30
|
+
const errorName = error.name;
|
|
31
|
+
if (RETRYABLE_ERROR_CODES.has(errorCode || "") ||
|
|
32
|
+
errorName === "AbortError") {
|
|
33
|
+
return true;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
// Check for HTTP status code in error message for retryable errors
|
|
37
|
+
// Only retry on 5xx server errors, 429 (Too Many Requests), and 408 (Request Timeout)
|
|
38
|
+
// Do NOT retry on 4xx client errors like 404 (Not Found) or 403 (Forbidden)
|
|
39
|
+
if (error instanceof Error) {
|
|
40
|
+
const message = error.message;
|
|
41
|
+
// Extract HTTP status from error message like "HTTP 503: Service Unavailable"
|
|
42
|
+
const statusMatch = message.match(/HTTP (\d{3}):/);
|
|
43
|
+
if (statusMatch) {
|
|
44
|
+
const status = parseInt(statusMatch[1], 10);
|
|
45
|
+
// Retry on 5xx server errors, 429 (rate limit), 408 (timeout)
|
|
46
|
+
return status >= 500 || status === 429 || status === 408;
|
|
47
|
+
}
|
|
48
|
+
// Check for timeout/network-related error messages
|
|
49
|
+
// Use more precise matching to avoid false positives like "No timeout specified"
|
|
50
|
+
if (/\b(request timed out|operation timed out|connection timed out|timed out)\b/i.test(message) ||
|
|
51
|
+
/\bnetwork (error|failure|unreachable|down)\b/i.test(message)) {
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
return false;
|
|
56
|
+
}
|
|
6
57
|
/**
|
|
7
58
|
* Image processor class for handling provider-specific image formatting
|
|
8
59
|
*/
|
|
@@ -16,9 +67,16 @@ export class ImageProcessor {
|
|
|
16
67
|
* @returns Processed image as data URI
|
|
17
68
|
*/
|
|
18
69
|
static async process(content, _options) {
|
|
70
|
+
// Validate content is non-empty before processing
|
|
71
|
+
if (content.length === 0) {
|
|
72
|
+
logger.error("Empty buffer provided");
|
|
73
|
+
throw new Error("Invalid image processing: buffer is empty");
|
|
74
|
+
}
|
|
19
75
|
const mediaType = this.detectImageType(content);
|
|
20
76
|
const base64 = content.toString("base64");
|
|
21
77
|
const dataUri = `data:${mediaType};base64,${base64}`;
|
|
78
|
+
// Validate output before returning
|
|
79
|
+
this.validateProcessOutput(dataUri, base64, mediaType);
|
|
22
80
|
return {
|
|
23
81
|
type: "image",
|
|
24
82
|
content: dataUri,
|
|
@@ -29,6 +87,37 @@ export class ImageProcessor {
|
|
|
29
87
|
},
|
|
30
88
|
};
|
|
31
89
|
}
|
|
90
|
+
/**
|
|
91
|
+
* Validate processed output meets required format
|
|
92
|
+
* Checks:
|
|
93
|
+
* - Base64 content is non-empty
|
|
94
|
+
* - Data URI format is valid (data:{mimeType};base64,{content})
|
|
95
|
+
* - MIME type is in the allowed list
|
|
96
|
+
* @param dataUri - The complete data URI string
|
|
97
|
+
* @param base64 - The base64-encoded content
|
|
98
|
+
* @param mediaType - The MIME type of the image
|
|
99
|
+
* @throws Error if any validation fails
|
|
100
|
+
*/
|
|
101
|
+
static validateProcessOutput(dataUri, base64, mediaType) {
|
|
102
|
+
// Validate base64 is non-empty (check first for better error message)
|
|
103
|
+
if (base64.length === 0) {
|
|
104
|
+
logger.error("Empty base64 content generated");
|
|
105
|
+
throw new Error("Invalid image processing: base64 content is empty");
|
|
106
|
+
}
|
|
107
|
+
// Validate data URI format with proper base64 character validation
|
|
108
|
+
// Base64 can only have 0, 1, or 2 padding characters at the end
|
|
109
|
+
const dataUriRegex = /^data:[^;]+;base64,[A-Za-z0-9+/]*={0,2}$/;
|
|
110
|
+
if (!dataUriRegex.test(dataUri)) {
|
|
111
|
+
logger.error("Invalid data URI format generated", { dataUri });
|
|
112
|
+
throw new Error("Invalid data URI format: must be data:{mimeType};base64,{content}");
|
|
113
|
+
}
|
|
114
|
+
// Defensive check: ensure detectImageType() returns valid MIME type
|
|
115
|
+
// This validation protects against future changes to detectImageType()
|
|
116
|
+
if (!this.validateImageFormat(mediaType)) {
|
|
117
|
+
logger.error("Invalid MIME type generated", { mediaType });
|
|
118
|
+
throw new Error(`Invalid MIME type: ${mediaType} is not in allowed list`);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
32
121
|
/**
|
|
33
122
|
* Process image for OpenAI (requires data URI format)
|
|
34
123
|
*/
|
|
@@ -434,14 +523,35 @@ export const imageUtils = {
|
|
|
434
523
|
}
|
|
435
524
|
},
|
|
436
525
|
/**
|
|
437
|
-
* Convert URL to base64 data URI by downloading the image
|
|
526
|
+
* Convert URL to base64 data URI by downloading the image.
|
|
527
|
+
* Implements retry logic with exponential backoff for network errors.
|
|
528
|
+
*
|
|
529
|
+
* Retries are performed for:
|
|
530
|
+
* - Network errors (ECONNRESET, ENOTFOUND, ECONNREFUSED, ETIMEDOUT, ERR_NETWORK, AbortError)
|
|
531
|
+
* - Server errors (5xx status codes)
|
|
532
|
+
* - Rate limiting (429 Too Many Requests)
|
|
533
|
+
* - Request timeouts (408 Request Timeout)
|
|
534
|
+
*
|
|
535
|
+
* Retries are NOT performed for:
|
|
536
|
+
* - Client errors (4xx status codes except 408, 429)
|
|
537
|
+
* - Invalid content type
|
|
538
|
+
* - Content size limit exceeded
|
|
539
|
+
* - Unsupported protocol
|
|
540
|
+
*
|
|
541
|
+
* @param url - The URL of the image to download
|
|
542
|
+
* @param options - Configuration options
|
|
543
|
+
* @param options.timeoutMs - Timeout for each download attempt (default: 15000ms)
|
|
544
|
+
* @param options.maxBytes - Maximum allowed file size (default: 10MB)
|
|
545
|
+
* @param options.maxAttempts - Maximum number of total attempts including initial attempt (default: 3)
|
|
546
|
+
* @returns Promise<string> - Base64 data URI of the downloaded image
|
|
438
547
|
*/
|
|
439
|
-
urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024 } = {}) => {
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
548
|
+
urlToBase64DataUri: async (url, { timeoutMs = 15000, maxBytes = 10 * 1024 * 1024, maxAttempts = 3, } = {}) => {
|
|
549
|
+
// Basic protocol whitelist - fail fast, no retry needed
|
|
550
|
+
if (!/^https?:\/\//i.test(url)) {
|
|
551
|
+
throw new Error("Unsupported protocol");
|
|
552
|
+
}
|
|
553
|
+
// Perform the actual download with retry logic
|
|
554
|
+
const performDownload = async () => {
|
|
445
555
|
const controller = new AbortController();
|
|
446
556
|
const t = setTimeout(() => controller.abort(), timeoutMs);
|
|
447
557
|
try {
|
|
@@ -467,6 +577,20 @@ export const imageUtils = {
|
|
|
467
577
|
finally {
|
|
468
578
|
clearTimeout(t);
|
|
469
579
|
}
|
|
580
|
+
};
|
|
581
|
+
try {
|
|
582
|
+
return await withRetry(performDownload, {
|
|
583
|
+
maxAttempts,
|
|
584
|
+
initialDelay: SYSTEM_LIMITS.DEFAULT_INITIAL_DELAY,
|
|
585
|
+
backoffMultiplier: SYSTEM_LIMITS.DEFAULT_BACKOFF_MULTIPLIER,
|
|
586
|
+
maxDelay: SYSTEM_LIMITS.DEFAULT_MAX_DELAY,
|
|
587
|
+
retryCondition: isRetryableDownloadError,
|
|
588
|
+
onRetry: (attempt, error) => {
|
|
589
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
590
|
+
const attemptsLeft = maxAttempts - attempt;
|
|
591
|
+
logger.warn(`⚠️ Image download attempt ${attempt} failed for ${url}: ${message}. ${attemptsLeft} ${attemptsLeft === 1 ? "attempt" : "attempts"} remaining...`);
|
|
592
|
+
},
|
|
593
|
+
});
|
|
470
594
|
}
|
|
471
595
|
catch (error) {
|
|
472
596
|
throw new Error(`Failed to download and convert URL to base64: ${error instanceof Error ? error.message : "Unknown error"}`);
|