@aj-archipelago/cortex 1.3.51 → 1.3.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample} +2 -1
  2. package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample} +2 -1
  3. package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample} +2 -1
  4. package/helper-apps/cortex-file-handler/Dockerfile +1 -1
  5. package/helper-apps/cortex-file-handler/INTERFACE.md +178 -0
  6. package/helper-apps/cortex-file-handler/package.json +4 -3
  7. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +3 -0
  8. package/helper-apps/cortex-file-handler/{blobHandler.js → src/blobHandler.js} +167 -99
  9. package/helper-apps/cortex-file-handler/{fileChunker.js → src/fileChunker.js} +11 -24
  10. package/helper-apps/cortex-file-handler/{index.js → src/index.js} +236 -256
  11. package/helper-apps/cortex-file-handler/{services → src/services}/ConversionService.js +39 -18
  12. package/helper-apps/cortex-file-handler/{services → src/services}/FileConversionService.js +7 -3
  13. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +177 -0
  14. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +258 -0
  15. package/helper-apps/cortex-file-handler/src/services/storage/LocalStorageProvider.js +182 -0
  16. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +86 -0
  17. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +53 -0
  18. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +259 -0
  19. package/helper-apps/cortex-file-handler/{start.js → src/start.js} +1 -1
  20. package/helper-apps/cortex-file-handler/src/utils/filenameUtils.js +28 -0
  21. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +1 -1
  22. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +4 -4
  23. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +152 -0
  24. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +2 -28
  25. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +134 -23
  26. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +307 -0
  27. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +291 -0
  28. package/helper-apps/cortex-file-handler/tests/start.test.js +50 -14
  29. package/helper-apps/cortex-file-handler/tests/storage/AzureStorageProvider.test.js +120 -0
  30. package/helper-apps/cortex-file-handler/tests/storage/GCSStorageProvider.test.js +193 -0
  31. package/helper-apps/cortex-file-handler/tests/storage/LocalStorageProvider.test.js +148 -0
  32. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +100 -0
  33. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +113 -0
  34. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +73 -19
  35. package/lib/entityConstants.js +1 -1
  36. package/package.json +1 -1
  37. package/helper-apps/cortex-file-handler/{constants.js → src/constants.js} +0 -0
  38. package/helper-apps/cortex-file-handler/{docHelper.js → src/docHelper.js} +0 -0
  39. package/helper-apps/cortex-file-handler/{helper.js → src/helper.js} +0 -0
  40. package/helper-apps/cortex-file-handler/{localFileHandler.js → src/localFileHandler.js} +0 -0
  41. package/helper-apps/cortex-file-handler/{redis.js → src/redis.js} +0 -0
package/helper-apps/cortex-file-handler/{.env.test.azure → .env.test.azure.sample}
@@ -3,4 +3,5 @@ REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
  AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
  AZURE_STORAGE_CONTAINER_NAME=test-container
  NODE_ENV=test
- PORT=7072 # Different port for testing
+ PORT=7072 # Different port for testing
+ MARKITDOWN_CONVERT_URL= #cortex-markitdown url
package/helper-apps/cortex-file-handler/{.env.test.gcs → .env.test.gcs.sample}
@@ -6,4 +6,5 @@ GCS_BUCKETNAME=cortextempfiles
  AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
  AZURE_STORAGE_CONTAINER_NAME=test-container
  NODE_ENV=test
- PORT=7072 # Different port for testing
+ PORT=7072 # Different port for testing
+ MARKITDOWN_CONVERT_URL= #cortex-markitdown url
package/helper-apps/cortex-file-handler/{.env.test → .env.test.sample}
@@ -4,4 +4,5 @@ REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
  AZURE_STORAGE_CONTAINER_NAME=test-container
  #GCP_SERVICE_ACCOUNT_KEY={"type":"service_account","project_id":"test-project"}
  NODE_ENV=test
- PORT=7072 # Different port for testing
+ PORT=7072 # Different port for testing
+ MARKITDOWN_CONVERT_URL= #cortex-markitdown url
package/helper-apps/cortex-file-handler/Dockerfile
@@ -16,4 +16,4 @@ EXPOSE 7071

  # RUN npm run build

- CMD [ "node", "start.js" ]
+ CMD [ "npm", "start" ]
package/helper-apps/cortex-file-handler/INTERFACE.md
@@ -0,0 +1,178 @@
+ # Cortex File Handler Interface Documentation
+
+ ## Overview
+ The Cortex File Handler is a service that processes files through various operations including uploading, downloading, chunking, and document processing. It supports multiple storage backends (Azure Blob Storage, Google Cloud Storage, and Local File System).
+
+ ## Request Methods
+
+ ### POST
+ - **Purpose**: Upload a file
+ - **Content-Type**: `multipart/form-data`
+ - **Parameters**:
+   - `hash` (optional): Unique identifier for the file
+   - `requestId` (required): Unique identifier for the request
+   - File content must be included in the form data
+ - **Behavior**:
+   - Uploads file to primary storage (Azure or Local)
+   - If GCS is configured, also uploads to GCS
+   - If hash is provided, stores file metadata in Redis
+   - Returns upload result with file URLs
+ - **Response**: Object containing:
+   - `url`: Primary storage URL
+   - `gcs`: GCS URL (if GCS is configured)
+   - `hash`: Hash value (if provided)
+   - `message`: Success message
+   - `filename`: Original filename
+ - **Note**: The `save` parameter is not supported in POST requests. To convert and save a document as text, use GET with the `save` parameter.
+
+ ### GET
+ - **Purpose**: Process or retrieve files
+ - **Parameters** (can be in query string or request body):
+   - `uri` (required if not using fetch/load/restore): URL of the file to process
+     - Requires `requestId` parameter
+     - No Redis caching
+     - Direct processing based on file type
+   - `requestId` (required with `uri`): Unique identifier for the request
+   - `save` (optional): If true, saves document as text file
+     - When true, converts document to text and saves to primary storage only (Azure or Local)
+     - Does not save to GCS
+     - Original document is deleted from storage after text conversion
+   - `hash` (optional): Unique identifier for the file
+   - `checkHash` (optional): Check if hash exists
+   - `clearHash` (optional): Remove hash from storage
+   - `fetch`/`load`/`restore` (optional): URL to fetch remote file (these are aliases - any of the three parameters will trigger the same remote file processing behavior)
+     - Does not require `requestId`
+     - Uses Redis caching
+     - Downloads and validates file first
+     - Ensures correct file extension
+     - Truncates long filenames
+ - **Behavior**:
+   - For documents (PDF, DOC, etc.):
+     - If `save=true`:
+       - Converts document to text
+       - Saves text file to primary storage (Azure or Local)
+       - Deletes original document from storage
+       - Does not save to GCS
+       - Returns object with primary storage URL
+     - If `save=false`:
+       - Converts document to text
+       - Returns array of text chunks
+       - Does not persist any files
+   - For media files:
+     - Splits into chunks
+     - Uploads chunks to primary storage and GCS (if configured)
+     - Returns chunk information with offsets
+   - For remote files (`fetch`/`load`/`restore`):
+     - Downloads file from URL
+     - Processes based on file type
+     - Returns processed result
+     - Caches result in Redis using URL as key
+     - Updates Redis timestamp on subsequent requests
+     - Truncates filenames longer than 200 characters
+     - Ensures correct file extension based on content type
+
+ ### DELETE
+ - **Purpose**: Remove files from storage
+ - **Parameters** (can be in query string or request body):
+   - `requestId` (required): Unique identifier for the request
+ - **Behavior**:
+   - Deletes file from primary storage (Azure or Local)
+   - Deletes file from GCS if configured
+   - Returns deletion result
+ - **Response**: Array of deleted file URLs
+
+ ## Storage Configuration
+ - **Azure**: Enabled if `AZURE_STORAGE_CONNECTION_STRING` is set
+ - **GCS**: Enabled if `GCP_SERVICE_ACCOUNT_KEY_BASE64` or `GCP_SERVICE_ACCOUNT_KEY` is set
+ - **Local**: Used as fallback if Azure is not configured
+
+ ## Response Format
+ - **Success**:
+   - Status: 200
+   - Body: Varies by operation (see specific methods above)
+ - **Error**:
+   - Status: 400/404/500
+   - Body: Error message string
+
+ ## Progress Tracking
+ - Progress updates are published to Redis for each operation
+ - Progress includes:
+   - `progress`: Completion percentage (0-1)
+   - `completedCount`: Number of completed steps
+   - `totalCount`: Total number of steps
+   - `numberOfChunks`: Number of chunks (for media files)
+   - `data`: Additional operation data
+ - Progress updates are published to Redis channel associated with `requestId`
+
+ ## File Types
+ - **Documents**: Processed based on `DOC_EXTENSIONS` list
+   - Supported extensions:
+     - Text: .txt, .json, .csv, .md, .xml, .js, .html, .css
+     - Office: .doc, .docx, .xls, .xlsx
+   - Document processing limitations:
+     - PDFs: Does not support scanned, encrypted, or password-protected PDFs
+     - Requires OCR for PDFs without embedded fonts
+   - Text chunking:
+     - Maximum chunk size: 10,000 characters
+     - Chunks are split at sentence boundaries when possible
+     - Returns array of text chunks
+ - **Media**: All other file types, processed through chunking
+   - Chunked into smaller pieces for processing
+   - Each chunk is stored separately
+   - Media chunking behavior:
+     - Default chunk duration: 500 seconds
+     - Chunks are processed in parallel (3 at a time)
+     - Audio is converted to MP3 format (128kbps)
+     - Uses 4MB read buffer for file processing
+   - Supported media types:
+     - Images: .jpg, .jpeg, .png, .webp, .heic, .heif, .pdf
+     - Video: .mp4, .mpeg, .mov, .avi, .flv, .mpg, .webm, .wmv, .3gp
+     - Audio: .wav, .mp3, .aac, .ogg, .flac, .m4a
+   - File download behavior:
+     - 30 second timeout for downloads
+     - Supports streaming downloads
+     - Handles URL encoding/decoding
+     - Truncates filenames longer than 200 characters
+
+ ## Storage Behavior
+ - **Primary Storage** (Azure or Local):
+   - Files are stored with UUID-based names
+   - Organized by requestId folders
+   - Azure: Uses SAS tokens for access
+   - Local: Served via HTTP on configured port
+ - **GCS** (if configured):
+   - Files stored with gs:// protocol URLs
+   - Same folder structure as primary storage
+   - Only used for media file chunks
+ - **Redis**:
+   - Stores file metadata and URLs
+   - Used for caching remote file results
+   - Tracks file access timestamps
+   - Used for progress tracking
+
+ ## Cleanup
+ - Automatic cleanup of inactive files
+ - Removes files from:
+   - Primary storage (Azure/Local)
+   - GCS (if configured)
+   - Redis file store map
+ - Cleanup is triggered on each request but only runs if not already in progress
+ - Temporary files are cleaned up:
+   - After 1 hour of inactivity
+   - After successful processing
+   - On error conditions
+
+ ## Error Handling
+ - **400 Bad Request**:
+   - Missing required parameters
+   - Invalid or inaccessible URL
+   - Unsupported file type
+ - **404 Not Found**:
+   - File or hash not found
+   - File not found in storage
+ - **500 Internal Server Error**:
+   - Processing errors
+   - Storage errors
+   - Document conversion errors
+   - PDF processing errors (scanned, encrypted, password-protected)
+ All errors include descriptive message in response body
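For orientation, here is a minimal client-side sketch of the interface documented above. It is illustrative only: the endpoint URL, request IDs, hash, file paths, and the multipart field name `file` are placeholders and assumptions, and it assumes `axios` and `form-data` are available in the calling project.

```js
import fs from 'fs';
import axios from 'axios';
import FormData from 'form-data';

// Placeholder endpoint - substitute your deployment's file-handler URL.
const handlerUrl = 'http://localhost:7071/api/CortexFileHandler';

// POST: multipart upload with a required requestId and an optional hash.
const form = new FormData();
form.append('requestId', 'req-123');
form.append('hash', 'doc-abc'); // optional; metadata is kept in Redis under this key
form.append('file', fs.createReadStream('./report.docx')); // field name assumed
const { data: uploaded } = await axios.post(handlerUrl, form, {
  headers: form.getHeaders(),
});
// uploaded resembles { url, gcs?, hash?, message, filename }

// GET with uri + save=true: convert the document to text and persist only the text file.
const { data: saved } = await axios.get(handlerUrl, {
  params: { uri: uploaded.url, requestId: 'req-123', save: true },
});

// GET with fetch (aliases: load, restore): download, process, and cache a remote file.
const { data: fetched } = await axios.get(handlerUrl, {
  params: { fetch: 'https://example.com/interview.mp3' },
});

// DELETE: remove everything stored under a requestId from primary storage and GCS.
await axios.delete(handlerUrl, { params: { requestId: 'req-123' } });
```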
package/helper-apps/cortex-file-handler/package.json
@@ -1,11 +1,12 @@
  {
    "name": "@aj-archipelago/cortex-file-handler",
-   "version": "1.1.01",
+   "version": "2.0.02",
    "description": "File handling service for Cortex - handles file uploads, media chunking, and document processing",
    "type": "module",
+   "main": "src/index.js",
    "scripts": {
-     "start": "node start.js",
-     "dev": "node -r dotenv/config start.js",
+     "start": "node src/start.js",
+     "dev": "node -r dotenv/config src/start.js",
      "test": "DOTENV_CONFIG_PATH=.env.test NODE_ENV=test node -r dotenv/config node_modules/ava/entrypoints/cli.mjs",
      "test:azure": "DOTENV_CONFIG_PATH=.env.test.azure NODE_ENV=test ./scripts/test-azure.sh",
      "test:watch": "DOTENV_CONFIG_PATH=.env.test NODE_ENV=test node -r dotenv/config node_modules/ava/entrypoints/cli.mjs --watch",
package/helper-apps/cortex-file-handler/scripts/test-azure.sh
@@ -27,6 +27,9 @@ TEST_RESULT=$?
  echo "Cleaning up..."
  kill $AZURITE_PID

+ # Wait for Azurite to finish cleanup
+ sleep 2
+
  # Clean up Azurite directory
  rm -rf $AZURITE_DIR

package/helper-apps/cortex-file-handler/{blobHandler.js → src/blobHandler.js}
@@ -23,6 +23,7 @@ import { CONVERTED_EXTENSIONS } from './constants.js';
  import mime from 'mime-types';

  import os from 'os';
+ import { sanitizeFilename } from './utils/filenameUtils.js';

  import { FileConversionService } from './services/FileConversionService.js';

@@ -81,8 +82,15 @@ function ensureUnencodedGcsUrl(url) {
  }
  // Split into bucket and path parts
  const [bucket, ...pathParts] = url.replace('gs://', '').split('/');
- // Reconstruct URL with decoded path parts
- return `gs://${bucket}/${pathParts.map(part => decodeURIComponent(part)).join('/')}`;
+ // Reconstruct URL with decoded path parts, handling invalid characters
+ return `gs://${bucket}/${pathParts.map(part => {
+   try {
+     return decodeURIComponent(part);
+   } catch (error) {
+     // If decoding fails, sanitize the filename by removing invalid characters
+     return part.replace(/[^\w\-\.]/g, '_');
+   }
+ }).join('/')}`;
  }

  async function gcsUrlExists(url, defaultReturn = false) {
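The try/catch added above exists because `decodeURIComponent` throws a `URIError` on malformed percent-encoding. A self-contained sketch of the same fallback, with illustrative inputs:

```js
// Decode one GCS path segment, falling back to the same sanitization
// the diff uses when the segment is not valid percent-encoding.
function decodeGcsPart(part) {
  try {
    return decodeURIComponent(part);
  } catch {
    // Anything outside word characters, dots, and dashes becomes '_'.
    return part.replace(/[^\w\-\.]/g, '_');
  }
}

console.log(decodeGcsPart('my%20file.txt')); // "my file.txt" - valid encoding decodes normally
console.log(decodeGcsPart('report%ZZ.pdf')); // "report_ZZ.pdf" - '%ZZ' would throw, so '%' is replaced
```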
@@ -348,14 +356,11 @@ function uploadBlob(
      return;
    }
    tempFilePath = path.join(tempDir, safeFilename);
-   console.log('Temp dir:', tempDir, 'Original filename:', filename, 'Safe filename:', safeFilename, 'Temp file path:', tempFilePath);
-   console.log('About to create write stream for:', tempFilePath);
    try {
      diskWriteStream = fs.createWriteStream(tempFilePath, {
        highWaterMark: 1024 * 1024,
        autoClose: true,
      });
-     console.log('Write stream created successfully for:', tempFilePath);
    } catch (err) {
      console.error('Error creating write stream:', err, 'Temp dir exists:', fs.existsSync(tempDir));
      errorOccurred = true;
@@ -476,7 +481,58 @@ function uploadBlob(
    }
  }

- // Respond as soon as cloud uploads are done
+ // After original uploads, handle optional conversion
+ const conversionService = new FileConversionService(context, !saveToLocal);
+
+ if (conversionService.needsConversion(safeFilename)) {
+   try {
+     context.log('Starting file conversion (busboy)...');
+
+     // Ensure we have a local copy of the file for conversion
+     let localPathForConversion = tempFilePath;
+
+     if (!localPathForConversion) {
+       // No temp file was written (saveToLocal === false). Download from primary URL.
+       const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'convert-'));
+       localPathForConversion = path.join(tmpDir, safeFilename);
+       await conversionService._downloadFile(result.url, localPathForConversion);
+     } else {
+       // Wait until disk write completes to guarantee full file is present
+       if (diskWritePromise) {
+         await diskWritePromise;
+       }
+     }
+
+     // Perform the conversion
+     const conversion = await conversionService.convertFile(localPathForConversion, result.url);
+     context.log('File conversion completed (busboy):', conversion);
+
+     if (conversion.converted) {
+       context.log('Saving converted file (busboy)...');
+       // Save converted file to primary storage
+       const convertedSaveResult = await conversionService._saveConvertedFile(conversion.convertedPath, requestId);
+
+       // Optionally save to GCS
+       let convertedGcsUrl;
+       if (conversionService._isGCSConfigured()) {
+         convertedGcsUrl = await conversionService._uploadChunkToGCS(conversion.convertedPath, requestId);
+       }
+
+       // Attach to response body
+       result.converted = {
+         url: convertedSaveResult.url,
+         gcs: convertedGcsUrl,
+       };
+       context.log('Conversion process (busboy) completed successfully');
+     }
+   } catch (convErr) {
+     console.error('Error converting file (busboy):', convErr);
+     context.log('Error during conversion (busboy):', convErr.message);
+     // Continue without failing the upload
+   }
+ }
+
+ // Respond after conversion (if any)
  context.res = { status: 200, body: result };
  resolve(result);
  } catch (err) {
@@ -552,23 +608,24 @@ function uploadBlob(
  async function saveToLocalStorage(context, requestId, encodedFilename, file) {
    const localPath = join(publicFolder, requestId);
    fs.mkdirSync(localPath, { recursive: true });
-   const destinationPath = `${localPath}/${encodedFilename}`;
-   context.log(`Saving to local storage... ${destinationPath}`);
+
+   // Sanitize filename by removing invalid characters
+   const sanitizedFilename = sanitizeFilename(encodedFilename);
+   const destinationPath = `${localPath}/${sanitizedFilename}`;
+
    await pipeline(file, fs.createWriteStream(destinationPath));
-   return `http://${ipAddress}:${port}/files/${requestId}/${encodedFilename}`;
+   return `http://${ipAddress}:${port}/files/${requestId}/${sanitizedFilename}`;
  }

  // Helper function to handle Azure blob storage
  async function saveToAzureStorage(context, encodedFilename, file) {
    const { containerClient } = await getBlobClient();
    const contentType = mime.lookup(encodedFilename);
-
-   // Decode the filename if it's already encoded to prevent double-encoding
-   let blobName = encodedFilename;
-   if (isEncoded(blobName)) {
-     blobName = decodeURIComponent(blobName);
-   }
-
+
+   // Create a safe blob name that is URI-encoded once (no double encoding)
+   let blobName = sanitizeFilename(encodedFilename);
+   blobName = encodeURIComponent(blobName);
+
    const options = {
      blobHTTPHeaders: contentType ? { blobContentType: contentType } : {},
      maxConcurrency: 50,
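Both storage helpers now delegate filename cleanup to `sanitizeFilename` from the new `src/utils/filenameUtils.js`, whose body does not appear in this diff. A minimal sketch of what such a helper might look like, assuming it applies the same character policy as the GCS fallback above; this is a hypothetical illustration, not the actual implementation:

```js
// Hypothetical sketch only - the real helper lives in src/utils/filenameUtils.js
// and is not shown in this diff.
export function sanitizeFilename(filename) {
  let name = filename;
  try {
    // Decode once so an already-encoded name is not encoded a second time later.
    name = decodeURIComponent(filename);
  } catch {
    // Malformed encoding: fall through and sanitize the raw value.
  }
  // Replace anything outside word characters, dots, and dashes with '_'.
  return name.replace(/[^\w\-\.]/g, '_');
}
```

Whatever the real helper does, the intent visible in the diff is that `saveToAzureStorage` then calls `encodeURIComponent` exactly once on the sanitized name, which is what the "no double encoding" comment refers to.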
@@ -583,31 +640,29 @@ async function saveToAzureStorage(context, encodedFilename, file) {
  }

  // Helper function to upload a file to Google Cloud Storage
- async function uploadToGCS(context, file, encodedFilename) {
-   const gcsFile = gcs.bucket(GCS_BUCKETNAME).file(encodedFilename);
+ async function uploadToGCS(context, file, filename) {
+   const objectName = sanitizeFilename(filename);
+   const gcsFile = gcs.bucket(GCS_BUCKETNAME).file(objectName);
    const writeStream = gcsFile.createWriteStream({
      resumable: true,
      validation: false,
      metadata: {
-       contentType: mime.lookup(encodedFilename) || 'application/octet-stream',
+       contentType: mime.lookup(objectName) || 'application/octet-stream',
      },
      chunkSize: 8 * 1024 * 1024,
      numRetries: 3,
      retryDelay: 1000,
    });
-   context.log(`Uploading to GCS... ${encodedFilename}`);
+   context.log(`Uploading to GCS... ${objectName}`);
    await pipeline(file, writeStream);
-   // Never encode GCS URLs
-   const gcsUrl = `gs://${GCS_BUCKETNAME}/${encodedFilename}`;
-   return gcsUrl;
+   return `gs://${GCS_BUCKETNAME}/${objectName}`;
  }

- // Helper function to handle Google Cloud Storage
+ // Wrapper that checks if GCS is configured
  async function saveToGoogleStorage(context, encodedFilename, file) {
    if (!gcs) {
      throw new Error('Google Cloud Storage is not initialized');
    }
-
    return uploadToGCS(context, file, encodedFilename);
  }

@@ -816,7 +871,6 @@ async function cleanup(context, urls = null) {
        cleanedURLs.push(blob.name);
      } catch (error) {
        if (error.statusCode !== 404) {
-         // Ignore "not found" errors
          context.log(`Error cleaning blob ${blob.name}:`, error);
        }
      }
@@ -832,7 +886,6 @@ async function cleanup(context, urls = null) {
        cleanedURLs.push(blobName);
      } catch (error) {
        if (error.statusCode !== 404) {
-         // Ignore "not found" errors
          context.log(`Error cleaning blob ${url}:`, error);
        }
      }
@@ -842,13 +895,14 @@ async function cleanup(context, urls = null) {
  }

  async function cleanupGCS(urls = null) {
+   if (!gcs) return [];
    const bucket = gcs.bucket(GCS_BUCKETNAME);
    const directories = new Set();
    const cleanedURLs = [];

    if (!urls) {
      const daysN = 30;
-     const thirtyDaysAgo = new Date(Date.now() - daysN * 24 * 60 * 60 * 1000);
+     const threshold = Date.now() - daysN * 24 * 60 * 60 * 1000;
      const [files] = await bucket.getFiles();

      for (const file of files) {
@@ -856,33 +910,27 @@ async function cleanupGCS(urls = null) {
        const directoryPath = path.dirname(file.name);
        directories.add(directoryPath);
        if (metadata.updated) {
-         const updatedTime = new Date(metadata.updated);
-         if (updatedTime.getTime() < thirtyDaysAgo.getTime()) {
-           console.log(`Cleaning file: ${file.name}`);
+         const updatedTime = new Date(metadata.updated).getTime();
+         if (updatedTime < threshold) {
            await file.delete();
            cleanedURLs.push(file.name);
          }
        }
      }
    } else {
-     try {
-       for (const url of urls) {
-         const filename = path.join(url.split('/').slice(3).join('/'));
-         const file = bucket.file(filename);
-         const directoryPath = path.dirname(file.name);
-         directories.add(directoryPath);
-         await file.delete();
-         cleanedURLs.push(url);
-       }
-     } catch (error) {
-       console.error(`Error cleaning up files: ${error}`);
+     for (const url of urls) {
+       const filePath = url.split('/').slice(3).join('/');
+       const file = bucket.file(filePath);
+       const directoryPath = path.dirname(file.name);
+       directories.add(directoryPath);
+       await file.delete();
+       cleanedURLs.push(url);
      }
    }

    for (const directory of directories) {
      const [files] = await bucket.getFiles({ prefix: directory });
      if (files.length === 0) {
-       console.log(`Deleting empty directory: ${directory}`);
        await bucket.deleteFiles({ prefix: directory });
      }
    }
@@ -891,47 +939,85 @@ async function cleanupGCS(urls = null) {
  }

  async function deleteGCS(blobName) {
-   if (!blobName) throw new Error('Missing blobName parameter');
-   if (!gcs) throw new Error('Google Cloud Storage is not initialized');
+   if (!blobName) {
+     console.log('[deleteGCS] No blobName provided, skipping GCS deletion');
+     return;
+   }

-   try {
-     const bucket = gcs.bucket(GCS_BUCKETNAME);
-     const deletedFiles = [];
+   if (!gcs) {
+     console.log('[deleteGCS] GCS not initialized, skipping deletion');
+     return;
+   }

+   try {
      if (process.env.STORAGE_EMULATOR_HOST) {
-       // For fake GCS server, use HTTP API directly
-       const response = await axios.get(
-         `http://localhost:4443/storage/v1/b/${GCS_BUCKETNAME}/o`,
-         { params: { prefix: blobName } },
-       );
-       if (response.data.items) {
-         for (const item of response.data.items) {
-           await axios.delete(
-             `http://localhost:4443/storage/v1/b/${GCS_BUCKETNAME}/o/${encodeURIComponent(item.name)}`,
-             { validateStatus: (status) => status === 200 || status === 404 },
-           );
-           deletedFiles.push(item.name);
+       console.log(`[deleteGCS] Using emulator at ${process.env.STORAGE_EMULATOR_HOST}`);
+       console.log(`[deleteGCS] Attempting to delete files with prefix: ${blobName}`);
+
+       // List files first
+       const listUrl = `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${GCS_BUCKETNAME}/o?prefix=${blobName}`;
+       console.log(`[deleteGCS] Listing files with URL: ${listUrl}`);
+
+       const listResponse = await axios.get(listUrl, {
+         validateStatus: (status) => true,
+       });
+       console.log(`[deleteGCS] List response status: ${listResponse.status}`);
+       console.log(`[deleteGCS] List response data: ${JSON.stringify(listResponse.data)}`);
+
+       if (listResponse.status === 200 && listResponse.data.items) {
+         console.log(`[deleteGCS] Found ${listResponse.data.items.length} items to delete`);
+
+         // Delete each file
+         for (const item of listResponse.data.items) {
+           const deleteUrl = `${process.env.STORAGE_EMULATOR_HOST}/storage/v1/b/${GCS_BUCKETNAME}/o/${encodeURIComponent(item.name)}`;
+           console.log(`[deleteGCS] Deleting file: ${item.name}`);
+           console.log(`[deleteGCS] Delete URL: ${deleteUrl}`);
+
+           const deleteResponse = await axios.delete(deleteUrl, {
+             validateStatus: (status) => true,
+             headers: {
+               'Content-Type': 'application/json',
+             },
+           });
+           console.log(`[deleteGCS] Delete response status: ${deleteResponse.status}`);
+           console.log(`[deleteGCS] Delete response data: ${JSON.stringify(deleteResponse.data)}`);
          }
+         console.log('[deleteGCS] All files deleted successfully');
+       } else {
+         console.log('[deleteGCS] No files found to delete');
        }
      } else {
-       // For real GCS, use the SDK
+       console.log('[deleteGCS] Using real GCS');
+       const bucket = gcs.bucket(GCS_BUCKETNAME);
        const [files] = await bucket.getFiles({ prefix: blobName });
-       for (const file of files) {
-         await file.delete();
-         deletedFiles.push(file.name);
+       console.log(`[deleteGCS] Found ${files.length} files to delete`);
+
+       if (files.length > 0) {
+         await Promise.all(files.map((file) => file.delete()));
+         console.log('[deleteGCS] All files deleted successfully');
+       } else {
+         console.log('[deleteGCS] No files found to delete');
        }
      }
-
-     if (deletedFiles.length > 0) {
-       console.log(`Cleaned GCS files: ${deletedFiles.join(', ')}`);
-     }
-     return deletedFiles;
    } catch (error) {
-     if (error.code !== 404) {
-       console.error(`Error in deleteGCS: ${error}`);
-       throw error;
+     // If we get a 404 error, it means the file is already gone, which is fine
+     if (error.response?.status === 404 || error.code === 404) {
+       console.log('[deleteGCS] File not found in GCS (404) - this is expected if file was already deleted');
+       return;
      }
-     return [];
+     console.error('[deleteGCS] Error during deletion:', error);
+     console.error('[deleteGCS] Error details:', {
+       message: error.message,
+       code: error.code,
+       errors: error.errors,
+       response: error.response ? {
+         status: error.response.status,
+         statusText: error.response.statusText,
+         data: error.response.data,
+         headers: error.response.headers,
+       } : null,
+     });
+     // Don't throw the error - we want to continue with cleanup even if GCS deletion fails
    }
  }

@@ -939,37 +1025,19 @@ async function deleteGCS(blobName) {
  async function ensureGCSUpload(context, existingFile) {
    if (!existingFile.gcs && gcs) {
      context.log('GCS file was missing - uploading.');
-     let encodedFilename = path.basename(existingFile.url.split('?')[0]);
-     if (!isEncoded(encodedFilename)) {
-       encodedFilename = encodeURIComponent(encodedFilename);
-     }
-     // Download the file from Azure/local storage
-     const response = await axios({
-       method: 'get',
-       url: existingFile.url,
-       responseType: 'stream',
-     });
-     // Upload the file stream to GCS
-     existingFile.gcs = await uploadToGCS(
-       context,
-       response.data,
-       encodedFilename,
-     );
+     const fileName = sanitizeFilename(path.basename(existingFile.url.split('?')[0]));
+     const response = await axios({ method: 'get', url: existingFile.url, responseType: 'stream' });
+     existingFile.gcs = await uploadToGCS(context, response.data, fileName);
    }
    return existingFile;
  }

- // Helper function to upload a chunk to GCS
  async function uploadChunkToGCS(chunkPath, requestId) {
    if (!gcs) return null;
-   let baseName = path.basename(chunkPath);
-   if (!isEncoded(baseName)) {
-     baseName = encodeURIComponent(baseName);
-   }
-   const gcsFileName = `${requestId}/${baseName}`;
-   await gcs.bucket(GCS_BUCKETNAME).upload(chunkPath, {
-     destination: gcsFileName,
-   });
+   const dirName = requestId || uuidv4();
+   const baseName = sanitizeFilename(path.basename(chunkPath));
+   const gcsFileName = `${dirName}/${baseName}`;
+   await gcs.bucket(GCS_BUCKETNAME).upload(chunkPath, { destination: gcsFileName });
    return `gs://${GCS_BUCKETNAME}/${gcsFileName}`;
  }

@@ -985,4 +1053,4 @@ export {
    gcs,
    uploadChunkToGCS,
    downloadFromGCS,
- };
+ };