@aj-archipelago/cortex 1.3.67 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +27 -0
- package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/__init__.py +3 -0
- package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/function.json +20 -0
- package/helper-apps/cortex-doc-to-pdf/Dockerfile +46 -0
- package/helper-apps/cortex-doc-to-pdf/README.md +408 -0
- package/helper-apps/cortex-doc-to-pdf/converter.py +157 -0
- package/helper-apps/cortex-doc-to-pdf/docker-compose.yml +23 -0
- package/helper-apps/cortex-doc-to-pdf/document_converter.py +181 -0
- package/helper-apps/cortex-doc-to-pdf/examples/README.md +252 -0
- package/helper-apps/cortex-doc-to-pdf/examples/nodejs-client.js +266 -0
- package/helper-apps/cortex-doc-to-pdf/examples/package-lock.json +297 -0
- package/helper-apps/cortex-doc-to-pdf/examples/package.json +23 -0
- package/helper-apps/cortex-doc-to-pdf/function_app.py +85 -0
- package/helper-apps/cortex-doc-to-pdf/host.json +16 -0
- package/helper-apps/cortex-doc-to-pdf/request_handlers.py +193 -0
- package/helper-apps/cortex-doc-to-pdf/requirements.txt +3 -0
- package/helper-apps/cortex-doc-to-pdf/tests/run_tests.sh +26 -0
- package/helper-apps/cortex-doc-to-pdf/tests/test_conversion.py +320 -0
- package/helper-apps/cortex-doc-to-pdf/tests/test_streaming.py +419 -0
- package/helper-apps/cortex-file-handler/package-lock.json +1 -0
- package/helper-apps/cortex-file-handler/package.json +1 -0
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +81 -8
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +54 -7
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +19 -7
- package/lib/encodeCache.js +5 -0
- package/lib/keyValueStorageClient.js +5 -0
- package/lib/logger.js +1 -1
- package/lib/pathwayTools.js +8 -1
- package/lib/redisSubscription.js +6 -0
- package/lib/requestExecutor.js +4 -0
- package/lib/util.js +88 -0
- package/package.json +1 -1
- package/pathways/basePathway.js +3 -3
- package/pathways/bing_afagent.js +1 -0
- package/pathways/gemini_15_vision.js +1 -1
- package/pathways/google_cse.js +2 -2
- package/pathways/image_gemini_25.js +85 -0
- package/pathways/image_prompt_optimizer_gemini_25.js +149 -0
- package/pathways/image_qwen.js +28 -0
- package/pathways/image_seedream4.js +26 -0
- package/pathways/rag.js +1 -1
- package/pathways/rag_jarvis.js +1 -1
- package/pathways/system/entity/sys_entity_continue.js +1 -1
- package/pathways/system/entity/sys_generator_results.js +1 -1
- package/pathways/system/entity/tools/sys_tool_google_search.js +15 -2
- package/pathways/system/entity/tools/sys_tool_grok_x_search.js +3 -3
- package/pathways/system/entity/tools/sys_tool_image.js +28 -23
- package/pathways/system/entity/tools/sys_tool_image_gemini.js +135 -0
- package/server/graphql.js +9 -2
- package/server/modelExecutor.js +4 -0
- package/server/pathwayResolver.js +19 -18
- package/server/plugins/claude3VertexPlugin.js +13 -8
- package/server/plugins/gemini15ChatPlugin.js +15 -10
- package/server/plugins/gemini15VisionPlugin.js +2 -23
- package/server/plugins/gemini25ImagePlugin.js +155 -0
- package/server/plugins/modelPlugin.js +3 -2
- package/server/plugins/openAiChatPlugin.js +6 -6
- package/server/plugins/replicateApiPlugin.js +268 -12
- package/server/plugins/veoVideoPlugin.js +15 -1
- package/server/rest.js +2 -0
- package/server/typeDef.js +96 -10
- package/tests/integration/apptekTranslatePlugin.integration.test.js +1 -1
- package/tests/unit/core/pathwayManager.test.js +2 -4
- package/tests/unit/plugins/gemini25ImagePlugin.test.js +294 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""Document conversion logic - handles both URI and stream-based conversions."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import tempfile
|
|
5
|
+
import os
|
|
6
|
+
import urllib.request
|
|
7
|
+
import urllib.parse
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
import shutil
|
|
10
|
+
from converter import DocumentConverter
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
async def convert_from_uri(uri: str) -> dict:
    """
    Convert a document from a URI to PDF.

    The document is downloaded into a private temporary directory, converted
    with ``DocumentConverter`` and read back into memory. The temporary
    directory is removed before returning, whatever the outcome.

    Args:
        uri: URL of the document to convert. Only ``http`` and ``https``
            URLs are accepted.

    Returns:
        dict with 'success', 'data' (PDF bytes), 'filename', or 'error' keys.
        Failures are reported as ``{"success": False, "error": ...}`` (plus
        'details', 'message', 'format' or 'supported_formats' depending on
        the failure) instead of being raised.
    """
    try:
        converter = DocumentConverter()

        logging.info(f"Downloading document from: {uri}")

        parsed_url = urllib.parse.urlparse(uri)

        # urlretrieve() also understands file:, ftp: and data: URLs. The URI is
        # caller-supplied, so anything but http(s) is refused to keep a request
        # from reading files off the local disk.
        scheme = parsed_url.scheme.lower()
        if scheme not in ("http", "https"):
            logging.error(f"Failed to download file: unsupported URI scheme {scheme!r}")
            return {
                "success": False,
                "error": "Failed to download document",
                "details": f"Unsupported URI scheme: {scheme or '(none)'}"
            }

        # Parse filename from URI. The path is percent-decoded first so that
        # "my%20file.docx" becomes "my file.docx"; basename() runs after the
        # decode so an encoded "/" (%2F) cannot smuggle in a directory part.
        filename = os.path.basename(urllib.parse.unquote(parsed_url.path))
        if not filename or '.' not in filename:
            filename = "document.pdf"

        # Create temporary directory for processing
        temp_dir = tempfile.mkdtemp()

        try:
            # Download file
            input_path = os.path.join(temp_dir, filename)

            try:
                urllib.request.urlretrieve(uri, input_path)
            except Exception as e:
                logging.error(f"Failed to download file: {str(e)}")
                return {
                    "success": False,
                    "error": "Failed to download document",
                    "details": str(e)
                }

            logging.info(f"Document downloaded to: {input_path}")

            # Check if file extension is supported
            file_ext = Path(input_path).suffix.lower()
            if not converter.is_supported_format(file_ext):
                return {
                    "success": False,
                    "error": "Unsupported file format",
                    "format": file_ext,
                    "supported_formats": converter.get_supported_formats()
                }

            # Convert to PDF
            logging.info(f"Converting {file_ext} document to PDF...")
            pdf_path = converter.convert_to_pdf(input_path, temp_dir)

            if not pdf_path or not os.path.exists(pdf_path):
                return {
                    "success": False,
                    "error": "Conversion failed",
                    "message": "The document could not be converted to PDF"
                }

            # Read the PDF into memory: the temp directory is deleted below,
            # so the caller cannot be handed a path.
            with open(pdf_path, 'rb') as pdf_file:
                pdf_data = pdf_file.read()

            # Generate output filename
            output_filename = Path(filename).stem + ".pdf"

            logging.info(f"Conversion successful. PDF size: {len(pdf_data)} bytes")

            return {
                "success": True,
                "data": pdf_data,
                "filename": output_filename
            }

        finally:
            # Cleanup temp directory (best effort; never masks the result)
            try:
                shutil.rmtree(temp_dir, ignore_errors=True)
            except Exception as e:
                logging.warning(f"Failed to cleanup temp directory: {e}")

    except Exception as e:
        logging.error(f"Error during conversion: {str(e)}", exc_info=True)
        return {
            "success": False,
            "error": "Conversion error",
            "details": str(e)
        }
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
async def convert_from_stream(file_data: bytes, filename: str) -> dict:
    """
    Convert a document from uploaded file data to PDF.

    The bytes are written into a private temporary directory, converted with
    ``DocumentConverter`` and read back into memory. The temporary directory
    is removed before returning, whatever the outcome.

    Args:
        file_data: Binary file data
        filename: Original filename (used to determine format). Only its
            final path component is used; any directory part is discarded.

    Returns:
        dict with 'success', 'data' (PDF bytes), 'filename', or 'error' keys.
        Failures are reported as ``{"success": False, "error": ...}`` (plus
        'details', 'message', 'format' or 'supported_formats' depending on
        the failure) instead of being raised.
    """
    try:
        converter = DocumentConverter()

        logging.info(f"Converting uploaded file: {filename} ({len(file_data)} bytes)")

        # The filename comes from the upload. Joining it to the temp dir as-is
        # would let "../x" or an absolute path write outside that directory,
        # so keep only the last path component.
        safe_name = os.path.basename(filename)
        if safe_name in ("", ".", ".."):
            return {
                "success": False,
                "error": "Conversion error",
                "details": "Invalid filename"
            }

        # Create temporary directory for processing
        temp_dir = tempfile.mkdtemp()

        try:
            # Save uploaded file
            input_path = os.path.join(temp_dir, safe_name)
            with open(input_path, 'wb') as f:
                f.write(file_data)

            logging.info(f"Saved uploaded file to: {input_path}")

            # Check if file extension is supported
            file_ext = Path(input_path).suffix.lower()
            if not converter.is_supported_format(file_ext):
                return {
                    "success": False,
                    "error": "Unsupported file format",
                    "format": file_ext,
                    "supported_formats": converter.get_supported_formats()
                }

            # Convert to PDF
            logging.info(f"Converting {file_ext} document to PDF...")
            pdf_path = converter.convert_to_pdf(input_path, temp_dir)

            if not pdf_path or not os.path.exists(pdf_path):
                return {
                    "success": False,
                    "error": "Conversion failed",
                    "message": "The document could not be converted to PDF"
                }

            # Read the PDF into memory: the temp directory is deleted below,
            # so the caller cannot be handed a path.
            with open(pdf_path, 'rb') as pdf_file:
                pdf_data = pdf_file.read()

            # Generate output filename
            output_filename = Path(safe_name).stem + ".pdf"

            logging.info(f"Conversion successful. PDF size: {len(pdf_data)} bytes")

            return {
                "success": True,
                "data": pdf_data,
                "filename": output_filename
            }

        finally:
            # Cleanup temp directory (best effort; never masks the result)
            try:
                shutil.rmtree(temp_dir, ignore_errors=True)
            except Exception as e:
                logging.warning(f"Failed to cleanup temp directory: {e}")

    except Exception as e:
        logging.error(f"Error during conversion: {str(e)}", exc_info=True)
        return {
            "success": False,
            "error": "Conversion error",
            "details": str(e)
        }
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# JavaScript Client Examples
|
|
2
|
+
|
|
3
|
+
Complete examples showing how to use the Document to PDF Converter from JavaScript.
|
|
4
|
+
|
|
5
|
+
## Node.js Client
|
|
6
|
+
|
|
7
|
+
### Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
cd examples
|
|
11
|
+
npm install
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
### Usage
|
|
15
|
+
|
|
16
|
+
```javascript
|
|
17
|
+
const { convertFileToPDF, convertUriToPDF, checkHealth } = require('./nodejs-client');
|
|
18
|
+
|
|
19
|
+
// Check if service is running
|
|
20
|
+
await checkHealth();
|
|
21
|
+
|
|
22
|
+
// Convert a local file (streaming)
|
|
23
|
+
await convertFileToPDF(
|
|
24
|
+
'document.docx',
|
|
25
|
+
'output.pdf'
|
|
26
|
+
);
|
|
27
|
+
|
|
28
|
+
// Convert from URI
|
|
29
|
+
await convertUriToPDF(
|
|
30
|
+
'https://example.com/document.xlsx',
|
|
31
|
+
'output.pdf'
|
|
32
|
+
);
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### Run Example
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
npm run example
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Features
|
|
42
|
+
|
|
43
|
+
- ✅ **Streaming upload** - Files are streamed in chunks (memory efficient)
|
|
44
|
+
- ✅ **Streaming download** - PDF is streamed directly to file
|
|
45
|
+
- ✅ **Progress tracking** - Monitor upload and download progress
|
|
46
|
+
- ✅ **Batch conversion** - Convert multiple files efficiently
|
|
47
|
+
- ✅ **Error handling** - Comprehensive error handling
|
|
48
|
+
- ✅ **Large file support** - Handles files of any size
|
|
49
|
+
|
|
50
|
+
### API Methods
|
|
51
|
+
|
|
52
|
+
#### `convertFileToPDF(inputPath, outputPath)`
|
|
53
|
+
Convert a local file to PDF with streaming.
|
|
54
|
+
|
|
55
|
+
```javascript
|
|
56
|
+
await convertFileToPDF('./document.docx', './output.pdf');
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
#### `convertUriToPDF(uri, outputPath)`
|
|
60
|
+
Convert a document from URL to PDF.
|
|
61
|
+
|
|
62
|
+
```javascript
|
|
63
|
+
await convertUriToPDF('https://example.com/file.xlsx', './output.pdf');
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
#### `convertFileWithProgress(inputPath, outputPath)`
|
|
67
|
+
Convert with upload/download progress tracking.
|
|
68
|
+
|
|
69
|
+
```javascript
|
|
70
|
+
await convertFileWithProgress('./large-file.docx', './output.pdf');
|
|
71
|
+
// Shows: 📤 Upload: 75%
|
|
72
|
+
//        📥 Downloaded: 523.45 KB
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
#### `convertMultipleFiles(files, outputDir)`
|
|
76
|
+
Batch convert multiple files.
|
|
77
|
+
|
|
78
|
+
```javascript
|
|
79
|
+
await convertMultipleFiles(
|
|
80
|
+
['file1.docx', 'file2.xlsx', 'file3.pptx'],
|
|
81
|
+
'./output'
|
|
82
|
+
);
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
#### `checkHealth()`
|
|
86
|
+
Check if service is running.
|
|
87
|
+
|
|
88
|
+
```javascript
|
|
89
|
+
const isHealthy = await checkHealth();
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Advanced Examples
|
|
93
|
+
|
|
94
|
+
### TypeScript Example
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
import axios from 'axios';
|
|
98
|
+
import * as fs from 'fs';
|
|
99
|
+
import FormData from 'form-data';
|
|
100
|
+
|
|
101
|
+
interface ConversionResult {
|
|
102
|
+
success: boolean;
|
|
103
|
+
outputPath: string;
|
|
104
|
+
error?: string;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
async function convertFile(
|
|
108
|
+
inputPath: string,
|
|
109
|
+
outputPath: string
|
|
110
|
+
): Promise<ConversionResult> {
|
|
111
|
+
try {
|
|
112
|
+
const form = new FormData();
|
|
113
|
+
form.append('file', fs.createReadStream(inputPath));
|
|
114
|
+
|
|
115
|
+
const response = await axios.post('http://localhost:8080/', form, {
|
|
116
|
+
headers: form.getHeaders(),
|
|
117
|
+
responseType: 'stream',
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
const writer = fs.createWriteStream(outputPath);
|
|
121
|
+
response.data.pipe(writer);
|
|
122
|
+
|
|
123
|
+
return new Promise((resolve) => {
|
|
124
|
+
writer.on('finish', () =>
|
|
125
|
+
resolve({ success: true, outputPath })
|
|
126
|
+
);
|
|
127
|
+
writer.on('error', (error) =>
|
|
128
|
+
resolve({ success: false, outputPath, error: error.message })
|
|
129
|
+
);
|
|
130
|
+
});
|
|
131
|
+
} catch (error) {
|
|
132
|
+
return {
|
|
133
|
+
success: false,
|
|
134
|
+
outputPath,
|
|
135
|
+
error: error.message
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### React Example
|
|
142
|
+
|
|
143
|
+
```jsx
|
|
144
|
+
import React, { useState } from 'react';
|
|
145
|
+
import axios from 'axios';
|
|
146
|
+
|
|
147
|
+
function DocumentConverter() {
|
|
148
|
+
const [file, setFile] = useState(null);
|
|
149
|
+
const [progress, setProgress] = useState(0);
|
|
150
|
+
const [pdfUrl, setPdfUrl] = useState(null);
|
|
151
|
+
|
|
152
|
+
const handleConvert = async () => {
|
|
153
|
+
if (!file) return;
|
|
154
|
+
|
|
155
|
+
const formData = new FormData();
|
|
156
|
+
formData.append('file', file);
|
|
157
|
+
|
|
158
|
+
try {
|
|
159
|
+
const response = await axios.post(
|
|
160
|
+
'http://localhost:8080/',
|
|
161
|
+
formData,
|
|
162
|
+
{
|
|
163
|
+
responseType: 'blob',
|
|
164
|
+
onUploadProgress: (e) => {
|
|
165
|
+
setProgress(Math.round((e.loaded * 100) / e.total));
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
);
|
|
169
|
+
|
|
170
|
+
const url = URL.createObjectURL(response.data);
|
|
171
|
+
setPdfUrl(url);
|
|
172
|
+
} catch (error) {
|
|
173
|
+
console.error('Conversion failed:', error);
|
|
174
|
+
}
|
|
175
|
+
};
|
|
176
|
+
|
|
177
|
+
return (
|
|
178
|
+
<div>
|
|
179
|
+
<input
|
|
180
|
+
type="file"
|
|
181
|
+
onChange={(e) => setFile(e.target.files[0])}
|
|
182
|
+
/>
|
|
183
|
+
<button onClick={handleConvert}>Convert</button>
|
|
184
|
+
{progress > 0 && <progress value={progress} max="100" />}
|
|
185
|
+
{pdfUrl && <a href={pdfUrl} download="output.pdf">Download PDF</a>}
|
|
186
|
+
</div>
|
|
187
|
+
);
|
|
188
|
+
}
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Service Endpoints
|
|
192
|
+
|
|
193
|
+
Both Node.js and browser examples support these endpoints:
|
|
194
|
+
|
|
195
|
+
- **POST /** - Convert file (root path)
|
|
196
|
+
- **POST /convert** - Convert file (explicit path)
|
|
197
|
+
- **GET /health** - Health check
|
|
198
|
+
|
|
199
|
+
## Streaming Benefits
|
|
200
|
+
|
|
201
|
+
### Upload Streaming
|
|
202
|
+
- Files are sent in chunks (not loaded entirely in memory)
|
|
203
|
+
- Supports large files without memory issues
|
|
204
|
+
- Progress can be tracked
|
|
205
|
+
|
|
206
|
+
### Download Streaming
|
|
207
|
+
- PDF is streamed directly to destination
|
|
208
|
+
- No intermediate buffering
|
|
209
|
+
- Memory efficient
|
|
210
|
+
- Can save directly to file (Node.js) or download (browser)
|
|
211
|
+
|
|
212
|
+
## Error Handling
|
|
213
|
+
|
|
214
|
+
```javascript
|
|
215
|
+
try {
|
|
216
|
+
  await convertFileToPDF('document.docx', 'output.pdf');
|
|
217
|
+
} catch (error) {
|
|
218
|
+
if (error.response) {
|
|
219
|
+
// Server responded with error
|
|
220
|
+
console.error('Server error:', error.response.data);
|
|
221
|
+
} else if (error.request) {
|
|
222
|
+
// No response received
|
|
223
|
+
console.error('No response from server');
|
|
224
|
+
} else {
|
|
225
|
+
// Other errors
|
|
226
|
+
console.error('Error:', error.message);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## Performance Tips
|
|
232
|
+
|
|
233
|
+
1. **Use streaming** - Always use `responseType: 'stream'` in Node.js
|
|
234
|
+
2. **Batch processing** - Convert multiple files sequentially, not in parallel
|
|
235
|
+
3. **Error recovery** - Implement retry logic for failed conversions
|
|
236
|
+
4. **Progress feedback** - Show progress to users for better UX
|
|
237
|
+
5. **File validation** - Validate file types before upload
|
|
238
|
+
|
|
239
|
+
## Troubleshooting
|
|
240
|
+
|
|
241
|
+
### CORS Issues (Browser)
|
|
242
|
+
If you get CORS errors in the browser, the server needs to set appropriate headers. For development, you can use a proxy or run the service with CORS enabled.
|
|
243
|
+
|
|
244
|
+
### Large Files
|
|
245
|
+
The service handles large files efficiently with streaming. No special configuration needed!
|
|
246
|
+
|
|
247
|
+
### Memory Usage
|
|
248
|
+
Both upload and download use streaming, so memory usage stays low even with large files.
|
|
249
|
+
|
|
250
|
+
## License
|
|
251
|
+
|
|
252
|
+
Part of the Cortex project.
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Node.js Client Example for Document to PDF Converter
|
|
3
|
+
*
|
|
4
|
+
* This example shows how to:
|
|
5
|
+
* 1. Upload files with streaming
|
|
6
|
+
* 2. Download PDFs with streaming
|
|
7
|
+
* 3. Save to local file system
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
const fs = require('fs');
|
|
11
|
+
const path = require('path');
|
|
12
|
+
const FormData = require('form-data');
|
|
13
|
+
const axios = require('axios');
|
|
14
|
+
|
|
15
|
+
// Base URL of the converter service. Trailing slashes are stripped because
// every request below appends its own path (`${SERVICE_URL}/convert`), and
// a configured "http://host/" would otherwise yield "http://host//convert".
const SERVICE_URL = (process.env.DOC_TO_PDF_URL || 'http://localhost:8080').replace(/\/+$/, '');
|
|
16
|
+
|
|
17
|
+
/**
 * Method 1: Upload local file (Recommended - Streaming)
 *
 * Sends the file at `inputFilePath` as the multipart field `file` in a POST
 * to `${SERVICE_URL}/` and pipes the streamed response into `outputFilePath`.
 *
 * @param {string} inputFilePath - Path of the document to upload.
 * @param {string} outputFilePath - Path the PDF response is written to.
 * @returns {Promise<string>} Resolves with `outputFilePath` once the output
 *   file has been fully written.
 * @throws Rethrows request errors after logging them; rejects when either
 *   the response stream or the output file stream fails.
 */
async function convertFileToPDF(inputFilePath, outputFilePath) {
  try {
    console.log(`📤 Uploading: ${inputFilePath}`);

    // Create form data with file stream
    const form = new FormData();
    const fileStream = fs.createReadStream(inputFilePath);
    form.append('file', fileStream, path.basename(inputFilePath));

    // Upload with streaming (both upload and download)
    const response = await axios({
      method: 'POST',
      url: `${SERVICE_URL}/`, // Can use / or /convert
      data: form,
      headers: {
        ...form.getHeaders(),
      },
      responseType: 'stream', // Stream the response
      maxContentLength: Infinity,
      maxBodyLength: Infinity,
    });

    // Stream the PDF response to file
    const writer = fs.createWriteStream(outputFilePath);
    response.data.pipe(writer);

    return new Promise((resolve, reject) => {
      writer.on('finish', () => {
        console.log(`✅ Saved to: ${outputFilePath}`);
        const stats = fs.statSync(outputFilePath);
        console.log(`📊 Size: ${(stats.size / 1024).toFixed(2)} KB`);
        resolve(outputFilePath);
      });
      writer.on('error', reject);
      // pipe() does not forward source errors to the destination; without
      // this handler a dropped connection would leave the promise pending.
      response.data.on('error', (streamError) => {
        writer.destroy();
        reject(streamError);
      });
    });

  } catch (error) {
    console.error('❌ Conversion failed:', error.message);
    if (error.response) {
      // With responseType 'stream' the error body is a readable stream, so it
      // has to be drained before it can be printed.
      try {
        const chunks = [];
        for await (const chunk of error.response.data) {
          chunks.push(Buffer.from(chunk));
        }
        console.error('Response:', Buffer.concat(chunks).toString());
      } catch (readError) {
        console.error('Response: (could not read error body)', readError.message);
      }
    }
    throw error;
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Method 2: Convert from URI
 *
 * POSTs `{ uri: documentUri }` as JSON to `${SERVICE_URL}/convert` and pipes
 * the streamed response into `outputFilePath`.
 *
 * @param {string} documentUri - URL of the document to convert.
 * @param {string} outputFilePath - Path the PDF response is written to.
 * @returns {Promise<string>} Resolves with `outputFilePath` once the output
 *   file has been fully written.
 * @throws Rethrows request errors after logging them; rejects when either
 *   the response stream or the output file stream fails.
 */
async function convertUriToPDF(documentUri, outputFilePath) {
  try {
    console.log(`🌐 Converting from URI: ${documentUri}`);

    const response = await axios({
      method: 'POST',
      url: `${SERVICE_URL}/convert`,
      data: { uri: documentUri },
      headers: {
        'Content-Type': 'application/json',
      },
      responseType: 'stream', // Stream the response
    });

    // Stream to file
    const writer = fs.createWriteStream(outputFilePath);
    response.data.pipe(writer);

    return new Promise((resolve, reject) => {
      writer.on('finish', () => {
        console.log(`✅ Saved to: ${outputFilePath}`);
        resolve(outputFilePath);
      });
      writer.on('error', reject);
      // pipe() does not forward source errors to the destination; without
      // this handler a dropped connection would leave the promise pending.
      response.data.on('error', (streamError) => {
        writer.destroy();
        reject(streamError);
      });
    });

  } catch (error) {
    console.error('❌ Conversion failed:', error.message);
    if (error.response) {
      // The error body arrives as a stream (responseType 'stream'). Reading or
      // parsing it must never replace the original error, so both steps are
      // guarded and a non-JSON body is printed as plain text.
      try {
        const chunks = [];
        for await (const chunk of error.response.data) {
          chunks.push(Buffer.from(chunk));
        }
        const body = Buffer.concat(chunks).toString();
        let details = body;
        try {
          details = JSON.parse(body);
        } catch (parseError) {
          // Not JSON (e.g. a proxy's HTML error page): keep the raw text.
        }
        console.error('Error:', details);
      } catch (readError) {
        console.error('Error: (could not read error body)', readError.message);
      }
    }
    throw error;
  }
}
|
|
108
|
+
|
|
109
|
+
/**
 * Method 3: Upload with progress tracking
 *
 * Same request as `convertFileToPDF`, but writes upload percentage and
 * downloaded size to stdout while the transfer runs.
 *
 * @param {string} inputFilePath - Path of the document to upload.
 * @param {string} outputFilePath - Path the PDF response is written to.
 * @returns {Promise<string>} Resolves with `outputFilePath` once the output
 *   file has been fully written.
 * @throws Rethrows request errors after logging them; rejects when either
 *   the response stream or the output file stream fails.
 */
async function convertFileWithProgress(inputFilePath, outputFilePath) {
  try {
    console.log(`📤 Uploading: ${inputFilePath}`);

    const form = new FormData();
    const fileStream = fs.createReadStream(inputFilePath);
    const fileSize = fs.statSync(inputFilePath).size;

    form.append('file', fileStream, path.basename(inputFilePath));

    const response = await axios({
      method: 'POST',
      url: `${SERVICE_URL}/`,
      data: form,
      headers: form.getHeaders(),
      responseType: 'stream',
      maxContentLength: Infinity,
      maxBodyLength: Infinity,
      onUploadProgress: (progressEvent) => {
        // `loaded` counts the whole multipart body (boundaries and headers
        // included), so it can exceed the file size: clamp to 100. An empty
        // file is reported as complete rather than dividing by zero.
        const percent = fileSize > 0
          ? Math.min(100, Math.round((progressEvent.loaded * 100) / fileSize))
          : 100;
        process.stdout.write(`\r📤 Upload: ${percent}%`);
      },
    });

    console.log('\n📥 Downloading PDF...');

    const writer = fs.createWriteStream(outputFilePath);
    let downloadedBytes = 0;

    response.data.on('data', (chunk) => {
      downloadedBytes += chunk.length;
      process.stdout.write(`\r📥 Downloaded: ${(downloadedBytes / 1024).toFixed(2)} KB`);
    });

    response.data.pipe(writer);

    return new Promise((resolve, reject) => {
      writer.on('finish', () => {
        console.log(`\n✅ Saved to: ${outputFilePath}`);
        resolve(outputFilePath);
      });
      writer.on('error', reject);
      // pipe() does not forward source errors to the destination; without
      // this handler a dropped connection would leave the promise pending.
      response.data.on('error', (streamError) => {
        writer.destroy();
        reject(streamError);
      });
    });

  } catch (error) {
    console.error('\n❌ Conversion failed:', error.message);
    throw error;
  }
}
|
|
165
|
+
|
|
166
|
+
/**
 * Method 4: Batch conversion
 *
 * Converts the given files one after another with `convertFileToPDF`,
 * writing `<basename>.pdf` for each into `outputDir`. A failure of one file
 * is recorded and does not stop the batch.
 *
 * Note: inputs that share a base name (e.g. `a.docx` and `a.xlsx`) map to
 * the same output file, so the later one overwrites the earlier one.
 *
 * @param {string[]} inputFiles - Paths of the documents to convert.
 * @param {string} outputDir - Directory for the PDFs; created if missing.
 * @returns {Promise<Array<{success: boolean, input: string, output?: string, error?: string}>>}
 *   One entry per input file, in input order.
 */
async function convertMultipleFiles(inputFiles, outputDir) {
  console.log(`📦 Converting ${inputFiles.length} files...`);

  // Without the directory every file would be uploaded and converted, and
  // only then fail when the output stream is opened.
  fs.mkdirSync(outputDir, { recursive: true });

  const results = [];

  for (const inputFile of inputFiles) {
    try {
      const fileName = path.basename(inputFile, path.extname(inputFile));
      const outputFile = path.join(outputDir, `${fileName}.pdf`);

      await convertFileToPDF(inputFile, outputFile);
      results.push({ success: true, input: inputFile, output: outputFile });
    } catch (error) {
      results.push({ success: false, input: inputFile, error: error.message });
    }
  }

  // Summary
  const successful = results.filter(r => r.success).length;
  console.log(`\n✅ Converted ${successful}/${inputFiles.length} files`);

  return results;
}
|
|
193
|
+
|
|
194
|
+
/**
 * Health check
 *
 * Issues a GET to `${SERVICE_URL}/health` and logs the outcome.
 *
 * @returns {Promise<boolean>} `true` when the request succeeds, `false` when
 *   it fails for any reason (the error is logged, never thrown).
 */
async function checkHealth() {
  try {
    const response = await axios.get(`${SERVICE_URL}/health`);
    console.log('✅ Service is healthy:', response.data);
    return true;
  } catch (error) {
    console.error('❌ Service is not available:', error.message);
    return false;
  }
}
|
|
207
|
+
|
|
208
|
+
/**
 * Example usage: checks that the service is up, then runs the single-file
 * and progress-tracking conversions against the bundled sample documents.
 *
 * Sample and output paths are resolved against this file's directory rather
 * than the current working directory, so the script behaves the same
 * wherever it is launched from and writes into the `output` directory that
 * the launcher below creates next to this file.
 *
 * Exits the process with status 1 when the health check fails.
 */
async function main() {
  // Check if service is running
  const isHealthy = await checkHealth();
  if (!isHealthy) {
    console.error('Service is not available. Please start it first.');
    process.exit(1);
  }

  console.log('\n=== Document to PDF Converter - Node.js Client ===\n');

  const samplesDir = path.join(__dirname, '..', 'samples');
  const outputDir = path.join(__dirname, 'output');

  // Example 1: Convert a single file
  await convertFileToPDF(
    path.join(samplesDir, 'data.txt'),
    path.join(outputDir, 'data.pdf')
  );

  console.log('\n---\n');

  // Example 2: Convert with progress
  await convertFileWithProgress(
    path.join(samplesDir, 'file-sample_1MB.docx'),
    path.join(outputDir, 'document.pdf')
  );

  console.log('\n---\n');

  // Example 3: Convert from URI
  // await convertUriToPDF(
  //   'https://example.com/document.docx',
  //   path.join(outputDir, 'from-uri.pdf')
  // );

  // Example 4: Batch conversion
  // await convertMultipleFiles(
  //   ['file1.docx', 'file2.xlsx', 'file3.pptx'],
  //   outputDir
  // );
}
|
|
247
|
+
|
|
248
|
+
// Run if called directly
if (require.main === module) {
  // Create output directory. With { recursive: true } mkdirSync does nothing
  // when the directory already exists, so no existence check is needed.
  fs.mkdirSync(path.join(__dirname, 'output'), { recursive: true });

  main().catch((error) => {
    console.error(error);
    // Report the failure to the shell instead of exiting with status 0.
    process.exitCode = 1;
  });
}
|
|
258
|
+
|
|
259
|
+
// Export for use in other modules
|
|
260
|
+
module.exports = {
|
|
261
|
+
convertFileToPDF,
|
|
262
|
+
convertUriToPDF,
|
|
263
|
+
convertFileWithProgress,
|
|
264
|
+
convertMultipleFiles,
|
|
265
|
+
checkHealth,
|
|
266
|
+
};
|