@aj-archipelago/cortex 1.3.67 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +27 -0
- package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/__init__.py +3 -0
- package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/function.json +20 -0
- package/helper-apps/cortex-doc-to-pdf/Dockerfile +46 -0
- package/helper-apps/cortex-doc-to-pdf/README.md +408 -0
- package/helper-apps/cortex-doc-to-pdf/converter.py +157 -0
- package/helper-apps/cortex-doc-to-pdf/docker-compose.yml +23 -0
- package/helper-apps/cortex-doc-to-pdf/document_converter.py +181 -0
- package/helper-apps/cortex-doc-to-pdf/examples/README.md +252 -0
- package/helper-apps/cortex-doc-to-pdf/examples/nodejs-client.js +266 -0
- package/helper-apps/cortex-doc-to-pdf/examples/package-lock.json +297 -0
- package/helper-apps/cortex-doc-to-pdf/examples/package.json +23 -0
- package/helper-apps/cortex-doc-to-pdf/function_app.py +85 -0
- package/helper-apps/cortex-doc-to-pdf/host.json +16 -0
- package/helper-apps/cortex-doc-to-pdf/request_handlers.py +193 -0
- package/helper-apps/cortex-doc-to-pdf/requirements.txt +3 -0
- package/helper-apps/cortex-doc-to-pdf/tests/run_tests.sh +26 -0
- package/helper-apps/cortex-doc-to-pdf/tests/test_conversion.py +320 -0
- package/helper-apps/cortex-doc-to-pdf/tests/test_streaming.py +419 -0
- package/helper-apps/cortex-file-handler/package-lock.json +1 -0
- package/helper-apps/cortex-file-handler/package.json +1 -0
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +81 -8
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +54 -7
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +19 -7
- package/lib/encodeCache.js +5 -0
- package/lib/keyValueStorageClient.js +5 -0
- package/lib/logger.js +1 -1
- package/lib/pathwayTools.js +8 -1
- package/lib/redisSubscription.js +6 -0
- package/lib/requestExecutor.js +4 -0
- package/lib/util.js +88 -0
- package/package.json +1 -1
- package/pathways/basePathway.js +3 -3
- package/pathways/bing_afagent.js +1 -0
- package/pathways/gemini_15_vision.js +1 -1
- package/pathways/google_cse.js +2 -2
- package/pathways/image_gemini_25.js +85 -0
- package/pathways/image_prompt_optimizer_gemini_25.js +149 -0
- package/pathways/image_qwen.js +28 -0
- package/pathways/image_seedream4.js +26 -0
- package/pathways/rag.js +1 -1
- package/pathways/rag_jarvis.js +1 -1
- package/pathways/system/entity/sys_entity_continue.js +1 -1
- package/pathways/system/entity/sys_generator_results.js +1 -1
- package/pathways/system/entity/tools/sys_tool_google_search.js +15 -2
- package/pathways/system/entity/tools/sys_tool_grok_x_search.js +3 -3
- package/pathways/system/entity/tools/sys_tool_image.js +28 -23
- package/pathways/system/entity/tools/sys_tool_image_gemini.js +135 -0
- package/server/graphql.js +9 -2
- package/server/modelExecutor.js +4 -0
- package/server/pathwayResolver.js +19 -18
- package/server/plugins/claude3VertexPlugin.js +13 -8
- package/server/plugins/gemini15ChatPlugin.js +15 -10
- package/server/plugins/gemini15VisionPlugin.js +2 -23
- package/server/plugins/gemini25ImagePlugin.js +155 -0
- package/server/plugins/modelPlugin.js +3 -2
- package/server/plugins/openAiChatPlugin.js +6 -6
- package/server/plugins/replicateApiPlugin.js +268 -12
- package/server/plugins/veoVideoPlugin.js +15 -1
- package/server/rest.js +2 -0
- package/server/typeDef.js +96 -10
- package/tests/integration/apptekTranslatePlugin.integration.test.js +1 -1
- package/tests/unit/core/pathwayManager.test.js +2 -4
- package/tests/unit/plugins/gemini25ImagePlugin.test.js +294 -0
|
@@ -0,0 +1,419 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
End-to-end tests for streaming document upload and PDF download.
|
|
4
|
+
Tests both file upload and URI-based conversion with streaming responses.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
import requests
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import PyPDF2
|
|
13
|
+
|
|
14
|
+
# Test configuration
# Sample inputs and generated PDFs are resolved two directory levels above
# this file (the parent of the directory that contains this script).
SAMPLES_DIR = Path(__file__).parent.parent / "samples"
OUTPUT_DIR = Path(__file__).parent.parent / "test_streaming_output"
# Created at import time so every test can write its PDF without checking.
OUTPUT_DIR.mkdir(exist_ok=True)

# Base URL of the service under test; override with the TEST_URL env var.
BASE_URL = os.getenv("TEST_URL", "http://localhost:8080")
CONVERT_ENDPOINT = f"{BASE_URL}/convert"
|
|
21
|
+
|
|
22
|
+
class Colors:
    """ANSI escape sequences used to style terminal output."""

    GREEN = '\033[92m'
    RED = '\033[91m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    # RESET ends any styling started by the codes above.
    RESET = '\033[0m'
    BOLD = '\033[1m'
|
|
30
|
+
|
|
31
|
+
def print_success(msg):
    """Print ``msg`` prefixed with a green check mark."""
    print(f"{Colors.GREEN}✓{Colors.RESET} {msg}")
|
|
33
|
+
|
|
34
|
+
def print_error(msg):
    """Print ``msg`` prefixed with a red cross mark."""
    print(f"{Colors.RED}✗{Colors.RESET} {msg}")
|
|
36
|
+
|
|
37
|
+
def print_info(msg):
    """Print ``msg`` prefixed with a blue info symbol."""
    print(f"{Colors.BLUE}ℹ{Colors.RESET} {msg}")
|
|
39
|
+
|
|
40
|
+
def print_warning(msg):
    """Print ``msg`` prefixed with a yellow warning symbol."""
    print(f"{Colors.YELLOW}⚠{Colors.RESET} {msg}")
|
|
42
|
+
|
|
43
|
+
def print_header(msg):
    """Print ``msg`` in bold between two 60-character ``=`` rules."""
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}{msg}{Colors.RESET}")
    print(rule)
|
|
47
|
+
|
|
48
|
+
def verify_pdf(pdf_path):
    """Verify that a file is a valid PDF.

    Args:
        pdf_path: Path of the file to inspect.

    Returns:
        ``{'valid': True, 'pages': <int>, 'size': <bytes on disk>}`` when
        PyPDF2 can open the file and count its pages, otherwise
        ``{'valid': False, 'error': <message>}``. Never raises: any failure
        (missing file, parse error) is reported through the dict.
    """
    try:
        with open(pdf_path, 'rb') as f:
            pdf_reader = PyPDF2.PdfReader(f)
            # The page count must be read while the file is still open.
            return {
                'valid': True,
                'pages': len(pdf_reader.pages),
                'size': os.path.getsize(pdf_path)
            }
    except Exception as e:
        return {
            'valid': False,
            'error': str(e)
        }
|
|
63
|
+
|
|
64
|
+
def test_file_upload_streaming(file_path, test_name, timeout=(10, 300)):
    """Upload a local document and stream the converted PDF to disk.

    Args:
        file_path: ``pathlib.Path`` of the document to upload.
        test_name: Human-readable label printed in the test output.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post`` so a stalled service cannot hang the run.

    Returns:
        True when the service answers 200 and the downloaded file passes
        ``verify_pdf``; False on any HTTP, connection or validation error.
    """
    print(f"\n{Colors.BOLD}Test: {test_name}{Colors.RESET}")
    print(f"  File: {file_path.name}")
    print(f"  Size: {os.path.getsize(file_path):,} bytes")

    output_path = OUTPUT_DIR / f"{file_path.stem}_uploaded.pdf"

    start_time = time.time()

    try:
        # Open file and stream it
        with open(file_path, 'rb') as f:
            files = {'file': (file_path.name, f, 'application/octet-stream')}

            print_info("Uploading file...")
            response = requests.post(
                CONVERT_ENDPOINT,
                files=files,
                stream=True,  # Enable streaming response
                timeout=timeout,
            )

        # With stream=True the connection stays open until the body is
        # consumed or the response is closed, so always close it.
        with response:
            if response.status_code != 200:
                print_error(f"HTTP {response.status_code}: {response.text[:200]}")
                return False

            # Stream response to file
            print_info("Streaming PDF response...")
            bytes_received = 0
            with open(output_path, 'wb') as out:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        out.write(chunk)
                        bytes_received += len(chunk)

        # Measured after the download so the figure covers the whole transfer,
        # not just the time until the response headers arrived.
        elapsed = time.time() - start_time

        print_success(f"Completed in {elapsed:.2f}s")
        print_info(f"Downloaded: {bytes_received:,} bytes")

        # Verify PDF
        pdf_info = verify_pdf(output_path)
        if not pdf_info['valid']:
            print_error(f"Invalid PDF: {pdf_info.get('error')}")
            return False

        print_success(f"Valid PDF: {pdf_info['pages']} pages, {pdf_info['size']:,} bytes")
        return True

    except requests.exceptions.ConnectionError:
        print_error("Connection failed - is the service running?")
        return False
    except Exception as e:
        print_error(f"Test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
121
|
+
|
|
122
|
+
def test_uri_streaming(uri, filename, test_name, timeout=(10, 300)):
    """Ask the service to convert a remote document and stream the PDF to disk.

    Args:
        uri: URL of the source document, sent as JSON ``{'uri': uri}``.
        filename: Name of the output file created inside ``OUTPUT_DIR``.
        test_name: Human-readable label printed in the test output.
        timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests.post`` so a stalled service cannot hang the run.

    Returns:
        True when the service answers 200 and the downloaded file passes
        ``verify_pdf``; False on any HTTP, connection or validation error.
    """
    print(f"\n{Colors.BOLD}Test: {test_name}{Colors.RESET}")
    print(f"  URI: {uri[:80]}...")

    output_path = OUTPUT_DIR / filename

    start_time = time.time()

    try:
        print_info("Requesting conversion...")
        response = requests.post(
            CONVERT_ENDPOINT,
            json={'uri': uri},
            stream=True,  # Enable streaming response
            timeout=timeout,
        )

        # With stream=True the connection stays open until the body is
        # consumed or the response is closed, so always close it.
        with response:
            if response.status_code != 200:
                print_error(f"HTTP {response.status_code}: {response.text[:200]}")
                return False

            # Stream response to file
            print_info("Streaming PDF response...")
            bytes_received = 0
            with open(output_path, 'wb') as out:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        out.write(chunk)
                        bytes_received += len(chunk)

        # Measured after the download so the figure covers the whole transfer,
        # not just the time until the response headers arrived.
        elapsed = time.time() - start_time

        print_success(f"Completed in {elapsed:.2f}s")
        print_info(f"Downloaded: {bytes_received:,} bytes")

        # Verify PDF
        pdf_info = verify_pdf(output_path)
        if not pdf_info['valid']:
            print_error(f"Invalid PDF: {pdf_info.get('error')}")
            return False

        print_success(f"Valid PDF: {pdf_info['pages']} pages, {pdf_info['size']:,} bytes")
        return True

    except requests.exceptions.ConnectionError:
        print_error("Connection failed - is the service running?")
        return False
    except Exception as e:
        print_error(f"Test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
174
|
+
|
|
175
|
+
def test_large_file_streaming():
    """Test streaming with larger files.

    Uploads the Excel and PowerPoint samples through
    ``test_file_upload_streaming``. Samples that are not present in
    ``SAMPLES_DIR`` are skipped with a warning and contribute no result.

    Returns:
        List of booleans, one per sample that was actually uploaded.
    """
    print_header("Large File Streaming Tests")

    large_files = [
        ("file_example_XLSX_5000.xlsx", "Large Excel File Upload"),
        ("file_example_PPT_1MB.ppt", "Large PowerPoint Upload"),
    ]

    results = []
    for filename, test_name in large_files:
        file_path = SAMPLES_DIR / filename
        if file_path.exists():
            results.append(test_file_upload_streaming(file_path, test_name))
        else:
            # Same skip message as test_various_formats, so a missing fixture
            # is visible in the output instead of silently shrinking the run.
            print_warning(f"Skipping {filename} - not found")

    return results
|
|
198
|
+
|
|
199
|
+
def test_various_formats():
    """Test streaming with various document formats.

    Uploads each listed sample through ``test_file_upload_streaming``;
    samples missing from ``SAMPLES_DIR`` are skipped with a warning.

    Returns:
        List of booleans, one per sample that was actually uploaded.
    """
    print_header("Various Format Streaming Tests")

    results = []
    test_files = [
        ("file-sample_1MB.docx", "Word Document"),
        ("ascii-art.txt", "Text File"),
        ("sample1.html", "HTML File"),
    ]

    for filename, description in test_files:
        file_path = SAMPLES_DIR / filename
        if file_path.exists():
            results.append(test_file_upload_streaming(
                file_path,
                f"{description} Upload"
            ))
        else:
            print_warning(f"Skipping {filename} - not found")

    return results
|
|
221
|
+
|
|
222
|
+
def test_uri_based_streaming():
    """Test URI-based conversion with streaming.

    Returns:
        List with one boolean: the outcome of converting a publicly hosted
        Word document through ``test_uri_streaming``.
    """
    print_header("URI-Based Streaming Tests")

    results = []

    # Test with a public document
    results.append(test_uri_streaming(
        "https://file-examples.com/storage/fe783f04fc66761fd44fb46/2017/02/file-sample_100kB.docx",
        "public_word_doc.pdf",
        "Public Word Document via URI"
    ))

    return results
|
|
236
|
+
|
|
237
|
+
def test_concurrent_uploads():
    """Test multiple concurrent streaming uploads.

    Submits every available sample to ``test_file_upload_streaming`` on a
    three-worker thread pool and collects the outcomes as they complete.

    Returns:
        List of booleans, one per upload (completion order). When none of the
        samples exist the function returns ``[True]``, so the caller counts
        the skipped suite as a single pass.
    """
    print_header("Concurrent Streaming Tests")

    from concurrent.futures import ThreadPoolExecutor, as_completed

    test_files = [
        SAMPLES_DIR / "data.txt",
        SAMPLES_DIR / "multilang.txt",
        SAMPLES_DIR / "sample2.html",
    ]

    # Filter existing files
    test_files = [f for f in test_files if f.exists()]

    if not test_files:
        print_warning("No test files available for concurrent test")
        return [True]

    print_info(f"Testing {len(test_files)} concurrent uploads...")
    start_time = time.time()

    results = []
    with ThreadPoolExecutor(max_workers=3) as executor:
        # Map each future back to its file so failures can name the input.
        futures = {
            executor.submit(
                test_file_upload_streaming,
                file_path,
                f"Concurrent Upload {i+1}"
            ): file_path
            for i, file_path in enumerate(test_files)
        }

        for future in as_completed(futures):
            file_path = futures[future]
            try:
                result = future.result()
                results.append(result)
            except Exception as e:
                print_error(f"Concurrent test failed for {file_path.name}: {e}")
                results.append(False)

    elapsed = time.time() - start_time
    failed = sum(1 for r in results if not r)
    if failed == 0:
        print_success(f"All concurrent uploads completed in {elapsed:.2f}s")
    else:
        # Do not report success when some uploads failed.
        print_error(
            f"{failed} of {len(results)} concurrent uploads failed ({elapsed:.2f}s)"
        )

    return results
|
|
283
|
+
|
|
284
|
+
def test_error_handling():
    """Test error handling in streaming.

    Runs three checks against the convert endpoint:
      1. a file with an unsupported extension must be rejected with 400;
      2. an empty file must produce 200, 400 or 500 (any handled outcome);
      3. a malformed URI must be rejected with 400.

    Returns:
        List of three booleans, one per check, in the order above.
    """
    print_header("Error Handling Tests")

    results = []
    # Error responses should come back quickly; bound the wait regardless.
    request_timeout = (10, 60)

    # Test 1: Invalid file type
    print(f"\n{Colors.BOLD}Test: Invalid File Type{Colors.RESET}")
    fake_file = OUTPUT_DIR / "test.xyz"
    try:
        # Create a fake file with unsupported extension
        fake_file.write_text("test content")

        with open(fake_file, 'rb') as f:
            files = {'file': ('test.xyz', f, 'application/octet-stream')}
            response = requests.post(
                CONVERT_ENDPOINT, files=files, timeout=request_timeout
            )

        if response.status_code == 400:
            print_success("Correctly rejected unsupported file type")
            results.append(True)
        else:
            print_error(f"Expected 400, got {response.status_code}")
            results.append(False)
    except Exception as e:
        print_error(f"Error test failed: {e}")
        results.append(False)
    finally:
        # Remove the temp file even when the request raised.
        if fake_file.exists():
            fake_file.unlink()

    # Test 2: Empty file
    print(f"\n{Colors.BOLD}Test: Empty File Upload{Colors.RESET}")
    empty_file = OUTPUT_DIR / "empty.txt"
    try:
        empty_file.write_text("")

        with open(empty_file, 'rb') as f:
            files = {'file': ('empty.txt', f, 'application/octet-stream')}
            response = requests.post(
                CONVERT_ENDPOINT, files=files, timeout=request_timeout
            )

        # Should handle gracefully (either convert or error)
        if response.status_code in [200, 400, 500]:
            print_success(f"Handled empty file gracefully (status: {response.status_code})")
            results.append(True)
        else:
            print_error(f"Unexpected status: {response.status_code}")
            results.append(False)
    except Exception as e:
        print_error(f"Empty file test failed: {e}")
        results.append(False)
    finally:
        # Remove the temp file even when the request raised.
        if empty_file.exists():
            empty_file.unlink()

    # Test 3: Invalid URI
    print(f"\n{Colors.BOLD}Test: Invalid URI{Colors.RESET}")
    try:
        response = requests.post(
            CONVERT_ENDPOINT,
            json={'uri': 'not-a-valid-url'},
            timeout=request_timeout,
        )

        if response.status_code == 400:
            print_success("Correctly rejected invalid URI")
            results.append(True)
        else:
            print_error(f"Expected 400, got {response.status_code}")
            results.append(False)
    except Exception as e:
        print_error(f"Invalid URI test failed: {e}")
        results.append(False)

    return results
|
|
355
|
+
|
|
356
|
+
def check_service_health():
    """Check if the service is running.

    Sends ``GET {BASE_URL}/health`` with a 5-second timeout.

    Returns:
        True only when the endpoint answers with HTTP 200; False for any
        other status, a refused connection, or any other request error.
    """
    try:
        response = requests.get(f"{BASE_URL}/health", timeout=5)
        if response.status_code == 200:
            print_success(f"Service is running at {BASE_URL}")
            return True
        else:
            print_error(f"Service returned status {response.status_code}")
            return False
    except requests.exceptions.ConnectionError:
        print_error(f"Cannot connect to service at {BASE_URL}")
        print_info("Make sure the service is running: docker compose up -d")
        return False
    except Exception as e:
        print_error(f"Health check failed: {e}")
        return False
|
|
373
|
+
|
|
374
|
+
def main():
    """Run all streaming tests.

    Prints the configuration, aborts early if the health check fails, runs
    every test suite, then prints a pass/fail summary.

    Returns:
        Process exit code: 0 when no test failed, 1 when the service is
        unreachable or at least one test failed.
    """
    print_header("Document to PDF Streaming Tests")
    print(f"Test URL: {BASE_URL}")
    print(f"Samples:  {SAMPLES_DIR}")
    print(f"Output:   {OUTPUT_DIR}")

    # Check service health
    if not check_service_health():
        return 1

    # Run all test suites
    all_results = []

    all_results.extend(test_various_formats())
    all_results.extend(test_large_file_streaming())
    all_results.extend(test_uri_based_streaming())
    all_results.extend(test_concurrent_uploads())
    all_results.extend(test_error_handling())

    # Print summary
    print_header("Test Summary")

    passed = sum(1 for r in all_results if r)
    failed = sum(1 for r in all_results if not r)
    total = len(all_results)

    print(f"\nTotal tests: {total}")
    print_success(f"Passed: {passed}")
    if failed > 0:
        print_error(f"Failed: {failed}")

    # Guard against division by zero when every suite was skipped.
    success_rate = (passed / total * 100) if total > 0 else 0
    print(f"\nSuccess rate: {success_rate:.1f}%")

    print(f"\n{Colors.BLUE}All output PDFs saved to: {OUTPUT_DIR}{Colors.RESET}")

    if failed == 0:
        print(f"\n{Colors.GREEN}{Colors.BOLD}🎉 All streaming tests passed!{Colors.RESET}")
        return 0
    else:
        print(f"\n{Colors.RED}{Colors.BOLD}❌ Some tests failed{Colors.RESET}")
        return 1
|
|
417
|
+
|
|
418
|
+
# Run the suite when executed as a script; main()'s return value is the exit code.
if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -4,12 +4,19 @@ import path from "path";
|
|
|
4
4
|
import { createReadStream, createWriteStream } from "fs";
|
|
5
5
|
import { pipeline } from "stream/promises";
|
|
6
6
|
import axios from "axios";
|
|
7
|
+
import FormData from "form-data";
|
|
7
8
|
import XLSX from "xlsx";
|
|
8
9
|
import { CONVERTED_EXTENSIONS } from "../constants.js";
|
|
9
10
|
import { v4 as uuidv4 } from "uuid";
|
|
10
11
|
import { sanitizeFilename, generateShortId } from "../utils/filenameUtils.js";
|
|
11
12
|
|
|
12
|
-
|
|
13
|
+
// Read service URLs at call time to allow tests to mutate process.env
|
|
14
|
+
/**
 * Look up the MarkItDown conversion endpoint from the environment.
 * The variable is read on every call rather than cached at module load.
 * @returns {string|null} Value of MARKITDOWN_CONVERT_URL, or null when it is unset or empty.
 */
function getMarkitdownUrl() {
  return process.env.MARKITDOWN_CONVERT_URL || null;
}
|
|
17
|
+
/**
 * Look up the document-to-PDF service endpoint from the environment.
 * The variable is read on every call rather than cached at module load.
 * @returns {string|null} Value of DOC_TO_PDF_SERVICE_URL, or null when it is unset or empty.
 */
function getDocToPdfUrl() {
  return process.env.DOC_TO_PDF_SERVICE_URL || null;
}
|
|
13
20
|
|
|
14
21
|
export class ConversionService {
|
|
15
22
|
constructor(context) {
|
|
@@ -229,20 +236,36 @@ export class ConversionService {
|
|
|
229
236
|
}
|
|
230
237
|
|
|
231
238
|
async _handleDocumentConversion(filePath, originalUrl, tempDir) {
|
|
232
|
-
|
|
239
|
+
// Default: Try PDF conversion if service is configured
|
|
240
|
+
const pdfServiceUrl = getDocToPdfUrl();
|
|
241
|
+
if (pdfServiceUrl) {
|
|
242
|
+
this.context.log("PDF service configured - converting to PDF");
|
|
243
|
+
try {
|
|
244
|
+
const pdfPath = await this._convertToPDF(filePath, tempDir);
|
|
245
|
+
if (pdfPath) {
|
|
246
|
+
return {
|
|
247
|
+
convertedPath: pdfPath,
|
|
248
|
+
convertedName: path.basename(pdfPath),
|
|
249
|
+
converted: true,
|
|
250
|
+
};
|
|
251
|
+
}
|
|
252
|
+
} catch (error) {
|
|
253
|
+
this.context.log("PDF conversion failed, falling back to markdown:", error.message);
|
|
254
|
+
}
|
|
255
|
+
} else {
|
|
256
|
+
this.context.log("PDF service not configured - using markdown conversion");
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// Fallback to markdown if PDF service not configured or conversion fails
|
|
233
260
|
if (!originalUrl) {
|
|
234
261
|
throw new Error("Original URL is required for document conversion");
|
|
235
262
|
}
|
|
236
263
|
|
|
237
264
|
const markdown = await this._convertToMarkdown(originalUrl);
|
|
238
265
|
if (!markdown) {
|
|
239
|
-
throw new Error("Markdown conversion
|
|
266
|
+
throw new Error("Markdown conversion failed");
|
|
240
267
|
}
|
|
241
268
|
|
|
242
|
-
// Remove any query parameters from the file path before processing
|
|
243
|
-
const cleanFilePath = filePath.split("?")[0];
|
|
244
|
-
const ext = path.extname(cleanFilePath);
|
|
245
|
-
// Use LLM-friendly naming for temp files instead of original filename
|
|
246
269
|
const shortId = generateShortId();
|
|
247
270
|
const convertedPath = path.join(tempDir, `${shortId}.md`);
|
|
248
271
|
await fs.writeFile(convertedPath, markdown);
|
|
@@ -254,9 +277,59 @@ export class ConversionService {
|
|
|
254
277
|
};
|
|
255
278
|
}
|
|
256
279
|
|
|
280
|
+
/**
|
|
281
|
+
* Convert document to PDF using streaming upload
|
|
282
|
+
* @param {string} filePath - Local path to file
|
|
283
|
+
* @param {string} tempDir - Temporary directory for output
|
|
284
|
+
* @returns {Promise<string>} - Path to converted PDF
|
|
285
|
+
*/
|
|
286
|
+
async _convertToPDF(filePath, tempDir) {
|
|
287
|
+
try {
|
|
288
|
+
const pdfServiceUrl = getDocToPdfUrl();
|
|
289
|
+
if (!pdfServiceUrl) {
|
|
290
|
+
throw new Error("DOC_TO_PDF_SERVICE_URL is not configured");
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
this.context.log("Converting to PDF via service:", pdfServiceUrl);
|
|
294
|
+
|
|
295
|
+
// Create form data with file stream
|
|
296
|
+
const form = new FormData();
|
|
297
|
+
form.append('file', createReadStream(filePath), path.basename(filePath));
|
|
298
|
+
|
|
299
|
+
// Upload with streaming
|
|
300
|
+
const response = await axios({
|
|
301
|
+
method: 'POST',
|
|
302
|
+
url: pdfServiceUrl,
|
|
303
|
+
data: form,
|
|
304
|
+
headers: form.getHeaders(),
|
|
305
|
+
responseType: 'stream',
|
|
306
|
+
maxContentLength: Infinity,
|
|
307
|
+
maxBodyLength: Infinity,
|
|
308
|
+
timeout: 60000, // 60 second timeout
|
|
309
|
+
});
|
|
310
|
+
|
|
311
|
+
// Stream PDF to temp file using original filename with .pdf extension
|
|
312
|
+
const originalBase = path.basename(filePath);
|
|
313
|
+
const baseWithoutExt = originalBase.includes('.')
|
|
314
|
+
? originalBase.replace(/\.[^/.]+$/, '')
|
|
315
|
+
: originalBase;
|
|
316
|
+
const pdfPath = path.join(tempDir, `${baseWithoutExt}.pdf`);
|
|
317
|
+
const writer = createWriteStream(pdfPath);
|
|
318
|
+
|
|
319
|
+
await pipeline(response.data, writer);
|
|
320
|
+
|
|
321
|
+
this.context.log("PDF conversion successful:", pdfPath);
|
|
322
|
+
return pdfPath;
|
|
323
|
+
|
|
324
|
+
} catch (error) {
|
|
325
|
+
this.context.log("PDF conversion error:", error.message);
|
|
326
|
+
throw error;
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
|
|
257
330
|
async _convertToMarkdown(fileUrl) {
|
|
258
331
|
try {
|
|
259
|
-
const markitdownUrl =
|
|
332
|
+
const markitdownUrl = getMarkitdownUrl();
|
|
260
333
|
if (!markitdownUrl) {
|
|
261
334
|
throw new Error("MARKITDOWN_CONVERT_URL is not set");
|
|
262
335
|
}
|
|
@@ -3,6 +3,7 @@ import { dirname, join } from "path";
|
|
|
3
3
|
import { fileURLToPath } from "url";
|
|
4
4
|
import test from "ava";
|
|
5
5
|
import axios from "axios";
|
|
6
|
+
import nock from "nock";
|
|
6
7
|
import XLSX from "xlsx";
|
|
7
8
|
import { FileConversionService } from "../src/services/FileConversionService.js";
|
|
8
9
|
|
|
@@ -92,8 +93,11 @@ test("converts Excel to CSV successfully", async (t) => {
|
|
|
92
93
|
test("converts document to markdown via MarkItDown API", async (t) => {
|
|
93
94
|
// Set the environment variable for the test
|
|
94
95
|
const originalEnv = process.env.MARKITDOWN_CONVERT_URL;
|
|
96
|
+
const originalPdfEnv = process.env.DOC_TO_PDF_SERVICE_URL;
|
|
97
|
+
// Ensure PDF path is NOT used in this test
|
|
98
|
+
delete process.env.DOC_TO_PDF_SERVICE_URL;
|
|
95
99
|
process.env.MARKITDOWN_CONVERT_URL = "http://localhost:8080/convert?url=";
|
|
96
|
-
|
|
100
|
+
|
|
97
101
|
// Mock axios.get for MarkItDown API
|
|
98
102
|
const originalAxiosGet = axios.get;
|
|
99
103
|
axios.get = async (url) => {
|
|
@@ -129,6 +133,49 @@ test("converts document to markdown via MarkItDown API", async (t) => {
|
|
|
129
133
|
} else {
|
|
130
134
|
delete process.env.MARKITDOWN_CONVERT_URL;
|
|
131
135
|
}
|
|
136
|
+
if (originalPdfEnv) {
|
|
137
|
+
process.env.DOC_TO_PDF_SERVICE_URL = originalPdfEnv;
|
|
138
|
+
}
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
// Test document conversion with external PDF service
|
|
142
|
+
test("converts document to PDF via external service", async (t) => {
  const originalPdfEnv = process.env.DOC_TO_PDF_SERVICE_URL;
  const originalMdEnv = process.env.MARKITDOWN_CONVERT_URL;
  // Prefer PDF path in this test
  delete process.env.MARKITDOWN_CONVERT_URL;
  process.env.DOC_TO_PDF_SERVICE_URL = "http://pdf.test/convert";

  try {
    // Mock the external PDF service
    const pdfBody = Buffer.from("%PDF-1.4\n%\u00E2\u00E3\u00CF\u00D3\n1 0 obj<<>>endobj\ntrailer<<>>\n%%EOF\n", "utf-8");
    const scope = nock("http://pdf.test").post("/convert").reply(200, pdfBody, {
      "Content-Type": "application/pdf",
      "Content-Length": String(pdfBody.length),
    });

    const service = new FileConversionService(mockContext);
    // Create a real local test file to stream to the PDF service
    const docPath = join(t.context.testDir, "test.docx");
    await fs.writeFile(docPath, "Dummy DOCX content for PDF test");
    const result = await service.convertFile(docPath, "https://example.com/test.docx");

    t.true(result.converted);
    t.true(result.convertedPath.endsWith(".pdf"));

    const content = await fs.readFile(result.convertedPath);
    t.is(content.slice(0, 4).toString(), "%PDF");
    t.true(scope.isDone());
  } finally {
    // Restore env and HTTP mocks even when an assertion above throws, so a
    // failure here cannot leak state into other tests.
    if (originalPdfEnv !== undefined) {
      process.env.DOC_TO_PDF_SERVICE_URL = originalPdfEnv;
    } else {
      delete process.env.DOC_TO_PDF_SERVICE_URL;
    }
    if (originalMdEnv !== undefined) {
      process.env.MARKITDOWN_CONVERT_URL = originalMdEnv;
    }
    nock.cleanAll();
  }
});
|
|
133
180
|
|
|
134
181
|
// Test error handling for missing original URL
|
|
@@ -158,11 +205,11 @@ test("correctly detects file extensions", (t) => {
|
|
|
158
205
|
// Test _saveConvertedFile method signature and container parameter handling
|
|
159
206
|
test("_saveConvertedFile accepts container parameter", async (t) => {
|
|
160
207
|
const service = new FileConversionService(mockContext, false); // Use local storage for testing
|
|
161
|
-
|
|
208
|
+
|
|
162
209
|
// Create a test file
|
|
163
210
|
const testFile = join(t.context.testDir, "container-param-test.txt");
|
|
164
211
|
await fs.writeFile(testFile, "Test content for container parameter");
|
|
165
|
-
|
|
212
|
+
|
|
166
213
|
// Test that the method accepts all parameters without throwing
|
|
167
214
|
const result = await service._saveConvertedFile(
|
|
168
215
|
testFile,
|
|
@@ -170,7 +217,7 @@ test("_saveConvertedFile accepts container parameter", async (t) => {
|
|
|
170
217
|
"test-filename.txt",
|
|
171
218
|
"test-container"
|
|
172
219
|
);
|
|
173
|
-
|
|
220
|
+
|
|
174
221
|
t.truthy(result);
|
|
175
222
|
t.truthy(result.url);
|
|
176
223
|
t.true(typeof result.url === 'string');
|
|
@@ -179,20 +226,20 @@ test("_saveConvertedFile accepts container parameter", async (t) => {
|
|
|
179
226
|
// Test ensureConvertedVersion method signature with container parameter
|
|
180
227
|
test("ensureConvertedVersion accepts container parameter", async (t) => {
|
|
181
228
|
const service = new FileConversionService(mockContext, false);
|
|
182
|
-
|
|
229
|
+
|
|
183
230
|
// Mock file info object
|
|
184
231
|
const fileInfo = {
|
|
185
232
|
url: "http://example.com/test.txt", // Non-convertible file
|
|
186
233
|
gcs: "gs://bucket/test.txt"
|
|
187
234
|
};
|
|
188
|
-
|
|
235
|
+
|
|
189
236
|
// Test that the method accepts container parameter without throwing
|
|
190
237
|
const result = await service.ensureConvertedVersion(
|
|
191
238
|
fileInfo,
|
|
192
239
|
"test-request-id",
|
|
193
240
|
"test-container"
|
|
194
241
|
);
|
|
195
|
-
|
|
242
|
+
|
|
196
243
|
t.truthy(result);
|
|
197
244
|
t.is(result.url, fileInfo.url); // Should return original for non-convertible file
|
|
198
245
|
});
|