@aj-archipelago/cortex 1.3.67 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/config.js +27 -0
  2. package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/__init__.py +3 -0
  3. package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/function.json +20 -0
  4. package/helper-apps/cortex-doc-to-pdf/Dockerfile +46 -0
  5. package/helper-apps/cortex-doc-to-pdf/README.md +408 -0
  6. package/helper-apps/cortex-doc-to-pdf/converter.py +157 -0
  7. package/helper-apps/cortex-doc-to-pdf/docker-compose.yml +23 -0
  8. package/helper-apps/cortex-doc-to-pdf/document_converter.py +181 -0
  9. package/helper-apps/cortex-doc-to-pdf/examples/README.md +252 -0
  10. package/helper-apps/cortex-doc-to-pdf/examples/nodejs-client.js +266 -0
  11. package/helper-apps/cortex-doc-to-pdf/examples/package-lock.json +297 -0
  12. package/helper-apps/cortex-doc-to-pdf/examples/package.json +23 -0
  13. package/helper-apps/cortex-doc-to-pdf/function_app.py +85 -0
  14. package/helper-apps/cortex-doc-to-pdf/host.json +16 -0
  15. package/helper-apps/cortex-doc-to-pdf/request_handlers.py +193 -0
  16. package/helper-apps/cortex-doc-to-pdf/requirements.txt +3 -0
  17. package/helper-apps/cortex-doc-to-pdf/tests/run_tests.sh +26 -0
  18. package/helper-apps/cortex-doc-to-pdf/tests/test_conversion.py +320 -0
  19. package/helper-apps/cortex-doc-to-pdf/tests/test_streaming.py +419 -0
  20. package/helper-apps/cortex-file-handler/package-lock.json +1 -0
  21. package/helper-apps/cortex-file-handler/package.json +1 -0
  22. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +81 -8
  23. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +54 -7
  24. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +19 -7
  25. package/lib/encodeCache.js +5 -0
  26. package/lib/keyValueStorageClient.js +5 -0
  27. package/lib/logger.js +1 -1
  28. package/lib/pathwayTools.js +8 -1
  29. package/lib/redisSubscription.js +6 -0
  30. package/lib/requestExecutor.js +4 -0
  31. package/lib/util.js +88 -0
  32. package/package.json +1 -1
  33. package/pathways/basePathway.js +3 -3
  34. package/pathways/bing_afagent.js +1 -0
  35. package/pathways/gemini_15_vision.js +1 -1
  36. package/pathways/google_cse.js +2 -2
  37. package/pathways/image_gemini_25.js +85 -0
  38. package/pathways/image_prompt_optimizer_gemini_25.js +149 -0
  39. package/pathways/image_qwen.js +28 -0
  40. package/pathways/image_seedream4.js +26 -0
  41. package/pathways/rag.js +1 -1
  42. package/pathways/rag_jarvis.js +1 -1
  43. package/pathways/system/entity/sys_entity_continue.js +1 -1
  44. package/pathways/system/entity/sys_generator_results.js +1 -1
  45. package/pathways/system/entity/tools/sys_tool_google_search.js +15 -2
  46. package/pathways/system/entity/tools/sys_tool_grok_x_search.js +3 -3
  47. package/pathways/system/entity/tools/sys_tool_image.js +28 -23
  48. package/pathways/system/entity/tools/sys_tool_image_gemini.js +135 -0
  49. package/server/graphql.js +9 -2
  50. package/server/modelExecutor.js +4 -0
  51. package/server/pathwayResolver.js +19 -18
  52. package/server/plugins/claude3VertexPlugin.js +13 -8
  53. package/server/plugins/gemini15ChatPlugin.js +15 -10
  54. package/server/plugins/gemini15VisionPlugin.js +2 -23
  55. package/server/plugins/gemini25ImagePlugin.js +155 -0
  56. package/server/plugins/modelPlugin.js +3 -2
  57. package/server/plugins/openAiChatPlugin.js +6 -6
  58. package/server/plugins/replicateApiPlugin.js +268 -12
  59. package/server/plugins/veoVideoPlugin.js +15 -1
  60. package/server/rest.js +2 -0
  61. package/server/typeDef.js +96 -10
  62. package/tests/integration/apptekTranslatePlugin.integration.test.js +1 -1
  63. package/tests/unit/core/pathwayManager.test.js +2 -4
  64. package/tests/unit/plugins/gemini25ImagePlugin.test.js +294 -0
@@ -0,0 +1,419 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ End-to-end tests for streaming document upload and PDF download.
4
+ Tests both file upload and URI-based conversion with streaming responses.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import time
10
+ import requests
11
+ from pathlib import Path
12
+ import PyPDF2
13
+
14
# Test configuration: sample inputs and generated PDFs both live two
# directory levels above this file.
_APP_ROOT = Path(__file__).parent.parent
SAMPLES_DIR = _APP_ROOT / "samples"
OUTPUT_DIR = _APP_ROOT / "test_streaming_output"
# Created at import time so every test can write its PDF without checking.
OUTPUT_DIR.mkdir(exist_ok=True)

# Base URL of the service under test; override with the TEST_URL env var.
BASE_URL = os.getenv("TEST_URL", "http://localhost:8080")
CONVERT_ENDPOINT = BASE_URL + "/convert"
21
+
22
class Colors:
    """ANSI escape sequences used to colorize terminal output."""

    # Text colors.
    GREEN = "\033[92m"
    RED = "\033[91m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"

    # Attribute switches: RESET restores defaults, BOLD turns on bold text.
    RESET = "\033[0m"
    BOLD = "\033[1m"
30
+
31
def print_success(msg):
    """Print *msg* prefixed with a green check mark."""
    marker = f"{Colors.GREEN}✓{Colors.RESET}"
    print(f"{marker} {msg}")
33
+
34
def print_error(msg):
    """Print *msg* prefixed with a red cross."""
    marker = f"{Colors.RED}✗{Colors.RESET}"
    print(f"{marker} {msg}")
36
+
37
def print_info(msg):
    """Print *msg* prefixed with a blue information sign."""
    marker = f"{Colors.BLUE}ℹ{Colors.RESET}"
    print(f"{marker} {msg}")
39
+
40
def print_warning(msg):
    """Print *msg* prefixed with a yellow warning sign."""
    marker = f"{Colors.YELLOW}⚠{Colors.RESET}"
    print(f"{marker} {msg}")
42
+
43
def print_header(msg):
    """Print *msg* in bold between two 60-character ``=`` rules."""
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    # The leading newline separates the banner from preceding output.
    print(f"\n{rule}")
    print(f"{Colors.BOLD}{msg}{Colors.RESET}")
    print(rule)
47
+
48
def verify_pdf(pdf_path):
    """Check that *pdf_path* can be parsed as a PDF.

    Returns:
        ``{'valid': True, 'pages': <page count>, 'size': <bytes on disk>}``
        when the file parses, otherwise ``{'valid': False, 'error': <text>}``
        carrying the message of whatever exception was raised.
    """
    try:
        with open(pdf_path, 'rb') as handle:
            page_count = len(PyPDF2.PdfReader(handle).pages)
        size_on_disk = os.path.getsize(pdf_path)
    except Exception as e:
        # Any failure (missing file, parse error, ...) means "not valid".
        return {'valid': False, 'error': str(e)}

    return {'valid': True, 'pages': page_count, 'size': size_on_disk}
63
+
64
def test_file_upload_streaming(file_path, test_name):
    """Upload a local document and save the streamed PDF response to disk.

    Args:
        file_path: ``pathlib.Path`` of the document to upload.
        test_name: Human-readable label printed in the test banner.

    Returns:
        True when the service answers HTTP 200 and the downloaded file
        passes ``verify_pdf``; False on any HTTP, connection, timeout or
        validation failure.
    """
    # (connect, read) timeouts in seconds, so a stalled service cannot hang
    # the whole run. The read timeout is generous because conversion may
    # take a while before the first byte arrives.
    request_timeout = (10, 300)

    print(f"\n{Colors.BOLD}Test: {test_name}{Colors.RESET}")
    print(f" File: {file_path.name}")
    print(f" Size: {os.path.getsize(file_path):,} bytes")

    output_path = OUTPUT_DIR / f"{file_path.stem}_uploaded.pdf"

    start_time = time.time()

    try:
        # Hand requests the open file object so it reads the upload from disk.
        with open(file_path, 'rb') as f:
            files = {'file': (file_path.name, f, 'application/octet-stream')}

            print_info("Uploading file...")
            response = requests.post(
                CONVERT_ENDPOINT,
                files=files,
                stream=True,  # Enable streaming response
                timeout=request_timeout,
            )

        # Using the response as a context manager releases the connection
        # even when we return early or the download fails midway.
        with response:
            if response.status_code != 200:
                print_error(f"HTTP {response.status_code}: {response.text[:200]}")
                return False

            # Stream response to file
            print_info("Streaming PDF response...")
            bytes_received = 0
            with open(output_path, 'wb') as out:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        out.write(chunk)
                        bytes_received += len(chunk)

        # Measured after the body is consumed so the reported time covers
        # the download, not just the arrival of the response headers.
        elapsed = time.time() - start_time

        print_success(f"Completed in {elapsed:.2f}s")
        print_info(f"Downloaded: {bytes_received:,} bytes")

        # Verify PDF
        pdf_info = verify_pdf(output_path)
        if not pdf_info['valid']:
            print_error(f"Invalid PDF: {pdf_info.get('error')}")
            return False

        print_success(f"Valid PDF: {pdf_info['pages']} pages, {pdf_info['size']:,} bytes")
        return True

    except requests.exceptions.ConnectionError:
        print_error("Connection failed - is the service running?")
        return False
    except requests.exceptions.Timeout:
        print_error("Request timed out waiting for the service")
        return False
    except Exception as e:
        print_error(f"Test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
121
+
122
def test_uri_streaming(uri, filename, test_name):
    """Ask the service to convert the document at *uri* and save the PDF.

    Args:
        uri: URL of the source document, sent as JSON ``{'uri': uri}``.
        filename: Name of the PDF to write inside ``OUTPUT_DIR``.
        test_name: Human-readable label printed in the test banner.

    Returns:
        True when the service answers HTTP 200 and the downloaded file
        passes ``verify_pdf``; False on any HTTP, connection, timeout or
        validation failure.
    """
    # (connect, read) timeouts in seconds, so a stalled service cannot hang
    # the whole run. The read timeout is generous because conversion may
    # take a while before the first byte arrives.
    request_timeout = (10, 300)

    # Only show an ellipsis when the URI was actually shortened.
    shown_uri = uri if len(uri) <= 80 else f"{uri[:80]}..."

    print(f"\n{Colors.BOLD}Test: {test_name}{Colors.RESET}")
    print(f" URI: {shown_uri}")

    output_path = OUTPUT_DIR / filename

    start_time = time.time()

    try:
        print_info("Requesting conversion...")
        response = requests.post(
            CONVERT_ENDPOINT,
            json={'uri': uri},
            stream=True,  # Enable streaming response
            timeout=request_timeout,
        )

        # Using the response as a context manager releases the connection
        # even when we return early or the download fails midway.
        with response:
            if response.status_code != 200:
                print_error(f"HTTP {response.status_code}: {response.text[:200]}")
                return False

            # Stream response to file
            print_info("Streaming PDF response...")
            bytes_received = 0
            with open(output_path, 'wb') as out:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        out.write(chunk)
                        bytes_received += len(chunk)

        # Measured after the body is consumed so the reported time covers
        # the download, not just the arrival of the response headers.
        elapsed = time.time() - start_time

        print_success(f"Completed in {elapsed:.2f}s")
        print_info(f"Downloaded: {bytes_received:,} bytes")

        # Verify PDF
        pdf_info = verify_pdf(output_path)
        if not pdf_info['valid']:
            print_error(f"Invalid PDF: {pdf_info.get('error')}")
            return False

        print_success(f"Valid PDF: {pdf_info['pages']} pages, {pdf_info['size']:,} bytes")
        return True

    except requests.exceptions.ConnectionError:
        print_error("Connection failed - is the service running?")
        return False
    except requests.exceptions.Timeout:
        print_error("Request timed out waiting for the service")
        return False
    except Exception as e:
        print_error(f"Test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
174
+
175
def test_large_file_streaming():
    """Run the upload test against the larger sample documents.

    Samples missing from ``SAMPLES_DIR`` are skipped silently.

    Returns:
        A list with one boolean per sample that was actually tested.
    """
    print_header("Large File Streaming Tests")

    # (sample file name, banner label), in execution order.
    candidates = (
        ("file_example_XLSX_5000.xlsx", "Large Excel File Upload"),
        ("file_example_PPT_1MB.ppt", "Large PowerPoint Upload"),
    )

    outcomes = []
    for sample_name, label in candidates:
        sample = SAMPLES_DIR / sample_name
        if sample.exists():
            outcomes.append(test_file_upload_streaming(sample, label))
    return outcomes
198
+
199
def test_various_formats():
    """Run the upload test against one sample per document format.

    Samples missing from ``SAMPLES_DIR`` are reported with a warning and
    skipped.

    Returns:
        A list with one boolean per sample that was actually tested.
    """
    print_header("Various Format Streaming Tests")

    # (sample file name, format description), in execution order.
    samples = (
        ("file-sample_1MB.docx", "Word Document"),
        ("ascii-art.txt", "Text File"),
        ("sample1.html", "HTML File"),
    )

    outcomes = []
    for sample_name, label in samples:
        candidate = SAMPLES_DIR / sample_name
        if not candidate.exists():
            print_warning(f"Skipping {sample_name} - not found")
            continue
        outcomes.append(test_file_upload_streaming(candidate, f"{label} Upload"))
    return outcomes
221
+
222
def test_uri_based_streaming():
    """Run the URI conversion test against a publicly hosted document.

    Returns:
        A single-element list holding the boolean outcome of the test.
    """
    print_header("URI-Based Streaming Tests")

    # Publicly hosted sample document used as the conversion source.
    public_doc = "https://file-examples.com/storage/fe783f04fc66761fd44fb46/2017/02/file-sample_100kB.docx"
    outcome = test_uri_streaming(
        public_doc,
        "public_word_doc.pdf",
        "Public Word Document via URI",
    )
    return [outcome]
236
+
237
def test_concurrent_uploads():
    """Upload several sample files at the same time from worker threads.

    Returns:
        A list with one boolean per upload, in completion order. The list
        is empty when none of the sample files exist, so a skipped suite is
        not counted as a passed test in the summary.
    """
    print_header("Concurrent Streaming Tests")

    from concurrent.futures import ThreadPoolExecutor, as_completed

    candidates = [
        SAMPLES_DIR / "data.txt",
        SAMPLES_DIR / "multilang.txt",
        SAMPLES_DIR / "sample2.html",
    ]

    # Filter existing files
    test_files = [f for f in candidates if f.exists()]

    if not test_files:
        print_warning("No test files available for concurrent test")
        # Nothing ran: report no results rather than a phantom pass.
        return []

    print_info(f"Testing {len(test_files)} concurrent uploads...")
    start_time = time.time()

    results = []
    with ThreadPoolExecutor(max_workers=3) as executor:
        # Map each future back to its file so failures can name the file.
        futures = {
            executor.submit(
                test_file_upload_streaming,
                file_path,
                f"Concurrent Upload {i+1}"
            ): file_path
            for i, file_path in enumerate(test_files)
        }

        for future in as_completed(futures):
            file_path = futures[future]
            try:
                results.append(future.result())
            except Exception as e:
                print_error(f"Concurrent test failed for {file_path.name}: {e}")
                results.append(False)

    elapsed = time.time() - start_time
    failures = sum(1 for outcome in results if not outcome)
    if failures == 0:
        print_success(f"All concurrent uploads completed in {elapsed:.2f}s")
    else:
        # Do not print a success line when some uploads failed.
        print_warning(
            f"Concurrent uploads finished in {elapsed:.2f}s with {failures} failure(s)"
        )

    return results
283
+
284
def test_error_handling():
    """Check how the service responds to invalid requests.

    Three cases are exercised: an unsupported file extension (expects
    HTTP 400), an empty file (200, 400 or 500 all count as handled), and
    a malformed URI (expects HTTP 400).

    Returns:
        A list of three booleans, one per case, in the order above.
    """
    print_header("Error Handling Tests")

    # Seconds before a request is abandoned, so a hung service fails the
    # case instead of blocking the run.
    request_timeout = 60

    results = []

    # Test 1: Invalid file type
    print(f"\n{Colors.BOLD}Test: Invalid File Type{Colors.RESET}")
    # Create a fake file with unsupported extension
    fake_file = OUTPUT_DIR / "test.xyz"
    try:
        fake_file.write_text("test content")

        with open(fake_file, 'rb') as f:
            files = {'file': ('test.xyz', f, 'application/octet-stream')}
            response = requests.post(
                CONVERT_ENDPOINT, files=files, timeout=request_timeout
            )

        if response.status_code == 400:
            print_success("Correctly rejected unsupported file type")
            results.append(True)
        else:
            print_error(f"Expected 400, got {response.status_code}")
            results.append(False)
    except Exception as e:
        print_error(f"Error test failed: {e}")
        results.append(False)
    finally:
        # Remove the scratch file even when the request raised.
        if fake_file.exists():
            fake_file.unlink()

    # Test 2: Empty file
    print(f"\n{Colors.BOLD}Test: Empty File Upload{Colors.RESET}")
    empty_file = OUTPUT_DIR / "empty.txt"
    try:
        empty_file.write_text("")

        with open(empty_file, 'rb') as f:
            files = {'file': ('empty.txt', f, 'application/octet-stream')}
            response = requests.post(
                CONVERT_ENDPOINT, files=files, timeout=request_timeout
            )

        # Should handle gracefully (either convert or error)
        if response.status_code in [200, 400, 500]:
            print_success(f"Handled empty file gracefully (status: {response.status_code})")
            results.append(True)
        else:
            print_error(f"Unexpected status: {response.status_code}")
            results.append(False)
    except Exception as e:
        print_error(f"Empty file test failed: {e}")
        results.append(False)
    finally:
        # Remove the scratch file even when the request raised.
        if empty_file.exists():
            empty_file.unlink()

    # Test 3: Invalid URI
    print(f"\n{Colors.BOLD}Test: Invalid URI{Colors.RESET}")
    try:
        response = requests.post(
            CONVERT_ENDPOINT,
            json={'uri': 'not-a-valid-url'},
            timeout=request_timeout,
        )

        if response.status_code == 400:
            print_success("Correctly rejected invalid URI")
            results.append(True)
        else:
            print_error(f"Expected 400, got {response.status_code}")
            results.append(False)
    except Exception as e:
        print_error(f"Invalid URI test failed: {e}")
        results.append(False)

    return results
355
+
356
def check_service_health():
    """Probe ``{BASE_URL}/health`` and report whether the service is up.

    Returns:
        True when the endpoint answers HTTP 200 within five seconds; False
        for any other status, a refused connection, or any other error.
    """
    health_url = f"{BASE_URL}/health"
    try:
        reply = requests.get(health_url, timeout=5)
        healthy = reply.status_code == 200
        if healthy:
            print_success(f"Service is running at {BASE_URL}")
        else:
            print_error(f"Service returned status {reply.status_code}")
        return healthy
    except requests.exceptions.ConnectionError:
        print_error(f"Cannot connect to service at {BASE_URL}")
        print_info("Make sure the service is running: docker compose up -d")
        return False
    except Exception as e:
        print_error(f"Health check failed: {e}")
        return False
373
+
374
def main():
    """Run every streaming test suite and print a pass/fail summary.

    Returns:
        Process exit code: 0 when no test failed, 1 when the health check
        fails or at least one test failed.
    """
    print_header("Document to PDF Streaming Tests")
    print(f"Test URL: {BASE_URL}")
    print(f"Samples: {SAMPLES_DIR}")
    print(f"Output: {OUTPUT_DIR}")

    # Bail out early when the service is unreachable.
    if not check_service_health():
        return 1

    # Suites run in this order; each returns a list of booleans.
    suites = (
        test_various_formats,
        test_large_file_streaming,
        test_uri_based_streaming,
        test_concurrent_uploads,
        test_error_handling,
    )
    all_results = []
    for suite in suites:
        all_results.extend(suite())

    print_header("Test Summary")

    total = len(all_results)
    passed = len([outcome for outcome in all_results if outcome])
    failed = total - passed

    print(f"\nTotal tests: {total}")
    print_success(f"Passed: {passed}")
    if failed > 0:
        print_error(f"Failed: {failed}")

    # Guard the division for the case where no test produced a result.
    if total > 0:
        success_rate = passed / total * 100
    else:
        success_rate = 0
    print(f"\nSuccess rate: {success_rate:.1f}%")

    print(f"\n{Colors.BLUE}All output PDFs saved to: {OUTPUT_DIR}{Colors.RESET}")

    if failed != 0:
        print(f"\n{Colors.RED}{Colors.BOLD}❌ Some tests failed{Colors.RESET}")
        return 1

    print(f"\n{Colors.GREEN}{Colors.BOLD}🎉 All streaming tests passed!{Colors.RESET}")
    return 0
417
+
418
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
@@ -16,6 +16,7 @@
16
16
  "cors": "^2.8.5",
17
17
  "express": "^4.21.1",
18
18
  "fluent-ffmpeg": "^2.1.3",
19
+ "form-data": "^4.0.0",
19
20
  "ioredis": "^5.3.1",
20
21
  "mime-types": "^3.0.1",
21
22
  "papaparse": "^5.4.1",
@@ -21,6 +21,7 @@
21
21
  "cors": "^2.8.5",
22
22
  "express": "^4.21.1",
23
23
  "fluent-ffmpeg": "^2.1.3",
24
+ "form-data": "^4.0.0",
24
25
  "ioredis": "^5.3.1",
25
26
  "mime-types": "^3.0.1",
26
27
  "papaparse": "^5.4.1",
@@ -4,12 +4,19 @@ import path from "path";
4
4
  import { createReadStream, createWriteStream } from "fs";
5
5
  import { pipeline } from "stream/promises";
6
6
  import axios from "axios";
7
+ import FormData from "form-data";
7
8
  import XLSX from "xlsx";
8
9
  import { CONVERTED_EXTENSIONS } from "../constants.js";
9
10
  import { v4 as uuidv4 } from "uuid";
10
11
  import { sanitizeFilename, generateShortId } from "../utils/filenameUtils.js";
11
12
 
12
- const MARKITDOWN_CONVERT_URL = process.env.MARKITDOWN_CONVERT_URL || null;
13
// Service URLs are looked up on every call (not cached at module load) so
// tests can change process.env between calls.

/**
 * @returns {string|null} The MARKITDOWN_CONVERT_URL value, or null when it
 *   is unset or empty.
 */
function getMarkitdownUrl() {
  const configured = process.env.MARKITDOWN_CONVERT_URL;
  return configured ? configured : null;
}
17
/**
 * Read at call time so tests can change process.env between calls.
 * @returns {string|null} The DOC_TO_PDF_SERVICE_URL value, or null when it
 *   is unset or empty.
 */
function getDocToPdfUrl() {
  const configured = process.env.DOC_TO_PDF_SERVICE_URL;
  return configured ? configured : null;
}
13
20
 
14
21
  export class ConversionService {
15
22
  constructor(context) {
@@ -229,20 +236,36 @@ export class ConversionService {
229
236
  }
230
237
 
231
238
  async _handleDocumentConversion(filePath, originalUrl, tempDir) {
232
- this.context.log("Handling document conversion");
239
+ // Default: Try PDF conversion if service is configured
240
+ const pdfServiceUrl = getDocToPdfUrl();
241
+ if (pdfServiceUrl) {
242
+ this.context.log("PDF service configured - converting to PDF");
243
+ try {
244
+ const pdfPath = await this._convertToPDF(filePath, tempDir);
245
+ if (pdfPath) {
246
+ return {
247
+ convertedPath: pdfPath,
248
+ convertedName: path.basename(pdfPath),
249
+ converted: true,
250
+ };
251
+ }
252
+ } catch (error) {
253
+ this.context.log("PDF conversion failed, falling back to markdown:", error.message);
254
+ }
255
+ } else {
256
+ this.context.log("PDF service not configured - using markdown conversion");
257
+ }
258
+
259
+ // Fallback to markdown if PDF service not configured or conversion fails
233
260
  if (!originalUrl) {
234
261
  throw new Error("Original URL is required for document conversion");
235
262
  }
236
263
 
237
264
  const markdown = await this._convertToMarkdown(originalUrl);
238
265
  if (!markdown) {
239
- throw new Error("Markdown conversion returned empty result");
266
+ throw new Error("Markdown conversion failed");
240
267
  }
241
268
 
242
- // Remove any query parameters from the file path before processing
243
- const cleanFilePath = filePath.split("?")[0];
244
- const ext = path.extname(cleanFilePath);
245
- // Use LLM-friendly naming for temp files instead of original filename
246
269
  const shortId = generateShortId();
247
270
  const convertedPath = path.join(tempDir, `${shortId}.md`);
248
271
  await fs.writeFile(convertedPath, markdown);
@@ -254,9 +277,59 @@ export class ConversionService {
254
277
  };
255
278
  }
256
279
 
280
+ /**
281
+ * Convert document to PDF using streaming upload
282
+ * @param {string} filePath - Local path to file
283
+ * @param {string} tempDir - Temporary directory for output
284
+ * @returns {Promise<string>} - Path to converted PDF
285
+ */
286
+ async _convertToPDF(filePath, tempDir) {
287
+ try {
288
+ const pdfServiceUrl = getDocToPdfUrl();
289
+ if (!pdfServiceUrl) {
290
+ throw new Error("DOC_TO_PDF_SERVICE_URL is not configured");
291
+ }
292
+
293
+ this.context.log("Converting to PDF via service:", pdfServiceUrl);
294
+
295
+ // Create form data with file stream
296
+ const form = new FormData();
297
+ form.append('file', createReadStream(filePath), path.basename(filePath));
298
+
299
+ // Upload with streaming
300
+ const response = await axios({
301
+ method: 'POST',
302
+ url: pdfServiceUrl,
303
+ data: form,
304
+ headers: form.getHeaders(),
305
+ responseType: 'stream',
306
+ maxContentLength: Infinity,
307
+ maxBodyLength: Infinity,
308
+ timeout: 60000, // 60 second timeout
309
+ });
310
+
311
+ // Stream PDF to temp file using original filename with .pdf extension
312
+ const originalBase = path.basename(filePath);
313
+ const baseWithoutExt = originalBase.includes('.')
314
+ ? originalBase.replace(/\.[^/.]+$/, '')
315
+ : originalBase;
316
+ const pdfPath = path.join(tempDir, `${baseWithoutExt}.pdf`);
317
+ const writer = createWriteStream(pdfPath);
318
+
319
+ await pipeline(response.data, writer);
320
+
321
+ this.context.log("PDF conversion successful:", pdfPath);
322
+ return pdfPath;
323
+
324
+ } catch (error) {
325
+ this.context.log("PDF conversion error:", error.message);
326
+ throw error;
327
+ }
328
+ }
329
+
257
330
  async _convertToMarkdown(fileUrl) {
258
331
  try {
259
- const markitdownUrl = process.env.MARKITDOWN_CONVERT_URL;
332
+ const markitdownUrl = getMarkitdownUrl();
260
333
  if (!markitdownUrl) {
261
334
  throw new Error("MARKITDOWN_CONVERT_URL is not set");
262
335
  }
@@ -3,6 +3,7 @@ import { dirname, join } from "path";
3
3
  import { fileURLToPath } from "url";
4
4
  import test from "ava";
5
5
  import axios from "axios";
6
+ import nock from "nock";
6
7
  import XLSX from "xlsx";
7
8
  import { FileConversionService } from "../src/services/FileConversionService.js";
8
9
 
@@ -92,8 +93,11 @@ test("converts Excel to CSV successfully", async (t) => {
92
93
  test("converts document to markdown via MarkItDown API", async (t) => {
93
94
  // Set the environment variable for the test
94
95
  const originalEnv = process.env.MARKITDOWN_CONVERT_URL;
96
+ const originalPdfEnv = process.env.DOC_TO_PDF_SERVICE_URL;
97
+ // Ensure PDF path is NOT used in this test
98
+ delete process.env.DOC_TO_PDF_SERVICE_URL;
95
99
  process.env.MARKITDOWN_CONVERT_URL = "http://localhost:8080/convert?url=";
96
-
100
+
97
101
  // Mock axios.get for MarkItDown API
98
102
  const originalAxiosGet = axios.get;
99
103
  axios.get = async (url) => {
@@ -129,6 +133,49 @@ test("converts document to markdown via MarkItDown API", async (t) => {
129
133
  } else {
130
134
  delete process.env.MARKITDOWN_CONVERT_URL;
131
135
  }
136
+ if (originalPdfEnv) {
137
+ process.env.DOC_TO_PDF_SERVICE_URL = originalPdfEnv;
138
+ }
139
+ });
140
+
141
// Test document conversion with external PDF service
test("converts document to PDF via external service", async (t) => {
  const originalPdfEnv = process.env.DOC_TO_PDF_SERVICE_URL;
  const originalMdEnv = process.env.MARKITDOWN_CONVERT_URL;
  // Prefer PDF path in this test
  delete process.env.MARKITDOWN_CONVERT_URL;
  process.env.DOC_TO_PDF_SERVICE_URL = "http://pdf.test/convert";

  try {
    // Mock the external PDF service
    const pdfBody = Buffer.from("%PDF-1.4\n%\u00E2\u00E3\u00CF\u00D3\n1 0 obj<<>>endobj\ntrailer<<>>\n%%EOF\n", "utf-8");
    const scope = nock("http://pdf.test").post("/convert").reply(200, pdfBody, {
      "Content-Type": "application/pdf",
      "Content-Length": String(pdfBody.length),
    });

    const service = new FileConversionService(mockContext);
    // Create a real local test file to stream to the PDF service
    const docPath = join(t.context.testDir, "test.docx");
    await fs.writeFile(docPath, "Dummy DOCX content for PDF test");
    const result = await service.convertFile(docPath, "https://example.com/test.docx");

    t.true(result.converted);
    t.true(result.convertedPath.endsWith(".pdf"));

    const content = await fs.readFile(result.convertedPath);
    t.is(content.slice(0, 4).toString(), "%PDF");
    t.true(scope.isDone());
  } finally {
    // Restore env and drop interceptors even when an assertion or the
    // conversion throws, so later tests are not affected by this one.
    if (originalPdfEnv) {
      process.env.DOC_TO_PDF_SERVICE_URL = originalPdfEnv;
    } else {
      delete process.env.DOC_TO_PDF_SERVICE_URL;
    }
    if (originalMdEnv) {
      process.env.MARKITDOWN_CONVERT_URL = originalMdEnv;
    }
    nock.cleanAll();
  }
});
133
180
 
134
181
  // Test error handling for missing original URL
@@ -158,11 +205,11 @@ test("correctly detects file extensions", (t) => {
158
205
  // Test _saveConvertedFile method signature and container parameter handling
159
206
  test("_saveConvertedFile accepts container parameter", async (t) => {
160
207
  const service = new FileConversionService(mockContext, false); // Use local storage for testing
161
-
208
+
162
209
  // Create a test file
163
210
  const testFile = join(t.context.testDir, "container-param-test.txt");
164
211
  await fs.writeFile(testFile, "Test content for container parameter");
165
-
212
+
166
213
  // Test that the method accepts all parameters without throwing
167
214
  const result = await service._saveConvertedFile(
168
215
  testFile,
@@ -170,7 +217,7 @@ test("_saveConvertedFile accepts container parameter", async (t) => {
170
217
  "test-filename.txt",
171
218
  "test-container"
172
219
  );
173
-
220
+
174
221
  t.truthy(result);
175
222
  t.truthy(result.url);
176
223
  t.true(typeof result.url === 'string');
@@ -179,20 +226,20 @@ test("_saveConvertedFile accepts container parameter", async (t) => {
179
226
  // Test ensureConvertedVersion method signature with container parameter
180
227
  test("ensureConvertedVersion accepts container parameter", async (t) => {
181
228
  const service = new FileConversionService(mockContext, false);
182
-
229
+
183
230
  // Mock file info object
184
231
  const fileInfo = {
185
232
  url: "http://example.com/test.txt", // Non-convertible file
186
233
  gcs: "gs://bucket/test.txt"
187
234
  };
188
-
235
+
189
236
  // Test that the method accepts container parameter without throwing
190
237
  const result = await service.ensureConvertedVersion(
191
238
  fileInfo,
192
239
  "test-request-id",
193
240
  "test-container"
194
241
  );
195
-
242
+
196
243
  t.truthy(result);
197
244
  t.is(result.url, fileInfo.url); // Should return original for non-convertible file
198
245
  });