@aj-archipelago/cortex 1.3.67 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/config.js +27 -0
  2. package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/__init__.py +3 -0
  3. package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/function.json +20 -0
  4. package/helper-apps/cortex-doc-to-pdf/Dockerfile +46 -0
  5. package/helper-apps/cortex-doc-to-pdf/README.md +408 -0
  6. package/helper-apps/cortex-doc-to-pdf/converter.py +157 -0
  7. package/helper-apps/cortex-doc-to-pdf/docker-compose.yml +23 -0
  8. package/helper-apps/cortex-doc-to-pdf/document_converter.py +181 -0
  9. package/helper-apps/cortex-doc-to-pdf/examples/README.md +252 -0
  10. package/helper-apps/cortex-doc-to-pdf/examples/nodejs-client.js +266 -0
  11. package/helper-apps/cortex-doc-to-pdf/examples/package-lock.json +297 -0
  12. package/helper-apps/cortex-doc-to-pdf/examples/package.json +23 -0
  13. package/helper-apps/cortex-doc-to-pdf/function_app.py +85 -0
  14. package/helper-apps/cortex-doc-to-pdf/host.json +16 -0
  15. package/helper-apps/cortex-doc-to-pdf/request_handlers.py +193 -0
  16. package/helper-apps/cortex-doc-to-pdf/requirements.txt +3 -0
  17. package/helper-apps/cortex-doc-to-pdf/tests/run_tests.sh +26 -0
  18. package/helper-apps/cortex-doc-to-pdf/tests/test_conversion.py +320 -0
  19. package/helper-apps/cortex-doc-to-pdf/tests/test_streaming.py +419 -0
  20. package/helper-apps/cortex-file-handler/package-lock.json +1 -0
  21. package/helper-apps/cortex-file-handler/package.json +1 -0
  22. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +81 -8
  23. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +54 -7
  24. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +19 -7
  25. package/lib/encodeCache.js +5 -0
  26. package/lib/keyValueStorageClient.js +5 -0
  27. package/lib/logger.js +1 -1
  28. package/lib/pathwayTools.js +8 -1
  29. package/lib/redisSubscription.js +6 -0
  30. package/lib/requestExecutor.js +4 -0
  31. package/lib/util.js +88 -0
  32. package/package.json +1 -1
  33. package/pathways/basePathway.js +3 -3
  34. package/pathways/bing_afagent.js +1 -0
  35. package/pathways/gemini_15_vision.js +1 -1
  36. package/pathways/google_cse.js +2 -2
  37. package/pathways/image_gemini_25.js +85 -0
  38. package/pathways/image_prompt_optimizer_gemini_25.js +149 -0
  39. package/pathways/image_qwen.js +28 -0
  40. package/pathways/image_seedream4.js +26 -0
  41. package/pathways/rag.js +1 -1
  42. package/pathways/rag_jarvis.js +1 -1
  43. package/pathways/system/entity/sys_entity_continue.js +1 -1
  44. package/pathways/system/entity/sys_generator_results.js +1 -1
  45. package/pathways/system/entity/tools/sys_tool_google_search.js +15 -2
  46. package/pathways/system/entity/tools/sys_tool_grok_x_search.js +3 -3
  47. package/pathways/system/entity/tools/sys_tool_image.js +28 -23
  48. package/pathways/system/entity/tools/sys_tool_image_gemini.js +135 -0
  49. package/server/graphql.js +9 -2
  50. package/server/modelExecutor.js +4 -0
  51. package/server/pathwayResolver.js +19 -18
  52. package/server/plugins/claude3VertexPlugin.js +13 -8
  53. package/server/plugins/gemini15ChatPlugin.js +15 -10
  54. package/server/plugins/gemini15VisionPlugin.js +2 -23
  55. package/server/plugins/gemini25ImagePlugin.js +155 -0
  56. package/server/plugins/modelPlugin.js +3 -2
  57. package/server/plugins/openAiChatPlugin.js +6 -6
  58. package/server/plugins/replicateApiPlugin.js +268 -12
  59. package/server/plugins/veoVideoPlugin.js +15 -1
  60. package/server/rest.js +2 -0
  61. package/server/typeDef.js +96 -10
  62. package/tests/integration/apptekTranslatePlugin.integration.test.js +1 -1
  63. package/tests/unit/core/pathwayManager.test.js +2 -4
  64. package/tests/unit/plugins/gemini25ImagePlugin.test.js +294 -0
package/config.js CHANGED
@@ -423,6 +423,33 @@ var config = convict({
423
423
  "Content-Type": "application/json"
424
424
  },
425
425
  },
426
+ "replicate-qwen-image": {
427
+ "type": "REPLICATE-API",
428
+ "url": "https://api.replicate.com/v1/models/qwen/qwen-image/predictions",
429
+ "headers": {
430
+ "Prefer": "wait",
431
+ "Authorization": "Token {{REPLICATE_API_KEY}}",
432
+ "Content-Type": "application/json"
433
+ },
434
+ },
435
+ "replicate-qwen-image-edit-plus": {
436
+ "type": "REPLICATE-API",
437
+ "url": "https://api.replicate.com/v1/models/qwen/qwen-image-edit-plus/predictions",
438
+ "headers": {
439
+ "Prefer": "wait",
440
+ "Authorization": "Token {{REPLICATE_API_KEY}}",
441
+ "Content-Type": "application/json"
442
+ },
443
+ },
444
+ "replicate-seedream-4": {
445
+ "type": "REPLICATE-API",
446
+ "url": "https://api.replicate.com/v1/models/bytedance/seedream-4/predictions",
447
+ "headers": {
448
+ "Prefer": "wait",
449
+ "Authorization": "Token {{REPLICATE_API_KEY}}",
450
+ "Content-Type": "application/json"
451
+ },
452
+ },
426
453
  "azure-video-translate": {
427
454
  "type": "AZURE-VIDEO-TRANSLATE",
428
455
  "url": "https://eastus.api.cognitive.microsoft.com/videotranslation",
@@ -0,0 +1,3 @@
1
+ """Azure Function entry point for document to PDF conversion."""
2
+ import azure.functions as func
3
+ from function_app import app
@@ -0,0 +1,20 @@
1
+ {
2
+ "scriptFile": "__init__.py",
3
+ "bindings": [
4
+ {
5
+ "authLevel": "function",
6
+ "type": "httpTrigger",
7
+ "direction": "in",
8
+ "name": "req",
9
+ "methods": [
10
+ "get",
11
+ "post"
12
+ ]
13
+ },
14
+ {
15
+ "type": "http",
16
+ "direction": "out",
17
+ "name": "$return"
18
+ }
19
+ ]
20
+ }
@@ -0,0 +1,46 @@
1
+ # Use official Azure Functions Python base image
2
+ FROM mcr.microsoft.com/azure-functions/python:4-python3.11
3
+
4
+ # Install LibreOffice + UNO Python bindings and unoconv for fast conversions
5
+ RUN apt-get update && \
6
+ apt-get install -y \
7
+ libreoffice \
8
+ libreoffice-writer \
9
+ libreoffice-calc \
10
+ libreoffice-impress \
11
+ python3-uno \
12
+ unoconv \
13
+ fonts-liberation \
14
+ fonts-dejavu \
15
+ fonts-liberation2 \
16
+ fonts-noto \
17
+ fonts-noto-cjk \
18
+ fonts-noto-color-emoji \
19
+ curl \
20
+ && apt-get clean \
21
+ && rm -rf /var/lib/apt/lists/*
22
+
23
+ # Set LibreOffice to use headless mode by default
24
+ ENV SAL_USE_VCLPLUGIN=svp
25
+
26
+ # Set the working directory
27
+ ENV AzureWebJobsScriptRoot=/home/site/wwwroot \
28
+ AzureFunctionsJobHost__Logging__Console__IsEnabled=true \
29
+ FUNCTIONS_WORKER_RUNTIME=python
30
+
31
+ # Switch to the app directory and copy requirements first for better layer caching
32
+ WORKDIR /home/site/wwwroot
33
+ COPY requirements.txt .
34
+
35
+ # Install Python dependencies
36
+ RUN pip install --no-cache-dir --upgrade pip && \
37
+ pip install --no-cache-dir -r requirements.txt
38
+
39
+ # Copy function app files
40
+ COPY . .
41
+
42
+ # Expose port for standalone server mode
43
+ EXPOSE 8080
44
+
45
+ # Start the application directly (no UNO listener)
46
+ CMD ["python", "function_app.py"]
@@ -0,0 +1,408 @@
1
+ # Document to PDF Converter - Azure Container App
2
+
3
+ A comprehensive document-to-PDF conversion service that runs as both an Azure Function and a standalone HTTP server. Built with LibreOffice, it supports **40+ document formats** including Word, Excel, PowerPoint, text files, HTML, and more.
4
+
5
+ ## Quick Start
6
+
7
+ ### Using Docker Compose (Easiest)
8
+
9
+ ```bash
10
+ # Build and start the service
11
+ docker compose up --build -d
12
+
13
+ # Test the service
14
+ curl "http://localhost:8080/convert?uri=https://file-examples.com/storage/fe783f04fc66761fd44fb46/2017/02/file-sample_100kB.doc" -o test.pdf
15
+
16
+ # Check health
17
+ curl http://localhost:8080/health
18
+
19
+ # View logs
20
+ docker compose logs -f
21
+
22
+ # Stop the service
23
+ docker compose down
24
+ ```
25
+
26
+ ## Supported Formats
27
+
28
+ ### Microsoft Office
29
+ - **Word**: `.doc`, `.docx`, `.docm`, `.dot`, `.dotx`, `.dotm`
30
+ - **Excel**: `.xls`, `.xlsx`, `.xlsm`, `.xlt`, `.xltx`, `.xltm`, `.csv`
31
+ - **PowerPoint**: `.ppt`, `.pptx`, `.pptm`, `.pot`, `.potx`, `.potm`, `.pps`, `.ppsx`, `.ppsm`
32
+
33
+ ### OpenDocument
34
+ - **Text**: `.odt`, `.ott`
35
+ - **Spreadsheet**: `.ods`, `.ots`
36
+ - **Presentation**: `.odp`, `.otp`
37
+ - **Graphics**: `.odg`, `.otg`
38
+
39
+ ### Web & Text
40
+ - **Web**: `.html`, `.htm`, `.xhtml`
41
+ - **Text**: `.txt`, `.rtf`, `.xml`
42
+
43
+ ### Legacy Formats
44
+ - WordPerfect, Lotus 1-2-3, dBase files, and more
45
+
46
+ ## API Usage
47
+
48
+ ### Endpoints
49
+
50
+ **Standalone Server Mode** (Docker):
51
+ - `GET/POST /convert` - Convert document to PDF
52
+ - `GET /health` - Health check
53
+
54
+ **Azure Function Mode**:
55
+ - `GET/POST /api/convert` - Convert document to PDF
56
+
57
+ ### Convert Document
58
+
59
+ ```bash
60
+ # GET request
61
+ curl "http://localhost:8080/convert?uri=https://example.com/document.docx" -o output.pdf
62
+
63
+ # POST request
64
+ curl -X POST http://localhost:8080/convert \
65
+ -H "Content-Type: application/json" \
66
+ -d '{"uri": "https://example.com/document.xlsx"}' \
67
+ -o output.pdf
68
+ ```
69
+
70
+ ### Response
71
+
72
+ **Success (200)**:
73
+ - Content-Type: `application/pdf`
74
+ - Body: PDF binary data
75
+
76
+ **Error (400/500)**:
77
+ ```json
78
+ {
79
+ "error": "Error type",
80
+ "details": "Error details"
81
+ }
82
+ ```
83
+
84
+ ## Testing
85
+
86
+ ### Run Tests
87
+
88
+ ```bash
89
+ # Run conversion tests
90
+ python3 tests/test_conversion.py
91
+
92
+ # Run streaming tests
93
+ python3 tests/test_streaming.py
94
+
95
+ # Or run in Docker
96
+ docker compose run --rm doc-to-pdf python3 tests/test_streaming.py
97
+ ```
98
+
99
+ Tests verify:
100
+ - ✅ File upload streaming (memory efficient)
101
+ - ✅ URI-based conversion
102
+ - ✅ Streaming downloads
103
+ - ✅ Concurrent conversions
104
+ - ✅ Error handling
105
+ - ✅ All document formats
106
+
107
+ Sample files are in the `samples/` directory.
108
+
109
+ ## Deployment
110
+
111
+ ### Azure Container Apps (Recommended)
112
+
113
+ ```bash
114
+ # Create resources
115
+ az group create --name cortex-rg --location eastus
116
+ az acr create --resource-group cortex-rg --name cortexregistry --sku Basic
117
+
118
+ # Build and push
119
+ az acr build --registry cortexregistry --image cortex-doc-to-pdf:latest .
120
+
121
+ # Create container app environment
122
+ az containerapp env create \
123
+ --name cortex-env \
124
+ --resource-group cortex-rg \
125
+ --location eastus
126
+
127
+ # Deploy
128
+ az containerapp create \
129
+ --name cortex-doc-to-pdf \
130
+ --resource-group cortex-rg \
131
+ --environment cortex-env \
132
+ --image cortexregistry.azurecr.io/cortex-doc-to-pdf:latest \
133
+ --target-port 8080 \
134
+ --ingress external \
135
+ --min-replicas 1 \
136
+ --max-replicas 10 \
137
+ --cpu 1.0 \
138
+ --memory 2.0Gi \
139
+ --env-vars PORT=8080 \
140
+ --command python function_app.py
141
+ ```
142
+
143
+ ### Azure Function App
144
+
145
+ ```bash
146
+ # Create Function App
147
+ az functionapp create \
148
+ --resource-group cortex-rg \
149
+ --name cortex-doc-to-pdf-func \
150
+ --storage-account cortexstorage \
151
+ --runtime python \
152
+ --runtime-version 3.11 \
153
+ --functions-version 4 \
154
+ --os-type Linux
155
+
156
+ # Deploy
157
+ func azure functionapp publish cortex-doc-to-pdf-func
158
+ ```
159
+
160
+ ## Local Development
161
+
162
+ ### With Docker (Recommended)
163
+
164
+ ```bash
165
+ # Start with auto-reload
166
+ docker compose up
167
+
168
+ # Rebuild after code changes
169
+ docker compose up --build
170
+ ```
171
+
172
+ ### Without Docker
173
+
174
+ ```bash
175
+ # Install LibreOffice
176
+ # macOS: brew install --cask libreoffice
177
+ # Ubuntu: sudo apt-get install libreoffice
178
+
179
+ # Install dependencies
180
+ pip install -r requirements.txt
181
+
182
+ # Run server
183
+ python function_app.py
184
+ ```
185
+
186
+ ## Configuration
187
+
188
+ ### Environment Variables
189
+
190
+ | Variable | Description | Default |
191
+ |----------|-------------|---------|
192
+ | `PORT` | HTTP server port | `8080` |
193
+ | `AzureWebJobsStorage` | Azure Storage (Function mode) | - |
194
+ | `FUNCTIONS_WORKER_RUNTIME` | Azure Functions runtime | `python` |
195
+
196
+ ### Conversion Timeout
197
+
198
+ Adjust in `host.json`:
199
+ ```json
200
+ {
201
+ "functionTimeout": "00:10:00"
202
+ }
203
+ ```
204
+
205
+ ## Performance
206
+
207
+ ### Typical Conversion Times
208
+ - Simple documents: 1-3 seconds
209
+ - Complex documents: 3-10 seconds
210
+ - Large presentations: 10-30 seconds
211
+
212
+ ### Resource Requirements
213
+ - **CPU**: 1.0 vCPU minimum
214
+ - **Memory**: 2.0 GB minimum
215
+ - **Disk**: Ephemeral storage
216
+
217
+ ## Troubleshooting
218
+
219
+ ### Docker Build Issues
220
+
221
+ **Platform mismatch error** (Apple Silicon Macs):
222
+ ```yaml
223
+ # Already configured in docker-compose.yml
224
+ platform: linux/amd64
225
+ ```
226
+
227
+ **Disk space error**:
228
+ ```bash
229
+ docker system prune -a -f
230
+ ```
231
+
232
+ ### Conversion Failures
233
+
234
+ **Check logs**:
235
+ ```bash
236
+ docker compose logs -f
237
+ ```
238
+
239
+ **Test with known-good file**:
240
+ ```bash
241
+ curl "http://localhost:8080/convert?uri=https://file-examples.com/storage/fe783f04fc66761fd44fb46/2017/02/file-sample_100kB.doc" -o test.pdf
242
+ ```
243
+
244
+ ## Project Structure
245
+
246
+ ```
247
+ cortex-doc-to-pdf/
248
+ ├── function_app.py # Main entry point & routing
249
+ ├── request_handlers.py # HTTP request/response handling
250
+ ├── document_converter.py # Conversion business logic
251
+ ├── converter.py # LibreOffice wrapper
252
+ ├── tests/ # Test suite
253
+ │ ├── test_streaming.py # Streaming tests
254
+ │ ├── test_conversion.py # Conversion tests
255
+ │ └── run_tests.sh # Test runner
256
+ ├── samples/ # Sample documents
257
+ ├── Dockerfile # Container image
258
+ ├── docker-compose.yml # Local orchestration
259
+ ├── requirements.txt # Dependencies
260
+ └── README.md # Documentation
261
+ ```
262
+
263
+ ## Examples
264
+
265
+ ### URI-Based Conversion
266
+
267
+ ```bash
268
+ # Word document via URI
269
+ curl -X POST http://localhost:8080/convert \
270
+ -H "Content-Type: application/json" \
271
+ -d '{"uri": "https://example.com/document.docx"}' \
272
+ -o output.pdf
273
+
274
+ # Or with GET (URL-encode special characters)
275
+ curl "http://localhost:8080/convert?uri=https://example.com/file.xlsx" -o output.pdf
276
+ ```
277
+
278
+ ### File Upload (Recommended - Streaming)
279
+
280
+ Upload local files directly - **no need for remote URI**. Files are streamed in 8KB chunks for memory efficiency.
281
+
282
+ ```bash
283
+ # Upload file directly (streams upload & download)
284
+ curl -X POST http://localhost:8080/convert \
285
+ -F "file=@document.xlsx" \
286
+ -o output.pdf
287
+
288
+ # Works with any supported format
289
+ curl -X POST http://localhost:8080/convert \
290
+ -F "file=@presentation.pptx" \
291
+ -o slides.pdf
292
+
293
+ # Large files are handled efficiently (no memory bloat)
294
+ curl -X POST http://localhost:8080/convert \
295
+ -F "file=@large-spreadsheet.xlsx" \
296
+ -o output.pdf
297
+ ```
298
+
299
+ **Why file upload is recommended:**
300
+ - ✅ **Streaming**: Chunked upload (8KB) and download
301
+ - ✅ **Memory efficient**: Handles large files without RAM bloat
302
+ - ✅ **Direct**: No need to host files on a server first
303
+ - ✅ **Fast**: No download step required
304
+
305
+ ### JavaScript/Node.js Example
306
+
307
+ **See `examples/` folder for complete working examples!**
308
+
309
+ ```javascript
310
+ const axios = require('axios');
311
+ const fs = require('fs');
312
+ const FormData = require('form-data');
313
+
314
+ // Upload file with streaming (both upload & download)
315
+ async function convertToPDF(inputFile, outputFile) {
316
+ const form = new FormData();
317
+ form.append('file', fs.createReadStream(inputFile));
318
+
319
+ const response = await axios({
320
+ method: 'POST',
321
+ url: 'http://localhost:8080/', // Can use / or /convert
322
+ data: form,
323
+ headers: form.getHeaders(),
324
+ responseType: 'stream', // Stream the response
325
+ });
326
+
327
+ // Stream PDF to file
328
+ const writer = fs.createWriteStream(outputFile);
329
+ response.data.pipe(writer);
330
+
331
+ return new Promise((resolve, reject) => {
332
+ writer.on('finish', resolve);
333
+ writer.on('error', reject);
334
+ });
335
+ }
336
+
337
+ // Usage
338
+ await convertToPDF('document.docx', 'output.pdf');
339
+ ```
340
+
341
+ **Features:**
342
+ - ✅ Streaming upload (memory efficient)
343
+ - ✅ Streaming download (direct to file)
344
+ - ✅ Progress tracking support
345
+ - ✅ Works in Node.js and Browser
346
+
347
+ **Full example:** See `examples/nodejs-client.js`
348
+
349
+ ### Python Example
350
+
351
+ ```python
352
+ import requests
353
+
354
+ # Method 1: Upload local file (Recommended - Streaming)
355
+ def convert_file_to_pdf(file_path):
356
+ with open(file_path, 'rb') as f:
357
+ files = {'file': f}
358
+ response = requests.post(
359
+ 'http://localhost:8080/convert',
360
+ files=files
361
+ )
362
+
363
+ if response.status_code == 200:
364
+ with open('output.pdf', 'wb') as f:
365
+ f.write(response.content)
366
+ print('✓ PDF created successfully')
367
+ else:
368
+ print(f'✗ Error: {response.json()}')
369
+
370
+ # Method 2: Convert from URI
371
+ def convert_url_to_pdf(document_url):
372
+ response = requests.post(
373
+ 'http://localhost:8080/convert',
374
+ json={'uri': document_url}
375
+ )
376
+
377
+ if response.status_code == 200:
378
+ with open('output.pdf', 'wb') as f:
379
+ f.write(response.content)
380
+ print('✓ PDF created successfully')
381
+ else:
382
+ print(f'✗ Error: {response.json()}')
383
+
384
+ # Upload local file (streams efficiently)
385
+ convert_file_to_pdf('document.docx')
386
+
387
+ # Or convert from URL
388
+ convert_url_to_pdf('https://example.com/document.docx')
389
+ ```
390
+
391
+ ## Security Considerations
392
+
393
+ - **Authentication**: Add API keys or OAuth for production
394
+ - **Rate Limiting**: Implement at API gateway level
395
+ - **Input Validation**: URI format and allowlisting
396
+ - **HTTPS**: Use reverse proxy or Azure ingress
397
+ - **Resource Limits**: Configure memory and CPU limits
398
+
399
+ ## License
400
+
401
+ This project is part of the Cortex project.
402
+
403
+ ## Support
404
+
405
+ For issues or questions:
406
+ 1. Check the logs: `docker compose logs -f`
407
+ 2. Run tests: `./tests/run_tests.sh`
408
+ 3. Verify LibreOffice: `docker compose run --rm --entrypoint soffice doc-to-pdf --version`
@@ -0,0 +1,157 @@
1
+ import os
2
+ import subprocess
3
+ import logging
4
+ import time
5
+ from pathlib import Path
6
+ from typing import Optional, List
7
+ try:
8
+ import uno # type: ignore
9
+ from com.sun.star.beans import PropertyValue # type: ignore
10
+ except Exception:
11
+ uno = None
12
+ PropertyValue = None
13
+
14
class DocumentConverter:
    """
    Convert office documents to PDF by invoking LibreOffice in headless mode.

    Each conversion runs a fresh ``soffice`` process via ``subprocess.run``.
    Conversions typically take 2-4 seconds depending on file size and complexity.
    """

    # Lower-case file extensions (with leading dot) accepted by
    # ``is_supported_format``. ``convert_to_pdf`` itself does not consult this set.
    SUPPORTED_FORMATS = {
        '.doc', '.docx', '.docm', '.dot', '.dotx', '.dotm',
        '.xls', '.xlsx', '.xlsm', '.xlt', '.xltx', '.xltm', '.csv',
        '.ppt', '.pptx', '.pptm', '.pot', '.potx', '.potm', '.pps', '.ppsx', '.ppsm',
        '.odt', '.ott', '.ods', '.ots', '.odp', '.otp', '.odg', '.otg', '.odf',
        '.txt', '.rtf',
        '.html', '.htm', '.xhtml',
        '.xml', '.wpd', '.wps',
        '.wk1', '.wks', '.123', '.dif', '.dbf',
    }

    def __init__(self, libreoffice_path: Optional[str] = None):
        """
        Create a converter bound to a LibreOffice executable.

        Args:
            libreoffice_path: Explicit path to the ``soffice``/``libreoffice``
                binary. When omitted (or empty), the binary is auto-detected.

        Raises:
            RuntimeError: If no LibreOffice executable was given or found.
        """
        self.libreoffice_path = libreoffice_path or self._find_libreoffice()
        if not self.libreoffice_path:
            raise RuntimeError("LibreOffice not found!")

        logging.info(f"Using LibreOffice at: {self.libreoffice_path}")

    def _find_libreoffice(self) -> Optional[str]:
        """
        Locate a LibreOffice executable.

        Looks for ``soffice`` then ``libreoffice`` on ``PATH``, and falls back
        to a short list of well-known install locations.

        Returns:
            The path of the first executable found, or ``None`` if none exists.
        """
        # Local import so this class does not depend on a module-level import
        # being added elsewhere in the file.
        import shutil

        # shutil.which performs the PATH lookup in-process, so discovery also
        # works on systems that do not ship an external `which` binary and no
        # blanket exception handler is needed.
        for cmd in ('soffice', 'libreoffice'):
            found = shutil.which(cmd)
            if found:
                return found

        common_paths = [
            '/usr/bin/libreoffice',
            '/usr/bin/soffice',
            '/Applications/LibreOffice.app/Contents/MacOS/soffice',
        ]

        for path in common_paths:
            if os.path.exists(path):
                return path

        return None

    def is_supported_format(self, file_extension: str) -> bool:
        """
        Check if a file extension is supported.

        Args:
            file_extension: Extension with or without the leading dot; the
                comparison is case-insensitive.

        Returns:
            ``True`` if the normalized extension is in ``SUPPORTED_FORMATS``.
        """
        if not file_extension.startswith('.'):
            file_extension = '.' + file_extension
        return file_extension.lower() in self.SUPPORTED_FORMATS

    def get_supported_formats(self) -> List[str]:
        """Return the supported extensions as an alphabetically sorted list."""
        return sorted(self.SUPPORTED_FORMATS)

    def convert_to_pdf(
        self,
        input_file: str,
        output_dir: Optional[str] = None,
        timeout: int = 30
    ) -> Optional[str]:
        """
        Convert a document to PDF using a headless LibreOffice process.

        Performance: 2-4 seconds typical conversion time
        - Small files (< 100KB): ~2.5s
        - Medium files (100KB-1MB): ~3-4s
        - Large files (> 1MB): ~3-4s

        Note: LibreOffice has baseline processing overhead that cannot be eliminated.

        Args:
            input_file: Path of the document to convert.
            output_dir: Directory that receives the PDF. Defaults to the
                directory containing ``input_file``.
            timeout: Maximum number of seconds to wait for LibreOffice.

        Returns:
            The path of the generated PDF (``<input stem>.pdf`` inside
            ``output_dir``), or ``None`` if LibreOffice exited non-zero, the
            PDF was not produced, the process timed out, or any other error
            occurred while running it.

        Raises:
            FileNotFoundError: If ``input_file`` does not exist.
        """
        if not os.path.exists(input_file):
            raise FileNotFoundError(f"Input file not found: {input_file}")

        input_path = Path(input_file)
        if output_dir is None:
            output_dir = str(input_path.parent)

        pdf_filename = input_path.stem + ".pdf"
        output_path = os.path.join(output_dir, pdf_filename)

        start_time = time.time()
        logging.info(f"Converting {input_file} to PDF...")

        try:
            # NOTE(review): 'writer_pdf_Export' is the Writer export filter and
            # is applied to every input type here — confirm that spreadsheet
            # and presentation inputs convert as expected with it.
            cmd = [
                self.libreoffice_path,
                '--headless',
                '--invisible',
                '--nocrashreport',
                '--nodefault',
                '--nofirststartwizard',
                '--nolockcheck',
                '--nologo',
                '--norestore',
                '--convert-to', 'pdf:writer_pdf_Export',
                '--outdir', output_dir,
                input_file
            ]

            # HOME is redirected to the output directory — presumably so that
            # LibreOffice keeps its per-user state there; verify if relied on.
            env = {**os.environ, 'HOME': output_dir, 'SAL_USE_VCLPLUGIN': 'svp'}

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout,
                env=env,
                stdin=subprocess.DEVNULL
            )

            elapsed = time.time() - start_time

            if result.returncode != 0:
                logging.error(f"Conversion failed ({elapsed:.2f}s): {result.stderr}")
                return None

            # A zero exit status alone is not trusted; the PDF must exist.
            if not os.path.exists(output_path):
                logging.error(f"PDF not created ({elapsed:.2f}s): {output_path}")
                return None

            file_size = os.path.getsize(output_path)
            logging.info(f"✓ Converted in {elapsed:.2f}s ({file_size/1024:.1f}KB): {output_path}")
            return output_path

        except subprocess.TimeoutExpired:
            elapsed = time.time() - start_time
            logging.error(f"Conversion timed out after {elapsed:.2f}s")
            return None
        except Exception as e:
            # Best-effort contract: any failure to run LibreOffice is logged
            # with a traceback and reported to the caller as None.
            elapsed = time.time() - start_time
            logging.error(f"Conversion error ({elapsed:.2f}s): {e}", exc_info=True)
            return None
@@ -0,0 +1,23 @@
1
+ services:
2
+ doc-to-pdf:
3
+ build:
4
+ context: .
5
+ platform: linux/amd64 # Force AMD64 for Azure compatibility (works on Mac via emulation)
6
+ ports:
7
+ - "8080:8080"
8
+ environment:
9
+ - PORT=8080
10
+
11
+ tmpfs:
12
+ - /tmp:size=256m
13
+ volumes:
14
+ # Mount local code for development (optional)
15
+ - ./function_app.py:/home/site/wwwroot/function_app.py
16
+ - ./converter.py:/home/site/wwwroot/converter.py
17
+ restart: unless-stopped
18
+ healthcheck:
19
+ test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
20
+ interval: 30s
21
+ timeout: 10s
22
+ retries: 3
23
+ start_period: 40s