@aj-archipelago/cortex 1.3.67 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.js +27 -0
- package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/__init__.py +3 -0
- package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/function.json +20 -0
- package/helper-apps/cortex-doc-to-pdf/Dockerfile +46 -0
- package/helper-apps/cortex-doc-to-pdf/README.md +408 -0
- package/helper-apps/cortex-doc-to-pdf/converter.py +157 -0
- package/helper-apps/cortex-doc-to-pdf/docker-compose.yml +23 -0
- package/helper-apps/cortex-doc-to-pdf/document_converter.py +181 -0
- package/helper-apps/cortex-doc-to-pdf/examples/README.md +252 -0
- package/helper-apps/cortex-doc-to-pdf/examples/nodejs-client.js +266 -0
- package/helper-apps/cortex-doc-to-pdf/examples/package-lock.json +297 -0
- package/helper-apps/cortex-doc-to-pdf/examples/package.json +23 -0
- package/helper-apps/cortex-doc-to-pdf/function_app.py +85 -0
- package/helper-apps/cortex-doc-to-pdf/host.json +16 -0
- package/helper-apps/cortex-doc-to-pdf/request_handlers.py +193 -0
- package/helper-apps/cortex-doc-to-pdf/requirements.txt +3 -0
- package/helper-apps/cortex-doc-to-pdf/tests/run_tests.sh +26 -0
- package/helper-apps/cortex-doc-to-pdf/tests/test_conversion.py +320 -0
- package/helper-apps/cortex-doc-to-pdf/tests/test_streaming.py +419 -0
- package/helper-apps/cortex-file-handler/package-lock.json +1 -0
- package/helper-apps/cortex-file-handler/package.json +1 -0
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +81 -8
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +54 -7
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +19 -7
- package/lib/encodeCache.js +5 -0
- package/lib/keyValueStorageClient.js +5 -0
- package/lib/logger.js +1 -1
- package/lib/pathwayTools.js +8 -1
- package/lib/redisSubscription.js +6 -0
- package/lib/requestExecutor.js +4 -0
- package/lib/util.js +88 -0
- package/package.json +1 -1
- package/pathways/basePathway.js +3 -3
- package/pathways/bing_afagent.js +1 -0
- package/pathways/gemini_15_vision.js +1 -1
- package/pathways/google_cse.js +2 -2
- package/pathways/image_gemini_25.js +85 -0
- package/pathways/image_prompt_optimizer_gemini_25.js +149 -0
- package/pathways/image_qwen.js +28 -0
- package/pathways/image_seedream4.js +26 -0
- package/pathways/rag.js +1 -1
- package/pathways/rag_jarvis.js +1 -1
- package/pathways/system/entity/sys_entity_continue.js +1 -1
- package/pathways/system/entity/sys_generator_results.js +1 -1
- package/pathways/system/entity/tools/sys_tool_google_search.js +15 -2
- package/pathways/system/entity/tools/sys_tool_grok_x_search.js +3 -3
- package/pathways/system/entity/tools/sys_tool_image.js +28 -23
- package/pathways/system/entity/tools/sys_tool_image_gemini.js +135 -0
- package/server/graphql.js +9 -2
- package/server/modelExecutor.js +4 -0
- package/server/pathwayResolver.js +19 -18
- package/server/plugins/claude3VertexPlugin.js +13 -8
- package/server/plugins/gemini15ChatPlugin.js +15 -10
- package/server/plugins/gemini15VisionPlugin.js +2 -23
- package/server/plugins/gemini25ImagePlugin.js +155 -0
- package/server/plugins/modelPlugin.js +3 -2
- package/server/plugins/openAiChatPlugin.js +6 -6
- package/server/plugins/replicateApiPlugin.js +268 -12
- package/server/plugins/veoVideoPlugin.js +15 -1
- package/server/rest.js +2 -0
- package/server/typeDef.js +96 -10
- package/tests/integration/apptekTranslatePlugin.integration.test.js +1 -1
- package/tests/unit/core/pathwayManager.test.js +2 -4
- package/tests/unit/plugins/gemini25ImagePlugin.test.js +294 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
End-to-end tests for document to PDF conversion.
|
|
4
|
+
Tests actual conversion of various file formats and verifies PDF content.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
import time
|
|
10
|
+
import subprocess
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
import PyPDF2
|
|
13
|
+
from converter import DocumentConverter
|
|
14
|
+
|
|
15
|
+
# Test configuration: sample inputs and generated PDFs sit side by side,
# two levels up from this file's own path.
_BASE_DIR = Path(__file__).parent.parent
SAMPLES_DIR = _BASE_DIR / "samples"
OUTPUT_DIR = _BASE_DIR / "test_output"
# Created at import time so conversions always have somewhere to write.
OUTPUT_DIR.mkdir(exist_ok=True)
|
|
19
|
+
|
|
20
|
+
class Colors:
    """Namespace of ANSI escape sequences for styling terminal output."""

    # Foreground colors
    GREEN = "\033[92m"
    RED = "\033[91m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"

    # Attributes
    RESET = "\033[0m"  # clears any styling applied so far
    BOLD = "\033[1m"
|
|
28
|
+
|
|
29
|
+
def print_success(msg):
    """Print *msg* prefixed with a green check mark."""
    marker = f"{Colors.GREEN}✓{Colors.RESET}"
    print(f"{marker} {msg}")
|
|
31
|
+
|
|
32
|
+
def print_error(msg):
    """Print *msg* prefixed with a red cross mark."""
    marker = f"{Colors.RED}✗{Colors.RESET}"
    print(f"{marker} {msg}")
|
|
34
|
+
|
|
35
|
+
def print_info(msg):
    """Print *msg* prefixed with a blue information symbol."""
    marker = f"{Colors.BLUE}ℹ{Colors.RESET}"
    print(f"{marker} {msg}")
|
|
37
|
+
|
|
38
|
+
def print_warning(msg):
    """Print *msg* prefixed with a yellow warning sign."""
    marker = f"{Colors.YELLOW}⚠{Colors.RESET}"
    print(f"{marker} {msg}")
|
|
40
|
+
|
|
41
|
+
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages concatenated in page order, with leading and
        trailing whitespace stripped, or None if the file could not be
        opened or parsed (the error is printed via print_error).
    """
    try:
        with open(pdf_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # Build the result with a single join instead of growing a string
            # page by page. ``or ""`` keeps one page that yields no text
            # from aborting extraction of the whole document.
            text = "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
        return text.strip()
    except Exception as e:
        # Best-effort helper: any read/parse failure is reported and mapped
        # to None rather than propagated.
        print_error(f"Failed to extract text from PDF: {e}")
        return None
|
|
53
|
+
|
|
54
|
+
def get_pdf_info(pdf_path):
    """Read the page count, metadata and encryption flag of a PDF file.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        dict with the keys 'pages', 'metadata' and 'is_encrypted', or None
        if the file could not be opened or parsed (the error is printed).
    """
    try:
        with open(pdf_path, 'rb') as handle:
            reader = PyPDF2.PdfReader(handle)
            # Collect everything while the file is still open.
            info = {
                'pages': len(reader.pages),
                'metadata': reader.metadata,
                'is_encrypted': reader.is_encrypted,
            }
    except Exception as exc:
        print_error(f"Failed to get PDF info: {exc}")
        return None
    return info
|
|
67
|
+
|
|
68
|
+
def test_file_conversion(input_file, expected_content_snippets=None, min_pages=1):
    """
    Test conversion of a single file.

    Converts the file with DocumentConverter, then checks that a non-empty
    PDF exists, that it has at least ``min_pages`` pages and, when snippets
    are given and text can be extracted, that every snippet occurs in the
    PDF text (case-insensitive). Progress and failures are printed.

    Args:
        input_file: Path to the input file (a pathlib.Path; its ``name``,
            ``suffix`` and ``stem`` are used)
        expected_content_snippets: List of strings that should appear in the PDF
        min_pages: Minimum expected number of pages

    Returns:
        bool: True if test passed, False otherwise. An unsupported format is
        reported as skipped and also returns True.
    """
    file_name = input_file.name
    file_ext = input_file.suffix
    print(f"\n{Colors.BOLD}Testing: {file_name}{Colors.RESET}")
    print(f" Format: {file_ext}")

    # Initialize converter
    try:
        converter = DocumentConverter()
    except RuntimeError as e:
        print_error(f"Converter initialization failed: {e}")
        return False

    # Check if format is supported
    if not converter.is_supported_format(file_ext):
        print_warning(f"Format {file_ext} is not supported - SKIPPING")
        return True  # Not a failure, just unsupported

    # Convert to PDF
    output_file = OUTPUT_DIR / f"{input_file.stem}.pdf"
    print_info(f"Converting to: {output_file.name}")

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    try:
        pdf_path = converter.convert_to_pdf(str(input_file), str(OUTPUT_DIR))
        conversion_time = time.perf_counter() - start_time

        if not pdf_path or not os.path.exists(pdf_path):
            print_error("Conversion failed - PDF not created")
            return False

        print_success(f"Converted in {conversion_time:.2f}s")

    except Exception as e:
        print_error(f"Conversion failed: {e}")
        return False

    # Verify the PDF is not empty
    file_size = os.path.getsize(pdf_path)
    if file_size == 0:
        print_error("PDF file is empty (0 bytes)")
        return False

    print_info(f"PDF size: {file_size:,} bytes")

    # Get PDF information
    pdf_info = get_pdf_info(pdf_path)
    if not pdf_info:
        print_error("Failed to read PDF metadata")
        return False

    print_info(f"Pages: {pdf_info['pages']}")

    # Verify minimum page count
    if pdf_info['pages'] < min_pages:
        print_error(f"Expected at least {min_pages} pages, got {pdf_info['pages']}")
        return False

    # Extract and verify content
    if expected_content_snippets:
        print_info("Extracting and verifying content...")
        pdf_text = extract_text_from_pdf(pdf_path)

        if not pdf_text:
            # Deliberately not a failure: the PDF may contain only images.
            print_warning("Could not extract text from PDF (might be image-based)")
        else:
            print_info(f"Extracted {len(pdf_text)} characters")

            # Lowercase the document once instead of once per snippet.
            searchable_text = pdf_text.lower()
            missing_content = [
                snippet
                for snippet in expected_content_snippets
                if snippet.lower() not in searchable_text
            ]

            if missing_content:
                print_error(f"Missing expected content: {', '.join(missing_content)}")
                return False

            print_success(f"All {len(expected_content_snippets)} content checks passed")

    print_success(f"✓ {file_name} conversion PASSED")
    return True
|
|
161
|
+
|
|
162
|
+
def test_text_files():
    """Run the conversion check on every plain-text sample that exists.

    Returns:
        list of bool: one result per sample found in SAMPLES_DIR. Missing
        samples only produce a warning and contribute no result.
    """
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}Testing TEXT Files{Colors.RESET}")
    print(rule)

    # No content snippets are verified for the text samples.
    sample_names = (
        "ascii-art.txt",
        "data.txt",
        "long-doc.txt",
        "multilang.txt",
    )

    outcomes = []
    for sample_name in sample_names:
        sample_path = SAMPLES_DIR / sample_name
        if not sample_path.exists():
            print_warning(f"File not found: {sample_name}")
            continue
        outcomes.append(test_file_conversion(sample_path, None))

    return outcomes
|
|
184
|
+
|
|
185
|
+
def test_office_documents():
    """Run the conversion check on every Office sample that exists.

    Returns:
        list of bool: one result per sample found in SAMPLES_DIR. Missing
        samples only produce a warning and contribute no result.
    """
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}Testing OFFICE Documents{Colors.RESET}")
    print(rule)

    # (file name, snippets expected in the PDF text, minimum page count)
    cases = (
        ("file-sample_1MB.docx", ["document"], 1),
        ("file_example_XLSX_5000.xlsx", None, 1),
        ("file_example_PPT_1MB.ppt", None, 1),
        ("powerful_gen1_pokemon__20251002T065535Z_6ab329cc.pptx", ["pokemon"], 1),
        ("Powerful_Pokemon_Gen1__20251002T054021Z_dbe0091f.pptx", ["pokemon"], 1),
    )

    outcomes = []
    for sample_name, snippets, page_floor in cases:
        sample_path = SAMPLES_DIR / sample_name
        if not sample_path.exists():
            print_warning(f"File not found: {sample_name}")
            continue
        outcomes.append(test_file_conversion(sample_path, snippets, page_floor))

    return outcomes
|
|
212
|
+
|
|
213
|
+
def test_html_files():
    """Run the conversion check on every HTML sample that exists.

    Returns:
        list of bool: one result per sample found in SAMPLES_DIR. Missing
        samples only produce a warning and contribute no result.
    """
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}Testing HTML Files{Colors.RESET}")
    print(rule)

    # No content snippets are verified for the HTML samples.
    sample_names = ("sample1.html", "sample2.html")

    outcomes = []
    for sample_name in sample_names:
        sample_path = SAMPLES_DIR / sample_name
        if not sample_path.exists():
            print_warning(f"File not found: {sample_name}")
            continue
        outcomes.append(test_file_conversion(sample_path, None))

    return outcomes
|
|
233
|
+
|
|
234
|
+
def test_pdf_files():
    """Account for the PDF samples without converting them.

    Each PDF sample that exists is announced, reported as skipped and
    recorded as a passing result; no conversion is attempted.

    Returns:
        list of bool: one True per sample found in SAMPLES_DIR. Missing
        samples only produce a warning and contribute no result.
    """
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}Testing PDF Files{Colors.RESET}")
    print(rule)

    sample_names = ("quote_of_the_day__20251002T064921Z_af063c8e.pdf",)

    outcomes = []
    for sample_name in sample_names:
        if not (SAMPLES_DIR / sample_name).exists():
            print_warning(f"File not found: {sample_name}")
            continue
        # PDFs might not be supported for conversion (already PDF)
        print_info(f"Testing: {sample_name}")
        print_warning("PDF files may not require conversion - SKIPPING")
        outcomes.append(True)

    return outcomes
|
|
256
|
+
|
|
257
|
+
def check_libreoffice():
    """Report whether a DocumentConverter can be created.

    Constructing the converter raises RuntimeError when it cannot be set up;
    that case prints the error followed by LibreOffice install hints.

    Returns:
        bool: True if the converter was created, False on RuntimeError.
    """
    try:
        libreoffice_path = DocumentConverter().libreoffice_path
    except RuntimeError as err:
        print_error(f"LibreOffice not found: {err}")
        install_hints = (
            "Please install LibreOffice:",
            " macOS: brew install --cask libreoffice",
            " Ubuntu/Debian: sudo apt-get install libreoffice",
        )
        for hint in install_hints:
            print_info(hint)
        return False

    print_success(f"LibreOffice found at: {libreoffice_path}")
    return True
|
|
269
|
+
|
|
270
|
+
def main():
    """Run all test suites and print a summary.

    Exits the process with status 1 (via sys.exit) if LibreOffice is not
    available or the samples directory does not exist.

    Returns:
        int: 0 if no test failed, 1 if at least one test failed. When no
        sample file was found at all, nothing can fail, so 0 is returned,
        but a warning is printed instead of the success message.
    """
    print(f"\n{Colors.BOLD}{'='*60}{Colors.RESET}")
    print(f"{Colors.BOLD}Document to PDF Conversion Tests{Colors.RESET}")
    print(f"{Colors.BOLD}{'='*60}{Colors.RESET}")

    # Check prerequisites
    if not check_libreoffice():
        sys.exit(1)

    if not SAMPLES_DIR.exists():
        print_error(f"Samples directory not found: {SAMPLES_DIR}")
        sys.exit(1)

    print_info(f"Samples directory: {SAMPLES_DIR}")
    print_info(f"Output directory: {OUTPUT_DIR}")

    # Run all test suites
    all_results = []

    all_results.extend(test_text_files())
    all_results.extend(test_office_documents())
    all_results.extend(test_html_files())
    all_results.extend(test_pdf_files())

    # Print summary
    print(f"\n{Colors.BOLD}{'='*60}{Colors.RESET}")
    print(f"{Colors.BOLD}Test Summary{Colors.RESET}")
    print(f"{Colors.BOLD}{'='*60}{Colors.RESET}")

    total = len(all_results)
    passed = sum(1 for r in all_results if r)
    # Every result is either passed or failed, so one scan is enough.
    failed = total - passed

    print(f"\nTotal tests: {total}")
    print_success(f"Passed: {passed}")
    if failed > 0:
        print_error(f"Failed: {failed}")

    success_rate = (passed / total * 100) if total > 0 else 0
    print(f"\nSuccess rate: {success_rate:.1f}%")

    if failed > 0:
        print(f"\n{Colors.RED}{Colors.BOLD}❌ Some tests failed{Colors.RESET}")
        return 1

    if total == 0:
        # The suites only record results for sample files that exist, so an
        # empty result list means nothing was exercised. Do not claim success.
        print_warning("No tests were run - no sample files were found")
        return 0

    print(f"\n{Colors.GREEN}{Colors.BOLD}🎉 All tests passed!{Colors.RESET}")
    return 0
|
|
318
|
+
|
|
319
|
+
if __name__ == "__main__":
    # Propagate main()'s result as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
|