@aj-archipelago/cortex 1.3.67 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/config.js +27 -0
  2. package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/__init__.py +3 -0
  3. package/helper-apps/cortex-doc-to-pdf/DocToPdfFunction/function.json +20 -0
  4. package/helper-apps/cortex-doc-to-pdf/Dockerfile +46 -0
  5. package/helper-apps/cortex-doc-to-pdf/README.md +408 -0
  6. package/helper-apps/cortex-doc-to-pdf/converter.py +157 -0
  7. package/helper-apps/cortex-doc-to-pdf/docker-compose.yml +23 -0
  8. package/helper-apps/cortex-doc-to-pdf/document_converter.py +181 -0
  9. package/helper-apps/cortex-doc-to-pdf/examples/README.md +252 -0
  10. package/helper-apps/cortex-doc-to-pdf/examples/nodejs-client.js +266 -0
  11. package/helper-apps/cortex-doc-to-pdf/examples/package-lock.json +297 -0
  12. package/helper-apps/cortex-doc-to-pdf/examples/package.json +23 -0
  13. package/helper-apps/cortex-doc-to-pdf/function_app.py +85 -0
  14. package/helper-apps/cortex-doc-to-pdf/host.json +16 -0
  15. package/helper-apps/cortex-doc-to-pdf/request_handlers.py +193 -0
  16. package/helper-apps/cortex-doc-to-pdf/requirements.txt +3 -0
  17. package/helper-apps/cortex-doc-to-pdf/tests/run_tests.sh +26 -0
  18. package/helper-apps/cortex-doc-to-pdf/tests/test_conversion.py +320 -0
  19. package/helper-apps/cortex-doc-to-pdf/tests/test_streaming.py +419 -0
  20. package/helper-apps/cortex-file-handler/package-lock.json +1 -0
  21. package/helper-apps/cortex-file-handler/package.json +1 -0
  22. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +81 -8
  23. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +54 -7
  24. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +19 -7
  25. package/lib/encodeCache.js +5 -0
  26. package/lib/keyValueStorageClient.js +5 -0
  27. package/lib/logger.js +1 -1
  28. package/lib/pathwayTools.js +8 -1
  29. package/lib/redisSubscription.js +6 -0
  30. package/lib/requestExecutor.js +4 -0
  31. package/lib/util.js +88 -0
  32. package/package.json +1 -1
  33. package/pathways/basePathway.js +3 -3
  34. package/pathways/bing_afagent.js +1 -0
  35. package/pathways/gemini_15_vision.js +1 -1
  36. package/pathways/google_cse.js +2 -2
  37. package/pathways/image_gemini_25.js +85 -0
  38. package/pathways/image_prompt_optimizer_gemini_25.js +149 -0
  39. package/pathways/image_qwen.js +28 -0
  40. package/pathways/image_seedream4.js +26 -0
  41. package/pathways/rag.js +1 -1
  42. package/pathways/rag_jarvis.js +1 -1
  43. package/pathways/system/entity/sys_entity_continue.js +1 -1
  44. package/pathways/system/entity/sys_generator_results.js +1 -1
  45. package/pathways/system/entity/tools/sys_tool_google_search.js +15 -2
  46. package/pathways/system/entity/tools/sys_tool_grok_x_search.js +3 -3
  47. package/pathways/system/entity/tools/sys_tool_image.js +28 -23
  48. package/pathways/system/entity/tools/sys_tool_image_gemini.js +135 -0
  49. package/server/graphql.js +9 -2
  50. package/server/modelExecutor.js +4 -0
  51. package/server/pathwayResolver.js +19 -18
  52. package/server/plugins/claude3VertexPlugin.js +13 -8
  53. package/server/plugins/gemini15ChatPlugin.js +15 -10
  54. package/server/plugins/gemini15VisionPlugin.js +2 -23
  55. package/server/plugins/gemini25ImagePlugin.js +155 -0
  56. package/server/plugins/modelPlugin.js +3 -2
  57. package/server/plugins/openAiChatPlugin.js +6 -6
  58. package/server/plugins/replicateApiPlugin.js +268 -12
  59. package/server/plugins/veoVideoPlugin.js +15 -1
  60. package/server/rest.js +2 -0
  61. package/server/typeDef.js +96 -10
  62. package/tests/integration/apptekTranslatePlugin.integration.test.js +1 -1
  63. package/tests/unit/core/pathwayManager.test.js +2 -4
  64. package/tests/unit/plugins/gemini25ImagePlugin.test.js +294 -0
@@ -0,0 +1,320 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ End-to-end tests for document to PDF conversion.
4
+ Tests actual conversion of various file formats and verifies PDF content.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import time
10
+ import subprocess
11
+ from pathlib import Path
12
+ import PyPDF2
13
+ from converter import DocumentConverter
14
+
15
# Test configuration: both directories sit two levels above this file.
_PACKAGE_ROOT = Path(__file__).parent.parent
SAMPLES_DIR = _PACKAGE_ROOT / "samples"
OUTPUT_DIR = _PACKAGE_ROOT / "test_output"
# Created at import time; an already-existing directory is not an error.
OUTPUT_DIR.mkdir(exist_ok=True)
19
+
20
class Colors:
    """Namespace of ANSI escape sequences used to style terminal output."""

    # Text attributes
    RESET = '\033[0m'
    BOLD = '\033[1m'

    # Foreground colors
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
28
+
29
def print_success(msg):
    """Print *msg* preceded by a green check mark."""
    marker = f"{Colors.GREEN}✓{Colors.RESET}"
    print(f"{marker} {msg}")
31
+
32
def print_error(msg):
    """Print *msg* preceded by a red cross mark."""
    marker = f"{Colors.RED}✗{Colors.RESET}"
    print(f"{marker} {msg}")
34
+
35
def print_info(msg):
    """Print *msg* preceded by a blue information mark."""
    marker = f"{Colors.BLUE}ℹ{Colors.RESET}"
    print(f"{marker} {msg}")
37
+
38
def print_warning(msg):
    """Print *msg* preceded by a yellow warning mark."""
    marker = f"{Colors.YELLOW}⚠{Colors.RESET}"
    print(f"{marker} {msg}")
40
+
41
def extract_text_from_pdf(pdf_path):
    """Return the concatenated, stripped text of all pages in a PDF.

    Any exception raised while opening or reading the file is reported
    through print_error and results in None being returned.
    """
    try:
        with open(pdf_path, 'rb') as handle:
            reader = PyPDF2.PdfReader(handle)
            # Pages are read while the file is still open.
            page_texts = [page.extract_text() for page in reader.pages]
            return "".join(page_texts).strip()
    except Exception as e:
        print_error(f"Failed to extract text from PDF: {e}")
        return None
53
+
54
def get_pdf_info(pdf_path):
    """Return a dict describing a PDF, or None if it cannot be read.

    The dict has the keys 'pages' (page count), 'metadata' and
    'is_encrypted', all taken from a PyPDF2.PdfReader. Any exception is
    reported through print_error.
    """
    try:
        with open(pdf_path, 'rb') as handle:
            reader = PyPDF2.PdfReader(handle)
            return dict(
                pages=len(reader.pages),
                metadata=reader.metadata,
                is_encrypted=reader.is_encrypted,
            )
    except Exception as e:
        print_error(f"Failed to get PDF info: {e}")
        return None
67
+
68
def test_file_conversion(input_file, expected_content_snippets=None, min_pages=1):
    """
    Test conversion of a single file.

    Args:
        input_file: Path to the input file (a pathlib.Path or a path string)
        expected_content_snippets: List of strings that should appear in the PDF
            (compared case-insensitively); None or empty skips the content check
        min_pages: Minimum expected number of pages

    Returns:
        bool: True if test passed, False otherwise. An unsupported input
        format is skipped and also reported as True. If no text can be
        extracted from the PDF, the content check is skipped with a warning
        rather than failing.
    """
    # Accept plain path strings as well as Path objects.
    input_file = Path(input_file)
    file_name = input_file.name
    file_ext = input_file.suffix
    print(f"\n{Colors.BOLD}Testing: {file_name}{Colors.RESET}")
    print(f"  Format: {file_ext}")

    # Initialize converter
    try:
        converter = DocumentConverter()
    except RuntimeError as e:
        print_error(f"Converter initialization failed: {e}")
        return False

    # Check if format is supported
    if not converter.is_supported_format(file_ext):
        print_warning(f"Format {file_ext} is not supported - SKIPPING")
        return True  # Not a failure, just unsupported

    # Convert to PDF
    output_file = OUTPUT_DIR / f"{input_file.stem}.pdf"
    print_info(f"Converting to: {output_file.name}")

    start_time = time.time()
    try:
        pdf_path = converter.convert_to_pdf(str(input_file), str(OUTPUT_DIR))
        conversion_time = time.time() - start_time

        if not pdf_path or not os.path.exists(pdf_path):
            print_error("Conversion failed - PDF not created")
            return False

        print_success(f"Converted in {conversion_time:.2f}s")

    except Exception as e:
        print_error(f"Conversion failed: {e}")
        return False

    # Verify PDF was created with some content
    file_size = os.path.getsize(pdf_path)
    if file_size == 0:
        print_error("PDF file is empty (0 bytes)")
        return False

    print_info(f"PDF size: {file_size:,} bytes")

    # Get PDF information
    pdf_info = get_pdf_info(pdf_path)
    if not pdf_info:
        print_error("Failed to read PDF metadata")
        return False

    print_info(f"Pages: {pdf_info['pages']}")

    # Verify minimum page count
    if pdf_info['pages'] < min_pages:
        print_error(f"Expected at least {min_pages} pages, got {pdf_info['pages']}")
        return False

    # Extract and verify content
    if expected_content_snippets:
        print_info("Extracting and verifying content...")
        pdf_text = extract_text_from_pdf(pdf_path)

        if not pdf_text:
            # Deliberately not a failure: the PDF may contain only images.
            print_warning("Could not extract text from PDF (might be image-based)")
        else:
            print_info(f"Extracted {len(pdf_text)} characters")

            # Lower-case the extracted text once instead of once per snippet.
            pdf_text_lower = pdf_text.lower()
            missing_content = [
                snippet
                for snippet in expected_content_snippets
                if snippet.lower() not in pdf_text_lower
            ]

            if missing_content:
                print_error(f"Missing expected content: {', '.join(missing_content)}")
                return False

            print_success(f"All {len(expected_content_snippets)} content checks passed")

    print_success(f"✓ {file_name} conversion PASSED")
    return True
161
+
162
def test_text_files():
    """Convert every plain-text sample and return the list of results.

    Samples missing from SAMPLES_DIR are reported with a warning and
    contribute no entry to the returned list.
    """
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}Testing TEXT Files{Colors.RESET}")
    print(rule)

    cases = [
        ("ascii-art.txt", None),
        ("data.txt", None),
        ("long-doc.txt", None),
        ("multilang.txt", None),
    ]

    outcomes = []
    for file_name, expected_content in cases:
        sample = SAMPLES_DIR / file_name
        if not sample.exists():
            print_warning(f"File not found: {file_name}")
            continue
        outcomes.append(test_file_conversion(sample, expected_content))

    return outcomes
184
+
185
def test_office_documents():
    """Convert every Microsoft Office sample and return the list of results.

    Each case is (file name, expected content snippets, minimum pages).
    Samples missing from SAMPLES_DIR are reported with a warning and
    contribute no entry to the returned list.
    """
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}Testing OFFICE Documents{Colors.RESET}")
    print(rule)

    cases = [
        ("file-sample_1MB.docx", ["document"], 1),
        ("file_example_XLSX_5000.xlsx", None, 1),
        ("file_example_PPT_1MB.ppt", None, 1),
        ("powerful_gen1_pokemon__20251002T065535Z_6ab329cc.pptx", ["pokemon"], 1),
        ("Powerful_Pokemon_Gen1__20251002T054021Z_dbe0091f.pptx", ["pokemon"], 1),
    ]

    outcomes = []
    for file_name, expected_content, min_pages in cases:
        sample = SAMPLES_DIR / file_name
        if not sample.exists():
            print_warning(f"File not found: {file_name}")
            continue
        outcomes.append(test_file_conversion(sample, expected_content, min_pages))

    return outcomes
212
+
213
def test_html_files():
    """Convert every HTML sample and return the list of results.

    Samples missing from SAMPLES_DIR are reported with a warning and
    contribute no entry to the returned list.
    """
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}Testing HTML Files{Colors.RESET}")
    print(rule)

    cases = [
        ("sample1.html", None),
        ("sample2.html", None),
    ]

    outcomes = []
    for file_name, expected_content in cases:
        sample = SAMPLES_DIR / file_name
        if not sample.exists():
            print_warning(f"File not found: {file_name}")
            continue
        outcomes.append(test_file_conversion(sample, expected_content))

    return outcomes
233
+
234
def test_pdf_files():
    """Record PDF samples as skipped-but-passing and return the results.

    No conversion is attempted: each sample that exists is logged and
    counted as True. Missing samples are reported with a warning and
    contribute no entry to the returned list.
    """
    rule = f"{Colors.BOLD}{'='*60}{Colors.RESET}"
    print(f"\n{rule}")
    print(f"{Colors.BOLD}Testing PDF Files{Colors.RESET}")
    print(rule)

    cases = [
        ("quote_of_the_day__20251002T064921Z_af063c8e.pdf", None),
    ]

    outcomes = []
    for file_name, _expected in cases:
        if not (SAMPLES_DIR / file_name).exists():
            print_warning(f"File not found: {file_name}")
            continue
        # PDFs might not be supported for conversion (already PDF)
        print_info(f"Testing: {file_name}")
        print_warning("PDF files may not require conversion - SKIPPING")
        outcomes.append(True)

    return outcomes
256
+
257
def check_libreoffice():
    """Report whether a DocumentConverter can be constructed.

    Returns True and prints the converter's libreoffice_path on success.
    If construction raises RuntimeError, prints the error together with
    installation hints and returns False.
    """
    install_hints = (
        "Please install LibreOffice:",
        "  macOS: brew install --cask libreoffice",
        "  Ubuntu/Debian: sudo apt-get install libreoffice",
    )
    try:
        found = DocumentConverter()
        print_success(f"LibreOffice found at: {found.libreoffice_path}")
        return True
    except RuntimeError as e:
        print_error(f"LibreOffice not found: {e}")
        for hint in install_hints:
            print_info(hint)
        return False
269
+
270
def main():
    """Run all test suites and print a summary.

    Exits the process with status 1 if LibreOffice is unavailable or the
    samples directory does not exist.

    Returns:
        int: 0 if at least one test ran and none failed; 1 if any test
        failed or if no test ran at all.
    """
    print(f"\n{Colors.BOLD}{'='*60}{Colors.RESET}")
    print(f"{Colors.BOLD}Document to PDF Conversion Tests{Colors.RESET}")
    print(f"{Colors.BOLD}{'='*60}{Colors.RESET}")

    # Check prerequisites
    if not check_libreoffice():
        sys.exit(1)

    if not SAMPLES_DIR.exists():
        print_error(f"Samples directory not found: {SAMPLES_DIR}")
        sys.exit(1)

    print_info(f"Samples directory: {SAMPLES_DIR}")
    print_info(f"Output directory: {OUTPUT_DIR}")

    # Run all test suites
    all_results = []

    all_results.extend(test_text_files())
    all_results.extend(test_office_documents())
    all_results.extend(test_html_files())
    all_results.extend(test_pdf_files())

    # Print summary
    print(f"\n{Colors.BOLD}{'='*60}{Colors.RESET}")
    print(f"{Colors.BOLD}Test Summary{Colors.RESET}")
    print(f"{Colors.BOLD}{'='*60}{Colors.RESET}")

    total = len(all_results)
    passed = sum(1 for r in all_results if r)
    failed = total - passed

    print(f"\nTotal tests: {total}")
    print_success(f"Passed: {passed}")
    if failed > 0:
        print_error(f"Failed: {failed}")

    success_rate = (passed / total * 100) if total > 0 else 0
    print(f"\nSuccess rate: {success_rate:.1f}%")

    # The suites only record a result for sample files that exist, so an
    # empty result list means nothing was exercised. Do not report that
    # as a successful run.
    if total == 0:
        print_error("No tests were run - no sample files were found")
        return 1

    if failed == 0:
        print(f"\n{Colors.GREEN}{Colors.BOLD}🎉 All tests passed!{Colors.RESET}")
        return 0

    print(f"\n{Colors.RED}{Colors.BOLD}❌ Some tests failed{Colors.RESET}")
    return 1
318
+
319
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())