@aj-archipelago/cortex 1.3.62 → 1.3.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/.github/workflows/cortex-file-handler-test.yml +61 -0
  2. package/README.md +31 -7
  3. package/config/default.example.json +15 -0
  4. package/config.js +133 -12
  5. package/helper-apps/cortex-autogen2/DigiCertGlobalRootCA.crt.pem +22 -0
  6. package/helper-apps/cortex-autogen2/Dockerfile +31 -0
  7. package/helper-apps/cortex-autogen2/Dockerfile.worker +41 -0
  8. package/helper-apps/cortex-autogen2/README.md +183 -0
  9. package/helper-apps/cortex-autogen2/__init__.py +1 -0
  10. package/helper-apps/cortex-autogen2/agents.py +131 -0
  11. package/helper-apps/cortex-autogen2/docker-compose.yml +20 -0
  12. package/helper-apps/cortex-autogen2/function_app.py +55 -0
  13. package/helper-apps/cortex-autogen2/host.json +15 -0
  14. package/helper-apps/cortex-autogen2/main.py +126 -0
  15. package/helper-apps/cortex-autogen2/poetry.lock +3652 -0
  16. package/helper-apps/cortex-autogen2/pyproject.toml +36 -0
  17. package/helper-apps/cortex-autogen2/requirements.txt +20 -0
  18. package/helper-apps/cortex-autogen2/send_task.py +105 -0
  19. package/helper-apps/cortex-autogen2/services/__init__.py +1 -0
  20. package/helper-apps/cortex-autogen2/services/azure_queue.py +85 -0
  21. package/helper-apps/cortex-autogen2/services/redis_publisher.py +153 -0
  22. package/helper-apps/cortex-autogen2/task_processor.py +488 -0
  23. package/helper-apps/cortex-autogen2/tools/__init__.py +24 -0
  24. package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +175 -0
  25. package/helper-apps/cortex-autogen2/tools/azure_foundry_agents.py +601 -0
  26. package/helper-apps/cortex-autogen2/tools/coding_tools.py +72 -0
  27. package/helper-apps/cortex-autogen2/tools/download_tools.py +48 -0
  28. package/helper-apps/cortex-autogen2/tools/file_tools.py +545 -0
  29. package/helper-apps/cortex-autogen2/tools/search_tools.py +646 -0
  30. package/helper-apps/cortex-azure-cleaner/README.md +36 -0
  31. package/helper-apps/cortex-file-converter/README.md +93 -0
  32. package/helper-apps/cortex-file-converter/key_to_pdf.py +104 -0
  33. package/helper-apps/cortex-file-converter/list_blob_extensions.py +89 -0
  34. package/helper-apps/cortex-file-converter/process_azure_keynotes.py +181 -0
  35. package/helper-apps/cortex-file-converter/requirements.txt +1 -0
  36. package/helper-apps/cortex-file-handler/.env.test.azure.ci +7 -0
  37. package/helper-apps/cortex-file-handler/.env.test.azure.sample +1 -1
  38. package/helper-apps/cortex-file-handler/.env.test.gcs.ci +10 -0
  39. package/helper-apps/cortex-file-handler/.env.test.gcs.sample +2 -2
  40. package/helper-apps/cortex-file-handler/INTERFACE.md +41 -0
  41. package/helper-apps/cortex-file-handler/package.json +1 -1
  42. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +41 -17
  43. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +30 -15
  44. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +32 -6
  45. package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +24 -2
  46. package/helper-apps/cortex-file-handler/scripts/validate-env.js +128 -0
  47. package/helper-apps/cortex-file-handler/src/blobHandler.js +161 -51
  48. package/helper-apps/cortex-file-handler/src/constants.js +3 -0
  49. package/helper-apps/cortex-file-handler/src/fileChunker.js +10 -8
  50. package/helper-apps/cortex-file-handler/src/index.js +116 -9
  51. package/helper-apps/cortex-file-handler/src/redis.js +61 -1
  52. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +11 -8
  53. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +2 -2
  54. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +88 -6
  55. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +58 -0
  56. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +25 -5
  57. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +9 -0
  58. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +120 -16
  59. package/helper-apps/cortex-file-handler/src/start.js +27 -17
  60. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +52 -1
  61. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +40 -0
  62. package/helper-apps/cortex-file-handler/tests/checkHashShortLived.test.js +553 -0
  63. package/helper-apps/cortex-file-handler/tests/cleanup.test.js +46 -52
  64. package/helper-apps/cortex-file-handler/tests/containerConversionFlow.test.js +451 -0
  65. package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +229 -0
  66. package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +392 -0
  67. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +7 -2
  68. package/helper-apps/cortex-file-handler/tests/deleteOperations.test.js +348 -0
  69. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +23 -2
  70. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +11 -5
  71. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +58 -24
  72. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +11 -4
  73. package/helper-apps/cortex-file-handler/tests/shortLivedUrlConversion.test.js +225 -0
  74. package/helper-apps/cortex-file-handler/tests/start.test.js +8 -12
  75. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +80 -0
  76. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +388 -22
  77. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -0
  78. package/lib/cortexResponse.js +153 -0
  79. package/lib/entityConstants.js +21 -3
  80. package/lib/logger.js +21 -4
  81. package/lib/pathwayTools.js +28 -9
  82. package/lib/util.js +49 -0
  83. package/package.json +1 -1
  84. package/pathways/basePathway.js +1 -0
  85. package/pathways/bing_afagent.js +54 -1
  86. package/pathways/call_tools.js +2 -3
  87. package/pathways/chat_jarvis.js +1 -1
  88. package/pathways/google_cse.js +27 -0
  89. package/pathways/grok_live_search.js +18 -0
  90. package/pathways/system/entity/memory/sys_memory_lookup_required.js +1 -0
  91. package/pathways/system/entity/memory/sys_memory_required.js +1 -0
  92. package/pathways/system/entity/memory/sys_search_memory.js +1 -0
  93. package/pathways/system/entity/sys_entity_agent.js +56 -4
  94. package/pathways/system/entity/sys_generator_quick.js +1 -0
  95. package/pathways/system/entity/tools/sys_tool_bing_search_afagent.js +26 -0
  96. package/pathways/system/entity/tools/sys_tool_google_search.js +141 -0
  97. package/pathways/system/entity/tools/sys_tool_grok_x_search.js +237 -0
  98. package/pathways/system/entity/tools/sys_tool_image.js +1 -1
  99. package/pathways/system/rest_streaming/sys_claude_37_sonnet.js +21 -0
  100. package/pathways/system/rest_streaming/sys_claude_41_opus.js +21 -0
  101. package/pathways/system/rest_streaming/sys_claude_4_sonnet.js +21 -0
  102. package/pathways/system/rest_streaming/sys_google_gemini_25_flash.js +25 -0
  103. package/pathways/system/rest_streaming/{sys_google_gemini_chat.js → sys_google_gemini_25_pro.js} +6 -4
  104. package/pathways/system/rest_streaming/sys_grok_4.js +23 -0
  105. package/pathways/system/rest_streaming/sys_grok_4_fast_non_reasoning.js +23 -0
  106. package/pathways/system/rest_streaming/sys_grok_4_fast_reasoning.js +23 -0
  107. package/pathways/system/rest_streaming/sys_openai_chat.js +3 -0
  108. package/pathways/system/rest_streaming/sys_openai_chat_gpt41.js +22 -0
  109. package/pathways/system/rest_streaming/sys_openai_chat_gpt41_mini.js +21 -0
  110. package/pathways/system/rest_streaming/sys_openai_chat_gpt41_nano.js +21 -0
  111. package/pathways/system/rest_streaming/{sys_claude_35_sonnet.js → sys_openai_chat_gpt4_omni.js} +6 -4
  112. package/pathways/system/rest_streaming/sys_openai_chat_gpt4_omni_mini.js +21 -0
  113. package/pathways/system/rest_streaming/{sys_claude_3_haiku.js → sys_openai_chat_gpt5.js} +7 -5
  114. package/pathways/system/rest_streaming/sys_openai_chat_gpt5_chat.js +21 -0
  115. package/pathways/system/rest_streaming/sys_openai_chat_gpt5_mini.js +21 -0
  116. package/pathways/system/rest_streaming/sys_openai_chat_gpt5_nano.js +21 -0
  117. package/pathways/system/rest_streaming/{sys_openai_chat_o1.js → sys_openai_chat_o3.js} +6 -3
  118. package/pathways/system/rest_streaming/sys_openai_chat_o3_mini.js +3 -0
  119. package/pathways/system/workspaces/run_workspace_prompt.js +99 -0
  120. package/pathways/vision.js +1 -1
  121. package/server/graphql.js +1 -1
  122. package/server/modelExecutor.js +8 -0
  123. package/server/pathwayResolver.js +166 -16
  124. package/server/pathwayResponseParser.js +16 -8
  125. package/server/plugins/azureFoundryAgentsPlugin.js +1 -1
  126. package/server/plugins/claude3VertexPlugin.js +193 -45
  127. package/server/plugins/gemini15ChatPlugin.js +21 -0
  128. package/server/plugins/gemini15VisionPlugin.js +360 -0
  129. package/server/plugins/googleCsePlugin.js +94 -0
  130. package/server/plugins/grokVisionPlugin.js +365 -0
  131. package/server/plugins/modelPlugin.js +3 -1
  132. package/server/plugins/openAiChatPlugin.js +106 -13
  133. package/server/plugins/openAiVisionPlugin.js +42 -30
  134. package/server/resolver.js +28 -4
  135. package/server/rest.js +270 -53
  136. package/server/typeDef.js +1 -0
  137. package/tests/{mocks.js → helpers/mocks.js} +5 -2
  138. package/tests/{server.js → helpers/server.js} +2 -2
  139. package/tests/helpers/sseAssert.js +23 -0
  140. package/tests/helpers/sseClient.js +73 -0
  141. package/tests/helpers/subscriptionAssert.js +11 -0
  142. package/tests/helpers/subscriptions.js +113 -0
  143. package/tests/{sublong.srt → integration/features/translate/sublong.srt} +4543 -4543
  144. package/tests/integration/features/translate/translate_chunking_stream.test.js +100 -0
  145. package/tests/{translate_srt.test.js → integration/features/translate/translate_srt.test.js} +2 -2
  146. package/tests/integration/graphql/async/stream/agentic.test.js +477 -0
  147. package/tests/integration/graphql/async/stream/subscription_streaming.test.js +62 -0
  148. package/tests/integration/graphql/async/stream/sys_entity_start_streaming.test.js +71 -0
  149. package/tests/integration/graphql/async/stream/vendors/claude_streaming.test.js +56 -0
  150. package/tests/integration/graphql/async/stream/vendors/gemini_streaming.test.js +66 -0
  151. package/tests/integration/graphql/async/stream/vendors/grok_streaming.test.js +56 -0
  152. package/tests/integration/graphql/async/stream/vendors/openai_streaming.test.js +72 -0
  153. package/tests/integration/graphql/features/google/sysToolGoogleSearch.test.js +96 -0
  154. package/tests/integration/graphql/features/grok/grok.test.js +688 -0
  155. package/tests/integration/graphql/features/grok/grok_x_search_tool.test.js +354 -0
  156. package/tests/{main.test.js → integration/graphql/features/main.test.js} +1 -1
  157. package/tests/{call_tools.test.js → integration/graphql/features/tools/call_tools.test.js} +2 -2
  158. package/tests/{vision.test.js → integration/graphql/features/vision/vision.test.js} +1 -1
  159. package/tests/integration/graphql/subscriptions/connection.test.js +26 -0
  160. package/tests/{openai_api.test.js → integration/rest/oai/openai_api.test.js} +63 -238
  161. package/tests/integration/rest/oai/tool_calling_api.test.js +343 -0
  162. package/tests/integration/rest/oai/tool_calling_streaming.test.js +85 -0
  163. package/tests/integration/rest/vendors/claude_streaming.test.js +47 -0
  164. package/tests/integration/rest/vendors/claude_tool_calling_streaming.test.js +75 -0
  165. package/tests/integration/rest/vendors/gemini_streaming.test.js +47 -0
  166. package/tests/integration/rest/vendors/gemini_tool_calling_streaming.test.js +75 -0
  167. package/tests/integration/rest/vendors/grok_streaming.test.js +55 -0
  168. package/tests/integration/rest/vendors/grok_tool_calling_streaming.test.js +75 -0
  169. package/tests/{azureAuthTokenHelper.test.js → unit/core/azureAuthTokenHelper.test.js} +1 -1
  170. package/tests/{chunkfunction.test.js → unit/core/chunkfunction.test.js} +2 -2
  171. package/tests/{config.test.js → unit/core/config.test.js} +3 -3
  172. package/tests/{encodeCache.test.js → unit/core/encodeCache.test.js} +1 -1
  173. package/tests/{fastLruCache.test.js → unit/core/fastLruCache.test.js} +1 -1
  174. package/tests/{handleBars.test.js → unit/core/handleBars.test.js} +1 -1
  175. package/tests/{memoryfunction.test.js → unit/core/memoryfunction.test.js} +2 -2
  176. package/tests/unit/core/mergeResolver.test.js +952 -0
  177. package/tests/{parser.test.js → unit/core/parser.test.js} +3 -3
  178. package/tests/unit/core/pathwayResolver.test.js +187 -0
  179. package/tests/{requestMonitor.test.js → unit/core/requestMonitor.test.js} +1 -1
  180. package/tests/{requestMonitorDurationEstimator.test.js → unit/core/requestMonitorDurationEstimator.test.js} +1 -1
  181. package/tests/{truncateMessages.test.js → unit/core/truncateMessages.test.js} +3 -3
  182. package/tests/{util.test.js → unit/core/util.test.js} +1 -1
  183. package/tests/{apptekTranslatePlugin.test.js → unit/plugins/apptekTranslatePlugin.test.js} +3 -3
  184. package/tests/{azureFoundryAgents.test.js → unit/plugins/azureFoundryAgents.test.js} +136 -1
  185. package/tests/{claude3VertexPlugin.test.js → unit/plugins/claude3VertexPlugin.test.js} +32 -10
  186. package/tests/{claude3VertexToolConversion.test.js → unit/plugins/claude3VertexToolConversion.test.js} +3 -3
  187. package/tests/unit/plugins/googleCsePlugin.test.js +111 -0
  188. package/tests/unit/plugins/grokVisionPlugin.test.js +1392 -0
  189. package/tests/{modelPlugin.test.js → unit/plugins/modelPlugin.test.js} +3 -3
  190. package/tests/{multimodal_conversion.test.js → unit/plugins/multimodal_conversion.test.js} +4 -4
  191. package/tests/{openAiChatPlugin.test.js → unit/plugins/openAiChatPlugin.test.js} +13 -4
  192. package/tests/{openAiToolPlugin.test.js → unit/plugins/openAiToolPlugin.test.js} +35 -27
  193. package/tests/{tokenHandlingTests.test.js → unit/plugins/tokenHandlingTests.test.js} +5 -5
  194. package/tests/{translate_apptek.test.js → unit/plugins/translate_apptek.test.js} +3 -3
  195. package/tests/{streaming.test.js → unit/plugins.streaming/plugin_stream_events.test.js} +19 -58
  196. package/helper-apps/mogrt-handler/tests/test-files/test.gif +0 -1
  197. package/helper-apps/mogrt-handler/tests/test-files/test.mogrt +0 -1
  198. package/helper-apps/mogrt-handler/tests/test-files/test.mp4 +0 -1
  199. package/pathways/system/rest_streaming/sys_openai_chat_gpt4.js +0 -19
  200. package/pathways/system/rest_streaming/sys_openai_chat_gpt4_32.js +0 -19
  201. package/pathways/system/rest_streaming/sys_openai_chat_gpt4_turbo.js +0 -19
  202. package/pathways/system/workspaces/run_claude35_sonnet.js +0 -21
  203. package/pathways/system/workspaces/run_claude3_haiku.js +0 -20
  204. package/pathways/system/workspaces/run_gpt35turbo.js +0 -20
  205. package/pathways/system/workspaces/run_gpt4.js +0 -20
  206. package/pathways/system/workspaces/run_gpt4_32.js +0 -20
  207. package/tests/agentic.test.js +0 -256
  208. package/tests/pathwayResolver.test.js +0 -78
  209. package/tests/subscription.test.js +0 -387
  210. /package/tests/{subchunk.srt → integration/features/translate/subchunk.srt} +0 -0
  211. /package/tests/{subhorizontal.srt → integration/features/translate/subhorizontal.srt} +0 -0
@@ -0,0 +1,93 @@
1
+ # Keynote to PDF Conversion Utilities
2
+
3
+ This directory contains utilities related to converting Apple Keynote (`.key`) files to PDF format, particularly focusing on processing files stored in Azure Blob Storage.
4
+
5
+ ## `process_azure_keynotes.py`
6
+
7
+ ### Purpose
8
+
9
+ This script connects to an Azure Blob Storage container (specified by the `AZURE_STORAGE_CONTAINER` environment variable), searches for Keynote (`.key`) files, converts them to PDF format if a PDF version doesn't already exist, and uploads the resulting PDF back to the same location in the container.
10
+
11
+ ### Functionality
12
+
13
+ 1. **Connects** to the Azure Blob Storage account specified by the `AZURE_STORAGE_CONNECTION_STRING` environment variable.
14
+ 2. **Lists** all blobs within the container specified by the `AZURE_STORAGE_CONTAINER` environment variable.
15
+ 3. **Identifies** `.key` files.
16
+ 4. **Checks** if a corresponding `.pdf` file (with the same base name) already exists in the container.
17
+ 5. **Skips** processing if the `.pdf` file already exists.
18
+ 6. **Downloads** the `.key` file to a temporary local directory if no `.pdf` exists.
19
+ 7. **Converts** the downloaded `.key` file to PDF using an embedded AppleScript function (adapted from `key_to_pdf.py`). This conversion uses the "Good" image quality setting to potentially reduce file size.
20
+ 8. **Uploads** the generated `.pdf` file back to the original path within the Azure container, replacing the `.key` extension with `.pdf`.
21
+ 9. **Logs** progress, skips, and any errors encountered during the process.
22
+ 10. **Prints** a summary report upon completion.
23
+
24
+ ### Prerequisites
25
+
26
+ * **macOS:** The script relies on AppleScript to interact with the Keynote application.
27
+ * **Keynote Application:** Apple Keynote must be installed.
28
+ * **Python 3.x:** The script is written for Python 3.
29
+ * **Python Libraries:** Requires the `azure-storage-blob` library. Install dependencies using the `requirements.txt` file in this directory: `pip install -r helper-apps/cortex-file-converter/requirements.txt` (adjust the path as necessary relative to the directory you run the command from).
30
+ * **Azure Connection String:** The environment variable `AZURE_STORAGE_CONNECTION_STRING` must be set to a valid connection string for the Azure Storage account containing the target container.
31
+ * **Azure Container Name:** The environment variable `AZURE_STORAGE_CONTAINER` must be set to the name of the target container.
32
+ * **Automation Permissions:** You might need to grant permission for your terminal application (e.g., Terminal, iTerm) or Python itself to control Keynote. Check `System Settings` > `Privacy & Security` > `Automation`.
33
+
34
+ ### Usage
35
+
36
+ 1. Ensure all prerequisites are met.
37
+ 2. Navigate to the main project root directory in your terminal.
38
+ 3. Set the required environment variables:
39
+ ```bash
40
+ export AZURE_STORAGE_CONNECTION_STRING="<your_azure_storage_connection_string>"
41
+ export AZURE_STORAGE_CONTAINER="<your_target_container_name>"
42
+ ```
43
+ 4. Run the script:
44
+ ```bash
45
+ python helper-apps/cortex-file-converter/process_azure_keynotes.py
46
+ ```
47
+
48
+ ## `key_to_pdf.py`
49
+
50
+ This is a standalone command-line utility for converting a single local Keynote file to PDF using AppleScript. It was the basis for the conversion logic now embedded within `process_azure_keynotes.py`.
51
+
52
+ ### Usage (Standalone)
53
+
54
+ ```bash
55
+ python helper-apps/cortex-file-converter/key_to_pdf.py <input_keynote_file.key> [-o <output_pdf_file.pdf>]
56
+ ```
57
+
58
+ If the output path (`-o`) is omitted, the PDF will be saved in the same directory as the input file with a `.pdf` extension.
59
+
60
+ ## `list_blob_extensions.py`
61
+
62
+ ### Purpose
63
+
64
+ This script connects to an Azure Blob Storage container (specified by the `AZURE_STORAGE_CONTAINER` environment variable), lists all the blobs within it, and reports the unique file extensions found. It also marks extensions known to be typically unsupported for content cracking by the default Azure Cognitive Search indexer configuration.
65
+
66
+ ### Functionality
67
+
68
+ 1. **Connects** to the Azure Blob Storage account specified by the `AZURE_STORAGE_CONNECTION_STRING` environment variable and the container specified by `AZURE_STORAGE_CONTAINER`.
69
+ 2. **Lists** all blobs within the specified container.
70
+ 3. **Extracts** the file extension from each blob name.
71
+ 4. **Collects** all unique extensions found.
72
+ 5. **Prints** a list of unique extensions, marking those present in its predefined `UNSUPPORTED_EXTENSIONS_KNOWN` set.
73
+
74
+ ### Prerequisites
75
+
76
+ * **Python 3.x:** The script is written for Python 3.
77
+ * **Python Libraries:** Requires the `azure-storage-blob` library. Install dependencies using: `pip install -r requirements.txt`.
78
+ * **Azure Connection String:** The environment variable `AZURE_STORAGE_CONNECTION_STRING` must be set.
79
+ * **Azure Container Name:** The environment variable `AZURE_STORAGE_CONTAINER` must be set to the name of the target container.
80
+
81
+ ### Usage
82
+
83
+ 1. Ensure all prerequisites are met.
84
+ 2. Navigate to the `helper-apps/cortex-file-converter` directory in your terminal.
85
+ 3. Set the required environment variables:
86
+ ```bash
87
+ export AZURE_STORAGE_CONNECTION_STRING="<your_azure_storage_connection_string>"
88
+ export AZURE_STORAGE_CONTAINER="<your_target_container_name>"
89
+ ```
90
+ 4. Run the script:
91
+ ```bash
92
+ python list_blob_extensions.py
93
+ ```
@@ -0,0 +1,104 @@
1
+ import argparse
2
+ import subprocess
3
+ import os
4
+ import sys
5
+
6
def _applescript_quote(value):
    """Escape a value so it can be embedded inside an AppleScript "..." literal."""
    # Backslashes must be doubled first so the backslash added for quotes is not re-escaped.
    return str(value).replace('\\', '\\\\').replace('"', '\\"')


def convert_key_to_pdf(key_path, pdf_path):
    """
    Converts a Keynote file (.key) to PDF using AppleScript.

    Args:
        key_path: Path to the input Keynote file.
        pdf_path: Path the exported PDF is written to. An existing file at
            this path is removed first (failure to remove is only reported).

    The function terminates the process with exit status 1 (via sys.exit)
    when the input file is missing, the output directory cannot be created,
    osascript returns a non-zero status, the call times out, or any other
    exception is raised while running osascript.
    """
    if not os.path.exists(key_path):
        print(f"Error: Input file not found: {key_path}", file=sys.stderr)
        sys.exit(1)

    # Ensure the output directory exists
    output_dir = os.path.dirname(pdf_path)
    if output_dir and not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError as e:
            print(f"Error creating output directory {output_dir}: {e}", file=sys.stderr)
            sys.exit(1)

    # Check if output PDF exists and delete it
    if os.path.exists(pdf_path):
        try:
            os.remove(pdf_path)
            print(f"Removed existing output file: {pdf_path}")
        except OSError as e:
            # Deliberately non-fatal: the export is still attempted.
            print(f"Error removing existing output file {pdf_path}: {e}", file=sys.stderr)

    # Paths are embedded in AppleScript string literals, so quotes and
    # backslashes must be escaped; otherwise such a path would terminate the
    # literal early and corrupt (or inject into) the script.
    script_key_path = _applescript_quote(key_path)
    script_pdf_path = _applescript_quote(pdf_path)

    applescript = f'''
    tell application "Keynote"
        try
            set theDocument to open POSIX file "{script_key_path}"
            if not (exists theDocument) then error "Failed to open document."

            set pdf_export_settings to {{PDF image quality:Good}} -- Define settings as record (escaped braces)

            with timeout of 1200 seconds -- Allow 20 minutes for export
                export theDocument to POSIX file "{script_pdf_path}" as PDF with properties pdf_export_settings -- Use settings record
            end timeout

            close theDocument saving no
            log "Successfully exported {script_key_path} to {script_pdf_path}"
        on error errMsg number errNum
            log "AppleScript Error: " & errMsg & " (Number: " & errNum & ")"
            # Try to quit Keynote even if there was an error during export/close
            try
                if exists theDocument then
                    close theDocument saving no
                end if
            end try
            error "Keynote conversion failed: " & errMsg number errNum
        end try
        -- Optional: Quit Keynote after conversion
        -- quit
    end tell
    '''

    try:
        # Using osascript to run the AppleScript. The Python-side timeout is
        # slightly longer than the 1200 s AppleScript timeout.
        subprocess.run(['osascript', '-e', applescript],
                       capture_output=True, text=True, check=True, timeout=1260)
        print(f"Successfully converted '{key_path}' to '{pdf_path}'")
    except subprocess.CalledProcessError as e:
        print(f"Error executing AppleScript: {e}", file=sys.stderr)
        print(f"stdout:\n{e.stdout}", file=sys.stderr)
        print(f"stderr:\n{e.stderr}", file=sys.stderr)
        sys.exit(1)
    except subprocess.TimeoutExpired:
        print(f"Error: AppleScript execution timed out for {key_path}", file=sys.stderr)
        # It's hard to reliably kill the Keynote process started by AppleScript here,
        # as Keynote might still be hung. Manual intervention might be needed.
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected Python error occurred: {e}", file=sys.stderr)
        sys.exit(1)
83
+
84
+
85
def main():
    """Command-line entry point: parse arguments and convert one Keynote file.

    The output path defaults to the input path with its extension replaced
    by ``.pdf`` when ``-o/--output`` is not given.
    """
    cli = argparse.ArgumentParser(description="Convert Keynote (.key) files to PDF.")
    cli.add_argument("input_key_file", help="Path to the input Keynote file.")
    cli.add_argument("-o", "--output", help="Path for the output PDF file. Defaults to the same name as the input file but with a .pdf extension.")
    options = cli.parse_args()

    source = os.path.abspath(options.input_key_file)
    # Fall back to "<input without extension>.pdf" when no output was given.
    destination = (
        os.path.abspath(options.output)
        if options.output
        else os.path.splitext(source)[0] + ".pdf"
    )

    convert_key_to_pdf(source, destination)
102
+
103
+ if __name__ == "__main__":
104
+ main()
@@ -0,0 +1,89 @@
1
+ import os
2
+ import sys
3
+ from azure.storage.blob import BlobServiceClient
4
+ from pathlib import Path
5
+
6
# Extensions (without the leading dot) that are typically NOT content-cracked
# by Azure Cognitive Search in its default configuration.
# This is a convenience list of common formats, not the official one; consult
# the Azure docs for the authoritative list of *supported* formats.
UNSUPPORTED_EXTENSIONS_KNOWN = {
    # Apple Keynote (as requested)
    'key',
    # Archive formats often need specific skills/configurations
    'zip', 'rar', 'gz', 'tar', '7z', 'pkg',
    # Disk images
    'dmg', 'iso',
    # Executables
    'exe', 'dll',
    # Video formats
    'mp4', 'mov', 'avi', 'wmv',
    # Audio formats
    'mp3', 'wav', 'aac',
    # Compiled artifacts: Python, Java, C/C++ objects, static and shared libraries
    'pyc', 'class', 'o', 'a', 'so',
    # Add other known unsupported types if needed
}
35
+
36
def main():
    """List the unique file extensions of all blobs in an Azure container.

    Reads AZURE_STORAGE_CONNECTION_STRING and AZURE_STORAGE_CONTAINER from the
    environment, enumerates every blob in that container, and prints each
    distinct lower-cased extension, flagging those present in
    UNSUPPORTED_EXTENSIONS_KNOWN. Exits with status 1 if a variable is
    missing or if connecting to / listing the container fails.
    """
    # --- Configuration ---
    try:
        connection_string = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
        container_name = os.environ["AZURE_STORAGE_CONTAINER"]
    except KeyError as missing:
        print(f"Error: Required environment variable {missing} is not set.", file=sys.stderr)
        sys.exit(1)

    print(f"Connecting to Azure Blob Storage container: {container_name}")
    try:
        service = BlobServiceClient.from_connection_string(connection_string)
        container_client = service.get_container_client(container_name)
        # Fetching the properties fails early if the container is not accessible.
        container_client.get_container_properties()
    except Exception as err:
        print(f"Error connecting to or accessing container '{container_name}': {err}", file=sys.stderr)
        sys.exit(1)

    print("Listing blobs and collecting extensions...")
    seen_extensions = set()
    processed = 0
    try:
        for processed, blob in enumerate(container_client.list_blobs(), start=1):
            dotted = Path(blob.name).suffix.lower()
            if dotted:
                # Keep the extension without its leading dot.
                seen_extensions.add(dotted[1:])
            # Periodic progress output for very large containers.
            if processed % 10000 == 0:
                print(f"  Processed {processed} blobs...")

        print(f"Finished processing {processed} blobs.")
    except Exception as err:
        print(f"Error listing blobs in container '{container_name}': {err}", file=sys.stderr)
        sys.exit(1)

    rule = "-" * 30
    print("\n" + rule)
    print(f"Found {len(seen_extensions)} unique file extensions:")
    print(rule)

    # Sorted so the report is stable between runs.
    for ext in sorted(seen_extensions):
        if ext in UNSUPPORTED_EXTENSIONS_KNOWN:
            marker = "(Unsupported by Indexer)"
        else:
            marker = ""
        print(f".{ext} {marker}")
87
+
88
+ if __name__ == "__main__":
89
+ main()
@@ -0,0 +1,181 @@
1
+ import os
2
+ import subprocess
3
+ import sys
4
+ import tempfile
5
+ from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
6
+ from pathlib import Path
7
+
8
+ # --- Conversion Function (Adapted from key_to_pdf.py) ---
9
class ConversionError(Exception):
    """Raised when a Keynote file cannot be converted to PDF."""


def _applescript_string(value):
    """Return ``value`` as a double-quoted AppleScript string literal.

    Backslashes and double quotes are backslash-escaped so that a path
    containing them cannot terminate the literal early or alter the script.
    """
    escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
    return f'"{escaped}"'


def convert_key_to_pdf(key_path, pdf_path):
    """Convert a Keynote file (.key) to PDF by driving Keynote through ``osascript``.

    Args:
        key_path: Path of the existing Keynote file to open.
        pdf_path: Path the PDF is exported to; its parent directory is created
            when missing.

    Raises:
        FileNotFoundError: ``key_path`` does not exist.
        ConversionError: the output directory cannot be created, ``osascript``
            cannot be run, exits non-zero or times out, or no PDF exists at
            ``pdf_path`` after a reported success.
    """
    if not os.path.exists(key_path):
        raise FileNotFoundError(f"Input Keynote file not found: {key_path}")

    # Ensure the output directory exists. exist_ok avoids the check-then-create race.
    output_dir = os.path.dirname(pdf_path)
    if output_dir:
        try:
            os.makedirs(output_dir, exist_ok=True)
        except OSError as e:
            raise ConversionError(f"Cannot create output directory {output_dir}: {e}") from e

    # NOTE: an existing file at pdf_path is not removed here; callers that reuse
    # an output location are expected to clear it first.

    # Paths are embedded as escaped literals; they are deliberately kept out of
    # AppleScript comments so that no raw, caller-supplied text enters the script.
    key_literal = _applescript_string(key_path)
    pdf_literal = _applescript_string(pdf_path)

    applescript = f'''
tell application "Keynote"
    try
        set theDocument to open POSIX file {key_literal}
        if not (exists theDocument) then error "Failed to open document."

        set pdf_export_settings to {{PDF image quality:Good}} -- export settings record

        with timeout of 1200 seconds -- Allow 20 minutes for export
            export theDocument to POSIX file {pdf_literal} as PDF with properties pdf_export_settings
        end timeout

        close theDocument saving no
    on error errMsg number errNum
        -- Best effort: close the document even if export/close failed
        try
            if exists theDocument then
                close theDocument saving no
            end if
        end try
        error "Keynote conversion failed: " & errMsg number errNum
    end try
    -- Keynote is intentionally left running after the conversion
end tell
'''

    # Only the subprocess call sits inside the try block, so the ConversionErrors
    # raised by the checks below are not re-wrapped as "unexpected" errors.
    try:
        # Subprocess timeout is slightly longer than the 1200 s AppleScript timeout.
        process = subprocess.run(['osascript', '-e', applescript],
                                 capture_output=True, text=True, check=False, timeout=1260)
    except subprocess.TimeoutExpired as e:
        raise ConversionError(f"AppleScript execution timed out for {key_path}") from e
    except Exception as e:
        # e.g. osascript is not available on this machine
        raise ConversionError(f"An unexpected error occurred during conversion process for {key_path}: {e}") from e

    if process.returncode != 0:
        raise ConversionError(f"AppleScript execution failed (Code {process.returncode}) for {key_path}. stderr: {process.stderr.strip()}")

    # Final check that the PDF exists after a successful run
    if not os.path.exists(pdf_path):
        raise ConversionError(f"Conversion reported success but output PDF not found: {pdf_path}")
85
+
86
+ # --- Main Azure Processing Logic ---
87
+
88
def main():
    """Convert every Keynote blob in a container to PDF and upload the result.

    Reads AZURE_STORAGE_CONNECTION_STRING and AZURE_STORAGE_CONTAINER from the
    environment. For each blob whose name ends in ".key" (case-insensitive) and
    that has no sibling ".pdf" blob, the file is downloaded to a temporary
    directory, converted with convert_key_to_pdf, and uploaded next to the
    original. Exits with status 1 if configuration is missing, the container
    cannot be reached, or listing fails; per-file failures are counted and
    reported in the final summary.
    """
    # --- Configuration ---
    try:
        connection_string = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
        container_name = os.environ["AZURE_STORAGE_CONTAINER"]
    except KeyError as e:
        print(f"Error: Required environment variable {e} is not set.", file=sys.stderr)
        sys.exit(1)

    print(f"Connecting to Azure Blob Storage container: {container_name}")
    try:
        blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        container_client = blob_service_client.get_container_client(container_name)
        # Check if container exists by trying to get properties
        container_client.get_container_properties()
    except Exception as e:
        print(f"Error connecting to or accessing container '{container_name}': {e}", file=sys.stderr)
        sys.exit(1)

    print("Listing blobs...")
    all_blob_names = set()
    try:
        for blob in container_client.list_blobs():
            all_blob_names.add(blob.name)
        print(f"Found {len(all_blob_names)} blobs in total.")
    except Exception as e:
        print(f"Error listing blobs in container '{container_name}': {e}", file=sys.stderr)
        sys.exit(1)

    print("Starting processing...")
    processed_count = 0
    skipped_count = 0
    error_count = 0

    # Iterate over a sorted snapshot: the order is reproducible between runs and
    # the set can safely be extended with newly uploaded PDFs while looping.
    for blob_name in sorted(all_blob_names):
        if not blob_name.lower().endswith(".key"):
            continue

        print(f"Found Keynote file: {blob_name}")
        # Swap only the extension. Going through pathlib would normalise the
        # name (collapse "//", drop "./") and could target a different blob.
        pdf_blob_name = os.path.splitext(blob_name)[0] + ".pdf"

        if pdf_blob_name in all_blob_names:
            print(f" -> PDF version already exists: {pdf_blob_name}. Skipping.")
            skipped_count += 1
            continue

        print(" -> PDF version not found. Attempting conversion.")

        # Use a temporary directory for download and conversion; it is removed
        # together with its contents when the with-block exits.
        with tempfile.TemporaryDirectory() as temp_dir:
            local_key_path = os.path.join(temp_dir, os.path.basename(blob_name))
            local_pdf_path = os.path.splitext(local_key_path)[0] + ".pdf"

            try:
                # 1. Download .key file, streamed to disk rather than buffered in memory
                print(f" Downloading {blob_name} to {local_key_path}...")
                blob_client = container_client.get_blob_client(blob_name)
                with open(local_key_path, "wb") as download_file:
                    blob_client.download_blob().readinto(download_file)
                print(" Download complete.")

                # 2. Convert .key to .pdf (using local function)
                print(f" Converting {local_key_path} to {local_pdf_path}...")
                try:
                    convert_key_to_pdf(local_key_path, local_pdf_path)
                    print(" Conversion successful.")
                except (FileNotFoundError, ConversionError) as convert_err:
                    print(f" ERROR: Conversion failed for {local_key_path}: {convert_err}", file=sys.stderr)
                    error_count += 1
                    continue  # Skip upload if conversion failed

                # 3. Upload .pdf file
                print(f" Uploading {local_pdf_path} to {pdf_blob_name}...")
                pdf_blob_client = container_client.get_blob_client(pdf_blob_name)
                with open(local_pdf_path, "rb") as upload_file:
                    pdf_blob_client.upload_blob(upload_file, overwrite=True, timeout=600)  # 10 minute timeout
                print(" Upload complete.")
                processed_count += 1
                # Remember the new PDF so a case-variant twin (e.g. "a.KEY") is
                # skipped instead of being converted again and overwriting it.
                all_blob_names.add(pdf_blob_name)

            except Exception as e:
                print(f" ERROR processing file {blob_name}: {e}", file=sys.stderr)
                error_count += 1

    print("-"*30)
    print("Processing complete.")
    print(f"Successfully converted and uploaded: {processed_count}")
    print(f"Skipped (PDF already exists): {skipped_count}")
    print(f"Errors encountered: {error_count}")


if __name__ == "__main__":
    main()
@@ -0,0 +1 @@
1
+ azure-storage-blob
@@ -0,0 +1,7 @@
1
+ # Test environment configuration for Azure tests
2
+ REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
3
+ AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
4
+ AZURE_STORAGE_CONTAINER_NAME=default,test-container,test1,test2,test3,container1,container2,container3
5
+ NODE_ENV=test
6
+ PORT=7072 # Different port for testing
7
+ MARKITDOWN_CONVERT_URL= #cortex-markitdown url
@@ -1,7 +1,7 @@
1
1
  # Test environment configuration for Azure tests
2
2
  REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
3
3
  AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
4
- AZURE_STORAGE_CONTAINER_NAME=test-container
4
+ AZURE_STORAGE_CONTAINER_NAME=default,test-container,test1,test2,test3,container1,container2,container3
5
5
  NODE_ENV=test
6
6
  PORT=7072 # Different port for testing
7
7
  MARKITDOWN_CONVERT_URL= #cortex-markitdown url
@@ -0,0 +1,10 @@
1
+ # Test environment configuration for Azure tests
2
+ REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
3
+ STORAGE_EMULATOR_HOST=http://localhost:4443
4
+ GCP_SERVICE_ACCOUNT_KEY={"project_id":"test-project"}
5
+ GCS_BUCKETNAME=cortextempfiles
6
+ AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
7
+ AZURE_STORAGE_CONTAINER_NAME=default,test-container,test1,test2,test3,container1,container2,container3
8
+ NODE_ENV=test
9
+ PORT=7072 # Different port for testing
10
+ MARKITDOWN_CONVERT_URL= #cortex-markitdown url
@@ -1,10 +1,10 @@
1
1
  # Test environment configuration for Azure tests
2
2
  REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
3
- GCP_SERVICE_ACCOUNT_KEY={"project_id":"test-project"}
4
3
  STORAGE_EMULATOR_HOST=http://localhost:4443
4
+ GCP_SERVICE_ACCOUNT_KEY={"project_id":"test-project"}
5
5
  GCS_BUCKETNAME=cortextempfiles
6
6
  AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
7
- AZURE_STORAGE_CONTAINER_NAME=test-container
7
+ AZURE_STORAGE_CONTAINER_NAME=default,test-container,test1,test2,test3,container1,container2,container3
8
8
  NODE_ENV=test
9
9
  PORT=7072 # Different port for testing
10
10
  MARKITDOWN_CONVERT_URL= #cortex-markitdown url
@@ -43,6 +43,11 @@ The Cortex File Handler is a service that processes files through various operat
43
43
  - `hash` (optional): Unique identifier for the file
44
44
  - `checkHash` (optional): Check if hash exists
45
45
  - `clearHash` (optional): Remove hash from storage
46
+ - `generateShortLived` (optional): Generate a short-lived URL for an existing hash
47
+ - Requires `hash` parameter
48
+ - Generates a new SAS token with a short expiration time
49
+ - Returns a temporary URL for secure sharing
50
+ - `shortLivedMinutes` (optional): Duration in minutes for short-lived URLs (default: 5)
46
51
  - `fetch`/`load`/`restore` (optional): URL to fetch remote file (these are aliases - any of the three parameters will trigger the same remote file processing behavior)
47
52
  - Does not require `requestId`
48
53
  - Uses Redis caching
@@ -73,6 +78,12 @@ The Cortex File Handler is a service that processes files through various operat
73
78
  - Updates Redis timestamp on subsequent requests
74
79
  - Truncates filenames longer than 200 characters
75
80
  - Ensures correct file extension based on content type
81
+ - For checkHash (`checkHash=true`):
82
+ - Requires valid `hash` parameter
83
+ - Checks if file exists in storage and restores if needed
84
+ - Always generates a new SAS token with a short expiration (default: 5 minutes; override with `shortLivedMinutes`)
85
+ - Returns file information with temporary URL and expiration information
86
+ - Updates Redis timestamp
76
87
 
77
88
  ### DELETE
78
89
 
@@ -172,6 +183,36 @@ The Cortex File Handler is a service that processes files through various operat
172
183
  - After successful processing
173
184
  - On error conditions
174
185
 
186
+ ## Usage Examples
187
+
188
+ ### Check Hash (Always Returns Short-Lived URL)
189
+
190
+ ```bash
191
+ # Check hash with 5-minute short-lived URL (default)
192
+ GET /file-handler?hash=abc123&checkHash=true
193
+
194
+ # Check hash with 10-minute short-lived URL
195
+ GET /file-handler?hash=abc123&checkHash=true&shortLivedMinutes=10
196
+ ```
197
+
198
+ **Response (always includes short-lived URL):**
199
+ ```json
200
+ {
201
+ "message": "File 'document.pdf' uploaded successfully.",
202
+ "filename": "document.pdf",
203
+ "url": "https://storage.blob.core.windows.net/container/file.pdf?original-sas-token",
204
+ "gcs": "gs://bucket/file.pdf",
205
+ "hash": "abc123",
206
+ "shortLivedUrl": "https://storage.blob.core.windows.net/container/file.pdf?sv=2023-11-03&se=2024-01-15T10%3A15%3A00Z&sr=b&sp=r&sig=...",
207
+ "expiresInMinutes": 5,
208
+ "timestamp": "2024-01-15T10:10:00.000Z",
209
+ "converted": {
210
+ "url": "https://storage.blob.core.windows.net/container/converted.pdf",
211
+ "gcs": "gs://bucket/converted.pdf"
212
+ }
213
+ }
214
+ ```
215
+
175
216
  ## Error Handling
176
217
 
177
218
  - **400 Bad Request**:
@@ -45,7 +45,7 @@
45
45
  "!tests/test-docs/**/*",
46
46
  "!tests/mocks/**/*"
47
47
  ],
48
- "timeout": "1m",
48
+ "timeout": "10m",
49
49
  "nodeArguments": [
50
50
  "--experimental-modules"
51
51
  ],
@@ -1,25 +1,49 @@
1
1
  import { BlobServiceClient } from "@azure/storage-blob";
2
2
 
3
/**
 * Creates every Azure Blob Storage container named in the comma-separated
 * AZURE_STORAGE_CONTAINER_NAME environment variable, connecting with
 * AZURE_STORAGE_CONNECTION_STRING.
 *
 * Blank entries (e.g. from a trailing comma) and duplicates are ignored.
 * Containers that already exist are left untouched. Any other failure is
 * logged and terminates the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function createContainers() {
  try {
    // Check if required environment variables are set
    const connectionString = process.env.AZURE_STORAGE_CONNECTION_STRING;
    const containerNames = process.env.AZURE_STORAGE_CONTAINER_NAME;

    if (!connectionString) {
      throw new Error("AZURE_STORAGE_CONNECTION_STRING environment variable is required");
    }

    if (!containerNames) {
      throw new Error("AZURE_STORAGE_CONTAINER_NAME environment variable is required");
    }

    // Parse container names up front so blank entries never reach the log or
    // the service, and a value such as "," is reported instead of "succeeding"
    // without creating anything.
    const containers = [
      ...new Set(
        containerNames
          .split(",")
          .map((name) => name.trim())
          .filter((name) => name !== ""),
      ),
    ];

    if (containers.length === 0) {
      throw new Error("AZURE_STORAGE_CONTAINER_NAME must list at least one container name");
    }

    const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);
    console.log(`Creating containers: ${containers.join(", ")}`);

    // Create each container sequentially so the log stays readable.
    for (const containerName of containers) {
      try {
        const containerClient = blobServiceClient.getContainerClient(containerName);
        // createIfNotExists only swallows "container already exists"; other
        // conflicts (e.g. a container still being deleted) surface as errors
        // instead of being mistaken for success.
        const { succeeded } = await containerClient.createIfNotExists();
        if (succeeded) {
          console.log(`✅ Container '${containerName}' created successfully`);
        } else {
          console.log(`✅ Container '${containerName}' already exists`);
        }
      } catch (error) {
        console.error(`❌ Error creating container '${containerName}':`, error.message);
        throw error;
      }
    }

    console.log("🎉 All containers setup completed");
  } catch (error) {
    console.error("❌ Container setup failed:", error.message);
    process.exit(1);
  }
}
24
48
 
25
- createContainer();
49
+ createContainers();