@aj-archipelago/cortex 1.3.62 → 1.3.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/cortex-file-handler-test.yml +61 -0
- package/README.md +31 -7
- package/config/default.example.json +15 -0
- package/config.js +133 -12
- package/helper-apps/cortex-autogen2/DigiCertGlobalRootCA.crt.pem +22 -0
- package/helper-apps/cortex-autogen2/Dockerfile +31 -0
- package/helper-apps/cortex-autogen2/Dockerfile.worker +41 -0
- package/helper-apps/cortex-autogen2/README.md +183 -0
- package/helper-apps/cortex-autogen2/__init__.py +1 -0
- package/helper-apps/cortex-autogen2/agents.py +131 -0
- package/helper-apps/cortex-autogen2/docker-compose.yml +20 -0
- package/helper-apps/cortex-autogen2/function_app.py +55 -0
- package/helper-apps/cortex-autogen2/host.json +15 -0
- package/helper-apps/cortex-autogen2/main.py +126 -0
- package/helper-apps/cortex-autogen2/poetry.lock +3652 -0
- package/helper-apps/cortex-autogen2/pyproject.toml +36 -0
- package/helper-apps/cortex-autogen2/requirements.txt +20 -0
- package/helper-apps/cortex-autogen2/send_task.py +105 -0
- package/helper-apps/cortex-autogen2/services/__init__.py +1 -0
- package/helper-apps/cortex-autogen2/services/azure_queue.py +85 -0
- package/helper-apps/cortex-autogen2/services/redis_publisher.py +153 -0
- package/helper-apps/cortex-autogen2/task_processor.py +488 -0
- package/helper-apps/cortex-autogen2/tools/__init__.py +24 -0
- package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +175 -0
- package/helper-apps/cortex-autogen2/tools/azure_foundry_agents.py +601 -0
- package/helper-apps/cortex-autogen2/tools/coding_tools.py +72 -0
- package/helper-apps/cortex-autogen2/tools/download_tools.py +48 -0
- package/helper-apps/cortex-autogen2/tools/file_tools.py +545 -0
- package/helper-apps/cortex-autogen2/tools/search_tools.py +646 -0
- package/helper-apps/cortex-azure-cleaner/README.md +36 -0
- package/helper-apps/cortex-file-converter/README.md +93 -0
- package/helper-apps/cortex-file-converter/key_to_pdf.py +104 -0
- package/helper-apps/cortex-file-converter/list_blob_extensions.py +89 -0
- package/helper-apps/cortex-file-converter/process_azure_keynotes.py +181 -0
- package/helper-apps/cortex-file-converter/requirements.txt +1 -0
- package/helper-apps/cortex-file-handler/.env.test.azure.ci +7 -0
- package/helper-apps/cortex-file-handler/.env.test.azure.sample +1 -1
- package/helper-apps/cortex-file-handler/.env.test.gcs.ci +10 -0
- package/helper-apps/cortex-file-handler/.env.test.gcs.sample +2 -2
- package/helper-apps/cortex-file-handler/INTERFACE.md +41 -0
- package/helper-apps/cortex-file-handler/package.json +1 -1
- package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +41 -17
- package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +30 -15
- package/helper-apps/cortex-file-handler/scripts/test-azure.sh +32 -6
- package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +24 -2
- package/helper-apps/cortex-file-handler/scripts/validate-env.js +128 -0
- package/helper-apps/cortex-file-handler/src/blobHandler.js +161 -51
- package/helper-apps/cortex-file-handler/src/constants.js +3 -0
- package/helper-apps/cortex-file-handler/src/fileChunker.js +10 -8
- package/helper-apps/cortex-file-handler/src/index.js +116 -9
- package/helper-apps/cortex-file-handler/src/redis.js +61 -1
- package/helper-apps/cortex-file-handler/src/services/ConversionService.js +11 -8
- package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +2 -2
- package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +88 -6
- package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +58 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +25 -5
- package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +9 -0
- package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +120 -16
- package/helper-apps/cortex-file-handler/src/start.js +27 -17
- package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +52 -1
- package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +40 -0
- package/helper-apps/cortex-file-handler/tests/checkHashShortLived.test.js +553 -0
- package/helper-apps/cortex-file-handler/tests/cleanup.test.js +46 -52
- package/helper-apps/cortex-file-handler/tests/containerConversionFlow.test.js +451 -0
- package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +229 -0
- package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +392 -0
- package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +7 -2
- package/helper-apps/cortex-file-handler/tests/deleteOperations.test.js +348 -0
- package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +23 -2
- package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +11 -5
- package/helper-apps/cortex-file-handler/tests/getOperations.test.js +58 -24
- package/helper-apps/cortex-file-handler/tests/postOperations.test.js +11 -4
- package/helper-apps/cortex-file-handler/tests/shortLivedUrlConversion.test.js +225 -0
- package/helper-apps/cortex-file-handler/tests/start.test.js +8 -12
- package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +80 -0
- package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +388 -22
- package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -0
- package/lib/cortexResponse.js +153 -0
- package/lib/entityConstants.js +21 -3
- package/lib/logger.js +21 -4
- package/lib/pathwayTools.js +28 -9
- package/lib/util.js +49 -0
- package/package.json +1 -1
- package/pathways/basePathway.js +1 -0
- package/pathways/bing_afagent.js +54 -1
- package/pathways/call_tools.js +2 -3
- package/pathways/chat_jarvis.js +1 -1
- package/pathways/google_cse.js +27 -0
- package/pathways/grok_live_search.js +18 -0
- package/pathways/system/entity/memory/sys_memory_lookup_required.js +1 -0
- package/pathways/system/entity/memory/sys_memory_required.js +1 -0
- package/pathways/system/entity/memory/sys_search_memory.js +1 -0
- package/pathways/system/entity/sys_entity_agent.js +56 -4
- package/pathways/system/entity/sys_generator_quick.js +1 -0
- package/pathways/system/entity/tools/sys_tool_bing_search_afagent.js +26 -0
- package/pathways/system/entity/tools/sys_tool_google_search.js +141 -0
- package/pathways/system/entity/tools/sys_tool_grok_x_search.js +237 -0
- package/pathways/system/entity/tools/sys_tool_image.js +1 -1
- package/pathways/system/rest_streaming/sys_claude_37_sonnet.js +21 -0
- package/pathways/system/rest_streaming/sys_claude_41_opus.js +21 -0
- package/pathways/system/rest_streaming/sys_claude_4_sonnet.js +21 -0
- package/pathways/system/rest_streaming/sys_google_gemini_25_flash.js +25 -0
- package/pathways/system/rest_streaming/{sys_google_gemini_chat.js → sys_google_gemini_25_pro.js} +6 -4
- package/pathways/system/rest_streaming/sys_grok_4.js +23 -0
- package/pathways/system/rest_streaming/sys_grok_4_fast_non_reasoning.js +23 -0
- package/pathways/system/rest_streaming/sys_grok_4_fast_reasoning.js +23 -0
- package/pathways/system/rest_streaming/sys_openai_chat.js +3 -0
- package/pathways/system/rest_streaming/sys_openai_chat_gpt41.js +22 -0
- package/pathways/system/rest_streaming/sys_openai_chat_gpt41_mini.js +21 -0
- package/pathways/system/rest_streaming/sys_openai_chat_gpt41_nano.js +21 -0
- package/pathways/system/rest_streaming/{sys_claude_35_sonnet.js → sys_openai_chat_gpt4_omni.js} +6 -4
- package/pathways/system/rest_streaming/sys_openai_chat_gpt4_omni_mini.js +21 -0
- package/pathways/system/rest_streaming/{sys_claude_3_haiku.js → sys_openai_chat_gpt5.js} +7 -5
- package/pathways/system/rest_streaming/sys_openai_chat_gpt5_chat.js +21 -0
- package/pathways/system/rest_streaming/sys_openai_chat_gpt5_mini.js +21 -0
- package/pathways/system/rest_streaming/sys_openai_chat_gpt5_nano.js +21 -0
- package/pathways/system/rest_streaming/{sys_openai_chat_o1.js → sys_openai_chat_o3.js} +6 -3
- package/pathways/system/rest_streaming/sys_openai_chat_o3_mini.js +3 -0
- package/pathways/system/workspaces/run_workspace_prompt.js +99 -0
- package/pathways/vision.js +1 -1
- package/server/graphql.js +1 -1
- package/server/modelExecutor.js +8 -0
- package/server/pathwayResolver.js +166 -16
- package/server/pathwayResponseParser.js +16 -8
- package/server/plugins/azureFoundryAgentsPlugin.js +1 -1
- package/server/plugins/claude3VertexPlugin.js +193 -45
- package/server/plugins/gemini15ChatPlugin.js +21 -0
- package/server/plugins/gemini15VisionPlugin.js +360 -0
- package/server/plugins/googleCsePlugin.js +94 -0
- package/server/plugins/grokVisionPlugin.js +365 -0
- package/server/plugins/modelPlugin.js +3 -1
- package/server/plugins/openAiChatPlugin.js +106 -13
- package/server/plugins/openAiVisionPlugin.js +42 -30
- package/server/resolver.js +28 -4
- package/server/rest.js +270 -53
- package/server/typeDef.js +1 -0
- package/tests/{mocks.js → helpers/mocks.js} +5 -2
- package/tests/{server.js → helpers/server.js} +2 -2
- package/tests/helpers/sseAssert.js +23 -0
- package/tests/helpers/sseClient.js +73 -0
- package/tests/helpers/subscriptionAssert.js +11 -0
- package/tests/helpers/subscriptions.js +113 -0
- package/tests/{sublong.srt → integration/features/translate/sublong.srt} +4543 -4543
- package/tests/integration/features/translate/translate_chunking_stream.test.js +100 -0
- package/tests/{translate_srt.test.js → integration/features/translate/translate_srt.test.js} +2 -2
- package/tests/integration/graphql/async/stream/agentic.test.js +477 -0
- package/tests/integration/graphql/async/stream/subscription_streaming.test.js +62 -0
- package/tests/integration/graphql/async/stream/sys_entity_start_streaming.test.js +71 -0
- package/tests/integration/graphql/async/stream/vendors/claude_streaming.test.js +56 -0
- package/tests/integration/graphql/async/stream/vendors/gemini_streaming.test.js +66 -0
- package/tests/integration/graphql/async/stream/vendors/grok_streaming.test.js +56 -0
- package/tests/integration/graphql/async/stream/vendors/openai_streaming.test.js +72 -0
- package/tests/integration/graphql/features/google/sysToolGoogleSearch.test.js +96 -0
- package/tests/integration/graphql/features/grok/grok.test.js +688 -0
- package/tests/integration/graphql/features/grok/grok_x_search_tool.test.js +354 -0
- package/tests/{main.test.js → integration/graphql/features/main.test.js} +1 -1
- package/tests/{call_tools.test.js → integration/graphql/features/tools/call_tools.test.js} +2 -2
- package/tests/{vision.test.js → integration/graphql/features/vision/vision.test.js} +1 -1
- package/tests/integration/graphql/subscriptions/connection.test.js +26 -0
- package/tests/{openai_api.test.js → integration/rest/oai/openai_api.test.js} +63 -238
- package/tests/integration/rest/oai/tool_calling_api.test.js +343 -0
- package/tests/integration/rest/oai/tool_calling_streaming.test.js +85 -0
- package/tests/integration/rest/vendors/claude_streaming.test.js +47 -0
- package/tests/integration/rest/vendors/claude_tool_calling_streaming.test.js +75 -0
- package/tests/integration/rest/vendors/gemini_streaming.test.js +47 -0
- package/tests/integration/rest/vendors/gemini_tool_calling_streaming.test.js +75 -0
- package/tests/integration/rest/vendors/grok_streaming.test.js +55 -0
- package/tests/integration/rest/vendors/grok_tool_calling_streaming.test.js +75 -0
- package/tests/{azureAuthTokenHelper.test.js → unit/core/azureAuthTokenHelper.test.js} +1 -1
- package/tests/{chunkfunction.test.js → unit/core/chunkfunction.test.js} +2 -2
- package/tests/{config.test.js → unit/core/config.test.js} +3 -3
- package/tests/{encodeCache.test.js → unit/core/encodeCache.test.js} +1 -1
- package/tests/{fastLruCache.test.js → unit/core/fastLruCache.test.js} +1 -1
- package/tests/{handleBars.test.js → unit/core/handleBars.test.js} +1 -1
- package/tests/{memoryfunction.test.js → unit/core/memoryfunction.test.js} +2 -2
- package/tests/unit/core/mergeResolver.test.js +952 -0
- package/tests/{parser.test.js → unit/core/parser.test.js} +3 -3
- package/tests/unit/core/pathwayResolver.test.js +187 -0
- package/tests/{requestMonitor.test.js → unit/core/requestMonitor.test.js} +1 -1
- package/tests/{requestMonitorDurationEstimator.test.js → unit/core/requestMonitorDurationEstimator.test.js} +1 -1
- package/tests/{truncateMessages.test.js → unit/core/truncateMessages.test.js} +3 -3
- package/tests/{util.test.js → unit/core/util.test.js} +1 -1
- package/tests/{apptekTranslatePlugin.test.js → unit/plugins/apptekTranslatePlugin.test.js} +3 -3
- package/tests/{azureFoundryAgents.test.js → unit/plugins/azureFoundryAgents.test.js} +136 -1
- package/tests/{claude3VertexPlugin.test.js → unit/plugins/claude3VertexPlugin.test.js} +32 -10
- package/tests/{claude3VertexToolConversion.test.js → unit/plugins/claude3VertexToolConversion.test.js} +3 -3
- package/tests/unit/plugins/googleCsePlugin.test.js +111 -0
- package/tests/unit/plugins/grokVisionPlugin.test.js +1392 -0
- package/tests/{modelPlugin.test.js → unit/plugins/modelPlugin.test.js} +3 -3
- package/tests/{multimodal_conversion.test.js → unit/plugins/multimodal_conversion.test.js} +4 -4
- package/tests/{openAiChatPlugin.test.js → unit/plugins/openAiChatPlugin.test.js} +13 -4
- package/tests/{openAiToolPlugin.test.js → unit/plugins/openAiToolPlugin.test.js} +35 -27
- package/tests/{tokenHandlingTests.test.js → unit/plugins/tokenHandlingTests.test.js} +5 -5
- package/tests/{translate_apptek.test.js → unit/plugins/translate_apptek.test.js} +3 -3
- package/tests/{streaming.test.js → unit/plugins.streaming/plugin_stream_events.test.js} +19 -58
- package/helper-apps/mogrt-handler/tests/test-files/test.gif +0 -1
- package/helper-apps/mogrt-handler/tests/test-files/test.mogrt +0 -1
- package/helper-apps/mogrt-handler/tests/test-files/test.mp4 +0 -1
- package/pathways/system/rest_streaming/sys_openai_chat_gpt4.js +0 -19
- package/pathways/system/rest_streaming/sys_openai_chat_gpt4_32.js +0 -19
- package/pathways/system/rest_streaming/sys_openai_chat_gpt4_turbo.js +0 -19
- package/pathways/system/workspaces/run_claude35_sonnet.js +0 -21
- package/pathways/system/workspaces/run_claude3_haiku.js +0 -20
- package/pathways/system/workspaces/run_gpt35turbo.js +0 -20
- package/pathways/system/workspaces/run_gpt4.js +0 -20
- package/pathways/system/workspaces/run_gpt4_32.js +0 -20
- package/tests/agentic.test.js +0 -256
- package/tests/pathwayResolver.test.js +0 -78
- package/tests/subscription.test.js +0 -387
- /package/tests/{subchunk.srt → integration/features/translate/subchunk.srt} +0 -0
- /package/tests/{subhorizontal.srt → integration/features/translate/subhorizontal.srt} +0 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# Keynote to PDF Conversion Utilities
|
|
2
|
+
|
|
3
|
+
This directory contains utilities related to converting Apple Keynote (`.key`) files to PDF format, particularly focusing on processing files stored in Azure Blob Storage.
|
|
4
|
+
|
|
5
|
+
## `process_azure_keynotes.py`
|
|
6
|
+
|
|
7
|
+
### Purpose
|
|
8
|
+
|
|
9
|
+
This script connects to an Azure Blob Storage container (specified by the `AZURE_STORAGE_CONTAINER` environment variable), searches for Keynote (`.key`) files, converts them to PDF format if a PDF version doesn't already exist, and uploads the resulting PDF back to the same location in the container.
|
|
10
|
+
|
|
11
|
+
### Functionality
|
|
12
|
+
|
|
13
|
+
1. **Connects** to the Azure Blob Storage account specified by the `AZURE_STORAGE_CONNECTION_STRING` environment variable.
|
|
14
|
+
2. **Lists** all blobs within the container specified by the `AZURE_STORAGE_CONTAINER` environment variable.
|
|
15
|
+
3. **Identifies** `.key` files.
|
|
16
|
+
4. **Checks** if a corresponding `.pdf` file (with the same base name) already exists in the container.
|
|
17
|
+
5. **Skips** processing if the `.pdf` file already exists.
|
|
18
|
+
6. **Downloads** the `.key` file to a temporary local directory if no `.pdf` exists.
|
|
19
|
+
7. **Converts** the downloaded `.key` file to PDF using an embedded AppleScript function (adapted from `key_to_pdf.py`). This conversion uses the "Good" image quality setting to potentially reduce file size.
|
|
20
|
+
8. **Uploads** the generated `.pdf` file back to the original path within the Azure container, replacing the `.key` extension with `.pdf`.
|
|
21
|
+
9. **Logs** progress, skips, and any errors encountered during the process.
|
|
22
|
+
10. **Prints** a summary report upon completion.
|
|
23
|
+
|
|
24
|
+
### Prerequisites
|
|
25
|
+
|
|
26
|
+
* **macOS:** The script relies on AppleScript to interact with the Keynote application.
|
|
27
|
+
* **Keynote Application:** Apple Keynote must be installed.
|
|
28
|
+
* **Python 3.x:** The script is written for Python 3.
|
|
29
|
+
* **Python Libraries:** Requires the `azure-storage-blob` library. Install dependencies using the `requirements.txt` file in this directory: `pip install -r helper-apps/cortex-file-converter/requirements.txt` (path shown relative to the main project root; adjust as necessary).
|
|
30
|
+
* **Azure Connection String:** The environment variable `AZURE_STORAGE_CONNECTION_STRING` must be set to a valid connection string for the Azure Storage account containing the target container.
|
|
31
|
+
* **Azure Container Name:** The environment variable `AZURE_STORAGE_CONTAINER` must be set to the name of the target container.
|
|
32
|
+
* **Automation Permissions:** You might need to grant permission for your terminal application (e.g., Terminal, iTerm) or Python itself to control Keynote. Check `System Settings` > `Privacy & Security` > `Automation`.
|
|
33
|
+
|
|
34
|
+
### Usage
|
|
35
|
+
|
|
36
|
+
1. Ensure all prerequisites are met.
|
|
37
|
+
2. Navigate to the main project root directory in your terminal.
|
|
38
|
+
3. Set the required environment variables:
|
|
39
|
+
```bash
|
|
40
|
+
export AZURE_STORAGE_CONNECTION_STRING="<your_azure_storage_connection_string>"
|
|
41
|
+
export AZURE_STORAGE_CONTAINER="<your_target_container_name>"
|
|
42
|
+
```
|
|
43
|
+
4. Run the script:
|
|
44
|
+
```bash
|
|
45
|
+
python helper-apps/cortex-file-converter/process_azure_keynotes.py
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## `key_to_pdf.py`
|
|
49
|
+
|
|
50
|
+
This is a standalone command-line utility for converting a single local Keynote file to PDF using AppleScript. It was the basis for the conversion logic now embedded within `process_azure_keynotes.py`.
|
|
51
|
+
|
|
52
|
+
### Usage (Standalone)
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
python helper-apps/cortex-file-converter/key_to_pdf.py <input_keynote_file.key> [-o <output_pdf_file.pdf>]
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
If the output path (`-o`) is omitted, the PDF will be saved in the same directory as the input file with a `.pdf` extension.
|
|
59
|
+
|
|
60
|
+
## `list_blob_extensions.py`
|
|
61
|
+
|
|
62
|
+
### Purpose
|
|
63
|
+
|
|
64
|
+
This script connects to an Azure Blob Storage container (specified by the `AZURE_STORAGE_CONTAINER` environment variable), lists all the blobs within it, and reports the unique file extensions found. It also marks extensions known to be typically unsupported for content cracking by the default Azure Cognitive Search indexer configuration.
|
|
65
|
+
|
|
66
|
+
### Functionality
|
|
67
|
+
|
|
68
|
+
1. **Connects** to the Azure Blob Storage account specified by the `AZURE_STORAGE_CONNECTION_STRING` environment variable and the container specified by `AZURE_STORAGE_CONTAINER`.
|
|
69
|
+
2. **Lists** all blobs within the specified container.
|
|
70
|
+
3. **Extracts** the file extension from each blob name.
|
|
71
|
+
4. **Collects** all unique extensions found.
|
|
72
|
+
5. **Prints** a list of unique extensions, marking those present in its predefined `UNSUPPORTED_EXTENSIONS_KNOWN` set.
|
|
73
|
+
|
|
74
|
+
### Prerequisites
|
|
75
|
+
|
|
76
|
+
* **Python 3.x:** The script is written for Python 3.
|
|
77
|
+
* **Python Libraries:** Requires the `azure-storage-blob` library. Install dependencies using: `pip install -r requirements.txt`.
|
|
78
|
+
* **Azure Connection String:** The environment variable `AZURE_STORAGE_CONNECTION_STRING` must be set.
|
|
79
|
+
* **Azure Container Name:** The environment variable `AZURE_STORAGE_CONTAINER` must be set to the name of the target container.
|
|
80
|
+
|
|
81
|
+
### Usage
|
|
82
|
+
|
|
83
|
+
1. Ensure all prerequisites are met.
|
|
84
|
+
2. Navigate to the `helper-apps/cortex-file-converter` directory in your terminal.
|
|
85
|
+
3. Set the required environment variables:
|
|
86
|
+
```bash
|
|
87
|
+
export AZURE_STORAGE_CONNECTION_STRING="<your_azure_storage_connection_string>"
|
|
88
|
+
export AZURE_STORAGE_CONTAINER="<your_target_container_name>"
|
|
89
|
+
```
|
|
90
|
+
4. Run the script:
|
|
91
|
+
```bash
|
|
92
|
+
python list_blob_extensions.py
|
|
93
|
+
```
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import subprocess
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
def _escape_applescript_string(value):
    """
    Escapes a value so it can be embedded in a double-quoted AppleScript string literal.
    """
    # Backslashes go first; otherwise the backslashes added for the quotes would be doubled again.
    return str(value).replace("\\", "\\\\").replace('"', '\\"')


def convert_key_to_pdf(key_path, pdf_path):
    """
    Converts a Keynote file (.key) to PDF using AppleScript.

    Args:
        key_path: Path to the input Keynote file.
        pdf_path: Path the exported PDF is written to. A file already present
            at this path is removed before the export starts.

    Prints an error to stderr and exits the process with status 1 when the
    input file is missing, the output directory cannot be created, osascript
    returns a non-zero status, the run times out, or any other Python error
    occurs while launching osascript.
    """
    if not os.path.exists(key_path):
        print(f"Error: Input file not found: {key_path}", file=sys.stderr)
        sys.exit(1)

    # Ensure the output directory exists
    output_dir = os.path.dirname(pdf_path)
    if output_dir and not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError as e:
            print(f"Error creating output directory {output_dir}: {e}", file=sys.stderr)
            sys.exit(1)

    # Remove a stale output PDF; failure here is reported but deliberately not fatal.
    if os.path.exists(pdf_path):
        try:
            os.remove(pdf_path)
            print(f"Removed existing output file: {pdf_path}")
        except OSError as e:
            print(f"Error removing existing output file {pdf_path}: {e}", file=sys.stderr)

    # The paths end up inside double-quoted AppleScript literals, so quotes and
    # backslashes must be escaped or the script breaks (or runs injected code).
    script_key_path = _escape_applescript_string(key_path)
    script_pdf_path = _escape_applescript_string(pdf_path)

    applescript = f'''
    tell application "Keynote"
        try
            set theDocument to open POSIX file "{script_key_path}"
            if not (exists theDocument) then error "Failed to open document."

            set pdf_export_settings to {{PDF image quality:Good}} -- Define settings as record (escaped braces)

            with timeout of 1200 seconds -- Allow 20 minutes for export
                export theDocument to POSIX file "{script_pdf_path}" as PDF with properties pdf_export_settings -- Use settings record
            end timeout

            close theDocument saving no
            log "Successfully exported {script_key_path} to {script_pdf_path}"
        on error errMsg number errNum
            log "AppleScript Error: " & errMsg & " (Number: " & errNum & ")"
            -- Try to close the document even if there was an error during export/close
            try
                if exists theDocument then
                    close theDocument saving no
                end if
            end try
            error "Keynote conversion failed: " & errMsg number errNum
        end try
        -- Optional: Quit Keynote after conversion
        -- quit
    end tell
    '''

    try:
        # Run the script through osascript; the Python-side timeout is slightly
        # longer than the 1200 s AppleScript timeout so the script can fail first.
        subprocess.run(['osascript', '-e', applescript],
                       capture_output=True, text=True, check=True, timeout=1260)
        print(f"Successfully converted '{key_path}' to '{pdf_path}'")
    except subprocess.CalledProcessError as e:
        print(f"Error executing AppleScript: {e}", file=sys.stderr)
        print(f"stdout:\n{e.stdout}", file=sys.stderr)
        print(f"stderr:\n{e.stderr}", file=sys.stderr)
        sys.exit(1)
    except subprocess.TimeoutExpired:
        print(f"Error: AppleScript execution timed out for {key_path}", file=sys.stderr)
        # It's hard to reliably kill the Keynote process started by AppleScript here,
        # as Keynote might still be hung. Manual intervention might be needed.
        sys.exit(1)
    except Exception as e:
        print(f"An unexpected Python error occurred: {e}", file=sys.stderr)
        sys.exit(1)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def main():
    """
    Command-line entry point: parse the arguments and convert one Keynote file to PDF.
    """
    cli = argparse.ArgumentParser(description="Convert Keynote (.key) files to PDF.")
    cli.add_argument("input_key_file", help="Path to the input Keynote file.")
    cli.add_argument("-o", "--output", help="Path for the output PDF file. Defaults to the same name as the input file but with a .pdf extension.")
    options = cli.parse_args()

    source = os.path.abspath(options.input_key_file)

    if not options.output:
        # No explicit output: write next to the input, swapping the extension for .pdf.
        stem, _ = os.path.splitext(source)
        destination = stem + ".pdf"
    else:
        destination = os.path.abspath(options.output)

    convert_key_to_pdf(source, destination)
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
|
|
104
|
+
main()
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from azure.storage.blob import BlobServiceClient
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Set of known extensions typically NOT supported for content cracking by Azure Cognitive Search (default configuration)
|
|
7
|
+
# Check Azure docs for the official list of *supported* formats.
|
|
8
|
+
# This list focuses on common formats often not processed for text content.
|
|
9
|
+
UNSUPPORTED_EXTENSIONS_KNOWN = {
    # Apple Keynote (as requested)
    'key',
    # Archive formats often need specific skills/configurations
    'zip', 'rar', 'gz', 'tar', '7z', 'pkg',
    # Disk images
    'dmg', 'iso',
    # Executables
    'exe', 'dll',
    # Video formats
    'mp4', 'mov', 'avi', 'wmv',
    # Audio formats
    'mp3', 'wav', 'aac',
    # Compiled Python
    'pyc',
    # Compiled Java
    'class',
    # Compiled C/C++ object files
    'o',
    # Static libraries
    'a',
    # Shared libraries (Linux)
    'so',
    # Add other known unsupported types if needed
}
|
|
35
|
+
|
|
36
|
+
def main():
    """
    Lists every blob in the configured Azure container and prints the unique
    file extensions found, flagging those in UNSUPPORTED_EXTENSIONS_KNOWN.

    Reads AZURE_STORAGE_CONNECTION_STRING and AZURE_STORAGE_CONTAINER from the
    environment. Exits with status 1 if either variable is missing, the
    container cannot be accessed, or listing the blobs fails.
    """
    # --- Configuration ---
    try:
        conn_str = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
        container_name = os.environ["AZURE_STORAGE_CONTAINER"]
    except KeyError as missing:
        print(f"Error: Required environment variable {missing} is not set.", file=sys.stderr)
        sys.exit(1)
    # ---------------------

    print(f"Connecting to Azure Blob Storage container: {container_name}")
    try:
        service = BlobServiceClient.from_connection_string(conn_str)
        container = service.get_container_client(container_name)
        # Fetching the properties doubles as an existence/access check.
        container.get_container_properties()
    except Exception as err:
        print(f"Error connecting to or accessing container '{container_name}': {err}", file=sys.stderr)
        sys.exit(1)

    print("Listing blobs and collecting extensions...")
    seen_extensions = set()
    total = 0
    try:
        for total, blob in enumerate(container.list_blobs(), start=1):
            dotted = Path(blob.name).suffix.lower()
            if dotted:
                # Keep the extension without its leading dot.
                seen_extensions.add(dotted[1:])
            if total % 10000 == 0:
                print(f" Processed {total} blobs...")

        print(f"Finished processing {total} blobs.")
    except Exception as err:
        print(f"Error listing blobs in container '{container_name}': {err}", file=sys.stderr)
        sys.exit(1)

    rule = "-" * 30
    print("\n" + rule)
    print(f"Found {len(seen_extensions)} unique file extensions:")
    print(rule)

    # Sorted so the report is stable between runs.
    for ext in sorted(seen_extensions):
        if ext in UNSUPPORTED_EXTENSIONS_KNOWN:
            marker = "(Unsupported by Indexer)"
        else:
            marker = ""
        print(f".{ext} {marker}")
|
|
87
|
+
|
|
88
|
+
if __name__ == "__main__":
|
|
89
|
+
main()
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
import tempfile
|
|
5
|
+
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# --- Conversion Function (Adapted from key_to_pdf.py) ---
|
|
9
|
+
class ConversionError(Exception):
    """Custom exception for conversion failures."""


def _escape_applescript_string(value):
    """
    Escapes a value so it can be embedded in a double-quoted AppleScript string literal.
    """
    # Backslashes go first; otherwise the backslashes added for the quotes would be doubled again.
    return str(value).replace("\\", "\\\\").replace('"', '\\"')


def convert_key_to_pdf(key_path, pdf_path):
    """
    Converts a Keynote file (.key) to PDF using AppleScript.

    Args:
        key_path: Path to the input Keynote file.
        pdf_path: Path the exported PDF is written to. An existing file at
            this path is not removed here.

    Raises:
        FileNotFoundError: If the input file doesn't exist.
        ConversionError: If the output directory cannot be created, osascript
            exits with a non-zero status, the run times out, osascript cannot
            be launched, or no PDF exists at pdf_path after a successful run.
    """
    if not os.path.exists(key_path):
        raise FileNotFoundError(f"Input Keynote file not found: {key_path}")

    # Ensure the output directory exists
    output_dir = os.path.dirname(pdf_path)
    if output_dir and not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir)
        except OSError as e:
            # Raise an error if we can't create the output dir
            raise ConversionError(f"Cannot create output directory {output_dir}: {e}") from e

    # The paths end up inside double-quoted AppleScript literals, so quotes and
    # backslashes must be escaped or the script breaks (or runs injected code).
    script_key_path = _escape_applescript_string(key_path)
    script_pdf_path = _escape_applescript_string(pdf_path)

    applescript = f'''
    tell application "Keynote"
        try
            set theDocument to open POSIX file "{script_key_path}"
            if not (exists theDocument) then error "Failed to open document."

            set pdf_export_settings to {{PDF image quality:Good}} -- Define settings as record (escaped braces)

            with timeout of 1200 seconds -- Allow 20 minutes for export
                export theDocument to POSIX file "{script_pdf_path}" as PDF with properties pdf_export_settings -- Use settings record
            end timeout

            close theDocument saving no
        on error errMsg number errNum
            -- Try to close the document even if there was an error during export/close
            try
                if exists theDocument then
                    close theDocument saving no
                end if
            end try
            error "Keynote conversion failed: " & errMsg number errNum
        end try
        -- Optional: Quit Keynote after conversion
        -- quit
    end tell
    '''

    # Only the subprocess call sits inside the try block. The result checks
    # below raise ConversionError themselves and must not be re-wrapped as an
    # "unexpected error" by the generic handler.
    try:
        # check=False: the exit status is inspected below so stderr can be reported.
        process = subprocess.run(['osascript', '-e', applescript],
                                 capture_output=True, text=True, check=False, timeout=1260)
    except subprocess.TimeoutExpired as e:
        raise ConversionError(f"AppleScript execution timed out for {key_path}") from e
    except Exception as e:
        # Catch any other unexpected Python errors during subprocess handling
        raise ConversionError(f"An unexpected error occurred during conversion process for {key_path}: {e}") from e

    if process.returncode != 0:
        # Raise an error with details from AppleScript failure
        raise ConversionError(f"AppleScript execution failed (Code {process.returncode}) for {key_path}. stderr: {process.stderr.strip()}")

    # Final check if PDF exists after successful run
    if not os.path.exists(pdf_path):
        raise ConversionError(f"Conversion reported success but output PDF not found: {pdf_path}")
|
|
85
|
+
|
|
86
|
+
# --- Main Azure Processing Logic ---
|
|
87
|
+
|
|
88
|
+
def main():
    """
    Convert every Keynote blob in an Azure container that has no PDF twin.

    Reads AZURE_STORAGE_CONNECTION_STRING and AZURE_STORAGE_CONTAINER from the
    environment and lists all blobs in that container. For each blob whose
    name ends with ".key" (case-insensitive) and for which no blob with the
    same name and a ".pdf" suffix is listed, it downloads the blob into a
    temporary directory, converts it with convert_key_to_pdf, and uploads the
    resulting PDF (overwrite enabled).

    Progress is printed to stdout and errors to stderr. The process exits with
    status 1 when a required environment variable is missing or the container
    cannot be accessed or listed. Per-file failures are counted and shown in
    the final summary but do not stop the run.
    """
    # --- Configuration ---
    try:
        connection_string = os.environ["AZURE_STORAGE_CONNECTION_STRING"]
        container_name = os.environ["AZURE_STORAGE_CONTAINER"]
    except KeyError as e:
        print(f"Error: Required environment variable {e} is not set.", file=sys.stderr)
        sys.exit(1)

    print(f"Connecting to Azure Blob Storage container: {container_name}")
    try:
        blob_service_client = BlobServiceClient.from_connection_string(connection_string)
        container_client = blob_service_client.get_container_client(container_name)
        # Check if container exists by trying to get properties
        container_client.get_container_properties()
    except Exception as e:
        print(f"Error connecting to or accessing container '{container_name}': {e}", file=sys.stderr)
        sys.exit(1)

    print("Listing blobs...")
    try:
        # A set gives O(1) "does the PDF already exist?" lookups below.
        all_blob_names = {blob.name for blob in container_client.list_blobs()}
        print(f"Found {len(all_blob_names)} blobs in total.")
    except Exception as e:
        print(f"Error listing blobs in container '{container_name}': {e}", file=sys.stderr)
        sys.exit(1)

    print("Starting processing...")
    processed_count = 0
    skipped_count = 0
    error_count = 0

    # Sorted so the processing order (and therefore the log) is reproducible.
    for blob_name in sorted(all_blob_names):
        if not blob_name.lower().endswith(".key"):
            continue

        print(f"Found Keynote file: {blob_name}")
        # Same blob path with the extension swapped for ".pdf"
        pdf_blob_name = Path(blob_name).with_suffix(".pdf").as_posix()

        if pdf_blob_name in all_blob_names:
            print(f" -> PDF version already exists: {pdf_blob_name}. Skipping.")
            skipped_count += 1
            continue

        print(" -> PDF version not found. Attempting conversion.")

        # Use a temporary directory for download and conversion; it and its
        # contents are removed when the with-block exits, even on error.
        with tempfile.TemporaryDirectory() as temp_dir:
            local_key_path = os.path.join(temp_dir, os.path.basename(blob_name))
            local_pdf_path = Path(local_key_path).with_suffix(".pdf").as_posix()

            try:
                # 1. Download .key file
                print(f" Downloading {blob_name} to {local_key_path}...")
                blob_client = container_client.get_blob_client(blob_name)
                with open(local_key_path, "wb") as download_file:
                    # Stream to disk instead of buffering the whole
                    # presentation in memory with readall().
                    blob_client.download_blob().readinto(download_file)
                print(" Download complete.")

                # 2. Convert .key to .pdf (using local function)
                print(f" Converting {local_key_path} to {local_pdf_path}...")
                try:
                    convert_key_to_pdf(local_key_path, local_pdf_path)
                    print(" Conversion successful.")
                except (FileNotFoundError, ConversionError) as convert_err:
                    print(f" ERROR: Conversion failed for {local_key_path}: {convert_err}", file=sys.stderr)
                    error_count += 1
                    continue  # Skip upload if conversion failed

                # 3. Upload .pdf file
                print(f" Uploading {local_pdf_path} to {pdf_blob_name}...")
                pdf_blob_client = container_client.get_blob_client(pdf_blob_name)
                with open(local_pdf_path, "rb") as upload_file:
                    # 10-minute timeout to accommodate large PDFs
                    pdf_blob_client.upload_blob(upload_file, overwrite=True, timeout=600)
                print(" Upload complete.")
                processed_count += 1

            except Exception as e:
                # Any download/upload failure is recorded and the run continues.
                print(f" ERROR processing file {blob_name}: {e}", file=sys.stderr)
                error_count += 1

    print("-"*30)
    print("Processing complete.")
    print(f"Successfully converted and uploaded: {processed_count}")
    print(f"Skipped (PDF already exists): {skipped_count}")
    print(f"Errors encountered: {error_count}")
|
|
179
|
+
|
|
180
|
+
# Run the conversion sweep only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
azure-storage-blob
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# Test environment configuration for Azure tests
|
|
2
|
+
REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
|
|
3
|
+
AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
|
|
4
|
+
AZURE_STORAGE_CONTAINER_NAME=default,test-container,test1,test2,test3,container1,container2,container3
|
|
5
|
+
NODE_ENV=test
|
|
6
|
+
PORT=7072 # Different port for testing
|
|
7
|
+
MARKITDOWN_CONVERT_URL= #cortex-markitdown url
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Test environment configuration for Azure tests
|
|
2
2
|
REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
|
|
3
3
|
AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
|
|
4
|
-
AZURE_STORAGE_CONTAINER_NAME=test-container
|
|
4
|
+
AZURE_STORAGE_CONTAINER_NAME=default,test-container,test1,test2,test3,container1,container2,container3
|
|
5
5
|
NODE_ENV=test
|
|
6
6
|
PORT=7072 # Different port for testing
|
|
7
7
|
MARKITDOWN_CONVERT_URL= #cortex-markitdown url
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Test environment configuration for Azure tests
|
|
2
|
+
REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
|
|
3
|
+
STORAGE_EMULATOR_HOST=http://localhost:4443
|
|
4
|
+
GCP_SERVICE_ACCOUNT_KEY={"project_id":"test-project"}
|
|
5
|
+
GCS_BUCKETNAME=cortextempfiles
|
|
6
|
+
AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
|
|
7
|
+
AZURE_STORAGE_CONTAINER_NAME=default,test-container,test1,test2,test3,container1,container2,container3
|
|
8
|
+
NODE_ENV=test
|
|
9
|
+
PORT=7072 # Different port for testing
|
|
10
|
+
MARKITDOWN_CONVERT_URL= #cortex-markitdown url
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# Test environment configuration for Azure tests
|
|
2
2
|
REDIS_CONNECTION_STRING=redis://default:redispw@localhost:32768
|
|
3
|
-
GCP_SERVICE_ACCOUNT_KEY={"project_id":"test-project"}
|
|
4
3
|
STORAGE_EMULATOR_HOST=http://localhost:4443
|
|
4
|
+
GCP_SERVICE_ACCOUNT_KEY={"project_id":"test-project"}
|
|
5
5
|
GCS_BUCKETNAME=cortextempfiles
|
|
6
6
|
AZURE_STORAGE_CONNECTION_STRING=UseDevelopmentStorage=true
|
|
7
|
-
AZURE_STORAGE_CONTAINER_NAME=test-container
|
|
7
|
+
AZURE_STORAGE_CONTAINER_NAME=default,test-container,test1,test2,test3,container1,container2,container3
|
|
8
8
|
NODE_ENV=test
|
|
9
9
|
PORT=7072 # Different port for testing
|
|
10
10
|
MARKITDOWN_CONVERT_URL= #cortex-markitdown url
|
|
@@ -43,6 +43,11 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
43
43
|
- `hash` (optional): Unique identifier for the file
|
|
44
44
|
- `checkHash` (optional): Check if hash exists
|
|
45
45
|
- `clearHash` (optional): Remove hash from storage
|
|
46
|
+
- `generateShortLived` (optional): Generate a short-lived URL for an existing hash
|
|
47
|
+
- Requires `hash` parameter
|
|
48
|
+
- Generates a new SAS token with a short expiration time
|
|
49
|
+
- Returns a temporary URL for secure sharing
|
|
50
|
+
- `shortLivedMinutes` (optional): Duration in minutes for short-lived URLs (default: 5)
|
|
46
51
|
- `fetch`/`load`/`restore` (optional): URL to fetch remote file (these are aliases - any of the three parameters will trigger the same remote file processing behavior)
|
|
47
52
|
- Does not require `requestId`
|
|
48
53
|
- Uses Redis caching
|
|
@@ -73,6 +78,12 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
73
78
|
- Updates Redis timestamp on subsequent requests
|
|
74
79
|
- Truncates filenames longer than 200 characters
|
|
75
80
|
- Ensures correct file extension based on content type
|
|
81
|
+
- For checkHash (`checkHash=true`):
|
|
82
|
+
- Requires valid `hash` parameter
|
|
83
|
+
- Checks if file exists in storage and restores if needed
|
|
84
|
+
- Always generates a new SAS token with a short expiration (default: 5 minutes)
|
|
85
|
+
- Returns file information with temporary URL and expiration information
|
|
86
|
+
- Updates Redis timestamp
|
|
76
87
|
|
|
77
88
|
### DELETE
|
|
78
89
|
|
|
@@ -172,6 +183,36 @@ The Cortex File Handler is a service that processes files through various operat
|
|
|
172
183
|
- After successful processing
|
|
173
184
|
- On error conditions
|
|
174
185
|
|
|
186
|
+
## Usage Examples
|
|
187
|
+
|
|
188
|
+
### Check Hash (Always Returns Short-Lived URL)
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
# Check hash with 5-minute short-lived URL (default)
|
|
192
|
+
GET /file-handler?hash=abc123&checkHash=true
|
|
193
|
+
|
|
194
|
+
# Check hash with 10-minute short-lived URL
|
|
195
|
+
GET /file-handler?hash=abc123&checkHash=true&shortLivedMinutes=10
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**Response (always includes short-lived URL):**
|
|
199
|
+
```json
|
|
200
|
+
{
|
|
201
|
+
"message": "File 'document.pdf' uploaded successfully.",
|
|
202
|
+
"filename": "document.pdf",
|
|
203
|
+
"url": "https://storage.blob.core.windows.net/container/file.pdf?original-sas-token",
|
|
204
|
+
"gcs": "gs://bucket/file.pdf",
|
|
205
|
+
"hash": "abc123",
|
|
206
|
+
"shortLivedUrl": "https://storage.blob.core.windows.net/container/file.pdf?sv=2023-11-03&se=2024-01-15T10%3A15%3A00Z&sr=b&sp=r&sig=...",
|
|
207
|
+
"expiresInMinutes": 5,
|
|
208
|
+
"timestamp": "2024-01-15T10:10:00.000Z",
|
|
209
|
+
"converted": {
|
|
210
|
+
"url": "https://storage.blob.core.windows.net/container/converted.pdf",
|
|
211
|
+
"gcs": "gs://bucket/converted.pdf"
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
```
|
|
215
|
+
|
|
175
216
|
## Error Handling
|
|
176
217
|
|
|
177
218
|
- **400 Bad Request**:
|
|
@@ -1,25 +1,49 @@
|
|
|
1
1
|
import { BlobServiceClient } from "@azure/storage-blob";
|
|
2
2
|
|
|
3
|
-
async function
|
|
3
|
+
/**
 * Creates every Azure Blob Storage container named in the comma-separated
 * AZURE_STORAGE_CONTAINER_NAME environment variable, connecting with
 * AZURE_STORAGE_CONNECTION_STRING.
 *
 * A 409 response (container already exists) is treated as success. Any other
 * failure, including a missing environment variable, is logged and ends the
 * process with exit code 1.
 *
 * @returns {Promise<void>} Resolves once all listed containers were handled.
 */
async function createContainers() {
  try {
    const {
      AZURE_STORAGE_CONNECTION_STRING: connectionString,
      AZURE_STORAGE_CONTAINER_NAME: containerNames,
    } = process.env;

    // Both settings are mandatory; fail fast before touching the SDK.
    if (!connectionString) {
      throw new Error("AZURE_STORAGE_CONNECTION_STRING environment variable is required");
    }
    if (!containerNames) {
      throw new Error("AZURE_STORAGE_CONTAINER_NAME environment variable is required");
    }

    const blobServiceClient = BlobServiceClient.fromConnectionString(connectionString);

    // The variable holds a comma-separated list; surrounding whitespace is dropped.
    const containers = [];
    for (const rawName of containerNames.split(',')) {
      containers.push(rawName.trim());
    }
    console.log(`Creating containers: ${containers.join(', ')}`);

    for (const containerName of containers) {
      // Blank entries (e.g. from a trailing comma) are skipped.
      if (!containerName) {
        continue;
      }

      try {
        await blobServiceClient.getContainerClient(containerName).create();
        console.log(`✅ Container '${containerName}' created successfully`);
      } catch (creationError) {
        // Anything other than "already exists" (HTTP 409) is fatal.
        if (creationError.statusCode !== 409) {
          console.error(`❌ Error creating container '${containerName}':`, creationError.message);
          throw creationError;
        }
        console.log(`✅ Container '${containerName}' already exists`);
      }
    }

    console.log("🎉 All containers setup completed");
  } catch (setupError) {
    console.error("❌ Container setup failed:", setupError.message);
    process.exit(1);
  }
}
|
|
24
48
|
|
|
25
|
-
|
|
49
|
+
// Run the setup as soon as this script is executed. The returned promise is
// not awaited because createContainers() catches its own errors and exits.
createContainers();
|