@aj-archipelago/cortex 1.3.62 → 1.3.63

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/.github/workflows/cortex-file-handler-test.yml +61 -0
  2. package/README.md +31 -7
  3. package/config/default.example.json +15 -0
  4. package/config.js +133 -12
  5. package/helper-apps/cortex-autogen2/DigiCertGlobalRootCA.crt.pem +22 -0
  6. package/helper-apps/cortex-autogen2/Dockerfile +31 -0
  7. package/helper-apps/cortex-autogen2/Dockerfile.worker +41 -0
  8. package/helper-apps/cortex-autogen2/README.md +183 -0
  9. package/helper-apps/cortex-autogen2/__init__.py +1 -0
  10. package/helper-apps/cortex-autogen2/agents.py +131 -0
  11. package/helper-apps/cortex-autogen2/docker-compose.yml +20 -0
  12. package/helper-apps/cortex-autogen2/function_app.py +55 -0
  13. package/helper-apps/cortex-autogen2/host.json +15 -0
  14. package/helper-apps/cortex-autogen2/main.py +126 -0
  15. package/helper-apps/cortex-autogen2/poetry.lock +3652 -0
  16. package/helper-apps/cortex-autogen2/pyproject.toml +36 -0
  17. package/helper-apps/cortex-autogen2/requirements.txt +20 -0
  18. package/helper-apps/cortex-autogen2/send_task.py +105 -0
  19. package/helper-apps/cortex-autogen2/services/__init__.py +1 -0
  20. package/helper-apps/cortex-autogen2/services/azure_queue.py +85 -0
  21. package/helper-apps/cortex-autogen2/services/redis_publisher.py +153 -0
  22. package/helper-apps/cortex-autogen2/task_processor.py +488 -0
  23. package/helper-apps/cortex-autogen2/tools/__init__.py +24 -0
  24. package/helper-apps/cortex-autogen2/tools/azure_blob_tools.py +175 -0
  25. package/helper-apps/cortex-autogen2/tools/azure_foundry_agents.py +601 -0
  26. package/helper-apps/cortex-autogen2/tools/coding_tools.py +72 -0
  27. package/helper-apps/cortex-autogen2/tools/download_tools.py +48 -0
  28. package/helper-apps/cortex-autogen2/tools/file_tools.py +545 -0
  29. package/helper-apps/cortex-autogen2/tools/search_tools.py +646 -0
  30. package/helper-apps/cortex-azure-cleaner/README.md +36 -0
  31. package/helper-apps/cortex-file-converter/README.md +93 -0
  32. package/helper-apps/cortex-file-converter/key_to_pdf.py +104 -0
  33. package/helper-apps/cortex-file-converter/list_blob_extensions.py +89 -0
  34. package/helper-apps/cortex-file-converter/process_azure_keynotes.py +181 -0
  35. package/helper-apps/cortex-file-converter/requirements.txt +1 -0
  36. package/helper-apps/cortex-file-handler/.env.test.azure.ci +7 -0
  37. package/helper-apps/cortex-file-handler/.env.test.azure.sample +1 -1
  38. package/helper-apps/cortex-file-handler/.env.test.gcs.ci +10 -0
  39. package/helper-apps/cortex-file-handler/.env.test.gcs.sample +2 -2
  40. package/helper-apps/cortex-file-handler/INTERFACE.md +41 -0
  41. package/helper-apps/cortex-file-handler/package.json +1 -1
  42. package/helper-apps/cortex-file-handler/scripts/setup-azure-container.js +41 -17
  43. package/helper-apps/cortex-file-handler/scripts/setup-test-containers.js +30 -15
  44. package/helper-apps/cortex-file-handler/scripts/test-azure.sh +32 -6
  45. package/helper-apps/cortex-file-handler/scripts/test-gcs.sh +24 -2
  46. package/helper-apps/cortex-file-handler/scripts/validate-env.js +128 -0
  47. package/helper-apps/cortex-file-handler/src/blobHandler.js +161 -51
  48. package/helper-apps/cortex-file-handler/src/constants.js +3 -0
  49. package/helper-apps/cortex-file-handler/src/fileChunker.js +10 -8
  50. package/helper-apps/cortex-file-handler/src/index.js +116 -9
  51. package/helper-apps/cortex-file-handler/src/redis.js +61 -1
  52. package/helper-apps/cortex-file-handler/src/services/ConversionService.js +11 -8
  53. package/helper-apps/cortex-file-handler/src/services/FileConversionService.js +2 -2
  54. package/helper-apps/cortex-file-handler/src/services/storage/AzureStorageProvider.js +88 -6
  55. package/helper-apps/cortex-file-handler/src/services/storage/GCSStorageProvider.js +58 -0
  56. package/helper-apps/cortex-file-handler/src/services/storage/StorageFactory.js +25 -5
  57. package/helper-apps/cortex-file-handler/src/services/storage/StorageProvider.js +9 -0
  58. package/helper-apps/cortex-file-handler/src/services/storage/StorageService.js +120 -16
  59. package/helper-apps/cortex-file-handler/src/start.js +27 -17
  60. package/helper-apps/cortex-file-handler/tests/FileConversionService.test.js +52 -1
  61. package/helper-apps/cortex-file-handler/tests/blobHandler.test.js +40 -0
  62. package/helper-apps/cortex-file-handler/tests/checkHashShortLived.test.js +553 -0
  63. package/helper-apps/cortex-file-handler/tests/cleanup.test.js +46 -52
  64. package/helper-apps/cortex-file-handler/tests/containerConversionFlow.test.js +451 -0
  65. package/helper-apps/cortex-file-handler/tests/containerNameParsing.test.js +229 -0
  66. package/helper-apps/cortex-file-handler/tests/containerParameterFlow.test.js +392 -0
  67. package/helper-apps/cortex-file-handler/tests/conversionResilience.test.js +7 -2
  68. package/helper-apps/cortex-file-handler/tests/deleteOperations.test.js +348 -0
  69. package/helper-apps/cortex-file-handler/tests/fileChunker.test.js +23 -2
  70. package/helper-apps/cortex-file-handler/tests/fileUpload.test.js +11 -5
  71. package/helper-apps/cortex-file-handler/tests/getOperations.test.js +58 -24
  72. package/helper-apps/cortex-file-handler/tests/postOperations.test.js +11 -4
  73. package/helper-apps/cortex-file-handler/tests/shortLivedUrlConversion.test.js +225 -0
  74. package/helper-apps/cortex-file-handler/tests/start.test.js +8 -12
  75. package/helper-apps/cortex-file-handler/tests/storage/StorageFactory.test.js +80 -0
  76. package/helper-apps/cortex-file-handler/tests/storage/StorageService.test.js +388 -22
  77. package/helper-apps/cortex-file-handler/tests/testUtils.helper.js +74 -0
  78. package/lib/cortexResponse.js +153 -0
  79. package/lib/entityConstants.js +21 -3
  80. package/lib/logger.js +21 -4
  81. package/lib/pathwayTools.js +28 -9
  82. package/lib/util.js +49 -0
  83. package/package.json +1 -1
  84. package/pathways/basePathway.js +1 -0
  85. package/pathways/bing_afagent.js +54 -1
  86. package/pathways/call_tools.js +2 -3
  87. package/pathways/chat_jarvis.js +1 -1
  88. package/pathways/google_cse.js +27 -0
  89. package/pathways/grok_live_search.js +18 -0
  90. package/pathways/system/entity/memory/sys_memory_lookup_required.js +1 -0
  91. package/pathways/system/entity/memory/sys_memory_required.js +1 -0
  92. package/pathways/system/entity/memory/sys_search_memory.js +1 -0
  93. package/pathways/system/entity/sys_entity_agent.js +56 -4
  94. package/pathways/system/entity/sys_generator_quick.js +1 -0
  95. package/pathways/system/entity/tools/sys_tool_bing_search_afagent.js +26 -0
  96. package/pathways/system/entity/tools/sys_tool_google_search.js +141 -0
  97. package/pathways/system/entity/tools/sys_tool_grok_x_search.js +237 -0
  98. package/pathways/system/entity/tools/sys_tool_image.js +1 -1
  99. package/pathways/system/rest_streaming/sys_claude_37_sonnet.js +21 -0
  100. package/pathways/system/rest_streaming/sys_claude_41_opus.js +21 -0
  101. package/pathways/system/rest_streaming/sys_claude_4_sonnet.js +21 -0
  102. package/pathways/system/rest_streaming/sys_google_gemini_25_flash.js +25 -0
  103. package/pathways/system/rest_streaming/{sys_google_gemini_chat.js → sys_google_gemini_25_pro.js} +6 -4
  104. package/pathways/system/rest_streaming/sys_grok_4.js +23 -0
  105. package/pathways/system/rest_streaming/sys_grok_4_fast_non_reasoning.js +23 -0
  106. package/pathways/system/rest_streaming/sys_grok_4_fast_reasoning.js +23 -0
  107. package/pathways/system/rest_streaming/sys_openai_chat.js +3 -0
  108. package/pathways/system/rest_streaming/sys_openai_chat_gpt41.js +22 -0
  109. package/pathways/system/rest_streaming/sys_openai_chat_gpt41_mini.js +21 -0
  110. package/pathways/system/rest_streaming/sys_openai_chat_gpt41_nano.js +21 -0
  111. package/pathways/system/rest_streaming/{sys_claude_35_sonnet.js → sys_openai_chat_gpt4_omni.js} +6 -4
  112. package/pathways/system/rest_streaming/sys_openai_chat_gpt4_omni_mini.js +21 -0
  113. package/pathways/system/rest_streaming/{sys_claude_3_haiku.js → sys_openai_chat_gpt5.js} +7 -5
  114. package/pathways/system/rest_streaming/sys_openai_chat_gpt5_chat.js +21 -0
  115. package/pathways/system/rest_streaming/sys_openai_chat_gpt5_mini.js +21 -0
  116. package/pathways/system/rest_streaming/sys_openai_chat_gpt5_nano.js +21 -0
  117. package/pathways/system/rest_streaming/{sys_openai_chat_o1.js → sys_openai_chat_o3.js} +6 -3
  118. package/pathways/system/rest_streaming/sys_openai_chat_o3_mini.js +3 -0
  119. package/pathways/system/workspaces/run_workspace_prompt.js +99 -0
  120. package/pathways/vision.js +1 -1
  121. package/server/graphql.js +1 -1
  122. package/server/modelExecutor.js +8 -0
  123. package/server/pathwayResolver.js +166 -16
  124. package/server/pathwayResponseParser.js +16 -8
  125. package/server/plugins/azureFoundryAgentsPlugin.js +1 -1
  126. package/server/plugins/claude3VertexPlugin.js +193 -45
  127. package/server/plugins/gemini15ChatPlugin.js +21 -0
  128. package/server/plugins/gemini15VisionPlugin.js +360 -0
  129. package/server/plugins/googleCsePlugin.js +94 -0
  130. package/server/plugins/grokVisionPlugin.js +365 -0
  131. package/server/plugins/modelPlugin.js +3 -1
  132. package/server/plugins/openAiChatPlugin.js +106 -13
  133. package/server/plugins/openAiVisionPlugin.js +42 -30
  134. package/server/resolver.js +28 -4
  135. package/server/rest.js +270 -53
  136. package/server/typeDef.js +1 -0
  137. package/tests/{mocks.js → helpers/mocks.js} +5 -2
  138. package/tests/{server.js → helpers/server.js} +2 -2
  139. package/tests/helpers/sseAssert.js +23 -0
  140. package/tests/helpers/sseClient.js +73 -0
  141. package/tests/helpers/subscriptionAssert.js +11 -0
  142. package/tests/helpers/subscriptions.js +113 -0
  143. package/tests/{sublong.srt → integration/features/translate/sublong.srt} +4543 -4543
  144. package/tests/integration/features/translate/translate_chunking_stream.test.js +100 -0
  145. package/tests/{translate_srt.test.js → integration/features/translate/translate_srt.test.js} +2 -2
  146. package/tests/integration/graphql/async/stream/agentic.test.js +477 -0
  147. package/tests/integration/graphql/async/stream/subscription_streaming.test.js +62 -0
  148. package/tests/integration/graphql/async/stream/sys_entity_start_streaming.test.js +71 -0
  149. package/tests/integration/graphql/async/stream/vendors/claude_streaming.test.js +56 -0
  150. package/tests/integration/graphql/async/stream/vendors/gemini_streaming.test.js +66 -0
  151. package/tests/integration/graphql/async/stream/vendors/grok_streaming.test.js +56 -0
  152. package/tests/integration/graphql/async/stream/vendors/openai_streaming.test.js +72 -0
  153. package/tests/integration/graphql/features/google/sysToolGoogleSearch.test.js +96 -0
  154. package/tests/integration/graphql/features/grok/grok.test.js +688 -0
  155. package/tests/integration/graphql/features/grok/grok_x_search_tool.test.js +354 -0
  156. package/tests/{main.test.js → integration/graphql/features/main.test.js} +1 -1
  157. package/tests/{call_tools.test.js → integration/graphql/features/tools/call_tools.test.js} +2 -2
  158. package/tests/{vision.test.js → integration/graphql/features/vision/vision.test.js} +1 -1
  159. package/tests/integration/graphql/subscriptions/connection.test.js +26 -0
  160. package/tests/{openai_api.test.js → integration/rest/oai/openai_api.test.js} +63 -238
  161. package/tests/integration/rest/oai/tool_calling_api.test.js +343 -0
  162. package/tests/integration/rest/oai/tool_calling_streaming.test.js +85 -0
  163. package/tests/integration/rest/vendors/claude_streaming.test.js +47 -0
  164. package/tests/integration/rest/vendors/claude_tool_calling_streaming.test.js +75 -0
  165. package/tests/integration/rest/vendors/gemini_streaming.test.js +47 -0
  166. package/tests/integration/rest/vendors/gemini_tool_calling_streaming.test.js +75 -0
  167. package/tests/integration/rest/vendors/grok_streaming.test.js +55 -0
  168. package/tests/integration/rest/vendors/grok_tool_calling_streaming.test.js +75 -0
  169. package/tests/{azureAuthTokenHelper.test.js → unit/core/azureAuthTokenHelper.test.js} +1 -1
  170. package/tests/{chunkfunction.test.js → unit/core/chunkfunction.test.js} +2 -2
  171. package/tests/{config.test.js → unit/core/config.test.js} +3 -3
  172. package/tests/{encodeCache.test.js → unit/core/encodeCache.test.js} +1 -1
  173. package/tests/{fastLruCache.test.js → unit/core/fastLruCache.test.js} +1 -1
  174. package/tests/{handleBars.test.js → unit/core/handleBars.test.js} +1 -1
  175. package/tests/{memoryfunction.test.js → unit/core/memoryfunction.test.js} +2 -2
  176. package/tests/unit/core/mergeResolver.test.js +952 -0
  177. package/tests/{parser.test.js → unit/core/parser.test.js} +3 -3
  178. package/tests/unit/core/pathwayResolver.test.js +187 -0
  179. package/tests/{requestMonitor.test.js → unit/core/requestMonitor.test.js} +1 -1
  180. package/tests/{requestMonitorDurationEstimator.test.js → unit/core/requestMonitorDurationEstimator.test.js} +1 -1
  181. package/tests/{truncateMessages.test.js → unit/core/truncateMessages.test.js} +3 -3
  182. package/tests/{util.test.js → unit/core/util.test.js} +1 -1
  183. package/tests/{apptekTranslatePlugin.test.js → unit/plugins/apptekTranslatePlugin.test.js} +3 -3
  184. package/tests/{azureFoundryAgents.test.js → unit/plugins/azureFoundryAgents.test.js} +136 -1
  185. package/tests/{claude3VertexPlugin.test.js → unit/plugins/claude3VertexPlugin.test.js} +32 -10
  186. package/tests/{claude3VertexToolConversion.test.js → unit/plugins/claude3VertexToolConversion.test.js} +3 -3
  187. package/tests/unit/plugins/googleCsePlugin.test.js +111 -0
  188. package/tests/unit/plugins/grokVisionPlugin.test.js +1392 -0
  189. package/tests/{modelPlugin.test.js → unit/plugins/modelPlugin.test.js} +3 -3
  190. package/tests/{multimodal_conversion.test.js → unit/plugins/multimodal_conversion.test.js} +4 -4
  191. package/tests/{openAiChatPlugin.test.js → unit/plugins/openAiChatPlugin.test.js} +13 -4
  192. package/tests/{openAiToolPlugin.test.js → unit/plugins/openAiToolPlugin.test.js} +35 -27
  193. package/tests/{tokenHandlingTests.test.js → unit/plugins/tokenHandlingTests.test.js} +5 -5
  194. package/tests/{translate_apptek.test.js → unit/plugins/translate_apptek.test.js} +3 -3
  195. package/tests/{streaming.test.js → unit/plugins.streaming/plugin_stream_events.test.js} +19 -58
  196. package/helper-apps/mogrt-handler/tests/test-files/test.gif +0 -1
  197. package/helper-apps/mogrt-handler/tests/test-files/test.mogrt +0 -1
  198. package/helper-apps/mogrt-handler/tests/test-files/test.mp4 +0 -1
  199. package/pathways/system/rest_streaming/sys_openai_chat_gpt4.js +0 -19
  200. package/pathways/system/rest_streaming/sys_openai_chat_gpt4_32.js +0 -19
  201. package/pathways/system/rest_streaming/sys_openai_chat_gpt4_turbo.js +0 -19
  202. package/pathways/system/workspaces/run_claude35_sonnet.js +0 -21
  203. package/pathways/system/workspaces/run_claude3_haiku.js +0 -20
  204. package/pathways/system/workspaces/run_gpt35turbo.js +0 -20
  205. package/pathways/system/workspaces/run_gpt4.js +0 -20
  206. package/pathways/system/workspaces/run_gpt4_32.js +0 -20
  207. package/tests/agentic.test.js +0 -256
  208. package/tests/pathwayResolver.test.js +0 -78
  209. package/tests/subscription.test.js +0 -387
  210. /package/tests/{subchunk.srt → integration/features/translate/subchunk.srt} +0 -0
  211. /package/tests/{subhorizontal.srt → integration/features/translate/subhorizontal.srt} +0 -0
@@ -0,0 +1,48 @@
1
+ """
2
+ File download tools.
3
+ """
4
+ import requests
5
+ import os
6
+ import mimetypes
7
+ from urllib.parse import urlparse
8
+
9
def download_file(url: str, filename: str = None) -> str:
    """
    Downloads a file from a URL and saves it to the current working directory.

    Args:
        url: The URL of the file to download.
        filename: The desired filename. If not provided, it is taken from the
            last path segment of the URL; if the URL has no usable segment, an
            extension is guessed from the response Content-Type and the file is
            named "downloaded_file<ext>" (or plain "downloaded_file").

    Returns:
        A success or error message string. Errors are reported in the returned
        string rather than raised.
    """
    try:
        # The context manager releases the streamed connection back to the pool
        # even when writing the file fails part-way through.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            if not filename:
                filename = os.path.basename(urlparse(url).path)

            if not filename:
                # Drop parameters such as "; charset=utf-8" - guess_extension()
                # only recognises the bare media type.
                raw_type = response.headers.get('content-type') or ''
                media_type = raw_type.split(';', 1)[0].strip().lower()
                extension = mimetypes.guess_extension(media_type) if media_type else None
                filename = f"downloaded_file{extension}" if extension else "downloaded_file"

            filepath = os.path.join(os.getcwd(), filename)

            with open(filepath, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip keep-alive chunks
                        f.write(chunk)

        return f"Successfully downloaded '{url}' and saved as '{filename}'"

    except requests.exceptions.RequestException as e:
        return f"Error downloading file: {e}"
    except Exception as e:
        return f"An unexpected error occurred: {e}"
@@ -0,0 +1,545 @@
1
+ """
2
+ Universal File Tools for Cortex-AutoGen2
3
+ Enhanced file handling system that works across all file types.
4
+ """
5
+
6
+ import asyncio
7
+ import logging
8
+ import os
9
+ import json
10
+ import mimetypes
11
+ from pathlib import Path
12
+ from typing import List, Optional, Dict, Any, Union
13
+ from autogen_core.tools import FunctionTool
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class UniversalFileHandler:
    """Universal file handler that intelligently processes any file type."""

    # Extension -> category table, built once instead of on every call.
    # Insertion order matters only in that the first matching category wins.
    _CATEGORY_EXTENSIONS = {
        "document": (".pdf", ".doc", ".docx", ".txt", ".md", ".rtf"),
        "spreadsheet": (".xls", ".xlsx", ".csv", ".tsv"),
        "presentation": (".ppt", ".pptx"),
        "image": (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".svg", ".webp"),
        "data": (".json", ".xml", ".yaml", ".yml"),
        "code": (".py", ".js", ".html", ".css", ".sql", ".r"),
        "archive": (".zip", ".tar", ".gz", ".rar", ".7z"),
        "media": (".mp4", ".mp3", ".avi", ".mov", ".wav"),
    }

    # Number of characters kept in the short "preview" field.
    _PREVIEW_CHARS = 500

    @staticmethod
    def detect_file_type(file_path: str) -> Dict[str, Any]:
        """
        Detect file type and basic characteristics from the path and extension.

        Args:
            file_path: Path to the file

        Returns:
            Dictionary with filename, extension, size, mime_type, category,
            exists, is_deliverable, is_readable and description. When the path
            does not exist, only {"error": ..., "exists": False} is returned.
        """
        if not os.path.exists(file_path):
            return {"error": "File not found", "exists": False}

        filename = os.path.basename(file_path)
        extension = os.path.splitext(filename)[1].lower()
        size = os.path.getsize(file_path)

        # MIME type detection (extension based; no content sniffing)
        mime_type, _ = mimetypes.guess_type(file_path)

        category = "unknown"
        for cat, extensions in UniversalFileHandler._CATEGORY_EXTENSIONS.items():
            if extension in extensions:
                category = cat
                break

        return {
            "filename": filename,
            "extension": extension,
            "size": size,
            "mime_type": mime_type,
            "category": category,
            "exists": True,
            # Files with a temp-style prefix or a leading dot are not deliverables.
            "is_deliverable": not filename.startswith(("tmp_", "temp_", ".")),
            "is_readable": category in ["document", "data", "code"] or extension in [".txt", ".csv", ".json"],
            "description": _get_file_description(extension, category)
        }

    @staticmethod
    def _read_text(file_path: str, max_length: int):
        """
        Read up to max_length characters of text from file_path.

        The first bytes are sniffed so that UTF-16 is only attempted when a BOM
        is present (BOM-less UTF-16 decoding "succeeds" on most 8-bit text and
        yields garbage), and files containing NUL bytes are treated as binary.

        Returns:
            Tuple (content, encoding, truncated). content and encoding are None
            when the file looks binary or could not be decoded.
        """
        with open(file_path, 'rb') as raw:
            head = raw.read(1024)

        if head.startswith((b"\xff\xfe", b"\xfe\xff")):
            encodings = ['utf-16', 'utf-8', 'cp1252', 'latin-1']
        elif b"\x00" in head:
            # NUL bytes without a UTF-16 BOM: zip-based documents, old .doc, etc.
            return None, None, False
        elif head.startswith(b"\xef\xbb\xbf"):
            # utf-8-sig strips the BOM, which would otherwise break json.loads.
            encodings = ['utf-8-sig', 'cp1252', 'latin-1']
        else:
            # cp1252 must come before latin-1: latin-1 accepts every byte, so
            # anything listed after it can never be reached.
            encodings = ['utf-8', 'cp1252', 'latin-1']

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    content = f.read(max_length)
                    # Probe one more character so a file of exactly max_length
                    # characters is not reported as truncated.
                    truncated = f.read(1) != ""
                return content, encoding, truncated
            except (UnicodeDecodeError, UnicodeError):
                continue

        return None, None, False

    @staticmethod
    def read_file_intelligently(file_path: str, max_length: int = 5000) -> Dict[str, Any]:
        """
        Read file content based on file type.

        Images and PDFs get a descriptive preview only. Readable text files are
        decoded and returned in "content" (up to max_length characters) with a
        short "preview"; JSON and CSV files get extra metadata. Everything else
        is reported as binary.

        Args:
            file_path: Path to the file
            max_length: Maximum characters to read for text files

        Returns:
            The detect_file_type() dictionary extended with "content",
            "preview" and "metadata". On failure an "error" key is added and
            "content"/"preview" are None. For a missing file the bare
            detect_file_type() error dictionary is returned.
        """
        file_info = UniversalFileHandler.detect_file_type(file_path)

        if not file_info["exists"]:
            return file_info

        try:
            content_info = {
                **file_info,
                "content": None,
                "preview": None,
                "metadata": {}
            }
            metadata = content_info["metadata"]

            if file_info["category"] == "image":
                content_info["preview"] = f"Image file: {file_info['filename']} ({file_info['size']} bytes)"
                metadata["viewable"] = True

            elif file_info["category"] == "document" and file_info["extension"] == ".pdf":
                content_info["preview"] = f"PDF document: {file_info['filename']} ({file_info['size']} bytes)"
                metadata["pages"] = "Unknown"
                metadata["viewable"] = True

            elif file_info["is_readable"]:
                content, encoding, truncated = UniversalFileHandler._read_text(file_path, max_length)

                if content is not None:
                    limit = UniversalFileHandler._PREVIEW_CHARS
                    content_info["content"] = content
                    content_info["preview"] = content[:limit] + "..." if len(content) > limit else content
                    metadata["encoding"] = encoding
                    metadata["truncated"] = truncated

                    # Special handling for structured data
                    if file_info["extension"] == ".json":
                        # NOTE: a truncated read makes otherwise valid JSON
                        # report json_valid = False; check "truncated" too.
                        try:
                            json_data = json.loads(content)
                            metadata["json_valid"] = True
                            metadata["json_type"] = type(json_data).__name__
                            if isinstance(json_data, list):
                                metadata["json_length"] = len(json_data)
                        except json.JSONDecodeError:
                            metadata["json_valid"] = False

                    elif file_info["extension"] == ".csv":
                        import csv

                        # splitlines() does not count a phantom row after the
                        # trailing newline; csv.reader honours quoted commas.
                        rows = content.splitlines()
                        metadata["csv_rows"] = len(rows)
                        metadata["csv_columns"] = len(next(csv.reader(rows[:1]), []))
                else:
                    content_info["preview"] = "Binary file - cannot preview as text"
            else:
                content_info["preview"] = f"Binary file: {file_info['filename']} ({file_info['size']} bytes)"
                metadata["binary"] = True

            return content_info

        except Exception as e:
            return {
                **file_info,
                "error": f"Error reading file: {str(e)}",
                "content": None,
                "preview": None
            }
155
+
156
+
157
+ def _get_file_description(extension: str, category: str) -> str:
158
+ """Get a human-readable description of the file type."""
159
+ descriptions = {
160
+ ".pdf": "PDF Document",
161
+ ".txt": "Text File",
162
+ ".csv": "CSV Data File",
163
+ ".json": "JSON Data File",
164
+ ".png": "PNG Image",
165
+ ".jpg": "JPEG Image",
166
+ ".jpeg": "JPEG Image",
167
+ ".gif": "GIF Image",
168
+ ".svg": "SVG Vector Image",
169
+ ".html": "HTML Document",
170
+ ".xml": "XML Document",
171
+ ".docx": "Word Document",
172
+ ".xlsx": "Excel Spreadsheet",
173
+ ".pptx": "PowerPoint Presentation",
174
+ ".zip": "ZIP Archive",
175
+ ".py": "Python Script",
176
+ ".js": "JavaScript File",
177
+ ".css": "CSS Stylesheet",
178
+ ".md": "Markdown Document",
179
+ ".sql": "SQL Script",
180
+ ".yaml": "YAML Configuration",
181
+ ".yml": "YAML Configuration"
182
+ }
183
+
184
+ return descriptions.get(extension, f"{category.title()} File" if category != "unknown" else "Unknown File Type")
185
+
186
+
187
def _looks_like_image(buf: bytes) -> bool:
    """Return True when buf starts with a PNG, JPEG, GIF or WebP signature."""
    if not buf or len(buf) < 4:
        return False
    if buf.startswith((
        b"\x89PNG\r\n\x1a\n",  # PNG
        b"\xff\xd8\xff",       # JPEG
        b"GIF87a", b"GIF89a",  # GIF
    )):
        return True
    # WebP is a RIFF container whose form type at offset 8 is "WEBP". Checking
    # only "RIFF" would also accept WAV and AVI files.
    return buf.startswith(b"RIFF") and buf[8:12] == b"WEBP"


def _download_image_sync(url: str, file_path: str) -> str:
    """Blocking download of url into file_path; returns the JSON status string."""
    try:
        import requests
        browser_ua = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/125.0.0.0 Safari/537.36"
        )
        # Both the session and the response are context-managed so pooled
        # connections are released on every exit path.
        with requests.Session() as session:
            session.headers.update({"User-Agent": browser_ua})

            with session.get(url, stream=True, timeout=20, allow_redirects=True) as response:
                response.raise_for_status()

                content_type = (response.headers.get("Content-Type") or "").lower()

                # A single iterator is used for both the peek and the rest of
                # the body, so no second iterator is opened on a partly read stream.
                chunks = response.iter_content(chunk_size=8192)

                # Peek first bytes to validate image magic if header is missing/misleading
                first_chunk = next(chunks, b"")

                if not (content_type.startswith("image/") or _looks_like_image(first_chunk)):
                    logger.error(f"❌ URL did not return an image content-type: {content_type} for {url}")
                    return json.dumps({"status": "error", "message": f"Non-image content-type: {content_type}"})

                # Write first chunk then stream the rest
                with open(file_path, 'wb') as f:
                    if first_chunk:
                        f.write(first_chunk)
                    for chunk in chunks:
                        if chunk:
                            f.write(chunk)

        logger.info(f"✅ Successfully downloaded image from {url} to {file_path}")
        return json.dumps({"status": "success", "file_path": file_path})
    except Exception as e:
        logger.error(f"❌ Failed to download image from {url}: {e}")
        return json.dumps({"status": "error", "message": str(e)})


async def download_image(url: str, filename: str, work_dir: Optional[str] = None) -> str:
    """
    Downloads an image from a URL and saves it to the working directory.

    The response is accepted when its Content-Type starts with "image/" or
    when its first bytes carry a PNG, JPEG, GIF or WebP signature.

    Args:
        url: The URL of the image to download.
        filename: The local filename to save the image as.
        work_dir: Optional working directory path. Defaults to the current
            working directory.

    Returns:
        A JSON string indicating success or failure: {"status": "success",
        "file_path": ...} or {"status": "error", "message": ...}.
    """
    if not work_dir:
        work_dir = os.getcwd()

    file_path = os.path.join(work_dir, filename)

    # requests is blocking; run it in the default executor so the event loop
    # stays responsive for the duration of the download.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _download_image_sync, url, file_path)
250
+
251
+
252
+ # Enhanced file tools
253
async def list_files_in_work_dir(work_dir: Optional[str] = None) -> str:
    """
    List the regular files in the working directory, grouped for display.

    Files flagged as non-deliverable are shown as temporary. The remaining
    files are grouped into documents, images, data, code and "other
    deliverables" according to the category reported by
    UniversalFileHandler.detect_file_type().

    Args:
        work_dir: Working directory path. An error string is returned when it
            is missing or does not exist.

    Returns:
        A formatted multi-line report, or an error/empty-directory message.
    """
    if not (work_dir and os.path.exists(work_dir)):
        return "❌ Working directory not found or not specified"

    # Detected category -> display bucket; anything else is a generic deliverable.
    bucket_for_category = {
        "document": "documents",
        "image": "images",
        "data": "data",
        "code": "code",
    }

    try:
        discovered = []
        buckets = {
            name: []
            for name in ("deliverable", "temporary", "data", "documents", "images", "code", "other")
        }

        for entry in os.listdir(work_dir):
            entry_path = os.path.join(work_dir, entry)
            if not os.path.isfile(entry_path):
                continue

            info = UniversalFileHandler.detect_file_type(entry_path)
            discovered.append(info)

            if info["is_deliverable"]:
                target = bucket_for_category.get(info["category"], "deliverable")
            else:
                target = "temporary"
            buckets[target].append(info)

        if not discovered:
            return f"📁 No files found in working directory: {work_dir}"

        # Largest files first within every bucket.
        buckets = {
            name: sorted(items, key=lambda entry: entry["size"], reverse=True)
            for name, items in buckets.items()
        }

        report = [
            "📁 **UNIVERSAL FILE DISCOVERY**\n",
            f"Directory: {work_dir}\n",
            f"Total Files: {len(discovered)}\n\n",
        ]

        # Everything that is not temporary counts as deliverable.
        deliverable_total = sum(
            len(items) for name, items in buckets.items() if name != "temporary"
        )

        if deliverable_total > 0:
            report.append(f"🎯 **DELIVERABLE FILES** ({deliverable_total} files):\n\n")

            sections = (
                ("documents", "📄 **Documents**"),
                ("images", "🖼️ **Images**"),
                ("data", "📊 **Data Files**"),
                ("deliverable", "📦 **Other Deliverables**"),
            )
            for bucket_name, heading in sections:
                items = buckets[bucket_name]
                if not items:
                    continue
                report.append(f"{heading} ({len(items)}):\n")
                report.extend(
                    f" ✅ {entry['filename']} ({entry['size']} bytes) - {entry['description']}\n"
                    for entry in items
                )
                report.append("\n")

        temporary = buckets["temporary"]
        if temporary:
            report.append(f"🗂️ **Temporary Files** ({len(temporary)}):\n")
            report.extend(
                f" - {entry['filename']} ({entry['size']} bytes)\n" for entry in temporary
            )
            report.append("\n")

        code_files = buckets["code"]
        if code_files:
            report.append(f"💻 **Code Files** ({len(code_files)}):\n")
            report.extend(
                f" 📝 {entry['filename']} ({entry['size']} bytes) - {entry['description']}\n"
                for entry in code_files
            )

        # Closing guidance
        if deliverable_total > 0:
            report.append("\n🚀 **NEXT ACTIONS**:\n")
            report.append("1. Read and analyze deliverable files\n")
            report.append("2. Upload all deliverables to Azure\n")
            report.append("3. Provide download links to user\n")
        else:
            report.append("\n⚠️ **WARNING**: No deliverable files found!\n")
            report.append("Check if the task was completed successfully.\n")

        return "".join(report)

    except Exception as e:
        return f"❌ Error listing files: {str(e)}"
364
+
365
async def create_file(filename: str, content: str, work_dir: Optional[str] = None) -> str:
    """
    Write content to a new UTF-8 text file inside the working directory.

    Missing parent directories of the target path are created first.

    Args:
        filename: Name (or relative path) of the file to create.
        content: Text to write into the file.
        work_dir: Optional working directory path. Falls back to the current
            directory when not provided.

    Returns:
        A JSON string with "status" ("success" or "error") and a "message".
    """
    target_dir = work_dir if work_dir else os.getcwd()
    target_path = os.path.join(target_dir, filename)

    try:
        parent_dir = os.path.dirname(target_path)
        os.makedirs(parent_dir, exist_ok=True)
        with open(target_path, "w", encoding="utf-8") as handle:
            handle.write(content)
    except Exception as exc:
        outcome = {"status": "error", "message": str(exc)}
    else:
        outcome = {
            "status": "success",
            "message": f"File '{filename}' created successfully in '{target_dir}'.",
        }

    return json.dumps(outcome)
387
+
388
async def read_file_from_work_dir(filename: str, work_dir: Optional[str] = None, max_length: int = 5000) -> str:
    """
    Read a file from the working directory and report its analysis and content.

    Args:
        filename: Name of the file to read
        work_dir: Working directory path. An error string is returned when it
            is missing or does not exist.
        max_length: Maximum characters to read for text files

    Returns:
        String containing file analysis (type, category, size, MIME type,
        metadata), the content for readable text files (up to max_length
        characters) or a short description for other files, and
        recommendations. Errors are returned as "❌ ..." strings.
    """
    if not work_dir or not os.path.exists(work_dir):
        return "❌ Working directory not found or not specified"

    file_path = os.path.join(work_dir, filename)

    if not os.path.exists(file_path):
        return f"❌ File not found: {filename}"

    try:
        content_info = UniversalFileHandler.read_file_intelligently(file_path, max_length)

        if "error" in content_info:
            return f"❌ {content_info['error']}"

        parts = [
            f"📄 **UNIVERSAL FILE ANALYSIS: {filename}**\n",
            f"Type: {content_info['description']}\n",
            f"Category: {content_info['category'].title()}\n",
            f"Size: {content_info['size']} bytes\n",
            f"MIME Type: {content_info['mime_type'] or 'Unknown'}\n",
            f"Deliverable: {'✅ Yes' if content_info['is_deliverable'] else '❌ No'}\n",
        ]

        if content_info["metadata"]:
            parts.append("\n📊 **Metadata**:\n")
            parts.extend(f" {key}: {value}\n" for key, value in content_info["metadata"].items())

        if content_info["preview"]:
            parts.append("\n📝 **Content Preview**:\n")
            if content_info["content"] and content_info["is_readable"]:
                # Emit the text that was actually read (bounded by max_length).
                # Emitting only the short preview would make max_length
                # ineffective and the truncation note below incorrect.
                parts.append(f"```\n{content_info['content']}\n```\n")

                if content_info["metadata"].get("truncated"):
                    parts.append(f"\n📏 **Note**: Content truncated at {max_length} characters\n")
            else:
                parts.append(f"{content_info['preview']}\n")

        # Add recommendations
        parts.append("\n💡 **Recommendations**:\n")
        if content_info["is_deliverable"]:
            parts.append("✅ Upload this file to Azure for user download\n")
            if content_info["category"] == "document":
                parts.append("📋 This appears to be a document - perfect for user delivery\n")
            elif content_info["category"] == "image":
                parts.append("🖼️ This is an image file - should be viewable after upload\n")
            elif content_info["category"] == "data":
                parts.append("📊 This contains data - useful for analysis or download\n")
        else:
            parts.append("🗂️ This appears to be a temporary file\n")

        return "".join(parts)

    except Exception as e:
        return f"❌ Error reading file {filename}: {str(e)}"
453
+
454
+
455
async def get_file_info(filename: str, work_dir: Optional[str] = None) -> str:
    """
    Produce a metadata report for a file in the working directory.

    Combines os.stat results, the classification returned by
    UniversalFileHandler.detect_file_type and os.access permission checks
    into a single formatted string.

    Args:
        filename: Name of the file to analyze
        work_dir: Working directory that contains the file (a missing or
            non-existent directory yields an error string)

    Returns:
        The formatted metadata report, or a string starting with "❌" on failure
    """
    if not (work_dir and os.path.exists(work_dir)):
        return "❌ Working directory not found or not specified"

    target_path = os.path.join(work_dir, filename)
    if not os.path.exists(target_path):
        return f"❌ File not found: {filename}"

    try:
        import time

        st = os.stat(target_path)
        type_info = UniversalFileHandler.detect_file_type(target_path)

        lines = [
            f"📊 **COMPREHENSIVE FILE METADATA: {filename}**\n",
            f"Full Path: {target_path}\n",
            f"Size: {st.st_size} bytes ({st.st_size / 1024:.1f} KB)\n",
            # "Created" is taken from st_ctime and "Modified" from st_mtime.
            f"Created: {time.ctime(st.st_ctime)}\n",
            f"Modified: {time.ctime(st.st_mtime)}\n",
            f"Extension: {type_info['extension'] or 'None'}\n",
            f"MIME Type: {type_info['mime_type'] or 'Unknown'}\n",
            f"Category: {type_info['category'].title()}\n",
            f"Description: {type_info['description']}\n",
            f"Readable: {'✅ Yes' if type_info['is_readable'] else '❌ No'}\n",
            f"Deliverable: {'✅ Yes' if type_info['is_deliverable'] else '❌ No'}\n",
        ]

        # File permissions as seen by the current process
        lines.append("\n🔐 **Permissions**:\n")
        for label, mode in (("Readable", os.R_OK), ("Writable", os.W_OK), ("Executable", os.X_OK)):
            mark = '✅' if os.access(target_path, mode) else '❌'
            lines.append(f"{label}: {mark}\n")

        return "".join(lines)

    except Exception as exc:
        return f"❌ Error getting file info for {filename}: {str(exc)}"
501
+
502
+
503
def get_file_tools(executor_work_dir: Optional[str] = None) -> List[FunctionTool]:
    """
    Build the file-management FunctionTool set bound to one working directory.

    Each tool wraps one of the async file helpers in a synchronous closure
    that fixes the working directory and drives the coroutine with
    asyncio.run.

    Args:
        executor_work_dir: Working directory passed to every file operation

    Returns:
        List with the list-files, read-file and file-info tools, in that order
    """
    # Synchronous wrappers with the working directory bound via closure.
    def bound_list_files():
        return asyncio.run(list_files_in_work_dir(executor_work_dir))

    def bound_read_file(filename: str, max_length: int = 5000):
        return asyncio.run(read_file_from_work_dir(filename, executor_work_dir, max_length))

    def bound_get_file_info(filename: str):
        return asyncio.run(get_file_info(filename, executor_work_dir))

    # (callable, tool name, tool description) for each exposed tool.
    tool_specs = (
        (
            bound_list_files,
            "list_files_in_work_dir",
            "Intelligently discover and categorize all files in the working directory with comprehensive metadata",
        ),
        (
            bound_read_file,
            "read_file_from_work_dir",
            "Intelligently read and analyze any file type with automatic content detection and preview generation",
        ),
        (
            bound_get_file_info,
            "get_file_info",
            "Get comprehensive metadata and analysis for any file type including permissions and recommendations",
        ),
    )
    tools = [
        FunctionTool(func, name=tool_name, description=tool_description)
        for func, tool_name, tool_description in tool_specs
    ]

    logger.info(f"✅ Universal file tools created for work_dir: {executor_work_dir}")
    return tools