atlas-chat 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. atlas/__init__.py +40 -0
  2. atlas/application/__init__.py +7 -0
  3. atlas/application/chat/__init__.py +7 -0
  4. atlas/application/chat/agent/__init__.py +10 -0
  5. atlas/application/chat/agent/act_loop.py +179 -0
  6. atlas/application/chat/agent/factory.py +142 -0
  7. atlas/application/chat/agent/protocols.py +46 -0
  8. atlas/application/chat/agent/react_loop.py +338 -0
  9. atlas/application/chat/agent/think_act_loop.py +171 -0
  10. atlas/application/chat/approval_manager.py +151 -0
  11. atlas/application/chat/elicitation_manager.py +191 -0
  12. atlas/application/chat/events/__init__.py +1 -0
  13. atlas/application/chat/events/agent_event_relay.py +112 -0
  14. atlas/application/chat/modes/__init__.py +1 -0
  15. atlas/application/chat/modes/agent.py +125 -0
  16. atlas/application/chat/modes/plain.py +74 -0
  17. atlas/application/chat/modes/rag.py +81 -0
  18. atlas/application/chat/modes/tools.py +179 -0
  19. atlas/application/chat/orchestrator.py +213 -0
  20. atlas/application/chat/policies/__init__.py +1 -0
  21. atlas/application/chat/policies/tool_authorization.py +99 -0
  22. atlas/application/chat/preprocessors/__init__.py +1 -0
  23. atlas/application/chat/preprocessors/message_builder.py +92 -0
  24. atlas/application/chat/preprocessors/prompt_override_service.py +104 -0
  25. atlas/application/chat/service.py +454 -0
  26. atlas/application/chat/utilities/__init__.py +6 -0
  27. atlas/application/chat/utilities/error_handler.py +367 -0
  28. atlas/application/chat/utilities/event_notifier.py +546 -0
  29. atlas/application/chat/utilities/file_processor.py +613 -0
  30. atlas/application/chat/utilities/tool_executor.py +789 -0
  31. atlas/atlas_chat_cli.py +347 -0
  32. atlas/atlas_client.py +238 -0
  33. atlas/core/__init__.py +0 -0
  34. atlas/core/auth.py +205 -0
  35. atlas/core/authorization_manager.py +27 -0
  36. atlas/core/capabilities.py +123 -0
  37. atlas/core/compliance.py +215 -0
  38. atlas/core/domain_whitelist.py +147 -0
  39. atlas/core/domain_whitelist_middleware.py +82 -0
  40. atlas/core/http_client.py +28 -0
  41. atlas/core/log_sanitizer.py +102 -0
  42. atlas/core/metrics_logger.py +59 -0
  43. atlas/core/middleware.py +131 -0
  44. atlas/core/otel_config.py +242 -0
  45. atlas/core/prompt_risk.py +200 -0
  46. atlas/core/rate_limit.py +0 -0
  47. atlas/core/rate_limit_middleware.py +64 -0
  48. atlas/core/security_headers_middleware.py +51 -0
  49. atlas/domain/__init__.py +37 -0
  50. atlas/domain/chat/__init__.py +1 -0
  51. atlas/domain/chat/dtos.py +85 -0
  52. atlas/domain/errors.py +96 -0
  53. atlas/domain/messages/__init__.py +12 -0
  54. atlas/domain/messages/models.py +160 -0
  55. atlas/domain/rag_mcp_service.py +664 -0
  56. atlas/domain/sessions/__init__.py +7 -0
  57. atlas/domain/sessions/models.py +36 -0
  58. atlas/domain/unified_rag_service.py +371 -0
  59. atlas/infrastructure/__init__.py +10 -0
  60. atlas/infrastructure/app_factory.py +135 -0
  61. atlas/infrastructure/events/__init__.py +1 -0
  62. atlas/infrastructure/events/cli_event_publisher.py +140 -0
  63. atlas/infrastructure/events/websocket_publisher.py +140 -0
  64. atlas/infrastructure/sessions/in_memory_repository.py +56 -0
  65. atlas/infrastructure/transport/__init__.py +7 -0
  66. atlas/infrastructure/transport/websocket_connection_adapter.py +33 -0
  67. atlas/init_cli.py +226 -0
  68. atlas/interfaces/__init__.py +15 -0
  69. atlas/interfaces/events.py +134 -0
  70. atlas/interfaces/llm.py +54 -0
  71. atlas/interfaces/rag.py +40 -0
  72. atlas/interfaces/sessions.py +75 -0
  73. atlas/interfaces/tools.py +57 -0
  74. atlas/interfaces/transport.py +24 -0
  75. atlas/main.py +564 -0
  76. atlas/mcp/api_key_demo/README.md +76 -0
  77. atlas/mcp/api_key_demo/main.py +172 -0
  78. atlas/mcp/api_key_demo/run.sh +56 -0
  79. atlas/mcp/basictable/main.py +147 -0
  80. atlas/mcp/calculator/main.py +149 -0
  81. atlas/mcp/code-executor/execution_engine.py +98 -0
  82. atlas/mcp/code-executor/execution_environment.py +95 -0
  83. atlas/mcp/code-executor/main.py +528 -0
  84. atlas/mcp/code-executor/result_processing.py +276 -0
  85. atlas/mcp/code-executor/script_generation.py +195 -0
  86. atlas/mcp/code-executor/security_checker.py +140 -0
  87. atlas/mcp/corporate_cars/main.py +437 -0
  88. atlas/mcp/csv_reporter/main.py +545 -0
  89. atlas/mcp/duckduckgo/main.py +182 -0
  90. atlas/mcp/elicitation_demo/README.md +171 -0
  91. atlas/mcp/elicitation_demo/main.py +262 -0
  92. atlas/mcp/env-demo/README.md +158 -0
  93. atlas/mcp/env-demo/main.py +199 -0
  94. atlas/mcp/file_size_test/main.py +284 -0
  95. atlas/mcp/filesystem/main.py +348 -0
  96. atlas/mcp/image_demo/main.py +113 -0
  97. atlas/mcp/image_demo/requirements.txt +4 -0
  98. atlas/mcp/logging_demo/README.md +72 -0
  99. atlas/mcp/logging_demo/main.py +103 -0
  100. atlas/mcp/many_tools_demo/main.py +50 -0
  101. atlas/mcp/order_database/__init__.py +0 -0
  102. atlas/mcp/order_database/main.py +369 -0
  103. atlas/mcp/order_database/signal_data.csv +1001 -0
  104. atlas/mcp/pdfbasic/main.py +394 -0
  105. atlas/mcp/pptx_generator/main.py +760 -0
  106. atlas/mcp/pptx_generator/requirements.txt +13 -0
  107. atlas/mcp/pptx_generator/run_test.sh +1 -0
  108. atlas/mcp/pptx_generator/test_pptx_generator_security.py +169 -0
  109. atlas/mcp/progress_demo/main.py +167 -0
  110. atlas/mcp/progress_updates_demo/QUICKSTART.md +273 -0
  111. atlas/mcp/progress_updates_demo/README.md +120 -0
  112. atlas/mcp/progress_updates_demo/main.py +497 -0
  113. atlas/mcp/prompts/main.py +222 -0
  114. atlas/mcp/public_demo/main.py +189 -0
  115. atlas/mcp/sampling_demo/README.md +169 -0
  116. atlas/mcp/sampling_demo/main.py +234 -0
  117. atlas/mcp/thinking/main.py +77 -0
  118. atlas/mcp/tool_planner/main.py +240 -0
  119. atlas/mcp/ui-demo/badmesh.png +0 -0
  120. atlas/mcp/ui-demo/main.py +383 -0
  121. atlas/mcp/ui-demo/templates/button_demo.html +32 -0
  122. atlas/mcp/ui-demo/templates/data_visualization.html +32 -0
  123. atlas/mcp/ui-demo/templates/form_demo.html +28 -0
  124. atlas/mcp/username-override-demo/README.md +320 -0
  125. atlas/mcp/username-override-demo/main.py +308 -0
  126. atlas/modules/__init__.py +0 -0
  127. atlas/modules/config/__init__.py +34 -0
  128. atlas/modules/config/cli.py +231 -0
  129. atlas/modules/config/config_manager.py +1096 -0
  130. atlas/modules/file_storage/__init__.py +22 -0
  131. atlas/modules/file_storage/cli.py +330 -0
  132. atlas/modules/file_storage/content_extractor.py +290 -0
  133. atlas/modules/file_storage/manager.py +295 -0
  134. atlas/modules/file_storage/mock_s3_client.py +402 -0
  135. atlas/modules/file_storage/s3_client.py +417 -0
  136. atlas/modules/llm/__init__.py +19 -0
  137. atlas/modules/llm/caller.py +287 -0
  138. atlas/modules/llm/litellm_caller.py +675 -0
  139. atlas/modules/llm/models.py +19 -0
  140. atlas/modules/mcp_tools/__init__.py +17 -0
  141. atlas/modules/mcp_tools/client.py +2123 -0
  142. atlas/modules/mcp_tools/token_storage.py +556 -0
  143. atlas/modules/prompts/prompt_provider.py +130 -0
  144. atlas/modules/rag/__init__.py +24 -0
  145. atlas/modules/rag/atlas_rag_client.py +336 -0
  146. atlas/modules/rag/client.py +129 -0
  147. atlas/routes/admin_routes.py +865 -0
  148. atlas/routes/config_routes.py +484 -0
  149. atlas/routes/feedback_routes.py +361 -0
  150. atlas/routes/files_routes.py +274 -0
  151. atlas/routes/health_routes.py +40 -0
  152. atlas/routes/mcp_auth_routes.py +223 -0
  153. atlas/server_cli.py +164 -0
  154. atlas/tests/conftest.py +20 -0
  155. atlas/tests/integration/test_mcp_auth_integration.py +152 -0
  156. atlas/tests/manual_test_sampling.py +87 -0
  157. atlas/tests/modules/mcp_tools/test_client_auth.py +226 -0
  158. atlas/tests/modules/mcp_tools/test_client_env.py +191 -0
  159. atlas/tests/test_admin_mcp_server_management_routes.py +141 -0
  160. atlas/tests/test_agent_roa.py +135 -0
  161. atlas/tests/test_app_factory_smoke.py +47 -0
  162. atlas/tests/test_approval_manager.py +439 -0
  163. atlas/tests/test_atlas_client.py +188 -0
  164. atlas/tests/test_atlas_rag_client.py +447 -0
  165. atlas/tests/test_atlas_rag_integration.py +224 -0
  166. atlas/tests/test_attach_file_flow.py +287 -0
  167. atlas/tests/test_auth_utils.py +165 -0
  168. atlas/tests/test_backend_public_url.py +185 -0
  169. atlas/tests/test_banner_logging.py +287 -0
  170. atlas/tests/test_capability_tokens_and_injection.py +203 -0
  171. atlas/tests/test_compliance_level.py +54 -0
  172. atlas/tests/test_compliance_manager.py +253 -0
  173. atlas/tests/test_config_manager.py +617 -0
  174. atlas/tests/test_config_manager_paths.py +12 -0
  175. atlas/tests/test_core_auth.py +18 -0
  176. atlas/tests/test_core_utils.py +190 -0
  177. atlas/tests/test_docker_env_sync.py +202 -0
  178. atlas/tests/test_domain_errors.py +329 -0
  179. atlas/tests/test_domain_whitelist.py +359 -0
  180. atlas/tests/test_elicitation_manager.py +408 -0
  181. atlas/tests/test_elicitation_routing.py +296 -0
  182. atlas/tests/test_env_demo_server.py +88 -0
  183. atlas/tests/test_error_classification.py +113 -0
  184. atlas/tests/test_error_flow_integration.py +116 -0
  185. atlas/tests/test_feedback_routes.py +333 -0
  186. atlas/tests/test_file_content_extraction.py +1134 -0
  187. atlas/tests/test_file_extraction_routes.py +158 -0
  188. atlas/tests/test_file_library.py +107 -0
  189. atlas/tests/test_file_manager_unit.py +18 -0
  190. atlas/tests/test_health_route.py +49 -0
  191. atlas/tests/test_http_client_stub.py +8 -0
  192. atlas/tests/test_imports_smoke.py +30 -0
  193. atlas/tests/test_interfaces_llm_response.py +9 -0
  194. atlas/tests/test_issue_access_denied_fix.py +136 -0
  195. atlas/tests/test_llm_env_expansion.py +836 -0
  196. atlas/tests/test_log_level_sensitive_data.py +285 -0
  197. atlas/tests/test_mcp_auth_routes.py +341 -0
  198. atlas/tests/test_mcp_client_auth.py +331 -0
  199. atlas/tests/test_mcp_data_injection.py +270 -0
  200. atlas/tests/test_mcp_get_authorized_servers.py +95 -0
  201. atlas/tests/test_mcp_hot_reload.py +512 -0
  202. atlas/tests/test_mcp_image_content.py +424 -0
  203. atlas/tests/test_mcp_logging.py +172 -0
  204. atlas/tests/test_mcp_progress_updates.py +313 -0
  205. atlas/tests/test_mcp_prompt_override_system_prompt.py +102 -0
  206. atlas/tests/test_mcp_prompts_server.py +39 -0
  207. atlas/tests/test_mcp_tool_result_parsing.py +296 -0
  208. atlas/tests/test_metrics_logger.py +56 -0
  209. atlas/tests/test_middleware_auth.py +379 -0
  210. atlas/tests/test_prompt_risk_and_acl.py +141 -0
  211. atlas/tests/test_rag_mcp_aggregator.py +204 -0
  212. atlas/tests/test_rag_mcp_service.py +224 -0
  213. atlas/tests/test_rate_limit_middleware.py +45 -0
  214. atlas/tests/test_routes_config_smoke.py +60 -0
  215. atlas/tests/test_routes_files_download_token.py +41 -0
  216. atlas/tests/test_routes_files_health.py +18 -0
  217. atlas/tests/test_runtime_imports.py +53 -0
  218. atlas/tests/test_sampling_integration.py +482 -0
  219. atlas/tests/test_security_admin_routes.py +61 -0
  220. atlas/tests/test_security_capability_tokens.py +65 -0
  221. atlas/tests/test_security_file_stats_scope.py +21 -0
  222. atlas/tests/test_security_header_injection.py +191 -0
  223. atlas/tests/test_security_headers_and_filename.py +63 -0
  224. atlas/tests/test_shared_session_repository.py +101 -0
  225. atlas/tests/test_system_prompt_loading.py +181 -0
  226. atlas/tests/test_token_storage.py +505 -0
  227. atlas/tests/test_tool_approval_config.py +93 -0
  228. atlas/tests/test_tool_approval_utils.py +356 -0
  229. atlas/tests/test_tool_authorization_group_filtering.py +223 -0
  230. atlas/tests/test_tool_details_in_config.py +108 -0
  231. atlas/tests/test_tool_planner.py +300 -0
  232. atlas/tests/test_unified_rag_service.py +398 -0
  233. atlas/tests/test_username_override_in_approval.py +258 -0
  234. atlas/tests/test_websocket_auth_header.py +168 -0
  235. atlas/version.py +6 -0
  236. atlas_chat-0.1.0.data/data/.env.example +253 -0
  237. atlas_chat-0.1.0.data/data/config/defaults/compliance-levels.json +44 -0
  238. atlas_chat-0.1.0.data/data/config/defaults/domain-whitelist.json +123 -0
  239. atlas_chat-0.1.0.data/data/config/defaults/file-extractors.json +74 -0
  240. atlas_chat-0.1.0.data/data/config/defaults/help-config.json +198 -0
  241. atlas_chat-0.1.0.data/data/config/defaults/llmconfig-buggy.yml +11 -0
  242. atlas_chat-0.1.0.data/data/config/defaults/llmconfig.yml +19 -0
  243. atlas_chat-0.1.0.data/data/config/defaults/mcp.json +138 -0
  244. atlas_chat-0.1.0.data/data/config/defaults/rag-sources.json +17 -0
  245. atlas_chat-0.1.0.data/data/config/defaults/splash-config.json +16 -0
  246. atlas_chat-0.1.0.dist-info/METADATA +236 -0
  247. atlas_chat-0.1.0.dist-info/RECORD +250 -0
  248. atlas_chat-0.1.0.dist-info/WHEEL +5 -0
  249. atlas_chat-0.1.0.dist-info/entry_points.txt +4 -0
  250. atlas_chat-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,22 @@
1
+ """File storage module for the chat backend.
2
+
3
+ This module provides:
4
+ - S3 storage client for file operations
5
+ - File management utilities
6
+ - Content type detection and categorization
7
+ - CLI tools for file operations
8
+ """
9
+
10
+ from .manager import FileManager
11
+ from .s3_client import S3StorageClient
12
+
13
# Shared default instances, built once when this package is imported.
s3_client = S3StorageClient()
file_manager = FileManager(s3_client)

# Names exported as the package's public API.
__all__ = ["S3StorageClient", "FileManager", "s3_client", "file_manager"]
@@ -0,0 +1,330 @@
1
+ """CLI interface for file storage operations.
2
+
3
+ This CLI allows you to:
4
+ - Upload files to S3
5
+ - List files for users
6
+ - Download files from S3
7
+ - Get file statistics
8
+ - Test file storage operations
9
+ """
10
+
11
+ import argparse
12
+ import base64
13
+ import logging
14
+ import sys
15
+ from pathlib import Path
16
+
17
+ from .manager import FileManager
18
+ from .s3_client import S3StorageClient
19
+
20
+ # Set up logging for CLI
21
+ logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
async def upload_file(args) -> None:
    """Read a local file and upload it for a user through ``FileManager``.

    Args:
        args: Parsed CLI arguments providing ``file_path``, ``user_email``,
            ``filename`` (optional override) and ``source_type``.

    Progress and the outcome are printed to stdout. Any exception raised
    while reading or uploading is printed and logged rather than propagated.
    """
    source = Path(args.file_path)

    if not source.exists():
        print(f"❌ File not found: {source}")
        return

    if not args.user_email:
        print("❌ User email is required")
        return

    print(f"📤 Uploading {source.name} for user {args.user_email}...")

    try:
        # The upload call takes the payload as base64 text, not raw bytes.
        encoded = base64.b64encode(source.read_bytes()).decode('utf-8')

        # An explicit --filename wins over the name of the file on disk.
        target_name = args.filename or source.name

        manager = FileManager()
        result = await manager.upload_file(
            user_email=args.user_email,
            filename=target_name,
            content_base64=encoded,
            source_type=args.source_type,
        )

        print("✅ File uploaded successfully!")
        print(f" S3 Key: {result['key']}")
        print(f" Size: {result.get('size', 'unknown')} bytes")
        print(f" Content Type: {result.get('content_type', 'unknown')}")

    except Exception as e:
        print(f"❌ Upload failed: {e}")
        logger.error(f"Upload error: {e}")
65
+
66
+
67
def _split_by_source(files):
    """Partition file records into (user, tool, other) lists by ``tags['source']``.

    Records whose tags are missing, ``None``, or carry any other source value
    land in ``other`` so that no record is silently dropped.
    """
    user_files, tool_files, other_files = [], [], []
    for file_info in files:
        # ``or {}`` tolerates a record whose 'tags' key is present but None.
        source = (file_info.get('tags') or {}).get('source')
        if source == 'user':
            user_files.append(file_info)
        elif source == 'tool':
            tool_files.append(file_info)
        else:
            other_files.append(file_info)
    return user_files, tool_files, other_files


def _print_file_entry(file_info, headline_prefix, detail_line) -> None:
    """Print one file record; ``detail_line`` is the group-specific third detail."""
    print(f"{headline_prefix}📄 {file_info['filename']}")
    print(f" Key: {file_info['key']}")
    print(f" Size: {file_info.get('size', 0)} bytes")
    print(detail_line)
    print(f" Modified: {file_info.get('last_modified', 'unknown')}")
    print()


async def list_files(args) -> None:
    """List a user's stored files, grouped by origin when no filter is given.

    Args:
        args: Parsed CLI arguments providing ``user_email``, ``file_type``
            (optional ``'user'``/``'tool'`` filter) and ``limit``.

    With a ``file_type`` filter the records are printed as one flat list.
    Without it they are grouped into user files, tool-generated files and a
    final group for records tagged with neither source, so the number of
    printed entries always matches the reported total. Errors are printed
    and logged rather than propagated.
    """
    if not args.user_email:
        print("❌ User email is required")
        return

    print(f"📂 Listing files for user {args.user_email}...")

    try:
        s3_client = S3StorageClient()
        files = await s3_client.list_files(
            user_email=args.user_email,
            file_type=args.file_type,
            limit=args.limit
        )

        if not files:
            print("📭 No files found")
            return

        print(f"\n📋 Found {len(files)} file(s):\n")

        if args.file_type:
            # Already filtered by the storage client: print a flat list.
            for file_info in files:
                _print_file_entry(
                    file_info,
                    "",
                    f" Type: {file_info.get('content_type', 'unknown')}",
                )
            return

        user_files, tool_files, other_files = _split_by_source(files)

        if user_files:
            print("👤 User Files:")
            for file_info in user_files:
                _print_file_entry(
                    file_info,
                    " ",
                    f" Type: {file_info.get('content_type', 'unknown')}",
                )

        if tool_files:
            print("🔧 Tool-Generated Files:")
            for file_info in tool_files:
                tags = file_info.get('tags') or {}
                _print_file_entry(
                    file_info,
                    " ",
                    f" Source Tool: {tags.get('source_tool', 'unknown')}",
                )

        if other_files:
            # Previously these were counted in the total but never shown.
            print("📎 Other Files:")
            for file_info in other_files:
                _print_file_entry(
                    file_info,
                    " ",
                    f" Type: {file_info.get('content_type', 'unknown')}",
                )

    except Exception as e:
        print(f"❌ List failed: {e}")
        logger.error(f"List error: {e}")
126
+
127
+
128
def _safe_download_name(stored_name) -> str:
    """Return the final path component of ``stored_name`` for use as a local filename.

    Raises:
        ValueError: If no usable component remains (empty, ``.`` or ``..``).
    """
    candidate = Path(stored_name).name
    if candidate in ("", ".", ".."):
        raise ValueError(
            f"Stored filename {stored_name!r} is not a usable local name; use --output"
        )
    return candidate


async def download_file(args) -> None:
    """Download a stored file and write it to the local filesystem.

    Args:
        args: Parsed CLI arguments providing ``user_email``, ``s3_key`` and
            ``output`` (optional destination path).

    When ``--output`` is not given, the destination is the stored filename
    reduced to its final path component, so a stored name containing
    directories or an absolute path cannot write outside the current
    directory. Errors are printed and logged rather than propagated.
    """
    if not args.user_email:
        print("❌ User email is required")
        return

    if not args.s3_key:
        print("❌ S3 key is required")
        return

    print(f"📥 Downloading file {args.s3_key} for user {args.user_email}...")

    try:
        s3_client = S3StorageClient()
        file_data = await s3_client.get_file(args.user_email, args.s3_key)

        if not file_data:
            print("❌ File not found")
            return

        # The storage client hands the payload back as base64 text.
        content = base64.b64decode(file_data['content_base64'])

        if args.output:
            # An explicit destination comes from the operator; use it as given.
            output_path = Path(args.output)
        else:
            # The stored name comes from storage metadata, not from the
            # operator, so strip any directory part before writing locally.
            output_path = Path(_safe_download_name(file_data['filename']))

        with open(output_path, 'wb') as f:
            f.write(content)

        print("✅ File downloaded successfully!")
        print(f" Saved to: {output_path}")
        print(f" Size: {len(content)} bytes")

    except Exception as e:
        print(f"❌ Download failed: {e}")
        logger.error(f"Download error: {e}")
165
+
166
+
167
async def delete_file(args) -> None:
    """Delete one of a user's stored files, asking for confirmation first.

    Args:
        args: Parsed CLI arguments providing ``user_email``, ``s3_key`` and
            ``force`` (skip the interactive confirmation when true).

    Errors from the storage client are printed and logged rather than
    propagated.
    """
    if not args.user_email:
        print("❌ User email is required")
        return

    if not args.s3_key:
        print("❌ S3 key is required")
        return

    print(f"🗑️ Deleting file {args.s3_key} for user {args.user_email}...")

    if not args.force:
        # Only an explicit 'y' (any case) proceeds; anything else cancels.
        answer = input("⚠️ Are you sure? This action cannot be undone. (y/N): ")
        if answer.lower() != 'y':
            print("❌ Deletion cancelled")
            return

    try:
        storage = S3StorageClient()
        deleted = await storage.delete_file(args.user_email, args.s3_key)
        if deleted:
            print("✅ File deleted successfully!")
        else:
            print("❌ File not found or already deleted")

    except Exception as e:
        print(f"❌ Deletion failed: {e}")
        logger.error(f"Deletion error: {e}")
197
+
198
+
199
async def get_stats(args) -> None:
    """Print storage statistics for a user.

    Args:
        args: Parsed CLI arguments providing ``user_email``.

    Missing counters are shown as 0. The per-type breakdown is printed only
    when the statistics contain a ``file_types`` entry. Errors are printed
    and logged rather than propagated.
    """
    if not args.user_email:
        print("❌ User email is required")
        return

    print(f"📊 Getting file statistics for user {args.user_email}...")

    try:
        storage = S3StorageClient()
        stats = await storage.get_user_stats(args.user_email)

        print("\n📈 File Statistics:\n")
        print(f" 📁 Total Files: {stats.get('total_files', 0)}")
        print(f" 💾 Total Size: {stats.get('total_size_bytes', 0)} bytes")
        print(f" 📤 User Files: {stats.get('user_files', 0)}")
        print(f" 🔧 Tool Files: {stats.get('tool_files', 0)}")

        if 'file_types' in stats:
            print("\n📊 By File Type:")
            for kind, total in stats['file_types'].items():
                print(f" {kind}: {total}")

    except Exception as e:
        print(f"❌ Stats failed: {e}")
        logger.error(f"Stats error: {e}")
225
+
226
+
227
def test_categorization(args) -> None:
    """Show how ``FileManager`` classifies a filename.

    Args:
        args: Parsed CLI arguments providing ``filename``.

    Prints the content type, category, extension, canvas type and whether
    the file would be displayed in the canvas. Nothing is uploaded or read
    from disk by this function itself.
    """
    if not args.filename:
        print("❌ Filename is required")
        return

    manager = FileManager()
    name = args.filename

    print(f"🧪 Testing file categorization for: {name}\n")

    content_type = manager.get_content_type(name)
    category = manager.categorize_file_type(name)
    extension = manager.get_file_extension(name)
    # The canvas-type lookup is given the lower-cased extension.
    canvas_type = manager.get_canvas_file_type(extension.lower())
    should_display = manager.should_display_in_canvas(name)

    print(f"📄 Content Type: {content_type}")
    print(f"🏷️ Category: {category}")
    print(f"📎 Extension: {extension}")
    print(f"🎨 Canvas Type: {canvas_type}")
    print(f"👁️ Display in Canvas: {'✅ Yes' if should_display else '❌ No'}")
248
+
249
+
250
def main():
    """Parse command-line arguments and run the selected sub-command.

    Registers the ``upload``, ``list``, ``download``, ``delete``, ``stats``
    and ``test-categorization`` sub-commands, each bound to its handler via
    ``set_defaults(func=...)``. Coroutine handlers are run with
    ``asyncio.run``; plain functions are called directly.

    With no sub-command the help text is printed and the function returns.
    Exits with status 1 on ``KeyboardInterrupt`` or on any exception that
    escapes a handler.
    """
    # Function-scope imports: only this entry point needs them.
    import asyncio
    import inspect

    parser = argparse.ArgumentParser(
        description="File storage management CLI",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The examples use this module's real import path (package ``atlas``).
        epilog="""
Examples:
  python -m atlas.modules.file_storage.cli upload test.txt user@example.com
  python -m atlas.modules.file_storage.cli list user@example.com
  python -m atlas.modules.file_storage.cli download user@example.com file_key_123 --output downloaded.txt
  python -m atlas.modules.file_storage.cli stats user@example.com
  python -m atlas.modules.file_storage.cli test-categorization example.py
        """
    )

    subparsers = parser.add_subparsers(dest='command', help='Available commands')

    # Upload command
    upload_parser = subparsers.add_parser('upload', help='Upload a file to S3')
    upload_parser.add_argument('file_path', help='Path to file to upload')
    upload_parser.add_argument('user_email', help='User email')
    upload_parser.add_argument('--filename', help='Custom filename (default: use original)')
    upload_parser.add_argument('--source-type', default='user', choices=['user', 'tool'], help='Source type')
    upload_parser.set_defaults(func=upload_file)

    # List command
    list_parser = subparsers.add_parser('list', help='List files for a user')
    list_parser.add_argument('user_email', help='User email')
    list_parser.add_argument('--file-type', choices=['user', 'tool'], help='Filter by file type')
    list_parser.add_argument('--limit', type=int, default=100, help='Maximum files to return')
    list_parser.set_defaults(func=list_files)

    # Download command
    download_parser = subparsers.add_parser('download', help='Download a file from S3')
    download_parser.add_argument('user_email', help='User email')
    download_parser.add_argument('s3_key', help='S3 key of file to download')
    download_parser.add_argument('--output', '-o', help='Output filename (default: original filename)')
    download_parser.set_defaults(func=download_file)

    # Delete command
    delete_parser = subparsers.add_parser('delete', help='Delete a file from S3')
    delete_parser.add_argument('user_email', help='User email')
    delete_parser.add_argument('s3_key', help='S3 key of file to delete')
    delete_parser.add_argument('--force', '-f', action='store_true', help='Skip confirmation')
    delete_parser.set_defaults(func=delete_file)

    # Stats command
    stats_parser = subparsers.add_parser('stats', help='Get file statistics for a user')
    stats_parser.add_argument('user_email', help='User email')
    stats_parser.set_defaults(func=get_stats)

    # Test categorization command
    test_parser = subparsers.add_parser('test-categorization', help='Test file categorization')
    test_parser.add_argument('filename', help='Filename to test')
    test_parser.set_defaults(func=test_categorization)

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return

    try:
        if hasattr(args, 'func'):
            # Decide sync vs. async from the handler itself rather than a
            # hand-maintained list of command names that can fall out of
            # step with the registrations above.
            if inspect.iscoroutinefunction(args.func):
                asyncio.run(args.func(args))
            else:
                args.func(args)
    except KeyboardInterrupt:
        print("\n⚠️ Operation cancelled by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Error: {e}")
        sys.exit(1)
327
+
328
+
329
+ if __name__ == '__main__':
330
+ main()
@@ -0,0 +1,290 @@
1
+ """
2
+ File content extraction client for calling HTTP-based extraction services.
3
+
4
+ This module provides a generic interface for extracting content from files
5
+ (PDFs, images, etc.) via configurable HTTP endpoints.
6
+ """
7
+
8
+ import base64
9
+ import logging
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+ import httpx
15
+
16
+ from atlas.modules.config.config_manager import (
17
+ FileExtractorConfig,
18
+ FileExtractorsConfig,
19
+ get_app_settings,
20
+ get_file_extractors_config,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
@dataclass
class ExtractionResult:
    """Outcome of a single content-extraction attempt.

    Only ``success`` is required; every other field defaults to ``None``.
    """

    # Whether the attempt produced extracted content.
    success: bool
    # Full extracted text, set on success.
    content: Optional[str] = None
    # Possibly truncated version of ``content``.
    preview: Optional[str] = None
    # Human-readable failure reason, set when ``success`` is False.
    error: Optional[str] = None
    # Extra data passed through from the extractor's response, if any.
    metadata: Optional[dict] = None
34
+
35
+
36
class FileContentExtractor:
    """
    Client for extracting content from files using configured HTTP services.

    Supports extension-based and MIME-type-based extractor lookup with
    configurable preview length truncation. Extraction failures are reported
    through ``ExtractionResult`` (``success=False`` plus an ``error`` message)
    rather than raised.
    """

    def __init__(self, config: Optional[FileExtractorsConfig] = None):
        """
        Initialize the extractor with optional config override.

        Args:
            config: Optional config override. If None, loads from config manager.
        """
        self._config = config

    @property
    def config(self) -> FileExtractorsConfig:
        """Get the extractors configuration (loaded on first access, then reused)."""
        if self._config is None:
            self._config = get_file_extractors_config()
        return self._config

    def is_enabled(self) -> bool:
        """Check if file content extraction is enabled globally.

        Both the application feature flag and the extractors config's own
        ``enabled`` switch must be truthy.
        """
        app_settings = get_app_settings()
        return (
            app_settings.feature_file_content_extraction_enabled
            and self.config.enabled
        )

    def get_default_behavior(self) -> str:
        """Get the default extraction behavior ('extract' or 'attach_only')."""
        return self.config.default_behavior

    def get_extractor_for_file(
        self, filename: str, mime_type: Optional[str] = None
    ) -> Optional[FileExtractorConfig]:
        """
        Find the appropriate extractor for a file based on extension or MIME type.

        Args:
            filename: The filename to look up
            mime_type: Optional MIME type for fallback lookup

        Returns:
            FileExtractorConfig if found and enabled, None otherwise
        """
        if not self.is_enabled():
            return None

        # Try extension-based lookup first (suffix is lower-cased before lookup)
        ext = Path(filename).suffix.lower()
        extractor_name = self.config.extension_mapping.get(ext)

        # Fall back to MIME type lookup
        if not extractor_name and mime_type:
            extractor_name = self.config.mime_mapping.get(mime_type)

        if not extractor_name:
            logger.debug(f"No extractor mapping for file: {filename} (mime: {mime_type})")
            return None

        extractor = self.config.extractors.get(extractor_name)
        if not extractor:
            logger.warning(f"Extractor '{extractor_name}' not found in config")
            return None

        if not extractor.enabled:
            logger.debug(f"Extractor '{extractor_name}' is disabled")
            return None

        return extractor

    def can_extract(self, filename: str, mime_type: Optional[str] = None) -> bool:
        """
        Check if content extraction is possible for a given file.

        Args:
            filename: The filename to check
            mime_type: Optional MIME type

        Returns:
            True if an enabled extractor is available for this file type
        """
        return self.get_extractor_for_file(filename, mime_type) is not None

    def get_supported_extensions(self) -> list[str]:
        """Get list of file extensions that have extraction support.

        Returns an empty list when extraction is disabled; otherwise every
        mapped extension whose extractor exists and is enabled.
        """
        if not self.is_enabled():
            return []

        extractors = self.config.extractors
        supported = []
        for ext, extractor_name in self.config.extension_mapping.items():
            extractor = extractors.get(extractor_name)
            if extractor and extractor.enabled:
                supported.append(ext)
        return supported

    @staticmethod
    def _build_request_headers(extractor: FileExtractorConfig) -> dict:
        """Build the outgoing headers for one extractor call.

        The API key (if any) becomes a Bearer ``Authorization`` header.
        Configured custom headers are applied afterwards, so they take
        precedence over it on a name clash.
        """
        request_headers = {}
        if extractor.api_key:
            request_headers["Authorization"] = f"Bearer {extractor.api_key}"
        if extractor.headers:
            request_headers.update(extractor.headers)
        return request_headers

    @staticmethod
    def _parse_response(
        response: "httpx.Response",
        extractor: FileExtractorConfig,
        filename: str,
    ) -> ExtractionResult:
        """Turn the extractor service's HTTP response into an ExtractionResult."""
        if response.status_code != 200:
            logger.warning(
                f"Extractor returned status {response.status_code} for {filename}"
            )
            return ExtractionResult(
                success=False,
                error=f"Extractor service returned status {response.status_code}"
            )

        # A body that is not JSON is a service-side problem, not an internal
        # error: report it plainly instead of via the generic handler.
        try:
            result_data = response.json()
        except ValueError:
            logger.warning(f"Extractor returned a non-JSON body for {filename}")
            return ExtractionResult(
                success=False,
                error="Extractor service returned invalid JSON"
            )

        # The field lookups below need a JSON object.
        if not isinstance(result_data, dict):
            logger.warning(f"Extractor returned a non-object JSON body for {filename}")
            return ExtractionResult(
                success=False,
                error="Extractor service returned an unexpected response format"
            )

        # Check for success flag if present
        if "success" in result_data and not result_data["success"]:
            return ExtractionResult(
                success=False,
                error=result_data.get("error", "Extraction failed")
            )

        # Extract the content field
        extracted_text = result_data.get(extractor.response_field)
        if extracted_text is None:
            return ExtractionResult(
                success=False,
                error=f"Response missing expected field: {extractor.response_field}"
            )

        # Truncate the preview only when a limit is set and actually exceeded
        preview_limit = extractor.preview_chars
        if preview_limit and len(extracted_text) > preview_limit:
            preview = extracted_text[:preview_limit] + "..."
        else:
            preview = extracted_text

        return ExtractionResult(
            success=True,
            content=extracted_text,
            preview=preview,
            metadata=result_data.get("metadata")
        )

    async def extract_content(
        self,
        filename: str,
        content_base64: str,
        mime_type: Optional[str] = None,
    ) -> ExtractionResult:
        """
        Extract content from a file using the appropriate HTTP extractor service.

        Args:
            filename: The name of the file
            content_base64: Base64-encoded file content
            mime_type: Optional MIME type of the file

        Returns:
            ExtractionResult with extracted content or error information.
            Timeouts, connection failures and unexpected errors are all
            converted into a failed result; nothing is raised to the caller.
        """
        extractor = self.get_extractor_for_file(filename, mime_type)
        if not extractor:
            return ExtractionResult(
                success=False,
                error=f"No extractor available for file: {filename}"
            )

        # Check file size limit
        content_size_mb = len(content_base64) * 3 / 4 / (1024 * 1024)  # Approximate decoded size
        if content_size_mb > extractor.max_file_size_mb:
            return ExtractionResult(
                success=False,
                error=f"File too large: {content_size_mb:.1f}MB exceeds limit of {extractor.max_file_size_mb}MB"
            )

        try:
            request_headers = self._build_request_headers(extractor)
            use_multipart = extractor.request_format == "multipart"

            file_bytes = None
            if use_multipart:
                # Validate the payload before opening any connection.
                try:
                    file_bytes = base64.b64decode(content_base64)
                except Exception as e:
                    return ExtractionResult(
                        success=False,
                        error=f"Failed to decode base64 content: {str(e)}"
                    )

            async with httpx.AsyncClient(timeout=extractor.timeout_seconds) as client:
                if use_multipart:
                    # Multipart form-data upload
                    content_type = mime_type or "application/octet-stream"
                    files = {
                        extractor.form_field_name: (filename, file_bytes, content_type)
                    }

                    # Request JSON response from the extractor service
                    request_headers.setdefault("Accept", "application/json")

                    response = await client.post(
                        url=extractor.url,
                        files=files,
                        headers=request_headers or None,
                    )
                else:
                    # Base64 JSON payload (default)
                    payload = {
                        "content": content_base64,
                        "filename": filename,
                        "options": {
                            "preview_chars": extractor.preview_chars,
                        }
                    }

                    response = await client.request(
                        method=extractor.method,
                        url=extractor.url,
                        json=payload,
                        headers=request_headers or None,
                    )

            return self._parse_response(response, extractor, filename)

        except httpx.TimeoutException:
            logger.warning(f"Extraction timeout for {filename} after {extractor.timeout_seconds}s")
            return ExtractionResult(
                success=False,
                error=f"Extraction timed out after {extractor.timeout_seconds} seconds"
            )
        except httpx.RequestError as e:
            logger.warning(f"Extraction request failed for {filename}: {e}")
            return ExtractionResult(
                success=False,
                error=f"Failed to connect to extractor service: {str(e)}"
            )
        except Exception as e:
            logger.error(f"Unexpected error extracting content from {filename}: {e}", exc_info=True)
            return ExtractionResult(
                success=False,
                error=f"Unexpected extraction error: {str(e)}"
            )
279
+
280
+
281
# Shared instance, created on the first call to get_content_extractor().
_extractor_instance: Optional[FileContentExtractor] = None


def get_content_extractor() -> FileContentExtractor:
    """Return the shared ``FileContentExtractor``, creating it on first use."""
    global _extractor_instance
    if _extractor_instance is not None:
        return _extractor_instance
    _extractor_instance = FileContentExtractor()
    return _extractor_instance