PyPI - atlas-chat - Versions diffs - 0.1.0__py3-none-any.whl - Mend

atlas-chat 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (250) hide show

atlas/__init__.py +40 -0
atlas/application/__init__.py +7 -0
atlas/application/chat/__init__.py +7 -0
atlas/application/chat/agent/__init__.py +10 -0
atlas/application/chat/agent/act_loop.py +179 -0
atlas/application/chat/agent/factory.py +142 -0
atlas/application/chat/agent/protocols.py +46 -0
atlas/application/chat/agent/react_loop.py +338 -0
atlas/application/chat/agent/think_act_loop.py +171 -0
atlas/application/chat/approval_manager.py +151 -0
atlas/application/chat/elicitation_manager.py +191 -0
atlas/application/chat/events/__init__.py +1 -0
atlas/application/chat/events/agent_event_relay.py +112 -0
atlas/application/chat/modes/__init__.py +1 -0
atlas/application/chat/modes/agent.py +125 -0
atlas/application/chat/modes/plain.py +74 -0
atlas/application/chat/modes/rag.py +81 -0
atlas/application/chat/modes/tools.py +179 -0
atlas/application/chat/orchestrator.py +213 -0
atlas/application/chat/policies/__init__.py +1 -0
atlas/application/chat/policies/tool_authorization.py +99 -0
atlas/application/chat/preprocessors/__init__.py +1 -0
atlas/application/chat/preprocessors/message_builder.py +92 -0
atlas/application/chat/preprocessors/prompt_override_service.py +104 -0
atlas/application/chat/service.py +454 -0
atlas/application/chat/utilities/__init__.py +6 -0
atlas/application/chat/utilities/error_handler.py +367 -0
atlas/application/chat/utilities/event_notifier.py +546 -0
atlas/application/chat/utilities/file_processor.py +613 -0
atlas/application/chat/utilities/tool_executor.py +789 -0
atlas/atlas_chat_cli.py +347 -0
atlas/atlas_client.py +238 -0
atlas/core/__init__.py +0 -0
atlas/core/auth.py +205 -0
atlas/core/authorization_manager.py +27 -0
atlas/core/capabilities.py +123 -0
atlas/core/compliance.py +215 -0
atlas/core/domain_whitelist.py +147 -0
atlas/core/domain_whitelist_middleware.py +82 -0
atlas/core/http_client.py +28 -0
atlas/core/log_sanitizer.py +102 -0
atlas/core/metrics_logger.py +59 -0
atlas/core/middleware.py +131 -0
atlas/core/otel_config.py +242 -0
atlas/core/prompt_risk.py +200 -0
atlas/core/rate_limit.py +0 -0
atlas/core/rate_limit_middleware.py +64 -0
atlas/core/security_headers_middleware.py +51 -0
atlas/domain/__init__.py +37 -0
atlas/domain/chat/__init__.py +1 -0
atlas/domain/chat/dtos.py +85 -0
atlas/domain/errors.py +96 -0
atlas/domain/messages/__init__.py +12 -0
atlas/domain/messages/models.py +160 -0
atlas/domain/rag_mcp_service.py +664 -0
atlas/domain/sessions/__init__.py +7 -0
atlas/domain/sessions/models.py +36 -0
atlas/domain/unified_rag_service.py +371 -0
atlas/infrastructure/__init__.py +10 -0
atlas/infrastructure/app_factory.py +135 -0
atlas/infrastructure/events/__init__.py +1 -0
atlas/infrastructure/events/cli_event_publisher.py +140 -0
atlas/infrastructure/events/websocket_publisher.py +140 -0
atlas/infrastructure/sessions/in_memory_repository.py +56 -0
atlas/infrastructure/transport/__init__.py +7 -0
atlas/infrastructure/transport/websocket_connection_adapter.py +33 -0
atlas/init_cli.py +226 -0
atlas/interfaces/__init__.py +15 -0
atlas/interfaces/events.py +134 -0
atlas/interfaces/llm.py +54 -0
atlas/interfaces/rag.py +40 -0
atlas/interfaces/sessions.py +75 -0
atlas/interfaces/tools.py +57 -0
atlas/interfaces/transport.py +24 -0
atlas/main.py +564 -0
atlas/mcp/api_key_demo/README.md +76 -0
atlas/mcp/api_key_demo/main.py +172 -0
atlas/mcp/api_key_demo/run.sh +56 -0
atlas/mcp/basictable/main.py +147 -0
atlas/mcp/calculator/main.py +149 -0
atlas/mcp/code-executor/execution_engine.py +98 -0
atlas/mcp/code-executor/execution_environment.py +95 -0
atlas/mcp/code-executor/main.py +528 -0
atlas/mcp/code-executor/result_processing.py +276 -0
atlas/mcp/code-executor/script_generation.py +195 -0
atlas/mcp/code-executor/security_checker.py +140 -0
atlas/mcp/corporate_cars/main.py +437 -0
atlas/mcp/csv_reporter/main.py +545 -0
atlas/mcp/duckduckgo/main.py +182 -0
atlas/mcp/elicitation_demo/README.md +171 -0
atlas/mcp/elicitation_demo/main.py +262 -0
atlas/mcp/env-demo/README.md +158 -0
atlas/mcp/env-demo/main.py +199 -0
atlas/mcp/file_size_test/main.py +284 -0
atlas/mcp/filesystem/main.py +348 -0
atlas/mcp/image_demo/main.py +113 -0
atlas/mcp/image_demo/requirements.txt +4 -0
atlas/mcp/logging_demo/README.md +72 -0
atlas/mcp/logging_demo/main.py +103 -0
atlas/mcp/many_tools_demo/main.py +50 -0
atlas/mcp/order_database/__init__.py +0 -0
atlas/mcp/order_database/main.py +369 -0
atlas/mcp/order_database/signal_data.csv +1001 -0
atlas/mcp/pdfbasic/main.py +394 -0
atlas/mcp/pptx_generator/main.py +760 -0
atlas/mcp/pptx_generator/requirements.txt +13 -0
atlas/mcp/pptx_generator/run_test.sh +1 -0
atlas/mcp/pptx_generator/test_pptx_generator_security.py +169 -0
atlas/mcp/progress_demo/main.py +167 -0
atlas/mcp/progress_updates_demo/QUICKSTART.md +273 -0
atlas/mcp/progress_updates_demo/README.md +120 -0
atlas/mcp/progress_updates_demo/main.py +497 -0
atlas/mcp/prompts/main.py +222 -0
atlas/mcp/public_demo/main.py +189 -0
atlas/mcp/sampling_demo/README.md +169 -0
atlas/mcp/sampling_demo/main.py +234 -0
atlas/mcp/thinking/main.py +77 -0
atlas/mcp/tool_planner/main.py +240 -0
atlas/mcp/ui-demo/badmesh.png +0 -0
atlas/mcp/ui-demo/main.py +383 -0
atlas/mcp/ui-demo/templates/button_demo.html +32 -0
atlas/mcp/ui-demo/templates/data_visualization.html +32 -0
atlas/mcp/ui-demo/templates/form_demo.html +28 -0
atlas/mcp/username-override-demo/README.md +320 -0
atlas/mcp/username-override-demo/main.py +308 -0
atlas/modules/__init__.py +0 -0
atlas/modules/config/__init__.py +34 -0
atlas/modules/config/cli.py +231 -0
atlas/modules/config/config_manager.py +1096 -0
atlas/modules/file_storage/__init__.py +22 -0
atlas/modules/file_storage/cli.py +330 -0
atlas/modules/file_storage/content_extractor.py +290 -0
atlas/modules/file_storage/manager.py +295 -0
atlas/modules/file_storage/mock_s3_client.py +402 -0
atlas/modules/file_storage/s3_client.py +417 -0
atlas/modules/llm/__init__.py +19 -0
atlas/modules/llm/caller.py +287 -0
atlas/modules/llm/litellm_caller.py +675 -0
atlas/modules/llm/models.py +19 -0
atlas/modules/mcp_tools/__init__.py +17 -0
atlas/modules/mcp_tools/client.py +2123 -0
atlas/modules/mcp_tools/token_storage.py +556 -0
atlas/modules/prompts/prompt_provider.py +130 -0
atlas/modules/rag/__init__.py +24 -0
atlas/modules/rag/atlas_rag_client.py +336 -0
atlas/modules/rag/client.py +129 -0
atlas/routes/admin_routes.py +865 -0
atlas/routes/config_routes.py +484 -0
atlas/routes/feedback_routes.py +361 -0
atlas/routes/files_routes.py +274 -0
atlas/routes/health_routes.py +40 -0
atlas/routes/mcp_auth_routes.py +223 -0
atlas/server_cli.py +164 -0
atlas/tests/conftest.py +20 -0
atlas/tests/integration/test_mcp_auth_integration.py +152 -0
atlas/tests/manual_test_sampling.py +87 -0
atlas/tests/modules/mcp_tools/test_client_auth.py +226 -0
atlas/tests/modules/mcp_tools/test_client_env.py +191 -0
atlas/tests/test_admin_mcp_server_management_routes.py +141 -0
atlas/tests/test_agent_roa.py +135 -0
atlas/tests/test_app_factory_smoke.py +47 -0
atlas/tests/test_approval_manager.py +439 -0
atlas/tests/test_atlas_client.py +188 -0
atlas/tests/test_atlas_rag_client.py +447 -0
atlas/tests/test_atlas_rag_integration.py +224 -0
atlas/tests/test_attach_file_flow.py +287 -0
atlas/tests/test_auth_utils.py +165 -0
atlas/tests/test_backend_public_url.py +185 -0
atlas/tests/test_banner_logging.py +287 -0
atlas/tests/test_capability_tokens_and_injection.py +203 -0
atlas/tests/test_compliance_level.py +54 -0
atlas/tests/test_compliance_manager.py +253 -0
atlas/tests/test_config_manager.py +617 -0
atlas/tests/test_config_manager_paths.py +12 -0
atlas/tests/test_core_auth.py +18 -0
atlas/tests/test_core_utils.py +190 -0
atlas/tests/test_docker_env_sync.py +202 -0
atlas/tests/test_domain_errors.py +329 -0
atlas/tests/test_domain_whitelist.py +359 -0
atlas/tests/test_elicitation_manager.py +408 -0
atlas/tests/test_elicitation_routing.py +296 -0
atlas/tests/test_env_demo_server.py +88 -0
atlas/tests/test_error_classification.py +113 -0
atlas/tests/test_error_flow_integration.py +116 -0
atlas/tests/test_feedback_routes.py +333 -0
atlas/tests/test_file_content_extraction.py +1134 -0
atlas/tests/test_file_extraction_routes.py +158 -0
atlas/tests/test_file_library.py +107 -0
atlas/tests/test_file_manager_unit.py +18 -0
atlas/tests/test_health_route.py +49 -0
atlas/tests/test_http_client_stub.py +8 -0
atlas/tests/test_imports_smoke.py +30 -0
atlas/tests/test_interfaces_llm_response.py +9 -0
atlas/tests/test_issue_access_denied_fix.py +136 -0
atlas/tests/test_llm_env_expansion.py +836 -0
atlas/tests/test_log_level_sensitive_data.py +285 -0
atlas/tests/test_mcp_auth_routes.py +341 -0
atlas/tests/test_mcp_client_auth.py +331 -0
atlas/tests/test_mcp_data_injection.py +270 -0
atlas/tests/test_mcp_get_authorized_servers.py +95 -0
atlas/tests/test_mcp_hot_reload.py +512 -0
atlas/tests/test_mcp_image_content.py +424 -0
atlas/tests/test_mcp_logging.py +172 -0
atlas/tests/test_mcp_progress_updates.py +313 -0
atlas/tests/test_mcp_prompt_override_system_prompt.py +102 -0
atlas/tests/test_mcp_prompts_server.py +39 -0
atlas/tests/test_mcp_tool_result_parsing.py +296 -0
atlas/tests/test_metrics_logger.py +56 -0
atlas/tests/test_middleware_auth.py +379 -0
atlas/tests/test_prompt_risk_and_acl.py +141 -0
atlas/tests/test_rag_mcp_aggregator.py +204 -0
atlas/tests/test_rag_mcp_service.py +224 -0
atlas/tests/test_rate_limit_middleware.py +45 -0
atlas/tests/test_routes_config_smoke.py +60 -0
atlas/tests/test_routes_files_download_token.py +41 -0
atlas/tests/test_routes_files_health.py +18 -0
atlas/tests/test_runtime_imports.py +53 -0
atlas/tests/test_sampling_integration.py +482 -0
atlas/tests/test_security_admin_routes.py +61 -0
atlas/tests/test_security_capability_tokens.py +65 -0
atlas/tests/test_security_file_stats_scope.py +21 -0
atlas/tests/test_security_header_injection.py +191 -0
atlas/tests/test_security_headers_and_filename.py +63 -0
atlas/tests/test_shared_session_repository.py +101 -0
atlas/tests/test_system_prompt_loading.py +181 -0
atlas/tests/test_token_storage.py +505 -0
atlas/tests/test_tool_approval_config.py +93 -0
atlas/tests/test_tool_approval_utils.py +356 -0
atlas/tests/test_tool_authorization_group_filtering.py +223 -0
atlas/tests/test_tool_details_in_config.py +108 -0
atlas/tests/test_tool_planner.py +300 -0
atlas/tests/test_unified_rag_service.py +398 -0
atlas/tests/test_username_override_in_approval.py +258 -0
atlas/tests/test_websocket_auth_header.py +168 -0
atlas/version.py +6 -0
atlas_chat-0.1.0.data/data/.env.example +253 -0
atlas_chat-0.1.0.data/data/config/defaults/compliance-levels.json +44 -0
atlas_chat-0.1.0.data/data/config/defaults/domain-whitelist.json +123 -0
atlas_chat-0.1.0.data/data/config/defaults/file-extractors.json +74 -0
atlas_chat-0.1.0.data/data/config/defaults/help-config.json +198 -0
atlas_chat-0.1.0.data/data/config/defaults/llmconfig-buggy.yml +11 -0
atlas_chat-0.1.0.data/data/config/defaults/llmconfig.yml +19 -0
atlas_chat-0.1.0.data/data/config/defaults/mcp.json +138 -0
atlas_chat-0.1.0.data/data/config/defaults/rag-sources.json +17 -0
atlas_chat-0.1.0.data/data/config/defaults/splash-config.json +16 -0
atlas_chat-0.1.0.dist-info/METADATA +236 -0
atlas_chat-0.1.0.dist-info/RECORD +250 -0
atlas_chat-0.1.0.dist-info/WHEEL +5 -0
atlas_chat-0.1.0.dist-info/entry_points.txt +4 -0
atlas_chat-0.1.0.dist-info/top_level.txt +1 -0

atlas/modules/file_storage/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+"""File storage module for the chat backend.
+This module provides:
+- S3 storage client for file operations
+- File management utilities
+- Content type detection and categorization
+- CLI tools for file operations
+"""
+from .manager import FileManager
+from .s3_client import S3StorageClient
+# Create default instances
+# NOTE: these statements run at import time, so importing this package
+# constructs an S3StorageClient and a FileManager wired to it as a side effect.
+s3_client = S3StorageClient()
+file_manager = FileManager(s3_client)
+# Public API: the two classes plus the shared default instances built above.
+__all__ = [
+    "S3StorageClient",
+    "FileManager",
+    "s3_client",
+    "file_manager",
+]

atlas/modules/file_storage/cli.py ADDED Viewed

@@ -0,0 +1,330 @@
+"""CLI interface for file storage operations.
+This CLI allows you to:
+- Upload files to S3
+- List files for users
+- Download files from S3
+- Get file statistics
+- Test file storage operations
+"""
+import argparse
+import base64
+import logging
+import sys
+from pathlib import Path
+from .manager import FileManager
+from .s3_client import S3StorageClient
+# Set up logging for CLI
+# NOTE: basicConfig runs at import time and targets the root logger (it is a
+# no-op when the root logger already has handlers), so merely importing this
+# module can install the "LEVEL: message" format at INFO level.
+logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+logger = logging.getLogger(__name__)
+async def upload_file(args) -> None:
+    """Upload a file to S3 storage."""
+    file_path = Path(args.file_path)
+    if not file_path.exists():
+        print(f"❌ File not found: {file_path}")
+        return
+    if not args.user_email:
+        print("❌ User email is required")
+        return
+    print(f"📤 Uploading {file_path.name} for user {args.user_email}...")
+    try:
+        # Read and encode file content
+        with open(file_path, 'rb') as f:
+            content = f.read()
+        content_base64 = base64.b64encode(content).decode('utf-8')
+        # Use specified filename or original filename
+        filename = args.filename or file_path.name
+        # Initialize file manager and upload
+        file_manager = FileManager()
+        result = await file_manager.upload_file(
+            user_email=args.user_email,
+            filename=filename,
+            content_base64=content_base64,
+            source_type=args.source_type
+        )
+        print("✅ File uploaded successfully!")
+        print(f"   S3 Key: {result['key']}")
+        print(f"   Size: {result.get('size', 'unknown')} bytes")
+        print(f"   Content Type: {result.get('content_type', 'unknown')}")
+    except Exception as e:
+        print(f"❌ Upload failed: {e}")
+        logger.error(f"Upload error: {e}")
+async def list_files(args) -> None:
+    """List files for a user."""
+    if not args.user_email:
+        print("❌ User email is required")
+        return
+    print(f"📂 Listing files for user {args.user_email}...")
+    try:
+        s3_client = S3StorageClient()
+        files = await s3_client.list_files(
+            user_email=args.user_email,
+            file_type=args.file_type,
+            limit=args.limit
+        )
+        if not files:
+            print("📭 No files found")
+            return
+        print(f"\n📋 Found {len(files)} file(s):\n")
+        # Group files by type if no specific filter
+        if not args.file_type:
+            user_files = [f for f in files if f.get('tags', {}).get('source') == 'user']
+            tool_files = [f for f in files if f.get('tags', {}).get('source') == 'tool']
+            if user_files:
+                print("👤 User Files:")
+                for file_info in user_files:
+                    print(f"   📄 {file_info['filename']}")
+                    print(f"      Key: {file_info['key']}")
+                    print(f"      Size: {file_info.get('size', 0)} bytes")
+                    print(f"      Type: {file_info.get('content_type', 'unknown')}")
+                    print(f"      Modified: {file_info.get('last_modified', 'unknown')}")
+                    print()
+            if tool_files:
+                print("🔧 Tool-Generated Files:")
+                for file_info in tool_files:
+                    tags = file_info.get('tags', {})
+                    print(f"   📄 {file_info['filename']}")
+                    print(f"      Key: {file_info['key']}")
+                    print(f"      Size: {file_info.get('size', 0)} bytes")
+                    print(f"      Source Tool: {tags.get('source_tool', 'unknown')}")
+                    print(f"      Modified: {file_info.get('last_modified', 'unknown')}")
+                    print()
+        else:
+            for file_info in files:
+                print(f"📄 {file_info['filename']}")
+                print(f"   Key: {file_info['key']}")
+                print(f"   Size: {file_info.get('size', 0)} bytes")
+                print(f"   Type: {file_info.get('content_type', 'unknown')}")
+                print(f"   Modified: {file_info.get('last_modified', 'unknown')}")
+                print()
+    except Exception as e:
+        print(f"❌ List failed: {e}")
+        logger.error(f"List error: {e}")
+async def download_file(args) -> None:
+    """Download a file from S3 storage."""
+    if not args.user_email:
+        print("❌ User email is required")
+        return
+    if not args.s3_key:
+        print("❌ S3 key is required")
+        return
+    print(f"📥 Downloading file {args.s3_key} for user {args.user_email}...")
+    try:
+        s3_client = S3StorageClient()
+        file_data = await s3_client.get_file(args.user_email, args.s3_key)
+        if not file_data:
+            print("❌ File not found")
+            return
+        # Decode base64 content
+        content = base64.b64decode(file_data['content_base64'])
+        # Determine output filename
+        output_path = Path(args.output) if args.output else Path(file_data['filename'])
+        # Write to file
+        with open(output_path, 'wb') as f:
+            f.write(content)
+        print("✅ File downloaded successfully!")
+        print(f"   Saved to: {output_path}")
+        print(f"   Size: {len(content)} bytes")
+    except Exception as e:
+        print(f"❌ Download failed: {e}")
+        logger.error(f"Download error: {e}")
+async def delete_file(args) -> None:
+    """Delete a file from S3 storage."""
+    if not args.user_email:
+        print("❌ User email is required")
+        return
+    if not args.s3_key:
+        print("❌ S3 key is required")
+        return
+    print(f"🗑️  Deleting file {args.s3_key} for user {args.user_email}...")
+    if not args.force:
+        confirm = input("⚠️  Are you sure? This action cannot be undone. (y/N): ")
+        if confirm.lower() != 'y':
+            print("❌ Deletion cancelled")
+            return
+    try:
+        s3_client = S3StorageClient()
+        success = await s3_client.delete_file(args.user_email, args.s3_key)
+        if success:
+            print("✅ File deleted successfully!")
+        else:
+            print("❌ File not found or already deleted")
+    except Exception as e:
+        print(f"❌ Deletion failed: {e}")
+        logger.error(f"Deletion error: {e}")
+async def get_stats(args) -> None:
+    """Get file statistics for a user."""
+    if not args.user_email:
+        print("❌ User email is required")
+        return
+    print(f"📊 Getting file statistics for user {args.user_email}...")
+    try:
+        s3_client = S3StorageClient()
+        stats = await s3_client.get_user_stats(args.user_email)
+        print("\n📈 File Statistics:\n")
+        print(f"   📁 Total Files: {stats.get('total_files', 0)}")
+        print(f"   💾 Total Size: {stats.get('total_size_bytes', 0)} bytes")
+        print(f"   📤 User Files: {stats.get('user_files', 0)}")
+        print(f"   🔧 Tool Files: {stats.get('tool_files', 0)}")
+        if 'file_types' in stats:
+            print("\n📊 By File Type:")
+            for file_type, count in stats['file_types'].items():
+                print(f"   {file_type}: {count}")
+    except Exception as e:
+        print(f"❌ Stats failed: {e}")
+        logger.error(f"Stats error: {e}")
+def test_categorization(args) -> None:
+    """Test file categorization and content type detection."""
+    if not args.filename:
+        print("❌ Filename is required")
+        return
+    file_manager = FileManager()
+    print(f"🧪 Testing file categorization for: {args.filename}\n")
+    content_type = file_manager.get_content_type(args.filename)
+    category = file_manager.categorize_file_type(args.filename)
+    extension = file_manager.get_file_extension(args.filename)
+    canvas_type = file_manager.get_canvas_file_type(extension.lower())
+    should_display = file_manager.should_display_in_canvas(args.filename)
+    print(f"📄 Content Type: {content_type}")
+    print(f"🏷️  Category: {category}")
+    print(f"📎 Extension: {extension}")
+    print(f"🎨 Canvas Type: {canvas_type}")
+    print(f"👁️  Display in Canvas: {'✅ Yes' if should_display else '❌ No'}")
+def main():
+    """Main CLI entry point."""
+    parser = argparse.ArgumentParser(
+        description="File storage management CLI",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python -m backend.modules.file_storage.cli upload test.txt user@example.com
+  python -m backend.modules.file_storage.cli list user@example.com
+  python -m backend.modules.file_storage.cli download user@example.com file_key_123 --output downloaded.txt
+  python -m backend.modules.file_storage.cli stats user@example.com
+  python -m backend.modules.file_storage.cli test-categorization example.py
+        """
+    )
+    subparsers = parser.add_subparsers(dest='command', help='Available commands')
+    # Upload command
+    upload_parser = subparsers.add_parser('upload', help='Upload a file to S3')
+    upload_parser.add_argument('file_path', help='Path to file to upload')
+    upload_parser.add_argument('user_email', help='User email')
+    upload_parser.add_argument('--filename', help='Custom filename (default: use original)')
+    upload_parser.add_argument('--source-type', default='user', choices=['user', 'tool'], help='Source type')
+    upload_parser.set_defaults(func=upload_file)
+    # List command
+    list_parser = subparsers.add_parser('list', help='List files for a user')
+    list_parser.add_argument('user_email', help='User email')
+    list_parser.add_argument('--file-type', choices=['user', 'tool'], help='Filter by file type')
+    list_parser.add_argument('--limit', type=int, default=100, help='Maximum files to return')
+    list_parser.set_defaults(func=list_files)
+    # Download command
+    download_parser = subparsers.add_parser('download', help='Download a file from S3')
+    download_parser.add_argument('user_email', help='User email')
+    download_parser.add_argument('s3_key', help='S3 key of file to download')
+    download_parser.add_argument('--output', '-o', help='Output filename (default: original filename)')
+    download_parser.set_defaults(func=download_file)
+    # Delete command
+    delete_parser = subparsers.add_parser('delete', help='Delete a file from S3')
+    delete_parser.add_argument('user_email', help='User email')
+    delete_parser.add_argument('s3_key', help='S3 key of file to delete')
+    delete_parser.add_argument('--force', '-f', action='store_true', help='Skip confirmation')
+    delete_parser.set_defaults(func=delete_file)
+    # Stats command
+    stats_parser = subparsers.add_parser('stats', help='Get file statistics for a user')
+    stats_parser.add_argument('user_email', help='User email')
+    stats_parser.set_defaults(func=get_stats)
+    # Test categorization command
+    test_parser = subparsers.add_parser('test-categorization', help='Test file categorization')
+    test_parser.add_argument('filename', help='Filename to test')
+    test_parser.set_defaults(func=test_categorization)
+    args = parser.parse_args()
+    if not args.command:
+        parser.print_help()
+        return
+    try:
+        if hasattr(args, 'func'):
+            if args.command in ['upload', 'list', 'download', 'delete', 'stats']:
+                # Async commands
+                import asyncio
+                asyncio.run(args.func(args))
+            else:
+                # Sync commands
+                args.func(args)
+    except KeyboardInterrupt:
+        print("\n⚠️  Operation cancelled by user")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"Error: {e}")
+        sys.exit(1)
+# Run the CLI only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+    main()

atlas/modules/file_storage/content_extractor.py ADDED Viewed

@@ -0,0 +1,290 @@
+"""
+File content extraction client for calling HTTP-based extraction services.
+This module provides a generic interface for extracting content from files
+(PDFs, images, etc.) via configurable HTTP endpoints.
+"""
+import base64
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+import httpx
+from atlas.modules.config.config_manager import (
+    FileExtractorConfig,
+    FileExtractorsConfig,
+    get_app_settings,
+    get_file_extractors_config,
+)
+logger = logging.getLogger(__name__)
+@dataclass
+class ExtractionResult:
+    """Result of a content extraction attempt.
+    Returned by FileContentExtractor.extract_content, which reports failures
+    through success=False plus an error message instead of raising.
+    """
+    # True only when the extractor service returned the expected content field.
+    success: bool
+    # Full extracted text; populated on success.
+    content: Optional[str] = None
+    # On success: the content cut to the extractor's preview_chars with "..."
+    # appended when it was longer, otherwise the same text as content.
+    preview: Optional[str] = None
+    # Human-readable failure reason; populated when success is False.
+    error: Optional[str] = None
+    # The "metadata" value of the service response, when one was present.
+    metadata: Optional[dict] = None
+class FileContentExtractor:
+    """
+    Client for extracting content from files using configured HTTP services.
+    Supports extension-based and MIME-type-based extractor lookup with
+    configurable preview length truncation.
+    """
+    def __init__(self, config: Optional[FileExtractorsConfig] = None):
+        """
+        Initialize the extractor with optional config override.
+        Args:
+            config: Optional config override. If None, loads from config manager.
+        """
+        self._config = config
+    @property
+    def config(self) -> FileExtractorsConfig:
+        """Get the extractors configuration (lazy loaded)."""
+        if self._config is None:
+            self._config = get_file_extractors_config()
+        return self._config
+    def is_enabled(self) -> bool:
+        """Check if file content extraction is enabled globally."""
+        app_settings = get_app_settings()
+        return (
+            app_settings.feature_file_content_extraction_enabled
+            and self.config.enabled
+        )
+    def get_default_behavior(self) -> str:
+        """Get the default extraction behavior ('extract' or 'attach_only')."""
+        return self.config.default_behavior
+    def get_extractor_for_file(
+        self, filename: str, mime_type: Optional[str] = None
+    ) -> Optional[FileExtractorConfig]:
+        """
+        Find the appropriate extractor for a file based on extension or MIME type.
+        Args:
+            filename: The filename to look up
+            mime_type: Optional MIME type for fallback lookup
+        Returns:
+            FileExtractorConfig if found and enabled, None otherwise
+        """
+        if not self.is_enabled():
+            return None
+        # Try extension-based lookup first
+        ext = Path(filename).suffix.lower()
+        extractor_name = self.config.extension_mapping.get(ext)
+        # Fall back to MIME type lookup
+        if not extractor_name and mime_type:
+            extractor_name = self.config.mime_mapping.get(mime_type)
+        if not extractor_name:
+            logger.debug(f"No extractor mapping for file: {filename} (mime: {mime_type})")
+            return None
+        extractor = self.config.extractors.get(extractor_name)
+        if not extractor:
+            logger.warning(f"Extractor '{extractor_name}' not found in config")
+            return None
+        if not extractor.enabled:
+            logger.debug(f"Extractor '{extractor_name}' is disabled")
+            return None
+        return extractor
+    def can_extract(self, filename: str, mime_type: Optional[str] = None) -> bool:
+        """
+        Check if content extraction is possible for a given file.
+        Args:
+            filename: The filename to check
+            mime_type: Optional MIME type
+        Returns:
+            True if an enabled extractor is available for this file type
+        """
+        return self.get_extractor_for_file(filename, mime_type) is not None
+    def get_supported_extensions(self) -> list[str]:
+        """Get list of file extensions that have extraction support."""
+        if not self.is_enabled():
+            return []
+        supported = []
+        for ext, extractor_name in self.config.extension_mapping.items():
+            extractor = self.config.extractors.get(extractor_name)
+            if extractor and extractor.enabled:
+                supported.append(ext)
+        return supported
+    async def extract_content(
+        self,
+        filename: str,
+        content_base64: str,
+        mime_type: Optional[str] = None,
+    ) -> ExtractionResult:
+        """
+        Extract content from a file using the appropriate HTTP extractor service.
+        Args:
+            filename: The name of the file
+            content_base64: Base64-encoded file content
+            mime_type: Optional MIME type of the file
+        Returns:
+            ExtractionResult with extracted content or error information
+        """
+        extractor = self.get_extractor_for_file(filename, mime_type)
+        if not extractor:
+            return ExtractionResult(
+                success=False,
+                error=f"No extractor available for file: {filename}"
+            )
+        # Check file size limit
+        content_size_mb = len(content_base64) * 3 / 4 / (1024 * 1024)  # Approximate decoded size
+        if content_size_mb > extractor.max_file_size_mb:
+            return ExtractionResult(
+                success=False,
+                error=f"File too large: {content_size_mb:.1f}MB exceeds limit of {extractor.max_file_size_mb}MB"
+            )
+        try:
+            # Build request headers
+            request_headers = {}
+            # Add API key as Authorization header if configured
+            if extractor.api_key:
+                request_headers["Authorization"] = f"Bearer {extractor.api_key}"
+            # Add any custom headers from config
+            if extractor.headers:
+                request_headers.update(extractor.headers)
+            async with httpx.AsyncClient(timeout=extractor.timeout_seconds) as client:
+                if extractor.request_format == "multipart":
+                    # Multipart form-data upload
+                    try:
+                        file_bytes = base64.b64decode(content_base64)
+                    except Exception as e:
+                        return ExtractionResult(
+                            success=False,
+                            error=f"Failed to decode base64 content: {str(e)}"
+                        )
+                    content_type = mime_type or "application/octet-stream"
+                    files = {
+                        extractor.form_field_name: (filename, file_bytes, content_type)
+                    }
+                    # Request JSON response from the extractor service
+                    request_headers.setdefault("Accept", "application/json")
+                    response = await client.post(
+                        url=extractor.url,
+                        files=files,
+                        headers=request_headers if request_headers else None,
+                    )
+                else:
+                    # Base64 JSON payload (default)
+                    payload = {
+                        "content": content_base64,
+                        "filename": filename,
+                        "options": {
+                            "preview_chars": extractor.preview_chars,
+                        }
+                    }
+                    response = await client.request(
+                        method=extractor.method,
+                        url=extractor.url,
+                        json=payload,
+                        headers=request_headers if request_headers else None,
+                    )
+                if response.status_code != 200:
+                    logger.warning(
+                        f"Extractor returned status {response.status_code} for {filename}"
+                    )
+                    return ExtractionResult(
+                        success=False,
+                        error=f"Extractor service returned status {response.status_code}"
+                    )
+                result_data = response.json()
+                # Check for success flag if present
+                if "success" in result_data and not result_data["success"]:
+                    return ExtractionResult(
+                        success=False,
+                        error=result_data.get("error", "Extraction failed")
+                    )
+                # Extract the content field
+                extracted_text = result_data.get(extractor.response_field)
+                if extracted_text is None:
+                    return ExtractionResult(
+                        success=False,
+                        error=f"Response missing expected field: {extractor.response_field}"
+                    )
+                # Generate preview if content is longer than preview_chars
+                preview = None
+                if extractor.preview_chars and len(extracted_text) > extractor.preview_chars:
+                    preview = extracted_text[:extractor.preview_chars] + "..."
+                else:
+                    preview = extracted_text
+                return ExtractionResult(
+                    success=True,
+                    content=extracted_text,
+                    preview=preview,
+                    metadata=result_data.get("metadata")
+                )
+        except httpx.TimeoutException:
+            logger.warning(f"Extraction timeout for {filename} after {extractor.timeout_seconds}s")
+            return ExtractionResult(
+                success=False,
+                error=f"Extraction timed out after {extractor.timeout_seconds} seconds"
+            )
+        except httpx.RequestError as e:
+            logger.warning(f"Extraction request failed for {filename}: {e}")
+            return ExtractionResult(
+                success=False,
+                error=f"Failed to connect to extractor service: {str(e)}"
+            )
+        except Exception as e:
+            logger.error(f"Unexpected error extracting content from {filename}: {e}", exc_info=True)
+            return ExtractionResult(
+                success=False,
+                error=f"Unexpected extraction error: {str(e)}"
+            )
+# Module-level instance for convenience
+_extractor_instance: Optional[FileContentExtractor] = None
+def get_content_extractor() -> FileContentExtractor:
+    """Get the shared file content extractor instance."""
+    global _extractor_instance
+    if _extractor_instance is None:
+        _extractor_instance = FileContentExtractor()
+    return _extractor_instance