atlas-chat 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atlas/__init__.py +40 -0
- atlas/application/__init__.py +7 -0
- atlas/application/chat/__init__.py +7 -0
- atlas/application/chat/agent/__init__.py +10 -0
- atlas/application/chat/agent/act_loop.py +179 -0
- atlas/application/chat/agent/factory.py +142 -0
- atlas/application/chat/agent/protocols.py +46 -0
- atlas/application/chat/agent/react_loop.py +338 -0
- atlas/application/chat/agent/think_act_loop.py +171 -0
- atlas/application/chat/approval_manager.py +151 -0
- atlas/application/chat/elicitation_manager.py +191 -0
- atlas/application/chat/events/__init__.py +1 -0
- atlas/application/chat/events/agent_event_relay.py +112 -0
- atlas/application/chat/modes/__init__.py +1 -0
- atlas/application/chat/modes/agent.py +125 -0
- atlas/application/chat/modes/plain.py +74 -0
- atlas/application/chat/modes/rag.py +81 -0
- atlas/application/chat/modes/tools.py +179 -0
- atlas/application/chat/orchestrator.py +213 -0
- atlas/application/chat/policies/__init__.py +1 -0
- atlas/application/chat/policies/tool_authorization.py +99 -0
- atlas/application/chat/preprocessors/__init__.py +1 -0
- atlas/application/chat/preprocessors/message_builder.py +92 -0
- atlas/application/chat/preprocessors/prompt_override_service.py +104 -0
- atlas/application/chat/service.py +454 -0
- atlas/application/chat/utilities/__init__.py +6 -0
- atlas/application/chat/utilities/error_handler.py +367 -0
- atlas/application/chat/utilities/event_notifier.py +546 -0
- atlas/application/chat/utilities/file_processor.py +613 -0
- atlas/application/chat/utilities/tool_executor.py +789 -0
- atlas/atlas_chat_cli.py +347 -0
- atlas/atlas_client.py +238 -0
- atlas/core/__init__.py +0 -0
- atlas/core/auth.py +205 -0
- atlas/core/authorization_manager.py +27 -0
- atlas/core/capabilities.py +123 -0
- atlas/core/compliance.py +215 -0
- atlas/core/domain_whitelist.py +147 -0
- atlas/core/domain_whitelist_middleware.py +82 -0
- atlas/core/http_client.py +28 -0
- atlas/core/log_sanitizer.py +102 -0
- atlas/core/metrics_logger.py +59 -0
- atlas/core/middleware.py +131 -0
- atlas/core/otel_config.py +242 -0
- atlas/core/prompt_risk.py +200 -0
- atlas/core/rate_limit.py +0 -0
- atlas/core/rate_limit_middleware.py +64 -0
- atlas/core/security_headers_middleware.py +51 -0
- atlas/domain/__init__.py +37 -0
- atlas/domain/chat/__init__.py +1 -0
- atlas/domain/chat/dtos.py +85 -0
- atlas/domain/errors.py +96 -0
- atlas/domain/messages/__init__.py +12 -0
- atlas/domain/messages/models.py +160 -0
- atlas/domain/rag_mcp_service.py +664 -0
- atlas/domain/sessions/__init__.py +7 -0
- atlas/domain/sessions/models.py +36 -0
- atlas/domain/unified_rag_service.py +371 -0
- atlas/infrastructure/__init__.py +10 -0
- atlas/infrastructure/app_factory.py +135 -0
- atlas/infrastructure/events/__init__.py +1 -0
- atlas/infrastructure/events/cli_event_publisher.py +140 -0
- atlas/infrastructure/events/websocket_publisher.py +140 -0
- atlas/infrastructure/sessions/in_memory_repository.py +56 -0
- atlas/infrastructure/transport/__init__.py +7 -0
- atlas/infrastructure/transport/websocket_connection_adapter.py +33 -0
- atlas/init_cli.py +226 -0
- atlas/interfaces/__init__.py +15 -0
- atlas/interfaces/events.py +134 -0
- atlas/interfaces/llm.py +54 -0
- atlas/interfaces/rag.py +40 -0
- atlas/interfaces/sessions.py +75 -0
- atlas/interfaces/tools.py +57 -0
- atlas/interfaces/transport.py +24 -0
- atlas/main.py +564 -0
- atlas/mcp/api_key_demo/README.md +76 -0
- atlas/mcp/api_key_demo/main.py +172 -0
- atlas/mcp/api_key_demo/run.sh +56 -0
- atlas/mcp/basictable/main.py +147 -0
- atlas/mcp/calculator/main.py +149 -0
- atlas/mcp/code-executor/execution_engine.py +98 -0
- atlas/mcp/code-executor/execution_environment.py +95 -0
- atlas/mcp/code-executor/main.py +528 -0
- atlas/mcp/code-executor/result_processing.py +276 -0
- atlas/mcp/code-executor/script_generation.py +195 -0
- atlas/mcp/code-executor/security_checker.py +140 -0
- atlas/mcp/corporate_cars/main.py +437 -0
- atlas/mcp/csv_reporter/main.py +545 -0
- atlas/mcp/duckduckgo/main.py +182 -0
- atlas/mcp/elicitation_demo/README.md +171 -0
- atlas/mcp/elicitation_demo/main.py +262 -0
- atlas/mcp/env-demo/README.md +158 -0
- atlas/mcp/env-demo/main.py +199 -0
- atlas/mcp/file_size_test/main.py +284 -0
- atlas/mcp/filesystem/main.py +348 -0
- atlas/mcp/image_demo/main.py +113 -0
- atlas/mcp/image_demo/requirements.txt +4 -0
- atlas/mcp/logging_demo/README.md +72 -0
- atlas/mcp/logging_demo/main.py +103 -0
- atlas/mcp/many_tools_demo/main.py +50 -0
- atlas/mcp/order_database/__init__.py +0 -0
- atlas/mcp/order_database/main.py +369 -0
- atlas/mcp/order_database/signal_data.csv +1001 -0
- atlas/mcp/pdfbasic/main.py +394 -0
- atlas/mcp/pptx_generator/main.py +760 -0
- atlas/mcp/pptx_generator/requirements.txt +13 -0
- atlas/mcp/pptx_generator/run_test.sh +1 -0
- atlas/mcp/pptx_generator/test_pptx_generator_security.py +169 -0
- atlas/mcp/progress_demo/main.py +167 -0
- atlas/mcp/progress_updates_demo/QUICKSTART.md +273 -0
- atlas/mcp/progress_updates_demo/README.md +120 -0
- atlas/mcp/progress_updates_demo/main.py +497 -0
- atlas/mcp/prompts/main.py +222 -0
- atlas/mcp/public_demo/main.py +189 -0
- atlas/mcp/sampling_demo/README.md +169 -0
- atlas/mcp/sampling_demo/main.py +234 -0
- atlas/mcp/thinking/main.py +77 -0
- atlas/mcp/tool_planner/main.py +240 -0
- atlas/mcp/ui-demo/badmesh.png +0 -0
- atlas/mcp/ui-demo/main.py +383 -0
- atlas/mcp/ui-demo/templates/button_demo.html +32 -0
- atlas/mcp/ui-demo/templates/data_visualization.html +32 -0
- atlas/mcp/ui-demo/templates/form_demo.html +28 -0
- atlas/mcp/username-override-demo/README.md +320 -0
- atlas/mcp/username-override-demo/main.py +308 -0
- atlas/modules/__init__.py +0 -0
- atlas/modules/config/__init__.py +34 -0
- atlas/modules/config/cli.py +231 -0
- atlas/modules/config/config_manager.py +1096 -0
- atlas/modules/file_storage/__init__.py +22 -0
- atlas/modules/file_storage/cli.py +330 -0
- atlas/modules/file_storage/content_extractor.py +290 -0
- atlas/modules/file_storage/manager.py +295 -0
- atlas/modules/file_storage/mock_s3_client.py +402 -0
- atlas/modules/file_storage/s3_client.py +417 -0
- atlas/modules/llm/__init__.py +19 -0
- atlas/modules/llm/caller.py +287 -0
- atlas/modules/llm/litellm_caller.py +675 -0
- atlas/modules/llm/models.py +19 -0
- atlas/modules/mcp_tools/__init__.py +17 -0
- atlas/modules/mcp_tools/client.py +2123 -0
- atlas/modules/mcp_tools/token_storage.py +556 -0
- atlas/modules/prompts/prompt_provider.py +130 -0
- atlas/modules/rag/__init__.py +24 -0
- atlas/modules/rag/atlas_rag_client.py +336 -0
- atlas/modules/rag/client.py +129 -0
- atlas/routes/admin_routes.py +865 -0
- atlas/routes/config_routes.py +484 -0
- atlas/routes/feedback_routes.py +361 -0
- atlas/routes/files_routes.py +274 -0
- atlas/routes/health_routes.py +40 -0
- atlas/routes/mcp_auth_routes.py +223 -0
- atlas/server_cli.py +164 -0
- atlas/tests/conftest.py +20 -0
- atlas/tests/integration/test_mcp_auth_integration.py +152 -0
- atlas/tests/manual_test_sampling.py +87 -0
- atlas/tests/modules/mcp_tools/test_client_auth.py +226 -0
- atlas/tests/modules/mcp_tools/test_client_env.py +191 -0
- atlas/tests/test_admin_mcp_server_management_routes.py +141 -0
- atlas/tests/test_agent_roa.py +135 -0
- atlas/tests/test_app_factory_smoke.py +47 -0
- atlas/tests/test_approval_manager.py +439 -0
- atlas/tests/test_atlas_client.py +188 -0
- atlas/tests/test_atlas_rag_client.py +447 -0
- atlas/tests/test_atlas_rag_integration.py +224 -0
- atlas/tests/test_attach_file_flow.py +287 -0
- atlas/tests/test_auth_utils.py +165 -0
- atlas/tests/test_backend_public_url.py +185 -0
- atlas/tests/test_banner_logging.py +287 -0
- atlas/tests/test_capability_tokens_and_injection.py +203 -0
- atlas/tests/test_compliance_level.py +54 -0
- atlas/tests/test_compliance_manager.py +253 -0
- atlas/tests/test_config_manager.py +617 -0
- atlas/tests/test_config_manager_paths.py +12 -0
- atlas/tests/test_core_auth.py +18 -0
- atlas/tests/test_core_utils.py +190 -0
- atlas/tests/test_docker_env_sync.py +202 -0
- atlas/tests/test_domain_errors.py +329 -0
- atlas/tests/test_domain_whitelist.py +359 -0
- atlas/tests/test_elicitation_manager.py +408 -0
- atlas/tests/test_elicitation_routing.py +296 -0
- atlas/tests/test_env_demo_server.py +88 -0
- atlas/tests/test_error_classification.py +113 -0
- atlas/tests/test_error_flow_integration.py +116 -0
- atlas/tests/test_feedback_routes.py +333 -0
- atlas/tests/test_file_content_extraction.py +1134 -0
- atlas/tests/test_file_extraction_routes.py +158 -0
- atlas/tests/test_file_library.py +107 -0
- atlas/tests/test_file_manager_unit.py +18 -0
- atlas/tests/test_health_route.py +49 -0
- atlas/tests/test_http_client_stub.py +8 -0
- atlas/tests/test_imports_smoke.py +30 -0
- atlas/tests/test_interfaces_llm_response.py +9 -0
- atlas/tests/test_issue_access_denied_fix.py +136 -0
- atlas/tests/test_llm_env_expansion.py +836 -0
- atlas/tests/test_log_level_sensitive_data.py +285 -0
- atlas/tests/test_mcp_auth_routes.py +341 -0
- atlas/tests/test_mcp_client_auth.py +331 -0
- atlas/tests/test_mcp_data_injection.py +270 -0
- atlas/tests/test_mcp_get_authorized_servers.py +95 -0
- atlas/tests/test_mcp_hot_reload.py +512 -0
- atlas/tests/test_mcp_image_content.py +424 -0
- atlas/tests/test_mcp_logging.py +172 -0
- atlas/tests/test_mcp_progress_updates.py +313 -0
- atlas/tests/test_mcp_prompt_override_system_prompt.py +102 -0
- atlas/tests/test_mcp_prompts_server.py +39 -0
- atlas/tests/test_mcp_tool_result_parsing.py +296 -0
- atlas/tests/test_metrics_logger.py +56 -0
- atlas/tests/test_middleware_auth.py +379 -0
- atlas/tests/test_prompt_risk_and_acl.py +141 -0
- atlas/tests/test_rag_mcp_aggregator.py +204 -0
- atlas/tests/test_rag_mcp_service.py +224 -0
- atlas/tests/test_rate_limit_middleware.py +45 -0
- atlas/tests/test_routes_config_smoke.py +60 -0
- atlas/tests/test_routes_files_download_token.py +41 -0
- atlas/tests/test_routes_files_health.py +18 -0
- atlas/tests/test_runtime_imports.py +53 -0
- atlas/tests/test_sampling_integration.py +482 -0
- atlas/tests/test_security_admin_routes.py +61 -0
- atlas/tests/test_security_capability_tokens.py +65 -0
- atlas/tests/test_security_file_stats_scope.py +21 -0
- atlas/tests/test_security_header_injection.py +191 -0
- atlas/tests/test_security_headers_and_filename.py +63 -0
- atlas/tests/test_shared_session_repository.py +101 -0
- atlas/tests/test_system_prompt_loading.py +181 -0
- atlas/tests/test_token_storage.py +505 -0
- atlas/tests/test_tool_approval_config.py +93 -0
- atlas/tests/test_tool_approval_utils.py +356 -0
- atlas/tests/test_tool_authorization_group_filtering.py +223 -0
- atlas/tests/test_tool_details_in_config.py +108 -0
- atlas/tests/test_tool_planner.py +300 -0
- atlas/tests/test_unified_rag_service.py +398 -0
- atlas/tests/test_username_override_in_approval.py +258 -0
- atlas/tests/test_websocket_auth_header.py +168 -0
- atlas/version.py +6 -0
- atlas_chat-0.1.0.data/data/.env.example +253 -0
- atlas_chat-0.1.0.data/data/config/defaults/compliance-levels.json +44 -0
- atlas_chat-0.1.0.data/data/config/defaults/domain-whitelist.json +123 -0
- atlas_chat-0.1.0.data/data/config/defaults/file-extractors.json +74 -0
- atlas_chat-0.1.0.data/data/config/defaults/help-config.json +198 -0
- atlas_chat-0.1.0.data/data/config/defaults/llmconfig-buggy.yml +11 -0
- atlas_chat-0.1.0.data/data/config/defaults/llmconfig.yml +19 -0
- atlas_chat-0.1.0.data/data/config/defaults/mcp.json +138 -0
- atlas_chat-0.1.0.data/data/config/defaults/rag-sources.json +17 -0
- atlas_chat-0.1.0.data/data/config/defaults/splash-config.json +16 -0
- atlas_chat-0.1.0.dist-info/METADATA +236 -0
- atlas_chat-0.1.0.dist-info/RECORD +250 -0
- atlas_chat-0.1.0.dist-info/WHEEL +5 -0
- atlas_chat-0.1.0.dist-info/entry_points.txt +4 -0
- atlas_chat-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1134 @@
|
|
|
1
|
+
"""Tests for file content extraction feature.
|
|
2
|
+
|
|
3
|
+
Tests the FileExtractorConfig, FileExtractorsConfig models, FileContentExtractor class,
|
|
4
|
+
and related configuration functionality.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from unittest.mock import AsyncMock, Mock, patch
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from atlas.modules.config.config_manager import (
|
|
13
|
+
ConfigManager,
|
|
14
|
+
FileExtractorConfig,
|
|
15
|
+
FileExtractorsConfig,
|
|
16
|
+
)
|
|
17
|
+
from atlas.modules.file_storage.content_extractor import (
|
|
18
|
+
ExtractionResult,
|
|
19
|
+
FileContentExtractor,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestFileExtractorConfig:
    """Unit tests for the FileExtractorConfig Pydantic model."""

    def test_file_extractor_config_with_defaults(self):
        """Supplying only ``url`` leaves every other field at its default."""
        endpoint = "http://localhost:8010/extract"

        cfg = FileExtractorConfig(url=endpoint)

        assert cfg.url == endpoint
        assert cfg.method == "POST"
        assert cfg.timeout_seconds == 30
        assert cfg.max_file_size_mb == 50
        assert cfg.preview_chars == 2000
        assert cfg.request_format == "base64"
        assert cfg.response_field == "text"
        assert cfg.enabled is True

    def test_file_extractor_config_with_custom_values(self):
        """Explicitly supplied values are stored unchanged on the model."""
        overrides = {
            "url": "http://custom-service:9000/ocr",
            "method": "PUT",
            "timeout_seconds": 120,
            "max_file_size_mb": 100,
            "preview_chars": 5000,
            "request_format": "url",
            "response_field": "content",
        }

        cfg = FileExtractorConfig(enabled=False, **overrides)

        # Non-boolean fields are compared by equality against what was passed.
        for field_name, expected in overrides.items():
            assert getattr(cfg, field_name) == expected
        # The flag is checked by identity, so a merely falsy value would fail.
        assert cfg.enabled is False

    def test_file_extractor_config_preview_chars_optional(self):
        """``preview_chars`` accepts ``None``."""
        cfg = FileExtractorConfig(
            url="http://localhost:8010/extract",
            preview_chars=None,
        )

        assert cfg.preview_chars is None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class TestFileExtractorsConfig:
    """Unit tests for the FileExtractorsConfig Pydantic model."""

    def test_file_extractors_config_with_defaults(self):
        """A no-argument model is enabled, uses "full", and has empty mappings."""
        cfg = FileExtractorsConfig()

        assert cfg.enabled is True
        assert cfg.default_behavior == "full"
        assert cfg.extractors == {}
        assert cfg.extension_mapping == {}
        assert cfg.mime_mapping == {}

    def test_file_extractors_config_with_extractors(self):
        """Nested extractor dicts are parsed into FileExtractorConfig objects."""
        extractor_defs = {
            "pdf-text": {
                "url": "http://localhost:8010/extract",
                "timeout_seconds": 60,
            },
            "image-vision": {
                "url": "http://localhost:8011/analyze",
                "enabled": False,
            },
        }
        by_extension = {".pdf": "pdf-text", ".png": "image-vision"}
        by_mime = {"application/pdf": "pdf-text", "image/png": "image-vision"}

        cfg = FileExtractorsConfig(
            enabled=True,
            default_behavior="attach_only",
            extractors=extractor_defs,
            extension_mapping=by_extension,
            mime_mapping=by_mime,
        )

        assert cfg.enabled is True
        # The legacy "attach_only" value is expected to come back as "none".
        assert cfg.default_behavior == "none"
        assert len(cfg.extractors) == 2

        pdf_cfg = cfg.extractors["pdf-text"]
        assert isinstance(pdf_cfg, FileExtractorConfig)
        assert pdf_cfg.url == "http://localhost:8010/extract"
        assert pdf_cfg.timeout_seconds == 60

        assert cfg.extractors["image-vision"].enabled is False
        assert cfg.extension_mapping[".pdf"] == "pdf-text"
        assert cfg.mime_mapping["application/pdf"] == "pdf-text"

    def test_file_extractors_config_validator_converts_dicts(self):
        """Plain dict entries under ``extractors`` become FileExtractorConfig."""
        payload = {
            "enabled": True,
            "extractors": {
                "test": {"url": "http://test.local/extract"},
            },
        }

        cfg = FileExtractorsConfig(**payload)
        parsed = cfg.extractors["test"]

        assert isinstance(parsed, FileExtractorConfig)
        assert parsed.url == "http://test.local/extract"
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class TestFileContentExtractor:
    """Unit tests for the synchronous API of FileContentExtractor."""

    # Dotted path of the settings accessor that the tests replace with a mock.
    _SETTINGS_TARGET = 'atlas.modules.file_storage.content_extractor.get_app_settings'

    def test_extractor_initialization_with_config(self):
        """A config passed to the constructor is exposed as the same object."""
        supplied = FileExtractorsConfig(enabled=True)

        subject = FileContentExtractor(config=supplied)

        assert subject.config is supplied

    def test_extractor_lazy_loads_config(self):
        """Without a constructor config, ``.config`` still yields a model."""
        subject = FileContentExtractor()

        # Reading the attribute is what triggers the load.
        loaded = subject.config

        assert loaded is not None
        assert isinstance(loaded, FileExtractorsConfig)

    def test_is_enabled_checks_both_flags(self):
        """With config enabled, ``is_enabled`` follows the feature flag."""
        subject = FileContentExtractor(config=FileExtractorsConfig(enabled=True))

        with patch(self._SETTINGS_TARGET) as settings_stub:
            # First pass: both switches on. Second pass: feature flag off.
            for flag in (True, False):
                settings_stub.return_value.feature_file_content_extraction_enabled = flag
                assert subject.is_enabled() is flag

    def test_is_enabled_config_disabled(self):
        """``is_enabled`` is False when config.enabled is False, flag on."""
        subject = FileContentExtractor(config=FileExtractorsConfig(enabled=False))

        with patch(self._SETTINGS_TARGET) as settings_stub:
            settings_stub.return_value.feature_file_content_extraction_enabled = True
            assert subject.is_enabled() is False

    def test_get_default_behavior(self):
        """``get_default_behavior`` reports the (normalized) config value."""
        subject = FileContentExtractor(
            config=FileExtractorsConfig(default_behavior="attach_only")
        )

        # "attach_only" is expected to come back as "none".
        assert subject.get_default_behavior() == "none"

    def test_get_default_behavior_preview(self):
        """The "preview" mode is returned as given."""
        subject = FileContentExtractor(
            config=FileExtractorsConfig(default_behavior="preview")
        )

        assert subject.get_default_behavior() == "preview"

    def test_legacy_extract_normalizes_to_full(self):
        """The legacy value 'extract' is stored as 'full'."""
        cfg = FileExtractorsConfig(default_behavior="extract")

        assert cfg.default_behavior == "full"

    def test_legacy_attach_only_normalizes_to_none(self):
        """The legacy value 'attach_only' is stored as 'none'."""
        cfg = FileExtractorsConfig(default_behavior="attach_only")

        assert cfg.default_behavior == "none"

    def test_get_extractor_for_file_by_extension(self):
        """A filename whose extension is mapped resolves to that extractor."""
        pdf_extractor = FileExtractorConfig(url="http://localhost/pdf", enabled=True)
        subject = FileContentExtractor(
            config=FileExtractorsConfig(
                enabled=True,
                extractors={"pdf-text": pdf_extractor},
                extension_mapping={".pdf": "pdf-text"},
            )
        )

        with patch(self._SETTINGS_TARGET) as settings_stub:
            settings_stub.return_value.feature_file_content_extraction_enabled = True
            found = subject.get_extractor_for_file("document.pdf")

        assert found is not None
        assert found.url == "http://localhost/pdf"

    def test_get_extractor_for_file_by_mime_fallback(self):
        """With no extension mapping, the MIME type mapping is used."""
        pdf_extractor = FileExtractorConfig(url="http://localhost/pdf", enabled=True)
        subject = FileContentExtractor(
            config=FileExtractorsConfig(
                enabled=True,
                extractors={"pdf-text": pdf_extractor},
                extension_mapping={},  # deliberately empty
                mime_mapping={"application/pdf": "pdf-text"},
            )
        )

        with patch(self._SETTINGS_TARGET) as settings_stub:
            settings_stub.return_value.feature_file_content_extraction_enabled = True
            found = subject.get_extractor_for_file(
                "document.xyz", mime_type="application/pdf"
            )

        assert found is not None
        assert found.url == "http://localhost/pdf"

    def test_get_extractor_for_file_returns_none_if_disabled(self):
        """A mapped extractor that has ``enabled=False`` is not returned."""
        switched_off = FileExtractorConfig(url="http://localhost/pdf", enabled=False)
        subject = FileContentExtractor(
            config=FileExtractorsConfig(
                enabled=True,
                extractors={"pdf-text": switched_off},
                extension_mapping={".pdf": "pdf-text"},
            )
        )

        with patch(self._SETTINGS_TARGET) as settings_stub:
            settings_stub.return_value.feature_file_content_extraction_enabled = True
            found = subject.get_extractor_for_file("document.pdf")

        assert found is None

    def test_get_extractor_for_file_returns_none_if_no_mapping(self):
        """With no extractors and no mappings, lookup yields None."""
        subject = FileContentExtractor(
            config=FileExtractorsConfig(
                enabled=True,
                extractors={},
                extension_mapping={},
            )
        )

        with patch(self._SETTINGS_TARGET) as settings_stub:
            settings_stub.return_value.feature_file_content_extraction_enabled = True
            found = subject.get_extractor_for_file("document.xyz")

        assert found is None

    def test_can_extract_returns_boolean(self):
        """``can_extract`` is True for a mapped extension, False otherwise."""
        pdf_extractor = FileExtractorConfig(url="http://localhost/pdf", enabled=True)
        subject = FileContentExtractor(
            config=FileExtractorsConfig(
                enabled=True,
                extractors={"pdf-text": pdf_extractor},
                extension_mapping={".pdf": "pdf-text"},
            )
        )

        with patch(self._SETTINGS_TARGET) as settings_stub:
            settings_stub.return_value.feature_file_content_extraction_enabled = True
            pdf_ok = subject.can_extract("document.pdf")
            xyz_ok = subject.can_extract("document.xyz")

        assert pdf_ok is True
        assert xyz_ok is False

    def test_get_supported_extensions(self):
        """Only extensions mapped to an enabled extractor are listed."""
        subject = FileContentExtractor(
            config=FileExtractorsConfig(
                enabled=True,
                extractors={
                    "pdf-text": FileExtractorConfig(
                        url="http://localhost/pdf", enabled=True
                    ),
                    "image-vision": FileExtractorConfig(
                        url="http://localhost/img", enabled=False
                    ),
                },
                extension_mapping={
                    ".pdf": "pdf-text",
                    ".png": "image-vision",  # points at the disabled extractor
                },
            )
        )

        with patch(self._SETTINGS_TARGET) as settings_stub:
            settings_stub.return_value.feature_file_content_extraction_enabled = True
            listed = subject.get_supported_extensions()

        assert ".pdf" in listed
        assert ".png" not in listed  # its extractor is disabled

    def test_get_supported_extensions_empty_when_disabled(self):
        """With config.enabled False the result is an empty list."""
        subject = FileContentExtractor(config=FileExtractorsConfig(enabled=False))

        with patch(self._SETTINGS_TARGET) as settings_stub:
            settings_stub.return_value.feature_file_content_extraction_enabled = True
            listed = subject.get_supported_extensions()

        assert listed == []
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
class TestFileContentExtractorAsync:
|
|
323
|
+
"""Test FileContentExtractor async extraction methods."""
|
|
324
|
+
|
|
325
|
+
@pytest.mark.asyncio
async def test_extract_content_success(self):
    """A 200 response with a ``text`` field produces a successful result."""
    settings_target = 'atlas.modules.file_storage.content_extractor.get_app_settings'
    payload = {
        "success": True,
        "text": "This is the extracted content from the PDF document.",
        "metadata": {"pages": 5},
    }

    pdf_extractor = FileExtractorConfig(
        url="http://localhost:8010/extract",
        enabled=True,
        response_field="text",
        preview_chars=100,
    )
    subject = FileContentExtractor(
        config=FileExtractorsConfig(
            enabled=True,
            extractors={"pdf-text": pdf_extractor},
            extension_mapping={".pdf": "pdf-text"},
        )
    )

    # Canned HTTP response handed back by the stubbed client.
    http_reply = Mock(status_code=200)
    http_reply.json.return_value = payload

    with patch(settings_target) as settings_stub, \
            patch('httpx.AsyncClient') as client_factory:
        settings_stub.return_value.feature_file_content_extraction_enabled = True

        # The stub is its own async context manager, so ``async with`` on the
        # patched AsyncClient yields an object whose ``request`` returns the
        # canned reply.
        fake_client = AsyncMock()
        fake_client.__aenter__ = AsyncMock(return_value=fake_client)
        fake_client.__aexit__ = AsyncMock(return_value=None)
        fake_client.request = AsyncMock(return_value=http_reply)
        client_factory.return_value = fake_client

        outcome = await subject.extract_content(
            filename="document.pdf",
            content_base64="dGVzdCBjb250ZW50",  # base64 of "test content"
        )

    assert outcome.success is True
    assert outcome.content == "This is the extracted content from the PDF document."
    assert outcome.metadata == {"pages": 5}
|
|
368
|
+
|
|
369
|
+
@pytest.mark.asyncio
async def test_extract_content_no_extractor(self):
    """With no extractors configured, the result is a failure with an error."""
    settings_target = 'atlas.modules.file_storage.content_extractor.get_app_settings'
    subject = FileContentExtractor(
        config=FileExtractorsConfig(enabled=True, extractors={})
    )

    with patch(settings_target) as settings_stub:
        settings_stub.return_value.feature_file_content_extraction_enabled = True

        outcome = await subject.extract_content(
            filename="document.xyz",
            content_base64="dGVzdA==",
        )

    assert outcome.success is False
    assert "No extractor available" in outcome.error
|
|
385
|
+
|
|
386
|
+
@pytest.mark.asyncio
async def test_extract_content_file_too_large(self):
    """Content above the extractor's ``max_file_size_mb`` is rejected."""
    settings_target = 'atlas.modules.file_storage.content_extractor.get_app_settings'

    size_capped = FileExtractorConfig(
        url="http://localhost:8010/extract",
        enabled=True,
        max_file_size_mb=1,  # 1MB ceiling
    )
    subject = FileContentExtractor(
        config=FileExtractorsConfig(
            enabled=True,
            extractors={"pdf-text": size_capped},
            extension_mapping={".pdf": "pdf-text"},
        )
    )

    # 2 MiB of base64 characters; base64 decodes at 3 bytes per 4 characters,
    # so this is ~1.5MB of data, above the 1MB ceiling.
    oversized_b64 = "A" * (2 * 1024 * 1024)

    with patch(settings_target) as settings_stub:
        settings_stub.return_value.feature_file_content_extraction_enabled = True

        outcome = await subject.extract_content(
            filename="large.pdf",
            content_base64=oversized_b64,
        )

    assert outcome.success is False
    assert "File too large" in outcome.error
|
|
415
|
+
|
|
416
|
+
@pytest.mark.asyncio
async def test_extract_content_http_error(self):
    """A 500 response is reported as a failed result, not an exception."""
    settings_target = 'atlas.modules.file_storage.content_extractor.get_app_settings'

    pdf_extractor = FileExtractorConfig(
        url="http://localhost:8010/extract",
        enabled=True,
    )
    subject = FileContentExtractor(
        config=FileExtractorsConfig(
            enabled=True,
            extractors={"pdf-text": pdf_extractor},
            extension_mapping={".pdf": "pdf-text"},
        )
    )

    # Only the status code is configured on the canned reply.
    http_reply = Mock(status_code=500)

    with patch(settings_target) as settings_stub, \
            patch('httpx.AsyncClient') as client_factory:
        settings_stub.return_value.feature_file_content_extraction_enabled = True

        # Stub client usable as ``async with``; ``request`` returns the reply.
        fake_client = AsyncMock()
        fake_client.__aenter__ = AsyncMock(return_value=fake_client)
        fake_client.__aexit__ = AsyncMock(return_value=None)
        fake_client.request = AsyncMock(return_value=http_reply)
        client_factory.return_value = fake_client

        outcome = await subject.extract_content(
            filename="document.pdf",
            content_base64="dGVzdA==",
        )

    assert outcome.success is False
    assert "status 500" in outcome.error
|
|
451
|
+
|
|
452
|
+
@pytest.mark.asyncio
|
|
453
|
+
async def test_extract_content_timeout(self):
|
|
454
|
+
"""extract_content should handle timeout gracefully."""
|
|
455
|
+
config = FileExtractorsConfig(
|
|
456
|
+
enabled=True,
|
|
457
|
+
extractors={
|
|
458
|
+
"pdf-text": FileExtractorConfig(
|
|
459
|
+
url="http://localhost:8010/extract",
|
|
460
|
+
enabled=True,
|
|
461
|
+
timeout_seconds=5
|
|
462
|
+
)
|
|
463
|
+
},
|
|
464
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
465
|
+
)
|
|
466
|
+
extractor = FileContentExtractor(config=config)
|
|
467
|
+
|
|
468
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
469
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
470
|
+
|
|
471
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
472
|
+
mock_client = AsyncMock()
|
|
473
|
+
mock_client.request = AsyncMock(side_effect=httpx.TimeoutException("Timeout"))
|
|
474
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
475
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
476
|
+
mock_client_class.return_value = mock_client
|
|
477
|
+
|
|
478
|
+
result = await extractor.extract_content(
|
|
479
|
+
filename="document.pdf",
|
|
480
|
+
content_base64="dGVzdA=="
|
|
481
|
+
)
|
|
482
|
+
|
|
483
|
+
assert result.success is False
|
|
484
|
+
assert "timed out" in result.error
|
|
485
|
+
|
|
486
|
+
@pytest.mark.asyncio
|
|
487
|
+
async def test_extract_content_connection_error(self):
|
|
488
|
+
"""extract_content should handle connection errors gracefully."""
|
|
489
|
+
config = FileExtractorsConfig(
|
|
490
|
+
enabled=True,
|
|
491
|
+
extractors={
|
|
492
|
+
"pdf-text": FileExtractorConfig(
|
|
493
|
+
url="http://localhost:8010/extract",
|
|
494
|
+
enabled=True
|
|
495
|
+
)
|
|
496
|
+
},
|
|
497
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
498
|
+
)
|
|
499
|
+
extractor = FileContentExtractor(config=config)
|
|
500
|
+
|
|
501
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
502
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
503
|
+
|
|
504
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
505
|
+
mock_client = AsyncMock()
|
|
506
|
+
mock_client.request = AsyncMock(
|
|
507
|
+
side_effect=httpx.RequestError("Connection refused")
|
|
508
|
+
)
|
|
509
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
510
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
511
|
+
mock_client_class.return_value = mock_client
|
|
512
|
+
|
|
513
|
+
result = await extractor.extract_content(
|
|
514
|
+
filename="document.pdf",
|
|
515
|
+
content_base64="dGVzdA=="
|
|
516
|
+
)
|
|
517
|
+
|
|
518
|
+
assert result.success is False
|
|
519
|
+
assert "Failed to connect" in result.error
|
|
520
|
+
|
|
521
|
+
@pytest.mark.asyncio
|
|
522
|
+
async def test_extract_content_preview_truncation(self):
|
|
523
|
+
"""extract_content should truncate preview for long content."""
|
|
524
|
+
config = FileExtractorsConfig(
|
|
525
|
+
enabled=True,
|
|
526
|
+
extractors={
|
|
527
|
+
"pdf-text": FileExtractorConfig(
|
|
528
|
+
url="http://localhost:8010/extract",
|
|
529
|
+
enabled=True,
|
|
530
|
+
response_field="text",
|
|
531
|
+
preview_chars=20
|
|
532
|
+
)
|
|
533
|
+
},
|
|
534
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
535
|
+
)
|
|
536
|
+
extractor = FileContentExtractor(config=config)
|
|
537
|
+
|
|
538
|
+
long_text = "A" * 100 # 100 characters
|
|
539
|
+
mock_response = Mock()
|
|
540
|
+
mock_response.status_code = 200
|
|
541
|
+
mock_response.json.return_value = {
|
|
542
|
+
"success": True,
|
|
543
|
+
"text": long_text
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
547
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
548
|
+
|
|
549
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
550
|
+
mock_client = AsyncMock()
|
|
551
|
+
mock_client.request = AsyncMock(return_value=mock_response)
|
|
552
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
553
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
554
|
+
mock_client_class.return_value = mock_client
|
|
555
|
+
|
|
556
|
+
result = await extractor.extract_content(
|
|
557
|
+
filename="document.pdf",
|
|
558
|
+
content_base64="dGVzdA=="
|
|
559
|
+
)
|
|
560
|
+
|
|
561
|
+
assert result.success is True
|
|
562
|
+
assert result.content == long_text
|
|
563
|
+
assert result.preview == "A" * 20 + "..."
|
|
564
|
+
assert len(result.preview) == 23 # 20 + "..."
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
class TestExtractionResult:
|
|
568
|
+
"""Test ExtractionResult dataclass."""
|
|
569
|
+
|
|
570
|
+
def test_extraction_result_success(self):
|
|
571
|
+
"""ExtractionResult should store successful extraction data."""
|
|
572
|
+
result = ExtractionResult(
|
|
573
|
+
success=True,
|
|
574
|
+
content="Extracted text",
|
|
575
|
+
preview="Extracted...",
|
|
576
|
+
metadata={"pages": 3}
|
|
577
|
+
)
|
|
578
|
+
|
|
579
|
+
assert result.success is True
|
|
580
|
+
assert result.content == "Extracted text"
|
|
581
|
+
assert result.preview == "Extracted..."
|
|
582
|
+
assert result.metadata == {"pages": 3}
|
|
583
|
+
assert result.error is None
|
|
584
|
+
|
|
585
|
+
def test_extraction_result_failure(self):
|
|
586
|
+
"""ExtractionResult should store failure information."""
|
|
587
|
+
result = ExtractionResult(
|
|
588
|
+
success=False,
|
|
589
|
+
error="Connection refused"
|
|
590
|
+
)
|
|
591
|
+
|
|
592
|
+
assert result.success is False
|
|
593
|
+
assert result.error == "Connection refused"
|
|
594
|
+
assert result.content is None
|
|
595
|
+
assert result.preview is None
|
|
596
|
+
assert result.metadata is None
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
class TestFileExtractorApiKeyAndHeaders:
|
|
600
|
+
"""Test FileExtractorConfig api_key and headers functionality."""
|
|
601
|
+
|
|
602
|
+
def test_file_extractor_config_with_api_key(self):
|
|
603
|
+
"""FileExtractorConfig should accept api_key field."""
|
|
604
|
+
config = FileExtractorConfig(
|
|
605
|
+
url="http://localhost:8010/extract",
|
|
606
|
+
api_key="sk-test-key-123"
|
|
607
|
+
)
|
|
608
|
+
|
|
609
|
+
assert config.api_key == "sk-test-key-123"
|
|
610
|
+
|
|
611
|
+
def test_file_extractor_config_with_headers(self):
|
|
612
|
+
"""FileExtractorConfig should accept headers field."""
|
|
613
|
+
config = FileExtractorConfig(
|
|
614
|
+
url="http://localhost:8010/extract",
|
|
615
|
+
headers={"X-Client-ID": "client-123", "X-Custom-Header": "value"}
|
|
616
|
+
)
|
|
617
|
+
|
|
618
|
+
assert config.headers == {"X-Client-ID": "client-123", "X-Custom-Header": "value"}
|
|
619
|
+
|
|
620
|
+
def test_file_extractor_config_api_key_and_headers_optional(self):
|
|
621
|
+
"""api_key and headers should be optional (None by default)."""
|
|
622
|
+
config = FileExtractorConfig(url="http://localhost:8010/extract")
|
|
623
|
+
|
|
624
|
+
assert config.api_key is None
|
|
625
|
+
assert config.headers is None
|
|
626
|
+
|
|
627
|
+
def test_file_extractor_env_var_resolution_api_key(self, monkeypatch):
|
|
628
|
+
"""ConfigManager should resolve ${ENV_VAR} in api_key."""
|
|
629
|
+
from atlas.modules.config.config_manager import resolve_env_var
|
|
630
|
+
|
|
631
|
+
monkeypatch.setenv("TEST_EXTRACTOR_API_KEY", "sk-resolved-key-456")
|
|
632
|
+
|
|
633
|
+
# Test resolve_env_var directly
|
|
634
|
+
resolved = resolve_env_var("${TEST_EXTRACTOR_API_KEY}")
|
|
635
|
+
assert resolved == "sk-resolved-key-456"
|
|
636
|
+
|
|
637
|
+
def test_file_extractor_env_var_resolution_headers(self, monkeypatch):
|
|
638
|
+
"""ConfigManager should resolve ${ENV_VAR} in header values."""
|
|
639
|
+
from atlas.modules.config.config_manager import resolve_env_var
|
|
640
|
+
|
|
641
|
+
monkeypatch.setenv("TEST_CLIENT_ID", "client-resolved-789")
|
|
642
|
+
|
|
643
|
+
resolved = resolve_env_var("${TEST_CLIENT_ID}")
|
|
644
|
+
assert resolved == "client-resolved-789"
|
|
645
|
+
|
|
646
|
+
def test_file_extractor_env_var_optional_returns_none(self):
|
|
647
|
+
"""resolve_env_var with required=False should return None for missing vars."""
|
|
648
|
+
from atlas.modules.config.config_manager import resolve_env_var
|
|
649
|
+
|
|
650
|
+
# Missing env var with required=False should return None
|
|
651
|
+
result = resolve_env_var("${MISSING_OPTIONAL_KEY}", required=False)
|
|
652
|
+
assert result is None
|
|
653
|
+
|
|
654
|
+
def test_file_extractor_env_var_required_raises(self):
|
|
655
|
+
"""resolve_env_var with required=True should raise for missing vars."""
|
|
656
|
+
from atlas.modules.config.config_manager import resolve_env_var
|
|
657
|
+
|
|
658
|
+
with pytest.raises(ValueError) as exc_info:
|
|
659
|
+
resolve_env_var("${MISSING_REQUIRED_KEY}", required=True)
|
|
660
|
+
|
|
661
|
+
assert "MISSING_REQUIRED_KEY" in str(exc_info.value)
|
|
662
|
+
|
|
663
|
+
def test_file_extractor_literal_value_unchanged(self):
|
|
664
|
+
"""resolve_env_var should return literal values unchanged."""
|
|
665
|
+
from atlas.modules.config.config_manager import resolve_env_var
|
|
666
|
+
|
|
667
|
+
result = resolve_env_var("sk-literal-key")
|
|
668
|
+
assert result == "sk-literal-key"
|
|
669
|
+
|
|
670
|
+
@pytest.mark.asyncio
|
|
671
|
+
async def test_extract_content_includes_api_key_header(self):
|
|
672
|
+
"""extract_content should include api_key as Authorization header."""
|
|
673
|
+
config = FileExtractorsConfig(
|
|
674
|
+
enabled=True,
|
|
675
|
+
extractors={
|
|
676
|
+
"pdf-text": FileExtractorConfig(
|
|
677
|
+
url="http://localhost:8010/extract",
|
|
678
|
+
enabled=True,
|
|
679
|
+
api_key="sk-test-api-key",
|
|
680
|
+
response_field="text"
|
|
681
|
+
)
|
|
682
|
+
},
|
|
683
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
684
|
+
)
|
|
685
|
+
extractor = FileContentExtractor(config=config)
|
|
686
|
+
|
|
687
|
+
mock_response = Mock()
|
|
688
|
+
mock_response.status_code = 200
|
|
689
|
+
mock_response.json.return_value = {"success": True, "text": "Extracted content"}
|
|
690
|
+
|
|
691
|
+
captured_headers = {}
|
|
692
|
+
|
|
693
|
+
async def capture_request(*args, **kwargs):
|
|
694
|
+
nonlocal captured_headers
|
|
695
|
+
captured_headers = kwargs.get("headers", {})
|
|
696
|
+
return mock_response
|
|
697
|
+
|
|
698
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
699
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
700
|
+
|
|
701
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
702
|
+
mock_client = AsyncMock()
|
|
703
|
+
mock_client.request = AsyncMock(side_effect=capture_request)
|
|
704
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
705
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
706
|
+
mock_client_class.return_value = mock_client
|
|
707
|
+
|
|
708
|
+
await extractor.extract_content(
|
|
709
|
+
filename="document.pdf",
|
|
710
|
+
content_base64="dGVzdA=="
|
|
711
|
+
)
|
|
712
|
+
|
|
713
|
+
assert "Authorization" in captured_headers
|
|
714
|
+
assert captured_headers["Authorization"] == "Bearer sk-test-api-key"
|
|
715
|
+
|
|
716
|
+
@pytest.mark.asyncio
|
|
717
|
+
async def test_extract_content_includes_custom_headers(self):
|
|
718
|
+
"""extract_content should include custom headers from config."""
|
|
719
|
+
config = FileExtractorsConfig(
|
|
720
|
+
enabled=True,
|
|
721
|
+
extractors={
|
|
722
|
+
"pdf-text": FileExtractorConfig(
|
|
723
|
+
url="http://localhost:8010/extract",
|
|
724
|
+
enabled=True,
|
|
725
|
+
headers={"X-Client-ID": "my-client", "X-Request-Source": "atlas-ui"},
|
|
726
|
+
response_field="text"
|
|
727
|
+
)
|
|
728
|
+
},
|
|
729
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
730
|
+
)
|
|
731
|
+
extractor = FileContentExtractor(config=config)
|
|
732
|
+
|
|
733
|
+
mock_response = Mock()
|
|
734
|
+
mock_response.status_code = 200
|
|
735
|
+
mock_response.json.return_value = {"success": True, "text": "Extracted content"}
|
|
736
|
+
|
|
737
|
+
captured_headers = {}
|
|
738
|
+
|
|
739
|
+
async def capture_request(*args, **kwargs):
|
|
740
|
+
nonlocal captured_headers
|
|
741
|
+
captured_headers = kwargs.get("headers", {})
|
|
742
|
+
return mock_response
|
|
743
|
+
|
|
744
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
745
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
746
|
+
|
|
747
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
748
|
+
mock_client = AsyncMock()
|
|
749
|
+
mock_client.request = AsyncMock(side_effect=capture_request)
|
|
750
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
751
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
752
|
+
mock_client_class.return_value = mock_client
|
|
753
|
+
|
|
754
|
+
await extractor.extract_content(
|
|
755
|
+
filename="document.pdf",
|
|
756
|
+
content_base64="dGVzdA=="
|
|
757
|
+
)
|
|
758
|
+
|
|
759
|
+
assert captured_headers.get("X-Client-ID") == "my-client"
|
|
760
|
+
assert captured_headers.get("X-Request-Source") == "atlas-ui"
|
|
761
|
+
|
|
762
|
+
@pytest.mark.asyncio
|
|
763
|
+
async def test_extract_content_no_headers_when_not_configured(self):
|
|
764
|
+
"""extract_content should pass None headers when not configured."""
|
|
765
|
+
config = FileExtractorsConfig(
|
|
766
|
+
enabled=True,
|
|
767
|
+
extractors={
|
|
768
|
+
"pdf-text": FileExtractorConfig(
|
|
769
|
+
url="http://localhost:8010/extract",
|
|
770
|
+
enabled=True,
|
|
771
|
+
response_field="text"
|
|
772
|
+
# No api_key or headers
|
|
773
|
+
)
|
|
774
|
+
},
|
|
775
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
776
|
+
)
|
|
777
|
+
extractor = FileContentExtractor(config=config)
|
|
778
|
+
|
|
779
|
+
mock_response = Mock()
|
|
780
|
+
mock_response.status_code = 200
|
|
781
|
+
mock_response.json.return_value = {"success": True, "text": "Extracted content"}
|
|
782
|
+
|
|
783
|
+
captured_headers = "NOT_SET"
|
|
784
|
+
|
|
785
|
+
async def capture_request(*args, **kwargs):
|
|
786
|
+
nonlocal captured_headers
|
|
787
|
+
captured_headers = kwargs.get("headers")
|
|
788
|
+
return mock_response
|
|
789
|
+
|
|
790
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
791
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
792
|
+
|
|
793
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
794
|
+
mock_client = AsyncMock()
|
|
795
|
+
mock_client.request = AsyncMock(side_effect=capture_request)
|
|
796
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
797
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
798
|
+
mock_client_class.return_value = mock_client
|
|
799
|
+
|
|
800
|
+
await extractor.extract_content(
|
|
801
|
+
filename="document.pdf",
|
|
802
|
+
content_base64="dGVzdA=="
|
|
803
|
+
)
|
|
804
|
+
|
|
805
|
+
assert captured_headers is None
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
class TestMultipartUpload:
|
|
809
|
+
"""Test multipart form-data upload path in FileContentExtractor."""
|
|
810
|
+
|
|
811
|
+
def test_file_extractor_config_form_field_name_default(self):
|
|
812
|
+
"""form_field_name should default to 'file'."""
|
|
813
|
+
config = FileExtractorConfig(url="http://localhost:8010/extract")
|
|
814
|
+
assert config.form_field_name == "file"
|
|
815
|
+
|
|
816
|
+
def test_file_extractor_config_custom_form_field_name(self):
|
|
817
|
+
"""form_field_name should accept custom values."""
|
|
818
|
+
config = FileExtractorConfig(
|
|
819
|
+
url="http://localhost:8010/extract",
|
|
820
|
+
request_format="multipart",
|
|
821
|
+
form_field_name="document"
|
|
822
|
+
)
|
|
823
|
+
assert config.form_field_name == "document"
|
|
824
|
+
assert config.request_format == "multipart"
|
|
825
|
+
|
|
826
|
+
@pytest.mark.asyncio
|
|
827
|
+
async def test_multipart_upload_sends_file(self):
|
|
828
|
+
"""Multipart request_format should send file via multipart form-data."""
|
|
829
|
+
config = FileExtractorsConfig(
|
|
830
|
+
enabled=True,
|
|
831
|
+
extractors={
|
|
832
|
+
"pdf-text": FileExtractorConfig(
|
|
833
|
+
url="http://localhost:8010/extract-multipart",
|
|
834
|
+
enabled=True,
|
|
835
|
+
request_format="multipart",
|
|
836
|
+
form_field_name="file",
|
|
837
|
+
response_field="text"
|
|
838
|
+
)
|
|
839
|
+
},
|
|
840
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
841
|
+
)
|
|
842
|
+
extractor = FileContentExtractor(config=config)
|
|
843
|
+
|
|
844
|
+
mock_response = Mock()
|
|
845
|
+
mock_response.status_code = 200
|
|
846
|
+
mock_response.json.return_value = {
|
|
847
|
+
"success": True,
|
|
848
|
+
"text": "Extracted multipart content"
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
captured_kwargs = {}
|
|
852
|
+
|
|
853
|
+
async def capture_post(*args, **kwargs):
|
|
854
|
+
nonlocal captured_kwargs
|
|
855
|
+
captured_kwargs = kwargs
|
|
856
|
+
return mock_response
|
|
857
|
+
|
|
858
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
859
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
860
|
+
|
|
861
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
862
|
+
mock_client = AsyncMock()
|
|
863
|
+
mock_client.post = AsyncMock(side_effect=capture_post)
|
|
864
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
865
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
866
|
+
mock_client_class.return_value = mock_client
|
|
867
|
+
|
|
868
|
+
result = await extractor.extract_content(
|
|
869
|
+
filename="document.pdf",
|
|
870
|
+
content_base64="dGVzdCBjb250ZW50", # "test content"
|
|
871
|
+
mime_type="application/pdf"
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
assert result.success is True
|
|
875
|
+
assert result.content == "Extracted multipart content"
|
|
876
|
+
|
|
877
|
+
# Verify multipart files dict was passed
|
|
878
|
+
assert "files" in captured_kwargs
|
|
879
|
+
files = captured_kwargs["files"]
|
|
880
|
+
assert "file" in files
|
|
881
|
+
file_tuple = files["file"]
|
|
882
|
+
assert file_tuple[0] == "document.pdf"
|
|
883
|
+
assert file_tuple[1] == b"test content"
|
|
884
|
+
assert file_tuple[2] == "application/pdf"
|
|
885
|
+
|
|
886
|
+
@pytest.mark.asyncio
|
|
887
|
+
async def test_multipart_upload_custom_field_name(self):
|
|
888
|
+
"""Multipart upload should use the configured form_field_name."""
|
|
889
|
+
config = FileExtractorsConfig(
|
|
890
|
+
enabled=True,
|
|
891
|
+
extractors={
|
|
892
|
+
"pdf-text": FileExtractorConfig(
|
|
893
|
+
url="http://localhost:8010/extract-multipart",
|
|
894
|
+
enabled=True,
|
|
895
|
+
request_format="multipart",
|
|
896
|
+
form_field_name="document",
|
|
897
|
+
response_field="text"
|
|
898
|
+
)
|
|
899
|
+
},
|
|
900
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
901
|
+
)
|
|
902
|
+
extractor = FileContentExtractor(config=config)
|
|
903
|
+
|
|
904
|
+
mock_response = Mock()
|
|
905
|
+
mock_response.status_code = 200
|
|
906
|
+
mock_response.json.return_value = {"success": True, "text": "Content"}
|
|
907
|
+
|
|
908
|
+
captured_kwargs = {}
|
|
909
|
+
|
|
910
|
+
async def capture_post(*args, **kwargs):
|
|
911
|
+
nonlocal captured_kwargs
|
|
912
|
+
captured_kwargs = kwargs
|
|
913
|
+
return mock_response
|
|
914
|
+
|
|
915
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
916
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
917
|
+
|
|
918
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
919
|
+
mock_client = AsyncMock()
|
|
920
|
+
mock_client.post = AsyncMock(side_effect=capture_post)
|
|
921
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
922
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
923
|
+
mock_client_class.return_value = mock_client
|
|
924
|
+
|
|
925
|
+
await extractor.extract_content(
|
|
926
|
+
filename="document.pdf",
|
|
927
|
+
content_base64="dGVzdA==",
|
|
928
|
+
)
|
|
929
|
+
|
|
930
|
+
files = captured_kwargs["files"]
|
|
931
|
+
assert "document" in files
|
|
932
|
+
|
|
933
|
+
@pytest.mark.asyncio
|
|
934
|
+
async def test_multipart_upload_invalid_base64(self):
|
|
935
|
+
"""Multipart upload should handle invalid base64 gracefully."""
|
|
936
|
+
config = FileExtractorsConfig(
|
|
937
|
+
enabled=True,
|
|
938
|
+
extractors={
|
|
939
|
+
"pdf-text": FileExtractorConfig(
|
|
940
|
+
url="http://localhost:8010/extract-multipart",
|
|
941
|
+
enabled=True,
|
|
942
|
+
request_format="multipart",
|
|
943
|
+
response_field="text"
|
|
944
|
+
)
|
|
945
|
+
},
|
|
946
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
947
|
+
)
|
|
948
|
+
extractor = FileContentExtractor(config=config)
|
|
949
|
+
|
|
950
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
951
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
952
|
+
|
|
953
|
+
result = await extractor.extract_content(
|
|
954
|
+
filename="document.pdf",
|
|
955
|
+
content_base64="!!!not-valid-base64!!!"
|
|
956
|
+
)
|
|
957
|
+
|
|
958
|
+
assert result.success is False
|
|
959
|
+
assert "decode" in result.error.lower() or "base64" in result.error.lower()
|
|
960
|
+
|
|
961
|
+
@pytest.mark.asyncio
|
|
962
|
+
async def test_multipart_upload_includes_accept_header(self):
|
|
963
|
+
"""Multipart upload should include Accept: application/json header."""
|
|
964
|
+
config = FileExtractorsConfig(
|
|
965
|
+
enabled=True,
|
|
966
|
+
extractors={
|
|
967
|
+
"pdf-text": FileExtractorConfig(
|
|
968
|
+
url="http://localhost:8010/extract-multipart",
|
|
969
|
+
enabled=True,
|
|
970
|
+
request_format="multipart",
|
|
971
|
+
response_field="text"
|
|
972
|
+
)
|
|
973
|
+
},
|
|
974
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
975
|
+
)
|
|
976
|
+
extractor = FileContentExtractor(config=config)
|
|
977
|
+
|
|
978
|
+
mock_response = Mock()
|
|
979
|
+
mock_response.status_code = 200
|
|
980
|
+
mock_response.json.return_value = {"success": True, "text": "Content"}
|
|
981
|
+
|
|
982
|
+
captured_kwargs = {}
|
|
983
|
+
|
|
984
|
+
async def capture_post(*args, **kwargs):
|
|
985
|
+
nonlocal captured_kwargs
|
|
986
|
+
captured_kwargs = kwargs
|
|
987
|
+
return mock_response
|
|
988
|
+
|
|
989
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
990
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
991
|
+
|
|
992
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
993
|
+
mock_client = AsyncMock()
|
|
994
|
+
mock_client.post = AsyncMock(side_effect=capture_post)
|
|
995
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
996
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
997
|
+
mock_client_class.return_value = mock_client
|
|
998
|
+
|
|
999
|
+
await extractor.extract_content(
|
|
1000
|
+
filename="document.pdf",
|
|
1001
|
+
content_base64="dGVzdA=="
|
|
1002
|
+
)
|
|
1003
|
+
|
|
1004
|
+
headers = captured_kwargs.get("headers", {})
|
|
1005
|
+
assert headers.get("Accept") == "application/json"
|
|
1006
|
+
|
|
1007
|
+
@pytest.mark.asyncio
|
|
1008
|
+
async def test_multipart_upload_with_api_key(self):
|
|
1009
|
+
"""Multipart upload should include Authorization header when api_key is set."""
|
|
1010
|
+
config = FileExtractorsConfig(
|
|
1011
|
+
enabled=True,
|
|
1012
|
+
extractors={
|
|
1013
|
+
"pdf-text": FileExtractorConfig(
|
|
1014
|
+
url="http://localhost:8010/extract-multipart",
|
|
1015
|
+
enabled=True,
|
|
1016
|
+
request_format="multipart",
|
|
1017
|
+
api_key="sk-test-key",
|
|
1018
|
+
response_field="text"
|
|
1019
|
+
)
|
|
1020
|
+
},
|
|
1021
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
1022
|
+
)
|
|
1023
|
+
extractor = FileContentExtractor(config=config)
|
|
1024
|
+
|
|
1025
|
+
mock_response = Mock()
|
|
1026
|
+
mock_response.status_code = 200
|
|
1027
|
+
mock_response.json.return_value = {"success": True, "text": "Content"}
|
|
1028
|
+
|
|
1029
|
+
captured_kwargs = {}
|
|
1030
|
+
|
|
1031
|
+
async def capture_post(*args, **kwargs):
|
|
1032
|
+
nonlocal captured_kwargs
|
|
1033
|
+
captured_kwargs = kwargs
|
|
1034
|
+
return mock_response
|
|
1035
|
+
|
|
1036
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
1037
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
1038
|
+
|
|
1039
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
1040
|
+
mock_client = AsyncMock()
|
|
1041
|
+
mock_client.post = AsyncMock(side_effect=capture_post)
|
|
1042
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
1043
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
1044
|
+
mock_client_class.return_value = mock_client
|
|
1045
|
+
|
|
1046
|
+
await extractor.extract_content(
|
|
1047
|
+
filename="document.pdf",
|
|
1048
|
+
content_base64="dGVzdA=="
|
|
1049
|
+
)
|
|
1050
|
+
|
|
1051
|
+
headers = captured_kwargs.get("headers", {})
|
|
1052
|
+
assert headers.get("Authorization") == "Bearer sk-test-key"
|
|
1053
|
+
|
|
1054
|
+
@pytest.mark.asyncio
|
|
1055
|
+
async def test_multipart_default_mime_type(self):
|
|
1056
|
+
"""Multipart upload should default to application/octet-stream when no mime_type."""
|
|
1057
|
+
config = FileExtractorsConfig(
|
|
1058
|
+
enabled=True,
|
|
1059
|
+
extractors={
|
|
1060
|
+
"pdf-text": FileExtractorConfig(
|
|
1061
|
+
url="http://localhost:8010/extract-multipart",
|
|
1062
|
+
enabled=True,
|
|
1063
|
+
request_format="multipart",
|
|
1064
|
+
response_field="text"
|
|
1065
|
+
)
|
|
1066
|
+
},
|
|
1067
|
+
extension_mapping={".pdf": "pdf-text"}
|
|
1068
|
+
)
|
|
1069
|
+
extractor = FileContentExtractor(config=config)
|
|
1070
|
+
|
|
1071
|
+
mock_response = Mock()
|
|
1072
|
+
mock_response.status_code = 200
|
|
1073
|
+
mock_response.json.return_value = {"success": True, "text": "Content"}
|
|
1074
|
+
|
|
1075
|
+
captured_kwargs = {}
|
|
1076
|
+
|
|
1077
|
+
async def capture_post(*args, **kwargs):
|
|
1078
|
+
nonlocal captured_kwargs
|
|
1079
|
+
captured_kwargs = kwargs
|
|
1080
|
+
return mock_response
|
|
1081
|
+
|
|
1082
|
+
with patch('atlas.modules.file_storage.content_extractor.get_app_settings') as mock_settings:
|
|
1083
|
+
mock_settings.return_value.feature_file_content_extraction_enabled = True
|
|
1084
|
+
|
|
1085
|
+
with patch('httpx.AsyncClient') as mock_client_class:
|
|
1086
|
+
mock_client = AsyncMock()
|
|
1087
|
+
mock_client.post = AsyncMock(side_effect=capture_post)
|
|
1088
|
+
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
|
|
1089
|
+
mock_client.__aexit__ = AsyncMock(return_value=None)
|
|
1090
|
+
mock_client_class.return_value = mock_client
|
|
1091
|
+
|
|
1092
|
+
await extractor.extract_content(
|
|
1093
|
+
filename="document.pdf",
|
|
1094
|
+
content_base64="dGVzdA==",
|
|
1095
|
+
mime_type=None
|
|
1096
|
+
)
|
|
1097
|
+
|
|
1098
|
+
files = captured_kwargs["files"]
|
|
1099
|
+
file_tuple = files["file"]
|
|
1100
|
+
assert file_tuple[2] == "application/octet-stream"
|
|
1101
|
+
|
|
1102
|
+
|
|
1103
|
+
class TestConfigManagerFileExtractors:
|
|
1104
|
+
"""Test ConfigManager loading of file extractors config."""
|
|
1105
|
+
|
|
1106
|
+
def test_config_manager_loads_file_extractors(self):
|
|
1107
|
+
"""ConfigManager should load file extractors configuration."""
|
|
1108
|
+
cm = ConfigManager()
|
|
1109
|
+
config = cm.file_extractors_config
|
|
1110
|
+
|
|
1111
|
+
assert config is not None
|
|
1112
|
+
assert isinstance(config, FileExtractorsConfig)
|
|
1113
|
+
|
|
1114
|
+
def test_config_manager_caches_file_extractors(self):
|
|
1115
|
+
"""ConfigManager should cache file extractors config."""
|
|
1116
|
+
cm = ConfigManager()
|
|
1117
|
+
|
|
1118
|
+
config1 = cm.file_extractors_config
|
|
1119
|
+
config2 = cm.file_extractors_config
|
|
1120
|
+
|
|
1121
|
+
assert config1 is config2
|
|
1122
|
+
|
|
1123
|
+
def test_config_manager_returns_disabled_on_missing_file(self):
|
|
1124
|
+
"""ConfigManager should return disabled config if file not found."""
|
|
1125
|
+
cm = ConfigManager()
|
|
1126
|
+
|
|
1127
|
+
# Clear the cached config so the next property access reloads it through the patched loader
|
|
1128
|
+
cm._file_extractors_config = None
|
|
1129
|
+
|
|
1130
|
+
# Mock _load_file_with_error_handling to return None, simulating a config file that cannot be found
|
|
1131
|
+
with patch.object(cm, '_load_file_with_error_handling', return_value=None):
|
|
1132
|
+
config = cm.file_extractors_config
|
|
1133
|
+
|
|
1134
|
+
assert config.enabled is False
|