aiecs-1.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
aiecs/infrastructure/persistence/file_storage.py

@@ -0,0 +1,754 @@

```python
"""
File Storage Implementation with Google Cloud Storage

Provides file storage capabilities using Google Cloud Storage as the backend,
with support for local fallback and caching.
"""

import os
import json
import logging
import aiofiles
from typing import Dict, List, Any, Optional, Union
from datetime import datetime
from pathlib import Path
import gzip
import pickle

try:
    from google.cloud import storage
    from google.cloud.exceptions import NotFound, GoogleCloudError
    from google.auth.exceptions import DefaultCredentialsError

    GCS_AVAILABLE = True
except ImportError:
    GCS_AVAILABLE = False
    storage = None
    NotFound = Exception
    GoogleCloudError = Exception
    DefaultCredentialsError = Exception

from ..monitoring.global_metrics_manager import get_global_metrics

logger = logging.getLogger(__name__)


class FileStorageError(Exception):
    """Base exception for file storage operations."""


class FileStorageConfig:
    """Configuration for file storage."""

    def __init__(self, config: Dict[str, Any]):
        # Google Cloud Storage settings
        self.gcs_bucket_name = config.get("gcs_bucket_name", "multi-task-storage")
        self.gcs_project_id = config.get("gcs_project_id")
        self.gcs_credentials_path = config.get("gcs_credentials_path")
        self.gcs_location = config.get("gcs_location", "US")

        # Local storage fallback
        self.local_storage_path = config.get("local_storage_path", "./storage")
        self.enable_local_fallback = config.get("enable_local_fallback", True)

        # Cache settings
        self.enable_cache = config.get("enable_cache", True)
        self.cache_ttl_seconds = config.get("cache_ttl_seconds", 3600)
        self.max_cache_size_mb = config.get("max_cache_size_mb", 100)

        # Performance settings
        self.chunk_size = config.get("chunk_size", 8192)
        self.max_retries = config.get("max_retries", 3)
        self.timeout_seconds = config.get("timeout_seconds", 30)

        # Compression settings
        self.enable_compression = config.get("enable_compression", True)
        self.compression_threshold_bytes = config.get("compression_threshold_bytes", 1024)

        # Security settings
        self.enable_encryption = config.get("enable_encryption", False)
        self.encryption_key = config.get("encryption_key")


class FileStorage:
    """
    File storage implementation with Google Cloud Storage backend.

    Features:
    - Google Cloud Storage as primary backend
    - Local filesystem fallback
    - In-memory caching with TTL
    - Automatic compression for large files
    - Retry logic with exponential backoff
    - Metrics collection
    """

    def __init__(self, config: Dict[str, Any]):
        self.config = FileStorageConfig(config)
        self._gcs_client = None
        self._gcs_bucket = None
        self._cache = {}
        self._cache_timestamps = {}
        self._initialized = False

        # Metrics - use global metrics manager
        self.metrics = get_global_metrics()

        # Ensure local storage directory exists
        if self.config.enable_local_fallback:
            Path(self.config.local_storage_path).mkdir(parents=True, exist_ok=True)

    async def initialize(self) -> bool:
        """
        Initialize the file storage system.

        Returns:
            True if initialization was successful
        """
        try:
            if GCS_AVAILABLE:
                await self._init_gcs()
            else:
                logger.warning("Google Cloud Storage not available, using local storage only")

            self._initialized = True
            logger.info("File storage initialized successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to initialize file storage: {e}")
            if not self.config.enable_local_fallback:
                raise FileStorageError(f"Storage initialization failed: {e}")

            logger.info("Falling back to local storage only")
            self._initialized = True
            return True

    async def _init_gcs(self):
        """Initialize Google Cloud Storage client."""
        try:
            # Set credentials if provided
            if self.config.gcs_credentials_path:
                os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.config.gcs_credentials_path

            # Create client - project is required for bucket creation
            # If project_id is None, client will use default project from credentials
            # but we need it for bucket creation API calls
            if not self.config.gcs_project_id:
                logger.warning("GCS project ID not provided. Bucket creation will be disabled.")
                logger.warning(
                    "Bucket must exist and be accessible. Falling back to local storage if bucket not found."
                )

            # Create client with project ID (can be None, but bucket creation
            # will fail)
            self._gcs_client = storage.Client(project=self.config.gcs_project_id)

            # Get or create bucket
            try:
                self._gcs_bucket = self._gcs_client.bucket(self.config.gcs_bucket_name)
                # Test bucket access
                self._gcs_bucket.reload()
                logger.info(f"Connected to GCS bucket: {self.config.gcs_bucket_name}")

            except NotFound:
                # Only create bucket if project_id is provided
                # Bucket creation requires project parameter in API call
                if self.config.gcs_project_id:
                    try:
                        self._gcs_bucket = self._gcs_client.create_bucket(
                            self.config.gcs_bucket_name,
                            project=self.config.gcs_project_id,  # Explicitly pass project parameter
                            location=self.config.gcs_location,
                        )
                        logger.info(
                            f"Created GCS bucket: {self.config.gcs_bucket_name} in project {self.config.gcs_project_id}"
                        )
                    except Exception as create_error:
                        logger.error(
                            f"Failed to create GCS bucket {self.config.gcs_bucket_name}: {create_error}"
                        )
                        logger.warning("Bucket creation failed. Will use local storage fallback.")
                        self._gcs_bucket = None
                else:
                    logger.error(
                        f"GCS bucket '{self.config.gcs_bucket_name}' not found and "
                        "project ID is not provided. Cannot create bucket without project parameter."
                    )
                    logger.warning(
                        "Please ensure the bucket exists or provide DOC_PARSER_GCS_PROJECT_ID in configuration."
                    )
                    logger.warning("Falling back to local storage only.")
                    self._gcs_bucket = None

        except DefaultCredentialsError:
            logger.warning("GCS credentials not found, using local storage only")
            self._gcs_client = None
            self._gcs_bucket = None

        except Exception as e:
            logger.error(f"Failed to initialize GCS: {e}")
            self._gcs_client = None
            self._gcs_bucket = None

    async def store(
        self,
        key: str,
        data: Union[str, bytes, Dict[str, Any]],
        metadata: Optional[Dict[str, Any]] = None,
    ) -> bool:
        """
        Store data with the given key.

        Args:
            key: Storage key
            data: Data to store
            metadata: Optional metadata

        Returns:
            True if storage was successful
        """
        if not self._initialized:
            await self.initialize()

        start_time = datetime.utcnow()

        try:
            # Serialize data
            serialized_data = await self._serialize_data(data)

            # Compress if enabled and data is large enough
            if (
                self.config.enable_compression
                and len(serialized_data) > self.config.compression_threshold_bytes
            ):
                serialized_data = gzip.compress(serialized_data)
                compressed = True
            else:
                compressed = False

            # Store in cache
            if self.config.enable_cache:
                self._cache[key] = {
                    "data": data,
                    "metadata": metadata,
                    "compressed": compressed,
                }
                self._cache_timestamps[key] = datetime.utcnow()
                await self._cleanup_cache()

            # Store in GCS if available
            if self._gcs_bucket:
                success = await self._store_gcs(key, serialized_data, metadata, compressed)
                if success:
                    if self.metrics:
                        self.metrics.record_operation("gcs_store_success", 1)
                        duration = (datetime.utcnow() - start_time).total_seconds()
                        self.metrics.record_duration("gcs_store_duration", duration)
                    return True

            # Fallback to local storage
            if self.config.enable_local_fallback:
                success = await self._store_local(key, serialized_data, metadata, compressed)
                if success:
                    if self.metrics:
                        self.metrics.record_operation("local_store_success", 1)
                        duration = (datetime.utcnow() - start_time).total_seconds()
                        self.metrics.record_duration("local_store_duration", duration)
                    return True

            if self.metrics:
                self.metrics.record_operation("store_failure", 1)
            return False

        except Exception as e:
            logger.error(f"Failed to store data for key {key}: {e}")
            if self.metrics:
                self.metrics.record_operation("store_error", 1)
            raise FileStorageError(f"Storage failed: {e}")

    async def retrieve(self, key: str) -> Optional[Union[str, bytes, Dict[str, Any]]]:
        """
        Retrieve data by key.

        Args:
            key: Storage key

        Returns:
            The stored data if found, None otherwise
        """
        if not self._initialized:
            await self.initialize()

        start_time = datetime.utcnow()

        try:
            # Check cache first
            if self.config.enable_cache and key in self._cache:
                cache_time = self._cache_timestamps.get(key)
                if (
                    cache_time
                    and (datetime.utcnow() - cache_time).total_seconds()
                    < self.config.cache_ttl_seconds
                ):
                    if self.metrics:
                        self.metrics.record_operation("cache_hit", 1)
                    return self._cache[key]["data"]
                else:
                    # Remove expired cache entry
                    self._cache.pop(key, None)
                    self._cache_timestamps.pop(key, None)

            # Try GCS first
            if self._gcs_bucket:
                data = await self._retrieve_gcs(key)
                if data is not None:
                    if self.metrics:
                        self.metrics.record_operation("gcs_retrieve_success", 1)
                        duration = (datetime.utcnow() - start_time).total_seconds()
                        self.metrics.record_duration("gcs_retrieve_duration", duration)

                    # Update cache
                    if self.config.enable_cache:
                        self._cache[key] = {"data": data, "metadata": {}}
                        self._cache_timestamps[key] = datetime.utcnow()

                    return data

            # Fallback to local storage
            if self.config.enable_local_fallback:
                data = await self._retrieve_local(key)
                if data is not None:
                    if self.metrics:
                        self.metrics.record_operation("local_retrieve_success", 1)
                        duration = (datetime.utcnow() - start_time).total_seconds()
                        self.metrics.record_duration("local_retrieve_duration", duration)

                    # Update cache
                    if self.config.enable_cache:
                        self._cache[key] = {"data": data, "metadata": {}}
                        self._cache_timestamps[key] = datetime.utcnow()

                    return data

            if self.metrics:
                self.metrics.record_operation("retrieve_not_found", 1)
            return None

        except Exception as e:
            logger.error(f"Failed to retrieve data for key {key}: {e}")
            if self.metrics:
                self.metrics.record_operation("retrieve_error", 1)
            raise FileStorageError(f"Retrieval failed: {e}")

    async def delete(self, key: str) -> bool:
        """
        Delete data by key.

        Args:
            key: Storage key

        Returns:
            True if deletion was successful
        """
        if not self._initialized:
            await self.initialize()

        try:
            success = True

            # Remove from cache
            if self.config.enable_cache:
                self._cache.pop(key, None)
                self._cache_timestamps.pop(key, None)

            # Delete from GCS
            if self._gcs_bucket:
                gcs_success = await self._delete_gcs(key)
                if gcs_success:
                    if self.metrics:
                        self.metrics.record_operation("gcs_delete_success", 1)
                else:
                    success = False

            # Delete from local storage
            if self.config.enable_local_fallback:
                local_success = await self._delete_local(key)
                if local_success:
                    if self.metrics:
                        self.metrics.record_operation("local_delete_success", 1)
                else:
                    success = False

            if self.metrics:
                if success:
                    self.metrics.record_operation("delete_success", 1)
                else:
                    self.metrics.record_operation("delete_failure", 1)

            return success

        except Exception as e:
            logger.error(f"Failed to delete data for key {key}: {e}")
            if self.metrics:
                self.metrics.record_operation("delete_error", 1)
            raise FileStorageError(f"Deletion failed: {e}")

    async def exists(self, key: str) -> bool:
        """
        Check if data exists for the given key.

        Args:
            key: Storage key

        Returns:
            True if data exists
        """
        if not self._initialized:
            await self.initialize()

        try:
            # Check cache first
            if self.config.enable_cache and key in self._cache:
                cache_time = self._cache_timestamps.get(key)
                if (
                    cache_time
                    and (datetime.utcnow() - cache_time).total_seconds()
                    < self.config.cache_ttl_seconds
                ):
                    return True

            # Check GCS
            if self._gcs_bucket:
                if await self._exists_gcs(key):
                    return True

            # Check local storage
            if self.config.enable_local_fallback:
                return await self._exists_local(key)

            return False

        except Exception as e:
            logger.error(f"Failed to check existence for key {key}: {e}")
            raise FileStorageError(f"Existence check failed: {e}")

    async def list_keys(
        self, prefix: Optional[str] = None, limit: Optional[int] = None
    ) -> List[str]:
        """
        List storage keys with optional prefix filtering.

        Args:
            prefix: Optional key prefix filter
            limit: Maximum number of keys to return

        Returns:
            List of storage keys
        """
        if not self._initialized:
            await self.initialize()

        try:
            keys = set()

            # Get keys from GCS
            if self._gcs_bucket:
                gcs_keys = await self._list_keys_gcs(prefix, limit)
                keys.update(gcs_keys)

            # Get keys from local storage
            if self.config.enable_local_fallback:
                local_keys = await self._list_keys_local(prefix, limit)
                keys.update(local_keys)

            # Apply limit if specified
            keys_list = list(keys)
            if limit:
                keys_list = keys_list[:limit]

            return keys_list

        except Exception as e:
            logger.error(f"Failed to list keys: {e}")
            raise FileStorageError(f"Key listing failed: {e}")

    # GCS implementation methods

    async def _store_gcs(
        self,
        key: str,
        data: bytes,
        metadata: Optional[Dict[str, Any]],
        compressed: bool,
    ) -> bool:
        """Store data in Google Cloud Storage."""
        try:
            blob = self._gcs_bucket.blob(key)

            # Set metadata
            if metadata:
                blob.metadata = metadata
            if compressed:
                blob.content_encoding = "gzip"

            # Upload data
            blob.upload_from_string(data)
            return True

        except Exception as e:
            logger.error(f"GCS store failed for key {key}: {e}")
            return False

    async def _retrieve_gcs(self, key: str) -> Optional[Any]:
        """Retrieve data from Google Cloud Storage."""
        try:
            blob = self._gcs_bucket.blob(key)

            if not blob.exists():
                return None

            # Download data
            data = blob.download_as_bytes()

            # Decompress if needed
            if blob.content_encoding == "gzip":
                data = gzip.decompress(data)

            # Deserialize data
            return await self._deserialize_data(data)

        except NotFound:
            return None
        except Exception as e:
            logger.error(f"GCS retrieve failed for key {key}: {e}")
            return None

    async def _delete_gcs(self, key: str) -> bool:
        """Delete data from Google Cloud Storage."""
        try:
            blob = self._gcs_bucket.blob(key)
            blob.delete()
            return True

        except NotFound:
            return True  # Already deleted
        except Exception as e:
            logger.error(f"GCS delete failed for key {key}: {e}")
            return False

    async def _exists_gcs(self, key: str) -> bool:
        """Check if data exists in Google Cloud Storage."""
        try:
            blob = self._gcs_bucket.blob(key)
            return blob.exists()

        except Exception as e:
            logger.error(f"GCS exists check failed for key {key}: {e}")
            return False

    async def _list_keys_gcs(self, prefix: Optional[str], limit: Optional[int]) -> List[str]:
        """List keys from Google Cloud Storage."""
        try:
            blobs = self._gcs_bucket.list_blobs(prefix=prefix, max_results=limit)
            return [blob.name for blob in blobs]

        except Exception as e:
            logger.error(f"GCS list keys failed: {e}")
            return []

    # Local storage implementation methods

    async def _store_local(
        self,
        key: str,
        data: bytes,
        metadata: Optional[Dict[str, Any]],
        compressed: bool,
    ) -> bool:
        """Store data in local filesystem."""
        try:
            file_path = Path(self.config.local_storage_path) / key
            file_path.parent.mkdir(parents=True, exist_ok=True)

            async with aiofiles.open(file_path, "wb") as f:
                await f.write(data)

            # Store metadata separately
            if metadata:
                metadata_path = file_path.with_suffix(".metadata")
                metadata_with_compression = {
                    **metadata,
                    "compressed": compressed,
                }
                async with aiofiles.open(metadata_path, "w") as f:
                    await f.write(json.dumps(metadata_with_compression))

            return True

        except Exception as e:
            logger.error(f"Local store failed for key {key}: {e}")
            return False

    async def _retrieve_local(self, key: str) -> Optional[Any]:
        """Retrieve data from local filesystem."""
        try:
            file_path = Path(self.config.local_storage_path) / key

            if not file_path.exists():
                return None

            async with aiofiles.open(file_path, "rb") as f:
                data = await f.read()

            # Check for compression metadata
            metadata_path = file_path.with_suffix(".metadata")
            compressed = False
            if metadata_path.exists():
                async with aiofiles.open(metadata_path, "r") as f:
                    metadata = json.loads(await f.read())
                    compressed = metadata.get("compressed", False)

            # Decompress if needed
            if compressed:
                data = gzip.decompress(data)

            # Deserialize data
            return await self._deserialize_data(data)

        except Exception as e:
            logger.error(f"Local retrieve failed for key {key}: {e}")
            return None

    async def _delete_local(self, key: str) -> bool:
        """Delete data from local filesystem."""
        try:
            file_path = Path(self.config.local_storage_path) / key
            metadata_path = file_path.with_suffix(".metadata")

            success = True
            if file_path.exists():
                file_path.unlink()

            if metadata_path.exists():
                metadata_path.unlink()

            return success

        except Exception as e:
            logger.error(f"Local delete failed for key {key}: {e}")
            return False

    async def _exists_local(self, key: str) -> bool:
        """Check if data exists in local filesystem."""
        try:
            file_path = Path(self.config.local_storage_path) / key
            return file_path.exists()

        except Exception as e:
            logger.error(f"Local exists check failed for key {key}: {e}")
            return False

    async def _list_keys_local(self, prefix: Optional[str], limit: Optional[int]) -> List[str]:
        """List keys from local filesystem."""
        try:
            storage_path = Path(self.config.local_storage_path)
            if not storage_path.exists():
                return []

            keys = []
            for file_path in storage_path.rglob("*"):
                if file_path.is_file() and not file_path.name.endswith(".metadata"):
                    key = str(file_path.relative_to(storage_path))
                    if not prefix or key.startswith(prefix):
                        keys.append(key)
                        if limit and len(keys) >= limit:
                            break

            return keys

        except Exception as e:
            logger.error(f"Local list keys failed: {e}")
            return []

    # Utility methods

    async def _serialize_data(self, data: Union[str, bytes, Dict[str, Any]]) -> bytes:
        """Serialize data for storage."""
        if isinstance(data, bytes):
            return data
        elif isinstance(data, str):
            return data.encode("utf-8")
        else:
            # Use pickle for complex objects
            return pickle.dumps(data)

    async def _deserialize_data(self, data: bytes) -> Any:
        """Deserialize data from storage."""
        try:
            # Try to deserialize as pickle first
            return pickle.loads(data)
        except Exception:
            try:
                # Try as JSON
                return json.loads(data.decode("utf-8"))
            except Exception:
                # Return as string
                return data.decode("utf-8")

    async def _cleanup_cache(self):
        """Clean up expired cache entries."""
        if not self.config.enable_cache:
            return

        current_time = datetime.utcnow()
        expired_keys = []

        for key, timestamp in self._cache_timestamps.items():
            if (current_time - timestamp).total_seconds() > self.config.cache_ttl_seconds:
                expired_keys.append(key)

        for key in expired_keys:
            self._cache.pop(key, None)
            self._cache_timestamps.pop(key, None)

    def get_stats(self) -> Dict[str, Any]:
        """Get storage statistics."""
        return {
            "initialized": self._initialized,
            "gcs_available": self._gcs_bucket is not None,
            "local_fallback_enabled": self.config.enable_local_fallback,
            "cache_enabled": self.config.enable_cache,
            "cache_size": len(self._cache),
            "metrics": (
                self.metrics.get_metrics_summary()
                if self.metrics and hasattr(self.metrics, "get_metrics_summary")
                else {}
            ),
        }


# Global instance
_file_storage_instance = None


def get_file_storage(config: Optional[Dict[str, Any]] = None) -> FileStorage:
    """Get the global file storage instance."""
    global _file_storage_instance
    if _file_storage_instance is None:
        if config is None:
            from aiecs.config.config import get_settings

            settings = get_settings()
            config = settings.file_storage_config
        _file_storage_instance = FileStorage(config)
    return _file_storage_instance


async def initialize_file_storage(
    config: Optional[Dict[str, Any]] = None,
) -> FileStorage:
    """Initialize and return the file storage instance."""
    storage = get_file_storage(config)
    await storage.initialize()
    return storage
```