aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -8,35 +8,40 @@ with support for local fallback and caching.
|
|
|
8
8
|
import os
|
|
9
9
|
import json
|
|
10
10
|
import logging
|
|
11
|
-
import asyncio
|
|
12
11
|
import aiofiles
|
|
13
|
-
from typing import Dict, List, Any, Optional, Union
|
|
14
|
-
from datetime import datetime
|
|
12
|
+
from typing import Dict, List, Any, Optional, Union
|
|
13
|
+
from datetime import datetime
|
|
15
14
|
from pathlib import Path
|
|
16
|
-
import hashlib
|
|
17
15
|
import gzip
|
|
18
16
|
import pickle
|
|
19
17
|
|
|
20
18
|
try:
|
|
21
|
-
from google.cloud import storage
|
|
19
|
+
from google.cloud import storage # type: ignore[attr-defined]
|
|
22
20
|
from google.cloud.exceptions import NotFound, GoogleCloudError
|
|
23
21
|
from google.auth.exceptions import DefaultCredentialsError
|
|
22
|
+
|
|
24
23
|
GCS_AVAILABLE = True
|
|
25
24
|
except ImportError:
|
|
26
25
|
GCS_AVAILABLE = False
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
from typing import Any, TYPE_CHECKING
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
storage: Any # type: ignore[assignment,no-redef]
|
|
29
|
+
NotFound: Any # type: ignore[assignment,no-redef]
|
|
30
|
+
GoogleCloudError: Any # type: ignore[assignment,no-redef]
|
|
31
|
+
DefaultCredentialsError: Any # type: ignore[assignment,no-redef]
|
|
32
|
+
else:
|
|
33
|
+
storage = None # type: ignore[assignment]
|
|
34
|
+
NotFound = Exception # type: ignore[assignment]
|
|
35
|
+
GoogleCloudError = Exception # type: ignore[assignment]
|
|
36
|
+
DefaultCredentialsError = Exception # type: ignore[assignment]
|
|
37
|
+
|
|
38
|
+
from ..monitoring.global_metrics_manager import get_global_metrics
|
|
33
39
|
|
|
34
40
|
logger = logging.getLogger(__name__)
|
|
35
41
|
|
|
36
42
|
|
|
37
43
|
class FileStorageError(Exception):
|
|
38
44
|
"""Base exception for file storage operations."""
|
|
39
|
-
pass
|
|
40
45
|
|
|
41
46
|
|
|
42
47
|
class FileStorageConfig:
|
|
@@ -44,32 +49,32 @@ class FileStorageConfig:
|
|
|
44
49
|
|
|
45
50
|
def __init__(self, config: Dict[str, Any]):
|
|
46
51
|
# Google Cloud Storage settings
|
|
47
|
-
self.gcs_bucket_name = config.get(
|
|
48
|
-
self.gcs_project_id = config.get(
|
|
49
|
-
self.gcs_credentials_path = config.get(
|
|
50
|
-
self.gcs_location = config.get(
|
|
52
|
+
self.gcs_bucket_name = config.get("gcs_bucket_name", "multi-task-storage")
|
|
53
|
+
self.gcs_project_id = config.get("gcs_project_id")
|
|
54
|
+
self.gcs_credentials_path = config.get("gcs_credentials_path")
|
|
55
|
+
self.gcs_location = config.get("gcs_location", "US")
|
|
51
56
|
|
|
52
57
|
# Local storage fallback
|
|
53
|
-
self.local_storage_path = config.get(
|
|
54
|
-
self.enable_local_fallback = config.get(
|
|
58
|
+
self.local_storage_path = config.get("local_storage_path", "./storage")
|
|
59
|
+
self.enable_local_fallback = config.get("enable_local_fallback", True)
|
|
55
60
|
|
|
56
61
|
# Cache settings
|
|
57
|
-
self.enable_cache = config.get(
|
|
58
|
-
self.cache_ttl_seconds = config.get(
|
|
59
|
-
self.max_cache_size_mb = config.get(
|
|
62
|
+
self.enable_cache = config.get("enable_cache", True)
|
|
63
|
+
self.cache_ttl_seconds = config.get("cache_ttl_seconds", 3600)
|
|
64
|
+
self.max_cache_size_mb = config.get("max_cache_size_mb", 100)
|
|
60
65
|
|
|
61
66
|
# Performance settings
|
|
62
|
-
self.chunk_size = config.get(
|
|
63
|
-
self.max_retries = config.get(
|
|
64
|
-
self.timeout_seconds = config.get(
|
|
67
|
+
self.chunk_size = config.get("chunk_size", 8192)
|
|
68
|
+
self.max_retries = config.get("max_retries", 3)
|
|
69
|
+
self.timeout_seconds = config.get("timeout_seconds", 30)
|
|
65
70
|
|
|
66
71
|
# Compression settings
|
|
67
|
-
self.enable_compression = config.get(
|
|
68
|
-
self.compression_threshold_bytes = config.get(
|
|
72
|
+
self.enable_compression = config.get("enable_compression", True)
|
|
73
|
+
self.compression_threshold_bytes = config.get("compression_threshold_bytes", 1024)
|
|
69
74
|
|
|
70
75
|
# Security settings
|
|
71
|
-
self.enable_encryption = config.get(
|
|
72
|
-
self.encryption_key = config.get(
|
|
76
|
+
self.enable_encryption = config.get("enable_encryption", False)
|
|
77
|
+
self.encryption_key = config.get("encryption_key")
|
|
73
78
|
|
|
74
79
|
|
|
75
80
|
class FileStorage:
|
|
@@ -89,12 +94,12 @@ class FileStorage:
|
|
|
89
94
|
self.config = FileStorageConfig(config)
|
|
90
95
|
self._gcs_client = None
|
|
91
96
|
self._gcs_bucket = None
|
|
92
|
-
self._cache = {}
|
|
93
|
-
self._cache_timestamps = {}
|
|
97
|
+
self._cache: Dict[str, Any] = {}
|
|
98
|
+
self._cache_timestamps: Dict[str, datetime] = {}
|
|
94
99
|
self._initialized = False
|
|
95
100
|
|
|
96
|
-
# Metrics
|
|
97
|
-
self.metrics =
|
|
101
|
+
# Metrics - use global metrics manager
|
|
102
|
+
self.metrics = get_global_metrics()
|
|
98
103
|
|
|
99
104
|
# Ensure local storage directory exists
|
|
100
105
|
if self.config.enable_local_fallback:
|
|
@@ -131,9 +136,17 @@ class FileStorage:
|
|
|
131
136
|
try:
|
|
132
137
|
# Set credentials if provided
|
|
133
138
|
if self.config.gcs_credentials_path:
|
|
134
|
-
os.environ[
|
|
139
|
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.config.gcs_credentials_path
|
|
135
140
|
|
|
136
|
-
# Create client
|
|
141
|
+
# Create client - project is required for bucket creation
|
|
142
|
+
# If project_id is None, client will use default project from credentials
|
|
143
|
+
# but we need it for bucket creation API calls
|
|
144
|
+
if not self.config.gcs_project_id:
|
|
145
|
+
logger.warning("GCS project ID not provided. Bucket creation will be disabled.")
|
|
146
|
+
logger.warning("Bucket must exist and be accessible. Falling back to local storage if bucket not found.")
|
|
147
|
+
|
|
148
|
+
# Create client with project ID (can be None, but bucket creation
|
|
149
|
+
# will fail)
|
|
137
150
|
self._gcs_client = storage.Client(project=self.config.gcs_project_id)
|
|
138
151
|
|
|
139
152
|
# Get or create bucket
|
|
@@ -144,12 +157,25 @@ class FileStorage:
|
|
|
144
157
|
logger.info(f"Connected to GCS bucket: {self.config.gcs_bucket_name}")
|
|
145
158
|
|
|
146
159
|
except NotFound:
|
|
147
|
-
#
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
160
|
+
# Only create bucket if project_id is provided
|
|
161
|
+
# Bucket creation requires project parameter in API call
|
|
162
|
+
if self.config.gcs_project_id:
|
|
163
|
+
try:
|
|
164
|
+
self._gcs_bucket = self._gcs_client.create_bucket(
|
|
165
|
+
self.config.gcs_bucket_name,
|
|
166
|
+
project=self.config.gcs_project_id, # Explicitly pass project parameter
|
|
167
|
+
location=self.config.gcs_location,
|
|
168
|
+
)
|
|
169
|
+
logger.info(f"Created GCS bucket: {self.config.gcs_bucket_name} in project {self.config.gcs_project_id}")
|
|
170
|
+
except Exception as create_error:
|
|
171
|
+
logger.error(f"Failed to create GCS bucket {self.config.gcs_bucket_name}: {create_error}")
|
|
172
|
+
logger.warning("Bucket creation failed. Will use local storage fallback.")
|
|
173
|
+
self._gcs_bucket = None
|
|
174
|
+
else:
|
|
175
|
+
logger.error(f"GCS bucket '{self.config.gcs_bucket_name}' not found and " "project ID is not provided. Cannot create bucket without project parameter.")
|
|
176
|
+
logger.warning("Please ensure the bucket exists or provide DOC_PARSER_GCS_PROJECT_ID in configuration.")
|
|
177
|
+
logger.warning("Falling back to local storage only.")
|
|
178
|
+
self._gcs_bucket = None
|
|
153
179
|
|
|
154
180
|
except DefaultCredentialsError:
|
|
155
181
|
logger.warning("GCS credentials not found, using local storage only")
|
|
@@ -161,8 +187,12 @@ class FileStorage:
|
|
|
161
187
|
self._gcs_client = None
|
|
162
188
|
self._gcs_bucket = None
|
|
163
189
|
|
|
164
|
-
async def store(
|
|
165
|
-
|
|
190
|
+
async def store(
|
|
191
|
+
self,
|
|
192
|
+
key: str,
|
|
193
|
+
data: Union[str, bytes, Dict[str, Any]],
|
|
194
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
195
|
+
) -> bool:
|
|
166
196
|
"""
|
|
167
197
|
Store data with the given key.
|
|
168
198
|
|
|
@@ -184,8 +214,7 @@ class FileStorage:
|
|
|
184
214
|
serialized_data = await self._serialize_data(data)
|
|
185
215
|
|
|
186
216
|
# Compress if enabled and data is large enough
|
|
187
|
-
if
|
|
188
|
-
len(serialized_data) > self.config.compression_threshold_bytes):
|
|
217
|
+
if self.config.enable_compression and len(serialized_data) > self.config.compression_threshold_bytes:
|
|
189
218
|
serialized_data = gzip.compress(serialized_data)
|
|
190
219
|
compressed = True
|
|
191
220
|
else:
|
|
@@ -194,9 +223,9 @@ class FileStorage:
|
|
|
194
223
|
# Store in cache
|
|
195
224
|
if self.config.enable_cache:
|
|
196
225
|
self._cache[key] = {
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
226
|
+
"data": data,
|
|
227
|
+
"metadata": metadata,
|
|
228
|
+
"compressed": compressed,
|
|
200
229
|
}
|
|
201
230
|
self._cache_timestamps[key] = datetime.utcnow()
|
|
202
231
|
await self._cleanup_cache()
|
|
@@ -205,26 +234,30 @@ class FileStorage:
|
|
|
205
234
|
if self._gcs_bucket:
|
|
206
235
|
success = await self._store_gcs(key, serialized_data, metadata, compressed)
|
|
207
236
|
if success:
|
|
208
|
-
self.metrics
|
|
209
|
-
|
|
210
|
-
|
|
237
|
+
if self.metrics:
|
|
238
|
+
self.metrics.record_operation("gcs_store_success", True)
|
|
239
|
+
duration = (datetime.utcnow() - start_time).total_seconds()
|
|
240
|
+
self.metrics.record_duration("gcs_store_duration", duration)
|
|
211
241
|
return True
|
|
212
242
|
|
|
213
243
|
# Fallback to local storage
|
|
214
244
|
if self.config.enable_local_fallback:
|
|
215
245
|
success = await self._store_local(key, serialized_data, metadata, compressed)
|
|
216
246
|
if success:
|
|
217
|
-
self.metrics
|
|
218
|
-
|
|
219
|
-
|
|
247
|
+
if self.metrics:
|
|
248
|
+
self.metrics.record_operation("local_store_success", True)
|
|
249
|
+
duration = (datetime.utcnow() - start_time).total_seconds()
|
|
250
|
+
self.metrics.record_duration("local_store_duration", duration)
|
|
220
251
|
return True
|
|
221
252
|
|
|
222
|
-
self.metrics
|
|
253
|
+
if self.metrics:
|
|
254
|
+
self.metrics.record_operation("store_failure", False)
|
|
223
255
|
return False
|
|
224
256
|
|
|
225
257
|
except Exception as e:
|
|
226
258
|
logger.error(f"Failed to store data for key {key}: {e}")
|
|
227
|
-
self.metrics
|
|
259
|
+
if self.metrics:
|
|
260
|
+
self.metrics.record_operation("store_error", False)
|
|
228
261
|
raise FileStorageError(f"Storage failed: {e}")
|
|
229
262
|
|
|
230
263
|
async def retrieve(self, key: str) -> Optional[Union[str, bytes, Dict[str, Any]]]:
|
|
@@ -246,9 +279,10 @@ class FileStorage:
|
|
|
246
279
|
# Check cache first
|
|
247
280
|
if self.config.enable_cache and key in self._cache:
|
|
248
281
|
cache_time = self._cache_timestamps.get(key)
|
|
249
|
-
if cache_time and (datetime.utcnow() - cache_time).total_seconds() < self.config.cache_ttl_seconds:
|
|
250
|
-
self.metrics
|
|
251
|
-
|
|
282
|
+
if cache_time and (datetime.utcnow() - cache_time).total_seconds() < float(self.config.cache_ttl_seconds):
|
|
283
|
+
if self.metrics:
|
|
284
|
+
self.metrics.record_operation("cache_hit", True)
|
|
285
|
+
return self._cache[key]["data"]
|
|
252
286
|
else:
|
|
253
287
|
# Remove expired cache entry
|
|
254
288
|
self._cache.pop(key, None)
|
|
@@ -258,13 +292,14 @@ class FileStorage:
|
|
|
258
292
|
if self._gcs_bucket:
|
|
259
293
|
data = await self._retrieve_gcs(key)
|
|
260
294
|
if data is not None:
|
|
261
|
-
self.metrics
|
|
262
|
-
|
|
263
|
-
|
|
295
|
+
if self.metrics:
|
|
296
|
+
self.metrics.record_operation("gcs_retrieve_success", True)
|
|
297
|
+
duration = (datetime.utcnow() - start_time).total_seconds()
|
|
298
|
+
self.metrics.record_duration("gcs_retrieve_duration", duration)
|
|
264
299
|
|
|
265
300
|
# Update cache
|
|
266
301
|
if self.config.enable_cache:
|
|
267
|
-
self._cache[key] = {
|
|
302
|
+
self._cache[key] = {"data": data, "metadata": {}}
|
|
268
303
|
self._cache_timestamps[key] = datetime.utcnow()
|
|
269
304
|
|
|
270
305
|
return data
|
|
@@ -273,23 +308,26 @@ class FileStorage:
|
|
|
273
308
|
if self.config.enable_local_fallback:
|
|
274
309
|
data = await self._retrieve_local(key)
|
|
275
310
|
if data is not None:
|
|
276
|
-
self.metrics
|
|
277
|
-
|
|
278
|
-
|
|
311
|
+
if self.metrics:
|
|
312
|
+
self.metrics.record_operation("local_retrieve_success", True)
|
|
313
|
+
duration = (datetime.utcnow() - start_time).total_seconds()
|
|
314
|
+
self.metrics.record_duration("local_retrieve_duration", duration)
|
|
279
315
|
|
|
280
316
|
# Update cache
|
|
281
317
|
if self.config.enable_cache:
|
|
282
|
-
self._cache[key] = {
|
|
318
|
+
self._cache[key] = {"data": data, "metadata": {}}
|
|
283
319
|
self._cache_timestamps[key] = datetime.utcnow()
|
|
284
320
|
|
|
285
321
|
return data
|
|
286
322
|
|
|
287
|
-
self.metrics
|
|
323
|
+
if self.metrics:
|
|
324
|
+
self.metrics.record_operation("retrieve_not_found", False)
|
|
288
325
|
return None
|
|
289
326
|
|
|
290
327
|
except Exception as e:
|
|
291
328
|
logger.error(f"Failed to retrieve data for key {key}: {e}")
|
|
292
|
-
self.metrics
|
|
329
|
+
if self.metrics:
|
|
330
|
+
self.metrics.record_operation("retrieve_error", False)
|
|
293
331
|
raise FileStorageError(f"Retrieval failed: {e}")
|
|
294
332
|
|
|
295
333
|
async def delete(self, key: str) -> bool:
|
|
@@ -317,7 +355,8 @@ class FileStorage:
|
|
|
317
355
|
if self._gcs_bucket:
|
|
318
356
|
gcs_success = await self._delete_gcs(key)
|
|
319
357
|
if gcs_success:
|
|
320
|
-
self.metrics
|
|
358
|
+
if self.metrics:
|
|
359
|
+
self.metrics.record_operation("gcs_delete_success", True)
|
|
321
360
|
else:
|
|
322
361
|
success = False
|
|
323
362
|
|
|
@@ -325,20 +364,23 @@ class FileStorage:
|
|
|
325
364
|
if self.config.enable_local_fallback:
|
|
326
365
|
local_success = await self._delete_local(key)
|
|
327
366
|
if local_success:
|
|
328
|
-
self.metrics
|
|
367
|
+
if self.metrics:
|
|
368
|
+
self.metrics.record_operation("local_delete_success", True)
|
|
329
369
|
else:
|
|
330
370
|
success = False
|
|
331
371
|
|
|
332
|
-
if
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
372
|
+
if self.metrics:
|
|
373
|
+
if success:
|
|
374
|
+
self.metrics.record_operation("delete_success", True)
|
|
375
|
+
else:
|
|
376
|
+
self.metrics.record_operation("delete_failure", False)
|
|
336
377
|
|
|
337
378
|
return success
|
|
338
379
|
|
|
339
380
|
except Exception as e:
|
|
340
381
|
logger.error(f"Failed to delete data for key {key}: {e}")
|
|
341
|
-
self.metrics
|
|
382
|
+
if self.metrics:
|
|
383
|
+
self.metrics.record_operation("delete_error", False)
|
|
342
384
|
raise FileStorageError(f"Deletion failed: {e}")
|
|
343
385
|
|
|
344
386
|
async def exists(self, key: str) -> bool:
|
|
@@ -358,7 +400,7 @@ class FileStorage:
|
|
|
358
400
|
# Check cache first
|
|
359
401
|
if self.config.enable_cache and key in self._cache:
|
|
360
402
|
cache_time = self._cache_timestamps.get(key)
|
|
361
|
-
if cache_time and (datetime.utcnow() - cache_time).total_seconds() < self.config.cache_ttl_seconds:
|
|
403
|
+
if cache_time and (datetime.utcnow() - cache_time).total_seconds() < float(self.config.cache_ttl_seconds):
|
|
362
404
|
return True
|
|
363
405
|
|
|
364
406
|
# Check GCS
|
|
@@ -416,8 +458,17 @@ class FileStorage:
|
|
|
416
458
|
|
|
417
459
|
# GCS implementation methods
|
|
418
460
|
|
|
419
|
-
async def _store_gcs(
|
|
461
|
+
async def _store_gcs(
|
|
462
|
+
self,
|
|
463
|
+
key: str,
|
|
464
|
+
data: bytes,
|
|
465
|
+
metadata: Optional[Dict[str, Any]],
|
|
466
|
+
compressed: bool,
|
|
467
|
+
) -> bool:
|
|
420
468
|
"""Store data in Google Cloud Storage."""
|
|
469
|
+
if self._gcs_bucket is None:
|
|
470
|
+
logger.error("GCS bucket not initialized")
|
|
471
|
+
return False
|
|
421
472
|
try:
|
|
422
473
|
blob = self._gcs_bucket.blob(key)
|
|
423
474
|
|
|
@@ -425,7 +476,7 @@ class FileStorage:
|
|
|
425
476
|
if metadata:
|
|
426
477
|
blob.metadata = metadata
|
|
427
478
|
if compressed:
|
|
428
|
-
blob.content_encoding =
|
|
479
|
+
blob.content_encoding = "gzip"
|
|
429
480
|
|
|
430
481
|
# Upload data
|
|
431
482
|
blob.upload_from_string(data)
|
|
@@ -437,6 +488,9 @@ class FileStorage:
|
|
|
437
488
|
|
|
438
489
|
async def _retrieve_gcs(self, key: str) -> Optional[Any]:
|
|
439
490
|
"""Retrieve data from Google Cloud Storage."""
|
|
491
|
+
if self._gcs_bucket is None:
|
|
492
|
+
logger.error("GCS bucket not initialized")
|
|
493
|
+
return None
|
|
440
494
|
try:
|
|
441
495
|
blob = self._gcs_bucket.blob(key)
|
|
442
496
|
|
|
@@ -447,7 +501,7 @@ class FileStorage:
|
|
|
447
501
|
data = blob.download_as_bytes()
|
|
448
502
|
|
|
449
503
|
# Decompress if needed
|
|
450
|
-
if blob.content_encoding ==
|
|
504
|
+
if blob.content_encoding == "gzip":
|
|
451
505
|
data = gzip.decompress(data)
|
|
452
506
|
|
|
453
507
|
# Deserialize data
|
|
@@ -461,6 +515,9 @@ class FileStorage:
|
|
|
461
515
|
|
|
462
516
|
async def _delete_gcs(self, key: str) -> bool:
|
|
463
517
|
"""Delete data from Google Cloud Storage."""
|
|
518
|
+
if self._gcs_bucket is None:
|
|
519
|
+
logger.error("GCS bucket not initialized")
|
|
520
|
+
return False
|
|
464
521
|
try:
|
|
465
522
|
blob = self._gcs_bucket.blob(key)
|
|
466
523
|
blob.delete()
|
|
@@ -474,6 +531,9 @@ class FileStorage:
|
|
|
474
531
|
|
|
475
532
|
async def _exists_gcs(self, key: str) -> bool:
|
|
476
533
|
"""Check if data exists in Google Cloud Storage."""
|
|
534
|
+
if self._gcs_bucket is None:
|
|
535
|
+
logger.error("GCS bucket not initialized")
|
|
536
|
+
return False
|
|
477
537
|
try:
|
|
478
538
|
blob = self._gcs_bucket.blob(key)
|
|
479
539
|
return blob.exists()
|
|
@@ -484,6 +544,9 @@ class FileStorage:
|
|
|
484
544
|
|
|
485
545
|
async def _list_keys_gcs(self, prefix: Optional[str], limit: Optional[int]) -> List[str]:
|
|
486
546
|
"""List keys from Google Cloud Storage."""
|
|
547
|
+
if self._gcs_bucket is None:
|
|
548
|
+
logger.error("GCS bucket not initialized")
|
|
549
|
+
return []
|
|
487
550
|
try:
|
|
488
551
|
blobs = self._gcs_bucket.list_blobs(prefix=prefix, max_results=limit)
|
|
489
552
|
return [blob.name for blob in blobs]
|
|
@@ -494,20 +557,29 @@ class FileStorage:
|
|
|
494
557
|
|
|
495
558
|
# Local storage implementation methods
|
|
496
559
|
|
|
497
|
-
async def _store_local(
|
|
560
|
+
async def _store_local(
|
|
561
|
+
self,
|
|
562
|
+
key: str,
|
|
563
|
+
data: bytes,
|
|
564
|
+
metadata: Optional[Dict[str, Any]],
|
|
565
|
+
compressed: bool,
|
|
566
|
+
) -> bool:
|
|
498
567
|
"""Store data in local filesystem."""
|
|
499
568
|
try:
|
|
500
569
|
file_path = Path(self.config.local_storage_path) / key
|
|
501
570
|
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
502
571
|
|
|
503
|
-
async with aiofiles.open(file_path,
|
|
572
|
+
async with aiofiles.open(file_path, "wb") as f:
|
|
504
573
|
await f.write(data)
|
|
505
574
|
|
|
506
575
|
# Store metadata separately
|
|
507
576
|
if metadata:
|
|
508
|
-
metadata_path = file_path.with_suffix(
|
|
509
|
-
metadata_with_compression = {
|
|
510
|
-
|
|
577
|
+
metadata_path = file_path.with_suffix(".metadata")
|
|
578
|
+
metadata_with_compression = {
|
|
579
|
+
**metadata,
|
|
580
|
+
"compressed": compressed,
|
|
581
|
+
}
|
|
582
|
+
async with aiofiles.open(metadata_path, "w") as f:
|
|
511
583
|
await f.write(json.dumps(metadata_with_compression))
|
|
512
584
|
|
|
513
585
|
return True
|
|
@@ -524,16 +596,16 @@ class FileStorage:
|
|
|
524
596
|
if not file_path.exists():
|
|
525
597
|
return None
|
|
526
598
|
|
|
527
|
-
async with aiofiles.open(file_path,
|
|
599
|
+
async with aiofiles.open(file_path, "rb") as f:
|
|
528
600
|
data = await f.read()
|
|
529
601
|
|
|
530
602
|
# Check for compression metadata
|
|
531
|
-
metadata_path = file_path.with_suffix(
|
|
603
|
+
metadata_path = file_path.with_suffix(".metadata")
|
|
532
604
|
compressed = False
|
|
533
605
|
if metadata_path.exists():
|
|
534
|
-
async with aiofiles.open(metadata_path,
|
|
606
|
+
async with aiofiles.open(metadata_path, "r") as f:
|
|
535
607
|
metadata = json.loads(await f.read())
|
|
536
|
-
compressed = metadata.get(
|
|
608
|
+
compressed = metadata.get("compressed", False)
|
|
537
609
|
|
|
538
610
|
# Decompress if needed
|
|
539
611
|
if compressed:
|
|
@@ -550,7 +622,7 @@ class FileStorage:
|
|
|
550
622
|
"""Delete data from local filesystem."""
|
|
551
623
|
try:
|
|
552
624
|
file_path = Path(self.config.local_storage_path) / key
|
|
553
|
-
metadata_path = file_path.with_suffix(
|
|
625
|
+
metadata_path = file_path.with_suffix(".metadata")
|
|
554
626
|
|
|
555
627
|
success = True
|
|
556
628
|
if file_path.exists():
|
|
@@ -583,8 +655,8 @@ class FileStorage:
|
|
|
583
655
|
return []
|
|
584
656
|
|
|
585
657
|
keys = []
|
|
586
|
-
for file_path in storage_path.rglob(
|
|
587
|
-
if file_path.is_file() and not file_path.name.endswith(
|
|
658
|
+
for file_path in storage_path.rglob("*"):
|
|
659
|
+
if file_path.is_file() and not file_path.name.endswith(".metadata"):
|
|
588
660
|
key = str(file_path.relative_to(storage_path))
|
|
589
661
|
if not prefix or key.startswith(prefix):
|
|
590
662
|
keys.append(key)
|
|
@@ -604,7 +676,7 @@ class FileStorage:
|
|
|
604
676
|
if isinstance(data, bytes):
|
|
605
677
|
return data
|
|
606
678
|
elif isinstance(data, str):
|
|
607
|
-
return data.encode(
|
|
679
|
+
return data.encode("utf-8")
|
|
608
680
|
else:
|
|
609
681
|
# Use pickle for complex objects
|
|
610
682
|
return pickle.dumps(data)
|
|
@@ -614,13 +686,13 @@ class FileStorage:
|
|
|
614
686
|
try:
|
|
615
687
|
# Try to deserialize as pickle first
|
|
616
688
|
return pickle.loads(data)
|
|
617
|
-
except:
|
|
689
|
+
except Exception:
|
|
618
690
|
try:
|
|
619
691
|
# Try as JSON
|
|
620
|
-
return json.loads(data.decode(
|
|
621
|
-
except:
|
|
692
|
+
return json.loads(data.decode("utf-8"))
|
|
693
|
+
except Exception:
|
|
622
694
|
# Return as string
|
|
623
|
-
return data.decode(
|
|
695
|
+
return data.decode("utf-8")
|
|
624
696
|
|
|
625
697
|
async def _cleanup_cache(self):
|
|
626
698
|
"""Clean up expired cache entries."""
|
|
@@ -641,30 +713,35 @@ class FileStorage:
|
|
|
641
713
|
def get_stats(self) -> Dict[str, Any]:
|
|
642
714
|
"""Get storage statistics."""
|
|
643
715
|
return {
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
716
|
+
"initialized": self._initialized,
|
|
717
|
+
"gcs_available": self._gcs_bucket is not None,
|
|
718
|
+
"local_fallback_enabled": self.config.enable_local_fallback,
|
|
719
|
+
"cache_enabled": self.config.enable_cache,
|
|
720
|
+
"cache_size": len(self._cache),
|
|
721
|
+
"metrics": (self.metrics.get_metrics_summary() if self.metrics and hasattr(self.metrics, "get_metrics_summary") else {}),
|
|
650
722
|
}
|
|
651
723
|
|
|
652
724
|
|
|
653
725
|
# Global instance
|
|
654
726
|
_file_storage_instance = None
|
|
655
727
|
|
|
728
|
+
|
|
656
729
|
def get_file_storage(config: Optional[Dict[str, Any]] = None) -> FileStorage:
|
|
657
730
|
"""Get the global file storage instance."""
|
|
658
731
|
global _file_storage_instance
|
|
659
732
|
if _file_storage_instance is None:
|
|
660
733
|
if config is None:
|
|
661
734
|
from aiecs.config.config import get_settings
|
|
735
|
+
|
|
662
736
|
settings = get_settings()
|
|
663
737
|
config = settings.file_storage_config
|
|
664
738
|
_file_storage_instance = FileStorage(config)
|
|
665
739
|
return _file_storage_instance
|
|
666
740
|
|
|
667
|
-
|
|
741
|
+
|
|
742
|
+
async def initialize_file_storage(
|
|
743
|
+
config: Optional[Dict[str, Any]] = None,
|
|
744
|
+
) -> FileStorage:
|
|
668
745
|
"""Initialize and return the file storage instance."""
|
|
669
746
|
storage = get_file_storage(config)
|
|
670
747
|
await storage.initialize()
|