aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,447 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
import subprocess
|
|
4
|
+
import tempfile
|
|
5
|
+
from typing import Dict, Any, List, Optional
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from dataclasses import field
|
|
8
|
+
|
|
9
|
+
from pydantic import (
|
|
10
|
+
BaseModel,
|
|
11
|
+
ValidationError,
|
|
12
|
+
field_validator,
|
|
13
|
+
ConfigDict,
|
|
14
|
+
Field,
|
|
15
|
+
)
|
|
16
|
+
from PIL import Image, ExifTags, ImageFilter
|
|
17
|
+
from queue import Queue
|
|
18
|
+
|
|
19
|
+
from aiecs.tools.base_tool import BaseTool
|
|
20
|
+
from aiecs.tools import register_tool
|
|
21
|
+
|
|
22
|
+
# Defaults consulted by the schema validators below (size limit is in megabytes;
# extensions are compared against the lower-cased suffix of the path).
_DEFAULT_MAX_FILE_SIZE_MB = 50
_DEFAULT_ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"]
|
|
32
|
+
|
|
33
|
+
# Exceptions
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class ImageToolError(Exception):
    """Common ancestor of the exceptions defined by this module.

    Catch this type to handle any ImageTool-specific failure in one place.
    """
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class FileOperationError(ImageToolError):
    """Signals that reading, validating, processing or writing a file failed."""
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class SecurityError(ImageToolError):
    """Signals that an input was rejected by a security check (e.g. a disallowed extension)."""
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Base schema for common fields
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class BaseFileSchema(BaseModel):
    """Shared base for the operation schemas: holds and validates the input path."""

    file_path: str
    _mtime: Optional[float] = None  # Internal use for cache

    @field_validator("file_path")
    @classmethod
    def validate_file_path(cls, v: str) -> str:
        """Normalise the input path and check extension, existence and size.

        Args:
            v: Path as supplied by the caller.

        Returns:
            The absolute, normalised path.

        Raises:
            SecurityError: If the (lower-cased) extension is not in the
                module-level allow-list.
            FileOperationError: If the path is not an existing regular file or
                the file exceeds the module-level size limit.
        """
        resolved = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(resolved)
        suffix = suffix.lower()

        # The extension is checked first, before the filesystem is touched.
        if suffix not in _DEFAULT_ALLOWED_EXTENSIONS:
            raise SecurityError(
                f"Extension '{suffix}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}"
            )

        if not os.path.isfile(resolved):
            raise FileOperationError(f"File not found: {resolved}")

        megabytes = os.path.getsize(resolved) / (1024 * 1024)
        if megabytes > _DEFAULT_MAX_FILE_SIZE_MB:
            raise FileOperationError(
                f"File too large: {megabytes:.1f}MB, max {_DEFAULT_MAX_FILE_SIZE_MB}MB"
            )

        return resolved
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# Schemas for operations
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class LoadSchema(BaseFileSchema):
    """Input schema for the ``load`` operation; adds nothing to the base file fields."""
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class OCRSchema(BaseFileSchema):
    """Input schema for the ``ocr`` operation."""

    # Optional OCR language code; None means the caller expressed no preference.
    lang: Optional[str] = None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class MetadataSchema(BaseFileSchema):
    """Input schema for the ``metadata`` operation."""

    # When True the caller wants EXIF data in addition to size and mode.
    include_exif: bool = False
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class ResizeSchema(BaseFileSchema):
    """Input schema for the ``resize`` operation."""

    output_path: str
    width: int
    height: int

    @field_validator("output_path")
    @classmethod
    def validate_output_path(cls, v: str) -> str:
        """Normalise the destination path and refuse bad extensions or overwrites.

        Args:
            v: Destination path as supplied by the caller.

        Returns:
            The absolute, normalised destination path.

        Raises:
            SecurityError: If the (lower-cased) extension is not in the
                module-level allow-list.
            FileOperationError: If something already exists at the destination.
        """
        target = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(target)
        suffix = suffix.lower()

        if suffix not in _DEFAULT_ALLOWED_EXTENSIONS:
            raise SecurityError(
                f"Output extension '{suffix}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}"
            )

        # Never overwrite: any existing path (file or directory) is rejected.
        if os.path.exists(target):
            raise FileOperationError(f"Output file already exists: {target}")

        return target
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class FilterSchema(BaseFileSchema):
    """Input schema for the ``filter`` operation."""

    output_path: str
    filter_type: str = "blur"

    @field_validator("output_path")
    @classmethod
    def validate_output_path(cls, v: str) -> str:
        """Normalise the destination path and refuse bad extensions or overwrites.

        Args:
            v: Destination path as supplied by the caller.

        Returns:
            The absolute, normalised destination path.

        Raises:
            SecurityError: If the (lower-cased) extension is not in the
                module-level allow-list.
            FileOperationError: If something already exists at the destination.
        """
        target = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(target)
        suffix = suffix.lower()

        if suffix not in _DEFAULT_ALLOWED_EXTENSIONS:
            raise SecurityError(
                f"Output extension '{suffix}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}"
            )

        # Never overwrite: any existing path (file or directory) is rejected.
        if os.path.exists(target):
            raise FileOperationError(f"Output file already exists: {target}")

        return target

    @field_validator("filter_type")
    @classmethod
    def validate_filter_type(cls, v: str) -> str:
        """Accept only the filter names this tool knows how to apply.

        Args:
            v: Requested filter name.

        Returns:
            The filter name, unchanged.

        Raises:
            ValueError: If the name is not one of the supported filters.
        """
        supported = ["blur", "sharpen", "edge_enhance"]
        if v in supported:
            return v
        raise ValueError(f"Invalid filter_type '{v}', expected {supported}")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
# Tesseract process manager
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
@dataclass
class TesseractManager:
    """Manages a pool of Tesseract processes for OCR.

    Each pooled process is started reading from stdin and writing to stdout.
    A process that has been driven with ``Popen.communicate()`` exits and
    cannot be reused, so ``return_process`` swaps an exited process for a
    freshly started one instead of putting the dead handle back in the pool.

    Attributes are plain dataclass fields:
        pool_size: Number of processes ``initialize`` tries to start.
        processes: Every process currently owned by the manager.
        queue: Processes that are currently idle and can be handed out.
    """

    pool_size: int
    processes: List[subprocess.Popen] = field(default_factory=list)
    queue: Queue = field(default_factory=Queue)

    def _spawn(self) -> Optional[subprocess.Popen]:
        """Start one Tesseract process, or return None if the binary is missing."""
        try:
            return subprocess.Popen(
                ["tesseract", "--oem", "1", "-", "stdout", "-l", "eng"],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
        except FileNotFoundError:
            logging.getLogger(__name__).warning("Tesseract not found; OCR will be disabled")
            return None

    @staticmethod
    def _close_pipes(proc: subprocess.Popen) -> None:
        """Close whatever stdio pipes the process still has open (best effort)."""
        for stream in (proc.stdin, proc.stdout, proc.stderr):
            if stream is None:
                continue
            try:
                stream.close()
            except OSError:
                # Flushing buffered input to an already-dead process can fail;
                # there is nothing useful to do about it during teardown.
                pass

    def initialize(self):
        """Initialize Tesseract process pool.

        Stops at the first failure to find the ``tesseract`` executable, so the
        pool may end up smaller than ``pool_size`` (or empty).
        """
        for _ in range(self.pool_size):
            proc = self._spawn()
            if proc is None:
                break
            self.queue.put(proc)
            self.processes.append(proc)

    def get_process(self) -> Optional[subprocess.Popen]:
        """Get an available Tesseract process without blocking.

        Returns:
            An idle process, or None when the pool is currently empty.
        """
        from queue import Empty

        # get_nowait() avoids the check-then-get race of empty() + get(),
        # which could block forever if another caller took the last process.
        try:
            return self.queue.get_nowait()
        except Empty:
            return None

    def return_process(self, proc: subprocess.Popen):
        """Return a Tesseract process to the pool.

        A process that is still running is re-queued as is. A process that has
        already exited is dropped and replaced by a new one so the pool does
        not fill up with unusable handles.
        """
        if proc.poll() is None:
            self.queue.put(proc)
            return

        # The process has exited: retire it and try to keep the pool size stable.
        self._close_pipes(proc)
        if proc in self.processes:
            self.processes.remove(proc)
        replacement = self._spawn()
        if replacement is not None:
            self.processes.append(replacement)
            self.queue.put(replacement)

    def cleanup(self):
        """Clean up all Tesseract processes.

        Each process is terminated, killed if it does not exit within a
        second, and has its pipes closed. Afterwards the manager owns no
        processes and the idle queue is empty.
        """
        log = logging.getLogger(__name__)
        for proc in self.processes:
            try:
                proc.terminate()
                try:
                    proc.wait(timeout=1)
                except subprocess.TimeoutExpired as e:
                    log.warning(f"Error terminating Tesseract process: {e}")
                    # terminate() was ignored; escalate so no child is left behind.
                    proc.kill()
                    proc.wait(timeout=1)
            except (subprocess.TimeoutExpired, OSError) as e:
                log.warning(f"Error terminating Tesseract process: {e}")
            finally:
                self._close_pipes(proc)

        self.processes.clear()
        # Drain the idle queue so stale handles are never handed out again.
        while self.get_process() is not None:
            pass
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
@register_tool("image")
class ImageTool(BaseTool):
    """
    Image processing tool supporting:
    - load: Load image and return size and mode.
    - ocr: Extract text using a pooled Tesseract process.
    - metadata: Retrieve EXIF and basic image info.
    - resize: Resize image to specified dimensions.
    - filter: Apply filters (blur, sharpen, edge_enhance).

    Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.

    Every operation validates its arguments through the matching schema
    (LoadSchema, OCRSchema, ...) before touching the image, and reports
    processing failures as FileOperationError with the original exception
    chained as ``__cause__``.
    """

    # Configuration schema
    class Config(BaseModel):
        """Configuration for the image tool"""

        # NOTE(review): ``env_prefix`` is a pydantic-settings option; on a plain
        # BaseModel it presumably has no effect — confirm before relying on
        # IMAGE_TOOL_* environment variables.
        model_config = ConfigDict(env_prefix="IMAGE_TOOL_")

        max_file_size_mb: int = Field(default=50, description="Maximum file size in megabytes")
        allowed_extensions: List[str] = Field(
            default=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"],
            description="Allowed image file extensions",
        )
        tesseract_pool_size: int = Field(
            default=2, description="Number of Tesseract processes for OCR"
        )

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize ImageTool with configuration and resources.

        Args:
            config (Dict, optional): Configuration overrides for ImageTool.

        Raises:
            ValueError: If config contains invalid settings.
        """
        super().__init__(config)

        # Parse configuration
        self.config = self.Config(**(config or {}))

        self.logger = logging.getLogger(__name__)
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
            self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        # Initialize Tesseract manager
        self._tesseract_manager = TesseractManager(self.config.tesseract_pool_size)
        self._tesseract_manager.initialize()

    def __del__(self):
        """Clean up Tesseract processes on destruction."""
        # __init__ may have raised before the manager was created; in that case
        # there is nothing to clean up and we must not raise from __del__.
        manager = getattr(self, "_tesseract_manager", None)
        if manager is not None:
            manager.cleanup()

    def update_config(self, config: Dict) -> None:
        """
        Update configuration settings dynamically.

        The Tesseract pool is torn down and rebuilt whenever
        ``tesseract_pool_size`` is present in ``config``.

        Args:
            config (Dict): New settings to apply.

        Raises:
            ValueError: If config contains invalid settings.
        """
        try:
            self.config = self.Config(**{**self.config.model_dump(), **config})
        except ValidationError as e:
            raise ValueError(f"Invalid configuration: {e}") from e

        # Reinitialize Tesseract if pool size changes
        if "tesseract_pool_size" in config:
            self._tesseract_manager.cleanup()
            self._tesseract_manager = TesseractManager(self.config.tesseract_pool_size)
            self._tesseract_manager.initialize()

    def load(self, file_path: str) -> Dict[str, Any]:
        """
        Load an image and return its size and mode.

        Args:
            file_path (str): Path to the image file.

        Returns:
            Dict[str, Any]: Image info {'size': (width, height), 'mode': str}.

        Raises:
            FileOperationError: If file is invalid or inaccessible.
        """
        # Validate input using schema
        validated_input = LoadSchema(file_path=file_path)

        try:
            with Image.open(validated_input.file_path) as img:
                # Force a full decode so corrupt files fail here, not later.
                img.load()
                return {"size": img.size, "mode": img.mode}
        except Exception as e:
            raise FileOperationError(f"load: Failed to load image '{file_path}': {e}") from e

    def ocr(self, file_path: str, lang: Optional[str] = None) -> str:
        """
        Extract text from an image using a pooled Tesseract process.

        The image is converted to greyscale, sharpened and written to a
        temporary PNG whose path is sent to the pooled process on stdin.

        Args:
            file_path (str): Path to the image file.
            lang (Optional[str]): Language code for OCR (e.g., 'eng'). The
                pooled processes are started with ``-l eng``, so any other
                value is currently not applied; a warning is logged.

        Returns:
            str: Extracted text.

        Raises:
            FileOperationError: If OCR fails or Tesseract is unavailable.
        """
        # Validate input using schema
        validated_input = OCRSchema(file_path=file_path, lang=lang)

        if validated_input.lang and validated_input.lang != "eng":
            self.logger.warning(
                "ocr: language '%s' requested, but pooled Tesseract processes run with 'eng'",
                validated_input.lang,
            )

        proc = self._tesseract_manager.get_process()
        if not proc:
            raise FileOperationError(
                f"ocr: No Tesseract processes available (lang: {lang or 'eng'})"
            )

        temp_path = None
        try:
            # Created inside the try so the pooled process is always returned,
            # even if the temporary file cannot be created.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
                temp_path = temp_file.name

            # Close the source file handle as soon as the derived image exists.
            with Image.open(validated_input.file_path) as source:
                prepared = source.convert("L").filter(ImageFilter.SHARPEN)
            prepared.save(temp_path)

            try:
                stdout, stderr = proc.communicate(input=temp_path, timeout=30)
            except subprocess.TimeoutExpired:
                # Reap the stuck process so it is not left running.
                proc.kill()
                proc.communicate()
                raise

            if proc.returncode != 0:
                raise FileOperationError(
                    f"ocr: Tesseract failed for '{file_path}' (lang: {lang or 'eng'}): {stderr}"
                )
            return stdout.strip()
        except FileOperationError:
            # Already carries a precise message; do not wrap it a second time.
            raise
        except Exception as e:
            raise FileOperationError(
                f"ocr: Failed to process '{file_path}' (lang: {lang or 'eng'}): {e}"
            ) from e
        finally:
            self._tesseract_manager.return_process(proc)
            if temp_path is not None and os.path.exists(temp_path):
                try:
                    os.unlink(temp_path)
                except OSError as e:
                    self.logger.warning(f"Failed to remove temporary file {temp_path}: {e}")

    def metadata(self, file_path: str, include_exif: bool = False) -> Dict[str, Any]:
        """
        Retrieve metadata (size, mode, EXIF) from an image.

        Args:
            file_path (str): Path to the image file.
            include_exif (bool): Whether to include EXIF data.

        Returns:
            Dict[str, Any]: Image metadata {'size': tuple, 'mode': str, 'exif': Dict}.
            The 'exif' key is present only when ``include_exif`` is true; tag
            ids are translated to names via ``ExifTags.TAGS`` where known.

        Raises:
            FileOperationError: If metadata extraction fails.
        """
        # Validate input using schema
        validated_input = MetadataSchema(file_path=file_path, include_exif=include_exif)

        try:
            with Image.open(validated_input.file_path) as img:
                img.load()
                info = {"size": img.size, "mode": img.mode}
                if validated_input.include_exif:
                    # ``_getexif`` is only defined by some Pillow image plugins;
                    # fall back to the public ``getexif()`` for the others
                    # instead of failing with AttributeError.
                    legacy_getter = getattr(img, "_getexif", None)
                    if callable(legacy_getter):
                        raw = legacy_getter() or {}
                    else:
                        raw = dict(img.getexif())
                    info["exif"] = {ExifTags.TAGS.get(tag, tag): val for tag, val in raw.items()}
                return info
        except Exception as e:
            raise FileOperationError(f"metadata: Failed to process '{file_path}': {e}") from e

    def resize(self, file_path: str, output_path: str, width: int, height: int) -> Dict[str, Any]:
        """
        Resize an image to specified dimensions and save to output path.

        Args:
            file_path (str): Path to the image file.
            output_path (str): Path to save the resized image.
            width (int): Target width.
            height (int): Target height.

        Returns:
            Dict[str, Any]: Status with output path {'success': bool, 'output_path': str}.

        Raises:
            FileOperationError: If resizing fails.
        """
        # Validate input using schema
        validated_input = ResizeSchema(
            file_path=file_path,
            output_path=output_path,
            width=width,
            height=height,
        )

        try:
            with Image.open(validated_input.file_path) as img:
                # Use the validated (int-coerced) dimensions, not the raw arguments.
                target_size = (validated_input.width, validated_input.height)
                resized = img.resize(target_size, Image.Resampling.LANCZOS)
                resized.save(validated_input.output_path)
            return {
                "success": True,
                "output_path": validated_input.output_path,
            }
        except Exception as e:
            raise FileOperationError(
                f"resize: Failed to process '{file_path}' (output_path: {output_path}): {e}"
            ) from e

    def filter(self, file_path: str, output_path: str, filter_type: str = "blur") -> Dict[str, Any]:
        """
        Apply a filter (blur, sharpen, edge_enhance) to an image and save to output path.

        Args:
            file_path (str): Path to the image file.
            output_path (str): Path to save the filtered image.
            filter_type (str): Filter type ('blur', 'sharpen', 'edge_enhance').
                Defaults to 'blur', matching FilterSchema.

        Returns:
            Dict[str, Any]: Status with output path {'success': bool, 'output_path': str}.

        Raises:
            FileOperationError: If filtering fails.
        """
        # Validate input using schema
        validated_input = FilterSchema(
            file_path=file_path,
            output_path=output_path,
            filter_type=filter_type,
        )

        filter_map = {
            "blur": ImageFilter.BLUR,
            "sharpen": ImageFilter.SHARPEN,
            "edge_enhance": ImageFilter.EDGE_ENHANCE,
        }

        try:
            with Image.open(validated_input.file_path) as img:
                filtered = img.filter(filter_map[validated_input.filter_type])
                filtered.save(validated_input.output_path)
            return {
                "success": True,
                "output_path": validated_input.output_path,
            }
        except Exception as e:
            raise FileOperationError(
                f"filter: Failed to process '{file_path}' (output_path: {output_path}, filter_type: {filter_type}): {e}"
            ) from e
|