aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,447 @@
1
+ import os
2
+ import logging
3
+ import subprocess
4
+ import tempfile
5
+ from typing import Dict, Any, List, Optional
6
+ from dataclasses import dataclass
7
+ from dataclasses import field
8
+
9
+ from pydantic import (
10
+ BaseModel,
11
+ ValidationError,
12
+ field_validator,
13
+ ConfigDict,
14
+ Field,
15
+ )
16
+ from PIL import Image, ExifTags, ImageFilter
17
+ from queue import Queue
18
+
19
+ from aiecs.tools.base_tool import BaseTool
20
+ from aiecs.tools import register_tool
21
+
22
+ # Module-level default configuration for validators
23
# Limits enforced by the schema validators in this module. They are plain
# module constants, so the validators apply them regardless of any per-tool
# configuration.
_DEFAULT_MAX_FILE_SIZE_MB = 50
_DEFAULT_ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"]
32
+
33
+ # Exceptions
34
+
35
+
36
class ImageToolError(Exception):
    """Root of the ImageTool exception hierarchy.

    Catch this type to handle every error raised by the image tool.
    """
38
+
39
+
40
class FileOperationError(ImageToolError):
    """Signals a failed file operation (missing, oversized or unprocessable file)."""
42
+
43
+
44
class SecurityError(ImageToolError):
    """Signals a security-policy violation, such as a disallowed file extension."""
46
+
47
+
48
+ # Base schema for common fields
49
+
50
+
51
# Shared base for all operation schemas: carries the input path and validates it.
class BaseFileSchema(BaseModel):
    file_path: str
    _mtime: Optional[float] = None  # Internal use for cache

    @field_validator("file_path")
    @classmethod
    def validate_file_path(cls, v: str) -> str:
        """Normalise the input path and check extension, existence and size.

        Returns:
            str: The absolute, normalised path.

        Raises:
            SecurityError: If the extension is not in the allowed list.
            FileOperationError: If the path is not a regular file or the file
                exceeds the module-level size limit.
        """
        resolved = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(resolved)
        suffix = suffix.lower()

        # The extension is checked first, before touching the filesystem.
        if suffix not in _DEFAULT_ALLOWED_EXTENSIONS:
            raise SecurityError(
                f"Extension '{suffix}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}"
            )

        if not os.path.isfile(resolved):
            raise FileOperationError(f"File not found: {resolved}")

        megabytes = os.path.getsize(resolved) / (1024 * 1024)
        if megabytes > _DEFAULT_MAX_FILE_SIZE_MB:
            raise FileOperationError(
                f"File too large: {megabytes:.1f}MB, max {_DEFAULT_MAX_FILE_SIZE_MB}MB"
            )

        return resolved
73
+
74
+
75
+ # Schemas for operations
76
+
77
+
78
# Input for ImageTool.load: only the validated file path inherited from the base.
class LoadSchema(BaseFileSchema):
    """Schema for load operation."""
80
+
81
+
82
# Input for ImageTool.ocr: the validated file path plus an optional language.
class OCRSchema(BaseFileSchema):
    """Schema for OCR operation."""

    # Language code for OCR; None leaves the choice to the caller of the schema.
    lang: Optional[str] = None
86
+
87
+
88
# Input for ImageTool.metadata: the validated file path plus the EXIF switch.
class MetadataSchema(BaseFileSchema):
    """Schema for metadata extraction operation."""

    # EXIF data is opt-in; it is left out of the result by default.
    include_exif: bool = False
92
+
93
+
94
class ResizeSchema(BaseFileSchema):
    """Schema for resize operation."""

    output_path: str  # destination file; must not exist yet
    width: int  # target width
    height: int  # target height

    @field_validator("output_path")
    @classmethod
    def validate_output_path(cls, v: str) -> str:
        """Normalise the destination path and refuse bad extensions or overwrites.

        Returns:
            str: The absolute, normalised output path.

        Raises:
            SecurityError: If the extension is not in the allowed list.
            FileOperationError: If something already exists at the path.
        """
        target = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(target)
        suffix = suffix.lower()

        if suffix not in _DEFAULT_ALLOWED_EXTENSIONS:
            raise SecurityError(
                f"Output extension '{suffix}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}"
            )

        # Existing files are never overwritten.
        if os.path.exists(target):
            raise FileOperationError(f"Output file already exists: {target}")

        return target
114
+
115
+
116
class FilterSchema(BaseFileSchema):
    """Schema for filter operation."""

    output_path: str  # destination file; must not exist yet
    filter_type: str = "blur"  # one of: blur, sharpen, edge_enhance

    @field_validator("filter_type")
    @classmethod
    def validate_filter_type(cls, v: str) -> str:
        """Accept only the supported filter names.

        Raises:
            ValueError: If the name is not one of the supported filters.
        """
        valid_filters = ["blur", "sharpen", "edge_enhance"]
        if v in valid_filters:
            return v
        raise ValueError(f"Invalid filter_type '{v}', expected {valid_filters}")

    @field_validator("output_path")
    @classmethod
    def validate_output_path(cls, v: str) -> str:
        """Normalise the destination path and refuse bad extensions or overwrites.

        Returns:
            str: The absolute, normalised output path.

        Raises:
            SecurityError: If the extension is not in the allowed list.
            FileOperationError: If something already exists at the path.
        """
        target = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(target)
        suffix = suffix.lower()

        if suffix not in _DEFAULT_ALLOWED_EXTENSIONS:
            raise SecurityError(
                f"Output extension '{suffix}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}"
            )

        # Existing files are never overwritten.
        if os.path.exists(target):
            raise FileOperationError(f"Output file already exists: {target}")

        return target
144
+
145
+
146
+ # Tesseract process manager
147
+
148
+
149
@dataclass
class TesseractManager:
    """Manages a pool of Tesseract processes for OCR.

    Attributes:
        pool_size: Number of Tesseract processes to start in ``initialize``.
        processes: Every process started by this manager (used for cleanup).
        queue: Processes currently available to be handed out.
    """

    pool_size: int
    processes: List[subprocess.Popen] = field(default_factory=list)
    queue: Queue = field(default_factory=lambda: Queue())

    def initialize(self):
        """Start up to ``pool_size`` Tesseract processes and queue them.

        If the ``tesseract`` executable is missing, a warning is logged and
        start-up stops, leaving the pool with whatever was started so far.
        """
        for _ in range(self.pool_size):
            try:
                proc = subprocess.Popen(
                    ["tesseract", "--oem", "1", "-", "stdout", "-l", "eng"],
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                )
            except FileNotFoundError:
                logging.getLogger(__name__).warning("Tesseract not found; OCR will be disabled")
                break
            self.queue.put(proc)
            self.processes.append(proc)

    def get_process(self) -> Optional[subprocess.Popen]:
        """Get an available Tesseract process without blocking.

        Returns:
            The next queued process, or None when the pool is empty.
        """
        # Local import: the module only imports ``Queue`` at file level.
        from queue import Empty

        # A separate ``empty()`` check followed by a blocking ``get()`` can
        # hang when another thread takes the last item in between, so the
        # non-blocking get is used instead.
        try:
            return self.queue.get_nowait()
        except Empty:
            return None

    def return_process(self, proc: subprocess.Popen):
        """Return a Tesseract process to the pool."""
        self.queue.put(proc)

    def cleanup(self):
        """Stop every process started by this manager and empty the pool.

        Each process is asked to terminate; one that does not exit within a
        second is killed. Failures are logged rather than raised so that
        cleanup always proceeds to the remaining processes.
        """
        log = logging.getLogger(__name__)
        for proc in self.processes:
            try:
                proc.terminate()
                try:
                    proc.wait(timeout=1)
                except subprocess.TimeoutExpired:
                    # Escalate instead of leaving the child running.
                    proc.kill()
                    proc.wait(timeout=1)
            except (subprocess.TimeoutExpired, OSError) as e:
                log.warning(f"Error terminating Tesseract process: {e}")

        # Forget the stopped processes so that a repeated cleanup is a no-op
        # and get_process() cannot hand out a dead process afterwards.
        self.processes.clear()
        while self.get_process() is not None:
            pass
192
+
193
+
194
@register_tool("image")
class ImageTool(BaseTool):
    """
    Image processing tool supporting:
    - load: Load image and return size and mode.
    - ocr: Extract text using a pooled Tesseract process.
    - metadata: Retrieve EXIF and basic image info.
    - resize: Resize image to specified dimensions.
    - filter: Apply filters (blur, sharpen, edge_enhance).

    Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
    """

    # NOTE: the schema validators used by the operations below read the
    # module-level defaults, not ``Config.max_file_size_mb`` or
    # ``Config.allowed_extensions``.

    # Configuration schema
    class Config(BaseModel):
        """Configuration for the image tool"""

        model_config = ConfigDict(env_prefix="IMAGE_TOOL_")

        max_file_size_mb: int = Field(default=50, description="Maximum file size in megabytes")
        allowed_extensions: List[str] = Field(
            default=[".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".gif"],
            description="Allowed image file extensions",
        )
        tesseract_pool_size: int = Field(
            default=2, description="Number of Tesseract processes for OCR"
        )

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize ImageTool with configuration and resources.

        Args:
            config (Dict, optional): Configuration overrides for ImageTool.

        Raises:
            ValueError: If config contains invalid settings.
        """
        super().__init__(config)

        # Parse configuration
        self.config = self.Config(**(config or {}))

        self.logger = logging.getLogger(__name__)
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
            self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)

        # Initialize Tesseract manager
        self._tesseract_manager = TesseractManager(self.config.tesseract_pool_size)
        self._tesseract_manager.initialize()

    def __del__(self):
        """Clean up Tesseract processes on destruction."""
        # __init__ may have failed before the manager was created; in that
        # case there is nothing to clean up.
        manager = getattr(self, "_tesseract_manager", None)
        if manager is not None:
            manager.cleanup()

    def update_config(self, config: Dict) -> None:
        """
        Update configuration settings dynamically.

        Args:
            config (Dict): New settings to apply.

        Raises:
            ValueError: If config contains invalid settings.
        """
        try:
            self.config = self.Config(**{**self.config.model_dump(), **config})
        except ValidationError as e:
            raise ValueError(f"Invalid configuration: {e}") from e

        # Reinitialize Tesseract if pool size changes
        if "tesseract_pool_size" in config:
            self._tesseract_manager.cleanup()
            self._tesseract_manager = TesseractManager(self.config.tesseract_pool_size)
            self._tesseract_manager.initialize()

    def load(self, file_path: str) -> Dict[str, Any]:
        """
        Load an image and return its size and mode.

        Args:
            file_path (str): Path to the image file.

        Returns:
            Dict[str, Any]: Image info {'size': (width, height), 'mode': str}.

        Raises:
            FileOperationError: If file is invalid or inaccessible.
        """
        # Validate input using schema
        validated_input = LoadSchema(file_path=file_path)

        try:
            with Image.open(validated_input.file_path) as img:
                img.load()
                return {"size": img.size, "mode": img.mode}
        except Exception as e:
            raise FileOperationError(f"load: Failed to load image '{file_path}': {e}") from e

    def ocr(self, file_path: str, lang: Optional[str] = None) -> str:
        """
        Extract text from an image with Tesseract.

        The image is converted to greyscale and sharpened, written to a
        temporary PNG, and passed to a one-shot ``tesseract`` run using the
        requested language. A slot from the Tesseract pool is held for the
        duration of the call, so the pool size caps concurrent OCR runs and
        an empty pool (for example when Tesseract is not installed) makes
        the call fail fast.

        Args:
            file_path (str): Path to the image file.
            lang (Optional[str]): Language code for OCR (e.g., 'eng').
                Defaults to 'eng' when omitted.

        Returns:
            str: Extracted text.

        Raises:
            FileOperationError: If OCR fails or Tesseract is unavailable.
        """
        # Validate input using schema
        validated_input = OCRSchema(file_path=file_path, lang=lang)
        language = validated_input.lang or "eng"

        slot = self._tesseract_manager.get_process()
        if slot is None:
            raise FileOperationError(
                f"ocr: No Tesseract processes available (lang: {lang or 'eng'})"
            )

        temp_path = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
                temp_path = temp_file.name

            with Image.open(validated_input.file_path) as source:
                prepared = source.convert("L").filter(ImageFilter.SHARPEN)
            prepared.save(temp_path)

            # Popen.communicate() closes the pipes and waits for the process
            # to exit, so a pooled process could serve only a single request
            # and was always started with "-l eng". A fresh run per request
            # keeps OCR working across calls and honours the language.
            result = subprocess.run(
                ["tesseract", temp_path, "stdout", "--oem", "1", "-l", language],
                capture_output=True,
                text=True,
                timeout=30,
            )
            if result.returncode != 0:
                raise FileOperationError(
                    f"ocr: Tesseract failed for '{file_path}' (lang: {lang or 'eng'}): {result.stderr}"
                )
            return result.stdout.strip()
        except FileOperationError:
            # Already carries a precise message; do not wrap it a second time.
            raise
        except Exception as e:
            raise FileOperationError(
                f"ocr: Failed to process '{file_path}' (lang: {lang or 'eng'}): {e}"
            ) from e
        finally:
            self._tesseract_manager.return_process(slot)
            if temp_path and os.path.exists(temp_path):
                try:
                    os.unlink(temp_path)
                except Exception as e:
                    self.logger.warning(f"Failed to remove temporary file {temp_path}: {e}")

    def metadata(self, file_path: str, include_exif: bool = False) -> Dict[str, Any]:
        """
        Retrieve metadata (size, mode, EXIF) from an image.

        Args:
            file_path (str): Path to the image file.
            include_exif (bool): Whether to include EXIF data.

        Returns:
            Dict[str, Any]: Image metadata {'size': tuple, 'mode': str}, plus
            an 'exif' dict keyed by tag name when include_exif is True.

        Raises:
            FileOperationError: If metadata extraction fails.
        """
        # Validate input using schema
        validated_input = MetadataSchema(file_path=file_path, include_exif=include_exif)

        try:
            with Image.open(validated_input.file_path) as img:
                img.load()
                info = {"size": img.size, "mode": img.mode}
                if validated_input.include_exif:
                    # Not every Pillow image class provides the private
                    # _getexif(); fall back to the public getexif() so that
                    # such formats return EXIF data (possibly empty) instead
                    # of failing.
                    legacy_reader = getattr(img, "_getexif", None)
                    if callable(legacy_reader):
                        raw = legacy_reader() or {}
                    else:
                        raw = dict(img.getexif())
                    # Unknown numeric tags keep their number as the key.
                    info["exif"] = {ExifTags.TAGS.get(tag, tag): val for tag, val in raw.items()}
                return info
        except Exception as e:
            raise FileOperationError(f"metadata: Failed to process '{file_path}': {e}") from e

    def resize(self, file_path: str, output_path: str, width: int, height: int) -> Dict[str, Any]:
        """
        Resize an image to specified dimensions and save to output path.

        Args:
            file_path (str): Path to the image file.
            output_path (str): Path to save the resized image.
            width (int): Target width.
            height (int): Target height.

        Returns:
            Dict[str, Any]: Status with output path {'success': bool, 'output_path': str}.

        Raises:
            FileOperationError: If resizing fails.
        """
        # Validate input using schema
        validated_input = ResizeSchema(
            file_path=file_path,
            output_path=output_path,
            width=width,
            height=height,
        )

        try:
            with Image.open(validated_input.file_path) as img:
                # Use the validated (type-coerced) dimensions, not the raw arguments.
                resized = img.resize(
                    (validated_input.width, validated_input.height),
                    Image.Resampling.LANCZOS,
                )
                resized.save(validated_input.output_path)
            return {
                "success": True,
                "output_path": validated_input.output_path,
            }
        except Exception as e:
            raise FileOperationError(
                f"resize: Failed to process '{file_path}' (output_path: {output_path}): {e}"
            ) from e

    def filter(self, file_path: str, output_path: str, filter_type: str = "blur") -> Dict[str, Any]:
        """
        Apply a filter (blur, sharpen, edge_enhance) to an image and save to output path.

        Args:
            file_path (str): Path to the image file.
            output_path (str): Path to save the filtered image.
            filter_type (str): Filter type ('blur', 'sharpen', 'edge_enhance').
                Defaults to 'blur', matching FilterSchema.

        Returns:
            Dict[str, Any]: Status with output path {'success': bool, 'output_path': str}.

        Raises:
            FileOperationError: If filtering fails.
        """
        # Validate input using schema
        validated_input = FilterSchema(
            file_path=file_path,
            output_path=output_path,
            filter_type=filter_type,
        )

        filter_map = {
            "blur": ImageFilter.BLUR,
            "sharpen": ImageFilter.SHARPEN,
            "edge_enhance": ImageFilter.EDGE_ENHANCE,
        }

        try:
            with Image.open(validated_input.file_path) as img:
                filtered = img.filter(filter_map[validated_input.filter_type])
                filtered.save(validated_input.output_path)
            return {
                "success": True,
                "output_path": validated_input.output_path,
            }
        except Exception as e:
            raise FileOperationError(
                f"filter: Failed to process '{file_path}' (output_path: {output_path}, filter_type: {filter_type}): {e}"
            ) from e