aiecs 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (90) hide show
  1. aiecs/__init__.py +75 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +295 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +341 -0
  7. aiecs/config/__init__.py +15 -0
  8. aiecs/config/config.py +117 -0
  9. aiecs/config/registry.py +19 -0
  10. aiecs/core/__init__.py +46 -0
  11. aiecs/core/interface/__init__.py +34 -0
  12. aiecs/core/interface/execution_interface.py +150 -0
  13. aiecs/core/interface/storage_interface.py +214 -0
  14. aiecs/domain/__init__.py +20 -0
  15. aiecs/domain/context/__init__.py +28 -0
  16. aiecs/domain/context/content_engine.py +982 -0
  17. aiecs/domain/context/conversation_models.py +306 -0
  18. aiecs/domain/execution/__init__.py +12 -0
  19. aiecs/domain/execution/model.py +49 -0
  20. aiecs/domain/task/__init__.py +13 -0
  21. aiecs/domain/task/dsl_processor.py +460 -0
  22. aiecs/domain/task/model.py +50 -0
  23. aiecs/domain/task/task_context.py +257 -0
  24. aiecs/infrastructure/__init__.py +26 -0
  25. aiecs/infrastructure/messaging/__init__.py +13 -0
  26. aiecs/infrastructure/messaging/celery_task_manager.py +341 -0
  27. aiecs/infrastructure/messaging/websocket_manager.py +289 -0
  28. aiecs/infrastructure/monitoring/__init__.py +12 -0
  29. aiecs/infrastructure/monitoring/executor_metrics.py +138 -0
  30. aiecs/infrastructure/monitoring/structured_logger.py +50 -0
  31. aiecs/infrastructure/monitoring/tracing_manager.py +376 -0
  32. aiecs/infrastructure/persistence/__init__.py +12 -0
  33. aiecs/infrastructure/persistence/database_manager.py +286 -0
  34. aiecs/infrastructure/persistence/file_storage.py +671 -0
  35. aiecs/infrastructure/persistence/redis_client.py +162 -0
  36. aiecs/llm/__init__.py +54 -0
  37. aiecs/llm/base_client.py +99 -0
  38. aiecs/llm/client_factory.py +339 -0
  39. aiecs/llm/custom_callbacks.py +228 -0
  40. aiecs/llm/openai_client.py +125 -0
  41. aiecs/llm/vertex_client.py +186 -0
  42. aiecs/llm/xai_client.py +184 -0
  43. aiecs/main.py +351 -0
  44. aiecs/scripts/DEPENDENCY_SYSTEM_SUMMARY.md +241 -0
  45. aiecs/scripts/README_DEPENDENCY_CHECKER.md +309 -0
  46. aiecs/scripts/README_WEASEL_PATCH.md +126 -0
  47. aiecs/scripts/__init__.py +3 -0
  48. aiecs/scripts/dependency_checker.py +825 -0
  49. aiecs/scripts/dependency_fixer.py +348 -0
  50. aiecs/scripts/download_nlp_data.py +348 -0
  51. aiecs/scripts/fix_weasel_validator.py +121 -0
  52. aiecs/scripts/fix_weasel_validator.sh +82 -0
  53. aiecs/scripts/patch_weasel_library.sh +188 -0
  54. aiecs/scripts/quick_dependency_check.py +269 -0
  55. aiecs/scripts/run_weasel_patch.sh +41 -0
  56. aiecs/scripts/setup_nlp_data.sh +217 -0
  57. aiecs/tasks/__init__.py +2 -0
  58. aiecs/tasks/worker.py +111 -0
  59. aiecs/tools/__init__.py +196 -0
  60. aiecs/tools/base_tool.py +202 -0
  61. aiecs/tools/langchain_adapter.py +361 -0
  62. aiecs/tools/task_tools/__init__.py +82 -0
  63. aiecs/tools/task_tools/chart_tool.py +704 -0
  64. aiecs/tools/task_tools/classfire_tool.py +901 -0
  65. aiecs/tools/task_tools/image_tool.py +397 -0
  66. aiecs/tools/task_tools/office_tool.py +600 -0
  67. aiecs/tools/task_tools/pandas_tool.py +565 -0
  68. aiecs/tools/task_tools/report_tool.py +499 -0
  69. aiecs/tools/task_tools/research_tool.py +363 -0
  70. aiecs/tools/task_tools/scraper_tool.py +548 -0
  71. aiecs/tools/task_tools/search_api.py +7 -0
  72. aiecs/tools/task_tools/stats_tool.py +513 -0
  73. aiecs/tools/temp_file_manager.py +126 -0
  74. aiecs/tools/tool_executor/__init__.py +35 -0
  75. aiecs/tools/tool_executor/tool_executor.py +518 -0
  76. aiecs/utils/LLM_output_structor.py +409 -0
  77. aiecs/utils/__init__.py +23 -0
  78. aiecs/utils/base_callback.py +50 -0
  79. aiecs/utils/execution_utils.py +158 -0
  80. aiecs/utils/logging.py +1 -0
  81. aiecs/utils/prompt_loader.py +13 -0
  82. aiecs/utils/token_usage_repository.py +279 -0
  83. aiecs/ws/__init__.py +0 -0
  84. aiecs/ws/socket_server.py +41 -0
  85. aiecs-1.0.0.dist-info/METADATA +610 -0
  86. aiecs-1.0.0.dist-info/RECORD +90 -0
  87. aiecs-1.0.0.dist-info/WHEEL +5 -0
  88. aiecs-1.0.0.dist-info/entry_points.txt +7 -0
  89. aiecs-1.0.0.dist-info/licenses/LICENSE +225 -0
  90. aiecs-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,397 @@
1
+ import os
2
+ import logging
3
+ import subprocess
4
+ import uuid
5
+ import tempfile
6
+ from typing import Dict, Any, List, Optional
7
+ from dataclasses import dataclass
8
+ from dataclasses import field
9
+
10
+ from pydantic import BaseModel, ValidationError, field_validator, ConfigDict
11
+ from pydantic_settings import BaseSettings
12
+ from PIL import Image, ExifTags, ImageFilter
13
+ from queue import Queue
14
+
15
+ from aiecs.tools.base_tool import BaseTool
16
+ from aiecs.tools import register_tool
17
+
18
+ # Configuration for ImageTool
19
class ImageSettings(BaseSettings):
    """
    Limits and resource sizes consumed by ImageTool.

    Attributes:
        max_file_size_mb (int): Largest accepted input file, in megabytes.
        allowed_extensions (List[str]): Lower-case file suffixes that are accepted.
        tesseract_pool_size (int): How many Tesseract processes the OCR pool starts.
        env_prefix (str): Environment variable prefix, stored as a regular field.
    """
    # Settings-level configuration: environment variables use this prefix.
    model_config = ConfigDict(env_prefix='IMAGE_TOOL_')

    # Size ceiling applied by the file-path validator.
    max_file_size_mb: int = 50
    # Suffixes are compared after lower-casing, so entries here must be lower-case.
    allowed_extensions: List[str] = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif']
    # Number of OCR worker processes created up front.
    tesseract_pool_size: int = 2
    # Mirrors the prefix configured in model_config.
    env_prefix: str = 'IMAGE_TOOL_'
35
+
36
+ # Exceptions
37
class ImageToolError(Exception):
    """Root of the ImageTool exception hierarchy; catch this to handle any tool error."""
40
+
41
class FileOperationError(ImageToolError):
    """Signals that reading, processing, or writing a file did not succeed."""
44
+
45
class SecurityError(ImageToolError):
    """Signals that an input was rejected for security reasons (e.g. a disallowed extension)."""
48
+
49
+ # Base schema for common fields
50
# Shared base for every operation schema: carries the input path and validates it.
class BaseFileSchema(BaseModel):
    file_path: str
    _mtime: Optional[float] = None  # Internal use for cache

    @field_validator('file_path')
    @classmethod
    def validate_file_path(cls, v: str) -> str:
        """
        Normalise the input path and check extension, existence, and size.

        Args:
            v (str): Path as supplied by the caller.

        Returns:
            str: The absolute, normalised path.

        Raises:
            SecurityError: If the suffix is not in the allowed extensions.
            FileOperationError: If the path is not a regular file or exceeds the size limit.
        """
        limits = ImageSettings()
        resolved = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(resolved)
        suffix = suffix.lower()

        # The extension is checked first, before the filesystem is touched.
        if suffix not in limits.allowed_extensions:
            raise SecurityError(f"Extension '{suffix}' not allowed, expected {limits.allowed_extensions}")
        if not os.path.isfile(resolved):
            raise FileOperationError(f"File not found: {resolved}")

        megabytes = os.path.getsize(resolved) / (1024 * 1024)
        if megabytes > limits.max_file_size_mb:
            raise FileOperationError(f"File too large: {megabytes:.1f}MB, max {limits.max_file_size_mb}MB")
        return resolved
69
+
70
+ # Schemas for operations
71
# Adds no fields of its own; the inherited file_path validation is all `load` needs.
class LoadSchema(BaseFileSchema):
    """Schema for load operation."""
74
+
75
# Input for `ocr`: the inherited file_path plus an optional language code.
class OCRSchema(BaseFileSchema):
    """Schema for OCR operation."""
    # None means the caller did not request a specific language.
    lang: Optional[str] = None
78
+
79
+
80
# Input for `metadata`: the inherited file_path plus an EXIF opt-in flag.
class MetadataSchema(BaseFileSchema):
    """Schema for metadata extraction operation."""
    # EXIF data is omitted unless explicitly requested.
    include_exif: bool = False
83
+
84
# Input for `resize`: source path (inherited), destination path, and target dimensions.
class ResizeSchema(BaseFileSchema):
    """Schema for resize operation."""
    output_path: str
    width: int
    height: int

    @field_validator('output_path')
    @classmethod
    def validate_output_path(cls, v: str) -> str:
        """
        Normalise the destination path and make sure it is safe to write.

        Args:
            v (str): Destination path as supplied by the caller.

        Returns:
            str: The absolute, normalised destination path.

        Raises:
            SecurityError: If the suffix is not in the allowed extensions.
            FileOperationError: If something already exists at the destination.
        """
        limits = ImageSettings()
        target = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(target)
        suffix = suffix.lower()

        if suffix not in limits.allowed_extensions:
            raise SecurityError(f"Output extension '{suffix}' not allowed, expected {limits.allowed_extensions}")
        # Existing files are never overwritten.
        if os.path.exists(target):
            raise FileOperationError(f"Output file already exists: {target}")
        return target
102
+
103
# Input for `filter`: source path (inherited), destination path, and the filter name.
class FilterSchema(BaseFileSchema):
    """Schema for filter operation."""
    output_path: str
    filter_type: str = 'blur'

    @field_validator('filter_type')
    @classmethod
    def validate_filter_type(cls, v: str) -> str:
        """
        Accept only the filter names the tool implements.

        Raises:
            ValueError: If the name is not one of the supported filters.
        """
        supported = ['blur', 'sharpen', 'edge_enhance']
        if v in supported:
            return v
        raise ValueError(f"Invalid filter_type '{v}', expected {supported}")

    @field_validator('output_path')
    @classmethod
    def validate_output_path(cls, v: str) -> str:
        """
        Normalise the destination path and make sure it is safe to write.

        Args:
            v (str): Destination path as supplied by the caller.

        Returns:
            str: The absolute, normalised destination path.

        Raises:
            SecurityError: If the suffix is not in the allowed extensions.
            FileOperationError: If something already exists at the destination.
        """
        limits = ImageSettings()
        target = os.path.abspath(os.path.normpath(v))
        _, suffix = os.path.splitext(target)
        suffix = suffix.lower()

        if suffix not in limits.allowed_extensions:
            raise SecurityError(f"Output extension '{suffix}' not allowed, expected {limits.allowed_extensions}")
        # Existing files are never overwritten.
        if os.path.exists(target):
            raise FileOperationError(f"Output file already exists: {target}")
        return target
129
+
130
+ # Tesseract process manager
131
@dataclass
class TesseractManager:
    """
    Manages a pool of Tesseract processes for OCR.

    Each worker is started with piped stdin/stdout/stderr in text mode. A
    worker can serve only one `communicate()` call because that call closes
    stdin and waits for the process to exit, so `return_process` replaces
    workers that have exited instead of handing them out again.

    Attributes:
        pool_size (int): Number of workers `initialize` tries to start.
        processes (List[subprocess.Popen]): Every worker currently owned by the pool.
        queue (Queue): Workers that are idle and available to callers.
        command (List[str]): Argument vector used to start each worker.
    """
    pool_size: int
    processes: List[subprocess.Popen] = field(default_factory=list)
    queue: Queue = field(default_factory=lambda: Queue())
    command: List[str] = field(
        default_factory=lambda: ['tesseract', '--oem', '1', '-', 'stdout', '-l', 'eng']
    )

    def _spawn(self) -> Optional[subprocess.Popen]:
        """Start one worker and track it; return None if the executable is missing."""
        try:
            proc = subprocess.Popen(
                list(self.command),
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
        except FileNotFoundError:
            logging.getLogger(__name__).warning("Tesseract not found; OCR will be disabled")
            return None
        self.processes.append(proc)
        return proc

    @staticmethod
    def _close_pipes(proc: subprocess.Popen) -> None:
        """Close the worker's pipe objects so file descriptors are not leaked."""
        for stream in (proc.stdin, proc.stdout, proc.stderr):
            if stream is None:
                continue
            try:
                stream.close()
            except OSError:
                # Best effort: flushing into a dead process may raise (e.g. broken pipe).
                pass

    def initialize(self):
        """Initialize Tesseract process pool; stops early if the executable is missing."""
        for _ in range(self.pool_size):
            proc = self._spawn()
            if proc is None:
                break
            self.queue.put(proc)

    def get_process(self) -> Optional[subprocess.Popen]:
        """
        Get an available Tesseract process.

        Returns:
            Optional[subprocess.Popen]: An idle worker, or None when the pool is empty.
        """
        # Local import: the file only imports Queue from the queue module.
        from queue import Empty
        try:
            # Non-blocking get avoids the race where another thread takes the
            # last worker between an empty() check and a blocking get().
            return self.queue.get_nowait()
        except Empty:
            return None

    def return_process(self, proc: subprocess.Popen):
        """
        Return a Tesseract process to the pool.

        A worker that is still running is re-queued as is. A worker that has
        exited is dropped and a fresh one is started in its place, so the pool
        keeps its capacity.
        """
        if proc.poll() is None:
            self.queue.put(proc)
            return
        if proc in self.processes:
            self.processes.remove(proc)
        self._close_pipes(proc)
        replacement = self._spawn()
        if replacement is not None:
            self.queue.put(replacement)

    def cleanup(self):
        """Clean up all Tesseract processes and leave the pool empty."""
        log = logging.getLogger(__name__)
        for proc in self.processes:
            try:
                proc.terminate()
                try:
                    proc.wait(timeout=1)
                except subprocess.TimeoutExpired:
                    # The worker ignored the polite request; force it down.
                    proc.kill()
                    proc.wait()
            except OSError as e:
                log.warning(f"Error terminating Tesseract process: {e}")
            finally:
                self._close_pipes(proc)
        self.processes.clear()
        # Drop queued handles so a cleaned-up pool never hands out dead workers.
        while self.get_process() is not None:
            pass
173
+
174
@register_tool('image')
class ImageTool(BaseTool):
    """
    Image processing tool supporting:
    - load: Load image and return size and mode.
    - ocr: Extract text using a pooled Tesseract process.
    - metadata: Retrieve EXIF and basic image info.
    - resize: Resize image to specified dimensions.
    - filter: Apply filters (blur, sharpen, edge_enhance).

    Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
    """
    def __init__(self, config: Optional[Dict[Any, Any]] = None):
        """
        Initialize ImageTool with settings and resources.

        Args:
            config (Dict, optional): Configuration overrides for ImageSettings.

        Raises:
            ValueError: If config contains invalid settings.
        """
        super().__init__(config)
        self.settings = ImageSettings()
        if config:
            try:
                self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
            except ValidationError as e:
                raise ValueError(f"Invalid configuration: {e}") from e
        self.logger = logging.getLogger(__name__)
        # Attach a handler only once per module logger to avoid duplicate output.
        if not self.logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
            self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)
        # Initialize Tesseract manager
        self._tesseract_manager = TesseractManager(self.settings.tesseract_pool_size)
        self._tesseract_manager.initialize()

    def __del__(self):
        """Clean up Tesseract processes on destruction."""
        # __init__ can raise before the manager is assigned (e.g. invalid
        # config), and __del__ still runs on the partially built instance.
        manager = getattr(self, '_tesseract_manager', None)
        if manager is not None:
            manager.cleanup()

    def update_settings(self, config: Dict):
        """
        Update configuration settings dynamically.

        Args:
            config (Dict): New settings to apply.

        Raises:
            ValueError: If config contains invalid settings.
        """
        try:
            self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
        except ValidationError as e:
            raise ValueError(f"Invalid configuration: {e}") from e
        # Reinitialize Tesseract whenever the caller supplied a pool size.
        if 'tesseract_pool_size' in config:
            self._tesseract_manager.cleanup()
            self._tesseract_manager = TesseractManager(self.settings.tesseract_pool_size)
            self._tesseract_manager.initialize()

    def load(self, file_path: str) -> Dict[str, Any]:
        """
        Load an image and return its size and mode.

        Args:
            file_path (str): Path to the image file.

        Returns:
            Dict[str, Any]: Image info {'size': (width, height), 'mode': str}.

        Raises:
            FileOperationError: If file is invalid or inaccessible.
        """
        # Validate input using schema
        validated_input = LoadSchema(file_path=file_path)

        try:
            with Image.open(validated_input.file_path) as img:
                # Force a full decode so corrupt files fail here, not later.
                img.load()
                return {'size': img.size, 'mode': img.mode}
        except Exception as e:
            raise FileOperationError(f"load: Failed to load image '{file_path}': {e}") from e

    def _run_tesseract(self, proc: subprocess.Popen, image_path: str, lang: str):
        """
        Run Tesseract on image_path and return (returncode, stdout, stderr).

        The pooled worker is used when it is still running and the requested
        language is the one pooled workers are started with ('eng'). Otherwise
        a one-off Tesseract process is started for this call.
        """
        if lang == 'eng' and proc.poll() is None:
            try:
                # The worker reads stdin; the image path is sent as text.
                # NOTE(review): presumably Tesseract treats non-image stdin as a
                # list of file names - confirm against the Tesseract manual.
                stdout, stderr = proc.communicate(input=image_path, timeout=30)
            except subprocess.TimeoutExpired:
                # Do not leave a hung worker behind; kill it and reap it.
                proc.kill()
                proc.communicate()
                raise
            return proc.returncode, stdout, stderr

        completed = subprocess.run(
            ['tesseract', '--oem', '1', image_path, 'stdout', '-l', lang],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=30
        )
        return completed.returncode, completed.stdout, completed.stderr

    def ocr(self, file_path: str, lang: Optional[str] = None) -> str:
        """
        Extract text from an image using a pooled Tesseract process.

        The image is converted to greyscale and sharpened into a temporary PNG
        before recognition. One pool slot is held for the duration of the call.

        Args:
            file_path (str): Path to the image file.
            lang (Optional[str]): Language code for OCR (e.g., 'eng'). Defaults to 'eng'.

        Returns:
            str: Extracted text.

        Raises:
            FileOperationError: If OCR fails or Tesseract is unavailable.
        """
        # Validate input using schema
        validated_input = OCRSchema(file_path=file_path, lang=lang)
        ocr_lang = validated_input.lang or 'eng'

        proc = self._tesseract_manager.get_process()
        if not proc:
            raise FileOperationError(f"ocr: No Tesseract processes available (lang: {ocr_lang})")

        temp_path = None
        try:
            # Created inside the try so a failure here still releases the pool slot.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
                temp_path = temp_file.name
            with Image.open(validated_input.file_path) as source:
                source.convert('L').filter(ImageFilter.SHARPEN).save(temp_path)

            returncode, stdout, stderr = self._run_tesseract(proc, temp_path, ocr_lang)
            if returncode != 0:
                raise FileOperationError(
                    f"ocr: Tesseract failed for '{file_path}' (lang: {ocr_lang}): {stderr}"
                )
            return stdout.strip()
        except FileOperationError:
            # Already carries a complete message; do not wrap it a second time.
            raise
        except Exception as e:
            raise FileOperationError(
                f"ocr: Failed to process '{file_path}' (lang: {ocr_lang}): {e}"
            ) from e
        finally:
            self._tesseract_manager.return_process(proc)
            if temp_path and os.path.exists(temp_path):
                try:
                    os.unlink(temp_path)
                except Exception as e:
                    self.logger.warning(f"Failed to remove temporary file {temp_path}: {e}")

    def metadata(self, file_path: str, include_exif: bool = False) -> Dict[str, Any]:
        """
        Retrieve metadata (size, mode, EXIF) from an image.

        Args:
            file_path (str): Path to the image file.
            include_exif (bool): Whether to include EXIF data.

        Returns:
            Dict[str, Any]: Image metadata {'size': tuple, 'mode': str, 'exif': Dict}.
                The 'exif' key is present only when include_exif is true; it is
                an empty dict when the image carries no EXIF data.

        Raises:
            FileOperationError: If metadata extraction fails.
        """
        # Validate input using schema
        validated_input = MetadataSchema(file_path=file_path, include_exif=include_exif)

        try:
            with Image.open(validated_input.file_path) as img:
                img.load()
                info = {'size': img.size, 'mode': img.mode}
                if validated_input.include_exif:
                    # Not every image class defines the private _getexif(); fall
                    # back to the public getexif() instead of failing outright.
                    legacy_getter = getattr(img, '_getexif', None)
                    if callable(legacy_getter):
                        raw = legacy_getter() or {}
                    else:
                        raw = dict(img.getexif())
                    # Translate numeric tag ids to names where Pillow knows them.
                    info['exif'] = {ExifTags.TAGS.get(tag, tag): val for tag, val in raw.items()}
                return info
        except Exception as e:
            raise FileOperationError(f"metadata: Failed to process '{file_path}': {e}") from e

    def resize(self, file_path: str, output_path: str, width: int, height: int) -> Dict[str, Any]:
        """
        Resize an image to specified dimensions and save to output path.

        Args:
            file_path (str): Path to the image file.
            output_path (str): Path to save the resized image; must not exist yet.
            width (int): Target width.
            height (int): Target height.

        Returns:
            Dict[str, Any]: Status with output path {'success': bool, 'output_path': str}.

        Raises:
            FileOperationError: If resizing fails.
        """
        # Validate input using schema
        validated_input = ResizeSchema(
            file_path=file_path,
            output_path=output_path,
            width=width,
            height=height
        )

        try:
            with Image.open(validated_input.file_path) as img:
                # Use the validated (int-coerced) dimensions, not the raw arguments.
                target_size = (validated_input.width, validated_input.height)
                resized = img.resize(target_size, Image.Resampling.LANCZOS)
                resized.save(validated_input.output_path)
            return {'success': True, 'output_path': validated_input.output_path}
        except Exception as e:
            raise FileOperationError(
                f"resize: Failed to process '{file_path}' (output_path: {output_path}): {e}"
            ) from e

    def filter(self, file_path: str, output_path: str, filter_type: str = 'blur') -> Dict[str, Any]:
        """
        Apply a filter (blur, sharpen, edge_enhance) to an image and save to output path.

        Args:
            file_path (str): Path to the image file.
            output_path (str): Path to save the filtered image; must not exist yet.
            filter_type (str): Filter type ('blur', 'sharpen', 'edge_enhance').
                Defaults to 'blur', matching FilterSchema.

        Returns:
            Dict[str, Any]: Status with output path {'success': bool, 'output_path': str}.

        Raises:
            FileOperationError: If filtering fails.
        """
        # Validate input using schema
        validated_input = FilterSchema(
            file_path=file_path,
            output_path=output_path,
            filter_type=filter_type
        )

        try:
            filter_map = {
                'blur': ImageFilter.BLUR,
                'sharpen': ImageFilter.SHARPEN,
                'edge_enhance': ImageFilter.EDGE_ENHANCE
            }
            with Image.open(validated_input.file_path) as img:
                filtered = img.filter(filter_map[validated_input.filter_type])
                filtered.save(validated_input.output_path)
            return {'success': True, 'output_path': validated_input.output_path}
        except Exception as e:
            raise FileOperationError(
                f"filter: Failed to process '{file_path}' "
                f"(output_path: {output_path}, filter_type: {filter_type}): {e}"
            ) from e