aiecs 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (58)
  1. aiecs/__init__.py +1 -1
  2. aiecs/config/config.py +2 -0
  3. aiecs/domain/__init__.py +95 -0
  4. aiecs/domain/community/__init__.py +159 -0
  5. aiecs/domain/community/agent_adapter.py +516 -0
  6. aiecs/domain/community/analytics.py +465 -0
  7. aiecs/domain/community/collaborative_workflow.py +99 -7
  8. aiecs/domain/community/communication_hub.py +649 -0
  9. aiecs/domain/community/community_builder.py +322 -0
  10. aiecs/domain/community/community_integration.py +365 -12
  11. aiecs/domain/community/community_manager.py +481 -5
  12. aiecs/domain/community/decision_engine.py +459 -13
  13. aiecs/domain/community/exceptions.py +238 -0
  14. aiecs/domain/community/models/__init__.py +36 -0
  15. aiecs/domain/community/resource_manager.py +1 -1
  16. aiecs/domain/community/shared_context_manager.py +621 -0
  17. aiecs/domain/context/context_engine.py +37 -33
  18. aiecs/main.py +2 -2
  19. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  20. aiecs/scripts/aid/__init__.py +15 -0
  21. aiecs/scripts/aid/version_manager.py +224 -0
  22. aiecs/scripts/dependance_check/download_nlp_data.py +1 -0
  23. aiecs/tools/__init__.py +23 -23
  24. aiecs/tools/docs/__init__.py +5 -2
  25. aiecs/tools/docs/ai_document_orchestrator.py +39 -26
  26. aiecs/tools/docs/ai_document_writer_orchestrator.py +61 -38
  27. aiecs/tools/docs/content_insertion_tool.py +48 -28
  28. aiecs/tools/docs/document_creator_tool.py +47 -29
  29. aiecs/tools/docs/document_layout_tool.py +35 -20
  30. aiecs/tools/docs/document_parser_tool.py +56 -36
  31. aiecs/tools/docs/document_writer_tool.py +115 -62
  32. aiecs/tools/schema_generator.py +56 -56
  33. aiecs/tools/statistics/__init__.py +82 -0
  34. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +581 -0
  35. aiecs/tools/statistics/ai_insight_generator_tool.py +473 -0
  36. aiecs/tools/statistics/ai_report_orchestrator_tool.py +629 -0
  37. aiecs/tools/statistics/data_loader_tool.py +518 -0
  38. aiecs/tools/statistics/data_profiler_tool.py +599 -0
  39. aiecs/tools/statistics/data_transformer_tool.py +531 -0
  40. aiecs/tools/statistics/data_visualizer_tool.py +460 -0
  41. aiecs/tools/statistics/model_trainer_tool.py +470 -0
  42. aiecs/tools/statistics/statistical_analyzer_tool.py +426 -0
  43. aiecs/tools/task_tools/chart_tool.py +2 -1
  44. aiecs/tools/task_tools/image_tool.py +43 -43
  45. aiecs/tools/task_tools/office_tool.py +39 -36
  46. aiecs/tools/task_tools/pandas_tool.py +37 -33
  47. aiecs/tools/task_tools/report_tool.py +67 -56
  48. aiecs/tools/task_tools/research_tool.py +32 -31
  49. aiecs/tools/task_tools/scraper_tool.py +53 -46
  50. aiecs/tools/task_tools/search_tool.py +1123 -0
  51. aiecs/tools/task_tools/stats_tool.py +20 -15
  52. {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/METADATA +5 -1
  53. {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/RECORD +57 -36
  54. {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/entry_points.txt +1 -0
  55. aiecs/tools/task_tools/search_api.py +0 -7
  56. {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/WHEEL +0 -0
  57. {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/licenses/LICENSE +0 -0
  58. {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,426 @@
1
+ """
2
+ Statistical Analyzer Tool - Advanced statistical analysis and hypothesis testing
3
+
4
+ This tool provides comprehensive statistical analysis with:
5
+ - Descriptive and inferential statistics
6
+ - Hypothesis testing (t-test, ANOVA, chi-square)
7
+ - Regression analysis
8
+ - Time series analysis
9
+ - Correlation and causality analysis
10
+ """
11
+
12
+ import logging
13
+ from typing import Dict, Any, List, Optional, Union
14
+ from enum import Enum
15
+
16
+ import pandas as pd
17
+ import numpy as np
18
+ from scipy import stats as scipy_stats
19
+ from pydantic import BaseModel, Field, ValidationError, ConfigDict
20
+
21
+ from aiecs.tools.base_tool import BaseTool
22
+ from aiecs.tools import register_tool
23
+
24
+
25
class AnalysisType(str, Enum):
    """Identifiers for the statistical analyses a caller can request.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Summary statistics
    DESCRIPTIVE = "descriptive"

    # Hypothesis tests
    T_TEST = "t_test"
    ANOVA = "anova"
    CHI_SQUARE = "chi_square"

    # Regression models
    LINEAR_REGRESSION = "linear_regression"
    LOGISTIC_REGRESSION = "logistic_regression"

    # Relationship and temporal analyses
    CORRELATION = "correlation"
    TIME_SERIES = "time_series"
35
+
36
+
37
+
38
+
39
class StatisticalAnalyzerError(Exception):
    """Root of the exception hierarchy used by the statistical analyzer."""
42
+
43
+
44
class AnalysisError(StatisticalAnalyzerError):
    """Signals that a requested statistical analysis could not be completed."""
47
+
48
+
49
+ @register_tool('statistical_analyzer')
50
+ class StatisticalAnalyzerTool(BaseTool):
51
+ """
52
+ Advanced statistical analysis tool that can:
53
+ 1. Perform hypothesis testing
54
+ 2. Conduct regression analysis
55
+ 3. Analyze time series
56
+ 4. Perform correlation and causal analysis
57
+
58
+ Integrates with stats_tool for core statistical operations.
59
+ """
60
+
61
+ # Configuration schema
62
class Config(BaseModel):
    """Configuration for the statistical analyzer tool.

    Both probability settings are restricted to the open interval (0, 1);
    values outside it would make every significance comparison trivially
    true or trivially false.
    """

    # NOTE(review): ``env_prefix`` is a pydantic-settings option; confirm that
    # something actually reads it when the model is a plain ``BaseModel``.
    model_config = ConfigDict(env_prefix="STATISTICAL_ANALYZER_")

    # Alpha used when deciding whether a p-value is significant.
    significance_level: float = Field(
        default=0.05,
        gt=0.0,
        lt=1.0,
        description="Significance level for hypothesis testing"
    )
    # Confidence level for statistical intervals.
    confidence_level: float = Field(
        default=0.95,
        gt=0.0,
        lt=1.0,
        description="Confidence level for statistical intervals"
    )
    # Flag for effect-size calculation in analyses.
    enable_effect_size: bool = Field(
        default=True,
        description="Whether to calculate effect sizes in analyses"
    )
78
+
79
def __init__(self, config: Optional[Dict[str, Any]] = None):
    """Set up configuration, logging and helper tools for the analyzer.

    Args:
        config: Optional overrides for the fields declared on ``Config``.
    """
    super().__init__(config)

    # Validate the supplied overrides against the Config schema.
    overrides = config or {}
    self.config = self.Config(**overrides)

    # Attach a stream handler only when the module logger has none yet.
    log = logging.getLogger(__name__)
    self.logger = log
    if not log.handlers:
        stream = logging.StreamHandler()
        stream.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
        log.addHandler(stream)
    log.setLevel(logging.INFO)

    self._init_external_tools()
94
+
95
+ def _init_external_tools(self):
96
+ """Initialize external task tools"""
97
+ self.external_tools = {}
98
+
99
+ try:
100
+ from aiecs.tools.task_tools.stats_tool import StatsTool
101
+ self.external_tools['stats'] = StatsTool()
102
+ self.logger.info("StatsTool initialized successfully")
103
+ except ImportError:
104
+ self.logger.warning("StatsTool not available")
105
+ self.external_tools['stats'] = None
106
+
107
+ # Schema definitions
108
class AnalyzeSchema(BaseModel):
    """Input contract for the ``analyze`` operation."""

    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data to analyze"
    )
    analysis_type: AnalysisType = Field(
        description="Type of analysis to perform"
    )
    variables: Dict[str, Any] = Field(
        description="Variables specification"
    )
    params: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Additional parameters"
    )
114
+
115
class TestHypothesisSchema(BaseModel):
    """Input contract for the ``test_hypothesis`` operation."""

    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data for hypothesis testing"
    )
    test_type: str = Field(
        description="Type of test: t_test, anova, chi_square"
    )
    variables: Dict[str, Any] = Field(
        description="Variables for testing"
    )
120
+
121
class PerformRegressionSchema(BaseModel):
    """Input contract for the ``perform_regression`` operation."""

    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data for regression"
    )
    dependent_var: str = Field(
        description="Dependent variable"
    )
    independent_vars: List[str] = Field(
        description="Independent variables"
    )
    regression_type: str = Field(
        default="linear",
        description="Type: linear or logistic"
    )
127
+
128
class AnalyzeCorrelationSchema(BaseModel):
    """Input contract for the ``analyze_correlation`` operation."""

    data: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        description="Data for correlation analysis"
    )
    variables: Optional[List[str]] = Field(
        default=None,
        description="Variables to analyze"
    )
    method: str = Field(
        default="pearson",
        description="Correlation method"
    )
133
+
134
def analyze(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    analysis_type: AnalysisType,
    variables: Dict[str, Any],
    params: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Perform statistical analysis.

    Args:
        data: Data to analyze
        analysis_type: Type of analysis, as an ``AnalysisType`` member or
            its raw string value (e.g. ``"t_test"``)
        variables: Variables specification (dependent, independent, etc.)
        params: Additional parameters

    Returns:
        Dict containing the analysis results plus an ``analysis_type`` key
        holding the analysis identifier string.

    Raises:
        AnalysisError: If the analysis type is unsupported or the
            underlying analysis fails; the original exception is attached
            as the cause.
    """
    try:
        # Normalize up front: a plain string passes the equality checks
        # below (the enum subclasses str) but has no ``.value`` attribute.
        analysis_type = AnalysisType(analysis_type)

        df = self._to_dataframe(data)
        params = params or {}

        if analysis_type == AnalysisType.DESCRIPTIVE:
            result = self._descriptive_analysis(df, variables)
        elif analysis_type == AnalysisType.T_TEST:
            result = self._t_test_analysis(df, variables, params)
        elif analysis_type == AnalysisType.ANOVA:
            result = self._anova_analysis(df, variables, params)
        elif analysis_type == AnalysisType.CHI_SQUARE:
            result = self._chi_square_analysis(df, variables, params)
        elif analysis_type == AnalysisType.LINEAR_REGRESSION:
            result = self._linear_regression_analysis(df, variables, params)
        elif analysis_type == AnalysisType.CORRELATION:
            result = self._correlation_analysis(df, variables, params)
        else:
            # LOGISTIC_REGRESSION and TIME_SERIES are declared on the enum
            # but have no handler here.
            raise AnalysisError(f"Unsupported analysis type: {analysis_type}")

        result['analysis_type'] = analysis_type.value
        return result

    except Exception as e:
        self.logger.error(f"Error in analysis: {e}")
        raise AnalysisError(f"Analysis failed: {e}") from e
178
+
179
+ def test_hypothesis(
180
+ self,
181
+ data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
182
+ test_type: str,
183
+ variables: Dict[str, Any]
184
+ ) -> Dict[str, Any]:
185
+ """Perform hypothesis testing"""
186
+ try:
187
+ df = self._to_dataframe(data)
188
+
189
+ if test_type == "t_test":
190
+ return self._t_test_analysis(df, variables, {})
191
+ elif test_type == "anova":
192
+ return self._anova_analysis(df, variables, {})
193
+ elif test_type == "chi_square":
194
+ return self._chi_square_analysis(df, variables, {})
195
+ else:
196
+ raise AnalysisError(f"Unsupported test type: {test_type}")
197
+
198
+ except Exception as e:
199
+ self.logger.error(f"Error in hypothesis testing: {e}")
200
+ raise AnalysisError(f"Hypothesis testing failed: {e}")
201
+
202
def perform_regression(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    dependent_var: str,
    independent_vars: List[str],
    regression_type: str = "linear"
) -> Dict[str, Any]:
    """Perform regression analysis.

    Args:
        data: Data for regression.
        dependent_var: Name of the dependent variable.
        independent_vars: Names of the independent variables.
        regression_type: Regression flavour; only ``"linear"`` is
            handled here.

    Returns:
        The result dict produced by the linear regression analysis.

    Raises:
        AnalysisError: If ``regression_type`` is not ``"linear"`` or the
            fit fails; the original exception is attached as the cause.
    """
    try:
        df = self._to_dataframe(data)
        variables = {
            'dependent': dependent_var,
            'independent': independent_vars
        }

        if regression_type != "linear":
            raise AnalysisError(f"Unsupported regression type: {regression_type}")

        return self._linear_regression_analysis(df, variables, {})

    except Exception as e:
        self.logger.error(f"Error in regression: {e}")
        raise AnalysisError(f"Regression failed: {e}") from e
225
+
226
def analyze_correlation(
    self,
    data: Union[Dict[str, Any], List[Dict[str, Any]], pd.DataFrame],
    variables: Optional[List[str]] = None,
    method: str = "pearson"
) -> Dict[str, Any]:
    """Perform correlation analysis.

    Args:
        data: Data for correlation analysis.
        variables: Columns to restrict the analysis to; when empty or
            ``None`` no restriction is forwarded.
        method: Correlation method name forwarded to the analysis.

    Returns:
        The result dict produced by the correlation analysis.

    Raises:
        AnalysisError: If the analysis fails; the original exception is
            attached as the cause.
    """
    try:
        df = self._to_dataframe(data)
        var_dict = {'variables': variables} if variables else {}
        return self._correlation_analysis(df, var_dict, {'method': method})

    except Exception as e:
        self.logger.error(f"Error in correlation analysis: {e}")
        raise AnalysisError(f"Correlation analysis failed: {e}") from e
241
+
242
+ # Internal analysis methods
243
+
244
+ def _to_dataframe(self, data: Union[Dict, List, pd.DataFrame]) -> pd.DataFrame:
245
+ """Convert data to DataFrame"""
246
+ if isinstance(data, pd.DataFrame):
247
+ return data
248
+ elif isinstance(data, list):
249
+ return pd.DataFrame(data)
250
+ elif isinstance(data, dict):
251
+ return pd.DataFrame([data])
252
+ else:
253
+ raise AnalysisError(f"Unsupported data type: {type(data)}")
254
+
255
+ def _descriptive_analysis(self, df: pd.DataFrame, variables: Dict[str, Any]) -> Dict[str, Any]:
256
+ """Perform descriptive statistics analysis"""
257
+ cols = variables.get('columns', df.select_dtypes(include=[np.number]).columns.tolist())
258
+
259
+ results = {}
260
+ for col in cols:
261
+ if col in df.columns:
262
+ series = df[col].dropna()
263
+ results[col] = {
264
+ 'count': int(len(series)),
265
+ 'mean': float(series.mean()),
266
+ 'std': float(series.std()),
267
+ 'min': float(series.min()),
268
+ 'q25': float(series.quantile(0.25)),
269
+ 'median': float(series.median()),
270
+ 'q75': float(series.quantile(0.75)),
271
+ 'max': float(series.max()),
272
+ 'skewness': float(series.skew()),
273
+ 'kurtosis': float(series.kurt())
274
+ }
275
+
276
+ return {
277
+ 'results': results,
278
+ 'interpretation': 'Descriptive statistics computed successfully'
279
+ }
280
+
281
+ def _t_test_analysis(self, df: pd.DataFrame, variables: Dict[str, Any], params: Dict[str, Any]) -> Dict[str, Any]:
282
+ """Perform t-test"""
283
+ var1_name = variables.get('var1')
284
+ var2_name = variables.get('var2')
285
+
286
+ if not var1_name or not var2_name:
287
+ raise AnalysisError("T-test requires var1 and var2")
288
+
289
+ var1 = df[var1_name].dropna()
290
+ var2 = df[var2_name].dropna()
291
+
292
+ statistic, pvalue = scipy_stats.ttest_ind(var1, var2)
293
+
294
+ return {
295
+ 'test_type': 't_test',
296
+ 'statistic': float(statistic),
297
+ 'p_value': float(pvalue),
298
+ 'significant': pvalue < self.config.significance_level,
299
+ 'interpretation': f"{'Significant' if pvalue < self.config.significance_level else 'Not significant'} difference at α={self.config.significance_level}",
300
+ 'variables': [var1_name, var2_name]
301
+ }
302
+
303
+ def _anova_analysis(self, df: pd.DataFrame, variables: Dict[str, Any], params: Dict[str, Any]) -> Dict[str, Any]:
304
+ """Perform ANOVA"""
305
+ groups = variables.get('groups', [])
306
+
307
+ if len(groups) < 2:
308
+ raise AnalysisError("ANOVA requires at least 2 groups")
309
+
310
+ group_data = [df[group].dropna() for group in groups if group in df.columns]
311
+
312
+ if len(group_data) < 2:
313
+ raise AnalysisError("Insufficient valid groups for ANOVA")
314
+
315
+ statistic, pvalue = scipy_stats.f_oneway(*group_data)
316
+
317
+ return {
318
+ 'test_type': 'anova',
319
+ 'statistic': float(statistic),
320
+ 'p_value': float(pvalue),
321
+ 'significant': pvalue < self.config.significance_level,
322
+ 'interpretation': f"{'Significant' if pvalue < self.config.significance_level else 'Not significant'} difference between groups",
323
+ 'groups': groups
324
+ }
325
+
326
+ def _chi_square_analysis(self, df: pd.DataFrame, variables: Dict[str, Any], params: Dict[str, Any]) -> Dict[str, Any]:
327
+ """Perform chi-square test"""
328
+ var1_name = variables.get('var1')
329
+ var2_name = variables.get('var2')
330
+
331
+ if not var1_name or not var2_name:
332
+ raise AnalysisError("Chi-square test requires var1 and var2")
333
+
334
+ contingency_table = pd.crosstab(df[var1_name], df[var2_name])
335
+ statistic, pvalue, dof, expected = scipy_stats.chi2_contingency(contingency_table)
336
+
337
+ return {
338
+ 'test_type': 'chi_square',
339
+ 'statistic': float(statistic),
340
+ 'p_value': float(pvalue),
341
+ 'degrees_of_freedom': int(dof),
342
+ 'significant': pvalue < self.config.significance_level,
343
+ 'interpretation': f"{'Significant' if pvalue < self.config.significance_level else 'Not significant'} association",
344
+ 'variables': [var1_name, var2_name]
345
+ }
346
+
347
def _linear_regression_analysis(self, df: pd.DataFrame, variables: Dict[str, Any], params: Dict[str, Any]) -> Dict[str, Any]:
    """Fit an ordinary linear regression and report in-sample fit metrics.

    Args:
        df: Data containing the dependent and independent columns.
        variables: Must provide ``'dependent'`` (column name) and
            ``'independent'`` (a column name or a list of column names).
        params: Accepted for signature parity with the other analyses;
            not used here.

    Returns:
        Dict with the intercept, per-variable coefficients, R², MSE, RMSE,
        an interpretation string and the variable names used.

    Raises:
        AnalysisError: If the variables are missing, or no row has values
            for all of them.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import r2_score, mean_squared_error

    dependent = variables.get('dependent')
    independent = variables.get('independent', [])

    # A bare string would otherwise select a Series and be zipped with the
    # coefficients one character at a time.
    if isinstance(independent, str):
        independent = [independent]

    if not dependent or not independent:
        raise AnalysisError("Regression requires dependent and independent variables")

    predictors = list(independent)

    # Keep only rows that are complete in the predictors AND the target;
    # dropping on the predictors alone leaves NaNs in y and the fit fails.
    complete_rows = df[predictors].notna().all(axis=1) & df[dependent].notna()
    X = df.loc[complete_rows, predictors]
    y = df.loc[complete_rows, dependent]

    if X.empty:
        raise AnalysisError("Regression requires at least one complete observation")

    model = LinearRegression()
    model.fit(X, y)

    y_pred = model.predict(X)
    r2 = r2_score(y, y_pred)
    mse = mean_squared_error(y, y_pred)

    coefficients = {var: float(coef) for var, coef in zip(predictors, model.coef_)}

    return {
        'model_type': 'linear_regression',
        'intercept': float(model.intercept_),
        'coefficients': coefficients,
        'r_squared': float(r2),
        'mse': float(mse),
        'rmse': float(np.sqrt(mse)),
        'interpretation': f"Model explains {r2*100:.2f}% of variance",
        'dependent_variable': dependent,
        'independent_variables': predictors
    }
381
+
382
+ def _correlation_analysis(self, df: pd.DataFrame, variables: Dict[str, Any], params: Dict[str, Any]) -> Dict[str, Any]:
383
+ """Perform correlation analysis"""
384
+ method = params.get('method', 'pearson')
385
+ cols = variables.get('variables')
386
+
387
+ if cols:
388
+ numeric_df = df[cols].select_dtypes(include=[np.number])
389
+ else:
390
+ numeric_df = df.select_dtypes(include=[np.number])
391
+
392
+ if numeric_df.shape[1] < 2:
393
+ raise AnalysisError("Correlation requires at least 2 numeric variables")
394
+
395
+ corr_matrix = numeric_df.corr(method=method)
396
+
397
+ # Find significant correlations
398
+ significant_pairs = []
399
+ for i in range(len(corr_matrix.columns)):
400
+ for j in range(i+1, len(corr_matrix.columns)):
401
+ corr_value = corr_matrix.iloc[i, j]
402
+ if abs(corr_value) > 0.3: # Threshold for noteworthy correlation
403
+ significant_pairs.append({
404
+ 'var1': corr_matrix.columns[i],
405
+ 'var2': corr_matrix.columns[j],
406
+ 'correlation': float(corr_value),
407
+ 'strength': self._interpret_correlation(corr_value)
408
+ })
409
+
410
+ return {
411
+ 'method': method,
412
+ 'correlation_matrix': corr_matrix.to_dict(),
413
+ 'significant_correlations': significant_pairs,
414
+ 'interpretation': f"Found {len(significant_pairs)} significant correlations"
415
+ }
416
+
417
+ def _interpret_correlation(self, corr: float) -> str:
418
+ """Interpret correlation strength"""
419
+ abs_corr = abs(corr)
420
+ if abs_corr < 0.3:
421
+ return "weak"
422
+ elif abs_corr < 0.7:
423
+ return "moderate"
424
+ else:
425
+ return "strong"
426
+
@@ -39,8 +39,9 @@ class ChartTool(BaseTool):
39
39
 
40
40
  # Configuration schema
41
41
  class Config(BaseModel):
42
- model_config = ConfigDict()
43
42
  """Configuration for the chart tool"""
43
+ model_config = ConfigDict(env_prefix="CHART_TOOL_")
44
+
44
45
  export_dir: str = Field(
45
46
  default=os.path.join(tempfile.gettempdir(), 'chart_exports'),
46
47
  description="Directory to export files to"
@@ -7,31 +7,16 @@ from typing import Dict, Any, List, Optional
7
7
  from dataclasses import dataclass
8
8
  from dataclasses import field
9
9
 
10
- from pydantic import BaseModel, ValidationError, field_validator, ConfigDict
11
- from pydantic_settings import BaseSettings
10
+ from pydantic import BaseModel, ValidationError, field_validator, ConfigDict, Field
12
11
  from PIL import Image, ExifTags, ImageFilter
13
12
  from queue import Queue
14
13
 
15
14
  from aiecs.tools.base_tool import BaseTool
16
15
  from aiecs.tools import register_tool
17
16
 
18
- # Configuration for ImageTool
19
- class ImageSettings(BaseSettings):
20
- """
21
- Configuration for ImageTool.
22
-
23
- Attributes:
24
- max_file_size_mb (int): Maximum file size in megabytes.
25
- allowed_extensions (List[str]): Allowed image file extensions.
26
- tesseract_pool_size (int): Number of Tesseract processes for OCR.
27
- env_prefix (str): Environment variable prefix for settings.
28
- """
29
- max_file_size_mb: int = 50
30
- allowed_extensions: List[str] = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif']
31
- tesseract_pool_size: int = 2
32
- env_prefix: str = 'IMAGE_TOOL_'
33
-
34
- model_config = ConfigDict(env_prefix='IMAGE_TOOL_')
17
+ # Module-level default configuration for validators
18
+ _DEFAULT_MAX_FILE_SIZE_MB = 50
19
+ _DEFAULT_ALLOWED_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif']
35
20
 
36
21
  # Exceptions
37
22
  class ImageToolError(Exception):
@@ -55,16 +40,15 @@ class BaseFileSchema(BaseModel):
55
40
  @classmethod
56
41
  def validate_file_path(cls, v: str) -> str:
57
42
  """Validate file path for existence, size, and extension."""
58
- settings = ImageSettings()
59
43
  abs_path = os.path.abspath(os.path.normpath(v))
60
44
  ext = os.path.splitext(abs_path)[1].lower()
61
- if ext not in settings.allowed_extensions:
62
- raise SecurityError(f"Extension '{ext}' not allowed, expected {settings.allowed_extensions}")
45
+ if ext not in _DEFAULT_ALLOWED_EXTENSIONS:
46
+ raise SecurityError(f"Extension '{ext}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}")
63
47
  if not os.path.isfile(abs_path):
64
48
  raise FileOperationError(f"File not found: {abs_path}")
65
49
  size_mb = os.path.getsize(abs_path) / (1024 * 1024)
66
- if size_mb > settings.max_file_size_mb:
67
- raise FileOperationError(f"File too large: {size_mb:.1f}MB, max {settings.max_file_size_mb}MB")
50
+ if size_mb > _DEFAULT_MAX_FILE_SIZE_MB:
51
+ raise FileOperationError(f"File too large: {size_mb:.1f}MB, max {_DEFAULT_MAX_FILE_SIZE_MB}MB")
68
52
  return abs_path
69
53
 
70
54
  # Schemas for operations
@@ -91,11 +75,10 @@ class ResizeSchema(BaseFileSchema):
91
75
  @classmethod
92
76
  def validate_output_path(cls, v: str) -> str:
93
77
  """Validate output path for existence and extension."""
94
- settings = ImageSettings()
95
78
  abs_path = os.path.abspath(os.path.normpath(v))
96
79
  ext = os.path.splitext(abs_path)[1].lower()
97
- if ext not in settings.allowed_extensions:
98
- raise SecurityError(f"Output extension '{ext}' not allowed, expected {settings.allowed_extensions}")
80
+ if ext not in _DEFAULT_ALLOWED_EXTENSIONS:
81
+ raise SecurityError(f"Output extension '{ext}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}")
99
82
  if os.path.exists(abs_path):
100
83
  raise FileOperationError(f"Output file already exists: {abs_path}")
101
84
  return abs_path
@@ -118,11 +101,10 @@ class FilterSchema(BaseFileSchema):
118
101
  @classmethod
119
102
  def validate_output_path(cls, v: str) -> str:
120
103
  """Validate output path for existence and extension."""
121
- settings = ImageSettings()
122
104
  abs_path = os.path.abspath(os.path.normpath(v))
123
105
  ext = os.path.splitext(abs_path)[1].lower()
124
- if ext not in settings.allowed_extensions:
125
- raise SecurityError(f"Output extension '{ext}' not allowed, expected {settings.allowed_extensions}")
106
+ if ext not in _DEFAULT_ALLOWED_EXTENSIONS:
107
+ raise SecurityError(f"Output extension '{ext}' not allowed, expected {_DEFAULT_ALLOWED_EXTENSIONS}")
126
108
  if os.path.exists(abs_path):
127
109
  raise FileOperationError(f"Output file already exists: {abs_path}")
128
110
  return abs_path
@@ -183,38 +165,56 @@ class ImageTool(BaseTool):
183
165
 
184
166
  Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
185
167
  """
186
- def __init__(self, config: Dict[Any, Any] = None):
168
+
169
+ # Configuration schema
170
+ class Config(BaseModel):
171
+ """Configuration for the image tool"""
172
+ model_config = ConfigDict(env_prefix="IMAGE_TOOL_")
173
+
174
+ max_file_size_mb: int = Field(
175
+ default=50,
176
+ description="Maximum file size in megabytes"
177
+ )
178
+ allowed_extensions: List[str] = Field(
179
+ default=['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.gif'],
180
+ description="Allowed image file extensions"
181
+ )
182
+ tesseract_pool_size: int = Field(
183
+ default=2,
184
+ description="Number of Tesseract processes for OCR"
185
+ )
186
+
187
+ def __init__(self, config: Optional[Dict[str, Any]] = None):
187
188
  """
188
- Initialize ImageTool with settings and resources.
189
+ Initialize ImageTool with configuration and resources.
189
190
 
190
191
  Args:
191
- config (Dict, optional): Configuration overrides for ImageSettings.
192
+ config (Dict, optional): Configuration overrides for ImageTool.
192
193
 
193
194
  Raises:
194
195
  ValueError: If config contains invalid settings.
195
196
  """
196
197
  super().__init__(config)
197
- self.settings = ImageSettings()
198
- if config:
199
- try:
200
- self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
201
- except ValidationError as e:
202
- raise ValueError(f"Invalid configuration: {e}")
198
+
199
+ # Parse configuration
200
+ self.config = self.Config(**(config or {}))
201
+
203
202
  self.logger = logging.getLogger(__name__)
204
203
  if not self.logger.handlers:
205
204
  handler = logging.StreamHandler()
206
205
  handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
207
206
  self.logger.addHandler(handler)
208
207
  self.logger.setLevel(logging.INFO)
208
+
209
209
  # Initialize Tesseract manager
210
- self._tesseract_manager = TesseractManager(self.settings.tesseract_pool_size)
210
+ self._tesseract_manager = TesseractManager(self.config.tesseract_pool_size)
211
211
  self._tesseract_manager.initialize()
212
212
 
213
213
  def __del__(self):
214
214
  """Clean up Tesseract processes on destruction."""
215
215
  self._tesseract_manager.cleanup()
216
216
 
217
- def update_settings(self, config: Dict) -> None:
217
+ def update_config(self, config: Dict) -> None:
218
218
  """
219
219
  Update configuration settings dynamically.
220
220
 
@@ -225,11 +225,11 @@ class ImageTool(BaseTool):
225
225
  ValueError: If config contains invalid settings.
226
226
  """
227
227
  try:
228
- self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
228
+ self.config = self.Config(**{**self.config.model_dump(), **config})
229
229
  # Reinitialize Tesseract if pool size changes
230
230
  if 'tesseract_pool_size' in config:
231
231
  self._tesseract_manager.cleanup()
232
- self._tesseract_manager = TesseractManager(self.settings.tesseract_pool_size)
232
+ self._tesseract_manager = TesseractManager(self.config.tesseract_pool_size)
233
233
  self._tesseract_manager.initialize()
234
234
  except ValidationError as e:
235
235
  raise ValueError(f"Invalid configuration: {e}")