aiecs-1.1.0-py3-none-any.whl → aiecs-1.2.0-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +1 -1
- aiecs/config/config.py +2 -0
- aiecs/domain/__init__.py +95 -0
- aiecs/domain/community/__init__.py +159 -0
- aiecs/domain/community/agent_adapter.py +516 -0
- aiecs/domain/community/analytics.py +465 -0
- aiecs/domain/community/collaborative_workflow.py +99 -7
- aiecs/domain/community/communication_hub.py +649 -0
- aiecs/domain/community/community_builder.py +322 -0
- aiecs/domain/community/community_integration.py +365 -12
- aiecs/domain/community/community_manager.py +481 -5
- aiecs/domain/community/decision_engine.py +459 -13
- aiecs/domain/community/exceptions.py +238 -0
- aiecs/domain/community/models/__init__.py +36 -0
- aiecs/domain/community/resource_manager.py +1 -1
- aiecs/domain/community/shared_context_manager.py +621 -0
- aiecs/domain/context/context_engine.py +37 -33
- aiecs/main.py +2 -2
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +15 -0
- aiecs/scripts/aid/version_manager.py +224 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +1 -0
- aiecs/tools/__init__.py +23 -23
- aiecs/tools/docs/__init__.py +5 -2
- aiecs/tools/docs/ai_document_orchestrator.py +39 -26
- aiecs/tools/docs/ai_document_writer_orchestrator.py +61 -38
- aiecs/tools/docs/content_insertion_tool.py +48 -28
- aiecs/tools/docs/document_creator_tool.py +47 -29
- aiecs/tools/docs/document_layout_tool.py +35 -20
- aiecs/tools/docs/document_parser_tool.py +56 -36
- aiecs/tools/docs/document_writer_tool.py +115 -62
- aiecs/tools/schema_generator.py +56 -56
- aiecs/tools/statistics/__init__.py +82 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +581 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +473 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +629 -0
- aiecs/tools/statistics/data_loader_tool.py +518 -0
- aiecs/tools/statistics/data_profiler_tool.py +599 -0
- aiecs/tools/statistics/data_transformer_tool.py +531 -0
- aiecs/tools/statistics/data_visualizer_tool.py +460 -0
- aiecs/tools/statistics/model_trainer_tool.py +470 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +426 -0
- aiecs/tools/task_tools/chart_tool.py +2 -1
- aiecs/tools/task_tools/image_tool.py +43 -43
- aiecs/tools/task_tools/office_tool.py +39 -36
- aiecs/tools/task_tools/pandas_tool.py +37 -33
- aiecs/tools/task_tools/report_tool.py +67 -56
- aiecs/tools/task_tools/research_tool.py +32 -31
- aiecs/tools/task_tools/scraper_tool.py +53 -46
- aiecs/tools/task_tools/search_tool.py +1123 -0
- aiecs/tools/task_tools/stats_tool.py +20 -15
- {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/METADATA +5 -1
- {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/RECORD +57 -36
- {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/entry_points.txt +1 -0
- aiecs/tools/task_tools/search_api.py +0 -7
- {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/WHEEL +0 -0
- {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.1.0.dist-info → aiecs-1.2.0.dist-info}/top_level.txt +0 -0
|
@@ -20,31 +20,14 @@ from docx import Document as DocxDocument
|
|
|
20
20
|
from docx.shared import Pt
|
|
21
21
|
from pptx import Presentation
|
|
22
22
|
from pptx.util import Inches
|
|
23
|
-
from pydantic import BaseModel, field_validator, ValidationError, ConfigDict
|
|
24
|
-
from pydantic_settings import BaseSettings
|
|
23
|
+
from pydantic import BaseModel, field_validator, ValidationError, ConfigDict, Field
|
|
25
24
|
|
|
26
25
|
from aiecs.tools.base_tool import BaseTool
|
|
27
26
|
from aiecs.tools import register_tool
|
|
28
27
|
|
|
29
|
-
#
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
Configuration for OfficeTool.
|
|
33
|
-
|
|
34
|
-
Attributes:
|
|
35
|
-
max_file_size_mb (int): Maximum file size in megabytes.
|
|
36
|
-
default_font (str): Default font for documents.
|
|
37
|
-
default_font_size (int): Default font size in points.
|
|
38
|
-
allowed_extensions (List[str]): Allowed document file extensions.
|
|
39
|
-
env_prefix (str): Environment variable prefix for settings.
|
|
40
|
-
"""
|
|
41
|
-
max_file_size_mb: int = 100
|
|
42
|
-
default_font: str = "Arial"
|
|
43
|
-
default_font_size: int = 12
|
|
44
|
-
allowed_extensions: List[str] = ['.docx', '.pptx', '.xlsx', '.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif']
|
|
45
|
-
env_prefix: str = 'OFFICE_TOOL_'
|
|
46
|
-
|
|
47
|
-
model_config = ConfigDict(env_prefix='OFFICE_TOOL_')
|
|
28
|
+
# Module-level default configuration for validators
|
|
29
|
+
_DEFAULT_MAX_FILE_SIZE_MB = 100
|
|
30
|
+
_DEFAULT_ALLOWED_EXTENSIONS = ['.docx', '.pptx', '.xlsx', '.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif']
|
|
48
31
|
|
|
49
32
|
# Exceptions
|
|
50
33
|
class OfficeToolError(Exception):
|
|
@@ -78,7 +61,6 @@ class BaseFileSchema(BaseModel):
|
|
|
78
61
|
"""Validate file paths for existence, size, extension, and path traversal."""
|
|
79
62
|
if not v:
|
|
80
63
|
return v
|
|
81
|
-
settings = OfficeSettings()
|
|
82
64
|
abs_path = os.path.abspath(os.path.normpath(v))
|
|
83
65
|
# Check for path traversal
|
|
84
66
|
if '..' in v or '~' in v or '%' in v:
|
|
@@ -90,15 +72,15 @@ class BaseFileSchema(BaseModel):
|
|
|
90
72
|
raise SecurityError(f"Path not in allowed directories: {abs_path}")
|
|
91
73
|
# Check extension
|
|
92
74
|
ext = os.path.splitext(abs_path)[1].lower()
|
|
93
|
-
if ext not in
|
|
94
|
-
raise SecurityError(f"Extension '{ext}' not allowed for '{field.field_name}', expected {
|
|
75
|
+
if ext not in _DEFAULT_ALLOWED_EXTENSIONS:
|
|
76
|
+
raise SecurityError(f"Extension '{ext}' not allowed for '{field.field_name}', expected {_DEFAULT_ALLOWED_EXTENSIONS}")
|
|
95
77
|
# Check file existence and size for input paths
|
|
96
78
|
if field.field_name == 'file_path':
|
|
97
79
|
if not os.path.isfile(abs_path):
|
|
98
80
|
raise FileOperationError(f"{field.field_name}: File not found: {abs_path}")
|
|
99
81
|
size_mb = os.path.getsize(abs_path) / (1024 * 1024)
|
|
100
|
-
if size_mb >
|
|
101
|
-
raise FileOperationError(f"{field.field_name}: File too large: {size_mb:.1f}MB, max {
|
|
82
|
+
if size_mb > _DEFAULT_MAX_FILE_SIZE_MB:
|
|
83
|
+
raise FileOperationError(f"{field.field_name}: File too large: {size_mb:.1f}MB, max {_DEFAULT_MAX_FILE_SIZE_MB}MB")
|
|
102
84
|
# Check for existing output paths
|
|
103
85
|
elif field.field_name == 'output_path' and os.path.exists(abs_path):
|
|
104
86
|
raise FileOperationError(f"{field.field_name}: File already exists: {abs_path}")
|
|
@@ -155,23 +137,44 @@ class OfficeTool(BaseTool):
|
|
|
155
137
|
|
|
156
138
|
Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
|
|
157
139
|
"""
|
|
140
|
+
|
|
141
|
+
# Configuration schema
|
|
142
|
+
class Config(BaseModel):
|
|
143
|
+
"""Configuration for the office tool"""
|
|
144
|
+
model_config = ConfigDict(env_prefix="OFFICE_TOOL_")
|
|
145
|
+
|
|
146
|
+
max_file_size_mb: int = Field(
|
|
147
|
+
default=100,
|
|
148
|
+
description="Maximum file size in megabytes"
|
|
149
|
+
)
|
|
150
|
+
default_font: str = Field(
|
|
151
|
+
default="Arial",
|
|
152
|
+
description="Default font for documents"
|
|
153
|
+
)
|
|
154
|
+
default_font_size: int = Field(
|
|
155
|
+
default=12,
|
|
156
|
+
description="Default font size in points"
|
|
157
|
+
)
|
|
158
|
+
allowed_extensions: List[str] = Field(
|
|
159
|
+
default=['.docx', '.pptx', '.xlsx', '.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif'],
|
|
160
|
+
description="Allowed document file extensions"
|
|
161
|
+
)
|
|
162
|
+
|
|
158
163
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
|
159
164
|
"""
|
|
160
|
-
Initialize OfficeTool with
|
|
165
|
+
Initialize OfficeTool with configuration.
|
|
161
166
|
|
|
162
167
|
Args:
|
|
163
|
-
config (Dict, optional): Configuration overrides for
|
|
168
|
+
config (Dict, optional): Configuration overrides for OfficeTool.
|
|
164
169
|
|
|
165
170
|
Raises:
|
|
166
171
|
ValueError: If config contains invalid settings.
|
|
167
172
|
"""
|
|
168
173
|
super().__init__(config)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
except ValidationError as e:
|
|
174
|
-
raise ValueError(f"Invalid configuration: {e}")
|
|
174
|
+
|
|
175
|
+
# Parse configuration
|
|
176
|
+
self.config = self.Config(**(config or {}))
|
|
177
|
+
|
|
175
178
|
self.logger = logging.getLogger(__name__)
|
|
176
179
|
if not self.logger.handlers:
|
|
177
180
|
handler = logging.StreamHandler()
|
|
@@ -385,8 +388,8 @@ class OfficeTool(BaseTool):
|
|
|
385
388
|
sanitized_table_data = self._sanitize_table_data(table_data)
|
|
386
389
|
doc = DocxDocument()
|
|
387
390
|
style = doc.styles['Normal']
|
|
388
|
-
style.font.name = self.
|
|
389
|
-
style.font.size = Pt(self.
|
|
391
|
+
style.font.name = self.config.default_font
|
|
392
|
+
style.font.size = Pt(self.config.default_font_size)
|
|
390
393
|
for line in sanitized_text.splitlines():
|
|
391
394
|
doc.add_paragraph(line)
|
|
392
395
|
if sanitized_table_data and sanitized_table_data[0]:
|
|
@@ -2,7 +2,7 @@ from io import StringIO
|
|
|
2
2
|
import pandas as pd
|
|
3
3
|
import numpy as np
|
|
4
4
|
from typing import List, Dict, Union, Optional, Any
|
|
5
|
-
from pydantic import BaseModel, ValidationError as PydanticValidationError, ConfigDict
|
|
5
|
+
from pydantic import BaseModel, ValidationError as PydanticValidationError, ConfigDict, Field
|
|
6
6
|
import logging
|
|
7
7
|
|
|
8
8
|
from aiecs.tools.base_tool import BaseTool
|
|
@@ -29,31 +29,6 @@ class ValidationError(PandasToolError):
|
|
|
29
29
|
"""Validation error."""
|
|
30
30
|
pass
|
|
31
31
|
|
|
32
|
-
# Configuration for PandasTool
|
|
33
|
-
class PandasToolConfig(BaseModel):
|
|
34
|
-
"""
|
|
35
|
-
Configuration for PandasTool.
|
|
36
|
-
|
|
37
|
-
Attributes:
|
|
38
|
-
csv_delimiter (str): Delimiter for CSV files.
|
|
39
|
-
encoding (str): Encoding for file operations.
|
|
40
|
-
default_agg (Dict[str, str]): Default aggregation functions.
|
|
41
|
-
chunk_size (int): Chunk size for large file processing.
|
|
42
|
-
max_csv_size (int): Threshold for chunked CSV processing.
|
|
43
|
-
allowed_file_extensions (List[str]): Allowed file extensions.
|
|
44
|
-
env_prefix (str): Environment variable prefix.
|
|
45
|
-
"""
|
|
46
|
-
csv_delimiter: str = ","
|
|
47
|
-
encoding: str = "utf-8"
|
|
48
|
-
default_agg: Dict[str, str] = {"numeric": "mean", "object": "count"}
|
|
49
|
-
chunk_size: int = 10000
|
|
50
|
-
max_csv_size: int = 1000000
|
|
51
|
-
allowed_file_extensions: List[str] = ['.csv', '.xlsx', '.json']
|
|
52
|
-
env_prefix: str = "PANDAS_TOOL_"
|
|
53
|
-
|
|
54
|
-
model_config = ConfigDict(env_prefix="PANDAS_TOOL_")
|
|
55
|
-
|
|
56
|
-
|
|
57
32
|
@register_tool("pandas")
|
|
58
33
|
class PandasTool(BaseTool):
|
|
59
34
|
"""
|
|
@@ -72,23 +47,52 @@ class PandasTool(BaseTool):
|
|
|
72
47
|
|
|
73
48
|
Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
|
|
74
49
|
"""
|
|
50
|
+
|
|
51
|
+
# Configuration schema
|
|
52
|
+
class Config(BaseModel):
|
|
53
|
+
"""Configuration for the pandas tool"""
|
|
54
|
+
model_config = ConfigDict(env_prefix="PANDAS_TOOL_")
|
|
55
|
+
|
|
56
|
+
csv_delimiter: str = Field(
|
|
57
|
+
default=",",
|
|
58
|
+
description="Delimiter for CSV files"
|
|
59
|
+
)
|
|
60
|
+
encoding: str = Field(
|
|
61
|
+
default="utf-8",
|
|
62
|
+
description="Encoding for file operations"
|
|
63
|
+
)
|
|
64
|
+
default_agg: Dict[str, str] = Field(
|
|
65
|
+
default={"numeric": "mean", "object": "count"},
|
|
66
|
+
description="Default aggregation functions"
|
|
67
|
+
)
|
|
68
|
+
chunk_size: int = Field(
|
|
69
|
+
default=10000,
|
|
70
|
+
description="Chunk size for large file processing"
|
|
71
|
+
)
|
|
72
|
+
max_csv_size: int = Field(
|
|
73
|
+
default=1000000,
|
|
74
|
+
description="Threshold for chunked CSV processing"
|
|
75
|
+
)
|
|
76
|
+
allowed_file_extensions: List[str] = Field(
|
|
77
|
+
default=['.csv', '.xlsx', '.json'],
|
|
78
|
+
description="Allowed file extensions"
|
|
79
|
+
)
|
|
80
|
+
|
|
75
81
|
def __init__(self, config: Optional[Dict] = None):
|
|
76
82
|
"""
|
|
77
83
|
Initialize PandasTool with configuration.
|
|
78
84
|
|
|
79
85
|
Args:
|
|
80
|
-
config (Dict, optional): Configuration overrides for
|
|
86
|
+
config (Dict, optional): Configuration overrides for PandasTool.
|
|
81
87
|
|
|
82
88
|
Raises:
|
|
83
89
|
ValueError: If config is invalid.
|
|
84
90
|
"""
|
|
85
91
|
super().__init__(config)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
except PydanticValidationError as e:
|
|
91
|
-
raise ValueError(f"Invalid configuration: {e}")
|
|
92
|
+
|
|
93
|
+
# Parse configuration
|
|
94
|
+
self.config = self.Config(**(config or {}))
|
|
95
|
+
|
|
92
96
|
self.logger = logging.getLogger(__name__)
|
|
93
97
|
if not self.logger.handlers:
|
|
94
98
|
handler = logging.StreamHandler()
|
|
@@ -19,8 +19,7 @@ from docx import Document
|
|
|
19
19
|
from docx.shared import Pt as DocxPt, RGBColor
|
|
20
20
|
import markdown
|
|
21
21
|
import matplotlib.pyplot as plt
|
|
22
|
-
from pydantic import ValidationError, ConfigDict
|
|
23
|
-
from pydantic_settings import BaseSettings
|
|
22
|
+
from pydantic import ValidationError, ConfigDict, Field
|
|
24
23
|
import tempfile
|
|
25
24
|
import logging
|
|
26
25
|
|
|
@@ -28,45 +27,6 @@ from aiecs.tools.base_tool import BaseTool
|
|
|
28
27
|
from aiecs.tools import register_tool
|
|
29
28
|
from aiecs.tools.temp_file_manager import TempFileManager
|
|
30
29
|
|
|
31
|
-
# Configuration for ReportTool
|
|
32
|
-
class ReportSettings(BaseSettings):
|
|
33
|
-
"""
|
|
34
|
-
Configuration for ReportTool.
|
|
35
|
-
|
|
36
|
-
Attributes:
|
|
37
|
-
templates_dir (str): Directory for Jinja2 templates.
|
|
38
|
-
default_output_dir (str): Default directory for output files.
|
|
39
|
-
allowed_extensions (List[str]): Allowed file extensions for outputs.
|
|
40
|
-
pdf_page_size (str): Default PDF page size.
|
|
41
|
-
default_font (str): Default font for documents.
|
|
42
|
-
default_font_size (int): Default font size in points.
|
|
43
|
-
allowed_html_tags (Set[str]): Allowed HTML tags for sanitization.
|
|
44
|
-
allowed_html_attributes (Dict[str, List[str]]): Allowed HTML attributes for sanitization.
|
|
45
|
-
temp_files_max_age (int): Maximum age of temporary files in seconds.
|
|
46
|
-
env_prefix (str): Environment variable prefix for settings.
|
|
47
|
-
"""
|
|
48
|
-
templates_dir: str = os.getcwd()
|
|
49
|
-
default_output_dir: str = os.path.join(tempfile.gettempdir(), 'reports')
|
|
50
|
-
allowed_extensions: List[str] = ['.html', '.pdf', '.xlsx', '.pptx', '.docx', '.md', '.png']
|
|
51
|
-
pdf_page_size: str = 'A4'
|
|
52
|
-
default_font: str = 'Arial'
|
|
53
|
-
default_font_size: int = 12
|
|
54
|
-
allowed_html_tags: Set[str] = {
|
|
55
|
-
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'a', 'ul', 'ol', 'li',
|
|
56
|
-
'strong', 'em', 'b', 'i', 'table', 'tr', 'td', 'th', 'thead', 'tbody',
|
|
57
|
-
'span', 'div', 'img', 'hr', 'code', 'pre'
|
|
58
|
-
}
|
|
59
|
-
allowed_html_attributes: Dict[str, List[str]] = {
|
|
60
|
-
'a': ['href', 'title', 'target'],
|
|
61
|
-
'img': ['src', 'alt', 'title', 'width', 'height'],
|
|
62
|
-
'td': ['colspan', 'rowspan', 'align'],
|
|
63
|
-
'th': ['colspan', 'rowspan', 'align'],
|
|
64
|
-
'*': ['class', 'id', 'style']
|
|
65
|
-
}
|
|
66
|
-
temp_files_max_age: int = 3600 # 1 hour in seconds
|
|
67
|
-
env_prefix: str = 'REPORT_TOOL_'
|
|
68
|
-
|
|
69
|
-
model_config = ConfigDict(env_prefix='REPORT_TOOL_')
|
|
70
30
|
|
|
71
31
|
# Exceptions
|
|
72
32
|
class ReportToolError(Exception):
|
|
@@ -119,23 +79,74 @@ class ReportTool(BaseTool):
|
|
|
119
79
|
|
|
120
80
|
Inherits from BaseTool.
|
|
121
81
|
"""
|
|
82
|
+
|
|
83
|
+
# Configuration schema
|
|
84
|
+
class Config(BaseModel):
|
|
85
|
+
"""Configuration for the report tool"""
|
|
86
|
+
model_config = ConfigDict(env_prefix="REPORT_TOOL_")
|
|
87
|
+
|
|
88
|
+
templates_dir: str = Field(
|
|
89
|
+
default=os.getcwd(),
|
|
90
|
+
description="Directory for Jinja2 templates"
|
|
91
|
+
)
|
|
92
|
+
default_output_dir: str = Field(
|
|
93
|
+
default=os.path.join(tempfile.gettempdir(), 'reports'),
|
|
94
|
+
description="Default directory for output files"
|
|
95
|
+
)
|
|
96
|
+
allowed_extensions: List[str] = Field(
|
|
97
|
+
default=['.html', '.pdf', '.xlsx', '.pptx', '.docx', '.md', '.png'],
|
|
98
|
+
description="Allowed file extensions for outputs"
|
|
99
|
+
)
|
|
100
|
+
pdf_page_size: str = Field(
|
|
101
|
+
default='A4',
|
|
102
|
+
description="Default PDF page size"
|
|
103
|
+
)
|
|
104
|
+
default_font: str = Field(
|
|
105
|
+
default='Arial',
|
|
106
|
+
description="Default font for documents"
|
|
107
|
+
)
|
|
108
|
+
default_font_size: int = Field(
|
|
109
|
+
default=12,
|
|
110
|
+
description="Default font size in points"
|
|
111
|
+
)
|
|
112
|
+
allowed_html_tags: Set[str] = Field(
|
|
113
|
+
default={
|
|
114
|
+
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'a', 'ul', 'ol', 'li',
|
|
115
|
+
'strong', 'em', 'b', 'i', 'table', 'tr', 'td', 'th', 'thead', 'tbody',
|
|
116
|
+
'span', 'div', 'img', 'hr', 'code', 'pre'
|
|
117
|
+
},
|
|
118
|
+
description="Allowed HTML tags for sanitization"
|
|
119
|
+
)
|
|
120
|
+
allowed_html_attributes: Dict[str, List[str]] = Field(
|
|
121
|
+
default={
|
|
122
|
+
'a': ['href', 'title', 'target'],
|
|
123
|
+
'img': ['src', 'alt', 'title', 'width', 'height'],
|
|
124
|
+
'td': ['colspan', 'rowspan', 'align'],
|
|
125
|
+
'th': ['colspan', 'rowspan', 'align'],
|
|
126
|
+
'*': ['class', 'id', 'style']
|
|
127
|
+
},
|
|
128
|
+
description="Allowed HTML attributes for sanitization"
|
|
129
|
+
)
|
|
130
|
+
temp_files_max_age: int = Field(
|
|
131
|
+
default=3600,
|
|
132
|
+
description="Maximum age of temporary files in seconds"
|
|
133
|
+
)
|
|
134
|
+
|
|
122
135
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
|
123
136
|
"""
|
|
124
137
|
Initialize ReportTool with settings and resources.
|
|
125
138
|
|
|
126
139
|
Args:
|
|
127
|
-
config (Dict, optional): Configuration overrides for
|
|
140
|
+
config (Dict, optional): Configuration overrides for ReportTool.
|
|
128
141
|
|
|
129
142
|
Raises:
|
|
130
143
|
ValueError: If config contains invalid settings.
|
|
131
144
|
"""
|
|
132
145
|
super().__init__(config)
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
except ValidationError as e:
|
|
138
|
-
raise ValueError(f"Invalid configuration: {e}")
|
|
146
|
+
|
|
147
|
+
# Parse configuration
|
|
148
|
+
self.config = self.Config(**(config or {}))
|
|
149
|
+
|
|
139
150
|
self.logger = logging.getLogger(__name__)
|
|
140
151
|
if not self.logger.handlers:
|
|
141
152
|
handler = logging.StreamHandler()
|
|
@@ -143,10 +154,10 @@ class ReportTool(BaseTool):
|
|
|
143
154
|
self.logger.addHandler(handler)
|
|
144
155
|
self.logger.setLevel(logging.INFO)
|
|
145
156
|
self._jinja_env = sandbox.SandboxedEnvironment(
|
|
146
|
-
loader=FileSystemLoader(self.
|
|
157
|
+
loader=FileSystemLoader(self.config.templates_dir),
|
|
147
158
|
autoescape=True
|
|
148
159
|
)
|
|
149
|
-
self._temp_manager = TempFileManager(self.
|
|
160
|
+
self._temp_manager = TempFileManager(self.config.default_output_dir, self.config.temp_files_max_age)
|
|
150
161
|
|
|
151
162
|
def generate_html(self, template_path: Optional[str], template_str: Optional[str], context: Dict[str, Any], output_path: str, template_variables: Optional[Dict[str, str]] = None) -> str:
|
|
152
163
|
"""
|
|
@@ -167,7 +178,7 @@ class ReportTool(BaseTool):
|
|
|
167
178
|
"""
|
|
168
179
|
try:
|
|
169
180
|
if template_path:
|
|
170
|
-
path = os.path.join(self.
|
|
181
|
+
path = os.path.join(self.config.templates_dir, template_path)
|
|
171
182
|
tmpl = self._jinja_env.get_template(template_path)
|
|
172
183
|
else:
|
|
173
184
|
tmpl = self._jinja_env.from_string(template_str)
|
|
@@ -178,7 +189,7 @@ class ReportTool(BaseTool):
|
|
|
178
189
|
html = html.replace('<head>', '<head>\n' + csrf_meta)
|
|
179
190
|
else:
|
|
180
191
|
html = csrf_meta + html
|
|
181
|
-
html = sanitize_html(html, self.
|
|
192
|
+
html = sanitize_html(html, self.config.allowed_html_tags, self.config.allowed_html_attributes)
|
|
182
193
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
183
194
|
f.write(html)
|
|
184
195
|
self._temp_manager.register_file(output_path)
|
|
@@ -290,8 +301,8 @@ class ReportTool(BaseTool):
|
|
|
290
301
|
s = prs.slides.add_slide(prs.slide_layouts[1])
|
|
291
302
|
title_shape = s.shapes.title
|
|
292
303
|
title_shape.text = slide['title']
|
|
293
|
-
font = slide.get('font') or default_font or self.
|
|
294
|
-
font_size = slide.get('font_size') or default_font_size or self.
|
|
304
|
+
font = slide.get('font') or default_font or self.config.default_font
|
|
305
|
+
font_size = slide.get('font_size') or default_font_size or self.config.default_font_size
|
|
295
306
|
font_color = slide.get('font_color') or default_font_color or (0, 0, 0)
|
|
296
307
|
title_shape.text_frame.paragraphs[0].font.name = font
|
|
297
308
|
title_shape.text_frame.paragraphs[0].font.size = Pt(font_size)
|
|
@@ -370,8 +381,8 @@ class ReportTool(BaseTool):
|
|
|
370
381
|
tmpl = self._jinja_env.from_string(template_str)
|
|
371
382
|
content = tmpl.render(**context)
|
|
372
383
|
doc = Document()
|
|
373
|
-
font = font or self.
|
|
374
|
-
font_size = font_size or self.
|
|
384
|
+
font = font or self.config.default_font
|
|
385
|
+
font_size = font_size or self.config.default_font_size
|
|
375
386
|
font_color = font_color or (0, 0, 0)
|
|
376
387
|
for line in content.splitlines():
|
|
377
388
|
p = doc.add_paragraph()
|
|
@@ -2,8 +2,7 @@ import logging
|
|
|
2
2
|
from typing import Dict, Any, List, Optional, Tuple
|
|
3
3
|
import spacy
|
|
4
4
|
from spacy.language import Language
|
|
5
|
-
from pydantic import BaseModel, ValidationError, ConfigDict
|
|
6
|
-
from pydantic_settings import BaseSettings
|
|
5
|
+
from pydantic import BaseModel, ValidationError, ConfigDict, Field
|
|
7
6
|
from collections import Counter
|
|
8
7
|
from scipy.stats import pearsonr
|
|
9
8
|
import os
|
|
@@ -11,25 +10,6 @@ import os
|
|
|
11
10
|
from aiecs.tools.base_tool import BaseTool
|
|
12
11
|
from aiecs.tools import register_tool
|
|
13
12
|
|
|
14
|
-
# Configuration for ResearchTool
|
|
15
|
-
class ResearchSettings(BaseSettings):
|
|
16
|
-
"""
|
|
17
|
-
Configuration for ResearchTool.
|
|
18
|
-
|
|
19
|
-
Attributes:
|
|
20
|
-
max_workers (int): Maximum number of thread pool workers.
|
|
21
|
-
spacy_model (str): Default spaCy model to use.
|
|
22
|
-
max_text_length (int): Maximum text length for inputs.
|
|
23
|
-
allowed_spacy_models (List[str]): Allowed spaCy models.
|
|
24
|
-
env_prefix (str): Environment variable prefix.
|
|
25
|
-
"""
|
|
26
|
-
max_workers: int = min(32, (os.cpu_count() or 4) * 2)
|
|
27
|
-
spacy_model: str = "en_core_web_sm"
|
|
28
|
-
max_text_length: int = 10_000
|
|
29
|
-
allowed_spacy_models: List[str] = ["en_core_web_sm", "zh_core_web_sm"]
|
|
30
|
-
env_prefix: str = 'RESEARCH_TOOL_'
|
|
31
|
-
|
|
32
|
-
model_config = ConfigDict(env_prefix='RESEARCH_TOOL_')
|
|
33
13
|
|
|
34
14
|
# Exceptions
|
|
35
15
|
class ResearchToolError(Exception):
|
|
@@ -57,23 +37,44 @@ class ResearchTool(BaseTool):
|
|
|
57
37
|
|
|
58
38
|
Inherits from BaseTool.
|
|
59
39
|
"""
|
|
40
|
+
|
|
41
|
+
# Configuration schema
|
|
42
|
+
class Config(BaseModel):
|
|
43
|
+
"""Configuration for the research tool"""
|
|
44
|
+
model_config = ConfigDict(env_prefix="RESEARCH_TOOL_")
|
|
45
|
+
|
|
46
|
+
max_workers: int = Field(
|
|
47
|
+
default=min(32, (os.cpu_count() or 4) * 2),
|
|
48
|
+
description="Maximum number of worker threads"
|
|
49
|
+
)
|
|
50
|
+
spacy_model: str = Field(
|
|
51
|
+
default="en_core_web_sm",
|
|
52
|
+
description="Default spaCy model to use"
|
|
53
|
+
)
|
|
54
|
+
max_text_length: int = Field(
|
|
55
|
+
default=10_000,
|
|
56
|
+
description="Maximum text length for inputs"
|
|
57
|
+
)
|
|
58
|
+
allowed_spacy_models: List[str] = Field(
|
|
59
|
+
default=["en_core_web_sm", "zh_core_web_sm"],
|
|
60
|
+
description="Allowed spaCy models"
|
|
61
|
+
)
|
|
62
|
+
|
|
60
63
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
|
61
64
|
"""
|
|
62
65
|
Initialize ResearchTool with settings and resources.
|
|
63
66
|
|
|
64
67
|
Args:
|
|
65
|
-
config (Dict, optional): Configuration overrides for
|
|
68
|
+
config (Dict, optional): Configuration overrides for ResearchTool.
|
|
66
69
|
|
|
67
70
|
Raises:
|
|
68
71
|
ValueError: If config contains invalid settings.
|
|
69
72
|
"""
|
|
70
73
|
super().__init__(config)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
except ValidationError as e:
|
|
76
|
-
raise ValueError(f"Invalid configuration: {e}")
|
|
74
|
+
|
|
75
|
+
# Parse configuration
|
|
76
|
+
self.config = self.Config(**(config or {}))
|
|
77
|
+
|
|
77
78
|
self.logger = logging.getLogger(__name__)
|
|
78
79
|
if not self.logger.handlers:
|
|
79
80
|
handler = logging.StreamHandler()
|
|
@@ -98,9 +99,9 @@ class ResearchTool(BaseTool):
|
|
|
98
99
|
ResearchToolError: If the spaCy model is invalid.
|
|
99
100
|
"""
|
|
100
101
|
if self._spacy_nlp is None:
|
|
101
|
-
if self.
|
|
102
|
-
raise ResearchToolError(f"Invalid spaCy model '{self.
|
|
103
|
-
self._spacy_nlp = spacy.load(self.
|
|
102
|
+
if self.config.spacy_model not in self.config.allowed_spacy_models:
|
|
103
|
+
raise ResearchToolError(f"Invalid spaCy model '{self.config.spacy_model}', expected {self.config.allowed_spacy_models}")
|
|
104
|
+
self._spacy_nlp = spacy.load(self.config.spacy_model, disable=["textcat"])
|
|
104
105
|
return self._spacy_nlp
|
|
105
106
|
|
|
106
107
|
def mill_agreement(self, cases: List[Dict[str, Any]]) -> Dict[str, Any]:
|