aiecs-1.1.0-py3-none-any.whl → aiecs-1.2.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of aiecs might be problematic.
- aiecs/__init__.py +1 -1
- aiecs/aiecs_client.py +1 -1
- aiecs/config/config.py +38 -0
- aiecs/domain/__init__.py +95 -0
- aiecs/domain/community/__init__.py +159 -0
- aiecs/domain/community/agent_adapter.py +516 -0
- aiecs/domain/community/analytics.py +465 -0
- aiecs/domain/community/collaborative_workflow.py +99 -7
- aiecs/domain/community/communication_hub.py +649 -0
- aiecs/domain/community/community_builder.py +322 -0
- aiecs/domain/community/community_integration.py +365 -12
- aiecs/domain/community/community_manager.py +481 -5
- aiecs/domain/community/decision_engine.py +459 -13
- aiecs/domain/community/exceptions.py +238 -0
- aiecs/domain/community/models/__init__.py +36 -0
- aiecs/domain/community/resource_manager.py +1 -1
- aiecs/domain/community/shared_context_manager.py +621 -0
- aiecs/domain/context/context_engine.py +37 -33
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +207 -0
- aiecs/infrastructure/persistence/file_storage.py +41 -28
- aiecs/llm/__init__.py +44 -7
- aiecs/llm/callbacks/__init__.py +12 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +1 -1
- aiecs/llm/client_factory.py +23 -6
- aiecs/llm/clients/__init__.py +35 -0
- aiecs/llm/{base_client.py → clients/base_client.py} +73 -1
- aiecs/llm/{googleai_client.py → clients/googleai_client.py} +19 -15
- aiecs/llm/{openai_client.py → clients/openai_client.py} +9 -14
- aiecs/llm/{vertex_client.py → clients/vertex_client.py} +15 -15
- aiecs/llm/{xai_client.py → clients/xai_client.py} +36 -50
- aiecs/llm/config/__init__.py +54 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +237 -0
- aiecs/llm/config/model_config.py +132 -0
- aiecs/llm/utils/__init__.py +11 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +32 -2
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +15 -0
- aiecs/scripts/aid/version_manager.py +224 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +1 -0
- aiecs/tools/__init__.py +23 -23
- aiecs/tools/docs/__init__.py +5 -2
- aiecs/tools/docs/ai_document_orchestrator.py +39 -26
- aiecs/tools/docs/ai_document_writer_orchestrator.py +61 -38
- aiecs/tools/docs/content_insertion_tool.py +48 -28
- aiecs/tools/docs/document_creator_tool.py +47 -29
- aiecs/tools/docs/document_layout_tool.py +35 -20
- aiecs/tools/docs/document_parser_tool.py +56 -36
- aiecs/tools/docs/document_writer_tool.py +115 -62
- aiecs/tools/schema_generator.py +56 -56
- aiecs/tools/statistics/__init__.py +82 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +581 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +473 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +629 -0
- aiecs/tools/statistics/data_loader_tool.py +518 -0
- aiecs/tools/statistics/data_profiler_tool.py +599 -0
- aiecs/tools/statistics/data_transformer_tool.py +531 -0
- aiecs/tools/statistics/data_visualizer_tool.py +460 -0
- aiecs/tools/statistics/model_trainer_tool.py +470 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +426 -0
- aiecs/tools/task_tools/chart_tool.py +2 -1
- aiecs/tools/task_tools/image_tool.py +43 -43
- aiecs/tools/task_tools/office_tool.py +39 -36
- aiecs/tools/task_tools/pandas_tool.py +37 -33
- aiecs/tools/task_tools/report_tool.py +67 -56
- aiecs/tools/task_tools/research_tool.py +32 -31
- aiecs/tools/task_tools/scraper_tool.py +53 -46
- aiecs/tools/task_tools/search_tool.py +1123 -0
- aiecs/tools/task_tools/stats_tool.py +20 -15
- aiecs/tools/tool_executor/__init__.py +2 -2
- aiecs/tools/tool_executor/tool_executor.py +3 -3
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/METADATA +5 -1
- aiecs-1.2.1.dist-info/RECORD +144 -0
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/entry_points.txt +1 -0
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.1.0.dist-info/RECORD +0 -114
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/WHEEL +0 -0
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.1.0.dist-info → aiecs-1.2.1.dist-info}/top_level.txt +0 -0
--- aiecs/tools/task_tools/scraper_tool.py (1.1.0)
+++ aiecs/tools/task_tools/scraper_tool.py (1.2.1)
@@ -12,8 +12,7 @@ from urllib.parse import urlparse, urljoin
 import httpx
 from bs4 import BeautifulSoup
 from urllib import request as urllib_request
-from pydantic import BaseModel, ValidationError, ConfigDict
-from pydantic_settings import BaseSettings
+from pydantic import BaseModel, ValidationError, ConfigDict, Field
 
 from aiecs.tools.base_tool import BaseTool
 from aiecs.tools import register_tool
@@ -45,31 +44,6 @@ class RenderEngine(str, Enum):
     NONE = "none"
     PLAYWRIGHT = "playwright"
 
-# Global settings
-class ScraperSettings(BaseModel):
-    """
-    Configuration for ScraperTool.
-
-    Attributes:
-        user_agent (str): User agent for HTTP requests.
-        max_content_length (int): Maximum content length in bytes.
-        output_dir (str): Directory for output files.
-        scrapy_command (str): Command to run Scrapy.
-        allowed_domains (List[str]): Allowed domains for scraping.
-        blocked_domains (List[str]): Blocked domains for scraping.
-        playwright_available (bool): Whether Playwright is available.
-        env_prefix (str): Environment variable prefix.
-    """
-    user_agent: str = "PythonMiddlewareScraper/2.0"
-    max_content_length: int = 10 * 1024 * 1024  # 10MB
-    output_dir: str = os.path.join(tempfile.gettempdir(), 'scraper_outputs')
-    scrapy_command: str = "scrapy"
-    allowed_domains: List[str] = []
-    blocked_domains: List[str] = []
-    playwright_available: bool = False
-    env_prefix: str = "SCRAPER_TOOL_"
-
-    model_config = ConfigDict(env_prefix="SCRAPER_TOOL_")
 
 # Exceptions
 class ScraperToolError(Exception):
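The removed `ScraperSettings` declared `env_prefix` both as an ordinary field and inside `model_config`, but a plain `pydantic.BaseModel` never reads environment variables; that behavior belongs to `pydantic_settings.BaseSettings`, whose import is dropped in the first hunk. A minimal sketch of genuinely env-backed settings under pydantic-settings v2 (the class below is illustrative, not part of aiecs):

```python
from pydantic_settings import BaseSettings, SettingsConfigDict

class EnvScraperSettings(BaseSettings):
    """Illustrative env-backed settings; not part of aiecs."""
    model_config = SettingsConfigDict(env_prefix="SCRAPER_TOOL_")

    user_agent: str = "PythonMiddlewareScraper/2.0"
    max_content_length: int = 10 * 1024 * 1024  # 10 MB

# With SCRAPER_TOOL_USER_AGENT=MyBot set in the environment, user_agent
# is overridden at construction time; a plain BaseModel would ignore it.
settings = EnvScraperSettings()
```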
@@ -117,39 +91,72 @@ class ScraperTool(BaseTool):
     - Scrapy integration for advanced crawling
     - Output in various formats: text, JSON, HTML, Markdown, CSV
     """
+
+    # Configuration schema
+    class Config(BaseModel):
+        """Configuration for the scraper tool"""
+        model_config = ConfigDict(env_prefix="SCRAPER_TOOL_")
+
+        user_agent: str = Field(
+            default="PythonMiddlewareScraper/2.0",
+            description="User agent for HTTP requests"
+        )
+        max_content_length: int = Field(
+            default=10 * 1024 * 1024,
+            description="Maximum content length in bytes"
+        )
+        output_dir: str = Field(
+            default=os.path.join(tempfile.gettempdir(), 'scraper_outputs'),
+            description="Directory for output files"
+        )
+        scrapy_command: str = Field(
+            default="scrapy",
+            description="Command to run Scrapy"
+        )
+        allowed_domains: List[str] = Field(
+            default=[],
+            description="Allowed domains for scraping"
+        )
+        blocked_domains: List[str] = Field(
+            default=[],
+            description="Blocked domains for scraping"
+        )
+        playwright_available: bool = Field(
+            default=False,
+            description="Whether Playwright is available (auto-detected)"
+        )
+
     def __init__(self, config: Optional[Dict] = None):
         """
         Initialize ScraperTool with settings and resources.
 
         Args:
-            config (Dict, optional): Configuration overrides for
+            config (Dict, optional): Configuration overrides for ScraperTool.
 
         Raises:
             ValueError: If config contains invalid settings.
         """
         super().__init__(config)
-
-
-
-
-        except ValidationError as e:
-            raise ValueError(f"Invalid settings: {e}")
+
+        # Parse configuration
+        self.config = self.Config(**(config or {}))
+
         self.logger = logging.getLogger(__name__)
         if not self.logger.handlers:
             handler = logging.StreamHandler()
             handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
             self.logger.addHandler(handler)
             self.logger.setLevel(logging.INFO)
-        os.makedirs(self.
+        os.makedirs(self.config.output_dir, exist_ok=True)
         self._check_external_tools()
 
     def _check_external_tools(self):
         """Check if external tools are available."""
         try:
             import playwright
-            self.
+            self.config.playwright_available = True
         except ImportError:
-            self.
+            self.config.playwright_available = False
 
 
     async def _save_output(self, content: Any, path: str, format: OutputFormat) -> None:
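The new nested `Config` model validates constructor overrides at `__init__` time and falls back to the `Field` defaults otherwise. A usage sketch, assuming `BaseTool.__init__` passes the dict through unchanged (the keys mirror the schema above):

```python
from aiecs.tools.task_tools.scraper_tool import ScraperTool

# Defaults only: every field takes its Field(default=...) value.
tool = ScraperTool()

# Overrides are validated by the nested pydantic model.
tool = ScraperTool(config={
    "user_agent": "MyCrawler/1.0",
    "max_content_length": 5 * 1024 * 1024,  # 5 MB cap
    "allowed_domains": ["example.com"],
})
print(tool.config.user_agent)  # -> "MyCrawler/1.0"
```

Two caveats worth noting: the old `except ValidationError` wrapper is gone, so an invalid override appears to surface as a `pydantic.ValidationError` rather than the `ValueError` the docstring still advertises, unless `BaseTool` converts it; and `ConfigDict(env_prefix=...)` is carried over from the old class but has no effect on a plain `BaseModel`.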
@@ -232,7 +239,7 @@ class ScraperTool(BaseTool):
         try:
             headers = headers or {}
             if 'User-Agent' not in headers:
-                headers['User-Agent'] = self.
+                headers['User-Agent'] = self.config.user_agent
             kwargs = {
                 'params': params,
                 'headers': headers,
@@ -261,7 +268,7 @@ class ScraperTool(BaseTool):
         except httpx.HTTPStatusError as e:
             raise HttpError(f"HTTP {e.response.status_code}: {e.response.reason_phrase} for {url}")
 
-        if len(resp.content) > self.
+        if len(resp.content) > self.config.max_content_length:
             raise HttpError(f"Response content too large: {len(resp.content)} bytes")
 
         if content_type == ContentType.JSON:
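Both httpx hunks follow the same pattern: inject the configured `User-Agent` when the caller did not set one, then reject bodies over `max_content_length`. Since `len(resp.content)` is checked only after httpx has buffered the whole response, a streaming variant can enforce the cap during the download instead; a sketch of that alternative (not the package's code):

```python
import httpx

MAX_LEN = 10 * 1024 * 1024  # mirrors max_content_length

async def fetch_capped(url: str, user_agent: str) -> bytes:
    """Sketch: enforce the size cap while downloading, not after."""
    async with httpx.AsyncClient() as client:
        async with client.stream("GET", url, headers={"User-Agent": user_agent}) as resp:
            resp.raise_for_status()
            body = bytearray()
            async for chunk in resp.aiter_bytes():
                body.extend(chunk)
                if len(body) > MAX_LEN:
                    raise ValueError(f"Response exceeds {MAX_LEN} bytes")
            return bytes(body)
```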
@@ -308,7 +315,7 @@ class ScraperTool(BaseTool):
 
         headers = headers or {}
         if 'User-Agent' not in headers:
-            headers['User-Agent'] = self.
+            headers['User-Agent'] = self.config.user_agent
         data_bytes = None
         if data:
             data_bytes = urllib.parse.urlencode(data).encode()
@@ -320,7 +327,7 @@ class ScraperTool(BaseTool):
         )
         with urllib_request.urlopen(req) as resp:
             content_length = resp.getheader('Content-Length')
-            if content_length and int(content_length) > self.
+            if content_length and int(content_length) > self.config.max_content_length:
                 raise HttpError(f"Response content too large: {content_length} bytes")
             content = resp.read()
             charset = resp.headers.get_content_charset() or 'utf-8'
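The urllib path checks the `Content-Length` header before reading, which only helps when the server sends that header and sends it truthfully; the following `resp.read()` still trusts the wire. A capped read closes that gap; again an illustrative sketch, not the package's code:

```python
from urllib import request as urllib_request

MAX_LEN = 10 * 1024 * 1024

def read_capped(req: urllib_request.Request) -> bytes:
    """Sketch: cap the body even when Content-Length is missing or wrong."""
    with urllib_request.urlopen(req) as resp:
        # Read one byte past the cap; anything longer is rejected.
        content = resp.read(MAX_LEN + 1)
        if len(content) > MAX_LEN:
            raise ValueError(f"Response content too large: >{MAX_LEN} bytes")
        return content
```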
@@ -375,7 +382,7 @@ class ScraperTool(BaseTool):
         """
         try:
             if engine == RenderEngine.PLAYWRIGHT:
-                if not self.
+                if not self.config.playwright_available:
                     raise RenderingError("Playwright is not available. Install with 'pip install playwright'")
                 result = await self._render_with_playwright(url, wait_time, wait_selector, scroll_to_bottom, screenshot, screenshot_path)
             else:
@@ -393,7 +400,7 @@ class ScraperTool(BaseTool):
         async with async_playwright() as p:
             browser = await p.chromium.launch()
             page = await browser.new_page(
-                user_agent=self.
+                user_agent=self.config.user_agent,
                 viewport={'width': 1280, 'height': 800}
             )
             try:
@@ -407,7 +414,7 @@ class ScraperTool(BaseTool):
             await page.wait_for_timeout(1000)
             screenshot_result = None
             if screenshot:
-                screenshot_path = screenshot_path or os.path.join(self.
+                screenshot_path = screenshot_path or os.path.join(self.config.output_dir, f"screenshot_{int(time.time())}.png")
                 os.makedirs(os.path.dirname(os.path.abspath(screenshot_path)), exist_ok=True)
                 await page.screenshot(path=screenshot_path)
                 screenshot_result = screenshot_path
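The two Playwright hunks above cover page creation and the screenshot path; for orientation, here is the same flow as a self-contained script using the standard Playwright async API (the URL and file name are placeholders):

```python
import asyncio
from playwright.async_api import async_playwright

async def render(url: str, user_agent: str, shot_path: str = "page.png") -> str:
    """Sketch of the render-and-screenshot flow shown in the diff."""
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        page = await browser.new_page(
            user_agent=user_agent,
            viewport={"width": 1280, "height": 800},
        )
        try:
            await page.goto(url)
            await page.screenshot(path=shot_path)
            return await page.content()  # rendered HTML
        finally:
            await browser.close()

html = asyncio.run(render("https://example.com", "PythonMiddlewareScraper/2.0"))
```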
@@ -447,10 +454,10 @@ class ScraperTool(BaseTool):
         start_time = time.time()
         os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
         cmd = [
-            self.
+            self.config.scrapy_command,
             'crawl', spider_name,
             '-o', output_path,
-            '-s', f'USER_AGENT={self.
+            '-s', f'USER_AGENT={self.config.user_agent}',
             '-s', 'LOG_LEVEL=INFO'
         ]
         if spider_args: