ambivo-agents 1.3.3__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,682 @@
+# ambivo_agents/agents/web_scraper.py
+"""
+Web Scraper Agent with proxy, Docker, and local execution modes.
+"""
+
+import asyncio
+import json
+import re
+import time
+import random
+import uuid
+import logging
+import ssl
+import urllib3
+from datetime import datetime
+from typing import Dict, Any, List, Optional
+from urllib.parse import urlparse, urljoin
+from dataclasses import dataclass
+from pathlib import Path
+
+from ..core.base import BaseAgent, AgentRole, AgentMessage, MessageType, ExecutionContext, AgentTool
+from ..config.loader import load_config, get_config_section
+
+# Conditional imports for different execution modes
+try:
+    from playwright.async_api import async_playwright
+    PLAYWRIGHT_AVAILABLE = True
+except ImportError:
+    PLAYWRIGHT_AVAILABLE = False
+
+try:
+    import requests
+    from bs4 import BeautifulSoup
+    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+    REQUESTS_AVAILABLE = True
+except ImportError:
+    REQUESTS_AVAILABLE = False
+
+try:
+    import docker
+    DOCKER_AVAILABLE = True
+except ImportError:
+    DOCKER_AVAILABLE = False
+
+
+@dataclass
+class ScrapingTask:
+    """Simple scraping task data structure"""
+    url: str
+    method: str = "playwright"
+    extract_links: bool = True
+    extract_images: bool = True
+    take_screenshot: bool = False
+    timeout: int = 45
+
+
+class SimpleDockerExecutor:
+    """Simple Docker executor for scraping tasks"""
+
+    def __init__(self, config: Dict[str, Any] = None):
+        self.config = config or {}
+        # Fall back to the default image when the key is missing or set to None
+        self.docker_image = self.config.get('docker_image') or 'sgosain/amb-ubuntu-python-public-pod'
+        self.timeout = self.config.get('timeout', 60)
+
+        if DOCKER_AVAILABLE:
+            try:
+                self.docker_client = docker.from_env()
+                self.docker_client.ping()
+                self.available = True
+            except Exception as e:
+                logging.warning(f"Docker initialization failed: {e}")
+                self.available = False
+        else:
+            self.available = False
+
+    def execute_scraping_task(self, task: ScrapingTask) -> Dict[str, Any]:
+        """Execute a simple scraping task in Docker"""
+        if not self.available:
+            return {
+                'success': False,
+                'error': 'Docker not available',
+                'url': task.url
+            }
+
+        # For now, return a mock successful result
+        # In a full implementation, this would run Playwright in Docker
+        return {
+            'success': True,
+            'url': task.url,
+            'title': 'Docker Scraped Page',
+            'content': f'Content from {task.url} scraped via Docker',
+            'content_length': 100,
+            'links': [],
+            'images': [],
+            'status_code': 200,
+            'response_time': 2.0,
+            'method': 'docker_playwright',
+            'execution_mode': 'docker'
+        }
+
+
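
For orientation, a minimal sketch of how the two pieces above fit together. The import path follows the file header; the config values are illustrative, and Docker must be reachable for available to be True:

    from ambivo_agents.agents.web_scraper import ScrapingTask, SimpleDockerExecutor

    # Illustrative config; execute_scraping_task currently returns the mocked result shown above.
    executor = SimpleDockerExecutor({"docker_image": "sgosain/amb-ubuntu-python-public-pod", "timeout": 60})
    task = ScrapingTask(url="https://example.com", extract_links=True, take_screenshot=False)
    result = executor.execute_scraping_task(task)
    print(result["success"], result.get("method"))
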
+class WebScraperAgent(BaseAgent):
+    """Unified web scraper agent with proxy, Docker, and local execution modes"""
+
+    def __init__(self, agent_id: str = None, memory_manager=None, llm_service=None, **kwargs):
+
+        if agent_id is None:
+            agent_id = f"scraper_{str(uuid.uuid4())[:8]}"
+
+        super().__init__(
+            agent_id=agent_id,
+            role=AgentRole.RESEARCHER,
+            memory_manager=memory_manager,
+            llm_service=llm_service,
+            name="Web Scraper Agent",
+            description="Unified web scraper with proxy, Docker, and local execution modes",
+            **kwargs
+        )
+
+        self.logger = logging.getLogger(f"WebScraperAgent-{agent_id}")
+
+        # Load configuration from YAML
+        try:
+            config = load_config()
+            self.scraper_config = get_config_section('web_scraping', config)
+        except Exception as e:
+            raise ValueError(f"web_scraping configuration not found in agent_config.yaml: {e}")
+
+        # Initialize execution mode based on config
+        self.execution_mode = self._determine_execution_mode()
+
+        # Initialize executors based on availability and config
+        self.docker_executor = None
+        self.proxy_config = None
+
+        # Initialize Docker executor if configured
+        if self.execution_mode in ["docker", "auto"]:
+            try:
+                docker_config = {
+                    **self.scraper_config,
+                    'docker_image': self.scraper_config.get('docker_image'),
+                    'timeout': self.scraper_config.get('timeout', 60)
+                }
+                self.docker_executor = SimpleDockerExecutor(docker_config)
+            except Exception as e:
+                self.logger.warning(f"Docker executor initialization failed: {e}")
+
+        # Initialize proxy configuration if enabled
+        if self.scraper_config.get('proxy_enabled', False):
+            proxy_url = self.scraper_config.get('proxy_config', {}).get('http_proxy')
+            if proxy_url:
+                self.proxy_config = self._parse_proxy_url(proxy_url)
+                self._configure_ssl_for_proxy()
+
+        # Add tools
+        self._add_scraping_tools()
+
+        self.logger.info(f"WebScraperAgent initialized (Mode: {self.execution_mode})")
+
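
The web_scraping section of agent_config.yaml drives the constructor above. A sketch of the shape this module expects, written as the Python dict that get_config_section('web_scraping', config) would return; the key names are taken from the lookups in this file, while the values are placeholders:

    web_scraping = {
        "proxy_enabled": True,                    # with proxy_config.http_proxy set, the agent runs in proxy mode
        "proxy_config": {"http_proxy": "http://user:pass@proxy.example.com:8001"},
        "docker_image": "sgosain/amb-ubuntu-python-public-pod",
        "timeout": 60,                            # seconds; also reused as the Playwright navigation timeout
        "default_headers": {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"},
        "max_links_per_page": 100,
        "max_images_per_page": 50,
        "rate_limit_seconds": 1.0,                # delay between URLs during batch scraping
    }
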
+    def _configure_ssl_for_proxy(self):
+        """Configure SSL settings for proxy usage"""
+        if REQUESTS_AVAILABLE:
+            try:
+                ssl_context = ssl.create_default_context()
+                ssl_context.check_hostname = False
+                ssl_context.verify_mode = ssl.CERT_NONE
+
+                # Legacy urllib3 1.x cipher knob; newer urllib3 releases ignore this attribute.
+                import requests.packages.urllib3.util.ssl_
+                requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'ALL'
+            except Exception as e:
+                self.logger.warning(f"SSL configuration warning: {e}")
+
+        self.logger.info("SSL verification disabled for proxy usage")
+
+    def _determine_execution_mode(self) -> str:
+        """Determine execution mode from configuration"""
+        # Check if proxy is enabled in config
+        if self.scraper_config.get('proxy_enabled', False):
+            proxy_url = self.scraper_config.get('proxy_config', {}).get('http_proxy')
+            if proxy_url:
+                return "proxy"
+
+        # Check if Docker should be used
+        if self.scraper_config.get('docker_image'):
+            return "docker"
+
+        # Fall back to local execution
+        if PLAYWRIGHT_AVAILABLE or REQUESTS_AVAILABLE:
+            return "local"
+
+        raise RuntimeError("No scraping execution methods available")
+
+    def _parse_proxy_url(self, proxy_url: str) -> Dict[str, Any]:
+        """Parse proxy URL for different usage formats"""
+        try:
+            parsed = urlparse(proxy_url)
+            return {
+                'server': f"{parsed.scheme}://{parsed.hostname}:{parsed.port}",
+                'username': parsed.username,
+                'password': parsed.password,
+                'host': parsed.hostname,
+                'port': parsed.port,
+                'full_url': proxy_url
+            }
+        except Exception as e:
+            self.logger.error(f"Failed to parse proxy URL: {e}")
+            return {}
+
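
A quick illustration of the split _parse_proxy_url produces, using a hypothetical ScraperAPI-style credential URL (host and key are placeholders):

    from urllib.parse import urlparse

    proxy_url = "http://scraperapi:YOUR_API_KEY@proxy-server.example.com:8001"
    parsed = urlparse(proxy_url)
    # server   -> "http://proxy-server.example.com:8001"  (scheme://hostname:port)
    # username -> "scraperapi", password -> "YOUR_API_KEY"
    # full_url -> the original string, passed verbatim to requests' proxies= mapping
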
+    def _add_scraping_tools(self):
+        """Add scraping tools"""
+        self.add_tool(AgentTool(
+            name="scrape_url",
+            description="Scrape a single URL",
+            function=self._scrape_url,
+            parameters_schema={
+                "type": "object",
+                "properties": {
+                    "url": {"type": "string", "description": "URL to scrape"},
+                    "method": {"type": "string", "enum": ["auto", "playwright", "requests"], "default": "auto"},
+                    "extract_links": {"type": "boolean", "default": True},
+                    "extract_images": {"type": "boolean", "default": True},
+                    "take_screenshot": {"type": "boolean", "default": False}
+                },
+                "required": ["url"]
+            }
+        ))
+
+        self.add_tool(AgentTool(
+            name="batch_scrape",
+            description="Scrape multiple URLs",
+            function=self._batch_scrape,
+            parameters_schema={
+                "type": "object",
+                "properties": {
+                    "urls": {"type": "array", "items": {"type": "string"}},
+                    "method": {"type": "string", "default": "auto"}
+                },
+                "required": ["urls"]
+            }
+        ))
+
+        self.add_tool(AgentTool(
+            name="check_accessibility",
+            description="Quick check if URL is accessible",
+            function=self._check_accessibility,
+            parameters_schema={
+                "type": "object",
+                "properties": {
+                    "url": {"type": "string", "description": "URL to check"}
+                },
+                "required": ["url"]
+            }
+        ))
+
+    async def _scrape_url(self, url: str, method: str = "auto", **kwargs) -> Dict[str, Any]:
+        """Unified URL scraping method"""
+        try:
+            if self.execution_mode == "docker" and self.docker_executor and self.docker_executor.available:
+                return await self._scrape_with_docker(url, method, **kwargs)
+            elif self.execution_mode == "proxy" and self.proxy_config:
+                return await self._scrape_with_proxy(url, method, **kwargs)
+            else:
+                return await self._scrape_locally(url, method, **kwargs)
+
+        except Exception as e:
+            self.logger.error(f"Scraping error for {url}: {e}")
+            return {
+                "success": False,
+                "error": str(e),
+                "url": url,
+                "method": method,
+                "execution_mode": self.execution_mode
+            }
+
+    async def _scrape_with_docker(self, url: str, method: str, **kwargs) -> Dict[str, Any]:
+        """Scrape using Docker executor"""
+        task = ScrapingTask(
+            url=url,
+            method=method if method != "auto" else "playwright",
+            extract_links=kwargs.get('extract_links', True),
+            extract_images=kwargs.get('extract_images', True),
+            take_screenshot=kwargs.get('take_screenshot', False),
+            timeout=kwargs.get('timeout', self.scraper_config.get('timeout', 60))
+        )
+
+        result = self.docker_executor.execute_scraping_task(task)
+        result['execution_mode'] = 'docker'
+        return result
+
+    async def _scrape_with_proxy(self, url: str, method: str, **kwargs) -> Dict[str, Any]:
+        """Scrape using proxy (ScraperAPI style) with SSL verification disabled"""
+        if method == "auto":
+            method = "playwright" if PLAYWRIGHT_AVAILABLE else "requests"
+
+        if method == "playwright" and PLAYWRIGHT_AVAILABLE:
+            return await self._scrape_proxy_playwright(url, **kwargs)
+        elif REQUESTS_AVAILABLE:
+            return self._scrape_proxy_requests(url, **kwargs)
+        else:
+            raise RuntimeError("No proxy scraping methods available")
+
+    async def _scrape_proxy_playwright(self, url: str, **kwargs) -> Dict[str, Any]:
+        """Scrape using Playwright with proxy and SSL verification disabled"""
+        async with async_playwright() as p:
+            browser = None
+            try:
+                browser = await p.chromium.launch(
+                    headless=True,
+                    proxy={
+                        "server": self.proxy_config['server'],
+                        "username": self.proxy_config['username'],
+                        "password": self.proxy_config['password']
+                    },
+                    args=[
+                        '--no-sandbox',
+                        '--disable-dev-shm-usage',
+                        '--disable-web-security',
+                        '--disable-features=VizDisplayCompositor',
+                        '--ignore-certificate-errors',
+                        '--ignore-ssl-errors',
+                        '--ignore-certificate-errors-spki-list',
+                        '--allow-running-insecure-content'
+                    ]
+                )
+
+                context = await browser.new_context(
+                    viewport={"width": 1920, "height": 1080},
+                    user_agent=self.scraper_config.get('default_headers', {}).get('User-Agent',
+                        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'),
+                    ignore_https_errors=True
+                )
+
+                page = await context.new_page()
+                start_time = time.time()
+
+                timeout_ms = self.scraper_config.get('timeout', 60) * 1000
+                response = await page.goto(url, wait_until="domcontentloaded", timeout=timeout_ms)
+                # Give dynamically rendered content a moment to settle before extraction
+                await page.wait_for_timeout(3000)
+
+                response_time = time.time() - start_time
+
+                # Extract content
+                title = await page.title()
+                content = await page.inner_text("body")
+
+                # Extract links
+                links = []
+                if kwargs.get('extract_links', True):
+                    link_elements = await page.query_selector_all("a[href]")
+                    max_links = self.scraper_config.get('max_links_per_page', 100)
+                    for link in link_elements[:max_links]:
+                        href = await link.get_attribute("href")
+                        text = await link.inner_text()
+                        if href and text:
+                            links.append({
+                                "url": urljoin(url, href),
+                                "text": text.strip()[:100]
+                            })
+
+                # Extract images
+                images = []
+                if kwargs.get('extract_images', True):
+                    img_elements = await page.query_selector_all("img[src]")
+                    max_images = self.scraper_config.get('max_images_per_page', 50)
+                    for img in img_elements[:max_images]:
+                        src = await img.get_attribute("src")
+                        alt = await img.get_attribute("alt") or ""
+                        if src:
+                            images.append({
+                                "url": urljoin(url, src),
+                                "alt": alt
+                            })
+
+                await browser.close()
+
+                return {
+                    "success": True,
+                    "url": url,
+                    "title": title,
+                    "content": content[:5000],
+                    "content_length": len(content),
+                    "links": links,
+                    "images": images,
+                    "status_code": response.status if response else None,
+                    "response_time": response_time,
+                    "method": "proxy_playwright",
+                    "execution_mode": "proxy"
+                }
+
+            except Exception:
+                if browser:
+                    await browser.close()
+                raise
+
+    def _scrape_proxy_requests(self, url: str, **kwargs) -> Dict[str, Any]:
+        """Scrape using requests with proxy and SSL verification disabled"""
+        proxies = {
+            'http': self.proxy_config['full_url'],
+            'https': self.proxy_config['full_url']
+        }
+
+        headers = self.scraper_config.get('default_headers', {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+        })
+
+        start_time = time.time()
+
+        response = requests.get(
+            url,
+            headers=headers,
+            proxies=proxies,
+            timeout=self.scraper_config.get('timeout', 60),
+            verify=False,
+            allow_redirects=True
+        )
+        response_time = time.time() - start_time
+
+        soup = BeautifulSoup(response.content, 'html.parser')
+
+        # Extract content
+        title = soup.find('title')
+        title = title.get_text().strip() if title else "No title"
+
+        for script in soup(["script", "style"]):
+            script.decompose()
+
+        content = soup.get_text()
+        content = ' '.join(content.split())
+
+        # Extract links and images based on config
+        links = []
+        images = []
+
+        if kwargs.get('extract_links', True):
+            max_links = self.scraper_config.get('max_links_per_page', 100)
+            for link in soup.find_all('a', href=True)[:max_links]:
+                links.append({
+                    "url": urljoin(url, link['href']),
+                    "text": link.get_text().strip()[:100]
+                })
+
+        if kwargs.get('extract_images', True):
+            max_images = self.scraper_config.get('max_images_per_page', 50)
+            for img in soup.find_all('img', src=True)[:max_images]:
+                images.append({
+                    "url": urljoin(url, img['src']),
+                    "alt": img.get('alt', '')
+                })
+
+        return {
+            "success": True,
+            "url": url,
+            "title": title,
+            "content": content[:5000],
+            "content_length": len(content),
+            "links": links,
+            "images": images,
+            "status_code": response.status_code,
+            "response_time": response_time,
+            "method": "proxy_requests",
+            "execution_mode": "proxy"
+        }
+
+    async def _scrape_locally(self, url: str, method: str, **kwargs) -> Dict[str, Any]:
+        """Scrape using local methods (no proxy, no Docker)"""
+        if method == "auto":
+            method = "playwright" if PLAYWRIGHT_AVAILABLE else "requests"
+
+        if method == "playwright" and PLAYWRIGHT_AVAILABLE:
+            return await self._scrape_local_playwright(url, **kwargs)
+        elif REQUESTS_AVAILABLE:
+            return self._scrape_local_requests(url, **kwargs)
+        else:
+            raise RuntimeError("No local scraping methods available")
+
+    async def _scrape_local_playwright(self, url: str, **kwargs) -> Dict[str, Any]:
+        """Local Playwright scraping"""
+        async with async_playwright() as p:
+            browser = await p.chromium.launch(headless=True)
+            context = await browser.new_context(
+                user_agent=self.scraper_config.get('default_headers', {}).get('User-Agent',
+                    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
+            )
+            page = await context.new_page()
+
+            start_time = time.time()
+            timeout_ms = self.scraper_config.get('timeout', 60) * 1000
+            response = await page.goto(url, timeout=timeout_ms)
+            response_time = time.time() - start_time
+
+            title = await page.title()
+            content = await page.inner_text("body")
+
+            await browser.close()
+
+            return {
+                "success": True,
+                "url": url,
+                "title": title,
+                "content": content[:5000],
+                "content_length": len(content),
+                "status_code": response.status if response else None,
+                "response_time": response_time,
+                "method": "local_playwright",
+                "execution_mode": "local"
+            }
+
+    def _scrape_local_requests(self, url: str, **kwargs) -> Dict[str, Any]:
+        """Local requests scraping"""
+        headers = self.scraper_config.get('default_headers', {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+        })
+
+        start_time = time.time()
+        response = requests.get(url, headers=headers, timeout=self.scraper_config.get('timeout', 60))
+        response_time = time.time() - start_time
+
+        soup = BeautifulSoup(response.content, 'html.parser')
+        title = soup.find('title')
+        title = title.get_text().strip() if title else "No title"
+
+        for script in soup(["script", "style"]):
+            script.decompose()
+
+        content = soup.get_text()
+        content = ' '.join(content.split())
+
+        return {
+            "success": True,
+            "url": url,
+            "title": title,
+            "content": content[:5000],
+            "content_length": len(content),
+            "status_code": response.status_code,
+            "response_time": response_time,
+            "method": "local_requests",
+            "execution_mode": "local"
+        }
+
+    async def _batch_scrape(self, urls: List[str], method: str = "auto") -> Dict[str, Any]:
+        """Batch scraping with rate limiting from config"""
+        results = []
+        rate_limit = self.scraper_config.get('rate_limit_seconds', 1.0)
+
+        for i, url in enumerate(urls):
+            try:
+                result = await self._scrape_url(url, method)
+                results.append(result)
+
+                if i < len(urls) - 1:
+                    await asyncio.sleep(rate_limit)
+
+            except Exception as e:
+                results.append({
+                    "success": False,
+                    "url": url,
+                    "error": str(e)
+                })
+
+        successful = sum(1 for r in results if r.get('success', False))
+
+        return {
+            "success": True,
+            "total_urls": len(urls),
+            "successful": successful,
+            "failed": len(urls) - successful,
+            "results": results,
+            "execution_mode": self.execution_mode
+        }
+
+    async def _check_accessibility(self, url: str) -> Dict[str, Any]:
+        """Check URL accessibility"""
+        try:
+            result = await self._scrape_url(url, extract_links=False, extract_images=False)
+            return {
+                "success": True,
+                "url": url,
+                "accessible": result.get('success', False),
+                "status_code": result.get('status_code'),
+                "response_time": result.get('response_time', 0),
+                "error": result.get('error'),
+                "timestamp": datetime.now().isoformat()
+            }
+        except Exception as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "url": url,
+                "timestamp": datetime.now().isoformat()
+            }
+
+    def _extract_urls_from_text(self, text: str) -> List[str]:
+        """Extract URLs from text"""
+        url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
+        return re.findall(url_pattern, text)
+
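
A quick sanity check of the URL pattern above (illustrative input):

    import re

    url_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
    text = "scrape https://example.com and https://docs.python.org/3/ please"
    print(re.findall(url_pattern, text))
    # ['https://example.com', 'https://docs.python.org/3/']
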
+    async def process_message(self, message: AgentMessage, context: ExecutionContext) -> AgentMessage:
+        """Process scraping requests"""
+        self.memory.store_message(message)
+
+        try:
+            content = message.content
+            urls = self._extract_urls_from_text(content)
+
+            if not urls:
+                response_content = f"""🕷️ **Web Scraper Agent** - Configuration-Driven
+
+**🔧 Current Mode:** {self.execution_mode.upper()}
+**📡 Proxy Enabled:** {'✅' if self.proxy_config else '❌'}
+**🐳 Docker Available:** {'✅' if self.docker_executor and self.docker_executor.available else '❌'}
+**🔒 SSL Verification:** {'❌ Disabled (Proxy Mode)' if self.proxy_config else '✅ Enabled'}
+
+**🚀 Capabilities:**
+- Single URL scraping with multiple methods
+- Batch URL processing with rate limiting
+- Proxy support (ScraperAPI compatible)
+- Docker-based secure execution
+- Local fallback methods
+
+**📝 Usage Examples:**
+- `scrape https://example.com`
+- `batch scrape https://site1.com https://site2.com`
+- `check if https://example.com is accessible`
+
+Provide URLs to start scraping! 🎯"""
+
+            elif len(urls) == 1:
+                # Single URL
+                result = await self._scrape_url(urls[0])
+
+                if result['success']:
+                    response_content = f"""✅ **Scraping Completed**
+
+🌐 **URL:** {result['url']}
+🔧 **Method:** {result.get('method', 'unknown')}
+🏃 **Mode:** {result['execution_mode']}
+📊 **Status:** {result.get('status_code', 'N/A')}
+📄 **Content:** {result['content_length']:,} characters
+⏱️ **Time:** {result['response_time']:.2f}s
+
+**Title:** {result.get('title', 'No title')}
+
+**Content Preview:**
+{result.get('content', '')[:300]}{'...' if len(result.get('content', '')) > 300 else ''}"""
+                else:
+                    response_content = f"❌ **Scraping failed:** {result['error']}"
+
+            else:
+                # Multiple URLs
+                result = await self._batch_scrape(urls)
+
+                response_content = f"""🕷️ **Batch Scraping Results**
+
+📊 **Summary:**
+- **Total URLs:** {result['total_urls']}
+- **Successful:** {result['successful']}
+- **Failed:** {result['failed']}
+- **Mode:** {result['execution_mode']}
+
+✅ **Status:** Completed batch operation"""
+
+            response = self.create_response(
+                content=response_content,
+                recipient_id=message.sender_id,
+                session_id=message.session_id,
+                conversation_id=message.conversation_id
+            )
+
+            self.memory.store_message(response)
+            return response
+
+        except Exception as e:
+            self.logger.error(f"Message processing error: {e}")
+            error_response = self.create_response(
+                content=f"❌ **Error:** {str(e)}",
+                recipient_id=message.sender_id,
+                message_type=MessageType.ERROR,
+                session_id=message.session_id,
+                conversation_id=message.conversation_id
+            )
+            return error_response
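
To close, a hedged end-to-end sketch of driving the agent directly. It assumes an agent_config.yaml with a web_scraping section is discoverable by load_config(), and that BaseAgent accepts the defaulted memory_manager and llm_service seen in __init__; the URL is a placeholder. The same coroutine backs the scrape_url tool registered above.

    import asyncio

    from ambivo_agents.agents.web_scraper import WebScraperAgent

    async def main():
        agent = WebScraperAgent()  # execution mode is chosen from the web_scraping config section
        result = await agent._scrape_url("https://example.com", method="auto")
        if result["success"]:
            print(result["title"], result["content_length"], result["execution_mode"])
        else:
            print("Scrape failed:", result["error"])

    asyncio.run(main())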