semantio 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. {semantio-0.0.4 → semantio-0.0.6}/LICENSE +1 -1
  2. {semantio-0.0.4 → semantio-0.0.6}/PKG-INFO +1 -1
  3. semantio-0.0.6/semantio/__init__.py +4 -0
  4. {semantio-0.0.4 → semantio-0.0.6}/semantio/agent.py +86 -23
  5. semantio-0.0.6/semantio/memory.py +54 -0
  6. semantio-0.0.6/semantio/models.py +9 -0
  7. semantio-0.0.6/semantio/storage/__init__.py +5 -0
  8. semantio-0.0.6/semantio/storage/base_storage.py +12 -0
  9. semantio-0.0.6/semantio/storage/in_memory_storage.py +14 -0
  10. semantio-0.0.6/semantio/storage/local_storage.py +29 -0
  11. semantio-0.0.6/semantio/tools/web_browser.py +439 -0
  12. {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/PKG-INFO +1 -1
  13. {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/SOURCES.txt +3 -0
  14. {semantio-0.0.4 → semantio-0.0.6}/setup.py +1 -1
  15. semantio-0.0.4/semantio/memory.py +0 -11
  16. semantio-0.0.4/semantio/storage/__init__.py +0 -0
  17. semantio-0.0.4/semantio/storage/local_storage.py +0 -0
  18. semantio-0.0.4/semantio/tools/__init__.py +0 -0
  19. semantio-0.0.4/semantio/tools/web_browser.py +0 -271
  20. {semantio-0.0.4 → semantio-0.0.6}/README.md +0 -0
  21. {semantio-0.0.4/semantio → semantio-0.0.6/semantio/api}/__init__.py +0 -0
  22. {semantio-0.0.4 → semantio-0.0.6}/semantio/api/api_generator.py +0 -0
  23. {semantio-0.0.4 → semantio-0.0.6}/semantio/api/fastapi_app.py +0 -0
  24. {semantio-0.0.4/semantio/api → semantio-0.0.6/semantio/cli}/__init__.py +0 -0
  25. {semantio-0.0.4 → semantio-0.0.6}/semantio/cli/main.py +0 -0
  26. {semantio-0.0.4 → semantio-0.0.6}/semantio/knowledge_base/__init__.py +0 -0
  27. {semantio-0.0.4 → semantio-0.0.6}/semantio/knowledge_base/document_loader.py +0 -0
  28. {semantio-0.0.4 → semantio-0.0.6}/semantio/knowledge_base/retriever.py +0 -0
  29. {semantio-0.0.4 → semantio-0.0.6}/semantio/knowledge_base/vector_store.py +0 -0
  30. {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/__init__.py +0 -0
  31. {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/anthropic.py +0 -0
  32. {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/base_llm.py +0 -0
  33. {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/deepseek.py +0 -0
  34. {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/gemini.py +0 -0
  35. {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/groq.py +0 -0
  36. {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/mistral.py +0 -0
  37. {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/openai.py +0 -0
  38. {semantio-0.0.4 → semantio-0.0.6}/semantio/rag.py +0 -0
  39. {semantio-0.0.4 → semantio-0.0.6}/semantio/storage/cloud_storage.py +0 -0
  40. {semantio-0.0.4/semantio/cli → semantio-0.0.6/semantio/tools}/__init__.py +0 -0
  41. {semantio-0.0.4 → semantio-0.0.6}/semantio/tools/base_tool.py +0 -0
  42. {semantio-0.0.4 → semantio-0.0.6}/semantio/tools/crypto.py +0 -0
  43. {semantio-0.0.4 → semantio-0.0.6}/semantio/tools/duckduckgo.py +0 -0
  44. {semantio-0.0.4 → semantio-0.0.6}/semantio/tools/stocks.py +0 -0
  45. {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/__init__.py +0 -0
  46. {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/config.py +0 -0
  47. {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/date_utils.py +0 -0
  48. {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/file_utils.py +0 -0
  49. {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/logger.py +0 -0
  50. {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/validation_utils.py +0 -0
  51. {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/dependency_links.txt +0 -0
  52. {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/entry_points.txt +0 -0
  53. {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/requires.txt +0 -0
  54. {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/top_level.txt +0 -0
  55. {semantio-0.0.4 → semantio-0.0.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  MIT License
2
2
 
3
- Copyright (c) 2025 Syenah
3
+ Copyright (c) 2025 Syenah (Semantio)
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -0,0 +1,4 @@
1
+ import warnings
2
+
3
+ # Suppress all warnings globally
4
+ warnings.simplefilter("ignore")
@@ -16,6 +16,7 @@ from .tools.base_tool import BaseTool
16
16
  from pathlib import Path
17
17
  import importlib
18
18
  import os
19
+ from .memory import Memory
19
20
 
20
21
  # Configure logging
21
22
  logging.basicConfig(level=logging.INFO)
@@ -48,6 +49,13 @@ class Agent(BaseModel):
48
49
  semantic_model: Optional[Any] = Field(None, description="SentenceTransformer model for semantic matching.")
49
50
  team: Optional[List['Agent']] = Field(None, description="List of assistants in the team.")
50
51
  auto_tool: bool = Field(False, description="Whether to automatically detect and call tools.")
52
+ memory: Memory = Field(default_factory=Memory)
53
+ memory_config: Dict = Field(
54
+ default_factory=lambda: {
55
+ "max_context_length": 4000,
56
+ "summarization_threshold": 3000
57
+ }
58
+ )
51
59
 
52
60
  # Allow arbitrary types
53
61
  model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -56,6 +64,11 @@ class Agent(BaseModel):
56
64
  super().__init__(**kwargs)
57
65
  # Initialize the model and tools here if needed
58
66
  self._initialize_model()
67
+ # Initialize memory with config
68
+ self.memory = Memory(
69
+ max_context_length=self.memory_config.get("max_context_length", 4000),
70
+ summarization_threshold=self.memory_config.get("summarization_threshold", 3000)
71
+ )
59
72
  # Initialize tools as an empty list if not provided
60
73
  if self.tools is None:
61
74
  self.tools = []
@@ -218,20 +231,31 @@ class Agent(BaseModel):
218
231
  markdown: bool = False,
219
232
  team: Optional[List['Agent']] = None,
220
233
  **kwargs,
221
- ) -> Union[str, Dict]: # Add return type hint
234
+ ) -> Union[str, Dict]:
222
235
  """Print the agent's response to the console and return it."""
236
+
237
+ # Store user message if provided
238
+ if message and isinstance(message, str):
239
+ self.memory.add_message(role="user", content=message)
223
240
 
224
241
  if stream:
225
242
  # Handle streaming response
226
243
  response = ""
227
244
  for chunk in self._stream_response(message, markdown=markdown, **kwargs):
228
- print(chunk)
245
+ print(chunk, end="", flush=True)
229
246
  response += chunk
247
+ # Store agent response
248
+ if response:
249
+ self.memory.add_message(role="assistant", content=response)
250
+ print() # New line after streaming
230
251
  return response
231
252
  else:
232
253
  # Generate and return the response
233
254
  response = self._generate_response(message, markdown=markdown, team=team, **kwargs)
234
255
  print(response) # Print the response to the console
256
+ # Store agent response
257
+ if response:
258
+ self.memory.add_message(role="assistant", content=response)
235
259
  return response
236
260
 
237
261
 
@@ -294,12 +318,10 @@ class Agent(BaseModel):
294
318
  # Use the specified team if provided
295
319
  if team is not None:
296
320
  return self._generate_team_response(message, team, markdown=markdown, **kwargs)
297
-
298
321
  # Initialize tool_outputs as an empty dictionary
299
322
  tool_outputs = {}
300
323
  responses = []
301
324
  tool_calls = []
302
-
303
325
  # Use the LLM to analyze the query and dynamically select tools when auto_tool is enabled
304
326
  if self.auto_tool:
305
327
  tool_calls = self._analyze_query_and_select_tools(message)
@@ -347,13 +369,17 @@ class Agent(BaseModel):
347
369
  try:
348
370
  # Prepare the context for the LLM
349
371
  context = {
372
+ "conversation_history": self.memory.get_context(self.llm_instance),
350
373
  "tool_outputs": tool_outputs,
351
374
  "rag_context": self.rag.retrieve(message) if self.rag else None,
352
- "knowledge_base_context": self._find_all_relevant_keys(message, self._flatten_data(self.knowledge_base)) if self.knowledge_base else None,
375
+ "knowledge_base": self._get_knowledge_context(message) if self.knowledge_base else None,
353
376
  }
354
-
377
+ # 3. Build a memory-aware prompt.
378
+ prompt = self._build_memory_prompt(message, context)
379
+ # To (convert MemoryEntry objects to dicts and remove metadata):
380
+ memory_entries = [{"role": e.role, "content": e.content} for e in self.memory.storage.retrieve()]
355
381
  # Generate a response using the LLM
356
- llm_response = self.llm_instance.generate(prompt=message, context=context, **kwargs)
382
+ llm_response = self.llm_instance.generate(prompt=prompt, context=context, memory=memory_entries, **kwargs)
357
383
  responses.append(f"**Analysis:**\n\n{llm_response}")
358
384
  except Exception as e:
359
385
  logger.error(f"Failed to generate LLM response: {e}")
@@ -363,25 +389,30 @@ class Agent(BaseModel):
363
389
  # Retrieve relevant context using RAG
364
390
  rag_context = self.rag.retrieve(message) if self.rag else None
365
391
  # Retrieve relevant context from the knowledge base (API result)
366
- knowledge_base_context = None
367
- if self.knowledge_base:
368
- # Flatten the knowledge base
369
- flattened_data = self._flatten_data(self.knowledge_base)
370
- # Find all relevant key-value pairs in the knowledge base
371
- relevant_values = self._find_all_relevant_keys(message, flattened_data)
372
- if relevant_values:
373
- knowledge_base_context = ", ".join(relevant_values)
392
+ # knowledge_base_context = None
393
+ # if self.knowledge_base:
394
+ # # Flatten the knowledge base
395
+ # flattened_data = self._flatten_data(self.knowledge_base)
396
+ # # Find all relevant key-value pairs in the knowledge base
397
+ # relevant_values = self._find_all_relevant_keys(message, flattened_data)
398
+ # if relevant_values:
399
+ # knowledge_base_context = ", ".join(relevant_values)
374
400
 
375
401
  # Combine both contexts (RAG and knowledge base)
376
402
  context = {
403
+ "conversation_history": self.memory.get_context(self.llm_instance),
377
404
  "rag_context": rag_context,
378
- "knowledge_base_context": knowledge_base_context,
405
+ "knowledge_base": self._get_knowledge_context(message),
379
406
  }
380
407
  # Prepare the prompt with instructions, description, and context
381
- prompt = self._build_prompt(message, context)
408
+ # 3. Build a memory-aware prompt.
409
+ prompt = self._build_memory_prompt(message, context)
410
+ # To (convert MemoryEntry objects to dicts and remove metadata):
411
+ memory_entries = [{"role": e.role, "content": e.content} for e in self.memory.storage.retrieve()]
382
412
 
383
413
  # Generate the response using the LLM
384
- response = self.llm_instance.generate(prompt=prompt, context=context, **kwargs)
414
+ response = self.llm_instance.generate(prompt=prompt, context=context, memory=memory_entries, **kwargs)
415
+
385
416
 
386
417
  # Format the response based on the json_output flag
387
418
  if self.json_output:
@@ -394,9 +425,37 @@ class Agent(BaseModel):
394
425
  if markdown:
395
426
  return f"**Response:**\n\n{response}"
396
427
  return response
397
- # Combine all responses into a single string
398
428
  return "\n\n".join(responses)
399
429
 
430
+ # Modified prompt construction with memory integration
431
+ def _build_memory_prompt(self, user_input: str, context: dict) -> str:
432
+ """Enhanced prompt builder with memory context."""
433
+ prompt_parts = []
434
+
435
+ if self.description:
436
+ prompt_parts.append(f"# ROLE\n{self.description}")
437
+
438
+ if self.instructions:
439
+ prompt_parts.append(f"# INSTRUCTIONS\n" + "\n".join(f"- {i}" for i in self.instructions))
440
+
441
+ if context['conversation_history']:
442
+ prompt_parts.append(f"# CONVERSATION HISTORY\n{context['conversation_history']}")
443
+
444
+ if context['knowledge_base']:
445
+ prompt_parts.append(f"# KNOWLEDGE BASE\n{context['knowledge_base']}")
446
+
447
+ prompt_parts.append(f"# USER INPUT\n{user_input}")
448
+
449
+ return "\n\n".join(prompt_parts)
450
+
451
+ def _get_knowledge_context(self, message: str) -> str:
452
+ """Retrieve and format knowledge base context."""
453
+ if not self.knowledge_base:
454
+ return ""
455
+
456
+ flattened = self._flatten_data(self.knowledge_base)
457
+ relevant = self._find_all_relevant_keys(message, flattened)
458
+ return "\n".join(f"- {item}" for item in relevant) if relevant else ""
400
459
  def _generate_team_response(self, message: str, team: List['Agent'], markdown: bool = False, **kwargs) -> str:
401
460
  """Generate a response using a team of assistants."""
402
461
  responses = []
@@ -543,17 +602,21 @@ class Agent(BaseModel):
543
602
  """Run the agent in a CLI app."""
544
603
  from rich.prompt import Prompt
545
604
 
605
+ # Print initial message if provided
546
606
  if message:
547
607
  self.print_response(message=message, **kwargs)
548
608
 
549
609
  _exit_on = exit_on or ["exit", "quit", "bye"]
550
610
  while True:
551
- message = Prompt.ask(f"[bold] {self.emoji} {self.user_name} [/bold]")
552
- if message in _exit_on:
611
+ try:
612
+ message = Prompt.ask(f"[bold] {self.emoji} {self.user_name} [/bold]")
613
+ if message in _exit_on:
614
+ break
615
+ self.print_response(message=message, **kwargs)
616
+ except KeyboardInterrupt:
617
+ print("\n\nSession ended. Goodbye!")
553
618
  break
554
619
 
555
- self.print_response(message=message, **kwargs)
556
-
557
620
  def _generate_api(self):
558
621
  """Generate an API for the agent if api=True."""
559
622
  from .api.api_generator import APIGenerator
@@ -0,0 +1,54 @@
1
+ from .models import MemoryEntry
2
+ from .storage import BaseMemoryStorage, InMemoryStorage, FileStorage
3
+ from typing import List, Dict, Optional
4
+ from .llm.base_llm import BaseLLM
5
+ class Memory:
6
+ def __init__(
7
+ self,
8
+ storage: BaseMemoryStorage = InMemoryStorage(),
9
+ max_context_length: int = 4000,
10
+ summarization_threshold: int = 3000
11
+ ):
12
+ self.storage = storage
13
+ self.max_context_length = max_context_length
14
+ self.summarization_threshold = summarization_threshold
15
+ self._current_context = ""
16
+
17
+ def add_message(self, role: str, content: str, metadata: Optional[Dict] = None):
18
+ entry = MemoryEntry(
19
+ role=role,
20
+ content=content,
21
+ metadata=metadata or {}
22
+ )
23
+ self.storage.store(entry)
24
+ self._manage_context()
25
+
26
+ def get_context(self, llm: Optional[BaseLLM] = None) -> str:
27
+ if len(self._current_context) < self.summarization_threshold:
28
+ return self._current_context
29
+
30
+ # Automatic summarization when context grows too large
31
+ if llm:
32
+ return self.summarize(llm)
33
+ return self._current_context[:self.max_context_length]
34
+ def _manage_context(self):
35
+ # Include roles in the conversation history
36
+ full_history = "\n".join([f"{e.role}: {e.content}" for e in self.storage.retrieve()])
37
+ if len(full_history) > self.max_context_length:
38
+ self._current_context = full_history[-self.max_context_length:]
39
+ else:
40
+ self._current_context = full_history
41
+
42
+ def summarize(self, llm: BaseLLM) -> str:
43
+ # Include roles in the history for summarization
44
+ history = "\n".join([f"{e.role}: {e.content}" for e in self.storage.retrieve()])
45
+ prompt = f"""
46
+ Summarize this conversation history maintaining key details and references:
47
+ {history[-self.summarization_threshold:]}
48
+ """
49
+ self._current_context = llm.generate(prompt)
50
+ return self._current_context
51
+
52
+ def clear(self):
53
+ self.storage = InMemoryStorage()
54
+ self._current_context = ""
@@ -0,0 +1,9 @@
1
+ from pydantic import BaseModel, Field
2
+ from datetime import datetime
3
+ from typing import Dict
4
+
5
+ class MemoryEntry(BaseModel):
6
+ role: str # "user" or "assistant"
7
+ content: str
8
+ timestamp: datetime = Field(default_factory=datetime.now)
9
+ metadata: Dict = Field(default_factory=dict)
@@ -0,0 +1,5 @@
1
+ from .base_storage import BaseMemoryStorage
2
+ from .in_memory_storage import InMemoryStorage
3
+ from .local_storage import FileStorage
4
+
5
+ __all__ = ['BaseMemoryStorage', 'InMemoryStorage', 'FileStorage']
@@ -0,0 +1,12 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List, Optional
3
+ from ..models import MemoryEntry
4
+
5
+ class BaseMemoryStorage(ABC):
6
+ @abstractmethod
7
+ def store(self, entry: MemoryEntry):
8
+ pass
9
+
10
+ @abstractmethod
11
+ def retrieve(self, query: Optional[str] = None, limit: int = 20) -> List[MemoryEntry]:
12
+ pass
@@ -0,0 +1,14 @@
1
+ # hashai/storage/in_memory_storage.py
2
+ from typing import List, Optional
3
+ from ..models import MemoryEntry
4
+ from .base_storage import BaseMemoryStorage
5
+
6
+ class InMemoryStorage(BaseMemoryStorage):
7
+ def __init__(self):
8
+ self.history: List[MemoryEntry] = []
9
+
10
+ def store(self, entry: MemoryEntry):
11
+ self.history.append(entry)
12
+
13
+ def retrieve(self, query: Optional[str] = None, limit: int = 10) -> List[MemoryEntry]:
14
+ return self.history[-limit:]
@@ -0,0 +1,29 @@
1
+ import json
2
+ from typing import List, Optional
3
+ from ..models import MemoryEntry
4
+ from .base_storage import BaseMemoryStorage
5
+
6
+ class FileStorage(BaseMemoryStorage):
7
+ def __init__(self, file_path: str = "memory.json"):
8
+ self.file_path = file_path
9
+ self.history = self._load_from_file()
10
+
11
+ def _load_from_file(self) -> List[MemoryEntry]:
12
+ try:
13
+ with open(self.file_path, "r") as f:
14
+ data = json.load(f)
15
+ return [MemoryEntry(**entry) for entry in data]
16
+ except (FileNotFoundError, json.JSONDecodeError):
17
+ return []
18
+
19
+ def _save_to_file(self):
20
+ with open(self.file_path, "w") as f:
21
+ data = [entry.dict() for entry in self.history]
22
+ json.dump(data, f, default=str)
23
+
24
+ def store(self, entry: MemoryEntry):
25
+ self.history.append(entry)
26
+ self._save_to_file()
27
+
28
+ def retrieve(self, query: Optional[str] = None, limit: int = 20) -> List[MemoryEntry]:
29
+ return self.history[-limit:]
@@ -0,0 +1,439 @@
1
+ # web_browser.py
2
+ from typing import Dict, Any, List, Optional, Callable
3
+ from pydantic import Field, BaseModel
4
+ from selenium import webdriver
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.common.action_chains import ActionChains
7
+ from selenium.webdriver.remote.webelement import WebElement
8
+ from selenium.webdriver.support.ui import WebDriverWait
9
+ from selenium.webdriver.support import expected_conditions as EC
10
+ from selenium.webdriver.chrome.options import Options
11
+ from selenium.webdriver.chrome.service import Service
12
+ from webdriver_manager.chrome import ChromeDriverManager
13
+ from bs4 import BeautifulSoup
14
+ import json
15
+ import time
16
+ import re
17
+ import logging
18
+ import os
19
+ import difflib
20
+ from .base_tool import BaseTool
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ class BrowserPlan(BaseModel):
25
+ tasks: List[Dict[str, Any]] = Field(
26
+ ...,
27
+ description="List of automation tasks to execute"
28
+ )
29
+
30
+ class WebBrowserTool(BaseTool):
31
+ name: str = Field("WebBrowser", description="Name of the tool")
32
+ description: str = Field(
33
+ "Highly advanced universal web automation tool with advanced element identification, AJAX waiting, modal dismissal, multi-tab support, and custom JS injection.",
34
+ description="Tool description"
35
+ )
36
+
37
+ default_timeout: int = 15 # Default wait timeout in seconds
38
+ max_retries: int = 3 # Increased maximum retries for any task
39
+
40
+ def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
41
+ """Execute an advanced dynamic web automation workflow."""
42
+ driver = None
43
+ overall_start = time.time()
44
+ try:
45
+ headless = input.get("headless", False)
46
+ self.default_timeout = int(input.get("timeout", self.default_timeout))
47
+ self.max_retries = int(input.get("max_retries", self.max_retries))
48
+ driver = self._init_browser(headless)
49
+ results = []
50
+ current_url = ""
51
+
52
+ plan = self._generate_plan(input.get('query', ''), current_url)
53
+ if not plan.tasks:
54
+ raise ValueError("No valid tasks in the generated plan.")
55
+
56
+ # Dynamic mapping: action name to handler function.
57
+ action_map: Dict[str, Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]] = {
58
+ "navigate": lambda d, task: self._handle_navigation(d, task.get("value", "")),
59
+ "click": lambda d, task: self._handle_click(d, task.get("selector", "")),
60
+ "type": lambda d, task: self._handle_typing(d, task.get("selector", ""), task.get("value", ""), task),
61
+ "wait": lambda d, task: self._handle_wait(task.get("value", "")),
62
+ "wait_for_ajax": lambda d, task: self._handle_wait_for_ajax(d, task.get("value", "30")),
63
+ "scroll": lambda d, task: self._handle_scroll(d, task.get("selector", "")),
64
+ "hover": lambda d, task: self._handle_hover(d, task.get("selector", "")),
65
+ "screenshot": lambda d, task: self._handle_screenshot(d, task.get("value", "screenshot.png")),
66
+ "switch_tab": lambda d, task: self._handle_switch_tab(d, task.get("value", "0")),
67
+ "execute_script": lambda d, task: self._handle_execute_script(d, task.get("value", "")),
68
+ "drag_and_drop": lambda d, task: self._handle_drag_and_drop(d, task.get("selector", ""), task.get("value", "")),
69
+ }
70
+
71
+ for task in plan.tasks:
72
+ # Before each action, dismiss modals/overlays.
73
+ self._dismiss_unwanted_modals(driver)
74
+ action = task.get("action", "").lower()
75
+ logger.info(f"Executing task: {task.get('description', action)}")
76
+ start_time = time.time()
77
+ handler = action_map.get(action)
78
+ if not handler:
79
+ results.append({
80
+ "action": action,
81
+ "success": False,
82
+ "message": f"Unsupported action: {action}"
83
+ })
84
+ continue
85
+
86
+ result = self._execute_with_retries(driver, task, handler)
87
+ elapsed = time.time() - start_time
88
+ result["elapsed"] = elapsed
89
+ logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
90
+ results.append(result)
91
+
92
+ if not result.get('success', False):
93
+ logger.error(f"Task failed: {result.get('message')}")
94
+ self._capture_failure_screenshot(driver, action)
95
+ break
96
+
97
+ current_url = driver.current_url
98
+
99
+ overall_elapsed = time.time() - overall_start
100
+ logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
101
+ return {"status": "success", "results": results, "total_time": overall_elapsed}
102
+
103
+ except Exception as e:
104
+ logger.exception("Execution error:")
105
+ return {"status": "error", "message": str(e)}
106
+ finally:
107
+ if driver:
108
+ driver.quit()
109
+
110
+ def _init_browser(self, headless: bool) -> webdriver.Chrome:
111
+ """Initialize browser with advanced options."""
112
+ options = Options()
113
+ options.add_argument("--start-maximized")
114
+ options.add_argument("--disable-blink-features=AutomationControlled")
115
+ options.add_experimental_option("excludeSwitches", ["enable-automation"])
116
+ if headless:
117
+ options.add_argument("--headless=new")
118
+ return webdriver.Chrome(
119
+ service=Service(ChromeDriverManager().install()),
120
+ options=options
121
+ )
122
+
123
+ def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
124
+ """Generate an adaptive execution plan using an LLM or other dynamic planner."""
125
+ prompt = f"""Generate browser automation plan for: {query}
126
+
127
+ Current URL: {current_url or 'No page loaded yet'}
128
+
129
+ Required JSON format:
130
+ {{
131
+ "tasks": [
132
+ {{
133
+ "action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
134
+ "selector": "CSS selector (optional)",
135
+ "value": "input text/URL/seconds/filename/target-selector",
136
+ "description": "action purpose"
137
+ }}
138
+ ]
139
+ }}
140
+
141
+ Guidelines:
142
+ 1. Prefer IDs in selectors (#element-id) and semantic attributes.
143
+ 2. Include wait steps after navigation and wait for AJAX where applicable.
144
+ 3. Dismiss any modals/pop-ups that are not part of the task.
145
+ 4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
146
+ 5. For execute_script, 'value' should contain valid JavaScript.
147
+ 6. For switch_tab, 'value' should be an index or keyword 'new'.
148
+ """
149
+ response = self.llm.generate(prompt=prompt)
150
+ return self._parse_plan(response)
151
+
152
+ def _parse_plan(self, response: str) -> BrowserPlan:
153
+ """Robust JSON parsing with multiple fallback strategies."""
154
+ try:
155
+ json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
156
+ if json_match:
157
+ plan_data = json.loads(json_match.group(1).strip())
158
+ else:
159
+ json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
160
+ if not json_str_match:
161
+ raise ValueError("No JSON object found in the response.")
162
+ plan_data = json.loads(json_str_match.group())
163
+ validated_tasks = []
164
+ for task in plan_data.get("tasks", []):
165
+ if not all(key in task for key in ["action", "description"]):
166
+ logger.warning(f"Skipping task due to missing keys: {task}")
167
+ continue
168
+ validated_tasks.append({
169
+ "action": task["action"],
170
+ "selector": task.get("selector", ""),
171
+ "value": task.get("value", ""),
172
+ "description": task["description"]
173
+ })
174
+ return BrowserPlan(tasks=validated_tasks)
175
+ except (json.JSONDecodeError, AttributeError, ValueError) as e:
176
+ logger.error(f"Plan parsing failed: {e}")
177
+ return BrowserPlan(tasks=[])
178
+
179
+ def _execute_with_retries(self, driver: webdriver.Chrome, task: Dict[str, Any],
180
+ handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
181
+ """Execute a task with retry logic and exponential backoff."""
182
+ attempts = 0
183
+ result = {}
184
+ while attempts < self.max_retries:
185
+ result = self._execute_safe_task(driver, task, handler)
186
+ if result.get("success", False):
187
+ return result
188
+ attempts += 1
189
+ logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
190
+ time.sleep(1 * attempts)
191
+ return result
192
+
193
+ def _execute_safe_task(self, driver: webdriver.Chrome, task: Dict[str, Any],
194
+ handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
195
+ """Execute a task with comprehensive error handling."""
196
+ try:
197
+ return handler(driver, task)
198
+ except Exception as e:
199
+ action = task.get("action", "unknown")
200
+ logger.exception(f"Error executing task '{action}':")
201
+ return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
202
+
203
+ def _dismiss_unwanted_modals(self, driver: webdriver.Chrome):
204
+ """
205
+ Dismiss or remove unwanted modals, overlays, or pop-ups.
206
+ First attempts to click a close button; if not available, removes the element via JS.
207
+ """
208
+ try:
209
+ modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
210
+ for selector in modal_selectors:
211
+ elements = driver.find_elements(By.CSS_SELECTOR, selector)
212
+ for modal in elements:
213
+ if modal.is_displayed():
214
+ close_selectors = [".close", ".btn-close", "[aria-label='Close']", "[data-dismiss='modal']"]
215
+ dismissed = False
216
+ for close_sel in close_selectors:
217
+ try:
218
+ close_button = modal.find_element(By.CSS_SELECTOR, close_sel)
219
+ if close_button.is_displayed():
220
+ close_button.click()
221
+ dismissed = True
222
+ logger.info(f"Dismissed modal using selector {close_sel}")
223
+ time.sleep(1)
224
+ break
225
+ except Exception:
226
+ continue
227
+ if not dismissed:
228
+ # Remove overlay by setting display to none
229
+ driver.execute_script("arguments[0].remove();", modal)
230
+ logger.info(f"Removed overlay/modal with selector {selector}")
231
+ except Exception as e:
232
+ logger.debug(f"Modal dismissal error: {e}")
233
+
234
+ def _advanced_find_element(self, driver: webdriver.Chrome, keyword: str) -> Optional[WebElement]:
235
+ """
236
+ Advanced fallback for finding an element.
237
+ Searches across multiple attributes and inner text using fuzzy matching.
238
+ """
239
+ candidates = driver.find_elements(By.CSS_SELECTOR, "input, textarea, button, a, div")
240
+ best_match = None
241
+ best_ratio = 0.0
242
+ for candidate in candidates:
243
+ combined_text = " ".join([
244
+ candidate.get_attribute("id") or "",
245
+ candidate.get_attribute("name") or "",
246
+ candidate.get_attribute("placeholder") or "",
247
+ candidate.get_attribute("aria-label") or "",
248
+ candidate.text or "",
249
+ ])
250
+ ratio = difflib.SequenceMatcher(None, combined_text.lower(), keyword.lower()).ratio()
251
+ if ratio > best_ratio:
252
+ best_ratio = ratio
253
+ best_match = candidate
254
+ if best_ratio > 0.5:
255
+ logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
256
+ return best_match
257
+ return None
258
+
259
+ def _handle_navigation(self, driver: webdriver.Chrome, url: str) -> Dict[str, Any]:
260
+ """Handle navigation with URL correction."""
261
+ if not url.startswith(("http://", "https://")):
262
+ url = f"https://{url}"
263
+ try:
264
+ driver.get(url)
265
+ WebDriverWait(driver, self.default_timeout).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
266
+ return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
267
+ except Exception as e:
268
+ logger.error(f"Navigation to {url} failed: {e}")
269
+ return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
270
+
271
+ def _handle_click(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
272
+ """Handle click actions with fallback using JS if needed."""
273
+ try:
274
+ element = WebDriverWait(driver, self.default_timeout).until(
275
+ EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
276
+ )
277
+ driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
278
+ try:
279
+ element.click()
280
+ except Exception:
281
+ driver.execute_script("arguments[0].click();", element)
282
+ return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
283
+ except Exception as e:
284
+ logger.error(f"Click action failed on selector {selector}: {e}")
285
+ return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
286
+
287
def _handle_typing(self, driver: webdriver.Chrome, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Type ``text`` into the element matched by ``selector``.

    The element is looked up by CSS selector, waiting with
    ``self.default_timeout``. If that lookup fails and either the task
    description or the selector mentions "search", the element is instead
    requested from ``self._advanced_find_element(driver, "search")``; a falsy
    result from that helper is treated as "not found".

    Args:
        driver: Browser session to act on.
        selector: CSS selector of the input element (may be empty/None).
        text: Text to send after clearing the element.
        task: The task dict; only its ``description`` entry is read.

    Returns:
        A dict with the keys ``action``, ``success`` and ``message``.
    """
    try:
        element = WebDriverWait(driver, self.default_timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector))
        )
    except Exception as e:
        # ``description`` may be present but null and ``selector`` may be
        # None; normalise both so this handler cannot itself raise.
        description = str(task.get("description") or "").lower()
        selector_text = str(selector or "").lower()
        if "search" not in description and "search" not in selector_text:
            return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
        logger.info("Primary selector failed; using advanced fallback for element detection.")
        element = self._advanced_find_element(driver, "search")
        if not element:
            return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
    try:
        element.clear()
        element.send_keys(text)
        return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
    except Exception as e:
        logger.error(f"Typing action failed: {e}")
        return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
312
+
313
def _handle_wait(self, seconds: str) -> Dict[str, Any]:
    """
    Sleep for a fixed number of seconds.

    Args:
        seconds: Wait duration; anything ``float()`` accepts.

    Returns:
        A dict with the keys ``action``, ``success`` and ``message``.
        ``success`` is False when ``seconds`` cannot be used as a sleep
        duration (non-numeric, None, negative, NaN or infinite).
    """
    try:
        wait_time = float(seconds)
        logger.info(f"Waiting for {wait_time} seconds")
        time.sleep(wait_time)
    except (TypeError, ValueError, OverflowError):
        # TypeError: float(None) and similar; ValueError: unparsable text,
        # negative or NaN durations; OverflowError: infinite durations.
        logger.error(f"Invalid wait time provided: {seconds}")
        return {"action": "wait", "success": False, "message": "Invalid wait time"}
    return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
323
+
324
def _handle_wait_for_ajax(self, driver: webdriver.Chrome, seconds: str) -> Dict[str, Any]:
    """
    Wait until jQuery reports no active AJAX requests, or a timeout elapses.

    The page is polled roughly every 0.5 seconds. A page without jQuery is
    considered idle immediately; no other kind of network activity is
    inspected.

    Args:
        driver: Browser session to poll.
        seconds: Maximum time to wait; anything ``float()`` accepts, as long
            as the value is finite.

    Returns:
        A dict with the keys ``action``, ``success`` and ``message``.
        Reaching the timeout is still reported with ``success`` True (the
        wait is best-effort), but the message states that activity was still
        pending. ``success`` is False only when ``seconds`` is invalid or the
        browser call raises.
    """
    try:
        timeout = float(seconds)
        # Rejects NaN and +/-inf, which would otherwise make the loop endless.
        if not float("-inf") < timeout < float("inf"):
            raise ValueError(f"wait time must be finite: {seconds}")
        logger.info(f"Waiting for AJAX/network activity for up to {timeout} seconds.")
        # Monotonic clock so wall-clock adjustments cannot stretch the wait.
        deadline = time.monotonic() + timeout
        while True:
            idle = driver.execute_script("return window.jQuery ? jQuery.active === 0 : true;")
            if idle:
                return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(0.5, remaining))
        return {
            "action": "wait_for_ajax",
            "success": True,
            "message": f"AJAX/network activity still pending after {timeout} seconds; continuing.",
        }
    except Exception as e:
        logger.error(f"Wait for AJAX failed: {e}")
        return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
345
+
346
def _handle_scroll(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
    """
    Scroll the page.

    With a non-empty ``selector`` the matching element is waited for (using
    ``self.default_timeout``) and scrolled to the centre of the viewport;
    otherwise the window is scrolled to the bottom of the document body.

    Returns a dict with the keys ``action``, ``success`` and ``message``.
    """
    try:
        if not selector:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            destination = "page bottom"
        else:
            anchor = WebDriverWait(driver, self.default_timeout).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector))
            )
            driver.execute_script(
                "arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});",
                anchor,
            )
            destination = selector
        return {"action": "scroll", "success": True, "message": f"Scrolled to {destination}"}
    except Exception as exc:
        logger.error(f"Scroll action failed on selector {selector}: {exc}")
        return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(exc)}"}
362
+
363
def _handle_hover(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
    """
    Move the mouse pointer over the element matched by a CSS selector.

    The element must become visible within the ``self.default_timeout`` wait.

    Returns a dict with the keys ``action``, ``success`` and ``message``.
    """
    try:
        visible = EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
        hovered = WebDriverWait(driver, self.default_timeout).until(visible)
        pointer = ActionChains(driver)
        pointer.move_to_element(hovered).perform()
    except Exception as exc:
        logger.error(f"Hover action failed on selector {selector}: {exc}")
        return {"action": "hover", "success": False, "message": f"Hover failed: {str(exc)}"}
    return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
374
+
375
def _handle_screenshot(self, driver: webdriver.Chrome, filename: str) -> Dict[str, Any]:
    """
    Save a screenshot of the current browser window to ``filename``.

    Returns:
        A dict with the keys ``action``, ``success`` and ``message``.
        ``success`` is False both when the driver raises and when it reports
        that the file could not be written.
    """
    try:
        saved = driver.save_screenshot(filename)
    except Exception as e:
        logger.error(f"Screenshot capture failed: {e}")
        return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
    # Selenium signals an I/O error by returning False instead of raising.
    # Only an explicit False is treated as failure so that drivers returning
    # nothing keep their previous "success" outcome.
    if saved is False:
        logger.error(f"Screenshot capture failed: could not write {filename}")
        return {"action": "screenshot", "success": False, "message": f"Screenshot failed: could not write {filename}"}
    return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
383
+
384
def _handle_switch_tab(self, driver: webdriver.Chrome, value: str) -> Dict[str, Any]:
    """
    Switch the driver to another tab/window.

    Args:
        driver: Browser session to act on.
        value: Either the keyword ``"new"`` (case-insensitive), which selects
            the last entry of ``driver.window_handles``, or an integer index
            into that list. Integers passed as numbers rather than strings
            are accepted, and negative indices count from the end.

    Returns:
        A dict with the keys ``action``, ``success`` and ``message``.
    """
    try:
        handles = driver.window_handles
        # Plans may carry the index as a number rather than a string.
        requested = str(value).strip()
        if requested.lower() == "new":
            target_handle = handles[-1]
        else:
            idx = int(requested)
            # Bounds-check both directions so an out-of-range negative index
            # gets the same message as an out-of-range positive one.
            if not -len(handles) <= idx < len(handles):
                return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
            target_handle = handles[idx]
        driver.switch_to.window(target_handle)
        return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
    except Exception as e:
        logger.error(f"Switch tab failed: {e}")
        return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
403
+
404
def _handle_execute_script(self, driver: webdriver.Chrome, script: str) -> Dict[str, Any]:
    """
    Run a JavaScript snippet in the current page and report its return value.

    The script text is passed to the browser unchanged.

    Returns a dict with the keys ``action``, ``success`` and ``message``; on
    success it also carries the script's return value under ``result``.
    """
    try:
        script_output = driver.execute_script(script)
    except Exception as exc:
        logger.error(f"Execute script failed: {exc}")
        return {
            "action": "execute_script",
            "success": False,
            "message": f"Script execution failed: {str(exc)}",
        }
    return {
        "action": "execute_script",
        "success": True,
        "message": "Script executed successfully",
        "result": script_output,
    }
414
+
415
def _handle_drag_and_drop(self, driver: webdriver.Chrome, source_selector: str, target_selector: str) -> Dict[str, Any]:
    """
    Drag the element matched by ``source_selector`` onto the one matched by
    ``target_selector``.

    Both elements are located by CSS selector, source first, each waited for
    with ``self.default_timeout``.

    Returns a dict with the keys ``action``, ``success`` and ``message``.
    """

    def locate(css: str):
        # A fresh wait per lookup, so each element gets the full timeout.
        return WebDriverWait(driver, self.default_timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, css))
        )

    try:
        dragged = locate(source_selector)
        drop_zone = locate(target_selector)
        ActionChains(driver).drag_and_drop(dragged, drop_zone).perform()
    except Exception as exc:
        logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {exc}")
        return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(exc)}"}
    return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
431
+
432
def _capture_failure_screenshot(self, driver: webdriver.Chrome, action: str) -> None:
    """
    Save a debugging screenshot named ``failure_<action>_<unix time>.png``.

    This is best-effort: nothing is raised and nothing is returned. The
    outcome is only logged, and a write the driver reports as failed is
    logged as an error rather than as a successful capture.
    """
    filename = f"failure_{action}_{int(time.time())}.png"
    try:
        saved = driver.save_screenshot(filename)
    except Exception as e:
        logger.error(f"Failed to capture screenshot: {e}")
        return
    # Selenium signals an I/O error by returning False instead of raising.
    if saved is False:
        logger.error(f"Failed to capture screenshot: could not write {filename}")
    else:
        logger.info(f"Failure screenshot captured: {filename}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -4,6 +4,7 @@ setup.py
4
4
  semantio/__init__.py
5
5
  semantio/agent.py
6
6
  semantio/memory.py
7
+ semantio/models.py
7
8
  semantio/rag.py
8
9
  semantio.egg-info/PKG-INFO
9
10
  semantio.egg-info/SOURCES.txt
@@ -29,7 +30,9 @@ semantio/llm/groq.py
29
30
  semantio/llm/mistral.py
30
31
  semantio/llm/openai.py
31
32
  semantio/storage/__init__.py
33
+ semantio/storage/base_storage.py
32
34
  semantio/storage/cloud_storage.py
35
+ semantio/storage/in_memory_storage.py
33
36
  semantio/storage/local_storage.py
34
37
  semantio/tools/__init__.py
35
38
  semantio/tools/base_tool.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="semantio",
5
- version="0.0.4",
5
+ version="0.0.6",
6
6
  description="A powerful SDK for building AI agents",
7
7
  long_description=open("README.md").read(),
8
8
  long_description_content_type="text/markdown",
@@ -1,11 +0,0 @@
1
- from typing import List, Dict
2
-
3
class Memory:
    """In-process, append-only log of conversation messages."""

    def __init__(self):
        # Messages in insertion order, each a {"role": ..., "content": ...} dict.
        self.history = []

    def add_message(self, role: str, content: str):
        """Append one message with the given role and content to the log."""
        entry = {"role": role, "content": content}
        self.history.append(entry)

    def get_history(self) -> List[Dict]:
        """Return the stored messages (the live list itself, not a copy)."""
        return self.history
File without changes
File without changes
File without changes
@@ -1,271 +0,0 @@
1
- # web_browser.py
2
- from typing import Dict, Any, List, Optional
3
- from pydantic import Field, BaseModel
4
- from selenium import webdriver
5
- from selenium.webdriver.common.by import By
6
- from selenium.webdriver.support.ui import WebDriverWait
7
- from selenium.webdriver.support import expected_conditions as EC
8
- from selenium.webdriver.chrome.options import Options
9
- from selenium.webdriver.chrome.service import Service
10
- from webdriver_manager.chrome import ChromeDriverManager
11
- from bs4 import BeautifulSoup
12
- import json
13
- import time
14
- import re
15
- import logging
16
- from .base_tool import BaseTool
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
class BrowserPlan(BaseModel):
    """Container for the list of task dicts that make up one automation plan."""

    # Required field: ``...`` as the default means a value must be supplied.
    tasks: List[Dict[str, Any]] = Field(..., description="List of automation tasks to execute")
25
-
26
class WebBrowserTool(BaseTool):
    """
    Tool that turns a natural-language query into a browser automation plan
    (via ``self.llm``) and executes that plan step by step in Chrome.
    """

    name: str = Field("WebBrowser", description="Name of the tool")
    description: str = Field(
        "Universal web automation tool for dynamic website interactions",
        description="Tool description"
    )

    def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
        """
        Run the whole workflow: start a browser, plan, execute the tasks.

        Args:
            input: Must contain ``query``; may contain ``headless`` (bool,
                default False).

        Returns:
            ``{"status": "success", "results": [...]}`` with one result dict
            per executed task (execution stops after the first failed task),
            or ``{"status": "error", "message": ...}`` if anything raises.
            The browser is always closed before returning.
        """
        driver = None
        try:
            driver = self._init_browser(input.get("headless", False))
            results = []
            current_url = ""

            # Generate initial plan
            plan = self._generate_plan(input['query'], current_url)

            for task in plan.tasks:
                result = self._execute_safe_task(driver, task)
                results.append(result)

                if not result['success']:
                    break

                # Update context for next tasks (not consumed after the
                # initial plan has been generated).
                current_url = driver.current_url

            return {"status": "success", "results": results}

        except Exception as e:
            return {"status": "error", "message": str(e)}
        finally:
            if driver:
                driver.quit()

    def _init_browser(self, headless: bool) -> webdriver.Chrome:
        """Create a Chrome driver (installed via webdriver_manager), optionally headless."""
        options = Options()
        options.add_argument("--start-maximized")
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])

        if headless:
            options.add_argument("--headless=new")

        return webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options
        )

    def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
        """Ask ``self.llm`` for a JSON task plan for ``query`` and parse the reply."""
        prompt = f"""Generate browser automation plan for: {query}

        Current URL: {current_url or 'No page loaded yet'}

        Required JSON format:
        {{
            "tasks": [
                {{
                    "action": "navigate|click|type|wait|scroll",
                    "selector": "CSS selector (optional)",
                    "value": "input text/URL/seconds",
                    "description": "action purpose"
                }}
            ]
        }}

        Guidelines:
        1. Prefer IDs in selectors (#element-id)
        2. Use semantic attributes (aria-label, name)
        3. Include wait steps after navigation
        4. Prioritize visible elements
        5. Add scroll steps for hidden elements
        """

        response = self.llm.generate(prompt=prompt)
        return self._parse_plan(response)

    def _parse_plan(self, response: str) -> BrowserPlan:
        """
        Extract the plan JSON from an LLM reply.

        A fenced ```json block is preferred; otherwise the span from the
        first ``{`` to the last ``}`` is parsed. Tasks lacking ``action`` or
        ``description`` are dropped. An empty plan is returned when no JSON
        can be found or decoded.
        """
        try:
            # Try extracting JSON from markdown code block
            json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
            if json_match:
                plan_data = json.loads(json_match.group(1).strip())
            else:
                # Fallback to extract first JSON object; a missing match makes
                # .group() raise AttributeError, which is handled below.
                json_str = re.search(r'\{.*\}', response, re.DOTALL).group()
                plan_data = json.loads(json_str)

            # Validate tasks structure
            validated_tasks = []
            for task in plan_data.get("tasks", []):
                if not all(key in task for key in ["action", "description"]):
                    continue
                validated_tasks.append({
                    "action": task["action"],
                    "selector": task.get("selector", ""),
                    "value": task.get("value", ""),
                    "description": task["description"]
                })

            return BrowserPlan(tasks=validated_tasks)

        except (json.JSONDecodeError, AttributeError) as e:
            logger.error(f"Plan parsing failed: {e}")
            return BrowserPlan(tasks=[])

    def _execute_safe_task(self, driver, task: Dict) -> Dict[str, Any]:
        """
        Dispatch one task to its handler, converting any exception into a
        failed result dict.

        Supported actions: navigate, click, type, wait, scroll. Anything else
        yields an "Unsupported action" result.
        """
        # Bound before the try so the except branch can always report it,
        # even when reading task["action"] is what failed.
        action = "unknown"
        try:
            action = task["action"].lower()
            selector = task.get("selector", "")
            value = task.get("value", "")

            if action == "navigate":
                return self._handle_navigation(driver, value)

            elif action == "click":
                return self._handle_click(driver, selector)

            elif action == "type":
                return self._handle_typing(driver, selector, value)

            elif action == "wait":
                return self._handle_wait(value)

            elif action == "scroll":
                return self._handle_scroll(driver, selector)

            return {
                "action": action,
                "success": False,
                "message": f"Unsupported action: {action}"
            }

        except Exception as e:
            return {
                "action": action,
                "success": False,
                "message": f"Critical error: {str(e)}"
            }

    def _handle_navigation(self, driver, url: str) -> Dict[str, Any]:
        """Load ``url`` (prefixing ``https://`` when no scheme is given) and wait for <body>."""
        if not url.startswith(("http://", "https://")):
            url = f"https://{url}"

        try:
            driver.get(url)
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            return {
                "action": "navigate",
                "success": True,
                "message": f"Navigated to {url}"
            }
        except Exception as e:
            return {
                "action": "navigate",
                "success": False,
                "message": f"Navigation failed: {str(e)}"
            }

    def _handle_click(self, driver, selector: str) -> Dict[str, Any]:
        """Wait for the element to be clickable, scroll it into view and click it."""
        try:
            element = WebDriverWait(driver, 15).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
            )
            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
            element.click()
            return {
                "action": "click",
                "success": True,
                "message": f"Clicked element: {selector}"
            }
        except Exception as e:
            return {
                "action": "click",
                "success": False,
                "message": f"Click failed: {str(e)}"
            }

    def _handle_typing(self, driver, selector: str, text: str) -> Dict[str, Any]:
        """Wait for the element to be present, clear it and type ``text`` into it."""
        try:
            element = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector))
            )
            element.clear()
            element.send_keys(text)
            return {
                "action": "type",
                "success": True,
                "message": f"Typed '{text}' into {selector}"
            }
        except Exception as e:
            return {
                "action": "type",
                "success": False,
                "message": f"Typing failed: {str(e)}"
            }

    def _handle_wait(self, seconds: str) -> Dict[str, Any]:
        """Sleep for ``float(seconds)`` seconds; a ValueError yields a failed result."""
        try:
            wait_time = float(seconds)
            time.sleep(wait_time)
            return {
                "action": "wait",
                "success": True,
                "message": f"Waited {wait_time} seconds"
            }
        except ValueError:
            return {
                "action": "wait",
                "success": False,
                "message": "Invalid wait time"
            }

    def _handle_scroll(self, driver, selector: str) -> Dict[str, Any]:
        """Scroll the matched element into view, or to the page bottom when no selector is given."""
        try:
            if selector:
                element = WebDriverWait(driver, 15).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                )
                driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
            else:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            return {
                "action": "scroll",
                "success": True,
                "message": f"Scrolled to {selector or 'page bottom'}"
            }
        except Exception as e:
            return {
                "action": "scroll",
                "success": False,
                "message": f"Scroll failed: {str(e)}"
            }
File without changes
File without changes
File without changes
File without changes
File without changes