semantio 0.0.4__tar.gz → 0.0.6__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {semantio-0.0.4 → semantio-0.0.6}/LICENSE +1 -1
- {semantio-0.0.4 → semantio-0.0.6}/PKG-INFO +1 -1
- semantio-0.0.6/semantio/__init__.py +4 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/agent.py +86 -23
- semantio-0.0.6/semantio/memory.py +54 -0
- semantio-0.0.6/semantio/models.py +9 -0
- semantio-0.0.6/semantio/storage/__init__.py +5 -0
- semantio-0.0.6/semantio/storage/base_storage.py +12 -0
- semantio-0.0.6/semantio/storage/in_memory_storage.py +14 -0
- semantio-0.0.6/semantio/storage/local_storage.py +29 -0
- semantio-0.0.6/semantio/tools/web_browser.py +439 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/PKG-INFO +1 -1
- {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/SOURCES.txt +3 -0
- {semantio-0.0.4 → semantio-0.0.6}/setup.py +1 -1
- semantio-0.0.4/semantio/memory.py +0 -11
- semantio-0.0.4/semantio/storage/__init__.py +0 -0
- semantio-0.0.4/semantio/storage/local_storage.py +0 -0
- semantio-0.0.4/semantio/tools/__init__.py +0 -0
- semantio-0.0.4/semantio/tools/web_browser.py +0 -271
- {semantio-0.0.4 → semantio-0.0.6}/README.md +0 -0
- {semantio-0.0.4/semantio → semantio-0.0.6/semantio/api}/__init__.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/api/api_generator.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/api/fastapi_app.py +0 -0
- {semantio-0.0.4/semantio/api → semantio-0.0.6/semantio/cli}/__init__.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/cli/main.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/knowledge_base/__init__.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/knowledge_base/document_loader.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/knowledge_base/retriever.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/knowledge_base/vector_store.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/__init__.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/anthropic.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/base_llm.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/deepseek.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/gemini.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/groq.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/mistral.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/llm/openai.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/rag.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/storage/cloud_storage.py +0 -0
- {semantio-0.0.4/semantio/cli → semantio-0.0.6/semantio/tools}/__init__.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/tools/base_tool.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/tools/crypto.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/tools/duckduckgo.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/tools/stocks.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/__init__.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/config.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/date_utils.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/file_utils.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/logger.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio/utils/validation_utils.py +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/dependency_links.txt +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/entry_points.txt +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/requires.txt +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/semantio.egg-info/top_level.txt +0 -0
- {semantio-0.0.4 → semantio-0.0.6}/setup.cfg +0 -0
@@ -16,6 +16,7 @@ from .tools.base_tool import BaseTool
|
|
16
16
|
from pathlib import Path
|
17
17
|
import importlib
|
18
18
|
import os
|
19
|
+
from .memory import Memory
|
19
20
|
|
20
21
|
# Configure logging
|
21
22
|
logging.basicConfig(level=logging.INFO)
|
@@ -48,6 +49,13 @@ class Agent(BaseModel):
|
|
48
49
|
semantic_model: Optional[Any] = Field(None, description="SentenceTransformer model for semantic matching.")
|
49
50
|
team: Optional[List['Agent']] = Field(None, description="List of assistants in the team.")
|
50
51
|
auto_tool: bool = Field(False, description="Whether to automatically detect and call tools.")
|
52
|
+
memory: Memory = Field(default_factory=Memory)
|
53
|
+
memory_config: Dict = Field(
|
54
|
+
default_factory=lambda: {
|
55
|
+
"max_context_length": 4000,
|
56
|
+
"summarization_threshold": 3000
|
57
|
+
}
|
58
|
+
)
|
51
59
|
|
52
60
|
# Allow arbitrary types
|
53
61
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
@@ -56,6 +64,11 @@ class Agent(BaseModel):
|
|
56
64
|
super().__init__(**kwargs)
|
57
65
|
# Initialize the model and tools here if needed
|
58
66
|
self._initialize_model()
|
67
|
+
# Initialize memory with config
|
68
|
+
self.memory = Memory(
|
69
|
+
max_context_length=self.memory_config.get("max_context_length", 4000),
|
70
|
+
summarization_threshold=self.memory_config.get("summarization_threshold", 3000)
|
71
|
+
)
|
59
72
|
# Initialize tools as an empty list if not provided
|
60
73
|
if self.tools is None:
|
61
74
|
self.tools = []
|
@@ -218,20 +231,31 @@ class Agent(BaseModel):
|
|
218
231
|
markdown: bool = False,
|
219
232
|
team: Optional[List['Agent']] = None,
|
220
233
|
**kwargs,
|
221
|
-
) -> Union[str, Dict]:
|
234
|
+
) -> Union[str, Dict]:
|
222
235
|
"""Print the agent's response to the console and return it."""
|
236
|
+
|
237
|
+
# Store user message if provided
|
238
|
+
if message and isinstance(message, str):
|
239
|
+
self.memory.add_message(role="user", content=message)
|
223
240
|
|
224
241
|
if stream:
|
225
242
|
# Handle streaming response
|
226
243
|
response = ""
|
227
244
|
for chunk in self._stream_response(message, markdown=markdown, **kwargs):
|
228
|
-
print(chunk)
|
245
|
+
print(chunk, end="", flush=True)
|
229
246
|
response += chunk
|
247
|
+
# Store agent response
|
248
|
+
if response:
|
249
|
+
self.memory.add_message(role="assistant", content=response)
|
250
|
+
print() # New line after streaming
|
230
251
|
return response
|
231
252
|
else:
|
232
253
|
# Generate and return the response
|
233
254
|
response = self._generate_response(message, markdown=markdown, team=team, **kwargs)
|
234
255
|
print(response) # Print the response to the console
|
256
|
+
# Store agent response
|
257
|
+
if response:
|
258
|
+
self.memory.add_message(role="assistant", content=response)
|
235
259
|
return response
|
236
260
|
|
237
261
|
|
@@ -294,12 +318,10 @@ class Agent(BaseModel):
|
|
294
318
|
# Use the specified team if provided
|
295
319
|
if team is not None:
|
296
320
|
return self._generate_team_response(message, team, markdown=markdown, **kwargs)
|
297
|
-
|
298
321
|
# Initialize tool_outputs as an empty dictionary
|
299
322
|
tool_outputs = {}
|
300
323
|
responses = []
|
301
324
|
tool_calls = []
|
302
|
-
|
303
325
|
# Use the LLM to analyze the query and dynamically select tools when auto_tool is enabled
|
304
326
|
if self.auto_tool:
|
305
327
|
tool_calls = self._analyze_query_and_select_tools(message)
|
@@ -347,13 +369,17 @@ class Agent(BaseModel):
|
|
347
369
|
try:
|
348
370
|
# Prepare the context for the LLM
|
349
371
|
context = {
|
372
|
+
"conversation_history": self.memory.get_context(self.llm_instance),
|
350
373
|
"tool_outputs": tool_outputs,
|
351
374
|
"rag_context": self.rag.retrieve(message) if self.rag else None,
|
352
|
-
"
|
375
|
+
"knowledge_base": self._get_knowledge_context(message) if self.knowledge_base else None,
|
353
376
|
}
|
354
|
-
|
377
|
+
# 3. Build a memory-aware prompt.
|
378
|
+
prompt = self._build_memory_prompt(message, context)
|
379
|
+
# Convert MemoryEntry objects to plain dicts, dropping timestamp and metadata:
|
380
|
+
memory_entries = [{"role": e.role, "content": e.content} for e in self.memory.storage.retrieve()]
|
355
381
|
# Generate a response using the LLM
|
356
|
-
llm_response = self.llm_instance.generate(prompt=
|
382
|
+
llm_response = self.llm_instance.generate(prompt=prompt, context=context, memory=memory_entries, **kwargs)
|
357
383
|
responses.append(f"**Analysis:**\n\n{llm_response}")
|
358
384
|
except Exception as e:
|
359
385
|
logger.error(f"Failed to generate LLM response: {e}")
|
@@ -363,25 +389,30 @@ class Agent(BaseModel):
|
|
363
389
|
# Retrieve relevant context using RAG
|
364
390
|
rag_context = self.rag.retrieve(message) if self.rag else None
|
365
391
|
# Retrieve relevant context from the knowledge base (API result)
|
366
|
-
knowledge_base_context = None
|
367
|
-
if self.knowledge_base:
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
392
|
+
# knowledge_base_context = None
|
393
|
+
# if self.knowledge_base:
|
394
|
+
# # Flatten the knowledge base
|
395
|
+
# flattened_data = self._flatten_data(self.knowledge_base)
|
396
|
+
# # Find all relevant key-value pairs in the knowledge base
|
397
|
+
# relevant_values = self._find_all_relevant_keys(message, flattened_data)
|
398
|
+
# if relevant_values:
|
399
|
+
# knowledge_base_context = ", ".join(relevant_values)
|
374
400
|
|
375
401
|
# Combine both contexts (RAG and knowledge base)
|
376
402
|
context = {
|
403
|
+
"conversation_history": self.memory.get_context(self.llm_instance),
|
377
404
|
"rag_context": rag_context,
|
378
|
-
"
|
405
|
+
"knowledge_base": self._get_knowledge_context(message),
|
379
406
|
}
|
380
407
|
# Prepare the prompt with instructions, description, and context
|
381
|
-
|
408
|
+
# 3. Build a memory-aware prompt.
|
409
|
+
prompt = self._build_memory_prompt(message, context)
|
410
|
+
# Convert MemoryEntry objects to plain dicts, dropping timestamp and metadata:
|
411
|
+
memory_entries = [{"role": e.role, "content": e.content} for e in self.memory.storage.retrieve()]
|
382
412
|
|
383
413
|
# Generate the response using the LLM
|
384
|
-
response = self.llm_instance.generate(prompt=prompt, context=context, **kwargs)
|
414
|
+
response = self.llm_instance.generate(prompt=prompt, context=context, memory=memory_entries, **kwargs)
|
415
|
+
|
385
416
|
|
386
417
|
# Format the response based on the json_output flag
|
387
418
|
if self.json_output:
|
@@ -394,9 +425,37 @@ class Agent(BaseModel):
|
|
394
425
|
if markdown:
|
395
426
|
return f"**Response:**\n\n{response}"
|
396
427
|
return response
|
397
|
-
# Combine all responses into a single string
|
398
428
|
return "\n\n".join(responses)
|
399
429
|
|
430
|
+
# Modified prompt construction with memory integration
|
431
|
+
def _build_memory_prompt(self, user_input: str, context: dict) -> str:
    """Assemble the LLM prompt from the agent profile, context, and user input.

    Sections are emitted in a fixed order -- ROLE, INSTRUCTIONS,
    CONVERSATION HISTORY, KNOWLEDGE BASE, USER INPUT -- and joined with a
    blank line. A section is left out when its source value is falsy.

    Args:
        user_input: The current user message; always included.
        context: Mapping that may carry "conversation_history" and
            "knowledge_base". A missing key is treated like an empty value.
            Any other keys in the mapping are not read here.

    Returns:
        The prompt text.
    """
    sections = []

    if self.description:
        sections.append(f"# ROLE\n{self.description}")

    if self.instructions:
        bullets = "\n".join(f"- {item}" for item in self.instructions)
        sections.append(f"# INSTRUCTIONS\n{bullets}")

    # .get() rather than indexing: a context built without one of these keys
    # should simply skip the section instead of raising KeyError.
    history = context.get("conversation_history")
    if history:
        sections.append(f"# CONVERSATION HISTORY\n{history}")

    knowledge = context.get("knowledge_base")
    if knowledge:
        sections.append(f"# KNOWLEDGE BASE\n{knowledge}")

    sections.append(f"# USER INPUT\n{user_input}")

    return "\n\n".join(sections)
|
450
|
+
|
451
|
+
def _get_knowledge_context(self, message: str) -> str:
    """Return knowledge-base items relevant to *message* as a bullet list.

    The knowledge base is flattened with ``_flatten_data`` and filtered with
    ``_find_all_relevant_keys``; each match becomes one "- item" line.
    Returns an empty string when there is no knowledge base or no match.
    """
    kb = self.knowledge_base
    if not kb:
        return ""

    matches = self._find_all_relevant_keys(message, self._flatten_data(kb))
    if not matches:
        return ""

    bullet_lines = [f"- {match}" for match in matches]
    return "\n".join(bullet_lines)
|
400
459
|
def _generate_team_response(self, message: str, team: List['Agent'], markdown: bool = False, **kwargs) -> str:
|
401
460
|
"""Generate a response using a team of assistants."""
|
402
461
|
responses = []
|
@@ -543,17 +602,21 @@ class Agent(BaseModel):
|
|
543
602
|
"""Run the agent in a CLI app."""
|
544
603
|
from rich.prompt import Prompt
|
545
604
|
|
605
|
+
# Print initial message if provided
|
546
606
|
if message:
|
547
607
|
self.print_response(message=message, **kwargs)
|
548
608
|
|
549
609
|
_exit_on = exit_on or ["exit", "quit", "bye"]
|
550
610
|
while True:
|
551
|
-
|
552
|
-
|
611
|
+
try:
|
612
|
+
message = Prompt.ask(f"[bold] {self.emoji} {self.user_name} [/bold]")
|
613
|
+
if message in _exit_on:
|
614
|
+
break
|
615
|
+
self.print_response(message=message, **kwargs)
|
616
|
+
except KeyboardInterrupt:
|
617
|
+
print("\n\nSession ended. Goodbye!")
|
553
618
|
break
|
554
619
|
|
555
|
-
self.print_response(message=message, **kwargs)
|
556
|
-
|
557
620
|
def _generate_api(self):
|
558
621
|
"""Generate an API for the agent if api=True."""
|
559
622
|
from .api.api_generator import APIGenerator
|
@@ -0,0 +1,54 @@
|
|
1
|
+
from .models import MemoryEntry
|
2
|
+
from .storage import BaseMemoryStorage, InMemoryStorage, FileStorage
|
3
|
+
from typing import List, Dict, Optional
|
4
|
+
from .llm.base_llm import BaseLLM
|
5
|
+
class Memory:
    """Rolling conversation memory backed by a pluggable storage object.

    Messages are written to ``storage``; a text rendering of what
    ``storage.retrieve()`` returns ("role: content", one per line) is cached
    in ``_current_context``. All length limits are measured in characters.
    """

    def __init__(
        self,
        storage: Optional[BaseMemoryStorage] = None,
        max_context_length: int = 4000,
        summarization_threshold: int = 3000,
    ):
        """Create a memory.

        Args:
            storage: Backend used to store and retrieve entries. When omitted,
                a fresh ``InMemoryStorage`` is created for this instance.
            max_context_length: Maximum number of characters kept in the
                cached context.
            summarization_threshold: Context length (characters) at which
                ``get_context`` switches to summarizing or truncating.
        """
        # Build the default backend per instance. A default of
        # ``InMemoryStorage()`` in the signature is evaluated once, so every
        # Memory created without an explicit storage would share one history.
        self.storage = storage if storage is not None else InMemoryStorage()
        self.max_context_length = max_context_length
        self.summarization_threshold = summarization_threshold
        self._current_context = ""

    def add_message(self, role: str, content: str, metadata: Optional[Dict] = None):
        """Store one message and refresh the cached context."""
        entry = MemoryEntry(
            role=role,
            content=content,
            metadata=metadata or {},
        )
        self.storage.store(entry)
        self._manage_context()

    def get_context(self, llm: Optional[BaseLLM] = None) -> str:
        """Return the conversation context to put in a prompt.

        Below ``summarization_threshold`` the cached context is returned
        unchanged. At or above it, the context is summarized via ``llm`` when
        one is given; otherwise the first ``max_context_length`` characters of
        the cached context are returned.
        """
        if len(self._current_context) < self.summarization_threshold:
            return self._current_context

        if llm:
            return self.summarize(llm)
        return self._current_context[:self.max_context_length]

    def _format_history(self) -> str:
        """Render the entries returned by storage as 'role: content' lines."""
        return "\n".join(f"{e.role}: {e.content}" for e in self.storage.retrieve())

    def _manage_context(self):
        """Rebuild the cached context, keeping at most the trailing
        ``max_context_length`` characters of the rendered history."""
        limit = self.max_context_length
        if limit <= 0:
            # text[-0:] would keep everything; a non-positive budget means
            # no context at all.
            self._current_context = ""
            return
        # A negative-start slice longer than the string returns the whole
        # string, so no separate length check is needed.
        self._current_context = self._format_history()[-limit:]

    def summarize(self, llm: BaseLLM) -> str:
        """Replace the cached context with an LLM summary and return it.

        Only the trailing ``summarization_threshold`` characters of the
        rendered history are sent to ``llm.generate``.
        """
        history = self._format_history()
        prompt = (
            "\nSummarize this conversation history maintaining key details and references:\n"
            f"{history[-self.summarization_threshold:]}\n"
        )
        self._current_context = llm.generate(prompt)
        return self._current_context

    def clear(self):
        """Forget the conversation.

        NOTE(review): this swaps in a new ``InMemoryStorage`` even when a
        different backend was supplied, and does not erase anything that
        backend persisted -- confirm this is intended.
        """
        self.storage = InMemoryStorage()
        self._current_context = ""
|
@@ -0,0 +1,9 @@
|
|
1
|
+
from pydantic import BaseModel, Field
|
2
|
+
from datetime import datetime
|
3
|
+
from typing import Dict
|
4
|
+
|
5
|
+
class MemoryEntry(BaseModel):
    # A single stored conversation message.
    role: str # "user" or "assistant"
    # Text of the message.
    content: str
    # Creation time; defaults to datetime.now() (called without a timezone) when the entry is built.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Free-form extra data attached to the message; defaults to a new empty dict per entry.
    metadata: Dict = Field(default_factory=dict)
|
@@ -0,0 +1,12 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import List, Optional
|
3
|
+
from ..models import MemoryEntry
|
4
|
+
|
5
|
+
class BaseMemoryStorage(ABC):
    """Abstract interface that every memory storage backend implements."""

    @abstractmethod
    def store(self, entry: MemoryEntry):
        """Persist a single memory entry."""

    @abstractmethod
    def retrieve(self, query: Optional[str] = None, limit: int = 20) -> List[MemoryEntry]:
        """Return stored entries, bounded by ``limit``.

        ``query`` is an optional filter; how (or whether) it is applied is
        left to the concrete backend.
        """
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# semantio/storage/in_memory_storage.py
|
2
|
+
from typing import List, Optional
|
3
|
+
from ..models import MemoryEntry
|
4
|
+
from .base_storage import BaseMemoryStorage
|
5
|
+
|
6
|
+
class InMemoryStorage(BaseMemoryStorage):
    """Storage backend that keeps entries in a plain Python list.

    Nothing is persisted; the history lives only as long as the object.
    """

    def __init__(self):
        self.history: List[MemoryEntry] = []

    def store(self, entry: MemoryEntry):
        """Append ``entry`` to the in-process history."""
        self.history.append(entry)

    def retrieve(self, query: Optional[str] = None, limit: int = 10) -> List[MemoryEntry]:
        """Return the most recent ``limit`` entries, oldest first.

        ``query`` is accepted for interface compatibility but is not used.
        A non-positive ``limit`` yields an empty list.
        """
        if limit <= 0:
            # history[-0:] is the whole list and a negative limit would slice
            # from the front, so handle these explicitly.
            return []
        return self.history[-limit:]
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import json
|
2
|
+
from typing import List, Optional
|
3
|
+
from ..models import MemoryEntry
|
4
|
+
from .base_storage import BaseMemoryStorage
|
5
|
+
|
6
|
+
class FileStorage(BaseMemoryStorage):
    """Storage backend that mirrors the entry list to a JSON file.

    The file is read once at construction and rewritten in full on every
    ``store`` call.
    """

    def __init__(self, file_path: str = "memory.json"):
        self.file_path = file_path
        self.history = self._load_from_file()

    def _load_from_file(self) -> List[MemoryEntry]:
        """Load entries from ``file_path``.

        Returns an empty list when the file does not exist or is not valid
        JSON. Errors raised while building ``MemoryEntry`` objects propagate.
        """
        try:
            with open(self.file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            return []
        return [MemoryEntry(**entry) for entry in data]

    @staticmethod
    def _entry_to_dict(entry: MemoryEntry) -> dict:
        """Convert an entry to a dict on both pydantic v1 and v2."""
        # pydantic v2 renamed .dict() to .model_dump(); prefer the new name
        # when it exists to avoid the deprecation warning.
        dump = getattr(entry, "model_dump", None)
        return dump() if callable(dump) else entry.dict()

    def _save_to_file(self):
        """Write the whole history to ``file_path`` as JSON."""
        # Serialize before opening: open(..., "w") truncates the file, so a
        # serialization error must not happen after that point.
        # default=str stringifies values json cannot encode (e.g. datetime).
        payload = json.dumps(
            [self._entry_to_dict(entry) for entry in self.history],
            default=str,
        )
        with open(self.file_path, "w", encoding="utf-8") as f:
            f.write(payload)

    def store(self, entry: MemoryEntry):
        """Append ``entry`` and rewrite the backing file."""
        self.history.append(entry)
        self._save_to_file()

    def retrieve(self, query: Optional[str] = None, limit: int = 20) -> List[MemoryEntry]:
        """Return the most recent ``limit`` entries, oldest first.

        ``query`` is accepted for interface compatibility but is not used.
        A non-positive ``limit`` yields an empty list.
        """
        if limit <= 0:
            # history[-0:] would return everything.
            return []
        return self.history[-limit:]
|
@@ -0,0 +1,439 @@
|
|
1
|
+
# web_browser.py
|
2
|
+
from typing import Dict, Any, List, Optional, Callable
|
3
|
+
from pydantic import Field, BaseModel
|
4
|
+
from selenium import webdriver
|
5
|
+
from selenium.webdriver.common.by import By
|
6
|
+
from selenium.webdriver.common.action_chains import ActionChains
|
7
|
+
from selenium.webdriver.remote.webelement import WebElement
|
8
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
9
|
+
from selenium.webdriver.support import expected_conditions as EC
|
10
|
+
from selenium.webdriver.chrome.options import Options
|
11
|
+
from selenium.webdriver.chrome.service import Service
|
12
|
+
from webdriver_manager.chrome import ChromeDriverManager
|
13
|
+
from bs4 import BeautifulSoup
|
14
|
+
import json
|
15
|
+
import time
|
16
|
+
import re
|
17
|
+
import logging
|
18
|
+
import os
|
19
|
+
import difflib
|
20
|
+
from .base_tool import BaseTool
|
21
|
+
|
22
|
+
logger = logging.getLogger(__name__)
|
23
|
+
|
24
|
+
class BrowserPlan(BaseModel):
    # Container for the ordered automation steps produced by _parse_plan.
    # Each task is a dict; _parse_plan fills the keys "action", "selector",
    # "value" and "description". The field is required (no default).
    tasks: List[Dict[str, Any]] = Field(
        ...,
        description="List of automation tasks to execute"
    )
|
29
|
+
|
30
|
+
class WebBrowserTool(BaseTool):
|
31
|
+
name: str = Field("WebBrowser", description="Name of the tool")
|
32
|
+
description: str = Field(
|
33
|
+
"Highly advanced universal web automation tool with advanced element identification, AJAX waiting, modal dismissal, multi-tab support, and custom JS injection.",
|
34
|
+
description="Tool description"
|
35
|
+
)
|
36
|
+
|
37
|
+
default_timeout: int = 15 # Default wait timeout in seconds
|
38
|
+
max_retries: int = 3 # Increased maximum retries for any task
|
39
|
+
|
40
|
+
def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
    """Plan and run a browser automation workflow for ``input["query"]``.

    Args:
        input: Options dict. Keys read here: "query" (text handed to the
            planner), "headless" (default False), "timeout" and
            "max_retries" (both coerced with int(); they overwrite
            ``self.default_timeout`` / ``self.max_retries`` for this and
            later calls).

    Returns:
        ``{"status": "success", "results": [...], "total_time": seconds}``
        once the task loop finishes, or ``{"status": "error", "message": ...}``
        if anything raised (including an empty plan).

        NOTE(review): "status" is "success" even when a task failed and the
        loop stopped early; callers must inspect each result's "success".
    """
    driver = None
    overall_start = time.time()
    try:
        headless = input.get("headless", False)
        self.default_timeout = int(input.get("timeout", self.default_timeout))
        self.max_retries = int(input.get("max_retries", self.max_retries))

        # Plan before launching Chrome: planning does not touch the browser,
        # so a failed or empty plan should not cost a browser start-up.
        plan = self._generate_plan(input.get('query', ''), "")
        if not plan.tasks:
            raise ValueError("No valid tasks in the generated plan.")

        driver = self._init_browser(headless)
        results = []

        # Dynamic mapping: action name to handler function.
        # Every handler takes (driver, task) and returns a result dict.
        action_map = {
            "navigate": lambda d, task: self._handle_navigation(d, task.get("value", "")),
            "click": lambda d, task: self._handle_click(d, task.get("selector", "")),
            "type": lambda d, task: self._handle_typing(d, task.get("selector", ""), task.get("value", ""), task),
            "wait": lambda d, task: self._handle_wait(task.get("value", "")),
            "wait_for_ajax": lambda d, task: self._handle_wait_for_ajax(d, task.get("value", "30")),
            "scroll": lambda d, task: self._handle_scroll(d, task.get("selector", "")),
            "hover": lambda d, task: self._handle_hover(d, task.get("selector", "")),
            "screenshot": lambda d, task: self._handle_screenshot(d, task.get("value", "screenshot.png")),
            "switch_tab": lambda d, task: self._handle_switch_tab(d, task.get("value", "0")),
            "execute_script": lambda d, task: self._handle_execute_script(d, task.get("value", "")),
            "drag_and_drop": lambda d, task: self._handle_drag_and_drop(d, task.get("selector", ""), task.get("value", "")),
        }

        for task in plan.tasks:
            # Before each action, dismiss modals/overlays.
            self._dismiss_unwanted_modals(driver)
            action = task.get("action", "").lower()
            logger.info(f"Executing task: {task.get('description', action)}")
            start_time = time.time()
            handler = action_map.get(action)
            if not handler:
                # Unknown actions are recorded as failures but do not stop the run.
                results.append({
                    "action": action,
                    "success": False,
                    "message": f"Unsupported action: {action}"
                })
                continue

            result = self._execute_with_retries(driver, task, handler)
            elapsed = time.time() - start_time
            result["elapsed"] = elapsed
            logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
            results.append(result)

            if not result.get('success', False):
                # A failed supported action aborts the remaining tasks.
                logger.error(f"Task failed: {result.get('message')}")
                self._capture_failure_screenshot(driver, action)
                break

        overall_elapsed = time.time() - overall_start
        logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
        return {"status": "success", "results": results, "total_time": overall_elapsed}

    except Exception as e:
        logger.exception("Execution error:")
        return {"status": "error", "message": str(e)}
    finally:
        # Always release the browser, whether the run succeeded or not.
        if driver:
            driver.quit()
|
109
|
+
|
110
|
+
def _init_browser(self, headless: bool) -> webdriver.Chrome:
    """Create the Chrome WebDriver used for a run.

    Chrome is started maximized with the automation-related switches
    adjusted; ``--headless=new`` is added when ``headless`` is true. The
    driver path comes from ``ChromeDriverManager().install()``.
    """
    chrome_opts = Options()
    for flag in ("--start-maximized", "--disable-blink-features=AutomationControlled"):
        chrome_opts.add_argument(flag)
    chrome_opts.add_experimental_option("excludeSwitches", ["enable-automation"])
    if headless:
        chrome_opts.add_argument("--headless=new")

    driver_path = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_path), options=chrome_opts)
|
122
|
+
|
123
|
+
def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
    """Ask the LLM for a browser automation plan and parse its reply.

    Args:
        query: The user's automation request, embedded in the prompt.
        current_url: URL shown to the LLM as the current page; an empty
            value is rendered as 'No page loaded yet'.

    Returns:
        The BrowserPlan produced by ``_parse_plan`` from the LLM response.
    """
    # The doubled braces ({{ }}) are literal braces inside the f-string.
    prompt = f"""Generate browser automation plan for: {query}

Current URL: {current_url or 'No page loaded yet'}

Required JSON format:
{{
"tasks": [
{{
"action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
"selector": "CSS selector (optional)",
"value": "input text/URL/seconds/filename/target-selector",
"description": "action purpose"
}}
]
}}

Guidelines:
1. Prefer IDs in selectors (#element-id) and semantic attributes.
2. Include wait steps after navigation and wait for AJAX where applicable.
3. Dismiss any modals/pop-ups that are not part of the task.
4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
5. For execute_script, 'value' should contain valid JavaScript.
6. For switch_tab, 'value' should be an index or keyword 'new'.
"""
    # NOTE(review): self.llm is not declared in the visible part of this class;
    # presumably it is provided by BaseTool or injected by the agent -- confirm.
    response = self.llm.generate(prompt=prompt)
    return self._parse_plan(response)
|
151
|
+
|
152
|
+
def _parse_plan(self, response: str) -> BrowserPlan:
    """Extract a BrowserPlan from an LLM response.

    The JSON is taken from a ```json fenced block when one is present,
    otherwise from the span between the first "{" and the last "}" in the
    text. Tasks that are not dicts, lack "action" or "description", or whose
    "action" is not a string are skipped with a warning.

    Returns:
        A BrowserPlan with the validated tasks, or an empty plan when no
        usable JSON object could be parsed.
    """
    try:
        fenced = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
        if fenced:
            plan_data = json.loads(fenced.group(1).strip())
        else:
            bare = re.search(r'\{.*\}', response, re.DOTALL)
            if not bare:
                raise ValueError("No JSON object found in the response.")
            plan_data = json.loads(bare.group())

        # A fenced block may hold any JSON value; only an object is a plan.
        if not isinstance(plan_data, dict):
            raise ValueError("Plan JSON is not an object.")

        raw_tasks = plan_data.get("tasks") or []  # tolerate "tasks": null
        if not isinstance(raw_tasks, list):
            raise ValueError("'tasks' is not a list.")

        validated_tasks = []
        for task in raw_tasks:
            usable = (
                isinstance(task, dict)
                and all(key in task for key in ("action", "description"))
                # execute() calls .lower() on the action, so it must be text.
                and isinstance(task["action"], str)
            )
            if not usable:
                logger.warning(f"Skipping task due to missing keys: {task}")
                continue
            validated_tasks.append({
                "action": task["action"],
                "selector": task.get("selector", ""),
                "value": task.get("value", ""),
                "description": task["description"]
            })
        return BrowserPlan(tasks=validated_tasks)
    except (json.JSONDecodeError, AttributeError, ValueError, TypeError) as e:
        # TypeError covers a non-string response handed to re.search.
        logger.error(f"Plan parsing failed: {e}")
        return BrowserPlan(tasks=[])
|
178
|
+
|
179
|
+
def _execute_with_retries(self, driver: webdriver.Chrome, task: Dict[str, Any],
                          handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
    """Run a task up to ``self.max_retries`` times with a growing delay.

    The task is executed through ``_execute_safe_task``. After a failed
    attempt k that is not the last one, the method sleeps k seconds (linear
    backoff) and tries again.

    Returns:
        The first successful result, otherwise the result of the last
        attempt. An empty dict is returned when ``max_retries`` is not
        positive, because no attempt is made.
    """
    result = {}
    for attempt in range(1, self.max_retries + 1):
        result = self._execute_safe_task(driver, task, handler)
        if result.get("success", False):
            return result
        # Only announce and wait for a retry that will actually happen;
        # after the final attempt just return the failure.
        if attempt < self.max_retries:
            logger.info(f"Retrying task '{task.get('action')}' (attempt {attempt + 1}/{self.max_retries})")
            time.sleep(1 * attempt)
    return result
|
192
|
+
|
193
|
+
def _execute_safe_task(self, driver: webdriver.Chrome, task: Dict[str, Any],
                       handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
    """Call ``handler(driver, task)`` and convert any exception to a result.

    Returns the handler's result unchanged on success. If the handler
    raises, the traceback is logged and a failure dict carrying the task's
    action name and the error text is returned instead.
    """
    try:
        outcome = handler(driver, task)
    except Exception as exc:
        action_name = task.get("action", "unknown")
        logger.exception(f"Error executing task '{action_name}':")
        return {"action": action_name, "success": False, "message": f"Critical error: {str(exc)}"}
    return outcome
|
202
|
+
|
203
|
+
def _dismiss_unwanted_modals(self, driver: webdriver.Chrome):
    """Best-effort removal of visible modals, overlays and pop-ups.

    For every displayed element matching a known modal selector, try to
    click a visible close button inside it; if none could be clicked, delete
    the element from the DOM with JavaScript. Any error ends the sweep and
    is logged at debug level only.
    """
    modal_selectors = (".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox")
    close_selectors = (".close", ".btn-close", "[aria-label='Close']", "[data-dismiss='modal']")
    try:
        for selector in modal_selectors:
            for modal in driver.find_elements(By.CSS_SELECTOR, selector):
                if not modal.is_displayed():
                    continue

                closed_by_click = False
                for close_sel in close_selectors:
                    try:
                        button = modal.find_element(By.CSS_SELECTOR, close_sel)
                        if button.is_displayed():
                            button.click()
                            closed_by_click = True
                            logger.info(f"Dismissed modal using selector {close_sel}")
                            time.sleep(1)
                            break
                    except Exception:
                        # No such button, or it could not be clicked: try the next one.
                        continue

                if closed_by_click:
                    continue
                # No close button worked: drop the element from the DOM.
                driver.execute_script("arguments[0].remove();", modal)
                logger.info(f"Removed overlay/modal with selector {selector}")
    except Exception as exc:
        logger.debug(f"Modal dismissal error: {exc}")
|
233
|
+
|
234
|
+
def _advanced_find_element(self, driver: webdriver.Chrome, keyword: str) -> Optional[WebElement]:
    """Fuzzy-match ``keyword`` against candidate elements on the page.

    Each input, textarea, button, a and div element is scored by the
    difflib similarity between ``keyword`` and the element's id, name,
    placeholder, aria-label and text joined by spaces (case-insensitive).
    The first element with the highest score is returned if that score
    exceeds 0.5; otherwise None.
    """
    attribute_names = ("id", "name", "placeholder", "aria-label")

    def similarity(element) -> float:
        pieces = [element.get_attribute(attr) or "" for attr in attribute_names]
        pieces.append(element.text or "")
        haystack = " ".join(pieces)
        return difflib.SequenceMatcher(None, haystack.lower(), keyword.lower()).ratio()

    scored = [
        (similarity(element), element)
        for element in driver.find_elements(By.CSS_SELECTOR, "input, textarea, button, a, div")
    ]
    if not scored:
        return None

    # max() keeps the first of equally-scored elements.
    best_ratio, best_match = max(scored, key=lambda pair: pair[0])
    if best_ratio > 0.5:
        logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
        return best_match
    return None
|
258
|
+
|
259
|
+
def _handle_navigation(self, driver: webdriver.Chrome, url: str) -> Dict[str, Any]:
    """Open ``url`` and wait for the page's <body> to be present.

    A URL without an http:// or https:// prefix gets "https://" prepended.
    Returns a result dict whose "success" flag reports whether the page
    loaded within ``self.default_timeout`` seconds.
    """
    target = url if url.startswith(("http://", "https://")) else f"https://{url}"
    try:
        driver.get(target)
        body_present = EC.presence_of_element_located((By.TAG_NAME, "body"))
        WebDriverWait(driver, self.default_timeout).until(body_present)
    except Exception as exc:
        logger.error(f"Navigation to {target} failed: {exc}")
        return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(exc)}"}
    return {"action": "navigate", "success": True, "message": f"Navigated to {target}"}
|
270
|
+
|
271
|
+
def _handle_click(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
    """Click the element matching the CSS ``selector``.

    Waits up to ``self.default_timeout`` seconds for the element to be
    clickable, scrolls it to the centre of the viewport, then clicks it.
    If the normal click raises, the click is issued through JavaScript.
    """
    try:
        waiter = WebDriverWait(driver, self.default_timeout)
        target = waiter.until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
        driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", target)
        try:
            target.click()
        except Exception:
            # Fall back to a script-driven click on the same element.
            driver.execute_script("arguments[0].click();", target)
    except Exception as exc:
        logger.error(f"Click action failed on selector {selector}: {exc}")
        return {"action": "click", "success": False, "message": f"Click failed: {str(exc)}"}
    return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
|
286
|
+
|
287
|
+
def _handle_typing(self, driver: webdriver.Chrome, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
    """Type ``text`` into the element matching ``selector``.

    If the element cannot be located and either the task description or the
    selector mentions "search", ``self._advanced_find_element`` is asked for
    a search-like element to use instead.

    Args:
        driver: Browser session to act on.
        selector: CSS selector of the input element.
        text: Text to send after clearing the element.
        task: Task dict; only its ``description`` entry is read here.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys.
    """
    try:
        element = WebDriverWait(driver, self.default_timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector))
        )
    except Exception as e:
        # Tolerate a missing/None description or selector: calling .lower() on
        # None here would raise from inside the handler and mask the real error.
        description = str((task or {}).get("description") or "")
        selector_text = str(selector or "")
        if "search" in description.lower() or "search" in selector_text.lower():
            logger.info("Primary selector failed; using advanced fallback for element detection.")
            element = self._advanced_find_element(driver, "search")
            if not element:
                return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
        else:
            logger.error(f"Typing action failed on selector {selector}: {e}")
            return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
    try:
        element.clear()
        element.send_keys(text)
        return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
    except Exception as e:
        logger.error(f"Typing action failed: {e}")
        return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
|
312
|
+
|
313
|
+
def _handle_wait(self, seconds: str) -> Dict[str, Any]:
    """Sleep for a fixed number of seconds.

    Args:
        seconds: Duration to wait; anything ``float()`` accepts.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys.
        Values that cannot be used as a sleep duration (non-numeric, ``None``,
        negative, NaN, infinite) yield ``success=False``.
    """
    try:
        wait_time = float(seconds)
        logger.info(f"Waiting for {wait_time} seconds")
        time.sleep(wait_time)
        return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
    except (TypeError, ValueError, OverflowError):
        # TypeError: seconds is None or another non-numeric object.
        # ValueError: unparsable text, or a negative/NaN value rejected by sleep.
        # OverflowError: a value too large for sleep (e.g. "inf").
        logger.error(f"Invalid wait time provided: {seconds}")
        return {"action": "wait", "success": False, "message": "Invalid wait time"}
|
323
|
+
|
324
|
+
def _handle_wait_for_ajax(self, driver: webdriver.Chrome, seconds: str) -> Dict[str, Any]:
    """Poll the page until jQuery reports no active requests, or time runs out.

    The page is considered idle when ``window.jQuery`` is absent or
    ``jQuery.active`` is zero. Requests issued without jQuery are not
    observed by this check.

    The wait is best-effort: reaching the timeout still returns
    ``success=True``, but the message states that activity was still pending.

    Args:
        driver: Browser session to poll.
        seconds: Maximum time to wait; whole or fractional seconds.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys.
        ``success`` is False only when the timeout is invalid or polling raises.
    """
    try:
        timeout = float(seconds)
        # NaN and infinity would make the deadline meaningless or unbounded.
        if timeout != timeout or timeout == float("inf"):
            raise ValueError(f"invalid timeout: {seconds!r}")
        timeout = max(timeout, 0.0)
        logger.info(f"Waiting for AJAX/network activity for up to {timeout} seconds.")
        # Monotonic clock so system clock adjustments cannot stretch or cut the wait.
        deadline = time.monotonic() + timeout
        settled = False
        while True:
            settled = bool(driver.execute_script(
                "return window.jQuery ? jQuery.active === 0 : true;"
            ))
            remaining = deadline - time.monotonic()
            if settled or remaining <= 0:
                break
            # Never sleep past the deadline.
            time.sleep(min(0.5, remaining))
        if settled:
            message = "AJAX/network activity subsided."
        else:
            logger.warning(f"AJAX/network activity still pending after {timeout} seconds.")
            message = f"AJAX/network activity still pending after {timeout} seconds."
        return {"action": "wait_for_ajax", "success": True, "message": message}
    except Exception as e:
        logger.error(f"Wait for AJAX failed: {e}")
        return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
|
345
|
+
|
346
|
+
def _handle_scroll(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
    """Scroll an element into view, or scroll to the bottom of the page.

    Args:
        driver: Browser session to act on.
        selector: CSS selector of the element to centre in the viewport.
            A falsy value scrolls to the bottom of the document instead.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys.
    """
    try:
        if not selector:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            destination = "page bottom"
        else:
            locator = (By.CSS_SELECTOR, selector)
            node = WebDriverWait(driver, self.default_timeout).until(
                EC.presence_of_element_located(locator)
            )
            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", node)
            destination = selector
    except Exception as err:
        logger.error(f"Scroll action failed on selector {selector}: {err}")
        return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(err)}"}
    return {"action": "scroll", "success": True, "message": f"Scrolled to {destination}"}
|
362
|
+
|
363
|
+
def _handle_hover(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
    """Move the mouse pointer over the element matching a CSS selector.

    Waits up to ``self.default_timeout`` for the element to be visible before
    performing the move.

    Args:
        driver: Browser session to act on.
        selector: CSS selector of the element to hover.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys.
    """
    try:
        visible = EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
        hover_target = WebDriverWait(driver, self.default_timeout).until(visible)
        pointer = ActionChains(driver)
        pointer.move_to_element(hover_target).perform()
    except Exception as err:
        logger.error(f"Hover action failed on selector {selector}: {err}")
        return {"action": "hover", "success": False, "message": f"Hover failed: {str(err)}"}
    return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
|
374
|
+
|
375
|
+
def _handle_screenshot(self, driver: webdriver.Chrome, filename: str) -> Dict[str, Any]:
    """Save a screenshot of the current browser window to ``filename``.

    Args:
        driver: Browser session to capture.
        filename: Destination path of the image file.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys.
    """
    try:
        saved = driver.save_screenshot(filename)
    except Exception as e:
        logger.error(f"Screenshot capture failed: {e}")
        return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
    # save_screenshot signals a file-write problem by returning False rather
    # than raising, so the return value must be checked before claiming success.
    if saved is False:
        logger.error(f"Screenshot capture failed: could not write {filename}")
        return {"action": "screenshot", "success": False, "message": f"Screenshot failed: could not write {filename}"}
    return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
|
383
|
+
|
384
|
+
def _handle_switch_tab(self, driver: webdriver.Chrome, value: str) -> Dict[str, Any]:
    """Switch the driver's focus to another window/tab.

    Args:
        driver: Browser session to act on.
        value: Either the keyword ``"new"`` (case-insensitive), which selects
            the last entry of ``driver.window_handles``, or a zero-based index
            into that list.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys.
        Indices outside ``0 .. len(handles) - 1`` yield ``success=False``.
    """
    try:
        handles = driver.window_handles
        # str() lets callers pass an int index as well as its text form.
        requested = str(value).strip()
        if requested.lower() == "new":
            target_handle = handles[-1]
        else:
            idx = int(requested)
            # Reject negative indices explicitly: Python would otherwise wrap
            # them around (e.g. -1 selecting the last tab) or raise IndexError.
            if not 0 <= idx < len(handles):
                return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
            target_handle = handles[idx]
        driver.switch_to.window(target_handle)
        return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
    except Exception as e:
        logger.error(f"Switch tab failed: {e}")
        return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
|
403
|
+
|
404
|
+
def _handle_execute_script(self, driver: webdriver.Chrome, script: str) -> Dict[str, Any]:
    """Run a JavaScript snippet in the current page and report the outcome.

    The script is passed to ``driver.execute_script`` unchanged.
    NOTE(review): no validation is applied to ``script`` here; confirm that
    callers only pass trusted code.

    Args:
        driver: Browser session in which the script runs.
        script: JavaScript source to execute.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys; on
        success it also carries the script's return value under ``result``.
    """
    try:
        script_result = driver.execute_script(script)
    except Exception as err:
        logger.error(f"Execute script failed: {err}")
        return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(err)}"}
    return {
        "action": "execute_script",
        "success": True,
        "message": "Script executed successfully",
        "result": script_result,
    }
|
414
|
+
|
415
|
+
def _handle_drag_and_drop(self, driver: webdriver.Chrome, source_selector: str, target_selector: str) -> Dict[str, Any]:
    """Drag one element onto another.

    Each element is looked up by CSS selector, waiting up to
    ``self.default_timeout`` for it to be present, source first.

    Args:
        driver: Browser session to act on.
        source_selector: CSS selector of the element to drag.
        target_selector: CSS selector of the drop destination.

    Returns:
        A result dict with ``action``, ``success`` and ``message`` keys.
    """
    try:
        # Resolve source first, then target, each with its own wait.
        dragged, drop_zone = [
            WebDriverWait(driver, self.default_timeout).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, css))
            )
            for css in (source_selector, target_selector)
        ]
        gesture = ActionChains(driver).drag_and_drop(dragged, drop_zone)
        gesture.perform()
    except Exception as err:
        logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {err}")
        return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(err)}"}
    return {
        "action": "drag_and_drop",
        "success": True,
        "message": f"Dragged element from {source_selector} to {target_selector}",
    }
|
431
|
+
|
432
|
+
def _capture_failure_screenshot(self, driver: webdriver.Chrome, action: str) -> None:
    """Save a timestamped screenshot for debugging after a failed action.

    The file is named ``failure_<action>_<unix-seconds>.png`` and written
    relative to the current working directory. This is best-effort: problems
    are logged, never raised.

    Args:
        driver: Browser session to capture.
        action: Name of the action that failed; used in the file name.
    """
    # The action name is caller-supplied, so keep only characters that are
    # safe inside a file name (no path separators, no "..").
    safe_action = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in str(action)
    ) or "unknown"
    filename = f"failure_{safe_action}_{int(time.time())}.png"
    try:
        saved = driver.save_screenshot(filename)
    except Exception as e:
        logger.error(f"Failed to capture screenshot: {e}")
        return
    # save_screenshot returns False (without raising) when the file cannot be written.
    if saved is False:
        logger.error(f"Failed to capture screenshot: could not write {filename}")
    else:
        logger.info(f"Failure screenshot captured: {filename}")
|
@@ -4,6 +4,7 @@ setup.py
|
|
4
4
|
semantio/__init__.py
|
5
5
|
semantio/agent.py
|
6
6
|
semantio/memory.py
|
7
|
+
semantio/models.py
|
7
8
|
semantio/rag.py
|
8
9
|
semantio.egg-info/PKG-INFO
|
9
10
|
semantio.egg-info/SOURCES.txt
|
@@ -29,7 +30,9 @@ semantio/llm/groq.py
|
|
29
30
|
semantio/llm/mistral.py
|
30
31
|
semantio/llm/openai.py
|
31
32
|
semantio/storage/__init__.py
|
33
|
+
semantio/storage/base_storage.py
|
32
34
|
semantio/storage/cloud_storage.py
|
35
|
+
semantio/storage/in_memory_storage.py
|
33
36
|
semantio/storage/local_storage.py
|
34
37
|
semantio/tools/__init__.py
|
35
38
|
semantio/tools/base_tool.py
|
@@ -1,11 +0,0 @@
|
|
1
|
-
from typing import List, Dict
|
2
|
-
|
3
|
-
class Memory:
    """In-process store of conversation messages, kept in insertion order."""

    def __init__(self) -> None:
        # Each entry is {"role": <speaker>, "content": <text>}.
        self.history: List[Dict[str, str]] = []

    def add_message(self, role: str, content: str) -> None:
        """Append one message to the history.

        Args:
            role: Speaker label stored under the ``role`` key.
            content: Message text stored under the ``content`` key.
        """
        self.history.append({"role": role, "content": content})

    def get_history(self) -> List[Dict[str, str]]:
        """Return the stored messages, oldest first.

        A shallow copy is returned so that callers adding to or removing from
        the result do not alter the stored history.
        """
        return list(self.history)
|
File without changes
|
File without changes
|
File without changes
|
@@ -1,271 +0,0 @@
|
|
1
|
-
# web_browser.py
|
2
|
-
from typing import Dict, Any, List, Optional
|
3
|
-
from pydantic import Field, BaseModel
|
4
|
-
from selenium import webdriver
|
5
|
-
from selenium.webdriver.common.by import By
|
6
|
-
from selenium.webdriver.support.ui import WebDriverWait
|
7
|
-
from selenium.webdriver.support import expected_conditions as EC
|
8
|
-
from selenium.webdriver.chrome.options import Options
|
9
|
-
from selenium.webdriver.chrome.service import Service
|
10
|
-
from webdriver_manager.chrome import ChromeDriverManager
|
11
|
-
from bs4 import BeautifulSoup
|
12
|
-
import json
|
13
|
-
import time
|
14
|
-
import re
|
15
|
-
import logging
|
16
|
-
from .base_tool import BaseTool
|
17
|
-
|
18
|
-
logger = logging.getLogger(__name__)
|
19
|
-
|
20
|
-
class BrowserPlan(BaseModel):
    """Container for the ordered list of automation task dicts to run."""

    # Required field: one dict per task.
    tasks: List[Dict[str, Any]] = Field(..., description="List of automation tasks to execute")
|
25
|
-
|
26
|
-
class WebBrowserTool(BaseTool):
    """Selenium-based browser automation driven by an LLM-generated plan.

    ``execute`` starts Chrome, asks ``self.llm`` for a JSON task plan, and
    runs the tasks in order until one fails. Every handler returns a result
    dict with ``action``, ``success`` and ``message`` keys instead of raising.
    """

    name: str = Field("WebBrowser", description="Name of the tool")
    description: str = Field(
        "Universal web automation tool for dynamic website interactions",
        description="Tool description"
    )

    @staticmethod
    def _result(action: str, success: bool, message: str) -> Dict[str, Any]:
        """Build the result dict shared by all handlers."""
        return {"action": action, "success": success, "message": message}

    def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
        """Plan and run a browser automation workflow.

        Args:
            input: Must contain ``query``; may contain a boolean ``headless``.

        Returns:
            ``{"status": "success", "results": [...]}`` with one result per
            attempted task (execution stops after the first failed task), or
            ``{"status": "error", "message": ...}`` if setup or planning raises.
        """
        driver = None
        try:
            driver = self._init_browser(input.get("headless", False))
            results = []

            # No page is loaded when the plan is generated, so the URL is empty.
            plan = self._generate_plan(input['query'], "")

            for task in plan.tasks:
                result = self._execute_safe_task(driver, task)
                results.append(result)

                if not result['success']:
                    break

            return {"status": "success", "results": results}

        except Exception as e:
            return {"status": "error", "message": str(e)}
        finally:
            # Always release the browser, including on failure.
            if driver:
                driver.quit()

    def _init_browser(self, headless: bool) -> webdriver.Chrome:
        """Start a Chrome session, optionally headless."""
        options = Options()
        options.add_argument("--start-maximized")
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])

        if headless:
            options.add_argument("--headless=new")

        return webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options
        )

    def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
        """Ask the LLM for a task plan for ``query`` and parse its reply."""
        prompt = f"""Generate browser automation plan for: {query}

Current URL: {current_url or 'No page loaded yet'}

Required JSON format:
{{
"tasks": [
{{
"action": "navigate|click|type|wait|scroll",
"selector": "CSS selector (optional)",
"value": "input text/URL/seconds",
"description": "action purpose"
}}
]
}}

Guidelines:
1. Prefer IDs in selectors (#element-id)
2. Use semantic attributes (aria-label, name)
3. Include wait steps after navigation
4. Prioritize visible elements
5. Add scroll steps for hidden elements
"""

        response = self.llm.generate(prompt=prompt)
        return self._parse_plan(response)

    def _parse_plan(self, response: str) -> BrowserPlan:
        """Extract a task plan from an LLM reply.

        Looks for a fenced json code block first, then for the outermost
        ``{...}`` span. Tasks that are not dicts or lack ``action`` or
        ``description`` are dropped. Any parsing problem yields an empty plan.
        """
        try:
            fenced = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
            if fenced:
                plan_data = json.loads(fenced.group(1).strip())
            else:
                bare = re.search(r'\{.*\}', response, re.DOTALL)
                if bare is None:
                    raise ValueError("no JSON object found in response")
                plan_data = json.loads(bare.group())

            # The reply may be valid JSON of the wrong shape (list, string, ...).
            if not isinstance(plan_data, dict):
                raise ValueError("plan is not a JSON object")
            raw_tasks = plan_data.get("tasks", [])
            if not isinstance(raw_tasks, list):
                raise ValueError("'tasks' is not a list")

            validated_tasks = []
            for task in raw_tasks:
                if not isinstance(task, dict):
                    continue
                if not all(key in task for key in ("action", "description")):
                    continue
                validated_tasks.append({
                    "action": task["action"],
                    "selector": task.get("selector", ""),
                    "value": task.get("value", ""),
                    "description": task["description"]
                })

            return BrowserPlan(tasks=validated_tasks)

        # json.JSONDecodeError is a ValueError subclass; TypeError covers a
        # non-string response.
        except (ValueError, TypeError, AttributeError) as e:
            logger.error(f"Plan parsing failed: {e}")
            return BrowserPlan(tasks=[])

    def _execute_safe_task(self, driver, task: Dict) -> Dict[str, Any]:
        """Dispatch one task to its handler, converting any error to a result."""
        # Bound before the try so the except branch can always report it, even
        # when reading task["action"] itself is what fails.
        action = "unknown"
        try:
            action = task["action"].lower()
            selector = task.get("selector", "")
            value = task.get("value", "")

            if action == "navigate":
                return self._handle_navigation(driver, value)
            if action == "click":
                return self._handle_click(driver, selector)
            if action == "type":
                return self._handle_typing(driver, selector, value)
            if action == "wait":
                return self._handle_wait(value)
            if action == "scroll":
                return self._handle_scroll(driver, selector)

            return self._result(action, False, f"Unsupported action: {action}")

        except Exception as e:
            return self._result(action, False, f"Critical error: {str(e)}")

    def _handle_navigation(self, driver, url: str) -> Dict[str, Any]:
        """Load ``url`` (adding ``https://`` when no http(s) scheme is given)."""
        url = (url or "").strip()
        if not url:
            return self._result("navigate", False, "Navigation failed: no URL provided")
        # Case-insensitive so "HTTPS://..." is not double-prefixed.
        if not url.lower().startswith(("http://", "https://")):
            url = f"https://{url}"

        try:
            driver.get(url)
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
            return self._result("navigate", True, f"Navigated to {url}")
        except Exception as e:
            return self._result("navigate", False, f"Navigation failed: {str(e)}")

    def _handle_click(self, driver, selector: str) -> Dict[str, Any]:
        """Wait for the element to be clickable, scroll to it and click it."""
        try:
            element = WebDriverWait(driver, 15).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
            )
            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
            element.click()
            return self._result("click", True, f"Clicked element: {selector}")
        except Exception as e:
            return self._result("click", False, f"Click failed: {str(e)}")

    def _handle_typing(self, driver, selector: str, text: str) -> Dict[str, Any]:
        """Clear the element matching ``selector`` and type ``text`` into it."""
        try:
            element = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector))
            )
            element.clear()
            element.send_keys(text)
            return self._result("type", True, f"Typed '{text}' into {selector}")
        except Exception as e:
            return self._result("type", False, f"Typing failed: {str(e)}")

    def _handle_wait(self, seconds: str) -> Dict[str, Any]:
        """Sleep for ``seconds``; unusable durations yield a failed result."""
        try:
            wait_time = float(seconds)
            time.sleep(wait_time)
            return self._result("wait", True, f"Waited {wait_time} seconds")
        except (TypeError, ValueError, OverflowError):
            # TypeError: None/non-numeric object; ValueError: bad text or a
            # negative value; OverflowError: a value too large for sleep.
            return self._result("wait", False, "Invalid wait time")

    def _handle_scroll(self, driver, selector: str) -> Dict[str, Any]:
        """Scroll to the element matching ``selector``, or to the page bottom."""
        try:
            if selector:
                element = WebDriverWait(driver, 15).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                )
                driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
            else:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

            return self._result("scroll", True, f"Scrolled to {selector or 'page bottom'}")
        except Exception as e:
            return self._result("scroll", False, f"Scroll failed: {str(e)}")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|