semantio-0.0.2-py3-none-any.whl → semantio-0.0.4-py3-none-any.whl
- semantio/agent.py +37 -75
- semantio/cli/main.py +6 -6
- semantio/tools/web_browser.py +251 -133
- {semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/METADATA +5 -1
- {semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/RECORD +9 -10
- semantio/llm/llama.py +0 -0
- {semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/LICENSE +0 -0
- {semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/WHEEL +0 -0
- {semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/entry_points.txt +0 -0
- {semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/top_level.txt +0 -0
semantio/agent.py CHANGED
@@ -21,24 +21,24 @@ import os
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-class Assistant(BaseModel):
+class Agent(BaseModel):
     # -*- Agent settings
-    name: Optional[str] = Field(None, description="Name of the assistant.")
-    description: Optional[str] = Field(None, description="Description of the assistant's role.")
-    instructions: Optional[List[str]] = Field(None, description="List of instructions for the assistant.")
+    name: Optional[str] = Field(None, description="Name of the agent.")
+    description: Optional[str] = Field(None, description="Description of the agent's role.")
+    instructions: Optional[List[str]] = Field(None, description="List of instructions for the agent.")
     model: Optional[str] = Field(None, description="This one is not in the use.")
     show_tool_calls: bool = Field(False, description="Whether to show tool calls in the response.")
     markdown: bool = Field(False, description="Whether to format the response in markdown.")
-    tools: Optional[List[BaseTool]] = Field(None, description="List of tools available to the assistant.")
-    user_name: Optional[str] = Field("User", description="Name of the user interacting with the assistant.")
-    emoji: Optional[str] = Field(":robot:", description="Emoji to represent the assistant in the CLI.")
+    tools: Optional[List[BaseTool]] = Field(None, description="List of tools available to the agent.")
+    user_name: Optional[str] = Field("User", description="Name of the user interacting with the agent.")
+    emoji: Optional[str] = Field(":robot:", description="Emoji to represent the agent in the CLI.")
     rag: Optional[RAG] = Field(None, description="RAG instance for context retrieval.")
     knowledge_base: Optional[Any] = Field(None, description="Knowledge base for domain-specific information.")
     llm: Optional[str] = Field(None, description="The LLM provider to use (e.g., 'groq', 'openai', 'anthropic').")
     llm_model: Optional[str] = Field(None, description="The specific model to use for the LLM provider.")
     llm_instance: Optional[BaseLLM] = Field(None, description="The LLM instance to use.")
     json_output: bool = Field(False, description="Whether to format the response as JSON.")
-    api: bool = Field(False, description="Whether to generate an API for the assistant.")
+    api: bool = Field(False, description="Whether to generate an API for the agent.")
     api_config: Optional[Dict] = Field(
         None,
         description="Configuration for the API (e.g., host, port, authentication).",
@@ -46,7 +46,7 @@ class Assistant(BaseModel):
     api_generator: Optional[Any] = Field(None, description="The API generator instance.")
     expected_output: Optional[Union[str, Dict]] = Field(None, description="The expected format or structure of the output.")
     semantic_model: Optional[Any] = Field(None, description="SentenceTransformer model for semantic matching.")
-    team: Optional[List['Assistant']] = Field(None, description="List of assistants in the team.")
+    team: Optional[List['Agent']] = Field(None, description="List of assistants in the team.")
     auto_tool: bool = Field(False, description="Whether to automatically detect and call tools.")
 
     # Allow arbitrary types
@@ -56,8 +56,11 @@ class Assistant(BaseModel):
         super().__init__(**kwargs)
         # Initialize the model and tools here if needed
         self._initialize_model()
-        #
+        # Initialize tools as an empty list if not provided
         if self.tools is None:
+            self.tools = []
+        # Automatically discover and register tools if auto tool is enabled
+        if self.auto_tool and not self.tools:
             self.tools = self._discover_tools()
         # Pass the LLM instance to each tool
         for tool in self.tools:
@@ -125,7 +128,7 @@ class Assistant(BaseModel):
            try:
                # Import the module
                module_name = file.stem
-                module = importlib.import_module(f"
+                module = importlib.import_module(f"semantio.tools.{module_name}")
 
                # Find all classes that inherit from BaseTool
                for name, obj in module.__dict__.items():
@@ -197,7 +200,7 @@ class Assistant(BaseModel):
            model_to_use = self.llm_model or default_model
 
            # Dynamically import and initialize the LLM class
-            module_name = f"
+            module_name = f"semantio.llm.{llm_provider}"
            llm_module = importlib.import_module(module_name)
            llm_class = getattr(llm_module, llm_class_name)
            self.llm_instance = llm_class(model=model_to_use, api_key=api_key)
@@ -213,11 +216,10 @@ class Assistant(BaseModel):
         message: Optional[Union[str, Image, List, Dict]] = None,
         stream: bool = False,
         markdown: bool = False,
-
-        team: Optional[List['Assistant']] = None,
+        team: Optional[List['Agent']] = None,
         **kwargs,
     ) -> Union[str, Dict]: # Add return type hint
-        """Print the assistant's response to the console and return it."""
+        """Print the agent's response to the console and return it."""
 
         if stream:
             # Handle streaming response
@@ -228,60 +230,23 @@ class Assistant(BaseModel):
             return response
         else:
             # Generate and return the response
-            response = self._generate_response(message, markdown=markdown,
+            response = self._generate_response(message, markdown=markdown, team=team, **kwargs)
             print(response) # Print the response to the console
             return response
 
 
     def _stream_response(self, message: str, markdown: bool = False, **kwargs) -> Iterator[str]:
-        """Stream the assistant's response."""
+        """Stream the agent's response."""
         # Simulate streaming by yielding chunks of the response
         response = self._generate_response(message, markdown=markdown, **kwargs)
         for chunk in response.split():
             yield chunk + " "
 
     def register_tool(self, tool: BaseTool):
-        """Register a tool for the assistant."""
+        """Register a tool for the agent."""
         if self.tools is None:
             self.tools = []
         self.tools.append(tool)
-
-    def _detect_tool_call(self, message: str) -> Optional[Dict[str, Any]]:
-        """
-        Use the LLM to detect which tool should be called based on the user's query.
-        """
-        if not self.tools:
-            logger.warning("No tools available to detect.")
-            return None
-
-        # Create a prompt for the LLM
-        prompt = f"""
-        You are an AI assistant that helps users by selecting the most appropriate tool to answer their query. Below is a list of available tools and their functionalities:
-
-        {self._get_tool_descriptions()}
-
-        Based on the user's query, select the most appropriate tool. Respond with the name of the tool (e.g., "CryptoPriceChecker"). If no tool is suitable, respond with "None".
-
-        User Query: "{message}"
-        """
-
-        try:
-            # Call the LLM to generate the response
-            response = self.llm_instance.generate(prompt=prompt)
-            tool_name = response.strip().replace('"', '').replace("'", "")
-
-            # Find the tool in the list of available tools
-            tool = next((t for t in self.tools if t.name.lower() == tool_name.lower()), None)
-            if tool:
-                logger.info(f"Detected tool call: {tool.name}")
-                return {
-                    "tool": tool.name,
-                    "input": {"query": message}
-                }
-        except Exception as e:
-            logger.error(f"Failed to detect tool call: {e}")
-
-        return None
 
     def _analyze_query_and_select_tools(self, query: str) -> List[Dict[str, Any]]:
         """
@@ -290,7 +255,7 @@ class Assistant(BaseModel):
         """
         # Create a prompt for the LLM to analyze the query and select tools
         prompt = f"""
-        You are an AI assistant that helps analyze user queries and select the most appropriate tools.
+        You are an AI agent that helps analyze user queries and select the most appropriate tools.
         Below is a list of available tools and their functionalities:
 
         {self._get_tool_descriptions()}
@@ -324,17 +289,16 @@ class Assistant(BaseModel):
             return []
 
 
-    def _generate_response(self, message: str, markdown: bool = False,
-        """Generate the
-        # Use the specified
-        if tools is not None:
-            self.tools = tools
+    def _generate_response(self, message: str, markdown: bool = False, team: Optional[List['Agent']] = None, **kwargs) -> str:
+        """Generate the agent's response, including tool execution and context retrieval."""
+        # Use the specified team if provided
         if team is not None:
             return self._generate_team_response(message, team, markdown=markdown, **kwargs)
 
         # Initialize tool_outputs as an empty dictionary
         tool_outputs = {}
         responses = []
+        tool_calls = []
 
         # Use the LLM to analyze the query and dynamically select tools when auto_tool is enabled
         if self.auto_tool:
@@ -344,7 +308,7 @@ class Assistant(BaseModel):
         if self.tools:
             tool_calls = [
                 {
-                    "tool": tool.
+                    "tool": tool.name,
                     "input": {
                         "query": message, # Use the message as the query
                         "context": None, # No context provided by default
@@ -352,10 +316,8 @@ class Assistant(BaseModel):
                 }
                 for tool in self.tools
             ]
-        else:
-            tool_calls = kwargs.get("tool_calls", [])
 
-
+        # Execute tools if any are detected
         if tool_calls:
             for tool_call in tool_calls:
                 tool_name = tool_call["tool"]
@@ -396,7 +358,7 @@ class Assistant(BaseModel):
                 except Exception as e:
                     logger.error(f"Failed to generate LLM response: {e}")
                     responses.append(f"An error occurred while generating the analysis: {e}")
-        if not tool_calls:
+        if not self.tools and not tool_calls:
             # If no tools were executed, proceed with the original logic
             # Retrieve relevant context using RAG
             rag_context = self.rag.retrieve(message) if self.rag else None
@@ -435,12 +397,12 @@ class Assistant(BaseModel):
         # Combine all responses into a single string
         return "\n\n".join(responses)
 
-    def _generate_team_response(self, message: str, team: List['Assistant'], markdown: bool = False, **kwargs) -> str:
+    def _generate_team_response(self, message: str, team: List['Agent'], markdown: bool = False, **kwargs) -> str:
         """Generate a response using a team of assistants."""
         responses = []
-        for assistant in team:
-            response = assistant.print_response(message, markdown=markdown, **kwargs)
-            responses.append(f"**{assistant.name}:**\n\n{response}")
+        for agent in team:
+            response = agent.print_response(message, markdown=markdown, **kwargs)
+            responses.append(f"**{agent.name}:**\n\n{response}")
         return "\n\n".join(responses)
 
     def _build_prompt(self, message: str, context: Optional[List[Dict]]) -> str:
@@ -578,7 +540,7 @@ class Assistant(BaseModel):
         exit_on: Optional[List[str]] = None,
         **kwargs,
     ):
-        """Run the assistant in a CLI app."""
+        """Run the agent in a CLI app."""
        from rich.prompt import Prompt
 
        if message:
@@ -593,15 +555,15 @@ class Assistant(BaseModel):
            self.print_response(message=message, **kwargs)
 
    def _generate_api(self):
-        """Generate an API for the assistant if api=True."""
+        """Generate an API for the agent if api=True."""
        from .api.api_generator import APIGenerator
        self.api_generator = APIGenerator(self)
-        print(f"API generated for assistant '{self.name}'. Use `.run_api()` to start the API server.")
+        print(f"API generated for agent '{self.name}'. Use `.run_api()` to start the API server.")
 
    def run_api(self):
-        """Run the API server for the assistant."""
+        """Run the API server for the agent."""
        if not hasattr(self, 'api_generator'):
-            raise ValueError("API is not enabled for this assistant. Set `api=True` when initializing the assistant.")
+            raise ValueError("API is not enabled for this agent. Set `api=True` when initializing the agent.")
 
        # Get API configuration
        host = self.api_config.get("host", "0.0.0.0") if self.api_config else "0.0.0.0"
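For orientation, here is a minimal usage sketch of the renamed class, based only on the fields and methods visible in this diff; the provider string, model name, and query text are illustrative placeholders rather than values taken from the release.

# Hypothetical usage sketch of the renamed Agent class (formerly Assistant).
# All constructor fields used here appear in the diff above; the provider,
# model name, and query strings are placeholders, not values from this release.
from semantio.agent import Agent

agent = Agent(
    name="Researcher",
    description="Answers research questions concisely.",
    instructions=["Keep answers short."],
    llm="groq",                      # LLM provider string (see the llm field)
    llm_model="mixtral-8x7b-32768",  # placeholder model name
    auto_tool=True,                  # let the agent discover tools from semantio.tools
    markdown=True,
)

# print_response() prints the reply to the console and also returns it.
answer = agent.print_response("Summarize today's top crypto headlines.")

# Passing a team routes the message to each member and concatenates their replies,
# as _generate_team_response() does above.
reviewer = Agent(name="Reviewer", llm="groq", llm_model="mixtral-8x7b-32768")
combined = agent.print_response("Review this summary.", team=[reviewer])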
semantio/cli/main.py CHANGED
@@ -1,7 +1,7 @@
 import argparse
 import warnings
-from
-from
+from semantio.agent import Agent
+from semantio.llm import get_llm
 from urllib3.exceptions import NotOpenSSLWarning
 
 # Suppress the NotOpenSSLWarning
@@ -9,7 +9,7 @@ warnings.filterwarnings("ignore", category=NotOpenSSLWarning)
 
 def main():
     parser = argparse.ArgumentParser(description="opAi CLI")
-    parser.add_argument("--message", type=str, required=True, help="Message to send to the assistant")
+    parser.add_argument("--message", type=str, required=True, help="Message to send to the agent")
     parser.add_argument("--provider", type=str, required=True, help="LLM provider (e.g., groq, openai)")
     parser.add_argument("--api-key", type=str, required=True, help="API key for the LLM provider")
     parser.add_argument("--model", type=str, default=None, help="Model name (e.g., mixtral-8x7b-32768)")
@@ -22,9 +22,9 @@ def main():
 
     llm = get_llm(provider=args.provider, **llm_config)
 
-    # Create an assistant
-
-
+    # Create an agent
+    agent = Agent(model=args.provider, llm=llm)
+    agent.print_response(args.message)
 
 
 if __name__ == "__main__":
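The CLI is a thin wrapper around the same objects. Below is a rough programmatic equivalent of one invocation, assuming llm_config simply forwards the parsed --api-key and --model values to get_llm; the key and model shown are placeholders.

# Rough equivalent of: --message "..." --provider groq --api-key ... --model mixtral-8x7b-32768
# (flag names come from the parser above; the key and model values are placeholders).
from semantio.agent import Agent
from semantio.llm import get_llm

llm = get_llm(provider="groq", api_key="YOUR_API_KEY", model="mixtral-8x7b-32768")
agent = Agent(model="groq", llm=llm)  # mirrors main(): provider string plus the LLM instance
agent.print_response("What does semantio 0.0.4 change?")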
semantio/tools/web_browser.py CHANGED
@@ -1,153 +1,271 @@
-
-from
-import
+# web_browser.py
+from typing import Dict, Any, List, Optional
+from pydantic import Field, BaseModel
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service
+from webdriver_manager.chrome import ChromeDriverManager
+from bs4 import BeautifulSoup
+import json
+import time
+import re
 import logging
+from .base_tool import BaseTool
 
 logger = logging.getLogger(__name__)
 
-class
-
-
-
+class BrowserPlan(BaseModel):
+    tasks: List[Dict[str, Any]] = Field(
+        ...,
+        description="List of automation tasks to execute"
+    )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        Start the browser and create a new context and page.
-        """
-        self.playwright = await async_playwright().start()
-        self.browser = await self.playwright.chromium.launch(headless=self.headless)
-        self.context = await self.browser.new_context()
-        self.page = await self.context.new_page()
-        logger.info("Browser started successfully.")
-
-    async def close(self):
-        """
-        Close the browser and cleanup resources.
-        """
-        if self.browser:
-            await self.browser.close()
-            await self.playwright.stop()
-        logger.info("Browser closed successfully.")
-
-    async def navigate(self, url: str) -> str:
-        """
-        Navigate to a specific URL.
-
-        Args:
-            url (str): The URL to navigate to.
-
-        Returns:
-            str: The page title after navigation.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        await self.page.goto(url)
-        title = await self.page.title()
-        logger.info(f"Navigated to {url}. Page title: {title}")
-        return title
-
-    async def fill_form(self, fields: Dict[str, str]) -> str:
-        """
-        Fill a form with the provided fields.
-
-        Args:
-            fields (Dict[str, str]): A dictionary of field names and values to fill.
-
-        Returns:
-            str: A success message.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        for field, value in fields.items():
-            await self.page.fill(f'input[name="{field}"]', value)
-            logger.info(f"Filled field '{field}' with value '{value}'.")
-
-        return "Form filled successfully."
-
-    async def click(self, selector: str) -> str:
-        """
-        Click an element on the page.
-
-        Args:
-            selector (str): The CSS selector of the element to click.
-
-        Returns:
-            str: A success message.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
+class WebBrowserTool(BaseTool):
+    name: str = Field("WebBrowser", description="Name of the tool")
+    description: str = Field(
+        "Universal web automation tool for dynamic website interactions",
+        description="Tool description"
+    )
+
+    def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
+        """Execute dynamic web automation workflow"""
+        driver = None
+        try:
+            driver = self._init_browser(input.get("headless", False))
+            results = []
+            current_url = ""
 
-
-
-
+            # Generate initial plan
+            plan = self._generate_plan(input['query'], current_url)
+
+            for task in plan.tasks:
+                result = self._execute_safe_task(driver, task)
+                results.append(result)
+
+                if not result['success']:
+                    break
+
+                # Update context for next tasks
+                current_url = driver.current_url
 
-
-
-
+            return {"status": "success", "results": results}
+
+        except Exception as e:
+            return {"status": "error", "message": str(e)}
+        finally:
+            if driver:
+                driver.quit()
 
-
-
+    def _init_browser(self, headless: bool) -> webdriver.Chrome:
+        """Initialize browser with advanced options"""
+        options = Options()
+        options.add_argument("--start-maximized")
+        options.add_argument("--disable-blink-features=AutomationControlled")
+        options.add_experimental_option("excludeSwitches", ["enable-automation"])
+
+        if headless:
+            options.add_argument("--headless=new")
+
+        return webdriver.Chrome(
+            service=Service(ChromeDriverManager().install()),
+            options=options
+        )
 
-
-
+    def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
+        """Generate adaptive execution plan using LLM"""
+        prompt = f"""Generate browser automation plan for: {query}
+
+        Current URL: {current_url or 'No page loaded yet'}
+
+        Required JSON format:
+        {{
+            "tasks": [
+                {{
+                    "action": "navigate|click|type|wait|scroll",
+                    "selector": "CSS selector (optional)",
+                    "value": "input text/URL/seconds",
+                    "description": "action purpose"
+                }}
+            ]
+        }}
+
+        Guidelines:
+        1. Prefer IDs in selectors (#element-id)
+        2. Use semantic attributes (aria-label, name)
+        3. Include wait steps after navigation
+        4. Prioritize visible elements
+        5. Add scroll steps for hidden elements
         """
-
-
+
+        response = self.llm.generate(prompt=prompt)
+        return self._parse_plan(response)
 
-
-
-
-
-
-
+    def _parse_plan(self, response: str) -> BrowserPlan:
+        """Robust JSON parsing with multiple fallback strategies"""
+        try:
+            # Try extracting JSON from markdown code block
+            json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
+            if json_match:
+                plan_data = json.loads(json_match.group(1).strip())
+            else:
+                # Fallback to extract first JSON object
+                json_str = re.search(r'\{.*\}', response, re.DOTALL).group()
+                plan_data = json.loads(json_str)
+
+            # Validate tasks structure
+            validated_tasks = []
+            for task in plan_data.get("tasks", []):
+                if not all(key in task for key in ["action", "description"]):
+                    continue
+                validated_tasks.append({
+                    "action": task["action"],
+                    "selector": task.get("selector", ""),
+                    "value": task.get("value", ""),
+                    "description": task["description"]
+                })
+
+            return BrowserPlan(tasks=validated_tasks)
+
+        except (json.JSONDecodeError, AttributeError) as e:
+            logger.error(f"Plan parsing failed: {e}")
+            return BrowserPlan(tasks=[])
 
-
+    def _execute_safe_task(self, driver, task: Dict) -> Dict[str, Any]:
+        """Execute task with comprehensive error handling"""
+        try:
+            action = task["action"].lower()
+            selector = task.get("selector", "")
+            value = task.get("value", "")
+
+            if action == "navigate":
+                return self._handle_navigation(driver, value)
+
+            elif action == "click":
+                return self._handle_click(driver, selector)
+
+            elif action == "type":
+                return self._handle_typing(driver, selector, value)
+
+            elif action == "wait":
+                return self._handle_wait(value)
+
+            elif action == "scroll":
+                return self._handle_scroll(driver, selector)
+
+            return {
+                "action": action,
+                "success": False,
+                "message": f"Unsupported action: {action}"
+            }
+
+        except Exception as e:
+            return {
+                "action": action,
+                "success": False,
+                "message": f"Critical error: {str(e)}"
+            }
 
-
-        """
-
+    def _handle_navigation(self, driver, url: str) -> Dict[str, Any]:
+        """Smart navigation handler"""
+        if not url.startswith(("http://", "https://")):
+            url = f"https://{url}"
+
+        try:
+            driver.get(url)
+            WebDriverWait(driver, 15).until(
+                EC.presence_of_element_located((By.TAG_NAME, "body"))
+            )
+            return {
+                "action": "navigate",
+                "success": True,
+                "message": f"Navigated to {url}"
+            }
+        except Exception as e:
+            return {
+                "action": "navigate",
+                "success": False,
+                "message": f"Navigation failed: {str(e)}"
+            }
 
-
-
-
-
-
+    def _handle_click(self, driver, selector: str) -> Dict[str, Any]:
+        """Dynamic click handler"""
+        try:
+            element = WebDriverWait(driver, 15).until(
+                EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
+            )
+            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
+            element.click()
+            return {
+                "action": "click",
+                "success": True,
+                "message": f"Clicked element: {selector}"
+            }
+        except Exception as e:
+            return {
+                "action": "click",
+                "success": False,
+                "message": f"Click failed: {str(e)}"
+            }
 
-
-
-
-
-
-
+    def _handle_typing(self, driver, selector: str, text: str) -> Dict[str, Any]:
+        """Universal typing handler"""
+        try:
+            element = WebDriverWait(driver, 15).until(
+                EC.presence_of_element_located((By.CSS_SELECTOR, selector))
+            )
+            element.clear()
+            element.send_keys(text)
+            return {
+                "action": "type",
+                "success": True,
+                "message": f"Typed '{text}' into {selector}"
+            }
+        except Exception as e:
+            return {
+                "action": "type",
+                "success": False,
+                "message": f"Typing failed: {str(e)}"
+            }
 
-
-
+    def _handle_wait(self, seconds: str) -> Dict[str, Any]:
+        """Configurable wait handler"""
+        try:
+            wait_time = float(seconds)
+            time.sleep(wait_time)
+            return {
+                "action": "wait",
+                "success": True,
+                "message": f"Waited {wait_time} seconds"
+            }
+        except ValueError:
+            return {
+                "action": "wait",
+                "success": False,
+                "message": "Invalid wait time"
+            }
 
+    def _handle_scroll(self, driver, selector: str) -> Dict[str, Any]:
+        """Smart scroll handler"""
        try:
-            if
-
-
-
-
-                return await self.click(details)
-            elif action == "scrape":
-                return str(await self.scrape(details))
+            if selector:
+                element = WebDriverWait(driver, 15).until(
+                    EC.presence_of_element_located((By.CSS_SELECTOR, selector))
+                )
+                driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
            else:
-
+                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+
+            return {
+                "action": "scroll",
+                "success": True,
+                "message": f"Scrolled to {selector or 'page bottom'}"
+            }
        except Exception as e:
-
-
+            return {
+                "action": "scroll",
+                "success": False,
+                "message": f"Scroll failed: {str(e)}"
+            }
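A standalone sketch of driving the rewritten tool directly follows. The agent normally injects its LLM instance into every registered tool (see the "Pass the LLM instance to each tool" loop in agent.py), so the llm attribute assignment below is an assumption about that wiring, and the query is illustrative.

# Hypothetical direct use of WebBrowserTool outside an Agent.
# Assumption: the tool reads its planner LLM from self.llm (the Agent normally
# injects this at construction time), and that object exposes generate(prompt=...).
from semantio.llm import get_llm
from semantio.tools.web_browser import WebBrowserTool

llm = get_llm(provider="groq", api_key="YOUR_API_KEY", model="mixtral-8x7b-32768")
tool = WebBrowserTool()
tool.llm = llm  # normally done when the Agent registers the tool

result = tool.execute({
    "query": "open example.com, scroll to the bottom, then wait 2 seconds",
    "headless": True,
})

# execute() returns {"status": "success", "results": [...]} on success,
# or {"status": "error", "message": ...}; each step reports action/success/message.
for step in result.get("results", []):
    print(step["action"], step["success"], step["message"])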
{semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: semantio
-Version: 0.0.2
+Version: 0.0.4
 Summary: A powerful SDK for building AI agents
 Home-page: https://github.com/Syenah/semantio
 Author: Rakesh
@@ -33,6 +33,10 @@ Requires-Dist: sentence-transformers
 Requires-Dist: fuzzywuzzy
 Requires-Dist: duckduckgo-search
 Requires-Dist: yfinance
+Requires-Dist: selenium
+Requires-Dist: beautifulsoup4
+Requires-Dist: webdriver-manager
+Requires-Dist: validators
 
 # Semantio: The Mother of Your AI Agents
 
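The four new requirements back the Selenium-based web_browser.py rewrite above. A quick sanity check that they resolve after upgrading (import names differ slightly from the distribution names):

# Verify the dependencies added in 0.0.4 are importable.
# Distribution names come from the Requires-Dist lines above; bs4 and
# webdriver_manager are the usual import names for beautifulsoup4 and webdriver-manager.
import selenium
import bs4
import webdriver_manager
import validators

for mod in (selenium, bs4, webdriver_manager, validators):
    print(mod.__name__, getattr(mod, "__version__", "version not exposed"))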
{semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/RECORD CHANGED
@@ -1,12 +1,12 @@
 semantio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-semantio/agent.py,sha256=
+semantio/agent.py,sha256=hKytSI5LqNnxqVvwI2hOINqPgrdhUXY9MS_90_crZPs,28584
 semantio/memory.py,sha256=eNAwyAokppHzMcIyFgOw2hT2wnLQBd9GL4T5eallNV4,281
 semantio/rag.py,sha256=ROy3Pa1NURcDs6qQZ8IMoa5Xlzt6I-msEq0C1p8UgB0,472
 semantio/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 semantio/api/api_generator.py,sha256=Q-USITEpluRESEaQuOmF7m1vhLKYU9P8eGlQppKT9J4,829
 semantio/api/fastapi_app.py,sha256=DyTgKJKikMe2G6wWmyzo1rBLXQFi8UWWUMY3UGH4f24,2128
 semantio/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-semantio/cli/main.py,sha256=
+semantio/cli/main.py,sha256=jUvSfehbHWALwracEgBopMIVMraSV9QmDUFfgGcxnP0,1091
 semantio/knowledge_base/__init__.py,sha256=mvp0GFiGSjcxlkaDulAwKOCL9s6gsKTqhPKXF9N3n1g,172
 semantio/knowledge_base/document_loader.py,sha256=nix0yZJ-JJoDbhLkpg5bKDMvNrwykmknI7MRIn0N81k,1910
 semantio/knowledge_base/retriever.py,sha256=XpdzKS1UCncJImVMtG67VXMC7lp2eRzKnShjvktsFMM,1271
@@ -17,7 +17,6 @@ semantio/llm/base_llm.py,sha256=VFl_2S4kqYDuCTWIfWMbKU5aNbVqOCG33E4APOSHF90,668
 semantio/llm/deepseek.py,sha256=oxX-Uw0_lY2sstYs5KGBGFB_hAZUbZomPADdib1mY2M,1100
 semantio/llm/gemini.py,sha256=er3zv1jOvWQBGbPuv4fS4pR_c_abHyhroe-rkXupOO4,1959
 semantio/llm/groq.py,sha256=1AH30paKzDIQjBjWPQPN44QwFHsIOVwI-a587-cDIVc,4285
-semantio/llm/llama.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 semantio/llm/mistral.py,sha256=NpvaB1cE6-jMEBdT0mTf6Ca4Qq2LS8QivDKI6AgdRjE,1061
 semantio/llm/openai.py,sha256=I3ab-d_zFxm-TDhYk6t1PzDtElPJEEQ2eSiARBNIGi4,5174
 semantio/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,16 +27,16 @@ semantio/tools/base_tool.py,sha256=xBNSa_8a8WmA4BGRLG2dE7wj9GnBcZo7-P2SyD86GvY,5
 semantio/tools/crypto.py,sha256=mut1ztvpPcUUP3b563dh_FmKtP68KmNis3Qm8WENj8w,5559
 semantio/tools/duckduckgo.py,sha256=6mGn0js0cIsVxQlAgB8AYNLP05H8WmJKnSVosiO9iH0,5034
 semantio/tools/stocks.py,sha256=BVuK61O9OmWQjj0YdiCJY6TzpiFJ_An1UJB2RkDfX2k,5393
-semantio/tools/web_browser.py,sha256=
+semantio/tools/web_browser.py,sha256=wqr5pj2GybkK9IHDb8C1BipS8ujV2l36WlwA8ZbKd88,9711
 semantio/utils/__init__.py,sha256=Lx4X4iJpRhZzRmpQb80XXh5Ve8ZMOkadWAxXSmHpO_8,244
 semantio/utils/config.py,sha256=ZTwUTqxjW3-w94zoU7GzivWyJe0JJGvBfuB4RUOuEs8,1198
 semantio/utils/date_utils.py,sha256=x3oqRGv6ee_KCJ0LvCqqZh_FSgS6YGOHBwZQS4TJetY,1471
 semantio/utils/file_utils.py,sha256=b_cMuJINEGk9ikNuNHSn9lsmICWwvtnCDZ03ndH_S2I,1779
 semantio/utils/logger.py,sha256=TmGbP8BRjLMWjXi2GWzZ0RIXt70x9qX3FuIqghCNlwM,510
 semantio/utils/validation_utils.py,sha256=iwoxEb4Q5ILqV6tbesMjPWPCCoL3AmPLejGUy6q8YvQ,1284
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
+semantio-0.0.4.dist-info/LICENSE,sha256=teQbWD2Zlcl1_Fo29o2tNbs6G26hbCQiUzds5fQGYlY,1063
+semantio-0.0.4.dist-info/METADATA,sha256=youxODbkR3gNERG-mD7zbUbe5ix-0lUiWCHUI1_Y5IY,6913
+semantio-0.0.4.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
+semantio-0.0.4.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
+semantio-0.0.4.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
+semantio-0.0.4.dist-info/RECORD,,
semantio/llm/llama.py DELETED
File without changes

{semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/LICENSE
File without changes

{semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/WHEEL
File without changes

{semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/entry_points.txt
File without changes

{semantio-0.0.2.dist-info → semantio-0.0.4.dist-info}/top_level.txt
File without changes