semantio 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
- semantio/agent.py +102 -33
- semantio/cli/main.py +6 -6
- semantio/llm/__init__.py +12 -5
- semantio/llm/anthropic.py +41 -28
- semantio/llm/base_llm.py +10 -1
- semantio/llm/deepseek.py +27 -0
- semantio/llm/gemini.py +50 -0
- semantio/llm/groq.py +90 -8
- semantio/llm/mistral.py +27 -0
- semantio/llm/openai.py +125 -15
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/METADATA +7 -6
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/RECORD +16 -15
- semantio/llm/llama.py +0 -0
- semantio/tools/web_browser.py +0 -153
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/LICENSE +0 -0
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/WHEEL +0 -0
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/entry_points.txt +0 -0
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/top_level.txt +0 -0
semantio/agent.py
CHANGED
@@ -20,6 +20,7 @@ import os
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+
 class Agent(BaseModel):
     # -*- Agent settings
     name: Optional[str] = Field(None, description="Name of the agent.")
@@ -44,7 +45,10 @@ class Agent(BaseModel):
     )
     api_generator: Optional[Any] = Field(None, description="The API generator instance.")
     expected_output: Optional[Union[str, Dict]] = Field(None, description="The expected format or structure of the output.")
-    semantic_model: Optional[Any] = Field(None, description="SentenceTransformer model for semantic matching.")
+    semantic_model: Optional[Any] = Field(None, description="SentenceTransformer model for semantic matching.")
+    team: Optional[List['Agent']] = Field(None, description="List of assistants in the team.")
+    auto_tool: bool = Field(False, description="Whether to automatically detect and call tools.")
+
     # Allow arbitrary types
     model_config = ConfigDict(arbitrary_types_allowed=True)

@@ -52,8 +56,9 @@ class Agent(BaseModel):
         super().__init__(**kwargs)
         # Initialize the model and tools here if needed
         self._initialize_model()
-        # Automatically discover and register tools
-        self.tools
+        # Automatically discover and register tools if not provided
+        if self.tools is None:
+            self.tools = self._discover_tools()
         # Pass the LLM instance to each tool
         for tool in self.tools:
             tool.llm = self.llm_instance
@@ -66,6 +71,41 @@ class Agent(BaseModel):
         if self.api:
             self._generate_api()

+
+    def _generate_response_from_image(self, message: str, image: Union[str, Image], markdown: bool = False, **kwargs) -> str:
+        """
+        Send the image to the LLM for analysis if the LLM supports vision.
+        Supports both local images (PIL.Image) and image URLs.
+        """
+        try:
+            # Check if the LLM supports vision
+            if not self.llm_instance or not self.llm_instance.supports_vision:
+                raise ValueError("Vision is not supported for the current model.")
+            prompt = self._build_prompt(message, context=None)
+            # Handle image URL
+            if isinstance(image, str) and image.startswith("http"):
+                # Directly pass the URL to the LLM
+                return self.llm_instance.generate_from_image_url(prompt, image, **kwargs)
+
+            # Handle local image (PIL.Image)
+            elif isinstance(image, Image):
+                # Convert the image to bytes
+                if image.mode == "RGBA":
+                    image = image.convert("RGB")  # Convert RGBA to RGB
+                image_bytes = io.BytesIO()
+                image.save(image_bytes, format="JPEG")  # Save as JPEG (or any supported format)
+                image_bytes = image_bytes.getvalue()
+
+                # Generate response using base64-encoded image bytes
+                return self.llm_instance.generate_from_image(prompt, image_bytes, **kwargs)
+
+            else:
+                raise ValueError("Unsupported image type. Provide either a URL or a PIL.Image.")
+
+        except Exception as e:
+            logger.error(f"Failed to generate response from image: {e}")
+            return f"An error occurred while processing the image: {e}"
+
     def _discover_tools(self) -> List[BaseTool]:
         """
         Automatically discover and register tools from the 'tools' directory.
@@ -85,7 +125,7 @@ class Agent(BaseModel):
             try:
                 # Import the module
                 module_name = file.stem
-                module = importlib.import_module(f"
+                module = importlib.import_module(f"semantio.tools.{module_name}")

                 # Find all classes that inherit from BaseTool
                 for name, obj in module.__dict__.items():
@@ -122,12 +162,24 @@ class Agent(BaseModel):
             },
             "openai": {
                 "class": "OpenAILlm",
-                "default_model": "gpt-
+                "default_model": "gpt-4o",
             },
             "anthropic": {
                 "class": "AnthropicLlm",
                 "default_model": "claude-2.1",
             },
+            "deepseek": {
+                "class": "DeepSeekLLM",
+                "default_model": "deepseek-chat",
+            },
+            "gemini": {
+                "class": "GeminiLLM",
+                "default_model": "gemini-1.5-flash",
+            },
+            "mistral": {
+                "class": "MistralLLM",
+                "default_model": "mistral-large-latest",
+            },
         }

         # Normalize the LLM provider name (case-insensitive)
@@ -145,7 +197,7 @@ class Agent(BaseModel):
         model_to_use = self.llm_model or default_model

         # Dynamically import and initialize the LLM class
-        module_name = f"
+        module_name = f"semantio.llm.{llm_provider}"
         llm_module = importlib.import_module(module_name)
         llm_class = getattr(llm_module, llm_class_name)
         self.llm_instance = llm_class(model=model_to_use, api_key=api_key)
@@ -156,23 +208,16 @@ class Agent(BaseModel):
         retriever = Retriever(vector_store)
         return RAG(retriever)

-    def load_image_from_url(self, image_url: str) -> Image:
-        """Load an image from a URL and return it as a PIL Image."""
-        response = requests.get(image_url)
-        image_bytes = response.content
-        return Image.open(io.BytesIO(image_bytes))
-
     def print_response(
         self,
         message: Optional[Union[str, Image, List, Dict]] = None,
         stream: bool = False,
         markdown: bool = False,
+        tools: Optional[List[BaseTool]] = None,
+        team: Optional[List['Agent']] = None,
         **kwargs,
     ) -> Union[str, Dict]:  # Add return type hint
         """Print the agent's response to the console and return it."""
-        if isinstance(message, Image):
-            # Handle image input
-            message = self._process_image(message)

         if stream:
             # Handle streaming response
@@ -182,16 +227,11 @@ class Agent(BaseModel):
                 response += chunk
             return response
         else:
-
-            response = self._generate_response(message, markdown=markdown, **kwargs)
+            # Generate and return the response
+            response = self._generate_response(message, markdown=markdown, tools=tools, team=team, **kwargs)
             print(response)  # Print the response to the console
             return response

-    def _process_image(self, image: Image) -> str:
-        """Process the image and return a string representation."""
-        # Convert the image to text or extract relevant information
-        # For now, we'll just return a placeholder string
-        return "Image processed. Extracted text: [Placeholder]"

     def _stream_response(self, message: str, markdown: bool = False, **kwargs) -> Iterator[str]:
         """Stream the agent's response."""
@@ -284,20 +324,43 @@ class Agent(BaseModel):
             return []


-    def _generate_response(self, message: str, markdown: bool = False, **kwargs) -> str:
+    def _generate_response(self, message: str, markdown: bool = False, tools: Optional[List[BaseTool]] = None, team: Optional[List['Agent']] = None, **kwargs) -> str:
         """Generate the agent's response, including tool execution and context retrieval."""
-        # Use the
-
-
+        # Use the specified tools or team if provided
+        if tools is not None:
+            self.tools = tools
+        if team is not None:
+            return self._generate_team_response(message, team, markdown=markdown, **kwargs)
+
+        # Initialize tool_outputs as an empty dictionary
+        tool_outputs = {}
         responses = []
-        tool_outputs = {}  # Store outputs of all tools for collaboration

-        #
+        # Use the LLM to analyze the query and dynamically select tools when auto_tool is enabled
+        if self.auto_tool:
+            tool_calls = self._analyze_query_and_select_tools(message)
+        else:
+            # Check if tools are provided
+            if self.tools:
+                tool_calls = [
+                    {
+                        "tool": tool.__class__.__name__,
+                        "input": {
+                            "query": message,  # Use the message as the query
+                            "context": None,  # No context provided by default
+                        }
+                    }
+                    for tool in self.tools
+                ]
+            else:
+                tool_calls = kwargs.get("tool_calls", [])
+
+        # Execute tools if any are detected
         if tool_calls:
             for tool_call in tool_calls:
                 tool_name = tool_call["tool"]
                 tool_input = tool_call["input"]
-
+
                 # Find the tool
                 tool = next((t for t in self.tools if t.name.lower() == tool_name.lower()), None)
                 if tool:
@@ -333,9 +396,8 @@ class Agent(BaseModel):
             except Exception as e:
                 logger.error(f"Failed to generate LLM response: {e}")
                 responses.append(f"An error occurred while generating the analysis: {e}")
-
-        # If no tools were executed, proceed with the original logic
         if not tool_calls:
+            # If no tools were executed, proceed with the original logic
             # Retrieve relevant context using RAG
             rag_context = self.rag.retrieve(message) if self.rag else None
             # Retrieve relevant context from the knowledge base (API result)
@@ -370,8 +432,15 @@ class Agent(BaseModel):
             if markdown:
                 return f"**Response:**\n\n{response}"
             return response
-
-
+        # Combine all responses into a single string
+        return "\n\n".join(responses)
+
+    def _generate_team_response(self, message: str, team: List['Agent'], markdown: bool = False, **kwargs) -> str:
+        """Generate a response using a team of assistants."""
+        responses = []
+        for agent in team:
+            response = agent.print_response(message, markdown=markdown, **kwargs)
+            responses.append(f"**{agent.name}:**\n\n{response}")
         return "\n\n".join(responses)

     def _build_prompt(self, message: str, context: Optional[List[Dict]]) -> str:
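Taken together, the `agent.py` changes add vision routing, explicit tool injection, and multi-agent teams. A minimal sketch of the new `team` and `auto_tool` parameters, mirroring the constructor usage shown in `cli/main.py` below; the agent names, provider, and message are hypothetical:

```python
from semantio.llm import get_llm
from semantio.agent import Agent

llm = get_llm(provider="groq", api_key="YOUR_GROQ_KEY")

researcher = Agent(name="Researcher", model="groq", llm=llm)
writer = Agent(name="Writer", model="groq", llm=llm)
lead = Agent(name="Lead", model="groq", llm=llm)

# New in 0.0.3: `team` routes the call through _generate_team_response,
# which concatenates each member's reply under a "**<name>:**" header.
print(lead.print_response("Summarize today's AI news", team=[researcher, writer]))

# New in 0.0.3: `auto_tool=True` asks the LLM to select tools per query
# instead of calling every registered tool with the raw message.
auto_agent = Agent(name="Auto", model="groq", llm=llm, auto_tool=True)
```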
semantio/cli/main.py
CHANGED
@@ -1,7 +1,7 @@
 import argparse
 import warnings
-from 
-from 
+from semantio.agent import Agent
+from semantio.llm import get_llm
 from urllib3.exceptions import NotOpenSSLWarning

 # Suppress the NotOpenSSLWarning
@@ -9,7 +9,7 @@ warnings.filterwarnings("ignore", category=NotOpenSSLWarning)

 def main():
     parser = argparse.ArgumentParser(description="opAi CLI")
-    parser.add_argument("--message", type=str, required=True, help="Message to send to the
+    parser.add_argument("--message", type=str, required=True, help="Message to send to the agent")
     parser.add_argument("--provider", type=str, required=True, help="LLM provider (e.g., groq, openai)")
     parser.add_argument("--api-key", type=str, required=True, help="API key for the LLM provider")
     parser.add_argument("--model", type=str, default=None, help="Model name (e.g., mixtral-8x7b-32768)")
@@ -22,9 +22,9 @@ def main():

     llm = get_llm(provider=args.provider, **llm_config)

-    # Create an
-
-
+    # Create an agent
+    agent = Agent(model=args.provider, llm=llm)
+    agent.print_response(args.message)


 if __name__ == "__main__":
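For reference, the same flow can be driven from Python. The console-script name comes from `entry_points.txt`, which this diff does not show, so the `semantio` command in the comment is an assumption:

```python
# Roughly equivalent to:
#   semantio --provider groq --api-key $GROQ_API_KEY --message "What is the capital of France?"
from semantio.llm import get_llm
from semantio.agent import Agent

llm = get_llm(provider="groq", api_key="YOUR_GROQ_KEY")
agent = Agent(model="groq", llm=llm)
agent.print_response("What is the capital of France?")
```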
semantio/llm/__init__.py
CHANGED
@@ -1,17 +1,24 @@
 from .openai import OpenAILlm
-from .anthropic import 
-# from .llama import LlamaLlm
+from .anthropic import AnthropicLLM
 from .groq import GroqLlm
+from .mistral import MistralLLM
+from .deepseek import DeepSeekLLM
+from .gemini import GeminiLLM

 def get_llm(provider: str, **kwargs):
     provider = provider.lower()  # Convert provider name to lowercase
     if provider == "openai":
         return OpenAILlm(**kwargs)
     elif provider == "anthropic":
-        return 
-    # elif provider == "llama":
-    #     return LlamaLlm(**kwargs)
+        return AnthropicLLM(**kwargs)
     elif provider == "groq":
         return GroqLlm(**kwargs)
+    elif provider == "mistral":
+        return MistralLLM(**kwargs)
+    elif provider == "deepseek":
+        return DeepSeekLLM(**kwargs)
+    elif provider == "gemini":
+        return GeminiLLM(**kwargs)
+
     else:
         raise ValueError(f"Unsupported LLM provider: {provider}")
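With the three new providers registered, `get_llm` remains the single switch point. A quick sketch (keys are placeholders):

```python
from semantio.llm import get_llm

# Each branch maps to the matching class added in this release.
mistral = get_llm(provider="mistral", api_key="YOUR_MISTRAL_KEY")
deepseek = get_llm(provider="deepseek", api_key="YOUR_DEEPSEEK_KEY")
gemini = get_llm(provider="gemini", api_key="YOUR_GEMINI_KEY")

print(mistral.generate("Say hello in French."))
```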
semantio/llm/anthropic.py
CHANGED
@@ -1,39 +1,52 @@
+import os
 from typing import List, Dict, Optional
 from .base_llm import BaseLLM
 import anthropic
-import os

-class 
-    def __init__(
-
-
-
-
+class AnthropicLLM(BaseLLM):
+    def __init__(self, model: str = "claude-3-5-sonnet-20241022", api_key: Optional[str] = None):
+        """
+        Initialize the Anthropic LLM class.
+
+        Args:
+            model (str): The name of the model (e.g., claude-3-5-sonnet-20241022).
+            api_key (Optional[str]): The Anthropic API key. If not provided, it fetches from the environment.
+        """
         self.model = model
         self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
         if not self.api_key:
             raise ValueError("Anthropic API key is required. Set ANTHROPIC_API_KEY environment variable or pass it explicitly.")
-        self.client = anthropic.
+        self.client = anthropic.Anthropic(api_key=self.api_key)
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        """
+        Generate text using Anthropic's Claude model.
+
+        Args:
+            prompt (str): The user prompt.
+            context (Optional[List[Dict]]): Context to include in the conversation.
+            memory (Optional[List[Dict]]): Memory from previous interactions.

-
-
-
-
-
-
-
-
-
-
-
-        messages.append({"role": "system", "content": "Context: " + str(context)})
-        messages.append({"role": "user", "content": prompt})
+        Returns:
+            str: The generated response from the model.
+        """
+        try:
+            # Prepare messages for the Anthropic API
+            messages = []
+            if memory:
+                messages.extend(memory)
+            if context:
+                messages.append({"role": "system", "content": "Context: " + str(context)})
+            messages.append({"role": "user", "content": prompt})

-
-
-
-
-
+            # Call the Anthropic API
+            response = self.client.messages.create(
+                model=self.model,
+                max_tokens=1024,
+                messages=messages,
+            )

-
-
+            # Extract and return the response
+            return response.content
+        except Exception as e:
+            raise ValueError(f"Error while generating response with Anthropic Claude: {e}")
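One caveat worth flagging: in the current `anthropic` SDK, `Messages.create` returns `response.content` as a list of content blocks rather than a plain string, so callers of the rewritten `generate` may need to unwrap it. A usage sketch (key is a placeholder):

```python
from semantio.llm.anthropic import AnthropicLLM

llm = AnthropicLLM(api_key="YOUR_ANTHROPIC_KEY")
result = llm.generate("Summarize Hamlet in two sentences.")

# `generate` returns response.content verbatim; with the anthropic SDK this
# is a list of content blocks, so the text typically lives at [0].text.
text = result[0].text if isinstance(result, list) else result
print(text)
```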
semantio/llm/base_llm.py
CHANGED
@@ -9,4 +9,13 @@ class BaseLLM(ABC):
         context: Optional[List[Dict]] = None,
         memory: Optional[List[Dict]] = None,
     ) -> str:
-        pass
+        pass
+
+    @property
+    def supports_vision(self) -> bool:
+        """Return True if the LLM supports vision tasks."""
+        return False
+
+    def generate_from_image(self, image_bytes: bytes, **kwargs) -> str:
+        """Process an image if vision is supported. Default implementation raises an error."""
+        raise NotImplementedError("This LLM does not support vision tasks.")
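The new defaults make vision opt-in per provider: subclasses only override `supports_vision` and `generate_from_image` when the backing model can handle images. A minimal subclass illustrating the contract (`EchoLLM` is hypothetical, for illustration only):

```python
from typing import List, Dict, Optional
from semantio.llm.base_llm import BaseLLM

class EchoLLM(BaseLLM):
    """Toy provider used only to illustrate the BaseLLM contract."""

    def generate(self, prompt: str, context: Optional[List[Dict]] = None,
                 memory: Optional[List[Dict]] = None) -> str:
        return f"echo: {prompt}"

llm = EchoLLM()
print(llm.supports_vision)           # False — inherited default
# llm.generate_from_image(b"...")    # would raise NotImplementedError
```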
semantio/llm/deepseek.py
ADDED
@@ -0,0 +1,27 @@
+from typing import List, Dict, Optional
+from .base_llm import BaseLLM
+from openai import OpenAI
+import os
+
+class DeepSeekLLM(BaseLLM):
+    def __init__(self, model: str = "deepseek-chat", api_key: Optional[str] = None):
+        self.model = model
+        self.api_key = api_key or os.getenv("DEEPSEEK_API_KEY")
+        if not self.api_key:
+            raise ValueError("DeepSeek API key is required. Set DEEPSEEK_API_KEY environment variable or pass it explicitly.")
+        self.client = OpenAI(api_key=self.api_key, base_url="https://api.deepseek.com")
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        messages = []
+        if memory:
+            messages.extend(memory)
+        if context:
+            messages.append({"role": "system", "content": "Context: " + str(context)})
+        messages.append({"role": "user", "content": prompt})
+
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+        )
+
+        return response.choices[0].message.content
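Because DeepSeek exposes an OpenAI-compatible endpoint, the class reuses the `openai` client with a custom `base_url`. Usage sketch (key and prompts are placeholders):

```python
from semantio.llm.deepseek import DeepSeekLLM

llm = DeepSeekLLM(api_key="YOUR_DEEPSEEK_KEY")  # model defaults to "deepseek-chat"
reply = llm.generate(
    "Explain list comprehensions in one paragraph.",
    memory=[{"role": "user", "content": "Keep answers beginner-friendly."}],
)
print(reply)
```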
semantio/llm/gemini.py
ADDED
@@ -0,0 +1,50 @@
+import os
+from typing import List, Dict, Optional
+from .base_llm import BaseLLM
+from google import genai
+
+class GeminiLLM(BaseLLM):
+    def __init__(self, model: str = "gemini-1.5-flash", api_key: Optional[str] = None):
+        """
+        Initialize the Gemini LLM class.
+
+        Args:
+            model (str): The name of the Gemini model (e.g., 'gemini-1.5-flash').
+            api_key (Optional[str]): The Gemini API key. If not provided, it fetches from the environment.
+        """
+        self.model = model
+        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
+        if not self.api_key:
+            raise ValueError("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass it explicitly.")
+
+        # Initialize the client using the API key
+        self.client = genai.Client(api_key=self.api_key)
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        """
+        Generate text using Google's Gemini model.
+
+        Args:
+            prompt (str): The user prompt.
+            context (Optional[List[Dict]]): Context to include in the conversation.
+            memory (Optional[List[Dict]]): Memory from previous interactions.
+
+        Returns:
+            str: The generated response from the model.
+        """
+        try:
+            # Prepare the chat history (optional context and memory)
+            history = memory if memory else []
+            if context:
+                history.append({"role": "system", "content": str(context)})
+
+            # Generate the content using the specified Gemini model
+            response = self.client.models.generate_content(
+                model=self.model,
+                contents=prompt
+            )
+
+            # Return the response text
+            return response.text
+        except Exception as e:
+            raise ValueError(f"Error while generating response with Gemini: {e}")
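Note that `generate` assembles `history` from memory and context but, as written, passes only `contents=prompt` to `generate_content`, so prior turns are currently ignored. Usage sketch (key is a placeholder):

```python
from semantio.llm.gemini import GeminiLLM

llm = GeminiLLM(api_key="YOUR_GEMINI_KEY")  # model defaults to "gemini-1.5-flash"
print(llm.generate("List three uses of Python's walrus operator."))
```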
semantio/llm/groq.py
CHANGED
@@ -1,4 +1,5 @@
-
+import base64
+from typing import Optional, List, Dict
 from .base_llm import BaseLLM
 import groq
 import os
@@ -15,12 +16,22 @@ class GroqLlm(BaseLLM):
             raise ValueError("Groq API key is required. Set GROQ_API_KEY environment variable or pass it explicitly.")
         self.client = groq.Client(api_key=self.api_key)

-
-
-
-
-
-
+    @property
+    def supports_vision(self) -> bool:
+        """
+        Check if the model supports vision tasks.
+        """
+        # List of Groq models that support vision
+        vision_models = [
+            "llama-3.2-11b-vision-preview",
+            "llama-3.2-90b-vision-preview"
+        ]
+        return self.model in vision_models
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        """
+        Generate a response to a text-based prompt.
+        """
         # Prepare messages for the Groq API
         messages = []
         if memory:
@@ -36,4 +47,75 @@ class GroqLlm(BaseLLM):
         )

         # Extract and return the response
-        return response.choices[0].message.content
+        return response.choices[0].message.content
+
+    def generate_from_image(self, prompt: str, image_bytes: bytes, **kwargs) -> str:
+        """
+        Process an image and generate a response if the model supports vision.
+        """
+        if not self.supports_vision:
+            raise ValueError(f"Model '{self.model}' does not support vision tasks.")
+
+        try:
+            # Convert the image bytes to base64
+            image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+            # Construct the message payload
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{image_base64}",
+                            },
+                        },
+                    ],
+                }
+            ]
+
+            # Call the Groq API with the base64-encoded image
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                **kwargs,
+            )
+
+            # Extract and return the response text
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while processing image with Groq vision model: {e}")
+
+
+    def generate_from_image_url(self, prompt: str, image_url: str, **kwargs) -> str:
+        """
+        Process an image URL and generate a response if the model supports vision.
+        """
+        if not self.supports_vision:
+            raise ValueError(f"Model '{self.model}' does not support vision tasks.")
+
+        try:
+            # Call the Groq API with the image URL
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": image_url,
+                                },
+                            },
+                        ],
+                    }
+                ],
+                **kwargs,
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while processing image URL with Groq vision model: {e}")
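A sketch of the new Groq vision path, sending a local JPEG through `generate_from_image`; the file path, key, and prompts are placeholders:

```python
from semantio.llm.groq import GroqLlm

llm = GroqLlm(model="llama-3.2-11b-vision-preview", api_key="YOUR_GROQ_KEY")

with open("photo.jpg", "rb") as f:
    image_bytes = f.read()

# The method base64-encodes the bytes into a data: URL before calling the API.
print(llm.generate_from_image("What is in this photo?", image_bytes))

# Remote images skip the encoding step entirely.
print(llm.generate_from_image_url("Describe this image.",
                                  "https://example.com/cat.jpg"))
```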
semantio/llm/mistral.py
ADDED
@@ -0,0 +1,27 @@
+from typing import List, Dict, Optional
+from .base_llm import BaseLLM
+from mistralai import Mistral
+import os
+
+class MistralLLM(BaseLLM):
+    def __init__(self, model: str = "mistral-large-latest", api_key: Optional[str] = None):
+        self.model = model
+        self.api_key = api_key or os.getenv("MISTRAL_API_KEY")
+        if not self.api_key:
+            raise ValueError("Mistral API key is required. Set MISTRAL_API_KEY environment variable or pass it explicitly.")
+        self.client = Mistral(api_key=self.api_key)
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        messages = []
+        if memory:
+            messages.extend(memory)
+        if context:
+            messages.append({"role": "system", "content": "Context: " + str(context)})
+        messages.append({"role": "user", "content": prompt})
+
+        response = self.client.chat.complete(
+            model=self.model,
+            messages=messages,
+        )
+
+        return response.choices[0].message.content
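Usage mirrors the other chat providers; only the client call (`chat.complete` from the `mistralai` SDK) differs. Sketch (key is a placeholder):

```python
from semantio.llm.mistral import MistralLLM

llm = MistralLLM(api_key="YOUR_MISTRAL_KEY")  # model defaults to "mistral-large-latest"
print(llm.generate("Give a one-line definition of entropy."))
```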
semantio/llm/openai.py
CHANGED
@@ -1,26 +1,136 @@
 from typing import List, Dict, Optional
 from .base_llm import BaseLLM
-import 
+from openai import OpenAI
 import os
+import base64

 class OpenAILlm(BaseLLM):
-    def __init__(self, model: str = "gpt-
+    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
+        """
+        Initialize the OpenAI LLM class.
+
+        Args:
+            model (str): The name of the model (e.g., gpt-4o, gpt-4-vision).
+            api_key (Optional[str]): The OpenAI API key. If not provided, it fetches from the environment.
+        """
         self.model = model
         self.api_key = api_key or os.getenv("OPENAI_API_KEY")
         if not self.api_key:
             raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it explicitly.")
-
+        self.client = OpenAI(api_key=self.api_key)

     def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
-
-
-
-
-
-
-
-
-
-
-
-
+        """
+        Generate text using OpenAI's ChatCompletion API.
+
+        Args:
+            prompt (str): The user prompt.
+            context (Optional[List[Dict]]): Context to include in the conversation.
+            memory (Optional[List[Dict]]): Memory from previous interactions.
+
+        Returns:
+            str: The generated response from the model.
+        """
+        try:
+            # Prepare messages for the OpenAI API
+            messages = []
+            if memory:
+                messages.extend(memory)
+            if context:
+                messages.append({"role": "system", "content": "Context: " + str(context)})
+            messages.append({"role": "user", "content": prompt})
+
+            # Call the ChatCompletion endpoint
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+            )
+
+            # Extract and return the response
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while generating response with OpenAI: {e}")
+
+    @property
+    def supports_vision(self) -> bool:
+        """
+        Check if the model supports vision tasks.
+        """
+        # List of GPT models that support vision
+        vision_models = [
+            "gpt-4o", "gpt-4o mini", "o1", "o1 mini"
+        ]
+        return self.model in vision_models
+
+    def generate_from_image_url(self, prompt: str, image_url: str, **kwargs) -> str:
+        """
+        Process an image URL with OpenAI's vision-capable models, using the prompt as instructions.
+
+        Args:
+            prompt (str): Instructions provided as the prompt for image analysis.
+            image_url (str): The URL of the image.
+            kwargs: Additional parameters for the OpenAI API.
+
+        Returns:
+            str: The response generated by the vision-capable model.
+        """
+        if not self.supports_vision:
+            raise ValueError(f"Model '{self.model}' does not support vision tasks.")
+
+        try:
+            # Use the prompt as the instructions in the API call
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": image_url}},
+                        ],
+                    }
+                ],
+                **kwargs,
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while processing image URL with OpenAI Vision model: {e}")
+
+    def generate_from_image(self, prompt: str, image_bytes: bytes, **kwargs) -> str:
+        """
+        Process an image and generate a response if the model supports vision.
+        """
+        if not self.supports_vision:
+            raise ValueError(f"Model '{self.model}' does not support vision tasks.")
+
+        try:
+            # Convert the image bytes to base64
+            image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+            # Construct the message payload
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{image_base64}",
+                            },
+                        },
+                    ],
+                }
+            ]
+
+            # Call the OpenAI API with the base64-encoded image
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                **kwargs,
+            )
+
+            # Extract and return the response text
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while processing image with OpenAI vision model: {e}")
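A sketch of the two new OpenAI vision entry points (key, URL, and file path are placeholders). Note that the membership check in `supports_vision` is exact, so model names must match the list verbatim; entries like "gpt-4o mini" (with a space) will not match the actual "gpt-4o-mini" model identifier:

```python
from semantio.llm.openai import OpenAILlm

llm = OpenAILlm(model="gpt-4o", api_key="YOUR_OPENAI_KEY")

# Remote image: the URL is forwarded as-is.
print(llm.generate_from_image_url(
    "What landmark is shown here?",
    "https://example.com/landmark.jpg",
))

# Local image: bytes are base64-encoded into a data: URL.
with open("receipt.jpg", "rb") as f:
    print(llm.generate_from_image("What is the total on this receipt?", f.read()))
```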
{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/METADATA
CHANGED
@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: semantio
-Version: 0.0.
-Summary: A powerful SDK for building AI agents
+Version: 0.0.3
+Summary: A powerful SDK for building AI agents
 Home-page: https://github.com/Syenah/semantio
 Author: Rakesh
 Author-email: rakeshsahoo689@gmail.com
@@ -19,7 +19,8 @@ License-File: LICENSE
 Requires-Dist: openai
 Requires-Dist: anthropic
 Requires-Dist: groq
-Requires-Dist: 
+Requires-Dist: google-genai
+Requires-Dist: mistralai
 Requires-Dist: faiss-cpu
 Requires-Dist: pydantic
 Requires-Dist: requests
@@ -32,8 +33,6 @@ Requires-Dist: sentence-transformers
 Requires-Dist: fuzzywuzzy
 Requires-Dist: duckduckgo-search
 Requires-Dist: yfinance
-Requires-Dist: forex-python
-Requires-Dist: qrcode

 # Semantio: The Mother of Your AI Agents

@@ -112,7 +111,9 @@ Semantio/
 │   │   ├── __init__.py
 │   │   ├── openai.py       # OpenAI integration
 │   │   ├── anthropic.py    # Anthropic (Claude) integration
-│   │   ├── 
+│   │   ├── deepseek.py     # Deepseek integration
+│   │   ├── gemini.py       # Gemini integration
+│   │   ├── mistral.py      # Mistral integration
 │   │   └── base_llm.py     # Base class for LLMs
 │   ├── knowledge_base/     # Knowledge base integration
 │   │   ├── __init__.py
{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/RECORD
CHANGED
@@ -1,22 +1,24 @@
 semantio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-semantio/agent.py,sha256=
+semantio/agent.py,sha256=plQ4D76cnJ1FaGlEuKDeA53aW_hMDvt5sbmUuTHqvFQ,30143
 semantio/memory.py,sha256=eNAwyAokppHzMcIyFgOw2hT2wnLQBd9GL4T5eallNV4,281
 semantio/rag.py,sha256=ROy3Pa1NURcDs6qQZ8IMoa5Xlzt6I-msEq0C1p8UgB0,472
 semantio/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 semantio/api/api_generator.py,sha256=Q-USITEpluRESEaQuOmF7m1vhLKYU9P8eGlQppKT9J4,829
 semantio/api/fastapi_app.py,sha256=DyTgKJKikMe2G6wWmyzo1rBLXQFi8UWWUMY3UGH4f24,2128
 semantio/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-semantio/cli/main.py,sha256=
+semantio/cli/main.py,sha256=jUvSfehbHWALwracEgBopMIVMraSV9QmDUFfgGcxnP0,1091
 semantio/knowledge_base/__init__.py,sha256=mvp0GFiGSjcxlkaDulAwKOCL9s6gsKTqhPKXF9N3n1g,172
 semantio/knowledge_base/document_loader.py,sha256=nix0yZJ-JJoDbhLkpg5bKDMvNrwykmknI7MRIn0N81k,1910
 semantio/knowledge_base/retriever.py,sha256=XpdzKS1UCncJImVMtG67VXMC7lp2eRzKnShjvktsFMM,1271
 semantio/knowledge_base/vector_store.py,sha256=4Zv9kfqDD3cfn_4R8ZoLKdAQCZRYo_IENP_KkLB_RPc,987
-semantio/llm/__init__.py,sha256
-semantio/llm/anthropic.py,sha256
-semantio/llm/base_llm.py,sha256=
-semantio/llm/
-semantio/llm/
-semantio/llm/
+semantio/llm/__init__.py,sha256=-4uKcqo9fBrEbvfxGE01XVHL9qEG2vKXfy5hlnUsRbw,779
+semantio/llm/anthropic.py,sha256=-JTso9vr88T3JSipxE60uZjqDgfla1QFoSEBpXW2pXw,2054
+semantio/llm/base_llm.py,sha256=VFl_2S4kqYDuCTWIfWMbKU5aNbVqOCG33E4APOSHF90,668
+semantio/llm/deepseek.py,sha256=oxX-Uw0_lY2sstYs5KGBGFB_hAZUbZomPADdib1mY2M,1100
+semantio/llm/gemini.py,sha256=er3zv1jOvWQBGbPuv4fS4pR_c_abHyhroe-rkXupOO4,1959
+semantio/llm/groq.py,sha256=1AH30paKzDIQjBjWPQPN44QwFHsIOVwI-a587-cDIVc,4285
+semantio/llm/mistral.py,sha256=NpvaB1cE6-jMEBdT0mTf6Ca4Qq2LS8QivDKI6AgdRjE,1061
+semantio/llm/openai.py,sha256=I3ab-d_zFxm-TDhYk6t1PzDtElPJEEQ2eSiARBNIGi4,5174
 semantio/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 semantio/storage/cloud_storage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 semantio/storage/local_storage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,16 +27,15 @@ semantio/tools/base_tool.py,sha256=xBNSa_8a8WmA4BGRLG2dE7wj9GnBcZo7-P2SyD86GvY,5
 semantio/tools/crypto.py,sha256=mut1ztvpPcUUP3b563dh_FmKtP68KmNis3Qm8WENj8w,5559
 semantio/tools/duckduckgo.py,sha256=6mGn0js0cIsVxQlAgB8AYNLP05H8WmJKnSVosiO9iH0,5034
 semantio/tools/stocks.py,sha256=BVuK61O9OmWQjj0YdiCJY6TzpiFJ_An1UJB2RkDfX2k,5393
-semantio/tools/web_browser.py,sha256=LMwPFTHNTtqCp8MEHVlJJUSJa91vM7MZWIL5RDQKF4U,4980
 semantio/utils/__init__.py,sha256=Lx4X4iJpRhZzRmpQb80XXh5Ve8ZMOkadWAxXSmHpO_8,244
 semantio/utils/config.py,sha256=ZTwUTqxjW3-w94zoU7GzivWyJe0JJGvBfuB4RUOuEs8,1198
 semantio/utils/date_utils.py,sha256=x3oqRGv6ee_KCJ0LvCqqZh_FSgS6YGOHBwZQS4TJetY,1471
 semantio/utils/file_utils.py,sha256=b_cMuJINEGk9ikNuNHSn9lsmICWwvtnCDZ03ndH_S2I,1779
 semantio/utils/logger.py,sha256=TmGbP8BRjLMWjXi2GWzZ0RIXt70x9qX3FuIqghCNlwM,510
 semantio/utils/validation_utils.py,sha256=iwoxEb4Q5ILqV6tbesMjPWPCCoL3AmPLejGUy6q8YvQ,1284
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
+semantio-0.0.3.dist-info/LICENSE,sha256=teQbWD2Zlcl1_Fo29o2tNbs6G26hbCQiUzds5fQGYlY,1063
+semantio-0.0.3.dist-info/METADATA,sha256=M5Q-waTknpyWrD_HV9G76jMKgPHPrBBwM5Hl8we4ulo,6800
+semantio-0.0.3.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
+semantio-0.0.3.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
+semantio-0.0.3.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
+semantio-0.0.3.dist-info/RECORD,,
semantio/llm/llama.py
DELETED
File without changes
semantio/tools/web_browser.py
DELETED
@@ -1,153 +0,0 @@
-from typing import Dict, Any, Optional, List
-from playwright.async_api import async_playwright
-import asyncio
-import logging
-
-logger = logging.getLogger(__name__)
-
-class WebBrowserTool:
-    """
-    A tool for performing browser automation tasks using Playwright.
-    """
-
-    def __init__(self, headless: bool = True):
-        """
-        Initialize the WebBrowserTool.
-
-        Args:
-            headless (bool): Whether to run the browser in headless mode (default: True).
-        """
-        self.headless = headless
-        self.browser = None
-        self.context = None
-        self.page = None
-
-    async def start(self):
-        """
-        Start the browser and create a new context and page.
-        """
-        self.playwright = await async_playwright().start()
-        self.browser = await self.playwright.chromium.launch(headless=self.headless)
-        self.context = await self.browser.new_context()
-        self.page = await self.context.new_page()
-        logger.info("Browser started successfully.")
-
-    async def close(self):
-        """
-        Close the browser and cleanup resources.
-        """
-        if self.browser:
-            await self.browser.close()
-            await self.playwright.stop()
-        logger.info("Browser closed successfully.")
-
-    async def navigate(self, url: str) -> str:
-        """
-        Navigate to a specific URL.
-
-        Args:
-            url (str): The URL to navigate to.
-
-        Returns:
-            str: The page title after navigation.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        await self.page.goto(url)
-        title = await self.page.title()
-        logger.info(f"Navigated to {url}. Page title: {title}")
-        return title
-
-    async def fill_form(self, fields: Dict[str, str]) -> str:
-        """
-        Fill a form with the provided fields.
-
-        Args:
-            fields (Dict[str, str]): A dictionary of field names and values to fill.
-
-        Returns:
-            str: A success message.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        for field, value in fields.items():
-            await self.page.fill(f'input[name="{field}"]', value)
-            logger.info(f"Filled field '{field}' with value '{value}'.")
-
-        return "Form filled successfully."
-
-    async def click(self, selector: str) -> str:
-        """
-        Click an element on the page.
-
-        Args:
-            selector (str): The CSS selector of the element to click.
-
-        Returns:
-            str: A success message.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        await self.page.click(selector)
-        logger.info(f"Clicked element with selector '{selector}'.")
-        return f"Clicked element: {selector}"
-
-    async def scrape(self, selector: str) -> List[Dict[str, str]]:
-        """
-        Scrape data from the page.
-
-        Args:
-            selector (str): The CSS selector of the elements to scrape.
-
-        Returns:
-            List[Dict[str, str]]: A list of dictionaries containing the scraped data.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        elements = await self.page.query_selector_all(selector)
-        scraped_data = []
-        for element in elements:
-            text = await element.inner_text()
-            scraped_data.append({"text": text.strip()})
-            logger.info(f"Scraped text: {text.strip()}")
-
-        return scraped_data
-
-    async def execute_step(self, step: Dict[str, Any]) -> str:
-        """
-        Execute a browser automation step.
-
-        Args:
-            step (Dict[str, Any]): A dictionary containing the step details.
-                - "action": The action to perform (e.g., "navigate", "fill_form", "click", "scrape").
-                - "details": The details required for the action (e.g., URL, form fields, selector).
-                - "website": The website to perform the action on (optional).
-
-        Returns:
-            str: The result of the step execution.
-        """
-        action = step.get("action")
-        details = step.get("details")
-        website = step.get("website", "https://www.google.com")
-
-        if not self.page:
-            await self.start()
-
-        try:
-            if action == "navigate":
-                return await self.navigate(details)
-            elif action == "fill_form":
-                return await self.fill_form(details)
-            elif action == "click":
-                return await self.click(details)
-            elif action == "scrape":
-                return str(await self.scrape(details))
-            else:
-                return f"Unknown action: {action}"
-        except Exception as e:
-            logger.error(f"Error executing step: {e}")
-            return f"Error executing step: {e}"
{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/LICENSE
File without changes

{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/WHEEL
File without changes

{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/entry_points.txt
File without changes

{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/top_level.txt
File without changes