semantio 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semantio/agent.py +102 -33
- semantio/cli/main.py +6 -6
- semantio/llm/__init__.py +12 -5
- semantio/llm/anthropic.py +41 -28
- semantio/llm/base_llm.py +10 -1
- semantio/llm/deepseek.py +27 -0
- semantio/llm/gemini.py +50 -0
- semantio/llm/groq.py +90 -8
- semantio/llm/mistral.py +27 -0
- semantio/llm/openai.py +125 -15
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/METADATA +7 -6
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/RECORD +16 -15
- semantio/llm/llama.py +0 -0
- semantio/tools/web_browser.py +0 -153
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/LICENSE +0 -0
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/WHEEL +0 -0
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/entry_points.txt +0 -0
- {semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/top_level.txt +0 -0
semantio/agent.py
CHANGED

```diff
@@ -20,6 +20,7 @@ import os
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+
 class Agent(BaseModel):
     # -*- Agent settings
     name: Optional[str] = Field(None, description="Name of the agent.")
@@ -44,7 +45,10 @@ class Agent(BaseModel):
     )
     api_generator: Optional[Any] = Field(None, description="The API generator instance.")
     expected_output: Optional[Union[str, Dict]] = Field(None, description="The expected format or structure of the output.")
-    semantic_model: Optional[Any] = Field(None, description="SentenceTransformer model for semantic matching.")
+    semantic_model: Optional[Any] = Field(None, description="SentenceTransformer model for semantic matching.")
+    team: Optional[List['Agent']] = Field(None, description="List of assistants in the team.")
+    auto_tool: bool = Field(False, description="Whether to automatically detect and call tools.")
+
     # Allow arbitrary types
     model_config = ConfigDict(arbitrary_types_allowed=True)

@@ -52,8 +56,9 @@ class Agent(BaseModel):
         super().__init__(**kwargs)
         # Initialize the model and tools here if needed
         self._initialize_model()
-        # Automatically discover and register tools
-        self.tools
+        # Automatically discover and register tools if not provided
+        if self.tools is None:
+            self.tools = self._discover_tools()
         # Pass the LLM instance to each tool
         for tool in self.tools:
             tool.llm = self.llm_instance
@@ -66,6 +71,41 @@ class Agent(BaseModel):
         if self.api:
             self._generate_api()

+
+    def _generate_response_from_image(self, message: str, image: Union[str, Image], markdown: bool = False, **kwargs) -> str:
+        """
+        Send the image to the LLM for analysis if the LLM supports vision.
+        Supports both local images (PIL.Image) and image URLs.
+        """
+        try:
+            # Check if the LLM supports vision
+            if not self.llm_instance or not self.llm_instance.supports_vision:
+                raise ValueError("Vision is not supported for the current model.")
+            prompt = self._build_prompt(message, context=None)
+            # Handle image URL
+            if isinstance(image, str) and image.startswith("http"):
+                # Directly pass the URL to the LLM
+                return self.llm_instance.generate_from_image_url(prompt, image, **kwargs)
+
+            # Handle local image (PIL.Image)
+            elif isinstance(image, Image):
+                # Convert the image to bytes
+                if image.mode == "RGBA":
+                    image = image.convert("RGB")  # Convert RGBA to RGB
+                image_bytes = io.BytesIO()
+                image.save(image_bytes, format="JPEG")  # Save as JPEG (or any supported format)
+                image_bytes = image_bytes.getvalue()
+
+                # Generate response using base64-encoded image bytes
+                return self.llm_instance.generate_from_image(prompt, image_bytes, **kwargs)
+
+            else:
+                raise ValueError("Unsupported image type. Provide either a URL or a PIL.Image.")
+
+        except Exception as e:
+            logger.error(f"Failed to generate response from image: {e}")
+            return f"An error occurred while processing the image: {e}"
+
     def _discover_tools(self) -> List[BaseTool]:
         """
         Automatically discover and register tools from the 'tools' directory.
@@ -85,7 +125,7 @@ class Agent(BaseModel):
             try:
                 # Import the module
                 module_name = file.stem
-                module = importlib.import_module(f"
+                module = importlib.import_module(f"semantio.tools.{module_name}")

                 # Find all classes that inherit from BaseTool
                 for name, obj in module.__dict__.items():
@@ -122,12 +162,24 @@ class Agent(BaseModel):
             },
             "openai": {
                 "class": "OpenAILlm",
-                "default_model": "gpt-
+                "default_model": "gpt-4o",
             },
             "anthropic": {
                 "class": "AnthropicLlm",
                 "default_model": "claude-2.1",
             },
+            "deepseek": {
+                "class": "DeepSeekLLM",
+                "default_model": "deepseek-chat",
+            },
+            "gemini": {
+                "class": "GeminiLLM",
+                "default_model": "gemini-1.5-flash",
+            },
+            "mistral": {
+                "class": "MistralLLM",
+                "default_model": "mistral-large-latest",
+            },
         }

         # Normalize the LLM provider name (case-insensitive)
@@ -145,7 +197,7 @@ class Agent(BaseModel):
         model_to_use = self.llm_model or default_model

         # Dynamically import and initialize the LLM class
-        module_name = f"
+        module_name = f"semantio.llm.{llm_provider}"
         llm_module = importlib.import_module(module_name)
         llm_class = getattr(llm_module, llm_class_name)
         self.llm_instance = llm_class(model=model_to_use, api_key=api_key)
@@ -156,23 +208,16 @@ class Agent(BaseModel):
         retriever = Retriever(vector_store)
         return RAG(retriever)

-    def load_image_from_url(self, image_url: str) -> Image:
-        """Load an image from a URL and return it as a PIL Image."""
-        response = requests.get(image_url)
-        image_bytes = response.content
-        return Image.open(io.BytesIO(image_bytes))
-
     def print_response(
         self,
         message: Optional[Union[str, Image, List, Dict]] = None,
         stream: bool = False,
         markdown: bool = False,
+        tools: Optional[List[BaseTool]] = None,
+        team: Optional[List['Agent']] = None,
         **kwargs,
     ) -> Union[str, Dict]:  # Add return type hint
         """Print the agent's response to the console and return it."""
-        if isinstance(message, Image):
-            # Handle image input
-            message = self._process_image(message)

         if stream:
             # Handle streaming response
@@ -182,16 +227,11 @@ class Agent(BaseModel):
                 response += chunk
             return response
         else:
-
-            response = self._generate_response(message, markdown=markdown, **kwargs)
+            # Generate and return the response
+            response = self._generate_response(message, markdown=markdown, tools=tools, team=team, **kwargs)
             print(response)  # Print the response to the console
             return response

-    def _process_image(self, image: Image) -> str:
-        """Process the image and return a string representation."""
-        # Convert the image to text or extract relevant information
-        # For now, we'll just return a placeholder string
-        return "Image processed. Extracted text: [Placeholder]"

     def _stream_response(self, message: str, markdown: bool = False, **kwargs) -> Iterator[str]:
         """Stream the agent's response."""
@@ -284,20 +324,43 @@ class Agent(BaseModel):
             return []


-    def _generate_response(self, message: str, markdown: bool = False, **kwargs) -> str:
+    def _generate_response(self, message: str, markdown: bool = False, tools: Optional[List[BaseTool]] = None, team: Optional[List['Agent']] = None, **kwargs) -> str:
         """Generate the agent's response, including tool execution and context retrieval."""
-        # Use the
-
-
+        # Use the specified tools or team if provided
+        if tools is not None:
+            self.tools = tools
+        if team is not None:
+            return self._generate_team_response(message, team, markdown=markdown, **kwargs)
+
+        # Initialize tool_outputs as an empty dictionary
+        tool_outputs = {}
         responses = []
-        tool_outputs = {}  # Store outputs of all tools for collaboration

-        #
+        # Use the LLM to analyze the query and dynamically select tools when auto_tool is enabled
+        if self.auto_tool:
+            tool_calls = self._analyze_query_and_select_tools(message)
+        else:
+            # Check if tools are provided
+            if self.tools:
+                tool_calls = [
+                    {
+                        "tool": tool.__class__.__name__,
+                        "input": {
+                            "query": message,  # Use the message as the query
+                            "context": None,  # No context provided by default
+                        }
+                    }
+                    for tool in self.tools
+                ]
+            else:
+                tool_calls = kwargs.get("tool_calls", [])
+
+        # Execute tools if any are detected
         if tool_calls:
             for tool_call in tool_calls:
                 tool_name = tool_call["tool"]
                 tool_input = tool_call["input"]
-
+
                 # Find the tool
                 tool = next((t for t in self.tools if t.name.lower() == tool_name.lower()), None)
                 if tool:
@@ -333,9 +396,8 @@ class Agent(BaseModel):
             except Exception as e:
                 logger.error(f"Failed to generate LLM response: {e}")
                 responses.append(f"An error occurred while generating the analysis: {e}")
-
-        # If no tools were executed, proceed with the original logic
         if not tool_calls:
+            # If no tools were executed, proceed with the original logic
             # Retrieve relevant context using RAG
             rag_context = self.rag.retrieve(message) if self.rag else None
             # Retrieve relevant context from the knowledge base (API result)
@@ -370,8 +432,15 @@ class Agent(BaseModel):
             if markdown:
                 return f"**Response:**\n\n{response}"
             return response
-
-
+        # Combine all responses into a single string
+        return "\n\n".join(responses)
+
+    def _generate_team_response(self, message: str, team: List['Agent'], markdown: bool = False, **kwargs) -> str:
+        """Generate a response using a team of assistants."""
+        responses = []
+        for agent in team:
+            response = agent.print_response(message, markdown=markdown, **kwargs)
+            responses.append(f"**{agent.name}:**\n\n{response}")
         return "\n\n".join(responses)

     def _build_prompt(self, message: str, context: Optional[List[Dict]]) -> str:
```
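The new `team` and `auto_tool` fields change how `print_response` routes a message. A minimal sketch of how they might be exercised (not from the package itself; constructor arguments are inferred from the `Field` definitions above and from `cli/main.py` below):

```python
# Hypothetical usage sketch; Agent/get_llm call shapes are inferred from this diff.
from semantio.agent import Agent
from semantio.llm import get_llm

llm = get_llm(provider="groq", api_key="YOUR_GROQ_KEY")
researcher = Agent(name="Researcher", model="groq", llm=llm)
writer = Agent(name="Writer", model="groq", llm=llm)

lead = Agent(name="Lead", model="groq", llm=llm, auto_tool=True)

# team=... routes through the new _generate_team_response, which collects each
# assistant's reply and joins them under "**<name>:**" headers.
lead.print_response("Summarize the state of open-source LLMs.", team=[researcher, writer])
```

Note that when `tools=` is passed, `_generate_response` assigns it to `self.tools`, so the override persists on the agent for subsequent calls.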
semantio/cli/main.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import argparse
 import warnings
-from
-from
+from semantio.agent import Agent
+from semantio.llm import get_llm
 from urllib3.exceptions import NotOpenSSLWarning

 # Suppress the NotOpenSSLWarning
@@ -9,7 +9,7 @@ warnings.filterwarnings("ignore", category=NotOpenSSLWarning)

 def main():
     parser = argparse.ArgumentParser(description="opAi CLI")
-    parser.add_argument("--message", type=str, required=True, help="Message to send to the
+    parser.add_argument("--message", type=str, required=True, help="Message to send to the agent")
     parser.add_argument("--provider", type=str, required=True, help="LLM provider (e.g., groq, openai)")
     parser.add_argument("--api-key", type=str, required=True, help="API key for the LLM provider")
     parser.add_argument("--model", type=str, default=None, help="Model name (e.g., mixtral-8x7b-32768)")
@@ -22,9 +22,9 @@ def main():

     llm = get_llm(provider=args.provider, **llm_config)

-    # Create an
-
-
+    # Create an agent
+    agent = Agent(model=args.provider, llm=llm)
+    agent.print_response(args.message)


 if __name__ == "__main__":
```
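With the repaired imports, the CLI wires `get_llm` into an `Agent`. The Python equivalent of one invocation (the installed console-script name comes from `entry_points.txt`, which this diff does not show):

```python
# Equivalent of: <cli> --provider groq --api-key YOUR_GROQ_KEY --message "Hi"
from semantio.llm import get_llm
from semantio.agent import Agent

llm = get_llm(provider="groq", api_key="YOUR_GROQ_KEY")  # model=None falls back to the provider default
agent = Agent(model="groq", llm=llm)
agent.print_response("Hi")
```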
semantio/llm/__init__.py
CHANGED

```diff
@@ -1,17 +1,24 @@
 from .openai import OpenAILlm
-from .anthropic import
-# from .llama import LlamaLlm
+from .anthropic import AnthropicLLM
 from .groq import GroqLlm
+from .mistral import MistralLLM
+from .deepseek import DeepSeekLLM
+from .gemini import GeminiLLM

 def get_llm(provider: str, **kwargs):
     provider = provider.lower()  # Convert provider name to lowercase
     if provider == "openai":
         return OpenAILlm(**kwargs)
     elif provider == "anthropic":
-        return
-    # elif provider == "llama":
-    #     return LlamaLlm(**kwargs)
+        return AnthropicLLM(**kwargs)
     elif provider == "groq":
         return GroqLlm(**kwargs)
+    elif provider == "mistral":
+        return MistralLLM(**kwargs)
+    elif provider == "deepseek":
+        return DeepSeekLLM(**kwargs)
+    elif provider == "gemini":
+        return GeminiLLM(**kwargs)
+
     else:
         raise ValueError(f"Unsupported LLM provider: {provider}")
```
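`get_llm` now dispatches to six providers. A short sketch, assuming the corresponding API keys are set in the environment (each class falls back to its `*_API_KEY` variable, as the modules below show):

```python
from semantio.llm import get_llm

# Each branch returns the provider class initialized with its default model.
for provider in ("openai", "anthropic", "groq", "mistral", "deepseek", "gemini"):
    llm = get_llm(provider=provider)
    print(provider, "->", type(llm).__name__)
```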
semantio/llm/anthropic.py
CHANGED

```diff
@@ -1,39 +1,52 @@
+import os
 from typing import List, Dict, Optional
 from .base_llm import BaseLLM
 import anthropic
-import os

-class
-    def __init__(
-
-
-
-
+class AnthropicLLM(BaseLLM):
+    def __init__(self, model: str = "claude-3-5-sonnet-20241022", api_key: Optional[str] = None):
+        """
+        Initialize the Anthropic LLM class.
+
+        Args:
+            model (str): The name of the model (e.g., claude-3-5-sonnet-20241022).
+            api_key (Optional[str]): The Anthropic API key. If not provided, it fetches from the environment.
+        """
         self.model = model
         self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
         if not self.api_key:
             raise ValueError("Anthropic API key is required. Set ANTHROPIC_API_KEY environment variable or pass it explicitly.")
-        self.client = anthropic.
+        self.client = anthropic.Anthropic(api_key=self.api_key)
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        """
+        Generate text using Anthropic's Claude model.
+
+        Args:
+            prompt (str): The user prompt.
+            context (Optional[List[Dict]]): Context to include in the conversation.
+            memory (Optional[List[Dict]]): Memory from previous interactions.

-
-
-
-
-
-
-
-
-
-
-
-        messages.append({"role": "system", "content": "Context: " + str(context)})
-        messages.append({"role": "user", "content": prompt})
+        Returns:
+            str: The generated response from the model.
+        """
+        try:
+            # Prepare messages for the Anthropic API
+            messages = []
+            if memory:
+                messages.extend(memory)
+            if context:
+                messages.append({"role": "system", "content": "Context: " + str(context)})
+            messages.append({"role": "user", "content": prompt})

-
-
-
-
-
+            # Call the Anthropic API
+            response = self.client.messages.create(
+                model=self.model,
+                max_tokens=1024,
+                messages=messages,
+            )

-
-
+            # Extract and return the response
+            return response.content
+        except Exception as e:
+            raise ValueError(f"Error while generating response with Anthropic Claude: {e}")
```
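Two caveats worth flagging, based on current versions of the `anthropic` SDK: the Messages API takes system text as a top-level `system` parameter rather than a `{"role": "system"}` message, and `response.content` is a list of content blocks rather than a plain string, so `generate` as written does not return a `str`. A hedged sketch of how a caller might flatten the return value:

```python
# Sketch against the official anthropic SDK; not part of this package.
import anthropic

client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from the environment
resp = client.messages.create(
    model="claude-3-5-sonnet-20241022",
    max_tokens=1024,
    system="You are terse.",  # system prompt goes here, not in messages
    messages=[{"role": "user", "content": "Hello"}],
)
text = "".join(block.text for block in resp.content if block.type == "text")
print(text)
```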
semantio/llm/base_llm.py
CHANGED

```diff
@@ -9,4 +9,13 @@ class BaseLLM(ABC):
         context: Optional[List[Dict]] = None,
         memory: Optional[List[Dict]] = None,
     ) -> str:
-        pass
+        pass
+
+    @property
+    def supports_vision(self) -> bool:
+        """Return True if the LLM supports vision tasks."""
+        return False
+
+    def generate_from_image(self, image_bytes: bytes, **kwargs) -> str:
+        """Process an image if vision is supported. Default implementation raises an error."""
+        raise NotImplementedError("This LLM does not support vision tasks.")
```
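The base class now exposes a `supports_vision` property (default `False`) and a `generate_from_image` stub. Note the stub's signature, `(self, image_bytes, **kwargs)`, differs from the `(self, prompt, image_bytes, **kwargs)` overrides in the Groq and OpenAI classes below. A minimal subclass sketch:

```python
# Minimal sketch of a subclass opting into the new vision hooks.
from typing import List, Dict, Optional
from semantio.llm.base_llm import BaseLLM

class EchoLLM(BaseLLM):
    def generate(self, prompt: str, context: Optional[List[Dict]] = None,
                 memory: Optional[List[Dict]] = None) -> str:
        return f"echo: {prompt}"

    @property
    def supports_vision(self) -> bool:
        return True  # overrides the False default added above

    def generate_from_image(self, prompt: str, image_bytes: bytes, **kwargs) -> str:
        return f"{prompt} ({len(image_bytes)} image bytes received)"
```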
semantio/llm/deepseek.py
ADDED

```diff
@@ -0,0 +1,27 @@
+from typing import List, Dict, Optional
+from .base_llm import BaseLLM
+from openai import OpenAI
+import os
+
+class DeepSeekLLM(BaseLLM):
+    def __init__(self, model: str = "deepseek-chat", api_key: Optional[str] = None):
+        self.model = model
+        self.api_key = api_key or os.getenv("DEEPSEEK_API_KEY")
+        if not self.api_key:
+            raise ValueError("DeepSeek API key is required. Set DEEPSEEK_API_KEY environment variable or pass it explicitly.")
+        self.client = OpenAI(api_key=self.api_key, base_url="https://api.deepseek.com")
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        messages = []
+        if memory:
+            messages.extend(memory)
+        if context:
+            messages.append({"role": "system", "content": "Context: " + str(context)})
+        messages.append({"role": "user", "content": prompt})
+
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+        )
+
+        return response.choices[0].message.content
```
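DeepSeek is reached through the OpenAI-compatible client pointed at `https://api.deepseek.com`. A usage sketch, assuming `DEEPSEEK_API_KEY` is set:

```python
from semantio.llm.deepseek import DeepSeekLLM

llm = DeepSeekLLM()  # defaults to "deepseek-chat"
print(llm.generate(
    "Explain retrieval-augmented generation in one sentence.",
    memory=[{"role": "assistant", "content": "We were discussing LLM pipelines."}],
))
```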
semantio/llm/gemini.py
ADDED

```diff
@@ -0,0 +1,50 @@
+import os
+from typing import List, Dict, Optional
+from .base_llm import BaseLLM
+from google import genai
+
+class GeminiLLM(BaseLLM):
+    def __init__(self, model: str = "gemini-1.5-flash", api_key: Optional[str] = None):
+        """
+        Initialize the Gemini LLM class.
+
+        Args:
+            model (str): The name of the Gemini model (e.g., 'gemini-1.5-flash').
+            api_key (Optional[str]): The Gemini API key. If not provided, it fetches from the environment.
+        """
+        self.model = model
+        self.api_key = api_key or os.getenv("GEMINI_API_KEY")
+        if not self.api_key:
+            raise ValueError("Gemini API key is required. Set GEMINI_API_KEY environment variable or pass it explicitly.")
+
+        # Initialize the client using the API key
+        self.client = genai.Client(api_key=self.api_key)
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        """
+        Generate text using Google's Gemini model.
+
+        Args:
+            prompt (str): The user prompt.
+            context (Optional[List[Dict]]): Context to include in the conversation.
+            memory (Optional[List[Dict]]): Memory from previous interactions.
+
+        Returns:
+            str: The generated response from the model.
+        """
+        try:
+            # Prepare the chat history (optional context and memory)
+            history = memory if memory else []
+            if context:
+                history.append({"role": "system", "content": str(context)})
+
+            # Generate the content using the specified Gemini model
+            response = self.client.models.generate_content(
+                model=self.model,
+                contents=prompt
+            )
+
+            # Return the response text
+            return response.text
+        except Exception as e:
+            raise ValueError(f"Error while generating response with Gemini: {e}")
```
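One observation: `generate` assembles `history` from `memory` and `context` but never passes it to `generate_content`, so prior turns are effectively ignored by this implementation. Basic usage, assuming `GEMINI_API_KEY` is set:

```python
from semantio.llm.gemini import GeminiLLM

llm = GeminiLLM()  # defaults to "gemini-1.5-flash"
print(llm.generate("Name three moons of Jupiter."))
```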
semantio/llm/groq.py
CHANGED

```diff
@@ -1,4 +1,5 @@
-
+import base64
+from typing import Optional, List, Dict
 from .base_llm import BaseLLM
 import groq
 import os
@@ -15,12 +16,22 @@ class GroqLlm(BaseLLM):
             raise ValueError("Groq API key is required. Set GROQ_API_KEY environment variable or pass it explicitly.")
         self.client = groq.Client(api_key=self.api_key)

-
-
-
-
-
-
+    @property
+    def supports_vision(self) -> bool:
+        """
+        Check if the model supports vision tasks.
+        """
+        # List of Groq models that support vision
+        vision_models = [
+            "llama-3.2-11b-vision-preview",
+            "llama-3.2-90b-vision-preview"
+        ]
+        return self.model in vision_models
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        """
+        Generate a response to a text-based prompt.
+        """
         # Prepare messages for the Groq API
         messages = []
         if memory:
@@ -36,4 +47,75 @@ class GroqLlm(BaseLLM):
         )

         # Extract and return the response
-        return response.choices[0].message.content
+        return response.choices[0].message.content
+
+    def generate_from_image(self, prompt: str, image_bytes: bytes, **kwargs) -> str:
+        """
+        Process an image and generate a response if the model supports vision.
+        """
+        if not self.supports_vision:
+            raise ValueError(f"Model '{self.model}' does not support vision tasks.")
+
+        try:
+            # Convert the image bytes to base64
+            image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+            # Construct the message payload
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{image_base64}",
+                            },
+                        },
+                    ],
+                }
+            ]
+
+            # Call the Groq API with the base64-encoded image
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                **kwargs,
+            )
+
+            # Extract and return the response text
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while processing image with Groq vision model: {e}")
+
+
+    def generate_from_image_url(self, prompt: str, image_url: str, **kwargs) -> str:
+        """
+        Process an image URL and generate a response if the model supports vision.
+        """
+        if not self.supports_vision:
+            raise ValueError(f"Model '{self.model}' does not support vision tasks.")
+
+        try:
+            # Call the Groq API with the image URL
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": image_url,
+                                },
+                            },
+                        ],
+                    }
+                ],
+                **kwargs,
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while processing image URL with Groq vision model: {e}")
```
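A sketch of the new vision path with an image URL, assuming a vision-capable model from the list above and `GROQ_API_KEY` in the environment:

```python
from semantio.llm.groq import GroqLlm

llm = GroqLlm(model="llama-3.2-11b-vision-preview")
caption = llm.generate_from_image_url(
    "Describe this image in one sentence.",
    "https://example.com/photo.jpg",  # placeholder URL
)
print(caption)
```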
semantio/llm/mistral.py
ADDED

```diff
@@ -0,0 +1,27 @@
+from typing import List, Dict, Optional
+from .base_llm import BaseLLM
+from mistralai import Mistral
+import os
+
+class MistralLLM(BaseLLM):
+    def __init__(self, model: str = "mistral-large-latest", api_key: Optional[str] = None):
+        self.model = model
+        self.api_key = api_key or os.getenv("MISTRAL_API_KEY")
+        if not self.api_key:
+            raise ValueError("Mistral API key is required. Set MISTRAL_API_KEY environment variable or pass it explicitly.")
+        self.client = Mistral(api_key=self.api_key)
+
+    def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
+        messages = []
+        if memory:
+            messages.extend(memory)
+        if context:
+            messages.append({"role": "system", "content": "Context: " + str(context)})
+        messages.append({"role": "user", "content": prompt})
+
+        response = self.client.chat.complete(
+            model=self.model,
+            messages=messages,
+        )
+
+        return response.choices[0].message.content
```
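Usage mirrors the other providers, via the official `mistralai` client's `chat.complete`. A sketch, assuming `MISTRAL_API_KEY` is set:

```python
from semantio.llm.mistral import MistralLLM

llm = MistralLLM()  # defaults to "mistral-large-latest"
print(llm.generate(
    "Translate 'bonjour' to English.",
    context=[{"source": "greetings glossary"}],  # stringified into a system message
))
```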
semantio/llm/openai.py
CHANGED

```diff
@@ -1,26 +1,136 @@
 from typing import List, Dict, Optional
 from .base_llm import BaseLLM
-import
+from openai import OpenAI
 import os
+import base64

 class OpenAILlm(BaseLLM):
-    def __init__(self, model: str = "gpt-
+    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
+        """
+        Initialize the OpenAI LLM class.
+
+        Args:
+            model (str): The name of the model (e.g., gpt-4o, gpt-4-vision).
+            api_key (Optional[str]): The OpenAI API key. If not provided, it fetches from the environment.
+        """
         self.model = model
         self.api_key = api_key or os.getenv("OPENAI_API_KEY")
         if not self.api_key:
             raise ValueError("OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it explicitly.")
-
+        self.client = OpenAI(api_key=self.api_key)

     def generate(self, prompt: str, context: Optional[List[Dict]] = None, memory: Optional[List[Dict]] = None) -> str:
-
-
-
-
-
-
-
-
-
-
-
-
+        """
+        Generate text using OpenAI's ChatCompletion API.
+
+        Args:
+            prompt (str): The user prompt.
+            context (Optional[List[Dict]]): Context to include in the conversation.
+            memory (Optional[List[Dict]]): Memory from previous interactions.
+
+        Returns:
+            str: The generated response from the model.
+        """
+        try:
+            # Prepare messages for the OpenAI API
+            messages = []
+            if memory:
+                messages.extend(memory)
+            if context:
+                messages.append({"role": "system", "content": "Context: " + str(context)})
+            messages.append({"role": "user", "content": prompt})
+
+            # Call the ChatCompletion endpoint
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+            )
+
+            # Extract and return the response
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while generating response with OpenAI: {e}")
+
+    @property
+    def supports_vision(self) -> bool:
+        """
+        Check if the model supports vision tasks.
+        """
+        # List of GPT models that support vision
+        vision_models = [
+            "gpt-4o", "gpt-4o mini", "o1", "o1 mini"
+        ]
+        return self.model in vision_models
+
+    def generate_from_image_url(self, prompt: str, image_url: str, **kwargs) -> str:
+        """
+        Process an image URL with OpenAI's vision-capable models, using the prompt as instructions.
+
+        Args:
+            prompt (str): Instructions provided as the prompt for image analysis.
+            image_url (str): The URL of the image.
+            kwargs: Additional parameters for the OpenAI API.
+
+        Returns:
+            str: The response generated by the vision-capable model.
+        """
+        if not self.supports_vision:
+            raise ValueError(f"Model '{self.model}' does not support vision tasks.")
+
+        try:
+            # Use the prompt as the instructions in the API call
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": image_url}},
+                        ],
+                    }
+                ],
+                **kwargs,
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while processing image URL with OpenAI Vision model: {e}")
+
+    def generate_from_image(self, prompt: str, image_bytes: bytes, **kwargs) -> str:
+        """
+        Process an image and generate a response if the model supports vision.
+        """
+        if not self.supports_vision:
+            raise ValueError(f"Model '{self.model}' does not support vision tasks.")
+
+        try:
+            # Convert the image bytes to base64
+            image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+
+            # Construct the message payload
+            messages = [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": prompt},
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{image_base64}",
+                            },
+                        },
+                    ],
+                }
+            ]
+
+            # Call the OpenAI API with the base64-encoded image
+            response = self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                **kwargs,
+            )
+
+            # Extract and return the response text
+            return response.choices[0].message.content
+        except Exception as e:
+            raise ValueError(f"Error while processing image with OpenAI vision model: {e}")
```
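Two notes: the `vision_models` entries "gpt-4o mini" and "o1 mini" contain spaces, whereas the published model IDs are hyphenated ("gpt-4o-mini", "o1-mini"), so those two entries will never match `self.model`; and the local-image path sends bytes as a base64 `data:` URL. A sketch of that path, assuming `OPENAI_API_KEY` is set and a `photo.jpg` exists:

```python
from semantio.llm.openai import OpenAILlm

llm = OpenAILlm(model="gpt-4o")
with open("photo.jpg", "rb") as f:
    print(llm.generate_from_image("What is in this photo?", f.read()))
```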
{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/METADATA
CHANGED

```diff
@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: semantio
-Version: 0.0.1
-Summary: A powerful SDK for building AI agents
+Version: 0.0.3
+Summary: A powerful SDK for building AI agents
 Home-page: https://github.com/Syenah/semantio
 Author: Rakesh
 Author-email: rakeshsahoo689@gmail.com
@@ -19,7 +19,8 @@ License-File: LICENSE
 Requires-Dist: openai
 Requires-Dist: anthropic
 Requires-Dist: groq
-Requires-Dist:
+Requires-Dist: google-genai
+Requires-Dist: mistralai
 Requires-Dist: faiss-cpu
 Requires-Dist: pydantic
 Requires-Dist: requests
@@ -32,8 +33,6 @@ Requires-Dist: sentence-transformers
 Requires-Dist: fuzzywuzzy
 Requires-Dist: duckduckgo-search
 Requires-Dist: yfinance
-Requires-Dist: forex-python
-Requires-Dist: qrcode

 # Semantio: The Mother of Your AI Agents

@@ -112,7 +111,9 @@ Semantio/
 │   │   ├── __init__.py
 │   │   ├── openai.py       # OpenAI integration
 │   │   ├── anthropic.py    # Anthropic (Claude) integration
-│   │   ├──
+│   │   ├── deepseek.py     # Deepseek integration
+│   │   ├── gemini.py       # Gemini integration
+│   │   ├── mistral.py      # Mistral integration
 │   │   └── base_llm.py     # Base class for LLMs
 │   ├── knowledge_base/     # Knowledge base integration
 │   │   ├── __init__.py
```
{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/RECORD
CHANGED

```diff
@@ -1,22 +1,24 @@
 semantio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-semantio/agent.py,sha256=
+semantio/agent.py,sha256=plQ4D76cnJ1FaGlEuKDeA53aW_hMDvt5sbmUuTHqvFQ,30143
 semantio/memory.py,sha256=eNAwyAokppHzMcIyFgOw2hT2wnLQBd9GL4T5eallNV4,281
 semantio/rag.py,sha256=ROy3Pa1NURcDs6qQZ8IMoa5Xlzt6I-msEq0C1p8UgB0,472
 semantio/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 semantio/api/api_generator.py,sha256=Q-USITEpluRESEaQuOmF7m1vhLKYU9P8eGlQppKT9J4,829
 semantio/api/fastapi_app.py,sha256=DyTgKJKikMe2G6wWmyzo1rBLXQFi8UWWUMY3UGH4f24,2128
 semantio/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-semantio/cli/main.py,sha256=
+semantio/cli/main.py,sha256=jUvSfehbHWALwracEgBopMIVMraSV9QmDUFfgGcxnP0,1091
 semantio/knowledge_base/__init__.py,sha256=mvp0GFiGSjcxlkaDulAwKOCL9s6gsKTqhPKXF9N3n1g,172
 semantio/knowledge_base/document_loader.py,sha256=nix0yZJ-JJoDbhLkpg5bKDMvNrwykmknI7MRIn0N81k,1910
 semantio/knowledge_base/retriever.py,sha256=XpdzKS1UCncJImVMtG67VXMC7lp2eRzKnShjvktsFMM,1271
 semantio/knowledge_base/vector_store.py,sha256=4Zv9kfqDD3cfn_4R8ZoLKdAQCZRYo_IENP_KkLB_RPc,987
-semantio/llm/__init__.py,sha256
-semantio/llm/anthropic.py,sha256
-semantio/llm/base_llm.py,sha256=
-semantio/llm/
-semantio/llm/
-semantio/llm/
+semantio/llm/__init__.py,sha256=-4uKcqo9fBrEbvfxGE01XVHL9qEG2vKXfy5hlnUsRbw,779
+semantio/llm/anthropic.py,sha256=-JTso9vr88T3JSipxE60uZjqDgfla1QFoSEBpXW2pXw,2054
+semantio/llm/base_llm.py,sha256=VFl_2S4kqYDuCTWIfWMbKU5aNbVqOCG33E4APOSHF90,668
+semantio/llm/deepseek.py,sha256=oxX-Uw0_lY2sstYs5KGBGFB_hAZUbZomPADdib1mY2M,1100
+semantio/llm/gemini.py,sha256=er3zv1jOvWQBGbPuv4fS4pR_c_abHyhroe-rkXupOO4,1959
+semantio/llm/groq.py,sha256=1AH30paKzDIQjBjWPQPN44QwFHsIOVwI-a587-cDIVc,4285
+semantio/llm/mistral.py,sha256=NpvaB1cE6-jMEBdT0mTf6Ca4Qq2LS8QivDKI6AgdRjE,1061
+semantio/llm/openai.py,sha256=I3ab-d_zFxm-TDhYk6t1PzDtElPJEEQ2eSiARBNIGi4,5174
 semantio/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 semantio/storage/cloud_storage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 semantio/storage/local_storage.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -25,16 +27,15 @@ semantio/tools/base_tool.py,sha256=xBNSa_8a8WmA4BGRLG2dE7wj9GnBcZo7-P2SyD86GvY,5
 semantio/tools/crypto.py,sha256=mut1ztvpPcUUP3b563dh_FmKtP68KmNis3Qm8WENj8w,5559
 semantio/tools/duckduckgo.py,sha256=6mGn0js0cIsVxQlAgB8AYNLP05H8WmJKnSVosiO9iH0,5034
 semantio/tools/stocks.py,sha256=BVuK61O9OmWQjj0YdiCJY6TzpiFJ_An1UJB2RkDfX2k,5393
-semantio/tools/web_browser.py,sha256=LMwPFTHNTtqCp8MEHVlJJUSJa91vM7MZWIL5RDQKF4U,4980
 semantio/utils/__init__.py,sha256=Lx4X4iJpRhZzRmpQb80XXh5Ve8ZMOkadWAxXSmHpO_8,244
 semantio/utils/config.py,sha256=ZTwUTqxjW3-w94zoU7GzivWyJe0JJGvBfuB4RUOuEs8,1198
 semantio/utils/date_utils.py,sha256=x3oqRGv6ee_KCJ0LvCqqZh_FSgS6YGOHBwZQS4TJetY,1471
 semantio/utils/file_utils.py,sha256=b_cMuJINEGk9ikNuNHSn9lsmICWwvtnCDZ03ndH_S2I,1779
 semantio/utils/logger.py,sha256=TmGbP8BRjLMWjXi2GWzZ0RIXt70x9qX3FuIqghCNlwM,510
 semantio/utils/validation_utils.py,sha256=iwoxEb4Q5ILqV6tbesMjPWPCCoL3AmPLejGUy6q8YvQ,1284
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
-semantio-0.0.
+semantio-0.0.3.dist-info/LICENSE,sha256=teQbWD2Zlcl1_Fo29o2tNbs6G26hbCQiUzds5fQGYlY,1063
+semantio-0.0.3.dist-info/METADATA,sha256=M5Q-waTknpyWrD_HV9G76jMKgPHPrBBwM5Hl8we4ulo,6800
+semantio-0.0.3.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
+semantio-0.0.3.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
+semantio-0.0.3.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
+semantio-0.0.3.dist-info/RECORD,,
```
semantio/llm/llama.py
DELETED
File without changes
semantio/tools/web_browser.py
DELETED

```diff
@@ -1,153 +0,0 @@
-from typing import Dict, Any, Optional, List
-from playwright.async_api import async_playwright
-import asyncio
-import logging
-
-logger = logging.getLogger(__name__)
-
-class WebBrowserTool:
-    """
-    A tool for performing browser automation tasks using Playwright.
-    """
-
-    def __init__(self, headless: bool = True):
-        """
-        Initialize the WebBrowserTool.
-
-        Args:
-            headless (bool): Whether to run the browser in headless mode (default: True).
-        """
-        self.headless = headless
-        self.browser = None
-        self.context = None
-        self.page = None
-
-    async def start(self):
-        """
-        Start the browser and create a new context and page.
-        """
-        self.playwright = await async_playwright().start()
-        self.browser = await self.playwright.chromium.launch(headless=self.headless)
-        self.context = await self.browser.new_context()
-        self.page = await self.context.new_page()
-        logger.info("Browser started successfully.")
-
-    async def close(self):
-        """
-        Close the browser and cleanup resources.
-        """
-        if self.browser:
-            await self.browser.close()
-            await self.playwright.stop()
-            logger.info("Browser closed successfully.")
-
-    async def navigate(self, url: str) -> str:
-        """
-        Navigate to a specific URL.
-
-        Args:
-            url (str): The URL to navigate to.
-
-        Returns:
-            str: The page title after navigation.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        await self.page.goto(url)
-        title = await self.page.title()
-        logger.info(f"Navigated to {url}. Page title: {title}")
-        return title
-
-    async def fill_form(self, fields: Dict[str, str]) -> str:
-        """
-        Fill a form with the provided fields.
-
-        Args:
-            fields (Dict[str, str]): A dictionary of field names and values to fill.
-
-        Returns:
-            str: A success message.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        for field, value in fields.items():
-            await self.page.fill(f'input[name="{field}"]', value)
-            logger.info(f"Filled field '{field}' with value '{value}'.")
-
-        return "Form filled successfully."
-
-    async def click(self, selector: str) -> str:
-        """
-        Click an element on the page.
-
-        Args:
-            selector (str): The CSS selector of the element to click.
-
-        Returns:
-            str: A success message.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        await self.page.click(selector)
-        logger.info(f"Clicked element with selector '{selector}'.")
-        return f"Clicked element: {selector}"
-
-    async def scrape(self, selector: str) -> List[Dict[str, str]]:
-        """
-        Scrape data from the page.
-
-        Args:
-            selector (str): The CSS selector of the elements to scrape.
-
-        Returns:
-            List[Dict[str, str]]: A list of dictionaries containing the scraped data.
-        """
-        if not self.page:
-            raise RuntimeError("Browser is not started. Call start() first.")
-
-        elements = await self.page.query_selector_all(selector)
-        scraped_data = []
-        for element in elements:
-            text = await element.inner_text()
-            scraped_data.append({"text": text.strip()})
-            logger.info(f"Scraped text: {text.strip()}")
-
-        return scraped_data
-
-    async def execute_step(self, step: Dict[str, Any]) -> str:
-        """
-        Execute a browser automation step.
-
-        Args:
-            step (Dict[str, Any]): A dictionary containing the step details.
-                - "action": The action to perform (e.g., "navigate", "fill_form", "click", "scrape").
-                - "details": The details required for the action (e.g., URL, form fields, selector).
-                - "website": The website to perform the action on (optional).
-
-        Returns:
-            str: The result of the step execution.
-        """
-        action = step.get("action")
-        details = step.get("details")
-        website = step.get("website", "https://www.google.com")
-
-        if not self.page:
-            await self.start()
-
-        try:
-            if action == "navigate":
-                return await self.navigate(details)
-            elif action == "fill_form":
-                return await self.fill_form(details)
-            elif action == "click":
-                return await self.click(details)
-            elif action == "scrape":
-                return str(await self.scrape(details))
-            else:
-                return f"Unknown action: {action}"
-        except Exception as e:
-            logger.error(f"Error executing step: {e}")
-            return f"Error executing step: {e}"
```
{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/LICENSE
RENAMED
File without changes

{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/WHEEL
RENAMED
File without changes

{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/entry_points.txt
RENAMED
File without changes

{semantio-0.0.1.dist-info → semantio-0.0.3.dist-info}/top_level.txt
RENAMED
File without changes