SimplerLLM 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- SimplerLLM/__init__.py +0 -0
- SimplerLLM/langauge/__init__.py +0 -0
- SimplerLLM/langauge/llm.py +136 -0
- SimplerLLM/langauge/llm_addons.py +56 -0
- SimplerLLM/langauge/llm_providers/__init__.py +0 -0
- SimplerLLM/langauge/llm_providers/gemeni_llm.py +125 -0
- SimplerLLM/langauge/llm_providers/openai_llm.py +341 -0
- SimplerLLM/prompts/__init__.py +0 -0
- SimplerLLM/prompts/prompt_builder.py +86 -0
- SimplerLLM/tools/__init__.py +0 -0
- SimplerLLM/tools/generic_text_loader.py +160 -0
- SimplerLLM/tools/json_helpers.py +130 -0
- SimplerLLM/tools/rapid_api.py +109 -0
- SimplerLLM/tools/serp.py +79 -0
- SimplerLLM-0.1.0.dist-info/METADATA +166 -0
- SimplerLLM-0.1.0.dist-info/RECORD +18 -0
- SimplerLLM-0.1.0.dist-info/WHEEL +5 -0
- SimplerLLM-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
class SimplePrompt:
    """
    A single prompt template that is filled in with ``str.format`` keyword arguments.

    Attributes:
        template: The raw format string, e.g. ``"Summarize: {text}"``.
        content: The most recently filled template. Empty until
            ``assign_parms`` succeeds and reset by ``update_template``.
    """

    def __init__(self, template: str):
        """
        Store the template.

        Raises:
            ValueError: If ``template`` is not a string.
        """
        if not isinstance(template, str):
            raise ValueError("Template must be a string")
        self.template = template
        self.content = ''  # Holds the latest filled template

    def assign_parms(self, **kwargs) -> str:
        """
        Assigns parameters to the template and returns the filled template.

        The result is also stored in ``self.content``.

        Raises:
            KeyError: If a named placeholder has no matching keyword argument.
            ValueError: For any other formatting failure (for example a
                positional ``{}`` placeholder or a malformed format spec).
        """
        try:
            self.content = self.template.format(**kwargs)
        except KeyError as e:
            # Chain explicitly so the traceback shows the original failure as the cause.
            raise KeyError(f"Missing a required key in the template: {e}") from e
        except Exception as e:
            # Catch-all for other exceptions related to string formatting
            raise ValueError(f"Error processing the template: {e}") from e
        return self.content

    def update_template(self, new_template: str):
        """
        Updates the template and clears the latest content.

        Raises:
            ValueError: If ``new_template`` is not a string.
        """
        if not isinstance(new_template, str):
            raise ValueError("New template must be a string")
        self.template = new_template
        self.content = ''

    def __str__(self) -> str:
        """Return the latest filled template (empty string if none yet)."""
        return self.content
|
|
36
|
+
|
|
37
|
+
def create_prompt_template(template_string: str) -> SimplePrompt:
    """
    Factory function that wraps ``template_string`` in a SimplePrompt instance.

    Raises:
        ValueError: If ``template_string`` is not a string.
    """
    if isinstance(template_string, str):
        return SimplePrompt(template_string)
    raise ValueError("Template string must be a string")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class MultiValuePrompt:
    """
    A prompt template that is filled once per parameter set.

    Attributes:
        template: The raw ``str.format`` template.
        generated_prompts: Prompts produced by the latest ``generate_prompts`` call.
    """

    def __init__(self, template: str):
        """
        Store the template.

        Raises:
            ValueError: If ``template`` is not a string.
        """
        if not isinstance(template, str):
            raise ValueError("Template must be a string")
        self.template = template
        self.generated_prompts = []  # Holds the generated prompts

    def generate_prompts(self, params_list: list) -> list:
        """
        Generates prompts for each set of parameters in the params_list.

        Any previously generated prompts are discarded first.

        Args:
            params_list: Iterable of dicts; each dict supplies the keyword
                arguments for one fill of the template.

        Returns:
            The list of filled prompts, in the same order as ``params_list``.

        Raises:
            ValueError: If an item is not a dict, or formatting fails for a
                reason other than a missing key.
            KeyError: If a placeholder has no matching key in one of the dicts.
        """
        if not all(isinstance(params, dict) for params in params_list):
            raise ValueError("Each item in params_list must be a dictionary")

        self.generated_prompts = []
        for params in params_list:
            try:
                filled_prompt = self.template.format(**params)
            except KeyError as e:
                # Chain explicitly so the traceback shows the original failure as the cause.
                raise KeyError(f"Missing a required key in the template: {e}") from e
            except Exception as e:
                raise ValueError(f"Error processing the template: {e}") from e
            self.generated_prompts.append(filled_prompt)

        return self.generated_prompts

    def __str__(self) -> str:
        """Return the generated prompts joined by newlines."""
        return "\n".join(self.generated_prompts)
|
|
79
|
+
|
|
80
|
+
def create_multi_value_prompts(template_string: str) -> MultiValuePrompt:
    """
    Factory function that wraps ``template_string`` in a MultiValuePrompt instance.

    Raises:
        ValueError: If ``template_string`` is not a string.
    """
    if isinstance(template_string, str):
        return MultiValuePrompt(template_string)
    raise ValueError("Template string must be a string")
|
|
File without changes
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import newspaper
|
|
2
|
+
import os
|
|
3
|
+
import PyPDF2
|
|
4
|
+
import docx
|
|
5
|
+
from youtube_transcript_api import YouTubeTranscriptApi
|
|
6
|
+
import re
|
|
7
|
+
from urllib.parse import urlparse
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TextDocument(BaseModel):
    """Text loaded from a file or URL together with basic size statistics."""

    # Size in bytes (on-disk size for files, UTF-8 encoded length for URLs).
    file_size: Optional[int] = None
    # Number of whitespace-separated tokens in ``content``.
    word_count: int
    # Number of characters in ``content``.
    character_count: int
    # The extracted text itself.
    content: str
    # Title of the source, when the loader can determine one.
    title: Optional[str] = None
    # Where the text was loaded from.
    url_or_path: Optional[str] = None
|
|
20
|
+
|
|
21
|
+
def load_text(input_path_or_url):
    """
    Load text from a URL or a local file and wrap it in a TextDocument.

    URLs (``http://`` / ``https://``) are treated as YouTube videos when they
    mention ``youtube.com`` or ``youtu.be`` (the transcript is fetched),
    otherwise as articles. Anything else is treated as a file path and read
    according to its extension (``.txt``, ``.docx``, ``.pdf``); unknown
    extensions fall back to plain-text reading.

    Args:
        input_path_or_url (str): URL or file path to load.

    Returns:
        TextDocument: The extracted text plus word/character counts and size.

    Raises:
        ValueError: If a file cannot be processed, or an article URL could
            not be fetched.
    """
    # Lower-case only a throwaway copy used for detection. YouTube video ids,
    # URL paths and file paths (on case-sensitive filesystems) must keep
    # their original case, so the real input is never lower-cased wholesale.
    probe = str.lower(input_path_or_url)

    if re.match(r'http[s]?://', probe):
        # Host names are case-insensitive, so normalise just that part; the
        # helpers' substring/regex checks expect a lower-case host.
        parts = urlparse(input_path_or_url)
        url = parts._replace(netloc=parts.netloc.lower()).geturl()

        if "youtube.com" in probe or "youtu.be" in probe:
            content = __read_youtube_video_transcript(url)
            return TextDocument(
                word_count=len(content.split()),
                character_count=len(content),
                content=content,
                file_size=len(content.encode('utf-8')),  # Size in bytes
                url_or_path=url,
            )

        article = __read_blog_from_url(url)
        if article is not None:
            return TextDocument(
                word_count=len(article.text.split()),
                character_count=len(article.text),
                content=article.text,
                title=article.title,
                file_size=len(article.text.encode('utf-8')),  # Size in bytes
                url_or_path=url,
            )

        raise ValueError("Unable to process the input")

    try:
        # Process based on file extension
        file_ext = os.path.splitext(input_path_or_url)[1].lower()
        if file_ext == '.docx':
            reader = __read_docx_file
        elif file_ext == '.pdf':
            reader = __read_pdf_file
        else:
            # '.txt' and, as a fallback, every unknown extension
            reader = __read_text_file
        file_size, num_words, num_chars, content = reader(input_path_or_url)

        return TextDocument(
            file_size=file_size,
            word_count=num_words,
            character_count=num_chars,
            content=content,
            url_or_path=input_path_or_url,
        )
    except Exception as e:
        raise ValueError(f"Error processing file: {e}") from e
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def __read_text_file(file_path):
    """
    Read a UTF-8 text file.

    Returns:
        tuple: ``(file_size_in_bytes, word_count, character_count, content)``.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    size_in_bytes = os.path.getsize(file_path)
    return size_in_bytes, len(text.split()), len(text), text
|
|
89
|
+
|
|
90
|
+
def __read_docx_file(file_path):
    """
    Read a .docx file, joining its paragraphs with newlines.

    Returns:
        tuple: ``(file_size_in_bytes, word_count, character_count, content)``.
    """
    size_in_bytes = os.path.getsize(file_path)
    document = docx.Document(file_path)
    text = "\n".join(paragraph.text for paragraph in document.paragraphs)

    return size_in_bytes, len(text.split()), len(text), text
|
|
100
|
+
|
|
101
|
+
def __read_pdf_file(file_path):
    """
    Read a PDF file and concatenate the text extracted from every page.

    Returns:
        tuple: ``(file_size_in_bytes, word_count, character_count, content)``.
    """
    file_size = os.path.getsize(file_path)

    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Iterate pages directly, and tolerate pages for which no text comes
        # back (None or empty) so a single such page cannot break the join.
        content = "".join(page.extract_text() or "" for page in reader.pages)

    num_words = len(content.split())
    num_chars = len(content)

    return file_size, num_words, num_chars, content
|
|
113
|
+
|
|
114
|
+
def __read_blog_from_url(url):
    """
    Downloads and parses the article at a URL using the newspaper package.

    Parameters:
    url (str): The URL of the article to fetch.

    Returns:
    The parsed newspaper article object (callers read its ``text`` and
    ``title``) if the download succeeded, None otherwise. Failures are
    reported with ``print`` rather than raised.
    """
    try:
        article = newspaper.Article(url)
        article.download()

        # NOTE(review): 2 is presumably newspaper's "download succeeded"
        # state value; confirm against the library's ArticleDownloadState.
        if article.download_state != 2:
            print("An error occurred while fetching the article")
            return None

        article.parse()
        return article
    except newspaper.ArticleException as e:
        print(f"An error occurred while fetching the article: {e}")
        return None
|
|
137
|
+
|
|
138
|
+
def __read_youtube_video_transcript(video_url):
    """
    Fetches the transcript of a YouTube video given its URL.

    Parameters:
    video_url (str): The URL of the YouTube video. Supported shapes are
        ``youtube.com/watch?v=ID`` (``v`` may follow other query
        parameters), ``youtube.com/embed/ID``, ``youtube.com/shorts/ID``,
        ``youtube.com/live/ID`` and ``youtu.be/ID``.

    Returns:
    str: The transcript lines joined with single spaces.

    Raises:
    ValueError: If no video id can be extracted from the URL, or if the
        transcript cannot be fetched.
    """
    match = re.search(
        r"(?:youtube\.com/(?:watch\?(?:[^#\s]*&)?v=|embed/|shorts/|live/)|youtu\.be/)([\w-]+)",
        video_url,
        re.IGNORECASE,  # host names are case-insensitive; the id is captured verbatim
    )
    if not match:
        raise ValueError("Invalid YouTube URL")
    video_id = match.group(1)

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(line["text"] for line in transcript)
    except Exception as e:
        # Raise a real exception: raising a plain string is itself a TypeError
        # and hides the actual failure.
        raise ValueError(f"An error occurred while fetching the transcript: {e}") from e
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import json
|
|
3
|
+
from pydantic import BaseModel, ValidationError
|
|
4
|
+
from typing import get_type_hints
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
from typing import List, get_type_hints, Type
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def convert_pydantic_to_json(model_instance):
    """
    Serializes a Pydantic model instance to a JSON string.

    Args:
        model_instance: An instance of a Pydantic model (anything exposing
            ``model_dump_json()``).

    Returns:
        str: The JSON string produced by ``model_dump_json()``.
    """
    serialized = model_instance.model_dump_json()
    return serialized
|
|
21
|
+
|
|
22
|
+
def extract_json_from_text(text_response):
    """
    Extract every JSON object embedded in a piece of free text.

    The text is scanned for ``{`` characters and a real JSON parser is tried
    at each one, so nested objects come back whole and braces inside string
    values do not confuse the scan. When a ``{`` does not start valid JSON,
    the scan simply moves on to the next ``{`` (which may be an object nested
    inside the invalid one).

    Args:
        text_response (str): Text that may contain JSON objects.

    Returns:
        list | None: The decoded objects (dicts) in order of appearance, or
        None when no valid JSON object was found.
    """
    decoder = json.JSONDecoder()
    json_objects = []

    position = text_response.find('{')
    while position != -1:
        try:
            json_obj, end = decoder.raw_decode(text_response, position)
        except json.JSONDecodeError:
            # Not valid JSON from here; try the next opening brace.
            position = text_response.find('{', position + 1)
            continue
        json_objects.append(json_obj)
        # Resume after the decoded object so its nested objects are not
        # reported a second time.
        position = text_response.find('{', end)

    return json_objects or None
|
|
49
|
+
|
|
50
|
+
def extend_search(text, span):
    """
    Grow a match forward until its braces balance.

    Starting at ``span[0]``, opening and closing braces are counted; the
    substring up to and including the closing brace that brings the count
    back to zero is returned. If that never happens, the original
    ``text[start:end]`` slice is returned unchanged.
    """
    start, end = span
    depth = 0
    for index, char in enumerate(text[start:], start):
        if char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return text[start:index + 1]
    return text[start:end]
|
|
62
|
+
|
|
63
|
+
def validate_json_with_pydantic_model(model_class, json_data):
    """
    Validates JSON data against a specified Pydantic model.

    Args:
        model_class (BaseModel): The Pydantic model class to validate against.
        json_data (dict or list): JSON data to validate. Can be a dict for a single JSON object,
                                  or a list for multiple JSON objects.

    Returns:
        tuple: ``(validated_data, validation_errors)`` where
            ``validated_data`` is a list of dicts dumped from the models that
            validated, and ``validation_errors`` is a list of
            ``{"error": <message>, "data": <offending item>}`` dicts.

    Raises:
        ValueError: If ``json_data`` is neither a dict nor a list.
    """
    # Treat a single object as a one-element list so both shapes share one code path.
    if isinstance(json_data, list):
        items = json_data
    elif isinstance(json_data, dict):
        items = [json_data]
    else:
        raise ValueError("Invalid JSON data type. Expected dict or list.")

    validated_data = []
    validation_errors = []

    for item in items:
        if not isinstance(item, dict):
            # ``model_class(**item)`` would raise an uncaught TypeError here;
            # report it like any other invalid item instead.
            validation_errors.append({
                "error": f"Expected a JSON object (dict), got {type(item).__name__}",
                "data": item,
            })
            continue
        try:
            model_instance = model_class(**item)
        except ValidationError as e:
            validation_errors.append({"error": str(e), "data": item})
            continue
        # Prefer the pydantic v2 API; fall back to the v1 spelling.
        dump = getattr(model_instance, "model_dump", None) or model_instance.dict
        validated_data.append(dump())

    return validated_data, validation_errors
|
|
96
|
+
|
|
97
|
+
def convert_json_to_pydantic_model(model_class, json_data):
    """
    Build a ``model_class`` instance from a dict of field values.

    Returns:
        The model instance, or None when validation fails (the validation
        error is printed rather than raised).
    """
    try:
        return model_class(**json_data)
    except ValidationError as e:
        print("Validation error:", e)
    return None
|
|
104
|
+
|
|
105
|
+
# Define a function to provide example values based on type
|
|
106
|
+
def example_value_for_type(field_type: Type):
    """
    Return a placeholder value for a field annotation.

    Supported annotations are ``str``, ``int``, ``float``, ``bool``,
    ``List[str]`` and ``List[int]``; anything else yields the string
    ``"Unsupported type"``.
    """
    # Built on every call so callers never share the list samples.
    samples = (
        (str, "example_string"),
        (int, 0),
        (float, 0.0),
        (bool, True),
        (List[str], ["generated text 1", "generated text 2"]),
        (List[int], [1, 2, 3]),
    )
    for known_type, sample in samples:
        if field_type == known_type:
            return sample
    return "Unsupported type"
|
|
121
|
+
|
|
122
|
+
# Function to generate a JSON example for any Pydantic model
|
|
123
|
+
def generate_json_example_from_pydantic(model_class: Type[BaseModel]) -> str:
    """
    Build an example instance of ``model_class`` and return it as JSON.

    Every annotated field is given the placeholder returned by
    ``example_value_for_type`` for its annotation, the model is instantiated
    with those values, and the instance is serialized.

    Args:
        model_class: The Pydantic model class to generate an example for.

    Returns:
        str: JSON string of the example instance.
    """
    example_data = {
        field_name: example_value_for_type(field_type)
        for field_name, field_type in get_type_hints(model_class).items()
    }

    model_instance = model_class(**example_data)
    # Prefer the pydantic v2 serializer (already used by
    # convert_pydantic_to_json); fall back to the v1 spelling.
    to_json = getattr(model_instance, "model_dump_json", None) or model_instance.json
    return to_json()
|
|
130
|
+
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from dotenv import load_dotenv
|
|
2
|
+
import os
|
|
3
|
+
import time
|
|
4
|
+
import requests
|
|
5
|
+
import aiohttp
|
|
6
|
+
import asyncio
|
|
7
|
+
from typing import Optional, Any, Dict
|
|
8
|
+
|
|
9
|
+
load_dotenv() # Load the environment variables
|
|
10
|
+
|
|
11
|
+
class RapidAPIClient:
    """Small client for calling RapidAPI endpoints, synchronously or asynchronously, with retries."""

    def __init__(self, api_key: Optional[str] = None, timeout: int = 30):
        """
        Initialize the RapidAPI client.

        :param api_key: Optional API key. If not provided, it will be read from the environment variable 'RAPIDAPI_API_KEY'.
        :param timeout: Request timeout in seconds.
        :raises ValueError: If no API key is given and the environment variable is unset or empty.
        """
        self.api_key = api_key if api_key else os.getenv('RAPIDAPI_API_KEY')
        self.timeout = timeout

        if not self.api_key:
            # Name the variable that is actually read above.
            raise ValueError("API key must be provided or set as an environment variable 'RAPIDAPI_API_KEY'")

    def _construct_headers(self, api_url: str, headers_extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
        """
        Construct headers for the API call.

        :param api_url: URL of the RapidAPI endpoint (must include the scheme, e.g. 'https://host/path')
        :param headers_extra: Additional headers if required by the API; these override the defaults on key clashes
        :return: Dictionary of headers
        """
        headers = {
            'x-rapidapi-key': self.api_key,
            # 'scheme://host/...' split on '/' puts the host at index 2
            'x-rapidapi-host': api_url.split('/')[2]
        }

        if headers_extra:
            headers.update(headers_extra)

        return headers

    def _check_response(self, response: "requests.Response") -> Any:
        """
        Check the response status and return the JSON data if successful.

        :param response: Response object from requests library.
        :return: JSON response from the API, or None when the body is empty
        :raises requests.HTTPError: For 4xx/5xx responses (via raise_for_status)
        """
        if response.status_code in [200, 201, 202, 204]:
            return response.json() if response.text else None
        response.raise_for_status()

    def call_api(self, api_url: str, method: str = 'GET', headers_extra: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, data: Optional[Dict[str, str]] = None, json: Optional[Dict[str, Any]] = None, max_retries: int = 3, backoff_factor: int = 2) -> Any:
        """
        Make a synchronous API call to a RapidAPI endpoint.

        :param api_url: URL of the RapidAPI endpoint
        :param method: HTTP method ('GET' or 'POST')
        :param headers_extra: Additional headers if required by the API
        :param params: Query parameters for GET request
        :param data: Form data for POST request
        :param json: JSON data for POST request
        :param max_retries: Maximum number of attempts before the last error is re-raised
        :param backoff_factor: Base of the exponential delay (backoff_factor ** attempt seconds) between attempts
        :return: JSON response from the API
        """
        headers = self._construct_headers(api_url, headers_extra)
        retries = 0

        while retries < max_retries:
            try:
                with requests.request(method, api_url, headers=headers, params=params, data=data, json=json, timeout=self.timeout) as response:
                    return self._check_response(response)
            except requests.RequestException:
                retries += 1
                if retries >= max_retries:
                    raise
                time.sleep(backoff_factor ** retries)

    async def call_api_async(self, api_url: str, method: str = 'GET', headers_extra: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, data: Optional[Dict[str, str]] = None, json: Optional[Dict[str, Any]] = None, max_retries: int = 3, backoff_factor: int = 2) -> Any:
        """
        Make an asynchronous API call to a RapidAPI endpoint.

        :param api_url: URL of the RapidAPI endpoint
        :param method: HTTP method ('GET' or 'POST')
        :param headers_extra: Additional headers if required by the API
        :param params: Query parameters for GET request
        :param data: Form data for POST request
        :param json: JSON data for POST request
        :param max_retries: Maximum number of attempts before the last error is re-raised
        :param backoff_factor: Base of the exponential delay (backoff_factor ** attempt seconds) between attempts
        :return: JSON response from the API, or None when the body is empty
        """
        headers = self._construct_headers(api_url, headers_extra)
        # aiohttp expects a ClientTimeout object rather than a bare number.
        timeout = aiohttp.ClientTimeout(total=self.timeout)

        async with aiohttp.ClientSession() as session:
            retries = 0
            while retries < max_retries:
                try:
                    async with session.request(method, api_url, headers=headers, params=params, data=data, json=json, timeout=timeout) as response:
                        if response.status in [200, 201, 202, 204]:
                            # ``response.text`` is a coroutine method (always
                            # truthy), so read the body to find out whether
                            # there is anything to decode (e.g. 204 No Content).
                            body = await response.read()
                            return await response.json() if body else None
                        response.raise_for_status()
                        # Non-error status outside the success list: return
                        # None like the synchronous path instead of looping
                        # forever without consuming a retry.
                        return None
                except (aiohttp.ClientError, asyncio.TimeoutError):
                    # Timeouts are retried too, matching the synchronous client.
                    retries += 1
                    if retries >= max_retries:
                        raise
                    await asyncio.sleep(backoff_factor ** retries)
|
SimplerLLM/tools/serp.py
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
from duckduckgo_search import DDGS, AsyncDDGS
|
|
2
|
+
from dotenv import load_dotenv
|
|
3
|
+
from urllib.parse import urlparse
|
|
4
|
+
from pydantic import BaseModel, HttpUrl
|
|
5
|
+
from typing import Optional, List
|
|
6
|
+
|
|
7
|
+
class SearchResult(BaseModel):
    """One search hit: its URL plus optional domain, title and description."""

    # Required; must validate as an HTTP(S) URL.
    URL: HttpUrl
    # Network location (host) of the URL.
    Domain: Optional[str] = None
    # Title of the hit.
    Title: Optional[str] = None
    # Description / snippet of the hit.
    Description: Optional[str] = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_domain_from_url(url):
    """Return the network-location part (host, plus port if present) of ``url``."""
    return urlparse(url).netloc
|
|
17
|
+
|
|
18
|
+
# Load environment variables
|
|
19
|
+
load_dotenv()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
async def search_with_duck_duck_go_async(query, max_results=50):
    """
    Perform an asynchronous search using the DuckDuckGo search engine.

    Args:
        query (str): The search query string.
        max_results (int, optional): The maximum number of results to return. Defaults to 50.

    Returns:
        List[SearchResult]: One SearchResult (URL, Domain, Title, Description)
        per hit that has a URL. Hits without a URL are skipped, because
        ``SearchResult.URL`` is required and building one without it can
        only fail validation.
    """
    async with AsyncDDGS() as ddgs:
        results = [r async for r in ddgs.text(query, max_results=max_results)]

    result_data = []
    for result in results:
        # Use .get so a missing key yields None instead of a KeyError
        url = result.get("href")
        if not url:
            continue
        result_data.append(
            SearchResult(
                URL=url,
                Domain=get_domain_from_url(url),
                Title=result.get("title"),
                Description=result.get("body"),
            )
        )

    return result_data
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def search_with_duck_duck_go(query: str, max_results: int = 10) -> List[SearchResult]:
    """
    Perform a synchronous search using the DuckDuckGo search engine.

    Args:
        query (str): The search query string.
        max_results (int, optional): The maximum number of results to return. Defaults to 10.

    Returns:
        List[SearchResult]: One SearchResult (URL, Domain, Title, Description)
        per hit that has a URL. Hits without a URL are skipped, because
        ``SearchResult.URL`` is required and building one without it can
        only fail validation.
    """
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=max_results))

    result_data = []
    for result in results:
        # Use .get so a missing key yields None instead of a KeyError
        url = result.get("href")
        if not url:
            continue
        result_data.append(
            SearchResult(
                URL=url,
                Domain=get_domain_from_url(url),
                Title=result.get("title"),
                Description=result.get("body"),
            )
        )

    return result_data
|
|
78
|
+
|
|
79
|
+
|