SimplerLLM 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,86 @@
1
class SimplePrompt:
    """
    A class for creating and manipulating simple prompt templates.

    The template uses ``str.format`` placeholders (e.g. ``"Hello {name}"``).
    ``assign_parms`` fills them in and caches the result in ``content``.
    """

    def __init__(self, template: str):
        if not isinstance(template, str):
            raise ValueError("Template must be a string")
        self.template = template
        self.content = ''  # Cache of the most recently filled template.

    def assign_parms(self, **kwargs) -> str:
        """
        Assigns parameters to the template and returns the filled template.

        Raises:
            KeyError: when a placeholder in the template has no matching kwarg.
            ValueError: for any other string-formatting failure.
        """
        try:
            self.content = self.template.format(**kwargs)
        except KeyError as e:
            raise KeyError(f"Missing a required key in the template: {e}")
        except Exception as e:
            # Catch-all for other exceptions related to string formatting
            raise ValueError(f"Error processing the template: {e}")
        return self.content

    def update_template(self, new_template: str):
        """Replace the template and reset the cached content."""
        if not isinstance(new_template, str):
            raise ValueError("New template must be a string")
        self.template = new_template
        self.content = ''

    def __str__(self) -> str:
        return self.content
36
+
37
def create_prompt_template(template_string: str) -> SimplePrompt:
    """
    Factory function to create a SimplePrompt instance.

    Args:
        template_string: A ``str.format``-style template string.

    Returns:
        SimplePrompt: A prompt wrapper around the given template.

    Raises:
        ValueError: If ``template_string`` is not a string.
    """
    if not isinstance(template_string, str):
        raise ValueError("Template string must be a string")
    return SimplePrompt(template_string)
44
+
45
+
46
+
47
class MultiValuePrompt:
    """
    A class for creating and manipulating prompt templates with multiple sets of parameters.

    Each call to ``generate_prompts`` formats the template once per parameter
    dict and caches the resulting list in ``generated_prompts``.
    """

    def __init__(self, template: str):
        if not isinstance(template, str):
            raise ValueError("Template must be a string")
        self.template = template
        self.generated_prompts = []  # Cache of the last batch of filled prompts.

    def generate_prompts(self, params_list: list) -> list:
        """
        Generates prompts for each set of parameters in the params_list.

        Raises:
            ValueError: when any entry is not a dict, or formatting fails.
            KeyError: when the template references a key missing from an entry.
        """
        if not all(isinstance(entry, dict) for entry in params_list):
            raise ValueError("Each item in params_list must be a dictionary")

        # Reset first so a mid-batch failure leaves the partial result visible,
        # matching the incremental-append behavior.
        self.generated_prompts = []
        for entry in params_list:
            try:
                self.generated_prompts.append(self.template.format(**entry))
            except KeyError as e:
                raise KeyError(f"Missing a required key in the template: {e}")
            except Exception as e:
                raise ValueError(f"Error processing the template: {e}")

        return self.generated_prompts

    def __str__(self) -> str:
        return "\n".join(self.generated_prompts)
79
+
80
def create_multi_value_prompts(template_string: str) -> MultiValuePrompt:
    """
    Factory function to create a MultiValuePrompt instance.

    Args:
        template_string: A ``str.format``-style template string.

    Returns:
        MultiValuePrompt: A prompt wrapper that can fill the template with
        multiple parameter sets.

    Raises:
        ValueError: If ``template_string`` is not a string.
    """
    if not isinstance(template_string, str):
        raise ValueError("Template string must be a string")
    return MultiValuePrompt(template_string)
File without changes
@@ -0,0 +1,160 @@
1
+ import newspaper
2
+ import os
3
+ import PyPDF2
4
+ import docx
5
+ from youtube_transcript_api import YouTubeTranscriptApi
6
+ import re
7
+ from urllib.parse import urlparse
8
+ from pydantic import BaseModel
9
+ from typing import Optional
10
+
11
+
12
+
13
class TextDocument(BaseModel):
    """
    Result of loading text from a file or URL via ``load_text``.

    Carries the extracted text plus simple size metrics; optional fields are
    filled only when the source provides them (e.g. ``title`` for articles).
    """
    # Size in bytes: on-disk size for files, UTF-8 encoded length for URLs.
    file_size: Optional[int] = None
    # Whitespace-delimited word count of `content`.
    word_count: int
    # len(content) in characters.
    character_count: int
    content: str
    # Article title when loaded via newspaper; None otherwise.
    title: Optional[str] = None
    # The original URL or filesystem path the text was loaded from.
    url_or_path: Optional[str] = None
20
+
21
def load_text(input_path_or_url):
    """
    Load text from a URL (YouTube transcript or article) or a local file.

    Files with a ``.txt``, ``.docx`` or ``.pdf`` extension use the matching
    reader; any other extension falls back to plain-text reading.

    Args:
        input_path_or_url (str): HTTP(S) URL or filesystem path.

    Returns:
        TextDocument: Extracted content plus size metrics.

    Raises:
        ValueError: If the input cannot be read or processed.
    """
    # Lowercase only a *copy* for matching. The original string is preserved
    # because YouTube video IDs and many filesystems are case-sensitive, so
    # lowercasing the actual input (as before) corrupted it.
    lowered = str(input_path_or_url).lower()
    if re.match(r'http[s]?://', lowered):
        # Process based on URL content
        if "youtube.com" in lowered or "youtu.be" in lowered:
            content = __read_youtube_video_transcript(input_path_or_url)
            file_size = len(content.encode('utf-8'))  # Size in bytes
            return TextDocument(
                word_count=len(content.split()),
                character_count=len(content),
                content=content,
                file_size=file_size,
                url_or_path=input_path_or_url,
            )
        else:
            article = __read_blog_from_url(input_path_or_url)
            if article is not None:
                file_size = len(article.text.encode('utf-8'))  # Size in bytes
                return TextDocument(
                    word_count=len(article.text.split()),
                    character_count=len(article.text),
                    content=article.text,
                    title=article.title,
                    file_size=file_size,
                    url_or_path=input_path_or_url,
                )
    else:
        try:
            # Process based on file extension (extension compare is
            # case-insensitive; the path itself keeps its original case).
            file_ext = os.path.splitext(input_path_or_url)[1].lower()
            if file_ext in ['.txt']:
                file_size, num_words, num_chars, content = __read_text_file(input_path_or_url)
            elif file_ext in ['.docx']:
                file_size, num_words, num_chars, content = __read_docx_file(input_path_or_url)
            elif file_ext in ['.pdf']:
                file_size, num_words, num_chars, content = __read_pdf_file(input_path_or_url)
            else:
                # Fallback: try reading as a text file
                file_size, num_words, num_chars, content = __read_text_file(input_path_or_url)

            return TextDocument(
                file_size=file_size,
                word_count=num_words,
                character_count=num_chars,
                content=content,
                url_or_path=input_path_or_url,
            )
        except Exception as e:
            raise ValueError(f"Error processing file: {e}")

    # Reached only when a non-YouTube URL could not be fetched/parsed.
    raise ValueError("Unable to process the input")
73
+
74
+
75
+
76
+
77
+
78
+
79
def __read_text_file(file_path):
    """
    Read a UTF-8 text file.

    Returns:
        tuple: (file_size_bytes, word_count, char_count, content)
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        text = handle.read()

    return os.path.getsize(file_path), len(text.split()), len(text), text
89
+
90
def __read_docx_file(file_path):
    """
    Read a .docx document, joining paragraphs with newlines.

    Returns:
        tuple: (file_size_bytes, word_count, char_count, content)
    """
    size = os.path.getsize(file_path)
    document = docx.Document(file_path)
    text = "\n".join(paragraph.text for paragraph in document.paragraphs)

    return size, len(text.split()), len(text), text
100
+
101
def __read_pdf_file(file_path):
    """
    Read a PDF, concatenating the extracted text of every page.

    Returns:
        tuple: (file_size_bytes, word_count, char_count, content)
    """
    size = os.path.getsize(file_path)

    with open(file_path, 'rb') as handle:
        reader = PyPDF2.PdfReader(handle)
        text = "".join(page.extract_text() for page in reader.pages)

    return size, len(text.split()), len(text), text
113
+
114
def __read_blog_from_url(url):
    """
    Extracts an article from a given URL using the newspaper package.

    Parameters:
    url (str): The URL of the article to extract text from.

    Returns:
    newspaper.Article | None: The parsed article on success, None otherwise.
    """
    try:
        article = newspaper.Article(url)
        article.download()

        # download_state == 2 is newspaper's "download succeeded" marker.
        if article.download_state != 2:
            print(f"An error occurred while fetching the article")
            return None

        article.parse()
        return article
    except newspaper.ArticleException as e:
        print(f"An error occurred while fetching the article: {e}")
        return None
137
+
138
def __read_youtube_video_transcript(video_url):
    """
    Fetches the transcript of a YouTube video given its URL.

    Parameters:
    video_url (str): The URL of the YouTube video.

    Returns:
    str: The transcript of the video if available.

    Raises:
    ValueError: If the URL is not a recognizable YouTube URL, or the
        transcript cannot be fetched.
    """
    # Enhanced regex to handle different YouTube URL formats
    match = re.search(r"(?:youtube\.com/watch\?v=|youtu\.be/)([\w-]+)", video_url)
    if match:
        video_id = match.group(1)
    else:
        raise ValueError("Invalid YouTube URL")

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(line["text"] for line in transcript)
    except Exception as e:
        # Bug fix: the original `raise f"..."` raised a *string*, which is a
        # TypeError in Python 3 (exceptions must derive from BaseException).
        raise ValueError(f"An error occurred while fetching the transcript: {e}") from e
@@ -0,0 +1,130 @@
1
+ import re
2
+ import json
3
+ from pydantic import BaseModel, ValidationError
4
+ from typing import get_type_hints
5
+ from pydantic import BaseModel
6
+ from typing import List, get_type_hints, Type
7
+
8
+
9
+
10
def convert_pydantic_to_json(model_instance):
    """
    Converts a Pydantic model instance to a JSON string.

    Args:
        model_instance (BaseModel): An instance of your Pydantic model.

    Returns:
        str: A JSON string representation of the model, produced by the
        pydantic v2 ``model_dump_json`` API.
    """
    return model_instance.model_dump_json()
21
+
22
def extract_json_from_text(text_response):
    """
    Scan free-form text for JSON objects and return them parsed.

    Returns:
        list | None: A list of parsed JSON objects, or None when the text
        contains no parseable JSON object.
    """
    # Matches a brace-delimited span with no nested braces inside it.
    pattern = r'\{[^{}]*\}'
    found = []

    for match in re.finditer(pattern, text_response):
        candidate = match.group(0)
        try:
            # Validate that the extracted span is valid JSON on its own.
            found.append(json.loads(candidate))
            continue
        except json.JSONDecodeError:
            pass

        # The flat match failed; widen the span to capture nested structures.
        widened = extend_search(text_response, match.span())
        try:
            found.append(json.loads(widened))
        except json.JSONDecodeError:
            # Still not valid JSON - skip this candidate.
            continue

    return found if found else None
49
+
50
def extend_search(text, span):
    """
    Widen a regex match span to cover a balanced ``{...}`` region.

    Starting at the span's opening brace, walk forward tracking nesting
    depth and return the substring up to the brace that closes it. Falls
    back to the original span when no balancing brace is found.
    """
    start, end = span
    depth = 0
    for idx in range(start, len(text)):
        ch = text[idx]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return text[start:idx + 1]
    return text[start:end]
62
+
63
def validate_json_with_pydantic_model(model_class, json_data):
    """
    Validates JSON data against a specified Pydantic model.

    Args:
        model_class (BaseModel): The Pydantic model class to validate against.
        json_data (dict or list): JSON data to validate. Can be a dict for a single JSON object,
                                  or a list for multiple JSON objects.

    Returns:
        list: A list of validated JSON objects that match the Pydantic model.
        list: A list of errors for JSON objects that do not match the model.

    Raises:
        ValueError: If json_data is neither a dict nor a list.
    """
    validated_data = []
    validation_errors = []

    # Normalize the single-dict case to a one-element list so both input
    # shapes share a single validation path.
    if isinstance(json_data, dict):
        items = [json_data]
    elif isinstance(json_data, list):
        items = json_data
    else:
        raise ValueError("Invalid JSON data type. Expected dict or list.")

    for item in items:
        try:
            model_instance = model_class(**item)
            # model_dump() is the pydantic v2 API, consistent with
            # model_dump_json() used elsewhere in this module; .dict() is
            # deprecated in v2.
            validated_data.append(model_instance.model_dump())
        except ValidationError as e:
            validation_errors.append({"error": str(e), "data": item})

    return validated_data, validation_errors
96
+
97
def convert_json_to_pydantic_model(model_class, json_data):
    """
    Instantiate ``model_class`` from a dict of JSON data.

    Returns:
        BaseModel | None: The model instance, or None (after printing the
        error) when validation fails.
    """
    try:
        return model_class(**json_data)
    except ValidationError as e:
        print("Validation error:", e)
        return None
104
+
105
def example_value_for_type(field_type: Type):
    """
    Return a placeholder example value for a supported annotation type.

    Supported types: str, int, float, bool, List[str], List[int]. Any other
    type yields the sentinel string "Unsupported type".
    """
    # Ordered (type, example) pairs compared with ``==`` to keep the exact
    # semantics of the original if/elif chain (works for unhashable inputs).
    examples = (
        (str, "example_string"),
        (int, 0),
        (float, 0.0),
        (bool, True),
        (List[str], ["generated text 1", "generated text 2"]),
        (List[int], [1, 2, 3]),
    )
    for candidate, example in examples:
        if field_type == candidate:
            return example
    return "Unsupported type"
121
+
122
def generate_json_example_from_pydantic(model_class: Type[BaseModel]) -> str:
    """
    Generate a JSON example string for any Pydantic model.

    Each annotated field is filled with a placeholder value from
    ``example_value_for_type``, then serialized.

    Args:
        model_class: The Pydantic model class to build an example for.

    Returns:
        str: JSON serialization of an example instance.
    """
    example_data = {
        field_name: example_value_for_type(field_type)
        for field_name, field_type in get_type_hints(model_class).items()
    }

    model_instance = model_class(**example_data)
    # model_dump_json() is the pydantic v2 replacement for the deprecated
    # .json() method, consistent with convert_pydantic_to_json in this module.
    return model_instance.model_dump_json()
130
+
@@ -0,0 +1,109 @@
1
+ from dotenv import load_dotenv
2
+ import os
3
+ import time
4
+ import requests
5
+ import aiohttp
6
+ import asyncio
7
+ from typing import Optional, Any, Dict
8
+
9
+ load_dotenv() # Load the environment variables
10
+
11
class RapidAPIClient:
    """
    Thin client for RapidAPI endpoints with retry/backoff support, offering
    both synchronous (requests) and asynchronous (aiohttp) calls.
    """

    def __init__(self, api_key: Optional[str] = None, timeout: int = 30):
        """
        Initialize the RapidAPI client.

        :param api_key: Optional API key. If not provided, it will be read from the environment variable 'RAPIDAPI_API_KEY'.
        :param timeout: Request timeout in seconds.
        """
        self.api_key = api_key if api_key else os.getenv('RAPIDAPI_API_KEY')
        self.timeout = timeout

        if not self.api_key:
            # Bug fix: message/docs previously named 'RAPID_API_KEY', but the
            # code reads 'RAPIDAPI_API_KEY' above - keep them consistent.
            raise ValueError("API key must be provided or set as an environment variable 'RAPIDAPI_API_KEY'")

    def _construct_headers(self, api_url: str, headers_extra: Optional[Dict[str, str]] = None) -> Dict[str, str]:
        """
        Construct headers for the API call.

        :param api_url: URL of the RapidAPI endpoint
        :param headers_extra: Additional headers if required by the API
        :return: Dictionary of headers
        """
        headers = {
            'x-rapidapi-key': self.api_key,
            # 'https://host/path'.split('/')[2] yields the host component.
            'x-rapidapi-host': api_url.split('/')[2]
        }

        if headers_extra:
            headers.update(headers_extra)

        return headers

    def _check_response(self, response: requests.Response) -> Any:
        """
        Check the response status and return the JSON data if successful.

        :param response: Response object from requests library.
        :return: Parsed JSON body, or None when the body is empty.
        :raises requests.HTTPError: for non-success status codes.
        """
        if response.status_code in [200, 201, 202, 204]:
            return response.json() if response.text else None
        response.raise_for_status()

    def call_api(self, api_url: str, method: str = 'GET', headers_extra: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, data: Optional[Dict[str, str]] = None, json: Optional[Dict[str, Any]] = None, max_retries: int = 3, backoff_factor: int = 2) -> Any:
        """
        Make a synchronous API call to a RapidAPI endpoint.

        :param api_url: URL of the RapidAPI endpoint
        :param method: HTTP method ('GET' or 'POST')
        :param headers_extra: Additional headers if required by the API
        :param params: Query parameters for GET request
        :param data: Form data for POST request
        :param json: JSON data for POST request
        :param max_retries: Maximum number of retries
        :param backoff_factor: Factor by which the delay increases during each retry
        :return: JSON response from the API
        """
        headers = self._construct_headers(api_url, headers_extra)
        retries = 0

        while retries < max_retries:
            try:
                with requests.request(method, api_url, headers=headers, params=params, data=data, json=json, timeout=self.timeout) as response:
                    return self._check_response(response)
            except requests.RequestException:
                retries += 1
                if retries >= max_retries:
                    raise  # exhausted: re-raise with the original traceback
                time.sleep(backoff_factor ** retries)

    async def call_api_async(self, api_url: str, method: str = 'GET', headers_extra: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, data: Optional[Dict[str, str]] = None, json: Optional[Dict[str, Any]] = None, max_retries: int = 3, backoff_factor: int = 2) -> Any:
        """
        Make an asynchronous API call to a RapidAPI endpoint.

        :param api_url: URL of the RapidAPI endpoint
        :param method: HTTP method ('GET' or 'POST')
        :param headers_extra: Additional headers if required by the API
        :param params: Query parameters for GET request
        :param data: Form data for POST request
        :param json: JSON data for POST request
        :param max_retries: Maximum number of retries
        :param backoff_factor: Factor by which the delay increases during each retry
        :return: JSON response from the API
        """
        headers = self._construct_headers(api_url, headers_extra)

        async with aiohttp.ClientSession() as session:
            retries = 0
            while retries < max_retries:
                try:
                    async with session.request(method, api_url, headers=headers, params=params, data=data, json=json, timeout=self.timeout) as response:
                        if response.status in [200, 201, 202, 204]:
                            # Bug fix: aiohttp's response.text is a coroutine
                            # *method*, so the old `if response.text` guard was
                            # always truthy and json() crashed on empty bodies.
                            # Read the body first and only parse when non-empty.
                            body = await response.text()
                            return await response.json() if body else None
                        response.raise_for_status()
                except aiohttp.ClientError:
                    retries += 1
                    if retries >= max_retries:
                        raise
                    await asyncio.sleep(backoff_factor ** retries)
@@ -0,0 +1,79 @@
1
+ from duckduckgo_search import DDGS, AsyncDDGS
2
+ from dotenv import load_dotenv
3
+ from urllib.parse import urlparse
4
+ from pydantic import BaseModel, HttpUrl
5
+ from typing import Optional, List
6
+
7
class SearchResult(BaseModel):
    """
    One search hit returned by the DuckDuckGo search helpers.

    URL is required; Domain, Title and Description are filled from the raw
    result when present.
    """
    URL: HttpUrl
    Domain: Optional[str] = None
    Title: Optional[str] = None
    Description: Optional[str] = None
12
+
13
+
14
def get_domain_from_url(url):
    """Return the network-location (domain) portion of *url*."""
    return urlparse(url).netloc
17
+
18
+ # Load environment variables
19
+ load_dotenv()
20
+
21
+
22
+
23
async def search_with_duck_duck_go_async(query, max_results=50):
    """
    Perform an asynchronous search using the DuckDuckGo search engine.

    Args:
        query (str): The search query string.
        max_results (int, optional): The maximum number of results to return. Defaults to 50.

    Returns:
        List[SearchResult]: Parsed search results (URL, Domain, Title,
        Description). Results without a URL are skipped, since
        SearchResult.URL is a required field.
    """
    async with AsyncDDGS() as ddgs:
        # NOTE(review): assumes ddgs.text() yields results asynchronously;
        # newer duckduckgo_search versions return a plain list instead -
        # confirm against the pinned library version.
        raw_results = []
        async for r in ddgs.text(query, max_results=max_results):
            raw_results.append(r)

    result_data = []
    for result in raw_results:
        # Use .get so missing keys in the raw result do not raise.
        url = result.get("href", None)
        title = result.get("title", None)
        description = result.get("body", None)
        if not url:
            # Bug fix: SearchResult(URL=None) always failed validation
            # because URL is a required HttpUrl; skip link-less entries.
            continue
        result_data.append(
            SearchResult(
                URL=url,
                Domain=get_domain_from_url(url),
                Title=title,
                Description=description,
            )
        )

    return result_data
51
+
52
+
53
def search_with_duck_duck_go(query: str, max_results: int = 10) -> List[SearchResult]:
    """
    Perform a synchronous search using the DuckDuckGo search engine.

    Args:
        query (str): The search query string.
        max_results (int, optional): The maximum number of results to return. Defaults to 10.

    Returns:
        List[SearchResult]: A list of SearchResult objects, each containing URL,
        Domain, Title, and Description from the search results. Results without
        a URL are skipped, since SearchResult.URL is a required field.
    """
    with DDGS() as ddgs:
        raw_results = list(ddgs.text(query, max_results=max_results))

    result_data = []
    for result in raw_results:
        # Use .get so missing keys in the raw result do not raise.
        url = result.get("href", None)
        if not url:
            # Bug fix: SearchResult(URL=None) always failed validation
            # because URL is a required HttpUrl; skip link-less entries.
            continue
        result_data.append(
            SearchResult(
                URL=url,
                Domain=get_domain_from_url(url),
                Title=result.get("title", None),
                Description=result.get("body", None),
            )
        )

    return result_data
78
+
79
+