ai-parrot 0.8.3__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ai-parrot might be problematic. Click here for more details.

Files changed (128):
  1. ai_parrot-0.8.3.dist-info/LICENSE +21 -0
  2. ai_parrot-0.8.3.dist-info/METADATA +306 -0
  3. ai_parrot-0.8.3.dist-info/RECORD +128 -0
  4. ai_parrot-0.8.3.dist-info/WHEEL +6 -0
  5. ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
  6. parrot/__init__.py +30 -0
  7. parrot/bots/__init__.py +5 -0
  8. parrot/bots/abstract.py +1115 -0
  9. parrot/bots/agent.py +492 -0
  10. parrot/bots/basic.py +9 -0
  11. parrot/bots/bose.py +17 -0
  12. parrot/bots/chatbot.py +271 -0
  13. parrot/bots/cody.py +17 -0
  14. parrot/bots/copilot.py +117 -0
  15. parrot/bots/data.py +730 -0
  16. parrot/bots/dataframe.py +103 -0
  17. parrot/bots/hrbot.py +15 -0
  18. parrot/bots/interfaces/__init__.py +1 -0
  19. parrot/bots/interfaces/retrievers.py +12 -0
  20. parrot/bots/notebook.py +619 -0
  21. parrot/bots/odoo.py +17 -0
  22. parrot/bots/prompts/__init__.py +41 -0
  23. parrot/bots/prompts/agents.py +91 -0
  24. parrot/bots/prompts/data.py +214 -0
  25. parrot/bots/retrievals/__init__.py +1 -0
  26. parrot/bots/retrievals/constitutional.py +19 -0
  27. parrot/bots/retrievals/multi.py +122 -0
  28. parrot/bots/retrievals/retrieval.py +610 -0
  29. parrot/bots/tools/__init__.py +7 -0
  30. parrot/bots/tools/eda.py +325 -0
  31. parrot/bots/tools/pdf.py +50 -0
  32. parrot/bots/tools/plot.py +48 -0
  33. parrot/bots/troc.py +16 -0
  34. parrot/conf.py +170 -0
  35. parrot/crew/__init__.py +3 -0
  36. parrot/crew/tools/__init__.py +22 -0
  37. parrot/crew/tools/bing.py +13 -0
  38. parrot/crew/tools/config.py +43 -0
  39. parrot/crew/tools/duckgo.py +62 -0
  40. parrot/crew/tools/file.py +24 -0
  41. parrot/crew/tools/google.py +168 -0
  42. parrot/crew/tools/gtrends.py +16 -0
  43. parrot/crew/tools/md2pdf.py +25 -0
  44. parrot/crew/tools/rag.py +42 -0
  45. parrot/crew/tools/search.py +32 -0
  46. parrot/crew/tools/url.py +21 -0
  47. parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
  48. parrot/handlers/__init__.py +4 -0
  49. parrot/handlers/agents.py +292 -0
  50. parrot/handlers/bots.py +196 -0
  51. parrot/handlers/chat.py +192 -0
  52. parrot/interfaces/__init__.py +6 -0
  53. parrot/interfaces/database.py +27 -0
  54. parrot/interfaces/http.py +805 -0
  55. parrot/interfaces/images/__init__.py +0 -0
  56. parrot/interfaces/images/plugins/__init__.py +18 -0
  57. parrot/interfaces/images/plugins/abstract.py +58 -0
  58. parrot/interfaces/images/plugins/exif.py +709 -0
  59. parrot/interfaces/images/plugins/hash.py +52 -0
  60. parrot/interfaces/images/plugins/vision.py +104 -0
  61. parrot/interfaces/images/plugins/yolo.py +66 -0
  62. parrot/interfaces/images/plugins/zerodetect.py +197 -0
  63. parrot/llms/__init__.py +1 -0
  64. parrot/llms/abstract.py +69 -0
  65. parrot/llms/anthropic.py +58 -0
  66. parrot/llms/gemma.py +15 -0
  67. parrot/llms/google.py +44 -0
  68. parrot/llms/groq.py +67 -0
  69. parrot/llms/hf.py +45 -0
  70. parrot/llms/openai.py +61 -0
  71. parrot/llms/pipes.py +114 -0
  72. parrot/llms/vertex.py +89 -0
  73. parrot/loaders/__init__.py +9 -0
  74. parrot/loaders/abstract.py +628 -0
  75. parrot/loaders/files/__init__.py +0 -0
  76. parrot/loaders/files/abstract.py +39 -0
  77. parrot/loaders/files/text.py +63 -0
  78. parrot/loaders/txt.py +26 -0
  79. parrot/manager.py +333 -0
  80. parrot/models.py +504 -0
  81. parrot/py.typed +0 -0
  82. parrot/stores/__init__.py +11 -0
  83. parrot/stores/abstract.py +248 -0
  84. parrot/stores/chroma.py +188 -0
  85. parrot/stores/duck.py +162 -0
  86. parrot/stores/embeddings/__init__.py +10 -0
  87. parrot/stores/embeddings/abstract.py +46 -0
  88. parrot/stores/embeddings/base.py +52 -0
  89. parrot/stores/embeddings/bge.py +20 -0
  90. parrot/stores/embeddings/fastembed.py +17 -0
  91. parrot/stores/embeddings/google.py +18 -0
  92. parrot/stores/embeddings/huggingface.py +20 -0
  93. parrot/stores/embeddings/ollama.py +14 -0
  94. parrot/stores/embeddings/openai.py +26 -0
  95. parrot/stores/embeddings/transformers.py +21 -0
  96. parrot/stores/embeddings/vertexai.py +17 -0
  97. parrot/stores/empty.py +10 -0
  98. parrot/stores/faiss.py +160 -0
  99. parrot/stores/milvus.py +397 -0
  100. parrot/stores/postgres.py +653 -0
  101. parrot/stores/qdrant.py +170 -0
  102. parrot/tools/__init__.py +23 -0
  103. parrot/tools/abstract.py +68 -0
  104. parrot/tools/asknews.py +33 -0
  105. parrot/tools/basic.py +51 -0
  106. parrot/tools/bby.py +359 -0
  107. parrot/tools/bing.py +13 -0
  108. parrot/tools/docx.py +343 -0
  109. parrot/tools/duck.py +62 -0
  110. parrot/tools/execute.py +56 -0
  111. parrot/tools/gamma.py +28 -0
  112. parrot/tools/google.py +170 -0
  113. parrot/tools/gvoice.py +301 -0
  114. parrot/tools/results.py +278 -0
  115. parrot/tools/stack.py +27 -0
  116. parrot/tools/weather.py +70 -0
  117. parrot/tools/wikipedia.py +58 -0
  118. parrot/tools/zipcode.py +198 -0
  119. parrot/utils/__init__.py +2 -0
  120. parrot/utils/parsers/__init__.py +5 -0
  121. parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
  122. parrot/utils/toml.py +11 -0
  123. parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
  124. parrot/utils/uv.py +11 -0
  125. parrot/version.py +10 -0
  126. resources/users/__init__.py +5 -0
  127. resources/users/handlers.py +13 -0
  128. resources/users/models.py +205 -0
@@ -0,0 +1,805 @@
1
+ from typing import Optional, Union, Dict, Any
2
+ from collections.abc import Callable
3
+ import random
4
+ import os
5
+ import asyncio
6
+ import ssl
7
+ from pathlib import Path
8
+ from concurrent.futures import ThreadPoolExecutor
9
+ from functools import partial
10
+ from io import BytesIO
11
+ from email.message import Message
12
+ from urllib import parse
13
+ from urllib.parse import quote, urlencode, urlparse
14
+ import urllib3
15
+ import aiofiles
16
+ # parsing:
17
+ from bs4 import BeautifulSoup as bs
18
+ from lxml import html, etree
19
+ # backoff retries:
20
+ import backoff
21
+ # aiohttp:
22
+ import aiohttp
23
+ from aiohttp import BasicAuth
24
+ # httpx
25
+ import httpx
26
+ # config:
27
+ from datamodel.typedefs import SafeDict
28
+ from datamodel.parsers.json import JSONContent, json_encoder # pylint: disable=E0611
29
+ from navconfig.logging import logging
30
+ from proxylists.proxies import (
31
+ FreeProxy,
32
+ Oxylabs
33
+ )
34
+ from ..conf import (
35
+ HTTPCLIENT_MAX_SEMAPHORE,
36
+ HTTPCLIENT_MAX_WORKERS,
37
+ GOOGLE_SEARCH_API_KEY,
38
+ GOOGLE_SEARCH_ENGINE_ID
39
+ )
40
+
41
+
42
# Raise the log threshold of the HTTP client libraries used by this module
# so their per-request chatter does not flood the application log.
for _noisy_logger in ("urllib3", "httpx", "httpcore", "aiohttp"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
del _noisy_logger
logging.getLogger("hpack").setLevel(logging.INFO)
# Disable urllib3's own warnings (called with its default category).
urllib3.disable_warnings()
48
+
49
+
50
# Pool of desktop and mobile User-Agent strings. ``HTTPService`` picks one at
# random when ``rotate_ua`` is enabled and falls back to ``ua[0]`` otherwise.
ua = [
    # Chrome - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",  # noqa
    # Chrome - Desktop (Mac)
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",  # noqa
    # Safari - Desktop (Mac)
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",  # noqa
    # Firefox - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0",
    # Edge - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46",  # noqa
    # Chrome - Mobile (Android)
    "Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36",  # noqa
    # Safari - Mobile (iOS)
    "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",  # noqa
    # Samsung Internet - Mobile (Android)
    "Mozilla/5.0 (Linux; Android 13; SAMSUNG SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/21.0 Chrome/118.0.0.0 Mobile Safari/537.36",  # noqa
    # Firefox - Mobile (Android)
    "Mozilla/5.0 (Android 13; Mobile; rv:118.0) Gecko/118.0 Firefox/118.0",
    # Opera - Desktop (Windows)
    # NOTE: the trailing comma matters; without it this entry is implicitly
    # concatenated with the next string literal into one invalid User-Agent.
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 OPR/104.0.0.0",  # noqa
    # Firefox - Desktop (Linux)
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0",
    # Chrome - Desktop (Linux)
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
    # Other:
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",  # noqa
]  # noqa
78
+
79
# User-Agent strings of mobile browsers only.
mobile_ua = [
    "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",  # noqa
    "Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1",  # noqa
    "Mozilla/5.0 (Linux; Android 9; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.119 Mobile Safari/537.36",  # noqa
    "Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.93 Mobile Safari/537.36",  # noqa
    "Mozilla/5.0 (Linux; Android 10; HUAWEI VOG-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36",  # noqa
    "Mozilla/5.0 (iPad; CPU OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Mobile/15E148 Safari/604.1",  # noqa
]

# Browser profile identifiers, grouped by browser family and kept in the
# same order as before: chrome, safari_ios, safari, safari_ipad, edge, firefox.
impersonates = (
    *(
        f"chrome_{version}"
        for version in (
            100, 101, 104, 105, 106, 107, 108, 109, 114, 116, 117, 118,
            119, 120, 123, 124, 126, 127, 128, 129, 130, 131,
        )
    ),
    *(f"safari_ios_{version}" for version in ("16.5", "17.2", "17.4.1", "18.1.1")),
    *(
        f"safari_{version}"
        for version in (
            "15.3", "15.5", "15.6.1", "16", "16.5",
            "17.0", "17.2.1", "17.4.1", "17.5", "18", "18.2",
        )
    ),
    "safari_ipad_18",
    *(f"edge_{version}" for version in (101, 122, 127, 131)),
    *(f"firefox_{version}" for version in (109, 117, 128, 133)),
)

# Operating-system identifiers paired with the profiles above.
impersonates_os = ("android", "ios", "linux", "macos", "windows")


# Known ``Accept`` header values (a set, despite the name).
accept_list = {
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",  # noqa
    "text/html",
    "application/xhtml+xml",
    "application/xml",
    "image/webp",
    "image/apng",
    "*/*",
    "application/signed-exchange",
    "application/json",
}

# HTTP verbs accepted by ``HTTPService._request`` (compared upper-cased).
valid_methods = ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"]
118
+
119
+
120
def bad_gateway_exception(exc):
    """Return True when *exc* is an ``httpx.HTTPStatusError`` whose response status is 502."""
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    return exc.response.status_code == 502
123
+
124
+
125
class HTTPService:
    """
    Abstraction Class for working with HTTP Clients.

    - aiohttp Client
    - HTTPx
    - Requests
    """
    def __init__(self, *args, **kwargs):
        """Configure the service from keyword options.

        Some options are popped from ``kwargs``; the rest are only read, so
        they stay visible through ``self.arguments`` (which is the very same
        dict object as ``kwargs``).
        """
        # Options consumed here (removed from kwargs / self.arguments).
        self.use_proxy: bool = kwargs.pop("use_proxy", False)
        self._free_proxy: bool = kwargs.pop('free_proxy', False)
        self.rotate_ua: bool = kwargs.pop("rotate_ua", True)
        self.use_async: bool = bool(kwargs.pop("use_async", True))
        self.google_api_key: str = kwargs.pop('google_api_key', GOOGLE_SEARCH_API_KEY)
        self.google_cse: str = kwargs.pop('google_cse', GOOGLE_SEARCH_ENGINE_ID)
        # Debugging flag and the parser name handed to BeautifulSoup.
        self._debug: bool = kwargs.pop('debug', False)
        self._default_parser: str = kwargs.pop('bs4_parser', 'html.parser')

        # Options that are read but deliberately left inside kwargs.
        self.accept: str = kwargs.get('accept', "application/json")
        self.timeout: int = kwargs.get('timeout', 30)
        self.use_streams: bool = kwargs.get('use_streams', True)
        self.as_binary: bool = kwargs.get('as_binary', False)
        self.no_errors: dict = kwargs.get('no_errors', {})
        self._httpclient: Callable = kwargs.get('httpclient', None)
        # potentially cookies to be used by request.
        self.cookies = kwargs.get('cookies', {})
        self.credentials: dict = kwargs.get('credentials', {})
        # other arguments:
        self.arguments = kwargs

        self._proxies: list = []
        # One User-Agent per instance: random when rotating, else the first entry.
        self._ua: str = random.choice(ua) if self.rotate_ua is True else ua[0]
        # Caller-supplied headers are merged last, so they override the defaults.
        self.headers: dict = {
            "Accept": self.accept,
            "Accept-Encoding": "gzip, deflate",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": self._ua,
            **kwargs.get('headers', {}),
        }
        self._encoder = JSONContent()
        # Executor and semaphore sized from the package configuration.
        self._executor = ThreadPoolExecutor(
            max_workers=int(HTTPCLIENT_MAX_WORKERS)
        )
        self._semaphore = asyncio.Semaphore(
            int(HTTPCLIENT_MAX_SEMAPHORE)
        )

        # Authentication information:
        self.auth: dict = {}
        self.auth_type: str = None
        self.token_type: str = "Bearer"
        self._user, self._pwd = None, None
        # The first credential key found (in this priority order) decides the auth type.
        for marker, kind in (
            ("apikey", "api_key"),
            ("username", "basic"),
            ("token", "token"),
            ("key", "key"),
            ("auth", "auth"),
        ):
            if marker in self.credentials:
                self.auth_type = kind
                break
        if self.auth_type == "basic":
            self._user = self.credentials["username"]
            self._pwd = self.credentials["password"]
        # Logger:
        self.logger = logging.getLogger(__name__)
198
+
199
async def get_proxies(self, session_time: float = 1):
    """
    Asynchronously fetch a proxy list from the configured provider.

    ``FreeProxy`` is used when ``self._free_proxy`` is True; otherwise
    ``Oxylabs`` is queried with *session_time* and a fixed timeout of 10.
    TODO: SELECT or rotate the free/paid proxies.
    """
    if self._free_proxy is True:
        provider = FreeProxy()
    else:
        provider = Oxylabs(session_time=session_time, timeout=10)
    return await provider.get_list()
211
+
212
async def refresh_proxies(self):
    """
    Reload ``self._proxies`` from ``get_proxies()``; a no-op unless
    ``self.use_proxy`` is True.
    """
    if self.use_proxy is not True:
        return
    self._proxies = await self.get_proxies()
218
+
219
def build_url(
    self,
    url: str,
    queryparams: Optional[str] = None,
    args: Optional[dict] = None,
    params: Optional[dict] = None
) -> str:
    """
    Constructs a full URL with optional query parameters and arguments.

    Args:
        url (str): The base URL; may contain ``{name}`` placeholders.
        queryparams (Optional[str]): An already-encoded query string to append.
        args (Optional[dict]): Values formatted into the URL placeholders.
        params (Optional[dict]): Query parameters, url-encoded before appending.

    Returns:
        str: The constructed URL. Empty ``queryparams``/``params`` are
        ignored, so no dangling ``?`` or ``&`` is produced.
    """
    # Normalise once so URL-like objects work even when ``args`` is empty.
    url = str(url)
    if args:
        url = url.format_map(SafeDict(**args))
    if queryparams:
        separator = "&" if "?" in url else "?"
        url = f"{url}{separator}{queryparams}"
    if params:
        # Re-evaluate the separator: queryparams may just have added the "?".
        separator = "&" if "?" in url else "?"
        url = f"{url}{separator}{urlencode(params)}"
    self.logger.debug(
        f"URL: {url}"
    )
    return url
254
+
255
def extract_host(self, url):
    """
    Return the network-location part (``netloc``) of *url* as parsed by ``urlparse``.
    """
    return urlparse(url).netloc
261
+
262
@backoff.on_exception(
    backoff.expo,
    (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
    max_tries=3,
    max_time=120,
    jitter=backoff.full_jitter,
    on_backoff=lambda details: logging.warning(
        f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
    ),
    giveup=lambda e: isinstance(e, httpx.HTTPStatusError) and e.response.status_code not in [429, 500, 502, 503, 504]  # pylint: disable=C0301
)
async def _request(
    self,
    url: str,
    method: str = 'get',
    cookies: Optional[httpx.Cookies] = None,
    params: Optional[Dict[str, Any]] = None,
    data: Optional[Dict[str, Any]] = None,
    headers: Optional[Dict[str, str]] = None,
    timeout: Union[int, float] = 30.0,
    use_proxy: bool = True,
    free_proxy: bool = False,
    use_ssl: bool = True,
    use_json: bool = False,
    follow_redirects: bool = True,
    raise_for_status: bool = True,
    full_response: bool = False,
    connect_timeout: Union[int, float] = 5.0,
    read_timeout: Union[int, float] = 20.0,
    write_timeout: Union[int, float] = 5.0,
    pool_timeout: Union[int, float] = 20.0,
    num_retries: int = 2,
    **kwargs
) -> tuple:
    """
    Make an asynchronous HTTPx request and return ``(result, error)``.

    Args:
        url (str): The URL to send the request to.
        method (str): The HTTP method to use (default: 'get').
        cookies (httpx.Cookies, optional): Cookies to include in the request.
        params (dict, optional): Dictionary of query parameters to include in the URL.
        data (dict, optional): Dictionary of data to send in the request body.
        headers (dict, optional): Dictionary of HTTP headers to include in the request.
        timeout (float, optional): Default timeout for the request in seconds.
        use_proxy (bool): Whether to fetch a proxy and route the request through it.
        free_proxy (bool): Whether to use a free proxy (stored on ``self._free_proxy``).
        use_ssl (bool): Whether to build a custom SSL context (TLS 1.2 minimum).
        use_json (bool): Whether to send data as JSON.
        follow_redirects (bool): Whether to follow redirects.
        raise_for_status (bool): Whether to raise an exception for HTTP errors.
        full_response (bool): Whether to return the raw response object.
        connect_timeout (float): Timeout for connecting to the server.
        read_timeout (float): Timeout for reading from the server.
        write_timeout (float): Timeout for writing to the server.
        pool_timeout (float): Timeout for connection pool operations.
        num_retries (int): Number of retries to attempt at the transport level.
        **kwargs: ``verify_ssl`` (default True), ``download``, ``filename`` and
            ``transport_options`` are consumed here; anything else is passed
            to ``httpx.AsyncClient``.

    Returns:
        tuple: ``(response, None)`` when ``full_response`` is set, otherwise
        the ``(result, error)`` pair produced by ``process_response``.

    Raises:
        ValueError: When the HTTP method is not one of ``valid_methods``.
        httpx.TimeoutException: When the request times out.
        httpx.TooManyRedirects: When too many redirects are encountered.
        httpx.HTTPStatusError: When an HTTP error status is encountered.
        httpx.HTTPError: When an HTTP-related error occurs.
        RuntimeError: When any other error occurs (original error is chained).
    """
    # Options owned by this method. They must be removed from kwargs before
    # the remainder is forwarded to httpx.AsyncClient, which rejects unknown
    # keyword arguments.
    verify_ssl = kwargs.pop('verify_ssl', True)
    download = kwargs.pop('download', False)
    filename = kwargs.pop('filename', None)
    transport_overrides = kwargs.pop('transport_options', None)

    method = method.upper()
    if method not in valid_methods:
        raise ValueError(
            f"Invalid HTTP method: {method}. Must be one of {valid_methods}"
        )

    # httpx's ``proxy`` argument takes a single proxy URL, not a
    # scheme -> URL mapping, so only the first proxy of the list is used.
    proxy_url = None
    if use_proxy is True:
        self._free_proxy = free_proxy
        proxies = await self.get_proxies()
        if proxies:
            proxy_url = f"http://{proxies[0]}"
        else:
            self.logger.warning(
                "No proxies available despite use_proxy=True"
            )

    ssl_context = None
    if use_ssl:
        # Define custom SSL context
        ssl_context = ssl.create_default_context()
        # Disable older protocols and ensure at least TLS 1.2 is used
        ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
        ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
        if verify_ssl:
            ssl_context.check_hostname = True
            ssl_context.verify_mode = ssl.CERT_REQUIRED
        else:
            # check_hostname has to be cleared before CERT_NONE is allowed.
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE

    # Use AsyncHTTPTransport to pass in SSL context explicitly
    # NOTE(review): with use_ssl=False ``verify`` is None, as before - confirm
    # how the installed httpx version interprets that value.
    transport_options = {
        'retries': num_retries,
        'verify': ssl_context
    }
    if transport_overrides:
        transport_options.update(transport_overrides)
    transport = httpx.AsyncHTTPTransport(
        **transport_options
    )
    timeouts = httpx.Timeout(
        timeout=timeout,
        connect=connect_timeout,
        read=read_timeout,
        write=write_timeout,
        pool=pool_timeout
    )
    async with httpx.AsyncClient(
        cookies=cookies,
        transport=transport,
        headers=headers,
        proxy=proxy_url,
        timeout=timeouts,
        follow_redirects=follow_redirects,
        **kwargs
    ) as client:
        try:
            request_args = {
                "method": method,
                "url": url,
                "follow_redirects": follow_redirects
            }
            if data:
                request_args["json" if use_json else "data"] = data
            if params:
                request_args["params"] = params
            response = await client.request(**request_args)
            if raise_for_status:
                response.raise_for_status()
            if full_response:
                if self.logger.isEnabledFor(logging.DEBUG):
                    self.logger.debug(
                        f"Response from {url}: status={response.status_code}, headers={response.headers}"
                    )
                return response, None
            result, error = await self.process_response(
                response,
                url,
                download=download,
                filename=filename
            )
            return result, error
        except httpx.TimeoutException:
            self.logger.error("Request timed out.")
            raise
        except httpx.TooManyRedirects:
            self.logger.error("Too many redirects.")
            raise
        except httpx.HTTPStatusError as ex:
            self.logger.error(
                f"HTTP status error occurred: {ex.response.status_code} - {ex}"
            )
            raise
        except httpx.HTTPError as ex:
            self.logger.error(f"HTTP error occurred: {ex}")
            raise
        except AttributeError as e:
            self.logger.error(f"HTTPx Client doesn't have attribute {method}: {e}")
            raise
        except Exception as exc:
            # Anything else (including errors raised while processing the
            # response) is reported as RuntimeError with the cause chained.
            self.logger.error(f'Unknown Error > {exc}')
            raise RuntimeError(
                f"An error occurred: {exc}"
            ) from exc
454
+
455
@backoff.on_exception(
    backoff.expo,  # Use exponential backoff
    (aiohttp.ClientError,  # Retry on network-related errors
     aiohttp.ServerTimeoutError,  # Retry on timeouts
     aiohttp.ClientResponseError),  # Retry on certain HTTP errors
    max_tries=3,  # Maximum number of retries
    max_time=60,  # Maximum total time to try (in seconds)
    jitter=backoff.full_jitter,  # Use full jitter to randomize retry intervals
    giveup=lambda e: isinstance(e, aiohttp.ClientResponseError) and e.status not in [429, 500, 502, 503, 504]  # pylint: disable=C0301
)
async def async_request(
    self,
    url,
    method: str = 'GET',
    data: dict = None,
    use_ssl: bool = False,
    use_json: bool = False,
    use_proxy: bool = False,
    accept: Optional[str] = None,
    download: bool = False,
    full_response: bool = False
):
    """
    Asynchronously sends an HTTP request using aiohttp.

    :param url: The URL to send the request to.
    :param method: The HTTP method to use (e.g., 'GET', 'POST').
    :param data: The data to send in the request body.
    :param use_ssl: Whether to pass a custom SSL context (TLS 1.2 minimum).
        NOTE(review): that context turns certificate and hostname
        verification OFF; confirm this is intended.
    :param use_json: Whether to send the data as JSON.
    :param use_proxy: force a refresh of the proxy list before the request.
    :param accept: The accept header to use (defaults to ``self.accept``).
    :param download: Whether to download the response as a file.
    :param full_response: Whether to return the response object itself
        (its body is read first so it stays usable once the session closes).
    :return: A tuple containing the result and any error information.
        ``aiohttp.ClientError`` failures are returned as the error string,
        not raised.
    """
    result = []
    error = {}
    auth = None
    proxy = None
    ssl_context = None

    if use_proxy is True:
        self._proxies = await self.get_proxies()
    if self._proxies:
        proxy = random.choice(self._proxies)

    # Authentication: header/URL based schemes update ``self.headers`` or the
    # URL, user/password schemes produce a BasicAuth object.
    if self.credentials:
        if "apikey" in self.auth:
            self.headers[
                "Authorization"
            ] = f"{self.token_type} {self.auth['apikey']}"
        elif self.auth_type == "api_key":
            self.headers = {**self.headers, **self.credentials}
        elif self.auth_type == "key":
            url = self.build_url(
                url,
                args=self.arguments,
                queryparams=urlencode(self.credentials)
            )
        elif self.auth_type in ["basic", "auth", "user"]:
            auth = BasicAuth(
                self.credentials["username"],
                self.credentials["password"]
            )
    elif "apikey" in self.auth:
        self.headers["Authorization"] = f"{self.token_type} {self.auth['apikey']}"
    elif self.auth:
        token_type, token = list(self.auth.items())[0]
        self.headers["Authorization"] = f"{token_type} {token}"
    elif self._user and self.auth_type == "basic":
        auth = BasicAuth(self._user, self._pwd)

    # Logger.debug() has no ``level`` keyword; passing one raises TypeError
    # as soon as DEBUG logging is enabled.
    self.logger.debug(
        f"HTTP: Connecting to {url} using {method}"
    )

    request_kwargs = {"proxy": proxy}
    if auth is not None:
        request_kwargs["auth"] = auth
    if use_ssl:
        ssl_context = ssl.create_default_context()
        # Disable older protocols and ensure at least TLS 1.2 is used
        ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
        ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
        ssl_context.check_hostname = False
        ssl_context.verify_mode = ssl.CERT_NONE
        # aiohttp takes the context through ``ssl``; ``ssl_context`` is deprecated.
        request_kwargs["ssl"] = ssl_context

    # Per-request headers live in a copy so download-specific values do not
    # leak into later, unrelated requests made through this instance.
    headers = {**self.headers}
    headers["Accept"] = accept if accept is not None else self.accept
    if download is True:
        headers["Accept"] = "application/octet-stream"
        headers["Content-Type"] = "application/octet-stream"
        if hasattr(self, "use_streams"):
            # No ``stream`` request argument exists in aiohttp; the response
            # body is consumed incrementally by process_response instead.
            headers["Transfer-Encoding"] = "chunked"

    # The payload goes either as JSON or as a regular body.
    request_kwargs["json" if use_json is True else "data"] = data

    timeout = aiohttp.ClientTimeout(total=self.timeout)
    async with aiohttp.ClientSession(
        headers=headers,
        timeout=timeout,
        auth=auth,
        json_serialize=json_encoder,
    ) as session:
        try:
            async with session.request(
                method.upper(), url, **request_kwargs
            ) as response:
                if full_response is True:
                    # Cache the body before the connection is released.
                    await response.read()
                    return response, None
                # Process the response
                result, error = await self.process_response(response, url, download=download)
        except aiohttp.ClientError as e:
            error = str(e)
    return (result, error)
579
+
580
async def evaluate_error(
    self, response: Union[str, bytes, list, dict], message: Union[str, list, dict]
) -> bool:
    """Tell whether *message* appears in an error payload.

    Args:
        response: The decoded error body. A list is searched item by item,
            a dict carrying an ``"errors"`` key is searched through that
            entry, anything else is searched directly. ``bytes`` are decoded
            as UTF-8 (undecodable bytes replaced) before a ``str`` message is
            looked up in them.
        message: The expected error text or value.

    Returns:
        bool: True when *message* is contained in (or, for values that do
        not support containment, equal to) the inspected payload.
    """
    def _matches(candidate) -> bool:
        # Containment where supported, plain equality otherwise.
        if isinstance(candidate, (bytes, bytearray)) and isinstance(message, str):
            candidate = bytes(candidate).decode("utf-8", errors="replace")
        try:
            return message in candidate
        except TypeError:
            return bool(message == candidate)

    # a list of potential errors:
    if isinstance(response, list) and any(_matches(item) for item in response):
        return True
    if isinstance(response, dict) and "errors" in response:
        errors = response["errors"]
        if isinstance(errors, list):
            return any(_matches(item) for item in errors)
        return bool(message == errors)
    return _matches(response)
615
+
616
def response_status(self, response):
    """Return the numeric status of *response*: ``status_code`` when the
    object has one, otherwise its ``status`` attribute."""
    try:
        return response.status_code
    except AttributeError:
        return response.status
620
+
621
async def response_json(self, response):
    """Return the decoded JSON body, awaiting ``response.json`` only when it
    is a coroutine function."""
    decode = response.json
    if not asyncio.iscoroutinefunction(decode):
        return decode()
    return await decode()
625
+
626
async def response_read(self, response):
    """Return the raw body, awaiting ``aread()`` when the response offers it
    and ``read()`` otherwise."""
    reader = response.aread if hasattr(response, 'aread') else response.read
    return await reader()
630
+
631
async def response_text(self, response):
    """Return the body as text: ``response.text`` is awaited when it is a
    coroutine function and returned as-is when it is a plain attribute."""
    text = response.text
    if not asyncio.iscoroutinefunction(text):
        return text
    return await text()
635
+
636
async def response_reason(self, response):
    """Return the HTTP reason phrase of *response* as a string.

    ``reason`` is tried first, then ``reason_phrase``. A missing or ``None``
    value yields an empty string and a bytes value is decoded, so the result
    never contains a ``b'...'`` or ``None`` rendering.
    """
    reason = getattr(response, 'reason', None)
    if reason is None:
        reason = getattr(response, 'reason_phrase', None)
    if reason is None:
        return ""
    if isinstance(reason, (bytes, bytearray)):
        # latin-1 maps every byte, so decoding cannot fail.
        return bytes(reason).decode("latin-1")
    return f"{reason!s}"
640
+
641
async def process_response(
    self,
    response, url: str,
    download: bool = False,
    filename: Optional[str] = None
) -> tuple:
    """
    Processes the response from an HTTP request.

    :param response: The response object (read through the ``response_*``
        helpers of this class).
    :param url: The URL that was requested.
    :param download: Whether to save the response body as a file.
    :param filename: Target filename for downloads. It may contain a
        ``{filename}`` placeholder, filled with the name announced by the
        server (or the URL basename). A name announced in the
        Content-Disposition header replaces a plain *filename*.
    :return: A tuple containing the processed result and any error information.
    :raises ConnectionError: For a status >= 400 that is not whitelisted
        through ``self.no_errors``.
    """
    error = None
    result = None
    status = self.response_status(response)

    if status >= 400:
        # Pick the most descriptive representation of the error body.
        content_type = response.headers.get("Content-Type", "").lower()
        if "application/json" in content_type:
            message = await self.response_json(response)
        elif "text/" in content_type:
            message = await self.response_text(response)
        elif "X-Error" in response.headers:
            message = response.headers["X-Error"]
        else:
            # Fallback to a unified read method for the raw body content
            message = await self.response_read(response)

        # Combine response headers and body for enriched logging
        error_context = {
            "status": status,
            "reason": await self.response_reason(response),
            "headers": response.headers,
            "body": message
        }
        self.logger.error(
            f"Error: {error_context}"
        )

        # Statuses listed in ``no_errors`` with a matching message are tolerated.
        if self.no_errors:
            for key, msg in self.no_errors.items():
                if int(key) == status and await self.evaluate_error(message, msg):
                    return response, status

        raise ConnectionError(
            f"HTTP Error {status}: {message!s}"
        )

    if download is True:
        # Name announced by the server, if any.
        remote_name = None
        content_disposition = response.headers.get("content-disposition")
        if content_disposition:
            from email.utils import collapse_rfc2231_value

            msg = Message()
            msg["Content-Disposition"] = content_disposition
            raw_name = msg.get_param("filename", header="Content-Disposition")
            if raw_name:
                # RFC 2231 encoded names come back as a tuple; collapse to str.
                remote_name = collapse_rfc2231_value(raw_name)
            else:
                utf8_filename = msg.get_param("filename*", header="Content-Disposition")
                if isinstance(utf8_filename, str) and "''" in utf8_filename:
                    remote_name = parse.unquote(utf8_filename.split("''", 1)[1])
            if remote_name:
                # The header is server-controlled: keep only the final path
                # component so it cannot point outside the target directory.
                remote_name = os.path.basename(str(remote_name).replace("\\", "/")) or None

        if remote_name is None and not filename:
            filename = os.path.basename(url)
        if filename and "{filename}" in str(filename):
            # Fill the caller's template with the real name.
            filename = str(filename).format_map(
                SafeDict(filename=remote_name or os.path.basename(url))
            )
        elif remote_name is not None:
            filename = remote_name
        if "{" in str(filename):
            filename = str(filename).format_map(
                SafeDict(**self.arguments)
            )
        if isinstance(filename, str):
            filename = Path(filename)

        # Saving File in Directory:
        total_length = response.headers.get("Content-Length")
        self.logger.info(
            f"HTTPClient: Saving File {filename}, size: {total_length}"
        )
        pathname = filename.parent.absolute()
        if not pathname.exists():
            # Create a new directory
            pathname.mkdir(parents=True, exist_ok=True)
        if filename.exists() and filename.is_file():
            self.logger.warning(
                f"HTTPClient: File Already exists: {filename}"
            )
            # Filename already exists
            return filename, error

        # Bounded chunk size: the file content is the same whatever the chunk
        # size, and Content-Length may be absent or very large.
        chunk_size = 8192
        if self.use_streams is True:
            if hasattr(response, "aiter_bytes"):
                chunks = response.aiter_bytes(chunk_size)
            else:
                chunks = response.content.iter_chunked(chunk_size)
            async with aiofiles.open(filename, "wb") as file:
                async for chunk in chunks:
                    await file.write(chunk)
        else:
            # Read first so a failed read leaves no empty file behind and is
            # reported to the caller instead of being silently ignored.
            try:
                payload = await self.response_read(response)
            except Exception as exc:  # pylint: disable=W0718
                self.logger.error(
                    f"HTTPClient: Unable to read content for {filename}: {exc}"
                )
                return None, exc
            with open(filename, "wb") as fp:
                fp.write(payload)
        self.logger.debug(
            f"Filename Saved Successfully: {filename}"
        )
        return filename, error

    # Not a download: decode the body according to the configured Accept type.
    if self.accept == 'application/octet-stream':
        result = BytesIO(await self.response_read(response))
    elif self.accept == 'text/html':
        result = await self.response_read(response)
        try:
            # html parser for lxml
            self._parser = html.fromstring(result)
            # BeautifulSoup parser
            self._bs = bs(await self.response_text(response), self._default_parser)
            result = self._bs
        except Exception as e:  # pylint: disable=W0718
            error = e
    elif self.accept in ('application/xhtml+xml', 'application/xml'):
        result = await self.response_read(response)
        try:
            self._parser = etree.fromstring(result)  # pylint: disable=I1101
        except etree.XMLSyntaxError:  # pylint: disable=I1101
            self._parser = html.fromstring(result)
        except Exception as e:  # pylint: disable=W0718
            error = e
    elif self.accept == "application/json":
        try:
            result = await self.response_json(response)
        except Exception as e:  # pylint: disable=W0718
            self.logger.error(e)
            # is not an json, try first with beautiful soup:
            try:
                self._bs = bs(
                    await self.response_text(response),
                    self._default_parser
                )
                result = self._bs
            except Exception:  # pylint: disable=W0718
                error = e
    elif self.as_binary is True:
        result = await self.response_read(response)
    else:
        result = await self.response_text(response)
    return result, error