ai-parrot 0.8.3__cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot has been flagged as potentially problematic; see the package's advisory page for details.
- ai_parrot-0.8.3.dist-info/LICENSE +21 -0
- ai_parrot-0.8.3.dist-info/METADATA +306 -0
- ai_parrot-0.8.3.dist-info/RECORD +128 -0
- ai_parrot-0.8.3.dist-info/WHEEL +6 -0
- ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
- parrot/__init__.py +30 -0
- parrot/bots/__init__.py +5 -0
- parrot/bots/abstract.py +1115 -0
- parrot/bots/agent.py +492 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/bose.py +17 -0
- parrot/bots/chatbot.py +271 -0
- parrot/bots/cody.py +17 -0
- parrot/bots/copilot.py +117 -0
- parrot/bots/data.py +730 -0
- parrot/bots/dataframe.py +103 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/interfaces/__init__.py +1 -0
- parrot/bots/interfaces/retrievers.py +12 -0
- parrot/bots/notebook.py +619 -0
- parrot/bots/odoo.py +17 -0
- parrot/bots/prompts/__init__.py +41 -0
- parrot/bots/prompts/agents.py +91 -0
- parrot/bots/prompts/data.py +214 -0
- parrot/bots/retrievals/__init__.py +1 -0
- parrot/bots/retrievals/constitutional.py +19 -0
- parrot/bots/retrievals/multi.py +122 -0
- parrot/bots/retrievals/retrieval.py +610 -0
- parrot/bots/tools/__init__.py +7 -0
- parrot/bots/tools/eda.py +325 -0
- parrot/bots/tools/pdf.py +50 -0
- parrot/bots/tools/plot.py +48 -0
- parrot/bots/troc.py +16 -0
- parrot/conf.py +170 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agents.py +292 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +192 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/http.py +805 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +18 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/exif.py +709 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/llms/__init__.py +1 -0
- parrot/llms/abstract.py +69 -0
- parrot/llms/anthropic.py +58 -0
- parrot/llms/gemma.py +15 -0
- parrot/llms/google.py +44 -0
- parrot/llms/groq.py +67 -0
- parrot/llms/hf.py +45 -0
- parrot/llms/openai.py +61 -0
- parrot/llms/pipes.py +114 -0
- parrot/llms/vertex.py +89 -0
- parrot/loaders/__init__.py +9 -0
- parrot/loaders/abstract.py +628 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/txt.py +26 -0
- parrot/manager.py +333 -0
- parrot/models.py +504 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +11 -0
- parrot/stores/abstract.py +248 -0
- parrot/stores/chroma.py +188 -0
- parrot/stores/duck.py +162 -0
- parrot/stores/embeddings/__init__.py +10 -0
- parrot/stores/embeddings/abstract.py +46 -0
- parrot/stores/embeddings/base.py +52 -0
- parrot/stores/embeddings/bge.py +20 -0
- parrot/stores/embeddings/fastembed.py +17 -0
- parrot/stores/embeddings/google.py +18 -0
- parrot/stores/embeddings/huggingface.py +20 -0
- parrot/stores/embeddings/ollama.py +14 -0
- parrot/stores/embeddings/openai.py +26 -0
- parrot/stores/embeddings/transformers.py +21 -0
- parrot/stores/embeddings/vertexai.py +17 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss.py +160 -0
- parrot/stores/milvus.py +397 -0
- parrot/stores/postgres.py +653 -0
- parrot/stores/qdrant.py +170 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +68 -0
- parrot/tools/asknews.py +33 -0
- parrot/tools/basic.py +51 -0
- parrot/tools/bby.py +359 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/docx.py +343 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/execute.py +56 -0
- parrot/tools/gamma.py +28 -0
- parrot/tools/google.py +170 -0
- parrot/tools/gvoice.py +301 -0
- parrot/tools/results.py +278 -0
- parrot/tools/stack.py +27 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +58 -0
- parrot/tools/zipcode.py +198 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
|
@@ -0,0 +1,805 @@
|
|
|
1
|
+
from typing import Optional, Union, Dict, Any
|
|
2
|
+
from collections.abc import Callable
|
|
3
|
+
import random
|
|
4
|
+
import os
|
|
5
|
+
import asyncio
|
|
6
|
+
import ssl
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
9
|
+
from functools import partial
|
|
10
|
+
from io import BytesIO
|
|
11
|
+
from email.message import Message
|
|
12
|
+
from urllib import parse
|
|
13
|
+
from urllib.parse import quote, urlencode, urlparse
|
|
14
|
+
import urllib3
|
|
15
|
+
import aiofiles
|
|
16
|
+
# parsing:
|
|
17
|
+
from bs4 import BeautifulSoup as bs
|
|
18
|
+
from lxml import html, etree
|
|
19
|
+
# backoff retries:
|
|
20
|
+
import backoff
|
|
21
|
+
# aiohttp:
|
|
22
|
+
import aiohttp
|
|
23
|
+
from aiohttp import BasicAuth
|
|
24
|
+
# httpx
|
|
25
|
+
import httpx
|
|
26
|
+
# config:
|
|
27
|
+
from datamodel.typedefs import SafeDict
|
|
28
|
+
from datamodel.parsers.json import JSONContent, json_encoder # pylint: disable=E0611
|
|
29
|
+
from navconfig.logging import logging
|
|
30
|
+
from proxylists.proxies import (
|
|
31
|
+
FreeProxy,
|
|
32
|
+
Oxylabs
|
|
33
|
+
)
|
|
34
|
+
from ..conf import (
|
|
35
|
+
HTTPCLIENT_MAX_SEMAPHORE,
|
|
36
|
+
HTTPCLIENT_MAX_WORKERS,
|
|
37
|
+
GOOGLE_SEARCH_API_KEY,
|
|
38
|
+
GOOGLE_SEARCH_ENGINE_ID
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Quiet down the chatty HTTP client libraries: only warnings and above are
# emitted for the transport layers used by this module.
logging.getLogger("urllib3").setLevel(logging.WARNING)
# Suppress urllib3's InsecureRequestWarning and friends globally.
urllib3.disable_warnings()
logging.getLogger("httpx").setLevel(logging.WARNING)
logging.getLogger("httpcore").setLevel(logging.WARNING)
logging.getLogger("aiohttp").setLevel(logging.WARNING)
# hpack (HTTP/2 header compression) stays at INFO, slightly more verbose.
logging.getLogger("hpack").setLevel(logging.INFO)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Pool of desktop and mobile User-Agent strings used for UA rotation.
# BUG FIX: the Opera entry was missing its trailing comma, so Python's
# implicit string concatenation merged it with the Firefox-Linux entry into
# one garbage UA and silently dropped an entry from the pool.
ua = [
    # Chrome - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    # Chrome - Desktop (Mac)
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",  # noqa
    # Safari - Desktop (Mac)
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",  # noqa
    # Firefox - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/118.0",
    # Edge - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46",  # noqa
    # Chrome - Mobile (Android)
    "Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Mobile Safari/537.36",  # noqa
    # Safari - Mobile (iOS)
    "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",  # noqa
    # Samsung Internet - Mobile (Android)
    "Mozilla/5.0 (Linux; Android 13; SAMSUNG SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/21.0 Chrome/118.0.0.0 Mobile Safari/537.36",  # noqa
    # Firefox - Mobile (Android)
    "Mozilla/5.0 (Android 13; Mobile; rv:118.0) Gecko/118.0 Firefox/118.0",
    # Opera - Desktop (Windows)
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36 OPR/104.0.0.0",  # noqa
    # Firefox - Desktop (Linux)
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0",
    # Chrome - Desktop (Linux)
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36",
    # Other:
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",  # noqa
]  # noqa
|
|
78
|
+
|
|
79
|
+
# Pool of mobile-only User-Agent strings (Android / iOS browsers).
# NOTE(review): not referenced elsewhere in this chunk — presumably used by
# callers that want to force a mobile UA; confirm against the rest of the file.
mobile_ua = [
    "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19",  # noqa
    'Mozilla/5.0 (iPhone; CPU iPhone OS 12_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1',  # noqa
    'Mozilla/5.0 (Linux; Android 9; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.119 Mobile Safari/537.36',  # noqa
    'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.93 Mobile Safari/537.36',  # noqa
    'Mozilla/5.0 (Linux; Android 10; HUAWEI VOG-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36',  # noqa
    'Mozilla/5.0 (iPad; CPU OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0 Mobile/15E148 Safari/604.1',  # noqa
]
|
|
87
|
+
|
|
88
|
+
# Browser identities available for TLS-fingerprint impersonation.
# NOTE(review): presumably consumed by an impersonation-capable HTTP client
# (e.g. curl_cffi-style); not referenced in this chunk — confirm.
impersonates = (
    "chrome_100", "chrome_101", "chrome_104", "chrome_105", "chrome_106", "chrome_107",
    "chrome_108", "chrome_109", "chrome_114", "chrome_116", "chrome_117", "chrome_118",
    "chrome_119", "chrome_120", "chrome_123", "chrome_124", "chrome_126", "chrome_127",
    "chrome_128", "chrome_129", "chrome_130", "chrome_131",
    "safari_ios_16.5", "safari_ios_17.2", "safari_ios_17.4.1", "safari_ios_18.1.1",
    "safari_15.3", "safari_15.5", "safari_15.6.1", "safari_16", "safari_16.5",
    "safari_17.0", "safari_17.2.1", "safari_17.4.1", "safari_17.5",
    "safari_18", "safari_18.2",
    "safari_ipad_18",
    "edge_101", "edge_122", "edge_127", "edge_131",
    "firefox_109", "firefox_117", "firefox_128", "firefox_133",
)  # fmt: skip

# Operating systems selectable for impersonation.
impersonates_os = ("android", "ios", "linux", "macos", "windows")


# Set of acceptable media types; the first element is a full ``Accept``
# header value, the rest are individual MIME types.
accept_list = {
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",  # noqa
    "text/html",
    "application/xhtml+xml",
    "application/xml",
    "image/webp",
    "image/apng",
    "*/*",
    "application/signed-exchange",
    "application/json",
}

# HTTP verbs accepted by ``HTTPService._request`` (validated upper-case).
valid_methods = ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS']
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def bad_gateway_exception(exc):
    """Return True when *exc* is an httpx status error carrying a 502 response."""
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    return exc.response.status_code == 502
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class HTTPService:
|
|
126
|
+
"""
|
|
127
|
+
Abstraction Class for working with HTTP Clients.
|
|
128
|
+
|
|
129
|
+
- aiohttp Client
|
|
130
|
+
- HTTPx
|
|
131
|
+
- Requests
|
|
132
|
+
"""
|
|
133
|
+
    def __init__(self, *args, **kwargs):
        """
        Initialize the HTTP service wrapper.

        All configuration is keyword-driven. Recognised keys include
        ``use_proxy``, ``free_proxy``, ``rotate_ua``, ``use_async``,
        ``google_api_key``, ``google_cse``, ``headers``, ``accept``,
        ``timeout``, ``use_streams``, ``as_binary``, ``no_errors``,
        ``httpclient``, ``cookies``, ``credentials``, ``debug`` and
        ``bs4_parser``. Keys consumed with ``pop`` are removed; keys read
        with ``get`` remain in ``kwargs`` and end up in ``self.arguments``.
        """
        # Proxy behaviour: whether to use a proxy at all, and whether to
        # prefer the free-proxy provider over the paid one.
        self.use_proxy: bool = kwargs.pop("use_proxy", False)
        self._free_proxy: bool = kwargs.pop('free_proxy', False)
        self._proxies: list = []
        # When True a random User-Agent from ``ua`` is chosen per instance.
        self.rotate_ua: bool = kwargs.pop("rotate_ua", True)
        self.use_async: bool = bool(kwargs.pop("use_async", True))
        # Google Custom Search defaults come from module configuration.
        self.google_api_key: str = kwargs.pop('google_api_key', GOOGLE_SEARCH_API_KEY)
        self.google_cse: str = kwargs.pop('google_cse', GOOGLE_SEARCH_ENGINE_ID)
        # NOTE(review): these use .get(), so they also stay inside
        # ``self.arguments`` below — presumably intentional; confirm.
        self.headers: dict = kwargs.get('headers', {})
        self.accept: str = kwargs.get('accept', "application/json")
        self.timeout: int = kwargs.get('timeout', 30)
        self.use_streams: bool = kwargs.get('use_streams', True)
        self.as_binary: bool = kwargs.get('as_binary', False)
        self.no_errors: dict = kwargs.get('no_errors', {})
        # Optional externally-supplied HTTP client factory/instance.
        self._httpclient: Callable = kwargs.get('httpclient', None)
        self._ua: str = ""
        if self.rotate_ua is True:
            self._ua = random.choice(ua)
        else:
            self._ua: str = ua[0]
        # Default headers; any caller-supplied headers override the defaults
        # because they are merged last.
        self.headers = {
            "Accept": self.accept,
            "Accept-Encoding": "gzip, deflate",
            "DNT": "1",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": self._ua,
            **self.headers,
        }
        # potentially cookies to be used by request.
        self.cookies = kwargs.get('cookies', {})
        self._encoder = JSONContent()
        # other arguments (whatever is left in kwargs after the pops above):
        self.arguments = kwargs
        # Executor for offloading blocking work:
        self._executor = ThreadPoolExecutor(
            max_workers=int(HTTPCLIENT_MAX_WORKERS)
        )
        # Bounds the number of concurrent in-flight requests.
        self._semaphore = asyncio.Semaphore(
            int(HTTPCLIENT_MAX_SEMAPHORE)
        )
        # Authentication information:
        self.auth: dict = {}
        self.auth_type: Optional[str] = None
        self.token_type: str = "Bearer"
        self._user, self._pwd = None, None
        self.credentials: dict = kwargs.get('credentials', {})
        # Infer the auth scheme from which credential key is present.
        if "apikey" in self.credentials:
            self.auth_type = "api_key"
        elif "username" in self.credentials:
            self.auth_type = "basic"
            self._user = self.credentials["username"]
            self._pwd = self.credentials["password"]
        elif "token" in self.credentials:
            self.auth_type = "token"
        elif "key" in self.credentials:
            self.auth_type = "key"
        elif "auth" in self.credentials:
            self.auth_type = "auth"
        # Debugging:
        self._debug: bool = kwargs.pop('debug', False)
        # BeautifulSoup parser to be used:
        self._default_parser: str = kwargs.pop('bs4_parser', 'html.parser')
        # Logger:
        self.logger = logging.getLogger(__name__)
|
|
198
|
+
|
|
199
|
+
async def get_proxies(self, session_time: float = 1):
|
|
200
|
+
"""
|
|
201
|
+
Asynchronously retrieves a list of free proxies.
|
|
202
|
+
TODO: SELECT or rotate the free/paid proxies.
|
|
203
|
+
"""
|
|
204
|
+
if self._free_proxy is True:
|
|
205
|
+
return await FreeProxy().get_list()
|
|
206
|
+
else:
|
|
207
|
+
return await Oxylabs(
|
|
208
|
+
session_time=session_time,
|
|
209
|
+
timeout=10
|
|
210
|
+
).get_list()
|
|
211
|
+
|
|
212
|
+
async def refresh_proxies(self):
|
|
213
|
+
"""
|
|
214
|
+
Asynchronously refreshes the list of proxies if proxy usage is enabled.
|
|
215
|
+
"""
|
|
216
|
+
if self.use_proxy is True:
|
|
217
|
+
self._proxies = await self.get_proxies()
|
|
218
|
+
|
|
219
|
+
def build_url(
|
|
220
|
+
self,
|
|
221
|
+
url: str,
|
|
222
|
+
queryparams: Optional[str] = None,
|
|
223
|
+
args: Optional[dict] = None,
|
|
224
|
+
params: Optional[dict] = None
|
|
225
|
+
) -> str:
|
|
226
|
+
"""
|
|
227
|
+
Constructs a full URL with optional query parameters and arguments.
|
|
228
|
+
|
|
229
|
+
Args:
|
|
230
|
+
url (str): The base URL.
|
|
231
|
+
queryparams (Optional[str]): The query parameters to append to the URL.
|
|
232
|
+
args (Optional[dict]): Additional arguments to format into the URL.
|
|
233
|
+
params (Optional[dict]): Additional query parameters to append to the URL.
|
|
234
|
+
|
|
235
|
+
Returns:
|
|
236
|
+
str: The constructed URL.
|
|
237
|
+
"""
|
|
238
|
+
if args:
|
|
239
|
+
url = str(url).format_map(SafeDict(**args))
|
|
240
|
+
if queryparams is not None:
|
|
241
|
+
if "?" in url:
|
|
242
|
+
url += "&" + queryparams
|
|
243
|
+
else:
|
|
244
|
+
url += "?" + queryparams
|
|
245
|
+
if params is not None:
|
|
246
|
+
if "?" in url:
|
|
247
|
+
url += "&" + urlencode(params)
|
|
248
|
+
else:
|
|
249
|
+
url = f"{url}?{urlencode(params)}"
|
|
250
|
+
self.logger.debug(
|
|
251
|
+
f"URL: {url}"
|
|
252
|
+
)
|
|
253
|
+
return url
|
|
254
|
+
|
|
255
|
+
def extract_host(self, url):
|
|
256
|
+
"""
|
|
257
|
+
Extracts the host from a URL.
|
|
258
|
+
"""
|
|
259
|
+
parsed_url = urlparse(url)
|
|
260
|
+
return parsed_url.netloc
|
|
261
|
+
|
|
262
|
+
    @backoff.on_exception(
        backoff.expo,
        (httpx.HTTPStatusError, httpx.TimeoutException),  # Catch HTTP errors and timeouts
        max_tries=3,
        max_time=120,
        jitter=backoff.full_jitter,
        on_backoff=lambda details: logging.warning(
            f"Retrying HTTP Get: attempt {details['tries']} after {details['wait']:0.2f}s"
        ),
        # Retry only transient statuses; give up immediately on anything else.
        giveup=lambda e: isinstance(e, httpx.HTTPStatusError) and e.response.status_code not in [429, 500, 502, 503, 504]  # pylint: disable=C0301
    )
    async def _request(
        self,
        url: str,
        method: str = 'get',
        cookies: Optional[httpx.Cookies] = None,
        params: Optional[Dict[str, Any]] = None,
        data: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        timeout: Union[int, float] = 30.0,
        use_proxy: bool = True,
        free_proxy: bool = False,
        use_ssl: bool = True,
        use_json: bool = False,
        follow_redirects: bool = True,
        raise_for_status: bool = True,
        full_response: bool = False,
        connect_timeout: Union[int, float] = 5.0,
        read_timeout: Union[int, float] = 20.0,
        write_timeout: Union[int, float] = 5.0,
        pool_timeout: Union[int, float] = 20.0,
        num_retries: int = 2,
        **kwargs
    ) -> tuple:
        """
        Make an asynchronous HTTPx request, returning a (result, error) tuple.

        Args:
            url (str): The URL to send the request to.
            method (str): The HTTP method to use (default: 'get').
            headers (dict, optional): Dictionary of HTTP headers to include in the request.
            cookies (httpx.Cookies, optional): Cookies to include in the request.
            params (dict, optional): Dictionary of query parameters to include in the URL.
            data (dict, optional): Dictionary of data to send in the request body.
            timeout (float, optional): Total timeout for the request in seconds.
            use_proxy (bool): Whether to use a proxy for the request.
            free_proxy (bool): Whether to use a free proxy.
            use_ssl (bool): Whether to use a custom SSL context for the request.
            use_json (bool): Whether to send data as JSON.
            follow_redirects (bool): Whether to follow redirects.
            raise_for_status (bool): Whether to raise an exception for HTTP errors.
            full_response (bool): Whether to return the raw response object.
            connect_timeout (float): Timeout for connecting to the server.
            read_timeout (float): Timeout for reading from the server.
            write_timeout (float): Timeout for writing to the server.
            pool_timeout (float): Timeout for connection pool operations.
            num_retries (int): Number of retries to attempt at the transport level.
            **kwargs: Additional arguments to pass to httpx.AsyncClient.

        Returns:
            Tuple[Any, Optional[Dict[str, Any]]]: A tuple containing the result and any error information.

        Raises:
            httpx.TimeoutException: When the request times out.
            httpx.TooManyRedirects: When too many redirects are encountered.
            httpx.HTTPStatusError: When an HTTP error status is encountered.
            httpx.HTTPError: When an HTTP-related error occurs.
            AttributeError: When the HTTP method is invalid.
            RuntimeError: When an unknown error occurs.
        """
        proxy_config = None
        if use_proxy is True:
            self._free_proxy = free_proxy
            proxies = await self.get_proxies()
            if not proxies and use_proxy:
                self.logger.warning(
                    "No proxies available despite use_proxy=True"
                )
            proxy = proxies[0] if proxies else None  # Ensure there's at least one proxy
            # NOTE(review): httpx's ``proxy=`` parameter expects a single
            # proxy URL/Proxy object; this scheme->URL mapping matches the
            # legacy ``proxies=`` argument — confirm against the installed
            # httpx version.
            proxy_config = {
                "http://": f"http://{proxy}" if proxy else None,
                "https://": f"http://{proxy}" if proxy else None  # Using the same proxy for HTTPS
            }

            # Remove proxies with None values
            proxy_config = {k: v for k, v in proxy_config.items() if v is not None}

        ssl_context = None
        if use_ssl:
            # Define custom SSL context
            ssl_context = ssl.create_default_context()
            # Disable older protocols if needed
            ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
            # Ensure at least TLS 1.2 is used
            ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
            # Make this configurable rather than hardcoded to CERT_NONE
            if kwargs.get('verify_ssl', True):
                ssl_context.check_hostname = True
                ssl_context.verify_mode = ssl.CERT_REQUIRED
            else:
                ssl_context.check_hostname = False
                ssl_context.verify_mode = ssl.CERT_NONE

        # Use AsyncHTTPTransport to pass in SSL context explicitly
        transport_options = {
            'retries': num_retries,
            'verify': ssl_context
        }
        if 'transport_options' in kwargs:
            transport_options.update(kwargs.pop('transport_options'))
        transport = httpx.AsyncHTTPTransport(
            **transport_options
        )
        # NOTE: the ``timeout`` parameter (a number) is deliberately shadowed
        # here by a full httpx.Timeout object.
        timeout = httpx.Timeout(
            timeout=timeout,
            connect=connect_timeout,
            read=read_timeout,
            write=write_timeout,
            pool=pool_timeout
        )
        method = method.upper()
        if method not in valid_methods:
            raise ValueError(
                f"Invalid HTTP method: {method}. Must be one of {valid_methods}"
            )
        # NOTE(review): leftover ``kwargs`` keys such as 'download',
        # 'filename' and 'verify_ssl' are forwarded into AsyncClient here,
        # which does not accept them — confirm callers never pass them.
        async with httpx.AsyncClient(
            cookies=cookies,
            transport=transport,
            headers=headers,
            proxy=proxy_config or None,
            timeout=timeout,
            # http2=kwargs.pop('use_http2', True),
            follow_redirects=follow_redirects,
            **kwargs
        ) as client:
            try:
                args = {
                    "method": method.upper(),
                    "url": url,
                    "follow_redirects": follow_redirects
                }
                if data:
                    if use_json:
                        args["json"] = data
                    else:
                        args["data"] = data
                if params:
                    args["params"] = params
                # NOTE(review): both branches issue the same call; the
                # ``_httpclient`` path presumably intends to reuse a kept
                # session — confirm.
                if self._httpclient:
                    # keep session alive.
                    response = await client.request(
                        **args
                    )
                else:
                    response = await client.request(**args)
                if raise_for_status:
                    response.raise_for_status()
                if full_response:
                    if self.logger.isEnabledFor(logging.DEBUG):
                        self.logger.debug(
                            f"Response from {url}: status={response.status_code}, headers={response.headers}"
                        )
                    return response, None
                result, error = await self.process_response(
                    response,
                    url,
                    download=kwargs.get('download', False),
                    filename=kwargs.get('filename', None)
                )
                return result, error
            except httpx.TimeoutException:
                self.logger.error("Request timed out.")
                raise
            except httpx.TooManyRedirects:
                self.logger.error("Too many redirects.")
                raise
            except httpx.HTTPStatusError as ex:
                self.logger.error(
                    f"HTTP status error occurred: {ex.response.status_code} - {ex}"
                )
                raise
            except httpx.HTTPError as ex:
                self.logger.error(f"HTTP error occurred: {ex}")
                raise
            except AttributeError as e:
                self.logger.error(f"HTTPx Client doesn't have attribute {method}: {e}")
                raise
            except Exception as exc:
                self.logger.error(f'Unknown Error > {exc}')
                raise RuntimeError(
                    f"An error occurred: {exc}"
                ) from exc
|
|
454
|
+
|
|
455
|
+
@backoff.on_exception(
|
|
456
|
+
backoff.expo, # Use exponential backoff
|
|
457
|
+
(aiohttp.ClientError, # Retry on network-related errors
|
|
458
|
+
aiohttp.ServerTimeoutError, # Retry on timeouts
|
|
459
|
+
aiohttp.ClientResponseError), # Retry on certain HTTP errors
|
|
460
|
+
max_tries=3, # Maximum number of retries
|
|
461
|
+
max_time=60, # Maximum total time to try (in seconds)
|
|
462
|
+
jitter=backoff.full_jitter, # Use full jitter to randomize retry intervals
|
|
463
|
+
giveup=lambda e: isinstance(e, aiohttp.ClientResponseError) and e.status not in [429, 500, 502, 503, 504] # pylint: disable=C0301
|
|
464
|
+
)
|
|
465
|
+
async def async_request(
|
|
466
|
+
self,
|
|
467
|
+
url,
|
|
468
|
+
method: str = 'GET',
|
|
469
|
+
data: dict = None,
|
|
470
|
+
use_ssl: bool = False,
|
|
471
|
+
use_json: bool = False,
|
|
472
|
+
use_proxy: bool = False,
|
|
473
|
+
accept: Optional[str] = None,
|
|
474
|
+
download: bool = False,
|
|
475
|
+
full_response: bool = False
|
|
476
|
+
):
|
|
477
|
+
"""
|
|
478
|
+
Asynchronously sends an HTTP request using aiohttp.
|
|
479
|
+
|
|
480
|
+
:param url: The URL to send the request to.
|
|
481
|
+
:param method: The HTTP method to use (e.g., 'GET', 'POST').
|
|
482
|
+
:param data: The data to send in the request body.
|
|
483
|
+
:param use_json: Whether to send the data as JSON.
|
|
484
|
+
:param use_proxy: force proxy usage.
|
|
485
|
+
:param accept: The accept header to use.
|
|
486
|
+
:param download: Whether to download the response as a file.
|
|
487
|
+
:param full_response: Whether to return the full response object or result processed.
|
|
488
|
+
:return: A tuple containing the result and any error information.
|
|
489
|
+
"""
|
|
490
|
+
result = []
|
|
491
|
+
error = {}
|
|
492
|
+
auth = None
|
|
493
|
+
proxy = None
|
|
494
|
+
ssl_context = None
|
|
495
|
+
|
|
496
|
+
if use_proxy is True:
|
|
497
|
+
self._proxies = await self.get_proxies()
|
|
498
|
+
if self._proxies:
|
|
499
|
+
proxy = random.choice(self._proxies)
|
|
500
|
+
if self.credentials:
|
|
501
|
+
if "apikey" in self.auth:
|
|
502
|
+
self.headers[
|
|
503
|
+
"Authorization"
|
|
504
|
+
] = f"{self.token_type} {self.auth['apikey']}"
|
|
505
|
+
elif self.auth_type == "api_key":
|
|
506
|
+
self.headers = {**self.headers, **self.credentials}
|
|
507
|
+
elif self.auth_type == "key":
|
|
508
|
+
url = self.build_url(
|
|
509
|
+
url,
|
|
510
|
+
args=self.arguments,
|
|
511
|
+
queryparams=urlencode(self.credentials)
|
|
512
|
+
)
|
|
513
|
+
elif self.auth_type in ["basic", "auth", "user"]:
|
|
514
|
+
auth = BasicAuth(
|
|
515
|
+
self.credentials["username"],
|
|
516
|
+
self.credentials["password"]
|
|
517
|
+
)
|
|
518
|
+
elif "apikey" in self.auth:
|
|
519
|
+
self.headers["Authorization"] = f"{self.token_type} {self.auth['apikey']}"
|
|
520
|
+
elif self.auth:
|
|
521
|
+
token_type, token = list(self.auth.items())[0]
|
|
522
|
+
self.headers["Authorization"] = f"{token_type} {token}"
|
|
523
|
+
elif self._user and self.auth_type == "basic":
|
|
524
|
+
auth = BasicAuth(self._user, self._pwd)
|
|
525
|
+
self.logger.debug(
|
|
526
|
+
f"HTTP: Connecting to {url} using {method}",
|
|
527
|
+
level="DEBUG"
|
|
528
|
+
)
|
|
529
|
+
if auth is not None:
|
|
530
|
+
args = {"auth": auth}
|
|
531
|
+
else:
|
|
532
|
+
args = {}
|
|
533
|
+
if use_ssl:
|
|
534
|
+
ssl_context = ssl.create_default_context()
|
|
535
|
+
# Disable older protocols if needed
|
|
536
|
+
ssl_context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1
|
|
537
|
+
# Ensure at least TLS 1.2 is used
|
|
538
|
+
ssl_context.minimum_version = ssl.TLSVersion.TLSv1_2
|
|
539
|
+
ssl_context.check_hostname = False
|
|
540
|
+
ssl_context.verify_mode = ssl.CERT_NONE
|
|
541
|
+
args['ssl'] = True
|
|
542
|
+
args['ssl_context'] = ssl_context
|
|
543
|
+
if accept is not None:
|
|
544
|
+
self.headers["Accept"] = accept
|
|
545
|
+
else:
|
|
546
|
+
self.headers["Accept"] = self.accept
|
|
547
|
+
if download is True:
|
|
548
|
+
self.headers["Accept"] = "application/octet-stream"
|
|
549
|
+
self.headers["Content-Type"] = "application/octet-stream"
|
|
550
|
+
if hasattr(self, "use_streams"):
|
|
551
|
+
self.headers["Transfer-Encoding"] = "chunked"
|
|
552
|
+
args["stream"] = True
|
|
553
|
+
timeout = aiohttp.ClientTimeout(total=self.timeout)
|
|
554
|
+
async with aiohttp.ClientSession(
|
|
555
|
+
headers=self.headers,
|
|
556
|
+
timeout=timeout,
|
|
557
|
+
auth=auth,
|
|
558
|
+
json_serialize=json_encoder,
|
|
559
|
+
) as session:
|
|
560
|
+
try:
|
|
561
|
+
if use_json is True:
|
|
562
|
+
async with session.request(
|
|
563
|
+
method.upper(), url, json=data, proxy=proxy, **args
|
|
564
|
+
) as response:
|
|
565
|
+
if full_response is True:
|
|
566
|
+
return full_response, None
|
|
567
|
+
result, error = await self.process_response(response, url, download=download)
|
|
568
|
+
else:
|
|
569
|
+
async with session.request(
|
|
570
|
+
method.upper(), url, data=data, proxy=proxy, **args
|
|
571
|
+
) as response:
|
|
572
|
+
if full_response is True:
|
|
573
|
+
return full_response, None
|
|
574
|
+
# Process the response
|
|
575
|
+
result, error = await self.process_response(response, url, download=download)
|
|
576
|
+
except aiohttp.ClientError as e:
|
|
577
|
+
error = str(e)
|
|
578
|
+
return (result, error)
|
|
579
|
+
|
|
580
|
+
async def evaluate_error(
|
|
581
|
+
self, response: Union[str, list], message: Union[str, list, dict]
|
|
582
|
+
) -> tuple:
|
|
583
|
+
"""evaluate_response.
|
|
584
|
+
|
|
585
|
+
Check Response status and available payloads.
|
|
586
|
+
Args:
|
|
587
|
+
response (_type_): _description_
|
|
588
|
+
url (str): _description_
|
|
589
|
+
|
|
590
|
+
Returns:
|
|
591
|
+
tuple: _description_
|
|
592
|
+
"""
|
|
593
|
+
if isinstance(response, list):
|
|
594
|
+
# a list of potential errors:
|
|
595
|
+
for msg in response:
|
|
596
|
+
if message in msg:
|
|
597
|
+
return True
|
|
598
|
+
if isinstance(response, dict) and "errors" in response:
|
|
599
|
+
errors = response["errors"]
|
|
600
|
+
if isinstance(errors, list):
|
|
601
|
+
for error in errors:
|
|
602
|
+
try:
|
|
603
|
+
if message in error:
|
|
604
|
+
return True
|
|
605
|
+
except TypeError:
|
|
606
|
+
if message == error:
|
|
607
|
+
return True
|
|
608
|
+
else:
|
|
609
|
+
if message == errors:
|
|
610
|
+
return True
|
|
611
|
+
else:
|
|
612
|
+
if message in response:
|
|
613
|
+
return True
|
|
614
|
+
return False
|
|
615
|
+
|
|
616
|
+
def response_status(self, response):
    """Return the numeric HTTP status of *response*.

    Handles both response objects exposing ``status_code`` and
    aiohttp-style objects exposing ``status``.
    """
    missing = object()
    code = getattr(response, 'status_code', missing)
    if code is missing:
        return response.status
    return code
|
|
620
|
+
|
|
621
|
+
async def response_json(self, response):
    """Decode the response body as JSON.

    Awaits ``.json()`` when it is a coroutine function (aiohttp-style),
    otherwise calls it synchronously.
    """
    json_call = response.json
    if asyncio.iscoroutinefunction(json_call):
        return await json_call()
    return json_call()
|
|
625
|
+
|
|
626
|
+
async def response_read(self, response):
    """Read the raw body bytes of *response*.

    Prefers an ``aread()`` coroutine when the response object provides
    one; otherwise awaits the standard ``read()`` coroutine.
    """
    try:
        reader = response.aread
    except AttributeError:
        reader = response.read
    return await reader()
|
|
630
|
+
|
|
631
|
+
async def response_text(self, response):
    """Return the response body as text.

    Awaits ``.text()`` when it is a coroutine function (aiohttp-style);
    otherwise returns the ``.text`` attribute value directly.
    """
    text = response.text
    if asyncio.iscoroutinefunction(text):
        return await text()
    return text
|
|
635
|
+
|
|
636
|
+
async def response_reason(self, response):
    """Best-effort textual reason phrase for *response*.

    Looks for ``reason`` first, then ``reason_phrase``, falling back to
    an empty bytes literal; the value is always rendered via ``str``.
    """
    fallback = getattr(response, 'reason_phrase', b'')
    reason = getattr(response, 'reason', fallback)
    return str(reason)
|
|
640
|
+
|
|
641
|
+
async def process_response(
    self,
    response, url: str,
    download: bool = False,
    filename: Optional[str] = None
) -> tuple:
    """
    Processes the response from an HTTP request.

    On HTTP status >= 400 the body is decoded (JSON, text, X-Error
    header, or raw bytes), logged with headers for context, matched
    against ``self.no_errors`` (tolerated errors), and otherwise raised
    as ``ConnectionError``.  On success the body is either saved to disk
    (when ``download`` is True) or decoded according to ``self.accept``.

    :param response: The response object from aiohttp.
    :param url: The URL that was requested.
    :param download: Whether to download the response as a file.
    :param filename: The filename to use for downloading the response.
    :return: A tuple containing the processed result and any error information.
    """
    error = None
    result = None
    # Process the response
    status = self.response_status(response)

    if status >= 400:
        # Evaluate response body and headers.
        # NOTE(review): stray debug prints — these should go through
        # self.logger like the rest of this method.
        print(" == ERROR Headers == ")
        print(f"{response.headers}")
        content_type = response.headers.get("Content-Type", "").lower()
        # Decode the error payload in the most faithful form available.
        if "application/json" in content_type:
            message = await self.response_json(response)
        elif "text/" in content_type:
            message = await self.response_text(response)
        elif "X-Error" in response.headers:
            message = response.headers["X-Error"]
        else:
            # Fallback to a unified read method for the raw body content
            message = await self.response_read(response)

        # Combine response headers and body for enriched logging
        error_context = {
            "status": status,
            "reason": await self.response_reason(response),
            "headers": response.headers,
            "body": message
        }

        # Log the detailed error context
        self.logger.error(
            f"Error: {error_context}"
        )

        # Additional error handling or custom evaluation based on status:
        # `no_errors` maps status codes to messages that are tolerated;
        # on a match, the raw response and status are returned instead
        # of raising.
        if self.no_errors:
            for key, msg in self.no_errors.items():
                if int(key) == status and await self.evaluate_error(message, msg):
                    return response, status

        # Raise an exception if error handling does not continue
        raise ConnectionError(
            f"HTTP Error {status}: {message!s}"
        )
    else:
        if download is True:
            if not filename:
                filename = os.path.basename(url)
            # Get the filename from the response headers, if available
            content_disposition = response.headers.get("content-disposition")
            if content_disposition:
                # Parse the header via email.message.Message to honor
                # both `filename` and RFC 5987 `filename*` parameters.
                msg = Message()
                msg["Content-Disposition"] = response.headers.get("content-disposition")
                filename = msg.get_param("filename", header="Content-Disposition")
                utf8_filename = msg.get_param("filename*", header="Content-Disposition")
                if utf8_filename:
                    # filename* has the form: charset''percent-encoded-name
                    _, utf8_filename = utf8_filename.split("''", 1)
                    filename = parse.unquote(utf8_filename)
            # NOTE(review): "(unknown)" below looks like a redaction
            # artifact in the published source (likely "{filename}"
            # originally) — confirm against the repository.
            if "(unknown)" in str(filename):
                filename = str(filename).format_map(
                    SafeDict(filename=filename)
                )
            # Expand remaining {placeholders} from the client arguments.
            if "{" in str(filename):
                filename = str(filename).format_map(
                    SafeDict(**self.arguments)
                )
            if isinstance(filename, str):
                filename = Path(filename)
            # Saving File in Directory:
            total_length = response.headers.get("Content-Length")
            self.logger.info(
                f"HTTPClient: Saving File (unknown), size: {total_length}"
            )
            pathname = filename.parent.absolute()
            if not pathname.exists():
                # Create a new directory
                pathname.mkdir(parents=True, exist_ok=True)
            transfer = response.headers.get("transfer-encoding", None)
            if transfer is None:
                # Not chunked: read the whole declared length at once.
                # NOTE(review): raises TypeError when Content-Length is
                # also absent (int(None)) — confirm intended behavior.
                chunk_size = int(total_length)
            else:
                chunk_size = 8192
            # Asynchronous file writing
            if filename.exists() and filename.is_file():
                self.logger.warning(
                    f"HTTPClient: File Already exists: (unknown)"
                )
                # Filename already exists
                result = filename
                return result, error
            if self.use_streams is True:
                # Stream the body to disk chunk-by-chunk via aiofiles.
                async with aiofiles.open(filename, "wb") as file:
                    async for chunk in response.content.iter_chunked(chunk_size):
                        await file.write(chunk)
            else:
                with open(filename, "wb") as fp:
                    try:
                        fp.write(await self.response_read(response))
                    except Exception:
                        # NOTE(review): write errors are silently
                        # swallowed and the file is still reported as
                        # saved below — confirm this is intentional.
                        pass
            self.logger.debug(
                f"Filename Saved Successfully: (unknown)"
            )
            result = filename
        else:
            # Decode the body according to the configured Accept type.
            if self.accept == 'application/octet-stream':
                data = await self.response_read(response)
                buffer = BytesIO(data)
                buffer.seek(0)
                result = buffer
            # NOTE(review): `in ('text/html')` is a substring test on a
            # plain string, not tuple membership — likely meant
            # ('text/html',); e.g. self.accept == 'html' also matches.
            elif self.accept in ('text/html'):
                result = await self.response_read(response)
                try:
                    # html parser for lxml
                    self._parser = html.fromstring(result)
                    # BeautifulSoup parser
                    self._bs = bs(response.text, self._default_parser)
                    result = self._bs
                except Exception as e:
                    error = e
            elif self.accept in ('application/xhtml+xml', 'application/xml'):
                result = await self.response_read(response)
                try:
                    self._parser = etree.fromstring(result)  # pylint: disable=I1101
                except etree.XMLSyntaxError:  # pylint: disable=I1101
                    # Not well-formed XML; fall back to the lenient HTML parser.
                    self._parser = html.fromstring(result)
                except Exception as e:
                    error = e
            elif self.accept == "application/json":
                try:
                    result = await self.response_json(response)
                except Exception as e:
                    logging.error(e)
                    # is not an json, try first with beautiful soup:
                    try:
                        self._bs = bs(
                            await self.response_text(response),
                            self._default_parser
                        )
                        result = self._bs
                    except Exception:
                        # Report the original JSON decode error.
                        error = e
            elif self.as_binary is True:
                result = await self.response_read(
                    response
                )
            else:
                result = await self.response_text(
                    response
                )
    return result, error
|