abstract-webtools 0.1.6.138__py3-none-any.whl → 0.1.6.140__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/managers/__init__.py +1 -0
- abstract_webtools/managers/linkManager/linkManager.py +46 -24
- abstract_webtools/managers/middleManager/__init__.py +1 -0
- abstract_webtools/managers/middleManager/imports.py +18 -0
- abstract_webtools/managers/middleManager/src/UnifiedWebManager.py +135 -0
- abstract_webtools/managers/middleManager/src/__init__.py +2 -0
- abstract_webtools/managers/middleManager/src/legacy_tools.py +8 -0
- abstract_webtools/managers/requestManager/requestManager.py +225 -181
- abstract_webtools/managers/soupManager/soupManager.py +10 -10
- abstract_webtools/managers/urlManager/urlManager.py +20 -28
- {abstract_webtools-0.1.6.138.dist-info → abstract_webtools-0.1.6.140.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.138.dist-info → abstract_webtools-0.1.6.140.dist-info}/RECORD +14 -9
- {abstract_webtools-0.1.6.138.dist-info → abstract_webtools-0.1.6.140.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.6.138.dist-info → abstract_webtools-0.1.6.140.dist-info}/top_level.txt +0 -0
abstract_webtools/managers/linkManager/linkManager.py CHANGED

@@ -2,18 +2,7 @@ from ...abstract_webtools import *
 from ..urlManager import *
 from ..requestManager import *
 from ..soupManager import *
-def get_url(url=None,url_mgr=None):
-    if not url and not url_mgr:
-        return None
-    if url:
-        url_mgr = urlManager(url)
-    return url_mgr.url
-def get_url_mgr(url=None,url_mgr=None):
-    if not url and not url_mgr:
-        return None
-    if url:
-        url_mgr = urlManager(url)
-    return url_mgr
+from ..middleManager import *
 class linkManager:
     """
     LinkManager is a class for managing and extracting links and image links from a web page.

@@ -50,13 +39,40 @@ class linkManager:
     - The LinkManager class helps manage and extract links and image links from web pages.
     - The class provides flexibility in specifying criteria for link extraction.
     """
-    def __init__(self,
-    [five removed signature lines not captured in this diff view]
+    def __init__(self,
+                 url=None,
+                 source_code=None,
+                 soup=None,
+                 url_mgr=None,
+                 req_mgr=None,
+                 soup_mgr=None,
+                 image_link_tags='img',
+                 img_link_attrs='src',
+                 link_tags='a',
+                 link_attrs='href',
+                 strict_order_tags=False,
+                 img_attr_value_desired=None,
+                 img_attr_value_undesired=None,
+                 link_attr_value_desired=None,
+                 link_attr_value_undesired=None,
+                 associated_data_attr=["data-title",'alt','title'],
+                 get_img=["data-title",'alt','title']
+                 ):
+
+
+        all_tools = get_soup_tools(
+            url=url,
+            url_mgr=url_mgr,
+            source_code=source_code,
+            req_mgr=req_mgr,
+            soup=soup,
+            soup_mgr=soup_mgr,
+            target_manager = self
+        )
 
+
+
+
         self.strict_order_tags=strict_order_tags
         self.image_link_tags=image_link_tags
         self.img_link_attrs=img_link_attrs

@@ -171,19 +187,25 @@ class linkManager:
         if url not in domains_ls and comp_domain == domain:
             domains_ls.append(url)
         return domains_ls
+
     def find_all_desired_links(self,tag='img', attr='src',attr_value_desired=None,strict_order_tags=False,attr_value_undesired=None,associated_data_attr=None,all_desired=None,get_img=None):
         all_desired = all_desired or self.find_all_desired(tag=tag,attr=attr,strict_order_tags=strict_order_tags,attr_value_desired=attr_value_desired,attr_value_undesired=attr_value_undesired,associated_data_attr=associated_data_attr,get_img=get_img)
         assiciated_attrs = all_desired[-1]
         valid_assiciated_attrs = []
         desired_links=[]
         for i,attr in enumerate(all_desired[:-1]):
-    [five removed loop-body lines not captured in this diff view]
+
+            self.url_mgr.domain = self.url_mgr.domain or ''
+
+            self.url_mgr.protocol = self.url_mgr.protocol or 'https'
+
+            if attr:
+                valid_attr=self.url_mgr.make_valid(attr,self.url_mgr.protocol+'://'+self.url_mgr.domain)
+                if valid_attr:
+                    desired_links.append(valid_attr)
+                    valid_assiciated_attrs.append(assiciated_attrs[i])
+                    valid_assiciated_attrs[-1]["link"]=valid_attr
         desired_links.append(valid_assiciated_attrs)
         return desired_links
 
-
 
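The net effect of these linkManager hunks: the module-local get_url/get_url_mgr duplicates are deleted and __init__ delegates all wiring to the new middleManager layer. A minimal usage sketch, assuming linkManager is importable from abstract_webtools.managers and using an illustrative URL:

# Sketch only; exercises the new __init__ path shown above.
from abstract_webtools.managers import linkManager

link_mgr = linkManager(url="https://example.com")  # illustrative URL

# get_soup_tools(..., target_manager=self) endows url, url_mgr, source_code,
# req_mgr, soup and soup_mgr onto the instance, so these exist immediately:
print(link_mgr.url)
print(type(link_mgr.soup))

# find_all_desired_links() now defaults domain='' and protocol='https' before
# calling make_valid(), instead of failing on a urlManager built without a URL.
links = link_mgr.find_all_desired_links(tag='a', attr='href')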
abstract_webtools/managers/middleManager/__init__.py ADDED

@@ -0,0 +1 @@
+from .src import *
abstract_webtools/managers/middleManager/imports.py ADDED

@@ -0,0 +1,18 @@
+import logging
+from ..urlManager import (
+    urlManager,
+    get_url,
+    get_url_mgr
+)
+from ..requestManager import (
+    requestManager,
+    get_source,
+    get_req_mgr
+)
+from ..soupManager import (
+    soupManager,
+    get_soup,
+    get_soup_mgr
+)
+from bs4 import BeautifulSoup
+logging.basicConfig(level=logging.INFO)
abstract_webtools/managers/middleManager/src/UnifiedWebManager.py ADDED

@@ -0,0 +1,135 @@
+from ..imports import *
+
+class UnifiedWebManager:
+    """
+    Unified middleware that ties together URL, request, and soup managers.
+    Lazily initializes components based on provided inputs.
+
+    Args:
+        url (str or None): The base URL.
+        source_code (str or bytes or None): Pre-fetched source code.
+        url_mgr (urlManager or None): Existing URL manager.
+        req_mgr (requestManager or None): Existing request manager.
+        soup_mgr (soupManager or None): Existing soup manager.
+        parse_type (str): Parser type for BeautifulSoup (default: "html.parser").
+    """
+    def __init__(self, url=None, source_code=None, url_mgr=None, req_mgr=None, soup_mgr=None,soup=None, parse_type="html.parser"):
+        self._url = url
+        self._source_code = source_code
+        self._url_mgr = url_mgr
+        self._req_mgr = req_mgr
+        self._soup_mgr = soup_mgr
+        self._parse_type = parse_type
+        self._soup = None  # Lazy
+
+    @property
+    def url_mgr(self):
+        if self._url_mgr is None:
+            if self._url is None:
+                logging.warning("No URL provided; URL manager cannot be created.")
+                return None
+            self._url_mgr = urlManager(url=self._url)
+        return self._url_mgr
+
+    @property
+    def url(self):
+        if self._url is None and self.url_mgr:
+            self._url = self.url_mgr.url
+        return self._url
+
+    @property
+    def req_mgr(self):
+        if self._req_mgr is None:
+            self._req_mgr = requestManager(
+                url=self.url,
+                url_mgr=self.url_mgr,
+                source_code=self._source_code
+            )
+        return self._req_mgr
+
+    @property
+    def source_code(self):
+        if self._source_code is None and self.req_mgr:
+            self._source_code = self.req_mgr.source_code
+        return self._source_code
+
+    @property
+    def soup_mgr(self):
+        if self._soup_mgr is None:
+            self._soup_mgr = soupManager(
+                url=self.url,
+                url_mgr=self.url_mgr,
+                req_mgr=self.req_mgr,
+                source_code=self.source_code
+            )
+        return self._soup_mgr
+
+    @property
+    def soup(self):
+        if self._soup is None:
+            source = self.source_code
+            if source is None:
+                logging.warning("No source code available; Soup cannot be created.")
+                return None
+            if isinstance(source, bytes):
+                source = source.decode('utf-8', errors='ignore')
+            self._soup = BeautifulSoup(source, self._parse_type)
+        return self._soup
+
+    def update_url(self, url):
+        """Update the URL and reset dependent managers."""
+        self._url = url
+        self._url_mgr = None
+        self._req_mgr = None
+        self._soup_mgr = None
+        self._source_code = None
+        self._soup = None
+
+    def update_source_code(self, source_code):
+        """Update the source code and reset dependent managers."""
+        self._source_code = source_code
+        self._req_mgr = None
+        self._soup_mgr = None
+        self._soup = None
+
+    # Convenience methods for direct access
+    def get_all_tools(self):
+        """Return a dict with all components (similar to original getters)."""
+        return {
+            'url': self.url,
+            'url_mgr': self.url_mgr,
+            'source_code': self.source_code,
+            'req_mgr': self.req_mgr,
+            'soup': self.soup,
+            'soup_mgr': self.soup_mgr
+        }
+    def endow_to_manager(self, target_manager, all_tools=None):
+        """
+        Endow (assign) the attributes from all_tools to the target manager instance.
+
+        Args:
+            target_manager: The instance (e.g., another manager class) to endow attributes to.
+            all_tools (dict or None): Optional dict of tools/attributes. If None, uses self.get_all_tools().
+        """
+        if all_tools is None:
+            all_tools = self.get_all_tools()
+        for key, value in all_tools.items():
+            setattr(target_manager, key, value)
+# Wrapper functions for backward compatibility
+def get_url_tools(url=None, url_mgr=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr)
+    return {'url': mgr.url, 'url_mgr': mgr.url_mgr}
+
+def get_req_tools(url=None, url_mgr=None, source_code=None, req_mgr=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr)
+    return {'url': mgr.url, 'url_mgr': mgr.url_mgr, 'source_code': mgr.source_code, 'req_mgr': mgr.req_mgr}
+
+def get_soup_tools(url=None, url_mgr=None, source_code=None, req_mgr=None, soup=None, soup_mgr=None,target_manager=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr, soup_mgr=soup_mgr)
+    if soup is not None:
+        mgr._soup = soup  # Allow overriding
+    if target_manager:
+        mgr.endow_to_manager(target_manager, all_tools=None)
+    return mgr.get_all_tools()
+
+
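Because this whole file is new, a short sketch of how the lazy chain resolves may help; the import path assumes src/__init__.py re-exports these names (its two lines are not shown in this diff), and the inline HTML is illustrative:

# Sketch only; both entry points are defined in the file above.
from abstract_webtools.managers.middleManager import UnifiedWebManager, get_soup_tools

# URL-driven: nothing is fetched until .soup or .source_code is touched.
web = UnifiedWebManager(url="https://example.com")
soup = web.soup  # resolves url_mgr -> req_mgr -> source_code -> BeautifulSoup

# Source-driven: no URL and no network round-trip.
offline = UnifiedWebManager(source_code="<html><a href='/x'>x</a></html>")
print(offline.soup.find('a')['href'])  # -> /x

# The wrapper linkManager uses: builds the manager, optionally endows a target
# instance via setattr, and returns the tools dict (url/url_mgr log a warning
# and stay None when no URL was given).
tools = get_soup_tools(source_code="<p>hi</p>")
print(sorted(tools))  # ['req_mgr', 'soup', 'soup_mgr', 'source_code', 'url', 'url_mgr']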
abstract_webtools/managers/requestManager/requestManager.py CHANGED

@@ -1,5 +1,4 @@
 from ...abstract_webtools import *
-
 from ..userAgentManager import *
 from ..cipherManager import *
 from ..sslManager import *

@@ -7,10 +6,14 @@ from ..tlsAdapter import *
 from ..networkManager import *
 from ..seleniumManager import *
 from ..urlManager import *
+logging.basicConfig(level=logging.INFO)
+
 class requestManager:
     """
-    [two removed docstring lines not captured in this diff view]
+    requestManager is a class for making HTTP requests with error handling and retries.
+    It supports initializing with a provided source_code without requiring a URL.
+    If source_code is provided, it uses that as the response content and skips fetching.
+    Enhanced to parse source_code for URLs, PHP blocks, and React/JS data even if not HTML.
     Args:
         url (str or None): The URL to make requests to (default is None).
         url_mgr (urlManager or None): An instance of urlManager (default is None).
@@ -57,278 +60,320 @@ class requestManager:
     - It provides methods for authentication, response handling, and error management.
     """
     def __init__(self,
-    [81 removed lines (old 60-140): the previous __init__ signature and body were not captured in this diff view]
+                 url=None,
+                 source_code=None,
+                 url_mgr=None,
+                 network_manager=None,
+                 user_agent_manager=None,
+                 ssl_manager=None,
+                 ssl_options=None,
+                 tls_adapter=None,
+                 user_agent=None,
+                 proxies=None,
+                 headers=None,
+                 cookies=None,
+                 session=None,
+                 adapter=None,
+                 protocol=None,
+                 ciphers=None,
+                 spec_login=False,
+                 login_referer=None,
+                 login_user_agent=None,
+                 auth=None,
+                 login_url=None,
+                 email=None,
+                 password=None,
+                 checkbox=None,
+                 dropdown=None,
+                 certification=None,
+                 stream=False,
+                 timeout=None,
+                 last_request_time=None,
+                 max_retries=None,
+                 request_wait_limit=None):
+        self.url_mgr = get_url_mgr(url=url, url_mgr=url_mgr)
+        self.url = get_url(url=url, url_mgr=self.url_mgr)
+        self._url_mgr = self.url_mgr
+        self._url = self.url
+        self.user_agent = user_agent
+        self.user_agent_manager = user_agent_manager or UserAgentManager(user_agent=self.user_agent)
+        self.headers = headers or self.user_agent_manager.header or {'Accept': '*/*'}
+        self.user_agent = self.user_agent_manager.user_agent
+        self.ciphers = ciphers or CipherManager().ciphers_string
+        self.certification = certification
+        self.ssl_options = ssl_options
+        self.ssl_manager = ssl_manager or SSLManager(ciphers=self.ciphers, ssl_options=self.ssl_options, certification=self.certification)
+        self.tls_adapter = tls_adapter or TLSAdapter(ssl_manager=self.ssl_manager, certification=self.certification, ssl_options=self.ssl_manager.ssl_options)
+        self.network_manager = network_manager or NetworkManager(user_agent_manager=self.user_agent_manager, ssl_manager=self.ssl_manager, tls_adapter=self.tls_adapter, user_agent=user_agent, proxies=proxies, cookies=cookies, ciphers=ciphers, certification=certification, ssl_options=ssl_options)
+        self.stream = stream
+        self.tls_adapter = self.network_manager.tls_adapter
+        self.ciphers = self.network_manager.ciphers
+        self.certification = self.network_manager.certification
+        self.ssl_options = self.network_manager.ssl_options
+        self.proxies = self.network_manager.proxies
+        self.timeout = timeout
+        self.cookies = self.network_manager.cookies
+        self.session = session or requests.Session()
+        self.auth = auth
+        self.spec_login = spec_login
+        self.password = password
+        self.email = email
+        self.checkbox = checkbox
+        self.dropdown = dropdown
+        self.login_url = login_url
+        self.login_user_agent = login_user_agent
+        self.login_referer = login_referer
+        self.protocol = protocol or 'https://'
+        self.stream = stream if isinstance(stream, bool) else False
+        self.initialize_session()
+        self.last_request_time = last_request_time
+        self.max_retries = max_retries or 3
+        self.request_wait_limit = request_wait_limit or 1.5
+        self._response = None
+        self.status_code = None
+        self.source_code = None
+        self.source_code_bytes = None
+        self.source_code_json = {}
+        self.react_source_code = []
+        self.extracted_urls = []
+        self.php_blocks = []
+        self._response_data = None
+        if source_code is not None:
+            self._response = source_code
+            self.process_response_data()
+        else:
+            self.re_initialize()
+
+    def update_url_mgr(self, url_mgr):
+        self.url_mgr = url_mgr
         self.re_initialize()
-
+
+    def update_url(self, url):
         self.url_mgr.update_url(url=url)
         self.re_initialize()
+
     def re_initialize(self):
-        self._response=None
-        self.
+        self._response = None
+        if self.url_mgr.url is not None:
+            self.make_request()
         self.source_code = None
-        self.source_code_bytes=None
+        self.source_code_bytes = None
         self.source_code_json = {}
-        self.react_source_code=[]
+        self.react_source_code = []
+        self.extracted_urls = []
+        self.php_blocks = []
         self._response_data = None
         self.process_response_data()
+
     @property
     def response(self):
         """Lazy-loading of response."""
-        if self._response is None:
+        if self._response is None and self.url_mgr.url is not None:
             self._response = self.fetch_response()
-
-
         return self._response
-
-
+
+    def authenticate(self, session, login_url=None, email=None, password=None, checkbox=None, dropdown=None):
+        login_urls = login_url or [self.url_mgr.url, self.url_mgr.domain, self.url_mgr.url_join(url=self.url_mgr.domain, path='login'), self.url_mgr.url_join(url=self.url_mgr.domain, path='auth')]
         s = session
-        if not isinstance(login_urls,list):
-            login_urls=[login_urls]
+        if not isinstance(login_urls, list):
+            login_urls = [login_urls]
         for login_url in login_urls:
             login_url_mgr = urlManager(login_url)
             login_url = login_url_mgr.url
-
             r = s.get(login_url)
             soup = BeautifulSoup(r.content, "html.parser")
             # Find the token or any CSRF protection token
             token = soup.find('input', {'name': 'token'}).get('value') if soup.find('input', {'name': 'token'}) else None
-            if token
+            if token is not None:
                 break
         login_data = {}
-        if email
-            login_data['email']=email
-        if password
+        if email is not None:
+            login_data['email'] = email
+        if password is not None:
             login_data['password'] = password
-        if checkbox
+        if checkbox is not None:
             login_data['checkbox'] = checkbox
-        if dropdown
-            login_data['dropdown']=dropdown
-        if token
+        if dropdown is not None:
+            login_data['dropdown'] = dropdown
+        if token is not None:
             login_data['token'] = token
         s.post(login_url, data=login_data)
         return s
 
-    def fetch_response(self) ->
+    def fetch_response(self) -> requests.Response | None | str | bytes:
         """Actually fetches the response from the server."""
-        # You can further adapt this method to use retries or other logic you had
-        # in your original code, but the main goal here is to fetch and return the response
         return self.try_request()
+
     def spec_auth(self, session=None, email=None, password=None, login_url=None, login_referer=None, login_user_agent=None):
-        s = session or requests.
-
-        domain = self.url_mgr.url_join(self.url_mgr.get_correct_url(self.url_mgr.domain),'login') if login_url is None else login_url
+        s = session or requests.Session()
+        domain = self.url_mgr.url_join(self.url_mgr.get_correct_url(self.url_mgr.domain), 'login') if login_url is None else login_url
         login_url = self.url_mgr.get_correct_url(url=domain)
-
         login_referer = login_referer or self.url_mgr.url_join(url=login_url, path='?role=fast&to=&s=1&m=1&email=YOUR_EMAIL')
         login_user_agent = login_user_agent or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:50.0) Gecko/20100101 Firefox/50.0'
-
         headers = {"Referer": login_referer, 'User-Agent': login_user_agent}
         payload = {'email': email, 'pass': password}
-
         page = s.get(login_url)
         soup = BeautifulSoup(page.content, 'lxml')
         action_url = soup.find('form')['action']
         s.post(action_url, data=payload, headers=headers)
         return s
+
     def initialize_session(self):
-        s = self.session
+        s = self.session
         if self.auth:
-            s= self.auth
+            s = self.auth
         elif self.spec_login:
-            s=self.spec_auth(session=s,email=self.email, password=self.password, login_url=self.login_url, login_referer=self.login_referer, login_user_agent=self.login_user_agent)
+            s = self.spec_auth(session=s, email=self.email, password=self.password, login_url=self.login_url, login_referer=self.login_referer, login_user_agent=self.login_user_agent)
         elif any([self.password, self.email, self.login_url, self.checkbox, self.dropdown]):
-            s=self.authenticate(session=s, login_url=self.login_url, email=self.email, password=self.password, checkbox=self.checkbox, dropdown=self.dropdown)
+            s = self.authenticate(session=s, login_url=self.login_url, email=self.email, password=self.password, checkbox=self.checkbox, dropdown=self.dropdown)
         s.proxies = self.proxies
         s.cookies["cf_clearance"] = self.network_manager.cookies
         s.headers.update(self.headers)
         s.mount(self.protocol, self.network_manager.tls_adapter)
         return s
+
     def process_response_data(self):
         """Processes the fetched response data."""
         if not self.response:
             return # No data to process
-        if
-
+        if isinstance(self.response, (str, bytes)):
+            if isinstance(self.response, str):
+                self.source_code = self.response
+                self.source_code_bytes = self.response.encode('utf-8') # Assume UTF-8
+            else:
+                self.source_code_bytes = self.response
+                try:
+                    self.source_code = self.response.decode('utf-8')
+                except UnicodeDecodeError:
+                    self.source_code = self.response.decode('latin-1') # Fallback
+            # Check if it's JSON
+            try:
+                data = json.loads(self.source_code)
+                self.source_code_json = data.get("response", data)
+            except json.JSONDecodeError:
+                pass
         else:
             self.source_code = self.response.text
             self.source_code_bytes = self.response.content
-            if self.response.headers.get('content-type'
-
-
+            if self.response.headers.get('content-type', '').startswith('application/json'):
+                try:
+                    data = json.loads(self.source_code)
                     self.source_code_json = data.get("response", data)
-    [five removed lines (old 239-243) not captured in this diff view]
+                except json.JSONDecodeError:
+                    pass
+        self.extract_urls()
+        self.extract_php_blocks()
+        self.get_react_source_code()
+
+    def extract_urls(self):
+        """Extract URLs from source_code using regex."""
+        if not self.source_code:
+            return
+        url_pattern = r'https?://[^\s<>"\']+'
+        self.extracted_urls = re.findall(url_pattern, self.source_code)
 
-
-
+    def extract_php_blocks(self):
+        """Extract PHP blocks from source_code if present."""
+        if not self.source_code:
+            return
+        php_pattern = r'<\?php(.*?)?\?>'
+        self.php_blocks = re.findall(php_pattern, self.source_code, re.DOTALL)
 
-
-            list: A list of strings containing JavaScript and JSX source code found in <script> tags.
+    def get_react_source_code(self) -> list:
         """
-        if
+        Extracts JavaScript and JSX source code from <script> tags if HTML-like.
+        If not HTML and looks like JS/React code, appends the whole source_code.
+        """
+        if not self.source_code:
             return []
-
-
+        # Check if likely JS code (e.g., contains 'import', 'function', 'React')
+        is_js_like = any(keyword in self.source_code.lower() for keyword in ['import ', 'function ', 'react', 'export ', 'const ', 'let ', 'var '])
+        # Check if HTML-like
+        is_html_like = self.source_code.strip().startswith('<') or '<html' in self.source_code.lower() or '<!doctype' in self.source_code.lower()
+        if not is_html_like and is_js_like:
+            self.react_source_code.append(self.source_code)
+            return self.react_source_code
+        content = self.source_code_bytes or self.source_code.encode('utf-8')
+        soup = BeautifulSoup(content, "html.parser")
+        script_tags = soup.find_all('script', type=lambda t: t and ('javascript' in t.lower() or 'jsx' in t.lower()))
         for script_tag in script_tags:
-
+            if script_tag.string:
+                self.react_source_code.append(script_tag.string)
+        # If no scripts found but JS-like, append whole
+        if not script_tags and is_js_like:
+            self.react_source_code.append(self.source_code)
+        return self.react_source_code
 
-
-    def get_status(url:str=None) -> int:
+    def get_status(self, url: str = None) -> int | None:
         """
         Gets the HTTP status code of the given URL.
-
-        Args:
-            url (str): The URL to check the status of.
-
-        Returns:
-            int: The HTTP status code of the URL, or None if the request fails.
         """
-
-
+        url = url or self.url_mgr.url
+        if url is None:
+            return None
+        try:
+            response = requests.head(url, timeout=5)
+            return response.status_code
+        except requests.RequestException:
+            return None
+
     def wait_between_requests(self):
         """
         Wait between requests based on the request_wait_limit.
         """
         if self.last_request_time:
-            sleep_time = self.request_wait_limit - (
+            sleep_time = self.request_wait_limit - (time.time() - self.last_request_time)
             if sleep_time > 0:
                 logging.info(f"Sleeping for {sleep_time:.2f} seconds.")
-
+                time.sleep(sleep_time)
 
     def make_request(self):
         """
         Make a request and handle potential errors.
         """
-
-
+        if self.url_mgr.url is None:
+            return None
         self.wait_between_requests()
         for _ in range(self.max_retries):
             try:
-                self.try_request()
-                if self.
-                    self.
-
-                    self.
-
-
-
+                self._response = self.try_request()
+                if self._response:
+                    if not isinstance(self._response, (str, bytes)):
+                        self.status_code = self._response.status_code
+                        if self._response.status_code == 200:
+                            self.last_request_time = time.time()
+                            return self._response
+                        elif self._response.status_code == 429:
+                            logging.warning(f"Rate limited by {self.url_mgr.url}. Retrying...")
+                            time.sleep(5)
             except requests.Timeout as e:
-                logging.error(f"Request to {
+                logging.error(f"Request to {self.url_mgr.url} timed out: {e}")
             except requests.ConnectionError:
                 logging.error(f"Connection error for URL {self.url_mgr.url}.")
-            except requests.Timeout:
-                logging.error(f"Request timeout for URL {self.url_mgr.url}.")
             except requests.RequestException as e:
                 logging.error(f"Request exception for URL {self.url_mgr.url}: {e}")
         try:
             response = get_selenium_source(self.url_mgr.url)
             if response:
-                self.
-
-
-
+                self._response = response
+                self.status_code = 200 # Assume success
+                return self._response
+        except Exception as e:
+            logging.error(f"Failed to retrieve content from {self.url_mgr.url} after {self.max_retries} retries: {e}")
         return None
-
+
+    def try_request(self) -> requests.Response | str | bytes | None:
         """
         Tries to make an HTTP request to the given URL using the provided session.
-
-        Args:
-            timeout (int): Timeout for the request.
-
-        Returns:
-            requests.Response or None: The response object if the request is successful, or None if the request fails.
         """
+        if self.url_mgr.url is None:
+            return None
         try:
-            return get_selenium_source(self.url_mgr.url)#self.session.get(
-        except requests.
-
+            return get_selenium_source(self.url_mgr.url) # or self.session.get(self.url_mgr.url, timeout=self.timeout, stream=self.stream)
+        except requests.RequestException as e:
+            logging.error(f"Request failed: {e}")
             return None
 
-
     @property
     def url(self):
         return self.url_mgr.url
@@ -345,12 +390,11 @@ class SafeRequestSingleton:
         elif SafeRequestSingleton._instance.url != url or SafeRequestSingleton._instance.headers != headers or SafeRequestSingleton._instance.max_retries != max_retries or SafeRequestSingleton._instance.request_wait_limit != request_wait_limit:
             SafeRequestSingleton._instance = SafeRequest(url,url_mgr=urlManagerSingleton,headers=headers,max_retries=max_retries,last_request_time=last_request_time,request_wait_limit=request_wait_limit)
         return SafeRequestSingleton._instance
+def get_source(url=None,url_mgr=None,source_code=None,req_mgr=None):
+    req_mgr = get_req_mgr(req_mgr=req_mgr,url=url,url_mgr=url_mgr,source_code=source_code)
+    return req_mgr.source_code
 def get_req_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None):
     url = get_url(url=url,url_mgr=url_mgr)
     url_mgr = get_url_mgr(url=url,url_mgr=url_mgr )
     req_mgr = req_mgr or requestManager(url_mgr=url_mgr,url=url,source_code=source_code)
     return req_mgr
-def get_source(url=None,url_mgr=None,source_code=None,req_mgr=None):
-    # Placeholder for actual implementation.
-    req_mgr = get_req_mgr(req_mgr=req_mgr,url=url,url_mgr=url_mgr,source_code=source_code)
-    return req_mgr.source_code
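The headline change in requestManager is the source_code-first path: a string or bytes payload is treated as the response, no fetch is attempted, and process_response_data() additionally scans for URLs, PHP blocks, and React/JS code. A rough sketch inferred from the diffed __init__ and process_response_data (inputs are illustrative):

# Sketch only; behavior inferred from the hunks above.
from abstract_webtools.managers.requestManager.requestManager import requestManager

# JSON text is detected and the "response" key unwrapped into source_code_json.
req = requestManager(source_code='{"response": {"ok": true}}')
print(req.source_code_json)   # {'ok': True}

# Plain text still gets the regex passes for URLs and PHP blocks.
req2 = requestManager(source_code="docs at https://example.com/docs <?php echo 1; ?>")
print(req2.extracted_urls)    # ['https://example.com/docs']
print(req2.php_blocks)        # [' echo 1; ']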
abstract_webtools/managers/soupManager/soupManager.py CHANGED

@@ -49,7 +49,7 @@ class soupManager:
         source_code = source_code or self.req_mgr.source_code or self.req_mgr.source_code_bytes
         if source_code:
             source_code = str(source_code)
-        self.source_code = source_code
+        self.source_code = source_code or ''
         self.soup= BeautifulSoup(self.source_code, self.parse_type)
         self.all_tags_and_attribute_names = self.get_all_tags_and_attribute_names()
         self.all_tags = self.all_tags_and_attribute_names.get('tags')

@@ -340,15 +340,6 @@ class SoupManagerSingleton():
         elif parse_type != SoupManagerSingleton._instance.parse_type or source_code != SoupManagerSingleton._instance.source_code:
             SoupManagerSingleton._instance = SoupManager(url_mgr,requestManager,parse_type=parse_type,source_code=source_code)
         return SoupManagerSingleton._instance
-def get_soup_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,parse_type="html.parser"):
-    url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
-    url = get_url(url=url,url_mgr=url_mgr)
-    req_mgr = get_req_mgr(url_mgr=url_mgr,url=url,source_code=source_code)
-    soup_mgr = soup_mgr or soupManager(url_mgr=url_mgr,req_mgr=req_mgr,url=url,source_code=source_code)
-    return soup_mgr
-def get_all_attribute_values(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,tags_list = None,parse_type="html.parser"):
-    soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr)
-    return soup_mgr.get_all_attribute_values(tags_list=tags_list)
 def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,parse_type="html.parser"):
     if source_code or soup_mgr:
         if soup_mgr:

@@ -360,3 +351,12 @@ def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,parse_type="html.parser"):
     source_code = req_mgr.source_code
     soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr)
     return soup_mgr.soup
+def get_soup_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,parse_type="html.parser"):
+    url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
+    url = get_url(url=url,url_mgr=url_mgr)
+    req_mgr = get_req_mgr(url_mgr=url_mgr,url=url,source_code=source_code)
+    soup_mgr = soup_mgr or soupManager(url_mgr=url_mgr,req_mgr=req_mgr,url=url,source_code=source_code)
+    return soup_mgr
+def get_all_attribute_values(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,tags_list = None,parse_type="html.parser"):
+    soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr)
+    return soup_mgr.get_all_attribute_values(tags_list=tags_list)
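Moving get_soup_mgr and get_all_attribute_values below get_soup is harmless in Python: module-level names resolve at call time, not at definition time, so get_soup can reference a helper defined later in the file. A quick sketch of the helpers, assuming the source_code path builds a soupManager the way the visible tail of get_soup suggests:

# Sketch only; fed a local string so nothing is fetched.
from abstract_webtools.managers.soupManager.soupManager import get_soup, get_all_attribute_values

html = "<html><body><a href='/a' title='A'>A</a></body></html>"
print(get_soup(source_code=html).find('a')['href'])  # -> /a

# Aggregates attribute values across tags; the exact shape comes from
# soupManager.get_all_attribute_values, which this diff does not show.
print(get_all_attribute_values(source_code=html))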
abstract_webtools/managers/urlManager/urlManager.py CHANGED

@@ -17,21 +17,21 @@ class urlManager:
     Now handles url=None gracefully: sets internals to None/empty and methods return None or empty values without errors.
     """
     def __init__(self, url=None, session=None):
-    [old __init__ body (15 lines) not captured in this diff view]
+        self._url = url # Allow None
+        self.session = session or requests.Session()
+        if self._url is None:
+            self.clean_urls = []
+            self.url = None
+            self.protocol = None
+            self.domain = None
+            self.path = ""
+            self.query = ""
+            self.all_urls = []
+        else:
+            self.clean_urls = self.clean_url()
+            self.url = self.get_correct_url() or self._url
+            self.protocol, self.domain, self.path, self.query = self.url_to_pieces(self.url)
+            self.all_urls = []
 
     def url_to_pieces(self, url):
         """
@@ -233,17 +233,9 @@ class urlManagerSingleton:
         elif urlManagerSingleton._instance.session != session or urlManagerSingleton._instance.url != url:
             urlManagerSingleton._instance = urlManager(url, session=session)
         return urlManagerSingleton._instance
+def get_url(url=None,url_mgr=None):
+    url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
+    return url_mgr.url
+def get_url_mgr(url=None,url_mgr=None):
+    return url_mgr or urlManager(url)
 
-def get_url(url=None, url_mgr=None):
-    if not url and not url_mgr:
-        return None
-    if url_mgr is None and url is not None:
-        url_mgr = urlManager(url)
-    return url_mgr.url if url_mgr else None
-
-def get_url_mgr(url=None, url_mgr=None):
-    if url_mgr is None:
-        url_mgr = urlManager(url=url) # Always create instance, even if url=None
-    if url_mgr and url is None:
-        url = url_mgr.url
-    return url_mgr
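One behavioral shift worth flagging: get_url_mgr now always returns a urlManager, even when called with nothing, leaning on the class's new url=None handling instead of the old early return None. A small sketch (the URL is illustrative):

# Sketch only; the helpers as rewritten above.
from abstract_webtools.managers.urlManager.urlManager import urlManager, get_url, get_url_mgr

mgr = get_url_mgr(url="https://example.com")
print(get_url(url_mgr=mgr))            # the normalized URL held by the manager

empty = get_url_mgr()                  # now urlManager(None), not None
print(empty.url, empty.domain)         # None None
print(repr(empty.path), repr(empty.query))  # '' ''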
{abstract_webtools-0.1.6.138.dist-info → abstract_webtools-0.1.6.140.dist-info}/METADATA RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.138
+Version: 0.1.6.140
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff
{abstract_webtools-0.1.6.138.dist-info → abstract_webtools-0.1.6.140.dist-info}/RECORD RENAMED

@@ -10,7 +10,7 @@ abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,132
 abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
 abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE70B8,10441
 abstract_webtools/url_grabber_new.py,sha256=xb23qo4anOY0Ax3CAfaHJ8s5VEz61Sinh-XpEDFW7Is,3621
-abstract_webtools/managers/__init__.py,sha256=
+abstract_webtools/managers/__init__.py,sha256=d7Q6_McRuKOHmKuna19s0l1wMgtM1JgUX8rHaSqJIcE,436
 abstract_webtools/managers/allss\.py,sha256=IBhlyRQHfK-BtwUnSEbIPqlI1MtZ8-XsdaHv0b91HQ0,269
 abstract_webtools/managers/cipherManager.py,sha256=NHQGdR11eNSm-1H-GezD5dyQgsPTJwY5kczt8Sher2s,1621
 abstract_webtools/managers/crawlManager.py,sha256=62Ej6AQC6-qXX_EWOmcJ2szNvEjmebFGugMz65HF1qI,12983

@@ -30,16 +30,21 @@ abstract_webtools/managers/videoDownloader2.py,sha256=v3H6akdhvVWGrB-r35m3cp_-aK
 abstract_webtools/managers/clownworld/__init__.py,sha256=eq25euhRbFqHLm1ibi_7FGz_oNWs-kkyAkETzK3r4_Q,35
 abstract_webtools/managers/clownworld/get_bolshevid_video.py,sha256=dNZdOxhXSA13DWFjdSOmvYrI3HybkrrvTBaMDbJfhfo,10140
 abstract_webtools/managers/linkManager/__init__.py,sha256=NpfWNzvTLSfsIWSeLYIxPzeLHADk_grSx5rfgCeWERw,27
-abstract_webtools/managers/linkManager/linkManager.py,sha256=
+abstract_webtools/managers/linkManager/linkManager.py,sha256=KYGjAu2YYF7NzztfHPVXAk1X1zjU39-bnFpaSxErTDg,12368
+abstract_webtools/managers/middleManager/__init__.py,sha256=RLLS1CxPpixIiV50P6tFaJcQ9C2O3lz19I4EDMc_4rE,19
+abstract_webtools/managers/middleManager/imports.py,sha256=T0cdlABayG64RI4PnDRf7gwLvcQ5owobD0EdaD0Fcuc,334
+abstract_webtools/managers/middleManager/src/UnifiedWebManager.py,sha256=2jtr4ebfDjpw1TcaZ5D9A324jtKLNohx-Ol5JrCBon0,4996
+abstract_webtools/managers/middleManager/src/__init__.py,sha256=YaSAh7AG1EvFWFZBIe4pGvzmfr60rpR9ZDWoQKqAMd0,61
+abstract_webtools/managers/middleManager/src/legacy_tools.py,sha256=2cCnRaq8UO7HdtffNtAOsZFJm_mpZbpvBuX0pIIWGaM,125
 abstract_webtools/managers/requestManager/__init__.py,sha256=z2qGtweEoO_OKr959LGxVXEMu1hu7PIkmh89BEh5TI8,30
-abstract_webtools/managers/requestManager/requestManager.py,sha256=
+abstract_webtools/managers/requestManager/requestManager.py,sha256=JxJKz52DhyExrdpRUFIsG4GTY89AQ_ogeujGLDhefLI,20118
 abstract_webtools/managers/soupManager/__init__.py,sha256=mqfXfqM9sWlYpOkoXUqtBoVvk2KQx1862NnmRVJwGtY,27
 abstract_webtools/managers/soupManager/asoueces.py,sha256=OaXqolZl0dI7b09NYwJ3Wnhuxf89ahZ1GjsOqy0GXfk,3506
-abstract_webtools/managers/soupManager/soupManager.py,sha256=
+abstract_webtools/managers/soupManager/soupManager.py,sha256=T76uvnR7rsbQk2DiwMNRprxiMO4rGSi32f3TZfft_pQ,17207
 abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
 abstract_webtools/managers/urlManager/urlManager (Copy).py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
-abstract_webtools/managers/urlManager/urlManager.py,sha256=
-abstract_webtools-0.1.6.138.dist-info/METADATA,sha256=
-abstract_webtools-0.1.6.138.dist-info/WHEEL,sha256=
-abstract_webtools-0.1.6.138.dist-info/top_level.txt,sha256=
+abstract_webtools/managers/urlManager/urlManager.py,sha256=vY4KQXtcrlC2YtlultxQpVe581l5kAuT5VGA0WrI16g,8945
+abstract_webtools-0.1.6.140.dist-info/METADATA,sha256=kGrkruUx1tWeifzVXhxWsTX8a8jGNgeHxiDH6FCbv_o,7289
+abstract_webtools-0.1.6.140.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+abstract_webtools-0.1.6.140.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+abstract_webtools-0.1.6.140.dist-info/RECORD,,

{abstract_webtools-0.1.6.138.dist-info → abstract_webtools-0.1.6.140.dist-info}/WHEEL RENAMED
File without changes

{abstract_webtools-0.1.6.138.dist-info → abstract_webtools-0.1.6.140.dist-info}/top_level.txt RENAMED
File without changes