abstract-webtools 0.1.5.96__py3-none-any.whl → 0.1.5.98__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstract_webtools/managers/requestManager.py +2 -2
- abstract_webtools/managers/soupManager.py +5 -5
- {abstract_webtools-0.1.5.96.dist-info → abstract_webtools-0.1.5.98.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.5.96.dist-info → abstract_webtools-0.1.5.98.dist-info}/RECORD +7 -7
- {abstract_webtools-0.1.5.96.dist-info → abstract_webtools-0.1.5.98.dist-info}/LICENSE +0 -0
- {abstract_webtools-0.1.5.96.dist-info → abstract_webtools-0.1.5.98.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.5.96.dist-info → abstract_webtools-0.1.5.98.dist-info}/top_level.txt +0 -0
@@ -341,10 +341,10 @@ class SafeRequestSingleton:
|
|
341
341
|
elif SafeRequestSingleton._instance.url != url or SafeRequestSingleton._instance.headers != headers or SafeRequestSingleton._instance.max_retries != max_retries or SafeRequestSingleton._instance.request_wait_limit != request_wait_limit:
|
342
342
|
SafeRequestSingleton._instance = SafeRequest(url,url_mgr=urlManagerSingleton,headers=headers,max_retries=max_retries,last_request_time=last_request_time,request_wait_limit=request_wait_limit)
|
343
343
|
return SafeRequestSingleton._instance
|
344
|
-
def get_req_mgr(url=None,url_mgr=None,source_code=None):
|
344
|
+
def get_req_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None):
|
345
345
|
url = get_url(url=url,url_mgr=url_mgr)
|
346
346
|
url_mgr = get_url_mgr(url=url,url_mgr=url_mgr )
|
347
|
-
req_mgr = requestManager(url_mgr=url_mgr,url=url,source_code=source_code)
|
347
|
+
req_mgr = req_mgr or requestManager(url_mgr=url_mgr,url=url,source_code=source_code)
|
348
348
|
return req_mgr
|
349
349
|
def get_source(url=None,url_mgr=None,source_code=None):
|
350
350
|
# Placeholder for actual implementation.
|
@@ -279,21 +279,21 @@ class soupManager:
|
|
279
279
|
attribute_names_list = list(attribute_names)
|
280
280
|
return {"tags":tag_names_list,"attributes":attribute_names_list}
|
281
281
|
|
282
|
-
def get_all_attribute_values(self,
|
282
|
+
def get_all_attribute_values(self, tags_list=None):
|
283
283
|
"""
|
284
284
|
Collects all attribute values for each specified tag or all tags if none are specified.
|
285
285
|
|
286
286
|
Parameters:
|
287
|
-
-
|
287
|
+
- tags_list: List of specific tags to retrieve attributes from, e.g., ['script', 'img'].
|
288
288
|
If None, retrieves attributes for all tags.
|
289
289
|
|
290
290
|
Returns:
|
291
291
|
- attribute_values: Dictionary where each key is an attribute and the value is a list of unique values for that attribute.
|
292
292
|
"""
|
293
293
|
attribute_values = {}
|
294
|
-
|
295
|
-
# Get all tags matching
|
296
|
-
for tag_name in
|
294
|
+
tags_list = tags_list or self.all_tags_and_attributes
|
295
|
+
# Get all tags matching tags_list criteria
|
296
|
+
for tag_name in tags_list:
|
297
297
|
for tag in self.soup.find_all(tag_name):
|
298
298
|
for attr, value in tag.attrs.items():
|
299
299
|
if attr not in attribute_values:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.5.96
|
3
|
+
Version: 0.1.5.98
|
4
4
|
Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -13,16 +13,16 @@ abstract_webtools/managers/dynamicRateLimiter.py,sha256=gopQcQo50JG2D0KcyepNCIQ_
|
|
13
13
|
abstract_webtools/managers/linkManager.py,sha256=m6y9s8jknrTX8RtOAFKeHd4yd23G7Rgf0T7Sp7wmHUw,12180
|
14
14
|
abstract_webtools/managers/mySocketClient.py,sha256=-j1Q8Ds9RCSbjZdx3ZF9mVpgwxaO0BBssanUcpYVQoY,2045
|
15
15
|
abstract_webtools/managers/networkManager.py,sha256=Op2QDXrP-gmm0tCToe-Ryt9xuOtMppcN2KLKP1WZiu0,952
|
16
|
-
abstract_webtools/managers/requestManager.py,sha256=
|
16
|
+
abstract_webtools/managers/requestManager.py,sha256=YksYgRivMMuZNOzyL5vaoXv9MLhgYeuLxO_UJiqPGWw,17312
|
17
17
|
abstract_webtools/managers/seleniumManager.py,sha256=CtQQYtDrFfgp8ujC6i5SCe0b_hgIA1K68io0aO4igoM,3623
|
18
|
-
abstract_webtools/managers/soupManager.py,sha256=
|
18
|
+
abstract_webtools/managers/soupManager.py,sha256=7nSaq7OHaimc8602BihAXCA2ra0dbsB26d4yJzsRARc,16548
|
19
19
|
abstract_webtools/managers/sslManager.py,sha256=C-QgQw9CW84uOE5kx2MPjC3RsLbE2JQqdwdTs0H4ecc,1370
|
20
20
|
abstract_webtools/managers/tlsAdapter.py,sha256=XZSMZz9EUOhv-h3_Waf6mjV1dA3oN_M_oWuoo4VZ_HE,1454
|
21
21
|
abstract_webtools/managers/urlManager.py,sha256=XqMrCM84BeWEfWtHc_8UFpT91ZtG-okzdKdCuC49vsA,8678
|
22
22
|
abstract_webtools/managers/userAgentManager.py,sha256=33SB2p2FG7EYZl7l2iYm1U4gI9PcdkGTZHw5lg_Ogrw,1653
|
23
23
|
abstract_webtools/managers/videoDownloader.py,sha256=6G_aLc05BTMUYUWc7iqYtHF_BaR7DnCNK_NJ-QnjsYY,10531
|
24
|
-
abstract_webtools-0.1.5.96.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
|
25
|
-
abstract_webtools-0.1.5.
|
26
|
-
abstract_webtools-0.1.5.96.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
27
|
-
abstract_webtools-0.1.5.96.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
28
|
-
abstract_webtools-0.1.5.96.dist-info/RECORD,,
|
24
|
+
abstract_webtools-0.1.5.98.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
|
25
|
+
abstract_webtools-0.1.5.98.dist-info/METADATA,sha256=uLWlH20TJTSC9DWSfZrN5nCRRcuv-gBpTFouxM-GaFo,15858
|
26
|
+
abstract_webtools-0.1.5.98.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
|
27
|
+
abstract_webtools-0.1.5.98.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
28
|
+
abstract_webtools-0.1.5.98.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|