abstract-webtools 0.1.6.140__tar.gz → 0.1.6.141__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/PKG-INFO +1 -1
  2. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/setup.py +1 -1
  3. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/linkManager/linkManager.py +10 -14
  4. abstract_webtools-0.1.6.141/src/abstract_webtools/managers/middleManager/src/UnifiedWebManage3r.py +136 -0
  5. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/middleManager/src/UnifiedWebManager.py +21 -26
  6. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/requestManager/requestManager.py +83 -84
  7. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/soupManager/soupManager.py +11 -8
  8. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools.egg-info/PKG-INFO +1 -1
  9. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools.egg-info/SOURCES.txt +1 -0
  10. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/README.md +0 -0
  11. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/pyproject.toml +0 -0
  12. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/setup.cfg +0 -0
  13. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/__init__.py +0 -0
  14. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/abstract_usurpit.py +0 -0
  15. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/abstract_webtools.py +0 -0
  16. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/big_user_agent_list.py +0 -0
  17. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/domain_identifier.py +0 -0
  18. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/extention_list.py +0 -0
  19. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/find_dirs.py +0 -0
  20. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/k2s_downloader.py +0 -0
  21. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/main.py +0 -0
  22. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/__init__.py +0 -0
  23. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/allss//.py" +0 -0
  24. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/cipherManager.py +0 -0
  25. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/clownworld/__init__.py +0 -0
  26. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/clownworld/get_bolshevid_video.py +0 -0
  27. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/crawlManager.py +0 -0
  28. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/crawlmgr2.py +0 -0
  29. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/curlMgr.py +0 -0
  30. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/domainManager.py +0 -0
  31. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
  32. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/get_test.py +0 -0
  33. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/linkManager/__init__.py +0 -0
  34. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/middleManager/__init__.py +0 -0
  35. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/middleManager/imports.py +0 -0
  36. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/middleManager/src/__init__.py +0 -0
  37. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/middleManager/src/legacy_tools.py +0 -0
  38. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/mySocketClient.py +0 -0
  39. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/networkManager.py +0 -0
  40. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/requestManager/__init__.py +0 -0
  41. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/seleniumManager.py +0 -0
  42. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/soupManager/__init__.py +0 -0
  43. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/soupManager/asoueces.py +0 -0
  44. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/sslManager.py +0 -0
  45. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
  46. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/urlManager/__init__.py +0 -0
  47. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/urlManager/urlManager (Copy).py +0 -0
  48. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/urlManager/urlManager.py +0 -0
  49. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/userAgentManager.py +0 -0
  50. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/videoDownloader.py +0 -0
  51. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/managers/videoDownloader2.py +0 -0
  52. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/soup_gui.py +0 -0
  53. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/url_grabber.py +0 -0
  54. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools/url_grabber_new.py +0 -0
  55. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
  56. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools.egg-info/requires.txt +0 -0
  57. {abstract_webtools-0.1.6.140 → abstract_webtools-0.1.6.141}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.140
3
+ Version: 0.1.6.141
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -4,7 +4,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
4
4
  long_description = fh.read()
5
5
  setuptools.setup(
6
6
  name='abstract_webtools',
7
- version='0.1.6.140',
7
+ version='0.1.6.141',
8
8
  author='putkoff',
9
9
  author_email='partners@abstractendeavors.com',
10
10
  description='Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.',
@@ -2,7 +2,7 @@ from ...abstract_webtools import *
2
2
  from ..urlManager import *
3
3
  from ..requestManager import *
4
4
  from ..soupManager import *
5
- from ..middleManager import *
5
+
6
6
  class linkManager:
7
7
  """
8
8
  LinkManager is a class for managing and extracting links and image links from a web page.
@@ -46,6 +46,7 @@ class linkManager:
46
46
  url_mgr=None,
47
47
  req_mgr=None,
48
48
  soup_mgr=None,
49
+ parse_type=None,
49
50
  image_link_tags='img',
50
51
  img_link_attrs='src',
51
52
  link_tags='a',
@@ -58,21 +59,15 @@ class linkManager:
58
59
  associated_data_attr=["data-title",'alt','title'],
59
60
  get_img=["data-title",'alt','title']
60
61
  ):
61
-
62
-
63
- all_tools = get_soup_tools(
64
- url=url,
65
- url_mgr=url_mgr,
66
- source_code=source_code,
67
- req_mgr=req_mgr,
68
- soup=soup,
69
- soup_mgr=soup_mgr,
70
- target_manager = self
71
- )
72
62
 
73
-
74
-
63
+ self.url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
64
+ self.url = get_url(url=url,url_mgr=self.url_mgr)
65
+ self.req_mgr = get_req_mgr(url=self.url,url_mgr=self.url_mgr,source_code=source_code,req_mgr=req_mgr)
66
+ self.source_code = get_source(url=self.url,url_mgr=self.url_mgr,source_code=source_code,req_mgr=self.req_mgr)
67
+ self.soup_mgr = get_soup_mgr(url=self.url,url_mgr=self.url_mgr,source_code=self.source_code,req_mgr=self.req_mgr,soup_mgr=soup_mgr,soup=soup,parse_type=parse_type)
75
68
 
69
+ self.soup = get_soup(url=self.url,url_mgr=self.url_mgr,req_mgr=self.req_mgr,source_code=self.source_code,soup_mgr=self.soup_mgr)
70
+
76
71
  self.strict_order_tags=strict_order_tags
77
72
  self.image_link_tags=image_link_tags
78
73
  self.img_link_attrs=img_link_attrs
@@ -94,6 +89,7 @@ class linkManager:
94
89
  attr_value_undesired=self.link_attr_value_undesired,
95
90
  associated_data_attr=self.associated_data_attr,
96
91
  get_img=get_img)
92
+
97
93
  def re_initialize(self):
98
94
  self.all_desired_image_links=self.find_all_desired_links(tag=self.image_link_tags,attr=self.img_link_attrs,strict_order_tags=self.strict_order_tags,attr_value_desired=self.img_attr_value_desired,attr_value_undesired=self.img_attr_value_undesired)
99
95
  self.all_desired_links=self.find_all_desired_links(tag=self.link_tags,attr=self.link_attrs,strict_order_tags=self.strict_order_tags,attr_value_desired=self.link_attr_value_desired,attr_value_undesired=self.link_attr_value_undesired,associated_data_attr=self.associated_data_attr,get_img=self.get_img)
@@ -0,0 +1,136 @@
1
+ from ..imports import *
2
+
3
+ class UnifiedWebManager:
4
+ """
5
+ Unified middleware that ties together URL, request, and soup managers.
6
+ Lazily initializes components based on provided inputs.
7
+
8
+ Args:
9
+ url (str or None): The base URL.
10
+ source_code (str or bytes or None): Pre-fetched source code.
11
+ url_mgr (urlManager or None): Existing URL manager.
12
+ req_mgr (requestManager or None): Existing request manager.
13
+ soup_mgr (soupManager or None): Existing soup manager.
14
+ parse_type (str): Parser type for BeautifulSoup (default: "html.parser").
15
+ """
16
+ def __init__(self, url=None, source_code=None, url_mgr=None, req_mgr=None, soup_mgr=None,soup=None, parse_type="html.parser"):
17
+ self.url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
18
+ self.url = get_url(url=url,url_mgr=self.url_mgr)
19
+ self.req_mgr = get_source(url=self.url,url_mgr=self.url_mgr,source_code=source_code,req_mgr=req_mgr)
20
+ self.source_code = get_source(url=self.url,url_mgr=self.url_mgr,source_code=source_code,req_mgr=self.req_mgr)
21
+ self.soup_mgr = get_soup_mgr(url=self.url,url_mgr=self.url_mgr,source_code=self.source_code,req_mgr=self.req_mgr,soup_mgr=soup_mgr,soup=soup,parse_type=parse_type)
22
+ self.soup = get_soup(url=self.url,url_mgr=self.url_mgr,req_mgr=self.req_mgr,source_code=self.source_code,soup_mgr=self.soup_mgr)
23
+
24
+ @property
25
+ def url_mgr(self):
26
+ if self.url_mgr is None:
27
+ if self.url is None:
28
+ logging.warning("No URL provided; URL manager cannot be created.")
29
+ return None
30
+ self.url_mgr = urlManager(url=self.url)
31
+ return self.url_mgr
32
+
33
+ @property
34
+ def url(self):
35
+ if self.url is None and self.url_mgr:
36
+ self.url = self.url_mgr.url
37
+ return self.url
38
+
39
+ @property
40
+ def req_mgr(self):
41
+ if self.req_mgr is None:
42
+ self.req_mgr = requestManager(
43
+ url=self.url,
44
+ url_mgr=self.url_mgr,
45
+ source_code=self.source_code
46
+ )
47
+ return self.req_mgr
48
+
49
+ @property
50
+ def source_code(self):
51
+ if self.source_code is None and self.req_mgr:
52
+ self.source_code = self.req_mgr.source_code
53
+ return self.source_code
54
+
55
+ @property
56
+ def soup_mgr(self):
57
+ if self.soup_mgr is None:
58
+ self.soup_mgr = soupManager(
59
+ url=self.url,
60
+ url_mgr=self.url_mgr,
61
+ req_mgr=self.req_mgr,
62
+ source_code=self.source_code
63
+ )
64
+ return self.soup_mgr
65
+
66
+ @property
67
+ def soup(self):
68
+ if self.soup is None:
69
+ source = self.source_code
70
+ if source is None:
71
+ logging.warning("No source code available; Soup cannot be created.")
72
+ return None
73
+ if isinstance(source, bytes):
74
+ source = source.decode('utf-8', errors='ignore')
75
+ self.soup = BeautifulSoup(source, self.parse_type)
76
+ return self.soup
77
+
78
+ def update_url(self, url):
79
+ """Update the URL and reset dependent managers."""
80
+ self.url = url
81
+ self.url_mgr = None
82
+ self.req_mgr = None
83
+ self.soup_mgr = None
84
+ self.source_code = None
85
+ self.soup = None
86
+
87
+ def update_source_code(self, source_code):
88
+ """Update the source code and reset dependent managers."""
89
+ self.source_code = source_code
90
+ self.req_mgr = None
91
+ self.soup_mgr = None
92
+ self.soup = None
93
+
94
+ # Convenience methods for direct access
95
+ def get_all_tools(self):
96
+ """Return a dict with all components (similar to original getters)."""
97
+ return {
98
+ 'url': self.url,
99
+ 'url_mgr': self.url_mgr,
100
+ 'source_code': self.source_code,
101
+ 'req_mgr': self.req_mgr,
102
+ 'soup': self.soup,
103
+ 'soup_mgr': self.soup_mgr
104
+ }
105
+ def endow_to_manager(self, target_manager, all_tools=None):
106
+ """
107
+ Endow (assign) the attributes from all_tools to the target manager instance.
108
+
109
+ Args:
110
+ target_manager: The instance (e.g., another manager class) to endow attributes to.
111
+ all_tools (dict or None): Optional dict of tools/attributes. If None, uses self.get_all_tools().
112
+ """
113
+ if all_tools is None:
114
+ all_tools = self.get_all_tools()
115
+ for key, value in all_tools.items():
116
+ setattr(target_manager, key, value)
117
+ return target_manager
118
+ # Wrapper functions for backward compatibility
119
+ def get_url_tools(url=None, url_mgr=None):
120
+ mgr = UnifiedWebManager(url=url, url_mgr=url_mgr)
121
+ return {'url': mgr.url, 'url_mgr': mgr.url_mgr}
122
+
123
+ def get_req_tools(url=None, url_mgr=None, source_code=None, req_mgr=None):
124
+ mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr)
125
+ return {'url': mgr.url, 'url_mgr': mgr.url_mgr, 'source_code': mgr.source_code, 'req_mgr': mgr.req_mgr}
126
+
127
+ def get_soup_tools(url=None, url_mgr=None, source_code=None, req_mgr=None, soup=None, soup_mgr=None,target_manager=None):
128
+ mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr, soup_mgr=soup_mgr)
129
+ if soup is not None:
130
+ mgr.soup = soup # Allow overriding
131
+ if target_manager:
132
+ mgr.endow_to_manager(target_manager, all_tools=None)
133
+ return target_manager
134
+ return mgr.get_all_tools()
135
+
136
+
@@ -1,5 +1,9 @@
1
+ import logging
2
+ from bs4 import BeautifulSoup
1
3
  from ..imports import *
2
4
 
5
+ logging.basicConfig(level=logging.INFO)
6
+
3
7
  class UnifiedWebManager:
4
8
  """
5
9
  Unified middleware that ties together URL, request, and soup managers.
@@ -11,16 +15,17 @@ class UnifiedWebManager:
11
15
  url_mgr (urlManager or None): Existing URL manager.
12
16
  req_mgr (requestManager or None): Existing request manager.
13
17
  soup_mgr (soupManager or None): Existing soup manager.
18
+ soup (BeautifulSoup or None): Pre-parsed soup object.
14
19
  parse_type (str): Parser type for BeautifulSoup (default: "html.parser").
15
20
  """
16
- def __init__(self, url=None, source_code=None, url_mgr=None, req_mgr=None, soup_mgr=None,soup=None, parse_type="html.parser"):
21
+ def __init__(self, url=None, source_code=None, url_mgr=None, req_mgr=None, soup_mgr=None, soup=None, parse_type="html.parser"):
17
22
  self._url = url
18
23
  self._source_code = source_code
19
24
  self._url_mgr = url_mgr
20
25
  self._req_mgr = req_mgr
21
26
  self._soup_mgr = soup_mgr
27
+ self._soup = soup
22
28
  self._parse_type = parse_type
23
- self._soup = None # Lazy
24
29
 
25
30
  @property
26
31
  def url_mgr(self):
@@ -28,40 +33,31 @@ class UnifiedWebManager:
28
33
  if self._url is None:
29
34
  logging.warning("No URL provided; URL manager cannot be created.")
30
35
  return None
31
- self._url_mgr = urlManager(url=self._url)
36
+ self._url_mgr = get_url_mgr(url=self._url)
32
37
  return self._url_mgr
33
38
 
34
39
  @property
35
40
  def url(self):
36
- if self._url is None and self.url_mgr:
37
- self._url = self.url_mgr.url
41
+ if self._url is None and self.url_mgr is not None:
42
+ self._url = get_url(url_mgr=self.url_mgr)
38
43
  return self._url
39
44
 
40
45
  @property
41
46
  def req_mgr(self):
42
47
  if self._req_mgr is None:
43
- self._req_mgr = requestManager(
44
- url=self.url,
45
- url_mgr=self.url_mgr,
46
- source_code=self._source_code
47
- )
48
+ self._req_mgr = get_req_mgr(url=self.url, url_mgr=self.url_mgr, source_code=self._source_code)
48
49
  return self._req_mgr
49
50
 
50
51
  @property
51
52
  def source_code(self):
52
- if self._source_code is None and self.req_mgr:
53
- self._source_code = self.req_mgr.source_code
53
+ if self._source_code is None and self.req_mgr is not None:
54
+ self._source_code = get_source(req_mgr=self.req_mgr)
54
55
  return self._source_code
55
56
 
56
57
  @property
57
58
  def soup_mgr(self):
58
59
  if self._soup_mgr is None:
59
- self._soup_mgr = soupManager(
60
- url=self.url,
61
- url_mgr=self.url_mgr,
62
- req_mgr=self.req_mgr,
63
- source_code=self.source_code
64
- )
60
+ self._soup_mgr = get_soup_mgr(url=self.url, url_mgr=self.url_mgr, source_code=self.source_code, req_mgr=self.req_mgr)
65
61
  return self._soup_mgr
66
62
 
67
63
  @property
@@ -73,7 +69,7 @@ class UnifiedWebManager:
73
69
  return None
74
70
  if isinstance(source, bytes):
75
71
  source = source.decode('utf-8', errors='ignore')
76
- self._soup = BeautifulSoup(source, self._parse_type)
72
+ self._soup = get_soup(source_code=source, parse_type=self._parse_type)
77
73
  return self._soup
78
74
 
79
75
  def update_url(self, url):
@@ -103,6 +99,7 @@ class UnifiedWebManager:
103
99
  'soup': self.soup,
104
100
  'soup_mgr': self.soup_mgr
105
101
  }
102
+
106
103
  def endow_to_manager(self, target_manager, all_tools=None):
107
104
  """
108
105
  Endow (assign) the attributes from all_tools to the target manager instance.
@@ -115,6 +112,8 @@ class UnifiedWebManager:
115
112
  all_tools = self.get_all_tools()
116
113
  for key, value in all_tools.items():
117
114
  setattr(target_manager, key, value)
115
+ return target_manager
116
+
118
117
  # Wrapper functions for backward compatibility
119
118
  def get_url_tools(url=None, url_mgr=None):
120
119
  mgr = UnifiedWebManager(url=url, url_mgr=url_mgr)
@@ -124,12 +123,8 @@ def get_req_tools(url=None, url_mgr=None, source_code=None, req_mgr=None):
124
123
  mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr)
125
124
  return {'url': mgr.url, 'url_mgr': mgr.url_mgr, 'source_code': mgr.source_code, 'req_mgr': mgr.req_mgr}
126
125
 
127
- def get_soup_tools(url=None, url_mgr=None, source_code=None, req_mgr=None, soup=None, soup_mgr=None,target_manager=None):
128
- mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr, soup_mgr=soup_mgr)
129
- if soup is not None:
130
- mgr._soup = soup # Allow overriding
126
+ def get_soup_tools(url=None, url_mgr=None, source_code=None, req_mgr=None, soup=None, soup_mgr=None, target_manager=None):
127
+ mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr, soup_mgr=soup_mgr, soup=soup)
131
128
  if target_manager:
132
- mgr.endow_to_manager(target_manager, all_tools=None)
129
+ return mgr.endow_to_manager(target_manager)
133
130
  return mgr.get_all_tools()
134
-
135
-
@@ -59,90 +59,89 @@ class requestManager:
59
59
  - The SafeRequest class is designed for making HTTP requests with error handling and retries.
60
60
  - It provides methods for authentication, response handling, and error management.
61
61
  """
62
- def __init__(self,
63
- url=None,
64
- source_code=None,
65
- url_mgr=None,
66
- network_manager=None,
67
- user_agent_manager=None,
68
- ssl_manager=None,
69
- ssl_options=None,
70
- tls_adapter=None,
71
- user_agent=None,
72
- proxies=None,
73
- headers=None,
74
- cookies=None,
75
- session=None,
76
- adapter=None,
77
- protocol=None,
78
- ciphers=None,
79
- spec_login=False,
80
- login_referer=None,
81
- login_user_agent=None,
82
- auth=None,
83
- login_url=None,
84
- email=None,
85
- password=None,
86
- checkbox=None,
87
- dropdown=None,
88
- certification=None,
89
- stream=False,
90
- timeout=None,
91
- last_request_time=None,
92
- max_retries=None,
93
- request_wait_limit=None):
94
- self.url_mgr = get_url_mgr(url=url, url_mgr=url_mgr)
95
- self.url = get_url(url=url, url_mgr=self.url_mgr)
96
- self._url_mgr = self.url_mgr
97
- self._url = self.url
98
- self.user_agent = user_agent
99
- self.user_agent_manager = user_agent_manager or UserAgentManager(user_agent=self.user_agent)
100
- self.headers = headers or self.user_agent_manager.header or {'Accept': '*/*'}
101
- self.user_agent = self.user_agent_manager.user_agent
102
- self.ciphers = ciphers or CipherManager().ciphers_string
103
- self.certification = certification
104
- self.ssl_options = ssl_options
105
- self.ssl_manager = ssl_manager or SSLManager(ciphers=self.ciphers, ssl_options=self.ssl_options, certification=self.certification)
106
- self.tls_adapter = tls_adapter or TLSAdapter(ssl_manager=self.ssl_manager, certification=self.certification, ssl_options=self.ssl_manager.ssl_options)
107
- self.network_manager = network_manager or NetworkManager(user_agent_manager=self.user_agent_manager, ssl_manager=self.ssl_manager, tls_adapter=self.tls_adapter, user_agent=user_agent, proxies=proxies, cookies=cookies, ciphers=ciphers, certification=certification, ssl_options=ssl_options)
108
- self.stream = stream
109
- self.tls_adapter = self.network_manager.tls_adapter
110
- self.ciphers = self.network_manager.ciphers
111
- self.certification = self.network_manager.certification
112
- self.ssl_options = self.network_manager.ssl_options
113
- self.proxies = self.network_manager.proxies
114
- self.timeout = timeout
115
- self.cookies = self.network_manager.cookies
116
- self.session = session or requests.Session()
117
- self.auth = auth
118
- self.spec_login = spec_login
119
- self.password = password
120
- self.email = email
121
- self.checkbox = checkbox
122
- self.dropdown = dropdown
123
- self.login_url = login_url
124
- self.login_user_agent = login_user_agent
125
- self.login_referer = login_referer
126
- self.protocol = protocol or 'https://'
127
- self.stream = stream if isinstance(stream, bool) else False
128
- self.initialize_session()
129
- self.last_request_time = last_request_time
130
- self.max_retries = max_retries or 3
131
- self.request_wait_limit = request_wait_limit or 1.5
132
- self._response = None
133
- self.status_code = None
134
- self.source_code = None
135
- self.source_code_bytes = None
136
- self.source_code_json = {}
137
- self.react_source_code = []
138
- self.extracted_urls = []
139
- self.php_blocks = []
140
- self._response_data = None
141
- if source_code is not None:
142
- self._response = source_code
143
- self.process_response_data()
144
- else:
145
- self.re_initialize()
62
+ def __init__(self,url=None,
63
+ source_code=None,
64
+ url_mgr=None,
65
+ network_manager=None,
66
+ user_agent_manager=None,
67
+ ssl_manager=None,
68
+ ssl_options=None,
69
+ tls_adapter=None,
70
+ user_agent=None,
71
+ proxies=None,
72
+ headers=None,
73
+ cookies=None,
74
+ session=None,
75
+ adapter=None,
76
+ protocol=None,
77
+ ciphers=None,
78
+ spec_login=False,
79
+ login_referer=None,
80
+ login_user_agent=None,
81
+ auth=None,
82
+ login_url=None,
83
+ email=None,
84
+ password=None,
85
+ checkbox=None,
86
+ dropdown=None,
87
+ certification=None,
88
+ stream=False,
89
+ timeout=None,
90
+ last_request_time=None,
91
+ max_retries=None,
92
+ request_wait_limit=None):
93
+ self.url_mgr = get_url_mgr(url=url, url_mgr=url_mgr)
94
+ self.url = get_url(url=url, url_mgr=self.url_mgr)
95
+ self._url_mgr = self.url_mgr
96
+ self._url = self.url
97
+ self.user_agent = user_agent
98
+ self.user_agent_manager = user_agent_manager or UserAgentManager(user_agent=self.user_agent)
99
+ self.headers = headers or self.user_agent_manager.header or {'Accept': '*/*'}
100
+ self.user_agent = self.user_agent_manager.user_agent
101
+ self.ciphers = ciphers or CipherManager().ciphers_string
102
+ self.certification = certification
103
+ self.ssl_options = ssl_options
104
+ self.ssl_manager = ssl_manager or SSLManager(ciphers=self.ciphers, ssl_options=self.ssl_options, certification=self.certification)
105
+ self.tls_adapter = tls_adapter or TLSAdapter(ssl_manager=self.ssl_manager, certification=self.certification, ssl_options=self.ssl_manager.ssl_options)
106
+ self.network_manager = network_manager or NetworkManager(user_agent_manager=self.user_agent_manager, ssl_manager=self.ssl_manager, tls_adapter=self.tls_adapter, user_agent=user_agent, proxies=proxies, cookies=cookies, ciphers=ciphers, certification=certification, ssl_options=ssl_options)
107
+ self.stream = stream
108
+ self.tls_adapter = self.network_manager.tls_adapter
109
+ self.ciphers = self.network_manager.ciphers
110
+ self.certification = self.network_manager.certification
111
+ self.ssl_options = self.network_manager.ssl_options
112
+ self.proxies = self.network_manager.proxies
113
+ self.timeout = timeout
114
+ self.cookies = self.network_manager.cookies
115
+ self.session = session or requests.Session()
116
+ self.auth = auth
117
+ self.spec_login = spec_login
118
+ self.password = password
119
+ self.email = email
120
+ self.checkbox = checkbox
121
+ self.dropdown = dropdown
122
+ self.login_url = login_url
123
+ self.login_user_agent = login_user_agent
124
+ self.login_referer = login_referer
125
+ self.protocol = protocol or 'https://'
126
+ self.stream = stream if isinstance(stream, bool) else False
127
+ self.initialize_session()
128
+ self.last_request_time = last_request_time
129
+ self.max_retries = max_retries or 3
130
+ self.request_wait_limit = request_wait_limit or 1.5
131
+ self._response = None
132
+ self.status_code = None
133
+ self.source_code = None
134
+ self.source_code_bytes = None
135
+ self.source_code_json = {}
136
+ self.react_source_code = []
137
+ self.extracted_urls = []
138
+ self.php_blocks = []
139
+ self._response_data = None
140
+ if source_code is not None:
141
+ self._response = source_code
142
+ self.process_response_data()
143
+ else:
144
+ self.re_initialize()
146
145
 
147
146
  def update_url_mgr(self, url_mgr):
148
147
  self.url_mgr = url_mgr
@@ -39,7 +39,7 @@ class soupManager:
39
39
  - The SoupManager class is designed for parsing HTML source code using BeautifulSoup.
40
40
  - It provides various methods to extract data and discover elements within the source code.
41
41
  """
42
- def __init__(self,url=None,source_code=None,url_mgr=None,req_mgr=None, parse_type="html.parser"):
42
+ def __init__(self,url=None,source_code=None,url_mgr=None,req_mgr=None,soup=None, parse_type="html.parser"):
43
43
  self.soup=[]
44
44
  url = get_url(url=url,url_mgr=url_mgr)
45
45
  self.url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
@@ -50,7 +50,7 @@ class soupManager:
50
50
  if source_code:
51
51
  source_code = str(source_code)
52
52
  self.source_code = source_code or ''
53
- self.soup= BeautifulSoup(self.source_code, self.parse_type)
53
+ self.soup= soup or BeautifulSoup(self.source_code, self.parse_type)
54
54
  self.all_tags_and_attribute_names = self.get_all_tags_and_attribute_names()
55
55
  self.all_tags = self.all_tags_and_attribute_names.get('tags')
56
56
  self.all_attribute_names = self.all_tags_and_attribute_names.get('attributes')
@@ -340,7 +340,8 @@ class SoupManagerSingleton():
340
340
  elif parse_type != SoupManagerSingleton._instance.parse_type or source_code != SoupManagerSingleton._instance.source_code:
341
341
  SoupManagerSingleton._instance = SoupManager(url_mgr,requestManager,parse_type=parse_type,source_code=source_code)
342
342
  return SoupManagerSingleton._instance
343
- def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,parse_type="html.parser"):
343
+ def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,soup=None,parse_type=None):
344
+ parse_type = parse_type or "html.parser"
344
345
  if source_code or soup_mgr:
345
346
  if soup_mgr:
346
347
  return soup_mgr.soup
@@ -349,14 +350,16 @@ def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,p
349
350
  url = get_url(url=url,url_mgr=url_mgr)
350
351
  req_mgr = req_mgr or get_req_mgr(url_mgr=url_mgr,url=url,source_code=source_code)
351
352
  source_code = req_mgr.source_code
352
- soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr)
353
+ soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr,soup=soup)
353
354
  return soup_mgr.soup
354
- def get_soup_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,parse_type="html.parser"):
355
+ def get_soup_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,soup=None,parse_type=None):
356
+ parse_type = parse_type or "html.parser"
355
357
  url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
356
358
  url = get_url(url=url,url_mgr=url_mgr)
357
359
  req_mgr = get_req_mgr(url_mgr=url_mgr,url=url,source_code=source_code)
358
- soup_mgr = soup_mgr or soupManager(url_mgr=url_mgr,req_mgr=req_mgr,url=url,source_code=source_code)
360
+ soup_mgr = soup_mgr or soupManager(url_mgr=url_mgr,req_mgr=req_mgr,url=url,source_code=source_code,soup=soup)
359
361
  return soup_mgr
360
- def get_all_attribute_values(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,tags_list = None,parse_type="html.parser"):
361
- soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr)
362
+ def get_all_attribute_values(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,soup=None,tags_list = None,parse_type=None):
363
+ parse_type = parse_type or "html.parser"
364
+ soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr,soup=soup)
362
365
  return soup_mgr.get_all_attribute_values(tags_list=tags_list)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.140
3
+ Version: 0.1.6.141
4
4
  Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
6
6
  Author: putkoff
@@ -42,6 +42,7 @@ src/abstract_webtools/managers/linkManager/__init__.py
42
42
  src/abstract_webtools/managers/linkManager/linkManager.py
43
43
  src/abstract_webtools/managers/middleManager/__init__.py
44
44
  src/abstract_webtools/managers/middleManager/imports.py
45
+ src/abstract_webtools/managers/middleManager/src/UnifiedWebManage3r.py
45
46
  src/abstract_webtools/managers/middleManager/src/UnifiedWebManager.py
46
47
  src/abstract_webtools/managers/middleManager/src/__init__.py
47
48
  src/abstract_webtools/managers/middleManager/src/legacy_tools.py