abstract-webtools 0.1.6.139__py3-none-any.whl → 0.1.6.141__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- abstract_webtools/managers/__init__.py
+++ abstract_webtools/managers/__init__.py
@@ -12,3 +12,4 @@ from .urlManager import *
 from .userAgentManager import *
 from .seleniumManager import *
 from .videoDownloader import *
+from .middleManager import *
--- abstract_webtools/managers/linkManager/linkManager.py
+++ abstract_webtools/managers/linkManager/linkManager.py
@@ -2,11 +2,7 @@ from ...abstract_webtools import *
 from ..urlManager import *
 from ..requestManager import *
 from ..soupManager import *
-def get_url(url=None,url_mgr=None):
-    url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
-    return url_mgr.url
-def get_url_mgr(url=None,url_mgr=None):
-    return url_mgr or urlManager(url)
+
 class linkManager:
     """
     LinkManager is a class for managing and extracting links and image links from a web page.
@@ -43,12 +39,34 @@ class linkManager:
     - The LinkManager class helps manage and extract links and image links from web pages.
     - The class provides flexibility in specifying criteria for link extraction.
     """
-    def __init__(self,url=None,source_code=None,url_mgr=None,req_mgr=None,soup_manager=None,image_link_tags='img',img_link_attrs='src',link_tags='a',link_attrs='href',strict_order_tags=False,img_attr_value_desired=None,img_attr_value_undesired=None,link_attr_value_desired=None,link_attr_value_undesired=None,associated_data_attr=["data-title",'alt','title'],get_img=["data-title",'alt','title']):
+    def __init__(self,
+                 url=None,
+                 source_code=None,
+                 soup=None,
+                 url_mgr=None,
+                 req_mgr=None,
+                 soup_mgr=None,
+                 parse_type=None,
+                 image_link_tags='img',
+                 img_link_attrs='src',
+                 link_tags='a',
+                 link_attrs='href',
+                 strict_order_tags=False,
+                 img_attr_value_desired=None,
+                 img_attr_value_undesired=None,
+                 link_attr_value_desired=None,
+                 link_attr_value_undesired=None,
+                 associated_data_attr=["data-title",'alt','title'],
+                 get_img=["data-title",'alt','title']
+                 ):
+
         self.url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
-        self.url=self.url_mgr.url
-        self.req_mgr = get_req_mgr(req_mgr=req_mgr,url=self.url,url_mgr=self.url_mgr,source_code=source_code)
-        self.source_code = source_code or self.req_mgr.source_code or self.req_mgr.source_code_bytes
-        self.soup_mgr = get_soup_mgr(req_mgr=self.req_mgr,url_mgr=self.url_mgr,source_code = source_code)
+        self.url = get_url(url=url,url_mgr=self.url_mgr)
+        self.req_mgr = get_req_mgr(url=self.url,url_mgr=self.url_mgr,source_code=source_code,req_mgr=req_mgr)
+        self.source_code = get_source(url=self.url,url_mgr=self.url_mgr,source_code=source_code,req_mgr=self.req_mgr)
+        self.soup_mgr = get_soup_mgr(url=self.url,url_mgr=self.url_mgr,source_code=self.source_code,req_mgr=self.req_mgr,soup_mgr=soup_mgr,soup=soup,parse_type=parse_type)
+
+        self.soup = get_soup(url=self.url,url_mgr=self.url_mgr,req_mgr=self.req_mgr,source_code=self.source_code,soup_mgr=self.soup_mgr)
 
         self.strict_order_tags=strict_order_tags
         self.image_link_tags=image_link_tags
@@ -71,6 +89,7 @@ class linkManager:
                                           attr_value_undesired=self.link_attr_value_undesired,
                                           associated_data_attr=self.associated_data_attr,
                                           get_img=get_img)
+
     def re_initialize(self):
         self.all_desired_image_links=self.find_all_desired_links(tag=self.image_link_tags,attr=self.img_link_attrs,strict_order_tags=self.strict_order_tags,attr_value_desired=self.img_attr_value_desired,attr_value_undesired=self.img_attr_value_undesired)
         self.all_desired_links=self.find_all_desired_links(tag=self.link_tags,attr=self.link_attrs,strict_order_tags=self.strict_order_tags,attr_value_desired=self.link_attr_value_desired,attr_value_undesired=self.link_attr_value_undesired,associated_data_attr=self.associated_data_attr,get_img=self.get_img)
@@ -164,19 +183,25 @@ class linkManager:
             if url not in domains_ls and comp_domain == domain:
                 domains_ls.append(url)
         return domains_ls
+
     def find_all_desired_links(self,tag='img', attr='src',attr_value_desired=None,strict_order_tags=False,attr_value_undesired=None,associated_data_attr=None,all_desired=None,get_img=None):
         all_desired = all_desired or self.find_all_desired(tag=tag,attr=attr,strict_order_tags=strict_order_tags,attr_value_desired=attr_value_desired,attr_value_undesired=attr_value_undesired,associated_data_attr=associated_data_attr,get_img=get_img)
         assiciated_attrs = all_desired[-1]
         valid_assiciated_attrs = []
         desired_links=[]
         for i,attr in enumerate(all_desired[:-1]):
-            valid_attr=self.url_mgr.make_valid(attr,self.url_mgr.protocol+'://'+self.url_mgr.domain)
-            if valid_attr:
-                desired_links.append(valid_attr)
-                valid_assiciated_attrs.append(assiciated_attrs[i])
-                valid_assiciated_attrs[-1]["link"]=valid_attr
+
+            self.url_mgr.domain = self.url_mgr.domain or ''
+
+            self.url_mgr.protocol = self.url_mgr.protocol or 'https'
+
+            if attr:
+                valid_attr=self.url_mgr.make_valid(attr,self.url_mgr.protocol+'://'+self.url_mgr.domain)
+                if valid_attr:
+                    desired_links.append(valid_attr)
+                    valid_assiciated_attrs.append(assiciated_attrs[i])
+                    valid_assiciated_attrs[-1]["link"]=valid_attr
         desired_links.append(valid_assiciated_attrs)
         return desired_links
 
-
 
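For orientation, a minimal usage sketch of the reworked linkManager above. The URL is hypothetical, and the module import path is an assumption based on the package layout in the RECORD below:

    from abstract_webtools.managers.linkManager.linkManager import linkManager

    # Hypothetical page; the constructor resolves the url/request/soup managers
    # through the get_* helpers shown in the hunks above.
    link_mgr = linkManager(url="https://example.com")
    link_mgr.re_initialize()

    # find_all_desired_links() returns the resolved links plus a trailing list of
    # associated-attribute dicts, hence the [:-1] slice for the links themselves.
    page_links = link_mgr.all_desired_links[:-1]
    image_links = link_mgr.all_desired_image_links[:-1]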
--- /dev/null
+++ abstract_webtools/managers/middleManager/__init__.py
@@ -0,0 +1 @@
+from .src import *
--- /dev/null
+++ abstract_webtools/managers/middleManager/imports.py
@@ -0,0 +1,18 @@
+import logging
+from ..urlManager import (
+    urlManager,
+    get_url,
+    get_url_mgr
+)
+from ..requestManager import (
+    requestManager,
+    get_source,
+    get_req_mgr
+)
+from ..soupManager import (
+    soupManager,
+    get_soup,
+    get_soup_mgr
+)
+from bs4 import BeautifulSoup
+logging.basicConfig(level=logging.INFO)
--- /dev/null
+++ abstract_webtools/managers/middleManager/src/UnifiedWebManage3r.py
@@ -0,0 +1,136 @@
+from ..imports import *
+
+class UnifiedWebManager:
+    """
+    Unified middleware that ties together URL, request, and soup managers.
+    Lazily initializes components based on provided inputs.
+
+    Args:
+        url (str or None): The base URL.
+        source_code (str or bytes or None): Pre-fetched source code.
+        url_mgr (urlManager or None): Existing URL manager.
+        req_mgr (requestManager or None): Existing request manager.
+        soup_mgr (soupManager or None): Existing soup manager.
+        parse_type (str): Parser type for BeautifulSoup (default: "html.parser").
+    """
+    def __init__(self, url=None, source_code=None, url_mgr=None, req_mgr=None, soup_mgr=None,soup=None, parse_type="html.parser"):
+        self.url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
+        self.url = get_url(url=url,url_mgr=self.url_mgr)
+        self.req_mgr = get_source(url=self.url,url_mgr=self.url_mgr,source_code=source_code,req_mgr=req_mgr)
+        self.source_code = get_source(url=self.url,url_mgr=self.url_mgr,source_code=source_code,req_mgr=self.req_mgr)
+        self.soup_mgr = get_soup_mgr(url=self.url,url_mgr=self.url_mgr,source_code=self.source_code,req_mgr=self.req_mgr,soup_mgr=soup_mgr,soup=soup,parse_type=parse_type)
+        self.soup = get_soup(url=self.url,url_mgr=self.url_mgr,req_mgr=self.req_mgr,source_code=self.source_code,soup_mgr=self.soup_mgr)
+
+    @property
+    def url_mgr(self):
+        if self.url_mgr is None:
+            if self.url is None:
+                logging.warning("No URL provided; URL manager cannot be created.")
+                return None
+            self.url_mgr = urlManager(url=self.url)
+        return self.url_mgr
+
+    @property
+    def url(self):
+        if self.url is None and self.url_mgr:
+            self.url = self.url_mgr.url
+        return self.url
+
+    @property
+    def req_mgr(self):
+        if self.req_mgr is None:
+            self.req_mgr = requestManager(
+                url=self.url,
+                url_mgr=self.url_mgr,
+                source_code=self.source_code
+            )
+        return self.req_mgr
+
+    @property
+    def source_code(self):
+        if self.source_code is None and self.req_mgr:
+            self.source_code = self.req_mgr.source_code
+        return self.source_code
+
+    @property
+    def soup_mgr(self):
+        if self.soup_mgr is None:
+            self.soup_mgr = soupManager(
+                url=self.url,
+                url_mgr=self.url_mgr,
+                req_mgr=self.req_mgr,
+                source_code=self.source_code
+            )
+        return self.soup_mgr
+
+    @property
+    def soup(self):
+        if self.soup is None:
+            source = self.source_code
+            if source is None:
+                logging.warning("No source code available; Soup cannot be created.")
+                return None
+            if isinstance(source, bytes):
+                source = source.decode('utf-8', errors='ignore')
+            self.soup = BeautifulSoup(source, self.parse_type)
+        return self.soup
+
+    def update_url(self, url):
+        """Update the URL and reset dependent managers."""
+        self.url = url
+        self.url_mgr = None
+        self.req_mgr = None
+        self.soup_mgr = None
+        self.source_code = None
+        self.soup = None
+
+    def update_source_code(self, source_code):
+        """Update the source code and reset dependent managers."""
+        self.source_code = source_code
+        self.req_mgr = None
+        self.soup_mgr = None
+        self.soup = None
+
+    # Convenience methods for direct access
+    def get_all_tools(self):
+        """Return a dict with all components (similar to original getters)."""
+        return {
+            'url': self.url,
+            'url_mgr': self.url_mgr,
+            'source_code': self.source_code,
+            'req_mgr': self.req_mgr,
+            'soup': self.soup,
+            'soup_mgr': self.soup_mgr
+        }
+    def endow_to_manager(self, target_manager, all_tools=None):
+        """
+        Endow (assign) the attributes from all_tools to the target manager instance.
+
+        Args:
+            target_manager: The instance (e.g., another manager class) to endow attributes to.
+            all_tools (dict or None): Optional dict of tools/attributes. If None, uses self.get_all_tools().
+        """
+        if all_tools is None:
+            all_tools = self.get_all_tools()
+        for key, value in all_tools.items():
+            setattr(target_manager, key, value)
+        return target_manager
+# Wrapper functions for backward compatibility
+def get_url_tools(url=None, url_mgr=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr)
+    return {'url': mgr.url, 'url_mgr': mgr.url_mgr}
+
+def get_req_tools(url=None, url_mgr=None, source_code=None, req_mgr=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr)
+    return {'url': mgr.url, 'url_mgr': mgr.url_mgr, 'source_code': mgr.source_code, 'req_mgr': mgr.req_mgr}
+
+def get_soup_tools(url=None, url_mgr=None, source_code=None, req_mgr=None, soup=None, soup_mgr=None,target_manager=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr, soup_mgr=soup_mgr)
+    if soup is not None:
+        mgr.soup = soup  # Allow overriding
+    if target_manager:
+        mgr.endow_to_manager(target_manager, all_tools=None)
+        return target_manager
+    return mgr.get_all_tools()
+
+
--- /dev/null
+++ abstract_webtools/managers/middleManager/src/UnifiedWebManager.py
@@ -0,0 +1,130 @@
+import logging
+from bs4 import BeautifulSoup
+from ..imports import *
+
+logging.basicConfig(level=logging.INFO)
+
+class UnifiedWebManager:
+    """
+    Unified middleware that ties together URL, request, and soup managers.
+    Lazily initializes components based on provided inputs.
+
+    Args:
+        url (str or None): The base URL.
+        source_code (str or bytes or None): Pre-fetched source code.
+        url_mgr (urlManager or None): Existing URL manager.
+        req_mgr (requestManager or None): Existing request manager.
+        soup_mgr (soupManager or None): Existing soup manager.
+        soup (BeautifulSoup or None): Pre-parsed soup object.
+        parse_type (str): Parser type for BeautifulSoup (default: "html.parser").
+    """
+    def __init__(self, url=None, source_code=None, url_mgr=None, req_mgr=None, soup_mgr=None, soup=None, parse_type="html.parser"):
+        self._url = url
+        self._source_code = source_code
+        self._url_mgr = url_mgr
+        self._req_mgr = req_mgr
+        self._soup_mgr = soup_mgr
+        self._soup = soup
+        self._parse_type = parse_type
+
+    @property
+    def url_mgr(self):
+        if self._url_mgr is None:
+            if self._url is None:
+                logging.warning("No URL provided; URL manager cannot be created.")
+                return None
+            self._url_mgr = get_url_mgr(url=self._url)
+        return self._url_mgr
+
+    @property
+    def url(self):
+        if self._url is None and self.url_mgr is not None:
+            self._url = get_url(url_mgr=self.url_mgr)
+        return self._url
+
+    @property
+    def req_mgr(self):
+        if self._req_mgr is None:
+            self._req_mgr = get_req_mgr(url=self.url, url_mgr=self.url_mgr, source_code=self._source_code)
+        return self._req_mgr
+
+    @property
+    def source_code(self):
+        if self._source_code is None and self.req_mgr is not None:
+            self._source_code = get_source(req_mgr=self.req_mgr)
+        return self._source_code
+
+    @property
+    def soup_mgr(self):
+        if self._soup_mgr is None:
+            self._soup_mgr = get_soup_mgr(url=self.url, url_mgr=self.url_mgr, source_code=self.source_code, req_mgr=self.req_mgr)
+        return self._soup_mgr
+
+    @property
+    def soup(self):
+        if self._soup is None:
+            source = self.source_code
+            if source is None:
+                logging.warning("No source code available; Soup cannot be created.")
+                return None
+            if isinstance(source, bytes):
+                source = source.decode('utf-8', errors='ignore')
+            self._soup = get_soup(source_code=source, parse_type=self._parse_type)
+        return self._soup
+
+    def update_url(self, url):
+        """Update the URL and reset dependent managers."""
+        self._url = url
+        self._url_mgr = None
+        self._req_mgr = None
+        self._soup_mgr = None
+        self._source_code = None
+        self._soup = None
+
+    def update_source_code(self, source_code):
+        """Update the source code and reset dependent managers."""
+        self._source_code = source_code
+        self._req_mgr = None
+        self._soup_mgr = None
+        self._soup = None
+
+    # Convenience methods for direct access
+    def get_all_tools(self):
+        """Return a dict with all components (similar to original getters)."""
+        return {
+            'url': self.url,
+            'url_mgr': self.url_mgr,
+            'source_code': self.source_code,
+            'req_mgr': self.req_mgr,
+            'soup': self.soup,
+            'soup_mgr': self.soup_mgr
+        }
+
+    def endow_to_manager(self, target_manager, all_tools=None):
+        """
+        Endow (assign) the attributes from all_tools to the target manager instance.
+
+        Args:
+            target_manager: The instance (e.g., another manager class) to endow attributes to.
+            all_tools (dict or None): Optional dict of tools/attributes. If None, uses self.get_all_tools().
+        """
+        if all_tools is None:
+            all_tools = self.get_all_tools()
+        for key, value in all_tools.items():
+            setattr(target_manager, key, value)
+        return target_manager
+
+# Wrapper functions for backward compatibility
+def get_url_tools(url=None, url_mgr=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr)
+    return {'url': mgr.url, 'url_mgr': mgr.url_mgr}
+
+def get_req_tools(url=None, url_mgr=None, source_code=None, req_mgr=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr)
+    return {'url': mgr.url, 'url_mgr': mgr.url_mgr, 'source_code': mgr.source_code, 'req_mgr': mgr.req_mgr}
+
+def get_soup_tools(url=None, url_mgr=None, source_code=None, req_mgr=None, soup=None, soup_mgr=None, target_manager=None):
+    mgr = UnifiedWebManager(url=url, url_mgr=url_mgr, source_code=source_code, req_mgr=req_mgr, soup_mgr=soup_mgr, soup=soup)
+    if target_manager:
+        return mgr.endow_to_manager(target_manager)
+    return mgr.get_all_tools()
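A minimal sketch of how the lazy middleware above might be driven. The URL and markup are hypothetical, and the import path assumes the re-exports in the src/__init__.py added below:

    from abstract_webtools.managers.middleManager import UnifiedWebManager, get_soup_tools

    # Components are created on first property access, in dependency order.
    web = UnifiedWebManager(url="https://example.com")
    soup = web.soup  # builds url_mgr -> req_mgr -> source_code -> soup on demand

    # Pre-fetched markup short-circuits the network path entirely.
    offline = UnifiedWebManager(source_code="<html><a href='/x'>x</a></html>")
    print(offline.soup.find('a'))

    # endow_to_manager() copies every tool onto an arbitrary object.
    class Holder: ...
    holder = get_soup_tools(source_code="<p>hi</p>", target_manager=Holder())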
--- /dev/null
+++ abstract_webtools/managers/middleManager/src/__init__.py
@@ -0,0 +1,2 @@
+from .UnifiedWebManager import *
+from .legacy_tools import *
--- /dev/null
+++ abstract_webtools/managers/middleManager/src/legacy_tools.py
@@ -0,0 +1,8 @@
+from ..imports import (
+    get_url,
+    get_soup_mgr,
+    get_source,
+    get_req_mgr,
+    get_soup,
+    get_soup_mgr
+)
--- abstract_webtools/managers/requestManager/requestManager.py
+++ abstract_webtools/managers/requestManager/requestManager.py
@@ -1,5 +1,4 @@
 from ...abstract_webtools import *
-
 from ..userAgentManager import *
 from ..cipherManager import *
 from ..sslManager import *
@@ -7,10 +6,14 @@ from ..tlsAdapter import *
 from ..networkManager import *
 from ..seleniumManager import *
 from ..urlManager import *
+logging.basicConfig(level=logging.INFO)
+
 class requestManager:
     """
-    SafeRequest is a class for making HTTP requests with error handling and retries.
-
+    requestManager is a class for making HTTP requests with error handling and retries.
+    It supports initializing with a provided source_code without requiring a URL.
+    If source_code is provided, it uses that as the response content and skips fetching.
+    Enhanced to parse source_code for URLs, PHP blocks, and React/JS data even if not HTML.
     Args:
         url (str or None): The URL to make requests to (default is None).
         url_mgr (urlManager or None): An instance of urlManager (default is None).
@@ -56,8 +59,7 @@ class requestManager:
     - The SafeRequest class is designed for making HTTP requests with error handling and retries.
     - It provides methods for authentication, response handling, and error management.
     """
-    def __init__(self,
-                 url=None,
+    def __init__(self,url=None,
                  source_code=None,
                  url_mgr=None,
                  network_manager=None,
@@ -78,257 +80,299 @@ class requestManager:
                  login_user_agent=None,
                  auth=None,
                  login_url=None,
-                 email = None,
+                 email=None,
                  password=None,
                  checkbox=None,
                  dropdown=None,
                  certification=None,
                  stream=False,
-                 timeout = None,
+                 timeout=None,
                  last_request_time=None,
                  max_retries=None,
-                 request_wait_limit=
-                 None):
-        self.url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
-        self.url=get_url(url=url,url_mgr=self.url_mgr)
+                 request_wait_limit=None):
+        self.url_mgr = get_url_mgr(url=url, url_mgr=url_mgr)
+        self.url = get_url(url=url, url_mgr=self.url_mgr)
         self._url_mgr = self.url_mgr
-        self._url=self.url
+        self._url = self.url
         self.user_agent = user_agent
         self.user_agent_manager = user_agent_manager or UserAgentManager(user_agent=self.user_agent)
-        self.headers= headers or self.user_agent_manager.header or {'Accept': '*/*'}
-        self.user_agent= self.user_agent_manager.user_agent
-        self.ciphers=ciphers or CipherManager().ciphers_string
-        self.certification=certification
-        self.ssl_options=ssl_options
+        self.headers = headers or self.user_agent_manager.header or {'Accept': '*/*'}
+        self.user_agent = self.user_agent_manager.user_agent
+        self.ciphers = ciphers or CipherManager().ciphers_string
+        self.certification = certification
+        self.ssl_options = ssl_options
         self.ssl_manager = ssl_manager or SSLManager(ciphers=self.ciphers, ssl_options=self.ssl_options, certification=self.certification)
-        self.tls_adapter=tls_adapter or TLSAdapter(ssl_manager=self.ssl_manager,certification=self.certification,ssl_options=self.ssl_manager.ssl_options)
-        self.network_manager= network_manager or NetworkManager(user_agent_manager=self.user_agent_manager,ssl_manager=self.ssl_manager, tls_adapter=self.tls_adapter,user_agent=user_agent,proxies=proxies,cookies=cookies,ciphers=ciphers, certification=certification, ssl_options=ssl_options)
-        self.stream=stream
-        self.tls_adapter=self.network_manager.tls_adapter
-        self.ciphers=self.network_manager.ciphers
-        self.certification=self.network_manager.certification
-        self.ssl_options=self.network_manager.ssl_options
-        self.proxies=self.network_manager.proxies
-        self.timeout=timeout
-        self.cookies=self.network_manager.cookies
-        self.session = session or requests.session()
+        self.tls_adapter = tls_adapter or TLSAdapter(ssl_manager=self.ssl_manager, certification=self.certification, ssl_options=self.ssl_manager.ssl_options)
+        self.network_manager = network_manager or NetworkManager(user_agent_manager=self.user_agent_manager, ssl_manager=self.ssl_manager, tls_adapter=self.tls_adapter, user_agent=user_agent, proxies=proxies, cookies=cookies, ciphers=ciphers, certification=certification, ssl_options=ssl_options)
+        self.stream = stream
+        self.tls_adapter = self.network_manager.tls_adapter
+        self.ciphers = self.network_manager.ciphers
+        self.certification = self.network_manager.certification
+        self.ssl_options = self.network_manager.ssl_options
+        self.proxies = self.network_manager.proxies
+        self.timeout = timeout
+        self.cookies = self.network_manager.cookies
+        self.session = session or requests.Session()
         self.auth = auth
-        self.spec_login=spec_login
-        self.password=password
+        self.spec_login = spec_login
+        self.password = password
         self.email = email
-        self.checkbox=checkbox
-        self.dropdown=dropdown
-        self.login_url=login_url
-        self.login_user_agent=login_user_agent
-        self.login_referer=login_referer
-        self.protocol=protocol or 'https://'
-
-        self.stream=stream if isinstance(stream,bool) else False
+        self.checkbox = checkbox
+        self.dropdown = dropdown
+        self.login_url = login_url
+        self.login_user_agent = login_user_agent
+        self.login_referer = login_referer
+        self.protocol = protocol or 'https://'
+        self.stream = stream if isinstance(stream, bool) else False
         self.initialize_session()
-        self.last_request_time=last_request_time
+        self.last_request_time = last_request_time
         self.max_retries = max_retries or 3
         self.request_wait_limit = request_wait_limit or 1.5
-        self._response=None
-        self.status_code=None
-        self.source_code = get_selenium_source(self.url)
-        self.source_code_bytes=None
+        self._response = None
+        self.status_code = None
+        self.source_code = None
+        self.source_code_bytes = None
         self.source_code_json = {}
-        self.react_source_code=[]
+        self.react_source_code = []
+        self.extracted_urls = []
+        self.php_blocks = []
         self._response_data = None
-        self.process_response_data()
-    def update_url_mgr(self,url_mgr):
-        self.url_mgr=url_mgr
+        if source_code is not None:
+            self._response = source_code
+            self.process_response_data()
+        else:
+            self.re_initialize()
+
+    def update_url_mgr(self, url_mgr):
+        self.url_mgr = url_mgr
         self.re_initialize()
-    def update_url(self,url):
+
+    def update_url(self, url):
         self.url_mgr.update_url(url=url)
         self.re_initialize()
+
     def re_initialize(self):
-        self._response=None
-        self.make_request()
+        self._response = None
+        if self.url_mgr.url is not None:
+            self.make_request()
         self.source_code = None
-        self.source_code_bytes=None
+        self.source_code_bytes = None
         self.source_code_json = {}
-        self.react_source_code=[]
+        self.react_source_code = []
+        self.extracted_urls = []
+        self.php_blocks = []
         self._response_data = None
         self.process_response_data()
+
     @property
     def response(self):
         """Lazy-loading of response."""
-        if self._response is None:
+        if self._response is None and self.url_mgr.url is not None:
             self._response = self.fetch_response()
-
-
         return self._response
-    def authenticate(self,session, login_url=None, email=None, password=None,checkbox=None,dropdown=None):
-        login_urls = login_url or [self.url_mgr.url,self.url_mgr.domain,self.url_mgr.url_join(url=self.url_mgr.domain,path='login'),self.url_mgr.url_join(url=self.url_mgr.domain,path='auth')]
+
+    def authenticate(self, session, login_url=None, email=None, password=None, checkbox=None, dropdown=None):
+        login_urls = login_url or [self.url_mgr.url, self.url_mgr.domain, self.url_mgr.url_join(url=self.url_mgr.domain, path='login'), self.url_mgr.url_join(url=self.url_mgr.domain, path='auth')]
         s = session
-        if not isinstance(login_urls,list):
-            login_urls=[login_urls]
+        if not isinstance(login_urls, list):
+            login_urls = [login_urls]
         for login_url in login_urls:
             login_url_mgr = urlManager(login_url)
             login_url = login_url_mgr.url
-
             r = s.get(login_url)
             soup = BeautifulSoup(r.content, "html.parser")
             # Find the token or any CSRF protection token
             token = soup.find('input', {'name': 'token'}).get('value') if soup.find('input', {'name': 'token'}) else None
-            if token != None:
+            if token is not None:
                 break
         login_data = {}
-        if email != None:
-            login_data['email']=email
-        if password != None:
+        if email is not None:
+            login_data['email'] = email
+        if password is not None:
             login_data['password'] = password
-        if checkbox != None:
+        if checkbox is not None:
             login_data['checkbox'] = checkbox
-        if dropdown != None:
-            login_data['dropdown']=dropdown
-        if token != None:
+        if dropdown is not None:
+            login_data['dropdown'] = dropdown
+        if token is not None:
             login_data['token'] = token
         s.post(login_url, data=login_data)
         return s
 
-    def fetch_response(self) -> Union[requests.Response, None]:
+    def fetch_response(self) -> requests.Response | None | str | bytes:
         """Actually fetches the response from the server."""
-        # You can further adapt this method to use retries or other logic you had
-        # in your original code, but the main goal here is to fetch and return the response
         return self.try_request()
+
     def spec_auth(self, session=None, email=None, password=None, login_url=None, login_referer=None, login_user_agent=None):
-        s = session or requests.session()
-
-        domain = self.url_mgr.url_join(self.url_mgr.get_correct_url(self.url_mgr.domain),'login') if login_url is None else login_url
+        s = session or requests.Session()
+        domain = self.url_mgr.url_join(self.url_mgr.get_correct_url(self.url_mgr.domain), 'login') if login_url is None else login_url
         login_url = self.url_mgr.get_correct_url(url=domain)
-
         login_referer = login_referer or self.url_mgr.url_join(url=login_url, path='?role=fast&to=&s=1&m=1&email=YOUR_EMAIL')
         login_user_agent = login_user_agent or 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:50.0) Gecko/20100101 Firefox/50.0'
-
         headers = {"Referer": login_referer, 'User-Agent': login_user_agent}
         payload = {'email': email, 'pass': password}
-
         page = s.get(login_url)
         soup = BeautifulSoup(page.content, 'lxml')
         action_url = soup.find('form')['action']
         s.post(action_url, data=payload, headers=headers)
         return s
+
     def initialize_session(self):
-        s = self.session
+        s = self.session
         if self.auth:
-            s= self.auth
+            s = self.auth
         elif self.spec_login:
-            s=self.spec_auth(session=s,email=self.email, password=self.password, login_url=self.login_url, login_referer=self.login_referer, login_user_agent=self.login_user_agent)
+            s = self.spec_auth(session=s, email=self.email, password=self.password, login_url=self.login_url, login_referer=self.login_referer, login_user_agent=self.login_user_agent)
         elif any([self.password, self.email, self.login_url, self.checkbox, self.dropdown]):
-            s=self.authenticate(session=s, login_url=self.login_url, email=self.email, password=self.password, checkbox=self.checkbox, dropdown=self.dropdown)
+            s = self.authenticate(session=s, login_url=self.login_url, email=self.email, password=self.password, checkbox=self.checkbox, dropdown=self.dropdown)
         s.proxies = self.proxies
         s.cookies["cf_clearance"] = self.network_manager.cookies
         s.headers.update(self.headers)
         s.mount(self.protocol, self.network_manager.tls_adapter)
         return s
+
     def process_response_data(self):
         """Processes the fetched response data."""
         if not self.response:
             return # No data to process
-        if isinstance(self.response,str):
-            self.source_code = self.response
+        if isinstance(self.response, (str, bytes)):
+            if isinstance(self.response, str):
+                self.source_code = self.response
+                self.source_code_bytes = self.response.encode('utf-8')  # Assume UTF-8
+            else:
+                self.source_code_bytes = self.response
+                try:
+                    self.source_code = self.response.decode('utf-8')
+                except UnicodeDecodeError:
+                    self.source_code = self.response.decode('latin-1')  # Fallback
+            # Check if it's JSON
+            try:
+                data = json.loads(self.source_code)
+                self.source_code_json = data.get("response", data)
+            except json.JSONDecodeError:
+                pass
         else:
             self.source_code = self.response.text
             self.source_code_bytes = self.response.content
-            if self.response.headers.get('content-type') == 'application/json':
-                data = convert_to_json(self.source_code)
-                if data:
+            if self.response.headers.get('content-type', '').startswith('application/json'):
+                try:
+                    data = json.loads(self.source_code)
                     self.source_code_json = data.get("response", data)
-
-        self.get_react_source_code()
-    def get_react_source_code(self) -> list:
-        """
-        Fetches the source code of the specified URL and extracts JavaScript and JSX source code (React components).
+                except json.JSONDecodeError:
+                    pass
+        self.extract_urls()
+        self.extract_php_blocks()
+        self.get_react_source_code()
+
+    def extract_urls(self):
+        """Extract URLs from source_code using regex."""
+        if not self.source_code:
+            return
+        url_pattern = r'https?://[^\s<>"\']+'
+        self.extracted_urls = re.findall(url_pattern, self.source_code)
 
-        Args:
-            url (str): The URL to fetch the source code from.
+    def extract_php_blocks(self):
+        """Extract PHP blocks from source_code if present."""
+        if not self.source_code:
+            return
+        php_pattern = r'<\?php(.*?)?\?>'
+        self.php_blocks = re.findall(php_pattern, self.source_code, re.DOTALL)
 
-        Returns:
-            list: A list of strings containing JavaScript and JSX source code found in <script> tags.
+    def get_react_source_code(self) -> list:
         """
-        if self.url_mgr.url is None:
+        Extracts JavaScript and JSX source code from <script> tags if HTML-like.
+        If not HTML and looks like JS/React code, appends the whole source_code.
+        """
+        if not self.source_code:
             return []
-        soup = BeautifulSoup(self.source_code_bytes,"html.parser")
-        script_tags = soup.find_all('script', type=lambda t: t and ('javascript' in t or 'jsx' in t))
+        # Check if likely JS code (e.g., contains 'import', 'function', 'React')
+        is_js_like = any(keyword in self.source_code.lower() for keyword in ['import ', 'function ', 'react', 'export ', 'const ', 'let ', 'var '])
+        # Check if HTML-like
+        is_html_like = self.source_code.strip().startswith('<') or '<html' in self.source_code.lower() or '<!doctype' in self.source_code.lower()
+        if not is_html_like and is_js_like:
+            self.react_source_code.append(self.source_code)
+            return self.react_source_code
+        content = self.source_code_bytes or self.source_code.encode('utf-8')
+        soup = BeautifulSoup(content, "html.parser")
+        script_tags = soup.find_all('script', type=lambda t: t and ('javascript' in t.lower() or 'jsx' in t.lower()))
         for script_tag in script_tags:
-            self.react_source_code.append(script_tag.string)
+            if script_tag.string:
+                self.react_source_code.append(script_tag.string)
+        # If no scripts found but JS-like, append whole
+        if not script_tags and is_js_like:
+            self.react_source_code.append(self.source_code)
+        return self.react_source_code
 
-
-    def get_status(url:str=None) -> int:
+    def get_status(self, url: str = None) -> int | None:
         """
         Gets the HTTP status code of the given URL.
-
-        Args:
-            url (str): The URL to check the status of.
-
-        Returns:
-            int: The HTTP status code of the URL, or None if the request fails.
         """
-        # Get the status code of the URL
-        return try_request(url=url).status_code
+        url = url or self.url_mgr.url
+        if url is None:
+            return None
+        try:
+            response = requests.head(url, timeout=5)
+            return response.status_code
+        except requests.RequestException:
+            return None
+
     def wait_between_requests(self):
         """
         Wait between requests based on the request_wait_limit.
         """
         if self.last_request_time:
-            sleep_time = self.request_wait_limit - (get_time_stamp() - self.last_request_time)
+            sleep_time = self.request_wait_limit - (time.time() - self.last_request_time)
             if sleep_time > 0:
                 logging.info(f"Sleeping for {sleep_time:.2f} seconds.")
-                get_sleep(sleep_time)
+                time.sleep(sleep_time)
 
     def make_request(self):
         """
         Make a request and handle potential errors.
         """
-        # Update the instance attributes if they are passed
-
+        if self.url_mgr.url is None:
+            return None
         self.wait_between_requests()
         for _ in range(self.max_retries):
             try:
-                self.try_request() # 10 seconds timeout
-                if self.response:
-                    self.status_code = self.response.status_code
-                    if self.response.status_code == 200:
-                        self.last_request_time = get_time_stamp()
-                        return self.response
-                    elif self.response.status_code == 429:
-                        logging.warning(f"Rate limited by {self.url_mgr.url}. Retrying...")
-                        get_sleep(5) # adjust this based on the server's rate limit reset time
+                self._response = self.try_request()
+                if self._response:
+                    if not isinstance(self._response, (str, bytes)):
+                        self.status_code = self._response.status_code
+                        if self._response.status_code == 200:
+                            self.last_request_time = time.time()
+                            return self._response
+                        elif self._response.status_code == 429:
+                            logging.warning(f"Rate limited by {self.url_mgr.url}. Retrying...")
+                            time.sleep(5)
             except requests.Timeout as e:
-                logging.error(f"Request to {cleaned_url} timed out: {e}")
+                logging.error(f"Request to {self.url_mgr.url} timed out: {e}")
             except requests.ConnectionError:
                 logging.error(f"Connection error for URL {self.url_mgr.url}.")
-            except requests.Timeout:
-                logging.error(f"Request timeout for URL {self.url_mgr.url}.")
             except requests.RequestException as e:
                 logging.error(f"Request exception for URL {self.url_mgr.url}: {e}")
         try:
             response = get_selenium_source(self.url_mgr.url)
             if response:
-                self.response = response
-                return self.response
-        except:
-            logging.error(f"Failed to retrieve content from {self.url_mgr.url} after {self.max_retries} retries.")
+                self._response = response
+                self.status_code = 200  # Assume success
+                return self._response
+        except Exception as e:
+            logging.error(f"Failed to retrieve content from {self.url_mgr.url} after {self.max_retries} retries: {e}")
         return None
-    def try_request(self) -> Union[requests.Response, None]:
+
+    def try_request(self) -> requests.Response | str | bytes | None:
         """
         Tries to make an HTTP request to the given URL using the provided session.
-
-        Args:
-            timeout (int): Timeout for the request.
-
-        Returns:
-            requests.Response or None: The response object if the request is successful, or None if the request fails.
         """
+        if self.url_mgr.url is None:
+            return None
         try:
-            return get_selenium_source(self.url_mgr.url)#self.session.get(url=self.url_mgr.url, timeout=self.timeout,stream=self.stream)
-        except requests.exceptions.RequestException as e:
-            print(e)
+            return get_selenium_source(self.url_mgr.url) # or self.session.get(self.url_mgr.url, timeout=self.timeout, stream=self.stream)
+        except requests.RequestException as e:
+            logging.error(f"Request failed: {e}")
             return None
 
-
     @property
     def url(self):
         return self.url_mgr.url
@@ -345,12 +389,11 @@ class SafeRequestSingleton:
         elif SafeRequestSingleton._instance.url != url or SafeRequestSingleton._instance.headers != headers or SafeRequestSingleton._instance.max_retries != max_retries or SafeRequestSingleton._instance.request_wait_limit != request_wait_limit:
             SafeRequestSingleton._instance = SafeRequest(url,url_mgr=urlManagerSingleton,headers=headers,max_retries=max_retries,last_request_time=last_request_time,request_wait_limit=request_wait_limit)
         return SafeRequestSingleton._instance
+def get_source(url=None,url_mgr=None,source_code=None,req_mgr=None):
+    req_mgr = get_req_mgr(req_mgr=req_mgr,url=url,url_mgr=url_mgr,source_code=source_code)
+    return req_mgr.source_code
 def get_req_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None):
     url = get_url(url=url,url_mgr=url_mgr)
     url_mgr = get_url_mgr(url=url,url_mgr=url_mgr )
     req_mgr = req_mgr or requestManager(url_mgr=url_mgr,url=url,source_code=source_code)
     return req_mgr
-def get_source(url=None,url_mgr=None,source_code=None,req_mgr=None):
-    # Placeholder for actual implementation.
-    req_mgr = get_req_mgr(req_mgr=req_mgr,url=url,url_mgr=url_mgr,source_code=source_code)
-    return req_mgr.source_code
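A short sketch of the new offline path in requestManager above (the markup is hypothetical; get_source is the functional wrapper moved ahead of get_req_mgr in this hunk):

    from abstract_webtools.managers.requestManager.requestManager import requestManager, get_source

    # With source_code supplied, __init__ skips fetching and parses what it is given.
    raw = "<html><script type='text/javascript'>const x = 1;</script></html>"
    req = requestManager(source_code=raw)
    print(req.react_source_code)  # ['const x = 1;'] via the <script> scan
    print(req.extracted_urls)     # [] -- the regex pass found no http(s) URLs

    # get_source() resolves a manager and returns its source_code in one call.
    html = get_source(url="https://example.com")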
--- abstract_webtools/managers/soupManager/soupManager.py
+++ abstract_webtools/managers/soupManager/soupManager.py
@@ -39,7 +39,7 @@ class soupManager:
     - The SoupManager class is designed for parsing HTML source code using BeautifulSoup.
     - It provides various methods to extract data and discover elements within the source code.
     """
-    def __init__(self,url=None,source_code=None,url_mgr=None,req_mgr=None, parse_type="html.parser"):
+    def __init__(self,url=None,source_code=None,url_mgr=None,req_mgr=None,soup=None, parse_type="html.parser"):
         self.soup=[]
         url = get_url(url=url,url_mgr=url_mgr)
         self.url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
@@ -49,8 +49,8 @@ class soupManager:
         source_code = source_code or self.req_mgr.source_code or self.req_mgr.source_code_bytes
         if source_code:
             source_code = str(source_code)
-        self.source_code = source_code
-        self.soup= BeautifulSoup(self.source_code, self.parse_type)
+        self.source_code = source_code or ''
+        self.soup= soup or BeautifulSoup(self.source_code, self.parse_type)
         self.all_tags_and_attribute_names = self.get_all_tags_and_attribute_names()
         self.all_tags = self.all_tags_and_attribute_names.get('tags')
         self.all_attribute_names = self.all_tags_and_attribute_names.get('attributes')
@@ -340,16 +340,8 @@ class SoupManagerSingleton():
         elif parse_type != SoupManagerSingleton._instance.parse_type or source_code != SoupManagerSingleton._instance.source_code:
             SoupManagerSingleton._instance = SoupManager(url_mgr,requestManager,parse_type=parse_type,source_code=source_code)
         return SoupManagerSingleton._instance
-def get_soup_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,parse_type="html.parser"):
-    url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
-    url = get_url(url=url,url_mgr=url_mgr)
-    req_mgr = get_req_mgr(url_mgr=url_mgr,url=url,source_code=source_code)
-    soup_mgr = soup_mgr or soupManager(url_mgr=url_mgr,req_mgr=req_mgr,url=url,source_code=source_code)
-    return soup_mgr
-def get_all_attribute_values(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,tags_list = None,parse_type="html.parser"):
-    soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr)
-    return soup_mgr.get_all_attribute_values(tags_list=tags_list)
-def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,parse_type="html.parser"):
+def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,soup=None,parse_type=None):
+    parse_type = parse_type or "html.parser"
     if source_code or soup_mgr:
         if soup_mgr:
             return soup_mgr.soup
@@ -358,5 +350,16 @@ def get_soup(url=None,url_mgr=None,req_mgr=None,source_code=None,soup_mgr=None,p
     url = get_url(url=url,url_mgr=url_mgr)
     req_mgr = req_mgr or get_req_mgr(url_mgr=url_mgr,url=url,source_code=source_code)
     source_code = req_mgr.source_code
-    soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr)
+    soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr,soup=soup)
     return soup_mgr.soup
+def get_soup_mgr(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,soup=None,parse_type=None):
+    parse_type = parse_type or "html.parser"
+    url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
+    url = get_url(url=url,url_mgr=url_mgr)
+    req_mgr = get_req_mgr(url_mgr=url_mgr,url=url,source_code=source_code)
+    soup_mgr = soup_mgr or soupManager(url_mgr=url_mgr,req_mgr=req_mgr,url=url,source_code=source_code,soup=soup)
+    return soup_mgr
+def get_all_attribute_values(url=None,url_mgr=None,source_code=None,req_mgr=None,soup_mgr=None,soup=None,tags_list = None,parse_type=None):
+    parse_type = parse_type or "html.parser"
+    soup_mgr = get_soup_mgr(url=url,url_mgr=url_mgr,source_code=source_code,req_mgr=req_mgr,soup_mgr=soup_mgr,soup=soup)
+    return soup_mgr.get_all_attribute_values(tags_list=tags_list)
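A sketch of the reordered helper trio, assuming (as the guard at the top of get_soup suggests) that a bare source_code is accepted without a URL; the markup is hypothetical:

    from abstract_webtools.managers.soupManager.soupManager import (
        get_soup, get_soup_mgr, get_all_attribute_values)

    html = "<div id='a'>hello</div>"
    soup = get_soup(source_code=html)          # BeautifulSoup over the given markup
    soup_mgr = get_soup_mgr(source_code=html)  # full soupManager, no network needed
    values = get_all_attribute_values(source_code=html, tags_list=['div'])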
--- abstract_webtools/managers/urlManager/urlManager.py
+++ abstract_webtools/managers/urlManager/urlManager.py
@@ -17,21 +17,21 @@ class urlManager:
     Now handles url=None gracefully: sets internals to None/empty and methods return None or empty values without errors.
     """
     def __init__(self, url=None, session=None):
-        self._url = url # Allow None
-        self.session = session or requests.Session()
-        if self._url is None:
-            self.clean_urls = []
-            self.url = None
-            self.protocol = None
-            self.domain = None
-            self.path = ""
-            self.query = ""
-            self.all_urls = []
-        else:
-            self.clean_urls = self.clean_url()
-            self.url = self.get_correct_url() or self._url
-            self.protocol, self.domain, self.path, self.query = self.url_to_pieces(self.url)
-            self.all_urls = []
+        self._url = url # Allow None
+        self.session = session or requests.Session()
+        if self._url is None:
+            self.clean_urls = []
+            self.url = None
+            self.protocol = None
+            self.domain = None
+            self.path = ""
+            self.query = ""
+            self.all_urls = []
+        else:
+            self.clean_urls = self.clean_url()
+            self.url = self.get_correct_url() or self._url
+            self.protocol, self.domain, self.path, self.query = self.url_to_pieces(self.url)
+            self.all_urls = []
 
     def url_to_pieces(self, url):
         """
@@ -233,17 +233,9 @@ class urlManagerSingleton:
         elif urlManagerSingleton._instance.session != session or urlManagerSingleton._instance.url != url:
             urlManagerSingleton._instance = urlManager(url, session=session)
         return urlManagerSingleton._instance
+def get_url(url=None,url_mgr=None):
+    url_mgr = get_url_mgr(url=url,url_mgr=url_mgr)
+    return url_mgr.url
+def get_url_mgr(url=None,url_mgr=None):
+    return url_mgr or urlManager(url)
 
-def get_url(url=None, url_mgr=None):
-    if not url and not url_mgr:
-        return None
-    if url_mgr is None and url is not None:
-        url_mgr = urlManager(url)
-    return url_mgr.url if url_mgr else None
-
-def get_url_mgr(url=None, url_mgr=None):
-    if url_mgr is None:
-        url_mgr = urlManager(url=url) # Always create instance, even if url=None
-    if url_mgr and url is None:
-        url = url_mgr.url
-    return url_mgr
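The replacement helpers are now thin pass-throughs; a quick sketch (hypothetical URL, attribute names as set in urlManager.__init__ above):

    from abstract_webtools.managers.urlManager.urlManager import urlManager, get_url, get_url_mgr

    mgr = get_url_mgr(url="https://example.com/path?q=1")
    print(mgr.protocol, mgr.domain, mgr.path, mgr.query)

    # An existing manager is passed through unchanged:
    assert get_url_mgr(url_mgr=mgr) is mgr
    print(get_url(url_mgr=mgr))  # the cleaned/normalized URL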
--- abstract_webtools-0.1.6.139.dist-info/METADATA
+++ abstract_webtools-0.1.6.141.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.139
+Version: 0.1.6.141
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff
--- abstract_webtools-0.1.6.139.dist-info/RECORD
+++ abstract_webtools-0.1.6.141.dist-info/RECORD
@@ -10,7 +10,7 @@ abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,132
 abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
 abstract_webtools/url_grabber.py,sha256=pnCCev7ZIuM-6cAGTLmK5HfzZg_AX-fLcRpB6ZE70B8,10441
 abstract_webtools/url_grabber_new.py,sha256=xb23qo4anOY0Ax3CAfaHJ8s5VEz61Sinh-XpEDFW7Is,3621
-abstract_webtools/managers/__init__.py,sha256=9pgy52NB-ONxLqoCRF52GZ6G7GM6Uc0-fgA1HvKcwxc,407
+abstract_webtools/managers/__init__.py,sha256=d7Q6_McRuKOHmKuna19s0l1wMgtM1JgUX8rHaSqJIcE,436
 abstract_webtools/managers/allss\.py,sha256=IBhlyRQHfK-BtwUnSEbIPqlI1MtZ8-XsdaHv0b91HQ0,269
 abstract_webtools/managers/cipherManager.py,sha256=NHQGdR11eNSm-1H-GezD5dyQgsPTJwY5kczt8Sher2s,1621
 abstract_webtools/managers/crawlManager.py,sha256=62Ej6AQC6-qXX_EWOmcJ2szNvEjmebFGugMz65HF1qI,12983
@@ -30,16 +30,22 @@ abstract_webtools/managers/videoDownloader2.py,sha256=v3H6akdhvVWGrB-r35m3cp_-aK
 abstract_webtools/managers/clownworld/__init__.py,sha256=eq25euhRbFqHLm1ibi_7FGz_oNWs-kkyAkETzK3r4_Q,35
 abstract_webtools/managers/clownworld/get_bolshevid_video.py,sha256=dNZdOxhXSA13DWFjdSOmvYrI3HybkrrvTBaMDbJfhfo,10140
 abstract_webtools/managers/linkManager/__init__.py,sha256=NpfWNzvTLSfsIWSeLYIxPzeLHADk_grSx5rfgCeWERw,27
-abstract_webtools/managers/linkManager/linkManager.py,sha256=bW7KmThqTOXPXj_IV0GDNTlDzxXomvr_puBBmt94Y1g,12057
+abstract_webtools/managers/linkManager/linkManager.py,sha256=QrAJq-Zt907jnsm2P9si8SQ5O5QrXor7Jn5W5rge4xU,12662
+abstract_webtools/managers/middleManager/__init__.py,sha256=RLLS1CxPpixIiV50P6tFaJcQ9C2O3lz19I4EDMc_4rE,19
+abstract_webtools/managers/middleManager/imports.py,sha256=T0cdlABayG64RI4PnDRf7gwLvcQ5owobD0EdaD0Fcuc,334
+abstract_webtools/managers/middleManager/src/UnifiedWebManage3r.py,sha256=j_EBd2QkGFTLBKUen9k-mRWHfT6NwtFfoFCI-AagKtA,5442
+abstract_webtools/managers/middleManager/src/UnifiedWebManager.py,sha256=qYCvfjUbyXrJEvOEqX7SkW7qyoaXP641DUno0N2ivN8,5022
+abstract_webtools/managers/middleManager/src/__init__.py,sha256=YaSAh7AG1EvFWFZBIe4pGvzmfr60rpR9ZDWoQKqAMd0,61
+abstract_webtools/managers/middleManager/src/legacy_tools.py,sha256=2cCnRaq8UO7HdtffNtAOsZFJm_mpZbpvBuX0pIIWGaM,125
 abstract_webtools/managers/requestManager/__init__.py,sha256=z2qGtweEoO_OKr959LGxVXEMu1hu7PIkmh89BEh5TI8,30
-abstract_webtools/managers/requestManager/requestManager.py,sha256=g3kqaGRXu5ZR8rj58xN32fBVPpDXI_CRuW3tJgnbznE,17459
+abstract_webtools/managers/requestManager/requestManager.py,sha256=26BdfGrkWq2ouDaf0P8HTVK46PtPZJHUO46lIZgd8D8,19768
 abstract_webtools/managers/soupManager/__init__.py,sha256=mqfXfqM9sWlYpOkoXUqtBoVvk2KQx1862NnmRVJwGtY,27
 abstract_webtools/managers/soupManager/asoueces.py,sha256=OaXqolZl0dI7b09NYwJ3Wnhuxf89ahZ1GjsOqy0GXfk,3506
-abstract_webtools/managers/soupManager/soupManager.py,sha256=U3_o189-OWoBRaSCe2sIkg-bHxBt2mKpYMyZd-nJjLQ,17201
+abstract_webtools/managers/soupManager/soupManager.py,sha256=75gwqVXIRwgVqzATBC-DiJF2AT_AdE6FSBWy3DbW5ZA,17393
 abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
 abstract_webtools/managers/urlManager/urlManager (Copy).py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
-abstract_webtools/managers/urlManager/urlManager.py,sha256=LG8WiEsf16vMzum48D5rmbRNK6VzYG4FyOTr2FJiOEc,9133
-abstract_webtools-0.1.6.139.dist-info/METADATA,sha256=hCJLYjOtzk6EDrOMGvw3LklUZphmj-gO3GpMLZKHJjQ,7289
-abstract_webtools-0.1.6.139.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-abstract_webtools-0.1.6.139.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
-abstract_webtools-0.1.6.139.dist-info/RECORD,,
+abstract_webtools/managers/urlManager/urlManager.py,sha256=vY4KQXtcrlC2YtlultxQpVe581l5kAuT5VGA0WrI16g,8945
+abstract_webtools-0.1.6.141.dist-info/METADATA,sha256=B2mQsvg0AVXNBUQADJ9ttFvCh0YleyRUjvFmcPfCpNo,7289
+abstract_webtools-0.1.6.141.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+abstract_webtools-0.1.6.141.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+abstract_webtools-0.1.6.141.dist-info/RECORD,,