abstract-webtools 0.1.6.146__tar.gz → 0.1.6.148__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. abstract_webtools-0.1.6.148/PKG-INFO +482 -0
  2. abstract_webtools-0.1.6.148/README.md +445 -0
  3. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/pyproject.toml +0 -2
  4. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/setup.cfg +0 -0
  5. abstract_webtools-0.1.6.148/setup.py +45 -0
  6. abstract_webtools-0.1.6.148/src/abstract_webtools/__init__.py +9 -0
  7. abstract_webtools-0.1.6.148/src/abstract_webtools/abstract_webtools.py +1869 -0
  8. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/big_user_agent_list.py +0 -0
  9. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/main.py +0 -0
  10. abstract_webtools-0.1.6.148/src/abstract_webtools/managers/seleneumManager.py +242 -0
  11. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/soup_gui.py +0 -0
  12. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/url_grabber.py +73 -1
  13. abstract_webtools-0.1.6.148/src/abstract_webtools.egg-info/PKG-INFO +482 -0
  14. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools.egg-info/SOURCES.txt +2 -1
  15. abstract_webtools-0.1.6.148/src/abstract_webtools.egg-info/requires.txt +15 -0
  16. abstract_webtools-0.1.6.146/PKG-INFO +0 -196
  17. abstract_webtools-0.1.6.146/README.md +0 -168
  18. abstract_webtools-0.1.6.146/setup.py +0 -28
  19. abstract_webtools-0.1.6.146/src/abstract_webtools/__init__.py +0 -3
  20. abstract_webtools-0.1.6.146/src/abstract_webtools/abstract_webtools.py +0 -101
  21. abstract_webtools-0.1.6.146/src/abstract_webtools.egg-info/PKG-INFO +0 -196
  22. abstract_webtools-0.1.6.146/src/abstract_webtools.egg-info/requires.txt +0 -4
  23. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/abstract_usurpit.py +0 -0
  24. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/domain_identifier.py +0 -0
  25. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/extention_list.py +0 -0
  26. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/find_dirs.py +0 -0
  27. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/k2s_downloader.py +0 -0
  28. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/__init__.py +0 -0
  29. /abstract_webtools-0.1.6.146/src/abstract_webtools/managers/allss//.py" → /abstract_webtools-0.1.6.148/src/abstract_webtools/managers/allss.py +0 -0
  30. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/cipherManager.py +0 -0
  31. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/clownworld/__init__.py +0 -0
  32. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/clownworld/get_bolshevid_video.py +0 -0
  33. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/crawlManager.py +0 -0
  34. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/crawlmgr2.py +0 -0
  35. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/curlMgr.py +0 -0
  36. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/domainManager.py +0 -0
  37. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
  38. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/get_test.py +0 -0
  39. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/linkManager/__init__.py +0 -0
  40. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/linkManager/linkManager.py +0 -0
  41. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/meta_dump.py +0 -0
  42. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/middleManager/__init__.py +0 -0
  43. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/middleManager/imports.py +0 -0
  44. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/middleManager/src/UnifiedWebManage3r.py +0 -0
  45. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/middleManager/src/UnifiedWebManager.py +0 -0
  46. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/middleManager/src/__init__.py +0 -0
  47. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/middleManager/src/legacy_tools.py +0 -0
  48. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/mySocketClient.py +0 -0
  49. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/networkManager.py +0 -0
  50. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/requestManager/__init__.py +0 -0
  51. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/requestManager/requestManager.py +0 -0
  52. /abstract_webtools-0.1.6.146/src/abstract_webtools/managers/seleneumManager.py → /abstract_webtools-0.1.6.148/src/abstract_webtools/managers/seleniumManager.py +0 -0
  53. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/soupManager/__init__.py +0 -0
  54. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/soupManager/asoueces.py +0 -0
  55. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/soupManager/soupManager.py +0 -0
  56. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/sslManager.py +0 -0
  57. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
  58. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/urlManager/__init__.py +0 -0
  59. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/urlManager/urlManager (Copy).py +0 -0
  60. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/urlManager/urlManager.py +0 -0
  61. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/userAgentManager.py +0 -0
  62. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/videoDownloader.py +0 -0
  63. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/managers/videoDownloader2.py +0 -0
  64. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools/url_grabber_new.py +0 -0
  65. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
  66. {abstract_webtools-0.1.6.146 → abstract_webtools-0.1.6.148}/src/abstract_webtools.egg-info/top_level.txt +0 -0
@@ -0,0 +1,482 @@
1
+ Metadata-Version: 2.4
2
+ Name: abstract_webtools
3
+ Version: 0.1.6.148
4
+ Summary: Utilities for fetching/parsing web content with requests/urllib3/BS4 and helpers.
5
+ Home-page: https://github.com/AbstractEndeavors/abstract_webtools
6
+ Author: putkoff
7
+ Author-email: partners@abstractendeavors.com
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: requests>=2.31.0
16
+ Requires-Dist: urllib3>=2.0.4
17
+ Requires-Dist: beautifulsoup4>=4.12.0
18
+ Provides-Extra: gui
19
+ Requires-Dist: PySimpleGUI>=4.60.5; extra == "gui"
20
+ Requires-Dist: PyQt5>=5.15.0; extra == "gui"
21
+ Provides-Extra: drivers
22
+ Requires-Dist: selenium>=4.15.2; extra == "drivers"
23
+ Requires-Dist: webdriver-manager>=4.0.0; extra == "drivers"
24
+ Provides-Extra: media
25
+ Requires-Dist: yt-dlp>=2024.4.9; extra == "media"
26
+ Requires-Dist: m3u8>=4.0.0; extra == "media"
27
+ Dynamic: author
28
+ Dynamic: author-email
29
+ Dynamic: classifier
30
+ Dynamic: description
31
+ Dynamic: description-content-type
32
+ Dynamic: home-page
33
+ Dynamic: provides-extra
34
+ Dynamic: requires-dist
35
+ Dynamic: requires-python
36
+ Dynamic: summary
37
+
38
+
42
+ # Abstract WebTools
43
+ Provides utilities for inspecting and parsing web content, including React components and URL utilities, with enhanced capabilities for managing HTTP requests and TLS configurations.
44
+
45
+ - **Features**:
46
+ - URL Validation: Ensures URL correctness and attempts different URL variations.
47
+ - HTTP Request Manager: Custom HTTP request handling, including tailored user agents and improved TLS security through a custom adapter.
48
+ - Source Code Acquisition: Retrieves the source code of specified websites.
49
+ - React Component Parsing: Extracts JavaScript and JSX source code from web pages.
50
+ - Comprehensive Link Extraction: Collects all internal links from a specified website.
51
+ - Web Content Analysis: Extracts and categorizes various web content components such as HTML elements, attribute values, attribute names, and class names.
52
+
53
+ ### abstract_webtools.py
54
+ **Description:**
55
+ Abstract WebTools offers a suite of utilities designed for web content inspection and parsing. One of its standout features is its ability to analyze URLs, ensuring their validity and automatically attempting different URL variations to obtain correct website access. It boasts a custom HTTP request management system that tailors user-agent strings and employs a specialized TLS adapter for heightened security. The toolkit also provides robust capabilities for extracting source code, including detecting React components on web pages. Additionally, it offers functionalities for extracting all internal website links and performing in-depth web content analysis. This makes Abstract WebTools an indispensable tool for web developers, cybersecurity professionals, and digital analysts.
56
+ ![image](https://github.com/AbstractEndeavors/abstract_essentials/assets/57512254/0451d8ea-996f-4de5-9e6c-92a606aae4ef)
57
+
58
+ - **Dependencies**:
59
+ - `requests`
60
+ - `ssl`
61
+ - `HTTPAdapter` from `requests.adapters`
62
+ - `PoolManager` from `urllib3.poolmanager`
63
+ - `ssl_` from `urllib3.util`
64
+ - `urlparse`, `urljoin` from `urllib.parse`
65
+ - `BeautifulSoup` from `bs4`
66
+
67
+
68
+ # UrlManager
69
+
70
+ The `UrlManager` is a Python class designed to handle and manipulate URLs. It provides methods for cleaning and normalizing URLs, determining the correct version of a URL, extracting URL components, and more. This class is particularly useful for web scraping, web crawling, or any application where URL management is essential.
71
+
72
+ ## Usage
73
+
74
+ To use the `UrlManager` class, first import it into your Python script:
75
+
76
+ ```python
77
+ from abstract_webtools import UrlManager
78
+ ```
79
+
80
+ ### Initializing a UrlManager Object
81
+
82
+ You can create a `UrlManager` object by providing an initial URL and an optional `requests` session. If no URL is provided, it defaults to 'www.example.com':
83
+
84
+ ```python
85
+ url_manager = UrlManager(url='https://www.example.com')
86
+ ```
87
+
88
+ ### URL Cleaning and Normalization
89
+
90
+ The `clean_url` method takes a URL and returns a list of potential URL variations, including versions with and without 'www.', 'http://', and 'https://':
91
+
92
+ ```python
93
+ cleaned_urls = url_manager.clean_url()
94
+ ```
95
+
96
+ ### Getting the Correct URL
97
+
98
+ The `get_correct_url` method tries each possible URL variation with an HTTP request to determine the correct version of the URL:
99
+
100
+ ```python
101
+ correct_url = url_manager.get_correct_url()
102
+ ```
103
+
104
+ ### Updating the URL
105
+
106
+ You can update the URL associated with the `UrlManager` object using the `update_url` method:
107
+
108
+ ```python
109
+ url_manager.update_url('https://www.example2.com')
110
+ ```
111
+
112
+ ### Extracting URL Components
113
+
114
+ The `url_to_pieces` method extracts various components of the URL, such as protocol, domain name, path, and query:
115
+
116
+ ```python
117
+ url_manager.url_to_pieces()
118
+ print(url_manager.protocol)
119
+ print(url_manager.domain_name)
120
+ print(url_manager.path)
121
+ print(url_manager.query)
122
+ ```
123
+
124
+ ### Additional Utility Methods
125
+
126
+ - `get_domain_name(url)`: Returns the domain name (netloc) of a given URL.
127
+ - `is_valid_url(url)`: Checks if a URL is valid.
128
+ - `make_valid(href, url)`: Ensures a relative or incomplete URL is valid by joining it with a base URL.
129
+ - `get_relative_href(url, href)`: Converts a relative URL to an absolute URL based on a base URL.
130
+
131
+ ## Compatibility Note
132
+
133
+ The `get_domain` method is kept for compatibility but is inconsistent. Use it only for "webpage_url_domain." Similarly, `url_basename`, `base_url`, and `urljoin` methods are available for URL manipulation.
134
+
135
+ ## Example
136
+
137
+ Here's a quick example of using the `UrlManager` class:
138
+
139
+ ```python
140
+ from abstract_webtools import UrlManager
141
+
142
+ url_manager = UrlManager(url='https://www.example.com')
143
+ cleaned_urls = url_manager.clean_url()
144
+ correct_url = url_manager.get_correct_url()
145
+ url_manager.update_url('https://www.example2.com')
146
+
147
+ print(f"Cleaned URLs: {cleaned_urls}")
148
+ print(f"Correct URL: {correct_url}")
149
+ ```
150
+
151
+ ## Dependencies
152
+
153
+ The `UrlManager` class relies on the `requests` library for making HTTP requests. Ensure you have the `requests` library installed in your Python environment.
154
+ # SafeRequest
155
+
156
+ The `SafeRequest` class is a versatile Python utility designed to handle HTTP requests with enhanced safety features. It integrates with other managers like `UrlManager`, `NetworkManager`, and `UserAgentManager` to manage various aspects of the request, such as user-agent, SSL/TLS settings, proxies, headers, and more.
157
+
158
+ ## Usage
159
+
160
+ To use the `SafeRequest` class, first import it into your Python script:
161
+
162
+ ```python
163
+ from abstract_webtools import SafeRequest
164
+ ```
165
+
166
+ ### Initializing a SafeRequest Object
167
+
168
+ You can create a `SafeRequest` object with various configuration options. By default, it uses sensible default values, but you can customize it as needed:
169
+
170
+ ```python
171
+ safe_request = SafeRequest(url='https://www.example.com')
172
+ ```
173
+
174
+ ### Updating URL and UrlManager
175
+
176
+ You can update the URL associated with the `SafeRequest` object using the `update_url` method, which also updates the underlying `UrlManager`:
177
+
178
+ ```python
179
+ safe_request.update_url('https://www.example2.com')
180
+ ```
181
+
182
+ You can also update the `UrlManager` directly:
183
+
184
+ ```python
185
+ from url_manager import UrlManager
186
+
187
+ url_manager = UrlManager(url='https://www.example3.com')
188
+ safe_request.update_url_manager(url_manager)
189
+ ```
190
+
191
+ ### Making HTTP Requests
192
+
193
+ The `SafeRequest` class handles making HTTP requests using the `try_request` method. It handles retries, timeouts, and rate limiting:
194
+
195
+ ```python
196
+ response = safe_request.try_request()
197
+ if response:
198
+ # Process the response here
199
+ ```
200
+
201
+ ### Accessing Response Data
202
+
203
+ You can access the response data in various formats:
204
+
205
+ - `safe_request.source_code`: HTML source code as a string.
206
+ - `safe_request.source_code_bytes`: HTML source code as bytes.
207
+ - `safe_request.source_code_json`: JSON data from the response (if the content type is JSON).
208
+ - `safe_request.react_source_code`: JavaScript and JSX source code extracted from `<script>` tags.
209
+
210
+ ### Customizing Request Configuration
211
+
212
+ The `SafeRequest` class provides several options for customizing the request, such as headers, user-agent, proxies, SSL/TLS settings, and more. These can be set during initialization or updated later.
213
+
214
+ ### Handling Rate Limiting
215
+
216
+ The class can handle rate limiting scenarios by implementing rate limiters and waiting between requests.
217
+
218
+ ### Error Handling
219
+
220
+ The `SafeRequest` class handles various request-related exceptions and provides error messages for easier debugging.
221
+
222
+ ## Dependencies
223
+
224
+ The `SafeRequest` class relies on the `requests` library for making HTTP requests. Ensure you have the `requests` library installed in your Python environment:
225
+
226
+ ```bash
227
+ pip install requests
228
+ ```
229
+
230
+ ## Example
231
+
232
+ Here's a quick example of using the `SafeRequest` class:
233
+
234
+ ```python
235
+ from abstract_webtools import SafeRequest
236
+
237
+ safe_request = SafeRequest(url='https://www.example.com')
238
+ response = safe_request.try_request()
239
+ if response:
240
+ print(f"Response status code: {response.status_code}")
241
+ print(f"HTML source code: {safe_request.source_code}")
242
+ ```
243
+
244
+ # SoupManager
245
+
246
+ The `SoupManager` class is a Python utility designed to simplify web scraping by providing easy access to the BeautifulSoup library. It allows you to parse and manipulate HTML or XML source code from a URL or provided source code.
247
+
248
+ ## Usage
249
+
250
+ To use the `SoupManager` class, first import it into your Python script:
251
+
252
+ ```python
253
+ from abstract_webtools import SoupManager
254
+ ```
255
+
256
+ ### Initializing a SoupManager Object
257
+
258
+ You can create a `SoupManager` object with various configuration options. By default, it uses sensible default values, but you can customize it as needed:
259
+
260
+ ```python
261
+ soup_manager = SoupManager(url='https://www.example.com')
262
+ ```
263
+
264
+ ### Updating URL and Request Manager
265
+
266
+ You can update the URL associated with the `SoupManager` object using the `update_url` method, which also updates the underlying `UrlManager` and `SafeRequest`:
267
+
268
+ ```python
269
+ soup_manager.update_url('https://www.example2.com')
270
+ ```
271
+
272
+ You can also update the source code directly:
273
+
274
+ ```python
275
+ source_code = '<html>...</html>'
276
+ soup_manager.update_source_code(source_code)
277
+ ```
278
+
279
+ ### Accessing and Parsing HTML
280
+
281
+ The `SoupManager` class provides easy access to the BeautifulSoup object, allowing you to search, extract, and manipulate HTML elements easily. You can use methods like `find_all`, `get_class`, `has_attributes`, and more to work with the HTML content.
282
+
283
+ ```python
284
+ elements = soup_manager.find_all(tag='a')
285
+ ```
286
+
287
+ ### Extracting Links
288
+
289
+ The class also includes methods for extracting all website links from the HTML source code:
290
+
291
+ ```python
292
+ all_links = soup_manager.all_links
293
+ ```
294
+
295
+ ### Extracting Meta Tags
296
+
297
+ You can extract meta tags from the HTML source code using the `meta_tags` property:
298
+
299
+ ```python
300
+ meta_tags = soup_manager.meta_tags
301
+ ```
302
+
303
+ ### Customizing Parsing
304
+
305
+ You can customize the parsing behavior by specifying the parser type during initialization or updating it:
306
+
307
+ ```python
308
+ soup_manager.update_parse_type('lxml')
309
+ ```
321
+
322
+ ## Dependencies
323
+
324
+ The `SoupManager` class relies on the `BeautifulSoup` library for parsing HTML or XML. Ensure you have the `beautifulsoup4` library installed in your Python environment:
325
+
326
+ ```bash
327
+ pip install beautifulsoup4
328
+ ```
329
+
330
+ ## Example
331
+
332
+ Here's a quick example of using the `SoupManager` class:
333
+
334
+ ```python
335
+ from abstract_webtools import SoupManager
336
+
337
+ soup_manager = SoupManager(url='https://www.example.com')
338
+ all_links = soup_manager.all_links
339
+ print(f"All Links: {all_links}")
340
+ ```
341
+ # LinkManager
342
+
343
+ The `LinkManager` class is a Python utility designed to simplify the extraction and management of links (URLs) and associated data from HTML source code. It leverages other classes like `UrlManager`, `SafeRequest`, and `SoupManager` to facilitate link extraction and manipulation.
344
+
345
+ ## Usage
346
+
347
+ To use the `LinkManager` class, first import it into your Python script:
348
+
349
+ ```python
350
+ from abstract_webtools import LinkManager
351
+ ```
352
+
353
+ ### Initializing a LinkManager Object
354
+
355
+ You can create a `LinkManager` object with various configuration options. By default, it uses sensible default values, but you can customize it as needed:
356
+
357
+ ```python
358
+ link_manager = LinkManager(url='https://www.example.com')
359
+ ```
360
+
361
+ ### Updating URL and Request Manager
362
+
363
+ You can update the URL associated with the `LinkManager` object using the `update_url` method, which also updates the underlying `UrlManager`, `SafeRequest`, and `SoupManager`:
364
+
365
+ ```python
366
+ link_manager.update_url('https://www.example2.com')
367
+ ```
368
+
369
+ ### Accessing Extracted Links
370
+
371
+ The `LinkManager` class provides easy access to extracted links and associated data:
372
+
373
+ ```python
374
+ all_links = link_manager.all_desired_links
375
+ ```
376
+
377
+ ### Customizing Link Extraction
378
+
379
+ You can customize the link extraction behavior by specifying various parameters during initialization or updating them:
380
+
381
+ ```python
382
+ link_manager.update_desired(
383
+ img_attr_value_desired=['thumbnail', 'image'],
384
+ img_attr_value_undesired=['icon'],
385
+ link_attr_value_desired=['blog', 'article'],
386
+ link_attr_value_undesired=['archive'],
387
+ image_link_tags='img',
388
+ img_link_attrs='src',
389
+ link_tags='a',
390
+ link_attrs='href',
391
+ strict_order_tags=True,
392
+ associated_data_attr=['data-title', 'alt', 'title'],
393
+ get_img=['data-title', 'alt', 'title']
394
+ )
395
+ ```
396
+
397
+ ## Dependencies
398
+
399
+ The `LinkManager` class relies on other classes within the `abstract_webtools` module, such as `UrlManager`, `SafeRequest`, and `SoupManager`. Ensure you have these classes and their dependencies correctly set up in your Python environment.
400
+
401
+ ## Example
402
+
403
+ Here's a quick example of using the `LinkManager` class:
404
+
405
+ ```python
406
+ from abstract_webtools import LinkManager
407
+
408
+ link_manager = LinkManager(url='https://www.example.com')
409
+ all_links = link_manager.all_desired_links
410
+ print(f"All Links: {all_links}")
411
+ ```
412
+ ## Overall Use Cases
413
+ ```python
414
+ from abstract_webtools import UrlManager, SafeRequest, SoupManager, LinkManager, VideoDownloader
415
+
416
+ # --- UrlManager: Manages and manipulates URLs for web scraping/crawling ---
417
+ url = "example.com"
418
+ url_manager = UrlManager(url=url)
419
+
420
+ # --- SafeRequest: Safely handles HTTP requests by managing user-agent, SSL/TLS, proxies, headers, etc. ---
421
+ request_manager = SafeRequest(
422
+ url_manager=url_manager,
423
+ proxies={'http': 'http://8.219.195.47', 'https': 'http://8.219.197.111'},
424
+ timeout=(3.05, 70)
425
+ )
426
+
427
+ # --- SoupManager: Simplifies web scraping with easy access to BeautifulSoup ---
428
+ soup_manager = SoupManager(
429
+ url_manager=url_manager,
430
+ request_manager=request_manager
431
+ )
432
+
433
+ # --- LinkManager: Extracts and manages links and associated data from HTML source code ---
434
+ link_manager = LinkManager(
435
+ url_manager=url_manager,
436
+ soup_manager=soup_manager,
437
+ link_attr_value_desired=['/view_video.php?viewkey='],
438
+ link_attr_value_undesired=['phantomjs']
439
+ )
440
+
441
+ # Download videos from provided links (list or string)
442
+ video_manager = VideoDownloader(link=link_manager.all_desired_links).download()
443
+
444
+ # Use them individually, with default dependencies for basic inputs:
445
+ standalone_soup = SoupManager(url=url).soup
446
+ standalone_links = LinkManager(url=url).all_desired_links
447
+
448
+ # Updating methods for manager classes
449
+ url_1 = 'thedailydialectics.com'
450
+ print(f"updating URL to {url_1}")
451
+ url_manager.update_url(url=url_1)
452
+ request_manager.update_url(url=url_1)
453
+ soup_manager.update_url(url=url_1)
454
+ link_manager.update_url(url=url_1)
455
+
456
+ # Updating URL manager references
457
+ request_manager.update_url_manager(url_manager=url_manager)
458
+ soup_manager.update_url_manager(url_manager=url_manager)
459
+ link_manager.update_url_manager(url_manager=url_manager)
460
+
461
+ # Updating source code for managers
462
+ source_code_bytes = request_manager.source_code_bytes
463
+ soup_manager.update_source_code(source_code=source_code_bytes)
464
+ link_manager.update_source_code(source_code=source_code_bytes)
465
+ ```
466
+ ## License
467
+
468
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
469
+
470
+ #### Module Information
471
+ -**Author**: putkoff
472
+ -**Author Email**: partners@abstractendeavors.com
473
+ -**Github**: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
474
+ -**PYPI**: https://pypi.org/project/abstract-webtools
475
+ -**Part of**: abstract_essentials
476
+ -**Date**: 10/10/2023
477
+ -**Version**: 0.1.4.54
478
+ ---
482
+