abstract-webtools 0.1.4.13__py3-none-any.whl → 0.1.4.14__py3-none-any.whl
This diff compares the contents of two package versions publicly released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- abstract_webtools/abstract_webtools.py +45 -0
- abstract_webtools/dfgdsf.py +53 -0
- abstract_webtools/grab_source_gui.py +112 -73
- abstract_webtools/sou.py +104 -0
- {abstract_webtools-0.1.4.13.dist-info → abstract_webtools-0.1.4.14.dist-info}/METADATA +1 -1
- abstract_webtools-0.1.4.14.dist-info/RECORD +13 -0
- abstract_webtools-0.1.4.13.dist-info/RECORD +0 -11
- {abstract_webtools-0.1.4.13.dist-info → abstract_webtools-0.1.4.14.dist-info}/LICENSE +0 -0
- {abstract_webtools-0.1.4.13.dist-info → abstract_webtools-0.1.4.14.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.4.13.dist-info → abstract_webtools-0.1.4.14.dist-info}/top_level.txt +0 -0
abstract_webtools/abstract_webtools.py
@@ -66,6 +66,12 @@ To utilize this module, simply import the required function or class and use it
 Author: putkoff
 Version: 1.0
 """
+# -*- coding: UTF-8 -*-
+import requests
+import os
+# Google Chrome Driver
+from selenium import webdriver
+import yt_dlp
 import ssl
 import requests
 from requests.adapters import HTTPAdapter
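Note: the added block duplicates `import requests`, which already appears in the unchanged context two lines below; Python tolerates repeated imports, but one copy is redundant. The change also makes `selenium` and `yt_dlp` unconditional imports, so both must now be installed for `abstract_webtools.py` to load at all.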
@@ -650,7 +656,46 @@ class URLManagerSingleton:
         elif URLManagerSingleton._instance.session != session or URLManagerSingleton._instance.url != url:
             URLManagerSingleton._instance = URLManager(url=url,session=session)
         return URLManagerSingleton._instance
+
+class VideoDownloader:
 
+    def __init__(self, url,title=None,download_directory=os.getcwd(),user_agent=None,video_extention='mp4'):
+        self.url = url
+        self.video_extention=video_extention
+        self.header = UserAgentManagerSingleton().get_instance(user_agent=user_agent).user_agent_header
+        self.base_name = os.path.basename(self.url)
+        self.file_name,self.ext = os.path.splitext(self.base_name)
+        self.download_directory=download_directory
+        self.title = url.split('/')[3] if title == None else title
+        self.video_urls = []
+        self.fetch_video_urls()
+        self.download_videos()
+    def fetch_video_urls(self):
+        driver = webdriver.Chrome()
+        driver.get(self.url)
+        self.page_source = driver.page_source
+        for each in self.page_source.split('<source ')[1:]:
+            # NOTE: Make sure to import the `eatAll` function and use it here.
+            self.video_urls.append(eatAll(each.split('.{self.video_extention}'.replace('..','.'))[0].split('http')[-1],['h','t','t','p','s',':','//','/','s','=',' ','\n','\t',''])+'.mp4')
+    def download_videos(self):
+        for video_url in self.video_urls:
+            ydl_opts = {}
+            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                info = ydl.extract_info(self.url)
+                self.base_name = os.path.basename(info['url'])
+                self.file_name,self.ext = os.path.splitext(self.base_name)
+                video_content =SafeRequestSingleton().get_instance(url=info['url']).response
+                print("Start downloading")
+                content_length = int(video_content.headers['content-length'])
+                print(f'Size: {content_length / 1024 / 1024:.2f}MB')
+                down_size = 0
+                with open(f'{os.path.join(self.download_directory,self.base_name)}', "wb") as video:
+                    for chunk in video_content.iter_content(chunk_size=1024 * 1024):
+                        if chunk:
+                            video.write(chunk)
+                            down_size += len(chunk)
+                            print(f'Progress: {down_size / content_length:.2%}', end='\r')
+
 def get_limited_request(request_url=str,service_name="default"):
     manager = DynamicRateLimiterManagerSingleton.get_instance()  # Get the singleton instance
     unwanted_response=True
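A note on the new class: `fetch_video_urls` splits on the literal string `'.{self.video_extention}'` (the f-prefix is missing, so the extension never interpolates), `eatAll` is used without an import (the inline NOTE concedes this), and `download_videos` iterates `video_url` but then passes `self.url` to `extract_info`, so every loop pass processes the same page. A corrected sketch of the parsing step, assuming `eatAll` is importable from `abstract_utilities` (the author's companion library) and trims any of the given tokens from the ends of a string; the helper name is hypothetical:

    # Hypothetical helper, not the released code.
    from abstract_utilities import eatAll  # assumed import path

    def extract_source_urls(page_source: str, video_extention: str = 'mp4') -> list:
        """Pull candidate video URLs out of <source ...> tags in raw page HTML."""
        urls = []
        for chunk in page_source.split('<source ')[1:]:
            # f-string, so the extension actually interpolates (e.g. '.mp4')
            candidate = chunk.split(f'.{video_extention}')[0].split('http')[-1]
            cleaned = eatAll(candidate, ['h','t','t','p','s',':','//','/','s','=',' ','\n','\t',''])
            urls.append(cleaned + f'.{video_extention}')
        return urls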
abstract_webtools/dfgdsf.py
ADDED
@@ -0,0 +1,53 @@
+from bs4 import BeautifulSoup
+data ="""
+<html>
+<head>
+    <title>Example Domain</title>
+
+    <meta charset="utf-8" />
+    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <style type="text/css">
+    body {
+        background-color: #f0f0f2;
+        margin: 0;
+        padding: 0;
+        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
+
+    }
+    div {
+        width: 600px;
+        margin: 5em auto;
+        padding: 2em;
+        background-color: #fdfdff;
+        border-radius: 0.5em;
+        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
+    }
+    a:link, a:visited {
+        color: #38488f;
+        text-decoration: none;
+    }
+    @media (max-width: 700px) {
+        div {
+            margin: 0 auto;
+            width: auto;
+        }
+    }
+    </style>
+</head>
+
+<body>
+<div>
+    <h1>Example Domain</h1>
+    <p>This domain is for use in illustrative examples in documents. You may use this
+    domain in literature without prior coordination or asking for permission.</p>
+    <p><a href="https://www.iana.org/domains/example">More information...</a></p>
+</div>
+</body>
+</html>
+
+"""
+
+
+soup = BeautifulSoup(data,'html.parser').find_all('h1')
+input(soup)
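dfgdsf.py reads like a scratch script: it parses a hard-coded copy of the example.com page and shows the `<h1>` matches via `input()`. One detail worth noting: `find_all` returns a `ResultSet` (a list of tags), not a `BeautifulSoup` object, so the variable name `soup` is misleading. The shape of the result:

    from bs4 import BeautifulSoup

    matches = BeautifulSoup('<h1>Example Domain</h1>', 'html.parser').find_all('h1')
    print(matches)          # [<h1>Example Domain</h1>]  (a ResultSet, not a soup)
    print(matches[0].text)  # Example Domain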
abstract_webtools/grab_source_gui.py
@@ -8,7 +8,44 @@ from abstract_utilities import *
 import PySimpleGUI as sg
 import inspect
 import re
+from .abstract_webtools import URLManagerSingleton,CipherManagerSingleton,SafeRequestSingleton,UserAgentManagerSingleton
 from abstract_utilities.class_utils import call_functions,process_args,get_fun,mk_fun
+window = None
+def get_request_manager(url=None,):
+    return SafeRequestSingleton().get_instance(url=url)
+def get_url_manager(url=None):
+    return URLManagerSingleton().get_instance(url=url)
+def get_soup_manager(url=None,selected_option='html.parser',delim=""):
+    return SoupManagerSingleton().get_instance(url=url,selected_option=selected_option,delim=delim)
+def get_user_agent_manager(user_agent=None):
+    return UserAgentManagerSingleton().get_instance(user_agent=user_agent)
+def get_cipher_manager(cipher_list=None):
+    return CipherManagerSingleton().get_instance(cipher_list=cipher_list)
+def get_url():
+    if window is None:
+        return 'example.com'
+    event, values = window.read()
+    return values['-URL-']
+class SoupManager:
+    @staticmethod
+    def get_parser_choices():
+        return ['html.parser', 'lxml', 'html5lib']
+    def __init__(self,url=None,selected_option='html.parser',delim="a"):
+        self.url=url
+        self.selected_option = selected_option
+        self.url_manager = get_url_manager(url=self.url)
+        self.request_manager = SafeRequestSingleton().get_instance(url=self.url_manager.correct_url)
+        self.soup = BeautifulSoup(self.request_manager.source_code, self.selected_option)
+        self.find_all = self.soup.find_all(delim)
+class SoupManagerSingleton:
+    _instance = None
+    @staticmethod
+    def get_instance(url=None,selected_option='html.parser',delim=""):
+        if SoupManagerSingleton._instance is None:
+            SoupManagerSingleton._instance = SoupManager(url=url,selected_option=selected_option,delim=delim)
+        elif SoupManagerSingleton._instance.url != url:
+            SoupManagerSingleton._instance = SoupManager(url=url,selected_option=selected_option,delim=delim)
+        return SoupManagerSingleton._instance
 def get_gui_fun(name:str='',args:dict={}):
     import PySimpleGUI
     return get_fun({"instance":PySimpleGUI,"name":name,"args":args})
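The new SoupManagerSingleton mirrors the package's other singletons: cache one instance and rebuild only when the URL changes. Note that `get_instance` compares only `url`, so a call that changes `selected_option` or `delim` alone gets the stale cached instance back. A sketch of that behavior (hypothetical URL, assuming the request succeeds):

    m1 = get_soup_manager(url='https://www.example.com', delim='a')
    m2 = get_soup_manager(url='https://www.example.com', delim='p')
    assert m1 is m2  # same cached instance, so the new delim='p' never takes effect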
@@ -71,14 +108,9 @@ def get_cypher_checks(ciphers:list=get_ciphers()):
         ls = create_columns(ls,k,5)
     return ls
 def format_url(url):
-    #
+    # Ensure the URL starts with 'http://' or 'https://'
     if not url.startswith(('http://', 'https://')):
-        # Add 'https://' prefix if missing
         url = 'https://' + url
-    # Check if the URL has a valid format
-    if not re.match(r'^https?://\w+', url):
-        # Return None if the URL is invalid
-        return None
     return url
 def try_request(url: str, session:type(requests.Session)=requests):
     try:
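With the `re.match` check gone, `format_url` always returns a string and can no longer signal an invalid URL by returning `None`, which the removed `-URL-` handler further down used to rely on. Its behavior after this change:

    format_url('example.com')         # 'https://example.com'
    format_url('http://example.com')  # 'http://example.com' (scheme already present)
    format_url('not a url')           # 'https://not a url'  (no validation anymore)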
@@ -86,12 +118,8 @@ def try_request(url: str, session:type(requests.Session)=requests):
     except requests.exceptions.RequestException as e:
         print(e)
         return None
-
-
-        soup = change_glob('last_soup',BeautifulSoup(data, selected_option))
-    except:
-        soup = None
-    return soup
+
+
 def get_parsed_html(url:str='https://www.example.com', header:str=create_user_agent()):
     s = requests.Session()
     s.cookies["cf_clearance"] = "cb4c883efc59d0e990caf7508902591f4569e7bf-1617321078-0-150"
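This hunk clears out leftover lines stranded after `return None`: an orphaned `change_glob(...)` assignment and a second `except:` block from an earlier revision that could never run. What survives is the simple contract also used by sou.py below: `try_request` returns the `requests` response on success and `None` on any `RequestException`:

    resp = try_request('https://www.example.com')
    if resp is not None:
        print(resp.status_code)  # e.g. 200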
@@ -123,6 +151,7 @@ def parse_all(data):
             if dat[c+1] not in ls_class:
                 ls_class.append(dat[c+1])
     return ls_type,ls_desc,ls_tag,ls_class
+
 def parse_react_source(data):
     soup = BeautifulSoup(data, 'html.parser')
     script_tags = soup.find_all('script', type=lambda t: t and ('javascript' in t or 'jsx' in t))
@@ -131,10 +160,9 @@ def parse_react_source(data):
         react_source_code.append(script_tag.string)
     return react_source_code
 def all_soup(data,tag,typ,clas,inp):
-
-    return getattr(last_soup,tag,typ)
+    return getattr(get_soup_manager(url=get_url()).soup,tag,typ)
 def find_all_soup(string:str):
-    return
+    return get_soup_manager(url=get_url(),delim=string).find_all
 def get_bs4_options():
     bs4_options = [
         'BeautifulSoup',
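`all_soup` and `find_all_soup` are no longer stubs; both now resolve their soup through the module-level window, because `get_url()` calls `window.read()`, a blocking call that waits for the next GUI event. They are therefore only safe to call from inside the event loop. A non-blocking variant is possible with PySimpleGUI's read timeout (a sketch, not part of the package):

    def get_url_nonblocking(default='example.com'):
        # timeout=0 polls instead of blocking; fall back to the default when the
        # window is missing or returned no values (e.g. it was just closed)
        if window is None:
            return default
        event, values = window.read(timeout=0)
        return (values or {}).get('-URL-', default)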
@@ -175,91 +203,102 @@ def get_gpt_layout():
         [sg.Checkbox('',default=False,key='-CHECK_TYPE-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_TYPE-',enable_events=True)],
         [sg.Checkbox('',default=False,key='-CHECK_CLASS-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_CLASS-',enable_events=True)],
         sg.Input(key='-SOUP_INPUT-'), sg.Button('get soup'),sg.Button('all soup')]],
+
         [get_multi_line({"key":"-FIND_ALL_OUTPUT-"})]]
     return layout
-def display_soup(window,values,
-
-    if soup != None:
+def display_soup(window,values,soup_manager):
+    event, values = window.read()
+    if soup_manager.soup != None:
         #window['-REACT_OUTPUT-'].update(value=parse_react_source(source_code))
-        window['-SOUP_OUTPUT-'].update(value=soup)
-        ls_type,ls_desc,ls_tag,ls_class = parse_all(soup)
-
+        window['-SOUP_OUTPUT-'].update(value=soup_manager.soup )
+        ls_type,ls_desc,ls_tag,ls_class = parse_all(soup_manager.soup)
+        if len(ls_type)>0:
+            window['-SOUP_TAG-'].update(values=ls_type)
         if len(ls_desc)>0:
             window['-SOUP_ELEMENT-'].update(values=ls_desc,value=ls_desc[0])
         if len(ls_tag) >0:
             window['-SOUP_TYPE-'].update(values=ls_tag,value=ls_tag[0])
         if len(ls_class) >0:
             window['-SOUP_CLASS-'].update(values=ls_class,value=ls_class[0])
+        window['-FIND_ALL_OUTPUT-'].update(value=soup_manager.find_all)
+def get_selected_cipher_list():
+    ls = []
+    event, values = window.read()
+    for k in range(len(get_ciphers())):
+        if values[get_ciphers()[k]] == True:
+            ls.append(get_ciphers()[k])
+    return ls
+def process_url(window,values):
+    url = values['-URL-']
+    delim= values['-SOUP_INPUT-']
+    selected_option = values['-PARSER-']
+    user_agent=values['-USERAGENT-']
+    soup_input =values['-SOUP_TAG-']
+    cipher_list = get_selected_cipher_list()
+    try:
+        url_manager = get_url_manager(url)
+        cipher_manager = get_cipher_manager(cipher_list=cipher_list)
+        request_manager = get_request_manager(url=url_manager.correct_url)
+        soup_manager = get_soup_manager(url=url,selected_option=selected_option,delim=delim)
+        user_agent_manager = get_user_agent_manager(user_agent=user_agent)
+        window['-STATUS_CODE-'].update(value=request_manager.status_code)
+        window['-SOURCECODE-'].update(value=request_manager.source_code)
+        window['-SOUP_OUTPUT-'].update(value=soup_manager.soup)
+        window['-CIPHERS_OUTPUT-'].update(value=cipher_manager.ciphers_string)
+    except:
+        print(url)
+
+
+    return url_manager,request_manager,soup_manager,user_agent_manager,cipher_manager
 def url_grabber_while(window):
     while True:
+
+
         event, values = window.read()
         if event == sg.WINDOW_CLOSED:
             break
-
-
-
-
-
-
-
-
-
+        if 'SOUP_' in event:
+            soup_manager = get_soup_manager(url=url_manager.correct_url,selected_option=selected_option,delim=values['-SOUP_INPUT-'])
+            name = event.split("SOUP_")[-1][:-1]
+            check_name = f'-CHECK_{name}-'
+            window[check_name].update(value=True)
+            for each in values.keys():
+                if 'CHECK_' in each and each != check_name:
+                    window[each].update(value=False)
+            window[check_name].update(value=find_all_soup(values[event]))
+        cipher_list=get_selected_cipher_list()
+        url_manager,request_manager,soup_manager,user_agent_manager,cipher_manager=process_url(window,values)
+        if event=='-CORRECT_URL-':
+            if url_manager.correct_url:
+                window['-URL-'].update(value=url_manager.correct_url)
+        if event == 'all soup':
+            selected_option = values['-PARSER-']
+            soup_manager = get_soup_manager(url=url_manager.correct_url,selected_option=selected_option,delim=values['-SOUP_INPUT-'])
+            display_soup(window,values,soup_manager)
+
         if event == 'all soup':
-
-
+            delim= values['-SOUP_INPUT-']
+            window['-FIND_ALL_OUTPUT-'].update(value=soup_manager.find_all)
+
+        elif event == 'get soup':
             window['-FIND_ALL_OUTPUT-'].update(value=all_soup(values['-SOUP_OUTPUT-'],values['-SOUP_TAG-'],values['-SOUP_TYPE-'],values['-SOUP_CLASS-'],values['-SOUP_INPUT-']))
-        if event == '-URL-':
-            url = format_url(values['-URL-'])
-            if url == None:
-                url = ''
-            try:
-                r = get_request(url)
-                window['-STATUS_CODE-'].update(value=f'{r.status_code}')
-                window['-CORRECT_URL-'].update(visible=True)
-                window["-URL_WARNING-"].update(value= url +' is valid')
-            except:
-                window["-URL_WARNING-"].update(value=url +' is an invalid url')
-                window['-STATUS_CODE-'].update(value='fail')
-                window['-CORRECT_URL-'].update(visible=False)
-        if event=='-CORRECT_URL-':
-            window['-URL-'].update(value=format_url(values['-URL-']))
         if event == '-CUSTOMUA-':
             window['-SOURCECODE-'].update(disabled=values['-CUSTOMUA-'])
             if not values['-CUSTOMUA-']:
-
+
+                window['-USERAGENT-'].update(value=user_agent_manager.user_agent_header)
                 window['-USERAGENT-'].update(disabled=True)
             else:
                 window['-USERAGENT-'].update(disabled=False)
-        if event in get_ciphers():
-            ls = []
-            for k in range(len(get_ciphers())):
-                if values[get_ciphers()[k]] == True:
-                    ls.append(get_ciphers()[k])
-            window['-CIPHERS_OUTPUT-'].update(value=create_ciphers_string(ls=ls))
-        if event == '-PARSER-':
-            display_soup(window,values,source_code)
-            #description = next((desc for option, desc in get_bs4_options() if option == selected_option), '')
-            #window['-DESCRIPTION-'].update(value=description)
         if event == 'Grab URL':
-
-            formatted_url = format_url(url)
-            if formatted_url:
+            if url_manager.correct_url:
                 # Perform actions with the formatted URL
-
-                window['-SOURCECODE-'].update(value=source_code)
-                display_soup(window,values,source_code)
+                process_url(window,values)
             else:
                 # Invalid URL format, display an error message
                 sg.popup('Invalid URL format. Please enter a valid URL.')
-            user_agent = values['-USERAGENT-']
-            source_code = get_parsed_html(url=formatted_url, header=create_user_agent(values['-CUSTOMUA-']))
-            window['-SOURCECODE-'].update(value=source_code)
-            display_soup(window,values,source_code)
         if event == 'Action':
-            source_code = window['-SOURCECODE-'].get()
             selected_option = values['-PARSER-']
-            soup = display_soup(window,values,source_code)
-            window['-SOURCECODE-'].update(value=source_code)
             if selected_option == 'BeautifulSoup':
                 result = soup
             elif selected_option == 'Tag':
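Two caveats in the reworked loop: on the first event, the 'SOUP_' branch references `url_manager` and `selected_option` before `process_url` has ever bound them, and `process_url` itself returns names that stay unbound if its `try` block fails before the first assignment; the loop also tests `event == 'all soup'` twice in a row. Pre-binding the names would avoid the `NameError` (a hardening sketch, not the released code):

    def url_grabber_while(window):
        # bind everything the branches read before looping
        url_manager = request_manager = soup_manager = None
        user_agent_manager = cipher_manager = None
        selected_option = 'html.parser'
        while True:
            event, values = window.read()
            if event == sg.WINDOW_CLOSED:
                break
            # original event handling follows, now safe to reference the names above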
@@ -276,13 +315,13 @@ def url_grabber_while(window):
             result = soup.find(text=lambda text: isinstance(text, CData))
         else:
             result = None
-        window['-
+        window['-SOUP_OUTPUT-'].update(value=str(result))


 def url_grabber_component():
     globals()['curr_check']='TAG-'
     layout = get_gpt_layout()
-    window = get_gui_fun(name='Window',args={'title':'URL Grabber', 'layout':layout,**expandable()})
+    globals()['window'] = get_gui_fun(name='Window',args={'title':'URL Grabber', 'layout':layout,**expandable()})
     url_grabber_while(window)


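The `globals()['window'] = ...` assignment is what makes the new module-level helpers work: `get_url()` and `get_selected_cipher_list()` read the module attribute `window`, which is initialized to `None`, and `url_grabber_component` rebinds it once the real window exists. A plain `window = ...` inside the function would only create a local, so the `globals()` write (or a `global window` declaration) is required. A standalone illustration:

    window = None

    def describe():
        return 'no window yet' if window is None else 'window is live'

    def open_window():
        globals()['window'] = object()  # stands in for the PySimpleGUI Window

    print(describe())  # no window yet
    open_window()
    print(describe())  # window is live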
abstract_webtools/sou.py
ADDED
@@ -0,0 +1,104 @@
+import PySimpleGUI as sg
+from bs4 import BeautifulSoup
+import requests
+
+# Function to send an HTTP GET request and return the response object
+def try_request(url):
+    try:
+        response = requests.get(url)
+        return response
+    except requests.exceptions.RequestException as e:
+        return None
+
+# Function to get a list of User-Agent strings (replace with your own list)
+def get_user_agents():
+    user_agents = ["User-Agent 1", "User-Agent 2", "User-Agent 3"]
+    return user_agents
+
+# Function to create a layout for checking multiple elements at once
+def get_multi_line(config):
+    # Replace with your multi-line element configuration
+    return sg.Multiline("", key=config["key"], size=(40, 10), autoscroll=True, reroute_stdout=True)
+
+# Function to create a layout for checking different types of elements in BeautifulSoup
+def get_cypher_checks():
+    # Replace with your checkbox and combo box configuration for different element types
+    return [sg.Checkbox('Check Tag', default=True, key='-CHECK_TAG-', enable_events=True), sg.Combo([], size=(15, 1), key='-SOUP_TAG-', enable_events=True)]
+
+# Add more parsing capabilities if needed
+parser_choices = ['html.parser', 'lxml', 'html5lib']
+
+# Create the GUI layout
+sg.theme('LightGrey1')
+layout = [
+    [sg.Text('URL:', size=(8, 1)), sg.Input('www.example.com', key='-URL-', enable_events=True),
+     sg.Text('Status:'), sg.Text('', key="-STATUS_CODE-"),
+     sg.Text('', key="-URL_WARNING-"), sg.Button('Correct URL', key='-CORRECT_URL-', visible=True)],
+    [sg.Checkbox('Custom User-Agent', default=False, key='-CUSTOMUA-', enable_events=True)],
+    [sg.Text('User-Agent:', size=(8, 1)), sg.Combo(get_user_agents(), default_value=get_user_agents()[0], key='-USERAGENT-', disabled=False)],
+    [get_cypher_checks()],
+    [sg.Button('Grab URL'), sg.Button('Action')],
+    [get_multi_line({"key": "-SOURCECODE-"})],
+    [sg.Text('Parsing Capabilities:', size=(15, 1)), sg.DropDown(parser_choices, default_value='html.parser', key='-PARSER-', enable_events=True)],
+    [get_multi_line({"key": "-SOUP_OUTPUT-"})],
+    [sg.Text('Find Soup:')],
+    [
+        [sg.Checkbox('', default=True, key='-CHECK_TAG-', enable_events=True), sg.Combo([], size=(15, 1), key='-SOUP_TAG-', enable_events=True)],
+        [sg.Checkbox('', default=False, key='-CHECK_ELEMENT-', enable_events=True), sg.Combo([], size=(15, 1), key='-SOUP_ELEMENT-', enable_events=True)],
+        [sg.Checkbox('', default=False, key='-CHECK_TYPE-', enable_events=True), sg.Combo([], size=(15, 1), key='-SOUP_TYPE-', enable_events=True)],
+        [sg.Checkbox('', default=False, key='-CHECK_CLASS-', enable_events=True), sg.Combo([], size=(15, 1), key='-SOUP_CLASS-', enable_events=True)],
+        sg.Input(key='-SOUP_INPUT-'), sg.Button('Get Soup'), sg.Button('All Soup')
+    ],
+    [get_multi_line({"key": "-FIND_ALL_OUTPUT-"})]
+]
+
+# Create the window
+window = sg.Window('BeautifulSoup Console', layout, finalize=True)
+
+while True:
+    event, values = window.read()
+
+    if event == sg.WIN_CLOSED:
+        break
+
+    # Handle URL input change event
+    if event == '-URL-':
+        url = values['-URL-']
+        response = try_request(url)
+        if response:
+            window['-STATUS_CODE-'].update(response.status_code)
+            window['-URL_WARNING-'].update('Valid URL')
+            window['-CORRECT_URL-'].update(visible=False)
+        else:
+            window['-URL_WARNING-'].update('Invalid URL')
+            window['-STATUS_CODE-'].update('')
+
+    # Handle Custom User-Agent checkbox
+    if event == '-CUSTOMUA-':
+        custom_ua_enabled = values['-CUSTOMUA-']
+        window['-USERAGENT-'].update(disabled=not custom_ua_enabled)
+
+    # Handle Grab URL button click event
+    if event == 'Grab URL':
+        url = values['-URL-']
+        response = try_request(url)
+        if response:
+            soup = BeautifulSoup(response.text, values['-PARSER-'])
+            window['-SOUP_OUTPUT-'].print(soup.prettify(), end='', text_color='black')
+
+    # Handle Get Soup button click event
+    if event == 'Get Soup':
+        tag = values['-SOUP_TAG-']
+        element = values['-SOUP_ELEMENT-']
+        element_type = values['-SOUP_TYPE-']
+        class_name = values['-SOUP_CLASS-']
+        input_text = values['-SOUP_INPUT-']
+
+        # Replace with your BeautifulSoup logic to find and display the selected elements
+        # Example: soup.find(tag, {"class": class_name})
+        # Update the -FIND_ALL_OUTPUT- element with the result
+
+
+
+# Close the window
+window.close()
{abstract_webtools-0.1.4.13.dist-info → abstract_webtools-0.1.4.14.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: abstract-webtools
-Version: 0.1.4.13
+Version: 0.1.4.14
 Summary: Abstract Web Tools is a Python package that provides various utility functions for web scraping tasks. It is built on top of popular libraries such as `requests`, `BeautifulSoup`, and `urllib3` to simplify the process of fetching and parsing web content.
 Home-page: https://github.com/AbstractEndeavors/abstract_essentials/tree/main/abstract_webtools
 Author: putkoff
abstract_webtools-0.1.4.14.dist-info/RECORD
ADDED
@@ -0,0 +1,13 @@
+abstract_webtools/__init__.py,sha256=2SWEfdPDHqqjUYsOQYlaOHF644ZYcO160nWKiAjga4w,34
+abstract_webtools/abstract_crawler.py,sha256=e8jVVv1_EB8poqlrdQaJ19z9Z0t8un5uc-DKnj1Ud5s,8002
+abstract_webtools/abstract_webtools.py,sha256=vrEkybF7vu6uXejCs7niOWfzo3I1xgznKvdBaZLWAMs,42540
+abstract_webtools/abstract_webtools2.py,sha256=dlhhgmUTaN_NgkT6GcJMVBLuXjmW38gAOeCrKxYqytk,30685
+abstract_webtools/dfgdsf.py,sha256=T1pj-ne_qVfaAdu1MIdtW3q3UZqNP78Kt0OMhz4Musk,1355
+abstract_webtools/grab_source_gui.py,sha256=Wz-FKLOuPQlBYz3kojXihpMbS4rqv4NWGr9ezF-Jt2g,16356
+abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
+abstract_webtools/sou.py,sha256=8HjmcpXJFi_kC2O-SVGebUIFY5I5B9bPP9L8BAiWhfk,4526
+abstract_webtools-0.1.4.14.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
+abstract_webtools-0.1.4.14.dist-info/METADATA,sha256=y6-qH_64FxwN8wqSRYs16px8o0UtvlUQxa9mj4c4Pas,8963
+abstract_webtools-0.1.4.14.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
+abstract_webtools-0.1.4.14.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+abstract_webtools-0.1.4.14.dist-info/RECORD,,
abstract_webtools-0.1.4.13.dist-info/RECORD
DELETED
@@ -1,11 +0,0 @@
-abstract_webtools/__init__.py,sha256=2SWEfdPDHqqjUYsOQYlaOHF644ZYcO160nWKiAjga4w,34
-abstract_webtools/abstract_crawler.py,sha256=e8jVVv1_EB8poqlrdQaJ19z9Z0t8un5uc-DKnj1Ud5s,8002
-abstract_webtools/abstract_webtools.py,sha256=70_WHKr-QMyDKN7JvPmz0ghl1bvvfMaEYfP6blMCCyI,40210
-abstract_webtools/abstract_webtools2.py,sha256=dlhhgmUTaN_NgkT6GcJMVBLuXjmW38gAOeCrKxYqytk,30685
-abstract_webtools/grab_source_gui.py,sha256=w7vDsEu1IfOIzcsfWeP2IpdS3yhFA_x5IVIeLFlfppw,14708
-abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
-abstract_webtools-0.1.4.13.dist-info/LICENSE,sha256=g3WEJFiVS27HyCGRTwKSsMLyciMaGFdWcZGOe1QalZk,3877
-abstract_webtools-0.1.4.13.dist-info/METADATA,sha256=84w2lGu0T9Pqsv59ITVr_2Sev6VGWh34n5XWXagJUpE,8963
-abstract_webtools-0.1.4.13.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
-abstract_webtools-0.1.4.13.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
-abstract_webtools-0.1.4.13.dist-info/RECORD,,
{abstract_webtools-0.1.4.13.dist-info → abstract_webtools-0.1.4.14.dist-info}/LICENSE
File without changes
{abstract_webtools-0.1.4.13.dist-info → abstract_webtools-0.1.4.14.dist-info}/WHEEL
File without changes
{abstract_webtools-0.1.4.13.dist-info → abstract_webtools-0.1.4.14.dist-info}/top_level.txt
File without changes