abstract-webtools 0.1.6.153__py3-none-any.whl → 0.1.6.155__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {abstract_webtools-0.1.6.153.dist-info → abstract_webtools-0.1.6.155.dist-info}/METADATA +1 -1
- {abstract_webtools-0.1.6.153.dist-info → abstract_webtools-0.1.6.155.dist-info}/RECORD +4 -5
- abstract_webtools/url_grabber.py +0 -261
- {abstract_webtools-0.1.6.153.dist-info → abstract_webtools-0.1.6.155.dist-info}/WHEEL +0 -0
- {abstract_webtools-0.1.6.153.dist-info → abstract_webtools-0.1.6.155.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: abstract_webtools
|
3
|
-
Version: 0.1.6.153
|
3
|
+
Version: 0.1.6.155
|
4
4
|
Summary: Utilities for fetching/parsing web content with requests/urllib3/BS4 and helpers.
|
5
5
|
Home-page: https://github.com/AbstractEndeavors/abstract_webtools
|
6
6
|
Author: putkoff
|
@@ -8,7 +8,6 @@ abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZC
|
|
8
8
|
abstract_webtools/k2s_downloader.py,sha256=t0tCKAfDNQGn9tKh3eg0XVU0bY-MmYITwJa3ANf7090,6988
|
9
9
|
abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
|
10
10
|
abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
|
11
|
-
abstract_webtools/url_grabber.py,sha256=-QUENEmimMPrJ6Skg5-bPXl-Bp0VxbelWL6fQgR3o1I,13595
|
12
11
|
abstract_webtools/url_grabber_new.py,sha256=xb23qo4anOY0Ax3CAfaHJ8s5VEz61Sinh-XpEDFW7Is,3621
|
13
12
|
abstract_webtools/managers/__init__.py,sha256=RXQAK5z9nYlocM91P2OC4jR352-MiqT5bAi4xZl7_FU,470
|
14
13
|
abstract_webtools/managers/allss.py,sha256=IBhlyRQHfK-BtwUnSEbIPqlI1MtZ8-XsdaHv0b91HQ0,269
|
@@ -47,7 +46,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=6vWYnZGuimStbNiuH_V
|
|
47
46
|
abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
|
48
47
|
abstract_webtools/managers/urlManager/urlManager (Copy).py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
|
49
48
|
abstract_webtools/managers/urlManager/urlManager.py,sha256=vY4KQXtcrlC2YtlultxQpVe581l5kAuT5VGA0WrI16g,8945
|
50
|
-
abstract_webtools-0.1.6.
|
51
|
-
abstract_webtools-0.1.6.153.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
52
|
-
abstract_webtools-0.1.6.153.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
53
|
-
abstract_webtools-0.1.6.153.dist-info/RECORD,,
|
49
|
+
abstract_webtools-0.1.6.155.dist-info/METADATA,sha256=P74QxvGgUqxZfbOkQ7C7BfT5ODDPWhsSxxUjNjGTyW4,16573
|
50
|
+
abstract_webtools-0.1.6.155.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
51
|
+
abstract_webtools-0.1.6.155.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
|
52
|
+
abstract_webtools-0.1.6.155.dist-info/RECORD,,
|
abstract_webtools/url_grabber.py
DELETED
@@ -1,261 +0,0 @@
|
|
1
|
-
from abstract_gui import make_component,sg
|
2
|
-
import inspect
|
3
|
-
import re
|
4
|
-
from . import UserAgentManager,UrlManager,SafeRequest,SoupManager,LinkManager,CipherManager,requests,ssl,BeautifulSoup,HTTPAdapter,PoolManager,ssl_
|
5
|
-
from .managers import *
|
6
|
-
window = None
|
7
|
-
|
8
|
-
def get_attrs(values):
|
9
|
-
tags_js={'tag':[],'attribute':[],'input':[]}
|
10
|
-
for each in ['-SOUP_TAG-','-SOUP_ATTRIBUTE-','-SOUP_ATTRIBUTE_1-','-SOUP_ATTRIBUTE_2-']:
|
11
|
-
if values[each[:-1]+'_BOOL-'] == True:
|
12
|
-
for types in ['tag','attribute']:
|
13
|
-
if types in each.lower():
|
14
|
-
tags_js[types].append(values[each])
|
15
|
-
input_val = values['-SOUP_VALUES_INPUT-']
|
16
|
-
if input_val == '':
|
17
|
-
tags_js['input']=None
|
18
|
-
else:
|
19
|
-
tags_js['input']= input_val
|
20
|
-
if tags_js['tag']==[]:
|
21
|
-
tags_js['tag']=None if match.group(1) else None
|
22
|
-
else:
|
23
|
-
tags_js['tag']=tags_js['tag'][0]
|
24
|
-
if tags_js['attribute']==[]:
|
25
|
-
tags_js['attribute']=None
|
26
|
-
else:
|
27
|
-
tags_js['attribute']=tags_js['attribute'][0]
|
28
|
-
return tags_js
|
29
|
-
|
30
|
-
def get_user_agent_mgr(user_agent=None):
|
31
|
-
return UserAgentManager(user_agent=user_agent)
|
32
|
-
def get_cipher_list():
|
33
|
-
return CipherManager().get_default_ciphers()
|
34
|
-
def get_parse_type_choices():
|
35
|
-
return ['html.parser', 'lxml', 'html5lib']
|
36
|
-
def expandable(size:tuple=(None,None)):
|
37
|
-
return {"size": size,"resizable": True,"scrollable": True,"auto_size_text": True,"expand_x":True,"expand_y": True}
|
38
|
-
def change_glob(var:any,val:any):
|
39
|
-
globals()[var]=val
|
40
|
-
return val
|
41
|
-
def get_parse_type_choices():
|
42
|
-
bs4_module = inspect.getmodule(BeautifulSoup)
|
43
|
-
docstring = bs4_module.__builtins__
|
44
|
-
start_index = docstring.find("parse_types")
|
45
|
-
end_index = docstring.find(")", start_index)
|
46
|
-
choices_text = docstring[start_index:end_index]
|
47
|
-
choices = [choice.strip() for choice in choices_text.split(",")]
|
48
|
-
return choices
|
49
|
-
def get_browsers():
|
50
|
-
return 'Chrome,Firefox,Safari,Microsoft Edge,Internet Explorer,Opera'.split(',')
|
51
|
-
def get_user_agents():
|
52
|
-
from .big_user_agent_list import big_user_agent_list
|
53
|
-
return big_user_agent_list
|
54
|
-
def create_user_agent(user_agent:str=get_user_agents()[0]):
|
55
|
-
return {"user-agent": user_agent}
|
56
|
-
def get_operating_systems():
|
57
|
-
return ['Windows NT 10.0','Macintosh; Intel Mac OS X 10_15_7','Linux','Android','iOS']
|
58
|
-
def create_columns(ls,i,k):
|
59
|
-
if float(i)%float(k)==float(0.00) and i != 0:
|
60
|
-
lsN = list(ls[:-k])
|
61
|
-
lsN.append(list(ls[-k:]))
|
62
|
-
ls = lsN
|
63
|
-
return ls
|
64
|
-
def get_cypher_checks():
|
65
|
-
ciphers_list = get_cipher_list()
|
66
|
-
ls=[[[sg.Text('CIPHERS: ')],sg.Multiline('',key='-CIPHERS_OUTPUT-', size=(80, 5), disabled=False)]]
|
67
|
-
for k,cipher in enumerate(ciphers_list):
|
68
|
-
ls.append(sg.Checkbox(cipher,key=cipher,default=True,enable_events=True))
|
69
|
-
ls = create_columns(ls,k,5)
|
70
|
-
return ls
|
71
|
-
def get_bs4_options():
|
72
|
-
bs4_options = [
|
73
|
-
'BeautifulSoup',
|
74
|
-
'Tag',
|
75
|
-
'NavigableString',
|
76
|
-
'Comment',
|
77
|
-
'ResultSet',
|
78
|
-
'SoupStrainer',
|
79
|
-
'CData'
|
80
|
-
]
|
81
|
-
descriptions = [
|
82
|
-
'The main BeautifulSoup class used for parsing HTML.',
|
83
|
-
'Represents an HTML tag.',
|
84
|
-
'Represents a string within an HTML document.',
|
85
|
-
'Represents an HTML comment.',
|
86
|
-
'Represents a collection of tags found during a search.',
|
87
|
-
'Allows parsing only a specific subset of the HTML document.',
|
88
|
-
'Represents a CDATA section within an XML document.'
|
89
|
-
]
|
90
|
-
return list(zip(bs4_options, descriptions))
|
91
|
-
def get_multi_line(args):
|
92
|
-
return make_component("Multiline",**args,**expandable())
|
93
|
-
def get_gpt_layout(url):
|
94
|
-
# Add a dropdown for selecting BeautifulSoup parsing capabilities
|
95
|
-
parse_type_choices = ['html.parser', 'lxml', 'html5lib']
|
96
|
-
make_component("theme",'LightGrey1')
|
97
|
-
layout = [[sg.Text('URL:', size=(8, 1)), sg.Input(url, key='-URL-',enable_events=True),sg.Text('status:'),sg.Text('200',key="-STATUS_CODE-")
|
98
|
-
,sg.Text(f'success: {url} is valid',key="-URL_WARNING-"),sg.Button('Grab URL',key='-GRAB_URL-',visible=True)],
|
99
|
-
[sg.Checkbox('Custom User-Agent', default=False, key='-CUSTOMUA-', enable_events=True)],
|
100
|
-
[sg.Text('User-Agent:', size=(8, 1)), sg.Combo(get_user_agents(), default_value='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36', key='-USERAGENT-', disabled=False)],
|
101
|
-
[get_cypher_checks()],
|
102
|
-
[sg.Button('Grab URL'), sg.Button('Action'),sg.Button('Get All Text')],
|
103
|
-
[sg.Text('Parsing Capabilities:', size=(15, 1)), sg.DropDown(parse_type_choices, default_value='html.parser', key='-parse_type-',enable_events=True)],
|
104
|
-
[get_multi_line({"key":'-SOURCECODE-'})],
|
105
|
-
[sg.Text('find soup:'),[[sg.Checkbox('',default=True,key='-SOUP_TAG_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_TAG-',enable_events=True)],
|
106
|
-
[sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE-',enable_events=True)],
|
107
|
-
[sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_1_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE_1-',enable_events=True)],
|
108
|
-
[sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_2_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE_2-',enable_events=True)],
|
109
|
-
sg.Input(key='-SOUP_VALUES_INPUT-'), sg.Button('get soup'),sg.Button('all soup'),sg.Button('Send Soup')]],
|
110
|
-
[get_multi_line({"key":"-FIND_ALL_OUTPUT-"})]]
|
111
|
-
return layout
|
112
|
-
def get_selected_cipher_list():
|
113
|
-
ls = []
|
114
|
-
ciphers_list = get_cipher_list()
|
115
|
-
event, values = window.read()
|
116
|
-
for cipher in ciphers_list:
|
117
|
-
if values[cipher] == True:
|
118
|
-
ls.append(cipher)
|
119
|
-
return ls
|
120
|
-
def update_status(window,warn,warn_url,response_code,valid):
|
121
|
-
window['-URL-'].update(value=warn_url)
|
122
|
-
window['-STATUS_CODE-'].update(value=response_code)
|
123
|
-
window["-URL_WARNING-"].update(value=f"{warn} : {warn_url} is {valid}")
|
124
|
-
def process_url(window,values):
|
125
|
-
response_code=False
|
126
|
-
temp_mgr=None
|
127
|
-
warn='warning'
|
128
|
-
valid='invalid'
|
129
|
-
warn_url = values['-URL-']
|
130
|
-
if warn_url=='' or warn_url == None:
|
131
|
-
update_status(window,warn,warn_url,response_code,valid)
|
132
|
-
return False
|
133
|
-
temp_url=UrlManager(url=warn_url).url
|
134
|
-
if temp_url:
|
135
|
-
valid='valid'
|
136
|
-
response_code = SafeRequest(url=temp_mgr).response.status_code
|
137
|
-
temp_url=urlManager(url=warn_url).url
|
138
|
-
if temp_url:
|
139
|
-
valid='valid'
|
140
|
-
response_code = requestManager(url=temp_mgr).response.status_code
|
141
|
-
warn = 'success'
|
142
|
-
warn_url = temp_mgr
|
143
|
-
update_status(window,warn,warn_url,response_code,valid)
|
144
|
-
return temp_mgr
|
145
|
-
update_status(window,warn,warn_url,response_code,valid)
|
146
|
-
return False
|
147
|
-
def update_url(url_manager,request_manager,soup_manager,link_manager,values,cipher_list=get_cipher_list(),user_agent=get_user_agents()[0]):
|
148
|
-
ciphers = CipherManager(cipher_list=cipher_list).ciphers_string
|
149
|
-
request_manager = SafeRequest(url_manager=url_manager,ciphers=ciphers,user_agent=get_user_agents()[0])
|
150
|
-
if request_manager.source_code:
|
151
|
-
soup_manager= SoupManager(url_manager=url_manager,request_manager=request_manager)
|
152
|
-
link_manager= LinkManager(url_manager=url_manager,request_manager=request_manager,soup_manager=soup_manager)
|
153
|
-
window['-URL-'].update(value=url_manager.url)
|
154
|
-
window['-CIPHERS_OUTPUT-'].update(value=request_manager.ciphers)
|
155
|
-
return update_source_code(url_manager,request_manager,soup_manager,link_manager,values)
|
156
|
-
else:
|
157
|
-
return url_manager,request_manager,soup_manager,link_manager
|
158
|
-
def update_source_code(url_manager,request_manager,soup_manager,link_manager,values):
|
159
|
-
parse_type = values['-parse_type-']
|
160
|
-
if parse_type != soup_manager.parse_type:
|
161
|
-
soup_manager.update_parse_type(parse_type=parse_type)
|
162
|
-
all_tags=soup_manager.get_all_tags_and_attribute_names()
|
163
|
-
window['-SOURCECODE-'].update(value=soup_manager.soup)
|
164
|
-
window['-SOURCECODE-'].update(value=soup_manager.soupdef update_url(url_mgr,request_mgr,soup_mgr,link_mgr,values,cipher_list=get_cipher_list(),user_agent=get_user_agents()[0]):
|
165
|
-
ciphers = CipherManager(cipher_list=cipher_list).ciphers_string
|
166
|
-
request_mgr = requestManager(url_mgr=url_mgr,ciphers=ciphers,user_agent=get_user_agents()[0])
|
167
|
-
if request_mgr.source_code:
|
168
|
-
soup_mgr= SoupManager(url_mgr=url_mgr,request_mgr=request_mgr)
|
169
|
-
link_mgr= LinkManager(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr)
|
170
|
-
window['-URL-'].update(value=url_mgr.url)
|
171
|
-
window['-CIPHERS_OUTPUT-'].update(value=request_mgr.ciphers)
|
172
|
-
return update_source_code(url_mgr,request_mgr,soup_mgr,link_mgr,values)
|
173
|
-
else:
|
174
|
-
return url_mgr,request_mgr,soup_mgr,link_mgr
|
175
|
-
def update_source_code(url_mgr,request_mgr,soup_mgr,link_mgr,values):
|
176
|
-
parse_type = values['-parse_type-']
|
177
|
-
if parse_type != soup_mgr.parse_type:
|
178
|
-
soup_mgr.update_parse_type(parse_type=parse_type)
|
179
|
-
all_tags=soup_mgr.get_all_tags_and_attribute_names()
|
180
|
-
window['-SOURCECODE-'].update(value=soup_mgr.soup)
|
181
|
-
if values['-SOUP_TAG-'] != all_tags['tags']:
|
182
|
-
window['-SOUP_TAG-'].update(values=all_tags['tags'],value=all_tags['tags'][0])
|
183
|
-
if values['-SOUP_ATTRIBUTE-'] != all_tags['attributes']:
|
184
|
-
window['-SOUP_ATTRIBUTE-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
|
185
|
-
window['-SOUP_ATTRIBUTE_1-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
|
186
|
-
window['-SOUP_ATTRIBUTE_2-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
|
187
|
-
return url_manager,request_manager,soup_manager,link_manager
|
188
|
-
def url_grabber_while(window,initial_url="www.example.com"):
|
189
|
-
return_data=None
|
190
|
-
url_grab = False
|
191
|
-
url_manager=UrlManager(url=initial_url)
|
192
|
-
request_manager = SafeRequest(url_manager=url_manager)
|
193
|
-
soup_manager= SoupManager(url_manager=url_manager,request_manager=request_manager)
|
194
|
-
link_manager= LinkManager(url_manager=url_manager,request_manager=request_manager,soup_manager=soup_manager)
|
195
|
-
return url_mgr,request_mgr,soup_mgr,link_mgr
|
196
|
-
def url_grabber_while(window,initial_url="www.example.com"):
|
197
|
-
return_data=None
|
198
|
-
url_grab = False
|
199
|
-
url_mgr=urlManager(url=initial_url)
|
200
|
-
request_mgr = requestManager(url_mgr=url_mgr)
|
201
|
-
soup_mgr= SoupManager(url_mgr=url_mgr,request_mgr=request_mgr)
|
202
|
-
link_mgr= LinkManager(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr)
|
203
|
-
while True:
|
204
|
-
event, values = window.read()
|
205
|
-
if event == sg.WINDOW_CLOSED:
|
206
|
-
break
|
207
|
-
if event=='-GRAB_URL-' or not url_grab:
|
208
|
-
url=values['-URL-']
|
209
|
-
if UrlManager(url=url).url:
|
210
|
-
if url != url_manager.url or url == initial_url:
|
211
|
-
url_manager = UrlManager(url=url)
|
212
|
-
|
213
|
-
url_manager,request_manager,soup_manager,link_manager=update_url(url_manager=url_manager,request_manager=request_manager,soup_manager=soup_manager,link_manager=link_manager,values=values)
|
214
|
-
window['-URL-'].update(value=url_manager.url)
|
215
|
-
url_grab=True
|
216
|
-
if event == 'get soup':
|
217
|
-
tags_js = get_attrs(values)
|
218
|
-
all_desired=soup_manager.find_tags_by_attributes(tag=tags_js['tag'], attr=tags_js['attribute'],attr_values=tags_js['input'])
|
219
|
-
if urlManager(url=url).url:
|
220
|
-
if url != url_mgr.url or url == initial_url:
|
221
|
-
url_mgr = urlManager(url=url)
|
222
|
-
|
223
|
-
url_mgr,request_mgr,soup_mgr,link_mgr=update_url(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr,link_mgr=link_mgr,values=values)
|
224
|
-
window['-URL-'].update(value=url_mgr.url)
|
225
|
-
url_grab=True
|
226
|
-
if event == 'get soup':
|
227
|
-
tags_js = get_attrs(values)
|
228
|
-
all_desired=soup_mgr.find_tags_by_attributes(tag=tags_js['tag'], attr=tags_js['attribute'],attr_values=tags_js['input'])
|
229
|
-
window['-FIND_ALL_OUTPUT-'].update(value=all_desired)
|
230
|
-
if event == '-CUSTOMUA-':
|
231
|
-
window['-SOURCECODE-'].update(disabled=values['-CUSTOMUA-'])
|
232
|
-
if not values['-CUSTOMUA-']:
|
233
|
-
window['-USERAGENT-'].update(value=user_agent_manager.user_agent_header)
|
234
|
-
window['-USERAGENT-'].update(value=user_agent_mgr.user_agent_header)
|
235
|
-
window['-USERAGENT-'].update(disabled=True)
|
236
|
-
else:
|
237
|
-
window['-USERAGENT-'].update(disabled=False)
|
238
|
-
if event=='Get All Text':
|
239
|
-
window['-FIND_ALL_OUTPUT-'].update(value=soup_manager.extract_text_sections())
|
240
|
-
if event == 'Action':
|
241
|
-
parse_type = values['-parse_type-']
|
242
|
-
if parse_type != soup_manager.parse_type:
|
243
|
-
soup_manager.update_parse_type(parse_type=parse_type)
|
244
|
-
window['-SOURCECODE-'].update(value=soup_manager.soup)
|
245
|
-
window['-FIND_ALL_OUTPUT-'].update(value=soup_mgr.extract_text_sections())
|
246
|
-
if event == 'Action':
|
247
|
-
parse_type = values['-parse_type-']
|
248
|
-
if parse_type != soup_mgr.parse_type:
|
249
|
-
soup_mgr.update_parse_type(parse_type=parse_type)
|
250
|
-
window['-SOURCECODE-'].update(value=soup_mgr.soup)
|
251
|
-
elif event == 'Send Soup':
|
252
|
-
return_data = values['-FIND_ALL_OUTPUT-']
|
253
|
-
break
|
254
|
-
window.close()
|
255
|
-
return return_data
|
256
|
-
def url_grabber_component(url=None):
|
257
|
-
if url==None:
|
258
|
-
url = "www.example.com"
|
259
|
-
globals()['window'] = make_component('Window','URL Grabber', layout=get_gpt_layout(url),**expandable())
|
260
|
-
return url_grabber_while(window,initial_url=url)
|
261
|
-
|
File without changes
|
{abstract_webtools-0.1.6.153.dist-info → abstract_webtools-0.1.6.155.dist-info}/top_level.txt
RENAMED
File without changes
|