abstract-webtools 0.1.6.153__py3-none-any.whl → 0.1.6.155__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
--- abstract_webtools-0.1.6.153.dist-info/METADATA
+++ abstract_webtools-0.1.6.155.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: abstract_webtools
- Version: 0.1.6.153
+ Version: 0.1.6.155
  Summary: Utilities for fetching/parsing web content with requests/urllib3/BS4 and helpers.
  Home-page: https://github.com/AbstractEndeavors/abstract_webtools
  Author: putkoff
--- abstract_webtools-0.1.6.153.dist-info/RECORD
+++ abstract_webtools-0.1.6.155.dist-info/RECORD
@@ -8,7 +8,6 @@ abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZC
  abstract_webtools/k2s_downloader.py,sha256=t0tCKAfDNQGn9tKh3eg0XVU0bY-MmYITwJa3ANf7090,6988
  abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
  abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
- abstract_webtools/url_grabber.py,sha256=-QUENEmimMPrJ6Skg5-bPXl-Bp0VxbelWL6fQgR3o1I,13595
  abstract_webtools/url_grabber_new.py,sha256=xb23qo4anOY0Ax3CAfaHJ8s5VEz61Sinh-XpEDFW7Is,3621
  abstract_webtools/managers/__init__.py,sha256=RXQAK5z9nYlocM91P2OC4jR352-MiqT5bAi4xZl7_FU,470
  abstract_webtools/managers/allss.py,sha256=IBhlyRQHfK-BtwUnSEbIPqlI1MtZ8-XsdaHv0b91HQ0,269
@@ -47,7 +46,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=6vWYnZGuimStbNiuH_V
  abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
  abstract_webtools/managers/urlManager/urlManager (Copy).py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
  abstract_webtools/managers/urlManager/urlManager.py,sha256=vY4KQXtcrlC2YtlultxQpVe581l5kAuT5VGA0WrI16g,8945
- abstract_webtools-0.1.6.153.dist-info/METADATA,sha256=ENDlCW7dsBPA6z16s3-aHzcFRsEny_jodY-9dNARfSo,16573
- abstract_webtools-0.1.6.153.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- abstract_webtools-0.1.6.153.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
- abstract_webtools-0.1.6.153.dist-info/RECORD,,
+ abstract_webtools-0.1.6.155.dist-info/METADATA,sha256=P74QxvGgUqxZfbOkQ7C7BfT5ODDPWhsSxxUjNjGTyW4,16573
+ abstract_webtools-0.1.6.155.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ abstract_webtools-0.1.6.155.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
+ abstract_webtools-0.1.6.155.dist-info/RECORD,,
--- abstract_webtools/url_grabber.py
+++ /dev/null
@@ -1,261 +0,0 @@
- from abstract_gui import make_component,sg
- import inspect
- import re
- from . import UserAgentManager,UrlManager,SafeRequest,SoupManager,LinkManager,CipherManager,requests,ssl,BeautifulSoup,HTTPAdapter,PoolManager,ssl_
- from .managers import *
- window = None
-
- def get_attrs(values):
-     tags_js={'tag':[],'attribute':[],'input':[]}
-     for each in ['-SOUP_TAG-','-SOUP_ATTRIBUTE-','-SOUP_ATTRIBUTE_1-','-SOUP_ATTRIBUTE_2-']:
-         if values[each[:-1]+'_BOOL-'] == True:
-             for types in ['tag','attribute']:
-                 if types in each.lower():
-                     tags_js[types].append(values[each])
-     input_val = values['-SOUP_VALUES_INPUT-']
-     if input_val == '':
-         tags_js['input']=None
-     else:
-         tags_js['input']= input_val
-     if tags_js['tag']==[]:
-         tags_js['tag']=None if match.group(1) else None
-     else:
-         tags_js['tag']=tags_js['tag'][0]
-     if tags_js['attribute']==[]:
-         tags_js['attribute']=None
-     else:
-         tags_js['attribute']=tags_js['attribute'][0]
-     return tags_js
-
- def get_user_agent_mgr(user_agent=None):
-     return UserAgentManager(user_agent=user_agent)
- def get_cipher_list():
-     return CipherManager().get_default_ciphers()
- def get_parse_type_choices():
-     return ['html.parser', 'lxml', 'html5lib']
- def expandable(size:tuple=(None,None)):
-     return {"size": size,"resizable": True,"scrollable": True,"auto_size_text": True,"expand_x":True,"expand_y": True}
- def change_glob(var:any,val:any):
-     globals()[var]=val
-     return val
- def get_parse_type_choices():
-     bs4_module = inspect.getmodule(BeautifulSoup)
-     docstring = bs4_module.__builtins__
-     start_index = docstring.find("parse_types")
-     end_index = docstring.find(")", start_index)
-     choices_text = docstring[start_index:end_index]
-     choices = [choice.strip() for choice in choices_text.split(",")]
-     return choices
- def get_browsers():
-     return 'Chrome,Firefox,Safari,Microsoft Edge,Internet Explorer,Opera'.split(',')
- def get_user_agents():
-     from .big_user_agent_list import big_user_agent_list
-     return big_user_agent_list
- def create_user_agent(user_agent:str=get_user_agents()[0]):
-     return {"user-agent": user_agent}
- def get_operating_systems():
-     return ['Windows NT 10.0','Macintosh; Intel Mac OS X 10_15_7','Linux','Android','iOS']
- def create_columns(ls,i,k):
-     if float(i)%float(k)==float(0.00) and i != 0:
-         lsN = list(ls[:-k])
-         lsN.append(list(ls[-k:]))
-         ls = lsN
-     return ls
- def get_cypher_checks():
-     ciphers_list = get_cipher_list()
-     ls=[[[sg.Text('CIPHERS: ')],sg.Multiline('',key='-CIPHERS_OUTPUT-', size=(80, 5), disabled=False)]]
-     for k,cipher in enumerate(ciphers_list):
-         ls.append(sg.Checkbox(cipher,key=cipher,default=True,enable_events=True))
-         ls = create_columns(ls,k,5)
-     return ls
- def get_bs4_options():
-     bs4_options = [
-         'BeautifulSoup',
-         'Tag',
-         'NavigableString',
-         'Comment',
-         'ResultSet',
-         'SoupStrainer',
-         'CData'
-     ]
-     descriptions = [
-         'The main BeautifulSoup class used for parsing HTML.',
-         'Represents an HTML tag.',
-         'Represents a string within an HTML document.',
-         'Represents an HTML comment.',
-         'Represents a collection of tags found during a search.',
-         'Allows parsing only a specific subset of the HTML document.',
-         'Represents a CDATA section within an XML document.'
-     ]
-     return list(zip(bs4_options, descriptions))
- def get_multi_line(args):
-     return make_component("Multiline",**args,**expandable())
- def get_gpt_layout(url):
-     # Add a dropdown for selecting BeautifulSoup parsing capabilities
-     parse_type_choices = ['html.parser', 'lxml', 'html5lib']
-     make_component("theme",'LightGrey1')
-     layout = [[sg.Text('URL:', size=(8, 1)), sg.Input(url, key='-URL-',enable_events=True),sg.Text('status:'),sg.Text('200',key="-STATUS_CODE-")
-                ,sg.Text(f'success: {url} is valid',key="-URL_WARNING-"),sg.Button('Grab URL',key='-GRAB_URL-',visible=True)],
-               [sg.Checkbox('Custom User-Agent', default=False, key='-CUSTOMUA-', enable_events=True)],
-               [sg.Text('User-Agent:', size=(8, 1)), sg.Combo(get_user_agents(), default_value='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36', key='-USERAGENT-', disabled=False)],
-               [get_cypher_checks()],
-               [sg.Button('Grab URL'), sg.Button('Action'),sg.Button('Get All Text')],
-               [sg.Text('Parsing Capabilities:', size=(15, 1)), sg.DropDown(parse_type_choices, default_value='html.parser', key='-parse_type-',enable_events=True)],
-               [get_multi_line({"key":'-SOURCECODE-'})],
-               [sg.Text('find soup:'),[[sg.Checkbox('',default=True,key='-SOUP_TAG_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_TAG-',enable_events=True)],
-                [sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE-',enable_events=True)],
-                [sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_1_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE_1-',enable_events=True)],
-                [sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_2_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE_2-',enable_events=True)],
-                sg.Input(key='-SOUP_VALUES_INPUT-'), sg.Button('get soup'),sg.Button('all soup'),sg.Button('Send Soup')]],
-               [get_multi_line({"key":"-FIND_ALL_OUTPUT-"})]]
-     return layout
- def get_selected_cipher_list():
-     ls = []
-     ciphers_list = get_cipher_list()
-     event, values = window.read()
-     for cipher in ciphers_list:
-         if values[cipher] == True:
-             ls.append(cipher)
-     return ls
- def update_status(window,warn,warn_url,response_code,valid):
-     window['-URL-'].update(value=warn_url)
-     window['-STATUS_CODE-'].update(value=response_code)
-     window["-URL_WARNING-"].update(value=f"{warn} : {warn_url} is {valid}")
- def process_url(window,values):
-     response_code=False
-     temp_mgr=None
-     warn='warning'
-     valid='invalid'
-     warn_url = values['-URL-']
-     if warn_url=='' or warn_url == None:
-         update_status(window,warn,warn_url,response_code,valid)
-         return False
-     temp_url=UrlManager(url=warn_url).url
-     if temp_url:
-         valid='valid'
-         response_code = SafeRequest(url=temp_mgr).response.status_code
-     temp_url=urlManager(url=warn_url).url
-     if temp_url:
-         valid='valid'
-         response_code = requestManager(url=temp_mgr).response.status_code
-         warn = 'success'
-         warn_url = temp_mgr
-         update_status(window,warn,warn_url,response_code,valid)
-         return temp_mgr
-     update_status(window,warn,warn_url,response_code,valid)
-     return False
- def update_url(url_manager,request_manager,soup_manager,link_manager,values,cipher_list=get_cipher_list(),user_agent=get_user_agents()[0]):
-     ciphers = CipherManager(cipher_list=cipher_list).ciphers_string
-     request_manager = SafeRequest(url_manager=url_manager,ciphers=ciphers,user_agent=get_user_agents()[0])
-     if request_manager.source_code:
-         soup_manager= SoupManager(url_manager=url_manager,request_manager=request_manager)
-         link_manager= LinkManager(url_manager=url_manager,request_manager=request_manager,soup_manager=soup_manager)
-         window['-URL-'].update(value=url_manager.url)
-         window['-CIPHERS_OUTPUT-'].update(value=request_manager.ciphers)
-         return update_source_code(url_manager,request_manager,soup_manager,link_manager,values)
-     else:
-         return url_manager,request_manager,soup_manager,link_manager
- def update_source_code(url_manager,request_manager,soup_manager,link_manager,values):
-     parse_type = values['-parse_type-']
-     if parse_type != soup_manager.parse_type:
-         soup_manager.update_parse_type(parse_type=parse_type)
-     all_tags=soup_manager.get_all_tags_and_attribute_names()
-     window['-SOURCECODE-'].update(value=soup_manager.soup)
-     window['-SOURCECODE-'].update(value=soup_manager.soupdef update_url(url_mgr,request_mgr,soup_mgr,link_mgr,values,cipher_list=get_cipher_list(),user_agent=get_user_agents()[0]):
-     ciphers = CipherManager(cipher_list=cipher_list).ciphers_string
-     request_mgr = requestManager(url_mgr=url_mgr,ciphers=ciphers,user_agent=get_user_agents()[0])
-     if request_mgr.source_code:
-         soup_mgr= SoupManager(url_mgr=url_mgr,request_mgr=request_mgr)
-         link_mgr= LinkManager(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr)
-         window['-URL-'].update(value=url_mgr.url)
-         window['-CIPHERS_OUTPUT-'].update(value=request_mgr.ciphers)
-         return update_source_code(url_mgr,request_mgr,soup_mgr,link_mgr,values)
-     else:
-         return url_mgr,request_mgr,soup_mgr,link_mgr
- def update_source_code(url_mgr,request_mgr,soup_mgr,link_mgr,values):
-     parse_type = values['-parse_type-']
-     if parse_type != soup_mgr.parse_type:
-         soup_mgr.update_parse_type(parse_type=parse_type)
-     all_tags=soup_mgr.get_all_tags_and_attribute_names()
-     window['-SOURCECODE-'].update(value=soup_mgr.soup)
-     if values['-SOUP_TAG-'] != all_tags['tags']:
-         window['-SOUP_TAG-'].update(values=all_tags['tags'],value=all_tags['tags'][0])
-     if values['-SOUP_ATTRIBUTE-'] != all_tags['attributes']:
-         window['-SOUP_ATTRIBUTE-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
-         window['-SOUP_ATTRIBUTE_1-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
-         window['-SOUP_ATTRIBUTE_2-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
-     return url_manager,request_manager,soup_manager,link_manager
- def url_grabber_while(window,initial_url="www.example.com"):
-     return_data=None
-     url_grab = False
-     url_manager=UrlManager(url=initial_url)
-     request_manager = SafeRequest(url_manager=url_manager)
-     soup_manager= SoupManager(url_manager=url_manager,request_manager=request_manager)
-     link_manager= LinkManager(url_manager=url_manager,request_manager=request_manager,soup_manager=soup_manager)
-     return url_mgr,request_mgr,soup_mgr,link_mgr
- def url_grabber_while(window,initial_url="www.example.com"):
-     return_data=None
-     url_grab = False
-     url_mgr=urlManager(url=initial_url)
-     request_mgr = requestManager(url_mgr=url_mgr)
-     soup_mgr= SoupManager(url_mgr=url_mgr,request_mgr=request_mgr)
-     link_mgr= LinkManager(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr)
-     while True:
-         event, values = window.read()
-         if event == sg.WINDOW_CLOSED:
-             break
-         if event=='-GRAB_URL-' or not url_grab:
-             url=values['-URL-']
-             if UrlManager(url=url).url:
-                 if url != url_manager.url or url == initial_url:
-                     url_manager = UrlManager(url=url)
-
-                 url_manager,request_manager,soup_manager,link_manager=update_url(url_manager=url_manager,request_manager=request_manager,soup_manager=soup_manager,link_manager=link_manager,values=values)
-                 window['-URL-'].update(value=url_manager.url)
-                 url_grab=True
-         if event == 'get soup':
-             tags_js = get_attrs(values)
-             all_desired=soup_manager.find_tags_by_attributes(tag=tags_js['tag'], attr=tags_js['attribute'],attr_values=tags_js['input'])
-             if urlManager(url=url).url:
-                 if url != url_mgr.url or url == initial_url:
-                     url_mgr = urlManager(url=url)
-
-                 url_mgr,request_mgr,soup_mgr,link_mgr=update_url(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr,link_mgr=link_mgr,values=values)
-                 window['-URL-'].update(value=url_mgr.url)
-                 url_grab=True
-         if event == 'get soup':
-             tags_js = get_attrs(values)
-             all_desired=soup_mgr.find_tags_by_attributes(tag=tags_js['tag'], attr=tags_js['attribute'],attr_values=tags_js['input'])
-             window['-FIND_ALL_OUTPUT-'].update(value=all_desired)
-         if event == '-CUSTOMUA-':
-             window['-SOURCECODE-'].update(disabled=values['-CUSTOMUA-'])
-             if not values['-CUSTOMUA-']:
-                 window['-USERAGENT-'].update(value=user_agent_manager.user_agent_header)
-                 window['-USERAGENT-'].update(value=user_agent_mgr.user_agent_header)
-                 window['-USERAGENT-'].update(disabled=True)
-             else:
-                 window['-USERAGENT-'].update(disabled=False)
-         if event=='Get All Text':
-             window['-FIND_ALL_OUTPUT-'].update(value=soup_manager.extract_text_sections())
-         if event == 'Action':
-             parse_type = values['-parse_type-']
-             if parse_type != soup_manager.parse_type:
-                 soup_manager.update_parse_type(parse_type=parse_type)
-             window['-SOURCECODE-'].update(value=soup_manager.soup)
-             window['-FIND_ALL_OUTPUT-'].update(value=soup_mgr.extract_text_sections())
-         if event == 'Action':
-             parse_type = values['-parse_type-']
-             if parse_type != soup_mgr.parse_type:
-                 soup_mgr.update_parse_type(parse_type=parse_type)
-             window['-SOURCECODE-'].update(value=soup_mgr.soup)
-         elif event == 'Send Soup':
-             return_data = values['-FIND_ALL_OUTPUT-']
-             break
-     window.close()
-     return return_data
- def url_grabber_component(url=None):
-     if url==None:
-         url = "www.example.com"
-     globals()['window'] = make_component('Window','URL Grabber', layout=get_gpt_layout(url),**expandable())
-     return url_grabber_while(window,initial_url=url)
-