abstract-webtools 0.1.6.154__py3-none-any.whl → 0.1.6.156__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  from .abstract_webtools import *
2
- from .url_grabber import url_grabber_component
2
+
3
3
  from .abstract_webtools import get_url_mgr
4
4
  from .managers import *
5
5
  from .abstract_usurpit import usurpManager,usurpit
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: abstract_webtools
3
- Version: 0.1.6.154
3
+ Version: 0.1.6.156
4
4
  Summary: Utilities for fetching/parsing web content with requests/urllib3/BS4 and helpers.
5
5
  Home-page: https://github.com/AbstractEndeavors/abstract_webtools
6
6
  Author: putkoff
@@ -1,4 +1,4 @@
1
- abstract_webtools/__init__.py,sha256=yzVSyqPwhUPBijfAj3AMl9GgewR_jvVNgFEKwkXV0TY,198
1
+ abstract_webtools/__init__.py,sha256=P2eRTWMkZPqdrX3K5CjVoYZE0kA9xxYc74K8VVJi5Fo,152
2
2
  abstract_webtools/abstract_usurpit.py,sha256=2idbYXLFhXh8VPfdYgWICNH8dehnZRCdt4U5sTsVxo4,9663
3
3
  abstract_webtools/abstract_webtools.py,sha256=rBSuzveYUtXugRcsEMrXCRbuMps7ntFeJG86ZHvw_-k,91587
4
4
  abstract_webtools/big_user_agent_list.py,sha256=5ZkrUWmfzYL5yaULREslh9ZiRQeITbSjqZlp2KQON3w,131923
@@ -8,7 +8,6 @@ abstract_webtools/find_dirs.py,sha256=BlE4ruzMABqmv03NcutZ1j5N3pCc-Q4uNEAMpNolZC
8
8
  abstract_webtools/k2s_downloader.py,sha256=t0tCKAfDNQGn9tKh3eg0XVU0bY-MmYITwJa3ANf7090,6988
9
9
  abstract_webtools/main.py,sha256=_I7pPXPkoLZOoYGLQDrSLGhGuQt6-PVyXEHZSmglk2g,1329
10
10
  abstract_webtools/soup_gui.py,sha256=n95YAps1R6DpMwR4UbthSqQby0C5WHUa9tsW-f2qpLg,5184
11
- abstract_webtools/url_grabber.py,sha256=FAry0gmZH4Iy646mYq9yKN4hh5ME6MiTr_AmP5WAR90,10440
12
11
  abstract_webtools/url_grabber_new.py,sha256=xb23qo4anOY0Ax3CAfaHJ8s5VEz61Sinh-XpEDFW7Is,3621
13
12
  abstract_webtools/managers/__init__.py,sha256=RXQAK5z9nYlocM91P2OC4jR352-MiqT5bAi4xZl7_FU,470
14
13
  abstract_webtools/managers/allss.py,sha256=IBhlyRQHfK-BtwUnSEbIPqlI1MtZ8-XsdaHv0b91HQ0,269
@@ -47,7 +46,7 @@ abstract_webtools/managers/soupManager/soupManager.py,sha256=6vWYnZGuimStbNiuH_V
47
46
  abstract_webtools/managers/urlManager/__init__.py,sha256=gaJCHeK91Z-eYsBnxgdhbIUten1-gbx-zqx70R6ag-Y,26
48
47
  abstract_webtools/managers/urlManager/urlManager (Copy).py,sha256=vCFuLADmv3h7icaaoAsImGqb_49VizPY_ZvMl-C7PYk,7756
49
48
  abstract_webtools/managers/urlManager/urlManager.py,sha256=vY4KQXtcrlC2YtlultxQpVe581l5kAuT5VGA0WrI16g,8945
50
- abstract_webtools-0.1.6.154.dist-info/METADATA,sha256=LePpjPoDmdzjKrrzctyP_nsCoa04MMVrSFzZZgoa8Vw,16573
51
- abstract_webtools-0.1.6.154.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
- abstract_webtools-0.1.6.154.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
53
- abstract_webtools-0.1.6.154.dist-info/RECORD,,
49
+ abstract_webtools-0.1.6.156.dist-info/METADATA,sha256=qtxMiWowCso_y87NvZFOaObZZwu9hrf1OfWxVhOwOl8,16573
50
+ abstract_webtools-0.1.6.156.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
51
+ abstract_webtools-0.1.6.156.dist-info/top_level.txt,sha256=2DMJ7RmjTcjCsa-uwAV0K6eXXlIIkFDEjBLg_uyCmCI,18
52
+ abstract_webtools-0.1.6.156.dist-info/RECORD,,
@@ -1,212 +0,0 @@
1
- from abstract_gui import make_component,sg
2
- import inspect
3
- import re
4
- from .managers import *
5
# Module-level handle to the GUI window; rebound by url_grabber_component()
# and read as a global by the update_* helpers in this module.
window = None
6
-
7
def get_attrs(values):
    """Collect the tag/attribute/value search criteria selected in the GUI.

    Each of the four combo boxes (``-SOUP_TAG-``, ``-SOUP_ATTRIBUTE-``,
    ``-SOUP_ATTRIBUTE_1-``, ``-SOUP_ATTRIBUTE_2-``) is paired with a checkbox
    whose key is the combo key with ``_BOOL`` inserted before the trailing
    dash. Only combos whose checkbox is ticked contribute.

    Args:
        values: Mapping of element keys to their current values.

    Returns:
        A dict with keys ``'tag'``, ``'attribute'`` and ``'input'``. ``'tag'``
        and ``'attribute'`` hold the first ticked selection of that kind, or
        ``None`` when nothing is ticked. ``'input'`` is the text of
        ``-SOUP_VALUES_INPUT-``, or ``None`` when that text is empty.
    """
    selected = {'tag': [], 'attribute': []}
    combo_keys = ('-SOUP_TAG-', '-SOUP_ATTRIBUTE-', '-SOUP_ATTRIBUTE_1-', '-SOUP_ATTRIBUTE_2-')
    for combo_key in combo_keys:
        # '-SOUP_TAG-' -> '-SOUP_TAG_BOOL-'
        if not values[combo_key[:-1] + '_BOOL-']:
            continue
        for kind in selected:
            if kind in combo_key.lower():
                selected[kind].append(values[combo_key])

    input_val = values['-SOUP_VALUES_INPUT-']
    # The empty-tag branch previously evaluated ``match.group(1)`` on a name
    # that does not exist in this module; both arms of that expression were
    # ``None``, so the intended result is simply ``None``.
    return {
        'tag': selected['tag'][0] if selected['tag'] else None,
        'attribute': selected['attribute'][0] if selected['attribute'] else None,
        'input': None if input_val == '' else input_val,
    }
28
def get_user_agent_mgr(user_agent=None):
    """Build a ``UserAgentManager`` for ``user_agent`` and return it."""
    manager = UserAgentManager(user_agent=user_agent)
    return manager
30
def get_cipher_list():
    """Return ``get_default_ciphers()`` from a freshly built ``CipherManager``."""
    cipher_mgr = CipherManager()
    return cipher_mgr.get_default_ciphers()
32
def get_parse_type_choices():
    """Return the parser names offered for BeautifulSoup parsing."""
    return 'html.parser,lxml,html5lib'.split(',')
34
def expandable(size:tuple=(None,None)):
    """Return keyword options describing a resizable, scrollable element.

    Args:
        size: Value stored under the ``"size"`` key.

    Returns:
        A new dict with ``"size"`` followed by five flags, all ``True``.
    """
    flag_names = ("resizable", "scrollable", "auto_size_text", "expand_x", "expand_y")
    options = {"size": size}
    options.update(dict.fromkeys(flag_names, True))
    return options
36
def change_glob(var:any,val:any):
    """Bind ``val`` to the module-level name ``var`` and return ``val``."""
    globals().update({var: val})
    return val
39
def get_parse_type_choices():
    """Return the parser names that can be selected for BeautifulSoup.

    The previous body looked up ``__builtins__`` on the bs4 module and called
    ``.find`` on it as if it were a docstring. ``__builtins__`` is a dict or a
    module, never a string, so the call raised ``AttributeError`` every time,
    and because this definition rebinds the earlier one of the same name the
    helper was unusable. The parser names are a fixed, documented set, so
    they are returned directly.

    Returns:
        A new list: ``['html.parser', 'lxml', 'html5lib']``.
    """
    return ['html.parser', 'lxml', 'html5lib']
47
def get_browsers():
    """Return the browser names known to this module, as a new list."""
    return ['Chrome', 'Firefox', 'Safari', 'Microsoft Edge', 'Internet Explorer', 'Opera']
49
def get_user_agents():
    """Return ``big_user_agent_list`` from the sibling module of that name."""
    # Imported inside the function, so the module is loaded on first call.
    from .big_user_agent_list import big_user_agent_list as agents
    return agents
52
def create_user_agent(user_agent:str=get_user_agents()[0]):
    """Wrap ``user_agent`` in a one-entry ``{"user-agent": ...}`` mapping.

    The default is the first entry of ``get_user_agents()``, evaluated once
    when this function is defined.
    """
    headers = {}
    headers["user-agent"] = user_agent
    return headers
54
def get_operating_systems():
    """Return the operating-system tokens known to this module, as a new list."""
    tokens = (
        'Windows NT 10.0',
        'Macintosh; Intel Mac OS X 10_15_7',
        'Linux',
        'Android',
        'iOS',
    )
    return list(tokens)
56
def create_columns(ls,i,k):
    """Fold the last ``k`` items of ``ls`` into one nested list on a boundary.

    A boundary is a non-zero ``i`` that is an exact multiple of ``k``.

    Args:
        ls: Sequence of items collected so far.
        i: Index of the item just added.
        k: Group size.

    Returns:
        On a boundary, a new list holding everything before the last ``k``
        items followed by those ``k`` items as a single list. Otherwise
        ``ls`` itself, untouched.
    """
    on_boundary = float(i) % float(k) == float(0.00) and i != 0
    if not on_boundary:
        return ls
    leading, trailing = ls[:-k], ls[-k:]
    return [*leading, list(trailing)]
62
def get_cypher_checks():
    """Build the cipher section: an output box plus one checkbox per cipher.

    The first entry pairs a ``'CIPHERS: '`` label with the
    ``-CIPHERS_OUTPUT-`` multiline. Every name from ``get_cipher_list()``
    then gets a checkbox keyed by that name and ticked by default, and
    ``create_columns`` is applied after each addition with a group size of 5.
    """
    # NOTE(review): the listing this was recovered from had no indentation;
    # calling create_columns inside the loop is the assumed nesting.
    cipher_names = get_cipher_list()
    label = [sg.Text('CIPHERS: ')]
    output_box = sg.Multiline('',key='-CIPHERS_OUTPUT-', size=(80, 5), disabled=False)
    rows = [[label, output_box]]
    for index, name in enumerate(cipher_names):
        checkbox = sg.Checkbox(name,key=name,default=True,enable_events=True)
        rows = create_columns(rows + [checkbox], index, 5)
    return rows
69
def get_bs4_options():
    """Return ``(name, description)`` pairs for the main bs4 classes."""
    catalog = {
        'BeautifulSoup': 'The main BeautifulSoup class used for parsing HTML.',
        'Tag': 'Represents an HTML tag.',
        'NavigableString': 'Represents a string within an HTML document.',
        'Comment': 'Represents an HTML comment.',
        'ResultSet': 'Represents a collection of tags found during a search.',
        'SoupStrainer': 'Allows parsing only a specific subset of the HTML document.',
        'CData': 'Represents a CDATA section within an XML document.',
    }
    return list(catalog.items())
89
def get_multi_line(args):
    """Create a ``"Multiline"`` component from ``args`` plus ``expandable()``.

    Args:
        args: Mapping of keyword options forwarded to ``make_component``.
    """
    sizing = expandable()
    return make_component("Multiline", **args, **sizing)
91
def get_gpt_layout(url):
    """Assemble the nested element layout for the URL-grabber window.

    Args:
        url: Initial text of the ``-URL-`` input, also interpolated into the
            ``-URL_WARNING-`` label.

    Returns:
        A list of nine rows: URL/status, custom user-agent toggle, user-agent
        picker, cipher checkboxes, action buttons, parser drop-down, source
        view, soup-search controls and the search output view.
    """
    # Parser names offered in the "Parsing Capabilities" drop-down.
    parse_type_choices = ['html.parser', 'lxml', 'html5lib']
    make_component("theme",'LightGrey1')

    url_row = [
        sg.Text('URL:', size=(8, 1)),
        sg.Input(url, key='-URL-',enable_events=True),
        sg.Text('status:'),
        sg.Text('200',key="-STATUS_CODE-"),
        sg.Text(f'success: {url} is valid',key="-URL_WARNING-"),
        sg.Button('Grab URL',key='-GRAB_URL-',visible=True),
    ]
    custom_agent_row = [
        sg.Checkbox('Custom User-Agent', default=False, key='-CUSTOMUA-', enable_events=True),
    ]
    agent_row = [
        sg.Text('User-Agent:', size=(8, 1)),
        sg.Combo(get_user_agents(), default_value='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36', key='-USERAGENT-', disabled=False),
    ]
    cipher_row = [get_cypher_checks()]
    button_row = [sg.Button('Grab URL'), sg.Button('Action'),sg.Button('Get All Text')]
    parser_row = [
        sg.Text('Parsing Capabilities:', size=(15, 1)),
        sg.DropDown(parse_type_choices, default_value='html.parser', key='-parse_type-',enable_events=True),
    ]
    source_row = [get_multi_line({"key":'-SOURCECODE-'})]

    find_label = sg.Text('find soup:')
    # One [checkbox, combo] pair per search field; only the tag pair starts
    # ticked. The checkbox key is the combo key with '_BOOL' before the dash.
    soup_controls = []
    pair_specs = (
        ('-SOUP_TAG-', True),
        ('-SOUP_ATTRIBUTE-', False),
        ('-SOUP_ATTRIBUTE_1-', False),
        ('-SOUP_ATTRIBUTE_2-', False),
    )
    for combo_key, ticked in pair_specs:
        soup_controls.append([
            sg.Checkbox('',default=ticked,key=combo_key[:-1] + '_BOOL-',enable_events=True),
            sg.Combo([], size=(15, 1),key=combo_key,enable_events=True),
        ])
    soup_controls.append(sg.Input(key='-SOUP_VALUES_INPUT-'))
    soup_controls.append(sg.Button('get soup'))
    soup_controls.append(sg.Button('all soup'))
    soup_controls.append(sg.Button('Send Soup'))
    find_row = [find_label, soup_controls]

    output_row = [get_multi_line({"key":"-FIND_ALL_OUTPUT-"})]

    return [
        url_row,
        custom_agent_row,
        agent_row,
        cipher_row,
        button_row,
        parser_row,
        source_row,
        find_row,
        output_row,
    ]
110
def get_selected_cipher_list():
    """Return the cipher names whose checkbox value is ``True``.

    Performs one ``window.read()`` on the module-level ``window`` and looks
    up each name from ``get_cipher_list()`` in the values it returns.
    """
    candidates = get_cipher_list()
    _event, values = window.read()
    return [name for name in candidates if values[name] == True]
118
def update_status(window,warn,warn_url,response_code,valid):
    """Refresh the URL, status-code and warning elements of ``window``.

    Args:
        window: Object indexable by element key; each element exposes
            ``update(value=...)``.
        warn: Prefix of the warning label.
        warn_url: URL written to ``-URL-`` and embedded in the warning label.
        response_code: Value written to ``-STATUS_CODE-``.
        valid: Trailing word of the warning label.
    """
    def show(key, text):
        window[key].update(value=text)

    show('-URL-', warn_url)
    show('-STATUS_CODE-', response_code)
    show("-URL_WARNING-", f"{warn} : {warn_url} is {valid}")
122
def process_url(window,values):
    """Check the URL typed into the GUI and show the outcome in the window.

    Reads ``values['-URL-']`` and runs it through ``urlManager``. When that
    yields a usable URL, the URL is requested and its HTTP status code read.
    The status elements are refreshed through ``update_status`` on every path.

    Args:
        window: GUI window handed to ``update_status``.
        values: Mapping of element keys to values; must contain ``'-URL-'``.

    Returns:
        The URL reported by ``urlManager`` on success, otherwise ``False``.
    """
    response_code = False
    warn = 'warning'
    valid = 'invalid'
    warn_url = values['-URL-']
    if warn_url == '' or warn_url is None:
        update_status(window, warn, warn_url, response_code, valid)
        return False

    temp_url = urlManager(url=warn_url).url
    if not temp_url:
        update_status(window, warn, warn_url, response_code, valid)
        return False

    # The success path used to pass a placeholder that was never assigned
    # (always None) to the request, the status display and the return value.
    # The validated URL is what all three need.
    response_code = requestManager(url=temp_url).response.status_code
    update_status(window, 'success', temp_url, response_code, 'valid')
    return temp_url
141
def update_url(url_mgr,request_mgr,soup_mgr,link_mgr,values,cipher_list=None,user_agent=None):
    """Re-request ``url_mgr``'s URL and rebuild the dependent managers.

    Args:
        url_mgr: URL manager describing the page to fetch.
        request_mgr: Previous request manager; always replaced by a new one.
        soup_mgr: Previous soup manager; returned unchanged if the fetch
            yields no source code.
        link_mgr: Previous link manager; returned unchanged if the fetch
            yields no source code.
        values: Current GUI values, forwarded to ``update_source_code``.
        cipher_list: Cipher names to use. ``None`` (the default) selects
            ``get_cipher_list()`` at call time.
        user_agent: User-agent string for the request. ``None`` (the
            default) selects the first entry of ``get_user_agents()``.

    Returns:
        The tuple ``(url_mgr, request_mgr, soup_mgr, link_mgr)``.
    """
    # Defaults are resolved per call. They used to be computed once at import
    # time, which built a CipherManager and loaded the user-agent list as an
    # import side effect and shared one list object between calls.
    if cipher_list is None:
        cipher_list = get_cipher_list()
    if user_agent is None:
        user_agent = get_user_agents()[0]

    ciphers = CipherManager(cipher_list=cipher_list).ciphers_string
    # Honour the caller's user_agent; it was previously ignored in favour of
    # the first list entry.
    request_mgr = requestManager(url_mgr=url_mgr, ciphers=ciphers, user_agent=user_agent)
    if not request_mgr.source_code:
        return url_mgr, request_mgr, soup_mgr, link_mgr

    soup_mgr = SoupManager(url_mgr=url_mgr, request_mgr=request_mgr)
    link_mgr = LinkManager(url_mgr=url_mgr, request_mgr=request_mgr, soup_mgr=soup_mgr)
    window['-URL-'].update(value=url_mgr.url)
    window['-CIPHERS_OUTPUT-'].update(value=request_mgr.ciphers)
    return update_source_code(url_mgr, request_mgr, soup_mgr, link_mgr, values)
152
def update_source_code(url_mgr,request_mgr,soup_mgr,link_mgr,values):
    """Push the parsed page and its tag/attribute names into the window.

    Switches ``soup_mgr`` to the parser chosen in ``-parse_type-`` when it
    differs, shows ``soup_mgr.soup`` in ``-SOURCECODE-``, and repopulates the
    tag and attribute combo boxes from
    ``soup_mgr.get_all_tags_and_attribute_names()``.

    Args:
        url_mgr: Returned unchanged.
        request_mgr: Returned unchanged.
        soup_mgr: Soup manager whose content is displayed.
        link_mgr: Returned unchanged.
        values: Current GUI values.

    Returns:
        The tuple ``(url_mgr, request_mgr, soup_mgr, link_mgr)``.
    """
    parse_type = values['-parse_type-']
    if parse_type != soup_mgr.parse_type:
        soup_mgr.update_parse_type(parse_type=parse_type)

    all_tags = soup_mgr.get_all_tags_and_attribute_names()
    window['-SOURCECODE-'].update(value=soup_mgr.soup)

    tags = all_tags['tags']
    attributes = all_tags['attributes']
    # A page can yield no tags or no attributes; indexing [0] on an empty
    # list raised IndexError, so fall back to an empty selection.
    if values['-SOUP_TAG-'] != tags:
        window['-SOUP_TAG-'].update(values=tags, value=tags[0] if tags else '')
    if values['-SOUP_ATTRIBUTE-'] != attributes:
        first_attribute = attributes[0] if attributes else ''
        # NOTE(review): all three attribute combos are assumed to refresh
        # together; the recovered listing had no indentation to confirm it.
        for combo_key in ('-SOUP_ATTRIBUTE-', '-SOUP_ATTRIBUTE_1-', '-SOUP_ATTRIBUTE_2-'):
            window[combo_key].update(values=attributes, value=first_attribute)
    return url_mgr, request_mgr, soup_mgr, link_mgr
165
def url_grabber_while(window,initial_url="www.example.com"):
    """Run the URL-grabber event loop until the window closes or soup is sent.

    Args:
        window: GUI window to read events from; closed before returning.
        initial_url: URL used to build the first set of managers.

    Returns:
        The contents of ``-FIND_ALL_OUTPUT-`` when 'Send Soup' is pressed,
        otherwise ``None``.
    """
    # NOTE(review): block nesting was reconstructed from a listing with no
    # indentation; the grab branch and the 'Action' branch are the places to
    # double-check against the intended behaviour.
    return_data = None
    url_grab = False
    try:
        url_mgr = urlManager(url=initial_url)
        request_mgr = requestManager(url_mgr=url_mgr)
        soup_mgr = SoupManager(url_mgr=url_mgr, request_mgr=request_mgr)
        link_mgr = LinkManager(url_mgr=url_mgr, request_mgr=request_mgr, soup_mgr=soup_mgr)
        while True:
            event, values = window.read()
            if event == sg.WINDOW_CLOSED:
                break

            # Grab on the explicit button, and on any event until the first
            # successful grab has happened.
            if event == '-GRAB_URL-' or not url_grab:
                url = values['-URL-']
                if urlManager(url=url).url:
                    if url != url_mgr.url or url == initial_url:
                        url_mgr = urlManager(url=url)
                        url_mgr, request_mgr, soup_mgr, link_mgr = update_url(
                            url_mgr=url_mgr,
                            request_mgr=request_mgr,
                            soup_mgr=soup_mgr,
                            link_mgr=link_mgr,
                            values=values,
                        )
                        window['-URL-'].update(value=url_mgr.url)
                        url_grab = True

            if event == 'get soup':
                tags_js = get_attrs(values)
                all_desired = soup_mgr.find_tags_by_attributes(
                    tag=tags_js['tag'],
                    attr=tags_js['attribute'],
                    attr_values=tags_js['input'],
                )
                window['-FIND_ALL_OUTPUT-'].update(value=all_desired)

            if event == '-CUSTOMUA-':
                custom_enabled = values['-CUSTOMUA-']
                window['-SOURCECODE-'].update(disabled=custom_enabled)
                if not custom_enabled:
                    # ``user_agent_mgr`` was never defined here, so this
                    # branch raised NameError. Build the manager through the
                    # module's own helper; it is presumed to expose
                    # ``user_agent_header`` as the original reference implied.
                    default_agent = get_user_agent_mgr().user_agent_header
                    window['-USERAGENT-'].update(value=default_agent)
                    window['-USERAGENT-'].update(disabled=True)
                else:
                    window['-USERAGENT-'].update(disabled=False)

            if event == 'Get All Text':
                window['-FIND_ALL_OUTPUT-'].update(value=soup_mgr.extract_text_sections())

            if event == 'Action':
                parse_type = values['-parse_type-']
                if parse_type != soup_mgr.parse_type:
                    soup_mgr.update_parse_type(parse_type=parse_type)
                window['-SOURCECODE-'].update(value=soup_mgr.soup)
            elif event == 'Send Soup':
                return_data = values['-FIND_ALL_OUTPUT-']
                break
    finally:
        # Close the window even if a handler raises, so it is never left open.
        window.close()
    return return_data
208
def url_grabber_component(url=None):
    """Open the URL-grabber window and run its event loop.

    Args:
        url: Starting URL; ``"www.example.com"`` is used when it equals
            ``None``.

    Returns:
        Whatever ``url_grabber_while`` returns.
    """
    start_url = "www.example.com" if url == None else url
    layout = get_gpt_layout(start_url)
    # Publish the window as the module-level ``window`` global; it is read
    # back on the next line.
    globals()['window'] = make_component('Window','URL Grabber', layout=layout,**expandable())
    return url_grabber_while(window,initial_url=start_url)