abstract-webtools 0.1.6.154__tar.gz → 0.1.6.155__tar.gz

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (59)
  1. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/PKG-INFO +1 -1
  2. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/setup.py +1 -1
  3. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools.egg-info/PKG-INFO +1 -1
  4. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools.egg-info/SOURCES.txt +0 -1
  5. abstract_webtools-0.1.6.154/src/abstract_webtools/url_grabber.py +0 -212
  6. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/README.md +0 -0
  7. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/pyproject.toml +0 -0
  8. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/setup.cfg +0 -0
  9. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/__init__.py +0 -0
  10. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/abstract_usurpit.py +0 -0
  11. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/abstract_webtools.py +0 -0
  12. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/big_user_agent_list.py +0 -0
  13. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/domain_identifier.py +0 -0
  14. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/extention_list.py +0 -0
  15. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/find_dirs.py +0 -0
  16. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/k2s_downloader.py +0 -0
  17. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/main.py +0 -0
  18. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/__init__.py +0 -0
  19. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/allss.py +0 -0
  20. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/cipherManager.py +0 -0
  21. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/clownworld/__init__.py +0 -0
  22. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/clownworld/get_bolshevid_video.py +0 -0
  23. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/crawlManager.py +0 -0
  24. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/crawlmgr2.py +0 -0
  25. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/curlMgr.py +0 -0
  26. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/domainManager.py +0 -0
  27. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/dynamicRateLimiter.py +0 -0
  28. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/get_test.py +0 -0
  29. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/linkManager/__init__.py +0 -0
  30. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/linkManager/linkManager.py +0 -0
  31. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/meta_dump.py +0 -0
  32. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/middleManager/__init__.py +0 -0
  33. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/middleManager/imports.py +0 -0
  34. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/middleManager/src/UnifiedWebManage3r.py +0 -0
  35. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/middleManager/src/UnifiedWebManager.py +0 -0
  36. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/middleManager/src/__init__.py +0 -0
  37. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/middleManager/src/legacy_tools.py +0 -0
  38. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/mySocketClient.py +0 -0
  39. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/networkManager.py +0 -0
  40. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/requestManager/__init__.py +0 -0
  41. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/requestManager/requestManager.py +0 -0
  42. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/seleneumManager.py +0 -0
  43. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/seleniumManager.py +0 -0
  44. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/soupManager/__init__.py +0 -0
  45. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/soupManager/asoueces.py +0 -0
  46. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/soupManager/soupManager.py +0 -0
  47. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/sslManager.py +0 -0
  48. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/tlsAdapter.py +0 -0
  49. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/urlManager/__init__.py +0 -0
  50. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/urlManager/urlManager (Copy).py +0 -0
  51. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/urlManager/urlManager.py +0 -0
  52. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/userAgentManager.py +0 -0
  53. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/videoDownloader.py +0 -0
  54. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/managers/videoDownloader2.py +0 -0
  55. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/soup_gui.py +0 -0
  56. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools/url_grabber_new.py +0 -0
  57. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools.egg-info/dependency_links.txt +0 -0
  58. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools.egg-info/requires.txt +0 -0
  59. {abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools.egg-info/top_level.txt +0 -0
{abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.154
+Version: 0.1.6.155
 Summary: Utilities for fetching/parsing web content with requests/urllib3/BS4 and helpers.
 Home-page: https://github.com/AbstractEndeavors/abstract_webtools
 Author: putkoff
{abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/setup.py
@@ -5,7 +5,7 @@ README = Path("README.md").read_text(encoding="utf-8")
 
 setup(
     name="abstract_webtools",
-    version='0.1.6.154',  # bump once per release
+    version='0.1.6.155',  # bump once per release
     author="putkoff",
     author_email="partners@abstractendeavors.com",
     description="Utilities for fetching/parsing web content with requests/urllib3/BS4 and helpers.",
{abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: abstract_webtools
-Version: 0.1.6.154
+Version: 0.1.6.155
 Summary: Utilities for fetching/parsing web content with requests/urllib3/BS4 and helpers.
 Home-page: https://github.com/AbstractEndeavors/abstract_webtools
 Author: putkoff
{abstract_webtools-0.1.6.154 → abstract_webtools-0.1.6.155}/src/abstract_webtools.egg-info/SOURCES.txt
@@ -12,7 +12,6 @@ src/abstract_webtools/find_dirs.py
 src/abstract_webtools/k2s_downloader.py
 src/abstract_webtools/main.py
 src/abstract_webtools/soup_gui.py
-src/abstract_webtools/url_grabber.py
 src/abstract_webtools/url_grabber_new.py
 src/abstract_webtools.egg-info/PKG-INFO
 src/abstract_webtools.egg-info/SOURCES.txt
abstract_webtools-0.1.6.154/src/abstract_webtools/url_grabber.py (deleted)
@@ -1,212 +0,0 @@
-from abstract_gui import make_component,sg
-import inspect
-import re
-from .managers import *
-window = None
-
-def get_attrs(values):
-    tags_js={'tag':[],'attribute':[],'input':[]}
-    for each in ['-SOUP_TAG-','-SOUP_ATTRIBUTE-','-SOUP_ATTRIBUTE_1-','-SOUP_ATTRIBUTE_2-']:
-        if values[each[:-1]+'_BOOL-'] == True:
-            for types in ['tag','attribute']:
-                if types in each.lower():
-                    tags_js[types].append(values[each])
-    input_val = values['-SOUP_VALUES_INPUT-']
-    if input_val == '':
-        tags_js['input']=None
-    else:
-        tags_js['input']= input_val
-    if tags_js['tag']==[]:
-        tags_js['tag']=None if match.group(1) else None
-    else:
-        tags_js['tag']=tags_js['tag'][0]
-    if tags_js['attribute']==[]:
-        tags_js['attribute']=None
-    else:
-        tags_js['attribute']=tags_js['attribute'][0]
-    return tags_js
-def get_user_agent_mgr(user_agent=None):
-    return UserAgentManager(user_agent=user_agent)
-def get_cipher_list():
-    return CipherManager().get_default_ciphers()
-def get_parse_type_choices():
-    return ['html.parser', 'lxml', 'html5lib']
-def expandable(size:tuple=(None,None)):
-    return {"size": size,"resizable": True,"scrollable": True,"auto_size_text": True,"expand_x":True,"expand_y": True}
-def change_glob(var:any,val:any):
-    globals()[var]=val
-    return val
-def get_parse_type_choices():
-    bs4_module = inspect.getmodule(BeautifulSoup)
-    docstring = bs4_module.__builtins__
-    start_index = docstring.find("parse_types")
-    end_index = docstring.find(")", start_index)
-    choices_text = docstring[start_index:end_index]
-    choices = [choice.strip() for choice in choices_text.split(",")]
-    return choices
-def get_browsers():
-    return 'Chrome,Firefox,Safari,Microsoft Edge,Internet Explorer,Opera'.split(',')
-def get_user_agents():
-    from .big_user_agent_list import big_user_agent_list
-    return big_user_agent_list
-def create_user_agent(user_agent:str=get_user_agents()[0]):
-    return {"user-agent": user_agent}
-def get_operating_systems():
-    return ['Windows NT 10.0','Macintosh; Intel Mac OS X 10_15_7','Linux','Android','iOS']
-def create_columns(ls,i,k):
-    if float(i)%float(k)==float(0.00) and i != 0:
-        lsN = list(ls[:-k])
-        lsN.append(list(ls[-k:]))
-        ls = lsN
-    return ls
-def get_cypher_checks():
-    ciphers_list = get_cipher_list()
-    ls=[[[sg.Text('CIPHERS: ')],sg.Multiline('',key='-CIPHERS_OUTPUT-', size=(80, 5), disabled=False)]]
-    for k,cipher in enumerate(ciphers_list):
-        ls.append(sg.Checkbox(cipher,key=cipher,default=True,enable_events=True))
-        ls = create_columns(ls,k,5)
-    return ls
-def get_bs4_options():
-    bs4_options = [
-        'BeautifulSoup',
-        'Tag',
-        'NavigableString',
-        'Comment',
-        'ResultSet',
-        'SoupStrainer',
-        'CData'
-    ]
-    descriptions = [
-        'The main BeautifulSoup class used for parsing HTML.',
-        'Represents an HTML tag.',
-        'Represents a string within an HTML document.',
-        'Represents an HTML comment.',
-        'Represents a collection of tags found during a search.',
-        'Allows parsing only a specific subset of the HTML document.',
-        'Represents a CDATA section within an XML document.'
-    ]
-    return list(zip(bs4_options, descriptions))
-def get_multi_line(args):
-    return make_component("Multiline",**args,**expandable())
-def get_gpt_layout(url):
-    # Add a dropdown for selecting BeautifulSoup parsing capabilities
-    parse_type_choices = ['html.parser', 'lxml', 'html5lib']
-    make_component("theme",'LightGrey1')
-    layout = [[sg.Text('URL:', size=(8, 1)), sg.Input(url, key='-URL-',enable_events=True),sg.Text('status:'),sg.Text('200',key="-STATUS_CODE-")
-               ,sg.Text(f'success: {url} is valid',key="-URL_WARNING-"),sg.Button('Grab URL',key='-GRAB_URL-',visible=True)],
-              [sg.Checkbox('Custom User-Agent', default=False, key='-CUSTOMUA-', enable_events=True)],
-              [sg.Text('User-Agent:', size=(8, 1)), sg.Combo(get_user_agents(), default_value='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36', key='-USERAGENT-', disabled=False)],
-              [get_cypher_checks()],
-              [sg.Button('Grab URL'), sg.Button('Action'),sg.Button('Get All Text')],
-              [sg.Text('Parsing Capabilities:', size=(15, 1)), sg.DropDown(parse_type_choices, default_value='html.parser', key='-parse_type-',enable_events=True)],
-              [get_multi_line({"key":'-SOURCECODE-'})],
-              [sg.Text('find soup:'),[[sg.Checkbox('',default=True,key='-SOUP_TAG_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_TAG-',enable_events=True)],
-                                      [sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE-',enable_events=True)],
-                                      [sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_1_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE_1-',enable_events=True)],
-                                      [sg.Checkbox('',default=False,key='-SOUP_ATTRIBUTE_2_BOOL-',enable_events=True),sg.Combo([], size=(15, 1),key='-SOUP_ATTRIBUTE_2-',enable_events=True)],
-                                      sg.Input(key='-SOUP_VALUES_INPUT-'), sg.Button('get soup'),sg.Button('all soup'),sg.Button('Send Soup')]],
-              [get_multi_line({"key":"-FIND_ALL_OUTPUT-"})]]
-    return layout
-def get_selected_cipher_list():
-    ls = []
-    ciphers_list = get_cipher_list()
-    event, values = window.read()
-    for cipher in ciphers_list:
-        if values[cipher] == True:
-            ls.append(cipher)
-    return ls
-def update_status(window,warn,warn_url,response_code,valid):
-    window['-URL-'].update(value=warn_url)
-    window['-STATUS_CODE-'].update(value=response_code)
-    window["-URL_WARNING-"].update(value=f"{warn} : {warn_url} is {valid}")
-def process_url(window,values):
-    response_code=False
-    temp_mgr=None
-    warn='warning'
-    valid='invalid'
-    warn_url = values['-URL-']
-    if warn_url=='' or warn_url == None:
-        update_status(window,warn,warn_url,response_code,valid)
-        return False
-    temp_url=urlManager(url=warn_url).url
-    if temp_url:
-        valid='valid'
-        response_code = requestManager(url=temp_mgr).response.status_code
-        warn = 'success'
-        warn_url = temp_mgr
-        update_status(window,warn,warn_url,response_code,valid)
-        return temp_mgr
-    update_status(window,warn,warn_url,response_code,valid)
-    return False
-def update_url(url_mgr,request_mgr,soup_mgr,link_mgr,values,cipher_list=get_cipher_list(),user_agent=get_user_agents()[0]):
-    ciphers = CipherManager(cipher_list=cipher_list).ciphers_string
-    request_mgr = requestManager(url_mgr=url_mgr,ciphers=ciphers,user_agent=get_user_agents()[0])
-    if request_mgr.source_code:
-        soup_mgr= SoupManager(url_mgr=url_mgr,request_mgr=request_mgr)
-        link_mgr= LinkManager(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr)
-        window['-URL-'].update(value=url_mgr.url)
-        window['-CIPHERS_OUTPUT-'].update(value=request_mgr.ciphers)
-        return update_source_code(url_mgr,request_mgr,soup_mgr,link_mgr,values)
-    else:
-        return url_mgr,request_mgr,soup_mgr,link_mgr
-def update_source_code(url_mgr,request_mgr,soup_mgr,link_mgr,values):
-    parse_type = values['-parse_type-']
-    if parse_type != soup_mgr.parse_type:
-        soup_mgr.update_parse_type(parse_type=parse_type)
-    all_tags=soup_mgr.get_all_tags_and_attribute_names()
-    window['-SOURCECODE-'].update(value=soup_mgr.soup)
-    if values['-SOUP_TAG-'] != all_tags['tags']:
-        window['-SOUP_TAG-'].update(values=all_tags['tags'],value=all_tags['tags'][0])
-    if values['-SOUP_ATTRIBUTE-'] != all_tags['attributes']:
-        window['-SOUP_ATTRIBUTE-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
-        window['-SOUP_ATTRIBUTE_1-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
-        window['-SOUP_ATTRIBUTE_2-'].update(values=all_tags['attributes'],value=all_tags['attributes'][0])
-    return url_mgr,request_mgr,soup_mgr,link_mgr
-def url_grabber_while(window,initial_url="www.example.com"):
-    return_data=None
-    url_grab = False
-    url_mgr=urlManager(url=initial_url)
-    request_mgr = requestManager(url_mgr=url_mgr)
-    soup_mgr= SoupManager(url_mgr=url_mgr,request_mgr=request_mgr)
-    link_mgr= LinkManager(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr)
-    while True:
-        event, values = window.read()
-        if event == sg.WINDOW_CLOSED:
-            break
-        if event=='-GRAB_URL-' or not url_grab:
-            url=values['-URL-']
-            if urlManager(url=url).url:
-                if url != url_mgr.url or url == initial_url:
-                    url_mgr = urlManager(url=url)
-
-                url_mgr,request_mgr,soup_mgr,link_mgr=update_url(url_mgr=url_mgr,request_mgr=request_mgr,soup_mgr=soup_mgr,link_mgr=link_mgr,values=values)
-                window['-URL-'].update(value=url_mgr.url)
-                url_grab=True
-        if event == 'get soup':
-            tags_js = get_attrs(values)
-            all_desired=soup_mgr.find_tags_by_attributes(tag=tags_js['tag'], attr=tags_js['attribute'],attr_values=tags_js['input'])
-            window['-FIND_ALL_OUTPUT-'].update(value=all_desired)
-        if event == '-CUSTOMUA-':
-            window['-SOURCECODE-'].update(disabled=values['-CUSTOMUA-'])
-            if not values['-CUSTOMUA-']:
-                window['-USERAGENT-'].update(value=user_agent_mgr.user_agent_header)
-                window['-USERAGENT-'].update(disabled=True)
-            else:
-                window['-USERAGENT-'].update(disabled=False)
-        if event=='Get All Text':
-            window['-FIND_ALL_OUTPUT-'].update(value=soup_mgr.extract_text_sections())
-        if event == 'Action':
-            parse_type = values['-parse_type-']
-            if parse_type != soup_mgr.parse_type:
-                soup_mgr.update_parse_type(parse_type=parse_type)
-            window['-SOURCECODE-'].update(value=soup_mgr.soup)
-        elif event == 'Send Soup':
-            return_data = values['-FIND_ALL_OUTPUT-']
-            break
-    window.close()
-    return return_data
-def url_grabber_component(url=None):
-    if url==None:
-        url = "www.example.com"
-    globals()['window'] = make_component('Window','URL Grabber', layout=get_gpt_layout(url),**expandable())
-    return url_grabber_while(window,initial_url=url)