scraping-rtn 0.0.5__tar.gz → 0.0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of scraping-rtn might be problematic. Click here for more details.
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/PKG-INFO +1 -1
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/pyproject.toml +1 -1
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/src/scraping_rtn/RtnSingleTeamYear.py +46 -12
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/src/scraping_rtn/src.py +14 -2
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/src/scraping_rtn.egg-info/PKG-INFO +1 -1
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/LICENSE +0 -0
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/README.md +0 -0
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/setup.cfg +0 -0
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/src/scraping_rtn/__init__.py +0 -0
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/src/scraping_rtn.egg-info/SOURCES.txt +0 -0
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/src/scraping_rtn.egg-info/dependency_links.txt +0 -0
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/src/scraping_rtn.egg-info/requires.txt +0 -0
- {scraping_rtn-0.0.5 → scraping_rtn-0.0.6.0}/src/scraping_rtn.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: scraping_rtn
|
|
3
|
-
Version: 0.0.5
|
|
3
|
+
Version: 0.0.6.0
|
|
4
4
|
Summary: package to scrape gymnastics data from Road To Nationals
|
|
5
5
|
Author-email: Claire Harmon <ceharmon220@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
|
|
2
|
-
SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
|
|
1
|
+
from .src import EVENT_MAP, EVENTS, session_setup, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
|
|
2
|
+
get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import numpy as np
|
|
5
5
|
from datetime import datetime
|
|
@@ -15,8 +15,14 @@ class RtnSingleTeamYear(object):
|
|
|
15
15
|
else:
|
|
16
16
|
self.team_id = team_id
|
|
17
17
|
|
|
18
|
+
def connect_session(self):
|
|
19
|
+
self.session = session_setup()
|
|
20
|
+
|
|
18
21
|
def get_team_mapping(self):
|
|
19
|
-
|
|
22
|
+
if not hasattr(self, 'session'):
|
|
23
|
+
self.connect_session()
|
|
24
|
+
|
|
25
|
+
all_teams_data = get_data_from_api(session=self.session, endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
|
|
20
26
|
return {team['team_name']: team['id'] for team in all_teams_data['teams']}
|
|
21
27
|
|
|
22
28
|
def get_team_id(self):
|
|
@@ -33,10 +39,13 @@ class RtnSingleTeamYear(object):
|
|
|
33
39
|
# raise ValueError(f'{self.team_name} does not exist in data for {self.year}')
|
|
34
40
|
|
|
35
41
|
def _get_raw_roster(self):
|
|
42
|
+
if not hasattr(self, 'session'):
|
|
43
|
+
self.connect_session()
|
|
44
|
+
|
|
36
45
|
rename_map = {'id': 'Gymnast ID', 'hometown': 'Hometown', 'school_year': 'School Year', 'events': 'Events'}
|
|
37
46
|
school_year_map = {'1': 'FR', '2': 'SO', '3': 'JR', '4': 'SR'}
|
|
38
47
|
|
|
39
|
-
roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
|
|
48
|
+
roster_data = get_data_from_api(session=self.session, endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
|
|
40
49
|
|
|
41
50
|
self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map.get(v, '') for k, v in data.items()},
|
|
42
51
|
**{'Name': data['fname'] + ' ' + data['lname'], 'Team': self.team_name}}
|
|
@@ -57,7 +66,10 @@ class RtnSingleTeamYear(object):
|
|
|
57
66
|
return self.roster
|
|
58
67
|
|
|
59
68
|
def _get_raw_season_results(self):
|
|
60
|
-
|
|
69
|
+
if not hasattr(self, 'session'):
|
|
70
|
+
self.connect_session()
|
|
71
|
+
|
|
72
|
+
meets = get_data_from_api(session=self.session, endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
|
|
61
73
|
name_map = {'team_id': 'Team ID', 'team_name': 'Team', 'meet_id': 'Team Meet ID',
|
|
62
74
|
'meet_date': 'Meet Date', 'team_score': 'Score', 'home': 'Home/Away',
|
|
63
75
|
'opponent': 'Opponents', 'meet_desc': 'Meet Name', 'linked_id': 'Meet ID'}
|
|
@@ -110,10 +122,13 @@ class RtnSingleTeamYear(object):
|
|
|
110
122
|
return self.season_results
|
|
111
123
|
|
|
112
124
|
def _team_event_scores_by_meet(self):
|
|
125
|
+
if not hasattr(self, 'session'):
|
|
126
|
+
self.connect_session()
|
|
127
|
+
|
|
113
128
|
team_scores_all = []
|
|
114
129
|
for meet_id in [data['Team Meet ID'] for data in self._raw_season_results if data['Meet Date'] <= datetime.now()]:
|
|
115
130
|
try:
|
|
116
|
-
meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
|
|
131
|
+
meet_res = get_data_from_api(session=self.session, endpoint='meetresults', suffix=str(meet_id)).json()
|
|
117
132
|
# This API call returns scores from all teams at this meet, not just this team. Need to pick out correct score
|
|
118
133
|
team_scores = [score for score in meet_res['teams'] if score['tname'] == self.team_name and score['mid'] == str(meet_id)]
|
|
119
134
|
assert len(team_scores) == 1, 'Multiple team scores??'
|
|
@@ -129,7 +144,10 @@ class RtnSingleTeamYear(object):
|
|
|
129
144
|
self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
|
|
130
145
|
|
|
131
146
|
def _team_event_scores_team_consistency(self):
|
|
132
|
-
|
|
147
|
+
if not hasattr(self, 'session'):
|
|
148
|
+
self.connect_session()
|
|
149
|
+
|
|
150
|
+
res = get_data_from_api(session=self.session, endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
|
|
133
151
|
if len(res['labels']) == 0:
|
|
134
152
|
print(f'No team consistency data found for {self.team_name} in {self.year}')
|
|
135
153
|
for i in range(len(self._raw_season_results)):
|
|
@@ -173,10 +191,13 @@ class RtnSingleTeamYear(object):
|
|
|
173
191
|
return self.individual_results
|
|
174
192
|
|
|
175
193
|
def _individual_scores_by_meet(self):
|
|
194
|
+
if not hasattr(self, 'session'):
|
|
195
|
+
self.connect_session()
|
|
196
|
+
|
|
176
197
|
individual_scores_all = []
|
|
177
198
|
for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule if meet['Meet Date'] <= datetime.now()]:
|
|
178
199
|
try:
|
|
179
|
-
meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
|
|
200
|
+
meet_res = get_data_from_api(session=self.session, endpoint='meetresults', suffix=str(meet_id)).json()
|
|
180
201
|
if len(meet_res) == 0 or len(meet_res['scores']) == 0 or len(meet_res['scores'][0]) == 0:
|
|
181
202
|
print(f'No data found for meet {meet_id}')
|
|
182
203
|
continue
|
|
@@ -212,10 +233,13 @@ class RtnSingleTeamYear(object):
|
|
|
212
233
|
'Meet Name', 'Meet ID'])
|
|
213
234
|
|
|
214
235
|
def _individual_scores_individual_consistency(self):
|
|
236
|
+
if not hasattr(self, 'session'):
|
|
237
|
+
self.connect_session()
|
|
238
|
+
|
|
215
239
|
ind_consistency_all = []
|
|
216
240
|
for gymnast in self._raw_roster:
|
|
217
241
|
try:
|
|
218
|
-
res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
|
|
242
|
+
res = get_data_from_api(session=self.session, endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
|
|
219
243
|
ind_consistency = [{'Meet Date': normalize_date(res['labels'][i][:7] + str(self.year), dt_format='%b-%d-%Y'),
|
|
220
244
|
'VT': round(float(res['vts'][i]), 4) if res['vts'][i] is not None else np.nan,
|
|
221
245
|
'UB': round(float(res['ubs'][i]), 4) if res['ubs'][i] is not None else np.nan,
|
|
@@ -249,11 +273,14 @@ class RtnSingleTeamYear(object):
|
|
|
249
273
|
return pd.DataFrame(columns=ROSTER_COLS + EVENTS) # + ['AA'])
|
|
250
274
|
|
|
251
275
|
def _get_raw_individual_nqs(self):
|
|
276
|
+
if not hasattr(self, 'session'):
|
|
277
|
+
self.connect_session()
|
|
278
|
+
|
|
252
279
|
name_map = {'maxv': 'VT', 'maxub': 'UB', 'maxbb': 'BB', 'maxfx': 'FX',
|
|
253
280
|
# 'maxaa': 'AA',
|
|
254
281
|
'gid': 'Gymnast ID'}
|
|
255
282
|
|
|
256
|
-
nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
|
|
283
|
+
nqsData = get_data_from_api(session=self.session, endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
|
|
257
284
|
ind_nqs = [{name_map[k]: round(float(v), 4) if k != 'gid' and v != '' else (np.nan if k != 'gid' else v)
|
|
258
285
|
for k, v in data.items() if k in name_map.keys()} for data in nqsData['ind']]
|
|
259
286
|
|
|
@@ -265,17 +292,24 @@ class RtnSingleTeamYear(object):
|
|
|
265
292
|
self._raw_individual_nqs = []
|
|
266
293
|
|
|
267
294
|
def _get_current_week(self):
|
|
295
|
+
if not hasattr(self, 'session'):
|
|
296
|
+
self.connect_session()
|
|
297
|
+
|
|
268
298
|
if not hasattr(self, 'week'):
|
|
269
|
-
|
|
299
|
+
week_data = get_data_from_api(session=self.session, endpoint='currentweek', suffix=str(self.year)).json()
|
|
300
|
+
return min(int(week_data['week']), int(week_data['max']))
|
|
270
301
|
|
|
271
302
|
def _get_raw_rankings(self, team_vs_ind, event, week):
|
|
303
|
+
if not hasattr(self, 'session'):
|
|
304
|
+
self.connect_session()
|
|
305
|
+
|
|
272
306
|
team_ind_map = {'team': 0, 'ind': 1}
|
|
273
307
|
event_api_map = {'VT': 1, 'UB': 2, 'BB': 3, 'FX': 4, 'AA': 5}
|
|
274
308
|
rename_map = {'rank': 'Rank', 'gid': 'Gymnast ID', 'team': 'Team', 'tid': 'Team ID',
|
|
275
309
|
'rqs': 'NQS', 'reg': 'Region', 'con': 'Conference', 'div': 'Division',
|
|
276
310
|
'usag': 'USAG', 'ave': 'Average', 'high': 'High', 'name': 'Team'}
|
|
277
311
|
|
|
278
|
-
res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
|
|
312
|
+
res = get_data_from_api(session=self.session, endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
|
|
279
313
|
if team_vs_ind == 'ind':
|
|
280
314
|
self._raw_rankings[team_vs_ind][event] = [{**{rename_map.get(k): float(v) if k in ['rqs', 'ave', 'high'] else v for k, v in data.items() if k in rename_map},
|
|
281
315
|
**{'Name': data['fname'] + ' ' + data['lname'], 'Event': event}}
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import requests
|
|
2
|
+
from requests.adapters import HTTPAdapter
|
|
3
|
+
from urllib3.util.retry import Retry
|
|
2
4
|
from functools import lru_cache
|
|
3
5
|
from datetime import datetime
|
|
4
6
|
|
|
@@ -19,12 +21,22 @@ def validate_input(teams):
|
|
|
19
21
|
return teams
|
|
20
22
|
|
|
21
23
|
|
|
24
|
+
def session_setup():
|
|
25
|
+
session = requests.Session()
|
|
26
|
+
retry = Retry(connect=3, backoff_factor=0.5)
|
|
27
|
+
adapter = HTTPAdapter(max_retries=retry)
|
|
28
|
+
session.mount('http://', adapter)
|
|
29
|
+
session.mount('https://', adapter)
|
|
30
|
+
|
|
31
|
+
return session
|
|
32
|
+
|
|
33
|
+
|
|
22
34
|
@lru_cache(maxsize=1000000)
|
|
23
|
-
def get_data_from_api(endpoint, suffix):
|
|
35
|
+
def get_data_from_api(session, endpoint, suffix):
|
|
24
36
|
url = 'https://www.roadtonationals.com/api/women/' + endpoint
|
|
25
37
|
if suffix:
|
|
26
38
|
url += '/' + suffix
|
|
27
|
-
return requests.get(url)
|
|
39
|
+
return session.get(url)
|
|
28
40
|
|
|
29
41
|
|
|
30
42
|
def fix_opponents(ops):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: scraping_rtn
|
|
3
|
-
Version: 0.0.5
|
|
3
|
+
Version: 0.0.6.0
|
|
4
4
|
Summary: package to scrape gymnastics data from Road To Nationals
|
|
5
5
|
Author-email: Claire Harmon <ceharmon220@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|