scraping-rtn 0.0.6.0__tar.gz → 0.0.6.1__tar.gz
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of scraping-rtn might be problematic.
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/PKG-INFO +1 -1
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/pyproject.toml +1 -1
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn/RtnSingleTeamYear.py +11 -44
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn/src.py +3 -6
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn.egg-info/PKG-INFO +1 -1
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/LICENSE +0 -0
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/README.md +0 -0
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/setup.cfg +0 -0
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn/__init__.py +0 -0
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn.egg-info/SOURCES.txt +0 -0
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn.egg-info/dependency_links.txt +0 -0
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn.egg-info/requires.txt +0 -0
- {scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn.egg-info/top_level.txt +0 -0
{scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scraping_rtn
-Version: 0.0.6.0
+Version: 0.0.6.1
 Summary: package to scrape gymnastics data from Road To Nationals
 Author-email: Claire Harmon <ceharmon220@gmail.com>
 Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
{scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn/RtnSingleTeamYear.py
@@ -1,4 +1,4 @@
-from .src import EVENT_MAP, EVENTS,
+from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
     get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
 import pandas as pd
 import numpy as np
@@ -15,14 +15,8 @@ class RtnSingleTeamYear(object):
         else:
             self.team_id = team_id

-    def connect_session(self):
-        self.session = session_setup()
-
     def get_team_mapping(self):
-
-        self.connect_session()
-
-        all_teams_data = get_data_from_api(session=self.session, endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
+        all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
         return {team['team_name']: team['id'] for team in all_teams_data['teams']}

     def get_team_id(self):
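With connect_session removed, the class no longer keeps a requests session on the instance; each method calls the module-level get_data_from_api directly, as the remaining hunks in this file repeat. A minimal sketch of the equivalent call made outside the class, assuming the new helper signature shown later in this diff (the year 2024 is a hypothetical placeholder):

    # Hypothetical standalone use of the refactored helper; 2024 is a placeholder year.
    from scraping_rtn.src import get_data_from_api

    all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix='2024/1').json()
    team_mapping = {team['team_name']: team['id'] for team in all_teams_data['teams']}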
@@ -39,13 +33,10 @@ class RtnSingleTeamYear(object):
         # raise ValueError(f'{self.team_name} does not exist in data for {self.year}')

     def _get_raw_roster(self):
-        if not hasattr(self, 'session'):
-            self.connect_session()
-
         rename_map = {'id': 'Gymnast ID', 'hometown': 'Hometown', 'school_year': 'School Year', 'events': 'Events'}
         school_year_map = {'1': 'FR', '2': 'SO', '3': 'JR', '4': 'SR'}

-        roster_data = get_data_from_api(
+        roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()

         self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map.get(v, '') for k, v in data.items()},
                              **{'Name': data['fname'] + ' ' + data['lname'], 'Team': self.team_name}}
@@ -66,10 +57,7 @@ class RtnSingleTeamYear(object):
         return self.roster

     def _get_raw_season_results(self):
-
-        self.connect_session()
-
-        meets = get_data_from_api(session=self.session, endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
+        meets = get_data_from_api(endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
         name_map = {'team_id': 'Team ID', 'team_name': 'Team', 'meet_id': 'Team Meet ID',
                     'meet_date': 'Meet Date', 'team_score': 'Score', 'home': 'Home/Away',
                     'opponent': 'Opponents', 'meet_desc': 'Meet Name', 'linked_id': 'Meet ID'}
@@ -122,13 +110,10 @@ class RtnSingleTeamYear(object):
         return self.season_results

     def _team_event_scores_by_meet(self):
-        if not hasattr(self, 'session'):
-            self.connect_session()
-
         team_scores_all = []
         for meet_id in [data['Team Meet ID'] for data in self._raw_season_results if data['Meet Date'] <= datetime.now()]:
             try:
-                meet_res = get_data_from_api(
+                meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
                 # This API call returns scores from all teams at this meet, not just this team. Need to pick out correct score
                 team_scores = [score for score in meet_res['teams'] if score['tname'] == self.team_name and score['mid'] == str(meet_id)]
                 assert len(team_scores) == 1, 'Multiple team scores??'
@@ -144,10 +129,7 @@ class RtnSingleTeamYear(object):
             self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})

     def _team_event_scores_team_consistency(self):
-
-        self.connect_session()
-
-        res = get_data_from_api(session=self.session, endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
+        res = get_data_from_api(endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
         if len(res['labels']) == 0:
             print(f'No team consistency data found for {self.team_name} in {self.year}')
         for i in range(len(self._raw_season_results)):
@@ -191,13 +173,10 @@ class RtnSingleTeamYear(object):
         return self.individual_results

     def _individual_scores_by_meet(self):
-        if not hasattr(self, 'session'):
-            self.connect_session()
-
         individual_scores_all = []
         for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule if meet['Meet Date'] <= datetime.now()]:
             try:
-                meet_res = get_data_from_api(
+                meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
                 if len(meet_res) == 0 or len(meet_res['scores']) == 0 or len(meet_res['scores'][0]) == 0:
                     print(f'No data found for meet {meet_id}')
                     continue
@@ -233,13 +212,10 @@ class RtnSingleTeamYear(object):
                                             'Meet Name', 'Meet ID'])

     def _individual_scores_individual_consistency(self):
-        if not hasattr(self, 'session'):
-            self.connect_session()
-
         ind_consistency_all = []
         for gymnast in self._raw_roster:
             try:
-                res = get_data_from_api(
+                res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
                 ind_consistency = [{'Meet Date': normalize_date(res['labels'][i][:7] + str(self.year), dt_format='%b-%d-%Y'),
                                     'VT': round(float(res['vts'][i]), 4) if res['vts'][i] is not None else np.nan,
                                     'UB': round(float(res['ubs'][i]), 4) if res['ubs'][i] is not None else np.nan,
@@ -273,14 +249,11 @@ class RtnSingleTeamYear(object):
         return pd.DataFrame(columns=ROSTER_COLS + EVENTS)  # + ['AA'])

     def _get_raw_individual_nqs(self):
-        if not hasattr(self, 'session'):
-            self.connect_session()
-
         name_map = {'maxv': 'VT', 'maxub': 'UB', 'maxbb': 'BB', 'maxfx': 'FX',
                     # 'maxaa': 'AA',
                     'gid': 'Gymnast ID'}

-        nqsData = get_data_from_api(
+        nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
         ind_nqs = [{name_map[k]: round(float(v), 4) if k != 'gid' and v != '' else (np.nan if k != 'gid' else v)
                     for k, v in data.items() if k in name_map.keys()} for data in nqsData['ind']]

@@ -292,24 +265,18 @@ class RtnSingleTeamYear(object):
         self._raw_individual_nqs = []

     def _get_current_week(self):
-        if not hasattr(self, 'session'):
-            self.connect_session()
-
         if not hasattr(self, 'week'):
-            week_data = get_data_from_api(
+            week_data = get_data_from_api(endpoint='currentweek', suffix=str(self.year)).json()
         return min(int(week_data['week']), int(week_data['max']))

     def _get_raw_rankings(self, team_vs_ind, event, week):
-        if not hasattr(self, 'session'):
-            self.connect_session()
-
         team_ind_map = {'team': 0, 'ind': 1}
         event_api_map = {'VT': 1, 'UB': 2, 'BB': 3, 'FX': 4, 'AA': 5}
         rename_map = {'rank': 'Rank', 'gid': 'Gymnast ID', 'team': 'Team', 'tid': 'Team ID',
                       'rqs': 'NQS', 'reg': 'Region', 'con': 'Conference', 'div': 'Division',
                       'usag': 'USAG', 'ave': 'Average', 'high': 'High', 'name': 'Team'}

-        res = get_data_from_api(
+        res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
         if team_vs_ind == 'ind':
             self._raw_rankings[team_vs_ind][event] = [{**{rename_map.get(k): float(v) if k in ['rqs', 'ave', 'high'] else v for k, v in data.items() if k in rename_map},
                                                        **{'Name': data['fname'] + ' ' + data['lname'], 'Event': event}}
{scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn/src.py
@@ -21,21 +21,18 @@ def validate_input(teams):
     return teams


-def session_setup():
+@lru_cache(maxsize=1000000)
+def get_data_from_api(endpoint, suffix):
     session = requests.Session()
     retry = Retry(connect=3, backoff_factor=0.5)
     adapter = HTTPAdapter(max_retries=retry)
     session.mount('http://', adapter)
     session.mount('https://', adapter)

-    return session
-
-
-@lru_cache(maxsize=1000000)
-def get_data_from_api(session, endpoint, suffix):
     url = 'https://www.roadtonationals.com/api/women/' + endpoint
     if suffix:
         url += '/' + suffix
+
     return session.get(url)


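For reference, a self-contained sketch of what the new helper amounts to after this change, reconstructed from the hunk above; the import lines are assumptions inferred from the names used (requests, HTTPAdapter, Retry, lru_cache), not copied from the package:

    # Sketch of the refactored helper; imports are assumed from the names in the diff.
    from functools import lru_cache

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry


    @lru_cache(maxsize=1000000)
    def get_data_from_api(endpoint, suffix):
        # Each unique (endpoint, suffix) pair builds its own retry-enabled session once.
        session = requests.Session()
        retry = Retry(connect=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        session.mount('http://', adapter)
        session.mount('https://', adapter)

        url = 'https://www.roadtonationals.com/api/women/' + endpoint
        if suffix:
            url += '/' + suffix

        # The Response object is memoized by lru_cache for identical calls.
        return session.get(url)

Because lru_cache caches the return value, repeated calls with the same endpoint and suffix reuse the earlier requests.Response for the life of the process instead of hitting the API again.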
{scraping_rtn-0.0.6.0 → scraping_rtn-0.0.6.1}/src/scraping_rtn.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scraping_rtn
-Version: 0.0.6.0
+Version: 0.0.6.1
 Summary: package to scrape gymnastics data from Road To Nationals
 Author-email: Claire Harmon <ceharmon220@gmail.com>
 Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn