scraping-rtn 0.0.1__tar.gz → 0.0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/PKG-INFO +4 -4
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/pyproject.toml +11 -4
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/src/scraping_rtn/RtnSingleTeamYear.py +93 -47
- scraping_rtn-0.0.8.0/src/scraping_rtn/__init__.py +107 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/src/scraping_rtn/src.py +21 -3
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/src/scraping_rtn.egg-info/PKG-INFO +5 -5
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/src/scraping_rtn.egg-info/SOURCES.txt +0 -1
- scraping_rtn-0.0.1/LICENSE +0 -21
- scraping_rtn-0.0.1/src/scraping_rtn/__init__.py +0 -99
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/README.md +0 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/setup.cfg +0 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/src/scraping_rtn.egg-info/dependency_links.txt +0 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/src/scraping_rtn.egg-info/requires.txt +0 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.0}/src/scraping_rtn.egg-info/top_level.txt +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: scraping_rtn
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.8.0
|
|
4
4
|
Summary: package to scrape gymnastics data from Road To Nationals
|
|
5
5
|
Author-email: Claire Harmon <ceharmon220@gmail.com>
|
|
6
|
-
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ceharmon/scraping_rtn
|
|
7
8
|
Classifier: Programming Language :: Python :: 3
|
|
8
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
9
10
|
Classifier: Operating System :: OS Independent
|
|
10
11
|
Requires-Python: >=3.9
|
|
11
12
|
Description-Content-Type: text/markdown
|
|
12
|
-
License-File: LICENSE
|
|
13
13
|
Requires-Dist: pandas>=1.5.3
|
|
14
14
|
Requires-Dist: numpy>=1.23.5
|
|
15
15
|
Requires-Dist: requests>=2.28.1
|
|
@@ -1,21 +1,25 @@
|
|
|
1
1
|
[build-system]
|
|
2
|
-
requires = ["setuptools>=
|
|
2
|
+
requires = ["setuptools>=69.0", "wheel"]
|
|
3
3
|
build-backend = "setuptools.build_meta"
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "scraping_rtn"
|
|
7
|
-
version = "0.0.1"
|
|
7
|
+
version = "0.0.8.0"
|
|
8
8
|
authors = [
|
|
9
|
-
{ name="Claire Harmon", email="ceharmon220@gmail.com" },
|
|
9
|
+
{ name = "Claire Harmon", email = "ceharmon220@gmail.com" },
|
|
10
10
|
]
|
|
11
11
|
description = "package to scrape gymnastics data from Road To Nationals"
|
|
12
12
|
readme = "README.md"
|
|
13
13
|
requires-python = ">=3.9"
|
|
14
|
+
|
|
15
|
+
license = { text = "MIT" }
|
|
16
|
+
|
|
14
17
|
dependencies = [
|
|
15
18
|
"pandas >=1.5.3",
|
|
16
19
|
"numpy >=1.23.5",
|
|
17
20
|
"requests >=2.28.1"
|
|
18
21
|
]
|
|
22
|
+
|
|
19
23
|
classifiers = [
|
|
20
24
|
"Programming Language :: Python :: 3",
|
|
21
25
|
"License :: OSI Approved :: MIT License",
|
|
@@ -23,4 +27,7 @@ classifiers = [
|
|
|
23
27
|
]
|
|
24
28
|
|
|
25
29
|
[project.urls]
|
|
26
|
-
|
|
30
|
+
Homepage = "https://github.com/ceharmon/scraping_rtn"
|
|
31
|
+
|
|
32
|
+
[tool.setuptools]
|
|
33
|
+
license-files = []
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts,
|
|
2
|
-
SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
|
|
1
|
+
from .src import EVENT_MAP, EVENTS, get_session, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
|
|
2
|
+
get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
|
|
3
3
|
import pandas as pd
|
|
4
4
|
import numpy as np
|
|
5
|
+
from datetime import datetime
|
|
5
6
|
|
|
6
7
|
|
|
7
8
|
class RtnSingleTeamYear(object):
|
|
8
|
-
def __init__(self, year, team_name, team_id=None):
|
|
9
|
+
def __init__(self, year, team_name, team_id=None, session=None):
|
|
9
10
|
self.year = year
|
|
10
11
|
if team_name is not None:
|
|
11
12
|
self.team_name = team_name
|
|
@@ -14,33 +15,47 @@ class RtnSingleTeamYear(object):
|
|
|
14
15
|
else:
|
|
15
16
|
self.team_id = team_id
|
|
16
17
|
|
|
17
|
-
|
|
18
|
-
|
|
18
|
+
if session is None:
|
|
19
|
+
self.session = get_session()
|
|
20
|
+
else:
|
|
21
|
+
self.session = session
|
|
22
|
+
|
|
23
|
+
def get_team_mapping(self, force_update=False):
|
|
24
|
+
if force_update:
|
|
25
|
+
get_data_from_api.cache_clear()
|
|
26
|
+
|
|
27
|
+
all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix=str(self.year) + '/1', session=self.session).json()
|
|
19
28
|
return {team['team_name']: team['id'] for team in all_teams_data['teams']}
|
|
20
29
|
|
|
21
30
|
def get_team_id(self):
|
|
22
31
|
if not hasattr(self, 'team_id_map'):
|
|
23
32
|
self.team_id_map = self.get_team_mapping()
|
|
24
33
|
|
|
34
|
+
if self.team_name and self.team_name not in self.team_id_map.keys():
|
|
35
|
+
raise ValueError(f'Unknown team name: {self.team_name}')
|
|
36
|
+
|
|
25
37
|
return self.team_id_map.get(self.team_name, -1)
|
|
26
38
|
# if self.team_name in self.team_id_map.keys():
|
|
27
39
|
# return self.team_id_map[self.team_name]
|
|
28
40
|
# else:
|
|
29
41
|
# raise ValueError(f'{self.team_name} does not exist in data for {self.year}')
|
|
30
42
|
|
|
31
|
-
def _get_raw_roster(self):
|
|
43
|
+
def _get_raw_roster(self, force_update=False):
|
|
32
44
|
rename_map = {'id': 'Gymnast ID', 'hometown': 'Hometown', 'school_year': 'School Year', 'events': 'Events'}
|
|
33
45
|
school_year_map = {'1': 'FR', '2': 'SO', '3': 'JR', '4': 'SR'}
|
|
34
46
|
|
|
35
|
-
|
|
47
|
+
if force_update:
|
|
48
|
+
get_data_from_api.cache_clear()
|
|
36
49
|
|
|
37
|
-
|
|
50
|
+
roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1', session=self.session).json()
|
|
51
|
+
|
|
52
|
+
self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map.get(v, '') for k, v in data.items()},
|
|
38
53
|
**{'Name': data['fname'] + ' ' + data['lname'], 'Team': self.team_name}}
|
|
39
54
|
for data in roster_data]
|
|
40
55
|
|
|
41
|
-
def get_roster(self, include_hometowns=False, include_class=False, include_events=False):
|
|
56
|
+
def get_roster(self, include_hometowns=False, include_class=False, include_events=False, force_update=False):
|
|
42
57
|
if not hasattr(self, 'raw_roster'):
|
|
43
|
-
self._get_raw_roster()
|
|
58
|
+
self._get_raw_roster(force_update=force_update)
|
|
44
59
|
|
|
45
60
|
extra_cols = get_extra_cols(include_hometowns=include_hometowns, include_class=include_class, include_events=include_events)
|
|
46
61
|
|
|
@@ -52,20 +67,23 @@ class RtnSingleTeamYear(object):
|
|
|
52
67
|
|
|
53
68
|
return self.roster
|
|
54
69
|
|
|
55
|
-
def _get_raw_season_results(self):
|
|
56
|
-
|
|
70
|
+
def _get_raw_season_results(self, force_update=False):
|
|
71
|
+
if force_update:
|
|
72
|
+
get_data_from_api.cache_clear()
|
|
73
|
+
|
|
74
|
+
meets = get_data_from_api(endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id), session=self.session).json()
|
|
57
75
|
name_map = {'team_id': 'Team ID', 'team_name': 'Team', 'meet_id': 'Team Meet ID',
|
|
58
76
|
'meet_date': 'Meet Date', 'team_score': 'Score', 'home': 'Home/Away',
|
|
59
77
|
'opponent': 'Opponents', 'meet_desc': 'Meet Name', 'linked_id': 'Meet ID'}
|
|
60
78
|
|
|
61
79
|
self._raw_season_results = [{name_map.get(k, k): fix_opponents(v) if k == 'opponent'
|
|
62
80
|
else (normalize_date(v) if k == 'meet_date' else v)
|
|
63
|
-
for k, v in data.items() if k != 'jas'} for data in meets['meets']]
|
|
81
|
+
for k, v in data.items() if k != 'jas'} for data in meets['meets'] if data['team_name'] == self.team_name]
|
|
64
82
|
self._raw_schedule = [{k: v for k, v in data.items() if k not in ('Score', 'VT', 'UB', 'BB', 'FX')} for data in self._raw_season_results]
|
|
65
83
|
|
|
66
|
-
def get_schedule(self):
|
|
84
|
+
def get_schedule(self, force_update=False):
|
|
67
85
|
if not hasattr(self, '_raw_schedule'):
|
|
68
|
-
self._get_raw_season_results()
|
|
86
|
+
self._get_raw_season_results(force_update=force_update)
|
|
69
87
|
|
|
70
88
|
if len(self._raw_schedule) > 0:
|
|
71
89
|
return pd.DataFrame(self._raw_schedule)
|
|
@@ -87,29 +105,32 @@ class RtnSingleTeamYear(object):
|
|
|
87
105
|
* Uses team meet id to join back to meet info, such as opponent, etc.
|
|
88
106
|
"""
|
|
89
107
|
if not hasattr(self, '_raw_season_results'):
|
|
90
|
-
self._get_raw_season_results()
|
|
108
|
+
self._get_raw_season_results(force_update=force_update)
|
|
91
109
|
|
|
92
110
|
if len(self._raw_season_results) > 0:
|
|
93
111
|
if (len({'VT', 'UB', 'BB', 'FX'}.intersection(self._raw_season_results[0].keys())) != 4 or force_update):
|
|
94
112
|
if method == 'team_consistency':
|
|
95
|
-
self._team_event_scores_team_consistency()
|
|
113
|
+
self._team_event_scores_team_consistency(force_update=force_update)
|
|
96
114
|
elif method == 'by_meet':
|
|
97
|
-
self._team_event_scores_by_meet()
|
|
115
|
+
self._team_event_scores_by_meet(force_update=force_update)
|
|
98
116
|
else:
|
|
99
117
|
raise ValueError('Method must be "team_consistency" or "by_meet"')
|
|
100
118
|
|
|
101
119
|
# TODO: different way to drop duplicates?
|
|
102
|
-
self.season_results = pd.DataFrame(self._raw_season_results).drop_duplicates()
|
|
120
|
+
self.season_results = pd.DataFrame(self._raw_season_results).dropna(subset=['Score']).drop_duplicates()
|
|
103
121
|
else:
|
|
104
122
|
self.season_results = pd.DataFrame(columns=SCHEDULE_COLS + RESULTS_COLS)
|
|
105
123
|
|
|
106
124
|
return self.season_results
|
|
107
125
|
|
|
108
|
-
def _team_event_scores_by_meet(self):
|
|
126
|
+
def _team_event_scores_by_meet(self, force_update=False):
|
|
109
127
|
team_scores_all = []
|
|
110
|
-
for meet_id in [data['Team Meet ID'] for data in self._raw_season_results]:
|
|
128
|
+
for meet_id in [data['Team Meet ID'] for data in self._raw_season_results if data['Meet Date'] <= datetime.now()]:
|
|
111
129
|
try:
|
|
112
|
-
|
|
130
|
+
if force_update:
|
|
131
|
+
get_data_from_api.cache_clear()
|
|
132
|
+
|
|
133
|
+
meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id), session=self.session).json()
|
|
113
134
|
# This API call returns scores from all teams at this meet, not just this team. Need to pick out correct score
|
|
114
135
|
team_scores = [score for score in meet_res['teams'] if score['tname'] == self.team_name and score['mid'] == str(meet_id)]
|
|
115
136
|
assert len(team_scores) == 1, 'Multiple team scores??'
|
|
@@ -124,10 +145,13 @@ class RtnSingleTeamYear(object):
|
|
|
124
145
|
for i in range(len(self._raw_season_results)):
|
|
125
146
|
self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
|
|
126
147
|
|
|
127
|
-
def _team_event_scores_team_consistency(self):
|
|
128
|
-
|
|
148
|
+
def _team_event_scores_team_consistency(self, force_update=False):
|
|
149
|
+
if force_update:
|
|
150
|
+
get_data_from_api.cache_clear()
|
|
151
|
+
|
|
152
|
+
res = get_data_from_api(endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}', session=self.session).json()
|
|
129
153
|
if len(res['labels']) == 0:
|
|
130
|
-
print(f'No team consistency data found for
|
|
154
|
+
print(f'No team consistency data found for {self.team_name} in {self.year}')
|
|
131
155
|
for i in range(len(self._raw_season_results)):
|
|
132
156
|
self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
|
|
133
157
|
else:
|
|
@@ -145,12 +169,10 @@ class RtnSingleTeamYear(object):
|
|
|
145
169
|
Methods:
|
|
146
170
|
* Individual Consistency - Uses Individual Consistency tab from RTN
|
|
147
171
|
* Tends to have more complete data, especially for older years
|
|
148
|
-
* Requires summing of all events to get AA (code does this for you)
|
|
149
172
|
* Relies on date to join back to meet info, such as opponent, etc.
|
|
150
173
|
* One API call per gymnast, relative speed depends on number of meets vs number of gymnasts
|
|
151
174
|
* By Meet - loops through each meet to get scores
|
|
152
175
|
* Older meets tend to be missing
|
|
153
|
-
* Includes AA scores in the response
|
|
154
176
|
* Uses team meet id to join back to meet info, such as opponent, etc.
|
|
155
177
|
* One API call per meet, relative speed depends on number of meets vs number of gymnasts
|
|
156
178
|
"""
|
|
@@ -162,20 +184,31 @@ class RtnSingleTeamYear(object):
|
|
|
162
184
|
if not hasattr(self, '_raw_roster'):
|
|
163
185
|
self.get_roster()
|
|
164
186
|
|
|
165
|
-
self._individual_scores_individual_consistency()
|
|
187
|
+
self._individual_scores_individual_consistency(force_update=force_update)
|
|
166
188
|
elif method == 'by_meet':
|
|
167
|
-
self._individual_scores_by_meet()
|
|
189
|
+
self._individual_scores_by_meet(force_update=force_update)
|
|
168
190
|
else:
|
|
169
191
|
raise ValueError('Method must be "individual_consistency" or "by_meet"')
|
|
170
192
|
|
|
171
193
|
return self.individual_results
|
|
172
194
|
|
|
173
|
-
def _individual_scores_by_meet(self):
|
|
195
|
+
def _individual_scores_by_meet(self, force_update=False):
|
|
174
196
|
individual_scores_all = []
|
|
175
|
-
for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule]:
|
|
197
|
+
for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule if meet['Meet Date'] <= datetime.now()]:
|
|
176
198
|
try:
|
|
177
|
-
|
|
178
|
-
|
|
199
|
+
if force_update:
|
|
200
|
+
get_data_from_api.cache_clear()
|
|
201
|
+
|
|
202
|
+
meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id), session=self.session).json()
|
|
203
|
+
if len(meet_res) == 0 or len(meet_res['scores']) == 0 or len(meet_res['scores'][0]) == 0:
|
|
204
|
+
print(f'No data found for meet {meet_id}')
|
|
205
|
+
continue
|
|
206
|
+
|
|
207
|
+
if 'team_name' in meet_res['scores'][0][0]:
|
|
208
|
+
team_inds = [ind for ind, scores in enumerate(meet_res['scores']) if len(scores) > 0 and scores[0]['team_name'] == self.team_name]
|
|
209
|
+
else:
|
|
210
|
+
raise ValueError('Key not found')
|
|
211
|
+
|
|
179
212
|
if len(team_inds) == 0:
|
|
180
213
|
print(f'No scores found at meet {meet_id}')
|
|
181
214
|
continue
|
|
@@ -195,16 +228,20 @@ class RtnSingleTeamYear(object):
|
|
|
195
228
|
if len(individual_scores_all) > 0:
|
|
196
229
|
merge_dicts(dict1=individual_scores_all, dict2=self._raw_schedule, merge_field='Team Meet ID')
|
|
197
230
|
self.individual_results = pd.DataFrame(individual_scores_all)
|
|
231
|
+
self.individual_results['AA'] = self.individual_results[['VT', 'UB', 'BB', 'FX']].dropna(how='any').astype(float).T.sum().round(4)
|
|
198
232
|
else:
|
|
199
233
|
self.individual_results = pd.DataFrame(columns=['Meet Date', 'VT', 'UB', 'BB', 'FX', 'AA', 'Gymnast ID', 'Name',
|
|
200
234
|
'Team ID', 'Team', 'Team Meet ID', 'Home/Away', 'Opponents',
|
|
201
235
|
'Meet Name', 'Meet ID'])
|
|
202
236
|
|
|
203
|
-
def _individual_scores_individual_consistency(self):
|
|
237
|
+
def _individual_scores_individual_consistency(self, force_update=False):
|
|
204
238
|
ind_consistency_all = []
|
|
205
239
|
for gymnast in self._raw_roster:
|
|
206
240
|
try:
|
|
207
|
-
|
|
241
|
+
if force_update:
|
|
242
|
+
get_data_from_api.cache_clear()
|
|
243
|
+
|
|
244
|
+
res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}", session=self.session).json()
|
|
208
245
|
ind_consistency = [{'Meet Date': normalize_date(res['labels'][i][:7] + str(self.year), dt_format='%b-%d-%Y'),
|
|
209
246
|
'VT': round(float(res['vts'][i]), 4) if res['vts'][i] is not None else np.nan,
|
|
210
247
|
'UB': round(float(res['ubs'][i]), 4) if res['ubs'][i] is not None else np.nan,
|
|
@@ -225,24 +262,26 @@ class RtnSingleTeamYear(object):
|
|
|
225
262
|
else:
|
|
226
263
|
self.individual_results = pd.DataFrame(columns=SCHEDULE_COLS + IND_RESULTS_COLS)
|
|
227
264
|
|
|
228
|
-
def get_individual_nqs(self):
|
|
265
|
+
def get_individual_nqs(self, force_update=False):
|
|
229
266
|
if not hasattr(self, '_raw_roster'):
|
|
230
|
-
self._get_raw_roster()
|
|
267
|
+
self._get_raw_roster(force_update=force_update)
|
|
231
268
|
|
|
232
269
|
if not hasattr(self, '_raw_individual_nqs'):
|
|
233
|
-
self._get_raw_individual_nqs()
|
|
270
|
+
self._get_raw_individual_nqs(force_update=force_update)
|
|
234
271
|
|
|
235
272
|
if len(self._raw_individual_nqs) > 0:
|
|
236
273
|
return pd.DataFrame(self._raw_individual_nqs)
|
|
237
274
|
else:
|
|
238
275
|
return pd.DataFrame(columns=ROSTER_COLS + EVENTS) # + ['AA'])
|
|
239
276
|
|
|
240
|
-
def _get_raw_individual_nqs(self):
|
|
277
|
+
def _get_raw_individual_nqs(self, force_update=False):
|
|
241
278
|
name_map = {'maxv': 'VT', 'maxub': 'UB', 'maxbb': 'BB', 'maxfx': 'FX',
|
|
242
279
|
# 'maxaa': 'AA',
|
|
243
280
|
'gid': 'Gymnast ID'}
|
|
281
|
+
if force_update:
|
|
282
|
+
get_data_from_api.cache_clear()
|
|
244
283
|
|
|
245
|
-
nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
|
|
284
|
+
nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4', session=self.session).json()
|
|
246
285
|
ind_nqs = [{name_map[k]: round(float(v), 4) if k != 'gid' and v != '' else (np.nan if k != 'gid' else v)
|
|
247
286
|
for k, v in data.items() if k in name_map.keys()} for data in nqsData['ind']]
|
|
248
287
|
|
|
@@ -253,18 +292,25 @@ class RtnSingleTeamYear(object):
|
|
|
253
292
|
else:
|
|
254
293
|
self._raw_individual_nqs = []
|
|
255
294
|
|
|
256
|
-
def _get_current_week(self):
|
|
295
|
+
def _get_current_week(self, force_update=False):
|
|
257
296
|
if not hasattr(self, 'week'):
|
|
258
|
-
|
|
297
|
+
if force_update:
|
|
298
|
+
get_data_from_api.cache_clear()
|
|
259
299
|
|
|
260
|
-
|
|
300
|
+
week_data = get_data_from_api(endpoint='currentweek', suffix=str(self.year), session=self.session).json()
|
|
301
|
+
return min(int(week_data['week']), int(week_data['max']))
|
|
302
|
+
|
|
303
|
+
def _get_raw_rankings(self, team_vs_ind, event, week, force_update=False):
|
|
261
304
|
team_ind_map = {'team': 0, 'ind': 1}
|
|
262
305
|
event_api_map = {'VT': 1, 'UB': 2, 'BB': 3, 'FX': 4, 'AA': 5}
|
|
263
306
|
rename_map = {'rank': 'Rank', 'gid': 'Gymnast ID', 'team': 'Team', 'tid': 'Team ID',
|
|
264
307
|
'rqs': 'NQS', 'reg': 'Region', 'con': 'Conference', 'div': 'Division',
|
|
265
308
|
'usag': 'USAG', 'ave': 'Average', 'high': 'High', 'name': 'Team'}
|
|
266
309
|
|
|
267
|
-
|
|
310
|
+
if force_update:
|
|
311
|
+
get_data_from_api.cache_clear()
|
|
312
|
+
|
|
313
|
+
res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}', session=self.session).json()
|
|
268
314
|
if team_vs_ind == 'ind':
|
|
269
315
|
self._raw_rankings[team_vs_ind][event] = [{**{rename_map.get(k): float(v) if k in ['rqs', 'ave', 'high'] else v for k, v in data.items() if k in rename_map},
|
|
270
316
|
**{'Name': data['fname'] + ' ' + data['lname'], 'Event': event}}
|
|
@@ -274,9 +320,9 @@ class RtnSingleTeamYear(object):
|
|
|
274
320
|
**{'Event': event}}
|
|
275
321
|
for data in res['data']]
|
|
276
322
|
|
|
277
|
-
def get_overall_rankings(self, team_vs_ind='team', event='AA', week=None):
|
|
323
|
+
def get_overall_rankings(self, team_vs_ind='team', event='AA', week=None, force_update=False):
|
|
278
324
|
if not week:
|
|
279
|
-
week = self._get_current_week()
|
|
325
|
+
week = self._get_current_week(force_update=force_update)
|
|
280
326
|
|
|
281
327
|
if not hasattr(self, '_raw_rankings'):
|
|
282
328
|
self._raw_rankings = {'team': {event: None for event in EVENT_MAP.values()},
|
|
@@ -288,7 +334,7 @@ class RtnSingleTeamYear(object):
|
|
|
288
334
|
'Division', 'Conference', 'Region', 'USAG']}
|
|
289
335
|
|
|
290
336
|
if self._raw_rankings[team_vs_ind][event] is None:
|
|
291
|
-
self._get_raw_rankings(team_vs_ind=team_vs_ind, event=event, week=week)
|
|
337
|
+
self._get_raw_rankings(team_vs_ind=team_vs_ind, event=event, week=week, force_update=force_update)
|
|
292
338
|
|
|
293
339
|
return pd.DataFrame(self._raw_rankings[team_vs_ind][event])[col_orders[team_vs_ind]]
|
|
294
340
|
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
from .RtnSingleTeamYear import RtnSingleTeamYear
|
|
2
|
+
from .src import get_session, validate_input, get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, EVENTS, ROSTER_COLS
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
BLANK_SPACES = ' '*30
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def save(df, filename):
|
|
9
|
+
df.to_csv(filename, index=False)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def all_teams(year, force_update=False):
|
|
13
|
+
rtn = RtnSingleTeamYear(year=year, team_name=None)
|
|
14
|
+
return list(rtn.get_team_mapping(force_update=force_update).keys())
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def roster(year, teams, include_hometowns=False, include_class=False, include_events=False, verbose=False, force_update=False):
|
|
18
|
+
teams = validate_input(teams)
|
|
19
|
+
session = get_session()
|
|
20
|
+
|
|
21
|
+
all_rosters = []
|
|
22
|
+
for i, team in enumerate(teams):
|
|
23
|
+
if verbose:
|
|
24
|
+
print(f'Getting roster for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
|
|
25
|
+
rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
|
|
26
|
+
res = rtn.get_roster(include_hometowns=include_hometowns, include_class=include_class,
|
|
27
|
+
include_events=include_events, force_update=force_update if i == 0 else False)
|
|
28
|
+
if verbose and len(res) == 0:
|
|
29
|
+
print(f'\tNo roster found for {team}')
|
|
30
|
+
all_rosters.append(res)
|
|
31
|
+
|
|
32
|
+
extra_cols = get_extra_cols(include_hometowns=include_hometowns, include_class=include_class, include_events=include_events)
|
|
33
|
+
return pd.concat(all_rosters)[ROSTER_COLS + extra_cols]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def schedule(year, teams, verbose=False, force_update=False):
|
|
37
|
+
teams = validate_input(teams)
|
|
38
|
+
session = get_session()
|
|
39
|
+
|
|
40
|
+
all_schedules = []
|
|
41
|
+
for i, team in enumerate(teams):
|
|
42
|
+
if verbose:
|
|
43
|
+
print(f'Getting schedule for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
|
|
44
|
+
rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
|
|
45
|
+
res = rtn.get_schedule(force_update=force_update if i == 0 else False)
|
|
46
|
+
if verbose and len(res) == 0:
|
|
47
|
+
print(f'\tNo schedule found for {team}')
|
|
48
|
+
all_schedules.append(res)
|
|
49
|
+
|
|
50
|
+
return pd.concat(all_schedules)[SCHEDULE_COLS]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def team_results(year, teams, method='team_consistency', force_update=False, verbose=False):
|
|
54
|
+
teams = validate_input(teams)
|
|
55
|
+
session = get_session()
|
|
56
|
+
|
|
57
|
+
all_results = []
|
|
58
|
+
for i, team in enumerate(teams):
|
|
59
|
+
if verbose:
|
|
60
|
+
print(f'Getting schedule and results for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
|
|
61
|
+
rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
|
|
62
|
+
res = rtn.get_team_scores(method=method, force_update=force_update if i == 0 else False)
|
|
63
|
+
if verbose and len(res) == 0:
|
|
64
|
+
print(f'\tNo schedule and results found for {team}')
|
|
65
|
+
all_results.append(res)
|
|
66
|
+
|
|
67
|
+
return pd.concat(all_results)[SCHEDULE_COLS + RESULTS_COLS]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def individual_results(year, teams, method='by_meet', force_update=False, verbose=False):
|
|
71
|
+
teams = validate_input(teams)
|
|
72
|
+
session = get_session()
|
|
73
|
+
|
|
74
|
+
all_scores = []
|
|
75
|
+
for i, team in enumerate(teams):
|
|
76
|
+
if verbose:
|
|
77
|
+
print(f'Getting scores for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
|
|
78
|
+
rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
|
|
79
|
+
res = rtn.get_individual_scores(method=method, force_update=force_update if i == 0 else False)
|
|
80
|
+
if verbose and len(res) == 0:
|
|
81
|
+
print(f'\tNo scores found for {team}')
|
|
82
|
+
all_scores.append(res)
|
|
83
|
+
|
|
84
|
+
return pd.concat(all_scores)[SCHEDULE_COLS + IND_RESULTS_COLS]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def individual_nqs(year, teams, verbose=False, force_update=False):
|
|
88
|
+
teams = validate_input(teams)
|
|
89
|
+
session = get_session()
|
|
90
|
+
|
|
91
|
+
all_nqs = []
|
|
92
|
+
for i, team in enumerate(teams):
|
|
93
|
+
if verbose:
|
|
94
|
+
print(f'Getting individual NQS for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
|
|
95
|
+
rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
|
|
96
|
+
res = rtn.get_individual_nqs(force_update=force_update if i == 0 else False)
|
|
97
|
+
if verbose and len(res) == 0:
|
|
98
|
+
print(f'\tNo individual NQS found for {team}')
|
|
99
|
+
all_nqs.append(res)
|
|
100
|
+
|
|
101
|
+
return pd.concat(all_nqs)[ROSTER_COLS + EVENTS] # + ['AA']]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def rankings(year, team_vs_ind='team', event='AA', week=None, force_update=False):
|
|
105
|
+
session = get_session()
|
|
106
|
+
rtn = RtnSingleTeamYear(year=year, team_name=None, session=session)
|
|
107
|
+
return rtn.get_overall_rankings(team_vs_ind=team_vs_ind, event=event, week=week, force_update=force_update)
|
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
import requests
|
|
2
|
+
from requests.adapters import HTTPAdapter
|
|
3
|
+
from urllib3.util.retry import Retry
|
|
2
4
|
from functools import lru_cache
|
|
3
5
|
from datetime import datetime
|
|
4
6
|
|
|
@@ -19,16 +21,32 @@ def validate_input(teams):
|
|
|
19
21
|
return teams
|
|
20
22
|
|
|
21
23
|
|
|
24
|
+
def get_session():
|
|
25
|
+
session = requests.Session()
|
|
26
|
+
retry = Retry(connect=3, backoff_factor=0.5)
|
|
27
|
+
adapter = HTTPAdapter(max_retries=retry)
|
|
28
|
+
session.mount('http://', adapter)
|
|
29
|
+
session.mount('https://', adapter)
|
|
30
|
+
return session
|
|
31
|
+
|
|
32
|
+
|
|
22
33
|
@lru_cache(maxsize=1000000)
|
|
23
|
-
def get_data_from_api(endpoint, suffix):
|
|
34
|
+
def get_data_from_api(endpoint, suffix, session=None):
|
|
35
|
+
if not session:
|
|
36
|
+
session = get_session()
|
|
37
|
+
|
|
24
38
|
url = 'https://www.roadtonationals.com/api/women/' + endpoint
|
|
25
39
|
if suffix:
|
|
26
40
|
url += '/' + suffix
|
|
27
|
-
|
|
41
|
+
|
|
42
|
+
return session.get(url)
|
|
28
43
|
|
|
29
44
|
|
|
30
45
|
def fix_opponents(ops):
|
|
31
|
-
|
|
46
|
+
if not isinstance(ops, str):
|
|
47
|
+
return ops
|
|
48
|
+
|
|
49
|
+
ops = (ops.replace(', ', '/').replace(',','/').replace(' and ', '/').replace(' @ ', '/').replace(' w/ ', '/').replace(' with ', '/'))
|
|
32
50
|
|
|
33
51
|
if 'william & mary' in ops.lower():
|
|
34
52
|
# Todo: title case is going to mess with other opponents here, full mapping might fix that
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
2
|
-
Name:
|
|
3
|
-
Version: 0.0.1
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scraping_rtn
|
|
3
|
+
Version: 0.0.8.0
|
|
4
4
|
Summary: package to scrape gymnastics data from Road To Nationals
|
|
5
5
|
Author-email: Claire Harmon <ceharmon220@gmail.com>
|
|
6
|
-
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ceharmon/scraping_rtn
|
|
7
8
|
Classifier: Programming Language :: Python :: 3
|
|
8
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
9
10
|
Classifier: Operating System :: OS Independent
|
|
10
11
|
Requires-Python: >=3.9
|
|
11
12
|
Description-Content-Type: text/markdown
|
|
12
|
-
License-File: LICENSE
|
|
13
13
|
Requires-Dist: pandas>=1.5.3
|
|
14
14
|
Requires-Dist: numpy>=1.23.5
|
|
15
15
|
Requires-Dist: requests>=2.28.1
|
scraping_rtn-0.0.1/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2023 Claire Harmon
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
from .RtnSingleTeamYear import RtnSingleTeamYear
|
|
2
|
-
from .src import validate_input, get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, EVENTS, ROSTER_COLS
|
|
3
|
-
import pandas as pd
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def save(df, filename):
|
|
7
|
-
df.to_csv(filename, index=False)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def all_teams(year):
|
|
11
|
-
rtn = RtnSingleTeamYear(year=year, team_name=None)
|
|
12
|
-
return list(rtn.get_team_mapping().keys())
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def roster(year, teams, include_hometowns=False, include_class=False, include_events=False, verbose=False):
|
|
16
|
-
teams = validate_input(teams)
|
|
17
|
-
|
|
18
|
-
all_rosters = []
|
|
19
|
-
for team in teams:
|
|
20
|
-
if verbose:
|
|
21
|
-
print(f'Getting roster for {team} ', end='\r')
|
|
22
|
-
rtn = RtnSingleTeamYear(year=year, team_name=team)
|
|
23
|
-
res = rtn.get_roster(include_hometowns=include_hometowns, include_class=include_class,
|
|
24
|
-
include_events=include_events)
|
|
25
|
-
if verbose and len(res) == 0:
|
|
26
|
-
print(f'\tNo roster found for {team}')
|
|
27
|
-
all_rosters.append(res)
|
|
28
|
-
|
|
29
|
-
extra_cols = get_extra_cols(include_hometowns=include_hometowns, include_class=include_class, include_events=include_events)
|
|
30
|
-
return pd.concat(all_rosters)[ROSTER_COLS + extra_cols]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def schedule(year, teams, verbose=False):
|
|
34
|
-
teams = validate_input(teams)
|
|
35
|
-
|
|
36
|
-
all_schedules = []
|
|
37
|
-
for team in teams:
|
|
38
|
-
if verbose:
|
|
39
|
-
print(f'Getting schedule for {team} ', end='\r')
|
|
40
|
-
rtn = RtnSingleTeamYear(year=year, team_name=team)
|
|
41
|
-
res = rtn.get_schedule()
|
|
42
|
-
if verbose and len(res) == 0:
|
|
43
|
-
print(f'\tNo schedule found for {team}')
|
|
44
|
-
all_schedules.append(res)
|
|
45
|
-
|
|
46
|
-
return pd.concat(all_schedules)[SCHEDULE_COLS]
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def team_results(year, teams, method='team_consistency', force_update=False, verbose=False):
|
|
50
|
-
teams = validate_input(teams)
|
|
51
|
-
|
|
52
|
-
all_results = []
|
|
53
|
-
for team in teams:
|
|
54
|
-
if verbose:
|
|
55
|
-
print(f'Getting schedule and results for {team} ', end='\r')
|
|
56
|
-
rtn = RtnSingleTeamYear(year=year, team_name=team)
|
|
57
|
-
res = rtn.get_team_scores(method=method,force_update=force_update)
|
|
58
|
-
if verbose and len(res) == 0:
|
|
59
|
-
print(f'\tNo schedule and results found for {team}')
|
|
60
|
-
all_results.append(res)
|
|
61
|
-
|
|
62
|
-
return pd.concat(all_results)[SCHEDULE_COLS + RESULTS_COLS]
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
def individual_results(year, teams, method='by_meet', force_update=False, verbose=False):
|
|
66
|
-
teams = validate_input(teams)
|
|
67
|
-
|
|
68
|
-
all_scores = []
|
|
69
|
-
for team in teams:
|
|
70
|
-
if verbose:
|
|
71
|
-
print(f'Getting scores for {team} ', end='\r')
|
|
72
|
-
rtn = RtnSingleTeamYear(year=year, team_name=team)
|
|
73
|
-
res = rtn.get_individual_scores(method=method, force_update=force_update)
|
|
74
|
-
if verbose and len(res) == 0:
|
|
75
|
-
print(f'\tNo scores found for {team}')
|
|
76
|
-
all_scores.append(res)
|
|
77
|
-
|
|
78
|
-
return pd.concat(all_scores)[SCHEDULE_COLS + IND_RESULTS_COLS]
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def individual_nqs(year, teams, verbose=False):
|
|
82
|
-
teams = validate_input(teams)
|
|
83
|
-
|
|
84
|
-
all_nqs = []
|
|
85
|
-
for team in teams:
|
|
86
|
-
if verbose:
|
|
87
|
-
print(f'Getting individual NQS for {team} ', end='\r')
|
|
88
|
-
rtn = RtnSingleTeamYear(year=year, team_name=team)
|
|
89
|
-
res = rtn.get_individual_nqs()
|
|
90
|
-
if verbose and len(res) == 0:
|
|
91
|
-
print(f'\tNo individual NQS found for {team}')
|
|
92
|
-
all_nqs.append(res)
|
|
93
|
-
|
|
94
|
-
return pd.concat(all_nqs)[ROSTER_COLS + EVENTS] # + ['AA']]
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def rankings(year, team_vs_ind='team', event='AA', week=None):
|
|
98
|
-
rtn = RtnSingleTeamYear(year=year, team_name=None)
|
|
99
|
-
return rtn.get_overall_rankings(team_vs_ind=team_vs_ind, event=event, week=week)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|