scraping-rtn 0.0.1__tar.gz → 0.0.8.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/PKG-INFO +4 -4
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/pyproject.toml +11 -4
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn/RtnSingleTeamYear.py +79 -38
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn/__init__.py +27 -25
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn/src.py +14 -2
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn.egg-info/PKG-INFO +5 -5
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn.egg-info/SOURCES.txt +0 -1
- scraping_rtn-0.0.1/LICENSE +0 -21
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/README.md +0 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/setup.cfg +0 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn.egg-info/dependency_links.txt +0 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn.egg-info/requires.txt +0 -0
- {scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn.egg-info/top_level.txt +0 -0

{scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/PKG-INFO

@@ -1,15 +1,15 @@
-Metadata-Version: 2.
+Metadata-Version: 2.4
 Name: scraping_rtn
-Version: 0.0.1
+Version: 0.0.8.2
 Summary: package to scrape gymnastics data from Road To Nationals
 Author-email: Claire Harmon <ceharmon220@gmail.com>
-
+License: MIT
+Project-URL: Homepage, https://github.com/ceharmon/scraping_rtn
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
-License-File: LICENSE
 Requires-Dist: pandas>=1.5.3
 Requires-Dist: numpy>=1.23.5
 Requires-Dist: requests>=2.28.1

{scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/pyproject.toml

@@ -1,21 +1,25 @@
 [build-system]
-requires = ["setuptools>=
+requires = ["setuptools>=69.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "scraping_rtn"
-version = "0.0.1"
+version = "0.0.8.2"
 authors = [
-    { name="Claire Harmon", email="ceharmon220@gmail.com" },
+    { name = "Claire Harmon", email = "ceharmon220@gmail.com" },
 ]
 description = "package to scrape gymnastics data from Road To Nationals"
 readme = "README.md"
 requires-python = ">=3.9"
+
+license = { text = "MIT" }
+
 dependencies = [
     "pandas >=1.5.3",
     "numpy >=1.23.5",
     "requests >=2.28.1"
 ]
+
 classifiers = [
     "Programming Language :: Python :: 3",
     "License :: OSI Approved :: MIT License",
@@ -23,4 +27,7 @@ classifiers = [
 ]
 
 [project.urls]
-
+Homepage = "https://github.com/ceharmon/scraping_rtn"
+
+[tool.setuptools]
+license-files = []

{scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn/RtnSingleTeamYear.py

@@ -1,11 +1,12 @@
-from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts,
-    SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
+from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
+    get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
 import pandas as pd
 import numpy as np
+from datetime import datetime
 
 
 class RtnSingleTeamYear(object):
-    def __init__(self, year, team_name, team_id=None):
+    def __init__(self, year, team_name, team_id=None, session=None):
         self.year = year
         if team_name is not None:
             self.team_name = team_name
@@ -14,7 +15,10 @@ class RtnSingleTeamYear(object):
         else:
             self.team_id = team_id
 
-    def get_team_mapping(self):
+    def get_team_mapping(self, force_update=False):
+        if force_update:
+            get_data_from_api.cache_clear()
+
         all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
         return {team['team_name']: team['id'] for team in all_teams_data['teams']}
 
@@ -22,25 +26,31 @@
         if not hasattr(self, 'team_id_map'):
             self.team_id_map = self.get_team_mapping()
 
+        if self.team_name and self.team_name not in self.team_id_map.keys():
+            raise ValueError(f'Unknown team name: {self.team_name}')
+
         return self.team_id_map.get(self.team_name, -1)
         # if self.team_name in self.team_id_map.keys():
         #     return self.team_id_map[self.team_name]
         # else:
         #     raise ValueError(f'{self.team_name} does not exist in data for {self.year}')
 
-    def _get_raw_roster(self):
+    def _get_raw_roster(self, force_update=False):
         rename_map = {'id': 'Gymnast ID', 'hometown': 'Hometown', 'school_year': 'School Year', 'events': 'Events'}
         school_year_map = {'1': 'FR', '2': 'SO', '3': 'JR', '4': 'SR'}
 
+        if force_update:
+            get_data_from_api.cache_clear()
+
         roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
 
-        self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map
+        self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map.get(v, '') for k, v in data.items()},
                              **{'Name': data['fname'] + ' ' + data['lname'], 'Team': self.team_name}}
                             for data in roster_data]
 
-    def get_roster(self, include_hometowns=False, include_class=False, include_events=False):
+    def get_roster(self, include_hometowns=False, include_class=False, include_events=False, force_update=False):
         if not hasattr(self, 'raw_roster'):
-            self._get_raw_roster()
+            self._get_raw_roster(force_update=force_update)
 
         extra_cols = get_extra_cols(include_hometowns=include_hometowns, include_class=include_class, include_events=include_events)
 
@@ -52,7 +62,10 @@ class RtnSingleTeamYear(object):
 
         return self.roster
 
-    def _get_raw_season_results(self):
+    def _get_raw_season_results(self, force_update=False):
+        if force_update:
+            get_data_from_api.cache_clear()
+
         meets = get_data_from_api(endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
         name_map = {'team_id': 'Team ID', 'team_name': 'Team', 'meet_id': 'Team Meet ID',
                     'meet_date': 'Meet Date', 'team_score': 'Score', 'home': 'Home/Away',
@@ -60,12 +73,12 @@ class RtnSingleTeamYear(object):
 
         self._raw_season_results = [{name_map.get(k, k): fix_opponents(v) if k == 'opponent'
                                      else (normalize_date(v) if k == 'meet_date' else v)
-                                     for k, v in data.items() if k != 'jas'} for data in meets['meets']]
+                                     for k, v in data.items() if k != 'jas'} for data in meets['meets'] if data['team_name'] == self.team_name]
         self._raw_schedule = [{k: v for k, v in data.items() if k not in ('Score', 'VT', 'UB', 'BB', 'FX')} for data in self._raw_season_results]
 
-    def get_schedule(self):
+    def get_schedule(self, force_update=False):
         if not hasattr(self, '_raw_schedule'):
-            self._get_raw_season_results()
+            self._get_raw_season_results(force_update=force_update)
 
         if len(self._raw_schedule) > 0:
             return pd.DataFrame(self._raw_schedule)
@@ -87,28 +100,31 @@ class RtnSingleTeamYear(object):
                * Uses team meet id to join back to meet info, such as opponent, etc.
         """
         if not hasattr(self, '_raw_season_results'):
-            self._get_raw_season_results()
+            self._get_raw_season_results(force_update=force_update)
 
         if len(self._raw_season_results) > 0:
             if (len({'VT', 'UB', 'BB', 'FX'}.intersection(self._raw_season_results[0].keys())) != 4 or force_update):
                 if method == 'team_consistency':
-                    self._team_event_scores_team_consistency()
+                    self._team_event_scores_team_consistency(force_update=force_update)
                 elif method == 'by_meet':
-                    self._team_event_scores_by_meet()
+                    self._team_event_scores_by_meet(force_update=force_update)
                 else:
                     raise ValueError('Method must be "team_consistency" or "by_meet"')
 
             # TODO: different way to drop duplicates?
-            self.season_results = pd.DataFrame(self._raw_season_results).drop_duplicates()
+            self.season_results = pd.DataFrame(self._raw_season_results).dropna(subset=['Score']).drop_duplicates()
         else:
             self.season_results = pd.DataFrame(columns=SCHEDULE_COLS + RESULTS_COLS)
 
         return self.season_results
 
-    def _team_event_scores_by_meet(self):
+    def _team_event_scores_by_meet(self, force_update=False):
         team_scores_all = []
-        for meet_id in [data['Team Meet ID'] for data in self._raw_season_results]:
+        for meet_id in [data['Team Meet ID'] for data in self._raw_season_results if data['Meet Date'] <= datetime.now()]:
             try:
+                if force_update:
+                    get_data_from_api.cache_clear()
+
                 meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
                 # This API call returns scores from all teams at this meet, not just this team. Need to pick out correct score
                 team_scores = [score for score in meet_res['teams'] if score['tname'] == self.team_name and score['mid'] == str(meet_id)]
@@ -124,10 +140,13 @@ class RtnSingleTeamYear(object):
                 for i in range(len(self._raw_season_results)):
                     self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
 
-    def _team_event_scores_team_consistency(self):
+    def _team_event_scores_team_consistency(self, force_update=False):
+        if force_update:
+            get_data_from_api.cache_clear()
+
         res = get_data_from_api(endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
         if len(res['labels']) == 0:
-            print(f'No team consistency data found for
+            print(f'No team consistency data found for {self.team_name} in {self.year}')
             for i in range(len(self._raw_season_results)):
                 self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
         else:
@@ -145,12 +164,10 @@ class RtnSingleTeamYear(object):
         Methods:
            * Individual Consistency - Uses Individual Consistency tab from RTN
                * Tends to have more complete data, especially for older years
-               * Requires summing of all events to get AA (code does this for you)
               * Relies on date to join back to meet info, such as opponent, etc.
               * One API call per gymnast, relative speed depends on number of meets vs number of gymnasts
            * By Meet - loops through each meet to get scores
               * Older meets tend to be missing
-              * Includes AA scores in the response
              * Uses team meet id to join back to meet info, such as opponent, etc.
              * One API call per meet, relative speed depends on number of meets vs number of gymnasts
         """
@@ -162,20 +179,31 @@ class RtnSingleTeamYear(object):
             if not hasattr(self, '_raw_roster'):
                 self.get_roster()
 
-            self._individual_scores_individual_consistency()
+            self._individual_scores_individual_consistency(force_update=force_update)
         elif method == 'by_meet':
-            self._individual_scores_by_meet()
+            self._individual_scores_by_meet(force_update=force_update)
         else:
            raise ValueError('Method must be "individual_consistency" or "by_meet"')
 
        return self.individual_results
 
-    def _individual_scores_by_meet(self):
+    def _individual_scores_by_meet(self, force_update=False):
        individual_scores_all = []
-        for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule]:
+        for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule if meet['Meet Date'] <= datetime.now()]:
            try:
+                if force_update:
+                    get_data_from_api.cache_clear()
+
                meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
-
+                if len(meet_res) == 0 or len(meet_res['scores']) == 0 or len(meet_res['scores'][0]) == 0:
+                    print(f'No data found for meet {meet_id}')
+                    continue
+
+                if 'team_name' in meet_res['scores'][0][0]:
+                    team_inds = [ind for ind, scores in enumerate(meet_res['scores']) if len(scores) > 0 and scores[0]['team_name'] == self.team_name]
+                else:
+                    raise ValueError('Key not found')
+
                if len(team_inds) == 0:
                    print(f'No scores found at meet {meet_id}')
                    continue
@@ -195,15 +223,19 @@ class RtnSingleTeamYear(object):
        if len(individual_scores_all) > 0:
            merge_dicts(dict1=individual_scores_all, dict2=self._raw_schedule, merge_field='Team Meet ID')
            self.individual_results = pd.DataFrame(individual_scores_all)
+            self.individual_results['AA'] = self.individual_results[['VT', 'UB', 'BB', 'FX']].dropna(how='any').astype(float).T.sum().round(4)
        else:
            self.individual_results = pd.DataFrame(columns=['Meet Date', 'VT', 'UB', 'BB', 'FX', 'AA', 'Gymnast ID', 'Name',
                                                            'Team ID', 'Team', 'Team Meet ID', 'Home/Away', 'Opponents',
                                                            'Meet Name', 'Meet ID'])
 
-    def _individual_scores_individual_consistency(self):
+    def _individual_scores_individual_consistency(self, force_update=False):
        ind_consistency_all = []
        for gymnast in self._raw_roster:
            try:
+                if force_update:
+                    get_data_from_api.cache_clear()
+
                res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
                ind_consistency = [{'Meet Date': normalize_date(res['labels'][i][:7] + str(self.year), dt_format='%b-%d-%Y'),
                                    'VT': round(float(res['vts'][i]), 4) if res['vts'][i] is not None else np.nan,
@@ -225,22 +257,24 @@ class RtnSingleTeamYear(object):
        else:
            self.individual_results = pd.DataFrame(columns=SCHEDULE_COLS + IND_RESULTS_COLS)
 
-    def get_individual_nqs(self):
+    def get_individual_nqs(self, force_update=False):
        if not hasattr(self, '_raw_roster'):
-            self._get_raw_roster()
+            self._get_raw_roster(force_update=force_update)
 
        if not hasattr(self, '_raw_individual_nqs'):
-            self._get_raw_individual_nqs()
+            self._get_raw_individual_nqs(force_update=force_update)
 
        if len(self._raw_individual_nqs) > 0:
            return pd.DataFrame(self._raw_individual_nqs)
        else:
            return pd.DataFrame(columns=ROSTER_COLS + EVENTS) # + ['AA'])
 
-    def _get_raw_individual_nqs(self):
+    def _get_raw_individual_nqs(self, force_update=False):
        name_map = {'maxv': 'VT', 'maxub': 'UB', 'maxbb': 'BB', 'maxfx': 'FX',
                    # 'maxaa': 'AA',
                    'gid': 'Gymnast ID'}
+        if force_update:
+            get_data_from_api.cache_clear()
 
        nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
        ind_nqs = [{name_map[k]: round(float(v), 4) if k != 'gid' and v != '' else (np.nan if k != 'gid' else v)
@@ -253,17 +287,24 @@ class RtnSingleTeamYear(object):
        else:
            self._raw_individual_nqs = []
 
-    def _get_current_week(self):
+    def _get_current_week(self, force_update=False):
        if not hasattr(self, 'week'):
-
+            if force_update:
+                get_data_from_api.cache_clear()
+
+            week_data = get_data_from_api(endpoint='currentweek', suffix=str(self.year)).json()
+            return min(int(week_data['week']), int(week_data['max']))
 
-    def _get_raw_rankings(self, team_vs_ind, event, week):
+    def _get_raw_rankings(self, team_vs_ind, event, week, force_update=False):
        team_ind_map = {'team': 0, 'ind': 1}
        event_api_map = {'VT': 1, 'UB': 2, 'BB': 3, 'FX': 4, 'AA': 5}
        rename_map = {'rank': 'Rank', 'gid': 'Gymnast ID', 'team': 'Team', 'tid': 'Team ID',
                      'rqs': 'NQS', 'reg': 'Region', 'con': 'Conference', 'div': 'Division',
                      'usag': 'USAG', 'ave': 'Average', 'high': 'High', 'name': 'Team'}
 
+        if force_update:
+            get_data_from_api.cache_clear()
+
        res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
        if team_vs_ind == 'ind':
            self._raw_rankings[team_vs_ind][event] = [{**{rename_map.get(k): float(v) if k in ['rqs', 'ave', 'high'] else v for k, v in data.items() if k in rename_map},
@@ -274,9 +315,9 @@ class RtnSingleTeamYear(object):
                                                       **{'Event': event}}
                                                      for data in res['data']]
 
-    def get_overall_rankings(self, team_vs_ind='team', event='AA', week=None):
+    def get_overall_rankings(self, team_vs_ind='team', event='AA', week=None, force_update=False):
        if not week:
-            week = self._get_current_week()
+            week = self._get_current_week(force_update=force_update)
 
        if not hasattr(self, '_raw_rankings'):
            self._raw_rankings = {'team': {event: None for event in EVENT_MAP.values()},
@@ -288,7 +329,7 @@ class RtnSingleTeamYear(object):
                                 'Division', 'Conference', 'Region', 'USAG']}
 
        if self._raw_rankings[team_vs_ind][event] is None:
-            self._get_raw_rankings(team_vs_ind=team_vs_ind, event=event, week=week)
+            self._get_raw_rankings(team_vs_ind=team_vs_ind, event=event, week=week, force_update=force_update)
 
        return pd.DataFrame(self._raw_rankings[team_vs_ind][event])[col_orders[team_vs_ind]]
 
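
The pattern running through this file is that every fetching method now takes a `force_update` flag and, when it is set, calls `get_data_from_api.cache_clear()` before requesting data again (the underlying `get_data_from_api` in `src.py` is memoized with `functools.lru_cache`; see its hunks below). A minimal usage sketch of the class-level API — the year and team name below are placeholders, not values taken from this diff:

```python
from scraping_rtn import RtnSingleTeamYear

# Hypothetical season and team; any team name RTN lists for that year should work.
rtn = RtnSingleTeamYear(year=2024, team_name='Michigan')

roster = rtn.get_roster(include_class=True)          # served from the lru_cache on repeat calls
schedule = rtn.get_schedule()
team_scores = rtn.get_team_scores(method='by_meet')

# force_update=True clears the shared lru_cache and re-downloads from the API.
fresh_scores = rtn.get_team_scores(method='by_meet', force_update=True)
```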

{scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn/__init__.py

@@ -2,26 +2,28 @@ from .RtnSingleTeamYear import RtnSingleTeamYear
 from .src import validate_input, get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, EVENTS, ROSTER_COLS
 import pandas as pd
 
+BLANK_SPACES = ' '*30
+
 
 def save(df, filename):
     df.to_csv(filename, index=False)
 
 
-def all_teams(year):
+def all_teams(year, force_update=False):
     rtn = RtnSingleTeamYear(year=year, team_name=None)
-    return list(rtn.get_team_mapping().keys())
+    return list(rtn.get_team_mapping(force_update=force_update).keys())
 
 
-def roster(year, teams, include_hometowns=False, include_class=False, include_events=False, verbose=False):
+def roster(year, teams, include_hometowns=False, include_class=False, include_events=False, verbose=False, force_update=False):
     teams = validate_input(teams)
 
     all_rosters = []
-    for team in teams:
+    for i, team in enumerate(teams):
         if verbose:
-            print(f'Getting roster for {team}
+            print(f'Getting roster for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
         rtn = RtnSingleTeamYear(year=year, team_name=team)
         res = rtn.get_roster(include_hometowns=include_hometowns, include_class=include_class,
-                             include_events=include_events)
+                             include_events=include_events, force_update=force_update if i == 0 else False)
         if verbose and len(res) == 0:
             print(f'\tNo roster found for {team}')
         all_rosters.append(res)
@@ -30,15 +32,15 @@ def roster(year, teams, include_hometowns=False, include_class=False, include_ev
     return pd.concat(all_rosters)[ROSTER_COLS + extra_cols]
 
 
-def schedule(year, teams, verbose=False):
+def schedule(year, teams, verbose=False, force_update=False):
     teams = validate_input(teams)
 
     all_schedules = []
-    for team in teams:
+    for i, team in enumerate(teams):
         if verbose:
-            print(f'Getting schedule for {team}
+            print(f'Getting schedule for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
         rtn = RtnSingleTeamYear(year=year, team_name=team)
-        res = rtn.get_schedule()
+        res = rtn.get_schedule(force_update=force_update if i == 0 else False)
         if verbose and len(res) == 0:
             print(f'\tNo schedule found for {team}')
         all_schedules.append(res)
@@ -48,13 +50,13 @@ def schedule(year, teams, verbose=False):
 
 def team_results(year, teams, method='team_consistency', force_update=False, verbose=False):
     teams = validate_input(teams)
-
+
     all_results = []
-    for team in teams:
+    for i, team in enumerate(teams):
         if verbose:
-            print(f'Getting schedule and results for {team}
+            print(f'Getting schedule and results for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
         rtn = RtnSingleTeamYear(year=year, team_name=team)
-        res = rtn.get_team_scores(method=method,force_update=force_update)
+        res = rtn.get_team_scores(method=method, force_update=force_update if i == 0 else False)
         if verbose and len(res) == 0:
             print(f'\tNo schedule and results found for {team}')
         all_results.append(res)
@@ -64,13 +66,13 @@ def team_results(year, teams, method='team_consistency', force_update=False, ver
 
 def individual_results(year, teams, method='by_meet', force_update=False, verbose=False):
     teams = validate_input(teams)
-
+
     all_scores = []
-    for team in teams:
+    for i, team in enumerate(teams):
         if verbose:
-            print(f'Getting scores for {team}
+            print(f'Getting scores for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
         rtn = RtnSingleTeamYear(year=year, team_name=team)
-        res = rtn.get_individual_scores(method=method, force_update=force_update)
+        res = rtn.get_individual_scores(method=method, force_update=force_update if i == 0 else False)
         if verbose and len(res) == 0:
             print(f'\tNo scores found for {team}')
         all_scores.append(res)
@@ -78,15 +80,15 @@ def individual_results(year, teams, method='by_meet', force_update=False, verbos
     return pd.concat(all_scores)[SCHEDULE_COLS + IND_RESULTS_COLS]
 
 
-def individual_nqs(year, teams, verbose=False):
+def individual_nqs(year, teams, verbose=False, force_update=False):
     teams = validate_input(teams)
-
+
     all_nqs = []
-    for team in teams:
+    for i, team in enumerate(teams):
         if verbose:
-            print(f'Getting individual NQS for {team}
+            print(f'Getting individual NQS for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
         rtn = RtnSingleTeamYear(year=year, team_name=team)
-        res = rtn.get_individual_nqs()
+        res = rtn.get_individual_nqs(force_update=force_update if i == 0 else False)
         if verbose and len(res) == 0:
             print(f'\tNo individual NQS found for {team}')
         all_nqs.append(res)
@@ -94,6 +96,6 @@ def individual_nqs(year, teams, verbose=False):
     return pd.concat(all_nqs)[ROSTER_COLS + EVENTS] # + ['AA']]
 
 
-def rankings(year, team_vs_ind='team', event='AA', week=None):
+def rankings(year, team_vs_ind='team', event='AA', week=None, force_update=False):
     rtn = RtnSingleTeamYear(year=year, team_name=None)
-    return rtn.get_overall_rankings(team_vs_ind=team_vs_ind, event=event, week=week)
+    return rtn.get_overall_rankings(team_vs_ind=team_vs_ind, event=event, week=week, force_update=force_update)
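
The module-level helpers all gain the same `force_update` keyword; note that in the per-team loops it is only passed through on the first team (`i == 0`), since clearing the cache once per call is enough. A hedged example of how the top-level API might be driven — the year and the team slice are illustrative:

```python
import scraping_rtn

year = 2024  # hypothetical season

teams = scraping_rtn.all_teams(year)                 # team names RTN lists for that year
rosters = scraping_rtn.roster(year, teams[:3], include_hometowns=True, verbose=True)
results = scraping_rtn.team_results(year, teams[:3], method='team_consistency')
scores = scraping_rtn.individual_results(year, teams[:3], method='by_meet', force_update=True)
ranks = scraping_rtn.rankings(year, team_vs_ind='team', event='AA')

scraping_rtn.save(results, 'team_results.csv')       # thin wrapper around DataFrame.to_csv
```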

{scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn/src.py

@@ -1,4 +1,6 @@
 import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
 from functools import lru_cache
 from datetime import datetime
 
@@ -21,14 +23,24 @@ def validate_input(teams):
 
 @lru_cache(maxsize=1000000)
 def get_data_from_api(endpoint, suffix):
+    session = requests.Session()
+    retry = Retry(connect=3, backoff_factor=0.5)
+    adapter = HTTPAdapter(max_retries=retry)
+    session.mount('http://', adapter)
+    session.mount('https://', adapter)
+
     url = 'https://www.roadtonationals.com/api/women/' + endpoint
     if suffix:
         url += '/' + suffix
-
+
+    return session.get(url)
 
 
 def fix_opponents(ops):
-
+    if not isinstance(ops, str):
+        return ops
+
+    ops = (ops.replace(', ', '/').replace(',','/').replace(' and ', '/').replace(' @ ', '/').replace(' w/ ', '/').replace(' with ', '/'))
 
     if 'william & mary' in ops.lower():
         # Todo: title case is going to mess with other opponents here, full mapping might fix that
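
`get_data_from_api` keeps its `lru_cache` memoization but now builds a `requests.Session` mounted with an `HTTPAdapter`/`Retry`, so transient connection failures are retried. From the caller's side the cache semantics look like this (the year is a placeholder):

```python
from scraping_rtn.src import get_data_from_api

# The first call goes through the retrying session; the second identical call
# is answered from the lru_cache without touching the network.
week = get_data_from_api(endpoint='currentweek', suffix='2024').json()
week_again = get_data_from_api(endpoint='currentweek', suffix='2024').json()

# This is exactly what the force_update=True code paths do before re-fetching.
get_data_from_api.cache_clear()
```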

{scraping_rtn-0.0.1 → scraping_rtn-0.0.8.2}/src/scraping_rtn.egg-info/PKG-INFO

@@ -1,15 +1,15 @@
-Metadata-Version: 2.
-Name:
-Version: 0.0.1
+Metadata-Version: 2.4
+Name: scraping_rtn
+Version: 0.0.8.2
 Summary: package to scrape gymnastics data from Road To Nationals
 Author-email: Claire Harmon <ceharmon220@gmail.com>
-
+License: MIT
+Project-URL: Homepage, https://github.com/ceharmon/scraping_rtn
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Operating System :: OS Independent
 Requires-Python: >=3.9
 Description-Content-Type: text/markdown
-License-File: LICENSE
 Requires-Dist: pandas>=1.5.3
 Requires-Dist: numpy>=1.23.5
 Requires-Dist: requests>=2.28.1

scraping_rtn-0.0.1/LICENSE DELETED

@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2023 Claire Harmon
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.

Files without changes: README.md, setup.cfg, src/scraping_rtn.egg-info/dependency_links.txt, src/scraping_rtn.egg-info/requires.txt, src/scraping_rtn.egg-info/top_level.txt