scraping-rtn 0.0.1__tar.gz → 0.0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,15 +1,15 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: scraping_rtn
3
- Version: 0.0.1
3
+ Version: 0.0.8.2
4
4
  Summary: package to scrape gymnastics data from Road To Nationals
5
5
  Author-email: Claire Harmon <ceharmon220@gmail.com>
6
- Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ceharmon/scraping_rtn
7
8
  Classifier: Programming Language :: Python :: 3
8
9
  Classifier: License :: OSI Approved :: MIT License
9
10
  Classifier: Operating System :: OS Independent
10
11
  Requires-Python: >=3.9
11
12
  Description-Content-Type: text/markdown
12
- License-File: LICENSE
13
13
  Requires-Dist: pandas>=1.5.3
14
14
  Requires-Dist: numpy>=1.23.5
15
15
  Requires-Dist: requests>=2.28.1
@@ -1,21 +1,25 @@
1
1
  [build-system]
2
- requires = ["setuptools>=61.0"]
2
+ requires = ["setuptools>=69.0", "wheel"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scraping_rtn"
7
- version = "0.0.1"
7
+ version = "0.0.8.2"
8
8
  authors = [
9
- { name="Claire Harmon", email="ceharmon220@gmail.com" },
9
+ { name = "Claire Harmon", email = "ceharmon220@gmail.com" },
10
10
  ]
11
11
  description = "package to scrape gymnastics data from Road To Nationals"
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.9"
14
+
15
+ license = { text = "MIT" }
16
+
14
17
  dependencies = [
15
18
  "pandas >=1.5.3",
16
19
  "numpy >=1.23.5",
17
20
  "requests >=2.28.1"
18
21
  ]
22
+
19
23
  classifiers = [
20
24
  "Programming Language :: Python :: 3",
21
25
  "License :: OSI Approved :: MIT License",
@@ -23,4 +27,7 @@ classifiers = [
23
27
  ]
24
28
 
25
29
  [project.urls]
26
- "Homepage" = "https://github.com/cgn-charmon/scraping_rtn"
30
+ Homepage = "https://github.com/ceharmon/scraping_rtn"
31
+
32
+ [tool.setuptools]
33
+ license-files = []
@@ -1,11 +1,12 @@
1
- from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts, get_extra_cols, \
2
- SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
1
+ from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
2
+ get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
3
3
  import pandas as pd
4
4
  import numpy as np
5
+ from datetime import datetime
5
6
 
6
7
 
7
8
  class RtnSingleTeamYear(object):
8
- def __init__(self, year, team_name, team_id=None):
9
+ def __init__(self, year, team_name, team_id=None, session=None):
9
10
  self.year = year
10
11
  if team_name is not None:
11
12
  self.team_name = team_name
@@ -14,7 +15,10 @@ class RtnSingleTeamYear(object):
14
15
  else:
15
16
  self.team_id = team_id
16
17
 
17
- def get_team_mapping(self):
18
+ def get_team_mapping(self, force_update=False):
19
+ if force_update:
20
+ get_data_from_api.cache_clear()
21
+
18
22
  all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
19
23
  return {team['team_name']: team['id'] for team in all_teams_data['teams']}
20
24
 
@@ -22,25 +26,31 @@ class RtnSingleTeamYear(object):
22
26
  if not hasattr(self, 'team_id_map'):
23
27
  self.team_id_map = self.get_team_mapping()
24
28
 
29
+ if self.team_name and self.team_name not in self.team_id_map.keys():
30
+ raise ValueError(f'Unknown team name: {self.team_name}')
31
+
25
32
  return self.team_id_map.get(self.team_name, -1)
26
33
  # if self.team_name in self.team_id_map.keys():
27
34
  # return self.team_id_map[self.team_name]
28
35
  # else:
29
36
  # raise ValueError(f'{self.team_name} does not exist in data for {self.year}')
30
37
 
31
- def _get_raw_roster(self):
38
+ def _get_raw_roster(self, force_update=False):
32
39
  rename_map = {'id': 'Gymnast ID', 'hometown': 'Hometown', 'school_year': 'School Year', 'events': 'Events'}
33
40
  school_year_map = {'1': 'FR', '2': 'SO', '3': 'JR', '4': 'SR'}
34
41
 
42
+ if force_update:
43
+ get_data_from_api.cache_clear()
44
+
35
45
  roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
36
46
 
37
- self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map[v] for k, v in data.items()},
47
+ self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map.get(v, '') for k, v in data.items()},
38
48
  **{'Name': data['fname'] + ' ' + data['lname'], 'Team': self.team_name}}
39
49
  for data in roster_data]
40
50
 
41
- def get_roster(self, include_hometowns=False, include_class=False, include_events=False):
51
+ def get_roster(self, include_hometowns=False, include_class=False, include_events=False, force_update=False):
42
52
  if not hasattr(self, 'raw_roster'):
43
- self._get_raw_roster()
53
+ self._get_raw_roster(force_update=force_update)
44
54
 
45
55
  extra_cols = get_extra_cols(include_hometowns=include_hometowns, include_class=include_class, include_events=include_events)
46
56
 
@@ -52,7 +62,10 @@ class RtnSingleTeamYear(object):
52
62
 
53
63
  return self.roster
54
64
 
55
- def _get_raw_season_results(self):
65
+ def _get_raw_season_results(self, force_update=False):
66
+ if force_update:
67
+ get_data_from_api.cache_clear()
68
+
56
69
  meets = get_data_from_api(endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
57
70
  name_map = {'team_id': 'Team ID', 'team_name': 'Team', 'meet_id': 'Team Meet ID',
58
71
  'meet_date': 'Meet Date', 'team_score': 'Score', 'home': 'Home/Away',
@@ -60,12 +73,12 @@ class RtnSingleTeamYear(object):
60
73
 
61
74
  self._raw_season_results = [{name_map.get(k, k): fix_opponents(v) if k == 'opponent'
62
75
  else (normalize_date(v) if k == 'meet_date' else v)
63
- for k, v in data.items() if k != 'jas'} for data in meets['meets']]
76
+ for k, v in data.items() if k != 'jas'} for data in meets['meets'] if data['team_name'] == self.team_name]
64
77
  self._raw_schedule = [{k: v for k, v in data.items() if k not in ('Score', 'VT', 'UB', 'BB', 'FX')} for data in self._raw_season_results]
65
78
 
66
- def get_schedule(self):
79
+ def get_schedule(self, force_update=False):
67
80
  if not hasattr(self, '_raw_schedule'):
68
- self._get_raw_season_results()
81
+ self._get_raw_season_results(force_update=force_update)
69
82
 
70
83
  if len(self._raw_schedule) > 0:
71
84
  return pd.DataFrame(self._raw_schedule)
@@ -87,28 +100,31 @@ class RtnSingleTeamYear(object):
87
100
  * Uses team meet id to join back to meet info, such as opponent, etc.
88
101
  """
89
102
  if not hasattr(self, '_raw_season_results'):
90
- self._get_raw_season_results()
103
+ self._get_raw_season_results(force_update=force_update)
91
104
 
92
105
  if len(self._raw_season_results) > 0:
93
106
  if (len({'VT', 'UB', 'BB', 'FX'}.intersection(self._raw_season_results[0].keys())) != 4 or force_update):
94
107
  if method == 'team_consistency':
95
- self._team_event_scores_team_consistency()
108
+ self._team_event_scores_team_consistency(force_update=force_update)
96
109
  elif method == 'by_meet':
97
- self._team_event_scores_by_meet()
110
+ self._team_event_scores_by_meet(force_update=force_update)
98
111
  else:
99
112
  raise ValueError('Method must be "team_consistency" or "by_meet"')
100
113
 
101
114
  # TODO: different way to drop duplicates?
102
- self.season_results = pd.DataFrame(self._raw_season_results).drop_duplicates()
115
+ self.season_results = pd.DataFrame(self._raw_season_results).dropna(subset=['Score']).drop_duplicates()
103
116
  else:
104
117
  self.season_results = pd.DataFrame(columns=SCHEDULE_COLS + RESULTS_COLS)
105
118
 
106
119
  return self.season_results
107
120
 
108
- def _team_event_scores_by_meet(self):
121
+ def _team_event_scores_by_meet(self, force_update=False):
109
122
  team_scores_all = []
110
- for meet_id in [data['Team Meet ID'] for data in self._raw_season_results]:
123
+ for meet_id in [data['Team Meet ID'] for data in self._raw_season_results if data['Meet Date'] <= datetime.now()]:
111
124
  try:
125
+ if force_update:
126
+ get_data_from_api.cache_clear()
127
+
112
128
  meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
113
129
  # This API call returns scores from all teams at this meet, not just this team. Need to pick out correct score
114
130
  team_scores = [score for score in meet_res['teams'] if score['tname'] == self.team_name and score['mid'] == str(meet_id)]
@@ -124,10 +140,13 @@ class RtnSingleTeamYear(object):
124
140
  for i in range(len(self._raw_season_results)):
125
141
  self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
126
142
 
127
- def _team_event_scores_team_consistency(self):
143
+ def _team_event_scores_team_consistency(self, force_update=False):
144
+ if force_update:
145
+ get_data_from_api.cache_clear()
146
+
128
147
  res = get_data_from_api(endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
129
148
  if len(res['labels']) == 0:
130
- print(f'No team consistency data found for year {self.year}')
149
+ print(f'No team consistency data found for {self.team_name} in {self.year}')
131
150
  for i in range(len(self._raw_season_results)):
132
151
  self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
133
152
  else:
@@ -145,12 +164,10 @@ class RtnSingleTeamYear(object):
145
164
  Methods:
146
165
  * Individual Consistency - Uses Individual Consistency tab from RTN
147
166
  * Tends to have more complete data, especially for older years
148
- * Requires summing of all events to get AA (code does this for you)
149
167
  * Relies on date to join back to meet info, such as opponent, etc.
150
168
  * One API call per gymnast, relative speed depends on number of meets vs number of gymnasts
151
169
  * By Meet - loops through each meet to get scores
152
170
  * Older meets tend to be missing
153
- * Includes AA scores in the response
154
171
  * Uses team meet id to join back to meet info, such as opponent, etc.
155
172
  * One API call per meet, relative speed depends on number of meets vs number of gymnasts
156
173
  """
@@ -162,20 +179,31 @@ class RtnSingleTeamYear(object):
162
179
  if not hasattr(self, '_raw_roster'):
163
180
  self.get_roster()
164
181
 
165
- self._individual_scores_individual_consistency()
182
+ self._individual_scores_individual_consistency(force_update=force_update)
166
183
  elif method == 'by_meet':
167
- self._individual_scores_by_meet()
184
+ self._individual_scores_by_meet(force_update=force_update)
168
185
  else:
169
186
  raise ValueError('Method must be "individual_consistency" or "by_meet"')
170
187
 
171
188
  return self.individual_results
172
189
 
173
- def _individual_scores_by_meet(self):
190
+ def _individual_scores_by_meet(self, force_update=False):
174
191
  individual_scores_all = []
175
- for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule]:
192
+ for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule if meet['Meet Date'] <= datetime.now()]:
176
193
  try:
194
+ if force_update:
195
+ get_data_from_api.cache_clear()
196
+
177
197
  meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
178
- team_inds = [ind for ind, scores in enumerate(meet_res['scores']) if len(scores) > 0 and scores[0]['team_name'] == self.team_name]
198
+ if len(meet_res) == 0 or len(meet_res['scores']) == 0 or len(meet_res['scores'][0]) == 0:
199
+ print(f'No data found for meet {meet_id}')
200
+ continue
201
+
202
+ if 'team_name' in meet_res['scores'][0][0]:
203
+ team_inds = [ind for ind, scores in enumerate(meet_res['scores']) if len(scores) > 0 and scores[0]['team_name'] == self.team_name]
204
+ else:
205
+ raise ValueError('Key not found')
206
+
179
207
  if len(team_inds) == 0:
180
208
  print(f'No scores found at meet {meet_id}')
181
209
  continue
@@ -195,15 +223,19 @@ class RtnSingleTeamYear(object):
195
223
  if len(individual_scores_all) > 0:
196
224
  merge_dicts(dict1=individual_scores_all, dict2=self._raw_schedule, merge_field='Team Meet ID')
197
225
  self.individual_results = pd.DataFrame(individual_scores_all)
226
+ self.individual_results['AA'] = self.individual_results[['VT', 'UB', 'BB', 'FX']].dropna(how='any').astype(float).T.sum().round(4)
198
227
  else:
199
228
  self.individual_results = pd.DataFrame(columns=['Meet Date', 'VT', 'UB', 'BB', 'FX', 'AA', 'Gymnast ID', 'Name',
200
229
  'Team ID', 'Team', 'Team Meet ID', 'Home/Away', 'Opponents',
201
230
  'Meet Name', 'Meet ID'])
202
231
 
203
- def _individual_scores_individual_consistency(self):
232
+ def _individual_scores_individual_consistency(self, force_update=False):
204
233
  ind_consistency_all = []
205
234
  for gymnast in self._raw_roster:
206
235
  try:
236
+ if force_update:
237
+ get_data_from_api.cache_clear()
238
+
207
239
  res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
208
240
  ind_consistency = [{'Meet Date': normalize_date(res['labels'][i][:7] + str(self.year), dt_format='%b-%d-%Y'),
209
241
  'VT': round(float(res['vts'][i]), 4) if res['vts'][i] is not None else np.nan,
@@ -225,22 +257,24 @@ class RtnSingleTeamYear(object):
225
257
  else:
226
258
  self.individual_results = pd.DataFrame(columns=SCHEDULE_COLS + IND_RESULTS_COLS)
227
259
 
228
- def get_individual_nqs(self):
260
+ def get_individual_nqs(self, force_update=False):
229
261
  if not hasattr(self, '_raw_roster'):
230
- self._get_raw_roster()
262
+ self._get_raw_roster(force_update=force_update)
231
263
 
232
264
  if not hasattr(self, '_raw_individual_nqs'):
233
- self._get_raw_individual_nqs()
265
+ self._get_raw_individual_nqs(force_update=force_update)
234
266
 
235
267
  if len(self._raw_individual_nqs) > 0:
236
268
  return pd.DataFrame(self._raw_individual_nqs)
237
269
  else:
238
270
  return pd.DataFrame(columns=ROSTER_COLS + EVENTS) # + ['AA'])
239
271
 
240
- def _get_raw_individual_nqs(self):
272
+ def _get_raw_individual_nqs(self, force_update=False):
241
273
  name_map = {'maxv': 'VT', 'maxub': 'UB', 'maxbb': 'BB', 'maxfx': 'FX',
242
274
  # 'maxaa': 'AA',
243
275
  'gid': 'Gymnast ID'}
276
+ if force_update:
277
+ get_data_from_api.cache_clear()
244
278
 
245
279
  nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
246
280
  ind_nqs = [{name_map[k]: round(float(v), 4) if k != 'gid' and v != '' else (np.nan if k != 'gid' else v)
@@ -253,17 +287,24 @@ class RtnSingleTeamYear(object):
253
287
  else:
254
288
  self._raw_individual_nqs = []
255
289
 
256
- def _get_current_week(self):
290
+ def _get_current_week(self, force_update=False):
257
291
  if not hasattr(self, 'week'):
258
- return get_data_from_api(endpoint='currentweek', suffix=str(self.year)).json()['max']
292
+ if force_update:
293
+ get_data_from_api.cache_clear()
294
+
295
+ week_data = get_data_from_api(endpoint='currentweek', suffix=str(self.year)).json()
296
+ return min(int(week_data['week']), int(week_data['max']))
259
297
 
260
- def _get_raw_rankings(self, team_vs_ind, event, week):
298
+ def _get_raw_rankings(self, team_vs_ind, event, week, force_update=False):
261
299
  team_ind_map = {'team': 0, 'ind': 1}
262
300
  event_api_map = {'VT': 1, 'UB': 2, 'BB': 3, 'FX': 4, 'AA': 5}
263
301
  rename_map = {'rank': 'Rank', 'gid': 'Gymnast ID', 'team': 'Team', 'tid': 'Team ID',
264
302
  'rqs': 'NQS', 'reg': 'Region', 'con': 'Conference', 'div': 'Division',
265
303
  'usag': 'USAG', 'ave': 'Average', 'high': 'High', 'name': 'Team'}
266
304
 
305
+ if force_update:
306
+ get_data_from_api.cache_clear()
307
+
267
308
  res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
268
309
  if team_vs_ind == 'ind':
269
310
  self._raw_rankings[team_vs_ind][event] = [{**{rename_map.get(k): float(v) if k in ['rqs', 'ave', 'high'] else v for k, v in data.items() if k in rename_map},
@@ -274,9 +315,9 @@ class RtnSingleTeamYear(object):
274
315
  **{'Event': event}}
275
316
  for data in res['data']]
276
317
 
277
- def get_overall_rankings(self, team_vs_ind='team', event='AA', week=None):
318
+ def get_overall_rankings(self, team_vs_ind='team', event='AA', week=None, force_update=False):
278
319
  if not week:
279
- week = self._get_current_week()
320
+ week = self._get_current_week(force_update=force_update)
280
321
 
281
322
  if not hasattr(self, '_raw_rankings'):
282
323
  self._raw_rankings = {'team': {event: None for event in EVENT_MAP.values()},
@@ -288,7 +329,7 @@ class RtnSingleTeamYear(object):
288
329
  'Division', 'Conference', 'Region', 'USAG']}
289
330
 
290
331
  if self._raw_rankings[team_vs_ind][event] is None:
291
- self._get_raw_rankings(team_vs_ind=team_vs_ind, event=event, week=week)
332
+ self._get_raw_rankings(team_vs_ind=team_vs_ind, event=event, week=week, force_update=force_update)
292
333
 
293
334
  return pd.DataFrame(self._raw_rankings[team_vs_ind][event])[col_orders[team_vs_ind]]
294
335
 
@@ -2,26 +2,28 @@ from .RtnSingleTeamYear import RtnSingleTeamYear
2
2
  from .src import validate_input, get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, EVENTS, ROSTER_COLS
3
3
  import pandas as pd
4
4
 
5
+ BLANK_SPACES = ' '*30
6
+
5
7
 
6
8
  def save(df, filename):
7
9
  df.to_csv(filename, index=False)
8
10
 
9
11
 
10
- def all_teams(year):
12
+ def all_teams(year, force_update=False):
11
13
  rtn = RtnSingleTeamYear(year=year, team_name=None)
12
- return list(rtn.get_team_mapping().keys())
14
+ return list(rtn.get_team_mapping(force_update=force_update).keys())
13
15
 
14
16
 
15
- def roster(year, teams, include_hometowns=False, include_class=False, include_events=False, verbose=False):
17
+ def roster(year, teams, include_hometowns=False, include_class=False, include_events=False, verbose=False, force_update=False):
16
18
  teams = validate_input(teams)
17
19
 
18
20
  all_rosters = []
19
- for team in teams:
21
+ for i, team in enumerate(teams):
20
22
  if verbose:
21
- print(f'Getting roster for {team} ', end='\r')
23
+ print(f'Getting roster for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
22
24
  rtn = RtnSingleTeamYear(year=year, team_name=team)
23
25
  res = rtn.get_roster(include_hometowns=include_hometowns, include_class=include_class,
24
- include_events=include_events)
26
+ include_events=include_events, force_update=force_update if i == 0 else False)
25
27
  if verbose and len(res) == 0:
26
28
  print(f'\tNo roster found for {team}')
27
29
  all_rosters.append(res)
@@ -30,15 +32,15 @@ def roster(year, teams, include_hometowns=False, include_class=False, include_ev
30
32
  return pd.concat(all_rosters)[ROSTER_COLS + extra_cols]
31
33
 
32
34
 
33
- def schedule(year, teams, verbose=False):
35
+ def schedule(year, teams, verbose=False, force_update=False):
34
36
  teams = validate_input(teams)
35
37
 
36
38
  all_schedules = []
37
- for team in teams:
39
+ for i, team in enumerate(teams):
38
40
  if verbose:
39
- print(f'Getting schedule for {team} ', end='\r')
41
+ print(f'Getting schedule for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
40
42
  rtn = RtnSingleTeamYear(year=year, team_name=team)
41
- res = rtn.get_schedule()
43
+ res = rtn.get_schedule(force_update=force_update if i == 0 else False)
42
44
  if verbose and len(res) == 0:
43
45
  print(f'\tNo schedule found for {team}')
44
46
  all_schedules.append(res)
@@ -48,13 +50,13 @@ def schedule(year, teams, verbose=False):
48
50
 
49
51
  def team_results(year, teams, method='team_consistency', force_update=False, verbose=False):
50
52
  teams = validate_input(teams)
51
-
53
+
52
54
  all_results = []
53
- for team in teams:
55
+ for i, team in enumerate(teams):
54
56
  if verbose:
55
- print(f'Getting schedule and results for {team} ', end='\r')
57
+ print(f'Getting schedule and results for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
56
58
  rtn = RtnSingleTeamYear(year=year, team_name=team)
57
- res = rtn.get_team_scores(method=method,force_update=force_update)
59
+ res = rtn.get_team_scores(method=method, force_update=force_update if i == 0 else False)
58
60
  if verbose and len(res) == 0:
59
61
  print(f'\tNo schedule and results found for {team}')
60
62
  all_results.append(res)
@@ -64,13 +66,13 @@ def team_results(year, teams, method='team_consistency', force_update=False, ver
64
66
 
65
67
  def individual_results(year, teams, method='by_meet', force_update=False, verbose=False):
66
68
  teams = validate_input(teams)
67
-
69
+
68
70
  all_scores = []
69
- for team in teams:
71
+ for i, team in enumerate(teams):
70
72
  if verbose:
71
- print(f'Getting scores for {team} ', end='\r')
73
+ print(f'Getting scores for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
72
74
  rtn = RtnSingleTeamYear(year=year, team_name=team)
73
- res = rtn.get_individual_scores(method=method, force_update=force_update)
75
+ res = rtn.get_individual_scores(method=method, force_update=force_update if i == 0 else False)
74
76
  if verbose and len(res) == 0:
75
77
  print(f'\tNo scores found for {team}')
76
78
  all_scores.append(res)
@@ -78,15 +80,15 @@ def individual_results(year, teams, method='by_meet', force_update=False, verbos
78
80
  return pd.concat(all_scores)[SCHEDULE_COLS + IND_RESULTS_COLS]
79
81
 
80
82
 
81
- def individual_nqs(year, teams, verbose=False):
83
+ def individual_nqs(year, teams, verbose=False, force_update=False):
82
84
  teams = validate_input(teams)
83
-
85
+
84
86
  all_nqs = []
85
- for team in teams:
87
+ for i, team in enumerate(teams):
86
88
  if verbose:
87
- print(f'Getting individual NQS for {team} ', end='\r')
89
+ print(f'Getting individual NQS for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
88
90
  rtn = RtnSingleTeamYear(year=year, team_name=team)
89
- res = rtn.get_individual_nqs()
91
+ res = rtn.get_individual_nqs(force_update=force_update if i == 0 else False)
90
92
  if verbose and len(res) == 0:
91
93
  print(f'\tNo individual NQS found for {team}')
92
94
  all_nqs.append(res)
@@ -94,6 +96,6 @@ def individual_nqs(year, teams, verbose=False):
94
96
  return pd.concat(all_nqs)[ROSTER_COLS + EVENTS] # + ['AA']]
95
97
 
96
98
 
97
- def rankings(year, team_vs_ind='team', event='AA', week=None):
99
+ def rankings(year, team_vs_ind='team', event='AA', week=None, force_update=False):
98
100
  rtn = RtnSingleTeamYear(year=year, team_name=None)
99
- return rtn.get_overall_rankings(team_vs_ind=team_vs_ind, event=event, week=week)
101
+ return rtn.get_overall_rankings(team_vs_ind=team_vs_ind, event=event, week=week, force_update=force_update)
@@ -1,4 +1,6 @@
1
1
  import requests
2
+ from requests.adapters import HTTPAdapter
3
+ from urllib3.util.retry import Retry
2
4
  from functools import lru_cache
3
5
  from datetime import datetime
4
6
 
@@ -21,14 +23,24 @@ def validate_input(teams):
21
23
 
22
24
  @lru_cache(maxsize=1000000)
23
25
  def get_data_from_api(endpoint, suffix):
26
+ session = requests.Session()
27
+ retry = Retry(connect=3, backoff_factor=0.5)
28
+ adapter = HTTPAdapter(max_retries=retry)
29
+ session.mount('http://', adapter)
30
+ session.mount('https://', adapter)
31
+
24
32
  url = 'https://www.roadtonationals.com/api/women/' + endpoint
25
33
  if suffix:
26
34
  url += '/' + suffix
27
- return requests.get(url)
35
+
36
+ return session.get(url)
28
37
 
29
38
 
30
39
  def fix_opponents(ops):
31
- ops = (ops.replace(', ', '/').replace(' and ', '/').replace(' @ ', '/').replace(' w/ ', '/').replace(' with ', '/'))
40
+ if not isinstance(ops, str):
41
+ return ops
42
+
43
+ ops = (ops.replace(', ', '/').replace(',','/').replace(' and ', '/').replace(' @ ', '/').replace(' w/ ', '/').replace(' with ', '/'))
32
44
 
33
45
  if 'william & mary' in ops.lower():
34
46
  # Todo: title case is going to mess with other opponents here, full mapping might fix that
@@ -1,15 +1,15 @@
1
- Metadata-Version: 2.1
2
- Name: scraping-rtn
3
- Version: 0.0.1
1
+ Metadata-Version: 2.4
2
+ Name: scraping_rtn
3
+ Version: 0.0.8.2
4
4
  Summary: package to scrape gymnastics data from Road To Nationals
5
5
  Author-email: Claire Harmon <ceharmon220@gmail.com>
6
- Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/ceharmon/scraping_rtn
7
8
  Classifier: Programming Language :: Python :: 3
8
9
  Classifier: License :: OSI Approved :: MIT License
9
10
  Classifier: Operating System :: OS Independent
10
11
  Requires-Python: >=3.9
11
12
  Description-Content-Type: text/markdown
12
- License-File: LICENSE
13
13
  Requires-Dist: pandas>=1.5.3
14
14
  Requires-Dist: numpy>=1.23.5
15
15
  Requires-Dist: requests>=2.28.1
@@ -1,4 +1,3 @@
1
- LICENSE
2
1
  README.md
3
2
  pyproject.toml
4
3
  src/scraping_rtn/RtnSingleTeamYear.py
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2023 Claire Harmon
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
File without changes
File without changes