scraping-rtn 0.0.5__tar.gz → 0.0.6.0__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of scraping-rtn might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scraping_rtn
3
- Version: 0.0.5
3
+ Version: 0.0.6.0
4
4
  Summary: package to scrape gymnastics data from Road To Nationals
5
5
  Author-email: Claire Harmon <ceharmon220@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scraping_rtn"
7
- version = "0.0.5"
7
+ version = "0.0.6.0"
8
8
  authors = [
9
9
  { name="Claire Harmon", email="ceharmon220@gmail.com" },
10
10
  ]
@@ -1,5 +1,5 @@
1
- from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts, get_extra_cols, \
2
- SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
1
+ from .src import EVENT_MAP, EVENTS, session_setup, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
2
+ get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
3
3
  import pandas as pd
4
4
  import numpy as np
5
5
  from datetime import datetime
@@ -15,8 +15,14 @@ class RtnSingleTeamYear(object):
15
15
  else:
16
16
  self.team_id = team_id
17
17
 
18
+ def connect_session(self):
19
+ self.session = session_setup()
20
+
18
21
  def get_team_mapping(self):
19
- all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
22
+ if not hasattr(self, 'session'):
23
+ self.connect_session()
24
+
25
+ all_teams_data = get_data_from_api(session=self.session, endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
20
26
  return {team['team_name']: team['id'] for team in all_teams_data['teams']}
21
27
 
22
28
  def get_team_id(self):
@@ -33,10 +39,13 @@ class RtnSingleTeamYear(object):
33
39
  # raise ValueError(f'{self.team_name} does not exist in data for {self.year}')
34
40
 
35
41
  def _get_raw_roster(self):
42
+ if not hasattr(self, 'session'):
43
+ self.connect_session()
44
+
36
45
  rename_map = {'id': 'Gymnast ID', 'hometown': 'Hometown', 'school_year': 'School Year', 'events': 'Events'}
37
46
  school_year_map = {'1': 'FR', '2': 'SO', '3': 'JR', '4': 'SR'}
38
47
 
39
- roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
48
+ roster_data = get_data_from_api(session=self.session, endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
40
49
 
41
50
  self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map.get(v, '') for k, v in data.items()},
42
51
  **{'Name': data['fname'] + ' ' + data['lname'], 'Team': self.team_name}}
@@ -57,7 +66,10 @@ class RtnSingleTeamYear(object):
57
66
  return self.roster
58
67
 
59
68
  def _get_raw_season_results(self):
60
- meets = get_data_from_api(endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
69
+ if not hasattr(self, 'session'):
70
+ self.connect_session()
71
+
72
+ meets = get_data_from_api(session=self.session, endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
61
73
  name_map = {'team_id': 'Team ID', 'team_name': 'Team', 'meet_id': 'Team Meet ID',
62
74
  'meet_date': 'Meet Date', 'team_score': 'Score', 'home': 'Home/Away',
63
75
  'opponent': 'Opponents', 'meet_desc': 'Meet Name', 'linked_id': 'Meet ID'}
@@ -110,10 +122,13 @@ class RtnSingleTeamYear(object):
110
122
  return self.season_results
111
123
 
112
124
  def _team_event_scores_by_meet(self):
125
+ if not hasattr(self, 'session'):
126
+ self.connect_session()
127
+
113
128
  team_scores_all = []
114
129
  for meet_id in [data['Team Meet ID'] for data in self._raw_season_results if data['Meet Date'] <= datetime.now()]:
115
130
  try:
116
- meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
131
+ meet_res = get_data_from_api(session=self.session, endpoint='meetresults', suffix=str(meet_id)).json()
117
132
  # This API call returns scores from all teams at this meet, not just this team. Need to pick out correct score
118
133
  team_scores = [score for score in meet_res['teams'] if score['tname'] == self.team_name and score['mid'] == str(meet_id)]
119
134
  assert len(team_scores) == 1, 'Multiple team scores??'
@@ -129,7 +144,10 @@ class RtnSingleTeamYear(object):
129
144
  self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
130
145
 
131
146
  def _team_event_scores_team_consistency(self):
132
- res = get_data_from_api(endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
147
+ if not hasattr(self, 'session'):
148
+ self.connect_session()
149
+
150
+ res = get_data_from_api(session=self.session, endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
133
151
  if len(res['labels']) == 0:
134
152
  print(f'No team consistency data found for {self.team_name} in {self.year}')
135
153
  for i in range(len(self._raw_season_results)):
@@ -173,10 +191,13 @@ class RtnSingleTeamYear(object):
173
191
  return self.individual_results
174
192
 
175
193
  def _individual_scores_by_meet(self):
194
+ if not hasattr(self, 'session'):
195
+ self.connect_session()
196
+
176
197
  individual_scores_all = []
177
198
  for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule if meet['Meet Date'] <= datetime.now()]:
178
199
  try:
179
- meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
200
+ meet_res = get_data_from_api(session=self.session, endpoint='meetresults', suffix=str(meet_id)).json()
180
201
  if len(meet_res) == 0 or len(meet_res['scores']) == 0 or len(meet_res['scores'][0]) == 0:
181
202
  print(f'No data found for meet {meet_id}')
182
203
  continue
@@ -212,10 +233,13 @@ class RtnSingleTeamYear(object):
212
233
  'Meet Name', 'Meet ID'])
213
234
 
214
235
  def _individual_scores_individual_consistency(self):
236
+ if not hasattr(self, 'session'):
237
+ self.connect_session()
238
+
215
239
  ind_consistency_all = []
216
240
  for gymnast in self._raw_roster:
217
241
  try:
218
- res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
242
+ res = get_data_from_api(session=self.session, endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
219
243
  ind_consistency = [{'Meet Date': normalize_date(res['labels'][i][:7] + str(self.year), dt_format='%b-%d-%Y'),
220
244
  'VT': round(float(res['vts'][i]), 4) if res['vts'][i] is not None else np.nan,
221
245
  'UB': round(float(res['ubs'][i]), 4) if res['ubs'][i] is not None else np.nan,
@@ -249,11 +273,14 @@ class RtnSingleTeamYear(object):
249
273
  return pd.DataFrame(columns=ROSTER_COLS + EVENTS) # + ['AA'])
250
274
 
251
275
  def _get_raw_individual_nqs(self):
276
+ if not hasattr(self, 'session'):
277
+ self.connect_session()
278
+
252
279
  name_map = {'maxv': 'VT', 'maxub': 'UB', 'maxbb': 'BB', 'maxfx': 'FX',
253
280
  # 'maxaa': 'AA',
254
281
  'gid': 'Gymnast ID'}
255
282
 
256
- nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
283
+ nqsData = get_data_from_api(session=self.session, endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
257
284
  ind_nqs = [{name_map[k]: round(float(v), 4) if k != 'gid' and v != '' else (np.nan if k != 'gid' else v)
258
285
  for k, v in data.items() if k in name_map.keys()} for data in nqsData['ind']]
259
286
 
@@ -265,17 +292,24 @@ class RtnSingleTeamYear(object):
265
292
  self._raw_individual_nqs = []
266
293
 
267
294
  def _get_current_week(self):
295
+ if not hasattr(self, 'session'):
296
+ self.connect_session()
297
+
268
298
  if not hasattr(self, 'week'):
269
- return get_data_from_api(endpoint='currentweek', suffix=str(self.year)).json()['max']
299
+ week_data = get_data_from_api(session=self.session, endpoint='currentweek', suffix=str(self.year)).json()
300
+ return min(int(week_data['week']), int(week_data['max']))
270
301
 
271
302
  def _get_raw_rankings(self, team_vs_ind, event, week):
303
+ if not hasattr(self, 'session'):
304
+ self.connect_session()
305
+
272
306
  team_ind_map = {'team': 0, 'ind': 1}
273
307
  event_api_map = {'VT': 1, 'UB': 2, 'BB': 3, 'FX': 4, 'AA': 5}
274
308
  rename_map = {'rank': 'Rank', 'gid': 'Gymnast ID', 'team': 'Team', 'tid': 'Team ID',
275
309
  'rqs': 'NQS', 'reg': 'Region', 'con': 'Conference', 'div': 'Division',
276
310
  'usag': 'USAG', 'ave': 'Average', 'high': 'High', 'name': 'Team'}
277
311
 
278
- res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
312
+ res = get_data_from_api(session=self.session, endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
279
313
  if team_vs_ind == 'ind':
280
314
  self._raw_rankings[team_vs_ind][event] = [{**{rename_map.get(k): float(v) if k in ['rqs', 'ave', 'high'] else v for k, v in data.items() if k in rename_map},
281
315
  **{'Name': data['fname'] + ' ' + data['lname'], 'Event': event}}
@@ -1,4 +1,6 @@
1
1
  import requests
2
+ from requests.adapters import HTTPAdapter
3
+ from urllib3.util.retry import Retry
2
4
  from functools import lru_cache
3
5
  from datetime import datetime
4
6
 
@@ -19,12 +21,22 @@ def validate_input(teams):
19
21
  return teams
20
22
 
21
23
 
24
+ def session_setup():
25
+ session = requests.Session()
26
+ retry = Retry(connect=3, backoff_factor=0.5)
27
+ adapter = HTTPAdapter(max_retries=retry)
28
+ session.mount('http://', adapter)
29
+ session.mount('https://', adapter)
30
+
31
+ return session
32
+
33
+
22
34
  @lru_cache(maxsize=1000000)
23
- def get_data_from_api(endpoint, suffix):
35
+ def get_data_from_api(session, endpoint, suffix):
24
36
  url = 'https://www.roadtonationals.com/api/women/' + endpoint
25
37
  if suffix:
26
38
  url += '/' + suffix
27
- return requests.get(url)
39
+ return session.get(url)
28
40
 
29
41
 
30
42
  def fix_opponents(ops):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scraping_rtn
3
- Version: 0.0.5
3
+ Version: 0.0.6.0
4
4
  Summary: package to scrape gymnastics data from Road To Nationals
5
5
  Author-email: Claire Harmon <ceharmon220@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
File without changes
File without changes
File without changes