scraping-rtn 0.0.6.0__tar.gz → 0.0.6.1__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scraping-rtn might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scraping_rtn
3
- Version: 0.0.6.0
3
+ Version: 0.0.6.1
4
4
  Summary: package to scrape gymnastics data from Road To Nationals
5
5
  Author-email: Claire Harmon <ceharmon220@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scraping_rtn"
7
- version = "0.0.6.0"
7
+ version = "0.0.6.1"
8
8
  authors = [
9
9
  { name="Claire Harmon", email="ceharmon220@gmail.com" },
10
10
  ]
@@ -1,4 +1,4 @@
1
- from .src import EVENT_MAP, EVENTS, session_setup, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
1
+ from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
2
2
  get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
3
3
  import pandas as pd
4
4
  import numpy as np
@@ -15,14 +15,8 @@ class RtnSingleTeamYear(object):
15
15
  else:
16
16
  self.team_id = team_id
17
17
 
18
- def connect_session(self):
19
- self.session = session_setup()
20
-
21
18
  def get_team_mapping(self):
22
- if not hasattr(self, 'session'):
23
- self.connect_session()
24
-
25
- all_teams_data = get_data_from_api(session=self.session, endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
19
+ all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
26
20
  return {team['team_name']: team['id'] for team in all_teams_data['teams']}
27
21
 
28
22
  def get_team_id(self):
@@ -39,13 +33,10 @@ class RtnSingleTeamYear(object):
39
33
  # raise ValueError(f'{self.team_name} does not exist in data for {self.year}')
40
34
 
41
35
  def _get_raw_roster(self):
42
- if not hasattr(self, 'session'):
43
- self.connect_session()
44
-
45
36
  rename_map = {'id': 'Gymnast ID', 'hometown': 'Hometown', 'school_year': 'School Year', 'events': 'Events'}
46
37
  school_year_map = {'1': 'FR', '2': 'SO', '3': 'JR', '4': 'SR'}
47
38
 
48
- roster_data = get_data_from_api(session=self.session, endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
39
+ roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
49
40
 
50
41
  self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map.get(v, '') for k, v in data.items()},
51
42
  **{'Name': data['fname'] + ' ' + data['lname'], 'Team': self.team_name}}
@@ -66,10 +57,7 @@ class RtnSingleTeamYear(object):
66
57
  return self.roster
67
58
 
68
59
  def _get_raw_season_results(self):
69
- if not hasattr(self, 'session'):
70
- self.connect_session()
71
-
72
- meets = get_data_from_api(session=self.session, endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
60
+ meets = get_data_from_api(endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
73
61
  name_map = {'team_id': 'Team ID', 'team_name': 'Team', 'meet_id': 'Team Meet ID',
74
62
  'meet_date': 'Meet Date', 'team_score': 'Score', 'home': 'Home/Away',
75
63
  'opponent': 'Opponents', 'meet_desc': 'Meet Name', 'linked_id': 'Meet ID'}
@@ -122,13 +110,10 @@ class RtnSingleTeamYear(object):
122
110
  return self.season_results
123
111
 
124
112
  def _team_event_scores_by_meet(self):
125
- if not hasattr(self, 'session'):
126
- self.connect_session()
127
-
128
113
  team_scores_all = []
129
114
  for meet_id in [data['Team Meet ID'] for data in self._raw_season_results if data['Meet Date'] <= datetime.now()]:
130
115
  try:
131
- meet_res = get_data_from_api(session=self.session, endpoint='meetresults', suffix=str(meet_id)).json()
116
+ meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
132
117
  # This API call returns scores from all teams at this meet, not just this team. Need to pick out correct score
133
118
  team_scores = [score for score in meet_res['teams'] if score['tname'] == self.team_name and score['mid'] == str(meet_id)]
134
119
  assert len(team_scores) == 1, 'Multiple team scores??'
@@ -144,10 +129,7 @@ class RtnSingleTeamYear(object):
144
129
  self._raw_season_results[i].update({'VT': np.nan, 'UB': np.nan, 'BB': np.nan, 'FX': np.nan})
145
130
 
146
131
  def _team_event_scores_team_consistency(self):
147
- if not hasattr(self, 'session'):
148
- self.connect_session()
149
-
150
- res = get_data_from_api(session=self.session, endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
132
+ res = get_data_from_api(endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
151
133
  if len(res['labels']) == 0:
152
134
  print(f'No team consistency data found for {self.team_name} in {self.year}')
153
135
  for i in range(len(self._raw_season_results)):
@@ -191,13 +173,10 @@ class RtnSingleTeamYear(object):
191
173
  return self.individual_results
192
174
 
193
175
  def _individual_scores_by_meet(self):
194
- if not hasattr(self, 'session'):
195
- self.connect_session()
196
-
197
176
  individual_scores_all = []
198
177
  for meet_id in [meet['Team Meet ID'] for meet in self._raw_schedule if meet['Meet Date'] <= datetime.now()]:
199
178
  try:
200
- meet_res = get_data_from_api(session=self.session, endpoint='meetresults', suffix=str(meet_id)).json()
179
+ meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
201
180
  if len(meet_res) == 0 or len(meet_res['scores']) == 0 or len(meet_res['scores'][0]) == 0:
202
181
  print(f'No data found for meet {meet_id}')
203
182
  continue
@@ -233,13 +212,10 @@ class RtnSingleTeamYear(object):
233
212
  'Meet Name', 'Meet ID'])
234
213
 
235
214
  def _individual_scores_individual_consistency(self):
236
- if not hasattr(self, 'session'):
237
- self.connect_session()
238
-
239
215
  ind_consistency_all = []
240
216
  for gymnast in self._raw_roster:
241
217
  try:
242
- res = get_data_from_api(session=self.session, endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
218
+ res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
243
219
  ind_consistency = [{'Meet Date': normalize_date(res['labels'][i][:7] + str(self.year), dt_format='%b-%d-%Y'),
244
220
  'VT': round(float(res['vts'][i]), 4) if res['vts'][i] is not None else np.nan,
245
221
  'UB': round(float(res['ubs'][i]), 4) if res['ubs'][i] is not None else np.nan,
@@ -273,14 +249,11 @@ class RtnSingleTeamYear(object):
273
249
  return pd.DataFrame(columns=ROSTER_COLS + EVENTS) # + ['AA'])
274
250
 
275
251
  def _get_raw_individual_nqs(self):
276
- if not hasattr(self, 'session'):
277
- self.connect_session()
278
-
279
252
  name_map = {'maxv': 'VT', 'maxub': 'UB', 'maxbb': 'BB', 'maxfx': 'FX',
280
253
  # 'maxaa': 'AA',
281
254
  'gid': 'Gymnast ID'}
282
255
 
283
- nqsData = get_data_from_api(session=self.session, endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
256
+ nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
284
257
  ind_nqs = [{name_map[k]: round(float(v), 4) if k != 'gid' and v != '' else (np.nan if k != 'gid' else v)
285
258
  for k, v in data.items() if k in name_map.keys()} for data in nqsData['ind']]
286
259
 
@@ -292,24 +265,18 @@ class RtnSingleTeamYear(object):
292
265
  self._raw_individual_nqs = []
293
266
 
294
267
  def _get_current_week(self):
295
- if not hasattr(self, 'session'):
296
- self.connect_session()
297
-
298
268
  if not hasattr(self, 'week'):
299
- week_data = get_data_from_api(session=self.session, endpoint='currentweek', suffix=str(self.year)).json()
269
+ week_data = get_data_from_api(endpoint='currentweek', suffix=str(self.year)).json()
300
270
  return min(int(week_data['week']), int(week_data['max']))
301
271
 
302
272
  def _get_raw_rankings(self, team_vs_ind, event, week):
303
- if not hasattr(self, 'session'):
304
- self.connect_session()
305
-
306
273
  team_ind_map = {'team': 0, 'ind': 1}
307
274
  event_api_map = {'VT': 1, 'UB': 2, 'BB': 3, 'FX': 4, 'AA': 5}
308
275
  rename_map = {'rank': 'Rank', 'gid': 'Gymnast ID', 'team': 'Team', 'tid': 'Team ID',
309
276
  'rqs': 'NQS', 'reg': 'Region', 'con': 'Conference', 'div': 'Division',
310
277
  'usag': 'USAG', 'ave': 'Average', 'high': 'High', 'name': 'Team'}
311
278
 
312
- res = get_data_from_api(session=self.session, endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
279
+ res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
313
280
  if team_vs_ind == 'ind':
314
281
  self._raw_rankings[team_vs_ind][event] = [{**{rename_map.get(k): float(v) if k in ['rqs', 'ave', 'high'] else v for k, v in data.items() if k in rename_map},
315
282
  **{'Name': data['fname'] + ' ' + data['lname'], 'Event': event}}
@@ -21,21 +21,18 @@ def validate_input(teams):
21
21
  return teams
22
22
 
23
23
 
24
- def session_setup():
24
+ @lru_cache(maxsize=1000000)
25
+ def get_data_from_api(endpoint, suffix):
25
26
  session = requests.Session()
26
27
  retry = Retry(connect=3, backoff_factor=0.5)
27
28
  adapter = HTTPAdapter(max_retries=retry)
28
29
  session.mount('http://', adapter)
29
30
  session.mount('https://', adapter)
30
31
 
31
- return session
32
-
33
-
34
- @lru_cache(maxsize=1000000)
35
- def get_data_from_api(session, endpoint, suffix):
36
32
  url = 'https://www.roadtonationals.com/api/women/' + endpoint
37
33
  if suffix:
38
34
  url += '/' + suffix
35
+
39
36
  return session.get(url)
40
37
 
41
38
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scraping_rtn
3
- Version: 0.0.6.0
3
+ Version: 0.0.6.1
4
4
  Summary: package to scrape gymnastics data from Road To Nationals
5
5
  Author-email: Claire Harmon <ceharmon220@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/cgn-charmon/scraping_rtn
File without changes
File without changes
File without changes