scraping-rtn 0.0.8.0__tar.gz → 0.0.8.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scraping_rtn
3
- Version: 0.0.8.0
3
+ Version: 0.0.8.2
4
4
  Summary: package to scrape gymnastics data from Road To Nationals
5
5
  Author-email: Claire Harmon <ceharmon220@gmail.com>
6
6
  License: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "scraping_rtn"
7
- version = "0.0.8.0"
7
+ version = "0.0.8.2"
8
8
  authors = [
9
9
  { name = "Claire Harmon", email = "ceharmon220@gmail.com" },
10
10
  ]
@@ -1,4 +1,4 @@
1
- from .src import EVENT_MAP, EVENTS, get_session, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
1
+ from .src import EVENT_MAP, EVENTS, get_data_from_api, fix_opponents, normalize_date, merge_dicts, \
2
2
  get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, ROSTER_COLS
3
3
  import pandas as pd
4
4
  import numpy as np
@@ -15,16 +15,11 @@ class RtnSingleTeamYear(object):
15
15
  else:
16
16
  self.team_id = team_id
17
17
 
18
- if session is None:
19
- self.session = get_session()
20
- else:
21
- self.session = session
22
-
23
18
  def get_team_mapping(self, force_update=False):
24
19
  if force_update:
25
20
  get_data_from_api.cache_clear()
26
21
 
27
- all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix=str(self.year) + '/1', session=self.session).json()
22
+ all_teams_data = get_data_from_api(endpoint='gymnasts2', suffix=str(self.year) + '/1').json()
28
23
  return {team['team_name']: team['id'] for team in all_teams_data['teams']}
29
24
 
30
25
  def get_team_id(self):
@@ -47,7 +42,7 @@ class RtnSingleTeamYear(object):
47
42
  if force_update:
48
43
  get_data_from_api.cache_clear()
49
44
 
50
- roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1', session=self.session).json()
45
+ roster_data = get_data_from_api(endpoint='rostermain', suffix=str(self.year)+'/'+str(self.team_id)+'/1').json()
51
46
 
52
47
  self._raw_roster = [{**{rename_map.get(k, k): v if k != 'school_year' else school_year_map.get(v, '') for k, v in data.items()},
53
48
  **{'Name': data['fname'] + ' ' + data['lname'], 'Team': self.team_name}}
@@ -71,7 +66,7 @@ class RtnSingleTeamYear(object):
71
66
  if force_update:
72
67
  get_data_from_api.cache_clear()
73
68
 
74
- meets = get_data_from_api(endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id), session=self.session).json()
69
+ meets = get_data_from_api(endpoint='dashboard', suffix=str(self.year)+'/'+str(self.team_id)).json()
75
70
  name_map = {'team_id': 'Team ID', 'team_name': 'Team', 'meet_id': 'Team Meet ID',
76
71
  'meet_date': 'Meet Date', 'team_score': 'Score', 'home': 'Home/Away',
77
72
  'opponent': 'Opponents', 'meet_desc': 'Meet Name', 'linked_id': 'Meet ID'}
@@ -130,7 +125,7 @@ class RtnSingleTeamYear(object):
130
125
  if force_update:
131
126
  get_data_from_api.cache_clear()
132
127
 
133
- meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id), session=self.session).json()
128
+ meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
134
129
  # This API call returns scores from all teams at this meet, not just this team. Need to pick out correct score
135
130
  team_scores = [score for score in meet_res['teams'] if score['tname'] == self.team_name and score['mid'] == str(meet_id)]
136
131
  assert len(team_scores) == 1, 'Multiple team scores??'
@@ -149,7 +144,7 @@ class RtnSingleTeamYear(object):
149
144
  if force_update:
150
145
  get_data_from_api.cache_clear()
151
146
 
152
- res = get_data_from_api(endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}', session=self.session).json()
147
+ res = get_data_from_api(endpoint='teamConsistency', suffix=f'{self.year}/{self.team_id}').json()
153
148
  if len(res['labels']) == 0:
154
149
  print(f'No team consistency data found for {self.team_name} in {self.year}')
155
150
  for i in range(len(self._raw_season_results)):
@@ -199,7 +194,7 @@ class RtnSingleTeamYear(object):
199
194
  if force_update:
200
195
  get_data_from_api.cache_clear()
201
196
 
202
- meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id), session=self.session).json()
197
+ meet_res = get_data_from_api(endpoint='meetresults', suffix=str(meet_id)).json()
203
198
  if len(meet_res) == 0 or len(meet_res['scores']) == 0 or len(meet_res['scores'][0]) == 0:
204
199
  print(f'No data found for meet {meet_id}')
205
200
  continue
@@ -241,7 +236,7 @@ class RtnSingleTeamYear(object):
241
236
  if force_update:
242
237
  get_data_from_api.cache_clear()
243
238
 
244
- res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}", session=self.session).json()
239
+ res = get_data_from_api(endpoint='indConsistency', suffix=f"{self.year}/{gymnast['Gymnast ID']}").json()
245
240
  ind_consistency = [{'Meet Date': normalize_date(res['labels'][i][:7] + str(self.year), dt_format='%b-%d-%Y'),
246
241
  'VT': round(float(res['vts'][i]), 4) if res['vts'][i] is not None else np.nan,
247
242
  'UB': round(float(res['ubs'][i]), 4) if res['ubs'][i] is not None else np.nan,
@@ -281,7 +276,7 @@ class RtnSingleTeamYear(object):
281
276
  if force_update:
282
277
  get_data_from_api.cache_clear()
283
278
 
284
- nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4', session=self.session).json()
279
+ nqsData = get_data_from_api(endpoint='rostermain', suffix=f'{self.year}/{self.team_id}/4').json()
285
280
  ind_nqs = [{name_map[k]: round(float(v), 4) if k != 'gid' and v != '' else (np.nan if k != 'gid' else v)
286
281
  for k, v in data.items() if k in name_map.keys()} for data in nqsData['ind']]
287
282
 
@@ -297,7 +292,7 @@ class RtnSingleTeamYear(object):
297
292
  if force_update:
298
293
  get_data_from_api.cache_clear()
299
294
 
300
- week_data = get_data_from_api(endpoint='currentweek', suffix=str(self.year), session=self.session).json()
295
+ week_data = get_data_from_api(endpoint='currentweek', suffix=str(self.year)).json()
301
296
  return min(int(week_data['week']), int(week_data['max']))
302
297
 
303
298
  def _get_raw_rankings(self, team_vs_ind, event, week, force_update=False):
@@ -310,7 +305,7 @@ class RtnSingleTeamYear(object):
310
305
  if force_update:
311
306
  get_data_from_api.cache_clear()
312
307
 
313
- res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}', session=self.session).json()
308
+ res = get_data_from_api(endpoint='results', suffix=f'{self.year}/{week}/{team_ind_map[team_vs_ind]}/{event_api_map[event]}').json()
314
309
  if team_vs_ind == 'ind':
315
310
  self._raw_rankings[team_vs_ind][event] = [{**{rename_map.get(k): float(v) if k in ['rqs', 'ave', 'high'] else v for k, v in data.items() if k in rename_map},
316
311
  **{'Name': data['fname'] + ' ' + data['lname'], 'Event': event}}
@@ -1,5 +1,5 @@
1
1
  from .RtnSingleTeamYear import RtnSingleTeamYear
2
- from .src import get_session, validate_input, get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, EVENTS, ROSTER_COLS
2
+ from .src import validate_input, get_extra_cols, SCHEDULE_COLS, RESULTS_COLS, IND_RESULTS_COLS, EVENTS, ROSTER_COLS
3
3
  import pandas as pd
4
4
 
5
5
  BLANK_SPACES = ' '*30
@@ -16,13 +16,12 @@ def all_teams(year, force_update=False):
16
16
 
17
17
  def roster(year, teams, include_hometowns=False, include_class=False, include_events=False, verbose=False, force_update=False):
18
18
  teams = validate_input(teams)
19
- session = get_session()
20
19
 
21
20
  all_rosters = []
22
21
  for i, team in enumerate(teams):
23
22
  if verbose:
24
23
  print(f'Getting roster for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
25
- rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
24
+ rtn = RtnSingleTeamYear(year=year, team_name=team)
26
25
  res = rtn.get_roster(include_hometowns=include_hometowns, include_class=include_class,
27
26
  include_events=include_events, force_update=force_update if i == 0 else False)
28
27
  if verbose and len(res) == 0:
@@ -35,13 +34,12 @@ def roster(year, teams, include_hometowns=False, include_class=False, include_ev
35
34
 
36
35
  def schedule(year, teams, verbose=False, force_update=False):
37
36
  teams = validate_input(teams)
38
- session = get_session()
39
37
 
40
38
  all_schedules = []
41
39
  for i, team in enumerate(teams):
42
40
  if verbose:
43
41
  print(f'Getting schedule for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
44
- rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
42
+ rtn = RtnSingleTeamYear(year=year, team_name=team)
45
43
  res = rtn.get_schedule(force_update=force_update if i == 0 else False)
46
44
  if verbose and len(res) == 0:
47
45
  print(f'\tNo schedule found for {team}')
@@ -52,13 +50,12 @@ def schedule(year, teams, verbose=False, force_update=False):
52
50
 
53
51
  def team_results(year, teams, method='team_consistency', force_update=False, verbose=False):
54
52
  teams = validate_input(teams)
55
- session = get_session()
56
53
 
57
54
  all_results = []
58
55
  for i, team in enumerate(teams):
59
56
  if verbose:
60
57
  print(f'Getting schedule and results for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
61
- rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
58
+ rtn = RtnSingleTeamYear(year=year, team_name=team)
62
59
  res = rtn.get_team_scores(method=method, force_update=force_update if i == 0 else False)
63
60
  if verbose and len(res) == 0:
64
61
  print(f'\tNo schedule and results found for {team}')
@@ -69,13 +66,12 @@ def team_results(year, teams, method='team_consistency', force_update=False, ver
69
66
 
70
67
  def individual_results(year, teams, method='by_meet', force_update=False, verbose=False):
71
68
  teams = validate_input(teams)
72
- session = get_session()
73
69
 
74
70
  all_scores = []
75
71
  for i, team in enumerate(teams):
76
72
  if verbose:
77
73
  print(f'Getting scores for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
78
- rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
74
+ rtn = RtnSingleTeamYear(year=year, team_name=team)
79
75
  res = rtn.get_individual_scores(method=method, force_update=force_update if i == 0 else False)
80
76
  if verbose and len(res) == 0:
81
77
  print(f'\tNo scores found for {team}')
@@ -86,13 +82,12 @@ def individual_results(year, teams, method='by_meet', force_update=False, verbos
86
82
 
87
83
  def individual_nqs(year, teams, verbose=False, force_update=False):
88
84
  teams = validate_input(teams)
89
- session = get_session()
90
85
 
91
86
  all_nqs = []
92
87
  for i, team in enumerate(teams):
93
88
  if verbose:
94
89
  print(f'Getting individual NQS for {team}{BLANK_SPACES}', end='\r' if team != teams[-1] else None)
95
- rtn = RtnSingleTeamYear(year=year, team_name=team, session=session)
90
+ rtn = RtnSingleTeamYear(year=year, team_name=team)
96
91
  res = rtn.get_individual_nqs(force_update=force_update if i == 0 else False)
97
92
  if verbose and len(res) == 0:
98
93
  print(f'\tNo individual NQS found for {team}')
@@ -102,6 +97,5 @@ def individual_nqs(year, teams, verbose=False, force_update=False):
102
97
 
103
98
 
104
99
  def rankings(year, team_vs_ind='team', event='AA', week=None, force_update=False):
105
- session = get_session()
106
- rtn = RtnSingleTeamYear(year=year, team_name=None, session=session)
100
+ rtn = RtnSingleTeamYear(year=year, team_name=None)
107
101
  return rtn.get_overall_rankings(team_vs_ind=team_vs_ind, event=event, week=week, force_update=force_update)
@@ -21,19 +21,13 @@ def validate_input(teams):
21
21
  return teams
22
22
 
23
23
 
24
- def get_session():
24
+ @lru_cache(maxsize=1000000)
25
+ def get_data_from_api(endpoint, suffix):
25
26
  session = requests.Session()
26
27
  retry = Retry(connect=3, backoff_factor=0.5)
27
28
  adapter = HTTPAdapter(max_retries=retry)
28
29
  session.mount('http://', adapter)
29
30
  session.mount('https://', adapter)
30
- return session
31
-
32
-
33
- @lru_cache(maxsize=1000000)
34
- def get_data_from_api(endpoint, suffix, session=None):
35
- if not session:
36
- session = get_session()
37
31
 
38
32
  url = 'https://www.roadtonationals.com/api/women/' + endpoint
39
33
  if suffix:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scraping_rtn
3
- Version: 0.0.8.0
3
+ Version: 0.0.8.2
4
4
  Summary: package to scrape gymnastics data from Road To Nationals
5
5
  Author-email: Claire Harmon <ceharmon220@gmail.com>
6
6
  License: MIT
File without changes
File without changes