hockey-blast-common-lib 0.1.29__tar.gz → 0.1.31__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/PKG-INFO +1 -1
  2. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/aggregate_skater_stats.py +66 -114
  3. hockey_blast_common_lib-0.1.31/hockey_blast_common_lib/assign_skater_skill.py +46 -0
  4. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/hockey_blast_sample_backup.sql.gz +0 -0
  5. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/options.py +1 -1
  6. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/skills_in_divisions.py +0 -49
  7. hockey_blast_common_lib-0.1.31/hockey_blast_common_lib/skills_propagation.py +368 -0
  8. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/stats_models.py +14 -0
  9. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib.egg-info/PKG-INFO +1 -1
  10. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib.egg-info/SOURCES.txt +1 -0
  11. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/setup.py +1 -1
  12. hockey_blast_common_lib-0.1.29/hockey_blast_common_lib/skills_propagation.py +0 -251
  13. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/MANIFEST.in +0 -0
  14. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/README.md +0 -0
  15. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/__init__.py +0 -0
  16. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/aggregate_goalie_stats.py +0 -0
  17. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/aggregate_human_stats.py +0 -0
  18. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/aggregate_referee_stats.py +0 -0
  19. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/db_connection.py +0 -0
  20. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/dump_sample_db.sh +0 -0
  21. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/models.py +0 -0
  22. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/restore_sample_db.sh +0 -0
  23. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/utils.py +0 -0
  24. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib/wsgi.py +0 -0
  25. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib.egg-info/dependency_links.txt +0 -0
  26. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib.egg-info/requires.txt +0 -0
  27. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/hockey_blast_common_lib.egg-info/top_level.txt +0 -0
  28. {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.31}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hockey-blast-common-lib
3
- Version: 0.1.29
3
+ Version: 0.1.31
4
4
  Summary: Common library for shared functionality and DB models
5
5
  Author: Pavel Kletskov
6
6
  Author-email: kletskov@gmail.com
@@ -6,17 +6,28 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
6
6
  from datetime import datetime, timedelta
7
7
  import sqlalchemy
8
8
 
9
- from hockey_blast_common_lib.models import Game, Goal, Penalty, GameRoster, Organization, Division
9
+ from hockey_blast_common_lib.models import Game, Goal, Penalty, GameRoster, Organization, Division, Human, Level
10
10
  from hockey_blast_common_lib.stats_models import OrgStatsSkater, DivisionStatsSkater, OrgStatsWeeklySkater, OrgStatsDailySkater, DivisionStatsWeeklySkater, DivisionStatsDailySkater, LevelStatsSkater
11
11
  from hockey_blast_common_lib.db_connection import create_session
12
12
  from sqlalchemy.sql import func, case
13
13
  from hockey_blast_common_lib.options import not_human_names, parse_args, MIN_GAMES_FOR_ORG_STATS, MIN_GAMES_FOR_DIVISION_STATS, MIN_GAMES_FOR_LEVEL_STATS
14
14
  from hockey_blast_common_lib.utils import get_org_id_from_alias, get_human_ids_by_names, get_division_ids_for_last_season_in_all_leagues, get_all_division_ids_for_org
15
15
  from sqlalchemy import func, case, and_
16
+ from collections import defaultdict
16
17
 
17
- def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_filter_out, filter_human_id=None, aggregation_window=None):
18
+ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_filter_out, debug_human_id=None, aggregation_window=None):
18
19
  human_ids_to_filter = get_human_ids_by_names(session, names_to_filter_out)
19
20
 
21
+ # Get the name of the aggregation, for debug purposes
22
+ if aggregation_type == 'org':
23
+ aggregation_name = session.query(Organization).filter(Organization.id == aggregation_id).first().organization_name
24
+ elif aggregation_type == 'division':
25
+ aggregation_name = session.query(Division).filter(Division.id == aggregation_id).first().level
26
+ elif aggregation_type == 'level':
27
+ aggregation_name = session.query(Level).filter(Level.id == aggregation_id).first().level_name
28
+ else:
29
+ aggregation_name = "Unknown"
30
+
20
31
  if aggregation_type == 'org':
21
32
  if aggregation_window == 'Daily':
22
33
  StatsModel = OrgStatsDailySkater
@@ -39,6 +50,10 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
39
50
  StatsModel = LevelStatsSkater
40
51
  min_games = MIN_GAMES_FOR_LEVEL_STATS
41
52
  filter_condition = Division.level_id == aggregation_id
53
+ # Add filter to only include games for the last 5 years
54
+ # five_years_ago = datetime.now() - timedelta(days=5*365)
55
+ # level_window_filter = func.cast(func.concat(Game.date, ' ', Game.time), sqlalchemy.types.TIMESTAMP) >= five_years_ago
56
+ # filter_condition = filter_condition & level_window_filter
42
57
  else:
43
58
  raise ValueError("Invalid aggregation type")
44
59
 
@@ -63,8 +78,8 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
63
78
 
64
79
  # Filter for specific human_id if provided
65
80
  human_filter = []
66
- if filter_human_id:
67
- human_filter = [GameRoster.human_id == filter_human_id]
81
+ # if debug_human_id:
82
+ # human_filter = [GameRoster.human_id == debug_human_id]
68
83
 
69
84
  # Aggregate games played for each human in each division, excluding goalies
70
85
  games_played_stats = session.query(
@@ -72,7 +87,7 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
72
87
  GameRoster.human_id,
73
88
  func.count(Game.id).label('games_played'),
74
89
  func.array_agg(Game.id).label('game_ids')
75
- ).join(GameRoster, Game.id == GameRoster.game_id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, GameRoster.human_id).all()
90
+ ).join(GameRoster, Game.id == GameRoster.game_id).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, GameRoster.human_id).all()
76
91
 
77
92
  # Aggregate goals for each human in each division, excluding goalies
78
93
  goals_stats = session.query(
@@ -80,7 +95,7 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
80
95
  Goal.goal_scorer_id.label('human_id'),
81
96
  func.count(Goal.id).label('goals'),
82
97
  func.array_agg(Goal.game_id).label('goal_game_ids')
83
- ).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.goal_scorer_id == GameRoster.human_id)).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.goal_scorer_id).all()
98
+ ).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.goal_scorer_id == GameRoster.human_id)).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.goal_scorer_id).all()
84
99
 
85
100
  # Aggregate assists for each human in each division, excluding goalies
86
101
  assists_stats = session.query(
@@ -88,14 +103,14 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
88
103
  Goal.assist_1_id.label('human_id'),
89
104
  func.count(Goal.id).label('assists'),
90
105
  func.array_agg(Goal.game_id).label('assist_game_ids')
91
- ).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.assist_1_id == GameRoster.human_id)).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.assist_1_id).all()
106
+ ).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.assist_1_id == GameRoster.human_id)).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.assist_1_id).all()
92
107
 
93
108
  assists_stats_2 = session.query(
94
109
  Game.org_id,
95
110
  Goal.assist_2_id.label('human_id'),
96
111
  func.count(Goal.id).label('assists'),
97
112
  func.array_agg(Goal.game_id).label('assist_2_game_ids')
98
- ).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.assist_2_id == GameRoster.human_id)).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.assist_2_id).all()
113
+ ).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.assist_2_id == GameRoster.human_id)).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.assist_2_id).all()
99
114
 
100
115
  # Aggregate penalties for each human in each division, excluding goalies
101
116
  penalties_stats = session.query(
@@ -103,7 +118,7 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
103
118
  Penalty.penalized_player_id.label('human_id'),
104
119
  func.count(Penalty.id).label('penalties'),
105
120
  func.array_agg(Penalty.game_id).label('penalty_game_ids')
106
- ).join(Game, Game.id == Penalty.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Penalty.penalized_player_id == GameRoster.human_id)).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Penalty.penalized_player_id).all()
121
+ ).join(Game, Game.id == Penalty.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Penalty.penalized_player_id == GameRoster.human_id)).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Penalty.penalized_player_id).all()
107
122
 
108
123
  # Combine the results
109
124
  stats_dict = {}
@@ -111,6 +126,8 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
111
126
  if stat.human_id in human_ids_to_filter:
112
127
  continue
113
128
  key = (aggregation_id, stat.human_id)
129
+ if stat.games_played < min_games:
130
+ continue
114
131
  stats_dict[key] = {
115
132
  'games_played': stat.games_played,
116
133
  'goals': 0,
@@ -127,94 +144,26 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
127
144
  }
128
145
 
129
146
  for stat in goals_stats:
130
- if stat.human_id in human_ids_to_filter:
131
- continue
132
147
  key = (aggregation_id, stat.human_id)
133
- if key not in stats_dict:
134
- stats_dict[key] = {
135
- 'games_played': 0,
136
- 'goals': stat.goals,
137
- 'assists': 0,
138
- 'penalties': 0,
139
- 'points': stat.goals, # Initialize points with goals
140
- 'goals_per_game': 0.0,
141
- 'points_per_game': 0.0,
142
- 'assists_per_game': 0.0,
143
- 'penalties_per_game': 0.0,
144
- 'game_ids': [],
145
- 'first_game_id': None,
146
- 'last_game_id': None
147
- }
148
- else:
148
+ if key in stats_dict:
149
149
  stats_dict[key]['goals'] += stat.goals
150
150
  stats_dict[key]['points'] += stat.goals # Update points
151
151
 
152
152
  for stat in assists_stats:
153
- if stat.human_id in human_ids_to_filter:
154
- continue
155
153
  key = (aggregation_id, stat.human_id)
156
- if key not in stats_dict:
157
- stats_dict[key] = {
158
- 'games_played': 0,
159
- 'goals': 0,
160
- 'assists': stat.assists,
161
- 'penalties': 0,
162
- 'points': stat.assists, # Initialize points with assists
163
- 'goals_per_game': 0.0,
164
- 'points_per_game': 0.0,
165
- 'assists_per_game': 0.0,
166
- 'penalties_per_game': 0.0,
167
- 'game_ids': [],
168
- 'first_game_id': None,
169
- 'last_game_id': None
170
- }
171
- else:
154
+ if key in stats_dict:
172
155
  stats_dict[key]['assists'] += stat.assists
173
156
  stats_dict[key]['points'] += stat.assists # Update points
174
157
 
175
158
  for stat in assists_stats_2:
176
- if stat.human_id in human_ids_to_filter:
177
- continue
178
159
  key = (aggregation_id, stat.human_id)
179
- if key not in stats_dict:
180
- stats_dict[key] = {
181
- 'games_played': 0,
182
- 'goals': 0,
183
- 'assists': stat.assists,
184
- 'penalties': 0,
185
- 'points': stat.assists, # Initialize points with assists
186
- 'goals_per_game': 0.0,
187
- 'points_per_game': 0.0,
188
- 'assists_per_game': 0.0,
189
- 'penalties_per_game': 0.0,
190
- 'game_ids': [],
191
- 'first_game_id': None,
192
- 'last_game_id': None
193
- }
194
- else:
160
+ if key in stats_dict:
195
161
  stats_dict[key]['assists'] += stat.assists
196
162
  stats_dict[key]['points'] += stat.assists # Update points
197
163
 
198
164
  for stat in penalties_stats:
199
- if stat.human_id in human_ids_to_filter:
200
- continue
201
165
  key = (aggregation_id, stat.human_id)
202
- if key not in stats_dict:
203
- stats_dict[key] = {
204
- 'games_played': 0,
205
- 'goals': 0,
206
- 'assists': 0,
207
- 'penalties': stat.penalties,
208
- 'points': 0, # Initialize points
209
- 'goals_per_game': 0.0,
210
- 'points_per_game': 0.0,
211
- 'assists_per_game': 0.0,
212
- 'penalties_per_game': 0.0,
213
- 'game_ids': [],
214
- 'first_game_id': None,
215
- 'last_game_id': None
216
- }
217
- else:
166
+ if key in stats_dict:
218
167
  stats_dict[key]['penalties'] += stat.penalties
219
168
 
220
169
  # Calculate per game stats
@@ -237,20 +186,10 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
237
186
  stat['first_game_id'] = first_game.id if first_game else None
238
187
  stat['last_game_id'] = last_game.id if last_game else None
239
188
 
240
- # Debug output for totals if filter_human_id is provided
241
- if filter_human_id:
242
- for key, stat in stats_dict.items():
243
- if key[1] == filter_human_id:
244
- print(f"Human ID: {filter_human_id}")
245
- print(f"Total Games Played: {stat['games_played']}")
246
- print(f"Total Goals: {stat['goals']}")
247
- print(f"Total Assists: {stat['assists']}")
248
- print(f"Total Penalties: {stat['penalties']}")
249
-
250
189
  # Calculate total_in_rank
251
190
  total_in_rank = len(stats_dict)
252
191
 
253
- # Assign ranks
192
+ # Assign ranks within each level
254
193
  def assign_ranks(stats_dict, field):
255
194
  sorted_stats = sorted(stats_dict.items(), key=lambda x: x[1][field], reverse=True)
256
195
  for rank, (key, stat) in enumerate(sorted_stats, start=1):
@@ -266,13 +205,22 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
266
205
  assign_ranks(stats_dict, 'assists_per_game')
267
206
  assign_ranks(stats_dict, 'penalties_per_game')
268
207
 
208
+ # Debug output for specific human
209
+ if debug_human_id:
210
+ if any(key[1] == debug_human_id for key in stats_dict):
211
+ human = session.query(Human).filter(Human.id == debug_human_id).first()
212
+ human_name = f"{human.first_name} {human.last_name}" if human else "Unknown"
213
+ print(f"For Human {debug_human_id} ({human_name}) for {aggregation_type} {aggregation_id} ({aggregation_name}) , total_in_rank {total_in_rank} and window {aggregation_window}:")
214
+ for key, stat in stats_dict.items():
215
+ if key[1] == debug_human_id:
216
+ for k, v in stat.items():
217
+ print(f"{k}: {v}")
218
+
269
219
  # Insert aggregated stats into the appropriate table with progress output
270
220
  total_items = len(stats_dict)
271
221
  batch_size = 1000
272
222
  for i, (key, stat) in enumerate(stats_dict.items(), 1):
273
223
  aggregation_id, human_id = key
274
- if stat['games_played'] < min_games:
275
- continue
276
224
  goals_per_game = stat['goals'] / stat['games_played'] if stat['games_played'] > 0 else 0.0
277
225
  points_per_game = (stat['goals'] + stat['assists']) / stat['games_played'] if stat['games_played'] > 0 else 0.0
278
226
  assists_per_game = stat['assists'] / stat['games_played'] if stat['games_played'] > 0 else 0.0
@@ -306,33 +254,37 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
306
254
  # Commit in batches
307
255
  if i % batch_size == 0:
308
256
  session.commit()
309
- print(f"\r{i}/{total_items} ({(i/total_items)*100:.2f}%)", end="")
257
+ if debug_human_id is None:
258
+ print(f"\r{i}/{total_items} ({(i/total_items)*100:.2f}%)", end="")
259
+
310
260
  session.commit()
311
- print(f"\r{total_items}/{total_items} (100.00%)")
312
- print("\nDone.")
261
+ if debug_human_id is None:
262
+ print(f"\r{total_items}/{total_items} (100.00%)")
313
263
 
314
264
  if __name__ == "__main__":
315
265
  session = create_session("boss")
266
+ human_id_to_debug = 117076
316
267
 
317
268
  # Get all org_id present in the Organization table
318
269
  org_ids = session.query(Organization.id).all()
319
270
  org_ids = [org_id[0] for org_id in org_ids]
320
271
 
321
- # for org_id in org_ids:
322
- # division_ids = get_all_division_ids_for_org(session, org_id)
323
- # print(f"Aggregating skater stats for {len(division_ids)} divisions in org_id {org_id}...")
324
- # total_divisions = len(division_ids)
325
- # processed_divisions = 0
326
- # for division_id in division_ids:
327
- # aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, filter_human_id=None)
328
- # aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, filter_human_id=None, aggregation_window='Weekly')
329
- # aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, filter_human_id=None, aggregation_window='Daily')
330
- # processed_divisions += 1
331
- # print(f"\rProcessed {processed_divisions}/{total_divisions} divisions ({(processed_divisions/total_divisions)*100:.2f}%)", end="")
272
+ for org_id in org_ids:
273
+ division_ids = get_all_division_ids_for_org(session, org_id)
274
+ print(f"Aggregating skater stats for {len(division_ids)} divisions in org_id {org_id}...")
275
+ total_divisions = len(division_ids)
276
+ processed_divisions = 0
277
+ for division_id in division_ids:
278
+ aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug)
279
+ aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug, aggregation_window='Weekly')
280
+ aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug, aggregation_window='Daily')
281
+ processed_divisions += 1
282
+ if human_id_to_debug is None:
283
+ print(f"\rProcessed {processed_divisions}/{total_divisions} divisions ({(processed_divisions/total_divisions)*100:.2f}%)", end="")
332
284
 
333
- # aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, filter_human_id=None)
334
- # aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, filter_human_id=None, aggregation_window='Weekly')
335
- # aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, filter_human_id=None, aggregation_window='Daily')
285
+ aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug)
286
+ aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug, aggregation_window='Weekly')
287
+ aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug, aggregation_window='Daily')
336
288
 
337
289
  # Aggregate by level
338
290
  level_ids = session.query(Division.level_id).distinct().all()
@@ -342,7 +294,7 @@ if __name__ == "__main__":
342
294
  for level_id in level_ids:
343
295
  if level_id is None:
344
296
  continue
345
- print(f"\rProcessed {processed_levels}/{total_levels} levels ({(processed_levels/total_levels)*100:.2f}%)", end="")
297
+ if human_id_to_debug is None:
298
+ print(f"\rProcessed {processed_levels}/{total_levels} levels ({(processed_levels/total_levels)*100:.2f}%)", end="")
346
299
  processed_levels += 1
347
- aggregate_skater_stats(session, aggregation_type='level', aggregation_id=level_id, names_to_filter_out=not_human_names, filter_human_id=None)
348
- print("\nDone.")
300
+ aggregate_skater_stats(session, aggregation_type='level', aggregation_id=level_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug)
@@ -0,0 +1,46 @@
1
+ import sys, os
2
+
3
+ # Add the package directory to the Python path
4
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
5
+
6
+ from hockey_blast_common_lib.models import Human, LevelStatsSkater
7
+ from hockey_blast_common_lib.db_connection import create_session
8
+ from sqlalchemy.sql import func
9
+
10
def calculate_skater_skill_value(human_id, level_stats):
    """Return the highest rank-weighted skill value across a skater's level stats.

    For every stats row, the level's ``skill_value`` is multiplied by the
    larger of two ratios: ``points_per_game_rank / total_in_rank`` and
    ``games_played_rank / total_in_rank``. The maximum product over all rows
    is returned.

    :param human_id: Id of the skater. Not used by the computation; kept so
        existing call sites keep working.
    :param level_stats: Iterable of stats rows exposing ``level`` (with a
        ``skill_value`` attribute), ``points_per_game_rank``,
        ``games_played_rank`` and ``total_in_rank``.
    :return: The maximum scaled skill value, or ``0`` when no row is usable.
    """
    max_skill_value = 0

    for stat in level_stats:
        level = stat.level
        level_skill_value = level.skill_value if level is not None else None
        # Rows without a level / skill value, or with a negative skill value,
        # contribute nothing.
        if level_skill_value is None or level_skill_value < 0:
            continue

        total_in_rank = stat.total_in_rank
        # A missing or zero population would make the ratios undefined
        # (previously a ZeroDivisionError); skip such rows.
        if not total_in_rank:
            continue

        ppg_ratio = stat.points_per_game_rank / total_in_rank
        games_played_ratio = stat.games_played_rank / total_in_rank

        # Take the maximum of the two ratios
        skill_value = level_skill_value * max(ppg_ratio, games_played_ratio)
        max_skill_value = max(max_skill_value, skill_value)

    return max_skill_value
25
+
26
def assign_skater_skill_values():
    """Compute and store ``skater_skill_value`` for every human with level stats.

    Opens a "boss" session, loads all ``Human`` rows and, for each one that
    has ``LevelStatsSkater`` rows, stores the result of
    ``calculate_skater_skill_value`` on ``human.skater_skill_value`` and
    commits. Progress is printed on a single, carriage-return-updated line.
    The session is closed when the function exits, including on error.
    """
    session = create_session("boss")
    try:
        humans = session.query(Human).all()
        total_humans = len(humans)

        for processed_humans, human in enumerate(humans, start=1):
            level_stats = (
                session.query(LevelStatsSkater)
                .filter(LevelStatsSkater.human_id == human.id)
                .all()
            )
            if level_stats:
                human.skater_skill_value = calculate_skater_skill_value(human.id, level_stats)
                session.commit()

            print(f"\rProcessed {processed_humans}/{total_humans} humans ({(processed_humans/total_humans)*100:.2f}%)", end="")

        print("\nSkater skill values have been assigned to all humans.")
    finally:
        # Release the connection even if a query or commit fails part-way.
        session.close()
44
+
45
+ if __name__ == "__main__":
46
+ assign_skater_skill_values()
@@ -4,7 +4,7 @@ MAX_HUMAN_SEARCH_RESULTS = 25
4
4
  MAX_TEAM_SEARCH_RESULTS = 25
5
5
  MIN_GAMES_FOR_ORG_STATS = 1
6
6
  MIN_GAMES_FOR_DIVISION_STATS = 1
7
- MIN_GAMES_FOR_LEVEL_STATS = 10
7
+ MIN_GAMES_FOR_LEVEL_STATS = 20
8
8
 
9
9
  orgs = {'caha', 'sharksice', 'tvice'}
10
10
 
@@ -47,55 +47,6 @@ def analyze_levels(org):
47
47
 
48
48
  session.close()
49
49
 
50
- def reset_skill_values_in_divisions():
51
- session = create_session("boss")
52
-
53
- # Fetch all records from the Division table
54
- divisions = session.query(Division).all()
55
-
56
- for division in divisions:
57
- # Look up the Skill table using the level from Division
58
- div_level = division.level
59
- # Query to find the matching Skill
60
- level = session.query(Level).filter(Level.org_id == division.org_id, Level.level_name == div_level).one_or_none()
61
-
62
- if not level:
63
- # If no match found, check each alternative name individually
64
- skills = session.query(Level).filter(Level.org_id == division.org_id).all()
65
- for s in skills:
66
- alternative_names = s.level_alternative_name.split(',')
67
- if div_level in alternative_names:
68
- level = s
69
- break
70
-
71
- if level:
72
- # Assign the skill_value and set skill_propagation_sequence to 0
73
- division.level_id = level.id
74
- if level.is_seed:
75
- level.skill_propagation_sequence = 0
76
- else:
77
- level.skill_propagation_sequence = -1
78
- level.skill_value = -1
79
- else:
80
- # Add new Skill with values previously used for division
81
- new_level = Level(
82
- org_id=division.org_id,
83
- skill_value=-1,
84
- level_name=division.level,
85
- level_alternative_name='',
86
- is_seed=False,
87
- skill_propagation_sequence=-1
88
- )
89
- session.add(new_level)
90
- session.commit()
91
- division.skill_id = new_level.id
92
- print(f"Created new Level for Division {division.level}")
93
-
94
- # Commit the changes to the Division
95
- session.commit()
96
-
97
- print("Level values and propagation sequences have been populated into the Division table.")
98
-
99
50
  def fill_seed_skills():
100
51
  session = create_session("boss")
101
52
 
@@ -0,0 +1,368 @@
1
+ import sys
2
+ import os
3
+ from collections import defaultdict
4
+ import numpy as np
5
+
6
+ # Add the project root directory to the Python path
7
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
+
9
+ from hockey_blast_common_lib.models import Level, Division
10
+ from hockey_blast_common_lib.stats_models import LevelsGraphEdge, LevelStatsSkater, SkillValuePPGRatio
11
+ from hockey_blast_common_lib.db_connection import create_session
12
+ from sqlalchemy import func
13
+
14
+ import numpy as np
15
+
16
+ class Config:
17
+ MIN_GAMES_PLAYED_FOR_EDGE = 10
18
+ MIN_PPG_FOR_EDGE = 0.5
19
+ MIN_HUMANS_FOR_EDGE = 2
20
+ MAX_PROPAGATION_SEQUENCE = 4
21
+ MIN_CONNECTIONS_FOR_CORRELATION = 20
22
+ MIN_CONNECTIONS_FOR_PROPAGATION = 5
23
+ MAX_SKILL_DIFF_IN_EDGE = 30
24
+
25
+ @staticmethod
26
+ def discard_outliers(data, m=2):
27
+ """
28
+ Discard outliers from the data using the modified Z-score method.
29
+ :param data: List of data points
30
+ :param m: Threshold for the modified Z-score
31
+ :return: List of data points with outliers removed
32
+ """
33
+ if len(data) == 0:
34
+ return data
35
+ median = np.median(data)
36
+ diff = np.abs(data - median)
37
+ med_abs_deviation = np.median(diff)
38
+ if med_abs_deviation == 0:
39
+ return data
40
+ modified_z_score = 0.6745 * diff / med_abs_deviation
41
+ return data[modified_z_score < m]
42
+
43
+ def reset_skill_values_in_divisions():
44
+ session = create_session("boss")
45
+
46
+ # Fetch all records from the Division table
47
+ divisions = session.query(Division).all()
48
+
49
+ for division in divisions:
50
+ # Look up the Skill table using the level from Division
51
+ div_level = division.level
52
+ # Query to find the matching Skill
53
+ level = session.query(Level).filter(Level.org_id == division.org_id, Level.level_name == div_level).one_or_none()
54
+
55
+ if not level:
56
+ # If no match found, check each alternative name individually
57
+ skills = session.query(Level).filter(Level.org_id == division.org_id).all()
58
+ for s in skills:
59
+ alternative_names = s.level_alternative_name.split(',')
60
+ if div_level in alternative_names:
61
+ level = s
62
+ break
63
+
64
+ if level:
65
+ # Assign the skill_value and set skill_propagation_sequence to 0
66
+ division.level_id = level.id
67
+ if level.is_seed:
68
+ level.skill_propagation_sequence = 0
69
+ else:
70
+ level.skill_propagation_sequence = -1
71
+ level.skill_value = -1
72
+ else:
73
+ # Add new Skill with values previously used for division
74
+ new_level = Level(
75
+ org_id=division.org_id,
76
+ skill_value=-1,
77
+ level_name=division.level,
78
+ level_alternative_name='',
79
+ is_seed=False,
80
+ skill_propagation_sequence=-1
81
+ )
82
+ session.add(new_level)
83
+ session.commit()
84
+ division.skill_id = new_level.id
85
+ print(f"Created new Level for Division {division.level}")
86
+
87
+ # Commit the changes to the Division
88
+ session.commit()
89
+
90
+ print("Level values and propagation sequences have been populated into the Division table.")
91
+
92
+ def build_levels_graph_edges():
93
+ # Creates unique edges from levelA to levelB (there is no reverse edge levelB to levelA)
94
+ session = create_session("boss")
95
+
96
+ # Delete all existing edges
97
+ session.query(LevelsGraphEdge).delete()
98
+ session.commit()
99
+
100
+ # Query to get all level stats
101
+ level_stats = session.query(LevelStatsSkater).all()
102
+
103
+ # Dictionary to store stats by level and human
104
+ level_human_stats = defaultdict(lambda: defaultdict(dict))
105
+
106
+ for stat in level_stats:
107
+ if stat.games_played >= Config.MIN_GAMES_PLAYED_FOR_EDGE and stat.points_per_game >= Config.MIN_PPG_FOR_EDGE:
108
+ level_human_stats[stat.level_id][stat.human_id] = {
109
+ 'games_played': stat.games_played,
110
+ 'points_per_game': stat.points_per_game
111
+ }
112
+
113
+ # Dictionary to store edges
114
+ edges = {}
115
+
116
+ # Build edges
117
+ total_levels = len(level_human_stats)
118
+ processed_levels = 0
119
+ for from_level_id, from_humans in level_human_stats.items():
120
+ from_level = session.query(Level).filter_by(id=from_level_id).first()
121
+ for to_level_id, to_humans in level_human_stats.items():
122
+ to_level = session.query(Level).filter_by(id=to_level_id).first()
123
+ if from_level.id >= to_level.id:
124
+ continue
125
+
126
+ common_humans = set(from_humans.keys()) & set(to_humans.keys())
127
+ n_connections = len(common_humans)
128
+ n_games = 0
129
+
130
+ if n_connections < Config.MIN_HUMANS_FOR_EDGE:
131
+ continue
132
+
133
+ ppg_ratios = []
134
+ # if from_level.id == 223 and to_level.id == 219: #216
135
+ # print(f"Debug: From Level ID: {from_level.id}, To Level ID: {to_level.id}")
136
+ for human_id in common_humans:
137
+ from_ppg = from_humans[human_id]['points_per_game']
138
+ to_ppg = to_humans[human_id]['points_per_game']
139
+ from_games = from_humans[human_id]['games_played']
140
+ to_games = to_humans[human_id]['games_played']
141
+ min_games = min(from_games, to_games)
142
+ n_games += min_games
143
+
144
+ # if from_level.id == 223 and to_level.id == 219: #216
145
+ # print(f"Human {human_id} From PPG: {from_ppg}, To PPG: {to_ppg}, Min Games: {min_games} n_games: {n_games}")
146
+
147
+ if from_ppg > 0 and to_ppg > 0:
148
+ ppg_ratios.append(to_ppg / from_ppg)
149
+
150
+ if not ppg_ratios:
151
+ continue
152
+
153
+ # Discard outliers
154
+ ppg_ratios = Config.discard_outliers(np.array(ppg_ratios))
155
+
156
+ if len(ppg_ratios) == 0:
157
+ continue
158
+
159
+ avg_ppg_ratio = float(sum(ppg_ratios) / len(ppg_ratios))
160
+
161
+ # if sorted([from_level.id, to_level.id]) == [219, 223]:
162
+ # print(f"From {from_level_id} to {to_level_id} n_connections {n_connections} n_games: {n_games}")
163
+
164
+ edge = LevelsGraphEdge(
165
+ from_level_id=from_level_id,
166
+ to_level_id=to_level_id,
167
+ n_connections=n_connections,
168
+ ppg_ratio=avg_ppg_ratio,
169
+ n_games=n_games # Store the number of games
170
+ )
171
+ edges[(from_level_id, to_level_id)] = edge
172
+
173
+ processed_levels += 1
174
+ print(f"\rProcessed {processed_levels}/{total_levels} levels ({(processed_levels/total_levels)*100:.2f}%)", end="")
175
+
176
+ # Insert edges into the database
177
+ for edge in edges.values():
178
+ session.add(edge)
179
+ session.commit()
180
+
181
+ print("\nLevels graph edges have been populated into the database.")
182
+
183
def _rebuild_skill_value_correlations(session):
    """Recreate the ``SkillValuePPGRatio`` rows from edges between sequence-0 levels."""
    # Delete all existing correlation data
    session.query(SkillValuePPGRatio).delete()
    session.commit()

    levels = session.query(Level).filter(Level.skill_propagation_sequence == 0).all()
    levels_by_id = {level.id: level for level in levels}
    correlation_data = defaultdict(list)

    for level in levels:
        if level.skill_value == -1:
            continue

        # Only edges that start at this level are read, so every edge
        # contributes exactly once even though both of its ends are visited.
        edges = session.query(LevelsGraphEdge).filter(
            LevelsGraphEdge.from_level_id == level.id
        ).all()

        for edge in edges:
            if edge.n_connections < Config.MIN_CONNECTIONS_FOR_CORRELATION:
                continue

            # The other end must be a sequence-0 level too; -1 is skipped on
            # this side for the same reason it is skipped on the source side.
            target_level = levels_by_id.get(edge.to_level_id)
            if target_level is None or target_level.skill_value == -1:
                continue

            skill_value_from = level.skill_value
            skill_value_to = target_level.skill_value

            # Same skill value - no correlation
            if skill_value_from == skill_value_to:
                continue

            # Ignore edges that span too wide a skill gap
            if abs(skill_value_from - skill_value_to) > Config.MAX_SKILL_DIFF_IN_EDGE:
                continue

            # Ensure INCREASING SKILL VALUES for the correlation data!
            ppg_ratio_edge = edge.ppg_ratio
            if skill_value_from > skill_value_to:
                skill_value_from, skill_value_to = skill_value_to, skill_value_from
                ppg_ratio_edge = 1 / ppg_ratio_edge

            correlation_data[(skill_value_from, skill_value_to)].append(
                (ppg_ratio_edge, edge.n_games)
            )

    # Save one games-weighted average ratio per (from, to) skill value pair
    for (skill_value_from, skill_value_to), samples in correlation_data.items():
        kept = Config.discard_outliers(np.array([ratio for ratio, _ in samples], dtype=float))
        kept_ratios = {float(ratio) for ratio in kept}
        kept_samples = [(ratio, n_games) for ratio, n_games in samples if ratio in kept_ratios]
        total_n_games = sum(n_games for _, n_games in kept_samples)
        if total_n_games == 0:
            # Nothing to weight by; a weighted average is undefined
            continue

        avg_ppg_ratio = float(
            sum(ratio * n_games for ratio, n_games in kept_samples) / total_n_games
        )
        session.add(SkillValuePPGRatio(
            from_skill_value=skill_value_from,
            to_skill_value=skill_value_to,
            ppg_ratio=avg_ppg_ratio,
            n_games=total_n_games  # Store the sum of games
        ))
    session.commit()


def _known_skill_value_range(session):
    """Return ``(min, max)`` skill value over all stored ``SkillValuePPGRatio`` rows.

    Returns ``(inf, -inf)`` when the table is empty.
    """
    min_skill_value = float('inf')
    max_skill_value = float('-inf')
    for row in session.query(SkillValuePPGRatio).all():
        min_skill_value = min(min_skill_value, row.from_skill_value, row.to_skill_value)
        max_skill_value = max(max_skill_value, row.from_skill_value, row.to_skill_value)
    return min_skill_value, max_skill_value


def _suggest_skill_value(level_skill_value, ppg_ratio_edge, correlation,
                         min_skill_value, max_skill_value):
    """Return the skill value one correlation row suggests, or ``None`` to skip it."""
    # Skill value always increases in the correlation data.
    # Let's avoid extrapolating from the end of the edge and away from the edge!
    if (level_skill_value == correlation.from_skill_value
            and level_skill_value > min_skill_value
            and ppg_ratio_edge < 1):
        return None
    if (level_skill_value == correlation.to_skill_value
            and level_skill_value < max_skill_value
            and ppg_ratio_edge > 1):
        return None

    # First confirm which way are we going here
    if (ppg_ratio_edge < 1 and correlation.ppg_ratio > 1) or \
            (ppg_ratio_edge > 1 and correlation.ppg_ratio < 1):
        # Reverse the correlation
        from_skill_value = correlation.to_skill_value
        to_skill_value = correlation.from_skill_value
        ppg_ratio_range = 1 / correlation.ppg_ratio
    else:
        from_skill_value = correlation.from_skill_value
        to_skill_value = correlation.to_skill_value
        ppg_ratio_range = correlation.ppg_ratio

    # Now both ratios are either < 1 or > 1
    if ppg_ratio_edge < 1:
        ppg_ratio_for_extrapolation = 1 / ppg_ratio_edge
        ppg_ratio_range = 1 / ppg_ratio_range
    else:
        ppg_ratio_for_extrapolation = ppg_ratio_edge

    # Interpolate or extrapolate skill value
    skill_value_range = to_skill_value - from_skill_value
    skill_value_diff = (ppg_ratio_for_extrapolation / ppg_ratio_range) * skill_value_range
    return level_skill_value + skill_value_diff


def propagate_skill_levels(propagation_sequence):
    """Propagate skill values from rated levels to their unrated neighbours.

    When ``propagation_sequence`` is 0 the ``SkillValuePPGRatio`` table is
    rebuilt first. Then every level whose ``skill_propagation_sequence`` equals
    ``propagation_sequence`` proposes a skill value for each connected level
    that still has ``skill_propagation_sequence == -1``. The proposals for one
    target level are averaged after outlier removal and written back together
    with ``skill_propagation_sequence = propagation_sequence + 1``.

    :param propagation_sequence: Sequence number of the levels to propagate from.
    """
    session = create_session("boss")

    if propagation_sequence == 0:
        _rebuild_skill_value_correlations(session)

    # Read the known skill range back from the stored rows so it is available
    # for every sequence, not only for the call that builds the table.
    min_skill_value, max_skill_value = _known_skill_value_range(session)

    # Propagate skill levels
    levels = session.query(Level).filter(
        Level.skill_propagation_sequence == propagation_sequence
    ).all()
    suggested_skill_values = defaultdict(list)

    for level in levels:
        # The matching correlation rows depend only on this level's skill
        # value, so they are fetched once per level instead of once per edge.
        correlations = session.query(SkillValuePPGRatio).filter(
            (SkillValuePPGRatio.from_skill_value <= level.skill_value) &
            (SkillValuePPGRatio.to_skill_value >= level.skill_value)
        ).all()
        if not correlations:
            continue

        edges = session.query(LevelsGraphEdge).filter(
            (LevelsGraphEdge.from_level_id == level.id) |
            (LevelsGraphEdge.to_level_id == level.id)
        ).all()

        for edge in edges:
            if edge.n_connections < Config.MIN_CONNECTIONS_FOR_PROPAGATION:
                continue

            # Express the ratio in the direction level -> target
            if edge.from_level_id == level.id:
                target_level_id = edge.to_level_id
                ppg_ratio_edge = edge.ppg_ratio
            else:
                target_level_id = edge.from_level_id
                ppg_ratio_edge = 1 / edge.ppg_ratio

            target_level = session.query(Level).filter_by(id=target_level_id).first()
            if not target_level or target_level.skill_propagation_sequence != -1:
                continue

            weighted_skill_values = []
            for correlation in correlations:
                new_skill_value = _suggest_skill_value(
                    level.skill_value, ppg_ratio_edge, correlation,
                    min_skill_value, max_skill_value
                )
                if new_skill_value is not None:
                    weighted_skill_values.append((new_skill_value, correlation.n_games))

            # Calculate weighted average of new skill values; every
            # correlation may have been skipped, leaving nothing to average.
            total_n_games = sum(n_games for _, n_games in weighted_skill_values)
            if total_n_games == 0:
                continue
            weighted_avg_skill_value = sum(
                skill_value * n_games for skill_value, n_games in weighted_skill_values
            ) / total_n_games
            suggested_skill_values[target_level_id].append(weighted_avg_skill_value)

    # Update skill values for target levels
    for target_level_id, skill_values in suggested_skill_values.items():
        skill_values = Config.discard_outliers(np.array(skill_values))
        if len(skill_values) == 0:
            continue

        avg_skill_value = float(sum(skill_values) / len(skill_values))
        # NOTE(review): 9.6 is a hard-coded floor whose origin is not visible here - confirm.
        avg_skill_value = max(avg_skill_value, 9.6)
        # Pin values below the known range just under its lower bound; skipped
        # when no range is known, which would otherwise store infinity.
        if min_skill_value != float('inf') and avg_skill_value < min_skill_value:
            avg_skill_value = min_skill_value - 0.01
        session.query(Level).filter_by(id=target_level_id).update({
            'skill_value': avg_skill_value,
            'skill_propagation_sequence': propagation_sequence + 1
        })
    session.commit()

    print(f"Skill levels have been propagated for sequence {propagation_sequence}.")
362
+
363
if __name__ == "__main__":
    # Script entry point: reset, rebuild the graph edges, then run every
    # propagation sequence from 0 up to and including the configured maximum.
    reset_skill_values_in_divisions()
    build_levels_graph_edges()

    last_sequence = Config.MAX_PROPAGATION_SEQUENCE
    for sequence in range(0, last_sequence + 1):
        propagate_skill_levels(sequence)
@@ -583,6 +583,7 @@ class LevelsGraphEdge(db.Model):
583
583
  to_level_id = db.Column(db.Integer, db.ForeignKey('levels.id'), nullable=False)
584
584
  n_connections = db.Column(db.Integer, nullable=False)
585
585
  ppg_ratio = db.Column(db.Float, nullable=False)
586
+ n_games = db.Column(db.Integer, nullable=False) # New field to store the number of games
586
587
 
587
588
  __table_args__ = (
588
589
  db.UniqueConstraint('from_level_id', 'to_level_id', name='_from_to_level_uc'),
@@ -598,3 +599,16 @@ class SkillPropagationCorrelation(db.Model):
598
599
  __table_args__ = (
599
600
  db.UniqueConstraint('skill_value_from', 'skill_value_to', 'ppg_ratio', name='_skill_value_ppg_ratio_uc'),
600
601
  )
602
+
603
+ # How PPG changes with INCREASING SKILL VALUES
604
class SkillValuePPGRatio(db.Model):
    """PPG ratio recorded for a pair of skill values.

    One row exists per (``from_skill_value``, ``to_skill_value``) pair, which
    the unique constraint below enforces.
    """
    __tablename__ = 'skill_value_ppg_ratios'
    __table_args__ = (
        db.UniqueConstraint(
            'from_skill_value',
            'to_skill_value',
            name='_from_to_skill_value_uc',
        ),
    )

    id = db.Column(db.Integer, primary_key=True)
    # Skill value pair the ratio refers to
    from_skill_value = db.Column(db.Float, nullable=False)
    to_skill_value = db.Column(db.Float, nullable=False)
    # PPG ratio for the pair
    ppg_ratio = db.Column(db.Float, nullable=False)
    # Sum of games behind the ratio
    n_games = db.Column(db.Integer, nullable=False)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hockey-blast-common-lib
3
- Version: 0.1.29
3
+ Version: 0.1.31
4
4
  Summary: Common library for shared functionality and DB models
5
5
  Author: Pavel Kletskov
6
6
  Author-email: kletskov@gmail.com
@@ -6,6 +6,7 @@ hockey_blast_common_lib/aggregate_goalie_stats.py
6
6
  hockey_blast_common_lib/aggregate_human_stats.py
7
7
  hockey_blast_common_lib/aggregate_referee_stats.py
8
8
  hockey_blast_common_lib/aggregate_skater_stats.py
9
+ hockey_blast_common_lib/assign_skater_skill.py
9
10
  hockey_blast_common_lib/db_connection.py
10
11
  hockey_blast_common_lib/dump_sample_db.sh
11
12
  hockey_blast_common_lib/hockey_blast_sample_backup.sql.gz
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='hockey-blast-common-lib', # The name of your package
5
- version='0.1.29',
5
+ version='0.1.31',
6
6
  description='Common library for shared functionality and DB models',
7
7
  author='Pavel Kletskov',
8
8
  author_email='kletskov@gmail.com',
@@ -1,251 +0,0 @@
1
- import sys
2
- import os
3
- from collections import defaultdict
4
- import numpy as np
5
-
6
- # Add the project root directory to the Python path
7
- sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
8
-
9
- from hockey_blast_common_lib.models import Level, Season
10
- from hockey_blast_common_lib.stats_models import LevelsGraphEdge, LevelStatsSkater, SkillPropagationCorrelation
11
- from hockey_blast_common_lib.db_connection import create_session
12
- from sqlalchemy import func
13
-
14
- import numpy as np
15
-
16
class Config:
    """Tunable thresholds for building the levels graph and propagating skill."""
    MIN_GAMES_PLAYED = 10
    MIN_PPG = 0.3
    MIN_HUMANS_FOR_EDGE = 5
    MAX_START_DATE_DIFF_MONTHS = 15
    MAX_PROPAGATION_SEQUENCE = 0
    MIN_CONNECTIONS_FOR_CORRELATION = 40
    MIN_CONNECTIONS_FOR_PROPAGATION = 3

    @staticmethod
    def discard_outliers(data, m=2):
        """
        Discard outliers from the data using the modified Z-score method.
        :param data: List or NumPy array of data points
        :param m: Threshold for the modified Z-score
        :return: NumPy array of data points with outliers removed
        """
        # Coerce first: boolean-mask indexing below only works on arrays, so a
        # plain list would otherwise fail. An ndarray is passed through as-is.
        values = np.asarray(data)
        if values.size == 0:
            return values
        median = np.median(values)
        diff = np.abs(values - median)
        med_abs_deviation = np.median(diff)
        if med_abs_deviation == 0:
            # No spread around the median: the score is undefined, keep everything
            return values
        modified_z_score = 0.6745 * diff / med_abs_deviation
        return values[modified_z_score < m]
42
-
43
-
44
def build_levels_graph_edges():
    """Rebuild all ``LevelsGraphEdge`` rows from ``LevelStatsSkater`` data.

    Two levels are connected when enough humans have qualifying stats in both.
    The edge stores the number of shared humans and their average
    points-per-game ratio after outlier removal, oriented so the stored ratio
    is never below 1.
    """
    session = create_session("boss")

    # Delete all existing edges
    session.query(LevelsGraphEdge).delete()
    session.commit()

    # Query to get all level stats
    level_stats = session.query(LevelStatsSkater).all()

    # Dictionary to store stats by level and human
    level_human_stats = defaultdict(lambda: defaultdict(dict))

    for stat in level_stats:
        if stat.games_played >= Config.MIN_GAMES_PLAYED and stat.points_per_game >= Config.MIN_PPG:
            level_human_stats[stat.aggregation_id][stat.human_id] = {
                'games_played': stat.games_played,
                'points_per_game': stat.points_per_game
            }

    # Load each level and its season once instead of once per level pair
    levels = {}
    seasons = {}
    for level_id in level_human_stats:
        level = session.query(Level).filter_by(id=level_id).first()
        levels[level_id] = level
        seasons[level_id] = session.query(Season).filter_by(id=level.season_id).first()

    # Dictionary to store edges
    edges = {}

    # Build edges
    total_levels = len(level_human_stats)
    processed_levels = 0
    max_start_date_diff_days = Config.MAX_START_DATE_DIFF_MONTHS * 30
    for from_level_id, from_humans in level_human_stats.items():
        from_level = levels[from_level_id]
        from_season = seasons[from_level_id]
        for to_level_id, to_humans in level_human_stats.items():
            to_level = levels[to_level_id]
            to_season = seasons[to_level_id]

            if from_level.skill_value >= to_level.skill_value:
                continue

            # TMP DEBUG HACK
            # NOTE(review): restricts edges to pairs touching skill 10 or 30 - confirm it is still wanted.
            if from_level.skill_value != 10 and to_level.skill_value != 30:
                continue

            # Check if the start dates are within the allowed difference
            if abs((from_season.start_date - to_season.start_date).days) > max_start_date_diff_days:
                continue

            common_humans = set(from_humans.keys()) & set(to_humans.keys())
            n_connections = len(common_humans)

            if n_connections < Config.MIN_HUMANS_FOR_EDGE:
                continue

            ppg_ratios = []
            for human_id in common_humans:
                from_ppg = from_humans[human_id]['points_per_game']
                to_ppg = to_humans[human_id]['points_per_game']
                if from_level.skill_value == 10 and to_level.skill_value == 30:
                    print(f"Human {human_id} From PPG: {from_ppg}, To PPG: {to_ppg}")
                if from_ppg > 0 and to_ppg > 0:
                    ppg_ratios.append(to_ppg / from_ppg)

            if not ppg_ratios:
                continue

            # Discard outliers
            ppg_ratios = Config.discard_outliers(np.array(ppg_ratios))

            if len(ppg_ratios) == 0:
                continue

            avg_ppg_ratio = float(sum(ppg_ratios) / len(ppg_ratios))

            # Orient the edge so the stored ratio is >= 1. Separate names are
            # used so the loop variables are not overwritten, which would give
            # later edges of this outer iteration the wrong ids.
            edge_from_id, edge_to_id = from_level_id, to_level_id
            if avg_ppg_ratio < 1.0:
                avg_ppg_ratio = 1 / avg_ppg_ratio
                edge_from_id, edge_to_id = edge_to_id, edge_from_id

            edge = LevelsGraphEdge(
                from_level_id=edge_from_id,
                to_level_id=edge_to_id,
                n_connections=n_connections,
                ppg_ratio=avg_ppg_ratio
            )
            edges[(edge_from_id, edge_to_id)] = edge

        processed_levels += 1
        print(f"\rProcessed {processed_levels}/{total_levels} levels ({(processed_levels/total_levels)*100:.2f}%)", end="")

    # Insert edges into the database
    for edge in edges.values():
        session.add(edge)
    session.commit()

    print("\nLevels graph edges have been populated into the database.")
134
-
135
def propagate_skill_levels(propagation_sequence):
    """Build skill/PPG correlation rows for sequence 0.

    For ``propagation_sequence == 0`` the ``SkillPropagationCorrelation`` table
    is emptied and refilled from edges between sequence-0 levels. The function
    then returns; the propagation code below the ``return`` never runs.

    :param propagation_sequence: Sequence number being processed.
    """
    session = create_session("boss")

    if propagation_sequence == 0:
        # Start from an empty correlation table
        session.query(SkillPropagationCorrelation).delete()
        session.commit()

        # Collect PPG ratios per (lower skill, higher skill) pair
        seed_levels = session.query(Level).filter(Level.skill_propagation_sequence == 0).all()
        seed_ids = {seed.id for seed in seed_levels}
        correlation_data = defaultdict(list)

        for level in seed_levels:
            if level.skill_value == -1:
                continue

            touching_edges = session.query(LevelsGraphEdge).filter(
                (LevelsGraphEdge.from_level_id == level.id) |
                (LevelsGraphEdge.to_level_id == level.id)
            ).all()

            for edge in touching_edges:
                if edge.n_connections < Config.MIN_CONNECTIONS_FOR_CORRELATION:
                    continue

                # Read the edge in the direction level -> target
                outgoing = edge.from_level_id == level.id
                target_level_id = edge.to_level_id if outgoing else edge.from_level_id
                ppg_ratio = edge.ppg_ratio if outgoing else 1 / edge.ppg_ratio

                if target_level_id not in seed_ids:
                    continue

                target_level = session.query(Level).filter_by(id=target_level_id).first()
                if not target_level:
                    continue

                # Every edge is met from both of its ends; keeping only the
                # increasing-skill direction records it once.
                if level.skill_value >= target_level.skill_value:
                    continue

                # Debug prints
                print(f"From Skill {level.skill_value} to {target_level.skill_value} ratio: {ppg_ratio}")

                pair = (level.skill_value, target_level.skill_value)
                correlation_data[pair].append(ppg_ratio)

        # Persist one averaged ratio per skill value pair
        for pair, collected in correlation_data.items():
            skill_value_from, skill_value_to = pair
            ppg_ratios = Config.discard_outliers(np.array(collected))
            if len(ppg_ratios) > 0:
                avg_ppg_ratio = float(sum(ppg_ratios) / len(ppg_ratios))
                session.add(SkillPropagationCorrelation(
                    skill_value_from=skill_value_from,
                    skill_value_to=skill_value_to,
                    ppg_ratio=avg_ppg_ratio
                ))
                session.commit()

    # NOTE(review): this return makes everything below unreachable - confirm it is intentional.
    return
    # Propagate skill levels
    source_levels = session.query(Level).filter(Level.skill_propagation_sequence == propagation_sequence).all()
    suggested_skill_values = defaultdict(list)

    for level in source_levels:
        touching_edges = session.query(LevelsGraphEdge).filter(
            (LevelsGraphEdge.from_level_id == level.id) |
            (LevelsGraphEdge.to_level_id == level.id)
        ).all()

        for edge in touching_edges:
            if edge.n_connections < Config.MIN_CONNECTIONS_FOR_PROPAGATION:
                continue

            outgoing = edge.from_level_id == level.id
            target_level_id = edge.to_level_id if outgoing else edge.from_level_id
            ppg_ratio = edge.ppg_ratio if outgoing else 1 / edge.ppg_ratio

            target_level = session.query(Level).filter_by(id=target_level_id).first()
            if not (target_level and target_level.skill_propagation_sequence == -1):
                continue

            lower_skill = min(level.skill_value, target_level.skill_value)
            higher_skill = max(level.skill_value, target_level.skill_value)
            correlation = session.query(SkillPropagationCorrelation).filter_by(
                skill_value_from=lower_skill,
                skill_value_to=higher_skill,
                ppg_ratio=ppg_ratio if level.skill_value < target_level.skill_value else 1 / ppg_ratio
            ).first()

            if correlation:
                suggested_skill_values[target_level_id].append(correlation.skill_value_to)

    # Update skill values for target levels
    for target_level_id, proposals in suggested_skill_values.items():
        skill_values = Config.discard_outliers(np.array(proposals))
        if len(skill_values) > 0:
            avg_skill_value = float(sum(skill_values) / len(skill_values))
            session.query(Level).filter_by(id=target_level_id).update({
                'skill_value': avg_skill_value,
                'skill_propagation_sequence': propagation_sequence + 1
            })
            session.commit()

    print(f"Skill levels have been propagated for sequence {propagation_sequence}.")
246
-
247
if __name__ == "__main__":
    # Script entry point: rebuild the graph edges, then run every propagation
    # sequence from 0 up to and including the configured maximum.
    build_levels_graph_edges()

    last_sequence = Config.MAX_PROPAGATION_SEQUENCE
    for sequence in range(0, last_sequence + 1):
        propagate_skill_levels(sequence)