hockey-blast-common-lib 0.1.29__tar.gz → 0.1.32__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/PKG-INFO +1 -1
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/aggregate_skater_stats.py +66 -114
- hockey_blast_common_lib-0.1.32/hockey_blast_common_lib/assign_skater_skill.py +48 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/hockey_blast_sample_backup.sql.gz +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/models.py +1 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/options.py +1 -1
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/skills_in_divisions.py +0 -50
- hockey_blast_common_lib-0.1.32/hockey_blast_common_lib/skills_propagation.py +368 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/stats_models.py +14 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib.egg-info/PKG-INFO +1 -1
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib.egg-info/SOURCES.txt +1 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/setup.py +1 -1
- hockey_blast_common_lib-0.1.29/hockey_blast_common_lib/skills_propagation.py +0 -251
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/MANIFEST.in +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/README.md +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/__init__.py +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/aggregate_goalie_stats.py +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/aggregate_human_stats.py +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/aggregate_referee_stats.py +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/db_connection.py +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/dump_sample_db.sh +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/restore_sample_db.sh +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/utils.py +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/wsgi.py +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib.egg-info/dependency_links.txt +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib.egg-info/requires.txt +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib.egg-info/top_level.txt +0 -0
- {hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/setup.cfg +0 -0
@@ -6,17 +6,28 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
6
6
|
from datetime import datetime, timedelta
|
7
7
|
import sqlalchemy
|
8
8
|
|
9
|
-
from hockey_blast_common_lib.models import Game, Goal, Penalty, GameRoster, Organization, Division
|
9
|
+
from hockey_blast_common_lib.models import Game, Goal, Penalty, GameRoster, Organization, Division, Human, Level
|
10
10
|
from hockey_blast_common_lib.stats_models import OrgStatsSkater, DivisionStatsSkater, OrgStatsWeeklySkater, OrgStatsDailySkater, DivisionStatsWeeklySkater, DivisionStatsDailySkater, LevelStatsSkater
|
11
11
|
from hockey_blast_common_lib.db_connection import create_session
|
12
12
|
from sqlalchemy.sql import func, case
|
13
13
|
from hockey_blast_common_lib.options import not_human_names, parse_args, MIN_GAMES_FOR_ORG_STATS, MIN_GAMES_FOR_DIVISION_STATS, MIN_GAMES_FOR_LEVEL_STATS
|
14
14
|
from hockey_blast_common_lib.utils import get_org_id_from_alias, get_human_ids_by_names, get_division_ids_for_last_season_in_all_leagues, get_all_division_ids_for_org
|
15
15
|
from sqlalchemy import func, case, and_
|
16
|
+
from collections import defaultdict
|
16
17
|
|
17
|
-
def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_filter_out,
|
18
|
+
def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_filter_out, debug_human_id=None, aggregation_window=None):
|
18
19
|
human_ids_to_filter = get_human_ids_by_names(session, names_to_filter_out)
|
19
20
|
|
21
|
+
# Get the name of the aggregation, for debug purposes
|
22
|
+
if aggregation_type == 'org':
|
23
|
+
aggregation_name = session.query(Organization).filter(Organization.id == aggregation_id).first().organization_name
|
24
|
+
elif aggregation_type == 'division':
|
25
|
+
aggregation_name = session.query(Division).filter(Division.id == aggregation_id).first().level
|
26
|
+
elif aggregation_type == 'level':
|
27
|
+
aggregation_name = session.query(Level).filter(Level.id == aggregation_id).first().level_name
|
28
|
+
else:
|
29
|
+
aggregation_name = "Unknown"
|
30
|
+
|
20
31
|
if aggregation_type == 'org':
|
21
32
|
if aggregation_window == 'Daily':
|
22
33
|
StatsModel = OrgStatsDailySkater
|
@@ -39,6 +50,10 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
39
50
|
StatsModel = LevelStatsSkater
|
40
51
|
min_games = MIN_GAMES_FOR_LEVEL_STATS
|
41
52
|
filter_condition = Division.level_id == aggregation_id
|
53
|
+
# Add filter to only include games for the last 5 years
|
54
|
+
# five_years_ago = datetime.now() - timedelta(days=5*365)
|
55
|
+
# level_window_filter = func.cast(func.concat(Game.date, ' ', Game.time), sqlalchemy.types.TIMESTAMP) >= five_years_ago
|
56
|
+
# filter_condition = filter_condition & level_window_filter
|
42
57
|
else:
|
43
58
|
raise ValueError("Invalid aggregation type")
|
44
59
|
|
@@ -63,8 +78,8 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
63
78
|
|
64
79
|
# Filter for specific human_id if provided
|
65
80
|
human_filter = []
|
66
|
-
if
|
67
|
-
|
81
|
+
# if debug_human_id:
|
82
|
+
# human_filter = [GameRoster.human_id == debug_human_id]
|
68
83
|
|
69
84
|
# Aggregate games played for each human in each division, excluding goalies
|
70
85
|
games_played_stats = session.query(
|
@@ -72,7 +87,7 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
72
87
|
GameRoster.human_id,
|
73
88
|
func.count(Game.id).label('games_played'),
|
74
89
|
func.array_agg(Game.id).label('game_ids')
|
75
|
-
).join(GameRoster, Game.id == GameRoster.game_id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, GameRoster.human_id).all()
|
90
|
+
).join(GameRoster, Game.id == GameRoster.game_id).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, GameRoster.human_id).all()
|
76
91
|
|
77
92
|
# Aggregate goals for each human in each division, excluding goalies
|
78
93
|
goals_stats = session.query(
|
@@ -80,7 +95,7 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
80
95
|
Goal.goal_scorer_id.label('human_id'),
|
81
96
|
func.count(Goal.id).label('goals'),
|
82
97
|
func.array_agg(Goal.game_id).label('goal_game_ids')
|
83
|
-
).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.goal_scorer_id == GameRoster.human_id)).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.goal_scorer_id).all()
|
98
|
+
).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.goal_scorer_id == GameRoster.human_id)).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.goal_scorer_id).all()
|
84
99
|
|
85
100
|
# Aggregate assists for each human in each division, excluding goalies
|
86
101
|
assists_stats = session.query(
|
@@ -88,14 +103,14 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
88
103
|
Goal.assist_1_id.label('human_id'),
|
89
104
|
func.count(Goal.id).label('assists'),
|
90
105
|
func.array_agg(Goal.game_id).label('assist_game_ids')
|
91
|
-
).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.assist_1_id == GameRoster.human_id)).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.assist_1_id).all()
|
106
|
+
).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.assist_1_id == GameRoster.human_id)).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.assist_1_id).all()
|
92
107
|
|
93
108
|
assists_stats_2 = session.query(
|
94
109
|
Game.org_id,
|
95
110
|
Goal.assist_2_id.label('human_id'),
|
96
111
|
func.count(Goal.id).label('assists'),
|
97
112
|
func.array_agg(Goal.game_id).label('assist_2_game_ids')
|
98
|
-
).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.assist_2_id == GameRoster.human_id)).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.assist_2_id).all()
|
113
|
+
).join(Game, Game.id == Goal.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Goal.assist_2_id == GameRoster.human_id)).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Goal.assist_2_id).all()
|
99
114
|
|
100
115
|
# Aggregate penalties for each human in each division, excluding goalies
|
101
116
|
penalties_stats = session.query(
|
@@ -103,7 +118,7 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
103
118
|
Penalty.penalized_player_id.label('human_id'),
|
104
119
|
func.count(Penalty.id).label('penalties'),
|
105
120
|
func.array_agg(Penalty.game_id).label('penalty_game_ids')
|
106
|
-
).join(Game, Game.id == Penalty.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Penalty.penalized_player_id == GameRoster.human_id)).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Penalty.penalized_player_id).all()
|
121
|
+
).join(Game, Game.id == Penalty.game_id).join(GameRoster, and_(Game.id == GameRoster.game_id, Penalty.penalized_player_id == GameRoster.human_id)).join(Division, Game.division_id == Division.id).filter(filter_condition, ~GameRoster.role.ilike('g'), *human_filter).group_by(Game.org_id, Penalty.penalized_player_id).all()
|
107
122
|
|
108
123
|
# Combine the results
|
109
124
|
stats_dict = {}
|
@@ -111,6 +126,8 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
111
126
|
if stat.human_id in human_ids_to_filter:
|
112
127
|
continue
|
113
128
|
key = (aggregation_id, stat.human_id)
|
129
|
+
if stat.games_played < min_games:
|
130
|
+
continue
|
114
131
|
stats_dict[key] = {
|
115
132
|
'games_played': stat.games_played,
|
116
133
|
'goals': 0,
|
@@ -127,94 +144,26 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
127
144
|
}
|
128
145
|
|
129
146
|
for stat in goals_stats:
|
130
|
-
if stat.human_id in human_ids_to_filter:
|
131
|
-
continue
|
132
147
|
key = (aggregation_id, stat.human_id)
|
133
|
-
if key
|
134
|
-
stats_dict[key] = {
|
135
|
-
'games_played': 0,
|
136
|
-
'goals': stat.goals,
|
137
|
-
'assists': 0,
|
138
|
-
'penalties': 0,
|
139
|
-
'points': stat.goals, # Initialize points with goals
|
140
|
-
'goals_per_game': 0.0,
|
141
|
-
'points_per_game': 0.0,
|
142
|
-
'assists_per_game': 0.0,
|
143
|
-
'penalties_per_game': 0.0,
|
144
|
-
'game_ids': [],
|
145
|
-
'first_game_id': None,
|
146
|
-
'last_game_id': None
|
147
|
-
}
|
148
|
-
else:
|
148
|
+
if key in stats_dict:
|
149
149
|
stats_dict[key]['goals'] += stat.goals
|
150
150
|
stats_dict[key]['points'] += stat.goals # Update points
|
151
151
|
|
152
152
|
for stat in assists_stats:
|
153
|
-
if stat.human_id in human_ids_to_filter:
|
154
|
-
continue
|
155
153
|
key = (aggregation_id, stat.human_id)
|
156
|
-
if key
|
157
|
-
stats_dict[key] = {
|
158
|
-
'games_played': 0,
|
159
|
-
'goals': 0,
|
160
|
-
'assists': stat.assists,
|
161
|
-
'penalties': 0,
|
162
|
-
'points': stat.assists, # Initialize points with assists
|
163
|
-
'goals_per_game': 0.0,
|
164
|
-
'points_per_game': 0.0,
|
165
|
-
'assists_per_game': 0.0,
|
166
|
-
'penalties_per_game': 0.0,
|
167
|
-
'game_ids': [],
|
168
|
-
'first_game_id': None,
|
169
|
-
'last_game_id': None
|
170
|
-
}
|
171
|
-
else:
|
154
|
+
if key in stats_dict:
|
172
155
|
stats_dict[key]['assists'] += stat.assists
|
173
156
|
stats_dict[key]['points'] += stat.assists # Update points
|
174
157
|
|
175
158
|
for stat in assists_stats_2:
|
176
|
-
if stat.human_id in human_ids_to_filter:
|
177
|
-
continue
|
178
159
|
key = (aggregation_id, stat.human_id)
|
179
|
-
if key
|
180
|
-
stats_dict[key] = {
|
181
|
-
'games_played': 0,
|
182
|
-
'goals': 0,
|
183
|
-
'assists': stat.assists,
|
184
|
-
'penalties': 0,
|
185
|
-
'points': stat.assists, # Initialize points with assists
|
186
|
-
'goals_per_game': 0.0,
|
187
|
-
'points_per_game': 0.0,
|
188
|
-
'assists_per_game': 0.0,
|
189
|
-
'penalties_per_game': 0.0,
|
190
|
-
'game_ids': [],
|
191
|
-
'first_game_id': None,
|
192
|
-
'last_game_id': None
|
193
|
-
}
|
194
|
-
else:
|
160
|
+
if key in stats_dict:
|
195
161
|
stats_dict[key]['assists'] += stat.assists
|
196
162
|
stats_dict[key]['points'] += stat.assists # Update points
|
197
163
|
|
198
164
|
for stat in penalties_stats:
|
199
|
-
if stat.human_id in human_ids_to_filter:
|
200
|
-
continue
|
201
165
|
key = (aggregation_id, stat.human_id)
|
202
|
-
if key
|
203
|
-
stats_dict[key] = {
|
204
|
-
'games_played': 0,
|
205
|
-
'goals': 0,
|
206
|
-
'assists': 0,
|
207
|
-
'penalties': stat.penalties,
|
208
|
-
'points': 0, # Initialize points
|
209
|
-
'goals_per_game': 0.0,
|
210
|
-
'points_per_game': 0.0,
|
211
|
-
'assists_per_game': 0.0,
|
212
|
-
'penalties_per_game': 0.0,
|
213
|
-
'game_ids': [],
|
214
|
-
'first_game_id': None,
|
215
|
-
'last_game_id': None
|
216
|
-
}
|
217
|
-
else:
|
166
|
+
if key in stats_dict:
|
218
167
|
stats_dict[key]['penalties'] += stat.penalties
|
219
168
|
|
220
169
|
# Calculate per game stats
|
@@ -237,20 +186,10 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
237
186
|
stat['first_game_id'] = first_game.id if first_game else None
|
238
187
|
stat['last_game_id'] = last_game.id if last_game else None
|
239
188
|
|
240
|
-
# Debug output for totals if filter_human_id is provided
|
241
|
-
if filter_human_id:
|
242
|
-
for key, stat in stats_dict.items():
|
243
|
-
if key[1] == filter_human_id:
|
244
|
-
print(f"Human ID: {filter_human_id}")
|
245
|
-
print(f"Total Games Played: {stat['games_played']}")
|
246
|
-
print(f"Total Goals: {stat['goals']}")
|
247
|
-
print(f"Total Assists: {stat['assists']}")
|
248
|
-
print(f"Total Penalties: {stat['penalties']}")
|
249
|
-
|
250
189
|
# Calculate total_in_rank
|
251
190
|
total_in_rank = len(stats_dict)
|
252
191
|
|
253
|
-
# Assign ranks
|
192
|
+
# Assign ranks within each level
|
254
193
|
def assign_ranks(stats_dict, field):
|
255
194
|
sorted_stats = sorted(stats_dict.items(), key=lambda x: x[1][field], reverse=True)
|
256
195
|
for rank, (key, stat) in enumerate(sorted_stats, start=1):
|
@@ -266,13 +205,22 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
266
205
|
assign_ranks(stats_dict, 'assists_per_game')
|
267
206
|
assign_ranks(stats_dict, 'penalties_per_game')
|
268
207
|
|
208
|
+
# Debug output for specific human
|
209
|
+
if debug_human_id:
|
210
|
+
if any(key[1] == debug_human_id for key in stats_dict):
|
211
|
+
human = session.query(Human).filter(Human.id == debug_human_id).first()
|
212
|
+
human_name = f"{human.first_name} {human.last_name}" if human else "Unknown"
|
213
|
+
print(f"For Human {debug_human_id} ({human_name}) for {aggregation_type} {aggregation_id} ({aggregation_name}) , total_in_rank {total_in_rank} and window {aggregation_window}:")
|
214
|
+
for key, stat in stats_dict.items():
|
215
|
+
if key[1] == debug_human_id:
|
216
|
+
for k, v in stat.items():
|
217
|
+
print(f"{k}: {v}")
|
218
|
+
|
269
219
|
# Insert aggregated stats into the appropriate table with progress output
|
270
220
|
total_items = len(stats_dict)
|
271
221
|
batch_size = 1000
|
272
222
|
for i, (key, stat) in enumerate(stats_dict.items(), 1):
|
273
223
|
aggregation_id, human_id = key
|
274
|
-
if stat['games_played'] < min_games:
|
275
|
-
continue
|
276
224
|
goals_per_game = stat['goals'] / stat['games_played'] if stat['games_played'] > 0 else 0.0
|
277
225
|
points_per_game = (stat['goals'] + stat['assists']) / stat['games_played'] if stat['games_played'] > 0 else 0.0
|
278
226
|
assists_per_game = stat['assists'] / stat['games_played'] if stat['games_played'] > 0 else 0.0
|
@@ -306,33 +254,37 @@ def aggregate_skater_stats(session, aggregation_type, aggregation_id, names_to_f
|
|
306
254
|
# Commit in batches
|
307
255
|
if i % batch_size == 0:
|
308
256
|
session.commit()
|
309
|
-
|
257
|
+
if debug_human_id is None:
|
258
|
+
print(f"\r{i}/{total_items} ({(i/total_items)*100:.2f}%)", end="")
|
259
|
+
|
310
260
|
session.commit()
|
311
|
-
|
312
|
-
|
261
|
+
if debug_human_id is None:
|
262
|
+
print(f"\r{total_items}/{total_items} (100.00%)")
|
313
263
|
|
314
264
|
if __name__ == "__main__":
|
315
265
|
session = create_session("boss")
|
266
|
+
human_id_to_debug = 117076
|
316
267
|
|
317
268
|
# Get all org_id present in the Organization table
|
318
269
|
org_ids = session.query(Organization.id).all()
|
319
270
|
org_ids = [org_id[0] for org_id in org_ids]
|
320
271
|
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
272
|
+
for org_id in org_ids:
|
273
|
+
division_ids = get_all_division_ids_for_org(session, org_id)
|
274
|
+
print(f"Aggregating skater stats for {len(division_ids)} divisions in org_id {org_id}...")
|
275
|
+
total_divisions = len(division_ids)
|
276
|
+
processed_divisions = 0
|
277
|
+
for division_id in division_ids:
|
278
|
+
aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug)
|
279
|
+
aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug, aggregation_window='Weekly')
|
280
|
+
aggregate_skater_stats(session, aggregation_type='division', aggregation_id=division_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug, aggregation_window='Daily')
|
281
|
+
processed_divisions += 1
|
282
|
+
if human_id_to_debug is None:
|
283
|
+
print(f"\rProcessed {processed_divisions}/{total_divisions} divisions ({(processed_divisions/total_divisions)*100:.2f}%)", end="")
|
332
284
|
|
333
|
-
|
334
|
-
|
335
|
-
|
285
|
+
aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug)
|
286
|
+
aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug, aggregation_window='Weekly')
|
287
|
+
aggregate_skater_stats(session, aggregation_type='org', aggregation_id=org_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug, aggregation_window='Daily')
|
336
288
|
|
337
289
|
# Aggregate by level
|
338
290
|
level_ids = session.query(Division.level_id).distinct().all()
|
@@ -342,7 +294,7 @@ if __name__ == "__main__":
|
|
342
294
|
for level_id in level_ids:
|
343
295
|
if level_id is None:
|
344
296
|
continue
|
345
|
-
|
297
|
+
if human_id_to_debug is None:
|
298
|
+
print(f"\rProcessed {processed_levels}/{total_levels} levels ({(processed_levels/total_levels)*100:.2f}%)", end="")
|
346
299
|
processed_levels += 1
|
347
|
-
aggregate_skater_stats(session, aggregation_type='level', aggregation_id=level_id, names_to_filter_out=not_human_names,
|
348
|
-
print("\nDone.")
|
300
|
+
aggregate_skater_stats(session, aggregation_type='level', aggregation_id=level_id, names_to_filter_out=not_human_names, debug_human_id=human_id_to_debug)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
import sys, os
|
2
|
+
|
3
|
+
# Add the package directory to the Python path
|
4
|
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
5
|
+
|
6
|
+
from hockey_blast_common_lib.models import Human, Level
|
7
|
+
from hockey_blast_common_lib.stats_models import LevelStatsSkater
|
8
|
+
from hockey_blast_common_lib.db_connection import create_session
|
9
|
+
from sqlalchemy.sql import func
|
10
|
+
|
11
|
+
def calculate_skater_skill_value(session, human_id, level_stats):
|
12
|
+
max_skill_value = 0
|
13
|
+
|
14
|
+
for stat in level_stats:
|
15
|
+
level = session.query(Level).filter(Level.id == stat.level_id).first()
|
16
|
+
if not level or level.skill_value < 0:
|
17
|
+
continue
|
18
|
+
level_skill_value = level.skill_value
|
19
|
+
ppg_ratio = stat.points_per_game_rank / stat.total_in_rank
|
20
|
+
games_played_ratio = stat.games_played_rank / stat.total_in_rank
|
21
|
+
|
22
|
+
# Take the maximum of the two ratios
|
23
|
+
skill_value = level_skill_value * max(ppg_ratio, games_played_ratio)
|
24
|
+
max_skill_value = max(max_skill_value, skill_value)
|
25
|
+
|
26
|
+
return max_skill_value
|
27
|
+
|
28
|
+
def assign_skater_skill_values():
|
29
|
+
session = create_session("boss")
|
30
|
+
|
31
|
+
humans = session.query(Human).all()
|
32
|
+
total_humans = len(humans)
|
33
|
+
processed_humans = 0
|
34
|
+
|
35
|
+
for human in humans:
|
36
|
+
level_stats = session.query(LevelStatsSkater).filter(LevelStatsSkater.human_id == human.id).all()
|
37
|
+
if level_stats:
|
38
|
+
skater_skill_value = calculate_skater_skill_value(session, human.id, level_stats)
|
39
|
+
human.skater_skill_value = skater_skill_value
|
40
|
+
session.commit()
|
41
|
+
|
42
|
+
processed_humans += 1
|
43
|
+
print(f"\rProcessed {processed_humans}/{total_humans} humans ({(processed_humans/total_humans)*100:.2f}%)", end="")
|
44
|
+
|
45
|
+
print("\nSkater skill values have been assigned to all humans.")
|
46
|
+
|
47
|
+
if __name__ == "__main__":
|
48
|
+
assign_skater_skill_values()
|
Binary file
|
{hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/models.py
RENAMED
@@ -106,6 +106,7 @@ class Human(db.Model):
|
|
106
106
|
last_name = db.Column(db.String(100))
|
107
107
|
first_date = db.Column(db.Date)
|
108
108
|
last_date = db.Column(db.Date)
|
109
|
+
skater_skill_value = db.Column(db.Float, nullable=True)
|
109
110
|
__table_args__ = (
|
110
111
|
db.UniqueConstraint('first_name', 'middle_name', 'last_name', name='_human_name_uc'),
|
111
112
|
)
|
@@ -47,55 +47,6 @@ def analyze_levels(org):
|
|
47
47
|
|
48
48
|
session.close()
|
49
49
|
|
50
|
-
def reset_skill_values_in_divisions():
|
51
|
-
session = create_session("boss")
|
52
|
-
|
53
|
-
# Fetch all records from the Division table
|
54
|
-
divisions = session.query(Division).all()
|
55
|
-
|
56
|
-
for division in divisions:
|
57
|
-
# Look up the Skill table using the level from Division
|
58
|
-
div_level = division.level
|
59
|
-
# Query to find the matching Skill
|
60
|
-
level = session.query(Level).filter(Level.org_id == division.org_id, Level.level_name == div_level).one_or_none()
|
61
|
-
|
62
|
-
if not level:
|
63
|
-
# If no match found, check each alternative name individually
|
64
|
-
skills = session.query(Level).filter(Level.org_id == division.org_id).all()
|
65
|
-
for s in skills:
|
66
|
-
alternative_names = s.level_alternative_name.split(',')
|
67
|
-
if div_level in alternative_names:
|
68
|
-
level = s
|
69
|
-
break
|
70
|
-
|
71
|
-
if level:
|
72
|
-
# Assign the skill_value and set skill_propagation_sequence to 0
|
73
|
-
division.level_id = level.id
|
74
|
-
if level.is_seed:
|
75
|
-
level.skill_propagation_sequence = 0
|
76
|
-
else:
|
77
|
-
level.skill_propagation_sequence = -1
|
78
|
-
level.skill_value = -1
|
79
|
-
else:
|
80
|
-
# Add new Skill with values previously used for division
|
81
|
-
new_level = Level(
|
82
|
-
org_id=division.org_id,
|
83
|
-
skill_value=-1,
|
84
|
-
level_name=division.level,
|
85
|
-
level_alternative_name='',
|
86
|
-
is_seed=False,
|
87
|
-
skill_propagation_sequence=-1
|
88
|
-
)
|
89
|
-
session.add(new_level)
|
90
|
-
session.commit()
|
91
|
-
division.skill_id = new_level.id
|
92
|
-
print(f"Created new Level for Division {division.level}")
|
93
|
-
|
94
|
-
# Commit the changes to the Division
|
95
|
-
session.commit()
|
96
|
-
|
97
|
-
print("Level values and propagation sequences have been populated into the Division table.")
|
98
|
-
|
99
50
|
def fill_seed_skills():
|
100
51
|
session = create_session("boss")
|
101
52
|
|
@@ -189,6 +140,5 @@ def populate_league_ids():
|
|
189
140
|
if __name__ == "__main__":
|
190
141
|
# delete_all_skills()
|
191
142
|
#fill_seed_skills()
|
192
|
-
reset_skill_values_in_divisions()
|
193
143
|
#populate_season_ids() # Call the function to populate season_ids
|
194
144
|
#populate_league_ids() # Call the new function to populate league_ids
|
@@ -0,0 +1,368 @@
|
|
1
|
+
import sys
|
2
|
+
import os
|
3
|
+
from collections import defaultdict
|
4
|
+
import numpy as np
|
5
|
+
|
6
|
+
# Add the project root directory to the Python path
|
7
|
+
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
8
|
+
|
9
|
+
from hockey_blast_common_lib.models import Level, Division
|
10
|
+
from hockey_blast_common_lib.stats_models import LevelsGraphEdge, LevelStatsSkater, SkillValuePPGRatio
|
11
|
+
from hockey_blast_common_lib.db_connection import create_session
|
12
|
+
from sqlalchemy import func
|
13
|
+
|
14
|
+
import numpy as np
|
15
|
+
|
16
|
+
class Config:
|
17
|
+
MIN_GAMES_PLAYED_FOR_EDGE = 10
|
18
|
+
MIN_PPG_FOR_EDGE = 0.5
|
19
|
+
MIN_HUMANS_FOR_EDGE = 2
|
20
|
+
MAX_PROPAGATION_SEQUENCE = 4
|
21
|
+
MIN_CONNECTIONS_FOR_CORRELATION = 20
|
22
|
+
MIN_CONNECTIONS_FOR_PROPAGATION = 5
|
23
|
+
MAX_SKILL_DIFF_IN_EDGE = 30
|
24
|
+
|
25
|
+
@staticmethod
|
26
|
+
def discard_outliers(data, m=2):
|
27
|
+
"""
|
28
|
+
Discard outliers from the data using the modified Z-score method.
|
29
|
+
:param data: List of data points
|
30
|
+
:param m: Threshold for the modified Z-score
|
31
|
+
:return: List of data points with outliers removed
|
32
|
+
"""
|
33
|
+
if len(data) == 0:
|
34
|
+
return data
|
35
|
+
median = np.median(data)
|
36
|
+
diff = np.abs(data - median)
|
37
|
+
med_abs_deviation = np.median(diff)
|
38
|
+
if med_abs_deviation == 0:
|
39
|
+
return data
|
40
|
+
modified_z_score = 0.6745 * diff / med_abs_deviation
|
41
|
+
return data[modified_z_score < m]
|
42
|
+
|
43
|
+
def reset_skill_values_in_divisions():
|
44
|
+
session = create_session("boss")
|
45
|
+
|
46
|
+
# Fetch all records from the Division table
|
47
|
+
divisions = session.query(Division).all()
|
48
|
+
|
49
|
+
for division in divisions:
|
50
|
+
# Look up the Skill table using the level from Division
|
51
|
+
div_level = division.level
|
52
|
+
# Query to find the matching Skill
|
53
|
+
level = session.query(Level).filter(Level.org_id == division.org_id, Level.level_name == div_level).one_or_none()
|
54
|
+
|
55
|
+
if not level:
|
56
|
+
# If no match found, check each alternative name individually
|
57
|
+
skills = session.query(Level).filter(Level.org_id == division.org_id).all()
|
58
|
+
for s in skills:
|
59
|
+
alternative_names = s.level_alternative_name.split(',')
|
60
|
+
if div_level in alternative_names:
|
61
|
+
level = s
|
62
|
+
break
|
63
|
+
|
64
|
+
if level:
|
65
|
+
# Assign the skill_value and set skill_propagation_sequence to 0
|
66
|
+
division.level_id = level.id
|
67
|
+
if level.is_seed:
|
68
|
+
level.skill_propagation_sequence = 0
|
69
|
+
else:
|
70
|
+
level.skill_propagation_sequence = -1
|
71
|
+
level.skill_value = -1
|
72
|
+
else:
|
73
|
+
# Add new Skill with values previously used for division
|
74
|
+
new_level = Level(
|
75
|
+
org_id=division.org_id,
|
76
|
+
skill_value=-1,
|
77
|
+
level_name=division.level,
|
78
|
+
level_alternative_name='',
|
79
|
+
is_seed=False,
|
80
|
+
skill_propagation_sequence=-1
|
81
|
+
)
|
82
|
+
session.add(new_level)
|
83
|
+
session.commit()
|
84
|
+
division.skill_id = new_level.id
|
85
|
+
print(f"Created new Level for Division {division.level}")
|
86
|
+
|
87
|
+
# Commit the changes to the Division
|
88
|
+
session.commit()
|
89
|
+
|
90
|
+
print("Level values and propagation sequences have been populated into the Division table.")
|
91
|
+
|
92
|
+
def build_levels_graph_edges():
|
93
|
+
# Creates unique edges from levelA to levelB (there is no reverse edge levelB to levelA)
|
94
|
+
session = create_session("boss")
|
95
|
+
|
96
|
+
# Delete all existing edges
|
97
|
+
session.query(LevelsGraphEdge).delete()
|
98
|
+
session.commit()
|
99
|
+
|
100
|
+
# Query to get all level stats
|
101
|
+
level_stats = session.query(LevelStatsSkater).all()
|
102
|
+
|
103
|
+
# Dictionary to store stats by level and human
|
104
|
+
level_human_stats = defaultdict(lambda: defaultdict(dict))
|
105
|
+
|
106
|
+
for stat in level_stats:
|
107
|
+
if stat.games_played >= Config.MIN_GAMES_PLAYED_FOR_EDGE and stat.points_per_game >= Config.MIN_PPG_FOR_EDGE:
|
108
|
+
level_human_stats[stat.level_id][stat.human_id] = {
|
109
|
+
'games_played': stat.games_played,
|
110
|
+
'points_per_game': stat.points_per_game
|
111
|
+
}
|
112
|
+
|
113
|
+
# Dictionary to store edges
|
114
|
+
edges = {}
|
115
|
+
|
116
|
+
# Build edges
|
117
|
+
total_levels = len(level_human_stats)
|
118
|
+
processed_levels = 0
|
119
|
+
for from_level_id, from_humans in level_human_stats.items():
|
120
|
+
from_level = session.query(Level).filter_by(id=from_level_id).first()
|
121
|
+
for to_level_id, to_humans in level_human_stats.items():
|
122
|
+
to_level = session.query(Level).filter_by(id=to_level_id).first()
|
123
|
+
if from_level.id >= to_level.id:
|
124
|
+
continue
|
125
|
+
|
126
|
+
common_humans = set(from_humans.keys()) & set(to_humans.keys())
|
127
|
+
n_connections = len(common_humans)
|
128
|
+
n_games = 0
|
129
|
+
|
130
|
+
if n_connections < Config.MIN_HUMANS_FOR_EDGE:
|
131
|
+
continue
|
132
|
+
|
133
|
+
ppg_ratios = []
|
134
|
+
# if from_level.id == 223 and to_level.id == 219: #216
|
135
|
+
# print(f"Debug: From Level ID: {from_level.id}, To Level ID: {to_level.id}")
|
136
|
+
for human_id in common_humans:
|
137
|
+
from_ppg = from_humans[human_id]['points_per_game']
|
138
|
+
to_ppg = to_humans[human_id]['points_per_game']
|
139
|
+
from_games = from_humans[human_id]['games_played']
|
140
|
+
to_games = to_humans[human_id]['games_played']
|
141
|
+
min_games = min(from_games, to_games)
|
142
|
+
n_games += min_games
|
143
|
+
|
144
|
+
# if from_level.id == 223 and to_level.id == 219: #216
|
145
|
+
# print(f"Human {human_id} From PPG: {from_ppg}, To PPG: {to_ppg}, Min Games: {min_games} n_games: {n_games}")
|
146
|
+
|
147
|
+
if from_ppg > 0 and to_ppg > 0:
|
148
|
+
ppg_ratios.append(to_ppg / from_ppg)
|
149
|
+
|
150
|
+
if not ppg_ratios:
|
151
|
+
continue
|
152
|
+
|
153
|
+
# Discard outliers
|
154
|
+
ppg_ratios = Config.discard_outliers(np.array(ppg_ratios))
|
155
|
+
|
156
|
+
if len(ppg_ratios) == 0:
|
157
|
+
continue
|
158
|
+
|
159
|
+
avg_ppg_ratio = float(sum(ppg_ratios) / len(ppg_ratios))
|
160
|
+
|
161
|
+
# if sorted([from_level.id, to_level.id]) == [219, 223]:
|
162
|
+
# print(f"From {from_level_id} to {to_level_id} n_connections {n_connections} n_games: {n_games}")
|
163
|
+
|
164
|
+
edge = LevelsGraphEdge(
|
165
|
+
from_level_id=from_level_id,
|
166
|
+
to_level_id=to_level_id,
|
167
|
+
n_connections=n_connections,
|
168
|
+
ppg_ratio=avg_ppg_ratio,
|
169
|
+
n_games=n_games # Store the number of games
|
170
|
+
)
|
171
|
+
edges[(from_level_id, to_level_id)] = edge
|
172
|
+
|
173
|
+
processed_levels += 1
|
174
|
+
print(f"\rProcessed {processed_levels}/{total_levels} levels ({(processed_levels/total_levels)*100:.2f}%)", end="")
|
175
|
+
|
176
|
+
# Insert edges into the database
|
177
|
+
for edge in edges.values():
|
178
|
+
session.add(edge)
|
179
|
+
session.commit()
|
180
|
+
|
181
|
+
print("\nLevels graph edges have been populated into the database.")
|
182
|
+
|
183
|
+
def propagate_skill_levels(propagation_sequence):
    """Propagate Level.skill_value across the levels graph for one sequence step.

    For sequence 0, first rebuilds the SkillValuePPGRatio correlation table from
    seed levels (skill_propagation_sequence == 0), then — for every level in the
    current sequence — suggests skill values for neighbouring levels that have no
    skill yet (skill_propagation_sequence == -1) by inter/extrapolating along the
    PPG-ratio correlations, and writes the weighted-average suggestion back.

    :param propagation_sequence: which propagation wave to run (0 = seed wave)
    """
    # Bounds of the skill values seen in the correlation data; used later to
    # avoid extrapolating past the known range.  NOTE(review): these are only
    # populated inside the `propagation_sequence == 0` branch — for later
    # sequences they stay +/-inf, so the edge-side guards below never fire.
    min_skill_value = float('inf')
    max_skill_value = float('-inf')

    session = create_session("boss")

    if propagation_sequence == 0:
        # Delete all existing correlation data
        session.query(SkillValuePPGRatio).delete()
        session.commit()

        # Build and save the correlation data from the seed levels only.
        levels = session.query(Level).filter(Level.skill_propagation_sequence == 0).all()
        level_ids = {level.id for level in levels}
        # (skill_from, skill_to) -> list of (ppg_ratio, n_games) samples
        correlation_data = defaultdict(list)

        for level in levels:
            # -1 marks "skill unknown"; such levels cannot seed correlations.
            if level.skill_value == -1:
                continue

            edges = session.query(LevelsGraphEdge).filter(
                (LevelsGraphEdge.from_level_id == level.id) |
                (LevelsGraphEdge.to_level_id == level.id)
            ).all()

            for edge in edges:
                if edge.n_connections < Config.MIN_CONNECTIONS_FOR_CORRELATION:
                    continue

                if edge.from_level_id == level.id:
                    target_level_id = edge.to_level_id
                    ppg_ratio_edge = edge.ppg_ratio
                else:
                    # We go over same edge twice in this logic, let's skip the reverse edge
                    continue

                # Only seed-to-seed edges contribute to the correlation data.
                if target_level_id not in level_ids:
                    continue

                target_level = session.query(Level).filter_by(id=target_level_id).first()
                if target_level:
                    skill_value_from = level.skill_value
                    skill_value_to = target_level.skill_value

                    # Same skill value - no correlation
                    if skill_value_from == skill_value_to:
                        continue

                    # Skip pairs whose skill gap is too wide to be a meaningful
                    # correlation sample.
                    if abs(skill_value_from - skill_value_to) > Config.MAX_SKILL_DIFF_IN_EDGE:
                        continue

                    # Ensure INCREASING SKILL VALUES for the correlation data!
                    if skill_value_from > skill_value_to:
                        skill_value_from, skill_value_to = skill_value_to, skill_value_from
                        ppg_ratio_edge = 1 / ppg_ratio_edge

                    correlation_data[(skill_value_from, skill_value_to)].append(
                        (ppg_ratio_edge, edge.n_games)
                    )

        # Save correlation data to the database (one row per skill-value pair,
        # averaged over the surviving samples, weighted by games played).
        for (skill_value_from, skill_value_to), ppg_ratios in correlation_data.items():
            ppg_ratios = [(ppg_ratio, n_games) for ppg_ratio, n_games in ppg_ratios]
            ppg_ratios_array = np.array(ppg_ratios, dtype=[('ppg_ratio', float), ('n_games', int)])
            ppg_ratios_filtered = Config.discard_outliers(ppg_ratios_array['ppg_ratio'])
            if len(ppg_ratios_filtered) > 0:
                # NOTE(review): `ppg_ratio in ppg_ratios_filtered` is a float
                # membership test against a numpy array — O(n) per sample and
                # fragile if two samples share a value; verify intent.
                avg_ppg_ratio = float(sum(ppg_ratio * n_games for ppg_ratio, n_games in ppg_ratios if ppg_ratio in ppg_ratios_filtered) / sum(n_games for ppg_ratio, n_games in ppg_ratios if ppg_ratio in ppg_ratios_filtered))
                total_n_games = sum(n_games for ppg_ratio, n_games in ppg_ratios if ppg_ratio in ppg_ratios_filtered)
                correlation = SkillValuePPGRatio(
                    from_skill_value=skill_value_from,
                    to_skill_value=skill_value_to,
                    ppg_ratio=avg_ppg_ratio,
                    n_games=total_n_games  # Store the sum of games
                )
                session.add(correlation)
                session.commit()
                # Track the overall skill range covered by the correlations.
                min_skill_value = min(min_skill_value, skill_value_from, skill_value_to)
                max_skill_value = max(max_skill_value, skill_value_from, skill_value_to)

    # Propagate skill levels from the current sequence's levels outward.
    levels = session.query(Level).filter(Level.skill_propagation_sequence == propagation_sequence).all()
    # target_level_id -> list of suggested skill values (one per source level)
    suggested_skill_values = defaultdict(list)

    for level in levels:
        edges = session.query(LevelsGraphEdge).filter(
            (LevelsGraphEdge.from_level_id == level.id) |
            (LevelsGraphEdge.to_level_id == level.id)
        ).all()

        for edge in edges:
            if edge.n_connections < Config.MIN_CONNECTIONS_FOR_PROPAGATION:
                continue

            # Orient the edge so ppg_ratio_edge goes from `level` to the target.
            if edge.from_level_id == level.id:
                target_level_id = edge.to_level_id
                ppg_ratio_edge = edge.ppg_ratio
            else:
                target_level_id = edge.from_level_id
                ppg_ratio_edge = 1 / edge.ppg_ratio

            target_level = session.query(Level).filter_by(id=target_level_id).first()
            # Only propagate into levels that have no skill assigned yet.
            if target_level and target_level.skill_propagation_sequence == -1:
                # All correlations whose skill interval contains this level's skill.
                correlations = session.query(SkillValuePPGRatio).filter(
                    (SkillValuePPGRatio.from_skill_value <= level.skill_value) &
                    (SkillValuePPGRatio.to_skill_value >= level.skill_value)
                ).all()

                if correlations:
                    weighted_skill_values = []
                    for correlation in correlations:
                        # Skill value always increases in the correlation data.
                        # Avoid extrapolating from the end of the edge and away
                        # from the known range.

                        # Check left side of the edge
                        if (level.skill_value == correlation.from_skill_value and level.skill_value > min_skill_value):
                            if ppg_ratio_edge < 1:
                                continue
                        # Check right side of the edge
                        if (level.skill_value == correlation.to_skill_value and level.skill_value < max_skill_value):
                            if ppg_ratio_edge > 1:
                                continue

                        # First confirm which way we are going: flip the
                        # correlation so its direction matches the edge's.
                        if (ppg_ratio_edge < 1 and correlation.ppg_ratio > 1) or (ppg_ratio_edge > 1 and correlation.ppg_ratio < 1):
                            # Reverse the correlation
                            from_skill_value=correlation.to_skill_value
                            to_skill_value=correlation.from_skill_value
                            ppg_ratio_range = 1 / correlation.ppg_ratio
                        else:
                            from_skill_value=correlation.from_skill_value
                            to_skill_value=correlation.to_skill_value
                            ppg_ratio_range = correlation.ppg_ratio

                        # Now both ratios are either < 1 or > 1; normalise to > 1.
                        if ppg_ratio_edge < 1:
                            ppg_ratio_for_extrapolation = 1 / ppg_ratio_edge
                            ppg_ratio_range = 1 / ppg_ratio_range
                        else:
                            ppg_ratio_for_extrapolation = ppg_ratio_edge

                        # Interpolate or extrapolate the skill value linearly in
                        # the ratio (NOTE(review): linear in ratio, not log-ratio
                        # — confirm that is the intended model).
                        skill_value_range = to_skill_value - from_skill_value
                        skill_value_diff = (ppg_ratio_for_extrapolation / ppg_ratio_range) * skill_value_range
                        new_skill_value = level.skill_value + skill_value_diff
                        weighted_skill_values.append((new_skill_value, correlation.n_games))

                    # Weighted average of the suggestions from all correlations.
                    # NOTE(review): if every correlation was skipped above,
                    # weighted_skill_values is empty and total_n_games == 0 →
                    # ZeroDivisionError; confirm this cannot happen in practice.
                    total_n_games = sum(n_games for _, n_games in weighted_skill_values)
                    weighted_avg_skill_value = sum(skill_value * n_games for skill_value, n_games in weighted_skill_values) / total_n_games
                    suggested_skill_values[target_level_id].append(weighted_avg_skill_value)

    # Update skill values for target levels (outlier-filtered average of all
    # suggestions), and advance them to the next propagation sequence.
    for target_level_id, skill_values in suggested_skill_values.items():
        skill_values = Config.discard_outliers(np.array(skill_values))
        if len(skill_values) > 0:
            avg_skill_value = float(sum(skill_values) / len(skill_values))
            # NOTE(review): max() enforces a *floor* of 9.6 — a magic constant;
            # confirm min() (a ceiling) wasn't intended, and name the constant.
            avg_skill_value = max(avg_skill_value, 9.6)
            if avg_skill_value < min_skill_value:
                avg_skill_value = min_skill_value - 0.01
            session.query(Level).filter_by(id=target_level_id).update({
                'skill_value': avg_skill_value,
                'skill_propagation_sequence': propagation_sequence + 1
            })
    session.commit()

    print(f"Skill levels have been propagated for sequence {propagation_sequence}.")
|
362
|
+
|
363
|
+
if __name__ == "__main__":
    # Reset all division skill values to their seed state, rebuild the
    # level-to-level PPG-ratio graph, then run each propagation wave in order
    # (sequence 0 also rebuilds the correlation table).
    reset_skill_values_in_divisions()
    build_levels_graph_edges()

    for sequence in range(Config.MAX_PROPAGATION_SEQUENCE + 1):
        propagate_skill_levels(sequence)
|
@@ -583,6 +583,7 @@ class LevelsGraphEdge(db.Model):
|
|
583
583
|
to_level_id = db.Column(db.Integer, db.ForeignKey('levels.id'), nullable=False)
|
584
584
|
n_connections = db.Column(db.Integer, nullable=False)
|
585
585
|
ppg_ratio = db.Column(db.Float, nullable=False)
|
586
|
+
n_games = db.Column(db.Integer, nullable=False) # New field to store the number of games
|
586
587
|
|
587
588
|
__table_args__ = (
|
588
589
|
db.UniqueConstraint('from_level_id', 'to_level_id', name='_from_to_level_uc'),
|
@@ -598,3 +599,16 @@ class SkillPropagationCorrelation(db.Model):
|
|
598
599
|
__table_args__ = (
|
599
600
|
db.UniqueConstraint('skill_value_from', 'skill_value_to', 'ppg_ratio', name='_skill_value_ppg_ratio_uc'),
|
600
601
|
)
|
602
|
+
|
603
|
+
# How PPG changes with INCREASING SKILL VALUES
|
604
|
+
# How PPG changes with INCREASING SKILL VALUES
class SkillValuePPGRatio(db.Model):
    """Aggregated points-per-game ratio between two skill values.

    One row per (from_skill_value, to_skill_value) pair, with from < to by
    construction in the writer (skills_propagation); ppg_ratio is the
    games-weighted average ratio and n_games the total games behind it.
    """
    __tablename__ = 'skill_value_ppg_ratios'
    id = db.Column(db.Integer, primary_key=True)
    # Lower end of the skill interval this ratio was measured over.
    from_skill_value = db.Column(db.Float, nullable=False)
    # Upper end of the skill interval.
    to_skill_value = db.Column(db.Float, nullable=False)
    # Average PPG ratio going from from_skill_value to to_skill_value.
    ppg_ratio = db.Column(db.Float, nullable=False)
    # Sum of games over all samples that produced this ratio (used as weight).
    n_games = db.Column(db.Integer, nullable=False)

    __table_args__ = (
        # At most one ratio row per skill-value pair.
        db.UniqueConstraint('from_skill_value', 'to_skill_value', name='_from_to_skill_value_uc'),
    )
|
@@ -6,6 +6,7 @@ hockey_blast_common_lib/aggregate_goalie_stats.py
|
|
6
6
|
hockey_blast_common_lib/aggregate_human_stats.py
|
7
7
|
hockey_blast_common_lib/aggregate_referee_stats.py
|
8
8
|
hockey_blast_common_lib/aggregate_skater_stats.py
|
9
|
+
hockey_blast_common_lib/assign_skater_skill.py
|
9
10
|
hockey_blast_common_lib/db_connection.py
|
10
11
|
hockey_blast_common_lib/dump_sample_db.sh
|
11
12
|
hockey_blast_common_lib/hockey_blast_sample_backup.sql.gz
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
2
2
|
|
3
3
|
setup(
|
4
4
|
name='hockey-blast-common-lib', # The name of your package
|
5
|
-
version='0.1.
|
5
|
+
version='0.1.32',
|
6
6
|
description='Common library for shared functionality and DB models',
|
7
7
|
author='Pavel Kletskov',
|
8
8
|
author_email='kletskov@gmail.com',
|
@@ -1,251 +0,0 @@
|
|
1
|
-
import sys
|
2
|
-
import os
|
3
|
-
from collections import defaultdict
|
4
|
-
import numpy as np
|
5
|
-
|
6
|
-
# Add the project root directory to the Python path
|
7
|
-
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
8
|
-
|
9
|
-
from hockey_blast_common_lib.models import Level, Season
|
10
|
-
from hockey_blast_common_lib.stats_models import LevelsGraphEdge, LevelStatsSkater, SkillPropagationCorrelation
|
11
|
-
from hockey_blast_common_lib.db_connection import create_session
|
12
|
-
from sqlalchemy import func
|
13
|
-
|
14
|
-
import numpy as np
|
15
|
-
|
16
|
-
class Config:
    """Tuning knobs for the skill-propagation pipeline."""

    MIN_GAMES_PLAYED = 10
    MIN_PPG = 0.3
    MIN_HUMANS_FOR_EDGE = 5
    MAX_START_DATE_DIFF_MONTHS = 15
    MAX_PROPAGATION_SEQUENCE = 0
    MIN_CONNECTIONS_FOR_CORRELATION = 40
    MIN_CONNECTIONS_FOR_PROPAGATION = 3

    @staticmethod
    def discard_outliers(data, m=2):
        """Drop outliers via the modified Z-score (median/MAD) method.

        :param data: numpy array of data points
        :param m: modified Z-score threshold; points at or above it are dropped
        :return: array containing only the points with score below ``m``
        """
        # Nothing to filter on an empty input.
        if len(data) == 0:
            return data
        deviations = np.abs(data - np.median(data))
        mad = np.median(deviations)
        # A zero MAD (e.g. constant data) would divide by zero; keep everything.
        if mad == 0:
            return data
        scores = 0.6745 * deviations / mad
        return data[scores < m]
|
42
|
-
|
43
|
-
|
44
|
-
def build_levels_graph_edges():
    """Rebuild the LevelsGraphEdge table from per-level skater stats.

    Two levels get an edge when enough skaters (MIN_HUMANS_FOR_EDGE) played in
    both, their seasons started close enough together, and the outlier-filtered
    average PPG ratio between them could be computed.  Edges are stored with
    ppg_ratio >= 1, oriented from the lower-PPG level to the higher-PPG one.

    Fixes vs. previous revision:
    - removed the author-marked "TMP DEBUG HACK" that skipped every level pair
      except skill 10 -> 30, and the per-human debug print tied to it;
    - the ratio-inversion branch no longer reassigns the outer-loop variable
      `from_level_id` (that clobbered the iteration key for all subsequent
      inner-loop pairs); local copies are swapped instead.
    """
    session = create_session("boss")

    # Delete all existing edges
    session.query(LevelsGraphEdge).delete()
    session.commit()

    # Query to get all level stats
    level_stats = session.query(LevelStatsSkater).all()

    # Dictionary to store stats by level and human
    level_human_stats = defaultdict(lambda: defaultdict(dict))

    # Keep only skaters with enough games and a meaningful PPG.
    for stat in level_stats:
        if stat.games_played >= Config.MIN_GAMES_PLAYED and stat.points_per_game >= Config.MIN_PPG:
            level_human_stats[stat.aggregation_id][stat.human_id] = {
                'games_played': stat.games_played,
                'points_per_game': stat.points_per_game
            }

    # Dictionary to store edges, keyed by (from_level_id, to_level_id).
    edges = {}

    # Build edges
    total_levels = len(level_human_stats)
    processed_levels = 0
    for from_level_id, from_humans in level_human_stats.items():
        from_level = session.query(Level).filter_by(id=from_level_id).first()
        from_season = session.query(Season).filter_by(id=from_level.season_id).first()
        for to_level_id, to_humans in level_human_stats.items():
            to_level = session.query(Level).filter_by(id=to_level_id).first()
            to_season = session.query(Season).filter_by(id=to_level.season_id).first()

            # Process each unordered pair once, lower skill first.
            if from_level.skill_value >= to_level.skill_value:
                continue

            # Check if the start dates are within the allowed difference
            if abs((from_season.start_date - to_season.start_date).days) > Config.MAX_START_DATE_DIFF_MONTHS * 30:
                continue

            common_humans = set(from_humans.keys()) & set(to_humans.keys())
            n_connections = len(common_humans)

            if n_connections < Config.MIN_HUMANS_FOR_EDGE:
                continue

            # Per-skater PPG ratios between the two levels.
            ppg_ratios = []
            for human_id in common_humans:
                from_ppg = from_humans[human_id]['points_per_game']
                to_ppg = to_humans[human_id]['points_per_game']
                if from_ppg > 0 and to_ppg > 0:
                    ppg_ratios.append(to_ppg / from_ppg)

            if not ppg_ratios:
                continue

            # Discard outliers
            ppg_ratios = Config.discard_outliers(np.array(ppg_ratios))

            if len(ppg_ratios) == 0:
                continue

            avg_ppg_ratio = float(sum(ppg_ratios) / len(ppg_ratios))
            # Normalise so the stored ratio is >= 1, flipping the edge
            # direction when needed.  Use local copies so the loop variables
            # (and the dict key below) are not corrupted.
            edge_from_id, edge_to_id = from_level_id, to_level_id
            if avg_ppg_ratio < 1.0:
                avg_ppg_ratio = 1 / avg_ppg_ratio
                edge_from_id, edge_to_id = edge_to_id, edge_from_id

            edge = LevelsGraphEdge(
                from_level_id=edge_from_id,
                to_level_id=edge_to_id,
                n_connections=n_connections,
                ppg_ratio=avg_ppg_ratio
            )
            edges[(edge_from_id, edge_to_id)] = edge

        processed_levels += 1
        print(f"\rProcessed {processed_levels}/{total_levels} levels ({(processed_levels/total_levels)*100:.2f}%)", end="")

    # Insert edges into the database
    for edge in edges.values():
        session.add(edge)
    session.commit()

    print("\nLevels graph edges have been populated into the database.")
|
134
|
-
|
135
|
-
def propagate_skill_levels(propagation_sequence):
    """Propagate Level.skill_value across the levels graph (early revision).

    For sequence 0 this builds the SkillPropagationCorrelation table from the
    seed levels and then RETURNS — note that because of that early return the
    propagation phase below is unreachable for sequence 0, and with
    Config.MAX_PROPAGATION_SEQUENCE == 0 it is never executed at all.

    :param propagation_sequence: which propagation wave to run (0 = seed wave)
    """
    session = create_session("boss")

    if propagation_sequence == 0:
        # Delete all existing correlation data
        session.query(SkillPropagationCorrelation).delete()
        session.commit()

        # Build and save the correlation data from the seed levels.
        levels = session.query(Level).filter(Level.skill_propagation_sequence == 0).all()
        level_ids = {level.id for level in levels}
        # (skill_from, skill_to) -> list of ppg ratios
        correlation_data = defaultdict(list)

        for level in levels:
            # -1 marks "skill unknown"; such levels cannot seed correlations.
            if level.skill_value == -1:
                continue

            edges = session.query(LevelsGraphEdge).filter(
                (LevelsGraphEdge.from_level_id == level.id) |
                (LevelsGraphEdge.to_level_id == level.id)
            ).all()

            for edge in edges:
                if edge.n_connections < Config.MIN_CONNECTIONS_FOR_CORRELATION:
                    continue

                # Orient the ratio so it runs from `level` to the target level.
                if edge.from_level_id == level.id:
                    target_level_id = edge.to_level_id
                    ppg_ratio = edge.ppg_ratio
                else:
                    target_level_id = edge.from_level_id
                    ppg_ratio = 1 / edge.ppg_ratio

                # Only seed-to-seed edges contribute.
                if target_level_id not in level_ids:
                    continue

                target_level = session.query(Level).filter_by(id=target_level_id).first()
                if target_level:
                    skill_value_from = level.skill_value
                    skill_value_to = target_level.skill_value

                    # Since we go over all levels in the sequence 0, we will see each edge twice
                    # This condition eliminates duplicates
                    if skill_value_from >= skill_value_to:
                        continue

                    # Debug prints
                    print(f"From Skill {level.skill_value} to {target_level.skill_value} ratio: {ppg_ratio}")

                    correlation_data[(skill_value_from, skill_value_to)].append(
                        ppg_ratio
                    )

        # Save correlation data to the database (outlier-filtered average per
        # skill-value pair).
        for (skill_value_from, skill_value_to), ppg_ratios in correlation_data.items():
            ppg_ratios = Config.discard_outliers(np.array(ppg_ratios))
            if len(ppg_ratios) > 0:
                avg_ppg_ratio = float(sum(ppg_ratios) / len(ppg_ratios))
                correlation = SkillPropagationCorrelation(
                    skill_value_from=skill_value_from,
                    skill_value_to=skill_value_to,
                    ppg_ratio=avg_ppg_ratio
                )
                session.add(correlation)
        session.commit()

        # NOTE(review): this return short-circuits sequence 0 before any
        # propagation happens — the code below only runs for sequence > 0.
        return
    # Propagate skill levels
    levels = session.query(Level).filter(Level.skill_propagation_sequence == propagation_sequence).all()
    # target_level_id -> list of suggested skill values
    suggested_skill_values = defaultdict(list)

    for level in levels:
        edges = session.query(LevelsGraphEdge).filter(
            (LevelsGraphEdge.from_level_id == level.id) |
            (LevelsGraphEdge.to_level_id == level.id)
        ).all()

        for edge in edges:
            if edge.n_connections < Config.MIN_CONNECTIONS_FOR_PROPAGATION:
                continue

            if edge.from_level_id == level.id:
                target_level_id = edge.to_level_id
                ppg_ratio = edge.ppg_ratio
            else:
                target_level_id = edge.from_level_id
                ppg_ratio = 1 / edge.ppg_ratio

            target_level = session.query(Level).filter_by(id=target_level_id).first()
            # Only propagate into levels with no skill assigned yet.
            if target_level and target_level.skill_propagation_sequence == -1:
                # NOTE(review): filtering by exact float equality on ppg_ratio
                # will almost never match a stored average — likely why this
                # revision was replaced by range-based lookup.
                correlation = session.query(SkillPropagationCorrelation).filter_by(
                    skill_value_from=min(level.skill_value, target_level.skill_value),
                    skill_value_to=max(level.skill_value, target_level.skill_value),
                    ppg_ratio=ppg_ratio if level.skill_value < target_level.skill_value else 1 / ppg_ratio
                ).first()

                if correlation:
                    suggested_skill_values[target_level_id].append(correlation.skill_value_to)

    # Update skill values for target levels (outlier-filtered average of all
    # suggestions) and advance them to the next sequence.
    for target_level_id, skill_values in suggested_skill_values.items():
        skill_values = Config.discard_outliers(np.array(skill_values))
        if len(skill_values) > 0:
            avg_skill_value = float(sum(skill_values) / len(skill_values))
            session.query(Level).filter_by(id=target_level_id).update({
                'skill_value': avg_skill_value,
                'skill_propagation_sequence': propagation_sequence + 1
            })
    session.commit()

    print(f"Skill levels have been propagated for sequence {propagation_sequence}.")
|
246
|
-
|
247
|
-
if __name__ == "__main__":
    # Rebuild the level graph, then run each propagation wave in order
    # (with MAX_PROPAGATION_SEQUENCE == 0 only the seed wave runs).
    build_levels_graph_edges()

    for sequence in range(Config.MAX_PROPAGATION_SEQUENCE + 1):
        propagate_skill_levels(sequence)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/utils.py
RENAMED
File without changes
|
{hockey_blast_common_lib-0.1.29 → hockey_blast_common_lib-0.1.32}/hockey_blast_common_lib/wsgi.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|