GameSentenceMiner 2.19.16__py3-none-any.whl → 2.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of GameSentenceMiner might be problematic. Click here for more details.

Files changed (70)
  1. GameSentenceMiner/__init__.py +39 -0
  2. GameSentenceMiner/anki.py +6 -3
  3. GameSentenceMiner/gametext.py +13 -2
  4. GameSentenceMiner/gsm.py +40 -3
  5. GameSentenceMiner/locales/en_us.json +4 -0
  6. GameSentenceMiner/locales/ja_jp.json +4 -0
  7. GameSentenceMiner/locales/zh_cn.json +4 -0
  8. GameSentenceMiner/obs.py +4 -1
  9. GameSentenceMiner/owocr/owocr/ocr.py +304 -134
  10. GameSentenceMiner/owocr/owocr/run.py +1 -1
  11. GameSentenceMiner/ui/anki_confirmation.py +4 -2
  12. GameSentenceMiner/ui/config_gui.py +12 -0
  13. GameSentenceMiner/util/configuration.py +6 -2
  14. GameSentenceMiner/util/cron/__init__.py +12 -0
  15. GameSentenceMiner/util/cron/daily_rollup.py +613 -0
  16. GameSentenceMiner/util/cron/jiten_update.py +397 -0
  17. GameSentenceMiner/util/cron/populate_games.py +154 -0
  18. GameSentenceMiner/util/cron/run_crons.py +148 -0
  19. GameSentenceMiner/util/cron/setup_populate_games_cron.py +118 -0
  20. GameSentenceMiner/util/cron_table.py +334 -0
  21. GameSentenceMiner/util/db.py +236 -49
  22. GameSentenceMiner/util/ffmpeg.py +23 -4
  23. GameSentenceMiner/util/games_table.py +340 -93
  24. GameSentenceMiner/util/jiten_api_client.py +188 -0
  25. GameSentenceMiner/util/stats_rollup_table.py +216 -0
  26. GameSentenceMiner/web/anki_api_endpoints.py +438 -220
  27. GameSentenceMiner/web/database_api.py +955 -1259
  28. GameSentenceMiner/web/jiten_database_api.py +1015 -0
  29. GameSentenceMiner/web/rollup_stats.py +672 -0
  30. GameSentenceMiner/web/static/css/dashboard-shared.css +75 -13
  31. GameSentenceMiner/web/static/css/overview.css +604 -47
  32. GameSentenceMiner/web/static/css/search.css +226 -0
  33. GameSentenceMiner/web/static/css/shared.css +762 -0
  34. GameSentenceMiner/web/static/css/stats.css +221 -0
  35. GameSentenceMiner/web/static/js/components/bar-chart.js +339 -0
  36. GameSentenceMiner/web/static/js/database-bulk-operations.js +320 -0
  37. GameSentenceMiner/web/static/js/database-game-data.js +390 -0
  38. GameSentenceMiner/web/static/js/database-game-operations.js +213 -0
  39. GameSentenceMiner/web/static/js/database-helpers.js +44 -0
  40. GameSentenceMiner/web/static/js/database-jiten-integration.js +750 -0
  41. GameSentenceMiner/web/static/js/database-popups.js +89 -0
  42. GameSentenceMiner/web/static/js/database-tabs.js +64 -0
  43. GameSentenceMiner/web/static/js/database-text-management.js +371 -0
  44. GameSentenceMiner/web/static/js/database.js +86 -718
  45. GameSentenceMiner/web/static/js/goals.js +79 -18
  46. GameSentenceMiner/web/static/js/heatmap.js +29 -23
  47. GameSentenceMiner/web/static/js/overview.js +1205 -339
  48. GameSentenceMiner/web/static/js/regex-patterns.js +100 -0
  49. GameSentenceMiner/web/static/js/search.js +215 -18
  50. GameSentenceMiner/web/static/js/shared.js +193 -39
  51. GameSentenceMiner/web/static/js/stats.js +1536 -179
  52. GameSentenceMiner/web/stats.py +1142 -269
  53. GameSentenceMiner/web/stats_api.py +2104 -0
  54. GameSentenceMiner/web/templates/anki_stats.html +4 -18
  55. GameSentenceMiner/web/templates/components/date-range.html +118 -3
  56. GameSentenceMiner/web/templates/components/html-head.html +40 -6
  57. GameSentenceMiner/web/templates/components/js-config.html +8 -8
  58. GameSentenceMiner/web/templates/components/regex-input.html +160 -0
  59. GameSentenceMiner/web/templates/database.html +564 -117
  60. GameSentenceMiner/web/templates/goals.html +41 -5
  61. GameSentenceMiner/web/templates/overview.html +159 -129
  62. GameSentenceMiner/web/templates/search.html +78 -9
  63. GameSentenceMiner/web/templates/stats.html +159 -5
  64. GameSentenceMiner/web/texthooking_page.py +280 -111
  65. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/METADATA +43 -2
  66. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/RECORD +70 -47
  67. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/WHEEL +0 -0
  68. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/entry_points.txt +0 -0
  69. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/licenses/LICENSE +0 -0
  70. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,672 @@
1
+ """
2
+ Rollup Statistics Module
3
+
4
+ This module handles all rollup-based statistics calculations for optimal performance.
5
+ It aggregates pre-calculated daily rollup data instead of processing individual lines.
6
+
7
+ Key Performance Strategy:
8
+ - Use StatsRollupTable for historical data (fast aggregation)
9
+ - Calculate only today's data live from GameLinesTable
10
+ - Combine rollup + live data for complete statistics
11
+ """
12
+
13
+ import datetime
14
+ import json
15
+ from collections import defaultdict
16
+ from typing import Dict, List, Optional
17
+
18
+ from GameSentenceMiner.util.stats_rollup_table import StatsRollupTable
19
+ from GameSentenceMiner.util.db import GameLinesTable
20
+ from GameSentenceMiner.util.configuration import logger
21
+
22
+
23
def _parse_json_field(raw, field_name, rollup_date):
    """Decode one JSON rollup column, accepting str or pre-decoded objects.

    Rollup rows may store JSON blobs either as serialized strings or as
    already-deserialized objects. Returns the decoded value, or None when
    the field is empty or cannot be parsed (a warning is logged on failure).
    """
    if not raw:
        return None
    try:
        return json.loads(raw) if isinstance(raw, str) else raw
    except (json.JSONDecodeError, TypeError):
        logger.warning(f"Failed to parse {field_name} for rollup date {rollup_date}")
        return None


def aggregate_rollup_data(rollups: List) -> Dict:
    """
    Aggregate multiple daily rollup records into a single statistics object.

    ADDITIVE fields are summed across days, MAXIMUM/MINIMUM fields take the
    extreme value, averages are weighted (reading speed by active time,
    session duration by session count), and the JSON blob columns (games,
    kanji, hourly activity/speeds, per-game activity) are merged.

    Args:
        rollups: List of StatsRollupTable records

    Returns:
        Dictionary with aggregated statistics matching the stats API format
    """
    if not rollups:
        # Empty-stats skeleton keeps the response shape stable for callers.
        return {
            "total_lines": 0,
            "total_characters": 0,
            "total_sessions": 0,
            "unique_games_played": 0,
            "total_reading_time_seconds": 0.0,
            "total_active_time_seconds": 0.0,
            "average_reading_speed_chars_per_hour": 0.0,
            "peak_reading_speed_chars_per_hour": 0.0,
            "longest_session_seconds": 0.0,
            "shortest_session_seconds": 0.0,
            "average_session_seconds": 0.0,
            "max_chars_in_session": 0,
            "max_time_in_session_seconds": 0.0,
            "games_completed": 0,
            "games_started": 0,
            "anki_cards_created": 0,
            "lines_with_screenshots": 0,
            "lines_with_audio": 0,
            "lines_with_translations": 0,
            "unique_kanji_seen": 0,
            "kanji_frequency_data": {},
            "hourly_activity_data": {},
            "hourly_reading_speed_data": {},
            "game_activity_data": {},
            "games_played_ids": [],
        }

    # ADDITIVE fields - sum across all days
    total_lines = sum(r.total_lines for r in rollups)
    total_characters = sum(r.total_characters for r in rollups)
    total_sessions = sum(r.total_sessions for r in rollups)
    total_reading_time = sum(r.total_reading_time_seconds for r in rollups)
    total_active_time = sum(r.total_active_time_seconds for r in rollups)
    anki_cards_created = sum(r.anki_cards_created for r in rollups)
    lines_with_screenshots = sum(r.lines_with_screenshots for r in rollups)
    lines_with_audio = sum(r.lines_with_audio for r in rollups)
    lines_with_translations = sum(r.lines_with_translations for r in rollups)
    games_completed = sum(r.games_completed for r in rollups)

    # MAXIMUM fields - take highest value across all days
    peak_reading_speed = max(
        (r.peak_reading_speed_chars_per_hour for r in rollups), default=0.0
    )
    longest_session = max((r.longest_session_seconds for r in rollups), default=0.0)
    max_chars_in_session = max((r.max_chars_in_session for r in rollups), default=0)
    max_time_in_session = max(
        (r.max_time_in_session_seconds for r in rollups), default=0.0
    )

    # MINIMUM field - smallest non-zero session (0.0 when no day had one)
    shortest_session = min(
        (r.shortest_session_seconds for r in rollups if r.shortest_session_seconds > 0),
        default=0.0,
    )

    # WEIGHTED AVERAGE - reading speed weighted by active time
    if total_active_time > 0:
        weighted_speed_sum = sum(
            r.average_reading_speed_chars_per_hour * r.total_active_time_seconds
            for r in rollups
            if r.total_active_time_seconds > 0
        )
        avg_reading_speed = weighted_speed_sum / total_active_time
    else:
        avg_reading_speed = 0.0

    # WEIGHTED AVERAGE - session duration weighted by number of sessions
    if total_sessions > 0:
        weighted_session_sum = sum(
            r.average_session_seconds * r.total_sessions
            for r in rollups
            if r.total_sessions > 0
        )
        avg_session_seconds = weighted_session_sum / total_sessions
    else:
        avg_session_seconds = 0.0

    # MERGE - union of game IDs across all days
    all_games_played = set()
    for rollup in rollups:
        games_ids = _parse_json_field(
            rollup.games_played_ids, "games_played_ids", rollup.date
        )
        if games_ids:
            all_games_played.update(games_ids)

    # MERGE - per-game activity: sum chars/time/lines, keep first title seen
    combined_game_activity = {}
    for rollup in rollups:
        game_data = _parse_json_field(
            rollup.game_activity_data, "game_activity_data", rollup.date
        )
        if not game_data:
            continue
        for game_id, activity in game_data.items():
            entry = combined_game_activity.setdefault(
                game_id,
                {
                    "title": activity.get("title", f"Game {game_id}"),
                    "chars": 0,
                    "time": 0,
                    "lines": 0,
                },
            )
            entry["chars"] += activity.get("chars", 0)
            entry["time"] += activity.get("time", 0)
            entry["lines"] += activity.get("lines", 0)

    # MERGE - kanji frequencies are summed
    combined_kanji_frequency = {}
    for rollup in rollups:
        kanji_data = _parse_json_field(
            rollup.kanji_frequency_data, "kanji_frequency_data", rollup.date
        )
        if not kanji_data:
            continue
        for kanji, count in kanji_data.items():
            combined_kanji_frequency[kanji] = (
                combined_kanji_frequency.get(kanji, 0) + count
            )

    # MERGE - hourly activity: sum characters per hour
    combined_hourly_activity = {}
    for rollup in rollups:
        hourly_data = _parse_json_field(
            rollup.hourly_activity_data, "hourly_activity_data", rollup.date
        )
        if not hourly_data:
            continue
        for hour, chars in hourly_data.items():
            combined_hourly_activity[hour] = (
                combined_hourly_activity.get(hour, 0) + chars
            )

    # MERGE - hourly reading speeds: mean of the non-zero daily speeds per hour
    hourly_speed_lists = defaultdict(list)
    for rollup in rollups:
        speed_data = _parse_json_field(
            rollup.hourly_reading_speed_data, "hourly_reading_speed_data", rollup.date
        )
        if not speed_data:
            continue
        for hour, speed in speed_data.items():
            if speed > 0:
                hourly_speed_lists[hour].append(speed)

    combined_hourly_speeds = {
        hour: sum(speeds) / len(speeds) if speeds else 0
        for hour, speeds in hourly_speed_lists.items()
    }

    return {
        "total_lines": total_lines,
        "total_characters": total_characters,
        "total_sessions": total_sessions,
        "unique_games_played": len(all_games_played),
        "total_reading_time_seconds": total_reading_time,
        "total_active_time_seconds": total_active_time,
        "average_reading_speed_chars_per_hour": avg_reading_speed,
        "peak_reading_speed_chars_per_hour": peak_reading_speed,
        "longest_session_seconds": longest_session,
        "shortest_session_seconds": shortest_session,
        "average_session_seconds": avg_session_seconds,
        "max_chars_in_session": max_chars_in_session,
        "max_time_in_session_seconds": max_time_in_session,
        "games_completed": games_completed,
        # games_started mirrors the union of games seen, same as the original
        "games_started": len(all_games_played),
        "anki_cards_created": anki_cards_created,
        "lines_with_screenshots": lines_with_screenshots,
        "lines_with_audio": lines_with_audio,
        "lines_with_translations": lines_with_translations,
        "unique_kanji_seen": len(combined_kanji_frequency),
        "kanji_frequency_data": combined_kanji_frequency,
        "hourly_activity_data": combined_hourly_activity,
        "hourly_reading_speed_data": combined_hourly_speeds,
        "game_activity_data": combined_game_activity,
        "games_played_ids": list(all_games_played),
    }
249
+
250
+
251
def calculate_live_stats_for_today(today_lines: List) -> Dict:
    """
    Calculate live statistics for today using existing stats.py functions.

    Args:
        today_lines: List of GameLinesTable records for today

    Returns:
        Dictionary with today's statistics in rollup format
    """
    if not today_lines:
        # Nothing recorded today: reuse the empty-stats skeleton.
        return aggregate_rollup_data([])

    # Imported lazily to avoid a circular dependency with the cron package.
    from GameSentenceMiner.util.cron.daily_rollup import (
        analyze_sessions,
        analyze_hourly_data,
        analyze_game_activity,
        analyze_kanji_data,
    )

    def _has_text(value):
        # A field counts only when present and non-blank after stripping.
        return bool(value and value.strip())

    # Basic volume counters.
    total_lines = len(today_lines)
    total_characters = sum(
        len(line.line_text) for line in today_lines if line.line_text
    )

    # Anki integration counters; a "card" is any line with media attached.
    lines_with_screenshots = sum(
        1 for line in today_lines if _has_text(line.screenshot_in_anki)
    )
    lines_with_audio = sum(
        1 for line in today_lines if _has_text(line.audio_in_anki)
    )
    lines_with_translations = sum(
        1 for line in today_lines if _has_text(line.translation)
    )
    anki_cards = sum(
        1
        for line in today_lines
        if _has_text(line.screenshot_in_anki) or _has_text(line.audio_in_anki)
    )

    # Session breakdown from the shared cron analyzer.
    session_stats = analyze_sessions(today_lines)

    # Average reading speed in characters per hour of total session time.
    total_time_seconds = session_stats["total_time"]
    if total_time_seconds > 0:
        average_speed = total_characters / (total_time_seconds / 3600)
    else:
        average_speed = 0.0

    # Peak speed is the best single-hour figure from the hourly analysis.
    hourly_data = analyze_hourly_data(today_lines)
    hourly_speeds = hourly_data["hourly_speeds"]
    peak_speed = max(hourly_speeds.values()) if hourly_speeds else 0.0

    # Per-game and kanji breakdowns for today's date.
    game_activity = analyze_game_activity(
        today_lines, datetime.date.today().strftime("%Y-%m-%d")
    )
    kanji_data = analyze_kanji_data(today_lines)

    return {
        "total_lines": total_lines,
        "total_characters": total_characters,
        "total_sessions": session_stats["count"],
        "unique_games_played": len(game_activity["game_ids"]),
        "total_reading_time_seconds": total_time_seconds,
        "total_active_time_seconds": session_stats["active_time"],
        "average_reading_speed_chars_per_hour": average_speed,
        "peak_reading_speed_chars_per_hour": peak_speed,
        "longest_session_seconds": session_stats["longest"],
        "shortest_session_seconds": session_stats["shortest"],
        "average_session_seconds": session_stats["average"],
        "max_chars_in_session": session_stats["max_chars"],
        "max_time_in_session_seconds": session_stats["max_time"],
        "games_completed": game_activity["completed"],
        "games_started": game_activity["started"],
        "anki_cards_created": anki_cards,
        "lines_with_screenshots": lines_with_screenshots,
        "lines_with_audio": lines_with_audio,
        "lines_with_translations": lines_with_translations,
        "unique_kanji_seen": kanji_data["unique_count"],
        "kanji_frequency_data": kanji_data["frequencies"],
        "hourly_activity_data": hourly_data["hourly_activity"],
        "hourly_reading_speed_data": hourly_data["hourly_speeds"],
        "game_activity_data": game_activity["details"],
        "games_played_ids": game_activity["game_ids"],
    }
349
+
350
+
351
def combine_rollup_and_live_stats(rollup_stats: Dict, live_stats: Dict) -> Dict:
    """
    Combine rollup statistics with live statistics for today.

    Args:
        rollup_stats: Aggregated rollup statistics (can be None)
        live_stats: Live calculated statistics for today (can be None)

    Returns:
        Combined statistics dictionary
    """
    # Guard clauses: with only one side present there is nothing to merge.
    if not rollup_stats and not live_stats:
        return aggregate_rollup_data([])  # Empty-stats skeleton
    if not rollup_stats:
        return live_stats
    if not live_stats:
        return rollup_stats

    def _r(field, default=0):
        return rollup_stats.get(field, default)

    def _l(field, default=0):
        return live_stats.get(field, default)

    # ADDITIVE fields: rollup history plus today's live numbers.
    combined = {
        field: _r(field) + _l(field)
        for field in (
            "total_lines",
            "total_characters",
            "total_sessions",
            "total_reading_time_seconds",
            "total_active_time_seconds",
            "games_completed",
            "anki_cards_created",
            "lines_with_screenshots",
            "lines_with_audio",
            "lines_with_translations",
        )
    }

    # MAXIMUM fields: best value from either side.
    for field in (
        "peak_reading_speed_chars_per_hour",
        "longest_session_seconds",
        "max_chars_in_session",
        "max_time_in_session_seconds",
    ):
        combined[field] = max(_r(field), _l(field))

    # MINIMUM field: smallest non-zero session from either side.
    nonzero_shortest = [
        v
        for v in (_r("shortest_session_seconds"), _l("shortest_session_seconds"))
        if v > 0
    ]
    combined["shortest_session_seconds"] = (
        min(nonzero_shortest) if nonzero_shortest else 0.0
    )

    # WEIGHTED AVERAGE: reading speed weighted by active time.
    rollup_time = _r("total_active_time_seconds")
    live_time = _l("total_active_time_seconds")
    total_time = rollup_time + live_time
    if total_time > 0:
        combined["average_reading_speed_chars_per_hour"] = (
            _r("average_reading_speed_chars_per_hour") * rollup_time
            + _l("average_reading_speed_chars_per_hour") * live_time
        ) / total_time
    else:
        combined["average_reading_speed_chars_per_hour"] = 0.0

    # WEIGHTED AVERAGE: session duration weighted by session count.
    rollup_sessions = _r("total_sessions")
    live_sessions = _l("total_sessions")
    total_sessions = rollup_sessions + live_sessions
    if total_sessions > 0:
        combined["average_session_seconds"] = (
            _r("average_session_seconds") * rollup_sessions
            + _l("average_session_seconds") * live_sessions
        ) / total_sessions
    else:
        combined["average_session_seconds"] = 0.0

    # MERGE: union of games played on either side.
    all_games = set(_r("games_played_ids", [])) | set(_l("games_played_ids", []))
    combined["unique_games_played"] = len(all_games)
    combined["games_played_ids"] = list(all_games)
    combined["games_started"] = len(all_games)

    # MERGE: kanji frequencies are summed per kanji.
    combined_kanji = dict(_r("kanji_frequency_data", {}))
    for kanji, count in _l("kanji_frequency_data", {}).items():
        combined_kanji[kanji] = combined_kanji.get(kanji, 0) + count
    combined["kanji_frequency_data"] = combined_kanji
    combined["unique_kanji_seen"] = len(combined_kanji)

    # MERGE: hourly character counts are summed per hour.
    rollup_hourly = _r("hourly_activity_data", {})
    live_hourly = _l("hourly_activity_data", {})
    combined["hourly_activity_data"] = {
        hour: rollup_hourly.get(hour, 0) + live_hourly.get(hour, 0)
        for hour in set(rollup_hourly) | set(live_hourly)
    }

    # MERGE: hourly speeds are averaged over the non-zero entries only.
    rollup_speeds = _r("hourly_reading_speed_data", {})
    live_speeds = _l("hourly_reading_speed_data", {})
    combined_speeds = {}
    for hour in set(rollup_speeds) | set(live_speeds):
        samples = [
            s
            for s in (rollup_speeds.get(hour, 0), live_speeds.get(hour, 0))
            if s > 0
        ]
        combined_speeds[hour] = sum(samples) / len(samples) if samples else 0
    combined["hourly_reading_speed_data"] = combined_speeds

    # MERGE: per-game chars/time/lines are summed; a rollup title wins.
    rollup_games_activity = _r("game_activity_data", {})
    live_games_activity = _l("game_activity_data", {})
    no_activity = {"chars": 0, "time": 0, "lines": 0}
    merged_games = {}
    for game_id in set(rollup_games_activity) | set(live_games_activity):
        from_rollup = rollup_games_activity.get(game_id, no_activity)
        from_live = live_games_activity.get(game_id, no_activity)
        merged_games[game_id] = {
            "title": from_rollup.get("title")
            or from_live.get("title", f"Game {game_id}"),
            "chars": from_rollup.get("chars", 0) + from_live.get("chars", 0),
            "time": from_rollup.get("time", 0) + from_live.get("time", 0),
            "lines": from_rollup.get("lines", 0) + from_live.get("lines", 0),
        }
    combined["game_activity_data"] = merged_games

    return combined
510
+
511
+
512
def build_heatmap_from_rollup(rollups: List, filter_year: Optional[str] = None) -> Dict:
    """
    Build heatmap data from rollup records instead of individual lines.
    Much faster than processing all lines.

    Args:
        rollups: List of StatsRollupTable records
        filter_year: Optional year filter (e.g., "2024")

    Returns:
        Dictionary mapping year -> date -> character count
    """
    by_year = defaultdict(lambda: defaultdict(int))

    for rollup in rollups:
        # rollup.date is already a YYYY-MM-DD string; its year is the
        # portion before the first dash.
        date_key = rollup.date
        year = date_key.partition("-")[0]

        # Skip records outside the requested year, when one is given.
        if filter_year and year != filter_year:
            continue

        by_year[year][date_key] = rollup.total_characters

    return dict(by_year)
538
+
539
+
540
def build_daily_chart_data_from_rollup(rollups: List) -> Dict:
    """
    Build daily chart data structure from rollup records.
    Returns data organized by date and game for chart visualization.

    Args:
        rollups: List of StatsRollupTable records

    Returns:
        Dictionary shaped {date: {game_title: {"lines": int, "chars": int}}}
    """
    daily_data = defaultdict(lambda: defaultdict(lambda: {"lines": 0, "chars": 0}))

    for rollup in rollups:
        date_str = rollup.date
        if not rollup.game_activity_data:
            continue
        try:
            # The column may hold a JSON string or an already-decoded object.
            game_data = (
                json.loads(rollup.game_activity_data)
                if isinstance(rollup.game_activity_data, str)
                else rollup.game_activity_data
            )

            for game_id, activity in game_data.items():
                display_name = activity.get("title", f"Game {game_id}")
                # NOTE(review): if two game ids share a title on the same
                # date, the later entry overwrites the earlier one — confirm
                # titles are unique per day before relying on this.
                daily_data[date_str][display_name]["lines"] = activity.get(
                    "lines", 0
                )
                daily_data[date_str][display_name]["chars"] = activity.get(
                    "chars", 0
                )
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            logger.warning(f"Error parsing rollup data for {date_str}: {e}")
            continue

    # Convert to plain nested dicts so callers receive a predictable,
    # serializable structure with no defaultdict auto-vivification
    # (consistent with build_heatmap_from_rollup's dict() conversion).
    return {date_str: dict(games) for date_str, games in daily_data.items()}
576
+
577
+
578
def calculate_day_of_week_averages_from_rollup(rollups: List) -> Dict:
    """
    Pre-compute day of week activity averages from rollup data.
    This is much faster than calculating on every API request.

    Args:
        rollups: List of StatsRollupTable records

    Returns:
        Dictionary keyed by "chars", "hours", "counts", and "avg_hours",
        each a 7-element list indexed Monday (0) through Sunday (6).
    """
    chars_by_day = [0] * 7
    hours_by_day = [0] * 7
    counts_by_day = [0] * 7

    for rollup in rollups:
        try:
            # weekday(): 0=Monday ... 6=Sunday
            weekday = datetime.datetime.strptime(rollup.date, "%Y-%m-%d").weekday()
            chars_by_day[weekday] += rollup.total_characters
            hours_by_day[weekday] += rollup.total_reading_time_seconds / 3600
            counts_by_day[weekday] += 1
        except (ValueError, AttributeError) as e:
            logger.warning(f"Error parsing date for rollup {rollup.date}: {e}")
            continue

    # Per-day averages rounded to two decimals; days with no data stay 0.
    avg_hours = [
        round(hours_by_day[i] / counts_by_day[i], 2) if counts_by_day[i] > 0 else 0
        for i in range(7)
    ]

    return {
        "chars": chars_by_day,
        "hours": hours_by_day,
        "counts": counts_by_day,
        "avg_hours": avg_hours,
    }
621
+
622
+
623
def calculate_difficulty_speed_from_rollup(combined_stats: Dict) -> Dict:
    """
    Pre-compute reading speed by difficulty from rollup game activity data.
    This avoids recalculating on every API request.

    Args:
        combined_stats: Combined rollup statistics with game_activity_data

    Returns:
        Dictionary with difficulty speed data:
        {
            "labels": ["Difficulty 1", "Difficulty 2", ...],
            "speeds": [speed1, speed2, ...]
        }
    """
    from GameSentenceMiner.util.games_table import GamesTable

    result = {"labels": [], "speeds": []}

    try:
        game_activity = combined_stats.get("game_activity_data", {})
        # difficulty -> accumulated {"chars": total, "time": total}
        totals = {}

        for game in GamesTable.all():
            if game.difficulty is None:
                continue
            bucket = totals.setdefault(game.difficulty, {"chars": 0, "time": 0})
            # NOTE(review): game_activity keys may be JSON string ids while
            # game.id could be numeric — confirm the key types line up.
            activity = game_activity.get(game.id)
            if activity:
                bucket["chars"] += activity.get("chars", 0)
                bucket["time"] += activity.get("time", 0)

        # Emit one label/speed pair per difficulty that saw real activity,
        # in ascending difficulty order.
        for difficulty in sorted(totals):
            data = totals[difficulty]
            if data["time"] > 0 and data["chars"] > 0:
                speed = int(data["chars"] / (data["time"] / 3600))
                result["labels"].append(f"Difficulty {difficulty}")
                result["speeds"].append(speed)

    except Exception as e:
        # Best-effort: chart data is optional, so log and return what we have.
        logger.error(f"Error calculating difficulty speed from rollup: {e}")

    return result