GameSentenceMiner 2.19.16__py3-none-any.whl → 2.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of GameSentenceMiner might be problematic. Click here for more details.

Files changed (70)
  1. GameSentenceMiner/__init__.py +39 -0
  2. GameSentenceMiner/anki.py +6 -3
  3. GameSentenceMiner/gametext.py +13 -2
  4. GameSentenceMiner/gsm.py +40 -3
  5. GameSentenceMiner/locales/en_us.json +4 -0
  6. GameSentenceMiner/locales/ja_jp.json +4 -0
  7. GameSentenceMiner/locales/zh_cn.json +4 -0
  8. GameSentenceMiner/obs.py +4 -1
  9. GameSentenceMiner/owocr/owocr/ocr.py +304 -134
  10. GameSentenceMiner/owocr/owocr/run.py +1 -1
  11. GameSentenceMiner/ui/anki_confirmation.py +4 -2
  12. GameSentenceMiner/ui/config_gui.py +12 -0
  13. GameSentenceMiner/util/configuration.py +6 -2
  14. GameSentenceMiner/util/cron/__init__.py +12 -0
  15. GameSentenceMiner/util/cron/daily_rollup.py +613 -0
  16. GameSentenceMiner/util/cron/jiten_update.py +397 -0
  17. GameSentenceMiner/util/cron/populate_games.py +154 -0
  18. GameSentenceMiner/util/cron/run_crons.py +148 -0
  19. GameSentenceMiner/util/cron/setup_populate_games_cron.py +118 -0
  20. GameSentenceMiner/util/cron_table.py +334 -0
  21. GameSentenceMiner/util/db.py +236 -49
  22. GameSentenceMiner/util/ffmpeg.py +23 -4
  23. GameSentenceMiner/util/games_table.py +340 -93
  24. GameSentenceMiner/util/jiten_api_client.py +188 -0
  25. GameSentenceMiner/util/stats_rollup_table.py +216 -0
  26. GameSentenceMiner/web/anki_api_endpoints.py +438 -220
  27. GameSentenceMiner/web/database_api.py +955 -1259
  28. GameSentenceMiner/web/jiten_database_api.py +1015 -0
  29. GameSentenceMiner/web/rollup_stats.py +672 -0
  30. GameSentenceMiner/web/static/css/dashboard-shared.css +75 -13
  31. GameSentenceMiner/web/static/css/overview.css +604 -47
  32. GameSentenceMiner/web/static/css/search.css +226 -0
  33. GameSentenceMiner/web/static/css/shared.css +762 -0
  34. GameSentenceMiner/web/static/css/stats.css +221 -0
  35. GameSentenceMiner/web/static/js/components/bar-chart.js +339 -0
  36. GameSentenceMiner/web/static/js/database-bulk-operations.js +320 -0
  37. GameSentenceMiner/web/static/js/database-game-data.js +390 -0
  38. GameSentenceMiner/web/static/js/database-game-operations.js +213 -0
  39. GameSentenceMiner/web/static/js/database-helpers.js +44 -0
  40. GameSentenceMiner/web/static/js/database-jiten-integration.js +750 -0
  41. GameSentenceMiner/web/static/js/database-popups.js +89 -0
  42. GameSentenceMiner/web/static/js/database-tabs.js +64 -0
  43. GameSentenceMiner/web/static/js/database-text-management.js +371 -0
  44. GameSentenceMiner/web/static/js/database.js +86 -718
  45. GameSentenceMiner/web/static/js/goals.js +79 -18
  46. GameSentenceMiner/web/static/js/heatmap.js +29 -23
  47. GameSentenceMiner/web/static/js/overview.js +1205 -339
  48. GameSentenceMiner/web/static/js/regex-patterns.js +100 -0
  49. GameSentenceMiner/web/static/js/search.js +215 -18
  50. GameSentenceMiner/web/static/js/shared.js +193 -39
  51. GameSentenceMiner/web/static/js/stats.js +1536 -179
  52. GameSentenceMiner/web/stats.py +1142 -269
  53. GameSentenceMiner/web/stats_api.py +2104 -0
  54. GameSentenceMiner/web/templates/anki_stats.html +4 -18
  55. GameSentenceMiner/web/templates/components/date-range.html +118 -3
  56. GameSentenceMiner/web/templates/components/html-head.html +40 -6
  57. GameSentenceMiner/web/templates/components/js-config.html +8 -8
  58. GameSentenceMiner/web/templates/components/regex-input.html +160 -0
  59. GameSentenceMiner/web/templates/database.html +564 -117
  60. GameSentenceMiner/web/templates/goals.html +41 -5
  61. GameSentenceMiner/web/templates/overview.html +159 -129
  62. GameSentenceMiner/web/templates/search.html +78 -9
  63. GameSentenceMiner/web/templates/stats.html +159 -5
  64. GameSentenceMiner/web/texthooking_page.py +280 -111
  65. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/METADATA +43 -2
  66. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/RECORD +70 -47
  67. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/WHEEL +0 -0
  68. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/entry_points.txt +0 -0
  69. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/licenses/LICENSE +0 -0
  70. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,613 @@
1
+ """
2
+ Daily Statistics Rollup Cron Job for GameSentenceMiner
3
+
4
+ This module provides a cron job that runs once a day to roll up all statistics
5
+ from all previous dates up to but not including today (so up to yesterday).
6
+
7
+ This is designed to be called by the cron system via run_crons.py.
8
+
9
+ Usage:
10
+ from GameSentenceMiner.util.cron.daily_rollup import run_daily_rollup
11
+
12
+ # Run the daily rollup
13
+ result = run_daily_rollup()
14
+ print(f"Processed {result['processed']} dates")
15
+ """
16
+
17
+ import time
18
+ from collections import defaultdict
19
+ from datetime import datetime, timedelta
20
+ from typing import Dict, List, Optional
21
+
22
+ from GameSentenceMiner.util.configuration import get_stats_config, logger
23
+ from GameSentenceMiner.util.db import GameLinesTable
24
+ from GameSentenceMiner.util.games_table import GamesTable
25
+ from GameSentenceMiner.util.stats_rollup_table import StatsRollupTable
26
+ from GameSentenceMiner.web.stats import (
27
+ calculate_actual_reading_time,
28
+ calculate_hourly_activity,
29
+ calculate_hourly_reading_speed,
30
+ calculate_kanji_frequency,
31
+ )
32
+
33
+
34
def get_first_data_date() -> Optional[str]:
    """Return the earliest local calendar date that has a recorded game line.

    Returns:
        The date string produced by SQLite's DATE() for the smallest
        timestamp in the game-lines table, or None when the query yields no
        row or a NULL/empty value.
    """
    # MIN(timestamp) is read as a Unix epoch and shifted to local time before
    # being truncated to a date.
    query = (
        "SELECT DATE(datetime(MIN(timestamp), 'unixepoch', 'localtime')) "
        f"FROM {GameLinesTable._table}"
    )
    row = GameLinesTable._db.fetchone(query)
    if not row or not row[0]:
        return None
    return row[0]
40
+
41
+
42
def get_all_data_dates() -> List[str]:
    """Return every distinct local calendar date that has game lines.

    Returns:
        Date strings as produced by SQLite's DATE(), in the order the query
        returns them (ORDER BY date). Rows whose date is NULL/empty are
        skipped.
    """
    query = (
        "SELECT DISTINCT DATE(datetime(timestamp, 'unixepoch', 'localtime')) as date "
        f"FROM {GameLinesTable._table} ORDER BY date"
    )
    dates = []
    for record in GameLinesTable._db.fetchall(query):
        day = record[0]
        if day:
            dates.append(day)
    return dates
49
+
50
+
51
def analyze_sessions(lines: List) -> Dict:
    """Split lines into sessions and summarise them.

    Lines are ordered by timestamp; a new session starts whenever the gap to
    the previous line exceeds ``get_stats_config().session_gap_seconds``.

    Args:
        lines: Records exposing ``timestamp`` and ``line_text`` attributes.

    Returns:
        Dictionary with the keys ``count``, ``total_time``, ``active_time``,
        ``longest``, ``shortest``, ``average``, ``max_chars`` and
        ``max_time``. With fewer than two lines every time value is 0.0 and
        ``max_chars`` is the total text length of the given lines.
    """
    def text_length(entry) -> int:
        # Missing/empty text counts as zero characters.
        return len(entry.line_text) if entry.line_text else 0

    # Zero or one line: no gaps exist, so no durations can be measured.
    if not lines or len(lines) < 2:
        return {
            'count': 1 if lines else 0,
            'total_time': 0.0,
            'active_time': 0.0,
            'longest': 0.0,
            'shortest': 0.0,
            'average': 0.0,
            'max_chars': sum(text_length(entry) for entry in lines),
            'max_time': 0.0
        }

    ordered = sorted(lines, key=lambda entry: float(entry.timestamp))
    gap_limit = get_stats_config().session_gap_seconds

    # Walk the ordered lines, extending the latest session or opening a new one.
    sessions = [[ordered[0]]]
    for entry in ordered[1:]:
        previous = sessions[-1][-1]
        if float(entry.timestamp) - float(previous.timestamp) <= gap_limit:
            sessions[-1].append(entry)
        else:
            sessions.append([entry])

    durations = []
    char_totals = []
    active_total = 0.0
    for group in sessions:
        if len(group) >= 2:
            span = calculate_actual_reading_time([float(entry.timestamp) for entry in group])
            durations.append(span)
            active_total += span
        else:
            # A single-line session has no measurable duration.
            durations.append(0.0)
        char_totals.append(sum(text_length(entry) for entry in group))

    # Reading time over the whole day, computed from all timestamps at once.
    overall_time = calculate_actual_reading_time([float(entry.timestamp) for entry in ordered])

    # Only sessions with a positive duration compete for "shortest".
    positive = [span for span in durations if span > 0]

    return {
        'count': len(sessions),
        'total_time': overall_time,
        'active_time': active_total,
        'longest': max(durations) if durations else 0.0,
        'shortest': min(positive) if positive else 0.0,
        'average': sum(durations) / len(durations) if durations else 0.0,
        'max_chars': max(char_totals) if char_totals else 0,
        'max_time': max(durations) if durations else 0.0
    }
124
+
125
+
126
def analyze_hourly_data(lines: List) -> Dict:
    """Summarise per-hour character counts and reading speeds.

    Args:
        lines: Records passed straight to ``calculate_hourly_activity`` and
            ``calculate_hourly_reading_speed``.

    Returns:
        Dictionary with ``hourly_activity`` and ``hourly_speeds``; each maps
        the stringified position in the helper's result (used as the hour)
        to its value, keeping only values greater than zero. Both are empty
        when ``lines`` is empty.
    """
    if not lines:
        return {'hourly_activity': {}, 'hourly_speeds': {}}

    def positive_by_hour(values) -> Dict:
        # Keys are strings; entries with no activity are omitted.
        return {str(hour): value for hour, value in enumerate(values) if value > 0}

    # Run both helpers before converting either result.
    chars_per_hour = calculate_hourly_activity(lines)
    speed_per_hour = calculate_hourly_reading_speed(lines)

    return {
        'hourly_activity': positive_by_hour(chars_per_hour),
        'hourly_speeds': positive_by_hour(speed_per_hour)
    }
154
+
155
+
156
def analyze_game_activity(lines: List, date_str: str) -> Dict:
    """Aggregate characters, line counts and reading time per game.

    Args:
        lines: Records exposing ``game_id``, ``line_text``, ``timestamp`` and
            optionally ``game_name``.
        date_str: Date in YYYY-MM-DD format (not used by the computation).

    Returns:
        Dictionary with ``completed`` (always 0), ``started`` (number of
        distinct game ids seen), ``details`` (game id -> title, chars, time,
        lines) and ``game_ids`` (list of the distinct ids).
    """
    if not lines:
        return {'completed': 0, 'started': 0, 'details': {}, 'game_ids': []}

    per_game = defaultdict(lambda: {'chars': 0, 'lines': 0, 'timestamps': [], 'game_name': None})
    seen_ids = set()

    for entry in lines:
        if entry.game_id and entry.game_id.strip():
            key = str(entry.game_id)
            seen_ids.add(key)

            bucket = per_game[key]
            bucket['chars'] += len(entry.line_text) if entry.line_text else 0
            bucket['lines'] += 1
            bucket['timestamps'].append(float(entry.timestamp))

            # Remember the first non-empty game_name as a title fallback.
            if hasattr(entry, 'game_name') and entry.game_name and not bucket['game_name']:
                bucket['game_name'] = entry.game_name
        elif hasattr(entry, 'game_name') and entry.game_name:
            # Lines without a game_id are not attributed to any game.
            logger.debug(f"[ROLLUP_DEBUG] Line without game_id but has game_name: '{entry.game_name}'")

    details = {}
    for key, bucket in per_game.items():
        stamps = bucket['timestamps']
        elapsed = calculate_actual_reading_time(stamps) if len(stamps) >= 2 else 0.0

        short_id = key[:8]
        scene_name = bucket['game_name']

        # Title preference: games table title, then the line's game_name,
        # then a label built from the first eight characters of the id.
        try:
            game = GamesTable.get(key)
            if game and game.title_original:
                title = game.title_original
                logger.debug(f"[ROLLUP_TITLE] Using games_table title for {short_id}...: '{title}'")
            elif scene_name:
                title = scene_name
                logger.debug(f"[ROLLUP_TITLE] Using OBS scene name for {short_id}...: '{title}'")
            else:
                title = f"Game {short_id}"
                logger.warning(f"[ROLLUP_TITLE] No title or game_name for {short_id}..., using shortened UUID")
        except Exception as e:
            # Lookup failed: fall back the same way, without the table title.
            if scene_name:
                title = scene_name
                logger.info(f"[ROLLUP_TITLE] Exception during lookup, using game_name '{title}' for {short_id}...: {e}")
            else:
                title = f"Game {short_id}"
                logger.warning(f"[ROLLUP_TITLE] Exception and no game_name for {short_id}..., using shortened UUID: {e}")

        details[key] = {
            'title': title,
            'chars': bucket['chars'],
            'time': elapsed,
            'lines': bucket['lines']
        }

    # Completion is not tracked here; "started" is the count of games played.
    return {
        'completed': 0,
        'started': len(seen_ids),
        'details': details,
        'game_ids': list(seen_ids)
    }
243
+
244
+
245
def analyze_kanji_data(lines: List) -> Dict:
    """Flatten the result of ``calculate_kanji_frequency`` for storage.

    Args:
        lines: Records passed straight to ``calculate_kanji_frequency``.

    Returns:
        Dictionary with ``unique_count`` (taken from the helper's result,
        0 if absent) and ``frequencies`` (kanji -> frequency, built from the
        helper's ``kanji_data`` items). Both are empty/zero for empty input.
    """
    if not lines:
        return {'unique_count': 0, 'frequencies': {}}

    summary = calculate_kanji_frequency(lines)
    frequencies = {
        entry['kanji']: entry['frequency']
        for entry in summary.get('kanji_data', [])
    }
    return {
        'unique_count': summary.get('unique_count', 0),
        'frequencies': frequencies
    }
273
+
274
+
275
def calculate_daily_stats(date_str: str) -> Dict:
    """Calculate the rollup statistics for one local calendar day.

    Args:
        date_str: Date in YYYY-MM-DD format, interpreted in local time.

    Returns:
        Dictionary keyed by rollup field name: line/character/session
        counts, reading times and speeds, Anki-related counts, and
        JSON-encoded strings for kanji frequencies, hourly activity, hourly
        speeds, per-game activity and played game ids. When the day has no
        lines, all counts are zero and the JSON fields are empty containers.

    Raises:
        ValueError: If ``date_str`` does not match ``%Y-%m-%d``.
    """
    import json

    logger.info(f"Calculating daily stats for {date_str}")

    # Use the next local midnight as the end of the window instead of adding a
    # fixed 86400 seconds: on DST-transition days a local day is 23 or 25
    # hours long, and a fixed offset would drop or double-count an hour.
    day_start = datetime.strptime(date_str, '%Y-%m-%d')
    date_start = day_start.timestamp()
    date_end = (day_start + timedelta(days=1)).timestamp()

    lines = GameLinesTable.get_lines_filtered_by_timestamp(date_start, date_end, for_stats=True)

    if not lines:
        logger.info(f"No lines found for {date_str}")
        return {
            'date': date_str,
            'total_lines': 0,
            'total_characters': 0,
            'total_sessions': 0,
            'unique_games_played': 0,
            'total_reading_time_seconds': 0.0,
            'total_active_time_seconds': 0.0,
            'longest_session_seconds': 0.0,
            'shortest_session_seconds': 0.0,
            'average_session_seconds': 0.0,
            'average_reading_speed_chars_per_hour': 0.0,
            'peak_reading_speed_chars_per_hour': 0.0,
            'games_completed': 0,
            'games_started': 0,
            'anki_cards_created': 0,
            'lines_with_screenshots': 0,
            'lines_with_audio': 0,
            'lines_with_translations': 0,
            'unique_kanji_seen': 0,
            'kanji_frequency_data': '{}',
            'hourly_activity_data': '{}',
            'hourly_reading_speed_data': '{}',
            'game_activity_data': '{}',
            'games_played_ids': '[]',
            'max_chars_in_session': 0,
            'max_time_in_session_seconds': 0.0
        }

    logger.info(f"Processing {len(lines)} lines for {date_str}")

    def has_text(value) -> bool:
        # True for a non-empty string that is not only whitespace.
        return bool(value and value.strip())

    total_lines = len(lines)
    total_characters = sum(len(line.line_text) if line.line_text else 0 for line in lines)

    # Anki-related counts; a line counts as a card when it has a screenshot
    # or audio marker.
    lines_with_screenshots = sum(1 for line in lines if has_text(line.screenshot_in_anki))
    lines_with_audio = sum(1 for line in lines if has_text(line.audio_in_anki))
    lines_with_translations = sum(1 for line in lines if has_text(line.translation))
    anki_cards = sum(
        1 for line in lines
        if has_text(line.screenshot_in_anki) or has_text(line.audio_in_anki)
    )

    session_stats = analyze_sessions(lines)

    # Average speed over the day's total reading time (0.0 when no time).
    total_time_seconds = session_stats['total_time']
    total_time_hours = total_time_seconds / 3600 if total_time_seconds > 0 else 0
    average_speed = (total_characters / total_time_hours) if total_time_hours > 0 else 0.0

    # Peak speed is the best value among the per-hour speeds.
    hourly_data = analyze_hourly_data(lines)
    peak_speed = max(hourly_data['hourly_speeds'].values()) if hourly_data['hourly_speeds'] else 0.0

    game_activity = analyze_game_activity(lines, date_str)
    kanji_data = analyze_kanji_data(lines)

    return {
        'date': date_str,
        'total_lines': total_lines,
        'total_characters': total_characters,
        'total_sessions': session_stats['count'],
        'unique_games_played': len(game_activity['game_ids']),
        'total_reading_time_seconds': total_time_seconds,
        'total_active_time_seconds': session_stats['active_time'],
        'longest_session_seconds': session_stats['longest'],
        'shortest_session_seconds': session_stats['shortest'],
        'average_session_seconds': session_stats['average'],
        'average_reading_speed_chars_per_hour': average_speed,
        'peak_reading_speed_chars_per_hour': peak_speed,
        'games_completed': game_activity['completed'],
        'games_started': game_activity['started'],
        'anki_cards_created': anki_cards,
        'lines_with_screenshots': lines_with_screenshots,
        'lines_with_audio': lines_with_audio,
        'lines_with_translations': lines_with_translations,
        'unique_kanji_seen': kanji_data['unique_count'],
        # ensure_ascii=False keeps kanji and game titles readable in storage.
        'kanji_frequency_data': json.dumps(kanji_data['frequencies'], ensure_ascii=False),
        'hourly_activity_data': json.dumps(hourly_data['hourly_activity']),
        'hourly_reading_speed_data': json.dumps(hourly_data['hourly_speeds']),
        'game_activity_data': json.dumps(game_activity['details'], ensure_ascii=False),
        'games_played_ids': json.dumps(game_activity['game_ids']),
        'max_chars_in_session': session_stats['max_chars'],
        'max_time_in_session_seconds': session_stats['max_time']
    }
390
+
391
+
392
def _rollup_result(success, start_date, end_date, total_dates, processed,
                   overwritten, errors, elapsed_time, error_message=None) -> Dict:
    """Build the summary dictionary returned by run_daily_rollup."""
    return {
        'success': success,
        'start_date': start_date,
        'end_date': end_date,
        'total_dates': total_dates,
        'processed': processed,
        'overwritten': overwritten,
        'errors': errors,
        'elapsed_time': elapsed_time,
        'error_message': error_message
    }


def run_daily_rollup() -> Dict:
    """Run the daily statistics rollup for all dates with data up to yesterday.

    For every date that has game lines, from the first data date through
    yesterday, the statistics are recalculated; an existing rollup row is
    overwritten and a missing one is created.

    This is the main entry point for the daily rollup cron job.

    Returns:
        Dictionary with ``success``, ``start_date``, ``end_date``,
        ``total_dates``, ``processed`` (rows newly created), ``overwritten``
        (existing rows updated), ``errors`` (dates that raised),
        ``elapsed_time`` in seconds and ``error_message``. A failure outside
        the per-date loop is reported with ``success`` False rather than
        raised.
    """
    logger.info("Starting daily statistics rollup cron job")

    start_time = time.time()

    try:
        first_date = get_first_data_date()

        if first_date is None:
            logger.warning("No data found in GameLinesTable")
            return _rollup_result(True, None, None, 0, 0, 0, 0, time.time() - start_time)

        # Today is excluded because its data is still changing.
        end_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

        logger.info(f"Date range: {first_date} to {end_date}")

        all_data_dates = get_all_data_dates()
        logger.debug(f"Found {len(all_data_dates)} dates with data in total")

        start_dt = datetime.strptime(first_date, '%Y-%m-%d')
        end_dt = datetime.strptime(end_date, '%Y-%m-%d')

        dates_to_process = [
            date for date in all_data_dates
            if start_dt <= datetime.strptime(date, '%Y-%m-%d') <= end_dt
        ]

        total_dates = len(dates_to_process)
        logger.info(f"Processing {total_dates} dates in range (up to yesterday)")

        if total_dates == 0:
            logger.info("No dates to process")
            return _rollup_result(True, first_date, end_date, 0, 0, 0, 0, time.time() - start_time)

        processed = 0
        overwritten = 0
        errors = 0

        for i, date_str in enumerate(dates_to_process, 1):
            try:
                # Always calculate fresh stats for the date
                logger.info(f"Processing {i}/{total_dates}: {date_str}")
                stats = calculate_daily_stats(date_str)

                existing = StatsRollupTable.get_by_date(date_str)
                now = time.time()

                if existing:
                    # The stats keys are the rollup field names, so copy them
                    # over one by one instead of listing each field by hand.
                    for field, value in stats.items():
                        setattr(existing, field, value)
                    existing.updated_at = now
                    existing.save()

                    overwritten += 1
                    logger.debug(f"Overwritten rollup for {date_str}")
                else:
                    rollup = StatsRollupTable(**stats, created_at=now, updated_at=now)
                    rollup.save()

                    processed += 1
                    logger.debug(f"Created rollup for {date_str}")

            except Exception as e:
                # One bad date must not abort the rest of the run.
                logger.error(f"Error processing {date_str}: {e}", exc_info=True)
                errors += 1

            # Progress is keyed on the loop position: the created-row counter
            # stays at 0 while rows are only being overwritten, which would
            # make a check on it fire for every date and never show progress.
            if i % 10 == 0:
                logger.info(f"Progress: {i}/{total_dates} dates processed")

        elapsed_time = time.time() - start_time

        logger.info("Daily rollup cron job completed")
        logger.info(f"Date range: {first_date} to {end_date}, Total dates: {total_dates}, Processed: {processed}, Overwritten: {overwritten}, Errors: {errors}, Time: {elapsed_time:.2f}s")

        return _rollup_result(
            True, first_date, end_date, total_dates,
            processed, overwritten, errors, elapsed_time,
        )

    except Exception as e:
        elapsed_time = time.time() - start_time
        error_msg = str(e)
        logger.error(f"Fatal error in daily rollup cron job: {error_msg}", exc_info=True)

        return _rollup_result(False, None, None, 0, 0, 0, 1, elapsed_time, error_msg)
592
+
593
+
594
+ # Example usage for testing
595
if __name__ == '__main__':
    # Manual entry point: run the rollup once and print a readable summary.
    result = run_daily_rollup()

    rule = "=" * 80
    report = [
        "\n" + rule,
        "DAILY ROLLUP SUMMARY",
        rule,
        f"Success: {'Yes' if result['success'] else 'No'}",
    ]
    # The date range is only known when the run got far enough to find data.
    if result['start_date']:
        report.append(f"Date range: {result['start_date']} to {result['end_date']}")
    report.extend([
        f"Total dates with data: {result['total_dates']}",
        f"Successfully processed: {result['processed']}",
        f"Overwritten: {result['overwritten']}",
        f"Errors: {result['errors']}",
        f"Time elapsed: {result['elapsed_time']:.2f} seconds",
    ])
    if result['error_message']:
        report.append(f"Error: {result['error_message']}")
    report.append(rule)

    for row in report:
        print(row)