GameSentenceMiner 2.19.16__py3-none-any.whl → 2.20.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of GameSentenceMiner might be problematic. See the advisory on the package registry for more details.
- GameSentenceMiner/__init__.py +39 -0
- GameSentenceMiner/anki.py +6 -3
- GameSentenceMiner/gametext.py +13 -2
- GameSentenceMiner/gsm.py +40 -3
- GameSentenceMiner/locales/en_us.json +4 -0
- GameSentenceMiner/locales/ja_jp.json +4 -0
- GameSentenceMiner/locales/zh_cn.json +4 -0
- GameSentenceMiner/obs.py +4 -1
- GameSentenceMiner/owocr/owocr/ocr.py +304 -134
- GameSentenceMiner/owocr/owocr/run.py +1 -1
- GameSentenceMiner/ui/anki_confirmation.py +4 -2
- GameSentenceMiner/ui/config_gui.py +12 -0
- GameSentenceMiner/util/configuration.py +6 -2
- GameSentenceMiner/util/cron/__init__.py +12 -0
- GameSentenceMiner/util/cron/daily_rollup.py +613 -0
- GameSentenceMiner/util/cron/jiten_update.py +397 -0
- GameSentenceMiner/util/cron/populate_games.py +154 -0
- GameSentenceMiner/util/cron/run_crons.py +148 -0
- GameSentenceMiner/util/cron/setup_populate_games_cron.py +118 -0
- GameSentenceMiner/util/cron_table.py +334 -0
- GameSentenceMiner/util/db.py +236 -49
- GameSentenceMiner/util/ffmpeg.py +23 -4
- GameSentenceMiner/util/games_table.py +340 -93
- GameSentenceMiner/util/jiten_api_client.py +188 -0
- GameSentenceMiner/util/stats_rollup_table.py +216 -0
- GameSentenceMiner/web/anki_api_endpoints.py +438 -220
- GameSentenceMiner/web/database_api.py +955 -1259
- GameSentenceMiner/web/jiten_database_api.py +1015 -0
- GameSentenceMiner/web/rollup_stats.py +672 -0
- GameSentenceMiner/web/static/css/dashboard-shared.css +75 -13
- GameSentenceMiner/web/static/css/overview.css +604 -47
- GameSentenceMiner/web/static/css/search.css +226 -0
- GameSentenceMiner/web/static/css/shared.css +762 -0
- GameSentenceMiner/web/static/css/stats.css +221 -0
- GameSentenceMiner/web/static/js/components/bar-chart.js +339 -0
- GameSentenceMiner/web/static/js/database-bulk-operations.js +320 -0
- GameSentenceMiner/web/static/js/database-game-data.js +390 -0
- GameSentenceMiner/web/static/js/database-game-operations.js +213 -0
- GameSentenceMiner/web/static/js/database-helpers.js +44 -0
- GameSentenceMiner/web/static/js/database-jiten-integration.js +750 -0
- GameSentenceMiner/web/static/js/database-popups.js +89 -0
- GameSentenceMiner/web/static/js/database-tabs.js +64 -0
- GameSentenceMiner/web/static/js/database-text-management.js +371 -0
- GameSentenceMiner/web/static/js/database.js +86 -718
- GameSentenceMiner/web/static/js/goals.js +79 -18
- GameSentenceMiner/web/static/js/heatmap.js +29 -23
- GameSentenceMiner/web/static/js/overview.js +1205 -339
- GameSentenceMiner/web/static/js/regex-patterns.js +100 -0
- GameSentenceMiner/web/static/js/search.js +215 -18
- GameSentenceMiner/web/static/js/shared.js +193 -39
- GameSentenceMiner/web/static/js/stats.js +1536 -179
- GameSentenceMiner/web/stats.py +1142 -269
- GameSentenceMiner/web/stats_api.py +2104 -0
- GameSentenceMiner/web/templates/anki_stats.html +4 -18
- GameSentenceMiner/web/templates/components/date-range.html +118 -3
- GameSentenceMiner/web/templates/components/html-head.html +40 -6
- GameSentenceMiner/web/templates/components/js-config.html +8 -8
- GameSentenceMiner/web/templates/components/regex-input.html +160 -0
- GameSentenceMiner/web/templates/database.html +564 -117
- GameSentenceMiner/web/templates/goals.html +41 -5
- GameSentenceMiner/web/templates/overview.html +159 -129
- GameSentenceMiner/web/templates/search.html +78 -9
- GameSentenceMiner/web/templates/stats.html +159 -5
- GameSentenceMiner/web/texthooking_page.py +280 -111
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/METADATA +43 -2
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/RECORD +70 -47
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Daily Statistics Rollup Cron Job for GameSentenceMiner
|
|
3
|
+
|
|
4
|
+
This module provides a cron job that runs once a day to roll up all statistics
|
|
5
|
+
from all previous dates up to but not including today (so up to yesterday).
|
|
6
|
+
|
|
7
|
+
This is designed to be called by the cron system via run_crons.py.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
from GameSentenceMiner.util.cron.daily_rollup import run_daily_rollup
|
|
11
|
+
|
|
12
|
+
# Run the daily rollup
|
|
13
|
+
result = run_daily_rollup()
|
|
14
|
+
print(f"Processed {result['processed']} dates")
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import time
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
from datetime import datetime, timedelta
|
|
20
|
+
from typing import Dict, List, Optional
|
|
21
|
+
|
|
22
|
+
from GameSentenceMiner.util.configuration import get_stats_config, logger
|
|
23
|
+
from GameSentenceMiner.util.db import GameLinesTable
|
|
24
|
+
from GameSentenceMiner.util.games_table import GamesTable
|
|
25
|
+
from GameSentenceMiner.util.stats_rollup_table import StatsRollupTable
|
|
26
|
+
from GameSentenceMiner.web.stats import (
|
|
27
|
+
calculate_actual_reading_time,
|
|
28
|
+
calculate_hourly_activity,
|
|
29
|
+
calculate_hourly_reading_speed,
|
|
30
|
+
calculate_kanji_frequency,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_first_data_date() -> Optional[str]:
    """Return the earliest local-time date (YYYY-MM-DD) with GSM data, or None if empty."""
    row = GameLinesTable._db.fetchone(
        f"SELECT DATE(datetime(MIN(timestamp), 'unixepoch', 'localtime')) FROM {GameLinesTable._table}"
    )
    if row and row[0]:
        return row[0]
    return None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_all_data_dates() -> List[str]:
    """Return every distinct local-time date (YYYY-MM-DD) that has GSM data, in ascending order."""
    rows = GameLinesTable._db.fetchall(
        f"SELECT DISTINCT DATE(datetime(timestamp, 'unixepoch', 'localtime')) as date "
        f"FROM {GameLinesTable._table} ORDER BY date"
    )
    dates = []
    for row in rows:
        if row[0]:
            dates.append(row[0])
    return dates
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def analyze_sessions(lines: List) -> Dict:
    """
    Group lines into reading sessions (split wherever the gap between
    consecutive lines exceeds the configured session gap) and summarize them.

    Args:
        lines: List of GameLinesTable records

    Returns:
        Dictionary with session statistics
    """
    if not lines or len(lines) < 2:
        # 0 or 1 line: a single trivial session with no measurable duration.
        return {
            'count': 1 if lines else 0,
            'total_time': 0.0,
            'active_time': 0.0,
            'longest': 0.0,
            'shortest': 0.0,
            'average': 0.0,
            'max_chars': sum(len(ln.line_text) if ln.line_text else 0 for ln in lines),
            'max_time': 0.0
        }

    ordered = sorted(lines, key=lambda ln: float(ln.timestamp))
    gap_limit = get_stats_config().session_gap_seconds

    # Partition the ordered lines into sessions: start a new one whenever the
    # gap to the previous line exceeds the configured limit.
    sessions = [[ordered[0]]]
    for ln in ordered[1:]:
        if float(ln.timestamp) - float(sessions[-1][-1].timestamp) <= gap_limit:
            sessions[-1].append(ln)
        else:
            sessions.append([ln])

    durations = []
    char_counts = []
    active_total = 0.0
    for session in sessions:
        if len(session) >= 2:
            span = calculate_actual_reading_time([float(ln.timestamp) for ln in session])
            active_total += span
        else:
            # Single-line sessions contribute no measurable duration.
            span = 0.0
        durations.append(span)
        char_counts.append(sum(len(ln.line_text) if ln.line_text else 0 for ln in session))

    # Total reading time over the whole set (gaps capped by session_gap logic
    # inside calculate_actual_reading_time).
    overall = calculate_actual_reading_time([float(ln.timestamp) for ln in ordered])

    positive = [d for d in durations if d > 0]
    return {
        'count': len(sessions),
        'total_time': overall,
        'active_time': active_total,
        'longest': max(durations) if durations else 0.0,
        'shortest': min(positive) if positive else 0.0,
        'average': sum(durations) / len(durations) if durations else 0.0,
        'max_chars': max(char_counts) if char_counts else 0,
        'max_time': max(durations) if durations else 0.0
    }
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def analyze_hourly_data(lines: List) -> Dict:
    """
    Compute per-hour character activity and reading-speed patterns.

    Args:
        lines: List of GameLinesTable records

    Returns:
        Dictionary with hourly activity and speed data
    """
    if not lines:
        return {
            'hourly_activity': {},
            'hourly_speeds': {}
        }

    # Delegate the computation to the shared helpers in stats.py, which
    # return one value per hour-of-day.
    chars_by_hour = calculate_hourly_activity(lines)
    speeds_by_hour = calculate_hourly_reading_speed(lines)

    # Keep only hours with non-zero values, keyed by the hour as a string.
    activity = {}
    for hour, chars in enumerate(chars_by_hour):
        if chars > 0:
            activity[str(hour)] = chars

    speeds = {}
    for hour, speed in enumerate(speeds_by_hour):
        if speed > 0:
            speeds[str(hour)] = speed

    return {
        'hourly_activity': activity,
        'hourly_speeds': speeds
    }
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _resolve_game_title(game_id: str, fallback_name) -> str:
    """Resolve a display title for a game id via a fallback chain:
    games_table title_original, then the OBS scene name, then a shortened
    UUID (better than "Unknown Game" for debugging)."""
    try:
        game = GamesTable.get(game_id)  # game_id is already a UUID string
        if game and game.title_original:
            # Best case: the game is in the database with a proper title.
            title = game.title_original
            logger.debug(f"[ROLLUP_TITLE] Using games_table title for {game_id[:8]}...: '{title}'")
        elif fallback_name:
            # Good fallback: use OBS scene name.
            title = fallback_name
            logger.debug(f"[ROLLUP_TITLE] Using OBS scene name for {game_id[:8]}...: '{title}'")
        else:
            title = f"Game {game_id[:8]}"
            logger.warning(f"[ROLLUP_TITLE] No title or game_name for {game_id[:8]}..., using shortened UUID")
    except Exception as e:
        # Exception during lookup - use the same fallback chain.
        if fallback_name:
            title = fallback_name
            logger.info(f"[ROLLUP_TITLE] Exception during lookup, using game_name '{title}' for {game_id[:8]}...: {e}")
        else:
            title = f"Game {game_id[:8]}"
            logger.warning(f"[ROLLUP_TITLE] Exception and no game_name for {game_id[:8]}..., using shortened UUID: {e}")
    return title


def analyze_game_activity(lines: List, date_str: str) -> Dict:
    """
    Analyze per-game activity for the day.

    Args:
        lines: List of GameLinesTable records
        date_str: Date in YYYY-MM-DD format (currently unused; kept for
            interface stability)

    Returns:
        Dictionary with game activity data:
        'completed' (int), 'started' (int), 'details' (per-game dict of
        title/chars/time/lines), 'game_ids' (list of game id strings)
    """
    if not lines:
        return {
            'completed': 0,
            'started': 0,
            'details': {},
            'game_ids': []
        }

    game_data = defaultdict(lambda: {'chars': 0, 'lines': 0, 'timestamps': [], 'game_name': None})
    game_ids = set()

    for line in lines:
        if line.game_id and line.game_id.strip():
            game_id = str(line.game_id)
            game_ids.add(game_id)

            chars = len(line.line_text) if line.line_text else 0
            game_data[game_id]['chars'] += chars
            game_data[game_id]['lines'] += 1
            game_data[game_id]['timestamps'].append(float(line.timestamp))

            # Remember the OBS scene name as a title fallback (first one wins).
            if hasattr(line, 'game_name') and line.game_name and not game_data[game_id]['game_name']:
                game_data[game_id]['game_name'] = line.game_name
        else:
            # DEBUG: lines without a game_id are skipped but logged.
            if hasattr(line, 'game_name') and line.game_name:
                logger.debug(f"[ROLLUP_DEBUG] Line without game_id but has game_name: '{line.game_name}'")

    # Calculate time spent per game and resolve a display title for each.
    game_details = {}
    for game_id, data in game_data.items():
        time_spent = calculate_actual_reading_time(data['timestamps']) if len(data['timestamps']) >= 2 else 0.0
        game_details[game_id] = {
            'title': _resolve_game_title(game_id, data['game_name']),
            'chars': data['chars'],
            'time': time_spent,
            'lines': data['lines']
        }

    # Basic version: games_started = unique games played, games_completed = 0
    # (can be enhanced later to track actual state changes).
    return {
        'completed': 0,
        'started': len(game_ids),
        'details': game_details,
        'game_ids': list(game_ids)
    }
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def analyze_kanji_data(lines: List) -> Dict:
    """
    Analyze kanji frequency for the day.

    Args:
        lines: List of GameLinesTable records

    Returns:
        Dictionary with kanji frequency data
    """
    if not lines:
        return {
            'unique_count': 0,
            'frequencies': {}
        }

    # Delegate the counting to the shared helper in stats.py.
    result = calculate_kanji_frequency(lines)

    # Flatten the per-kanji entries into a simple kanji -> frequency map.
    frequencies = {entry['kanji']: entry['frequency'] for entry in result.get('kanji_data', [])}

    return {
        'unique_count': result.get('unique_count', 0),
        'frequencies': frequencies
    }
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def calculate_daily_stats(date_str: str) -> Dict:
|
|
276
|
+
"""
|
|
277
|
+
Calculate comprehensive daily statistics for a given date using existing functions.
|
|
278
|
+
|
|
279
|
+
Args:
|
|
280
|
+
date_str: Date in YYYY-MM-DD format
|
|
281
|
+
|
|
282
|
+
Returns:
|
|
283
|
+
Dictionary with all 27 fields for StatsRollupTable
|
|
284
|
+
"""
|
|
285
|
+
logger.info(f"Calculating daily stats for {date_str}")
|
|
286
|
+
|
|
287
|
+
# Convert date to timestamp range
|
|
288
|
+
date_start = datetime.strptime(date_str, '%Y-%m-%d').timestamp()
|
|
289
|
+
date_end = date_start + 86400 # +24 hours
|
|
290
|
+
|
|
291
|
+
# Get all lines for this day
|
|
292
|
+
lines = GameLinesTable.get_lines_filtered_by_timestamp(date_start, date_end, for_stats=True)
|
|
293
|
+
|
|
294
|
+
if not lines:
|
|
295
|
+
logger.info(f"No lines found for {date_str}")
|
|
296
|
+
return {
|
|
297
|
+
'date': date_str,
|
|
298
|
+
'total_lines': 0,
|
|
299
|
+
'total_characters': 0,
|
|
300
|
+
'total_sessions': 0,
|
|
301
|
+
'unique_games_played': 0,
|
|
302
|
+
'total_reading_time_seconds': 0.0,
|
|
303
|
+
'total_active_time_seconds': 0.0,
|
|
304
|
+
'longest_session_seconds': 0.0,
|
|
305
|
+
'shortest_session_seconds': 0.0,
|
|
306
|
+
'average_session_seconds': 0.0,
|
|
307
|
+
'average_reading_speed_chars_per_hour': 0.0,
|
|
308
|
+
'peak_reading_speed_chars_per_hour': 0.0,
|
|
309
|
+
'games_completed': 0,
|
|
310
|
+
'games_started': 0,
|
|
311
|
+
'anki_cards_created': 0,
|
|
312
|
+
'lines_with_screenshots': 0,
|
|
313
|
+
'lines_with_audio': 0,
|
|
314
|
+
'lines_with_translations': 0,
|
|
315
|
+
'unique_kanji_seen': 0,
|
|
316
|
+
'kanji_frequency_data': '{}',
|
|
317
|
+
'hourly_activity_data': '{}',
|
|
318
|
+
'hourly_reading_speed_data': '{}',
|
|
319
|
+
'game_activity_data': '{}',
|
|
320
|
+
'games_played_ids': '[]',
|
|
321
|
+
'max_chars_in_session': 0,
|
|
322
|
+
'max_time_in_session_seconds': 0.0
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
logger.info(f"Processing {len(lines)} lines for {date_str}")
|
|
326
|
+
|
|
327
|
+
# Calculate basic stats
|
|
328
|
+
total_lines = len(lines)
|
|
329
|
+
total_characters = sum(len(line.line_text) if line.line_text else 0 for line in lines)
|
|
330
|
+
|
|
331
|
+
# Calculate Anki integration stats
|
|
332
|
+
lines_with_screenshots = sum(1 for line in lines if line.screenshot_in_anki and line.screenshot_in_anki.strip())
|
|
333
|
+
lines_with_audio = sum(1 for line in lines if line.audio_in_anki and line.audio_in_anki.strip())
|
|
334
|
+
lines_with_translations = sum(1 for line in lines if line.translation and line.translation.strip())
|
|
335
|
+
anki_cards = sum(1 for line in lines
|
|
336
|
+
if (line.screenshot_in_anki and line.screenshot_in_anki.strip()) or
|
|
337
|
+
(line.audio_in_anki and line.audio_in_anki.strip()))
|
|
338
|
+
|
|
339
|
+
# Analyze sessions
|
|
340
|
+
session_stats = analyze_sessions(lines)
|
|
341
|
+
|
|
342
|
+
# Calculate reading speeds
|
|
343
|
+
timestamps = [float(line.timestamp) for line in lines]
|
|
344
|
+
total_time_seconds = session_stats['total_time']
|
|
345
|
+
total_time_hours = total_time_seconds / 3600 if total_time_seconds > 0 else 0
|
|
346
|
+
|
|
347
|
+
average_speed = (total_characters / total_time_hours) if total_time_hours > 0 else 0.0
|
|
348
|
+
|
|
349
|
+
# Calculate peak speed (best hourly speed)
|
|
350
|
+
hourly_data = analyze_hourly_data(lines)
|
|
351
|
+
peak_speed = max(hourly_data['hourly_speeds'].values()) if hourly_data['hourly_speeds'] else 0.0
|
|
352
|
+
|
|
353
|
+
# Analyze game activity
|
|
354
|
+
game_activity = analyze_game_activity(lines, date_str)
|
|
355
|
+
|
|
356
|
+
# Analyze kanji
|
|
357
|
+
kanji_data = analyze_kanji_data(lines)
|
|
358
|
+
|
|
359
|
+
# Import json for serialization
|
|
360
|
+
import json
|
|
361
|
+
|
|
362
|
+
return {
|
|
363
|
+
'date': date_str,
|
|
364
|
+
'total_lines': total_lines,
|
|
365
|
+
'total_characters': total_characters,
|
|
366
|
+
'total_sessions': session_stats['count'],
|
|
367
|
+
'unique_games_played': len(game_activity['game_ids']),
|
|
368
|
+
'total_reading_time_seconds': total_time_seconds,
|
|
369
|
+
'total_active_time_seconds': session_stats['active_time'],
|
|
370
|
+
'longest_session_seconds': session_stats['longest'],
|
|
371
|
+
'shortest_session_seconds': session_stats['shortest'],
|
|
372
|
+
'average_session_seconds': session_stats['average'],
|
|
373
|
+
'average_reading_speed_chars_per_hour': average_speed,
|
|
374
|
+
'peak_reading_speed_chars_per_hour': peak_speed,
|
|
375
|
+
'games_completed': game_activity['completed'],
|
|
376
|
+
'games_started': game_activity['started'],
|
|
377
|
+
'anki_cards_created': anki_cards,
|
|
378
|
+
'lines_with_screenshots': lines_with_screenshots,
|
|
379
|
+
'lines_with_audio': lines_with_audio,
|
|
380
|
+
'lines_with_translations': lines_with_translations,
|
|
381
|
+
'unique_kanji_seen': kanji_data['unique_count'],
|
|
382
|
+
'kanji_frequency_data': json.dumps(kanji_data['frequencies'], ensure_ascii=False),
|
|
383
|
+
'hourly_activity_data': json.dumps(hourly_data['hourly_activity']),
|
|
384
|
+
'hourly_reading_speed_data': json.dumps(hourly_data['hourly_speeds']),
|
|
385
|
+
'game_activity_data': json.dumps(game_activity['details'], ensure_ascii=False),
|
|
386
|
+
'games_played_ids': json.dumps(game_activity['game_ids']),
|
|
387
|
+
'max_chars_in_session': session_stats['max_chars'],
|
|
388
|
+
'max_time_in_session_seconds': session_stats['max_time']
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def run_daily_rollup() -> Dict:
|
|
393
|
+
"""
|
|
394
|
+
Run the daily statistics rollup for all dates up to yesterday.
|
|
395
|
+
|
|
396
|
+
This function:
|
|
397
|
+
1. Finds the first date where user has data in GSM
|
|
398
|
+
2. Loops from that date to yesterday (current day minus one day)
|
|
399
|
+
3. Checks if StatsRollupTable.date exists for each date
|
|
400
|
+
4. Precomputes all data and inserts into table if missing
|
|
401
|
+
|
|
402
|
+
This is the main entry point for the daily rollup cron job.
|
|
403
|
+
|
|
404
|
+
Returns:
|
|
405
|
+
Dictionary with summary statistics
|
|
406
|
+
"""
|
|
407
|
+
logger.info("Starting daily statistics rollup cron job")
|
|
408
|
+
|
|
409
|
+
start_time = time.time()
|
|
410
|
+
|
|
411
|
+
try:
|
|
412
|
+
# Get the first date where user has data
|
|
413
|
+
first_date = get_first_data_date()
|
|
414
|
+
|
|
415
|
+
if first_date is None:
|
|
416
|
+
logger.warning("No data found in GameLinesTable")
|
|
417
|
+
return {
|
|
418
|
+
'success': True,
|
|
419
|
+
'start_date': None,
|
|
420
|
+
'end_date': None,
|
|
421
|
+
'total_dates': 0,
|
|
422
|
+
'processed': 0,
|
|
423
|
+
'overwritten': 0,
|
|
424
|
+
'errors': 0,
|
|
425
|
+
'elapsed_time': time.time() - start_time,
|
|
426
|
+
'error_message': None
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
# Calculate yesterday (current day minus one day)
|
|
430
|
+
yesterday = datetime.now() - timedelta(days=1)
|
|
431
|
+
end_date = yesterday.strftime('%Y-%m-%d')
|
|
432
|
+
|
|
433
|
+
logger.info(f"Date range: {first_date} to {end_date}")
|
|
434
|
+
|
|
435
|
+
# Get all dates that have actual data
|
|
436
|
+
all_data_dates = get_all_data_dates()
|
|
437
|
+
logger.debug(f"Found {len(all_data_dates)} dates with data in total")
|
|
438
|
+
|
|
439
|
+
# Filter to dates up to yesterday
|
|
440
|
+
start_dt = datetime.strptime(first_date, '%Y-%m-%d')
|
|
441
|
+
end_dt = datetime.strptime(end_date, '%Y-%m-%d')
|
|
442
|
+
|
|
443
|
+
dates_to_process = [
|
|
444
|
+
date for date in all_data_dates
|
|
445
|
+
if start_dt <= datetime.strptime(date, '%Y-%m-%d') <= end_dt
|
|
446
|
+
]
|
|
447
|
+
|
|
448
|
+
total_dates = len(dates_to_process)
|
|
449
|
+
logger.info(f"Processing {total_dates} dates in range (up to yesterday)")
|
|
450
|
+
|
|
451
|
+
if total_dates == 0:
|
|
452
|
+
logger.info("No dates to process")
|
|
453
|
+
return {
|
|
454
|
+
'success': True,
|
|
455
|
+
'start_date': first_date,
|
|
456
|
+
'end_date': end_date,
|
|
457
|
+
'total_dates': 0,
|
|
458
|
+
'processed': 0,
|
|
459
|
+
'overwritten': 0,
|
|
460
|
+
'errors': 0,
|
|
461
|
+
'elapsed_time': time.time() - start_time,
|
|
462
|
+
'error_message': None
|
|
463
|
+
}
|
|
464
|
+
|
|
465
|
+
# Process each date
|
|
466
|
+
processed = 0
|
|
467
|
+
overwritten = 0
|
|
468
|
+
errors = 0
|
|
469
|
+
|
|
470
|
+
for i, date_str in enumerate(dates_to_process, 1):
|
|
471
|
+
try:
|
|
472
|
+
# Always calculate fresh stats for the date
|
|
473
|
+
logger.info(f"Processing {i}/{total_dates}: {date_str}")
|
|
474
|
+
stats = calculate_daily_stats(date_str)
|
|
475
|
+
|
|
476
|
+
# Check if rollup already exists
|
|
477
|
+
existing = StatsRollupTable.get_by_date(date_str)
|
|
478
|
+
|
|
479
|
+
if existing:
|
|
480
|
+
# Update all fields in existing rollup
|
|
481
|
+
existing.date = stats['date']
|
|
482
|
+
existing.total_lines = stats['total_lines']
|
|
483
|
+
existing.total_characters = stats['total_characters']
|
|
484
|
+
existing.total_sessions = stats['total_sessions']
|
|
485
|
+
existing.unique_games_played = stats['unique_games_played']
|
|
486
|
+
existing.total_reading_time_seconds = stats['total_reading_time_seconds']
|
|
487
|
+
existing.total_active_time_seconds = stats['total_active_time_seconds']
|
|
488
|
+
existing.longest_session_seconds = stats['longest_session_seconds']
|
|
489
|
+
existing.shortest_session_seconds = stats['shortest_session_seconds']
|
|
490
|
+
existing.average_session_seconds = stats['average_session_seconds']
|
|
491
|
+
existing.average_reading_speed_chars_per_hour = stats['average_reading_speed_chars_per_hour']
|
|
492
|
+
existing.peak_reading_speed_chars_per_hour = stats['peak_reading_speed_chars_per_hour']
|
|
493
|
+
existing.games_completed = stats['games_completed']
|
|
494
|
+
existing.games_started = stats['games_started']
|
|
495
|
+
existing.anki_cards_created = stats['anki_cards_created']
|
|
496
|
+
existing.lines_with_screenshots = stats['lines_with_screenshots']
|
|
497
|
+
existing.lines_with_audio = stats['lines_with_audio']
|
|
498
|
+
existing.lines_with_translations = stats['lines_with_translations']
|
|
499
|
+
existing.unique_kanji_seen = stats['unique_kanji_seen']
|
|
500
|
+
existing.kanji_frequency_data = stats['kanji_frequency_data']
|
|
501
|
+
existing.hourly_activity_data = stats['hourly_activity_data']
|
|
502
|
+
existing.hourly_reading_speed_data = stats['hourly_reading_speed_data']
|
|
503
|
+
existing.game_activity_data = stats['game_activity_data']
|
|
504
|
+
existing.games_played_ids = stats['games_played_ids']
|
|
505
|
+
existing.max_chars_in_session = stats['max_chars_in_session']
|
|
506
|
+
existing.max_time_in_session_seconds = stats['max_time_in_session_seconds']
|
|
507
|
+
existing.updated_at = time.time()
|
|
508
|
+
existing.save()
|
|
509
|
+
|
|
510
|
+
overwritten += 1
|
|
511
|
+
logger.debug(f"Overwritten rollup for {date_str}")
|
|
512
|
+
else:
|
|
513
|
+
# Create and save new rollup entry with all 27 fields
|
|
514
|
+
rollup = StatsRollupTable(
|
|
515
|
+
date=stats['date'],
|
|
516
|
+
total_lines=stats['total_lines'],
|
|
517
|
+
total_characters=stats['total_characters'],
|
|
518
|
+
total_sessions=stats['total_sessions'],
|
|
519
|
+
unique_games_played=stats['unique_games_played'],
|
|
520
|
+
total_reading_time_seconds=stats['total_reading_time_seconds'],
|
|
521
|
+
total_active_time_seconds=stats['total_active_time_seconds'],
|
|
522
|
+
longest_session_seconds=stats['longest_session_seconds'],
|
|
523
|
+
shortest_session_seconds=stats['shortest_session_seconds'],
|
|
524
|
+
average_session_seconds=stats['average_session_seconds'],
|
|
525
|
+
average_reading_speed_chars_per_hour=stats['average_reading_speed_chars_per_hour'],
|
|
526
|
+
peak_reading_speed_chars_per_hour=stats['peak_reading_speed_chars_per_hour'],
|
|
527
|
+
games_completed=stats['games_completed'],
|
|
528
|
+
games_started=stats['games_started'],
|
|
529
|
+
anki_cards_created=stats['anki_cards_created'],
|
|
530
|
+
lines_with_screenshots=stats['lines_with_screenshots'],
|
|
531
|
+
lines_with_audio=stats['lines_with_audio'],
|
|
532
|
+
lines_with_translations=stats['lines_with_translations'],
|
|
533
|
+
unique_kanji_seen=stats['unique_kanji_seen'],
|
|
534
|
+
kanji_frequency_data=stats['kanji_frequency_data'],
|
|
535
|
+
hourly_activity_data=stats['hourly_activity_data'],
|
|
536
|
+
hourly_reading_speed_data=stats['hourly_reading_speed_data'],
|
|
537
|
+
game_activity_data=stats['game_activity_data'],
|
|
538
|
+
games_played_ids=stats['games_played_ids'],
|
|
539
|
+
max_chars_in_session=stats['max_chars_in_session'],
|
|
540
|
+
max_time_in_session_seconds=stats['max_time_in_session_seconds'],
|
|
541
|
+
created_at=time.time(),
|
|
542
|
+
updated_at=time.time()
|
|
543
|
+
)
|
|
544
|
+
rollup.save()
|
|
545
|
+
|
|
546
|
+
processed += 1
|
|
547
|
+
logger.debug(f"Created rollup for {date_str}")
|
|
548
|
+
|
|
549
|
+
# Progress update every 10 dates
|
|
550
|
+
if processed % 10 == 0:
|
|
551
|
+
logger.info(f"Progress: {processed}/{total_dates} dates processed")
|
|
552
|
+
|
|
553
|
+
except Exception as e:
|
|
554
|
+
logger.error(f"Error processing {date_str}: {e}", exc_info=True)
|
|
555
|
+
errors += 1
|
|
556
|
+
continue
|
|
557
|
+
|
|
558
|
+
elapsed_time = time.time() - start_time
|
|
559
|
+
|
|
560
|
+
# Log summary
|
|
561
|
+
logger.info("Daily rollup cron job completed")
|
|
562
|
+
logger.info(f"Date range: {first_date} to {end_date}, Total dates: {total_dates}, Processed: {processed}, Overwritten: {overwritten}, Errors: {errors}, Time: {elapsed_time:.2f}s")
|
|
563
|
+
|
|
564
|
+
return {
|
|
565
|
+
'success': True,
|
|
566
|
+
'start_date': first_date,
|
|
567
|
+
'end_date': end_date,
|
|
568
|
+
'total_dates': total_dates,
|
|
569
|
+
'processed': processed,
|
|
570
|
+
'overwritten': overwritten,
|
|
571
|
+
'errors': errors,
|
|
572
|
+
'elapsed_time': elapsed_time,
|
|
573
|
+
'error_message': None
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
except Exception as e:
|
|
577
|
+
elapsed_time = time.time() - start_time
|
|
578
|
+
error_msg = str(e)
|
|
579
|
+
logger.error(f"Fatal error in daily rollup cron job: {error_msg}", exc_info=True)
|
|
580
|
+
|
|
581
|
+
return {
|
|
582
|
+
'success': False,
|
|
583
|
+
'start_date': None,
|
|
584
|
+
'end_date': None,
|
|
585
|
+
'total_dates': 0,
|
|
586
|
+
'processed': 0,
|
|
587
|
+
'overwritten': 0,
|
|
588
|
+
'errors': 1,
|
|
589
|
+
'elapsed_time': elapsed_time,
|
|
590
|
+
'error_message': error_msg
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
# Example usage for testing
|
|
595
|
+
if __name__ == '__main__':
    # Manual entry point: run the rollup once and print a human-readable summary.
    result = run_daily_rollup()

    divider = "=" * 80
    print("\n" + divider)
    print("DAILY ROLLUP SUMMARY")
    print(divider)
    print(f"Success: {'Yes' if result['success'] else 'No'}")
    if result['start_date']:
        print(f"Date range: {result['start_date']} to {result['end_date']}")
    print(f"Total dates with data: {result['total_dates']}")
    print(f"Successfully processed: {result['processed']}")
    print(f"Overwritten: {result['overwritten']}")
    print(f"Errors: {result['errors']}")
    print(f"Time elapsed: {result['elapsed_time']:.2f} seconds")
    if result['error_message']:
        print(f"Error: {result['error_message']}")
    print(divider)
|