GameSentenceMiner 2.19.16__py3-none-any.whl → 2.20.0__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
This version of GameSentenceMiner has been flagged as a potentially problematic release.
- GameSentenceMiner/__init__.py +39 -0
- GameSentenceMiner/anki.py +6 -3
- GameSentenceMiner/gametext.py +13 -2
- GameSentenceMiner/gsm.py +40 -3
- GameSentenceMiner/locales/en_us.json +4 -0
- GameSentenceMiner/locales/ja_jp.json +4 -0
- GameSentenceMiner/locales/zh_cn.json +4 -0
- GameSentenceMiner/obs.py +4 -1
- GameSentenceMiner/owocr/owocr/ocr.py +304 -134
- GameSentenceMiner/owocr/owocr/run.py +1 -1
- GameSentenceMiner/ui/anki_confirmation.py +4 -2
- GameSentenceMiner/ui/config_gui.py +12 -0
- GameSentenceMiner/util/configuration.py +6 -2
- GameSentenceMiner/util/cron/__init__.py +12 -0
- GameSentenceMiner/util/cron/daily_rollup.py +613 -0
- GameSentenceMiner/util/cron/jiten_update.py +397 -0
- GameSentenceMiner/util/cron/populate_games.py +154 -0
- GameSentenceMiner/util/cron/run_crons.py +148 -0
- GameSentenceMiner/util/cron/setup_populate_games_cron.py +118 -0
- GameSentenceMiner/util/cron_table.py +334 -0
- GameSentenceMiner/util/db.py +236 -49
- GameSentenceMiner/util/ffmpeg.py +23 -4
- GameSentenceMiner/util/games_table.py +340 -93
- GameSentenceMiner/util/jiten_api_client.py +188 -0
- GameSentenceMiner/util/stats_rollup_table.py +216 -0
- GameSentenceMiner/web/anki_api_endpoints.py +438 -220
- GameSentenceMiner/web/database_api.py +955 -1259
- GameSentenceMiner/web/jiten_database_api.py +1015 -0
- GameSentenceMiner/web/rollup_stats.py +672 -0
- GameSentenceMiner/web/static/css/dashboard-shared.css +75 -13
- GameSentenceMiner/web/static/css/overview.css +604 -47
- GameSentenceMiner/web/static/css/search.css +226 -0
- GameSentenceMiner/web/static/css/shared.css +762 -0
- GameSentenceMiner/web/static/css/stats.css +221 -0
- GameSentenceMiner/web/static/js/components/bar-chart.js +339 -0
- GameSentenceMiner/web/static/js/database-bulk-operations.js +320 -0
- GameSentenceMiner/web/static/js/database-game-data.js +390 -0
- GameSentenceMiner/web/static/js/database-game-operations.js +213 -0
- GameSentenceMiner/web/static/js/database-helpers.js +44 -0
- GameSentenceMiner/web/static/js/database-jiten-integration.js +750 -0
- GameSentenceMiner/web/static/js/database-popups.js +89 -0
- GameSentenceMiner/web/static/js/database-tabs.js +64 -0
- GameSentenceMiner/web/static/js/database-text-management.js +371 -0
- GameSentenceMiner/web/static/js/database.js +86 -718
- GameSentenceMiner/web/static/js/goals.js +79 -18
- GameSentenceMiner/web/static/js/heatmap.js +29 -23
- GameSentenceMiner/web/static/js/overview.js +1205 -339
- GameSentenceMiner/web/static/js/regex-patterns.js +100 -0
- GameSentenceMiner/web/static/js/search.js +215 -18
- GameSentenceMiner/web/static/js/shared.js +193 -39
- GameSentenceMiner/web/static/js/stats.js +1536 -179
- GameSentenceMiner/web/stats.py +1142 -269
- GameSentenceMiner/web/stats_api.py +2104 -0
- GameSentenceMiner/web/templates/anki_stats.html +4 -18
- GameSentenceMiner/web/templates/components/date-range.html +118 -3
- GameSentenceMiner/web/templates/components/html-head.html +40 -6
- GameSentenceMiner/web/templates/components/js-config.html +8 -8
- GameSentenceMiner/web/templates/components/regex-input.html +160 -0
- GameSentenceMiner/web/templates/database.html +564 -117
- GameSentenceMiner/web/templates/goals.html +41 -5
- GameSentenceMiner/web/templates/overview.html +159 -129
- GameSentenceMiner/web/templates/search.html +78 -9
- GameSentenceMiner/web/templates/stats.html +159 -5
- GameSentenceMiner/web/texthooking_page.py +280 -111
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/METADATA +43 -2
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/RECORD +70 -47
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/top_level.txt +0 -0
GameSentenceMiner/web/rollup_stats.py (new file)

@@ -0,0 +1,672 @@
+"""
+Rollup Statistics Module
+
+This module handles all rollup-based statistics calculations for optimal performance.
+It aggregates pre-calculated daily rollup data instead of processing individual lines.
+
+Key Performance Strategy:
+- Use StatsRollupTable for historical data (fast aggregation)
+- Calculate only today's data live from GameLinesTable
+- Combine rollup + live data for complete statistics
+"""
+
+import datetime
+import json
+from collections import defaultdict
+from typing import Dict, List, Optional
+
+from GameSentenceMiner.util.stats_rollup_table import StatsRollupTable
+from GameSentenceMiner.util.db import GameLinesTable
+from GameSentenceMiner.util.configuration import logger
+
+
+def aggregate_rollup_data(rollups: List) -> Dict:
+    """
+    Aggregate multiple daily rollup records into a single statistics object.
+
+    Args:
+        rollups: List of StatsRollupTable records
+
+    Returns:
+        Dictionary with aggregated statistics matching the stats API format
+    """
+    if not rollups:
+        return {
+            "total_lines": 0,
+            "total_characters": 0,
+            "total_sessions": 0,
+            "unique_games_played": 0,
+            "total_reading_time_seconds": 0.0,
+            "total_active_time_seconds": 0.0,
+            "average_reading_speed_chars_per_hour": 0.0,
+            "peak_reading_speed_chars_per_hour": 0.0,
+            "longest_session_seconds": 0.0,
+            "shortest_session_seconds": 0.0,
+            "average_session_seconds": 0.0,
+            "max_chars_in_session": 0,
+            "max_time_in_session_seconds": 0.0,
+            "games_completed": 0,
+            "games_started": 0,
+            "anki_cards_created": 0,
+            "lines_with_screenshots": 0,
+            "lines_with_audio": 0,
+            "lines_with_translations": 0,
+            "unique_kanji_seen": 0,
+            "kanji_frequency_data": {},
+            "hourly_activity_data": {},
+            "hourly_reading_speed_data": {},
+            "game_activity_data": {},
+            "games_played_ids": [],
+        }
+
+    # ADDITIVE fields - sum across all days
+    total_lines = sum(r.total_lines for r in rollups)
+    total_characters = sum(r.total_characters for r in rollups)
+    total_sessions = sum(r.total_sessions for r in rollups)
+    total_reading_time = sum(r.total_reading_time_seconds for r in rollups)
+    total_active_time = sum(r.total_active_time_seconds for r in rollups)
+    anki_cards_created = sum(r.anki_cards_created for r in rollups)
+    lines_with_screenshots = sum(r.lines_with_screenshots for r in rollups)
+    lines_with_audio = sum(r.lines_with_audio for r in rollups)
+    lines_with_translations = sum(r.lines_with_translations for r in rollups)
+    games_completed = sum(r.games_completed for r in rollups)
+
+    # MAXIMUM fields - take highest value across all days
+    peak_reading_speed = max(
+        (r.peak_reading_speed_chars_per_hour for r in rollups), default=0.0
+    )
+    longest_session = max((r.longest_session_seconds for r in rollups), default=0.0)
+    max_chars_in_session = max((r.max_chars_in_session for r in rollups), default=0)
+    max_time_in_session = max(
+        (r.max_time_in_session_seconds for r in rollups), default=0.0
+    )
+
+    # MINIMUM field - take smallest non-zero value
+    shortest_session_values = [
+        r.shortest_session_seconds for r in rollups if r.shortest_session_seconds > 0
+    ]
+    shortest_session = min(shortest_session_values) if shortest_session_values else 0.0
+
+    # WEIGHTED AVERAGE - average reading speed weighted by active time
+    if total_active_time > 0:
+        weighted_speed_sum = sum(
+            r.average_reading_speed_chars_per_hour * r.total_active_time_seconds
+            for r in rollups
+            if r.total_active_time_seconds > 0
+        )
+        avg_reading_speed = weighted_speed_sum / total_active_time
+    else:
+        avg_reading_speed = 0.0
+
+    # WEIGHTED AVERAGE - average session duration weighted by number of sessions
+    if total_sessions > 0:
+        weighted_session_sum = sum(
+            r.average_session_seconds * r.total_sessions
+            for r in rollups
+            if r.total_sessions > 0
+        )
+        avg_session_seconds = weighted_session_sum / total_sessions
+    else:
+        avg_session_seconds = 0.0
+
+    # MERGE - Combine game IDs (union)
+    all_games_played = set()
+    for rollup in rollups:
+        if rollup.games_played_ids:
+            try:
+                games_ids = (
+                    json.loads(rollup.games_played_ids)
+                    if isinstance(rollup.games_played_ids, str)
+                    else rollup.games_played_ids
+                )
+                all_games_played.update(games_ids)
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(
+                    f"Failed to parse games_played_ids for rollup date {rollup.date}"
+                )
+
+    # MERGE - Combine game activity data (sum chars/time/lines per game)
+    combined_game_activity = {}
+    for rollup in rollups:
+        if rollup.game_activity_data:
+            try:
+                game_data = (
+                    json.loads(rollup.game_activity_data)
+                    if isinstance(rollup.game_activity_data, str)
+                    else rollup.game_activity_data
+                )
+                for game_id, activity in game_data.items():
+                    if game_id in combined_game_activity:
+                        combined_game_activity[game_id]["chars"] += activity.get(
+                            "chars", 0
+                        )
+                        combined_game_activity[game_id]["time"] += activity.get(
+                            "time", 0
+                        )
+                        combined_game_activity[game_id]["lines"] += activity.get(
+                            "lines", 0
+                        )
+                    else:
+                        combined_game_activity[game_id] = {
+                            "title": activity.get("title", f"Game {game_id}"),
+                            "chars": activity.get("chars", 0),
+                            "time": activity.get("time", 0),
+                            "lines": activity.get("lines", 0),
+                        }
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(
+                    f"Failed to parse game_activity_data for rollup date {rollup.date}"
+                )
+
+    # MERGE - Combine kanji frequency data (sum frequencies)
+    combined_kanji_frequency = {}
+    for rollup in rollups:
+        if rollup.kanji_frequency_data:
+            try:
+                kanji_data = (
+                    json.loads(rollup.kanji_frequency_data)
+                    if isinstance(rollup.kanji_frequency_data, str)
+                    else rollup.kanji_frequency_data
+                )
+                for kanji, count in kanji_data.items():
+                    combined_kanji_frequency[kanji] = (
+                        combined_kanji_frequency.get(kanji, 0) + count
+                    )
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(
+                    f"Failed to parse kanji_frequency_data for rollup date {rollup.date}"
+                )
+
+    # MERGE - Combine hourly activity data (sum characters per hour)
+    combined_hourly_activity = {}
+    for rollup in rollups:
+        if rollup.hourly_activity_data:
+            try:
+                hourly_data = (
+                    json.loads(rollup.hourly_activity_data)
+                    if isinstance(rollup.hourly_activity_data, str)
+                    else rollup.hourly_activity_data
+                )
+                for hour, chars in hourly_data.items():
+                    combined_hourly_activity[hour] = (
+                        combined_hourly_activity.get(hour, 0) + chars
+                    )
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(
+                    f"Failed to parse hourly_activity_data for rollup date {rollup.date}"
+                )
+
+    # MERGE - Combine hourly reading speeds (average across days for each hour)
+    hourly_speed_lists = defaultdict(list)
+    for rollup in rollups:
+        if rollup.hourly_reading_speed_data:
+            try:
+                speed_data = (
+                    json.loads(rollup.hourly_reading_speed_data)
+                    if isinstance(rollup.hourly_reading_speed_data, str)
+                    else rollup.hourly_reading_speed_data
+                )
+                for hour, speed in speed_data.items():
+                    if speed > 0:
+                        hourly_speed_lists[hour].append(speed)
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(
+                    f"Failed to parse hourly_reading_speed_data for rollup date {rollup.date}"
+                )
+
+    # Average the speeds for each hour
+    combined_hourly_speeds = {}
+    for hour, speeds in hourly_speed_lists.items():
+        combined_hourly_speeds[hour] = sum(speeds) / len(speeds) if speeds else 0
+
+    return {
+        "total_lines": total_lines,
+        "total_characters": total_characters,
+        "total_sessions": total_sessions,
+        "unique_games_played": len(all_games_played),
+        "total_reading_time_seconds": total_reading_time,
+        "total_active_time_seconds": total_active_time,
+        "average_reading_speed_chars_per_hour": avg_reading_speed,
+        "peak_reading_speed_chars_per_hour": peak_reading_speed,
+        "longest_session_seconds": longest_session,
+        "shortest_session_seconds": shortest_session,
+        "average_session_seconds": avg_session_seconds,
+        "max_chars_in_session": max_chars_in_session,
+        "max_time_in_session_seconds": max_time_in_session,
+        "games_completed": games_completed,
+        "games_started": len(all_games_played),
+        "anki_cards_created": anki_cards_created,
+        "lines_with_screenshots": lines_with_screenshots,
+        "lines_with_audio": lines_with_audio,
+        "lines_with_translations": lines_with_translations,
+        "unique_kanji_seen": len(combined_kanji_frequency),
+        "kanji_frequency_data": combined_kanji_frequency,
+        "hourly_activity_data": combined_hourly_activity,
+        "hourly_reading_speed_data": combined_hourly_speeds,
+        "game_activity_data": combined_game_activity,
+        "games_played_ids": list(all_games_played),
+    }
+
+
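To make the aggregation semantics above concrete, here is a minimal sketch (not part of this release) that feeds two hand-built records into aggregate_rollup_data. The fake_rollup helper and every value in it are invented for illustration; in the application the inputs are StatsRollupTable rows loaded from the database.

    from types import SimpleNamespace

    from GameSentenceMiner.web.rollup_stats import aggregate_rollup_data

    def fake_rollup(date, chars, active_secs, speed):
        # Stand-in for a StatsRollupTable row; all field values are arbitrary.
        return SimpleNamespace(
            date=date,
            total_lines=100,
            total_characters=chars,
            total_sessions=2,
            total_reading_time_seconds=float(active_secs),
            total_active_time_seconds=float(active_secs),
            anki_cards_created=5,
            lines_with_screenshots=5,
            lines_with_audio=5,
            lines_with_translations=0,
            games_completed=0,
            peak_reading_speed_chars_per_hour=speed,
            average_reading_speed_chars_per_hour=speed,
            longest_session_seconds=1800.0,
            shortest_session_seconds=600.0,
            average_session_seconds=1200.0,
            max_chars_in_session=chars,
            max_time_in_session_seconds=1800.0,
            games_played_ids='["game-1"]',
            game_activity_data="{}",
            kanji_frequency_data="{}",
            hourly_activity_data="{}",
            hourly_reading_speed_data="{}",
        )

    day1 = fake_rollup("2025-01-01", 9000, 3600, 9000.0)
    day2 = fake_rollup("2025-01-02", 6000, 1800, 12000.0)
    stats = aggregate_rollup_data([day1, day2])

    assert stats["total_characters"] == 15000                     # ADDITIVE
    assert stats["peak_reading_speed_chars_per_hour"] == 12000.0  # MAXIMUM
    # WEIGHTED AVERAGE: (9000 * 3600 + 12000 * 1800) / 5400 == 10000
    assert stats["average_reading_speed_chars_per_hour"] == 10000.0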
+def calculate_live_stats_for_today(today_lines: List) -> Dict:
+    """
+    Calculate live statistics for today using existing stats.py functions.
+
+    Args:
+        today_lines: List of GameLinesTable records for today
+
+    Returns:
+        Dictionary with today's statistics in rollup format
+    """
+    if not today_lines:
+        return aggregate_rollup_data([])  # Return empty stats
+
+    # Import here to avoid circular dependency
+    from GameSentenceMiner.util.cron.daily_rollup import (
+        analyze_sessions,
+        analyze_hourly_data,
+        analyze_game_activity,
+        analyze_kanji_data,
+    )
+
+    # Calculate basic stats
+    total_lines = len(today_lines)
+    total_characters = sum(
+        len(line.line_text) if line.line_text else 0 for line in today_lines
+    )
+
+    # Calculate Anki integration stats
+    lines_with_screenshots = sum(
+        1
+        for line in today_lines
+        if line.screenshot_in_anki and line.screenshot_in_anki.strip()
+    )
+    lines_with_audio = sum(
+        1 for line in today_lines if line.audio_in_anki and line.audio_in_anki.strip()
+    )
+    lines_with_translations = sum(
+        1 for line in today_lines if line.translation and line.translation.strip()
+    )
+    anki_cards = sum(
+        1
+        for line in today_lines
+        if (line.screenshot_in_anki and line.screenshot_in_anki.strip())
+        or (line.audio_in_anki and line.audio_in_anki.strip())
+    )
+
+    # Analyze sessions
+    session_stats = analyze_sessions(today_lines)
+
+    # Calculate reading speeds
+    total_time_seconds = session_stats["total_time"]
+    total_time_hours = total_time_seconds / 3600 if total_time_seconds > 0 else 0
+    average_speed = (
+        (total_characters / total_time_hours) if total_time_hours > 0 else 0.0
+    )
+
+    # Calculate peak speed (best hourly speed)
+    hourly_data = analyze_hourly_data(today_lines)
+    peak_speed = (
+        max(hourly_data["hourly_speeds"].values())
+        if hourly_data["hourly_speeds"]
+        else 0.0
+    )
+
+    # Analyze game activity
+    today_str = datetime.date.today().strftime("%Y-%m-%d")
+    game_activity = analyze_game_activity(today_lines, today_str)
+
+    # Analyze kanji
+    kanji_data = analyze_kanji_data(today_lines)
+
+    return {
+        "total_lines": total_lines,
+        "total_characters": total_characters,
+        "total_sessions": session_stats["count"],
+        "unique_games_played": len(game_activity["game_ids"]),
+        "total_reading_time_seconds": total_time_seconds,
+        "total_active_time_seconds": session_stats["active_time"],
+        "average_reading_speed_chars_per_hour": average_speed,
+        "peak_reading_speed_chars_per_hour": peak_speed,
+        "longest_session_seconds": session_stats["longest"],
+        "shortest_session_seconds": session_stats["shortest"],
+        "average_session_seconds": session_stats["average"],
+        "max_chars_in_session": session_stats["max_chars"],
+        "max_time_in_session_seconds": session_stats["max_time"],
+        "games_completed": game_activity["completed"],
+        "games_started": game_activity["started"],
+        "anki_cards_created": anki_cards,
+        "lines_with_screenshots": lines_with_screenshots,
+        "lines_with_audio": lines_with_audio,
+        "lines_with_translations": lines_with_translations,
+        "unique_kanji_seen": kanji_data["unique_count"],
+        "kanji_frequency_data": kanji_data["frequencies"],
+        "hourly_activity_data": hourly_data["hourly_activity"],
+        "hourly_reading_speed_data": hourly_data["hourly_speeds"],
+        "game_activity_data": game_activity["details"],
+        "games_played_ids": game_activity["game_ids"],
+    }
+
+
+def combine_rollup_and_live_stats(rollup_stats: Dict, live_stats: Dict) -> Dict:
+    """
+    Combine rollup statistics with live statistics for today.
+
+    Args:
+        rollup_stats: Aggregated rollup statistics (can be None)
+        live_stats: Live calculated statistics for today (can be None)
+
+    Returns:
+        Combined statistics dictionary
+    """
+    if not rollup_stats and not live_stats:
+        return aggregate_rollup_data([])  # Return empty stats
+    elif not rollup_stats:
+        return live_stats
+    elif not live_stats:
+        return rollup_stats
+
+    # Combine both datasets
+    combined = {}
+
+    # ADDITIVE fields - sum rollup + live
+    additive_fields = [
+        "total_lines",
+        "total_characters",
+        "total_sessions",
+        "total_reading_time_seconds",
+        "total_active_time_seconds",
+        "games_completed",
+        "anki_cards_created",
+        "lines_with_screenshots",
+        "lines_with_audio",
+        "lines_with_translations",
+    ]
+
+    for field in additive_fields:
+        combined[field] = rollup_stats.get(field, 0) + live_stats.get(field, 0)
+
+    # MAXIMUM fields - take highest value
+    max_fields = [
+        "peak_reading_speed_chars_per_hour",
+        "longest_session_seconds",
+        "max_chars_in_session",
+        "max_time_in_session_seconds",
+    ]
+
+    for field in max_fields:
+        combined[field] = max(rollup_stats.get(field, 0), live_stats.get(field, 0))
+
+    # MINIMUM field - take smallest non-zero value
+    rollup_shortest = rollup_stats.get("shortest_session_seconds", 0)
+    live_shortest = live_stats.get("shortest_session_seconds", 0)
+    if rollup_shortest > 0 and live_shortest > 0:
+        combined["shortest_session_seconds"] = min(rollup_shortest, live_shortest)
+    elif rollup_shortest > 0:
+        combined["shortest_session_seconds"] = rollup_shortest
+    elif live_shortest > 0:
+        combined["shortest_session_seconds"] = live_shortest
+    else:
+        combined["shortest_session_seconds"] = 0.0
+
+    # WEIGHTED AVERAGE - average reading speed weighted by active time
+    rollup_time = rollup_stats.get("total_active_time_seconds", 0)
+    live_time = live_stats.get("total_active_time_seconds", 0)
+    total_time = rollup_time + live_time
+
+    if total_time > 0:
+        combined["average_reading_speed_chars_per_hour"] = (
+            rollup_stats.get("average_reading_speed_chars_per_hour", 0) * rollup_time
+            + live_stats.get("average_reading_speed_chars_per_hour", 0) * live_time
+        ) / total_time
+    else:
+        combined["average_reading_speed_chars_per_hour"] = 0.0
+
+    # WEIGHTED AVERAGE - average session duration weighted by session count
+    rollup_sessions = rollup_stats.get("total_sessions", 0)
+    live_sessions = live_stats.get("total_sessions", 0)
+    total_sessions = rollup_sessions + live_sessions
+
+    if total_sessions > 0:
+        combined["average_session_seconds"] = (
+            rollup_stats.get("average_session_seconds", 0) * rollup_sessions
+            + live_stats.get("average_session_seconds", 0) * live_sessions
+        ) / total_sessions
+    else:
+        combined["average_session_seconds"] = 0.0
+
+    # MERGE - Combine unique games (union)
+    rollup_games = set(rollup_stats.get("games_played_ids", []))
+    live_games = set(live_stats.get("games_played_ids", []))
+    all_games = rollup_games.union(live_games)
+    combined["unique_games_played"] = len(all_games)
+    combined["games_played_ids"] = list(all_games)
+    combined["games_started"] = len(all_games)
+
+    # MERGE - Combine kanji frequency data (sum frequencies)
+    rollup_kanji = rollup_stats.get("kanji_frequency_data", {})
+    live_kanji = live_stats.get("kanji_frequency_data", {})
+    combined_kanji = {}
+
+    for kanji, count in rollup_kanji.items():
+        combined_kanji[kanji] = count
+    for kanji, count in live_kanji.items():
+        combined_kanji[kanji] = combined_kanji.get(kanji, 0) + count
+
+    combined["kanji_frequency_data"] = combined_kanji
+    combined["unique_kanji_seen"] = len(combined_kanji)
+
+    # MERGE - Combine hourly activity data (sum characters per hour)
+    rollup_hourly = rollup_stats.get("hourly_activity_data", {})
+    live_hourly = live_stats.get("hourly_activity_data", {})
+    combined_hourly = {}
+
+    for hour in set(list(rollup_hourly.keys()) + list(live_hourly.keys())):
+        combined_hourly[hour] = rollup_hourly.get(hour, 0) + live_hourly.get(hour, 0)
+
+    combined["hourly_activity_data"] = combined_hourly
+
+    # MERGE - Combine hourly reading speed data (average)
+    rollup_speeds = rollup_stats.get("hourly_reading_speed_data", {})
+    live_speeds = live_stats.get("hourly_reading_speed_data", {})
+    combined_speeds = {}
+
+    for hour in set(list(rollup_speeds.keys()) + list(live_speeds.keys())):
+        speeds = []
+        if hour in rollup_speeds and rollup_speeds[hour] > 0:
+            speeds.append(rollup_speeds[hour])
+        if hour in live_speeds and live_speeds[hour] > 0:
+            speeds.append(live_speeds[hour])
+        combined_speeds[hour] = sum(speeds) / len(speeds) if speeds else 0
+
+    combined["hourly_reading_speed_data"] = combined_speeds
+
+    # MERGE - Combine game activity data (sum chars/time/lines per game)
+    rollup_games_activity = rollup_stats.get("game_activity_data", {})
+    live_games_activity = live_stats.get("game_activity_data", {})
+    combined_games_activity = {}
+
+    for game_id in set(
+        list(rollup_games_activity.keys()) + list(live_games_activity.keys())
+    ):
+        rollup_activity = rollup_games_activity.get(
+            game_id, {"chars": 0, "time": 0, "lines": 0}
+        )
+        live_activity = live_games_activity.get(
+            game_id, {"chars": 0, "time": 0, "lines": 0}
+        )
+
+        combined_games_activity[game_id] = {
+            "title": rollup_activity.get("title")
+            or live_activity.get("title", f"Game {game_id}"),
+            "chars": rollup_activity.get("chars", 0) + live_activity.get("chars", 0),
+            "time": rollup_activity.get("time", 0) + live_activity.get("time", 0),
+            "lines": rollup_activity.get("lines", 0) + live_activity.get("lines", 0),
+        }
+
+    combined["game_activity_data"] = combined_games_activity
+
+    return combined
+
+
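The weighted averages in combine_rollup_and_live_stats keep the merged figure consistent with what a single pass over the raw data would produce. With illustrative numbers: if historical rollups average 9,000 chars/hour over 3,600 s of active time and today's live slice averages 12,000 chars/hour over 1,800 s, the combined speed is (9000 * 3600 + 12000 * 1800) / 5400 = 10,000 chars/hour, not the naive unweighted mean of 10,500.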
+def build_heatmap_from_rollup(rollups: List, filter_year: Optional[str] = None) -> Dict:
+    """
+    Build heatmap data from rollup records instead of individual lines.
+    Much faster than processing all lines.
+
+    Args:
+        rollups: List of StatsRollupTable records
+        filter_year: Optional year filter (e.g., "2024")
+
+    Returns:
+        Dictionary mapping year -> date -> character count
+    """
+    heatmap_data = defaultdict(lambda: defaultdict(int))
+
+    for rollup in rollups:
+        date_str = rollup.date  # Already in YYYY-MM-DD format
+        year = date_str.split("-")[0]
+
+        # Filter by year if specified
+        if filter_year and year != filter_year:
+            continue
+
+        # Use total_characters from rollup
+        heatmap_data[year][date_str] = rollup.total_characters
+
+    return dict(heatmap_data)
+
+
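As a shape reference for consumers of build_heatmap_from_rollup: two rollup rows dated 2024-01-01 and 2025-03-03 (character counts invented here) would, with no year filter, come back as a year -> date -> count mapping:

    {
        "2024": {"2024-01-01": 12345},
        "2025": {"2025-03-03": 6789},
    }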
+def build_daily_chart_data_from_rollup(rollups: List) -> Dict:
+    """
+    Build daily chart data structure from rollup records.
+    Returns data organized by date and game for chart visualization.
+
+    Args:
+        rollups: List of StatsRollupTable records
+
+    Returns:
+        Dictionary with daily_data structure for charts
+    """
+    daily_data = defaultdict(lambda: defaultdict(lambda: {"lines": 0, "chars": 0}))
+
+    for rollup in rollups:
+        date_str = rollup.date
+        if rollup.game_activity_data:
+            try:
+                game_data = (
+                    json.loads(rollup.game_activity_data)
+                    if isinstance(rollup.game_activity_data, str)
+                    else rollup.game_activity_data
+                )
+
+                for game_id, activity in game_data.items():
+                    display_name = activity.get("title", f"Game {game_id}")
+                    daily_data[date_str][display_name]["lines"] = activity.get(
+                        "lines", 0
+                    )
+                    daily_data[date_str][display_name]["chars"] = activity.get(
+                        "chars", 0
+                    )
+            except (json.JSONDecodeError, KeyError, TypeError) as e:
+                logger.warning(f"Error parsing rollup data for {date_str}: {e}")
+                continue
+
+    return daily_data
+
+
+def calculate_day_of_week_averages_from_rollup(rollups: List) -> Dict:
+    """
+    Pre-compute day of week activity averages from rollup data.
+    This is much faster than calculating on every API request.
+
+    Args:
+        rollups: List of StatsRollupTable records
+
+    Returns:
+        Dictionary with day of week data including averages:
+        {
+            "chars": [Mon, Tue, Wed, Thu, Fri, Sat, Sun],
+            "hours": [Mon, Tue, Wed, Thu, Fri, Sat, Sun],
+            "counts": [Mon, Tue, Wed, Thu, Fri, Sat, Sun],
+            "avg_hours": [Mon, Tue, Wed, Thu, Fri, Sat, Sun]
+        }
+    """
+    day_of_week_data = {
+        "chars": [0] * 7,
+        "hours": [0] * 7,
+        "counts": [0] * 7,
+        "avg_hours": [0] * 7
+    }
+
+    for rollup in rollups:
+        try:
+            date_obj = datetime.datetime.strptime(rollup.date, "%Y-%m-%d")
+            day_of_week = date_obj.weekday()  # 0=Monday, 6=Sunday
+            day_of_week_data["chars"][day_of_week] += rollup.total_characters
+            day_of_week_data["hours"][day_of_week] += rollup.total_reading_time_seconds / 3600
+            day_of_week_data["counts"][day_of_week] += 1
+        except (ValueError, AttributeError) as e:
+            logger.warning(f"Error parsing date for rollup {rollup.date}: {e}")
+            continue
+
+    # Calculate averages
+    for i in range(7):
+        if day_of_week_data["counts"][i] > 0:
+            day_of_week_data["avg_hours"][i] = round(
+                day_of_week_data["hours"][i] / day_of_week_data["counts"][i], 2
+            )
+
+    return day_of_week_data
+
+
+def calculate_difficulty_speed_from_rollup(combined_stats: Dict) -> Dict:
+    """
+    Pre-compute reading speed by difficulty from rollup game activity data.
+    This avoids recalculating on every API request.
+
+    Args:
+        combined_stats: Combined rollup statistics with game_activity_data
+
+    Returns:
+        Dictionary with difficulty speed data:
+        {
+            "labels": ["Difficulty 1", "Difficulty 2", ...],
+            "speeds": [speed1, speed2, ...]
+        }
+    """
+    from GameSentenceMiner.util.games_table import GamesTable
+
+    difficulty_speed_data = {"labels": [], "speeds": []}
+
+    try:
+        # Get all games with difficulty ratings
+        all_games = GamesTable.all()
+        difficulty_groups = {}  # difficulty -> {chars: total, time: total}
+
+        for game in all_games:
+            if game.difficulty is not None:
+                difficulty = game.difficulty
+                if difficulty not in difficulty_groups:
+                    difficulty_groups[difficulty] = {"chars": 0, "time": 0}
+
+                # Get stats for this game from game_activity_data
+                game_activity = combined_stats.get("game_activity_data", {})
+                if game.id in game_activity:
+                    activity = game_activity[game.id]
+                    difficulty_groups[difficulty]["chars"] += activity.get("chars", 0)
+                    difficulty_groups[difficulty]["time"] += activity.get("time", 0)
+
+        # Calculate average speed for each difficulty
+        for difficulty in sorted(difficulty_groups.keys()):
+            data = difficulty_groups[difficulty]
+            if data["time"] > 0 and data["chars"] > 0:
+                hours = data["time"] / 3600
+                speed = int(data["chars"] / hours)
+                difficulty_speed_data["labels"].append(f"Difficulty {difficulty}")
+                difficulty_speed_data["speeds"].append(speed)
+
+    except Exception as e:
+        logger.error(f"Error calculating difficulty speed from rollup: {e}")
+
+    return difficulty_speed_data
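Per the module docstring, the intended call pattern is: aggregate historical rollups, compute today live, then merge. The sketch below is hypothetical glue, not code from this release; fetch_historical_rollups and fetch_todays_lines are stand-in callables for whatever query layer the web stats endpoints actually use, which this diff does not show.

    from GameSentenceMiner.web.rollup_stats import (
        aggregate_rollup_data,
        calculate_live_stats_for_today,
        combine_rollup_and_live_stats,
    )

    def dashboard_stats(fetch_historical_rollups, fetch_todays_lines):
        # Historical days come from pre-computed rollups (fast aggregation),
        # today is computed live from raw lines, and the two are merged.
        rollup_stats = aggregate_rollup_data(fetch_historical_rollups())
        live_stats = calculate_live_stats_for_today(fetch_todays_lines())
        return combine_rollup_and_live_stats(rollup_stats, live_stats)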