GameSentenceMiner 2.19.16__py3-none-any.whl → 2.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of GameSentenceMiner might be problematic.

Files changed (70)
  1. GameSentenceMiner/__init__.py +39 -0
  2. GameSentenceMiner/anki.py +6 -3
  3. GameSentenceMiner/gametext.py +13 -2
  4. GameSentenceMiner/gsm.py +40 -3
  5. GameSentenceMiner/locales/en_us.json +4 -0
  6. GameSentenceMiner/locales/ja_jp.json +4 -0
  7. GameSentenceMiner/locales/zh_cn.json +4 -0
  8. GameSentenceMiner/obs.py +4 -1
  9. GameSentenceMiner/owocr/owocr/ocr.py +304 -134
  10. GameSentenceMiner/owocr/owocr/run.py +1 -1
  11. GameSentenceMiner/ui/anki_confirmation.py +4 -2
  12. GameSentenceMiner/ui/config_gui.py +12 -0
  13. GameSentenceMiner/util/configuration.py +6 -2
  14. GameSentenceMiner/util/cron/__init__.py +12 -0
  15. GameSentenceMiner/util/cron/daily_rollup.py +613 -0
  16. GameSentenceMiner/util/cron/jiten_update.py +397 -0
  17. GameSentenceMiner/util/cron/populate_games.py +154 -0
  18. GameSentenceMiner/util/cron/run_crons.py +148 -0
  19. GameSentenceMiner/util/cron/setup_populate_games_cron.py +118 -0
  20. GameSentenceMiner/util/cron_table.py +334 -0
  21. GameSentenceMiner/util/db.py +236 -49
  22. GameSentenceMiner/util/ffmpeg.py +23 -4
  23. GameSentenceMiner/util/games_table.py +340 -93
  24. GameSentenceMiner/util/jiten_api_client.py +188 -0
  25. GameSentenceMiner/util/stats_rollup_table.py +216 -0
  26. GameSentenceMiner/web/anki_api_endpoints.py +438 -220
  27. GameSentenceMiner/web/database_api.py +955 -1259
  28. GameSentenceMiner/web/jiten_database_api.py +1015 -0
  29. GameSentenceMiner/web/rollup_stats.py +672 -0
  30. GameSentenceMiner/web/static/css/dashboard-shared.css +75 -13
  31. GameSentenceMiner/web/static/css/overview.css +604 -47
  32. GameSentenceMiner/web/static/css/search.css +226 -0
  33. GameSentenceMiner/web/static/css/shared.css +762 -0
  34. GameSentenceMiner/web/static/css/stats.css +221 -0
  35. GameSentenceMiner/web/static/js/components/bar-chart.js +339 -0
  36. GameSentenceMiner/web/static/js/database-bulk-operations.js +320 -0
  37. GameSentenceMiner/web/static/js/database-game-data.js +390 -0
  38. GameSentenceMiner/web/static/js/database-game-operations.js +213 -0
  39. GameSentenceMiner/web/static/js/database-helpers.js +44 -0
  40. GameSentenceMiner/web/static/js/database-jiten-integration.js +750 -0
  41. GameSentenceMiner/web/static/js/database-popups.js +89 -0
  42. GameSentenceMiner/web/static/js/database-tabs.js +64 -0
  43. GameSentenceMiner/web/static/js/database-text-management.js +371 -0
  44. GameSentenceMiner/web/static/js/database.js +86 -718
  45. GameSentenceMiner/web/static/js/goals.js +79 -18
  46. GameSentenceMiner/web/static/js/heatmap.js +29 -23
  47. GameSentenceMiner/web/static/js/overview.js +1205 -339
  48. GameSentenceMiner/web/static/js/regex-patterns.js +100 -0
  49. GameSentenceMiner/web/static/js/search.js +215 -18
  50. GameSentenceMiner/web/static/js/shared.js +193 -39
  51. GameSentenceMiner/web/static/js/stats.js +1536 -179
  52. GameSentenceMiner/web/stats.py +1142 -269
  53. GameSentenceMiner/web/stats_api.py +2104 -0
  54. GameSentenceMiner/web/templates/anki_stats.html +4 -18
  55. GameSentenceMiner/web/templates/components/date-range.html +118 -3
  56. GameSentenceMiner/web/templates/components/html-head.html +40 -6
  57. GameSentenceMiner/web/templates/components/js-config.html +8 -8
  58. GameSentenceMiner/web/templates/components/regex-input.html +160 -0
  59. GameSentenceMiner/web/templates/database.html +564 -117
  60. GameSentenceMiner/web/templates/goals.html +41 -5
  61. GameSentenceMiner/web/templates/overview.html +159 -129
  62. GameSentenceMiner/web/templates/search.html +78 -9
  63. GameSentenceMiner/web/templates/stats.html +159 -5
  64. GameSentenceMiner/web/texthooking_page.py +280 -111
  65. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/METADATA +43 -2
  66. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/RECORD +70 -47
  67. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/WHEEL +0 -0
  68. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/entry_points.txt +0 -0
  69. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/licenses/LICENSE +0 -0
  70. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,66 @@
 import datetime
+import json
 from collections import defaultdict
+from typing import List, Dict
 
 from GameSentenceMiner.util.db import GameLinesTable
 from GameSentenceMiner.util.configuration import get_stats_config, logger, get_config
+from GameSentenceMiner.util.games_table import GamesTable
+
+
+def build_game_display_name_mapping(all_lines):
+    """
+    Build a mapping of game_name -> display_name (title_original if available).
+
+    This centralizes the logic for converting OBS scene names to clean game titles
+    for display in charts and statistics.
+
+    Args:
+        all_lines: List of GameLinesTable records
+
+    Returns:
+        dict: Mapping of game_name to display_name (title_original from games table)
+    """
+    game_name_to_display = {}
+    unique_game_names = set(line.game_name or "Unknown Game" for line in all_lines)
+
+    logger.debug(
+        f"Building display name mapping for {len(unique_game_names)} unique games"
+    )
+
+    for game_name in unique_game_names:
+        # Find any line with this game_name to get game_id
+        sample_line = next(
+            (
+                line
+                for line in all_lines
+                if (line.game_name or "Unknown Game") == game_name
+            ),
+            None,
+        )
+        if sample_line:
+            game_metadata = GamesTable.get_by_game_line(sample_line)
+            if game_metadata and game_metadata.title_original:
+                game_name_to_display[game_name] = game_metadata.title_original
+                logger.debug(
+                    f"Mapped '{game_name}' -> '{game_metadata.title_original}'"
+                )
+            else:
+                game_name_to_display[game_name] = game_name
+                logger.debug(f"No metadata for '{game_name}', using original name")
+
+    return game_name_to_display
 
 
 def is_kanji(char):
     """Check if a character is a kanji (CJK Unified Ideographs)."""
     # Validate input is a single character
     if not isinstance(char, str) or len(char) != 1:
-        logger.warning(f"is_kanji() received invalid input: {repr(char)} (type: {type(char)}, length: {len(char) if isinstance(char, str) else 'N/A'})")
+        logger.warning(
+            f"is_kanji() received invalid input: {repr(char)} (type: {type(char)}, length: {len(char) if isinstance(char, str) else 'N/A'})"
+        )
         return False
-
+
     try:
         code_point = ord(char)
         # CJK Unified Ideographs (most common kanji range)
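A note for readers checking the guard added to is_kanji() above: the code-point comparison itself falls between this hunk and the next, but the block named in the comment, CJK Unified Ideographs, is the standard Unicode range U+4E00 to U+9FFF. A quick standalone check, assuming those bounds (they are the conventional ones, not read from the diff):

    for ch in ("漢", "か", "A"):
        print(ch, 0x4E00 <= ord(ch) <= 0x9FFF)
    # 漢 True; か False (hiragana); A False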
@@ -21,65 +70,66 @@ def is_kanji(char):
         logger.warning(f"is_kanji() failed to process character {repr(char)}: {e}")
         return False
 
+
 def interpolate_color(color1, color2, factor):
     """Interpolate between two hex colors."""
+
     # Convert hex to RGB
     def hex_to_rgb(hex_color):
-        hex_color = hex_color.lstrip('#')
-        return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
-
+        hex_color = hex_color.lstrip("#")
+        return tuple(int(hex_color[i : i + 2], 16) for i in (0, 2, 4))
+
     # Convert RGB to hex
     def rgb_to_hex(rgb):
         return f"#{int(rgb[0]):02x}{int(rgb[1]):02x}{int(rgb[2]):02x}"
-
+
     rgb1 = hex_to_rgb(color1)
     rgb2 = hex_to_rgb(color2)
-
+
     # Interpolate each channel
-    rgb_result = tuple(
-        rgb1[i] + factor * (rgb2[i] - rgb1[i])
-        for i in range(3)
-    )
-
+    rgb_result = tuple(rgb1[i] + factor * (rgb2[i] - rgb1[i]) for i in range(3))
+
     return rgb_to_hex(rgb_result)
 
+
 def get_gradient_color(frequency, max_frequency):
     """Get color from gradient based on frequency."""
     if max_frequency == 0:
         return "#ebedf0" # Default color for no encounters
-
+
     # kanji with 300+ encounters should always get cyan color cause i think u should know them
     if frequency > 300:
         return "#2ee6e0"
-
+
     # Normalize frequency to 0-1 range with square root transformation
     # This creates a smoother, more visually pleasing gradient by spreading
     # out the lower frequencies (since kanji frequency follows Zipf's law)
     ratio = (frequency / max_frequency) ** 0.5
-
+
     # Define gradient colors: least seen → most seen
     # #e6342e (red) → #e6dc2e (yellow) → #3be62f (green) → #2ee6e0 (cyan)
     colors = ["#e6342e", "#e6dc2e", "#3be62f", "#2ee6e0"]
-
+
     if ratio == 0:
         return "#ebedf0" # No encounters
-
+
     # Scale ratio to fit the 3 gradient segments
     scaled_ratio = ratio * (len(colors) - 1)
     segment = int(scaled_ratio)
     local_ratio = scaled_ratio - segment
-
+
     # Clamp segment to valid range
     if segment >= len(colors) - 1:
         return colors[-1]
-
+
     # Interpolate between adjacent colors
     return interpolate_color(colors[segment], colors[segment + 1], local_ratio)
 
+
 def calculate_kanji_frequency(all_lines):
     """Calculate frequency of kanji characters across all lines with gradient coloring."""
     kanji_count = defaultdict(int)
-
+
     for line in all_lines:
         if line.line_text:
             # Ensure line_text is a string and handle any encoding issues
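For anyone sanity-checking the gradient math above, here is the lookup worked through once with illustrative numbers (frequency 100 out of a max of 400):

    colors = ["#e6342e", "#e6dc2e", "#3be62f", "#2ee6e0"]  # red, yellow, green, cyan
    frequency, max_frequency = 100, 400         # illustrative values
    ratio = (frequency / max_frequency) ** 0.5  # 0.25 linearly, 0.5 after the sqrt
    scaled_ratio = ratio * (len(colors) - 1)    # 1.5
    segment = int(scaled_ratio)                 # 1, i.e. the yellow-to-green segment
    local_ratio = scaled_ratio - segment        # 0.5, exactly halfway along it

The square root is what keeps low-frequency kanji from all clumping at the red end of the scale.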
@@ -89,53 +139,49 @@ def calculate_kanji_frequency(all_lines):
                     if is_kanji(char):
                         kanji_count[char] += 1
             except Exception as e:
-                logger.warning(f"Error processing line text for kanji frequency: {repr(line.line_text)}, error: {e}")
+                logger.warning(
+                    f"Error processing line text for kanji frequency: {repr(line.line_text)}, error: {e}"
+                )
                 continue
-
+
     if not kanji_count:
-        return {
-            "kanji_data": [],
-            "unique_count": 0
-        }
-
+        return {"kanji_data": [], "unique_count": 0}
+
     # Find max frequency for gradient calculation
     max_frequency = max(kanji_count.values())
-
+
     # Sort kanji by frequency (most frequent first)
     sorted_kanji = sorted(kanji_count.items(), key=lambda x: x[1], reverse=True)
-
+
     # Add gradient colors to each kanji
     kanji_data = []
     for kanji, count in sorted_kanji:
         color = get_gradient_color(count, max_frequency)
-        kanji_data.append({
-            "kanji": kanji,
-            "frequency": count,
-            "color": color
-        })
-
+        kanji_data.append({"kanji": kanji, "frequency": count, "color": color})
+
     return {
         "kanji_data": kanji_data,
         "unique_count": len(sorted_kanji),
-        "max_frequency": max_frequency
+        "max_frequency": max_frequency,
     }
 
+
 def calculate_heatmap_data(all_lines, filter_year=None):
     """Calculate heatmap data for reading activity."""
     heatmap_data = defaultdict(lambda: defaultdict(int))
-
+
     for line in all_lines:
         date_obj = datetime.date.fromtimestamp(float(line.timestamp))
         year = str(date_obj.year)
-
+
         # Filter by year if specified
         if filter_year and year != filter_year:
             continue
-
-        date_str = date_obj.strftime('%Y-%m-%d')
+
+        date_str = date_obj.strftime("%Y-%m-%d")
         char_count = len(line.line_text) if line.line_text else 0
         heatmap_data[year][date_str] += char_count
-
+
     return dict(heatmap_data)
 
 
@@ -145,135 +191,223 @@ def calculate_mining_heatmap_data(all_lines, filter_year=None):
     Counts lines where screenshot_in_anki OR audio_in_anki is not empty.
     """
     heatmap_data = defaultdict(lambda: defaultdict(int))
-
+
     for line in all_lines:
         # Check if line has been mined (either screenshot or audio in Anki)
         has_screenshot = line.screenshot_in_anki and line.screenshot_in_anki.strip()
         has_audio = line.audio_in_anki and line.audio_in_anki.strip()
-
+
         if not (has_screenshot or has_audio):
             continue # Skip lines that haven't been mined
-
+
         date_obj = datetime.date.fromtimestamp(float(line.timestamp))
         year = str(date_obj.year)
-
+
         # Filter by year if specified
         if filter_year and year != filter_year:
             continue
-
-        date_str = date_obj.strftime('%Y-%m-%d')
+
+        date_str = date_obj.strftime("%Y-%m-%d")
         heatmap_data[year][date_str] += 1 # Count mined lines, not characters
-
+
     return dict(heatmap_data)
 
 
-def calculate_total_chars_per_game(all_lines):
-    """Calculate total characters read per game."""
-    game_data = defaultdict(lambda: {'total_chars': 0, 'first_time': None})
+def calculate_reading_speed_heatmap_data(all_lines, filter_year=None):
+    """
+    Calculate daily average reading speed (chars/hour) for heatmap visualization.
+    Returns both heatmap data and maximum reading speed for percentage-based coloring.
+
+    Args:
+        all_lines: List of GameLinesTable records
+        filter_year: Optional year filter (string)
+
+    Returns:
+        tuple: (heatmap_data dict, max_reading_speed float)
+        heatmap_data format: {year: {date: speed_in_chars_per_hour}}
+    """
+    # Group lines by date
+    daily_data = defaultdict(lambda: {"chars": 0, "timestamps": []})
 
     for line in all_lines:
-        game = line.game_name or "Unknown Game"
-        timestamp = float(line.timestamp)
-        char_count = len(line.line_text) if line.line_text else 0
+        date_obj = datetime.date.fromtimestamp(float(line.timestamp))
+        year = str(date_obj.year)
 
-        game_data[game]['total_chars'] += char_count
+        # Filter by year if specified
+        if filter_year and year != filter_year:
+            continue
 
-        if game_data[game]['first_time'] is None:
-            game_data[game]['first_time'] = timestamp
+        date_str = date_obj.strftime("%Y-%m-%d")
+        char_count = len(line.line_text) if line.line_text else 0
+
+        daily_data[date_str]["chars"] += char_count
+        daily_data[date_str]["timestamps"].append(float(line.timestamp))
+
+    # Calculate reading speed for each day
+    heatmap_data = defaultdict(lambda: defaultdict(int))
+    max_speed = 0
 
+    for date_str, data in daily_data.items():
+        if len(data["timestamps"]) >= 2 and data["chars"] > 0:
+            # Calculate actual reading time for this day
+            reading_time_seconds = calculate_actual_reading_time(data["timestamps"])
+            reading_time_hours = reading_time_seconds / 3600
+
+            if reading_time_hours > 0:
+                # Calculate speed (chars per hour)
+                speed = int(data["chars"] / reading_time_hours)
+
+                # Extract year from date string
+                year = date_str.split("-")[0]
+                heatmap_data[year][date_str] = speed
+
+                # Track maximum speed
+                max_speed = max(max_speed, speed)
+
+    return dict(heatmap_data), max_speed
+
+
+def calculate_total_chars_per_game(all_lines, game_name_to_display=None):
+    """Calculate total characters read per game."""
+    if game_name_to_display is None:
+        # Fallback for backward compatibility
+        game_name_to_display = build_game_display_name_mapping(all_lines)
+
+    game_data = defaultdict(lambda: {"total_chars": 0, "first_time": None})
+
+    for line in all_lines:
+        game_name = line.game_name or "Unknown Game"
+        display_name = game_name_to_display.get(game_name, game_name)
+        timestamp = float(line.timestamp)
+        char_count = len(line.line_text) if line.line_text else 0
+
+        game_data[display_name]["total_chars"] += char_count
+
+        if game_data[display_name]["first_time"] is None:
+            game_data[display_name]["first_time"] = timestamp
+
     # Sort by first appearance time and filter out games with no characters
     char_data = []
     for game, data in game_data.items():
-        if data['total_chars'] > 0:
-            char_data.append((game, data['total_chars'], data['first_time']))
-
+        if data["total_chars"] > 0:
+            char_data.append((game, data["total_chars"], data["first_time"]))
+
     # Sort by first appearance time
     char_data.sort(key=lambda x: x[2])
-
+
     return {
         "labels": [item[0] for item in char_data],
-        "totals": [item[1] for item in char_data]
+        "totals": [item[1] for item in char_data],
     }
 
-def calculate_reading_time_per_game(all_lines):
+
+def calculate_reading_time_per_game(all_lines, game_name_to_display=None):
     """Calculate total reading time per game in hours using AFK timer logic."""
-    game_data = defaultdict(lambda: {'timestamps': [], 'first_time': None})
-
+    if game_name_to_display is None:
+        # Fallback for backward compatibility
+        game_name_to_display = build_game_display_name_mapping(all_lines)
+
+    game_data = defaultdict(lambda: {"timestamps": [], "first_time": None})
+
     for line in all_lines:
-        game = line.game_name or "Unknown Game"
+        game_name = line.game_name or "Unknown Game"
+        display_name = game_name_to_display.get(game_name, game_name)
         timestamp = float(line.timestamp)
-
-        game_data[game]['timestamps'].append(timestamp)
-        if game_data[game]['first_time'] is None:
-            game_data[game]['first_time'] = timestamp
-
+
+        game_data[display_name]["timestamps"].append(timestamp)
+        if game_data[display_name]["first_time"] is None:
+            game_data[display_name]["first_time"] = timestamp
+
     # Calculate actual reading time for each game
     time_data = []
     for game, data in game_data.items():
-        if len(data['timestamps']) >= 2:
+        if len(data["timestamps"]) >= 2:
             # Use actual reading time calculation
-            reading_time_seconds = calculate_actual_reading_time(data['timestamps'])
+            reading_time_seconds = calculate_actual_reading_time(data["timestamps"])
             hours = reading_time_seconds / 3600 # Convert to hours
             if hours > 0:
-                time_data.append((game, hours, data['first_time']))
-
+                time_data.append((game, hours, data["first_time"]))
+
     # Sort by first appearance time
     time_data.sort(key=lambda x: x[2])
-
+
     return {
         "labels": [item[0] for item in time_data],
-        "totals": [round(item[1], 2) for item in time_data] # Round to 2 decimals for hours
+        "totals": [
+            round(item[1], 2) for item in time_data
+        ],  # Round to 2 decimals for hours
     }
 
-def calculate_reading_speed_per_game(all_lines):
+
+def calculate_reading_speed_per_game(all_lines, game_name_to_display=None):
     """Calculate average reading speed per game (chars/hour) using AFK timer logic."""
-    game_data = defaultdict(lambda: {'chars': 0, 'timestamps': [], 'first_time': None})
-
+    if game_name_to_display is None:
+        # Fallback for backward compatibility
+        game_name_to_display = build_game_display_name_mapping(all_lines)
+
+    game_data = defaultdict(lambda: {"chars": 0, "timestamps": [], "first_time": None})
+
     for line in all_lines:
-        game = line.game_name or "Unknown Game"
+        game_name = line.game_name or "Unknown Game"
+        display_name = game_name_to_display.get(game_name, game_name)
         timestamp = float(line.timestamp)
         char_count = len(line.line_text) if line.line_text else 0
-
-        game_data[game]['chars'] += char_count
-        game_data[game]['timestamps'].append(timestamp)
-
-        if game_data[game]['first_time'] is None:
-            game_data[game]['first_time'] = timestamp
-
+
+        game_data[display_name]["chars"] += char_count
+        game_data[display_name]["timestamps"].append(timestamp)
+
+        if game_data[display_name]["first_time"] is None:
+            game_data[display_name]["first_time"] = timestamp
+
     # Calculate speeds using actual reading time
     speed_data = []
     for game, data in game_data.items():
-        if len(data['timestamps']) >= 2 and data['chars'] > 0:
+        if len(data["timestamps"]) >= 2 and data["chars"] > 0:
             # Use actual reading time calculation
-            reading_time_seconds = calculate_actual_reading_time(data['timestamps'])
+            reading_time_seconds = calculate_actual_reading_time(data["timestamps"])
             hours = reading_time_seconds / 3600 # Convert to hours
             if hours > 0:
-                speed = data['chars'] / hours
-                speed_data.append((game, speed, data['first_time']))
-
+                speed = data["chars"] / hours
+                speed_data.append((game, speed, data["first_time"]))
+
     # Sort by first appearance time
     speed_data.sort(key=lambda x: x[2])
-
+
     return {
         "labels": [item[0] for item in speed_data],
-        "totals": [round(item[1], 0) for item in speed_data] # Round to whole numbers for chars/hour
+        "totals": [
+            round(item[1], 0) for item in speed_data
+        ],  # Round to whole numbers for chars/hour
     }
 
+
 def generate_game_colors(game_count):
     """Generate visually distinct colors for games using HSL color space."""
     colors = []
-
+
     # Predefined set of good colors for the first few games
     predefined_colors = [
-        '#3498db', '#e74c3c', '#2ecc71', '#f1c40f', '#9b59b6',
-        '#1abc9c', '#e67e22', '#34495e', '#16a085', '#27ae60',
-        '#2980b9', '#8e44ad', '#d35400', '#c0392b', '#7f8c8d'
+        "#3498db",
+        "#e74c3c",
+        "#2ecc71",
+        "#f1c40f",
+        "#9b59b6",
+        "#1abc9c",
+        "#e67e22",
+        "#34495e",
+        "#16a085",
+        "#27ae60",
+        "#2980b9",
+        "#8e44ad",
+        "#d35400",
+        "#c0392b",
+        "#7f8c8d",
     ]
-
+
     # Use predefined colors first
     for i in range(min(game_count, len(predefined_colors))):
        colors.append(predefined_colors[i])
-
+
     # Generate additional colors using HSL if needed
     if game_count > len(predefined_colors):
         remaining = game_count - len(predefined_colors)
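All three per-game aggregators above now take an optional game_name_to_display argument so the games-table lookup happens once per request rather than once per chart (each function still builds the mapping itself as a backward-compatible fallback). A sketch of the intended call pattern, assuming all_lines is the already-loaded list of GameLinesTable records:

    mapping = build_game_display_name_mapping(all_lines)
    chars = calculate_total_chars_per_game(all_lines, game_name_to_display=mapping)
    hours = calculate_reading_time_per_game(all_lines, game_name_to_display=mapping)
    speed = calculate_reading_speed_per_game(all_lines, game_name_to_display=mapping)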
@@ -282,13 +416,14 @@ def generate_game_colors(game_count):
             hue = (i * 360 / remaining) % 360
             # Use varied saturation and lightness for visual distinction
             saturation = 65 + (i % 3) * 10 # 65%, 75%, 85%
-            lightness = 45 + (i % 2) * 10 # 45%, 55%
-
+            lightness = 45 + (i % 2) * 10  # 45%, 55%
+
             # Convert HSL to hex
-            colors.append(f'hsl({hue:.0f}, {saturation}%, {lightness}%)')
-
+            colors.append(f"hsl({hue:.0f}, {saturation}%, {lightness}%)")
+
     return colors
 
+
 def format_large_number(num):
     """Format large numbers with appropriate units (K for thousands, M for millions)."""
     if num >= 1000000:
@@ -298,77 +433,84 @@ def format_large_number(num):
     else:
         return str(int(num))
 
+
 def calculate_actual_reading_time(timestamps, afk_timer_seconds=None):
     """
     Calculate actual reading time using AFK timer logic.
-
+
     Args:
         timestamps: List of timestamps (as floats)
         afk_timer_seconds: Maximum time between entries to count as active reading.
            If None, uses config value. Defaults to 120 seconds (2 minutes).
-
+
     Returns:
         float: Actual reading time in seconds
     """
     if not timestamps or len(timestamps) < 2:
         return 0.0
-
+
     if afk_timer_seconds is None:
         afk_timer_seconds = get_stats_config().afk_timer_seconds
-
+
     # Sort timestamps to ensure chronological order
     sorted_timestamps = sorted(timestamps)
     total_reading_time = 0.0
-
+
     # Calculate time between consecutive entries
     for i in range(1, len(sorted_timestamps)):
-        time_gap = sorted_timestamps[i] - sorted_timestamps[i-1]
-
+        time_gap = sorted_timestamps[i] - sorted_timestamps[i - 1]
+
         # Cap the gap at AFK timer limit
         if time_gap > afk_timer_seconds:
             total_reading_time += afk_timer_seconds
         else:
             total_reading_time += time_gap
-
+
     return total_reading_time
 
+
 def calculate_daily_reading_time(lines):
     """
     Calculate actual reading time per day using AFK timer logic.
-
+
     Args:
         lines: List of game lines
-
+
     Returns:
         dict: Dictionary mapping date strings to reading time in hours
     """
     daily_timestamps = defaultdict(list)
-
+
     # Group timestamps by day
     for line in lines:
-        date_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
+        date_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime(
+            "%Y-%m-%d"
+        )
         daily_timestamps[date_str].append(float(line.timestamp))
-
+
     # Calculate reading time for each day
     daily_reading_time = {}
     for date_str, timestamps in daily_timestamps.items():
         if len(timestamps) >= 2:
             reading_time_seconds = calculate_actual_reading_time(timestamps)
-            daily_reading_time[date_str] = reading_time_seconds / 3600 # Convert to hours
+            daily_reading_time[date_str] = (
+                reading_time_seconds / 3600
+            )  # Convert to hours
         else:
             daily_reading_time[date_str] = 0.0
-
+
     return daily_reading_time
 
+
 def calculate_time_based_streak(lines, streak_requirement_hours=None):
     """
     Calculate reading streak based on time requirements rather than daily activity.
-
+
     Args:
         lines: List of game lines
         streak_requirement_hours: Minimum hours of reading per day to maintain streak.
            If None, uses config value. Defaults to 1.0.
-
+
     Returns:
         int: Current streak in days
     """
@@ -377,42 +519,54 @@ def calculate_time_based_streak(lines, streak_requirement_hours=None):
     try:
         streak_requirement_hours = get_stats_config().streak_requirement_hours
     except AttributeError:
-        streak_requirement_hours = getattr(get_config().advanced, 'streak_requirement_hours', 1.0)
+        streak_requirement_hours = getattr(
+            get_config().advanced, "streak_requirement_hours", 1.0
+        )
     # Add debug logging
-    logger.debug(f"Calculating streak with requirement: {streak_requirement_hours} hours")
+    logger.debug(
+        f"Calculating streak with requirement: {streak_requirement_hours} hours"
+    )
     logger.debug(f"Processing {len(lines)} lines for streak calculation")
-
+
     # Calculate daily reading time
     daily_reading_time = calculate_daily_reading_time(lines)
-
+
     if not daily_reading_time:
         logger.debug("No daily reading time data available")
         return 0
-
-    logger.debug(f"Daily reading time data: {dict(list(daily_reading_time.items())[:5])}") # Show first 5 days
-
+
+    logger.debug(
+        f"Daily reading time data: {dict(list(daily_reading_time.items())[:5])}"
+    )  # Show first 5 days
+
     # Check streak from today backwards
     today = datetime.date.today()
     current_streak = 0
-
+
     check_date = today
     consecutive_days_checked = 0
     while consecutive_days_checked < 365: # Check max 365 days back
-        date_str = check_date.strftime('%Y-%m-%d')
+        date_str = check_date.strftime("%Y-%m-%d")
         reading_hours = daily_reading_time.get(date_str, 0.0)
-
-        logger.debug(f"Checking {date_str}: {reading_hours:.4f} hours vs requirement {streak_requirement_hours}")
-
+
+        logger.debug(
+            f"Checking {date_str}: {reading_hours:.4f} hours vs requirement {streak_requirement_hours}"
+        )
+
         if reading_hours >= streak_requirement_hours:
             current_streak += 1
-            logger.debug(f"Day {date_str} qualifies for streak. Current streak: {current_streak}")
+            logger.debug(
+                f"Day {date_str} qualifies for streak. Current streak: {current_streak}"
+            )
         else:
-            logger.debug(f"Day {date_str} breaks streak. Reading hours {reading_hours:.4f} < requirement {streak_requirement_hours}")
+            logger.debug(
+                f"Day {date_str} breaks streak. Reading hours {reading_hours:.4f} < requirement {streak_requirement_hours}"
+            )
             break
-
+
         check_date -= datetime.timedelta(days=1)
         consecutive_days_checked += 1
-
+
     logger.debug(f"Final calculated streak: {current_streak} days")
     return current_streak
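The streak walk above starts at today and stops at the first day whose AFK-adjusted hours fall below the requirement. With the default 1.0-hour requirement and hypothetical daily totals (today assumed to be 2024-06-03 for this example):

    daily_hours = {"2024-06-03": 1.4, "2024-06-02": 2.0, "2024-06-01": 0.5}
    streak = 0
    for day in ("2024-06-03", "2024-06-02", "2024-06-01"):  # today, then backwards
        if daily_hours.get(day, 0.0) >= 1.0:
            streak += 1
        else:
            break
    print(streak)  # 2: 06-01 falls short at 0.5 h and ends the streak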
 
@@ -437,112 +591,192 @@ def format_time_human_readable(hours):
     else:
         return f"{days}d"
 
+
 def calculate_current_game_stats(all_lines):
     """Calculate statistics for the currently active game (most recent entry)."""
     if not all_lines:
         return None
-
+
     # Sort lines by timestamp to find the most recent
     sorted_lines = sorted(all_lines, key=lambda line: float(line.timestamp))
-
-    # Get the current game (game with most recent entry)
-    current_game_name = sorted_lines[-1].game_name or "Unknown Game"
-
+
+    # Get the current game line (most recent entry)
+    current_game_line = sorted_lines[-1]
+    current_game_name = current_game_line.game_name or "Unknown Game"
+
     # Filter lines for current game
-    current_game_lines = [line for line in all_lines if (line.game_name or "Unknown Game") == current_game_name]
-
+    current_game_lines = [
+        line
+        for line in all_lines
+        if (line.game_name or "Unknown Game") == current_game_name
+    ]
+
     if not current_game_lines:
         return None
-
+
+    # Fetch game metadata from games table using game_id relationship
+    logger.debug(
+        f"Current game line: game_name='{current_game_line.game_name}', game_id='{current_game_line.game_id}'"
+    )
+    game_metadata = GamesTable.get_by_game_line(current_game_line)
+    if game_metadata:
+        logger.debug(
+            f"Found game metadata: id={game_metadata.id}, title_original='{game_metadata.title_original}', deck_id={game_metadata.deck_id}, has_image={bool(game_metadata.image)}"
+        )
+    else:
+        logger.debug(f"No game metadata found for game_name='{current_game_name}'")
+
     # Calculate basic statistics
-    total_characters = sum(len(line.line_text) if line.line_text else 0 for line in current_game_lines)
+    total_characters = sum(
+        len(line.line_text) if line.line_text else 0 for line in current_game_lines
+    )
     total_sentences = len(current_game_lines)
-
+
     # Calculate actual reading time using AFK timer
     timestamps = [float(line.timestamp) for line in current_game_lines]
     min_timestamp = min(timestamps)
     max_timestamp = max(timestamps)
     total_time_seconds = calculate_actual_reading_time(timestamps)
     total_time_hours = total_time_seconds / 3600
-
+
     # Calculate reading speed (with edge case handling)
-    reading_speed = int(total_characters / total_time_hours) if total_time_hours > 0 else 0
-
+    reading_speed = (
+        int(total_characters / total_time_hours) if total_time_hours > 0 else 0
+    )
+
     # Calculate sessions (gaps of more than session_gap_seconds = new session)
     sorted_timestamps = sorted(timestamps)
     sessions = 1
     session_gap = get_stats_config().session_gap_seconds
     for i in range(1, len(sorted_timestamps)):
-        time_gap = sorted_timestamps[i] - sorted_timestamps[i-1]
+        time_gap = sorted_timestamps[i] - sorted_timestamps[i - 1]
         if time_gap > session_gap:
             sessions += 1
-
+
     # Calculate daily activity for progress trend
     daily_activity = defaultdict(int)
     for line in current_game_lines:
-        date_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
+        date_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime(
+            "%Y-%m-%d"
+        )
         daily_activity[date_str] += len(line.line_text) if line.line_text else 0
-
+
     # Calculate monthly progress (last 30 days)
     today = datetime.date.today()
     monthly_chars = 0
     for i in range(30):
         date = today - datetime.timedelta(days=i)
-        date_str = date.strftime('%Y-%m-%d')
+        date_str = date.strftime("%Y-%m-%d")
         monthly_chars += daily_activity.get(date_str, 0)
-
+
     # Calculate reading streak using time-based requirements
     current_streak = calculate_time_based_streak(current_game_lines)
-
-    return {
-        'game_name': current_game_name,
-        'total_characters': total_characters,
-        'total_characters_formatted': format_large_number(total_characters),
-        'total_sentences': total_sentences,
-        'total_time_hours': total_time_hours,
-        'total_time_formatted': format_time_human_readable(total_time_hours),
-        'reading_speed': reading_speed,
-        'reading_speed_formatted': format_large_number(reading_speed),
-        'sessions': sessions,
-        'monthly_characters': monthly_chars,
-        'monthly_characters_formatted': format_large_number(monthly_chars),
-        'current_streak': current_streak,
-        'first_date': datetime.date.fromtimestamp(min_timestamp).strftime('%Y-%m-%d'),
-        'last_date': datetime.date.fromtimestamp(max_timestamp).strftime('%Y-%m-%d'),
-        'daily_activity': dict(daily_activity)
+
+    # Calculate progress percentage if game metadata is available
+    # game_metadata.character_count should contain jiten.moe's total character count
+    progress_percentage = 0
+    if (
+        game_metadata
+        and game_metadata.character_count
+        and game_metadata.character_count > 0
+    ):
+        progress_percentage = min(
+            100, (total_characters / game_metadata.character_count) * 100
+        )
+        logger.debug(
+            f"Game progress: {current_game_name}, Mined: {total_characters}, Total: {game_metadata.character_count}, Progress: {progress_percentage:.1f}%"
+        )
+    else:
+        logger.debug(
+            f"Game progress: {current_game_name}, No character_count available (metadata={bool(game_metadata)}, count={game_metadata.character_count if game_metadata else 'N/A'})"
+        )
+
+    # Build result dictionary with game metadata
+    result = {
+        "game_name": current_game_name,
+        "total_characters": total_characters,
+        "total_characters_formatted": format_large_number(total_characters),
+        "total_sentences": total_sentences,
+        "total_time_hours": total_time_hours,
+        "total_time_formatted": format_time_human_readable(total_time_hours),
+        "reading_speed": reading_speed,
+        "reading_speed_formatted": format_large_number(reading_speed),
+        "sessions": sessions,
+        "monthly_characters": monthly_chars,
+        "monthly_characters_formatted": format_large_number(monthly_chars),
+        "current_streak": current_streak,
+        "first_date": datetime.date.fromtimestamp(min_timestamp).strftime("%Y-%m-%d"),
+        "last_date": datetime.date.fromtimestamp(max_timestamp).strftime("%Y-%m-%d"),
+        "daily_activity": dict(daily_activity),
+        "progress_percentage": round(progress_percentage, 1),
     }
 
+    # Add game metadata if available
+    if game_metadata:
+        result["title_original"] = game_metadata.title_original or ""
+        result["title_romaji"] = game_metadata.title_romaji or ""
+        result["title_english"] = game_metadata.title_english or ""
+        result["type"] = game_metadata.type or ""
+        result["description"] = game_metadata.description or ""
+        result["image"] = game_metadata.image or ""
+        result["game_character_count"] = (
+            game_metadata.character_count or 0
+        )  # Jiten.moe total
+        result["links"] = game_metadata.links or []  # Add links array
+        result["completed"] = game_metadata.completed or False  # Add completion status
+
+        # Debug logging for image data
+        logger.debug(
+            f"Game metadata for '{current_game_name}': has_image={bool(game_metadata.image)}, image_length={len(game_metadata.image) if game_metadata.image else 0}"
+        )
+    else:
+        result["title_original"] = ""
+        result["title_romaji"] = ""
+        result["title_english"] = ""
+        result["type"] = ""
+        result["description"] = ""
+        result["image"] = ""
+        result["game_character_count"] = 0  # No jiten data available
+        result["links"] = []  # Empty links array when no metadata
+        logger.debug(f"No game metadata found for '{current_game_name}'")
+
+    return result
+
+
 def calculate_average_daily_reading_time(all_lines):
     """
     Calculate average reading time per day based only on days with reading activity.
-
+
     Args:
         all_lines: List of game lines
-
+
     Returns:
         float: Average reading time in hours per active day, 0 if no active days
     """
     if not all_lines:
         return 0.0
-
+
     # Calculate daily reading time using existing function
     daily_reading_time = calculate_daily_reading_time(all_lines)
-
+
     if not daily_reading_time:
         return 0.0
-
+
     # Count only days with reading activity > 0
-    active_days = [day_hours for day_hours in daily_reading_time.values() if day_hours > 0]
-
+    active_days = [
+        day_hours for day_hours in daily_reading_time.values() if day_hours > 0
+    ]
+
     if not active_days:
         return 0.0
-
+
     # Calculate average: total hours / number of active days
     total_hours = sum(active_days)
     average_hours = total_hours / len(active_days)
-
+
     return average_hours
 
+
 def calculate_hourly_activity(all_lines):
     """
     Calculate reading activity aggregated by hour of day (0-23).
@@ -550,17 +784,18 @@ def calculate_hourly_activity(all_lines):
     """
     if not all_lines:
         return [0] * 24
-
+
     hourly_chars = [0] * 24
-
+
     for line in all_lines:
         # Get hour from timestamp (0-23)
         hour = datetime.datetime.fromtimestamp(float(line.timestamp)).hour
         char_count = len(line.line_text) if line.line_text else 0
         hourly_chars[hour] += char_count
-
+
     return hourly_chars
 
+
 def calculate_hourly_reading_speed(all_lines):
     """
     Calculate average reading speed (chars/hour) aggregated by hour of day (0-23).
@@ -568,96 +803,91 @@ def calculate_hourly_reading_speed(all_lines):
     """
     if not all_lines:
         return [0] * 24
-
+
     # Group lines by hour and collect timestamps for each hour
-    hourly_data = defaultdict(lambda: {'chars': 0, 'timestamps': []})
-
+    hourly_data = defaultdict(lambda: {"chars": 0, "timestamps": []})
+
     for line in all_lines:
         hour = datetime.datetime.fromtimestamp(float(line.timestamp)).hour
         char_count = len(line.line_text) if line.line_text else 0
-
-        hourly_data[hour]['chars'] += char_count
-        hourly_data[hour]['timestamps'].append(float(line.timestamp))
-
+
+        hourly_data[hour]["chars"] += char_count
+        hourly_data[hour]["timestamps"].append(float(line.timestamp))
+
     # Calculate average reading speed for each hour
     hourly_speeds = [0] * 24
-
+
     for hour in range(24):
-        if hour in hourly_data and len(hourly_data[hour]['timestamps']) >= 2:
-            chars = hourly_data[hour]['chars']
-            timestamps = hourly_data[hour]['timestamps']
-
+        if hour in hourly_data and len(hourly_data[hour]["timestamps"]) >= 2:
+            chars = hourly_data[hour]["chars"]
+            timestamps = hourly_data[hour]["timestamps"]
+
             # Calculate actual reading time for this hour across all days
             reading_time_seconds = calculate_actual_reading_time(timestamps)
             reading_time_hours = reading_time_seconds / 3600
-
+
             # Calculate speed (chars per hour)
             if reading_time_hours > 0:
                 hourly_speeds[hour] = int(chars / reading_time_hours)
-
+
     return hourly_speeds
 
+
 def calculate_peak_daily_stats(all_lines):
     """
     Calculate peak daily statistics: most chars read in a day and most hours studied in a day.
-
+
     Args:
         all_lines: List of game lines
-
+
     Returns:
         dict: Dictionary containing max_daily_chars and max_daily_hours
     """
     if not all_lines:
-        return {
-            'max_daily_chars': 0,
-            'max_daily_hours': 0.0
-        }
-
+        return {"max_daily_chars": 0, "max_daily_hours": 0.0}
+
     # Calculate daily reading time using existing function
     daily_reading_time = calculate_daily_reading_time(all_lines)
-
+
     # Calculate daily character counts
     daily_chars = defaultdict(int)
     for line in all_lines:
-        date_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
+        date_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime(
+            "%Y-%m-%d"
+        )
         char_count = len(line.line_text) if line.line_text else 0
         daily_chars[date_str] += char_count
-
+
     # Find maximums
     max_daily_chars = max(daily_chars.values()) if daily_chars else 0
     max_daily_hours = max(daily_reading_time.values()) if daily_reading_time else 0.0
-
-    return {
-        'max_daily_chars': max_daily_chars,
-        'max_daily_hours': max_daily_hours
-    }
+
+    return {"max_daily_chars": max_daily_chars, "max_daily_hours": max_daily_hours}
+
 
 def calculate_peak_session_stats(all_lines):
     """
     Calculate peak session statistics: longest session and most chars in a session.
-
+
     Args:
         all_lines: List of game lines
-
+
     Returns:
         dict: Dictionary containing longest_session_hours and max_session_chars
     """
     if not all_lines:
-        return {
-            'longest_session_hours': 0.0,
-            'max_session_chars': 0
-        }
-
+        return {"longest_session_hours": 0.0, "max_session_chars": 0}
+
     # Sort lines by timestamp
     sorted_lines = sorted(all_lines, key=lambda line: float(line.timestamp))
-
+
     # Get session gap from config
     session_gap = get_stats_config().session_gap_seconds
-
+
     # Group lines into sessions
     sessions = []
     current_session = []
-
+
     for line in sorted_lines:
         if not current_session:
             current_session = [line]
@@ -671,25 +901,27 @@ def calculate_peak_session_stats(all_lines):
                 if current_session:
                     sessions.append(current_session)
                 current_session = [line]
-
+
     # Don't forget the last session
     if current_session:
         sessions.append(current_session)
-
+
     # Calculate session statistics
     longest_session_hours = 0.0
     max_session_chars = 0
-
+
     for session in sessions:
         if len(session) >= 2:
             # Calculate session duration using actual reading time
             timestamps = [float(line.timestamp) for line in session]
             session_time_seconds = calculate_actual_reading_time(timestamps)
             session_hours = session_time_seconds / 3600
-
+
             # Calculate session character count
-            session_chars = sum(len(line.line_text) if line.line_text else 0 for line in session)
-
+            session_chars = sum(
+                len(line.line_text) if line.line_text else 0 for line in session
+            )
+
             # Update maximums
             longest_session_hours = max(longest_session_hours, session_hours)
             max_session_chars = max(max_session_chars, session_chars)
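The session grouping used here (and mirrored in calculate_current_game_stats) is a plain gap test against session_gap_seconds. A standalone restatement of the rule, with an illustrative 3600-second gap standing in for the config value:

    def split_sessions(timestamps, session_gap=3600):
        sessions, current = [], []
        for ts in sorted(timestamps):
            if current and ts - current[-1] > session_gap:
                sessions.append(current)
                current = []
            current.append(ts)
        if current:
            sessions.append(current)
        return sessions

    print(len(split_sessions([0, 100, 200, 8000, 8100])))  # 2 sessions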
@@ -697,79 +929,720 @@ def calculate_peak_session_stats(all_lines):
697
929
  # Single line session - count characters but no time
698
930
  session_chars = len(session[0].line_text) if session[0].line_text else 0
699
931
  max_session_chars = max(max_session_chars, session_chars)
700
-
932
+
701
933
  return {
702
- 'longest_session_hours': longest_session_hours,
703
- 'max_session_chars': max_session_chars
934
+ "longest_session_hours": longest_session_hours,
935
+ "max_session_chars": max_session_chars,
704
936
  }
705
937
 
938
+
939
+ def calculate_game_milestones(all_lines=None):
940
+ """
941
+ Calculate oldest and newest games by release year from the games table.
942
+ Returns games with earliest and latest release dates from all games in the database.
943
+
944
+ Args:
945
+ all_lines: Unused parameter (kept for API compatibility)
946
+
947
+ Returns:
948
+ dict: Dictionary containing oldest_game and newest_game data, or None if no games with release dates
949
+ """
950
+ from GameSentenceMiner.util.games_table import GamesTable
951
+
952
+ # Get all games from the games table
953
+ all_games = GamesTable.all()
954
+
955
+ if not all_games:
956
+ logger.debug("[MILESTONES] No games found in games table")
957
+ return None
958
+
959
+ logger.debug(f"[MILESTONES] Found {len(all_games)} total games in database")
960
+
961
+ # Filter games that have valid release dates
962
+ games_with_dates = []
963
+
964
+ for game in all_games:
965
+ if game.release_date and game.release_date.strip():
966
+ logger.debug(
967
+ f"[MILESTONES] Adding game: {game.title_original} (release: {game.release_date})"
968
+ )
969
+
970
+ # Get first played date for this game (if any)
971
+ first_played = GamesTable.get_start_date(game.id)
972
+
973
+ games_with_dates.append(
974
+ {
975
+ "id": game.id,
976
+ "title_original": game.title_original,
977
+ "title_romaji": game.title_romaji,
978
+ "title_english": game.title_english,
979
+ "type": game.type,
980
+ "image": game.image,
981
+ "release_date": game.release_date,
982
+ "first_played": first_played,
983
+ "difficulty": game.difficulty,
984
+ }
985
+ )
986
+
987
+ if not games_with_dates:
988
+ logger.debug("[MILESTONES] No games with release dates found")
989
+ return None
990
+
991
+ logger.debug(f"[MILESTONES] Found {len(games_with_dates)} games with release dates")
992
+
993
+ # Sort by release date to find oldest and newest
994
+ # Parse release dates for sorting (handle ISO format: "2009-10-15T00:00:00")
995
+ def parse_release_date(game):
996
+ try:
997
+ # Extract just the date part (YYYY-MM-DD)
998
+ date_str = game["release_date"].split("T")[0]
999
+ return date_str
1000
+ except:
1001
+ return "9999-12-31" # Put invalid dates at the end
1002
+
1003
+ games_with_dates.sort(key=parse_release_date)
1004
+
1005
+ oldest_game = games_with_dates[0] if games_with_dates else None
1006
+ newest_game = games_with_dates[-1] if games_with_dates else None
1007
+
1008
+ # Ensure we don't return the same game for both oldest and newest if we have multiple games
1009
+ if (
1010
+ len(games_with_dates) > 1
1011
+ and oldest_game
1012
+ and newest_game
1013
+ and oldest_game["id"] == newest_game["id"]
1014
+ ):
1015
+ logger.warning(
1016
+ f"[MILESTONES] Same game detected for oldest and newest: {oldest_game['title_original']}"
1017
+ )
1018
+ # This shouldn't happen, but just in case
1019
+ newest_game = games_with_dates[-2] if len(games_with_dates) > 1 else oldest_game
1020
+
1021
+ logger.debug(
1022
+ f"[MILESTONES] Oldest: {oldest_game['title_original'] if oldest_game else 'None'} ({parse_release_date(oldest_game) if oldest_game else 'None'})"
1023
+ )
1024
+ logger.debug(
1025
+ f"[MILESTONES] Newest: {newest_game['title_original'] if newest_game else 'None'} ({parse_release_date(newest_game) if newest_game else 'None'})"
1026
+ )
1027
+
1028
+ # Format the release dates for display (extract date in YYYY-MM-DD format)
1029
+ def format_release_date(release_date_str):
1030
+ try:
1031
+ # Extract date part from "2009-10-15T00:00:00" -> "2009-10-15"
1032
+ return release_date_str.split("T")[0]
1033
+ except:
1034
+ return "Unknown"
1035
+
1036
+ # Format first played dates
1037
+ def format_first_played(timestamp):
1038
+ if timestamp:
1039
+ return datetime.date.fromtimestamp(timestamp).strftime("%Y-%m-%d")
1040
+ return "Unknown"
1041
+
1042
+ result = {}
1043
+
1044
+ if oldest_game:
1045
+ result["oldest_game"] = {
1046
+ "title_original": oldest_game["title_original"],
1047
+ "title_romaji": oldest_game["title_romaji"],
1048
+ "title_english": oldest_game["title_english"],
1049
+ "type": oldest_game["type"],
1050
+ "image": oldest_game["image"],
1051
+ "release_date": format_release_date(oldest_game["release_date"]),
1052
+ "release_date_full": oldest_game["release_date"],
1053
+ "first_played": format_first_played(oldest_game["first_played"]),
1054
+ "difficulty": oldest_game["difficulty"],
1055
+ }
1056
+
1057
+ if newest_game:
1058
+ result["newest_game"] = {
1059
+ "title_original": newest_game["title_original"],
1060
+ "title_romaji": newest_game["title_romaji"],
1061
+ "title_english": newest_game["title_english"],
1062
+ "type": newest_game["type"],
1063
+ "image": newest_game["image"],
1064
+ "release_date": format_release_date(newest_game["release_date"]),
1065
+ "release_date_full": newest_game["release_date"],
1066
+ "first_played": format_first_played(newest_game["first_played"]),
1067
+ "difficulty": newest_game["difficulty"],
1068
+ }
1069
+
1070
+ return result if result else None
1071
+
1072
+
1073
+ def calculate_completed_games_count():
1074
+ """
1075
+ Count the number of completed games from the games table.
1076
+
1077
+ Returns:
1078
+ int: Number of games marked as completed
1079
+ """
1080
+ completed_games = GamesTable.get_all_completed()
1081
+ return len(completed_games)
1082
+
1083
+
706
1084
  def calculate_all_games_stats(all_lines):
707
1085
  """Calculate aggregate statistics for all games combined."""
708
1086
  if not all_lines:
709
1087
  return None
710
-
1088
+
711
1089
  # Calculate basic statistics
712
- total_characters = sum(len(line.line_text) if line.line_text else 0 for line in all_lines)
1090
+ total_characters = sum(
1091
+ len(line.line_text) if line.line_text else 0 for line in all_lines
1092
+ )
713
1093
  total_sentences = len(all_lines)
714
-
1094
+
715
1095
  # Calculate actual reading time using AFK timer
716
1096
  timestamps = [float(line.timestamp) for line in all_lines]
717
1097
  min_timestamp = min(timestamps)
718
1098
  max_timestamp = max(timestamps)
719
1099
  total_time_seconds = calculate_actual_reading_time(timestamps)
720
1100
  total_time_hours = total_time_seconds / 3600
721
-
1101
+
722
1102
  # Calculate reading speed (with edge case handling)
723
- reading_speed = int(total_characters / total_time_hours) if total_time_hours > 0 else 0
724
-
1103
+ reading_speed = (
1104
+ int(total_characters / total_time_hours) if total_time_hours > 0 else 0
1105
+ )
1106
+
725
1107
  # Calculate sessions across all games (gaps of more than 1 hour = new session)
726
1108
  sorted_timestamps = sorted(timestamps)
727
1109
  sessions = 1
728
1110
  session_gap = get_stats_config().session_gap_seconds
729
1111
  for i in range(1, len(sorted_timestamps)):
730
- time_gap = sorted_timestamps[i] - sorted_timestamps[i-1]
1112
+ time_gap = sorted_timestamps[i] - sorted_timestamps[i - 1]
731
1113
  if time_gap > session_gap:
732
1114
  sessions += 1
733
-
1115
+
734
1116
  # Calculate daily activity for progress trend
735
1117
  daily_activity = defaultdict(int)
736
1118
  for line in all_lines:
737
- date_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
1119
+ date_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime(
1120
+ "%Y-%m-%d"
1121
+ )
738
1122
  daily_activity[date_str] += len(line.line_text) if line.line_text else 0
739
-
1123
+
740
1124
  # Calculate monthly progress (last 30 days)
741
1125
  today = datetime.date.today()
742
1126
  monthly_chars = 0
743
1127
  for i in range(30):
744
1128
  date = today - datetime.timedelta(days=i)
745
- date_str = date.strftime('%Y-%m-%d')
1129
+ date_str = date.strftime("%Y-%m-%d")
746
1130
  monthly_chars += daily_activity.get(date_str, 0)
747
-
1131
+
748
1132
  # Calculate reading streak using time-based requirements
749
1133
  current_streak = calculate_time_based_streak(all_lines)
750
-
1134
+
751
1135
  # Calculate average daily reading time
752
1136
  avg_daily_time_hours = calculate_average_daily_reading_time(all_lines)
753
-
754
- # Count unique games
755
- unique_games = len(set(line.game_name or "Unknown Game" for line in all_lines))
756
-
1137
+
1138
+ # Count completed games from games table
1139
+ completed_games = calculate_completed_games_count()
1140
+
1141
+ return {
1142
+ "total_characters": total_characters,
1143
+ "total_characters_formatted": format_large_number(total_characters),
1144
+ "total_sentences": total_sentences,
1145
+ "total_time_hours": total_time_hours,
1146
+ "total_time_formatted": format_time_human_readable(total_time_hours),
1147
+ "reading_speed": reading_speed,
1148
+ "reading_speed_formatted": format_large_number(reading_speed),
1149
+ "sessions": sessions,
1150
+ "completed_games": completed_games,
1151
+ "monthly_characters": monthly_chars,
1152
+ "monthly_characters_formatted": format_large_number(monthly_chars),
1153
+ "current_streak": current_streak,
1154
+ "avg_daily_time_hours": avg_daily_time_hours,
1155
+ "avg_daily_time_formatted": format_time_human_readable(avg_daily_time_hours),
1156
+ "first_date": datetime.date.fromtimestamp(min_timestamp).strftime("%Y-%m-%d"),
1157
+ "last_date": datetime.date.fromtimestamp(max_timestamp).strftime("%Y-%m-%d"),
1158
+ "daily_activity": dict(daily_activity),
1159
+ }
1160
+
1161
+
+ def aggregate_rollup_data(rollups: List) -> Dict:
+     """
+     Aggregate multiple daily rollup records into a single statistics object.
+
+     Args:
+         rollups: List of StatsRollupTable records
+
+     Returns:
+         Dictionary with aggregated statistics matching the stats API format
+     """
+     if not rollups:
+         return {
+             "total_lines": 0,
+             "total_characters": 0,
+             "total_sessions": 0,
+             "unique_games_played": 0,
+             "total_reading_time_seconds": 0.0,
+             "total_active_time_seconds": 0.0,
+             "average_reading_speed_chars_per_hour": 0.0,
+             "peak_reading_speed_chars_per_hour": 0.0,
+             "longest_session_seconds": 0.0,
+             "shortest_session_seconds": 0.0,
+             "average_session_seconds": 0.0,
+             "max_chars_in_session": 0,
+             "max_time_in_session_seconds": 0.0,
+             "games_completed": 0,
+             "games_started": 0,
+             "anki_cards_created": 0,
+             "lines_with_screenshots": 0,
+             "lines_with_audio": 0,
+             "lines_with_translations": 0,
+             "unique_kanji_seen": 0,
+             "kanji_frequency_data": {},
+             "hourly_activity_data": {},
+             "hourly_reading_speed_data": {},
+             "game_activity_data": {},
+             "games_played_ids": [],
+         }
+
+     # ADDITIVE fields - sum across all days
+     total_lines = sum(r.total_lines for r in rollups)
+     total_characters = sum(r.total_characters for r in rollups)
+     total_sessions = sum(r.total_sessions for r in rollups)
+     total_reading_time = sum(r.total_reading_time_seconds for r in rollups)
+     total_active_time = sum(r.total_active_time_seconds for r in rollups)
+     anki_cards_created = sum(r.anki_cards_created for r in rollups)
+     lines_with_screenshots = sum(r.lines_with_screenshots for r in rollups)
+     lines_with_audio = sum(r.lines_with_audio for r in rollups)
+     lines_with_translations = sum(r.lines_with_translations for r in rollups)
+     games_completed = sum(r.games_completed for r in rollups)
+
+     # MAXIMUM fields - take highest value across all days
+     peak_reading_speed = max(
+         (r.peak_reading_speed_chars_per_hour for r in rollups), default=0.0
+     )
+     longest_session = max((r.longest_session_seconds for r in rollups), default=0.0)
+     max_chars_in_session = max((r.max_chars_in_session for r in rollups), default=0)
+     max_time_in_session = max(
+         (r.max_time_in_session_seconds for r in rollups), default=0.0
+     )
+
+     # MINIMUM field - take smallest non-zero value
+     shortest_session_values = [
+         r.shortest_session_seconds for r in rollups if r.shortest_session_seconds > 0
+     ]
+     shortest_session = min(shortest_session_values) if shortest_session_values else 0.0
+
+     # WEIGHTED AVERAGE - average reading speed weighted by active time
+     if total_active_time > 0:
+         weighted_speed_sum = sum(
+             r.average_reading_speed_chars_per_hour * r.total_active_time_seconds
+             for r in rollups
+             if r.total_active_time_seconds > 0
+         )
+         avg_reading_speed = weighted_speed_sum / total_active_time
+     else:
+         avg_reading_speed = 0.0
+
+     # WEIGHTED AVERAGE - average session duration weighted by number of sessions
+     if total_sessions > 0:
+         weighted_session_sum = sum(
+             r.average_session_seconds * r.total_sessions
+             for r in rollups
+             if r.total_sessions > 0
+         )
+         avg_session_seconds = weighted_session_sum / total_sessions
+     else:
+         avg_session_seconds = 0.0
+
+     # MERGE - Combine game IDs (union)
+     all_games_played = set()
+     for rollup in rollups:
+         if rollup.games_played_ids:
+             try:
+                 games_ids = (
+                     json.loads(rollup.games_played_ids)
+                     if isinstance(rollup.games_played_ids, str)
+                     else rollup.games_played_ids
+                 )
+                 all_games_played.update(games_ids)
+             except (json.JSONDecodeError, TypeError):
+                 logger.warning(
+                     f"Failed to parse games_played_ids for rollup date {rollup.date}"
+                 )
+
+     # MERGE - Combine game activity data (sum chars/time/lines per game)
+     combined_game_activity = {}
+     for rollup in rollups:
+         if rollup.game_activity_data:
+             try:
+                 game_data = (
+                     json.loads(rollup.game_activity_data)
+                     if isinstance(rollup.game_activity_data, str)
+                     else rollup.game_activity_data
+                 )
+                 for game_id, activity in game_data.items():
+                     if game_id in combined_game_activity:
+                         combined_game_activity[game_id]["chars"] += activity.get(
+                             "chars", 0
+                         )
+                         combined_game_activity[game_id]["time"] += activity.get(
+                             "time", 0
+                         )
+                         combined_game_activity[game_id]["lines"] += activity.get(
+                             "lines", 0
+                         )
+                     else:
+                         combined_game_activity[game_id] = {
+                             "title": activity.get("title", f"Game {game_id}"),
+                             "chars": activity.get("chars", 0),
+                             "time": activity.get("time", 0),
+                             "lines": activity.get("lines", 0),
+                         }
+             except (json.JSONDecodeError, TypeError):
+                 logger.warning(
+                     f"Failed to parse game_activity_data for rollup date {rollup.date}"
+                 )
+
+     # MERGE - Combine kanji frequency data (sum frequencies)
+     combined_kanji_frequency = {}
+     for rollup in rollups:
+         if rollup.kanji_frequency_data:
+             try:
+                 kanji_data = (
+                     json.loads(rollup.kanji_frequency_data)
+                     if isinstance(rollup.kanji_frequency_data, str)
+                     else rollup.kanji_frequency_data
+                 )
+                 for kanji, count in kanji_data.items():
+                     combined_kanji_frequency[kanji] = (
+                         combined_kanji_frequency.get(kanji, 0) + count
+                     )
+             except (json.JSONDecodeError, TypeError):
+                 logger.warning(
+                     f"Failed to parse kanji_frequency_data for rollup date {rollup.date}"
+                 )
+
+     # MERGE - Combine hourly activity data (sum characters per hour)
+     combined_hourly_activity = {}
+     for rollup in rollups:
+         if rollup.hourly_activity_data:
+             try:
+                 hourly_data = (
+                     json.loads(rollup.hourly_activity_data)
+                     if isinstance(rollup.hourly_activity_data, str)
+                     else rollup.hourly_activity_data
+                 )
+                 for hour, chars in hourly_data.items():
+                     combined_hourly_activity[hour] = (
+                         combined_hourly_activity.get(hour, 0) + chars
+                     )
+             except (json.JSONDecodeError, TypeError):
+                 logger.warning(
+                     f"Failed to parse hourly_activity_data for rollup date {rollup.date}"
+                 )
+
+     # MERGE - Combine hourly reading speeds (average across days for each hour)
+     hourly_speed_lists = defaultdict(list)
+     for rollup in rollups:
+         if rollup.hourly_reading_speed_data:
+             try:
+                 speed_data = (
+                     json.loads(rollup.hourly_reading_speed_data)
+                     if isinstance(rollup.hourly_reading_speed_data, str)
+                     else rollup.hourly_reading_speed_data
+                 )
+                 for hour, speed in speed_data.items():
+                     if speed > 0:
+                         hourly_speed_lists[hour].append(speed)
+             except (json.JSONDecodeError, TypeError):
+                 logger.warning(
+                     f"Failed to parse hourly_reading_speed_data for rollup date {rollup.date}"
+                 )
+
+     # Average the speeds for each hour
+     combined_hourly_speeds = {}
+     for hour, speeds in hourly_speed_lists.items():
+         combined_hourly_speeds[hour] = sum(speeds) / len(speeds) if speeds else 0
+
      return {
-         'total_characters': total_characters,
-         'total_characters_formatted': format_large_number(total_characters),
-         'total_sentences': total_sentences,
-         'total_time_hours': total_time_hours,
-         'total_time_formatted': format_time_human_readable(total_time_hours),
-         'reading_speed': reading_speed,
-         'reading_speed_formatted': format_large_number(reading_speed),
-         'sessions': sessions,
-         'unique_games': unique_games,
-         'monthly_characters': monthly_chars,
-         'monthly_characters_formatted': format_large_number(monthly_chars),
-         'current_streak': current_streak,
-         'avg_daily_time_hours': avg_daily_time_hours,
-         'avg_daily_time_formatted': format_time_human_readable(avg_daily_time_hours),
-         'first_date': datetime.date.fromtimestamp(min_timestamp).strftime('%Y-%m-%d'),
-         'last_date': datetime.date.fromtimestamp(max_timestamp).strftime('%Y-%m-%d'),
-         'daily_activity': dict(daily_activity)
-     }
+         "total_lines": total_lines,
+         "total_characters": total_characters,
+         "total_sessions": total_sessions,
+         "unique_games_played": len(all_games_played),
+         "total_reading_time_seconds": total_reading_time,
+         "total_active_time_seconds": total_active_time,
+         "average_reading_speed_chars_per_hour": avg_reading_speed,
+         "peak_reading_speed_chars_per_hour": peak_reading_speed,
+         "longest_session_seconds": longest_session,
+         "shortest_session_seconds": shortest_session,
+         "average_session_seconds": avg_session_seconds,
+         "max_chars_in_session": max_chars_in_session,
+         "max_time_in_session_seconds": max_time_in_session,
+         "games_completed": games_completed,
+         "games_started": len(all_games_played),
+         "anki_cards_created": anki_cards_created,
+         "lines_with_screenshots": lines_with_screenshots,
+         "lines_with_audio": lines_with_audio,
+         "lines_with_translations": lines_with_translations,
+         "unique_kanji_seen": len(combined_kanji_frequency),
+         "kanji_frequency_data": combined_kanji_frequency,
+         "hourly_activity_data": combined_hourly_activity,
+         "hourly_reading_speed_data": combined_hourly_speeds,
+         "game_activity_data": combined_game_activity,
+         "games_played_ids": list(all_games_played),
+     }
+
+
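
Editor's note: the merge rules in `aggregate_rollup_data` are the crux of the rollup design. Additive counters sum, extrema take a max or a non-zero min, and rate-like fields are weighted so that a ten-minute day cannot pull the average as hard as a three-hour day. A toy check of the weighting rule (the `Rollup` namedtuple below is invented for illustration; the real records are StatsRollupTable rows):

    from collections import namedtuple

    Rollup = namedtuple("Rollup", "speed active_seconds")
    days = [Rollup(6000, 10800), Rollup(2000, 600)]  # 3 h at 6000 cph, 10 min at 2000 cph

    total_active = sum(d.active_seconds for d in days)
    weighted = sum(d.speed * d.active_seconds for d in days) / total_active
    naive = sum(d.speed for d in days) / len(days)

    print(round(weighted))  # 5789: the long day dominates, as it should
    print(round(naive))     # 4000: a plain mean would understate the true rate
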
+ def calculate_live_stats_for_today(today_lines: List) -> Dict:
+     """
+     Calculate live statistics for today using existing stats.py functions.
+
+     Args:
+         today_lines: List of GameLinesTable records for today
+
+     Returns:
+         Dictionary with today's statistics in rollup format
+     """
+     if not today_lines:
+         return aggregate_rollup_data([])  # Return empty stats
+
+     # Import here to avoid circular dependency
+     from GameSentenceMiner.util.cron.daily_rollup import (
+         analyze_sessions,
+         analyze_hourly_data,
+         analyze_game_activity,
+         analyze_kanji_data,
+     )
+
+     # Calculate basic stats
+     total_lines = len(today_lines)
+     total_characters = sum(
+         len(line.line_text) if line.line_text else 0 for line in today_lines
+     )
+
+     # Calculate Anki integration stats
+     lines_with_screenshots = sum(
+         1
+         for line in today_lines
+         if line.screenshot_in_anki and line.screenshot_in_anki.strip()
+     )
+     lines_with_audio = sum(
+         1 for line in today_lines if line.audio_in_anki and line.audio_in_anki.strip()
+     )
+     lines_with_translations = sum(
+         1 for line in today_lines if line.translation and line.translation.strip()
+     )
+     anki_cards = sum(
+         1
+         for line in today_lines
+         if (line.screenshot_in_anki and line.screenshot_in_anki.strip())
+         or (line.audio_in_anki and line.audio_in_anki.strip())
+     )
+
+     # Analyze sessions
+     session_stats = analyze_sessions(today_lines)
+
+     # Calculate reading speeds
+     total_time_seconds = session_stats["total_time"]
+     total_time_hours = total_time_seconds / 3600 if total_time_seconds > 0 else 0
+     average_speed = (
+         (total_characters / total_time_hours) if total_time_hours > 0 else 0.0
+     )
+
+     # Calculate peak speed (best hourly speed)
+     hourly_data = analyze_hourly_data(today_lines)
+     peak_speed = (
+         max(hourly_data["hourly_speeds"].values())
+         if hourly_data["hourly_speeds"]
+         else 0.0
+     )
+
+     # Analyze game activity
+     today_str = datetime.date.today().strftime("%Y-%m-%d")
+     game_activity = analyze_game_activity(today_lines, today_str)
+
+     # Analyze kanji
+     kanji_data = analyze_kanji_data(today_lines)
+
+     return {
+         "total_lines": total_lines,
+         "total_characters": total_characters,
+         "total_sessions": session_stats["count"],
+         "unique_games_played": len(game_activity["game_ids"]),
+         "total_reading_time_seconds": total_time_seconds,
+         "total_active_time_seconds": session_stats["active_time"],
+         "average_reading_speed_chars_per_hour": average_speed,
+         "peak_reading_speed_chars_per_hour": peak_speed,
+         "longest_session_seconds": session_stats["longest"],
+         "shortest_session_seconds": session_stats["shortest"],
+         "average_session_seconds": session_stats["average"],
+         "max_chars_in_session": session_stats["max_chars"],
+         "max_time_in_session_seconds": session_stats["max_time"],
+         "games_completed": game_activity["completed"],
+         "games_started": game_activity["started"],
+         "anki_cards_created": anki_cards,
+         "lines_with_screenshots": lines_with_screenshots,
+         "lines_with_audio": lines_with_audio,
+         "lines_with_translations": lines_with_translations,
+         "unique_kanji_seen": kanji_data["unique_count"],
+         "kanji_frequency_data": kanji_data["frequencies"],
+         "hourly_activity_data": hourly_data["hourly_activity"],
+         "hourly_reading_speed_data": hourly_data["hourly_speeds"],
+         "game_activity_data": game_activity["details"],
+         "games_played_ids": game_activity["game_ids"],
+     }
+
+
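
Editor's note: in the live path, `average_speed` is derived from session time reported by `analyze_sessions`, not from wall-clock time, so idle gaps between sessions do not dilute the rate. The arithmetic in isolation (values invented for illustration):

    total_characters = 15_000
    total_time_seconds = 5_400              # 1.5 h of actual session time
    total_time_hours = total_time_seconds / 3600
    average_speed = total_characters / total_time_hours
    print(round(average_speed))             # 10000 chars/hour

Had the denominator been the full elapsed day, the same 15,000 characters would report a far lower and less meaningful speed.
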
+ def combine_rollup_and_live_stats(rollup_stats: Dict, live_stats: Dict) -> Dict:
+     """
+     Combine rollup statistics with live statistics for today.
+
+     Args:
+         rollup_stats: Aggregated rollup statistics (can be None)
+         live_stats: Live calculated statistics for today (can be None)
+
+     Returns:
+         Combined statistics dictionary
+     """
+     if not rollup_stats and not live_stats:
+         return aggregate_rollup_data([])  # Return empty stats
+     elif not rollup_stats:
+         return live_stats
+     elif not live_stats:
+         return rollup_stats
+
+     # Combine both datasets
+     combined = {}
+
+     # ADDITIVE fields - sum rollup + live
+     additive_fields = [
+         "total_lines",
+         "total_characters",
+         "total_sessions",
+         "total_reading_time_seconds",
+         "total_active_time_seconds",
+         "games_completed",
+         "anki_cards_created",
+         "lines_with_screenshots",
+         "lines_with_audio",
+         "lines_with_translations",
+     ]
+
+     for field in additive_fields:
+         combined[field] = rollup_stats.get(field, 0) + live_stats.get(field, 0)
+
+     # MAXIMUM fields - take highest value
+     max_fields = [
+         "peak_reading_speed_chars_per_hour",
+         "longest_session_seconds",
+         "max_chars_in_session",
+         "max_time_in_session_seconds",
+     ]
+
+     for field in max_fields:
+         combined[field] = max(rollup_stats.get(field, 0), live_stats.get(field, 0))
+
+     # MINIMUM field - take smallest non-zero value
+     rollup_shortest = rollup_stats.get("shortest_session_seconds", 0)
+     live_shortest = live_stats.get("shortest_session_seconds", 0)
+     if rollup_shortest > 0 and live_shortest > 0:
+         combined["shortest_session_seconds"] = min(rollup_shortest, live_shortest)
+     elif rollup_shortest > 0:
+         combined["shortest_session_seconds"] = rollup_shortest
+     elif live_shortest > 0:
+         combined["shortest_session_seconds"] = live_shortest
+     else:
+         combined["shortest_session_seconds"] = 0.0
+
+     # WEIGHTED AVERAGE - average reading speed weighted by active time
+     rollup_time = rollup_stats.get("total_active_time_seconds", 0)
+     live_time = live_stats.get("total_active_time_seconds", 0)
+     total_time = rollup_time + live_time
+
+     if total_time > 0:
+         combined["average_reading_speed_chars_per_hour"] = (
+             rollup_stats.get("average_reading_speed_chars_per_hour", 0) * rollup_time
+             + live_stats.get("average_reading_speed_chars_per_hour", 0) * live_time
+         ) / total_time
+     else:
+         combined["average_reading_speed_chars_per_hour"] = 0.0
+
+     # WEIGHTED AVERAGE - average session duration weighted by session count
+     rollup_sessions = rollup_stats.get("total_sessions", 0)
+     live_sessions = live_stats.get("total_sessions", 0)
+     total_sessions = rollup_sessions + live_sessions
+
+     if total_sessions > 0:
+         combined["average_session_seconds"] = (
+             rollup_stats.get("average_session_seconds", 0) * rollup_sessions
+             + live_stats.get("average_session_seconds", 0) * live_sessions
+         ) / total_sessions
+     else:
+         combined["average_session_seconds"] = 0.0
+
+     # MERGE - Combine unique games (union)
+     rollup_games = set(rollup_stats.get("games_played_ids", []))
+     live_games = set(live_stats.get("games_played_ids", []))
+     all_games = rollup_games.union(live_games)
+     combined["unique_games_played"] = len(all_games)
+     combined["games_played_ids"] = list(all_games)
+     combined["games_started"] = len(all_games)
+
+     # MERGE - Combine kanji frequency data (sum frequencies)
+     rollup_kanji = rollup_stats.get("kanji_frequency_data", {})
+     live_kanji = live_stats.get("kanji_frequency_data", {})
+     combined_kanji = {}
+
+     for kanji, count in rollup_kanji.items():
+         combined_kanji[kanji] = count
+     for kanji, count in live_kanji.items():
+         combined_kanji[kanji] = combined_kanji.get(kanji, 0) + count
+
+     combined["kanji_frequency_data"] = combined_kanji
+     combined["unique_kanji_seen"] = len(combined_kanji)
+
+     # MERGE - Combine hourly activity data (sum characters per hour)
+     rollup_hourly = rollup_stats.get("hourly_activity_data", {})
+     live_hourly = live_stats.get("hourly_activity_data", {})
+     combined_hourly = {}
+
+     for hour in set(list(rollup_hourly.keys()) + list(live_hourly.keys())):
+         combined_hourly[hour] = rollup_hourly.get(hour, 0) + live_hourly.get(hour, 0)
+
+     combined["hourly_activity_data"] = combined_hourly
+
+     # MERGE - Combine hourly reading speed data (average)
+     rollup_speeds = rollup_stats.get("hourly_reading_speed_data", {})
+     live_speeds = live_stats.get("hourly_reading_speed_data", {})
+     combined_speeds = {}
+
+     for hour in set(list(rollup_speeds.keys()) + list(live_speeds.keys())):
+         speeds = []
+         if hour in rollup_speeds and rollup_speeds[hour] > 0:
+             speeds.append(rollup_speeds[hour])
+         if hour in live_speeds and live_speeds[hour] > 0:
+             speeds.append(live_speeds[hour])
+         combined_speeds[hour] = sum(speeds) / len(speeds) if speeds else 0
+
+     combined["hourly_reading_speed_data"] = combined_speeds
+
+     # MERGE - Combine game activity data (sum chars/time/lines per game)
+     rollup_games_activity = rollup_stats.get("game_activity_data", {})
+     live_games_activity = live_stats.get("game_activity_data", {})
+     combined_games_activity = {}
+
+     for game_id in set(
+         list(rollup_games_activity.keys()) + list(live_games_activity.keys())
+     ):
+         rollup_activity = rollup_games_activity.get(
+             game_id, {"chars": 0, "time": 0, "lines": 0}
+         )
+         live_activity = live_games_activity.get(
+             game_id, {"chars": 0, "time": 0, "lines": 0}
+         )
+
+         combined_games_activity[game_id] = {
+             "title": rollup_activity.get("title")
+             or live_activity.get("title", f"Game {game_id}"),
+             "chars": rollup_activity.get("chars", 0) + live_activity.get("chars", 0),
+             "time": rollup_activity.get("time", 0) + live_activity.get("time", 0),
+             "lines": rollup_activity.get("lines", 0) + live_activity.get("lines", 0),
+         }
+
+     combined["game_activity_data"] = combined_games_activity
+
+     return combined
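
Editor's note: the three new functions compose into the pattern the stats endpoints presumably follow, with historical days served from pre-computed rollups, today computed live, and the two merged. A minimal sketch of that call chain (`fetch_rollups_before_today` and `fetch_today_lines` are hypothetical stand-ins for whatever query layer the endpoints actually use):

    def overall_stats():
        rollups = fetch_rollups_before_today()    # hypothetical DB query
        today_lines = fetch_today_lines()         # hypothetical DB query
        rollup_stats = aggregate_rollup_data(rollups)
        live_stats = calculate_live_stats_for_today(today_lines)
        return combine_rollup_and_live_stats(rollup_stats, live_stats)

This keeps the expensive per-line scan bounded to a single day, while the historical aggregation stays proportional to the number of rollup rows rather than the number of lines.
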