GameSentenceMiner 2.19.16__py3-none-any.whl → 2.20.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry, reflects the changes between those versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of GameSentenceMiner might be problematic.

Files changed (70)
  1. GameSentenceMiner/__init__.py +39 -0
  2. GameSentenceMiner/anki.py +6 -3
  3. GameSentenceMiner/gametext.py +13 -2
  4. GameSentenceMiner/gsm.py +40 -3
  5. GameSentenceMiner/locales/en_us.json +4 -0
  6. GameSentenceMiner/locales/ja_jp.json +4 -0
  7. GameSentenceMiner/locales/zh_cn.json +4 -0
  8. GameSentenceMiner/obs.py +4 -1
  9. GameSentenceMiner/owocr/owocr/ocr.py +304 -134
  10. GameSentenceMiner/owocr/owocr/run.py +1 -1
  11. GameSentenceMiner/ui/anki_confirmation.py +4 -2
  12. GameSentenceMiner/ui/config_gui.py +12 -0
  13. GameSentenceMiner/util/configuration.py +6 -2
  14. GameSentenceMiner/util/cron/__init__.py +12 -0
  15. GameSentenceMiner/util/cron/daily_rollup.py +613 -0
  16. GameSentenceMiner/util/cron/jiten_update.py +397 -0
  17. GameSentenceMiner/util/cron/populate_games.py +154 -0
  18. GameSentenceMiner/util/cron/run_crons.py +148 -0
  19. GameSentenceMiner/util/cron/setup_populate_games_cron.py +118 -0
  20. GameSentenceMiner/util/cron_table.py +334 -0
  21. GameSentenceMiner/util/db.py +236 -49
  22. GameSentenceMiner/util/ffmpeg.py +23 -4
  23. GameSentenceMiner/util/games_table.py +340 -93
  24. GameSentenceMiner/util/jiten_api_client.py +188 -0
  25. GameSentenceMiner/util/stats_rollup_table.py +216 -0
  26. GameSentenceMiner/web/anki_api_endpoints.py +438 -220
  27. GameSentenceMiner/web/database_api.py +955 -1259
  28. GameSentenceMiner/web/jiten_database_api.py +1015 -0
  29. GameSentenceMiner/web/rollup_stats.py +672 -0
  30. GameSentenceMiner/web/static/css/dashboard-shared.css +75 -13
  31. GameSentenceMiner/web/static/css/overview.css +604 -47
  32. GameSentenceMiner/web/static/css/search.css +226 -0
  33. GameSentenceMiner/web/static/css/shared.css +762 -0
  34. GameSentenceMiner/web/static/css/stats.css +221 -0
  35. GameSentenceMiner/web/static/js/components/bar-chart.js +339 -0
  36. GameSentenceMiner/web/static/js/database-bulk-operations.js +320 -0
  37. GameSentenceMiner/web/static/js/database-game-data.js +390 -0
  38. GameSentenceMiner/web/static/js/database-game-operations.js +213 -0
  39. GameSentenceMiner/web/static/js/database-helpers.js +44 -0
  40. GameSentenceMiner/web/static/js/database-jiten-integration.js +750 -0
  41. GameSentenceMiner/web/static/js/database-popups.js +89 -0
  42. GameSentenceMiner/web/static/js/database-tabs.js +64 -0
  43. GameSentenceMiner/web/static/js/database-text-management.js +371 -0
  44. GameSentenceMiner/web/static/js/database.js +86 -718
  45. GameSentenceMiner/web/static/js/goals.js +79 -18
  46. GameSentenceMiner/web/static/js/heatmap.js +29 -23
  47. GameSentenceMiner/web/static/js/overview.js +1205 -339
  48. GameSentenceMiner/web/static/js/regex-patterns.js +100 -0
  49. GameSentenceMiner/web/static/js/search.js +215 -18
  50. GameSentenceMiner/web/static/js/shared.js +193 -39
  51. GameSentenceMiner/web/static/js/stats.js +1536 -179
  52. GameSentenceMiner/web/stats.py +1142 -269
  53. GameSentenceMiner/web/stats_api.py +2104 -0
  54. GameSentenceMiner/web/templates/anki_stats.html +4 -18
  55. GameSentenceMiner/web/templates/components/date-range.html +118 -3
  56. GameSentenceMiner/web/templates/components/html-head.html +40 -6
  57. GameSentenceMiner/web/templates/components/js-config.html +8 -8
  58. GameSentenceMiner/web/templates/components/regex-input.html +160 -0
  59. GameSentenceMiner/web/templates/database.html +564 -117
  60. GameSentenceMiner/web/templates/goals.html +41 -5
  61. GameSentenceMiner/web/templates/overview.html +159 -129
  62. GameSentenceMiner/web/templates/search.html +78 -9
  63. GameSentenceMiner/web/templates/stats.html +159 -5
  64. GameSentenceMiner/web/texthooking_page.py +280 -111
  65. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/METADATA +43 -2
  66. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/RECORD +70 -47
  67. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/WHEEL +0 -0
  68. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/entry_points.txt +0 -0
  69. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/licenses/LICENSE +0 -0
  70. {gamesentenceminer-2.19.16.dist-info → gamesentenceminer-2.20.0.dist-info}/top_level.txt +0 -0
--- a/GameSentenceMiner/web/anki_api_endpoints.py
+++ b/GameSentenceMiner/web/anki_api_endpoints.py
@@ -1,21 +1,34 @@
 """
 Separate API endpoints for Anki statistics to improve performance through progressive loading.
 These endpoints replace the monolithic /api/anki_stats_combined endpoint.
+
+Uses hybrid rollup + live approach similar to /api/stats for GSM-based data (kanji, mining heatmap).
+Anki review data (retention, game stats) still requires direct AnkiConnect queries.
 """
 
 import concurrent.futures
+import datetime
+import traceback
 from flask import request, jsonify
 from GameSentenceMiner.util.configuration import get_config
 from GameSentenceMiner.anki import invoke
-from GameSentenceMiner.web.stats import calculate_kanji_frequency, calculate_mining_heatmap_data, is_kanji
+from GameSentenceMiner.web.stats import (
+    calculate_kanji_frequency,
+    calculate_mining_heatmap_data,
+    is_kanji,
+    aggregate_rollup_data,
+    calculate_live_stats_for_today,
+    combine_rollup_and_live_stats,
+)
 from GameSentenceMiner.util.db import GameLinesTable
+from GameSentenceMiner.util.stats_rollup_table import StatsRollupTable
 from GameSentenceMiner.util.configuration import logger
 
 
 def register_anki_api_endpoints(app):
     """Register all Anki API endpoints with the Flask app."""
-
-    @app.route('/api/anki_earliest_date')
+
+    @app.route("/api/anki_earliest_date")
     def api_anki_earliest_date():
         """Get the earliest Anki card creation date for date range initialization."""
         try:
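Note on the hunk above: the new module docstring names the pattern behind most of this release. Historical days are read from precomputed daily rollup rows (StatsRollupTable), while today's numbers, which cannot have been rolled up yet, are computed live from GameLinesTable and merged in. A minimal sketch of that split, assuming numeric per-key counters and a hypothetical merge that simply sums overlapping keys (the real combine_rollup_and_live_stats is not shown in this diff):

    import datetime

    def hybrid_stats(start_date, get_rollups, compute_live):
        # Historical days come from precomputed rollup rows (cheap, one row per day).
        today = datetime.date.today()
        yesterday = today - datetime.timedelta(days=1)
        rollup_part = get_rollups(start_date, yesterday) if start_date <= yesterday else {}

        # Today's rows are not rolled up yet, so they are computed on the fly.
        live_part = compute_live(today)

        # Hypothetical merge: sum counters that appear on both sides.
        merged = dict(rollup_part)
        for key, value in live_part.items():
            merged[key] = merged.get(key, 0) + value
        return merged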
@@ -24,35 +37,150 @@ def register_anki_api_endpoints(app):
                 # Only get first 100 cards to find earliest date quickly
                 sample_cards = card_ids[:100] if len(card_ids) > 100 else card_ids
                 cards_info = invoke("cardsInfo", cards=sample_cards)
-                created_times = [card.get("created", 0) for card in cards_info if "created" in card]
+                created_times = [
+                    card.get("created", 0) for card in cards_info if "created" in card
+                ]
                 earliest_date = min(created_times) if created_times else 0
             else:
                 earliest_date = 0
-
+
             return jsonify({"earliest_date": earliest_date})
         except Exception as e:
             logger.error(f"Failed to fetch earliest date from Anki: {e}")
             return jsonify({"earliest_date": 0})
 
-    @app.route('/api/anki_kanji_stats')
+    @app.route("/api/anki_kanji_stats")
     def api_anki_kanji_stats():
-        """Get kanji statistics including missing kanji analysis."""
-        start_timestamp = int(request.args.get('start_timestamp')) if request.args.get('start_timestamp') else None
-        end_timestamp = int(request.args.get('end_timestamp')) if request.args.get('end_timestamp') else None
-
+        """
+        Get kanji statistics including missing kanji analysis.
+        Uses hybrid rollup + live approach for GSM kanji data.
+        """
+        start_timestamp = (
+            int(request.args.get("start_timestamp"))
+            if request.args.get("start_timestamp")
+            else None
+        )
+        end_timestamp = (
+            int(request.args.get("end_timestamp"))
+            if request.args.get("end_timestamp")
+            else None
+        )
+
         try:
-            # Fetch GSM lines
-            try:
-                all_lines = (
-                    GameLinesTable.get_lines_filtered_by_timestamp(start_timestamp / 1000, end_timestamp / 1000)
-                    if start_timestamp is not None and end_timestamp is not None
-                    else GameLinesTable.all()
+            # === HYBRID ROLLUP + LIVE APPROACH FOR GSM KANJI ===
+            today = datetime.date.today()
+            today_str = today.strftime("%Y-%m-%d")
+
+            # Determine date range
+            if start_timestamp and end_timestamp:
+                try:
+                    # Convert milliseconds to seconds for fromtimestamp
+                    # Handle negative timestamps (before epoch) by clamping to epoch
+                    start_ts_seconds = max(0, start_timestamp / 1000.0)
+                    end_ts_seconds = max(0, end_timestamp / 1000.0)
+
+                    start_date = datetime.date.fromtimestamp(start_ts_seconds)
+                    end_date = datetime.date.fromtimestamp(end_ts_seconds)
+                    start_date_str = start_date.strftime("%Y-%m-%d")
+                    end_date_str = end_date.strftime("%Y-%m-%d")
+                except (ValueError, OSError) as e:
+                    logger.error(
+                        f"Invalid timestamp conversion: start={start_timestamp}, end={end_timestamp}, error={e}"
+                    )
+                    # Fallback to using all data
+                    start_date_str = None
+                    end_date_str = today_str
+            else:
+                start_date_str = None
+                end_date_str = today_str
+
+            # Check if today is in the date range
+            today_in_range = (not end_date_str) or (end_date_str >= today_str)
+
+            # Query rollup data for historical dates (up to yesterday)
+            rollup_stats = None
+            if start_date_str:
+                yesterday = today - datetime.timedelta(days=1)
+                yesterday_str = yesterday.strftime("%Y-%m-%d")
+
+                if start_date_str <= yesterday_str:
+                    rollup_end = (
+                        min(end_date_str, yesterday_str)
+                        if end_date_str
+                        else yesterday_str
+                    )
+                    rollups = StatsRollupTable.get_date_range(
+                        start_date_str, rollup_end
+                    )
+
+                    if rollups:
+                        rollup_stats = aggregate_rollup_data(rollups)
+
+            # Calculate today's stats live if needed
+            live_stats = None
+            if today_in_range:
+                today_start = datetime.datetime.combine(
+                    today, datetime.time.min
+                ).timestamp()
+                today_end = datetime.datetime.combine(
+                    today, datetime.time.max
+                ).timestamp()
+                today_lines = GameLinesTable.get_lines_filtered_by_timestamp(
+                    start=today_start, end=today_end, for_stats=True
                 )
-            except Exception as e:
-                logger.warning(f"Failed to filter lines by timestamp: {e}, fetching all lines instead")
-                all_lines = GameLinesTable.all()
-
-            # Use concurrent processing for Anki API calls and kanji calculation
+
+                if today_lines:
+                    live_stats = calculate_live_stats_for_today(today_lines)
+
+            # Combine rollup and live stats
+            combined_stats = combine_rollup_and_live_stats(rollup_stats, live_stats)
+
+            # Extract kanji frequency data from combined stats
+            kanji_freq_dict = combined_stats.get("kanji_frequency_data", {})
+
+            # If no rollup data, fall back to querying all lines
+            if not kanji_freq_dict:
+                logger.debug(
+                    "[Anki Kanji] No rollup data, falling back to direct query"
+                )
+                try:
+                    if start_timestamp is not None and end_timestamp is not None:
+                        # Handle negative timestamps by clamping to 0
+                        start_ts = max(0, start_timestamp / 1000.0)
+                        end_ts = max(0, end_timestamp / 1000.0)
+                        all_lines = GameLinesTable.get_lines_filtered_by_timestamp(
+                            start=start_ts, end=end_ts, for_stats=True
+                        )
+                    else:
+                        all_lines = GameLinesTable.all()
+                except Exception as e:
+                    logger.error(f"Error querying lines by timestamp: {e}")
+                    logger.error(traceback.format_exc())
+                    all_lines = GameLinesTable.all()
+                gsm_kanji_stats = calculate_kanji_frequency(all_lines)
+            else:
+                # Convert rollup kanji data to expected format
+                from GameSentenceMiner.web.stats import get_gradient_color
+
+                max_frequency = max(kanji_freq_dict.values()) if kanji_freq_dict else 0
+                sorted_kanji = sorted(
+                    kanji_freq_dict.items(), key=lambda x: x[1], reverse=True
+                )
+
+                kanji_data = []
+                for kanji, count in sorted_kanji:
+                    color = get_gradient_color(count, max_frequency)
+                    kanji_data.append(
+                        {"kanji": kanji, "frequency": count, "color": color}
+                    )
+
+                gsm_kanji_stats = {
+                    "kanji_data": kanji_data,
+                    "unique_count": len(sorted_kanji),
+                    "max_frequency": max_frequency,
+                }
+
+            # Fetch Anki kanji (still requires direct query)
             def get_anki_kanji():
                 try:
                     note_ids = invoke("findNotes", query="")
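Note on the timestamp handling above: the query parameters arrive as epoch milliseconds, while datetime.date.fromtimestamp() expects seconds and can raise OSError for pre-epoch values on some platforms, hence the division by 1000 and the max(0, ...) clamp. A small worked example of the same conversion:

    import datetime

    start_timestamp = 1704067200000  # 2024-01-01T00:00:00Z as epoch milliseconds
    start_ts_seconds = max(0, start_timestamp / 1000.0)  # clamp pre-epoch values to 0
    # fromtimestamp() uses local time, so the printed date can differ by a day from UTC
    print(datetime.date.fromtimestamp(start_ts_seconds))

    # A negative (pre-1970) timestamp clamps to the epoch instead of raising OSError
    print(max(0, -86400000 / 1000.0))  # 0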
@@ -61,18 +189,24 @@ def register_anki_api_endpoints(app):
                     # Process in smaller batches for better performance
                     batch_size = 500
                     for i in range(0, len(note_ids), batch_size):
-                        batch_ids = note_ids[i:i+batch_size]
+                        batch_ids = note_ids[i : i + batch_size]
                         notes_info = invoke("notesInfo", notes=batch_ids)
                         for note in notes_info:
                             # Filter by timestamp if provided
-                            note_created = note.get("created", None) or note.get("mod", None)
-                            if start_timestamp and end_timestamp and note_created is not None:
+                            note_created = note.get("created", None) or note.get(
+                                "mod", None
+                            )
+                            if (
+                                start_timestamp
+                                and end_timestamp
+                                and note_created is not None
+                            ):
                                 note_created_int = int(note_created)
                                 start_ts = int(start_timestamp)
                                 end_ts = int(end_timestamp)
                                 if not (start_ts <= note_created_int <= end_ts):
                                     continue
-
+
                             fields = note.get("fields", {})
                             first_field = next(iter(fields.values()), None)
                             if first_field and "value" in first_field:
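Note on the hunk above: the 500-note batching bounds each AnkiConnect notesInfo payload, which matters once a collection holds tens of thousands of notes. The same idiom pulled out as a generic helper (fetch here is a hypothetical stand-in for lambda b: invoke("notesInfo", notes=b)):

    def in_batches(ids, fetch, batch_size=500):
        # Yield fetched results over fixed-size slices of ids.
        for i in range(0, len(ids), batch_size):
            yield from fetch(ids[i : i + batch_size])

    # Usage sketch:
    # notes = list(in_batches(note_ids, lambda b: invoke("notesInfo", notes=b)))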
@@ -84,388 +218,464 @@ def register_anki_api_endpoints(app):
                 except Exception as e:
                     logger.error(f"Failed to fetch kanji from Anki: {e}")
                     return set()
-
-            def get_gsm_kanji():
-                return calculate_kanji_frequency(all_lines)
-
-            # Run both operations concurrently
-            with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
-                anki_future = executor.submit(get_anki_kanji)
-                gsm_future = executor.submit(get_gsm_kanji)
-
-                anki_kanji_set = anki_future.result()
-                gsm_kanji_stats = gsm_future.result()
-
+
+            anki_kanji_set = get_anki_kanji()
+
             gsm_kanji_list = gsm_kanji_stats.get("kanji_data", [])
             gsm_kanji_set = set([k["kanji"] for k in gsm_kanji_list])
-
+
             # Find missing kanji
             missing_kanji = [
                 {"kanji": k["kanji"], "frequency": k["frequency"]}
-                for k in gsm_kanji_list if k["kanji"] not in anki_kanji_set
+                for k in gsm_kanji_list
+                if k["kanji"] not in anki_kanji_set
             ]
             missing_kanji.sort(key=lambda x: x["frequency"], reverse=True)
-
+
             # Calculate coverage
             anki_kanji_count = len(anki_kanji_set)
             gsm_kanji_count = len(gsm_kanji_set)
-            coverage_percent = (anki_kanji_count / gsm_kanji_count * 100) if gsm_kanji_count else 0.0
-
-            return jsonify({
-                "missing_kanji": missing_kanji,
-                "anki_kanji_count": anki_kanji_count,
-                "gsm_kanji_count": gsm_kanji_count,
-                "coverage_percent": round(coverage_percent, 1)
-            })
-
+            coverage_percent = (
+                (anki_kanji_count / gsm_kanji_count * 100) if gsm_kanji_count else 0.0
+            )
+
+            return jsonify(
+                {
+                    "missing_kanji": missing_kanji,
+                    "anki_kanji_count": anki_kanji_count,
+                    "gsm_kanji_count": gsm_kanji_count,
+                    "coverage_percent": round(coverage_percent, 1),
+                }
+            )
+
         except Exception as e:
             logger.error(f"Error fetching kanji stats: {e}")
+            logger.error(traceback.format_exc())
             return jsonify({"error": str(e)}), 500
 
-    @app.route('/api/anki_game_stats')
+    @app.route("/api/anki_game_stats")
     def api_anki_game_stats():
         """Get game-specific Anki statistics."""
-        start_timestamp = int(request.args.get('start_timestamp')) if request.args.get('start_timestamp') else None
-        end_timestamp = int(request.args.get('end_timestamp')) if request.args.get('end_timestamp') else None
+        start_timestamp = (
+            int(request.args.get("start_timestamp"))
+            if request.args.get("start_timestamp")
+            else None
+        )
+        end_timestamp = (
+            int(request.args.get("end_timestamp"))
+            if request.args.get("end_timestamp")
+            else None
+        )
         parent_tag = get_config().anki.parent_tag.strip() or "Game"
-
+
         try:
             # Find all cards with Game:: parent tag
             query = f"tag:{parent_tag}::*"
             card_ids = invoke("findCards", query=query)
             game_stats = []
-
+
             if not card_ids:
                 return jsonify([])
-
+
             # Get card info and filter by date
             cards_info = invoke("cardsInfo", cards=card_ids)
-
+
             if start_timestamp and end_timestamp:
                 cards_info = [
-                    card for card in cards_info
-                    if start_timestamp <= card.get('created', 0) <= end_timestamp
+                    card
+                    for card in cards_info
+                    if start_timestamp <= card.get("created", 0) <= end_timestamp
                 ]
-
+
             if not cards_info:
                 return jsonify([])
-
+
             # Get all unique note IDs and fetch note info in one batch call
-            note_ids = list(set(card['note'] for card in cards_info))
+            note_ids = list(set(card["note"] for card in cards_info))
             notes_info_list = invoke("notesInfo", notes=note_ids)
-            notes_info = {note['noteId']: note for note in notes_info_list}
-
+            notes_info = {note["noteId"]: note for note in notes_info_list}
+
             # Create card-to-note mapping
-            card_to_note = {str(card['cardId']): card['note'] for card in cards_info}
-
+            card_to_note = {str(card["cardId"]): card["note"] for card in cards_info}
+
             # Group cards by game
             game_cards = {}
             for card in cards_info:
-                note_id = card['note']
+                note_id = card["note"]
                 note_info = notes_info.get(note_id)
                 if not note_info:
                     continue
-
-                tags = note_info.get('tags', [])
-
+
+                tags = note_info.get("tags", [])
+
                 # Find game tag (format: Game::GameName)
                 game_tag = None
                 for tag in tags:
-                    if tag.startswith(f'{parent_tag}::'):
-                        tag_parts = tag.split('::')
+                    if tag.startswith(f"{parent_tag}::"):
+                        tag_parts = tag.split("::")
                         if len(tag_parts) >= 2:
                             game_tag = tag_parts[1]
                             break
-
+
                 if game_tag:
                     if game_tag not in game_cards:
                         game_cards[game_tag] = []
-                    game_cards[game_tag].append(card['cardId'])
-
+                    game_cards[game_tag].append(card["cardId"])
+
             # Process games concurrently
             def process_game(game_name, card_ids):
                 try:
                     # Get review history for all cards in this game
                     reviews_data = invoke("getReviewsOfCards", cards=card_ids)
-
+
                     # Group reviews by note ID and calculate per-note retention
                     note_stats = {}
-
+
                     for card_id_str, reviews in reviews_data.items():
                         if not reviews:
                             continue
-
+
                         note_id = card_to_note.get(card_id_str)
                         if not note_id:
                             continue
-
+
                         # Filter reviews by timestamp if provided
                         filtered_reviews = reviews
                         if start_timestamp and end_timestamp:
                             filtered_reviews = [
-                                r for r in reviews
-                                if start_timestamp <= r.get('time', 0) <= end_timestamp
+                                r
+                                for r in reviews
+                                if start_timestamp <= r.get("time", 0) <= end_timestamp
                             ]
-
+
                         for review in filtered_reviews:
                             # Only count review-type entries (type=1)
-                            review_type = review.get('type', -1)
+                            review_type = review.get("type", -1)
                             if review_type != 1:
                                 continue
-
+
                             if note_id not in note_stats:
-                                note_stats[note_id] = {'passed': 0, 'failed': 0, 'total_time': 0}
-
-                            note_stats[note_id]['total_time'] += review['time']
-
+                                note_stats[note_id] = {
+                                    "passed": 0,
+                                    "failed": 0,
+                                    "total_time": 0,
+                                }
+
+                            note_stats[note_id]["total_time"] += review["time"]
+
                             # Ease: 1=Again, 2=Hard, 3=Good, 4=Easy
-                            if review['ease'] == 1:
-                                note_stats[note_id]['failed'] += 1
+                            if review["ease"] == 1:
+                                note_stats[note_id]["failed"] += 1
                             else:
-                                note_stats[note_id]['passed'] += 1
-
+                                note_stats[note_id]["passed"] += 1
+
                     if note_stats:
                         # Calculate per-note retention and average them
                         retention_sum = 0
                         total_time = 0
                         total_reviews = 0
-
+
                         for note_id, stats in note_stats.items():
-                            passed = stats['passed']
-                            failed = stats['failed']
+                            passed = stats["passed"]
+                            failed = stats["failed"]
                             total = passed + failed
-
+
                             if total > 0:
                                 note_retention = passed / total
                                 retention_sum += note_retention
-                                total_time += stats['total_time']
+                                total_time += stats["total_time"]
                                 total_reviews += total
-
+
                         # Average retention across all notes
                         note_count = len(note_stats)
-                        avg_retention = (retention_sum / note_count) * 100 if note_count > 0 else 0
-                        avg_time_seconds = (total_time / total_reviews / 1000.0) if total_reviews > 0 else 0
-
+                        avg_retention = (
+                            (retention_sum / note_count) * 100 if note_count > 0 else 0
+                        )
+                        avg_time_seconds = (
+                            (total_time / total_reviews / 1000.0)
+                            if total_reviews > 0
+                            else 0
+                        )
+
                         return {
-                            'game_name': game_name,
-                            'avg_time_per_card': round(avg_time_seconds, 2),
-                            'retention_pct': round(avg_retention, 1),
-                            'total_reviews': total_reviews,
-                            'mined_lines': 0
+                            "game_name": game_name,
+                            "avg_time_per_card": round(avg_time_seconds, 2),
+                            "retention_pct": round(avg_retention, 1),
+                            "total_reviews": total_reviews,
+                            "mined_lines": 0,
                         }
                     return None
                 except Exception as e:
                     logger.error(f"Error processing game {game_name}: {e}")
                     return None
-
+
             # Process games in parallel
             with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
-                futures = {executor.submit(process_game, game_name, card_ids): game_name
-                          for game_name, card_ids in game_cards.items()}
-
+                futures = {
+                    executor.submit(process_game, game_name, card_ids): game_name
+                    for game_name, card_ids in game_cards.items()
+                }
+
                 for future in concurrent.futures.as_completed(futures):
                     result = future.result()
                     if result:
                         game_stats.append(result)
-
+
             # Sort by game name
-            game_stats.sort(key=lambda x: x['game_name'])
+            game_stats.sort(key=lambda x: x["game_name"])
             return jsonify(game_stats)
-
+
         except Exception as e:
             logger.error(f"Failed to fetch game stats from Anki: {e}")
             return jsonify([])
 
-    @app.route('/api/anki_nsfw_sfw_retention')
+    @app.route("/api/anki_nsfw_sfw_retention")
     def api_anki_nsfw_sfw_retention():
         """Get NSFW vs SFW retention statistics."""
-        start_timestamp = int(request.args.get('start_timestamp')) if request.args.get('start_timestamp') else None
-        end_timestamp = int(request.args.get('end_timestamp')) if request.args.get('end_timestamp') else None
-
+        start_timestamp = (
+            int(request.args.get("start_timestamp"))
+            if request.args.get("start_timestamp")
+            else None
+        )
+        end_timestamp = (
+            int(request.args.get("end_timestamp"))
+            if request.args.get("end_timestamp")
+            else None
+        )
+
         def calculate_retention_for_cards(card_ids, start_timestamp, end_timestamp):
             if not card_ids:
                 return 0.0, 0, 0.0
-
+
             try:
                 # Get card info to filter by date
                 cards_info = invoke("cardsInfo", cards=card_ids)
-
+
                 # Use card['created'] for date filtering
                 if start_timestamp and end_timestamp:
                     cards_info = [
-                        card for card in cards_info
-                        if start_timestamp <= card.get('created', 0) <= end_timestamp
+                        card
+                        for card in cards_info
+                        if start_timestamp <= card.get("created", 0) <= end_timestamp
                     ]
-
+
                 if not cards_info:
                     return 0.0, 0, 0.0
-
+
                 # Create card-to-note mapping
-                card_to_note = {str(card['cardId']): card['note'] for card in cards_info}
-
+                card_to_note = {
+                    str(card["cardId"]): card["note"] for card in cards_info
+                }
+
                 # Get review history for all cards
-                reviews_data = invoke("getReviewsOfCards", cards=[card['cardId'] for card in cards_info])
-
+                reviews_data = invoke(
+                    "getReviewsOfCards", cards=[card["cardId"] for card in cards_info]
+                )
+
                 # Group reviews by note ID and calculate per-note retention
                 note_stats = {}
-
+
                 for card_id_str, reviews in reviews_data.items():
                     if not reviews:
                         continue
-
+
                     note_id = card_to_note.get(card_id_str)
                     if not note_id:
                         continue
-
+
                     # Filter reviews by timestamp if provided
                     filtered_reviews = reviews
                     if start_timestamp and end_timestamp:
                         filtered_reviews = [
-                            r for r in reviews
-                            if start_timestamp <= r.get('time', 0) <= end_timestamp
+                            r
+                            for r in reviews
+                            if start_timestamp <= r.get("time", 0) <= end_timestamp
                         ]
-
+
                     for review in filtered_reviews:
                         # Only count review-type entries (type=1)
-                        review_type = review.get('type', -1)
+                        review_type = review.get("type", -1)
                         if review_type != 1:
                             continue
-
+
                         if note_id not in note_stats:
-                            note_stats[note_id] = {'passed': 0, 'failed': 0, 'total_time': 0}
-
-                        note_stats[note_id]['total_time'] += review['time']
-
+                            note_stats[note_id] = {
+                                "passed": 0,
+                                "failed": 0,
+                                "total_time": 0,
+                            }
+
+                        note_stats[note_id]["total_time"] += review["time"]
+
                         # Ease: 1=Again, 2=Hard, 3=Good, 4=Easy
-                        if review['ease'] == 1:
-                            note_stats[note_id]['failed'] += 1
+                        if review["ease"] == 1:
+                            note_stats[note_id]["failed"] += 1
                         else:
-                            note_stats[note_id]['passed'] += 1
-
+                            note_stats[note_id]["passed"] += 1
+
                 if not note_stats:
                     return 0.0, 0, 0.0
-
+
                 # Calculate per-note retention and average them
                 retention_sum = 0
                 total_reviews = 0
                 total_time = 0
-
+
                 for note_id, stats in note_stats.items():
-                    passed = stats['passed']
-                    failed = stats['failed']
+                    passed = stats["passed"]
+                    failed = stats["failed"]
                     total = passed + failed
-
+
                     if total > 0:
                         note_retention = passed / total
                         retention_sum += note_retention
                         total_reviews += total
-                        total_time += stats['total_time']
-
+                        total_time += stats["total_time"]
+
                 # Average retention across all notes
                 note_count = len(note_stats)
-                avg_retention = (retention_sum / note_count) * 100 if note_count > 0 else 0
-                avg_time_seconds = (total_time / total_reviews / 1000.0) if total_reviews > 0 else 0
-
+                avg_retention = (
+                    (retention_sum / note_count) * 100 if note_count > 0 else 0
+                )
+                avg_time_seconds = (
+                    (total_time / total_reviews / 1000.0) if total_reviews > 0 else 0
+                )
+
                 return avg_retention, total_reviews, avg_time_seconds
-
+
             except Exception as e:
                 logger.error(f"Error calculating retention for cards: {e}")
                 return 0.0, 0, 0.0
-
+
         try:
             # Query for NSFW and SFW cards concurrently
             def get_nsfw_cards():
                 return invoke("findCards", query="tag:Game tag:NSFW")
-
+
             def get_sfw_cards():
                 return invoke("findCards", query="tag:Game -tag:NSFW")
-
+
             with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
                 nsfw_future = executor.submit(get_nsfw_cards)
                 sfw_future = executor.submit(get_sfw_cards)
-
+
                 nsfw_card_ids = nsfw_future.result()
                 sfw_card_ids = sfw_future.result()
-
+
             # Calculate retention for both categories concurrently
             with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
-                nsfw_future = executor.submit(calculate_retention_for_cards, nsfw_card_ids, start_timestamp, end_timestamp)
-                sfw_future = executor.submit(calculate_retention_for_cards, sfw_card_ids, start_timestamp, end_timestamp)
-
+                nsfw_future = executor.submit(
+                    calculate_retention_for_cards,
+                    nsfw_card_ids,
+                    start_timestamp,
+                    end_timestamp,
+                )
+                sfw_future = executor.submit(
+                    calculate_retention_for_cards,
+                    sfw_card_ids,
+                    start_timestamp,
+                    end_timestamp,
+                )
+
                 nsfw_retention, nsfw_reviews, nsfw_avg_time = nsfw_future.result()
                 sfw_retention, sfw_reviews, sfw_avg_time = sfw_future.result()
-
-            return jsonify({
-                'nsfw_retention': round(nsfw_retention, 1),
-                'sfw_retention': round(sfw_retention, 1),
-                'nsfw_reviews': nsfw_reviews,
-                'sfw_reviews': sfw_reviews,
-                'nsfw_avg_time': round(nsfw_avg_time, 2),
-                'sfw_avg_time': round(sfw_avg_time, 2)
-            })
-
+
+            return jsonify(
+                {
+                    "nsfw_retention": round(nsfw_retention, 1),
+                    "sfw_retention": round(sfw_retention, 1),
+                    "nsfw_reviews": nsfw_reviews,
+                    "sfw_reviews": sfw_reviews,
+                    "nsfw_avg_time": round(nsfw_avg_time, 2),
+                    "sfw_avg_time": round(sfw_avg_time, 2),
+                }
+            )
+
         except Exception as e:
             logger.error(f"Failed to fetch NSFW/SFW retention stats from Anki: {e}")
-            return jsonify({
-                'nsfw_retention': 0,
-                'sfw_retention': 0,
-                'nsfw_reviews': 0,
-                'sfw_reviews': 0,
-                'nsfw_avg_time': 0,
-                'sfw_avg_time': 0
-            })
-
-    @app.route('/api/anki_mining_heatmap')
+            return jsonify(
+                {
+                    "nsfw_retention": 0,
+                    "sfw_retention": 0,
+                    "nsfw_reviews": 0,
+                    "sfw_reviews": 0,
+                    "nsfw_avg_time": 0,
+                    "sfw_avg_time": 0,
+                }
+            )
+
+    @app.route("/api/anki_mining_heatmap")
     def api_anki_mining_heatmap():
-        """Get mining heatmap data."""
-        start_timestamp = int(request.args.get('start_timestamp')) if request.args.get('start_timestamp') else None
-        end_timestamp = int(request.args.get('end_timestamp')) if request.args.get('end_timestamp') else None
-
+        """
+        Get mining heatmap data.
+
+        Note: Currently uses direct query approach since mining heatmap requires checking
+        specific fields (screenshot_in_anki, audio_in_anki) which aren't aggregated in rollup.
+        Could be optimized in future by adding daily mining counts to rollup table.
+        """
+        start_timestamp = (
+            int(request.args.get("start_timestamp"))
+            if request.args.get("start_timestamp")
+            else None
+        )
+        end_timestamp = (
+            int(request.args.get("end_timestamp"))
+            if request.args.get("end_timestamp")
+            else None
+        )
+
         try:
-            # Fetch GSM lines
+            # Fetch GSM lines (direct query needed for mining-specific fields)
             try:
-                all_lines = (
-                    GameLinesTable.get_lines_filtered_by_timestamp(start_timestamp / 1000, end_timestamp / 1000)
-                    if start_timestamp is not None and end_timestamp is not None
-                    else GameLinesTable.all()
-                )
+                if start_timestamp is not None and end_timestamp is not None:
+                    # Handle negative timestamps by clamping to 0
+                    start_ts = max(0, start_timestamp / 1000.0)
+                    end_ts = max(0, end_timestamp / 1000.0)
+                    all_lines = GameLinesTable.get_lines_filtered_by_timestamp(
+                        start=start_ts, end=end_ts, for_stats=True
+                    )
+                else:
+                    all_lines = GameLinesTable.all()
             except Exception as e:
-                logger.warning(f"Failed to filter lines by timestamp: {e}, fetching all lines instead")
+                logger.warning(
+                    f"Failed to filter lines by timestamp: {e}, fetching all lines instead"
+                )
+                logger.warning(traceback.format_exc())
                 all_lines = GameLinesTable.all()
-
+
             # Calculate mining heatmap
             mining_heatmap = calculate_mining_heatmap_data(all_lines)
             return jsonify(mining_heatmap)
-
+
         except Exception as e:
             logger.error(f"Error fetching mining heatmap: {e}")
             return jsonify({})
 
     # Keep the original combined endpoint for backward compatibility
-    @app.route('/api/anki_stats_combined')
+    @app.route("/api/anki_stats_combined")
     def api_anki_stats_combined():
         """
         Legacy combined endpoint - now redirects to individual endpoints.
         Kept for backward compatibility but should be deprecated.
         """
-        start_timestamp = request.args.get('start_timestamp')
-        end_timestamp = request.args.get('end_timestamp')
-
+        start_timestamp = request.args.get("start_timestamp")
+        end_timestamp = request.args.get("end_timestamp")
+
         # Build query parameters
         params = {}
         if start_timestamp:
-            params['start_timestamp'] = start_timestamp
+            params["start_timestamp"] = start_timestamp
         if end_timestamp:
-            params['end_timestamp'] = end_timestamp
-
+            params["end_timestamp"] = end_timestamp
+
         try:
             # Use concurrent requests to fetch all data
             import requests
             from urllib.parse import urlencode
-
-            base_url = request.url_root.rstrip('/')
+
+            base_url = request.url_root.rstrip("/")
             query_string = urlencode(params) if params else ""
-
+
             def fetch_endpoint(endpoint):
                 url = f"{base_url}/api/{endpoint}"
                 if query_string:
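Note on the retention math repeated in process_game and calculate_retention_for_cards above: each note's pass rate is computed first, and the per-note rates are then averaged with equal weight, so one heavily reviewed note cannot dominate the figure. Stripped of the AnkiConnect plumbing, the calculation reduces to:

    def average_note_retention(note_stats):
        # note_stats maps note_id -> {"passed": int, "failed": int}
        retentions = []
        for stats in note_stats.values():
            total = stats["passed"] + stats["failed"]
            if total > 0:
                retentions.append(stats["passed"] / total)
        return (sum(retentions) / len(retentions)) * 100 if retentions else 0.0

    # One note at 9/10 and one at 1/2 average to (0.9 + 0.5) / 2 * 100 = 70.0,
    # not the review-weighted 10/12 = 83.3.
    print(average_note_retention({1: {"passed": 9, "failed": 1},
                                  2: {"passed": 1, "failed": 1}}))  # 70.0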
@@ -476,31 +686,39 @@ def register_anki_api_endpoints(app):
             except Exception as e:
                 logger.error(f"Error fetching {endpoint}: {e}")
                 return {}
-
+
             with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
                 futures = {
-                    'earliest_date': executor.submit(fetch_endpoint, 'anki_earliest_date'),
-                    'kanji_stats': executor.submit(fetch_endpoint, 'anki_kanji_stats'),
-                    'game_stats': executor.submit(fetch_endpoint, 'anki_game_stats'),
-                    'nsfw_sfw_retention': executor.submit(fetch_endpoint, 'anki_nsfw_sfw_retention'),
-                    'mining_heatmap': executor.submit(fetch_endpoint, 'anki_mining_heatmap')
+                    "earliest_date": executor.submit(
+                        fetch_endpoint, "anki_earliest_date"
+                    ),
+                    "kanji_stats": executor.submit(fetch_endpoint, "anki_kanji_stats"),
+                    "game_stats": executor.submit(fetch_endpoint, "anki_game_stats"),
+                    "nsfw_sfw_retention": executor.submit(
+                        fetch_endpoint, "anki_nsfw_sfw_retention"
+                    ),
+                    "mining_heatmap": executor.submit(
+                        fetch_endpoint, "anki_mining_heatmap"
+                    ),
                 }
-
+
                 results = {}
                 for key, future in futures.items():
                     results[key] = future.result()
-
+
             # Format response to match original structure
             combined_response = {
-                "kanji_stats": results.get('kanji_stats', {}),
-                "game_stats": results.get('game_stats', []),
-                "nsfw_sfw_retention": results.get('nsfw_sfw_retention', {}),
-                "mining_heatmap": results.get('mining_heatmap', {}),
-                "earliest_date": results.get('earliest_date', {}).get('earliest_date', 0)
+                "kanji_stats": results.get("kanji_stats", {}),
+                "game_stats": results.get("game_stats", []),
+                "nsfw_sfw_retention": results.get("nsfw_sfw_retention", {}),
+                "mining_heatmap": results.get("mining_heatmap", {}),
+                "earliest_date": results.get("earliest_date", {}).get(
+                    "earliest_date", 0
+                ),
             }
-
+
             return jsonify(combined_response)
-
+
         except Exception as e:
             logger.error(f"Error in combined endpoint: {e}")
-            return jsonify({"error": str(e)}), 500
+            return jsonify({"error": str(e)}), 500
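Note on the final hunk: the legacy /api/anki_stats_combined endpoint is now a thin fan-out that calls the five new endpoints over HTTP in parallel and stitches the results back together. A self-contained sketch of that pattern (the timeout and the body of fetch_endpoint are assumptions; the diff does not show them):

    import concurrent.futures
    import requests

    def fetch_all(base_url, endpoints, query_string=""):
        # Fetch several JSON endpoints concurrently and return {name: payload}.
        def fetch(name):
            url = f"{base_url}/api/{name}"
            if query_string:
                url = f"{url}?{query_string}"
            try:
                return requests.get(url, timeout=30).json()  # timeout is an assumption
            except Exception:
                return {}

        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            futures = {name: executor.submit(fetch, name) for name in endpoints}
            return {name: future.result() for name, future in futures.items()}

    # Usage sketch:
    # results = fetch_all(base_url, ["anki_earliest_date", "anki_kanji_stats",
    #     "anki_game_stats", "anki_nsfw_sfw_retention", "anki_mining_heatmap"])

Fanning out through HTTP keeps the legacy endpoint trivial at the cost of five loopback requests per call; calling the underlying view functions directly would avoid that overhead.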