GameSentenceMiner 2.18.14__py3-none-any.whl → 2.18.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of GameSentenceMiner might be problematic. Click here for more details.

Files changed (36) hide show
  1. GameSentenceMiner/anki.py +8 -53
  2. GameSentenceMiner/obs.py +1 -2
  3. GameSentenceMiner/ui/anki_confirmation.py +16 -2
  4. GameSentenceMiner/util/db.py +11 -7
  5. GameSentenceMiner/util/games_table.py +320 -0
  6. GameSentenceMiner/vad.py +3 -3
  7. GameSentenceMiner/web/anki_api_endpoints.py +506 -0
  8. GameSentenceMiner/web/database_api.py +239 -117
  9. GameSentenceMiner/web/static/css/loading-skeleton.css +41 -0
  10. GameSentenceMiner/web/static/css/search.css +54 -0
  11. GameSentenceMiner/web/static/css/stats.css +76 -0
  12. GameSentenceMiner/web/static/js/anki_stats.js +304 -50
  13. GameSentenceMiner/web/static/js/database.js +44 -7
  14. GameSentenceMiner/web/static/js/heatmap.js +326 -0
  15. GameSentenceMiner/web/static/js/overview.js +20 -224
  16. GameSentenceMiner/web/static/js/search.js +190 -23
  17. GameSentenceMiner/web/static/js/stats.js +371 -1
  18. GameSentenceMiner/web/stats.py +188 -0
  19. GameSentenceMiner/web/templates/anki_stats.html +145 -58
  20. GameSentenceMiner/web/templates/components/date-range.html +19 -0
  21. GameSentenceMiner/web/templates/components/html-head.html +45 -0
  22. GameSentenceMiner/web/templates/components/js-config.html +37 -0
  23. GameSentenceMiner/web/templates/components/popups.html +15 -0
  24. GameSentenceMiner/web/templates/components/settings-modal.html +233 -0
  25. GameSentenceMiner/web/templates/database.html +13 -3
  26. GameSentenceMiner/web/templates/goals.html +9 -31
  27. GameSentenceMiner/web/templates/overview.html +16 -223
  28. GameSentenceMiner/web/templates/search.html +46 -0
  29. GameSentenceMiner/web/templates/stats.html +49 -311
  30. GameSentenceMiner/web/texthooking_page.py +4 -66
  31. {gamesentenceminer-2.18.14.dist-info → gamesentenceminer-2.18.16.dist-info}/METADATA +1 -1
  32. {gamesentenceminer-2.18.14.dist-info → gamesentenceminer-2.18.16.dist-info}/RECORD +36 -27
  33. {gamesentenceminer-2.18.14.dist-info → gamesentenceminer-2.18.16.dist-info}/WHEEL +0 -0
  34. {gamesentenceminer-2.18.14.dist-info → gamesentenceminer-2.18.16.dist-info}/entry_points.txt +0 -0
  35. {gamesentenceminer-2.18.14.dist-info → gamesentenceminer-2.18.16.dist-info}/licenses/LICENSE +0 -0
  36. {gamesentenceminer-2.18.14.dist-info → gamesentenceminer-2.18.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,506 @@
1
+ """
2
+ Separate API endpoints for Anki statistics to improve performance through progressive loading.
3
+ These endpoints replace the monolithic /api/anki_stats_combined endpoint.
4
+ """
5
+
6
+ import concurrent.futures
7
+ from flask import request, jsonify
8
+ from GameSentenceMiner.util.configuration import get_config
9
+ from GameSentenceMiner.anki import invoke
10
+ from GameSentenceMiner.web.stats import calculate_kanji_frequency, calculate_mining_heatmap_data, is_kanji
11
+ from GameSentenceMiner.util.db import GameLinesTable
12
+ from GameSentenceMiner.util.configuration import logger
13
+
14
+
15
def register_anki_api_endpoints(app):
    """Register all Anki API endpoints with the Flask app.

    The individual endpoints support progressive loading on the stats page;
    the legacy /api/anki_stats_combined endpoint is kept for backward
    compatibility.
    """

    def _parse_timestamp_args():
        """Return (start, end) query args as ints (epoch ms), or None when absent."""
        start = request.args.get('start_timestamp')
        end = request.args.get('end_timestamp')
        return (int(start) if start else None, int(end) if end else None)

    def _fetch_gsm_lines(start_timestamp, end_timestamp):
        """Fetch GSM lines, filtered to [start, end] (ms) when both bounds are set.

        Falls back to fetching all lines if the filtered query raises.
        """
        try:
            # Explicit None checks so a legitimate timestamp of 0 still filters.
            if start_timestamp is not None and end_timestamp is not None:
                # The DB filter takes seconds; query parameters are milliseconds.
                return GameLinesTable.get_lines_filtered_by_timestamp(
                    start_timestamp / 1000, end_timestamp / 1000
                )
            return GameLinesTable.all()
        except Exception as e:
            logger.warning(f"Failed to filter lines by timestamp: {e}, fetching all lines instead")
            return GameLinesTable.all()

    def _build_note_stats(reviews_data, card_to_note, start_timestamp, end_timestamp):
        """Group review outcomes by note ID.

        Only review-type entries (type=1) are counted; ease 1 (Again) is a
        failure, ease 2-4 a pass. When both timestamp bounds are given,
        reviews outside them are skipped.

        Returns {note_id: {'passed': int, 'failed': int, 'total_time': int}}.
        """
        note_stats = {}
        for card_id_str, reviews in reviews_data.items():
            if not reviews:
                continue
            note_id = card_to_note.get(card_id_str)
            if not note_id:
                continue

            filtered_reviews = reviews
            # Explicit None checks: a timestamp of 0 must not disable filtering.
            if start_timestamp is not None and end_timestamp is not None:
                filtered_reviews = [
                    r for r in reviews
                    if start_timestamp <= r.get('time', 0) <= end_timestamp
                ]

            for review in filtered_reviews:
                # Only count review-type entries (type=1)
                if review.get('type', -1) != 1:
                    continue
                stats = note_stats.setdefault(
                    note_id, {'passed': 0, 'failed': 0, 'total_time': 0}
                )
                # .get keeps this consistent with the filter above (no KeyError).
                stats['total_time'] += review.get('time', 0)
                # Ease: 1=Again, 2=Hard, 3=Good, 4=Easy
                if review['ease'] == 1:
                    stats['failed'] += 1
                else:
                    stats['passed'] += 1
        return note_stats

    def _aggregate_note_stats(note_stats):
        """Average per-note retention across notes.

        Returns (avg_retention_percent, total_reviews, avg_time_seconds).
        """
        retention_sum = 0.0
        total_reviews = 0
        total_time = 0
        for stats in note_stats.values():
            total = stats['passed'] + stats['failed']
            if total > 0:
                retention_sum += stats['passed'] / total
                total_reviews += total
                total_time += stats['total_time']
        note_count = len(note_stats)
        avg_retention = (retention_sum / note_count) * 100 if note_count > 0 else 0
        # Review times are reported in milliseconds; convert to seconds.
        avg_time_seconds = (total_time / total_reviews / 1000.0) if total_reviews > 0 else 0
        return avg_retention, total_reviews, avg_time_seconds

    @app.route('/api/anki_earliest_date')
    def api_anki_earliest_date():
        """Get the earliest Anki card creation date for date range initialization."""
        try:
            card_ids = invoke("findCards", query="")
            if card_ids:
                # Only sample the first 100 cards to keep this fast.
                # NOTE(review): findCards ordering is not guaranteed, so this is
                # a heuristic, not necessarily the true earliest card — confirm
                # acceptable for date-range initialization.
                cards_info = invoke("cardsInfo", cards=card_ids[:100])
                created_times = [card["created"] for card in cards_info if "created" in card]
                earliest_date = min(created_times) if created_times else 0
            else:
                earliest_date = 0

            return jsonify({"earliest_date": earliest_date})
        except Exception as e:
            logger.error(f"Failed to fetch earliest date from Anki: {e}")
            return jsonify({"earliest_date": 0})

    @app.route('/api/anki_kanji_stats')
    def api_anki_kanji_stats():
        """Get kanji statistics including missing kanji analysis."""
        start_timestamp, end_timestamp = _parse_timestamp_args()

        try:
            all_lines = _fetch_gsm_lines(start_timestamp, end_timestamp)

            def get_anki_kanji():
                """Collect every kanji in the first field of each Anki note."""
                try:
                    note_ids = invoke("findNotes", query="")
                    anki_kanji_set = set()
                    if note_ids:
                        # Process in smaller batches for better performance
                        batch_size = 500
                        for i in range(0, len(note_ids), batch_size):
                            batch_ids = note_ids[i:i + batch_size]
                            notes_info = invoke("notesInfo", notes=batch_ids)
                            for note in notes_info:
                                # Filter by timestamp if provided; fall back to
                                # "mod" when "created" is missing.
                                note_created = note.get("created", None) or note.get("mod", None)
                                if (start_timestamp is not None
                                        and end_timestamp is not None
                                        and note_created is not None):
                                    if not (int(start_timestamp) <= int(note_created) <= int(end_timestamp)):
                                        continue

                                fields = note.get("fields", {})
                                first_field = next(iter(fields.values()), None)
                                if first_field and "value" in first_field:
                                    for char in first_field["value"]:
                                        if is_kanji(char):
                                            anki_kanji_set.add(char)
                    return anki_kanji_set
                except Exception as e:
                    logger.error(f"Failed to fetch kanji from Anki: {e}")
                    return set()

            def get_gsm_kanji():
                return calculate_kanji_frequency(all_lines)

            # Run the Anki query and the GSM frequency calculation concurrently.
            with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
                anki_future = executor.submit(get_anki_kanji)
                gsm_future = executor.submit(get_gsm_kanji)

                anki_kanji_set = anki_future.result()
                gsm_kanji_stats = gsm_future.result()

            gsm_kanji_list = gsm_kanji_stats.get("kanji_data", [])
            gsm_kanji_set = {k["kanji"] for k in gsm_kanji_list}

            # Kanji seen in GSM lines but absent from Anki, most frequent first.
            missing_kanji = [
                {"kanji": k["kanji"], "frequency": k["frequency"]}
                for k in gsm_kanji_list if k["kanji"] not in anki_kanji_set
            ]
            missing_kanji.sort(key=lambda x: x["frequency"], reverse=True)

            # Coverage: Anki kanji count relative to kanji seen in GSM lines.
            anki_kanji_count = len(anki_kanji_set)
            gsm_kanji_count = len(gsm_kanji_set)
            coverage_percent = (anki_kanji_count / gsm_kanji_count * 100) if gsm_kanji_count else 0.0

            return jsonify({
                "missing_kanji": missing_kanji,
                "anki_kanji_count": anki_kanji_count,
                "gsm_kanji_count": gsm_kanji_count,
                "coverage_percent": round(coverage_percent, 1)
            })

        except Exception as e:
            logger.error(f"Error fetching kanji stats: {e}")
            return jsonify({"error": str(e)}), 500

    @app.route('/api/anki_game_stats')
    def api_anki_game_stats():
        """Get game-specific Anki statistics."""
        start_timestamp, end_timestamp = _parse_timestamp_args()
        parent_tag = get_config().anki.parent_tag.strip() or "Game"

        try:
            # Find all cards with the parent tag (format: <parent_tag>::GameName)
            query = f"tag:{parent_tag}::*"
            card_ids = invoke("findCards", query=query)
            game_stats = []

            if not card_ids:
                return jsonify([])

            # Get card info and filter by creation date
            cards_info = invoke("cardsInfo", cards=card_ids)

            if start_timestamp is not None and end_timestamp is not None:
                cards_info = [
                    card for card in cards_info
                    if start_timestamp <= card.get('created', 0) <= end_timestamp
                ]

            if not cards_info:
                return jsonify([])

            # Get all unique note IDs and fetch note info in one batch call
            note_ids = list(set(card['note'] for card in cards_info))
            notes_info = {note['noteId']: note for note in invoke("notesInfo", notes=note_ids)}

            # Card-to-note mapping (getReviewsOfCards keys are card-id strings)
            card_to_note = {str(card['cardId']): card['note'] for card in cards_info}

            # Group card IDs by game tag
            game_cards = {}
            for card in cards_info:
                note_info = notes_info.get(card['note'])
                if not note_info:
                    continue

                # Find game tag (format: Game::GameName)
                game_tag = None
                for tag in note_info.get('tags', []):
                    if tag.startswith(f'{parent_tag}::'):
                        tag_parts = tag.split('::')
                        if len(tag_parts) >= 2:
                            game_tag = tag_parts[1]
                            break

                if game_tag:
                    game_cards.setdefault(game_tag, []).append(card['cardId'])

            def process_game(game_name, game_card_ids):
                """Compute retention/timing stats for one game; None if no reviews."""
                try:
                    reviews_data = invoke("getReviewsOfCards", cards=game_card_ids)
                    note_stats = _build_note_stats(
                        reviews_data, card_to_note, start_timestamp, end_timestamp
                    )
                    if not note_stats:
                        return None

                    avg_retention, total_reviews, avg_time_seconds = _aggregate_note_stats(note_stats)
                    return {
                        'game_name': game_name,
                        'avg_time_per_card': round(avg_time_seconds, 2),
                        'retention_pct': round(avg_retention, 1),
                        'total_reviews': total_reviews,
                        'mined_lines': 0
                    }
                except Exception as e:
                    logger.error(f"Error processing game {game_name}: {e}")
                    return None

            # Process games in parallel
            with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
                futures = {executor.submit(process_game, game_name, ids): game_name
                           for game_name, ids in game_cards.items()}

                for future in concurrent.futures.as_completed(futures):
                    result = future.result()
                    if result:
                        game_stats.append(result)

            # Sort by game name
            game_stats.sort(key=lambda x: x['game_name'])
            return jsonify(game_stats)

        except Exception as e:
            logger.error(f"Failed to fetch game stats from Anki: {e}")
            return jsonify([])

    @app.route('/api/anki_nsfw_sfw_retention')
    def api_anki_nsfw_sfw_retention():
        """Get NSFW vs SFW retention statistics."""
        start_timestamp, end_timestamp = _parse_timestamp_args()

        def calculate_retention_for_cards(card_ids):
            """Return (avg_retention_pct, total_reviews, avg_time_seconds) for cards."""
            if not card_ids:
                return 0.0, 0, 0.0

            try:
                # Get card info so we can filter by creation date
                cards_info = invoke("cardsInfo", cards=card_ids)

                if start_timestamp is not None and end_timestamp is not None:
                    cards_info = [
                        card for card in cards_info
                        if start_timestamp <= card.get('created', 0) <= end_timestamp
                    ]

                if not cards_info:
                    return 0.0, 0, 0.0

                card_to_note = {str(card['cardId']): card['note'] for card in cards_info}
                reviews_data = invoke(
                    "getReviewsOfCards",
                    cards=[card['cardId'] for card in cards_info]
                )

                note_stats = _build_note_stats(
                    reviews_data, card_to_note, start_timestamp, end_timestamp
                )
                if not note_stats:
                    return 0.0, 0, 0.0
                return _aggregate_note_stats(note_stats)

            except Exception as e:
                logger.error(f"Error calculating retention for cards: {e}")
                return 0.0, 0, 0.0

        try:
            # Query for NSFW and SFW cards concurrently
            with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
                nsfw_cards_future = executor.submit(invoke, "findCards", query="tag:Game tag:NSFW")
                sfw_cards_future = executor.submit(invoke, "findCards", query="tag:Game -tag:NSFW")

                nsfw_card_ids = nsfw_cards_future.result()
                sfw_card_ids = sfw_cards_future.result()

            # Calculate retention for both categories concurrently
            with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
                nsfw_future = executor.submit(calculate_retention_for_cards, nsfw_card_ids)
                sfw_future = executor.submit(calculate_retention_for_cards, sfw_card_ids)

                nsfw_retention, nsfw_reviews, nsfw_avg_time = nsfw_future.result()
                sfw_retention, sfw_reviews, sfw_avg_time = sfw_future.result()

            return jsonify({
                'nsfw_retention': round(nsfw_retention, 1),
                'sfw_retention': round(sfw_retention, 1),
                'nsfw_reviews': nsfw_reviews,
                'sfw_reviews': sfw_reviews,
                'nsfw_avg_time': round(nsfw_avg_time, 2),
                'sfw_avg_time': round(sfw_avg_time, 2)
            })

        except Exception as e:
            logger.error(f"Failed to fetch NSFW/SFW retention stats from Anki: {e}")
            return jsonify({
                'nsfw_retention': 0,
                'sfw_retention': 0,
                'nsfw_reviews': 0,
                'sfw_reviews': 0,
                'nsfw_avg_time': 0,
                'sfw_avg_time': 0
            })

    @app.route('/api/anki_mining_heatmap')
    def api_anki_mining_heatmap():
        """Get mining heatmap data."""
        start_timestamp, end_timestamp = _parse_timestamp_args()

        try:
            all_lines = _fetch_gsm_lines(start_timestamp, end_timestamp)
            return jsonify(calculate_mining_heatmap_data(all_lines))
        except Exception as e:
            logger.error(f"Error fetching mining heatmap: {e}")
            return jsonify({})

    # Keep the original combined endpoint for backward compatibility
    @app.route('/api/anki_stats_combined')
    def api_anki_stats_combined():
        """
        Legacy combined endpoint - now redirects to individual endpoints.
        Kept for backward compatibility but should be deprecated.
        """
        # Build query parameters to forward to the individual endpoints.
        params = {}
        if request.args.get('start_timestamp'):
            params['start_timestamp'] = request.args.get('start_timestamp')
        if request.args.get('end_timestamp'):
            params['end_timestamp'] = request.args.get('end_timestamp')

        try:
            # Fan out HTTP requests to the sibling endpoints concurrently.
            import requests
            from urllib.parse import urlencode

            base_url = request.url_root.rstrip('/')
            query_string = urlencode(params) if params else ""

            def fetch_endpoint(endpoint):
                """GET one sibling endpoint; {} on any failure."""
                url = f"{base_url}/api/{endpoint}"
                if query_string:
                    url += f"?{query_string}"
                try:
                    response = requests.get(url, timeout=30)
                    return response.json() if response.status_code == 200 else {}
                except Exception as e:
                    logger.error(f"Error fetching {endpoint}: {e}")
                    return {}

            with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
                futures = {
                    'earliest_date': executor.submit(fetch_endpoint, 'anki_earliest_date'),
                    'kanji_stats': executor.submit(fetch_endpoint, 'anki_kanji_stats'),
                    'game_stats': executor.submit(fetch_endpoint, 'anki_game_stats'),
                    'nsfw_sfw_retention': executor.submit(fetch_endpoint, 'anki_nsfw_sfw_retention'),
                    'mining_heatmap': executor.submit(fetch_endpoint, 'anki_mining_heatmap')
                }
                results = {key: future.result() for key, future in futures.items()}

            # Format response to match original structure
            return jsonify({
                "kanji_stats": results.get('kanji_stats', {}),
                "game_stats": results.get('game_stats', []),
                "nsfw_sfw_retention": results.get('nsfw_sfw_retention', {}),
                "mining_heatmap": results.get('mining_heatmap', {}),
                "earliest_date": results.get('earliest_date', {}).get('earliest_date', 0)
            })

        except Exception as e:
            logger.error(f"Error in combined endpoint: {e}")
            return jsonify({"error": str(e)}), 500