GameSentenceMiner 2.15.9__py3-none-any.whl → 2.15.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,783 @@
1
+ import datetime
2
+ import re
3
+ from collections import defaultdict
4
+
5
+ import flask
6
+ from flask import request, jsonify
7
+
8
+ from GameSentenceMiner.util.db import GameLinesTable
9
+ from GameSentenceMiner.util.configuration import logger, get_config, save_current_config
10
+ from GameSentenceMiner.web.stats import (
11
+ calculate_kanji_frequency, calculate_heatmap_data, calculate_total_chars_per_game,
12
+ calculate_reading_time_per_game, calculate_reading_speed_per_game,
13
+ calculate_current_game_stats, calculate_all_games_stats, calculate_daily_reading_time,
14
+ calculate_time_based_streak, calculate_actual_reading_time
15
+ )
16
+
17
+
18
+ def register_database_api_routes(app):
19
+ """Register all database API routes with the Flask app."""
20
+
21
+ @app.route('/api/search-sentences')
22
+ def api_search_sentences():
23
+ """
24
+ API endpoint for searching sentences with filters and pagination.
25
+ """
26
+ try:
27
+ # Get query parameters
28
+ query = request.args.get('q', '').strip()
29
+ game_filter = request.args.get('game', '')
30
+ sort_by = request.args.get('sort', 'relevance')
31
+ page = int(request.args.get('page', 1))
32
+ page_size = int(request.args.get('page_size', 20))
33
+
34
+ # Validate parameters
35
+ if not query:
36
+ return jsonify({'error': 'Search query is required'}), 400
37
+
38
+ if page < 1:
39
+ page = 1
40
+ if page_size < 1 or page_size > 100:
41
+ page_size = 20
42
+
43
+ # Build the SQL query
44
+ base_query = f"SELECT * FROM {GameLinesTable._table} WHERE line_text LIKE ?"
45
+ params = [f'%{query}%']
46
+
47
+ # Add game filter if specified
48
+ if game_filter:
49
+ base_query += " AND game_name = ?"
50
+ params.append(game_filter)
51
+
52
+ # Add sorting
53
+ if sort_by == 'date_desc':
54
+ base_query += " ORDER BY timestamp DESC"
55
+ elif sort_by == 'date_asc':
56
+ base_query += " ORDER BY timestamp ASC"
57
+ elif sort_by == 'game_name':
58
+ base_query += " ORDER BY game_name, timestamp DESC"
59
+ else: # relevance - could be enhanced with proper scoring
60
+ base_query += " ORDER BY timestamp DESC"
61
+
62
+ # Get total count for pagination
63
+ count_query = f"SELECT COUNT(*) FROM {GameLinesTable._table} WHERE line_text LIKE ?"
64
+ count_params = [f'%{query}%']
65
+ if game_filter:
66
+ count_query += " AND game_name = ?"
67
+ count_params.append(game_filter)
68
+
69
+ total_results = GameLinesTable._db.fetchone(count_query, count_params)[0]
70
+
71
+ # Add pagination
72
+ offset = (page - 1) * page_size
73
+ base_query += f" LIMIT ? OFFSET ?"
74
+ params.extend([page_size, offset])
75
+
76
+ # Execute search query
77
+ rows = GameLinesTable._db.fetchall(base_query, params)
78
+
79
+ # Format results
80
+ results = []
81
+ for row in rows:
82
+ game_line = GameLinesTable.from_row(row)
83
+ if game_line:
84
+ results.append({
85
+ 'id': game_line.id,
86
+ 'sentence': game_line.line_text or '',
87
+ 'game_name': game_line.game_name or 'Unknown Game',
88
+ 'timestamp': float(game_line.timestamp) if game_line.timestamp else 0,
89
+ 'translation': game_line.translation or None,
90
+ 'has_audio': bool(game_line.audio_path),
91
+ 'has_screenshot': bool(game_line.screenshot_path)
92
+ })
93
+
94
+ return jsonify({
95
+ 'results': results,
96
+ 'total': total_results,
97
+ 'page': page,
98
+ 'page_size': page_size,
99
+ 'total_pages': (total_results + page_size - 1) // page_size
100
+ }), 200
101
+
102
+ except ValueError as e:
103
+ return jsonify({'error': 'Invalid pagination parameters'}), 400
104
+ except Exception as e:
105
+ logger.error(f"Error in sentence search: {e}")
106
+ return jsonify({'error': 'Search failed'}), 500
107
+
108
+ @app.route('/api/games-list')
109
+ def api_games_list():
110
+ """
111
+ Provides game list with metadata for deletion interface.
112
+ """
113
+ try:
114
+ game_names = GameLinesTable.get_all_games_with_lines()
115
+ games_data = []
116
+
117
+ for game_name in game_names:
118
+ lines = GameLinesTable.get_all_lines_for_scene(game_name)
119
+ if not lines:
120
+ continue
121
+
122
+ # Calculate metadata
123
+ sentence_count = len(lines)
124
+ timestamps = [float(line.timestamp) for line in lines]
125
+ min_date = datetime.date.fromtimestamp(min(timestamps))
126
+ max_date = datetime.date.fromtimestamp(max(timestamps))
127
+ total_chars = sum(len(line.line_text) if line.line_text else 0 for line in lines)
128
+
129
+ games_data.append({
130
+ 'name': game_name,
131
+ 'sentence_count': sentence_count,
132
+ 'first_entry_date': min_date.strftime('%Y-%m-%d'),
133
+ 'last_entry_date': max_date.strftime('%Y-%m-%d'),
134
+ 'total_characters': total_chars,
135
+ 'date_range': f"{min_date.strftime('%Y-%m-%d')} to {max_date.strftime('%Y-%m-%d')}" if min_date != max_date else min_date.strftime('%Y-%m-%d')
136
+ })
137
+
138
+ # Sort by first entry date (most recent first)
139
+ games_data.sort(key=lambda x: x['first_entry_date'], reverse=True)
140
+
141
+ return jsonify({'games': games_data}), 200
142
+
143
+ except Exception as e:
144
+ logger.error(f"Error fetching games list: {e}")
145
+ return jsonify({'error': 'Failed to fetch games list'}), 500
146
+
147
+ @app.route('/api/delete-games', methods=['POST'])
148
+ def api_delete_games():
149
+ """
150
+ Handles bulk deletion of games and their associated data.
151
+ """
152
+ try:
153
+ data = request.get_json()
154
+ game_names = data.get('game_names', [])
155
+
156
+ if not game_names:
157
+ return jsonify({'error': 'No games specified for deletion'}), 400
158
+
159
+ if not isinstance(game_names, list):
160
+ return jsonify({'error': 'game_names must be a list'}), 400
161
+
162
+ # Validate that all games exist
163
+ existing_games = GameLinesTable.get_all_games_with_lines()
164
+ invalid_games = [name for name in game_names if name not in existing_games]
165
+
166
+ if invalid_games:
167
+ return jsonify({'error': f'Games not found: {", ".join(invalid_games)}'}), 400
168
+
169
+ deletion_results = {}
170
+ total_deleted = 0
171
+
172
+ # Delete each game's data
173
+ for game_name in game_names:
174
+ try:
175
+ # Get lines for this game before deletion for counting
176
+ lines = GameLinesTable.get_all_lines_for_scene(game_name)
177
+ lines_count = len(lines)
178
+
179
+ # Delete all lines for this game using the database connection
180
+ GameLinesTable._db.execute(
181
+ f"DELETE FROM {GameLinesTable._table} WHERE game_name=?",
182
+ (game_name,),
183
+ commit=True
184
+ )
185
+
186
+ deletion_results[game_name] = {
187
+ 'deleted_sentences': lines_count,
188
+ 'status': 'success'
189
+ }
190
+ total_deleted += lines_count
191
+
192
+ logger.info(f"Deleted {lines_count} sentences for game: {game_name}")
193
+
194
+ except Exception as e:
195
+ logger.error(f"Error deleting game {game_name}: {e}")
196
+ deletion_results[game_name] = {
197
+ 'deleted_sentences': 0,
198
+ 'status': 'error',
199
+ 'error': str(e)
200
+ }
201
+
202
+ # Check if any deletions were successful
203
+ successful_deletions = [name for name, result in deletion_results.items() if result['status'] == 'success']
204
+ failed_deletions = [name for name, result in deletion_results.items() if result['status'] == 'error']
205
+
206
+ response_data = {
207
+ 'message': f'Deletion completed. {len(successful_deletions)} games successfully deleted.',
208
+ 'total_sentences_deleted': total_deleted,
209
+ 'successful_games': successful_deletions,
210
+ 'failed_games': failed_deletions,
211
+ 'detailed_results': deletion_results
212
+ }
213
+
214
+ if failed_deletions:
215
+ response_data['warning'] = f'Some games failed to delete: {", ".join(failed_deletions)}'
216
+ return jsonify(response_data), 207 # Multi-Status (partial success)
217
+ else:
218
+ return jsonify(response_data), 200
219
+
220
+ except Exception as e:
221
+ logger.error(f"Error in bulk game deletion: {e}")
222
+ return jsonify({'error': f'Failed to delete games: {str(e)}'}), 500
223
+
224
+ @app.route('/api/settings', methods=['GET'])
225
+ def api_get_settings():
226
+ """
227
+ Get current AFK timer, session gap, and streak requirement settings.
228
+ """
229
+ try:
230
+ config = get_config()
231
+ return jsonify({
232
+ 'afk_timer_seconds': config.advanced.afk_timer_seconds,
233
+ 'session_gap_seconds': config.advanced.session_gap_seconds,
234
+ 'streak_requirement_hours': getattr(config.advanced, 'streak_requirement_hours', 1.0)
235
+ }), 200
236
+ except Exception as e:
237
+ logger.error(f"Error getting settings: {e}")
238
+ return jsonify({'error': 'Failed to get settings'}), 500
239
+
240
+ @app.route('/api/settings', methods=['POST'])
241
+ def api_save_settings():
242
+ """
243
+ Save/update AFK timer, session gap, and streak requirement settings.
244
+ """
245
+ try:
246
+ data = request.get_json()
247
+
248
+ if not data:
249
+ return jsonify({'error': 'No data provided'}), 400
250
+
251
+ afk_timer = data.get('afk_timer_seconds')
252
+ session_gap = data.get('session_gap_seconds')
253
+ streak_requirement = data.get('streak_requirement_hours')
254
+
255
+ # Validate input - only require the settings that are provided
256
+ settings_to_update = {}
257
+
258
+ if afk_timer is not None:
259
+ try:
260
+ afk_timer = int(afk_timer)
261
+ if afk_timer < 30 or afk_timer > 600:
262
+ return jsonify({'error': 'AFK timer must be between 30 and 600 seconds'}), 400
263
+ settings_to_update['afk_timer_seconds'] = afk_timer
264
+ except (ValueError, TypeError):
265
+ return jsonify({'error': 'AFK timer must be a valid integer'}), 400
266
+
267
+ if session_gap is not None:
268
+ try:
269
+ session_gap = int(session_gap)
270
+ if session_gap < 300 or session_gap > 7200:
271
+ return jsonify({'error': 'Session gap must be between 300 and 7200 seconds (5 minutes to 2 hours)'}), 400
272
+ settings_to_update['session_gap_seconds'] = session_gap
273
+ except (ValueError, TypeError):
274
+ return jsonify({'error': 'Session gap must be a valid integer'}), 400
275
+
276
+ if streak_requirement is not None:
277
+ try:
278
+ streak_requirement = float(streak_requirement)
279
+ if streak_requirement < 0.01 or streak_requirement > 24:
280
+ return jsonify({'error': 'Streak requirement must be between 0.01 and 24 hours'}), 400
281
+ settings_to_update['streak_requirement_hours'] = streak_requirement
282
+ except (ValueError, TypeError):
283
+ return jsonify({'error': 'Streak requirement must be a valid number'}), 400
284
+
285
+ if not settings_to_update:
286
+ return jsonify({'error': 'No valid settings provided'}), 400
287
+
288
+ # Update configuration
289
+ config = get_config()
290
+
291
+ if 'afk_timer_seconds' in settings_to_update:
292
+ config.advanced.afk_timer_seconds = settings_to_update['afk_timer_seconds']
293
+ if 'session_gap_seconds' in settings_to_update:
294
+ config.advanced.session_gap_seconds = settings_to_update['session_gap_seconds']
295
+ if 'streak_requirement_hours' in settings_to_update:
296
+ setattr(config.advanced, 'streak_requirement_hours', settings_to_update['streak_requirement_hours'])
297
+
298
+ # Save configuration
299
+ save_current_config(config)
300
+
301
+ logger.info(f"Settings updated: {settings_to_update}")
302
+
303
+ response_data = {'message': 'Settings saved successfully'}
304
+ response_data.update(settings_to_update)
305
+
306
+ return jsonify(response_data), 200
307
+
308
+ except Exception as e:
309
+ logger.error(f"Error saving settings: {e}")
310
+ return jsonify({'error': 'Failed to save settings'}), 500
311
+
312
+
313
+ @app.route('/api/preview-text-deletion', methods=['POST'])
314
+ def api_preview_text_deletion():
315
+ """
316
+ Preview text lines that would be deleted based on regex or exact text matching.
317
+ """
318
+ try:
319
+ data = request.get_json()
320
+ if not data:
321
+ return jsonify({'error': 'No data provided'}), 400
322
+
323
+ regex_pattern = data.get('regex_pattern')
324
+ exact_text = data.get('exact_text')
325
+ case_sensitive = data.get('case_sensitive', False)
326
+ use_regex = data.get('use_regex', False)
327
+
328
+ if not regex_pattern and not exact_text:
329
+ return jsonify({'error': 'Either regex_pattern or exact_text must be provided'}), 400
330
+
331
+ # Get all lines from database
332
+ all_lines = GameLinesTable.all()
333
+ if not all_lines:
334
+ return jsonify({'count': 0, 'samples': []}), 200
335
+
336
+ matches = []
337
+
338
+ if regex_pattern and use_regex:
339
+ # Use regex matching
340
+ try:
341
+ # Ensure regex_pattern is a string
342
+ if not isinstance(regex_pattern, str):
343
+ return jsonify({'error': 'Regex pattern must be a string'}), 400
344
+
345
+ flags = 0 if case_sensitive else re.IGNORECASE
346
+ pattern = re.compile(regex_pattern, flags)
347
+
348
+ for line in all_lines:
349
+ if line.line_text and isinstance(line.line_text, str) and pattern.search(line.line_text):
350
+ matches.append(line.line_text)
351
+
352
+ except re.error as e:
353
+ return jsonify({'error': f'Invalid regex pattern: {str(e)}'}), 400
354
+
355
+ elif exact_text:
356
+ # Use exact text matching - ensure exact_text is properly handled
357
+ if isinstance(exact_text, list):
358
+ text_lines = exact_text
359
+ elif isinstance(exact_text, str):
360
+ text_lines = [exact_text]
361
+ else:
362
+ return jsonify({'error': 'exact_text must be a string or list of strings'}), 400
363
+
364
+ for line in all_lines:
365
+ if line.line_text and isinstance(line.line_text, str):
366
+ line_text = line.line_text if case_sensitive else line.line_text.lower()
367
+
368
+ for target_text in text_lines:
369
+ # Ensure target_text is a string
370
+ if not isinstance(target_text, str):
371
+ continue
372
+ compare_text = target_text if case_sensitive else target_text.lower()
373
+ if compare_text in line_text:
374
+ matches.append(line.line_text)
375
+ break
376
+
377
+ # Remove duplicates while preserving order
378
+ unique_matches = []
379
+ seen = set()
380
+ for match in matches:
381
+ if match not in seen:
382
+ unique_matches.append(match)
383
+ seen.add(match)
384
+
385
+ # Get sample matches (first 10)
386
+ samples = unique_matches[:10]
387
+
388
+ return jsonify({
389
+ 'count': len(unique_matches),
390
+ 'samples': samples
391
+ }), 200
392
+
393
+ except Exception as e:
394
+ logger.error(f"Error in preview text deletion: {e}")
395
+ return jsonify({'error': f'Preview failed: {str(e)}'}), 500
396
+
397
+ @app.route('/api/delete-text-lines', methods=['POST'])
398
+ def api_delete_text_lines():
399
+ """
400
+ Delete text lines from database based on regex or exact text matching.
401
+ """
402
+ try:
403
+ data = request.get_json()
404
+ if not data:
405
+ return jsonify({'error': 'No data provided'}), 400
406
+
407
+ regex_pattern = data.get('regex_pattern')
408
+ exact_text = data.get('exact_text')
409
+ case_sensitive = data.get('case_sensitive', False)
410
+ use_regex = data.get('use_regex', False)
411
+
412
+ if not regex_pattern and not exact_text:
413
+ return jsonify({'error': 'Either regex_pattern or exact_text must be provided'}), 400
414
+
415
+ # Get all lines from database
416
+ all_lines = GameLinesTable.all()
417
+ if not all_lines:
418
+ return jsonify({'deleted_count': 0}), 200
419
+
420
+ lines_to_delete = []
421
+
422
+ if regex_pattern and use_regex:
423
+ # Use regex matching
424
+ try:
425
+ # Ensure regex_pattern is a string
426
+ if not isinstance(regex_pattern, str):
427
+ return jsonify({'error': 'Regex pattern must be a string'}), 400
428
+
429
+ flags = 0 if case_sensitive else re.IGNORECASE
430
+ pattern = re.compile(regex_pattern, flags)
431
+
432
+ for line in all_lines:
433
+ if line.line_text and isinstance(line.line_text, str) and pattern.search(line.line_text):
434
+ lines_to_delete.append(line.id)
435
+
436
+ except re.error as e:
437
+ return jsonify({'error': f'Invalid regex pattern: {str(e)}'}), 400
438
+
439
+ elif exact_text:
440
+ # Use exact text matching - ensure exact_text is properly handled
441
+ if isinstance(exact_text, list):
442
+ text_lines = exact_text
443
+ elif isinstance(exact_text, str):
444
+ text_lines = [exact_text]
445
+ else:
446
+ return jsonify({'error': 'exact_text must be a string or list of strings'}), 400
447
+
448
+ for line in all_lines:
449
+ if line.line_text and isinstance(line.line_text, str):
450
+ line_text = line.line_text if case_sensitive else line.line_text.lower()
451
+
452
+ for target_text in text_lines:
453
+ # Ensure target_text is a string
454
+ if not isinstance(target_text, str):
455
+ continue
456
+ compare_text = target_text if case_sensitive else target_text.lower()
457
+ if compare_text in line_text:
458
+ lines_to_delete.append(line.id)
459
+ break
460
+
461
+ # Delete the matching lines
462
+ deleted_count = 0
463
+ for line_id in set(lines_to_delete): # Remove duplicates
464
+ try:
465
+ GameLinesTable._db.execute(
466
+ f"DELETE FROM {GameLinesTable._table} WHERE id=?",
467
+ (line_id,),
468
+ commit=True
469
+ )
470
+ deleted_count += 1
471
+ except Exception as e:
472
+ logger.warning(f"Failed to delete line {line_id}: {e}")
473
+
474
+ logger.info(f"Deleted {deleted_count} lines using pattern: {regex_pattern or exact_text}")
475
+
476
+ return jsonify({
477
+ 'deleted_count': deleted_count,
478
+ 'message': f'Successfully deleted {deleted_count} lines'
479
+ }), 200
480
+
481
+ except Exception as e:
482
+ logger.error(f"Error in delete text lines: {e}")
483
+ return jsonify({'error': f'Deletion failed: {str(e)}'}), 500
484
+
485
+ @app.route('/api/preview-deduplication', methods=['POST'])
486
+ def api_preview_deduplication():
487
+ """
488
+ Preview duplicate sentences that would be removed based on time window and game selection.
489
+ """
490
+ try:
491
+ data = request.get_json()
492
+ if not data:
493
+ return jsonify({'error': 'No data provided'}), 400
494
+
495
+ games = data.get('games', [])
496
+ time_window_minutes = data.get('time_window_minutes', 5)
497
+ case_sensitive = data.get('case_sensitive', False)
498
+
499
+ if not games:
500
+ return jsonify({'error': 'At least one game must be selected'}), 400
501
+
502
+ # Get lines from selected games
503
+ if 'all' in games:
504
+ all_lines = GameLinesTable.all()
505
+ else:
506
+ all_lines = []
507
+ for game_name in games:
508
+ game_lines = GameLinesTable.get_all_lines_for_scene(game_name)
509
+ all_lines.extend(game_lines)
510
+
511
+ if not all_lines:
512
+ return jsonify({'duplicates_count': 0, 'games_affected': 0, 'samples': []}), 200
513
+
514
+ # Group lines by game and sort by timestamp
515
+ game_lines = defaultdict(list)
516
+ for line in all_lines:
517
+ game_name = line.game_name or "Unknown Game"
518
+ game_lines[game_name].append(line)
519
+
520
+ # Sort lines within each game by timestamp
521
+ for game_name in game_lines:
522
+ game_lines[game_name].sort(key=lambda x: float(x.timestamp))
523
+
524
+ duplicates_to_remove = []
525
+ duplicate_samples = {}
526
+ time_window_seconds = time_window_minutes * 60
527
+
528
+ # Find duplicates within time window for each game
529
+ for game_name, lines in game_lines.items():
530
+ text_timeline = []
531
+
532
+ for line in lines:
533
+ if not line.line_text or not line.line_text.strip():
534
+ continue
535
+
536
+ line_text = line.line_text if case_sensitive else line.line_text.lower()
537
+ timestamp = float(line.timestamp)
538
+
539
+ # Check for duplicates within time window
540
+ for prev_text, prev_timestamp, prev_line_id in reversed(text_timeline):
541
+ if timestamp - prev_timestamp > time_window_seconds:
542
+ break # Outside time window
543
+
544
+ if prev_text == line_text:
545
+ # Found duplicate within time window
546
+ duplicates_to_remove.append(line.id)
547
+
548
+ # Store sample for preview
549
+ if line_text not in duplicate_samples:
550
+ duplicate_samples[line_text] = {
551
+ 'text': line.line_text, # Original case
552
+ 'occurrences': 1
553
+ }
554
+ duplicate_samples[line_text]['occurrences'] += 1
555
+ break
556
+
557
+ text_timeline.append((line_text, timestamp, line.id))
558
+
559
+ # Calculate statistics
560
+ duplicates_count = len(duplicates_to_remove)
561
+ games_affected = len([game for game in game_lines.keys() if any(
562
+ line.id in duplicates_to_remove for line in game_lines[game]
563
+ )])
564
+
565
+ # Get sample duplicates
566
+ samples = list(duplicate_samples.values())[:10]
567
+
568
+ return jsonify({
569
+ 'duplicates_count': duplicates_count,
570
+ 'games_affected': games_affected,
571
+ 'samples': samples
572
+ }), 200
573
+
574
+ except Exception as e:
575
+ logger.error(f"Error in preview deduplication: {e}")
576
+ return jsonify({'error': f'Preview failed: {str(e)}'}), 500
577
+
578
    @app.route('/api/deduplicate', methods=['POST'])
    def api_deduplicate():
        """
        Remove duplicate sentences from database based on time window and game selection.

        Body:
            games (list, required): game names to process, or ['all'].
            time_window_minutes (int, default 5): two identical texts within
                this many minutes of each other count as duplicates.
            case_sensitive (bool, default False): compare text case-sensitively.
            preserve_newest (bool, default False): if True keep the newest
                occurrence and delete the older one; otherwise keep the oldest.

        Returns JSON with 'deleted_count'; 400/500 on errors.
        """
        try:
            data = request.get_json()
            if not data:
                return jsonify({'error': 'No data provided'}), 400

            games = data.get('games', [])
            time_window_minutes = data.get('time_window_minutes', 5)
            case_sensitive = data.get('case_sensitive', False)
            preserve_newest = data.get('preserve_newest', False)

            if not games:
                return jsonify({'error': 'At least one game must be selected'}), 400

            # Get lines from selected games ('all' selects every game).
            if 'all' in games:
                all_lines = GameLinesTable.all()
            else:
                all_lines = []
                for game_name in games:
                    game_lines = GameLinesTable.get_all_lines_for_scene(game_name)
                    all_lines.extend(game_lines)

            if not all_lines:
                return jsonify({'deleted_count': 0}), 200

            # Group lines by game and sort by timestamp
            game_lines = defaultdict(list)
            for line in all_lines:
                game_name = line.game_name or "Unknown Game"
                game_lines[game_name].append(line)

            # Sort lines within each game by timestamp
            for game_name in game_lines:
                game_lines[game_name].sort(key=lambda x: float(x.timestamp))

            duplicates_to_remove = []
            time_window_seconds = time_window_minutes * 60

            # Find duplicates within time window for each game.
            # Duplicates are only detected within the same game, never across games.
            for game_name, lines in game_lines.items():
                # text_timeline holds (normalized_text, timestamp, id) for lines
                # currently considered "kept"; it stays in ascending time order.
                text_timeline = []

                for line in lines:
                    # Blank/whitespace-only lines are never treated as duplicates.
                    if not line.line_text or not line.line_text.strip():
                        continue

                    line_text = line.line_text if case_sensitive else line.line_text.lower()
                    timestamp = float(line.timestamp)

                    # Check for duplicates within time window. Walk the timeline
                    # newest-first; since it is time-ordered, the first entry
                    # older than the window ends the search.
                    duplicate_found = False
                    for i, (prev_text, prev_timestamp, prev_line_id) in enumerate(reversed(text_timeline)):
                        if timestamp - prev_timestamp > time_window_seconds:
                            break  # Outside time window

                        if prev_text == line_text:
                            # Found duplicate within time window
                            if preserve_newest:
                                # Remove the older one (previous)
                                duplicates_to_remove.append(prev_line_id)
                                # Update timeline to replace old entry with new one.
                                # i counts from the end (reversed), so convert it
                                # back to a forward index into text_timeline.
                                timeline_index = len(text_timeline) - 1 - i
                                text_timeline[timeline_index] = (line_text, timestamp, line.id)
                            else:
                                # Remove the newer one (current)
                                duplicates_to_remove.append(line.id)

                            duplicate_found = True
                            break

                    # Only non-duplicate lines extend the timeline; when
                    # preserve_newest replaced an entry above, the timeline was
                    # already updated in place.
                    if not duplicate_found:
                        text_timeline.append((line_text, timestamp, line.id))

            # Delete the duplicate lines one at a time so a single failure
            # doesn't abort the rest of the cleanup.
            deleted_count = 0
            for line_id in set(duplicates_to_remove):  # Remove duplicates from deletion list
                try:
                    GameLinesTable._db.execute(
                        f"DELETE FROM {GameLinesTable._table} WHERE id=?",
                        (line_id,),
                        commit=True
                    )
                    deleted_count += 1
                except Exception as e:
                    logger.warning(f"Failed to delete duplicate line {line_id}: {e}")

            logger.info(f"Deduplication completed: removed {deleted_count} duplicate sentences from {len(games)} games with {time_window_minutes}min window")

            return jsonify({
                'deleted_count': deleted_count,
                'message': f'Successfully removed {deleted_count} duplicate sentences'
            }), 200

        except Exception as e:
            logger.error(f"Error in deduplication: {e}")
            return jsonify({'error': f'Deduplication failed: {str(e)}'}), 500
679
+
680
+ @app.route('/api/stats')
681
+ def api_stats():
682
+ """
683
+ Provides aggregated, cumulative stats for charting.
684
+ Accepts optional 'year' parameter to filter heatmap data.
685
+ """
686
+ # Get optional year filter parameter
687
+ filter_year = request.args.get('year', None)
688
+
689
+ # 1. Fetch all lines and sort them chronologically
690
+ all_lines = sorted(GameLinesTable.all(), key=lambda line: line.timestamp)
691
+
692
+ if not all_lines:
693
+ return jsonify({"labels": [], "datasets": []})
694
+
695
+ # 2. Process data into daily totals for each game
696
+ # Structure: daily_data[date_str][game_name] = {'lines': N, 'chars': N}
697
+ daily_data = defaultdict(lambda: defaultdict(lambda: {'lines': 0, 'chars': 0}))
698
+
699
+ for line in all_lines:
700
+ day_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
701
+ game = line.game_name or "Unknown Game"
702
+
703
+ daily_data[day_str][game]['lines'] += 1
704
+ daily_data[day_str][game]['chars'] += len(line.line_text) if line.line_text else 0
705
+
706
+ # 3. Create cumulative datasets for Chart.js
707
+ sorted_days = sorted(daily_data.keys())
708
+ game_names = GameLinesTable.get_all_games_with_lines()
709
+
710
+ # Keep track of the running total for each metric for each game
711
+ cumulative_totals = defaultdict(lambda: {'lines': 0, 'chars': 0})
712
+
713
+ # Structure for final data: final_data[game_name][metric] = [day1_val, day2_val, ...]
714
+ final_data = defaultdict(lambda: defaultdict(list))
715
+
716
+ for day in sorted_days:
717
+ for game in game_names:
718
+ # Add the day's total to the cumulative total
719
+ cumulative_totals[game]['lines'] += daily_data[day][game]['lines']
720
+ cumulative_totals[game]['chars'] += daily_data[day][game]['chars']
721
+
722
+ # Append the new cumulative total to the list for that day
723
+ final_data[game]['lines'].append(cumulative_totals[game]['lines'])
724
+ final_data[game]['chars'].append(cumulative_totals[game]['chars'])
725
+
726
+ # 4. Format into Chart.js dataset structure
727
+ datasets = []
728
+ # A simple color palette for the chart lines
729
+ colors = ['#3498db', '#e74c3c', '#2ecc71', '#f1c40f', '#9b59b6', '#1abc9c', '#e67e22']
730
+
731
+ for i, game in enumerate(game_names):
732
+ color = colors[i % len(colors)]
733
+
734
+ datasets.append({
735
+ "label": f"{game} - Lines Received",
736
+ "data": final_data[game]['lines'],
737
+ "borderColor": color,
738
+ "backgroundColor": f"{color}33", # Semi-transparent for fill
739
+ "fill": False,
740
+ "tension": 0.1
741
+ })
742
+ datasets.append({
743
+ "label": f"{game} - Characters Read",
744
+ "data": final_data[game]['chars'],
745
+ "borderColor": color,
746
+ "backgroundColor": f"{color}33",
747
+ "fill": False,
748
+ "tension": 0.1,
749
+ "hidden": True # Hide by default to not clutter the chart
750
+ })
751
+
752
+ # 5. Calculate additional chart data
753
+ kanji_grid_data = calculate_kanji_frequency(all_lines)
754
+ heatmap_data = calculate_heatmap_data(all_lines, filter_year)
755
+ total_chars_data = calculate_total_chars_per_game(all_lines)
756
+ reading_time_data = calculate_reading_time_per_game(all_lines)
757
+ reading_speed_per_game_data = calculate_reading_speed_per_game(all_lines)
758
+
759
+ # 6. Calculate dashboard statistics
760
+ current_game_stats = calculate_current_game_stats(all_lines)
761
+ all_games_stats = calculate_all_games_stats(all_lines)
762
+
763
+ # 7. Prepare allLinesData for frontend calculations (needed for average daily time)
764
+ all_lines_data = []
765
+ for line in all_lines:
766
+ all_lines_data.append({
767
+ 'timestamp': float(line.timestamp),
768
+ 'game_name': line.game_name or 'Unknown Game',
769
+ 'characters': len(line.line_text) if line.line_text else 0
770
+ })
771
+
772
+ return jsonify({
773
+ "labels": sorted_days,
774
+ "datasets": datasets,
775
+ "kanjiGridData": kanji_grid_data,
776
+ "heatmapData": heatmap_data,
777
+ "totalCharsPerGame": total_chars_data,
778
+ "readingTimePerGame": reading_time_data,
779
+ "readingSpeedPerGame": reading_speed_per_game_data,
780
+ "currentGameStats": current_game_stats,
781
+ "allGamesStats": all_games_stats,
782
+ "allLinesData": all_lines_data
783
+ })