GameSentenceMiner 2.15.9__py3-none-any.whl → 2.15.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/ocr/gsm_ocr_config.py +1 -1
- GameSentenceMiner/ocr/owocr_helper.py +21 -12
- GameSentenceMiner/owocr/owocr/run.py +2 -2
- GameSentenceMiner/util/configuration.py +3 -0
- GameSentenceMiner/util/text_log.py +2 -2
- GameSentenceMiner/web/database_api.py +783 -0
- GameSentenceMiner/web/events.py +178 -0
- GameSentenceMiner/web/stats.py +582 -0
- GameSentenceMiner/web/templates/database.html +277 -0
- GameSentenceMiner/web/templates/search.html +103 -0
- GameSentenceMiner/web/templates/stats.html +330 -0
- GameSentenceMiner/web/templates/text_replacements.html +211 -0
- GameSentenceMiner/web/templates/utility.html +2 -2
- GameSentenceMiner/web/texthooking_page.py +58 -316
- GameSentenceMiner/web/websockets.py +120 -0
- {gamesentenceminer-2.15.9.dist-info → gamesentenceminer-2.15.10.dist-info}/METADATA +1 -1
- {gamesentenceminer-2.15.9.dist-info → gamesentenceminer-2.15.10.dist-info}/RECORD +21 -15
- GameSentenceMiner/web/templates/__init__.py +0 -0
- {gamesentenceminer-2.15.9.dist-info → gamesentenceminer-2.15.10.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.15.9.dist-info → gamesentenceminer-2.15.10.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.15.9.dist-info → gamesentenceminer-2.15.10.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.15.9.dist-info → gamesentenceminer-2.15.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,783 @@
|
|
1
|
+
import datetime
|
2
|
+
import re
|
3
|
+
from collections import defaultdict
|
4
|
+
|
5
|
+
import flask
|
6
|
+
from flask import request, jsonify
|
7
|
+
|
8
|
+
from GameSentenceMiner.util.db import GameLinesTable
|
9
|
+
from GameSentenceMiner.util.configuration import logger, get_config, save_current_config
|
10
|
+
from GameSentenceMiner.web.stats import (
|
11
|
+
calculate_kanji_frequency, calculate_heatmap_data, calculate_total_chars_per_game,
|
12
|
+
calculate_reading_time_per_game, calculate_reading_speed_per_game,
|
13
|
+
calculate_current_game_stats, calculate_all_games_stats, calculate_daily_reading_time,
|
14
|
+
calculate_time_based_streak, calculate_actual_reading_time
|
15
|
+
)
|
16
|
+
|
17
|
+
|
18
|
+
def register_database_api_routes(app):
    """Register all database API routes with the Flask app."""

    @app.route('/api/search-sentences')
    def api_search_sentences():
        """
        API endpoint for searching sentences with filters and pagination.

        Query parameters:
            q         -- required search text (substring match on line_text)
            game      -- optional exact game_name filter
            sort      -- 'relevance' (default), 'date_desc', 'date_asc', 'game_name'
            page      -- 1-based page number (default 1)
            page_size -- results per page; reset to 20 when outside 1..100
        """
        try:
            # Get query parameters
            query = request.args.get('q', '').strip()
            game_filter = request.args.get('game', '')
            sort_by = request.args.get('sort', 'relevance')
            page = int(request.args.get('page', 1))
            page_size = int(request.args.get('page_size', 20))

            # Validate parameters
            if not query:
                return jsonify({'error': 'Search query is required'}), 400

            if page < 1:
                page = 1
            if page_size < 1 or page_size > 100:
                page_size = 20

            # Build the shared WHERE clause exactly once so the COUNT query
            # and the SELECT query can never drift out of sync.
            where_clause = "WHERE line_text LIKE ?"
            where_params = [f'%{query}%']
            if game_filter:
                where_clause += " AND game_name = ?"
                where_params.append(game_filter)

            # Sorting
            if sort_by == 'date_desc':
                order_clause = " ORDER BY timestamp DESC"
            elif sort_by == 'date_asc':
                order_clause = " ORDER BY timestamp ASC"
            elif sort_by == 'game_name':
                order_clause = " ORDER BY game_name, timestamp DESC"
            else:  # relevance - could be enhanced with proper scoring
                order_clause = " ORDER BY timestamp DESC"

            # Total match count for pagination metadata.
            count_query = f"SELECT COUNT(*) FROM {GameLinesTable._table} {where_clause}"
            total_results = GameLinesTable._db.fetchone(count_query, where_params)[0]

            # Fetch the requested page.
            offset = (page - 1) * page_size
            base_query = (
                f"SELECT * FROM {GameLinesTable._table} {where_clause}"
                f"{order_clause} LIMIT ? OFFSET ?"
            )
            rows = GameLinesTable._db.fetchall(base_query, where_params + [page_size, offset])

            # Format results
            results = []
            for row in rows:
                game_line = GameLinesTable.from_row(row)
                if game_line:
                    results.append({
                        'id': game_line.id,
                        'sentence': game_line.line_text or '',
                        'game_name': game_line.game_name or 'Unknown Game',
                        'timestamp': float(game_line.timestamp) if game_line.timestamp else 0,
                        'translation': game_line.translation or None,
                        'has_audio': bool(game_line.audio_path),
                        'has_screenshot': bool(game_line.screenshot_path)
                    })

            return jsonify({
                'results': results,
                'total': total_results,
                'page': page,
                'page_size': page_size,
                # Ceiling division without math.ceil.
                'total_pages': (total_results + page_size - 1) // page_size
            }), 200

        except ValueError:
            # int() failed on page/page_size.
            return jsonify({'error': 'Invalid pagination parameters'}), 400
        except Exception as e:
            logger.error(f"Error in sentence search: {e}")
            return jsonify({'error': 'Search failed'}), 500
|
107
|
+
|
108
|
+
@app.route('/api/games-list')
|
109
|
+
def api_games_list():
|
110
|
+
"""
|
111
|
+
Provides game list with metadata for deletion interface.
|
112
|
+
"""
|
113
|
+
try:
|
114
|
+
game_names = GameLinesTable.get_all_games_with_lines()
|
115
|
+
games_data = []
|
116
|
+
|
117
|
+
for game_name in game_names:
|
118
|
+
lines = GameLinesTable.get_all_lines_for_scene(game_name)
|
119
|
+
if not lines:
|
120
|
+
continue
|
121
|
+
|
122
|
+
# Calculate metadata
|
123
|
+
sentence_count = len(lines)
|
124
|
+
timestamps = [float(line.timestamp) for line in lines]
|
125
|
+
min_date = datetime.date.fromtimestamp(min(timestamps))
|
126
|
+
max_date = datetime.date.fromtimestamp(max(timestamps))
|
127
|
+
total_chars = sum(len(line.line_text) if line.line_text else 0 for line in lines)
|
128
|
+
|
129
|
+
games_data.append({
|
130
|
+
'name': game_name,
|
131
|
+
'sentence_count': sentence_count,
|
132
|
+
'first_entry_date': min_date.strftime('%Y-%m-%d'),
|
133
|
+
'last_entry_date': max_date.strftime('%Y-%m-%d'),
|
134
|
+
'total_characters': total_chars,
|
135
|
+
'date_range': f"{min_date.strftime('%Y-%m-%d')} to {max_date.strftime('%Y-%m-%d')}" if min_date != max_date else min_date.strftime('%Y-%m-%d')
|
136
|
+
})
|
137
|
+
|
138
|
+
# Sort by first entry date (most recent first)
|
139
|
+
games_data.sort(key=lambda x: x['first_entry_date'], reverse=True)
|
140
|
+
|
141
|
+
return jsonify({'games': games_data}), 200
|
142
|
+
|
143
|
+
except Exception as e:
|
144
|
+
logger.error(f"Error fetching games list: {e}")
|
145
|
+
return jsonify({'error': 'Failed to fetch games list'}), 500
|
146
|
+
|
147
|
+
@app.route('/api/delete-games', methods=['POST'])
|
148
|
+
def api_delete_games():
|
149
|
+
"""
|
150
|
+
Handles bulk deletion of games and their associated data.
|
151
|
+
"""
|
152
|
+
try:
|
153
|
+
data = request.get_json()
|
154
|
+
game_names = data.get('game_names', [])
|
155
|
+
|
156
|
+
if not game_names:
|
157
|
+
return jsonify({'error': 'No games specified for deletion'}), 400
|
158
|
+
|
159
|
+
if not isinstance(game_names, list):
|
160
|
+
return jsonify({'error': 'game_names must be a list'}), 400
|
161
|
+
|
162
|
+
# Validate that all games exist
|
163
|
+
existing_games = GameLinesTable.get_all_games_with_lines()
|
164
|
+
invalid_games = [name for name in game_names if name not in existing_games]
|
165
|
+
|
166
|
+
if invalid_games:
|
167
|
+
return jsonify({'error': f'Games not found: {", ".join(invalid_games)}'}), 400
|
168
|
+
|
169
|
+
deletion_results = {}
|
170
|
+
total_deleted = 0
|
171
|
+
|
172
|
+
# Delete each game's data
|
173
|
+
for game_name in game_names:
|
174
|
+
try:
|
175
|
+
# Get lines for this game before deletion for counting
|
176
|
+
lines = GameLinesTable.get_all_lines_for_scene(game_name)
|
177
|
+
lines_count = len(lines)
|
178
|
+
|
179
|
+
# Delete all lines for this game using the database connection
|
180
|
+
GameLinesTable._db.execute(
|
181
|
+
f"DELETE FROM {GameLinesTable._table} WHERE game_name=?",
|
182
|
+
(game_name,),
|
183
|
+
commit=True
|
184
|
+
)
|
185
|
+
|
186
|
+
deletion_results[game_name] = {
|
187
|
+
'deleted_sentences': lines_count,
|
188
|
+
'status': 'success'
|
189
|
+
}
|
190
|
+
total_deleted += lines_count
|
191
|
+
|
192
|
+
logger.info(f"Deleted {lines_count} sentences for game: {game_name}")
|
193
|
+
|
194
|
+
except Exception as e:
|
195
|
+
logger.error(f"Error deleting game {game_name}: {e}")
|
196
|
+
deletion_results[game_name] = {
|
197
|
+
'deleted_sentences': 0,
|
198
|
+
'status': 'error',
|
199
|
+
'error': str(e)
|
200
|
+
}
|
201
|
+
|
202
|
+
# Check if any deletions were successful
|
203
|
+
successful_deletions = [name for name, result in deletion_results.items() if result['status'] == 'success']
|
204
|
+
failed_deletions = [name for name, result in deletion_results.items() if result['status'] == 'error']
|
205
|
+
|
206
|
+
response_data = {
|
207
|
+
'message': f'Deletion completed. {len(successful_deletions)} games successfully deleted.',
|
208
|
+
'total_sentences_deleted': total_deleted,
|
209
|
+
'successful_games': successful_deletions,
|
210
|
+
'failed_games': failed_deletions,
|
211
|
+
'detailed_results': deletion_results
|
212
|
+
}
|
213
|
+
|
214
|
+
if failed_deletions:
|
215
|
+
response_data['warning'] = f'Some games failed to delete: {", ".join(failed_deletions)}'
|
216
|
+
return jsonify(response_data), 207 # Multi-Status (partial success)
|
217
|
+
else:
|
218
|
+
return jsonify(response_data), 200
|
219
|
+
|
220
|
+
except Exception as e:
|
221
|
+
logger.error(f"Error in bulk game deletion: {e}")
|
222
|
+
return jsonify({'error': f'Failed to delete games: {str(e)}'}), 500
|
223
|
+
|
224
|
+
@app.route('/api/settings', methods=['GET'])
|
225
|
+
def api_get_settings():
|
226
|
+
"""
|
227
|
+
Get current AFK timer, session gap, and streak requirement settings.
|
228
|
+
"""
|
229
|
+
try:
|
230
|
+
config = get_config()
|
231
|
+
return jsonify({
|
232
|
+
'afk_timer_seconds': config.advanced.afk_timer_seconds,
|
233
|
+
'session_gap_seconds': config.advanced.session_gap_seconds,
|
234
|
+
'streak_requirement_hours': getattr(config.advanced, 'streak_requirement_hours', 1.0)
|
235
|
+
}), 200
|
236
|
+
except Exception as e:
|
237
|
+
logger.error(f"Error getting settings: {e}")
|
238
|
+
return jsonify({'error': 'Failed to get settings'}), 500
|
239
|
+
|
240
|
+
@app.route('/api/settings', methods=['POST'])
|
241
|
+
def api_save_settings():
|
242
|
+
"""
|
243
|
+
Save/update AFK timer, session gap, and streak requirement settings.
|
244
|
+
"""
|
245
|
+
try:
|
246
|
+
data = request.get_json()
|
247
|
+
|
248
|
+
if not data:
|
249
|
+
return jsonify({'error': 'No data provided'}), 400
|
250
|
+
|
251
|
+
afk_timer = data.get('afk_timer_seconds')
|
252
|
+
session_gap = data.get('session_gap_seconds')
|
253
|
+
streak_requirement = data.get('streak_requirement_hours')
|
254
|
+
|
255
|
+
# Validate input - only require the settings that are provided
|
256
|
+
settings_to_update = {}
|
257
|
+
|
258
|
+
if afk_timer is not None:
|
259
|
+
try:
|
260
|
+
afk_timer = int(afk_timer)
|
261
|
+
if afk_timer < 30 or afk_timer > 600:
|
262
|
+
return jsonify({'error': 'AFK timer must be between 30 and 600 seconds'}), 400
|
263
|
+
settings_to_update['afk_timer_seconds'] = afk_timer
|
264
|
+
except (ValueError, TypeError):
|
265
|
+
return jsonify({'error': 'AFK timer must be a valid integer'}), 400
|
266
|
+
|
267
|
+
if session_gap is not None:
|
268
|
+
try:
|
269
|
+
session_gap = int(session_gap)
|
270
|
+
if session_gap < 300 or session_gap > 7200:
|
271
|
+
return jsonify({'error': 'Session gap must be between 300 and 7200 seconds (5 minutes to 2 hours)'}), 400
|
272
|
+
settings_to_update['session_gap_seconds'] = session_gap
|
273
|
+
except (ValueError, TypeError):
|
274
|
+
return jsonify({'error': 'Session gap must be a valid integer'}), 400
|
275
|
+
|
276
|
+
if streak_requirement is not None:
|
277
|
+
try:
|
278
|
+
streak_requirement = float(streak_requirement)
|
279
|
+
if streak_requirement < 0.01 or streak_requirement > 24:
|
280
|
+
return jsonify({'error': 'Streak requirement must be between 0.01 and 24 hours'}), 400
|
281
|
+
settings_to_update['streak_requirement_hours'] = streak_requirement
|
282
|
+
except (ValueError, TypeError):
|
283
|
+
return jsonify({'error': 'Streak requirement must be a valid number'}), 400
|
284
|
+
|
285
|
+
if not settings_to_update:
|
286
|
+
return jsonify({'error': 'No valid settings provided'}), 400
|
287
|
+
|
288
|
+
# Update configuration
|
289
|
+
config = get_config()
|
290
|
+
|
291
|
+
if 'afk_timer_seconds' in settings_to_update:
|
292
|
+
config.advanced.afk_timer_seconds = settings_to_update['afk_timer_seconds']
|
293
|
+
if 'session_gap_seconds' in settings_to_update:
|
294
|
+
config.advanced.session_gap_seconds = settings_to_update['session_gap_seconds']
|
295
|
+
if 'streak_requirement_hours' in settings_to_update:
|
296
|
+
setattr(config.advanced, 'streak_requirement_hours', settings_to_update['streak_requirement_hours'])
|
297
|
+
|
298
|
+
# Save configuration
|
299
|
+
save_current_config(config)
|
300
|
+
|
301
|
+
logger.info(f"Settings updated: {settings_to_update}")
|
302
|
+
|
303
|
+
response_data = {'message': 'Settings saved successfully'}
|
304
|
+
response_data.update(settings_to_update)
|
305
|
+
|
306
|
+
return jsonify(response_data), 200
|
307
|
+
|
308
|
+
except Exception as e:
|
309
|
+
logger.error(f"Error saving settings: {e}")
|
310
|
+
return jsonify({'error': 'Failed to save settings'}), 500
|
311
|
+
|
312
|
+
|
313
|
+
@app.route('/api/preview-text-deletion', methods=['POST'])
|
314
|
+
def api_preview_text_deletion():
|
315
|
+
"""
|
316
|
+
Preview text lines that would be deleted based on regex or exact text matching.
|
317
|
+
"""
|
318
|
+
try:
|
319
|
+
data = request.get_json()
|
320
|
+
if not data:
|
321
|
+
return jsonify({'error': 'No data provided'}), 400
|
322
|
+
|
323
|
+
regex_pattern = data.get('regex_pattern')
|
324
|
+
exact_text = data.get('exact_text')
|
325
|
+
case_sensitive = data.get('case_sensitive', False)
|
326
|
+
use_regex = data.get('use_regex', False)
|
327
|
+
|
328
|
+
if not regex_pattern and not exact_text:
|
329
|
+
return jsonify({'error': 'Either regex_pattern or exact_text must be provided'}), 400
|
330
|
+
|
331
|
+
# Get all lines from database
|
332
|
+
all_lines = GameLinesTable.all()
|
333
|
+
if not all_lines:
|
334
|
+
return jsonify({'count': 0, 'samples': []}), 200
|
335
|
+
|
336
|
+
matches = []
|
337
|
+
|
338
|
+
if regex_pattern and use_regex:
|
339
|
+
# Use regex matching
|
340
|
+
try:
|
341
|
+
# Ensure regex_pattern is a string
|
342
|
+
if not isinstance(regex_pattern, str):
|
343
|
+
return jsonify({'error': 'Regex pattern must be a string'}), 400
|
344
|
+
|
345
|
+
flags = 0 if case_sensitive else re.IGNORECASE
|
346
|
+
pattern = re.compile(regex_pattern, flags)
|
347
|
+
|
348
|
+
for line in all_lines:
|
349
|
+
if line.line_text and isinstance(line.line_text, str) and pattern.search(line.line_text):
|
350
|
+
matches.append(line.line_text)
|
351
|
+
|
352
|
+
except re.error as e:
|
353
|
+
return jsonify({'error': f'Invalid regex pattern: {str(e)}'}), 400
|
354
|
+
|
355
|
+
elif exact_text:
|
356
|
+
# Use exact text matching - ensure exact_text is properly handled
|
357
|
+
if isinstance(exact_text, list):
|
358
|
+
text_lines = exact_text
|
359
|
+
elif isinstance(exact_text, str):
|
360
|
+
text_lines = [exact_text]
|
361
|
+
else:
|
362
|
+
return jsonify({'error': 'exact_text must be a string or list of strings'}), 400
|
363
|
+
|
364
|
+
for line in all_lines:
|
365
|
+
if line.line_text and isinstance(line.line_text, str):
|
366
|
+
line_text = line.line_text if case_sensitive else line.line_text.lower()
|
367
|
+
|
368
|
+
for target_text in text_lines:
|
369
|
+
# Ensure target_text is a string
|
370
|
+
if not isinstance(target_text, str):
|
371
|
+
continue
|
372
|
+
compare_text = target_text if case_sensitive else target_text.lower()
|
373
|
+
if compare_text in line_text:
|
374
|
+
matches.append(line.line_text)
|
375
|
+
break
|
376
|
+
|
377
|
+
# Remove duplicates while preserving order
|
378
|
+
unique_matches = []
|
379
|
+
seen = set()
|
380
|
+
for match in matches:
|
381
|
+
if match not in seen:
|
382
|
+
unique_matches.append(match)
|
383
|
+
seen.add(match)
|
384
|
+
|
385
|
+
# Get sample matches (first 10)
|
386
|
+
samples = unique_matches[:10]
|
387
|
+
|
388
|
+
return jsonify({
|
389
|
+
'count': len(unique_matches),
|
390
|
+
'samples': samples
|
391
|
+
}), 200
|
392
|
+
|
393
|
+
except Exception as e:
|
394
|
+
logger.error(f"Error in preview text deletion: {e}")
|
395
|
+
return jsonify({'error': f'Preview failed: {str(e)}'}), 500
|
396
|
+
|
397
|
+
@app.route('/api/delete-text-lines', methods=['POST'])
|
398
|
+
def api_delete_text_lines():
|
399
|
+
"""
|
400
|
+
Delete text lines from database based on regex or exact text matching.
|
401
|
+
"""
|
402
|
+
try:
|
403
|
+
data = request.get_json()
|
404
|
+
if not data:
|
405
|
+
return jsonify({'error': 'No data provided'}), 400
|
406
|
+
|
407
|
+
regex_pattern = data.get('regex_pattern')
|
408
|
+
exact_text = data.get('exact_text')
|
409
|
+
case_sensitive = data.get('case_sensitive', False)
|
410
|
+
use_regex = data.get('use_regex', False)
|
411
|
+
|
412
|
+
if not regex_pattern and not exact_text:
|
413
|
+
return jsonify({'error': 'Either regex_pattern or exact_text must be provided'}), 400
|
414
|
+
|
415
|
+
# Get all lines from database
|
416
|
+
all_lines = GameLinesTable.all()
|
417
|
+
if not all_lines:
|
418
|
+
return jsonify({'deleted_count': 0}), 200
|
419
|
+
|
420
|
+
lines_to_delete = []
|
421
|
+
|
422
|
+
if regex_pattern and use_regex:
|
423
|
+
# Use regex matching
|
424
|
+
try:
|
425
|
+
# Ensure regex_pattern is a string
|
426
|
+
if not isinstance(regex_pattern, str):
|
427
|
+
return jsonify({'error': 'Regex pattern must be a string'}), 400
|
428
|
+
|
429
|
+
flags = 0 if case_sensitive else re.IGNORECASE
|
430
|
+
pattern = re.compile(regex_pattern, flags)
|
431
|
+
|
432
|
+
for line in all_lines:
|
433
|
+
if line.line_text and isinstance(line.line_text, str) and pattern.search(line.line_text):
|
434
|
+
lines_to_delete.append(line.id)
|
435
|
+
|
436
|
+
except re.error as e:
|
437
|
+
return jsonify({'error': f'Invalid regex pattern: {str(e)}'}), 400
|
438
|
+
|
439
|
+
elif exact_text:
|
440
|
+
# Use exact text matching - ensure exact_text is properly handled
|
441
|
+
if isinstance(exact_text, list):
|
442
|
+
text_lines = exact_text
|
443
|
+
elif isinstance(exact_text, str):
|
444
|
+
text_lines = [exact_text]
|
445
|
+
else:
|
446
|
+
return jsonify({'error': 'exact_text must be a string or list of strings'}), 400
|
447
|
+
|
448
|
+
for line in all_lines:
|
449
|
+
if line.line_text and isinstance(line.line_text, str):
|
450
|
+
line_text = line.line_text if case_sensitive else line.line_text.lower()
|
451
|
+
|
452
|
+
for target_text in text_lines:
|
453
|
+
# Ensure target_text is a string
|
454
|
+
if not isinstance(target_text, str):
|
455
|
+
continue
|
456
|
+
compare_text = target_text if case_sensitive else target_text.lower()
|
457
|
+
if compare_text in line_text:
|
458
|
+
lines_to_delete.append(line.id)
|
459
|
+
break
|
460
|
+
|
461
|
+
# Delete the matching lines
|
462
|
+
deleted_count = 0
|
463
|
+
for line_id in set(lines_to_delete): # Remove duplicates
|
464
|
+
try:
|
465
|
+
GameLinesTable._db.execute(
|
466
|
+
f"DELETE FROM {GameLinesTable._table} WHERE id=?",
|
467
|
+
(line_id,),
|
468
|
+
commit=True
|
469
|
+
)
|
470
|
+
deleted_count += 1
|
471
|
+
except Exception as e:
|
472
|
+
logger.warning(f"Failed to delete line {line_id}: {e}")
|
473
|
+
|
474
|
+
logger.info(f"Deleted {deleted_count} lines using pattern: {regex_pattern or exact_text}")
|
475
|
+
|
476
|
+
return jsonify({
|
477
|
+
'deleted_count': deleted_count,
|
478
|
+
'message': f'Successfully deleted {deleted_count} lines'
|
479
|
+
}), 200
|
480
|
+
|
481
|
+
except Exception as e:
|
482
|
+
logger.error(f"Error in delete text lines: {e}")
|
483
|
+
return jsonify({'error': f'Deletion failed: {str(e)}'}), 500
|
484
|
+
|
485
|
+
@app.route('/api/preview-deduplication', methods=['POST'])
|
486
|
+
def api_preview_deduplication():
|
487
|
+
"""
|
488
|
+
Preview duplicate sentences that would be removed based on time window and game selection.
|
489
|
+
"""
|
490
|
+
try:
|
491
|
+
data = request.get_json()
|
492
|
+
if not data:
|
493
|
+
return jsonify({'error': 'No data provided'}), 400
|
494
|
+
|
495
|
+
games = data.get('games', [])
|
496
|
+
time_window_minutes = data.get('time_window_minutes', 5)
|
497
|
+
case_sensitive = data.get('case_sensitive', False)
|
498
|
+
|
499
|
+
if not games:
|
500
|
+
return jsonify({'error': 'At least one game must be selected'}), 400
|
501
|
+
|
502
|
+
# Get lines from selected games
|
503
|
+
if 'all' in games:
|
504
|
+
all_lines = GameLinesTable.all()
|
505
|
+
else:
|
506
|
+
all_lines = []
|
507
|
+
for game_name in games:
|
508
|
+
game_lines = GameLinesTable.get_all_lines_for_scene(game_name)
|
509
|
+
all_lines.extend(game_lines)
|
510
|
+
|
511
|
+
if not all_lines:
|
512
|
+
return jsonify({'duplicates_count': 0, 'games_affected': 0, 'samples': []}), 200
|
513
|
+
|
514
|
+
# Group lines by game and sort by timestamp
|
515
|
+
game_lines = defaultdict(list)
|
516
|
+
for line in all_lines:
|
517
|
+
game_name = line.game_name or "Unknown Game"
|
518
|
+
game_lines[game_name].append(line)
|
519
|
+
|
520
|
+
# Sort lines within each game by timestamp
|
521
|
+
for game_name in game_lines:
|
522
|
+
game_lines[game_name].sort(key=lambda x: float(x.timestamp))
|
523
|
+
|
524
|
+
duplicates_to_remove = []
|
525
|
+
duplicate_samples = {}
|
526
|
+
time_window_seconds = time_window_minutes * 60
|
527
|
+
|
528
|
+
# Find duplicates within time window for each game
|
529
|
+
for game_name, lines in game_lines.items():
|
530
|
+
text_timeline = []
|
531
|
+
|
532
|
+
for line in lines:
|
533
|
+
if not line.line_text or not line.line_text.strip():
|
534
|
+
continue
|
535
|
+
|
536
|
+
line_text = line.line_text if case_sensitive else line.line_text.lower()
|
537
|
+
timestamp = float(line.timestamp)
|
538
|
+
|
539
|
+
# Check for duplicates within time window
|
540
|
+
for prev_text, prev_timestamp, prev_line_id in reversed(text_timeline):
|
541
|
+
if timestamp - prev_timestamp > time_window_seconds:
|
542
|
+
break # Outside time window
|
543
|
+
|
544
|
+
if prev_text == line_text:
|
545
|
+
# Found duplicate within time window
|
546
|
+
duplicates_to_remove.append(line.id)
|
547
|
+
|
548
|
+
# Store sample for preview
|
549
|
+
if line_text not in duplicate_samples:
|
550
|
+
duplicate_samples[line_text] = {
|
551
|
+
'text': line.line_text, # Original case
|
552
|
+
'occurrences': 1
|
553
|
+
}
|
554
|
+
duplicate_samples[line_text]['occurrences'] += 1
|
555
|
+
break
|
556
|
+
|
557
|
+
text_timeline.append((line_text, timestamp, line.id))
|
558
|
+
|
559
|
+
# Calculate statistics
|
560
|
+
duplicates_count = len(duplicates_to_remove)
|
561
|
+
games_affected = len([game for game in game_lines.keys() if any(
|
562
|
+
line.id in duplicates_to_remove for line in game_lines[game]
|
563
|
+
)])
|
564
|
+
|
565
|
+
# Get sample duplicates
|
566
|
+
samples = list(duplicate_samples.values())[:10]
|
567
|
+
|
568
|
+
return jsonify({
|
569
|
+
'duplicates_count': duplicates_count,
|
570
|
+
'games_affected': games_affected,
|
571
|
+
'samples': samples
|
572
|
+
}), 200
|
573
|
+
|
574
|
+
except Exception as e:
|
575
|
+
logger.error(f"Error in preview deduplication: {e}")
|
576
|
+
return jsonify({'error': f'Preview failed: {str(e)}'}), 500
|
577
|
+
|
578
|
+
@app.route('/api/deduplicate', methods=['POST'])
|
579
|
+
def api_deduplicate():
|
580
|
+
"""
|
581
|
+
Remove duplicate sentences from database based on time window and game selection.
|
582
|
+
"""
|
583
|
+
try:
|
584
|
+
data = request.get_json()
|
585
|
+
if not data:
|
586
|
+
return jsonify({'error': 'No data provided'}), 400
|
587
|
+
|
588
|
+
games = data.get('games', [])
|
589
|
+
time_window_minutes = data.get('time_window_minutes', 5)
|
590
|
+
case_sensitive = data.get('case_sensitive', False)
|
591
|
+
preserve_newest = data.get('preserve_newest', False)
|
592
|
+
|
593
|
+
if not games:
|
594
|
+
return jsonify({'error': 'At least one game must be selected'}), 400
|
595
|
+
|
596
|
+
# Get lines from selected games
|
597
|
+
if 'all' in games:
|
598
|
+
all_lines = GameLinesTable.all()
|
599
|
+
else:
|
600
|
+
all_lines = []
|
601
|
+
for game_name in games:
|
602
|
+
game_lines = GameLinesTable.get_all_lines_for_scene(game_name)
|
603
|
+
all_lines.extend(game_lines)
|
604
|
+
|
605
|
+
if not all_lines:
|
606
|
+
return jsonify({'deleted_count': 0}), 200
|
607
|
+
|
608
|
+
# Group lines by game and sort by timestamp
|
609
|
+
game_lines = defaultdict(list)
|
610
|
+
for line in all_lines:
|
611
|
+
game_name = line.game_name or "Unknown Game"
|
612
|
+
game_lines[game_name].append(line)
|
613
|
+
|
614
|
+
# Sort lines within each game by timestamp
|
615
|
+
for game_name in game_lines:
|
616
|
+
game_lines[game_name].sort(key=lambda x: float(x.timestamp))
|
617
|
+
|
618
|
+
duplicates_to_remove = []
|
619
|
+
time_window_seconds = time_window_minutes * 60
|
620
|
+
|
621
|
+
# Find duplicates within time window for each game
|
622
|
+
for game_name, lines in game_lines.items():
|
623
|
+
text_timeline = []
|
624
|
+
|
625
|
+
for line in lines:
|
626
|
+
if not line.line_text or not line.line_text.strip():
|
627
|
+
continue
|
628
|
+
|
629
|
+
line_text = line.line_text if case_sensitive else line.line_text.lower()
|
630
|
+
timestamp = float(line.timestamp)
|
631
|
+
|
632
|
+
# Check for duplicates within time window
|
633
|
+
duplicate_found = False
|
634
|
+
for i, (prev_text, prev_timestamp, prev_line_id) in enumerate(reversed(text_timeline)):
|
635
|
+
if timestamp - prev_timestamp > time_window_seconds:
|
636
|
+
break # Outside time window
|
637
|
+
|
638
|
+
if prev_text == line_text:
|
639
|
+
# Found duplicate within time window
|
640
|
+
if preserve_newest:
|
641
|
+
# Remove the older one (previous)
|
642
|
+
duplicates_to_remove.append(prev_line_id)
|
643
|
+
# Update timeline to replace old entry with new one
|
644
|
+
timeline_index = len(text_timeline) - 1 - i
|
645
|
+
text_timeline[timeline_index] = (line_text, timestamp, line.id)
|
646
|
+
else:
|
647
|
+
# Remove the newer one (current)
|
648
|
+
duplicates_to_remove.append(line.id)
|
649
|
+
|
650
|
+
duplicate_found = True
|
651
|
+
break
|
652
|
+
|
653
|
+
if not duplicate_found:
|
654
|
+
text_timeline.append((line_text, timestamp, line.id))
|
655
|
+
|
656
|
+
# Delete the duplicate lines
|
657
|
+
deleted_count = 0
|
658
|
+
for line_id in set(duplicates_to_remove): # Remove duplicates from deletion list
|
659
|
+
try:
|
660
|
+
GameLinesTable._db.execute(
|
661
|
+
f"DELETE FROM {GameLinesTable._table} WHERE id=?",
|
662
|
+
(line_id,),
|
663
|
+
commit=True
|
664
|
+
)
|
665
|
+
deleted_count += 1
|
666
|
+
except Exception as e:
|
667
|
+
logger.warning(f"Failed to delete duplicate line {line_id}: {e}")
|
668
|
+
|
669
|
+
logger.info(f"Deduplication completed: removed {deleted_count} duplicate sentences from {len(games)} games with {time_window_minutes}min window")
|
670
|
+
|
671
|
+
return jsonify({
|
672
|
+
'deleted_count': deleted_count,
|
673
|
+
'message': f'Successfully removed {deleted_count} duplicate sentences'
|
674
|
+
}), 200
|
675
|
+
|
676
|
+
except Exception as e:
|
677
|
+
logger.error(f"Error in deduplication: {e}")
|
678
|
+
return jsonify({'error': f'Deduplication failed: {str(e)}'}), 500
|
679
|
+
|
680
|
+
@app.route('/api/stats')
|
681
|
+
def api_stats():
|
682
|
+
"""
|
683
|
+
Provides aggregated, cumulative stats for charting.
|
684
|
+
Accepts optional 'year' parameter to filter heatmap data.
|
685
|
+
"""
|
686
|
+
# Get optional year filter parameter
|
687
|
+
filter_year = request.args.get('year', None)
|
688
|
+
|
689
|
+
# 1. Fetch all lines and sort them chronologically
|
690
|
+
all_lines = sorted(GameLinesTable.all(), key=lambda line: line.timestamp)
|
691
|
+
|
692
|
+
if not all_lines:
|
693
|
+
return jsonify({"labels": [], "datasets": []})
|
694
|
+
|
695
|
+
# 2. Process data into daily totals for each game
|
696
|
+
# Structure: daily_data[date_str][game_name] = {'lines': N, 'chars': N}
|
697
|
+
daily_data = defaultdict(lambda: defaultdict(lambda: {'lines': 0, 'chars': 0}))
|
698
|
+
|
699
|
+
for line in all_lines:
|
700
|
+
day_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
|
701
|
+
game = line.game_name or "Unknown Game"
|
702
|
+
|
703
|
+
daily_data[day_str][game]['lines'] += 1
|
704
|
+
daily_data[day_str][game]['chars'] += len(line.line_text) if line.line_text else 0
|
705
|
+
|
706
|
+
# 3. Create cumulative datasets for Chart.js
|
707
|
+
sorted_days = sorted(daily_data.keys())
|
708
|
+
game_names = GameLinesTable.get_all_games_with_lines()
|
709
|
+
|
710
|
+
# Keep track of the running total for each metric for each game
|
711
|
+
cumulative_totals = defaultdict(lambda: {'lines': 0, 'chars': 0})
|
712
|
+
|
713
|
+
# Structure for final data: final_data[game_name][metric] = [day1_val, day2_val, ...]
|
714
|
+
final_data = defaultdict(lambda: defaultdict(list))
|
715
|
+
|
716
|
+
for day in sorted_days:
|
717
|
+
for game in game_names:
|
718
|
+
# Add the day's total to the cumulative total
|
719
|
+
cumulative_totals[game]['lines'] += daily_data[day][game]['lines']
|
720
|
+
cumulative_totals[game]['chars'] += daily_data[day][game]['chars']
|
721
|
+
|
722
|
+
# Append the new cumulative total to the list for that day
|
723
|
+
final_data[game]['lines'].append(cumulative_totals[game]['lines'])
|
724
|
+
final_data[game]['chars'].append(cumulative_totals[game]['chars'])
|
725
|
+
|
726
|
+
# 4. Format into Chart.js dataset structure
|
727
|
+
datasets = []
|
728
|
+
# A simple color palette for the chart lines
|
729
|
+
colors = ['#3498db', '#e74c3c', '#2ecc71', '#f1c40f', '#9b59b6', '#1abc9c', '#e67e22']
|
730
|
+
|
731
|
+
for i, game in enumerate(game_names):
|
732
|
+
color = colors[i % len(colors)]
|
733
|
+
|
734
|
+
datasets.append({
|
735
|
+
"label": f"{game} - Lines Received",
|
736
|
+
"data": final_data[game]['lines'],
|
737
|
+
"borderColor": color,
|
738
|
+
"backgroundColor": f"{color}33", # Semi-transparent for fill
|
739
|
+
"fill": False,
|
740
|
+
"tension": 0.1
|
741
|
+
})
|
742
|
+
datasets.append({
|
743
|
+
"label": f"{game} - Characters Read",
|
744
|
+
"data": final_data[game]['chars'],
|
745
|
+
"borderColor": color,
|
746
|
+
"backgroundColor": f"{color}33",
|
747
|
+
"fill": False,
|
748
|
+
"tension": 0.1,
|
749
|
+
"hidden": True # Hide by default to not clutter the chart
|
750
|
+
})
|
751
|
+
|
752
|
+
# 5. Calculate additional chart data
|
753
|
+
kanji_grid_data = calculate_kanji_frequency(all_lines)
|
754
|
+
heatmap_data = calculate_heatmap_data(all_lines, filter_year)
|
755
|
+
total_chars_data = calculate_total_chars_per_game(all_lines)
|
756
|
+
reading_time_data = calculate_reading_time_per_game(all_lines)
|
757
|
+
reading_speed_per_game_data = calculate_reading_speed_per_game(all_lines)
|
758
|
+
|
759
|
+
# 6. Calculate dashboard statistics
|
760
|
+
current_game_stats = calculate_current_game_stats(all_lines)
|
761
|
+
all_games_stats = calculate_all_games_stats(all_lines)
|
762
|
+
|
763
|
+
# 7. Prepare allLinesData for frontend calculations (needed for average daily time)
|
764
|
+
all_lines_data = []
|
765
|
+
for line in all_lines:
|
766
|
+
all_lines_data.append({
|
767
|
+
'timestamp': float(line.timestamp),
|
768
|
+
'game_name': line.game_name or 'Unknown Game',
|
769
|
+
'characters': len(line.line_text) if line.line_text else 0
|
770
|
+
})
|
771
|
+
|
772
|
+
return jsonify({
|
773
|
+
"labels": sorted_days,
|
774
|
+
"datasets": datasets,
|
775
|
+
"kanjiGridData": kanji_grid_data,
|
776
|
+
"heatmapData": heatmap_data,
|
777
|
+
"totalCharsPerGame": total_chars_data,
|
778
|
+
"readingTimePerGame": reading_time_data,
|
779
|
+
"readingSpeedPerGame": reading_speed_per_game_data,
|
780
|
+
"currentGameStats": current_game_stats,
|
781
|
+
"allGamesStats": all_games_stats,
|
782
|
+
"allLinesData": all_lines_data
|
783
|
+
})
|