GameSentenceMiner 2.16.5__py3-none-any.whl → 2.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
GameSentenceMiner/anki.py CHANGED
@@ -87,13 +87,18 @@ def update_anki_card(last_note: AnkiCard, note=None, audio_path='', video_path='
87
87
 
88
88
  if update_picture and screenshot_in_anki:
89
89
  note['fields'][get_config().anki.picture_field] = image_html
90
-
90
+
91
91
  if video_in_anki:
92
92
  note['fields'][get_config().anki.video_field] = video_in_anki
93
-
93
+
94
94
  if not get_config().screenshot.enabled:
95
95
  logger.info("Skipping Adding Screenshot to Anki, Screenshot is disabled in settings")
96
96
 
97
+ # Add game name to field if configured
98
+ game_name_field = get_config().anki.game_name_field
99
+ if note and 'fields' in note and game_name_field:
100
+ note['fields'][game_name_field] = get_current_game()
101
+
97
102
  if note and 'fields' in note and get_config().ai.enabled:
98
103
  sentence_field = note['fields'].get(get_config().anki.sentence_field, {})
99
104
  sentence_to_translate = sentence_field if sentence_field else last_note.get_field(
@@ -307,6 +307,7 @@ class ConfigApp:
307
307
  self.word_field_value = tk.StringVar(value=self.settings.anki.word_field)
308
308
  self.previous_sentence_field_value = tk.StringVar(value=self.settings.anki.previous_sentence_field)
309
309
  self.previous_image_field_value = tk.StringVar(value=self.settings.anki.previous_image_field)
310
+ self.game_name_field_value = tk.StringVar(value=self.settings.anki.game_name_field)
310
311
  self.video_field_value = tk.StringVar(value=self.settings.anki.video_field)
311
312
  self.custom_tags_value = tk.StringVar(value=', '.join(self.settings.anki.custom_tags))
312
313
  self.tags_to_check_value = tk.StringVar(value=', '.join(self.settings.anki.tags_to_check))
@@ -528,6 +529,7 @@ class ConfigApp:
528
529
  previous_sentence_field=self.previous_sentence_field_value.get(),
529
530
  previous_image_field=self.previous_image_field_value.get(),
530
531
  video_field=self.video_field_value.get(),
532
+ game_name_field=self.game_name_field_value.get(),
531
533
  custom_tags=[tag.strip() for tag in self.custom_tags_value.get().split(',') if tag.strip()],
532
534
  tags_to_check=[tag.strip().lower() for tag in self.tags_to_check_value.get().split(',') if tag.strip()],
533
535
  add_game_tag=self.add_game_tag_value.get(),
@@ -1323,6 +1325,12 @@ class ConfigApp:
1323
1325
  row=self.current_row, column=0)
1324
1326
  ttk.Entry(anki_frame, textvariable=self.video_field_value).grid(row=self.current_row, column=1, sticky='EW', pady=2)
1325
1327
  self.current_row += 1
1328
+
1329
+ game_name_field_i18n = anki_i18n.get('game_name_field', {})
1330
+ HoverInfoLabelWidget(anki_frame, text=game_name_field_i18n.get('label', 'Game Name Field:'),
1331
+ tooltip=game_name_field_i18n.get('tooltip', 'Field in Anki for the game name.'), row=self.current_row, column=0)
1332
+ ttk.Entry(anki_frame, textvariable=self.game_name_field_value).grid(row=self.current_row, column=1, columnspan=3, sticky='EW', pady=2)
1333
+ self.current_row += 1
1326
1334
 
1327
1335
  tags_i18n = anki_i18n.get('custom_tags', {})
1328
1336
  HoverInfoLabelWidget(anki_frame, text=tags_i18n.get('label', '...'), tooltip=tags_i18n.get('tooltip', '...'),
@@ -184,12 +184,16 @@
184
184
  },
185
185
  "video_field": {
186
186
  "label": "Video Field:",
187
- "tooltip": "Field in Anki for associated videos. This will be AV1 encoded video of the VAD Trimmed Voiceline, if no Voice found, this will be empty."
187
+ "tooltip": "Field in Anki for associated videos. This will be AV1 encoded video of the VAD Trimmed Voiceline, if no Voice found, this will be empty. (OPTIONAL)"
188
188
  },
189
189
  "custom_tags": {
190
190
  "label": "Add Tags:",
191
191
  "tooltip": "Comma-separated custom tags for the Anki cards."
192
192
  },
193
+ "game_name_field": {
194
+ "label": "Game Name Field:",
195
+ "tooltip": "Field in Anki for the game name. If empty, game name will not be added as a field. (OPTIONAL)"
196
+ },
193
197
  "tags_to_check": {
194
198
  "label": "Tags to work on:",
195
199
  "tooltip": "Comma-separated Tags, script will only do 1-click on cards with these tags (Recommend keep empty, or use Yomitan Profile to add custom tag from texthooker page)"
@@ -189,6 +189,10 @@
189
189
  "label": "追加タグ:",
190
190
  "tooltip": "Ankiカードに追加するカスタムタグ(カンマ区切り)。"
191
191
  },
192
+ "game_name_field": {
193
+ "label": "ゲーム名フィールド:",
194
+ "tooltip": "Ankiのゲーム名用フィールド。空欄の場合は追加されません。"
195
+ },
192
196
  "tags_to_check": {
193
197
  "label": "対象タグ:",
194
198
  "tooltip": "これらのタグを持つカードのみワンクリック対象になります(通常は空を推奨)。"
@@ -190,6 +190,10 @@
190
190
  "label": "添加标签:",
191
191
  "tooltip": "Anki 卡片的自定义标签(以逗号分隔)。"
192
192
  },
193
+ "game_name_field": {
194
+ "label": "游戏名称字段:",
195
+ "tooltip": "Anki 中用于游戏名称的字段。如果为空,则不会添加游戏名称。"
196
+ },
193
197
  "tags_to_check": {
194
198
  "label": "处理的标签:",
195
199
  "tooltip": "脚本将只对带有这些标签的卡片进行一键操作(建议留空)。"
@@ -441,6 +441,7 @@ class Anki:
441
441
  custom_tags: List[str] = None
442
442
  tags_to_check: List[str] = None
443
443
  add_game_tag: bool = True
444
+ game_name_field: str = ''
444
445
  polling_rate: int = 200
445
446
  overwrite_audio: bool = False
446
447
  overwrite_picture: bool = True
@@ -780,6 +781,15 @@ class ProfileConfig:
780
781
  def config_changed(self, new: 'ProfileConfig') -> bool:
781
782
  return self != new
782
783
 
784
+ @dataclass_json
785
+ @dataclass
786
+ class StatsConfig:
787
+ afk_timer_seconds: int = 120
788
+ session_gap_seconds: int = 3600
789
+ streak_requirement_hours: float = 0.01 # 0.01 hours (36 seconds) required per day to keep your streak by default
790
+ reading_hours_target: int = 1500 # Target reading hours based on TMW N1 achievement data
791
+ character_count_target: int = 25000000 # Target character count (25M) inspired by Discord server milestones
792
+ games_target: int = 100 # Target VNs/games completed based on Refold community standards
783
793
 
784
794
  @dataclass_json
785
795
  @dataclass
@@ -788,6 +798,7 @@ class Config:
788
798
  current_profile: str = DEFAULT_CONFIG
789
799
  switch_to_default_if_not_found: bool = True
790
800
  locale: str = Locale.English.value
801
+ stats: StatsConfig = field(default_factory=StatsConfig)
791
802
 
792
803
  @classmethod
793
804
  def new(cls):
@@ -812,6 +823,18 @@ class Config:
812
823
  return cls.from_dict(data)
813
824
  else:
814
825
  return cls.new()
826
+
827
+ def __post_init__(self):
828
+ # Legacy support: copy non-default stats values still stored in a profile's advanced settings into the global stats config
829
+ default_stats = StatsConfig()
830
+ for profile in self.configs.values():
831
+ if profile.advanced:
832
+ if profile.advanced.afk_timer_seconds != default_stats.afk_timer_seconds:
833
+ self.stats.afk_timer_seconds = profile.advanced.afk_timer_seconds
834
+ if profile.advanced.session_gap_seconds != default_stats.session_gap_seconds:
835
+ self.stats.session_gap_seconds = profile.advanced.session_gap_seconds
836
+ if profile.advanced.streak_requirement_hours != default_stats.streak_requirement_hours:
837
+ self.stats.streak_requirement_hours = profile.advanced.streak_requirement_hours
815
838
 
816
839
  def save(self):
817
840
  with open(get_config_path(), 'w') as file:
@@ -1069,6 +1092,12 @@ def reload_config():
1069
1092
  logger.warning(
1070
1093
  "Backfill is enabled, but full auto is also enabled. Disabling backfill...")
1071
1094
  config.features.backfill_audio = False
1095
+
1096
+ def get_stats_config():
1097
+ global config_instance
1098
+ if config_instance is None:
1099
+ config_instance = load_config()
1100
+ return config_instance.stats
1072
1101
 
1073
1102
 
1074
1103
  def get_master_config():
@@ -1085,6 +1114,12 @@ def save_current_config(config):
1085
1114
  config_instance.set_config_for_profile(
1086
1115
  config_instance.current_profile, config)
1087
1116
  save_full_config(config_instance)
1117
+
1118
+
1119
+ def save_stats_config(stats_config):
1120
+ global config_instance
1121
+ config_instance.stats = stats_config
1122
+ save_full_config(config_instance)
1088
1123
 
1089
1124
 
1090
1125
  def switch_profile_and_save(profile_name):
@@ -11,7 +11,7 @@ from flask import request, jsonify
11
11
  import regex
12
12
 
13
13
  from GameSentenceMiner.util.db import GameLinesTable
14
- from GameSentenceMiner.util.configuration import logger, get_config, save_current_config
14
+ from GameSentenceMiner.util.configuration import get_stats_config, logger, get_config, save_current_config, save_stats_config
15
15
  from GameSentenceMiner.web.stats import (
16
16
  calculate_kanji_frequency, calculate_heatmap_data, calculate_total_chars_per_game,
17
17
  calculate_reading_time_per_game, calculate_reading_speed_per_game,
@@ -35,6 +35,7 @@ def register_database_api_routes(app):
35
35
  sort_by = request.args.get('sort', 'relevance')
36
36
  page = int(request.args.get('page', 1))
37
37
  page_size = int(request.args.get('page_size', 20))
38
+ use_regex = request.args.get('use_regex', 'false').lower() == 'true'
38
39
 
39
40
  # Validate parameters
40
41
  if not query:
@@ -44,65 +45,129 @@ def register_database_api_routes(app):
44
45
  page = 1
45
46
  if page_size < 1 or page_size > 100:
46
47
  page_size = 20
47
-
48
- # Build the SQL query
49
- base_query = f"SELECT * FROM {GameLinesTable._table} WHERE line_text LIKE ?"
50
- params = [f'%{query}%']
51
-
52
- # Add game filter if specified
53
- if game_filter:
54
- base_query += " AND game_name = ?"
55
- params.append(game_filter)
56
-
57
- # Add sorting
58
- if sort_by == 'date_desc':
59
- base_query += " ORDER BY timestamp DESC"
60
- elif sort_by == 'date_asc':
61
- base_query += " ORDER BY timestamp ASC"
62
- elif sort_by == 'game_name':
63
- base_query += " ORDER BY game_name, timestamp DESC"
64
- else: # relevance - could be enhanced with proper scoring
65
- base_query += " ORDER BY timestamp DESC"
66
-
67
- # Get total count for pagination
68
- count_query = f"SELECT COUNT(*) FROM {GameLinesTable._table} WHERE line_text LIKE ?"
69
- count_params = [f'%{query}%']
70
- if game_filter:
71
- count_query += " AND game_name = ?"
72
- count_params.append(game_filter)
73
-
74
- total_results = GameLinesTable._db.fetchone(count_query, count_params)[0]
75
-
76
- # Add pagination
77
- offset = (page - 1) * page_size
78
- base_query += f" LIMIT ? OFFSET ?"
79
- params.extend([page_size, offset])
80
-
81
- # Execute search query
82
- rows = GameLinesTable._db.fetchall(base_query, params)
83
-
84
- # Format results
85
- results = []
86
- for row in rows:
87
- game_line = GameLinesTable.from_row(row)
88
- if game_line:
89
- results.append({
90
- 'id': game_line.id,
91
- 'sentence': game_line.line_text or '',
92
- 'game_name': game_line.game_name or 'Unknown Game',
93
- 'timestamp': float(game_line.timestamp) if game_line.timestamp else 0,
94
- 'translation': game_line.translation or None,
95
- 'has_audio': bool(game_line.audio_path),
96
- 'has_screenshot': bool(game_line.screenshot_path)
97
- })
98
-
99
- return jsonify({
100
- 'results': results,
101
- 'total': total_results,
102
- 'page': page,
103
- 'page_size': page_size,
104
- 'total_pages': (total_results + page_size - 1) // page_size
105
- }), 200
48
+
49
+ if use_regex:
50
+ # Regex search: fetch all candidate rows, filter in Python
51
+ try:
52
+ # Ensure query is a string
53
+ if not isinstance(query, str):
54
+ return jsonify({'error': 'Invalid query parameter type'}), 400
55
+
56
+ all_lines = GameLinesTable.all()
57
+ if game_filter:
58
+ all_lines = [line for line in all_lines if line.game_name == game_filter]
59
+
60
+ # Compile regex pattern with proper error handling
61
+ try:
62
+ pattern = re.compile(query, re.IGNORECASE)
63
+ except re.error as regex_err:
64
+ return jsonify({'error': f'Invalid regex pattern: {str(regex_err)}'}), 400
65
+
66
+ # Filter lines using regex
67
+ filtered_lines = []
68
+ for line in all_lines:
69
+ if line.line_text and isinstance(line.line_text, str):
70
+ try:
71
+ if pattern.search(line.line_text):
72
+ filtered_lines.append(line)
73
+ except Exception as search_err:
74
+ # Log but continue with other lines
75
+ logger.warning(f"Regex search error on line {line.id}: {search_err}")
76
+ continue
77
+
78
+ # Sorting (default: timestamp DESC, or as specified)
79
+ if sort_by == 'date_asc':
80
+ filtered_lines.sort(key=lambda l: float(l.timestamp) if l.timestamp else 0)
81
+ elif sort_by == 'game_name':
82
+ filtered_lines.sort(key=lambda l: (l.game_name or '', -(float(l.timestamp) if l.timestamp else 0)))
83
+ else: # date_desc or relevance
84
+ filtered_lines.sort(key=lambda l: -(float(l.timestamp) if l.timestamp else 0))
85
+
86
+ total_results = len(filtered_lines)
87
+ # Pagination
88
+ start = (page - 1) * page_size
89
+ end = start + page_size
90
+ paged_lines = filtered_lines[start:end]
91
+ results = []
92
+ for line in paged_lines:
93
+ results.append({
94
+ 'id': line.id,
95
+ 'sentence': line.line_text or '',
96
+ 'game_name': line.game_name or 'Unknown Game',
97
+ 'timestamp': float(line.timestamp) if line.timestamp else 0,
98
+ 'translation': line.translation or None,
99
+ 'has_audio': bool(getattr(line, 'audio_path', None)),
100
+ 'has_screenshot': bool(getattr(line, 'screenshot_path', None))
101
+ })
102
+ return jsonify({
103
+ 'results': results,
104
+ 'total': total_results,
105
+ 'page': page,
106
+ 'page_size': page_size,
107
+ 'total_pages': (total_results + page_size - 1) // page_size
108
+ }), 200
109
+ except Exception as e:
110
+ logger.error(f"Regex search failed: {e}")
111
+ return jsonify({'error': f'Search failed: {str(e)}'}), 500
112
+ else:
113
+ # Build the SQL query
114
+ base_query = f"SELECT * FROM {GameLinesTable._table} WHERE line_text LIKE ?"
115
+ params = [f'%{query}%']
116
+
117
+ # Add game filter if specified
118
+ if game_filter:
119
+ base_query += " AND game_name = ?"
120
+ params.append(game_filter)
121
+
122
+ # Add sorting
123
+ if sort_by == 'date_desc':
124
+ base_query += " ORDER BY timestamp DESC"
125
+ elif sort_by == 'date_asc':
126
+ base_query += " ORDER BY timestamp ASC"
127
+ elif sort_by == 'game_name':
128
+ base_query += " ORDER BY game_name, timestamp DESC"
129
+ else: # relevance - could be enhanced with proper scoring
130
+ base_query += " ORDER BY timestamp DESC"
131
+
132
+ # Get total count for pagination
133
+ count_query = f"SELECT COUNT(*) FROM {GameLinesTable._table} WHERE line_text LIKE ?"
134
+ count_params = [f'%{query}%']
135
+ if game_filter:
136
+ count_query += " AND game_name = ?"
137
+ count_params.append(game_filter)
138
+
139
+ total_results = GameLinesTable._db.fetchone(count_query, count_params)[0]
140
+
141
+ # Add pagination
142
+ offset = (page - 1) * page_size
143
+ base_query += f" LIMIT ? OFFSET ?"
144
+ params.extend([page_size, offset])
145
+
146
+ # Execute search query
147
+ rows = GameLinesTable._db.fetchall(base_query, params)
148
+
149
+ # Format results
150
+ results = []
151
+ for row in rows:
152
+ game_line = GameLinesTable.from_row(row)
153
+ if game_line:
154
+ results.append({
155
+ 'id': game_line.id,
156
+ 'sentence': game_line.line_text or '',
157
+ 'game_name': game_line.game_name or 'Unknown Game',
158
+ 'timestamp': float(game_line.timestamp) if game_line.timestamp else 0,
159
+ 'translation': game_line.translation or None,
160
+ 'has_audio': bool(game_line.audio_path),
161
+ 'has_screenshot': bool(game_line.screenshot_path)
162
+ })
163
+
164
+ return jsonify({
165
+ 'results': results,
166
+ 'total': total_results,
167
+ 'page': page,
168
+ 'page_size': page_size,
169
+ 'total_pages': (total_results + page_size - 1) // page_size
170
+ }), 200
106
171
 
107
172
  except ValueError as e:
108
173
  return jsonify({'error': 'Invalid pagination parameters'}), 400
@@ -229,14 +294,17 @@ def register_database_api_routes(app):
229
294
  @app.route('/api/settings', methods=['GET'])
230
295
  def api_get_settings():
231
296
  """
232
- Get current AFK timer, session gap, and streak requirement settings.
297
+ Get current AFK timer, session gap, streak requirement, and goal settings.
233
298
  """
234
299
  try:
235
- config = get_config()
300
+ config = get_stats_config()
236
301
  return jsonify({
237
- 'afk_timer_seconds': config.advanced.afk_timer_seconds,
238
- 'session_gap_seconds': config.advanced.session_gap_seconds,
239
- 'streak_requirement_hours': getattr(config.advanced, 'streak_requirement_hours', 1.0)
302
+ 'afk_timer_seconds': config.afk_timer_seconds,
303
+ 'session_gap_seconds': config.session_gap_seconds,
304
+ 'streak_requirement_hours': config.streak_requirement_hours,
305
+ 'reading_hours_target': config.reading_hours_target,
306
+ 'character_count_target': config.character_count_target,
307
+ 'games_target': config.games_target
240
308
  }), 200
241
309
  except Exception as e:
242
310
  logger.error(f"Error getting settings: {e}")
@@ -245,7 +313,7 @@ def register_database_api_routes(app):
245
313
  @app.route('/api/settings', methods=['POST'])
246
314
  def api_save_settings():
247
315
  """
248
- Save/update AFK timer, session gap, and streak requirement settings.
316
+ Save/update AFK timer, session gap, streak requirement, and goal settings.
249
317
  """
250
318
  try:
251
319
  data = request.get_json()
@@ -256,6 +324,9 @@ def register_database_api_routes(app):
256
324
  afk_timer = data.get('afk_timer_seconds')
257
325
  session_gap = data.get('session_gap_seconds')
258
326
  streak_requirement = data.get('streak_requirement_hours')
327
+ reading_hours_target = data.get('reading_hours_target')
328
+ character_count_target = data.get('character_count_target')
329
+ games_target = data.get('games_target')
259
330
 
260
331
  # Validate input - only require the settings that are provided
261
332
  settings_to_update = {}
@@ -287,22 +358,54 @@ def register_database_api_routes(app):
287
358
  except (ValueError, TypeError):
288
359
  return jsonify({'error': 'Streak requirement must be a valid number'}), 400
289
360
 
361
+ if reading_hours_target is not None:
362
+ try:
363
+ reading_hours_target = int(reading_hours_target)
364
+ if reading_hours_target < 1 or reading_hours_target > 10000:
365
+ return jsonify({'error': 'Reading hours target must be between 1 and 10,000 hours'}), 400
366
+ settings_to_update['reading_hours_target'] = reading_hours_target
367
+ except (ValueError, TypeError):
368
+ return jsonify({'error': 'Reading hours target must be a valid integer'}), 400
369
+
370
+ if character_count_target is not None:
371
+ try:
372
+ character_count_target = int(character_count_target)
373
+ if character_count_target < 1000 or character_count_target > 1000000000:
374
+ return jsonify({'error': 'Character count target must be between 1,000 and 1,000,000,000 characters'}), 400
375
+ settings_to_update['character_count_target'] = character_count_target
376
+ except (ValueError, TypeError):
377
+ return jsonify({'error': 'Character count target must be a valid integer'}), 400
378
+
379
+ if games_target is not None:
380
+ try:
381
+ games_target = int(games_target)
382
+ if games_target < 1 or games_target > 1000:
383
+ return jsonify({'error': 'Games target must be between 1 and 1,000'}), 400
384
+ settings_to_update['games_target'] = games_target
385
+ except (ValueError, TypeError):
386
+ return jsonify({'error': 'Games target must be a valid integer'}), 400
387
+
290
388
  if not settings_to_update:
291
389
  return jsonify({'error': 'No valid settings provided'}), 400
292
390
 
293
391
  # Update configuration
294
- config = get_config()
392
+ config = get_stats_config()
295
393
 
296
394
  if 'afk_timer_seconds' in settings_to_update:
297
- config.advanced.afk_timer_seconds = settings_to_update['afk_timer_seconds']
395
+ config.afk_timer_seconds = settings_to_update['afk_timer_seconds']
298
396
  if 'session_gap_seconds' in settings_to_update:
299
- config.advanced.session_gap_seconds = settings_to_update['session_gap_seconds']
397
+ config.session_gap_seconds = settings_to_update['session_gap_seconds']
300
398
  if 'streak_requirement_hours' in settings_to_update:
301
- setattr(config.advanced, 'streak_requirement_hours', settings_to_update['streak_requirement_hours'])
302
-
303
- # Save configuration
304
- save_current_config(config)
305
-
399
+ config.streak_requirement_hours = settings_to_update['streak_requirement_hours']
400
+ if 'reading_hours_target' in settings_to_update:
401
+ config.reading_hours_target = settings_to_update['reading_hours_target']
402
+ if 'character_count_target' in settings_to_update:
403
+ config.character_count_target = settings_to_update['character_count_target']
404
+ if 'games_target' in settings_to_update:
405
+ config.games_target = settings_to_update['games_target']
406
+
407
+ save_stats_config(config)
408
+
306
409
  logger.info(f"Settings updated: {settings_to_update}")
307
410
 
308
411
  response_data = {'message': 'Settings saved successfully'}
@@ -688,7 +791,7 @@ def register_database_api_routes(app):
688
791
  Provides aggregated, cumulative stats for charting.
689
792
  Accepts optional 'year' parameter to filter heatmap data.
690
793
  """
691
- punctionation_regex = regex.compile(r'[\p{P}\p{S}]')
794
+ punctionation_regex = regex.compile(r'[\p{P}\p{S}\p{Z}]')
692
795
  # Get optional year filter parameter
693
796
  filter_year = request.args.get('year', None)
694
797
 
@@ -712,14 +815,22 @@ def register_database_api_routes(app):
712
815
  # end_time = time.perf_counter()
713
816
  # logger.info(f"Without Punctuation removal and daily aggregation took {end_time - start_time:.4f} seconds for {len(all_lines)} lines")
714
817
 
818
+ # start_time = time.perf_counter()
819
+ wrong_instance_found = False
715
820
  for line in all_lines:
716
821
  day_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
717
822
  game = line.game_name or "Unknown Game"
718
823
  # Remove punctuation, symbols, and separators (whitespace) from line text before counting characters
719
- clean_text = punctionation_regex.sub('', line.line_text) if line.line_text else ''
824
+ clean_text = punctionation_regex.sub('', str(line.line_text)) if line.line_text else ''
825
+ if not isinstance(clean_text, str) and not wrong_instance_found:
826
+ logger.info(f"Non-string line_text encountered: {clean_text} (type: {type(clean_text)})")
827
+ wrong_instance_found = True
828
+
720
829
  line.line_text = clean_text # Update line text to cleaned version for future use
721
830
  daily_data[day_str][game]['lines'] += 1
722
831
  daily_data[day_str][game]['chars'] += len(clean_text)
832
+ # end_time = time.perf_counter()
833
+ # logger.info(f"With Punctuation removal and daily aggregation took {end_time - start_time:.4f} seconds for {len(all_lines)} lines")
723
834
 
724
835
  # 3. Create cumulative datasets for Chart.js
725
836
  sorted_days = sorted(daily_data.keys())
@@ -867,21 +978,24 @@ def register_database_api_routes(app):
867
978
  imported_lines = []
868
979
  games_set = set()
869
980
  errors = []
870
- seen_uuids = set() # Track UUIDs within this import batch
871
-
981
+ seen_uuids = set() # Track UUIDs + Line within import batch
982
+
983
+ def get_line_hash(uuid: str, line_text: str) -> str:
984
+ return uuid + '|' + line_text.strip()
985
+
872
986
  for row_num, row in enumerate(csv_reader):
873
987
  try:
874
988
  # Extract and validate required fields
875
- uuid = row.get('uuid', '').strip()
876
- name = row.get('name', '').strip()
989
+ game_uuid = row.get('uuid', '').strip()
990
+ game_name = row.get('name', '').strip()
877
991
  line = row.get('line', '').strip()
878
992
  time_str = row.get('time', '').strip()
879
993
 
880
994
  # Validate required fields
881
- if not uuid:
995
+ if not game_uuid:
882
996
  errors.append(f"Row {row_num}: Missing UUID")
883
997
  continue
884
- if not name:
998
+ if not game_name:
885
999
  errors.append(f"Row {row_num}: Missing name")
886
1000
  continue
887
1001
  if not line:
@@ -891,12 +1005,12 @@ def register_database_api_routes(app):
891
1005
  errors.append(f"Row {row_num}: Missing time")
892
1006
  continue
893
1007
 
894
- # Check for duplicates within this import batch
895
- if uuid in seen_uuids:
896
- logger.info(f"Skipping duplicate UUID within import batch: {uuid}")
1008
+ line_hash = get_line_hash(game_uuid, line)
1009
+ if line_hash in seen_uuids:
1010
+ logger.info(f"Skipping duplicate line from game UUID {game_uuid} in import batch")
897
1011
  continue
898
- seen_uuids.add(uuid)
899
-
1012
+ seen_uuids.add(line_hash)
1013
+
900
1014
  # Convert time to timestamp
901
1015
  try:
902
1016
  timestamp = float(time_str)
@@ -907,22 +1021,22 @@ def register_database_api_routes(app):
907
1021
  # Clean up line text (remove extra whitespace and newlines)
908
1022
  line_text = line.strip()
909
1023
 
910
- # Check if this UUID already exists in database
911
- existing_line = GameLinesTable.get(uuid)
1024
+ # Check if this line already exists in database
1025
+ existing_line = GameLinesTable.get(line_hash)
912
1026
  if existing_line:
913
- logger.info(f"Skipping duplicate UUID already in database: {uuid}")
1027
+ logger.info(f"Skipping duplicate UUID already in database: {line_hash}")
914
1028
  continue
915
1029
 
916
1030
  # Create GameLinesTable entry
917
1031
  game_line = GameLinesTable(
918
- id=uuid,
919
- game_name=name,
1032
+ id=line_hash,
1033
+ game_name=game_name,
920
1034
  line_text=line_text,
921
1035
  timestamp=timestamp
922
1036
  )
923
1037
 
924
1038
  imported_lines.append(game_line)
925
- games_set.add(name)
1039
+ games_set.add(game_name)
926
1040
 
927
1041
  except Exception as e:
928
1042
  errors.append(f"Row {row_num}: Error processing row - {str(e)}")