GameSentenceMiner 2.18.16__py3-none-any.whl → 2.18.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -111,8 +111,9 @@ def post_process(text, keep_blank_lines=False):
111
111
  text = '\n'.join([''.join(i.split()) for i in text.splitlines()])
112
112
  else:
113
113
  text = ''.join([''.join(i.split()) for i in text.splitlines()])
114
- text = text.replace('…', '...')
115
- text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
114
+ text = text.replace('…', '・・・')
115
+ text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '', text)
116
+ text = re.sub(r'・{3,}', '・・・', text)
116
117
  text = jaconv.h2z(text, ascii=True, digit=True)
117
118
  return text
118
119
 
@@ -1392,7 +1392,11 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
1392
1392
  # print(engine_index)
1393
1393
 
1394
1394
  if res:
1395
- text = do_configured_ocr_replacements(text)
1395
+ if isinstance(text, list):
1396
+ for i, line in enumerate(text):
1397
+ text[i] = do_configured_ocr_replacements(line)
1398
+ else:
1399
+ text = do_configured_ocr_replacements(text)
1396
1400
  if filtering:
1397
1401
  text, orig_text = filtering(text, last_result, engine=engine, is_second_ocr=is_second_ocr)
1398
1402
  if get_ocr_language() == "ja" or get_ocr_language() == "zh":
@@ -888,15 +888,16 @@ class Config:
888
888
  if profile.advanced.streak_requirement_hours != default_stats.streak_requirement_hours:
889
889
  self.stats.streak_requirement_hours = profile.advanced.streak_requirement_hours
890
890
 
891
+ self.overlay = self.get_config().overlay
892
+
891
893
  # Add a way to migrate certain things based on version if needed, also help with better defaults
892
894
  if self.version:
893
- if self.version != get_current_version():
895
+ current_version = get_current_version()
896
+ if self.version != current_version:
894
897
  from packaging import version
895
- logger.info(f"New Config Found: {self.version} != {get_current_version()}")
898
+ logger.info(f"New Config Found: {self.version} != {current_version}")
896
899
  # Handle version mismatch
897
- changed = False
898
900
  if version.parse(self.version) < version.parse("2.18.0"):
899
- changed = True
900
901
  # Example, doesn't need to be done
901
902
  for profile in self.configs.values():
902
903
  profile.obs.get_game_from_scene = True
@@ -904,11 +905,7 @@ class Config:
904
905
  if profile.vad.selected_vad_model == WHISPER and profile.vad.backup_vad_model == SILERO:
905
906
  profile.vad.backup_vad_model = OFF
906
907
 
907
- if changed:
908
- self.save()
909
- self.overlay = self.get_config().overlay
910
-
911
- self.version = get_current_version()
908
+ self.save()
912
909
 
913
910
  def save(self):
914
911
  with open(get_config_path(), 'w') as file:
@@ -15,10 +15,14 @@ import uuid
15
15
  import pytz
16
16
  from datetime import timedelta
17
17
 
18
+ import regex
19
+
18
20
  from GameSentenceMiner.util.text_log import GameLine
19
21
  from GameSentenceMiner.util.configuration import get_stats_config, logger, is_dev
20
22
  import gzip
21
23
 
24
+ # Matches any Unicode punctuation (\p{P}), symbol (\p{S}), or separator (\p{Z}); \p{Z} includes whitespace/separator chars
25
+ punctuation_regex = regex.compile(r'[\p{P}\p{S}\p{Z}]')
22
26
 
23
27
  class SQLiteDB:
24
28
  """
@@ -136,7 +140,7 @@ class SQLiteDBTable:
136
140
  return cls.from_row(row) if row else None
137
141
 
138
142
  @classmethod
139
- def from_row(cls: Type[T], row: Tuple) -> T:
143
+ def from_row(cls: Type[T], row: Tuple, clean_columns: list = []) -> T:
140
144
  if not row:
141
145
  return None
142
146
  obj = cls()
@@ -162,6 +166,9 @@ class SQLiteDBTable:
162
166
  field = expected_fields[expected_pos]
163
167
  field_type = cls._types[expected_pos]
164
168
 
169
+ if field in clean_columns and isinstance(row_value, str):
170
+ row_value = punctuation_regex.sub('', row_value).strip()
171
+
165
172
  cls._set_field_value(obj, field, field_type, row_value, expected_pos == 0 and field == cls._pk)
166
173
 
167
174
  except Exception as e:
@@ -483,6 +490,12 @@ class GameLinesTable(SQLiteDBTable):
483
490
  self.translation = translation if translation is not None else ''
484
491
  self.original_game_name = original_game_name if original_game_name is not None else ''
485
492
  self.game_id = game_id if game_id is not None else ''
493
+
494
+ @classmethod
495
+ def all(cls, for_stats: bool = False) -> List['GameLinesTable']:
496
+ rows = cls._db.fetchall(f"SELECT * FROM {cls._table}")
497
+ clean_columns = ['line_text'] if for_stats else []
498
+ return [cls.from_row(row, clean_columns=clean_columns) for row in rows]
486
499
 
487
500
  @classmethod
488
501
  def get_all_lines_for_scene(cls, game_name: str) -> List['GameLinesTable']:
@@ -539,7 +552,7 @@ class GameLinesTable(SQLiteDBTable):
539
552
  )
540
553
 
541
554
  @classmethod
542
- def get_lines_filtered_by_timestamp(cls, start: Optional[float] = None, end: Optional[float] = None) -> List['GameLinesTable']:
555
+ def get_lines_filtered_by_timestamp(cls, start: Optional[float] = None, end: Optional[float] = None, for_stats=False) -> List['GameLinesTable']:
543
556
  """
544
557
  Fetches all lines optionally filtered by start and end timestamps.
545
558
  If start or end is None, that bound is ignored.
@@ -565,8 +578,9 @@ class GameLinesTable(SQLiteDBTable):
565
578
 
566
579
  # Execute the query
567
580
  rows = cls._db.fetchall(query, tuple(params))
568
- return [cls.from_row(row) for row in rows]
569
-
581
+ clean_columns = ['line_text'] if for_stats else []
582
+ return [cls.from_row(row, clean_columns=clean_columns) for row in rows]
583
+
570
584
  class StatsRollupTable(SQLiteDBTable):
571
585
  _table = 'stats_rollup'
572
586
  _fields = ['date', 'games_played', 'lines_mined', 'anki_cards_created', 'time_spent_mining']
@@ -24,7 +24,6 @@ from GameSentenceMiner.web.stats import (
24
24
  calculate_hourly_reading_speed, calculate_peak_daily_stats, calculate_peak_session_stats
25
25
  )
26
26
 
27
-
28
27
  def register_database_api_routes(app):
29
28
  """Register all database API routes with the Flask app."""
30
29
 
@@ -1069,7 +1068,6 @@ def register_database_api_routes(app):
1069
1068
  Accepts optional 'year' parameter to filter heatmap data.
1070
1069
  """
1071
1070
  try:
1072
- punctionation_regex = regex.compile(r'[\p{P}\p{S}\p{Z}]')
1073
1071
  # Get optional year filter parameter
1074
1072
  filter_year = request.args.get('year', None)
1075
1073
 
@@ -1082,7 +1080,7 @@ def register_database_api_routes(app):
1082
1080
  end_timestamp = float(end_timestamp) if end_timestamp else None
1083
1081
 
1084
1082
  # 1. Fetch all lines and sort them chronologically
1085
- all_lines = GameLinesTable.get_lines_filtered_by_timestamp(start=start_timestamp, end=end_timestamp)
1083
+ all_lines = GameLinesTable.get_lines_filtered_by_timestamp(start=start_timestamp, end=end_timestamp, for_stats=True)
1086
1084
 
1087
1085
  if not all_lines:
1088
1086
  return jsonify({"labels": [], "datasets": []})
@@ -1095,14 +1093,12 @@ def register_database_api_routes(app):
1095
1093
  day_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
1096
1094
  game = line.game_name or "Unknown Game"
1097
1095
  # Remove punctuation and symbols from line text before counting characters
1098
- clean_text = punctionation_regex.sub('', str(line.line_text)) if line.line_text else ''
1099
- if not isinstance(clean_text, str) and not wrong_instance_found:
1100
- logger.info(f"Non-string line_text encountered: {clean_text} (type: {type(clean_text)})")
1096
+ if not isinstance(line.line_text, str) and not wrong_instance_found:
1097
+ logger.info(f"Non-string line_text encountered: {line.line_text} (type: {type(line.line_text)})")
1101
1098
  wrong_instance_found = True
1102
1099
 
1103
- line.line_text = clean_text # Update line text to cleaned version for future use
1104
1100
  daily_data[day_str][game]['lines'] += 1
1105
- daily_data[day_str][game]['chars'] += len(clean_text)
1101
+ daily_data[day_str][game]['chars'] += len(line.line_text)
1106
1102
 
1107
1103
  # 3. Create cumulative datasets for Chart.js
1108
1104
  sorted_days = sorted(daily_data.keys())
@@ -1318,7 +1314,7 @@ def register_database_api_routes(app):
1318
1314
  today = datetime.date.today()
1319
1315
 
1320
1316
  # Get all lines for overall progress
1321
- all_lines = GameLinesTable.all()
1317
+ all_lines = GameLinesTable.all(for_stats=True)
1322
1318
  if not all_lines:
1323
1319
  return jsonify({
1324
1320
  'hours': {'required': 0, 'progress': 0, 'has_target': False},
@@ -1428,7 +1424,7 @@ def register_database_api_routes(app):
1428
1424
  thirty_days_ago = today - datetime.timedelta(days=30)
1429
1425
 
1430
1426
  # Get all lines
1431
- all_lines = GameLinesTable.all()
1427
+ all_lines = GameLinesTable.all(for_stats=True)
1432
1428
  if not all_lines:
1433
1429
  return jsonify({
1434
1430
  'hours': {'projection': 0, 'daily_average': 0},
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.18.16
4
- Summary: A tool for mining sentences from games. Update: Overlay?
3
+ Version: 2.18.18
4
+ Summary: A tool for mining sentences from games. Fix Goals Char Count
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
7
7
  Project-URL: Homepage, https://github.com/bpwhelan/GameSentenceMiner
@@ -27,8 +27,8 @@ GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9
27
27
  GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
28
28
  GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
29
29
  GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
30
- GameSentenceMiner/owocr/owocr/ocr.py,sha256=8cqZEUF90UlV3jBIvxKBga6YBFGjNBCVu1UiBcwISG0,72215
31
- GameSentenceMiner/owocr/owocr/run.py,sha256=Z7VkoFrsoQbMTHc6CmwpcMzsOROK9A_RJRwhlxw15oA,81871
30
+ GameSentenceMiner/owocr/owocr/ocr.py,sha256=XR6tbcj8ctDXn8NlpXrRZIel60zj2h3R0NKWBtEE5M4,72273
31
+ GameSentenceMiner/owocr/owocr/run.py,sha256=z3EaF_a5m9T_ZrELYoaAzHPqzTO0cd7MQCndcnWXq_4,82035
32
32
  GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
33
33
  GameSentenceMiner/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  GameSentenceMiner/tools/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
@@ -42,8 +42,8 @@ GameSentenceMiner/ui/furigana_filter_preview.py,sha256=DAT2-j6vSDHr9ufk6PiaLikEs
42
42
  GameSentenceMiner/ui/screenshot_selector.py,sha256=7QvDhOMpA0ej8x_lYtu6fhmrWbM1GCg-dps3XVWwk1Q,8234
43
43
  GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  GameSentenceMiner/util/audio_player.py,sha256=-yFsf0qoTSS1ga5rCmEJZJGUSJzXCvfZHY3t0NxycDk,7896
45
- GameSentenceMiner/util/configuration.py,sha256=8cXpeUJ4hOr8Qd4JPAx1qn_phBIFuKR5D-PybThu-Qk,48233
46
- GameSentenceMiner/util/db.py,sha256=CneZuFGIH6fosHblly4lcrWfU0Qjj5l0coxJy7m1igw,32237
45
+ GameSentenceMiner/util/configuration.py,sha256=qndhFAN4oC1dawklllS3UBhK2DCVSTloGdZxDoTUGr4,48137
46
+ GameSentenceMiner/util/db.py,sha256=FQUvMHcQv_bRNE9LfrsFIxXXXog8BhOA1t4mr_UXoiI,33019
47
47
  GameSentenceMiner/util/electron_config.py,sha256=KfeJToeFFVw0IR5MKa-gBzpzaGrU-lyJbR9z-sDEHYU,8767
48
48
  GameSentenceMiner/util/ffmpeg.py,sha256=cAzztfY36Xf2WvsJDjavoiMOvA9ac2GVdCrSB4LzHk4,29007
49
49
  GameSentenceMiner/util/games_table.py,sha256=VM68MAsdyE6tpdwM4bDSk67qioBOvsEO8-TpnRmUnSo,12003
@@ -63,7 +63,7 @@ GameSentenceMiner/util/win10toast/__init__.py,sha256=6TL2w6rzNmpJEp6_v2cAJP_7ExA
63
63
  GameSentenceMiner/util/win10toast/__main__.py,sha256=5MYnBcFj8y_6Dyc1kiPd0_FsUuh4yl1cv5wsleU6V4w,668
64
64
  GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
65
  GameSentenceMiner/web/anki_api_endpoints.py,sha256=r30OTT3YVfgbF6aJ-EGWZLF-j2D9L63jLkRXMycU0p8,23681
66
- GameSentenceMiner/web/database_api.py,sha256=GyiMZWiT9q7fzA7D26YYE73DnAj3jCk9KUGT1P349y4,89996
66
+ GameSentenceMiner/web/database_api.py,sha256=fLpVMZLn-LNwm-M0jNZLDSGH9dV6ml4zn28hNPHcOKY,89783
67
67
  GameSentenceMiner/web/events.py,sha256=6Vyz5c9MdpMIa7Zqljqhap2XFQnAVYJ0CdQV64TSZsA,5119
68
68
  GameSentenceMiner/web/gsm_websocket.py,sha256=B0VKpxmsRu0WRh5nFWlpDPBQ6-K2ed7TEIa0O6YWeoo,4166
69
69
  GameSentenceMiner/web/service.py,sha256=6cgUmDgtp3ZKzuPFszowjPoq-BDtC1bS3ux6sykeaqo,6662
@@ -135,9 +135,9 @@ GameSentenceMiner/web/templates/components/kanji_grid/thousand_character_classic
135
135
  GameSentenceMiner/web/templates/components/kanji_grid/wanikani_levels.json,sha256=8wjnnaYQqmho6t5tMxrIAc03512A2tYhQh5dfsQnfAM,11372
136
136
  GameSentenceMiner/web/templates/components/kanji_grid/words_hk_frequency_list.json,sha256=wRkqZNPzz6DT9OTPHpXwfqW96Qb96stCQNNgOL-ZdKk,17535
137
137
  GameSentenceMiner/wip/__init___.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
- gamesentenceminer-2.18.16.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
139
- gamesentenceminer-2.18.16.dist-info/METADATA,sha256=X02yygMguMepPkrv_08yYZLA9v_SzhAwbwAfKFHhJ4U,7488
140
- gamesentenceminer-2.18.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
141
- gamesentenceminer-2.18.16.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
142
- gamesentenceminer-2.18.16.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
143
- gamesentenceminer-2.18.16.dist-info/RECORD,,
138
+ gamesentenceminer-2.18.18.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
139
+ gamesentenceminer-2.18.18.dist-info/METADATA,sha256=Iyd8Jbyw4qbRxOd8MQD2ABDh0JP2v_qwqEeK69nrefE,7492
140
+ gamesentenceminer-2.18.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
141
+ gamesentenceminer-2.18.18.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
142
+ gamesentenceminer-2.18.18.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
143
+ gamesentenceminer-2.18.18.dist-info/RECORD,,