GameSentenceMiner 2.18.16__py3-none-any.whl → 2.18.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/owocr/owocr/ocr.py +3 -2
- GameSentenceMiner/owocr/owocr/run.py +5 -1
- GameSentenceMiner/util/configuration.py +6 -9
- GameSentenceMiner/util/db.py +18 -4
- GameSentenceMiner/web/database_api.py +6 -10
- {gamesentenceminer-2.18.16.dist-info → gamesentenceminer-2.18.18.dist-info}/METADATA +2 -2
- {gamesentenceminer-2.18.16.dist-info → gamesentenceminer-2.18.18.dist-info}/RECORD +11 -11
- {gamesentenceminer-2.18.16.dist-info → gamesentenceminer-2.18.18.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.18.16.dist-info → gamesentenceminer-2.18.18.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.18.16.dist-info → gamesentenceminer-2.18.18.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.18.16.dist-info → gamesentenceminer-2.18.18.dist-info}/top_level.txt +0 -0
|
@@ -111,8 +111,9 @@ def post_process(text, keep_blank_lines=False):
|
|
|
111
111
|
text = '\n'.join([''.join(i.split()) for i in text.splitlines()])
|
|
112
112
|
else:
|
|
113
113
|
text = ''.join([''.join(i.split()) for i in text.splitlines()])
|
|
114
|
-
text = text.replace('…', '
|
|
115
|
-
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '
|
|
114
|
+
text = text.replace('…', '・・・')
|
|
115
|
+
text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '・', text)
|
|
116
|
+
text = re.sub(r'・{3,}', '・・・', text)
|
|
116
117
|
text = jaconv.h2z(text, ascii=True, digit=True)
|
|
117
118
|
return text
|
|
118
119
|
|
|
@@ -1392,7 +1392,11 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
|
|
|
1392
1392
|
# print(engine_index)
|
|
1393
1393
|
|
|
1394
1394
|
if res:
|
|
1395
|
-
|
|
1395
|
+
if isinstance(text, list):
|
|
1396
|
+
for i, line in enumerate(text):
|
|
1397
|
+
text[i] = do_configured_ocr_replacements(line)
|
|
1398
|
+
else:
|
|
1399
|
+
text = do_configured_ocr_replacements(text)
|
|
1396
1400
|
if filtering:
|
|
1397
1401
|
text, orig_text = filtering(text, last_result, engine=engine, is_second_ocr=is_second_ocr)
|
|
1398
1402
|
if get_ocr_language() == "ja" or get_ocr_language() == "zh":
|
|
@@ -888,15 +888,16 @@ class Config:
|
|
|
888
888
|
if profile.advanced.streak_requirement_hours != default_stats.streak_requirement_hours:
|
|
889
889
|
self.stats.streak_requirement_hours = profile.advanced.streak_requirement_hours
|
|
890
890
|
|
|
891
|
+
self.overlay = self.get_config().overlay
|
|
892
|
+
|
|
891
893
|
# Add a way to migrate certain things based on version if needed, also help with better defaults
|
|
892
894
|
if self.version:
|
|
893
|
-
|
|
895
|
+
current_version = get_current_version()
|
|
896
|
+
if self.version != current_version:
|
|
894
897
|
from packaging import version
|
|
895
|
-
logger.info(f"New Config Found: {self.version} != {
|
|
898
|
+
logger.info(f"New Config Found: {self.version} != {current_version}")
|
|
896
899
|
# Handle version mismatch
|
|
897
|
-
changed = False
|
|
898
900
|
if version.parse(self.version) < version.parse("2.18.0"):
|
|
899
|
-
changed = True
|
|
900
901
|
# Example, doesn't need to be done
|
|
901
902
|
for profile in self.configs.values():
|
|
902
903
|
profile.obs.get_game_from_scene = True
|
|
@@ -904,11 +905,7 @@ class Config:
|
|
|
904
905
|
if profile.vad.selected_vad_model == WHISPER and profile.vad.backup_vad_model == SILERO:
|
|
905
906
|
profile.vad.backup_vad_model = OFF
|
|
906
907
|
|
|
907
|
-
|
|
908
|
-
self.save()
|
|
909
|
-
self.overlay = self.get_config().overlay
|
|
910
|
-
|
|
911
|
-
self.version = get_current_version()
|
|
908
|
+
self.save()
|
|
912
909
|
|
|
913
910
|
def save(self):
|
|
914
911
|
with open(get_config_path(), 'w') as file:
|
GameSentenceMiner/util/db.py
CHANGED
|
@@ -15,10 +15,14 @@ import uuid
|
|
|
15
15
|
import pytz
|
|
16
16
|
from datetime import timedelta
|
|
17
17
|
|
|
18
|
+
import regex
|
|
19
|
+
|
|
18
20
|
from GameSentenceMiner.util.text_log import GameLine
|
|
19
21
|
from GameSentenceMiner.util.configuration import get_stats_config, logger, is_dev
|
|
20
22
|
import gzip
|
|
21
23
|
|
|
24
|
+
# Matches any Unicode punctuation (\p{P}), symbol (\p{S}), or separator (\p{Z}); \p{Z} includes whitespace/separator chars
|
|
25
|
+
punctuation_regex = regex.compile(r'[\p{P}\p{S}\p{Z}]')
|
|
22
26
|
|
|
23
27
|
class SQLiteDB:
|
|
24
28
|
"""
|
|
@@ -136,7 +140,7 @@ class SQLiteDBTable:
|
|
|
136
140
|
return cls.from_row(row) if row else None
|
|
137
141
|
|
|
138
142
|
@classmethod
|
|
139
|
-
def from_row(cls: Type[T], row: Tuple) -> T:
|
|
143
|
+
def from_row(cls: Type[T], row: Tuple, clean_columns: list = []) -> T:
|
|
140
144
|
if not row:
|
|
141
145
|
return None
|
|
142
146
|
obj = cls()
|
|
@@ -162,6 +166,9 @@ class SQLiteDBTable:
|
|
|
162
166
|
field = expected_fields[expected_pos]
|
|
163
167
|
field_type = cls._types[expected_pos]
|
|
164
168
|
|
|
169
|
+
if field in clean_columns and isinstance(row_value, str):
|
|
170
|
+
row_value = punctuation_regex.sub('', row_value).strip()
|
|
171
|
+
|
|
165
172
|
cls._set_field_value(obj, field, field_type, row_value, expected_pos == 0 and field == cls._pk)
|
|
166
173
|
|
|
167
174
|
except Exception as e:
|
|
@@ -483,6 +490,12 @@ class GameLinesTable(SQLiteDBTable):
|
|
|
483
490
|
self.translation = translation if translation is not None else ''
|
|
484
491
|
self.original_game_name = original_game_name if original_game_name is not None else ''
|
|
485
492
|
self.game_id = game_id if game_id is not None else ''
|
|
493
|
+
|
|
494
|
+
@classmethod
|
|
495
|
+
def all(cls, for_stats: bool = False) -> List['GameLinesTable']:
|
|
496
|
+
rows = cls._db.fetchall(f"SELECT * FROM {cls._table}")
|
|
497
|
+
clean_columns = ['line_text'] if for_stats else []
|
|
498
|
+
return [cls.from_row(row, clean_columns=clean_columns) for row in rows]
|
|
486
499
|
|
|
487
500
|
@classmethod
|
|
488
501
|
def get_all_lines_for_scene(cls, game_name: str) -> List['GameLinesTable']:
|
|
@@ -539,7 +552,7 @@ class GameLinesTable(SQLiteDBTable):
|
|
|
539
552
|
)
|
|
540
553
|
|
|
541
554
|
@classmethod
|
|
542
|
-
def get_lines_filtered_by_timestamp(cls, start: Optional[float] = None, end: Optional[float] = None) -> List['GameLinesTable']:
|
|
555
|
+
def get_lines_filtered_by_timestamp(cls, start: Optional[float] = None, end: Optional[float] = None, for_stats=False) -> List['GameLinesTable']:
|
|
543
556
|
"""
|
|
544
557
|
Fetches all lines optionally filtered by start and end timestamps.
|
|
545
558
|
If start or end is None, that bound is ignored.
|
|
@@ -565,8 +578,9 @@ class GameLinesTable(SQLiteDBTable):
|
|
|
565
578
|
|
|
566
579
|
# Execute the query
|
|
567
580
|
rows = cls._db.fetchall(query, tuple(params))
|
|
568
|
-
|
|
569
|
-
|
|
581
|
+
clean_columns = ['line_text'] if for_stats else []
|
|
582
|
+
return [cls.from_row(row, clean_columns=clean_columns) for row in rows]
|
|
583
|
+
|
|
570
584
|
class StatsRollupTable(SQLiteDBTable):
|
|
571
585
|
_table = 'stats_rollup'
|
|
572
586
|
_fields = ['date', 'games_played', 'lines_mined', 'anki_cards_created', 'time_spent_mining']
|
|
@@ -24,7 +24,6 @@ from GameSentenceMiner.web.stats import (
|
|
|
24
24
|
calculate_hourly_reading_speed, calculate_peak_daily_stats, calculate_peak_session_stats
|
|
25
25
|
)
|
|
26
26
|
|
|
27
|
-
|
|
28
27
|
def register_database_api_routes(app):
|
|
29
28
|
"""Register all database API routes with the Flask app."""
|
|
30
29
|
|
|
@@ -1069,7 +1068,6 @@ def register_database_api_routes(app):
|
|
|
1069
1068
|
Accepts optional 'year' parameter to filter heatmap data.
|
|
1070
1069
|
"""
|
|
1071
1070
|
try:
|
|
1072
|
-
punctionation_regex = regex.compile(r'[\p{P}\p{S}\p{Z}]')
|
|
1073
1071
|
# Get optional year filter parameter
|
|
1074
1072
|
filter_year = request.args.get('year', None)
|
|
1075
1073
|
|
|
@@ -1082,7 +1080,7 @@ def register_database_api_routes(app):
|
|
|
1082
1080
|
end_timestamp = float(end_timestamp) if end_timestamp else None
|
|
1083
1081
|
|
|
1084
1082
|
# 1. Fetch all lines and sort them chronologically
|
|
1085
|
-
all_lines = GameLinesTable.get_lines_filtered_by_timestamp(start=start_timestamp, end=end_timestamp)
|
|
1083
|
+
all_lines = GameLinesTable.get_lines_filtered_by_timestamp(start=start_timestamp, end=end_timestamp, for_stats=True)
|
|
1086
1084
|
|
|
1087
1085
|
if not all_lines:
|
|
1088
1086
|
return jsonify({"labels": [], "datasets": []})
|
|
@@ -1095,14 +1093,12 @@ def register_database_api_routes(app):
|
|
|
1095
1093
|
day_str = datetime.date.fromtimestamp(float(line.timestamp)).strftime('%Y-%m-%d')
|
|
1096
1094
|
game = line.game_name or "Unknown Game"
|
|
1097
1095
|
# Remove punctuation and symbols from line text before counting characters
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
logger.info(f"Non-string line_text encountered: {clean_text} (type: {type(clean_text)})")
|
|
1096
|
+
if not isinstance(line.line_text, str) and not wrong_instance_found:
|
|
1097
|
+
logger.info(f"Non-string line_text encountered: {line.line_text} (type: {type(line.line_text)})")
|
|
1101
1098
|
wrong_instance_found = True
|
|
1102
1099
|
|
|
1103
|
-
line.line_text = clean_text # Update line text to cleaned version for future use
|
|
1104
1100
|
daily_data[day_str][game]['lines'] += 1
|
|
1105
|
-
daily_data[day_str][game]['chars'] += len(
|
|
1101
|
+
daily_data[day_str][game]['chars'] += len(line.line_text)
|
|
1106
1102
|
|
|
1107
1103
|
# 3. Create cumulative datasets for Chart.js
|
|
1108
1104
|
sorted_days = sorted(daily_data.keys())
|
|
@@ -1318,7 +1314,7 @@ def register_database_api_routes(app):
|
|
|
1318
1314
|
today = datetime.date.today()
|
|
1319
1315
|
|
|
1320
1316
|
# Get all lines for overall progress
|
|
1321
|
-
all_lines = GameLinesTable.all()
|
|
1317
|
+
all_lines = GameLinesTable.all(for_stats=True)
|
|
1322
1318
|
if not all_lines:
|
|
1323
1319
|
return jsonify({
|
|
1324
1320
|
'hours': {'required': 0, 'progress': 0, 'has_target': False},
|
|
@@ -1428,7 +1424,7 @@ def register_database_api_routes(app):
|
|
|
1428
1424
|
thirty_days_ago = today - datetime.timedelta(days=30)
|
|
1429
1425
|
|
|
1430
1426
|
# Get all lines
|
|
1431
|
-
all_lines = GameLinesTable.all()
|
|
1427
|
+
all_lines = GameLinesTable.all(for_stats=True)
|
|
1432
1428
|
if not all_lines:
|
|
1433
1429
|
return jsonify({
|
|
1434
1430
|
'hours': {'projection': 0, 'daily_average': 0},
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: GameSentenceMiner
|
|
3
|
-
Version: 2.18.16
|
|
4
|
-
Summary: A tool for mining sentences from games.
|
|
3
|
+
Version: 2.18.18
|
|
4
|
+
Summary: A tool for mining sentences from games. Fix Goals Char Count
|
|
5
5
|
Author-email: Beangate <bpwhelan95@gmail.com>
|
|
6
6
|
License: MIT License
|
|
7
7
|
Project-URL: Homepage, https://github.com/bpwhelan/GameSentenceMiner
|
|
@@ -27,8 +27,8 @@ GameSentenceMiner/owocr/owocr/__init__.py,sha256=87hfN5u_PbL_onLfMACbc0F5j4KyIK9
|
|
|
27
27
|
GameSentenceMiner/owocr/owocr/__main__.py,sha256=XQaqZY99EKoCpU-gWQjNbTs7Kg17HvBVE7JY8LqIE0o,157
|
|
28
28
|
GameSentenceMiner/owocr/owocr/config.py,sha256=qM7kISHdUhuygGXOxmgU6Ef2nwBShrZtdqu4InDCViE,8103
|
|
29
29
|
GameSentenceMiner/owocr/owocr/lens_betterproto.py,sha256=oNoISsPilVVRBBPVDtb4-roJtAhp8ZAuFTci3TGXtMc,39141
|
|
30
|
-
GameSentenceMiner/owocr/owocr/ocr.py,sha256=
|
|
31
|
-
GameSentenceMiner/owocr/owocr/run.py,sha256=
|
|
30
|
+
GameSentenceMiner/owocr/owocr/ocr.py,sha256=XR6tbcj8ctDXn8NlpXrRZIel60zj2h3R0NKWBtEE5M4,72273
|
|
31
|
+
GameSentenceMiner/owocr/owocr/run.py,sha256=z3EaF_a5m9T_ZrELYoaAzHPqzTO0cd7MQCndcnWXq_4,82035
|
|
32
32
|
GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py,sha256=Na6XStbQBtpQUSdbN3QhEswtKuU1JjReFk_K8t5ezQE,3395
|
|
33
33
|
GameSentenceMiner/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
GameSentenceMiner/tools/audio_offset_selector.py,sha256=8Stk3BP-XVIuzRv9nl9Eqd2D-1yD3JrgU-CamBywJmY,8542
|
|
@@ -42,8 +42,8 @@ GameSentenceMiner/ui/furigana_filter_preview.py,sha256=DAT2-j6vSDHr9ufk6PiaLikEs
|
|
|
42
42
|
GameSentenceMiner/ui/screenshot_selector.py,sha256=7QvDhOMpA0ej8x_lYtu6fhmrWbM1GCg-dps3XVWwk1Q,8234
|
|
43
43
|
GameSentenceMiner/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
44
|
GameSentenceMiner/util/audio_player.py,sha256=-yFsf0qoTSS1ga5rCmEJZJGUSJzXCvfZHY3t0NxycDk,7896
|
|
45
|
-
GameSentenceMiner/util/configuration.py,sha256=
|
|
46
|
-
GameSentenceMiner/util/db.py,sha256=
|
|
45
|
+
GameSentenceMiner/util/configuration.py,sha256=qndhFAN4oC1dawklllS3UBhK2DCVSTloGdZxDoTUGr4,48137
|
|
46
|
+
GameSentenceMiner/util/db.py,sha256=FQUvMHcQv_bRNE9LfrsFIxXXXog8BhOA1t4mr_UXoiI,33019
|
|
47
47
|
GameSentenceMiner/util/electron_config.py,sha256=KfeJToeFFVw0IR5MKa-gBzpzaGrU-lyJbR9z-sDEHYU,8767
|
|
48
48
|
GameSentenceMiner/util/ffmpeg.py,sha256=cAzztfY36Xf2WvsJDjavoiMOvA9ac2GVdCrSB4LzHk4,29007
|
|
49
49
|
GameSentenceMiner/util/games_table.py,sha256=VM68MAsdyE6tpdwM4bDSk67qioBOvsEO8-TpnRmUnSo,12003
|
|
@@ -63,7 +63,7 @@ GameSentenceMiner/util/win10toast/__init__.py,sha256=6TL2w6rzNmpJEp6_v2cAJP_7ExA
|
|
|
63
63
|
GameSentenceMiner/util/win10toast/__main__.py,sha256=5MYnBcFj8y_6Dyc1kiPd0_FsUuh4yl1cv5wsleU6V4w,668
|
|
64
64
|
GameSentenceMiner/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
65
65
|
GameSentenceMiner/web/anki_api_endpoints.py,sha256=r30OTT3YVfgbF6aJ-EGWZLF-j2D9L63jLkRXMycU0p8,23681
|
|
66
|
-
GameSentenceMiner/web/database_api.py,sha256=
|
|
66
|
+
GameSentenceMiner/web/database_api.py,sha256=fLpVMZLn-LNwm-M0jNZLDSGH9dV6ml4zn28hNPHcOKY,89783
|
|
67
67
|
GameSentenceMiner/web/events.py,sha256=6Vyz5c9MdpMIa7Zqljqhap2XFQnAVYJ0CdQV64TSZsA,5119
|
|
68
68
|
GameSentenceMiner/web/gsm_websocket.py,sha256=B0VKpxmsRu0WRh5nFWlpDPBQ6-K2ed7TEIa0O6YWeoo,4166
|
|
69
69
|
GameSentenceMiner/web/service.py,sha256=6cgUmDgtp3ZKzuPFszowjPoq-BDtC1bS3ux6sykeaqo,6662
|
|
@@ -135,9 +135,9 @@ GameSentenceMiner/web/templates/components/kanji_grid/thousand_character_classic
|
|
|
135
135
|
GameSentenceMiner/web/templates/components/kanji_grid/wanikani_levels.json,sha256=8wjnnaYQqmho6t5tMxrIAc03512A2tYhQh5dfsQnfAM,11372
|
|
136
136
|
GameSentenceMiner/web/templates/components/kanji_grid/words_hk_frequency_list.json,sha256=wRkqZNPzz6DT9OTPHpXwfqW96Qb96stCQNNgOL-ZdKk,17535
|
|
137
137
|
GameSentenceMiner/wip/__init___.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
|
-
gamesentenceminer-2.18.
|
|
139
|
-
gamesentenceminer-2.18.
|
|
140
|
-
gamesentenceminer-2.18.
|
|
141
|
-
gamesentenceminer-2.18.
|
|
142
|
-
gamesentenceminer-2.18.
|
|
143
|
-
gamesentenceminer-2.18.
|
|
138
|
+
gamesentenceminer-2.18.18.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
139
|
+
gamesentenceminer-2.18.18.dist-info/METADATA,sha256=Iyd8Jbyw4qbRxOd8MQD2ABDh0JP2v_qwqEeK69nrefE,7492
|
|
140
|
+
gamesentenceminer-2.18.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
141
|
+
gamesentenceminer-2.18.18.dist-info/entry_points.txt,sha256=2APEP25DbfjSxGeHtwBstMH8mulVhLkqF_b9bqzU6vQ,65
|
|
142
|
+
gamesentenceminer-2.18.18.dist-info/top_level.txt,sha256=V1hUY6xVSyUEohb0uDoN4UIE6rUZ_JYx8yMyPGX4PgQ,18
|
|
143
|
+
gamesentenceminer-2.18.18.dist-info/RECORD,,
|
|
File without changes
|
{gamesentenceminer-2.18.16.dist-info → gamesentenceminer-2.18.18.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{gamesentenceminer-2.18.16.dist-info → gamesentenceminer-2.18.18.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
|
File without changes
|