GameSentenceMiner 2.10.16__py3-none-any.whl → 2.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/ai/ai_prompting.py +339 -42
- GameSentenceMiner/anki.py +8 -4
- GameSentenceMiner/config_gui.py +25 -17
- GameSentenceMiner/gsm.py +19 -17
- GameSentenceMiner/ocr/owocr_helper.py +9 -6
- GameSentenceMiner/owocr/owocr/ocr.py +86 -41
- GameSentenceMiner/owocr/owocr/run.py +11 -4
- GameSentenceMiner/util/configuration.py +11 -9
- GameSentenceMiner/util/text_log.py +4 -0
- GameSentenceMiner/vad.py +209 -174
- GameSentenceMiner/web/templates/index.html +21 -20
- GameSentenceMiner/web/texthooking_page.py +15 -1
- {gamesentenceminer-2.10.16.dist-info → gamesentenceminer-2.11.0.dist-info}/METADATA +2 -2
- {gamesentenceminer-2.10.16.dist-info → gamesentenceminer-2.11.0.dist-info}/RECORD +18 -18
- {gamesentenceminer-2.10.16.dist-info → gamesentenceminer-2.11.0.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.10.16.dist-info → gamesentenceminer-2.11.0.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.10.16.dist-info → gamesentenceminer-2.11.0.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.10.16.dist-info → gamesentenceminer-2.11.0.dist-info}/top_level.txt +0 -0
GameSentenceMiner/gsm.py
CHANGED
@@ -97,35 +97,37 @@ class VideoToAudioHandler(FileSystemEventHandler):
             skip_delete = True
             return
 
-        mined_line = get_text_event(last_note)
-        gsm_state.last_mined_line = mined_line
-        if os.path.exists(video_path) and os.access(video_path, os.R_OK):
-            logger.debug(f"Video found and is readable: {video_path}")
-            if get_config().obs.minimum_replay_size and not ffmpeg.is_video_big_enough(video_path,
-                                                                                       get_config().obs.minimum_replay_size):
-                logger.debug("Checking if video is big enough")
-                notification.send_check_obs_notification(reason="Video may be empty, check scene in OBS.")
-                logger.error(
-                    f"Video was unusually small, potentially empty! Check OBS for Correct Scene Settings! Path: {video_path}")
-                return
-
         # Just for safety
         if not last_note:
             if get_config().anki.update_anki:
                 last_note = anki.get_last_anki_card()
             if get_config().features.backfill_audio:
                 last_note = anki.get_cards_by_sentence(gametext.current_line_after_regex)
+
+        # Get Info of line mined
         line_cutoff = None
         start_line = None
-        if mined_line:
-            start_line = mined_line
-            if mined_line.next:
-                line_cutoff = mined_line.next.time
-
         if selected_lines:
             start_line = selected_lines[0]
             mined_line = get_mined_line(last_note, selected_lines)
             line_cutoff = selected_lines[-1].get_next_time()
+        else:
+            mined_line = get_text_event(last_note)
+            if mined_line:
+                start_line = mined_line
+                if mined_line.next:
+                    line_cutoff = mined_line.next.time
+        gsm_state.last_mined_line = mined_line
+
+        if os.path.exists(video_path) and os.access(video_path, os.R_OK):
+            logger.debug(f"Video found and is readable: {video_path}")
+            if get_config().obs.minimum_replay_size and not ffmpeg.is_video_big_enough(video_path,
+                                                                                       get_config().obs.minimum_replay_size):
+                logger.debug("Checking if video is big enough")
+                notification.send_check_obs_notification(reason="Video may be empty, check scene in OBS.")
+                logger.error(
+                    f"Video was unusually small, potentially empty! Check OBS for Correct Scene Settings! Path: {video_path}")
+                return
 
         if last_note:
             logger.debug(last_note.to_json())
GameSentenceMiner/ocr/owocr_helper.py
CHANGED
@@ -195,10 +195,10 @@ all_cords = None
 rectangles = None
 last_ocr2_result = []
 
-def do_second_ocr(ocr1_text, time, img, filtering, ignore_furigana_filter=False):
+def do_second_ocr(ocr1_text, time, img, filtering, ignore_furigana_filter=False, ignore_previous_result=False):
     global twopassocr, ocr2, last_ocr2_result
     try:
-        orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
+        orig_text, text = run.process_and_write_results(img, None, last_ocr2_result if not ignore_previous_result else None, filtering, None,
                                                         engine=ocr2, furigana_filter_sensitivity=furigana_filter_sensitivity if not ignore_furigana_filter else 0)
 
         if compare_ocr_results(last_ocr2_result, orig_text):
@@ -344,7 +344,8 @@ def run_oneocr(ocr_config: OCRConfig, rectangles):
             gsm_ocr_config=ocr_config,
             screen_capture_areas=screen_areas,
             furigana_filter_sensitivity=furigana_filter_sensitivity,
-            screen_capture_combo=manual_ocr_hotkey if manual_ocr_hotkey and manual else None
+            screen_capture_combo=manual_ocr_hotkey if manual_ocr_hotkey and manual else None,
+            keep_line_breaks=keep_newline)
     except Exception as e:
         logger.exception(f"Error running OneOCR: {e}")
         done = True
@@ -359,14 +360,14 @@ def add_ss_hotkey(ss_hotkey="ctrl+shift+g"):
     def capture():
         print("Taking screenshot...")
         img = cropper.run()
-        do_second_ocr("", datetime.now(), img, filtering, ignore_furigana_filter=True)
+        do_second_ocr("", datetime.now(), img, filtering, ignore_furigana_filter=True, ignore_previous_result=True)
     def capture_main_monitor():
         print("Taking screenshot of main monitor...")
         with mss.mss() as sct:
             main_monitor = sct.monitors[1] if len(sct.monitors) > 1 else sct.monitors[0]
             img = sct.grab(main_monitor)
             img_bytes = mss.tools.to_png(img.rgb, img.size)
-            do_second_ocr("", datetime.now(), img_bytes, filtering, ignore_furigana_filter=True)
+            do_second_ocr("", datetime.now(), img_bytes, filtering, ignore_furigana_filter=True, ignore_previous_result=True)
     hotkey_reg = None
     try:
         hotkey_reg = keyboard.add_hotkey(ss_hotkey, capture)
@@ -404,7 +405,7 @@ def set_force_stable_hotkey():
 
 if __name__ == "__main__":
     try:
-        global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config
+        global ocr1, ocr2, twopassocr, language, ss_clipboard, ss, ocr_config, furigana_filter_sensitivity, area_select_ocr_hotkey, window, optimize_second_scan, use_window_for_config, keep_newline
         import sys
 
         import argparse
@@ -428,6 +429,7 @@ if __name__ == "__main__":
                         help="Optimize second scan by cropping based on first scan results")
     parser.add_argument("--use_window_for_config", action="store_true",
                         help="Use the specified window for loading OCR configuration")
+    parser.add_argument("--keep_newline", action="store_true", help="Keep new lines in OCR output")
 
     args = parser.parse_args()
 
@@ -446,6 +448,7 @@ if __name__ == "__main__":
     clipboard_output = args.clipboard_output
     optimize_second_scan = args.optimize_second_scan
     use_window_for_config = args.use_window_for_config
+    keep_newline = args.keep_newline
 
     window = None
     logger.info(f"Received arguments: {vars(args)}")
GameSentenceMiner/owocr/owocr/ocr.py
CHANGED
@@ -14,7 +14,6 @@ from urllib.parse import urlparse, parse_qs
 import jaconv
 import numpy as np
 from PIL import Image
-from google.generativeai import GenerationConfig
 from loguru import logger
 import requests
 
@@ -92,8 +91,11 @@ def empty_post_process(text):
     return text
 
 
-def post_process(text):
-
+def post_process(text, keep_blank_lines=False):
+    if keep_blank_lines:
+        text = '\n'.join([''.join(i.split()) for i in text.splitlines()])
+    else:
+        text = ''.join([''.join(i.split()) for i in text.splitlines()])
     text = text.replace('…', '...')
     text = re.sub('[・.]{2,}', lambda x: (x.end() - x.start()) * '.', text)
     text = jaconv.h2z(text, ascii=True, digit=True)
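To make the new `keep_blank_lines` switch concrete, here is a standalone sketch of just the whitespace-collapsing step above (the real `post_process` goes on to normalize ellipses and half-width characters; `collapse_whitespace` is an illustrative name):

```python
def collapse_whitespace(text: str, keep_blank_lines: bool = False) -> str:
    # Strip whitespace inside each line, then either keep the line structure
    # (keep_blank_lines=True) or flatten everything onto a single line.
    lines = [''.join(line.split()) for line in text.splitlines()]
    return '\n'.join(lines) if keep_blank_lines else ''.join(lines)

sample = "これは ペン です\n\nそれは 本 です"
print(collapse_whitespace(sample))                         # これはペンですそれは本です
print(collapse_whitespace(sample, keep_blank_lines=True))  # これはペンです / (blank) / それは本です
```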
@@ -305,22 +307,42 @@ class GoogleLens:
         response_proto = LensOverlayServerResponse().FromString(res.content)
         response_dict = response_proto.to_dict(betterproto.Casing.SNAKE)
 
-
-
+        with open(os.path.join(r"C:\Users\Beangate\GSM\Electron App\test", 'glens_response.json'), 'w', encoding='utf-8') as f:
+            json.dump(response_dict, f, indent=4, ensure_ascii=False)
         res = ''
         text = response_dict['objects_response']['text']
         skipped = []
-
-
-
-
+        previous_line = None
+        if 'text_layout' in text:
+            for paragraph in text['text_layout']['paragraphs']:
+                if previous_line:
+                    prev_bbox = previous_line['geometry']['bounding_box']
+                    curr_bbox = paragraph['geometry']['bounding_box']
+                    vertical_space = abs(curr_bbox['center_y'] - prev_bbox['center_y']) * img.height
+                    prev_height = prev_bbox['height'] * img.height
+                    current_height = curr_bbox['height'] * img.height
+                    avg_height = (prev_height + current_height) / 2
+                    # If vertical space is close to previous line's height, add a blank line
+                    # logger.info(f"Vertical space: {vertical_space}, Average height: {avg_height}")
+                    # logger.info(avg_height * 2)
+                    if vertical_space > avg_height * 2:
+                        logger.info('Adding blank line')
+                        res += 'BLANK_LINE'
+                for line in paragraph['lines']:
+                    if furigana_filter_sensitivity:
                         if furigana_filter_sensitivity < line['geometry']['bounding_box']['width'] * img.width and furigana_filter_sensitivity < line['geometry']['bounding_box']['height'] * img.height:
                             for word in line['words']:
                                 res += word['plain_text'] + word['text_separator']
                         else:
                             skipped.append(word['plain_text'] for word in line['words'])
                             continue
-
+                    else:
+                        for word in line['words']:
+                            res += word['plain_text'] + word['text_separator']
+                else:
+                    continue
+                previous_line = paragraph
+                res += '\n'
         # logger.info(
         #     f"Skipped {len(skipped)} chars due to furigana filter sensitivity: {furigana_filter_sensitivity}")
         # widths = []
@@ -351,16 +373,16 @@ class GoogleLens:
         # else:
         #     continue
         # res += '\n'
-        else:
-            if 'text_layout' in text:
-                paragraphs = text['text_layout']['paragraphs']
-                for paragraph in paragraphs:
-                    for line in paragraph['lines']:
-                        for word in line['words']:
-                            res += word['plain_text'] + word['text_separator']
-                    else:
-                        continue
-                res += '\n'
+        # else:
+        #     if 'text_layout' in text:
+        #         paragraphs = text['text_layout']['paragraphs']
+        #         for paragraph in paragraphs:
+        #             for line in paragraph['lines']:
+        #                 for word in line['words']:
+        #                     res += word['plain_text'] + word['text_separator']
+        #             else:
+        #                 continue
+        #         res += '\n'
 
         x = (True, res)
 
@@ -1128,17 +1150,33 @@ class GeminiOCR:
         # if "google-generativeai" not in sys.modules:
         #     logger.warning('google-generativeai not available, GeminiOCR will not work!')
         # else:
-
+        from google import genai
+        from google.genai import types
         try:
             self.api_key = config['api_key']
             if not self.api_key:
                 logger.warning('Gemini API key not provided, GeminiOCR will not work!')
             else:
-                genai.
-                self.model =
+                self.client = genai.Client(api_key=self.api_key)
+                self.model = config['model']
+                self.generation_config = types.GenerateContentConfig(
                     temperature=0.0,
-                    max_output_tokens=300
-
+                    max_output_tokens=300,
+                    safety_settings=[
+                        types.SafetySetting(category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
+                                            threshold=types.HarmBlockThreshold.BLOCK_NONE),
+                        types.SafetySetting(category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+                                            threshold=types.HarmBlockThreshold.BLOCK_NONE),
+                        types.SafetySetting(category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+                                            threshold=types.HarmBlockThreshold.BLOCK_NONE),
+                        types.SafetySetting(category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+                                            threshold=types.HarmBlockThreshold.BLOCK_NONE),
+                    ],
+                )
+                if "2.5" in self.model:
+                    self.generation_config.thinking_config = types.ThinkingConfig(
+                        thinking_budget=0,
+                    )
                 self.available = True
                 logger.info('Gemini (using google-generativeai) ready')
         except KeyError:
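The constructor now builds on the newer `google-genai` SDK (`genai.Client`) rather than the deprecated `google-generativeai` package. A minimal standalone sketch of the same client flow, mirroring the calls in the diff (the model name, image path, and API key are placeholders; requires `pip install google-genai`):

```python
from google import genai
from google.genai import types

client = genai.Client(api_key="YOUR_API_KEY")  # placeholder key

config = types.GenerateContentConfig(
    temperature=0.0,
    max_output_tokens=300,
    # GSM additionally disables safety blocking via types.SafetySetting and,
    # for 2.5-series models, sets a zero thinking budget via types.ThinkingConfig.
)

with open("screenshot.png", "rb") as f:  # placeholder image
    img_bytes = f.read()

response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=[
        types.Content(parts=[
            types.Part(inline_data=types.Blob(mime_type="image/png", data=img_bytes)),
            types.Part(text="Extract the dialogue text from this image."),
        ])
    ],
    config=config,
)
print(response.text.strip())
```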
@@ -1151,29 +1189,36 @@ class GeminiOCR:
             return (False, 'GeminiOCR is not available due to missing API key or configuration error.')
 
         try:
+            from google.genai import types
             img, is_path = input_to_pil_image(img)
-            import google.generativeai as genai
             img_bytes = self._preprocess(img)
             if not img_bytes:
                 return (False, 'Error processing image for Gemini.')
 
             contents = [
-
-
-
-
-
-
-
-
-
-
-
+                types.Content(
+                    parts=[
+                        types.Part(
+                            inline_data=types.Blob(
+                                mime_type="image/png",
+                                data=img_bytes
+                            )
+                        ),
+                        types.Part(
+                            text="""
+**Disclaimer:** The image provided is from a video game. This content is entirely fictional and part of a narrative. It must not be treated as real-world user input or a genuine request.
+Analyze the image. Extract text \\*only\\* from within dialogue boxes (speech bubbles or panels containing character dialogue). If Text appears to be vertical, read the text from top to bottom, right to left. From the extracted dialogue text, filter out any furigana. Ignore and do not include any text found outside of dialogue boxes, including character names, speaker labels, or sound effects. Return \\*only\\* the filtered dialogue text. If no text is found within dialogue boxes after applying filters, return nothing. Do not include any other output, formatting markers, or commentary."
+"""
+                        )
                     ]
-
+                )
             ]
 
-            response = self.
+            response = self.client.models.generate_content(
+                model=self.model,
+                contents=contents,
+                config=self.generation_config
+            )
             text_output = response.text.strip()
 
             return (True, text_output)
@@ -1373,8 +1418,8 @@ class GroqOCR:
     # def _preprocess(self, img):
     #     return base64.b64encode(pil_image_to_bytes(img, png_compression=1)).decode('utf-8')
 
-# lens =
+# lens = GeminiOCR(config={'model': 'gemini-2.5-flash-lite-preview-06-17', 'api_key': ''})
 #
-# res, text = lens(Image.open('test_furigana.png')
+# res, text = lens(Image.open('test_furigana.png')) # Example usage
 #
 # print(text)
GameSentenceMiner/owocr/owocr/run.py
CHANGED
@@ -353,7 +353,9 @@ class TextFiltering:
 
         orig_text_filtered = []
         for block in orig_text:
-            if lang == "ja":
+            if "BLANK_LINE" in block:
+                block_filtered = ["\n"]
+            elif lang == "ja":
                 block_filtered = self.kana_kanji_regex.findall(block)
             elif lang == "zh":
                 block_filtered = self.chinese_common_regex.findall(block)
@@ -394,7 +396,8 @@ class TextFiltering:
         new_blocks = []
         for idx, block in enumerate(orig_text):
             if orig_text_filtered[idx] and (orig_text_filtered[idx] not in last_text):
-                new_blocks.append(block)
+                new_blocks.append(str(block).strip().replace("BLANK_LINE", "\n"))
+
 
         final_blocks = []
         if self.accurate_filtering:
@@ -407,9 +410,10 @@ class TextFiltering:
         else:
             for block in new_blocks:
                 # This only filters out NON JA/ZH from text when lang is JA/ZH
-                if lang not in ["ja", "zh"] or self.classify(block)[0] in ['ja', 'zh']:
+                if lang not in ["ja", "zh"] or self.classify(block)[0] in ['ja', 'zh'] or block == "\n":
                     final_blocks.append(block)
 
+
         text = '\n'.join(final_blocks)
         return text, orig_text_filtered
 
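Taken together, the three `TextFiltering` changes let the `BLANK_LINE` markers emitted by the Lens engine survive language filtering and come out as real blank lines. A condensed, self-contained sketch of that flow (simplified: the real method also de-duplicates against the previous result and uses a language classifier):

```python
import re

kana_kanji = re.compile(r'[\u3041-\u30ff\u4e00-\u9fff]+')

def filter_blocks(orig_text: list[str], lang: str = "ja") -> str:
    final_blocks = []
    for block in orig_text:
        if "BLANK_LINE" in block:
            block = block.strip().replace("BLANK_LINE", "\n")  # sentinel -> newline block
        elif lang == "ja" and not kana_kanji.findall(block):
            continue  # drop blocks with no Japanese characters
        final_blocks.append(block)
    return '\n'.join(final_blocks)

print(repr(filter_blocks(["これはペンです", "BLANK_LINE", "それは本です", "stray latin"])))
# 'これはペンです\n\n\nそれは本です' -- the sentinel becomes extra newlines in the joined output
```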
@@ -937,7 +941,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
     if filtering:
         text, orig_text = filtering(text, last_result)
     if lang == "ja" or lang == "zh":
-        text = post_process(text)
+        text = post_process(text, keep_blank_lines=keep_new_lines)
     logger.opt(ansi=True).info(f'Text recognized in {end_time - start_time:0.03f}s using <{engine_color}>{engine_instance.readable_name}</{engine_color}>: {text}')
     if notify and config.get_general('notifications'):
         notifier.send(title='owocr', message='Text recognized: ' + text)
@@ -999,6 +1003,7 @@ def run(read_from=None,
         ocr2=None,
         gsm_ocr_config=None,
         furigana_filter_sensitivity=None,
+        keep_line_breaks=False,
         ):
     """
     Japanese OCR client
@@ -1075,11 +1080,13 @@ def run(read_from=None,
     global engine_instances
     global engine_keys
     global lang
+    global keep_new_lines
     lang = language
     engine_instances = []
     config_engines = []
     engine_keys = []
     default_engine = ''
+    keep_new_lines = keep_line_breaks
 
     if len(config.get_general('engines')) > 0:
         for config_engine in config.get_general('engines').split(','):
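As the hunks above show, the new flag travels as a module-level global: `--keep_newline` in owocr_helper becomes `run(keep_line_breaks=...)`, which sets `keep_new_lines`, which `process_and_write_results` then forwards to `post_process(text, keep_blank_lines=...)`. A toy sketch of that plumbing pattern (names simplified, purely illustrative):

```python
# Module-level flag, set once by run() and read later during text processing.
keep_new_lines = False

def run(keep_line_breaks: bool = False) -> None:
    global keep_new_lines
    keep_new_lines = keep_line_breaks
    # ... start the OCR loop ...

def process(text: str) -> str:
    # Stand-in for post_process(text, keep_blank_lines=keep_new_lines).
    lines = [''.join(line.split()) for line in text.splitlines()]
    return '\n'.join(lines) if keep_new_lines else ''.join(lines)

run(keep_line_breaks=True)
print(process("これは ペン です\nそれは 本 です"))  # the line break is preserved
```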
GameSentenceMiner/util/configuration.py
CHANGED
@@ -16,13 +16,13 @@ import toml
 from dataclasses_json import dataclass_json
 
 OFF = 'OFF'
-VOSK = 'VOSK'
+# VOSK = 'VOSK'
 SILERO = 'SILERO'
 WHISPER = 'WHISPER'
-GROQ = 'GROQ'
+# GROQ = 'GROQ'
 
-VOSK_BASE = 'BASE'
-VOSK_SMALL = 'SMALL'
+# VOSK_BASE = 'BASE'
+# VOSK_SMALL = 'SMALL'
 
 WHISPER_TINY = 'tiny'
 WHISPER_BASE = 'base'
@@ -33,6 +33,7 @@ WHISPER_TURBO = 'turbo'
 
 AI_GEMINI = 'Gemini'
 AI_GROQ = 'Groq'
+AI_LOCAL = 'Local'
 
 INFO = 'INFO'
 DEBUG = 'DEBUG'
@@ -219,7 +220,7 @@ class VAD:
     whisper_model: str = WHISPER_BASE
     do_vad_postprocessing: bool = True
     language: str = 'ja'
-    vosk_url: str = VOSK_BASE
+    # vosk_url: str = VOSK_BASE
     selected_vad_model: str = WHISPER
     backup_vad_model: str = SILERO
     trim_beginning: bool = False
@@ -234,11 +235,11 @@ class VAD:
     def is_whisper(self):
         return self.selected_vad_model == WHISPER or self.backup_vad_model == WHISPER
 
-    def is_vosk(self):
-        return self.selected_vad_model == VOSK or self.backup_vad_model == VOSK
+    # def is_vosk(self):
+    #     return self.selected_vad_model == VOSK or self.backup_vad_model == VOSK
 
-    def is_groq(self):
-        return self.selected_vad_model == GROQ or self.backup_vad_model == GROQ
+    # def is_groq(self):
+    #     return self.selected_vad_model == GROQ or self.backup_vad_model == GROQ
 
 
 @dataclass_json
@@ -266,6 +267,7 @@ class Ai:
     anki_field: str = ''
     provider: str = AI_GEMINI
     gemini_model: str = 'gemini-2.5-flash'
+    local_model: str = OFF
     groq_model: str = 'meta-llama/llama-4-scout-17b-16e-instruct'
     api_key: str = ''  # Deprecated
     gemini_api_key: str = ''
GameSentenceMiner/util/text_log.py
CHANGED
@@ -20,6 +20,7 @@ class GameLine:
     next: 'GameLine | None'
     index: int = 0
     scene: str = ""
+    TL: str = ""
 
     def get_previous_time(self):
         if self.prev:
@@ -31,6 +32,9 @@ class GameLine:
             return self.next.time
         return 0
 
+    def set_TL(self, tl: str):
+        self.TL = tl
+
     def __str__(self):
         return str({"text": self.text, "time": self.time})
 
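The new `TL` field and `set_TL` helper give each `GameLine` a slot to cache a translation (presumably produced by the AI-prompting changes elsewhere in this release). A trivial usage sketch with a toy stand-in for the dataclass:

```python
from dataclasses import dataclass

@dataclass
class Line:  # toy stand-in for GameLine, only the fields relevant here
    text: str
    TL: str = ""

    def set_TL(self, tl: str) -> None:
        self.TL = tl

line = Line(text="放課後、図書館に行くね。")
line.set_TL("I'll head to the library after school.")
print(line.TL)
```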