GameSentenceMiner 2.8.49__tar.gz → 2.8.51__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/anki.py +3 -2
  2. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/configuration.py +1 -1
  3. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ffmpeg.py +5 -5
  4. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/gsm.py +15 -9
  5. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ocr/owocr_helper.py +51 -68
  6. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/owocr/owocr/ocr.py +9 -2
  7. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/owocr/owocr/run.py +6 -8
  8. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/vad/silero_trim.py +8 -2
  9. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/vad/vosk_helper.py +8 -2
  10. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/vad/whisper_helper.py +9 -3
  11. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/templates/utility.html +15 -3
  12. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner.egg-info/PKG-INFO +1 -1
  13. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/PKG-INFO +1 -1
  14. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/pyproject.toml +1 -1
  15. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/__init__.py +0 -0
  16. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ai/__init__.py +0 -0
  17. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ai/ai_prompting.py +0 -0
  18. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/communication/__init__.py +0 -0
  19. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/communication/send.py +0 -0
  20. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/communication/websocket.py +0 -0
  21. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/config_gui.py +0 -0
  22. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/downloader/Untitled_json.py +0 -0
  23. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/downloader/__init__.py +0 -0
  24. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/downloader/download_tools.py +0 -0
  25. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/downloader/oneocr_dl.py +0 -0
  26. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/electron_config.py +0 -0
  27. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/gametext.py +0 -0
  28. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/model.py +0 -0
  29. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/notification.py +0 -0
  30. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/obs.py +0 -0
  31. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/obs_back.py +0 -0
  32. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ocr/__init__.py +0 -0
  33. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ocr/gsm_ocr_config.py +0 -0
  34. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ocr/ocrconfig.py +0 -0
  35. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ocr/owocr_area_selector.py +0 -0
  36. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/owocr/owocr/__init__.py +0 -0
  37. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/owocr/owocr/__main__.py +0 -0
  38. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/owocr/owocr/config.py +0 -0
  39. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/owocr/owocr/lens_betterproto.py +0 -0
  40. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +0 -0
  41. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/package.py +0 -0
  42. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/ss_selector.py +0 -0
  43. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/text_log.py +0 -0
  44. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/util.py +0 -0
  45. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/vad/__init__.py +0 -0
  46. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/vad/result.py +0 -0
  47. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/__init__.py +0 -0
  48. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/__init__.py +0 -0
  49. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/apple-touch-icon.png +0 -0
  50. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/favicon-96x96.png +0 -0
  51. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/favicon.ico +0 -0
  52. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/favicon.svg +0 -0
  53. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/site.webmanifest +0 -0
  54. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/style.css +0 -0
  55. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/web-app-manifest-192x192.png +0 -0
  56. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/static/web-app-manifest-512x512.png +0 -0
  57. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/templates/__init__.py +0 -0
  58. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/templates/text_replacements.html +0 -0
  59. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner/web/texthooking_page.py +0 -0
  60. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner.egg-info/SOURCES.txt +0 -0
  61. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner.egg-info/dependency_links.txt +0 -0
  62. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner.egg-info/entry_points.txt +0 -0
  63. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner.egg-info/requires.txt +0 -0
  64. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/GameSentenceMiner.egg-info/top_level.txt +0 -0
  65. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/LICENSE +0 -0
  66. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/README.md +0 -0
  67. {gamesentenceminer-2.8.49 → gamesentenceminer-2.8.51}/setup.cfg +0 -0
@@ -40,12 +40,13 @@ def update_anki_card(last_note: AnkiCard, note=None, audio_path='', video_path='
40
40
  if update_audio:
41
41
  audio_in_anki = store_media_file(audio_path)
42
42
  if update_picture:
43
+ logger.info("Getting Screenshot...")
43
44
  screenshot = ffmpeg.get_screenshot(video_path, ss_time)
44
45
  wait_for_stable_file(screenshot)
45
46
  screenshot_in_anki = store_media_file(screenshot)
46
47
  if get_config().paths.remove_screenshot:
47
48
  os.remove(screenshot)
48
- if get_config().anki.previous_image_field:
49
+ if get_config().anki.previous_image_field and game_line.prev:
49
50
  prev_screenshot = ffmpeg.get_screenshot_for_line(video_path, selected_lines[0].prev if selected_lines else game_line.prev)
50
51
  wait_for_stable_file(prev_screenshot)
51
52
  prev_screenshot_in_anki = store_media_file(prev_screenshot)
@@ -184,7 +185,7 @@ def store_media_file(path):
184
185
  return invoke('storeMediaFile', filename=path, data=convert_to_base64(path))
185
186
  except Exception as e:
186
187
  logger.error(f"Error storing media file, check anki card for blank media fields: {e}")
187
- return "None"
188
+ return None
188
189
 
189
190
 
190
191
  def convert_to_base64(file_path):
@@ -46,7 +46,7 @@ class General:
46
46
  use_both_clipboard_and_websocket: bool = False
47
47
  websocket_uri: str = 'localhost:6677'
48
48
  open_config_on_startup: bool = False
49
- open_multimine_on_startup: bool = False
49
+ open_multimine_on_startup: bool = True
50
50
  texthook_replacement_regex: str = ""
51
51
  texthooker_port: int = 55000
52
52
 
@@ -146,19 +146,19 @@ def get_screenshot_time(video_path, game_line, default_beginning=False, vad_resu
146
146
  # logger.info(f"Using VAD result {vad_result} for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
147
147
  if get_config().screenshot.screenshot_timing_setting == "beginning":
148
148
  screenshot_time_from_beginning = line_timestamp_in_video + screenshot_offset
149
- logger.info(f"Using 'beginning' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
149
+ logger.debug(f"Using 'beginning' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
150
150
  elif get_config().screenshot.screenshot_timing_setting == "middle":
151
151
  if game_line.next:
152
152
  screenshot_time_from_beginning = line_timestamp_in_video + ((game_line.next.time - game_line.time).total_seconds() / 2) + screenshot_offset
153
153
  else:
154
154
  screenshot_time_from_beginning = (file_length - ((file_length - line_timestamp_in_video) / 2)) + screenshot_offset
155
- logger.info(f"Using 'middle' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
155
+ logger.debug(f"Using 'middle' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
156
156
  elif get_config().screenshot.screenshot_timing_setting == "end":
157
157
  if game_line.next:
158
158
  screenshot_time_from_beginning = line_timestamp_in_video + (game_line.next.time - game_line.time).total_seconds() - screenshot_offset
159
159
  else:
160
160
  screenshot_time_from_beginning = file_length - screenshot_offset
161
- logger.info(f"Using 'end' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
161
+ logger.debug(f"Using 'end' setting for screenshot time: {screenshot_time_from_beginning} seconds from beginning of replay")
162
162
  else:
163
163
  logger.error(f"Invalid screenshot timing setting: {get_config().screenshot.screenshot_timing_setting}")
164
164
  screenshot_time_from_beginning = line_timestamp_in_video + screenshot_offset
@@ -317,7 +317,7 @@ def trim_audio_based_on_last_line(untrimmed_audio, video_path, game_line, next_l
317
317
  logger.debug(" ".join(ffmpeg_command))
318
318
  subprocess.run(ffmpeg_command)
319
319
 
320
- logger.info(f"{total_seconds_after_offset} trimmed off of beginning")
320
+ logger.debug(f"{total_seconds_after_offset} trimmed off of beginning")
321
321
 
322
322
  logger.debug(f"Audio trimmed and saved to {trimmed_audio}")
323
323
  return trimmed_audio
@@ -412,7 +412,7 @@ def trim_audio(input_audio, start_time, end_time, output_audio):
412
412
  command.extend(['-i', input_audio])
413
413
 
414
414
  if get_config().vad.trim_beginning and start_time > 0:
415
- logger.info(f"trimming beginning to {start_time}")
415
+ logger.debug(f"trimming beginning to {start_time}")
416
416
  command.extend(['-ss', f"{start_time:.2f}"])
417
417
 
418
418
  command.extend([
@@ -186,6 +186,7 @@ class VideoToAudioHandler(FileSystemEventHandler):
186
186
 
187
187
  @staticmethod
188
188
  def get_audio(game_line, next_line_time, video_path, anki_card_creation_time=None, temporary=False, timing_only=False):
189
+ logger.info("Getting audio from video...")
189
190
  trimmed_audio = get_audio_and_trim(video_path, game_line, next_line_time, anki_card_creation_time)
190
191
  if temporary:
191
192
  return trimmed_audio
@@ -195,13 +196,18 @@ class VideoToAudioHandler(FileSystemEventHandler):
195
196
  f"{obs.get_current_game(sanitize=True)}.{get_config().audio.extension}"))
196
197
  result = VADResult(False, 0, 0)
197
198
  if get_config().vad.do_vad_postprocessing:
198
- result = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio, vad_trimmed_audio)
199
+ logger.info("Trimming audio with Voice Detection...")
200
+ result = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio, vad_trimmed_audio, game_line=game_line)
199
201
  if not result.success:
200
202
  result = do_vad_processing(get_config().vad.selected_vad_model, trimmed_audio,
201
- vad_trimmed_audio)
202
- if not result.success and get_config().vad.add_audio_on_no_results:
203
- logger.info("No voice activity detected, using full audio.")
204
- vad_trimmed_audio = trimmed_audio
203
+ vad_trimmed_audio, game_line=game_line)
204
+ if not result.success:
205
+ if get_config().vad.add_audio_on_no_results:
206
+ logger.info("No voice activity detected, using full audio.")
207
+ vad_trimmed_audio = trimmed_audio
208
+ else:
209
+ logger.info("No voice activity detected.")
210
+ return None, result, None
205
211
  if timing_only:
206
212
  return result
207
213
  if get_config().audio.ffmpeg_reencode_options and os.path.exists(vad_trimmed_audio):
@@ -212,19 +218,19 @@ class VideoToAudioHandler(FileSystemEventHandler):
212
218
  return final_audio_output, result, vad_trimmed_audio
213
219
 
214
220
 
215
- def do_vad_processing(model, trimmed_audio, vad_trimmed_audio, second_pass=False):
221
+ def do_vad_processing(model, trimmed_audio, vad_trimmed_audio, game_line=None, second_pass=False):
216
222
  match model:
217
223
  case configuration.OFF:
218
224
  pass
219
225
  case configuration.SILERO:
220
226
  from GameSentenceMiner.vad import silero_trim
221
- return silero_trim.process_audio_with_silero(trimmed_audio, vad_trimmed_audio)
227
+ return silero_trim.process_audio_with_silero(trimmed_audio, vad_trimmed_audio, game_line)
222
228
  case configuration.VOSK:
223
229
  from GameSentenceMiner.vad import vosk_helper
224
- return vosk_helper.process_audio_with_vosk(trimmed_audio, vad_trimmed_audio)
230
+ return vosk_helper.process_audio_with_vosk(trimmed_audio, vad_trimmed_audio, game_line)
225
231
  case configuration.WHISPER:
226
232
  from GameSentenceMiner.vad import whisper_helper
227
- return whisper_helper.process_audio_with_whisper(trimmed_audio, vad_trimmed_audio)
233
+ return whisper_helper.process_audio_with_whisper(trimmed_audio, vad_trimmed_audio, game_line)
228
234
 
229
235
 
230
236
  def play_audio_in_external(filepath):
@@ -180,7 +180,7 @@ class WebsocketServerThread(threading.Thread):
180
180
  finally:
181
181
  self.clients.remove(websocket)
182
182
 
183
- def send_text(self, text, line_time: datetime):
183
+ async def send_text(self, text, line_time: datetime):
184
184
  if text:
185
185
  return asyncio.run_coroutine_threadsafe(
186
186
  self.send_text_coroutine(json.dumps({"sentence": text, "time": line_time.isoformat()})), self.loop)
@@ -212,8 +212,6 @@ def do_second_ocr(ocr1_text, time, img, filtering, scrolling=False):
212
212
  try:
213
213
  orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
214
214
  engine=ocr2)
215
- print(filtering)
216
- print(last_ocr2_result)
217
215
  if scrolling:
218
216
  return text
219
217
  if fuzz.ratio(last_ocr2_result, orig_text) >= 80:
@@ -221,7 +219,7 @@ def do_second_ocr(ocr1_text, time, img, filtering, scrolling=False):
221
219
  return
222
220
  save_result_image(img)
223
221
  last_ocr2_result = orig_text
224
- send_result(text, time)
222
+ asyncio.run(send_result(text, time))
225
223
  except json.JSONDecodeError:
226
224
  print("Invalid JSON received.")
227
225
  except Exception as e:
@@ -238,13 +236,17 @@ def save_result_image(img):
238
236
  img.close()
239
237
 
240
238
 
241
- def send_result(text, time):
239
+ async def send_result(text, time):
242
240
  if text:
243
241
  text = do_text_replacements(text, OCR_REPLACEMENTS_FILE)
244
- if get_config().advanced.ocr_sends_to_clipboard:
242
+ if get_config().advanced.ocr_sends_to_clipboard or ssonly:
245
243
  import pyperclip
246
244
  pyperclip.copy(text)
247
- websocket_server_thread.send_text(text, time)
245
+ if not ssonly:
246
+ try:
247
+ await websocket_server_thread.send_text(text, time)
248
+ except Exception as e:
249
+ logger.debug(f"Error sending text to websocket: {e}")
248
250
 
249
251
 
250
252
  previous_text_list = []
@@ -253,74 +255,57 @@ previous_ocr1_result = "" # Store last OCR1 result
253
255
  last_oneocr_time = None # Store last OCR time
254
256
  text_stable_start_time = None # Store the start time when text becomes stable
255
257
  previous_img = None
256
- orig_text_result = "" # Store original text result
258
+ previous_orig_text = "" # Store original text result
257
259
  TEXT_APPEARENCE_DELAY = get_ocr_scan_rate() * 1000 + 500 # Adjust as needed
258
260
  force_stable = False
259
261
  scrolling_text_images = []
260
262
 
261
- def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering=None):
262
- global twopassocr, ocr2, previous_text, last_oneocr_time, text_stable_start_time, orig_text_result, previous_img, force_stable, previous_ocr1_result, scrolling_text_images, previous_text_list
263
+ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering=None, crop_coords=None):
264
+ global twopassocr, ocr2, previous_text, last_oneocr_time, text_stable_start_time, previous_orig_text, previous_img, force_stable, previous_ocr1_result, scrolling_text_images, previous_text_list
263
265
  orig_text_string = ''.join([item for item in orig_text if item is not None]) if orig_text else ""
264
266
  if came_from_ss:
265
267
  save_result_image(img)
266
- send_result(text, time)
268
+ asyncio.run(send_result(text, time))
267
269
  return
268
270
 
269
271
  line_start_time = time if time else datetime.now()
270
272
 
271
273
  if not twopassocr:
272
- if previous_text and fuzz.ratio(orig_text_string, previous_text) >= 80:
274
+ if previous_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 90:
273
275
  logger.info("Seems like Text we already sent, not doing anything.")
274
276
  return
275
277
  save_result_image(img)
276
- send_result(text, time)
277
- orig_text_result = orig_text_string
278
- previous_text = previous_text
278
+ asyncio.run(send_result(text, time))
279
+ previous_orig_text = orig_text_string
280
+ previous_text = None
279
281
  previous_img = None
280
282
  text_stable_start_time = None
281
283
  last_oneocr_time = None
282
284
  return
283
285
  if not text or force_stable:
284
- # if scrolling_text_images:
285
- # stable_time = text_stable_start_time
286
- # full_text = "".join([do_second_ocr(orig_text_string, line_start_time, img, filtering, True) for img in scrolling_text_images])
287
- # scrolling_text_images = []
288
- # send_result(full_text, stable_time)
289
- # orig_text_result = orig_text_string
290
- # previous_text = previous_text
291
- # previous_img = None
292
- # text_stable_start_time = None
293
- # last_oneocr_time = None
294
286
  force_stable = False
295
- if previous_text:
296
- if text_stable_start_time:
297
- stable_time = text_stable_start_time
298
- previous_img_local = previous_img
299
- if fuzz.ratio(orig_text_string, previous_ocr1_result) >= 90:
300
- logger.info("Seems like Text we already sent, not doing anything.")
301
- return
302
- orig_text_result = orig_text_string
303
- previous_ocr1_result = previous_text
304
- do_second_ocr(previous_text, stable_time, previous_img_local, filtering)
305
- previous_img = None
306
- text_stable_start_time = None
307
- last_oneocr_time = None
308
- return
287
+ if previous_text and text_stable_start_time:
288
+ stable_time = text_stable_start_time
289
+ previous_img_local = previous_img
290
+ if previous_text and fuzz.ratio(orig_text_string, previous_orig_text) >= 90:
291
+ logger.info("Seems like Text we already sent, not doing anything.")
292
+ return
293
+ previous_orig_text = orig_text_string
294
+ previous_ocr1_result = previous_text
295
+ if crop_coords:
296
+ previous_img_local.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
297
+ previous_img_local = previous_img_local.crop(crop_coords)
298
+ do_second_ocr(previous_text, stable_time, previous_img_local, filtering)
299
+ previous_img = None
300
+ previous_text = None
301
+ text_stable_start_time = None
302
+ last_oneocr_time = None
303
+ previous_text = None
309
304
  return
310
- # elif previous_text_list and all(
311
- # fuzz.partial_ratio(token, prev_token) >= 95 for token in orig_text for prev_token in
312
- # previous_text_list[1:]):
313
- # logger.info(f"Previous text: {previous_text_list}. Current text: {orig_text}.")
314
- # logger.info("Seems like Scrolling text potentially...")
315
- # previous_img_local = previous_img
316
- # scrolling_text_images.append(previous_img_local)
317
- # previous_text_list = orig_text
318
- # previous_text = orig_text_string
319
- # return
320
305
 
321
306
  if not text_stable_start_time:
322
307
  text_stable_start_time = line_start_time
323
- previous_text = orig_text_string
308
+ previous_text = text
324
309
  previous_text_list = orig_text
325
310
  last_oneocr_time = line_start_time
326
311
  previous_img = img
@@ -333,15 +318,17 @@ def run_oneocr(ocr_config: OCRConfig, area=False):
333
318
  print("Running OneOCR")
334
319
  screen_area = None
335
320
  screen_areas = []
336
- for rect_config in ocr_config.rectangles:
337
- coords = rect_config.coordinates
338
- monitor_config = rect_config.monitor
339
- screen_area = ",".join(str(c) for c in coords) if area else None
340
- if screen_area:
341
- screen_areas.append(screen_area)
321
+ if not ssonly:
322
+ for rect_config in ocr_config.rectangles:
323
+ coords = rect_config.coordinates
324
+ monitor_config = rect_config.monitor
325
+ screen_area = ",".join(str(c) for c in coords) if area else None
326
+ if screen_area:
327
+ screen_areas.append(screen_area)
342
328
  exclusions = list(rect.coordinates for rect in list(filter(lambda x: x.is_excluded, ocr_config.rectangles)))
343
- run.run(read_from="screencapture",
344
- read_from_secondary="clipboard",
329
+ run.init_config(False)
330
+ run.run(read_from="screencapture" if not ssonly else "clipboard",
331
+ read_from_secondary="clipboard" if not ssonly else None,
345
332
  write_to="callback",
346
333
  screen_capture_area=screen_area,
347
334
  # screen_capture_monitor=monitor_config['index'],
@@ -387,7 +374,7 @@ def set_force_stable_hotkey():
387
374
  print("Press Ctrl+Shift+F to toggle force stable mode.")
388
375
 
389
376
  if __name__ == "__main__":
390
- global ocr1, ocr2, twopassocr, language
377
+ global ocr1, ocr2, twopassocr, language, ssonly
391
378
  import sys
392
379
 
393
380
  args = sys.argv[1:]
@@ -411,6 +398,8 @@ if __name__ == "__main__":
411
398
  ocr1 = "oneocr"
412
399
  ocr2 = "glens"
413
400
  twopassocr = True
401
+
402
+ ssonly = "--ssonly" in args
414
403
  logger.info(f"Received arguments: ocr1={ocr1}, ocr2={ocr2}, twopassocr={twopassocr}")
415
404
  # set_force_stable_hotkey()
416
405
  global ocr_config
@@ -431,15 +420,9 @@ if __name__ == "__main__":
431
420
  if ocr_config:
432
421
  rectangles = list(filter(lambda rect: not rect.is_excluded, ocr_config.rectangles))
433
422
  oneocr_threads = []
434
- run.init_config(False)
435
- if rectangles:
436
- thread = threading.Thread(target=run_oneocr, args=(ocr_config,True, ), daemon=True)
437
- oneocr_threads.append(thread)
438
- thread.start()
439
- else:
440
- single_ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config,False, ), daemon=True)
441
- oneocr_threads.append(single_ocr_thread)
442
- single_ocr_thread.start()
423
+ single_ocr_thread = threading.Thread(target=run_oneocr, args=(ocr_config,ocr_config.rectangles ), daemon=True)
424
+ oneocr_threads.append(single_ocr_thread)
425
+ single_ocr_thread.start()
443
426
  websocket_server_thread = WebsocketServerThread(read=True)
444
427
  websocket_server_thread.start()
445
428
  try:
@@ -17,6 +17,8 @@ from google.generativeai import GenerationConfig
17
17
  from loguru import logger
18
18
  import requests
19
19
 
20
+ from ...configuration import get_temporary_directory
21
+
20
22
  try:
21
23
  from manga_ocr import MangaOcr as MOCR
22
24
  except ImportError:
@@ -765,11 +767,16 @@ class OneOCR:
765
767
  img = input_to_pil_image(img)
766
768
  if not img:
767
769
  return (False, 'Invalid image provided')
768
-
770
+ crop_coords = None
769
771
  if sys.platform == 'win32':
770
772
  try:
771
773
  ocr_resp = self.model.recognize_pil(img)
772
774
  # print(json.dumps(ocr_resp))
775
+ x_coords = [line['bounding_rect'][f'x{i}'] for line in ocr_resp['lines'] for i in range(1, 5)]
776
+ y_coords = [line['bounding_rect'][f'y{i}'] for line in ocr_resp['lines'] for i in range(1, 5)]
777
+ if x_coords and y_coords:
778
+ crop_coords = (min(x_coords) - 5, min(y_coords) - 5, max(x_coords) + 5, max(y_coords) + 5)
779
+
773
780
  res = ocr_resp['text']
774
781
  except RuntimeError as e:
775
782
  return (False, e)
@@ -787,7 +794,7 @@ class OneOCR:
787
794
 
788
795
  res = res.json()['text']
789
796
 
790
- x = (True, res)
797
+ x = (True, res, crop_coords)
791
798
 
792
799
  # img.close()
793
800
  return x
@@ -381,10 +381,7 @@ class TextFiltering:
381
381
  else:
382
382
  orig_text_filtered.append(None)
383
383
 
384
- if not isinstance(last_result, tuple):
385
- print(type(last_result))
386
384
  if isinstance(last_result, list):
387
- print("last_result is a list")
388
385
  last_text = last_result
389
386
  elif last_result and last_result[1] == engine_index:
390
387
  last_text = last_result[0]
@@ -406,7 +403,6 @@ class TextFiltering:
406
403
  break
407
404
  else:
408
405
  for block in new_blocks:
409
- print(block)
410
406
  if lang not in ["ja", "zh"] or self.classify(block)[0] == lang:
411
407
  final_blocks.append(block)
412
408
 
@@ -752,7 +748,6 @@ class ScreenshotClass:
752
748
 
753
749
  if rand_int == 1:
754
750
  img.save(os.path.join(get_temporary_directory(), 'after_crop.png'), 'PNG')
755
- print(f'OCR images saved to {get_temporary_directory()} if debugging is needed, this is 1/20 chance')
756
751
 
757
752
  return img
758
753
 
@@ -897,7 +892,9 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
897
892
  engine_color = config.get_general('engine_color')
898
893
 
899
894
  start_time = time.time()
900
- res, text = engine_instance(img_or_path)
895
+ result = engine_instance(img_or_path)
896
+ res, text, crop_coords = (*result, None)[:3]
897
+
901
898
  end_time = time.time()
902
899
 
903
900
  if not res and ocr_2 == engine:
@@ -908,7 +905,8 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
908
905
  if last_result:
909
906
  last_result = []
910
907
  break
911
- res, text = engine_instance(img_or_path)
908
+ result = engine_instance(img_or_path)
909
+ res, text, crop_coords = (*result, None)[:3]
912
910
 
913
911
  orig_text = []
914
912
  # print(filtering)
@@ -933,7 +931,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
933
931
  elif write_to == 'clipboard':
934
932
  pyperclipfix.copy(text)
935
933
  elif write_to == "callback":
936
- txt_callback(text, orig_text, ocr_start_time, img_or_path, bool(engine), filtering)
934
+ txt_callback(text, orig_text, ocr_start_time, img_or_path, bool(engine), filtering, crop_coords)
937
935
  elif write_to:
938
936
  with Path(write_to).open('a', encoding='utf-8') as f:
939
937
  f.write(text + '\n')
@@ -27,7 +27,7 @@ def detect_voice_with_silero(input_audio):
27
27
 
28
28
 
29
29
  # Example usage of Silero with trimming
30
- def process_audio_with_silero(input_audio, output_audio):
30
+ def process_audio_with_silero(input_audio, output_audio, game_line):
31
31
  voice_activity = detect_voice_with_silero(input_audio)
32
32
 
33
33
  if not voice_activity:
@@ -35,7 +35,13 @@ def process_audio_with_silero(input_audio, output_audio):
35
35
 
36
36
  # Trim based on the first and last speech detected
37
37
  start_time = voice_activity[0]['start'] if voice_activity else 0
38
- end_time = voice_activity[-1]['end'] if voice_activity else 0
38
+ if (game_line.next and len(voice_activity) > 1
39
+ and voice_activity[-1]['end'] - get_config().audio.beginning_offset > len(input_audio) / 16000
40
+ and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
41
+ end_time = voice_activity[-2]['end']
42
+ logger.info("Using the second last timestamp for trimming")
43
+ else:
44
+ end_time = voice_activity[-1]['end'] if voice_activity else 0
39
45
 
40
46
  # Trim the audio using FFmpeg
41
47
  ffmpeg.trim_audio(input_audio, start_time + get_config().vad.beginning_offset, end_time + get_config().audio.end_offset, output_audio)
@@ -123,7 +123,7 @@ def detect_voice_with_vosk(input_audio):
123
123
 
124
124
 
125
125
  # Example usage of Vosk with trimming
126
- def process_audio_with_vosk(input_audio, output_audio):
126
+ def process_audio_with_vosk(input_audio, output_audio, game_line):
127
127
  voice_activity, total_duration = detect_voice_with_vosk(input_audio)
128
128
 
129
129
  if not voice_activity:
@@ -132,7 +132,13 @@ def process_audio_with_vosk(input_audio, output_audio):
132
132
 
133
133
  # Trim based on the first and last speech detected
134
134
  start_time = voice_activity[0]['start'] if voice_activity else 0
135
- end_time = voice_activity[-1]['end'] if voice_activity else total_duration
135
+ if (game_line.next and len(voice_activity) > 1
136
+ and voice_activity[-1]['end'] - get_config().audio.beginning_offset > len(input_audio) / 16000
137
+ and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
138
+ end_time = voice_activity[-2]['end']
139
+ logger.info("Using the second last timestamp for trimming")
140
+ else:
141
+ end_time = voice_activity[-1]['end'] if voice_activity else 0
136
142
 
137
143
  if get_config().vad.trim_beginning:
138
144
  logger.info(f"VAD Trimmed Beginning of Audio to {start_time}")
@@ -70,7 +70,7 @@ def detect_voice_with_whisper(input_audio):
70
70
 
71
71
 
72
72
  # Example usage of Whisper with trimming
73
- def process_audio_with_whisper(input_audio, output_audio):
73
+ def process_audio_with_whisper(input_audio, output_audio, game_line):
74
74
  voice_activity = detect_voice_with_whisper(input_audio)
75
75
 
76
76
  if not voice_activity:
@@ -78,8 +78,14 @@ def process_audio_with_whisper(input_audio, output_audio):
78
78
  return VADResult(False, 0, 0)
79
79
 
80
80
  # Trim based on the first and last speech detected
81
- start_time = voice_activity[0]['start']
82
- end_time = voice_activity[-1]['end']
81
+ start_time = voice_activity[0]['start'] if voice_activity else 0
82
+ if (game_line.next and len(voice_activity) > 1
83
+ and voice_activity[-1]['end'] - get_config().audio.beginning_offset > len(input_audio) / 16000
84
+ and (voice_activity[-1]['start'] - voice_activity[-2]['end']) > 3.0):
85
+ end_time = voice_activity[-2]['end']
86
+ logger.info("Using the second last timestamp for trimming")
87
+ else:
88
+ end_time = voice_activity[-1]['end'] if voice_activity else 0
83
89
 
84
90
  if get_config().vad.trim_beginning:
85
91
  logger.info(f"VAD Trimmed Beginning of Audio to {start_time}")
@@ -126,7 +126,6 @@
126
126
  <script>
127
127
  let mainStyle = document.querySelector('head style');
128
128
  let deleteHistoryButton = document.getElementById('delete-history');
129
- console.log(mainStyle);
130
129
  let displayedEventIds = new Set();
131
130
  let isTabActive = true;
132
131
  let isFetching = false; // Flag to track if a fetch is in progress
@@ -141,6 +140,8 @@
141
140
  let hoveredCheckboxes = new Set();
142
141
  let checkboxes = []; // Will hold all checkbox elements
143
142
  let checkboxMap = {};
143
+ let textLines = []; // Will hold all textline elements
144
+ let textLineMap = {};
144
145
  let checkboxes_being_updated = new Set();
145
146
 
146
147
  // Shift click selection variable
@@ -170,7 +171,6 @@
170
171
  }
171
172
  }
172
173
  if (!ev.history) {
173
- console.log(checkboxMap[ev.id])
174
174
  if (!checkboxes_being_updated.has(ev.id)) {
175
175
  const checkbox = checkboxMap[ev.id];
176
176
  if (checkbox) {
@@ -219,6 +219,7 @@
219
219
  });
220
220
  }
221
221
 
222
+
222
223
  function addNewEvent(event) {
223
224
  displayedEventIds.add(event.id);
224
225
  const container = document.getElementById('session-events');
@@ -237,7 +238,7 @@
237
238
  aria-label="Mark item"
238
239
  data-event-id="${event.id}"
239
240
  onchange="toggleCheckbox('${event.id}', this.checked)">
240
- <p>${event.text}</p>
241
+ <p id="textline-${event.id}" contenteditable="false" ondblclick="this.contentEditable = this.contentEditable === 'true' ? 'false' : 'true'; if (this.contentEditable === 'true') this.focus();">${event.text}</p>
241
242
  <div class="textline-buttons">
242
243
  <button onclick="buttonClick('${event.id}', 'Screenshot')" title="Screenshot" style="background-color: #333; color: #fff; border: 1px solid #555; padding: 6px 10px; font-size: 10px; border-radius: 4px; cursor: pointer; transition: background-color 0.3s;">
243
244
  &#x1F4F7;
@@ -258,6 +259,11 @@
258
259
  checkboxes.push(checkbox);
259
260
  checkboxMap[event.id] = checkbox; // Store the checkbox in the map for easy access
260
261
 
262
+ let textline = shadowRoot.querySelector('#textline-' + event.id);
263
+ textLines.push(textline);
264
+ textLineMap[event.id] = textline; // Store the textline in the map for easy access
265
+
266
+
261
267
  container.appendChild(div);
262
268
  window.scrollTo({
263
269
  top: document.documentElement.scrollHeight,
@@ -286,6 +292,12 @@
286
292
  });
287
293
  }
288
294
 
295
+ function textDoubleClicked(id) {
296
+ const textElement = textLineMap[id];
297
+ console.log(textElement);
298
+ textElement.contenteditable = textElement.contenteditable === "true" ? "false" : "true";
299
+ }
300
+
289
301
  async function toggleCheckbox(id, checked) {
290
302
  try {
291
303
  checkboxes_being_updated.add(id);
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.8.49
3
+ Version: 2.8.51
4
4
  Summary: A tool for mining sentences from games. Update: Multi-Line Mining! Fixed!
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: GameSentenceMiner
3
- Version: 2.8.49
3
+ Version: 2.8.51
4
4
  Summary: A tool for mining sentences from games. Update: Multi-Line Mining! Fixed!
5
5
  Author-email: Beangate <bpwhelan95@gmail.com>
6
6
  License: MIT License
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
 
8
8
  [project]
9
9
  name = "GameSentenceMiner"
10
- version = "2.8.49"
10
+ version = "2.8.51"
11
11
  description = "A tool for mining sentences from games. Update: Multi-Line Mining! Fixed!"
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.10"