GameSentenceMiner 2.19.6__py3-none-any.whl → 2.19.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of GameSentenceMiner might be problematic. See the advisory on the package registry page for more details.

GameSentenceMiner/anki.py CHANGED
@@ -28,7 +28,7 @@ import re
28
28
  import platform
29
29
  import sys
30
30
 
31
- from dataclasses import dataclass
31
+ from dataclasses import dataclass, field
32
32
  from typing import Dict, Any, List
33
33
 
34
34
  # Global variables to track state
@@ -58,6 +58,8 @@ class MediaAssets:
58
58
  final_prev_screenshot_path: str = ''
59
59
  final_video_path: str = ''
60
60
 
61
+ extra_tags: List[str] = field(default_factory=list)
62
+
61
63
 
62
64
  def _determine_update_conditions(last_note: 'AnkiCard') -> (bool, bool):
63
65
  """Determine if audio and picture fields should be updated."""
@@ -83,7 +85,10 @@ def _generate_media_files(reuse_audio: bool, game_line: 'GameLine', video_path:
83
85
  assets.screenshot_in_anki = anki_result.screenshot_in_anki
84
86
  assets.prev_screenshot_in_anki = anki_result.prev_screenshot_in_anki
85
87
  assets.video_in_anki = anki_result.video_in_anki
88
+ assets.extra_tags = anki_result.extra_tags
86
89
  return assets
90
+
91
+ assets.extra_tags = []
87
92
 
88
93
  # --- Generate new media files ---
89
94
  if config.anki.picture_field and config.screenshot.enabled:
@@ -242,7 +247,7 @@ def update_anki_card(last_note: 'AnkiCard', note=None, audio_path='', video_path
242
247
 
243
248
  # Add NSFW tag if checkbox was selected
244
249
  if add_nsfw_tag:
245
- tags.append("NSFW")
250
+ assets.extra_tags.append("NSFW")
246
251
 
247
252
  # 5. If creating new media, store files in Anki's collection. Then update note fields.
248
253
  if not use_existing_files:
@@ -266,6 +271,9 @@ def update_anki_card(last_note: 'AnkiCard', note=None, audio_path='', video_path
266
271
  if config.audio.external_tool and config.audio.external_tool_enabled:
267
272
  anki_media_audio_path = os.path.join(config.audio.anki_media_collection, assets.audio_in_anki)
268
273
  open_audio_in_external(anki_media_audio_path)
274
+
275
+ for extra_tag in assets.extra_tags:
276
+ tags.append(extra_tag)
269
277
 
270
278
  # 6. Asynchronously update the note in Anki
271
279
  run_new_thread(lambda: check_and_update_note(last_note, note, tags))
@@ -284,7 +292,8 @@ def update_anki_card(last_note: 'AnkiCard', note=None, audio_path='', video_path
284
292
  multi_line=bool(selected_lines and len(selected_lines) > 1),
285
293
  video_in_anki=assets.video_in_anki or '',
286
294
  word_path=word_path,
287
- word=tango
295
+ word=tango,
296
+ extra_tags=assets.extra_tags
288
297
  )
289
298
 
290
299
  # 9. Update the local application database with final paths
@@ -358,6 +358,10 @@
358
358
  "hotkey_updates_anki": {
359
359
  "label": "Screenshot Hotkey Updates Anki:",
360
360
  "tooltip": "Enable to allow Screenshot hotkey/button to update the latest anki card."
361
+ },
362
+ "trim_black_bars": {
363
+ "label": "Trim Black Bars:",
364
+ "tooltip": "Automatically trim black bars from screenshots. Useful for games with letterboxing/pillarboxing (e.g., 4:3 games on 16:9 displays)."
361
365
  }
362
366
  },
363
367
  "audio": {
@@ -357,6 +357,10 @@
357
357
  "hotkey_updates_anki": {
358
358
  "label": "ホットキーでAnkiを更新:",
359
359
  "tooltip": "撮影ホットキーで最新のAnkiカードを更新できるようにします。"
360
+ },
361
+ "trim_black_bars": {
362
+ "label": "黒帯をトリミング:",
363
+ "tooltip": "スクリーンショットから黒帯を自動的にトリミングします。レターボックス/ピラーボックスのあるゲーム(16:9ディスプレイ上の4:3ゲームなど)に便利です。"
360
364
  }
361
365
  },
362
366
  "audio": {
@@ -358,6 +358,10 @@
358
358
  "hotkey_updates_anki": {
359
359
  "label": "截图热键更新 Anki:",
360
360
  "tooltip": "允许截图热键/按钮更新最新的 Anki 卡片。"
361
+ },
362
+ "trim_black_bars": {
363
+ "label": "裁剪黑边:",
364
+ "tooltip": "自动裁剪截图中的黑边。适用于有信箱/柱状框的游戏(例如在 16:9 显示器上的 4:3 游戏)。"
361
365
  }
362
366
  },
363
367
  "audio": {
GameSentenceMiner/obs.py CHANGED
@@ -565,6 +565,24 @@ def get_active_source():
565
565
  return None
566
566
  return get_source_from_scene(current_game)
567
567
 
568
+ def get_active_video_sources():
569
+ current_game = get_current_game()
570
+ if not current_game:
571
+ return None
572
+ scene_items_response = []
573
+ try:
574
+ with connection_pool.get_client() as client:
575
+ client: obs.ReqClient
576
+ response = client.get_scene_item_list(name=current_game)
577
+ scene_items_response = response.scene_items if response else []
578
+ except Exception as e:
579
+ logger.error(f"Error getting scene items for active video source: {e}")
580
+ return None
581
+ if not scene_items_response:
582
+ return None
583
+ video_sources = ['window_capture', 'game_capture', 'monitor_capture']
584
+ return [item for item in scene_items_response if item.get('inputKind') in video_sources]
585
+
568
586
  def get_record_directory():
569
587
  try:
570
588
  with connection_pool.get_client() as client:
@@ -709,32 +727,143 @@ def get_screenshot_base64(compression=75, width=None, height=None):
709
727
  return None
710
728
 
711
729
 
712
- def get_screenshot_PIL(source_name=None, compression=75, img_format='png', width=None, height=None, retry=3):
730
+ def get_screenshot_PIL_from_source(source_name, compression=75, img_format='png', width=None, height=None, retry=3):
731
+ """
732
+ Get a PIL Image screenshot from a specific OBS source.
733
+
734
+ Args:
735
+ source_name: The name of the OBS source to capture
736
+ compression: Image quality (0-100)
737
+ img_format: Image format ('png' or 'jpg')
738
+ width: Optional width to resize
739
+ height: Optional height to resize
740
+ retry: Number of retry attempts
741
+
742
+ Returns:
743
+ PIL.Image or None if failed
744
+ """
713
745
  import io
714
746
  import base64
715
747
  from PIL import Image
748
+
716
749
  if not source_name:
717
- source_name = get_active_source().get('sourceName', None)
718
- if not source_name:
719
- logger.error("No active source found in the current scene.")
750
+ logger.error("No source name provided.")
720
751
  return None
721
- while True:
722
- with connection_pool.get_client() as client:
723
- client: obs.ReqClient
724
- response = client.get_source_screenshot(name=source_name, img_format=img_format, quality=compression, width=width, height=height)
752
+
753
+ for attempt in range(retry):
725
754
  try:
726
- response.image_data = response.image_data.split(',', 1)[-1] # Remove data:image/png;base64, prefix if present
755
+ with connection_pool.get_client() as client:
756
+ client: obs.ReqClient
757
+ response = client.get_source_screenshot(name=source_name, img_format=img_format, quality=compression, width=width, height=height)
758
+
759
+ if response and hasattr(response, 'image_data') and response.image_data:
760
+ image_data = response.image_data.split(',', 1)[-1] # Remove data:image/png;base64, prefix if present
761
+ image_data = base64.b64decode(image_data)
762
+ img = Image.open(io.BytesIO(image_data)).convert("RGBA")
763
+ return img
727
764
  except AttributeError:
728
- retry -= 1
729
- if retry <= 0:
730
- logger.error(f"Error getting screenshot: {response}")
765
+ if attempt >= retry - 1:
766
+ logger.error(f"Error getting screenshot from source '{source_name}': Invalid response")
731
767
  return None
768
+ time.sleep(0.1)
769
+ except Exception as e:
770
+ logger.error(f"Error getting screenshot from source '{source_name}': {e}")
771
+ return None
772
+
773
+ return None
774
+
775
+
776
+ def get_best_source_for_screenshot():
777
+ """
778
+ Get the best available video source dict based on priority and image validation.
779
+
780
+ Priority order: window_capture > game_capture > monitor_capture
781
+
782
+ Returns:
783
+ The source dict of the best available source, or None if no valid source found.
784
+ """
785
+ return get_screenshot_PIL(return_source_dict=True)
786
+
787
+
788
+ def get_screenshot_PIL(source_name=None, compression=75, img_format='png', width=None, height=None, retry=3, return_source_dict=False):
789
+ """
790
+ Get a PIL Image screenshot. If no source_name is provided, automatically selects
791
+ the best available source based on priority and validates it has actual image data.
792
+
793
+ Priority order: window_capture > game_capture > monitor_capture
794
+
795
+ Args:
796
+ source_name: Optional specific OBS source name. If None, auto-selects best source.
797
+ compression: Image quality (0-100)
798
+ img_format: Image format ('png' or 'jpg')
799
+ width: Optional width to resize
800
+ height: Optional height to resize
801
+ retry: Number of retry attempts
802
+ return_source_dict: If True, returns only the source dict. If False, returns only the PIL.Image.
803
+
804
+ Returns:
805
+ PIL.Image if return_source_dict=False, or source dict if return_source_dict=True.
806
+ Returns None if failed.
807
+ """
808
+ import io
809
+ import base64
810
+ from PIL import Image
811
+
812
+ # If source_name is provided, use it directly
813
+ if source_name:
814
+ if return_source_dict:
815
+ # Need to find the source dict for this source_name
816
+ current_sources = get_active_video_sources()
817
+ if current_sources:
818
+ for src in current_sources:
819
+ if src.get('sourceName') == source_name:
820
+ return src
821
+ return None
822
+ img = get_screenshot_PIL_from_source(source_name, compression, img_format, width, height, retry)
823
+ return img
824
+
825
+ # Get all available video sources
826
+ current_sources = get_active_video_sources()
827
+ if not current_sources:
828
+ logger.error("No active video sources found in the current scene.")
829
+ return None
830
+
831
+ # Priority: window_capture (0) > game_capture (1) > monitor_capture (2)
832
+ priority_map = {'window_capture': 0, 'game_capture': 1, 'monitor_capture': 2}
833
+
834
+ # Sort sources by priority
835
+ sorted_sources = sorted(
836
+ current_sources,
837
+ key=lambda x: priority_map.get(x.get('inputKind'), 999)
838
+ )
839
+
840
+ # Try each source in priority order
841
+ for source in sorted_sources:
842
+ found_source_name = source.get('sourceName')
843
+ if not found_source_name:
732
844
  continue
733
- if response and response.image_data:
734
- image_data = response.image_data.split(',', 1)[-1] # Remove data:image/png;base64, prefix if present
735
- image_data = base64.b64decode(image_data)
736
- img = Image.open(io.BytesIO(image_data)).convert("RGBA")
737
- return img
845
+
846
+ img = get_screenshot_PIL_from_source(found_source_name, compression, img_format, width, height, retry)
847
+
848
+ if img:
849
+ # Validate that the image has actual content (not completely empty/black)
850
+ try:
851
+ extrema = img.getextrema()
852
+ if isinstance(extrema[0], tuple):
853
+ is_empty = all(e[0] == e[1] for e in extrema)
854
+ else:
855
+ is_empty = extrema[0] == extrema[1]
856
+
857
+ if not is_empty:
858
+ return source if return_source_dict else img
859
+ else:
860
+ logger.debug(f"Source '{found_source_name}' returned an empty image, trying next source")
861
+ except Exception as e:
862
+ logger.warning(f"Failed to validate image from source '{found_source_name}': {e}")
863
+ # If validation fails, still return the image as it might be valid
864
+ return source if return_source_dict else img
865
+
866
+ logger.error("No active source with valid image data found.")
738
867
  return None
739
868
 
740
869
 
@@ -915,6 +1044,13 @@ def create_scene():
915
1044
  if __name__ == '__main__':
916
1045
  logging.basicConfig(level=logging.INFO)
917
1046
  connect_to_obs_sync()
1047
+ try:
1048
+ with connection_pool.get_client() as client:
1049
+ client: obs.ReqClient
1050
+ resp = client.get_scene_item_list(get_current_scene())
1051
+ print(resp.scene_items)
1052
+ except Exception as e:
1053
+ print(f"Error: {e}")
918
1054
 
919
1055
  # outputs = get_output_list()
920
1056
  # print(outputs)
@@ -54,8 +54,11 @@ class ScreenSelector:
54
54
  raise RuntimeError("mss is required for screen selection.")
55
55
 
56
56
  if self.use_obs_screenshot:
57
- print("Using OBS screenshot as target.")
58
- self.screenshot_img = obs.get_screenshot_PIL(compression=75)
57
+ sources = obs.get_active_video_sources()
58
+ best_source = obs.get_best_source_for_screenshot()
59
+ if len(sources) > 1:
60
+ logger.warning(f"Warning: Multiple active video sources found in OBS. Using '{best_source.get('sourceName')}' for screenshot. Please ensure only one source is active for best results.")
61
+ self.screenshot_img = obs.get_screenshot_PIL(compression=100, img_format='jpg')
59
62
  # print(screenshot_base64)
60
63
  if not self.screenshot_img:
61
64
  raise RuntimeError("Failed to get OBS screenshot.")
@@ -391,7 +391,7 @@ def text_callback(text, orig_text, time, img=None, came_from_ss=False, filtering
391
391
  stable_time = last_meiki_crop_time
392
392
  previous_img_local = previous_img
393
393
  pre_crop_image = previous_img_local
394
- ocr2_image = get_ocr2_image(crop_coords, og_image=previous_img_local, ocr2_engine=get_ocr_ocr2())
394
+ ocr2_image = get_ocr2_image(crop_coords, og_image=previous_img_local, ocr2_engine=get_ocr_ocr2(), extra_padding=10)
395
395
  # Use the earlier timestamp for when the stable crop started if available
396
396
  # ocr2_image.show()
397
397
  second_ocr_queue.put((text, stable_time, ocr2_image, filtering, pre_crop_image))
@@ -482,22 +482,54 @@ done = False
482
482
  # Create a queue for tasks
483
483
  second_ocr_queue = queue.Queue()
484
484
 
485
- def get_ocr2_image(crop_coords, og_image: Image.Image, ocr2_engine=None):
485
+ def get_ocr2_image(crop_coords, og_image: Image.Image, ocr2_engine=None, extra_padding=0):
486
486
  """
487
487
  Returns the image to use for the second OCR pass, cropping and scaling as needed.
488
488
  Logic is unchanged, but code is refactored for clarity and maintainability.
489
489
  """
490
490
  def return_original_image():
491
+ """Return a (possibly cropped) PIL.Image based on the original image and padding."""
491
492
  logger.debug("Returning original image for OCR2 (no cropping or optimization).")
493
+ # Convert bytes to PIL.Image if necessary
494
+ img = og_image
495
+ if isinstance(og_image, (bytes, bytearray)):
496
+ try:
497
+ img = Image.open(io.BytesIO(og_image)).convert('RGB')
498
+ except Exception:
499
+ # If conversion fails, just return og_image as-is
500
+ return og_image
501
+
492
502
  if not crop_coords or not get_ocr_optimize_second_scan():
493
- return og_image
503
+ return img
504
+
494
505
  x1, y1, x2, y2 = crop_coords
495
- x1 = min(max(0, x1), og_image.width)
496
- y1 = min(max(0, y1), og_image.height)
497
- x2 = min(max(0, x2), og_image.width)
498
- y2 = min(max(0, y2), og_image.height)
499
- og_image.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
500
- return og_image.crop((x1, y1, x2, y2))
506
+ # Apply integer padding (can be negative to shrink)
507
+ pad = int(extra_padding or 0)
508
+ x1 = x1 - pad
509
+ y1 = y1 - pad
510
+ x2 = x2 + pad
511
+ y2 = y2 + pad
512
+
513
+ # Clamp coordinates to image bounds
514
+ x1 = min(max(0, int(x1)), img.width)
515
+ y1 = min(max(0, int(y1)), img.height)
516
+ x2 = min(max(0, int(x2)), img.width)
517
+ y2 = min(max(0, int(y2)), img.height)
518
+
519
+ # Ensure at least a 1-pixel width/height
520
+ if x2 <= x1:
521
+ x2 = min(img.width, x1 + 1)
522
+ x1 = max(0, x2 - 1)
523
+ if y2 <= y1:
524
+ y2 = min(img.height, y1 + 1)
525
+ y1 = max(0, y2 - 1)
526
+
527
+ try:
528
+ img.save(os.path.join(get_temporary_directory(), "pre_oneocrcrop.png"))
529
+ except Exception:
530
+ # don't fail just because we couldn't save a debug image
531
+ logger.debug("Could not save pre_oneocrcrop.png for debugging")
532
+ return img.crop((x1, y1, x2, y2))
501
533
 
502
534
  # TODO Get rid of this check, and just always convert to full res
503
535
  LOCAL_OCR_ENGINES = ['easyocr', 'oneocr', 'rapidocr', 'mangaocr', 'winrtocr']
@@ -541,16 +573,33 @@ def get_ocr2_image(crop_coords, og_image: Image.Image, ocr2_engine=None):
541
573
  x2 = int(crop_coords[2] * width_ratio)
542
574
  y2 = int(crop_coords[3] * height_ratio)
543
575
 
576
+ # Scale padding separately for X and Y
577
+ pad_x = int(round((extra_padding or 0) * width_ratio))
578
+ pad_y = int(round((extra_padding or 0) * height_ratio))
579
+
580
+ x1 = x1 - pad_x
581
+ y1 = y1 - pad_y
582
+ x2 = x2 + pad_x
583
+ y2 = y2 + pad_y
584
+
544
585
  # Clamp coordinates to image bounds
545
- x1 = min(max(0, x1), img.width)
546
- y1 = min(max(0, y1), img.height)
547
- x2 = min(max(0, x2), img.width)
548
- y2 = min(max(0, y2), img.height)
549
-
586
+ x1 = min(max(0, int(x1)), img.width)
587
+ y1 = min(max(0, int(y1)), img.height)
588
+ x2 = min(max(0, int(x2)), img.width)
589
+ y2 = min(max(0, int(y2)), img.height)
590
+
591
+ # Ensure at least a 1-pixel width/height
592
+ if x2 <= x1:
593
+ x2 = min(img.width, x1 + 1)
594
+ x1 = max(0, x2 - 1)
595
+ if y2 <= y1:
596
+ y2 = min(img.height, y1 + 1)
597
+ y1 = max(0, y2 - 1)
598
+
550
599
  logger.debug(f"Scaled crop coordinates: {(x1, y1, x2, y2)}")
551
-
600
+
552
601
  img = run.apply_ocr_config_to_image(img, ocr_config_local, is_secondary=False)
553
-
602
+
554
603
  ret = img.crop((x1, y1, x2, y2))
555
604
  return ret
556
605
 
@@ -763,7 +812,7 @@ if __name__ == "__main__":
763
812
  try:
764
813
  while not done:
765
814
  time.sleep(1)
766
- except KeyboardInterrupt as e:
815
+ except KeyboardInterrupt:
767
816
  pass
768
817
  else:
769
818
  print("Failed to load OCR configuration. Please check the logs.")
@@ -1038,7 +1038,8 @@ class OBSScreenshotThread(threading.Thread):
1038
1038
  def init_config(self, source=None, scene=None):
1039
1039
  import GameSentenceMiner.obs as obs
1040
1040
  obs.update_current_game()
1041
- self.current_source = source if source else obs.get_active_source()
1041
+ current_sources = obs.get_active_video_sources()
1042
+ self.current_source = source if source else obs.get_best_source_for_screenshot()
1042
1043
  logger.debug(f"Current OBS source: {self.current_source}")
1043
1044
  self.source_width = self.current_source.get(
1044
1045
  "sceneItemTransform").get("sourceWidth") or self.width
@@ -1056,6 +1057,8 @@ class OBSScreenshotThread(threading.Thread):
1056
1057
  f"Using source dimensions: {self.width}x{self.height}")
1057
1058
  self.current_source_name = self.current_source.get(
1058
1059
  "sourceName") or None
1060
+ if len(current_sources) > 1:
1061
+ logger.error(f"Multiple active video sources found in OBS. Using {self.current_source_name} for Screenshot. Please ensure only one source is active for best results.")
1059
1062
  self.current_scene = scene if scene else obs.get_current_game()
1060
1063
  self.ocr_config = get_scene_ocr_config(refresh=True)
1061
1064
  if not self.ocr_config:
@@ -1394,7 +1397,7 @@ def process_and_write_results(img_or_path, write_to=None, last_result=None, filt
1394
1397
  if res:
1395
1398
  if 'provider' in text:
1396
1399
  if write_to == 'callback':
1397
- logger.opt(ansi=True).info(f"{len(text['boxes'])} text boxes recognized using Meiki:")
1400
+ logger.opt(ansi=True).info(f"{len(text['boxes'])} text boxes recognized in {end_time - start_time:0.03f}s using Meiki:")
1398
1401
  txt_callback('', '', ocr_start_time,
1399
1402
  img_or_path, is_second_ocr, filtering, text.get('crop_coords', None), meiki_boxes=text.get('boxes', []))
1400
1403
  return str(text), str(text)
@@ -449,6 +449,7 @@ class ConfigApp:
449
449
  self.screenshot_timing_value = tk.StringVar(value=self.settings.screenshot.screenshot_timing_setting)
450
450
  self.use_screenshot_selector_value = tk.BooleanVar(value=self.settings.screenshot.use_screenshot_selector)
451
451
  self.animated_screenshot_value = tk.BooleanVar(value=self.settings.screenshot.animated)
452
+ self.trim_black_bars_value = tk.BooleanVar(value=self.settings.screenshot.trim_black_bars_wip)
452
453
 
453
454
  # Audio Settings
454
455
  self.audio_enabled_value = tk.BooleanVar(value=self.settings.audio.enabled)
@@ -703,6 +704,7 @@ class ConfigApp:
703
704
  seconds_after_line=float(self.seconds_after_line_value.get()) if self.seconds_after_line_value.get() else 0.0,
704
705
  screenshot_timing_setting=self.screenshot_timing_value.get(),
705
706
  use_screenshot_selector=self.use_screenshot_selector_value.get(),
707
+ trim_black_bars_wip=self.trim_black_bars_value.get(),
706
708
  ),
707
709
  audio=Audio(
708
710
  enabled=self.audio_enabled_value.get(),
@@ -771,6 +773,7 @@ class ConfigApp:
771
773
  use_canned_context_prompt=self.use_canned_context_prompt_value.get(),
772
774
  custom_prompt=self.custom_prompt.get("1.0", tk.END).strip(),
773
775
  dialogue_context_length=int(self.ai_dialogue_context_length_value.get()),
776
+ custom_texthooker_prompt=self.custom_texthooker_prompt.get("1.0", tk.END).strip(),
774
777
  ),
775
778
  overlay=Overlay(
776
779
  websocket_port=int(self.overlay_websocket_port_value.get()),
@@ -1765,6 +1768,14 @@ class ConfigApp:
1765
1768
  row=self.current_row, column=1, sticky='W', pady=2)
1766
1769
  self.current_row += 1
1767
1770
 
1771
+ trim_black_bars_i18n = ss_i18n.get('trim_black_bars', {})
1772
+ HoverInfoLabelWidget(screenshot_frame, text=trim_black_bars_i18n.get('label', '...'),
1773
+ tooltip=trim_black_bars_i18n.get('tooltip', '...'),
1774
+ row=self.current_row, column=0)
1775
+ ttk.Checkbutton(screenshot_frame, variable=self.trim_black_bars_value, bootstyle="round-toggle").grid(
1776
+ row=self.current_row, column=1, sticky='W', pady=2)
1777
+ self.current_row += 1
1778
+
1768
1779
  self.add_reset_button(screenshot_frame, "screenshot", self.current_row, 0, self.create_screenshot_tab)
1769
1780
 
1770
1781
  for col in range(3):
@@ -2299,6 +2310,16 @@ class ConfigApp:
2299
2310
  self.custom_prompt.insert(tk.END, self.settings.ai.custom_prompt)
2300
2311
  self.custom_prompt.grid(row=self.current_row, column=1, sticky='EW', pady=2)
2301
2312
  self.current_row += 1
2313
+
2314
+ custom_texthooker_prompt_i18n = ai_i18n.get('custom_texthooker_prompt', {})
2315
+ HoverInfoLabelWidget(ai_frame, text=custom_texthooker_prompt_i18n.get('label', 'Custom Texthooker Prompt:'), tooltip=custom_texthooker_prompt_i18n.get('tooltip', 'Custom Prompt to use for Texthooker Translate Button.'),
2316
+ row=self.current_row, column=0)
2317
+ self.custom_texthooker_prompt = scrolledtext.ScrolledText(ai_frame, width=50, height=5, font=("TkDefaultFont", 9),
2318
+ relief="solid", borderwidth=1,
2319
+ highlightbackground=ttk.Style().colors.border)
2320
+ self.custom_texthooker_prompt.insert(tk.END, self.settings.ai.custom_texthooker_prompt)
2321
+ self.custom_texthooker_prompt.grid(row=self.current_row, column=1, sticky='EW', pady=2)
2322
+ self.current_row += 1
2302
2323
 
2303
2324
  self.add_reset_button(ai_frame, "ai", self.current_row, 0, self.create_ai_tab)
2304
2325
 
@@ -1,4 +1,6 @@
1
+ import math
1
2
  import os
3
+ import re
2
4
  import subprocess
3
5
  import json
4
6
  import tkinter as tk
@@ -6,8 +8,9 @@ from tkinter import messagebox
6
8
  import ttkbootstrap as ttk
7
9
  from PIL import Image, ImageTk
8
10
 
11
+ from GameSentenceMiner.util import ffmpeg
9
12
  from GameSentenceMiner.util.gsm_utils import sanitize_filename
10
- from GameSentenceMiner.util.configuration import get_temporary_directory, logger, ffmpeg_base_command_list, get_ffprobe_path
13
+ from GameSentenceMiner.util.configuration import get_config, get_temporary_directory, logger, ffmpeg_base_command_list, get_ffprobe_path, ffmpeg_base_command_list_info
11
14
 
12
15
 
13
16
  class ScreenshotSelectorDialog(tk.Toplevel):
@@ -65,7 +68,7 @@ class ScreenshotSelectorDialog(tk.Toplevel):
65
68
  # Force always on top to ensure visibility
66
69
 
67
70
  def _extract_frames(self, video_path, timestamp, mode):
68
- """Extracts frames using ffmpeg. Encapsulated from the original script."""
71
+ """Extracts frames using ffmpeg, with automatic black bar removal."""
69
72
  temp_dir = os.path.join(
70
73
  get_temporary_directory(False),
71
74
  "screenshot_frames",
@@ -87,17 +90,36 @@ class ScreenshotSelectorDialog(tk.Toplevel):
87
90
  logger.warning(f"Timestamp {timestamp_number} exceeds video duration {video_duration}.")
88
91
  return [], None
89
92
 
93
+ video_filters = []
94
+
95
+ if get_config().screenshot.trim_black_bars_wip:
96
+ crop_filter = ffmpeg.find_black_bars(video_path, timestamp_number)
97
+ if crop_filter:
98
+ video_filters.append(crop_filter)
99
+
100
+ # Always add the frame extraction filter
101
+ video_filters.append(f"fps=1/{0.25}")
102
+
90
103
  try:
104
+ # Build the final command for frame extraction
91
105
  command = ffmpeg_base_command_list + [
92
- "-y",
106
+ "-y", # Overwrite output files without asking
93
107
  "-ss", str(timestamp_number),
94
- "-i", video_path,
95
- "-vf", f"fps=1/{0.25}",
108
+ "-i", video_path
109
+ ]
110
+
111
+ # Chain all collected filters (crop and fps) together with a comma
112
+ command.extend(["-vf", ",".join(video_filters)])
113
+
114
+ command.extend([
96
115
  "-vframes", "20",
97
116
  os.path.join(temp_dir, "frame_%02d.png")
98
- ]
117
+ ])
118
+
119
+ logger.debug(f"Executing frame extraction command: {' '.join(command)}")
99
120
  subprocess.run(command, check=True, capture_output=True, text=True)
100
121
 
122
+ # The rest of your logic remains the same
101
123
  for i in range(1, 21):
102
124
  frame_path = os.path.join(temp_dir, f"frame_{i:02d}.png")
103
125
  if os.path.exists(frame_path):
@@ -122,7 +144,7 @@ class ScreenshotSelectorDialog(tk.Toplevel):
122
144
  except Exception as e:
123
145
  logger.error(f"An unexpected error occurred during frame extraction: {e}")
124
146
  return [], None
125
-
147
+
126
148
  def _build_image_grid(self, image_paths, golden_frame):
127
149
  """Creates and displays the grid of selectable images."""
128
150
  self.images = [] # Keep a reference to images to prevent garbage collection
@@ -12,7 +12,7 @@ from logging.handlers import RotatingFileHandler
12
12
  from os.path import expanduser
13
13
  from sys import platform
14
14
  import time
15
- from typing import List, Dict
15
+ from typing import Any, List, Dict
16
16
  import sys
17
17
  from enum import Enum
18
18
 
@@ -59,6 +59,28 @@ supported_formats = {
59
59
  'm4a': 'aac',
60
60
  }
61
61
 
62
+ KNOWN_ASPECT_RATIOS = [
63
+ # --- Classic / Legacy ---
64
+ {"name": "4:3 (SD / Retro Games)", "ratio": 4 / 3},
65
+ {"name": "5:4 (Old PC Monitors)", "ratio": 5 / 4},
66
+ {"name": "3:2 (Handheld / GBA / DS / DSLR)", "ratio": 3 / 2},
67
+
68
+ # --- Modern Displays ---
69
+ {"name": "16:10 (PC Widescreen)", "ratio": 16 / 10},
70
+ {"name": "16:9 (Standard HD / 1080p / 4K)", "ratio": 16 / 9},
71
+ {"name": "18:9 (Mobile / Some Modern Laptops)", "ratio": 18 / 9},
72
+ {"name": "19.5:9 (Modern Smartphones)", "ratio": 19.5 / 9},
73
+ {"name": "21:9 (UltraWide)", "ratio": 21 / 9},
74
+ {"name": "24:10 (UltraWide+)", "ratio": 24 / 10},
75
+ {"name": "32:9 (Super UltraWide)", "ratio": 32 / 9},
76
+
77
+ # --- Vertical / Mobile ---
78
+ {"name": "9:16 (Portrait Mode)", "ratio": 9 / 16},
79
+ {"name": "3:4 (Portrait 4:3)", "ratio": 3 / 4},
80
+ {"name": "1:1 (Square / UI Capture)", "ratio": 1 / 1},
81
+ ]
82
+
83
+ KNOWN_ASPECT_RATIOS_DICT = {item["name"]: item["ratio"] for item in KNOWN_ASPECT_RATIOS}
62
84
 
63
85
  def is_linux():
64
86
  return platform == 'linux'
@@ -490,6 +512,7 @@ class Screenshot:
490
512
  use_new_screenshot_logic: bool = False
491
513
  screenshot_timing_setting: str = 'beginning' # 'middle', 'end'
492
514
  use_screenshot_selector: bool = False
515
+ trim_black_bars_wip: bool = True
493
516
 
494
517
  def __post_init__(self):
495
518
  if not self.screenshot_timing_setting and self.use_beginning_of_line_as_screenshot:
@@ -632,6 +655,7 @@ class Ai:
632
655
  use_canned_translation_prompt: bool = True
633
656
  use_canned_context_prompt: bool = False
634
657
  custom_prompt: str = ''
658
+ custom_texthooker_prompt: str = ''
635
659
  dialogue_context_length: int = 10
636
660
 
637
661
  def __post_init__(self):
@@ -1321,10 +1345,11 @@ class AnkiUpdateResult:
1321
1345
  video_in_anki: str = ''
1322
1346
  word_path: str = ''
1323
1347
  word: str = ''
1348
+ extra_tags: List[str] = field(default_factory=list)
1324
1349
 
1325
1350
  @staticmethod
1326
1351
  def failure():
1327
- return AnkiUpdateResult(success=False, audio_in_anki='', screenshot_in_anki='', prev_screenshot_in_anki='', sentence_in_anki='', multi_line=False, video_in_anki='', word_path='', word='')
1352
+ return AnkiUpdateResult(success=False, audio_in_anki='', screenshot_in_anki='', prev_screenshot_in_anki='', sentence_in_anki='', multi_line=False, video_in_anki='', word_path='', word='', extra_tags=[])
1328
1353
 
1329
1354
 
1330
1355
  @dataclass_json
@@ -1376,6 +1401,8 @@ def get_ffprobe_path():
1376
1401
 
1377
1402
  ffmpeg_base_command_list = [get_ffmpeg_path(), "-hide_banner", "-loglevel", "error", '-nostdin']
1378
1403
 
1404
+ ffmpeg_base_command_list_info = [get_ffmpeg_path(), "-hide_banner", "-loglevel", "info", '-nostdin']
1405
+
1379
1406
 
1380
1407
  # logger.debug(f"Running in development mode: {is_dev}")
1381
1408
  # logger.debug(f"Running on Beangate's PC: {is_beangate}")