PyPI - GameSentenceMiner - Versions diffs - 2.8.17__tar.gz → 2.8.19__tar.gz - Mend

GameSentenceMiner 2.8.17.tar.gz → 2.8.19.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/GameSentenceMiner/config_gui.py RENAMED Viewed

@@ -166,7 +166,7 @@ class ConfigApp:
                 extension=self.audio_extension.get(),
                 beginning_offset=float(self.beginning_offset.get()),
                 end_offset=float(self.end_offset.get()),
-                ffmpeg_reencode_options=self.ffmpeg_reencode_options.get(),
+                ffmpeg_reencode_options=self.audio_ffmpeg_reencode_options.get(),
                 external_tool = self.external_tool.get(),
                 anki_media_collection=self.anki_media_collection.get(),
                 external_tool_enabled=self.external_tool_enabled.get(),
@@ -221,6 +221,9 @@ class ConfigApp:
             )
         )
+        if self.ffmpeg_audio_preset_options.get() == "Custom":
+            config.audio.custom_encode_settings = self.audio_ffmpeg_reencode_options.get()
         if config.features.backfill_audio and config.features.full_auto:
             messagebox.showerror("Configuration Error", "Cannot have Full Auto and Backfill mode on at the same time! Note: Backfill is a very niche workflow.")
             return
@@ -789,13 +792,13 @@ class ConfigApp:
         #
     def update_audio_ffmpeg_settings(self, event):
-        selected_option = self.ffmpeg_preset_options.get()
-        if selected_option in self.ffmpeg_preset_options_map:
-            self.ffmpeg_reencode_options.delete(0, tk.END)
-            self.ffmpeg_reencode_options.insert(0, self.ffmpeg_preset_options_map[selected_option])
+        selected_option = self.ffmpeg_audio_preset_options.get()
+        if selected_option in self.ffmpeg_audio_preset_options_map:
+            self.audio_ffmpeg_reencode_options.delete(0, tk.END)
+            self.audio_ffmpeg_reencode_options.insert(0, self.ffmpeg_audio_preset_options_map[selected_option])
         else:
-            self.ffmpeg_reencode_options.delete(0, tk.END)
-            self.ffmpeg_reencode_options.insert(0, "")
+            self.audio_ffmpeg_reencode_options.delete(0, tk.END)
+            self.audio_ffmpeg_reencode_options.insert(0, "")
     @new_tab
     def create_audio_tab(self):
@@ -830,29 +833,29 @@ class ConfigApp:
         ttk.Label(audio_frame, text="FFmpeg Preset Options:").grid(row=self.current_row, column=0, sticky='W')
         # Define display names and their corresponding values
-        self.ffmpeg_preset_options_map = {
+        self.ffmpeg_audio_preset_options_map = {
             "No Re-encode" : "",
             "Simple loudness normalization (Simplest, Start Here)": "-c:a libopus -f opus -af \"loudnorm=I=-23:LRA=7:TP=-2\"",
             "Downmix to mono with normalization (Recommended(?))": "-c:a libopus -ac 1 -f opus -application voip -apply_phase_inv 0 -af \"loudnorm=I=-23:dual_mono=true\"",
             "Downmix to mono, 30kbps, normalized (Optimal(?))": "-c:a libopus -b:a 30k -ac 1 -f opus -application voip -apply_phase_inv 0 -af \"loudnorm=I=-23:dual_mono=true\"",
+            "Custom": get_config().audio.custom_encode_settings,
         }
         # Create a Combobox with display names
-        self.ffmpeg_preset_options = ttk.Combobox(audio_frame, values=list(self.ffmpeg_preset_options_map.keys()), width=50)
+        self.ffmpeg_audio_preset_options = ttk.Combobox(audio_frame, values=list(self.ffmpeg_audio_preset_options_map.keys()), width=50)
         # self.ffmpeg_preset_options.set("Downmix to mono with normalization")  # Set default display name
-        self.ffmpeg_preset_options.grid(row=self.current_row, column=1)
+        self.ffmpeg_audio_preset_options.grid(row=self.current_row, column=1)
         # Bind selection to update settings
-        self.ffmpeg_preset_options.bind("<<ComboboxSelected>>", self.update_audio_ffmpeg_settings)
+        self.ffmpeg_audio_preset_options.bind("<<ComboboxSelected>>", self.update_audio_ffmpeg_settings)
         self.add_label_and_increment_row(audio_frame, "Select a preset FFmpeg option for re-encoding screenshots.",
                                          row=self.current_row, column=2)
         ttk.Label(audio_frame, text="FFmpeg Reencode Options:").grid(row=self.current_row, column=0, sticky='W')
-        self.ffmpeg_reencode_options = ttk.Entry(audio_frame, width=50)
-        self.ffmpeg_reencode_options.insert(0, self.settings.audio.ffmpeg_reencode_options)
-        self.ffmpeg_reencode_options.grid(row=self.current_row, column=1)
+        self.audio_ffmpeg_reencode_options = ttk.Entry(audio_frame, width=50)
+        self.audio_ffmpeg_reencode_options.insert(0, self.settings.audio.ffmpeg_reencode_options)
+        self.audio_ffmpeg_reencode_options.grid(row=self.current_row, column=1)
         self.add_label_and_increment_row(audio_frame, "Custom FFmpeg options for re-encoding audio files.",
                                          row=self.current_row, column=2)

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/GameSentenceMiner/configuration.py RENAMED Viewed

@@ -137,6 +137,7 @@ class Audio:
     external_tool: str = ""
     anki_media_collection: str = ""
     external_tool_enabled: bool = True
+    custom_encode_settings: str = ''
 @dataclass_json

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/GameSentenceMiner/gsm.py RENAMED Viewed

@@ -192,9 +192,9 @@ class VideoToAudioHandler(FileSystemEventHandler):
                 logger.info("No voice activity detected, using full audio.")
                 vad_trimmed_audio = trimmed_audio
                 should_update_audio = True
-        if get_config().audio.ffmpeg_reencode_options and os.path.exists(vad_trimmed_audio):
+        if get_config().audio.audio_ffmpeg_reencode_options and os.path.exists(vad_trimmed_audio):
             ffmpeg.reencode_file_with_user_config(vad_trimmed_audio, final_audio_output,
-                                                  get_config().audio.ffmpeg_reencode_options)
+                                                  get_config().audio.audio_ffmpeg_reencode_options)
         elif os.path.exists(vad_trimmed_audio):
             shutil.move(vad_trimmed_audio, final_audio_output)
         return final_audio_output, should_update_audio, vad_trimmed_audio, vad_beginning, vad_end

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/GameSentenceMiner/owocr/owocr/__main__.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from .run import run, init_config
+from GameSentenceMiner.owocr.owocr.run import run, init_config
 def main():

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/GameSentenceMiner/owocr/owocr/ocr.py RENAMED Viewed

@@ -1048,3 +1048,79 @@ class GeminiOCR:
     def _preprocess(self, img):
         return pil_image_to_bytes(img, png_compression=1)
+class GroqOCR:
+    name = 'groq'
+    readable_name = 'Groq OCR'
+    key = 'j'
+    available = False
+    def __init__(self, config={'api_key': None}):
+        try:
+            import groq
+            self.api_key = config['api_key']
+            if not self.api_key:
+                logger.warning('Groq API key not provided, GroqOCR will not work!')
+            else:
+                self.client = groq.Groq(api_key=self.api_key)
+                self.available = True
+                logger.info('Groq OCR ready')
+        except ImportError:
+            logger.warning('groq module not available, GroqOCR will not work!')
+        except Exception as e:
+            logger.error(f'Error initializing Groq client: {e}')
+    def __call__(self, img_or_path):
+        if not self.available:
+            return (False, 'GroqOCR is not available due to missing API key or configuration error.')
+        try:
+            if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
+                img = Image.open(img_or_path).convert("RGB")
+            elif isinstance(img_or_path, Image.Image):
+                img = img_or_path.convert("RGB")
+            else:
+                raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+            img_base64 = self._preprocess(img)
+            if not img_base64:
+                return (False, 'Error processing image for Groq.')
+            prompt = (
+                "Analyze this image and extract text from it"
+                # "(speech bubbles or panels containing character dialogue). From the extracted dialogue text, "
+                # "filter out any furigana. Ignore and do not include any text found outside of dialogue boxes, "
+                # "including character names, speaker labels, or sound effects. Return *only* the filtered dialogue text. "
+                # "If no text is found within dialogue boxes after applying filters, return an empty string. "
+                # "OR, if there are no text bubbles or dialogue boxes found, return everything."
+                "Do not include any other output, formatting markers, or commentary, only the text from the image."
+            )
+            response = self.client.chat.completions.create(
+                model="meta-llama/llama-4-scout-17b-16e-instruct",
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}},
+                        ],
+                    }
+                ],
+                max_tokens=300
+            )
+            if response.choices and response.choices[0].message.content:
+                text_output = response.choices[0].message.content.strip()
+                return (True, text_output)
+            else:
+                return (True, "")
+        except FileNotFoundError:
+            return (False, f'File not found: {img_or_path}')
+        except Exception as e:
+            return (False, f'Groq API request failed: {e}')
+    def _preprocess(self, img):
+        return base64.b64encode(pil_image_to_bytes(img, png_compression=1)).decode('utf-8')

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/GameSentenceMiner/owocr/owocr/run.py RENAMED Viewed

@@ -585,7 +585,8 @@ def process_and_write_results(img_or_path, write_to, notifications, last_result,
         for i, instance in enumerate(engine_instances):
             if instance.name.lower() in engine.lower():
                 engine_instance = instance
-                last_result = (last_result[0], i)
+                if last_result:
+                    last_result = (last_result[0], i)
                 break
     else:
         engine_instance = engine_instances[engine_index]

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/GameSentenceMiner.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.8.17
+Version: 2.8.19
 Summary: A tool for mining sentences from games. Update: Multi-Line Mining! Fixed!
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: GameSentenceMiner
-Version: 2.8.17
+Version: 2.8.19
 Summary: A tool for mining sentences from games. Update: Multi-Line Mining! Fixed!
 Author-email: Beangate <bpwhelan95@gmail.com>
 License: MIT License

{gamesentenceminer-2.8.17 → gamesentenceminer-2.8.19}/pyproject.toml RENAMED Viewed

@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "GameSentenceMiner"
-version = "2.8.17"
+version = "2.8.19"
 description = "A tool for mining sentences from games. Update: Multi-Line Mining! Fixed!"
 readme = "README.md"
 requires-python = ">=3.10"