GameSentenceMiner 2.9.29__py3-none-any.whl → 2.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/anki.py +16 -14
- GameSentenceMiner/config_gui.py +826 -628
- GameSentenceMiner/gametext.py +5 -2
- GameSentenceMiner/gsm.py +19 -12
- GameSentenceMiner/ocr/gsm_ocr_config.py +16 -0
- GameSentenceMiner/ocr/owocr_area_selector.py +2 -0
- GameSentenceMiner/ocr/owocr_helper.py +18 -33
- GameSentenceMiner/ocr/ss_picker.py +17 -1
- GameSentenceMiner/util/audio_offset_selector.py +205 -0
- GameSentenceMiner/util/configuration.py +45 -16
- GameSentenceMiner/util/ffmpeg.py +23 -95
- GameSentenceMiner/util/gsm_utils.py +64 -5
- GameSentenceMiner/util/text_log.py +2 -2
- GameSentenceMiner/vad.py +3 -14
- GameSentenceMiner/web/service.py +10 -7
- GameSentenceMiner/web/texthooking_page.py +2 -2
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.1.dist-info}/METADATA +4 -2
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.1.dist-info}/RECORD +22 -21
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.1.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.1.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.1.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.1.dist-info}/top_level.txt +0 -0
GameSentenceMiner/gametext.py
CHANGED
@@ -14,11 +14,13 @@ current_line = ''
|
|
14
14
|
current_line_after_regex = ''
|
15
15
|
current_line_time = datetime.now()
|
16
16
|
|
17
|
+
last_clipboard = ''
|
18
|
+
|
17
19
|
reconnecting = False
|
18
20
|
websocket_connected = {}
|
19
21
|
|
20
22
|
async def monitor_clipboard():
|
21
|
-
global current_line
|
23
|
+
global current_line, last_clipboard
|
22
24
|
current_line = pyperclip.paste()
|
23
25
|
send_message_on_resume = False
|
24
26
|
while True:
|
@@ -37,7 +39,8 @@ async def monitor_clipboard():
|
|
37
39
|
gsm_status.clipboard_enabled = True
|
38
40
|
current_clipboard = pyperclip.paste()
|
39
41
|
|
40
|
-
if current_clipboard and current_clipboard != current_line:
|
42
|
+
if current_clipboard and current_clipboard != current_line and current_clipboard != last_clipboard:
|
43
|
+
last_clipboard = current_clipboard
|
41
44
|
await handle_new_text_event(current_clipboard)
|
42
45
|
|
43
46
|
await asyncio.sleep(0.05)
|
GameSentenceMiner/gsm.py
CHANGED
@@ -3,7 +3,7 @@ import subprocess
|
|
3
3
|
import sys
|
4
4
|
|
5
5
|
import os
|
6
|
-
|
6
|
+
import warnings
|
7
7
|
|
8
8
|
os.environ.pop('TCL_LIBRARY', None)
|
9
9
|
|
@@ -60,7 +60,7 @@ obs_paused = False
|
|
60
60
|
icon: Icon
|
61
61
|
menu: Menu
|
62
62
|
root = None
|
63
|
-
|
63
|
+
warnings.simplefilter("ignore", DeprecationWarning)
|
64
64
|
|
65
65
|
|
66
66
|
class VideoToAudioHandler(FileSystemEventHandler):
|
@@ -78,14 +78,20 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
78
78
|
|
79
79
|
def process_replay(self, video_path):
|
80
80
|
vad_trimmed_audio = ''
|
81
|
+
final_audio_output = ''
|
81
82
|
skip_delete = False
|
83
|
+
selected_lines = []
|
84
|
+
anki_card_creation_time = None
|
85
|
+
mined_line = None
|
82
86
|
gsm_state.previous_replay = video_path
|
83
87
|
if gsm_state.line_for_audio or gsm_state.line_for_screenshot:
|
84
88
|
handle_texthooker_button(video_path, get_audio_from_video=VideoToAudioHandler.get_audio)
|
85
89
|
return
|
86
90
|
try:
|
87
91
|
if anki.card_queue and len(anki.card_queue) > 0:
|
88
|
-
last_note, anki_card_creation_time = anki.card_queue.pop(0)
|
92
|
+
last_note, anki_card_creation_time, selected_lines = anki.card_queue.pop(0)
|
93
|
+
elif get_config().features.backfill_audio:
|
94
|
+
last_note = anki.get_cards_by_sentence(gametext.current_line_after_regex)
|
89
95
|
else:
|
90
96
|
logger.info("Replay buffer initiated externally. Skipping processing.")
|
91
97
|
skip_delete = True
|
@@ -102,8 +108,9 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
102
108
|
logger.error(
|
103
109
|
f"Video was unusually small, potentially empty! Check OBS for Correct Scene Settings! Path: {video_path}")
|
104
110
|
return
|
111
|
+
|
112
|
+
# Just for safety
|
105
113
|
if not last_note:
|
106
|
-
logger.debug("Attempting to get last anki card")
|
107
114
|
if get_config().anki.update_anki:
|
108
115
|
last_note = anki.get_last_anki_card()
|
109
116
|
if get_config().features.backfill_audio:
|
@@ -115,9 +122,7 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
115
122
|
if mined_line.next:
|
116
123
|
line_cutoff = mined_line.next.time
|
117
124
|
|
118
|
-
selected_lines
|
119
|
-
if texthooking_page.are_lines_selected():
|
120
|
-
selected_lines = texthooking_page.get_selected_lines()
|
125
|
+
if selected_lines:
|
121
126
|
start_line = selected_lines[0]
|
122
127
|
mined_line = get_mined_line(last_note, selected_lines)
|
123
128
|
line_cutoff = selected_lines[-1].get_next_time()
|
@@ -126,7 +131,6 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
126
131
|
logger.debug(last_note.to_json())
|
127
132
|
note = anki.get_initial_card_info(last_note, selected_lines)
|
128
133
|
tango = last_note.get_field(get_config().anki.word_field) if last_note else ''
|
129
|
-
texthooking_page.reset_checked_lines()
|
130
134
|
|
131
135
|
if get_config().anki.sentence_audio_field and get_config().audio.enabled:
|
132
136
|
logger.debug("Attempting to get audio from video")
|
@@ -138,7 +142,7 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
138
142
|
mined_line=mined_line)
|
139
143
|
else:
|
140
144
|
final_audio_output = ""
|
141
|
-
vad_result = VADResult(
|
145
|
+
vad_result = VADResult(True, 0, 0, '')
|
142
146
|
vad_trimmed_audio = ""
|
143
147
|
if not get_config().audio.enabled:
|
144
148
|
logger.info("Audio is disabled in config, skipping audio processing!")
|
@@ -167,7 +171,8 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
167
171
|
elif get_config().features.notify_on_update and vad_result.success:
|
168
172
|
notification.send_audio_generated_notification(vad_trimmed_audio)
|
169
173
|
except Exception as e:
|
170
|
-
|
174
|
+
if mined_line:
|
175
|
+
anki_results[mined_line.id] = AnkiUpdateResult.failure()
|
171
176
|
logger.error(f"Failed Processing and/or adding to Anki: Reason {e}")
|
172
177
|
logger.debug(f"Some error was hit catching to allow further work to be done: {e}", exc_info=True)
|
173
178
|
notification.send_error_no_anki_update()
|
@@ -177,6 +182,8 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
177
182
|
os.remove(video_path)
|
178
183
|
if vad_trimmed_audio and get_config().paths.remove_audio and os.path.exists(vad_trimmed_audio):
|
179
184
|
os.remove(vad_trimmed_audio)
|
185
|
+
if final_audio_output and get_config().paths.remove_audio and os.path.exists(final_audio_output):
|
186
|
+
os.remove(final_audio_output)
|
180
187
|
|
181
188
|
@staticmethod
|
182
189
|
def get_audio(game_line, next_line_time, video_path, anki_card_creation_time=None, temporary=False, timing_only=False, mined_line=None):
|
@@ -191,9 +198,9 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
191
198
|
vad_result = vad_processor.trim_audio_with_vad(trimmed_audio, vad_trimmed_audio, game_line)
|
192
199
|
if timing_only:
|
193
200
|
return vad_result
|
194
|
-
if get_config().audio.
|
201
|
+
if get_config().audio.ffmpeg_reencode_options_to_use and os.path.exists(vad_trimmed_audio):
|
195
202
|
ffmpeg.reencode_file_with_user_config(vad_trimmed_audio, final_audio_output,
|
196
|
-
get_config().audio.
|
203
|
+
get_config().audio.ffmpeg_reencode_options_to_use)
|
197
204
|
elif os.path.exists(vad_trimmed_audio):
|
198
205
|
shutil.move(vad_trimmed_audio, final_audio_output)
|
199
206
|
return final_audio_output, vad_result, vad_trimmed_audio
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import ctypes
|
2
|
+
from copy import deepcopy
|
2
3
|
from dataclasses import dataclass
|
3
4
|
from math import floor, ceil
|
4
5
|
|
@@ -43,12 +44,14 @@ class WindowGeometry:
|
|
43
44
|
class OCRConfig:
|
44
45
|
scene: str
|
45
46
|
rectangles: List[Rectangle]
|
47
|
+
pre_scale_rectangles: List[Rectangle] = None
|
46
48
|
coordinate_system: str = None
|
47
49
|
window_geometry: Optional[WindowGeometry] = None
|
48
50
|
window: Optional[str] = None
|
49
51
|
language: str = "ja"
|
50
52
|
|
51
53
|
def __post_init__(self):
|
54
|
+
self.pre_scale_rectangles = deepcopy(self.rectangles)
|
52
55
|
if self.coordinate_system and self.coordinate_system == "percentage" and self.window:
|
53
56
|
import pygetwindow as gw
|
54
57
|
try:
|
@@ -71,6 +74,19 @@ class OCRConfig:
|
|
71
74
|
ceil(rectangle.coordinates[3] * self.window_geometry.height),
|
72
75
|
]
|
73
76
|
|
77
|
+
def scale_to_custom_size(self, width, height):
    """Rebuild ``self.rectangles`` for a ``width`` x ``height`` target.

    The rectangles are always regenerated from ``self.pre_scale_rectangles``
    (the snapshot taken in ``__post_init__``), so the method can be called
    repeatedly with different sizes.

    Args:
        width: Target width used to scale coordinates 0 and 2.
        height: Target height used to scale coordinates 1 and 3.

    Only the ``"percentage"`` coordinate system is scaled; for any other
    coordinate system the rectangles are simply reset to the snapshot.
    """
    # deepcopy, not list.copy(): a shallow copy shares the Rectangle objects,
    # so assigning ``rectangle.coordinates`` below would also overwrite the
    # pre-scale snapshot and every later call would scale already-scaled
    # values.
    self.rectangles = deepcopy(self.pre_scale_rectangles)

    if self.coordinate_system == "percentage":
        for rectangle in self.rectangles:
            coords = rectangle.coordinates
            rectangle.coordinates = [
                floor(coords[0] * width),
                floor(coords[1] * height),
                floor(coords[2] * width),
                floor(coords[3] * height),
            ]
|
88
|
+
|
89
|
+
|
74
90
|
def get_window(title):
|
75
91
|
import pygetwindow as gw
|
76
92
|
windows = gw.getWindowsWithTitle(title)
|
@@ -731,6 +731,8 @@ class ScreenSelector:
|
|
731
731
|
window.bind('<Button-3>', on_right_click) # Right click delete
|
732
732
|
window.bind("<m>", toggle_image_mode) # Toggle image mode (alpha)
|
733
733
|
|
734
|
+
canvas.focus_set()
|
735
|
+
|
734
736
|
def start(self):
|
735
737
|
"""Initializes the Tkinter root and creates windows for each monitor."""
|
736
738
|
self.root = tk.Tk()
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import asyncio
|
2
|
+
import io
|
2
3
|
import json
|
3
4
|
import logging
|
4
5
|
import os
|
@@ -11,7 +12,9 @@ from pathlib import Path
|
|
11
12
|
from tkinter import messagebox
|
12
13
|
|
13
14
|
import mss
|
15
|
+
import mss.tools
|
14
16
|
import websockets
|
17
|
+
from PIL import Image
|
15
18
|
from rapidfuzz import fuzz
|
16
19
|
|
17
20
|
from GameSentenceMiner import obs
|
@@ -44,35 +47,6 @@ console_handler.setFormatter(formatter)
|
|
44
47
|
logger.addHandler(console_handler)
|
45
48
|
|
46
49
|
|
47
|
-
def get_new_game_cords():
|
48
|
-
"""Allows multiple coordinate selections."""
|
49
|
-
coords_list = []
|
50
|
-
with mss.mss() as sct:
|
51
|
-
monitors = sct.monitors
|
52
|
-
monitor_map = {i: mon for i, mon in enumerate(monitors)}
|
53
|
-
while True:
|
54
|
-
selected_monitor_index, cords = screen_coordinate_picker.get_screen_selection_with_monitor(monitor_map)
|
55
|
-
selected_monitor = monitor_map[selected_monitor_index]
|
56
|
-
coords_list.append({"monitor": {"left": selected_monitor["left"], "top": selected_monitor["top"],
|
57
|
-
"width": selected_monitor["width"], "height": selected_monitor["height"],
|
58
|
-
"index": selected_monitor_index}, "coordinates": cords,
|
59
|
-
"is_excluded": False})
|
60
|
-
if messagebox.askyesno("Add Another Region", "Do you want to add another region?"):
|
61
|
-
continue
|
62
|
-
else:
|
63
|
-
break
|
64
|
-
app_dir = Path.home() / "AppData" / "Roaming" / "GameSentenceMiner"
|
65
|
-
ocr_config_dir = app_dir / "ocr_config"
|
66
|
-
ocr_config_dir.mkdir(parents=True, exist_ok=True)
|
67
|
-
obs.connect_to_obs_sync()
|
68
|
-
scene = sanitize_filename(obs.get_current_scene())
|
69
|
-
config_path = ocr_config_dir / f"{scene}.json"
|
70
|
-
with open(config_path, 'w') as f:
|
71
|
-
json.dump({"scene": scene, "window": None, "rectangles": coords_list}, f, indent=4)
|
72
|
-
print(f"Saved OCR config to {config_path}")
|
73
|
-
return coords_list
|
74
|
-
|
75
|
-
|
76
50
|
def get_ocr_config(window=None) -> OCRConfig:
|
77
51
|
"""Loads and updates screen capture areas from the corresponding JSON file."""
|
78
52
|
app_dir = Path.home() / "AppData" / "Roaming" / "GameSentenceMiner"
|
@@ -213,11 +187,11 @@ def do_second_ocr(ocr1_text, time, img, filtering):
|
|
213
187
|
try:
|
214
188
|
orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
|
215
189
|
engine=ocr2, furigana_filter_sensitivity=furigana_filter_sensitivity)
|
216
|
-
if fuzz.ratio(last_ocr2_result,
|
190
|
+
if fuzz.ratio(last_ocr2_result, orig_text) >= 90:
|
217
191
|
logger.info("Seems like the same text from previous ocr2 result, not sending")
|
218
192
|
return
|
219
193
|
save_result_image(img)
|
220
|
-
last_ocr2_result =
|
194
|
+
last_ocr2_result = orig_text
|
221
195
|
asyncio.run(send_result(text, time))
|
222
196
|
except json.JSONDecodeError:
|
223
197
|
print("Invalid JSON received.")
|
@@ -238,7 +212,7 @@ def save_result_image(img):
|
|
238
212
|
async def send_result(text, time):
|
239
213
|
if text:
|
240
214
|
text = do_text_replacements(text, OCR_REPLACEMENTS_FILE)
|
241
|
-
if
|
215
|
+
if clipboard_output:
|
242
216
|
import pyperclip
|
243
217
|
pyperclip.copy(text)
|
244
218
|
try:
|
@@ -365,14 +339,23 @@ def run_oneocr(ocr_config: OCRConfig, rectangles):
|
|
365
339
|
|
366
340
|
def add_ss_hotkey(ss_hotkey="ctrl+shift+g"):
|
367
341
|
import keyboard
|
368
|
-
|
342
|
+
secret_ss_hotkey = "F15"
|
369
343
|
filtering = TextFiltering(lang=language)
|
344
|
+
cropper = ScreenCropper()
|
370
345
|
def capture():
|
371
346
|
print("Taking screenshot...")
|
372
347
|
img = cropper.run()
|
373
348
|
do_second_ocr("", datetime.now(), img, filtering)
|
349
|
+
def capture_main_monitor():
|
350
|
+
print("Taking screenshot of main monitor...")
|
351
|
+
with mss.mss() as sct:
|
352
|
+
main_monitor = sct.monitors[1] if len(sct.monitors) > 1 else sct.monitors[0]
|
353
|
+
img = sct.grab(main_monitor)
|
354
|
+
img_bytes = mss.tools.to_png(img.rgb, img.size)
|
355
|
+
do_second_ocr("", datetime.now(), img_bytes, filtering)
|
374
356
|
try:
|
375
357
|
keyboard.add_hotkey(ss_hotkey, capture)
|
358
|
+
keyboard.add_hotkey(secret_ss_hotkey, capture_main_monitor)
|
376
359
|
print(f"Press {ss_hotkey} to take a screenshot.")
|
377
360
|
except Exception as e:
|
378
361
|
logger.error(f"Error setting up screenshot hotkey with keyboard, Attempting Backup: {e}")
|
@@ -414,6 +397,7 @@ if __name__ == "__main__":
|
|
414
397
|
parser.add_argument("--twopassocr", type=int, choices=[0, 1], default=1, help="Enable two-pass OCR (default: 1)")
|
415
398
|
parser.add_argument("--manual", action="store_true", help="Use screenshot-only mode")
|
416
399
|
parser.add_argument("--clipboard", action="store_true", help="Use clipboard for input")
|
400
|
+
parser.add_argument("--clipboard-output", action="store_true", default=False, help="Use clipboard for output")
|
417
401
|
parser.add_argument("--window", type=str, help="Specify the window name for OCR")
|
418
402
|
parser.add_argument("--furigana_filter_sensitivity", type=float, default=0, help="Furigana Filter Sensitivity for OCR (default: 0)")
|
419
403
|
parser.add_argument("--manual_ocr_hotkey", type=str, default=None, help="Hotkey for manual OCR (default: None)")
|
@@ -431,6 +415,7 @@ if __name__ == "__main__":
|
|
431
415
|
furigana_filter_sensitivity = args.furigana_filter_sensitivity
|
432
416
|
ss_hotkey = args.area_select_ocr_hotkey.lower()
|
433
417
|
manual_ocr_hotkey = args.manual_ocr_hotkey.lower().replace("ctrl", "<ctrl>").replace("shift", "<shift>").replace("alt", "<alt>") if args.manual_ocr_hotkey else None
|
418
|
+
clipboard_output = args.clipboard_output
|
434
419
|
|
435
420
|
logger.info(f"Received arguments: {vars(args)}")
|
436
421
|
# set_force_stable_hotkey()
|
@@ -7,6 +7,7 @@ import io
|
|
7
7
|
|
8
8
|
class ScreenCropper:
|
9
9
|
def __init__(self):
|
10
|
+
self.main_monitor = None
|
10
11
|
self.root = None
|
11
12
|
self.canvas = None
|
12
13
|
self.captured_image = None
|
@@ -23,6 +24,7 @@ class ScreenCropper:
|
|
23
24
|
try:
|
24
25
|
with mss.mss() as sct:
|
25
26
|
all_monitors_bbox = sct.monitors[0]
|
27
|
+
self.main_monitor = sct.monitors[1]
|
26
28
|
self.monitor_geometry = {
|
27
29
|
'left': all_monitors_bbox['left'],
|
28
30
|
'top': all_monitors_bbox['top'],
|
@@ -74,6 +76,15 @@ class ScreenCropper:
|
|
74
76
|
|
75
77
|
self.root.destroy()
|
76
78
|
|
79
|
+
def _on_enter(self, event):
    """Select the main monitor's area of the capture and close the window.

    Stores the cropped region in ``self.cropped_image`` and destroys
    ``self.root``.

    Args:
        event: The Tk event that triggered the handler (unused).
    """
    if self.captured_image is None or self.main_monitor is None:
        # Nothing was captured (or the grab failed), so there is nothing to crop.
        print("No captured image or main monitor information available.")
        return

    print("Enter key pressed, grabbing main monitor area.")

    # main_monitor holds virtual-desktop coordinates, while pixel (0, 0) of
    # the capture is the top-left corner of the all-monitors bounding box.
    # Translate before cropping, otherwise the wrong region is returned
    # whenever another monitor sits left of / above the main one.
    # NOTE(review): assumes captured_image covers exactly the bounding box
    # described by monitor_geometry - confirm in grab_all_monitors.
    geometry = self.monitor_geometry or {}
    left = self.main_monitor['left'] - geometry.get('left', 0)
    top = self.main_monitor['top'] - geometry.get('top', 0)

    self.cropped_image = self.captured_image.crop((
        left,
        top,
        left + self.main_monitor['width'],
        top + self.main_monitor['height'],
    ))
    self.root.destroy()
|
86
|
+
|
87
|
+
|
77
88
|
def show_image_and_select_box(self):
|
78
89
|
if self.captured_image is None or self.monitor_geometry is None:
|
79
90
|
print("No image or monitor geometry to display. Capture all monitors first.")
|
@@ -101,13 +112,18 @@ class ScreenCropper:
|
|
101
112
|
self.canvas.bind("<B1-Motion>", self._on_mouse_drag)
|
102
113
|
self.canvas.bind("<ButtonRelease-1>", self._on_button_release)
|
103
114
|
|
115
|
+
|
104
116
|
self.root.mainloop()
|
105
117
|
|
106
118
|
def get_cropped_image(self):
|
107
119
|
return self.cropped_image
|
108
120
|
|
109
|
-
def run(self):
|
121
|
+
def run(self, return_main_monitor=False):
|
110
122
|
self.grab_all_monitors()
|
123
|
+
if return_main_monitor and self.captured_image:
|
124
|
+
return self.captured_image.crop((self.main_monitor['left'], self.main_monitor['top'],
|
125
|
+
self.main_monitor['left'] + self.main_monitor['width'],
|
126
|
+
self.main_monitor['top'] + self.main_monitor['height']))
|
111
127
|
if self.captured_image and self.monitor_geometry:
|
112
128
|
self.show_image_and_select_box()
|
113
129
|
return self.get_cropped_image()
|
@@ -0,0 +1,205 @@
|
|
1
|
+
import os
|
2
|
+
import tkinter as tk
|
3
|
+
from tkinter import filedialog, messagebox
|
4
|
+
import soundfile as sf
|
5
|
+
import numpy as np
|
6
|
+
import matplotlib.pyplot as plt
|
7
|
+
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
8
|
+
import sounddevice as sd
|
9
|
+
|
10
|
+
from GameSentenceMiner.util import ffmpeg
|
11
|
+
|
12
|
+
|
13
|
+
class AudioOffsetGUI:
    """Tk window that plots an audio file and lets the user pick a start offset.

    A slider moves a red marker over the waveform; the end of the segment is
    always the end of the clip (green marker). "Play/Pause Segment" plays from
    the marker to the end, and "Get Offset" prints the chosen offset to stdout
    and terminates the process with exit status 0.
    """

    # Seconds subtracted from the slider position when the offset is shown or
    # printed. run_audio_offset_gui in this file trims the clip starting 5 s
    # before the requested beginning offset, so slider position 5.0 maps to an
    # offset of 0.00.
    LEAD_IN_SECONDS = 5.0

    def __init__(self, master, audio_file_path=None):
        """Build the window and, if given, load ``audio_file_path``.

        Args:
            master: The Tk root (or toplevel) that hosts the widgets.
            audio_file_path: Optional path of the audio file to display.
        """
        self.master = master
        master.title("Audio Offset Adjuster")
        master.geometry("1000x700")

        master.tk_setPalette(background='#2E2E2E', foreground='white',
                             activeBackground='#4F4F4F', activeForeground='white')

        self.audio_data = None
        self.samplerate = None
        self.duration = 0.0

        # Playback state: initialised here instead of being probed with hasattr().
        self.is_playing = False
        self._playback_timer = None

        # A style only affects figures created after it is selected, so it has
        # to be chosen before plt.subplots().
        plt.style.use('dark_background')
        self.fig, self.ax = plt.subplots(figsize=(10, 4))
        self.canvas = FigureCanvasTkAgg(self.fig, master=master)
        self.canvas_widget = self.canvas.get_tk_widget()
        self.canvas_widget.pack(side=tk.TOP, fill=tk.BOTH, expand=True, padx=10, pady=10)

        self.fig.set_facecolor('#2E2E2E')
        self._style_axes()

        self.beg_offset_line = None

        self.create_widgets()

        self.load_audio(audio_file_path)

    def _style_axes(self):
        """Apply colours and axis labels; must be redone after ``ax.clear()``."""
        self.ax.set_facecolor('#2E2E2E')
        self.ax.tick_params(axis='x', colors='white')
        self.ax.tick_params(axis='y', colors='white')
        for side in ('bottom', 'left', 'top', 'right'):
            self.ax.spines[side].set_color('white')
        self.ax.set_xlabel("Time (s)", color='white')
        self.ax.set_ylabel("Amplitude", color='white')

    def create_widgets(self):
        """Create the button/label row and the start-trim slider row."""
        control_frame = tk.Frame(self.master, bg='#2E2E2E')
        control_frame.pack(side=tk.BOTTOM, fill=tk.X, pady=10, padx=10)

        self.play_button = tk.Button(control_frame, text="Play/Pause Segment", command=self.play_segment,
                                     bg='#4F4F4F', fg='white')
        self.play_button.pack(side=tk.RIGHT, padx=5)

        self.output_button = tk.Button(control_frame, text="Get Offset", command=self.get_offsets,
                                       bg='#4F4F4F', fg='white')
        self.output_button.pack(side=tk.RIGHT, padx=5)

        self.beg_offset_label = tk.Label(control_frame, text="Beginning Offset: 0.00s", bg='#2E2E2E', fg='white')
        self.beg_offset_label.pack(side=tk.LEFT, padx=10)

        self.end_offset_label = tk.Label(control_frame, text="End Offset: Full Duration", bg='#2E2E2E', fg='white')
        self.end_offset_label.pack(side=tk.LEFT, padx=10)

        slider_frame = tk.Frame(self.master, bg='#2E2E2E')
        slider_frame.pack(side=tk.BOTTOM, fill=tk.X, pady=5, padx=10)

        beg_slider_label = tk.Label(slider_frame, text="Start Trim:", bg='#2E2E2E', fg='white')
        beg_slider_label.pack(side=tk.LEFT)
        self.beg_slider = tk.Scale(slider_frame, from_=0, to=100, orient=tk.HORIZONTAL, resolution=0.5,
                                   command=self.on_slider_change, bg='#2E2E2E', fg='white', troughcolor='#4F4F4F',
                                   highlightbackground='#2E2E2E', length=300)
        self.beg_slider.pack(side=tk.LEFT, expand=True, fill=tk.X, padx=5)

    def load_audio(self, file_path):
        """Read ``file_path``, keep its first channel and redraw the plot.

        With no path, or when loading fails, the audio state is left empty
        and the "No audio loaded" placeholder is drawn. Failures are reported
        in an error dialog rather than raised.
        """
        if not file_path:
            self.plot_waveform()
            return

        try:
            self.audio_data, self.samplerate = sf.read(file_path)
            if self.audio_data.ndim > 1:
                # Multi-channel data: only the first channel is shown/played.
                self.audio_data = self.audio_data[:, 0]
            self.duration = len(self.audio_data) / self.samplerate
            self.plot_waveform()
            self.beg_slider.config(to=self.duration)
            self.beg_slider.set(0)  # Reset start slider to 0
        except Exception as e:
            # UI boundary: report any load/plot problem to the user.
            messagebox.showerror("Error", f"Failed to load audio file: {e}")
            self.audio_data = None
            self.samplerate = None
            self.duration = 0.0
            self.plot_waveform()

    def plot_waveform(self):
        """Redraw the axes: waveform plus markers, or a placeholder text."""
        self.ax.clear()
        # clear() also drops the axis labels and tick styling, so restore them.
        self._style_axes()

        if self.audio_data is not None:
            time = np.linspace(0, self.duration, len(self.audio_data))
            self.ax.plot(time, self.audio_data, color='#1E90FF')
            self.ax.set_xlim(0, self.duration)
            self.ax.set_ylim(np.min(self.audio_data), np.max(self.audio_data))
            self.ax.set_title("Audio", color='white')

            # Any previous marker was discarded by clear(); create a new one.
            self.beg_offset_line = self.ax.axvline(self.beg_slider.get(), color='red', linestyle='--', linewidth=2)
            # The end line is always at the duration.
            self.ax.axvline(self.duration, color='green', linestyle='--', linewidth=2)

            self.update_offset_labels()
        else:
            self.beg_offset_line = None
            self.ax.text(0.5, 0.5, "No audio loaded",
                         horizontalalignment='center', verticalalignment='center',
                         transform=self.ax.transAxes, color='white', fontsize=16)

        self.fig.canvas.draw_idle()

    def on_slider_change(self, val):
        """Slider callback: move the start marker and refresh the label.

        Args:
            val: Value passed by the Scale widget (unused; the slider is
                read directly).
        """
        if self.audio_data is None:
            return

        beg_val = float(self.beg_slider.get())

        if self.beg_offset_line is not None:
            self.beg_offset_line.set_xdata([beg_val])

        self.update_offset_labels()
        self.fig.canvas.draw_idle()

    def play_segment(self):
        """Toggle playback of the audio from the slider position to the end."""
        if self.audio_data is None:
            messagebox.showinfo("Play Audio", "No audio file loaded yet.")
            return

        if self.is_playing:
            self._stop_playback()
            return

        beg_offset = self.beg_slider.get()
        end_offset = self.duration  # End offset is always full duration

        if beg_offset >= end_offset:
            messagebox.showwarning("Play Audio", "Start offset must be less than end offset.")
            return

        start_frame = int(beg_offset * self.samplerate)
        end_frame = int(end_offset * self.samplerate)

        if start_frame >= len(self.audio_data) or end_frame <= 0:
            messagebox.showwarning("Play Audio", "Selected segment is out of audio range.")
            return

        segment_to_play = self.audio_data[start_frame:end_frame]

        try:
            sd.play(segment_to_play, self.samplerate)
        except Exception as e:
            self.is_playing = False
            messagebox.showerror("Audio Playback Error", f"Failed to play audio: {e}")
            return

        self.is_playing = True
        # Clear the flag once the segment has had time to finish; otherwise
        # the next click after a completed playback would only "stop" and a
        # second click would be needed to play again.
        playback_ms = int(len(segment_to_play) / self.samplerate * 1000)
        self._playback_timer = self.master.after(playback_ms, self._on_playback_finished)

    def _stop_playback(self):
        """Stop the current playback and cancel the pending finish timer."""
        sd.stop()
        if self._playback_timer is not None:
            self.master.after_cancel(self._playback_timer)
            self._playback_timer = None
        self.is_playing = False

    def _on_playback_finished(self):
        """Timer callback: mark playback as finished."""
        self._playback_timer = None
        self.is_playing = False

    def update_offset_labels(self):
        """Show the marker position, minus the lead-in, in the offset label."""
        if self.beg_offset_line is not None:
            beg_val = self.beg_offset_line.get_xdata()[0] - self.LEAD_IN_SECONDS
            self.beg_offset_label.config(text=f"Beginning Offset: {beg_val:.2f}s")

    def get_offsets(self):
        """Print the selected offset to stdout and end the process.

        Raises:
            SystemExit: With code 0 after the offset has been printed.
        """
        if self.audio_data is None:
            messagebox.showinfo("Offsets", "No audio file loaded yet.")
            return

        beg_offset = self.beg_slider.get() - self.LEAD_IN_SECONDS

        print(f"{beg_offset:.2f}")
        # The builtin exit() is a site-module helper meant for the interactive
        # shell; raising SystemExit is the equivalent that is always available.
        raise SystemExit(0)
|
181
|
+
|
182
|
+
def run_audio_offset_gui(path=None, beginning_offset=0, end_offset=None):
    """Trim ``path`` to a temporary clip and open the offset-selection window.

    The clip is written to ``temp_audio.opus`` next to ``path``, starting 5
    seconds before ``beginning_offset``, and is deleted again when the GUI
    ends. The process exits with status 1 when the window is closed.

    Args:
        path: Path to the source audio file. Required despite the ``None``
            default.
        beginning_offset: Start of the region of interest, in seconds.
        end_offset: End of the region of interest, passed through to
            ``ffmpeg.trim_audio`` unchanged.

    Raises:
        TypeError: If ``path`` is None.
        SystemExit: With code 1 when the window is closed.
    """
    if path is None:
        # Same exception type os.path.dirname(None) would raise, but with a
        # message that names the actual problem.
        raise TypeError("run_audio_offset_gui() requires a path to an audio file, got None")

    temp_file_path = os.path.join(os.path.dirname(path), "temp_audio.opus")

    if os.path.exists(temp_file_path):
        os.remove(temp_file_path)

    def close_window():
        # Exit when the window is closed. The builtin exit() is a site-module
        # helper for interactive use, so raise SystemExit directly.
        raise SystemExit(1)

    try:
        # NOTE(review): the trailing positional arguments (True, 0, 0) are
        # forwarded as in the original call; their meaning is defined by
        # ffmpeg.trim_audio, which is not visible here.
        ffmpeg.trim_audio(path, beginning_offset - 5, end_offset, temp_file_path, True, 0, 0)

        root = tk.Tk()
        root.protocol("WM_DELETE_WINDOW", close_window)
        AudioOffsetGUI(root, audio_file_path=temp_file_path)
        root.mainloop()
    finally:
        # Best-effort cleanup so the temporary clip is not left next to the
        # source file; also runs when SystemExit propagates out of mainloop().
        try:
            os.remove(temp_file_path)
        except OSError:
            pass
|
194
|
+
|
195
|
+
|
196
|
+
if __name__ == "__main__":
    import argparse

    # Command-line entry point: every flag is declared in one table and then
    # registered in order.
    cli = argparse.ArgumentParser(description="Run Audio Offset GUI")
    option_table = (
        ("--path", {"type": str, "required": True, "help": "Path to the audio file"}),
        ("--beginning_offset", {"type": float, "default": 0, "help": "Beginning offset in seconds"}),
        ("--end_offset", {"type": float, "default": None, "help": "End offset in seconds"}),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)

    options = cli.parse_args()
    run_audio_offset_gui(
        path=options.path,
        beginning_offset=options.beginning_offset,
        end_offset=options.end_offset,
    )
|