GameSentenceMiner 2.9.29__py3-none-any.whl → 2.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/anki.py +9 -9
- GameSentenceMiner/config_gui.py +826 -628
- GameSentenceMiner/gametext.py +5 -2
- GameSentenceMiner/gsm.py +10 -10
- GameSentenceMiner/ocr/gsm_ocr_config.py +16 -0
- GameSentenceMiner/ocr/owocr_area_selector.py +2 -0
- GameSentenceMiner/ocr/owocr_helper.py +18 -33
- GameSentenceMiner/ocr/ss_picker.py +17 -1
- GameSentenceMiner/util/audio_offset_selector.py +205 -0
- GameSentenceMiner/util/configuration.py +45 -16
- GameSentenceMiner/util/ffmpeg.py +23 -95
- GameSentenceMiner/util/gsm_utils.py +56 -1
- GameSentenceMiner/util/text_log.py +2 -2
- GameSentenceMiner/vad.py +3 -14
- GameSentenceMiner/web/texthooking_page.py +2 -2
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/METADATA +4 -2
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/RECORD +21 -20
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.9.29.dist-info → gamesentenceminer-2.10.0.dist-info}/top_level.txt +0 -0
GameSentenceMiner/gametext.py
CHANGED
@@ -14,11 +14,13 @@ current_line = ''
|
|
14
14
|
current_line_after_regex = ''
|
15
15
|
current_line_time = datetime.now()
|
16
16
|
|
17
|
+
last_clipboard = ''
|
18
|
+
|
17
19
|
reconnecting = False
|
18
20
|
websocket_connected = {}
|
19
21
|
|
20
22
|
async def monitor_clipboard():
|
21
|
-
global current_line
|
23
|
+
global current_line, last_clipboard
|
22
24
|
current_line = pyperclip.paste()
|
23
25
|
send_message_on_resume = False
|
24
26
|
while True:
|
@@ -37,7 +39,8 @@ async def monitor_clipboard():
|
|
37
39
|
gsm_status.clipboard_enabled = True
|
38
40
|
current_clipboard = pyperclip.paste()
|
39
41
|
|
40
|
-
if current_clipboard and current_clipboard != current_line:
|
42
|
+
if current_clipboard and current_clipboard != current_line and current_clipboard != last_clipboard:
|
43
|
+
last_clipboard = current_clipboard
|
41
44
|
await handle_new_text_event(current_clipboard)
|
42
45
|
|
43
46
|
await asyncio.sleep(0.05)
|
GameSentenceMiner/gsm.py
CHANGED
@@ -3,7 +3,7 @@ import subprocess
|
|
3
3
|
import sys
|
4
4
|
|
5
5
|
import os
|
6
|
-
|
6
|
+
import warnings
|
7
7
|
|
8
8
|
os.environ.pop('TCL_LIBRARY', None)
|
9
9
|
|
@@ -60,7 +60,7 @@ obs_paused = False
|
|
60
60
|
icon: Icon
|
61
61
|
menu: Menu
|
62
62
|
root = None
|
63
|
-
|
63
|
+
warnings.simplefilter("ignore", DeprecationWarning)
|
64
64
|
|
65
65
|
|
66
66
|
class VideoToAudioHandler(FileSystemEventHandler):
|
@@ -78,6 +78,7 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
78
78
|
|
79
79
|
def process_replay(self, video_path):
|
80
80
|
vad_trimmed_audio = ''
|
81
|
+
final_audio_output = ''
|
81
82
|
skip_delete = False
|
82
83
|
gsm_state.previous_replay = video_path
|
83
84
|
if gsm_state.line_for_audio or gsm_state.line_for_screenshot:
|
@@ -85,7 +86,7 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
85
86
|
return
|
86
87
|
try:
|
87
88
|
if anki.card_queue and len(anki.card_queue) > 0:
|
88
|
-
last_note, anki_card_creation_time = anki.card_queue.pop(0)
|
89
|
+
last_note, anki_card_creation_time, selected_lines = anki.card_queue.pop(0)
|
89
90
|
else:
|
90
91
|
logger.info("Replay buffer initiated externally. Skipping processing.")
|
91
92
|
skip_delete = True
|
@@ -115,9 +116,7 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
115
116
|
if mined_line.next:
|
116
117
|
line_cutoff = mined_line.next.time
|
117
118
|
|
118
|
-
selected_lines
|
119
|
-
if texthooking_page.are_lines_selected():
|
120
|
-
selected_lines = texthooking_page.get_selected_lines()
|
119
|
+
if selected_lines:
|
121
120
|
start_line = selected_lines[0]
|
122
121
|
mined_line = get_mined_line(last_note, selected_lines)
|
123
122
|
line_cutoff = selected_lines[-1].get_next_time()
|
@@ -126,7 +125,6 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
126
125
|
logger.debug(last_note.to_json())
|
127
126
|
note = anki.get_initial_card_info(last_note, selected_lines)
|
128
127
|
tango = last_note.get_field(get_config().anki.word_field) if last_note else ''
|
129
|
-
texthooking_page.reset_checked_lines()
|
130
128
|
|
131
129
|
if get_config().anki.sentence_audio_field and get_config().audio.enabled:
|
132
130
|
logger.debug("Attempting to get audio from video")
|
@@ -138,7 +136,7 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
138
136
|
mined_line=mined_line)
|
139
137
|
else:
|
140
138
|
final_audio_output = ""
|
141
|
-
vad_result = VADResult(
|
139
|
+
vad_result = VADResult(True, 0, 0, '')
|
142
140
|
vad_trimmed_audio = ""
|
143
141
|
if not get_config().audio.enabled:
|
144
142
|
logger.info("Audio is disabled in config, skipping audio processing!")
|
@@ -177,6 +175,8 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
177
175
|
os.remove(video_path)
|
178
176
|
if vad_trimmed_audio and get_config().paths.remove_audio and os.path.exists(vad_trimmed_audio):
|
179
177
|
os.remove(vad_trimmed_audio)
|
178
|
+
if final_audio_output and get_config().paths.remove_audio and os.path.exists(final_audio_output):
|
179
|
+
os.remove(final_audio_output)
|
180
180
|
|
181
181
|
@staticmethod
|
182
182
|
def get_audio(game_line, next_line_time, video_path, anki_card_creation_time=None, temporary=False, timing_only=False, mined_line=None):
|
@@ -191,9 +191,9 @@ class VideoToAudioHandler(FileSystemEventHandler):
|
|
191
191
|
vad_result = vad_processor.trim_audio_with_vad(trimmed_audio, vad_trimmed_audio, game_line)
|
192
192
|
if timing_only:
|
193
193
|
return vad_result
|
194
|
-
if get_config().audio.
|
194
|
+
if get_config().audio.ffmpeg_reencode_options_to_use and os.path.exists(vad_trimmed_audio):
|
195
195
|
ffmpeg.reencode_file_with_user_config(vad_trimmed_audio, final_audio_output,
|
196
|
-
get_config().audio.
|
196
|
+
get_config().audio.ffmpeg_reencode_options_to_use)
|
197
197
|
elif os.path.exists(vad_trimmed_audio):
|
198
198
|
shutil.move(vad_trimmed_audio, final_audio_output)
|
199
199
|
return final_audio_output, vad_result, vad_trimmed_audio
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import ctypes
|
2
|
+
from copy import deepcopy
|
2
3
|
from dataclasses import dataclass
|
3
4
|
from math import floor, ceil
|
4
5
|
|
@@ -43,12 +44,14 @@ class WindowGeometry:
|
|
43
44
|
class OCRConfig:
|
44
45
|
scene: str
|
45
46
|
rectangles: List[Rectangle]
|
47
|
+
pre_scale_rectangles: List[Rectangle] = None
|
46
48
|
coordinate_system: str = None
|
47
49
|
window_geometry: Optional[WindowGeometry] = None
|
48
50
|
window: Optional[str] = None
|
49
51
|
language: str = "ja"
|
50
52
|
|
51
53
|
def __post_init__(self):
|
54
|
+
self.pre_scale_rectangles = deepcopy(self.rectangles)
|
52
55
|
if self.coordinate_system and self.coordinate_system == "percentage" and self.window:
|
53
56
|
import pygetwindow as gw
|
54
57
|
try:
|
@@ -71,6 +74,19 @@ class OCRConfig:
|
|
71
74
|
ceil(rectangle.coordinates[3] * self.window_geometry.height),
|
72
75
|
]
|
73
76
|
|
77
|
+
def scale_to_custom_size(self, width, height):
|
78
|
+
print(self.pre_scale_rectangles)
|
79
|
+
self.rectangles = self.pre_scale_rectangles.copy()
|
80
|
+
if self.coordinate_system and self.coordinate_system == "percentage":
|
81
|
+
for rectangle in self.rectangles:
|
82
|
+
rectangle.coordinates = [
|
83
|
+
floor(rectangle.coordinates[0] * width),
|
84
|
+
floor(rectangle.coordinates[1] * height),
|
85
|
+
floor(rectangle.coordinates[2] * width),
|
86
|
+
floor(rectangle.coordinates[3] * height),
|
87
|
+
]
|
88
|
+
|
89
|
+
|
74
90
|
def get_window(title):
|
75
91
|
import pygetwindow as gw
|
76
92
|
windows = gw.getWindowsWithTitle(title)
|
@@ -731,6 +731,8 @@ class ScreenSelector:
|
|
731
731
|
window.bind('<Button-3>', on_right_click) # Right click delete
|
732
732
|
window.bind("<m>", toggle_image_mode) # Toggle image mode (alpha)
|
733
733
|
|
734
|
+
canvas.focus_set()
|
735
|
+
|
734
736
|
def start(self):
|
735
737
|
"""Initializes the Tkinter root and creates windows for each monitor."""
|
736
738
|
self.root = tk.Tk()
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import asyncio
|
2
|
+
import io
|
2
3
|
import json
|
3
4
|
import logging
|
4
5
|
import os
|
@@ -11,7 +12,9 @@ from pathlib import Path
|
|
11
12
|
from tkinter import messagebox
|
12
13
|
|
13
14
|
import mss
|
15
|
+
import mss.tools
|
14
16
|
import websockets
|
17
|
+
from PIL import Image
|
15
18
|
from rapidfuzz import fuzz
|
16
19
|
|
17
20
|
from GameSentenceMiner import obs
|
@@ -44,35 +47,6 @@ console_handler.setFormatter(formatter)
|
|
44
47
|
logger.addHandler(console_handler)
|
45
48
|
|
46
49
|
|
47
|
-
def get_new_game_cords():
|
48
|
-
"""Allows multiple coordinate selections."""
|
49
|
-
coords_list = []
|
50
|
-
with mss.mss() as sct:
|
51
|
-
monitors = sct.monitors
|
52
|
-
monitor_map = {i: mon for i, mon in enumerate(monitors)}
|
53
|
-
while True:
|
54
|
-
selected_monitor_index, cords = screen_coordinate_picker.get_screen_selection_with_monitor(monitor_map)
|
55
|
-
selected_monitor = monitor_map[selected_monitor_index]
|
56
|
-
coords_list.append({"monitor": {"left": selected_monitor["left"], "top": selected_monitor["top"],
|
57
|
-
"width": selected_monitor["width"], "height": selected_monitor["height"],
|
58
|
-
"index": selected_monitor_index}, "coordinates": cords,
|
59
|
-
"is_excluded": False})
|
60
|
-
if messagebox.askyesno("Add Another Region", "Do you want to add another region?"):
|
61
|
-
continue
|
62
|
-
else:
|
63
|
-
break
|
64
|
-
app_dir = Path.home() / "AppData" / "Roaming" / "GameSentenceMiner"
|
65
|
-
ocr_config_dir = app_dir / "ocr_config"
|
66
|
-
ocr_config_dir.mkdir(parents=True, exist_ok=True)
|
67
|
-
obs.connect_to_obs_sync()
|
68
|
-
scene = sanitize_filename(obs.get_current_scene())
|
69
|
-
config_path = ocr_config_dir / f"{scene}.json"
|
70
|
-
with open(config_path, 'w') as f:
|
71
|
-
json.dump({"scene": scene, "window": None, "rectangles": coords_list}, f, indent=4)
|
72
|
-
print(f"Saved OCR config to {config_path}")
|
73
|
-
return coords_list
|
74
|
-
|
75
|
-
|
76
50
|
def get_ocr_config(window=None) -> OCRConfig:
|
77
51
|
"""Loads and updates screen capture areas from the corresponding JSON file."""
|
78
52
|
app_dir = Path.home() / "AppData" / "Roaming" / "GameSentenceMiner"
|
@@ -213,11 +187,11 @@ def do_second_ocr(ocr1_text, time, img, filtering):
|
|
213
187
|
try:
|
214
188
|
orig_text, text = run.process_and_write_results(img, None, last_ocr2_result, filtering, None,
|
215
189
|
engine=ocr2, furigana_filter_sensitivity=furigana_filter_sensitivity)
|
216
|
-
if fuzz.ratio(last_ocr2_result,
|
190
|
+
if fuzz.ratio(last_ocr2_result, orig_text) >= 90:
|
217
191
|
logger.info("Seems like the same text from previous ocr2 result, not sending")
|
218
192
|
return
|
219
193
|
save_result_image(img)
|
220
|
-
last_ocr2_result =
|
194
|
+
last_ocr2_result = orig_text
|
221
195
|
asyncio.run(send_result(text, time))
|
222
196
|
except json.JSONDecodeError:
|
223
197
|
print("Invalid JSON received.")
|
@@ -238,7 +212,7 @@ def save_result_image(img):
|
|
238
212
|
async def send_result(text, time):
|
239
213
|
if text:
|
240
214
|
text = do_text_replacements(text, OCR_REPLACEMENTS_FILE)
|
241
|
-
if
|
215
|
+
if clipboard_output:
|
242
216
|
import pyperclip
|
243
217
|
pyperclip.copy(text)
|
244
218
|
try:
|
@@ -365,14 +339,23 @@ def run_oneocr(ocr_config: OCRConfig, rectangles):
|
|
365
339
|
|
366
340
|
def add_ss_hotkey(ss_hotkey="ctrl+shift+g"):
|
367
341
|
import keyboard
|
368
|
-
|
342
|
+
secret_ss_hotkey = "F15"
|
369
343
|
filtering = TextFiltering(lang=language)
|
344
|
+
cropper = ScreenCropper()
|
370
345
|
def capture():
|
371
346
|
print("Taking screenshot...")
|
372
347
|
img = cropper.run()
|
373
348
|
do_second_ocr("", datetime.now(), img, filtering)
|
349
|
+
def capture_main_monitor():
|
350
|
+
print("Taking screenshot of main monitor...")
|
351
|
+
with mss.mss() as sct:
|
352
|
+
main_monitor = sct.monitors[1] if len(sct.monitors) > 1 else sct.monitors[0]
|
353
|
+
img = sct.grab(main_monitor)
|
354
|
+
img_bytes = mss.tools.to_png(img.rgb, img.size)
|
355
|
+
do_second_ocr("", datetime.now(), img_bytes, filtering)
|
374
356
|
try:
|
375
357
|
keyboard.add_hotkey(ss_hotkey, capture)
|
358
|
+
keyboard.add_hotkey(secret_ss_hotkey, capture_main_monitor)
|
376
359
|
print(f"Press {ss_hotkey} to take a screenshot.")
|
377
360
|
except Exception as e:
|
378
361
|
logger.error(f"Error setting up screenshot hotkey with keyboard, Attempting Backup: {e}")
|
@@ -414,6 +397,7 @@ if __name__ == "__main__":
|
|
414
397
|
parser.add_argument("--twopassocr", type=int, choices=[0, 1], default=1, help="Enable two-pass OCR (default: 1)")
|
415
398
|
parser.add_argument("--manual", action="store_true", help="Use screenshot-only mode")
|
416
399
|
parser.add_argument("--clipboard", action="store_true", help="Use clipboard for input")
|
400
|
+
parser.add_argument("--clipboard-output", action="store_true", default=False, help="Use clipboard for output")
|
417
401
|
parser.add_argument("--window", type=str, help="Specify the window name for OCR")
|
418
402
|
parser.add_argument("--furigana_filter_sensitivity", type=float, default=0, help="Furigana Filter Sensitivity for OCR (default: 0)")
|
419
403
|
parser.add_argument("--manual_ocr_hotkey", type=str, default=None, help="Hotkey for manual OCR (default: None)")
|
@@ -431,6 +415,7 @@ if __name__ == "__main__":
|
|
431
415
|
furigana_filter_sensitivity = args.furigana_filter_sensitivity
|
432
416
|
ss_hotkey = args.area_select_ocr_hotkey.lower()
|
433
417
|
manual_ocr_hotkey = args.manual_ocr_hotkey.lower().replace("ctrl", "<ctrl>").replace("shift", "<shift>").replace("alt", "<alt>") if args.manual_ocr_hotkey else None
|
418
|
+
clipboard_output = args.clipboard_output
|
434
419
|
|
435
420
|
logger.info(f"Received arguments: {vars(args)}")
|
436
421
|
# set_force_stable_hotkey()
|
@@ -7,6 +7,7 @@ import io
|
|
7
7
|
|
8
8
|
class ScreenCropper:
|
9
9
|
def __init__(self):
|
10
|
+
self.main_monitor = None
|
10
11
|
self.root = None
|
11
12
|
self.canvas = None
|
12
13
|
self.captured_image = None
|
@@ -23,6 +24,7 @@ class ScreenCropper:
|
|
23
24
|
try:
|
24
25
|
with mss.mss() as sct:
|
25
26
|
all_monitors_bbox = sct.monitors[0]
|
27
|
+
self.main_monitor = sct.monitors[1]
|
26
28
|
self.monitor_geometry = {
|
27
29
|
'left': all_monitors_bbox['left'],
|
28
30
|
'top': all_monitors_bbox['top'],
|
@@ -74,6 +76,15 @@ class ScreenCropper:
|
|
74
76
|
|
75
77
|
self.root.destroy()
|
76
78
|
|
79
|
+
def _on_enter(self, event):
|
80
|
+
print(event)
|
81
|
+
print("Enter key pressed, grabbing main monitor area.")
|
82
|
+
self.cropped_image = self.captured_image.crop((self.main_monitor['left'], self.main_monitor['top'],
|
83
|
+
self.main_monitor['left'] + self.main_monitor['width'],
|
84
|
+
self.main_monitor['top'] + self.main_monitor['height']))
|
85
|
+
self.root.destroy()
|
86
|
+
|
87
|
+
|
77
88
|
def show_image_and_select_box(self):
|
78
89
|
if self.captured_image is None or self.monitor_geometry is None:
|
79
90
|
print("No image or monitor geometry to display. Capture all monitors first.")
|
@@ -101,13 +112,18 @@ class ScreenCropper:
|
|
101
112
|
self.canvas.bind("<B1-Motion>", self._on_mouse_drag)
|
102
113
|
self.canvas.bind("<ButtonRelease-1>", self._on_button_release)
|
103
114
|
|
115
|
+
|
104
116
|
self.root.mainloop()
|
105
117
|
|
106
118
|
def get_cropped_image(self):
|
107
119
|
return self.cropped_image
|
108
120
|
|
109
|
-
def run(self):
|
121
|
+
def run(self, return_main_monitor=False):
|
110
122
|
self.grab_all_monitors()
|
123
|
+
if return_main_monitor and self.captured_image:
|
124
|
+
return self.captured_image.crop((self.main_monitor['left'], self.main_monitor['top'],
|
125
|
+
self.main_monitor['left'] + self.main_monitor['width'],
|
126
|
+
self.main_monitor['top'] + self.main_monitor['height']))
|
111
127
|
if self.captured_image and self.monitor_geometry:
|
112
128
|
self.show_image_and_select_box()
|
113
129
|
return self.get_cropped_image()
|
@@ -0,0 +1,205 @@
|
|
1
|
+
import os
|
2
|
+
import tkinter as tk
|
3
|
+
from tkinter import filedialog, messagebox
|
4
|
+
import soundfile as sf
|
5
|
+
import numpy as np
|
6
|
+
import matplotlib.pyplot as plt
|
7
|
+
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
8
|
+
import sounddevice as sd
|
9
|
+
|
10
|
+
from GameSentenceMiner.util import ffmpeg
|
11
|
+
|
12
|
+
|
13
|
+
class AudioOffsetGUI:
|
14
|
+
|
15
|
+
def __init__(self, master, audio_file_path=None):
|
16
|
+
self.master = master
|
17
|
+
master.title("Audio Offset Adjuster")
|
18
|
+
master.geometry("1000x700")
|
19
|
+
|
20
|
+
master.tk_setPalette(background='#2E2E2E', foreground='white',
|
21
|
+
activeBackground='#4F4F4F', activeForeground='white')
|
22
|
+
|
23
|
+
self.audio_data = None
|
24
|
+
self.samplerate = None
|
25
|
+
self.duration = 0.0
|
26
|
+
|
27
|
+
self.fig, self.ax = plt.subplots(figsize=(10, 4))
|
28
|
+
self.canvas = FigureCanvasTkAgg(self.fig, master=master)
|
29
|
+
self.canvas_widget = self.canvas.get_tk_widget()
|
30
|
+
self.canvas_widget.pack(side=tk.TOP, fill=tk.BOTH, expand=True, padx=10, pady=10)
|
31
|
+
|
32
|
+
plt.style.use('dark_background')
|
33
|
+
self.fig.set_facecolor('#2E2E2E')
|
34
|
+
self.ax.set_facecolor('#2E2E2E')
|
35
|
+
self.ax.tick_params(axis='x', colors='white')
|
36
|
+
self.ax.tick_params(axis='y', colors='white')
|
37
|
+
self.ax.spines['bottom'].set_color('white')
|
38
|
+
self.ax.spines['left'].set_color('white')
|
39
|
+
self.ax.spines['top'].set_color('white')
|
40
|
+
self.ax.spines['right'].set_color('white')
|
41
|
+
self.ax.set_xlabel("Time (s)", color='white')
|
42
|
+
self.ax.set_ylabel("Amplitude", color='white')
|
43
|
+
|
44
|
+
self.beg_offset_line = None
|
45
|
+
# self.end_offset_line is removed as there's no end slider
|
46
|
+
|
47
|
+
self.create_widgets()
|
48
|
+
|
49
|
+
self.load_audio(audio_file_path)
|
50
|
+
|
51
|
+
|
52
|
+
def create_widgets(self):
|
53
|
+
control_frame = tk.Frame(self.master, bg='#2E2E2E')
|
54
|
+
control_frame.pack(side=tk.BOTTOM, fill=tk.X, pady=10, padx=10)
|
55
|
+
|
56
|
+
self.play_button = tk.Button(control_frame, text="Play/Pause Segment", command=self.play_segment, bg='#4F4F4F', fg='white')
|
57
|
+
self.play_button.pack(side=tk.RIGHT, padx=5)
|
58
|
+
|
59
|
+
self.output_button = tk.Button(control_frame, text="Get Offset", command=self.get_offsets, bg='#4F4F4F', fg='white')
|
60
|
+
self.output_button.pack(side=tk.RIGHT, padx=5)
|
61
|
+
|
62
|
+
self.beg_offset_label = tk.Label(control_frame, text="Beginning Offset: 0.00s", bg='#2E2E2E', fg='white')
|
63
|
+
self.beg_offset_label.pack(side=tk.LEFT, padx=10)
|
64
|
+
|
65
|
+
self.end_offset_label = tk.Label(control_frame, text="End Offset: Full Duration", bg='#2E2E2E', fg='white')
|
66
|
+
self.end_offset_label.pack(side=tk.LEFT, padx=10)
|
67
|
+
|
68
|
+
slider_frame = tk.Frame(self.master, bg='#2E2E2E')
|
69
|
+
slider_frame.pack(side=tk.BOTTOM, fill=tk.X, pady=5, padx=10)
|
70
|
+
|
71
|
+
beg_slider_label = tk.Label(slider_frame, text="Start Trim:", bg='#2E2E2E', fg='white')
|
72
|
+
beg_slider_label.pack(side=tk.LEFT)
|
73
|
+
self.beg_slider = tk.Scale(slider_frame, from_=0, to=100, orient=tk.HORIZONTAL, resolution=0.5,
|
74
|
+
command=self.on_slider_change, bg='#2E2E2E', fg='white', troughcolor='#4F4F4F',
|
75
|
+
highlightbackground='#2E2E2E', length=300)
|
76
|
+
self.beg_slider.pack(side=tk.LEFT, expand=True, fill=tk.X, padx=5)
|
77
|
+
|
78
|
+
# Removed end_slider and its associated label
|
79
|
+
|
80
|
+
def load_audio(self, file_path):
|
81
|
+
if file_path:
|
82
|
+
try:
|
83
|
+
self.audio_data, self.samplerate = sf.read(file_path)
|
84
|
+
if self.audio_data.ndim > 1:
|
85
|
+
self.audio_data = self.audio_data[:, 0]
|
86
|
+
self.duration = len(self.audio_data) / self.samplerate
|
87
|
+
self.plot_waveform()
|
88
|
+
self.beg_slider.config(to=self.duration)
|
89
|
+
self.beg_slider.set(0) # Reset start slider to 0
|
90
|
+
except Exception as e:
|
91
|
+
messagebox.showerror("Error", f"Failed to load audio file: {e}")
|
92
|
+
self.audio_data = None
|
93
|
+
self.samplerate = None
|
94
|
+
self.duration = 0.0
|
95
|
+
|
96
|
+
def plot_waveform(self):
|
97
|
+
self.ax.clear()
|
98
|
+
if self.audio_data is not None:
|
99
|
+
time = np.linspace(0, self.duration, len(self.audio_data))
|
100
|
+
self.ax.plot(time, self.audio_data, color='#1E90FF')
|
101
|
+
self.ax.set_xlim(0, self.duration)
|
102
|
+
self.ax.set_ylim(np.min(self.audio_data), np.max(self.audio_data))
|
103
|
+
self.ax.set_title("Audio", color='white')
|
104
|
+
|
105
|
+
if self.beg_offset_line:
|
106
|
+
self.beg_offset_line.remove()
|
107
|
+
# self.end_offset_line.remove() is removed
|
108
|
+
|
109
|
+
self.beg_offset_line = self.ax.axvline(self.beg_slider.get(), color='red', linestyle='--', linewidth=2)
|
110
|
+
# The end line is now always at the duration
|
111
|
+
self.ax.axvline(self.duration, color='green', linestyle='--', linewidth=2)
|
112
|
+
|
113
|
+
self.update_offset_labels()
|
114
|
+
else:
|
115
|
+
self.ax.text(0.5, 0.5, "No audio loaded",
|
116
|
+
horizontalalignment='center', verticalalignment='center',
|
117
|
+
transform=self.ax.transAxes, color='white', fontsize=16)
|
118
|
+
|
119
|
+
self.fig.canvas.draw_idle()
|
120
|
+
|
121
|
+
def on_slider_change(self, val):
|
122
|
+
if self.audio_data is None:
|
123
|
+
return
|
124
|
+
|
125
|
+
beg_val = float(self.beg_slider.get())
|
126
|
+
|
127
|
+
if self.beg_offset_line:
|
128
|
+
self.beg_offset_line.set_xdata([beg_val])
|
129
|
+
|
130
|
+
self.update_offset_labels()
|
131
|
+
self.fig.canvas.draw_idle()
|
132
|
+
|
133
|
+
def play_segment(self):
|
134
|
+
if self.audio_data is None:
|
135
|
+
messagebox.showinfo("Play Audio", "No audio file loaded yet.")
|
136
|
+
return
|
137
|
+
|
138
|
+
if hasattr(self, 'is_playing') and self.is_playing:
|
139
|
+
sd.stop()
|
140
|
+
self.is_playing = False
|
141
|
+
return
|
142
|
+
|
143
|
+
beg_offset = self.beg_slider.get()
|
144
|
+
end_offset = self.duration # End offset is now always full duration
|
145
|
+
|
146
|
+
if beg_offset >= end_offset:
|
147
|
+
messagebox.showwarning("Play Audio", "Start offset must be less than end offset.")
|
148
|
+
return
|
149
|
+
|
150
|
+
start_frame = int(beg_offset * self.samplerate)
|
151
|
+
end_frame = int(end_offset * self.samplerate)
|
152
|
+
|
153
|
+
if start_frame >= len(self.audio_data) or end_frame <= 0:
|
154
|
+
messagebox.showwarning("Play Audio", "Selected segment is out of audio range.")
|
155
|
+
return
|
156
|
+
|
157
|
+
segment_to_play = self.audio_data[start_frame:end_frame]
|
158
|
+
|
159
|
+
try:
|
160
|
+
self.is_playing = True
|
161
|
+
sd.play(segment_to_play, self.samplerate)
|
162
|
+
except Exception as e:
|
163
|
+
self.is_playing = False
|
164
|
+
messagebox.showerror("Audio Playback Error", f"Failed to play audio: {e}")
|
165
|
+
|
166
|
+
def update_offset_labels(self):
|
167
|
+
if self.beg_offset_line: # We no longer have an end_offset_line object
|
168
|
+
beg_val = self.beg_offset_line.get_xdata()[0] - 5.0 # Adjusting for the 5 seconds offset
|
169
|
+
self.beg_offset_label.config(text=f"Beginning Offset: {beg_val:.2f}s")
|
170
|
+
|
171
|
+
def get_offsets(self):
|
172
|
+
if self.audio_data is None:
|
173
|
+
messagebox.showinfo("Offsets", "No audio file loaded yet.")
|
174
|
+
return
|
175
|
+
|
176
|
+
beg_offset = self.beg_slider.get() - 5.0
|
177
|
+
end_offset = self.duration # End offset is always full duration
|
178
|
+
|
179
|
+
print(f"{beg_offset:.2f}")
|
180
|
+
exit(0)
|
181
|
+
|
182
|
+
def run_audio_offset_gui(path=None, beginning_offset=0, end_offset=None):
|
183
|
+
temp_file_path = os.path.join(os.path.dirname(path), "temp_audio.opus")
|
184
|
+
|
185
|
+
if os.path.exists(temp_file_path):
|
186
|
+
os.remove(temp_file_path)
|
187
|
+
|
188
|
+
ffmpeg.trim_audio(path, beginning_offset - 5, end_offset, temp_file_path, True, 0, 0)
|
189
|
+
|
190
|
+
root = tk.Tk()
|
191
|
+
root.protocol("WM_DELETE_WINDOW", lambda: exit(1)) # Exit when the window is closed
|
192
|
+
app = AudioOffsetGUI(root, audio_file_path=temp_file_path)
|
193
|
+
root.mainloop()
|
194
|
+
|
195
|
+
|
196
|
+
if __name__ == "__main__":
|
197
|
+
import argparse
|
198
|
+
|
199
|
+
parser = argparse.ArgumentParser(description="Run Audio Offset GUI")
|
200
|
+
parser.add_argument("--path", type=str, required=True, help="Path to the audio file")
|
201
|
+
parser.add_argument("--beginning_offset", type=float, default=0, help="Beginning offset in seconds")
|
202
|
+
parser.add_argument("--end_offset", type=float, default=None, help="End offset in seconds")
|
203
|
+
|
204
|
+
args = parser.parse_args()
|
205
|
+
run_audio_offset_gui(path=args.path, beginning_offset=args.beginning_offset, end_offset=args.end_offset)
|