mkv-episode-matcher 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mkv_episode_matcher/episode_identification.py +118 -176
- mkv_episode_matcher/episode_matcher.py +16 -34
- mkv_episode_matcher/utils.py +7 -10
- {mkv_episode_matcher-0.3.2.dist-info → mkv_episode_matcher-0.3.3.dist-info}/METADATA +1 -1
- {mkv_episode_matcher-0.3.2.dist-info → mkv_episode_matcher-0.3.3.dist-info}/RECORD +8 -8
- {mkv_episode_matcher-0.3.2.dist-info → mkv_episode_matcher-0.3.3.dist-info}/WHEEL +0 -0
- {mkv_episode_matcher-0.3.2.dist-info → mkv_episode_matcher-0.3.3.dist-info}/entry_points.txt +0 -0
- {mkv_episode_matcher-0.3.2.dist-info → mkv_episode_matcher-0.3.3.dist-info}/top_level.txt +0 -0
|
@@ -1,208 +1,150 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import json
|
|
3
2
|
import os
|
|
4
|
-
import
|
|
3
|
+
import subprocess
|
|
4
|
+
import tempfile
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
import torch
|
|
6
7
|
from rapidfuzz import fuzz
|
|
7
|
-
from collections import defaultdict
|
|
8
|
-
import re
|
|
9
8
|
from loguru import logger
|
|
10
|
-
import
|
|
11
|
-
import
|
|
12
|
-
|
|
9
|
+
import whisper
|
|
10
|
+
import numpy as np
|
|
11
|
+
import re
|
|
13
12
|
class EpisodeMatcher:
    """Match an MKV file to an episode by comparing speech with reference subtitles.

    Audio is cut into fixed-length chunks with ffmpeg, transcribed with
    Whisper, and each transcript is fuzzy-matched against the text found in
    the same time window of every reference ``.srt`` file for the season.
    """

    def __init__(self, cache_dir, show_name, min_confidence=0.6):
        """Store configuration and make sure the temp chunk directory exists.

        Args:
            cache_dir: Root cache directory; reference subtitles are read
                from ``<cache_dir>/data/<show_name>``.
            show_name: Name of the show (used as the reference sub-directory).
            min_confidence: Score a match must exceed to be accepted.
        """
        self.cache_dir = Path(cache_dir)
        self.min_confidence = min_confidence
        self.show_name = show_name
        self.chunk_duration = 300  # 5 minutes
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.temp_dir = Path(tempfile.gettempdir()) / "whisper_chunks"
        self.temp_dir.mkdir(exist_ok=True)
        # Whisper model is loaded lazily and reused across files.
        self._model = None

    def clean_text(self, text):
        """Normalize text for fuzzy comparison.

        Lower-cases, removes ``[...]`` and ``<...>`` spans, collapses
        stutters such as ``y-y-you`` to ``you``, and squeezes whitespace.
        """
        text = text.lower().strip()
        text = re.sub(r'\[.*?\]|\<.*?\>', '', text)
        # Collapse *all* repetitions of a leading letter ("y-y-you" -> "you").
        # The word boundary keeps ordinary hyphenated words ("well-lit")
        # intact; the text is already lower-case, so [a-z] is sufficient.
        text = re.sub(r'\b([a-z])(?:-\1)+', r'\1', text)
        return ' '.join(text.split())

    def chunk_score(self, whisper_chunk, ref_chunk):
        """Return a 0..1 similarity score between two chunks of text.

        The score is a 70/30 blend of rapidfuzz's token-sort ratio and
        partial ratio, computed on the cleaned texts.
        """
        whisper_clean = self.clean_text(whisper_chunk)
        ref_clean = self.clean_text(ref_chunk)
        token_sort = fuzz.token_sort_ratio(whisper_clean, ref_clean)
        partial = fuzz.partial_ratio(whisper_clean, ref_clean)
        return (token_sort * 0.7 + partial * 0.3) / 100.0

    def extract_audio_chunk(self, mkv_file, start_time):
        """Extract one chunk of audio from an MKV file as 16 kHz mono WAV.

        Args:
            mkv_file: Path of the video file.
            start_time: Offset in seconds at which the chunk starts.

        Returns:
            Path of the WAV file as a string. An already existing chunk file
            for the same start time is reused.

        Raises:
            RuntimeError: If ffmpeg exits with an error or produces no file.
        """
        chunk_path = self.temp_dir / f"chunk_{start_time}.wav"
        if not chunk_path.exists():
            cmd = [
                'ffmpeg',
                '-ss', str(start_time),
                '-t', str(self.chunk_duration),
                '-i', str(mkv_file),
                '-vn',
                '-acodec', 'pcm_s16le',
                '-ar', '16000',
                '-ac', '1',
                str(chunk_path),
            ]
            result = subprocess.run(cmd, capture_output=True)
            if result.returncode != 0 or not chunk_path.exists():
                # Do not leave a truncated file behind: it would be picked
                # up by the exists() check on the next call.
                chunk_path.unlink(missing_ok=True)
                stderr = result.stderr.decode(errors='replace').strip()
                logger.error(f"ffmpeg failed for {mkv_file} at {start_time}s: {stderr}")
                raise RuntimeError(
                    f"Failed to extract audio chunk at {start_time}s from {mkv_file}"
                )
        return str(chunk_path)

    def load_reference_chunk(self, srt_file, chunk_idx):
        """Return the subtitle text whose end time falls inside a chunk.

        Args:
            srt_file: Path of the reference ``.srt`` file.
            chunk_idx: Zero-based index of the ``chunk_duration`` window.

        Returns:
            The matching subtitle lines joined with single spaces; an empty
            string when nothing falls inside the window.
        """
        chunk_start = chunk_idx * self.chunk_duration
        chunk_end = chunk_start + self.chunk_duration
        text_lines = []

        # errors='replace': a subtitle file in a legacy encoding should
        # degrade the match, not abort the whole run.
        with open(srt_file, 'r', encoding='utf-8', errors='replace') as f:
            content = f.read().strip()

        # Blocks are separated by one or more blank (or whitespace-only) lines.
        for block in re.split(r'\n\s*\n', content):
            lines = block.strip().split('\n')
            if len(lines) < 3 or '-->' not in lines[1]:  # Skip malformed blocks
                continue

            try:
                # Take the first token after the arrow so both "a --> b" and
                # "a-->b" parse, and trailing cue settings are ignored.
                end_time = lines[1].split('-->')[1].split()[0]
                hours, minutes, seconds = map(
                    float, end_time.replace(',', '.').split(':')
                )
            except (IndexError, ValueError):
                continue

            total_seconds = hours * 3600 + minutes * 60 + seconds
            if chunk_start <= total_seconds <= chunk_end:
                text_lines.append(' '.join(lines[2:]))

        return ' '.join(text_lines)

    def _get_model(self):
        """Return the Whisper model, loading it on first use."""
        model = getattr(self, '_model', None)
        if model is None:
            model = whisper.load_model("base", device=self.device)
            self._model = model
        return model

    def _best_reference(self, chunk_text, reference_files, chunk_idx):
        """Return ``(file, score)`` of the best-scoring reference for a chunk."""
        best_match = None
        best_confidence = 0
        for ref_file in reference_files:
            ref_text = self.load_reference_chunk(ref_file, chunk_idx)
            confidence = self.chunk_score(chunk_text, ref_text)
            if confidence > best_confidence:
                best_confidence = confidence
                best_match = ref_file
        return best_match, best_confidence

    def identify_episode(self, video_file, temp_dir, season_number):
        """Identify the episode contained in ``video_file``.

        Up to the first three chunks are transcribed; the first chunk whose
        best reference score exceeds ``min_confidence`` decides the result.

        Args:
            video_file: Path of the MKV file.
            temp_dir: Unused; kept so existing callers keep working.
            season_number: Season whose reference subtitles are considered.

        Returns:
            A dict with ``season``, ``episode``, ``confidence`` and
            ``reference_file``, or ``None`` when no reference files exist or
            no chunk matched confidently.
        """
        try:
            # Get video duration
            duration = float(subprocess.check_output([
                'ffprobe', '-v', 'error',
                '-show_entries', 'format=duration',
                '-of', 'default=noprint_wrappers=1:nokey=1',
                str(video_file),
            ]).decode())
            total_chunks = int(np.ceil(duration / self.chunk_duration))

            # Get season-specific reference files (sorted so that ties are
            # resolved the same way on every run).
            reference_dir = self.cache_dir / "data" / self.show_name
            season_pattern = f"S{season_number:02d}E"
            reference_files = sorted(
                f for f in reference_dir.glob("*.srt")
                if season_pattern in f.name
            )
            if not reference_files:
                logger.error(f"No reference files found for season {season_number}")
                return None

            # Load the model only once there is something to compare against.
            model = self._get_model()

            # Process chunks until match found
            for chunk_idx in range(min(3, total_chunks)):  # Only try first 3 chunks
                start_time = chunk_idx * self.chunk_duration
                audio_path = self.extract_audio_chunk(video_file, start_time)

                result = model.transcribe(
                    audio_path,
                    task="transcribe",
                    language="en",
                )

                best_match, best_confidence = self._best_reference(
                    result["text"], reference_files, chunk_idx
                )

                # Compare the *best* score with the threshold, not the score
                # of whichever reference file happened to be iterated last.
                if best_match is not None and best_confidence > self.min_confidence:
                    season_ep = re.search(r'S(\d+)E(\d+)', best_match.stem)
                    if season_ep:
                        season, episode = map(int, season_ep.groups())
                        return {
                            'season': season,
                            'episode': episode,
                            'confidence': best_confidence,
                            'reference_file': str(best_match),
                        }

            return None

        finally:
            # Cleanup temp files
            for file in self.temp_dir.glob("chunk_*.wav"):
                file.unlink(missing_ok=True)
|
|
@@ -5,7 +5,7 @@ import shutil
|
|
|
5
5
|
import glob
|
|
6
6
|
import os
|
|
7
7
|
from loguru import logger
|
|
8
|
-
|
|
8
|
+
import re
|
|
9
9
|
from mkv_episode_matcher.__main__ import CONFIG_FILE, CACHE_DIR
|
|
10
10
|
from mkv_episode_matcher.config import get_config
|
|
11
11
|
from mkv_episode_matcher.mkv_to_srt import convert_mkv_to_srt
|
|
@@ -17,20 +17,18 @@ from mkv_episode_matcher.utils import (
|
|
|
17
17
|
get_subtitles,
|
|
18
18
|
process_reference_srt_files,
|
|
19
19
|
process_srt_files,
|
|
20
|
-
compare_and_rename_files,get_valid_seasons
|
|
20
|
+
compare_and_rename_files,get_valid_seasons,rename_episode_file
|
|
21
21
|
)
|
|
22
22
|
from mkv_episode_matcher.speech_to_text import process_speech_to_text
|
|
23
23
|
from mkv_episode_matcher.episode_identification import EpisodeMatcher
|
|
24
24
|
|
|
25
25
|
def process_show(season=None, dry_run=False, get_subs=False):
|
|
26
|
-
"""Process the show using
|
|
26
|
+
"""Process the show using streaming speech recognition with OCR fallback."""
|
|
27
27
|
config = get_config(CONFIG_FILE)
|
|
28
28
|
show_dir = config.get("show_dir")
|
|
29
29
|
show_name = clean_text(os.path.basename(show_dir))
|
|
30
|
-
|
|
31
|
-
matcher = EpisodeMatcher(CACHE_DIR,show_name)
|
|
30
|
+
matcher = EpisodeMatcher(CACHE_DIR, show_name)
|
|
32
31
|
|
|
33
|
-
# Get valid season directories
|
|
34
32
|
season_paths = get_valid_seasons(show_dir)
|
|
35
33
|
if not season_paths:
|
|
36
34
|
logger.warning(f"No seasons with .mkv files found")
|
|
@@ -43,9 +41,7 @@ def process_show(season=None, dry_run=False, get_subs=False):
|
|
|
43
41
|
return
|
|
44
42
|
season_paths = [season_path]
|
|
45
43
|
|
|
46
|
-
# Process each season
|
|
47
44
|
for season_path in season_paths:
|
|
48
|
-
# Get MKV files that haven't been processed
|
|
49
45
|
mkv_files = [f for f in glob.glob(os.path.join(season_path, "*.mkv"))
|
|
50
46
|
if not check_filename(f)]
|
|
51
47
|
|
|
@@ -53,66 +49,52 @@ def process_show(season=None, dry_run=False, get_subs=False):
|
|
|
53
49
|
logger.info(f"No new files to process in {season_path}")
|
|
54
50
|
continue
|
|
55
51
|
|
|
56
|
-
|
|
52
|
+
season_num = int(re.search(r'Season (\d+)', season_path).group(1))
|
|
57
53
|
temp_dir = Path(season_path) / "temp"
|
|
58
54
|
ocr_dir = Path(season_path) / "ocr"
|
|
59
55
|
temp_dir.mkdir(exist_ok=True)
|
|
60
56
|
ocr_dir.mkdir(exist_ok=True)
|
|
61
57
|
|
|
62
58
|
try:
|
|
63
|
-
# Download subtitles if requested
|
|
64
59
|
if get_subs:
|
|
65
|
-
show_id = fetch_show_id(matcher.
|
|
60
|
+
show_id = fetch_show_id(matcher.show_name)
|
|
66
61
|
if show_id:
|
|
67
|
-
seasons
|
|
68
|
-
|
|
62
|
+
get_subtitles(show_id, seasons={season_num})
|
|
63
|
+
|
|
69
64
|
unmatched_files = []
|
|
70
|
-
|
|
71
|
-
# First pass: Try speech recognition matching
|
|
72
65
|
for mkv_file in mkv_files:
|
|
73
66
|
logger.info(f"Attempting speech recognition match for {mkv_file}")
|
|
67
|
+
match = matcher.identify_episode(mkv_file, temp_dir, season_num)
|
|
74
68
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
match = matcher.identify_episode(mkv_file, temp_dir)
|
|
78
|
-
|
|
79
|
-
if match and match['confidence'] >= matcher.min_confidence:
|
|
80
|
-
# Rename the file
|
|
81
|
-
new_name = f"{matcher.series_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
|
|
69
|
+
if match:
|
|
70
|
+
new_name = f"{matcher.show_name} - S{match['season']:02d}E{match['episode']:02d}.mkv"
|
|
82
71
|
new_path = os.path.join(season_path, new_name)
|
|
83
72
|
|
|
84
73
|
logger.info(f"Speech matched {os.path.basename(mkv_file)} to {new_name} "
|
|
85
74
|
f"(confidence: {match['confidence']:.2f})")
|
|
86
75
|
|
|
87
76
|
if not dry_run:
|
|
88
|
-
|
|
77
|
+
logger.info(f"Renaming {mkv_file} to {new_name}")
|
|
78
|
+
rename_episode_file(mkv_file, new_name)
|
|
89
79
|
else:
|
|
90
|
-
logger.info(f"Speech recognition match failed for {mkv_file},
|
|
80
|
+
logger.info(f"Speech recognition match failed for {mkv_file}, trying OCR")
|
|
91
81
|
unmatched_files.append(mkv_file)
|
|
92
82
|
|
|
93
|
-
#
|
|
83
|
+
# OCR fallback for unmatched files
|
|
94
84
|
if unmatched_files:
|
|
95
85
|
logger.info(f"Attempting OCR matching for {len(unmatched_files)} unmatched files")
|
|
96
|
-
|
|
97
|
-
# Convert files to SRT using OCR
|
|
98
86
|
convert_mkv_to_srt(season_path, unmatched_files)
|
|
99
87
|
|
|
100
|
-
|
|
101
|
-
reference_text_dict = process_reference_srt_files(matcher.series_name)
|
|
88
|
+
reference_text_dict = process_reference_srt_files(matcher.show_name)
|
|
102
89
|
srt_text_dict = process_srt_files(str(ocr_dir))
|
|
103
90
|
|
|
104
|
-
# Compare and rename
|
|
105
91
|
compare_and_rename_files(
|
|
106
92
|
srt_text_dict,
|
|
107
93
|
reference_text_dict,
|
|
108
94
|
dry_run=dry_run,
|
|
109
|
-
min_confidence=0.1 # Lower threshold for OCR
|
|
110
95
|
)
|
|
111
|
-
|
|
112
|
-
|
|
113
96
|
|
|
114
97
|
finally:
|
|
115
|
-
# Cleanup
|
|
116
98
|
if not dry_run:
|
|
117
99
|
shutil.rmtree(temp_dir)
|
|
118
100
|
cleanup_ocr_files(show_dir)
|
mkv_episode_matcher/utils.py
CHANGED
|
@@ -117,8 +117,10 @@ def rename_episode_file(original_file_path, new_filename):
|
|
|
117
117
|
except OSError as e:
|
|
118
118
|
logger.error(f"Failed to rename file: {e}")
|
|
119
119
|
return None
|
|
120
|
-
|
|
121
|
-
|
|
120
|
+
except FileExistsError as e:
|
|
121
|
+
logger.error(f"Failed to rename file: {e}")
|
|
122
|
+
return None
|
|
123
|
+
|
|
122
124
|
def get_subtitles(show_id, seasons: set[int]):
|
|
123
125
|
"""
|
|
124
126
|
Retrieves and saves subtitles for a given TV show and seasons.
|
|
@@ -233,9 +235,7 @@ def clean_text(text):
|
|
|
233
235
|
cleaned_text = re.sub(r"\[.*?\]|\(.*?\)|\{.*?\}", "", text)
|
|
234
236
|
# Strip leading/trailing whitespace
|
|
235
237
|
return cleaned_text.strip()
|
|
236
|
-
# mkv_episode_matcher/utils.py
|
|
237
238
|
|
|
238
|
-
# Add this to your existing utils.py, keeping all other functions
|
|
239
239
|
|
|
240
240
|
def process_reference_srt_files(series_name):
|
|
241
241
|
"""
|
|
@@ -357,12 +357,9 @@ def compare_and_rename_files(srt_files, reference_files, dry_run=False):
|
|
|
357
357
|
logger.info(f"Matching lines: {matching_lines}")
|
|
358
358
|
logger.info(f"Found matching file: {mkv_file} ->{reference}")
|
|
359
359
|
new_filename = os.path.join(parent_dir, reference)
|
|
360
|
-
if not
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
os.rename(mkv_file, new_filename)
|
|
364
|
-
else:
|
|
365
|
-
logger.info(f"File {new_filename} already exists, skipping")
|
|
360
|
+
if not dry_run:
|
|
361
|
+
logger.info(f"Renaming {mkv_file} to {new_filename}")
|
|
362
|
+
rename_episode_file(mkv_file, new_filename)
|
|
366
363
|
|
|
367
364
|
def compare_text(text1, text2):
|
|
368
365
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mkv-episode-matcher
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.3.3
|
|
4
4
|
Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
|
|
5
5
|
Home-page: https://github.com/Jsakkos/mkv-episode-matcher
|
|
6
6
|
Author: Jonathan Sakkos
|
|
@@ -2,12 +2,12 @@ mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb
|
|
|
2
2
|
mkv_episode_matcher/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
3
3
|
mkv_episode_matcher/__main__.py,sha256=3ZcCUxeI7rUA-4oiCD2WXBiOFJAqLsVVWfZKN446FwQ,6792
|
|
4
4
|
mkv_episode_matcher/config.py,sha256=zDDKBcsDt5fME9BRqiTi7yWKeast1pZh36BNYMvIBYM,2419
|
|
5
|
-
mkv_episode_matcher/episode_identification.py,sha256=
|
|
6
|
-
mkv_episode_matcher/episode_matcher.py,sha256=
|
|
5
|
+
mkv_episode_matcher/episode_identification.py,sha256=NopEkcBFFUjjrAujogeVcdISv8UZHFjYr5RJLM0j468,5875
|
|
6
|
+
mkv_episode_matcher/episode_matcher.py,sha256=BJ76DPxsmZs-KfHZZ_0WvKSBZWXsUEO6lW34YdYEaxM,3979
|
|
7
7
|
mkv_episode_matcher/mkv_to_srt.py,sha256=4yxBHRVhgVby0UtQ2aTXGuoQpid8pkgjMIaHU6GCdzc,10857
|
|
8
8
|
mkv_episode_matcher/speech_to_text.py,sha256=-bnGvmtPCKyHFPEaXwIcEYTf_P13rNpAJA-2UFeRFrs,2806
|
|
9
9
|
mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
|
|
10
|
-
mkv_episode_matcher/utils.py,sha256=
|
|
10
|
+
mkv_episode_matcher/utils.py,sha256=YthQByumTL5eGdFTJMoI8csAe4Vc-sPo8XqOKzbj4g4,13975
|
|
11
11
|
mkv_episode_matcher/libraries/pgs2srt/.gitignore,sha256=mt3uxWYZaFurMw_yGE258gWhtGKPVR7e3Ll4ALJpyj4,23
|
|
12
12
|
mkv_episode_matcher/libraries/pgs2srt/README.md,sha256=olb25G17tj0kxPgp_LcH5I2QWXjgP1m8JFyjYRGz4UU,1374
|
|
13
13
|
mkv_episode_matcher/libraries/pgs2srt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -18,8 +18,8 @@ mkv_episode_matcher/libraries/pgs2srt/requirements.txt,sha256=sg87dqWw_qpbwciw-M
|
|
|
18
18
|
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py,sha256=geT1LXdVd8yED9zoJ9K1XfP2JzGcM7u1SslHYrJI09o,10061
|
|
19
19
|
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py,sha256=GKtVy_Lxv-z27mkRG8pJF2znKWXwZTot7jL6kN-zIxM,10503
|
|
20
20
|
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py,sha256=AlJHUYXl85J95OzGRik-AHVfzDd7Q8BJCvD4Nr8kRIk,938598
|
|
21
|
-
mkv_episode_matcher-0.3.
|
|
22
|
-
mkv_episode_matcher-0.3.
|
|
23
|
-
mkv_episode_matcher-0.3.
|
|
24
|
-
mkv_episode_matcher-0.3.
|
|
25
|
-
mkv_episode_matcher-0.3.
|
|
21
|
+
mkv_episode_matcher-0.3.3.dist-info/METADATA,sha256=bAgcQzwsAaYHZ9YgKdTtSB1KtKjc0Y4Ylx_mQyll-I4,4640
|
|
22
|
+
mkv_episode_matcher-0.3.3.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
23
|
+
mkv_episode_matcher-0.3.3.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
|
|
24
|
+
mkv_episode_matcher-0.3.3.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
|
|
25
|
+
mkv_episode_matcher-0.3.3.dist-info/RECORD,,
|
|
File without changes
|
{mkv_episode_matcher-0.3.2.dist-info → mkv_episode_matcher-0.3.3.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|