mkv-episode-matcher 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkv-episode-matcher might be problematic. Click here for more details.
- mkv_episode_matcher/episode_identification.py +144 -25
- {mkv_episode_matcher-0.3.4.dist-info → mkv_episode_matcher-0.3.5.dist-info}/METADATA +28 -27
- {mkv_episode_matcher-0.3.4.dist-info → mkv_episode_matcher-0.3.5.dist-info}/RECORD +6 -6
- {mkv_episode_matcher-0.3.4.dist-info → mkv_episode_matcher-0.3.5.dist-info}/WHEEL +1 -1
- {mkv_episode_matcher-0.3.4.dist-info → mkv_episode_matcher-0.3.5.dist-info}/entry_points.txt +0 -0
- {mkv_episode_matcher-0.3.4.dist-info → mkv_episode_matcher-0.3.5.dist-info}/top_level.txt +0 -0
|
@@ -9,6 +9,10 @@ from loguru import logger
|
|
|
9
9
|
import whisper
|
|
10
10
|
import numpy as np
|
|
11
11
|
import re
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import chardet
|
|
14
|
+
from loguru import logger
|
|
15
|
+
|
|
12
16
|
class EpisodeMatcher:
|
|
13
17
|
def __init__(self, cache_dir, show_name, min_confidence=0.6):
|
|
14
18
|
self.cache_dir = Path(cache_dir)
|
|
@@ -50,34 +54,32 @@ class EpisodeMatcher:
|
|
|
50
54
|
return str(chunk_path)
|
|
51
55
|
|
|
52
56
|
def load_reference_chunk(self, srt_file, chunk_idx):
|
|
53
|
-
"""
|
|
57
|
+
"""
|
|
58
|
+
Load reference subtitles for a specific time chunk with robust encoding handling.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
srt_file (str or Path): Path to the SRT file
|
|
62
|
+
chunk_idx (int): Index of the chunk to load
|
|
63
|
+
|
|
64
|
+
Returns:
|
|
65
|
+
str: Combined text from the subtitle chunk
|
|
66
|
+
"""
|
|
54
67
|
chunk_start = chunk_idx * self.chunk_duration
|
|
55
68
|
chunk_end = chunk_start + self.chunk_duration
|
|
56
|
-
text_lines = []
|
|
57
69
|
|
|
58
|
-
|
|
59
|
-
content
|
|
70
|
+
try:
|
|
71
|
+
# Read the file content using our robust reader
|
|
72
|
+
reader = SubtitleReader()
|
|
73
|
+
content = reader.read_srt_file(srt_file)
|
|
60
74
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
end_time = timestamp.split(' --> ')[1].strip()
|
|
71
|
-
hours, minutes, seconds = map(float, end_time.replace(',','.').split(':'))
|
|
72
|
-
total_seconds = hours * 3600 + minutes * 60 + seconds
|
|
73
|
-
|
|
74
|
-
if chunk_start <= total_seconds <= chunk_end:
|
|
75
|
-
text_lines.append(text)
|
|
76
|
-
|
|
77
|
-
except (IndexError, ValueError):
|
|
78
|
-
continue
|
|
79
|
-
|
|
80
|
-
return ' '.join(text_lines)
|
|
75
|
+
# Extract subtitles for the time chunk
|
|
76
|
+
text_lines = reader.extract_subtitle_chunk(content, chunk_start, chunk_end)
|
|
77
|
+
|
|
78
|
+
return ' '.join(text_lines)
|
|
79
|
+
|
|
80
|
+
except Exception as e:
|
|
81
|
+
logger.error(f"Error loading reference chunk from {srt_file}: {e}")
|
|
82
|
+
return ''
|
|
81
83
|
|
|
82
84
|
def identify_episode(self, video_file, temp_dir, season_number):
|
|
83
85
|
try:
|
|
@@ -147,4 +149,121 @@ class EpisodeMatcher:
|
|
|
147
149
|
finally:
|
|
148
150
|
# Cleanup temp files
|
|
149
151
|
for file in self.temp_dir.glob("chunk_*.wav"):
|
|
150
|
-
file.unlink()
|
|
152
|
+
file.unlink()
|
|
153
|
+
|
|
154
|
+
def detect_file_encoding(file_path):
    """
    Guess the text encoding of a file with chardet.

    The file is read as raw bytes and handed to ``chardet.detect``. Any
    failure along the way (unreadable file, unexpected chardet result) is
    logged as a warning and answered with the default.

    Args:
        file_path (str or Path): Path to the file

    Returns:
        str: Encoding name reported by chardet, or 'utf-8' when chardet
            reports no encoding or detection raises
    """
    fallback = 'utf-8'
    try:
        with open(file_path, 'rb') as handle:
            guess = chardet.detect(handle.read())
        name, score = guess['encoding'], guess['confidence']
        logger.debug(f"Detected encoding {name} with {score:.2%} confidence for {file_path}")
    except Exception as e:
        # Best effort: detection problems must never stop the caller from
        # attempting to read the file.
        logger.warning(f"Error detecting encoding for {file_path}: {e}")
        return fallback
    # chardet reports None when it cannot make a guess.
    return name or fallback
|
|
176
|
+
|
|
177
|
+
def read_file_with_fallback(file_path, encodings=None):
    """
    Read a text file, trying several encodings in order of preference.

    Each candidate is normalised to its canonical codec name so that aliases
    (e.g. 'latin-1' / 'iso-8859-1') are only attempted once. Candidates that
    Python does not know are recorded as errors and skipped instead of
    aborting the whole read.

    Args:
        file_path (str or Path): Path to the file
        encodings (list): Encodings to try, in order. Defaults to the
            detected encoding followed by common subtitle encodings.

    Returns:
        str: File contents decoded with the first encoding that succeeds

    Raises:
        ValueError: If the file cannot be decoded with any candidate encoding
    """
    import codecs  # local import: only needed to normalise codec names

    if encodings is None:
        # First try the detected encoding, then fall back to common subtitle
        # encodings. 'cp1252' must precede 'latin-1': latin-1 maps every
        # possible byte, so nothing listed after it would ever be tried.
        detected = detect_file_encoding(file_path)
        encodings = [detected, 'utf-8', 'cp1252', 'latin-1']

    file_path = Path(file_path)
    errors = []
    attempted = set()

    for encoding in encodings:
        if isinstance(encoding, str):
            try:
                codec_name = codecs.lookup(encoding).name
            except LookupError as e:
                # Unknown codec name (e.g. an exotic detector result): note it
                # and move on to the next candidate.
                errors.append(f"{encoding}: {str(e)}")
                continue
        else:
            # Non-string entries are passed to open() untouched, as before.
            codec_name = encoding

        if codec_name in attempted:
            continue  # alias of a codec that was already tried
        attempted.add(codec_name)

        try:
            with open(file_path, 'r', encoding=encoding) as f:
                content = f.read()
        except UnicodeDecodeError as e:
            errors.append(f"{encoding}: {str(e)}")
            continue

        logger.debug(f"Successfully read {file_path} using {encoding} encoding")
        return content

    error_msg = f"Failed to read {file_path} with any encoding. Errors:\n" + "\n".join(errors)
    logger.error(error_msg)
    raise ValueError(error_msg)
|
|
212
|
+
|
|
213
|
+
class SubtitleReader:
    """Helper class for reading and parsing SRT subtitle files.

    All methods are static; the class is used purely as a namespace.
    """

    @staticmethod
    def parse_timestamp(timestamp):
        """
        Parse an SRT timestamp into seconds.

        Args:
            timestamp (str): Timestamp of the form ``HH:MM:SS,mmm``; a ``.``
                is also accepted as the decimal separator

        Returns:
            float: Number of seconds represented by the timestamp

        Raises:
            ValueError: If the timestamp does not consist of exactly three
                numeric, colon-separated fields
        """
        hours, minutes, seconds = timestamp.replace(',', '.').split(':')
        return float(hours) * 3600 + float(minutes) * 60 + float(seconds)

    @staticmethod
    def read_srt_file(file_path):
        """
        Read an SRT file and return its contents with robust encoding handling.

        Args:
            file_path (str or Path): Path to the SRT file

        Returns:
            str: Contents of the SRT file

        Raises:
            ValueError: Propagated from ``read_file_with_fallback`` when the
                file cannot be decoded with any candidate encoding
        """
        return read_file_with_fallback(file_path)

    @staticmethod
    def extract_subtitle_chunk(content, start_time, end_time):
        """
        Extract subtitle text for a specific time window.

        A cue is selected when its *end* timestamp lies inside
        ``[start_time, end_time]`` (both bounds inclusive). Blocks that do not
        look like a cue (fewer than three lines, no ``-->`` on the second
        line) are ignored; blocks whose timestamp cannot be parsed are logged
        and skipped.

        Args:
            content (str): Full SRT file content
            start_time (float): Chunk start time in seconds
            end_time (float): Chunk end time in seconds

        Returns:
            list: List of subtitle texts within the time window
        """
        text_lines = []

        # Normalise line endings so CRLF / CR content splits the same way as
        # LF content, then split on blank lines, tolerating separator lines
        # that contain only whitespace or several blank lines in a row.
        normalised = content.replace('\r\n', '\n').replace('\r', '\n').strip()

        for block in re.split(r'\n\s*\n', normalised):
            lines = block.split('\n')
            if len(lines) < 3 or '-->' not in lines[1]:
                continue

            try:
                text = ' '.join(lines[2:])

                # Take the first token after the arrow: this accepts arrows
                # without surrounding spaces and ignores optional position
                # coordinates that may follow the end timestamp.
                end_stamp = lines[1].partition('-->')[2].split()[0]
                total_seconds = SubtitleReader.parse_timestamp(end_stamp)

                if start_time <= total_seconds <= end_time:
                    text_lines.append(text)

            except (IndexError, ValueError) as e:
                logger.warning(f"Error parsing subtitle block: {e}")
                continue

        return text_lines
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: mkv-episode-matcher
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.5
|
|
4
4
|
Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
|
|
5
5
|
Home-page: https://github.com/Jsakkos/mkv-episode-matcher
|
|
6
6
|
Author: Jonathan Sakkos
|
|
@@ -51,6 +51,14 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
|
|
|
51
51
|
- ✨ **Bulk Processing**: Handle entire seasons at once
|
|
52
52
|
- 🧪 **Dry Run Mode**: Test changes before applying
|
|
53
53
|
|
|
54
|
+
## Prerequisites
|
|
55
|
+
|
|
56
|
+
- Python 3.9 or higher
|
|
57
|
+
- [FFmpeg](https://ffmpeg.org/download.html) installed and available in system PATH
|
|
58
|
+
- [Tesseract OCR](https://github.com/UB-Mannheim/tesseract/wiki) installed (required for image-based subtitle processing)
|
|
59
|
+
- TMDb API key
|
|
60
|
+
- OpenSubtitles account (optional, for subtitle downloads)
|
|
61
|
+
|
|
54
62
|
## Quick Start
|
|
55
63
|
|
|
56
64
|
1. Install the package:
|
|
@@ -60,37 +68,13 @@ pip install mkv-episode-matcher
|
|
|
60
68
|
|
|
61
69
|
2. Run on your show directory:
|
|
62
70
|
```bash
|
|
63
|
-
mkv-match --show-dir "path/to/your/show" --
|
|
71
|
+
mkv-match --show-dir "path/to/your/show" --get-subs true
|
|
64
72
|
```
|
|
65
73
|
|
|
66
|
-
## Requirements
|
|
67
|
-
|
|
68
|
-
- Python 3.8 or higher
|
|
69
|
-
- TMDb API key
|
|
70
|
-
- OpenSubtitles account (optional, for subtitle downloads)
|
|
71
|
-
|
|
72
74
|
## Documentation
|
|
73
75
|
|
|
74
76
|
Full documentation is available at [https://jsakkos.github.io/mkv-episode-matcher/](https://jsakkos.github.io/mkv-episode-matcher/)
|
|
75
77
|
|
|
76
|
-
## Basic Usage
|
|
77
|
-
|
|
78
|
-
```python
|
|
79
|
-
from mkv_episode_matcher import process_show
|
|
80
|
-
|
|
81
|
-
# Process all seasons
|
|
82
|
-
process_show()
|
|
83
|
-
|
|
84
|
-
# Process specific season
|
|
85
|
-
process_show(season=1)
|
|
86
|
-
|
|
87
|
-
# Test run without making changes
|
|
88
|
-
process_show(season=1, dry_run=True)
|
|
89
|
-
|
|
90
|
-
# Process and download subtitles
|
|
91
|
-
process_show(get_subs=True)
|
|
92
|
-
```
|
|
93
|
-
|
|
94
78
|
## Directory Structure
|
|
95
79
|
|
|
96
80
|
MKV Episode Matcher expects your TV shows to be organized as follows:
|
|
@@ -105,6 +89,23 @@ Show Name/
|
|
|
105
89
|
│ └── episode2.mkv
|
|
106
90
|
```
|
|
107
91
|
|
|
92
|
+
## Reference Subtitle File Structure
|
|
93
|
+
|
|
94
|
+
Subtitle files that are not automatically downloaded using the `--get-subs` flag should be named as follows:
|
|
95
|
+
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
~/.mkv-episode-matcher/cache/data/Show Name/
|
|
99
|
+
├── Show Name - S01E01.srt
|
|
100
|
+
├── Show Name - S01E02.srt
|
|
101
|
+
└── ...
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
On Windows, the cache directory is located at `C:\Users\{username}\.mkv-episode-matcher\cache\data\`
|
|
105
|
+
|
|
106
|
+
Reference subtitle files should follow this naming pattern:
|
|
107
|
+
`{show_name} - S{season:02d}E{episode:02d}.srt`
|
|
108
|
+
|
|
108
109
|
## Contributing
|
|
109
110
|
|
|
110
111
|
1. Fork the repository
|
|
@@ -2,7 +2,7 @@ mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb
|
|
|
2
2
|
mkv_episode_matcher/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
3
3
|
mkv_episode_matcher/__main__.py,sha256=3ZcCUxeI7rUA-4oiCD2WXBiOFJAqLsVVWfZKN446FwQ,6792
|
|
4
4
|
mkv_episode_matcher/config.py,sha256=zDDKBcsDt5fME9BRqiTi7yWKeast1pZh36BNYMvIBYM,2419
|
|
5
|
-
mkv_episode_matcher/episode_identification.py,sha256=
|
|
5
|
+
mkv_episode_matcher/episode_identification.py,sha256=xYqHq1YFbZT8L1Gfa_DhSStrLblKTWxZte__B0qikQU,9739
|
|
6
6
|
mkv_episode_matcher/episode_matcher.py,sha256=BJ76DPxsmZs-KfHZZ_0WvKSBZWXsUEO6lW34YdYEaxM,3979
|
|
7
7
|
mkv_episode_matcher/mkv_to_srt.py,sha256=4yxBHRVhgVby0UtQ2aTXGuoQpid8pkgjMIaHU6GCdzc,10857
|
|
8
8
|
mkv_episode_matcher/speech_to_text.py,sha256=-bnGvmtPCKyHFPEaXwIcEYTf_P13rNpAJA-2UFeRFrs,2806
|
|
@@ -19,8 +19,8 @@ mkv_episode_matcher/libraries/pgs2srt/requirements.txt,sha256=sg87dqWw_qpbwciw-M
|
|
|
19
19
|
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py,sha256=geT1LXdVd8yED9zoJ9K1XfP2JzGcM7u1SslHYrJI09o,10061
|
|
20
20
|
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py,sha256=GKtVy_Lxv-z27mkRG8pJF2znKWXwZTot7jL6kN-zIxM,10503
|
|
21
21
|
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py,sha256=AlJHUYXl85J95OzGRik-AHVfzDd7Q8BJCvD4Nr8kRIk,938598
|
|
22
|
-
mkv_episode_matcher-0.3.
|
|
23
|
-
mkv_episode_matcher-0.3.
|
|
24
|
-
mkv_episode_matcher-0.3.
|
|
25
|
-
mkv_episode_matcher-0.3.
|
|
26
|
-
mkv_episode_matcher-0.3.
|
|
22
|
+
mkv_episode_matcher-0.3.5.dist-info/METADATA,sha256=mTKSbM9Ai5UDKyj2K4AKgkdjdPVEaxylfHHp95wVZv4,5048
|
|
23
|
+
mkv_episode_matcher-0.3.5.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
24
|
+
mkv_episode_matcher-0.3.5.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
|
|
25
|
+
mkv_episode_matcher-0.3.5.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
|
|
26
|
+
mkv_episode_matcher-0.3.5.dist-info/RECORD,,
|
{mkv_episode_matcher-0.3.4.dist-info → mkv_episode_matcher-0.3.5.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|