mkv-episode-matcher 0.1.12__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkv-episode-matcher might be problematic. Click here for more details.
- mkv_episode_matcher/__main__.py +8 -4
- mkv_episode_matcher/episode_matcher.py +40 -27
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py +38 -12
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py +16644 -193
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py +125 -80
- mkv_episode_matcher/libraries/pgs2srt/imagemaker.py +7 -5
- mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py +49 -20
- mkv_episode_matcher/libraries/pgs2srt/pgsreader.py +53 -49
- mkv_episode_matcher/mkv_to_srt.py +150 -22
- mkv_episode_matcher/utils.py +19 -18
- mkv_episode_matcher-0.2.0.dist-info/METADATA +117 -0
- mkv_episode_matcher-0.2.0.dist-info/RECORD +23 -0
- {mkv_episode_matcher-0.1.12.dist-info → mkv_episode_matcher-0.2.0.dist-info}/WHEEL +1 -1
- mkv_episode_matcher/notebooks/get_subtitles_test.ipynb +0 -252
- mkv_episode_matcher/notebooks/whisper.ipynb +0 -122
- mkv_episode_matcher/old_requirements.txt +0 -7
- mkv_episode_matcher-0.1.12.dist-info/METADATA +0 -113
- mkv_episode_matcher-0.1.12.dist-info/RECORD +0 -26
- {mkv_episode_matcher-0.1.12.dist-info → mkv_episode_matcher-0.2.0.dist-info}/entry_points.txt +0 -0
- {mkv_episode_matcher-0.1.12.dist-info → mkv_episode_matcher-0.2.0.dist-info}/top_level.txt +0 -0
|
@@ -9,15 +9,28 @@ sys.path.append(os.path.join(parent_dir, "libraries", "pgs2srt"))
|
|
|
9
9
|
import re
|
|
10
10
|
from concurrent.futures import ThreadPoolExecutor
|
|
11
11
|
from datetime import datetime, timedelta
|
|
12
|
-
|
|
12
|
+
from pathlib import Path
|
|
13
13
|
import pytesseract
|
|
14
14
|
from imagemaker import make_image
|
|
15
15
|
from loguru import logger
|
|
16
16
|
from pgsreader import PGSReader
|
|
17
17
|
from PIL import Image, ImageOps
|
|
18
|
-
|
|
18
|
+
from typing import Optional
|
|
19
19
|
from mkv_episode_matcher.__main__ import CONFIG_FILE
|
|
20
20
|
from mkv_episode_matcher.config import get_config
|
|
21
|
+
def check_if_processed(filename: str) -> bool:
    """
    Report whether a filename already carries a season/episode tag.

    A name counts as processed when it contains, anywhere in the string,
    an uppercase ``S`` followed by one or more digits and then an uppercase
    ``E`` followed by one or more digits (for example ``S01E02``).

    Args:
        filename (str): Filename to inspect.

    Returns:
        bool: True if the tag is present, False otherwise.
    """
    # The match is case-sensitive: a lowercase "s01e02" is not recognised.
    return re.search(r"S\d+E\d+", filename) is not None
|
|
21
34
|
|
|
22
35
|
|
|
23
36
|
def convert_mkv_to_sup(mkv_file, output_dir):
|
|
@@ -51,21 +64,23 @@ def convert_mkv_to_sup(mkv_file, output_dir):
|
|
|
51
64
|
|
|
52
65
|
|
|
53
66
|
@logger.catch
|
|
54
|
-
def perform_ocr(sup_file_path):
|
|
67
|
+
def perform_ocr(sup_file_path: str) -> Optional[str]:
|
|
55
68
|
"""
|
|
56
69
|
Perform OCR on a .sup file and save the extracted text to a .srt file.
|
|
57
|
-
|
|
58
|
-
Args:
|
|
59
|
-
sup_file_path (str): Path to the .sup file.
|
|
70
|
+
Returns the path to the created SRT file.
|
|
60
71
|
"""
|
|
61
|
-
|
|
62
72
|
# Get the base name of the .sup file without the extension
|
|
63
73
|
base_name = os.path.splitext(os.path.basename(sup_file_path))[0]
|
|
64
74
|
output_dir = os.path.dirname(sup_file_path)
|
|
65
75
|
logger.info(f"Performing OCR on {sup_file_path}")
|
|
76
|
+
|
|
66
77
|
# Construct the output .srt file path
|
|
67
78
|
srt_file = os.path.join(output_dir, f"{base_name}.srt")
|
|
68
79
|
|
|
80
|
+
if os.path.exists(srt_file):
|
|
81
|
+
logger.info(f"SRT file {srt_file} already exists, skipping OCR")
|
|
82
|
+
return srt_file
|
|
83
|
+
|
|
69
84
|
# Load a PGS/SUP file.
|
|
70
85
|
pgs = PGSReader(sup_file_path)
|
|
71
86
|
|
|
@@ -151,24 +166,137 @@ def perform_ocr(sup_file_path):
|
|
|
151
166
|
logger.info(f"Saved to: {srt_file}")
|
|
152
167
|
|
|
153
168
|
|
|
154
|
-
def convert_mkv_to_srt(season_path, mkv_files):
|
|
155
|
-
|
|
156
|
-
|
|
169
|
+
# def convert_mkv_to_srt(season_path, mkv_files):
|
|
170
|
+
# """
|
|
171
|
+
# Converts MKV files to SRT format.
|
|
157
172
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
173
|
+
# Args:
|
|
174
|
+
# season_path (str): The path to the season directory.
|
|
175
|
+
# mkv_files (list): List of MKV files to convert.
|
|
161
176
|
|
|
162
|
-
|
|
163
|
-
|
|
177
|
+
# Returns:
|
|
178
|
+
# None
|
|
179
|
+
# """
|
|
180
|
+
# logger.info(f"Converting {len(mkv_files)} files to SRT")
|
|
181
|
+
# output_dir = os.path.join(season_path, "ocr")
|
|
182
|
+
# os.makedirs(output_dir, exist_ok=True)
|
|
183
|
+
# sup_files = []
|
|
184
|
+
# for mkv_file in mkv_files:
|
|
185
|
+
# sup_file = convert_mkv_to_sup(mkv_file, output_dir)
|
|
186
|
+
# sup_files.append(sup_file)
|
|
187
|
+
# with ThreadPoolExecutor() as executor:
|
|
188
|
+
# for sup_file in sup_files:
|
|
189
|
+
# executor.submit(perform_ocr, sup_file)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def extract_subtitles(mkv_file: str, output_dir: str) -> Optional[str]:
    """
    Extract the preferred subtitle stream of an MKV file into ``output_dir``.

    The stream is chosen by ``detect_subtitle_type``. A ``subrip`` stream is
    written directly as ``<stem>.srt``; any other detected type is stream-copied
    (``-c copy``) to ``<stem>.sup`` so it can be OCR'd afterwards. If the target
    file already exists, extraction is skipped and the existing path is returned.

    Args:
        mkv_file (str): Path to the source MKV file.
        output_dir (str): Directory that receives the extracted subtitle file.

    Returns:
        Optional[str]: Path to the subtitle file, or None when no supported
        subtitle stream was found or ffmpeg could not be run successfully.
    """
    subtitle_type, stream_index = detect_subtitle_type(mkv_file)
    if not subtitle_type:
        logger.error(f"No supported subtitle streams found in {mkv_file}")
        return None

    base_name = Path(mkv_file).stem

    if subtitle_type == 'subrip':
        # For SRT subtitles, extract directly to .srt
        extension, codec_args = ".srt", []
    else:
        # For DVD or PGS subtitles, extract to SUP format first
        extension, codec_args = ".sup", ["-c", "copy"]
    output_file = os.path.join(output_dir, f"{base_name}{extension}")

    # Single existence check: the command below is always bound before use.
    if os.path.exists(output_file):
        logger.info(f"Subtitle file {output_file} already exists, skipping extraction")
        return output_file

    cmd = [
        "ffmpeg", "-i", mkv_file,
        "-map", f"0:{stream_index}",
        *codec_args,
        output_file,
    ]
    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the ffmpeg executable being missing or not runnable.
        logger.error(f"Error extracting subtitles: {e}")
        # A failed run can leave a truncated file behind; remove it so a later
        # run does not mistake it for a finished extraction and skip the work.
        if os.path.exists(output_file):
            try:
                os.remove(output_file)
            except OSError as cleanup_error:
                logger.warning(
                    f"Could not remove partial file {output_file}: {cleanup_error}"
                )
        return None

    logger.info(f"Extracted subtitles from {mkv_file} to {output_file}")
    return output_file
|
|
235
|
+
|
|
236
|
+
def convert_mkv_to_srt(season_path: str, mkv_files: list[str]) -> None:
    """
    Produce SRT subtitles for every not-yet-processed MKV file of a season.

    Files whose basename already passes ``check_if_processed`` are skipped.
    For the rest, subtitles are extracted into ``<season_path>/ocr``; an
    extracted ``.srt`` is kept as is, anything else is passed to
    ``perform_ocr``.

    Args:
        season_path (str): Path to the season directory.
        mkv_files (list[str]): Paths of the MKV files to convert.

    Returns:
        None
    """
    logger.info(f"Converting {len(mkv_files)} files to SRT")

    # Keep only the files that still need work, logging each one we skip.
    pending = []
    for candidate in mkv_files:
        if not check_if_processed(os.path.basename(candidate)):
            pending.append(candidate)
        else:
            logger.info(f"Skipping {candidate} - already processed")

    if len(pending) == 0:
        logger.info("No new files to process")
        return

    # All extracted and OCR'd subtitles live in an "ocr" folder in the season.
    ocr_dir = os.path.join(season_path, "ocr")
    os.makedirs(ocr_dir, exist_ok=True)

    for source in pending:
        extracted = extract_subtitles(source, ocr_dir)
        if extracted:
            if extracted.endswith('.srt'):
                # Text subtitles need no OCR; the file stays in the OCR directory.
                logger.info(f"Extracted SRT subtitle to {extracted}")
            else:
                # SUP files (DVD or PGS) are image based and go through OCR.
                ocr_result = perform_ocr(extracted)
                if ocr_result:
                    logger.info(f"Created SRT from OCR: {ocr_result}")
|
|
271
|
+
|
|
272
|
+
def detect_subtitle_type(mkv_file: str) -> tuple[Optional[str], Optional[int]]:
    """
    Detect the type and index of the preferred subtitle stream in an MKV file.

    Runs ``ffmpeg -i <file>`` and scans its stderr for stream lines that
    mention ``Subtitle``. Recognised formats are ``subrip``, ``dvd_subtitle``
    and ``hdmv_pgs_subtitle``; the first stream of the highest-priority
    format present is returned.

    Args:
        mkv_file (str): Path to the MKV file to inspect.

    Returns:
        tuple[Optional[str], Optional[int]]: ``(format, stream_index)`` of the
        chosen stream, or ``(None, None)`` when no supported subtitle stream is
        found or ffmpeg could not be started.
    """
    cmd = ["ffmpeg", "-i", mkv_file]

    try:
        # No check=True: ffmpeg exits non-zero here because no output file is
        # given, yet still prints the stream listing we need to stderr.
        # errors="replace" keeps odd bytes in metadata from aborting decoding.
        result = subprocess.run(
            cmd, capture_output=True, text=True, errors="replace"
        )
    except OSError as e:
        # Raised when the ffmpeg executable is missing or cannot be run.
        logger.error(f"Error detecting subtitle type: {e}")
        return None, None

    # Prioritize subtitle formats: SRT > DVD > PGS
    priority = ('subrip', 'dvd_subtitle', 'hdmv_pgs_subtitle')

    # Take the leading digits after "#0:" so lines with or without a
    # "(lang)" suffix parse alike, and lines without a stream id are skipped
    # instead of raising.
    stream_id = re.compile(r"#0:(\d+)")

    first_index_by_format = {}
    for line in result.stderr.split('\n'):
        if 'Subtitle' not in line:
            continue
        found = stream_id.search(line)
        if found is None:
            continue
        stream_index = int(found.group(1))
        for format_type in priority:
            if format_type in line:
                # Remember only the first stream seen for each format.
                first_index_by_format.setdefault(format_type, stream_index)
                break

    for format_type in priority:
        if format_type in first_index_by_format:
            return format_type, first_index_by_format[format_type]

    return None, None
|
mkv_episode_matcher/utils.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
import os
|
|
3
3
|
import re
|
|
4
4
|
import shutil
|
|
5
|
-
from typing import Set
|
|
6
5
|
|
|
7
6
|
import requests
|
|
8
7
|
from loguru import logger
|
|
@@ -117,7 +116,7 @@ def rename_episode_file(original_file_path, season_number, episode_number):
|
|
|
117
116
|
os.rename(original_file_path, new_file_path)
|
|
118
117
|
|
|
119
118
|
|
|
120
|
-
def get_subtitles(show_id, seasons:
|
|
119
|
+
def get_subtitles(show_id, seasons: set[int]):
|
|
121
120
|
"""
|
|
122
121
|
Retrieves and saves subtitles for a given TV show and seasons.
|
|
123
122
|
|
|
@@ -138,16 +137,14 @@ def get_subtitles(show_id, seasons: Set[int]):
|
|
|
138
137
|
open_subtitles_user_agent = config.get("open_subtitles_user_agent")
|
|
139
138
|
open_subtitles_username = config.get("open_subtitles_username")
|
|
140
139
|
open_subtitles_password = config.get("open_subtitles_password")
|
|
141
|
-
if not all(
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
]
|
|
150
|
-
):
|
|
140
|
+
if not all([
|
|
141
|
+
show_dir,
|
|
142
|
+
tmdb_api_key,
|
|
143
|
+
open_subtitles_api_key,
|
|
144
|
+
open_subtitles_user_agent,
|
|
145
|
+
open_subtitles_username,
|
|
146
|
+
open_subtitles_password,
|
|
147
|
+
]):
|
|
151
148
|
logger.error("Missing configuration settings. Please run the setup script.")
|
|
152
149
|
try:
|
|
153
150
|
# Initialize the OpenSubtitles client
|
|
@@ -164,11 +161,8 @@ def get_subtitles(show_id, seasons: Set[int]):
|
|
|
164
161
|
|
|
165
162
|
for episode in range(1, episodes + 1):
|
|
166
163
|
logger.info(f"Processing Season {season}, Episode {episode}...")
|
|
167
|
-
series_cache_dir =os.path.join(
|
|
168
|
-
|
|
169
|
-
"data",
|
|
170
|
-
series_name)
|
|
171
|
-
os.makedirs(series_cache_dir,exist_ok=True)
|
|
164
|
+
series_cache_dir = os.path.join(CACHE_DIR, "data", series_name)
|
|
165
|
+
os.makedirs(series_cache_dir, exist_ok=True)
|
|
172
166
|
srt_filepath = os.path.join(
|
|
173
167
|
series_cache_dir,
|
|
174
168
|
f"{series_name} - S{season:02d}E{episode:02d}.srt",
|
|
@@ -179,7 +173,7 @@ def get_subtitles(show_id, seasons: Set[int]):
|
|
|
179
173
|
response = requests.get(url)
|
|
180
174
|
response.raise_for_status()
|
|
181
175
|
episode_data = response.json()
|
|
182
|
-
|
|
176
|
+
episode_data["name"]
|
|
183
177
|
episode_id = episode_data["id"]
|
|
184
178
|
# search for the subtitle
|
|
185
179
|
response = subtitles.search(tmdb_id=episode_id, languages="en")
|
|
@@ -229,3 +223,10 @@ def cleanup_ocr_files(show_dir):
|
|
|
229
223
|
if os.path.exists(ocr_dir_path):
|
|
230
224
|
logger.info(f"Cleaning up OCR files in {ocr_dir_path}")
|
|
231
225
|
shutil.rmtree(ocr_dir_path)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def clean_text(text):
    """
    Strip bracketed annotations from a piece of text.

    Removes every ``[...]``, ``(...)`` and ``{...}`` group (non-greedy, so
    each opening bracket pairs with the nearest matching closer on the same
    line) and then trims leading and trailing whitespace.

    Args:
        text (str): Text to clean.

    Returns:
        str: The text without bracketed groups and surrounding whitespace.
    """
    bracketed_groups = r"\[.*?\]|\(.*?\)|\{.*?\}"
    without_groups = re.sub(bracketed_groups, "", text)
    return without_groups.strip()
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: mkv-episode-matcher
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
|
|
5
|
+
Home-page: https://github.com/Jsakkos/mkv-episode-matcher
|
|
6
|
+
Author: Jonathan Sakkos
|
|
7
|
+
Author-email: Jsakkos <jonathansakkos@gmail.com>
|
|
8
|
+
License: MIT
|
|
9
|
+
Project-URL: Documentation, https://github.com/Jsakkos/mkv-episode-matcher#readme
|
|
10
|
+
Project-URL: Issues, https://github.com/Jsakkos/mkv-episode-matcher/issues
|
|
11
|
+
Project-URL: Source, https://github.com/Jsakkos/mkv-episode-matcher
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Programming Language :: Python
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
16
|
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
Requires-Dist: configparser>=7.1.0
|
|
20
|
+
Requires-Dist: ffmpeg>=1.4
|
|
21
|
+
Requires-Dist: loguru>=0.7.2
|
|
22
|
+
Requires-Dist: numpy>=2.1.3
|
|
23
|
+
Requires-Dist: opensubtitlescom>=0.1.5
|
|
24
|
+
Requires-Dist: pytesseract>=0.3.13
|
|
25
|
+
Requires-Dist: requests>=2.32.3
|
|
26
|
+
Requires-Dist: tmdb-client>=0.0.1
|
|
27
|
+
|
|
28
|
+
# MKV Episode Matcher
|
|
29
|
+
|
|
30
|
+
[Documentation](https://jsakkos.github.io/mkv-episode-matcher/)
|
|
31
|
+
[PyPI version](https://badge.fury.io/py/mkv-episode-matcher)
|
|
32
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
33
|
+
|
|
34
|
+
Automatically match and rename your MKV TV episodes using The Movie Database (TMDb).
|
|
35
|
+
|
|
36
|
+
## Features
|
|
37
|
+
|
|
38
|
+
- 🎯 **Automatic Episode Matching**: Uses TMDb to accurately identify episodes
|
|
39
|
+
- 📝 **Subtitle Extraction**: Extracts subtitles from MKV files
|
|
40
|
+
- 🔍 **OCR Support**: Handles image-based subtitles
|
|
41
|
+
- 🚀 **Multi-threaded**: Fast processing of multiple files
|
|
42
|
+
- ⬇️ **Subtitle Downloads**: Integration with OpenSubtitles
|
|
43
|
+
- ✨ **Bulk Processing**: Handle entire seasons at once
|
|
44
|
+
- 🧪 **Dry Run Mode**: Test changes before applying
|
|
45
|
+
|
|
46
|
+
## Quick Start
|
|
47
|
+
|
|
48
|
+
1. Install the package:
|
|
49
|
+
```bash
|
|
50
|
+
pip install mkv-episode-matcher
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
2. Run on your show directory:
|
|
54
|
+
```bash
|
|
55
|
+
mkv-match --show-dir "path/to/your/show" --season 1
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Requirements
|
|
59
|
+
|
|
60
|
+
- Python 3.10 or higher
|
|
61
|
+
- TMDb API key
|
|
62
|
+
- OpenSubtitles account (optional, for subtitle downloads)
|
|
63
|
+
|
|
64
|
+
## Documentation
|
|
65
|
+
|
|
66
|
+
Full documentation is available at [https://jsakkos.github.io/mkv-episode-matcher/](https://jsakkos.github.io/mkv-episode-matcher/)
|
|
67
|
+
|
|
68
|
+
## Basic Usage
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
from mkv_episode_matcher import process_show
|
|
72
|
+
|
|
73
|
+
# Process all seasons
|
|
74
|
+
process_show()
|
|
75
|
+
|
|
76
|
+
# Process specific season
|
|
77
|
+
process_show(season=1)
|
|
78
|
+
|
|
79
|
+
# Test run without making changes
|
|
80
|
+
process_show(season=1, dry_run=True)
|
|
81
|
+
|
|
82
|
+
# Process and download subtitles
|
|
83
|
+
process_show(get_subs=True)
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Directory Structure
|
|
87
|
+
|
|
88
|
+
MKV Episode Matcher expects your TV shows to be organized as follows:
|
|
89
|
+
|
|
90
|
+
```
|
|
91
|
+
Show Name/
|
|
92
|
+
├── Season 1/
|
|
93
|
+
│ ├── episode1.mkv
|
|
94
|
+
│ └── episode2.mkv
|
|
95
|
+
├── Season 2/
|
|
96
|
+
│ ├── episode1.mkv
|
|
97
|
+
│ └── episode2.mkv
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Contributing
|
|
101
|
+
|
|
102
|
+
1. Fork the repository
|
|
103
|
+
2. Create a feature branch (`git checkout -b feature/amazing-feature`)
|
|
104
|
+
3. Commit your changes (`git commit -m 'Add amazing feature'`)
|
|
105
|
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
|
106
|
+
5. Open a Pull Request
|
|
107
|
+
|
|
108
|
+
## License
|
|
109
|
+
|
|
110
|
+
Distributed under the MIT License. See `LICENSE` for more information.
|
|
111
|
+
|
|
112
|
+
## Acknowledgments
|
|
113
|
+
|
|
114
|
+
- [TMDb](https://www.themoviedb.org/) for their excellent API
|
|
115
|
+
- [OpenSubtitles](https://www.opensubtitles.com/) for subtitle integration
|
|
116
|
+
- All contributors who have helped improve this project
|
|
117
|
+
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
mkv_episode_matcher/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
|
|
2
|
+
mkv_episode_matcher/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
3
|
+
mkv_episode_matcher/__main__.py,sha256=3ZcCUxeI7rUA-4oiCD2WXBiOFJAqLsVVWfZKN446FwQ,6792
|
|
4
|
+
mkv_episode_matcher/config.py,sha256=zDDKBcsDt5fME9BRqiTi7yWKeast1pZh36BNYMvIBYM,2419
|
|
5
|
+
mkv_episode_matcher/episode_matcher.py,sha256=YBbRL-NIIvBwKojQOHoDsE3EQYy9_hn1j-4CAuLwM78,9854
|
|
6
|
+
mkv_episode_matcher/mkv_to_srt.py,sha256=4yxBHRVhgVby0UtQ2aTXGuoQpid8pkgjMIaHU6GCdzc,10857
|
|
7
|
+
mkv_episode_matcher/tmdb_client.py,sha256=LbMCgjmp7sCbrQo_CDlpcnryKPz5S7inE24YY9Pyjk4,4172
|
|
8
|
+
mkv_episode_matcher/utils.py,sha256=5YWpRbciIAlmhBxa-etGtMA6IabswE3CuefRVQDROz4,9526
|
|
9
|
+
mkv_episode_matcher/libraries/pgs2srt/.gitignore,sha256=mt3uxWYZaFurMw_yGE258gWhtGKPVR7e3Ll4ALJpyj4,23
|
|
10
|
+
mkv_episode_matcher/libraries/pgs2srt/README.md,sha256=olb25G17tj0kxPgp_LcH5I2QWXjgP1m8JFyjYRGz4UU,1374
|
|
11
|
+
mkv_episode_matcher/libraries/pgs2srt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
mkv_episode_matcher/libraries/pgs2srt/imagemaker.py,sha256=mOlUt8eJ4LOqMYerOuWmQPPWcB-Umup2lBJlqzy_pPg,2736
|
|
13
|
+
mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py,sha256=UdCCUUWxKXCxBebiNBsrM95R6-zmJhSUAHcEPxUmbNU,4416
|
|
14
|
+
mkv_episode_matcher/libraries/pgs2srt/pgsreader.py,sha256=h5vZSLPVHir-epuNa-L5MpJYpyyUk0h_13DtmrNG9Xc,7001
|
|
15
|
+
mkv_episode_matcher/libraries/pgs2srt/requirements.txt,sha256=sg87dqWw_qpbwciw-Mc5mRJnV9LaCni2cybnT5ANqnA,59
|
|
16
|
+
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py,sha256=geT1LXdVd8yED9zoJ9K1XfP2JzGcM7u1SslHYrJI09o,10061
|
|
17
|
+
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py,sha256=GKtVy_Lxv-z27mkRG8pJF2znKWXwZTot7jL6kN-zIxM,10503
|
|
18
|
+
mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py,sha256=AlJHUYXl85J95OzGRik-AHVfzDd7Q8BJCvD4Nr8kRIk,938598
|
|
19
|
+
mkv_episode_matcher-0.2.0.dist-info/METADATA,sha256=aocARhBMBFQ5HRTgCCKa5p9pm-4Kw6SJxoxz31sg7HY,3710
|
|
20
|
+
mkv_episode_matcher-0.2.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
21
|
+
mkv_episode_matcher-0.2.0.dist-info/entry_points.txt,sha256=IglJ43SuCZq2eQ3shMFILCkmQASJHnDCI3ogohW2Hn4,64
|
|
22
|
+
mkv_episode_matcher-0.2.0.dist-info/top_level.txt,sha256=XRLbd93HUaedeWLtkyTvQjFcE5QcBRYa3V-CfHrq-OI,20
|
|
23
|
+
mkv_episode_matcher-0.2.0.dist-info/RECORD,,
|
|
@@ -1,252 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"cells": [
|
|
3
|
-
{
|
|
4
|
-
"cell_type": "markdown",
|
|
5
|
-
"metadata": {},
|
|
6
|
-
"source": [
|
|
7
|
-
"# Load imports and create config directory"
|
|
8
|
-
]
|
|
9
|
-
},
|
|
10
|
-
{
|
|
11
|
-
"cell_type": "code",
|
|
12
|
-
"execution_count": null,
|
|
13
|
-
"metadata": {},
|
|
14
|
-
"outputs": [],
|
|
15
|
-
"source": [
|
|
16
|
-
"# __main__.py\n",
|
|
17
|
-
"import argparse\n",
|
|
18
|
-
"import os\n",
|
|
19
|
-
"\n",
|
|
20
|
-
"from loguru import logger\n",
|
|
21
|
-
"\n",
|
|
22
|
-
"from mkv_episode_matcher.config import get_config, set_config\n",
|
|
23
|
-
"from mkv_episode_matcher.utils import check_filename, cleanup_ocr_files, get_subtitles\n",
|
|
24
|
-
"from mkv_episode_matcher.tmdb_client import fetch_show_id,fetch_season_details\n",
|
|
25
|
-
"import os\n",
|
|
26
|
-
"import re\n",
|
|
27
|
-
"import shutil\n",
|
|
28
|
-
"from typing import Set\n",
|
|
29
|
-
"\n",
|
|
30
|
-
"import requests\n",
|
|
31
|
-
"from loguru import logger\n",
|
|
32
|
-
"from opensubtitlescom import OpenSubtitles\n",
|
|
33
|
-
"# Log the start of the application\n",
|
|
34
|
-
"logger.info(\"Starting the application\")\n",
|
|
35
|
-
"\n",
|
|
36
|
-
"\n",
|
|
37
|
-
"\n",
|
|
38
|
-
"# Check if the configuration directory exists, if not create it\n",
|
|
39
|
-
"if not os.path.exists(os.path.join(os.path.expanduser(\"~\"), \".mkv-episode-matcher\")):\n",
|
|
40
|
-
" os.makedirs(os.path.join(os.path.expanduser(\"~\"), \".mkv-episode-matcher\"))\n",
|
|
41
|
-
"\n",
|
|
42
|
-
"# Define the paths for the configuration file and cache directory\n",
|
|
43
|
-
"CONFIG_FILE = os.path.join(\n",
|
|
44
|
-
" os.path.expanduser(\"~\"), \".mkv-episode-matcher\", \"config.ini\"\n",
|
|
45
|
-
")\n",
|
|
46
|
-
"CACHE_DIR = os.path.join(os.path.expanduser(\"~\"), \".mkv-episode-matcher\", \"cache\")"
|
|
47
|
-
]
|
|
48
|
-
},
|
|
49
|
-
{
|
|
50
|
-
"cell_type": "markdown",
|
|
51
|
-
"metadata": {},
|
|
52
|
-
"source": [
|
|
53
|
-
"# Load configuration settings from config.ini"
|
|
54
|
-
]
|
|
55
|
-
},
|
|
56
|
-
{
|
|
57
|
-
"cell_type": "code",
|
|
58
|
-
"execution_count": null,
|
|
59
|
-
"metadata": {},
|
|
60
|
-
"outputs": [],
|
|
61
|
-
"source": [
|
|
62
|
-
"config = get_config(CONFIG_FILE)\n",
|
|
63
|
-
"show_dir = config.get(\"show_dir\")\n",
|
|
64
|
-
"show_name = os.path.basename(show_dir)\n",
|
|
65
|
-
"series_name = os.path.basename(show_dir)\n",
|
|
66
|
-
"tmdb_api_key = config.get(\"tmdb_api_key\")\n",
|
|
67
|
-
"open_subtitles_api_key = config.get(\"open_subtitles_api_key\")\n",
|
|
68
|
-
"open_subtitles_user_agent = config.get(\"open_subtitles_user_agent\")\n",
|
|
69
|
-
"open_subtitles_username = config.get(\"open_subtitles_username\")\n",
|
|
70
|
-
"open_subtitles_password = config.get(\"open_subtitles_password\")"
|
|
71
|
-
]
|
|
72
|
-
},
|
|
73
|
-
{
|
|
74
|
-
"cell_type": "markdown",
|
|
75
|
-
"metadata": {},
|
|
76
|
-
"source": [
|
|
77
|
-
"# Make sure all required info exists in config.ini"
|
|
78
|
-
]
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
"cell_type": "code",
|
|
82
|
-
"execution_count": null,
|
|
83
|
-
"metadata": {},
|
|
84
|
-
"outputs": [],
|
|
85
|
-
"source": [
|
|
86
|
-
"for x in [\n",
|
|
87
|
-
" show_dir,\n",
|
|
88
|
-
" tmdb_api_key,\n",
|
|
89
|
-
" open_subtitles_api_key,\n",
|
|
90
|
-
" open_subtitles_user_agent,\n",
|
|
91
|
-
" open_subtitles_username,\n",
|
|
92
|
-
" open_subtitles_password,\n",
|
|
93
|
-
" ]:\n",
|
|
94
|
-
" try:\n",
|
|
95
|
-
" print(x)\n",
|
|
96
|
-
" except:\n",
|
|
97
|
-
" print('failed')"
|
|
98
|
-
]
|
|
99
|
-
},
|
|
100
|
-
{
|
|
101
|
-
"cell_type": "code",
|
|
102
|
-
"execution_count": null,
|
|
103
|
-
"metadata": {},
|
|
104
|
-
"outputs": [],
|
|
105
|
-
"source": [
|
|
106
|
-
"if not all(\n",
|
|
107
|
-
" [\n",
|
|
108
|
-
" show_dir,\n",
|
|
109
|
-
" tmdb_api_key,\n",
|
|
110
|
-
" open_subtitles_api_key,\n",
|
|
111
|
-
" open_subtitles_user_agent,\n",
|
|
112
|
-
" open_subtitles_username,\n",
|
|
113
|
-
" open_subtitles_password,\n",
|
|
114
|
-
" ]\n",
|
|
115
|
-
"):\n",
|
|
116
|
-
" logger.error(\"Missing configuration settings. Please run the setup script.\")"
|
|
117
|
-
]
|
|
118
|
-
},
|
|
119
|
-
{
|
|
120
|
-
"cell_type": "markdown",
|
|
121
|
-
"metadata": {},
|
|
122
|
-
"source": [
|
|
123
|
-
"# Make sure show can be found on TMDb\n",
|
|
124
|
-
"The show id is used to search on opensubtitles"
|
|
125
|
-
]
|
|
126
|
-
},
|
|
127
|
-
{
|
|
128
|
-
"cell_type": "code",
|
|
129
|
-
"execution_count": null,
|
|
130
|
-
"metadata": {},
|
|
131
|
-
"outputs": [],
|
|
132
|
-
"source": [
|
|
133
|
-
"show_id = fetch_show_id(show_name)\n",
|
|
134
|
-
"if show_id is None:\n",
|
|
135
|
-
" logger.error(f\"Could not find show '{os.path.basename(show_name)}' on TMDb.\")\n",
|
|
136
|
-
"else:\n",
|
|
137
|
-
" print(show_id)"
|
|
138
|
-
]
|
|
139
|
-
},
|
|
140
|
-
{
|
|
141
|
-
"cell_type": "markdown",
|
|
142
|
-
"metadata": {},
|
|
143
|
-
"source": [
|
|
144
|
-
"# Try getting the first season automatically"
|
|
145
|
-
]
|
|
146
|
-
},
|
|
147
|
-
{
|
|
148
|
-
"cell_type": "code",
|
|
149
|
-
"execution_count": null,
|
|
150
|
-
"metadata": {},
|
|
151
|
-
"outputs": [],
|
|
152
|
-
"source": [
|
|
153
|
-
"get_subtitles(show_id, seasons=set([1]))"
|
|
154
|
-
]
|
|
155
|
-
},
|
|
156
|
-
{
|
|
157
|
-
"cell_type": "markdown",
|
|
158
|
-
"metadata": {},
|
|
159
|
-
"source": [
|
|
160
|
-
"# Check if there's an issue in the get_subtitles function"
|
|
161
|
-
]
|
|
162
|
-
},
|
|
163
|
-
{
|
|
164
|
-
"cell_type": "code",
|
|
165
|
-
"execution_count": null,
|
|
166
|
-
"metadata": {},
|
|
167
|
-
"outputs": [],
|
|
168
|
-
"source": [
|
|
169
|
-
"try:\n",
|
|
170
|
-
" # Initialize the OpenSubtitles client\n",
|
|
171
|
-
" subtitles = OpenSubtitles(open_subtitles_user_agent, open_subtitles_api_key)\n",
|
|
172
|
-
"\n",
|
|
173
|
-
" # Log in (retrieve auth token)\n",
|
|
174
|
-
" subtitles.login(open_subtitles_username, open_subtitles_password)\n",
|
|
175
|
-
"except Exception as e:\n",
|
|
176
|
-
" logger.error(f\"Failed to log in to OpenSubtitles: {e}\")\n",
|
|
177
|
-
"\n",
|
|
178
|
-
"for season in [1]:\n",
|
|
179
|
-
" episodes = fetch_season_details(show_id, season)\n",
|
|
180
|
-
" logger.info(f\"Found {episodes} episodes in Season {season}\")\n",
|
|
181
|
-
"\n",
|
|
182
|
-
" for episode in range(1, episodes + 1):\n",
|
|
183
|
-
" logger.info(f\"Processing Season {season}, Episode {episode}...\")\n",
|
|
184
|
-
" series_cache_dir =os.path.join(\n",
|
|
185
|
-
" CACHE_DIR,\n",
|
|
186
|
-
" \"data\",\n",
|
|
187
|
-
" series_name)\n",
|
|
188
|
-
" os.makedirs(series_cache_dir,exist_ok=True)\n",
|
|
189
|
-
" srt_filepath = os.path.join(\n",
|
|
190
|
-
" series_cache_dir,\n",
|
|
191
|
-
" f\"{series_name} - S{season:02d}E{episode:02d}.srt\",\n",
|
|
192
|
-
" )\n",
|
|
193
|
-
" if not os.path.exists(srt_filepath):\n",
|
|
194
|
-
" # get the episode info from TMDB\n",
|
|
195
|
-
" url = f\"https://api.themoviedb.org/3/tv/{show_id}/season/{season}/episode/{episode}?api_key={tmdb_api_key}\"\n",
|
|
196
|
-
" response = requests.get(url)\n",
|
|
197
|
-
" response.raise_for_status()\n",
|
|
198
|
-
" episode_data = response.json()\n",
|
|
199
|
-
" episode_name = episode_data[\"name\"]\n",
|
|
200
|
-
" episode_id = episode_data[\"id\"]\n",
|
|
201
|
-
" # search for the subtitle\n",
|
|
202
|
-
" response = subtitles.search(tmdb_id=episode_id, languages=\"en\")\n",
|
|
203
|
-
" if len(response.data) == 0:\n",
|
|
204
|
-
" logger.warning(\n",
|
|
205
|
-
" f\"No subtitles found for {series_name} - S{season:02d}E{episode:02d}\"\n",
|
|
206
|
-
" )\n",
|
|
207
|
-
"\n",
|
|
208
|
-
" for subtitle in response.data:\n",
|
|
209
|
-
" subtitle_dict = subtitle.to_dict()\n",
|
|
210
|
-
" # Remove special characters and convert to uppercase\n",
|
|
211
|
-
" filename_clean = re.sub(\n",
|
|
212
|
-
" r\"\\W+\", \" \", subtitle_dict[\"file_name\"]\n",
|
|
213
|
-
" ).upper()\n",
|
|
214
|
-
" if f\"E{episode:02d}\" in filename_clean:\n",
|
|
215
|
-
" logger.info(f\"Original filename: {subtitle_dict['file_name']}\")\n",
|
|
216
|
-
" srt_file = subtitles.download_and_save(subtitle)\n",
|
|
217
|
-
" series_name = series_name.replace(\":\", \" -\")\n",
|
|
218
|
-
" shutil.move(os.path.join(os.getcwd(),srt_file), srt_filepath)\n",
|
|
219
|
-
" logger.info(f\"Subtitle saved to {srt_filepath}\")\n",
|
|
220
|
-
" break\n",
|
|
221
|
-
" else:\n",
|
|
222
|
-
" continue\n",
|
|
223
|
-
" else:\n",
|
|
224
|
-
" logger.info(\n",
|
|
225
|
-
" f\"Subtitle already exists for {series_name} - S{season:02d}E{episode:02d}\"\n",
|
|
226
|
-
" )\n",
|
|
227
|
-
" continue"
|
|
228
|
-
]
|
|
229
|
-
}
|
|
230
|
-
],
|
|
231
|
-
"metadata": {
|
|
232
|
-
"kernelspec": {
|
|
233
|
-
"display_name": "mkv",
|
|
234
|
-
"language": "python",
|
|
235
|
-
"name": "python3"
|
|
236
|
-
},
|
|
237
|
-
"language_info": {
|
|
238
|
-
"codemirror_mode": {
|
|
239
|
-
"name": "ipython",
|
|
240
|
-
"version": 3
|
|
241
|
-
},
|
|
242
|
-
"file_extension": ".py",
|
|
243
|
-
"mimetype": "text/x-python",
|
|
244
|
-
"name": "python",
|
|
245
|
-
"nbconvert_exporter": "python",
|
|
246
|
-
"pygments_lexer": "ipython3",
|
|
247
|
-
"version": "3.12.1"
|
|
248
|
-
}
|
|
249
|
-
},
|
|
250
|
-
"nbformat": 4,
|
|
251
|
-
"nbformat_minor": 2
|
|
252
|
-
}
|