mkv-episode-matcher 0.1.5__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkv-episode-matcher might be problematic. Click here for more details.
- mkv_episode_matcher/__init__.py +1 -1
- mkv_episode_matcher/__main__.py +2 -2
- mkv_episode_matcher/libraries/pgs2srt/.gitignore +2 -2
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py +295 -295
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py +249 -249
- mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py +215 -215
- mkv_episode_matcher/libraries/pgs2srt/README.md +26 -26
- mkv_episode_matcher/libraries/pgs2srt/imagemaker.py +87 -87
- mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py +121 -121
- mkv_episode_matcher/libraries/pgs2srt/pgsreader.py +221 -221
- mkv_episode_matcher/libraries/pgs2srt/requirements.txt +4 -4
- mkv_episode_matcher/mkv_to_srt.py +174 -174
- mkv_episode_matcher/notebooks/get_subtitles_test.ipynb +252 -0
- mkv_episode_matcher/requirements.txt +6 -7
- mkv_episode_matcher/utils.py +5 -2
- {mkv_episode_matcher-0.1.5.dist-info → mkv_episode_matcher-0.1.10.dist-info}/METADATA +53 -37
- mkv_episode_matcher-0.1.10.dist-info/RECORD +25 -0
- {mkv_episode_matcher-0.1.5.dist-info → mkv_episode_matcher-0.1.10.dist-info}/WHEEL +2 -1
- mkv_episode_matcher-0.1.10.dist-info/top_level.txt +1 -0
- mkv_episode_matcher/libraries/pgs2srt/.git +0 -1
- mkv_episode_matcher-0.1.5.dist-info/RECORD +0 -24
- {mkv_episode_matcher-0.1.5.dist-info → mkv_episode_matcher-0.1.10.dist-info}/entry_points.txt +0 -0
|
@@ -1,174 +1,174 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import subprocess
|
|
3
|
-
import sys
|
|
4
|
-
|
|
5
|
-
# Get the absolute path of the parent directory of the current script.
|
|
6
|
-
parent_dir = os.path.dirname(os.path.abspath(__file__))
|
|
7
|
-
# Add the 'pgs2srt' directory to the Python path.
|
|
8
|
-
sys.path.append(os.path.join(parent_dir, "libraries", "pgs2srt"))
|
|
9
|
-
import re
|
|
10
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
11
|
-
from datetime import datetime, timedelta
|
|
12
|
-
|
|
13
|
-
import pytesseract
|
|
14
|
-
from imagemaker import make_image
|
|
15
|
-
from loguru import logger
|
|
16
|
-
from pgsreader import PGSReader
|
|
17
|
-
from PIL import Image, ImageOps
|
|
18
|
-
|
|
19
|
-
from mkv_episode_matcher.__main__ import CONFIG_FILE
|
|
20
|
-
from mkv_episode_matcher.config import get_config
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def convert_mkv_to_sup(mkv_file, output_dir):
    """
    Extract the first subtitle stream of an .mkv file into a .sup file using FFmpeg.

    The output file is named after the .mkv file (same base name, ``.sup``
    extension) and placed in ``output_dir``. If that file already exists the
    conversion is skipped.

    Args:
        mkv_file (str): Path to the .mkv file.
        output_dir (str): Path to the directory where the .sup file will be saved.

    Returns:
        str: Path to the .sup file. The path is returned even when FFmpeg
        fails (the failure is only logged), so the file may not exist.
    """
    base_name = os.path.splitext(os.path.basename(mkv_file))[0]
    sup_file = os.path.join(output_dir, f"{base_name}.sup")

    if os.path.exists(sup_file):
        logger.info(f"File {sup_file} already exists, skipping")
        return sup_file

    logger.info(f"Processing {mkv_file} to {sup_file}")
    # "-map 0:s:0" selects the first subtitle stream of the first input;
    # "-c copy" copies it without re-encoding.
    ffmpeg_cmd = ["ffmpeg", "-i", mkv_file, "-map", "0:s:0", "-c", "copy", sup_file]
    try:
        subprocess.run(ffmpeg_cmd, check=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Error converting {mkv_file}: {e}")
        # A failed run may leave a partial output file behind. Remove it so
        # the next run retries instead of skipping it as "already exists".
        if os.path.exists(sup_file):
            os.remove(sup_file)
    else:
        logger.info(f"Converted {mkv_file} to {sup_file}")
    return sup_file
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
@logger.catch
def perform_ocr(sup_file_path):
    """
    Perform OCR on a .sup file and save the extracted text to a .srt file.

    The .srt file is written next to the .sup file with the same base name.
    If that .srt file already exists, nothing is read or written.

    Args:
        sup_file_path (str): Path to the .sup file.

    Returns:
        None
    """
    base_name = os.path.splitext(os.path.basename(sup_file_path))[0]
    output_dir = os.path.dirname(sup_file_path)
    logger.info(f"Performing OCR on {sup_file_path}")
    srt_file = os.path.join(output_dir, f"{base_name}.srt")

    # Already converted: return before loading the .sup file or touching the
    # Tesseract configuration.
    if os.path.exists(srt_file):
        return

    tesseract_lang = "eng"
    tesseract_config = "-c tessedit_char_blacklist=[] --psm 6 --oem 1"

    config = get_config(CONFIG_FILE)
    tesseract_path = config.get("tesseract_path")
    # Only override the Tesseract command when a path is configured;
    # str(None) would otherwise install the literal command "None".
    if tesseract_path:
        logger.debug(f"Setting Teesseract Path to {tesseract_path}")
        pytesseract.pytesseract.tesseract_cmd = str(tesseract_path)

    # Load the PGS/SUP file.
    pgs = PGSReader(sup_file_path)

    # presentation_timestamp is treated as a millisecond offset. Adding it to a
    # fixed naive datetime gives an elapsed-time clock that strftime can
    # format, without depending on the machine's local timezone.
    zero = datetime(1970, 1, 1)

    # Finished SubRip entries, joined once at the end.
    entries = []

    # State of the subtitle currently being assembled. Initialised up front so
    # a stream whose first display set carries no image does not hit unbound
    # names.
    start = end = text = None

    for ds in pgs.iter_displaysets():
        if ds.has_image:
            # Palette and object segments of this display set.
            pds = ds.pds[0]
            ods = ds.ods[0]

            if pds and ods:
                # Render the subtitle bitmap and convert it to RGBA.
                src = make_image(ods, pds).convert("RGBA")

                # Paste onto a black grayscale canvas, then invert so the
                # text is readable by Tesseract.
                img = Image.new("L", src.size, "BLACK")
                img.paste(src, (0, 0), src)
                img = ImageOps.invert(img)

                text = pytesseract.image_to_string(
                    img, lang=tesseract_lang, config=tesseract_config
                ).strip()

                # Replace "|", "/" and "\" with "I" and "_" with "L".
                # Works better than blacklisting "|" in Tesseract,
                # which results in I becoming "!" "i" and "1".
                text = re.sub(r"[|/\\]", "I", text)
                text = re.sub(r"[_]", "L", text)
                start = zero + timedelta(milliseconds=ods.presentation_timestamp)
        else:
            # A display set without an image supplies the end time of the
            # pending subtitle via its presentation composition segment.
            pcs = ds.pcs[0]

            if pcs:
                end = zero + timedelta(milliseconds=pcs.presentation_timestamp)

                if isinstance(start, datetime) and text:
                    # "%f" is microseconds; the slice keeps HH:MM:SS,mmm.
                    start_stamp = start.strftime("%H:%M:%S,%f")[0:12]
                    end_stamp = end.strftime("%H:%M:%S,%f")[0:12]
                    entries.append(
                        f"{len(entries) + 1}\n{start_stamp} --> {end_stamp}\n{text}\n\n"
                    )
                    start = end = text = None

    # Explicit UTF-8 so OCR output containing non-ASCII characters does not
    # fail under a narrower platform default encoding.
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write("".join(entries))
    logger.info(f"Saved to: {srt_file}")
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
def convert_mkv_to_srt(season_path, mkv_files):
    """
    Converts MKV files to SRT format.

    Each .mkv file is first converted to a .sup file inside an ``ocr``
    sub-directory of ``season_path`` (created if missing); OCR is then run on
    the resulting .sup files in a thread pool.

    Args:
        season_path (str): The path to the season directory.
        mkv_files (list): List of MKV files to convert.

    Returns:
        None
    """
    logger.info(f"Converting {len(mkv_files)} files to SRT")
    output_dir = os.path.join(season_path, "ocr")
    os.makedirs(output_dir, exist_ok=True)

    sup_files = [convert_mkv_to_sup(mkv_file, output_dir) for mkv_file in mkv_files]

    # The executor's context manager waits for all submitted OCR jobs.
    with ThreadPoolExecutor() as executor:
        for sup_file in sup_files:
            # convert_mkv_to_sup returns the target path even when the
            # conversion failed, so only OCR files that actually exist.
            if os.path.exists(sup_file):
                executor.submit(perform_ocr, sup_file)
            else:
                logger.warning(f"Skipping OCR, {sup_file} was not created")
|
|
1
|
+
import os
|
|
2
|
+
import subprocess
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
# Get the absolute path of the parent directory of the current script.
|
|
6
|
+
parent_dir = os.path.dirname(os.path.abspath(__file__))
|
|
7
|
+
# Add the 'pgs2srt' directory to the Python path.
|
|
8
|
+
sys.path.append(os.path.join(parent_dir, "libraries", "pgs2srt"))
|
|
9
|
+
import re
|
|
10
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
11
|
+
from datetime import datetime, timedelta
|
|
12
|
+
|
|
13
|
+
import pytesseract
|
|
14
|
+
from imagemaker import make_image
|
|
15
|
+
from loguru import logger
|
|
16
|
+
from pgsreader import PGSReader
|
|
17
|
+
from PIL import Image, ImageOps
|
|
18
|
+
|
|
19
|
+
from mkv_episode_matcher.__main__ import CONFIG_FILE
|
|
20
|
+
from mkv_episode_matcher.config import get_config
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def convert_mkv_to_sup(mkv_file, output_dir):
    """
    Extract the first subtitle stream of an .mkv file into a .sup file using FFmpeg.

    The output file is named after the .mkv file (same base name, ``.sup``
    extension) and placed in ``output_dir``. If that file already exists the
    conversion is skipped.

    Args:
        mkv_file (str): Path to the .mkv file.
        output_dir (str): Path to the directory where the .sup file will be saved.

    Returns:
        str: Path to the .sup file. The path is returned even when FFmpeg
        fails (the failure is only logged), so the file may not exist.
    """
    base_name = os.path.splitext(os.path.basename(mkv_file))[0]
    sup_file = os.path.join(output_dir, f"{base_name}.sup")

    if os.path.exists(sup_file):
        logger.info(f"File {sup_file} already exists, skipping")
        return sup_file

    logger.info(f"Processing {mkv_file} to {sup_file}")
    # "-map 0:s:0" selects the first subtitle stream of the first input;
    # "-c copy" copies it without re-encoding.
    ffmpeg_cmd = ["ffmpeg", "-i", mkv_file, "-map", "0:s:0", "-c", "copy", sup_file]
    try:
        subprocess.run(ffmpeg_cmd, check=True)
    except subprocess.CalledProcessError as e:
        logger.error(f"Error converting {mkv_file}: {e}")
        # A failed run may leave a partial output file behind. Remove it so
        # the next run retries instead of skipping it as "already exists".
        if os.path.exists(sup_file):
            os.remove(sup_file)
    else:
        logger.info(f"Converted {mkv_file} to {sup_file}")
    return sup_file
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@logger.catch
def perform_ocr(sup_file_path):
    """
    Perform OCR on a .sup file and save the extracted text to a .srt file.

    The .srt file is written next to the .sup file with the same base name.
    If that .srt file already exists, nothing is read or written.

    Args:
        sup_file_path (str): Path to the .sup file.

    Returns:
        None
    """
    base_name = os.path.splitext(os.path.basename(sup_file_path))[0]
    output_dir = os.path.dirname(sup_file_path)
    logger.info(f"Performing OCR on {sup_file_path}")
    srt_file = os.path.join(output_dir, f"{base_name}.srt")

    # Already converted: return before loading the .sup file or touching the
    # Tesseract configuration.
    if os.path.exists(srt_file):
        return

    tesseract_lang = "eng"
    tesseract_config = "-c tessedit_char_blacklist=[] --psm 6 --oem 1"

    config = get_config(CONFIG_FILE)
    tesseract_path = config.get("tesseract_path")
    # Only override the Tesseract command when a path is configured;
    # str(None) would otherwise install the literal command "None".
    if tesseract_path:
        logger.debug(f"Setting Teesseract Path to {tesseract_path}")
        pytesseract.pytesseract.tesseract_cmd = str(tesseract_path)

    # Load the PGS/SUP file.
    pgs = PGSReader(sup_file_path)

    # presentation_timestamp is treated as a millisecond offset. Adding it to a
    # fixed naive datetime gives an elapsed-time clock that strftime can
    # format, without depending on the machine's local timezone.
    zero = datetime(1970, 1, 1)

    # Finished SubRip entries, joined once at the end.
    entries = []

    # State of the subtitle currently being assembled. Initialised up front so
    # a stream whose first display set carries no image does not hit unbound
    # names.
    start = end = text = None

    for ds in pgs.iter_displaysets():
        if ds.has_image:
            # Palette and object segments of this display set.
            pds = ds.pds[0]
            ods = ds.ods[0]

            if pds and ods:
                # Render the subtitle bitmap and convert it to RGBA.
                src = make_image(ods, pds).convert("RGBA")

                # Paste onto a black grayscale canvas, then invert so the
                # text is readable by Tesseract.
                img = Image.new("L", src.size, "BLACK")
                img.paste(src, (0, 0), src)
                img = ImageOps.invert(img)

                text = pytesseract.image_to_string(
                    img, lang=tesseract_lang, config=tesseract_config
                ).strip()

                # Replace "|", "/" and "\" with "I" and "_" with "L".
                # Works better than blacklisting "|" in Tesseract,
                # which results in I becoming "!" "i" and "1".
                text = re.sub(r"[|/\\]", "I", text)
                text = re.sub(r"[_]", "L", text)
                start = zero + timedelta(milliseconds=ods.presentation_timestamp)
        else:
            # A display set without an image supplies the end time of the
            # pending subtitle via its presentation composition segment.
            pcs = ds.pcs[0]

            if pcs:
                end = zero + timedelta(milliseconds=pcs.presentation_timestamp)

                if isinstance(start, datetime) and text:
                    # "%f" is microseconds; the slice keeps HH:MM:SS,mmm.
                    start_stamp = start.strftime("%H:%M:%S,%f")[0:12]
                    end_stamp = end.strftime("%H:%M:%S,%f")[0:12]
                    entries.append(
                        f"{len(entries) + 1}\n{start_stamp} --> {end_stamp}\n{text}\n\n"
                    )
                    start = end = text = None

    # Explicit UTF-8 so OCR output containing non-ASCII characters does not
    # fail under a narrower platform default encoding.
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write("".join(entries))
    logger.info(f"Saved to: {srt_file}")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def convert_mkv_to_srt(season_path, mkv_files):
    """
    Converts MKV files to SRT format.

    Each .mkv file is first converted to a .sup file inside an ``ocr``
    sub-directory of ``season_path`` (created if missing); OCR is then run on
    the resulting .sup files in a thread pool.

    Args:
        season_path (str): The path to the season directory.
        mkv_files (list): List of MKV files to convert.

    Returns:
        None
    """
    logger.info(f"Converting {len(mkv_files)} files to SRT")
    output_dir = os.path.join(season_path, "ocr")
    os.makedirs(output_dir, exist_ok=True)

    sup_files = [convert_mkv_to_sup(mkv_file, output_dir) for mkv_file in mkv_files]

    # The executor's context manager waits for all submitted OCR jobs.
    with ThreadPoolExecutor() as executor:
        for sup_file in sup_files:
            # convert_mkv_to_sup returns the target path even when the
            # conversion failed, so only OCR files that actually exist.
            if os.path.exists(sup_file):
                executor.submit(perform_ocr, sup_file)
            else:
                logger.warning(f"Skipping OCR, {sup_file} was not created")
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "markdown",
|
|
5
|
+
"metadata": {},
|
|
6
|
+
"source": [
|
|
7
|
+
"# Load imports and create config directory"
|
|
8
|
+
]
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"cell_type": "code",
|
|
12
|
+
"execution_count": null,
|
|
13
|
+
"metadata": {},
|
|
14
|
+
"outputs": [],
|
|
15
|
+
"source": [
|
|
16
|
+
"# __main__.py\n",
|
|
17
|
+
"import argparse\n",
|
|
18
|
+
"import os\n",
|
|
19
|
+
"\n",
|
|
20
|
+
"from loguru import logger\n",
|
|
21
|
+
"\n",
|
|
22
|
+
"from mkv_episode_matcher.config import get_config, set_config\n",
|
|
23
|
+
"from mkv_episode_matcher.utils import check_filename, cleanup_ocr_files, get_subtitles\n",
|
|
24
|
+
"from mkv_episode_matcher.tmdb_client import fetch_show_id,fetch_season_details\n",
|
|
25
|
+
"import os\n",
|
|
26
|
+
"import re\n",
|
|
27
|
+
"import shutil\n",
|
|
28
|
+
"from typing import Set\n",
|
|
29
|
+
"\n",
|
|
30
|
+
"import requests\n",
|
|
31
|
+
"from loguru import logger\n",
|
|
32
|
+
"from opensubtitlescom import OpenSubtitles\n",
|
|
33
|
+
"# Log the start of the application\n",
|
|
34
|
+
"logger.info(\"Starting the application\")\n",
|
|
35
|
+
"\n",
|
|
36
|
+
"\n",
|
|
37
|
+
"\n",
|
|
38
|
+
"# Check if the configuration directory exists, if not create it\n",
|
|
39
|
+
"if not os.path.exists(os.path.join(os.path.expanduser(\"~\"), \".mkv-episode-matcher\")):\n",
|
|
40
|
+
" os.makedirs(os.path.join(os.path.expanduser(\"~\"), \".mkv-episode-matcher\"))\n",
|
|
41
|
+
"\n",
|
|
42
|
+
"# Define the paths for the configuration file and cache directory\n",
|
|
43
|
+
"CONFIG_FILE = os.path.join(\n",
|
|
44
|
+
" os.path.expanduser(\"~\"), \".mkv-episode-matcher\", \"config.ini\"\n",
|
|
45
|
+
")\n",
|
|
46
|
+
"CACHE_DIR = os.path.join(os.path.expanduser(\"~\"), \".mkv-episode-matcher\", \"cache\")"
|
|
47
|
+
]
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"cell_type": "markdown",
|
|
51
|
+
"metadata": {},
|
|
52
|
+
"source": [
|
|
53
|
+
"# Load configuration settings from config.ini"
|
|
54
|
+
]
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"cell_type": "code",
|
|
58
|
+
"execution_count": null,
|
|
59
|
+
"metadata": {},
|
|
60
|
+
"outputs": [],
|
|
61
|
+
"source": [
|
|
62
|
+
"config = get_config(CONFIG_FILE)\n",
|
|
63
|
+
"show_dir = config.get(\"show_dir\")\n",
|
|
64
|
+
"show_name = os.path.basename(show_dir)\n",
|
|
65
|
+
"series_name = os.path.basename(show_dir)\n",
|
|
66
|
+
"tmdb_api_key = config.get(\"tmdb_api_key\")\n",
|
|
67
|
+
"open_subtitles_api_key = config.get(\"open_subtitles_api_key\")\n",
|
|
68
|
+
"open_subtitles_user_agent = config.get(\"open_subtitles_user_agent\")\n",
|
|
69
|
+
"open_subtitles_username = config.get(\"open_subtitles_username\")\n",
|
|
70
|
+
"open_subtitles_password = config.get(\"open_subtitles_password\")"
|
|
71
|
+
]
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"cell_type": "markdown",
|
|
75
|
+
"metadata": {},
|
|
76
|
+
"source": [
|
|
77
|
+
"# Make sure all required info exists in config.ini"
|
|
78
|
+
]
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
"cell_type": "code",
|
|
82
|
+
"execution_count": null,
|
|
83
|
+
"metadata": {},
|
|
84
|
+
"outputs": [],
|
|
85
|
+
"source": [
|
|
86
|
+
"for x in [\n",
|
|
87
|
+
" show_dir,\n",
|
|
88
|
+
" tmdb_api_key,\n",
|
|
89
|
+
" open_subtitles_api_key,\n",
|
|
90
|
+
" open_subtitles_user_agent,\n",
|
|
91
|
+
" open_subtitles_username,\n",
|
|
92
|
+
" open_subtitles_password,\n",
|
|
93
|
+
" ]:\n",
|
|
94
|
+
" try:\n",
|
|
95
|
+
" print(x)\n",
|
|
96
|
+
" except:\n",
|
|
97
|
+
" print('failed')"
|
|
98
|
+
]
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
"cell_type": "code",
|
|
102
|
+
"execution_count": null,
|
|
103
|
+
"metadata": {},
|
|
104
|
+
"outputs": [],
|
|
105
|
+
"source": [
|
|
106
|
+
"if not all(\n",
|
|
107
|
+
" [\n",
|
|
108
|
+
" show_dir,\n",
|
|
109
|
+
" tmdb_api_key,\n",
|
|
110
|
+
" open_subtitles_api_key,\n",
|
|
111
|
+
" open_subtitles_user_agent,\n",
|
|
112
|
+
" open_subtitles_username,\n",
|
|
113
|
+
" open_subtitles_password,\n",
|
|
114
|
+
" ]\n",
|
|
115
|
+
"):\n",
|
|
116
|
+
" logger.error(\"Missing configuration settings. Please run the setup script.\")"
|
|
117
|
+
]
|
|
118
|
+
},
|
|
119
|
+
{
|
|
120
|
+
"cell_type": "markdown",
|
|
121
|
+
"metadata": {},
|
|
122
|
+
"source": [
|
|
123
|
+
"# Make sure show can be found on TMDb\n",
|
|
124
|
+
"The show id is used to search on opensubtitles"
|
|
125
|
+
]
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"cell_type": "code",
|
|
129
|
+
"execution_count": null,
|
|
130
|
+
"metadata": {},
|
|
131
|
+
"outputs": [],
|
|
132
|
+
"source": [
|
|
133
|
+
"show_id = fetch_show_id(show_name)\n",
|
|
134
|
+
"if show_id is None:\n",
|
|
135
|
+
" logger.error(f\"Could not find show '{os.path.basename(show_name)}' on TMDb.\")\n",
|
|
136
|
+
"else:\n",
|
|
137
|
+
" print(show_id)"
|
|
138
|
+
]
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"cell_type": "markdown",
|
|
142
|
+
"metadata": {},
|
|
143
|
+
"source": [
|
|
144
|
+
"# Try getting the first season automatically"
|
|
145
|
+
]
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
"cell_type": "code",
|
|
149
|
+
"execution_count": null,
|
|
150
|
+
"metadata": {},
|
|
151
|
+
"outputs": [],
|
|
152
|
+
"source": [
|
|
153
|
+
"get_subtitles(show_id, seasons=set([1]))"
|
|
154
|
+
]
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"cell_type": "markdown",
|
|
158
|
+
"metadata": {},
|
|
159
|
+
"source": [
|
|
160
|
+
"# Check if there's an issue in the get_subtitles function"
|
|
161
|
+
]
|
|
162
|
+
},
|
|
163
|
+
{
|
|
164
|
+
"cell_type": "code",
|
|
165
|
+
"execution_count": null,
|
|
166
|
+
"metadata": {},
|
|
167
|
+
"outputs": [],
|
|
168
|
+
"source": [
|
|
169
|
+
"try:\n",
|
|
170
|
+
" # Initialize the OpenSubtitles client\n",
|
|
171
|
+
" subtitles = OpenSubtitles(open_subtitles_user_agent, open_subtitles_api_key)\n",
|
|
172
|
+
"\n",
|
|
173
|
+
" # Log in (retrieve auth token)\n",
|
|
174
|
+
" subtitles.login(open_subtitles_username, open_subtitles_password)\n",
|
|
175
|
+
"except Exception as e:\n",
|
|
176
|
+
" logger.error(f\"Failed to log in to OpenSubtitles: {e}\")\n",
|
|
177
|
+
"\n",
|
|
178
|
+
"for season in [1]:\n",
|
|
179
|
+
" episodes = fetch_season_details(show_id, season)\n",
|
|
180
|
+
" logger.info(f\"Found {episodes} episodes in Season {season}\")\n",
|
|
181
|
+
"\n",
|
|
182
|
+
" for episode in range(1, episodes + 1):\n",
|
|
183
|
+
" logger.info(f\"Processing Season {season}, Episode {episode}...\")\n",
|
|
184
|
+
" series_cache_dir =os.path.join(\n",
|
|
185
|
+
" CACHE_DIR,\n",
|
|
186
|
+
" \"data\",\n",
|
|
187
|
+
" series_name)\n",
|
|
188
|
+
" os.makedirs(series_cache_dir,exist_ok=True)\n",
|
|
189
|
+
" srt_filepath = os.path.join(\n",
|
|
190
|
+
" series_cache_dir,\n",
|
|
191
|
+
" f\"{series_name} - S{season:02d}E{episode:02d}.srt\",\n",
|
|
192
|
+
" )\n",
|
|
193
|
+
" if not os.path.exists(srt_filepath):\n",
|
|
194
|
+
" # get the episode info from TMDB\n",
|
|
195
|
+
" url = f\"https://api.themoviedb.org/3/tv/{show_id}/season/{season}/episode/{episode}?api_key={tmdb_api_key}\"\n",
|
|
196
|
+
" response = requests.get(url)\n",
|
|
197
|
+
" response.raise_for_status()\n",
|
|
198
|
+
" episode_data = response.json()\n",
|
|
199
|
+
" episode_name = episode_data[\"name\"]\n",
|
|
200
|
+
" episode_id = episode_data[\"id\"]\n",
|
|
201
|
+
" # search for the subtitle\n",
|
|
202
|
+
" response = subtitles.search(tmdb_id=episode_id, languages=\"en\")\n",
|
|
203
|
+
" if len(response.data) == 0:\n",
|
|
204
|
+
" logger.warning(\n",
|
|
205
|
+
" f\"No subtitles found for {series_name} - S{season:02d}E{episode:02d}\"\n",
|
|
206
|
+
" )\n",
|
|
207
|
+
"\n",
|
|
208
|
+
" for subtitle in response.data:\n",
|
|
209
|
+
" subtitle_dict = subtitle.to_dict()\n",
|
|
210
|
+
" # Remove special characters and convert to uppercase\n",
|
|
211
|
+
" filename_clean = re.sub(\n",
|
|
212
|
+
" r\"\\W+\", \" \", subtitle_dict[\"file_name\"]\n",
|
|
213
|
+
" ).upper()\n",
|
|
214
|
+
" if f\"E{episode:02d}\" in filename_clean:\n",
|
|
215
|
+
" logger.info(f\"Original filename: {subtitle_dict['file_name']}\")\n",
|
|
216
|
+
" srt_file = subtitles.download_and_save(subtitle)\n",
|
|
217
|
+
" series_name = series_name.replace(\":\", \" -\")\n",
|
|
218
|
+
" shutil.move(os.path.join(os.getcwd(),srt_file), srt_filepath)\n",
|
|
219
|
+
" logger.info(f\"Subtitle saved to {srt_filepath}\")\n",
|
|
220
|
+
" break\n",
|
|
221
|
+
" else:\n",
|
|
222
|
+
" continue\n",
|
|
223
|
+
" else:\n",
|
|
224
|
+
" logger.info(\n",
|
|
225
|
+
" f\"Subtitle already exists for {series_name} - S{season:02d}E{episode:02d}\"\n",
|
|
226
|
+
" )\n",
|
|
227
|
+
" continue"
|
|
228
|
+
]
|
|
229
|
+
}
|
|
230
|
+
],
|
|
231
|
+
"metadata": {
|
|
232
|
+
"kernelspec": {
|
|
233
|
+
"display_name": "mkv",
|
|
234
|
+
"language": "python",
|
|
235
|
+
"name": "python3"
|
|
236
|
+
},
|
|
237
|
+
"language_info": {
|
|
238
|
+
"codemirror_mode": {
|
|
239
|
+
"name": "ipython",
|
|
240
|
+
"version": 3
|
|
241
|
+
},
|
|
242
|
+
"file_extension": ".py",
|
|
243
|
+
"mimetype": "text/x-python",
|
|
244
|
+
"name": "python",
|
|
245
|
+
"nbconvert_exporter": "python",
|
|
246
|
+
"pygments_lexer": "ipython3",
|
|
247
|
+
"version": "3.12.1"
|
|
248
|
+
}
|
|
249
|
+
},
|
|
250
|
+
"nbformat": 4,
|
|
251
|
+
"nbformat_minor": 2
|
|
252
|
+
}
|
mkv_episode_matcher/utils.py
CHANGED
|
@@ -164,10 +164,13 @@ def get_subtitles(show_id, seasons: Set[int]):
|
|
|
164
164
|
|
|
165
165
|
for episode in range(1, episodes + 1):
|
|
166
166
|
logger.info(f"Processing Season {season}, Episode {episode}...")
|
|
167
|
-
|
|
167
|
+
series_cache_dir =os.path.join(
|
|
168
168
|
CACHE_DIR,
|
|
169
169
|
"data",
|
|
170
|
-
series_name
|
|
170
|
+
series_name)
|
|
171
|
+
os.makedirs(series_cache_dir,exist_ok=True)
|
|
172
|
+
srt_filepath = os.path.join(
|
|
173
|
+
series_cache_dir,
|
|
171
174
|
f"{series_name} - S{season:02d}E{episode:02d}.srt",
|
|
172
175
|
)
|
|
173
176
|
if not os.path.exists(srt_filepath):
|