mkv-episode-matcher 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mkv-episode-matcher might be problematic. Click here for more details.
- mkv_episode_matcher-0.1.3/.coverage.DESKTOP-NTJ52LL.19040.XkHNEbEx +0 -0
- mkv_episode_matcher-0.1.3/.coverage.DESKTOP-NTJ52LL.24340.XjsBEKWx +0 -0
- mkv_episode_matcher-0.1.3/.gitattributes +2 -0
- mkv_episode_matcher-0.1.3/.github/workflows/ci.yml +29 -0
- mkv_episode_matcher-0.1.3/.gitmodules +3 -0
- mkv_episode_matcher-0.1.3/.vscode/settings.json +11 -0
- {mkv_episode_matcher-0.1.1 → mkv_episode_matcher-0.1.3}/PKG-INFO +1 -1
- mkv_episode_matcher-0.1.3/docs/index.md +17 -0
- mkv_episode_matcher-0.1.3/mkdocs.yml +6 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/.gitattributes +2 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/__init__.py +1 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/__main__.py +179 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/config.py +82 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/episode_matcher.py +237 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/.git +1 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/.gitignore +2 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py +295 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py +249 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py +215 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/README.md +26 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/__init__.py +0 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/imagemaker.py +87 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py +121 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/pgsreader.py +221 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/libraries/pgs2srt/requirements.txt +4 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/mkv_to_srt.py +179 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/requirements.txt +8 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/tmdb_client.py +134 -0
- mkv_episode_matcher-0.1.3/mkv_episode_matcher/utils.py +228 -0
- {mkv_episode_matcher-0.1.1 → mkv_episode_matcher-0.1.3}/pyproject.toml +0 -4
- mkv_episode_matcher-0.1.3/tests/__init__.py +0 -0
- {mkv_episode_matcher-0.1.1 → mkv_episode_matcher-0.1.3}/.gitignore +0 -0
- {mkv_episode_matcher-0.1.1 → mkv_episode_matcher-0.1.3}/README.md +0 -0
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: ci
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches:
|
|
5
|
+
- master
|
|
6
|
+
- main
|
|
7
|
+
permissions:
|
|
8
|
+
contents: write
|
|
9
|
+
jobs:
|
|
10
|
+
deploy:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- name: Configure Git Credentials
|
|
15
|
+
run: |
|
|
16
|
+
git config user.name github-actions[bot]
|
|
17
|
+
git config user.email 41898282+github-actions[bot]@users.noreply.github.com
|
|
18
|
+
- uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: 3.x
|
|
21
|
+
- run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
|
|
22
|
+
- uses: actions/cache@v4
|
|
23
|
+
with:
|
|
24
|
+
key: mkdocs-material-${{ env.cache_id }}
|
|
25
|
+
path: .cache
|
|
26
|
+
restore-keys: |
|
|
27
|
+
mkdocs-material-
|
|
28
|
+
- run: pip install mkdocs-material
|
|
29
|
+
- run: mkdocs gh-deploy --force
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: mkv-episode-matcher
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
|
|
5
5
|
Project-URL: Documentation, https://github.com/Jsakkos/mkv-episode-matcher#readme
|
|
6
6
|
Project-URL: Issues, https://github.com/Jsakkos/mkv-episode-matcher/issues
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Welcome to MkDocs
|
|
2
|
+
|
|
3
|
+
For full documentation visit [mkdocs.org](https://www.mkdocs.org).
|
|
4
|
+
|
|
5
|
+
## Commands
|
|
6
|
+
|
|
7
|
+
* `mkdocs new [dir-name]` - Create a new project.
|
|
8
|
+
* `mkdocs serve` - Start the live-reloading docs server.
|
|
9
|
+
* `mkdocs build` - Build the documentation site.
|
|
10
|
+
* `mkdocs -h` - Print help message and exit.
|
|
11
|
+
|
|
12
|
+
## Project layout
|
|
13
|
+
|
|
14
|
+
mkdocs.yml # The configuration file.
|
|
15
|
+
docs/
|
|
16
|
+
index.md # The documentation homepage.
|
|
17
|
+
... # Other markdown pages, images and other files.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package version string.
version = "0.1.3"
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# __main__.py
|
|
2
|
+
import argparse
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
from .config import get_config, set_config
|
|
8
|
+
|
|
9
|
+
# Log the start of the application
logger.info("Starting the application")

# Make sure the log directory (relative to the current working directory)
# exists. exist_ok=True avoids the check-then-create race that would raise
# FileExistsError if another instance created it in between.
os.makedirs("./logs", exist_ok=True)

# File handler that records everything from DEBUG level upwards
logger.add("./logs/file_stdout.log", format="{time} {level} {message}", level="DEBUG", rotation="10 MB")

# File handler that records only ERROR level and above
logger.add("./logs/file_errors.log", level="ERROR", rotation="10 MB")

# Paths of the per-user configuration file and cache directory
CONFIG_FILE = os.path.join(
    os.path.expanduser("~"), ".mkv-episode-matcher", "config.ini"
)
CACHE_DIR = os.path.join(os.path.expanduser("~"), ".mkv-episode-matcher", "cache")

# Creating the cache directory also creates its parent, the
# ~/.mkv-episode-matcher configuration directory, when it is missing.
os.makedirs(CACHE_DIR, exist_ok=True)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _str_to_bool(value):
    """
    Convert a command-line token such as "true", "0" or "no" to a bool.

    argparse's ``type=bool`` cannot be used for this: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.

    Args:
        value (str | bool): The raw command-line value.

    Returns:
        bool: The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If the value is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    normalized = str(value).strip().lower()
    if normalized in {"1", "true", "t", "yes", "y", "on"}:
        return True
    if normalized in {"", "0", "false", "f", "no", "n", "off"}:
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}")


@logger.catch
def main():
    """
    Entry point of the application.

    Parses the command-line arguments, fills in missing settings from the
    cached configuration file (prompting the user for anything still missing),
    writes the resulting configuration back, and processes the show.

    Command-line arguments:
    --tmdb-api-key: The API key for the TMDb API. Falls back to the cached value, then to a prompt.
    --show-dir: The main directory of the show. Falls back to the cached value, then to a prompt,
        and finally to the current working directory if the prompt is left empty.
    --season: The season number to be processed. If not provided, all seasons will be processed.
    --dry-run: Do not rename any files. May be given bare (``--dry-run``) or with an explicit
        value (``--dry-run true`` / ``--dry-run false``).
    --get-subs: Download subtitles for the show. Accepts the same forms as --dry-run.
    --tesseract-path: The path to the tesseract executable. Falls back to the cached value, then to a prompt.
    """
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description="Process shows with TMDb API")
    parser.add_argument("--tmdb-api-key", help="TMDb API key")
    parser.add_argument("--show-dir", help="Main directory of the show")
    parser.add_argument(
        "--season",
        type=int,
        default=None,
        nargs="?",
        help="Specify the season number to be processed (default: None)",
    )
    # const=True makes the bare flag mean "enabled"; without it nargs="?"
    # stores None for a bare flag, which silently disabled the option.
    parser.add_argument(
        "--dry-run",
        type=_str_to_bool,
        default=None,
        nargs="?",
        const=True,
        help="Don't rename any files (default: None)",
    )
    parser.add_argument(
        "--get-subs",
        type=_str_to_bool,
        default=None,
        nargs="?",
        const=True,
        help="Download subtitles for the show (default: None)",
    )
    parser.add_argument(
        "--tesseract-path",
        type=str,
        default=None,
        nargs="?",
        help="Path to the tesseract executable (default: None)",
    )
    args = parser.parse_args()
    logger.debug(f"Command-line arguments: {args}")

    # Load the cached configuration once, up front, so every fallback below
    # can rely on it. get_config may return None or an empty mapping; both are
    # normalised to an empty dict so that .get() is always safe.
    cached_config = get_config(CONFIG_FILE) or {}

    # TMDb API key: command line, then cache, then prompt
    tmdb_api_key = args.tmdb_api_key or cached_config.get("tmdb_api_key")
    if not tmdb_api_key:
        tmdb_api_key = input("Enter your TMDb API key: ")

    # NOTE(review): this writes the API key to the debug log file; consider masking it.
    logger.debug(f"TMDb API Key: {tmdb_api_key}")

    # OpenSubtitles credentials are only collected when subtitles are requested;
    # otherwise empty strings are written to the configuration.
    open_subtitles_api_key = ""
    open_subtitles_user_agent = ""
    open_subtitles_username = ""
    open_subtitles_password = ""
    if args.get_subs:
        logger.debug("Getting OpenSubtitles API key")
        open_subtitles_api_key = cached_config.get("open_subtitles_api_key")
        if not open_subtitles_api_key:
            open_subtitles_api_key = input("Enter your OpenSubtitles API key: ")

        open_subtitles_user_agent = cached_config.get("open_subtitles_user_agent")
        if not open_subtitles_user_agent:
            open_subtitles_user_agent = input("Enter your OpenSubtitles User Agent: ")

        open_subtitles_username = cached_config.get("open_subtitles_username")
        if not open_subtitles_username:
            open_subtitles_username = input("Enter your OpenSubtitles Username: ")

        open_subtitles_password = cached_config.get("open_subtitles_password")
        if not open_subtitles_password:
            open_subtitles_password = input("Enter your OpenSubtitles Password: ")

    # Show directory: command line, then cache, then prompt
    show_dir = args.show_dir or cached_config.get("show_dir")
    if not show_dir:
        show_dir = input("Enter the main directory of the show:")
    logger.info(f"Show Directory: {show_dir}")
    # An empty answer to the prompt means "use the current working directory"
    if not show_dir:
        show_dir = os.getcwd()

    # Tesseract executable: command line, then cache, then prompt
    tesseract_path = args.tesseract_path or cached_config.get("tesseract_path")
    if not tesseract_path:
        tesseract_path = input(
            r"Enter the path to the tesseract executable: ['C:\Program Files\Tesseract-OCR\tesseract.exe']"
        )
    logger.debug(f"Teesseract Path: {tesseract_path}")
    logger.debug(f"Show Directory: {show_dir}")

    # Set the configuration
    set_config(
        tmdb_api_key,
        open_subtitles_api_key,
        open_subtitles_user_agent,
        open_subtitles_username,
        open_subtitles_password,
        show_dir,
        CONFIG_FILE,
        tesseract_path=tesseract_path,
    )
    logger.info("Configuration set")

    # Imported here rather than at module level: episode_matcher imports
    # CACHE_DIR and CONFIG_FILE from this module.
    from .episode_matcher import process_show

    process_show(args.season, dry_run=args.dry_run, get_subs=args.get_subs)
    logger.info("Show processing completed")


# Run the main function if the script is run directly
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# config.py
|
|
2
|
+
import configparser
|
|
3
|
+
import multiprocessing
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from loguru import logger
|
|
7
|
+
|
|
8
|
+
MAX_THREADS = 4
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_total_threads():
    """
    Report how many CPUs the machine has.

    Returns:
        int: The value of ``multiprocessing.cpu_count()``.
    """
    cpu_total = multiprocessing.cpu_count()
    return cpu_total
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
total_threads = get_total_threads()

# Never plan for more worker threads than the machine has CPUs.
MAX_THREADS = min(MAX_THREADS, total_threads)
logger.info(f"Total available threads: {total_threads} -> Setting max to {MAX_THREADS}")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def set_config(
    tmdb_api_key,
    open_subtitles_api_key,
    open_subtitles_user_agent,
    open_subtitles_username,
    open_subtitles_password,
    show_dir,
    file,
    tesseract_path=None,
):
    """
    Sets the configuration values and writes them to a file.

    All values are stored in a single ``[Config]`` section, together with the
    module-level ``MAX_THREADS`` setting. A value of ``None`` is stored as an
    empty string so that it reads back as "not set" instead of the truthy
    literal ``"None"``.

    Args:
        tmdb_api_key (str): The API key for TMDB (The Movie Database).
        open_subtitles_api_key (str): The API key for OpenSubtitles.
        open_subtitles_user_agent (str): The user agent for OpenSubtitles.
        open_subtitles_username (str): The username for OpenSubtitles.
        open_subtitles_password (str): The password for OpenSubtitles.
        show_dir (str): The directory where the TV show episodes are located.
        file (str): The path to the configuration file.
        tesseract_path (str, optional): The path to the Tesseract OCR executable.

    Returns:
        None
    """

    def _as_option(value):
        # ConfigParser's default interpolation treats "%" as special and
        # rejects a lone one with ValueError (e.g. a show directory named
        # "100% Wolf" or a password containing "%"). Doubling it stores a
        # literal percent sign that reads back unchanged.
        text = "" if value is None else str(value)
        return text.replace("%", "%%")

    config = configparser.ConfigParser()
    config["Config"] = {
        "tmdb_api_key": _as_option(tmdb_api_key),
        "show_dir": _as_option(show_dir),
        "max_threads": _as_option(int(MAX_THREADS)),
        "open_subtitles_api_key": _as_option(open_subtitles_api_key),
        "open_subtitles_user_agent": _as_option(open_subtitles_user_agent),
        "open_subtitles_username": _as_option(open_subtitles_username),
        "open_subtitles_password": _as_option(open_subtitles_password),
        "tesseract_path": _as_option(tesseract_path),
    }
    # NOTE(review): this logs the TMDb API key in clear text; consider masking it.
    logger.info(
        f"Setting config with API:{tmdb_api_key}, show_dir: {show_dir}, and max_threads: {MAX_THREADS}"
    )
    with open(file, "w") as configfile:
        config.write(configfile)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_config(file):
    """
    Read and return the configuration from the specified file.

    Args:
        file (str): The path to the configuration file.

    Returns:
        Mapping: The ``[Config]`` section of the file. An empty dict is
        returned when the file does not exist or has no ``[Config]`` section,
        so callers can always use ``.get()`` on the result.
    """
    logger.info(f"Loading config from {file}")
    config = configparser.ConfigParser()
    if os.path.exists(file):
        config.read(file)
        if "Config" in config:
            return config["Config"]
    # Missing file and missing section are reported the same way; previously
    # the latter returned None, which broke callers that call .get() directly.
    return {}
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# episode_matcher.py
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from loguru import logger
|
|
6
|
+
|
|
7
|
+
from mkv_episode_matcher.__main__ import CACHE_DIR, CONFIG_FILE
|
|
8
|
+
from mkv_episode_matcher.config import get_config
|
|
9
|
+
from mkv_episode_matcher.mkv_to_srt import convert_mkv_to_srt
|
|
10
|
+
from mkv_episode_matcher.tmdb_client import fetch_show_id
|
|
11
|
+
from mkv_episode_matcher.utils import check_filename, cleanup_ocr_files, get_subtitles
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# hash_data = {}
|
|
15
|
+
def _season_number(season_path):
    """Return the trailing integer of a season directory name, or None if there is none."""
    tokens = os.path.basename(season_path).split()
    try:
        return int(tokens[-1])
    except (IndexError, ValueError):
        return None


def _unprocessed_mkv_files(season_path):
    """List the .mkv files directly inside season_path that check_filename does not flag as processed."""
    pending = []
    for name in os.listdir(season_path):
        if not name.endswith(".mkv"):
            continue
        mkv_path = os.path.join(season_path, name)
        if check_filename(mkv_path):
            logger.info(f"Skipping {mkv_path}, already processed")
            continue
        pending.append(mkv_path)
    return pending


@logger.catch
def process_show(season=None, dry_run=False, get_subs=False):
    """
    Process the configured show: convert MKV subtitles to SRT, compare them
    with the reference subtitles and rename the matching episodes.

    The show directory is read from the configuration file; its base name is
    used as the show name for the TMDb lookup.

    Args:
        season (int, optional): The season number to process. If provided, only the
            "Season <season>" directory is processed. Defaults to None (all season directories).
        dry_run (bool, optional): If truthy, matches are logged but no file is renamed. Defaults to False.
        get_subs (bool, optional): If truthy, reference subtitles are fetched for the seasons
            found on disk before matching. Defaults to False.
    """
    config = get_config(CONFIG_FILE)
    show_dir = config.get("show_dir") if config else None
    if not show_dir:
        logger.error("No show directory is configured, nothing to process")
        return

    show_name = os.path.basename(show_dir)
    logger.info(f"Processing show '{show_name}'...")
    show_id = fetch_show_id(show_name)

    if show_id is None:
        logger.error(f"Could not find show '{show_name}' on TMDb.")
        return

    # Every sub-directory of the show directory is treated as a season
    season_paths = [
        os.path.join(show_dir, d)
        for d in os.listdir(show_dir)
        if os.path.isdir(os.path.join(show_dir, d))
    ]
    logger.info(f"Found {len(season_paths)} seasons for show '{show_name}'")

    if get_subs:
        # Directories whose name does not end in a number (e.g. "Extras")
        # are ignored instead of aborting the whole run with ValueError.
        season_numbers = {
            number
            for number in map(_season_number, season_paths)
            if number is not None
        }
        get_subtitles(show_id, seasons=season_numbers)

    if season is not None:
        season_paths = [os.path.join(show_dir, f"Season {season}")]

    # Collect the pending files per season so that every season is converted,
    # not only the last one visited.
    pending_by_season = {}
    for season_path in season_paths:
        if not os.path.isdir(season_path):
            logger.warning(f"Season directory {season_path} does not exist, skipping")
            continue
        pending = _unprocessed_mkv_files(season_path)
        if pending:
            pending_by_season[season_path] = pending

    if not pending_by_season:
        logger.info("No new files to process")
        return

    for season_path, mkv_files in pending_by_season.items():
        convert_mkv_to_srt(season_path, mkv_files)

    reference_text_dict = process_reference_srt_files(show_name)
    srt_text_dict = process_srt_files(show_dir)
    compare_and_rename_files(srt_text_dict, reference_text_dict, dry_run=dry_run)
    cleanup_ocr_files(show_dir)
|
|
77
|
+
|
|
78
|
+
def check_filename(filename):
    """
    Tell whether a filename already carries a season/episode marker.

    Args:
        filename (str): The filename (or path) to inspect.

    Returns:
        bool: True if the text contains "S<digits>E<digits>", False otherwise.
    """
    return re.match(r".*S\d+E\d+", filename) is not None
|
|
91
|
+
def extract_srt_text(filepath):
    """
    Extracts the text from an SRT file.

    The file is split into blocks on blank lines. The first two lines of each
    block (the index and the timestamp line in a standard SRT block) are
    dropped, empty lines are removed, and ``<i>``/``</i>`` tags are stripped
    from the remaining lines.

    Args:
        filepath (str): The path to the SRT file.

    Returns:
        list: A list of lists, where each inner list represents a block of text from the SRT file.
              Each inner list contains the lines of text for that block. Blocks without any
              text line are omitted.
    """
    # Decode as UTF-8 and replace undecodable bytes, so the result does not
    # depend on the platform's locale encoding and a stray byte cannot abort
    # the whole run.
    with open(filepath, encoding="utf-8", errors="replace") as srt_file:
        content = srt_file.read()

    text_blocks = []
    # Split once; the previous implementation re-split the whole file for
    # every block index.
    for block in content.split("\n\n"):
        lines = [
            re.sub(r"</?i>", "", line)
            for line in block.split("\n")[2:]
            if line
        ]
        if lines:
            text_blocks.append(lines)
    return text_blocks
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def compare_text(text1, text2):
    """
    Count the distinct text lines that two subtitle sources have in common.

    Args:
        text1 (list): Blocks of text lines from the first source (a list of lists).
        text2 (list): Blocks of text lines from the second source (a list of lists).

    Returns:
        int: Number of distinct lines present in both sources.
    """
    # Flatten each source into the set of its lines, then intersect
    first_lines = {line for block in text1 for line in block}
    second_lines = {line for block in text2 for line in block}
    return len(first_lines & second_lines)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def extract_season_episode(filename):
    """
    Pull the season and episode numbers out of an "S<digits>E<digits>" marker.

    Args:
        filename (str): The filename to search.

    Returns:
        tuple: ``(season, episode)`` as integers taken from the first marker found,
        or ``(None, None)`` when the filename contains no marker.
    """
    found = re.search(r"S(\d+)E(\d+)", filename)
    if not found:
        return None, None
    season_text, episode_text = found.groups()
    return int(season_text), int(episode_text)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def process_reference_srt_files(series_name):
    """
    Process reference SRT files for a given series.

    Walks ``<CACHE_DIR>/data/<series_name>`` and reads every ``.srt`` file
    whose name contains an SxxEyy marker.

    Args:
        series_name (str): The name of the series.

    Returns:
        dict: A dictionary containing the reference files where the keys are the target MKV
              filenames ("<series_name> - SxxEyy.mkv") and the values are the corresponding SRT texts.
    """
    reference_files = {}
    reference_dir = os.path.join(CACHE_DIR, "data", series_name)
    for dirpath, _, filenames in os.walk(reference_dir):
        for filename in filenames:
            if not filename.lower().endswith(".srt"):
                continue
            srt_file = os.path.join(dirpath, filename)
            season, episode = extract_season_episode(filename)
            if season is None or episode is None:
                # Without both numbers the target name cannot be built;
                # formatting None with ":02" would raise TypeError.
                logger.warning(
                    f"Skipping {srt_file}, no season/episode marker in file name"
                )
                continue
            logger.info(f"Processing {srt_file}")
            mkv_filename = f"{series_name} - S{season:02}E{episode:02}.mkv"
            reference_files[mkv_filename] = extract_srt_text(srt_file)
    return reference_files
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def process_srt_files(show_dir):
    """
    Read every SRT file found under a directory tree.

    Args:
        show_dir (str): The root directory to walk.

    Returns:
        dict: Maps each SRT file path to the text extracted from that file.
    """
    collected = {}
    for folder, _, names in os.walk(show_dir):
        for name in names:
            # The extension check is case-insensitive
            if not name.lower().endswith(".srt"):
                continue
            srt_path = os.path.join(folder, name)
            logger.info(f"Processing {srt_path}")
            collected[srt_path] = extract_srt_text(srt_path)
    return collected
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def compare_and_rename_files(srt_files, reference_files, dry_run=False):
    """
    Compare the srt files with the reference files and rename the matching mkv files.

    An srt file matches a reference when the number of distinct lines they
    share is at least 10% of the number of reference blocks, and at least one.
    The mkv file is looked up in the grandparent directory of the srt file,
    under the srt file's name with an ".mkv" extension, and is renamed to the
    reference key.

    Args:
        srt_files (dict): A dictionary containing the srt file paths as keys and their contents as values.
        reference_files (dict): A dictionary containing the target mkv filenames as keys and the
            reference subtitle contents as values.
        dry_run (bool, optional): If True, the function will only log the matches without actually renaming the files. Defaults to False.
    """
    logger.info(
        f"Comparing {len(srt_files)} srt files with {len(reference_files)} reference files"
    )
    for srt_path, srt_text in srt_files.items():
        # NOTE(review): presumably the srt files live in a sub-folder of the
        # season directory that holds the mkv files - confirm against mkv_to_srt.
        parent_dir = os.path.dirname(os.path.dirname(srt_path))
        # Swap only the extension; str.replace would also rewrite ".srt"
        # occurring elsewhere in the name and miss an upper-case ".SRT".
        mkv_name = os.path.splitext(os.path.basename(srt_path))[0] + ".mkv"
        mkv_file = os.path.join(parent_dir, mkv_name)

        for reference, reference_text in reference_files.items():
            matching_lines = compare_text(reference_text, srt_text)
            # Require at least one shared line: for references with fewer
            # than 10 blocks the 10% threshold rounds down to 0, which made
            # every srt file "match".
            required_lines = max(1, int(len(reference_text) * 0.1))
            if matching_lines < required_lines:
                continue

            logger.info(f"Matching lines: {matching_lines}")
            logger.info(f"Found matching file: {mkv_file} ->{reference}")
            new_filename = os.path.join(parent_dir, reference)
            if os.path.exists(new_filename):
                logger.info(f"File {new_filename} already exists, skipping")
                continue
            if os.path.exists(mkv_file) and not dry_run:
                logger.info(f"Renaming {mkv_file} to {new_filename}")
                os.rename(mkv_file, new_filename)
                # The source file is gone now, so no other reference can apply
                break
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
gitdir: ../../.git/modules/libraries/pgs2srt
|