mkv-episode-matcher 0.3.4__tar.gz → 0.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mkv-episode-matcher might be problematic. Click here for more details.

Files changed (54)
  1. mkv_episode_matcher-0.3.6/.coverage +0 -0
  2. mkv_episode_matcher-0.3.6/.github/workflows/tests.yml +40 -0
  3. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/PKG-INFO +30 -28
  4. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/README.md +28 -26
  5. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/docs/cli.md +9 -27
  6. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/docs/quickstart.md +15 -18
  7. mkv_episode_matcher-0.3.6/mkv_episode_matcher/episode_identification.py +282 -0
  8. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/episode_matcher.py +8 -0
  9. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/utils.py +14 -4
  10. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher.egg-info/PKG-INFO +30 -28
  11. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher.egg-info/SOURCES.txt +2 -1
  12. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/pyproject.toml +1 -0
  13. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/setup.cfg +1 -1
  14. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/tests/test_main.py +48 -18
  15. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/uv.lock +12 -1
  16. mkv_episode_matcher-0.3.4/mkv_episode_matcher/episode_identification.py +0 -150
  17. mkv_episode_matcher-0.3.4/tests/test_improvements.py +0 -59
  18. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/.gitattributes +0 -0
  19. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/.github/funding.yml +0 -0
  20. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/.github/workflows/documentation.yml +0 -0
  21. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/.github/workflows/python-publish.yml +0 -0
  22. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/.gitignore +0 -0
  23. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/.gitmodules +0 -0
  24. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/.python-version +0 -0
  25. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/.vscode/settings.json +0 -0
  26. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/docs/api/index.md +0 -0
  27. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/docs/configuration.md +0 -0
  28. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/docs/installation.md +0 -0
  29. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/docs/tips.md +0 -0
  30. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkdocs.yml +0 -0
  31. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/.gitattributes +0 -0
  32. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/__init__.py +0 -0
  33. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/__main__.py +0 -0
  34. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/config.py +0 -0
  35. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/.gitignore +0 -0
  36. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py +0 -0
  37. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py +0 -0
  38. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py +0 -0
  39. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/README.md +0 -0
  40. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/__init__.py +0 -0
  41. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/imagemaker.py +0 -0
  42. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/pgs2srt.py +0 -0
  43. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/pgsreader.py +0 -0
  44. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/libraries/pgs2srt/requirements.txt +0 -0
  45. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/mkv_to_srt.py +0 -0
  46. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/speech_to_text.py +0 -0
  47. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/subtitle_utils.py +0 -0
  48. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher/tmdb_client.py +0 -0
  49. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher.egg-info/dependency_links.txt +0 -0
  50. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher.egg-info/entry_points.txt +0 -0
  51. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher.egg-info/requires.txt +0 -0
  52. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/mkv_episode_matcher.egg-info/top_level.txt +0 -0
  53. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/setup.py +0 -0
  54. {mkv_episode_matcher-0.3.4 → mkv_episode_matcher-0.3.6}/tests/__init__.py +0 -0
Binary file
@@ -0,0 +1,40 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [main, master]
6
+ pull_request:
7
+ branches: [main, master]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version:
15
+ - "3.9"
16
+ - "3.10"
17
+ - "3.11"
18
+ - "3.12"
19
+
20
+ steps:
21
+ - uses: actions/checkout@v4
22
+
23
+ - name: Install uv and set the python version
24
+ uses: astral-sh/setup-uv@v4
25
+ with:
26
+ python-version: ${{ matrix.python-version }}
27
+
28
+ - name: Install dependencies
29
+ run: |
30
+ uv venv
31
+ uv pip install -e .
32
+
33
+ - name: Run tests with pytest and coverage
34
+ run: |
35
+ uv run --dev pytest --cov-branch --cov-report=xml
36
+
37
+ - name: Upload coverage reports to Codecov
38
+ uses: codecov/codecov-action@v5
39
+ with:
40
+ token: ${{ secrets.CODECOV_TOKEN }}
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: mkv-episode-matcher
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
5
5
  Home-page: https://github.com/Jsakkos/mkv-episode-matcher
6
6
  Author: Jonathan Sakkos
@@ -51,46 +51,31 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
51
51
  - ✨ **Bulk Processing**: Handle entire seasons at once
52
52
  - 🧪 **Dry Run Mode**: Test changes before applying
53
53
 
54
+ ## Prerequisites
55
+
56
+ - Python 3.9 or higher
57
+ - [FFmpeg](https://ffmpeg.org/download.html) installed and available in system PATH
58
+ - [Tesseract OCR](https://github.com/UB-Mannheim/tesseract/wiki) installed (required for image-based subtitle processing)
59
+ - TMDb API key
60
+ - OpenSubtitles account (optional, for subtitle downloads)
61
+
54
62
  ## Quick Start
55
63
 
56
64
  1. Install the package:
57
65
  ```bash
58
66
  pip install mkv-episode-matcher
59
67
  ```
68
+ 2. Download .srt subtitle files to ~/.mkv-episode-matcher/cache/data/Show Name/
60
69
 
61
- 2. Run on your show directory:
70
+ 3. Run on your show directory:
62
71
  ```bash
63
- mkv-match --show-dir "path/to/your/show" --season 1
72
+ mkv-match --show-dir "path/to/your/show"
64
73
  ```
65
74
 
66
- ## Requirements
67
-
68
- - Python 3.8 or higher
69
- - TMDb API key
70
- - OpenSubtitles account (optional, for subtitle downloads)
71
-
72
75
  ## Documentation
73
76
 
74
77
  Full documentation is available at [https://jsakkos.github.io/mkv-episode-matcher/](https://jsakkos.github.io/mkv-episode-matcher/)
75
78
 
76
- ## Basic Usage
77
-
78
- ```python
79
- from mkv_episode_matcher import process_show
80
-
81
- # Process all seasons
82
- process_show()
83
-
84
- # Process specific season
85
- process_show(season=1)
86
-
87
- # Test run without making changes
88
- process_show(season=1, dry_run=True)
89
-
90
- # Process and download subtitles
91
- process_show(get_subs=True)
92
- ```
93
-
94
79
  ## Directory Structure
95
80
 
96
81
  MKV Episode Matcher expects your TV shows to be organized as follows:
@@ -105,6 +90,23 @@ Show Name/
105
90
  │ └── episode2.mkv
106
91
  ```
107
92
 
93
+ ## Reference Subtitle File Structure
94
+
95
+ Subtitle files that are not automatically downloaded using the `--get-subs` flag should be named as follows:
96
+
97
+ ```
98
+
99
+ ~/.mkv-episode-matcher/cache/data/Show Name/
100
+ ├── Show Name - S01E01.srt
101
+ ├── Show Name - S01E02.srt
102
+ └── ...
103
+ ```
104
+
105
+ On Windows, the cache directory is located at `C:\Users\{username}\.mkv-episode-matcher\cache\data\`
106
+
107
+ Reference subtitle files should follow this naming pattern:
108
+ `{show_name} - S{season:02d}E{episode:02d}.srt`
109
+
108
110
  ## Contributing
109
111
 
110
112
  1. Fork the repository
@@ -22,46 +22,31 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
22
22
  - ✨ **Bulk Processing**: Handle entire seasons at once
23
23
  - 🧪 **Dry Run Mode**: Test changes before applying
24
24
 
25
+ ## Prerequisites
26
+
27
+ - Python 3.9 or higher
28
+ - [FFmpeg](https://ffmpeg.org/download.html) installed and available in system PATH
29
+ - [Tesseract OCR](https://github.com/UB-Mannheim/tesseract/wiki) installed (required for image-based subtitle processing)
30
+ - TMDb API key
31
+ - OpenSubtitles account (optional, for subtitle downloads)
32
+
25
33
  ## Quick Start
26
34
 
27
35
  1. Install the package:
28
36
  ```bash
29
37
  pip install mkv-episode-matcher
30
38
  ```
39
+ 2. Download .srt subtitle files to ~/.mkv-episode-matcher/cache/data/Show Name/
31
40
 
32
- 2. Run on your show directory:
41
+ 3. Run on your show directory:
33
42
  ```bash
34
- mkv-match --show-dir "path/to/your/show" --season 1
43
+ mkv-match --show-dir "path/to/your/show"
35
44
  ```
36
45
 
37
- ## Requirements
38
-
39
- - Python 3.8 or higher
40
- - TMDb API key
41
- - OpenSubtitles account (optional, for subtitle downloads)
42
-
43
46
  ## Documentation
44
47
 
45
48
  Full documentation is available at [https://jsakkos.github.io/mkv-episode-matcher/](https://jsakkos.github.io/mkv-episode-matcher/)
46
49
 
47
- ## Basic Usage
48
-
49
- ```python
50
- from mkv_episode_matcher import process_show
51
-
52
- # Process all seasons
53
- process_show()
54
-
55
- # Process specific season
56
- process_show(season=1)
57
-
58
- # Test run without making changes
59
- process_show(season=1, dry_run=True)
60
-
61
- # Process and download subtitles
62
- process_show(get_subs=True)
63
- ```
64
-
65
50
  ## Directory Structure
66
51
 
67
52
  MKV Episode Matcher expects your TV shows to be organized as follows:
@@ -76,6 +61,23 @@ Show Name/
76
61
  │ └── episode2.mkv
77
62
  ```
78
63
 
64
+ ## Reference Subtitle File Structure
65
+
66
+ Subtitle files that are not automatically downloaded using the `--get-subs` flag should be named as follows:
67
+
68
+ ```
69
+
70
+ ~/.mkv-episode-matcher/cache/data/Show Name/
71
+ ├── Show Name - S01E01.srt
72
+ ├── Show Name - S01E02.srt
73
+ └── ...
74
+ ```
75
+
76
+ On Windows, the cache directory is located at `C:\Users\{username}\.mkv-episode-matcher\cache\data\`
77
+
78
+ Reference subtitle files should follow this naming pattern:
79
+ `{show_name} - S{season:02d}E{episode:02d}.srt`
80
+
79
81
  ## Contributing
80
82
 
81
83
  1. Fork the repository
@@ -16,14 +16,14 @@ mkv-match --show-dir "/path/to/show" --season 1
16
16
 
17
17
  ## Command Options
18
18
 
19
- | Option | Description | Default |
20
- |--------|-------------|---------|
21
- | `--show-dir` | Show directory path | None |
22
- | `--season` | Season number to process | None (all) |
23
- | `--dry-run` | Test without making changes | False |
24
- | `--get-subs` | Download subtitles | False |
25
- | `--tmdb-api-key` | TMDb API key | None |
26
- | `--tesseract-path` | Path to Tesseract | None |
19
+ | Option | Description | Default |
20
+ | ------------------ | --------------------------- | ---------- |
21
+ | `--show-dir` | Show directory path | None |
22
+ | `--season` | Season number to process | None (all) |
23
+ | `--dry-run` | Test without making changes | False |
24
+ | `--get-subs` | Download subtitles | False |
25
+ | `--tmdb-api-key` | TMDb API key | None |
26
+ | `--tesseract-path` | Path to Tesseract | None |
27
27
 
28
28
  ## Examples
29
29
 
@@ -55,25 +55,6 @@ mkv-match \
55
55
  --dry-run true
56
56
  ```
57
57
 
58
- ## Environment Variables
59
-
60
- Alternative to command line options:
61
-
62
- ```bash
63
- export TMDB_API_KEY="your_key"
64
- export SHOW_DIR="/path/to/shows"
65
- mkv-match
66
- ```
67
-
68
- ## Exit Codes
69
-
70
- | Code | Meaning |
71
- |------|---------|
72
- | 0 | Success |
73
- | 1 | General error |
74
- | 2 | Configuration error |
75
- | 3 | API error |
76
-
77
58
  ## Logging
78
59
 
79
60
  Logs are stored in:
@@ -89,3 +70,4 @@ Logs are stored in:
89
70
  2. Use dry-run first to test
90
71
  3. Check logs for details
91
72
  4. Use full paths for reliability
73
+ 5. Avoid using a trailing slash in paths
@@ -42,34 +42,31 @@ Show Name/
42
42
  │ └── episode2.mkv
43
43
  ```
44
44
 
45
- ## Python API Usage
45
+ ## Reference Subtitle File Structure
46
46
 
47
- ```python
48
- from mkv_episode_matcher import process_show
47
+ Subtitle files that are not automatically downloaded using the `--get-subs` flag should be named as follows:
49
48
 
50
- # Process all seasons
51
- process_show()
52
-
53
- # Process specific season
54
- process_show(season=1)
55
-
56
- # Test run
57
- process_show(season=1, dry_run=True)
58
-
59
- # With subtitles
60
- process_show(season=1, get_subs=True)
49
+ ```plaintext
50
+ ~/.mkv-episode-matcher/cache/data/Show Name/
51
+ ├── Show Name - S01E01.srt
52
+ ├── Show Name - S01E02.srt
53
+ └── ...
61
54
  ```
62
55
 
63
56
  ## Configuration
64
57
 
65
- Create a configuration file at `~/.mkv-episode-matcher/config.ini`:
58
+ The configuration file is automatically generated at `~/.mkv-episode-matcher/config.ini`:
66
59
 
67
60
  ```ini
68
61
  [Config]
69
- tmdb_api_key = your_api_key
70
- open_subtitles_api_key = your_opensubs_key
71
- show_dir = /path/to/shows
62
+ tmdb_api_key = your_tmdb_api_key
63
+ show_dir = /path/to/show
72
64
  max_threads = 4
65
+ open_subtitles_api_key = your_opensubs_key
66
+ open_subtitles_user_agent = your_user_agent
67
+ open_subtitles_username = your_username
68
+ open_subtitles_password = your_password
69
+ tesseract_path = C:\Program Files\Tesseract-OCR\tesseract.exe
73
70
  ```
74
71
 
75
72
  ## Next Steps
@@ -0,0 +1,282 @@
1
+ import json
2
+ import os
3
+ import subprocess
4
+ import tempfile
5
+ from pathlib import Path
6
+ import torch
7
+ from rapidfuzz import fuzz
8
+ from loguru import logger
9
+ import whisper
10
+ import numpy as np
11
+ import re
12
+ from pathlib import Path
13
+ import chardet
14
+ from loguru import logger
15
+
16
class EpisodeMatcher:
    """Identify the episode contained in a video by matching speech to subtitles.

    Audio chunks are extracted with ffmpeg, transcribed with Whisper and
    fuzzy-matched against the same time window of the reference ``.srt``
    files found under ``<cache_dir>/data/<show_name>``.
    """

    def __init__(self, cache_dir, show_name, min_confidence=0.6):
        """
        Args:
            cache_dir (str or Path): Cache root; reference subtitles are read
                from ``cache_dir / "data" / show_name``.
            show_name (str): Name of the show's reference subtitle folder.
            min_confidence (float): Score (0-1) a chunk must exceed to count
                as a match.
        """
        self.cache_dir = Path(cache_dir)
        self.min_confidence = min_confidence
        self.show_name = show_name
        self.chunk_duration = 300  # 5 minutes
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.temp_dir = Path(tempfile.gettempdir()) / "whisper_chunks"
        self.temp_dir.mkdir(exist_ok=True)

    def clean_text(self, text):
        """Lower-case ``text``, drop ``[...]``/``<...>`` spans and collapse whitespace."""
        text = text.lower().strip()
        text = re.sub(r'\[.*?\]|\<.*?\>', '', text)
        text = re.sub(r'([A-Za-z])-\1+', r'\1', text)
        return ' '.join(text.split())

    def chunk_score(self, whisper_chunk, ref_chunk):
        """Return a 0-1 similarity between a transcript and a reference chunk.

        The score is a weighted blend of rapidfuzz's ``token_sort_ratio``
        (70%) and ``partial_ratio`` (30%) computed on the cleaned texts.
        """
        whisper_clean = self.clean_text(whisper_chunk)
        ref_clean = self.clean_text(ref_chunk)
        return (fuzz.token_sort_ratio(whisper_clean, ref_clean) * 0.7 +
                fuzz.partial_ratio(whisper_clean, ref_clean) * 0.3) / 100.0

    def extract_audio_chunk(self, mkv_file, start_time):
        """Extract a chunk of audio from MKV file.

        Args:
            mkv_file (str or Path): Video file to read.
            start_time (int or float): Chunk start offset in seconds.

        Returns:
            str: Path of the 16 kHz mono PCM WAV file for this chunk. An
            already existing chunk file for the same offset is reused.
        """
        chunk_path = self.temp_dir / f"chunk_{start_time}.wav"
        if not chunk_path.exists():
            cmd = [
                'ffmpeg',
                '-ss', str(start_time),
                '-t', str(self.chunk_duration),
                '-i', mkv_file,
                '-vn',
                '-acodec', 'pcm_s16le',
                '-ar', '16000',
                '-ac', '1',
                str(chunk_path)
            ]
            result = subprocess.run(cmd, capture_output=True)
            # Surface ffmpeg failures instead of silently returning a path
            # that may not exist; the caller still decides what to do.
            if result.returncode != 0:
                logger.warning(
                    f"ffmpeg exited with code {result.returncode} while extracting {chunk_path}"
                )
        return str(chunk_path)

    def load_reference_chunk(self, srt_file, chunk_idx):
        """
        Load reference subtitles for a specific time chunk with robust encoding handling.

        Args:
            srt_file (str or Path): Path to the SRT file
            chunk_idx (int): Index of the chunk to load

        Returns:
            str: Combined text from the subtitle chunk, or '' if the file
            could not be read or parsed (the error is logged).
        """
        chunk_start = chunk_idx * self.chunk_duration
        chunk_end = chunk_start + self.chunk_duration

        try:
            # Read the file content using our robust reader
            reader = SubtitleReader()
            content = reader.read_srt_file(srt_file)

            # Extract subtitles for the time chunk
            text_lines = reader.extract_subtitle_chunk(content, chunk_start, chunk_end)

            return ' '.join(text_lines)

        except Exception as e:
            logger.error(f"Error loading reference chunk from {srt_file}: {e}")
            return ''

    def _find_reference_files(self, season_number):
        """Return the sorted reference ``.srt`` files belonging to ``season_number``.

        Accepts ``S01E01``/``S1E01`` and ``01x01``/``1x01`` style names,
        case-insensitively. The look-behind keeps ``1x`` from matching
        inside ``11x01``.
        """
        reference_dir = self.cache_dir / "data" / self.show_name
        season = int(season_number)
        season_re = re.compile(
            rf"(?:S0*{season}E|(?<!\d)0*{season}x)\d+", re.IGNORECASE
        )
        return sorted(
            f for f in reference_dir.glob("*.srt") if season_re.search(f.name)
        )

    @staticmethod
    def _parse_season_episode(stem):
        """Return ``(season, episode)`` parsed from a file stem, or None."""
        for pattern in (r'S(\d+)E(\d+)', r'(?<!\d)(\d+)x(\d+)'):
            found = re.search(pattern, stem, re.IGNORECASE)
            if found:
                return int(found.group(1)), int(found.group(2))
        return None

    def identify_episode(self, video_file, temp_dir, season_number):
        """Match ``video_file`` against the season's reference subtitles.

        Args:
            video_file (str or Path): Video to identify.
            temp_dir: Unused; kept for interface compatibility.
            season_number (int): Season whose reference files are compared.

        Returns:
            dict or None: ``season``, ``episode``, ``confidence`` and
            ``reference_file`` of the best match of the first chunk whose
            best score exceeds ``min_confidence``; None if no reference
            files exist or none of the first three chunks matches.

        Raises:
            subprocess.CalledProcessError, ValueError: if ffprobe fails or
            reports a non-numeric duration.
        """
        try:
            # Get video duration
            duration = float(subprocess.check_output([
                'ffprobe', '-v', 'error',
                '-show_entries', 'format=duration',
                '-of', 'default=noprint_wrappers=1:nokey=1',
                video_file
            ]).decode())

            total_chunks = int(np.ceil(duration / self.chunk_duration))

            reference_files = self._find_reference_files(season_number)
            if not reference_files:
                logger.error(f"No reference files found for season {season_number}")
                return None

            # Load the model only once we know there is something to compare.
            model = whisper.load_model("base", device=self.device)

            # Process chunks until match found
            for chunk_idx in range(min(3, total_chunks)):  # Only try first 3 chunks
                start_time = chunk_idx * self.chunk_duration
                audio_path = self.extract_audio_chunk(video_file, start_time)

                # Transcribe chunk
                result = model.transcribe(
                    audio_path,
                    task="transcribe",
                    language="en"
                )

                chunk_text = result["text"]
                best_confidence = 0
                best_match = None

                # Compare with reference chunks
                for ref_file in reference_files:
                    ref_text = self.load_reference_chunk(ref_file, chunk_idx)
                    confidence = self.chunk_score(chunk_text, ref_text)

                    if confidence > best_confidence:
                        best_confidence = confidence
                        best_match = ref_file

                # Decide on the best score, not on whichever file was
                # compared last.
                if best_match is None or best_confidence <= self.min_confidence:
                    continue

                season_ep = self._parse_season_episode(best_match.stem)
                if season_ep:
                    season, episode = season_ep
                    return {
                        'season': season,
                        'episode': episode,
                        'confidence': best_confidence,
                        'reference_file': str(best_match),
                    }

            return None

        finally:
            # Cleanup temp files
            for file in self.temp_dir.glob("chunk_*.wav"):
                file.unlink(missing_ok=True)
166
+
167
def detect_file_encoding(file_path):
    """
    Guess the text encoding of a file with chardet.

    Args:
        file_path (str or Path): File to inspect

    Returns:
        str: The encoding reported by chardet, or 'utf-8' when chardet
        reports none or any error occurs while reading/detecting
    """
    fallback = 'utf-8'
    try:
        with open(file_path, 'rb') as handle:
            detection = chardet.detect(handle.read())
        encoding, confidence = detection['encoding'], detection['confidence']

        logger.debug(f"Detected encoding {encoding} with {confidence:.2%} confidence for {file_path}")
        return encoding or fallback
    except Exception as e:
        # Best effort: detection problems must not stop the caller.
        logger.warning(f"Error detecting encoding for {file_path}: {e}")
        return fallback
189
+
190
def read_file_with_fallback(file_path, encodings=None):
    """
    Read a text file, trying several encodings in order of preference.

    Args:
        file_path (str or Path): Path to the file
        encodings (list): Encodings to try in order. Defaults to the
            detected encoding followed by common subtitle encodings.

    Returns:
        str: File contents decoded with the first encoding that works

    Raises:
        ValueError: If file cannot be read with any encoding
        OSError: Propagated unchanged if the file cannot be opened
    """
    if encodings is None:
        # First try detected encoding, then fallback to common subtitle encodings.
        # latin-1 decodes every byte sequence, so it has to come last or
        # the encodings after it would never be tried.
        detected = detect_file_encoding(file_path)
        encodings = [detected, 'utf-8', 'cp1252', 'latin-1']

    file_path = Path(file_path)
    errors = []

    # dict.fromkeys drops repeated encodings while keeping their order.
    for encoding in dict.fromkeys(encodings):
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                content = f.read()
        except (UnicodeDecodeError, LookupError) as e:
            # LookupError: the detector reported a codec Python does not know.
            errors.append(f"{encoding}: {str(e)}")
            continue

        logger.debug(f"Successfully read {file_path} using {encoding} encoding")
        return content

    error_msg = f"Failed to read {file_path} with any encoding. Errors:\n" + "\n".join(errors)
    logger.error(error_msg)
    raise ValueError(error_msg)
225
+
226
class SubtitleReader:
    """Helper class for reading and parsing subtitle files."""

    @staticmethod
    def parse_timestamp(timestamp):
        """Parse an SRT timestamp (``HH:MM:SS,mmm``) into seconds.

        Raises:
            ValueError: If the timestamp does not have three numeric fields.
        """
        hours, minutes, seconds = timestamp.replace(',', '.').split(':')
        return float(hours) * 3600 + float(minutes) * 60 + float(seconds)

    @staticmethod
    def read_srt_file(file_path):
        """
        Read an SRT file and return its contents with robust encoding handling.

        Args:
            file_path (str or Path): Path to the SRT file

        Returns:
            str: Contents of the SRT file
        """
        return read_file_with_fallback(file_path)

    @staticmethod
    def extract_subtitle_chunk(content, start_time, end_time):
        """
        Extract subtitle text for a specific time window.

        A cue belongs to the window when its END timestamp lies within
        ``[start_time, end_time]`` (both bounds inclusive).

        Args:
            content (str): Full SRT file content
            start_time (float): Chunk start time in seconds
            end_time (float): Chunk end time in seconds

        Returns:
            list: List of subtitle texts within the time window
        """
        text_lines = []

        # Normalise line endings so CRLF content parses like LF content.
        normalized = content.replace('\r\n', '\n').replace('\r', '\n')

        # Cues are separated by one or more blank (or whitespace-only)
        # lines; splitting on a literal '\n\n' would drop every cue that
        # follows an extra blank line.
        for block in re.split(r'\n\s*\n', normalized.strip()):
            lines = block.strip().split('\n')
            if len(lines) < 3 or '-->' not in lines[1]:
                continue

            try:
                text = ' '.join(lines[2:])

                # Take the first token after the arrow so spacing variants
                # and trailing position tags (e.g. "X1:10") are tolerated.
                end_stamp = lines[1].split('-->', 1)[1].split()[0]
                total_seconds = SubtitleReader.parse_timestamp(end_stamp)

                if start_time <= total_seconds <= end_time:
                    text_lines.append(text)

            except (IndexError, ValueError) as e:
                logger.warning(f"Error parsing subtitle block: {e}")
                continue

        return text_lines
@@ -29,6 +29,14 @@ def process_show(season=None, dry_run=False, get_subs=False):
29
29
  show_name = clean_text(os.path.basename(show_dir))
30
30
  matcher = EpisodeMatcher(CACHE_DIR, show_name)
31
31
 
32
+ # Early check for reference files
33
+ reference_dir = Path(CACHE_DIR) / "data" / show_name
34
+ reference_files = list(reference_dir.glob("*.srt"))
35
+ if not reference_files:
36
+ logger.error(f"No reference subtitle files found in {reference_dir}")
37
+ logger.info("Please download reference subtitles first")
38
+ return
39
+
32
40
  season_paths = get_valid_seasons(show_dir)
33
41
  if not season_paths:
34
42
  logger.warning(f"No seasons with .mkv files found")
@@ -300,7 +300,7 @@ def extract_srt_text(filepath):
300
300
 
301
301
  def extract_season_episode(filename):
302
302
  """
303
- Extract season and episode numbers from filename.
303
+ Extract season and episode numbers from filename with support for multiple formats.
304
304
 
305
305
  Args:
306
306
  filename (str): Filename to parse
@@ -308,10 +308,20 @@ def extract_season_episode(filename):
308
308
  Returns:
309
309
  tuple: (season_number, episode_number)
310
310
  """
311
- match = re.search(r'S(\d+)E(\d+)', filename)
312
- if match:
313
- return int(match.group(1)), int(match.group(2))
311
+ # List of patterns to try
312
+ patterns = [
313
+ r'S(\d+)E(\d+)', # S01E01
314
+ r'(\d+)x(\d+)', # 1x01 or 01x01
315
+ r'Season\s*(\d+).*?(\d+)' # Season 1 - 01
316
+ ]
317
+
318
+ for pattern in patterns:
319
+ match = re.search(pattern, filename, re.IGNORECASE)
320
+ if match:
321
+ return int(match.group(1)), int(match.group(2))
322
+
314
323
  return None, None
324
+
315
325
  def process_srt_files(show_dir):
316
326
  """
317
327
  Process all SRT files in the given directory and its subdirectories.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: mkv-episode-matcher
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
5
5
  Home-page: https://github.com/Jsakkos/mkv-episode-matcher
6
6
  Author: Jonathan Sakkos
@@ -51,46 +51,31 @@ Automatically match and rename your MKV TV episodes using The Movie Database (TM
51
51
  - ✨ **Bulk Processing**: Handle entire seasons at once
52
52
  - 🧪 **Dry Run Mode**: Test changes before applying
53
53
 
54
+ ## Prerequisites
55
+
56
+ - Python 3.9 or higher
57
+ - [FFmpeg](https://ffmpeg.org/download.html) installed and available in system PATH
58
+ - [Tesseract OCR](https://github.com/UB-Mannheim/tesseract/wiki) installed (required for image-based subtitle processing)
59
+ - TMDb API key
60
+ - OpenSubtitles account (optional, for subtitle downloads)
61
+
54
62
  ## Quick Start
55
63
 
56
64
  1. Install the package:
57
65
  ```bash
58
66
  pip install mkv-episode-matcher
59
67
  ```
68
+ 2. Download .srt subtitle files to ~/.mkv-episode-matcher/cache/data/Show Name/
60
69
 
61
- 2. Run on your show directory:
70
+ 3. Run on your show directory:
62
71
  ```bash
63
- mkv-match --show-dir "path/to/your/show" --season 1
72
+ mkv-match --show-dir "path/to/your/show"
64
73
  ```
65
74
 
66
- ## Requirements
67
-
68
- - Python 3.8 or higher
69
- - TMDb API key
70
- - OpenSubtitles account (optional, for subtitle downloads)
71
-
72
75
  ## Documentation
73
76
 
74
77
  Full documentation is available at [https://jsakkos.github.io/mkv-episode-matcher/](https://jsakkos.github.io/mkv-episode-matcher/)
75
78
 
76
- ## Basic Usage
77
-
78
- ```python
79
- from mkv_episode_matcher import process_show
80
-
81
- # Process all seasons
82
- process_show()
83
-
84
- # Process specific season
85
- process_show(season=1)
86
-
87
- # Test run without making changes
88
- process_show(season=1, dry_run=True)
89
-
90
- # Process and download subtitles
91
- process_show(get_subs=True)
92
- ```
93
-
94
79
  ## Directory Structure
95
80
 
96
81
  MKV Episode Matcher expects your TV shows to be organized as follows:
@@ -105,6 +90,23 @@ Show Name/
105
90
  │ └── episode2.mkv
106
91
  ```
107
92
 
93
+ ## Reference Subtitle File Structure
94
+
95
+ Subtitle files that are not automatically downloaded using the `--get-subs` flag should be named as follows:
96
+
97
+ ```
98
+
99
+ ~/.mkv-episode-matcher/cache/data/Show Name/
100
+ ├── Show Name - S01E01.srt
101
+ ├── Show Name - S01E02.srt
102
+ └── ...
103
+ ```
104
+
105
+ On Windows, the cache directory is located at `C:\Users\{username}\.mkv-episode-matcher\cache\data\`
106
+
107
+ Reference subtitle files should follow this naming pattern:
108
+ `{show_name} - S{season:02d}E{episode:02d}.srt`
109
+
108
110
  ## Contributing
109
111
 
110
112
  1. Fork the repository
@@ -1,3 +1,4 @@
1
+ .coverage
1
2
  .gitattributes
2
3
  .gitignore
3
4
  .gitmodules
@@ -11,6 +12,7 @@ uv.lock
11
12
  .github/funding.yml
12
13
  .github/workflows/documentation.yml
13
14
  .github/workflows/python-publish.yml
15
+ .github/workflows/tests.yml
14
16
  .vscode/settings.json
15
17
  docs/cli.md
16
18
  docs/configuration.md
@@ -46,5 +48,4 @@ mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/SubZero.py
46
48
  mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/post_processing.py
47
49
  mkv_episode_matcher/libraries/pgs2srt/Libraries/SubZero/dictionaries/data.py
48
50
  tests/__init__.py
49
- tests/test_improvements.py
50
51
  tests/test_main.py
@@ -47,6 +47,7 @@ dev = [
47
47
  "pytest-cov>=6.0.0",
48
48
  "pytest>=8.3.3",
49
49
  "ruff>=0.8.0",
50
+ "chardet>=5.2.0",
50
51
  ]
51
52
 
52
53
 
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = mkv_episode_matcher
3
- version = 0.3.4
3
+ version = 0.3.6
4
4
  author = Jonathan Sakkos
5
5
  author_email = jonathansakkos@gmail.com
6
6
  description = The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
@@ -13,6 +13,31 @@ from mkv_episode_matcher.utils import (
13
13
  )
14
14
  from mkv_episode_matcher.episode_identification import EpisodeMatcher
15
15
  from mkv_episode_matcher.config import get_config, set_config
16
+ from unittest.mock import Mock, patch
17
+
18
+
19
+ # @pytest.fixture
20
+ # def mock_config():
21
+ # return {
22
+ # "tmdb_api_key": "test_key",
23
+ # "show_dir": "/test/path",
24
+ # "max_threads": 4,
25
+ # "tesseract_path": "/usr/bin/tesseract",
26
+ # }
27
+
28
+
29
+ @pytest.fixture
30
+ def mock_episode_data():
31
+ return {
32
+ "name": "Test Episode",
33
+ "season_number": 1,
34
+ "episode_number": 1,
35
+ "overview": "Test overview",
36
+ }
37
+
38
+ @pytest.fixture
39
+ def mock_seasons():
40
+ return ["/test/path/Season 1"]
16
41
 
17
42
  @pytest.fixture
18
43
  def temp_show_dir(tmp_path):
@@ -101,8 +126,8 @@ class TestEpisodeMatcher:
101
126
  return EpisodeMatcher(tmp_path, "Test Show")
102
127
 
103
128
  def test_clean_text(self, matcher):
104
- text = "Test [action] <tag> T-t-test"
105
- assert matcher.clean_text(text) == "test action tag test"
129
+ text = "Test [action] T-t-test"
130
+ assert matcher.clean_text(text) == "test action test"
106
131
 
107
132
  def test_chunk_score(self, matcher):
108
133
  score = matcher.chunk_score("Test dialogue", "test dialog")
@@ -116,22 +141,27 @@ class TestEpisodeMatcher:
116
141
  assert isinstance(chunk, str)
117
142
  assert mock_run.called
118
143
 
119
- class TestProcessShow:
120
- @patch('mkv_episode_matcher.episode_matcher.get_valid_seasons')
121
- @patch('mkv_episode_matcher.episode_matcher.get_config')
122
- def test_process_show_no_seasons(self, mock_config, mock_seasons, mock_config_data):
123
- mock_seasons.return_value = []
124
- mock_config.return_value = mock_config_data
125
- process_show()
126
- mock_seasons.assert_called_once()
127
-
128
- @patch('mkv_episode_matcher.episode_matcher.get_valid_seasons')
129
- @patch('mkv_episode_matcher.episode_matcher.get_config')
130
- def test_process_show_with_season(self, mock_config, mock_seasons, temp_show_dir, mock_config_data):
131
- mock_seasons.return_value = [str(temp_show_dir / "Season 1")]
132
- mock_config.return_value = mock_config_data
133
- process_show(season=1)
134
- mock_seasons.assert_called_once()
144
+ class TestEpisodeMatcher:
145
+ def test_extract_season_episode(self):
146
+ from mkv_episode_matcher.utils import extract_season_episode
147
+
148
+ # Test valid filename
149
+ assert extract_season_episode("Show - S01E02.mkv") == (1, 2)
150
+
151
+ # Test invalid filename
152
+ assert extract_season_episode("invalid.mkv") == (None, None)
153
+
154
+ @patch("mkv_episode_matcher.tmdb_client.requests.get")
155
+ def test_fetch_show_id(self, mock_get):
156
+ from mkv_episode_matcher.tmdb_client import fetch_show_id
157
+
158
+ mock_response = Mock()
159
+ mock_response.status_code = 200
160
+ mock_response.json.return_value = {"results": [{"id": 12345}]}
161
+ mock_get.return_value = mock_response
162
+
163
+ assert fetch_show_id("Test Show") == "12345"
164
+
135
165
 
136
166
  if __name__ == '__main__':
137
167
  pytest.main(['-v'])
@@ -24,6 +24,15 @@ wheels = [
24
24
  { url = "https://files.pythonhosted.org/packages/12/90/3c9ff0512038035f59d279fddeb79f5f1eccd8859f06d6163c58798b9487/certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8", size = 167321 },
25
25
  ]
26
26
 
27
+ [[package]]
28
+ name = "chardet"
29
+ version = "5.2.0"
30
+ source = { registry = "https://pypi.org/simple" }
31
+ sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618 }
32
+ wheels = [
33
+ { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385 },
34
+ ]
35
+
27
36
  [[package]]
28
37
  name = "charset-normalizer"
29
38
  version = "3.4.0"
@@ -374,7 +383,7 @@ wheels = [
374
383
 
375
384
  [[package]]
376
385
  name = "mkv-episode-matcher"
377
- version = "0.3.3.post1.dev0+g40bb202.d20241207"
386
+ version = "0.3.5.post1.dev0+g1beb2cf.d20250112"
378
387
  source = { editable = "." }
379
388
  dependencies = [
380
389
  { name = "configparser" },
@@ -391,6 +400,7 @@ dependencies = [
391
400
 
392
401
  [package.dev-dependencies]
393
402
  dev = [
403
+ { name = "chardet" },
394
404
  { name = "pytest" },
395
405
  { name = "pytest-cov" },
396
406
  { name = "ruff" },
@@ -412,6 +422,7 @@ requires-dist = [
412
422
 
413
423
  [package.metadata.requires-dev]
414
424
  dev = [
425
+ { name = "chardet", specifier = ">=5.2.0" },
415
426
  { name = "pytest", specifier = ">=8.3.3" },
416
427
  { name = "pytest-cov", specifier = ">=6.0.0" },
417
428
  { name = "ruff", specifier = ">=0.8.0" },
@@ -1,150 +0,0 @@
1
- import json
2
- import os
3
- import subprocess
4
- import tempfile
5
- from pathlib import Path
6
- import torch
7
- from rapidfuzz import fuzz
8
- from loguru import logger
9
- import whisper
10
- import numpy as np
11
- import re
12
- class EpisodeMatcher:
13
- def __init__(self, cache_dir, show_name, min_confidence=0.6):
14
- self.cache_dir = Path(cache_dir)
15
- self.min_confidence = min_confidence
16
- self.show_name = show_name
17
- self.chunk_duration = 300 # 5 minutes
18
- self.device = "cuda" if torch.cuda.is_available() else "cpu"
19
- self.temp_dir = Path(tempfile.gettempdir()) / "whisper_chunks"
20
- self.temp_dir.mkdir(exist_ok=True)
21
-
22
- def clean_text(self, text):
23
- text = text.lower().strip()
24
- text = re.sub(r'\[.*?\]|\<.*?\>', '', text)
25
- text = re.sub(r'([A-Za-z])-\1+', r'\1', text)
26
- return ' '.join(text.split())
27
-
28
- def chunk_score(self, whisper_chunk, ref_chunk):
29
- whisper_clean = self.clean_text(whisper_chunk)
30
- ref_clean = self.clean_text(ref_chunk)
31
- return (fuzz.token_sort_ratio(whisper_clean, ref_clean) * 0.7 +
32
- fuzz.partial_ratio(whisper_clean, ref_clean) * 0.3) / 100.0
33
-
34
- def extract_audio_chunk(self, mkv_file, start_time):
35
- """Extract a chunk of audio from MKV file."""
36
- chunk_path = self.temp_dir / f"chunk_{start_time}.wav"
37
- if not chunk_path.exists():
38
- cmd = [
39
- 'ffmpeg',
40
- '-ss', str(start_time),
41
- '-t', str(self.chunk_duration),
42
- '-i', mkv_file,
43
- '-vn',
44
- '-acodec', 'pcm_s16le',
45
- '-ar', '16000',
46
- '-ac', '1',
47
- str(chunk_path)
48
- ]
49
- subprocess.run(cmd, capture_output=True)
50
- return str(chunk_path)
51
-
52
- def load_reference_chunk(self, srt_file, chunk_idx):
53
- """Load reference subtitles for a specific time chunk."""
54
- chunk_start = chunk_idx * self.chunk_duration
55
- chunk_end = chunk_start + self.chunk_duration
56
- text_lines = []
57
-
58
- with open(srt_file, 'r', encoding='utf-8') as f:
59
- content = f.read().strip()
60
-
61
- for block in content.split('\n\n'):
62
- lines = block.split('\n')
63
- if len(lines) < 3 or '-->' not in lines[1]: # Skip malformed blocks
64
- continue
65
-
66
- try:
67
- timestamp = lines[1]
68
- text = ' '.join(lines[2:])
69
-
70
- end_time = timestamp.split(' --> ')[1].strip()
71
- hours, minutes, seconds = map(float, end_time.replace(',','.').split(':'))
72
- total_seconds = hours * 3600 + minutes * 60 + seconds
73
-
74
- if chunk_start <= total_seconds <= chunk_end:
75
- text_lines.append(text)
76
-
77
- except (IndexError, ValueError):
78
- continue
79
-
80
- return ' '.join(text_lines)
81
-
82
- def identify_episode(self, video_file, temp_dir, season_number):
83
- try:
84
- # Get video duration
85
- duration = float(subprocess.check_output([
86
- 'ffprobe', '-v', 'error',
87
- '-show_entries', 'format=duration',
88
- '-of', 'default=noprint_wrappers=1:nokey=1',
89
- video_file
90
- ]).decode())
91
-
92
- total_chunks = int(np.ceil(duration / self.chunk_duration))
93
-
94
- # Load Whisper model
95
- model = whisper.load_model("base", device=self.device)
96
-
97
- # Get season-specific reference files
98
- reference_dir = self.cache_dir / "data" / self.show_name
99
- season_pattern = f"S{season_number:02d}E"
100
- reference_files = [
101
- f for f in reference_dir.glob("*.srt")
102
- if season_pattern in f.name
103
- ]
104
-
105
- if not reference_files:
106
- logger.error(f"No reference files found for season {season_number}")
107
- return None
108
-
109
- # Process chunks until match found
110
- for chunk_idx in range(min(3, total_chunks)): # Only try first 3 chunks
111
- start_time = chunk_idx * self.chunk_duration
112
- audio_path = self.extract_audio_chunk(video_file, start_time)
113
-
114
- # Transcribe chunk
115
- result = model.transcribe(
116
- audio_path,
117
- task="transcribe",
118
- language="en"
119
- )
120
-
121
- chunk_text = result["text"]
122
- best_confidence = 0
123
- best_match = None
124
-
125
- # Compare with reference chunks
126
- for ref_file in reference_files:
127
- ref_text = self.load_reference_chunk(ref_file, chunk_idx)
128
- confidence = self.chunk_score(chunk_text, ref_text)
129
-
130
- if confidence > best_confidence:
131
- best_confidence = confidence
132
- best_match = ref_file
133
-
134
- if confidence > self.min_confidence:
135
- season_ep = re.search(r'S(\d+)E(\d+)', best_match.stem)
136
- if season_ep:
137
- season, episode = map(int, season_ep.groups())
138
- return {
139
- 'season': season,
140
- 'episode': episode,
141
- 'confidence': best_confidence,
142
- 'reference_file': str(best_match),
143
- }
144
-
145
- return None
146
-
147
- finally:
148
- # Cleanup temp files
149
- for file in self.temp_dir.glob("chunk_*.wav"):
150
- file.unlink()
@@ -1,59 +0,0 @@
1
- from unittest.mock import Mock, patch
2
-
3
- import pytest
4
-
5
-
6
- @pytest.fixture
7
- def mock_config():
8
- return {
9
- "tmdb_api_key": "test_key",
10
- "show_dir": "/test/path",
11
- "max_threads": 4,
12
- "tesseract_path": "/usr/bin/tesseract",
13
- }
14
-
15
-
16
- @pytest.fixture
17
- def mock_episode_data():
18
- return {
19
- "name": "Test Episode",
20
- "season_number": 1,
21
- "episode_number": 1,
22
- "overview": "Test overview",
23
- }
24
-
25
-
26
- class TestEpisodeMatcher:
27
- def test_extract_season_episode(self):
28
- from mkv_episode_matcher.episode_matcher import extract_season_episode
29
-
30
- # Test valid filename
31
- assert extract_season_episode("Show - S01E02.mkv") == (1, 2)
32
-
33
- # Test invalid filename
34
- assert extract_season_episode("invalid.mkv") == (None, None)
35
-
36
- @patch("mkv_episode_matcher.tmdb_client.requests.get")
37
- def test_fetch_show_id(self, mock_get):
38
- from mkv_episode_matcher.tmdb_client import fetch_show_id
39
-
40
- mock_response = Mock()
41
- mock_response.status_code = 200
42
- mock_response.json.return_value = {"results": [{"id": 12345}]}
43
- mock_get.return_value = mock_response
44
-
45
- assert fetch_show_id("Test Show") == "12345"
46
-
47
- @patch("mkv_episode_matcher.utils.OpenSubtitles")
48
- def test_get_subtitles(self, mock_subtitles):
49
- from mkv_episode_matcher.utils import get_subtitles
50
-
51
- # Test subtitle download
52
- mock_subtitles.return_value.search.return_value.data = [
53
- {"file_name": "Test.Show.S01E01.srt"}
54
- ]
55
-
56
- with patch("pathlib.Path.exists", return_value=False):
57
- get_subtitles(12345, {1})
58
-
59
- mock_subtitles.return_value.download_and_save.assert_called_once()