mkv-episode-matcher 0.9.7__tar.gz → 0.9.8__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of mkv-episode-matcher might be problematic. Click here for more details.

Files changed (52) hide show
  1. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.claude/settings.local.json +3 -1
  2. mkv_episode_matcher-0.9.8/CLAUDE.md +148 -0
  3. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/PKG-INFO +1 -1
  4. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/__main__.py +2 -0
  5. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/utils.py +14 -2
  6. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher.egg-info/PKG-INFO +1 -1
  7. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher.egg-info/SOURCES.txt +1 -0
  8. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/setup.cfg +1 -1
  9. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/tests/test_episode_identification.py +2 -3
  10. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/tests/test_main.py +42 -0
  11. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.coverage +0 -0
  12. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.gitattributes +0 -0
  13. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.github/funding.yml +0 -0
  14. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.github/workflows/claude-code-review.yml +0 -0
  15. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.github/workflows/claude.yml +0 -0
  16. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.github/workflows/documentation.yml +0 -0
  17. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.github/workflows/python-publish.yml +0 -0
  18. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.github/workflows/tests.yml +0 -0
  19. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.gitignore +0 -0
  20. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.gitmodules +0 -0
  21. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.python-version +0 -0
  22. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/.vscode/settings.json +0 -0
  23. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/CHANGELOG.md +0 -0
  24. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/LICENSE +0 -0
  25. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/README.md +0 -0
  26. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/docs/api/index.md +0 -0
  27. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/docs/changelog.md +0 -0
  28. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/docs/cli.md +0 -0
  29. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/docs/configuration.md +0 -0
  30. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/docs/installation.md +0 -0
  31. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/docs/quickstart.md +0 -0
  32. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/docs/tips.md +0 -0
  33. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkdocs.yml +0 -0
  34. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/.gitattributes +0 -0
  35. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/__init__.py +0 -0
  36. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/config.py +0 -0
  37. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/episode_identification.py +0 -0
  38. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/episode_matcher.py +0 -0
  39. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/subtitle_utils.py +0 -0
  40. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher/tmdb_client.py +0 -0
  41. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher.egg-info/dependency_links.txt +0 -0
  42. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher.egg-info/entry_points.txt +0 -0
  43. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher.egg-info/requires.txt +0 -0
  44. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/mkv_episode_matcher.egg-info/top_level.txt +0 -0
  45. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/pyproject.toml +0 -0
  46. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/setup.py +0 -0
  47. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/tests/__init__.py +0 -0
  48. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/tests/test_config_special_characters.py +0 -0
  49. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/tests/test_path_handling.py +0 -0
  50. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/tests/test_path_spaces_quotes.py +0 -0
  51. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/tests/test_trailing_slash.py +0 -0
  52. {mkv_episode_matcher-0.9.7 → mkv_episode_matcher-0.9.8}/uv.lock +0 -0
@@ -13,7 +13,9 @@
13
13
  "Bash(git add:*)",
14
14
  "Bash(git commit:*)",
15
15
  "Bash(git push:*)",
16
- "Bash(rm:*)"
16
+ "Bash(rm:*)",
17
+ "Bash(uv sync:*)",
18
+ "Bash(grep:*)"
17
19
  ],
18
20
  "deny": []
19
21
  }
@@ -0,0 +1,148 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Development Commands
6
+
7
+ ### Testing
8
+ ```bash
9
+ # Run all tests with coverage
10
+ uv run pytest --cov=mkv_episode_matcher --cov-report=term-missing
11
+
12
+ # Run specific test file
13
+ uv run pytest tests/test_main.py -v
14
+
15
+ # Run tests in verbose mode
16
+ uv run pytest -v
17
+ ```
18
+
19
+ ### Linting and Formatting
20
+ ```bash
21
+ # Run ruff linting
22
+ uv run ruff check .
23
+
24
+ # Run ruff formatting
25
+ uv run ruff format .
26
+
27
+ # Fix auto-fixable issues
28
+ uv run ruff check --fix .
29
+ ```
30
+
31
+ ### Dependency Management and Installation
32
+ ```bash
33
+ # Install dependencies
34
+ uv sync
35
+
36
+ # Install with dev dependencies (default behavior)
37
+ uv sync --group dev
38
+
39
+ # Add new dependency
40
+ uv add package-name
41
+
42
+ # Add dev dependency
43
+ uv add --group dev package-name
44
+
45
+ # Install in editable mode for development
46
+ uv sync --editable
47
+
48
+ # Build distribution packages
49
+ uv build
50
+ ```
51
+
52
+ ### Running the Application
53
+ ```bash
54
+ # Run onboarding (first time setup)
55
+ mkv-match --onboard
56
+
57
+ # Process a show directory
58
+ mkv-match --show-dir "/path/to/show"
59
+
60
+ # Dry run mode (test without making changes)
61
+ mkv-match --show-dir "/path/to/show" --dry-run
62
+
63
+ # Process specific season
64
+ mkv-match --show-dir "/path/to/show" --season 1
65
+
66
+ # Download subtitles and process
67
+ mkv-match --show-dir "/path/to/show" --get-subs
68
+ ```
69
+
70
+ ## Architecture Overview
71
+
72
+ ### Core Components
73
+
74
+ **Main Entry Point** (`__main__.py`):
75
+ - CLI argument parsing and rich console interface
76
+ - Configuration management and onboarding flow
77
+ - Coordinates the overall process execution
78
+
79
+ **Episode Matcher** (`episode_matcher.py`):
80
+ - High-level orchestration of the matching process
81
+ - Progress tracking and user feedback
82
+ - Handles season/episode file processing workflow
83
+
84
+ **Episode Identification** (`episode_identification.py`):
85
+ - Core speech recognition using OpenAI Whisper
86
+ - Subtitle comparison and matching algorithms
87
+ - Audio extraction and processing with FFmpeg
88
+ - Caching systems for performance optimization
89
+
90
+ **Configuration System** (`config.py`):
91
+ - INI-based configuration management
92
+ - API key and credential storage
93
+ - Show directory and user preferences
94
+
95
+ **Utilities** (`utils.py`):
96
+ - File operations (renaming, validation)
97
+ - Text processing and cleaning
98
+ - Season/episode extraction from filenames
99
+
100
+ ### Dependencies
101
+
102
+ **Required External Tools**:
103
+ - FFmpeg (for audio extraction from MKV files)
104
+ - Python 3.9-3.12
105
+
106
+ **Key Python Dependencies**:
107
+ - OpenAI Whisper (speech recognition)
108
+ - TMDb Client (show metadata)
109
+ - OpenSubtitles API (subtitle downloads)
110
+ - Rich (CLI interface)
111
+ - RapidFuzz (text matching)
112
+ - PyTorch (Whisper backend)
113
+
114
+ ### Data Flow
115
+
116
+ 1. **Configuration**: Load user settings, API keys, and show directory
117
+ 2. **File Discovery**: Find MKV files in season directories
118
+ 3. **Audio Extraction**: Extract audio chunks from MKV files using FFmpeg
119
+ 4. **Speech Recognition**: Use Whisper to transcribe audio chunks
120
+ 5. **Subtitle Matching**: Compare transcriptions with reference subtitles
121
+ 6. **File Renaming**: Rename files based on identified episodes
122
+
123
+ ### Caching Strategy
124
+
125
+ - **Whisper Models**: Cached globally to avoid reloading
126
+ - **Audio Chunks**: Temporary files cached during processing
127
+ - **Reference Subtitles**: Parsed subtitle content cached per session
128
+ - **Video Duration**: FFprobe results cached with LRU cache
129
+
130
+ ### Configuration Files
131
+
132
+ - **Config Location**: `~/.mkv-episode-matcher/config.ini`
133
+ - **Cache Directory**: `~/.mkv-episode-matcher/cache/`
134
+ - **Logs Directory**: `~/.mkv-episode-matcher/logs/`
135
+
136
+ ### Testing Strategy
137
+
138
+ - Uses pytest with fixtures for mock data
139
+ - Coverage reporting configured via pytest-cov
140
+ - Test files follow `test_*.py` naming convention
141
+ - Mocks external dependencies (TMDb API, file operations)
142
+
143
+ ### Code Quality
144
+
145
+ - Ruff for linting and formatting
146
+ - Type hints where applicable
147
+ - Comprehensive error handling and logging
148
+ - Rich console output for user experience
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkv-episode-matcher
3
- Version: 0.9.7
3
+ Version: 0.9.8
4
4
  Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
5
5
  Home-page: https://github.com/Jsakkos/mkv-episode-matcher
6
6
  Author: Jonathan Sakkos
@@ -122,6 +122,7 @@ def select_season(seasons):
122
122
  selected_season = seasons[int(choice) - 1]
123
123
  return int(Path(selected_season).name.replace("Season ", ""))
124
124
 
125
+
125
126
  def onboarding(config_path):
126
127
  """Prompt user for all required config values, showing existing as defaults."""
127
128
  config = get_config(config_path) if config_path.exists() else {}
@@ -153,6 +154,7 @@ def onboarding(config_path):
153
154
  )
154
155
  console.print("[bold green]Onboarding complete! Configuration saved.[/bold green]")
155
156
 
157
+
156
158
  @logger.catch
157
159
  def main():
158
160
  """
@@ -304,8 +304,20 @@ def get_subtitles(show_id, seasons: set[int], config=None, max_retries=3):
304
304
 
305
305
 
306
306
  def clean_text(text):
307
- # Remove brackets, parentheses, and their content
308
- cleaned_text = re.sub(r"\[.*?\]|\(.*?\)|\{.*?\}", "", text)
307
+ # Remove brackets and curly braces with their content
308
+ cleaned_text = re.sub(r"\[.*?\]|\{.*?\}", "", text)
309
+ # Remove parentheses content EXCEPT for those containing exactly 4 digits (years)
310
+ # First, temporarily replace year patterns with a placeholder
311
+ year_pattern = r"\((\d{4})\)"
312
+ years = re.findall(year_pattern, cleaned_text)
313
+ cleaned_text = re.sub(year_pattern, "YEAR_PLACEHOLDER", cleaned_text)
314
+ # Remove all remaining parentheses content
315
+ cleaned_text = re.sub(r"\([^)]*\)", "", cleaned_text)
316
+ # Restore the years
317
+ for year in years:
318
+ cleaned_text = cleaned_text.replace("YEAR_PLACEHOLDER", f"({year})", 1)
319
+ # Normalize multiple spaces to single spaces
320
+ cleaned_text = re.sub(r"\s+", " ", cleaned_text)
309
321
  # Strip leading/trailing whitespace
310
322
  return cleaned_text.strip()
311
323
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mkv-episode-matcher
3
- Version: 0.9.7
3
+ Version: 0.9.8
4
4
  Summary: The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
5
5
  Home-page: https://github.com/Jsakkos/mkv-episode-matcher
6
6
  Author: Jonathan Sakkos
@@ -4,6 +4,7 @@
4
4
  .gitmodules
5
5
  .python-version
6
6
  CHANGELOG.md
7
+ CLAUDE.md
7
8
  LICENSE
8
9
  README.md
9
10
  mkdocs.yml
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = mkv_episode_matcher
3
- version = 0.9.7
3
+ version = 0.9.8
4
4
  author = Jonathan Sakkos
5
5
  author_email = jonathansakkos@gmail.com
6
6
  description = The MKV Episode Matcher is a tool for identifying TV series episodes from MKV files and renaming the files accordingly.
@@ -1,8 +1,7 @@
1
- import unittest
2
1
  import tempfile
3
- import re
2
+ import unittest
4
3
  from pathlib import Path
5
- from unittest.mock import Mock, patch
4
+ from unittest.mock import Mock
6
5
 
7
6
  from mkv_episode_matcher.episode_identification import EpisodeMatcher
8
7
 
@@ -72,6 +72,48 @@ class TestUtilities:
72
72
  def test_clean_text(self):
73
73
  text = "Test [action] (note) {tag}"
74
74
  assert clean_text(text) == "Test"
75
+
76
+ # Test that years are preserved
77
+ text_with_year = "Bluey (2018)"
78
+ assert clean_text(text_with_year) == "Bluey (2018)"
79
+
80
+ # Test mixed content with year
81
+ text_mixed = "Show Name [HD] (2020) {release}"
82
+ assert clean_text(text_mixed) == "Show Name (2020)"
83
+
84
+ def test_clean_text_edge_cases(self):
85
+ """Test edge cases for clean_text function reported by users."""
86
+ # Test exact user reported case
87
+ bluey_case = "Bluey (2018)"
88
+ assert clean_text(bluey_case) == "Bluey (2018)"
89
+
90
+ # Test with extra whitespace and tags
91
+ bluey_with_junk = "Bluey [1080p] (2018) {AMZN}"
92
+ assert clean_text(bluey_with_junk) == "Bluey (2018)"
93
+
94
+ # Test show name with multiple years (should preserve all)
95
+ multiple_years = "Show (1999) vs Show (2020)"
96
+ assert clean_text(multiple_years) == "Show (1999) vs Show (2020)"
97
+
98
+ # Test year at beginning
99
+ year_first = "(2018) Bluey [HD]"
100
+ assert clean_text(year_first) == "(2018) Bluey"
101
+
102
+ # Test year with other content in same parentheses (should remove)
103
+ year_with_text = "Show (2018 Remaster)"
104
+ assert clean_text(year_with_text) == "Show"
105
+
106
+ # Test 4-digit numbers (preserves any 4-digit number in parentheses)
107
+ four_digit = "Show (1234)"
108
+ assert clean_text(four_digit) == "Show (1234)"
109
+
110
+ # Test edge case years
111
+ assert clean_text("Show (1900)") == "Show (1900)" # Very old year
112
+ assert clean_text("Show (2099)") == "Show (2099)" # Future year
113
+
114
+ # Test multiple spaces and normalization
115
+ messy_spacing = "Bluey [HD] (2018) {RELEASE}"
116
+ assert clean_text(messy_spacing) == "Bluey (2018)"
75
117
 
76
118
  def test_extract_season_episode(self):
77
119
  filename = "Show - S01E02.mkv"