IMDBx 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imdbx-1.1.0/IMDBx.egg-info/PKG-INFO +281 -0
- imdbx-1.1.0/IMDBx.egg-info/SOURCES.txt +24 -0
- imdbx-1.1.0/IMDBx.egg-info/dependency_links.txt +1 -0
- imdbx-1.1.0/IMDBx.egg-info/entry_points.txt +2 -0
- imdbx-1.1.0/IMDBx.egg-info/requires.txt +8 -0
- imdbx-1.1.0/IMDBx.egg-info/top_level.txt +1 -0
- imdbx-1.1.0/LICENSE +21 -0
- imdbx-1.1.0/PKG-INFO +281 -0
- imdbx-1.1.0/README.md +242 -0
- imdbx-1.1.0/imdbx/__init__.py +349 -0
- imdbx-1.1.0/imdbx/_browser.py +364 -0
- imdbx-1.1.0/imdbx/_display.py +149 -0
- imdbx-1.1.0/imdbx/_http.py +165 -0
- imdbx-1.1.0/imdbx/_log.py +55 -0
- imdbx-1.1.0/imdbx/_parse.py +348 -0
- imdbx-1.1.0/imdbx/_scraper.py +145 -0
- imdbx-1.1.0/imdbx/cli.py +357 -0
- imdbx-1.1.0/imdbx/models.py +176 -0
- imdbx-1.1.0/imdbx/py.typed +0 -0
- imdbx-1.1.0/pyproject.toml +116 -0
- imdbx-1.1.0/setup.cfg +4 -0
- imdbx-1.1.0/tests/test_http_session_and_load.py +186 -0
- imdbx-1.1.0/tests/test_models_episode_dataclass.py +89 -0
- imdbx-1.1.0/tests/test_models_series_title_info.py +174 -0
- imdbx-1.1.0/tests/test_parse_episode_cards.py +184 -0
- imdbx-1.1.0/tests/test_parse_series_metadata.py +184 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: IMDBx
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: IMDBx - Titles, ratings, air dates, descriptions and cover art across every episode of any IMDb TV series.
|
|
5
|
+
Author-email: IMDBx <imdbx@imdbx.dev>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ENC4YP7ED/IMDBx
|
|
8
|
+
Project-URL: Repository, https://github.com/ENC4YP7ED/IMDBx
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/ENC4YP7ED/IMDBx/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/ENC4YP7ED/IMDBx/releases
|
|
11
|
+
Keywords: imdbx,scraper,episodes,tv,anime,playwright,beautifulsoup
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Natural Language :: English
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
|
25
|
+
Classifier: Topic :: Multimedia :: Video
|
|
26
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
|
+
Classifier: Typing :: Typed
|
|
28
|
+
Requires-Python: >=3.10
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: niquests>=3.0
|
|
32
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
33
|
+
Requires-Dist: playwright>=1.40
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# IMDBx
|
|
41
|
+
|
|
42
|
+
[PyPI](https://pypi.org/project/IMDBx/)
|
|
43
|
+
[](https://pypi.org/project/IMDBx/)
|
|
44
|
+
[Tests](https://github.com/ENC4YP7ED/IMDBx/actions/workflows/test.yml)
|
|
45
|
+
[License: MIT](LICENSE)
|
|
46
|
+
|
|
47
|
+
IMDBx - Titles, ratings, air dates, descriptions and cover art across every episode of any IMDb TV series.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install IMDBx
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Chromium is downloaded automatically the first time you run a scrape — no extra setup needed.
|
|
58
|
+
|
|
59
|
+
For development (includes pytest, ruff, coverage):
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install "IMDBx[dev]"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Quick start
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from imdbx import title
|
|
71
|
+
|
|
72
|
+
t = title("tt7441658") # Black Clover, all seasons
|
|
73
|
+
print(t) # Black Clover [tt7441658] — 4 seasons, 170 episodes
|
|
74
|
+
|
|
75
|
+
for season_num, episodes in t:
|
|
76
|
+
for ep in episodes:
|
|
77
|
+
print(ep) # S1.E1 · Asta and Yuno [7.6/10 (1.6K)]
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Python API
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from imdbx import title, season, episode, metadata, load
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### `title()` — full series
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
t = title("tt7441658")
|
|
92
|
+
t = title("tt7441658", seasons=[1, 2]) # specific seasons only
|
|
93
|
+
t = title("tt7441658", download_images=True) # also save cover art
|
|
94
|
+
t = title("tt7441658", pool_size=8) # more concurrency
|
|
95
|
+
|
|
96
|
+
print(t.meta.series_name) # "Black Clover"
|
|
97
|
+
print(t.meta.imdb_rating) # "8.2/10"
|
|
98
|
+
print(t.meta.tags) # ["Anime", "Action", "Adventure", …]
|
|
99
|
+
print(t.meta.years) # "2017–2021"
|
|
100
|
+
print(t.meta.content_rating) # "TV-PG"
|
|
101
|
+
|
|
102
|
+
all_eps = t.all_episodes() # flat list of every Episode
|
|
103
|
+
ep = t.get_episode(1, 1) # Episode S1E1
|
|
104
|
+
t.save("black_clover.json") # dump to JSON
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### `season()` — single season
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
eps = season("tt7441658", 1) # list[Episode] for season 1
|
|
111
|
+
|
|
112
|
+
for ep in eps:
|
|
113
|
+
print(ep.title, ep.rating)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### `episode()` — single episode
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
ep = episode("tt7441658", 1, 1) # Season 1, Episode 1
|
|
120
|
+
|
|
121
|
+
print(ep.episode_code) # "S1.E1"
|
|
122
|
+
print(ep.title) # "Asta and Yuno"
|
|
123
|
+
print(ep.rating) # "7.6/10 (1.6K)"
|
|
124
|
+
print(ep.air_date) # "Tue, Oct 3, 2017"
|
|
125
|
+
print(ep.description) # plot summary
|
|
126
|
+
print(ep.cover_image) # CDN URL
|
|
127
|
+
print(ep.imdb_url) # full IMDb episode page URL
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### `metadata()` — series info only (fast, no browser)
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
m = metadata("tt7441658")
|
|
134
|
+
|
|
135
|
+
print(m.series_name) # "Black Clover"
|
|
136
|
+
print(m.type) # "TV Series"
|
|
137
|
+
print(m.years) # "2017–2021"
|
|
138
|
+
print(m.content_rating) # "TV-PG"
|
|
139
|
+
print(m.episode_duration) # "24m"
|
|
140
|
+
print(m.imdb_rating) # "8.2/10"
|
|
141
|
+
print(m.rating_count) # "47K"
|
|
142
|
+
print(m.popularity) # "529"
|
|
143
|
+
print(m.tags) # ["Japanese", "Anime", "Action", …]
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### `load()` — reload a saved JSON file
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
t = load("black_clover.json")
|
|
150
|
+
print(t.meta.series_name)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Data classes
|
|
156
|
+
|
|
157
|
+
### `Episode`
|
|
158
|
+
|
|
159
|
+
| Field | Type | Description |
|
|
160
|
+
|---|---|---|
|
|
161
|
+
| `episode_code` | `str` | Short code, e.g. `"S1.E1"` |
|
|
162
|
+
| `title` | `str` | Episode title |
|
|
163
|
+
| `season` | `int` | Season number (1-indexed) |
|
|
164
|
+
| `episode` | `int` | Episode number within the season (1-indexed) |
|
|
165
|
+
| `air_date` | `str` | Original air date, e.g. `"Tue, Oct 3, 2017"` |
|
|
166
|
+
| `description` | `str` | Plot summary |
|
|
167
|
+
| `rating` | `str` | IMDb rating, e.g. `"7.6/10 (1.6K)"` |
|
|
168
|
+
| `cover_image` | `str \| None` | CDN URL of the episode thumbnail |
|
|
169
|
+
| `cover_image_local` | `str \| None` | Local file path after downloading cover art (`--download-images` on the CLI, `download_images=True` in Python) |
|
|
170
|
+
| `imdb_url` | `str` | Full IMDb episode page URL |
|
|
171
|
+
|
|
172
|
+
### `SeriesMetadata`
|
|
173
|
+
|
|
174
|
+
| Field | Type | Description |
|
|
175
|
+
|---|---|---|
|
|
176
|
+
| `title_id` | `str` | IMDb title ID, e.g. `"tt7441658"` |
|
|
177
|
+
| `series_name` | `str` | Series title |
|
|
178
|
+
| `type` | `str \| None` | Content type, e.g. `"TV Series"` |
|
|
179
|
+
| `years` | `str \| None` | Run years, e.g. `"2017–2021"` |
|
|
180
|
+
| `content_rating` | `str \| None` | Audience rating, e.g. `"TV-PG"` |
|
|
181
|
+
| `episode_duration` | `str \| None` | Typical episode length, e.g. `"24m"` |
|
|
182
|
+
| `imdb_rating` | `str \| None` | Aggregate rating, e.g. `"8.2/10"` |
|
|
183
|
+
| `rating_count` | `str \| None` | Number of ratings, e.g. `"47K"` |
|
|
184
|
+
| `popularity` | `str \| None` | IMDb popularity rank |
|
|
185
|
+
| `tags` | `list[str]` | Genre and style tags |
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## CLI
|
|
190
|
+
|
|
191
|
+
After installing, the `imdbx` command is available system-wide:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
imdbx tt7441658 # Black Clover, all seasons
|
|
195
|
+
imdbx tt0903747 # Breaking Bad
|
|
196
|
+
imdbx tt7441658 --seasons 1 2 # specific seasons only
|
|
197
|
+
imdbx tt7441658 --output out.json # save results to a JSON file
|
|
198
|
+
imdbx tt7441658 --download-images # also download cover art to ./images/
|
|
199
|
+
imdbx tt7441658 --images-dir ~/pics # download cover art to a custom directory
|
|
200
|
+
imdbx tt7441658 --meta-only # series metadata only — no browser needed
|
|
201
|
+
imdbx --load out.json # display a previously saved JSON file
|
|
202
|
+
imdbx tt7441658 --pool-size 8 # increase concurrency for faster scraping
|
|
203
|
+
imdbx tt7441658 --debug # verbose output: HTTP, browser events, timing
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### `--test` — testing mode
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
# Run the full offline unit-test suite (no network required)
|
|
210
|
+
imdbx --test
|
|
211
|
+
|
|
212
|
+
# Live end-to-end smoke test — fetches real data and checks key fields
|
|
213
|
+
imdbx --test tt7441658
|
|
214
|
+
imdbx --test tt0903747
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
`--test` alone runs `pytest` on the bundled test suite and exits with pytest's return code (0 = all passed). Requires `pip install "IMDBx[dev]"`.
|
|
218
|
+
|
|
219
|
+
`--test <TITLE_ID>` fetches live data from IMDb, verifies that metadata and episode data are populated correctly, and prints a coloured ✓/✗ summary. Requires a network connection and Playwright/Chromium.
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Architecture
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
imdbx/
|
|
227
|
+
├── __init__.py ← public API (title, season, episode, metadata, load)
|
|
228
|
+
├── models.py ← dataclasses (TitleInfo, SeriesMetadata, Episode)
|
|
229
|
+
├── cli.py ← imdbx command — all flags with full help text
|
|
230
|
+
├── _scraper.py ← orchestration: coordinates HTTP + browser + images
|
|
231
|
+
├── _http.py ← niquests connection pool + async image downloader
|
|
232
|
+
├── _browser.py ← Playwright pool + "Show more" expansion detection
|
|
233
|
+
├── _parse.py ← BeautifulSoup parsers (zero hardcoded class names)
|
|
234
|
+
├── _display.py ← terminal colour output
|
|
235
|
+
└── _log.py ← shared ANSI colour helpers + debug flag
|
|
236
|
+
tests/
|
|
237
|
+
├── test_models_episode_dataclass.py ← Episode field and repr tests
|
|
238
|
+
├── test_models_series_title_info.py ← TitleInfo + save/load round-trip
|
|
239
|
+
├── test_parse_episode_cards.py ← HTML → Episode parsing
|
|
240
|
+
├── test_parse_series_metadata.py ← HTML → SeriesMetadata parsing
|
|
241
|
+
└── test_http_session_and_load.py ← HTTP session and JSON load tests
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
Three-layer hybrid approach:
|
|
245
|
+
|
|
246
|
+
- **Layer 1** (niquests): lightweight HTTP for metadata, season counts, and image downloads
|
|
247
|
+
- **Layer 2** (Playwright): headless Chromium for JS-rendered episode pages with "Show more" expansion
|
|
248
|
+
- **Layer 3** (async): concurrent cover-image fetching via `niquests.AsyncSession`
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Running tests
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
# Via pytest directly
|
|
256
|
+
pip install "IMDBx[dev]"
|
|
257
|
+
pytest
|
|
258
|
+
|
|
259
|
+
# Via the CLI — same result without invoking pytest yourself (still requires the dev extra)
|
|
260
|
+
imdbx --test
|
|
261
|
+
|
|
262
|
+
# Live smoke test against a real title
|
|
263
|
+
imdbx --test tt7441658
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## Contributing
|
|
269
|
+
|
|
270
|
+
1. Fork the repository
|
|
271
|
+
2. Create a feature branch: `git checkout -b feat/my-change`
|
|
272
|
+
3. Install dev dependencies: `pip install -e ".[dev]"`
|
|
273
|
+
4. Make your changes and add tests
|
|
274
|
+
5. Verify everything passes: `imdbx --test`
|
|
275
|
+
6. Open a pull request
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## License
|
|
280
|
+
|
|
281
|
+
MIT — see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
IMDBx.egg-info/PKG-INFO
|
|
5
|
+
IMDBx.egg-info/SOURCES.txt
|
|
6
|
+
IMDBx.egg-info/dependency_links.txt
|
|
7
|
+
IMDBx.egg-info/entry_points.txt
|
|
8
|
+
IMDBx.egg-info/requires.txt
|
|
9
|
+
IMDBx.egg-info/top_level.txt
|
|
10
|
+
imdbx/__init__.py
|
|
11
|
+
imdbx/_browser.py
|
|
12
|
+
imdbx/_display.py
|
|
13
|
+
imdbx/_http.py
|
|
14
|
+
imdbx/_log.py
|
|
15
|
+
imdbx/_parse.py
|
|
16
|
+
imdbx/_scraper.py
|
|
17
|
+
imdbx/cli.py
|
|
18
|
+
imdbx/models.py
|
|
19
|
+
imdbx/py.typed
|
|
20
|
+
tests/test_http_session_and_load.py
|
|
21
|
+
tests/test_models_episode_dataclass.py
|
|
22
|
+
tests/test_models_series_title_info.py
|
|
23
|
+
tests/test_parse_episode_cards.py
|
|
24
|
+
tests/test_parse_series_metadata.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
imdbx
|
imdbx-1.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Your Name
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
imdbx-1.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: IMDBx
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: IMDBx - Titles, ratings, air dates, descriptions and cover art across every episode of any IMDb TV series.
|
|
5
|
+
Author-email: IMDBx <imdbx@imdbx.dev>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ENC4YP7ED/IMDBx
|
|
8
|
+
Project-URL: Repository, https://github.com/ENC4YP7ED/IMDBx
|
|
9
|
+
Project-URL: Bug Tracker, https://github.com/ENC4YP7ED/IMDBx/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/ENC4YP7ED/IMDBx/releases
|
|
11
|
+
Keywords: imdbx,scraper,episodes,tv,anime,playwright,beautifulsoup
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: End Users/Desktop
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Natural Language :: English
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
|
|
25
|
+
Classifier: Topic :: Multimedia :: Video
|
|
26
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
|
+
Classifier: Typing :: Typed
|
|
28
|
+
Requires-Python: >=3.10
|
|
29
|
+
Description-Content-Type: text/markdown
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Requires-Dist: niquests>=3.0
|
|
32
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
33
|
+
Requires-Dist: playwright>=1.40
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
|
|
40
|
+
# IMDBx
|
|
41
|
+
|
|
42
|
+
[PyPI](https://pypi.org/project/IMDBx/)
|
|
43
|
+
[](https://pypi.org/project/IMDBx/)
|
|
44
|
+
[Tests](https://github.com/ENC4YP7ED/IMDBx/actions/workflows/test.yml)
|
|
45
|
+
[License: MIT](LICENSE)
|
|
46
|
+
|
|
47
|
+
IMDBx - Titles, ratings, air dates, descriptions and cover art across every episode of any IMDb TV series.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## Install
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
pip install IMDBx
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Chromium is downloaded automatically the first time you run a scrape — no extra setup needed.
|
|
58
|
+
|
|
59
|
+
For development (includes pytest, ruff, coverage):
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install "IMDBx[dev]"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Quick start
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from imdbx import title
|
|
71
|
+
|
|
72
|
+
t = title("tt7441658") # Black Clover, all seasons
|
|
73
|
+
print(t) # Black Clover [tt7441658] — 4 seasons, 170 episodes
|
|
74
|
+
|
|
75
|
+
for season_num, episodes in t:
|
|
76
|
+
for ep in episodes:
|
|
77
|
+
print(ep) # S1.E1 · Asta and Yuno [7.6/10 (1.6K)]
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Python API
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from imdbx import title, season, episode, metadata, load
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### `title()` — full series
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
t = title("tt7441658")
|
|
92
|
+
t = title("tt7441658", seasons=[1, 2]) # specific seasons only
|
|
93
|
+
t = title("tt7441658", download_images=True) # also save cover art
|
|
94
|
+
t = title("tt7441658", pool_size=8) # more concurrency
|
|
95
|
+
|
|
96
|
+
print(t.meta.series_name) # "Black Clover"
|
|
97
|
+
print(t.meta.imdb_rating) # "8.2/10"
|
|
98
|
+
print(t.meta.tags) # ["Anime", "Action", "Adventure", …]
|
|
99
|
+
print(t.meta.years) # "2017–2021"
|
|
100
|
+
print(t.meta.content_rating) # "TV-PG"
|
|
101
|
+
|
|
102
|
+
all_eps = t.all_episodes() # flat list of every Episode
|
|
103
|
+
ep = t.get_episode(1, 1) # Episode S1E1
|
|
104
|
+
t.save("black_clover.json") # dump to JSON
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### `season()` — single season
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
eps = season("tt7441658", 1) # list[Episode] for season 1
|
|
111
|
+
|
|
112
|
+
for ep in eps:
|
|
113
|
+
print(ep.title, ep.rating)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### `episode()` — single episode
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
ep = episode("tt7441658", 1, 1) # Season 1, Episode 1
|
|
120
|
+
|
|
121
|
+
print(ep.episode_code) # "S1.E1"
|
|
122
|
+
print(ep.title) # "Asta and Yuno"
|
|
123
|
+
print(ep.rating) # "7.6/10 (1.6K)"
|
|
124
|
+
print(ep.air_date) # "Tue, Oct 3, 2017"
|
|
125
|
+
print(ep.description) # plot summary
|
|
126
|
+
print(ep.cover_image) # CDN URL
|
|
127
|
+
print(ep.imdb_url) # full IMDb episode page URL
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### `metadata()` — series info only (fast, no browser)
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
m = metadata("tt7441658")
|
|
134
|
+
|
|
135
|
+
print(m.series_name) # "Black Clover"
|
|
136
|
+
print(m.type) # "TV Series"
|
|
137
|
+
print(m.years) # "2017–2021"
|
|
138
|
+
print(m.content_rating) # "TV-PG"
|
|
139
|
+
print(m.episode_duration) # "24m"
|
|
140
|
+
print(m.imdb_rating) # "8.2/10"
|
|
141
|
+
print(m.rating_count) # "47K"
|
|
142
|
+
print(m.popularity) # "529"
|
|
143
|
+
print(m.tags) # ["Japanese", "Anime", "Action", …]
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### `load()` — reload a saved JSON file
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
t = load("black_clover.json")
|
|
150
|
+
print(t.meta.series_name)
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
---
|
|
154
|
+
|
|
155
|
+
## Data classes
|
|
156
|
+
|
|
157
|
+
### `Episode`
|
|
158
|
+
|
|
159
|
+
| Field | Type | Description |
|
|
160
|
+
|---|---|---|
|
|
161
|
+
| `episode_code` | `str` | Short code, e.g. `"S1.E1"` |
|
|
162
|
+
| `title` | `str` | Episode title |
|
|
163
|
+
| `season` | `int` | Season number (1-indexed) |
|
|
164
|
+
| `episode` | `int` | Episode number within the season (1-indexed) |
|
|
165
|
+
| `air_date` | `str` | Original air date, e.g. `"Tue, Oct 3, 2017"` |
|
|
166
|
+
| `description` | `str` | Plot summary |
|
|
167
|
+
| `rating` | `str` | IMDb rating, e.g. `"7.6/10 (1.6K)"` |
|
|
168
|
+
| `cover_image` | `str \| None` | CDN URL of the episode thumbnail |
|
|
169
|
+
| `cover_image_local` | `str \| None` | Local file path after downloading cover art (`--download-images` on the CLI, `download_images=True` in Python) |
|
|
170
|
+
| `imdb_url` | `str` | Full IMDb episode page URL |
|
|
171
|
+
|
|
172
|
+
### `SeriesMetadata`
|
|
173
|
+
|
|
174
|
+
| Field | Type | Description |
|
|
175
|
+
|---|---|---|
|
|
176
|
+
| `title_id` | `str` | IMDb title ID, e.g. `"tt7441658"` |
|
|
177
|
+
| `series_name` | `str` | Series title |
|
|
178
|
+
| `type` | `str \| None` | Content type, e.g. `"TV Series"` |
|
|
179
|
+
| `years` | `str \| None` | Run years, e.g. `"2017–2021"` |
|
|
180
|
+
| `content_rating` | `str \| None` | Audience rating, e.g. `"TV-PG"` |
|
|
181
|
+
| `episode_duration` | `str \| None` | Typical episode length, e.g. `"24m"` |
|
|
182
|
+
| `imdb_rating` | `str \| None` | Aggregate rating, e.g. `"8.2/10"` |
|
|
183
|
+
| `rating_count` | `str \| None` | Number of ratings, e.g. `"47K"` |
|
|
184
|
+
| `popularity` | `str \| None` | IMDb popularity rank |
|
|
185
|
+
| `tags` | `list[str]` | Genre and style tags |
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
## CLI
|
|
190
|
+
|
|
191
|
+
After installing, the `imdbx` command is available system-wide:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
imdbx tt7441658 # Black Clover, all seasons
|
|
195
|
+
imdbx tt0903747 # Breaking Bad
|
|
196
|
+
imdbx tt7441658 --seasons 1 2 # specific seasons only
|
|
197
|
+
imdbx tt7441658 --output out.json # save results to a JSON file
|
|
198
|
+
imdbx tt7441658 --download-images # also download cover art to ./images/
|
|
199
|
+
imdbx tt7441658 --images-dir ~/pics # download cover art to a custom directory
|
|
200
|
+
imdbx tt7441658 --meta-only # series metadata only — no browser needed
|
|
201
|
+
imdbx --load out.json # display a previously saved JSON file
|
|
202
|
+
imdbx tt7441658 --pool-size 8 # increase concurrency for faster scraping
|
|
203
|
+
imdbx tt7441658 --debug # verbose output: HTTP, browser events, timing
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### `--test` — testing mode
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
# Run the full offline unit-test suite (no network required)
|
|
210
|
+
imdbx --test
|
|
211
|
+
|
|
212
|
+
# Live end-to-end smoke test — fetches real data and checks key fields
|
|
213
|
+
imdbx --test tt7441658
|
|
214
|
+
imdbx --test tt0903747
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
`--test` alone runs `pytest` on the bundled test suite and exits with pytest's return code (0 = all passed). Requires `pip install "IMDBx[dev]"`.
|
|
218
|
+
|
|
219
|
+
`--test <TITLE_ID>` fetches live data from IMDb, verifies that metadata and episode data are populated correctly, and prints a coloured ✓/✗ summary. Requires a network connection and Playwright/Chromium.
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
## Architecture
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
imdbx/
|
|
227
|
+
├── __init__.py ← public API (title, season, episode, metadata, load)
|
|
228
|
+
├── models.py ← dataclasses (TitleInfo, SeriesMetadata, Episode)
|
|
229
|
+
├── cli.py ← imdbx command — all flags with full help text
|
|
230
|
+
├── _scraper.py ← orchestration: coordinates HTTP + browser + images
|
|
231
|
+
├── _http.py ← niquests connection pool + async image downloader
|
|
232
|
+
├── _browser.py ← Playwright pool + "Show more" expansion detection
|
|
233
|
+
├── _parse.py ← BeautifulSoup parsers (zero hardcoded class names)
|
|
234
|
+
├── _display.py ← terminal colour output
|
|
235
|
+
└── _log.py ← shared ANSI colour helpers + debug flag
|
|
236
|
+
tests/
|
|
237
|
+
├── test_models_episode_dataclass.py ← Episode field and repr tests
|
|
238
|
+
├── test_models_series_title_info.py ← TitleInfo + save/load round-trip
|
|
239
|
+
├── test_parse_episode_cards.py ← HTML → Episode parsing
|
|
240
|
+
├── test_parse_series_metadata.py ← HTML → SeriesMetadata parsing
|
|
241
|
+
└── test_http_session_and_load.py ← HTTP session and JSON load tests
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
Three-layer hybrid approach:
|
|
245
|
+
|
|
246
|
+
- **Layer 1** (niquests): lightweight HTTP for metadata, season counts, and image downloads
|
|
247
|
+
- **Layer 2** (Playwright): headless Chromium for JS-rendered episode pages with "Show more" expansion
|
|
248
|
+
- **Layer 3** (async): concurrent cover-image fetching via `niquests.AsyncSession`
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Running tests
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
# Via pytest directly
|
|
256
|
+
pip install "IMDBx[dev]"
|
|
257
|
+
pytest
|
|
258
|
+
|
|
259
|
+
# Via the CLI — same result without invoking pytest yourself (still requires the dev extra)
|
|
260
|
+
imdbx --test
|
|
261
|
+
|
|
262
|
+
# Live smoke test against a real title
|
|
263
|
+
imdbx --test tt7441658
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## Contributing
|
|
269
|
+
|
|
270
|
+
1. Fork the repository
|
|
271
|
+
2. Create a feature branch: `git checkout -b feat/my-change`
|
|
272
|
+
3. Install dev dependencies: `pip install -e ".[dev]"`
|
|
273
|
+
4. Make your changes and add tests
|
|
274
|
+
5. Verify everything passes: `imdbx --test`
|
|
275
|
+
6. Open a pull request
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## License
|
|
280
|
+
|
|
281
|
+
MIT — see [LICENSE](LICENSE) for details.
|