plexflow 0.0.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plexflow/__init__.py +0 -0
- plexflow/__main__.py +15 -0
- plexflow/core/.DS_Store +0 -0
- plexflow/core/__init__.py +0 -0
- plexflow/core/context/__init__.py +0 -0
- plexflow/core/context/metadata/__init__.py +0 -0
- plexflow/core/context/metadata/context.py +32 -0
- plexflow/core/context/metadata/tmdb/__init__.py +0 -0
- plexflow/core/context/metadata/tmdb/context.py +45 -0
- plexflow/core/context/partial_context.py +46 -0
- plexflow/core/context/partials/__init__.py +8 -0
- plexflow/core/context/partials/cache.py +16 -0
- plexflow/core/context/partials/context.py +12 -0
- plexflow/core/context/partials/ids.py +37 -0
- plexflow/core/context/partials/movie.py +115 -0
- plexflow/core/context/partials/tgx_batch.py +33 -0
- plexflow/core/context/partials/tgx_context.py +34 -0
- plexflow/core/context/partials/torrents.py +23 -0
- plexflow/core/context/partials/watchlist.py +35 -0
- plexflow/core/context/plexflow_context.py +29 -0
- plexflow/core/context/plexflow_property.py +36 -0
- plexflow/core/context/root/__init__.py +0 -0
- plexflow/core/context/root/context.py +25 -0
- plexflow/core/context/select/__init__.py +0 -0
- plexflow/core/context/select/context.py +45 -0
- plexflow/core/context/torrent/__init__.py +0 -0
- plexflow/core/context/torrent/context.py +43 -0
- plexflow/core/context/torrent/tpb/__init__.py +0 -0
- plexflow/core/context/torrent/tpb/context.py +45 -0
- plexflow/core/context/torrent/yts/__init__.py +0 -0
- plexflow/core/context/torrent/yts/context.py +45 -0
- plexflow/core/context/watchlist/__init__.py +0 -0
- plexflow/core/context/watchlist/context.py +46 -0
- plexflow/core/downloads/__init__.py +0 -0
- plexflow/core/downloads/candidates/__init__.py +0 -0
- plexflow/core/downloads/candidates/download_candidate.py +210 -0
- plexflow/core/downloads/candidates/filtered.py +51 -0
- plexflow/core/downloads/candidates/utils.py +39 -0
- plexflow/core/env/__init__.py +0 -0
- plexflow/core/env/env.py +31 -0
- plexflow/core/genai/__init__.py +0 -0
- plexflow/core/genai/bot.py +9 -0
- plexflow/core/genai/plexa.py +54 -0
- plexflow/core/genai/torrent/imdb_verify.py +65 -0
- plexflow/core/genai/torrent/movie.py +25 -0
- plexflow/core/genai/utils/__init__.py +0 -0
- plexflow/core/genai/utils/loader.py +5 -0
- plexflow/core/metadata/__init__.py +0 -0
- plexflow/core/metadata/auto/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_meta.py +40 -0
- plexflow/core/metadata/auto/auto_providers/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/auto/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/auto/episode.py +49 -0
- plexflow/core/metadata/auto/auto_providers/auto/item.py +55 -0
- plexflow/core/metadata/auto/auto_providers/auto/movie.py +13 -0
- plexflow/core/metadata/auto/auto_providers/auto/season.py +43 -0
- plexflow/core/metadata/auto/auto_providers/auto/show.py +26 -0
- plexflow/core/metadata/auto/auto_providers/imdb/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/imdb/movie.py +36 -0
- plexflow/core/metadata/auto/auto_providers/imdb/show.py +45 -0
- plexflow/core/metadata/auto/auto_providers/moviemeter/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/moviemeter/movie.py +40 -0
- plexflow/core/metadata/auto/auto_providers/plex/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/plex/movie.py +39 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/episode.py +30 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/movie.py +36 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/season.py +23 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/show.py +41 -0
- plexflow/core/metadata/auto/auto_providers/tmdb.py +92 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/episode.py +28 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/movie.py +36 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/season.py +25 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/show.py +41 -0
- plexflow/core/metadata/providers/__init__.py +0 -0
- plexflow/core/metadata/providers/imdb/__init__.py +0 -0
- plexflow/core/metadata/providers/imdb/datatypes.py +53 -0
- plexflow/core/metadata/providers/imdb/imdb.py +112 -0
- plexflow/core/metadata/providers/moviemeter/__init__.py +0 -0
- plexflow/core/metadata/providers/moviemeter/datatypes.py +111 -0
- plexflow/core/metadata/providers/moviemeter/moviemeter.py +42 -0
- plexflow/core/metadata/providers/plex/__init__.py +0 -0
- plexflow/core/metadata/providers/plex/datatypes.py +693 -0
- plexflow/core/metadata/providers/plex/plex.py +167 -0
- plexflow/core/metadata/providers/tmdb/__init__.py +0 -0
- plexflow/core/metadata/providers/tmdb/datatypes.py +460 -0
- plexflow/core/metadata/providers/tmdb/tmdb.py +85 -0
- plexflow/core/metadata/providers/tvdb/__init__.py +0 -0
- plexflow/core/metadata/providers/tvdb/datatypes.py +257 -0
- plexflow/core/metadata/providers/tvdb/tv_datatypes.py +554 -0
- plexflow/core/metadata/providers/tvdb/tvdb.py +65 -0
- plexflow/core/metadata/providers/universal/__init__.py +0 -0
- plexflow/core/metadata/providers/universal/movie.py +130 -0
- plexflow/core/metadata/providers/universal/old.py +192 -0
- plexflow/core/metadata/providers/universal/show.py +107 -0
- plexflow/core/plex/__init__.py +0 -0
- plexflow/core/plex/api/context/authorized.py +15 -0
- plexflow/core/plex/api/context/discover.py +14 -0
- plexflow/core/plex/api/context/library.py +14 -0
- plexflow/core/plex/discover/__init__.py +0 -0
- plexflow/core/plex/discover/activity.py +448 -0
- plexflow/core/plex/discover/comment.py +89 -0
- plexflow/core/plex/discover/feed.py +11 -0
- plexflow/core/plex/hooks/__init__.py +0 -0
- plexflow/core/plex/hooks/plex_authorized.py +60 -0
- plexflow/core/plex/hooks/plexflow_database.py +6 -0
- plexflow/core/plex/library/__init__.py +0 -0
- plexflow/core/plex/library/library.py +103 -0
- plexflow/core/plex/token/__init__.py +0 -0
- plexflow/core/plex/token/auto_token.py +91 -0
- plexflow/core/plex/utils/__init__.py +0 -0
- plexflow/core/plex/utils/paginated.py +39 -0
- plexflow/core/plex/watchlist/__init__.py +0 -0
- plexflow/core/plex/watchlist/datatypes.py +124 -0
- plexflow/core/plex/watchlist/watchlist.py +23 -0
- plexflow/core/storage/__init__.py +0 -0
- plexflow/core/storage/object/__init__.py +0 -0
- plexflow/core/storage/object/plexflow_storage.py +143 -0
- plexflow/core/storage/object/redis_storage.py +169 -0
- plexflow/core/subtitles/__init__.py +0 -0
- plexflow/core/subtitles/providers/__init__.py +0 -0
- plexflow/core/subtitles/providers/auto_subtitles.py +48 -0
- plexflow/core/subtitles/providers/oss/__init__.py +0 -0
- plexflow/core/subtitles/providers/oss/datatypes.py +104 -0
- plexflow/core/subtitles/providers/oss/download.py +48 -0
- plexflow/core/subtitles/providers/oss/old.py +144 -0
- plexflow/core/subtitles/providers/oss/oss.py +400 -0
- plexflow/core/subtitles/providers/oss/oss_subtitle.py +32 -0
- plexflow/core/subtitles/providers/oss/search.py +52 -0
- plexflow/core/subtitles/providers/oss/unlimited_oss.py +231 -0
- plexflow/core/subtitles/providers/oss/utils/__init__.py +0 -0
- plexflow/core/subtitles/providers/oss/utils/config.py +63 -0
- plexflow/core/subtitles/providers/oss/utils/download_client.py +22 -0
- plexflow/core/subtitles/providers/oss/utils/exceptions.py +35 -0
- plexflow/core/subtitles/providers/oss/utils/file_utils.py +83 -0
- plexflow/core/subtitles/providers/oss/utils/languages.py +78 -0
- plexflow/core/subtitles/providers/oss/utils/response_base.py +221 -0
- plexflow/core/subtitles/providers/oss/utils/responses.py +176 -0
- plexflow/core/subtitles/providers/oss/utils/srt.py +561 -0
- plexflow/core/subtitles/results/__init__.py +0 -0
- plexflow/core/subtitles/results/subtitle.py +170 -0
- plexflow/core/torrents/__init__.py +0 -0
- plexflow/core/torrents/analyzers/analyzed_torrent.py +143 -0
- plexflow/core/torrents/analyzers/analyzer.py +45 -0
- plexflow/core/torrents/analyzers/torrentquest/analyzer.py +47 -0
- plexflow/core/torrents/auto/auto_providers/auto/__init__.py +0 -0
- plexflow/core/torrents/auto/auto_providers/auto/torrent.py +64 -0
- plexflow/core/torrents/auto/auto_providers/tpb/torrent.py +62 -0
- plexflow/core/torrents/auto/auto_torrents.py +29 -0
- plexflow/core/torrents/providers/__init__.py +0 -0
- plexflow/core/torrents/providers/ext/__init__.py +0 -0
- plexflow/core/torrents/providers/ext/ext.py +18 -0
- plexflow/core/torrents/providers/ext/utils.py +64 -0
- plexflow/core/torrents/providers/extratorrent/__init__.py +0 -0
- plexflow/core/torrents/providers/extratorrent/extratorrent.py +21 -0
- plexflow/core/torrents/providers/extratorrent/utils.py +66 -0
- plexflow/core/torrents/providers/eztv/__init__.py +0 -0
- plexflow/core/torrents/providers/eztv/eztv.py +47 -0
- plexflow/core/torrents/providers/eztv/utils.py +83 -0
- plexflow/core/torrents/providers/rarbg2/__init__.py +0 -0
- plexflow/core/torrents/providers/rarbg2/rarbg2.py +19 -0
- plexflow/core/torrents/providers/rarbg2/utils.py +76 -0
- plexflow/core/torrents/providers/snowfl/__init__.py +0 -0
- plexflow/core/torrents/providers/snowfl/snowfl.py +36 -0
- plexflow/core/torrents/providers/snowfl/utils.py +59 -0
- plexflow/core/torrents/providers/tgx/__init__.py +0 -0
- plexflow/core/torrents/providers/tgx/context.py +50 -0
- plexflow/core/torrents/providers/tgx/dump.py +40 -0
- plexflow/core/torrents/providers/tgx/tgx.py +22 -0
- plexflow/core/torrents/providers/tgx/utils.py +61 -0
- plexflow/core/torrents/providers/therarbg/__init__.py +0 -0
- plexflow/core/torrents/providers/therarbg/therarbg.py +17 -0
- plexflow/core/torrents/providers/therarbg/utils.py +61 -0
- plexflow/core/torrents/providers/torrentquest/__init__.py +0 -0
- plexflow/core/torrents/providers/torrentquest/torrentquest.py +20 -0
- plexflow/core/torrents/providers/torrentquest/utils.py +70 -0
- plexflow/core/torrents/providers/tpb/__init__.py +0 -0
- plexflow/core/torrents/providers/tpb/tpb.py +17 -0
- plexflow/core/torrents/providers/tpb/utils.py +139 -0
- plexflow/core/torrents/providers/yts/__init__.py +0 -0
- plexflow/core/torrents/providers/yts/utils.py +57 -0
- plexflow/core/torrents/providers/yts/yts.py +31 -0
- plexflow/core/torrents/results/__init__.py +0 -0
- plexflow/core/torrents/results/torrent.py +165 -0
- plexflow/core/torrents/results/universal.py +220 -0
- plexflow/core/torrents/results/utils.py +15 -0
- plexflow/events/__init__.py +0 -0
- plexflow/events/download/__init__.py +0 -0
- plexflow/events/download/torrent_events.py +96 -0
- plexflow/events/publish/__init__.py +0 -0
- plexflow/events/publish/publish.py +34 -0
- plexflow/logging/__init__.py +0 -0
- plexflow/logging/log_setup.py +8 -0
- plexflow/spiders/quiet_logger.py +9 -0
- plexflow/spiders/tgx/pipelines/dump_json_pipeline.py +30 -0
- plexflow/spiders/tgx/pipelines/meta_pipeline.py +13 -0
- plexflow/spiders/tgx/pipelines/publish_pipeline.py +14 -0
- plexflow/spiders/tgx/pipelines/torrent_info_pipeline.py +12 -0
- plexflow/spiders/tgx/pipelines/validation_pipeline.py +17 -0
- plexflow/spiders/tgx/settings.py +36 -0
- plexflow/spiders/tgx/spider.py +72 -0
- plexflow/utils/__init__.py +0 -0
- plexflow/utils/antibot/human_like_requests.py +122 -0
- plexflow/utils/api/__init__.py +0 -0
- plexflow/utils/api/context/http.py +62 -0
- plexflow/utils/api/rest/__init__.py +0 -0
- plexflow/utils/api/rest/antibot_restful.py +68 -0
- plexflow/utils/api/rest/restful.py +49 -0
- plexflow/utils/captcha/__init__.py +0 -0
- plexflow/utils/captcha/bypass/__init__.py +0 -0
- plexflow/utils/captcha/bypass/decode_audio.py +34 -0
- plexflow/utils/download/__init__.py +0 -0
- plexflow/utils/download/gz.py +26 -0
- plexflow/utils/filesystem/__init__.py +0 -0
- plexflow/utils/filesystem/search.py +129 -0
- plexflow/utils/gmail/__init__.py +0 -0
- plexflow/utils/gmail/mails.py +116 -0
- plexflow/utils/hooks/__init__.py +0 -0
- plexflow/utils/hooks/http.py +84 -0
- plexflow/utils/hooks/postgresql.py +93 -0
- plexflow/utils/hooks/redis.py +112 -0
- plexflow/utils/image/storage.py +36 -0
- plexflow/utils/imdb/__init__.py +0 -0
- plexflow/utils/imdb/imdb_codes.py +107 -0
- plexflow/utils/pubsub/consume.py +82 -0
- plexflow/utils/pubsub/produce.py +25 -0
- plexflow/utils/retry/__init__.py +0 -0
- plexflow/utils/retry/utils.py +38 -0
- plexflow/utils/strings/__init__.py +0 -0
- plexflow/utils/strings/filesize.py +55 -0
- plexflow/utils/strings/language.py +14 -0
- plexflow/utils/subtitle/search.py +76 -0
- plexflow/utils/tasks/decorators.py +78 -0
- plexflow/utils/tasks/k8s/task.py +70 -0
- plexflow/utils/thread_safe/safe_list.py +54 -0
- plexflow/utils/thread_safe/safe_set.py +69 -0
- plexflow/utils/torrent/__init__.py +0 -0
- plexflow/utils/torrent/analyze.py +118 -0
- plexflow/utils/torrent/extract/common.py +37 -0
- plexflow/utils/torrent/extract/ext.py +2391 -0
- plexflow/utils/torrent/extract/extratorrent.py +56 -0
- plexflow/utils/torrent/extract/kat.py +1581 -0
- plexflow/utils/torrent/extract/tgx.py +96 -0
- plexflow/utils/torrent/extract/therarbg.py +170 -0
- plexflow/utils/torrent/extract/torrentquest.py +171 -0
- plexflow/utils/torrent/files.py +36 -0
- plexflow/utils/torrent/hash.py +90 -0
- plexflow/utils/transcribe/__init__.py +0 -0
- plexflow/utils/transcribe/speech2text.py +40 -0
- plexflow/utils/video/__init__.py +0 -0
- plexflow/utils/video/subtitle.py +73 -0
- plexflow-0.0.64.dist-info/METADATA +71 -0
- plexflow-0.0.64.dist-info/RECORD +256 -0
- plexflow-0.0.64.dist-info/WHEEL +4 -0
- plexflow-0.0.64.dist-info/entry_points.txt +24 -0
@@ -0,0 +1,96 @@
|
|
1
|
+
import re
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
from plexflow.utils.subtitle.search import SubtitleSearcher
|
4
|
+
from plexflow.utils.imdb.imdb_codes import extract_imdb_code
|
5
|
+
|
6
|
+
def extract_torrent_info(html_content):
    """Parse a torrent-detail page into a flat dict of fields.

    Matching is done against the page's plain text (plus the raw HTML for
    the magnet link and IMDb id), so it tolerates markup changes.

    Args:
        html_content (str): The HTML content of the webpage.

    Returns:
        dict: Extracted fields with lowercase keys; individual values are
        None when the corresponding information is not found.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    page_text = soup.get_text(separator='\n')

    def grab(pattern, flags=0, haystack=None):
        # First regex match in the page text (or an explicit haystack).
        return re.search(pattern, page_text if haystack is None else haystack, flags)

    info = {}

    m = grab(r"Torrent details for \"(.*?)\"")
    info["release_name"] = m.group(1).lower() if m else None

    m = grab(r"Category:\s*(.*?)\s*>\s*(.*?)$", re.MULTILINE)
    info["category"] = m.group(1).strip().lower() if m else None
    info["subcategory"] = m.group(2).strip().lower() if m else None

    m = grab(r"Language:\s*(.*?)$", re.MULTILINE)
    info["language"] = m.group(1).strip().lower() if m else None

    # Total size: normalise GB/MB/KB to a byte count.
    m = grab(r"Total Size:\s*([\d.]+)\s*(GB|MB|KB)", re.IGNORECASE)
    if m:
        multipliers = {"GB": 1024 ** 3, "MB": 1024 ** 2, "KB": 1024}
        info["total_size"] = float(m.group(1)) * multipliers[m.group(2).upper()]
    else:
        info["total_size"] = None

    m = grab(r"Info Hash:\s*(.*?)$", re.MULTILINE)
    info["hash"] = m.group(1).strip().lower() if m else None

    m = grab(r"Added By:\s*(.*?)\s*Added", re.MULTILINE)
    info["uploader"] = m.group(1).strip().lower() if m else None

    m = grab(r"Added:\s*(.*?)$", re.MULTILINE)
    info["date"] = m.group(1).strip().lower() if m else None

    m = grab(r"Seeds:?\s*(\d+)")
    info["seeds"] = int(m.group(1)) if m else None

    m = grab(r"Leechers:?\s*(\d+)")
    info["peers"] = int(m.group(1)) if m else None

    # IMDb id lives in the raw HTML (links), not in the visible text.
    info["imdb_id"] = next(extract_imdb_code(html_content), None)

    # Description: everything up to the comments section.
    m = grab(r"Description(.*?)User comments", re.IGNORECASE | re.DOTALL)
    info["description"] = m.group(1).strip().lower() if m else None

    # Magnet link is only present in the markup itself.
    m = grab(r"magnet:\?xt=urn:btih:[a-z0-9]+[^'\"]+", re.IGNORECASE, haystack=html_content)
    info["magnet_url"] = m.group(0) if m else None

    searcher = SubtitleSearcher(hint_words={
        "english",
        "eng",
        "dutch",
        "dut"
    })
    info["subtitles"] = searcher.search_subtitles(page_text)

    return info
@@ -0,0 +1,170 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
from urllib.parse import urljoin
|
3
|
+
import dateparser
|
4
|
+
from plexflow.utils.strings.filesize import parse_size
|
5
|
+
from plexflow.utils.imdb.imdb_codes import extract_imdb_code
|
6
|
+
import re
|
7
|
+
from plexflow.utils.torrent.files import TorrentFile
|
8
|
+
from plexflow.utils.torrent.extract.common import torrent_detail_extract
|
9
|
+
|
10
|
+
@torrent_detail_extract
def extract_torrent_details(html_content, **torrent_details):
    """Extracts specific torrent details from the given HTML content,
    with increased robustness against HTML structure changes and case sensitivity.

    Args:
        html_content: The HTML content of the torrent page.
        **torrent_details: Fields pre-populated by the
            ``torrent_detail_extract`` decorator; extended in place.

    Returns:
        A dictionary containing the extracted torrent details.
    """

    soup = BeautifulSoup(html_content, 'html.parser')

    # Work on visible text only; markup is too volatile to rely on.
    all_text = soup.get_text()

    # Torrent name — page titles look like "... : <name> : Search ...".
    # Guarded: the original unconditional .group(1) raised AttributeError
    # on pages without that title pattern.
    name_match = re.search(r':\s*([^:]+)\s*:\s*Search', all_text, re.IGNORECASE)
    if name_match:
        torrent_details['release_name'] = name_match.group(1).strip()

    # Torrent size (number + unit), plus a normalised byte count.
    size_text = re.search(r'Size:\s*(\d+\.?\d*\s*\w+)', all_text, re.IGNORECASE)
    if size_text:
        torrent_details['torrent_size'] = size_text.group(1).strip()
        torrent_details["size_bytes"] = next(iter(parse_size(torrent_details['torrent_size'])), None)

    # Peers (Leechers)
    peers_text = re.search(r'Leechers:\s*(\d+)', all_text, re.IGNORECASE)
    if peers_text:
        torrent_details['peers'] = int(peers_text.group(1))

    # Seeds
    seeds_text = re.search(r'Seeders:\s*(\d+)', all_text, re.IGNORECASE)
    if seeds_text:
        torrent_details['seeds'] = int(seeds_text.group(1))

    # Total files
    files_text = re.search(r'Files:\s*(\d+)', all_text, re.IGNORECASE)
    if files_text:
        torrent_details['total_files'] = int(files_text.group(1))

    # Date of upload — dateparser.parse returns None for unparseable
    # input, and calling .isoformat() on None would crash; guard it.
    upload_text = re.search(r'Added:\s*([^,]+)', all_text, re.IGNORECASE)
    if upload_text:
        parsed_date = dateparser.parse(upload_text.group(1).strip())
        if parsed_date is not None:
            torrent_details['date'] = parsed_date.isoformat()

    # Uploader
    uploader_text = re.search(r'Uploader:\s*([^<]+)', all_text, re.IGNORECASE)
    if uploader_text:
        torrent_details['uploader'] = uploader_text.group(1).strip()

    # Info hash (hex string)
    info_hash_text = re.search(r'Info Hash:\s*([\dA-F]+)', all_text, re.IGNORECASE)
    if info_hash_text:
        torrent_details['hash'] = info_hash_text.group(1).strip()

    # File list: entries sit between the "Files: N" marker and the
    # "Multiple Quality Available" section, one file per line.
    torrent_details['file_list'] = []
    file_list_start = re.search(r'Files:\s*\d+', all_text, re.IGNORECASE)
    if file_list_start:
        file_list_start_index = file_list_start.end()
        file_list_end = re.search(r'Multiple Quality Available', all_text, re.IGNORECASE)
        if file_list_end:
            file_list_text = all_text[file_list_start_index:file_list_end.start()]

            for entry in file_list_text.strip().splitlines():
                # Each entry is expected to read "<name> <number> <unit>".
                name_match = re.search(r'(.+)\s*(\d+\.?\d*\s*(GB|MB|KB|B))', entry, re.IGNORECASE)
                torrent_details['file_list'].append(TorrentFile(
                    name=name_match.group(1).strip() if name_match else None,
                    size=name_match.group(2).strip() if name_match else None,
                ))

    # Category
    category_text = re.search(r'Category:\s*([^<]+)', all_text, re.IGNORECASE)
    if category_text:
        torrent_details['category'] = category_text.group(1).strip()

    return torrent_details
|
95
|
+
|
96
|
+
|
97
|
+
def _count_from_cell(cell):
    # Parse a seeders/leechers table cell into an int; 0 when the cell is
    # missing or its text is not a plain number (the original int() call
    # raised ValueError on non-numeric cells).
    text = cell.text.strip() if cell else ''
    return int(text) if text.isdigit() else 0


def extract_torrent_results(html):
    """Extracts torrent information from HTML, resilient to HTML structure changes.

    Args:
        html: The HTML content of the page.

    Returns:
        A list of dictionaries, each containing torrent information:
        - 'name': The torrent's name (slug from the detail link).
        - 'link': The link to the torrent detail page.
        - 'imdb': IMDb id, when one of the title-cell links carries it.
        - 'type': The category of the torrent.
        - 'added'/'date': The date the torrent was added (raw / parsed).
        - 'size'/'size_bytes': The size of the torrent (raw / bytes).
        - 'seeds': The number of seeders.
        - 'peers': The number of leechers.
    """

    torrents = []
    soup = BeautifulSoup(html, 'html.parser')

    # Find all table rows that likely contain torrent information
    rows = soup.find_all('tr', class_='list-entry')

    for row in rows:
        cells = row.find_all('td')
        # Malformed/ad rows lack the full column set; skip them instead of
        # raising IndexError on the positional lookups below.
        if len(cells) < 8:
            continue

        torrent = {}

        # Title (get full torrent name from the link href)
        title_cell = cells[1]
        wrapper = title_cell.find('div', class_='wrapper')
        title_link = wrapper.find('a', recursive=False) if wrapper else None
        if title_link:
            torrent['link'] = urljoin('https://therarbg.com/', title_link['href'])
            # The slug after the last '/' is the torrent name.
            torrent['name'] = torrent['link'].rstrip('/').split('/')[-1]
        else:
            # No link found: fall back to the cell's visible text.
            torrent['name'] = title_cell.text.strip()
            torrent['link'] = ''

        # Scan every link in the title cell for an IMDb id.
        for link in title_cell.find_all('a'):
            href = link.get('href')
            if isinstance(href, str):
                imdb_code = next(extract_imdb_code(href), None)
                if isinstance(imdb_code, str):
                    torrent['imdb'] = imdb_code
                    break

        # Category
        torrent['type'] = cells[2].find('a').text.strip() if cells[2].find('a') else ''

        # Added (raw text + best-effort parsed date; dateparser returns
        # None for text it cannot understand)
        torrent['added'] = cells[3].text.strip()
        torrent['date'] = dateparser.parse(torrent['added'])

        # Size
        torrent['size'] = cells[5].text.strip()
        torrent['size_bytes'] = next(iter(parse_size(torrent['size'])), None)

        # Seeders and Leechers
        torrent['seeds'] = _count_from_cell(cells[6])
        torrent['peers'] = _count_from_cell(cells[7])

        torrents.append(torrent)

    return torrents
|
@@ -0,0 +1,171 @@
|
|
1
|
+
from bs4 import BeautifulSoup
|
2
|
+
import dateparser
|
3
|
+
from plexflow.utils.strings.filesize import parse_size
|
4
|
+
import re
|
5
|
+
from plexflow.utils.strings.filesize import parse_size
|
6
|
+
from plexflow.utils.torrent.files import TorrentFile
|
7
|
+
from plexflow.utils.torrent.extract.common import torrent_detail_extract
|
8
|
+
|
9
|
+
@torrent_detail_extract
def extract_torrent_details(html_content, **torrent_details):
    """Extracts specific torrent details from the given HTML content,
    with increased robustness against HTML structure changes and case sensitivity.

    Args:
        html_content: The HTML content of the torrent page.
        **torrent_details: Fields pre-populated by the
            ``torrent_detail_extract`` decorator; extended in place.

    Returns:
        A dictionary containing the extracted torrent details.
    """

    soup = BeautifulSoup(html_content, 'html.parser')

    # Torrent name from the 'header-content' element.
    # Guarded: the original chained .get_text() directly on find(), which
    # raised AttributeError when the element was absent.
    header = soup.find('div', class_='header-content')
    if header is not None:
        torrent_details['release_name'] = header.get_text().strip()

    # Extract all text from the HTML
    all_text = soup.get_text()

    # Torrent size (capture only the number + unit)
    size_text = re.search(r'Total Size:\s*(\d+\.?\d*\s*\w+)', all_text, re.IGNORECASE)
    if size_text:
        torrent_details['torrent_size'] = size_text.group(1).strip()
        torrent_details["size_bytes"] = next(iter(parse_size(torrent_details['torrent_size'])), None)

    # Peers (Leechers)
    peers_text = re.search(r'Leechers:\s*\d+', all_text, re.IGNORECASE)
    if peers_text:
        torrent_details['peers'] = int(peers_text.group(0).split()[-1])

    # Seeds
    seeds_text = re.search(r'Seeders:\s*\d+', all_text, re.IGNORECASE)
    if seeds_text:
        torrent_details['seeds'] = int(seeds_text.group(0).split()[-1])

    # Total files
    files_text = re.search(r'Total Files:\s*\d+', all_text, re.IGNORECASE)
    if files_text:
        torrent_details['total_files'] = int(files_text.group(0).split()[-1])

    # Date of upload (kept as the raw "dd-Mon-yyyy" token)
    upload_text = re.search(r'Uploaded:\s*\d{1,2}-\w{3}-\d{4}', all_text, re.IGNORECASE)
    if upload_text:
        torrent_details['date'] = upload_text.group(0).split()[-1]

    # Uploader
    uploader_text = re.search(r'Uploader:\s*\w+', all_text, re.IGNORECASE)
    if uploader_text:
        torrent_details['uploader'] = uploader_text.group(0).split()[-1]

    # Info hash (hex string)
    info_hash_text = re.search(r'Info Hash:\s*[\dA-F]+', all_text, re.IGNORECASE)
    if info_hash_text:
        torrent_details['hash'] = info_hash_text.group(0).split()[-1]

    # File list: entries sit between "File List Information" and the
    # "Related Downloads"/"Help Downloading" sections, one per line.
    torrent_details['file_list'] = []
    file_list_start = re.search(r'File List Information', all_text, re.IGNORECASE)
    if file_list_start:
        file_list_start_index = file_list_start.end()
        file_list_end = re.search(r'Related Downloads|Help Downloading', all_text, re.IGNORECASE)
        if file_list_end:
            file_list_text = all_text[file_list_start_index:file_list_end.start()]

            for entry in file_list_text.strip().splitlines():
                # File name precedes a parenthesised size, e.g. "foo.mkv (1.2 GB)".
                name_match = re.search(r'(.+)\s*\(\d+\.?\d*\s*(GB|MB|KB|B)\)', entry, re.IGNORECASE)
                # The size is always the last parenthesised group in the entry.
                size_matches = re.findall(r'\(([^)]+)\)', entry, re.IGNORECASE)
                # (Removed stray debug print(size_matches) left in the original.)

                name = name_match.group(1).strip() if name_match else None
                size = size_matches[-1].strip() if size_matches else None

                torrent_details['file_list'].append(TorrentFile(
                    name=name,
                    size=size,
                ))

    # Category
    category_text = re.search(r'Category:\s*\w+', all_text, re.IGNORECASE)
    if category_text:
        torrent_details['category'] = category_text.group(0).split()[-1]

    return torrent_details
|
102
|
+
|
103
|
+
|
104
|
+
def extract_torrent_results(html):
    """
    Extracts torrent results from HTML content, resilient to changes in HTML structure.

    Args:
        html: The HTML content as a string.

    Returns:
        A list of dictionaries, each representing a torrent result with keys:
        - 'magnet_link': The magnet link for the torrent.
        - 'name'/'link': The torrent's name and detail-page URL.
        - 'age'/'date': The age of the torrent (raw / parsed).
        - 'type': The type of the torrent (e.g., movie, game, etc.).
        - 'files': The number of files in the torrent.
        - 'size'/'size_bytes': The size of the torrent (raw / bytes).
        - 'seeds': The number of seeders.
        - 'peers': The number of leechers.
    """

    soup = BeautifulSoup(html, 'html.parser')
    torrent_results = []

    # Anchors whose href contains 'magnet'. BeautifulSoup passes None to
    # the filter for tags without an href, and the original bare
    # `'magnet' in href` raised TypeError on those — guard against it.
    magnet_links = soup.find_all('a', href=lambda href: href is not None and 'magnet' in href)

    # Iterate over each magnet link
    for magnet_link in magnet_links:
        torrent_result = {'magnet_link': magnet_link['href']}

        # Find the parent 'tr' (table row) of the magnet link
        parent_row = magnet_link.find_parent('tr')
        if parent_row:
            cols = parent_row.find_all('td')

            # Extract data from columns based on their position
            # (assuming consistent layout within the table row)
            if len(cols) >= 8:
                # Name + detail link — guarded: the name cell may lack an
                # anchor, in which case the original crashed on .text.
                name_anchor = cols[1].find('a')
                if name_anchor is not None:
                    torrent_result['name'] = name_anchor.text.strip()
                    # make the relative href a full link
                    torrent_result['link'] = f"https://torrentquest.com{name_anchor['href']}"

                age = cols[2].text.strip()
                torrent_result['age'] = age
                if isinstance(age, str):
                    # "3 days" -> "3 days ago" so dateparser can resolve it.
                    torrent_result['date'] = dateparser.parse(f"{age} ago")
                else:
                    torrent_result['date'] = None

                torrent_result['type'] = cols[3].text.strip().lower()
                torrent_result['files'] = cols[4].text.strip()

                size_human = cols[5].text.strip()
                torrent_result['size'] = size_human
                if isinstance(size_human, str):
                    # next(iter(...)) instead of indexing: parse_size may
                    # yield lazily — this matches how the sibling
                    # extractors in this package consume it.
                    torrent_result['size_bytes'] = next(iter(parse_size(size_human)), None)
                else:
                    torrent_result['size_bytes'] = None

                torrent_result['seeds'] = cols[6].text.strip()
                torrent_result['peers'] = cols[7].text.strip()

        torrent_results.append(torrent_result)

    return torrent_results
|
@@ -0,0 +1,36 @@
|
|
1
|
+
from pydantic import BaseModel
|
2
|
+
from plexflow.utils.strings.filesize import parse_size
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import Optional
|
5
|
+
|
6
|
+
class TorrentFile(BaseModel):
    """A single file contained in a torrent, described by name and a
    human-readable size string (e.g. "1.4 GB")."""
    name: Optional[str]
    size: Optional[str]

    @property
    def size_bytes(self):
        """Size converted to bytes via parse_size, or None when unknown."""
        if not self.size:
            return None
        return next(iter(parse_size(self.size)), None)

    @property
    def size_human(self):
        """The size exactly as provided (human-readable string)."""
        return self.size

    @property
    def extension(self):
        """File extension without the leading dot, or None when unnamed."""
        if not self.name:
            return None
        return Path(self.name).suffix.lstrip('.')

    def __str__(self) -> str:
        return f"{self.name} [({self.size_human})][{self.extension}][{self.size_bytes} bytes]"

    def __repr__(self) -> str:
        return str(self)
|
27
|
+
|
28
|
+
class TorrentSubtitle(BaseModel):
    """A subtitle track bundled with a torrent: language plus file name."""
    language: Optional[str]
    name: Optional[str]

    def __str__(self) -> str:
        return f"{self.language} - {self.name}"

    def __repr__(self) -> str:
        return str(self)
|
@@ -0,0 +1,90 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
def extract_hash(s: str):
    """
    Lazily yield every potential SHA-1 hash found in *s*.

    A generator is used so callers can stop early and large inputs do
    not require materializing a list of all matches.

    Parameters:
        s (str): The string to scan for 40-hex-digit tokens.

    Yields:
        str: Each candidate SHA-1 hash, in order of appearance.

    Examples:
        >>> list(extract_hash('Here is a SHA-1 hash: 5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8'))
        ['5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8']

        >>> list(extract_hash('No hashes here!'))
        []
    """
    # A SHA-1 digest is exactly 40 hexadecimal characters; \b keeps us
    # from matching the interior of a longer hex run.
    sha1_pattern = r'\b[A-Fa-f0-9]{40}\b'
    yield from (found.group(0) for found in re.finditer(sha1_pattern, s))
|
31
|
+
|
32
|
+
|
33
|
+
def extract_torrent_hash(magnet: str):
    """
    Extract the torrent info-hash from a magnet link.

    The hash normally follows 'xt=urn:btih:' in the magnet link; when
    that marker is absent the whole string is scanned, so a bare hash
    is still found.

    Parameters:
        magnet (str): The magnet link (or arbitrary string) to scan.

    Returns:
        str: The first 40-hex-digit hash found, or None if there is none.

    Examples:
        >>> extract_torrent_hash('magnet:?xt=urn:btih:5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8')
        '5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8'

        >>> extract_torrent_hash('No hashes here!') is None
        True
    """
    marker = 'xt=urn:btih:'
    idx = magnet.find(marker)
    # BUG FIX: str.find returns -1 when the marker is missing; the old
    # code then computed start = -1 + len(marker) = 11 and silently
    # skipped the first 11 characters of the input.  Scan after the
    # marker when it is present, otherwise from the beginning.
    start = idx + len(marker) if idx != -1 else 0

    # First 40-hex-digit token after the chosen start, or None.
    match = re.search(r'\b[A-Fa-f0-9]{40}\b', magnet[start:])
    return match.group(0) if match else None
|
61
|
+
|
62
|
+
|
63
|
+
def extract_magnet(text: str):
    """
    Lazily yield every magnet link found in *text*.

    Using a generator avoids building a full list of links up front,
    which matters for large inputs with many matches.

    Parameters:
        text (str): The string to scan for magnet links.

    Yields:
        str: Each extracted magnet link, in order of appearance.

    Examples:
        >>> list(extract_magnet('Here is a magnet link: magnet:?xt=urn:btih:5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8'))
        ['magnet:?xt=urn:btih:5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8']

        >>> list(extract_magnet('No magnet links here!'))
        []
    """
    # Magnet URI with a BitTorrent info-hash; IGNORECASE tolerates
    # variants such as 'MAGNET:?XT=URN:BTIH:...'.
    magnet_pattern = re.compile(r'magnet:\?xt=urn:btih:[A-Fa-f0-9]+', re.IGNORECASE)
    for hit in magnet_pattern.finditer(text):
        yield hit.group(0)
|
File without changes
|
@@ -0,0 +1,40 @@
|
|
1
|
+
import whisper
|
2
|
+
|
3
|
+
def transcribe_audio(file_path, model: str = 'medium'):
    """
    Transcribe an audio file using the Whisper model.

    Args:
        file_path (str): The path to the audio file to transcribe.
        model (str): Name of the Whisper model to load (default 'medium').

    Returns:
        str: The transcription of the audio file.

    Raises:
        FileNotFoundError: If the audio file does not exist.
        Exception: Propagated unchanged from model loading or transcription.

    Example:
        >>> transcribe_audio('path/to/your/audio/file.mp3')  # doctest: +SKIP
        'This is the transcribed text from your audio file.'

    Note:
        This function assumes that you have the Whisper model available locally.
    """
    # Fail fast with the documented FileNotFoundError if the file is
    # missing: opening (and immediately closing) it is a cheap check.
    # The original wrapped this in `except FileNotFoundError: raise e`,
    # a no-op wrapper that was removed.
    with open(file_path, 'rb'):
        pass

    # Use a distinct local name; the original re-bound the `model`
    # parameter to the loaded model object, shadowing the model *name*.
    whisper_model = whisper.load_model(model)

    # Transcribe and return just the text portion of the result dict.
    result = whisper_model.transcribe(file_path)
    return result["text"]
|
File without changes
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import subprocess
|
2
|
+
import json
|
3
|
+
from pydantic import BaseModel
|
4
|
+
from typing import List, Optional
|
5
|
+
|
6
|
+
class SubtitleStream(BaseModel):
    """A single subtitle stream inside a video container.

    Attributes are the stream's container index and its language tag
    (None when the container carries no language metadata).
    """
    index: int
    lang: Optional[str]

    class Config:
        # Example payload surfaced in generated schemas.
        schema_extra = {
            "example": {"index": 0, "lang": "en"}
        }
|
17
|
+
|
18
|
+
def get_subtitles(video_path: str) -> List[SubtitleStream]:
    """
    Get the subtitle streams of a video file using ffprobe.

    Args:
        video_path (str): The path to the video file.

    Returns:
        List[SubtitleStream]: A list of SubtitleStream objects, each
        representing a subtitle stream found in the container.

    Raises:
        FileNotFoundError: If the video file does not exist.
        ValueError: If the output from the command could not be parsed.

    Examples:
        >>> subtitles = get_subtitles("/path/to/your/video.mp4")  # doctest: +SKIP
        >>> for subtitle in subtitles:  # doctest: +SKIP
        ...     print(f"Subtitle stream index: {subtitle.index}, language: {subtitle.lang}")
    """
    # BUG FIX: the original called os.path.isfile without importing os,
    # raising NameError on every call.  Import locally so this fix is
    # self-contained.
    import os

    if not os.path.isfile(video_path):
        raise FileNotFoundError(f"No such file: '{video_path}'")

    # -select_streams s restricts the probe to subtitle streams only;
    # JSON output is the machine-readable form ffprobe offers.
    command = [
        'ffprobe',
        '-v', 'quiet',
        '-print_format', 'json',
        '-show_streams',
        '-select_streams', 's',
        video_path
    ]

    output = subprocess.run(command, capture_output=True, text=True)

    try:
        probe = json.loads(output.stdout)
    except json.JSONDecodeError as e:
        raise ValueError("Could not parse command output") from e

    subtitle_streams = []
    # ffprobe can emit an empty JSON object when it fails quietly;
    # treat a missing 'streams' key the same as "no subtitle streams".
    for stream in probe.get('streams', []):
        tags = stream.get('tags', {})
        subtitle_streams.append(SubtitleStream(
            index=stream['index'],
            # Language tag is optional in the container metadata.
            lang=tags.get('language'),
        ))

    return subtitle_streams
|