plexflow 0.0.64__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plexflow/__init__.py +0 -0
- plexflow/__main__.py +15 -0
- plexflow/core/.DS_Store +0 -0
- plexflow/core/__init__.py +0 -0
- plexflow/core/context/__init__.py +0 -0
- plexflow/core/context/metadata/__init__.py +0 -0
- plexflow/core/context/metadata/context.py +32 -0
- plexflow/core/context/metadata/tmdb/__init__.py +0 -0
- plexflow/core/context/metadata/tmdb/context.py +45 -0
- plexflow/core/context/partial_context.py +46 -0
- plexflow/core/context/partials/__init__.py +8 -0
- plexflow/core/context/partials/cache.py +16 -0
- plexflow/core/context/partials/context.py +12 -0
- plexflow/core/context/partials/ids.py +37 -0
- plexflow/core/context/partials/movie.py +115 -0
- plexflow/core/context/partials/tgx_batch.py +33 -0
- plexflow/core/context/partials/tgx_context.py +34 -0
- plexflow/core/context/partials/torrents.py +23 -0
- plexflow/core/context/partials/watchlist.py +35 -0
- plexflow/core/context/plexflow_context.py +29 -0
- plexflow/core/context/plexflow_property.py +36 -0
- plexflow/core/context/root/__init__.py +0 -0
- plexflow/core/context/root/context.py +25 -0
- plexflow/core/context/select/__init__.py +0 -0
- plexflow/core/context/select/context.py +45 -0
- plexflow/core/context/torrent/__init__.py +0 -0
- plexflow/core/context/torrent/context.py +43 -0
- plexflow/core/context/torrent/tpb/__init__.py +0 -0
- plexflow/core/context/torrent/tpb/context.py +45 -0
- plexflow/core/context/torrent/yts/__init__.py +0 -0
- plexflow/core/context/torrent/yts/context.py +45 -0
- plexflow/core/context/watchlist/__init__.py +0 -0
- plexflow/core/context/watchlist/context.py +46 -0
- plexflow/core/downloads/__init__.py +0 -0
- plexflow/core/downloads/candidates/__init__.py +0 -0
- plexflow/core/downloads/candidates/download_candidate.py +210 -0
- plexflow/core/downloads/candidates/filtered.py +51 -0
- plexflow/core/downloads/candidates/utils.py +39 -0
- plexflow/core/env/__init__.py +0 -0
- plexflow/core/env/env.py +31 -0
- plexflow/core/genai/__init__.py +0 -0
- plexflow/core/genai/bot.py +9 -0
- plexflow/core/genai/plexa.py +54 -0
- plexflow/core/genai/torrent/imdb_verify.py +65 -0
- plexflow/core/genai/torrent/movie.py +25 -0
- plexflow/core/genai/utils/__init__.py +0 -0
- plexflow/core/genai/utils/loader.py +5 -0
- plexflow/core/metadata/__init__.py +0 -0
- plexflow/core/metadata/auto/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_meta.py +40 -0
- plexflow/core/metadata/auto/auto_providers/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/auto/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/auto/episode.py +49 -0
- plexflow/core/metadata/auto/auto_providers/auto/item.py +55 -0
- plexflow/core/metadata/auto/auto_providers/auto/movie.py +13 -0
- plexflow/core/metadata/auto/auto_providers/auto/season.py +43 -0
- plexflow/core/metadata/auto/auto_providers/auto/show.py +26 -0
- plexflow/core/metadata/auto/auto_providers/imdb/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/imdb/movie.py +36 -0
- plexflow/core/metadata/auto/auto_providers/imdb/show.py +45 -0
- plexflow/core/metadata/auto/auto_providers/moviemeter/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/moviemeter/movie.py +40 -0
- plexflow/core/metadata/auto/auto_providers/plex/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/plex/movie.py +39 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/episode.py +30 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/movie.py +36 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/season.py +23 -0
- plexflow/core/metadata/auto/auto_providers/tmdb/show.py +41 -0
- plexflow/core/metadata/auto/auto_providers/tmdb.py +92 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/__init__.py +0 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/episode.py +28 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/movie.py +36 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/season.py +25 -0
- plexflow/core/metadata/auto/auto_providers/tvdb/show.py +41 -0
- plexflow/core/metadata/providers/__init__.py +0 -0
- plexflow/core/metadata/providers/imdb/__init__.py +0 -0
- plexflow/core/metadata/providers/imdb/datatypes.py +53 -0
- plexflow/core/metadata/providers/imdb/imdb.py +112 -0
- plexflow/core/metadata/providers/moviemeter/__init__.py +0 -0
- plexflow/core/metadata/providers/moviemeter/datatypes.py +111 -0
- plexflow/core/metadata/providers/moviemeter/moviemeter.py +42 -0
- plexflow/core/metadata/providers/plex/__init__.py +0 -0
- plexflow/core/metadata/providers/plex/datatypes.py +693 -0
- plexflow/core/metadata/providers/plex/plex.py +167 -0
- plexflow/core/metadata/providers/tmdb/__init__.py +0 -0
- plexflow/core/metadata/providers/tmdb/datatypes.py +460 -0
- plexflow/core/metadata/providers/tmdb/tmdb.py +85 -0
- plexflow/core/metadata/providers/tvdb/__init__.py +0 -0
- plexflow/core/metadata/providers/tvdb/datatypes.py +257 -0
- plexflow/core/metadata/providers/tvdb/tv_datatypes.py +554 -0
- plexflow/core/metadata/providers/tvdb/tvdb.py +65 -0
- plexflow/core/metadata/providers/universal/__init__.py +0 -0
- plexflow/core/metadata/providers/universal/movie.py +130 -0
- plexflow/core/metadata/providers/universal/old.py +192 -0
- plexflow/core/metadata/providers/universal/show.py +107 -0
- plexflow/core/plex/__init__.py +0 -0
- plexflow/core/plex/api/context/authorized.py +15 -0
- plexflow/core/plex/api/context/discover.py +14 -0
- plexflow/core/plex/api/context/library.py +14 -0
- plexflow/core/plex/discover/__init__.py +0 -0
- plexflow/core/plex/discover/activity.py +448 -0
- plexflow/core/plex/discover/comment.py +89 -0
- plexflow/core/plex/discover/feed.py +11 -0
- plexflow/core/plex/hooks/__init__.py +0 -0
- plexflow/core/plex/hooks/plex_authorized.py +60 -0
- plexflow/core/plex/hooks/plexflow_database.py +6 -0
- plexflow/core/plex/library/__init__.py +0 -0
- plexflow/core/plex/library/library.py +103 -0
- plexflow/core/plex/token/__init__.py +0 -0
- plexflow/core/plex/token/auto_token.py +91 -0
- plexflow/core/plex/utils/__init__.py +0 -0
- plexflow/core/plex/utils/paginated.py +39 -0
- plexflow/core/plex/watchlist/__init__.py +0 -0
- plexflow/core/plex/watchlist/datatypes.py +124 -0
- plexflow/core/plex/watchlist/watchlist.py +23 -0
- plexflow/core/storage/__init__.py +0 -0
- plexflow/core/storage/object/__init__.py +0 -0
- plexflow/core/storage/object/plexflow_storage.py +143 -0
- plexflow/core/storage/object/redis_storage.py +169 -0
- plexflow/core/subtitles/__init__.py +0 -0
- plexflow/core/subtitles/providers/__init__.py +0 -0
- plexflow/core/subtitles/providers/auto_subtitles.py +48 -0
- plexflow/core/subtitles/providers/oss/__init__.py +0 -0
- plexflow/core/subtitles/providers/oss/datatypes.py +104 -0
- plexflow/core/subtitles/providers/oss/download.py +48 -0
- plexflow/core/subtitles/providers/oss/old.py +144 -0
- plexflow/core/subtitles/providers/oss/oss.py +400 -0
- plexflow/core/subtitles/providers/oss/oss_subtitle.py +32 -0
- plexflow/core/subtitles/providers/oss/search.py +52 -0
- plexflow/core/subtitles/providers/oss/unlimited_oss.py +231 -0
- plexflow/core/subtitles/providers/oss/utils/__init__.py +0 -0
- plexflow/core/subtitles/providers/oss/utils/config.py +63 -0
- plexflow/core/subtitles/providers/oss/utils/download_client.py +22 -0
- plexflow/core/subtitles/providers/oss/utils/exceptions.py +35 -0
- plexflow/core/subtitles/providers/oss/utils/file_utils.py +83 -0
- plexflow/core/subtitles/providers/oss/utils/languages.py +78 -0
- plexflow/core/subtitles/providers/oss/utils/response_base.py +221 -0
- plexflow/core/subtitles/providers/oss/utils/responses.py +176 -0
- plexflow/core/subtitles/providers/oss/utils/srt.py +561 -0
- plexflow/core/subtitles/results/__init__.py +0 -0
- plexflow/core/subtitles/results/subtitle.py +170 -0
- plexflow/core/torrents/__init__.py +0 -0
- plexflow/core/torrents/analyzers/analyzed_torrent.py +143 -0
- plexflow/core/torrents/analyzers/analyzer.py +45 -0
- plexflow/core/torrents/analyzers/torrentquest/analyzer.py +47 -0
- plexflow/core/torrents/auto/auto_providers/auto/__init__.py +0 -0
- plexflow/core/torrents/auto/auto_providers/auto/torrent.py +64 -0
- plexflow/core/torrents/auto/auto_providers/tpb/torrent.py +62 -0
- plexflow/core/torrents/auto/auto_torrents.py +29 -0
- plexflow/core/torrents/providers/__init__.py +0 -0
- plexflow/core/torrents/providers/ext/__init__.py +0 -0
- plexflow/core/torrents/providers/ext/ext.py +18 -0
- plexflow/core/torrents/providers/ext/utils.py +64 -0
- plexflow/core/torrents/providers/extratorrent/__init__.py +0 -0
- plexflow/core/torrents/providers/extratorrent/extratorrent.py +21 -0
- plexflow/core/torrents/providers/extratorrent/utils.py +66 -0
- plexflow/core/torrents/providers/eztv/__init__.py +0 -0
- plexflow/core/torrents/providers/eztv/eztv.py +47 -0
- plexflow/core/torrents/providers/eztv/utils.py +83 -0
- plexflow/core/torrents/providers/rarbg2/__init__.py +0 -0
- plexflow/core/torrents/providers/rarbg2/rarbg2.py +19 -0
- plexflow/core/torrents/providers/rarbg2/utils.py +76 -0
- plexflow/core/torrents/providers/snowfl/__init__.py +0 -0
- plexflow/core/torrents/providers/snowfl/snowfl.py +36 -0
- plexflow/core/torrents/providers/snowfl/utils.py +59 -0
- plexflow/core/torrents/providers/tgx/__init__.py +0 -0
- plexflow/core/torrents/providers/tgx/context.py +50 -0
- plexflow/core/torrents/providers/tgx/dump.py +40 -0
- plexflow/core/torrents/providers/tgx/tgx.py +22 -0
- plexflow/core/torrents/providers/tgx/utils.py +61 -0
- plexflow/core/torrents/providers/therarbg/__init__.py +0 -0
- plexflow/core/torrents/providers/therarbg/therarbg.py +17 -0
- plexflow/core/torrents/providers/therarbg/utils.py +61 -0
- plexflow/core/torrents/providers/torrentquest/__init__.py +0 -0
- plexflow/core/torrents/providers/torrentquest/torrentquest.py +20 -0
- plexflow/core/torrents/providers/torrentquest/utils.py +70 -0
- plexflow/core/torrents/providers/tpb/__init__.py +0 -0
- plexflow/core/torrents/providers/tpb/tpb.py +17 -0
- plexflow/core/torrents/providers/tpb/utils.py +139 -0
- plexflow/core/torrents/providers/yts/__init__.py +0 -0
- plexflow/core/torrents/providers/yts/utils.py +57 -0
- plexflow/core/torrents/providers/yts/yts.py +31 -0
- plexflow/core/torrents/results/__init__.py +0 -0
- plexflow/core/torrents/results/torrent.py +165 -0
- plexflow/core/torrents/results/universal.py +220 -0
- plexflow/core/torrents/results/utils.py +15 -0
- plexflow/events/__init__.py +0 -0
- plexflow/events/download/__init__.py +0 -0
- plexflow/events/download/torrent_events.py +96 -0
- plexflow/events/publish/__init__.py +0 -0
- plexflow/events/publish/publish.py +34 -0
- plexflow/logging/__init__.py +0 -0
- plexflow/logging/log_setup.py +8 -0
- plexflow/spiders/quiet_logger.py +9 -0
- plexflow/spiders/tgx/pipelines/dump_json_pipeline.py +30 -0
- plexflow/spiders/tgx/pipelines/meta_pipeline.py +13 -0
- plexflow/spiders/tgx/pipelines/publish_pipeline.py +14 -0
- plexflow/spiders/tgx/pipelines/torrent_info_pipeline.py +12 -0
- plexflow/spiders/tgx/pipelines/validation_pipeline.py +17 -0
- plexflow/spiders/tgx/settings.py +36 -0
- plexflow/spiders/tgx/spider.py +72 -0
- plexflow/utils/__init__.py +0 -0
- plexflow/utils/antibot/human_like_requests.py +122 -0
- plexflow/utils/api/__init__.py +0 -0
- plexflow/utils/api/context/http.py +62 -0
- plexflow/utils/api/rest/__init__.py +0 -0
- plexflow/utils/api/rest/antibot_restful.py +68 -0
- plexflow/utils/api/rest/restful.py +49 -0
- plexflow/utils/captcha/__init__.py +0 -0
- plexflow/utils/captcha/bypass/__init__.py +0 -0
- plexflow/utils/captcha/bypass/decode_audio.py +34 -0
- plexflow/utils/download/__init__.py +0 -0
- plexflow/utils/download/gz.py +26 -0
- plexflow/utils/filesystem/__init__.py +0 -0
- plexflow/utils/filesystem/search.py +129 -0
- plexflow/utils/gmail/__init__.py +0 -0
- plexflow/utils/gmail/mails.py +116 -0
- plexflow/utils/hooks/__init__.py +0 -0
- plexflow/utils/hooks/http.py +84 -0
- plexflow/utils/hooks/postgresql.py +93 -0
- plexflow/utils/hooks/redis.py +112 -0
- plexflow/utils/image/storage.py +36 -0
- plexflow/utils/imdb/__init__.py +0 -0
- plexflow/utils/imdb/imdb_codes.py +107 -0
- plexflow/utils/pubsub/consume.py +82 -0
- plexflow/utils/pubsub/produce.py +25 -0
- plexflow/utils/retry/__init__.py +0 -0
- plexflow/utils/retry/utils.py +38 -0
- plexflow/utils/strings/__init__.py +0 -0
- plexflow/utils/strings/filesize.py +55 -0
- plexflow/utils/strings/language.py +14 -0
- plexflow/utils/subtitle/search.py +76 -0
- plexflow/utils/tasks/decorators.py +78 -0
- plexflow/utils/tasks/k8s/task.py +70 -0
- plexflow/utils/thread_safe/safe_list.py +54 -0
- plexflow/utils/thread_safe/safe_set.py +69 -0
- plexflow/utils/torrent/__init__.py +0 -0
- plexflow/utils/torrent/analyze.py +118 -0
- plexflow/utils/torrent/extract/common.py +37 -0
- plexflow/utils/torrent/extract/ext.py +2391 -0
- plexflow/utils/torrent/extract/extratorrent.py +56 -0
- plexflow/utils/torrent/extract/kat.py +1581 -0
- plexflow/utils/torrent/extract/tgx.py +96 -0
- plexflow/utils/torrent/extract/therarbg.py +170 -0
- plexflow/utils/torrent/extract/torrentquest.py +171 -0
- plexflow/utils/torrent/files.py +36 -0
- plexflow/utils/torrent/hash.py +90 -0
- plexflow/utils/transcribe/__init__.py +0 -0
- plexflow/utils/transcribe/speech2text.py +40 -0
- plexflow/utils/video/__init__.py +0 -0
- plexflow/utils/video/subtitle.py +73 -0
- plexflow-0.0.64.dist-info/METADATA +71 -0
- plexflow-0.0.64.dist-info/RECORD +256 -0
- plexflow-0.0.64.dist-info/WHEEL +4 -0
- plexflow-0.0.64.dist-info/entry_points.txt +24 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
import re
|
2
|
+
|
3
|
+
class SubtitleSearcher:
    """
    Search free-form text for subtitle mentions.

    A mention is one of the configured hint words appearing after the word
    ``text``, ``subtitle`` or ``subtitles``.

    Attributes
    ----------
    hint_words : iterable of str
        words to be used as hints in the subtitle search

    Methods
    -------
    compile_subtitle_hint_pattern(hint_word):
        Returns a compiled regular expression pattern based on the provided hint word.
    search_subtitles(text):
        Searches the provided text and returns the distinct matched hint
        words as one comma-separated string.
    """

    def __init__(self, hint_words):
        """
        Store the hint words used by later searches.

        Parameters
        ----------
        hint_words : iterable of str
            words to be used as hints in the subtitle search
        """
        self.hint_words = hint_words

    def compile_subtitle_hint_pattern(self, hint_word):
        """
        Returns a compiled regular expression pattern based on the provided hint word.

        The pattern matches the whole word 'text', 'subtitle' or 'subtitles',
        followed by as few characters as possible (newlines included), followed
        by the hint word as a whole word. Matching is case-insensitive.

        The hint word is interpolated into the pattern as regular expression
        source, so regex metacharacters in it are interpreted, not escaped.

        Parameters
        ----------
        hint_word : str
            a word to be used as a hint in the subtitle search

        Returns
        -------
        pattern : re.Pattern
            a compiled regular expression pattern; group 3 is the hint word
        """
        return re.compile(
            rf'\b(text|subtitles?)\b(.*?)\b({hint_word})\b',
            re.IGNORECASE | re.DOTALL | re.UNICODE,
        )

    def search_subtitles(self, text):
        """
        Searches the provided text for every configured hint word.

        Each hint word is compiled into a pattern which is then run over the
        text. Matched hint words are stripped, lower-cased and de-duplicated.

        Parameters
        ----------
        text : str
            the text to be searched

        Returns
        -------
        matches : str
            the distinct matched hint words joined with ',' in sorted order,
            or an empty string when nothing matched or the text is empty
        """
        if not text:
            return ""

        found = set()
        for hint_word in self.hint_words:
            pattern = self.compile_subtitle_hint_pattern(hint_word)
            found.update(
                match.group(3).strip().lower() for match in pattern.finditer(text)
            )

        # An empty entry would turn into a stray comma in the joined result.
        found.discard("")

        # Sort so that the same input always produces the same string.
        return ",".join(sorted(found))
|
@@ -0,0 +1,78 @@
|
|
1
|
+
import inspect
|
2
|
+
from plexflow.core.context.partial_context import PartialContext
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import json
|
6
|
+
|
7
|
+
def plexflow(task_func):
    """
    Decorate a task function so its parameters are resolved automatically.

    When the wrapper is called it determines the DAG run id, derives a
    universal context id from it via ``PartialContext.create_universal_id``
    and then builds the keyword arguments for ``task_func`` by walking its
    signature:

    * a parameter whose name is present in the call's keyword arguments
      receives that value;
    * a parameter annotated with a ``PartialContext`` subclass receives a new
      instance built with ``context_id``, ``dag_run_id`` and ``default_ttl``;
    * any other parameter consumes the next positional argument; if none is
      left and the parameter has a default, the default is used.

    The run id comes from the ``AIRFLOW_RUN_ID`` environment variable when
    ``TASK_MODE`` is ``"k8s"``; otherwise it is read from ``kwargs['ti'].run_id``.
    In k8s mode, environment variables starting with ``ARG_`` are appended as
    positional arguments in sorted key order, and variables starting with
    ``KW_ARG_`` are JSON-decoded into ``{"key": ..., "value": ...}`` pairs and
    merged into the keyword arguments.

    Parameters
    ----------
    task_func : callable
        the task implementation to wrap

    Returns
    -------
    wrapper : callable
        a function accepting ``*args, **kwargs`` that returns whatever
        ``task_func`` returns
    """
    # Imported locally so the block does not depend on a file-level import.
    import functools

    @functools.wraps(task_func)  # keep the task's name/docstring on the wrapper
    def wrapper(*args, **kwargs):
        task_mode = os.getenv("TASK_MODE", "default")
        default_ttl = 3600

        if task_mode == "k8s":
            dag_run_id = os.getenv("AIRFLOW_RUN_ID", None)
            # setup logging level
            logging.basicConfig(level=logging.INFO)

            # Collect the extra arguments passed through the environment.
            # Keys are sorted by name so ARG_ values keep their order.
            # The full environment is deliberately not logged: it can
            # contain credentials.
            extra_args = []
            extra_kwargs = {}
            for key, value in sorted(os.environ.items(), key=lambda item: item[0]):
                if key.startswith("ARG_"):
                    extra_args.append(value)
                elif key.startswith("KW_ARG_"):
                    arg = json.loads(value)
                    extra_kwargs[arg["key"]] = arg["value"]

            logging.info(f"Extra args: {extra_args}")
            logging.info(f"Extra kwargs: {extra_kwargs}")

            # Now lets update the args and kwargs with the extra arguments
            args = list(args) + extra_args
            kwargs.update(extra_kwargs)
        else:
            # Raises AttributeError when no 'ti' keyword argument was passed.
            context = kwargs.get('ti', None)
            dag_run_id = context.run_id

        context_id = PartialContext.create_universal_id(dag_run_id)

        logging.info(f"Task mode: {task_mode}")
        logging.info(f"Context id: {context_id}")
        logging.info(f"Dag run id: {dag_run_id}")
        logging.info(f"Default TTL: {default_ttl}")
        logging.info(f"Universal id: {context_id}")

        sig = inspect.signature(task_func)
        logging.info(f"Function signature: {sig.parameters}")

        func_kwargs = {}
        pos_arg_index = 0

        for param_name, param in sig.parameters.items():
            arg_type = param.annotation

            if param_name in kwargs:
                func_kwargs[param_name] = kwargs[param_name]
            elif isinstance(arg_type, type) and issubclass(arg_type, PartialContext):
                # issubclass() raises TypeError for annotations that are not
                # classes (strings, generic aliases), hence the isinstance guard.
                func_kwargs[param_name] = arg_type(
                    context_id=context_id,
                    dag_run_id=dag_run_id,
                    default_ttl=default_ttl,
                )
            elif pos_arg_index >= len(args) and param.default is not inspect.Parameter.empty:
                # No positional value left: let the parameter's default apply.
                continue
            else:
                # Still raises IndexError when a required value is missing.
                func_kwargs[param_name] = args[pos_arg_index]
                pos_arg_index += 1

        return task_func(**func_kwargs)

    return wrapper
|
@@ -0,0 +1,70 @@
|
|
1
|
+
import json
|
2
|
+
import os
|
3
|
+
from airflow.providers.cncf.kubernetes.operators.pod import KubernetesPodOperator
|
4
|
+
from kubernetes.client import models as k8s
|
5
|
+
|
6
|
+
class PlexflowK8sOperator(KubernetesPodOperator):
    """
    KubernetesPodOperator that mounts default volumes and passes task
    arguments and Airflow run information to the pod as environment variables.

    Positional constructor arguments are stored in ``extra_args`` and an
    optional ``extra_kwargs`` dict is stored in ``extra_kwargs``; at execute
    time they are encoded as ``ARG_<index>`` and ``KW_ARG_<index>`` variables.
    """

    def __init__(self, *args, **kwargs):
        """
        Parameters
        ----------
        *args
            recorded as ``extra_args`` and also forwarded to the parent constructor
        **kwargs
            forwarded to the parent constructor, except ``extra_kwargs`` which
            is removed and stored on the instance. ``volumes`` and
            ``volume_mounts`` are extended with the defaults defined below.
        """
        self.extra_args = args
        self.extra_kwargs = kwargs.pop('extra_kwargs', {})

        # Define default volumes and volume mounts
        self.default_volumes = [
            k8s.V1Volume(
                name='dags-volume',
                persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='ssd-sync-dags-pvc'),
            ),
            k8s.V1Volume(
                name='logs-volume',
                persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='ssd-airflow-logs-pvc'),
            ),
        ]
        self.default_volume_mounts = [
            k8s.V1VolumeMount(
                name='dags-volume',
                mount_path='/dags',
                sub_path='dags',
            ),
            k8s.V1VolumeMount(
                name='logs-volume',
                mount_path='/opt/airflow/logs',
            ),
        ]

        # Build new lists: caller-supplied entries first, then the defaults.
        # The caller's lists are not mutated and the lists handed to the
        # parent are not the same objects as self.default_*.
        kwargs['volumes'] = list(kwargs.get('volumes') or []) + self.default_volumes
        kwargs['volume_mounts'] = (
            list(kwargs.get('volume_mounts') or []) + self.default_volume_mounts
        )

        super().__init__(*args, **kwargs)

    def execute(self, context):
        """
        Add the generated environment variables to the pod, then run it.

        Returns
        -------
        whatever the parent operator's ``execute`` returns
        """
        labels = self._generate_labels(context)
        env_vars = self._convert_labels_to_env_vars(labels)
        self.env_vars.extend(env_vars)
        # Propagate the parent's result instead of discarding it.
        return super().execute(context)

    def _generate_labels(self, context):
        """
        Build the name -> value mapping that is exported to the pod.

        The mapping contains the current process environment (keys
        upper-cased), the task mode and Airflow identifiers, and the encoded
        extra positional/keyword arguments.

        Parameters
        ----------
        context : mapping
            the Airflow context; ``context['ti']`` and ``context['run_id']`` are read

        Returns
        -------
        labels : dict
        """
        ti = context['ti']
        labels = {
            # Inherited environment goes first so that the explicit values
            # below always win over same-named variables of this process.
            **{key.upper(): value for key, value in os.environ.items()},
            'TASK_MODE': 'k8s',
            'AIRFLOW_DAG_ID': ti.dag_id,
            'AIRFLOW_TASK_ID': ti.task_id,
            'AIRFLOW_RUN_ID': context['run_id'],
            'AIRFLOW_TRY_NUMBER': str(ti.try_number),
            'AIRFLOW_MAX_TRIES': str(1 + ti.max_tries),
            # Zero-padded indices keep the keys in argument order when sorted by name.
            **{f"ARG_{i:05d}": str(arg) for i, arg in enumerate(self.extra_args)},
            **{
                f"KW_ARG_{i:05d}": json.dumps({"key": key, "value": value})
                for i, (key, value) in enumerate(self.extra_kwargs.items())
            },
        }
        return labels

    def _convert_labels_to_env_vars(self, labels):
        """Convert the mapping into a list of ``k8s.V1EnvVar`` objects."""
        return [k8s.V1EnvVar(name=k, value=v) for k, v in labels.items()]
|
@@ -0,0 +1,54 @@
|
|
1
|
+
import threading
|
2
|
+
|
3
|
+
class ThreadSafeList:
    """A list wrapper whose operations each run while holding one lock."""

    def __init__(self):
        """Create an empty list together with the lock guarding it."""
        self._lock = threading.Lock()
        self._list = []

    def append(self, item):
        """Add ``item`` at the end."""
        with self._lock:
            self._list.append(item)

    def insert(self, index, item):
        """Insert ``item`` before position ``index``."""
        with self._lock:
            self._list.insert(index, item)

    def remove(self, item):
        """Remove the first occurrence of ``item``; ValueError if absent."""
        with self._lock:
            self._list.remove(item)

    def to_list(self):
        """Return a plain ``list`` copy of the current contents."""
        with self._lock:
            snapshot = self._list.copy()
        return snapshot

    def __contains__(self, item):
        with self._lock:
            present = item in self._list
        return present

    def __getitem__(self, index):
        with self._lock:
            value = self._list[index]
        return value

    def __setitem__(self, index, value):
        with self._lock:
            self._list[index] = value

    def __delitem__(self, index):
        with self._lock:
            del self._list[index]

    def __iter__(self):
        # Iterate over a copy taken under the lock, not over the live list.
        with self._lock:
            snapshot = self._list.copy()
        return iter(snapshot)

    def __len__(self):
        with self._lock:
            size = len(self._list)
        return size

    def __str__(self) -> str:
        with self._lock:
            text = str(self._list)
        return text

    def __repr__(self) -> str:
        with self._lock:
            text = repr(self._list)
        return text
|
@@ -0,0 +1,69 @@
|
|
1
|
+
import threading
|
2
|
+
|
3
|
+
class ThreadSafeSet:
    """A set wrapper whose operations each run while holding one lock."""

    def __init__(self):
        """Create an empty set together with the lock guarding it."""
        self._set = set()
        self._lock = threading.Lock()

    def add(self, item):
        """Add ``item`` to the set."""
        with self._lock:
            self._set.add(item)

    def remove(self, item):
        """Remove ``item``; raises KeyError if it is not present."""
        with self._lock:
            self._set.remove(item)

    def __contains__(self, item):
        with self._lock:
            return item in self._set

    def __iter__(self):
        # Iterate over a copy taken under the lock, not over the live set.
        with self._lock:
            return iter(self._set.copy())

    def __len__(self):
        with self._lock:
            return len(self._set)

    def update(self, items):
        """Add every element of ``items`` to the set."""
        with self._lock:
            self._set.update(items)

    @staticmethod
    def _snapshot(other_set):
        """
        Return plain data for the right-hand operand of a set operation.

        A ThreadSafeSet operand is copied through its own ``to_set`` so its
        contents are read under its own lock. The copy is taken before this
        instance's lock is acquired, so the two locks are never held together.
        """
        if isinstance(other_set, ThreadSafeSet):
            return other_set.to_set()
        return other_set

    def difference(self, other_set):
        """Return a new ThreadSafeSet with the elements not in ``other_set``."""
        other_items = self._snapshot(other_set)
        with self._lock:
            result = self._set.difference(other_items)
        return ThreadSafeSet.from_set(result)

    def intersection(self, other_set):
        """Return a new ThreadSafeSet with the elements also in ``other_set``."""
        other_items = self._snapshot(other_set)
        with self._lock:
            result = self._set.intersection(other_items)
        return ThreadSafeSet.from_set(result)

    def union(self, other_set):
        """Return a new ThreadSafeSet with the elements of both operands."""
        other_items = self._snapshot(other_set)
        with self._lock:
            result = self._set.union(other_items)
        return ThreadSafeSet.from_set(result)

    def to_set(self):
        """Return a plain ``set`` copy of the current contents."""
        with self._lock:
            return set(self._set)

    def __str__(self) -> str:
        with self._lock:
            return str(self._set)

    def __repr__(self) -> str:
        with self._lock:
            return repr(self._set)

    @classmethod
    def from_set(cls, input_set):
        """
        Build an instance holding the elements of ``input_set``.

        The elements are copied, so later changes to ``input_set`` made by the
        caller do not modify this instance behind its lock.
        """
        instance = cls()
        instance._set = set(input_set)
        return instance
|
File without changes
|
@@ -0,0 +1,118 @@
|
|
1
|
+
from plexflow.core.torrents.results.torrent import Torrent
|
2
|
+
from plexflow.utils.imdb.imdb_codes import IMDbCode, extract_imdb_code
|
3
|
+
import requests
|
4
|
+
import logging
|
5
|
+
from plexflow.utils.subtitle.search import SubtitleSearcher
|
6
|
+
from bs4 import BeautifulSoup
|
7
|
+
|
8
|
+
class TorrentReport:
    """
    Result of inspecting a torrent.

    All values are supplied as keyword arguments; any that is omitted is
    stored as None.
    """

    # Quality tags (upper-case) for which acceptable_quality is False.
    _REJECTED_QUALITIES = frozenset({
        "CAM", "TS", "TC", "SCR", "DVDSCR",
        "SCREENER", "TELESYNC", "TELECINE", "DVDSCREENER",
        "BDSCR", "WEBSCREENER", "HDCAM",
    })

    def __init__(self, **kwargs) -> None:
        """
        Parameters
        ----------
        torrent : Torrent, optional
            the inspected torrent
        extracted_imdb_code : str, optional
            IMDb code extracted during inspection
        hardcoded : bool, optional
            whether hardcoded-subtitle markers were found
        korsub : bool, optional
            whether korsub markers were found
        subtitles : list, optional
            subtitles found during inspection
        """
        self._torrent: "Torrent" = kwargs.get("torrent")
        self._extracted_imdb_code = kwargs.get("extracted_imdb_code")
        self.hardcoded = kwargs.get("hardcoded")
        self.korsub = kwargs.get("korsub")
        self._subtitles = kwargs.get("subtitles")

    @property
    def torrent(self) -> "Torrent":
        """The torrent this report describes."""
        return self._torrent

    @property
    def extracted_imdb_code(self) -> str:
        """The IMDb code extracted during inspection (None if not provided)."""
        return self._extracted_imdb_code

    @property
    def imdb_code_matched(self) -> bool:
        """Whether the torrent's IMDb code equals the extracted one."""
        # NOTE(review): behaviour for a missing (None) code depends on
        # IMDbCode, which is defined elsewhere — confirm it accepts None.
        return IMDbCode(self.torrent.imdb_code) == IMDbCode(self.extracted_imdb_code)

    @property
    def acceptable_quality(self) -> bool:
        """False when the parsed release quality is one of the rejected tags."""
        # The "quality" key may be absent or hold None; both count as "no tag".
        quality = self.torrent.parsed_release_name.get("quality") or ""
        return str(quality).upper() not in self._REJECTED_QUALITIES

    @property
    def has_hardcoded_subtitles(self) -> bool:
        """True when the parsed release name or the report flags hardcoded subtitles."""
        # bool() so an unset flag yields False instead of None.
        return bool(
            self.torrent.parsed_release_name.get("hardcoded", False) or self.hardcoded
        )

    @property
    def has_korsub_subtitles(self) -> bool:
        """True when the korsub flag is set on the report."""
        return bool(self.korsub)

    @property
    def subtitles(self) -> list:
        """The subtitles passed to the report (None if not provided)."""
        return self._subtitles
|
46
|
+
|
47
|
+
class TorrentInspector:
    """Fetch a torrent's detail page and derive a TorrentReport from it."""

    def __init__(self, torrent: Torrent, timeout: float = 30.0) -> None:
        """
        Parameters
        ----------
        torrent : Torrent
            the torrent to inspect; ``url``, ``release_name`` and
            ``imdb_code`` are read from it
        timeout : float, optional
            seconds passed as ``timeout`` to ``requests.get`` so that an
            unresponsive site cannot block the inspection indefinitely
        """
        self.torrent = torrent
        self.timeout = timeout

    def inspect(self) -> TorrentReport:
        """
        Download the torrent's URL and collect findings into a report.

        The page is searched for an IMDb code, for hardcoded-subtitle and
        korsub markers (also looked for in the release name) and for subtitle
        mentions. Any exception raised while fetching or analysing the page
        is logged and the findings collected up to that point are returned.

        Returns
        -------
        report : TorrentReport
            always contains the torrent; other fields only when collected
        """
        report = {
            "torrent": self.torrent,
        }

        logging.info(f"Inspecting torrent: {self.torrent}")
        logging.info(f"Inspecting release name: {self.torrent.release_name}")
        logging.info(f"Inspecting IMDb code: {self.torrent.imdb_code}")
        logging.info(f"Inspecting URL: {self.torrent.url}")

        try:
            url = self.torrent.url
            if isinstance(url, str) and len(url) > 0:
                response = requests.get(
                    url=url,
                    headers={
                        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
                    },
                    timeout=self.timeout,
                )
                response.raise_for_status()

                logging.info(f"URL status code: {response.status_code}")

                page = response.text

                extracted_imdb_id = next(extract_imdb_code(page), None)
                logging.info(f"Extracted IMDb code: {extracted_imdb_id}")

                soup = BeautifulSoup(page, 'html.parser')

                report["extracted_imdb_code"] = extracted_imdb_id

                # Lower-case both haystacks once. A missing release name is
                # treated as empty so it does not abort the inspection.
                haystacks = ((self.torrent.release_name or "").lower(), page.lower())

                # check if torrent has hardcoded subtitles using various
                # alternatives for the word hardcoded
                hardcoded = any(
                    marker in haystack
                    for haystack in haystacks
                    for marker in ("hardcoded", "hardsub")
                )

                logging.info(f"Hardcoded subtitles: {hardcoded}")

                # check if torrent has korsub subtitles
                korsub = any("korsub" in haystack for haystack in haystacks)

                logging.info(f"Korsub subtitles: {korsub}")

                report["hardcoded"] = hardcoded
                report["korsub"] = korsub

                searcher = SubtitleSearcher(hint_words=[
                    "english",
                    "eng",
                    "dutch",
                    "dut",
                ])

                subtitles = searcher.search_subtitles(soup.get_text())
                logging.info(f"Subtitles found: {subtitles}")
                # "".split(",") yields [""]; drop empty entries so that
                # "nothing found" becomes an empty list.
                report["subtitles"] = [name for name in subtitles.split(",") if name]
            else:
                logging.info("No URL provided for torrent")
        except Exception as e:
            # Best-effort inspection: log with traceback and return what was collected.
            logging.exception(f"Error while inspecting torrent: {e}")

        return TorrentReport(**report)
|
@@ -0,0 +1,37 @@
|
|
1
|
+
from plexflow.utils.subtitle.search import SubtitleSearcher
|
2
|
+
from bs4 import BeautifulSoup
|
3
|
+
from plexflow.utils.imdb.imdb_codes import extract_imdb_code
|
4
|
+
from plexflow.utils.torrent.hash import extract_magnet
|
5
|
+
from plexflow.utils.torrent.files import TorrentSubtitle
|
6
|
+
from plexflow.utils.strings.language import get_language_code
|
7
|
+
|
8
|
+
def torrent_detail_extract(func):
    """
    Decorate a detail-page parser so common fields are extracted for it.

    The wrapper takes the page HTML, extracts subtitle mentions, the first
    IMDb code and the first magnet link, and calls ``func(html, subtitles=...,
    imdb_id=..., magnet=...)``.

    Parameters
    ----------
    func : callable
        called as ``func(html, **details)``; its return value is returned
        unchanged by the wrapper

    Returns
    -------
    wrapper : callable
        a function taking the page HTML as its only argument
    """
    # Imported locally so the block does not depend on a file-level import.
    import functools

    # The searcher only stores its hint words, so one instance is shared by all calls.
    searcher = SubtitleSearcher(
        hint_words={
            "dutch",
            "dut",
            "eng",
            "english",
        }
    )

    @functools.wraps(func)  # keep the parser's name/docstring on the wrapper
    def wrapper(html):
        text = BeautifulSoup(html, 'html.parser').get_text()

        subtitles = searcher.search_subtitles(text)

        imdb_code = next(extract_imdb_code(html), None)

        # NOTE(review): the magnet link is searched in the extracted text,
        # not in the raw HTML, while the IMDb code uses the raw HTML.
        # Confirm this finds links that only appear in href attributes.
        magnet = next(extract_magnet(text), None)

        details = {
            # Skip blank entries: an empty search result splits into [""].
            "subtitles": [
                TorrentSubtitle(name=sub, language=get_language_code(sub))
                for sub in subtitles.split(",")
                if sub.strip()
            ],
            "imdb_id": imdb_code,
            "magnet": magnet,
        }

        # Call the decorated function with the HTML and extracted parts as a dictionary
        return func(html, **details)

    return wrapper
|