PyPI - plexflow - Versions diffs - 0.0.64__py3-none-any.whl - Mend

plexflow 0.0.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (256) hide show

plexflow/__init__.py +0 -0
plexflow/__main__.py +15 -0
plexflow/core/.DS_Store +0 -0
plexflow/core/__init__.py +0 -0
plexflow/core/context/__init__.py +0 -0
plexflow/core/context/metadata/__init__.py +0 -0
plexflow/core/context/metadata/context.py +32 -0
plexflow/core/context/metadata/tmdb/__init__.py +0 -0
plexflow/core/context/metadata/tmdb/context.py +45 -0
plexflow/core/context/partial_context.py +46 -0
plexflow/core/context/partials/__init__.py +8 -0
plexflow/core/context/partials/cache.py +16 -0
plexflow/core/context/partials/context.py +12 -0
plexflow/core/context/partials/ids.py +37 -0
plexflow/core/context/partials/movie.py +115 -0
plexflow/core/context/partials/tgx_batch.py +33 -0
plexflow/core/context/partials/tgx_context.py +34 -0
plexflow/core/context/partials/torrents.py +23 -0
plexflow/core/context/partials/watchlist.py +35 -0
plexflow/core/context/plexflow_context.py +29 -0
plexflow/core/context/plexflow_property.py +36 -0
plexflow/core/context/root/__init__.py +0 -0
plexflow/core/context/root/context.py +25 -0
plexflow/core/context/select/__init__.py +0 -0
plexflow/core/context/select/context.py +45 -0
plexflow/core/context/torrent/__init__.py +0 -0
plexflow/core/context/torrent/context.py +43 -0
plexflow/core/context/torrent/tpb/__init__.py +0 -0
plexflow/core/context/torrent/tpb/context.py +45 -0
plexflow/core/context/torrent/yts/__init__.py +0 -0
plexflow/core/context/torrent/yts/context.py +45 -0
plexflow/core/context/watchlist/__init__.py +0 -0
plexflow/core/context/watchlist/context.py +46 -0
plexflow/core/downloads/__init__.py +0 -0
plexflow/core/downloads/candidates/__init__.py +0 -0
plexflow/core/downloads/candidates/download_candidate.py +210 -0
plexflow/core/downloads/candidates/filtered.py +51 -0
plexflow/core/downloads/candidates/utils.py +39 -0
plexflow/core/env/__init__.py +0 -0
plexflow/core/env/env.py +31 -0
plexflow/core/genai/__init__.py +0 -0
plexflow/core/genai/bot.py +9 -0
plexflow/core/genai/plexa.py +54 -0
plexflow/core/genai/torrent/imdb_verify.py +65 -0
plexflow/core/genai/torrent/movie.py +25 -0
plexflow/core/genai/utils/__init__.py +0 -0
plexflow/core/genai/utils/loader.py +5 -0
plexflow/core/metadata/__init__.py +0 -0
plexflow/core/metadata/auto/__init__.py +0 -0
plexflow/core/metadata/auto/auto_meta.py +40 -0
plexflow/core/metadata/auto/auto_providers/__init__.py +0 -0
plexflow/core/metadata/auto/auto_providers/auto/__init__.py +0 -0
plexflow/core/metadata/auto/auto_providers/auto/episode.py +49 -0
plexflow/core/metadata/auto/auto_providers/auto/item.py +55 -0
plexflow/core/metadata/auto/auto_providers/auto/movie.py +13 -0
plexflow/core/metadata/auto/auto_providers/auto/season.py +43 -0
plexflow/core/metadata/auto/auto_providers/auto/show.py +26 -0
plexflow/core/metadata/auto/auto_providers/imdb/__init__.py +0 -0
plexflow/core/metadata/auto/auto_providers/imdb/movie.py +36 -0
plexflow/core/metadata/auto/auto_providers/imdb/show.py +45 -0
plexflow/core/metadata/auto/auto_providers/moviemeter/__init__.py +0 -0
plexflow/core/metadata/auto/auto_providers/moviemeter/movie.py +40 -0
plexflow/core/metadata/auto/auto_providers/plex/__init__.py +0 -0
plexflow/core/metadata/auto/auto_providers/plex/movie.py +39 -0
plexflow/core/metadata/auto/auto_providers/tmdb/__init__.py +0 -0
plexflow/core/metadata/auto/auto_providers/tmdb/episode.py +30 -0
plexflow/core/metadata/auto/auto_providers/tmdb/movie.py +36 -0
plexflow/core/metadata/auto/auto_providers/tmdb/season.py +23 -0
plexflow/core/metadata/auto/auto_providers/tmdb/show.py +41 -0
plexflow/core/metadata/auto/auto_providers/tmdb.py +92 -0
plexflow/core/metadata/auto/auto_providers/tvdb/__init__.py +0 -0
plexflow/core/metadata/auto/auto_providers/tvdb/episode.py +28 -0
plexflow/core/metadata/auto/auto_providers/tvdb/movie.py +36 -0
plexflow/core/metadata/auto/auto_providers/tvdb/season.py +25 -0
plexflow/core/metadata/auto/auto_providers/tvdb/show.py +41 -0
plexflow/core/metadata/providers/__init__.py +0 -0
plexflow/core/metadata/providers/imdb/__init__.py +0 -0
plexflow/core/metadata/providers/imdb/datatypes.py +53 -0
plexflow/core/metadata/providers/imdb/imdb.py +112 -0
plexflow/core/metadata/providers/moviemeter/__init__.py +0 -0
plexflow/core/metadata/providers/moviemeter/datatypes.py +111 -0
plexflow/core/metadata/providers/moviemeter/moviemeter.py +42 -0
plexflow/core/metadata/providers/plex/__init__.py +0 -0
plexflow/core/metadata/providers/plex/datatypes.py +693 -0
plexflow/core/metadata/providers/plex/plex.py +167 -0
plexflow/core/metadata/providers/tmdb/__init__.py +0 -0
plexflow/core/metadata/providers/tmdb/datatypes.py +460 -0
plexflow/core/metadata/providers/tmdb/tmdb.py +85 -0
plexflow/core/metadata/providers/tvdb/__init__.py +0 -0
plexflow/core/metadata/providers/tvdb/datatypes.py +257 -0
plexflow/core/metadata/providers/tvdb/tv_datatypes.py +554 -0
plexflow/core/metadata/providers/tvdb/tvdb.py +65 -0
plexflow/core/metadata/providers/universal/__init__.py +0 -0
plexflow/core/metadata/providers/universal/movie.py +130 -0
plexflow/core/metadata/providers/universal/old.py +192 -0
plexflow/core/metadata/providers/universal/show.py +107 -0
plexflow/core/plex/__init__.py +0 -0
plexflow/core/plex/api/context/authorized.py +15 -0
plexflow/core/plex/api/context/discover.py +14 -0
plexflow/core/plex/api/context/library.py +14 -0
plexflow/core/plex/discover/__init__.py +0 -0
plexflow/core/plex/discover/activity.py +448 -0
plexflow/core/plex/discover/comment.py +89 -0
plexflow/core/plex/discover/feed.py +11 -0
plexflow/core/plex/hooks/__init__.py +0 -0
plexflow/core/plex/hooks/plex_authorized.py +60 -0
plexflow/core/plex/hooks/plexflow_database.py +6 -0
plexflow/core/plex/library/__init__.py +0 -0
plexflow/core/plex/library/library.py +103 -0
plexflow/core/plex/token/__init__.py +0 -0
plexflow/core/plex/token/auto_token.py +91 -0
plexflow/core/plex/utils/__init__.py +0 -0
plexflow/core/plex/utils/paginated.py +39 -0
plexflow/core/plex/watchlist/__init__.py +0 -0
plexflow/core/plex/watchlist/datatypes.py +124 -0
plexflow/core/plex/watchlist/watchlist.py +23 -0
plexflow/core/storage/__init__.py +0 -0
plexflow/core/storage/object/__init__.py +0 -0
plexflow/core/storage/object/plexflow_storage.py +143 -0
plexflow/core/storage/object/redis_storage.py +169 -0
plexflow/core/subtitles/__init__.py +0 -0
plexflow/core/subtitles/providers/__init__.py +0 -0
plexflow/core/subtitles/providers/auto_subtitles.py +48 -0
plexflow/core/subtitles/providers/oss/__init__.py +0 -0
plexflow/core/subtitles/providers/oss/datatypes.py +104 -0
plexflow/core/subtitles/providers/oss/download.py +48 -0
plexflow/core/subtitles/providers/oss/old.py +144 -0
plexflow/core/subtitles/providers/oss/oss.py +400 -0
plexflow/core/subtitles/providers/oss/oss_subtitle.py +32 -0
plexflow/core/subtitles/providers/oss/search.py +52 -0
plexflow/core/subtitles/providers/oss/unlimited_oss.py +231 -0
plexflow/core/subtitles/providers/oss/utils/__init__.py +0 -0
plexflow/core/subtitles/providers/oss/utils/config.py +63 -0
plexflow/core/subtitles/providers/oss/utils/download_client.py +22 -0
plexflow/core/subtitles/providers/oss/utils/exceptions.py +35 -0
plexflow/core/subtitles/providers/oss/utils/file_utils.py +83 -0
plexflow/core/subtitles/providers/oss/utils/languages.py +78 -0
plexflow/core/subtitles/providers/oss/utils/response_base.py +221 -0
plexflow/core/subtitles/providers/oss/utils/responses.py +176 -0
plexflow/core/subtitles/providers/oss/utils/srt.py +561 -0
plexflow/core/subtitles/results/__init__.py +0 -0
plexflow/core/subtitles/results/subtitle.py +170 -0
plexflow/core/torrents/__init__.py +0 -0
plexflow/core/torrents/analyzers/analyzed_torrent.py +143 -0
plexflow/core/torrents/analyzers/analyzer.py +45 -0
plexflow/core/torrents/analyzers/torrentquest/analyzer.py +47 -0
plexflow/core/torrents/auto/auto_providers/auto/__init__.py +0 -0
plexflow/core/torrents/auto/auto_providers/auto/torrent.py +64 -0
plexflow/core/torrents/auto/auto_providers/tpb/torrent.py +62 -0
plexflow/core/torrents/auto/auto_torrents.py +29 -0
plexflow/core/torrents/providers/__init__.py +0 -0
plexflow/core/torrents/providers/ext/__init__.py +0 -0
plexflow/core/torrents/providers/ext/ext.py +18 -0
plexflow/core/torrents/providers/ext/utils.py +64 -0
plexflow/core/torrents/providers/extratorrent/__init__.py +0 -0
plexflow/core/torrents/providers/extratorrent/extratorrent.py +21 -0
plexflow/core/torrents/providers/extratorrent/utils.py +66 -0
plexflow/core/torrents/providers/eztv/__init__.py +0 -0
plexflow/core/torrents/providers/eztv/eztv.py +47 -0
plexflow/core/torrents/providers/eztv/utils.py +83 -0
plexflow/core/torrents/providers/rarbg2/__init__.py +0 -0
plexflow/core/torrents/providers/rarbg2/rarbg2.py +19 -0
plexflow/core/torrents/providers/rarbg2/utils.py +76 -0
plexflow/core/torrents/providers/snowfl/__init__.py +0 -0
plexflow/core/torrents/providers/snowfl/snowfl.py +36 -0
plexflow/core/torrents/providers/snowfl/utils.py +59 -0
plexflow/core/torrents/providers/tgx/__init__.py +0 -0
plexflow/core/torrents/providers/tgx/context.py +50 -0
plexflow/core/torrents/providers/tgx/dump.py +40 -0
plexflow/core/torrents/providers/tgx/tgx.py +22 -0
plexflow/core/torrents/providers/tgx/utils.py +61 -0
plexflow/core/torrents/providers/therarbg/__init__.py +0 -0
plexflow/core/torrents/providers/therarbg/therarbg.py +17 -0
plexflow/core/torrents/providers/therarbg/utils.py +61 -0
plexflow/core/torrents/providers/torrentquest/__init__.py +0 -0
plexflow/core/torrents/providers/torrentquest/torrentquest.py +20 -0
plexflow/core/torrents/providers/torrentquest/utils.py +70 -0
plexflow/core/torrents/providers/tpb/__init__.py +0 -0
plexflow/core/torrents/providers/tpb/tpb.py +17 -0
plexflow/core/torrents/providers/tpb/utils.py +139 -0
plexflow/core/torrents/providers/yts/__init__.py +0 -0
plexflow/core/torrents/providers/yts/utils.py +57 -0
plexflow/core/torrents/providers/yts/yts.py +31 -0
plexflow/core/torrents/results/__init__.py +0 -0
plexflow/core/torrents/results/torrent.py +165 -0
plexflow/core/torrents/results/universal.py +220 -0
plexflow/core/torrents/results/utils.py +15 -0
plexflow/events/__init__.py +0 -0
plexflow/events/download/__init__.py +0 -0
plexflow/events/download/torrent_events.py +96 -0
plexflow/events/publish/__init__.py +0 -0
plexflow/events/publish/publish.py +34 -0
plexflow/logging/__init__.py +0 -0
plexflow/logging/log_setup.py +8 -0
plexflow/spiders/quiet_logger.py +9 -0
plexflow/spiders/tgx/pipelines/dump_json_pipeline.py +30 -0
plexflow/spiders/tgx/pipelines/meta_pipeline.py +13 -0
plexflow/spiders/tgx/pipelines/publish_pipeline.py +14 -0
plexflow/spiders/tgx/pipelines/torrent_info_pipeline.py +12 -0
plexflow/spiders/tgx/pipelines/validation_pipeline.py +17 -0
plexflow/spiders/tgx/settings.py +36 -0
plexflow/spiders/tgx/spider.py +72 -0
plexflow/utils/__init__.py +0 -0
plexflow/utils/antibot/human_like_requests.py +122 -0
plexflow/utils/api/__init__.py +0 -0
plexflow/utils/api/context/http.py +62 -0
plexflow/utils/api/rest/__init__.py +0 -0
plexflow/utils/api/rest/antibot_restful.py +68 -0
plexflow/utils/api/rest/restful.py +49 -0
plexflow/utils/captcha/__init__.py +0 -0
plexflow/utils/captcha/bypass/__init__.py +0 -0
plexflow/utils/captcha/bypass/decode_audio.py +34 -0
plexflow/utils/download/__init__.py +0 -0
plexflow/utils/download/gz.py +26 -0
plexflow/utils/filesystem/__init__.py +0 -0
plexflow/utils/filesystem/search.py +129 -0
plexflow/utils/gmail/__init__.py +0 -0
plexflow/utils/gmail/mails.py +116 -0
plexflow/utils/hooks/__init__.py +0 -0
plexflow/utils/hooks/http.py +84 -0
plexflow/utils/hooks/postgresql.py +93 -0
plexflow/utils/hooks/redis.py +112 -0
plexflow/utils/image/storage.py +36 -0
plexflow/utils/imdb/__init__.py +0 -0
plexflow/utils/imdb/imdb_codes.py +107 -0
plexflow/utils/pubsub/consume.py +82 -0
plexflow/utils/pubsub/produce.py +25 -0
plexflow/utils/retry/__init__.py +0 -0
plexflow/utils/retry/utils.py +38 -0
plexflow/utils/strings/__init__.py +0 -0
plexflow/utils/strings/filesize.py +55 -0
plexflow/utils/strings/language.py +14 -0
plexflow/utils/subtitle/search.py +76 -0
plexflow/utils/tasks/decorators.py +78 -0
plexflow/utils/tasks/k8s/task.py +70 -0
plexflow/utils/thread_safe/safe_list.py +54 -0
plexflow/utils/thread_safe/safe_set.py +69 -0
plexflow/utils/torrent/__init__.py +0 -0
plexflow/utils/torrent/analyze.py +118 -0
plexflow/utils/torrent/extract/common.py +37 -0
plexflow/utils/torrent/extract/ext.py +2391 -0
plexflow/utils/torrent/extract/extratorrent.py +56 -0
plexflow/utils/torrent/extract/kat.py +1581 -0
plexflow/utils/torrent/extract/tgx.py +96 -0
plexflow/utils/torrent/extract/therarbg.py +170 -0
plexflow/utils/torrent/extract/torrentquest.py +171 -0
plexflow/utils/torrent/files.py +36 -0
plexflow/utils/torrent/hash.py +90 -0
plexflow/utils/transcribe/__init__.py +0 -0
plexflow/utils/transcribe/speech2text.py +40 -0
plexflow/utils/video/__init__.py +0 -0
plexflow/utils/video/subtitle.py +73 -0
plexflow-0.0.64.dist-info/METADATA +71 -0
plexflow-0.0.64.dist-info/RECORD +256 -0
plexflow-0.0.64.dist-info/WHEEL +4 -0
plexflow-0.0.64.dist-info/entry_points.txt +24 -0

plexflow/core/torrents/results/universal.py ADDED Viewed

@@ -0,0 +1,220 @@
+from typing import List, Optional
+from plexflow.core.subtitles.results.subtitle import Subtitle
+from plexflow.core.torrents.results.torrent import Torrent
+from plexflow.utils.imdb.imdb_codes import IMDbCode
+from collections import defaultdict
+from typing import List, Optional, Set
+class UniversalTorrent:
+    """
+    Represents a universal torrent that contains multiple torrents with the same hash.
+    Attributes:
+        torrents (List[Torrent]): The list of torrents contained in the universal torrent.
+    """
+    def __init__(self, torrents: List[Torrent]):
+        """
+        Initializes a new instance of the UniversalTorrent class.
+        Args:
+            torrents (List[Torrent]): The list of torrents to be included in the universal torrent.
+        Raises:
+            ValueError: If the torrents have different hashes.
+        """
+        hashes = {t.hash for t in torrents}
+        if len(hashes) > 1:
+            raise ValueError("All torrents should have the same hash")
+        self.torrents = torrents
+    @property
+    def imdb_code(self) -> IMDbCode:
+        """
+        Gets the IMDb code of the universal torrent.
+        Returns:
+            IMDbCode: The IMDb code of the universal torrent.
+        """
+        return self.torrents[0].imdb_code
+    @property
+    def is_season_pack(self) -> bool:
+        """
+        Checks if the universal torrent is a season pack.
+        Returns:
+            bool: True if the universal torrent is a season pack, False otherwise.
+        """
+        return any(t.has_multiple_episodes for t in self.torrents)
+    @property
+    def season(self) -> Optional[int]:
+        """
+        Gets the season number of the universal torrent.
+        Returns:
+            Optional[int]: The season number of the universal torrent, or None if not available.
+        """
+        for t in self.torrents:
+            if isinstance(t.season, int):
+                return t.season
+        return None
+    @property
+    def episode(self) -> Optional[int]:
+        """
+        Gets the episode number of the universal torrent.
+        Returns:
+            Optional[int]: The episode number of the universal torrent, or None if not available.
+        """
+        for t in self.torrents:
+            if isinstance(t.episode, int):
+                return t.episode
+        return None
+    @property
+    def max_peers(self) -> int:
+        """
+        Gets the maximum number of peers among all torrents in the universal torrent.
+        Returns:
+            int: The maximum number of peers.
+        """
+        return max(t.peers for t in self.torrents)
+    @property
+    def max_seeds(self) -> int:
+        """
+        Gets the maximum number of seeds among all torrents in the universal torrent.
+        Returns:
+            int: The maximum number of seeds.
+        """
+        return max(t.seeds for t in self.torrents)
+    @property
+    def min_seeds(self) -> int:
+        """
+        Gets the minimum number of seeds among all torrents in the universal torrent.
+        Returns:
+            int: The minimum number of seeds.
+        """
+        return min(t.seeds for t in self.torrents)
+    @property
+    def min_peers(self) -> int:
+        """
+        Gets the minimum number of peers among all torrents in the universal torrent.
+        Returns:
+            int: The minimum number of peers.
+        """
+        return min(t.peers for t in self.torrents)
+    @property
+    def sources(self) -> Set:
+        """
+        Gets the set of sources of the universal torrent.
+        Returns:
+            set: The set of sources.
+        """
+        return {t.source for t in self.torrents}
+    @property
+    def max_size_bytes(self) -> int:
+        """
+        Gets the maximum size in bytes among all torrents in the universal torrent.
+        Returns:
+            int: The maximum size in bytes.
+        """
+        return max(t.size_bytes for t in self.torrents)
+    @property
+    def min_size_bytes(self) -> int:
+        """
+        Gets the minimum size in bytes among all torrents in the universal torrent.
+        Returns:
+            int: The minimum size in bytes.
+        """
+        return min(t.size_bytes for t in self.torrents)
+    @property
+    def has_native_subtitles(self) -> bool:
+        """
+        Checks if the universal torrent has native subtitles.
+        Returns:
+            bool: True if the universal torrent has native subtitles, False otherwise.
+        """
+        return any(t.has_native_subtitles for t in self.torrents)
+    @property
+    def has_native_dutch_subtitles(self) -> bool:
+        """
+        Checks if the universal torrent has native Dutch subtitles.
+        Returns:
+            bool: True if the universal torrent has native Dutch subtitles, False otherwise.
+        """
+        return any(t.has_native_dutch_subtitles for t in self.torrents)
+    @property
+    def has_native_english_subtitles(self) -> bool:
+        """
+        Checks if the universal torrent has native English subtitles.
+        Returns:
+            bool: True if the universal torrent has native English subtitles, False otherwise.
+        """
+        return any(t.has_native_english_subtitles for t in self.torrents)
+    def is_compatible_with(self, s: Subtitle) -> bool:
+        """
+        Checks if the universal torrent is compatible with a given subtitle.
+        Args:
+            s (Subtitle): The subtitle to check compatibility with.
+        Returns:
+            bool: True if the universal torrent is compatible with the subtitle, False otherwise.
+        """
+        return any(s.name == t.release_name or s.encoder == t.encoder_name for t in self.torrents)
+    def __eq__(self, other):
+        """
+        Checks if the universal torrent is equal to another object.
+        Args:
+            other: The object to compare with.
+        Returns:
+            bool: True if the universal torrent is equal to the other object, False otherwise.
+        """
+        if not isinstance(other, UniversalTorrent):
+            return NotImplemented
+        return self.torrents[0].hash == other.torrents[0].hash
+    def __str__(self):
+        """
+        Returns a string representation of the universal torrent.
+        Returns:
+            str: The string representation of the universal torrent.
+        """
+        return f"UniversalTorrent({self.torrents[0].hash})"
+    def __repr__(self):
+        """
+        Returns a string representation of the universal torrent.
+        Returns:
+            str: The string representation of the universal torrent.
+        """
+        return self.__str__()

plexflow/core/torrents/results/utils.py ADDED Viewed

@@ -0,0 +1,15 @@
+from typing import List
+from plexflow.core.torrents.results.torrent import Torrent
+from collections import defaultdict
+from plexflow.core.torrents.results.universal import UniversalTorrent
+def create_universal_torrents(torrents: List[Torrent]) -> List[UniversalTorrent]:
+    """
+    This function creates a list of UniversalTorrents based on a given list of Torrent objects.
+    It groups the Torrent objects by their hash and creates a UniversalTorrent for each group.
+    """
+    torrents_by_hash = defaultdict(list)
+    for torrent in torrents:
+        torrents_by_hash[torrent.hash].append(torrent)
+    return [UniversalTorrent(torrents) for torrents in torrents_by_hash.values()]

plexflow/events/__init__.py ADDED Viewed

File without changes

plexflow/events/download/__init__.py ADDED Viewed

File without changes

plexflow/events/download/torrent_events.py ADDED Viewed

@@ -0,0 +1,96 @@
+from dataclasses import dataclass
+from typing import Dict
+from confluent_kafka.avro import AvroProducer
+from avro.schema import parse
+from avro.io import DatumWriter
+from confluent_kafka.avro import AvroProducer
+from confluent_kafka.schema_registry import SchemaRegistryClient
+from typing import Any
+@dataclass
+class DownloadEvent:
+    """
+    Represents a download event.
+    Attributes:
+        name (str): The name of the download.
+        category (str): The category of the download.
+        tags (str): The tags associated with the download.
+        content_path (str): The path to the content of the download.
+        root_path (str): The root path of the download.
+        save_path (str): The path where the download is saved.
+        total_files (int): The number of files in the download.
+        torrent_size (int): The size of the download in bytes.
+        current_tracker (str): The current tracker of the download.
+        info_hash_v1 (str): The info hash v1 of the download.
+        info_hash_v2 (str): The info hash v2 of the download.
+        torrent_id (str): The ID of the download.
+        finished (bool): Indicates whether the download has finished downloading.
+    """
+    name: str
+    category: str
+    tags: str
+    content_path: str
+    root_path: str
+    save_path: str
+    total_files: int
+    torrent_size: int
+    current_tracker: str
+    info_hash_v1: str
+    info_hash_v2: str
+    torrent_id: str
+    finished: bool
+    def to_dict(self) -> Dict[str, str]:
+        """
+        Converts the DownloadEvent object to a dictionary.
+        Returns:
+            dict: A dictionary representation of the DownloadEvent object.
+        """
+        return {
+            "name": self.name,
+            "category": self.category,
+            "tags": self.tags,
+            "content_path": self.content_path,
+            "root_path": self.root_path,
+            "save_path": self.save_path,
+            "total_files": self.total_files,
+            "torrent_size": self.torrent_size,
+            "current_tracker": self.current_tracker,
+            "info_hash_v1": self.info_hash_v1,
+            "info_hash_v2": self.info_hash_v2,
+            "torrent_id": self.torrent_id,
+            "finished": self.finished
+        }
+def produce_to_topic(bootstrap_servers: str, schema_registry_url: str, topic: str, value: Any, schema_subject: str):
+    """
+    Produces a value to a Kafka topic using Avro serialization and schema validation.
+    Args:
+        bootstrap_servers (str): The list of Kafka bootstrap servers.
+        schema_registry_url (str): The URL of the schema registry.
+        topic (str): The Kafka topic to produce the value to.
+        value (Any): The value to produce.
+        schema_subject (str): The subject of the Avro schema in the schema registry.
+    """
+    # Create a CachedSchemaRegistryClient instance
+    schema_registry_client = SchemaRegistryClient({'url': schema_registry_url})
+    # Get the latest version of the schema for the specified subject
+    schema = schema_registry_client.get_latest_version(schema_subject)
+    avro_schema = parse(schema.schema.schema_str)
+    producer = AvroProducer({
+        'bootstrap.servers': bootstrap_servers,
+        'schema.registry.url': schema_registry_url
+    }, default_value_schema=avro_schema, value_serializer=DatumWriter().write)
+    try:
+        producer.produce(topic=topic, value=value)
+        producer.flush()
+    finally:
+        producer.close()

plexflow/events/publish/__init__.py ADDED Viewed

File without changes

plexflow/events/publish/publish.py ADDED Viewed

@@ -0,0 +1,34 @@
+from avro.schema import parse
+from avro.io import DatumWriter
+from confluent_kafka.avro import AvroProducer
+from confluent_kafka.schema_registry import SchemaRegistryClient
+from typing import Any
+import json
+def produce_to_topic(bootstrap_servers: str, schema_registry_url: str, topic: str, value: Any, schema_subject: str):
+    """
+    Produces a value to a Kafka topic using Avro serialization and schema validation.
+    Args:
+        bootstrap_servers (str): The list of Kafka bootstrap servers.
+        schema_registry_url (str): The URL of the schema registry.
+        topic (str): The Kafka topic to produce the value to.
+        value (Any): The value to produce.
+        schema_subject (str): The subject of the Avro schema in the schema registry.
+    """
+    # Create a CachedSchemaRegistryClient instance
+    schema_registry_client = SchemaRegistryClient({'url': schema_registry_url})
+    # Get the latest version of the schema for the specified subject
+    schema = schema_registry_client.get_latest_version(schema_subject)
+    avro_schema = parse(schema.schema.schema_str)
+    producer = AvroProducer({
+        'bootstrap.servers': bootstrap_servers,
+        'schema.registry.url': schema_registry_url
+    }, default_value_schema=avro_schema)
+    producer.produce(topic=topic, value=value)
+    producer.flush()

plexflow/logging/__init__.py ADDED Viewed

File without changes

plexflow/logging/log_setup.py ADDED Viewed

@@ -0,0 +1,8 @@
+import logging.config
+import yaml
+with open('logging/config.yml', 'r') as f:
+    config = yaml.safe_load(f.read())
+    logging.config.dictConfig(config)
+logger = logging.getLogger("plexflow")

plexflow/spiders/quiet_logger.py ADDED Viewed

@@ -0,0 +1,9 @@
+import scrapy
+class QuietLogFormatter(scrapy.logformatter.LogFormatter):
+    def scraped(self, item, response, spider):
+        return (
+            super().scraped(item, response, spider)
+            if spider.settings.getbool("LOG_SCRAPED_ITEMS")
+            else None
+        )

plexflow/spiders/tgx/pipelines/dump_json_pipeline.py ADDED Viewed

@@ -0,0 +1,30 @@
+# Import necessary modules
+import json
+from pathlib import Path
+class DumpJsonPipeline:
+    def __init__(self):
+        self.data = []
+    def process_item(self, item, spider):
+        # Process each item and add it to the data list
+        self.data.append(dict(item.get("meta", {})))
+        return item
+    def close_spider(self, spider):
+        target_path = Path(spider.dump_folder)
+        tag = spider.tag
+        if isinstance(tag, bytes):
+            tag = tag.decode("utf-8")
+        print("type of tag:", type(tag))
+        print("tag:", tag)
+        json_file_path = target_path / f"{tag}.json"
+        # Create the directory if it doesn't exist
+        target_path.mkdir(exist_ok=True)
+        # Write the data to the JSON file
+        with open(json_file_path, 'w') as json_file:
+            json.dump(self.data, json_file, indent=4)

plexflow/spiders/tgx/pipelines/meta_pipeline.py ADDED Viewed

@@ -0,0 +1,13 @@
+from datetime import datetime
+class MetaPipeline:
+    def process_item(self, item, spider):
+        response = item.get("response", None)
+        meta = item.get("meta", {})
+        deleted = "it has probably been deleted" in response.text
+        date_last_scrape = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        meta = {**meta, "deleted": deleted, "date_last_scrape": date_last_scrape}
+        item["meta"] = meta
+        return item

plexflow/spiders/tgx/pipelines/publish_pipeline.py ADDED Viewed

@@ -0,0 +1,14 @@
+# Import necessary modules
+import json
+from pathlib import Path
+import logging
+class PublishPipeline:
+    def process_item(self, item, spider):
+        meta = item.get("meta", {})
+        if meta is None:
+            logging.info("Meta is None. Skipping...")
+            return item
+        spider.mark_page_as_finished(meta)
+        return item

plexflow/spiders/tgx/pipelines/torrent_info_pipeline.py ADDED Viewed

@@ -0,0 +1,12 @@
+from plexflow.utils.torrent.extract.tgx import extract_torrent_info
+class TorrentInfoPipeline:
+    def process_item(self, item, spider):
+        meta = item.get("meta", {})
+        response = item.get("response", None)
+        info = extract_torrent_info(html_content=response.text)
+        meta = {**meta, **info}
+        item["meta"] = meta
+        return item

plexflow/spiders/tgx/pipelines/validation_pipeline.py ADDED Viewed

@@ -0,0 +1,17 @@
+class ValidationPipeline:
+    def process_item(self, item, spider):
+        response = item.get("response", None)
+        meta = item.get("meta", {})
+        if "it has probably been deleted" not in response.text and "magnet:?xt" not in response.text:
+            print(f"Invalid HTML for number {meta.get('id')}")
+            meta["valid"] = False
+            spider.session_expired = True
+        else:
+            meta["valid"] = True
+        meta["errored"] = False
+        item["meta"] = meta
+        return item

plexflow/spiders/tgx/settings.py ADDED Viewed

@@ -0,0 +1,36 @@
+# Scrapy settings for the tgx spider.
+# User-Agent header sent with requests (a desktop Chrome-on-Windows string).
+USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+# robots.txt rules are not obeyed (original note: set to False during development).
+ROBOTSTXT_OBEY = False
+# Item pipelines and their order values; Scrapy runs lower values first, so the
+# sequence is validation -> torrent info -> meta -> publish.
+# DumpJsonPipeline is not enabled here.
+ITEM_PIPELINES = {
+    "plexflow.spiders.tgx.pipelines.validation_pipeline.ValidationPipeline": 100,
+    "plexflow.spiders.tgx.pipelines.torrent_info_pipeline.TorrentInfoPipeline": 600,
+    "plexflow.spiders.tgx.pipelines.meta_pipeline.MetaPipeline": 800,
+    "plexflow.spiders.tgx.pipelines.publish_pipeline.PublishPipeline": 900,
+}
+# Logging: INFO level, formatted by QuietLogFormatter (plexflow/spiders/quiet_logger.py).
+LOG_ENABLED = True
+LOG_LEVEL = "INFO"
+LOG_FORMATTER = "plexflow.spiders.quiet_logger.QuietLogFormatter"
+# LOG_FILE = "scrapy.log"
+# Retry settings: up to 5 retries for the listed HTTP status codes.
+RETRY_ENABLED = True
+RETRY_TIMES = 5
+RETRY_HTTP_CODES = [500, 502, 503, 504, 522, 524, 408, 429]
+# Concurrency limits, overall and per domain.
+CONCURRENT_REQUESTS = 10
+CONCURRENT_REQUESTS_PER_DOMAIN = 10
+# Default request headers.
+DEFAULT_REQUEST_HEADERS = {
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "en",
+}
+# Download timeout (Scrapy interprets this value in seconds).
+DOWNLOAD_TIMEOUT = 15

plexflow/spiders/tgx/spider.py ADDED Viewed

@@ -0,0 +1,72 @@
+import scrapy
+from bs4 import BeautifulSoup
+from scrapy.exceptions import CloseSpider
+from scrapy import signals
+from plexflow.utils.thread_safe.safe_set import ThreadSafeSet
+from plexflow.utils.thread_safe.safe_list import ThreadSafeList
+import logging
+class TgxSpider(scrapy.Spider):
+    name = "tgx_spider"
+    session_expired: bool = False
+    def __init__(self, pages, host='https://torrentgalaxy.to', cookies: dict = None, callback=None):
+        self.pages = set(pages)
+        self.host = host
+        self.cookies = cookies or {}
+        self.original_batch = ThreadSafeSet.from_set(self.pages)
+        self.finished_batch = ThreadSafeList()
+        self.callback = callback
+    @property
+    def finished_ids(self):
+        return set(map(lambda x: x.get("id"), self.finished_batch))
+    @property
+    def finished_items(self):
+        return self.finished_batch.to_list()
+    @property
+    def original_ids(self):
+        return self.original_batch.to_set()
+    @property
+    def unfinished_ids(self):
+        return self.original_batch.difference(self.finished_ids).to_set()
+    @classmethod
+    def from_crawler(cls, crawler, *args, **kwargs):
+        spider = super(TgxSpider, cls).from_crawler(crawler, *args, **kwargs)
+        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
+        return spider
+    def mark_page_as_finished(self, meta):
+        self.finished_batch.append(meta)
+    def start_requests(self):
+        for page_id in self.pages:
+            yield scrapy.Request(
+                f'{self.host}/torrent/{page_id}',
+                self.parse,
+                meta={'id': page_id},
+                cookies=self.cookies)
+    def parse(self, response):
+        if self.session_expired:
+            raise CloseSpider("Session Expired")
+        soup = BeautifulSoup(response.text, 'html.parser')
+        page_number = response.meta["id"]
+        return {"soup": soup, "valid": True, "response": response, "meta": {"id": page_number}}
+    def spider_closed(self, spider):
+        # Code to run when the spider is closed
+        logging.info(f"Spider {spider.name} closing. Finished scraping {len(self.finished_ids)} pages.")
+        logging.info(f"{len(self.unfinished_ids)} pages were not scraped.")
+        logging.info("Spider closed.")
+        if self.callback:
+            self.callback(self)

plexflow/utils/__init__.py ADDED Viewed

File without changes