plexflow 0.0.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. plexflow/__init__.py +0 -0
  2. plexflow/__main__.py +15 -0
  3. plexflow/core/.DS_Store +0 -0
  4. plexflow/core/__init__.py +0 -0
  5. plexflow/core/context/__init__.py +0 -0
  6. plexflow/core/context/metadata/__init__.py +0 -0
  7. plexflow/core/context/metadata/context.py +32 -0
  8. plexflow/core/context/metadata/tmdb/__init__.py +0 -0
  9. plexflow/core/context/metadata/tmdb/context.py +45 -0
  10. plexflow/core/context/partial_context.py +46 -0
  11. plexflow/core/context/partials/__init__.py +8 -0
  12. plexflow/core/context/partials/cache.py +16 -0
  13. plexflow/core/context/partials/context.py +12 -0
  14. plexflow/core/context/partials/ids.py +37 -0
  15. plexflow/core/context/partials/movie.py +115 -0
  16. plexflow/core/context/partials/tgx_batch.py +33 -0
  17. plexflow/core/context/partials/tgx_context.py +34 -0
  18. plexflow/core/context/partials/torrents.py +23 -0
  19. plexflow/core/context/partials/watchlist.py +35 -0
  20. plexflow/core/context/plexflow_context.py +29 -0
  21. plexflow/core/context/plexflow_property.py +36 -0
  22. plexflow/core/context/root/__init__.py +0 -0
  23. plexflow/core/context/root/context.py +25 -0
  24. plexflow/core/context/select/__init__.py +0 -0
  25. plexflow/core/context/select/context.py +45 -0
  26. plexflow/core/context/torrent/__init__.py +0 -0
  27. plexflow/core/context/torrent/context.py +43 -0
  28. plexflow/core/context/torrent/tpb/__init__.py +0 -0
  29. plexflow/core/context/torrent/tpb/context.py +45 -0
  30. plexflow/core/context/torrent/yts/__init__.py +0 -0
  31. plexflow/core/context/torrent/yts/context.py +45 -0
  32. plexflow/core/context/watchlist/__init__.py +0 -0
  33. plexflow/core/context/watchlist/context.py +46 -0
  34. plexflow/core/downloads/__init__.py +0 -0
  35. plexflow/core/downloads/candidates/__init__.py +0 -0
  36. plexflow/core/downloads/candidates/download_candidate.py +210 -0
  37. plexflow/core/downloads/candidates/filtered.py +51 -0
  38. plexflow/core/downloads/candidates/utils.py +39 -0
  39. plexflow/core/env/__init__.py +0 -0
  40. plexflow/core/env/env.py +31 -0
  41. plexflow/core/genai/__init__.py +0 -0
  42. plexflow/core/genai/bot.py +9 -0
  43. plexflow/core/genai/plexa.py +54 -0
  44. plexflow/core/genai/torrent/imdb_verify.py +65 -0
  45. plexflow/core/genai/torrent/movie.py +25 -0
  46. plexflow/core/genai/utils/__init__.py +0 -0
  47. plexflow/core/genai/utils/loader.py +5 -0
  48. plexflow/core/metadata/__init__.py +0 -0
  49. plexflow/core/metadata/auto/__init__.py +0 -0
  50. plexflow/core/metadata/auto/auto_meta.py +40 -0
  51. plexflow/core/metadata/auto/auto_providers/__init__.py +0 -0
  52. plexflow/core/metadata/auto/auto_providers/auto/__init__.py +0 -0
  53. plexflow/core/metadata/auto/auto_providers/auto/episode.py +49 -0
  54. plexflow/core/metadata/auto/auto_providers/auto/item.py +55 -0
  55. plexflow/core/metadata/auto/auto_providers/auto/movie.py +13 -0
  56. plexflow/core/metadata/auto/auto_providers/auto/season.py +43 -0
  57. plexflow/core/metadata/auto/auto_providers/auto/show.py +26 -0
  58. plexflow/core/metadata/auto/auto_providers/imdb/__init__.py +0 -0
  59. plexflow/core/metadata/auto/auto_providers/imdb/movie.py +36 -0
  60. plexflow/core/metadata/auto/auto_providers/imdb/show.py +45 -0
  61. plexflow/core/metadata/auto/auto_providers/moviemeter/__init__.py +0 -0
  62. plexflow/core/metadata/auto/auto_providers/moviemeter/movie.py +40 -0
  63. plexflow/core/metadata/auto/auto_providers/plex/__init__.py +0 -0
  64. plexflow/core/metadata/auto/auto_providers/plex/movie.py +39 -0
  65. plexflow/core/metadata/auto/auto_providers/tmdb/__init__.py +0 -0
  66. plexflow/core/metadata/auto/auto_providers/tmdb/episode.py +30 -0
  67. plexflow/core/metadata/auto/auto_providers/tmdb/movie.py +36 -0
  68. plexflow/core/metadata/auto/auto_providers/tmdb/season.py +23 -0
  69. plexflow/core/metadata/auto/auto_providers/tmdb/show.py +41 -0
  70. plexflow/core/metadata/auto/auto_providers/tmdb.py +92 -0
  71. plexflow/core/metadata/auto/auto_providers/tvdb/__init__.py +0 -0
  72. plexflow/core/metadata/auto/auto_providers/tvdb/episode.py +28 -0
  73. plexflow/core/metadata/auto/auto_providers/tvdb/movie.py +36 -0
  74. plexflow/core/metadata/auto/auto_providers/tvdb/season.py +25 -0
  75. plexflow/core/metadata/auto/auto_providers/tvdb/show.py +41 -0
  76. plexflow/core/metadata/providers/__init__.py +0 -0
  77. plexflow/core/metadata/providers/imdb/__init__.py +0 -0
  78. plexflow/core/metadata/providers/imdb/datatypes.py +53 -0
  79. plexflow/core/metadata/providers/imdb/imdb.py +112 -0
  80. plexflow/core/metadata/providers/moviemeter/__init__.py +0 -0
  81. plexflow/core/metadata/providers/moviemeter/datatypes.py +111 -0
  82. plexflow/core/metadata/providers/moviemeter/moviemeter.py +42 -0
  83. plexflow/core/metadata/providers/plex/__init__.py +0 -0
  84. plexflow/core/metadata/providers/plex/datatypes.py +693 -0
  85. plexflow/core/metadata/providers/plex/plex.py +167 -0
  86. plexflow/core/metadata/providers/tmdb/__init__.py +0 -0
  87. plexflow/core/metadata/providers/tmdb/datatypes.py +460 -0
  88. plexflow/core/metadata/providers/tmdb/tmdb.py +85 -0
  89. plexflow/core/metadata/providers/tvdb/__init__.py +0 -0
  90. plexflow/core/metadata/providers/tvdb/datatypes.py +257 -0
  91. plexflow/core/metadata/providers/tvdb/tv_datatypes.py +554 -0
  92. plexflow/core/metadata/providers/tvdb/tvdb.py +65 -0
  93. plexflow/core/metadata/providers/universal/__init__.py +0 -0
  94. plexflow/core/metadata/providers/universal/movie.py +130 -0
  95. plexflow/core/metadata/providers/universal/old.py +192 -0
  96. plexflow/core/metadata/providers/universal/show.py +107 -0
  97. plexflow/core/plex/__init__.py +0 -0
  98. plexflow/core/plex/api/context/authorized.py +15 -0
  99. plexflow/core/plex/api/context/discover.py +14 -0
  100. plexflow/core/plex/api/context/library.py +14 -0
  101. plexflow/core/plex/discover/__init__.py +0 -0
  102. plexflow/core/plex/discover/activity.py +448 -0
  103. plexflow/core/plex/discover/comment.py +89 -0
  104. plexflow/core/plex/discover/feed.py +11 -0
  105. plexflow/core/plex/hooks/__init__.py +0 -0
  106. plexflow/core/plex/hooks/plex_authorized.py +60 -0
  107. plexflow/core/plex/hooks/plexflow_database.py +6 -0
  108. plexflow/core/plex/library/__init__.py +0 -0
  109. plexflow/core/plex/library/library.py +103 -0
  110. plexflow/core/plex/token/__init__.py +0 -0
  111. plexflow/core/plex/token/auto_token.py +91 -0
  112. plexflow/core/plex/utils/__init__.py +0 -0
  113. plexflow/core/plex/utils/paginated.py +39 -0
  114. plexflow/core/plex/watchlist/__init__.py +0 -0
  115. plexflow/core/plex/watchlist/datatypes.py +124 -0
  116. plexflow/core/plex/watchlist/watchlist.py +23 -0
  117. plexflow/core/storage/__init__.py +0 -0
  118. plexflow/core/storage/object/__init__.py +0 -0
  119. plexflow/core/storage/object/plexflow_storage.py +143 -0
  120. plexflow/core/storage/object/redis_storage.py +169 -0
  121. plexflow/core/subtitles/__init__.py +0 -0
  122. plexflow/core/subtitles/providers/__init__.py +0 -0
  123. plexflow/core/subtitles/providers/auto_subtitles.py +48 -0
  124. plexflow/core/subtitles/providers/oss/__init__.py +0 -0
  125. plexflow/core/subtitles/providers/oss/datatypes.py +104 -0
  126. plexflow/core/subtitles/providers/oss/download.py +48 -0
  127. plexflow/core/subtitles/providers/oss/old.py +144 -0
  128. plexflow/core/subtitles/providers/oss/oss.py +400 -0
  129. plexflow/core/subtitles/providers/oss/oss_subtitle.py +32 -0
  130. plexflow/core/subtitles/providers/oss/search.py +52 -0
  131. plexflow/core/subtitles/providers/oss/unlimited_oss.py +231 -0
  132. plexflow/core/subtitles/providers/oss/utils/__init__.py +0 -0
  133. plexflow/core/subtitles/providers/oss/utils/config.py +63 -0
  134. plexflow/core/subtitles/providers/oss/utils/download_client.py +22 -0
  135. plexflow/core/subtitles/providers/oss/utils/exceptions.py +35 -0
  136. plexflow/core/subtitles/providers/oss/utils/file_utils.py +83 -0
  137. plexflow/core/subtitles/providers/oss/utils/languages.py +78 -0
  138. plexflow/core/subtitles/providers/oss/utils/response_base.py +221 -0
  139. plexflow/core/subtitles/providers/oss/utils/responses.py +176 -0
  140. plexflow/core/subtitles/providers/oss/utils/srt.py +561 -0
  141. plexflow/core/subtitles/results/__init__.py +0 -0
  142. plexflow/core/subtitles/results/subtitle.py +170 -0
  143. plexflow/core/torrents/__init__.py +0 -0
  144. plexflow/core/torrents/analyzers/analyzed_torrent.py +143 -0
  145. plexflow/core/torrents/analyzers/analyzer.py +45 -0
  146. plexflow/core/torrents/analyzers/torrentquest/analyzer.py +47 -0
  147. plexflow/core/torrents/auto/auto_providers/auto/__init__.py +0 -0
  148. plexflow/core/torrents/auto/auto_providers/auto/torrent.py +64 -0
  149. plexflow/core/torrents/auto/auto_providers/tpb/torrent.py +62 -0
  150. plexflow/core/torrents/auto/auto_torrents.py +29 -0
  151. plexflow/core/torrents/providers/__init__.py +0 -0
  152. plexflow/core/torrents/providers/ext/__init__.py +0 -0
  153. plexflow/core/torrents/providers/ext/ext.py +18 -0
  154. plexflow/core/torrents/providers/ext/utils.py +64 -0
  155. plexflow/core/torrents/providers/extratorrent/__init__.py +0 -0
  156. plexflow/core/torrents/providers/extratorrent/extratorrent.py +21 -0
  157. plexflow/core/torrents/providers/extratorrent/utils.py +66 -0
  158. plexflow/core/torrents/providers/eztv/__init__.py +0 -0
  159. plexflow/core/torrents/providers/eztv/eztv.py +47 -0
  160. plexflow/core/torrents/providers/eztv/utils.py +83 -0
  161. plexflow/core/torrents/providers/rarbg2/__init__.py +0 -0
  162. plexflow/core/torrents/providers/rarbg2/rarbg2.py +19 -0
  163. plexflow/core/torrents/providers/rarbg2/utils.py +76 -0
  164. plexflow/core/torrents/providers/snowfl/__init__.py +0 -0
  165. plexflow/core/torrents/providers/snowfl/snowfl.py +36 -0
  166. plexflow/core/torrents/providers/snowfl/utils.py +59 -0
  167. plexflow/core/torrents/providers/tgx/__init__.py +0 -0
  168. plexflow/core/torrents/providers/tgx/context.py +50 -0
  169. plexflow/core/torrents/providers/tgx/dump.py +40 -0
  170. plexflow/core/torrents/providers/tgx/tgx.py +22 -0
  171. plexflow/core/torrents/providers/tgx/utils.py +61 -0
  172. plexflow/core/torrents/providers/therarbg/__init__.py +0 -0
  173. plexflow/core/torrents/providers/therarbg/therarbg.py +17 -0
  174. plexflow/core/torrents/providers/therarbg/utils.py +61 -0
  175. plexflow/core/torrents/providers/torrentquest/__init__.py +0 -0
  176. plexflow/core/torrents/providers/torrentquest/torrentquest.py +20 -0
  177. plexflow/core/torrents/providers/torrentquest/utils.py +70 -0
  178. plexflow/core/torrents/providers/tpb/__init__.py +0 -0
  179. plexflow/core/torrents/providers/tpb/tpb.py +17 -0
  180. plexflow/core/torrents/providers/tpb/utils.py +139 -0
  181. plexflow/core/torrents/providers/yts/__init__.py +0 -0
  182. plexflow/core/torrents/providers/yts/utils.py +57 -0
  183. plexflow/core/torrents/providers/yts/yts.py +31 -0
  184. plexflow/core/torrents/results/__init__.py +0 -0
  185. plexflow/core/torrents/results/torrent.py +165 -0
  186. plexflow/core/torrents/results/universal.py +220 -0
  187. plexflow/core/torrents/results/utils.py +15 -0
  188. plexflow/events/__init__.py +0 -0
  189. plexflow/events/download/__init__.py +0 -0
  190. plexflow/events/download/torrent_events.py +96 -0
  191. plexflow/events/publish/__init__.py +0 -0
  192. plexflow/events/publish/publish.py +34 -0
  193. plexflow/logging/__init__.py +0 -0
  194. plexflow/logging/log_setup.py +8 -0
  195. plexflow/spiders/quiet_logger.py +9 -0
  196. plexflow/spiders/tgx/pipelines/dump_json_pipeline.py +30 -0
  197. plexflow/spiders/tgx/pipelines/meta_pipeline.py +13 -0
  198. plexflow/spiders/tgx/pipelines/publish_pipeline.py +14 -0
  199. plexflow/spiders/tgx/pipelines/torrent_info_pipeline.py +12 -0
  200. plexflow/spiders/tgx/pipelines/validation_pipeline.py +17 -0
  201. plexflow/spiders/tgx/settings.py +36 -0
  202. plexflow/spiders/tgx/spider.py +72 -0
  203. plexflow/utils/__init__.py +0 -0
  204. plexflow/utils/antibot/human_like_requests.py +122 -0
  205. plexflow/utils/api/__init__.py +0 -0
  206. plexflow/utils/api/context/http.py +62 -0
  207. plexflow/utils/api/rest/__init__.py +0 -0
  208. plexflow/utils/api/rest/antibot_restful.py +68 -0
  209. plexflow/utils/api/rest/restful.py +49 -0
  210. plexflow/utils/captcha/__init__.py +0 -0
  211. plexflow/utils/captcha/bypass/__init__.py +0 -0
  212. plexflow/utils/captcha/bypass/decode_audio.py +34 -0
  213. plexflow/utils/download/__init__.py +0 -0
  214. plexflow/utils/download/gz.py +26 -0
  215. plexflow/utils/filesystem/__init__.py +0 -0
  216. plexflow/utils/filesystem/search.py +129 -0
  217. plexflow/utils/gmail/__init__.py +0 -0
  218. plexflow/utils/gmail/mails.py +116 -0
  219. plexflow/utils/hooks/__init__.py +0 -0
  220. plexflow/utils/hooks/http.py +84 -0
  221. plexflow/utils/hooks/postgresql.py +93 -0
  222. plexflow/utils/hooks/redis.py +112 -0
  223. plexflow/utils/image/storage.py +36 -0
  224. plexflow/utils/imdb/__init__.py +0 -0
  225. plexflow/utils/imdb/imdb_codes.py +107 -0
  226. plexflow/utils/pubsub/consume.py +82 -0
  227. plexflow/utils/pubsub/produce.py +25 -0
  228. plexflow/utils/retry/__init__.py +0 -0
  229. plexflow/utils/retry/utils.py +38 -0
  230. plexflow/utils/strings/__init__.py +0 -0
  231. plexflow/utils/strings/filesize.py +55 -0
  232. plexflow/utils/strings/language.py +14 -0
  233. plexflow/utils/subtitle/search.py +76 -0
  234. plexflow/utils/tasks/decorators.py +78 -0
  235. plexflow/utils/tasks/k8s/task.py +70 -0
  236. plexflow/utils/thread_safe/safe_list.py +54 -0
  237. plexflow/utils/thread_safe/safe_set.py +69 -0
  238. plexflow/utils/torrent/__init__.py +0 -0
  239. plexflow/utils/torrent/analyze.py +118 -0
  240. plexflow/utils/torrent/extract/common.py +37 -0
  241. plexflow/utils/torrent/extract/ext.py +2391 -0
  242. plexflow/utils/torrent/extract/extratorrent.py +56 -0
  243. plexflow/utils/torrent/extract/kat.py +1581 -0
  244. plexflow/utils/torrent/extract/tgx.py +96 -0
  245. plexflow/utils/torrent/extract/therarbg.py +170 -0
  246. plexflow/utils/torrent/extract/torrentquest.py +171 -0
  247. plexflow/utils/torrent/files.py +36 -0
  248. plexflow/utils/torrent/hash.py +90 -0
  249. plexflow/utils/transcribe/__init__.py +0 -0
  250. plexflow/utils/transcribe/speech2text.py +40 -0
  251. plexflow/utils/video/__init__.py +0 -0
  252. plexflow/utils/video/subtitle.py +73 -0
  253. plexflow-0.0.64.dist-info/METADATA +71 -0
  254. plexflow-0.0.64.dist-info/RECORD +256 -0
  255. plexflow-0.0.64.dist-info/WHEEL +4 -0
  256. plexflow-0.0.64.dist-info/entry_points.txt +24 -0
@@ -0,0 +1,220 @@
1
+ from typing import List, Optional
2
+ from plexflow.core.subtitles.results.subtitle import Subtitle
3
+ from plexflow.core.torrents.results.torrent import Torrent
4
+ from plexflow.utils.imdb.imdb_codes import IMDbCode
5
+ from collections import defaultdict
6
+
7
+ from typing import List, Optional, Set
8
+
9
class UniversalTorrent:
    """
    Represents a universal torrent that contains multiple torrents with the same hash.

    Equality and hashing are both keyed on the hash of the first wrapped
    torrent, so instances can be de-duplicated in sets and used as dict keys.

    Attributes:
        torrents (List[Torrent]): The list of torrents contained in the universal torrent.
    """

    def __init__(self, torrents: List[Torrent]):
        """
        Initializes a new instance of the UniversalTorrent class.

        An empty list is accepted here, but every member that reads
        ``torrents[0]`` or calls ``max()``/``min()`` will raise on such an
        instance.

        Args:
            torrents (List[Torrent]): The list of torrents to be included in the universal torrent.

        Raises:
            ValueError: If the torrents have different hashes.
        """
        hashes = {t.hash for t in torrents}
        if len(hashes) > 1:
            raise ValueError("All torrents should have the same hash")
        self.torrents = torrents

    @property
    def imdb_code(self) -> IMDbCode:
        """
        Gets the IMDb code of the universal torrent.

        Returns:
            IMDbCode: The IMDb code of the first wrapped torrent.
        """
        return self.torrents[0].imdb_code

    @property
    def is_season_pack(self) -> bool:
        """
        Checks if the universal torrent is a season pack.

        Returns:
            bool: True if any wrapped torrent reports multiple episodes, False otherwise.
        """
        return any(t.has_multiple_episodes for t in self.torrents)

    @property
    def season(self) -> Optional[int]:
        """
        Gets the season number of the universal torrent.

        Returns:
            Optional[int]: The first integer season found among the torrents, or None if not available.
        """
        for t in self.torrents:
            if isinstance(t.season, int):
                return t.season
        return None

    @property
    def episode(self) -> Optional[int]:
        """
        Gets the episode number of the universal torrent.

        Returns:
            Optional[int]: The first integer episode found among the torrents, or None if not available.
        """
        for t in self.torrents:
            if isinstance(t.episode, int):
                return t.episode
        return None

    @property
    def max_peers(self) -> int:
        """
        Gets the maximum number of peers among all torrents in the universal torrent.

        Returns:
            int: The maximum number of peers.
        """
        return max(t.peers for t in self.torrents)

    @property
    def max_seeds(self) -> int:
        """
        Gets the maximum number of seeds among all torrents in the universal torrent.

        Returns:
            int: The maximum number of seeds.
        """
        return max(t.seeds for t in self.torrents)

    @property
    def min_seeds(self) -> int:
        """
        Gets the minimum number of seeds among all torrents in the universal torrent.

        Returns:
            int: The minimum number of seeds.
        """
        return min(t.seeds for t in self.torrents)

    @property
    def min_peers(self) -> int:
        """
        Gets the minimum number of peers among all torrents in the universal torrent.

        Returns:
            int: The minimum number of peers.
        """
        return min(t.peers for t in self.torrents)

    @property
    def sources(self) -> Set:
        """
        Gets the set of sources of the universal torrent.

        Returns:
            set: The set of sources.
        """
        return {t.source for t in self.torrents}

    @property
    def max_size_bytes(self) -> int:
        """
        Gets the maximum size in bytes among all torrents in the universal torrent.

        Returns:
            int: The maximum size in bytes.
        """
        return max(t.size_bytes for t in self.torrents)

    @property
    def min_size_bytes(self) -> int:
        """
        Gets the minimum size in bytes among all torrents in the universal torrent.

        Returns:
            int: The minimum size in bytes.
        """
        return min(t.size_bytes for t in self.torrents)

    @property
    def has_native_subtitles(self) -> bool:
        """
        Checks if the universal torrent has native subtitles.

        Returns:
            bool: True if any wrapped torrent has native subtitles, False otherwise.
        """
        return any(t.has_native_subtitles for t in self.torrents)

    @property
    def has_native_dutch_subtitles(self) -> bool:
        """
        Checks if the universal torrent has native Dutch subtitles.

        Returns:
            bool: True if any wrapped torrent has native Dutch subtitles, False otherwise.
        """
        return any(t.has_native_dutch_subtitles for t in self.torrents)

    @property
    def has_native_english_subtitles(self) -> bool:
        """
        Checks if the universal torrent has native English subtitles.

        Returns:
            bool: True if any wrapped torrent has native English subtitles, False otherwise.
        """
        return any(t.has_native_english_subtitles for t in self.torrents)

    def is_compatible_with(self, s: Subtitle) -> bool:
        """
        Checks if the universal torrent is compatible with a given subtitle.

        A subtitle is compatible when, for at least one wrapped torrent, its
        name equals the torrent's release name or its encoder equals the
        torrent's encoder name.

        Args:
            s (Subtitle): The subtitle to check compatibility with.

        Returns:
            bool: True if the universal torrent is compatible with the subtitle, False otherwise.
        """
        # NOTE(review): if both encoders can be None this also matches on
        # "unknown == unknown" -- confirm that is intended.
        return any(s.name == t.release_name or s.encoder == t.encoder_name for t in self.torrents)

    def __eq__(self, other):
        """
        Checks if the universal torrent is equal to another object.

        Args:
            other: The object to compare with.

        Returns:
            bool: True if both wrap torrents with the same hash; NotImplemented
            for objects that are not UniversalTorrent instances.
        """
        if not isinstance(other, UniversalTorrent):
            return NotImplemented
        return self.torrents[0].hash == other.torrents[0].hash

    def __hash__(self):
        """
        Returns a hash consistent with ``__eq__``.

        Defining ``__eq__`` alone makes a class unhashable, so the hash is
        derived from the same value equality compares.

        Returns:
            int: The hash of the first wrapped torrent's hash value.
        """
        return hash(self.torrents[0].hash)

    def __str__(self):
        """
        Returns a string representation of the universal torrent.

        Returns:
            str: The string representation of the universal torrent.
        """
        return f"UniversalTorrent({self.torrents[0].hash})"

    def __repr__(self):
        """
        Returns a string representation of the universal torrent.

        Returns:
            str: The string representation of the universal torrent.
        """
        return self.__str__()
@@ -0,0 +1,15 @@
1
+ from typing import List
2
+ from plexflow.core.torrents.results.torrent import Torrent
3
+ from collections import defaultdict
4
+ from plexflow.core.torrents.results.universal import UniversalTorrent
5
+
6
def create_universal_torrents(torrents: List[Torrent]) -> List[UniversalTorrent]:
    """
    Build one UniversalTorrent per distinct hash found in the given torrents.

    Torrents are bucketed by their ``hash`` attribute, and each bucket is
    wrapped in a UniversalTorrent. Buckets appear in the order their hash
    was first seen.
    """
    buckets = {}
    for item in torrents:
        buckets.setdefault(item.hash, []).append(item)

    return list(map(UniversalTorrent, buckets.values()))
File without changes
File without changes
@@ -0,0 +1,96 @@
1
+ from dataclasses import dataclass
2
+ from typing import Dict
3
+ from confluent_kafka.avro import AvroProducer
4
+ from avro.schema import parse
5
+ from avro.io import DatumWriter
6
+ from confluent_kafka.avro import AvroProducer
7
+ from confluent_kafka.schema_registry import SchemaRegistryClient
8
+ from typing import Any
9
+
10
@dataclass
class DownloadEvent:
    """
    Represents a download event.

    Attributes:
        name (str): The name of the download.
        category (str): The category of the download.
        tags (str): The tags associated with the download.
        content_path (str): The path to the content of the download.
        root_path (str): The root path of the download.
        save_path (str): The path where the download is saved.
        total_files (int): The number of files in the download.
        torrent_size (int): The size of the download in bytes.
        current_tracker (str): The current tracker of the download.
        info_hash_v1 (str): The info hash v1 of the download.
        info_hash_v2 (str): The info hash v2 of the download.
        torrent_id (str): The ID of the download.
        finished (bool): Indicates whether the download has finished downloading.
    """
    name: str
    category: str
    tags: str
    content_path: str
    root_path: str
    save_path: str
    total_files: int
    torrent_size: int
    current_tracker: str
    info_hash_v1: str
    info_hash_v2: str
    torrent_id: str
    finished: bool

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the DownloadEvent object to a dictionary.

        The keys are the field names in declaration order. Values keep their
        native types (str, int, bool) and are not stringified.

        Returns:
            dict: A dictionary representation of the DownloadEvent object.
        """
        # Imported locally because the module only imports ``dataclass``.
        from dataclasses import asdict

        # Deriving the dict from the fields keeps it in sync with the class.
        return asdict(self)
66
+
67
def produce_to_topic(bootstrap_servers: str, schema_registry_url: str, topic: str, value: Any, schema_subject: str):
    """
    Publish a single value to a Kafka topic through an AvroProducer.

    The value schema is the latest version registered under
    ``schema_subject`` in the schema registry.

    Args:
        bootstrap_servers (str): The list of Kafka bootstrap servers.
        schema_registry_url (str): The URL of the schema registry.
        topic (str): The Kafka topic to produce the value to.
        value (Any): The value to produce.
        schema_subject (str): The subject of the Avro schema in the schema registry.
    """
    # Fetch the newest schema registered for the subject and parse it.
    registry = SchemaRegistryClient({'url': schema_registry_url})
    latest = registry.get_latest_version(schema_subject)
    value_schema = parse(latest.schema.schema_str)

    producer_config = {
        'bootstrap.servers': bootstrap_servers,
        'schema.registry.url': schema_registry_url
    }
    # NOTE(review): confirm that the installed AvroProducer accepts a
    # ``value_serializer`` keyword -- it is passed through unchanged here.
    producer = AvroProducer(
        producer_config,
        default_value_schema=value_schema,
        value_serializer=DatumWriter().write,
    )

    try:
        producer.produce(topic=topic, value=value)
        # Block until the message has been delivered.
        producer.flush()
    finally:
        # NOTE(review): confirm the producer class exposes ``close()``.
        producer.close()
File without changes
@@ -0,0 +1,34 @@
1
+ from avro.schema import parse
2
+ from avro.io import DatumWriter
3
+ from confluent_kafka.avro import AvroProducer
4
+ from confluent_kafka.schema_registry import SchemaRegistryClient
5
+ from typing import Any
6
+ import json
7
+
8
def produce_to_topic(bootstrap_servers: str, schema_registry_url: str, topic: str, value: Any, schema_subject: str):
    """
    Publish a single value to a Kafka topic through an AvroProducer.

    The value schema is the latest version registered under
    ``schema_subject`` in the schema registry.

    Args:
        bootstrap_servers (str): The list of Kafka bootstrap servers.
        schema_registry_url (str): The URL of the schema registry.
        topic (str): The Kafka topic to produce the value to.
        value (Any): The value to produce.
        schema_subject (str): The subject of the Avro schema in the schema registry.
    """
    # Fetch the newest schema registered for the subject and parse it.
    registry = SchemaRegistryClient({'url': schema_registry_url})
    latest = registry.get_latest_version(schema_subject)
    value_schema = parse(latest.schema.schema_str)

    producer_config = {
        'bootstrap.servers': bootstrap_servers,
        'schema.registry.url': schema_registry_url
    }
    producer = AvroProducer(producer_config, default_value_schema=value_schema)

    producer.produce(topic=topic, value=value)
    # Block until the message has been delivered.
    producer.flush()
File without changes
@@ -0,0 +1,8 @@
1
+ import logging.config
2
+ import yaml
3
+
4
# Read the YAML logging configuration. The path is relative to the
# current working directory.
with open('logging/config.yml', 'r') as f:
    config = yaml.safe_load(f.read())

# Apply the configuration, then expose the package-wide logger.
logging.config.dictConfig(config)
logger = logging.getLogger("plexflow")
@@ -0,0 +1,9 @@
1
+ import scrapy
2
+
3
class QuietLogFormatter(scrapy.logformatter.LogFormatter):
    """Log formatter that reports scraped items only when LOG_SCRAPED_ITEMS is enabled."""

    def scraped(self, item, response, spider):
        """
        Return the default "scraped" log entry, or None when the spider's
        LOG_SCRAPED_ITEMS setting is falsy.
        """
        if not spider.settings.getbool("LOG_SCRAPED_ITEMS"):
            return None
        return super().scraped(item, response, spider)
@@ -0,0 +1,30 @@
1
+ # Import necessary modules
2
+ import json
3
+ from pathlib import Path
4
+
5
class DumpJsonPipeline:
    """
    Collects the ``meta`` dict of every processed item and writes them all
    to ``<spider.dump_folder>/<spider.tag>.json`` when the spider closes.
    """

    def __init__(self):
        # Accumulated copies of each item's meta, in processing order.
        self.data = []

    def process_item(self, item, spider):
        """
        Record a shallow copy of the item's meta and pass the item on.

        Args:
            item: Mapping-like item; its ``"meta"`` entry is recorded.
            spider: The running spider (unused).

        Returns:
            The item, unchanged.
        """
        # ``or {}`` also covers an explicit ``"meta": None``, which
        # ``dict(None)`` would reject with a TypeError.
        self.data.append(dict(item.get("meta") or {}))
        return item

    def close_spider(self, spider):
        """
        Write the collected meta dicts to a JSON file.

        Args:
            spider: Must expose ``dump_folder`` (target directory) and
                ``tag`` (file stem, str or UTF-8 encoded bytes).
        """
        # Imported locally because the module only imports json and pathlib.
        import logging

        target_path = Path(spider.dump_folder)
        tag = spider.tag
        if isinstance(tag, bytes):
            tag = tag.decode("utf-8")

        logging.getLogger(__name__).debug(
            "Dumping %d items for tag %r", len(self.data), tag
        )

        json_file_path = target_path / f"{tag}.json"

        # Create the directory, including missing parents, if needed.
        target_path.mkdir(parents=True, exist_ok=True)

        # Write the data to the JSON file.
        with open(json_file_path, 'w', encoding="utf-8") as json_file:
            json.dump(self.data, json_file, indent=4)
@@ -0,0 +1,13 @@
1
+ from datetime import datetime
2
+
3
class MetaPipeline:
    """Adds deletion status and a scrape timestamp to each item's meta."""

    def process_item(self, item, spider):
        """
        Store a new meta dict on the item with two extra keys:
        ``deleted`` (whether the page text contains the deletion notice) and
        ``date_last_scrape`` (local time as ``YYYY-MM-DD HH:MM:SS``).
        """
        page = item.get("response", None)
        previous_meta = item.get("meta", {})

        is_deleted = "it has probably been deleted" in page.text
        scraped_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Copy rather than mutate, so the original meta dict stays untouched.
        enriched = {**previous_meta}
        enriched["deleted"] = is_deleted
        enriched["date_last_scrape"] = scraped_at

        item["meta"] = enriched
        return item
@@ -0,0 +1,14 @@
1
+ # Import necessary modules
2
+ import json
3
+ from pathlib import Path
4
+ import logging
5
+
6
class PublishPipeline:
    """Reports each item's meta back to the spider as a finished page."""

    def process_item(self, item, spider):
        """
        Hand the item's meta to ``spider.mark_page_as_finished``, unless the
        meta is None. The item is always returned unchanged.
        """
        meta = item.get("meta", {})
        if meta is not None:
            spider.mark_page_as_finished(meta)
        else:
            logging.info("Meta is None. Skipping...")
        return item
@@ -0,0 +1,12 @@
1
+ from plexflow.utils.torrent.extract.tgx import extract_torrent_info
2
+
3
class TorrentInfoPipeline:
    """Merges torrent details extracted from the page HTML into the item's meta."""

    def process_item(self, item, spider):
        """
        Run ``extract_torrent_info`` on the response text and store a new
        meta dict in which the extracted keys override existing ones.
        """
        existing = item.get("meta", {})
        page = item.get("response", None)

        extracted = extract_torrent_info(html_content=page.text)

        item["meta"] = {**existing, **extracted}
        return item
@@ -0,0 +1,17 @@
1
+
2
class ValidationPipeline:
    """Flags items whose page is neither a torrent page nor a deletion notice."""

    def process_item(self, item, spider):
        """
        Set ``meta["valid"]`` and ``meta["errored"]`` on the item.

        A page is valid when its text contains the deletion notice or a
        magnet link. Otherwise ``spider.session_expired`` is set to True.
        """
        page = item.get("response", None)
        meta = item.get("meta", {})

        body = page.text
        looks_valid = "it has probably been deleted" in body or "magnet:?xt" in body

        if not looks_valid:
            print(f"Invalid HTML for number {meta.get('id')}")
            spider.session_expired = True

        meta["valid"] = looks_valid
        meta["errored"] = False

        item["meta"] = meta
        return item
@@ -0,0 +1,36 @@
1
# Scrapy settings for the tgx spider.

# --- Identity and request defaults -------------------------------------------
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"

# Sent with every request in addition to the user agent.
DEFAULT_REQUEST_HEADERS = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en",
}

# robots.txt rules are not obeyed.
ROBOTSTXT_OBEY = False

# --- Item pipelines (lower number runs first) --------------------------------
ITEM_PIPELINES = {
    "plexflow.spiders.tgx.pipelines.validation_pipeline.ValidationPipeline": 100,
    "plexflow.spiders.tgx.pipelines.torrent_info_pipeline.TorrentInfoPipeline": 600,
    "plexflow.spiders.tgx.pipelines.meta_pipeline.MetaPipeline": 800,
    "plexflow.spiders.tgx.pipelines.publish_pipeline.PublishPipeline": 900,
}

# --- Logging -----------------------------------------------------------------
LOG_ENABLED = True
LOG_LEVEL = "INFO"
LOG_FORMATTER = "plexflow.spiders.quiet_logger.QuietLogFormatter"
# LOG_FILE = "scrapy.log"

# --- Retries and timeouts ----------------------------------------------------
RETRY_ENABLED = True
RETRY_TIMES = 5
RETRY_HTTP_CODES = [500, 502, 503, 504, 522, 524, 408, 429]
DOWNLOAD_TIMEOUT = 15

# --- Concurrency -------------------------------------------------------------
CONCURRENT_REQUESTS = 10
CONCURRENT_REQUESTS_PER_DOMAIN = 10
@@ -0,0 +1,72 @@
1
+ import scrapy
2
+ from bs4 import BeautifulSoup
3
+ from scrapy.exceptions import CloseSpider
4
+ from scrapy import signals
5
+ from plexflow.utils.thread_safe.safe_set import ThreadSafeSet
6
+ from plexflow.utils.thread_safe.safe_list import ThreadSafeList
7
+ import logging
8
+
9
class TgxSpider(scrapy.Spider):
    """
    Spider that fetches ``<host>/torrent/<page_id>`` for a batch of page ids.

    Each response is emitted as an item holding the parsed soup, the raw
    response and a ``meta`` dict with the page id. Item pipelines are
    expected to call ``mark_page_as_finished`` and may set
    ``session_expired`` to stop the crawl.
    """
    name = "tgx_spider"
    # Set to True from outside (e.g. a pipeline) to abort on the next response.
    session_expired: bool = False

    def __init__(self, pages, host='https://torrentgalaxy.to', cookies: dict = None, callback=None, **kwargs):
        """
        Args:
            pages: Iterable of page ids to scrape; duplicates are dropped.
            host: Base URL the page ids are appended to.
            cookies: Cookies sent with every request; defaults to none.
            callback: Optional callable invoked with this spider once it has closed.
            **kwargs: Extra spider arguments, forwarded to ``scrapy.Spider``.
        """
        # Run the base initialiser first so the explicit attributes below
        # win over any same-named keyword argument.
        super().__init__(**kwargs)
        self.pages = set(pages)
        self.host = host
        self.cookies = cookies or {}

        self.original_batch = ThreadSafeSet.from_set(self.pages)
        self.finished_batch = ThreadSafeList()
        self.callback = callback

    @property
    def finished_ids(self):
        """Set of ``id`` values from the meta dicts marked as finished."""
        # Read through to_list(), the same accessor finished_items uses.
        return {entry.get("id") for entry in self.finished_batch.to_list()}

    @property
    def finished_items(self):
        """List of the meta dicts marked as finished."""
        return self.finished_batch.to_list()

    @property
    def original_ids(self):
        """Set of all page ids this spider was asked to scrape."""
        return self.original_batch.to_set()

    @property
    def unfinished_ids(self):
        """Set of requested page ids that have not been marked as finished."""
        return self.original_batch.difference(self.finished_ids).to_set()

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Create the spider and hook ``spider_closed`` to the crawler's signal."""
        spider = super(TgxSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.spider_closed, signal=signals.spider_closed)
        return spider

    def mark_page_as_finished(self, meta):
        """Record a page's meta dict as finished."""
        self.finished_batch.append(meta)

    def start_requests(self):
        """Yield one request per page id, carrying the id in ``meta``."""
        for page_id in self.pages:
            yield scrapy.Request(
                f'{self.host}/torrent/{page_id}',
                self.parse,
                meta={'id': page_id},
                cookies=self.cookies)

    def parse(self, response):
        """
        Turn a response into an item for the pipelines.

        Raises:
            CloseSpider: If ``session_expired`` has been set.
        """
        if self.session_expired:
            raise CloseSpider("Session Expired")
        soup = BeautifulSoup(response.text, 'html.parser')
        page_number = response.meta["id"]

        return {"soup": soup, "valid": True, "response": response, "meta": {"id": page_number}}

    def spider_closed(self, spider):
        """Log a summary of the crawl and invoke the optional callback."""
        logging.info(
            "Spider %s closing. Finished scraping %d pages.",
            spider.name, len(self.finished_ids),
        )

        logging.info("%d pages were not scraped.", len(self.unfinished_ids))

        logging.info("Spider closed.")

        if self.callback:
            self.callback(self)
File without changes