plexflow 0.0.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. plexflow/__init__.py +0 -0
  2. plexflow/__main__.py +15 -0
  3. plexflow/core/.DS_Store +0 -0
  4. plexflow/core/__init__.py +0 -0
  5. plexflow/core/context/__init__.py +0 -0
  6. plexflow/core/context/metadata/__init__.py +0 -0
  7. plexflow/core/context/metadata/context.py +32 -0
  8. plexflow/core/context/metadata/tmdb/__init__.py +0 -0
  9. plexflow/core/context/metadata/tmdb/context.py +45 -0
  10. plexflow/core/context/partial_context.py +46 -0
  11. plexflow/core/context/partials/__init__.py +8 -0
  12. plexflow/core/context/partials/cache.py +16 -0
  13. plexflow/core/context/partials/context.py +12 -0
  14. plexflow/core/context/partials/ids.py +37 -0
  15. plexflow/core/context/partials/movie.py +115 -0
  16. plexflow/core/context/partials/tgx_batch.py +33 -0
  17. plexflow/core/context/partials/tgx_context.py +34 -0
  18. plexflow/core/context/partials/torrents.py +23 -0
  19. plexflow/core/context/partials/watchlist.py +35 -0
  20. plexflow/core/context/plexflow_context.py +29 -0
  21. plexflow/core/context/plexflow_property.py +36 -0
  22. plexflow/core/context/root/__init__.py +0 -0
  23. plexflow/core/context/root/context.py +25 -0
  24. plexflow/core/context/select/__init__.py +0 -0
  25. plexflow/core/context/select/context.py +45 -0
  26. plexflow/core/context/torrent/__init__.py +0 -0
  27. plexflow/core/context/torrent/context.py +43 -0
  28. plexflow/core/context/torrent/tpb/__init__.py +0 -0
  29. plexflow/core/context/torrent/tpb/context.py +45 -0
  30. plexflow/core/context/torrent/yts/__init__.py +0 -0
  31. plexflow/core/context/torrent/yts/context.py +45 -0
  32. plexflow/core/context/watchlist/__init__.py +0 -0
  33. plexflow/core/context/watchlist/context.py +46 -0
  34. plexflow/core/downloads/__init__.py +0 -0
  35. plexflow/core/downloads/candidates/__init__.py +0 -0
  36. plexflow/core/downloads/candidates/download_candidate.py +210 -0
  37. plexflow/core/downloads/candidates/filtered.py +51 -0
  38. plexflow/core/downloads/candidates/utils.py +39 -0
  39. plexflow/core/env/__init__.py +0 -0
  40. plexflow/core/env/env.py +31 -0
  41. plexflow/core/genai/__init__.py +0 -0
  42. plexflow/core/genai/bot.py +9 -0
  43. plexflow/core/genai/plexa.py +54 -0
  44. plexflow/core/genai/torrent/imdb_verify.py +65 -0
  45. plexflow/core/genai/torrent/movie.py +25 -0
  46. plexflow/core/genai/utils/__init__.py +0 -0
  47. plexflow/core/genai/utils/loader.py +5 -0
  48. plexflow/core/metadata/__init__.py +0 -0
  49. plexflow/core/metadata/auto/__init__.py +0 -0
  50. plexflow/core/metadata/auto/auto_meta.py +40 -0
  51. plexflow/core/metadata/auto/auto_providers/__init__.py +0 -0
  52. plexflow/core/metadata/auto/auto_providers/auto/__init__.py +0 -0
  53. plexflow/core/metadata/auto/auto_providers/auto/episode.py +49 -0
  54. plexflow/core/metadata/auto/auto_providers/auto/item.py +55 -0
  55. plexflow/core/metadata/auto/auto_providers/auto/movie.py +13 -0
  56. plexflow/core/metadata/auto/auto_providers/auto/season.py +43 -0
  57. plexflow/core/metadata/auto/auto_providers/auto/show.py +26 -0
  58. plexflow/core/metadata/auto/auto_providers/imdb/__init__.py +0 -0
  59. plexflow/core/metadata/auto/auto_providers/imdb/movie.py +36 -0
  60. plexflow/core/metadata/auto/auto_providers/imdb/show.py +45 -0
  61. plexflow/core/metadata/auto/auto_providers/moviemeter/__init__.py +0 -0
  62. plexflow/core/metadata/auto/auto_providers/moviemeter/movie.py +40 -0
  63. plexflow/core/metadata/auto/auto_providers/plex/__init__.py +0 -0
  64. plexflow/core/metadata/auto/auto_providers/plex/movie.py +39 -0
  65. plexflow/core/metadata/auto/auto_providers/tmdb/__init__.py +0 -0
  66. plexflow/core/metadata/auto/auto_providers/tmdb/episode.py +30 -0
  67. plexflow/core/metadata/auto/auto_providers/tmdb/movie.py +36 -0
  68. plexflow/core/metadata/auto/auto_providers/tmdb/season.py +23 -0
  69. plexflow/core/metadata/auto/auto_providers/tmdb/show.py +41 -0
  70. plexflow/core/metadata/auto/auto_providers/tmdb.py +92 -0
  71. plexflow/core/metadata/auto/auto_providers/tvdb/__init__.py +0 -0
  72. plexflow/core/metadata/auto/auto_providers/tvdb/episode.py +28 -0
  73. plexflow/core/metadata/auto/auto_providers/tvdb/movie.py +36 -0
  74. plexflow/core/metadata/auto/auto_providers/tvdb/season.py +25 -0
  75. plexflow/core/metadata/auto/auto_providers/tvdb/show.py +41 -0
  76. plexflow/core/metadata/providers/__init__.py +0 -0
  77. plexflow/core/metadata/providers/imdb/__init__.py +0 -0
  78. plexflow/core/metadata/providers/imdb/datatypes.py +53 -0
  79. plexflow/core/metadata/providers/imdb/imdb.py +112 -0
  80. plexflow/core/metadata/providers/moviemeter/__init__.py +0 -0
  81. plexflow/core/metadata/providers/moviemeter/datatypes.py +111 -0
  82. plexflow/core/metadata/providers/moviemeter/moviemeter.py +42 -0
  83. plexflow/core/metadata/providers/plex/__init__.py +0 -0
  84. plexflow/core/metadata/providers/plex/datatypes.py +693 -0
  85. plexflow/core/metadata/providers/plex/plex.py +167 -0
  86. plexflow/core/metadata/providers/tmdb/__init__.py +0 -0
  87. plexflow/core/metadata/providers/tmdb/datatypes.py +460 -0
  88. plexflow/core/metadata/providers/tmdb/tmdb.py +85 -0
  89. plexflow/core/metadata/providers/tvdb/__init__.py +0 -0
  90. plexflow/core/metadata/providers/tvdb/datatypes.py +257 -0
  91. plexflow/core/metadata/providers/tvdb/tv_datatypes.py +554 -0
  92. plexflow/core/metadata/providers/tvdb/tvdb.py +65 -0
  93. plexflow/core/metadata/providers/universal/__init__.py +0 -0
  94. plexflow/core/metadata/providers/universal/movie.py +130 -0
  95. plexflow/core/metadata/providers/universal/old.py +192 -0
  96. plexflow/core/metadata/providers/universal/show.py +107 -0
  97. plexflow/core/plex/__init__.py +0 -0
  98. plexflow/core/plex/api/context/authorized.py +15 -0
  99. plexflow/core/plex/api/context/discover.py +14 -0
  100. plexflow/core/plex/api/context/library.py +14 -0
  101. plexflow/core/plex/discover/__init__.py +0 -0
  102. plexflow/core/plex/discover/activity.py +448 -0
  103. plexflow/core/plex/discover/comment.py +89 -0
  104. plexflow/core/plex/discover/feed.py +11 -0
  105. plexflow/core/plex/hooks/__init__.py +0 -0
  106. plexflow/core/plex/hooks/plex_authorized.py +60 -0
  107. plexflow/core/plex/hooks/plexflow_database.py +6 -0
  108. plexflow/core/plex/library/__init__.py +0 -0
  109. plexflow/core/plex/library/library.py +103 -0
  110. plexflow/core/plex/token/__init__.py +0 -0
  111. plexflow/core/plex/token/auto_token.py +91 -0
  112. plexflow/core/plex/utils/__init__.py +0 -0
  113. plexflow/core/plex/utils/paginated.py +39 -0
  114. plexflow/core/plex/watchlist/__init__.py +0 -0
  115. plexflow/core/plex/watchlist/datatypes.py +124 -0
  116. plexflow/core/plex/watchlist/watchlist.py +23 -0
  117. plexflow/core/storage/__init__.py +0 -0
  118. plexflow/core/storage/object/__init__.py +0 -0
  119. plexflow/core/storage/object/plexflow_storage.py +143 -0
  120. plexflow/core/storage/object/redis_storage.py +169 -0
  121. plexflow/core/subtitles/__init__.py +0 -0
  122. plexflow/core/subtitles/providers/__init__.py +0 -0
  123. plexflow/core/subtitles/providers/auto_subtitles.py +48 -0
  124. plexflow/core/subtitles/providers/oss/__init__.py +0 -0
  125. plexflow/core/subtitles/providers/oss/datatypes.py +104 -0
  126. plexflow/core/subtitles/providers/oss/download.py +48 -0
  127. plexflow/core/subtitles/providers/oss/old.py +144 -0
  128. plexflow/core/subtitles/providers/oss/oss.py +400 -0
  129. plexflow/core/subtitles/providers/oss/oss_subtitle.py +32 -0
  130. plexflow/core/subtitles/providers/oss/search.py +52 -0
  131. plexflow/core/subtitles/providers/oss/unlimited_oss.py +231 -0
  132. plexflow/core/subtitles/providers/oss/utils/__init__.py +0 -0
  133. plexflow/core/subtitles/providers/oss/utils/config.py +63 -0
  134. plexflow/core/subtitles/providers/oss/utils/download_client.py +22 -0
  135. plexflow/core/subtitles/providers/oss/utils/exceptions.py +35 -0
  136. plexflow/core/subtitles/providers/oss/utils/file_utils.py +83 -0
  137. plexflow/core/subtitles/providers/oss/utils/languages.py +78 -0
  138. plexflow/core/subtitles/providers/oss/utils/response_base.py +221 -0
  139. plexflow/core/subtitles/providers/oss/utils/responses.py +176 -0
  140. plexflow/core/subtitles/providers/oss/utils/srt.py +561 -0
  141. plexflow/core/subtitles/results/__init__.py +0 -0
  142. plexflow/core/subtitles/results/subtitle.py +170 -0
  143. plexflow/core/torrents/__init__.py +0 -0
  144. plexflow/core/torrents/analyzers/analyzed_torrent.py +143 -0
  145. plexflow/core/torrents/analyzers/analyzer.py +45 -0
  146. plexflow/core/torrents/analyzers/torrentquest/analyzer.py +47 -0
  147. plexflow/core/torrents/auto/auto_providers/auto/__init__.py +0 -0
  148. plexflow/core/torrents/auto/auto_providers/auto/torrent.py +64 -0
  149. plexflow/core/torrents/auto/auto_providers/tpb/torrent.py +62 -0
  150. plexflow/core/torrents/auto/auto_torrents.py +29 -0
  151. plexflow/core/torrents/providers/__init__.py +0 -0
  152. plexflow/core/torrents/providers/ext/__init__.py +0 -0
  153. plexflow/core/torrents/providers/ext/ext.py +18 -0
  154. plexflow/core/torrents/providers/ext/utils.py +64 -0
  155. plexflow/core/torrents/providers/extratorrent/__init__.py +0 -0
  156. plexflow/core/torrents/providers/extratorrent/extratorrent.py +21 -0
  157. plexflow/core/torrents/providers/extratorrent/utils.py +66 -0
  158. plexflow/core/torrents/providers/eztv/__init__.py +0 -0
  159. plexflow/core/torrents/providers/eztv/eztv.py +47 -0
  160. plexflow/core/torrents/providers/eztv/utils.py +83 -0
  161. plexflow/core/torrents/providers/rarbg2/__init__.py +0 -0
  162. plexflow/core/torrents/providers/rarbg2/rarbg2.py +19 -0
  163. plexflow/core/torrents/providers/rarbg2/utils.py +76 -0
  164. plexflow/core/torrents/providers/snowfl/__init__.py +0 -0
  165. plexflow/core/torrents/providers/snowfl/snowfl.py +36 -0
  166. plexflow/core/torrents/providers/snowfl/utils.py +59 -0
  167. plexflow/core/torrents/providers/tgx/__init__.py +0 -0
  168. plexflow/core/torrents/providers/tgx/context.py +50 -0
  169. plexflow/core/torrents/providers/tgx/dump.py +40 -0
  170. plexflow/core/torrents/providers/tgx/tgx.py +22 -0
  171. plexflow/core/torrents/providers/tgx/utils.py +61 -0
  172. plexflow/core/torrents/providers/therarbg/__init__.py +0 -0
  173. plexflow/core/torrents/providers/therarbg/therarbg.py +17 -0
  174. plexflow/core/torrents/providers/therarbg/utils.py +61 -0
  175. plexflow/core/torrents/providers/torrentquest/__init__.py +0 -0
  176. plexflow/core/torrents/providers/torrentquest/torrentquest.py +20 -0
  177. plexflow/core/torrents/providers/torrentquest/utils.py +70 -0
  178. plexflow/core/torrents/providers/tpb/__init__.py +0 -0
  179. plexflow/core/torrents/providers/tpb/tpb.py +17 -0
  180. plexflow/core/torrents/providers/tpb/utils.py +139 -0
  181. plexflow/core/torrents/providers/yts/__init__.py +0 -0
  182. plexflow/core/torrents/providers/yts/utils.py +57 -0
  183. plexflow/core/torrents/providers/yts/yts.py +31 -0
  184. plexflow/core/torrents/results/__init__.py +0 -0
  185. plexflow/core/torrents/results/torrent.py +165 -0
  186. plexflow/core/torrents/results/universal.py +220 -0
  187. plexflow/core/torrents/results/utils.py +15 -0
  188. plexflow/events/__init__.py +0 -0
  189. plexflow/events/download/__init__.py +0 -0
  190. plexflow/events/download/torrent_events.py +96 -0
  191. plexflow/events/publish/__init__.py +0 -0
  192. plexflow/events/publish/publish.py +34 -0
  193. plexflow/logging/__init__.py +0 -0
  194. plexflow/logging/log_setup.py +8 -0
  195. plexflow/spiders/quiet_logger.py +9 -0
  196. plexflow/spiders/tgx/pipelines/dump_json_pipeline.py +30 -0
  197. plexflow/spiders/tgx/pipelines/meta_pipeline.py +13 -0
  198. plexflow/spiders/tgx/pipelines/publish_pipeline.py +14 -0
  199. plexflow/spiders/tgx/pipelines/torrent_info_pipeline.py +12 -0
  200. plexflow/spiders/tgx/pipelines/validation_pipeline.py +17 -0
  201. plexflow/spiders/tgx/settings.py +36 -0
  202. plexflow/spiders/tgx/spider.py +72 -0
  203. plexflow/utils/__init__.py +0 -0
  204. plexflow/utils/antibot/human_like_requests.py +122 -0
  205. plexflow/utils/api/__init__.py +0 -0
  206. plexflow/utils/api/context/http.py +62 -0
  207. plexflow/utils/api/rest/__init__.py +0 -0
  208. plexflow/utils/api/rest/antibot_restful.py +68 -0
  209. plexflow/utils/api/rest/restful.py +49 -0
  210. plexflow/utils/captcha/__init__.py +0 -0
  211. plexflow/utils/captcha/bypass/__init__.py +0 -0
  212. plexflow/utils/captcha/bypass/decode_audio.py +34 -0
  213. plexflow/utils/download/__init__.py +0 -0
  214. plexflow/utils/download/gz.py +26 -0
  215. plexflow/utils/filesystem/__init__.py +0 -0
  216. plexflow/utils/filesystem/search.py +129 -0
  217. plexflow/utils/gmail/__init__.py +0 -0
  218. plexflow/utils/gmail/mails.py +116 -0
  219. plexflow/utils/hooks/__init__.py +0 -0
  220. plexflow/utils/hooks/http.py +84 -0
  221. plexflow/utils/hooks/postgresql.py +93 -0
  222. plexflow/utils/hooks/redis.py +112 -0
  223. plexflow/utils/image/storage.py +36 -0
  224. plexflow/utils/imdb/__init__.py +0 -0
  225. plexflow/utils/imdb/imdb_codes.py +107 -0
  226. plexflow/utils/pubsub/consume.py +82 -0
  227. plexflow/utils/pubsub/produce.py +25 -0
  228. plexflow/utils/retry/__init__.py +0 -0
  229. plexflow/utils/retry/utils.py +38 -0
  230. plexflow/utils/strings/__init__.py +0 -0
  231. plexflow/utils/strings/filesize.py +55 -0
  232. plexflow/utils/strings/language.py +14 -0
  233. plexflow/utils/subtitle/search.py +76 -0
  234. plexflow/utils/tasks/decorators.py +78 -0
  235. plexflow/utils/tasks/k8s/task.py +70 -0
  236. plexflow/utils/thread_safe/safe_list.py +54 -0
  237. plexflow/utils/thread_safe/safe_set.py +69 -0
  238. plexflow/utils/torrent/__init__.py +0 -0
  239. plexflow/utils/torrent/analyze.py +118 -0
  240. plexflow/utils/torrent/extract/common.py +37 -0
  241. plexflow/utils/torrent/extract/ext.py +2391 -0
  242. plexflow/utils/torrent/extract/extratorrent.py +56 -0
  243. plexflow/utils/torrent/extract/kat.py +1581 -0
  244. plexflow/utils/torrent/extract/tgx.py +96 -0
  245. plexflow/utils/torrent/extract/therarbg.py +170 -0
  246. plexflow/utils/torrent/extract/torrentquest.py +171 -0
  247. plexflow/utils/torrent/files.py +36 -0
  248. plexflow/utils/torrent/hash.py +90 -0
  249. plexflow/utils/transcribe/__init__.py +0 -0
  250. plexflow/utils/transcribe/speech2text.py +40 -0
  251. plexflow/utils/video/__init__.py +0 -0
  252. plexflow/utils/video/subtitle.py +73 -0
  253. plexflow-0.0.64.dist-info/METADATA +71 -0
  254. plexflow-0.0.64.dist-info/RECORD +256 -0
  255. plexflow-0.0.64.dist-info/WHEEL +4 -0
  256. plexflow-0.0.64.dist-info/entry_points.txt +24 -0
@@ -0,0 +1,122 @@
1
+ import subprocess
2
+ import time
3
+ import os
4
+ import logging
5
+ import re
6
+ from selenium.webdriver.common.by import By
7
+ from selenium.webdriver.support.ui import WebDriverWait
8
+ from selenium.webdriver.support import expected_conditions as EC
9
+ from selenium.common.exceptions import TimeoutException
10
+ from seleniumbase import SB
11
+
12
class HumanLikeRequestCapture:
    """Result of a single browser page load.

    Attributes:
        url: The URL that was requested.
        html: The page source captured for that URL.
        screenshot: Screenshot data for the page, or None when none was taken.
        cookies: The cookies the browser reported for the page.
    """

    def __init__(self, url, html, screenshot, cookies):
        # Plain attribute bag; callers may attach further attributes later.
        self.url, self.html = url, html
        self.screenshot, self.cookies = screenshot, cookies
18
+
19
class HumanLikeRequestSession:
    """Fetch pages through a headed, undetected-Chrome SeleniumBase browser.

    Args:
        use_xvfb (bool): When True, an Xvfb virtual display is started on
            ``:99`` and ``DISPLAY`` is pointed at it, so the headed browser
            can run on a machine without a real screen.
    """

    def __init__(self, use_xvfb=False):
        self.use_xvfb = use_xvfb
        # Always defined, so _stop_xvfb()/close() are safe even when Xvfb was
        # never started (previously this attribute only existed after
        # _start_xvfb() had run).
        self.xvfb_process = None
        if self.use_xvfb:
            self._start_xvfb()

    def _start_xvfb(self):
        """Launch Xvfb on display ``:99`` and export ``DISPLAY`` accordingly."""
        self.xvfb_process = subprocess.Popen(['Xvfb', ':99', '-screen', '0', '1920x1080x24'])
        logging.info("Xvfb started")
        time.sleep(5)  # fixed pause after spawning Xvfb before DISPLAY is used
        os.environ['DISPLAY'] = ':99'

    def _stop_xvfb(self):
        """Terminate the Xvfb process, if any, and reap it."""
        if self.xvfb_process:
            self.xvfb_process.terminate()
            # Reap the child so it does not linger as a zombie; escalate to
            # kill() if it ignores the terminate request.
            try:
                self.xvfb_process.wait(timeout=10)
            except subprocess.TimeoutExpired:
                self.xvfb_process.kill()
                self.xvfb_process.wait()
            self.xvfb_process = None
            logging.info("Xvfb terminated")

    def _wait_for_condition(self, driver, wait_condition, wait_value, wait_until_not):
        """Block (up to 20 seconds) until the requested page condition is met.

        Args:
            driver: The Selenium WebDriver to poll.
            wait_condition (str): ``"element"``, ``"regex"`` or ``"custom"``;
                any other value performs no wait.
            wait_value: Element ID, regex pattern, or a callable taking the
                driver, depending on ``wait_condition``.
            wait_until_not (bool): Wait for the condition to become false
                instead of true.

        Returns:
            The last value returned by the custom callable for
            ``wait_condition == "custom"``, otherwise None.

        Raises:
            TimeoutException: If the condition is not met in time.
        """
        if wait_condition == "element":
            predicate = EC.presence_of_element_located((By.ID, wait_value))
        elif wait_condition == "regex":
            def predicate(drv):
                return re.search(wait_value, drv.page_source)
        elif wait_condition == "custom":
            last_value = [None]  # one-slot box so the closure can record the value

            def predicate(drv):
                last_value[0] = wait_value(drv)
                return last_value[0]
        else:
            return None

        waiter = WebDriverWait(driver, 20)
        if wait_until_not:
            waiter.until_not(predicate)
        else:
            waiter.until(predicate)

        return last_value[0] if wait_condition == "custom" else None

    def execute_requests(self, urls: iter, take_screenshot: bool = False, wait_condition: str = "element", wait_value: str = "content", wait_until_not: bool = False, max_retries: int = 3) -> iter:
        """Open each URL in one shared browser and yield a capture per URL.

        The browser stays open for as long as the generator is being consumed
        and is closed when the generator finishes or is closed.

        Args:
            urls: Iterable of URLs to open.
            take_screenshot (bool): Capture a PNG screenshot of each page.
            wait_condition (str): ``"element"`` (``wait_value`` is an element
                ID), ``"regex"`` (``wait_value`` is a pattern searched in the
                page source) or ``"custom"`` (``wait_value`` is a callable
                taking the driver).
            wait_value: Argument for the chosen wait condition.
            wait_until_not (bool): Wait for the condition to stop holding.
            max_retries (int): Maximum page loads per URL. Another load is
                attempted only when a custom wait callable returns ``"retry"``.

        Yields:
            HumanLikeRequestCapture: url, page source, optional screenshot and
            the cookies read right after the last page load.
        """
        with SB(uc=True, maximize=True, test=False, headed=True, incognito=True, chromium_arg="--disable-search-engine-choice-screen") as sb:
            logging.info("Running test task")
            for url in urls:
                cookies = None
                for attempt in range(max_retries):
                    logging.info(f"[{attempt}/{max_retries}] Opening URL")
                    sb.uc_open_with_reconnect(url, 10)
                    sb.uc_gui_click_cf()
                    cookies = sb.get_cookies()
                    logging.info(cookies)

                    # Wait for a specific condition instead of sleeping
                    try:
                        outcome = self._wait_for_condition(sb.driver, wait_condition, wait_value, wait_until_not)
                    except TimeoutException:
                        logging.warning("Wait condition not met within the timeout period.")
                        break

                    if wait_condition == "custom":
                        print("Result:", outcome)
                        if outcome == "retry":
                            # The callable asked for a fresh page load.
                            continue
                    break

                if take_screenshot:
                    logging.info("Taking screenshot")
                    screenshot = sb.driver.get_screenshot_as_png()
                else:
                    screenshot = None

                html = sb.get_page_source()

                yield HumanLikeRequestCapture(url=url, html=html, screenshot=screenshot, cookies=cookies)

    def execute_request(self, url: str, take_screenshot: bool = False, wait_condition: str = "element", wait_value: str = "content", wait_until_not: bool = False) -> HumanLikeRequestCapture:
        """Open a single URL and return its capture.

        Arguments have the same meaning as in :meth:`execute_requests`.
        """
        captures = self.execute_requests([url], take_screenshot, wait_condition, wait_value, wait_until_not)
        try:
            return next(captures)
        finally:
            # Close the generator explicitly so the browser context inside it
            # is torn down now rather than whenever it is garbage-collected.
            captures.close()

    def close(self):
        """Release the Xvfb display if this session started one."""
        if self.use_xvfb:
            self._stop_xvfb()
105
+
106
def get(url: str, take_screenshot: bool = False, use_xvfb: bool = False, wait_condition: str = "element", wait_value: str = "content", wait_until_not: bool = False) -> HumanLikeRequestCapture:
    """Fetch one URL with a throw-away browser session.

    A fresh ``HumanLikeRequestSession`` is created for the call and always
    closed afterwards. The returned capture additionally carries a
    ``use_xvfb`` attribute echoing the flag that was used.
    """
    browser_session = HumanLikeRequestSession(use_xvfb=use_xvfb)
    try:
        capture = browser_session.execute_request(
            url=url,
            take_screenshot=take_screenshot,
            wait_condition=wait_condition,
            wait_value=wait_value,
            wait_until_not=wait_until_not,
        )
        capture.use_xvfb = use_xvfb
        return capture
    finally:
        browser_session.close()
114
+
115
def get_multiple(urls: iter, take_screenshot: bool = False, use_xvfb: bool = False, wait_condition: str = "element", wait_value: str = "content", wait_until_not: bool = False) -> iter:
    """Lazily fetch several URLs through one shared browser session.

    The session is created when iteration starts and is closed when the
    generator is exhausted or closed. Each yielded capture carries a
    ``use_xvfb`` attribute echoing the flag that was used.
    """
    browser_session = HumanLikeRequestSession(use_xvfb=use_xvfb)
    try:
        captures = browser_session.execute_requests(
            urls=urls,
            take_screenshot=take_screenshot,
            wait_condition=wait_condition,
            wait_value=wait_value,
            wait_until_not=wait_until_not,
        )
        for capture in captures:
            capture.use_xvfb = use_xvfb
            yield capture
    finally:
        browser_session.close()
File without changes
@@ -0,0 +1,62 @@
1
+ import requests
2
+ from typing import Optional, Dict
3
+
4
class HttpRequestContext:
    """
    A base class for setting up a default request context for headers, params, etc.

    Default headers and params are installed on the underlying session once.
    Per-request headers and params are passed with each individual request and
    merged by ``requests`` for that request only, so they never leak into the
    defaults or into later requests.

    Args:
        base_url (str): The base URL for the API.
        default_headers (dict, optional): The default headers for the API. Defaults to None.
        default_params (dict, optional): The default parameters for the API. Defaults to None.

    Attributes:
        session (requests.Session): The requests Session instance.
        base_url (str): Prefix prepended to every endpoint.
        default_headers (dict or None): The defaults passed at construction.
        default_params (dict or None): The defaults passed at construction.
    """

    def __init__(self, base_url: str, default_headers: Optional[Dict[str, str]] = None, default_params: Optional[Dict[str, str]] = None):
        self.session = requests.Session()
        self.session.headers.update(default_headers or {})
        self.session.params.update(default_params or {})
        self.base_url = base_url
        self.default_headers = default_headers
        self.default_params = default_params

    def get(self, endpoint: str, headers: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, **kwargs) -> "requests.Response":
        """Send a GET request; see :meth:`request` for the arguments."""
        return self.request('GET', endpoint, headers, params, **kwargs)

    def post(self, endpoint: str, headers: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, **kwargs) -> "requests.Response":
        """Send a POST request; see :meth:`request` for the arguments."""
        return self.request('POST', endpoint, headers, params, **kwargs)

    def put(self, endpoint: str, headers: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, **kwargs) -> "requests.Response":
        """Send a PUT request; see :meth:`request` for the arguments."""
        return self.request('PUT', endpoint, headers, params, **kwargs)

    def delete(self, endpoint: str, headers: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, **kwargs) -> "requests.Response":
        """Send a DELETE request; see :meth:`request` for the arguments."""
        return self.request('DELETE', endpoint, headers, params, **kwargs)

    def request(self, method: str, endpoint: str, headers: Optional[Dict[str, str]] = None, params: Optional[Dict[str, str]] = None, **kwargs) -> "requests.Response":
        """
        Makes an HTTP request.

        Args:
            method (str): The HTTP method.
            endpoint (str): The endpoint for the HTTP request, appended to ``base_url``.
            headers (dict, optional): Extra headers for this request only. Defaults to None.
            params (dict, optional): Extra query parameters for this request only. Defaults to None.
            **kwargs: Additional arguments passed to requests.Session.request.

        Returns:
            The response from the HTTP request.
        """
        # Hand the per-request values to requests instead of mutating the
        # session. The previous update-then-reset approach aliased the
        # caller's default dict as session.headers (so later updates changed
        # the defaults), discarded requests' own default headers after the
        # first call, and skipped the reset entirely when the request raised.
        return self.session.request(
            method,
            self.base_url + endpoint,
            headers=headers,
            params=params,
            **kwargs,
        )
File without changes
@@ -0,0 +1,68 @@
1
+ from typing import Optional, Dict
2
+ import requests
3
+ import plexflow.utils.antibot.human_like_requests as human_like_requests
4
+ from urllib.parse import urljoin, urlencode, urlunparse, urlparse
5
+ from plexflow.utils.image.storage import upload_image
6
+ import logging
7
+
8
class AntibotRestful:
    """GET-only client that fetches pages through the human-like browser helper.

    Args:
        base_url (str): Base URL that request paths are joined onto.
        use_xvfb (bool): Forwarded to ``human_like_requests.get``.
    """

    def __init__(self, base_url: str, use_xvfb: bool = False):
        self._base_url = base_url
        self._use_xvfb = use_xvfb

    def _construct_url(self, path: str, query_params: Optional[Dict[str, str]] = None) -> str:
        """Join ``path`` onto the base URL and set its query string.

        When ``query_params`` is given it replaces any query string already
        present in the joined URL.
        """
        url_parts = list(urlparse(urljoin(self._base_url, path)))
        if query_params:
            url_parts[4] = urlencode(query_params)  # index 4 is the query component
        return urlunparse(url_parts)

    def get(self, path: str, headers: Optional[Dict[str, str]] = None, query_params: Optional[Dict[str, str]] = None, **kwargs) -> "human_like_requests.HumanLikeRequestCapture":
        """Load ``path`` in the browser and return the resulting capture.

        A screenshot is always requested. When one is returned as bytes it is
        uploaded via ``upload_image``; upload failures are logged and do not
        affect the returned capture.

        Args:
            path (str): Path joined onto the base URL.
            headers (dict, optional): Accepted but not used by this method.
            query_params (dict, optional): Query parameters for the URL.
            **kwargs: ``wait_condition`` (default ``"regex"``), ``wait_value``
                (default ``"magnet:"``) and ``wait_until_not`` (default False)
                are forwarded to ``human_like_requests.get``.

        Returns:
            The capture produced by ``human_like_requests.get``.
        """
        url = self._construct_url(path, query_params)

        capture = human_like_requests.get(
            url=url,
            take_screenshot=True,
            use_xvfb=self._use_xvfb,
            wait_condition=kwargs.get('wait_condition', "regex"),
            wait_value=kwargs.get('wait_value', "magnet:"),
            wait_until_not=kwargs.get('wait_until_not', False)
        )

        if isinstance(capture.screenshot, bytes):
            # Computed before the try block so the error handler below can
            # always reference it.
            image_id = f"{self.url_to_slug(url)}_screenshot"
            try:
                details = upload_image(capture.screenshot, public_id=image_id)
                logging.info(f"Uploaded screenshot for {image_id}: {details}")
            except Exception as e:
                # Best effort: a failed upload must not fail the request.
                logging.error(f"An error occurred while uploading the screenshot for {image_id}: {e}")

        return capture

    def url_to_slug(self, url: str) -> str:
        """Return the URL's network location with dots replaced by hyphens."""
        return urlparse(url).netloc.replace('.', '-')
@@ -0,0 +1,49 @@
1
+ from typing import Optional, Dict, Any
2
+ import requests
3
+ from plexflow.utils.api.context.http import HttpRequestContext
4
+
5
class Restful:
    """
    A thin RESTful client bound to a base URL.

    Each call builds a fresh ``HttpRequestContext`` for ``base_url`` and
    delegates the request to it.

    Args:
        base_url (str): The base URL that request paths are appended to.
    """

    def __init__(self, base_url: str):
        self._base_url = base_url

    def get(self, url: str, headers: Optional[Dict[str, str]] = None, query_params: Optional[Dict[str, str]] = None, **kwargs) -> requests.Response:
        """
        Makes a GET request to the resource.

        Args:
            url (str): The endpoint, appended to the base URL.
            headers (dict, optional): The headers for the GET request. Defaults to None.
            query_params (dict, optional): The query parameters for the GET request. Defaults to None.
            **kwargs: Additional keyword arguments for the GET request.

        Returns:
            The response from the GET request.
        """
        context = HttpRequestContext(self._base_url)
        return context.get(url, headers=headers, params=query_params, **kwargs)

    def post(self, url: str, data: Dict[str, Any], headers: Optional[Dict[str, str]] = None, query_params: Optional[Dict[str, str]] = None, **kwargs) -> requests.Response:
        """
        Makes a POST request to the resource.

        Args:
            url (str): The endpoint, appended to the base URL.
            data (dict): The data for the POST request, sent as the request body.
            headers (dict, optional): The headers for the POST request. Defaults to None.
            query_params (dict, optional): The query parameters for the POST request. Defaults to None.
            **kwargs: Additional keyword arguments for the POST request.

        Returns:
            The response from the POST request.
        """
        # `data` used to be accepted and then silently dropped. Forward it,
        # unless the caller supplies an explicit `json=` body: requests would
        # let `data` override that, changing what such callers send today.
        if data is not None and "json" not in kwargs:
            kwargs["data"] = data
        context = HttpRequestContext(self._base_url)
        return context.post(url, headers=headers, params=query_params, **kwargs)
File without changes
File without changes
@@ -0,0 +1,34 @@
1
+ from plexflow.utils.transcribe.speech2text import transcribe_audio
2
+ import os
3
+ from typing import Any, Dict
4
+
5
def get_captcha_code_from_audio(file_path: str, **kwargs: Dict[str, Any]) -> str:
    """
    Turn a spoken CAPTCHA recording into its code.

    The audio file is transcribed with `transcribe_audio`; every character
    that is not alphanumeric (whitespace, punctuation, ...) is discarded and
    the remainder is upper-cased.

    Args:
        file_path (str): The path of the audio file to transcribe.
        **kwargs: Arbitrary keyword arguments passed through to `transcribe_audio`.

    Returns:
        str: The transcribed CAPTCHA code.

    Raises:
        RuntimeError: If transcription or clean-up fails for any reason; the
            original exception is chained as the cause.
    """
    try:
        transcript = transcribe_audio(file_path=file_path, **kwargs)
        kept_chars = [ch for ch in transcript if ch.isalnum()]
        return ''.join(kept_chars).upper()
    except Exception as exc:
        raise RuntimeError(
            f"Failed to get CAPTCHA code from audio file: {os.path.abspath(file_path)}"
        ) from exc
File without changes
@@ -0,0 +1,26 @@
1
+ import requests
2
+ import gzip
3
+ import io
4
+
5
def download_and_extract_gz(url, output_filename):
    """Download and extract a .gz file from a URL.

    Parameters:
    url (str): The URL of the .gz file to download.
    output_filename (str): The name of the output file.

    Returns:
    None

    Raises:
    requests.HTTPError: If the server answers with an error status.
    OSError: If the payload is not valid gzip data or the output file
        cannot be written.
    """
    import shutil

    # Download the file
    response = requests.get(url)
    # Fail fast on HTTP errors instead of handing an error page to gzip.
    response.raise_for_status()
    compressed_file = io.BytesIO(response.content)

    # Decompress straight into the output file in chunks. Both handles are
    # closed on exit, and the decompressed data is never held in memory whole.
    with gzip.GzipFile(fileobj=compressed_file) as decompressed_file, \
            open(output_filename, "wb") as outfile:
        shutil.copyfileobj(decompressed_file, outfile)

    print(f"File downloaded and extracted to {output_filename} successfully.")
File without changes
@@ -0,0 +1,129 @@
1
+ import os
2
+ from typing import Generator, Tuple
3
+
4
def _iter_matching_files(directory: str, extensions: Tuple[str, ...], ignore_hidden: bool) -> Generator[str, None, None]:
    """Walk `directory` recursively and yield every file path that passes the extension and hidden-file filters."""
    for dirpath, dirnames, filenames in os.walk(directory):
        if ignore_hidden:
            filenames = [f for f in filenames if not f.startswith('.')]
            # Prune in place so os.walk does not descend into hidden directories
            dirnames[:] = [d for d in dirnames if not d.startswith('.')]
        for filename in filenames:
            # An empty extensions tuple means "match every file"
            if not extensions or filename.endswith(extensions):
                yield os.path.join(dirpath, filename)


def search_files(directory: str, extensions: Tuple[str, ...], order_by_size: bool = False, ignore_hidden: bool = True) -> Generator[str, None, None]:
    """
    Generator function to search for files in a directory recursively. If extensions are specified,
    only files with those extensions are returned. If no extensions are specified, all files are returned.
    Optionally order the files by size and/or ignore hidden files.

    Matching is a case-sensitive suffix test (``str.endswith``) on the file name.

    Parameters:
    directory (str): The directory in which to start the search.
    extensions (tuple of str): The file extensions to search for. If empty (or None), all files are returned.
        A single string or any other iterable of strings (e.g. a list) is also accepted.
    order_by_size (bool, optional): Whether to order the files by size (ascending; ties are ordered by path).
        All matches are collected before the first one is yielded. Default is False.
    ignore_hidden (bool, optional): Whether to ignore files and directories whose name starts with '.'.
        Default is True.

    Yields:
    str: The path to a file that matches one of the specified extensions, or any file if no extensions are specified.

    Raises:
    OSError: When order_by_size is True and the size of a matching file cannot be read
        (``os.path.getsize`` fails).

    Examples:
    >>> # Search for Python and text files, ordered by size, including hidden files
    >>> for file in search_files('/path/to/directory', ('.txt', '.py'), order_by_size=True, ignore_hidden=False):
    ...     print(file)

    >>> # Search for JPEG and PNG images, not ordered by size, ignoring hidden files
    >>> for file in search_files('/path/to/directory', ('.jpg', '.png')):
    ...     print(file)

    >>> # Search for all files, not ordered by size, ignoring hidden files
    >>> for file in search_files('/path/to/directory', (), order_by_size=False, ignore_hidden=True):
    ...     print(file)
    """
    # str.endswith only accepts a str or a tuple of str, so normalise other inputs up front
    if extensions is None:
        extensions = ()
    elif isinstance(extensions, str):
        extensions = (extensions,)
    else:
        extensions = tuple(extensions)

    matches = _iter_matching_files(directory, extensions, ignore_hidden)

    if order_by_size:
        # Sorting (size, path) pairs orders by size first, then by path
        sized = sorted((os.path.getsize(filepath), filepath) for filepath in matches)
        for _, filepath in sized:
            yield filepath
    else:
        # Yield files as they are found
        yield from matches
60
+
61
+
62
def find_movie_files(directory: str, extensions: Tuple[str, ...] = ('.mp4', '.mkv', '.avi', '.mov', '.flv'), order_by_size: bool = False, ignore_hidden: bool = True) -> Generator[str, None, None]:
    """
    Recursively look for movie files below a directory.

    This is `search_files` with a movie-oriented default for `extensions`
    ('.mp4', '.mkv', '.avi', '.mov', '.flv'); all arguments are passed through unchanged.

    Parameters:
    directory (str): The directory in which to start the search.
    extensions (tuple of str, optional): The file extensions to search for. Default is ('.mp4', '.mkv', '.avi', '.mov', '.flv').
    order_by_size (bool, optional): Whether to order the files by size. Default is False.
    ignore_hidden (bool, optional): Whether to ignore hidden files. Default is True.

    Returns:
    A generator yielding the path (str) of each movie file found.

    Examples:
    >>> # Default movie extensions, ordered by size, hidden files included
    >>> for file in find_movie_files('/path/to/directory', order_by_size=True, ignore_hidden=False):
    ...     print(file)

    >>> # Custom file types
    >>> for file in find_movie_files('/path/to/directory', extensions=('.wmv', '.mpg')):
    ...     print(file)
    """
    movie_files = search_files(
        directory,
        extensions,
        order_by_size,
        ignore_hidden,
    )
    return movie_files
95
+
96
+
97
def find_subtitle_files(directory: str, extensions: Tuple[str, ...] = ('.srt', '.sub', '.sbv', '.vtt', '.ass'), order_by_size: bool = False, ignore_hidden: bool = True) -> Generator[str, None, None]:
    """
    Recursively look for subtitle files below a directory.

    This is `search_files` with a subtitle-oriented default for `extensions`
    ('.srt', '.sub', '.sbv', '.vtt', '.ass'); all arguments are passed through unchanged.

    Parameters:
    directory (str): The directory in which to start the search.
    extensions (tuple of str, optional): The file extensions to search for. Default is ('.srt', '.sub', '.sbv', '.vtt', '.ass').
    order_by_size (bool, optional): Whether to order the files by size. Default is False.
    ignore_hidden (bool, optional): Whether to ignore hidden files. Default is True.

    Returns:
    A generator yielding the path (str) of each subtitle file found.

    Examples:
    >>> # Default subtitle extensions, ordered by size, hidden files included
    >>> for file in find_subtitle_files('/path/to/directory', order_by_size=True, ignore_hidden=False):
    ...     print(file)

    >>> # Custom file types
    >>> for file in find_subtitle_files('/path/to/directory', extensions=('.smi', '.ssa')):
    ...     print(file)
    """
    subtitle_files = search_files(
        directory,
        extensions,
        order_by_size,
        ignore_hidden,
    )
    return subtitle_files
File without changes
@@ -0,0 +1,116 @@
1
+ from google_auth_oauthlib.flow import InstalledAppFlow
2
+ from google.auth.transport.requests import Request
3
+ from googleapiclient.discovery import build
4
+ import pickle
5
+ import os
6
+ from typing import Any
7
+
8
class GmailAuthenticatedContext:
    """
    A context manager for managing the authentication and retrieval of Gmail messages.

    Entering the context authenticates and stores the credentials on ``self.creds``;
    the query methods read ``self.creds`` and are therefore meant to be called
    inside the ``with`` block.

    Args:
        token_path (str): The path to the token file for storing the authentication token. Default is 'config/gmail_token.pickle'.
        creds_path (str): The path to the credentials file for Gmail API. Default is 'config/gmail_credentials.json'.
        scopes (tuple): The OAuth 2.0 scopes for the Gmail API. Default is ('https://www.googleapis.com/auth/gmail.readonly',).

    Methods:
        __enter__(): Enters the context and authenticates the Gmail API client.
        __exit__(exc_type, exc_val, exc_tb): Exits the context.
        get_label_id(label_name): Retrieves the ID of the label with the given name.
        get_messages_since_history_id(history_id, label_name): Retrieves the messages added since the specified history ID for a given label.
        authenticate(): Authenticates the Gmail API client.

    """

    def __init__(self, token_path='config/gmail_token.pickle', creds_path='config/gmail_credentials.json', scopes=('https://www.googleapis.com/auth/gmail.readonly',)):
        self.token_path = token_path
        self.creds_path = creds_path
        self.scopes = scopes

    def __enter__(self):
        self.creds = self.authenticate()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release; exceptions raised inside the block propagate.
        pass

    def get_label_id(self, label_name: str) -> str:
        """
        Retrieves the label ID for a given label name.

        Args:
            label_name (str): The name of the label (compared for exact equality).

        Returns:
            str: The ID of the label if found, None otherwise.

        Raises:
            RuntimeError: If listing the labels fails; the original exception is chained.
        """
        try:
            service = build('gmail', 'v1', credentials=self.creds)
            results = service.users().labels().list(userId='me').execute()
            labels = results.get('labels', [])
            for label in labels:
                if label['name'] == label_name:
                    return label['id']
        except Exception as e:
            raise RuntimeError(f"Failed to retrieve label ID for '{label_name}'") from e

        return None

    def get_messages_since_history_id(self, history_id: int, label_name: str) -> list:
        """
        Retrieves the messages added since the specified history ID for a given label name.

        The label name is resolved to its ID and passed to the history listing as
        the ``labelId`` filter. All result pages are followed, and each added
        message is fetched individually.

        Args:
            history_id (int): The history ID to start retrieving changes from.
            label_name (str): The name of the label to filter the changes.

        Returns:
            list: A list of message objects that were added since the specified history ID.

        Raises:
            RuntimeError: If the label cannot be found or resolved, or if listing the
                history or fetching a message fails.
        """
        label_id = self.get_label_id(label_name)
        if not label_id:
            raise RuntimeError(f"Failed to retrieve label ID for '{label_name}'")

        # Use history.list to get the change details
        service = build('gmail', 'v1', credentials=self.creds)
        try:
            history_api = service.users().history()
            # labelId restricts the history to the requested label
            request = history_api.list(userId='me', startHistoryId=history_id, labelId=label_id)

            # Get the body of the messages added
            messages = []
            while request is not None:
                results = request.execute()
                for change in results.get('history', []):
                    for message_added in change.get('messagesAdded', []):
                        message_id = message_added['message']['id']
                        message = service.users().messages().get(userId='me', id=message_id).execute()
                        messages.append(message)
                # list_next returns None once there are no further pages
                request = history_api.list_next(request, results)

            return messages
        except Exception as e:
            raise RuntimeError("Failed to retrieve messages") from e

    def authenticate(self) -> Any:
        """
        Authenticates the user and returns the credentials.

        A previously saved token is loaded from ``token_path`` when the file exists.
        If there are no valid credentials, an expired token with a refresh token is
        refreshed; otherwise the installed-app OAuth flow is run from ``creds_path``.
        New or refreshed credentials are written back to ``token_path``.

        Returns:
            The credentials object after authentication.
        """
        creds: Any = None

        if os.path.exists(self.token_path):
            with open(self.token_path, 'rb') as token:
                # SECURITY NOTE: pickle.load executes arbitrary code from the file;
                # the token file must only ever be written by this application.
                creds = pickle.load(token)

        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    self.creds_path, self.scopes)
                creds = flow.run_local_server(port=0)

            # Make sure the target directory exists so the fresh token is not lost
            token_dir = os.path.dirname(self.token_path)
            if token_dir:
                os.makedirs(token_dir, exist_ok=True)
            with open(self.token_path, 'wb') as token:
                pickle.dump(creds, token)

        return creds
File without changes