plexflow 0.0.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256) hide show
  1. plexflow/__init__.py +0 -0
  2. plexflow/__main__.py +15 -0
  3. plexflow/core/.DS_Store +0 -0
  4. plexflow/core/__init__.py +0 -0
  5. plexflow/core/context/__init__.py +0 -0
  6. plexflow/core/context/metadata/__init__.py +0 -0
  7. plexflow/core/context/metadata/context.py +32 -0
  8. plexflow/core/context/metadata/tmdb/__init__.py +0 -0
  9. plexflow/core/context/metadata/tmdb/context.py +45 -0
  10. plexflow/core/context/partial_context.py +46 -0
  11. plexflow/core/context/partials/__init__.py +8 -0
  12. plexflow/core/context/partials/cache.py +16 -0
  13. plexflow/core/context/partials/context.py +12 -0
  14. plexflow/core/context/partials/ids.py +37 -0
  15. plexflow/core/context/partials/movie.py +115 -0
  16. plexflow/core/context/partials/tgx_batch.py +33 -0
  17. plexflow/core/context/partials/tgx_context.py +34 -0
  18. plexflow/core/context/partials/torrents.py +23 -0
  19. plexflow/core/context/partials/watchlist.py +35 -0
  20. plexflow/core/context/plexflow_context.py +29 -0
  21. plexflow/core/context/plexflow_property.py +36 -0
  22. plexflow/core/context/root/__init__.py +0 -0
  23. plexflow/core/context/root/context.py +25 -0
  24. plexflow/core/context/select/__init__.py +0 -0
  25. plexflow/core/context/select/context.py +45 -0
  26. plexflow/core/context/torrent/__init__.py +0 -0
  27. plexflow/core/context/torrent/context.py +43 -0
  28. plexflow/core/context/torrent/tpb/__init__.py +0 -0
  29. plexflow/core/context/torrent/tpb/context.py +45 -0
  30. plexflow/core/context/torrent/yts/__init__.py +0 -0
  31. plexflow/core/context/torrent/yts/context.py +45 -0
  32. plexflow/core/context/watchlist/__init__.py +0 -0
  33. plexflow/core/context/watchlist/context.py +46 -0
  34. plexflow/core/downloads/__init__.py +0 -0
  35. plexflow/core/downloads/candidates/__init__.py +0 -0
  36. plexflow/core/downloads/candidates/download_candidate.py +210 -0
  37. plexflow/core/downloads/candidates/filtered.py +51 -0
  38. plexflow/core/downloads/candidates/utils.py +39 -0
  39. plexflow/core/env/__init__.py +0 -0
  40. plexflow/core/env/env.py +31 -0
  41. plexflow/core/genai/__init__.py +0 -0
  42. plexflow/core/genai/bot.py +9 -0
  43. plexflow/core/genai/plexa.py +54 -0
  44. plexflow/core/genai/torrent/imdb_verify.py +65 -0
  45. plexflow/core/genai/torrent/movie.py +25 -0
  46. plexflow/core/genai/utils/__init__.py +0 -0
  47. plexflow/core/genai/utils/loader.py +5 -0
  48. plexflow/core/metadata/__init__.py +0 -0
  49. plexflow/core/metadata/auto/__init__.py +0 -0
  50. plexflow/core/metadata/auto/auto_meta.py +40 -0
  51. plexflow/core/metadata/auto/auto_providers/__init__.py +0 -0
  52. plexflow/core/metadata/auto/auto_providers/auto/__init__.py +0 -0
  53. plexflow/core/metadata/auto/auto_providers/auto/episode.py +49 -0
  54. plexflow/core/metadata/auto/auto_providers/auto/item.py +55 -0
  55. plexflow/core/metadata/auto/auto_providers/auto/movie.py +13 -0
  56. plexflow/core/metadata/auto/auto_providers/auto/season.py +43 -0
  57. plexflow/core/metadata/auto/auto_providers/auto/show.py +26 -0
  58. plexflow/core/metadata/auto/auto_providers/imdb/__init__.py +0 -0
  59. plexflow/core/metadata/auto/auto_providers/imdb/movie.py +36 -0
  60. plexflow/core/metadata/auto/auto_providers/imdb/show.py +45 -0
  61. plexflow/core/metadata/auto/auto_providers/moviemeter/__init__.py +0 -0
  62. plexflow/core/metadata/auto/auto_providers/moviemeter/movie.py +40 -0
  63. plexflow/core/metadata/auto/auto_providers/plex/__init__.py +0 -0
  64. plexflow/core/metadata/auto/auto_providers/plex/movie.py +39 -0
  65. plexflow/core/metadata/auto/auto_providers/tmdb/__init__.py +0 -0
  66. plexflow/core/metadata/auto/auto_providers/tmdb/episode.py +30 -0
  67. plexflow/core/metadata/auto/auto_providers/tmdb/movie.py +36 -0
  68. plexflow/core/metadata/auto/auto_providers/tmdb/season.py +23 -0
  69. plexflow/core/metadata/auto/auto_providers/tmdb/show.py +41 -0
  70. plexflow/core/metadata/auto/auto_providers/tmdb.py +92 -0
  71. plexflow/core/metadata/auto/auto_providers/tvdb/__init__.py +0 -0
  72. plexflow/core/metadata/auto/auto_providers/tvdb/episode.py +28 -0
  73. plexflow/core/metadata/auto/auto_providers/tvdb/movie.py +36 -0
  74. plexflow/core/metadata/auto/auto_providers/tvdb/season.py +25 -0
  75. plexflow/core/metadata/auto/auto_providers/tvdb/show.py +41 -0
  76. plexflow/core/metadata/providers/__init__.py +0 -0
  77. plexflow/core/metadata/providers/imdb/__init__.py +0 -0
  78. plexflow/core/metadata/providers/imdb/datatypes.py +53 -0
  79. plexflow/core/metadata/providers/imdb/imdb.py +112 -0
  80. plexflow/core/metadata/providers/moviemeter/__init__.py +0 -0
  81. plexflow/core/metadata/providers/moviemeter/datatypes.py +111 -0
  82. plexflow/core/metadata/providers/moviemeter/moviemeter.py +42 -0
  83. plexflow/core/metadata/providers/plex/__init__.py +0 -0
  84. plexflow/core/metadata/providers/plex/datatypes.py +693 -0
  85. plexflow/core/metadata/providers/plex/plex.py +167 -0
  86. plexflow/core/metadata/providers/tmdb/__init__.py +0 -0
  87. plexflow/core/metadata/providers/tmdb/datatypes.py +460 -0
  88. plexflow/core/metadata/providers/tmdb/tmdb.py +85 -0
  89. plexflow/core/metadata/providers/tvdb/__init__.py +0 -0
  90. plexflow/core/metadata/providers/tvdb/datatypes.py +257 -0
  91. plexflow/core/metadata/providers/tvdb/tv_datatypes.py +554 -0
  92. plexflow/core/metadata/providers/tvdb/tvdb.py +65 -0
  93. plexflow/core/metadata/providers/universal/__init__.py +0 -0
  94. plexflow/core/metadata/providers/universal/movie.py +130 -0
  95. plexflow/core/metadata/providers/universal/old.py +192 -0
  96. plexflow/core/metadata/providers/universal/show.py +107 -0
  97. plexflow/core/plex/__init__.py +0 -0
  98. plexflow/core/plex/api/context/authorized.py +15 -0
  99. plexflow/core/plex/api/context/discover.py +14 -0
  100. plexflow/core/plex/api/context/library.py +14 -0
  101. plexflow/core/plex/discover/__init__.py +0 -0
  102. plexflow/core/plex/discover/activity.py +448 -0
  103. plexflow/core/plex/discover/comment.py +89 -0
  104. plexflow/core/plex/discover/feed.py +11 -0
  105. plexflow/core/plex/hooks/__init__.py +0 -0
  106. plexflow/core/plex/hooks/plex_authorized.py +60 -0
  107. plexflow/core/plex/hooks/plexflow_database.py +6 -0
  108. plexflow/core/plex/library/__init__.py +0 -0
  109. plexflow/core/plex/library/library.py +103 -0
  110. plexflow/core/plex/token/__init__.py +0 -0
  111. plexflow/core/plex/token/auto_token.py +91 -0
  112. plexflow/core/plex/utils/__init__.py +0 -0
  113. plexflow/core/plex/utils/paginated.py +39 -0
  114. plexflow/core/plex/watchlist/__init__.py +0 -0
  115. plexflow/core/plex/watchlist/datatypes.py +124 -0
  116. plexflow/core/plex/watchlist/watchlist.py +23 -0
  117. plexflow/core/storage/__init__.py +0 -0
  118. plexflow/core/storage/object/__init__.py +0 -0
  119. plexflow/core/storage/object/plexflow_storage.py +143 -0
  120. plexflow/core/storage/object/redis_storage.py +169 -0
  121. plexflow/core/subtitles/__init__.py +0 -0
  122. plexflow/core/subtitles/providers/__init__.py +0 -0
  123. plexflow/core/subtitles/providers/auto_subtitles.py +48 -0
  124. plexflow/core/subtitles/providers/oss/__init__.py +0 -0
  125. plexflow/core/subtitles/providers/oss/datatypes.py +104 -0
  126. plexflow/core/subtitles/providers/oss/download.py +48 -0
  127. plexflow/core/subtitles/providers/oss/old.py +144 -0
  128. plexflow/core/subtitles/providers/oss/oss.py +400 -0
  129. plexflow/core/subtitles/providers/oss/oss_subtitle.py +32 -0
  130. plexflow/core/subtitles/providers/oss/search.py +52 -0
  131. plexflow/core/subtitles/providers/oss/unlimited_oss.py +231 -0
  132. plexflow/core/subtitles/providers/oss/utils/__init__.py +0 -0
  133. plexflow/core/subtitles/providers/oss/utils/config.py +63 -0
  134. plexflow/core/subtitles/providers/oss/utils/download_client.py +22 -0
  135. plexflow/core/subtitles/providers/oss/utils/exceptions.py +35 -0
  136. plexflow/core/subtitles/providers/oss/utils/file_utils.py +83 -0
  137. plexflow/core/subtitles/providers/oss/utils/languages.py +78 -0
  138. plexflow/core/subtitles/providers/oss/utils/response_base.py +221 -0
  139. plexflow/core/subtitles/providers/oss/utils/responses.py +176 -0
  140. plexflow/core/subtitles/providers/oss/utils/srt.py +561 -0
  141. plexflow/core/subtitles/results/__init__.py +0 -0
  142. plexflow/core/subtitles/results/subtitle.py +170 -0
  143. plexflow/core/torrents/__init__.py +0 -0
  144. plexflow/core/torrents/analyzers/analyzed_torrent.py +143 -0
  145. plexflow/core/torrents/analyzers/analyzer.py +45 -0
  146. plexflow/core/torrents/analyzers/torrentquest/analyzer.py +47 -0
  147. plexflow/core/torrents/auto/auto_providers/auto/__init__.py +0 -0
  148. plexflow/core/torrents/auto/auto_providers/auto/torrent.py +64 -0
  149. plexflow/core/torrents/auto/auto_providers/tpb/torrent.py +62 -0
  150. plexflow/core/torrents/auto/auto_torrents.py +29 -0
  151. plexflow/core/torrents/providers/__init__.py +0 -0
  152. plexflow/core/torrents/providers/ext/__init__.py +0 -0
  153. plexflow/core/torrents/providers/ext/ext.py +18 -0
  154. plexflow/core/torrents/providers/ext/utils.py +64 -0
  155. plexflow/core/torrents/providers/extratorrent/__init__.py +0 -0
  156. plexflow/core/torrents/providers/extratorrent/extratorrent.py +21 -0
  157. plexflow/core/torrents/providers/extratorrent/utils.py +66 -0
  158. plexflow/core/torrents/providers/eztv/__init__.py +0 -0
  159. plexflow/core/torrents/providers/eztv/eztv.py +47 -0
  160. plexflow/core/torrents/providers/eztv/utils.py +83 -0
  161. plexflow/core/torrents/providers/rarbg2/__init__.py +0 -0
  162. plexflow/core/torrents/providers/rarbg2/rarbg2.py +19 -0
  163. plexflow/core/torrents/providers/rarbg2/utils.py +76 -0
  164. plexflow/core/torrents/providers/snowfl/__init__.py +0 -0
  165. plexflow/core/torrents/providers/snowfl/snowfl.py +36 -0
  166. plexflow/core/torrents/providers/snowfl/utils.py +59 -0
  167. plexflow/core/torrents/providers/tgx/__init__.py +0 -0
  168. plexflow/core/torrents/providers/tgx/context.py +50 -0
  169. plexflow/core/torrents/providers/tgx/dump.py +40 -0
  170. plexflow/core/torrents/providers/tgx/tgx.py +22 -0
  171. plexflow/core/torrents/providers/tgx/utils.py +61 -0
  172. plexflow/core/torrents/providers/therarbg/__init__.py +0 -0
  173. plexflow/core/torrents/providers/therarbg/therarbg.py +17 -0
  174. plexflow/core/torrents/providers/therarbg/utils.py +61 -0
  175. plexflow/core/torrents/providers/torrentquest/__init__.py +0 -0
  176. plexflow/core/torrents/providers/torrentquest/torrentquest.py +20 -0
  177. plexflow/core/torrents/providers/torrentquest/utils.py +70 -0
  178. plexflow/core/torrents/providers/tpb/__init__.py +0 -0
  179. plexflow/core/torrents/providers/tpb/tpb.py +17 -0
  180. plexflow/core/torrents/providers/tpb/utils.py +139 -0
  181. plexflow/core/torrents/providers/yts/__init__.py +0 -0
  182. plexflow/core/torrents/providers/yts/utils.py +57 -0
  183. plexflow/core/torrents/providers/yts/yts.py +31 -0
  184. plexflow/core/torrents/results/__init__.py +0 -0
  185. plexflow/core/torrents/results/torrent.py +165 -0
  186. plexflow/core/torrents/results/universal.py +220 -0
  187. plexflow/core/torrents/results/utils.py +15 -0
  188. plexflow/events/__init__.py +0 -0
  189. plexflow/events/download/__init__.py +0 -0
  190. plexflow/events/download/torrent_events.py +96 -0
  191. plexflow/events/publish/__init__.py +0 -0
  192. plexflow/events/publish/publish.py +34 -0
  193. plexflow/logging/__init__.py +0 -0
  194. plexflow/logging/log_setup.py +8 -0
  195. plexflow/spiders/quiet_logger.py +9 -0
  196. plexflow/spiders/tgx/pipelines/dump_json_pipeline.py +30 -0
  197. plexflow/spiders/tgx/pipelines/meta_pipeline.py +13 -0
  198. plexflow/spiders/tgx/pipelines/publish_pipeline.py +14 -0
  199. plexflow/spiders/tgx/pipelines/torrent_info_pipeline.py +12 -0
  200. plexflow/spiders/tgx/pipelines/validation_pipeline.py +17 -0
  201. plexflow/spiders/tgx/settings.py +36 -0
  202. plexflow/spiders/tgx/spider.py +72 -0
  203. plexflow/utils/__init__.py +0 -0
  204. plexflow/utils/antibot/human_like_requests.py +122 -0
  205. plexflow/utils/api/__init__.py +0 -0
  206. plexflow/utils/api/context/http.py +62 -0
  207. plexflow/utils/api/rest/__init__.py +0 -0
  208. plexflow/utils/api/rest/antibot_restful.py +68 -0
  209. plexflow/utils/api/rest/restful.py +49 -0
  210. plexflow/utils/captcha/__init__.py +0 -0
  211. plexflow/utils/captcha/bypass/__init__.py +0 -0
  212. plexflow/utils/captcha/bypass/decode_audio.py +34 -0
  213. plexflow/utils/download/__init__.py +0 -0
  214. plexflow/utils/download/gz.py +26 -0
  215. plexflow/utils/filesystem/__init__.py +0 -0
  216. plexflow/utils/filesystem/search.py +129 -0
  217. plexflow/utils/gmail/__init__.py +0 -0
  218. plexflow/utils/gmail/mails.py +116 -0
  219. plexflow/utils/hooks/__init__.py +0 -0
  220. plexflow/utils/hooks/http.py +84 -0
  221. plexflow/utils/hooks/postgresql.py +93 -0
  222. plexflow/utils/hooks/redis.py +112 -0
  223. plexflow/utils/image/storage.py +36 -0
  224. plexflow/utils/imdb/__init__.py +0 -0
  225. plexflow/utils/imdb/imdb_codes.py +107 -0
  226. plexflow/utils/pubsub/consume.py +82 -0
  227. plexflow/utils/pubsub/produce.py +25 -0
  228. plexflow/utils/retry/__init__.py +0 -0
  229. plexflow/utils/retry/utils.py +38 -0
  230. plexflow/utils/strings/__init__.py +0 -0
  231. plexflow/utils/strings/filesize.py +55 -0
  232. plexflow/utils/strings/language.py +14 -0
  233. plexflow/utils/subtitle/search.py +76 -0
  234. plexflow/utils/tasks/decorators.py +78 -0
  235. plexflow/utils/tasks/k8s/task.py +70 -0
  236. plexflow/utils/thread_safe/safe_list.py +54 -0
  237. plexflow/utils/thread_safe/safe_set.py +69 -0
  238. plexflow/utils/torrent/__init__.py +0 -0
  239. plexflow/utils/torrent/analyze.py +118 -0
  240. plexflow/utils/torrent/extract/common.py +37 -0
  241. plexflow/utils/torrent/extract/ext.py +2391 -0
  242. plexflow/utils/torrent/extract/extratorrent.py +56 -0
  243. plexflow/utils/torrent/extract/kat.py +1581 -0
  244. plexflow/utils/torrent/extract/tgx.py +96 -0
  245. plexflow/utils/torrent/extract/therarbg.py +170 -0
  246. plexflow/utils/torrent/extract/torrentquest.py +171 -0
  247. plexflow/utils/torrent/files.py +36 -0
  248. plexflow/utils/torrent/hash.py +90 -0
  249. plexflow/utils/transcribe/__init__.py +0 -0
  250. plexflow/utils/transcribe/speech2text.py +40 -0
  251. plexflow/utils/video/__init__.py +0 -0
  252. plexflow/utils/video/subtitle.py +73 -0
  253. plexflow-0.0.64.dist-info/METADATA +71 -0
  254. plexflow-0.0.64.dist-info/RECORD +256 -0
  255. plexflow-0.0.64.dist-info/WHEEL +4 -0
  256. plexflow-0.0.64.dist-info/entry_points.txt +24 -0
@@ -0,0 +1,84 @@
1
+ import os
2
+ import yaml
3
+ import requests
4
+ from typing import Optional, Dict, Any
5
+ from airflow.providers.http.hooks.http import HttpHook
6
+
7
class UniversalHttpHook:
    """
    A universal HTTP hook that can work in Airflow as well as standalone.

    When ``config_folder`` is empty, requests are delegated to an Airflow
    ``HttpHook`` built from ``http_conn_id``.
    When ``config_folder`` is given, the hook runs standalone: it loads
    ``<config_folder>/<http_conn_id>.yaml`` and sends requests through a
    ``requests.Session``. The YAML file must provide ``base_url`` and may
    provide ``headers`` (default headers applied to the session).

    Args:
        method (str): The HTTP method. Defaults to 'GET'.
        http_conn_id (str, optional): The connection ID, used as Airflow connection ID or as the name for the YAML file. Defaults to None.
        config_folder (str, optional): The folder where the YAML configuration file is located. Defaults to None.

    Attributes:
        hook (HttpHook, optional): The Airflow HttpHook instance (None in standalone mode).
        session (requests.Session, optional): The requests Session instance (None in Airflow mode).
        config (dict, optional): The configuration loaded from the YAML file (None in Airflow mode).

    Raises:
        ValueError: If standalone mode is requested without ``http_conn_id``.

    Examples:
        Using UniversalHttpHook with Airflow:
            hook = UniversalHttpHook(method='GET', http_conn_id='my_http_connection')
            response = hook.run('/api/v1/resource')

        Using UniversalHttpHook in standalone mode with a YAML configuration file:
            hook = UniversalHttpHook(method='POST', http_conn_id='my_http_connection', config_folder='/path/to/configs')
            response = hook.run('/api/v1/resource', data={'key': 'value'})
    """

    # Keys of ``extra_options`` that map directly onto ``Session.request`` keyword arguments.
    _SESSION_OPTIONS = ('timeout', 'verify', 'allow_redirects', 'proxies', 'stream', 'cert')

    def __init__(self, method: str = 'GET', http_conn_id: Optional[str] = None, config_folder: Optional[str] = None):
        self.method = method
        self.http_conn_id = http_conn_id
        self.config_folder = '' if config_folder is None else config_folder
        # Define every attribute in both modes so attribute access never fails.
        self.hook = None
        self.session = None
        self.config = None
        if not self.config_folder:
            self.hook = HttpHook(http_conn_id=self.http_conn_id, method=self.method)
            return
        if self.http_conn_id is None:
            raise ValueError("http_conn_id must be provided when running in standalone mode")
        self.session = requests.Session()
        config_path = os.path.join(self.config_folder, f"{self.http_conn_id}.yaml")
        with open(config_path, 'r') as file:
            # An empty YAML file loads as None; normalise to a dict.
            self.config = yaml.safe_load(file) or {}

    def get_conn(self, headers: Optional[Dict[str, str]] = None) -> Any:
        """
        Establishes a connection for making HTTP requests.

        Args:
            headers (dict, optional): Extra headers to set on the connection. Defaults to None.

        Returns:
            An object that can be used to make HTTP requests: the result of
            ``HttpHook.get_conn`` in Airflow mode, the shared session otherwise.
        """
        if self.hook is not None:
            return self.hook.get_conn(headers)
        # 'headers' is optional in the YAML file; a missing key must not fail.
        self.session.headers.update(self.config.get('headers') or {})
        if headers:
            self.session.headers.update(headers)
        return self.session

    def run(self, endpoint: str, data: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None, extra_options: Optional[Dict[str, Any]] = None, query_params: Optional[Dict[str, str]] = None, json: Any = None) -> "requests.Response":
        """
        Makes an HTTP request.

        Args:
            endpoint (str): The endpoint for the HTTP request.
            data (dict, optional): The data for the HTTP request. Defaults to None.
            headers (dict, optional): The headers for the HTTP request. Defaults to None.
            extra_options (dict, optional): Extra options for the HTTP request. In standalone mode
                only the transport options listed in ``_SESSION_OPTIONS`` are honoured. Defaults to None.
            query_params (dict, optional): The query parameters for the HTTP request. Defaults to None.
            json (Any, optional): A JSON-serialisable request body. Defaults to None.

        Returns:
            The response from the HTTP request.
        """
        if self.hook is not None:
            # HttpHook.run accepts only (endpoint, data, headers, extra_options)
            # positionally; everything else must travel as request kwargs.
            request_kwargs = {'json': json}
            if query_params:
                if self.method.upper() == 'GET':
                    # For GET, HttpHook sends ``data`` as the query string, so a
                    # separate ``params`` kwarg would collide with it.
                    data = {**(data or {}), **query_params}
                else:
                    request_kwargs['params'] = query_params
            return self.hook.run(endpoint, data, headers, extra_options, **request_kwargs)

        # Going through get_conn() applies the YAML default headers to the session.
        session = self.get_conn()
        url = self.config['base_url'] + endpoint
        options = extra_options or {}
        transport_kwargs = {key: options[key] for key in self._SESSION_OPTIONS if key in options}
        return session.request(
            self.method,
            url,
            data=data,
            headers=headers,
            params=query_params,
            json=json,
            **transport_kwargs,
        )
@@ -0,0 +1,93 @@
1
+ import os
2
+ import yaml
3
+ import psycopg2
4
+ from typing import Optional, Dict, Any, List
5
+ from airflow.providers.postgres.hooks.postgres import PostgresHook
6
+ from psycopg2.extras import DictCursor
7
+
8
class UniversalPostgresqlHook:
    """
    A universal PostgreSQL hook that can work in Airflow as well as standalone.

    When ``config_folder`` is empty, queries are delegated to an Airflow
    ``PostgresHook`` built from ``postgres_conn_id``.
    When ``config_folder`` is given, the hook runs standalone: it loads
    ``<config_folder>/<postgres_conn_id>.yaml`` (keys ``dbname``, ``user``,
    ``password``, ``host``, ``port``) and opens one psycopg2 connection that is
    reused for every query.

    Args:
        postgres_conn_id (str, optional): The connection ID, used as Airflow connection ID or as the name for the YAML file. Defaults to None.
        config_folder (str, optional): The folder where the YAML configuration file is located. Defaults to None.

    Attributes:
        hook (PostgresHook, optional): The Airflow PostgresHook instance (None in standalone mode).
        conn (psycopg2.extensions.connection, optional): The psycopg2 connection instance (None in Airflow mode).
        config (dict, optional): The configuration loaded from the YAML file (None in Airflow mode).

    Raises:
        ValueError: If standalone mode is requested without ``postgres_conn_id``.
    """

    def __init__(self, postgres_conn_id: Optional[str] = None, config_folder: Optional[str] = None):
        self.postgres_conn_id = postgres_conn_id
        self.config_folder = '' if config_folder is None else config_folder
        # Define every attribute in both modes so attribute access never fails.
        self.hook = None
        self.conn = None
        self.config = None
        if not self.config_folder:
            self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id)
            return
        if self.postgres_conn_id is None:
            raise ValueError("postgres_conn_id must be provided when running in standalone mode")
        config_path = os.path.join(self.config_folder, f"{self.postgres_conn_id}.yaml")
        with open(config_path, 'r') as file:
            self.config = yaml.safe_load(file)
        self.conn = psycopg2.connect(
            dbname=self.config['dbname'],
            user=self.config['user'],
            password=self.config['password'],
            host=self.config['host'],
            port=self.config['port'],
        )

    def get_conn(self) -> Any:
        """
        Establishes a connection to the PostgreSQL database.

        Returns:
            A connection object that can be used to interact with the database:
            the shared standalone connection, or ``PostgresHook.get_conn()``.
        """
        if self.conn:
            return self.conn
        return self.hook.get_conn()

    def _run_standalone(self, sql: str, params: Optional[Dict[str, Any]], fetch) -> Any:
        """Execute ``sql`` on the standalone connection and return ``fetch(cursor)``."""
        try:
            with self.conn.cursor(cursor_factory=DictCursor) as cur:
                cur.execute(sql, params)
                return fetch(cur)
        except Exception:
            # A failed statement aborts the open transaction; without a
            # rollback every later query on this shared connection would fail.
            self.conn.rollback()
            raise

    def get_first(self, sql: str, params: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
        """
        Executes the SQL query and returns the first result.

        Args:
            sql (str): The SQL query to execute.
            params (dict, optional): The parameters to substitute into the SQL query.

        Returns:
            In standalone mode, the first row as a dictionary keyed by column name,
            or None when the query yields no row. In Airflow mode, whatever
            ``PostgresHook.get_first`` returns.
            NOTE(review): the Airflow result is presumably a plain row sequence
            rather than a dict - confirm before relying on column names there.
        """
        if self.conn:
            row = self._run_standalone(sql, params, lambda cur: cur.fetchone())
            return dict(row) if row else None
        return self.hook.get_first(sql, parameters=params)

    def get_all(self, sql: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """
        Executes the SQL query and returns all results.

        Args:
            sql (str): The SQL query to execute.
            params (dict, optional): The parameters to substitute into the SQL query.

        Returns:
            In standalone mode, a list of dictionaries keyed by column name (empty
            when there are no rows). In Airflow mode, whatever
            ``PostgresHook.get_records`` returns.
            NOTE(review): the Airflow rows are presumably plain sequences rather
            than dicts - confirm before relying on column names there.
        """
        if self.conn:
            rows = self._run_standalone(sql, params, lambda cur: cur.fetchall())
            return [dict(row) for row in rows] if rows else []
        return self.hook.get_records(sql, parameters=params)
@@ -0,0 +1,112 @@
1
+ import os
2
+ import yaml
3
+ import redis
4
+ from typing import Optional, Dict, Any
5
+ from airflow.providers.redis.hooks.redis import RedisHook
6
+
7
class UniversalRedisHook:
    """
    A universal Redis hook that can work in Airflow as well as standalone.

    When ``config_folder`` is empty, the client is obtained from an Airflow
    ``RedisHook`` built from ``redis_conn_id``.
    When ``config_folder`` is given, the hook runs standalone: it loads
    ``<config_folder>/<redis_conn_id>.yaml`` and passes its content as keyword
    arguments to ``redis.Redis``.

    Args:
        redis_conn_id (str, optional): The connection ID, used as Airflow connection ID or as the name for the YAML file. Defaults to None.
        config_folder (str, optional): The folder where the YAML configuration file is located. Defaults to None.

    Attributes:
        hook (RedisHook, optional): The Airflow RedisHook instance (None in standalone mode).
        redis_client (redis.Redis, optional): The Redis client instance (None in Airflow mode).
        config (dict, optional): The configuration loaded from the YAML file (None in Airflow mode).

    Raises:
        ValueError: If standalone mode is requested without ``redis_conn_id``.

    Examples:
        Using UniversalRedisHook with Airflow:
            hook = UniversalRedisHook(redis_conn_id='my_redis_connection')
            response = hook.run('GET', 'my_key')

        Using UniversalRedisHook in standalone mode with a YAML configuration file:
            hook = UniversalRedisHook(redis_conn_id='my_redis_connection', config_folder='/path/to/configs')
            response = hook.run('SET', 'my_key', 'my_value')
    """

    def __init__(self, redis_conn_id: Optional[str] = None, config_folder: Optional[str] = None):
        self.redis_conn_id = redis_conn_id
        self.config_folder = '' if config_folder is None else config_folder
        # Define every attribute in both modes so attribute access never fails.
        self.hook = None
        self.redis_client = None
        self.config = None
        if not self.config_folder:
            self.hook = RedisHook(redis_conn_id=self.redis_conn_id)
            return
        if self.redis_conn_id is None:
            raise ValueError("redis_conn_id must be provided when running in standalone mode")
        config_path = os.path.join(self.config_folder, f"{self.redis_conn_id}.yaml")
        with open(config_path, 'r') as file:
            self.config = yaml.safe_load(file)
        self.redis_client = redis.Redis(**self.config)

    def get_conn(self) -> Any:
        """
        Establishes a connection to Redis.

        Returns:
            An object that can be used to interact with Redis: the client from
            ``RedisHook.get_conn()`` in Airflow mode, the standalone client otherwise.
        """
        if self.hook:
            return self.hook.get_conn()
        return self.redis_client

    def run(self, command: str, *args, **kwargs) -> Any:
        """
        Executes a Redis command.

        The command name is matched case-insensitively against the client's
        methods (``'SET'`` -> ``client.set``, ``'CLIENT LIST'`` ->
        ``client.client_list``). Commands without a matching method are sent
        through ``client.execute_command``.

        Args:
            command (str): The Redis command to execute.
            *args: Positional arguments for the Redis command.
            **kwargs: Keyword arguments for the Redis command.

        Returns:
            The result of the Redis command.
        """
        # Both modes go through the client: the Airflow RedisHook only hands out
        # a client and has no command-execution method of its own.
        client = self.get_conn()
        method_name = command.strip().lower().replace(' ', '_')
        method = getattr(client, method_name, None)
        if callable(method):
            return method(*args, **kwargs)
        return client.execute_command(command, *args, **kwargs)

    def get(self, key: str) -> Optional[str]:
        """
        Get the value of a key.

        Args:
            key (str): The key to retrieve.

        Returns:
            The value associated with the key, or None if the key does not exist.
            NOTE(review): the value is returned exactly as the client yields it,
            presumably bytes unless the client decodes responses - confirm.
        """
        return self.get_conn().get(key)

    def set(self, key: str, value: str, ex: Optional[int] = None) -> bool:
        """
        Set the value of a key.

        Args:
            key (str): The key to set.
            value (str): The value to associate with the key.
            ex (int, optional): The expiration time in seconds. Defaults to None.

        Returns:
            True if the key was set successfully, False otherwise.
        """
        client = self.get_conn()
        if ex is None:
            return client.set(key, value)
        return client.set(key, value, ex=ex)
@@ -0,0 +1,36 @@
1
+ import os
2
+ import cloudinary
3
+ import cloudinary.uploader
4
+ import cloudinary.api
5
+
6
# Cloudinary credentials are read from the process environment at import time;
# a variable that is not set is passed on as None.
cloud_name = os.environ.get('CLOUDINARY_CLOUD_NAME')
api_key = os.environ.get('CLOUDINARY_API_KEY')
api_secret = os.environ.get('CLOUDINARY_API_SECRET')

# Apply the credentials to the global Cloudinary configuration.
cloudinary.config(cloud_name=cloud_name, api_key=api_key, api_secret=api_secret)
17
+
18
def upload_image(image, **kwargs):
    """
    Uploads an image to Cloudinary and returns the upload response.
    The image can be provided as a file path or bytes.
    Additional arguments can be passed to the Cloudinary uploader; unless the
    caller overrides them, ``type='private'`` and ``sign_url=True`` are used.

    :param image: Path to the image file or bytes of the image to be uploaded
    :param kwargs: Additional arguments for the Cloudinary uploader
    :return: The value returned by ``cloudinary.uploader.upload``, unmodified.
        Callers that need the image URL must read it from this response.
    :raises RuntimeError: If the upload fails for any reason; the original
        exception is attached as ``__cause__``.
    """
    # Ensure the image is private and signed by default
    kwargs.setdefault('type', 'private')
    kwargs.setdefault('sign_url', True)

    try:
        return cloudinary.uploader.upload(image, **kwargs)
    except Exception as e:
        # Chain explicitly so the underlying failure stays visible in tracebacks.
        raise RuntimeError(f"An error occurred: {e}") from e
File without changes
@@ -0,0 +1,107 @@
1
+ import re
2
+
3
class IMDbCode:
    """A class to represent an IMDb code.

    Two codes are considered equal when they match after normalization, i.e.
    ignoring letter case, the leading 'tt' and any leading zeros.

    Attributes:
        code (str): A string representing the IMDb code, stored lower-cased.

    Methods:
        __str__(): Returns the IMDb code as a string.
        __eq__(other): Compares the IMDb code with another IMDb code or string.
        __hash__(): Hashes the normalized code so instances work in sets and dicts.
        normalize_code(code: str) -> str: Normalizes an IMDb code by removing the leading 'tt' and any leading zeros.
    """

    def __init__(self, code):
        """
        Constructs an IMDbCode instance with the provided IMDb code.

        Parameters
        ----------
        code : str
            a string representing the IMDb code

        Raises
        ------
        ValueError
            If `code` is not a string.
        """
        if not isinstance(code, str):
            raise ValueError("IMDb code must be a string.")

        self.code = code.lower()

    def __str__(self):
        """Returns the IMDb code as a string."""
        return self.code

    def __repr__(self):
        """Returns an unambiguous representation, e.g. ``IMDbCode('tt0111161')``."""
        return f"{type(self).__name__}({self.code!r})"

    def __eq__(self, other):
        """
        Compares the IMDb code with another IMDb code or string.

        Parameters
        ----------
        other : IMDbCode or str
            the other IMDb code or string to compare with

        Returns
        -------
        bool
            True if the IMDb codes are equal, False otherwise

        Raises
        ------
        TypeError
            If `other` is not an IMDbCode or a string.
        """
        if isinstance(other, IMDbCode):
            other_code = other.code
        elif isinstance(other, str):
            other_code = other
        else:
            raise TypeError("Can only compare IMDbCode with another IMDbCode or a string.")
        return self.normalize_code(self.code) == self.normalize_code(other_code)

    def __hash__(self):
        """Hashes the normalized code, so equal IMDbCode instances hash alike."""
        # Defining __eq__ alone would make instances unhashable.
        return hash(self.normalize_code(self.code))

    @staticmethod
    def normalize_code(code):
        """
        Normalizes an IMDb code by removing the leading 'tt' and any leading zeros.

        The comparison is case-insensitive: 'TT0111161' normalizes like 'tt0111161'.

        Parameters
        ----------
        code : str
            the IMDb code to normalize

        Returns
        -------
        str
            the normalized IMDb code
        """
        # Lower-case first so an upper-case 'TT' prefix is recognised as well.
        code = code.lower()
        # Remove leading 'tt' if present
        if code.startswith('tt'):
            code = code[2:]
        # Remove leading zeros
        return code.lstrip('0')
85
+
86
+
87
def extract_imdb_code(s: str):
    """
    Lazily yield every IMDB code that occurs in a string.

    An IMDB code is taken to be the lower-case prefix 'tt' followed by at
    least seven digits. Codes are yielded in the order they appear.

    Args:
        s (str): The text to scan for IMDB codes.

    Yields:
        str: Each IMDB code found, one at a time. Nothing is yielded when the text contains no code.

    Examples:
        >>> list(extract_imdb_code('https://www.imdb.com/title/tt0111161/ and https://www.imdb.com/title/tt006864600/'))
        ['tt0111161', 'tt006864600']

        >>> list(extract_imdb_code('this string has no imdb code'))
        []
    """
    imdb_pattern = re.compile(r'tt\d{7,}')
    yield from (found.group(0) for found in imdb_pattern.finditer(s))
@@ -0,0 +1,82 @@
1
+ import json
2
+ import logging
3
+ from confluent_kafka import Consumer
4
+ import os
5
+
6
def consume_message(topics_with_priority, group_id: str, as_json: bool = False, wait_time: float = 10.0, max_poll_attempts: int = 1):
    """
    Consume at most one message.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``consume_messages`` with ``max_messages`` pinned to 1, and that
    function's result is returned as-is.
    """
    single_message_options = {
        'topics_with_priority': topics_with_priority,
        'group_id': group_id,
        'as_json': as_json,
        'wait_time': wait_time,
        'max_messages': 1,
        'max_poll_attempts': max_poll_attempts,
    }
    return consume_messages(**single_message_options)
15
+
16
+
17
def consume_messages(topics_with_priority, group_id: str, as_json: bool = False, wait_time: float = 10.0, max_messages: int = 1, max_poll_attempts: int = 5):
    """
    Consume up to ``max_messages`` messages from one or more Kafka topics.

    Topics are visited one after another in ascending ``priority`` order;
    the next topic is only tried when the message budget has not been
    reached yet. The broker address is read from the
    ``KAFKA_BOOTSTRAP_SERVERS`` environment variable (default
    ``localhost:9092``).

    Args:
        topics_with_priority: Either a single topic name (str) or an iterable
            of dicts carrying a ``'name'`` and a ``'priority'`` key.
        group_id (str): Kafka consumer group id.
        as_json (bool): When True, each payload is parsed with ``json.loads``;
            a payload that is not valid JSON is logged and recorded as None.
        wait_time (float): Timeout in seconds passed to each ``poll`` call.
        max_messages (int): Maximum number of messages to consume in total.
            A value below 1 consumes nothing.
        max_poll_attempts (int): Number of empty or errored polls tolerated
            per topic before moving on.

    Returns:
        A list of decoded messages when ``max_messages`` is greater than 1;
        otherwise the single consumed message, or None if none was consumed.
        A record without a value is returned as None.

    Raises:
        ValueError: If ``max_messages`` is None.
    """
    if max_messages is None:
        raise ValueError("max_messages cannot be None to avoid infinite message consumption")

    logging.info("Consuming messages from topics...")
    consumer = Consumer({
        'bootstrap.servers': os.getenv('KAFKA_BOOTSTRAP_SERVERS', 'localhost:9092'),
        'group.id': group_id,
        'auto.offset.reset': 'earliest',
        # Auto commit is enabled; the explicit commit() further down is an
        # additional commit issued once polling is finished.
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000
    })

    consumed_messages = []

    # Everything after construction runs under try/finally so the consumer is
    # closed even when sorting, polling or decoding raises.
    try:
        # Sort topics by priority (assuming lower number means higher priority)
        if isinstance(topics_with_priority, str):
            sorted_topic_names = [topics_with_priority]
        else:
            sorted_topic_names = [
                topic['name']
                for topic in sorted(topics_with_priority, key=lambda x: x['priority'])
            ]

        for topic_name in sorted_topic_names:
            if len(consumed_messages) >= max_messages:
                break

            logging.info(f"Subscribing to topic: {topic_name}")
            consumer.subscribe([topic_name])

            current_poll_attempts = 0
            while len(consumed_messages) < max_messages and current_poll_attempts < max_poll_attempts:
                logging.info(f"Polling messages from topic: {topic_name}")
                message = consumer.poll(timeout=wait_time)

                if message is None:
                    logging.info(f"No messages found in topic: {topic_name}")
                    current_poll_attempts += 1
                    continue

                if message.error():
                    logging.error(f"Consumer error: {message.error()}")
                    current_poll_attempts += 1
                    continue

                # A record may carry no value at all; keep it as None instead
                # of failing on .decode().
                raw_value = message.value()
                decoded_message = raw_value.decode('utf-8') if raw_value is not None else None
                logging.info(f"Consumed message from topic: {message.topic()}")

                if as_json and decoded_message is not None:
                    try:
                        decoded_message = json.loads(decoded_message)
                    except json.JSONDecodeError as e:
                        logging.error(f"JSON decode error: {e}")
                        decoded_message = None

                consumed_messages.append(decoded_message)

        # Commit offsets once at the end
        logging.info("Committing offsets for all consumed messages")
        consumer.commit()
    finally:
        consumer.close()

    logging.info(f"Consumed {len(consumed_messages)} messages.")
    return consumed_messages if max_messages > 1 else (consumed_messages[0] if consumed_messages else None)
@@ -0,0 +1,25 @@
1
+ import json
2
+ import logging
3
+ from confluent_kafka import Producer
4
+ import os
5
+
6
def delivery_report(err, msg):
    """
    Log the outcome of a produced message.

    Intended as a delivery callback: a non-None ``err`` is logged as an
    error; otherwise the topic, partition and offset reported by ``msg``
    are logged at info level.
    """
    if err is None:
        logging.info(f"Message delivered to {msg.topic()} [{msg.partition()}] at offset {msg.offset()}")
        return
    logging.error(f"Message delivery failed: {err}")
11
+
12
def produce_message(topic: str, message):
    """
    Publish one message to a Kafka topic and wait until it is flushed.

    ``message`` is serialized with ``json.dumps`` and sent UTF-8 encoded.
    The broker address comes from the ``KAFKA_BOOTSTRAP_SERVERS``
    environment variable (default ``localhost:9092``); the delivery
    outcome is reported through ``delivery_report``.
    """
    logging.info(f"Publishing message to {topic}...")
    bootstrap_servers = os.getenv('KAFKA_BOOTSTRAP_SERVERS', 'localhost:9092')
    producer = Producer({'bootstrap.servers': bootstrap_servers})
    payload = json.dumps(message).encode('utf-8')
    producer.produce(topic, payload, callback=delivery_report)
    producer.flush()
    logging.info(f"Message published to {topic}.")
18
+
19
def produce_messages(topic: str, messages):
    """
    Publish a batch of messages to a Kafka topic and wait until all are flushed.

    Each message is serialized with ``json.dumps`` and sent UTF-8 encoded.
    The broker address comes from the ``KAFKA_BOOTSTRAP_SERVERS``
    environment variable (default ``localhost:9092``); delivery outcomes
    are reported through ``delivery_report``.

    Args:
        topic (str): Name of the topic to publish to.
        messages: Iterable of JSON-serializable objects.
    """
    logging.info(f"Publishing messages to {topic}...")
    producer = Producer({'bootstrap.servers': os.getenv('KAFKA_BOOTSTRAP_SERVERS', 'localhost:9092')})
    try:
        for message in messages:
            payload = json.dumps(message).encode('utf-8')
            try:
                producer.produce(topic, payload, callback=delivery_report)
            except BufferError:
                # The local producer queue is full: wait for outstanding
                # deliveries to drain, then enqueue this message again.
                producer.flush()
                producer.produce(topic, payload, callback=delivery_report)
            # Serve delivery callbacks while enqueuing so a large batch does
            # not pile up unacknowledged in the local queue.
            producer.poll(0)
    finally:
        # Messages already handed to the producer are still delivered when a
        # later one fails to serialize or enqueue.
        producer.flush()
    logging.info(f"All messages published to {topic}.")
File without changes
@@ -0,0 +1,38 @@
1
+ import time
2
+ from retrying import retry
3
+
4
def execute_until_success(func, delay_type, delay, max_retries=None, retry_exceptions=None, *args, **kwargs):
    """
    This function executes a given function until it succeeds and no exceptions are thrown.

    Parameters:
    func (function): The function to be executed.
    delay_type (str): The type of delay between attempts. Can be 'constant' or 'exponential'.
    delay (int): The delay in seconds between attempts when delay_type is 'constant'.
        With 'exponential' the wait is 2 ** attempt_number seconds and this value is not used.
    max_retries (int, optional): The maximum number of attempts. If not set, it will keep trying until the function succeeds.
    retry_exceptions (tuple, optional): The exceptions on which to retry (a single exception class is accepted too).
        If not provided, retry will be attempted for any exception derived from Exception.
    *args: Variable length argument list for the function.
    **kwargs: Arbitrary keyword arguments for the function.

    Returns:
    The return value of the function, if it succeeds.

    Raises:
    ValueError: If delay_type is neither 'constant' nor 'exponential'.
    Exception: The exception raised by func is re-raised unchanged when it is not
        one of retry_exceptions, or when max_retries attempts have been used up.
    """
    if delay_type not in ('constant', 'exponential'):
        raise ValueError(f"Unsupported delay_type: {delay_type!r} (expected 'constant' or 'exponential')")

    if retry_exceptions is None:
        retryable = Exception
    elif isinstance(retry_exceptions, type):
        retryable = (retry_exceptions,)
    else:
        retryable = tuple(retry_exceptions)

    attempt_number = 0
    while True:
        attempt_number += 1
        try:
            return func(*args, **kwargs)
        except retryable:
            # Out of attempts: surface the last failure to the caller as-is.
            if max_retries is not None and attempt_number >= max_retries:
                raise
        # The wait is expressed directly in seconds, matching the documented
        # unit of `delay`.
        if delay_type == 'constant':
            pause_seconds = delay
        else:
            pause_seconds = 2 ** attempt_number
        time.sleep(pause_seconds)
File without changes
@@ -0,0 +1,55 @@
1
+ import re
2
+ import bitmath
3
+ from humanfriendly import parse_size as parse_size_to_bytes
4
+
5
def parse_size(sentence):
    """
    Parses the sizes from a sentence and returns them in bytes.

    The whole sentence is first handed to ``bitmath.parse_string``; when that
    succeeds its single result is returned. Otherwise every size-looking
    token found by a regular expression is parsed on its own, first with
    bitmath and, failing that, with humanfriendly's ``parse_size``. Tokens
    that neither parser understands are reported and skipped.

    Args:
        sentence (str): The sentence containing sizes to be parsed.

    Returns:
        list: A list of sizes in bytes.

    """
    # Imported locally because the file only imports humanfriendly's
    # parse_size at module level.
    from humanfriendly import InvalidSize

    try:
        size = bitmath.parse_string(sentence)
        return [size.to_Byte().value]
    except ValueError:
        pass

    # Regular expression pattern for a size with optional space between number and unit
    # NOTE(review): the unit is optional, so bare numbers also match - confirm
    # that is intended for the callers' inputs.
    pattern = r'\b\d+(?:\.\d+)?\s*[KkMmGgTtPpEeZzYy]?[i]?[Bb]?\b'

    # Find all sizes in the sentence
    matches = re.findall(pattern, sentence, re.IGNORECASE)

    sizes = []
    for match in matches:
        # Drop all whitespace inside the match (the pattern allows any
        # whitespace between number and unit, not only spaces).
        size_str = "".join(match.split())

        try:
            # Preferred parser: bitmath
            sizes.append(bitmath.parse_string(size_str).to_Byte().value)
            continue
        except Exception:
            # Not an error yet: the fallback parser below gets a chance.
            pass

        try:
            sizes.append(parse_size_to_bytes(size_str))
        except (ValueError, InvalidSize) as e:
            # humanfriendly signals bad input with InvalidSize, which is not a
            # ValueError; catching it keeps one bad token from aborting the
            # whole parse.
            print(f"Error parsing size: {e}")

    return sizes
49
+
50
+
51
if __name__ == '__main__':
    # Manual smoke check: parse one sample sentence and show the result.
    print(parse_size('My size is 6.40 GB'))
@@ -0,0 +1,14 @@
1
+
2
# Lower-case language names and abbreviations mapped to two-letter codes.
_language_mapping = {
    "dutch": "nl",
    "dut": "nl",
    "eng": "en",
    "english": "en",
    "nld": "nl",
    "ned": "nl",
    "nl": "nl",
    "en": "en",
}


def get_language_code(language: str) -> str:
    """
    Map a language name or abbreviation to its two-letter code.

    The lookup ignores letter case and surrounding whitespace.

    Args:
        language (str): Language name or abbreviation, e.g. "Dutch" or "eng".

    Returns:
        str: The two-letter code, or "unk" when the language is not in the
        mapping or no language was given (None or empty string).
    """
    if not language:
        return "unk"
    return _language_mapping.get(language.strip().lower(), "unk")