plexflow 0.0.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (256)
  1. plexflow/__init__.py +0 -0
  2. plexflow/__main__.py +15 -0
  3. plexflow/core/.DS_Store +0 -0
  4. plexflow/core/__init__.py +0 -0
  5. plexflow/core/context/__init__.py +0 -0
  6. plexflow/core/context/metadata/__init__.py +0 -0
  7. plexflow/core/context/metadata/context.py +32 -0
  8. plexflow/core/context/metadata/tmdb/__init__.py +0 -0
  9. plexflow/core/context/metadata/tmdb/context.py +45 -0
  10. plexflow/core/context/partial_context.py +46 -0
  11. plexflow/core/context/partials/__init__.py +8 -0
  12. plexflow/core/context/partials/cache.py +16 -0
  13. plexflow/core/context/partials/context.py +12 -0
  14. plexflow/core/context/partials/ids.py +37 -0
  15. plexflow/core/context/partials/movie.py +115 -0
  16. plexflow/core/context/partials/tgx_batch.py +33 -0
  17. plexflow/core/context/partials/tgx_context.py +34 -0
  18. plexflow/core/context/partials/torrents.py +23 -0
  19. plexflow/core/context/partials/watchlist.py +35 -0
  20. plexflow/core/context/plexflow_context.py +29 -0
  21. plexflow/core/context/plexflow_property.py +36 -0
  22. plexflow/core/context/root/__init__.py +0 -0
  23. plexflow/core/context/root/context.py +25 -0
  24. plexflow/core/context/select/__init__.py +0 -0
  25. plexflow/core/context/select/context.py +45 -0
  26. plexflow/core/context/torrent/__init__.py +0 -0
  27. plexflow/core/context/torrent/context.py +43 -0
  28. plexflow/core/context/torrent/tpb/__init__.py +0 -0
  29. plexflow/core/context/torrent/tpb/context.py +45 -0
  30. plexflow/core/context/torrent/yts/__init__.py +0 -0
  31. plexflow/core/context/torrent/yts/context.py +45 -0
  32. plexflow/core/context/watchlist/__init__.py +0 -0
  33. plexflow/core/context/watchlist/context.py +46 -0
  34. plexflow/core/downloads/__init__.py +0 -0
  35. plexflow/core/downloads/candidates/__init__.py +0 -0
  36. plexflow/core/downloads/candidates/download_candidate.py +210 -0
  37. plexflow/core/downloads/candidates/filtered.py +51 -0
  38. plexflow/core/downloads/candidates/utils.py +39 -0
  39. plexflow/core/env/__init__.py +0 -0
  40. plexflow/core/env/env.py +31 -0
  41. plexflow/core/genai/__init__.py +0 -0
  42. plexflow/core/genai/bot.py +9 -0
  43. plexflow/core/genai/plexa.py +54 -0
  44. plexflow/core/genai/torrent/imdb_verify.py +65 -0
  45. plexflow/core/genai/torrent/movie.py +25 -0
  46. plexflow/core/genai/utils/__init__.py +0 -0
  47. plexflow/core/genai/utils/loader.py +5 -0
  48. plexflow/core/metadata/__init__.py +0 -0
  49. plexflow/core/metadata/auto/__init__.py +0 -0
  50. plexflow/core/metadata/auto/auto_meta.py +40 -0
  51. plexflow/core/metadata/auto/auto_providers/__init__.py +0 -0
  52. plexflow/core/metadata/auto/auto_providers/auto/__init__.py +0 -0
  53. plexflow/core/metadata/auto/auto_providers/auto/episode.py +49 -0
  54. plexflow/core/metadata/auto/auto_providers/auto/item.py +55 -0
  55. plexflow/core/metadata/auto/auto_providers/auto/movie.py +13 -0
  56. plexflow/core/metadata/auto/auto_providers/auto/season.py +43 -0
  57. plexflow/core/metadata/auto/auto_providers/auto/show.py +26 -0
  58. plexflow/core/metadata/auto/auto_providers/imdb/__init__.py +0 -0
  59. plexflow/core/metadata/auto/auto_providers/imdb/movie.py +36 -0
  60. plexflow/core/metadata/auto/auto_providers/imdb/show.py +45 -0
  61. plexflow/core/metadata/auto/auto_providers/moviemeter/__init__.py +0 -0
  62. plexflow/core/metadata/auto/auto_providers/moviemeter/movie.py +40 -0
  63. plexflow/core/metadata/auto/auto_providers/plex/__init__.py +0 -0
  64. plexflow/core/metadata/auto/auto_providers/plex/movie.py +39 -0
  65. plexflow/core/metadata/auto/auto_providers/tmdb/__init__.py +0 -0
  66. plexflow/core/metadata/auto/auto_providers/tmdb/episode.py +30 -0
  67. plexflow/core/metadata/auto/auto_providers/tmdb/movie.py +36 -0
  68. plexflow/core/metadata/auto/auto_providers/tmdb/season.py +23 -0
  69. plexflow/core/metadata/auto/auto_providers/tmdb/show.py +41 -0
  70. plexflow/core/metadata/auto/auto_providers/tmdb.py +92 -0
  71. plexflow/core/metadata/auto/auto_providers/tvdb/__init__.py +0 -0
  72. plexflow/core/metadata/auto/auto_providers/tvdb/episode.py +28 -0
  73. plexflow/core/metadata/auto/auto_providers/tvdb/movie.py +36 -0
  74. plexflow/core/metadata/auto/auto_providers/tvdb/season.py +25 -0
  75. plexflow/core/metadata/auto/auto_providers/tvdb/show.py +41 -0
  76. plexflow/core/metadata/providers/__init__.py +0 -0
  77. plexflow/core/metadata/providers/imdb/__init__.py +0 -0
  78. plexflow/core/metadata/providers/imdb/datatypes.py +53 -0
  79. plexflow/core/metadata/providers/imdb/imdb.py +112 -0
  80. plexflow/core/metadata/providers/moviemeter/__init__.py +0 -0
  81. plexflow/core/metadata/providers/moviemeter/datatypes.py +111 -0
  82. plexflow/core/metadata/providers/moviemeter/moviemeter.py +42 -0
  83. plexflow/core/metadata/providers/plex/__init__.py +0 -0
  84. plexflow/core/metadata/providers/plex/datatypes.py +693 -0
  85. plexflow/core/metadata/providers/plex/plex.py +167 -0
  86. plexflow/core/metadata/providers/tmdb/__init__.py +0 -0
  87. plexflow/core/metadata/providers/tmdb/datatypes.py +460 -0
  88. plexflow/core/metadata/providers/tmdb/tmdb.py +85 -0
  89. plexflow/core/metadata/providers/tvdb/__init__.py +0 -0
  90. plexflow/core/metadata/providers/tvdb/datatypes.py +257 -0
  91. plexflow/core/metadata/providers/tvdb/tv_datatypes.py +554 -0
  92. plexflow/core/metadata/providers/tvdb/tvdb.py +65 -0
  93. plexflow/core/metadata/providers/universal/__init__.py +0 -0
  94. plexflow/core/metadata/providers/universal/movie.py +130 -0
  95. plexflow/core/metadata/providers/universal/old.py +192 -0
  96. plexflow/core/metadata/providers/universal/show.py +107 -0
  97. plexflow/core/plex/__init__.py +0 -0
  98. plexflow/core/plex/api/context/authorized.py +15 -0
  99. plexflow/core/plex/api/context/discover.py +14 -0
  100. plexflow/core/plex/api/context/library.py +14 -0
  101. plexflow/core/plex/discover/__init__.py +0 -0
  102. plexflow/core/plex/discover/activity.py +448 -0
  103. plexflow/core/plex/discover/comment.py +89 -0
  104. plexflow/core/plex/discover/feed.py +11 -0
  105. plexflow/core/plex/hooks/__init__.py +0 -0
  106. plexflow/core/plex/hooks/plex_authorized.py +60 -0
  107. plexflow/core/plex/hooks/plexflow_database.py +6 -0
  108. plexflow/core/plex/library/__init__.py +0 -0
  109. plexflow/core/plex/library/library.py +103 -0
  110. plexflow/core/plex/token/__init__.py +0 -0
  111. plexflow/core/plex/token/auto_token.py +91 -0
  112. plexflow/core/plex/utils/__init__.py +0 -0
  113. plexflow/core/plex/utils/paginated.py +39 -0
  114. plexflow/core/plex/watchlist/__init__.py +0 -0
  115. plexflow/core/plex/watchlist/datatypes.py +124 -0
  116. plexflow/core/plex/watchlist/watchlist.py +23 -0
  117. plexflow/core/storage/__init__.py +0 -0
  118. plexflow/core/storage/object/__init__.py +0 -0
  119. plexflow/core/storage/object/plexflow_storage.py +143 -0
  120. plexflow/core/storage/object/redis_storage.py +169 -0
  121. plexflow/core/subtitles/__init__.py +0 -0
  122. plexflow/core/subtitles/providers/__init__.py +0 -0
  123. plexflow/core/subtitles/providers/auto_subtitles.py +48 -0
  124. plexflow/core/subtitles/providers/oss/__init__.py +0 -0
  125. plexflow/core/subtitles/providers/oss/datatypes.py +104 -0
  126. plexflow/core/subtitles/providers/oss/download.py +48 -0
  127. plexflow/core/subtitles/providers/oss/old.py +144 -0
  128. plexflow/core/subtitles/providers/oss/oss.py +400 -0
  129. plexflow/core/subtitles/providers/oss/oss_subtitle.py +32 -0
  130. plexflow/core/subtitles/providers/oss/search.py +52 -0
  131. plexflow/core/subtitles/providers/oss/unlimited_oss.py +231 -0
  132. plexflow/core/subtitles/providers/oss/utils/__init__.py +0 -0
  133. plexflow/core/subtitles/providers/oss/utils/config.py +63 -0
  134. plexflow/core/subtitles/providers/oss/utils/download_client.py +22 -0
  135. plexflow/core/subtitles/providers/oss/utils/exceptions.py +35 -0
  136. plexflow/core/subtitles/providers/oss/utils/file_utils.py +83 -0
  137. plexflow/core/subtitles/providers/oss/utils/languages.py +78 -0
  138. plexflow/core/subtitles/providers/oss/utils/response_base.py +221 -0
  139. plexflow/core/subtitles/providers/oss/utils/responses.py +176 -0
  140. plexflow/core/subtitles/providers/oss/utils/srt.py +561 -0
  141. plexflow/core/subtitles/results/__init__.py +0 -0
  142. plexflow/core/subtitles/results/subtitle.py +170 -0
  143. plexflow/core/torrents/__init__.py +0 -0
  144. plexflow/core/torrents/analyzers/analyzed_torrent.py +143 -0
  145. plexflow/core/torrents/analyzers/analyzer.py +45 -0
  146. plexflow/core/torrents/analyzers/torrentquest/analyzer.py +47 -0
  147. plexflow/core/torrents/auto/auto_providers/auto/__init__.py +0 -0
  148. plexflow/core/torrents/auto/auto_providers/auto/torrent.py +64 -0
  149. plexflow/core/torrents/auto/auto_providers/tpb/torrent.py +62 -0
  150. plexflow/core/torrents/auto/auto_torrents.py +29 -0
  151. plexflow/core/torrents/providers/__init__.py +0 -0
  152. plexflow/core/torrents/providers/ext/__init__.py +0 -0
  153. plexflow/core/torrents/providers/ext/ext.py +18 -0
  154. plexflow/core/torrents/providers/ext/utils.py +64 -0
  155. plexflow/core/torrents/providers/extratorrent/__init__.py +0 -0
  156. plexflow/core/torrents/providers/extratorrent/extratorrent.py +21 -0
  157. plexflow/core/torrents/providers/extratorrent/utils.py +66 -0
  158. plexflow/core/torrents/providers/eztv/__init__.py +0 -0
  159. plexflow/core/torrents/providers/eztv/eztv.py +47 -0
  160. plexflow/core/torrents/providers/eztv/utils.py +83 -0
  161. plexflow/core/torrents/providers/rarbg2/__init__.py +0 -0
  162. plexflow/core/torrents/providers/rarbg2/rarbg2.py +19 -0
  163. plexflow/core/torrents/providers/rarbg2/utils.py +76 -0
  164. plexflow/core/torrents/providers/snowfl/__init__.py +0 -0
  165. plexflow/core/torrents/providers/snowfl/snowfl.py +36 -0
  166. plexflow/core/torrents/providers/snowfl/utils.py +59 -0
  167. plexflow/core/torrents/providers/tgx/__init__.py +0 -0
  168. plexflow/core/torrents/providers/tgx/context.py +50 -0
  169. plexflow/core/torrents/providers/tgx/dump.py +40 -0
  170. plexflow/core/torrents/providers/tgx/tgx.py +22 -0
  171. plexflow/core/torrents/providers/tgx/utils.py +61 -0
  172. plexflow/core/torrents/providers/therarbg/__init__.py +0 -0
  173. plexflow/core/torrents/providers/therarbg/therarbg.py +17 -0
  174. plexflow/core/torrents/providers/therarbg/utils.py +61 -0
  175. plexflow/core/torrents/providers/torrentquest/__init__.py +0 -0
  176. plexflow/core/torrents/providers/torrentquest/torrentquest.py +20 -0
  177. plexflow/core/torrents/providers/torrentquest/utils.py +70 -0
  178. plexflow/core/torrents/providers/tpb/__init__.py +0 -0
  179. plexflow/core/torrents/providers/tpb/tpb.py +17 -0
  180. plexflow/core/torrents/providers/tpb/utils.py +139 -0
  181. plexflow/core/torrents/providers/yts/__init__.py +0 -0
  182. plexflow/core/torrents/providers/yts/utils.py +57 -0
  183. plexflow/core/torrents/providers/yts/yts.py +31 -0
  184. plexflow/core/torrents/results/__init__.py +0 -0
  185. plexflow/core/torrents/results/torrent.py +165 -0
  186. plexflow/core/torrents/results/universal.py +220 -0
  187. plexflow/core/torrents/results/utils.py +15 -0
  188. plexflow/events/__init__.py +0 -0
  189. plexflow/events/download/__init__.py +0 -0
  190. plexflow/events/download/torrent_events.py +96 -0
  191. plexflow/events/publish/__init__.py +0 -0
  192. plexflow/events/publish/publish.py +34 -0
  193. plexflow/logging/__init__.py +0 -0
  194. plexflow/logging/log_setup.py +8 -0
  195. plexflow/spiders/quiet_logger.py +9 -0
  196. plexflow/spiders/tgx/pipelines/dump_json_pipeline.py +30 -0
  197. plexflow/spiders/tgx/pipelines/meta_pipeline.py +13 -0
  198. plexflow/spiders/tgx/pipelines/publish_pipeline.py +14 -0
  199. plexflow/spiders/tgx/pipelines/torrent_info_pipeline.py +12 -0
  200. plexflow/spiders/tgx/pipelines/validation_pipeline.py +17 -0
  201. plexflow/spiders/tgx/settings.py +36 -0
  202. plexflow/spiders/tgx/spider.py +72 -0
  203. plexflow/utils/__init__.py +0 -0
  204. plexflow/utils/antibot/human_like_requests.py +122 -0
  205. plexflow/utils/api/__init__.py +0 -0
  206. plexflow/utils/api/context/http.py +62 -0
  207. plexflow/utils/api/rest/__init__.py +0 -0
  208. plexflow/utils/api/rest/antibot_restful.py +68 -0
  209. plexflow/utils/api/rest/restful.py +49 -0
  210. plexflow/utils/captcha/__init__.py +0 -0
  211. plexflow/utils/captcha/bypass/__init__.py +0 -0
  212. plexflow/utils/captcha/bypass/decode_audio.py +34 -0
  213. plexflow/utils/download/__init__.py +0 -0
  214. plexflow/utils/download/gz.py +26 -0
  215. plexflow/utils/filesystem/__init__.py +0 -0
  216. plexflow/utils/filesystem/search.py +129 -0
  217. plexflow/utils/gmail/__init__.py +0 -0
  218. plexflow/utils/gmail/mails.py +116 -0
  219. plexflow/utils/hooks/__init__.py +0 -0
  220. plexflow/utils/hooks/http.py +84 -0
  221. plexflow/utils/hooks/postgresql.py +93 -0
  222. plexflow/utils/hooks/redis.py +112 -0
  223. plexflow/utils/image/storage.py +36 -0
  224. plexflow/utils/imdb/__init__.py +0 -0
  225. plexflow/utils/imdb/imdb_codes.py +107 -0
  226. plexflow/utils/pubsub/consume.py +82 -0
  227. plexflow/utils/pubsub/produce.py +25 -0
  228. plexflow/utils/retry/__init__.py +0 -0
  229. plexflow/utils/retry/utils.py +38 -0
  230. plexflow/utils/strings/__init__.py +0 -0
  231. plexflow/utils/strings/filesize.py +55 -0
  232. plexflow/utils/strings/language.py +14 -0
  233. plexflow/utils/subtitle/search.py +76 -0
  234. plexflow/utils/tasks/decorators.py +78 -0
  235. plexflow/utils/tasks/k8s/task.py +70 -0
  236. plexflow/utils/thread_safe/safe_list.py +54 -0
  237. plexflow/utils/thread_safe/safe_set.py +69 -0
  238. plexflow/utils/torrent/__init__.py +0 -0
  239. plexflow/utils/torrent/analyze.py +118 -0
  240. plexflow/utils/torrent/extract/common.py +37 -0
  241. plexflow/utils/torrent/extract/ext.py +2391 -0
  242. plexflow/utils/torrent/extract/extratorrent.py +56 -0
  243. plexflow/utils/torrent/extract/kat.py +1581 -0
  244. plexflow/utils/torrent/extract/tgx.py +96 -0
  245. plexflow/utils/torrent/extract/therarbg.py +170 -0
  246. plexflow/utils/torrent/extract/torrentquest.py +171 -0
  247. plexflow/utils/torrent/files.py +36 -0
  248. plexflow/utils/torrent/hash.py +90 -0
  249. plexflow/utils/transcribe/__init__.py +0 -0
  250. plexflow/utils/transcribe/speech2text.py +40 -0
  251. plexflow/utils/video/__init__.py +0 -0
  252. plexflow/utils/video/subtitle.py +73 -0
  253. plexflow-0.0.64.dist-info/METADATA +71 -0
  254. plexflow-0.0.64.dist-info/RECORD +256 -0
  255. plexflow-0.0.64.dist-info/WHEEL +4 -0
  256. plexflow-0.0.64.dist-info/entry_points.txt +24 -0
@@ -0,0 +1,96 @@
1
+ import re
2
+ from bs4 import BeautifulSoup
3
+ from plexflow.utils.subtitle.search import SubtitleSearcher
4
+ from plexflow.utils.imdb.imdb_codes import extract_imdb_code
5
+
6
def extract_torrent_info(html_content):
    """Extracts torrent information from the provided HTML content,
    searching for IMDb ID pattern in links and plain text.

    Args:
        html_content (str): The HTML content of the webpage.

    Returns:
        dict: A dictionary containing the extracted information with
        lowercase keys; individual values are None when not found.
    """
    page_text = BeautifulSoup(html_content, 'html.parser').get_text(separator='\n')

    def first_group(pattern, flags=0):
        # First capture group of the first match in the page text, or None.
        found = re.search(pattern, page_text, flags)
        return found.group(1) if found else None

    def cleaned(value):
        # Normalise a raw capture: strip surrounding whitespace, lowercase.
        return value.strip().lower() if value is not None else None

    info = {}

    # release_name (lowercased but not stripped, matching the raw capture)
    raw_name = first_group(r"Torrent details for \"(.*?)\"")
    info["release_name"] = raw_name.lower() if raw_name is not None else None

    # category and subcategory come from one "Category: A > B" line
    category_match = re.search(r"Category:\s*(.*?)\s*>\s*(.*?)$", page_text, re.MULTILINE)
    info["category"] = cleaned(category_match.group(1)) if category_match else None
    info["subcategory"] = cleaned(category_match.group(2)) if category_match else None

    # language
    info["language"] = cleaned(first_group(r"Language:\s*(.*?)$", re.MULTILINE))

    # total_size: convert the GB/MB/KB figure to bytes
    size_match = re.search(r"Total Size:\s*([\d.]+)\s*(GB|MB|KB)", page_text, re.IGNORECASE)
    if size_match:
        amount = float(size_match.group(1))
        unit_factor = {"GB": 1024 ** 3, "MB": 1024 ** 2, "KB": 1024}
        info["total_size"] = amount * unit_factor[size_match.group(2).upper()]
    else:
        info["total_size"] = None

    # hash
    info["hash"] = cleaned(first_group(r"Info Hash:\s*(.*?)$", re.MULTILINE))

    # uploader
    info["uploader"] = cleaned(first_group(r"Added By:\s*(.*?)\s*Added", re.MULTILINE))

    # date
    info["date"] = cleaned(first_group(r"Added:\s*(.*?)$", re.MULTILINE))

    # seeds
    raw_seeds = first_group(r"Seeds:?\s*(\d+)")
    info["seeds"] = int(raw_seeds) if raw_seeds is not None else None

    # peers
    raw_peers = first_group(r"Leechers:?\s*(\d+)")
    info["peers"] = int(raw_peers) if raw_peers is not None else None

    # imdb_id is looked up in the raw HTML (it usually sits inside a link)
    info["imdb_id"] = next(extract_imdb_code(html_content), None)

    # description: everything between the heading and the comments section
    info["description"] = cleaned(
        first_group(r"Description(.*?)User comments", re.IGNORECASE | re.DOTALL)
    )

    # magnet_url is taken verbatim from the raw HTML
    magnet_match = re.search(
        r"magnet:\?xt=urn:btih:[a-z0-9]+[^'\"]+", html_content, re.IGNORECASE
    )
    info["magnet_url"] = magnet_match.group(0) if magnet_match else None

    # subtitle mentions in the page text (English/Dutch hint words)
    searcher = SubtitleSearcher(hint_words={"english", "eng", "dutch", "dut"})
    info["subtitles"] = searcher.search_subtitles(page_text)

    return info
@@ -0,0 +1,170 @@
1
+ from bs4 import BeautifulSoup
2
+ from urllib.parse import urljoin
3
+ import dateparser
4
+ from plexflow.utils.strings.filesize import parse_size
5
+ from plexflow.utils.imdb.imdb_codes import extract_imdb_code
6
+ import re
7
+ from plexflow.utils.torrent.files import TorrentFile
8
+ from plexflow.utils.torrent.extract.common import torrent_detail_extract
9
+
10
@torrent_detail_extract
def extract_torrent_details(html_content, **torrent_details):
    """Extracts specific torrent details from the given HTML content,
    with increased robustness against HTML structure changes and case sensitivity.

    Args:
        html_content: The HTML content of the torrent page.
        **torrent_details: Pre-seeded details supplied by the decorator;
            extracted fields are merged into this mapping.

    Returns:
        A dictionary containing the extracted torrent details.
    """

    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract all text from the HTML so the regexes survive markup changes
    all_text = soup.get_text()

    # Torrent name. Fix: guard the match instead of calling .group(1)
    # unconditionally, which raised AttributeError on pages without the
    # ": <name> : Search" pattern.
    name_match = re.search(r':\s*([^:]+)\s*:\s*Search', all_text, re.IGNORECASE)
    if name_match:
        torrent_details['release_name'] = name_match.group(1).strip()

    # Torrent size (capture only the number)
    size_text = re.search(r'Size:\s*(\d+\.?\d*\s*\w+)', all_text, re.IGNORECASE)
    if size_text:
        torrent_details['torrent_size'] = size_text.group(1).strip()
        torrent_details["size_bytes"] = next(iter(parse_size(torrent_details['torrent_size'])), None)

    # Peers (Leechers)
    peers_text = re.search(r'Leechers:\s*(\d+)', all_text, re.IGNORECASE)
    if peers_text:
        torrent_details['peers'] = int(peers_text.group(1))

    # Seeds
    seeds_text = re.search(r'Seeders:\s*(\d+)', all_text, re.IGNORECASE)
    if seeds_text:
        torrent_details['seeds'] = int(seeds_text.group(1))

    # Total files
    files_text = re.search(r'Files:\s*(\d+)', all_text, re.IGNORECASE)
    if files_text:
        torrent_details['total_files'] = int(files_text.group(1))

    # Date of upload. Fix: dateparser.parse returns None for strings it
    # cannot understand, which made .isoformat() raise AttributeError;
    # only store the date when parsing succeeded.
    upload_text = re.search(r'Added:\s*([^,]+)', all_text, re.IGNORECASE)
    if upload_text:
        parsed_date = dateparser.parse(upload_text.group(1).strip())
        if parsed_date is not None:
            torrent_details['date'] = parsed_date.isoformat()

    # Uploader
    uploader_text = re.search(r'Uploader:\s*([^<]+)', all_text, re.IGNORECASE)
    if uploader_text:
        torrent_details['uploader'] = uploader_text.group(1).strip()

    # Info hash
    info_hash_text = re.search(r'Info Hash:\s*([\dA-F]+)', all_text, re.IGNORECASE)
    if info_hash_text:
        torrent_details['hash'] = info_hash_text.group(1).strip()

    # File list (extract from text using surrounding context)
    torrent_details['file_list'] = []
    file_list_start = re.search(r'Files:\s*\d+', all_text, re.IGNORECASE)
    if file_list_start:
        file_list_start_index = file_list_start.end()
        file_list_end = re.search(r'Multiple Quality Available', all_text, re.IGNORECASE)
        if file_list_end:
            file_list_end_index = file_list_end.start()
            file_list_text = all_text[file_list_start_index:file_list_end_index]

            file_entries = file_list_text.strip().splitlines()

            for entry in file_entries:
                # Extract file name and size from each listing line
                name_size_match = re.search(r'(.+)\s*(\d+\.?\d*\s*(GB|MB|KB|B))', entry, re.IGNORECASE)
                name = name_size_match.group(1).strip() if name_size_match else None
                size = name_size_match.group(2).strip() if name_size_match else None

                torrent_details['file_list'].append(TorrentFile(
                    name=name,
                    size=size,
                ))

    # Category
    category_text = re.search(r'Category:\s*([^<]+)', all_text, re.IGNORECASE)
    if category_text:
        torrent_details['category'] = category_text.group(1).strip()

    return torrent_details
95
+
96
+
97
def _safe_int(text, default=0):
    """Parse *text* as an int, returning *default* when it is not numeric."""
    try:
        return int(text.strip())
    except (ValueError, AttributeError):
        return default


def extract_torrent_results(html):
    """Extracts torrent information from HTML, resilient to HTML structure changes.

    Args:
        html: The HTML content of the page.

    Returns:
        A list of dictionaries, each containing torrent information:
        - 'name': The torrent name (from the detail link, or the cell text).
        - 'link': The link to the torrent detail page ('' when absent).
        - 'imdb': IMDb code, only present when found in a title-cell link.
        - 'type': The category of the torrent.
        - 'added'/'date': Raw and parsed upload date (date may be None).
        - 'size'/'size_bytes': Human-readable and parsed size.
        - 'seeds'/'peers': Seeder and leecher counts (0 when unparseable).
    """

    torrents = []
    soup = BeautifulSoup(html, 'html.parser')

    # Find all table rows that likely contain torrent information
    rows = soup.find_all('tr', class_='list-entry')

    for row in rows:
        cells = row.find_all('td')

        # Fix: skip malformed rows instead of raising IndexError when the
        # expected column count (8) is not present.
        if len(cells) < 8:
            continue

        torrent = {}

        # Title (get full name from the link href).
        # Fix: the 'wrapper' div may be missing; guard against
        # AttributeError on .find() of None.
        title_cell = cells[1]
        wrapper = title_cell.find('div', class_='wrapper')
        title_link = wrapper.find('a', recursive=False) if wrapper else None
        if title_link:
            # Use the last path segment of the detail URL as the name
            torrent['link'] = urljoin('https://therarbg.com/', title_link['href'])
            torrent['name'] = torrent['link'].rstrip('/').split('/')[-1]
        else:
            # If no link is found, fall back to the title cell's text
            torrent['name'] = title_cell.text.strip()
            torrent['link'] = ''

        # Search all links of the title cell for an IMDb id
        for link in title_cell.find_all('a'):
            href = link.get('href')
            if isinstance(href, str):
                imdb_code = next(extract_imdb_code(href), None)
                if isinstance(imdb_code, str):
                    torrent['imdb'] = imdb_code
                    break

        # Category
        torrent['type'] = cells[2].find('a').text.strip() if cells[2].find('a') else ''

        # Added
        added_cell = cells[3]
        torrent['added'] = added_cell.text.strip() if added_cell else ''
        torrent['date'] = dateparser.parse(torrent['added'])

        # Size
        size_cell = cells[5]
        torrent['size'] = size_cell.text.strip() if size_cell else ''
        torrent['size_bytes'] = next(iter(parse_size(torrent['size'])), None)

        # Seeders and Leechers. Fix: int(...) crashed on non-numeric cell
        # text (e.g. '-' or empty); fall back to 0 instead.
        torrent['seeds'] = _safe_int(cells[6].text)
        torrent['peers'] = _safe_int(cells[7].text)

        torrents.append(torrent)

    return torrents
@@ -0,0 +1,171 @@
1
+ from bs4 import BeautifulSoup
2
+ import dateparser
3
+ from plexflow.utils.strings.filesize import parse_size
4
+ import re
5
+ from plexflow.utils.strings.filesize import parse_size
6
+ from plexflow.utils.torrent.files import TorrentFile
7
+ from plexflow.utils.torrent.extract.common import torrent_detail_extract
8
+
9
@torrent_detail_extract
def extract_torrent_details(html_content, **torrent_details):
    """Extracts specific torrent details from the given HTML content,
    with increased robustness against HTML structure changes and case sensitivity.

    Args:
        html_content: The HTML content of the torrent page.
        **torrent_details: Pre-seeded details supplied by the decorator;
            extracted fields are merged into this mapping.

    Returns:
        A dictionary containing the extracted torrent details.
    """

    soup = BeautifulSoup(html_content, 'html.parser')

    # Torrent name lives in the 'header-content' element.
    # Fix: guard against the element being absent instead of raising
    # AttributeError on .get_text() of None.
    header = soup.find('div', class_='header-content')
    if header is not None:
        torrent_details['release_name'] = header.get_text().strip()

    # Extract all text from the HTML so the regexes survive markup changes
    all_text = soup.get_text()

    # Torrent size (capture only the number)
    size_text = re.search(r'Total Size:\s*(\d+\.?\d*\s*\w+)', all_text, re.IGNORECASE)
    if size_text:
        torrent_details['torrent_size'] = size_text.group(1).strip()
        torrent_details["size_bytes"] = next(iter(parse_size(torrent_details['torrent_size'])), None)

    # Peers (Leechers)
    peers_text = re.search(r'Leechers:\s*\d+', all_text, re.IGNORECASE)
    if peers_text:
        torrent_details['peers'] = int(peers_text.group(0).split()[-1])

    # Seeds
    seeds_text = re.search(r'Seeders:\s*\d+', all_text, re.IGNORECASE)
    if seeds_text:
        torrent_details['seeds'] = int(seeds_text.group(0).split()[-1])

    # Total files
    files_text = re.search(r'Total Files:\s*\d+', all_text, re.IGNORECASE)
    if files_text:
        torrent_details['total_files'] = int(files_text.group(0).split()[-1])

    # Date of upload
    upload_text = re.search(r'Uploaded:\s*\d{1,2}-\w{3}-\d{4}', all_text, re.IGNORECASE)
    if upload_text:
        torrent_details['date'] = upload_text.group(0).split()[-1]

    # Uploader
    uploader_text = re.search(r'Uploader:\s*\w+', all_text, re.IGNORECASE)
    if uploader_text:
        torrent_details['uploader'] = uploader_text.group(0).split()[-1]

    # Info hash
    info_hash_text = re.search(r'Info Hash:\s*[\dA-F]+', all_text, re.IGNORECASE)
    if info_hash_text:
        torrent_details['hash'] = info_hash_text.group(0).split()[-1]

    # File list (extract from text using surrounding context)
    torrent_details['file_list'] = []
    file_list_start = re.search(r'File List Information', all_text, re.IGNORECASE)
    if file_list_start:
        file_list_start_index = file_list_start.end()
        file_list_end = re.search(r'Related Downloads|Help Downloading', all_text, re.IGNORECASE)
        if file_list_end:
            file_list_end_index = file_list_end.start()
            file_list_text = all_text[file_list_start_index:file_list_end_index]

            file_entries = file_list_text.strip().splitlines()

            for entry in file_entries:
                # Extract file name and size
                name_match = re.search(r'(.+)\s*\(\d+\.?\d*\s*(GB|MB|KB|B)\)', entry, re.IGNORECASE)
                # The size is always the last parenthesised token in the
                # entry, so collect all of them and take the last.
                # Fix: removed a stray debug print(size_matches) that
                # spammed stdout for every file entry.
                size_matches = re.findall(r'\(([^)]+)\)', entry, re.IGNORECASE)

                name = name_match.group(1).strip() if name_match else None
                size = size_matches[-1].strip() if size_matches else None

                torrent_details['file_list'].append(TorrentFile(
                    name=name,
                    size=size,
                ))

    # Category
    category_text = re.search(r'Category:\s*\w+', all_text, re.IGNORECASE)
    if category_text:
        torrent_details['category'] = category_text.group(0).split()[-1]

    return torrent_details
102
+
103
+
104
def extract_torrent_results(html):
    """
    Extracts torrent results from HTML content, resilient to changes in HTML structure.

    Args:
        html: The HTML content as a string.

    Returns:
        A list of dictionaries, each representing a torrent result with keys:
        - 'magnet_link': The magnet link for the torrent.
        - 'name'/'link': Name and detail-page URL (when the row has 8+ columns).
        - 'age'/'date': Raw age text and the parsed date (date may be None).
        - 'type': The type of the torrent (e.g. movie, game), lowercased.
        - 'files': The number of files in the torrent (as text).
        - 'size'/'size_bytes': Human-readable and parsed size.
        - 'seeds'/'peers': Seeder and leecher counts (as text).
    """

    soup = BeautifulSoup(html, 'html.parser')
    torrent_results = []

    # Find all 'a' tags with 'magnet' in the href attribute.
    # Fix: anchors without an href pass None to the predicate, which made
    # "'magnet' in href" raise TypeError; check for a value first.
    magnet_links = soup.find_all('a', href=lambda href: bool(href) and 'magnet' in href)

    # Iterate over each magnet link
    for magnet_link in magnet_links:
        torrent_result = {'magnet_link': magnet_link['href']}

        # Find the parent 'tr' (table row) of the magnet link
        parent_row = magnet_link.find_parent('tr')
        if parent_row:
            cols = parent_row.find_all('td')

            # Extract data from columns based on their position
            # (assuming consistent layout within the table row)
            if len(cols) >= 8:
                torrent_result['name'] = cols[1].find('a').text.strip()
                # Build the full detail-page link from the relative href
                link = cols[1].find('a')['href']
                torrent_result['link'] = f"https://torrentquest.com{link}"
                age = cols[2].text.strip()
                torrent_result['age'] = age
                if isinstance(age, str):
                    # dateparser understands relative phrases like "3 days ago"
                    torrent_result['date'] = dateparser.parse(f"{age} ago")
                else:
                    torrent_result['date'] = None
                torrent_result['type'] = cols[3].text.strip().lower()
                torrent_result['files'] = cols[4].text.strip()
                size_human = cols[5].text.strip()
                torrent_result['size'] = size_human

                # Fix: indexing parse_size(...) with [0] fails when it
                # returns a generator; take the first candidate the same
                # way every other extractor in this package does.
                torrent_result['size_bytes'] = next(iter(parse_size(size_human)), None)

                torrent_result['seeds'] = cols[6].text.strip()
                torrent_result['peers'] = cols[7].text.strip()

        torrent_results.append(torrent_result)

    return torrent_results
@@ -0,0 +1,36 @@
1
+ from pydantic import BaseModel
2
+ from plexflow.utils.strings.filesize import parse_size
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
class TorrentFile(BaseModel):
    """A single file inside a torrent: a name plus a human-readable size."""

    # Both fields are optional: scrapers may fail to extract either one.
    name: Optional[str]
    size: Optional[str]

    @property
    def size_bytes(self):
        """Size in bytes parsed from the human-readable size, or None."""
        if not self.size:
            return None
        return next(iter(parse_size(self.size)), None)

    @property
    def size_human(self):
        """The original human-readable size string."""
        return self.size

    @property
    def extension(self):
        """File extension without the leading dot, or None when unnamed."""
        if not self.name:
            return None
        return Path(self.name).suffix.lstrip('.')

    def __str__(self) -> str:
        return f"{self.name} [({self.size_human})][{self.extension}][{self.size_bytes} bytes]"

    def __repr__(self) -> str:
        return str(self)
27
+
28
class TorrentSubtitle(BaseModel):
    """A subtitle entry for a torrent: a language label and a file name."""

    # Both fields are optional: subtitle listings are often incomplete.
    language: Optional[str]
    name: Optional[str]

    def __str__(self) -> str:
        return f"{self.language} - {self.name}"

    def __repr__(self) -> str:
        return str(self)
@@ -0,0 +1,90 @@
1
+ import re
2
+
3
def extract_hash(s: str):
    """
    Lazily yield every candidate SHA-1 hash found in an arbitrary string.

    A candidate is any standalone run of exactly 40 hexadecimal characters.
    Yielding matches one at a time keeps memory usage flat even for very
    large inputs with many potential hashes.

    Parameters:
        s (str): The string from which to extract the potential hashes.

    Yields:
        str: Each extracted potential hash.

    Examples:
        >>> list(extract_hash('Here is a SHA-1 hash: 5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8'))
        ['5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8']

        >>> list(extract_hash('No hashes here!'))
        []
    """
    # \b anchors ensure the 40 hex characters are not part of a longer token.
    sha1_pattern = re.compile(r'\b[A-Fa-f0-9]{40}\b')
    yield from (found.group(0) for found in sha1_pattern.finditer(s))
31
+
32
+
33
def extract_torrent_hash(magnet: str):
    """
    Extract the torrent info-hash from a magnet link.

    The hash is normally found after 'xt=urn:btih:' in the magnet link;
    when that marker is present the scan starts right after it, otherwise
    the whole string is scanned.

    Parameters:
        magnet (str): The magnet link from which to extract the torrent hash.

    Returns:
        str: The extracted torrent hash, or None if no hash was found.

    Examples:
        >>> extract_torrent_hash('magnet:?xt=urn:btih:5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8')
        '5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8'

        >>> extract_torrent_hash('No hashes here!')
    """
    marker = 'xt=urn:btih:'
    idx = magnet.find(marker)
    # BUGFIX: the old code unconditionally added len(marker) to find()'s
    # result, so a missing marker (find() == -1) silently skipped the first
    # 11 characters of the input. Fall back to scanning from the start.
    start = idx + len(marker) if idx != -1 else 0

    # A hash is a standalone run of exactly 40 hexadecimal characters.
    match = re.search(r'\b[A-Fa-f0-9]{40}\b', magnet[start:])
    return match.group(0) if match else None
61
+
62
+
63
def extract_magnet(text: str):
    """
    Lazily yield each magnet link found in an arbitrary string.

    Matching is case-insensitive, and links are produced one at a time,
    which keeps memory usage flat for large inputs with many links.

    Parameters:
        text (str): The string from which to extract the magnet links.

    Yields:
        str: Each extracted magnet link.

    Examples:
        >>> list(extract_magnet('Here is a magnet link: magnet:?xt=urn:btih:5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8'))
        ['magnet:?xt=urn:btih:5baa61e4c9b93f3f0682250b6cf8331b7ee68fd8']

        >>> list(extract_magnet('No magnet links here!'))
        []
    """
    # Hex-encoded btih magnet URI; IGNORECASE also accepts upper-case links.
    magnet_pattern = re.compile(r'magnet:\?xt=urn:btih:[A-Fa-f0-9]+', re.IGNORECASE)
    yield from (found.group(0) for found in magnet_pattern.finditer(text))
File without changes
@@ -0,0 +1,40 @@
1
+ import whisper
2
+
3
def transcribe_audio(file_path, model: str = 'medium'):
    """
    Transcribes an audio file using the Whisper model.

    Args:
        file_path (str): The path to the audio file to transcribe.
        model (str): Name of the Whisper model to load (e.g. 'tiny', 'base',
            'small', 'medium', 'large'). Defaults to 'medium'.

    Returns:
        str: The transcription of the audio file.

    Raises:
        FileNotFoundError: If the audio file does not exist.
        Exception: If there is an error loading the model or transcribing
            the audio (propagated from whisper).

    Example:
        >>> transcribe_audio('path/to/your/audio/file.mp3')
        'This is the transcribed text from your audio file.'

    Note:
        This function assumes that you have the Whisper model available locally.
    """
    # Fail fast if the file is missing, before the (slow) model load.
    # Opening the file mirrors the original existence check exactly.
    with open(file_path, 'rb'):
        pass

    # Use a distinct local name: the original shadowed the `model` parameter
    # with the loaded model object, making the parameter unusable afterwards.
    whisper_model = whisper.load_model(model)

    result = whisper_model.transcribe(file_path)
    return result["text"]
File without changes
@@ -0,0 +1,73 @@
1
+ import subprocess
2
+ import json
3
+ from pydantic import BaseModel
4
+ from typing import List, Optional
5
+
6
class SubtitleStream(BaseModel):
    """A single subtitle stream discovered in a media container."""
    # Stream index as reported by ffprobe.
    index: int
    # Language tag from the stream's metadata, when present (e.g. "en").
    lang: Optional[str]

    class Config:
        # Example payload surfaced in generated schemas (pydantic v1 style).
        schema_extra = {
            "example": {
                "index": 0,
                "lang": "en"
            }
        }
17
+
18
def get_subtitles(video_path: str) -> List[SubtitleStream]:
    """
    Function to get subtitle streams from a video file.

    Args:
        video_path (str): The path to the video file.

    Returns:
        List[SubtitleStream]: A list of SubtitleStream objects, each representing a subtitle stream.

    Raises:
        FileNotFoundError: If the video file does not exist.
        ValueError: If the output from the command could not be parsed.

    Examples:
        >>> video_path = "/path/to/your/video.mp4"
        >>> try:
        ...     subtitles = get_subtitles(video_path)
        ...     for subtitle in subtitles:
        ...         print(f"Subtitle stream index: {subtitle.index}, language: {subtitle.lang}")
        ... except FileNotFoundError:
        ...     print(f"The video file {video_path} does not exist.")
        ... except ValueError:
        ...     print("There was a problem parsing the command output.")
        ...
        Subtitle stream index: 0, language: en
        Subtitle stream index: 1, language: es
    """
    # BUGFIX: `os` is never imported at module level in this file, so the
    # existence check below raised NameError instead of FileNotFoundError.
    import os

    if not os.path.isfile(video_path):
        raise FileNotFoundError(f"No such file: '{video_path}'")

    # -select_streams s restricts the probe to subtitle streams only.
    command = [
        'ffprobe',
        '-v', 'quiet',
        '-print_format', 'json',
        '-show_streams',
        '-select_streams', 's',
        video_path
    ]

    output = subprocess.run(command, capture_output=True, text=True)

    try:
        probe = json.loads(output.stdout)
    except json.JSONDecodeError as e:
        raise ValueError("Could not parse command output") from e

    # ffprobe may omit 'streams' entirely (e.g. no subtitle streams);
    # default to an empty list instead of raising KeyError.
    subtitle_streams = []
    for stream in probe.get('streams', []):
        subtitle_streams.append(SubtitleStream(
            index=stream['index'],
            # Both 'tags' and 'language' are optional in ffprobe output.
            lang=stream.get('tags', {}).get('language')
        ))

    return subtitle_streams