KekikStream 2.0.3__py3-none-any.whl → 2.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- KekikStream/Extractors/ContentX.py +13 -0
- KekikStream/Extractors/DonilasPlay.py +86 -0
- KekikStream/Extractors/Odnoklassniki.py +6 -0
- KekikStream/Extractors/PeaceMakerst.py +6 -0
- KekikStream/Extractors/PlayerFilmIzle.py +8 -5
- KekikStream/Extractors/RapidVid.py +6 -0
- KekikStream/Extractors/SetPlay.py +7 -1
- KekikStream/Extractors/VCTPlay.py +41 -0
- KekikStream/Extractors/VidMoly.py +52 -30
- KekikStream/Extractors/YTDLP.py +87 -53
- KekikStream/Plugins/BelgeselX.py +204 -0
- KekikStream/Plugins/Dizilla.py +22 -14
- KekikStream/Plugins/FilmMakinesi.py +1 -1
- KekikStream/Plugins/FilmModu.py +6 -2
- KekikStream/Plugins/FullHDFilmizlesene.py +1 -1
- KekikStream/Plugins/HDFilmCehennemi.py +83 -8
- KekikStream/Plugins/JetFilmizle.py +1 -1
- KekikStream/Plugins/KultFilmler.py +1 -1
- KekikStream/Plugins/RoketDizi.py +17 -24
- KekikStream/Plugins/SelcukFlix.py +51 -52
- KekikStream/Plugins/SetFilmIzle.py +259 -0
- KekikStream/Plugins/SezonlukDizi.py +28 -7
- KekikStream/Plugins/Sinefy.py +11 -8
- KekikStream/Plugins/SinemaCX.py +3 -7
- KekikStream/Plugins/SuperFilmGeldi.py +13 -7
- KekikStream/Plugins/UgurFilm.py +1 -1
- {kekikstream-2.0.3.dist-info → kekikstream-2.0.9.dist-info}/METADATA +1 -1
- {kekikstream-2.0.3.dist-info → kekikstream-2.0.9.dist-info}/RECORD +32 -34
- KekikStream/Extractors/ContentX_.py +0 -40
- KekikStream/Extractors/FirePlayer.py +0 -60
- KekikStream/Extractors/Odnoklassniki_.py +0 -11
- KekikStream/Extractors/PeaceMakerst_.py +0 -7
- KekikStream/Extractors/RapidVid_.py +0 -7
- KekikStream/Extractors/VidMoly_.py +0 -7
- {kekikstream-2.0.3.dist-info → kekikstream-2.0.9.dist-info}/WHEEL +0 -0
- {kekikstream-2.0.3.dist-info → kekikstream-2.0.9.dist-info}/entry_points.txt +0 -0
- {kekikstream-2.0.3.dist-info → kekikstream-2.0.9.dist-info}/licenses/LICENSE +0 -0
- {kekikstream-2.0.3.dist-info → kekikstream-2.0.9.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,19 @@ class ContentX(ExtractorBase):
|
|
|
7
7
|
name = "ContentX"
|
|
8
8
|
main_url = "https://contentx.me"
|
|
9
9
|
|
|
10
|
+
# Birden fazla domain destekle
|
|
11
|
+
supported_domains = [
|
|
12
|
+
"contentx.me", "four.contentx.me",
|
|
13
|
+
"dplayer82.site", "sn.dplayer82.site", "four.dplayer82.site", "org.dplayer82.site",
|
|
14
|
+
"dplayer74.site", "sn.dplayer74.site",
|
|
15
|
+
"hotlinger.com", "sn.hotlinger.com",
|
|
16
|
+
"playru.net", "four.playru.net",
|
|
17
|
+
"pichive.online", "four.pichive.online", "pichive.me", "four.pichive.me"
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
def can_handle_url(self, url: str) -> bool:
    """Return True when the URL's host belongs to one of ``supported_domains``.

    The host must equal a supported domain or be a sub-domain of it.
    A plain substring test would also accept look-alike hosts and URLs
    that merely mention a supported domain in their path or query string.
    """
    from urllib.parse import urlparse

    if not url:
        return False

    # Scheme-less input ("host/path") has no netloc; prefix "//" so the
    # host is still parsed. Only the part before "?" is inspected so a
    # URL embedded in the query string does not confuse the check.
    target = url if "//" in url.split("?", 1)[0] else f"//{url}"
    try:
        host = (urlparse(target).hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets).
        return False

    return any(
        host == domain or host.endswith(f".{domain}")
        for domain in self.supported_domains
    )
|
|
22
|
+
|
|
10
23
|
async def extract(self, url, referer=None) -> list[ExtractResult]:
|
|
11
24
|
if referer:
|
|
12
25
|
self.httpx.headers.update({"Referer": referer})
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Bu araç @keyiflerolsun tarafından | @KekikAkademi için yazılmıştır.
|
|
2
|
+
|
|
3
|
+
from KekikStream.Core import ExtractorBase, ExtractResult, Subtitle
|
|
4
|
+
from Kekik.Sifreleme import AESManager
|
|
5
|
+
import re, json
|
|
6
|
+
|
|
7
|
+
class DonilasPlay(ExtractorBase):
    """Extractor for DonilasPlay embed pages.

    The page is searched for an AES-encrypted ``bePlayer(...)`` payload
    first; when that yields no stream URL, a plain ``file:"..."`` /
    ``tracks:[...]`` player configuration is used as a fallback.
    """

    name     = "DonilasPlay"
    main_url = "https://donilasplay.com"

    @staticmethod
    def _is_turkish(label: str) -> bool:
        """Return True when a subtitle label denotes Turkish.

        Matching is done on whole words ("tr") or word prefixes
        ("tur...", "tür...") so that labels which merely contain those
        letters, such as "Australian", are not reported as Turkish.
        """
        words = re.findall(r"[^\W\d_]+", label.lower())
        return any(word == "tr" or word.startswith(("tur", "tür")) for word in words)

    async def extract(self, url, referer=None) -> ExtractResult:
        """Fetch ``url`` and return the stream URL with its subtitles.

        Args:
            url: Embed page URL; also used as the referer of the result.
            referer: Optional ``Referer`` header value for the page request.

        Raises:
            ValueError: If no stream URL can be found on the page.
        """
        if referer:
            self.httpx.headers.update({"Referer": referer})

        istek = await self.httpx.get(url)
        istek.raise_for_status()
        i_source = istek.text

        m3u_link  = None
        subtitles = []

        # bePlayer pattern: bePlayer('<password>', '<encrypted json>');
        be_player_match = re.search(r"bePlayer\('([^']+)',\s*'(\{[^}]+\})'\);", i_source)
        if be_player_match:
            be_player_pass = be_player_match.group(1)
            be_player_data = be_player_match.group(2)

            try:
                # AES decrypt
                decrypted = AESManager.decrypt(be_player_data, be_player_pass)
                data      = json.loads(decrypted)

                m3u_link = data.get("video_location")

                # Process subtitles; ``or`` guards against explicit nulls
                # so one incomplete entry does not discard the rest.
                for sub in data.get("strSubtitles") or []:
                    label    = sub.get("label") or ""
                    sub_file = sub.get("file") or ""

                    # Skip forced subtitles and entries without a file.
                    if "Forced" in label or not sub_file:
                        continue

                    subtitles.append(Subtitle(
                        name = "Turkish" if self._is_turkish(label) else label,
                        url  = self.fix_url(sub_file)
                    ))
            except Exception:
                # Best effort: a payload that cannot be decrypted or parsed
                # simply leaves the fallback below to find the stream.
                pass

        # Fallback: file pattern
        if not m3u_link:
            file_match = re.search(r'file:"([^"]+)"', i_source)
            if file_match:
                m3u_link = file_match.group(1)

            # tracks pattern for subtitles
            tracks_match = re.search(r'tracks:\[([^\]]+)', i_source)
            if tracks_match:
                try:
                    tracks = json.loads(f"[{tracks_match.group(1)}]")
                    for track in tracks:
                        file_url = track.get("file")
                        label    = track.get("label") or ""
                        if file_url and "Forced" not in label:
                            subtitles.append(Subtitle(
                                name = label,
                                url  = self.fix_url(file_url)
                            ))
                except Exception:
                    # Best effort: the tracks literal is often not strict JSON.
                    pass

        if not m3u_link:
            raise ValueError("m3u link not found")

        return ExtractResult(
            name      = self.name,
            url       = m3u_link,
            referer   = url,
            subtitles = subtitles
        )
|
|
@@ -7,6 +7,12 @@ class Odnoklassniki(ExtractorBase):
|
|
|
7
7
|
name = "Odnoklassniki"
|
|
8
8
|
main_url = "https://odnoklassniki.ru"
|
|
9
9
|
|
|
10
|
+
# Birden fazla domain destekle
|
|
11
|
+
supported_domains = ["odnoklassniki.ru", "ok.ru"]
|
|
12
|
+
|
|
13
|
+
def can_handle_url(self, url: str) -> bool:
    """Return True when the URL's host belongs to one of ``supported_domains``.

    The host must equal a supported domain or be a sub-domain of it.
    A plain substring test would also accept unrelated hosts such as
    ``book.ru`` (which contains ``ok.ru``) and URLs that only mention a
    supported domain in their path or query string.
    """
    from urllib.parse import urlparse

    if not url:
        return False

    # Scheme-less input ("host/path") has no netloc; prefix "//" so the
    # host is still parsed. Only the part before "?" is inspected so a
    # URL embedded in the query string does not confuse the check.
    target = url if "//" in url.split("?", 1)[0] else f"//{url}"
    try:
        host = (urlparse(target).hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets).
        return False

    return any(
        host == domain or host.endswith(f".{domain}")
        for domain in self.supported_domains
    )
|
|
15
|
+
|
|
10
16
|
async def extract(self, url, referer=None) -> ExtractResult:
|
|
11
17
|
if "/video/" in url:
|
|
12
18
|
url = url.replace("/video/", "/videoembed/")
|
|
@@ -7,6 +7,12 @@ class PeaceMakerst(ExtractorBase):
|
|
|
7
7
|
name = "PeaceMakerst"
|
|
8
8
|
main_url = "https://peacemakerst.com"
|
|
9
9
|
|
|
10
|
+
# Birden fazla domain destekle
|
|
11
|
+
supported_domains = ["peacemakerst.com", "hdstreamable.com"]
|
|
12
|
+
|
|
13
|
+
def can_handle_url(self, url: str) -> bool:
    """Return True when the URL's host belongs to one of ``supported_domains``.

    The host must equal a supported domain or be a sub-domain of it.
    A plain substring test would also accept look-alike hosts and URLs
    that merely mention a supported domain in their path or query string.
    """
    from urllib.parse import urlparse

    if not url:
        return False

    # Scheme-less input ("host/path") has no netloc; prefix "//" so the
    # host is still parsed. Only the part before "?" is inspected so a
    # URL embedded in the query string does not confuse the check.
    target = url if "//" in url.split("?", 1)[0] else f"//{url}"
    try:
        host = (urlparse(target).hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets).
        return False

    return any(
        host == domain or host.endswith(f".{domain}")
        for domain in self.supported_domains
    )
|
|
15
|
+
|
|
10
16
|
async def extract(self, url, referer=None) -> ExtractResult:
|
|
11
17
|
if referer:
|
|
12
18
|
self.httpx.headers.update({"Referer": referer})
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
# Bu araç @keyiflerolsun tarafından | @KekikAkademi için yazılmıştır.
|
|
2
2
|
|
|
3
3
|
from KekikStream.Core import ExtractorBase, ExtractResult, Subtitle
|
|
4
|
-
import re
|
|
4
|
+
import re
|
|
5
5
|
|
|
6
6
|
class PlayerFilmIzle(ExtractorBase):
|
|
7
7
|
name = "PlayerFilmIzle"
|
|
8
8
|
main_url = "https://player.filmizle.in"
|
|
9
9
|
|
|
10
|
+
def can_handle_url(self, url: str) -> bool:
    """Accept filmizle.in links and any link mentioning "fireplayer".

    The domain test is case-sensitive; the "fireplayer" test is not.
    """
    if "filmizle.in" in url:
        return True

    return "fireplayer" in url.lower()
|
|
12
|
+
|
|
10
13
|
async def extract(self, url: str, referer: str = None) -> ExtractResult:
|
|
11
14
|
# Kotlin tarafında referer mainUrl olarak zorlanmış
|
|
12
15
|
ext_ref = self.main_url
|
|
@@ -29,20 +32,20 @@ class PlayerFilmIzle(ExtractorBase):
|
|
|
29
32
|
# Data yakalama: FirePlayer|DATA|...
|
|
30
33
|
data_match = re.search(r'FirePlayer\|([^|]+)\|', video_req, re.IGNORECASE)
|
|
31
34
|
data_val = data_match.group(1) if data_match else None
|
|
32
|
-
|
|
35
|
+
|
|
33
36
|
if not data_val:
|
|
34
37
|
raise ValueError("PlayerFilmIzle: Data bulunamadı")
|
|
35
38
|
|
|
36
39
|
url_post = f"{self.main_url}/player/index.php?data={data_val}&do=getVideo"
|
|
37
|
-
|
|
40
|
+
|
|
38
41
|
post_headers = {
|
|
39
42
|
"Referer": ext_ref,
|
|
40
43
|
"X-Requested-With": "XMLHttpRequest"
|
|
41
44
|
}
|
|
42
|
-
|
|
45
|
+
|
|
43
46
|
# Kotlin'de post data: "hash" -> data, "r" -> ""
|
|
44
47
|
post_data = {"hash": data_val, "r": ""}
|
|
45
|
-
|
|
48
|
+
|
|
46
49
|
response = await self.httpx.post(url_post, data=post_data, headers=post_headers)
|
|
47
50
|
get_url = response.text.replace("\\", "")
|
|
48
51
|
|
|
@@ -8,6 +8,12 @@ class RapidVid(ExtractorBase):
|
|
|
8
8
|
name = "RapidVid"
|
|
9
9
|
main_url = "https://rapidvid.net"
|
|
10
10
|
|
|
11
|
+
# Birden fazla domain destekle
|
|
12
|
+
supported_domains = ["rapidvid.net", "rapid.filmmakinesi.to"]
|
|
13
|
+
|
|
14
|
+
def can_handle_url(self, url: str) -> bool:
    """Return True when the URL's host belongs to one of ``supported_domains``.

    The host must equal a supported domain or be a sub-domain of it.
    A plain substring test would also accept look-alike hosts and URLs
    that merely mention a supported domain in their path or query string.
    """
    from urllib.parse import urlparse

    if not url:
        return False

    # Scheme-less input ("host/path") has no netloc; prefix "//" so the
    # host is still parsed. Only the part before "?" is inspected so a
    # URL embedded in the query string does not confuse the check.
    target = url if "//" in url.split("?", 1)[0] else f"//{url}"
    try:
        host = (urlparse(target).hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets).
        return False

    return any(
        host == domain or host.endswith(f".{domain}")
        for domain in self.supported_domains
    )
|
|
16
|
+
|
|
11
17
|
async def extract(self, url, referer=None) -> ExtractResult:
|
|
12
18
|
if referer:
|
|
13
19
|
self.httpx.headers.update({"Referer": referer})
|
|
@@ -5,7 +5,13 @@ import re
|
|
|
5
5
|
|
|
6
6
|
class SetPlay(ExtractorBase):
|
|
7
7
|
name = "SetPlay"
|
|
8
|
-
main_url = "https://setplay.
|
|
8
|
+
main_url = "https://setplay.shop"
|
|
9
|
+
|
|
10
|
+
# Birden fazla domain destekle
|
|
11
|
+
supported_domains = ["setplay.cfd", "setplay.shop", "setplay.site"]
|
|
12
|
+
|
|
13
|
+
def can_handle_url(self, url: str) -> bool:
    """Return True when the URL's host belongs to one of ``supported_domains``.

    The host must equal a supported domain or be a sub-domain of it.
    A plain substring test would also accept look-alike hosts and URLs
    that merely mention a supported domain in their path or query string.
    """
    from urllib.parse import urlparse

    if not url:
        return False

    # Scheme-less input ("host/path") has no netloc; prefix "//" so the
    # host is still parsed. Only the part before "?" is inspected so a
    # URL embedded in the query string does not confuse the check.
    target = url if "//" in url.split("?", 1)[0] else f"//{url}"
    try:
        host = (urlparse(target).hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets).
        return False

    return any(
        host == domain or host.endswith(f".{domain}")
        for domain in self.supported_domains
    )
|
|
9
15
|
|
|
10
16
|
async def extract(self, url, referer=None) -> ExtractResult:
|
|
11
17
|
ext_ref = referer or ""
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Bu araç @keyiflerolsun tarafından | @KekikAkademi için yazılmıştır.
|
|
2
|
+
|
|
3
|
+
from KekikStream.Core import ExtractorBase, ExtractResult
|
|
4
|
+
from urllib.parse import urlparse, parse_qs
|
|
5
|
+
|
|
6
|
+
class VCTPlay(ExtractorBase):
    """Extractor for VCTPlay links.

    No page request is made: the master manifest URL is built directly
    from the video id found in the link.
    """

    name     = "VCTPlay"
    main_url = "https://vctplay.site"

    async def extract(self, url, referer=None) -> ExtractResult:
        """Build the manifest URL for a VCTPlay video link.

        Args:
            url: Video link, e.g. ``https://vctplay.site/video/2hjDGco5exdv``.
            referer: Optional ``Referer`` header stored on the HTTP client.

        Raises:
            ValueError: If the link's path contains no video id.
        """
        if referer:
            self.httpx.headers.update({"Referer": referer})

        parsed = urlparse(url)

        # Take the id from the parsed path rather than splitting the raw
        # URL: a trailing slash, a fragment, or a "/" inside the query
        # string would otherwise yield an empty or wrong id.
        # https://vctplay.site/video/2hjDGco5exdv -> 2hjDGco5exdv
        video_id = parsed.path.rstrip("/").rsplit("/", 1)[-1]
        if not video_id:
            raise ValueError("VCTPlay: video id not found")

        # Build the manifest URL
        master_url = f"{self.main_url}/manifests/{video_id}/master.txt"

        # Derive the display-name suffix from the partKey query parameter
        part_key = parse_qs(parsed.query).get("partKey", [""])[0].lower()

        name_suffix = ""
        if "turkcedublaj" in part_key:
            name_suffix = "Dublaj"
        elif "turkcealtyazi" in part_key:
            name_suffix = "Altyazı"

        display_name = f"{self.name} - {name_suffix}" if name_suffix else self.name

        return ExtractResult(
            name      = display_name,
            url       = master_url,
            referer   = f"{self.main_url}/",
            subtitles = []
        )
|
|
@@ -9,6 +9,12 @@ class VidMoly(ExtractorBase):
|
|
|
9
9
|
name = "VidMoly"
|
|
10
10
|
main_url = "https://vidmoly.to"
|
|
11
11
|
|
|
12
|
+
# Birden fazla domain destekle
|
|
13
|
+
supported_domains = ["vidmoly.to", "vidmoly.me", "vidmoly.net"]
|
|
14
|
+
|
|
15
|
+
def can_handle_url(self, url: str) -> bool:
    """Return True when the URL's host belongs to one of ``supported_domains``.

    The host must equal a supported domain or be a sub-domain of it.
    A plain substring test would also accept look-alike hosts and URLs
    that merely mention a supported domain in their path or query string.
    """
    from urllib.parse import urlparse

    if not url:
        return False

    # Scheme-less input ("host/path") has no netloc; prefix "//" so the
    # host is still parsed. Only the part before "?" is inspected so a
    # URL embedded in the query string does not confuse the check.
    target = url if "//" in url.split("?", 1)[0] else f"//{url}"
    try:
        host = (urlparse(target).hostname or "").lower()
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 brackets).
        return False

    return any(
        host == domain or host.endswith(f".{domain}")
        for domain in self.supported_domains
    )
|
|
17
|
+
|
|
12
18
|
async def extract(self, url: str, referer: str = None) -> ExtractResult:
|
|
13
19
|
if referer:
|
|
14
20
|
self.httpx.headers.update({"Referer": referer})
|
|
@@ -17,11 +23,11 @@ class VidMoly(ExtractorBase):
|
|
|
17
23
|
"Sec-Fetch-Dest" : "iframe",
|
|
18
24
|
})
|
|
19
25
|
|
|
20
|
-
if
|
|
21
|
-
self.main_url = self.main_url.replace(".me", ".net")
|
|
26
|
+
if ".me" in url:
|
|
22
27
|
url = url.replace(".me", ".net")
|
|
23
28
|
|
|
24
|
-
|
|
29
|
+
# VidMoly bazen redirect ediyor, takip et
|
|
30
|
+
response = await self.httpx.get(url, follow_redirects=True)
|
|
25
31
|
if "Select number" in response.text:
|
|
26
32
|
secici = Selector(response.text)
|
|
27
33
|
response = await self.httpx.post(
|
|
@@ -33,21 +39,10 @@ class VidMoly(ExtractorBase):
|
|
|
33
39
|
"ts" : secici.css("input[name='ts']::attr(value)").get(),
|
|
34
40
|
"nonce" : secici.css("input[name='nonce']::attr(value)").get(),
|
|
35
41
|
"ctok" : secici.css("input[name='ctok']::attr(value)").get()
|
|
36
|
-
}
|
|
42
|
+
},
|
|
43
|
+
follow_redirects=True
|
|
37
44
|
)
|
|
38
45
|
|
|
39
|
-
script_match = re.search(r"sources:\s*\[(.*?)\],", response.text, re.DOTALL)
|
|
40
|
-
script_content = script_match[1] if script_match else None
|
|
41
|
-
|
|
42
|
-
if not script_content:
|
|
43
|
-
raise ValueError("Gerekli script bulunamadı.")
|
|
44
|
-
|
|
45
|
-
# Video kaynaklarını ayrıştır
|
|
46
|
-
video_data = self._add_marks(script_content, "file")
|
|
47
|
-
try:
|
|
48
|
-
video_sources = json.loads(f"[{video_data}]")
|
|
49
|
-
except json.JSONDecodeError as hata:
|
|
50
|
-
raise ValueError("Video kaynakları ayrıştırılamadı.") from hata
|
|
51
46
|
|
|
52
47
|
# Altyazı kaynaklarını ayrıştır
|
|
53
48
|
subtitles = []
|
|
@@ -66,22 +61,49 @@ class VidMoly(ExtractorBase):
|
|
|
66
61
|
for sub in subtitle_sources
|
|
67
62
|
if sub.get("kind") == "captions"
|
|
68
63
|
]
|
|
69
|
-
# İlk video kaynağını al
|
|
70
|
-
video_url = None
|
|
71
|
-
for source in video_sources:
|
|
72
|
-
if file_url := source.get("file"):
|
|
73
|
-
video_url = file_url
|
|
74
|
-
break
|
|
75
64
|
|
|
76
|
-
|
|
77
|
-
|
|
65
|
+
script_match = re.search(r"sources:\s*\[(.*?)\],", response.text, re.DOTALL)
|
|
66
|
+
if script_match:
|
|
67
|
+
script_content = script_match[1]
|
|
68
|
+
# Video kaynaklarını ayrıştır
|
|
69
|
+
video_data = self._add_marks(script_content, "file")
|
|
70
|
+
try:
|
|
71
|
+
video_sources = json.loads(f"[{video_data}]")
|
|
72
|
+
# İlk video kaynağını al
|
|
73
|
+
for source in video_sources:
|
|
74
|
+
if file_url := source.get("file"):
|
|
75
|
+
return ExtractResult(
|
|
76
|
+
name = self.name,
|
|
77
|
+
url = file_url,
|
|
78
|
+
referer = self.main_url,
|
|
79
|
+
subtitles = subtitles
|
|
80
|
+
)
|
|
81
|
+
except json.JSONDecodeError:
|
|
82
|
+
pass
|
|
83
|
+
|
|
84
|
+
# Fallback: Doğrudan file regex ile ara (Kotlin mantığı)
|
|
85
|
+
# file:"..." veya file: "..."
|
|
86
|
+
if file_match := re.search(r'file\s*:\s*["\']([^"\']+\.m3u8[^"\']*)["\']', response.text):
|
|
87
|
+
return ExtractResult(
|
|
88
|
+
name = self.name,
|
|
89
|
+
url = file_match.group(1),
|
|
90
|
+
referer = self.main_url,
|
|
91
|
+
subtitles = subtitles
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# Fallback 2: Herhangi bir file (m3u8 olma şartı olmadan ama tercihen)
|
|
95
|
+
if file_match := re.search(r'file\s*:\s*["\']([^"\']+)["\']', response.text):
|
|
96
|
+
url_candidate = file_match.group(1)
|
|
97
|
+
# Resim dosyalarını hariç tut
|
|
98
|
+
if not url_candidate.endswith(('.jpg', '.png', '.jpeg')):
|
|
99
|
+
return ExtractResult(
|
|
100
|
+
name = self.name,
|
|
101
|
+
url = url_candidate,
|
|
102
|
+
referer = self.main_url,
|
|
103
|
+
subtitles = subtitles
|
|
104
|
+
)
|
|
78
105
|
|
|
79
|
-
|
|
80
|
-
name = self.name,
|
|
81
|
-
url = video_url,
|
|
82
|
-
referer = self.main_url,
|
|
83
|
-
subtitles = subtitles
|
|
84
|
-
)
|
|
106
|
+
raise ValueError("Video URL bulunamadı.")
|
|
85
107
|
|
|
86
108
|
def _add_marks(self, text: str, field: str) -> str:
|
|
87
109
|
"""
|
KekikStream/Extractors/YTDLP.py
CHANGED
|
@@ -10,6 +10,86 @@ class YTDLP(ExtractorBase):
|
|
|
10
10
|
|
|
11
11
|
_FAST_DOMAIN_RE = None # compiled mega-regex (host üstünden)
|
|
12
12
|
|
|
13
|
+
# TLDs recognised by the literal-domain pattern below, and substituted
# for an open TLD pattern by _extract_regex_tld_domains.
_POPULAR_TLDS = {
    "com", "net", "org", "tv", "io", "co", "me", "ly", "ru", "fr", "de", "es", "it",
    "nl", "be", "ch", "at", "uk", "ca", "au", "jp", "kr", "cn", "in", "br", "mx",
    "ar", "tr", "gov", "edu", "mil", "int", "info", "biz", "name", "pro", "aero",
    "coop", "museum", "onion"
}

# All patterns below are applied to regex *source text*, in which a
# dot is written as a backslash followed by a dot.

# 1. Literal TLD regex: youtube\.com, vimeo\.com
# TLDs are sorted by descending length to prevent partial matches
# (e.g. 'co' matching 'com').
_LITERAL_TLD_RE = re.compile(
    rf"([a-z0-9][-a-z0-9]*(?:\\\.[-a-z0-9]+)*\\\.(?:{'|'.join(sorted(_POPULAR_TLDS, key=len, reverse=True))}))",
    re.IGNORECASE
)

# 2. Regex TLD regex: dailymotion\.[a-z]{2,3}
# Captures only the base name that precedes the open TLD pattern.
_REGEX_TLD_RE = re.compile(
    r"([a-z0-9][-a-z0-9]*)\\\.\[a-z\]\{?\d*,?\d*\}?",
    re.IGNORECASE
)

# 3. Alternation TLD regex: \.(?:com|net|org)
# Captures the "com|net|org" alternation body.
_ALT_TLD_RE = re.compile(
    r"\\\.\(\?:([a-z|]+)\)",
    re.IGNORECASE
)

# Word matcher (used to locate the domain name)
_DOMAIN_WORD_RE = re.compile(
    r"([a-z0-9][-a-z0-9]*)",
    re.IGNORECASE
)
|
|
44
|
+
|
|
45
|
+
@classmethod
|
|
46
|
+
def _extract_literal_domains(cls, valid_url: str) -> set[str]:
|
|
47
|
+
"""Pattern 1: Literal TLD domainlerini (youtube.com) çıkarır."""
|
|
48
|
+
return {
|
|
49
|
+
m.replace(r"\.", ".").lower()
|
|
50
|
+
for m in cls._LITERAL_TLD_RE.findall(valid_url)
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
@classmethod
|
|
54
|
+
def _extract_regex_tld_domains(cls, valid_url: str) -> set[str]:
|
|
55
|
+
"""Pattern 2: Regex TLD domainlerini (dailymotion.[...]) çıkarır ve popüler TLD'lerle birleştirir."""
|
|
56
|
+
domains = set()
|
|
57
|
+
for base in cls._REGEX_TLD_RE.findall(valid_url):
|
|
58
|
+
base_domain = base.lower()
|
|
59
|
+
for tld in cls._POPULAR_TLDS:
|
|
60
|
+
domains.add(f"{base_domain}.{tld}")
|
|
61
|
+
return domains
|
|
62
|
+
|
|
63
|
+
@classmethod
|
|
64
|
+
def _extract_alternation_domains(cls, valid_url: str) -> set[str]:
|
|
65
|
+
"""Pattern 3: Alternation TLD domainlerini (pornhub.(?:com|net)) çıkarır."""
|
|
66
|
+
domains = set()
|
|
67
|
+
for m in cls._ALT_TLD_RE.finditer(valid_url):
|
|
68
|
+
tlds = m.group(1).split("|")
|
|
69
|
+
start = m.start()
|
|
70
|
+
|
|
71
|
+
# Geriye doğru git ve domain'i bul
|
|
72
|
+
before = valid_url[:start]
|
|
73
|
+
|
|
74
|
+
# 1. Named Groups (?P<name> temizle
|
|
75
|
+
before = re.sub(r"\(\?P<[^>]+>", "", before)
|
|
76
|
+
|
|
77
|
+
# 2. Simple Non-Capturing Groups (?:xxx)? temizle (sadece alphanumeric ve escape)
|
|
78
|
+
before = re.sub(r"\(\?:[a-z0-9-]+\)\??", "", before)
|
|
79
|
+
|
|
80
|
+
# Son domain-like kelimeyi al
|
|
81
|
+
words = cls._DOMAIN_WORD_RE.findall(before)
|
|
82
|
+
if not words:
|
|
83
|
+
continue
|
|
84
|
+
|
|
85
|
+
base = words[-1].lower()
|
|
86
|
+
for tld in tlds:
|
|
87
|
+
tld = tld.strip().lower()
|
|
88
|
+
if tld and len(tld) <= 6:
|
|
89
|
+
domains.add(f"{base}.{tld}")
|
|
90
|
+
|
|
91
|
+
return domains
|
|
92
|
+
|
|
13
93
|
@classmethod
|
|
14
94
|
def _init_fast_domain_regex(cls):
|
|
15
95
|
"""
|
|
@@ -19,44 +99,31 @@ class YTDLP(ExtractorBase):
|
|
|
19
99
|
return
|
|
20
100
|
|
|
21
101
|
domains = set()
|
|
22
|
-
|
|
23
|
-
# Merkezi cache'den extractorları al
|
|
24
102
|
extractors = get_ytdlp_extractors()
|
|
25
103
|
|
|
26
|
-
# yt-dlp extractor'larının _VALID_URL regex'lerinden domain yakala
|
|
27
|
-
# Regex metinlerinde domainler genelde "\." şeklinde geçer.
|
|
28
|
-
domain_pat = re.compile(r"(?:[a-z0-9-]+\\\.)+[a-z]{2,}", re.IGNORECASE)
|
|
29
|
-
|
|
30
104
|
for ie in extractors:
|
|
31
105
|
valid = getattr(ie, "_VALID_URL", None)
|
|
32
106
|
if not valid or not isinstance(valid, str):
|
|
33
107
|
continue
|
|
34
108
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# Çok agresif/şüpheli şeyleri elemek istersen burada filtre koyabilirsin
|
|
39
|
-
# (genelde gerek kalmıyor)
|
|
40
|
-
domains.add(d)
|
|
109
|
+
domains |= cls._extract_literal_domains(valid)
|
|
110
|
+
domains |= cls._extract_regex_tld_domains(valid)
|
|
111
|
+
domains |= cls._extract_alternation_domains(valid)
|
|
41
112
|
|
|
42
113
|
# Hiç domain çıkmazsa (çok uç durum) fallback: boş regex
|
|
43
114
|
if not domains:
|
|
44
115
|
cls._FAST_DOMAIN_RE = re.compile(r"$^") # hiçbir şeye match etmez
|
|
45
116
|
return
|
|
46
117
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
joined = "|".join(sorted(re.escape(d) for d in domains))
|
|
50
|
-
pattern = rf"(?:^|.*\.)(?:{joined})$"
|
|
51
|
-
cls._FAST_DOMAIN_RE = re.compile(pattern, re.IGNORECASE)
|
|
118
|
+
joined = "|".join(re.escape(d) for d in sorted(domains))
|
|
119
|
+
cls._FAST_DOMAIN_RE = re.compile(rf"(?:^|.*\.)(?:{joined})$", re.IGNORECASE)
|
|
52
120
|
|
|
53
121
|
def __init__(self):
|
|
54
122
|
self.__class__._init_fast_domain_regex()
|
|
55
123
|
|
|
56
124
|
def can_handle_url(self, url: str) -> bool:
|
|
57
125
|
"""
|
|
58
|
-
Fast-path: URL host'unu tek mega-regex ile kontrol et
|
|
59
|
-
Slow-path: gerekirse mevcut extract_info tabanlı kontrolün
|
|
126
|
+
Fast-path: URL host'unu tek mega-regex ile kontrol et
|
|
60
127
|
"""
|
|
61
128
|
# URL parse + host al
|
|
62
129
|
try:
|
|
@@ -77,40 +144,7 @@ class YTDLP(ExtractorBase):
|
|
|
77
144
|
if host and self.__class__._FAST_DOMAIN_RE.search(host):
|
|
78
145
|
return True
|
|
79
146
|
|
|
80
|
-
#
|
|
81
|
-
# try:
|
|
82
|
-
# # stderr'ı geçici olarak kapat (hata mesajlarını gizle)
|
|
83
|
-
# old_stderr = sys.stderr
|
|
84
|
-
# sys.stderr = open(os.devnull, "w")
|
|
85
|
-
|
|
86
|
-
# try:
|
|
87
|
-
# ydl_opts = {
|
|
88
|
-
# "simulate" : True, # Download yok, sadece tespit
|
|
89
|
-
# "quiet" : True, # Log kirliliği yok
|
|
90
|
-
# "no_warnings" : True, # Uyarı mesajları yok
|
|
91
|
-
# "extract_flat" : True, # Minimal işlem
|
|
92
|
-
# "no_check_certificates" : True,
|
|
93
|
-
# "ignoreerrors" : True, # Hataları yoksay
|
|
94
|
-
# "socket_timeout" : 3,
|
|
95
|
-
# "retries" : 1
|
|
96
|
-
# }
|
|
97
|
-
|
|
98
|
-
# with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
|
99
|
-
# # URL'yi işleyebiliyor mu kontrol et
|
|
100
|
-
# info = ydl.extract_info(url, download=False, process=False)
|
|
101
|
-
|
|
102
|
-
# # Generic extractor ise atla
|
|
103
|
-
# if info and info.get("extractor_key") != "Generic":
|
|
104
|
-
# return True
|
|
105
|
-
|
|
106
|
-
# return False
|
|
107
|
-
# finally:
|
|
108
|
-
# # stderr'ı geri yükle
|
|
109
|
-
# sys.stderr.close()
|
|
110
|
-
# sys.stderr = old_stderr
|
|
111
|
-
|
|
112
|
-
# except Exception:
|
|
113
|
-
# yt-dlp işleyemezse False döndür
|
|
147
|
+
# yt-dlp işleyemezse False döndür
|
|
114
148
|
return False
|
|
115
149
|
|
|
116
150
|
async def extract(self, url: str, referer: str | None = None) -> ExtractResult:
|