quasarr-1.20.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of quasarr might be problematic.
- quasarr/__init__.py +460 -0
- quasarr/api/__init__.py +187 -0
- quasarr/api/arr/__init__.py +373 -0
- quasarr/api/captcha/__init__.py +1075 -0
- quasarr/api/config/__init__.py +23 -0
- quasarr/api/sponsors_helper/__init__.py +166 -0
- quasarr/api/statistics/__init__.py +196 -0
- quasarr/downloads/__init__.py +267 -0
- quasarr/downloads/linkcrypters/__init__.py +0 -0
- quasarr/downloads/linkcrypters/al.py +237 -0
- quasarr/downloads/linkcrypters/filecrypt.py +444 -0
- quasarr/downloads/linkcrypters/hide.py +123 -0
- quasarr/downloads/packages/__init__.py +467 -0
- quasarr/downloads/sources/__init__.py +0 -0
- quasarr/downloads/sources/al.py +697 -0
- quasarr/downloads/sources/by.py +106 -0
- quasarr/downloads/sources/dd.py +76 -0
- quasarr/downloads/sources/dj.py +7 -0
- quasarr/downloads/sources/dt.py +66 -0
- quasarr/downloads/sources/dw.py +65 -0
- quasarr/downloads/sources/he.py +112 -0
- quasarr/downloads/sources/mb.py +47 -0
- quasarr/downloads/sources/nk.py +51 -0
- quasarr/downloads/sources/nx.py +105 -0
- quasarr/downloads/sources/sf.py +159 -0
- quasarr/downloads/sources/sj.py +7 -0
- quasarr/downloads/sources/sl.py +90 -0
- quasarr/downloads/sources/wd.py +110 -0
- quasarr/providers/__init__.py +0 -0
- quasarr/providers/cloudflare.py +204 -0
- quasarr/providers/html_images.py +20 -0
- quasarr/providers/html_templates.py +241 -0
- quasarr/providers/imdb_metadata.py +142 -0
- quasarr/providers/log.py +19 -0
- quasarr/providers/myjd_api.py +917 -0
- quasarr/providers/notifications.py +124 -0
- quasarr/providers/obfuscated.py +51 -0
- quasarr/providers/sessions/__init__.py +0 -0
- quasarr/providers/sessions/al.py +286 -0
- quasarr/providers/sessions/dd.py +78 -0
- quasarr/providers/sessions/nx.py +76 -0
- quasarr/providers/shared_state.py +826 -0
- quasarr/providers/statistics.py +154 -0
- quasarr/providers/version.py +118 -0
- quasarr/providers/web_server.py +49 -0
- quasarr/search/__init__.py +153 -0
- quasarr/search/sources/__init__.py +0 -0
- quasarr/search/sources/al.py +448 -0
- quasarr/search/sources/by.py +203 -0
- quasarr/search/sources/dd.py +135 -0
- quasarr/search/sources/dj.py +213 -0
- quasarr/search/sources/dt.py +265 -0
- quasarr/search/sources/dw.py +214 -0
- quasarr/search/sources/fx.py +223 -0
- quasarr/search/sources/he.py +196 -0
- quasarr/search/sources/mb.py +195 -0
- quasarr/search/sources/nk.py +188 -0
- quasarr/search/sources/nx.py +197 -0
- quasarr/search/sources/sf.py +374 -0
- quasarr/search/sources/sj.py +213 -0
- quasarr/search/sources/sl.py +246 -0
- quasarr/search/sources/wd.py +208 -0
- quasarr/storage/__init__.py +0 -0
- quasarr/storage/config.py +163 -0
- quasarr/storage/setup.py +458 -0
- quasarr/storage/sqlite_database.py +80 -0
- quasarr-1.20.6.dist-info/METADATA +304 -0
- quasarr-1.20.6.dist-info/RECORD +72 -0
- quasarr-1.20.6.dist-info/WHEEL +5 -0
- quasarr-1.20.6.dist-info/entry_points.txt +2 -0
- quasarr-1.20.6.dist-info/licenses/LICENSE +21 -0
- quasarr-1.20.6.dist-info/top_level.txt +1 -0
quasarr/downloads/sources/sf.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+# Quasarr
+# Project by https://github.com/rix1337
+
+import re
+from datetime import datetime
+
+import requests
+from bs4 import BeautifulSoup
+
+from quasarr.providers.log import info, debug
+from quasarr.search.sources.sf import parse_mirrors
+
+
+def is_last_section_integer(url):
+    last_section = url.rstrip('/').split('/')[-1]
+    if last_section.isdigit() and len(last_section) <= 3:
+        return int(last_section)
+    return None
+
+
+def get_sf_download_links(shared_state, url, mirror, title):  # signature must align with other download link functions!
+    release_pattern = re.compile(
+        r'''
+        ^                       # start of string
+        (?P<name>.+?)\.         # show name (dots in name) up to the dot before “S”
+        S(?P<season>\d+)        # “S” + season number
+        (?:E\d+(?:-E\d+)?)?     # optional “E##” or “E##-E##”
+        \.                      # literal dot
+        .*?\.                   # anything (e.g. language/codec) up to next dot
+        (?P<resolution>\d+p)    # resolution “720p”, “1080p”, etc.
+        \..+?                   # dot + more junk (e.g. “.WEB.h264”)
+        -(?P<group>\w+)         # dash + release group at end
+        $                       # end of string
+        ''',
+        re.IGNORECASE | re.VERBOSE
+    )
+
+    release_match = release_pattern.match(title)
+
+    if not release_match:
+        return {
+            "real_url": None,
+            "imdb_id": None,
+        }
+
+    release_parts = release_match.groupdict()
+
+    season = is_last_section_integer(url)
+    try:
+        if not season:
+            season = "ALL"
+
+        sf = shared_state.values["config"]("Hostnames").get("sf")
+        headers = {
+            'User-Agent': shared_state.values["user_agent"],
+        }
+
+        series_page = requests.get(url, headers=headers, timeout=10).text
+
+        soup = BeautifulSoup(series_page, "html.parser")
+        # extract IMDb id if present
+        imdb_id = None
+        a_imdb = soup.find("a", href=re.compile(r"imdb\.com/title/tt\d+"))
+        if a_imdb:
+            m = re.search(r"(tt\d+)", a_imdb["href"])
+            if m:
+                imdb_id = m.group(1)
+                debug(f"Found IMDb id: {imdb_id}")
+
+        season_id = re.findall(r"initSeason\('(.+?)\',", series_page)[0]
+        epoch = str(datetime.now().timestamp()).replace('.', '')[:-3]
+        api_url = 'https://' + sf + '/api/v1/' + season_id + f'/season/{season}?lang=ALL&_=' + epoch
+
+        response = requests.get(api_url, headers=headers, timeout=10)
+        try:
+            data = response.json()["html"]
+        except ValueError:
+            epoch = str(datetime.now().timestamp()).replace('.', '')[:-3]
+            api_url = 'https://' + sf + '/api/v1/' + season_id + f'/season/ALL?lang=ALL&_=' + epoch
+            response = requests.get(api_url, headers=headers, timeout=10)
+            data = response.json()["html"]
+
+        content = BeautifulSoup(data, "html.parser")
+
+        items = content.find_all("h3")
+
+        for item in items:
+            try:
+                details = item.parent.parent.parent
+                name = details.find("small").text.strip()
+
+                result_pattern = re.compile(
+                    r'^(?P<name>.+?)\.S(?P<season>\d+)(?:E\d+)?\..*?(?P<resolution>\d+p)\..+?-(?P<group>[\w/-]+)$',
+                    re.IGNORECASE
+                )
+                result_match = result_pattern.match(name)
+
+                if not result_match:
+                    continue
+
+                result_parts = result_match.groupdict()
+
+                # Normalize all relevant fields for case-insensitive comparison
+                name_match = release_parts['name'].lower() == result_parts['name'].lower()
+                season_match = release_parts['season'] == result_parts['season']  # Numbers are case-insensitive
+                resolution_match = release_parts['resolution'].lower() == result_parts['resolution'].lower()
+
+                # Handle multiple groups and case-insensitive matching
+                result_groups = {g.lower() for g in result_parts['group'].split('/')}
+                release_groups = {g.lower() for g in release_parts['group'].split('/')}
+                group_match = not result_groups.isdisjoint(release_groups)  # Checks if any group matches
+
+                if name_match and season_match and resolution_match and group_match:
+                    info(f'Release "{name}" found on SF at: {url}')
+
+                    mirrors = parse_mirrors(f"https://{sf}", details)
+
+                    if mirror:
+                        if mirror not in mirrors["season"]:
+                            continue
+                        release_url = mirrors["season"][mirror]
+                        if not release_url:
+                            info(f"Could not find mirror '{mirror}' for '{title}'")
+                    else:
+                        release_url = next(iter(mirrors["season"].values()))
+
+                    real_url = resolve_sf_redirect(release_url, shared_state.values["user_agent"])
+                    return {
+                        "real_url": real_url,
+                        "imdb_id": imdb_id,
+                    }
+            except:
+                continue
+    except:
+        pass
+
+    return {
+        "real_url": None,
+        "imdb_id": None,
+    }
+
+
+def resolve_sf_redirect(url, user_agent):
+    try:
+        response = requests.get(url, allow_redirects=True, timeout=10,
+                                headers={'User-Agent': user_agent})
+        if response.history:
+            for resp in response.history:
+                debug(f"Redirected from {resp.url} to {response.url}")
+            if "/404.html" in response.url:
+                info(f"SF link redirected to 404 page: {response.url}")
+                return None
+            return response.url
+        else:
+            info(f"SF blocked attempt to resolve {url}. Your IP may be banned. Try again later.")
+    except Exception as e:
+        info(f"Error fetching redirected URL for {url}: {e}")
+    return None
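For a quick sense of what the release_pattern above extracts, here is a minimal standalone sketch using an equivalent one-line form of the same regex; the sample title is invented for illustration and not taken from the package:

import re

# Equivalent one-line form of the verbose release_pattern in sf.py above
release_pattern = re.compile(
    r'^(?P<name>.+?)\.S(?P<season>\d+)(?:E\d+(?:-E\d+)?)?\..*?\.(?P<resolution>\d+p)\..+?-(?P<group>\w+)$',
    re.IGNORECASE
)

# Hypothetical release title
match = release_pattern.match("Some.Show.S02E03.German.1080p.WEB.h264-GRP")
if match:
    print(match.groupdict())
    # {'name': 'Some.Show', 'season': '02', 'resolution': '1080p', 'group': 'GRP'}

These captured fields are what the function later compares, case-insensitively, against each result parsed from the SF season listing.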
quasarr/downloads/sources/sl.py
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+# Quasarr
+# Project by https://github.com/rix1337
+
+import re
+from urllib.parse import urlparse
+
+import requests
+from bs4 import BeautifulSoup
+
+from quasarr.providers.log import info, debug
+
+supported_mirrors = ["nitroflare", "ddownload"]  # ignoring captcha-protected multiup/mirrorace for now
+
+
+def get_sl_download_links(shared_state, url, mirror, title):  # signature must align with other download link functions!
+    headers = {"User-Agent": shared_state.values["user_agent"]}
+    session = requests.Session()
+
+    try:
+        resp = session.get(url, headers=headers, timeout=10)
+        soup = BeautifulSoup(resp.text, "html.parser")
+
+        entry = soup.find("div", class_="entry")
+        if not entry:
+            info(f"Could not find main content section for {title}")
+            return False
+
+        # extract IMDb id if present
+        imdb_id = None
+        a_imdb = soup.find("a", href=re.compile(r"imdb\.com/title/tt\d+"))
+        if a_imdb:
+            m = re.search(r"(tt\d+)", a_imdb["href"])
+            if m:
+                imdb_id = m.group(1)
+                debug(f"Found IMDb id: {imdb_id}")
+
+        download_h2 = entry.find(
+            lambda t: t.name == "h2" and "download" in t.get_text(strip=True).lower()
+        )
+        if download_h2:
+            anchors = []
+            for sib in download_h2.next_siblings:
+                if getattr(sib, "name", None) == "h2":
+                    break
+                if hasattr(sib, "find_all"):
+                    anchors += sib.find_all("a", href=True)
+        else:
+            anchors = entry.find_all("a", href=True)
+
+    except Exception as e:
+        info(f"SL site has been updated. Grabbing download links for {title} not possible! ({e})")
+        return False
+
+    filtered = []
+    for a in anchors:
+        href = a["href"].strip()
+        if not href.lower().startswith(("http://", "https://")):
+            continue
+
+        host = (urlparse(href).hostname or "").lower()
+        # require host to start with one of supported_mirrors + "."
+        if not any(host.startswith(m + ".") for m in supported_mirrors):
+            continue
+
+        if not mirror or mirror in href:
+            filtered.append(href)
+
+    # regex fallback if still empty
+    if not filtered:
+        text = "".join(str(x) for x in anchors)
+        urls = re.findall(r"https?://[^\s<>'\"]+", text)
+        seen = set()
+        for u in urls:
+            u = u.strip()
+            if u in seen:
+                continue
+            seen.add(u)
+
+            host = (urlparse(u).hostname or "").lower()
+            if not any(host.startswith(m + ".") for m in supported_mirrors):
+                continue
+
+            if not mirror or mirror in u:
+                filtered.append(u)
+
+    return {
+        "links": filtered,
+        "imdb_id": imdb_id,
+    }
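The host filter in get_sl_download_links only accepts links whose hostname begins with a supported mirror name followed by a dot, which rejects lookalike domains. A standalone sketch of that check, with invented URLs:

from urllib.parse import urlparse

supported_mirrors = ["nitroflare", "ddownload"]

def is_supported(href):
    # hostname must start with "<mirror>." exactly, e.g. "ddownload.com"
    host = (urlparse(href).hostname or "").lower()
    return any(host.startswith(m + ".") for m in supported_mirrors)

print(is_supported("https://ddownload.com/abc123"))       # True
print(is_supported("https://evil-ddownload.example/x"))   # False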
quasarr/downloads/sources/wd.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+# Quasarr
+# Project by https://github.com/rix1337
+
+import re
+from urllib.parse import urljoin
+
+import requests
+from bs4 import BeautifulSoup
+
+from quasarr.providers.cloudflare import flaresolverr_get, is_cloudflare_challenge
+from quasarr.providers.log import info, debug
+
+
+def resolve_wd_redirect(url, user_agent):
+    """
+    Follow redirects for a WD mirror URL and return the final destination.
+    """
+    try:
+        response = requests.get(
+            url,
+            allow_redirects=True,
+            timeout=10,
+            headers={"User-Agent": user_agent},
+        )
+        if response.history:
+            for resp in response.history:
+                debug(f"Redirected from {resp.url} to {response.url}")
+            return response.url
+        else:
+            info(f"WD blocked attempt to resolve {url}. Your IP may be banned. Try again later.")
+    except Exception as e:
+        info(f"Error fetching redirected URL for {url}: {e}")
+    return None
+
+
+def get_wd_download_links(shared_state, url, mirror, title):  # signature must align with other download link functions!
+    wd = shared_state.values["config"]("Hostnames").get("wd")
+    user_agent = shared_state.values["user_agent"]
+
+    try:
+        output = requests.get(url)
+        if output.status_code == 403 or is_cloudflare_challenge(output.text):
+            info("WD is protected by Cloudflare. Using FlareSolverr to bypass protection.")
+            output = flaresolverr_get(shared_state, url)
+
+        soup = BeautifulSoup(output.text, "html.parser")
+
+        # extract IMDb id if present
+        imdb_id = None
+        a_imdb = soup.find("a", href=re.compile(r"imdb\.com/title/tt\d+"))
+        if a_imdb:
+            m = re.search(r"(tt\d+)", a_imdb["href"])
+            if m:
+                imdb_id = m.group(1)
+                debug(f"Found IMDb id: {imdb_id}")
+
+        # find Downloads card
+        header = soup.find(
+            "div",
+            class_="card-header",
+            string=re.compile(r"^\s*Downloads\s*$", re.IGNORECASE),
+        )
+        if not header:
+            info(f"WD Downloads section not found. Grabbing download links for {title} not possible!")
+            return False
+
+        card = header.find_parent("div", class_="card")
+        body = card.find("div", class_="card-body")
+        link_tags = body.find_all(
+            "a", href=True, class_=lambda c: c and "background-" in c
+        )
+    except Exception:
+        info(f"WD site has been updated. Grabbing download links for {title} not possible!")
+        return False
+
+    results = []
+    try:
+        for a in link_tags:
+            raw_href = a["href"]
+            full_link = urljoin(f"https://{wd}", raw_href)
+
+            # resolve any redirects
+            resolved = resolve_wd_redirect(full_link, user_agent)
+
+            if resolved:
+                if resolved.endswith("/404.html"):
+                    info(f"Link {resolved} is dead!")
+                    continue
+
+                # determine hoster
+                hoster = a.get_text(strip=True) or None
+                if not hoster:
+                    for cls in a.get("class", []):
+                        if cls.startswith("background-"):
+                            hoster = cls.split("-", 1)[1]
+                            break
+
+                if mirror and mirror.lower() not in hoster.lower():
+                    debug(f'Skipping link from "{hoster}" (not the desired mirror "{mirror}")!')
+                    continue
+
+                results.append([resolved, hoster])
+    except Exception:
+        info(f"WD site has been updated. Parsing download links for {title} not possible!")

+    return {
+        "links": results,
+        "imdb_id": imdb_id,
+    }
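get_wd_download_links falls back to the anchor's CSS class to name the hoster when the link has no visible text. A standalone sketch of that extraction, using invented markup shaped like what the parser expects:

from bs4 import BeautifulSoup

html = '<a class="btn background-rapidgator" href="/out/1"></a>'
a = BeautifulSoup(html, "html.parser").find("a")

hoster = a.get_text(strip=True) or None
if not hoster:
    for cls in a.get("class", []):
        if cls.startswith("background-"):
            # "background-rapidgator" -> "rapidgator"
            hoster = cls.split("-", 1)[1]
            break

print(hoster)  # rapidgator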
quasarr/providers/__init__.py: File without changes
quasarr/providers/cloudflare.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+# Quasarr
+# Project by https://github.com/rix1337
+
+import requests
+from bs4 import BeautifulSoup
+
+
+def is_cloudflare_challenge(html: str) -> bool:
+    soup = BeautifulSoup(html, "html.parser")
+
+    # Check <title>
+    title = (soup.title.string or "").strip().lower() if soup.title else ""
+    if "just a moment" in title or "attention required" in title:
+        return True
+
+    # Check known Cloudflare elements
+    if soup.find(id="challenge-form"):
+        return True
+    if soup.find("div", {"class": "cf-browser-verification"}):
+        return True
+    if soup.find("div", {"id": "cf-challenge-running"}):
+        return True
+
+    # Check scripts referencing Cloudflare
+    for script in soup.find_all("script", src=True):
+        if "cdn-cgi/challenge-platform" in script["src"]:
+            return True
+
+    # Optional: look for Cloudflare comment or beacon
+    if "data-cf-beacon" in html or "<!-- cloudflare -->" in html.lower():
+        return True
+
+    return False
+
+
+def update_session_via_flaresolverr(info,
+                                    shared_state,
+                                    sess,
+                                    target_url: str,
+                                    timeout: int = 60):
+    flaresolverr_url = shared_state.values["config"]('FlareSolverr').get('url')
+    if not flaresolverr_url:
+        info("Cannot proceed without FlareSolverr. Please set it up to try again!")
+        return False
+
+    fs_payload = {
+        "cmd": "request.get",
+        "url": target_url,
+        "maxTimeout": timeout * 1000,
+    }
+
+    # Send the JSON request to FlareSolverr
+    fs_headers = {"Content-Type": "application/json"}
+    try:
+        resp = requests.post(
+            flaresolverr_url,
+            headers=fs_headers,
+            json=fs_payload,
+            timeout=timeout + 10
+        )
+        resp.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        info(f"Could not reach FlareSolverr: {e}")
+        return {
+            "status_code": None,
+            "headers": {},
+            "json": None,
+            "text": "",
+            "cookies": [],
+            "error": f"FlareSolverr request failed: {e}"
+        }
+    except Exception as e:
+        raise RuntimeError(f"Could not reach FlareSolverr: {e}")
+
+    fs_json = resp.json()
+    if fs_json.get("status") != "ok" or "solution" not in fs_json:
+        raise RuntimeError(f"FlareSolverr did not return a valid solution: {fs_json.get('message', '<no message>')}")
+
+    solution = fs_json["solution"]
+
+    # Replace our requests.Session cookies with whatever FlareSolverr solved
+    sess.cookies.clear()
+    for ck in solution.get("cookies", []):
+        sess.cookies.set(
+            ck.get("name"),
+            ck.get("value"),
+            domain=ck.get("domain"),
+            path=ck.get("path", "/")
+        )
+    return {"session": sess, "user_agent": solution.get("userAgent", None)}
+
+
+def ensure_session_cf_bypassed(info, shared_state, session, url, headers):
+    """
+    Performs a GET and, if Cloudflare challenge or 403 is present, tries FlareSolverr.
+    Returns tuple: (session, headers, response) or (None, None, None) on failure.
+    """
+    try:
+        resp = session.get(url, headers=headers, timeout=30)
+    except requests.RequestException as e:
+        info(f"Initial GET failed: {e}")
+        return None, None, None
+
+    # If page is protected, try FlareSolverr
+    if resp.status_code == 403 or is_cloudflare_challenge(resp.text):
+        info("Encountered Cloudflare protection. Solving challenge with FlareSolverr...")
+        flaresolverr_result = update_session_via_flaresolverr(info, shared_state, session, url)
+        if not flaresolverr_result:
+            info("FlareSolverr did not return a result.")
+            return None, None, None
+
+        # update session and possibly user-agent
+        session = flaresolverr_result.get("session", session)
+        user_agent = flaresolverr_result.get("user_agent")
+        if user_agent and user_agent != shared_state.values.get("user_agent"):
+            info("Updating User-Agent from FlareSolverr solution: " + user_agent)
+            shared_state.update("user_agent", user_agent)
+            headers = {'User-Agent': shared_state.values["user_agent"]}
+
+        # re-fetch using the new session/headers
+        try:
+            resp = session.get(url, headers=headers, timeout=30)
+        except requests.RequestException as e:
+            info(f"GET after FlareSolverr failed: {e}")
+            return None, None, None
+
+        if resp.status_code == 403 or is_cloudflare_challenge(resp.text):
+            info("Could not bypass Cloudflare protection with FlareSolverr!")
+            return None, None, None
+
+    return session, headers, resp
+
+
+class FlareSolverrResponse:
+    """
+    Minimal Response-like object so it behaves like requests.Response.
+    """
+
+    def __init__(self, url, status_code, headers, text):
+        self.url = url
+        self.status_code = status_code
+        self.headers = headers or {}
+        self.text = text or ""
+        self.content = self.text.encode("utf-8")
+
+        # Cloudflare cookies are irrelevant here, but keep attribute for compatibility
+        self.cookies = requests.cookies.RequestsCookieJar()
+
+    def raise_for_status(self):
+        if 400 <= self.status_code:
+            raise requests.HTTPError(f"{self.status_code} Error for URL: {self.url}")
+
+
+def flaresolverr_get(shared_state, url, timeout=60):
+    """
+    Core function for performing a GET request via FlareSolverr only.
+    Used internally by FlareSolverrSession.get()
+    """
+    flaresolverr_url = shared_state.values["config"]('FlareSolverr').get('url')
+    if not flaresolverr_url:
+        raise RuntimeError("FlareSolverr URL not configured in shared_state.")
+
+    payload = {
+        "cmd": "request.get",
+        "url": url,
+        "maxTimeout": timeout * 1000
+    }
+
+    try:
+        resp = requests.post(
+            flaresolverr_url,
+            json=payload,
+            headers={"Content-Type": "application/json"},
+            timeout=timeout + 10
+        )
+        resp.raise_for_status()
+    except Exception as e:
+        raise RuntimeError(f"Error communicating with FlareSolverr: {e}")
+
+    data = resp.json()
+
+    if data.get("status") != "ok":
+        raise RuntimeError(f"FlareSolverr returned error: {data.get('message')}")
+
+    solution = data.get("solution", {})
+    html = solution.get("response", "")
+    status_code = solution.get("status", 200)
+    url = solution.get("url", url)
+
+    # headers → convert list-of-keyvals to dict
+    fs_headers = {h["name"]: h["value"] for h in solution.get("headers", [])}
+
+    # Update global UA if provided
+    user_agent = solution.get("userAgent")
+    if user_agent and user_agent != shared_state.values.get("user_agent"):
+        shared_state.update("user_agent", user_agent)
+
+    return FlareSolverrResponse(
+        url=url,
+        status_code=status_code,
+        headers=fs_headers,
+        text=html
+    )
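The payloads built in this module follow FlareSolverr's request.get command. As a standalone sketch of the same call outside Quasarr (the endpoint shown is the conventional FlareSolverr default; adjust it to your deployment):

import requests

FLARESOLVERR_URL = "http://localhost:8191/v1"  # assumed default endpoint

payload = {
    "cmd": "request.get",
    "url": "https://example.com/",
    "maxTimeout": 60000,  # milliseconds, matching timeout * 1000 above
}

resp = requests.post(FLARESOLVERR_URL, json=payload, timeout=70)
data = resp.json()
if data.get("status") == "ok":
    solution = data["solution"]
    # solution carries the solved page: status, headers, cookies, userAgent, response (HTML)
    print(solution.get("status"), solution.get("userAgent"))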