StreamingCommunity 2.0.5__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of StreamingCommunity might be problematic.
- StreamingCommunity/Api/Player/Helper/Vixcloud/util.py +15 -24
- StreamingCommunity/Api/Site/1337xx/site.py +9 -6
- StreamingCommunity/Api/Site/1337xx/title.py +2 -2
- StreamingCommunity/Api/Site/altadefinizione/costant.py +6 -2
- StreamingCommunity/Api/Site/altadefinizione/film.py +2 -2
- StreamingCommunity/Api/Site/altadefinizione/site.py +28 -22
- StreamingCommunity/Api/Site/animeunity/costant.py +6 -2
- StreamingCommunity/Api/Site/animeunity/film_serie.py +3 -3
- StreamingCommunity/Api/Site/animeunity/site.py +27 -19
- StreamingCommunity/Api/Site/cb01new/costant.py +6 -2
- StreamingCommunity/Api/Site/cb01new/film.py +2 -2
- StreamingCommunity/Api/Site/cb01new/site.py +20 -13
- StreamingCommunity/Api/Site/ddlstreamitaly/costant.py +6 -2
- StreamingCommunity/Api/Site/ddlstreamitaly/series.py +2 -2
- StreamingCommunity/Api/Site/ddlstreamitaly/site.py +9 -5
- StreamingCommunity/Api/Site/guardaserie/costant.py +6 -2
- StreamingCommunity/Api/Site/guardaserie/series.py +2 -3
- StreamingCommunity/Api/Site/guardaserie/site.py +10 -6
- StreamingCommunity/Api/Site/ilcorsaronero/costant.py +6 -2
- StreamingCommunity/Api/Site/ilcorsaronero/site.py +22 -13
- StreamingCommunity/Api/Site/ilcorsaronero/title.py +3 -3
- StreamingCommunity/Api/Site/mostraguarda/costant.py +6 -2
- StreamingCommunity/Api/Site/mostraguarda/film.py +2 -2
- StreamingCommunity/Api/Site/streamingcommunity/costant.py +7 -3
- StreamingCommunity/Api/Site/streamingcommunity/film.py +3 -3
- StreamingCommunity/Api/Site/streamingcommunity/series.py +2 -2
- StreamingCommunity/Api/Site/streamingcommunity/site.py +29 -28
- StreamingCommunity/Api/Site/streamingcommunity/util/ScrapeSerie.py +24 -24
- StreamingCommunity/Api/Template/Util/get_domain.py +100 -137
- StreamingCommunity/Lib/Downloader/HLS/downloader.py +3 -2
- StreamingCommunity/Lib/Downloader/HLS/segments.py +21 -17
- StreamingCommunity/Lib/M3U8/estimator.py +131 -106
- StreamingCommunity/Upload/version.py +1 -1
- StreamingCommunity/Util/ffmpeg_installer.py +8 -5
- {StreamingCommunity-2.0.5.dist-info → StreamingCommunity-2.3.0.dist-info}/METADATA +36 -19
- {StreamingCommunity-2.0.5.dist-info → StreamingCommunity-2.3.0.dist-info}/RECORD +40 -40
- {StreamingCommunity-2.0.5.dist-info → StreamingCommunity-2.3.0.dist-info}/LICENSE +0 -0
- {StreamingCommunity-2.0.5.dist-info → StreamingCommunity-2.3.0.dist-info}/WHEEL +0 -0
- {StreamingCommunity-2.0.5.dist-info → StreamingCommunity-2.3.0.dist-info}/entry_points.txt +0 -0
- {StreamingCommunity-2.0.5.dist-info → StreamingCommunity-2.3.0.dist-info}/top_level.txt +0 -0

@@ -2,6 +2,7 @@
 
 
 # Internal utilities
+from StreamingCommunity.Util._jsonConfig import config_manager
 from StreamingCommunity.Util.table import TVShowManager
 
 
@@ -13,9 +14,11 @@ from .util.ilCorsarScraper import IlCorsaroNeroScraper
 
 
 # Variable
-from .costant import SITE_NAME
+from .costant import SITE_NAME, DOMAIN_NOW
 media_search_manager = MediaManager()
 table_show_manager = TVShowManager()
+max_timeout = config_manager.get_int("REQUESTS", "timeout")
+disable_searchDomain = config_manager.get_bool("DEFAULT", "disable_searchDomain")
 
 
 async def title_search(word_to_search: str) -> int:
@@ -32,25 +35,31 @@ async def title_search(word_to_search: str) -> int:
     table_show_manager.clear()
 
     # Find new domain if prev dont work
-    domain_to_use
+    domain_to_use = DOMAIN_NOW
+
+    if not disable_searchDomain:
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
 
     # Create scraper and collect result
     print("\n")
     scraper = IlCorsaroNeroScraper(f"https://{SITE_NAME}.{domain_to_use}/", 1)
     results = await scraper.search(word_to_search)
 
-    # Add all result to media manager
     for i, torrent in enumerate(results):
-
-
-
-
-
-
-
-
-
-
+        try:
+
+            media_search_manager.add_media({
+                'name': torrent['name'],
+                'type': torrent['type'],
+                'seed': torrent['seed'],
+                'leech': torrent['leech'],
+                'size': torrent['size'],
+                'date': torrent['date'],
+                'url': torrent['url']
+            })
+
+        except Exception as e:
+            print(f"Error parsing a film entry: {e}")
 
     # Return the number of titles found
     return media_search_manager.get_length()
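
The pattern above — seed domain_to_use from the configured domain and only call search_domain when the lookup is not disabled — repeats across the site modules in this release. A minimal sketch of that flow, with search_domain stubbed out (the stub and the sample values are illustrative assumptions; the real resolver lives in Api/Template/Util/get_domain.py, diffed further below):

    def search_domain(site_name: str, base_url: str):
        # Stub standing in for the real resolver (see the get_domain.py hunk below).
        return "example", f"{base_url}.example"

    DOMAIN_NOW = "example"            # assumed SITE.<site>.domain value from config.json
    disable_searchDomain = False      # assumed DEFAULT.disable_searchDomain value

    def resolve_domain(site_name: str) -> str:
        domain_to_use = DOMAIN_NOW
        if not disable_searchDomain:
            # Re-validate (and possibly rewrite) the domain before scraping
            domain_to_use, _base_url = search_domain(site_name, f"https://{site_name}")
        return domain_to_use

    print(resolve_domain("ilcorsaronero"))   # -> "example" under the stub
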
@@ -15,7 +15,7 @@ from StreamingCommunity.Api.Template.Class.SearchType import MediaItem
 
 
 # Config
-from .costant import
+from .costant import MOVIE_FOLDER
 
 
 def download_title(select_title: MediaItem):
@@ -27,13 +27,13 @@ def download_title(select_title: MediaItem):
     """
 
     start_message()
-    console.print(f"[yellow]Download:
+    console.print(f"[yellow]Download: [red]{select_title.name} \n")
     print()
 
     # Define output path
     title_name = os_manager.get_sanitize_file(select_title.name)
     mp4_path = os_manager.get_sanitize_path(
-        os.path.join(
+        os.path.join(MOVIE_FOLDER, title_name.replace(".mp4", ""))
     )
 
     # Create output folder
@@ -11,5 +11,9 @@ SITE_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
 ROOT_PATH = config_manager.get('DEFAULT', 'root_path')
 DOMAIN_NOW = config_manager.get_dict('SITE', SITE_NAME)['domain']
 
-SERIES_FOLDER = config_manager.get('DEFAULT', 'serie_folder_name')
-MOVIE_FOLDER = config_manager.get('DEFAULT', 'movie_folder_name')
+SERIES_FOLDER = os.path.join(ROOT_PATH, config_manager.get('DEFAULT', 'serie_folder_name'))
+MOVIE_FOLDER = os.path.join(ROOT_PATH, config_manager.get('DEFAULT', 'movie_folder_name'))
+
+if config_manager.get_bool("DEFAULT", "add_siteName"):
+    SERIES_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, config_manager.get('DEFAULT', 'serie_folder_name'))
+    MOVIE_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, config_manager.get('DEFAULT', 'movie_folder_name'))
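
The folder constants now bake root_path into the download paths, and the new add_siteName flag inserts a per-site subfolder. A quick sketch of the resulting layouts, using invented config values:

    import os

    # Invented values standing in for config.json entries
    ROOT_PATH = "Video"
    SITE_NAME = "altadefinizione"
    serie_folder, movie_folder = "Serie", "Movie"

    # add_siteName = False
    print(os.path.join(ROOT_PATH, movie_folder))              # Video/Movie
    # add_siteName = True
    print(os.path.join(ROOT_PATH, SITE_NAME, movie_folder))   # Video/altadefinizione/Movie
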
@@ -33,7 +33,7 @@ from StreamingCommunity.Lib.TMBD import Json_film
 
 
 # Config
-from .costant import
+from .costant import SITE_NAME, DOMAIN_NOW, MOVIE_FOLDER
 
 
 def download_film(movie_details: Json_film) -> str:
@@ -75,7 +75,7 @@ def download_film(movie_details: Json_film) -> str:
 
     # Define output path
     title_name = os_manager.get_sanitize_file(movie_details.title) + ".mp4"
-    mp4_path = os.path.join(
+    mp4_path = os.path.join(MOVIE_FOLDER, title_name.replace(".mp4", ""))
 
     # Get m3u8 master playlist
     master_playlist = video_source.get_playlist()
@@ -9,7 +9,11 @@ from StreamingCommunity.Util._jsonConfig import config_manager
 
 SITE_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
 ROOT_PATH = config_manager.get('DEFAULT', 'root_path')
-DOMAIN_NOW = config_manager.
+DOMAIN_NOW = config_manager.get_dict('SITE', SITE_NAME)['domain']
 
-SERIES_FOLDER = config_manager.get('DEFAULT', 'serie_folder_name')
-MOVIE_FOLDER = config_manager.get('DEFAULT', 'movie_folder_name')
+SERIES_FOLDER = os.path.join(ROOT_PATH, config_manager.get('DEFAULT', 'serie_folder_name'))
+MOVIE_FOLDER = os.path.join(ROOT_PATH, config_manager.get('DEFAULT', 'movie_folder_name'))
+
+if config_manager.get_bool("DEFAULT", "add_siteName"):
+    SERIES_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, config_manager.get('DEFAULT', 'serie_folder_name'))
+    MOVIE_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, config_manager.get('DEFAULT', 'movie_folder_name'))
@@ -22,7 +22,7 @@ from StreamingCommunity.Api.Player.vixcloud import VideoSource
 
 
 # Variable
-from .costant import
+from .costant import SITE_NAME, MOVIE_FOLDER
 
 
 def download_film(select_title: MediaItem) -> str:
@@ -51,8 +51,8 @@ def download_film(select_title: MediaItem) -> str:
     master_playlist = video_source.get_playlist()
 
     # Define the filename and path for the downloaded film
-    title_name = os_manager.get_sanitize_file(select_title.
-    mp4_path = os.path.join(
+    title_name = os_manager.get_sanitize_file(select_title.name) + ".mp4"
+    mp4_path = os.path.join(MOVIE_FOLDER, select_title.name)
 
     # Download the film using the m3u8 playlist, and output filename
     r_proc = HLS_Downloader(
@@ -24,7 +24,7 @@ from StreamingCommunity.Api.Player.vixcloud import VideoSource
 
 
 # Variable
-from .costant import
+from .costant import SITE_NAME, SERIES_FOLDER
 
 
 
@@ -48,7 +48,7 @@ def download_video(index_season_selected: int, index_episode_selected: int, scra
 
     # Define filename and path for the downloaded video
     mp4_name = f"{map_episode_title(scrape_serie.series_name, index_season_selected, index_episode_selected, obj_episode.name)}.mp4"
-    mp4_path = os.path.join(
+    mp4_path = os.path.join(SERIES_FOLDER, scrape_serie.series_name, f"S{index_season_selected}")
 
     # Retrieve scws and if available master playlist
     video_source.get_iframe(obj_episode.id)
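
Episode files therefore land in a per-series, per-season tree under the new SERIES_FOLDER. A sketch of the assembled path, with invented values:

    import os

    SERIES_FOLDER = os.path.join("Video", "Serie")   # assumed resolved constant
    series_name, season = "Some Show", 2
    print(os.path.join(SERIES_FOLDER, series_name, f"S{season}"))
    # -> Video/Serie/Some Show/S2
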
@@ -26,30 +26,36 @@ from StreamingCommunity.Api.Template.Class.SearchType import MediaManager
 
 
 # Config
-from .costant import SITE_NAME
+from .costant import SITE_NAME, DOMAIN_NOW
 
 
 # Variable
 media_search_manager = MediaManager()
 table_show_manager = TVShowManager()
 max_timeout = config_manager.get_int("REQUESTS", "timeout")
+disable_searchDomain = config_manager.get_bool("DEFAULT", "disable_searchDomain")
 
 
-def get_version(
+def get_version(domain: str):
     """
     Extracts the version from the HTML text of a webpage.
 
     Parameters:
-        -
+        - domain (str): The domain of the site.
 
     Returns:
         str: The version extracted from the webpage.
-        list: Top 10 titles headlines for today.
     """
     try:
+        response = httpx.get(
+            url=f"https://{SITE_NAME}.{domain}/",
+            headers={'User-Agent': get_headers()},
+            timeout=max_timeout
+        )
+        response.raise_for_status()
 
         # Parse request to site
-        soup = BeautifulSoup(text, "html.parser")
+        soup = BeautifulSoup(response.text, "html.parser")
 
         # Extract version
         version = json.loads(soup.find("div", {"id": "app"}).get("data-page"))['version']
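
get_version now performs its own request and reads the site's Inertia.js state, which the server embeds as JSON in the data-page attribute of the #app div. A standalone sketch of that extraction, with a made-up HTML snippet standing in for the real homepage:

    import json
    from bs4 import BeautifulSoup

    # Made-up miniature of the real page; the actual attribute holds the full
    # Inertia page object.
    html = '<div id="app" data-page=\'{"version": "abc123", "props": {}}\'></div>'

    soup = BeautifulSoup(html, "html.parser")
    page = json.loads(soup.find("div", {"id": "app"}).get("data-page"))
    print(page["version"])   # -> abc123
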
@@ -72,22 +78,13 @@ def get_version_and_domain():
     """
 
     # Find new domain if prev dont work
-    domain_to_use
+    domain_to_use = DOMAIN_NOW
 
-
-
-    version = get_version(
-        httpx.get(
-            url=base_url,
-            headers={'User-Agent': get_headers()},
-            timeout=max_timeout
-        ).text
-    )
-
-    except:
-        console.print("[green]Auto generate version ...")
-        version = secrets.token_hex(32 // 2)
+    if not disable_searchDomain:
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
 
+    version = get_version(domain_to_use)
+
     return version, domain_to_use
 
 
@@ -116,16 +113,20 @@ def title_search(title_search: str, domain: str) -> int:
     except Exception as e:
         console.print(f"Site: {SITE_NAME}, request search error: {e}")
 
-    # Add found titles to media search manager
     for dict_title in response.json()['data']:
-
-
-
-
-
-
-
-
+        try:
+
+            media_search_manager.add_media({
+                'id': dict_title.get('id'),
+                'slug': dict_title.get('slug'),
+                'name': dict_title.get('name'),
+                'type': dict_title.get('type'),
+                'date': dict_title.get('last_air_date'),
+                'score': dict_title.get('score')
+            })
+
+        except Exception as e:
+            print(f"Error parsing a film entry: {e}")
 
     # Return the number of titles found
     return media_search_manager.get_length()
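
Both rewritten search loops apply the same hardening: dict.get() yields None instead of raising KeyError for missing fields, and the per-entry try/except skips one bad record instead of aborting the whole search. A tiny sketch with made-up API entries:

    entries = [
        {"id": 1, "name": "Title A", "score": 8.1},
        {"id": 2},                          # incomplete entry
    ]

    collected = []
    for entry in entries:
        try:
            collected.append({
                "id": entry.get("id"),
                "name": entry.get("name"),  # None, not KeyError
                "score": entry.get("score"),
            })
        except Exception as e:
            print(f"Error parsing a film entry: {e}")

    print(len(collected))   # 2 - the incomplete entry survives with None fields
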
@@ -1,10 +1,12 @@
 # 01.03.24
 
+import json
 import logging
 
 
 # External libraries
 import httpx
+from bs4 import BeautifulSoup
 
 
 # Internal utilities
@@ -56,33 +58,33 @@ class ScrapeSerie:
         Raises:
             Exception: If there's an error fetching season information
         """
-        self.headers = {
-            'user-agent': get_headers(),
-            'x-inertia': 'true',
-            'x-inertia-version': self.version,
-        }
-
         try:
-
             response = httpx.get(
-                url=f"https://{self.base_name}.{self.domain}/titles/{self.media_id}-{self.series_name}",
-                headers=self.headers,
+                url=f"https://{self.base_name}.{self.domain}/titles/{self.media_id}-{self.series_name}",
+                headers=self.headers,
                 timeout=max_timeout
             )
             response.raise_for_status()
 
             # Extract seasons from JSON response
-
+            soup = BeautifulSoup(response.text, "html.parser")
+            json_response = json.loads(soup.find("div", {"id": "app"}).get("data-page"))
+
+            """
+            response = httpx.post(
+                url=f'https://{self.base_name}.{self.domain}/api/titles/preview/{self.media_id}',
+                headers={'User-Agent': get_headers()}
+            )
+            response.raise_for_status()
+
+
+            # Extract seasons from JSON response
+            json_response = response.json()
+            """
 
             # Collect info about season
-            self.season_manager = Season(json_response.get(
-            self.season_manager.collect_images(self.base_name, self.domain)
+            self.season_manager = Season(json_response.get("props").get("title"))
 
-            # Collect first episode info
-            for i, ep in enumerate(json_response.get('loadedSeason').get('episodes')):
-                self.season_manager.episodes.add(ep)
-                self.season_manager.episodes.get(i).collect_image(self.base_name, self.domain)
-
         except Exception as e:
             logging.error(f"Error collecting season info: {e}")
             raise
@@ -97,16 +99,14 @@ class ScrapeSerie:
         Raises:
             Exception: If there's an error fetching episode information
         """
-        self.headers = {
-            'user-agent': get_headers(),
-            'x-inertia': 'true',
-            'x-inertia-version': self.version,
-        }
-
         try:
             response = httpx.get(
                 url=f'https://{self.base_name}.{self.domain}/titles/{self.media_id}-{self.series_name}/stagione-{number_season}',
-                headers=
+                headers={
+                    'User-Agent': get_headers(),
+                    'x-inertia': 'true',
+                    'x-inertia-version': self.version,
+                },
                 timeout=max_timeout
             )
             response.raise_for_status()
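
The x-inertia: true header is the standard Inertia.js handshake: the server answers with the bare page-object JSON rather than full HTML, provided x-inertia-version matches the deployed asset version. A hedged sketch of such a request (the URL shape comes from the diff; the concrete values are invented):

    import httpx

    base_name, domain, version = "streamingcommunity", "example", "abc123"   # invented

    response = httpx.get(
        f"https://{base_name}.{domain}/titles/1234-some-show/stagione-1",
        headers={
            "User-Agent": "Mozilla/5.0",      # placeholder UA
            "x-inertia": "true",              # request the JSON page object
            "x-inertia-version": version,     # a mismatch triggers a version refresh
        },
        timeout=10,
    )
    data = response.json()   # page props, including the season's episode list
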
@@ -1,6 +1,5 @@
 # 18.06.24
 
-import sys
 from urllib.parse import urlparse
 
 
@@ -15,160 +14,124 @@ from StreamingCommunity.Util.console import console, msg
 from StreamingCommunity.Util._jsonConfig import config_manager
 
 
-def
-    """
-
-
-
-
+def get_base_domain(url_str):
+    """Extract base domain without protocol, www and path"""
+    parsed = urlparse(url_str)
+    domain = parsed.netloc.lower()
+    if domain.startswith('www.'):
+        domain = domain[4:]
+    return domain.split('.')[0]
 
-
-        str: The first URL result from the search, or None if no result is found.
+def validate_url(url, base_url, max_timeout):
     """
-
-    search_results = search(query, num_results=1)
-
-    # Extract the first result
-    first_result = next(search_results, None)
-
-    if not first_result:
-        console.print("[red]No results found.[/red]")
-
-    return first_result
-
-def get_final_redirect_url(initial_url, max_timeout):
-    """
-    Follow redirects from the initial URL and return the final URL after all redirects.
-
-    Args:
-        initial_url (str): The URL to start with and follow redirects.
-
-    Returns:
-        str: The final URL after all redirects are followed.
+    Validate if URL is accessible and matches expected base domain
     """
+    console.print(f"\n[cyan]Starting validation for URL[white]: [yellow]{url}")
+
+    def check_response(response, check_num):
+        if response.status_code == 403:
+            console.print(f"[red]Check {check_num} failed: Access forbidden (403)")
+            return False
+        if response.status_code >= 400:
+            console.print(f"[red]Check {check_num} failed: HTTP {response.status_code}")
+            return False
+        console.print(f"[green]Check {check_num} passed: HTTP {response.status_code}")
+        return True
 
-    # Create a client with redirects enabled
     try:
+
+        # Check 1: Initial request without following redirects
+        console.print("[cyan]Performing initial connection check...")
         with httpx.Client(
-            headers={
-
-                'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
-                'User-Agent': get_headers()
-            },
-            follow_redirects=True,
+            headers={'User-Agent': get_headers()},
+            follow_redirects=False,
             timeout=max_timeout
-
         ) as client:
-            response = client.get(
+            response = client.get(url)
+            if not check_response(response, 1):
+                return False
 
-
-
-
-
-
+        # Check 2: Follow redirects and verify final domain
+        console.print("[cyan]Checking redirect destination...")
+        with httpx.Client(
+            headers={'User-Agent': get_headers()},
+            follow_redirects=True,
+            timeout=max_timeout
+        ) as client:
+
+            response = client.get(url)
+            if not check_response(response, 2):
+                return False
+
+            # Compare base domains
+            original_base = get_base_domain(url)
+            final_base = get_base_domain(str(response.url))
+
+            console.print(f"[cyan]Comparing domains:")
+            console.print(f"Original base domain: [yellow]{original_base}")
+            console.print(f"Final base domain: [yellow]{final_base}")
+
+            if original_base != final_base:
+                console.print(f"[red]Domain mismatch: Redirected to different base domain")
+                return False
+
+            # Verify against expected base_url
+            expected_base = get_base_domain(base_url)
+            if final_base != expected_base:
+                console.print(f"[red]Domain mismatch: Final domain does not match expected base URL")
+                console.print(f"Expected: [yellow]{expected_base}")
+                return False
+
+            console.print(f"[green]All checks passed: URL is valid and matches expected domain")
+            return True
 
-        # Capture the final URL after all redirects
-        final_url = response.url
-
-        return final_url
-
     except Exception as e:
-        console.print(f"
-        return
+        console.print(f"[red]Error during validation: {str(e)}")
+        return False
 
 def search_domain(site_name: str, base_url: str, get_first: bool = False):
     """
-    Search for
-
-    Parameters:
-        - site_name (str): The name of the site to search the domain for.
-        - base_url (str): The base URL to construct complete URLs.
-        - get_first (bool): If True, automatically update to the first valid match without user confirmation.
-
-    Returns:
-        tuple: The found domain and the complete URL.
+    Search for valid domain matching site name and base URL.
     """
-
-    # Extract config domain
     max_timeout = config_manager.get_int("REQUESTS", "timeout")
     domain = str(config_manager.get_dict("SITE", site_name)['domain'])
+    test_url = f"{base_url}.{domain}"
 
+    console.print(f"\n[cyan]Testing initial URL[white]: [yellow]{test_url}")
+
     try:
-
-
-
-
-
-
-
-
-            timeout=max_timeout
-        ) as client:
-            response_follow = client.get(f"{base_url}.{domain}")
-            response_follow.raise_for_status()
-
+        if validate_url(test_url, base_url, max_timeout):
+            parsed_url = urlparse(test_url)
+            tld = parsed_url.netloc.split('.')[-1]
+            config_manager.config['SITE'][site_name]['domain'] = tld
+            config_manager.write_config()
+            console.print(f"[green]Successfully validated initial URL")
+            return tld, test_url
+
     except Exception as e:
-
-
-        # Perform a Google search with multiple results
-        search_results = list(search(query, num_results=10, lang="it"))
-        #console.print(f"\nGoogle search results: {search_results}")
-
-        def normalize_for_comparison(url):
-            """Normalize URL by removing protocol, www, and trailing slashes"""
-            url = url.lower()
-            url = url.replace("https://", "").replace("http://", "")
-            url = url.replace("www.", "")
-            return url.rstrip("/")
-
-        # Normalize the base_url we're looking for
-        target_url = normalize_for_comparison(base_url)
-
-        # Iterate through search results
-        for first_url in search_results:
-            console.print(f"[green]Checking url[white]: [red]{first_url}")
+        console.print(f"[red]Error testing initial URL: {str(e)}")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            except Exception as redirect_error:
-                console.print(f"[red]Error following redirect for {first_url}: {redirect_error}")
-                continue
-
-    # If no matching URL is found return base domain
-    console.print("[bold red]No valid URL found matching the base URL.[/bold red]")
-    return domain, f"{base_url}.{domain}"
-
-    # Handle successful initial domain check
-    parsed_url = urlparse(str(response_follow.url))
-    parse_domain = parsed_url.netloc
-    tld = parse_domain.split('.')[-1]
-
-    if tld is not None:
-        # Update domain in config.json
-        config_manager.config['SITE'][site_name]['domain'] = tld
-        config_manager.write_config()
-
-        # Return config domain
-        return tld, f"{base_url}.{tld}"
+    # Google search phase
+    query = base_url.split("/")[-1]
+    console.print(f"\n[cyan]Performing Google search for[white]: [yellow]{query}")
+    search_results = list(search(query, num_results=15, lang="it"))
+
+    for idx, result_url in enumerate(search_results, 1):
+        console.print(f"\n[cyan]Checking Google result {idx}/15[white]: [yellow]{result_url}")
+
+        if validate_url(result_url, base_url, max_timeout):
+            parsed_result = urlparse(result_url)
+            new_domain = parsed_result.netloc.split(".")[-1]
+
+            if get_first or msg.ask(
+                f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'",
+                choices=["y", "n"],
+                default="y"
+            ).lower() == "y":
+
+                config_manager.config['SITE'][site_name]['domain'] = new_domain
+                config_manager.write_config()
+                return new_domain, f"{base_url}.{new_domain}"
+
+    console.print("[bold red]No valid URLs found matching the base URL.")
+    return domain, f"{base_url}.{domain}"
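
Taken together, search_domain is now a two-phase pipeline: first validate the TLD already stored in config.json, then fall back to Google results, with every candidate passing the same validate_url double probe (one request without redirects, one following them, plus a base-domain comparison). A hypothetical call, with an invented site entry:

    # Hypothetical usage of the function above; "example" is an invented
    # SITE entry, not one shipped with the package.
    domain, full_url = search_domain(
        site_name="example",
        base_url="https://example",   # scheme + site name; the TLD is appended inside
        get_first=True,               # accept the first valid Google hit without prompting
    )
    print(domain, full_url)           # e.g. "com", "https://example.com"
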
@@ -104,14 +104,15 @@ class HttpClient:
             response = httpx.get(
                 url=url,
                 headers=self.headers,
-                timeout=max_timeout
+                timeout=max_timeout,
+                follow_redirects=True
             )
 
             response.raise_for_status()
             return response.text
 
         except Exception as e:
-
+            console.print(f"Request to {url} failed with error: {e}")
             return 404
 
     def get_content(self, url):