StreamingCommunity-2.2.0-py3-none-any.whl → StreamingCommunity-2.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of StreamingCommunity might be problematic.

Files changed (38)
  1. StreamingCommunity/Api/Player/Helper/Vixcloud/util.py +15 -24
  2. StreamingCommunity/Api/Site/1337xx/site.py +9 -6
  3. StreamingCommunity/Api/Site/1337xx/title.py +2 -2
  4. StreamingCommunity/Api/Site/altadefinizione/costant.py +6 -2
  5. StreamingCommunity/Api/Site/altadefinizione/film.py +2 -2
  6. StreamingCommunity/Api/Site/altadefinizione/site.py +28 -22
  7. StreamingCommunity/Api/Site/animeunity/costant.py +6 -2
  8. StreamingCommunity/Api/Site/animeunity/film_serie.py +3 -3
  9. StreamingCommunity/Api/Site/animeunity/site.py +27 -19
  10. StreamingCommunity/Api/Site/cb01new/costant.py +6 -2
  11. StreamingCommunity/Api/Site/cb01new/film.py +2 -2
  12. StreamingCommunity/Api/Site/cb01new/site.py +20 -13
  13. StreamingCommunity/Api/Site/ddlstreamitaly/costant.py +6 -2
  14. StreamingCommunity/Api/Site/ddlstreamitaly/series.py +2 -2
  15. StreamingCommunity/Api/Site/ddlstreamitaly/site.py +9 -5
  16. StreamingCommunity/Api/Site/guardaserie/costant.py +6 -2
  17. StreamingCommunity/Api/Site/guardaserie/series.py +2 -3
  18. StreamingCommunity/Api/Site/guardaserie/site.py +10 -6
  19. StreamingCommunity/Api/Site/ilcorsaronero/costant.py +6 -2
  20. StreamingCommunity/Api/Site/ilcorsaronero/site.py +22 -13
  21. StreamingCommunity/Api/Site/ilcorsaronero/title.py +3 -3
  22. StreamingCommunity/Api/Site/mostraguarda/costant.py +6 -2
  23. StreamingCommunity/Api/Site/mostraguarda/film.py +2 -2
  24. StreamingCommunity/Api/Site/streamingcommunity/costant.py +7 -3
  25. StreamingCommunity/Api/Site/streamingcommunity/film.py +3 -3
  26. StreamingCommunity/Api/Site/streamingcommunity/series.py +2 -2
  27. StreamingCommunity/Api/Site/streamingcommunity/site.py +29 -28
  28. StreamingCommunity/Api/Site/streamingcommunity/util/ScrapeSerie.py +24 -24
  29. StreamingCommunity/Api/Template/Util/get_domain.py +100 -137
  30. StreamingCommunity/Lib/Downloader/HLS/downloader.py +3 -2
  31. StreamingCommunity/Lib/Downloader/HLS/segments.py +20 -15
  32. StreamingCommunity/Upload/version.py +1 -1
  33. {StreamingCommunity-2.2.0.dist-info → StreamingCommunity-2.3.0.dist-info}/METADATA +34 -20
  34. {StreamingCommunity-2.2.0.dist-info → StreamingCommunity-2.3.0.dist-info}/RECORD +38 -38
  35. {StreamingCommunity-2.2.0.dist-info → StreamingCommunity-2.3.0.dist-info}/LICENSE +0 -0
  36. {StreamingCommunity-2.2.0.dist-info → StreamingCommunity-2.3.0.dist-info}/WHEEL +0 -0
  37. {StreamingCommunity-2.2.0.dist-info → StreamingCommunity-2.3.0.dist-info}/entry_points.txt +0 -0
  38. {StreamingCommunity-2.2.0.dist-info → StreamingCommunity-2.3.0.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@
 
 
 # Internal utilities
+from StreamingCommunity.Util._jsonConfig import config_manager
 from StreamingCommunity.Util.table import TVShowManager
 
 
@@ -13,9 +14,11 @@ from .util.ilCorsarScraper import IlCorsaroNeroScraper
 
 
 # Variable
-from .costant import SITE_NAME
+from .costant import SITE_NAME, DOMAIN_NOW
 media_search_manager = MediaManager()
 table_show_manager = TVShowManager()
+max_timeout = config_manager.get_int("REQUESTS", "timeout")
+disable_searchDomain = config_manager.get_bool("DEFAULT", "disable_searchDomain")
 
 
 async def title_search(word_to_search: str) -> int:
@@ -32,25 +35,31 @@ async def title_search(word_to_search: str) -> int:
     table_show_manager.clear()
 
     # Find new domain if prev dont work
-    domain_to_use, _ = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+    domain_to_use = DOMAIN_NOW
+
+    if not disable_searchDomain:
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
 
     # Create scraper and collect result
     print("\n")
     scraper = IlCorsaroNeroScraper(f"https://{SITE_NAME}.{domain_to_use}/", 1)
     results = await scraper.search(word_to_search)
 
-    # Add all result to media manager
     for i, torrent in enumerate(results):
-        media_search_manager.add_media({
-            'name': torrent['name'],
-            'type': torrent['type'],
-            'seed': torrent['seed'],
-            'leech': torrent['leech'],
-            'size': torrent['size'],
-            'date': torrent['date'],
-            'url': torrent['url']
-        })
-
+        try:
+
+            media_search_manager.add_media({
+                'name': torrent['name'],
+                'type': torrent['type'],
+                'seed': torrent['seed'],
+                'leech': torrent['leech'],
+                'size': torrent['size'],
+                'date': torrent['date'],
+                'url': torrent['url']
+            })
+
+        except Exception as e:
+            print(f"Error parsing a film entry: {e}")
 
     # Return the number of titles found
     return media_search_manager.get_length()
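Note: the loop now wraps each result in its own try/except, so one malformed torrent entry is skipped instead of aborting the whole search (the message says "film entry", a small misnomer here since these are torrents). A minimal standalone sketch of the same pattern, with hypothetical data in place of the scraper output:

    results = [
        {'name': 'Example', 'type': 'movie', 'seed': 10, 'leech': 2,
         'size': '1.2 GB', 'date': '2024-01-01', 'url': 'https://example.org/t/1'},
        {'name': 'broken entry'},  # missing keys
    ]

    collected = []
    for torrent in results:
        try:
            # torrent['seed'] raises KeyError for the malformed entry
            collected.append({'name': torrent['name'], 'seed': torrent['seed']})
        except Exception as e:
            print(f"Error parsing a film entry: {e}")

    print(len(collected))  # 1 -- the bad entry was skipped, not fatal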
@@ -15,7 +15,7 @@ from StreamingCommunity.Api.Template.Class.SearchType import MediaItem
 
 
 # Config
-from .costant import ROOT_PATH, MOVIE_FOLDER
+from .costant import MOVIE_FOLDER
 
 
 def download_title(select_title: MediaItem):
@@ -27,13 +27,13 @@ def download_title(select_title: MediaItem):
     """
 
     start_message()
-    console.print(f"[yellow]Download: [red]{select_title.name} \n")
+    console.print(f"[yellow]Download: [red]{select_title.name} \n")
     print()
 
     # Define output path
     title_name = os_manager.get_sanitize_file(select_title.name)
     mp4_path = os_manager.get_sanitize_path(
-        os.path.join(ROOT_PATH, MOVIE_FOLDER, title_name.replace(".mp4", ""))
+        os.path.join(MOVIE_FOLDER, title_name.replace(".mp4", ""))
     )
 
     # Create output folder
@@ -11,5 +11,9 @@ SITE_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
 ROOT_PATH = config_manager.get('DEFAULT', 'root_path')
 DOMAIN_NOW = config_manager.get_dict('SITE', SITE_NAME)['domain']
 
-SERIES_FOLDER = config_manager.get('DEFAULT', 'serie_folder_name')
-MOVIE_FOLDER = config_manager.get('DEFAULT', 'movie_folder_name')
+SERIES_FOLDER = os.path.join(ROOT_PATH, config_manager.get('DEFAULT', 'serie_folder_name'))
+MOVIE_FOLDER = os.path.join(ROOT_PATH, config_manager.get('DEFAULT', 'movie_folder_name'))
+
+if config_manager.get_bool("DEFAULT", "add_siteName"):
+    SERIES_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, config_manager.get('DEFAULT', 'serie_folder_name'))
+    MOVIE_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, config_manager.get('DEFAULT', 'movie_folder_name'))
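Note: the `costant.py` hunks now bake `ROOT_PATH` into `SERIES_FOLDER`/`MOVIE_FOLDER` (callers no longer join it themselves) and optionally insert a per-site subfolder. A sketch of the resulting paths, with hypothetical config values standing in for the `config_manager` lookups:

    import os

    ROOT_PATH = "Video"                # hypothetical 'root_path'
    SITE_NAME = "altadefinizione"
    serie_folder = "Serie"             # hypothetical 'serie_folder_name'
    movie_folder = "Movie"             # hypothetical 'movie_folder_name'
    add_siteName = True                # hypothetical 'add_siteName' flag

    SERIES_FOLDER = os.path.join(ROOT_PATH, serie_folder)   # Video/Serie
    MOVIE_FOLDER = os.path.join(ROOT_PATH, movie_folder)    # Video/Movie

    if add_siteName:
        # Per-site layout: Video/altadefinizione/Serie, Video/altadefinizione/Movie
        SERIES_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, serie_folder)
        MOVIE_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, movie_folder)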
@@ -33,7 +33,7 @@ from StreamingCommunity.Lib.TMBD import Json_film
 
 
 # Config
-from .costant import ROOT_PATH, SITE_NAME, DOMAIN_NOW, MOVIE_FOLDER
+from .costant import SITE_NAME, DOMAIN_NOW, MOVIE_FOLDER
 
 
 def download_film(movie_details: Json_film) -> str:
@@ -75,7 +75,7 @@ def download_film(movie_details: Json_film) -> str:
 
     # Define output path
     title_name = os_manager.get_sanitize_file(movie_details.title) + ".mp4"
-    mp4_path = os.path.join(ROOT_PATH, MOVIE_FOLDER, title_name.replace(".mp4", ""))
+    mp4_path = os.path.join(MOVIE_FOLDER, title_name.replace(".mp4", ""))
 
     # Get m3u8 master playlist
     master_playlist = video_source.get_playlist()
@@ -9,7 +9,11 @@ from StreamingCommunity.Util._jsonConfig import config_manager
 
 SITE_NAME = os.path.basename(os.path.dirname(os.path.abspath(__file__)))
 ROOT_PATH = config_manager.get('DEFAULT', 'root_path')
-DOMAIN_NOW = config_manager.get('SITE', SITE_NAME)
+DOMAIN_NOW = config_manager.get_dict('SITE', SITE_NAME)['domain']
 
-SERIES_FOLDER = config_manager.get('DEFAULT', 'serie_folder_name')
-MOVIE_FOLDER = config_manager.get('DEFAULT', 'movie_folder_name')
+SERIES_FOLDER = os.path.join(ROOT_PATH, config_manager.get('DEFAULT', 'serie_folder_name'))
+MOVIE_FOLDER = os.path.join(ROOT_PATH, config_manager.get('DEFAULT', 'movie_folder_name'))
+
+if config_manager.get_bool("DEFAULT", "add_siteName"):
+    SERIES_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, config_manager.get('DEFAULT', 'serie_folder_name'))
+    MOVIE_FOLDER = os.path.join(ROOT_PATH, SITE_NAME, config_manager.get('DEFAULT', 'movie_folder_name'))
@@ -22,7 +22,7 @@ from StreamingCommunity.Api.Player.vixcloud import VideoSource
 
 
 # Variable
-from .costant import ROOT_PATH, SITE_NAME, MOVIE_FOLDER
+from .costant import SITE_NAME, MOVIE_FOLDER
 
 
 def download_film(select_title: MediaItem) -> str:
@@ -51,8 +51,8 @@ def download_film(select_title: MediaItem) -> str:
     master_playlist = video_source.get_playlist()
 
     # Define the filename and path for the downloaded film
-    title_name = os_manager.get_sanitize_file(select_title.slug) + ".mp4"
-    mp4_path = os.path.join(ROOT_PATH, MOVIE_FOLDER, select_title.slug)
+    title_name = os_manager.get_sanitize_file(select_title.name) + ".mp4"
+    mp4_path = os.path.join(MOVIE_FOLDER, select_title.name)
 
     # Download the film using the m3u8 playlist, and output filename
     r_proc = HLS_Downloader(
@@ -24,7 +24,7 @@ from StreamingCommunity.Api.Player.vixcloud import VideoSource
 
 
 # Variable
-from .costant import ROOT_PATH, SITE_NAME, SERIES_FOLDER
+from .costant import SITE_NAME, SERIES_FOLDER
 
 
 
@@ -48,7 +48,7 @@ def download_video(index_season_selected: int, index_episode_selected: int, scra
 
     # Define filename and path for the downloaded video
     mp4_name = f"{map_episode_title(scrape_serie.series_name, index_season_selected, index_episode_selected, obj_episode.name)}.mp4"
-    mp4_path = os.path.join(ROOT_PATH, SERIES_FOLDER, scrape_serie.series_name, f"S{index_season_selected}")
+    mp4_path = os.path.join(SERIES_FOLDER, scrape_serie.series_name, f"S{index_season_selected}")
 
     # Retrieve scws and if available master playlist
     video_source.get_iframe(obj_episode.id)
@@ -26,30 +26,36 @@ from StreamingCommunity.Api.Template.Class.SearchType import MediaManager
 
 
 # Config
-from .costant import SITE_NAME
+from .costant import SITE_NAME, DOMAIN_NOW
 
 
 # Variable
 media_search_manager = MediaManager()
 table_show_manager = TVShowManager()
 max_timeout = config_manager.get_int("REQUESTS", "timeout")
+disable_searchDomain = config_manager.get_bool("DEFAULT", "disable_searchDomain")
 
 
-def get_version(text: str):
+def get_version(domain: str):
     """
     Extracts the version from the HTML text of a webpage.
 
     Parameters:
-        - text (str): The HTML text of the webpage.
+        - domain (str): The domain of the site.
 
     Returns:
         str: The version extracted from the webpage.
-        list: Top 10 titles headlines for today.
     """
     try:
+        response = httpx.get(
+            url=f"https://{SITE_NAME}.{domain}/",
+            headers={'User-Agent': get_headers()},
+            timeout=max_timeout
+        )
+        response.raise_for_status()
 
         # Parse request to site
-        soup = BeautifulSoup(text, "html.parser")
+        soup = BeautifulSoup(response.text, "html.parser")
 
         # Extract version
        version = json.loads(soup.find("div", {"id": "app"}).get("data-page"))['version']
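Note: `get_version` now fetches the page itself and reads the version from the Inertia `data-page` attribute on the `#app` div instead of receiving pre-fetched HTML. A sketch of just the parsing step, against a hypothetical, much-reduced page body:

    import json
    from bs4 import BeautifulSoup

    # Hypothetical snippet; the real page embeds a much larger Inertia payload
    html = '<div id="app" data-page=\'{"version": "abc123", "props": {}}\'></div>'

    soup = BeautifulSoup(html, "html.parser")
    version = json.loads(soup.find("div", {"id": "app"}).get("data-page"))['version']
    print(version)  # abc123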
@@ -72,22 +78,13 @@ def get_version_and_domain():
     """
 
     # Find new domain if prev dont work
-    domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
+    domain_to_use = DOMAIN_NOW
 
-    # Extract version from the response
-    try:
-        version = get_version(
-            httpx.get(
-                url=base_url,
-                headers={'User-Agent': get_headers()},
-                timeout=max_timeout
-            ).text
-        )
-
-    except:
-        console.print("[green]Auto generate version ...")
-        version = secrets.token_hex(32 // 2)
+    if not disable_searchDomain:
+        domain_to_use, base_url = search_domain(SITE_NAME, f"https://{SITE_NAME}")
 
+    version = get_version(domain_to_use)
+
     return version, domain_to_use
 
 
@@ -116,16 +113,20 @@ def title_search(title_search: str, domain: str) -> int:
     except Exception as e:
         console.print(f"Site: {SITE_NAME}, request search error: {e}")
 
-    # Add found titles to media search manager
     for dict_title in response.json()['data']:
-        media_search_manager.add_media({
-            'id': dict_title.get('id'),
-            'slug': dict_title.get('slug'),
-            'name': dict_title.get('name'),
-            'type': dict_title.get('type'),
-            'date': dict_title.get('last_air_date'),
-            'score': dict_title.get('score')
-        })
+        try:
+
+            media_search_manager.add_media({
+                'id': dict_title.get('id'),
+                'slug': dict_title.get('slug'),
+                'name': dict_title.get('name'),
+                'type': dict_title.get('type'),
+                'date': dict_title.get('last_air_date'),
+                'score': dict_title.get('score')
+            })
+
+        except Exception as e:
+            print(f"Error parsing a film entry: {e}")
 
     # Return the number of titles found
     return media_search_manager.get_length()
@@ -1,10 +1,12 @@
 # 01.03.24
 
+import json
 import logging
 
 
 # External libraries
 import httpx
+from bs4 import BeautifulSoup
 
 
 # Internal utilities
@@ -56,33 +58,33 @@ class ScrapeSerie:
         Raises:
             Exception: If there's an error fetching season information
         """
-        self.headers = {
-            'user-agent': get_headers(),
-            'x-inertia': 'true',
-            'x-inertia-version': self.version,
-        }
-
         try:
-
             response = httpx.get(
-                url=f"https://{self.base_name}.{self.domain}/titles/{self.media_id}-{self.series_name}",
-                headers=self.headers,
+                url=f"https://{self.base_name}.{self.domain}/titles/{self.media_id}-{self.series_name}",
+                headers=self.headers,
                 timeout=max_timeout
             )
             response.raise_for_status()
 
             # Extract seasons from JSON response
-            json_response = response.json().get('props')
+            soup = BeautifulSoup(response.text, "html.parser")
+            json_response = json.loads(soup.find("div", {"id": "app"}).get("data-page"))
+
+            """
+            response = httpx.post(
+                url=f'https://{self.base_name}.{self.domain}/api/titles/preview/{self.media_id}',
+                headers={'User-Agent': get_headers()}
+            )
+            response.raise_for_status()
+
+
+            # Extract seasons from JSON response
+            json_response = response.json()
+            """
 
             # Collect info about season
-            self.season_manager = Season(json_response.get('title'))
-            self.season_manager.collect_images(self.base_name, self.domain)
+            self.season_manager = Season(json_response.get("props").get("title"))
 
-            # Collect first episode info
-            for i, ep in enumerate(json_response.get('loadedSeason').get('episodes')):
-                self.season_manager.episodes.add(ep)
-                self.season_manager.episodes.get(i).collect_image(self.base_name, self.domain)
-
         except Exception as e:
             logging.error(f"Error collecting season info: {e}")
             raise
@@ -97,16 +99,14 @@ class ScrapeSerie:
         Raises:
             Exception: If there's an error fetching episode information
         """
-        self.headers = {
-            'user-agent': get_headers(),
-            'x-inertia': 'true',
-            'x-inertia-version': self.version,
-        }
-
         try:
             response = httpx.get(
                 url=f'https://{self.base_name}.{self.domain}/titles/{self.media_id}-{self.series_name}/stagione-{number_season}',
-                headers=self.headers,
+                headers={
+                    'User-Agent': get_headers(),
+                    'x-inertia': 'true',
+                    'x-inertia-version': self.version,
+                },
                 timeout=max_timeout
             )
             response.raise_for_status()
@@ -1,6 +1,5 @@
 # 18.06.24
 
-import sys
 from urllib.parse import urlparse
 
 
@@ -15,160 +14,124 @@ from StreamingCommunity.Util.console import console, msg
 from StreamingCommunity.Util._jsonConfig import config_manager
 
 
-def google_search(query):
-    """
-    Perform a Google search and return the first result.
-
-    Args:
-        query (str): The search query to execute on Google.
+def get_base_domain(url_str):
+    """Extract base domain without protocol, www and path"""
+    parsed = urlparse(url_str)
+    domain = parsed.netloc.lower()
+    if domain.startswith('www.'):
+        domain = domain[4:]
+    return domain.split('.')[0]
 
-    Returns:
-        str: The first URL result from the search, or None if no result is found.
+def validate_url(url, base_url, max_timeout):
     """
-    # Perform the search on Google and limit to 1 result
-    search_results = search(query, num_results=1)
-
-    # Extract the first result
-    first_result = next(search_results, None)
-
-    if not first_result:
-        console.print("[red]No results found.[/red]")
-
-    return first_result
-
-def get_final_redirect_url(initial_url, max_timeout):
-    """
-    Follow redirects from the initial URL and return the final URL after all redirects.
-
-    Args:
-        initial_url (str): The URL to start with and follow redirects.
-
-    Returns:
-        str: The final URL after all redirects are followed.
+    Validate if URL is accessible and matches expected base domain
     """
+    console.print(f"\n[cyan]Starting validation for URL[white]: [yellow]{url}")
+
+    def check_response(response, check_num):
+        if response.status_code == 403:
+            console.print(f"[red]Check {check_num} failed: Access forbidden (403)")
+            return False
+        if response.status_code >= 400:
+            console.print(f"[red]Check {check_num} failed: HTTP {response.status_code}")
+            return False
+        console.print(f"[green]Check {check_num} passed: HTTP {response.status_code}")
+        return True
 
-    # Create a client with redirects enabled
     try:
+
+        # Check 1: Initial request without following redirects
+        console.print("[cyan]Performing initial connection check...")
         with httpx.Client(
-            headers={
-                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
-                'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
-                'User-Agent': get_headers()
-            },
-            follow_redirects=True,
+            headers={'User-Agent': get_headers()},
+            follow_redirects=False,
             timeout=max_timeout
-
         ) as client:
-            response = client.get(initial_url)
+            response = client.get(url)
+            if not check_response(response, 1):
+                return False
 
-            if response.status_code == 403:
-                console.print("[bold red]The owner of this website has banned your IP[/bold red]")
-                raise
-
-            response.raise_for_status()
+        # Check 2: Follow redirects and verify final domain
+        console.print("[cyan]Checking redirect destination...")
+        with httpx.Client(
+            headers={'User-Agent': get_headers()},
+            follow_redirects=True,
+            timeout=max_timeout
+        ) as client:
+
+            response = client.get(url)
+            if not check_response(response, 2):
+                return False
+
+            # Compare base domains
+            original_base = get_base_domain(url)
+            final_base = get_base_domain(str(response.url))
+
+            console.print(f"[cyan]Comparing domains:")
+            console.print(f"Original base domain: [yellow]{original_base}")
+            console.print(f"Final base domain: [yellow]{final_base}")
+
+            if original_base != final_base:
+                console.print(f"[red]Domain mismatch: Redirected to different base domain")
+                return False
+
+            # Verify against expected base_url
+            expected_base = get_base_domain(base_url)
+            if final_base != expected_base:
+                console.print(f"[red]Domain mismatch: Final domain does not match expected base URL")
+                console.print(f"Expected: [yellow]{expected_base}")
+                return False
+
+            console.print(f"[green]All checks passed: URL is valid and matches expected domain")
+            return True
 
-            # Capture the final URL after all redirects
-            final_url = response.url
-
-            return final_url
-
     except Exception as e:
-        console.print(f"\n[cyan]Test url[white]: [red]{initial_url}, [cyan]error[white]: [red]{e}")
-        return None
+        console.print(f"[red]Error during validation: {str(e)}")
+        return False
 
 def search_domain(site_name: str, base_url: str, get_first: bool = False):
     """
-    Search for a valid domain for the given site name and base URL.
-
-    Parameters:
-        - site_name (str): The name of the site to search the domain for.
-        - base_url (str): The base URL to construct complete URLs.
-        - get_first (bool): If True, automatically update to the first valid match without user confirmation.
-
-    Returns:
-        tuple: The found domain and the complete URL.
+    Search for valid domain matching site name and base URL.
     """
-
-    # Extract config domain
     max_timeout = config_manager.get_int("REQUESTS", "timeout")
     domain = str(config_manager.get_dict("SITE", site_name)['domain'])
+    test_url = f"{base_url}.{domain}"
 
+    console.print(f"\n[cyan]Testing initial URL[white]: [yellow]{test_url}")
+
     try:
-        # Test the current domain
-        with httpx.Client(
-            headers={
-                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
-                'accept-language': 'it-IT,it;q=0.9,en-US;q=0.8,en;q=0.7',
-                'User-Agent': get_headers()
-            },
-            follow_redirects=True,
-            timeout=max_timeout
-        ) as client:
-            response_follow = client.get(f"{base_url}.{domain}")
-            response_follow.raise_for_status()
-
+        if validate_url(test_url, base_url, max_timeout):
+            parsed_url = urlparse(test_url)
+            tld = parsed_url.netloc.split('.')[-1]
+            config_manager.config['SITE'][site_name]['domain'] = tld
+            config_manager.write_config()
+            console.print(f"[green]Successfully validated initial URL")
+            return tld, test_url
+
     except Exception as e:
-        query = base_url.split("/")[-1]
-
-        # Perform a Google search with multiple results
-        search_results = list(search(query, num_results=10, lang="it"))
-        #console.print(f"\nGoogle search results: {search_results}")
-
-        def normalize_for_comparison(url):
-            """Normalize URL by removing protocol, www, and trailing slashes"""
-            url = url.lower()
-            url = url.replace("https://", "").replace("http://", "")
-            url = url.replace("www.", "")
-            return url.rstrip("/")
-
-        # Normalize the base_url we're looking for
-        target_url = normalize_for_comparison(base_url)
-
-        # Iterate through search results
-        for first_url in search_results:
-            console.print(f"[green]Checking url[white]: [red]{first_url}")
+        console.print(f"[red]Error testing initial URL: {str(e)}")
 
-            # Get just the domain part of the search result
-            parsed_result = urlparse(first_url)
-            result_domain = normalize_for_comparison(parsed_result.netloc)
-
-            # Compare with our target URL (without the protocol part)
-            if result_domain.startswith(target_url.split("/")[-1]):
-                try:
-                    final_url = get_final_redirect_url(first_url, max_timeout)
-
-                    if final_url is not None:
-                        def extract_domain(url):
-                            parsed_url = urlparse(url)
-                            domain = parsed_url.netloc
-                            return domain.split(".")[-1]
-
-                        new_domain_extract = extract_domain(str(final_url))
-
-                        if get_first or msg.ask(f"\n[cyan]Do you want to auto update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain_extract}'.", choices=["y", "n"], default="y").lower() == "y":
-                            # Update domain in config.json
-                            config_manager.config['SITE'][site_name]['domain'] = new_domain_extract
-                            config_manager.write_config()
-
-                            return new_domain_extract, f"{base_url}.{new_domain_extract}"
-
-                except Exception as redirect_error:
-                    console.print(f"[red]Error following redirect for {first_url}: {redirect_error}")
-                    continue
-
-        # If no matching URL is found return base domain
-        console.print("[bold red]No valid URL found matching the base URL.[/bold red]")
-        return domain, f"{base_url}.{domain}"
-
-    # Handle successful initial domain check
-    parsed_url = urlparse(str(response_follow.url))
-    parse_domain = parsed_url.netloc
-    tld = parse_domain.split('.')[-1]
-
-    if tld is not None:
-        # Update domain in config.json
-        config_manager.config['SITE'][site_name]['domain'] = tld
-        config_manager.write_config()
-
-        # Return config domain
-        return tld, f"{base_url}.{tld}"
+    # Google search phase
+    query = base_url.split("/")[-1]
+    console.print(f"\n[cyan]Performing Google search for[white]: [yellow]{query}")
+    search_results = list(search(query, num_results=15, lang="it"))
+
+    for idx, result_url in enumerate(search_results, 1):
+        console.print(f"\n[cyan]Checking Google result {idx}/15[white]: [yellow]{result_url}")
+
+        if validate_url(result_url, base_url, max_timeout):
+            parsed_result = urlparse(result_url)
+            new_domain = parsed_result.netloc.split(".")[-1]
+
+            if get_first or msg.ask(
+                f"\n[cyan]Do you want to update site[white] [red]'{site_name}'[cyan] with domain[white] [red]'{new_domain}'",
+                choices=["y", "n"],
+                default="y"
+            ).lower() == "y":
+
+                config_manager.config['SITE'][site_name]['domain'] = new_domain
+                config_manager.write_config()
+                return new_domain, f"{base_url}.{new_domain}"
+
+    console.print("[bold red]No valid URLs found matching the base URL.")
+    return domain, f"{base_url}.{domain}"
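Note: the rewrite replaces google_search/get_final_redirect_url with get_base_domain/validate_url and inverts the flow: the configured domain is validated first, and the Google fallback (now 15 results instead of 10) only runs when that fails, validating each hit before offering to persist it. A hedged usage sketch; the import path follows the file list above, and actual behavior depends on network state and config.json:

    from StreamingCommunity.Api.Template.Util.get_domain import search_domain

    # Validates https://streamingcommunity.<configured-tld> first; on failure,
    # walks Google results and writes the accepted TLD back to config.json.
    domain, full_url = search_domain(
        site_name="streamingcommunity",
        base_url="https://streamingcommunity",
        get_first=True,   # accept the first valid match without prompting
    )
    print(domain, full_url)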
@@ -104,14 +104,15 @@ class HttpClient:
             response = httpx.get(
                 url=url,
                 headers=self.headers,
-                timeout=max_timeout
+                timeout=max_timeout,
+                follow_redirects=True
             )
 
             response.raise_for_status()
             return response.text
 
         except Exception as e:
-            logging.info(f"Request to {url} failed with error: {e}")
+            console.print(f"Request to {url} failed with error: {e}")
             return 404
 
     def get_content(self, url):
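Note: unlike requests, httpx does not follow redirects by default, so the added `follow_redirects=True` is what lets `HttpClient.get` survive a mirror that 301s to a new TLD. A minimal illustration against a public test endpoint (assumed reachable):

    import httpx

    url = "https://httpbin.org/redirect/1"      # 302s once to /get

    r = httpx.get(url)                          # httpx default: redirect not followed
    print(r.status_code)                        # 302

    r = httpx.get(url, follow_redirects=True)   # follow to the final target
    print(r.status_code, r.url)                 # 200 https://httpbin.org/get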