StreamingCommunity 3.0.0__py3-none-any.whl → 3.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
Potentially problematic release: this version of StreamingCommunity might be problematic.
- StreamingCommunity/Api/Player/hdplayer.py +65 -0
- StreamingCommunity/Api/Player/mixdrop.py +145 -0
- StreamingCommunity/Api/Site/1337xx/site.py +5 -2
- StreamingCommunity/Api/Site/altadefinizione/site.py +1 -1
- StreamingCommunity/Api/Site/animeunity/site.py +2 -1
- StreamingCommunity/Api/Site/animeunity/util/ScrapeSerie.py +22 -12
- StreamingCommunity/Api/Site/animeworld/site.py +1 -1
- StreamingCommunity/Api/Site/ddlstreamitaly/site.py +1 -1
- StreamingCommunity/Api/Site/guardaserie/site.py +1 -1
- StreamingCommunity/Api/Site/raiplay/site.py +2 -2
- StreamingCommunity/Api/Site/streamingcommunity/series.py +2 -2
- StreamingCommunity/Api/Site/streamingcommunity/site.py +1 -1
- StreamingCommunity/Api/Site/streamingwatch/__init__.py +95 -0
- StreamingCommunity/Api/Site/{cb01new → streamingwatch}/film.py +12 -13
- StreamingCommunity/Api/Site/streamingwatch/series.py +160 -0
- StreamingCommunity/Api/Site/streamingwatch/site.py +111 -0
- StreamingCommunity/Api/Site/streamingwatch/util/ScrapeSerie.py +118 -0
- StreamingCommunity/Lib/Proxies/proxy.py +232 -0
- StreamingCommunity/Upload/version.py +1 -1
- StreamingCommunity/Util/config_json.py +11 -13
- {streamingcommunity-3.0.0.dist-info → streamingcommunity-3.0.2.dist-info}/METADATA +16 -2
- {streamingcommunity-3.0.0.dist-info → streamingcommunity-3.0.2.dist-info}/RECORD +26 -22
- {streamingcommunity-3.0.0.dist-info → streamingcommunity-3.0.2.dist-info}/WHEEL +1 -1
- StreamingCommunity/Api/Player/maxstream.py +0 -140
- StreamingCommunity/Api/Site/cb01new/__init__.py +0 -71
- StreamingCommunity/Api/Site/cb01new/site.py +0 -81
- {streamingcommunity-3.0.0.dist-info → streamingcommunity-3.0.2.dist-info}/entry_points.txt +0 -0
- {streamingcommunity-3.0.0.dist-info → streamingcommunity-3.0.2.dist-info}/licenses/LICENSE +0 -0
- {streamingcommunity-3.0.0.dist-info → streamingcommunity-3.0.2.dist-info}/top_level.txt +0 -0
StreamingCommunity/Api/Site/streamingwatch/series.py
@@ -0,0 +1,160 @@
+# 29.04.25
+
+import os
+from typing import Tuple
+
+
+# External library
+from rich.console import Console
+from rich.prompt import Prompt
+
+
+# Internal utilities
+from StreamingCommunity.Util.message import start_message
+from StreamingCommunity.Lib.Downloader import HLS_Downloader
+
+
+# Logic class
+from .util.ScrapeSerie import GetSerieInfo
+from StreamingCommunity.Api.Template.Util import (
+    manage_selection,
+    map_episode_title,
+    validate_selection,
+    validate_episode_selection,
+    display_episodes_list
+)
+from StreamingCommunity.Api.Template.config_loader import site_constant
+from StreamingCommunity.Api.Template.Class.SearchType import MediaItem
+
+
+# Player
+from StreamingCommunity.Api.Player.hdplayer import VideoSource
+
+
+# Variable
+msg = Prompt()
+console = Console()
+
+
+def download_video(index_season_selected: int, index_episode_selected: int, scrape_serie: GetSerieInfo, proxy=None) -> Tuple[str,bool]:
+    """
+    Downloads a specific episode from a specified season.
+
+    Parameters:
+        - index_season_selected (int): Season number
+        - index_episode_selected (int): Episode index
+        - scrape_serie (GetSerieInfo): Scraper object with series information
+
+    Returns:
+        - str: Path to downloaded file
+        - bool: Whether download was stopped
+    """
+    start_message()
+
+    # Get episode information
+    obj_episode = scrape_serie.selectEpisode(index_season_selected, index_episode_selected-1)
+    console.print(f"[bold yellow]Download:[/bold yellow] [red]{site_constant.SITE_NAME}[/red] → [bold magenta]{obj_episode.name}[/bold magenta] ([cyan]S{index_season_selected}E{index_episode_selected}[/cyan]) \n")
+
+    # Define filename and path for the downloaded video
+    mp4_name = f"{map_episode_title(scrape_serie.series_name, index_season_selected, index_episode_selected, obj_episode.name)}.mp4"
+    mp4_path = os.path.join(site_constant.SERIES_FOLDER, scrape_serie.series_name, f"S{index_season_selected}")
+
+    # Retrieve scws and if available master playlist
+    video_source = VideoSource(proxy)
+    master_playlist = video_source.get_m3u8_url(obj_episode.url)
+
+    # Download the episode
+    r_proc = HLS_Downloader(
+        m3u8_url=master_playlist,
+        output_path=os.path.join(mp4_path, mp4_name)
+    ).start()
+
+    if r_proc['error'] is not None:
+        try: os.remove(r_proc['path'])
+        except: pass
+
+    return r_proc['path'], r_proc['stopped']
+
+
+def download_episode(index_season_selected: int, scrape_serie: GetSerieInfo, download_all: bool = False, episode_selection: str = None, proxy = None) -> None:
+    """
+    Handle downloading episodes for a specific season.
+
+    Parameters:
+        - index_season_selected (int): Season number
+        - scrape_serie (GetSerieInfo): Scraper object with series information
+        - download_all (bool): Whether to download all episodes
+        - episode_selection (str, optional): Pre-defined episode selection that bypasses manual input
+    """
+    # Get episodes for the selected season
+    episodes = scrape_serie.getEpisodeSeasons(index_season_selected)
+    episodes_count = len(episodes)
+
+    if download_all:
+        for i_episode in range(1, episodes_count + 1):
+            path, stopped = download_video(index_season_selected, i_episode, scrape_serie, proxy)
+
+            if stopped:
+                break
+
+        console.print(f"\n[red]End downloaded [yellow]season: [red]{index_season_selected}.")
+
+    else:
+        if episode_selection is not None:
+            last_command = episode_selection
+            console.print(f"\n[cyan]Using provided episode selection: [yellow]{episode_selection}")
+
+        else:
+            last_command = display_episodes_list(episodes)
+
+        # Prompt user for episode selection
+        list_episode_select = manage_selection(last_command, episodes_count)
+        list_episode_select = validate_episode_selection(list_episode_select, episodes_count)
+
+        # Download selected episodes if not stopped
+        for i_episode in list_episode_select:
+            path, stopped = download_video(index_season_selected, i_episode, scrape_serie, proxy)
+
+            if stopped:
+                break
+
+def download_series(select_season: MediaItem, season_selection: str = None, episode_selection: str = None, proxy = None) -> None:
+    """
+    Handle downloading a complete series.
+
+    Parameters:
+        - select_season (MediaItem): Series metadata from search
+        - season_selection (str, optional): Pre-defined season selection that bypasses manual input
+        - episode_selection (str, optional): Pre-defined episode selection that bypasses manual input
+    """
+    scrape_serie = GetSerieInfo(select_season.url, proxy)
+
+    # Get total number of seasons
+    seasons_count = scrape_serie.getNumberSeason()
+
+    # Prompt user for season selection and download episodes
+    console.print(f"\n[green]Seasons found: [red]{seasons_count}")
+
+    # If season_selection is provided, use it instead of asking for input
+    if season_selection is None:
+        index_season_selected = msg.ask(
+            "\n[cyan]Insert season number [yellow](e.g., 1), [red]* [cyan]to download all seasons, "
+            "[yellow](e.g., 1-2) [cyan]for a range of seasons, or [yellow](e.g., 3-*) [cyan]to download from a specific season to the end"
+        )
+
+    else:
+        index_season_selected = season_selection
+        console.print(f"\n[cyan]Using provided season selection: [yellow]{season_selection}")
+
+    # Validate the selection
+    list_season_select = manage_selection(index_season_selected, seasons_count)
+    list_season_select = validate_selection(list_season_select, seasons_count)
+
+    # Loop through the selected seasons and download episodes
+    for i_season in list_season_select:
+        if len(list_season_select) > 1 or index_season_selected == "*":
+            # Download all episodes if multiple seasons are selected or if '*' is used
+            download_episode(i_season, scrape_serie, download_all=True, proxy=proxy)
+        else:
+            # Otherwise, let the user select specific episodes for the single season
+            download_episode(i_season, scrape_serie, download_all=False, episode_selection=episode_selection, proxy=proxy)
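The three functions in this new module form a chain: download_series resolves the season selection, download_episode resolves episodes within one season, and download_video hands a single episode's master playlist to HLS_Downloader. A minimal driver sketch, assuming a MediaItem already obtained from the site's search step (the `media` variable and the selection strings here are hypothetical, not from the package):

    # Hypothetical driver for the new streamingwatch series module.
    # Assumes `media` is a MediaItem returned by the site's search flow.
    from StreamingCommunity.Api.Site.streamingwatch.series import download_series

    download_series(media, season_selection="1", episode_selection="1-3", proxy=None)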
StreamingCommunity/Api/Site/streamingwatch/site.py
@@ -0,0 +1,111 @@
+# 29.04.25
+
+import re
+
+
+# External libraries
+import httpx
+from bs4 import BeautifulSoup
+from rich.console import Console
+
+
+# Internal utilities
+from StreamingCommunity.Util.config_json import config_manager
+from StreamingCommunity.Util.headers import get_userAgent
+from StreamingCommunity.Util.table import TVShowManager
+
+
+# Logic class
+from StreamingCommunity.Api.Template.config_loader import site_constant
+from StreamingCommunity.Api.Template.Class.SearchType import MediaManager
+
+
+# Variable
+console = Console()
+media_search_manager = MediaManager()
+table_show_manager = TVShowManager()
+max_timeout = config_manager.get_int("REQUESTS", "timeout")
+
+
+def extract_nonce(response_) -> str:
+    """Extract nonce value from the page script"""
+    soup = BeautifulSoup(response_.content, 'html.parser')
+    script = soup.find('script', id='live-search-js-extra')
+    if script:
+        match = re.search(r'"admin_ajax_nonce":"([^"]+)"', script.text)
+        if match:
+            return match.group(1)
+    return ""
+
+
+def title_search(query: str, additionalData: list) -> int:
+    """
+    Search for titles based on a search query.
+
+    Parameters:
+        - query (str): The query to search for.
+
+    Returns:
+        int: The number of titles found.
+    """
+    media_search_manager.clear()
+    table_show_manager.clear()
+
+    proxy, response_serie = additionalData
+    search_url = f"{site_constant.FULL_URL}/wp-admin/admin-ajax.php"
+    console.print(f"[cyan]Search url: [yellow]{search_url}")
+
+    try:
+        _wpnonce = extract_nonce(response_serie)
+
+        if not _wpnonce:
+            console.print("[red]Error: Failed to extract nonce")
+            return 0
+
+        data = {
+            'action': 'data_fetch',
+            'keyword': query,
+            '_wpnonce': _wpnonce
+        }
+
+        response = httpx.post(
+            search_url,
+            headers={
+                'origin': site_constant.FULL_URL,
+                'user-agent': get_userAgent()
+            },
+            data=data,
+            timeout=max_timeout,
+            proxy=proxy
+        )
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+    except Exception as e:
+        console.print(f"[red]Site: {site_constant.SITE_NAME}, request search error: {e}")
+        return 0
+
+    for item in soup.find_all('div', class_='searchelement'):
+        try:
+
+            title = item.find_all("a")[-1].get_text(strip=True) if item.find_all("a") else 'N/A'
+            url = item.find('a').get('href', '')
+            year = item.find('div', id='search-cat-year')
+            year = year.get_text(strip=True) if year else 'N/A'
+
+            if any(keyword in year.lower() for keyword in ['stagione', 'episodio', 'ep.', 'season', 'episode']):
+                continue
+
+            media_search_manager.add_media({
+                'name': title,
+                'type': 'tv' if '/serie/' in url else 'Film',
+                'date': year,
+                'image': item.find('img').get('src', ''),
+                'url': url
+            })
+
+        except Exception as e:
+            print(f"Error parsing a film entry: {e}")
+
+    # Return the number of titles found
+    return media_search_manager.get_length()
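title_search drives a stock WordPress admin-ajax.php live-search endpoint: it scrapes the admin_ajax_nonce out of the inlined live-search-js-extra script tag, POSTs the data_fetch action with the keyword and nonce, and parses the returned HTML fragments. A standalone sketch of the same two-step exchange, assuming only httpx; BASE_URL is a placeholder, not a value from the package:

    import re
    import httpx

    BASE_URL = "https://example-wordpress-site.tld"  # placeholder, not from the package

    # Step 1: fetch a page that inlines the live-search script and pull out the nonce.
    page = httpx.get(BASE_URL, timeout=10)
    match = re.search(r'"admin_ajax_nonce":"([^"]+)"', page.text)
    nonce = match.group(1) if match else ""

    # Step 2: call the live-search AJAX action with keyword + nonce.
    resp = httpx.post(
        f"{BASE_URL}/wp-admin/admin-ajax.php",
        data={"action": "data_fetch", "keyword": "matrix", "_wpnonce": nonce},
        timeout=10,
    )
    print(resp.status_code, resp.text[:200])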
StreamingCommunity/Api/Site/streamingwatch/util/ScrapeSerie.py
@@ -0,0 +1,118 @@
+# 29.04.25
+
+import re
+import logging
+
+
+# External libraries
+import httpx
+from bs4 import BeautifulSoup
+
+# Internal utilities
+from StreamingCommunity.Util.headers import get_userAgent
+from StreamingCommunity.Util.config_json import config_manager
+from StreamingCommunity.Api.Player.Helper.Vixcloud.util import SeasonManager, Episode
+
+
+# Variable
+max_timeout = config_manager.get_int("REQUESTS", "timeout")
+
+
+class GetSerieInfo:
+    def __init__(self, url, proxy: str = None):
+        self.headers = {'user-agent': get_userAgent()}
+        self.url = url
+        self.seasons_manager = SeasonManager()
+        self.series_name = None
+
+        self.client = httpx.Client(headers=self.headers, proxy=proxy, timeout=max_timeout)
+
+    def collect_info_season(self) -> None:
+        """
+        Retrieve all series information including episodes and seasons.
+        """
+        try:
+            response = self.client.get(self.url)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, 'html.parser')
+
+            if not self.series_name:
+                title_tag = soup.find('h1', class_='title-border')
+                self.series_name = title_tag.get_text(strip=True) if title_tag else 'N/A'
+
+            # Extract episodes and organize by season
+            episodes = {}
+            for ep in soup.find_all('div', class_='bolumust'):
+                a_tag = ep.find('a')
+                if not a_tag:
+                    continue
+
+                ep_url = a_tag.get('href', '')
+                episode_title = a_tag.get_text(strip=True)
+
+                # Clean up episode title by removing season info and date
+                clean_title = re.sub(r'Stagione \d+ Episodio \d+\s*\(?([^)]+)\)?\s*\d+\s*\w+\s*\d+', r'\1', episode_title)
+
+                season_match = re.search(r'stagione-(\d+)', ep_url)
+                if season_match:
+                    season_num = int(season_match.group(1))
+                    if season_num not in episodes:
+                        episodes[season_num] = []
+
+                    episodes[season_num].append({
+                        'id': len(episodes[season_num]) + 1,
+                        'number': len(episodes[season_num]) + 1,
+                        'name': clean_title.strip(),
+                        'url': ep_url
+                    })
+
+            # Add seasons to SeasonManager
+            for season_num, eps in episodes.items():
+                season = self.seasons_manager.add_season({
+                    'id': season_num,
+                    'number': season_num,
+                    'name': f'Stagione {season_num}'
+                })
+
+                # Add episodes to season's EpisodeManager
+                for ep in eps:
+                    season.episodes.add(ep)
+
+        except Exception as e:
+            logging.error(f"Error collecting series info: {str(e)}")
+            raise
+
+    # ------------- FOR GUI -------------
+    def getNumberSeason(self) -> int:
+        """
+        Get the total number of seasons available for the series.
+        """
+        if not self.seasons_manager.seasons:
+            self.collect_info_season()
+
+        return len(self.seasons_manager.seasons)
+
+    def getEpisodeSeasons(self, season_number: int) -> list:
+        """
+        Get all episodes for a specific season.
+        """
+        if not self.seasons_manager.seasons:
+            self.collect_info_season()
+
+        season = self.seasons_manager.get_season_by_number(season_number)
+        if not season:
+            logging.error(f"Season {season_number} not found")
+            return []
+
+        return season.episodes.episodes
+
+    def selectEpisode(self, season_number: int, episode_index: int) -> Episode:
+        """
+        Get information for a specific episode in a specific season.
+        """
+        episodes = self.getEpisodeSeasons(season_number)
+        if not episodes or episode_index < 0 or episode_index >= len(episodes):
+            logging.error(f"Episode index {episode_index} is out of range for season {season_number}")
+            return None
+
+        return episodes[episode_index]
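GetSerieInfo scrapes lazily: getNumberSeason and getEpisodeSeasons both call collect_info_season on first use, grouping episodes by the stagione-N fragment in each episode URL. A minimal usage sketch, assuming the Episode helper exposes the number/name/url fields the scraper feeds it (the series URL is a placeholder):

    from StreamingCommunity.Api.Site.streamingwatch.util.ScrapeSerie import GetSerieInfo

    info = GetSerieInfo("https://example.tld/serie/some-show/")  # placeholder URL
    print(info.getNumberSeason())          # first call triggers the scrape
    for ep in info.getEpisodeSeasons(1):   # episodes of season 1
        print(ep.number, ep.name, ep.url)  # assumes Episode exposes these attributes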
StreamingCommunity/Lib/Proxies/proxy.py
@@ -0,0 +1,232 @@
+# 29.04.25
+
+import os
+import sys
+import time
+import json
+import signal
+import warnings
+warnings.filterwarnings("ignore", category=UserWarning)
+from datetime import datetime, timedelta
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+
+# External library
+import httpx
+from rich import print
+from rich.progress import Progress, SpinnerColumn, BarColumn, TextColumn, TimeRemainingColumn
+
+
+# Internal utilities
+from StreamingCommunity.Util.config_json import config_manager
+from StreamingCommunity.Util.headers import get_headers
+
+
+# Variable
+MAX_TIMEOUT = config_manager.get_int("REQUESTS", "timeout")
+
+
+class ProxyFinder:
+    def __init__(self, url, timeout_threshold: float = 7.0, max_proxies: int = 150, max_workers: int = 12):
+        self.url = url
+        self.timeout_threshold = timeout_threshold
+        self.max_proxies = max_proxies
+        self.max_workers = max_workers
+        self.found_proxy = None
+        self.shutdown_flag = False
+        self.json_file = os.path.join(os.path.dirname(__file__), 'working_proxies.json')
+        signal.signal(signal.SIGINT, self._handle_interrupt)
+
+    def load_saved_proxies(self) -> tuple:
+        """Load saved proxies if they're not expired (2 hours old)"""
+        try:
+            if not os.path.exists(self.json_file):
+                return None, None
+
+            with open(self.json_file, 'r') as f:
+                data = json.load(f)
+
+            if not data.get('proxies') or not data.get('last_update'):
+                return None, None
+
+            last_update = datetime.fromisoformat(data['last_update'])
+            if datetime.now() - last_update > timedelta(hours=2):
+                return None, None
+
+            return data['proxies'], last_update
+        except Exception:
+            return None, None
+
+    def save_working_proxy(self, proxy: str, response_time: float):
+        """Save working proxy to JSON file"""
+        data = {
+            'proxies': [{'proxy': proxy, 'response_time': response_time}],
+            'last_update': datetime.now().isoformat()
+        }
+        try:
+            with open(self.json_file, 'w') as f:
+                json.dump(data, f, indent=4)
+        except Exception as e:
+            print(f"[bold red]Error saving proxy:[/bold red] {str(e)}")
+
+    def fetch_geonode(self) -> list:
+        proxies = []
+        try:
+            response = httpx.get(
+                "https://proxylist.geonode.com/api/proxy-list?protocols=http%2Chttps&limit=100&page=1&sort_by=speed&sort_type=asc",
+                headers=get_headers(),
+                timeout=MAX_TIMEOUT
+            )
+            data = response.json()
+            proxies = [(f"http://{p['ip']}:{p['port']}", "Geonode") for p in data.get('data', [])]
+
+        except Exception as e:
+            print(f"[bold red]Error in Geonode:[/bold red] {str(e)[:100]}")
+
+        return proxies
+
+    def fetch_proxyscrape(self) -> list:
+        proxies = []
+        try:
+            response = httpx.get(
+                "https://api.proxyscrape.com/v4/free-proxy-list/get?request=get_proxies&protocol=http&skip=0&proxy_format=protocolipport&format=json&limit=100&timeout=1000",
+                headers=get_headers(),
+                timeout=MAX_TIMEOUT
+            )
+            data = response.json()
+            if 'proxies' in data and isinstance(data['proxies'], list):
+                proxies = [(proxy_data['proxy'], "ProxyScrape") for proxy_data in data['proxies'] if 'proxy' in proxy_data]
+
+        except Exception as e:
+            print(f"[bold red]Error in ProxyScrape:[/bold red] {str(e)[:100]}")
+
+        return proxies
+
+    def fetch_proxies_from_sources(self) -> list:
+        print("[cyan]Fetching proxies from sources...[/cyan]")
+        with ThreadPoolExecutor(max_workers=3) as executor:
+            proxyscrape_future = executor.submit(self.fetch_proxyscrape)
+            geonode_future = executor.submit(self.fetch_geonode)
+
+            sources_proxies = {}
+
+            try:
+                proxyscrape_result = proxyscrape_future.result()
+                sources_proxies["proxyscrape"] = proxyscrape_result[:int(self.max_proxies/2)]
+            except Exception as e:
+                print(f"[bold red]Error fetching from proxyscrape:[/bold red] {str(e)[:100]}")
+                sources_proxies["proxyscrape"] = []
+
+            try:
+                geonode_result = geonode_future.result()
+                sources_proxies["geonode"] = geonode_result[:int(self.max_proxies/2)]
+            except Exception as e:
+                print(f"[bold red]Error fetching from geonode:[/bold red] {str(e)[:100]}")
+                sources_proxies["geonode"] = []
+
+            merged_proxies = []
+
+            if "proxyscrape" in sources_proxies:
+                merged_proxies.extend(sources_proxies["proxyscrape"])
+
+            if "geonode" in sources_proxies:
+                merged_proxies.extend(sources_proxies["geonode"])
+
+            proxy_list = merged_proxies[:self.max_proxies]
+            return proxy_list
+
+    def _test_single_request(self, proxy_info: tuple) -> tuple:
+        proxy, source = proxy_info
+        try:
+            start = time.time()
+            with httpx.Client(proxy=proxy, timeout=self.timeout_threshold) as client:
+                response = client.get(self.url, headers=get_headers())
+                if response.status_code == 200:
+                    return (True, time.time() - start, response, source)
+        except Exception:
+            pass
+        return (False, self.timeout_threshold + 1, None, source)
+
+    def test_proxy(self, proxy_info: tuple) -> tuple:
+        proxy, source = proxy_info
+        if self.shutdown_flag:
+            return (proxy, False, 0, None, source)
+
+        success1, time1, text1, source = self._test_single_request(proxy_info)
+        if not success1 or time1 > self.timeout_threshold:
+            return (proxy, False, time1, None, source)
+
+        success2, time2, _, source = self._test_single_request(proxy_info)
+        avg_time = (time1 + time2) / 2
+        return (proxy, success2 and time2 <= self.timeout_threshold, avg_time, text1, source)
+
+    def _handle_interrupt(self, sig, frame):
+        print("\n[bold yellow]Received keyboard interrupt. Terminating...[/bold yellow]")
+        self.shutdown_flag = True
+        sys.exit(0)
+
+    def find_fast_proxy(self) -> tuple:
+        saved_proxies, last_update = self.load_saved_proxies()
+        if saved_proxies:
+            print("[cyan]Testing saved proxy...[/cyan]")
+            for proxy_data in saved_proxies:
+                result = self.test_proxy((proxy_data['proxy'], 'cached'))
+                if result[1]:
+                    return proxy_data['proxy'], result[3], result[2]
+                else:
+                    print(f"[red]Saved proxy {proxy_data['proxy']} failed - response time: {result[2]:.2f}s[/red]")
+
+        proxies = self.fetch_proxies_from_sources()
+        if not proxies:
+            print("[bold red]No proxies fetched to test.[/bold red]")
+            return (None, None, None)
+
+        found_proxy = None
+        response_text = None
+        source = None
+        failed_count = 0
+        success_count = 0
+
+        #print(f"[cyan]Testing {len(proxies)} proxies...[/cyan]")
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            futures = {executor.submit(self.test_proxy, p): p for p in proxies}
+            with Progress(
+                SpinnerColumn(),
+                TextColumn("[progress.description]{task.description}"),
+                BarColumn(),
+                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
+                TextColumn("[cyan]{task.fields[success]}[/cyan]/[red]{task.fields[failed]}[/red]"),
+                TimeRemainingColumn(),
+            ) as progress:
+                task = progress.add_task(
+                    "[cyan]Testing Proxies",
+                    total=len(futures),
+                    success=success_count,
+                    failed=failed_count
+                )
+
+                for future in as_completed(futures):
+                    if self.shutdown_flag:
+                        break
+
+                    try:
+                        proxy, success, elapsed, response, proxy_source = future.result()
+                        if success:
+                            success_count += 1
+                            print(f"[bold green]Found valid proxy:[/bold green] {proxy} ({elapsed:.2f}s)")
+                            found_proxy = proxy
+                            response_text = response
+                            self.save_working_proxy(proxy, elapsed)
+                            self.shutdown_flag = True
+                            break
+                        else:
+                            failed_count += 1
+                    except Exception:
+                        failed_count += 1
+
+                    progress.update(task, advance=1, success=success_count, failed=failed_count)
+
+                if not found_proxy:
+                    print("[bold red]No working proxies found[/bold red]")
+
+        return (found_proxy, response_text, source)
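ProxyFinder caches a single working proxy in working_proxies.json for two hours, re-validates it on startup, and only then falls back to the Geonode and ProxyScrape lists, probing candidates concurrently and stopping at the first one that answers the target URL twice within timeout_threshold. A minimal usage sketch (the target URL is a placeholder; the second returned element is the httpx.Response from the successful probe, which the streamingwatch site module reuses to extract its search nonce):

    from StreamingCommunity.Lib.Proxies.proxy import ProxyFinder

    finder = ProxyFinder("https://example.tld", timeout_threshold=5.0)  # placeholder URL
    proxy, probe_response, source = finder.find_fast_proxy()
    if proxy:
        print(f"Working proxy: {proxy}")  # probe_response can be parsed without a second request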