python-filmaffinity 0.0.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- python_filmaffinity/__init__.py +254 -0
- python_filmaffinity/__meta__.py +19 -0
- python_filmaffinity/client.py +379 -0
- python_filmaffinity/config.py +11 -0
- python_filmaffinity/exceptions.py +38 -0
- python_filmaffinity/pages/__init__.py +5 -0
- python_filmaffinity/pages/detail.py +202 -0
- python_filmaffinity/pages/images.py +64 -0
- python_filmaffinity/pages/page.py +209 -0
- python_filmaffinity/pages/search.py +10 -0
- python_filmaffinity/pages/top.py +6 -0
- python_filmaffinity/pages/top_service.py +15 -0
- python_filmaffinity/proxies.py +28 -0
- python_filmaffinity-0.0.21.dist-info/METADATA +427 -0
- python_filmaffinity-0.0.21.dist-info/RECORD +19 -0
- python_filmaffinity-0.0.21.dist-info/WHEEL +5 -0
- python_filmaffinity-0.0.21.dist-info/licenses/AUTHORS.rst +15 -0
- python_filmaffinity-0.0.21.dist-info/licenses/LICENSE.rst +22 -0
- python_filmaffinity-0.0.21.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
from .__meta__ import *
|
|
3
|
+
from .client import Client
|
|
4
|
+
from .config import FIELDS_TYPE
|
|
5
|
+
|
|
6
|
+
class FilmAffinity(Client):
|
|
7
|
+
|
|
8
|
+
def get_movie(self, trailer=False, images=False, **kwargs):
    """Return a dictionary with the data of a single movie.

    Args:
        trailer: Enable/disable the lookup of the movie trailer.
        images: Enable/disable the lookup of the movie images.
        **kwargs: Lookup criteria. ``id`` fetches the movie page directly
            and takes precedence over every other criterion. Any other
            keyword (for example ``title``) is passed to
            ``_get_movie_by_args`` as a ``(key, value)`` pair.

    Returns:
        dict: The movie data, or an empty dict when no criterion was
        given or none of them produced a movie.
    """
    # A direct id lookup is unambiguous, so it wins over search terms and
    # no search request is spent on a result that would be discarded.
    if 'id' in kwargs:
        return self._get_movie_by_id(kwargs['id'], trailer, images)

    # Keep the first criterion that yields a movie. Previously each
    # keyword overwrote the result of the one before it, so a trailing
    # unsupported keyword replaced a found movie with {}.
    for key, value in kwargs.items():
        movie = self._get_movie_by_args(key, value, trailer, images)
        if movie:
            return movie
    return {}
|
|
29
|
+
|
|
30
|
+
def search(self, top=10, **kwargs):
    """Return a list with the data of the movies matching a search.

    Args:
        top: Maximum number of movies to return (capped at 20).
        title: Search by title
        director: Search by director
        cast: Search by cast
        from_year: Search from the year
        to_year: Search until the year
        genre:
            AN: Animation
            AV: Adventure
            BE: War
            C-F: Science fiction
            F-N: Film noir
            CO: Comedy
            DESC: Unknown
            DO: Documentary
            DR: Drama
            FAN: Fantasy
            INF: Children
            INT: Mystery
            MU: Musical
            RO: Romance
            TV_SE: TV series
            TE: Horror
            WE: Western
    Returns:
        TYPE: List with movies data
    """
    # Imported locally because this module has no urllib import.
    from urllib.parse import quote

    top = min(top, 20)

    parts = []
    has_extra_filter = False
    for key, value in kwargs.items():
        # Encode every value so characters such as '&', '#' or '=' cannot
        # break the query string.
        encoded = quote(str(value), safe='')
        if key in FIELDS_TYPE:
            if key != 'title':
                has_extra_filter = True
            parts.append('stext=%s&stype[]=%s&' % (encoded, key))
        elif key == 'from_year':
            has_extra_filter = True
            parts.append('fromyear=%s&' % encoded)
        elif key == 'to_year':
            has_extra_filter = True
            parts.append('toyear=%s&' % encoded)
        elif key == 'genre':
            has_extra_filter = True
            parts.append('genre=%s&' % encoded)
    advanced_url = self.url + 'advsearch.php?' + ''.join(parts)

    # The simple search endpoint only understands a title. Any year or
    # genre filter must go through the advanced search, otherwise the
    # filter would be silently ignored.
    if 'title' in kwargs and not has_extra_filter:
        simple_url = (self.url + 'search.php?stype=title&stext='
                      + quote(str(kwargs['title']), safe=''))
        movies = self._return_list_movies(
            self._load_url(simple_url), 'search', top)
        if movies:
            return movies
        # No hit: fall through and retry with the advanced search, using
        # the advanced query format (stype[]=...).

    return self._return_list_movies(
        self._load_url(advanced_url), 'search', top)
|
|
88
|
+
|
|
89
|
+
def top_filmaffinity(self, top=10, **kwargs):
    """Return a list with the top FilmAffinity movies.

    Args:
        top: Maximum number of movies to return (capped at 30).
        from_year: Search from the year
        to_year: Search until the year
    Returns:
        TYPE: List with movies data
    """
    limit = top
    if limit > 30:
        limit = 30

    # Query fragments are emitted in the order the keywords were passed;
    # unrecognised keywords are ignored.
    templates = {'from_year': 'fromyear=%s&', 'to_year': 'toyear=%s&'}
    query = ''.join(
        templates[name] % val
        for name, val in kwargs.items()
        if name in templates
    )

    address = self.url + 'topgen.php'
    if query:
        address = address + '?' + query
    return self._return_list_movies(self._load_url(address), 'top', limit)
|
|
115
|
+
|
|
116
|
+
def top_tv_series(self, top=10, **kwargs):
    """Return a list with the top TV series.

    Args:
        top: Maximum number of entries to return (capped at 30).
        from_year: Search from the year
        to_year: Search until the year
    Returns:
        TYPE: List with movies data
    """
    limit = top
    if limit > 30:
        limit = 30

    # Same listing as the general top, restricted to the TV_SE genre.
    address = self.url + 'topgen.php?genre=TV_SE'
    templates = {'from_year': 'fromyear=%s&', 'to_year': 'toyear=%s&'}
    query = ''.join(
        templates[name] % val
        for name, val in kwargs.items()
        if name in templates
    )
    if query:
        address = address + '&' + query
    return self._return_list_movies(self._load_url(address), 'top', limit)
|
|
142
|
+
|
|
143
|
+
def top_dvd(self, top=10):
    """Return a list with the top new DVDs.

    Args:
        top: Maximum number of movies to return (capped at 40).
    Returns:
        TYPE: List with movies data
    """
    limit = top
    if limit > 40:
        limit = 40
    # The Spanish site uses its own listing page; every other language
    # is sent to the US DVD listing.
    page_name = ('topcat_new_sa_es.html' if self.lang == 'es'
                 else 'topcat_DVD_VID_US.html')
    listing = self._load_url(self.url + page_name)
    return self._return_list_movies(listing, 'top_service', limit)
|
|
158
|
+
|
|
159
|
+
def top_premieres(self, top=10):
    """Return a list with the movies of the premieres listing.

    Args:
        top: Maximum number of movies to return (capped at 40).
    Returns:
        TYPE: List with movies data
    """
    limit = top
    if limit > 40:
        limit = 40
    # NOTE(review): the page name is the "_es" one for every language.
    listing = self._load_url(self.url + 'topcat_new_th_es.html')
    return self._return_list_movies(listing, 'top_service', limit)
|
|
175
|
+
|
|
176
|
+
def top_netflix(self, top=10):
    """Return a list with the top Netflix movies.

    Args:
        top: Maximum number of movies to return.
    Returns:
        TYPE: List with movies data
    """
    service_id = 'new_netflix'
    return self._top_service(top, service_id)
|
|
183
|
+
|
|
184
|
+
def top_hbo(self, top=10):
    """Return a list with the top HBO movies.

    Args:
        top: Maximum number of movies to return.
    Returns:
        TYPE: List with movies data
    """
    service_id = 'new_hbo_es'
    return self._top_service(top, service_id)
|
|
191
|
+
|
|
192
|
+
def top_filmin(self, top=10):
    """Return a list with the top Filmin movies.

    Args:
        top: Maximum number of movies to return.
    Returns:
        TYPE: List with movies data
    """
    service_id = 'new_filmin'
    return self._top_service(top, service_id)
|
|
199
|
+
|
|
200
|
+
def top_movistar(self, top=10):
    """Return a list with the top Movistar movies.

    Args:
        top: Maximum number of movies to return.
    Returns:
        TYPE: List with movies data
    """
    service_id = 'new_movistar_f'
    return self._top_service(top, service_id)
|
|
207
|
+
|
|
208
|
+
def top_rakuten(self, top=10):
    """Return a list with the top Rakuten movies.

    Args:
        top: Maximum number of movies to return.
    Returns:
        TYPE: List with movies data
    """
    service_id = 'new_rakuten'
    return self._top_service(top, service_id)
|
|
215
|
+
|
|
216
|
+
def recommend_netflix(self, trailer=False, images=False):
    """Return a random movie from the Netflix listing.

    Args:
        trailer: Enable/disable the lookup of the movie trailer.
        images: Enable/disable the lookup of the movie images.
    Returns:
        TYPE: Movie data
    """
    service_id = 'new_netflix'
    return self._recommend(service_id, trailer, images)
|
|
223
|
+
|
|
224
|
+
def recommend_hbo(self, trailer=False, images=False):
    """Return a random movie from the HBO listing.

    Args:
        trailer: Enable/disable the lookup of the movie trailer.
        images: Enable/disable the lookup of the movie images.
    Returns:
        TYPE: Movie data
    """
    service_id = 'new_hbo_es'
    return self._recommend(service_id, trailer, images)
|
|
231
|
+
|
|
232
|
+
def recommend_movistar(self, trailer=False, images=False):
    """Return a random movie from the Movistar listing.

    Args:
        trailer: Enable/disable the lookup of the movie trailer.
        images: Enable/disable the lookup of the movie images.
    Returns:
        TYPE: Movie data
    """
    service_id = 'new_movistar_f'
    return self._recommend(service_id, trailer, images)
|
|
239
|
+
|
|
240
|
+
def recommend_rakuten(self, trailer=False, images=False):
    """Return a random movie from the Rakuten listing.

    Args:
        trailer: Enable/disable the lookup of the movie trailer.
        images: Enable/disable the lookup of the movie images.
    Returns:
        TYPE: Movie data
    """
    service_id = 'new_rakuten'
    return self._recommend(service_id, trailer, images)
|
|
247
|
+
|
|
248
|
+
def recommend_filmin(self, trailer=False, images=False):
    """Return a random movie from the Filmin listing.

    Args:
        trailer: Enable/disable the lookup of the movie trailer.
        images: Enable/disable the lookup of the movie images.
    Returns:
        TYPE: Movie data
    """
    service_id = 'new_filmin'
    return self._recommend(service_id, trailer, images)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Project metadata.
|
|
2
|
+
"""
|
|
3
|
+
|
|
4
|
+
# Package name, one-line summary and project home page.
__title__ = 'python_filmaffinity'
__summary__ = 'Python wrapper for FilmAffinity'
__url__ = 'https://github.com/sergiormb/python_filmaffinity'

# Version string of this release.
__version__ = '0.0.21'

# Requirement specifiers for the third-party packages imported by the
# client module (bs4, requests, requests_cache).
# NOTE(review): presumably consumed by the packaging script - confirm.
__install_requires__ = [
    'beautifulsoup4>=4.9.1',
    'requests>=2.24.0',
    'requests-cache>=1.0.0',
]

# Author contact details.
__author__ = 'sergiormb'
__email__ = 'sergiormb88@gmail.com'

# License identifier.
__license__ = 'MIT'
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
import requests
|
|
6
|
+
import requests_cache
|
|
7
|
+
import random
|
|
8
|
+
|
|
9
|
+
from bs4 import BeautifulSoup
|
|
10
|
+
from inspect import getsourcefile
|
|
11
|
+
from os.path import join, dirname, abspath
|
|
12
|
+
from .config import FIELDS_MOVIE
|
|
13
|
+
from .pages import DetailPage, SearchPage, TopPage, TopServicePage, ImagesPage
|
|
14
|
+
from .exceptions import (
|
|
15
|
+
FilmAffinityInvalidLanguage,
|
|
16
|
+
FilmAffinityInvalidBackend,
|
|
17
|
+
FilmAffinityConnectionError)
|
|
18
|
+
from .proxies import get_random_proxy
|
|
19
|
+
try:
|
|
20
|
+
from urllib import quote # Python 2.X
|
|
21
|
+
except ImportError:
|
|
22
|
+
from urllib.parse import quote # Python 3+
|
|
23
|
+
|
|
24
|
+
# Directory that contains this source file; Client._get_cache_file uses it
# as the cache location when no cache_path is given.
current_folder = dirname(abspath(getsourcefile(lambda: 0)))
# Language codes accepted by Client.__init__; the code becomes the first
# path segment of every URL the client builds.
supported_languages = ['en', 'es', 'mx', 'ar', 'cl', 'co']
# Pool of browser User-Agent strings; one is picked at random when the
# client is created and again before every request.
USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/125.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_5) AppleWebKit/605.1.15 '
    '(KHTML, like Gecko) Version/17.5 Safari/605.1.15',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/125.0 Safari/537.36',
]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Client:
|
|
37
|
+
"""Client to make requests to FilmAffinity.
|
|
38
|
+
Args:
|
|
39
|
+
lang: Language, one of: ('en', 'es', 'mx', 'ar', 'cl', 'co')
|
|
40
|
+
cache_path: Path to FilmAffinity Database (If not set, the
|
|
41
|
+
database will be stored inside python_filmaffinity path)
|
|
42
|
+
cache_backend: One of (sqlite, mongodb, memory, redis). Set to
|
|
43
|
+
memory or None if you don't wont persistent cache
|
|
44
|
+
(defaults to sqlite).
|
|
45
|
+
Note for backends (from requests-cache docs):
|
|
46
|
+
'sqlite' - sqlite database
|
|
47
|
+
'memory' - not persistent,
|
|
48
|
+
stores all data in Python dict in memory
|
|
49
|
+
'mongodb' - (experimental) MongoDB database
|
|
50
|
+
(pymongo < 3.0 required)
|
|
51
|
+
'redis' - stores all data on a redis data store
|
|
52
|
+
(redis required)
|
|
53
|
+
cache_expires: Time in seconds to force new requests from the
|
|
54
|
+
server (defaults to 86400, 24 hours)
|
|
55
|
+
cache_remove_expired: Force to remove expired responses after any
|
|
56
|
+
requests call. This will ensure that if any call to
|
|
57
|
+
FilmAffinity fails and we already made that call we will get a
|
|
58
|
+
response at a cost of a bigger database file (defaults to True)
|
|
59
|
+
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
base_url = 'https://www.filmaffinity.com/'
|
|
63
|
+
|
|
64
|
+
def __init__(self, lang='es', cache_path=None,
             cache_backend='sqlite', cache_expires=86400,
             cache_remove_expired=True):
    """Validate the options and prepare URLs and cache settings.

    No request is made and no session is opened here; the session is
    created on the first call to ``_load_url``.

    Args:
        lang (str, optional): Language of the page; must be one of
            ``supported_languages``.
        cache_path (str, optional): Directory for the cache file.
        cache_backend (str, optional): One of sqlite, memory, mongodb,
            redis or None (None is stored as 'memory').
        cache_expires (int, optional): Cache lifetime in seconds.
        cache_remove_expired (bool, optional): Whether expired cache
            entries are removed when the session is created.

    Raises:
        FilmAffinityInvalidLanguage: If ``lang`` is not supported.
        FilmAffinityInvalidBackend: If ``cache_backend`` is not accepted.
    """
    accepted_backends = ['sqlite', 'memory', 'mongodb', 'redis', None]
    if lang not in supported_languages:
        raise FilmAffinityInvalidLanguage(lang, supported_languages)
    if cache_backend not in accepted_backends:
        raise FilmAffinityInvalidBackend(cache_backend)

    # Every URL hangs off the language-specific root.
    self.lang = lang
    root = self.base_url + lang + '/'
    self.url = root
    self.url_film = root + 'film'
    self.url_images = root + 'filmimages.php?movie_id='
    self.url_trailers = root + 'evideos.php?movie_id='

    # requests-cache settings; cache_backend must be stored before
    # _get_cache_file is called because that method reads it.
    self.cache_expires = cache_expires
    self.cache_backend = cache_backend if cache_backend else 'memory'
    self.cache_path = self._get_cache_file(cache_path)
    self.cache_remove_expired = cache_remove_expired
    self.session = None
    self.session_headers = {'User-Agent': random.choice(USER_AGENTS)}
|
|
94
|
+
|
|
95
|
+
def _generate_new_session_headers(self):
    """Replace the stored headers with a randomly chosen User-Agent."""
    agent = random.choice(USER_AGENTS)
    self.session_headers = {'User-Agent': agent}
|
|
99
|
+
|
|
100
|
+
def _get_cache_file(self, cache_path=None):
    """Return the cache name handed to requests-cache.

    Args:
        cache_path: Optional directory for the cache file.

    Returns:
        str: The bare cache name for the memory backend, otherwise the
        name joined to ``cache_path`` or, when that is empty, to the
        folder of this module.
    """
    cache_name = "cache-film-affinity"
    if self.cache_backend in ['memory']:
        return cache_name
    base_dir = cache_path if cache_path else current_folder
    return join(base_dir, cache_name)
|
|
111
|
+
|
|
112
|
+
def _get_requests_session(self):
    """Create the cached requests session and store it on ``self.session``.

    When ``cache_remove_expired`` is set, expired entries are removed
    right after the session is created, using whichever removal API the
    session or its cache object exposes.
    """
    self.session = requests_cache.CachedSession(
        cache_name=self.cache_path,
        backend=self.cache_backend,
        expire_after=self.cache_expires,
        include_get_headers=True,
        old_data_on_error=True,
    )
    if not self.cache_remove_expired:
        return

    # Preferred API: cache.delete(expired=True).
    if hasattr(self.session.cache, 'delete'):
        self.session.cache.delete(expired=True)
        return

    # Otherwise look for remove_expired_responses on the session first,
    # then on the cache object; do nothing if neither exists.
    purge = getattr(self.session, 'remove_expired_responses', None)
    if purge is None:
        purge = getattr(self.session.cache, 'remove_expired_responses', None)
    if purge:
        purge()
|
|
132
|
+
|
|
133
|
+
def _load_url(self, url, headers=None, verify=None,
              timeout=3, force_server_response=False):
    """Fetch ``url`` through the cached session and return the response.

    Args:
        url: Address to request.
        headers: Optional headers that replace the generated ones.
        verify: Optional value for the ``verify`` option of the request;
            forwarded whenever it is not None.
        timeout: Request timeout; omitted from the request when falsy.
        force_server_response: When true the request is made with the
            cache disabled.

    Returns:
        The response object returned by the session.

    Raises:
        FilmAffinityConnectionError: If requests reports a
            ConnectionError.
    """
    # A new random User-Agent is generated for every request.
    self._generate_new_session_headers()
    kwargs = {'headers': headers if headers else self.session_headers}
    proxies = get_random_proxy()
    if proxies:
        kwargs['proxies'] = proxies
    # Previously guarded by `if headers:`, so `verify` was dropped unless
    # custom headers were passed as well.
    if verify is not None:
        kwargs['verify'] = verify
    if timeout:
        kwargs['timeout'] = timeout
    if not self.session:
        self._get_requests_session()
    try:
        if force_server_response:
            with self.session.cache_disabled():
                response = self.session.get(url, **kwargs)
        else:
            response = self.session.get(url, **kwargs)
    except requests.exceptions.ConnectionError as er:
        # Keep the original error as the cause for easier debugging.
        raise FilmAffinityConnectionError(er) from er
    logging.warning(f"Filmaffinty Client: GET {url}")
    return response
|
|
159
|
+
|
|
160
|
+
def _get_trailer(self, fa_id):
    """Return the ``src`` of every iframe on the trailers page of a movie.

    Args:
        fa_id: Movie id, appended to the trailers URL.

    Returns:
        list: The iframe sources, empty when the page has none.
    """
    page = self._load_url(self.url_trailers + str(fa_id))
    soup = BeautifulSoup(page.content, "html.parser")
    # src=True selects only iframes that have a src attribute; indexing a
    # tag without one would raise KeyError.
    return [frame['src'] for frame in soup.find_all('iframe', src=True)]
|
|
165
|
+
|
|
166
|
+
def _get_movie_images(self, fa_id):
    """Return the images of a movie grouped by category.

    Args:
        fa_id: Movie id, appended to the images URL.

    Returns:
        dict: Keys 'posters', 'stills', 'promo', 'events' and
        'shootings'. Every value is an empty list when the page has no
        ``main-image-wrapper`` element.
    """
    response = self._load_url(self.url_images + str(fa_id))
    soup = BeautifulSoup(response.content, "html.parser")
    if not soup.find_all("div", {"id": 'main-image-wrapper'}):
        categories = ('posters', 'stills', 'promo', 'events', 'shootings')
        return {name: [] for name in categories}

    gallery = ImagesPage(soup)
    images = {}
    images['posters'] = gallery.get_posters()
    images['stills'] = gallery.get_stills()
    images['promo'] = gallery.get_promos()
    images['events'] = gallery.get_events()
    images['shootings'] = gallery.get_shootings()
    return images
|
|
186
|
+
|
|
187
|
+
def _get_movie_data(self, page, fa_id=None):
    """Collect every movie field that ``page`` can provide.

    Each field is read through its own getter on ``page``. A getter that
    raises is logged as a warning and its field stays None, so one
    missing field does not lose the others.

    Args:
        page: Object exposing the ``get_*`` accessors listed below.
        fa_id: Optional movie id; when truthy it is used instead of
            ``page.get_id()``.

    Returns:
        dict: One entry per field, None where the value was unavailable.
    """
    # (result key, getter name on `page`, label used in the log message)
    fields = (
        ('title', 'get_title', 'Title'),
        ('original_title', 'get_original_title', 'Original title'),
        ('year', 'get_year', 'Year'),
        ('duration', 'get_duration', 'Duration'),
        ('rating', 'get_rating', 'Rating'),
        ('votes', 'get_number_of_votes', 'Votes'),
        ('description', 'get_description', 'Description'),
        ('directors', 'get_directors', 'Directors'),
        ('writers', 'get_writers', 'Writers'),
        ('music', 'get_music', 'Music'),
        ('cinematography', 'get_cinematography', 'Cinematography'),
        ('actors', 'get_actors', 'Actors'),
        ('producers', 'get_producers', 'Producers'),
        ('poster', 'get_poster', 'Poster'),
        ('country', 'get_country', 'Country'),
        ('genre', 'get_genre', 'Genre'),
        ('awards', 'get_awards', 'Awards'),
        ('reviews', 'get_reviews', 'Reviews'),
    )

    result = {'id': None}
    result.update((key, None) for key, _, _ in fields)

    # The id comes first because the later log messages mention it.
    try:
        result['id'] = fa_id or page.get_id()
    except Exception as e:
        logging.warning(f"Id field not found: {e}")

    for key, getter, label in fields:
        try:
            # Attribute lookup stays inside the try so that a missing
            # getter is handled like a failing one.
            result[key] = getattr(page, getter)()
        except Exception as e:
            logging.warning(f"{label} field not found for {result.get('id')}: {e}")

    return result
|
|
306
|
+
|
|
307
|
+
def _get_movie_by_id(self, id, trailer=False, images=False):
    """Return the data of the movie with the given id.

    Args:
        id: Movie id used to build the film page URL.
        trailer: When true, add a 'trailer' entry if any trailer is found.
        images: When true, add an 'images' entry.

    Returns:
        dict: The movie data, or an empty dict when the loaded page has
        no ``z-movie`` element.
    """
    page = self._load_url(self.url_film + str(id) + '.html')
    soup = BeautifulSoup(page.content, "html.parser")
    if not soup.find_all("div", {"class": 'z-movie'}):
        # Not a movie page: skip the trailer request and never return a
        # dict that holds nothing but a trailer.
        return {}

    movie = self._get_movie_data(DetailPage(soup), fa_id=id)
    if trailer:
        trailer_url = self._get_trailer(fa_id=id)
        if trailer_url:
            movie['trailer'] = trailer_url
    if images and movie.get('id', False):
        movie['images'] = self._get_movie_images(movie['id'])
    return movie
|
|
322
|
+
|
|
323
|
+
def _get_movie_by_args(self, key, value, trailer=False, images=False):
    """Return the first movie found by an advanced search on one field.

    Args:
        key: Search field; must be listed in ``FIELDS_MOVIE``.
        value: Text to search for.
        trailer: Passed on to ``_get_movie_by_id``.
        images: Passed on to ``_get_movie_by_id``.

    Returns:
        dict: The data of the first result, or an empty dict when the
        field is not supported or the search has no results.
    """
    if key not in FIELDS_MOVIE:
        return {}

    # Encode the text so that '&', '#', '=' and similar characters
    # cannot break the query string.
    url = (self.url + 'advsearch.php?stext=' + quote(str(value), safe='')
           + '&stype[]=%s' % key)
    page = self._load_url(url)
    soup = BeautifulSoup(page.content, "html.parser")
    # NOTE(review): the list parsers in this class select
    # ".movie-card[data-movie-id]"; confirm 'movie-card-1' still matches.
    movies_cell = soup.find_all("div", {"class": 'movie-card-1'})
    if not movies_cell:
        return {}
    movie_id = str(movies_cell[0]['data-movie-id'])
    return self._get_movie_by_id(movie_id, trailer, images)
|
|
337
|
+
|
|
338
|
+
def _return_list_movies(self, page, method, top=10):
    """Parse a listing page into a list of movie dictionaries.

    Args:
        page: Response whose ``content`` holds the listing HTML.
        method: Layout to parse: 'top', 'search' or 'top_service'.
        top: Maximum number of movies to return.

    Returns:
        list: One dict per movie card, as built by ``_get_movie_data``.

    Raises:
        ValueError: If ``method`` is not a known layout (this used to
            fail with an UnboundLocalError).
    """
    card_selector = ".movie-card[data-movie-id]"
    soup = BeautifulSoup(page.content, "html.parser")

    if method == 'top':
        # Cards are only taken from inside the top-movies list.
        movies_list = soup.find("ul", {"id": 'top-movies'})
        movies_cell = movies_list.select(card_selector) if movies_list else []
        class_ = TopPage
    elif method == 'search':
        movies_cell = soup.select(card_selector)
        class_ = SearchPage
    elif method == 'top_service':
        # Prefer cards inside a .top-movie wrapper; fall back to every
        # card on the page when there is none.
        wrapper = soup.select(".top-movie " + card_selector)
        movies_cell = wrapper or soup.select(card_selector)
        class_ = TopServicePage
    else:
        raise ValueError("Unknown list method: %r" % (method,))

    return [self._get_movie_data(class_(cell)) for cell in movies_cell[:top]]
|
|
359
|
+
|
|
360
|
+
def _recommend(self, service, trailer=False, images=False):
    """Return a randomly chosen movie from the listing of a service.

    Args:
        service: Listing id appended to ``topcat.php?id=``.
        trailer: Passed on to ``_get_movie_by_id``.
        images: Passed on to ``_get_movie_by_id``.

    Returns:
        dict: The movie data, or an empty dict when the listing has no
        movie cards.
    """
    listing = self._load_url(self.url + 'topcat.php?id=' + service)
    soup = BeautifulSoup(listing.content, "html.parser")
    candidates = soup.select(".movie-card[data-movie-id]")
    if candidates:
        picked = random.choice(candidates)
        return self._get_movie_by_id(
            str(picked['data-movie-id']), trailer, images)
    return {}
|
|
372
|
+
|
|
373
|
+
def _top_service(self, top, service):
    """Return the movies of the listing of a service.

    Args:
        top: Maximum number of movies to return (capped at 40).
        service: Listing id appended to ``topcat.php?id=``.

    Returns:
        list: Movie dictionaries parsed with the 'top_service' layout.
    """
    limit = top
    if limit > 40:
        limit = 40
    listing = self._load_url(self.url + 'topcat.php?id=' + service)
    return self._return_list_movies(listing, 'top_service', limit)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Config."""
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
# Search fields accepted by Client._get_movie_by_args.
FIELDS_MOVIE = ['title', 'id']
# Keys that FilmAffinity.search turns into "stext=<value>&stype[]=<key>".
FIELDS_TYPE = ['title', 'director', 'cast', 'stext']


# Field-name groups; the last list is the concatenation of the other two.
# NOTE(review): presumably used by the page parsers - confirm.
FIELDS_PAGE_MOVIES = ['id', 'title', 'rating', 'directors', 'poster']
FIELDS_DETAIL = ['description', 'votes', 'year', 'country', 'duration',
                 'genre', 'awards', 'reviews', 'actors']
FIELDS_PAGE_DETAIL = FIELDS_PAGE_MOVIES + FIELDS_DETAIL
|