kinopoisk_parser 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/kinopoisk_parser.gemspec +1 -1
- data/lib/kinopoisk/movie.rb +46 -16
- data/lib/kinopoisk/person.rb +20 -0
- data/lib/kinopoisk/search.rb +2 -0
- data/lib/kinopoisk_parser.rb +1 -0
- data/spec/kinopoisk/movie_spec.rb +4 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MDM1MGUwYTYyZmE4OGZlOGFhOTIyZDYwYjY0N2ZjYjI0MTE0OWUyYQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MmMyMzIxNzYyNzM2YzU5YzVmZmQyMTY3N2E0ZWZjNWMxODE3ZTc4NQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZDg0YjI2YjJhYjFhZmE5MDIwNDg4N2JkMjZlNDA0NDY0NGU2ZGRmZGMxMTQy
|
10
|
+
ZWZlMmRkOTczYjNkYzA2MzUxZTU2NTU4ZDQ4ZDhmZjBjZTM5M2MzMDUzYzRh
|
11
|
+
ZGFiYTM3NDIzNjEzZmVlNmMyMjYxYzA4MDBmMTBkZjI1MjBjYWI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
M2E0MGQ3MDVmNmZlZmRkNjhjNDRlN2FkYTcwOTg5Njc2ZjljZWM2NjZlYWVj
|
14
|
+
ZjJjZmNjNDhkN2M5NDgyN2UzNmE3OGI0MTUzNTM1MDkwZWI0YjUzNDRiZDhj
|
15
|
+
ZmMxYjdlZDVmZTBjZWU3ODI4NjNiY2I1ODIyNzQxYjNhNDRhMTk=
|
data/kinopoisk_parser.gemspec
CHANGED
data/lib/kinopoisk/movie.rb
CHANGED
@@ -3,7 +3,9 @@ module Kinopoisk
|
|
3
3
|
class Movie
|
4
4
|
attr_accessor :id, :url, :title
|
5
5
|
|
6
|
-
# New
|
6
|
+
# A new instance can be initialized with an id (integer) or a title (string). The second
|
7
|
+
# argument may also receive a string title to make it easier to
|
8
|
+
# differentiate Kinopoisk::Movie instances.
|
7
9
|
#
|
8
10
|
# Kinopoisk::Movie.new 277537
|
9
11
|
# Kinopoisk::Movie.new 'Dexter'
|
@@ -17,115 +19,143 @@ module Kinopoisk
|
|
17
19
|
@title = title
|
18
20
|
end
|
19
21
|
|
22
|
+
# Returns an array of strings containing actor names
|
20
23
|
def actors
|
21
24
|
doc.search('td.actor_list div a').map{|n| n.text.gsub("\n",'').strip}
|
22
25
|
.delete_if{|text| text=='...'}
|
23
26
|
end
|
24
27
|
|
28
|
+
# Returns a string containing the title in Russian
|
25
29
|
def title
|
26
30
|
@title ||= doc.search('.moviename-big').xpath('text()').text.strip
|
27
31
|
end
|
28
32
|
|
33
|
+
# Returns the IMDb rating vote count as an integer
|
29
34
|
def imdb_rating_count
|
30
35
|
doc.search('div.block_2 div:last').text.gsub(/[ ()]/, '').to_i
|
31
36
|
end
|
32
37
|
|
38
|
+
# Returns the IMDb rating as a float
|
33
39
|
def imdb_rating
|
34
40
|
doc.search('div.block_2 div:last').text[/\d.\d\d/].to_f
|
35
41
|
end
|
36
42
|
|
43
|
+
# Returns an integer release year
|
37
44
|
def year
|
38
45
|
doc.search("table.info a[href*='/m_act%5Byear%5D/']").text.to_i
|
39
46
|
end
|
40
47
|
|
48
|
+
# Returns an array of strings containing countries
|
41
49
|
def countries
|
42
50
|
doc.search("table.info a[href*='/m_act%5Bcountry%5D/']").map(&:text)
|
43
51
|
end
|
44
52
|
|
53
|
+
# Returns a string containing budget for the movie
|
45
54
|
def budget
|
46
55
|
doc.search("//td[text()='бюджет']/following-sibling::*//a").text
|
47
56
|
end
|
48
57
|
|
49
|
-
|
58
|
+
# Returns a string containing Russia box-office
|
59
|
+
def box_office_ru
|
50
60
|
doc.search("td#div_rus_box_td2 a").text
|
51
61
|
end
|
52
62
|
|
53
|
-
|
63
|
+
# Returns a string containing USA box-office
|
64
|
+
def box_office_us
|
54
65
|
doc.search("td#div_usa_box_td2 a").text
|
55
66
|
end
|
56
67
|
|
57
|
-
|
68
|
+
# Returns a string containing world box-office
|
69
|
+
def box_office_world
|
58
70
|
doc.search("td#div_world_box_td2 a").text
|
59
71
|
end
|
60
72
|
|
73
|
+
# Returns a url to a small sized poster
|
61
74
|
def poster
|
62
75
|
doc.search("img[itemprop='image']").first.attr 'src'
|
63
76
|
end
|
64
77
|
|
78
|
+
# Returns a string containing world premiere date
|
65
79
|
def premiere_world
|
66
80
|
doc.search('td#div_world_prem_td2 a:first').text
|
67
81
|
end
|
68
82
|
|
83
|
+
# Returns a string containing Russian premiere date
|
69
84
|
def premiere_ru
|
70
85
|
doc.search('td#div_rus_prem_td2 a:first').text
|
71
86
|
end
|
72
87
|
|
88
|
+
# Returns a float kinopoisk rating
|
73
89
|
def rating
|
74
90
|
doc.search('span.rating_ball').text.to_f
|
75
91
|
end
|
76
92
|
|
93
|
+
# Returns a url to a big sized poster
|
77
94
|
def poster_big
|
78
95
|
poster.gsub 'film', 'film_big'
|
79
96
|
end
|
80
97
|
|
98
|
+
# Returns an integer length of the movie in minutes
|
81
99
|
def length
|
82
100
|
doc.search('td#runtime').text.to_i
|
83
101
|
end
|
84
102
|
|
103
|
+
# Returns a string containing the title in English
|
104
|
+
def title_en
|
105
|
+
search_by_itemprop 'alternativeHeadline'
|
106
|
+
end
|
107
|
+
|
108
|
+
# Returns a string containing movie description
|
109
|
+
def description
|
110
|
+
search_by_itemprop 'description'
|
111
|
+
end
|
112
|
+
|
113
|
+
# Returns an integer kinopoisk rating vote count
|
114
|
+
def rating_count
|
115
|
+
search_by_itemprop('ratingCount').to_i
|
116
|
+
end
|
117
|
+
|
118
|
+
# Returns an array of strings containing director names
|
85
119
|
def directors
|
86
120
|
to_array search_by_itemprop 'director'
|
87
121
|
end
|
88
122
|
|
123
|
+
# Returns an array of strings containing producer names
|
89
124
|
def producers
|
90
125
|
to_array search_by_itemprop 'producer'
|
91
126
|
end
|
92
127
|
|
128
|
+
# Returns an array of strings containing composer names
|
93
129
|
def composers
|
94
130
|
to_array search_by_itemprop 'musicBy'
|
95
131
|
end
|
96
132
|
|
133
|
+
# Returns an array of strings containing genres
|
97
134
|
def genres
|
98
135
|
to_array search_by_itemprop 'genre'
|
99
136
|
end
|
100
137
|
|
101
|
-
|
102
|
-
search_by_itemprop 'alternativeHeadline'
|
103
|
-
end
|
104
|
-
|
105
|
-
def description
|
106
|
-
search_by_itemprop 'description'
|
107
|
-
end
|
108
|
-
|
109
|
-
def rating_count
|
110
|
-
search_by_itemprop('ratingCount').to_i
|
111
|
-
end
|
112
|
-
|
138
|
+
# Returns an array of strings containing writer names
|
113
139
|
def writers
|
114
140
|
to_array search_by_text 'сценарий'
|
115
141
|
end
|
116
142
|
|
143
|
+
# Returns an array of strings containing operator names
|
117
144
|
def operators
|
118
145
|
to_array search_by_text 'оператор'
|
119
146
|
end
|
120
147
|
|
148
|
+
# Returns an array of strings containing art director names
|
121
149
|
def art_directors
|
122
150
|
to_array search_by_text 'художник'
|
123
151
|
end
|
124
152
|
|
153
|
+
# Returns an array of strings containing editor names
|
125
154
|
def editors
|
126
155
|
to_array search_by_text 'монтаж'
|
127
156
|
end
|
128
157
|
|
158
|
+
# Returns a string containing movie slogan
|
129
159
|
def slogan
|
130
160
|
search_by_text 'слоган'
|
131
161
|
end
|
data/lib/kinopoisk/person.rb
CHANGED
@@ -3,60 +3,80 @@ module Kinopoisk
|
|
3
3
|
class Person
|
4
4
|
attr_accessor :id, :url, :name
|
5
5
|
|
6
|
+
# A new instance can be initialized with an id (string or integer) and an optional
|
7
|
+
# name to make it easier to differentiate Kinopoisk::Person instances.
|
8
|
+
#
|
9
|
+
# Kinopoisk::Person.new 13180
|
10
|
+
#
|
11
|
+
# The request to Kinopoisk is made only once, on the first access to remote data.
|
12
|
+
#
|
6
13
|
def initialize(id, name=nil)
|
7
14
|
@id = id
|
8
15
|
@url = "http://www.kinopoisk.ru/name/#{id}/"
|
9
16
|
@name = name
|
10
17
|
end
|
11
18
|
|
19
|
+
# Returns a url to a poster
|
12
20
|
def poster
|
13
21
|
doc.search('img.people_thumbnail').first.attr 'src'
|
14
22
|
end
|
15
23
|
|
24
|
+
# Returns a string containing the name in Russian
|
16
25
|
def name
|
17
26
|
@name ||= doc.search('.moviename-big').text
|
18
27
|
end
|
19
28
|
|
29
|
+
# Returns a string containing the name in English
|
20
30
|
def name_en
|
21
31
|
doc.search("//tr[./td/h1[@class='moviename-big']]/following-sibling::tr//span").text
|
22
32
|
end
|
23
33
|
|
34
|
+
# Returns a string containing partner's name
|
24
35
|
def partner
|
25
36
|
doc.search("//td[@class='type'][contains(text(),'супруг')]/following-sibling::*").text
|
26
37
|
end
|
27
38
|
|
39
|
+
# Returns the birthdate as a Date object
|
28
40
|
def birthdate
|
29
41
|
Date.strptime doc.search("td.birth").first.attr 'birthdate'
|
30
42
|
end
|
31
43
|
|
44
|
+
# Returns a string containing birthplace
|
32
45
|
def birthplace
|
33
46
|
search_by_text('место рождения').split(', ').first
|
34
47
|
end
|
35
48
|
|
49
|
+
# Returns an array of strings containing genres
|
36
50
|
def genres
|
37
51
|
search_by_text('жанры').split(', ')
|
38
52
|
end
|
39
53
|
|
54
|
+
# Returns an array of strings containing career professions
|
40
55
|
def career
|
41
56
|
search_by_text('карьера').split(', ')
|
42
57
|
end
|
43
58
|
|
59
|
+
# Returns an integer total movie count
|
44
60
|
def total_movies
|
45
61
|
search_by_text('всего фильмов').to_i
|
46
62
|
end
|
47
63
|
|
64
|
+
# Returns an array of strings containing best movie titles
|
48
65
|
def best_movies
|
49
66
|
doc.search('td.actor_list a').map(&:text)
|
50
67
|
end
|
51
68
|
|
69
|
+
# Returns a string containing year of first movie
|
52
70
|
def first_movie
|
53
71
|
search_by_text 'первый фильм'
|
54
72
|
end
|
55
73
|
|
74
|
+
# Returns a string containing year of last movie
|
56
75
|
def last_movie
|
57
76
|
search_by_text 'последний фильм'
|
58
77
|
end
|
59
78
|
|
79
|
+
# Returns a string containing height
|
60
80
|
def height
|
61
81
|
search_by_text 'рост'
|
62
82
|
end
|
data/lib/kinopoisk/search.rb
CHANGED
@@ -8,10 +8,12 @@ module Kinopoisk
|
|
8
8
|
@url = SEARCH_URL + query.to_s
|
9
9
|
end
|
10
10
|
|
11
|
+
# Returns an array containing Kinopoisk::Movie instances
|
11
12
|
def movies
|
12
13
|
find_nodes('film').map{|n| new_movie n }
|
13
14
|
end
|
14
15
|
|
16
|
+
# Returns an array containing Kinopoisk::Person instances
|
15
17
|
def people
|
16
18
|
find_nodes('people').map{|n| new_person n }
|
17
19
|
end
|
data/lib/kinopoisk_parser.rb
CHANGED
@@ -13,6 +13,7 @@ module Kinopoisk
|
|
13
13
|
HTTPClient.new.get url, nil, { 'User-Agent'=>'a', 'Accept-Encoding'=>'a' }
|
14
14
|
end
|
15
15
|
|
16
|
+
# Returns a Nokogiri document, or raises NotFound if the fetch response status is not 200
|
16
17
|
def self.parse(url)
|
17
18
|
p = fetch url
|
18
19
|
p.status==200 ? Nokogiri::HTML(p.body.encode('utf-8')) : raise(NotFound)
|
data/spec/kinopoisk/movie_spec.rb
CHANGED
@@ -29,11 +29,11 @@ describe Kinopoisk::Movie, vcr: { cassette_name: 'movies' } do
|
|
29
29
|
it { dexter.imdb_rating.should be_a(Float) }
|
30
30
|
it { dexter.rating_count.should be_a(Integer) }
|
31
31
|
it { dexter.rating.should be_a(Float) }
|
32
|
-
it { dexter.
|
32
|
+
it { dexter.box_office_ru.should eq('') }
|
33
33
|
|
34
|
-
it { avatar.
|
35
|
-
it { avatar.
|
36
|
-
it { avatar.
|
34
|
+
it { avatar.box_office_world.should match('[$\d]') }
|
35
|
+
it { avatar.box_office_ru.should match('[$\d]') }
|
36
|
+
it { avatar.box_office_us.should match('[$\d]') }
|
37
37
|
it { avatar.budget.should eq("$237 000 000") }
|
38
38
|
it { avatar.length.should eq(162) }
|
39
39
|
|