kinopoisk_parser 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/kinopoisk_parser.gemspec +1 -1
- data/lib/kinopoisk/movie.rb +46 -16
- data/lib/kinopoisk/person.rb +20 -0
- data/lib/kinopoisk/search.rb +2 -0
- data/lib/kinopoisk_parser.rb +1 -0
- data/spec/kinopoisk/movie_spec.rb +4 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MDM1MGUwYTYyZmE4OGZlOGFhOTIyZDYwYjY0N2ZjYjI0MTE0OWUyYQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MmMyMzIxNzYyNzM2YzU5YzVmZmQyMTY3N2E0ZWZjNWMxODE3ZTc4NQ==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZDg0YjI2YjJhYjFhZmE5MDIwNDg4N2JkMjZlNDA0NDY0NGU2ZGRmZGMxMTQy
|
10
|
+
ZWZlMmRkOTczYjNkYzA2MzUxZTU2NTU4ZDQ4ZDhmZjBjZTM5M2MzMDUzYzRh
|
11
|
+
ZGFiYTM3NDIzNjEzZmVlNmMyMjYxYzA4MDBmMTBkZjI1MjBjYWI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
M2E0MGQ3MDVmNmZlZmRkNjhjNDRlN2FkYTcwOTg5Njc2ZjljZWM2NjZlYWVj
|
14
|
+
ZjJjZmNjNDhkN2M5NDgyN2UzNmE3OGI0MTUzNTM1MDkwZWI0YjUzNDRiZDhj
|
15
|
+
ZmMxYjdlZDVmZTBjZWU3ODI4NjNiY2I1ODIyNzQxYjNhNDRhMTk=
|
data/kinopoisk_parser.gemspec
CHANGED
data/lib/kinopoisk/movie.rb
CHANGED
@@ -3,7 +3,9 @@ module Kinopoisk
|
|
3
3
|
class Movie
|
4
4
|
attr_accessor :id, :url, :title
|
5
5
|
|
6
|
-
# New
|
6
|
+
# New instance can be initialized with id(integer) or title(string). Second
|
7
|
+
# argument may also receive a string title to make it easier to
|
8
|
+
# differentiate Kinopoisk::Movie instances.
|
7
9
|
#
|
8
10
|
# Kinopoisk::Movie.new 277537
|
9
11
|
# Kinopoisk::Movie.new 'Dexter'
|
@@ -17,115 +19,143 @@ module Kinopoisk
|
|
17
19
|
@title = title
|
18
20
|
end
|
19
21
|
|
22
|
+
# Returns an array of strings containing actor names
|
20
23
|
def actors
|
21
24
|
doc.search('td.actor_list div a').map{|n| n.text.gsub("\n",'').strip}
|
22
25
|
.delete_if{|text| text=='...'}
|
23
26
|
end
|
24
27
|
|
28
|
+
# Returns a string containing title in russian
|
25
29
|
def title
|
26
30
|
@title ||= doc.search('.moviename-big').xpath('text()').text.strip
|
27
31
|
end
|
28
32
|
|
33
|
+
# Returns an integer imdb rating vote count
|
29
34
|
def imdb_rating_count
|
30
35
|
doc.search('div.block_2 div:last').text.gsub(/[ ()]/, '').to_i
|
31
36
|
end
|
32
37
|
|
38
|
+
# Returns a float imdb rating
|
33
39
|
def imdb_rating
|
34
40
|
doc.search('div.block_2 div:last').text[/\d.\d\d/].to_f
|
35
41
|
end
|
36
42
|
|
43
|
+
# Returns an integer release year
|
37
44
|
def year
|
38
45
|
doc.search("table.info a[href*='/m_act%5Byear%5D/']").text.to_i
|
39
46
|
end
|
40
47
|
|
48
|
+
# Returns an array of strings containing countries
|
41
49
|
def countries
|
42
50
|
doc.search("table.info a[href*='/m_act%5Bcountry%5D/']").map(&:text)
|
43
51
|
end
|
44
52
|
|
53
|
+
# Returns a string containing budget for the movie
|
45
54
|
def budget
|
46
55
|
doc.search("//td[text()='бюджет']/following-sibling::*//a").text
|
47
56
|
end
|
48
57
|
|
49
|
-
|
58
|
+
# Returns a string containing Russia box-office
|
59
|
+
def box_office_ru
|
50
60
|
doc.search("td#div_rus_box_td2 a").text
|
51
61
|
end
|
52
62
|
|
53
|
-
|
63
|
+
# Returns a string containing USA box-office
|
64
|
+
def box_office_us
|
54
65
|
doc.search("td#div_usa_box_td2 a").text
|
55
66
|
end
|
56
67
|
|
57
|
-
|
68
|
+
# Returns a string containing world box-office
|
69
|
+
def box_office_world
|
58
70
|
doc.search("td#div_world_box_td2 a").text
|
59
71
|
end
|
60
72
|
|
73
|
+
# Returns a url to a small sized poster
|
61
74
|
def poster
|
62
75
|
doc.search("img[itemprop='image']").first.attr 'src'
|
63
76
|
end
|
64
77
|
|
78
|
+
# Returns a string containing world premiere date
|
65
79
|
def premiere_world
|
66
80
|
doc.search('td#div_world_prem_td2 a:first').text
|
67
81
|
end
|
68
82
|
|
83
|
+
# Returns a string containing Russian premiere date
|
69
84
|
def premiere_ru
|
70
85
|
doc.search('td#div_rus_prem_td2 a:first').text
|
71
86
|
end
|
72
87
|
|
88
|
+
# Returns a float kinopoisk rating
|
73
89
|
def rating
|
74
90
|
doc.search('span.rating_ball').text.to_f
|
75
91
|
end
|
76
92
|
|
93
|
+
# Returns a url to a big sized poster
|
77
94
|
def poster_big
|
78
95
|
poster.gsub 'film', 'film_big'
|
79
96
|
end
|
80
97
|
|
98
|
+
# Returns an integer length of the movie in minutes
|
81
99
|
def length
|
82
100
|
doc.search('td#runtime').text.to_i
|
83
101
|
end
|
84
102
|
|
103
|
+
# Returns a string containing title in english
|
104
|
+
def title_en
|
105
|
+
search_by_itemprop 'alternativeHeadline'
|
106
|
+
end
|
107
|
+
|
108
|
+
# Returns a string containing movie description
|
109
|
+
def description
|
110
|
+
search_by_itemprop 'description'
|
111
|
+
end
|
112
|
+
|
113
|
+
# Returns an integer kinopoisk rating vote count
|
114
|
+
def rating_count
|
115
|
+
search_by_itemprop('ratingCount').to_i
|
116
|
+
end
|
117
|
+
|
118
|
+
# Returns an array of strings containing director names
|
85
119
|
def directors
|
86
120
|
to_array search_by_itemprop 'director'
|
87
121
|
end
|
88
122
|
|
123
|
+
# Returns an array of strings containing producer names
|
89
124
|
def producers
|
90
125
|
to_array search_by_itemprop 'producer'
|
91
126
|
end
|
92
127
|
|
128
|
+
# Returns an array of strings containing composer names
|
93
129
|
def composers
|
94
130
|
to_array search_by_itemprop 'musicBy'
|
95
131
|
end
|
96
132
|
|
133
|
+
# Returns an array of strings containing genres
|
97
134
|
def genres
|
98
135
|
to_array search_by_itemprop 'genre'
|
99
136
|
end
|
100
137
|
|
101
|
-
|
102
|
-
search_by_itemprop 'alternativeHeadline'
|
103
|
-
end
|
104
|
-
|
105
|
-
def description
|
106
|
-
search_by_itemprop 'description'
|
107
|
-
end
|
108
|
-
|
109
|
-
def rating_count
|
110
|
-
search_by_itemprop('ratingCount').to_i
|
111
|
-
end
|
112
|
-
|
138
|
+
# Returns an array of strings containing writer names
|
113
139
|
def writers
|
114
140
|
to_array search_by_text 'сценарий'
|
115
141
|
end
|
116
142
|
|
143
|
+
# Returns an array of strings containing operator names
|
117
144
|
def operators
|
118
145
|
to_array search_by_text 'оператор'
|
119
146
|
end
|
120
147
|
|
148
|
+
# Returns an array of strings containing art director names
|
121
149
|
def art_directors
|
122
150
|
to_array search_by_text 'художник'
|
123
151
|
end
|
124
152
|
|
153
|
+
# Returns an array of strings containing editor names
|
125
154
|
def editors
|
126
155
|
to_array search_by_text 'монтаж'
|
127
156
|
end
|
128
157
|
|
158
|
+
# Returns a string containing movie slogan
|
129
159
|
def slogan
|
130
160
|
search_by_text 'слоган'
|
131
161
|
end
|
data/lib/kinopoisk/person.rb
CHANGED
@@ -3,60 +3,80 @@ module Kinopoisk
|
|
3
3
|
class Person
|
4
4
|
attr_accessor :id, :url, :name
|
5
5
|
|
6
|
+
# New instance can be initialized with id(string or integer) and an optional
|
7
|
+
# name to make it easier to differentiate Kinopoisk::Person instances.
|
8
|
+
#
|
9
|
+
# Kinopoisk::Person.new 13180
|
10
|
+
#
|
11
|
+
# Request to kinopoisk is made once and on the first access to a remote data.
|
12
|
+
#
|
6
13
|
def initialize(id, name=nil)
|
7
14
|
@id = id
|
8
15
|
@url = "http://www.kinopoisk.ru/name/#{id}/"
|
9
16
|
@name = name
|
10
17
|
end
|
11
18
|
|
19
|
+
# Returns a url to a poster
|
12
20
|
def poster
|
13
21
|
doc.search('img.people_thumbnail').first.attr 'src'
|
14
22
|
end
|
15
23
|
|
24
|
+
# Returns a string containing name in russian
|
16
25
|
def name
|
17
26
|
@name ||= doc.search('.moviename-big').text
|
18
27
|
end
|
19
28
|
|
29
|
+
# Returns a string containing name in english
|
20
30
|
def name_en
|
21
31
|
doc.search("//tr[./td/h1[@class='moviename-big']]/following-sibling::tr//span").text
|
22
32
|
end
|
23
33
|
|
34
|
+
# Returns a string containing partner's name
|
24
35
|
def partner
|
25
36
|
doc.search("//td[@class='type'][contains(text(),'супруг')]/following-sibling::*").text
|
26
37
|
end
|
27
38
|
|
39
|
+
# Returns a birthdate date object
|
28
40
|
def birthdate
|
29
41
|
Date.strptime doc.search("td.birth").first.attr 'birthdate'
|
30
42
|
end
|
31
43
|
|
44
|
+
# Returns a string containing birthplace
|
32
45
|
def birthplace
|
33
46
|
search_by_text('место рождения').split(', ').first
|
34
47
|
end
|
35
48
|
|
49
|
+
# Returns an array of strings containing genres
|
36
50
|
def genres
|
37
51
|
search_by_text('жанры').split(', ')
|
38
52
|
end
|
39
53
|
|
54
|
+
# Returns an array of strings containing career professions
|
40
55
|
def career
|
41
56
|
search_by_text('карьера').split(', ')
|
42
57
|
end
|
43
58
|
|
59
|
+
# Returns an integer total movie count
|
44
60
|
def total_movies
|
45
61
|
search_by_text('всего фильмов').to_i
|
46
62
|
end
|
47
63
|
|
64
|
+
# Returns an array of strings containing best movie titles
|
48
65
|
def best_movies
|
49
66
|
doc.search('td.actor_list a').map(&:text)
|
50
67
|
end
|
51
68
|
|
69
|
+
# Returns a string containing year of first movie
|
52
70
|
def first_movie
|
53
71
|
search_by_text 'первый фильм'
|
54
72
|
end
|
55
73
|
|
74
|
+
# Returns a string containing year of last movie
|
56
75
|
def last_movie
|
57
76
|
search_by_text 'последний фильм'
|
58
77
|
end
|
59
78
|
|
79
|
+
# Returns a string containing height
|
60
80
|
def height
|
61
81
|
search_by_text 'рост'
|
62
82
|
end
|
data/lib/kinopoisk/search.rb
CHANGED
@@ -8,10 +8,12 @@ module Kinopoisk
|
|
8
8
|
@url = SEARCH_URL + query.to_s
|
9
9
|
end
|
10
10
|
|
11
|
+
# Returns an array containing Kinopoisk::Movie instances
|
11
12
|
def movies
|
12
13
|
find_nodes('film').map{|n| new_movie n }
|
13
14
|
end
|
14
15
|
|
16
|
+
# Returns an array containing Kinopoisk::Person instances
|
15
17
|
def people
|
16
18
|
find_nodes('people').map{|n| new_person n }
|
17
19
|
end
|
data/lib/kinopoisk_parser.rb
CHANGED
@@ -13,6 +13,7 @@ module Kinopoisk
|
|
13
13
|
HTTPClient.new.get url, nil, { 'User-Agent'=>'a', 'Accept-Encoding'=>'a' }
|
14
14
|
end
|
15
15
|
|
16
|
+
# Returns a nokogiri document or an error if fetch response status is not 200
|
16
17
|
def self.parse(url)
|
17
18
|
p = fetch url
|
18
19
|
p.status==200 ? Nokogiri::HTML(p.body.encode('utf-8')) : raise(NotFound)
|
data/spec/kinopoisk/movie_spec.rb
CHANGED
@@ -29,11 +29,11 @@ describe Kinopoisk::Movie, vcr: { cassette_name: 'movies' } do
|
|
29
29
|
it { dexter.imdb_rating.should be_a(Float) }
|
30
30
|
it { dexter.rating_count.should be_a(Integer) }
|
31
31
|
it { dexter.rating.should be_a(Float) }
|
32
|
-
it { dexter.
|
32
|
+
it { dexter.box_office_ru.should eq('') }
|
33
33
|
|
34
|
-
it { avatar.
|
35
|
-
it { avatar.
|
36
|
-
it { avatar.
|
34
|
+
it { avatar.box_office_world.should match('[$\d]') }
|
35
|
+
it { avatar.box_office_ru.should match('[$\d]') }
|
36
|
+
it { avatar.box_office_us.should match('[$\d]') }
|
37
37
|
it { avatar.budget.should eq("$237 000 000") }
|
38
38
|
it { avatar.length.should eq(162) }
|
39
39
|
|