porras-imdb 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +62 -0
- data/Rakefile +37 -0
- data/lib/imdb.rb +10 -0
- data/lib/imdb/imdb_movie.rb +80 -0
- data/lib/imdb/imdb_search.rb +21 -0
- data/lib/string_extensions.rb +16 -0
- data/spec/imdb_movie_spec.rb +172 -0
- data/spec/imdb_search_spec.rb +47 -0
- data/spec/samples/sample_incomplete_movie.html +588 -0
- data/spec/samples/sample_movie.html +951 -0
- data/spec/samples/sample_search.html +3 -0
- data/spec/spec_helper.rb +3 -0
- data/spec/string_extensions_spec.rb +25 -0
- metadata +65 -0
data/README
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
|
|
2
|
+
ImdbMovie
|
|
3
|
+
|
|
4
|
+
ImdbMovie Indiana Jones and the Last Crusade
|
|
5
|
+
- should query IMDB url
|
|
6
|
+
- should get director
|
|
7
|
+
- should get the poster
|
|
8
|
+
- should get cast members
|
|
9
|
+
- should get the writers
|
|
10
|
+
- should get the release date
|
|
11
|
+
- should get the genres
|
|
12
|
+
- should get the plot
|
|
13
|
+
- should get the length
|
|
14
|
+
- should get the countries
|
|
15
|
+
- should get the languages
|
|
16
|
+
- should get the color
|
|
17
|
+
- should get the company
|
|
18
|
+
- should get some photos
|
|
19
|
+
|
|
20
|
+
ImdbMovie Indiana Jones and the Last Crusade title pre-caching
|
|
21
|
+
- should have the original title before querying anything
|
|
22
|
+
- should have the updated title after querying something
|
|
23
|
+
|
|
24
|
+
ImdbMovie Han robado una estrella
|
|
25
|
+
- should query IMDB url
|
|
26
|
+
- should get director
|
|
27
|
+
- should not get the poster
|
|
28
|
+
- should get cast members
|
|
29
|
+
- should get the writers
|
|
30
|
+
- should get the release date
|
|
31
|
+
- should get the genres
|
|
32
|
+
- should not get the plot
|
|
33
|
+
- should get the length
|
|
34
|
+
- should get the countries
|
|
35
|
+
- should get the languages
|
|
36
|
+
- should not get the color
|
|
37
|
+
- should get the company
|
|
38
|
+
- should not get any photos
|
|
39
|
+
|
|
40
|
+
ImdbSearch
|
|
41
|
+
|
|
42
|
+
ImdbSearch Indiana Jones
|
|
43
|
+
- should query IMDB url
|
|
44
|
+
|
|
45
|
+
ImdbSearch Indiana Jones movies
|
|
46
|
+
- should be a collection of ImdbMovie instances
|
|
47
|
+
- should include 'Indiana Jones and the Last Crusade (1989)'
|
|
48
|
+
- should have titles
|
|
49
|
+
- should not have titles with HTML tags
|
|
50
|
+
|
|
51
|
+
String
|
|
52
|
+
|
|
53
|
+
String unescape_html
|
|
54
|
+
- should convert &amp; to &
|
|
55
|
+
- should convert &#243; to ó
|
|
56
|
+
|
|
57
|
+
String strip_tags
|
|
58
|
+
- should strip HTML tags
|
|
59
|
+
|
|
60
|
+
Finished in 2.772724 seconds
|
|
61
|
+
|
|
62
|
+
38 examples, 0 failures
|
data/Rakefile
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
require 'rake'
|
|
2
|
+
require 'spec/rake/spectask'
|
|
3
|
+
|
|
4
|
+
# Spec tasks, declared from a table of
# [task name, description, extra spec options (nil when there are none)].
# Every task runs the same set of files: everything under spec/ ending in .rb.
[
  ['spec',        "Run all specs",                               nil],
  ['spec:html',   "Run all specs and generate HTML report",      ["--format", "html:spec.html"]],
  ['spec:readme', "Run all specs and dump the result to README", ["--format", "specdoc:README"]]
].each do |task_name, description, extra_opts|
  desc description
  Spec::Rake::SpecTask.new(task_name) do |spec_task|
    spec_task.spec_files = FileList['spec/**/*.rb']
    # The plain 'spec' task never assigns spec_opts, exactly as before.
    spec_task.spec_opts = extra_opts if extra_opts
  end
end
|
|
20
|
+
|
|
21
|
+
namespace :gem do
  # Bumps the patch component of the `version = 'X.Y.Z'` declaration in
  # imdb.gemspec and rewrites the file in place. Lines that do not contain
  # such a declaration are written back unchanged.
  desc "Increments the Gem version in imdb.gemspec"
  task :increment do
    gemspec_path = 'imdb.gemspec'
    version_pattern = /(version = '\d+\.\d+\.)(\d+)'/

    # File.readlines opens and closes the file itself, so no handle is left
    # open while the same path is reopened for writing below.
    bumped_lines = File.readlines(gemspec_path).map do |line|
      # Only the patch number captured by the pattern is replaced; any other
      # quoted digits on the same line are left alone.
      line.sub(version_pattern) { "#{$1}#{$2.to_i + 1}'" }
    end

    File.open(gemspec_path, 'w') do |f|
      bumped_lines.each { |line| f.write(line) }
    end
  end
end
|
|
36
|
+
|
|
37
|
+
|
data/lib/imdb.rb
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
require 'open-uri'
|
|
2
|
+
require 'date'
|
|
3
|
+
require 'cgi'
|
|
4
|
+
require 'rubygems'
|
|
5
|
+
require 'hpricot'
|
|
6
|
+
require 'chronic'
|
|
7
|
+
|
|
8
|
+
require File.dirname(__FILE__) + '/imdb/imdb_search'
|
|
9
|
+
require File.dirname(__FILE__) + '/imdb/imdb_movie'
|
|
10
|
+
require File.dirname(__FILE__) + '/string_extensions'
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Reads the fields of a single IMDB title page.
#
# The page at #url is fetched and parsed the first time a field is requested,
# and the parsed document is reused afterwards. Every reader is best effort:
# when the expected markup is missing, or fetching/parsing raises a
# StandardError, scalar readers return nil and list readers return [].
class ImdbMovie

  attr_accessor :id, :url, :title

  # id    - the digits that follow "tt" in the IMDB title URL.
  # title - optional title already known to the caller; it is what #title
  #         returns until #get_data reads a title from the page.
  def initialize(id, title = nil)
    @id = id
    @url = "http://www.imdb.com/title/tt#{self.id}/"
    @title = title
  end

  # Text of the first link following the "Director:" heading, or nil.
  def director
    text_at("h5[text()='Director:'] ~ a")
  end

  # The src attribute of the poster image, or nil when there is no poster.
  def poster
    document.at("a[@name='poster'] img")['src']
  rescue
    nil
  end

  # Names taken from the links in the cast table.
  def cast_members
    link_texts("table.cast td.nm a")
  end

  # Distinct writer names; the literal "more" link text is dropped.
  def writers
    link_texts("h5[text()^='Writers'] ~ a").reject { |w| w == 'more' }.uniq
  end

  # Release date as a Date, or nil when no "D Month YYYY" text is found in the
  # "Release Date" block or it cannot be parsed.
  def release_date
    date = document.search("//h5[text()^='Release Date']/..").innerHTML[/^\d{1,2} \w+ \d{4}/]
    Date.parse(Chronic.parse(date).strftime('%Y/%m/%d'))
  rescue
    nil
  end

  # Genre names from the links following the "Genre:" heading.
  def genres
    link_texts("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']")
  end

  # Plot summary, or nil. Uses the third line (index 2) of the "Plot" block's
  # HTML, with inline tag pairs removed before unescaping.
  def plot
    document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
  rescue
    nil
  end

  # Runtime as the first "<digits> min" string in the "Runtime" block, or nil.
  def length
    document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/]
  rescue
    nil
  end

  # Country names from the links following the "Country:" heading.
  def countries
    link_texts("h5[text()='Country:'] ~ a[@href*=/Sections/Countries/']")
  end

  # Language names from the links following the "Language:" heading.
  def languages
    link_texts("h5[text()='Language:'] ~ a[@href*=/Sections/Languages/']")
  end

  # Text of the colour-info link following the "Color:" heading, or nil.
  def color
    text_at("h5[text()='Color:'] ~ a[@href*=color-info']")
  end

  # Text of the first company link following the "Company:" heading, or nil.
  def company
    text_at("h5[text()='Company:'] ~ a[@href*=/company/']")
  end

  # The src attributes of the thumbnails in the media strip.
  def photos
    document.search(".media_strip_thumb img").map { |img| img['src'] }
  rescue
    []
  end

  # Refreshes #title from the page. Returns the title read from the page, or
  # nil when none could be read.
  def get_data
    update_title
  end

  private

  # Replaces @title with the text of the page's <h1> (everything before the
  # first "<span"). When that text cannot be read, the title already held is
  # kept instead of being overwritten with nil, so a title supplied to the
  # constructor survives a failed fetch.
  def update_title
    page_title = begin
      document.at("h1").innerHTML.split('<span').first.strip.unescape_html
    rescue
      nil
    end
    @title = page_title if page_title
  end

  # Stripped, unescaped inner HTML of the first element matching selector,
  # or nil when nothing matches or the lookup raises.
  def text_at(selector)
    document.at(selector).innerHTML.strip.unescape_html
  rescue
    nil
  end

  # Stripped, unescaped inner HTML of every element matching selector,
  # or [] when the lookup raises.
  def link_texts(selector)
    document.search(selector).map { |link| link.innerHTML.strip.unescape_html }
  rescue
    []
  end

  # Parsed page for #url, fetched on first use. A failed fetch leaves
  # @document unset, so the next reader tries again.
  # NOTE(review): this relies on open-uri making `open` accept URLs; the specs
  # stub `open` on the instance, so the call is kept in this form.
  def document
    @document ||= Hpricot(open(self.url).read)
  end

end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Finds movies by running a Google search restricted to imdb.com and reading
# the result links that point at IMDB title pages.
class ImdbSearch

  attr_accessor :query

  # query - the search terms, as a String.
  def initialize(query)
    self.query = query
  end

  # Returns an Array of ImdbMovie, one per result link whose href contains
  # "/title/tt". Each movie gets the title id from the href and the link text
  # with HTML tags stripped as its initial title. The list is built once and
  # reused on later calls.
  def movies
    @movies ||= document.search('a.l[@href*="/title/tt"]').map do |element|
      # Take the digits that directly follow "/title/tt" rather than the first
      # digit run anywhere in the href, so digits earlier in the URL (a port,
      # a numbered host) are never mistaken for the title id.
      imdb_id = element['href'][/\/title\/tt(\d+)/, 1]
      ImdbMovie.new(imdb_id, element.innerHTML.strip_tags)
    end
  end

  private

  # Parsed Google results page for the query, fetched on first use. The query
  # is suffixed with " inurl:title" and URL-escaped before being sent.
  def document
    @document ||= Hpricot(open("http://www.google.com/search?as_q=#{CGI::escape(query + ' inurl:title')}&num=20&as_sitesearch=imdb.com").read)
  end

end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
require 'cgi'

# Iconv was removed from Ruby's standard library in 2.0. It is only needed on
# interpreters whose String has no #encode, so a failed load is tolerated
# here instead of preventing the whole library from loading.
begin
  require 'iconv'
rescue LoadError
  nil
end

# Helpers mixed into String for cleaning up text scraped from HTML pages.
module ImdbStringExtensions

  # Returns a copy of the string with HTML entities decoded, as UTF-8.
  #
  # Bytes that are not already valid UTF-8 are interpreted as ISO-8859-1 and
  # transcoded. Text that is already valid UTF-8 is returned as decoded, so it
  # is not transcoded a second time.
  def unescape_html
    unescaped = CGI::unescapeHTML(self)

    # Interpreters without String#encode: keep the original Iconv conversion.
    return Iconv.conv("UTF-8", 'ISO-8859-1', unescaped) unless unescaped.respond_to?(:encode)

    if unescaped.encoding.name == 'UTF-8' && unescaped.valid_encoding?
      unescaped
    else
      # Same byte-level interpretation Iconv applied: treat the bytes as
      # ISO-8859-1, then convert to UTF-8.
      unescaped.dup.force_encoding('ISO-8859-1').encode('UTF-8')
    end
  end

  # Returns a copy of the string with anything that looks like an HTML tag
  # ("<...>" or "</...>") removed; the text between tags is kept.
  def strip_tags
    gsub(/<\/?[^>]*>/, "")
  end

end

String.send :include, ImdbStringExtensions
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for ImdbMovie. Each group replaces the instance's `open` with a stub
# that returns a saved sample page from $samples_dir.
describe ImdbMovie do

  # Sample page on which every field is expected to be found.
  describe 'Indiana Jones and the Last Crusade' do

    before(:each) do
      sample_page = open("#{$samples_dir}/sample_movie.html")
      @imdb_movie = ImdbMovie.new('0097576', 'Indiana Jones and the Last Crusade (1989)')
      @imdb_movie.stub!(:open).and_return(sample_page)
    end

    it "should query IMDB url" do
      sample_page = open("#{$samples_dir}/sample_movie.html")
      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/").and_return(sample_page)
      @imdb_movie.send(:document)
    end

    it "should get director" do
      @imdb_movie.director.should == 'Steven Spielberg'
    end

    it "should get the poster" do
      expected_poster = 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@._V1._SX95_SY140_.jpg'
      @imdb_movie.poster.should == expected_poster
    end

    it "should get cast members" do
      ['Harrison Ford', 'Sean Connery', 'Denholm Elliott', 'Alison Doody', 'John Rhys-Davies'].each do |actor|
        @imdb_movie.cast_members.should include(actor)
      end
      @imdb_movie.cast_members.should_not include('more')
    end

    it "should get the writers" do
      ['George Lucas', 'Philip Kaufman'].each do |writer|
        @imdb_movie.writers.should include(writer)
      end
      @imdb_movie.writers.should_not include('more')
    end

    it "should get the release date" do
      @imdb_movie.release_date.should be_an_instance_of(Date)
      @imdb_movie.release_date.should == Date.new(1989, 9, 1)
    end

    it "should get the genres" do
      @imdb_movie.genres.should have(2).strings
      ['Action', 'Adventure'].each do |genre|
        @imdb_movie.genres.should include(genre)
      end
    end

    it "should get the plot" do
      expected_plot = "When Dr. Henry Jones Sr. suddenly goes missing while pursuing the Holy Grail, eminent archaeologist Indiana Jones must follow in his father's footsteps and stop the Nazis."
      @imdb_movie.plot.should == expected_plot
    end

    it "should get the length" do
      @imdb_movie.length.should == '127 min'
    end

    it "should get the countries" do
      @imdb_movie.countries.should have(1).string
      @imdb_movie.countries.should include('USA')
    end

    it "should get the languages" do
      @imdb_movie.languages.should have(3).strings
      ['English', 'German', 'Greek'].each do |language|
        @imdb_movie.languages.should include(language)
      end
    end

    it "should get the color" do
      @imdb_movie.color.should == 'Color'
    end

    it "should get the company" do
      @imdb_movie.company.should == 'Lucasfilm'
    end

    it "should get some photos" do
      @imdb_movie.photos.should have(10).strings
      [
        'http://ia.media-imdb.com/images/M/MV5BMTY4MzY3OTY0MF5BMl5BanBnXkFtZTYwODM0OTE3._V1._CR82,0,320,320_SS90_.jpg',
        'http://ia.media-imdb.com/images/M/MV5BMjAwNTM4ODc3Nl5BMl5BanBnXkFtZTYwNzU0OTE3._V1._CR82,0,320,320_SS90_.jpg'
      ].each do |photo_url|
        @imdb_movie.photos.should include(photo_url)
      end
    end

    # The title given to the constructor is what #title returns until
    # get_data is called.
    describe "title pre-caching & get_data" do

      it "should have the original title before querying anything" do
        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade (1989)'
      end

      it "should have the updated title after calling get_data" do
        @imdb_movie.get_data
        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade'
      end

    end

  end


  # Sample page on which the poster, plot, color and photos are expected to
  # be absent.
  describe 'Han robado una estrella' do

    before(:each) do
      sample_page = open("#{$samples_dir}/sample_incomplete_movie.html")
      @imdb_movie = ImdbMovie.new('0054961', 'Han robado una estrella')
      @imdb_movie.stub!(:open).and_return(sample_page)
    end

    it "should query IMDB url" do
      sample_page = open("#{$samples_dir}/sample_incomplete_movie.html")
      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0054961/").and_return(sample_page)
      @imdb_movie.send(:document)
    end

    it "should get director" do
      @imdb_movie.director.should == 'Javier Setó'
    end

    it "should not get the poster" do
      @imdb_movie.poster.should be_nil
    end

    it "should get cast members" do
      ['Rafaela Aparicio', 'Marujita Díaz', 'Espartaco Santoni'].each do |actor|
        @imdb_movie.cast_members.should include(actor)
      end
      @imdb_movie.cast_members.should_not include('more')
    end

    it "should get the writers" do
      @imdb_movie.writers.should have(1).string
      @imdb_movie.writers.should include('Paulino Rodrigo')
    end

    it "should get the release date" do
      @imdb_movie.release_date.should be_an_instance_of(Date)
      @imdb_movie.release_date.should == Date.new(1963, 9, 9)
    end

    it "should get the genres" do
      @imdb_movie.genres.should == ['Comedy', 'Musical']
    end

    it "should not get the plot" do
      @imdb_movie.plot.should be_nil
    end

    it "should get the length" do
      @imdb_movie.length.should == '93 min'
    end

    it "should get the countries" do
      @imdb_movie.countries.should == ['Spain']
    end

    it "should get the languages" do
      @imdb_movie.languages.should == ['Spanish']
    end

    it "should not get the color" do
      @imdb_movie.color.should be_nil
    end

    it "should get the company" do
      @imdb_movie.company.should == 'Brepi Films'
    end

    it "should not get any photos" do
      @imdb_movie.photos.should be_empty
    end

  end

end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for ImdbSearch. The instance's `open` is stubbed to return a saved
# results page from $samples_dir.
describe ImdbSearch do

  describe 'Indiana Jones' do

    before(:each) do
      sample_results = open("#{$samples_dir}/sample_search.html")
      @imdb_search = ImdbSearch.new('indiana jones')
      @imdb_search.stub!(:open).and_return(sample_results)
    end

    it "should query IMDB url" do
      sample_results = open("#{$samples_dir}/sample_search.html")
      @imdb_search.should_receive(:open).with("http://www.google.com/search?as_q=indiana+jones+inurl%3Atitle&num=20&as_sitesearch=imdb.com").and_return(sample_results)
      @imdb_search.send(:document)
    end

    describe "movies" do

      it "should be a collection of ImdbMovie instances" do
        @imdb_search.movies.should be_an_instance_of(Array)
        @imdb_search.movies.should_not be_empty
        @imdb_search.movies.each { |found| found.should be_an_instance_of(ImdbMovie) }
      end

      it "should include 'Indiana Jones and the Last Crusade (1989)'" do
        found_titles = @imdb_search.movies.map { |m| m.title }
        found_titles.should include('Indiana Jones and the Last Crusade (1989)')
      end

      it "should have titles" do
        @imdb_search.movies.each { |found| found.title.should_not be_empty }
      end

      it "should not have titles with HTML tags" do
        @imdb_search.movies.each { |found| found.title.should_not match(/<.+>/) }
      end

    end

  end

end
|