porras-imdb 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,62 @@
1
+
2
+ ImdbMovie
3
+
4
+ ImdbMovie Indiana Jones and the Last Crusade
5
+ - should query IMDB url
6
+ - should get director
7
+ - should get the poster
8
+ - should get cast members
9
+ - should get the writers
10
+ - should get the release date
11
+ - should get the genres
12
+ - should get the plot
13
+ - should get the length
14
+ - should get the countries
15
+ - should get the languages
16
+ - should get the color
17
+ - should get the company
18
+ - should get some photos
19
+
20
+ ImdbMovie Indiana Jones and the Last Crusade title pre-caching
21
+ - should have the original title before querying anything
22
+ - should have the updated title after querying something
23
+
24
+ ImdbMovie Han robado una estrella
25
+ - should query IMDB url
26
+ - should get director
27
+ - should not get the poster
28
+ - should get cast members
29
+ - should get the writers
30
+ - should get the release date
31
+ - should get the genres
32
+ - should not get the plot
33
+ - should get the length
34
+ - should get the countries
35
+ - should get the languages
36
+ - should not get the color
37
+ - should get the company
38
+ - should not get any photos
39
+
40
+ ImdbSearch
41
+
42
+ ImdbSearch Indiana Jones
43
+ - should query IMDB url
44
+
45
+ ImdbSearch Indiana Jones movies
46
+ - should be a collection of ImdbMovie instances
47
+ - should include 'Indiana Jones and the Last Crusade (1989)'
48
+ - should have titles
49
+ - should not have titles with HTML tags
50
+
51
+ String
52
+
53
+ String unescape_html
54
+ - should convert &amp; to &
55
+ - should convert &#243; to ó
56
+
57
+ String strip_tags
58
+ - should strip HTML tags
59
+
60
+ Finished in 2.772724 seconds
61
+
62
+ 38 examples, 0 failures
@@ -0,0 +1,37 @@
1
+ require 'rake'
2
+ require 'spec/rake/spectask'
3
+
4
# Spec tasks. Each row is: task name, description shown by `rake -T`, and the
# extra options handed to the spec runner (nil leaves the runner defaults alone).
[
  ['spec',        "Run all specs",                               nil],
  ['spec:html',   "Run all specs and generate HTML report",      ["--format", "html:spec.html"]],
  ['spec:readme', "Run all specs and dump the result to README", ["--format", "specdoc:README"]]
].each do |task_name, summary, runner_opts|
  desc summary
  Spec::Rake::SpecTask.new(task_name) do |t|
    t.spec_files = FileList['spec/**/*.rb']
    t.spec_opts = runner_opts if runner_opts
  end
end
20
+
21
namespace :gem do
  # Bumps the patch component of the `version = 'X.Y.Z'` assignment found in
  # imdb.gemspec and rewrites the file in place. Lines without such an
  # assignment are written back untouched.
  desc "Increments the Gem version in imdb.gemspec"
  task :increment do
    # File.readlines closes the file itself; File.new(...).readlines left the
    # handle open until garbage collection.
    lines = File.readlines('imdb.gemspec')

    lines.each do |line|
      # Rewrite only the patch number inside the matched version literal, so
      # other quoted numbers on the same line (e.g. a trailing comment) survive.
      line.sub!(/(version = '\d+\.\d+\.)(\d+)'/) do
        "#{Regexp.last_match(1)}#{Regexp.last_match(2).to_i + 1}'"
      end
    end

    File.open('imdb.gemspec', 'w') { |f| f.write(lines.join) }
  end
end
36
+
37
+
@@ -0,0 +1,10 @@
1
+ require 'open-uri'
2
+ require 'date'
3
+ require 'cgi'
4
+ require 'rubygems'
5
+ require 'hpricot'
6
+ require 'chronic'
7
+
8
+ require File.dirname(__FILE__) + '/imdb/imdb_search'
9
+ require File.dirname(__FILE__) + '/imdb/imdb_movie'
10
+ require File.dirname(__FILE__) + '/string_extensions'
@@ -0,0 +1,80 @@
1
# Scrapes one IMDB title page and exposes the details found on it.
#
# The page is downloaded on first use and memoized. Every reader is
# best-effort: any StandardError raised while fetching or parsing is swallowed
# and the reader answers nil (single values) or [] (collections).
class ImdbMovie

  attr_accessor :id, :url, :title

  # id    - digits of the IMDB title identifier, without the "tt" prefix.
  # title - optional title known before the page is fetched.
  def initialize(id, title = nil)
    @id = id
    @url = "http://www.imdb.com/title/tt#{self.id}/"
    @title = title
  end

  # Returns the text of the first link after the "Director:" heading, or nil.
  def director
    first_link_text("h5[text()='Director:'] ~ a")
  end

  # Returns the poster image URL, or nil.
  def poster
    document.at("a[@name='poster'] img")['src']
  rescue StandardError
    nil
  end

  # Returns the names linked from the cast table, or [].
  def cast_members
    link_texts("table.cast td.nm a")
  end

  # Returns the unique writer names, without the "more" link, or [].
  def writers
    link_texts("h5[text()^='Writers'] ~ a").reject { |name| name == 'more' }.uniq
  end

  # Returns the release date as a Date, or nil when it is missing or unparsable.
  def release_date
    date = section_html('Release Date')[/^\d{1,2} \w+ \d{4}/]
    Date.parse(Chronic.parse(date).strftime('%Y/%m/%d'))
  rescue StandardError
    nil
  end

  # Returns the genre names, or [].
  def genres
    link_texts("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']")
  end

  # Returns the plot outline, or nil. Takes the third line of the "Plot"
  # section and drops any inline markup (such as trailing links) from it.
  def plot
    section_html('Plot').split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
  rescue StandardError
    nil
  end

  # Returns the runtime as a string such as "127 min", or nil.
  def length
    section_html('Runtime')[/\d+ min/]
  rescue StandardError
    nil
  end

  # Returns the country names, or [].
  def countries
    link_texts("h5[text()='Country:'] ~ a[@href*=/Sections/Countries/']")
  end

  # Returns the language names, or [].
  def languages
    link_texts("h5[text()='Language:'] ~ a[@href*=/Sections/Languages/']")
  end

  # Returns the color information, or nil.
  def color
    first_link_text("h5[text()='Color:'] ~ a[@href*=color-info']")
  end

  # Returns the production company name, or nil.
  def company
    first_link_text("h5[text()='Company:'] ~ a[@href*=/company/']")
  end

  # Returns the thumbnail URLs from the media strip, or [].
  def photos
    document.search(".media_strip_thumb img").map { |img| img['src'] }
  rescue StandardError
    []
  end

  # Refreshes the title from the page. Returns the resulting title.
  def get_data
    update_title
  end

  private

  # Replaces @title with the page heading. When the page cannot be fetched or
  # has no usable heading, the title already held is kept rather than being
  # overwritten with nil.
  def update_title
    @title = document.at("h1").innerHTML.split('<span').first.strip.unescape_html
  rescue StandardError
    @title
  end

  # Parsed page, downloaded once and then reused.
  def document
    @document ||= Hpricot(open(self.url).read)
  end

  # Inner HTML of the block that contains the <h5> whose text starts with label.
  def section_html(label)
    document.search("//h5[text()^='#{label}']/..").innerHTML
  end

  # Stripped, entity-decoded text of a node.
  def clean_text(node)
    node.innerHTML.strip.unescape_html
  end

  # Cleaned text of the first node matching selector, or nil on any failure.
  def first_link_text(selector)
    clean_text(document.at(selector))
  rescue StandardError
    nil
  end

  # Cleaned text of every node matching selector, or [] on any failure.
  def link_texts(selector)
    document.search(selector).map { |node| clean_text(node) }
  rescue StandardError
    []
  end

end
@@ -0,0 +1,21 @@
1
# Looks up movies by running a Google search restricted to imdb.com title
# pages and turning each result link into an ImdbMovie.
class ImdbSearch

  attr_accessor :query

  # query - free-text search terms.
  def initialize(query)
    self.query = query
  end

  # Returns an Array of ImdbMovie, one per result link that points at an IMDB
  # title page. Each movie is seeded with the link text (HTML tags stripped)
  # as its title. The list is built once and memoized.
  def movies
    @movies ||= document.search('a.l[@href*="/title/tt"]').map do |element|
      # Take the digits that follow "/title/tt"; the first digit run anywhere
      # in the URL could come from the host name or a redirect parameter.
      ImdbMovie.new(element['href'][/\/title\/tt(\d+)/, 1], element.innerHTML.strip_tags)
    end
  end

  private

  # Parsed search results page, downloaded once and then reused.
  def document
    @document ||= Hpricot(open("http://www.google.com/search?as_q=#{CGI::escape(query + ' inurl:title')}&num=20&as_sitesearch=imdb.com").read)
  end

end
@@ -0,0 +1,16 @@
1
require 'cgi'

begin
  require 'iconv'
rescue LoadError
  # Iconv is no longer part of the standard library on Ruby 2.0 and later;
  # unescape_html relies on String#encode there instead.
end

# HTML clean-up helpers mixed into String.
module ImdbStringExtensions

  # Returns a copy with HTML entities decoded, encoded as UTF-8.
  #
  # On rubies whose strings carry no encoding (no String#encode) the bytes are
  # converted from ISO-8859-1 with Iconv. Otherwise the string's own encoding
  # is honoured, and only untagged (binary) or invalid byte sequences are
  # read as ISO-8859-1, so text that is already UTF-8 is not encoded twice.
  def unescape_html
    unless respond_to?(:encode)
      return Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
    end

    text = dup
    if text.encoding == Encoding::BINARY || !text.valid_encoding?
      text.force_encoding(Encoding::ISO_8859_1)
    end
    # Transcode before decoding so numeric entities resolve to UTF-8 characters.
    CGI::unescapeHTML(text.encode(Encoding::UTF_8))
  end

  # Returns a copy with every HTML/XML tag removed.
  def strip_tags
    gsub(/<\/?[^>]*>/, "")
  end

end

String.send :include, ImdbStringExtensions
@@ -0,0 +1,172 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for ImdbMovie. Both groups replace the movie's #open with a saved HTML
# sample so no request leaves the machine.
describe ImdbMovie do

  # A page on which every scraped field is present.
  describe 'Indiana Jones and the Last Crusade' do

    before(:each) do
      @imdb_movie = ImdbMovie.new('0097576', 'Indiana Jones and the Last Crusade (1989)')
      sample_page = open("#{$samples_dir}/sample_movie.html")
      @imdb_movie.stub!(:open).and_return(sample_page)
    end

    it "should query IMDB url" do
      expectation = @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/")
      expectation.and_return(open("#{$samples_dir}/sample_movie.html"))
      @imdb_movie.send(:document)
    end

    it "should get director" do
      @imdb_movie.director.should == 'Steven Spielberg'
    end

    it "should get the poster" do
      @imdb_movie.poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@._V1._SX95_SY140_.jpg'
    end

    it "should get cast members" do
      ['Harrison Ford', 'Sean Connery', 'Denholm Elliott', 'Alison Doody', 'John Rhys-Davies'].each do |actor|
        @imdb_movie.cast_members.should include(actor)
      end
      @imdb_movie.cast_members.should_not include('more')
    end

    it "should get the writers" do
      ['George Lucas', 'Philip Kaufman'].each do |writer|
        @imdb_movie.writers.should include(writer)
      end
      @imdb_movie.writers.should_not include('more')
    end

    it "should get the release date" do
      @imdb_movie.release_date.should be_an_instance_of(Date)
      @imdb_movie.release_date.should == Date.new(1989, 9, 1)
    end

    it "should get the genres" do
      @imdb_movie.genres.should have(2).strings
      ['Action', 'Adventure'].each do |genre|
        @imdb_movie.genres.should include(genre)
      end
    end

    it "should get the plot" do
      @imdb_movie.plot.should == "When Dr. Henry Jones Sr. suddenly goes missing while pursuing the Holy Grail, eminent archaeologist Indiana Jones must follow in his father's footsteps and stop the Nazis."
    end

    it "should get the length" do
      @imdb_movie.length.should == '127 min'
    end

    it "should get the countries" do
      @imdb_movie.countries.should have(1).string
      @imdb_movie.countries.should include('USA')
    end

    it "should get the languages" do
      @imdb_movie.languages.should have(3).strings
      ['English', 'German', 'Greek'].each do |language|
        @imdb_movie.languages.should include(language)
      end
    end

    it "should get the color" do
      @imdb_movie.color.should == 'Color'
    end

    it "should get the company" do
      @imdb_movie.company.should == 'Lucasfilm'
    end

    it "should get some photos" do
      @imdb_movie.photos.should have(10).strings
      [
        'http://ia.media-imdb.com/images/M/MV5BMTY4MzY3OTY0MF5BMl5BanBnXkFtZTYwODM0OTE3._V1._CR82,0,320,320_SS90_.jpg',
        'http://ia.media-imdb.com/images/M/MV5BMjAwNTM4ODc3Nl5BMl5BanBnXkFtZTYwNzU0OTE3._V1._CR82,0,320,320_SS90_.jpg'
      ].each do |thumbnail|
        @imdb_movie.photos.should include(thumbnail)
      end
    end

    # The constructor title is served as-is until get_data replaces it.
    describe "title pre-caching & get_data" do

      it "should have the original title before querying anything" do
        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade (1989)'
      end

      it "should have the updated title after calling get_data" do
        @imdb_movie.get_data
        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade'
      end

    end

  end

  # A sparse page: poster, plot, color and photos are expected to be absent.
  describe 'Han robado una estrella' do

    before(:each) do
      @imdb_movie = ImdbMovie.new('0054961', 'Han robado una estrella')
      sample_page = open("#{$samples_dir}/sample_incomplete_movie.html")
      @imdb_movie.stub!(:open).and_return(sample_page)
    end

    it "should query IMDB url" do
      expectation = @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0054961/")
      expectation.and_return(open("#{$samples_dir}/sample_incomplete_movie.html"))
      @imdb_movie.send(:document)
    end

    it "should get director" do
      @imdb_movie.director.should == 'Javier Setó'
    end

    it "should not get the poster" do
      @imdb_movie.poster.should be_nil
    end

    it "should get cast members" do
      ['Rafaela Aparicio', 'Marujita Díaz', 'Espartaco Santoni'].each do |actor|
        @imdb_movie.cast_members.should include(actor)
      end
      @imdb_movie.cast_members.should_not include('more')
    end

    it "should get the writers" do
      @imdb_movie.writers.should have(1).string
      @imdb_movie.writers.should include('Paulino Rodrigo')
    end

    it "should get the release date" do
      @imdb_movie.release_date.should be_an_instance_of(Date)
      @imdb_movie.release_date.should == Date.new(1963, 9, 9)
    end

    it "should get the genres" do
      @imdb_movie.genres.should == ['Comedy', 'Musical']
    end

    it "should not get the plot" do
      @imdb_movie.plot.should be_nil
    end

    it "should get the length" do
      @imdb_movie.length.should == '93 min'
    end

    it "should get the countries" do
      @imdb_movie.countries.should == ['Spain']
    end

    it "should get the languages" do
      @imdb_movie.languages.should == ['Spanish']
    end

    it "should not get the color" do
      @imdb_movie.color.should be_nil
    end

    it "should get the company" do
      @imdb_movie.company.should == 'Brepi Films'
    end

    it "should not get any photos" do
      @imdb_movie.photos.should be_empty
    end

  end

end
@@ -0,0 +1,47 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
# Specs for ImdbSearch. The search's #open is replaced with a saved results
# page so no request leaves the machine.
describe ImdbSearch do

  describe 'Indiana Jones' do

    before(:each) do
      @imdb_search = ImdbSearch.new('indiana jones')
      results_page = open("#{$samples_dir}/sample_search.html")
      @imdb_search.stub!(:open).and_return(results_page)
    end

    it "should query IMDB url" do
      expectation = @imdb_search.should_receive(:open).with("http://www.google.com/search?as_q=indiana+jones+inurl%3Atitle&num=20&as_sitesearch=imdb.com")
      expectation.and_return(open("#{$samples_dir}/sample_search.html"))
      @imdb_search.send(:document)
    end

    describe "movies" do

      it "should be a collection of ImdbMovie instances" do
        @imdb_search.movies.should be_an_instance_of(Array)
        @imdb_search.movies.should_not be_empty
        @imdb_search.movies.each { |found| found.should be_an_instance_of(ImdbMovie) }
      end

      it "should include 'Indiana Jones and the Last Crusade (1989)'" do
        titles = @imdb_search.movies.map { |found| found.title }
        titles.should include('Indiana Jones and the Last Crusade (1989)')
      end

      it "should have titles" do
        @imdb_search.movies.each { |found| found.title.should_not be_empty }
      end

      it "should not have titles with HTML tags" do
        @imdb_search.movies.each { |found| found.title.should_not match(/<.+>/) }
      end

    end

  end

end