langalex-imdb 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,70 @@
1
+
2
+ Imdb::Movie Indiana Jones and the Last Crusade
3
+ - should query IMDB url
4
+ - should get director
5
+ - should get the poster
6
+ - should get the rating
7
+ - should get cast members
8
+ - should get the writers
9
+ - should get the year
10
+ - should get the release date
11
+ - should get the genres
12
+ - should get the plot
13
+ - should get the length
14
+ - should get the countries
15
+ - should get the languages
16
+ - should get the color
17
+ - should get the company
18
+ - should get some photos
19
+ - should get the tagline
20
+ - should get the aspect ratio
21
+
22
+ Imdb::Movie Indiana Jones and the Last Crusade title pre-caching & get_data
23
+ - should have the original title before querying anything
24
+ - should have the updated title after calling get_data
25
+
26
+ Imdb::Movie Han robado una estrella
27
+ - should query IMDB url
28
+ - should get director
29
+ - should not get the poster
30
+ - should get cast members
31
+ - should get the writers
32
+ - should get the release date
33
+ - should get the genres
34
+ - should not get the plot
35
+ - should get the length
36
+ - should get the countries
37
+ - should get the languages
38
+ - should not get the color
39
+ - should get the company
40
+ - should not get any photos
41
+
42
+ Imdb::Search search that returns multiple movies
43
+ - should query IMDB url
44
+ - should not allow to change the query
45
+
46
+ Imdb::Search search that returns multiple movies movies
47
+ - should be a collection of Imdb::Movie instances
48
+ - should include 'Indiana Jones and the Last Crusade'
49
+ - should have titles
50
+ - should not have titles with HTML tags
51
+ - should not have duplicate movies
52
+
53
+ Imdb::Search search that redirects to the lone matching movie movies
54
+ - should be a collection containing a single Imdb::Movie instance
55
+ - should have the correct ID
56
+ - should have the correct title
57
+
58
+ Imdb::Search searches with potential encoding issues movies
59
+ - should include 'Misión en Marbella'
60
+
61
+ String unescape_html
62
+ - should convert &amp; to &
63
+ - should convert &#243; to ó
64
+
65
+ String strip_tags
66
+ - should strip HTML tags
67
+
68
+ Finished in 0.829689 seconds
69
+
70
+ 48 examples, 0 failures
data/lib/imdb.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'open-uri'
2
+ require 'date'
3
+ require 'cgi'
4
+ require 'rubygems'
5
+ require 'hpricot'
6
+ require 'chronic'
7
+
8
+ require File.dirname(__FILE__) + '/imdb/imdb_search'
9
+ require File.dirname(__FILE__) + '/imdb/imdb_movie'
10
+ require File.dirname(__FILE__) + '/string_extensions'
@@ -0,0 +1,98 @@
1
module Imdb
  # One movie, read from its IMDB title page.
  #
  # The page is fetched the first time any reader needs it and is memoized in
  # @document. Readers are best-effort: apart from #year, any error raised
  # while fetching or parsing is swallowed and the reader returns nil (single
  # values) or [] (lists).
  class Movie

    attr_accessor :id, :url, :title

    # id    - the digits that follow "tt" in the title URL (e.g. '0097576').
    # title - optional title known before the page is fetched.
    def initialize(id, title = nil)
      @id = id
      @url = "http://www.imdb.com/title/tt#{self.id}/"
      @title = title
    end

    # Text of the first link following the "Director:" heading, or nil.
    def director
      document.at("h5[text()='Director:'] ~ a").innerHTML.strip.unescape_html rescue nil
    end

    # Poster URL: the image src up to and including "@@", with ".jpg"
    # appended. nil when the poster image is missing or src has no "@@".
    def poster
      document.at("a[@name='poster'] img")['src'][/http:.+@@/] + '.jpg' rescue nil
    end

    # Rating as a Float: the part before the "/" in the rating text, or nil.
    def rating
      document.at(".general.rating b").innerHTML.strip.unescape_html.split('/').first.to_f rescue nil
    end

    # Names taken from the links in the cast table.
    def cast_members
      link_texts("table.cast td.nm a")
    end

    # Unique writer names; the trailing "more" link is not a writer and is dropped.
    def writers
      link_texts("h5[text()^='Writers'] ~ a").reject { |name| name == 'more' }.uniq
    end

    # Year as an Integer, converted with String#to_i (non-numeric text gives 0).
    # Unlike the other readers this one does not rescue, so errors raised
    # while fetching the page propagate to the caller.
    def year
      document.search('a[@href^="/Sections/Years/"]').innerHTML.to_i
    end

    # Release date as a Date, or nil when no "<day> <month> <year>" text is
    # found at the start of a line in the "Release Date" section.
    def release_date
      date = document.search("//h5[text()^='Release Date']/..").innerHTML[/^\d{1,2} \w+ \d{4}/]
      Date.parse(Chronic.parse(date).strftime('%Y/%m/%d'))
    rescue
      nil
    end

    # Genre names from the links under the "Genre:" heading.
    def genres
      link_texts("h5[text()='Genre:'] ~ a[@href*='/Sections/Genres/']")
    end

    # Plot summary text, or nil.
    def plot
      info_text('Plot')
    end

    # Tagline text, or nil.
    def tagline
      info_text('Tagline')
    end

    # Aspect ratio text, or nil.
    def aspect_ratio
      info_text('Aspect Ratio')
    end

    # First "<digits> min" match in the "Runtime" section (a String), or nil.
    def length
      document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/] rescue nil
    end

    # Country names from the links under the "Country:" heading.
    def countries
      link_texts("h5[text()='Country:'] ~ a[@href*='/Sections/Countries/']")
    end

    # Language names from the links under the "Language:" heading.
    def languages
      link_texts("h5[text()='Language:'] ~ a[@href*='/Sections/Languages/']")
    end

    # Text of the color-info link under the "Color:" heading, or nil.
    def color
      document.at("h5[text()='Color:'] ~ a[@href*='color-info']").innerHTML.strip.unescape_html rescue nil
    end

    # Text of the first company link under the "Company:" heading, or nil.
    def company
      document.at("h5[text()='Company:'] ~ a[@href*='/company/']").innerHTML.strip.unescape_html rescue nil
    end

    # src attributes of the thumbnails in the media strip.
    def photos
      document.search(".media_strip_thumb img").map { |img| img['src'] } rescue []
    end

    # Refreshes #title from the page and returns it.
    def get_data
      update_title
    end

    private

    # Replaces @title with the page heading (the <h1> content before its
    # first <span>). A title that is already known is kept when the page
    # cannot be fetched or parsed, instead of being overwritten with nil.
    def update_title
      fetched = document.at("h1").innerHTML.split('<span').first.strip.unescape_html rescue nil
      @title = fetched unless fetched.nil?
      @title
    end

    # Stripped, unescaped inner HTML of every element matching +selector+;
    # [] when anything raises.
    def link_texts(selector)
      document.search(selector).map { |link| link.innerHTML.strip.unescape_html }
    rescue
      []
    end

    # Third line of the section whose <h5> text starts with +heading+, with
    # inline tag pairs (and what they wrap) removed; nil when anything raises.
    def info_text(heading)
      section = document.search("//h5[text()^='#{heading}']/..").innerHTML
      section.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
    rescue
      nil
    end

    # Parsed title page, fetched once.
    # NOTE(review): this goes through Kernel#open as extended by open-uri;
    # Ruby 3.0 and later no longer open URLs that way. The call is left as is
    # because the specs stub #open on the instance.
    def document
      @document ||= Hpricot(open(self.url).read)
    end

  end
end
@@ -0,0 +1,39 @@
1
module Imdb
  # Runs a title search on IMDB and exposes the hits as Imdb::Movie objects.
  class Search

    # query - the search text; it is CGI-escaped when the URL is built.
    def initialize(query)
      @query = query
    end

    # Array of Imdb::Movie for this query, computed once and then reused.
    def movies
      return @movies if @movies

      @movies = exact_match? ? parse_movie : parse_movies
    end

    private

    # Parsed response for the search URL, fetched once.
    def document
      @document ||= begin
        search_url = "http://www.imdb.com/find?q=#{CGI::escape(@query)};s=tt"
        Hpricot(open(search_url).read)
      end
    end

    # Builds one movie per distinct [id, title] pair among the title links of
    # a result list, skipping links that have no text once tags are stripped.
    def parse_movies
      titled_links = document.search('a[@href^="/title/tt"]').reject do |link|
        link.innerHTML.strip_tags.empty?
      end
      id_title_pairs = titled_links.map do |link|
        [link['href'][/\d+/], link.innerHTML.strip_tags.unescape_html]
      end
      id_title_pairs.uniq.map { |movie_id, movie_title| Imdb::Movie.new(movie_id, movie_title) }
    end

    # Builds the single movie when the response is a title page rather than a
    # result list: the id is the trailing digits of the poster anchor's href,
    # the title is the <h1> content before its first <span>.
    def parse_movie
      poster_href = document.at("a[@name='poster']")['href']
      movie_id = poster_href[/\d+$/]
      heading = document.at("h1").innerHTML
      movie_title = heading.split('<span').first.strip.unescape_html
      [Imdb::Movie.new(movie_id, movie_title)]
    end

    # True when the response is not the "IMDb Search" result list.
    def exact_match?
      results_page_title = document.at("title[text()='IMDb Search']")
      results_page_title.nil?
    end

  end
end
@@ -0,0 +1,17 @@
1
require 'cgi'

begin
  require 'iconv'
rescue LoadError
  # Iconv is not available on this Ruby (it left the standard library in
  # Ruby 2.0); unescape_html transcodes with String#encode instead.
end

module Imdb
  # Text clean-up helpers for scraped HTML. They are mixed into String at the
  # bottom of this file.
  module StringExtensions

    # Decodes HTML entities and returns the text as UTF-8.
    #
    # Text that is already valid UTF-8 after unescaping is returned as is.
    # Anything else has its bytes interpreted as ISO-8859-1 and is transcoded
    # to UTF-8. On Rubies without String#encode the conversion is done by
    # Iconv, which always treats the bytes as ISO-8859-1.
    def unescape_html
      unescaped = CGI.unescapeHTML(self)
      return Iconv.conv('UTF-8', 'ISO-8859-1', unescaped) unless unescaped.respond_to?(:encode)
      return unescaped if unescaped.encoding == Encoding::UTF_8 && unescaped.valid_encoding?

      unescaped.encode('UTF-8', 'ISO-8859-1')
    end

    # Returns a copy with every "<...>" or "</...>" run removed.
    def strip_tags
      gsub(/<\/?[^>]*>/, "")
    end
  end
end

String.send :include, Imdb::StringExtensions
@@ -0,0 +1,190 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
describe Imdb::Movie do

  # A saved page that carries every field the readers look for.
  describe 'Indiana Jones and the Last Crusade' do

    before(:each) do
      @imdb_movie = Imdb::Movie.new('0097576', 'Indiana Jones and the Last Crusade')
      # Hand back the saved sample page instead of going to the network.
      @imdb_movie.stub!(:open).and_return(open("#{$samples_dir}/sample_movie.html"))
    end

    it "should query IMDB url" do
      sample_page = open("#{$samples_dir}/sample_movie.html")
      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/").and_return(sample_page)
      @imdb_movie.send(:document)
    end

    it "should get director" do
      @imdb_movie.director.should == 'Steven Spielberg'
    end

    it "should get the poster" do
      expected_poster = 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@.jpg'
      @imdb_movie.poster.should == expected_poster
    end

    it "should get the rating" do
      @imdb_movie.rating.should == 8.3
    end

    it "should get cast members" do
      cast = @imdb_movie.cast_members
      ['Harrison Ford', 'Sean Connery', 'Denholm Elliott', 'Alison Doody', 'John Rhys-Davies'].each do |actor|
        cast.should include(actor)
      end
      cast.should_not include('more')
    end

    it "should get the writers" do
      writers = @imdb_movie.writers
      ['George Lucas', 'Jeffrey Boam'].each do |writer|
        writers.should include(writer)
      end
      writers.should_not include('more')
    end

    it "should get the year" do
      @imdb_movie.year.should == 1989
    end

    it "should get the release date" do
      released_on = @imdb_movie.release_date
      released_on.should be_an_instance_of(Date)
      released_on.should == Date.new(1989, 9, 1)
    end

    it "should get the genres" do
      genres = @imdb_movie.genres
      genres.should have(2).strings
      ['Action', 'Adventure'].each do |genre|
        genres.should include(genre)
      end
    end

    it "should get the plot" do
      expected_plot = "When Dr. Henry Jones Sr. suddenly goes missing while pursuing the Holy Grail, eminent archaeologist Indiana Jones must follow in his father's footsteps and stop the Nazis."
      @imdb_movie.plot.should == expected_plot
    end

    it "should get the length" do
      @imdb_movie.length.should == '127 min'
    end

    it "should get the countries" do
      countries = @imdb_movie.countries
      countries.should have(1).string
      countries.should include('USA')
    end

    it "should get the languages" do
      languages = @imdb_movie.languages
      languages.should have(3).strings
      ['English', 'German', 'Greek'].each do |language|
        languages.should include(language)
      end
    end

    it "should get the color" do
      @imdb_movie.color.should == 'Color'
    end

    it "should get the company" do
      @imdb_movie.company.should == 'Paramount Pictures'
    end

    it "should get some photos" do
      photos = @imdb_movie.photos
      photos.should have(10).strings
      [
        'http://ia.media-imdb.com/images/M/MV5BMTY4MzY3OTY0MF5BMl5BanBnXkFtZTYwODM0OTE3._V1._CR82,0,320,320_SS90_.jpg',
        'http://ia.media-imdb.com/images/M/MV5BMjAwNTM4ODc3Nl5BMl5BanBnXkFtZTYwNzU0OTE3._V1._CR82,0,320,320_SS90_.jpg'
      ].each do |thumbnail|
        photos.should include(thumbnail)
      end
    end

    it "should get the tagline" do
      @imdb_movie.tagline.should == "He's back in an all new adventure. Memorial Day 1989."
    end

    it "should get the aspect ratio" do
      @imdb_movie.aspect_ratio.should == "2.20 : 1"
    end

    describe "title pre-caching & get_data" do

      # The constructor-supplied title must be readable without any fetch.
      it "should have the original title before querying anything" do
        @imdb_movie.should_not_receive(:open)
        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade'
      end

      it "should have the updated title after calling get_data" do
        sample_page = open("#{$samples_dir}/sample_movie.html")
        @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/").and_return(sample_page)
        @imdb_movie.get_data
        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade'
      end

    end

  end


  # A saved page with several fields missing (poster, plot, color, photos).
  describe 'Han robado una estrella' do

    before(:each) do
      @imdb_movie = Imdb::Movie.new('0054961', 'Han robado una estrella')
      @imdb_movie.stub!(:open).and_return(open("#{$samples_dir}/sample_incomplete_movie.html"))
    end

    it "should query IMDB url" do
      sample_page = open("#{$samples_dir}/sample_incomplete_movie.html")
      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0054961/").and_return(sample_page)
      @imdb_movie.send(:document)
    end

    it "should get director" do
      @imdb_movie.director.should == 'Javier Setó'
    end

    it "should not get the poster" do
      @imdb_movie.poster.should be_nil
    end

    it "should get cast members" do
      cast = @imdb_movie.cast_members
      ['Rafaela Aparicio', 'Marujita Díaz', 'Espartaco Santoni'].each do |actor|
        cast.should include(actor)
      end
      cast.should_not include('more')
    end

    it "should get the writers" do
      writers = @imdb_movie.writers
      writers.should have(1).string
      writers.should include('Paulino Rodrigo')
    end

    it "should get the release date" do
      released_on = @imdb_movie.release_date
      released_on.should be_an_instance_of(Date)
      released_on.should == Date.new(1963, 9, 9)
    end

    it "should get the genres" do
      @imdb_movie.genres.should == %w[Comedy Musical]
    end

    it "should not get the plot" do
      @imdb_movie.plot.should be_nil
    end

    it "should get the length" do
      @imdb_movie.length.should == '93 min'
    end

    it "should get the countries" do
      @imdb_movie.countries.should == %w[Spain]
    end

    it "should get the languages" do
      @imdb_movie.languages.should == %w[Spanish]
    end

    it "should not get the color" do
      @imdb_movie.color.should be_nil
    end

    it "should get the company" do
      @imdb_movie.company.should == 'Brepi Films'
    end

    it "should not get any photos" do
      @imdb_movie.photos.should be_empty
    end

  end

end
@@ -0,0 +1,98 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
describe Imdb::Search do

  # The response is a result list with many title links.
  describe 'search that returns multiple movies' do

    before(:each) do
      @imdb_search = Imdb::Search.new('indiana jones')
      # Hand back the saved result page instead of going to the network.
      @imdb_search.stub!(:open).and_return(open("#{$samples_dir}/sample_search.html"))
    end

    it "should query IMDB url" do
      results_page = open("#{$samples_dir}/sample_search.html")
      @imdb_search.should_receive(:open).with("http://www.imdb.com/find?q=indiana+jones;s=tt").and_return(results_page)
      @imdb_search.send(:document)
    end

    it "should not allow to change the query" do
      reassignment = lambda { @imdb_search.query = 'wadus' }
      reassignment.should raise_error(NoMethodError)
    end

    describe "movies" do

      it "should be a collection of Imdb::Movie instances" do
        found = @imdb_search.movies
        found.should be_an_instance_of(Array)
        found.should_not be_empty
        found.each { |movie| movie.should be_an_instance_of(Imdb::Movie) }
      end

      it "should include 'Indiana Jones and the Last Crusade'" do
        titles = @imdb_search.movies.map { |movie| movie.title }
        titles.should include('Indiana Jones and the Last Crusade')
      end

      it "should have titles" do
        @imdb_search.movies.each { |movie| movie.title.should_not be_empty }
      end

      it "should not have titles with HTML tags" do
        @imdb_search.movies.each { |movie| movie.title.should_not match(/<.+>/) }
      end

      it "should not have duplicate movies" do
        ids = @imdb_search.movies.map { |movie| movie.id }
        ids.should == ids.uniq
      end
    end

  end

  # The response is a title page itself, served here from the movie sample.
  describe 'search that redirects to the lone matching movie' do

    before(:each) do
      @imdb_search = Imdb::Search.new('some extremely specific search for indiana jones')
      @imdb_search.stub!(:open).and_return(open("#{$samples_dir}/sample_movie.html"))
    end

    describe "movies" do

      it "should be a collection containing a single Imdb::Movie instance" do
        found = @imdb_search.movies
        found.size.should == 1
        found.first.should be_an_instance_of(Imdb::Movie)
      end

      it "should have the correct ID" do
        only_movie = @imdb_search.movies.first
        only_movie.id.should == '0097576'
      end

      it "should have the correct title" do
        only_movie = @imdb_search.movies.first
        only_movie.title.should == 'Indiana Jones and the Last Crusade'
      end
    end

  end

  describe 'searches with potential encoding issues' do

    before(:each) do
      @imdb_search = Imdb::Search.new('torrente')
      @imdb_search.stub!(:open).and_return(open("#{$samples_dir}/sample_spanish_search.html"))
    end

    describe "movies" do

      it "should include 'Misión en Marbella'" do
        titles = @imdb_search.movies.map { |movie| movie.title }
        titles.should include('Misión en Marbella')
      end

    end

  end

end
@@ -0,0 +1,3 @@
1
+ require File.dirname(__FILE__) + '/../lib/imdb'
2
+
3
+ $samples_dir = File.dirname(__FILE__) + '/samples'
@@ -0,0 +1,25 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
describe String do

  # Entity decoding added to String by the library.
  describe "unescape_html" do

    it "should convert &amp; to &" do
      unescaped = "M&amp;M".unescape_html
      unescaped.should == 'M&M'
    end

    it "should convert &#243; to ó" do
      unescaped = "&#243;smosis".unescape_html
      unescaped.should == 'ósmosis'
    end

  end

  # Tag removal added to String by the library.
  describe "strip_tags" do

    it "should strip HTML tags" do
      stripped = "<em>Hola</em> hola".strip_tags
      stripped.should == 'Hola hola'
    end

  end

end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: langalex-imdb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.8
5
+ platform: ruby
6
+ authors:
7
+ - Sergio Gil
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-09-09 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.6"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: chronic
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description:
36
+ email: sgilperez@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ files:
44
+ - README
45
+ - lib/imdb.rb
46
+ - lib/imdb/imdb_movie.rb
47
+ - lib/imdb/imdb_search.rb
48
+ - lib/string_extensions.rb
49
+ has_rdoc: false
50
+ homepage:
51
+ licenses:
52
+ post_install_message:
53
+ rdoc_options:
54
+ - --charset=UTF-8
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ version:
69
+ requirements: []
70
+
71
+ rubyforge_project:
72
+ rubygems_version: 1.3.5
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: Internet Movie DataBase
76
+ test_files:
77
+ - spec/imdb_movie_spec.rb
78
+ - spec/imdb_search_spec.rb
79
+ - spec/spec_helper.rb
80
+ - spec/string_extensions_spec.rb