langalex-imdb 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,70 @@
1
+
2
+ Imdb::Movie Indiana Jones and the Last Crusade
3
+ - should query IMDB url
4
+ - should get director
5
+ - should get the poster
6
+ - should get the rating
7
+ - should get cast members
8
+ - should get the writers
9
+ - should get the year
10
+ - should get the release date
11
+ - should get the genres
12
+ - should get the plot
13
+ - should get the length
14
+ - should get the countries
15
+ - should get the languages
16
+ - should get the color
17
+ - should get the company
18
+ - should get some photos
19
+ - should get the tagline
20
+ - should get the aspect ratio
21
+
22
+ Imdb::Movie Indiana Jones and the Last Crusade title pre-caching & get_data
23
+ - should have the original title before querying anything
24
+ - should have the updated title after calling get_data
25
+
26
+ Imdb::Movie Han robado una estrella
27
+ - should query IMDB url
28
+ - should get director
29
+ - should not get the poster
30
+ - should get cast members
31
+ - should get the writers
32
+ - should get the release date
33
+ - should get the genres
34
+ - should not get the plot
35
+ - should get the length
36
+ - should get the countries
37
+ - should get the languages
38
+ - should not get the color
39
+ - should get the company
40
+ - should not get any photos
41
+
42
+ Imdb::Search search that returns multiple movies
43
+ - should query IMDB url
44
+ - should not allow to change the query
45
+
46
+ Imdb::Search search that returns multiple movies movies
47
+ - should be a collection of Imdb::Movie instances
48
+ - should include 'Indiana Jones and the Last Crusade'
49
+ - should have titles
50
+ - should not have titles with HTML tags
51
+ - should not have duplicate movies
52
+
53
+ Imdb::Search search that redirects to the lone matching movie movies
54
+ - should be a collection containing a single Imdb::Movie instance
55
+ - should have the correct ID
56
+ - should have the correct title
57
+
58
+ Imdb::Search searches with potential encoding issues movies
59
+ - should include 'Misión en Marbella'
60
+
61
+ String unescape_html
62
+ - should convert &amp;amp; to &amp;
63
+ - should convert &amp;#243; to ó
64
+
65
+ String strip_tags
66
+ - should strip HTML tags
67
+
68
+ Finished in 0.829689 seconds
69
+
70
+ 48 examples, 0 failures
data/lib/imdb.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'open-uri'
2
+ require 'date'
3
+ require 'cgi'
4
+ require 'rubygems'
5
+ require 'hpricot'
6
+ require 'chronic'
7
+
8
+ require File.dirname(__FILE__) + '/imdb/imdb_search'
9
+ require File.dirname(__FILE__) + '/imdb/imdb_movie'
10
+ require File.dirname(__FILE__) + '/string_extensions'
@@ -0,0 +1,98 @@
1
module Imdb
  # Scraper for a single IMDb title page.
  #
  # Nothing is fetched on construction: the page at +url+ is downloaded and
  # parsed by the first reader that needs it, and the parsed document is
  # memoized for every later reader. With the exception of #year, each reader
  # rescues StandardError and answers +nil+ (scalar readers) or +[]+ (list
  # readers) instead of raising, so a missing page section and a failed
  # download look the same to the caller.
  class Movie

    attr_accessor :id, :url, :title

    # id    - value interpolated after "tt" in the title URL.
    # title - optional title known in advance (e.g. from a search result);
    #         it can be read without triggering any request.
    def initialize(id, title = nil)
      @id = id
      @url = "http://www.imdb.com/title/tt#{self.id}/"
      @title = title
    end

    # Returns the text of the first link after the "Director:" heading, or nil.
    def director
      text_of("h5[text()='Director:'] ~ a")
    rescue StandardError
      nil
    end

    # Returns the poster image URL cut at its "@@" marker with ".jpg"
    # appended, or nil.
    def poster
      document.at("a[@name='poster'] img")['src'][/http:.+@@/] + '.jpg'
    rescue StandardError
      nil
    end

    # Returns the part of the rating text before the first "/" as a Float,
    # or nil.
    def rating
      text_of(".general.rating b").split('/').first.to_f
    rescue StandardError
      nil
    end

    # Returns the link texts found in the cast table, or [].
    def cast_members
      texts_of("table.cast td.nm a")
    rescue StandardError
      []
    end

    # Returns the unique link texts after the "Writers" heading, without the
    # literal "more" link, or [].
    def writers
      texts_of("h5[text()^='Writers'] ~ a").reject { |name| name == 'more' }.uniq
    rescue StandardError
      []
    end

    # Returns the year link text as an Integer.
    #
    # Unlike the other readers this one is not rescued: an error raised while
    # fetching or parsing the page propagates to the caller. String#to_i
    # yields 0 when the matched text does not start with digits.
    def year
      document.search('a[@href^="/Sections/Years/"]').innerHTML.to_i
    end

    # Returns the leading "D Month YYYY" text of the "Release Date" section as
    # a Date, or nil.
    def release_date
      date = document.search("//h5[text()^='Release Date']/..").innerHTML[/^\d{1,2} \w+ \d{4}/]
      Date.parse(Chronic.parse(date).strftime('%Y/%m/%d'))
    rescue StandardError
      nil
    end

    # NOTE(review): the attribute values in the genre, country, language,
    # color and company selectors have a closing quote but no opening one.
    # They are reproduced exactly as released; confirm how Hpricot treats
    # them before "fixing" the quoting.

    # Returns the genre link texts, or [].
    def genres
      texts_of("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']")
    rescue StandardError
      []
    end

    # Returns the third line of the "Plot" section, or nil.
    def plot
      section_line('Plot')
    end

    # Returns the third line of the "Tagline" section, or nil.
    def tagline
      section_line('Tagline')
    end

    # Returns the third line of the "Aspect Ratio" section, or nil.
    def aspect_ratio
      section_line('Aspect Ratio')
    end

    # Returns the first "<digits> min" match inside the "Runtime" section as a
    # String, or nil.
    def length
      document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/]
    rescue StandardError
      nil
    end

    # Returns the country link texts, or [].
    def countries
      texts_of("h5[text()='Country:'] ~ a[@href*=/Sections/Countries/']")
    rescue StandardError
      []
    end

    # Returns the language link texts, or [].
    def languages
      texts_of("h5[text()='Language:'] ~ a[@href*=/Sections/Languages/']")
    rescue StandardError
      []
    end

    # Returns the text of the color-info link, or nil.
    def color
      text_of("h5[text()='Color:'] ~ a[@href*=color-info']")
    rescue StandardError
      nil
    end

    # Returns the text of the first company link, or nil.
    def company
      text_of("h5[text()='Company:'] ~ a[@href*=/company/']")
    rescue StandardError
      nil
    end

    # Returns the +src+ of every media-strip thumbnail image, or [].
    def photos
      document.search(".media_strip_thumb img").map { |img| img['src'] }
    rescue StandardError
      []
    end

    # Loads the page (if it is not loaded yet) and refreshes #title from it.
    # Returns the title that is current after the attempt.
    def get_data
      update_title
    end

    private

    # Replaces @title with the page's <h1> text (everything before the first
    # "<span").
    #
    # When the page cannot be fetched or parsed the previously known title is
    # kept; assigning nil here would throw away the title handed to the
    # constructor.
    def update_title
      fetched =
        begin
          document.at("h1").innerHTML.split('<span').first.strip.unescape_html
        rescue StandardError
          nil
        end
      @title = fetched unless fetched.nil?
      @title
    end

    # Parsed page, fetched on first use and memoized.
    #
    # +open+ is deliberately called without a receiver: the specs stub it on
    # the instance. NOTE(review): this relies on open-uri's URL-aware
    # Kernel#open — confirm the supported Ruby versions still provide it.
    def document
      @document ||= Hpricot(open(self.url).read)
    end

    # Stripped, HTML-unescaped inner HTML of the first node matching
    # +selector+. Raises when nothing matches; callers pick the fallback.
    def text_of(selector)
      document.at(selector).innerHTML.strip.unescape_html
    end

    # Stripped, HTML-unescaped inner HTML of every node matching +selector+.
    def texts_of(selector)
      document.search(selector).map { |node| node.innerHTML.strip.unescape_html }
    end

    # Third line of the block whose <h5> text starts with +heading+, with
    # inline "<tag>...</tag>" runs removed, stripped and HTML-unescaped.
    # Returns nil when the section is missing or the page is unavailable.
    def section_line(heading)
      block = document.search("//h5[text()^='#{heading}']/..").innerHTML
      block.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
    rescue StandardError
      nil
    end

  end
end
@@ -0,0 +1,39 @@
1
module Imdb
  # Title search against IMDb's find page.
  #
  # The request is made lazily by the first call that needs the result page,
  # and both the parsed page and the resulting movie list are memoized.
  class Search

    # query - search text; it is CGI-escaped when the request URL is built.
    def initialize(query)
      @query = query
    end

    # Returns an Array of Imdb::Movie built from the result page. When the
    # response is not a search listing it is treated as a single title page
    # and the Array holds exactly that movie.
    def movies
      return @movies if @movies

      @movies = exact_match? ? parse_movie : parse_movies
    end

    private

    # Parsed result page, fetched on first use. +open+ is called without a
    # receiver so that it can be stubbed on the instance.
    def document
      return @document if @document

      find_url = "http://www.imdb.com/find?q=#{CGI::escape(@query)};s=tt"
      @document = Hpricot(open(find_url).read)
    end

    # Builds one movie per distinct [id, title] pair among the title links,
    # skipping links whose content is empty once tags are stripped (such as
    # image-only links).
    def parse_movies
      pairs = []
      document.search('a[@href^="/title/tt"]').each do |anchor|
        next if anchor.innerHTML.strip_tags.empty?

        pairs << [anchor['href'][/\d+/], anchor.innerHTML.strip_tags.unescape_html]
      end
      pairs.uniq.map { |movie_id, movie_title| Imdb::Movie.new(movie_id, movie_title) }
    end

    # Reads id and title straight from a title page.
    def parse_movie
      poster_anchor = document.at("a[@name='poster']")
      heading = document.at("h1")
      movie_id = poster_anchor['href'][/\d+$/]
      movie_title = heading.innerHTML.split('<span').first.strip.unescape_html
      [Imdb::Movie.new(movie_id, movie_title)]
    end

    # True when the page carries no <title> with the text "IMDb Search",
    # i.e. it is not a search listing.
    def exact_match?
      listing_title = document.at("title[text()='IMDb Search']")
      listing_title.nil?
    end

  end
end
@@ -0,0 +1,17 @@
1
require 'cgi'

begin
  require 'iconv'
rescue LoadError
  # Iconv is not part of the standard library on Ruby 2.0 and later;
  # String#encode is used there instead (see #unescape_html).
end

module Imdb
  # Helpers mixed into String (see the include at the bottom of this file).
  module StringExtensions

    # Returns a UTF-8 copy of the string with HTML entities decoded.
    #
    # Text is treated as ISO-8859-1 bytes unless it is already tagged as
    # UTF-8 and valid, in which case it is decoded as UTF-8 and not
    # transcoded a second time.
    def unescape_html
      unless respond_to?(:encode)
        # Ruby 1.8: strings carry no encoding, keep the byte-level Iconv path.
        return Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
      end

      already_utf8 = encoding == Encoding::UTF_8 && valid_encoding?
      source = already_utf8 ? self : dup.force_encoding(Encoding::ISO_8859_1)
      CGI::unescapeHTML(source).encode(Encoding::UTF_8)
    end

    # Returns a copy with every "<...>" or "</...>" run removed.
    def strip_tags
      gsub(/<\/?[^>]*>/, "")
    end
  end
end

String.send :include, Imdb::StringExtensions
@@ -0,0 +1,190 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
describe Imdb::Movie do

  # Complete title page: every scraped attribute is expected to be present.
  describe 'Indiana Jones and the Last Crusade' do

    before(:each) do
      sample_page = open("#{$samples_dir}/sample_movie.html")
      @imdb_movie = Imdb::Movie.new('0097576', 'Indiana Jones and the Last Crusade')
      @imdb_movie.stub!(:open).and_return(sample_page)
    end

    it "should query IMDB url" do
      sample_page = open("#{$samples_dir}/sample_movie.html")
      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/").and_return(sample_page)
      @imdb_movie.send(:document)
    end

    it "should get director" do
      director = @imdb_movie.director
      director.should == 'Steven Spielberg'
    end

    it "should get the poster" do
      poster = @imdb_movie.poster
      poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@.jpg'
    end

    it "should get the rating" do
      rating = @imdb_movie.rating
      rating.should == 8.3
    end

    it "should get cast members" do
      cast = @imdb_movie.cast_members
      [
        'Harrison Ford',
        'Sean Connery',
        'Denholm Elliott',
        'Alison Doody',
        'John Rhys-Davies'
      ].each { |actor| cast.should include(actor) }
      # The trailing "more" link must not leak into the list.
      cast.should_not include('more')
    end

    it "should get the writers" do
      writers = @imdb_movie.writers
      ['George Lucas', 'Jeffrey Boam'].each { |writer| writers.should include(writer) }
      writers.should_not include('more')
    end

    it "should get the year" do
      year = @imdb_movie.year
      year.should == 1989
    end

    it "should get the release date" do
      released = @imdb_movie.release_date
      released.should be_an_instance_of(Date)
      released.should == Date.new(1989, 9, 1)
    end

    it "should get the genres" do
      genres = @imdb_movie.genres
      genres.should have(2).strings
      %w[Action Adventure].each { |genre| genres.should include(genre) }
    end

    it "should get the plot" do
      plot = @imdb_movie.plot
      plot.should == "When Dr. Henry Jones Sr. suddenly goes missing while pursuing the Holy Grail, eminent archaeologist Indiana Jones must follow in his father's footsteps and stop the Nazis."
    end

    it "should get the length" do
      length = @imdb_movie.length
      length.should == '127 min'
    end

    it "should get the countries" do
      countries = @imdb_movie.countries
      countries.should have(1).string
      countries.should include('USA')
    end

    it "should get the languages" do
      languages = @imdb_movie.languages
      languages.should have(3).strings
      %w[English German Greek].each { |language| languages.should include(language) }
    end

    it "should get the color" do
      color = @imdb_movie.color
      color.should == 'Color'
    end

    it "should get the company" do
      company = @imdb_movie.company
      company.should == 'Paramount Pictures'
    end

    it "should get some photos" do
      photos = @imdb_movie.photos
      photos.should have(10).strings
      [
        'http://ia.media-imdb.com/images/M/MV5BMTY4MzY3OTY0MF5BMl5BanBnXkFtZTYwODM0OTE3._V1._CR82,0,320,320_SS90_.jpg',
        'http://ia.media-imdb.com/images/M/MV5BMjAwNTM4ODc3Nl5BMl5BanBnXkFtZTYwNzU0OTE3._V1._CR82,0,320,320_SS90_.jpg'
      ].each { |photo_url| photos.should include(photo_url) }
    end

    it "should get the tagline" do
      tagline = @imdb_movie.tagline
      tagline.should == "He's back in an all new adventure. Memorial Day 1989."
    end

    it "should get the aspect ratio" do
      ratio = @imdb_movie.aspect_ratio
      ratio.should == "2.20 : 1"
    end

    describe "title pre-caching & get_data" do

      it "should have the original title before querying anything" do
        # Reading the seeded title must not trigger a download.
        @imdb_movie.should_not_receive(:open)
        title = @imdb_movie.title
        title.should == 'Indiana Jones and the Last Crusade'
      end

      it "should have the updated title after calling get_data" do
        sample_page = open("#{$samples_dir}/sample_movie.html")
        @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/").and_return(sample_page)
        @imdb_movie.get_data
        title = @imdb_movie.title
        title.should == 'Indiana Jones and the Last Crusade'
      end

    end

  end


  # Sparse title page: poster, plot, color and photos are absent.
  describe 'Han robado una estrella' do

    before(:each) do
      sample_page = open("#{$samples_dir}/sample_incomplete_movie.html")
      @imdb_movie = Imdb::Movie.new('0054961', 'Han robado una estrella')
      @imdb_movie.stub!(:open).and_return(sample_page)
    end

    it "should query IMDB url" do
      sample_page = open("#{$samples_dir}/sample_incomplete_movie.html")
      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0054961/").and_return(sample_page)
      @imdb_movie.send(:document)
    end

    it "should get director" do
      director = @imdb_movie.director
      director.should == 'Javier Setó'
    end

    it "should not get the poster" do
      poster = @imdb_movie.poster
      poster.should be_nil
    end

    it "should get cast members" do
      cast = @imdb_movie.cast_members
      [
        'Rafaela Aparicio',
        'Marujita Díaz',
        'Espartaco Santoni'
      ].each { |actor| cast.should include(actor) }
      cast.should_not include('more')
    end

    it "should get the writers" do
      writers = @imdb_movie.writers
      writers.should have(1).string
      writers.should include('Paulino Rodrigo')
    end

    it "should get the release date" do
      released = @imdb_movie.release_date
      released.should be_an_instance_of(Date)
      released.should == Date.new(1963, 9, 9)
    end

    it "should get the genres" do
      genres = @imdb_movie.genres
      genres.should == ['Comedy', 'Musical']
    end

    it "should not get the plot" do
      plot = @imdb_movie.plot
      plot.should be_nil
    end

    it "should get the length" do
      length = @imdb_movie.length
      length.should == '93 min'
    end

    it "should get the countries" do
      countries = @imdb_movie.countries
      countries.should == ['Spain']
    end

    it "should get the languages" do
      languages = @imdb_movie.languages
      languages.should == ['Spanish']
    end

    it "should not get the color" do
      color = @imdb_movie.color
      color.should be_nil
    end

    it "should get the company" do
      company = @imdb_movie.company
      company.should == 'Brepi Films'
    end

    it "should not get any photos" do
      photos = @imdb_movie.photos
      photos.should be_empty
    end

  end

end
@@ -0,0 +1,98 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
describe Imdb::Search do

  # Query answered with a regular result listing.
  describe 'search that returns multiple movies' do

    before(:each) do
      listing = open("#{$samples_dir}/sample_search.html")
      @imdb_search = Imdb::Search.new('indiana jones')
      @imdb_search.stub!(:open).and_return(listing)
    end

    it "should query IMDB url" do
      listing = open("#{$samples_dir}/sample_search.html")
      @imdb_search.should_receive(:open).with("http://www.imdb.com/find?q=indiana+jones;s=tt").and_return(listing)
      @imdb_search.send(:document)
    end

    it "should not allow to change the query" do
      reassign_query = lambda { @imdb_search.query = 'wadus' }
      reassign_query.should raise_error(NoMethodError)
    end

    describe "movies" do

      it "should be a collection of Imdb::Movie instances" do
        found = @imdb_search.movies
        found.should be_an_instance_of(Array)
        found.should_not be_empty
        found.each { |movie| movie.should be_an_instance_of(Imdb::Movie) }
      end

      it "should include 'Indiana Jones and the Last Crusade'" do
        titles = @imdb_search.movies.map { |movie| movie.title }
        titles.should include('Indiana Jones and the Last Crusade')
      end

      it "should have titles" do
        @imdb_search.movies.each { |movie| movie.title.should_not be_empty }
      end

      it "should not have titles with HTML tags" do
        @imdb_search.movies.each { |movie| movie.title.should_not match(/<.+>/) }
      end

      it "should not have duplicate movies" do
        ids = @imdb_search.movies.map { |movie| movie.id }
        ids.should == ids.uniq
      end
    end

  end

  # Query answered with a title page instead of a listing.
  describe 'search that redirects to the lone matching movie' do

    before(:each) do
      title_page = open("#{$samples_dir}/sample_movie.html")
      @imdb_search = Imdb::Search.new('some extremely specific search for indiana jones')
      @imdb_search.stub!(:open).and_return(title_page)
    end

    describe "movies" do

      it "should be a collection containing a single Imdb::Movie instance" do
        found = @imdb_search.movies
        found.size.should == 1
        found.first.should be_an_instance_of(Imdb::Movie)
      end

      it "should have the correct ID" do
        only_movie = @imdb_search.movies.first
        only_movie.id.should == '0097576'
      end

      it "should have the correct title" do
        only_movie = @imdb_search.movies.first
        only_movie.title.should == 'Indiana Jones and the Last Crusade'
      end
    end

  end

  # Result listing containing accented titles.
  describe 'searches with potential encoding issues' do

    before(:each) do
      listing = open("#{$samples_dir}/sample_spanish_search.html")
      @imdb_search = Imdb::Search.new('torrente')
      @imdb_search.stub!(:open).and_return(listing)
    end

    describe "movies" do

      it "should include 'Misión en Marbella'" do
        titles = @imdb_search.movies.map { |movie| movie.title }
        titles.should include('Misión en Marbella')
      end

    end

  end

end
@@ -0,0 +1,3 @@
1
+ require File.dirname(__FILE__) + '/../lib/imdb'
2
+
3
# Directory holding the saved HTML pages that the specs feed to the scrapers.
$samples_dir = "#{File.dirname(__FILE__)}/samples"
@@ -0,0 +1,25 @@
1
+ require File.dirname(__FILE__) + '/spec_helper'
2
+
3
describe String do

  describe "unescape_html" do

    it "should convert &amp; to &" do
      unescaped = "M&amp;M".unescape_html
      unescaped.should == 'M&M'
    end

    it "should convert &#243; to ó" do
      unescaped = "&#243;smosis".unescape_html
      unescaped.should == 'ósmosis'
    end

  end

  describe "strip_tags" do

    it "should strip HTML tags" do
      stripped = "<em>Hola</em> hola".strip_tags
      stripped.should == 'Hola hola'
    end

  end

end
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: langalex-imdb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.8
5
+ platform: ruby
6
+ authors:
7
+ - Sergio Gil
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-09-09 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.6"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: chronic
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description:
36
+ email: sgilperez@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - README
43
+ files:
44
+ - README
45
+ - lib/imdb.rb
46
+ - lib/imdb/imdb_movie.rb
47
+ - lib/imdb/imdb_search.rb
48
+ - lib/string_extensions.rb
49
+ has_rdoc: false
50
+ homepage:
51
+ licenses:
52
+ post_install_message:
53
+ rdoc_options:
54
+ - --charset=UTF-8
55
+ require_paths:
56
+ - lib
57
+ required_ruby_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ version:
69
+ requirements: []
70
+
71
+ rubyforge_project:
72
+ rubygems_version: 1.3.5
73
+ signing_key:
74
+ specification_version: 3
75
+ summary: Internet Movie DataBase
76
+ test_files:
77
+ - spec/imdb_movie_spec.rb
78
+ - spec/imdb_search_spec.rb
79
+ - spec/spec_helper.rb
80
+ - spec/string_extensions_spec.rb