jasonrudolph-imdb 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,70 @@
1
+
2
+ ImdbMovie
3
+
4
+ ImdbMovie Indiana Jones and the Last Crusade
5
+ - should query IMDB url
6
+ - should get director
7
+ - should get the poster
8
+ - should get the rating
9
+ - should get cast members
10
+ - should get the writers
11
+ - should get the release date
12
+ - should get the genres
13
+ - should get the plot
14
+ - should get the length
15
+ - should get the countries
16
+ - should get the languages
17
+ - should get the color
18
+ - should get the company
19
+ - should get some photos
20
+ - should get the tagline
21
+ - should get the aspect ratio
22
+
23
+ ImdbMovie Indiana Jones and the Last Crusade title pre-caching & get_data
24
+ - should have the original title before querying anything
25
+ - should have the updated title after calling get_data
26
+
27
+ ImdbMovie Han robado una estrella
28
+ - should query IMDB url
29
+ - should get director
30
+ - should not get the poster
31
+ - should get cast members
32
+ - should get the writers
33
+ - should get the release date
34
+ - should get the genres
35
+ - should not get the plot
36
+ - should get the length
37
+ - should get the countries
38
+ - should get the languages
39
+ - should not get the color
40
+ - should get the company
41
+ - should not get any photos
42
+
43
+ ImdbSearch
44
+
45
+ ImdbSearch Indiana Jones
46
+ - should query IMDB url
47
+
48
+ ImdbSearch Indiana Jones movies
49
+ - should be a collection of ImdbMovie instances
50
+ - should include 'Indiana Jones and the Last Crusade'
51
+ - should have titles
52
+ - should not have titles with HTML tags
53
+
54
+ ImdbSearch searches with potential encoding issues
55
+
56
+ ImdbSearch searches with potential encoding issues movies
57
+ - should include 'Misión en Marbella'
58
+
59
+ String
60
+
61
+ String unescape_html
62
+ - should convert &amp; to &
63
+ - should convert &#243; to ó
64
+
65
+ String strip_tags
66
+ - should strip HTML tags
67
+
68
+ Finished in 3.044303 seconds
69
+
70
+ 42 examples, 0 failures
@@ -0,0 +1,96 @@
1
# Lazily scraped view of a single IMDb title page.
#
# The page at +url+ is downloaded and parsed with Hpricot the first time any
# accessor needs it, and the parsed document is memoized for later calls.
# Unless noted otherwise, accessors are best-effort: any StandardError raised
# while fetching or querying the page is swallowed and a neutral value
# (nil or an empty array) is returned instead.
class ImdbMovie

  attr_accessor :id, :url, :title

  # id    - the digits that follow "tt" in the title URL.
  # title - optional title already known to the caller (for example from a
  #         search result); it is stored as-is and no request is made here.
  def initialize(id, title = nil)
    @id = id
    @url = "http://www.imdb.com/title/tt#{self.id}/"
    @title = title
  end

  # Text of the first link following the "Director:" heading, or nil.
  def director
    scrape { clean(document.at("h5[text()='Director:'] ~ a").innerHTML) }
  end

  # +src+ attribute of the image inside the anchor named "poster", or nil.
  def poster
    scrape { document.at("a[@name='poster'] img")['src'] }
  end

  # The part before the "/" in the "User Rating:" value, as a Float, or nil.
  def rating
    scrape { clean(document.at("b[text()='User Rating:'] ~ b").innerHTML).split('/').first.to_f }
  end

  # Link texts found in the cast table; [] on failure.
  def cast_members
    scrape([]) { link_texts("table.cast td.nm a") }
  end

  # Unique link texts following the "Writers" heading, with the literal
  # 'more' link text dropped; [] on failure.
  def writers
    scrape([]) { link_texts("h5[text()^='Writers'] ~ a").reject { |w| w == 'more' }.uniq }
  end

  # Inner HTML of the links whose href starts with "/Sections/Years/".
  # Unlike the other accessors this one has no rescue, so errors raised
  # while fetching or querying the page propagate to the caller.
  def year
    document.search('a[@href^="/Sections/Years/"]').innerHTML
  end

  # Release date as a Date, or nil. The "Release Date" block must start with
  # a "<day> <month name> <4-digit year>" string, which is parsed by Chronic.
  def release_date
    date = document.search("//h5[text()^='Release Date']/..").innerHTML[/^\d{1,2} \w+ \d{4}/]
    Date.parse(Chronic.parse(date).strftime('%Y/%m/%d'))
  rescue
    nil
  end

  # Texts of the genre links following the "Genre:" heading; [] on failure.
  def genres
    scrape([]) { link_texts("h5[text()='Genre:'] ~ a[@href*='/Sections/Genres/']") }
  end

  # Text of the "Plot" info block, or nil.
  def plot
    scrape { info_text('Plot') }
  end

  # Text of the "Tagline" info block, or nil.
  def tagline
    scrape { info_text('Tagline') }
  end

  # Text of the "Aspect Ratio" info block, or nil.
  def aspect_ratio
    scrape { info_text('Aspect Ratio') }
  end

  # First "<digits> min" substring of the "Runtime" block, or nil.
  def length
    scrape { document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/] }
  end

  # Texts of the country links following the "Country:" heading; [] on failure.
  def countries
    scrape([]) { link_texts("h5[text()='Country:'] ~ a[@href*='/Sections/Countries/']") }
  end

  # Texts of the language links following the "Language:" heading; [] on failure.
  def languages
    scrape([]) { link_texts("h5[text()='Language:'] ~ a[@href*='/Sections/Languages/']") }
  end

  # Text of the first color-info link following the "Color:" heading, or nil.
  def color
    scrape { clean(document.at("h5[text()='Color:'] ~ a[@href*='color-info']").innerHTML) }
  end

  # Text of the first company link following the "Company:" heading, or nil.
  def company
    scrape { clean(document.at("h5[text()='Company:'] ~ a[@href*='/company/']").innerHTML) }
  end

  # +src+ attributes of the thumbnails in the media strip; [] on failure.
  def photos
    scrape([]) { document.search(".media_strip_thumb img").map { |img| img['src'] } }
  end

  # Loads the page (if not loaded yet) and refreshes +title+ from it.
  def get_data
    update_title
  end

  private

  # Replaces @title with the text of the page's <h1> up to its first <span>.
  # When the page cannot be fetched or has no usable <h1>, the previously
  # known title is kept instead of being overwritten with nil.
  # Returns the resulting title.
  def update_title
    @title = clean(document.at("h1").innerHTML.split('<span').first)
  rescue StandardError
    @title
  end

  # Parsed page, downloaded on first use and memoized afterwards.
  def document
    @document ||= Hpricot(open(self.url).read)
  end

  # Yields and returns the block's value; returns +fallback+ if the block
  # raises a StandardError (the same errors a bare `rescue` modifier catches).
  def scrape(fallback = nil)
    yield
  rescue StandardError
    fallback
  end

  # Trims surrounding whitespace and decodes HTML entities.
  def clean(html)
    html.strip.unescape_html
  end

  # Cleaned inner HTML of every element matching +selector+.
  def link_texts(selector)
    document.search(selector).map { |link| clean(link.innerHTML) }
  end

  # Third line of the block that contains the <h5> starting with +heading+,
  # with any "<tag>...</tag>" run removed, trimmed and entity-decoded.
  def info_text(heading)
    block = document.search("//h5[text()^='#{heading}']/..").innerHTML
    clean(block.split("\n")[2].gsub(/<.+>.+<\/.+>/, ''))
  end

end
@@ -0,0 +1,44 @@
1
# Title search against IMDb's "find" page.
#
# The result page is downloaded and parsed with Hpricot the first time
# +movies+ is called; both the parsed page and the resulting list are memoized.
class ImdbSearch

  attr_accessor :query

  # query - the search text; it is CGI-escaped when the request URL is built.
  def initialize(query)
    self.query = query
  end

  # Array of ImdbMovie objects built from the result page.
  def movies
    @movies ||= parse_movies_from_document
  end

  private

  # Parsed result page, downloaded on first use and memoized afterwards.
  def document
    @document ||= Hpricot(open("http://www.imdb.com/find?q=#{CGI::escape(query)};s=tt").read)
  end

  def parse_movies_from_document
    exact_match? ? parse_exact_match_search_results : parse_multi_movie_search_results
  end

  # Builds the single-movie result for a page that is not a result list.
  # The id is the trailing run of digits in the href of the anchor named
  # "poster". When that anchor (or a usable href) is missing, no id can be
  # determined, so an empty list is returned instead of raising
  # NoMethodError on nil.
  # NOTE(review): a page without a poster anchor yields no result here; if
  # such pages expose the id elsewhere, extract it from there instead.
  def parse_exact_match_search_results
    poster_link = document.at("a[@name='poster']")
    href = poster_link && poster_link['href']
    id = href && href[/\d+$/]
    return [] unless id

    title = document.at("h1").innerHTML.split('<span').first.strip.unescape_html rescue nil
    [ImdbMovie.new(id, title)]
  end

  # Builds one movie per distinct [id, title] pair found among the links
  # whose href starts with "/title/tt". Links whose content is empty once
  # tags are stripped are skipped.
  def parse_multi_movie_search_results
    # Strip tags once per link and keep the text next to its element.
    titled_links = document.search('a[@href^="/title/tt"]').map do |element|
      [element, element.innerHTML.strip_tags]
    end.reject { |pair| pair[1].empty? }

    ids_and_titles = titled_links.map do |element, text|
      [element['href'][/\d+/], text.unescape_html]
    end.uniq

    ids_and_titles.map { |id, title| ImdbMovie.new(id, title) }
  end

  # True when the page's <title> is not the literal 'IMDb Title Search'.
  def exact_match?
    document.search("title[text()='IMDb Title Search']").empty?
  end

end
data/lib/imdb.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'open-uri'
2
+ require 'date'
3
+ require 'cgi'
4
+ require 'rubygems'
5
+ require 'hpricot'
6
+ require 'chronic'
7
+
8
+ require File.dirname(__FILE__) + '/imdb/imdb_search'
9
+ require File.dirname(__FILE__) + '/imdb/imdb_movie'
10
+ require File.dirname(__FILE__) + '/string_extensions'
@@ -0,0 +1,16 @@
1
+ require 'cgi'
2
+ require 'iconv'
3
+
4
# Helpers for cleaning up scraped HTML fragments; mixed into String below.
module ImdbStringExtensions

  # Decodes HTML entities with CGI, then converts the result from
  # ISO-8859-1 to UTF-8 with Iconv.
  # NOTE(review): this relies on the Iconv constant being loaded; confirm it
  # is available on the Ruby version this gem is expected to run on.
  def unescape_html
    decoded = CGI::unescapeHTML(self)
    Iconv.conv("UTF-8", 'ISO-8859-1', decoded)
  end

  # Returns a copy with every "<...>" or "</...>" run removed.
  def strip_tags
    markup = /<\/?[^>]*>/
    gsub(markup, "")
  end

end

# Make the helpers available on every String.
String.class_eval { include ImdbStringExtensions }
metadata ADDED
@@ -0,0 +1,74 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jasonrudolph-imdb
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.6
5
+ platform: ruby
6
+ authors:
7
+ - Sergio Gil
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-27 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0.6"
23
+ version:
24
+ - !ruby/object:Gem::Dependency
25
+ name: chronic
26
+ version_requirement:
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: "0"
32
+ version:
33
+ description:
34
+ email: sgilperez@gmail.com
35
+ executables: []
36
+
37
+ extensions: []
38
+
39
+ extra_rdoc_files: []
40
+
41
+ files:
42
+ - lib/imdb/imdb_movie.rb
43
+ - lib/imdb/imdb_search.rb
44
+ - lib/imdb.rb
45
+ - lib/string_extensions.rb
46
+ - README
47
+ has_rdoc: false
48
+ homepage:
49
+ post_install_message:
50
+ rdoc_options: []
51
+
52
+ require_paths:
53
+ - lib
54
+ required_ruby_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: "0"
59
+ version:
60
+ required_rubygems_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: "0"
65
+ version:
66
+ requirements: []
67
+
68
+ rubyforge_project:
69
+ rubygems_version: 1.2.0
70
+ signing_key:
71
+ specification_version: 2
72
+ summary: Internet Movie DataBase
73
+ test_files: []
74
+