jasonrudolph-imdb 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +70 -0
- data/lib/imdb/imdb_movie.rb +96 -0
- data/lib/imdb/imdb_search.rb +44 -0
- data/lib/imdb.rb +10 -0
- data/lib/string_extensions.rb +16 -0
- metadata +74 -0
data/README
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
|
2
|
+
ImdbMovie
|
3
|
+
|
4
|
+
ImdbMovie Indiana Jones and the Last Crusade
|
5
|
+
- should query IMDB url
|
6
|
+
- should get director
|
7
|
+
- should get the poster
|
8
|
+
- should get the rating
|
9
|
+
- should get cast members
|
10
|
+
- should get the writers
|
11
|
+
- should get the release date
|
12
|
+
- should get the genres
|
13
|
+
- should get the plot
|
14
|
+
- should get the length
|
15
|
+
- should get the countries
|
16
|
+
- should get the languages
|
17
|
+
- should get the color
|
18
|
+
- should get the company
|
19
|
+
- should get some photos
|
20
|
+
- should get the tagline
|
21
|
+
- should get the aspect ratio
|
22
|
+
|
23
|
+
ImdbMovie Indiana Jones and the Last Crusade title pre-caching & get_data
|
24
|
+
- should have the original title before querying anything
|
25
|
+
- should have the updated title after calling get_data
|
26
|
+
|
27
|
+
ImdbMovie Han robado una estrella
|
28
|
+
- should query IMDB url
|
29
|
+
- should get director
|
30
|
+
- should not get the poster
|
31
|
+
- should get cast members
|
32
|
+
- should get the writers
|
33
|
+
- should get the release date
|
34
|
+
- should get the genres
|
35
|
+
- should not get the plot
|
36
|
+
- should get the length
|
37
|
+
- should get the countries
|
38
|
+
- should get the languages
|
39
|
+
- should not get the color
|
40
|
+
- should get the company
|
41
|
+
- should not get any photos
|
42
|
+
|
43
|
+
ImdbSearch
|
44
|
+
|
45
|
+
ImdbSearch Indiana Jones
|
46
|
+
- should query IMDB url
|
47
|
+
|
48
|
+
ImdbSearch Indiana Jones movies
|
49
|
+
- should be a collection of ImdbMovie instances
|
50
|
+
- should include 'Indiana Jones and the Last Crusade'
|
51
|
+
- should have titles
|
52
|
+
- should not have titles with HTML tags
|
53
|
+
|
54
|
+
ImdbSearch searches with potential encoding issues
|
55
|
+
|
56
|
+
ImdbSearch searches with potential encoding issues movies
|
57
|
+
- should include 'Misión en Marbella'
|
58
|
+
|
59
|
+
String
|
60
|
+
|
61
|
+
String unescape_html
|
62
|
+
- should convert &amp; to &
|
63
|
+
- should convert &#243; to ó
|
64
|
+
|
65
|
+
String strip_tags
|
66
|
+
- should strip HTML tags
|
67
|
+
|
68
|
+
Finished in 3.044303 seconds
|
69
|
+
|
70
|
+
42 examples, 0 failures
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Scrapes a single IMDb title page (http://www.imdb.com/title/tt<id>/) with
# Hpricot and exposes individual fields as reader methods.
#
# The page is downloaded lazily by #document and memoized after the first
# successful fetch. Every scraper is best-effort: when the page cannot be
# fetched or the expected markup is missing, scalar readers return nil and
# list readers return [] instead of raising.
class ImdbMovie

  attr_accessor :id, :url, :title

  # id    - the numeric part of the IMDb title id (the digits after "tt").
  # title - optional title already known to the caller (e.g. from a search
  #         result); it is kept as-is until #get_data is called.
  def initialize(id, title = nil)
    @id = id
    @url = "http://www.imdb.com/title/tt#{self.id}/"
    @title = title
  end

  # Returns the text of the first link following the "Director:" heading, or nil.
  def director
    document.at("h5[text()='Director:'] ~ a").innerHTML.strip.unescape_html rescue nil
  end

  # Returns the src attribute of the poster image, or nil when there is none.
  def poster
    document.at("a[@name='poster'] img")['src'] rescue nil
  end

  # Returns the user rating as a Float (the part before the '/'), or nil.
  def rating
    document.at("b[text()='User Rating:'] ~ b").innerHTML.strip.unescape_html.split('/').first.to_f rescue nil
  end

  # Returns an Array with the text of every cast-table name link, or [].
  def cast_members
    document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.unescape_html } rescue []
  end

  # Returns the unique writer names, dropping the 'more' pagination link, or [].
  def writers
    document.search("h5[text()^='Writers'] ~ a").map { |link| link.innerHTML.strip.unescape_html }.reject { |w| w == 'more' }.uniq rescue []
  end

  # Returns the inner HTML of the "/Sections/Years/" link(s) as a String.
  # Returns nil when the page cannot be fetched or parsed, so that this reader
  # degrades the same way as every other scalar reader in the class.
  def year
    document.search('a[@href^="/Sections/Years/"]').innerHTML rescue nil
  end

  # Returns the release date as a Date, or nil when no "D Month YYYY" text is
  # found at the start of a line of the Release Date section, or it cannot be parsed.
  def release_date
    date = document.search("//h5[text()^='Release Date']/..").innerHTML[/^\d{1,2} \w+ \d{4}/]
    Date.parse(Chronic.parse(date).strftime('%Y/%m/%d'))
  rescue
    nil
  end

  # Returns an Array of genre names, or [].
  def genres
    linked_names('Genre:', '/Sections/Genres/')
  end

  # Returns the plot text, or nil.
  def plot
    info_text('Plot')
  end

  # Returns the tagline text, or nil.
  def tagline
    info_text('Tagline')
  end

  # Returns the aspect ratio text, or nil.
  def aspect_ratio
    info_text('Aspect Ratio')
  end

  # Returns the first "<digits> min" String found in the Runtime section, or nil.
  def length
    document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/] rescue nil
  end

  # Returns an Array of country names, or [].
  def countries
    linked_names('Country:', '/Sections/Countries/')
  end

  # Returns an Array of language names, or [].
  def languages
    linked_names('Language:', '/Sections/Languages/')
  end

  # Returns the text of the first color-info link after "Color:", or nil.
  def color
    document.at("h5[text()='Color:'] ~ a[@href*=color-info']").innerHTML.strip.unescape_html rescue nil
  end

  # Returns the text of the first company link after "Company:", or nil.
  def company
    document.at("h5[text()='Company:'] ~ a[@href*=/company/']").innerHTML.strip.unescape_html rescue nil
  end

  # Returns an Array with the src of every media-strip thumbnail, or [].
  def photos
    document.search(".media_strip_thumb img").map { |img| img['src'] } rescue []
  end

  # Refreshes the fields cached on the instance from the title page.
  # Currently that is only the title.
  def get_data
    update_title
  end

  private

  # Replaces @title with the page's <h1> text (everything before the first
  # '<span'). On any failure @title is set to nil.
  def update_title
    @title = document.at("h1").innerHTML.split('<span').first.strip.unescape_html rescue nil
  end

  # Fetches and parses the title page; the parsed document is memoized.
  def document
    @document ||= Hpricot(open(self.url).read)
  end

  # Shared extractor for the sections whose heading starts with +label+
  # (Plot, Tagline, Aspect Ratio): takes the third line of the section's
  # HTML, removes inline tag pairs and returns the unescaped text, or nil.
  def info_text(label)
    section_html = document.search("//h5[text()^='#{label}']/..").innerHTML
    section_html.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html
  rescue StandardError
    nil
  end

  # Shared extractor for the link lists that follow an exact +heading+ and
  # whose href contains +href_part+ (genres, countries, languages).
  # NOTE(review): the attribute value carries a closing quote but no opening
  # one; the selector text is kept exactly as it was, since the recorded spec
  # run passed with it - confirm Hpricot's quoting rules before normalizing.
  def linked_names(heading, href_part)
    document.search("h5[text()='#{heading}'] ~ a[@href*=#{href_part}']").map { |link| link.innerHTML.strip.unescape_html }
  rescue StandardError
    []
  end

end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Runs a title search against IMDb for a query string and exposes the hits
# as ImdbMovie objects. The result page is downloaded and parsed lazily and
# both the parsed page and the movie list are memoized.
class ImdbSearch

  attr_accessor :query

  # query - the search text; it is CGI-escaped when the search URL is built.
  def initialize(query)
    self.query = query
  end

  # Returns an Array of ImdbMovie built from the search result page.
  def movies
    @movies ||= parse_movies_from_document
  end

  private

  # Fetches and parses the search result page once.
  def document
    @document ||= begin
      search_url = "http://www.imdb.com/find?q=#{CGI::escape(query)};s=tt"
      Hpricot(open(search_url).read)
    end
  end

  # Picks the parser matching the kind of page that came back.
  def parse_movies_from_document
    if exact_match?
      parse_exact_match_search_results
    else
      parse_multi_movie_search_results
    end
  end

  # Builds the single-element result for a page that is a title page itself.
  # The id is the trailing digits of the poster anchor's href; the title is
  # the <h1> text before the first '<span' (nil if that cannot be read).
  # NOTE(review): unlike the title lookup, the id lookup has no fallback, so
  # a page without an a[@name='poster'] element raises here - confirm whether
  # such pages can reach this branch.
  def parse_exact_match_search_results
    poster_anchor = document.at("a[@name='poster']")
    id = poster_anchor['href'][/\d+$/]
    title = document.at("h1").innerHTML.split('<span').first.strip.unescape_html rescue nil
    [ImdbMovie.new(id, title)]
  end

  # Builds one ImdbMovie per distinct (id, title) pair among the links that
  # point at "/title/tt..." and have non-empty text once tags are stripped.
  def parse_multi_movie_search_results
    title_links = document.search('a[@href^="/title/tt"]').reject do |element|
      element.innerHTML.strip_tags.empty?
    end
    ids_and_titles = title_links.map do |link|
      [link['href'][/\d+/], link.innerHTML.strip_tags.unescape_html]
    end.uniq
    ids_and_titles.map { |id, title| ImdbMovie.new(id, title) }
  end

  # True when the page has no <title> whose text is 'IMDb Title Search'.
  def exact_match?
    search_page_title = document.search("title[text()='IMDb Title Search']")
    search_page_title.empty?
  end

end
|
data/lib/imdb.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'date'
|
3
|
+
require 'cgi'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'hpricot'
|
6
|
+
require 'chronic'
|
7
|
+
|
8
|
+
require File.dirname(__FILE__) + '/imdb/imdb_search'
|
9
|
+
require File.dirname(__FILE__) + '/imdb/imdb_movie'
|
10
|
+
require File.dirname(__FILE__) + '/string_extensions'
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'iconv'
|
3
|
+
|
4
|
+
# String helpers used when scraping IMDb pages; mixed into String below.
module ImdbStringExtensions

  # Decodes HTML entities with CGI::unescapeHTML, then converts the result
  # from ISO-8859-1 to UTF-8 and returns it.
  # NOTE(review): relies on Iconv, loaded by the require 'iconv' at the top
  # of this file - confirm it is available on the target Ruby.
  def unescape_html
    decoded = CGI::unescapeHTML(self)
    Iconv.conv("UTF-8", 'ISO-8859-1', decoded)
  end

  # Returns a copy of the string with every <...> / </...> tag removed.
  def strip_tags
    gsub(/<\/?[^>]*>/, "")
  end

end

# Make the helpers available on every String instance.
String.send(:include, ImdbStringExtensions)
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jasonrudolph-imdb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.6
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sergio Gil
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-10-27 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: "0.6"
|
23
|
+
version:
|
24
|
+
- !ruby/object:Gem::Dependency
|
25
|
+
name: chronic
|
26
|
+
version_requirement:
|
27
|
+
version_requirements: !ruby/object:Gem::Requirement
|
28
|
+
requirements:
|
29
|
+
- - ">="
|
30
|
+
- !ruby/object:Gem::Version
|
31
|
+
version: "0"
|
32
|
+
version:
|
33
|
+
description:
|
34
|
+
email: sgilperez@gmail.com
|
35
|
+
executables: []
|
36
|
+
|
37
|
+
extensions: []
|
38
|
+
|
39
|
+
extra_rdoc_files: []
|
40
|
+
|
41
|
+
files:
|
42
|
+
- lib/imdb/imdb_movie.rb
|
43
|
+
- lib/imdb/imdb_search.rb
|
44
|
+
- lib/imdb.rb
|
45
|
+
- lib/string_extensions.rb
|
46
|
+
- README
|
47
|
+
has_rdoc: false
|
48
|
+
homepage:
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options: []
|
51
|
+
|
52
|
+
require_paths:
|
53
|
+
- lib
|
54
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ">="
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: "0"
|
59
|
+
version:
|
60
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
version:
|
66
|
+
requirements: []
|
67
|
+
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 1.2.0
|
70
|
+
signing_key:
|
71
|
+
specification_version: 2
|
72
|
+
summary: Internet Movie DataBase
|
73
|
+
test_files: []
|
74
|
+
|