imdb 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/Manifest.txt +2 -0
- data/Rakefile +12 -1
- data/lib/imdb.rb +2 -1
- data/lib/imdb/movie.rb +30 -5
- data/lib/imdb/search.rb +68 -0
- data/lib/imdb/string_extensions.rb +4 -1
- data/spec/imdb_search_spec.rb +47 -0
- metadata +3 -1
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -4,11 +4,13 @@ README.rdoc
|
|
4
4
|
Rakefile
|
5
5
|
lib/imdb.rb
|
6
6
|
lib/imdb/movie.rb
|
7
|
+
lib/imdb/search.rb
|
7
8
|
lib/imdb/string_extensions.rb
|
8
9
|
script/console
|
9
10
|
script/destroy
|
10
11
|
script/generate
|
11
12
|
spec/imdb_movie_spec.rb
|
13
|
+
spec/imdb_search_spec.rb
|
12
14
|
spec/spec.opts
|
13
15
|
spec/spec_helper.rb
|
14
16
|
tasks/rspec.rake
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ $hoe = Hoe.new('imdb', Imdb::VERSION) do |p|
|
|
17
17
|
|
18
18
|
p.clean_globs |= %w[**/.DS_Store tmp *.log]
|
19
19
|
path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "\#{p.rubyforge_name}/\#{p.name}"
|
20
|
-
p.remote_rdoc_dir =
|
20
|
+
p.remote_rdoc_dir = 'clown'
|
21
21
|
p.rsync_args = '-av --delete --ignore-errors'
|
22
22
|
end
|
23
23
|
|
@@ -26,3 +26,14 @@ Dir['tasks/**/*.rake'].each { |t| load t }
|
|
26
26
|
|
27
27
|
# TODO - want other tests/tasks run by default? Add them to the list
|
28
28
|
# task :default => [:spec, :features]
|
29
|
+
|
30
|
+
remove_task :publish_docs
|
31
|
+
|
32
|
+
desc 'Publish RDoc to RubyForge.'
|
33
|
+
task :publish_docs => [:clean, :docs] do
|
34
|
+
local_dir = 'doc'
|
35
|
+
host = website_config["host"]
|
36
|
+
host = host ? "#{host}:" : ""
|
37
|
+
remote_dir = File.join(website_config["remote_dir"], "")
|
38
|
+
sh %{rsync -aCv #{local_dir}/ #{host}#{remote_dir}}
|
39
|
+
end
|
data/lib/imdb.rb
CHANGED
data/lib/imdb/movie.rb
CHANGED
@@ -1,62 +1,87 @@
|
|
1
1
|
module Imdb
|
2
2
|
|
3
|
+
# Represents a Movie on IMDB.com
|
3
4
|
class Movie
|
4
5
|
include HTTParty
|
5
6
|
|
6
|
-
attr_accessor :id, :url
|
7
|
+
attr_accessor :id, :url, :title
|
7
8
|
|
8
|
-
# Initialize a new IMDB movie object
|
9
|
-
|
9
|
+
# Initialize a new IMDB movie object with its IMDB id (as a String)
|
10
|
+
#
|
11
|
+
# movie = Imdb::Movie.new("0095016")
|
12
|
+
#
|
13
|
+
# Imdb::Movie objects are lazy loading, meaning that no HTTP request
|
14
|
+
# will be performed when a new object is created. Only when you use an
|
15
|
+
# accessor that needs the remote data, an HTTP request is made (once).
|
16
|
+
#
|
17
|
+
def initialize(imdb_id, title = nil)
|
10
18
|
@id = imdb_id
|
11
19
|
@url = "http://www.imdb.com/title/tt#{imdb_id}/"
|
20
|
+
@title = title
|
12
21
|
end
|
13
22
|
|
23
|
+
# Returns an array with cast members
|
14
24
|
def cast_members
|
15
25
|
document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
|
16
26
|
end
|
17
27
|
|
28
|
+
# Returns the name of the director
|
18
29
|
def director
|
19
30
|
document.at("h5[text()='Director:'] ~ a").innerHTML.strip.imdb_unescape_html rescue nil
|
20
31
|
end
|
21
32
|
|
33
|
+
# Returns an array of genres (as strings)
|
22
34
|
def genres
|
23
35
|
document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
|
24
36
|
end
|
25
37
|
|
38
|
+
# Returns the duration of the movie in minutes as an integer.
|
26
39
|
def length
|
27
40
|
document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/].to_i rescue nil
|
28
41
|
end
|
29
42
|
|
43
|
+
# Returns a string containing the plot.
|
30
44
|
def plot
|
31
45
|
document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
|
32
46
|
end
|
33
47
|
|
48
|
+
# Returns a string containing the URL to the movie poster.
|
34
49
|
def poster
|
35
50
|
document.at("a[@name='poster'] img")['src'][/http:.+@@/] + '.jpg' rescue nil
|
36
51
|
end
|
37
52
|
|
53
|
+
# Returns a float containing the average user rating
|
38
54
|
def rating
|
39
55
|
document.at(".general.rating b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
|
40
56
|
end
|
41
57
|
|
58
|
+
# Returns a string containing the tagline
|
42
59
|
def tagline
|
43
60
|
document.search("//h5[text()^='Tagline']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
|
44
61
|
end
|
45
62
|
|
46
|
-
|
47
|
-
|
63
|
+
# Returns a string containing the title
|
64
|
+
def title(force_refresh = false)
|
65
|
+
if @title && !force_refresh
|
66
|
+
@title
|
67
|
+
else
|
68
|
+
@title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
|
69
|
+
end
|
48
70
|
end
|
49
71
|
|
72
|
+
# Returns an integer containing the year (CCYY) the movie was released in.
|
50
73
|
def year
|
51
74
|
document.search('a[@href^="/Sections/Years/"]').innerHTML.to_i
|
52
75
|
end
|
53
76
|
|
54
77
|
private
|
55
78
|
|
79
|
+
# Returns a new Hpricot document for parsing.
|
56
80
|
def document
|
57
81
|
@document ||= Hpricot(Imdb::Movie.find_by_id(@id))
|
58
82
|
end
|
59
83
|
|
84
|
+
# Use HTTParty to fetch the raw HTML for this movie.
|
60
85
|
def self.find_by_id(imdb_id)
|
61
86
|
get("http://www.imdb.com/title/tt#{imdb_id}/")
|
62
87
|
end
|
data/lib/imdb/search.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
module Imdb
|
2
|
+
|
3
|
+
# Search IMDB for a title
|
4
|
+
class Search
|
5
|
+
include HTTParty
|
6
|
+
|
7
|
+
# Initialize a new IMDB search with the specified query
|
8
|
+
#
|
9
|
+
# search = Imdb::Search.new("Star Trek")
|
10
|
+
#
|
11
|
+
# Imdb::Search is lazy loading, meaning that unless you access the +movies+
|
12
|
+
# attribute, no query is made to IMDB.com.
|
13
|
+
#
|
14
|
+
def initialize(query)
|
15
|
+
@query = query
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns an array of Imdb::Movie objects, one for each search result yielded.
|
19
|
+
# If the +query+ was an exact match, a single element array will be returned.
|
20
|
+
def movies
|
21
|
+
@movies ||= (exact_match? ? parse_movie : parse_movies)
|
22
|
+
end
|
23
|
+
|
24
|
+
#private
|
25
|
+
|
26
|
+
def document
|
27
|
+
@document ||= Hpricot(Imdb::Search.query(@query))
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.query(query)
|
31
|
+
get("http://www.imdb.com/find?q=#{CGI::escape(query)};s=tt")
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse_movies
|
35
|
+
document.search('a[@href^="/title/tt"]').reject do |element|
|
36
|
+
element.innerHTML.imdb_strip_tags.empty? ||
|
37
|
+
element.parent.innerHTML =~ /media from/i
|
38
|
+
end.map do |element|
|
39
|
+
id = element['href'][/\d+/]
|
40
|
+
|
41
|
+
data = element.parent.innerHTML.split("<br />")
|
42
|
+
if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
|
43
|
+
title = data[1]
|
44
|
+
else
|
45
|
+
title = data[0]
|
46
|
+
end
|
47
|
+
|
48
|
+
title = title.imdb_strip_tags.imdb_unescape_html
|
49
|
+
|
50
|
+
[id, title]
|
51
|
+
end.uniq.map do |values|
|
52
|
+
Imdb::Movie.new(*values)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def parse_movie
|
57
|
+
id = document.at("a[@name='poster']")['href'][/\d+$/]
|
58
|
+
title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
|
59
|
+
[Imdb::Movie.new(id, title)]
|
60
|
+
end
|
61
|
+
|
62
|
+
# Returns true if the search yielded only one result, an exact match
|
63
|
+
def exact_match?
|
64
|
+
!document.at("//h3[text()^='Overview']/..").nil?
|
65
|
+
end
|
66
|
+
|
67
|
+
end # Search
|
68
|
+
end # Imdb
|
@@ -1,13 +1,16 @@
|
|
1
1
|
require 'cgi'
|
2
2
|
require 'iconv'
|
3
3
|
|
4
|
-
|
4
|
+
|
5
|
+
module Imdb #:nodoc:
|
5
6
|
module StringExtensions
|
6
7
|
|
8
|
+
# Unescape HTML
|
7
9
|
def imdb_unescape_html
|
8
10
|
Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
|
9
11
|
end
|
10
12
|
|
13
|
+
# Strip tags
|
11
14
|
def imdb_strip_tags
|
12
15
|
gsub(/<\/?[^>]*>/, "")
|
13
16
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
+
|
3
|
+
### WARNING: This spec uses live data!
|
4
|
+
#
|
5
|
+
# Many may object to testing against a live website, and for good reason.
|
6
|
+
# However, the IMDB interface changes over time, and to guarantee the parser
|
7
|
+
# works with the currently available IMDB website, tests are run against
|
8
|
+
# IMDB.com instead.
|
9
|
+
#
|
10
|
+
# This test searches for "Star Trek"
|
11
|
+
#
|
12
|
+
describe "Imdb::Search with multiple search results" do
|
13
|
+
|
14
|
+
before(:each) do
|
15
|
+
# Search for "Star Trek"
|
16
|
+
@search = Imdb::Search.new("Star Trek")
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should find > 10 results" do
|
20
|
+
@search.movies.size.should > 10
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should return Imdb::Movie objects only" do
|
24
|
+
@search.movies.each { |movie| movie.should be_an(Imdb::Movie) }
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should not return movies with no title" do
|
28
|
+
@search.movies.each { |movie| movie.title.should_not be_blank }
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
describe "Imdb::Search with an exact match" do
|
34
|
+
|
35
|
+
before(:each) do
|
36
|
+
# Search for "Matrix Revolutions"
|
37
|
+
@search = Imdb::Search.new("Matrix Revolutions")
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should find one result" do
|
41
|
+
@search.movies.size.should eql(1)
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should have the corrected title" do
|
45
|
+
@search.movies.first.title.should =~ /The Matrix Revolutions/i
|
46
|
+
end
|
47
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: imdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ariejan de Vroom
|
@@ -70,11 +70,13 @@ files:
|
|
70
70
|
- Rakefile
|
71
71
|
- lib/imdb.rb
|
72
72
|
- lib/imdb/movie.rb
|
73
|
+
- lib/imdb/search.rb
|
73
74
|
- lib/imdb/string_extensions.rb
|
74
75
|
- script/console
|
75
76
|
- script/destroy
|
76
77
|
- script/generate
|
77
78
|
- spec/imdb_movie_spec.rb
|
79
|
+
- spec/imdb_search_spec.rb
|
78
80
|
- spec/spec.opts
|
79
81
|
- spec/spec_helper.rb
|
80
82
|
- tasks/rspec.rake
|