imdb 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/Manifest.txt +2 -0
- data/Rakefile +12 -1
- data/lib/imdb.rb +2 -1
- data/lib/imdb/movie.rb +30 -5
- data/lib/imdb/search.rb +68 -0
- data/lib/imdb/string_extensions.rb +4 -1
- data/spec/imdb_search_spec.rb +47 -0
- metadata +3 -1
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -4,11 +4,13 @@ README.rdoc
|
|
4
4
|
Rakefile
|
5
5
|
lib/imdb.rb
|
6
6
|
lib/imdb/movie.rb
|
7
|
+
lib/imdb/search.rb
|
7
8
|
lib/imdb/string_extensions.rb
|
8
9
|
script/console
|
9
10
|
script/destroy
|
10
11
|
script/generate
|
11
12
|
spec/imdb_movie_spec.rb
|
13
|
+
spec/imdb_search_spec.rb
|
12
14
|
spec/spec.opts
|
13
15
|
spec/spec_helper.rb
|
14
16
|
tasks/rspec.rake
|
data/Rakefile
CHANGED
@@ -17,7 +17,7 @@ $hoe = Hoe.new('imdb', Imdb::VERSION) do |p|
|
|
17
17
|
|
18
18
|
p.clean_globs |= %w[**/.DS_Store tmp *.log]
|
19
19
|
path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "\#{p.rubyforge_name}/\#{p.name}"
|
20
|
-
p.remote_rdoc_dir =
|
20
|
+
p.remote_rdoc_dir = 'clown'
|
21
21
|
p.rsync_args = '-av --delete --ignore-errors'
|
22
22
|
end
|
23
23
|
|
@@ -26,3 +26,14 @@ Dir['tasks/**/*.rake'].each { |t| load t }
|
|
26
26
|
|
27
27
|
# TODO - want other tests/tasks run by default? Add them to the list
|
28
28
|
# task :default => [:spec, :features]
|
29
|
+
|
30
|
+
remove_task :publish_docs
|
31
|
+
|
32
|
+
desc 'Publish RDoc to RubyForge.'
|
33
|
+
task :publish_docs => [:clean, :docs] do
|
34
|
+
local_dir = 'doc'
|
35
|
+
host = website_config["host"]
|
36
|
+
host = host ? "#{host}:" : ""
|
37
|
+
remote_dir = File.join(website_config["remote_dir"], "")
|
38
|
+
sh %{rsync -aCv #{local_dir}/ #{host}#{remote_dir}}
|
39
|
+
end
|
data/lib/imdb.rb
CHANGED
data/lib/imdb/movie.rb
CHANGED
@@ -1,62 +1,87 @@
|
|
1
1
|
module Imdb
|
2
2
|
|
3
|
+
# Represents a Movie on IMDB.com
|
3
4
|
class Movie
|
4
5
|
include HTTParty
|
5
6
|
|
6
|
-
attr_accessor :id, :url
|
7
|
+
attr_accessor :id, :url, :title
|
7
8
|
|
8
|
-
# Initialize a new IMDB movie object
|
9
|
-
|
9
|
+
# Initialize a new IMDB movie object with its IMDB id (as a String)
|
10
|
+
#
|
11
|
+
# movie = Imdb::Movie.new("0095016")
|
12
|
+
#
|
13
|
+
# Imdb::Movie objects are lazy loading, meaning that no HTTP request
|
14
|
+
# will be performed when a new object is created. Only when you use an
|
15
|
+
# accessor that needs the remote data, an HTTP request is made (once).
|
16
|
+
#
|
17
|
+
def initialize(imdb_id, title = nil)
|
10
18
|
@id = imdb_id
|
11
19
|
@url = "http://www.imdb.com/title/tt#{imdb_id}/"
|
20
|
+
@title = title
|
12
21
|
end
|
13
22
|
|
23
|
+
# Returns an array with cast members
|
14
24
|
def cast_members
|
15
25
|
document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
|
16
26
|
end
|
17
27
|
|
28
|
+
# Returns the name of the director
|
18
29
|
def director
|
19
30
|
document.at("h5[text()='Director:'] ~ a").innerHTML.strip.imdb_unescape_html rescue nil
|
20
31
|
end
|
21
32
|
|
33
|
+
# Returns an array of genres (as strings)
|
22
34
|
def genres
|
23
35
|
document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
|
24
36
|
end
|
25
37
|
|
38
|
+
# Returns the duration of the movie in minutes as an integer.
|
26
39
|
def length
|
27
40
|
document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/].to_i rescue nil
|
28
41
|
end
|
29
42
|
|
43
|
+
# Returns a string containing the plot.
|
30
44
|
def plot
|
31
45
|
document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
|
32
46
|
end
|
33
47
|
|
48
|
+
# Returns a string containing the URL to the movie poster.
|
34
49
|
def poster
|
35
50
|
document.at("a[@name='poster'] img")['src'][/http:.+@@/] + '.jpg' rescue nil
|
36
51
|
end
|
37
52
|
|
53
|
+
# Returns a float containing the average user rating
|
38
54
|
def rating
|
39
55
|
document.at(".general.rating b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
|
40
56
|
end
|
41
57
|
|
58
|
+
# Returns a string containing the tagline
|
42
59
|
def tagline
|
43
60
|
document.search("//h5[text()^='Tagline']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
|
44
61
|
end
|
45
62
|
|
46
|
-
|
47
|
-
|
63
|
+
# Returns a string containing the title
|
64
|
+
def title(force_refresh = false)
|
65
|
+
if @title && !force_refresh
|
66
|
+
@title
|
67
|
+
else
|
68
|
+
@title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
|
69
|
+
end
|
48
70
|
end
|
49
71
|
|
72
|
+
# Returns an integer containing the year (CCYY) the movie was released in.
|
50
73
|
def year
|
51
74
|
document.search('a[@href^="/Sections/Years/"]').innerHTML.to_i
|
52
75
|
end
|
53
76
|
|
54
77
|
private
|
55
78
|
|
79
|
+
# Returns a new Hpricot document for parsing.
|
56
80
|
def document
|
57
81
|
@document ||= Hpricot(Imdb::Movie.find_by_id(@id))
|
58
82
|
end
|
59
83
|
|
84
|
+
# Use HTTParty to fetch the raw HTML for this movie.
|
60
85
|
def self.find_by_id(imdb_id)
|
61
86
|
get("http://www.imdb.com/title/tt#{imdb_id}/")
|
62
87
|
end
|
data/lib/imdb/search.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
module Imdb
|
2
|
+
|
3
|
+
# Search IMDB for a title
|
4
|
+
class Search
|
5
|
+
include HTTParty
|
6
|
+
|
7
|
+
# Initialize a new IMDB search with the specified query
|
8
|
+
#
|
9
|
+
# search = Imdb::Search.new("Star Trek")
|
10
|
+
#
|
11
|
+
# Imdb::Search is lazy loading, meaning that unless you access the +movies+
|
12
|
+
# attribute, no query is made to IMDB.com.
|
13
|
+
#
|
14
|
+
def initialize(query)
|
15
|
+
@query = query
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns an array of Imdb::Movie objects, one for each search result yielded.
|
19
|
+
# If the +query+ was an exact match, a single element array will be returned.
|
20
|
+
def movies
|
21
|
+
@movies ||= (exact_match? ? parse_movie : parse_movies)
|
22
|
+
end
|
23
|
+
|
24
|
+
#private
|
25
|
+
|
26
|
+
def document
|
27
|
+
@document ||= Hpricot(Imdb::Search.query(@query))
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.query(query)
|
31
|
+
get("http://www.imdb.com/find?q=#{CGI::escape(query)};s=tt")
|
32
|
+
end
|
33
|
+
|
34
|
+
def parse_movies
|
35
|
+
document.search('a[@href^="/title/tt"]').reject do |element|
|
36
|
+
element.innerHTML.imdb_strip_tags.empty? ||
|
37
|
+
element.parent.innerHTML =~ /media from/i
|
38
|
+
end.map do |element|
|
39
|
+
id = element['href'][/\d+/]
|
40
|
+
|
41
|
+
data = element.parent.innerHTML.split("<br />")
|
42
|
+
if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
|
43
|
+
title = data[1]
|
44
|
+
else
|
45
|
+
title = data[0]
|
46
|
+
end
|
47
|
+
|
48
|
+
title = title.imdb_strip_tags.imdb_unescape_html
|
49
|
+
|
50
|
+
[id, title]
|
51
|
+
end.uniq.map do |values|
|
52
|
+
Imdb::Movie.new(*values)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
def parse_movie
|
57
|
+
id = document.at("a[@name='poster']")['href'][/\d+$/]
|
58
|
+
title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
|
59
|
+
[Imdb::Movie.new(id, title)]
|
60
|
+
end
|
61
|
+
|
62
|
+
# Returns true if the search yielded only one result, an exact match
|
63
|
+
def exact_match?
|
64
|
+
!document.at("//h3[text()^='Overview']/..").nil?
|
65
|
+
end
|
66
|
+
|
67
|
+
end # Search
|
68
|
+
end # Imdb
|
@@ -1,13 +1,16 @@
|
|
1
1
|
require 'cgi'
|
2
2
|
require 'iconv'
|
3
3
|
|
4
|
-
|
4
|
+
|
5
|
+
module Imdb #:nordoc:
|
5
6
|
module StringExtensions
|
6
7
|
|
8
|
+
# Unescape HTML
|
7
9
|
def imdb_unescape_html
|
8
10
|
Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
|
9
11
|
end
|
10
12
|
|
13
|
+
# Strip tags
|
11
14
|
def imdb_strip_tags
|
12
15
|
gsub(/<\/?[^>]*>/, "")
|
13
16
|
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/spec_helper.rb'
|
2
|
+
|
3
|
+
### WARNING: This spec uses live data!
|
4
|
+
#
|
5
|
+
# Many may object to testing against a live website, and for good reason.
|
6
|
+
# However, the IMDB interface changes over time, and to guarantee the parser
|
7
|
+
# works with the currently available IMDB website, tests are run against
|
8
|
+
# IMDB.com instead.
|
9
|
+
#
|
10
|
+
# This test searches for "Star Trek"
|
11
|
+
#
|
12
|
+
describe "Imdb::Search with multiple search results" do
|
13
|
+
|
14
|
+
before(:each) do
|
15
|
+
# Search for "Star Trek"
|
16
|
+
@search = Imdb::Search.new("Star Trek")
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should find > 10 results" do
|
20
|
+
@search.movies.size.should > 10
|
21
|
+
end
|
22
|
+
|
23
|
+
it "should return Imdb::Movie objects only" do
|
24
|
+
@search.movies.each { |movie| movie.should be_an(Imdb::Movie) }
|
25
|
+
end
|
26
|
+
|
27
|
+
it "should not return movies with no title" do
|
28
|
+
@search.movies.each { |movie| movie.title.should_not be_blank }
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
describe "Imdb::Search with an exact match" do
|
34
|
+
|
35
|
+
before(:each) do
|
36
|
+
# Search for "Matrix Revolutions"
|
37
|
+
@search = Imdb::Search.new("Matrix Revolutions")
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should find one result" do
|
41
|
+
@search.movies.size.should eql(1)
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should have the corrected title" do
|
45
|
+
@search.movies.first.title.should =~ /The Matrix Revolutions/i
|
46
|
+
end
|
47
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: imdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ariejan de Vroom
|
@@ -70,11 +70,13 @@ files:
|
|
70
70
|
- Rakefile
|
71
71
|
- lib/imdb.rb
|
72
72
|
- lib/imdb/movie.rb
|
73
|
+
- lib/imdb/search.rb
|
73
74
|
- lib/imdb/string_extensions.rb
|
74
75
|
- script/console
|
75
76
|
- script/destroy
|
76
77
|
- script/generate
|
77
78
|
- spec/imdb_movie_spec.rb
|
79
|
+
- spec/imdb_search_spec.rb
|
78
80
|
- spec/spec.opts
|
79
81
|
- spec/spec_helper.rb
|
80
82
|
- tasks/rspec.rake
|