imdb 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,7 @@
1
+ == 0.1.0 2009-06-03
2
+
3
+ * Added Imdb::Search, which allows searching IMDB for a specific movie.
4
+
1
5
  == 0.0.1 2009-06-03
2
6
 
3
7
  * First release of the IMDB gem.
@@ -4,11 +4,13 @@ README.rdoc
4
4
  Rakefile
5
5
  lib/imdb.rb
6
6
  lib/imdb/movie.rb
7
+ lib/imdb/search.rb
7
8
  lib/imdb/string_extensions.rb
8
9
  script/console
9
10
  script/destroy
10
11
  script/generate
11
12
  spec/imdb_movie_spec.rb
13
+ spec/imdb_search_spec.rb
12
14
  spec/spec.opts
13
15
  spec/spec_helper.rb
14
16
  tasks/rspec.rake
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ $hoe = Hoe.new('imdb', Imdb::VERSION) do |p|
17
17
 
18
18
  p.clean_globs |= %w[**/.DS_Store tmp *.log]
19
19
  path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "\#{p.rubyforge_name}/\#{p.name}"
20
- p.remote_rdoc_dir = File.join(path.gsub(/^#{p.rubyforge_name}\/?/,''), 'rdoc')
20
+ p.remote_rdoc_dir = 'clown'
21
21
  p.rsync_args = '-av --delete --ignore-errors'
22
22
  end
23
23
 
@@ -26,3 +26,14 @@ Dir['tasks/**/*.rake'].each { |t| load t }
26
26
 
27
27
  # TODO - want other tests/tasks run by default? Add them to the list
28
28
  # task :default => [:spec, :features]
29
+
30
+ remove_task :publish_docs
31
+
32
+ desc 'Publish RDoc to RubyForge.'
33
+ task :publish_docs => [:clean, :docs] do
34
+ local_dir = 'doc'
35
+ host = website_config["host"]
36
+ host = host ? "#{host}:" : ""
37
+ remote_dir = File.join(website_config["remote_dir"], "")
38
+ sh %{rsync -aCv #{local_dir}/ #{host}#{remote_dir}}
39
+ end
@@ -6,8 +6,9 @@ require 'httparty'
6
6
  require 'hpricot'
7
7
 
8
8
  require 'imdb/movie'
9
+ require 'imdb/search'
9
10
  require 'imdb/string_extensions'
10
11
 
11
12
  module Imdb
12
- VERSION = '0.0.1'
13
+ VERSION = '0.1.0'
13
14
  end
@@ -1,62 +1,87 @@
1
1
  module Imdb
2
2
 
3
+ # Represents a Movie on IMDB.com
3
4
  class Movie
4
5
  include HTTParty
5
6
 
6
- attr_accessor :id, :url
7
+ attr_accessor :id, :url, :title
7
8
 
8
- # Initialize a new IMDB movie object.
9
- def initialize(imdb_id)
9
+ # Initialize a new IMDB movie object with its IMDB id (as a String)
10
+ #
11
+ # movie = Imdb::Movie.new("0095016")
12
+ #
13
+ # Imdb::Movie objects are lazy loading, meaning that no HTTP request
14
+ # will be performed when a new object is created. Only when you use an
15
+ # accessor that needs the remote data, an HTTP request is made (once).
16
+ #
17
+ def initialize(imdb_id, title = nil)
10
18
  @id = imdb_id
11
19
  @url = "http://www.imdb.com/title/tt#{imdb_id}/"
20
+ @title = title
12
21
  end
13
22
 
23
+ # Returns an array with cast members
14
24
  def cast_members
15
25
  document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
16
26
  end
17
27
 
28
+ # Returns the name of the director
18
29
  def director
19
30
  document.at("h5[text()='Director:'] ~ a").innerHTML.strip.imdb_unescape_html rescue nil
20
31
  end
21
32
 
33
+ # Returns an array of genres (as strings)
22
34
  def genres
23
35
  document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
24
36
  end
25
37
 
38
+ # Returns the duration of the movie in minutes as an integer.
26
39
  def length
27
40
  document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/].to_i rescue nil
28
41
  end
29
42
 
43
+ # Returns a string containing the plot.
30
44
  def plot
31
45
  document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
32
46
  end
33
47
 
48
+ # Returns a string containing the URL to the movie poster.
34
49
  def poster
35
50
  document.at("a[@name='poster'] img")['src'][/http:.+@@/] + '.jpg' rescue nil
36
51
  end
37
52
 
53
+ # Returns a float containing the average user rating
38
54
  def rating
39
55
  document.at(".general.rating b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
40
56
  end
41
57
 
58
+ # Returns a string containing the tagline
42
59
  def tagline
43
60
  document.search("//h5[text()^='Tagline']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
44
61
  end
45
62
 
46
- def title
47
- document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
63
+ # Returns a string containing the title
64
+ def title(force_refresh = false)
65
+ if @title && !force_refresh
66
+ @title
67
+ else
68
+ @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
69
+ end
48
70
  end
49
71
 
72
+ # Returns an integer containing the year (CCYY) the movie was released in.
50
73
  def year
51
74
  document.search('a[@href^="/Sections/Years/"]').innerHTML.to_i
52
75
  end
53
76
 
54
77
  private
55
78
 
79
+ # Returns a new Hpricot document for parsing.
56
80
  def document
57
81
  @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
58
82
  end
59
83
 
84
+ # Use HTTParty to fetch the raw HTML for this movie.
60
85
  def self.find_by_id(imdb_id)
61
86
  get("http://www.imdb.com/title/tt#{imdb_id}/")
62
87
  end
@@ -0,0 +1,68 @@
1
+ module Imdb
2
+
3
+ # Search IMDB for a title
4
+ class Search
5
+ include HTTParty
6
+
7
+ # Initialize a new IMDB search with the specified query
8
+ #
9
+ # search = Imdb::Search.new("Star Trek")
10
+ #
11
+ # Imdb::Search is lazy loading, meaning that unless you access the +movies+
12
+ # attribute, no query is made to IMDB.com.
13
+ #
14
+ def initialize(query)
15
+ @query = query
16
+ end
17
+
18
+ # Returns an array of Imdb::Movie objects, one for each search result yielded.
19
+ # If the +query+ was an exact match, a single element array will be returned.
20
+ def movies
21
+ @movies ||= (exact_match? ? parse_movie : parse_movies)
22
+ end
23
+
24
+ #private
25
+
26
+ def document
27
+ @document ||= Hpricot(Imdb::Search.query(@query))
28
+ end
29
+
30
+ def self.query(query)
31
+ get("http://www.imdb.com/find?q=#{CGI::escape(query)};s=tt")
32
+ end
33
+
34
+ def parse_movies
35
+ document.search('a[@href^="/title/tt"]').reject do |element|
36
+ element.innerHTML.imdb_strip_tags.empty? ||
37
+ element.parent.innerHTML =~ /media from/i
38
+ end.map do |element|
39
+ id = element['href'][/\d+/]
40
+
41
+ data = element.parent.innerHTML.split("<br />")
42
+ if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
43
+ title = data[1]
44
+ else
45
+ title = data[0]
46
+ end
47
+
48
+ title = title.imdb_strip_tags.imdb_unescape_html
49
+
50
+ [id, title]
51
+ end.uniq.map do |values|
52
+ Imdb::Movie.new(*values)
53
+ end
54
+ end
55
+
56
+ def parse_movie
57
+ id = document.at("a[@name='poster']")['href'][/\d+$/]
58
+ title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
59
+ [Imdb::Movie.new(id, title)]
60
+ end
61
+
62
+ # Returns true if the search yielded only one result, an exact match
63
+ def exact_match?
64
+ !document.at("//h3[text()^='Overview']/..").nil?
65
+ end
66
+
67
+ end # Search
68
+ end # Imdb
@@ -1,13 +1,16 @@
1
1
  require 'cgi'
2
2
  require 'iconv'
3
3
 
4
- module Imdb
4
+
5
+ module Imdb #:nodoc:
5
6
  module StringExtensions
6
7
 
8
+ # Unescape HTML
7
9
  def imdb_unescape_html
8
10
  Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
9
11
  end
10
12
 
13
+ # Strip tags
11
14
  def imdb_strip_tags
12
15
  gsub(/<\/?[^>]*>/, "")
13
16
  end
@@ -0,0 +1,47 @@
1
+ require File.dirname(__FILE__) + '/spec_helper.rb'
2
+
3
+ ### WARNING: This spec uses live data!
4
+ #
5
+ # Many may object to testing against a live website, and for good reason.
6
+ # However, the IMDB interface changes over time, and to guarantee the parser
7
+ # works with the currently available IMDB website, tests are run against
8
+ # IMDB.com instead.
9
+ #
10
+ # This test searches for "Star Trek"
11
+ #
12
+ describe "Imdb::Search with multiple search results" do
13
+
14
+ before(:each) do
15
+ # Search for "Star Trek"
16
+ @search = Imdb::Search.new("Star Trek")
17
+ end
18
+
19
+ it "should find > 10 results" do
20
+ @search.movies.size.should > 10
21
+ end
22
+
23
+ it "should return Imdb::Movie objects only" do
24
+ @search.movies.each { |movie| movie.should be_an(Imdb::Movie) }
25
+ end
26
+
27
+ it "should not return movies with no title" do
28
+ @search.movies.each { |movie| movie.title.should_not be_blank }
29
+ end
30
+
31
+ end
32
+
33
+ describe "Imdb::Search with an exact match" do
34
+
35
+ before(:each) do
36
+ # Search for "Matrix Revolutions"
37
+ @search = Imdb::Search.new("Matrix Revolutions")
38
+ end
39
+
40
+ it "should find one result" do
41
+ @search.movies.size.should eql(1)
42
+ end
43
+
44
+ it "should have the corrected title" do
45
+ @search.movies.first.title.should =~ /The Matrix Revolutions/i
46
+ end
47
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: imdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ariejan de Vroom
@@ -70,11 +70,13 @@ files:
70
70
  - Rakefile
71
71
  - lib/imdb.rb
72
72
  - lib/imdb/movie.rb
73
+ - lib/imdb/search.rb
73
74
  - lib/imdb/string_extensions.rb
74
75
  - script/console
75
76
  - script/destroy
76
77
  - script/generate
77
78
  - spec/imdb_movie_spec.rb
79
+ - spec/imdb_search_spec.rb
78
80
  - spec/spec.opts
79
81
  - spec/spec_helper.rb
80
82
  - tasks/rspec.rake