imdb 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,7 @@
1
+ == 0.1.0 2009-06-03
2
+
3
+ * Added Imdb::Search, which allows searching IMDB for a specific movie.
4
+
1
5
  == 0.0.1 2009-06-03
2
6
 
3
7
  * First release of the IMDB gem.
@@ -4,11 +4,13 @@ README.rdoc
4
4
  Rakefile
5
5
  lib/imdb.rb
6
6
  lib/imdb/movie.rb
7
+ lib/imdb/search.rb
7
8
  lib/imdb/string_extensions.rb
8
9
  script/console
9
10
  script/destroy
10
11
  script/generate
11
12
  spec/imdb_movie_spec.rb
13
+ spec/imdb_search_spec.rb
12
14
  spec/spec.opts
13
15
  spec/spec_helper.rb
14
16
  tasks/rspec.rake
data/Rakefile CHANGED
@@ -17,7 +17,7 @@ $hoe = Hoe.new('imdb', Imdb::VERSION) do |p|
17
17
 
18
18
  p.clean_globs |= %w[**/.DS_Store tmp *.log]
19
19
  path = (p.rubyforge_name == p.name) ? p.rubyforge_name : "\#{p.rubyforge_name}/\#{p.name}"
20
- p.remote_rdoc_dir = File.join(path.gsub(/^#{p.rubyforge_name}\/?/,''), 'rdoc')
20
+ p.remote_rdoc_dir = 'clown'
21
21
  p.rsync_args = '-av --delete --ignore-errors'
22
22
  end
23
23
 
@@ -26,3 +26,14 @@ Dir['tasks/**/*.rake'].each { |t| load t }
26
26
 
27
27
  # TODO - want other tests/tasks run by default? Add them to the list
28
28
  # task :default => [:spec, :features]
29
+
30
+ remove_task :publish_docs
31
+
32
+ desc 'Publish RDoc to RubyForge.'
33
+ task :publish_docs => [:clean, :docs] do
34
+ local_dir = 'doc'
35
+ host = website_config["host"]
36
+ host = host ? "#{host}:" : ""
37
+ remote_dir = File.join(website_config["remote_dir"], "")
38
+ sh %{rsync -aCv #{local_dir}/ #{host}#{remote_dir}}
39
+ end
@@ -6,8 +6,9 @@ require 'httparty'
6
6
  require 'hpricot'
7
7
 
8
8
  require 'imdb/movie'
9
+ require 'imdb/search'
9
10
  require 'imdb/string_extensions'
10
11
 
11
12
  module Imdb
12
- VERSION = '0.0.1'
13
+ VERSION = '0.1.0'
13
14
  end
@@ -1,62 +1,87 @@
1
1
  module Imdb
2
2
 
3
+ # Represents a Movie on IMDB.com
3
4
  class Movie
4
5
  include HTTParty
5
6
 
6
- attr_accessor :id, :url
7
+ attr_accessor :id, :url, :title
7
8
 
8
- # Initialize a new IMDB movie object.
9
- def initialize(imdb_id)
9
+ # Initialize a new IMDB movie object with its IMDB id (as a String)
10
+ #
11
+ # movie = Imdb::Movie.new("0095016")
12
+ #
13
+ # Imdb::Movie objects are lazy loading, meaning that no HTTP request
14
+ # will be performed when a new object is created. Only when you use an
15
+ # accessor that needs the remote data, an HTTP request is made (once).
16
+ #
17
+ def initialize(imdb_id, title = nil)
10
18
  @id = imdb_id
11
19
  @url = "http://www.imdb.com/title/tt#{imdb_id}/"
20
+ @title = title
12
21
  end
13
22
 
23
+ # Returns an array with cast members
14
24
  def cast_members
15
25
  document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
16
26
  end
17
27
 
28
+ # Returns the name of the director
18
29
  def director
19
30
  document.at("h5[text()='Director:'] ~ a").innerHTML.strip.imdb_unescape_html rescue nil
20
31
  end
21
32
 
33
+ # Returns an array of genres (as strings)
22
34
  def genres
23
35
  document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.imdb_unescape_html } rescue []
24
36
  end
25
37
 
38
+ # Returns the duration of the movie in minutes as an integer.
26
39
  def length
27
40
  document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/].to_i rescue nil
28
41
  end
29
42
 
43
+ # Returns a string containing the plot.
30
44
  def plot
31
45
  document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
32
46
  end
33
47
 
48
+ # Returns a string containing the URL to the movie poster.
34
49
  def poster
35
50
  document.at("a[@name='poster'] img")['src'][/http:.+@@/] + '.jpg' rescue nil
36
51
  end
37
52
 
53
+ # Returns a float containing the average user rating
38
54
  def rating
39
55
  document.at(".general.rating b").innerHTML.strip.imdb_unescape_html.split('/').first.to_f rescue nil
40
56
  end
41
57
 
58
+ # Returns a string containing the tagline
42
59
  def tagline
43
60
  document.search("//h5[text()^='Tagline']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.imdb_unescape_html rescue nil
44
61
  end
45
62
 
46
- def title
47
- document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
63
+ # Returns a string containing the title
64
+ def title(force_refresh = false)
65
+ if @title && !force_refresh
66
+ @title
67
+ else
68
+ @title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html rescue nil
69
+ end
48
70
  end
49
71
 
72
+ # Returns an integer containing the year (CCYY) the movie was released in.
50
73
  def year
51
74
  document.search('a[@href^="/Sections/Years/"]').innerHTML.to_i
52
75
  end
53
76
 
54
77
  private
55
78
 
79
+ # Returns a new Hpricot document for parsing.
56
80
  def document
57
81
  @document ||= Hpricot(Imdb::Movie.find_by_id(@id))
58
82
  end
59
83
 
84
+ # Use HTTParty to fetch the raw HTML for this movie.
60
85
  def self.find_by_id(imdb_id)
61
86
  get("http://www.imdb.com/title/tt#{imdb_id}/")
62
87
  end
@@ -0,0 +1,68 @@
1
+ module Imdb
2
+
3
+ # Search IMDB for a title
4
+ class Search
5
+ include HTTParty
6
+
7
+ # Initialize a new IMDB search with the specified query
8
+ #
9
+ # search = Imdb::Search.new("Star Trek")
10
+ #
11
+ # Imdb::Search is lazy loading, meaning that unless you access the +movies+
12
+ # attribute, no query is made to IMDB.com.
13
+ #
14
+ def initialize(query)
15
+ @query = query
16
+ end
17
+
18
+ # Returns an array of Imdb::Movie objects, one for each search result yielded.
19
+ # If the +query+ was an exact match, a single element array will be returned.
20
+ def movies
21
+ @movies ||= (exact_match? ? parse_movie : parse_movies)
22
+ end
23
+
24
+ #private
25
+
26
+ def document
27
+ @document ||= Hpricot(Imdb::Search.query(@query))
28
+ end
29
+
30
+ def self.query(query)
31
+ get("http://www.imdb.com/find?q=#{CGI::escape(query)};s=tt")
32
+ end
33
+
34
+ def parse_movies
35
+ document.search('a[@href^="/title/tt"]').reject do |element|
36
+ element.innerHTML.imdb_strip_tags.empty? ||
37
+ element.parent.innerHTML =~ /media from/i
38
+ end.map do |element|
39
+ id = element['href'][/\d+/]
40
+
41
+ data = element.parent.innerHTML.split("<br />")
42
+ if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
43
+ title = data[1]
44
+ else
45
+ title = data[0]
46
+ end
47
+
48
+ title = title.imdb_strip_tags.imdb_unescape_html
49
+
50
+ [id, title]
51
+ end.uniq.map do |values|
52
+ Imdb::Movie.new(*values)
53
+ end
54
+ end
55
+
56
+ def parse_movie
57
+ id = document.at("a[@name='poster']")['href'][/\d+$/]
58
+ title = document.at("h1").innerHTML.split('<span').first.strip.imdb_unescape_html
59
+ [Imdb::Movie.new(id, title)]
60
+ end
61
+
62
+ # Returns true if the search yielded only one result, an exact match
63
+ def exact_match?
64
+ !document.at("//h3[text()^='Overview']/..").nil?
65
+ end
66
+
67
+ end # Search
68
+ end # Imdb
@@ -1,13 +1,16 @@
1
1
  require 'cgi'
2
2
  require 'iconv'
3
3
 
4
- module Imdb
4
+
5
+ module Imdb #:nodoc:
5
6
  module StringExtensions
6
7
 
8
+ # Unescape HTML
7
9
  def imdb_unescape_html
8
10
  Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
9
11
  end
10
12
 
13
+ # Strip tags
11
14
  def imdb_strip_tags
12
15
  gsub(/<\/?[^>]*>/, "")
13
16
  end
@@ -0,0 +1,47 @@
1
+ require File.dirname(__FILE__) + '/spec_helper.rb'
2
+
3
+ ### WARNING: This spec uses live data!
4
+ #
5
+ # Many may object to testing against a live website, and for good reason.
6
+ # However, the IMDB interface changes over time, and to guarantee the parser
7
+ # works with the currently available IMDB website, tests are run against
8
+ # IMDB.com instead.
9
+ #
10
+ # This test searches for "Star Trek"
11
+ #
12
+ describe "Imdb::Search with multiple search results" do
13
+
14
+ before(:each) do
15
+ # Search for "Star Trek"
16
+ @search = Imdb::Search.new("Star Trek")
17
+ end
18
+
19
+ it "should find > 10 results" do
20
+ @search.movies.size.should > 10
21
+ end
22
+
23
+ it "should return Imdb::Movie objects only" do
24
+ @search.movies.each { |movie| movie.should be_an(Imdb::Movie) }
25
+ end
26
+
27
+ it "should not return movies with no title" do
28
+ @search.movies.each { |movie| movie.title.should_not be_blank }
29
+ end
30
+
31
+ end
32
+
33
+ describe "Imdb::Search with an exact match" do
34
+
35
+ before(:each) do
36
+ # Search for "Matrix Revolutions"
37
+ @search = Imdb::Search.new("Matrix Revolutions")
38
+ end
39
+
40
+ it "should find one result" do
41
+ @search.movies.size.should eql(1)
42
+ end
43
+
44
+ it "should have the corrected title" do
45
+ @search.movies.first.title.should =~ /The Matrix Revolutions/i
46
+ end
47
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: imdb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ariejan de Vroom
@@ -70,11 +70,13 @@ files:
70
70
  - Rakefile
71
71
  - lib/imdb.rb
72
72
  - lib/imdb/movie.rb
73
+ - lib/imdb/search.rb
73
74
  - lib/imdb/string_extensions.rb
74
75
  - script/console
75
76
  - script/destroy
76
77
  - script/generate
77
78
  - spec/imdb_movie_spec.rb
79
+ - spec/imdb_search_spec.rb
78
80
  - spec/spec.opts
79
81
  - spec/spec_helper.rb
80
82
  - tasks/rspec.rake