RubyGems - langalex-imdb - Versions diffs - 0.0.8 - Mend

langalex-imdb 0.0.8

Files changed (10) hide show

data/README +70 -0
data/lib/imdb.rb +10 -0
data/lib/imdb/imdb_movie.rb +98 -0
data/lib/imdb/imdb_search.rb +39 -0
data/lib/string_extensions.rb +17 -0
data/spec/imdb_movie_spec.rb +190 -0
data/spec/imdb_search_spec.rb +98 -0
data/spec/spec_helper.rb +3 -0
data/spec/string_extensions_spec.rb +25 -0
metadata +80 -0

data/README ADDED Viewed

@@ -0,0 +1,70 @@
+Imdb::Movie Indiana Jones and the Last Crusade
+- should query IMDB url
+- should get director
+- should get the poster
+- should get the rating
+- should get cast members
+- should get the writers
+- should get the year
+- should get the release date
+- should get the genres
+- should get the plot
+- should get the length
+- should get the countries
+- should get the languages
+- should get the color
+- should get the company
+- should get some photos
+- should get the tagline
+- should get the aspect ratio
+Imdb::Movie Indiana Jones and the Last Crusade title pre-caching & get_data
+- should have the original title before querying anything
+- should have the updated title after calling get_data
+Imdb::Movie Han robado una estrella
+- should query IMDB url
+- should get director
+- should not get the poster
+- should get cast members
+- should get the writers
+- should get the release date
+- should get the genres
+- should not get the plot
+- should get the length
+- should get the countries
+- should get the languages
+- should not get the color
+- should get the company
+- should not get any photos
+Imdb::Search search that returns multiple movies
+- should query IMDB url
+- should not allow to change the query
+Imdb::Search search that returns multiple movies movies
+- should be a collection of Imdb::Movie instances
+- should include 'Indiana Jones and the Last Crusade'
+- should have titles
+- should not have titles with HTML tags
+- should not have duplicate movies
+Imdb::Search search that redirects to the lone matching movie movies
+- should be a collection containing a single Imdb::Movie instance
+- should have the correct ID
+- should have the correct title
+Imdb::Search searches with potential encoding issues movies
+- should include 'Misión en Marbella'
+String unescape_html
+- should convert &amp; to &
+- should convert &#243; to ó
+String strip_tags
+- should strip HTML tags
+Finished in 0.829689 seconds
+48 examples, 0 failures

data/lib/imdb.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require 'open-uri'
+require 'date'
+require 'cgi'
+require 'rubygems'
+require 'hpricot'
+require 'chronic'
+require File.dirname(__FILE__) + '/imdb/imdb_search'
+require File.dirname(__FILE__) + '/imdb/imdb_movie'
+require File.dirname(__FILE__) + '/string_extensions'

data/lib/imdb/imdb_movie.rb ADDED Viewed

@@ -0,0 +1,98 @@
+module Imdb
+  class Movie # One IMDb title; attribute readers scrape its title page, which is fetched on first use and memoized
+    attr_accessor :id, :url, :title # id: digits placed after "tt" in the URL; url: title page address; title: as given, or as refreshed by get_data
+    def initialize(id, title = nil) # Stores id and the optional title and builds the page URL; nothing is fetched here
+      @id = id
+      @url = "http://www.imdb.com/title/tt#{self.id}/"
+      @title = title
+    end
+    def director # Text of the first <a> matched after the "Director:" <h5>, HTML-unescaped; nil on any error
+      document.at("h5[text()='Director:'] ~ a").innerHTML.strip.unescape_html rescue nil
+    end
+    def poster # Poster image src truncated at its "@@" marker with ".jpg" appended; nil when missing or on error
+      document.at("a[@name='poster'] img")['src'][/http:.+@@/] + '.jpg' rescue nil
+    end
+    def rating # Float taken from the part before "/" in the ".general.rating b" element; nil on error
+      document.at(".general.rating b").innerHTML.strip.unescape_html.split('/').first.to_f rescue nil
+    end
+    def cast_members # Array of link texts found in "table.cast td.nm"; [] on error
+      document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.unescape_html } rescue []
+    end
+    def writers # Unique link texts after the "Writers" <h5>, excluding the literal 'more' link; [] on error
+      document.search("h5[text()^='Writers'] ~ a").map { |link| link.innerHTML.strip.unescape_html }.reject { |w| w == 'more' }.uniq rescue []
+    end
+    def year # Integer from the inner HTML of links to /Sections/Years/. NOTE(review): the only reader without a rescue, so errors propagate; presumably 0 when nothing matches - confirm intended
+      document.search('a[@href^="/Sections/Years/"]').innerHTML.to_i
+    end
+    def release_date # Date parsed (via Chronic) from the "D Month YYYY" text at the start of a line in the Release Date section; nil on any error
+      date = document.search("//h5[text()^='Release Date']/..").innerHTML[/^\d{1,2} \w+ \d{4}/]
+      Date.parse(Chronic.parse(date).strftime('%Y/%m/%d'))
+    rescue
+      nil
+    end
+    def genres # Array of genre link texts after the "Genre:" <h5>; [] on error
+      document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.unescape_html } rescue []
+    end
+    def plot # Third line (index 2) of the Plot section's inner HTML with <tag>...</tag> spans removed; nil on error
+      document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html rescue nil
+    end
+    def tagline # Same extraction as plot, applied to the Tagline section; nil on error
+      document.search("//h5[text()^='Tagline']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html rescue nil
+    end
+    def aspect_ratio # Same extraction as plot, applied to the Aspect Ratio section; nil on error
+      document.search("//h5[text()^='Aspect Ratio']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html rescue nil
+    end
+    def length # First "<digits> min" substring of the Runtime section, returned as a String; nil on error
+      document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/] rescue nil
+    end
+    def countries # Array of country link texts after the "Country:" <h5>; [] on error
+      document.search("h5[text()='Country:'] ~ a[@href*=/Sections/Countries/']").map { |link| link.innerHTML.strip.unescape_html } rescue []
+    end
+    def languages # Array of language link texts after the "Language:" <h5>; [] on error
+      document.search("h5[text()='Language:'] ~ a[@href*=/Sections/Languages/']").map { |link| link.innerHTML.strip.unescape_html } rescue []
+    end
+    def color # Text of the first color-info link after the "Color:" <h5>; nil on error
+      document.at("h5[text()='Color:'] ~ a[@href*=color-info']").innerHTML.strip.unescape_html rescue nil
+    end
+    def company # Text of the first /company/ link after the "Company:" <h5>; nil on error
+      document.at("h5[text()='Company:'] ~ a[@href*=/company/']").innerHTML.strip.unescape_html rescue nil
+    end
+    def photos # Array of src attributes of ".media_strip_thumb img" images; [] on error
+      document.search(".media_strip_thumb img").map { |img| img['src'] } rescue []
+    end
+    def get_data # Re-reads the title from the page; that is its only effect here
+      update_title
+    end
+    private # everything below is internal to Movie
+    def update_title # Sets @title to the <h1> content before the first '<span', HTML-unescaped; @title becomes nil on error
+      @title = document.at("h1").innerHTML.split('<span').first.strip.unescape_html rescue nil
+    end
+    def document # Opens url and parses the body with Hpricot on first call; the result is memoized in @document
+      @document ||= Hpricot(open(self.url).read)
+    end
+  end
+end

data/lib/imdb/imdb_search.rb ADDED Viewed

@@ -0,0 +1,39 @@
+module Imdb
+  class Search # Runs one title search against IMDb and exposes the matches as Imdb::Movie objects
+    def initialize(query) # Stores the query; no accessor is defined for it and no request is made here
+      @query = query
+    end
+    def movies # Memoized Array of Imdb::Movie; built by parse_movie when the response is not an 'IMDb Search' page, otherwise by parse_movies
+      @movies ||= (exact_match? ? parse_movie : parse_movies)
+    end
+    private # everything below is internal to Search
+    def document # Fetches http://www.imdb.com/find with the CGI-escaped query and parses it with Hpricot; memoized
+      @document ||= Hpricot(open("http://www.imdb.com/find?q=#{CGI::escape(@query)};s=tt").read)
+    end
+    def parse_movies # Collects links to /title/tt..., drops those with no text, and builds one Movie per unique [id, title] pair
+      document.search('a[@href^="/title/tt"]').reject do |element|
+        element.innerHTML.strip_tags.empty?
+      end.map do |element|
+        [element['href'][/\d+/], element.innerHTML.strip_tags.unescape_html]
+      end.uniq.map do |values|
+        Imdb::Movie.new(*values)
+      end
+    end
+    def parse_movie # Treats the document as a title page: id from the trailing digits of the poster link's href, title from the <h1>
+      id = document.at("a[@name='poster']")['href'][/\d+$/]
+      title = document.at("h1").innerHTML.split('<span').first.strip.unescape_html
+      [Imdb::Movie.new(id, title)]
+    end
+    def exact_match? # True when no <title> with text 'IMDb Search' exists; presumably IMDb redirected straight to the lone match
+      document.at("title[text()='IMDb Search']").nil?
+    end
+  end
+end

data/lib/string_extensions.rb ADDED Viewed

@@ -0,0 +1,17 @@
+require 'cgi'
+require 'iconv'
+module Imdb
+  module StringExtensions # Text helpers used by the scrapers; mixed into String at the bottom of this file
+    def unescape_html # Decodes HTML entities with CGI::unescapeHTML, then converts the result from ISO-8859-1 to UTF-8
+      Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
+    end
+    def strip_tags # Returns a copy with every <...> or </...> sequence removed
+      gsub(/<\/?[^>]*>/, "")
+    end
+  end
+end
+String.send :include, Imdb::StringExtensions # makes both helpers available on every String

data/spec/imdb_movie_spec.rb ADDED Viewed

@@ -0,0 +1,190 @@
+require File.dirname(__FILE__) + '/spec_helper'
+describe Imdb::Movie do # Checks each reader against saved HTML samples instead of live IMDb pages
+  describe 'Indiana Jones and the Last Crusade' do # sample in which every attribute is expected to be present
+    before(:each) do # stub #open so the movie reads the local sample file rather than the network
+      @imdb_movie = Imdb::Movie.new('0097576', 'Indiana Jones and the Last Crusade')
+      @imdb_movie.stub!(:open).and_return(open("#{$samples_dir}/sample_movie.html"))
+    end
+    it "should query IMDB url" do
+      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/").and_return(open("#{$samples_dir}/sample_movie.html"))
+      @imdb_movie.send(:document)
+    end
+    it "should get director" do
+      @imdb_movie.director.should == 'Steven Spielberg'
+    end
+    it "should get the poster" do
+      @imdb_movie.poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@.jpg'
+    end
+    it "should get the rating" do
+      @imdb_movie.rating.should == 8.3
+    end
+    it "should get cast members" do
+      @imdb_movie.cast_members.should include('Harrison Ford')
+      @imdb_movie.cast_members.should include('Sean Connery')
+      @imdb_movie.cast_members.should include('Denholm Elliott')
+      @imdb_movie.cast_members.should include('Alison Doody')
+      @imdb_movie.cast_members.should include('John Rhys-Davies')
+      @imdb_movie.cast_members.should_not include('more')
+    end
+    it "should get the writers" do
+      @imdb_movie.writers.should include('George Lucas')
+      @imdb_movie.writers.should include('Jeffrey Boam')
+      @imdb_movie.writers.should_not include('more')
+    end
+    it "should get the year" do
+      @imdb_movie.year.should == 1989
+    end
+    it "should get the release date" do
+      @imdb_movie.release_date.should be_an_instance_of(Date)
+      @imdb_movie.release_date.should == Date.new(1989, 9, 1)
+    end
+    it "should get the genres" do
+      @imdb_movie.genres.should have(2).strings
+      @imdb_movie.genres.should include('Action')
+      @imdb_movie.genres.should include('Adventure')
+    end
+    it "should get the plot" do
+      @imdb_movie.plot.should == "When Dr. Henry Jones Sr. suddenly goes missing while pursuing the Holy Grail, eminent archaeologist Indiana Jones must follow in his father's footsteps and stop the Nazis."
+    end
+    it "should get the length" do
+      @imdb_movie.length.should == '127 min'
+    end
+    it "should get the countries" do
+      @imdb_movie.countries.should have(1).string
+      @imdb_movie.countries.should include('USA')
+    end
+    it "should get the languages" do
+      @imdb_movie.languages.should have(3).strings
+      @imdb_movie.languages.should include('English')
+      @imdb_movie.languages.should include('German')
+      @imdb_movie.languages.should include('Greek')
+    end
+    it "should get the color" do
+      @imdb_movie.color.should == 'Color'
+    end
+    it "should get the company" do
+      @imdb_movie.company.should == 'Paramount Pictures'
+    end
+    it "should get some photos" do
+      @imdb_movie.photos.should have(10).strings
+      @imdb_movie.photos.should include('http://ia.media-imdb.com/images/M/MV5BMTY4MzY3OTY0MF5BMl5BanBnXkFtZTYwODM0OTE3._V1._CR82,0,320,320_SS90_.jpg')
+      @imdb_movie.photos.should include('http://ia.media-imdb.com/images/M/MV5BMjAwNTM4ODc3Nl5BMl5BanBnXkFtZTYwNzU0OTE3._V1._CR82,0,320,320_SS90_.jpg')
+    end
+    it "should get the tagline" do
+      @imdb_movie.tagline.should == "He's back in an all new adventure. Memorial Day 1989."
+    end
+    it "should get the aspect ratio" do
+      @imdb_movie.aspect_ratio.should == "2.20 : 1"
+    end
+    describe "title pre-caching & get_data" do # the constructor's title must be readable without a fetch; get_data re-reads it from the page
+      it "should have the original title before querying anything" do
+        @imdb_movie.should_not_receive(:open)
+        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade'
+      end
+      it "should have the updated title after calling get_data" do
+        @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/").and_return(open("#{$samples_dir}/sample_movie.html"))
+        @imdb_movie.get_data
+        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade'
+      end
+    end
+  end
+  describe 'Han robado una estrella' do # sparse sample: poster, plot, color and photos are expected to be missing
+    before(:each) do # same stubbing as above, pointed at the incomplete sample page
+      @imdb_movie = Imdb::Movie.new('0054961', 'Han robado una estrella')
+      @imdb_movie.stub!(:open).and_return(open("#{$samples_dir}/sample_incomplete_movie.html"))
+    end
+    it "should query IMDB url" do
+      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0054961/").and_return(open("#{$samples_dir}/sample_incomplete_movie.html"))
+      @imdb_movie.send(:document)
+    end
+    it "should get director" do
+      @imdb_movie.director.should == 'Javier Setó'
+    end
+    it "should not get the poster" do
+      @imdb_movie.poster.should be_nil
+    end
+    it "should get cast members" do
+      @imdb_movie.cast_members.should include('Rafaela Aparicio')
+      @imdb_movie.cast_members.should include('Marujita Díaz')
+      @imdb_movie.cast_members.should include('Espartaco Santoni')
+      @imdb_movie.cast_members.should_not include('more')
+    end
+    it "should get the writers" do
+      @imdb_movie.writers.should have(1).string
+      @imdb_movie.writers.should include('Paulino Rodrigo')
+    end
+    it "should get the release date" do
+      @imdb_movie.release_date.should be_an_instance_of(Date)
+      @imdb_movie.release_date.should == Date.new(1963, 9, 9)
+    end
+    it "should get the genres" do
+      @imdb_movie.genres.should == ['Comedy', 'Musical']
+    end
+    it "should not get the plot" do
+      @imdb_movie.plot.should be_nil
+    end
+    it "should get the length" do
+      @imdb_movie.length.should == '93 min'
+    end
+    it "should get the countries" do
+      @imdb_movie.countries.should == ['Spain']
+    end
+    it "should get the languages" do
+      @imdb_movie.languages.should == ['Spanish']
+    end
+    it "should not get the color" do
+      @imdb_movie.color.should be_nil
+    end
+    it "should get the company" do
+      @imdb_movie.company.should == 'Brepi Films'
+    end
+    it "should not get any photos" do
+      @imdb_movie.photos.should be_empty
+    end
+  end
+end

data/spec/imdb_search_spec.rb ADDED Viewed

@@ -0,0 +1,98 @@
+require File.dirname(__FILE__) + '/spec_helper'
+describe Imdb::Search do # Covers result listings, single-title responses and non-ASCII titles using saved HTML samples
+  describe 'search that returns multiple movies' do
+    before(:each) do # stub #open so the search reads the saved results page rather than the network
+      @imdb_search = Imdb::Search.new('indiana jones')
+      @imdb_search.stub!(:open).and_return(open("#{$samples_dir}/sample_search.html"))
+    end
+    it "should query IMDB url" do
+      @imdb_search.should_receive(:open).with("http://www.imdb.com/find?q=indiana+jones;s=tt").and_return(open("#{$samples_dir}/sample_search.html"))
+      @imdb_search.send(:document)
+    end
+    it "should not allow to change the query" do # Search defines no query= writer, so the assignment must raise
+      lambda { @imdb_search.query = 'wadus' }.should raise_error(NoMethodError)
+    end
+    describe "movies" do
+      it "should be a collection of Imdb::Movie instances" do
+        @imdb_search.movies.should be_an_instance_of(Array)
+        @imdb_search.movies.should_not be_empty
+        @imdb_search.movies.each do |movie|
+          movie.should be_an_instance_of(Imdb::Movie)
+        end
+      end
+      it "should include 'Indiana Jones and the Last Crusade'" do
+        @imdb_search.movies.map { |m| m.title }.should include('Indiana Jones and the Last Crusade')
+      end
+      it "should have titles" do
+        @imdb_search.movies.each do |movie|
+          movie.title.should_not be_empty
+        end
+      end
+      it "should not have titles with HTML tags" do
+        @imdb_search.movies.each do |movie|
+          movie.title.should_not match(/<.+>/)
+        end
+      end
+      it "should not have duplicate movies" do
+        all_movie_ids = @imdb_search.movies.collect {|m| m.id}
+        unique_movie_ids = all_movie_ids.uniq
+        all_movie_ids.should == unique_movie_ids
+      end
+    end
+  end
+  describe 'search that redirects to the lone matching movie' do
+    before(:each) do # the stub returns a title page in place of a results page
+      @imdb_search = Imdb::Search.new('some extremely specific search for indiana jones')
+      @imdb_search.stub!(:open).and_return(open("#{$samples_dir}/sample_movie.html"))
+    end
+    describe "movies" do
+      it "should be a collection containing a single Imdb::Movie instance" do
+        @imdb_search.movies.size.should == 1
+        @imdb_search.movies.first.should be_an_instance_of(Imdb::Movie)
+      end
+      it "should have the correct ID" do
+        @imdb_search.movies.first.id.should == '0097576'
+      end
+      it "should have the correct title" do
+        @imdb_search.movies.first.title.should == 'Indiana Jones and the Last Crusade'
+      end
+    end
+  end
+  describe 'searches with potential encoding issues' do
+    before(:each) do # results page whose titles contain accented characters
+      @imdb_search = Imdb::Search.new('torrente')
+      @imdb_search.stub!(:open).and_return(open("#{$samples_dir}/sample_spanish_search.html"))
+    end
+    describe "movies" do
+      it "should include 'Misión en Marbella'" do
+        @imdb_search.movies.map { |m| m.title }.should include('Misión en Marbella')
+      end
+    end
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,3 @@
+require File.dirname(__FILE__) + '/../lib/imdb' # load the library under test relative to this file
+$samples_dir = File.dirname(__FILE__) + '/samples' # directory of saved HTML pages the specs open in place of live IMDb responses

data/spec/string_extensions_spec.rb ADDED Viewed

@@ -0,0 +1,25 @@
+require File.dirname(__FILE__) + '/spec_helper'
+describe String do # Exercises the helpers that Imdb::StringExtensions mixes into String
+  describe "unescape_html" do
+    it "should convert &amp; to &" do # named entity
+      "M&amp;M".unescape_html.should == 'M&M'
+    end
+    it "should convert &#243; to ó" do # numeric entity producing a non-ASCII character
+      "&#243;smosis".unescape_html.should == 'ósmosis'
+    end
+  end
+  describe "strip_tags" do
+    it "should strip HTML tags" do # both the opening and the closing tag are removed, the text is kept
+      "<em>Hola</em> hola".strip_tags.should == 'Hola hola'
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,80 @@
+--- !ruby/object:Gem::Specification
+name: langalex-imdb
+version: !ruby/object:Gem::Version
+  version: 0.0.8
+platform: ruby
+authors:
+- Sergio Gil
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2009-09-09 00:00:00 -07:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: hpricot
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0.6"
+    version:
+- !ruby/object:Gem::Dependency
+  name: chronic
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: "0"
+    version:
+description:
+email: sgilperez@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files:
+- README
+files:
+- README
+- lib/imdb.rb
+- lib/imdb/imdb_movie.rb
+- lib/imdb/imdb_search.rb
+- lib/string_extensions.rb
+has_rdoc: false
+homepage:
+licenses:
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.5
+signing_key:
+specification_version: 3
+summary: Internet Movie DataBase
+test_files:
+- spec/imdb_movie_spec.rb
+- spec/imdb_search_spec.rb
+- spec/spec_helper.rb
+- spec/string_extensions_spec.rb