RubyGems - porras-imdb - Versions diffs - 0.0.2 - Mend

porras-imdb 0.0.2

Files changed (14) hide show

data/README +62 -0
data/Rakefile +37 -0
data/lib/imdb.rb +10 -0
data/lib/imdb/imdb_movie.rb +80 -0
data/lib/imdb/imdb_search.rb +21 -0
data/lib/string_extensions.rb +16 -0
data/spec/imdb_movie_spec.rb +172 -0
data/spec/imdb_search_spec.rb +47 -0
data/spec/samples/sample_incomplete_movie.html +588 -0
data/spec/samples/sample_movie.html +951 -0
data/spec/samples/sample_search.html +3 -0
data/spec/spec_helper.rb +3 -0
data/spec/string_extensions_spec.rb +25 -0
metadata +65 -0

data/README ADDED

@@ -0,0 +1,62 @@
+ImdbMovie
+ImdbMovie Indiana Jones and the Last Crusade
+- should query IMDB url
+- should get director
+- should get the poster
+- should get cast members
+- should get the writers
+- should get the release date
+- should get the genres
+- should get the plot
+- should get the length
+- should get the countries
+- should get the languages
+- should get the color
+- should get the company
+- should get some photos
+ImdbMovie Indiana Jones and the Last Crusade title pre-caching & get_data
+- should have the original title before querying anything
+- should have the updated title after calling get_data
+ImdbMovie Han robado una estrella
+- should query IMDB url
+- should get director
+- should not get the poster
+- should get cast members
+- should get the writers
+- should get the release date
+- should get the genres
+- should not get the plot
+- should get the length
+- should get the countries
+- should get the languages
+- should not get the color
+- should get the company
+- should not get any photos
+ImdbSearch
+ImdbSearch Indiana Jones
+- should query IMDB url
+ImdbSearch Indiana Jones movies
+- should be a collection of ImdbMovie instances
+- should include 'Indiana Jones and the Last Crusade (1989)'
+- should have titles
+- should not have titles with HTML tags
+String
+String unescape_html
+- should convert &amp; to &
+- should convert &#243; to ó
+String strip_tags
+- should strip HTML tags
+Finished in 2.772724 seconds
+38 examples, 0 failures

data/Rakefile ADDED

@@ -0,0 +1,37 @@
+require 'rake'
+require 'spec/rake/spectask'
+desc "Run all specs"
+Spec::Rake::SpecTask.new('spec') do |t|
+  t.spec_files = FileList['spec/**/*.rb']
+end
+desc "Run all specs and generate HTML report"
+Spec::Rake::SpecTask.new('spec:html') do |t|
+  t.spec_files = FileList['spec/**/*.rb']
+  t.spec_opts = ["--format", "html:spec.html"]
+end
+desc "Run all specs and dump the result to README"
+Spec::Rake::SpecTask.new('spec:readme') do |t|
+  t.spec_files = FileList['spec/**/*.rb']
+  t.spec_opts = ["--format", "specdoc:README"]
+end
+namespace :gem do
+  desc "Increments the Gem version in imdb.gemspec"
+  task :increment do
+    lines = File.new('imdb.gemspec').readlines
+    lines.each do |line|
+      next unless line =~ /version = '\d+\.\d+\.(\d+)'/
+      line.gsub!(/\d+'/, "#{$1.to_i + 1}'")
+    end
+    File.open('imdb.gemspec', 'w') do |f|
+      lines.each do |line|
+        f.write(line)
+      end
+    end
+  end
+end

data/lib/imdb.rb ADDED

@@ -0,0 +1,10 @@
+require 'open-uri'
+require 'date'
+require 'cgi'
+require 'rubygems'
+require 'hpricot'
+require 'chronic'
+require File.dirname(__FILE__) + '/imdb/imdb_search'
+require File.dirname(__FILE__) + '/imdb/imdb_movie'
+require File.dirname(__FILE__) + '/string_extensions'

data/lib/imdb/imdb_movie.rb ADDED

@@ -0,0 +1,80 @@
+class ImdbMovie
+  attr_accessor :id, :url, :title
+  def initialize(id, title = nil)
+    @id = id
+    @url = "http://www.imdb.com/title/tt#{self.id}/"
+    @title = title
+  end
+  def director
+    document.at("h5[text()='Director:'] ~ a").innerHTML.strip.unescape_html rescue nil
+  end
+  def poster
+    document.at("a[@name='poster'] img")['src'] rescue nil
+  end
+  def cast_members
+    document.search("table.cast td.nm a").map { |link| link.innerHTML.strip.unescape_html } rescue []
+  end
+  def writers
+    document.search("h5[text()^='Writers'] ~ a").map { |link| link.innerHTML.strip.unescape_html }.reject { |w| w == 'more' }.uniq rescue []
+  end
+  def release_date
+    date = document.search("//h5[text()^='Release Date']/..").innerHTML[/^\d{1,2} \w+ \d{4}/]
+    Date.parse(Chronic.parse(date).strftime('%Y/%m/%d'))
+  rescue
+    nil
+  end
+  def genres
+    document.search("h5[text()='Genre:'] ~ a[@href*=/Sections/Genres/']").map { |link| link.innerHTML.strip.unescape_html } rescue []
+  end
+  def plot
+    document.search("//h5[text()^='Plot']/..").innerHTML.split("\n")[2].gsub(/<.+>.+<\/.+>/, '').strip.unescape_html rescue nil
+  end
+  def length
+    document.search("//h5[text()^='Runtime']/..").innerHTML[/\d+ min/] rescue nil
+  end
+  def countries
+    document.search("h5[text()='Country:'] ~ a[@href*=/Sections/Countries/']").map { |link| link.innerHTML.strip.unescape_html } rescue []
+  end
+  def languages
+    document.search("h5[text()='Language:'] ~ a[@href*=/Sections/Languages/']").map { |link| link.innerHTML.strip.unescape_html } rescue []
+  end
+  def color
+    document.at("h5[text()='Color:'] ~ a[@href*=color-info']").innerHTML.strip.unescape_html rescue nil
+  end
+  def company
+    document.at("h5[text()='Company:'] ~ a[@href*=/company/']").innerHTML.strip.unescape_html rescue nil
+  end
+  def photos
+    document.search(".media_strip_thumb img").map { |img| img['src'] } rescue []
+  end
+  def get_data
+    update_title
+  end
+  private
+  def update_title
+    @title = document.at("h1").innerHTML.split('<span').first.strip.unescape_html rescue nil
+  end
+  def document
+    @document ||= Hpricot(open(self.url).read)
+  end
+end

data/lib/imdb/imdb_search.rb ADDED

@@ -0,0 +1,21 @@
+class ImdbSearch
+  attr_accessor :query
+  def initialize(query)
+    self.query = query
+  end
+  def movies
+    @movies ||= document.search('a.l[@href*="/title/tt"]').map do |element|
+      ImdbMovie.new(element['href'][/\d+/], element.innerHTML.strip_tags)
+    end
+  end
+  private
+  def document
+    @document ||= Hpricot(open("http://www.google.com/search?as_q=#{CGI::escape(query + ' inurl:title')}&num=20&as_sitesearch=imdb.com").read)
+  end
+end

data/lib/string_extensions.rb ADDED

@@ -0,0 +1,16 @@
+require 'cgi'
+require 'iconv'
+module ImdbStringExtensions
+  def unescape_html
+    Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
+  end
+  def strip_tags
+    gsub(/<\/?[^>]*>/, "")
+  end
+end
+String.send :include, ImdbStringExtensions

data/spec/imdb_movie_spec.rb ADDED

@@ -0,0 +1,172 @@
+require File.dirname(__FILE__) + '/spec_helper'
+describe ImdbMovie do
+  describe 'Indiana Jones and the Last Crusade' do
+    before(:each) do
+      @imdb_movie = ImdbMovie.new('0097576', 'Indiana Jones and the Last Crusade (1989)')
+      @imdb_movie.stub!(:open).and_return(open("#{$samples_dir}/sample_movie.html"))
+    end
+    it "should query IMDB url" do
+      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0097576/").and_return(open("#{$samples_dir}/sample_movie.html"))
+      @imdb_movie.send(:document)
+    end
+    it "should get director" do
+      @imdb_movie.director.should == 'Steven Spielberg'
+    end
+    it "should get the poster" do
+      @imdb_movie.poster.should == 'http://ia.media-imdb.com/images/M/MV5BMTkzODA5ODYwOV5BMl5BanBnXkFtZTcwMjAyNDYyMQ@@._V1._SX95_SY140_.jpg'
+    end
+    it "should get cast members" do
+      @imdb_movie.cast_members.should include('Harrison Ford')
+      @imdb_movie.cast_members.should include('Sean Connery')
+      @imdb_movie.cast_members.should include('Denholm Elliott')
+      @imdb_movie.cast_members.should include('Alison Doody')
+      @imdb_movie.cast_members.should include('John Rhys-Davies')
+      @imdb_movie.cast_members.should_not include('more')
+    end
+    it "should get the writers" do
+      @imdb_movie.writers.should include('George Lucas')
+      @imdb_movie.writers.should include('Philip Kaufman')
+      @imdb_movie.writers.should_not include('more')
+    end
+    it "should get the release date" do
+      @imdb_movie.release_date.should be_an_instance_of(Date)
+      @imdb_movie.release_date.should == Date.new(1989, 9, 1)
+    end
+    it "should get the genres" do
+      @imdb_movie.genres.should have(2).strings
+      @imdb_movie.genres.should include('Action')
+      @imdb_movie.genres.should include('Adventure')
+    end
+    it "should get the plot" do
+      @imdb_movie.plot.should == "When Dr. Henry Jones Sr. suddenly goes missing while pursuing the Holy Grail, eminent archaeologist Indiana Jones must follow in his father's footsteps and stop the Nazis."
+    end
+    it "should get the length" do
+      @imdb_movie.length.should == '127 min'
+    end
+    it "should get the countries" do
+      @imdb_movie.countries.should have(1).string
+      @imdb_movie.countries.should include('USA')
+    end
+    it "should get the languages" do
+      @imdb_movie.languages.should have(3).strings
+      @imdb_movie.languages.should include('English')
+      @imdb_movie.languages.should include('German')
+      @imdb_movie.languages.should include('Greek')
+    end
+    it "should get the color" do
+      @imdb_movie.color.should == 'Color'
+    end
+    it "should get the company" do
+      @imdb_movie.company.should == 'Lucasfilm'
+    end
+    it "should get some photos" do
+      @imdb_movie.photos.should have(10).strings
+      @imdb_movie.photos.should include('http://ia.media-imdb.com/images/M/MV5BMTY4MzY3OTY0MF5BMl5BanBnXkFtZTYwODM0OTE3._V1._CR82,0,320,320_SS90_.jpg')
+      @imdb_movie.photos.should include('http://ia.media-imdb.com/images/M/MV5BMjAwNTM4ODc3Nl5BMl5BanBnXkFtZTYwNzU0OTE3._V1._CR82,0,320,320_SS90_.jpg')
+    end
+    describe "title pre-caching & get_data" do
+      it "should have the original title before querying anything" do
+        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade (1989)'
+      end
+      it "should have the updated title after calling get_data" do
+        @imdb_movie.get_data
+        @imdb_movie.title.should == 'Indiana Jones and the Last Crusade'
+      end
+    end
+  end
+  describe 'Han robado una estrella' do
+    before(:each) do
+      @imdb_movie = ImdbMovie.new('0054961', 'Han robado una estrella')
+      @imdb_movie.stub!(:open).and_return(open("#{$samples_dir}/sample_incomplete_movie.html"))
+    end
+    it "should query IMDB url" do
+      @imdb_movie.should_receive(:open).with("http://www.imdb.com/title/tt0054961/").and_return(open("#{$samples_dir}/sample_incomplete_movie.html"))
+      @imdb_movie.send(:document)
+    end
+    it "should get director" do
+      @imdb_movie.director.should == 'Javier Setó'
+    end
+    it "should not get the poster" do
+      @imdb_movie.poster.should be_nil
+    end
+    it "should get cast members" do
+      @imdb_movie.cast_members.should include('Rafaela Aparicio')
+      @imdb_movie.cast_members.should include('Marujita Díaz')
+      @imdb_movie.cast_members.should include('Espartaco Santoni')
+      @imdb_movie.cast_members.should_not include('more')
+    end
+    it "should get the writers" do
+      @imdb_movie.writers.should have(1).string
+      @imdb_movie.writers.should include('Paulino Rodrigo')
+    end
+    it "should get the release date" do
+      @imdb_movie.release_date.should be_an_instance_of(Date)
+      @imdb_movie.release_date.should == Date.new(1963, 9, 9)
+    end
+    it "should get the genres" do
+      @imdb_movie.genres.should == ['Comedy', 'Musical']
+    end
+    it "should not get the plot" do
+      @imdb_movie.plot.should be_nil
+    end
+    it "should get the length" do
+      @imdb_movie.length.should == '93 min'
+    end
+    it "should get the countries" do
+      @imdb_movie.countries.should == ['Spain']
+    end
+    it "should get the languages" do
+      @imdb_movie.languages.should == ['Spanish']
+    end
+    it "should not get the color" do
+      @imdb_movie.color.should be_nil
+    end
+    it "should get the company" do
+      @imdb_movie.company.should == 'Brepi Films'
+    end
+    it "should not get any photos" do
+      @imdb_movie.photos.should be_empty
+    end
+  end
+end

data/spec/imdb_search_spec.rb ADDED

@@ -0,0 +1,47 @@
+require File.dirname(__FILE__) + '/spec_helper'
+describe ImdbSearch do
+  describe 'Indiana Jones' do
+    before(:each) do
+      @imdb_search = ImdbSearch.new('indiana jones')
+      @imdb_search.stub!(:open).and_return(open("#{$samples_dir}/sample_search.html"))
+    end
+    it "should query IMDB url" do
+      @imdb_search.should_receive(:open).with("http://www.google.com/search?as_q=indiana+jones+inurl%3Atitle&num=20&as_sitesearch=imdb.com").and_return(open("#{$samples_dir}/sample_search.html"))
+      @imdb_search.send(:document)
+    end
+    describe "movies" do
+      it "should be a collection of ImdbMovie instances" do
+        @imdb_search.movies.should be_an_instance_of(Array)
+        @imdb_search.movies.should_not be_empty
+        @imdb_search.movies.each do |movie|
+          movie.should be_an_instance_of(ImdbMovie)
+        end
+      end
+      it "should include 'Indiana Jones and the Last Crusade (1989)'" do
+        @imdb_search.movies.map { |m| m.title }.should include('Indiana Jones and the Last Crusade (1989)')
+      end
+      it "should have titles" do
+        @imdb_search.movies.each do |movie|
+          movie.title.should_not be_empty
+        end
+      end
+      it "should not have titles with HTML tags" do
+        @imdb_search.movies.each do |movie|
+          movie.title.should_not match(/<.+>/)
+        end
+      end
+    end
+  end
+end