imdb_celebrity 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. data/History.txt +8 -0
  2. data/Manifest.txt +28 -0
  3. data/PostInstall.txt +7 -0
  4. data/README.rdoc +55 -0
  5. data/Rakefile +26 -0
  6. data/bin/imdb_celebrity +10 -0
  7. data/imdb_celebrity.gemspec +15 -0
  8. data/lib/imdb_celebrity.rb +13 -0
  9. data/lib/imdb_celebrity/celebrity.rb +72 -0
  10. data/lib/imdb_celebrity/cli.rb +42 -0
  11. data/lib/imdb_celebrity/parser/hpricot_parser/celebrity_bio_parser.rb +17 -0
  12. data/lib/imdb_celebrity/parser/hpricot_parser/hpricot_parser.rb +22 -0
  13. data/lib/imdb_celebrity/parser/hpricot_parser/search_parser.rb +40 -0
  14. data/lib/imdb_celebrity/parser/nokogiri_parser/celebrity_bio_parser.rb +22 -0
  15. data/lib/imdb_celebrity/parser/nokogiri_parser/nokogiri_parser.rb +21 -0
  16. data/lib/imdb_celebrity/parser/nokogiri_parser/search_parser.rb +39 -0
  17. data/lib/imdb_celebrity/parser/parser.rb +25 -0
  18. data/lib/imdb_celebrity/search.rb +45 -0
  19. data/lib/imdb_celebrity/string_extensions.rb +24 -0
  20. data/script/console +10 -0
  21. data/script/destroy +14 -0
  22. data/script/generate +14 -0
  23. data/spec/imdb_celebrity/celebrity_spec.rb +48 -0
  24. data/spec/imdb_celebrity/parser/hpricot_parser/celebrity_bio_parser_spec.rb +21 -0
  25. data/spec/imdb_celebrity/parser/nokogiri_parser/celebrity_bio_parser_spec.rb +33 -0
  26. data/spec/imdb_celebrity/parser/parser_spec.rb +45 -0
  27. data/spec/imdb_celebrity/search_spec.rb +28 -0
  28. data/spec/imdb_celebrity/string_extensions_spec.rb +31 -0
  29. data/spec/imdb_celebrity_cli_spec.rb +15 -0
  30. data/spec/imdb_celebrity_spec.rb +11 -0
  31. data/spec/spec.opts +1 -0
  32. data/spec/spec_helper.rb +19 -0
  33. data/tasks/rspec.rake +21 -0
  34. metadata +129 -0
data/History.txt ADDED
@@ -0,0 +1,8 @@
1
+ === 0.0.1 2010-09-29
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
5
+ * searching for celebrity for exact match or all matches available for query string
6
+ * fetch data for a celebrity for a given imdbid [& name]
7
+ * Using hpricot or nokogiri for parsing IMDB pages.
8
+ *
data/Manifest.txt ADDED
@@ -0,0 +1,28 @@
1
+ History.txt
2
+ Manifest.txt
3
+ PostInstall.txt
4
+ README.rdoc
5
+ Rakefile
6
+ bin/imdb_celebrity
7
+ lib/imdb_celebrity.rb
8
+ lib/imdb_celebrity/cli.rb
9
+ lib/imdb_celebrity/search.rb
10
+ lib/imdb_celebrity/celebrity.rb
11
+ lib/imdb_celebrity/string_extensions.rb
12
+ lib/imdb_celebrity/parser/parser.rb
13
+ lib/imdb_celebrity/parser/hpricot_parser.rb
14
+ lib/imdb_celebrity/parser/nokogiri_parser.rb
15
+ lib/imdb_celebrity/parser/hpricot_parser/search_parser.rb
16
+ lib/imdb_celebrity/parser/hpricot_parser/hpricot_parser.rb
17
+ lib/imdb_celebrity/parser/hpricot_parser/celebrity_bio_parser.rb
18
+ lib/imdb_celebrity/parser/nokogiri_parser/nokogiri_parser.rb
19
+ lib/imdb_celebrity/parser/nokogiri_parser/search_parser.rb
20
+ lib/imdb_celebrity/parser/nokogiri_parser/celebrity_bio_parser.rb
21
+ script/console
22
+ script/destroy
23
+ script/generate
24
+ spec/imdb_celebrity_cli_spec.rb
25
+ spec/imdb_celebrity_spec.rb
26
+ spec/spec.opts
27
+ spec/spec_helper.rb
28
+ tasks/rspec.rake
data/PostInstall.txt ADDED
@@ -0,0 +1,7 @@
1
+
2
+ For more information on imdb_celebrity, see http://github.com/sandeepkrao/imdb_celebrity
3
+
4
+ NOTE: Change this information in PostInstall.txt
5
+ You can also delete it if you don't want it.
6
+
7
+
data/README.rdoc ADDED
@@ -0,0 +1,55 @@
1
+ = imdb_celebrity
2
+
3
+ * http://github.com/sandeepkrao/imdb_celebrity
4
+
5
+ == DESCRIPTION:
6
+
7
+ * Imdb_celebrity is a ruby-gem which is used for scraping celebrity pages from imdb.com. You can install imdb_celebrity as
8
+ gem install imdb_celebrity
9
+ With the current initial release you can search for a celebrity by name, or fetch content for a celebrity with a given IMDB id and/or name
10
+
11
+ Usage:
12
+ require 'imdb_celebrity'
13
+
14
+ ** searching a celebrity
15
+ imdb_celebs = ImdbCelebrity::Search.new("Brad Pitt")
16
+ imdb_celebs.celebrities
17
+ # this will return an array of celebrity objects.
18
+
19
+
20
+ == SYNOPSIS:
21
+
22
+ FIX (code sample of usage)
23
+
24
+ == REQUIREMENTS:
25
+
26
+ * FIX (list of requirements)
27
+
28
+ == INSTALL:
29
+
30
+ * FIX (sudo gem install, anything else)
31
+
32
+ == LICENSE:
33
+
34
+ (The MIT License)
35
+
36
+ Copyright (c) 2010 FIXME full name
37
+
38
+ Permission is hereby granted, free of charge, to any person obtaining
39
+ a copy of this software and associated documentation files (the
40
+ 'Software'), to deal in the Software without restriction, including
41
+ without limitation the rights to use, copy, modify, merge, publish,
42
+ distribute, sublicense, and/or sell copies of the Software, and to
43
+ permit persons to whom the Software is furnished to do so, subject to
44
+ the following conditions:
45
+
46
+ The above copyright notice and this permission notice shall be
47
+ included in all copies or substantial portions of the Software.
48
+
49
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
50
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
51
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
52
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
53
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
54
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
55
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,26 @@
1
+ require 'rubygems'
2
+ gem 'hoe', '>= 2.1.0'
3
+ require 'hoe'
4
+ require 'fileutils'
5
+ require './lib/imdb_celebrity'
6
+
7
+ Hoe.plugin :newgem
8
+ # Hoe.plugin :website
9
+ # Hoe.plugin :cucumberfeatures
10
+
11
+ # Generate all the Rake tasks
12
+ # Run 'rake -T' to see list of generated tasks (from gem root directory)
13
+ #$hoe = Hoe.spec 'imdb_celebrity' do
14
+ #self.developer 'Sandeep Kumar', 'sandeep@sapnasolutions.com'
15
+ #self.post_install_message = 'PostInstall.txt' # TODO remove if post-install message not required
16
+ # self.rubyforge_name = self.name # TODO this is default value
17
+ # self.extra_deps = [['activesupport','>= 2.0.2']]
18
+
19
+ #end
20
+
21
+ require 'newgem/tasks'
22
+ Dir['tasks/**/*.rake'].each { |t| load t }
23
+
24
+ # TODO - want other tests/tasks run by default? Add them to the list
25
+ # remove_task :default
26
+ # task :default => [:spec, :features]
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Created on 2010-9-29.
4
+ # Copyright (c) 2010. All rights reserved.
5
+
6
+ require 'rubygems'
7
+ require File.expand_path(File.dirname(__FILE__) + "/../lib/imdb_celebrity")
8
+ require "imdb_celebrity/cli"
9
+
10
+ ImdbCelebrity::CLI.execute(STDOUT, ARGV)
@@ -0,0 +1,15 @@
1
# Gem specification for imdb_celebrity.
Gem::Specification.new do |s|
  s.name        = %q{imdb_celebrity}
  s.version     = "0.0.1"
  s.date        = %q{2010-10-04}
  s.authors     = ["Sandeep Kumar"]
  s.email       = %q{sandeep@sapnasolutions.com}
  s.summary     = %q{Imdb_celebrity is a ruby-gem which is used to scrap celebrity pages from imdb.com}
  s.homepage    = %q{http://github.com/sandeepkrao/imdb_celebrity}
  s.description = %q{Imdb_celebrity is a ruby-gem which is used to scrap celebrity pages from imdb.com}
  # Package regular files only: the bare glob also yields directories, and
  # would pick up previously built *.gem archives lying in the project root.
  s.files       = Dir["**/*"].reject { |path| File.directory?(path) || path =~ /\.gem\z/ }
  # "spec/**" behaves like "spec/*" and therefore skipped every spec stored in
  # a sub-directory; "spec/**/*" walks the whole tree.
  s.test_files  = Dir["spec/**/*"].reject { |path| File.directory?(path) }
  s.executables = ["imdb_celebrity"]
  s.add_dependency "nokogiri"
  s.add_dependency "hpricot"
end
@@ -0,0 +1,13 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless
2
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
+
4
+ require 'rubygems'
5
+ require 'open-uri'
6
+ require 'imdb_celebrity/celebrity'
7
+ require 'imdb_celebrity/search'
8
+ require 'imdb_celebrity/string_extensions'
9
+ require 'imdb_celebrity/parser/parser'
10
+
11
# Top-level namespace of the imdb_celebrity gem.
module ImdbCelebrity
  # Gem version; frozen so the constant's string cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,72 @@
1
+ require 'imdb_celebrity/parser/hpricot_parser/celebrity_bio_parser'
2
+ require 'imdb_celebrity/parser/nokogiri_parser/celebrity_bio_parser'
3
module ImdbCelebrity
  # Represents a celebrity on IMDB.com.
  #
  #   celebrity = ImdbCelebrity::Celebrity.new("0000093", "Brad Pitt", "NokogiriParser")
  #
  # The constructor only stores the id, builds the bio URL and creates the
  # parser object. The name, real name, biography, height and nationality are
  # requested from the parser the first time their reader is called and are
  # cached in instance variables afterwards.
  class Celebrity
    attr_accessor :id, :url, :parser
    # Readers for these attributes are written by hand below (they lazily
    # consult the parser), so only the writers are generated here.
    attr_writer :name, :real_name, :biography, :height, :nationality

    # imdb_id:: numeric part of the IMDB name id, as a String (e.g. "0000093")
    # title::   optional display name; double quotes are stripped from it
    # parser::  "HpricotParser" (default) or "NokogiriParser"
    #
    # Raises ArgumentError for any other parser name. Previously such a name
    # left @parser nil and only failed later with a NoMethodError on nil.
    def initialize(imdb_id, title = nil, parser = "HpricotParser")
      @id = imdb_id
      @url = "http://www.imdb.com/name/nm#{imdb_id}/bio"
      @name = title.gsub(/"/, "") if title
      @parser = initialize_parser parser
    end

    # Opens the celebrity's main IMDB page and returns whatever +open+ yields.
    # Defined in the public section on purpose: a bare +private+ does not
    # apply to singleton methods, so this method has always been callable
    # from outside the class.
    def self.find_by_id(imdb_id)
      open("http://www.imdb.com/name/nm#{imdb_id}/")
    end

    # NOTE: returns an Array of the seven attribute values, not a String.
    # Only values that are already cached are included; nothing is fetched.
    def to_s
      [@id, @url, @name, @real_name, @biography, @height, @nationality]
    end

    # Name of the celebrity. Pass +true+ to discard the cached value and ask
    # the parser again.
    def name(flag = false)
      @name = nil if flag == true
      @name ||= @parser.name
    end

    def real_name
      @real_name ||= @parser.real_name
    end

    def biography
      @biography ||= @parser.biography
    end

    def height
      @height ||= @parser.height
    end

    def nationality
      @nationality ||= @parser.nationality
    end

    # Loads every lazily fetched attribute except the name. Always returns true.
    def celebrity_data
      real_name
      biography
      height
      nationality
      true
    end

    private

    # Builds the bio parser that matches the requested parser name.
    def initialize_parser(parser)
      case parser
      when "HpricotParser"
        ImdbCelebrity::Parser::HpricotParser::CelebrityBioParser.new(@url)
      when "NokogiriParser"
        ImdbCelebrity::Parser::NokogiriParser::CelebrityBioParser.new(@url)
      else
        raise ArgumentError,
              "unknown parser #{parser.inspect} (expected \"HpricotParser\" or \"NokogiriParser\")"
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require 'optparse'
2
+
3
module ImdbCelebrity
  # Command-line entry point used by bin/imdb_celebrity. The option handling
  # is still the generated sample (-p/--path) and should be replaced.
  class CLI
    # Parses +arguments+ (consumed in place by OptionParser#parse!) and writes
    # all output to +stdout+. "-h"/"--help" prints the usage text and exits.
    def self.execute(stdout, arguments=[])

      # NOTE: the option -p/--path= is given as an example, and should be replaced in your application.

      options = {
        :path => '~'
      }
      mandatory_options = %w(  )

      OptionParser.new do |opts|
        # The gsub strips the indentation the heredoc body carries in this file.
        opts.banner = <<-BANNER.gsub(/^          /,'')
          This application is wonderful because...

          Usage: #{File.basename($0)} [options]

          Options are:
        BANNER
        opts.separator ""
        opts.on("-p", "--path PATH", String,
                "This is a sample message.",
                "For multiple lines, add more strings.",
                "Default: ~") { |arg| options[:path] = arg }
        opts.on("-h", "--help",
                "Show this help message.") { stdout.puts opts; exit }
        opts.parse!(arguments)

        # Print usage and quit when a mandatory option has no value.
        if mandatory_options.any? { |option| options[option.to_sym].nil? }
          stdout.puts opts; exit
        end
      end

      # do stuff
      stdout.puts "To update this executable, look in lib/imdb_celebrity/cli.rb"
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'imdb_celebrity/parser/hpricot_parser/hpricot_parser'
2
+ require 'imdb_celebrity/parser/parser'
3
+
4
module ImdbCelebrity
  module Parser
    module HpricotParser
      # Bio-page parser on top of the Hpricot base class. All extraction
      # methods (name, real_name, biography, height, nationality) come from
      # the shared ImdbCelebrity::Parser::Parser mixin.
      #
      # The constructor is inherited from HpricotParser; the former override
      # did nothing but forward its single argument to +super+.
      class CelebrityBioParser < ImdbCelebrity::Parser::HpricotParser::HpricotParser
        include ImdbCelebrity::Parser::Parser
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'hpricot'
2
+
3
module ImdbCelebrity
  module Parser
    module HpricotParser
      # Base class of the Hpricot-backed parsers: remembers the page URI and
      # hands subclasses a lazily built Hpricot document for it.
      class HpricotParser
        # uri:: address of the page to parse (String)
        def initialize(uri)
          @page = uri
        end

        # The URI given to the constructor.
        def page
          @page
        end

        protected

        # Fetches the page (sending the User-Agent "ruby") and parses it on
        # first use; later calls return the same document.
        def document
          return @document if @document

          @document = Hpricot(open(URI.encode(@page), "User-Agent" => "ruby"))
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'imdb_celebrity/parser/hpricot_parser/hpricot_parser'
2
module ImdbCelebrity
  module Parser
    module HpricotParser
      # Parses an IMDB name-search result page with Hpricot. The constructor
      # is inherited from HpricotParser and takes the search URI.
      class SearchParser < ImdbCelebrity::Parser::HpricotParser::HpricotParser

        # Returns true if the page contains an <h3> whose text starts with
        # "Overview", i.e. the search landed directly on a single celebrity.
        def exact_match?
          !document.at("//h3[text()^='Overview']/..").nil?
        end

        # Collects one [id, title, parser] triple per link that points to a
        # "/name/nm..." page. +parser+ is passed through untouched so the
        # caller can build Celebrity objects with the same parser name.
        def parse_celebrities(parser)
          # Skip links without visible text and links inside "media from" rows.
          links = document.search('a[@href^="/name/nm"]').reject do |element|
            element.innerHTML.imdb_strip_tags.empty? ||
              element.parent.innerHTML =~ /media from/i
          end

          links.map do |element|
            id = element['href'][/\d+/]

            # When the first <br />-separated segment holds an image, the
            # title is in the second segment; otherwise it is in the first.
            data = element.parent.innerHTML.split("<br />")
            if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
              title = data[1]
            else
              title = data[0]
            end

            title = title.imdb_strip_tags.imdb_unescape_html
            title.gsub!(/\s+\(\d{4}\)$/, '') # drop a trailing " (YYYY)"

            [id, title, parser]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require 'imdb_celebrity/parser/nokogiri_parser/nokogiri_parser'
2
+ require 'imdb_celebrity/parser/parser'
3
+
4
module ImdbCelebrity
  module Parser
    module NokogiriParser
      # Bio-page parser on top of the Nokogiri base class. Extraction methods
      # come from the shared ImdbCelebrity::Parser::Parser mixin, except for
      # #biography, which is overridden below.
      #
      # The constructor is inherited from NokogiriParser; the former override
      # did nothing but forward its single argument to +super+.
      class CelebrityBioParser < ImdbCelebrity::Parser::NokogiriParser::NokogiriParser

        include ImdbCelebrity::Parser::Parser

        # Text of the node two siblings after the "Biography" <h5> heading
        # (the mixin version only steps one sibling). Returns nil when the
        # lookup fails for any reason (best effort).
        def biography
          document.at("h5[text()*='Biography']").next_sibling.next_sibling.inner_text rescue nil
        end

      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'nokogiri'
2
+
3
module ImdbCelebrity
  module Parser
    module NokogiriParser
      # Base class of the Nokogiri-backed parsers: remembers the page URI and
      # hands subclasses a lazily built Nokogiri HTML document for it.
      class NokogiriParser
        # The URI given to the constructor. Exposed so that this class offers
        # the same public interface as its Hpricot counterpart.
        attr_reader :page

        # uri:: address of the page to parse (String)
        def initialize(uri)
          @page = uri
        end

        protected

        # Fetches the page (sending the User-Agent "ruby") and parses it on
        # first use; later calls return the same document.
        def document
          @document ||= Nokogiri::HTML(open(URI.encode(@page), "User-Agent" => "ruby"))
        end

      end
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'imdb_celebrity/parser/nokogiri_parser/nokogiri_parser'
2
+
3
module ImdbCelebrity
  module Parser
    module NokogiriParser
      # Parses an IMDB name-search result page with Nokogiri. The constructor
      # is inherited from NokogiriParser and takes the search URI.
      class SearchParser < ImdbCelebrity::Parser::NokogiriParser::NokogiriParser
        # Matches a line break in either spelling: "<br />" as well as "<br>".
        # NOTE(review): Nokogiri is expected to serialise HTML nodes with
        # "<br>", in which case the former literal "<br />" separator never
        # split anything - confirm against the installed Nokogiri version.
        LINE_BREAK = %r{<br\s*/?>}

        # Returns true if the page contains an <h3> whose text starts with
        # "Overview", i.e. the search landed directly on a single celebrity.
        def exact_match?
          !document.at("h3[text()^='Overview']").nil?
        end

        # Collects one [id, title, parser] triple per link that points to a
        # "/name/nm..." page. +parser+ is passed through untouched so the
        # caller can build Celebrity objects with the same parser name.
        def parse_celebrities(parser)
          # Skip links without visible text and links inside "media from" rows.
          links = document.search('a[@href^="/name/nm"]').reject do |element|
            element.to_html.imdb_strip_tags.empty? ||
              element.parent.to_html =~ /media from/i
          end

          links.map do |element|
            id = element['href'][/\d+/]

            # When the first segment holds an image, the title is in the
            # second segment; otherwise it is in the first.
            data = element.parent.to_html.split(LINE_BREAK)
            if !data[0].nil? && !data[1].nil? && data[0] =~ /img/
              title = data[1]
            else
              title = data[0]
            end

            title = title.imdb_strip_tags.imdb_unescape_html
            title.gsub!(/\s+\(\d{4}\)$/, '') # drop a trailing " (YYYY)"

            [id, title, parser]
          end
        end

      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module ImdbCelebrity
  module Parser
    # Extraction methods shared by the bio parsers. The including class must
    # provide a +document+ method returning the parsed page. Every method is
    # best effort: any StandardError raised during the lookup yields nil.
    module Parser
      # Text of the link carrying the CSS class "main".
      def name
        document.at("a[@class='main']").inner_text
      rescue StandardError
        nil
      end

      # Stripped text of the node following the "Birth Name" heading.
      def real_name
        heading = document.at("h5[text()*='Birth Name']")
        heading.next.inner_text.strip
      rescue StandardError
        nil
      end

      # Stripped text of the first link whose href contains "birth_place".
      def nationality
        link = document.at("a[@href*='birth_place']")
        link.inner_text.strip
      rescue StandardError
        nil
      end

      # The parenthesised part of the text following the "Height" heading.
      def height
        heading = document.at("h5[text()*='Height']")
        heading.next.inner_text[/\(.+\)/]
      rescue StandardError
        nil
      end

      # Text of the sibling that follows the "Biography" heading.
      def biography
        heading = document.at("h5[text()*='Biography']")
        heading.next_sibling.inner_text
      rescue StandardError
        nil
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'imdb_celebrity/parser/hpricot_parser/search_parser'
2
+ require 'imdb_celebrity/parser/nokogiri_parser/search_parser'
3
+
4
module ImdbCelebrity

  # Searches IMDB for celebrities by name.
  #
  #   ImdbCelebrity::Search.new("Brad Pitt").celebrities  # => [Celebrity, ...]
  class Search
    # +query+ holds the complete search URI (not the raw search text);
    # +parser+ is the search parser object built for that URI.
    attr_reader :query, :parser

    # query::  text to search for
    # parser:: "HpricotParser" (default) or "NokogiriParser"
    #
    # Raises ArgumentError for any other parser name. Previously such a name
    # left @parser nil and only failed later with a NoMethodError on nil.
    def initialize(query, parser = "HpricotParser")
      @query = search_uri query
      @parser_text = parser
      @parser = initialize_parser parser
    end

    # Array of Celebrity objects for the search, built on the first call and
    # cached afterwards. An exact match yields a one-element array.
    def celebrities
      # parse_celebrity lives on this class, not on the search parsers, so it
      # must not be sent to @parser.
      @celebrities ||= (@parser.exact_match? ? parse_celebrity : parse_celebrities)
    end

    private

    def search_uri(query)
      "http://imdb.com/find?s=nm&q=#{query}"
    end

    # Builds the single Celebrity for a search that landed directly on a
    # celebrity page: the id comes from the canonical link, the name from the
    # part of the <h1> that precedes the first <span>.
    def parse_celebrity
      # +document+ is protected on the parser classes, hence the send.
      document = @parser.send(:document)
      id = document.at("head/link[@rel='canonical']")['href'][/\d+/]
      heading = document.at("h1").inner_html.split('<span').first.to_s
      name = heading.imdb_strip_tags.strip.imdb_unescape_html
      [ImdbCelebrity::Celebrity.new(id, name, @parser_text)]
    end

    # Builds one Celebrity per distinct [id, title, parser] triple returned by
    # the search parser.
    def parse_celebrities
      celebrities = @parser.parse_celebrities @parser_text
      celebrities.uniq.map do |values|
        ImdbCelebrity::Celebrity.new(*values)
      end
    end

    # Builds the search parser that matches the requested parser name.
    def initialize_parser(parser)
      case parser
      when "HpricotParser"
        ImdbCelebrity::Parser::HpricotParser::SearchParser.new(@query)
      when "NokogiriParser"
        ImdbCelebrity::Parser::NokogiriParser::SearchParser.new(@query)
      else
        raise ArgumentError,
              "unknown parser #{parser.inspect} (expected \"HpricotParser\" or \"NokogiriParser\")"
      end
    end

  end

end
@@ -0,0 +1,24 @@
1
require 'cgi'
begin
  require 'iconv' # no longer part of the standard library since Ruby 2.0
rescue LoadError
  # Iconv is unavailable; imdb_unescape_html falls back to String#encode.
end
3
+
4
module ImdbCelebrity
  # Helper methods mixed into String for cleaning up scraped IMDB markup.
  module StringExtensions

    # Unescapes HTML entities, then converts the result from ISO-8859-1 to
    # UTF-8. Uses Iconv when it is loaded and String#encode otherwise, so the
    # method keeps working on Rubies that no longer ship Iconv.
    def imdb_unescape_html
      unescaped = CGI.unescapeHTML(self)
      if defined?(Iconv)
        Iconv.conv("UTF-8", 'ISO-8859-1', unescaped)
      else
        unescaped.dup.force_encoding('ISO-8859-1').encode('UTF-8')
      end
    end

    # Removes every opening and closing tag, keeping the text in between.
    def imdb_strip_tags
      gsub(/<\/?[^>]*>/, "")
    end

    # Strips out whitespace then tests if the string is empty.
    # Skipped when String already responds to blank?; the check has to name
    # String because a bare method_defined? here would ask this module,
    # which never has the method at this point.
    def blank?
      strip.empty?
    end unless String.method_defined?(:blank?)
  end
end
23
+
24
+ String.send :include, ImdbCelebrity::StringExtensions
data/script/console ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+ # File: script/console
3
# Starts an irb session with tab completion and the imdb_celebrity library
# preloaded. Windows builds of Ruby (mswin / mingw) use irb.bat instead of irb.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{File.dirname(__FILE__) + '/../lib/imdb_celebrity.rb'}"
puts "Loading imdb_celebrity gem"
# exec replaces this process with irb, so nothing after this line would run.
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
3
+
4
+ begin
5
+ require 'rubigen'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'rubigen'
9
+ end
10
+ require 'rubigen/scripts/destroy'
11
+
12
+ ARGV.shift if ['--help', '-h'].include?(ARGV[0])
13
+ RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
14
+ RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+ APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
3
+
4
+ begin
5
+ require 'rubigen'
6
+ rescue LoadError
7
+ require 'rubygems'
8
+ require 'rubigen'
9
+ end
10
+ require 'rubigen/scripts/generate'
11
+
12
+ ARGV.shift if ['--help', '-h'].include?(ARGV[0])
13
+ RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
14
+ RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,48 @@
1
+ require File.dirname(__FILE__) + '/../spec_helper.rb'
2
+
3
+ describe "Celebrity Spec" do
4
+
5
+ before(:all) do
6
+ @celeb1 = ImdbCelebrity::Celebrity.new("0000093", "brad pitt")
7
+ @celeb2 = ImdbCelebrity::Celebrity.new("0000093", "brad pitt", "NokogiriParser")
8
+ end
9
+
10
+ after :all do
11
+ @celeb1 = nil
12
+ @celeb2 = nil
13
+ end
14
+
15
+ it "should use the Hpricot as default parser class" do
16
+ #celeb = ImdbCelebrity::Celebrity.new("0000093", "brad pitt")
17
+ @celeb1.parser.class.should == ImdbCelebrity::Parser::HpricotParser::CelebrityBioParser
18
+ end
19
+
20
+ it "should use the Parser class with Parser type we are sending" do
21
+ #celeb = ImdbCelebrity::Celebrity.new("0000093", "brad pitt", "NokogiriParser")
22
+ @celeb2.parser.class.should == ImdbCelebrity::Parser::NokogiriParser::CelebrityBioParser
23
+ end
24
+
25
it "should return biography of the celebrity" do
  @celeb1.biography.should == "Brad Pitt was born in 1963 in Oklahoma and raised in Springfield, Missouri. His mother's name is Jane Etta Hillhouse. His father, William (Bill) Pitt, worked in management at a trucking firm in Springfield. He has a younger brother, Douglas (Doug) Pitt and a younger sister Julie Neal Pitt. At Kickapoo High School, Pitt was involved in sports, debating, student government and school musicals. Pitt attended the University of Missouri, where he majored in journalism with a focus on advertising. He occasionally acted in fraternity shows. He left college two credits short of graduating to move to California. Before he became successful at acting, Pitt supported himself by driving strippers in limos, moving refrigerators and dressing as a giant chicken while working for \"el Pollo Loco.\""
  # The stubbed value and the value checked below must be the same string,
  # otherwise the second expectation passes even if the cached biography is
  # replaced by the stub.
  @celeb1.parser.stub!(:biography).and_return("Inconsistent Data")
  @celeb1.biography.should_not == "Inconsistent Data"
end
30
+
31
+ it "should return height of the celebrity" do
32
+ @celeb1.height.should == "(1.80 m)"
33
+ @celeb1.parser.stub!(:height).and_return("1.9m")
34
+ @celeb1.height.should_not == "1.9m"
35
+ end
36
+
37
+ it "should return nationality of the celebrity" do
38
+ @celeb1.nationality.should == "Shawnee, Oklahoma, USA"
39
+ @celeb1.parser.stub!(:nationality).and_return("New Jersy, USA")
40
+ @celeb1.nationality.should_not == "New Jersy, USA"
41
+ end
42
+
43
+ it "should return array of attribute accessors of celebrity" do
44
+ @celeb1.to_s.class.should == Array
45
+ @celeb1.to_s.length.should == 7
46
+ end
47
+
48
+ end
@@ -0,0 +1,21 @@
1
+ require File.dirname(__FILE__) + "/../../../spec_helper.rb"
2
+
3
+ describe "Celebrity Bio Parser Spec" do
4
+
5
+ before :all do
6
+ @celeb_bio_h = ImdbCelebrity::Parser::HpricotParser::CelebrityBioParser.new("http://www.imdb.com/name/nm0000093/bio")
7
+ end
8
+
9
+ after :all do
10
+ @celeb_bio_h = nil
11
+ end
12
+
13
it "should extend Parser module" do
  # Calling #extend here would mix the module in and assert nothing; check
  # that the parser already includes it instead.
  @celeb_bio_h.is_a?(ImdbCelebrity::Parser::Parser).should == true
end
16
+
17
+ it "should extend HpricotParser/NokogiriParser Class" do
18
+ @celeb_bio_h.class.superclass.should == ImdbCelebrity::Parser::HpricotParser::HpricotParser
19
+ end
20
+
21
+ end
@@ -0,0 +1,33 @@
1
+ require File.dirname(__FILE__) + "/../../../spec_helper.rb"
2
+ require 'nokogiri'
3
+
4
+ describe "Celebrity Bio Parser Spec" do
5
+
6
+ before :all do
7
+ @celeb_bio_n = ImdbCelebrity::Parser::NokogiriParser::CelebrityBioParser.new("http://www.imdb.com/name/nm0000093/bio")
8
+ end
9
+
10
+ after :all do
11
+ @celeb_bio_n = nil
12
+ end
13
+
14
it "should extend Parser module" do
  # Calling #extend here would mix the module in and assert nothing; check
  # that the parser already includes it instead.
  @celeb_bio_n.is_a?(ImdbCelebrity::Parser::Parser).should == true
end
17
+
18
+ it "should extend HpricotParser/NokogiriParser Class" do
19
+ @celeb_bio_n.class.superclass.should == ImdbCelebrity::Parser::NokogiriParser::NokogiriParser
20
+ end
21
+
22
+ it "should called the overrided methods" do
23
+ # biography() method has been overrided, so it should call class method only
24
+ @celeb_bio_n.biography.should == "Brad Pitt was born in 1963 in Oklahoma and raised in Springfield, Missouri. His mother's name is Jane Etta Hillhouse. His father, William (Bill) Pitt, worked in management at a trucking firm in Springfield. He has a younger brother, Douglas (Doug) Pitt and a younger sister Julie Neal Pitt. At Kickapoo High School, Pitt was involved in sports, debating, student government and school musicals. Pitt attended the University of Missouri, where he majored in journalism with a focus on advertising. He occasionally acted in fraternity shows. He left college two credits short of graduating to move to California. Before he became successful at acting, Pitt supported himself by driving strippers in limos, moving refrigerators and dressing as a giant chicken while working for \"el Pollo Loco.\""
25
+ end
26
+
27
+ it "should not call the superclass methods for overrided methods" do
28
+ @superparser = ImdbCelebrity::Parser::NokogiriParser::NokogiriParser.new("http://www.imdb.com/name/nm0000093/bio")
29
+ @superparser.stub!(:biography).and_return("Demo Biography")
30
+ @celeb_bio_n.biography.should_not == "Demo Biography"
31
+ end
32
+
33
+ end
@@ -0,0 +1,45 @@
1
+ require File.dirname(__FILE__) + "/../../spec_helper.rb"
2
+
3
+ # dummy class created to test module's functionality
4
+ class ParserDummy
5
+ include ImdbCelebrity::Parser::Parser
6
+ def initialize uri
7
+ @page = uri
8
+ end
9
+ protected
10
+ def document
11
+ @document ||= Hpricot(open(URI.encode(@page), "User-Agent" => "ruby"))
12
+ end
13
+ end
14
+
15
+ describe "Parser Spec" do
16
+
17
+ before :all do
18
+ @parser_dummy = ParserDummy.new("http://imdb.com/name/nm0000093/bio")
19
+ end
20
+
21
+ after :all do
22
+ @parser_dummy = nil
23
+ end
24
+
25
+ it "should return the celebrity real_name" do
26
+ @parser_dummy.real_name.should == "William Bradley Pitt"
27
+ end
28
+
29
+ it "should return celebrity biography" do
30
+ @parser_dummy.biography.should == "Brad Pitt was born in 1963 in Oklahoma and raised in Springfield, Missouri. His mother's name is Jane Etta Hillhouse. His father, William (Bill) Pitt, worked in management at a trucking firm in Springfield. He has a younger brother, Douglas (Doug) Pitt and a younger sister Julie Neal Pitt. At Kickapoo High School, Pitt was involved in sports, debating, student government and school musicals. Pitt attended the University of Missouri, where he majored in journalism with a focus on advertising. He occasionally acted in fraternity shows. He left college two credits short of graduating to move to California. Before he became successful at acting, Pitt supported himself by driving strippers in limos, moving refrigerators and dressing as a giant chicken while working for \"el Pollo Loco.\""
31
+ end
32
+
33
+ it "should return celebrity height" do
34
+ @parser_dummy.height.should == "(1.80 m)"
35
+ end
36
+
37
+ it "should return celebrity nationality" do
38
+ @parser_dummy.nationality.should == "Shawnee, Oklahoma, USA"
39
+ end
40
+
41
+ it "should return celebrity name" do
42
+ @parser_dummy.name.should == "Brad Pitt"
43
+ end
44
+
45
+ end
@@ -0,0 +1,28 @@
1
+ require File.dirname(__FILE__) + "/../spec_helper.rb"
2
+
3
+ describe "Search Spec" do
4
+
5
+ before :all do
6
+ @search1 = ImdbCelebrity::Search.new("Brad Pitt")
7
+ @search2 = ImdbCelebrity::Search.new("Brad Pitt", "NokogiriParser")
8
+ end
9
+
10
+ after :all do
11
+ @search1 = nil
12
+ @search2 = nil
13
+ end
14
+
15
+ it "should use hpricot as defualt parsing class" do
16
+ @search1.parser.class.should == ImdbCelebrity::Parser::HpricotParser::SearchParser
17
+ end
18
+
19
+ it "should use the parsing class which we want" do
20
+ @search2.parser.class.should == ImdbCelebrity::Parser::NokogiriParser::SearchParser
21
+ end
22
+
23
+ it "should return the celbrities list with given search string" do
24
+ @search1.celebrities.class.should == Array
25
+ @search1.celebrities.should_not == []
26
+ end
27
+
28
+ end
@@ -0,0 +1,31 @@
1
+ require File.dirname(__FILE__) + "/../spec_helper.rb"
2
+
3
+ class String
4
+ include ImdbCelebrity::StringExtensions
5
+ end
6
+
7
+ describe "String Extensions Spec" do
8
+
9
+ before :all do
10
+ @dummy = "<small>(Actor, <a href='/title/tt0114369/'>Se7en</a> (1995))</small>"
11
+ end
12
+
13
+ after :all do
14
+ @dummy = nil
15
+ end
16
+
17
+ it "should unescape the html from a string literal" do
18
+ @dummy.imdb_unescape_html.should == "<small>(Actor, <a href='/title/tt0114369/'>Se7en</a> (1995))</small>"
19
+ end
20
+
21
+ it "should strip the html tag from the string" do
22
+ @dummy.imdb_strip_tags.should == "(Actor, Se7en (1995))"
23
+ end
24
+
25
+ it "should strip out the white space and check whether string is empty or not" do
26
+ @dummy.blank?.should == false
27
+ demo = " "
28
+ demo.blank?.should == true
29
+ end
30
+
31
+ end
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ require 'imdb_celebrity/cli'
3
+
4
+ describe ImdbCelebrity::CLI, "execute" do
5
+ before(:each) do
6
+ @stdout_io = StringIO.new
7
+ ImdbCelebrity::CLI.execute(@stdout_io, [])
8
+ @stdout_io.rewind
9
+ @stdout = @stdout_io.read
10
+ end
11
+
12
+ it "should print default output" do
13
+ @stdout.should =~ /To update this executable/
14
+ end
15
+ end
@@ -0,0 +1,11 @@
1
+ require File.dirname(__FILE__) + '/spec_helper.rb'
2
+
3
+ # Time to add your specs!
4
+ # http://rspec.info/
5
+ describe "Place your specs here" do
6
+
7
+ it "should return the correct version of the gem" do
8
+ ImdbCelebrity::VERSION.should == "0.0.1"
9
+ end
10
+
11
+ end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --colour
@@ -0,0 +1,19 @@
1
+ begin
2
+ require 'spec'
3
+ rescue LoadError
4
+ require 'rubygems' unless ENV['NO_RUBYGEMS']
5
+ gem 'rspec'
6
+ require 'spec'
7
+ end
8
+
9
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
10
+ require 'imdb_celebrity'
11
+
12
+ Spec::Runner.configure do |config|
13
+ config.before(:all){
14
+
15
+ }
16
+ config.after(:all){
17
+
18
+ }
19
+ end
data/tasks/rspec.rake ADDED
@@ -0,0 +1,21 @@
1
+ begin
2
+ require 'spec'
3
+ rescue LoadError
4
+ require 'rubygems' unless ENV['NO_RUBYGEMS']
5
+ require 'spec'
6
+ end
7
+ begin
8
+ require 'spec/rake/spectask'
9
+ rescue LoadError
10
+ puts <<-EOS
11
+ To use rspec for testing you must install rspec gem:
12
+ gem install rspec
13
+ EOS
14
+ exit(0)
15
+ end
16
+
17
+ desc "Run the specs under spec/models"
18
+ Spec::Rake::SpecTask.new do |t|
19
+ t.spec_opts = ['--options', "spec/spec.opts"]
20
+ t.spec_files = FileList['spec/**/*_spec.rb']
21
+ end
metadata ADDED
@@ -0,0 +1,129 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: imdb_celebrity
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Sandeep Kumar
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-10-04 00:00:00 +05:30
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ - !ruby/object:Gem::Dependency
36
+ name: hpricot
37
+ prerelease: false
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 3
44
+ segments:
45
+ - 0
46
+ version: "0"
47
+ type: :runtime
48
+ version_requirements: *id002
49
+ description: Imdb_celebrity is a ruby-gem which is used to scrap celebrity pages from imdb.com
50
+ email: sandeep@sapnasolutions.com
51
+ executables:
52
+ - imdb_celebrity
53
+ extensions: []
54
+
55
+ extra_rdoc_files: []
56
+
57
+ files:
58
+ - bin/imdb_celebrity
59
+ - History.txt
60
+ - imdb_celebrity.gemspec
61
+ - lib/imdb_celebrity/celebrity.rb
62
+ - lib/imdb_celebrity/cli.rb
63
+ - lib/imdb_celebrity/parser/hpricot_parser/celebrity_bio_parser.rb
64
+ - lib/imdb_celebrity/parser/hpricot_parser/hpricot_parser.rb
65
+ - lib/imdb_celebrity/parser/hpricot_parser/search_parser.rb
66
+ - lib/imdb_celebrity/parser/nokogiri_parser/celebrity_bio_parser.rb
67
+ - lib/imdb_celebrity/parser/nokogiri_parser/nokogiri_parser.rb
68
+ - lib/imdb_celebrity/parser/nokogiri_parser/search_parser.rb
69
+ - lib/imdb_celebrity/parser/parser.rb
70
+ - lib/imdb_celebrity/search.rb
71
+ - lib/imdb_celebrity/string_extensions.rb
72
+ - lib/imdb_celebrity.rb
73
+ - Manifest.txt
74
+ - PostInstall.txt
75
+ - Rakefile
76
+ - README.rdoc
77
+ - script/console
78
+ - script/destroy
79
+ - script/generate
80
+ - spec/imdb_celebrity/celebrity_spec.rb
81
+ - spec/imdb_celebrity/parser/hpricot_parser/celebrity_bio_parser_spec.rb
82
+ - spec/imdb_celebrity/parser/nokogiri_parser/celebrity_bio_parser_spec.rb
83
+ - spec/imdb_celebrity/parser/parser_spec.rb
84
+ - spec/imdb_celebrity/search_spec.rb
85
+ - spec/imdb_celebrity/string_extensions_spec.rb
86
+ - spec/imdb_celebrity_cli_spec.rb
87
+ - spec/imdb_celebrity_spec.rb
88
+ - spec/spec.opts
89
+ - spec/spec_helper.rb
90
+ - tasks/rspec.rake
91
+ has_rdoc: true
92
+ homepage: http://github.com/sandeepkrao/imdb_celebrity
93
+ licenses: []
94
+
95
+ post_install_message:
96
+ rdoc_options: []
97
+
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ hash: 3
106
+ segments:
107
+ - 0
108
+ version: "0"
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ none: false
111
+ requirements:
112
+ - - ">="
113
+ - !ruby/object:Gem::Version
114
+ hash: 3
115
+ segments:
116
+ - 0
117
+ version: "0"
118
+ requirements: []
119
+
120
+ rubyforge_project:
121
+ rubygems_version: 1.3.7
122
+ signing_key:
123
+ specification_version: 3
124
+ summary: Imdb_celebrity is a ruby-gem which is used to scrap celebrity pages from imdb.com
125
+ test_files:
126
+ - spec/imdb_celebrity_cli_spec.rb
127
+ - spec/imdb_celebrity_spec.rb
128
+ - spec/spec.opts
129
+ - spec/spec_helper.rb