imdb_og 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Jon Maddox
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.textile ADDED
@@ -0,0 +1,24 @@
1
+ h1. IMDB!
2
+
3
+ h2. What?
4
+
5
+ A simple Ruby library to scrape IMDB.
6
+
7
+ h2. How?
8
+
9
+ <pre>
10
+ <code>
11
+ movie = Imdb.find_movie_by_id('tt1099212')
12
+ movie.title
13
+ </code>
14
+ => "Twilight"
15
+
16
+ </pre>
17
+
18
+ It does more, of course; just look at the source.
19
+
20
+ h2. Installation
21
+
22
+ gem install imdb_og
23
+
24
+ The gem is hosted on Gemcutter.
data/Rakefile ADDED
@@ -0,0 +1,57 @@
1
# Build, test, coverage and documentation tasks for the gem.
require 'rubygems'
require 'rake'

# Packaging tasks are provided by Jeweler; without it only a hint is printed
# and the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "imdb_og"
    gem.summary = %Q{Simple library to look up movies on IMDB}
    gem.description = %Q{Simple library to look up movies on IMDB}
    gem.email = "jon@mustacheinc.com"
    gem.homepage = "http://github.com/maddox/imdb"
    gem.authors = ["Jon Maddox"]
    gem.add_development_dependency "thoughtbot-shoulda"
    gem.add_dependency "htmlentities"
    gem.add_dependency "hpricot"
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage is optional: when RCov is missing, `rake rcov` aborts with a hint.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined in this file; only hook it in when some
# loaded library has defined it, so `rake test` keeps working otherwise.
task :test => :check_dependencies if Rake::Task.task_defined?('check_dependencies')

task :default => :test

# rake/rdoctask is gone from current Rake; RDoc ships the task as rdoc/task.
# Fall back to the old location for installations that predate it.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip drops the trailing newline of the VERSION file from the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "imdb_gem #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.5.0
data/imdb.gemspec ADDED
@@ -0,0 +1,62 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification: package identity, shipped files and dependencies.
Gem::Specification.new do |s|
  s.name = %q{imdb}
  s.version = "0.5.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jon Maddox"]
  s.date = %q{2009-10-10}
  s.description = %q{Simple library to look up movies on IMDB}
  s.email = %q{jon@mustacheinc.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.textile"
  ]
  s.files = [
    ".gitignore",
    "LICENSE",
    "README.textile",
    "Rakefile",
    "VERSION",
    "lib/imdb.rb",
    "lib/imdb/imdb.rb",
    "lib/imdb/imdb_company.rb",
    "lib/imdb/imdb_genre.rb",
    "lib/imdb/imdb_movie.rb",
    "lib/imdb/imdb_name.rb",
    "test/imdb_test.rb",
    "test/test_helper.rb"
  ]
  s.homepage = %q{http://github.com/maddox/imdb}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Simple library to look up movies on IMDB}
  s.test_files = [
    "test/imdb_test.rb",
    "test/test_helper.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion no longer exists in current RubyGems; prefer
    # Gem::VERSION and only fall back to the old constant where it is absent.
    rubygems_version = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(rubygems_version) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
      s.add_runtime_dependency(%q<htmlentities>, [">= 0"])
      s.add_runtime_dependency(%q<hpricot>, [">= 0"])
    else
      # RubyGems without typed dependencies: everything is a plain dependency.
      s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
      s.add_dependency(%q<htmlentities>, [">= 0"])
      s.add_dependency(%q<hpricot>, [">= 0"])
    end
  else
    s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
    s.add_dependency(%q<htmlentities>, [">= 0"])
    s.add_dependency(%q<hpricot>, [">= 0"])
  end
end
data/lib/imdb/imdb.rb ADDED
@@ -0,0 +1,127 @@
1
# Scrapes title pages on imdb.com and maps what it finds onto ImdbMovie,
# ImdbName, ImdbCompany and ImdbGenre objects.
class Imdb

  IMDB_MOVIE_BASE_URL = "http://www.imdb.com/title/"
  IMDB_NAME_BASE_URL = "http://www.imdb.com/name/"
  IMDB_COMPANY_BASE_URL = "http://www.imdb.com/company/"
  IMDB_GENRE_BASE_URL = "http://www.imdb.com/Sections/Genres/"
  IMDB_SEARCH_BASE_URL = "http://imdb.com/find?s=all&q="

  # Fetches the title page for +id+ and builds an ImdbMovie from it.
  #
  # id:: title identifier appended to IMDB_MOVIE_BASE_URL (e.g. 'tt0382932').
  #
  # Returns the populated ImdbMovie. Attributes whose section is not present
  # on the page are left unset; poster_url and release_date are set to nil
  # when reading them raises. Errors from fetching the page, or from a page
  # without a title meta tag, are not rescued here.
  def self.find_movie_by_id(id)
    coder = HTMLEntities.new

    data = Hpricot(open_page(IMDB_MOVIE_BASE_URL + id))

    movie = ImdbMovie.new

    movie.imdb_id = id
    # Drop a parenthesised "(2007)", "(2007/I)" or "(TV)" style suffix from the title.
    movie.title = coder.decode(data.at("meta[@name='title']")['content'].gsub(/\((\d{4}(\/[^)]*)?|[A-Z]+)\)/, '').strip)

    rating_text = (data/"div.rating/div.meta/b").inner_text
    movie.rating = $1 if rating_text =~ /([\d\.]+)\/10/

    begin
      movie.poster_url = data.at("div.photo/a[@name='poster']/img")['src']
    rescue
      # No poster element on the page (or it could not be read).
      movie.poster_url = nil
    end

    (data/"div.info").each do |info|
      info_title = (info/"h5").inner_text
      case info_title
      when /Directors?:/
        movie.directors = parse_names(info)
      when /Writers?[^:]*:/
        # [^:]* (rather than [^:]+) so a bare "Writer:" heading matches too,
        # alongside "Writers:" and headings with text before the colon.
        movie.writers = parse_names(info)
      when /Company:/
        movie.company = parse_company(info)
      when "Tagline:"
        movie.tagline = coder.decode(parse_info(info).strip)
      when "Runtime:"
        runtime = parse_info(info).strip
        runtime = runtime.gsub(/^[^:]+:\s*/, '') # drop a leading "Label:" prefix
        runtime = runtime.gsub(/min .*/, 'min')  # keep only up to the first "min"
        movie.runtime = runtime
      when "Plot:"
        plot = parse_info(info).strip
        # Strip the trailing navigation links that follow the plot text.
        plot = plot.gsub(/\s*\|\s*add synopsis$/, '')
        plot = plot.gsub(/\s*\|\s*full synopsis$/, '')
        plot = plot.gsub(/\s*\|\s*add summary$/, '')
        plot = plot.gsub(/full summary$/, '')
        plot = plot.gsub(/more$/, '')
        movie.plot = coder.decode(plot.strip)
      when "Genre:"
        movie.genres = parse_genres(info)
      when "Release Date:"
        begin
          if parse_info(info).strip =~ /(\d{1,2}) ([a-zA-Z]+) (\d{4})/
            movie.release_date = Date.parse("#{$2} #{$1}, #{$3}")
          end
        rescue
          movie.release_date = nil
        end
      when "Certification:"
        # First "USA:" entry that is not "Unrated", without the "USA:" prefix.
        usa_ratings = (info/"a").map { |link| link.inner_html }.select { |text| text =~ /^USA:/ && text !~ /Unrated/ }
        movie.certification = usa_ratings.map { |text| text.sub(/^USA:/, '').strip }.first
      end
    end

    (data/"table.cast"/"tr").each do |cast_member|
      actor_a = (cast_member/"td.nm").inner_html
      actor_a =~ /name\/([^"]+)\//
      actor_id = $1
      actor_name = coder.decode((cast_member/"td.nm"/"a").inner_text)
      actor_role = coder.decode((cast_member/"td.char").inner_text)
      # Assign the result back so the appended entry is stored on the movie.
      movie.actors = movie.actors << ImdbName.new(actor_id, actor_name, actor_role)
    end

    movie
  end

  # Opens +url+ for reading and returns the IO-like object.
  #
  # Kernel#open no longer fetches URLs on Ruby 3.0+, so URI.open (provided by
  # open-uri) is used when available; older Rubies fall back to Kernel#open.
  def self.open_page(url)
    URI.respond_to?(:open) ? URI.open(url) : open(url)
  end

  # NOTE: the helpers below are callable from outside the class. The original
  # `protected` marker was removed because visibility keywords do not apply to
  # methods defined with `def self.`; behaviour is unchanged.

  # Returns the text of +info+ with newlines removed, reduced to what follows
  # the first colon (when there is one) and without a trailing " more".
  def self.parse_info(info)
    value = info.inner_text.gsub(/\n/, '')
    value = $1 if value =~ /\:(.+)/
    value.gsub(/ more$/, '')
  end

  # Builds one ImdbName per "/name/<id>/" link found in +info+. The role is
  # the parenthesised text directly after the link, if any.
  def self.parse_names(info)
    coder = HTMLEntities.new

    # <a href="/name/nm0083348/">Brad Bird</a><br/><a href="/name/nm0684342/">Jan Pinkava</a> (co-director)<br/>N
    info.inner_html.scan(/<a href="\/name\/([^"]+)\/"[^>]*>([^<]+)<\/a>( \(([^)]+)\))?/).map do |match|
      ImdbName.new(coder.decode(match[0]), coder.decode(match[1]), coder.decode(match[3]))
    end
  end

  # Builds an ImdbCompany from the first "/company/<id>/" link in +info+.
  # When no such link exists, $1 and $2 are nil and are passed to the decoder as is.
  def self.parse_company(info)
    coder = HTMLEntities.new
    # <a href="/company/co0017902/">Pixar Animation Studios</a>
    info.inner_html =~ /<a href="\/company\/([^"]+)\/">([^<]+)<\/a>/
    ImdbCompany.new(coder.decode($1), coder.decode($2))
  end

  # Builds one ImdbGenre per genre link in +info+; the last path segment of
  # the link is used as both the id and the name.
  def self.parse_genres(info)
    coder = HTMLEntities.new
    # <a href="/Sections/Genres/Animation/">Animation</a> / <a href="/Sections/Genres/Adventure/">Adventure</a>
    genre_links = (info/"a").select do |link|
      link['href'] =~ /^\/Sections\/Genres/
    end
    genre_links.map do |link|
      link['href'] =~ /([^\/]+)\/$/
      ImdbGenre.new(coder.decode($1), coder.decode($1))
    end
  end

end
@@ -0,0 +1,8 @@
1
# A production company: its IMDB company id and display name.
class ImdbCompany
  attr_accessor :imdb_id, :name

  # imdb_id:: company identifier (e.g. 'co0017902')
  # name::    company display name
  def initialize(imdb_id, name)
    self.imdb_id = imdb_id
    self.name = name
  end
end
@@ -0,0 +1,8 @@
1
# A genre: its IMDB identifier and display name.
class ImdbGenre
  attr_accessor :imdb_id, :name

  # imdb_id:: genre identifier
  # name::    genre display name
  def initialize(imdb_id, name)
    self.imdb_id = imdb_id
    self.name = name
  end
end
@@ -0,0 +1,20 @@
1
# Value object holding the attributes of a single title.
#
# The list attributes (writers, actors, directors, genres) never read as nil:
# when unset, or explicitly set to nil, the reader stores and returns an empty
# array, so `movie.actors << actor` keeps the appended element.
class ImdbMovie
  attr_accessor :imdb_id, :title, :tagline, :company, :plot, :runtime, :rating, :poster_url, :release_date, :certification

  # Writers only: the matching readers are defined by hand below, so
  # attr_accessor would just create readers that get redefined right away.
  attr_writer :directors, :writers, :genres, :actors

  # Returns the writers list, defaulting to (and remembering) [].
  def writers
    @writers ||= []
  end

  # Returns the actors list, defaulting to (and remembering) [].
  def actors
    @actors ||= []
  end

  # Returns the directors list, defaulting to (and remembering) [].
  def directors
    @directors ||= []
  end

  # Returns the genres list, defaulting to (and remembering) [].
  def genres
    @genres ||= []
  end

end
@@ -0,0 +1,9 @@
1
# A person credited on a title: IMDB name id, display name and role.
class ImdbName
  attr_accessor :imdb_id, :name, :role

  # imdb_id:: person identifier (e.g. 'nm0083348')
  # name::    person display name
  # role::    credit text for this title (e.g. 'co-director')
  def initialize(imdb_id, name, role)
    self.imdb_id = imdb_id
    self.name = name
    self.role = role
  end
end
data/lib/imdb.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'open-uri'
4
+ require 'date'
5
+ require 'htmlentities'
6
+ require 'imdb/imdb'
7
+ require 'imdb/imdb_company'
8
+ require 'imdb/imdb_movie'
9
+ require 'imdb/imdb_name'
10
+ require 'imdb/imdb_genre'
data/test/imdb_test.rb ADDED
@@ -0,0 +1,126 @@
1
+ require 'test_helper'
2
+
3
# Tests for Imdb and ImdbMovie.
#
# NOTE: the "after an Imdb.find_movie_by_id returns it" context calls
# Imdb.find_movie_by_id in its setup, so it fetches the title page before
# every test in that context.
class ImdbTest < Test::Unit::TestCase

  context "Imdb" do
    should "have an imdb movie base url" do
      assert_equal "http://www.imdb.com/title/", Imdb::IMDB_MOVIE_BASE_URL
    end
    should "have an imdb search base url" do
      assert_equal "http://imdb.com/find?s=all&q=", Imdb::IMDB_SEARCH_BASE_URL
    end
  end

  context "ImdbMovie" do
    context "when first created" do
      should "not have an imdb_id" do
        movie = ImdbMovie.new
        assert_nil movie.imdb_id
      end
    end

    context "after an Imdb.find_movie_by_id returns it" do
      setup do
        @movie = Imdb.find_movie_by_id('tt0382932')
      end

      should "have an imdb_id" do
        assert_equal 'tt0382932', @movie.imdb_id
      end

      should "have a title" do
        assert_equal 'Ratatouille', @movie.title
      end

      should "have a release date" do
        assert_equal Date.new(2007, 6, 29), @movie.release_date
      end

      should "have a G certification" do
        assert_equal 'G', @movie.certification
      end

      should "have a company" do
        assert_equal 'co0017902', @movie.company.imdb_id
        assert_equal 'Pixar Animation Studios', @movie.company.name
      end

      should "have two directors" do
        assert_equal 2, @movie.directors.length
        assert_equal 'nm0083348', @movie.directors[0].imdb_id
        assert_equal 'Brad Bird', @movie.directors[0].name
        assert_equal '', @movie.directors[0].role

        assert_equal 'nm0684342', @movie.directors[1].imdb_id
        assert_equal 'Jan Pinkava', @movie.directors[1].name
        assert_equal 'co-director', @movie.directors[1].role
      end

      should "have two writers" do
        assert_equal 2, @movie.writers.length
        assert_equal 'nm0083348', @movie.writers[0].imdb_id
        assert_equal 'Brad Bird', @movie.writers[0].name
        assert_equal 'screenplay', @movie.writers[0].role

        assert_equal 'nm0684342', @movie.writers[1].imdb_id
        assert_equal 'Jan Pinkava', @movie.writers[1].name
        assert_equal 'story', @movie.writers[1].role
      end

      should "have 15 actors" do
        assert_equal 15, @movie.actors.length
        assert_equal 'nm0652663', @movie.actors[0].imdb_id
        assert_equal 'Patton Oswalt', @movie.actors[0].name
        assert_equal 'Remy (voice)', @movie.actors[0].role

        assert_equal 'nm0826039', @movie.actors[14].imdb_id
        assert_equal 'Jake Steinfeld', @movie.actors[14].name
        assert_equal 'Git (Lab Rat) (voice)', @movie.actors[14].role
      end

      should "have three genres" do
        assert_equal 3, @movie.genres.length
        assert_equal 'Animation', @movie.genres[0].name
        assert_equal 'Comedy', @movie.genres[1].name
        assert_equal 'Family', @movie.genres[2].name
      end

      should "have a tagline" do
        assert_equal 'Dinner is served... Summer 2007', @movie.tagline
      end

      should "have a rating" do
        assert_match(/\d\.\d/, @movie.rating)
      end

      should "have a poster_url" do
        assert_match(/http:\/\/.*\.jpg/, @movie.poster_url)
      end

      should "have a runtime" do
        assert_match(/\d+ min/, @movie.runtime)
      end

      should "have a plot" do
        assert_equal %{Remy is a young rat in the French countryside who arrives in Paris, only to find out that his cooking idol is dead. When he makes an unusual alliance with a restaurant's new garbage boy, the culinary and personal adventures begin despite Remy's family's skepticism and the rat-hating world of humans.}, @movie.plot
      end

      should "return an empty array if writers is nil" do
        @movie.writers = nil
        assert_equal [], @movie.writers
      end

      should "return an empty array if directors is nil" do
        @movie.directors = nil
        assert_equal [], @movie.directors
      end

      should "return an empty array if genres is nil" do
        @movie.genres = nil
        assert_equal [], @movie.genres
      end

      should "return an empty array if actors is nil" do
        @movie.actors = nil
        assert_equal [], @movie.actors
      end
    end

  end

end
@@ -0,0 +1,10 @@
1
# Shared test bootstrap: loads the test libraries, puts lib/ and test/ on the
# load path, then requires the library under test.
require 'rubygems'
require 'test/unit'
require 'shoulda'

test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'imdb'

# Reopened as a place for helpers shared by all test cases (none yet).
class Test::Unit::TestCase
end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: imdb_og
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Jon Maddox
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-10 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: thoughtbot-shoulda
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: htmlentities
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: hpricot
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: Simple library to look up movies on IMDB
46
+ email: jon@mustacheinc.com
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - LICENSE
53
+ - README.textile
54
+ files:
55
+ - .gitignore
56
+ - LICENSE
57
+ - README.textile
58
+ - Rakefile
59
+ - VERSION
60
+ - imdb.gemspec
61
+ - lib/imdb.rb
62
+ - lib/imdb/imdb.rb
63
+ - lib/imdb/imdb_company.rb
64
+ - lib/imdb/imdb_genre.rb
65
+ - lib/imdb/imdb_movie.rb
66
+ - lib/imdb/imdb_name.rb
67
+ - test/imdb_test.rb
68
+ - test/test_helper.rb
69
+ has_rdoc: true
70
+ homepage: http://github.com/maddox/imdb
71
+ licenses: []
72
+
73
+ post_install_message:
74
+ rdoc_options:
75
+ - --charset=UTF-8
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: "0"
83
+ version:
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: "0"
89
+ version:
90
+ requirements: []
91
+
92
+ rubyforge_project:
93
+ rubygems_version: 1.3.5
94
+ signing_key:
95
+ specification_version: 3
96
+ summary: Simple library to look up movies on IMDB
97
+ test_files:
98
+ - test/imdb_test.rb
99
+ - test/test_helper.rb