imdb_og 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Jon Maddox
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.textile ADDED
@@ -0,0 +1,24 @@
1
+ h1. IMDB!
2
+
3
+ h2. What?
4
+
5
+ A simple Ruby library for scraping IMDB.
6
+
7
+ h2. How?
8
+
9
+ <pre>
10
+ <code>
11
+ movie = Imdb.find_movie_by_id('tt1099212')
12
+ movie.title
13
+ </code>
14
+ => "Twilight"
15
+
16
+ </pre>
17
+
18
+ It does more of course, just look at the source.
19
+
20
+ h2. Installation
21
+
22
+ gem install imdb_og
23
+
24
+ gem hosted on Gemcutter
data/Rakefile ADDED
@@ -0,0 +1,57 @@
1
require 'rubygems'
require 'rake'

# Packaging tasks (gemspec, build, release, check_dependencies) are provided by
# Jeweler. Jeweler is optional: without it only the tasks defined below exist.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "imdb_og"
    gem.summary = %Q{Simple library to look up movies on IMDB}
    gem.description = %Q{Simple library to look up movies on IMDB}
    gem.email = "jon@mustacheinc.com"
    gem.homepage = "http://github.com/maddox/imdb"
    gem.authors = ["Jon Maddox"]
    gem.add_development_dependency "thoughtbot-shoulda"
    gem.add_dependency "htmlentities"
    gem.add_dependency "hpricot"
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage is optional as well; when RCov is missing the task explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# check_dependencies only exists when Jeweler loaded; depending on it
# unconditionally would make `rake test` fail on machines without Jeweler.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Newer Rake versions no longer ship rake/rdoctask; prefer the task bundled
# with RDoc and fall back to the legacy location on old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that should not leak into the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "imdb_og #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.5.0
data/imdb.gemspec ADDED
@@ -0,0 +1,62 @@
1
# Generated by jeweler
# DO NOT EDIT THIS FILE
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
# -*- encoding: utf-8 -*-
#
# NOTE(review): this checked-in copy named the gem "imdb" while the Rakefile
# declares "imdb_og"; the name below follows the Rakefile. Regenerating with
# `rake gemspec` remains the preferred way to refresh this file.

Gem::Specification.new do |s|
  s.name = %q{imdb_og}
  s.version = "0.5.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jon Maddox"]
  s.date = %q{2009-10-10}
  s.description = %q{Simple library to look up movies on IMDB}
  s.email = %q{jon@mustacheinc.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.textile"
  ]
  s.files = [
    ".gitignore",
    "LICENSE",
    "README.textile",
    "Rakefile",
    "VERSION",
    "lib/imdb.rb",
    "lib/imdb/imdb.rb",
    "lib/imdb/imdb_company.rb",
    "lib/imdb/imdb_genre.rb",
    "lib/imdb/imdb_movie.rb",
    "lib/imdb/imdb_name.rb",
    "test/imdb_test.rb",
    "test/test_helper.rb"
  ]
  s.homepage = %q{http://github.com/maddox/imdb}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Simple library to look up movies on IMDB}
  s.test_files = [
    "test/imdb_test.rb",
    "test/test_helper.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion is gone from current RubyGems; use Gem::VERSION when
    # it is defined and only fall back to the legacy constant otherwise.
    installed_rubygems = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(installed_rubygems) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
      s.add_runtime_dependency(%q<htmlentities>, [">= 0"])
      s.add_runtime_dependency(%q<hpricot>, [">= 0"])
    else
      # RubyGems older than 1.2.0 has no runtime/development distinction.
      s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
      s.add_dependency(%q<htmlentities>, [">= 0"])
      s.add_dependency(%q<hpricot>, [">= 0"])
    end
  else
    s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
    s.add_dependency(%q<htmlentities>, [">= 0"])
    s.add_dependency(%q<hpricot>, [">= 0"])
  end
end
data/lib/imdb/imdb.rb ADDED
@@ -0,0 +1,127 @@
1
# Scraper for IMDB title pages. Everything is exposed as class methods; the
# entry point is Imdb.find_movie_by_id.
class Imdb

  IMDB_MOVIE_BASE_URL = "http://www.imdb.com/title/"
  IMDB_NAME_BASE_URL = "http://www.imdb.com/name/"
  IMDB_COMPANY_BASE_URL = "http://www.imdb.com/company/"
  IMDB_GENRE_BASE_URL = "http://www.imdb.com/Sections/Genres/"
  IMDB_SEARCH_BASE_URL = "http://imdb.com/find?s=all&q="

  # Trailing navigation text removed from the "Plot:" block, applied in order.
  PLOT_TRAILERS = [
    /\s*\|\s*add synopsis$/,
    /\s*\|\s*full synopsis$/,
    /\s*\|\s*add summary$/,
    /full summary$/,
    /more$/
  ]

  # Downloads the title page for +id+ (e.g. 'tt0382932') and returns an
  # ImdbMovie populated from it.
  #
  # Raises whatever the HTTP fetch raises, and NoMethodError when the page has
  # no <meta name="title"> element. A missing poster or an unparseable release
  # date is tolerated and leaves the corresponding attribute nil.
  def self.find_movie_by_id(id)
    coder = HTMLEntities.new
    data = Hpricot(fetch_page(IMDB_MOVIE_BASE_URL + id))

    movie = ImdbMovie.new
    movie.imdb_id = id

    # The meta title carries a "(2007)" / "(2007/I)" / "(TV)" style suffix; drop it.
    raw_title = data.at("meta[@name='title']")['content']
    movie.title = coder.decode(raw_title.gsub(/\((\d{4}(\/[^)]*)?|[A-Z]+)\)/,'').strip)

    rating_text = (data/"div.rating/div.meta/b").inner_text
    movie.rating = $1 if rating_text =~ /([\d\.]+)\/10/

    begin
      movie.poster_url = data.at("div.photo/a[@name='poster']/img")['src']
    rescue StandardError
      # no poster element on the page
      movie.poster_url = nil
    end

    (data/"div.info").each do |info|
      apply_info(movie, (info/"h5").inner_text, info, coder)
    end

    movie.actors = parse_cast(data, coder)

    movie
  end

  # The parse_* helpers below are implementation details of find_movie_by_id.
  # They stay publicly callable for backward compatibility: the `protected`
  # keyword that used to precede them never applied to `def self.` methods.

  # Returns the text of an info block after its first colon, with newlines and
  # a trailing " more" link label removed. Blocks without a colon are returned
  # whole (minus newlines).
  def self.parse_info(info)
    value = info.inner_text.gsub(/\n/,'')
    value = value[/\:(.+)/, 1] || value
    value.gsub(/ more$/, '')
  end

  # Extracts every person link in +info+ as an ImdbName (id, name, role).
  # The role is the optional parenthesised text directly after the link.
  def self.parse_names(info)
    coder = HTMLEntities.new

    # <a href="/name/nm0083348/">Brad Bird</a><br/><a href="/name/nm0684342/">Jan Pinkava</a> (co-director)<br/>N
    info.inner_html.scan(/<a href="\/name\/([^"]+)\/"[^>]*>([^<]+)<\/a>( \(([^)]+)\))?/).map do |id, name, _suffix, role|
      ImdbName.new(coder.decode(id), coder.decode(name), coder.decode(role))
    end
  end

  # Builds an ImdbCompany from the first company link in +info+. When there is
  # no such link both captures are nil and are passed through the decoder as-is.
  def self.parse_company(info)
    coder = HTMLEntities.new
    # <a href="/company/co0017902/">Pixar Animation Studios</a>
    info.inner_html =~ /<a href="\/company\/([^"]+)\/">([^<]+)<\/a>/
    ImdbCompany.new(coder.decode($1), coder.decode($2))
  end

  # Returns one ImdbGenre per genre link in +info+; the last path segment of
  # the link is used as both the id and the name.
  def self.parse_genres(info)
    coder = HTMLEntities.new
    # <a href="/Sections/Genres/Animation/">Animation</a> / <a href="/Sections/Genres/Adventure/">Adventure</a>
    genre_links = (info/"a").select { |link| link['href'] =~ /^\/Sections\/Genres/ }
    genre_links.map do |link|
      slug = link['href'][/([^\/]+)\/$/, 1]
      ImdbGenre.new(coder.decode(slug), coder.decode(slug))
    end
  end

  # Opens +url+ for reading. Bare Kernel#open stopped handling URLs in Ruby 3,
  # so URI.open is used when open-uri provides it; older Rubies keep using open.
  def self.fetch_page(url)
    URI.respond_to?(:open) ? URI.open(url) : open(url)
  end

  # Copies one "div.info" block into +movie+, dispatching on the block's
  # heading text (+info_title+). Unknown headings are ignored.
  def self.apply_info(movie, info_title, info, coder)
    case info_title
    when /Directors?:/
      movie.directors = parse_names(info)
    when /Writers?[^:]*:/
      # [^:]* (not +) so the bare singular heading "Writer:" is recognised too
      movie.writers = parse_names(info)
    when /Company:/
      movie.company = parse_company(info)
    when "Tagline:"
      movie.tagline = coder.decode(parse_info(info).strip)
    when "Runtime:"
      # drop a leading "Country:" qualifier and anything after the first "min"
      movie.runtime = parse_info(info).strip.gsub(/^[^:]+:\s*/, '').gsub(/min .*/, 'min')
    when "Plot:"
      plot = PLOT_TRAILERS.inject(parse_info(info).strip) { |text, trailer| text.gsub(trailer, '') }
      movie.plot = coder.decode(plot.strip)
    when "Genre:"
      movie.genres = parse_genres(info)
    when "Release Date:"
      begin
        if parse_info(info).strip =~ /(\d{1,2}) ([a-zA-Z]+) (\d{4})/
          movie.release_date = Date.parse("#{$2} #{$1}, #{$3}")
        end
      rescue StandardError
        # e.g. an unknown month name
        movie.release_date = nil
      end
    when "Certification:"
      usa_ratings = (info/"a").map { |link| link.inner_html }.select { |text| text =~ /^USA:/ && text !~ /Unrated/ }
      movie.certification = usa_ratings.map { |text| text.sub(/^USA:/, '').strip }.first
    end
  end

  # Returns an ImdbName for every row of the cast table that links to a
  # person. Rows without such a link are skipped rather than turned into
  # entries with a nil id and empty name.
  def self.parse_cast(data, coder)
    (data/"table.cast"/"tr").map do |row|
      name_cell = row/"td.nm"
      actor_id = name_cell.inner_html[/name\/([^"]+)\//, 1]
      next unless actor_id

      ImdbName.new(actor_id,
                   coder.decode((name_cell/"a").inner_text),
                   coder.decode((row/"td.char").inner_text))
    end.compact
  end

  private_class_method :fetch_page, :apply_info, :parse_cast

end
@@ -0,0 +1,8 @@
1
# A production company referenced from a movie page: its IMDB id
# (e.g. "co0017902") and its display name.
class ImdbCompany
  attr_accessor :imdb_id, :name

  def initialize(imdb_id, name)
    @imdb_id = imdb_id
    @name = name
  end
end
@@ -0,0 +1,8 @@
1
# A genre attached to a movie: an identifier and a display name.
class ImdbGenre
  attr_accessor :imdb_id, :name

  def initialize(imdb_id, name)
    @imdb_id = imdb_id
    @name = name
  end
end
@@ -0,0 +1,20 @@
1
# Plain data holder for one movie.
#
# The collection attributes (writers, actors, directors, genres) never read
# as nil: when unset or set to nil they read as an empty array. That array is
# stored on first read, so appending to it in place (movie.actors << name) is
# remembered.
class ImdbMovie
  attr_accessor :imdb_id, :title, :tagline, :company, :plot, :runtime, :rating, :poster_url, :release_date, :certification

  # Readers for these four are written out below, so only writers are generated.
  attr_writer :directors, :writers, :genres, :actors

  def writers
    @writers ||= []
  end

  def actors
    @actors ||= []
  end

  def directors
    @directors ||= []
  end

  def genres
    @genres ||= []
  end

end
@@ -0,0 +1,9 @@
1
# A person credited on a movie: IMDB id (e.g. "nm0083348"), display name and
# the role they are credited with (character name, "co-director", ...).
class ImdbName
  attr_accessor :imdb_id, :name, :role

  def initialize(imdb_id, name, role)
    @imdb_id = imdb_id
    @name = name
    @role = role
  end
end
data/lib/imdb.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'open-uri'
4
+ require 'date'
5
+ require 'htmlentities'
6
+ require 'imdb/imdb'
7
+ require 'imdb/imdb_company'
8
+ require 'imdb/imdb_movie'
9
+ require 'imdb/imdb_name'
10
+ require 'imdb/imdb_genre'
data/test/imdb_test.rb ADDED
@@ -0,0 +1,126 @@
1
+ require 'test_helper'
2
+
3
# Tests for Imdb and ImdbMovie.
#
# NOTE: the "after Imdb.find_movie_by_id returns it" context fetches the live
# IMDB page for Ratatouille (tt0382932) in its setup, so those tests need
# network access and depend on the current markup and data of that page.
class ImdbTest < Test::Unit::TestCase

  context "Imdb" do
    should "have an imdb movie base url" do
      assert_equal "http://www.imdb.com/title/", Imdb::IMDB_MOVIE_BASE_URL
    end
    should "have an imdb search base url" do
      assert_equal "http://imdb.com/find?s=all&q=", Imdb::IMDB_SEARCH_BASE_URL
    end
  end

  context "ImdbMovie" do
    context "when first created" do
      should "not have an imdb_id" do
        movie = ImdbMovie.new
        assert_nil movie.imdb_id
      end
    end

    context "after Imdb.find_movie_by_id returns it" do
      setup do
        @movie = Imdb.find_movie_by_id('tt0382932')
      end

      should "have an imdb_id" do
        assert_equal 'tt0382932', @movie.imdb_id
      end

      should "have a title" do
        assert_equal 'Ratatouille', @movie.title
      end

      should "have a release date" do
        assert_equal Date.new(2007, 6, 29), @movie.release_date
      end

      should "have a G certification" do
        assert_equal 'G', @movie.certification
      end

      should "have a company" do
        assert_equal 'co0017902', @movie.company.imdb_id
        assert_equal 'Pixar Animation Studios', @movie.company.name
      end

      should "have two directors" do
        assert_equal 2, @movie.directors.length
        assert_equal 'nm0083348', @movie.directors[0].imdb_id
        assert_equal 'Brad Bird', @movie.directors[0].name
        assert_equal '', @movie.directors[0].role

        assert_equal 'nm0684342', @movie.directors[1].imdb_id
        assert_equal 'Jan Pinkava', @movie.directors[1].name
        assert_equal 'co-director', @movie.directors[1].role
      end

      should "have two writers" do
        assert_equal 2, @movie.writers.length
        assert_equal 'nm0083348', @movie.writers[0].imdb_id
        assert_equal 'Brad Bird', @movie.writers[0].name
        assert_equal 'screenplay', @movie.writers[0].role

        assert_equal 'nm0684342', @movie.writers[1].imdb_id
        assert_equal 'Jan Pinkava', @movie.writers[1].name
        assert_equal 'story', @movie.writers[1].role
      end

      should "have 15 actors" do
        assert_equal 15, @movie.actors.length
        assert_equal 'nm0652663', @movie.actors[0].imdb_id
        assert_equal 'Patton Oswalt', @movie.actors[0].name
        assert_equal 'Remy (voice)', @movie.actors[0].role

        assert_equal 'nm0826039', @movie.actors[14].imdb_id
        assert_equal 'Jake Steinfeld', @movie.actors[14].name
        assert_equal 'Git (Lab Rat) (voice)', @movie.actors[14].role
      end

      should "have three genres" do
        assert_equal 3, @movie.genres.length
        assert_equal 'Animation', @movie.genres[0].name
        assert_equal 'Comedy', @movie.genres[1].name
        assert_equal 'Family', @movie.genres[2].name
      end

      should "have a tagline" do
        assert_equal 'Dinner is served... Summer 2007', @movie.tagline
      end

      should "have a rating" do
        assert_match(/\d\.\d/, @movie.rating)
      end

      should "have a poster_url" do
        assert_match(/http:\/\/.*\.jpg/, @movie.poster_url)
      end

      should "have a runtime" do
        assert_match(/\d+ min/, @movie.runtime)
      end

      should "have a plot" do
        assert_equal %{Remy is a young rat in the French countryside who arrives in Paris, only to find out that his cooking idol is dead. When he makes an unusual alliance with a restaurant's new garbage boy, the culinary and personal adventures begin despite Remy's family's skepticism and the rat-hating world of humans.}, @movie.plot
      end

      should "return an empty array if writers is nil" do
        @movie.writers = nil
        assert_equal [], @movie.writers
      end

      should "return an empty array if directors is nil" do
        @movie.directors = nil
        assert_equal [], @movie.directors
      end

      should "return an empty array if genres is nil" do
        @movie.genres = nil
        assert_equal [], @movie.genres
      end

      should "return an empty array if actors is nil" do
        @movie.actors = nil
        assert_equal [], @movie.actors
      end
    end

  end

end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'imdb'
8
+
9
+ class Test::Unit::TestCase
10
+ end
metadata ADDED
@@ -0,0 +1,99 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: imdb_og
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Jon Maddox
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-10-10 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: thoughtbot-shoulda
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: htmlentities
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: hpricot
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: "0"
44
+ version:
45
+ description: Simple library to look up movies on IMDB
46
+ email: jon@mustacheinc.com
47
+ executables: []
48
+
49
+ extensions: []
50
+
51
+ extra_rdoc_files:
52
+ - LICENSE
53
+ - README.textile
54
+ files:
55
+ - .gitignore
56
+ - LICENSE
57
+ - README.textile
58
+ - Rakefile
59
+ - VERSION
60
+ - imdb.gemspec
61
+ - lib/imdb.rb
62
+ - lib/imdb/imdb.rb
63
+ - lib/imdb/imdb_company.rb
64
+ - lib/imdb/imdb_genre.rb
65
+ - lib/imdb/imdb_movie.rb
66
+ - lib/imdb/imdb_name.rb
67
+ - test/imdb_test.rb
68
+ - test/test_helper.rb
69
+ has_rdoc: true
70
+ homepage: http://github.com/maddox/imdb
71
+ licenses: []
72
+
73
+ post_install_message:
74
+ rdoc_options:
75
+ - --charset=UTF-8
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: "0"
83
+ version:
84
+ required_rubygems_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: "0"
89
+ version:
90
+ requirements: []
91
+
92
+ rubyforge_project:
93
+ rubygems_version: 1.3.5
94
+ signing_key:
95
+ specification_version: 3
96
+ summary: Simple library to look up movies on IMDB
97
+ test_files:
98
+ - test/imdb_test.rb
99
+ - test/test_helper.rb