myimdb 0.3.13 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/myimdb +6 -6
- data/bin/myimdb-catalogue +3 -3
- data/lib/myimdb.rb +14 -7
- data/lib/myimdb/scraper/base.rb +18 -39
- data/lib/myimdb/scraper/imdb.rb +14 -14
- data/lib/myimdb/scraper/metacritic.rb +9 -7
- data/lib/myimdb/scraper/rotten_tomatoes.rb +6 -6
- data/lib/myimdb/scraper/string_extensions.rb +5 -30
- data/lib/myimdb/search.rb +3 -1
- data/lib/myimdb/search/base.rb +14 -0
- data/lib/myimdb/search/bing.rb +42 -0
- data/lib/myimdb/search/google.rb +32 -20
- data/myimdb.gemspec +4 -2
- metadata +4 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.13
|
1
|
+
0.4.0
|
data/bin/myimdb
CHANGED
@@ -48,13 +48,13 @@ def details(klass_name, name)
|
|
48
48
|
search_scope = "#{klass_name.downcase}.com"
|
49
49
|
|
50
50
|
search_result = Myimdb::Search::Google.search_text(name, :restrict_to=> search_scope)[0]
|
51
|
-
site = "Myimdb::Scraper::#{klass_name}".
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
51
|
+
site = eval("Myimdb::Scraper::#{klass_name}").new(search_result[:url])
|
52
|
+
puts "===================================================="
|
53
|
+
puts "#{klass_name} details for: #{name}"
|
54
|
+
puts "===================================================="
|
55
|
+
puts "#{site.summary}\n"
|
56
56
|
rescue Exception=> ex
|
57
|
-
|
57
|
+
puts "Unable to fetch #{klass_name} details for: #{name} because: #{ex.message}"
|
58
58
|
end
|
59
59
|
|
60
60
|
['Imdb', 'RottenTomatoes', 'Metacritic'].each do |site|
|
data/bin/myimdb-catalogue
CHANGED
@@ -118,7 +118,7 @@ def repair_permissions_for(target_dir)
|
|
118
118
|
`attrib -r "#{target_dir}"`
|
119
119
|
`attrib +r "#{target_dir}"`
|
120
120
|
else
|
121
|
-
p 'Skipping permission repair;
|
121
|
+
p 'Skipping permission repair; Only works on Windows.'
|
122
122
|
end
|
123
123
|
end
|
124
124
|
|
@@ -128,7 +128,7 @@ end
|
|
128
128
|
|
129
129
|
def save_image(target_dir, name, image_index=0)
|
130
130
|
image_data = Myimdb::Search::Google.search_images(name, :size=> 'medium')[image_index]
|
131
|
-
image_url = image_data[
|
131
|
+
image_url = image_data[:url] if image_data
|
132
132
|
|
133
133
|
if image_url
|
134
134
|
image_file_path = File.join(target_dir, 'movie.jpg')
|
@@ -148,7 +148,7 @@ def generate_metadata(path, name)
|
|
148
148
|
search_scope = "imdb.com"
|
149
149
|
|
150
150
|
search_result = Myimdb::Search::Google.search_text(name, :restrict_to=> search_scope)[0]
|
151
|
-
imdb = Myimdb::Scraper::Imdb.new(search_result[
|
151
|
+
imdb = Myimdb::Scraper::Imdb.new(search_result[:url])
|
152
152
|
new_name = name.gsub(/\[\S+\]/, "").strip
|
153
153
|
new_name << " [#{imdb.year}] [#{imdb.rating},#{imdb.votes}] [#{imdb.directors.join(',')}]"
|
154
154
|
puts "Renaming: #{name} to: #{new_name}"
|
data/lib/myimdb.rb
CHANGED
@@ -1,7 +1,14 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'httparty'
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'open-uri'
|
5
|
-
|
6
|
-
require 'myimdb/search'
|
7
|
-
require 'myimdb/scraper'
|
1
|
+
require 'rubygems'
|
2
|
+
require 'httparty'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open-uri'
|
5
|
+
|
6
|
+
require 'myimdb/search'
|
7
|
+
require 'myimdb/scraper'
|
8
|
+
|
9
|
+
class ImdbMovie
|
10
|
+
def self.search(movie_name)
|
11
|
+
search_result = Myimdb::Search::Google.search_text(movie_name, :restrict_to=> 'imdb.com')[0]
|
12
|
+
Myimdb::Scraper::Imdb.new(search_result[:url]) if search_result
|
13
|
+
end
|
14
|
+
end
|
data/lib/myimdb/scraper/base.rb
CHANGED
@@ -5,10 +5,10 @@ module HandleExceptions
|
|
5
5
|
base.send(:include, InstanceMethods)
|
6
6
|
base.send(:extend, ClassMethods)
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
module InstanceMethods
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
module ClassMethods
|
13
13
|
def handle_exceptions_for(*method_names)
|
14
14
|
method_names.each do |method_name|
|
@@ -29,47 +29,26 @@ module Myimdb
|
|
29
29
|
module Scraper
|
30
30
|
class Base
|
31
31
|
include HandleExceptions
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
def directors_with_url
|
36
|
-
end
|
37
|
-
|
38
|
-
def
|
39
|
-
end
|
40
|
-
|
41
|
-
def
|
42
|
-
end
|
43
|
-
|
44
|
-
def
|
45
|
-
end
|
46
|
-
|
47
|
-
def votes
|
48
|
-
end
|
49
|
-
|
50
|
-
def genres
|
51
|
-
end
|
52
|
-
|
53
|
-
def tagline
|
54
|
-
end
|
55
|
-
|
56
|
-
def plot
|
57
|
-
end
|
58
|
-
|
59
|
-
def year
|
60
|
-
end
|
61
|
-
|
62
|
-
def release_date
|
63
|
-
end
|
64
|
-
|
65
|
-
def image
|
66
|
-
end
|
67
|
-
|
32
|
+
include Myimdb::Scraper::StringExtensions
|
33
|
+
|
34
|
+
def directors; end
|
35
|
+
def directors_with_url; end
|
36
|
+
def writers; end
|
37
|
+
def writers_with_url; end
|
38
|
+
def rating; end
|
39
|
+
def votes; end
|
40
|
+
def genres; end
|
41
|
+
def tagline; end
|
42
|
+
def plot; end
|
43
|
+
def year; end
|
44
|
+
def release_date; end
|
45
|
+
def image; end
|
46
|
+
|
68
47
|
def summary
|
69
48
|
[:directors, :writers, :rating, :votes, :genres, :tagline, :plot, :year, :release_date].collect do |meth|
|
70
49
|
data = send(meth)
|
71
50
|
data = data.join(", ") if Array === data
|
72
|
-
sprintf("%-15s : %s", meth.to_s.
|
51
|
+
sprintf("%-15s : %s", meth.to_s.capitalize, data)
|
73
52
|
end.join("\n")
|
74
53
|
end
|
75
54
|
|
data/lib/myimdb/scraper/imdb.rb
CHANGED
@@ -4,51 +4,51 @@ module Myimdb
|
|
4
4
|
def initialize(url)
|
5
5
|
@url = url
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
def directors
|
9
9
|
document.css('.info h5:contains("Director") + .info-content a:not(.tn15more)').collect{ |a| a.text }
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def directors_with_url
|
13
13
|
document.css('.info h5:contains("Director") + .info-content a:not(.tn15more)').collect{ |a| {:name=> a.text, :url=> "http://www.imdb.com#{a['href']}" } }
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def writers
|
17
17
|
document.css('.info h5:contains("Writer") + .info-content a:not(.tn15more)').collect{ |a| a.text }
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def writers_with_url
|
21
21
|
document.css('.info h5:contains("Writer") + .info-content a:not(.tn15more)').collect{ |a| {:name=> a.text, :url=> "http://www.imdb.com#{a['href']}" } }
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def rating
|
25
25
|
document.css(".starbar-meta b").inner_text.strip.split('/').first.to_f
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
def votes
|
29
29
|
document.css(".starbar-meta a").inner_text.strip.split(' ').first.sub(',', '').to_i
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
def genres
|
33
33
|
document.css('.info h5:contains("Genre:") + .info-content a:not(.tn15more)').collect{ |a| a.text }
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
def tagline
|
37
37
|
document.css('.info h5:contains("Tagline:") + .info-content').children[0].text.strip rescue nil
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
def plot
|
41
41
|
document.css('.info h5:contains("Plot:") + .info-content').children[0].text.strip
|
42
42
|
end
|
43
|
-
|
43
|
+
|
44
44
|
def year
|
45
45
|
document.css("div#tn15title a:first")[0].text.to_i
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
def release_date
|
49
49
|
Date.parse(document.css('.info h5:contains("Release Date:") + .info-content').inner_text)
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
def image
|
53
53
|
image_url = document.css(".photo:first a").first['href']
|
54
54
|
unless image_url.nil? or image_url =~ /addposter/
|
@@ -56,12 +56,12 @@ module Myimdb
|
|
56
56
|
image_document.css('#principal img:first').first['src']
|
57
57
|
end
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
private
|
61
61
|
def document
|
62
62
|
@document ||= Nokogiri::HTML(open(@url))
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
handle_exceptions_for :directors, :directors_with_url, :writers, :writers_with_url, :rating, :votes, :genres, :tagline, :plot, :year, :image
|
66
66
|
end
|
67
67
|
end
|
@@ -4,28 +4,30 @@ module Myimdb
|
|
4
4
|
def initialize(url)
|
5
5
|
@url = url
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
def rating
|
9
9
|
document.css("#metascore").inner_text.strip.to_f/10
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def votes
|
13
13
|
document.at("a[@href='#critics']").inner_text.strip.to_i
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def genres
|
17
|
-
document.css("#productinfo p:first").text.gsub(/^\S+:/, '').split("|").
|
17
|
+
document.css("#productinfo p:first").text.gsub(/^\S+:/, '').split("|").collect do |genre|
|
18
|
+
strip_useless_chars(genre)
|
19
|
+
end
|
18
20
|
end
|
19
|
-
|
21
|
+
|
20
22
|
def plot
|
21
23
|
document.css("#productsummary .summarytext").inner_text.strip
|
22
24
|
end
|
23
|
-
|
25
|
+
|
24
26
|
private
|
25
27
|
def document
|
26
28
|
@document ||= Nokogiri::HTML(open(@url))
|
27
29
|
end
|
28
|
-
|
30
|
+
|
29
31
|
handle_exceptions_for :rating, :votes, :genres, :plot
|
30
32
|
end
|
31
33
|
end
|
@@ -8,24 +8,24 @@ module Myimdb
|
|
8
8
|
def rating
|
9
9
|
document.css("#tomatometer_data p:nth-child(4) span").inner_text.strip.to_i
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def votes
|
13
13
|
document.css("#tomatometer_data p:nth-child(1) span").inner_text.strip.to_i
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def genres
|
17
|
-
document.css("#movie_stats .fl:first p:last .content a").inner_text.
|
17
|
+
document.css("#movie_stats .fl:first p:last .content a").inner_text.strip.to_a
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def plot
|
21
21
|
document.css("#movie_synopsis_all").inner_text.strip
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
private
|
25
25
|
def document
|
26
26
|
@document ||= Nokogiri::HTML(open(@url))
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
handle_exceptions_for :rating, :votes, :genres, :plot
|
30
30
|
end
|
31
31
|
end
|
@@ -4,38 +4,13 @@ require 'iconv'
|
|
4
4
|
module Myimdb
|
5
5
|
module Scraper
|
6
6
|
module StringExtensions
|
7
|
-
def scraper_unescape_html
|
8
|
-
Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(
|
7
|
+
def scraper_unescape_html(string)
|
8
|
+
Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(string))
|
9
9
|
end
|
10
10
|
|
11
|
-
def
|
12
|
-
gsub(
|
13
|
-
end
|
14
|
-
|
15
|
-
def strip_useless_chars
|
16
|
-
gsub(/[^a-zA-z0-9\|\-_\(\)@$\/\\]/, '')
|
17
|
-
end
|
18
|
-
|
19
|
-
def titleize
|
20
|
-
humanize.gsub(/\b('?[a-z])/) { $1.capitalize }
|
21
|
-
end
|
22
|
-
|
23
|
-
def humanize
|
24
|
-
gsub(/_/, " ").capitalize
|
25
|
-
end
|
26
|
-
|
27
|
-
def constantize
|
28
|
-
names = self.split('::')
|
29
|
-
names.shift if names.empty? || names.first.empty?
|
30
|
-
|
31
|
-
constant = Object
|
32
|
-
names.each do |name|
|
33
|
-
constant = constant.const_defined?(name) ? constant.const_get(name) : constant.const_missing(name)
|
34
|
-
end
|
35
|
-
constant
|
11
|
+
def strip_useless_chars(string)
|
12
|
+
string.gsub(/[^a-zA-z0-9\|\-_\(\)@$\/\\]/, '')
|
36
13
|
end
|
37
14
|
end
|
38
15
|
end
|
39
|
-
end
|
40
|
-
|
41
|
-
String.send :include, Myimdb::Scraper::StringExtensions
|
16
|
+
end
|
data/lib/myimdb/search.rb
CHANGED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Myimdb
|
2
|
+
module Search
|
3
|
+
class Base
|
4
|
+
class << self
|
5
|
+
def spell_movie( text, options={} )
|
6
|
+
search_results = search_text(text, :restrict_to=> 'imdb.com')
|
7
|
+
search_results and search_results.collect do |search_result|
|
8
|
+
search_result[:title].gsub(/ \(.*$/, "")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Myimdb
|
2
|
+
module Search
|
3
|
+
class Bing < Base
|
4
|
+
include HTTParty
|
5
|
+
format :json
|
6
|
+
headers 'Content-Type' => 'application/json'
|
7
|
+
|
8
|
+
AppKey = '36C1CEF363A00C6536C4420D356B5E507C4C2AF1'
|
9
|
+
base_uri 'api.search.live.net'
|
10
|
+
|
11
|
+
class << self
|
12
|
+
def search_text( text, options={} )
|
13
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
14
|
+
response = get( '/json.aspx', :query=> {:Appid=> AppKey, :query=> text, :sources=> 'web'} )
|
15
|
+
parse_search_result(response, 'Web')
|
16
|
+
end
|
17
|
+
|
18
|
+
def search_images( text, options={} )
|
19
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
20
|
+
response = get( '/json.aspx', :query=> {:Appid=> AppKey, :query=> text, :sources=> 'image'} )
|
21
|
+
parse_search_result(response, 'Image')
|
22
|
+
end
|
23
|
+
|
24
|
+
def spell( text, options={} )
|
25
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
26
|
+
response = get( '/json.aspx', :query=> {:Appid=> AppKey, :query=> text, :sources=> 'spell'} )
|
27
|
+
parse_search_result(response, 'Spell')
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
def parse_search_result( response, type )
|
32
|
+
response['SearchResponse'][type]['Results'].collect do |response_element|
|
33
|
+
{
|
34
|
+
:url => response_element['Url'],
|
35
|
+
:title => response_element['Title']
|
36
|
+
}
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/myimdb/search/google.rb
CHANGED
@@ -1,29 +1,41 @@
|
|
1
1
|
module Myimdb
|
2
2
|
module Search
|
3
|
-
class Google
|
3
|
+
class Google < Base
|
4
4
|
include HTTParty
|
5
|
+
format :json
|
6
|
+
headers 'Content-Type' => 'application/json'
|
5
7
|
|
6
|
-
format :json
|
7
|
-
headers 'Content-Type' => 'application/json'
|
8
8
|
base_uri 'ajax.googleapis.com'
|
9
|
+
|
10
|
+
class << self
|
11
|
+
def search_text( text, options={} )
|
12
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
13
|
+
response = get( '/ajax/services/search/web', :query=> {:v=> '1.0', :q=> text} )
|
14
|
+
parse_search_result( response )
|
15
|
+
end
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
'
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
17
|
+
def search_images( text, options={} )
|
18
|
+
sizes = {
|
19
|
+
'large' => 'l',
|
20
|
+
'medium' => 'm',
|
21
|
+
'small' => 'i'
|
22
|
+
}
|
23
|
+
search_options = { :v=> '1.0', :q=> text }
|
24
|
+
search_options.merge!(:imgsz=> sizes[options[:size].to_s]) if !options[:size].blank?
|
25
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
26
|
+
response = get( '/ajax/services/search/images', :query=> search_options )
|
27
|
+
parse_search_result( response )
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
def parse_search_result( response )
|
32
|
+
response['responseData'] and response['responseData']['results'].collect do |response_element|
|
33
|
+
{
|
34
|
+
:url => response_element['url'],
|
35
|
+
:title => response_element['titleNoFormatting']
|
36
|
+
}
|
37
|
+
end
|
38
|
+
end
|
27
39
|
end
|
28
40
|
end
|
29
41
|
end
|
data/myimdb.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{myimdb}
|
8
|
-
s.version = "0.3.13"
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Gaurav"]
|
12
|
-
s.date = %q{2010-02-
|
12
|
+
s.date = %q{2010-02-22}
|
13
13
|
s.email = %q{gaurav@vinsol.com}
|
14
14
|
s.executables = ["myimdb", "myimdb-catalogue"]
|
15
15
|
s.extra_rdoc_files = [
|
@@ -31,6 +31,8 @@ Gem::Specification.new do |s|
|
|
31
31
|
"lib/myimdb/scraper/rotten_tomatoes.rb",
|
32
32
|
"lib/myimdb/scraper/string_extensions.rb",
|
33
33
|
"lib/myimdb/search.rb",
|
34
|
+
"lib/myimdb/search/base.rb",
|
35
|
+
"lib/myimdb/search/bing.rb",
|
34
36
|
"lib/myimdb/search/google.rb",
|
35
37
|
"myimdb.gemspec",
|
36
38
|
"test/helper.rb",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: myimdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.13
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gaurav
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-22 00:00:00 +05:30
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -57,6 +57,8 @@ files:
|
|
57
57
|
- lib/myimdb/scraper/rotten_tomatoes.rb
|
58
58
|
- lib/myimdb/scraper/string_extensions.rb
|
59
59
|
- lib/myimdb/search.rb
|
60
|
+
- lib/myimdb/search/base.rb
|
61
|
+
- lib/myimdb/search/bing.rb
|
60
62
|
- lib/myimdb/search/google.rb
|
61
63
|
- myimdb.gemspec
|
62
64
|
- test/helper.rb
|