myimdb 0.3.13 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/myimdb +6 -6
- data/bin/myimdb-catalogue +3 -3
- data/lib/myimdb.rb +14 -7
- data/lib/myimdb/scraper/base.rb +18 -39
- data/lib/myimdb/scraper/imdb.rb +14 -14
- data/lib/myimdb/scraper/metacritic.rb +9 -7
- data/lib/myimdb/scraper/rotten_tomatoes.rb +6 -6
- data/lib/myimdb/scraper/string_extensions.rb +5 -30
- data/lib/myimdb/search.rb +3 -1
- data/lib/myimdb/search/base.rb +14 -0
- data/lib/myimdb/search/bing.rb +42 -0
- data/lib/myimdb/search/google.rb +32 -20
- data/myimdb.gemspec +4 -2
- metadata +4 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.0
|
data/bin/myimdb
CHANGED
@@ -48,13 +48,13 @@ def details(klass_name, name)
|
|
48
48
|
search_scope = "#{klass_name.downcase}.com"
|
49
49
|
|
50
50
|
search_result = Myimdb::Search::Google.search_text(name, :restrict_to=> search_scope)[0]
|
51
|
-
site = "Myimdb::Scraper::#{klass_name}".
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
51
|
+
site = eval("Myimdb::Scraper::#{klass_name}").new(search_result[:url])
|
52
|
+
puts "===================================================="
|
53
|
+
puts "#{klass_name} details for: #{name}"
|
54
|
+
puts "===================================================="
|
55
|
+
puts "#{site.summary}\n"
|
56
56
|
rescue Exception=> ex
|
57
|
-
|
57
|
+
puts "Unable to fetch #{klass_name} details for: #{name} because: #{ex.message}"
|
58
58
|
end
|
59
59
|
|
60
60
|
['Imdb', 'RottenTomatoes', 'Metacritic'].each do |site|
|
data/bin/myimdb-catalogue
CHANGED
@@ -118,7 +118,7 @@ def repair_permissions_for(target_dir)
|
|
118
118
|
`attrib -r "#{target_dir}"`
|
119
119
|
`attrib +r "#{target_dir}"`
|
120
120
|
else
|
121
|
-
p 'Skipping permission repair;
|
121
|
+
p 'Skipping permission repair; Only works on Windows.'
|
122
122
|
end
|
123
123
|
end
|
124
124
|
|
@@ -128,7 +128,7 @@ end
|
|
128
128
|
|
129
129
|
def save_image(target_dir, name, image_index=0)
|
130
130
|
image_data = Myimdb::Search::Google.search_images(name, :size=> 'medium')[image_index]
|
131
|
-
image_url = image_data[
|
131
|
+
image_url = image_data[:url] if image_data
|
132
132
|
|
133
133
|
if image_url
|
134
134
|
image_file_path = File.join(target_dir, 'movie.jpg')
|
@@ -148,7 +148,7 @@ def generate_metadata(path, name)
|
|
148
148
|
search_scope = "imdb.com"
|
149
149
|
|
150
150
|
search_result = Myimdb::Search::Google.search_text(name, :restrict_to=> search_scope)[0]
|
151
|
-
imdb = Myimdb::Scraper::Imdb.new(search_result[
|
151
|
+
imdb = Myimdb::Scraper::Imdb.new(search_result[:url])
|
152
152
|
new_name = name.gsub(/\[\S+\]/, "").strip
|
153
153
|
new_name << " [#{imdb.year}] [#{imdb.rating},#{imdb.votes}] [#{imdb.directors.join(',')}]"
|
154
154
|
puts "Renaming: #{name} to: #{new_name}"
|
data/lib/myimdb.rb
CHANGED
@@ -1,7 +1,14 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'httparty'
|
3
|
-
require 'nokogiri'
|
4
|
-
require 'open-uri'
|
5
|
-
|
6
|
-
require 'myimdb/search'
|
7
|
-
require 'myimdb/scraper'
|
1
|
+
require 'rubygems'
|
2
|
+
require 'httparty'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'open-uri'
|
5
|
+
|
6
|
+
require 'myimdb/search'
|
7
|
+
require 'myimdb/scraper'
|
8
|
+
|
9
|
+
class ImdbMovie
|
10
|
+
def self.search(movie_name)
|
11
|
+
search_result = Myimdb::Search::Google.search_text(movie_name, :restrict_to=> 'imdb.com')[0]
|
12
|
+
Myimdb::Scraper::Imdb.new(search_result[:url]) if search_result
|
13
|
+
end
|
14
|
+
end
|
data/lib/myimdb/scraper/base.rb
CHANGED
@@ -5,10 +5,10 @@ module HandleExceptions
|
|
5
5
|
base.send(:include, InstanceMethods)
|
6
6
|
base.send(:extend, ClassMethods)
|
7
7
|
end
|
8
|
-
|
8
|
+
|
9
9
|
module InstanceMethods
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
module ClassMethods
|
13
13
|
def handle_exceptions_for(*method_names)
|
14
14
|
method_names.each do |method_name|
|
@@ -29,47 +29,26 @@ module Myimdb
|
|
29
29
|
module Scraper
|
30
30
|
class Base
|
31
31
|
include HandleExceptions
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
def directors_with_url
|
36
|
-
end
|
37
|
-
|
38
|
-
def
|
39
|
-
end
|
40
|
-
|
41
|
-
def
|
42
|
-
end
|
43
|
-
|
44
|
-
def
|
45
|
-
end
|
46
|
-
|
47
|
-
def votes
|
48
|
-
end
|
49
|
-
|
50
|
-
def genres
|
51
|
-
end
|
52
|
-
|
53
|
-
def tagline
|
54
|
-
end
|
55
|
-
|
56
|
-
def plot
|
57
|
-
end
|
58
|
-
|
59
|
-
def year
|
60
|
-
end
|
61
|
-
|
62
|
-
def release_date
|
63
|
-
end
|
64
|
-
|
65
|
-
def image
|
66
|
-
end
|
67
|
-
|
32
|
+
include Myimdb::Scraper::StringExtensions
|
33
|
+
|
34
|
+
def directors; end
|
35
|
+
def directors_with_url; end
|
36
|
+
def writers; end
|
37
|
+
def writers_with_url; end
|
38
|
+
def rating; end
|
39
|
+
def votes; end
|
40
|
+
def genres; end
|
41
|
+
def tagline; end
|
42
|
+
def plot; end
|
43
|
+
def year; end
|
44
|
+
def release_date; end
|
45
|
+
def image; end
|
46
|
+
|
68
47
|
def summary
|
69
48
|
[:directors, :writers, :rating, :votes, :genres, :tagline, :plot, :year, :release_date].collect do |meth|
|
70
49
|
data = send(meth)
|
71
50
|
data = data.join(", ") if Array === data
|
72
|
-
sprintf("%-15s : %s", meth.to_s.
|
51
|
+
sprintf("%-15s : %s", meth.to_s.capitalize, data)
|
73
52
|
end.join("\n")
|
74
53
|
end
|
75
54
|
|
data/lib/myimdb/scraper/imdb.rb
CHANGED
@@ -4,51 +4,51 @@ module Myimdb
|
|
4
4
|
def initialize(url)
|
5
5
|
@url = url
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
def directors
|
9
9
|
document.css('.info h5:contains("Director") + .info-content a:not(.tn15more)').collect{ |a| a.text }
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def directors_with_url
|
13
13
|
document.css('.info h5:contains("Director") + .info-content a:not(.tn15more)').collect{ |a| {:name=> a.text, :url=> "http://www.imdb.com#{a['href']}" } }
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def writers
|
17
17
|
document.css('.info h5:contains("Writer") + .info-content a:not(.tn15more)').collect{ |a| a.text }
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def writers_with_url
|
21
21
|
document.css('.info h5:contains("Writer") + .info-content a:not(.tn15more)').collect{ |a| {:name=> a.text, :url=> "http://www.imdb.com#{a['href']}" } }
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
def rating
|
25
25
|
document.css(".starbar-meta b").inner_text.strip.split('/').first.to_f
|
26
26
|
end
|
27
|
-
|
27
|
+
|
28
28
|
def votes
|
29
29
|
document.css(".starbar-meta a").inner_text.strip.split(' ').first.sub(',', '').to_i
|
30
30
|
end
|
31
|
-
|
31
|
+
|
32
32
|
def genres
|
33
33
|
document.css('.info h5:contains("Genre:") + .info-content a:not(.tn15more)').collect{ |a| a.text }
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
def tagline
|
37
37
|
document.css('.info h5:contains("Tagline:") + .info-content').children[0].text.strip rescue nil
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
def plot
|
41
41
|
document.css('.info h5:contains("Plot:") + .info-content').children[0].text.strip
|
42
42
|
end
|
43
|
-
|
43
|
+
|
44
44
|
def year
|
45
45
|
document.css("div#tn15title a:first")[0].text.to_i
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
def release_date
|
49
49
|
Date.parse(document.css('.info h5:contains("Release Date:") + .info-content').inner_text)
|
50
50
|
end
|
51
|
-
|
51
|
+
|
52
52
|
def image
|
53
53
|
image_url = document.css(".photo:first a").first['href']
|
54
54
|
unless image_url.nil? or image_url =~ /addposter/
|
@@ -56,12 +56,12 @@ module Myimdb
|
|
56
56
|
image_document.css('#principal img:first').first['src']
|
57
57
|
end
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
private
|
61
61
|
def document
|
62
62
|
@document ||= Nokogiri::HTML(open(@url))
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
handle_exceptions_for :directors, :directors_with_url, :writers, :writers_with_url, :rating, :votes, :genres, :tagline, :plot, :year, :image
|
66
66
|
end
|
67
67
|
end
|
@@ -4,28 +4,30 @@ module Myimdb
|
|
4
4
|
def initialize(url)
|
5
5
|
@url = url
|
6
6
|
end
|
7
|
-
|
7
|
+
|
8
8
|
def rating
|
9
9
|
document.css("#metascore").inner_text.strip.to_f/10
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def votes
|
13
13
|
document.at("a[@href='#critics']").inner_text.strip.to_i
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def genres
|
17
|
-
document.css("#productinfo p:first").text.gsub(/^\S+:/, '').split("|").
|
17
|
+
document.css("#productinfo p:first").text.gsub(/^\S+:/, '').split("|").collect do |genre|
|
18
|
+
strip_useless_chars(genre)
|
19
|
+
end
|
18
20
|
end
|
19
|
-
|
21
|
+
|
20
22
|
def plot
|
21
23
|
document.css("#productsummary .summarytext").inner_text.strip
|
22
24
|
end
|
23
|
-
|
25
|
+
|
24
26
|
private
|
25
27
|
def document
|
26
28
|
@document ||= Nokogiri::HTML(open(@url))
|
27
29
|
end
|
28
|
-
|
30
|
+
|
29
31
|
handle_exceptions_for :rating, :votes, :genres, :plot
|
30
32
|
end
|
31
33
|
end
|
@@ -8,24 +8,24 @@ module Myimdb
|
|
8
8
|
def rating
|
9
9
|
document.css("#tomatometer_data p:nth-child(4) span").inner_text.strip.to_i
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def votes
|
13
13
|
document.css("#tomatometer_data p:nth-child(1) span").inner_text.strip.to_i
|
14
14
|
end
|
15
|
-
|
15
|
+
|
16
16
|
def genres
|
17
|
-
document.css("#movie_stats .fl:first p:last .content a").inner_text.
|
17
|
+
document.css("#movie_stats .fl:first p:last .content a").inner_text.strip.to_a
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
def plot
|
21
21
|
document.css("#movie_synopsis_all").inner_text.strip
|
22
22
|
end
|
23
|
-
|
23
|
+
|
24
24
|
private
|
25
25
|
def document
|
26
26
|
@document ||= Nokogiri::HTML(open(@url))
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
handle_exceptions_for :rating, :votes, :genres, :plot
|
30
30
|
end
|
31
31
|
end
|
@@ -4,38 +4,13 @@ require 'iconv'
|
|
4
4
|
module Myimdb
|
5
5
|
module Scraper
|
6
6
|
module StringExtensions
|
7
|
-
def scraper_unescape_html
|
8
|
-
Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(
|
7
|
+
def scraper_unescape_html(string)
|
8
|
+
Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(string))
|
9
9
|
end
|
10
10
|
|
11
|
-
def
|
12
|
-
gsub(
|
13
|
-
end
|
14
|
-
|
15
|
-
def strip_useless_chars
|
16
|
-
gsub(/[^a-zA-z0-9\|\-_\(\)@$\/\\]/, '')
|
17
|
-
end
|
18
|
-
|
19
|
-
def titleize
|
20
|
-
humanize.gsub(/\b('?[a-z])/) { $1.capitalize }
|
21
|
-
end
|
22
|
-
|
23
|
-
def humanize
|
24
|
-
gsub(/_/, " ").capitalize
|
25
|
-
end
|
26
|
-
|
27
|
-
def constantize
|
28
|
-
names = self.split('::')
|
29
|
-
names.shift if names.empty? || names.first.empty?
|
30
|
-
|
31
|
-
constant = Object
|
32
|
-
names.each do |name|
|
33
|
-
constant = constant.const_defined?(name) ? constant.const_get(name) : constant.const_missing(name)
|
34
|
-
end
|
35
|
-
constant
|
11
|
+
def strip_useless_chars(string)
|
12
|
+
string.gsub(/[^a-zA-z0-9\|\-_\(\)@$\/\\]/, '')
|
36
13
|
end
|
37
14
|
end
|
38
15
|
end
|
39
|
-
end
|
40
|
-
|
41
|
-
String.send :include, Myimdb::Scraper::StringExtensions
|
16
|
+
end
|
data/lib/myimdb/search.rb
CHANGED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Myimdb
|
2
|
+
module Search
|
3
|
+
class Base
|
4
|
+
class << self
|
5
|
+
def spell_movie( text, options={} )
|
6
|
+
search_results = search_text(text, :restrict_to=> 'imdb.com')
|
7
|
+
search_results and search_results.collect do |search_result|
|
8
|
+
search_result[:title].gsub(/ \(.*$/, "")
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module Myimdb
|
2
|
+
module Search
|
3
|
+
class Bing < Base
|
4
|
+
include HTTParty
|
5
|
+
format :json
|
6
|
+
headers 'Content-Type' => 'application/json'
|
7
|
+
|
8
|
+
AppKey = '36C1CEF363A00C6536C4420D356B5E507C4C2AF1'
|
9
|
+
base_uri 'api.search.live.net'
|
10
|
+
|
11
|
+
class << self
|
12
|
+
def search_text( text, options={} )
|
13
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
14
|
+
response = get( '/json.aspx', :query=> {:Appid=> AppKey, :query=> text, :sources=> 'web'} )
|
15
|
+
parse_search_result(response, 'Web')
|
16
|
+
end
|
17
|
+
|
18
|
+
def search_images( text, options={} )
|
19
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
20
|
+
response = get( '/json.aspx', :query=> {:Appid=> AppKey, :query=> text, :sources=> 'image'} )
|
21
|
+
parse_search_result(response, 'Image')
|
22
|
+
end
|
23
|
+
|
24
|
+
def spell( text, options={} )
|
25
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
26
|
+
response = get( '/json.aspx', :query=> {:Appid=> AppKey, :query=> text, :sources=> 'spell'} )
|
27
|
+
parse_search_result(response, 'Spell')
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
def parse_search_result( response, type )
|
32
|
+
response['SearchResponse'][type]['Results'].collect do |response_element|
|
33
|
+
{
|
34
|
+
:url => response_element['Url'],
|
35
|
+
:title => response_element['Title']
|
36
|
+
}
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/myimdb/search/google.rb
CHANGED
@@ -1,29 +1,41 @@
|
|
1
1
|
module Myimdb
|
2
2
|
module Search
|
3
|
-
class Google
|
3
|
+
class Google < Base
|
4
4
|
include HTTParty
|
5
|
+
format :json
|
6
|
+
headers 'Content-Type' => 'application/json'
|
5
7
|
|
6
|
-
format :json
|
7
|
-
headers 'Content-Type' => 'application/json'
|
8
8
|
base_uri 'ajax.googleapis.com'
|
9
|
+
|
10
|
+
class << self
|
11
|
+
def search_text( text, options={} )
|
12
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
13
|
+
response = get( '/ajax/services/search/web', :query=> {:v=> '1.0', :q=> text} )
|
14
|
+
parse_search_result( response )
|
15
|
+
end
|
9
16
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
'
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
17
|
+
def search_images( text, options={} )
|
18
|
+
sizes = {
|
19
|
+
'large' => 'l',
|
20
|
+
'medium' => 'm',
|
21
|
+
'small' => 'i'
|
22
|
+
}
|
23
|
+
search_options = { :v=> '1.0', :q=> text }
|
24
|
+
search_options.merge!(:imgsz=> sizes[options[:size].to_s]) if !options[:size].blank?
|
25
|
+
text = text + " site:#{options[:restrict_to]}" if !options[:restrict_to].blank?
|
26
|
+
response = get( '/ajax/services/search/images', :query=> search_options )
|
27
|
+
parse_search_result( response )
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
def parse_search_result( response )
|
32
|
+
response['responseData'] and response['responseData']['results'].collect do |response_element|
|
33
|
+
{
|
34
|
+
:url => response_element['url'],
|
35
|
+
:title => response_element['titleNoFormatting']
|
36
|
+
}
|
37
|
+
end
|
38
|
+
end
|
27
39
|
end
|
28
40
|
end
|
29
41
|
end
|
data/myimdb.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{myimdb}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Gaurav"]
|
12
|
-
s.date = %q{2010-02-
|
12
|
+
s.date = %q{2010-02-22}
|
13
13
|
s.email = %q{gaurav@vinsol.com}
|
14
14
|
s.executables = ["myimdb", "myimdb-catalogue"]
|
15
15
|
s.extra_rdoc_files = [
|
@@ -31,6 +31,8 @@ Gem::Specification.new do |s|
|
|
31
31
|
"lib/myimdb/scraper/rotten_tomatoes.rb",
|
32
32
|
"lib/myimdb/scraper/string_extensions.rb",
|
33
33
|
"lib/myimdb/search.rb",
|
34
|
+
"lib/myimdb/search/base.rb",
|
35
|
+
"lib/myimdb/search/bing.rb",
|
34
36
|
"lib/myimdb/search/google.rb",
|
35
37
|
"myimdb.gemspec",
|
36
38
|
"test/helper.rb",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: myimdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gaurav
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-22 00:00:00 +05:30
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -57,6 +57,8 @@ files:
|
|
57
57
|
- lib/myimdb/scraper/rotten_tomatoes.rb
|
58
58
|
- lib/myimdb/scraper/string_extensions.rb
|
59
59
|
- lib/myimdb/search.rb
|
60
|
+
- lib/myimdb/search/base.rb
|
61
|
+
- lib/myimdb/search/bing.rb
|
60
62
|
- lib/myimdb/search/google.rb
|
61
63
|
- myimdb.gemspec
|
62
64
|
- test/helper.rb
|