myimdb 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/myimdb +6 -3
- data/bin/myimdb-catalogue +1 -1
- data/lib/myimdb/scraper/base.rb +3 -1
- data/lib/myimdb/scraper/freebase.rb +74 -0
- data/lib/myimdb/scraper.rb +2 -1
- data/lib/myimdb/search/base.rb +14 -0
- data/lib/myimdb.rb +1 -1
- data/myimdb.gemspec +3 -2
- metadata +3 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.4.1
|
data/bin/myimdb
CHANGED
@@ -21,6 +21,10 @@ OptionParser.new do |opts|
|
|
21
21
|
options[:metacritic] = true
|
22
22
|
end
|
23
23
|
|
24
|
+
opts.on("-b", "--freebase", "Generates data from Freebase") do
|
25
|
+
options[:freebase] = true
|
26
|
+
end
|
27
|
+
|
24
28
|
opts.on("-i", "--imdb", "Generates data from Imdb") do
|
25
29
|
options[:imdb] = true
|
26
30
|
end
|
@@ -46,8 +50,7 @@ name = ARGV.join(' ')
|
|
46
50
|
|
47
51
|
def details(klass_name, name)
|
48
52
|
search_scope = "#{klass_name.downcase}.com"
|
49
|
-
|
50
|
-
search_result = Myimdb::Search::Google.search_text(name, :restrict_to=> search_scope)[0]
|
53
|
+
search_result = Myimdb::Search::Base.search(name, :restrict_to=> search_scope)[0]
|
51
54
|
site = eval("Myimdb::Scraper::#{klass_name}").new(search_result[:url])
|
52
55
|
puts "===================================================="
|
53
56
|
puts "#{klass_name} details for: #{name}"
|
@@ -57,6 +60,6 @@ rescue Exception=> ex
|
|
57
60
|
puts "Unable to fetch #{klass_name} details for: #{name} because: #{ex.message}"
|
58
61
|
end
|
59
62
|
|
60
|
-
['Imdb', 'RottenTomatoes', 'Metacritic'].each do |site|
|
63
|
+
['Imdb', 'RottenTomatoes', 'Metacritic', 'Freebase'].each do |site|
|
61
64
|
details(site, name) if options[site.downcase.to_sym]
|
62
65
|
end
|
data/bin/myimdb-catalogue
CHANGED
@@ -147,7 +147,7 @@ end
|
|
147
147
|
def generate_metadata(path, name)
|
148
148
|
search_scope = "imdb.com"
|
149
149
|
|
150
|
-
search_result = Myimdb::Search::Google.search_text(name, :restrict_to=> search_scope)[0]
|
150
|
+
search_result = Myimdb::Search::Base.search(name, :restrict_to=> search_scope)[0]
|
151
151
|
imdb = Myimdb::Scraper::Imdb.new(search_result[:url])
|
152
152
|
new_name = name.gsub(/\[\S+\]/, "").strip
|
153
153
|
new_name << " [#{imdb.year}] [#{imdb.rating},#{imdb.votes}] [#{imdb.directors.join(',')}]"
|
data/lib/myimdb/scraper/base.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
class UnformattedHtml < Exception; end
|
2
|
+
class DocumentNotFound < Exception; end
|
2
3
|
|
3
4
|
module HandleExceptions
|
4
5
|
def self.included(base)
|
@@ -30,7 +31,8 @@ module Myimdb
|
|
30
31
|
class Base
|
31
32
|
include HandleExceptions
|
32
33
|
include Myimdb::Scraper::StringExtensions
|
33
|
-
|
34
|
+
|
35
|
+
def name; end
|
34
36
|
def directors; end
|
35
37
|
def directors_with_url; end
|
36
38
|
def writers; end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Myimdb
|
2
|
+
module Scraper
|
3
|
+
class Freebase < Scraper::Base
|
4
|
+
include HTTParty
|
5
|
+
format :json
|
6
|
+
headers 'Content-Type' => 'application/json'
|
7
|
+
|
8
|
+
def initialize(url)
|
9
|
+
@key = url.gsub(/.*?\/view/, '')
|
10
|
+
@url = "http://www.freebase.com/experimental/topic/standard?id=#{@key}"
|
11
|
+
end
|
12
|
+
|
13
|
+
def directors
|
14
|
+
parse_text '/film/film/directed_by'
|
15
|
+
end
|
16
|
+
|
17
|
+
def directors_with_url
|
18
|
+
parse_text_and_url '/film/film/directed_by'
|
19
|
+
end
|
20
|
+
|
21
|
+
def writers
|
22
|
+
parse_text '/film/film/written_by'
|
23
|
+
end
|
24
|
+
|
25
|
+
def writers_with_url
|
26
|
+
parse_text_and_url '/film/film/written_by'
|
27
|
+
end
|
28
|
+
|
29
|
+
def genres
|
30
|
+
parse_text '/film/film/genre'
|
31
|
+
end
|
32
|
+
|
33
|
+
def tagline
|
34
|
+
parse_text('/film/film/tagline')[0]
|
35
|
+
end
|
36
|
+
|
37
|
+
def plot
|
38
|
+
document['description']
|
39
|
+
end
|
40
|
+
|
41
|
+
def year
|
42
|
+
release_date.year
|
43
|
+
end
|
44
|
+
|
45
|
+
def release_date
|
46
|
+
Date.parse(parse_text('/film/film/initial_release_date')[0])
|
47
|
+
end
|
48
|
+
|
49
|
+
def image
|
50
|
+
end
|
51
|
+
|
52
|
+
private
|
53
|
+
def document
|
54
|
+
@document ||= begin
|
55
|
+
result = self.class.get( @url )[@key]
|
56
|
+
unless result['code'].index("ok")
|
57
|
+
raise DocumentNotFound.new("Unable to locate freebase article")
|
58
|
+
end
|
59
|
+
result['result']
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def parse_text(path)
|
64
|
+
document['properties'][path]['values'].collect{ |obj| obj['text'] }
|
65
|
+
end
|
66
|
+
|
67
|
+
def parse_text_and_url(path)
|
68
|
+
document['properties'][path]['values'].collect{ |obj| { :name=> obj['text'], :url=> obj['url'] } }
|
69
|
+
end
|
70
|
+
|
71
|
+
handle_exceptions_for :directors, :directors_with_url, :writers, :writers_with_url, :genres, :tagline, :plot, :year, :image
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
data/lib/myimdb/scraper.rb
CHANGED
data/lib/myimdb/search/base.rb
CHANGED
@@ -8,6 +8,20 @@ module Myimdb
|
|
8
8
|
search_result[:title].gsub(/ \(.*$/, "")
|
9
9
|
end
|
10
10
|
end
|
11
|
+
|
12
|
+
def search( text, options )
|
13
|
+
engines = [Myimdb::Search::Google, Myimdb::Search::Bing]
|
14
|
+
|
15
|
+
def search(engine, text, options)
|
16
|
+
engine.search_text(text, options)
|
17
|
+
end
|
18
|
+
|
19
|
+
engines.each do |engine|
|
20
|
+
puts "Retrying using #{engine}" unless engines.index(engine) == 0
|
21
|
+
result = search(engine, text, options)
|
22
|
+
return result unless result.nil? or result.empty?
|
23
|
+
end
|
24
|
+
end
|
11
25
|
end
|
12
26
|
end
|
13
27
|
end
|
data/lib/myimdb.rb
CHANGED
@@ -8,7 +8,7 @@ require 'myimdb/scraper'
|
|
8
8
|
|
9
9
|
class ImdbMovie
|
10
10
|
def self.search(movie_name)
|
11
|
-
search_result = Myimdb::Search::Google.search_text(movie_name, :restrict_to=> 'imdb.com')[0]
|
11
|
+
search_result = Myimdb::Search::Base.search(movie_name, :restrict_to=> 'imdb.com')[0]
|
12
12
|
Myimdb::Scraper::Imdb.new(search_result[:url]) if search_result
|
13
13
|
end
|
14
14
|
end
|
data/myimdb.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{myimdb}
|
8
|
-
s.version = "0.4.0"
|
8
|
+
s.version = "0.4.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Gaurav"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-03-23}
|
13
13
|
s.email = %q{gaurav@vinsol.com}
|
14
14
|
s.executables = ["myimdb", "myimdb-catalogue"]
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,6 +26,7 @@ Gem::Specification.new do |s|
|
|
26
26
|
"lib/myimdb.rb",
|
27
27
|
"lib/myimdb/scraper.rb",
|
28
28
|
"lib/myimdb/scraper/base.rb",
|
29
|
+
"lib/myimdb/scraper/freebase.rb",
|
29
30
|
"lib/myimdb/scraper/imdb.rb",
|
30
31
|
"lib/myimdb/scraper/metacritic.rb",
|
31
32
|
"lib/myimdb/scraper/rotten_tomatoes.rb",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: myimdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gaurav
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-03-23 00:00:00 +05:30
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -52,6 +52,7 @@ files:
|
|
52
52
|
- lib/myimdb.rb
|
53
53
|
- lib/myimdb/scraper.rb
|
54
54
|
- lib/myimdb/scraper/base.rb
|
55
|
+
- lib/myimdb/scraper/freebase.rb
|
55
56
|
- lib/myimdb/scraper/imdb.rb
|
56
57
|
- lib/myimdb/scraper/metacritic.rb
|
57
58
|
- lib/myimdb/scraper/rotten_tomatoes.rb
|