myimdb 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/myimdb +6 -3
- data/bin/myimdb-catalogue +1 -1
- data/lib/myimdb/scraper/base.rb +3 -1
- data/lib/myimdb/scraper/freebase.rb +74 -0
- data/lib/myimdb/scraper.rb +2 -1
- data/lib/myimdb/search/base.rb +14 -0
- data/lib/myimdb.rb +1 -1
- data/myimdb.gemspec +3 -2
- metadata +3 -2
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.0
|
1
|
+
0.4.1
|
data/bin/myimdb
CHANGED
@@ -21,6 +21,10 @@ OptionParser.new do |opts|
|
|
21
21
|
options[:metacritic] = true
|
22
22
|
end
|
23
23
|
|
24
|
+
opts.on("-b", "--freebase", "Generates data from Freebase") do
|
25
|
+
options[:freebase] = true
|
26
|
+
end
|
27
|
+
|
24
28
|
opts.on("-i", "--imdb", "Generates data from Imdb") do
|
25
29
|
options[:imdb] = true
|
26
30
|
end
|
@@ -46,8 +50,7 @@ name = ARGV.join(' ')
|
|
46
50
|
|
47
51
|
def details(klass_name, name)
|
48
52
|
search_scope = "#{klass_name.downcase}.com"
|
49
|
-
|
50
|
-
search_result = Myimdb::Search::Google.search_text(name, :restrict_to=> search_scope)[0]
|
53
|
+
search_result = Myimdb::Search::Base.search(name, :restrict_to=> search_scope)[0]
|
51
54
|
site = eval("Myimdb::Scraper::#{klass_name}").new(search_result[:url])
|
52
55
|
puts "===================================================="
|
53
56
|
puts "#{klass_name} details for: #{name}"
|
@@ -57,6 +60,6 @@ rescue Exception=> ex
|
|
57
60
|
puts "Unable to fetch #{klass_name} details for: #{name} because: #{ex.message}"
|
58
61
|
end
|
59
62
|
|
60
|
-
['Imdb', 'RottenTomatoes', 'Metacritic'].each do |site|
|
63
|
+
['Imdb', 'RottenTomatoes', 'Metacritic', 'Freebase'].each do |site|
|
61
64
|
details(site, name) if options[site.downcase.to_sym]
|
62
65
|
end
|
data/bin/myimdb-catalogue
CHANGED
@@ -147,7 +147,7 @@ end
|
|
147
147
|
def generate_metadata(path, name)
|
148
148
|
search_scope = "imdb.com"
|
149
149
|
|
150
|
-
search_result = Myimdb::Search::Google.search_text(name, :restrict_to=> search_scope)[0]
|
150
|
+
search_result = Myimdb::Search::Base.search(name, :restrict_to=> search_scope)[0]
|
151
151
|
imdb = Myimdb::Scraper::Imdb.new(search_result[:url])
|
152
152
|
new_name = name.gsub(/\[\S+\]/, "").strip
|
153
153
|
new_name << " [#{imdb.year}] [#{imdb.rating},#{imdb.votes}] [#{imdb.directors.join(',')}]"
|
data/lib/myimdb/scraper/base.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# Error type declared for the scraper layer.
# NOTE(review): its raise sites are outside this view; presumably signals
# markup that could not be parsed. Confirm against HandleExceptions.
class UnformattedHtml < Exception
end
|
2
|
+
# Raised when a remote lookup does not yield a usable document
# (the Freebase scraper raises it when the article cannot be located).
class DocumentNotFound < Exception
end
|
2
3
|
|
3
4
|
module HandleExceptions
|
4
5
|
def self.included(base)
|
@@ -30,7 +31,8 @@ module Myimdb
|
|
30
31
|
class Base
|
31
32
|
include HandleExceptions
|
32
33
|
include Myimdb::Scraper::StringExtensions
|
33
|
-
|
34
|
+
|
35
|
+
def name; end
|
34
36
|
def directors; end
|
35
37
|
def directors_with_url; end
|
36
38
|
def writers; end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Myimdb
  module Scraper
    # Scraper that reads film details from Freebase's JSON topic endpoint.
    #
    # The topic id is derived from a Freebase page URL at construction time;
    # the remote document is fetched on first use and memoized.
    class Freebase < Scraper::Base
      include HTTParty
      format :json
      headers 'Content-Type' => 'application/json'

      # url - a Freebase page URL. Everything up to and including "/view"
      #       is stripped; what remains is used as the topic id.
      def initialize(url)
        @key = url.gsub(/.*?\/view/, '')
        @url = "http://www.freebase.com/experimental/topic/standard?id=#{@key}"
      end

      # Returns an array of director names.
      def directors
        parse_text '/film/film/directed_by'
      end

      # Returns an array of { :name, :url } hashes, one per director.
      def directors_with_url
        parse_text_and_url '/film/film/directed_by'
      end

      # Returns an array of writer names.
      def writers
        parse_text '/film/film/written_by'
      end

      # Returns an array of { :name, :url } hashes, one per writer.
      def writers_with_url
        parse_text_and_url '/film/film/written_by'
      end

      # Returns an array of genre names.
      def genres
        parse_text '/film/film/genre'
      end

      # Returns the first tagline, or nil when the list is empty.
      def tagline
        parse_text('/film/film/tagline')[0]
      end

      # Returns the topic's 'description' field.
      def plot
        document['description']
      end

      # Returns the year of the initial release date.
      def year
        release_date.year
      end

      # Returns the first initial release date, parsed into a Date.
      def release_date
        Date.parse(parse_text('/film/film/initial_release_date')[0])
      end

      # Not implemented for this scraper; returns nil.
      def image
      end

      private

      # Fetches the topic document once and caches its 'result' payload.
      #
      # Raises DocumentNotFound when the response has no entry for the topic
      # id, or when that entry's status code does not contain "ok".
      def document
        @document ||= begin
          result = self.class.get(@url)[@key]
          # A missing entry (nil) or a missing 'code' field is a failed
          # lookup too; report it as DocumentNotFound rather than letting
          # a NoMethodError on nil escape.
          if result.nil? || result['code'].to_s.index('ok').nil?
            raise DocumentNotFound.new("Unable to locate freebase article")
          end
          result['result']
        end
      end

      # Returns the raw 'values' list stored under the given property path.
      def property_values(path)
        document['properties'][path]['values']
      end

      # Returns the 'text' of every value under the given property path.
      def parse_text(path)
        property_values(path).map { |obj| obj['text'] }
      end

      # Returns { :name, :url } for every value under the given property path.
      def parse_text_and_url(path)
        property_values(path).map { |obj| { :name=> obj['text'], :url=> obj['url'] } }
      end

      # Class-level macro; presumably supplied by HandleExceptions through
      # Scraper::Base (its definition is outside this view).
      handle_exceptions_for :directors, :directors_with_url, :writers, :writers_with_url, :genres, :tagline, :plot, :year, :image
    end
  end
end
|
data/lib/myimdb/scraper.rb
CHANGED
data/lib/myimdb/search/base.rb
CHANGED
@@ -8,6 +8,20 @@ module Myimdb
|
|
8
8
|
search_result[:title].gsub(/ \(.*$/, "")
|
9
9
|
end
|
10
10
|
end
|
11
|
+
|
12
|
+
# Queries each search engine in turn and returns the first usable result.
#
# text    - the query, handed unchanged to each engine's search_text.
# options - handed unchanged to each engine's search_text.
#
# Returns the first result that is neither nil nor empty, or an empty
# array when no engine produced anything.
def search(text, options)
  engines = [Myimdb::Search::Google, Myimdb::Search::Bing]

  engines.each_with_index do |engine, position|
    # Announce a fallback only after the first engine has come back empty.
    puts "Retrying using #{engine}" unless position == 0

    # Call the engine directly. The earlier nested `def search(engine, ...)`
    # replaced this two-argument method the first time it ran, so every
    # later call with (text, options) raised ArgumentError.
    result = engine.search_text(text, options)
    return result unless result.nil? || result.empty?
  end

  # Previously the value of `each` (the engine list) leaked out here and
  # callers indexed it as if it were a result set.
  []
end
|
11
25
|
end
|
12
26
|
end
|
13
27
|
end
|
data/lib/myimdb.rb
CHANGED
@@ -8,7 +8,7 @@ require 'myimdb/scraper'
|
|
8
8
|
|
9
9
|
class ImdbMovie
|
10
10
|
def self.search(movie_name)
|
11
|
-
search_result = Myimdb::Search::Google.search_text(movie_name, :restrict_to=> 'imdb.com')[0]
|
11
|
+
search_result = Myimdb::Search::Base.search(movie_name, :restrict_to=> 'imdb.com')[0]
|
12
12
|
Myimdb::Scraper::Imdb.new(search_result[:url]) if search_result
|
13
13
|
end
|
14
14
|
end
|
data/myimdb.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{myimdb}
|
8
|
-
s.version = "0.4.0"
|
8
|
+
s.version = "0.4.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Gaurav"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-03-23}
|
13
13
|
s.email = %q{gaurav@vinsol.com}
|
14
14
|
s.executables = ["myimdb", "myimdb-catalogue"]
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,6 +26,7 @@ Gem::Specification.new do |s|
|
|
26
26
|
"lib/myimdb.rb",
|
27
27
|
"lib/myimdb/scraper.rb",
|
28
28
|
"lib/myimdb/scraper/base.rb",
|
29
|
+
"lib/myimdb/scraper/freebase.rb",
|
29
30
|
"lib/myimdb/scraper/imdb.rb",
|
30
31
|
"lib/myimdb/scraper/metacritic.rb",
|
31
32
|
"lib/myimdb/scraper/rotten_tomatoes.rb",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: myimdb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gaurav
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-
|
12
|
+
date: 2010-03-23 00:00:00 +05:30
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -52,6 +52,7 @@ files:
|
|
52
52
|
- lib/myimdb.rb
|
53
53
|
- lib/myimdb/scraper.rb
|
54
54
|
- lib/myimdb/scraper/base.rb
|
55
|
+
- lib/myimdb/scraper/freebase.rb
|
55
56
|
- lib/myimdb/scraper/imdb.rb
|
56
57
|
- lib/myimdb/scraper/metacritic.rb
|
57
58
|
- lib/myimdb/scraper/rotten_tomatoes.rb
|