derobo-ofdb 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/ofdb.rb +12 -0
- data/lib/ofdb/movie.rb +73 -0
- data/lib/ofdb/search.rb +49 -0
- data/lib/ofdb/string_extensions.rb +17 -0
- metadata +86 -0
data/lib/ofdb.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Entry point of the gem: puts lib/ on the load path, loads the
# dependencies and the gem's own files, and defines the version constant.

# Add this directory to the load path unless it is already there
# (either as given or in expanded form).
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))

# CGI is used by Ofdb::Search (CGI::escape) and Ofdb::StringExtensions
# (CGI::unescapeHTML), so it has to be loaded before them.
require 'cgi'
require 'open-uri'
require 'rubygems'
require 'hpricot'

require 'ofdb/string_extensions'
# The file shipped with the gem is lib/ofdb/movie.rb (singular).
require 'ofdb/movie'
require 'ofdb/search'

# Namespace for the ofdb.de wrapper classes.
module Ofdb
  VERSION = '0.0.1'
end
|
data/lib/ofdb/movie.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
module Ofdb
  # Wraps a single ofdb.de movie page.
  #
  # The class is lazy: the movie page is only fetched (and parsed) the first
  # time a method needs it, and the parsed document is memoized. The plot
  # lives on a separate page which is fetched the first time #plot is called.
  class Movie
    # Human-readable title derived from the +name+ passed to ::new.
    attr_reader :title

    # Fetches +url+ and returns whatever open-uri returns for it.
    #
    # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is used
    # when open-uri provides it; older Rubies fall back to Kernel#open.
    # NOTE(review): the returned handle is never closed explicitly by the
    # callers in this class.
    def self.find_by_id(url)
      URI.respond_to?(:open) ? URI.open(url) : open(url)
    end

    # +name+ is the part of the movie URL that follows "film/"
    # (presumably "<id>,<Dashed-Title>" -- verify against ofdb.de links).
    # No network access happens here.
    def initialize(name)
      @name = name
      @url = "http://www.ofdb.de/film/#{@name}"
      @title = title_from(name)
    end

    # Returns the IMDB id taken from the first link to imdb.com on the page,
    # or nil when the page has no such link.
    def imdb
      link = document.search('a[@href^="http://www.imdb.com/"]').first
      link && link.get_attribute("href").to_s.gsub("http://www.imdb.com/Title?", "")
    end

    # Returns a String containing the plot, or nil when the movie page has
    # no plot link or the plot page has no usable text node.
    def plot
      link = document.search('a[@href^="plot/"]').first
      return nil unless link

      plot_document(link).search('//font[@class=Blocksatz]/').each do |node|
        html = node.to_s
        #next unless node.search('a[@href="usercenter/"')
        # Skip the bold heading and the line breaks; the first remaining
        # child is taken as the plot text.
        next if html =~ %r{</?b>|<br />}
        return html.strip
      end
      nil
    end

    # Returns the URL of the poster image, or nil when none is found.
    def poster
      image = document.search('img[@src^="http://img.ofdb.de/film/"]').first
      image && image.get_attribute("src").to_s
    end

    # Returns an Array containing the genres.
    def genre
      get_viewphp("page=genre&Genre=").map { |link| link.innerHTML }
    end

    # Returns an Array containing the countries this film was filmed in.
    def countries
      get_viewphp("Kat=Land").map { |link| link.innerHTML }
    end

    # Returns the year this movie was filmed in (as found in the page
    # markup), or nil when the page has no year link.
    def year
      link = get_viewphp("Kat=Jahr").first
      link && link.innerHTML
    end

    private

    # Builds the display title: everything after the first "<digit>," of
    # +name+ (or the whole name when there is no such prefix), with "---"
    # turned into " - " and every remaining "-" into a space.
    def title_from(name)
      id_prefix = /\d,/.match(name.to_s)
      slug = id_prefix ? id_prefix.post_match : name.to_s
      # The alternation tries "---" first, so a triple dash is never split
      # into three single dashes.
      slug.gsub(/---|-/) { |dashes| dashes == "---" ? " - " : " " }
    end

    # Returns the memoized Hpricot document of the movie page.
    def document
      @document ||= Hpricot(Ofdb::Movie.find_by_id(@url))
    end

    # Returns the memoized Hpricot document of the plot page that +link+
    # (an element with a relative "plot/..." href) points to.
    def plot_document(link)
      @plot_doc ||= Hpricot(Ofdb::Movie.find_by_id("http://www.ofdb.de/" + link.get_attribute("href")))
    end

    # Returns an Array of the "view.php" links inside font.Daten elements.
    # When +filter+ is given it is compiled to a Regexp and only links whose
    # markup matches it are kept.
    def get_viewphp(filter=nil)
      links = document.search('font.Daten a[@href^="view.php"]')
      return links.to_a unless filter

      pattern = Regexp.new(filter)
      links.select { |link| link.to_s =~ pattern }
    end
  end
end
|
data/lib/ofdb/search.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# CGI::escape is used to build the search URL.
require 'cgi'

module Ofdb
  # Runs a search on ofdb.de and exposes the hits.
  class Search
    # The search string this object was created with.
    attr_reader :query

    # Fetches the ofdb.de result page for +query+ and returns whatever
    # open-uri returns for it. When +imdb+ is true "&Kat=IMDb" is appended
    # to the request.
    #
    # Kernel#open stopped handling URLs in Ruby 3.0, so URI.open is used
    # when open-uri provides it; older Rubies fall back to Kernel#open.
    def self.query(query, imdb=false)
      url = "http://www.ofdb.de/view.php?page=suchergebnis&SText=#{CGI::escape(query)}#{"&Kat=IMDb" if imdb}"
      URI.respond_to?(:open) ? URI.open(url) : open(url)
    end

    # Uses query as search string.
    #
    #   search = Ofdb::Search.new("Das Leben des Brains")
    #
    # Pass true as +is_imdb_id+ to have the query sent with "&Kat=IMDb".
    # Ofdb::Search is lazy loading: nothing is fetched here.
    def initialize(query, is_imdb_id=false)
      @is_imdb = is_imdb_id
      @query = query
    end

    # Returns the search results as an Array of Hashes with
    #* :link => the href of the hit with "film/" removed
    #* :title => String like: $Germantitle / $OriginalTitle ($Year)
    #* :poster => poster file path (below images/film/), nil if not found
    #
    # The result is computed once and memoized.
    def movies
      @movies ||= parse_movies
    end

    # Returns true if the search yielded only one result, an exact match.
    def exact_match?
      movies.size == 1
    end

    private

    # Returns the memoized Hpricot document of the result page.
    def document
      @document ||= Hpricot(Ofdb::Search.query(@query, @is_imdb))
    end

    # Builds one result Hash per link whose href starts with "film/".
    def parse_movies
      document.search('a[@href^="film/"]').map do |element|
        {
          :link => element.get_attribute("href").gsub("film/", ""),
          :title => element.innerHTML.imdb_strip_tags,
          :poster => poster_for(element)
        }
      end
    end

    # Extracts the poster path from the element's onmouseover attribute:
    # the text between "images/film/" and the closing quote that is
    # followed by whitespace and "w". Returns nil when the attribute is
    # missing, does not contain such a path, or names the "na.gif"
    # placeholder.
    def poster_for(element)
      hover = element.get_attribute('onmouseover').to_s
      file = hover[%r{images/film/([^"]*)"\sw}, 1]
      return nil if file.nil? || file.empty? || file == "na.gif"

      file
    end
  end
end
|
data/lib/ofdb/string_extensions.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# CGI::unescapeHTML is used by imdb_unescape_html.
require 'cgi'

module Ofdb # :nodoc:
  # Helpers mixed into String (see the String.send call below).
  # Borrowed from the imdb gem:
  # http://github.com/ariejan/imdb/
  module StringExtensions
    # Unescapes HTML entities and converts the text from ISO-8859-1 to
    # UTF-8. The receiver's bytes are always interpreted as ISO-8859-1,
    # whatever encoding the String is tagged with. The receiver itself is
    # not modified.
    def imdb_unescape_html
      if respond_to?(:encode)
        # Ruby 1.9 and later: Iconv is gone from the standard library since
        # 2.0, so use the built-in transcoding on a re-tagged copy.
        CGI::unescapeHTML(dup.force_encoding("ISO-8859-1")).encode("UTF-8")
      else
        # Ruby 1.8 has no String#encode; fall back to Iconv there.
        require 'iconv'
        Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
      end
    end

    # Returns a copy of the string with everything that looks like an
    # HTML/XML tag ("<...>" or "</...>") removed.
    def imdb_strip_tags
      gsub(/<\/?[^>]*>/, "")
    end
  end

  # Make the helpers available on every String.
  String.send :include, Ofdb::StringExtensions
end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: derobo-ofdb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- deRobo
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-06-23 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.8.1
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: mime-types
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "1.15"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: diff-lcs
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.1.2
|
44
|
+
version:
|
45
|
+
description: A Ruby wrapper for www.ofdb.de, a german movie indexing page like www.imdb.com.
|
46
|
+
email: rodarmy@gmail.com
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- lib/ofdb.rb
|
55
|
+
- lib/ofdb/movie.rb
|
56
|
+
- lib/ofdb/search.rb
|
57
|
+
- lib/ofdb/string_extensions.rb
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: http://github.com/derobo/Ofdb/
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options:
|
62
|
+
- --inline-source
|
63
|
+
- --charset=UTF-8
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.2.0
|
82
|
+
signing_key:
|
83
|
+
specification_version: 2
|
84
|
+
summary: A Ruby wrapper for www.ofdb.de, a german movie indexing page like www.imdb.com.
|
85
|
+
test_files: []
|
86
|
+
|