derobo-ofdb 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/ofdb.rb +12 -0
- data/lib/ofdb/movie.rb +73 -0
- data/lib/ofdb/search.rb +49 -0
- data/lib/ofdb/string_extensions.rb +17 -0
- metadata +86 -0
data/lib/ofdb.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Entry point of the gem: puts this directory on the load path, loads the
# runtime dependencies and then the library's own files.
$:.unshift(File.dirname(__FILE__)) unless $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
require 'open-uri'
require 'cgi' # CGI::escape / CGI::unescapeHTML are used by search.rb and string_extensions.rb
require 'rubygems'
require 'hpricot'

require 'ofdb/string_extensions'
require 'ofdb/movie' # the shipped file is lib/ofdb/movie.rb (singular)
require 'ofdb/search'

# Namespace for the ofdb.de wrapper classes.
module Ofdb
  # Version of this library.
  VERSION = '0.0.1'
end
|
data/lib/ofdb/movie.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
module Ofdb
  # Wraps an ofdb.de movie page.
  #
  # The class is lazy: the movie page is fetched and parsed on the first call
  # to an accessor that needs it, and the parsed document is then reused.
  # Accessors that look for a single value return nil when the page does not
  # contain it.
  class Movie
    # Human-readable title derived from the name given to #initialize.
    attr_reader :title

    # name:: path segment that is appended to "http://www.ofdb.de/film/".
    #        Presumably of the form "<id>,<Title-With-Dashes>"; everything
    #        after the first "<digit>," is used to build #title.
    def initialize(name)
      @name = name
      @url = "http://www.ofdb.de/film/#{@name}"
      @title = title_from(name)
    end

    # Opens +url+ through open-uri and returns the opened stream.
    #
    # URI#open is used instead of Kernel#open because Kernel#open stopped
    # accepting URLs in Ruby 3.0.
    def self.find_by_id(url)
      URI.parse(url).open
    end

    # Returns the IMDb id taken from the first link to www.imdb.com, or nil
    # when the page has no such link.
    def imdb
      link = document.search('a[@href^="http://www.imdb.com/"]').first
      link && link.get_attribute("href").to_s.gsub("http://www.imdb.com/Title?", "")
    end

    # Returns a String containing the plot, or nil when the page links to no
    # plot page or the plot page has no suitable text.
    def plot
      link = document.search('a[@href^="plot/"]').first
      return nil unless link

      @plot_doc ||= Hpricot(Ofdb::Movie.find_by_id("http://www.ofdb.de/" + link.get_attribute("href")))
      # The first child of the "Blocksatz" font element that carries no
      # <b>/<br /> markup is taken as the plot text.
      text = @plot_doc.search('//font[@class=Blocksatz]/').find do |node|
        node.to_s !~ %r{</?b>|<br />}
      end
      text && text.to_s.strip
    end

    # Returns the URL of the poster image, or nil when there is none.
    def poster
      image = document.search('img[@src^="http://img.ofdb.de/film/"]').first
      image && image.get_attribute("src").to_s
    end

    # Returns an Array containing the genres.
    def genre
      get_viewphp("page=genre&Genre=").map { |link| link.innerHTML }
    end

    # Returns an Array containing the countries listed for this film.
    def countries
      get_viewphp("Kat=Land").map { |link| link.innerHTML }
    end

    # Returns the year listed for this film, or nil when the page has none.
    def year
      link = get_viewphp("Kat=Jahr").first
      link && link.innerHTML
    end

    private

    # Builds the display title: takes the part of +name+ after the first
    # "<digit>," (or the whole name when there is no such prefix), turns
    # "---" into " - " and every remaining "-" into a space.
    def title_from(name)
      match = /\d,/.match(name)
      slug = match ? match.post_match : name.to_s
      # "---" is listed first so it wins over the single dash at the same position.
      slug.gsub(/---|-/) { |dashes| dashes == "---" ? " - " : " " }
    end

    # Returns the parsed movie page, fetching it on first use.
    def document
      @document ||= Hpricot(Ofdb::Movie.find_by_id(@url))
    end

    # Returns the view.php links inside font.Daten elements. When +filter+ is
    # given it is compiled as a regular expression and only links whose
    # markup matches it are kept.
    def get_viewphp(filter=nil)
      pattern = filter && Regexp.new(filter)
      # NOTE(review): the original selector lacked the closing "]"; confirm
      # against a live page that the well-formed selector matches as intended.
      document.search('font.Daten a[@href^="view.php"]').select do |link|
        pattern.nil? || link.to_s =~ pattern
      end
    end
  end
end
|
data/lib/ofdb/search.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
module Ofdb
  # Runs a search on ofdb.de and exposes the result list.
  #
  #   search = Ofdb::Search.new("Das Leben des Brains")
  #
  # Ofdb::Search is lazy loading: the result page is only requested when
  # #movies (or #exact_match?) is first called.
  class Search
    # The search string given to #initialize.
    attr_reader :query

    # query::      the search string
    # is_imdb_id:: when true, "&Kat=IMDb" is appended to the search URL
    def initialize(query, is_imdb_id=false)
      @is_imdb = is_imdb_id
      @query = query
    end

    # Returns the search results as an Array of Hashes with
    # * :link   => the movie's href on ofdb.de with the leading "film/" removed
    # * :title  => link text with HTML tags stripped
    # * :poster => poster image path taken from the link's onmouseover
    #   attribute, nil if not found
    def movies
      @movies ||= parse_movies
    end

    # Returns true if the search yielded only one result, an exact match.
    def exact_match?
      movies.size == 1
    end

    # Requests the result page for +query+ and returns the opened stream.
    #
    # URI#open is used instead of Kernel#open because Kernel#open stopped
    # accepting URLs in Ruby 3.0.
    def self.query(query, imdb=false)
      url = "http://www.ofdb.de/view.php?page=suchergebnis&SText=#{CGI::escape(query)}#{"&Kat=IMDb" if imdb}"
      URI.parse(url).open
    end

    private

    # Returns the parsed result page, fetching it on first use.
    def document
      @document ||= Hpricot(Ofdb::Search.query(@query, @is_imdb))
    end

    # Builds one result Hash per link whose href starts with "film/".
    def parse_movies
      document.search('a[@href^="film/"]').map do |element|
        {
          :link => element.get_attribute("href").sub(%r{\Afilm/}, ""),
          :title => element.innerHTML.imdb_strip_tags,
          :poster => poster_for(element)
        }
      end
    end

    # Extracts the poster path from the onmouseover attribute of +element+.
    # Returns nil when the attribute has no image path or points at the
    # "na.gif" placeholder.
    def poster_for(element)
      hover = element.get_attribute('onmouseover').to_s
      poster = /images\/film\/.*"\sw/.match(hover).to_s.gsub("\" w", "").gsub("images/film/", "")
      (poster.empty? || poster == "na.gif") ? nil : poster
    end
  end
end
|
data/lib/ofdb/string_extensions.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Ofdb # :nodoc:
  # Helpers mixed into String.
  #
  # Borrowed from the imdb gem:
  # http://github.com/ariejan/imdb/
  module StringExtensions
    # Unescapes HTML entities and converts the result from ISO-8859-1 to
    # UTF-8. The receiver's bytes are interpreted as ISO-8859-1 and the
    # receiver itself is left unmodified.
    def imdb_unescape_html
      if respond_to?(:force_encoding)
        # Ruby 1.9+: Iconv left the standard library in Ruby 2.0, so use the
        # String encoding API. Tagging the copy as ISO-8859-1 first makes
        # the transcoding treat the bytes the same way
        # Iconv.conv("UTF-8", "ISO-8859-1", ...) did.
        CGI::unescapeHTML(dup.force_encoding("ISO-8859-1")).encode("UTF-8")
      else
        # Ruby 1.8 has no String encodings; load Iconv only when it is needed.
        require 'iconv'
        Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
      end
    end

    # Returns a copy of the string with everything between "<" and ">"
    # (opening and closing tags) removed.
    def imdb_strip_tags
      gsub(/<\/?[^>]*>/, "")
    end
  end

  String.send :include, Ofdb::StringExtensions
end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: derobo-ofdb
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- deRobo
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-06-23 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.8.1
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: mime-types
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "1.15"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: diff-lcs
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.1.2
|
44
|
+
version:
|
45
|
+
description: A Ruby wrapper for www.ofdb.de, a german movie indexing page like www.imdb.com.
|
46
|
+
email: rodarmy@gmail.com
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- lib/ofdb.rb
|
55
|
+
- lib/ofdb/movie.rb
|
56
|
+
- lib/ofdb/search.rb
|
57
|
+
- lib/ofdb/string_extensions.rb
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: http://github.com/derobo/Ofdb/
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options:
|
62
|
+
- --inline-source
|
63
|
+
- --charset=UTF-8
|
64
|
+
require_paths:
|
65
|
+
- lib
|
66
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
version:
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
requirements: []
|
79
|
+
|
80
|
+
rubyforge_project:
|
81
|
+
rubygems_version: 1.2.0
|
82
|
+
signing_key:
|
83
|
+
specification_version: 2
|
84
|
+
summary: A Ruby wrapper for www.ofdb.de, a german movie indexing page like www.imdb.com.
|
85
|
+
test_files: []
|
86
|
+
|