royw-dvdprofiler_collection 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ (The MIT License)
2
+
3
+ Copyright (c) 2009 Roy Wright
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ 'Software'), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ = dvdprofiler_collection
2
+
3
+ dvdprofiler_collection is a companion to the royw-imdb and royw-tmdb gems.
4
+
5
+ This is an interface for the exported Collection.xml from DVD Profiler 3.5.1.
6
+
7
+ Because the file size and parsing time of the Collection.xml can grow quite
8
+ large, we extract only part of the meta-data and then save it into a Collection.yaml
9
+ file which parses much faster. On my 3GHz quad core (alas with ruby only using
10
+ a single core), xml parsing is about 1:50 for about 1000 titles.
11
+
12
+ The Collection model supports fuzzy matching of titles.
13
+
14
+ The DvdprofilerProfile model provides a clean interface to the profiles using
15
+ first() and all() class methods. Searches can be by ISBN or title.
16
+ Optionally a year may be provided to refine the search.
17
+
18
+ The actual meta-data is accessed via the dvd_hash attribute.
19
+
20
+ Examples:
21
+
22
+ profile = DvdprofilerProfile.first(:isbn => '786936735390')
23
+ profile = DvdprofilerProfile.first(:title => 'Sabrina', :year => '1995')
24
+ profiles = DvdprofilerProfile.all(:title => 'Sabrina')
25
+
26
+ puts profile.isbn
27
+ puts profile.title
28
+
29
+ profile.dvd_hash.each {|k,v| puts "#{k} => #{v}"}
30
+
31
+ == Copyright
32
+
33
+ Copyright (c) 2009 Roy Wright. See LICENSE for details.
@@ -0,0 +1,56 @@
1
# Rakefile for the dvdprofiler_collection gem.
#
# Defines:
# * the Jeweler gem packaging tasks (only when the jeweler gem can be loaded)
# * :spec  - runs the specs under spec/
# * :rcov  - runs the specs under rcov
# * the RDoc tasks, titled with the version read from VERSION.yml
#
# The default task is :spec.
require 'rubygems'
require 'rake'
require 'yaml' # YAML.load is used by the rdoc task below

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "dvdprofiler_collection"
    gem.summary = %Q{Interface for the Collection.xml exported from DVD Profiler}
    gem.email = "roy@wright.org"
    gem.homepage = "http://github.com/royw/dvdprofiler_collection"
    gem.authors = ["Roy Wright"]

    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings

    # keep cached/generated files (tt<digits>.html pages and the
    # Collection.yaml cache) out of the packaged gem
    gem.files.reject! do |fn|
      basename = File.basename(fn)
      # the dot is escaped so that only a literal "Collection.yaml" prefix matches
      !!(basename =~ /^tt\d+\.html/ || basename =~ /^Collection\.yaml/)
    end
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end


task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # build "major.minor.patch" from VERSION.yml when it is present
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "dvdprofiler_collection #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
+
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 2
3
+ :major: 0
4
+ :minor: 0
@@ -0,0 +1,20 @@
1
+ $:.unshift(File.dirname(__FILE__)) unless
2
+ $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
3
+
4
+ require 'rubygems'
5
+ require 'yaml'
6
+ require 'xmlsimple'
7
+ require 'ftools'
8
+ require 'mash'
9
+ # require 'log4r'
10
+ require 'ruby-debug'
11
+
12
+ require 'module_extensions'
13
+ require 'numeric_extensions'
14
+ require 'kernel_extensions'
15
+ require 'file_extensions'
16
+ require 'object_extensions'
17
+ require 'string_extensions'
18
+ require 'dvdprofiler_collection/optional_logger'
19
+ require 'dvdprofiler_collection/collection'
20
+ require 'dvdprofiler_collection/dvdprofiler_profile'
@@ -0,0 +1,187 @@
1
# == Synopsis
# This model encapsulates the DVDProfiler Collection.xml
#
# The collection is read from the XML export, or from a YAML cache with the
# same base name when that cache exists and is newer than the XML.  Only part
# of each DVD's meta-data is kept, in three lookup tables:
#
# * title_isbn_hash - normalized title (see Collection.title_pattern) => Array of ISBNs
# * isbn_title_hash - ISBN => original title
# * isbn_dvd_hash   - ISBN => Hash of the extracted meta-data
#
# NOTE(review): String#ext, Object#blank?, Kernel#timer and #elapsed_time_s
# are not defined here; presumably they come from the *_extensions files
# required by the gem's main file - confirm.
class Collection
  # various regexes used to clean up a title for matching purposes.
  # used in TITLE_REPLACEMENTS hash below
  PUNCTUATION = /[\?\:\!\"\'\,\.\-\/\*]/
  HTML_ESCAPES = /\&[a-zA-Z]+\;/
  SQUARE_BRACKET_ENCLOSURES = /\[.*?\]/
  PARENTHESIS_ENCLOSURES = /\(.*?\)/
  MULTIPLE_WHITESPACES= /\s+/
  STANDALONE_AMPERSAND = /\s\&\s/
  WIDESCREEN = /widescreen/i
  SPECIAL_EDITION = /special edition/i

  # array of hashes is intentional as the order is critical
  # the enclosures [...] & (...) must be removed first,
  # then " & " must be replaced by " and ",
  # then html escapes &...; must be replaced by a space,
  # then "widescreen" and "special edition" are replaced by a space,
  # then remaining punctuation is replaced by a space,
  # finally multiple whitespaces are reduced to single whitespace
  TITLE_REPLACEMENTS = [
    { SQUARE_BRACKET_ENCLOSURES => '' },
    { PARENTHESIS_ENCLOSURES    => '' },
    { STANDALONE_AMPERSAND      => ' and ' },
    { HTML_ESCAPES              => ' ' },
    { WIDESCREEN                => ' ' },
    { SPECIAL_EDITION           => ' ' },
    { PUNCTUATION               => ' ' },
    { MULTIPLE_WHITESPACES      => ' ' },
  ]

  # attribute keys that make up a person's name, in display order
  NAME_KEYS = ['FirstName', 'MiddleName', 'LastName']

  # elements whose values are joined with ',' and stored as a one element Array
  JOINED_FIELDS = [:productionyear, :rating, :runningtime, :released, :overview]

  # elements copied into the dvd hash exactly as parsed
  COPIED_FIELDS = [:boxset, :mediatypes, :format]

  attr_reader :isbn_dvd_hash, :title_isbn_hash, :isbn_title_hash

  # filename:: path to the exported Collection.xml
  # logger::   logger instance or nil; it is wrapped in an OptionalLogger
  #
  # Loads the collection (see reload) and then writes the YAML cache when
  # it is missing or out of date (see save).
  def initialize(filename, logger)
    @title_isbn_hash = Hash.new
    @isbn_dvd_hash = Hash.new
    @isbn_title_hash = Hash.new
    @filespec = filename
    @logger = OptionalLogger.new(logger)
    reload
    save
  end

  # save as a collection.yaml file unless the existing
  # collection.yaml is newer than the collection.xml
  def save
    unless @filespec.nil?
      yaml_filespec = @filespec.ext('.yaml')
      if !File.exist?(yaml_filespec) || (File.mtime(@filespec) > File.mtime(yaml_filespec))
        @logger.info { "saving: #{yaml_filespec}" }
        File.open(yaml_filespec, "w") do |f|
          YAML.dump(
            {
              :title_isbn_hash => @title_isbn_hash,
              :isbn_title_hash => @isbn_title_hash,
              :isbn_dvd_hash => @isbn_dvd_hash,
            }, f)
        end
      else
        @logger.info { "not saving, yaml file is newer than xml file" }
      end
    else
      @logger.error { "can not save, the filespec is nil" }
    end
  end

  # load the collection from the collection.yaml if it exists and is newer
  # than the collection.xml, otherwise from the collection.xml
  #
  # The three lookup tables are emptied first.  A nil filespec is logged as
  # an error (mirroring save) instead of raising.
  def reload
    @title_isbn_hash.clear
    @isbn_dvd_hash.clear
    @isbn_title_hash.clear
    if @filespec.nil?
      @logger.error { "can not load, the filespec is nil" }
      return
    end
    yaml_filespec = @filespec.ext('.yaml')
    if File.exist?(yaml_filespec) && (File.mtime(yaml_filespec) > File.mtime(@filespec))
      load_from_yaml(yaml_filespec)
    else
      load_from_xml
    end
  end

  # Collect the names of the directors from a DVD's parsed credits.
  #
  # dvd_credits:: the value of the dvd's :credits element; each entry is
  #               expected to be a Hash whose :credit value is an Array of
  #               credit attribute Hashes
  #
  # A credit counts as a director when its 'CreditType' is 'Direction' or
  # its 'CreditSubtype' is 'Director'.
  #
  # returns: Array of name Strings, or nil when no director is found.
  # Entries that do not have the expected shape are skipped, so malformed
  # or missing credits never raise.
  def find_directors(dvd_credits)
    return nil unless dvd_credits.is_a?(Array)
    directors = nil
    dvd_credits.each do |credits_hash|
      next unless credits_hash.is_a?(Hash)
      credits = credits_hash[:credit]
      next unless credits.is_a?(Array)
      credits.each do |credit_hash|
        next unless credit_hash.is_a?(Hash)
        if (credit_hash['CreditType'] == 'Direction') || (credit_hash['CreditSubtype'] == 'Director')
          directors ||= []
          directors << full_name(credit_hash)
        end
      end
    end
    directors
  end

  # == Synopsis
  # The titles found between LMCE's Amazon lookup and DVDProfiler sometimes differ in
  # whether or not a prefix of "The", "A", or "An" is included in the title.  Here we
  # create an Array of possible titles with and without these prefix words.
  #
  # The prefix match is case sensitive and lower case, so base_title is
  # expected to already be normalized (see Collection.title_pattern).
  # Returns an empty Array for a nil or empty base_title.
  def Collection.title_permutations(base_title)
    titles = []
    unless base_title.nil? || base_title.empty?
      titles << base_title
      ['the', 'a', 'an'].each do |prefix|
        titles << "#{prefix} " + base_title unless base_title =~ /^#{prefix}\s/
        titles << $1 if base_title =~ /^#{prefix}\s(.*)$/
      end
    end
    titles
  end

  # == Synopsis
  # the titles found between LMCE's Amazon lookup and DVDProfiler quite often differ in the
  # inclusion of punctuation and capitalization.  So we create a pattern of lower case words
  # without punctuation and with single spaces between words.
  #
  # Returns nil when src_title is nil; src_title itself is never modified.
  def Collection.title_pattern(src_title)
    title = nil
    unless src_title.nil?
      title = src_title.dup
      title.downcase!
      TITLE_REPLACEMENTS.each do |replacement|
        replacement.each do |regex, value|
          title.gsub!(regex, value)
        end
      end
      title.strip!
    end
    title
  end

  private

  # replace the lookup tables with the ones stored in the YAML cache
  def load_from_yaml(yaml_filespec)
    @logger.info { "Loading #{yaml_filespec}" }
    data = YAML.load_file(yaml_filespec)
    @title_isbn_hash = data[:title_isbn_hash]
    @isbn_dvd_hash = data[:isbn_dvd_hash]
    @isbn_title_hash = data[:isbn_title_hash]
  end

  # parse the Collection.xml and index every DVD it contains
  def load_from_xml
    collection = Hash.new
    elapsed_time = timer do
      @logger.info { "Loading #{@filespec}" }
      collection = XmlSimple.xml_in(@filespec, { 'KeyToSymbol' => true})
    end
    @logger.info { "XmlSimple.xml_in elapse time: #{elapsed_time.elapsed_time_s}" }
    # a collection without any dvd elements has no :dvd key
    (collection[:dvd] || []).each do |dvd|
      add_dvd(dvd)
    end
  end

  # index one parsed dvd element; entries with a blank id or title are skipped
  def add_dvd(dvd)
    isbn = dvd[:id][0]
    original_title = dvd[:title][0]
    title = Collection.title_pattern(original_title)
    return if isbn.blank? || title.blank?
    @title_isbn_hash[title] ||= []
    @title_isbn_hash[title] << isbn
    @isbn_title_hash[isbn] = original_title
    @isbn_dvd_hash[isbn] = build_dvd_hash(dvd, isbn, original_title)
  end

  # extract the subset of a parsed dvd element's meta-data that is kept
  def build_dvd_hash(dvd, isbn, original_title)
    dvd_hash = Hash.new
    dvd_hash[:isbn] = isbn
    dvd_hash[:title] = original_title
    unless dvd[:actors].blank?
      actors = dvd[:actors].compact.collect { |a| a[:actor] }.flatten.compact
      dvd_hash[:actors] = actors.collect do |actor|
        info = Hash.new
        info['name'] = full_name(actor)
        info['role'] = actor['Role']
        info
      end
    end
    dvd_hash[:genres] = dvd[:genres].collect { |a| a[:genre] }.flatten unless dvd[:genres].blank?
    dvd_hash[:studios] = dvd[:studios].collect { |a| a[:studio] }.flatten unless dvd[:studios].blank?
    JOINED_FIELDS.each do |field|
      dvd_hash[field] = [dvd[field].join(',')] unless dvd[field].blank?
    end
    dvd_hash[:lastedited] = dvd[:lastedited][0] unless dvd[:lastedited].blank?
    directors = find_directors(dvd[:credits])
    dvd_hash[:directors] = directors unless directors.blank?
    COPIED_FIELDS.each do |field|
      dvd_hash[field] = dvd[field] unless dvd[field].blank?
    end
    dvd_hash
  end

  # join the non-blank first, middle and last name attributes with spaces
  def full_name(person)
    NAME_KEYS.collect { |key| person[key] }.reject { |part| part.blank? }.join(' ')
  end

end
187
+
@@ -0,0 +1,138 @@
1
# This is the model for the DVD Profiler profile which is used
# to find meta data from DVD Profiler's exported Collection.xml
#
# Usage:
#
#  DvdprofilerProfile.collection_filespec = 'path/to/Collection.xml'
#
#  profiles = DvdprofilerProfile.all(:title => 'The Alamo')
#
#  profile = DvdprofilerProfile.first(:isbn => '012345678901')
# or
#  profile = DvdprofilerProfile.first(:title => 'movie title')
#
#  puts profile.dvd_hash[:key]
#  puts profile.to_xml
#  puts profile.isbn
#  puts profile.title
#  profile.save(media.path_to(:dvdprofiler_xml))
#
class DvdprofilerProfile

  # Location of the exported collection.  This is an instance variable of
  # the class itself so the collection_filespec reader below returns it
  # until a different location is assigned.
  @collection_filespec = 'Collection.xml'

  class << self
    attr_reader :collection_filespec

    # Set the location of the exported Collection.xml.  A collection that
    # was already loaded is discarded when the location changes, so the
    # next lookup reads the new file.
    def collection_filespec=(filespec)
      @collection = nil unless filespec == @collection_filespec
      @collection_filespec = filespec
    end
  end

  # options:
  #   :isbn => String
  #   :title => String
  #   :year => production year used to narrow a title search (optional)
  #   :logger => nil or logger instance
  # returns: Array of DvdprofilerProfile instances
  #
  # The ISBN lookup is tried first, the title lookup is only used when the
  # ISBN lookup found nothing.  When neither :isbn nor :title is given,
  # a profile is returned for every DVD in the collection.
  def self.all(options={})
    result = []

    # try finding by isbn first
    if options.has_key?(:isbn) && !options[:isbn].blank?
      dvd_hash = collection(options[:logger]).isbn_dvd_hash[options[:isbn]]
      unless dvd_hash.blank?
        result << DvdprofilerProfile.new(dvd_hash, options[:isbn], options[:title], options[:logger])
      end
    end

    # if unable to find by isbn, then try finding by title
    if result.empty? && options.has_key?(:title)
      isbns = self.find_isbns(options)
      unless isbns.blank?
        isbns.each do |isbn|
          dvd_hash = collection(options[:logger]).isbn_dvd_hash[isbn]
          next if dvd_hash.blank?
          if matches_year?(dvd_hash, options[:year])
            result << DvdprofilerProfile.new(dvd_hash, isbn, options[:title], options[:logger])
          end
        end
      end
    end

    # return all profiles if neither :isbn nor :title are given
    if result.empty? && !options.has_key?(:isbn) && !options.has_key?(:title)
      collection(options[:logger]).isbn_dvd_hash.each do |isbn, dvd_hash|
        result << DvdprofilerProfile.new(dvd_hash, isbn, nil, options[:logger])
      end
    end

    result
  end

  # options:
  #   :isbn => String
  #   :title => String
  # returns: DvdprofilerProfile instance or nil
  def self.first(options={})
    all(options).first
  end

  # look up ISBN by title
  # expects a :title option
  # returns Array of ISBN Strings (empty when the title is not found)
  def self.find_isbns(options={})
    result = []
    if options.has_key?(:title)
      pattern = Collection.title_pattern(options[:title])
      result = [collection(options[:logger]).title_isbn_hash[pattern]].flatten.uniq.compact
    end
    result
  end

  # The Collection shared by all lookups.  It is created on first use from
  # collection_filespec; the logger is only used when the collection is
  # created.
  def self.collection(logger=nil)
    @collection ||= Collection.new(File.expand_path(@collection_filespec), logger)
  end

  # true when no year is requested or when the requested year is one of the
  # profile's production years.  Values are compared as Strings and a
  # profile without a production year never matches a requested year.
  def self.matches_year?(dvd_hash, year)
    return true if year.blank?
    years = [dvd_hash[:productionyear]].flatten.compact.collect { |y| y.to_s }
    years.include?(year.to_s)
  end
  private_class_method :matches_year?

  # dvd_hash:: the meta-data Hash from the collection
  # isbn::     the profile's ISBN
  # title::    the title that was searched for; defaults to dvd_hash[:title]
  # logger::   logger instance or nil
  def initialize(dvd_hash, isbn, title, logger)
    @dvd_hash = dvd_hash
    @isbn = isbn
    @title = title
    @title ||= @dvd_hash[:title]
    @logger = OptionalLogger.new(logger)
  end

  attr_reader :isbn, :title, :dvd_hash

  # returns: the meta-data as an XML String with a <movie> root element;
  # entries with nil values are left out
  def to_xml
    data = @dvd_hash.stringify_keys
    data.delete_if { |key, value| value.nil? }
    XmlSimple.xml_out(data, 'NoAttr' => true, 'RootName' => 'movie')
  end

  # Write the profile's XML to filespec (see save_to_file).
  # Failures are logged as errors instead of being raised.
  def save(filespec)
    begin
      xml = self.to_xml
      unless xml.blank?
        @logger.debug { "saving #{filespec}" }
        save_to_file(filespec, xml)
      end
    rescue StandardError => e
      @logger.error { "Unable to save dvdprofiler profile to #{filespec} - #{e.to_s}" }
    end
  end

  # Write data to filespec without leaving a partially written file behind:
  # the data goes to "<filespec>.new" first, an existing file is kept as
  # the backup "<filespec>~", then the new file is renamed into place.
  def save_to_file(filespec, data)
    new_filespec = filespec + '.new'
    File.open(new_filespec, "w") do |file|
      file.puts(data)
    end
    backup_filespec = filespec + '~'
    File.delete(backup_filespec) if File.exist?(backup_filespec)
    File.rename(filespec, backup_filespec) if File.exist?(filespec)
    File.rename(new_filespec, filespec)
  ensure
    # only still present when one of the steps above failed
    File.delete(new_filespec) if new_filespec && File.exist?(new_filespec)
  end

end