royw-dvdprofiler_collection 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ (The MIT License)
2
+
3
+ Copyright (c) 2009 Roy Wright
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ 'Software'), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,33 @@
1
+ = dvdprofiler_collection
2
+
3
+ dvdprofiler_collection is a companion to royw-imdb and royw-tmdb gems.
4
+
5
+ This is an interface for the exported Collection.xml from DVD Profiler 3.5.1.
6
+
7
+ Because the file size and parsing time of the Collection.xml can grow quite
8
+ large, we extract only part of the meta-data then save into a Collection.yaml
9
+ file which parses much faster. On my 3GHz quad core (alas with ruby only using
10
+ a single core), xml parsing is about 1:50 for about 1000 titles.
11
+
12
+ The Collection model supports fuzzy matching of titles.
13
+
14
+ The DvdprofilerProfile model provides a clean interface to the profiles using
15
+ first() and all() class methods. Searches can be by ISBN or title.
16
+ Optionally a year may be provided to refine the search.
17
+
18
+ The actual meta-data is accessed via the dvd_hash attribute.
19
+
20
+ Examples:
21
+
22
+ profile = DvdprofilerProfile.first(:isbn => '786936735390')
23
+ profile = DvdprofilerProfile.first(:title => 'Sabrina', :year => '1995')
24
+ profiles = DvdprofilerProfile.all(:title => 'Sabrina')
25
+
26
+ puts profile.isbn
27
+ puts profile.title
28
+
29
+ profile.dvd_hash.each {|k,v| puts "#{k} => #{v}"}
30
+
31
+ == Copyright
32
+
33
+ Copyright (c) 2009 Roy Wright. See LICENSE for details.
@@ -0,0 +1,56 @@
1
# Rakefile for the dvdprofiler_collection gem.
#
# Defines the Jeweler packaging tasks (when Jeweler is installed), the
# :spec and :rcov RSpec tasks, and the rdoc task. The default task is :spec.
require 'rubygems'
require 'rake'
require 'yaml' # YAML is used by the rdoc task below to read VERSION.yml

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "dvdprofiler_collection"
    gem.summary = %Q{Interface to the Collection.xml exported from DVD Profiler}
    gem.email = "roy@wright.org"
    gem.homepage = "http://github.com/royw/dvdprofiler_collection"
    gem.authors = ["Roy Wright"]

    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings

    # keep cached html pages (tt<digits>.html) and the generated
    # Collection.yaml cache out of the packaged gem
    gem.files.reject! do |fn|
      basename = File.basename(fn)
      basename =~ /^tt\d+\.html/ || basename =~ /^Collection\.yaml/
    end
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end


task :default => :spec

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # the version shown in the rdoc title comes from VERSION.yml when present,
  # otherwise the title carries no version
  version = ""
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "dvdprofiler_collection #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
56
+
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 2
3
+ :major: 0
4
+ :minor: 0
@@ -0,0 +1,20 @@
1
# Entry point for the dvdprofiler_collection library.
#
# Puts this file's directory on the load path (unless it is already there,
# either as given or expanded) and then loads the third-party dependencies,
# the project's core-class extensions and the library's own classes.
lib_dir = File.dirname(__FILE__)
$:.unshift(lib_dir) unless
  $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))

require 'rubygems'
require 'yaml'
require 'xmlsimple'
require 'ftools'
require 'mash'
# require 'log4r'

# ruby-debug is only a development aid; the library must still load on
# systems where the debugger is not installed.
begin
  require 'ruby-debug'
rescue LoadError
  # debugger not available - continue without it
end

require 'module_extensions'
require 'numeric_extensions'
require 'kernel_extensions'
require 'file_extensions'
require 'object_extensions'
require 'string_extensions'
require 'dvdprofiler_collection/optional_logger'
require 'dvdprofiler_collection/collection'
require 'dvdprofiler_collection/dvdprofiler_profile'
@@ -0,0 +1,187 @@
1
# == Synopsis
# This model encapsulates the DVDProfiler Collection.xml
#
# On construction the collection is loaded (from the cached .yaml file when
# that is newer than the .xml file, otherwise by parsing the .xml file) and
# then written back to the .yaml cache if the cache is missing or stale.
#
# Three lookup tables are exposed:
#   title_isbn_hash:: normalized title (see Collection.title_pattern) => Array of ISBNs
#   isbn_title_hash:: ISBN => original (un-normalized) title
#   isbn_dvd_hash::   ISBN => Hash of the extracted meta-data for that profile
class Collection
  # various regexes used to clean up a title for matching purposes.
  # used in TITLE_REPLACEMENTS hash below
  PUNCTUATION = /[\?\:\!\"\'\,\.\-\/\*]/
  HTML_ESCAPES = /\&[a-zA-Z]+\;/
  SQUARE_BRACKET_ENCLOSURES = /\[.*?\]/
  PARENTHESIS_ENCLOSURES = /\(.*?\)/
  MULTIPLE_WHITESPACES= /\s+/
  STANDALONE_AMPERSAND = /\s\&\s/
  WIDESCREEN = /widescreen/i
  SPECIAL_EDITION = /special edition/i

  # array of hashes is intentional as the order is critical
  # the enclosures [...] & (...) must be removed first,
  # then " & " must be replaced by " and ",
  # then html escapes &...; must be replaced by a space,
  # then "widescreen" and "special edition" are replaced by a space,
  # then remaining punctuation is replaced by a space,
  # finally multiple whitespaces are reduced to single whitespace
  TITLE_REPLACEMENTS = [
    { SQUARE_BRACKET_ENCLOSURES => '' },
    { PARENTHESIS_ENCLOSURES => '' },
    { STANDALONE_AMPERSAND => ' and ' },
    { HTML_ESCAPES => ' ' },
    { WIDESCREEN => ' ' },
    { SPECIAL_EDITION => ' ' },
    { PUNCTUATION => ' ' },
    { MULTIPLE_WHITESPACES => ' ' },
  ]

  attr_reader :isbn_dvd_hash, :title_isbn_hash, :isbn_title_hash

  # filename:: path to the exported Collection.xml
  # logger::   nil or a logger instance (wrapped in an OptionalLogger)
  #
  # Loads the collection and refreshes the yaml cache.
  def initialize(filename, logger)
    @title_isbn_hash = Hash.new
    @isbn_dvd_hash = Hash.new
    @isbn_title_hash = Hash.new
    @filespec = filename
    @logger = OptionalLogger.new(logger)
    reload
    save
  end

  # save as a collection.yaml file unless the existing
  # collection.yaml is newer than the collection.xml
  def save
    return @logger.error { "can not save, the filespec is nil" } if @filespec.nil?

    yaml_filespec = @filespec.ext('.yaml')
    if !File.exist?(yaml_filespec) || (File.mtime(@filespec) > File.mtime(yaml_filespec))
      @logger.info { "saving: #{yaml_filespec}" }
      File.open(yaml_filespec, "w") do |f|
        YAML.dump(
          {
            :title_isbn_hash => @title_isbn_hash,
            :isbn_title_hash => @isbn_title_hash,
            :isbn_dvd_hash => @isbn_dvd_hash,
          }, f)
      end
    else
      @logger.info { "not saving, yaml file is newer than xml file" }
    end
  end

  # load the collection from the collection.yaml if it exists and is newer
  # than the collection.xml, otherwise from the collection.xml
  def reload
    @title_isbn_hash.clear
    @isbn_dvd_hash.clear
    @isbn_title_hash.clear
    yaml_filespec = @filespec.ext('.yaml')
    if File.exist?(yaml_filespec) && (File.mtime(yaml_filespec) > File.mtime(@filespec))
      load_yaml(yaml_filespec)
    else
      load_xml
    end
  end

  # Extract the director names from the <Credits> data of one dvd.
  #
  # dvd_credits:: the dvd[:credits] value produced by XmlSimple; expected to
  #               be an Array of Hashes, each with a :credit Array whose
  #               entries carry 'CreditType', 'CreditSubtype', 'FirstName',
  #               'MiddleName' and 'LastName' keys.
  #
  # A credit counts as a director when its 'CreditType' is 'Direction' or its
  # 'CreditSubtype' is 'Director'.
  #
  # returns:: Array of "First Middle Last" name Strings, or nil when there
  #           are no director credits (or the data is missing/malformed).
  def find_directors(dvd_credits)
    return nil unless dvd_credits.is_a?(Array)
    directors = nil
    dvd_credits.each do |credits_hash|
      # entries without the expected shape are skipped instead of raising
      next unless credits_hash.is_a?(Hash)
      credits = credits_hash[:credit]
      next unless credits.is_a?(Array)
      credits.each do |credit_hash|
        next unless credit_hash.is_a?(Hash)
        next unless (credit_hash['CreditType'] == 'Direction') || (credit_hash['CreditSubtype'] == 'Director')
        directors ||= []
        directors << person_name(credit_hash)
      end
    end
    directors
  end

  # == Synopsis
  # The titles found between LMCE's Amazon lookup and DVDProfiler sometimes differ in
  # whether or not a prefix of "The", "A", or "An" is included in the title. Here we
  # create an Array of possible titles with and without these prefix words.
  #
  # base_title:: the (already lower-cased) title; nil or empty yields []
  # returns::    Array starting with base_title, followed, for each of
  #              'the', 'a', 'an', by either the title with that prefix
  #              removed (when it has the prefix) or with it added.
  def self.title_permutations(base_title)
    titles = []
    unless base_title.nil? || base_title.empty?
      titles << base_title
      ['the', 'a', 'an'].each do |prefix|
        if base_title =~ /^#{prefix}\s(.*)$/
          titles << $1
        else
          titles << "#{prefix} " + base_title
        end
      end
    end
    titles
  end

  # == Synopsis
  # the titles found between LMCE's Amazon lookup and DVDProfiler quite often differ in the
  # inclusion of punctuation and capitalization. So we create a pattern of lower case words
  # without punctuation and with single spaces between words.
  #
  # src_title:: the title String, or nil
  # returns::   the normalized title, or nil when src_title is nil.
  #             The argument itself is not modified.
  def self.title_pattern(src_title)
    return nil if src_title.nil?
    title = src_title.downcase
    TITLE_REPLACEMENTS.each do |replacement|
      replacement.each do |regex, value|
        title.gsub!(regex, value)
      end
    end
    title.strip
  end

  private

  # Replace the lookup tables with the ones stored in the yaml cache file.
  # NOTE: YAML.load_file deserializes arbitrary objects; the cache file is
  # written by #save and is assumed to be trusted.
  def load_yaml(yaml_filespec)
    @logger.info { "Loading #{yaml_filespec}" }
    data = YAML.load_file(yaml_filespec)
    @title_isbn_hash = data[:title_isbn_hash]
    @isbn_dvd_hash = data[:isbn_dvd_hash]
    @isbn_title_hash = data[:isbn_title_hash]
  end

  # Parse the Collection.xml with XmlSimple and fill the lookup tables.
  # Dvds with a blank id or a blank normalized title are skipped.
  def load_xml
    # must be declared outside the timer block, otherwise the assignment
    # inside the block would create a block-local variable
    collection = Hash.new
    elapsed_time = timer do
      @logger.info { "Loading #{@filespec}" }
      collection = XmlSimple.xml_in(@filespec, { 'KeyToSymbol' => true})
    end
    @logger.info { "XmlSimple.xml_in elapse time: #{elapsed_time.elapsed_time_s}" }
    collection[:dvd].each do |dvd|
      isbn = dvd[:id][0]
      original_title = dvd[:title][0]
      title = Collection.title_pattern(original_title)
      next if isbn.blank? || title.blank?
      @title_isbn_hash[title] ||= []
      @title_isbn_hash[title] << isbn
      @isbn_title_hash[isbn] = original_title
      @isbn_dvd_hash[isbn] = build_dvd_hash(dvd, isbn, original_title)
    end
  end

  # Build the meta-data Hash stored in isbn_dvd_hash for one parsed dvd.
  # Only the non-blank parts of the profile are copied.
  def build_dvd_hash(dvd, isbn, original_title)
    dvd_hash = Hash.new
    dvd_hash[:isbn] = isbn
    dvd_hash[:title] = original_title
    unless dvd[:actors].blank?
      actors = dvd[:actors].compact.collect {|a| a[:actor]}.flatten.compact
      dvd_hash[:actors] = actors.collect do |actor|
        info = Hash.new
        info['name'] = person_name(actor)
        info['role'] = actor['Role']
        info
      end
    end
    dvd_hash[:genres] = dvd[:genres].collect{|a| a[:genre]}.flatten unless dvd[:genres].blank?
    dvd_hash[:studios] = dvd[:studios].collect{|a| a[:studio]}.flatten unless dvd[:studios].blank?
    # these values are stored as a single-element Array holding the comma joined values
    [:productionyear, :rating, :runningtime, :released, :overview].each do |key|
      dvd_hash[key] = [dvd[key].join(',')] unless dvd[key].blank?
    end
    dvd_hash[:lastedited] = dvd[:lastedited][0] unless dvd[:lastedited].blank?
    directors = find_directors(dvd[:credits])
    dvd_hash[:directors] = directors unless directors.blank?
    # these values are copied as parsed
    [:boxset, :mediatypes, :format].each do |key|
      dvd_hash[key] = dvd[key] unless dvd[key].blank?
    end
    dvd_hash
  end

  # Join the non-blank 'FirstName', 'MiddleName' and 'LastName' values of an
  # actor or credit Hash with single spaces.
  def person_name(person)
    ['FirstName', 'MiddleName', 'LastName'].collect { |key| person[key] }.reject { |part| part.blank? }.join(' ')
  end

end
187
+
@@ -0,0 +1,138 @@
1
# This is the model for the DVD Profiler profile which is used
# to find meta data from DVD Profiler's exported Collection.xml
#
# Usage:
#
#   DvdprofilerProfile.collection_filespec = '/path/to/Collection.xml'  # optional, defaults to 'Collection.xml'
#
#   profiles = DvdprofilerProfile.all(:title => 'The Alamo')
#
#   profile = DvdprofilerProfile.first(:isbn => '012345678901')
# or
#   profile = DvdprofilerProfile.first(:title => 'movie title')
# or
#   profile = DvdprofilerProfile.first(:title => 'movie title', :year => '1995')
#
#   puts profile.dvd_hash[:key]
#   puts profile.to_xml
#   puts profile.isbn
#   puts profile.title
#   profile.save(media.path_to(:dvdprofiler_xml))
#
class DvdprofilerProfile

  # Default location of the exported collection. This must be set on the
  # class itself (not inside "class << self", where it would end up on the
  # singleton class and never be seen by the accessor).
  @collection_filespec = 'Collection.xml'

  class << self
    # path of the Collection.xml that the profiles are read from
    attr_reader :collection_filespec

    # Set the path of the Collection.xml. When the path changes, the cached
    # Collection is dropped so that the next lookup loads the new file.
    def collection_filespec=(filespec)
      @collection = nil unless filespec == @collection_filespec
      @collection_filespec = filespec
    end
  end

  # options:
  #   :isbn => String
  #   :title => String
  #   :year => String or Integer production year, only used to refine a title search
  #   :logger => nil or logger instance
  # returns: Array of DvdprofilerProfile instances
  #
  # The ISBN lookup is tried first; the title lookup is only used when the
  # ISBN lookup found nothing. When neither :isbn nor :title is given, all
  # of the profiles in the collection are returned.
  def self.all(options={})
    dvds = collection(options[:logger]).isbn_dvd_hash
    result = []

    # try finding by isbn first
    unless options[:isbn].blank?
      dvd_hash = dvds[options[:isbn]]
      unless dvd_hash.blank?
        result << DvdprofilerProfile.new(dvd_hash, options[:isbn], options[:title], options[:logger])
      end
    end

    # if unable to find by isbn, then try finding by title
    if result.empty? && options.has_key?(:title)
      # compare years as Strings (the collection stores them as Strings);
      # an absent or empty year means "do not filter by year"
      year = options[:year].to_s.strip
      year = nil if year.empty?
      self.find_isbns(options).each do |isbn|
        dvd_hash = dvds[isbn]
        next if dvd_hash.blank?
        # profiles without a production year can not match a requested year
        next if year && !Array(dvd_hash[:productionyear]).include?(year)
        result << DvdprofilerProfile.new(dvd_hash, isbn, options[:title], options[:logger])
      end
    end

    # return all profiles if neither :isbn nor :title are given
    if result.empty? && !options.has_key?(:isbn) && !options.has_key?(:title)
      dvds.each do |isbn, dvd_hash|
        result << DvdprofilerProfile.new(dvd_hash, isbn, nil, options[:logger])
      end
    end

    result
  end

  # options:
  #   :isbn => String
  #   :title => String
  # returns: DvdprofilerProfile instance or nil
  def self.first(options={})
    all(options).first
  end

  # look up ISBN by title
  # expects a :title option
  # returns Array of ISBN Strings (empty when there is no :title option or
  # no profile has a matching normalized title)
  def self.find_isbns(options={})
    result = []
    if options.has_key?(:title)
      result = [collection.title_isbn_hash[Collection.title_pattern(options[:title])]].flatten.uniq.compact
    end
    result
  end

  # The Collection for the current collection_filespec. It is created on
  # first use and then cached, so the logger is only used by the first call.
  def self.collection(logger=nil)
    @collection ||= Collection.new(File.expand_path(@collection_filespec), logger)
  end

  # dvd_hash:: the meta-data Hash for the profile
  # isbn::     the profile's ISBN
  # title::    the title used for the lookup; falls back to dvd_hash[:title] when nil
  # logger::   nil or a logger instance (wrapped in an OptionalLogger)
  def initialize(dvd_hash, isbn, title, logger)
    @dvd_hash = dvd_hash
    @isbn = isbn
    @title = title
    @title ||= @dvd_hash[:title]
    @logger = OptionalLogger.new(logger)
  end

  attr_reader :isbn, :title, :dvd_hash

  # returns: the profile's meta-data (minus nil values) as an XML String
  # with a <movie> root element
  def to_xml
    data = @dvd_hash.stringify_keys
    data.delete_if { |key, value| value.nil? }
    XmlSimple.xml_out(data, 'NoAttr' => true, 'RootName' => 'movie')
  end

  # Write the profile's XML to filespec (see save_to_file). Failures are
  # logged instead of being raised.
  def save(filespec)
    begin
      xml = self.to_xml
      unless xml.blank?
        @logger.debug { "saving #{filespec}" }
        save_to_file(filespec, xml)
      end
    rescue StandardError => e
      @logger.error { "Unable to save dvdprofiler profile to #{filespec} - #{e.to_s}" }
    end
  end

  # Write data to filespec, keeping the previous version of the file as
  # filespec + '~'. The data is first written to filespec + '.new' and then
  # renamed into place; a leftover '.new' file is removed if anything fails.
  def save_to_file(filespec, data)
    new_filespec = filespec + '.new'
    File.open(new_filespec, "w") do |file|
      file.puts(data)
    end
    backup_filespec = filespec + '~'
    File.delete(backup_filespec) if File.exist?(backup_filespec)
    File.rename(filespec, backup_filespec) if File.exist?(filespec)
    File.rename(new_filespec, filespec)
  ensure
    File.delete(new_filespec) if new_filespec && File.exist?(new_filespec)
  end

end