overdrive_metadata 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -2,5 +2,4 @@ source 'http://rubygems.org'
2
2
 
3
3
  gem 'htmlentities'
4
4
  gem 'marc'
5
- gem 'sanitize'
6
5
  gem 'spreadsheet'
data/README.txt CHANGED
@@ -8,10 +8,14 @@ Generate marc records from Overdrive provided metadata spreadsheets.
8
8
 
9
9
  == FEATURES/PROBLEMS:
10
10
 
11
+ Most problems encountered are due to missing values in the Overdrive spreadsheet.
12
+ These are mostly handled defensively, but missing values in the spreadsheet
13
+ may still raise unhandled exceptions in some cases.
11
14
  Have yet to see a Kindle eBook sample - may require tinkering.
12
15
 
13
16
  == SYNOPSIS:
14
17
 
18
+ # Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
15
19
  require 'overdrive_metadata'
16
20
  records = OverdriveMetadata.new('spreadsheets/111111.xls')
17
21
  puts "R: " + records.size.to_s # print number of records generated to console
@@ -27,9 +31,7 @@ w.close
27
31
 
28
32
  == REQUIREMENTS:
29
33
 
30
- htmlentities
31
34
  marc
32
- sanitize
33
35
  spreadsheet
34
36
 
35
37
  == INSTALL:
@@ -1,11 +1,9 @@
1
- require 'htmlentities'
2
1
  require 'marc'
3
- require 'sanitize'
4
2
  require 'spreadsheet'
5
3
 
6
- ##
7
4
  # Class to generate marc records from Overdrive provided metadata spreadsheet
8
5
  # Usage:
6
+ # # Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
9
7
  # require 'overdrive_metadata'
10
8
  # records = OverdriveMetadata.new('spreadsheets/111111.xls')
11
9
  # puts "R: " + records.size.to_s # print number of records generated to console
@@ -20,7 +18,7 @@ require 'spreadsheet'
20
18
  # w.close
21
19
 
22
20
  class OverdriveMetadata
23
- VERSION = '1.0.1'
21
+ VERSION = '1.0.2'
24
22
 
25
23
  attr_reader :records
26
24
 
@@ -62,7 +60,6 @@ class OverdriveMetadata
62
60
  rescue Exception => ex
63
61
  raise READ_ERR
64
62
  end
65
- @coder = HTMLEntities.new
66
63
  @records = []
67
64
  @count = 0
68
65
  map
@@ -73,7 +70,7 @@ class OverdriveMetadata
73
70
  @records << create_record(row)
74
71
  end
75
72
  @records.compact!
76
- merge_by_isbn
73
+ merge_by_content_url
77
74
  end
78
75
 
79
76
  def create_record(data)
@@ -101,7 +98,7 @@ class OverdriveMetadata
101
98
  r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
102
99
  r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'}) if r.is_a? EAudioBook
103
100
  r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."}) if r.is_a? EAudioBook
104
- field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => @coder.decode(s).strip + '.', '2' => 'local'}) }
101
+ field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => s.strip + '.', '2' => 'local'}) }
105
102
  r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
106
103
  r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
107
104
  r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
@@ -111,7 +108,7 @@ class OverdriveMetadata
111
108
  r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
112
109
  return r.record
113
110
  rescue Exception => ex
114
- puts @count.to_s + ': ' + ex.message
111
+ puts @count.to_s + ': ' + "#{ex.message}\n" + ex.backtrace[0..2].join("\n")
115
112
  nil
116
113
  end
117
114
  end
@@ -135,33 +132,34 @@ class OverdriveMetadata
135
132
  values[:hours] = hr ? hr : ''
136
133
  values[:minutes] = mn ? mn : ''
137
134
  values[:seconds] = sc ? sc : ''
138
- values[:author] = @coder.decode(data[HEADERS[:author]])
139
- values[:title] = @coder.decode(data[HEADERS[:title]])
135
+ values[:author] = clean_string data[HEADERS[:author]]
136
+ values[:title] = clean_string data[HEADERS[:title]]
140
137
  values[:title_src] = data[HEADERS[:title_src]]
141
- values[:reader] = @coder.decode(data[HEADERS[:reader]])
138
+ values[:reader] = clean_string data[HEADERS[:reader]]
142
139
  values[:requires] = data[HEADERS[:requires]]
143
140
  values[:format] = data[HEADERS[:format]]
144
141
  values[:filesize] = kb_to_mb(data[HEADERS[:filesize]])
145
- values[:summary] = Sanitize.clean(@coder.decode(data[HEADERS[:summary]])).gsub(/\s{2}+/, '').strip
146
- values[:subjects] = data[HEADERS[:subjects]].split ','
142
+ values[:summary] = clean_string data[HEADERS[:summary]]
143
+ values[:subjects] = data[HEADERS[:subjects]].split(',') rescue []
147
144
  values[:download] = data[HEADERS[:download]]
148
145
  values[:excerpt] = data[HEADERS[:excerpt]]
149
146
  values[:thumb] = data[HEADERS[:thumb]]
150
147
  values[:cover] = data[HEADERS[:cover]]
151
148
  values[:oclc] = data[HEADERS[:oclc]].to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + data[HEADERS[:oclc]]
149
+ values.each { |k, v| values[k] = '' if v.nil? }
152
150
  return values
153
151
  end
154
152
 
155
- def merge_by_isbn
153
+ def merge_by_content_url
156
154
  puts 'Merging (may take a while on large record sets) ...'
157
- isbns = Hash.new(0)
155
+ content_url = Hash.new(0)
158
156
  @records.each do |record|
159
- isbns[record['020'].value] += 1 if record['020']
157
+ content_url[record['856']['u']] += 1
160
158
  end
161
- isbns.delete_if { |k,v| v < 2 }
162
- isbns.keys.each do |isbn|
163
- rcds = @records.find_all { |r| r['020']['a'] == isbn if r['020'] }
164
- raise 'Found invalid number of duplicate records: ' + isbn unless rcds.size == 2
159
+ content_url.delete_if { |k,v| v < 2 }
160
+ content_url.keys.each do |url|
161
+ rcds = @records.find_all { |r| r['856']['u'] == url }
162
+ raise 'Found invalid number of duplicate records: ' + url unless rcds.size == 2
165
163
  file_note = rcds[1].find { |f| f.tag == '500' and f['a'] =~ /OverDrive (WMA|MP3) Audiobook/ }
166
164
  excerpt = rcds[1].find { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }
167
165
  if file_note and excerpt
@@ -170,7 +168,7 @@ class OverdriveMetadata
170
168
  rcds[0].fields.insert(rcds[0].fields.index { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }, excerpt)
171
169
  @records.delete rcds[1]
172
170
  rescue Exception => ex
173
- puts isbn + ': ' + 'failed to merge'
171
+ puts url + ': ' + 'failed to merge'
174
172
  end
175
173
  end
176
174
  end
@@ -191,7 +189,10 @@ class OverdriveMetadata
191
189
  return fullname
192
190
  end
193
191
 
194
- ##
192
+ def clean_string(input_str)
193
+ return input_str.gsub(/&lt;.*&gt;/, '').gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&#160;/, '').gsub(/&#235;/, 'e').gsub(/<\/?[^>]*>/, '').gsub(/\s{2}+/, ' ').strip rescue ''
194
+ end
195
+
195
196
  # Quickly turn 325645 {kb} into 318 {mb} etc. + 1 so not 0
196
197
 
197
198
  def kb_to_mb(size)
@@ -225,7 +226,7 @@ class OverdriveMetadata
225
226
  def make_data_field(tag, ind1, ind2, subfields)
226
227
  s = []
227
228
  subfields.each do |k,v|
228
- return nil if v.empty?
229
+ return nil if v.nil? or v.empty?
229
230
  s << MARC::Subfield.new(k, v)
230
231
  end
231
232
  @record.append MARC::DataField.new(tag, ind1, ind2, *s)
@@ -5,7 +5,7 @@ require "base64"
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{overdrive_metadata}
8
- s.version = "1.0.1"
8
+ s.version = '1.0.2'
9
9
  s.authors = ["Mark Cooper"]
10
10
  s.date = %q{2011-11-22}
11
11
  s.homepage = %q{http://www.libcode.net}
@@ -30,9 +30,6 @@ Gem::Specification.new do |s|
30
30
  s.required_ruby_version = ::Gem::Requirement.new("~> 1.9")
31
31
 
32
32
  # dependencies
33
- # RubyGems has runtime dependencies (add_dependency) and
34
- # development dependencies (add_development_dependency)
35
- s.add_development_dependency "simple-templater", ">= 0.0.1.2"
36
33
  s.add_development_dependency "bundler"
37
34
 
38
35
  begin
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: overdrive_metadata
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,20 +12,9 @@ cert_chain:
12
12
  date: 2011-11-22 00:00:00.000000000 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
- - !ruby/object:Gem::Dependency
16
- name: simple-templater
17
- requirement: &10315236 !ruby/object:Gem::Requirement
18
- none: false
19
- requirements:
20
- - - ! '>='
21
- - !ruby/object:Gem::Version
22
- version: 0.0.1.2
23
- type: :development
24
- prerelease: false
25
- version_requirements: *10315236
26
15
  - !ruby/object:Gem::Dependency
27
16
  name: bundler
28
- requirement: &10314960 !ruby/object:Gem::Requirement
17
+ requirement: &10269756 !ruby/object:Gem::Requirement
29
18
  none: false
30
19
  requirements:
31
20
  - - ! '>='
@@ -33,7 +22,7 @@ dependencies:
33
22
  version: '0'
34
23
  type: :development
35
24
  prerelease: false
36
- version_requirements: *10314960
25
+ version_requirements: *10269756
37
26
  description: Generate marc records from Overdrive provided metadata spreadsheets.
38
27
  email: markchristophercooper@gmail.com
39
28
  executables: []