overdrive_metadata 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -2,5 +2,4 @@ source 'http://rubygems.org'
2
2
 
3
3
  gem 'htmlentities'
4
4
  gem 'marc'
5
- gem 'sanitize'
6
5
  gem 'spreadsheet'
data/README.txt CHANGED
@@ -8,10 +8,14 @@ Generate marc records from Overdrive provided metadata spreadsheets.
8
8
 
9
9
  == FEATURES/PROBLEMS:
10
10
 
11
+ Most problems encountered are due to missing values in the Overdrive spreadsheet.
12
+ These are mostly handled defensively, but missing values in the spreadsheet
13
+ may still cause unhandled exceptions in some cases.
11
14
  Have yet to see a Kindle eBook sample - may require tinkering.
12
15
 
13
16
  == SYNOPSIS:
14
17
 
18
+ # First remove the header from the Overdrive spreadsheet and save it as .xls (not XML)
15
19
  require 'overdrive_metadata'
16
20
  records = OverdriveMetadata.new('spreadsheets/111111.xls')
17
21
  puts "R: " + records.size.to_s # print number of records generated to console
@@ -27,9 +31,7 @@ w.close
27
31
 
28
32
  == REQUIREMENTS:
29
33
 
30
- htmlentities
31
34
  marc
32
- sanitize
33
35
  spreadsheet
34
36
 
35
37
  == INSTALL:
@@ -1,11 +1,9 @@
1
- require 'htmlentities'
2
1
  require 'marc'
3
- require 'sanitize'
4
2
  require 'spreadsheet'
5
3
 
6
- ##
7
4
  # Class to generate marc records from Overdrive provided metadata spreadsheet
8
5
  # Usage:
6
+ # # First remove the header from the Overdrive spreadsheet and save it as .xls (not XML)
9
7
  # require 'overdrive_metadata'
10
8
  # records = OverdriveMetadata.new('spreadsheets/111111.xls')
11
9
  # puts "R: " + records.size.to_s # print number of records generated to console
@@ -20,7 +18,7 @@ require 'spreadsheet'
20
18
  # w.close
21
19
 
22
20
  class OverdriveMetadata
23
- VERSION = '1.0.1'
21
+ VERSION = '1.0.2'
24
22
 
25
23
  attr_reader :records
26
24
 
@@ -62,7 +60,6 @@ class OverdriveMetadata
62
60
  rescue Exception => ex
63
61
  raise READ_ERR
64
62
  end
65
- @coder = HTMLEntities.new
66
63
  @records = []
67
64
  @count = 0
68
65
  map
@@ -73,7 +70,7 @@ class OverdriveMetadata
73
70
  @records << create_record(row)
74
71
  end
75
72
  @records.compact!
76
- merge_by_isbn
73
+ merge_by_content_url
77
74
  end
78
75
 
79
76
  def create_record(data)
@@ -101,7 +98,7 @@ class OverdriveMetadata
101
98
  r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
102
99
  r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'}) if r.is_a? EAudioBook
103
100
  r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."}) if r.is_a? EAudioBook
104
- field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => @coder.decode(s).strip + '.', '2' => 'local'}) }
101
+ field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => s.strip + '.', '2' => 'local'}) }
105
102
  r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
106
103
  r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
107
104
  r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
@@ -111,7 +108,7 @@ class OverdriveMetadata
111
108
  r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
112
109
  return r.record
113
110
  rescue Exception => ex
114
- puts @count.to_s + ': ' + ex.message
111
+ puts @count.to_s + ': ' + "#{ex.message}\n" + ex.backtrace[0..2].join("\n")
115
112
  nil
116
113
  end
117
114
  end
@@ -135,33 +132,34 @@ class OverdriveMetadata
135
132
  values[:hours] = hr ? hr : ''
136
133
  values[:minutes] = mn ? mn : ''
137
134
  values[:seconds] = sc ? sc : ''
138
- values[:author] = @coder.decode(data[HEADERS[:author]])
139
- values[:title] = @coder.decode(data[HEADERS[:title]])
135
+ values[:author] = clean_string data[HEADERS[:author]]
136
+ values[:title] = clean_string data[HEADERS[:title]]
140
137
  values[:title_src] = data[HEADERS[:title_src]]
141
- values[:reader] = @coder.decode(data[HEADERS[:reader]])
138
+ values[:reader] = clean_string data[HEADERS[:reader]]
142
139
  values[:requires] = data[HEADERS[:requires]]
143
140
  values[:format] = data[HEADERS[:format]]
144
141
  values[:filesize] = kb_to_mb(data[HEADERS[:filesize]])
145
- values[:summary] = Sanitize.clean(@coder.decode(data[HEADERS[:summary]])).gsub(/\s{2}+/, '').strip
146
- values[:subjects] = data[HEADERS[:subjects]].split ','
142
+ values[:summary] = clean_string data[HEADERS[:summary]]
143
+ values[:subjects] = data[HEADERS[:subjects]].split(',') rescue []
147
144
  values[:download] = data[HEADERS[:download]]
148
145
  values[:excerpt] = data[HEADERS[:excerpt]]
149
146
  values[:thumb] = data[HEADERS[:thumb]]
150
147
  values[:cover] = data[HEADERS[:cover]]
151
148
  values[:oclc] = data[HEADERS[:oclc]].to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + data[HEADERS[:oclc]]
149
+ values.each { |k, v| values[k] = '' if v.nil? }
152
150
  return values
153
151
  end
154
152
 
155
- def merge_by_isbn
153
+ def merge_by_content_url
156
154
  puts 'Merging (may take a while on large record sets) ...'
157
- isbns = Hash.new(0)
155
+ content_url = Hash.new(0)
158
156
  @records.each do |record|
159
- isbns[record['020'].value] += 1 if record['020']
157
+ content_url[record['856']['u']] += 1
160
158
  end
161
- isbns.delete_if { |k,v| v < 2 }
162
- isbns.keys.each do |isbn|
163
- rcds = @records.find_all { |r| r['020']['a'] == isbn if r['020'] }
164
- raise 'Found invalid number of duplicate records: ' + isbn unless rcds.size == 2
159
+ content_url.delete_if { |k,v| v < 2 }
160
+ content_url.keys.each do |url|
161
+ rcds = @records.find_all { |r| r['856']['u'] == url }
162
+ raise 'Found invalid number of duplicate records: ' + url unless rcds.size == 2
165
163
  file_note = rcds[1].find { |f| f.tag == '500' and f['a'] =~ /OverDrive (WMA|MP3) Audiobook/ }
166
164
  excerpt = rcds[1].find { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }
167
165
  if file_note and excerpt
@@ -170,7 +168,7 @@ class OverdriveMetadata
170
168
  rcds[0].fields.insert(rcds[0].fields.index { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }, excerpt)
171
169
  @records.delete rcds[1]
172
170
  rescue Exception => ex
173
- puts isbn + ': ' + 'failed to merge'
171
+ puts url + ': ' + 'failed to merge'
174
172
  end
175
173
  end
176
174
  end
@@ -191,7 +189,10 @@ class OverdriveMetadata
191
189
  return fullname
192
190
  end
193
191
 
194
- ##
192
+ def clean_string(input_str)
193
+ return input_str.gsub(/&lt;.*&gt;/, '').gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&#160;/, '').gsub(/&#235;/, 'e').gsub(/<\/?[^>]*>/, '').gsub(/\s{2}+/, ' ').strip rescue ''
194
+ end
195
+
195
196
  # Quickly turn 325645 {kb} into 318 {mb} etc. + 1 so not 0
196
197
 
197
198
  def kb_to_mb(size)
@@ -225,7 +226,7 @@ class OverdriveMetadata
225
226
  def make_data_field(tag, ind1, ind2, subfields)
226
227
  s = []
227
228
  subfields.each do |k,v|
228
- return nil if v.empty?
229
+ return nil if v.nil? or v.empty?
229
230
  s << MARC::Subfield.new(k, v)
230
231
  end
231
232
  @record.append MARC::DataField.new(tag, ind1, ind2, *s)
@@ -5,7 +5,7 @@ require "base64"
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{overdrive_metadata}
8
- s.version = "1.0.1"
8
+ s.version = '1.0.2'
9
9
  s.authors = ["Mark Cooper"]
10
10
  s.date = %q{2011-11-22}
11
11
  s.homepage = %q{http://www.libcode.net}
@@ -30,9 +30,6 @@ Gem::Specification.new do |s|
30
30
  s.required_ruby_version = ::Gem::Requirement.new("~> 1.9")
31
31
 
32
32
  # dependencies
33
- # RubyGems has runtime dependencies (add_dependency) and
34
- # development dependencies (add_development_dependency)
35
- s.add_development_dependency "simple-templater", ">= 0.0.1.2"
36
33
  s.add_development_dependency "bundler"
37
34
 
38
35
  begin
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: overdrive_metadata
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,20 +12,9 @@ cert_chain:
12
12
  date: 2011-11-22 00:00:00.000000000 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
- - !ruby/object:Gem::Dependency
16
- name: simple-templater
17
- requirement: &10315236 !ruby/object:Gem::Requirement
18
- none: false
19
- requirements:
20
- - - ! '>='
21
- - !ruby/object:Gem::Version
22
- version: 0.0.1.2
23
- type: :development
24
- prerelease: false
25
- version_requirements: *10315236
26
15
  - !ruby/object:Gem::Dependency
27
16
  name: bundler
28
- requirement: &10314960 !ruby/object:Gem::Requirement
17
+ requirement: &10269756 !ruby/object:Gem::Requirement
29
18
  none: false
30
19
  requirements:
31
20
  - - ! '>='
@@ -33,7 +22,7 @@ dependencies:
33
22
  version: '0'
34
23
  type: :development
35
24
  prerelease: false
36
- version_requirements: *10314960
25
+ version_requirements: *10269756
37
26
  description: Generate marc records from Overdrive provided metadata spreadsheets.
38
27
  email: markchristophercooper@gmail.com
39
28
  executables: []