overdrive_metadata 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +0 -1
- data/README.txt +4 -2
- data/lib/overdrive_metadata.rb +24 -23
- data/overdrive_metadata.gemspec +1 -4
- metadata +3 -14
data/Gemfile
CHANGED
data/README.txt
CHANGED
@@ -8,10 +8,14 @@ Generate marc records from Overdrive provided metadata spreadsheets.
|
|
8
8
|
|
9
9
|
== FEATURES/PROBLEMS:
|
10
10
|
|
11
|
+
Most problems encountered owe to missing values in the Overdrive spreadsheet.
|
12
|
+
These are mostly handled defensively but missing values in the spreadsheet
|
13
|
+
may create unhandled exceptions in some cases.
|
11
14
|
Have yet to see a Kindle eBook sample - may require tinkering.
|
12
15
|
|
13
16
|
== SYNOPSIS:
|
14
17
|
|
18
|
+
# Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
|
15
19
|
require 'overdrive_metadata'
|
16
20
|
records = OverdriveMetadata.new('spreadsheets/111111.xls')
|
17
21
|
puts "R: " + records.size.to_s # print number of records generated to console
|
@@ -27,9 +31,7 @@ w.close
|
|
27
31
|
|
28
32
|
== REQUIREMENTS:
|
29
33
|
|
30
|
-
htmlentities
|
31
34
|
marc
|
32
|
-
sanitize
|
33
35
|
spreadsheet
|
34
36
|
|
35
37
|
== INSTALL:
|
data/lib/overdrive_metadata.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
require 'htmlentities'
|
2
1
|
require 'marc'
|
3
|
-
require 'sanitize'
|
4
2
|
require 'spreadsheet'
|
5
3
|
|
6
|
-
##
|
7
4
|
# Class to generate marc records from Overdrive provided metadata spreadsheet
|
8
5
|
# Usage:
|
6
|
+
# # Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
|
9
7
|
# require 'overdrive_metadata'
|
10
8
|
# records = OverdriveMetadata.new('spreadsheets/111111.xls')
|
11
9
|
# puts "R: " + records.size.to_s # print number of records generated to console
|
@@ -20,7 +18,7 @@ require 'spreadsheet'
|
|
20
18
|
# w.close
|
21
19
|
|
22
20
|
class OverdriveMetadata
|
23
|
-
VERSION = '1.0.1'
|
21
|
+
VERSION = '1.0.2'
|
24
22
|
|
25
23
|
attr_reader :records
|
26
24
|
|
@@ -62,7 +60,6 @@ class OverdriveMetadata
|
|
62
60
|
rescue Exception => ex
|
63
61
|
raise READ_ERR
|
64
62
|
end
|
65
|
-
@coder = HTMLEntities.new
|
66
63
|
@records = []
|
67
64
|
@count = 0
|
68
65
|
map
|
@@ -73,7 +70,7 @@ class OverdriveMetadata
|
|
73
70
|
@records << create_record(row)
|
74
71
|
end
|
75
72
|
@records.compact!
|
76
|
-
|
73
|
+
merge_by_content_url
|
77
74
|
end
|
78
75
|
|
79
76
|
def create_record(data)
|
@@ -101,7 +98,7 @@ class OverdriveMetadata
|
|
101
98
|
r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
|
102
99
|
r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'}) if r.is_a? EAudioBook
|
103
100
|
r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."}) if r.is_a? EAudioBook
|
104
|
-
field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' =>
|
101
|
+
field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => s.strip + '.', '2' => 'local'}) }
|
105
102
|
r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
|
106
103
|
r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
|
107
104
|
r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
|
@@ -111,7 +108,7 @@ class OverdriveMetadata
|
|
111
108
|
r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
|
112
109
|
return r.record
|
113
110
|
rescue Exception => ex
|
114
|
-
puts @count.to_s + ': ' + ex.message
|
111
|
+
puts @count.to_s + ': ' + "#{ex.message}\n" + ex.backtrace[0..2].join("\n")
|
115
112
|
nil
|
116
113
|
end
|
117
114
|
end
|
@@ -135,33 +132,34 @@ class OverdriveMetadata
|
|
135
132
|
values[:hours] = hr ? hr : ''
|
136
133
|
values[:minutes] = mn ? mn : ''
|
137
134
|
values[:seconds] = sc ? sc : ''
|
138
|
-
values[:author] =
|
139
|
-
values[:title] =
|
135
|
+
values[:author] = clean_string data[HEADERS[:author]]
|
136
|
+
values[:title] = clean_string data[HEADERS[:title]]
|
140
137
|
values[:title_src] = data[HEADERS[:title_src]]
|
141
|
-
values[:reader] =
|
138
|
+
values[:reader] = clean_string data[HEADERS[:reader]]
|
142
139
|
values[:requires] = data[HEADERS[:requires]]
|
143
140
|
values[:format] = data[HEADERS[:format]]
|
144
141
|
values[:filesize] = kb_to_mb(data[HEADERS[:filesize]])
|
145
|
-
values[:summary] =
|
146
|
-
values[:subjects] = data[HEADERS[:subjects]].split
|
142
|
+
values[:summary] = clean_string data[HEADERS[:summary]]
|
143
|
+
values[:subjects] = data[HEADERS[:subjects]].split(',') rescue []
|
147
144
|
values[:download] = data[HEADERS[:download]]
|
148
145
|
values[:excerpt] = data[HEADERS[:excerpt]]
|
149
146
|
values[:thumb] = data[HEADERS[:thumb]]
|
150
147
|
values[:cover] = data[HEADERS[:cover]]
|
151
148
|
values[:oclc] = data[HEADERS[:oclc]].to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + data[HEADERS[:oclc]]
|
149
|
+
values.each { |k, v| values[k] = '' if v.nil? }
|
152
150
|
return values
|
153
151
|
end
|
154
152
|
|
155
|
-
def
|
153
|
+
def merge_by_content_url
|
156
154
|
puts 'Merging (may take a while on large record sets) ...'
|
157
|
-
|
155
|
+
content_url = Hash.new(0)
|
158
156
|
@records.each do |record|
|
159
|
-
|
157
|
+
content_url[record['856']['u']] += 1
|
160
158
|
end
|
161
|
-
|
162
|
-
|
163
|
-
rcds = @records.find_all { |r| r['
|
164
|
-
raise 'Found invalid number of duplicate records: ' +
|
159
|
+
content_url.delete_if { |k,v| v < 2 }
|
160
|
+
content_url.keys.each do |url|
|
161
|
+
rcds = @records.find_all { |r| r['856']['u'] == url }
|
162
|
+
raise 'Found invalid number of duplicate records: ' + url unless rcds.size == 2
|
165
163
|
file_note = rcds[1].find { |f| f.tag == '500' and f['a'] =~ /OverDrive (WMA|MP3) Audiobook/ }
|
166
164
|
excerpt = rcds[1].find { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }
|
167
165
|
if file_note and excerpt
|
@@ -170,7 +168,7 @@ class OverdriveMetadata
|
|
170
168
|
rcds[0].fields.insert(rcds[0].fields.index { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }, excerpt)
|
171
169
|
@records.delete rcds[1]
|
172
170
|
rescue Exception => ex
|
173
|
-
puts
|
171
|
+
puts url + ': ' + 'failed to merge'
|
174
172
|
end
|
175
173
|
end
|
176
174
|
end
|
@@ -191,7 +189,10 @@ class OverdriveMetadata
|
|
191
189
|
return fullname
|
192
190
|
end
|
193
191
|
|
194
|
-
|
192
|
+
def clean_string(input_str)
|
193
|
+
return input_str.gsub(/<.*>/, '').gsub(/"/, '"').gsub(/'/, "'").gsub(/ /, '').gsub(/ë/, 'e').gsub(/<\/?[^>]*>/, '').gsub(/\s{2}+/, ' ').strip rescue ''
|
194
|
+
end
|
195
|
+
|
195
196
|
# Quickly turn 325645 {kb} into 318 {mb} etc. + 1 so not 0
|
196
197
|
|
197
198
|
def kb_to_mb(size)
|
@@ -225,7 +226,7 @@ class OverdriveMetadata
|
|
225
226
|
def make_data_field(tag, ind1, ind2, subfields)
|
226
227
|
s = []
|
227
228
|
subfields.each do |k,v|
|
228
|
-
return nil if v.empty?
|
229
|
+
return nil if v.nil? or v.empty?
|
229
230
|
s << MARC::Subfield.new(k, v)
|
230
231
|
end
|
231
232
|
@record.append MARC::DataField.new(tag, ind1, ind2, *s)
|
data/overdrive_metadata.gemspec
CHANGED
@@ -5,7 +5,7 @@ require "base64"
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{overdrive_metadata}
|
8
|
-
s.version =
|
8
|
+
s.version = '1.0.2'
|
9
9
|
s.authors = ["Mark Cooper"]
|
10
10
|
s.date = %q{2011-11-22}
|
11
11
|
s.homepage = %q{http://www.libcode.net}
|
@@ -30,9 +30,6 @@ Gem::Specification.new do |s|
|
|
30
30
|
s.required_ruby_version = ::Gem::Requirement.new("~> 1.9")
|
31
31
|
|
32
32
|
# dependencies
|
33
|
-
# RubyGems has runtime dependencies (add_dependency) and
|
34
|
-
# development dependencies (add_development_dependency)
|
35
|
-
s.add_development_dependency "simple-templater", ">= 0.0.1.2"
|
36
33
|
s.add_development_dependency "bundler"
|
37
34
|
|
38
35
|
begin
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: overdrive_metadata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,20 +12,9 @@ cert_chain:
|
|
12
12
|
date: 2011-11-22 00:00:00.000000000 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
|
-
- !ruby/object:Gem::Dependency
|
16
|
-
name: simple-templater
|
17
|
-
requirement: &10315236 !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
|
-
requirements:
|
20
|
-
- - ! '>='
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 0.0.1.2
|
23
|
-
type: :development
|
24
|
-
prerelease: false
|
25
|
-
version_requirements: *10315236
|
26
15
|
- !ruby/object:Gem::Dependency
|
27
16
|
name: bundler
|
28
|
-
requirement: &
|
17
|
+
requirement: &10269756 !ruby/object:Gem::Requirement
|
29
18
|
none: false
|
30
19
|
requirements:
|
31
20
|
- - ! '>='
|
@@ -33,7 +22,7 @@ dependencies:
|
|
33
22
|
version: '0'
|
34
23
|
type: :development
|
35
24
|
prerelease: false
|
36
|
-
version_requirements: *
|
25
|
+
version_requirements: *10269756
|
37
26
|
description: Generate marc records from Overdrive provided metadata spreadsheets.
|
38
27
|
email: markchristophercooper@gmail.com
|
39
28
|
executables: []
|