overdrive_metadata 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +0 -1
- data/README.txt +4 -2
- data/lib/overdrive_metadata.rb +24 -23
- data/overdrive_metadata.gemspec +1 -4
- metadata +3 -14
data/Gemfile
CHANGED
data/README.txt
CHANGED
@@ -8,10 +8,14 @@ Generate marc records from Overdrive provided metadata spreadsheets.
|
|
8
8
|
|
9
9
|
== FEATURES/PROBLEMS:
|
10
10
|
|
11
|
+
Most problems encountered owe to missing values in the Overdrive spreadsheet.
|
12
|
+
These are mostly handled defensively but missing values in the spreadsheet
|
13
|
+
may create unhandled exceptions in some cases.
|
11
14
|
Have yet to see a Kindle eBook sample - may require tinkering.
|
12
15
|
|
13
16
|
== SYNOPSIS:
|
14
17
|
|
18
|
+
# Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
|
15
19
|
require 'overdrive_metadata'
|
16
20
|
records = OverdriveMetadata.new('spreadsheets/111111.xls')
|
17
21
|
puts "R: " + records.size.to_s # print number of records generated to console
|
@@ -27,9 +31,7 @@ w.close
|
|
27
31
|
|
28
32
|
== REQUIREMENTS:
|
29
33
|
|
30
|
-
htmlentities
|
31
34
|
marc
|
32
|
-
sanitize
|
33
35
|
spreadsheet
|
34
36
|
|
35
37
|
== INSTALL:
|
data/lib/overdrive_metadata.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
require 'htmlentities'
|
2
1
|
require 'marc'
|
3
|
-
require 'sanitize'
|
4
2
|
require 'spreadsheet'
|
5
3
|
|
6
|
-
##
|
7
4
|
# Class to generate marc records from Overdrive provided metadata spreadsheet
|
8
5
|
# Usage:
|
6
|
+
# # Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
|
9
7
|
# require 'overdrive_metadata'
|
10
8
|
# records = OverdriveMetadata.new('spreadsheets/111111.xls')
|
11
9
|
# puts "R: " + records.size.to_s # print number of records generated to console
|
@@ -20,7 +18,7 @@ require 'spreadsheet'
|
|
20
18
|
# w.close
|
21
19
|
|
22
20
|
class OverdriveMetadata
|
23
|
-
VERSION = '1.0.1'
|
21
|
+
VERSION = '1.0.2'
|
24
22
|
|
25
23
|
attr_reader :records
|
26
24
|
|
@@ -62,7 +60,6 @@ class OverdriveMetadata
|
|
62
60
|
rescue Exception => ex
|
63
61
|
raise READ_ERR
|
64
62
|
end
|
65
|
-
@coder = HTMLEntities.new
|
66
63
|
@records = []
|
67
64
|
@count = 0
|
68
65
|
map
|
@@ -73,7 +70,7 @@ class OverdriveMetadata
|
|
73
70
|
@records << create_record(row)
|
74
71
|
end
|
75
72
|
@records.compact!
|
76
|
-
|
73
|
+
merge_by_content_url
|
77
74
|
end
|
78
75
|
|
79
76
|
def create_record(data)
|
@@ -101,7 +98,7 @@ class OverdriveMetadata
|
|
101
98
|
r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
|
102
99
|
r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'}) if r.is_a? EAudioBook
|
103
100
|
r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."}) if r.is_a? EAudioBook
|
104
|
-
field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' =>
|
101
|
+
field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => s.strip + '.', '2' => 'local'}) }
|
105
102
|
r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
|
106
103
|
r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
|
107
104
|
r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
|
@@ -111,7 +108,7 @@ class OverdriveMetadata
|
|
111
108
|
r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
|
112
109
|
return r.record
|
113
110
|
rescue Exception => ex
|
114
|
-
puts @count.to_s + ': ' + ex.message
|
111
|
+
puts @count.to_s + ': ' + "#{ex.message}\n" + ex.backtrace[0..2].join("\n")
|
115
112
|
nil
|
116
113
|
end
|
117
114
|
end
|
@@ -135,33 +132,34 @@ class OverdriveMetadata
|
|
135
132
|
values[:hours] = hr ? hr : ''
|
136
133
|
values[:minutes] = mn ? mn : ''
|
137
134
|
values[:seconds] = sc ? sc : ''
|
138
|
-
values[:author] =
|
139
|
-
values[:title] =
|
135
|
+
values[:author] = clean_string data[HEADERS[:author]]
|
136
|
+
values[:title] = clean_string data[HEADERS[:title]]
|
140
137
|
values[:title_src] = data[HEADERS[:title_src]]
|
141
|
-
values[:reader] =
|
138
|
+
values[:reader] = clean_string data[HEADERS[:reader]]
|
142
139
|
values[:requires] = data[HEADERS[:requires]]
|
143
140
|
values[:format] = data[HEADERS[:format]]
|
144
141
|
values[:filesize] = kb_to_mb(data[HEADERS[:filesize]])
|
145
|
-
values[:summary] =
|
146
|
-
values[:subjects] = data[HEADERS[:subjects]].split
|
142
|
+
values[:summary] = clean_string data[HEADERS[:summary]]
|
143
|
+
values[:subjects] = data[HEADERS[:subjects]].split(',') rescue []
|
147
144
|
values[:download] = data[HEADERS[:download]]
|
148
145
|
values[:excerpt] = data[HEADERS[:excerpt]]
|
149
146
|
values[:thumb] = data[HEADERS[:thumb]]
|
150
147
|
values[:cover] = data[HEADERS[:cover]]
|
151
148
|
values[:oclc] = data[HEADERS[:oclc]].to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + data[HEADERS[:oclc]]
|
149
|
+
values.each { |k, v| values[k] = '' if v.nil? }
|
152
150
|
return values
|
153
151
|
end
|
154
152
|
|
155
|
-
def
|
153
|
+
def merge_by_content_url
|
156
154
|
puts 'Merging (may take a while on large record sets) ...'
|
157
|
-
|
155
|
+
content_url = Hash.new(0)
|
158
156
|
@records.each do |record|
|
159
|
-
|
157
|
+
content_url[record['856']['u']] += 1
|
160
158
|
end
|
161
|
-
|
162
|
-
|
163
|
-
rcds = @records.find_all { |r| r['
|
164
|
-
raise 'Found invalid number of duplicate records: ' +
|
159
|
+
content_url.delete_if { |k,v| v < 2 }
|
160
|
+
content_url.keys.each do |url|
|
161
|
+
rcds = @records.find_all { |r| r['856']['u'] == url }
|
162
|
+
raise 'Found invalid number of duplicate records: ' + url unless rcds.size == 2
|
165
163
|
file_note = rcds[1].find { |f| f.tag == '500' and f['a'] =~ /OverDrive (WMA|MP3) Audiobook/ }
|
166
164
|
excerpt = rcds[1].find { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }
|
167
165
|
if file_note and excerpt
|
@@ -170,7 +168,7 @@ class OverdriveMetadata
|
|
170
168
|
rcds[0].fields.insert(rcds[0].fields.index { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }, excerpt)
|
171
169
|
@records.delete rcds[1]
|
172
170
|
rescue Exception => ex
|
173
|
-
puts
|
171
|
+
puts url + ': ' + 'failed to merge'
|
174
172
|
end
|
175
173
|
end
|
176
174
|
end
|
@@ -191,7 +189,10 @@ class OverdriveMetadata
|
|
191
189
|
return fullname
|
192
190
|
end
|
193
191
|
|
194
|
-
|
192
|
+
def clean_string(input_str)
|
193
|
+
return input_str.gsub(/<.*>/, '').gsub(/"/, '"').gsub(/'/, "'").gsub(/ /, '').gsub(/ë/, 'e').gsub(/<\/?[^>]*>/, '').gsub(/\s{2}+/, ' ').strip rescue ''
|
194
|
+
end
|
195
|
+
|
195
196
|
# Quickly turn 325645 {kb} into 318 {mb} etc. + 1 so not 0
|
196
197
|
|
197
198
|
def kb_to_mb(size)
|
@@ -225,7 +226,7 @@ class OverdriveMetadata
|
|
225
226
|
def make_data_field(tag, ind1, ind2, subfields)
|
226
227
|
s = []
|
227
228
|
subfields.each do |k,v|
|
228
|
-
return nil if v.empty?
|
229
|
+
return nil if v.nil? or v.empty?
|
229
230
|
s << MARC::Subfield.new(k, v)
|
230
231
|
end
|
231
232
|
@record.append MARC::DataField.new(tag, ind1, ind2, *s)
|
data/overdrive_metadata.gemspec
CHANGED
@@ -5,7 +5,7 @@ require "base64"
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{overdrive_metadata}
|
8
|
-
s.version =
|
8
|
+
s.version = '1.0.2'
|
9
9
|
s.authors = ["Mark Cooper"]
|
10
10
|
s.date = %q{2011-11-22}
|
11
11
|
s.homepage = %q{http://www.libcode.net}
|
@@ -30,9 +30,6 @@ Gem::Specification.new do |s|
|
|
30
30
|
s.required_ruby_version = ::Gem::Requirement.new("~> 1.9")
|
31
31
|
|
32
32
|
# dependencies
|
33
|
-
# RubyGems has runtime dependencies (add_dependency) and
|
34
|
-
# development dependencies (add_development_dependency)
|
35
|
-
s.add_development_dependency "simple-templater", ">= 0.0.1.2"
|
36
33
|
s.add_development_dependency "bundler"
|
37
34
|
|
38
35
|
begin
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: overdrive_metadata
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,20 +12,9 @@ cert_chain:
|
|
12
12
|
date: 2011-11-22 00:00:00.000000000 -08:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
|
-
- !ruby/object:Gem::Dependency
|
16
|
-
name: simple-templater
|
17
|
-
requirement: &10315236 !ruby/object:Gem::Requirement
|
18
|
-
none: false
|
19
|
-
requirements:
|
20
|
-
- - ! '>='
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: 0.0.1.2
|
23
|
-
type: :development
|
24
|
-
prerelease: false
|
25
|
-
version_requirements: *10315236
|
26
15
|
- !ruby/object:Gem::Dependency
|
27
16
|
name: bundler
|
28
|
-
requirement: &
|
17
|
+
requirement: &10269756 !ruby/object:Gem::Requirement
|
29
18
|
none: false
|
30
19
|
requirements:
|
31
20
|
- - ! '>='
|
@@ -33,7 +22,7 @@ dependencies:
|
|
33
22
|
version: '0'
|
34
23
|
type: :development
|
35
24
|
prerelease: false
|
36
|
-
version_requirements: *
|
25
|
+
version_requirements: *10269756
|
37
26
|
description: Generate marc records from Overdrive provided metadata spreadsheets.
|
38
27
|
email: markchristophercooper@gmail.com
|
39
28
|
executables: []
|