overdrive_metadata 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/.autotest ADDED
@@ -0,0 +1,23 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'autotest/restart'
4
+
5
+ # Autotest.add_hook :initialize do |at|
6
+ # at.extra_files << "../some/external/dependency.rb"
7
+ #
8
+ # at.libs << ":../some/external"
9
+ #
10
+ # at.add_exception 'vendor'
11
+ #
12
+ # at.add_mapping(/dependency.rb/) do |f, _|
13
+ # at.files_matching(/test_.*rb$/)
14
+ # end
15
+ #
16
+ # %w(TestA TestB).each do |klass|
17
+ # at.extra_class_map[klass] = "test/test_misc.rb"
18
+ # end
19
+ # end
20
+
21
+ # Autotest.add_hook :run_command do |at|
22
+ # system "rake build"
23
+ # end
data/.gemtest ADDED
File without changes
data/History.txt ADDED
@@ -0,0 +1,2 @@
1
+ === 1.0.0 / 2011-10-27
2
+
data/Manifest.txt ADDED
@@ -0,0 +1,8 @@
1
+ .autotest
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ lib/overdrive_metadata.rb
7
+ raw/test.xls
8
+ test/test_overdrive_metadata.rb
data/README.txt ADDED
@@ -0,0 +1,62 @@
1
+ = overdrive_metadata
2
+
3
+ http://www.libcode.net
4
+
5
+ == DESCRIPTION:
6
+
7
+ Generate MARC records from OverDrive-provided metadata spreadsheets.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ No Kindle eBook sample has been tested yet - Kindle titles may require tinkering.
12
+
13
+ == SYNOPSIS:
14
+
15
+ require 'overdrive_metadata'
16
+ records = OverdriveMetadata.new('spreadsheets/111111.xls').records
17
+ puts "R: " + records.size.to_s # print number of records generated to console
18
+ w = MARC::Writer.new('generated.mrc')
19
+ records.each do |r|
20
+ begin
21
+ w.write r
22
+ rescue
23
+ puts "FAILED: " + r['245']['a']
24
+ end
25
+ end
26
+ w.close
27
+
28
+ == REQUIREMENTS:
29
+
30
+ htmlentities
31
+ marc
32
+ sanitize
33
+ spreadsheet
34
+
35
+ == INSTALL:
36
+
37
+ sudo gem install overdrive_metadata
38
+
39
+ == LICENSE:
40
+
41
+ (The MIT License)
42
+
43
+ Copyright (c) 2011 Mark Cooper
44
+
45
+ Permission is hereby granted, free of charge, to any person obtaining
46
+ a copy of this software and associated documentation files (the
47
+ 'Software'), to deal in the Software without restriction, including
48
+ without limitation the rights to use, copy, modify, merge, publish,
49
+ distribute, sublicense, and/or sell copies of the Software, and to
50
+ permit persons to whom the Software is furnished to do so, subject to
51
+ the following conditions:
52
+
53
+ The above copyright notice and this permission notice shall be
54
+ included in all copies or substantial portions of the Software.
55
+
56
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
57
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
58
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
59
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
60
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
61
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
62
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ Hoe.plugin :gemspec
7
+ Hoe.plugin :git
8
+
9
+ Hoe.spec 'overdrive_metadata' do
10
+ developer('Mark Cooper', 'markchristophercooper@gmail.com')
11
+ end
12
+
13
+ # vim: syntax=ruby
@@ -0,0 +1,334 @@
1
+ require 'htmlentities'
2
+ require 'marc'
3
+ require 'sanitize'
4
+ require 'spreadsheet'
5
+
6
+ ##
7
+ # Class to generate MARC records from an OverDrive-provided metadata spreadsheet.
8
+ # Usage:
9
+ # require 'overdrive_metadata'
10
+ # records = OverdriveMetadata.new('spreadsheets/111111.xls').records
11
+ # puts "R: " + records.size.to_s # print number of records generated to console
12
+ # w = MARC::Writer.new('generated.mrc')
13
+ # records.each do |r|
14
+ # begin
15
+ # w.write r
16
+ # rescue
17
+ # puts "FAILED: " + r['245']['a']
18
+ # end
19
+ # end
20
+ # w.close
21
+
22
+ class OverdriveMetadata
23
+ VERSION = '1.0.0'
24
+
25
+ attr_reader :records
26
+
27
+ DEF_FORMAT = 'eBook'
28
+ OD_ORG = 'OverDrive, Inc.'
29
+ OD_URL = 'http://www.overdrive.com'
30
+ AGENCY = 'JTH' # Sonoma County Library
31
+ ACCESS = 'Mode of access: World Wide Web.'
32
+ URL_MSG = 'Click to download this resource.'
33
+ DISCLAIM = 'Record generated from Overdrive metadata spreadsheet.'
34
+ READ_ERR = 'Error, close file, check file path or try resaving file as .xls (not xml)'
35
+
36
+ # add option for config. file in future
37
+ HEADERS = {
38
+ :oclc => 19,
39
+ :date => 12,
40
+ :time => 21,
41
+ :isbn => 1,
42
+ :author => 4,
43
+ :title => 2,
44
+ :place => 11,
45
+ :publisher => 3,
46
+ :requires => 10,
47
+ :format => 9,
48
+ :filesize => 8,
49
+ :reader => 14,
50
+ :title_src => 13,
51
+ :summary => 15,
52
+ :subjects => 5,
53
+ :download => 7,
54
+ :excerpt => 16,
55
+ :cover => 17,
56
+ :thumb => 18,
57
+ }
58
+
59
##
# Open the first worksheet of +metadata_file+, build one record per row
# (see #map) and then merge records that share an ISBN (see #merge_by_isbn).
#
# metadata_file - path passed straight to Spreadsheet.open.
#
# Raises RuntimeError with the READ_ERR message when the spreadsheet cannot
# be opened or processed.
def initialize(metadata_file)
  @metadata = Spreadsheet.open(metadata_file).worksheet 0
  @coder = HTMLEntities.new
  @records = []
  map
  merge_by_isbn
rescue StandardError
  # StandardError, not Exception: interrupts, exit requests and
  # out-of-memory conditions must propagate instead of being reported
  # as a spreadsheet read failure.
  raise READ_ERR
end
70
+
71
##
# Build a record for every worksheet row and collect the results in
# @records. Rows for which create_record returns nil are dropped.
#
# Returns @records.
def map
  @metadata.each do |row|
    @records << create_record(row)
  end
  # compact! rather than compact: the non-destructive form returns a copy
  # and would leave the nil entries inside @records.
  @records.compact!
  @records
end
77
+
78
##
# Turn one spreadsheet row into a MARC record.
#
# data - one worksheet row; its cells are unpacked by package_data.
#
# An EBook is built when the row's format text contains DEF_FORMAT,
# otherwise an EAudioBook. Returns the finished record (r.record), or nil
# after printing the error message when anything goes wrong for this row.
def create_record(data)
  # Unpacking happens inside the rescued section so that a single malformed
  # row yields nil instead of aborting the whole import.
  field = package_data(data)
  r = field[:format].match(/#{DEF_FORMAT}/) ? EBook.new : EAudioBook.new
  r.make_control_field('001', field[:oclc])
  r.make_006
  r.make_007
  r.make_fixed_field(field[:year], field[:month], field[:day])
  r.make_data_field('020', ' ', ' ', {'a' => field[:isbn] + ' ' + r.isbn}) unless field[:isbn].empty?
  r.make_data_field('037', ' ', ' ', {'b' => OD_ORG, 'n' => OD_URL})
  r.make_data_field('040', ' ', ' ', {'a' => AGENCY, 'c' => AGENCY})
  r.make_data_field('100', '1', ' ', {'a' => normalize_author(field[:author])})
  r.make_title(field[:title], field[:author])
  r.make_publication(field[:place], field[:publisher], field[:year])
  r.make_physical(field[:hours], field[:minutes])
  r.make_data_field('306', ' ', ' ', {'a' => field[:hours] + field[:minutes] + field[:seconds]})
  r.make_data_field('538', ' ', ' ', {'a' => ACCESS})
  r.make_data_field('538', ' ', ' ', {'a' => 'Requires ' + field[:requires] + '.'})
  r.make_data_field('500', ' ', ' ', {'a' => "#{field[:format]} (file size: #{field[:filesize]} MB)."})
  r.make_data_field('511', '0', ' ', {'a' => "Read by #{field[:reader]}."}) unless field[:reader].empty?
  # A summary consisting only of '#' characters is treated as a placeholder and skipped.
  r.make_data_field('520', ' ', ' ', {'a' => field[:summary]}) unless field[:summary].match(/^#+$/)
  r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
  r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'}) if r.is_a? EAudioBook
  r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."}) if r.is_a? EAudioBook
  field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => @coder.decode(s).strip + '.', '2' => 'local'}) }
  r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
  r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
  r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
  r.make_data_field('856', '4', '0', {'u' => field[:excerpt], 'y' => "Excerpt (#{field[:format]})."})
  r.make_data_field('856', '4', '2', {'u' => field[:cover], 'y' => "<img class=\"scl_mwthumb\" src=\"#{field[:thumb]}\" alt=\"Artwork for this title - #{field[:title].gsub(/[^A-Za-z ]/, '')}\" />"})
  r.make_data_field('907', ' ', ' ', {'a' => 'ER'})
  r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
  r.record
rescue StandardError => ex
  # StandardError, not Exception: a bad row should be reported and skipped,
  # but interrupts and exit requests must not be swallowed.
  puts ex.message
  nil
end
116
+
117
##
# Unpack one spreadsheet row into a Hash of values keyed by field name.
#
# data - one worksheet row, indexed by the column numbers in HEADERS.
#
# Every text value in the result is a String (blank cells become ''), so
# the callers' +empty?+ checks work. :subjects is an Array of Strings.
# :month and :day are only filled when the date cell looks like
# m/d/yyyy; :year falls back to the first four-digit run in the date.
# :oclc is 'ocn' + the OCLC number, or 'ovr' + an id derived from the
# download URL when no OCLC number is present.
def package_data(data)
  values = {}
  values[:isbn]      = cell_text(data, :isbn)
  values[:date]      = cell_text(data, :date)
  values[:place]     = cell_text(data, :place)
  values[:publisher] = cell_text(data, :publisher)
  values[:month]     = ''
  values[:day]       = ''
  if values[:date].match(/\d{1,2}\/\d{1,2}\/\d{4}/)
    month, day = values[:date].split('/')
    values[:month] = month
    values[:day]   = day
  end
  values[:year] = values[:date].match(/\d{4}/).to_s # Fall-back
  values[:time] = cell_text(data, :time)
  hr, mn, sc = values[:time].split(':')
  values[:hours]     = hr || ''
  values[:minutes]   = mn || ''
  values[:seconds]   = sc || ''
  values[:author]    = @coder.decode(cell_text(data, :author))
  values[:title]     = @coder.decode(cell_text(data, :title))
  values[:title_src] = cell_text(data, :title_src)
  values[:reader]    = @coder.decode(cell_text(data, :reader))
  values[:requires]  = cell_text(data, :requires)
  values[:format]    = cell_text(data, :format)
  values[:filesize]  = kb_to_mb(data[HEADERS[:filesize]])
  values[:summary]   = Sanitize.clean(@coder.decode(cell_text(data, :summary))).gsub(/\s{2}+/, '').strip
  values[:subjects]  = cell_text(data, :subjects).split(',')
  values[:download]  = cell_text(data, :download)
  values[:excerpt]   = cell_text(data, :excerpt)
  values[:thumb]     = cell_text(data, :thumb)
  values[:cover]     = cell_text(data, :cover)
  oclc = cell_text(data, :oclc)
  values[:oclc] = oclc.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + oclc
  values
end

##
# Internal helper: the cell for HEADERS[key] as a String ('' when blank).
# Whole-number Floats are rendered without the trailing ".0"
# (NOTE(review): assumes numeric cells such as ISBNs or OCLC numbers may
# arrive as Float from the spreadsheet reader - confirm against its docs).
def cell_text(data, key)
  value = data[HEADERS[key]]
  value = value.to_i if value.is_a?(Float) && value.finite? && value == value.floor
  value.to_s
end
152
+
153
##
# Merge pairs of records that share the same 020 $a value.
#
# For each duplicated ISBN the second record's audiobook file-size note
# (500 matching "OverDrive WMA/MP3 Audiobook") and its Excerpt link (856)
# are copied into the first record, and the second record is removed.
# The pair is left untouched when the second record lacks either field.
#
# Raises RuntimeError when an ISBN occurs on anything other than exactly
# two records. Returns @records.
def merge_by_isbn
  counts = Hash.new(0)
  @records.each do |record|
    # Count by $a, the same key used to look the records up below.
    counts[record['020']['a']] += 1 if record['020']
  end
  counts.delete_if { |_, seen| seen < 2 }

  counts.keys.each do |isbn|
    rcds = @records.select { |r| r['020'] && r['020']['a'] == isbn }
    raise 'Found invalid number of duplicate records: ' + isbn unless rcds.size == 2
    keep, drop = rcds
    file_note = drop.find { |f| f.tag == '500' && f['a'] =~ /OverDrive (WMA|MP3) Audiobook/ }
    excerpt   = drop.find { |f| f.tag == '856' && f['y'] =~ /Excerpt/ }
    next unless file_note && excerpt

    # Insert before the matching field of the kept record; when it has no
    # such field, append instead of passing nil to insert (TypeError).
    note_at = keep.fields.index { |f| f.tag == '500' } || keep.fields.length
    keep.fields.insert(note_at, file_note)
    excerpt_at = keep.fields.index { |f| f.tag == '856' && f['y'] =~ /Excerpt/ } || keep.fields.length
    keep.fields.insert(excerpt_at, excerpt)
    @records.delete drop
  end
  @records
end
172
+
173
##
# Derive an identifier from the tail of +id_string+: the last nine
# characters (or the whole string when it is shorter) with every
# non-word character removed. nil is treated as an empty string.
def make_id(id_string)
  text = id_string.to_s
  # text[-9..-1] is nil for strings shorter than nine characters.
  tail = text.length > 9 ? text[-9..-1] : text
  tail.gsub(/\W/, '')
end
176
+
177
##
# Convert "Forename(s) Surname" into "Surname, Forename(s)." form.
#
# Only the text before the first comma is used. The last word is taken as
# the surname; a single-word name is returned on its own (not duplicated).
# A trailing full stop is added unless one is already present.
#
# Returns '' when there is no name to work with (nil, empty, whitespace
# only, or nothing before the first comma).
def normalize_author(author)
  primary = author.to_s.split(',').first.to_s
  names = primary.split(' ')
  return '' if names.empty?

  surname = names.pop
  fullname = names.empty? ? surname : "#{surname}, #{names.join(' ')}"
  fullname += '.' unless fullname.end_with?('.')
  fullname
end
186
+
187
##
# Convert a size in kilobytes to whole megabytes, returned as a String.
# The quotient is truncated after adding 1, so small files never report
# 0 MB (e.g. 325645 KB -> "319", 10 KB -> "1"). Non-numeric input is
# treated as 0 by to_f and therefore yields "1".

def kb_to_mb(size)
  (size.to_f / 1024 + 1).to_i.to_s
end
193
+
194
+ class ERecord
195
+
196
+ GMD = '[electronic resource]'
197
+ DATE_ERR = 'Date information not present for fixed field'
198
+ FIXF_ERR = 'Invalid fixed field created'
199
+ TITL_ERR = 'Title data is missing for record'
200
+
201
+ attr_reader :record
202
+
203
##
# Create an empty MARC record and preset the leader bytes shared by all
# electronic records. Subclasses call super and then set leader byte 6
# and their own @fixed_field template.
def initialize
  @record = MARC::Record.new
  @ldr = @record.leader
  # Leader positions per MARC 21: 05 record status, 07 bibliographic
  # level, 17 encoding level, 18 descriptive cataloguing form.
  @ldr[5] = 'n'
  @ldr[7] = 'm'
  @ldr[17] = 'M'
  @ldr[18] = 'a'
  # Instance variable (the original assigned a throw-away local), so
  # make_fixed_field fails with FIXF_ERR rather than a NoMethodError on
  # nil when a subclass does not provide a template.
  @fixed_field = ''
end
212
+
213
##
# Append a control field to the record.
#
# tag   - the field tag, e.g. '001'.
# value - the field content; nothing is appended when it is nil or empty.
#
# Returns nil when skipped, otherwise the result of appending to the record.
def make_control_field(tag, value)
  return nil if value.nil? || value.empty?
  @record.append MARC::ControlField.new(tag, value)
end
217
+
218
##
# Append a data field to the record.
#
# tag        - the field tag, e.g. '245'.
# ind1, ind2 - the two indicator characters.
# subfields  - Hash of subfield code => value, added in Hash order.
#
# The whole field is skipped (nil is returned, nothing is appended) when
# any subfield value is nil or empty.
def make_data_field(tag, ind1, ind2, subfields)
  return nil if subfields.values.any? { |value| value.nil? || value.empty? }
  parts = subfields.map { |code, value| MARC::Subfield.new(code, value) }
  @record.append MARC::DataField.new(tag, ind1, ind2, *parts)
end
226
+
227
+ def make_fixed_field(year, month, day)
228
+ raise DATE_ERR if year.empty?
229
+ fixed_field = @fixed_field
230
+ unless month.empty? and day.empty?
231
+ month = '0' + month if month.length == 1
232
+ day = '0' + day if day.length == 1
233
+ fixed_field[0..5] = year[2..3] + month + day
234
+ fixed_field[7..10] = year
235
+ else
236
+ fixed_field[7..10] = year
237
+ end
238
+ raise FIXF_ERR unless fixed_field.length == 40
239
+ make_control_field('008', fixed_field)
240
+ end
241
+
242
##
# Append the 245 title field.
#
# title - the title proper ($a); must not be empty.
# sor   - statement of responsibility; when present it becomes
#         $c "by <sor>." and the first indicator is '1', otherwise the
#         first indicator is '0' and $h is closed with a full stop.
#
# The second indicator comes from non_filing_characters(title).
#
# Raises RuntimeError (TITL_ERR) when title is empty.
def make_title(title, sor)
  raise TITL_ERR if title.empty?
  subfields = { 'a' => title }
  if sor.empty?
    t_ind1 = '0'
    subfields['h'] = GMD + '.'
  else
    t_ind1 = '1'
    subfields['h'] = GMD + ' /'
    # end_with? behaves the same on every Ruby version, unlike sor[-1].
    subfields['c'] = sor.end_with?('.') ? "by #{sor}" : "by #{sor}."
  end
  make_data_field('245', t_ind1, non_filing_characters(title), subfields)
end
257
+
258
+ def make_publication(place, publisher, year)
259
+ return nil if place.empty? or publisher.empty? or year.empty?
260
+ make_data_field('260', ' ', ' ', {'a' => "#{place} :", 'b' => "#{publisher},", 'c' => "#{year}."})
261
+ end
262
+
263
##
# Number of leading characters to skip when filing the title, as a String
# for use as the 245 second indicator: '4' for "The ", '3' for "An ",
# '2' for "A ", otherwise '0'.
#
# The article is matched only at the very start of the title (\A, so text
# after a newline is not considered) and regardless of letter case.
def non_filing_characters(title)
  case title
  when /\AThe /i then '4'
  when /\AAn /i  then '3'
  when /\AA /i   then '2'
  else '0'
  end
end
275
+
276
+ end
277
+
278
+ class EBook < ERecord
279
+
280
+ attr_reader :isbn, :subject
281
+
282
+ def initialize
283
+ super
284
+ @ldr[6] = 'a'
285
+ @fixed_field = ' s xxu|||| s 0||| eng d'
286
+ @isbn = '(electronic bk. : OverDrive Electronic Book)'
287
+ @subject = 'Downloadable ebooks.'
288
+ # set leader
289
+ end
290
+
291
+ def make_006
292
+ make_control_field('006', 'm d ')
293
+ end
294
+
295
+ def make_007
296
+ make_control_field('007', 'cr nnu---|||||')
297
+ end
298
+
299
+ def make_physical(*args)
300
+ make_data_field('300', ' ', ' ', {'a' => "1 online resource."})
301
+ end
302
+
303
+ end
304
+
305
+ class EAudioBook < ERecord
306
+
307
+ attr_reader :isbn, :subject
308
+
309
+ def initialize
310
+ super
311
+ @ldr[6] = 'i'
312
+ @fixed_field = ' s xxunnnn s eng d'
313
+ @isbn = '(sound recording : OverDrive Audio Book)'
314
+ @subject = 'Downloadable audiobooks.'
315
+ # set leader
316
+ end
317
+
318
+ def make_006
319
+ make_control_field('006', 'm h ')
320
+ end
321
+
322
+ def make_007
323
+ make_control_field('007', 'sz usnnnnnnned')
324
+ make_control_field('007', 'cr nna |||||')
325
+ end
326
+
327
+ def make_physical(hours, minutes)
328
+ return nil if hours.empty? or minutes.empty?
329
+ make_data_field('300', ' ', ' ', {'a' => "1 sound file (ca. #{hours} hr., #{minutes} min.) :", 'b' => 'digital.'})
330
+ end
331
+
332
+ end
333
+
334
+ end
data/raw/test.xls ADDED
Binary file
@@ -0,0 +1,16 @@
1
+ require "shoulda"
2
+ require "overdrive_metadata"
3
+
4
+ class TestOverdriveMetadata < Test::Unit::TestCase
5
+
6
+ context "Creating Overdrive records" do
7
+
8
+ setup do
9
+ @o = OverdriveMetadata.new('raw/test.xls')
10
+ end
11
+
12
+ # Write some tests ...
13
+
14
+ end
15
+
16
+ end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: overdrive_metadata
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mark Cooper
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-21 00:00:00.000000000 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ requirement: &10533756 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: '2.12'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *10533756
26
+ description: Generate marc records from Overdrive provided metadata spreadsheets.
27
+ email:
28
+ - markchristophercooper@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files:
32
+ - History.txt
33
+ - Manifest.txt
34
+ - README.txt
35
+ files:
36
+ - .autotest
37
+ - History.txt
38
+ - Manifest.txt
39
+ - README.txt
40
+ - Rakefile
41
+ - lib/overdrive_metadata.rb
42
+ - raw/test.xls
43
+ - test/test_overdrive_metadata.rb
44
+ - .gemtest
45
+ has_rdoc: true
46
+ homepage: ! 'http://www.libcode.net
47
+
48
+ '
49
+ licenses: []
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --main
53
+ - README.txt
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ! '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project: overdrive_metadata
70
+ rubygems_version: 1.5.2
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Generate marc records from Overdrive provided metadata spreadsheets.
74
+ test_files:
75
+ - test/test_overdrive_metadata.rb