overdrive_metadata 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.autotest ADDED
@@ -0,0 +1,23 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'autotest/restart'
4
+
5
+ # Autotest.add_hook :initialize do |at|
6
+ # at.extra_files << "../some/external/dependency.rb"
7
+ #
8
+ # at.libs << ":../some/external"
9
+ #
10
+ # at.add_exception 'vendor'
11
+ #
12
+ # at.add_mapping(/dependency.rb/) do |f, _|
13
+ # at.files_matching(/test_.*rb$/)
14
+ # end
15
+ #
16
+ # %w(TestA TestB).each do |klass|
17
+ # at.extra_class_map[klass] = "test/test_misc.rb"
18
+ # end
19
+ # end
20
+
21
+ # Autotest.add_hook :run_command do |at|
22
+ # system "rake build"
23
+ # end
data/.gemtest ADDED
File without changes
data/History.txt ADDED
@@ -0,0 +1,2 @@
1
+ === 1.0.0 / 2011-10-27
2
+
data/Manifest.txt ADDED
@@ -0,0 +1,8 @@
1
+ .autotest
2
+ History.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ lib/overdrive_metadata.rb
7
+ raw/test.xls
8
+ test/test_overdrive_metadata.rb
data/README.txt ADDED
@@ -0,0 +1,62 @@
1
+ = overdrive_metadata
2
+
3
+ http://www.libcode.net
4
+
5
+ == DESCRIPTION:
6
+
7
+ Generate MARC records from OverDrive-provided metadata spreadsheets.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ Kindle eBook rows are untested (no sample spreadsheet has been seen yet) and may require tinkering.
12
+
13
+ == SYNOPSIS:
14
+
15
+ require 'overdrive_metadata'
16
+ records = OverdriveMetadata.new('spreadsheets/111111.xls').records
17
+ puts "R: " + records.size.to_s # print number of records generated to console
18
+ w = MARC::Writer.new('generated.mrc')
19
+ records.each do |r|
20
+ begin
21
+ w.write r
22
+ rescue
23
+ puts "FAILED: " + r['245']['a']
24
+ end
25
+ end
26
+ w.close
27
+
28
+ == REQUIREMENTS:
29
+
30
+ htmlentities
31
+ marc
32
+ sanitize
33
+ spreadsheet
34
+
35
+ == INSTALL:
36
+
37
+ sudo gem install overdrive_metadata
38
+
39
+ == LICENSE:
40
+
41
+ (The MIT License)
42
+
43
+ Copyright (c) 2011 Mark Cooper
44
+
45
+ Permission is hereby granted, free of charge, to any person obtaining
46
+ a copy of this software and associated documentation files (the
47
+ 'Software'), to deal in the Software without restriction, including
48
+ without limitation the rights to use, copy, modify, merge, publish,
49
+ distribute, sublicense, and/or sell copies of the Software, and to
50
+ permit persons to whom the Software is furnished to do so, subject to
51
+ the following conditions:
52
+
53
+ The above copyright notice and this permission notice shall be
54
+ included in all copies or substantial portions of the Software.
55
+
56
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
57
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
58
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
59
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
60
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
61
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
62
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+
6
+ Hoe.plugin :gemspec
7
+ Hoe.plugin :git
8
+
9
# Gem specification. The runtime dependencies mirror the `require` lines at
# the top of lib/overdrive_metadata.rb; without them `gem install` produces a
# gem that fails on load. No version constraints are known, so none are set.
Hoe.spec 'overdrive_metadata' do
  developer('Mark Cooper', 'markchristophercooper@gmail.com')

  extra_deps << ['htmlentities', '>= 0']
  extra_deps << ['marc',         '>= 0']
  extra_deps << ['sanitize',     '>= 0']
  extra_deps << ['spreadsheet',  '>= 0']
end
12
+
13
+ # vim: syntax=ruby
@@ -0,0 +1,334 @@
1
+ require 'htmlentities'
2
+ require 'marc'
3
+ require 'sanitize'
4
+ require 'spreadsheet'
5
+
6
##
# Builds MARC records from an OverDrive-provided metadata spreadsheet (.xls).
#
# Every spreadsheet row is turned into one MARC::Record, built by either an
# EBook or an EAudioBook helper depending on the row's format column. Rows
# that cannot be converted are reported on stdout and skipped. Afterwards,
# records sharing the same 020 $a are merged pairwise (see #merge_by_isbn).
#
# Usage:
#   require 'overdrive_metadata'
#   records = OverdriveMetadata.new('spreadsheets/111111.xls')
#   puts "R: " + records.size.to_s # print number of records generated to console
#   w = MARC::Writer.new('generated.mrc')
#   records.each do |r|
#     begin
#       w.write r
#     rescue
#       puts "FAILED: " + r['245']['a']
#     end
#   end
#   w.close

class OverdriveMetadata
  VERSION = '1.0.0'

  # The generated MARC::Record objects (an Array).
  attr_reader :records

  DEF_FORMAT = 'eBook'
  OD_ORG     = 'OverDrive, Inc.'
  OD_URL     = 'http://www.overdrive.com'
  AGENCY     = 'JTH' # Sonoma County Library
  ACCESS     = 'Mode of access: World Wide Web.'
  URL_MSG    = 'Click to download this resource.'
  DISCLAIM   = 'Record generated from Overdrive metadata spreadsheet.'
  READ_ERR   = 'Error, close file, check file path or try resaving file as .xls (not xml)'

  # Zero-based spreadsheet column index for each piece of metadata.
  # add option for config. file in future
  HEADERS = {
    :oclc      => 19,
    :date      => 12,
    :time      => 21,
    :isbn      => 1,
    :author    => 4,
    :title     => 2,
    :place     => 11,
    :publisher => 3,
    :requires  => 10,
    :format    => 9,
    :filesize  => 8,
    :reader    => 14,
    :title_src => 13,
    :summary   => 15,
    :subjects  => 5,
    :download  => 7,
    :excerpt   => 16,
    :cover     => 17,
    :thumb     => 18,
  }

  ##
  # Opens the first worksheet of +metadata_file+, converts every row and
  # merges duplicate ISBNs.
  #
  # Raises RuntimeError whose message starts with READ_ERR when anything
  # goes wrong; the underlying error class and message are appended so the
  # real cause is not lost.

  def initialize(metadata_file)
    @metadata = Spreadsheet.open(metadata_file).worksheet 0
    @coder    = HTMLEntities.new
    @records  = []
    map
    merge_by_isbn
  rescue StandardError => ex
    raise "#{READ_ERR} (#{ex.class}: #{ex.message})"
  end

  ##
  # Yields each generated record; lets the object be used as shown in the
  # usage example.

  def each(&block)
    @records.each(&block)
  end

  ##
  # Number of generated records.

  def size
    @records.size
  end

  ##
  # Converts every worksheet row and appends the result to the record list,
  # then drops the nil entries left by rows that could not be converted
  # (create_record returns nil for those). Returns the record list.
  #
  # Note: records are appended, so calling this again adds the rows a
  # second time.

  def map
    @metadata.each do |row|
      @records << create_record(row)
    end
    # compact! (not compact): the nils must really be removed, because
    # merge_by_isbn indexes into every element.
    @records.compact!
    @records
  end

  ##
  # Builds one MARC record from a spreadsheet row.
  #
  # Returns the MARC::Record, or nil (after printing the error message) when
  # the row cannot be converted.

  def create_record(data)
    field = package_data(data)
    r = field[:format].match(/#{DEF_FORMAT}/) ? EBook.new : EAudioBook.new

    r.make_control_field('001', field[:oclc])
    r.make_006
    r.make_007
    r.make_fixed_field(field[:year], field[:month], field[:day])
    r.make_data_field('020', ' ', ' ', {'a' => field[:isbn] + ' ' + r.isbn}) unless field[:isbn].empty?
    r.make_data_field('037', ' ', ' ', {'b' => OD_ORG, 'n' => OD_URL})
    r.make_data_field('040', ' ', ' ', {'a' => AGENCY, 'c' => AGENCY})
    r.make_data_field('100', '1', ' ', {'a' => normalize_author(field[:author])})
    r.make_title(field[:title], field[:author])
    r.make_publication(field[:place], field[:publisher], field[:year])
    r.make_physical(field[:hours], field[:minutes])
    r.make_data_field('306', ' ', ' ', {'a' => field[:hours] + field[:minutes] + field[:seconds]})
    r.make_data_field('538', ' ', ' ', {'a' => ACCESS})
    r.make_data_field('538', ' ', ' ', {'a' => 'Requires ' + field[:requires] + '.'})
    r.make_data_field('500', ' ', ' ', {'a' => "#{field[:format]} (file size: #{field[:filesize]} MB)."})
    r.make_data_field('511', '0', ' ', {'a' => "Read by #{field[:reader]}."}) unless field[:reader].empty?
    r.make_data_field('520', ' ', ' ', {'a' => field[:summary]}) unless field[:summary].match(/^#+$/)
    r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
    if r.is_a? EAudioBook
      r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'})
      r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."})
    end
    field[:subjects].each do |s|
      r.make_data_field('655', ' ', '7', {'a' => @coder.decode(s).strip + '.', '2' => 'local'})
    end
    r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
    r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
    r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
    r.make_data_field('856', '4', '0', {'u' => field[:excerpt], 'y' => "Excerpt (#{field[:format]})."})
    r.make_data_field('856', '4', '2', {'u' => field[:cover], 'y' => "<img class=\"scl_mwthumb\" src=\"#{field[:thumb]}\" alt=\"Artwork for this title - #{field[:title].gsub(/[^A-Za-z ]/, '')}\" />"})
    r.make_data_field('907', ' ', ' ', {'a' => 'ER'})
    r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
    r.record
  rescue StandardError => ex
    # Best effort per row: a malformed row (including one whose cells cannot
    # be unpacked by package_data) is reported and skipped rather than
    # aborting the whole spreadsheet.
    puts ex.message
    nil
  end

  ##
  # Unpacks a spreadsheet row into a Hash keyed by the HEADERS names, plus
  # the derived keys :year, :month, :day, :hours, :minutes and :seconds.
  #
  # :month and :day are only filled in when the date cell contains an
  # m/d/yyyy style date; :year falls back to the first four-digit run found
  # in the date cell (empty string when there is none).

  def package_data(data)
    values = {}
    values[:isbn]      = data[HEADERS[:isbn]]
    values[:date]      = data[HEADERS[:date]]
    values[:place]     = data[HEADERS[:place]]
    values[:publisher] = data[HEADERS[:publisher]]

    values[:month] = ''
    values[:day]   = ''
    if values[:date].match(/\d{1,2}\/\d{1,2}\/\d{4}/)
      values[:month], values[:day], = values[:date].split '/'
    end
    values[:year] = values[:date].match(/\d{4}/).to_s # Fall-back

    values[:time] = data[HEADERS[:time]]
    hr, mn, sc = values[:time].split ':'
    values[:hours]   = hr || ''
    values[:minutes] = mn || ''
    values[:seconds] = sc || ''

    values[:author]    = @coder.decode(data[HEADERS[:author]])
    values[:title]     = @coder.decode(data[HEADERS[:title]])
    values[:title_src] = data[HEADERS[:title_src]]
    values[:reader]    = @coder.decode(data[HEADERS[:reader]])
    values[:requires]  = data[HEADERS[:requires]]
    values[:format]    = data[HEADERS[:format]]
    values[:filesize]  = kb_to_mb(data[HEADERS[:filesize]])
    values[:summary]   = Sanitize.clean(@coder.decode(data[HEADERS[:summary]])).gsub(/\s{2}+/, '').strip
    values[:subjects]  = data[HEADERS[:subjects]].split ','
    values[:download]  = data[HEADERS[:download]]
    values[:excerpt]   = data[HEADERS[:excerpt]]
    values[:thumb]     = data[HEADERS[:thumb]]
    values[:cover]     = data[HEADERS[:cover]]

    # Without an OCLC number, derive a local control number from the tail of
    # the download URL.
    oclc = data[HEADERS[:oclc]]
    values[:oclc] = oclc.to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + oclc
    values
  end

  ##
  # Merges pairs of records that share the same 020 $a.
  #
  # When the second record of a pair carries both an "OverDrive WMA/MP3
  # Audiobook" 500 note and an excerpt 856 link, those two fields are copied
  # into the first record (in front of its own first 500 / excerpt 856, or
  # at the end when it has none) and the second record is dropped.
  #
  # Raises RuntimeError when an ISBN is shared by a number of records other
  # than two. Returns the record list.

  def merge_by_isbn
    counts = Hash.new(0)
    @records.each do |record|
      counts[record['020']['a']] += 1 if record['020']
    end

    counts.keys.select { |isbn| counts[isbn] > 1 }.each do |isbn|
      rcds = @records.select { |r| r['020'] && r['020']['a'] == isbn }
      raise 'Found invalid number of duplicate records: ' + isbn unless rcds.size == 2

      primary, secondary = rcds
      file_note = secondary.find { |f| f.tag == '500' && f['a'] =~ /OverDrive (WMA|MP3) Audiobook/ }
      excerpt   = secondary.find { |f| f.tag == '856' && f['y'] =~ /Excerpt/ }
      next unless file_note && excerpt

      insert_before(primary.fields, file_note) { |f| f.tag == '500' }
      insert_before(primary.fields, excerpt)   { |f| f.tag == '856' && f['y'] =~ /Excerpt/ }
      # Remove by identity so an equal-looking primary record is never
      # deleted along with the secondary one.
      @records.delete_if { |r| r.equal?(secondary) }
    end
    @records
  end

  ##
  # Derives an identifier from the last nine characters of +id_string+
  # (the whole string when it is shorter), with non-word characters removed.

  def make_id(id_string)
    tail = id_string[-9..-1] || id_string
    tail.gsub(/\W/, '')
  end

  ##
  # Turns "Forename(s) Surname[, suffix]" into "Surname, Forename(s)." with
  # a single trailing full stop.
  #
  # Only the text before the first comma is used. A one-word name is
  # returned as "Name."; an empty string (or one with no name before the
  # first comma) yields an empty string, which make_data_field treats as
  # "skip this field".

  def normalize_author(author)
    return author if author.empty?
    names = author.split(',').first.to_s.split(' ')
    return '' if names.empty?

    surname   = names.last
    forenames = names[0...-1]
    fullname  = forenames.empty? ? surname : "#{surname}, #{forenames.join(' ')}"
    fullname += '.' unless fullname.end_with?('.')
    fullname
  end

  ##
  # Converts a size in KB to a whole number of MB, returned as a String.
  # The quotient is truncated after adding one, so the result is never "0"
  # (e.g. 325645 => "319", 0 => "1").

  def kb_to_mb(size)
    (size.to_f / 1024 + 1).to_i.to_s
  end

  # Inserts +field+ into +fields+ in front of the first element matching the
  # block, or appends it when nothing matches.
  def insert_before(fields, field, &matcher)
    position = fields.index(&matcher) || fields.length
    fields.insert(position, field)
  end
  private :insert_before

  ##
  # Common behaviour for building one MARC record. Subclasses supply the
  # 008 template (@fixed_field), make_006, make_007, make_physical and the
  # +isbn+ / +subject+ qualifier strings.

  class ERecord

    GMD      = '[electronic resource]'
    DATE_ERR = 'Date information not present for fixed field'
    FIXF_ERR = 'Invalid fixed field created'
    TITL_ERR = 'Title data is missing for record'

    # The MARC::Record being built.
    attr_reader :record

    def initialize
      @record = MARC::Record.new
      @ldr = @record.leader
      @ldr[5]  = 'n'
      @ldr[7]  = 'm'
      @ldr[17] = 'M'
      @ldr[18] = 'a'
      # Placeholder; subclasses replace it with their 008 template.
      @fixed_field = ''
    end

    ##
    # Appends a control field unless +value+ is empty (returns nil then).

    def make_control_field(tag, value)
      return nil if value.empty?
      @record.append MARC::ControlField.new(tag, value)
    end

    ##
    # Appends a data field built from +subfields+ (code => value, in hash
    # order). The whole field is skipped (nil returned) when any subfield
    # value is empty.

    def make_data_field(tag, ind1, ind2, subfields)
      return nil if subfields.values.any?(&:empty?)
      built = subfields.map { |code, value| MARC::Subfield.new(code, value) }
      @record.append MARC::DataField.new(tag, ind1, ind2, *built)
    end

    ##
    # Fills the 008 template with the date and appends it as control field
    # 008: positions 0-5 receive yymmdd (only when month or day is given),
    # positions 7-10 receive the four-digit year.
    #
    # Raises DATE_ERR when +year+ is empty and FIXF_ERR when the result is
    # not exactly 40 characters long.

    def make_fixed_field(year, month, day)
      raise DATE_ERR if year.empty?
      # Work on a copy so the template itself is left untouched.
      fixed_field = @fixed_field.dup
      unless month.empty? && day.empty?
        month = '0' + month if month.length == 1
        day   = '0' + day   if day.length == 1
        fixed_field[0..5] = year[2..3] + month + day
      end
      fixed_field[7..10] = year
      raise FIXF_ERR unless fixed_field.length == 40
      make_control_field('008', fixed_field)
    end

    ##
    # Appends the 245 title field. +sor+ is the statement of responsibility
    # (the author string); when present it becomes $c "by ...." and the
    # first indicator is '1', otherwise $h is closed with a full stop and
    # the first indicator is '0'.
    #
    # Raises TITL_ERR when +title+ is empty.

    def make_title(title, sor)
      raise TITL_ERR if title.empty?
      subfields = {'a' => title}
      if sor.empty?
        t_ind1 = '0'
        subfields['h'] = GMD + '.'
      else
        t_ind1 = '1'
        subfields['h'] = GMD + ' /'
        subfields['c'] = sor.end_with?('.') ? "by #{sor}" : "by #{sor}."
      end
      make_data_field('245', t_ind1, non_filing_characters(title), subfields)
    end

    ##
    # Appends the 260 publication field; skipped (nil) unless place,
    # publisher and year are all present.

    def make_publication(place, publisher, year)
      return nil if place.empty? || publisher.empty? || year.empty?
      make_data_field('260', ' ', ' ', {'a' => "#{place} :", 'b' => "#{publisher},", 'c' => "#{year}."})
    end

    ##
    # Second indicator for field 245: the number of leading characters to
    # ignore when filing, for titles starting with "The ", "An " or "A ".

    def non_filing_characters(title)
      case title
      when /^The / then '4'
      when /^An /  then '3'
      when /^A /   then '2'
      else              '0'
      end
    end

  end

  ##
  # Record builder for eBook rows.

  class EBook < ERecord

    attr_reader :isbn, :subject

    def initialize
      super
      @ldr[6] = 'a'
      # NOTE(review): make_fixed_field rejects any template that is not
      # exactly 40 characters long; confirm this literal's internal spacing
      # against the released gem.
      @fixed_field = ' s xxu|||| s 0||| eng d'
      @isbn = '(electronic bk. : OverDrive Electronic Book)'
      @subject = 'Downloadable ebooks.'
      # set leader
    end

    def make_006
      make_control_field('006', 'm d ')
    end

    def make_007
      make_control_field('007', 'cr nnu---|||||')
    end

    # Arguments are accepted for interface parity with EAudioBook and ignored.
    def make_physical(*args)
      make_data_field('300', ' ', ' ', {'a' => "1 online resource."})
    end

  end

  ##
  # Record builder for audiobook rows (anything whose format column does not
  # match DEF_FORMAT).

  class EAudioBook < ERecord

    attr_reader :isbn, :subject

    def initialize
      super
      @ldr[6] = 'i'
      # NOTE(review): make_fixed_field rejects any template that is not
      # exactly 40 characters long; confirm this literal's internal spacing
      # against the released gem.
      @fixed_field = ' s xxunnnn s eng d'
      @isbn = '(sound recording : OverDrive Audio Book)'
      @subject = 'Downloadable audiobooks.'
      # set leader
    end

    def make_006
      make_control_field('006', 'm h ')
    end

    # Audiobooks carry two 007 fields (sound recording and electronic resource).
    def make_007
      make_control_field('007', 'sz usnnnnnnned')
      make_control_field('007', 'cr nna |||||')
    end

    # Skipped (nil) unless both hours and minutes are known.
    def make_physical(hours, minutes)
      return nil if hours.empty? || minutes.empty?
      make_data_field('300', ' ', ' ', {'a' => "1 sound file (ca. #{hours} hr., #{minutes} min.) :", 'b' => 'digital.'})
    end

  end

end
data/raw/test.xls ADDED
Binary file
@@ -0,0 +1,16 @@
1
+ require "shoulda"
2
+ require "overdrive_metadata"
3
+
4
# Test scaffold for OverdriveMetadata. No assertions exist yet; the context
# only prepares an instance built from the bundled sample spreadsheet.
class TestOverdriveMetadata < Test::Unit::TestCase

  # Sample spreadsheet shipped with the gem (raw/test.xls), resolved relative
  # to this file so the suite does not depend on the working directory.
  FIXTURE = File.expand_path('../../raw/test.xls', __FILE__)

  context "Creating Overdrive records" do

    setup do
      @o = OverdriveMetadata.new(FIXTURE)
    end

    # Write some tests ...

  end

end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: overdrive_metadata
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mark Cooper
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-21 00:00:00.000000000 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ requirement: &10533756 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ~>
21
+ - !ruby/object:Gem::Version
22
+ version: '2.12'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *10533756
26
+ description: Generate marc records from Overdrive provided metadata spreadsheets.
27
+ email:
28
+ - markchristophercooper@gmail.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files:
32
+ - History.txt
33
+ - Manifest.txt
34
+ - README.txt
35
+ files:
36
+ - .autotest
37
+ - History.txt
38
+ - Manifest.txt
39
+ - README.txt
40
+ - Rakefile
41
+ - lib/overdrive_metadata.rb
42
+ - raw/test.xls
43
+ - test/test_overdrive_metadata.rb
44
+ - .gemtest
45
+ has_rdoc: true
46
+ homepage: ! 'http://www.libcode.net
47
+
48
+ '
49
+ licenses: []
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --main
53
+ - README.txt
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ! '>='
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubyforge_project: overdrive_metadata
70
+ rubygems_version: 1.5.2
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Generate marc records from Overdrive provided metadata spreadsheets.
74
+ test_files:
75
+ - test/test_overdrive_metadata.rb