overdrive_metadata 1.0.2.2 → 1.0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,2 +1,2 @@
1
- *.gem
2
- *.lock
1
+ *.gem
2
+ *.lock
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source 'http://rubygems.org'
2
-
3
- gem 'marc'
1
+ source 'http://rubygems.org'
2
+
3
+ gem 'marc'
4
4
  gem 'spreadsheet'
data/README.txt CHANGED
@@ -1,64 +1,71 @@
1
- = overdrive_metadata
2
-
3
- http://www.libcode.net
4
-
5
- == DESCRIPTION:
6
-
7
- Generate marc records from Overdrive provided metadata spreadsheets.
8
-
9
- == FEATURES/PROBLEMS:
10
-
11
- Most problems encountered owe to missing values in the Overdrive spreadsheet.
12
- These are mostly handled defensively but missing values in the spreadsheet
13
- may create unhandled exceptions in some cases.
14
- Have yet to see a Kindle eBook sample - may require tinkering.
15
-
16
- == SYNOPSIS:
17
-
18
- # Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
19
- require 'overdrive_metadata'
20
- records = OverdriveMetadata.new('spreadsheets/111111.xls')
21
- puts "R: " + records.size.to_s # print number of records generated to console
22
- w = MARC::Writer.new('generated.mrc')
23
- records.each do |r|
24
- begin
25
- w.write r
26
- rescue
27
- puts "FAILED: " + r['245']['a']
28
- end
29
- end
30
- w.close
31
-
32
- == REQUIREMENTS:
33
-
34
- marc
35
- spreadsheet
36
-
37
- == INSTALL:
38
-
39
- sudo gem install overdrive_metadata
40
-
41
- == LICENSE:
42
-
43
- (The MIT License)
44
-
45
- Copyright (c) 2011 Mark Cooper
46
-
47
- Permission is hereby granted, free of charge, to any person obtaining
48
- a copy of this software and associated documentation files (the
49
- 'Software'), to deal in the Software without restriction, including
50
- without limitation the rights to use, copy, modify, merge, publish,
51
- distribute, sublicense, and/or sell copies of the Software, and to
52
- permit persons to whom the Software is furnished to do so, subject to
53
- the following conditions:
54
-
55
- The above copyright notice and this permission notice shall be
56
- included in all copies or substantial portions of the Software.
57
-
58
- THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
59
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
60
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
61
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
62
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
63
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
64
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1
+ = overdrive_metadata
2
+
3
+ http://www.libcode.net
4
+
5
+ == DESCRIPTION:
6
+
7
+ Generate marc records from Overdrive provided metadata spreadsheets.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ Much faster than previous versions -- no batch merging.
12
+ Fields are appended to a single record for rows with matching content URLs.
13
+ Updated to account for ebook formats.
14
+ The agency code must now be passed in as the second argument, and a header row is assumed to be present.
15
+
16
+ == SYNOPSIS:
17
+
18
+ require 'overdrive_metadata'
19
+
20
+ o = OverdriveMetadata.new('spreadsheets/111111.xls', 'JTH')
21
+ # o = OverdriveMetadata.new('spreadsheets/111111.xls', 'JTH', false) # if no header
22
+ records = o.map # this must be called to process the rows
23
+
24
+ puts "Fields read: #{o.count.to_s}" # count of spreadsheet rows processed
25
+ puts "R: #{records.size.to_s}" # print number of records generated to console
26
+
27
+ w = MARC::Writer.new('generated.mrc')
28
+
29
+ records.each do |r|
30
+ begin
31
+ w.write r
32
+ rescue
33
+ puts "FAILED: " + r['245']['a']
34
+ end
35
+ end
36
+
37
+ w.close
38
+
39
+ == REQUIREMENTS:
40
+
41
+ marc
42
+ spreadsheet
43
+
44
+ == INSTALL:
45
+
46
+ sudo gem install overdrive_metadata
47
+
48
+ == LICENSE:
49
+
50
+ (The MIT License)
51
+
52
+ Copyright (c) 2011 Mark Cooper
53
+
54
+ Permission is hereby granted, free of charge, to any person obtaining
55
+ a copy of this software and associated documentation files (the
56
+ 'Software'), to deal in the Software without restriction, including
57
+ without limitation the rights to use, copy, modify, merge, publish,
58
+ distribute, sublicense, and/or sell copies of the Software, and to
59
+ permit persons to whom the Software is furnished to do so, subject to
60
+ the following conditions:
61
+
62
+ The above copyright notice and this permission notice shall be
63
+ included in all copies or substantial portions of the Software.
64
+
65
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
66
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
67
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
68
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
69
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
70
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
71
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile CHANGED
@@ -1,28 +1,28 @@
1
- # -*- ruby -*-
2
-
3
- require 'rake/testtask'
4
-
5
- desc "Validate the gemspec"
6
- task :gemspec do
7
- gemspec.validate
8
- end
9
-
10
- desc "Build gem locally"
11
- task :build => :gemspec do
12
- system "gem build #{gemspec.name}.gemspec"
13
- FileUtils.mkdir_p "pkg"
14
- FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", "pkg"
15
- end
16
-
17
- desc "Install gem locally"
18
- task :install => :build do
19
- system "gem install pkg/#{gemspec.name}-#{gemspec.version}"
20
- end
21
-
22
- Rake::TestTask.new do |t|
23
- t.libs << "test"
24
- t.test_files = FileList['test/test*.rb']
25
- t.verbose = true
26
- end
27
-
28
- # vim: syntax=ruby
1
+ # -*- ruby -*-
2
+
3
+ require 'rake/testtask'
4
+
5
+ desc "Validate the gemspec"
6
+ task :gemspec do
7
+ gemspec.validate
8
+ end
9
+
10
+ desc "Build gem locally"
11
+ task :build => :gemspec do
12
+ system "gem build #{gemspec.name}.gemspec"
13
+ FileUtils.mkdir_p "pkg"
14
+ FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", "pkg"
15
+ end
16
+
17
+ desc "Install gem locally"
18
+ task :install => :build do
19
+ system "gem install pkg/#{gemspec.name}-#{gemspec.version}"
20
+ end
21
+
22
+ Rake::TestTask.new do |t|
23
+ t.libs << "test"
24
+ t.test_files = FileList['test/test*.rb']
25
+ t.verbose = true
26
+ end
27
+
28
+ # vim: syntax=ruby
@@ -1,342 +1,328 @@
1
- require 'marc'
2
- require 'spreadsheet'
3
-
4
- # Class to generate marc records from Overdrive provided metadata spreadsheet
5
- # Usage:
6
- # # Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
7
- # require 'overdrive_metadata'
8
- # records = OverdriveMetadata.new('spreadsheets/111111.xls')
9
- # puts "R: " + records.size.to_s # print number of records generated to console
10
- # w = MARC::Writer.new('generated.mrc')
11
- # records.each do |r|
12
- # begin
13
- # w.write r
14
- # rescue
15
- # puts "FAILED: " + r['245']['a']
16
- # end
17
- # end
18
- # w.close
19
-
20
- class OverdriveMetadata
21
- VERSION = '1.0.2.2'
22
-
23
- attr_reader :records
24
-
25
- DEF_FORMAT = 'eBook'
26
- OD_ORG = 'OverDrive, Inc.'
27
- OD_URL = 'http://www.overdrive.com'
28
- AGENCY = 'JTH' # Sonoma County Library
29
- ACCESS = 'Mode of access: World Wide Web.'
30
- URL_MSG = 'Click to download this resource.'
31
- DISCLAIM = 'Record generated from Overdrive metadata spreadsheet.'
32
- READ_ERR = 'Error, close file, check file path or try resaving file as .xls (not xml)'
33
-
34
- # add option for config. file in future
35
- HEADERS = {
36
- :oclc => 19,
37
- :date => 12,
38
- :time => 21,
39
- :isbn => 1,
40
- :author => 4,
41
- :title => 2,
42
- :place => 11,
43
- :publisher => 3,
44
- :requires => 10,
45
- :format => 9,
46
- :filesize => 8,
47
- :reader => 14,
48
- :title_src => 13,
49
- :summary => 15,
50
- :subjects => 5,
51
- :download => 7,
52
- :excerpt => 16,
53
- :cover => 17,
54
- :thumb => 18,
55
- }
56
-
57
- def initialize(metadata_file)
58
- begin
59
- @metadata = Spreadsheet.open(metadata_file).worksheet 0
60
- rescue Exception => ex
61
- raise READ_ERR
62
- end
63
- @records = []
64
- @count = 0
65
- map
66
- end
67
-
68
- def map
69
- @metadata.each do |row|
70
- @records << create_record(row)
71
- end
72
- @records.compact!
73
- merge_by_content_url
74
- end
75
-
76
- def create_record(data)
77
- @count += 1
78
- field = package_data(data)
79
- r = field[:format].match(/#{DEF_FORMAT}/) ? EBook.new : EAudioBook.new
80
- begin
81
- r.make_control_field('001', field[:oclc])
82
- r.make_006
83
- r.make_007
84
- r.make_fixed_field(field[:year], field[:month], field[:day])
85
- r.make_data_field('020', ' ', ' ', {'a' => field[:isbn] + ' ' + r.isbn}) unless field[:isbn].empty?
86
- r.make_data_field('037', ' ', ' ', {'b' => OD_ORG, 'n' => OD_URL})
87
- r.make_data_field('040', ' ', ' ', {'a' => AGENCY, 'c' => AGENCY})
88
- r.make_data_field('100', '1', ' ', {'a' => normalize_author(field[:author])})
89
- r.make_title(field[:title], field[:author])
90
- r.make_publication(field[:place], field[:publisher], field[:year])
91
- r.make_physical(field[:hours], field[:minutes])
92
- r.make_data_field('306', ' ', ' ', {'a' => field[:hours] + field[:minutes] + field[:seconds]})
93
- r.make_data_field('538', ' ', ' ', {'a' => ACCESS})
94
- r.make_data_field('538', ' ', ' ', {'a' => 'Requires ' + field[:requires] + '.'})
95
- r.make_data_field('500', ' ', ' ', {'a' => "#{field[:format]} (file size: #{field[:filesize]} MB)."})
96
- r.make_data_field('511', '0', ' ', {'a' => "Read by #{field[:reader]}."}) unless field[:reader].empty?
97
- r.make_data_field('520', ' ', ' ', {'a' => field[:summary]}) unless field[:summary].match(/^#+$/)
98
- r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
99
- r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'}) if r.is_a? EAudioBook
100
- r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."}) if r.is_a? EAudioBook
101
- field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => clean_string(s).strip + '.', '2' => 'local'}) }
102
- r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
103
- r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
104
- r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
105
- r.make_data_field('856', '4', '0', {'u' => field[:excerpt], 'y' => "Excerpt (#{field[:format]})."})
106
- r.make_data_field('856', '4', '2', {'u' => field[:cover], 'y' => "<img class=\"scl_mwthumb\" src=\"#{field[:thumb]}\" alt=\"Artwork for this title - #{field[:title].gsub(/[^A-Za-z ]/, '')}\" />"})
107
- r.make_data_field('907', ' ', ' ', {'a' => 'ER'})
108
- r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
109
- return r.record
110
- rescue Exception => ex
111
- puts @count.to_s + ': ' + "#{ex.message}\n" + ex.backtrace[0..2].join("\n")
112
- nil
113
- end
114
- end
115
-
116
- def package_data(data)
117
- values = {}
118
- values[:isbn] = data[HEADERS[:isbn]]
119
- values[:date] = data[HEADERS[:date]]
120
- values[:place] = data[HEADERS[:place]]
121
- values[:publisher] = data[HEADERS[:publisher]]
122
- values[:month] = ''
123
- values[:day] = ''
124
- if values[:date].match(/\d{1,2}\/\d{1,2}\/\d{4}/)
125
- month, day, year = values[:date].split '/'
126
- values[:month] = month
127
- values[:day] = day
128
- end
129
- values[:year] = values[:date].match(/\d{4}/).to_s # Fall-back
130
- values[:time] = data[HEADERS[:time]]
131
- hr, mn, sc = values[:time].split ':'
132
- values[:hours] = hr ? hr : ''
133
- values[:minutes] = mn ? mn : ''
134
- values[:seconds] = sc ? sc : ''
135
- values[:author] = clean_string data[HEADERS[:author]]
136
- values[:title] = clean_string data[HEADERS[:title]]
137
- values[:title_src] = data[HEADERS[:title_src]]
138
- values[:reader] = clean_string data[HEADERS[:reader]]
139
- values[:requires] = data[HEADERS[:requires]]
140
- values[:format] = data[HEADERS[:format]]
141
- values[:filesize] = kb_to_mb(data[HEADERS[:filesize]])
142
- values[:summary] = clean_string data[HEADERS[:summary]]
143
- values[:subjects] = data[HEADERS[:subjects]].split(',') rescue []
144
- values[:download] = data[HEADERS[:download]]
145
- values[:excerpt] = data[HEADERS[:excerpt]]
146
- values[:thumb] = data[HEADERS[:thumb]]
147
- values[:cover] = data[HEADERS[:cover]]
148
- values[:oclc] = data[HEADERS[:oclc]].to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + data[HEADERS[:oclc]]
149
- values.each { |k, v| values[k] = '' if v.nil? }
150
- return values
151
- end
152
-
153
- def merge_by_content_url
154
- puts 'Merging (may take a while on large record sets) ...'
155
- content_url = Hash.new(0)
156
- @records.each do |record|
157
- content_url[record['856']['u']] += 1
158
- end
159
- content_url.delete_if { |k,v| v < 2 }
160
- content_url.keys.each do |url|
161
- rcds = @records.find_all { |r| r['856']['u'] == url }
162
- raise 'Found invalid number of duplicate records: ' + url unless rcds.size == 2
163
- file_note = rcds[1].find { |f| f.tag == '500' and f['a'] =~ /OverDrive (WMA|MP3) Audiobook/ }
164
- excerpt = rcds[1].find { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }
165
- if file_note and excerpt
166
- begin
167
- rcds[0].fields.insert(rcds[0].fields.index { |f| f.tag == '500' }, file_note)
168
- rcds[0].fields.insert(rcds[0].fields.index { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }, excerpt)
169
- @records.delete rcds[1]
170
- rescue Exception => ex
171
- puts url + ': ' + 'failed to merge'
172
- end
173
- end
174
- end
175
- @records
176
- end
177
-
178
- def make_id(id_string)
179
- return id_string[-9..-1].gsub(/\W/, '')
180
- end
181
-
182
- def normalize_author(author)
183
- return author if author.empty?
184
- author = author.split(',')[0]
185
- names = author.split ' '
186
- surname = names.last + ', '
187
- fullname = surname + names[0 .. names.length - 2].join(' ')
188
- fullname += '.' unless fullname[-1] == '.'
189
- return fullname
190
- end
191
-
192
- def clean_string(input_str)
193
- return input_str.gsub(/&lt;.*&gt;/, '').gsub(/&amp;/, '&').gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&#160;/, '').gsub(/&#235;/, 'e').gsub(/<\/?[^>]*>/, '').gsub(/\s{2}+/, ' ').strip rescue ''
194
- end
195
-
196
- # Quickly turn 325645 {kb} into 318 {mb} etc. + 1 so not 0
197
-
198
- def kb_to_mb(size)
199
- return (size.to_f / 1024 + 1).to_i.to_s
200
- end
201
-
202
- class ERecord
203
-
204
- GMD = '[electronic resource]'
205
- DATE_ERR = 'Date information not present for fixed field'
206
- FIXF_ERR = 'Invalid fixed field created'
207
- TITL_ERR = 'Title data is missing for record'
208
-
209
- attr_reader :record
210
-
211
- def initialize
212
- @record = MARC::Record.new
213
- @ldr = record.leader
214
- @ldr[5] = 'n'
215
- @ldr[7] = 'm'
216
- @ldr[17] = 'M'
217
- @ldr[18] = 'a'
218
- fixed_field = ''
219
- end
220
-
221
- def make_control_field(tag, value)
222
- return nil if value.empty?
223
- @record.append MARC::ControlField.new(tag, value)
224
- end
225
-
226
- def make_data_field(tag, ind1, ind2, subfields)
227
- s = []
228
- subfields.each do |k,v|
229
- return nil if v.nil? or v.empty?
230
- s << MARC::Subfield.new(k, v)
231
- end
232
- @record.append MARC::DataField.new(tag, ind1, ind2, *s)
233
- end
234
-
235
- def make_fixed_field(year, month, day)
236
- raise DATE_ERR if year.empty?
237
- fixed_field = @fixed_field
238
- unless month.empty? and day.empty?
239
- month = '0' + month if month.length == 1
240
- day = '0' + day if day.length == 1
241
- fixed_field[0..5] = year[2..3] + month + day
242
- fixed_field[7..10] = year
243
- else
244
- fixed_field[7..10] = year
245
- end
246
- raise FIXF_ERR unless fixed_field.length == 40
247
- make_control_field('008', fixed_field)
248
- end
249
-
250
- def make_title(title, sor)
251
- raise TITL_ERR if title.empty?
252
- t_ind1 = sor.empty? ? '0' : '1'
253
- t_ind2 = non_filing_characters title
254
- subfields = {}
255
- subfields['a'] = title
256
- subfields['h'] = GMD + ' /'
257
- unless sor.empty?
258
- value = sor[-1] == '.' ? "by #{sor}" : "by #{sor}."
259
- subfields['c'] = value
260
- else
261
- subfields['h'].gsub!(/\s+\/$/, '.')
262
- end
263
- make_data_field('245', t_ind1, t_ind2, subfields)
264
- end
265
-
266
- def make_publication(place, publisher, year)
267
- return nil if place.empty? or publisher.empty? or year.empty?
268
- make_data_field('260', ' ', ' ', {'a' => "#{place} :", 'b' => "#{publisher},", 'c' => "#{year}."})
269
- end
270
-
271
- def non_filing_characters(title)
272
- return case
273
- when title.match(/^The /)
274
- '4'
275
- when title.match(/^An /)
276
- '3'
277
- when title.match(/^A /)
278
- '2'
279
- else
280
- '0'
281
- end
282
- end
283
-
284
- end
285
-
286
- class EBook < ERecord
287
-
288
- attr_reader :isbn, :subject
289
-
290
- def initialize
291
- super
292
- @ldr[6] = 'a'
293
- @fixed_field = ' s xxu|||| s 0||| eng d'
294
- @isbn = '(electronic bk. : OverDrive Electronic Book)'
295
- @subject = 'Downloadable ebooks.'
296
- # set leader
297
- end
298
-
299
- def make_006
300
- make_control_field('006', 'm d ')
301
- end
302
-
303
- def make_007
304
- make_control_field('007', 'cr nnu---|||||')
305
- end
306
-
307
- def make_physical(*args)
308
- make_data_field('300', ' ', ' ', {'a' => "1 online resource."})
309
- end
310
-
311
- end
312
-
313
- class EAudioBook < ERecord
314
-
315
- attr_reader :isbn, :subject
316
-
317
- def initialize
318
- super
319
- @ldr[6] = 'i'
320
- @fixed_field = ' s xxunnnn s eng d'
321
- @isbn = '(sound recording : OverDrive Audio Book)'
322
- @subject = 'Downloadable audiobooks.'
323
- # set leader
324
- end
325
-
326
- def make_006
327
- make_control_field('006', 'm h ')
328
- end
329
-
330
- def make_007
331
- make_control_field('007', 'sz usnnnnnnned')
332
- make_control_field('007', 'cr nna |||||')
333
- end
334
-
335
- def make_physical(hours, minutes)
336
- return nil if hours.empty? or minutes.empty?
337
- make_data_field('300', ' ', ' ', {'a' => "1 sound file (ca. #{hours} hr., #{minutes} min.) :", 'b' => 'digital.'})
338
- end
339
-
340
- end
341
-
1
+ require 'marc'
2
+ require 'spreadsheet'
3
+
4
+ class OverdriveMetadata
5
+ VERSION = '1.0.2.3'
6
+
7
+ attr_reader :records, :count
8
+
9
+ OD_ORG = 'OverDrive, Inc.'
10
+ OD_URL = 'http://www.overdrive.com'
11
+ ACCESS = 'Mode of access: World Wide Web.'
12
+ URL_MSG = 'Click to download this resource.'
13
+ DISCLAIM = 'Record generated from Overdrive metadata spreadsheet.'
14
+ READ_ERR = 'Error reading spreadsheet! Close, verfiy location and ensure .xls'
15
+
16
+ HEADERS = {
17
+ :oclc => 19,
18
+ :date => 12,
19
+ :time => 21,
20
+ :isbn => 1,
21
+ :author => 4,
22
+ :title => 2,
23
+ :place => 11,
24
+ :publisher => 3,
25
+ :requires => 10,
26
+ :format => 9,
27
+ :filesize => 8,
28
+ :reader => 14,
29
+ :title_src => 13,
30
+ :summary => 15,
31
+ :subjects => 5,
32
+ :download => 7,
33
+ :excerpt => 16,
34
+ :cover => 17,
35
+ :thumb => 18,
36
+ }
37
+
38
+ def initialize(metadata_file, agency, header = true, ebook_regex = nil)
39
+ begin
40
+ @metadata = Spreadsheet.open(metadata_file).worksheet 0
41
+ rescue Exception => ex
42
+ raise READ_ERR
43
+ end
44
+ @agency = agency
45
+ @ebook_regex = ebook_regex.nil? ? '(ebook|epub|kindle|pdf)' : ebook_regex
46
+ @records = []
47
+ @count = 0
48
+ @header = header
49
+ @content_rec = {}
50
+ end
51
+
52
+ def map
53
+ @metadata.each do |row|
54
+ if @header
55
+ @header = false
56
+ next
57
+ end
58
+
59
+ begin
60
+ @records << create_record(row)
61
+ rescue Exception => ex
62
+ puts "#{@count.to_s}\t#{ex.message}"
63
+ next
64
+ end
65
+
66
+ end
67
+
68
+ @records.compact!
69
+ @records
70
+ end
71
+
72
+ def create_record(data)
73
+ @count += 1
74
+ field = package_data(data)
75
+
76
+ if @content_rec.has_key? field[:download]
77
+ record = @content_rec[field[:download]]
78
+ format = MARC::DataField.new('500', ' ', ' ', ['a', "#{field[:format]} (file size: #{field[:filesize]} MB)."])
79
+ record.fields.insert(record.fields.index { |f| f.tag == '500' }, format)
80
+
81
+ unless field[:excerpt].empty?
82
+ excerpt = MARC::DataField.new('856', '4', '0', ['u', field[:excerpt]], ['y', "Excerpt (#{field[:format]})."])
83
+ record.fields.insert(record.fields.index { |f| f.tag == '856' }, excerpt)
84
+ end
85
+ return nil
86
+ end
87
+
88
+ r = field[:format].match(/#{@ebook_regex}/i) ? EBook.new : EAudioBook.new
89
+ r.make_control_field('001', field[:oclc])
90
+ r.make_006
91
+ r.make_007
92
+ r.make_fixed_field(field[:year], field[:month], field[:day])
93
+ r.make_data_field('020', ' ', ' ', {'a' => field[:isbn] + ' ' + r.isbn}) unless field[:isbn].empty?
94
+ r.make_data_field('037', ' ', ' ', {'b' => OD_ORG, 'n' => OD_URL})
95
+ r.make_data_field('040', ' ', ' ', {'a' => @agency, 'c' => @agency})
96
+ r.make_data_field('100', '1', ' ', {'a' => normalize_author(field[:author])})
97
+ r.make_title(field[:title], field[:author])
98
+ r.make_publication(field[:place], field[:publisher], field[:year])
99
+ r.make_physical(field[:hours], field[:minutes])
100
+ r.make_data_field('306', ' ', ' ', {'a' => field[:hours] + field[:minutes] + field[:seconds]})
101
+ r.make_data_field('538', ' ', ' ', {'a' => ACCESS})
102
+ r.make_data_field('538', ' ', ' ', {'a' => 'Requires ' + field[:requires] + '.'})
103
+ r.make_data_field('500', ' ', ' ', {'a' => "#{field[:format]} (file size: #{field[:filesize]} MB)."})
104
+ r.make_data_field('511', '0', ' ', {'a' => "Read by #{field[:reader]}."}) unless field[:reader].empty?
105
+ r.make_data_field('520', ' ', ' ', {'a' => field[:summary]}) unless field[:summary].match(/^#+$/)
106
+ r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
107
+
108
+ if r.is_a? EAudioBook
109
+ r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'})
110
+ r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."})
111
+ end
112
+
113
+ field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => clean_string(s).strip + '.', '2' => 'local'}) }
114
+ r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
115
+ r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
116
+ r.make_data_field('856', '4', '0', {'u' => field[:excerpt], 'y' => "Excerpt (#{field[:format]})."})
117
+ r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
118
+
119
+ if @agency == 'JTH'
120
+ r.make_data_field('856', '4', '2', {'u' => field[:cover], 'y' => "<img class=\"scl_mwthumb\" src=\"#{field[:thumb]}\" alt=\"Artwork for this title - #{field[:title].gsub(/[^A-Za-z ]/, '')}\" />"})
121
+ r.make_data_field('907', ' ', ' ', {'a' => 'ER'})
122
+ end
123
+
124
+ r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
125
+
126
+ @content_rec[field[:download]] = r.record
127
+ return r.record
128
+ end
129
+
130
+ def package_data(data)
131
+ values = {}
132
+ values[:isbn] = data[HEADERS[:isbn]]
133
+ values[:date] = data[HEADERS[:date]]
134
+ values[:place] = data[HEADERS[:place]]
135
+ values[:publisher] = data[HEADERS[:publisher]]
136
+ values[:month] = ''
137
+ values[:day] = ''
138
+ if values[:date].match(/\d{1,2}\/\d{1,2}\/\d{2,4}/)
139
+ month, day, year = values[:date].split '/'
140
+ values[:month] = month
141
+ values[:day] = day
142
+ values[:year] = year.size == 4 ? year : "20#{year}"
143
+ end
144
+ values[:year] = values[:date].match(/\d{4}/).to_s unless year # fall-back
145
+ values[:time] = data[HEADERS[:time]]
146
+ hr, mn, sc = values[:time].split ':'
147
+ values[:hours] = hr ? hr : ''
148
+ values[:minutes] = mn ? mn : ''
149
+ values[:seconds] = sc ? sc : ''
150
+ values[:author] = clean_string data[HEADERS[:author]]
151
+ values[:title] = clean_string data[HEADERS[:title]]
152
+ values[:title_src] = data[HEADERS[:title_src]]
153
+ values[:reader] = clean_string data[HEADERS[:reader]]
154
+ values[:requires] = data[HEADERS[:requires]]
155
+ values[:format] = data[HEADERS[:format]]
156
+ values[:filesize] = kb_to_mb(data[HEADERS[:filesize]])
157
+ values[:summary] = clean_string data[HEADERS[:summary]]
158
+ values[:subjects] = data[HEADERS[:subjects]].split(',') rescue []
159
+ values[:download] = data[HEADERS[:download]]
160
+ values[:excerpt] = data[HEADERS[:excerpt]]
161
+ values[:thumb] = data[HEADERS[:thumb]]
162
+ values[:cover] = data[HEADERS[:cover]]
163
+ values[:oclc] = data[HEADERS[:oclc]].to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + data[HEADERS[:oclc]]
164
+ values.each { |k, v| values[k] = '' if v.nil? }
165
+ return values
166
+ end
167
+
168
+ def make_id(id_string)
169
+ return id_string[-9..-1].gsub(/\W/, '')
170
+ end
171
+
172
+ def normalize_author(author)
173
+ return author if author.empty?
174
+ author = author.split(',')[0]
175
+ names = author.split ' '
176
+ surname = names.last + ', '
177
+ fullname = surname + names[0 .. names.length - 2].join(' ')
178
+ fullname += '.' unless fullname[-1] == '.'
179
+ return fullname
180
+ end
181
+
182
+ def clean_string(input_str)
183
+ return input_str.gsub(/&lt;.*&gt;/, '').gsub(/&amp;/, '&').gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&#160;/, '').gsub(/&#235;/, 'e').gsub(/<\/?[^>]*>/, '').gsub(/\s{2}+/, ' ').strip rescue ''
184
+ end
185
+
186
+ def kb_to_mb(size)
187
+ return (size.to_f / 1024 + 1).to_i.to_s
188
+ end
189
+
190
+ class ERecord
191
+
192
+ GMD = '[electronic resource]'
193
+ DATE_ERR = 'Date information not present for fixed field'
194
+ FIXF_ERR = 'Invalid fixed field created'
195
+ TITL_ERR = 'Title data is missing for record'
196
+
197
+ attr_reader :record
198
+
199
+ def initialize
200
+ @record = MARC::Record.new
201
+ @ldr = record.leader
202
+ @ldr[5] = 'n'
203
+ @ldr[7] = 'm'
204
+ @ldr[17] = 'M'
205
+ @ldr[18] = 'a'
206
+ @fixed_field = ''
207
+ end
208
+
209
+ def make_control_field(tag, value)
210
+ return nil if value.empty?
211
+ @record.append MARC::ControlField.new(tag, value)
212
+ end
213
+
214
+ def make_data_field(tag, ind1, ind2, subfields)
215
+ s = []
216
+ subfields.each do |k,v|
217
+ return nil if v.nil? or v.empty?
218
+ s << MARC::Subfield.new(k, v)
219
+ end
220
+ @record.append MARC::DataField.new(tag, ind1, ind2, *s)
221
+ end
222
+
223
+ def make_fixed_field(year, month, day)
224
+ raise DATE_ERR if year.empty?
225
+ fixed_field = @fixed_field
226
+ unless month.empty? and day.empty?
227
+ month = '0' + month if month.length == 1
228
+ day = '0' + day if day.length == 1
229
+ fixed_field[0..5] = year[2..3] + month + day
230
+ fixed_field[7..10] = year
231
+ else
232
+ fixed_field[7..10] = year
233
+ end
234
+ raise FIXF_ERR unless fixed_field.length == 40
235
+ make_control_field('008', fixed_field)
236
+ end
237
+
238
+ def make_title(title, sor)
239
+ raise TITL_ERR if title.empty?
240
+ t_ind1 = sor.empty? ? '0' : '1'
241
+ t_ind2 = non_filing_characters title
242
+ subfields = {}
243
+ subfields['a'] = title
244
+ subfields['h'] = GMD + ' /'
245
+ unless sor.empty?
246
+ value = sor[-1] == '.' ? "by #{sor}" : "by #{sor}."
247
+ subfields['c'] = value
248
+ else
249
+ subfields['h'].gsub!(/\s+\/$/, '.')
250
+ end
251
+ make_data_field('245', t_ind1, t_ind2, subfields)
252
+ end
253
+
254
+ def make_publication(place, publisher, year)
255
+ return nil if place.empty? or publisher.empty? or year.empty?
256
+ make_data_field('260', ' ', ' ', {'a' => "#{place} :", 'b' => "#{publisher},", 'c' => "#{year}."})
257
+ end
258
+
259
+ def non_filing_characters(title)
260
+ return case
261
+ when title.match(/^The /)
262
+ '4'
263
+ when title.match(/^An /)
264
+ '3'
265
+ when title.match(/^A /)
266
+ '2'
267
+ else
268
+ '0'
269
+ end
270
+ end
271
+
272
+ end
273
+
274
+ class EBook < ERecord
275
+
276
+ attr_reader :isbn, :subject
277
+
278
+ def initialize
279
+ super
280
+ @ldr[6] = 'a'
281
+ @fixed_field = ' s xxu|||| s 0||| eng d'
282
+ @isbn = '(electronic bk. : OverDrive Electronic Book)'
283
+ @subject = 'Downloadable ebooks.'
284
+ end
285
+
286
+ def make_006
287
+ make_control_field('006', 'm d ')
288
+ end
289
+
290
+ def make_007
291
+ make_control_field('007', 'cr nnu---|||||')
292
+ end
293
+
294
+ def make_physical(*args)
295
+ make_data_field('300', ' ', ' ', {'a' => "1 online resource."})
296
+ end
297
+
298
+ end
299
+
300
+ class EAudioBook < ERecord
301
+
302
+ attr_reader :isbn, :subject
303
+
304
+ def initialize
305
+ super
306
+ @ldr[6] = 'i'
307
+ @fixed_field = ' s xxunnnn s eng d'
308
+ @isbn = '(sound recording : OverDrive Audio Book)'
309
+ @subject = 'Downloadable audiobooks.'
310
+ end
311
+
312
+ def make_006
313
+ make_control_field('006', 'm h ')
314
+ end
315
+
316
+ def make_007
317
+ make_control_field('007', 'sz usnnnnnnned')
318
+ make_control_field('007', 'cr nna |||||')
319
+ end
320
+
321
+ def make_physical(hours, minutes)
322
+ return nil if hours.empty? or minutes.empty?
323
+ make_data_field('300', ' ', ' ', {'a' => "1 sound file (ca. #{hours} hr., #{minutes} min.) :", 'b' => 'digital.'})
324
+ end
325
+
326
+ end
327
+
342
328
  end
@@ -1,45 +1,44 @@
1
- #!/usr/bin/env gem build
2
- # encoding: utf-8
3
-
4
- require "base64"
5
-
6
- Gem::Specification.new do |s|
7
- s.name = %q{overdrive_metadata}
8
- s.version = '1.0.2.2'
9
- s.authors = ["Mark Cooper"]
10
- s.date = %q{2011-11-22}
11
- s.homepage = %q{http://www.libcode.net}
12
- s.email = Base64.decode64("bWFya2NocmlzdG9waGVyY29vcGVyQGdtYWlsLmNvbQ==\n")
13
-
14
- s.summary = "Generate marc records from Overdrive provided metadata spreadsheets."
15
- s.description = "#{s.summary}"
16
- s.cert_chain = nil
17
- s.has_rdoc = true
18
-
19
- # files
20
- s.files = `git ls-files`.split("\n")
21
- s.test_files = `git ls-files test`.split("\n")
22
- s.extra_rdoc_files = ["README.txt"]
23
- s.rdoc_options = ["--main", "README.txt"]
24
-
25
- Dir["bin/*"].map(&File.method(:basename))
26
- # s.default_executable = "overdrive_metadata"
27
- s.require_paths = ["lib"]
28
-
29
- # Ruby version
30
- s.required_ruby_version = ::Gem::Requirement.new("~> 1.9")
31
-
32
- # dependencies
33
- s.add_development_dependency "bundler"
34
-
35
- begin
36
- require "changelog"
37
- rescue LoadError
38
- warn "You have to have changelog gem installed for post install message"
39
- else
40
- s.post_install_message = CHANGELOG.new.version_changes
41
- end
42
-
43
- # RubyForge
44
- s.rubyforge_project = %q{overdrive_metadata}
45
- end
1
+ #!/usr/bin/env gem build
2
+ # encoding: utf-8
3
+
4
+ require "base64"
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{overdrive_metadata}
8
+ s.version = '1.0.2.3'
9
+ s.authors = ["Mark Cooper"]
10
+ s.date = %q{2011-11-22}
11
+ s.homepage = %q{http://www.libcode.net}
12
+ s.email = Base64.decode64("bWFya2NocmlzdG9waGVyY29vcGVyQGdtYWlsLmNvbQ==\n")
13
+
14
+ s.summary = "Generate marc records from Overdrive provided metadata spreadsheets."
15
+ s.description = "#{s.summary}"
16
+ s.has_rdoc = true
17
+
18
+ # files
19
+ s.files = `git ls-files`.split("\n")
20
+ s.test_files = `git ls-files test`.split("\n")
21
+ s.extra_rdoc_files = ["README.txt"]
22
+ s.rdoc_options = ["--main", "README.txt"]
23
+
24
+ Dir["bin/*"].map(&File.method(:basename))
25
+ # s.default_executable = "overdrive_metadata"
26
+ s.require_paths = ["lib"]
27
+
28
+ # Ruby version
29
+ s.required_ruby_version = ::Gem::Requirement.new("~> 1.9")
30
+
31
+ # dependencies
32
+ s.add_development_dependency "bundler"
33
+
34
+ begin
35
+ require "changelog"
36
+ rescue LoadError
37
+ warn "You have to have changelog gem installed for post install message"
38
+ else
39
+ s.post_install_message = CHANGELOG.new.version_changes
40
+ end
41
+
42
+ # RubyForge
43
+ s.rubyforge_project = %q{overdrive_metadata}
44
+ end
@@ -1,16 +1,16 @@
1
- require "shoulda"
2
- require "overdrive_metadata"
3
-
4
- class TestOverdriveMetadata < Test::Unit::TestCase
5
-
6
- context "Creating Overdrive records" do
7
-
8
- setup do
9
- @o = OverdriveMetadata.new('raw/test.xls')
10
- end
11
-
12
- # Write some tests ...
13
-
14
- end
15
-
16
- end
1
+ require "shoulda"
2
+ require "overdrive_metadata"
3
+
4
+ class TestOverdriveMetadata < Test::Unit::TestCase
5
+
6
+ context "Creating Overdrive records" do
7
+
8
+ setup do
9
+ @o = OverdriveMetadata.new('raw/test.xls')
10
+ end
11
+
12
+ # Write some tests ...
13
+
14
+ end
15
+
16
+ end
metadata CHANGED
@@ -1,20 +1,19 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: overdrive_metadata
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2.2
4
+ version: 1.0.2.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Mark Cooper
9
9
  autorequire:
10
10
  bindir: bin
11
- cert_chain:
12
- date: 2011-11-22 00:00:00.000000000 -08:00
13
- default_executable:
11
+ cert_chain: []
12
+ date: 2011-11-22 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: bundler
17
- requirement: &10219932 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
18
17
  none: false
19
18
  requirements:
20
19
  - - ! '>='
@@ -22,9 +21,15 @@ dependencies:
22
21
  version: '0'
23
22
  type: :development
24
23
  prerelease: false
25
- version_requirements: *10219932
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
26
30
  description: Generate marc records from Overdrive provided metadata spreadsheets.
27
- email: markchristophercooper@gmail.com
31
+ email: !binary |-
32
+ bWFya2NocmlzdG9waGVyY29vcGVyQGdtYWlsLmNvbQ==
28
33
  executables: []
29
34
  extensions: []
30
35
  extra_rdoc_files:
@@ -38,7 +43,6 @@ files:
38
43
  - overdrive_metadata.gemspec
39
44
  - raw/test.xls
40
45
  - test/test_overdrive_metadata.rb
41
- has_rdoc: true
42
46
  homepage: http://www.libcode.net
43
47
  licenses: []
44
48
  post_install_message:
@@ -61,7 +65,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
65
  version: '0'
62
66
  requirements: []
63
67
  rubyforge_project: overdrive_metadata
64
- rubygems_version: 1.5.2
68
+ rubygems_version: 1.8.24
65
69
  signing_key:
66
70
  specification_version: 3
67
71
  summary: Generate marc records from Overdrive provided metadata spreadsheets.