overdrive_metadata 1.0.2.2 → 1.0.2.3

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,2 +1,2 @@
1
- *.gem
2
- *.lock
1
+ *.gem
2
+ *.lock
data/Gemfile CHANGED
@@ -1,4 +1,4 @@
1
- source 'http://rubygems.org'
2
-
3
- gem 'marc'
1
+ source 'http://rubygems.org'
2
+
3
+ gem 'marc'
4
4
  gem 'spreadsheet'
data/README.txt CHANGED
@@ -1,64 +1,71 @@
1
- = overdrive_metadata
2
-
3
- http://www.libcode.net
4
-
5
- == DESCRIPTION:
6
-
7
- Generate marc records from Overdrive provided metadata spreadsheets.
8
-
9
- == FEATURES/PROBLEMS:
10
-
11
- Most problems encountered owe to missing values in the Overdrive spreadsheet.
12
- These are mostly handled defensively but missing values in the spreadsheet
13
- may create unhandled exceptions in some cases.
14
- Have yet to see a Kindle eBook sample - may require tinkering.
15
-
16
- == SYNOPSIS:
17
-
18
- # Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
19
- require 'overdrive_metadata'
20
- records = OverdriveMetadata.new('spreadsheets/111111.xls')
21
- puts "R: " + records.size.to_s # print number of records generated to console
22
- w = MARC::Writer.new('generated.mrc')
23
- records.each do |r|
24
- begin
25
- w.write r
26
- rescue
27
- puts "FAILED: " + r['245']['a']
28
- end
29
- end
30
- w.close
31
-
32
- == REQUIREMENTS:
33
-
34
- marc
35
- spreadsheet
36
-
37
- == INSTALL:
38
-
39
- sudo gem install overdrive_metadata
40
-
41
- == LICENSE:
42
-
43
- (The MIT License)
44
-
45
- Copyright (c) 2011 Mark Cooper
46
-
47
- Permission is hereby granted, free of charge, to any person obtaining
48
- a copy of this software and associated documentation files (the
49
- 'Software'), to deal in the Software without restriction, including
50
- without limitation the rights to use, copy, modify, merge, publish,
51
- distribute, sublicense, and/or sell copies of the Software, and to
52
- permit persons to whom the Software is furnished to do so, subject to
53
- the following conditions:
54
-
55
- The above copyright notice and this permission notice shall be
56
- included in all copies or substantial portions of the Software.
57
-
58
- THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
59
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
60
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
61
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
62
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
63
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
64
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1
+ = overdrive_metadata
2
+
3
+ http://www.libcode.net
4
+
5
+ == DESCRIPTION:
6
+
7
+ Generate marc records from Overdrive provided metadata spreadsheets.
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ Much faster than previous versions: records are no longer merged in a separate batch pass.
12
+ Rows that share a content (download) URL are combined into a single record: the additional format note and excerpt link are added to the existing record.
13
+ Updated to recognise eBook formats (by default, any format matching ebook, epub, kindle or pdf, case-insensitively).
14
+ The agency code must now be passed in as the second argument, and a header row is assumed to be present (pass false as the third argument if there is none).
15
+
16
+ == SYNOPSIS:
17
+
18
+ require 'overdrive_metadata'
19
+
20
+ o = OverdriveMetadata.new('spreadsheets/111111.xls', 'JTH')
21
+ # o = OverdriveMetadata.new('spreadsheets/111111.xls', 'JTH', false) # if no header
22
+ records = o.map # this must be called to process the rows
23
+
24
+ puts "Fields read: #{o.count.to_s}" # count of spreadsheet rows processed
25
+ puts "R: #{records.size.to_s}" # print number of records generated to console
26
+
27
+ w = MARC::Writer.new('generated.mrc')
28
+
29
+ records.each do |r|
30
+ begin
31
+ w.write r
32
+ rescue
33
+ puts "FAILED: " + r['245']['a']
34
+ end
35
+ end
36
+
37
+ w.close
38
+
39
+ == REQUIREMENTS:
40
+
41
+ marc
42
+ spreadsheet
43
+
44
+ == INSTALL:
45
+
46
+ sudo gem install overdrive_metadata
47
+
48
+ == LICENSE:
49
+
50
+ (The MIT License)
51
+
52
+ Copyright (c) 2011 Mark Cooper
53
+
54
+ Permission is hereby granted, free of charge, to any person obtaining
55
+ a copy of this software and associated documentation files (the
56
+ 'Software'), to deal in the Software without restriction, including
57
+ without limitation the rights to use, copy, modify, merge, publish,
58
+ distribute, sublicense, and/or sell copies of the Software, and to
59
+ permit persons to whom the Software is furnished to do so, subject to
60
+ the following conditions:
61
+
62
+ The above copyright notice and this permission notice shall be
63
+ included in all copies or substantial portions of the Software.
64
+
65
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
66
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
67
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
68
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
69
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
70
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
71
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile CHANGED
@@ -1,28 +1,28 @@
1
- # -*- ruby -*-
2
-
3
- require 'rake/testtask'
4
-
5
- desc "Validate the gemspec"
6
- task :gemspec do
7
- gemspec.validate
8
- end
9
-
10
- desc "Build gem locally"
11
- task :build => :gemspec do
12
- system "gem build #{gemspec.name}.gemspec"
13
- FileUtils.mkdir_p "pkg"
14
- FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", "pkg"
15
- end
16
-
17
- desc "Install gem locally"
18
- task :install => :build do
19
- system "gem install pkg/#{gemspec.name}-#{gemspec.version}"
20
- end
21
-
22
- Rake::TestTask.new do |t|
23
- t.libs << "test"
24
- t.test_files = FileList['test/test*.rb']
25
- t.verbose = true
26
- end
27
-
28
- # vim: syntax=ruby
1
+ # -*- ruby -*-
2
+
3
+ require 'rake/testtask'
4
+
5
+ desc "Validate the gemspec"
6
+ task :gemspec do
7
+ gemspec.validate
8
+ end
9
+
10
+ desc "Build gem locally"
11
+ task :build => :gemspec do
12
+ system "gem build #{gemspec.name}.gemspec"
13
+ FileUtils.mkdir_p "pkg"
14
+ FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", "pkg"
15
+ end
16
+
17
+ desc "Install gem locally"
18
+ task :install => :build do
19
+ system "gem install pkg/#{gemspec.name}-#{gemspec.version}"
20
+ end
21
+
22
+ Rake::TestTask.new do |t|
23
+ t.libs << "test"
24
+ t.test_files = FileList['test/test*.rb']
25
+ t.verbose = true
26
+ end
27
+
28
+ # vim: syntax=ruby
@@ -1,342 +1,328 @@
1
- require 'marc'
2
- require 'spreadsheet'
3
-
4
- # Class to generate marc records from Overdrive provided metadata spreadsheet
5
- # Usage:
6
- # # Remove the header of the Overdrive spreadsheet and save it as .xls (not xml)
7
- # require 'overdrive_metadata'
8
- # records = OverdriveMetadata.new('spreadsheets/111111.xls')
9
- # puts "R: " + records.size.to_s # print number of records generated to console
10
- # w = MARC::Writer.new('generated.mrc')
11
- # records.each do |r|
12
- # begin
13
- # w.write r
14
- # rescue
15
- # puts "FAILED: " + r['245']['a']
16
- # end
17
- # end
18
- # w.close
19
-
20
- class OverdriveMetadata
21
- VERSION = '1.0.2.2'
22
-
23
- attr_reader :records
24
-
25
- DEF_FORMAT = 'eBook'
26
- OD_ORG = 'OverDrive, Inc.'
27
- OD_URL = 'http://www.overdrive.com'
28
- AGENCY = 'JTH' # Sonoma County Library
29
- ACCESS = 'Mode of access: World Wide Web.'
30
- URL_MSG = 'Click to download this resource.'
31
- DISCLAIM = 'Record generated from Overdrive metadata spreadsheet.'
32
- READ_ERR = 'Error, close file, check file path or try resaving file as .xls (not xml)'
33
-
34
- # add option for config. file in future
35
- HEADERS = {
36
- :oclc => 19,
37
- :date => 12,
38
- :time => 21,
39
- :isbn => 1,
40
- :author => 4,
41
- :title => 2,
42
- :place => 11,
43
- :publisher => 3,
44
- :requires => 10,
45
- :format => 9,
46
- :filesize => 8,
47
- :reader => 14,
48
- :title_src => 13,
49
- :summary => 15,
50
- :subjects => 5,
51
- :download => 7,
52
- :excerpt => 16,
53
- :cover => 17,
54
- :thumb => 18,
55
- }
56
-
57
- def initialize(metadata_file)
58
- begin
59
- @metadata = Spreadsheet.open(metadata_file).worksheet 0
60
- rescue Exception => ex
61
- raise READ_ERR
62
- end
63
- @records = []
64
- @count = 0
65
- map
66
- end
67
-
68
- def map
69
- @metadata.each do |row|
70
- @records << create_record(row)
71
- end
72
- @records.compact!
73
- merge_by_content_url
74
- end
75
-
76
- def create_record(data)
77
- @count += 1
78
- field = package_data(data)
79
- r = field[:format].match(/#{DEF_FORMAT}/) ? EBook.new : EAudioBook.new
80
- begin
81
- r.make_control_field('001', field[:oclc])
82
- r.make_006
83
- r.make_007
84
- r.make_fixed_field(field[:year], field[:month], field[:day])
85
- r.make_data_field('020', ' ', ' ', {'a' => field[:isbn] + ' ' + r.isbn}) unless field[:isbn].empty?
86
- r.make_data_field('037', ' ', ' ', {'b' => OD_ORG, 'n' => OD_URL})
87
- r.make_data_field('040', ' ', ' ', {'a' => AGENCY, 'c' => AGENCY})
88
- r.make_data_field('100', '1', ' ', {'a' => normalize_author(field[:author])})
89
- r.make_title(field[:title], field[:author])
90
- r.make_publication(field[:place], field[:publisher], field[:year])
91
- r.make_physical(field[:hours], field[:minutes])
92
- r.make_data_field('306', ' ', ' ', {'a' => field[:hours] + field[:minutes] + field[:seconds]})
93
- r.make_data_field('538', ' ', ' ', {'a' => ACCESS})
94
- r.make_data_field('538', ' ', ' ', {'a' => 'Requires ' + field[:requires] + '.'})
95
- r.make_data_field('500', ' ', ' ', {'a' => "#{field[:format]} (file size: #{field[:filesize]} MB)."})
96
- r.make_data_field('511', '0', ' ', {'a' => "Read by #{field[:reader]}."}) unless field[:reader].empty?
97
- r.make_data_field('520', ' ', ' ', {'a' => field[:summary]}) unless field[:summary].match(/^#+$/)
98
- r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
99
- r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'}) if r.is_a? EAudioBook
100
- r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."}) if r.is_a? EAudioBook
101
- field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => clean_string(s).strip + '.', '2' => 'local'}) }
102
- r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
103
- r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
104
- r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
105
- r.make_data_field('856', '4', '0', {'u' => field[:excerpt], 'y' => "Excerpt (#{field[:format]})."})
106
- r.make_data_field('856', '4', '2', {'u' => field[:cover], 'y' => "<img class=\"scl_mwthumb\" src=\"#{field[:thumb]}\" alt=\"Artwork for this title - #{field[:title].gsub(/[^A-Za-z ]/, '')}\" />"})
107
- r.make_data_field('907', ' ', ' ', {'a' => 'ER'})
108
- r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
109
- return r.record
110
- rescue Exception => ex
111
- puts @count.to_s + ': ' + "#{ex.message}\n" + ex.backtrace[0..2].join("\n")
112
- nil
113
- end
114
- end
115
-
116
- def package_data(data)
117
- values = {}
118
- values[:isbn] = data[HEADERS[:isbn]]
119
- values[:date] = data[HEADERS[:date]]
120
- values[:place] = data[HEADERS[:place]]
121
- values[:publisher] = data[HEADERS[:publisher]]
122
- values[:month] = ''
123
- values[:day] = ''
124
- if values[:date].match(/\d{1,2}\/\d{1,2}\/\d{4}/)
125
- month, day, year = values[:date].split '/'
126
- values[:month] = month
127
- values[:day] = day
128
- end
129
- values[:year] = values[:date].match(/\d{4}/).to_s # Fall-back
130
- values[:time] = data[HEADERS[:time]]
131
- hr, mn, sc = values[:time].split ':'
132
- values[:hours] = hr ? hr : ''
133
- values[:minutes] = mn ? mn : ''
134
- values[:seconds] = sc ? sc : ''
135
- values[:author] = clean_string data[HEADERS[:author]]
136
- values[:title] = clean_string data[HEADERS[:title]]
137
- values[:title_src] = data[HEADERS[:title_src]]
138
- values[:reader] = clean_string data[HEADERS[:reader]]
139
- values[:requires] = data[HEADERS[:requires]]
140
- values[:format] = data[HEADERS[:format]]
141
- values[:filesize] = kb_to_mb(data[HEADERS[:filesize]])
142
- values[:summary] = clean_string data[HEADERS[:summary]]
143
- values[:subjects] = data[HEADERS[:subjects]].split(',') rescue []
144
- values[:download] = data[HEADERS[:download]]
145
- values[:excerpt] = data[HEADERS[:excerpt]]
146
- values[:thumb] = data[HEADERS[:thumb]]
147
- values[:cover] = data[HEADERS[:cover]]
148
- values[:oclc] = data[HEADERS[:oclc]].to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + data[HEADERS[:oclc]]
149
- values.each { |k, v| values[k] = '' if v.nil? }
150
- return values
151
- end
152
-
153
- def merge_by_content_url
154
- puts 'Merging (may take a while on large record sets) ...'
155
- content_url = Hash.new(0)
156
- @records.each do |record|
157
- content_url[record['856']['u']] += 1
158
- end
159
- content_url.delete_if { |k,v| v < 2 }
160
- content_url.keys.each do |url|
161
- rcds = @records.find_all { |r| r['856']['u'] == url }
162
- raise 'Found invalid number of duplicate records: ' + url unless rcds.size == 2
163
- file_note = rcds[1].find { |f| f.tag == '500' and f['a'] =~ /OverDrive (WMA|MP3) Audiobook/ }
164
- excerpt = rcds[1].find { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }
165
- if file_note and excerpt
166
- begin
167
- rcds[0].fields.insert(rcds[0].fields.index { |f| f.tag == '500' }, file_note)
168
- rcds[0].fields.insert(rcds[0].fields.index { |f| f.tag == '856' and f['y'] =~ /Excerpt/ }, excerpt)
169
- @records.delete rcds[1]
170
- rescue Exception => ex
171
- puts url + ': ' + 'failed to merge'
172
- end
173
- end
174
- end
175
- @records
176
- end
177
-
178
- def make_id(id_string)
179
- return id_string[-9..-1].gsub(/\W/, '')
180
- end
181
-
182
- def normalize_author(author)
183
- return author if author.empty?
184
- author = author.split(',')[0]
185
- names = author.split ' '
186
- surname = names.last + ', '
187
- fullname = surname + names[0 .. names.length - 2].join(' ')
188
- fullname += '.' unless fullname[-1] == '.'
189
- return fullname
190
- end
191
-
192
- def clean_string(input_str)
193
- return input_str.gsub(/&lt;.*&gt;/, '').gsub(/&amp;/, '&').gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&#160;/, '').gsub(/&#235;/, 'e').gsub(/<\/?[^>]*>/, '').gsub(/\s{2}+/, ' ').strip rescue ''
194
- end
195
-
196
- # Quickly turn 325645 {kb} into 318 {mb} etc. + 1 so not 0
197
-
198
- def kb_to_mb(size)
199
- return (size.to_f / 1024 + 1).to_i.to_s
200
- end
201
-
202
- class ERecord
203
-
204
- GMD = '[electronic resource]'
205
- DATE_ERR = 'Date information not present for fixed field'
206
- FIXF_ERR = 'Invalid fixed field created'
207
- TITL_ERR = 'Title data is missing for record'
208
-
209
- attr_reader :record
210
-
211
- def initialize
212
- @record = MARC::Record.new
213
- @ldr = record.leader
214
- @ldr[5] = 'n'
215
- @ldr[7] = 'm'
216
- @ldr[17] = 'M'
217
- @ldr[18] = 'a'
218
- fixed_field = ''
219
- end
220
-
221
- def make_control_field(tag, value)
222
- return nil if value.empty?
223
- @record.append MARC::ControlField.new(tag, value)
224
- end
225
-
226
- def make_data_field(tag, ind1, ind2, subfields)
227
- s = []
228
- subfields.each do |k,v|
229
- return nil if v.nil? or v.empty?
230
- s << MARC::Subfield.new(k, v)
231
- end
232
- @record.append MARC::DataField.new(tag, ind1, ind2, *s)
233
- end
234
-
235
- def make_fixed_field(year, month, day)
236
- raise DATE_ERR if year.empty?
237
- fixed_field = @fixed_field
238
- unless month.empty? and day.empty?
239
- month = '0' + month if month.length == 1
240
- day = '0' + day if day.length == 1
241
- fixed_field[0..5] = year[2..3] + month + day
242
- fixed_field[7..10] = year
243
- else
244
- fixed_field[7..10] = year
245
- end
246
- raise FIXF_ERR unless fixed_field.length == 40
247
- make_control_field('008', fixed_field)
248
- end
249
-
250
- def make_title(title, sor)
251
- raise TITL_ERR if title.empty?
252
- t_ind1 = sor.empty? ? '0' : '1'
253
- t_ind2 = non_filing_characters title
254
- subfields = {}
255
- subfields['a'] = title
256
- subfields['h'] = GMD + ' /'
257
- unless sor.empty?
258
- value = sor[-1] == '.' ? "by #{sor}" : "by #{sor}."
259
- subfields['c'] = value
260
- else
261
- subfields['h'].gsub!(/\s+\/$/, '.')
262
- end
263
- make_data_field('245', t_ind1, t_ind2, subfields)
264
- end
265
-
266
- def make_publication(place, publisher, year)
267
- return nil if place.empty? or publisher.empty? or year.empty?
268
- make_data_field('260', ' ', ' ', {'a' => "#{place} :", 'b' => "#{publisher},", 'c' => "#{year}."})
269
- end
270
-
271
- def non_filing_characters(title)
272
- return case
273
- when title.match(/^The /)
274
- '4'
275
- when title.match(/^An /)
276
- '3'
277
- when title.match(/^A /)
278
- '2'
279
- else
280
- '0'
281
- end
282
- end
283
-
284
- end
285
-
286
- class EBook < ERecord
287
-
288
- attr_reader :isbn, :subject
289
-
290
- def initialize
291
- super
292
- @ldr[6] = 'a'
293
- @fixed_field = ' s xxu|||| s 0||| eng d'
294
- @isbn = '(electronic bk. : OverDrive Electronic Book)'
295
- @subject = 'Downloadable ebooks.'
296
- # set leader
297
- end
298
-
299
- def make_006
300
- make_control_field('006', 'm d ')
301
- end
302
-
303
- def make_007
304
- make_control_field('007', 'cr nnu---|||||')
305
- end
306
-
307
- def make_physical(*args)
308
- make_data_field('300', ' ', ' ', {'a' => "1 online resource."})
309
- end
310
-
311
- end
312
-
313
- class EAudioBook < ERecord
314
-
315
- attr_reader :isbn, :subject
316
-
317
- def initialize
318
- super
319
- @ldr[6] = 'i'
320
- @fixed_field = ' s xxunnnn s eng d'
321
- @isbn = '(sound recording : OverDrive Audio Book)'
322
- @subject = 'Downloadable audiobooks.'
323
- # set leader
324
- end
325
-
326
- def make_006
327
- make_control_field('006', 'm h ')
328
- end
329
-
330
- def make_007
331
- make_control_field('007', 'sz usnnnnnnned')
332
- make_control_field('007', 'cr nna |||||')
333
- end
334
-
335
- def make_physical(hours, minutes)
336
- return nil if hours.empty? or minutes.empty?
337
- make_data_field('300', ' ', ' ', {'a' => "1 sound file (ca. #{hours} hr., #{minutes} min.) :", 'b' => 'digital.'})
338
- end
339
-
340
- end
341
-
1
+ require 'marc'
2
+ require 'spreadsheet'
3
+
4
+ class OverdriveMetadata
5
+ VERSION = '1.0.2.3'
6
+
7
+ attr_reader :records, :count
8
+
9
+ OD_ORG = 'OverDrive, Inc.'
10
+ OD_URL = 'http://www.overdrive.com'
11
+ ACCESS = 'Mode of access: World Wide Web.'
12
+ URL_MSG = 'Click to download this resource.'
13
+ DISCLAIM = 'Record generated from Overdrive metadata spreadsheet.'
14
+ READ_ERR = 'Error reading spreadsheet! Close, verfiy location and ensure .xls'
15
+
16
+ HEADERS = {
17
+ :oclc => 19,
18
+ :date => 12,
19
+ :time => 21,
20
+ :isbn => 1,
21
+ :author => 4,
22
+ :title => 2,
23
+ :place => 11,
24
+ :publisher => 3,
25
+ :requires => 10,
26
+ :format => 9,
27
+ :filesize => 8,
28
+ :reader => 14,
29
+ :title_src => 13,
30
+ :summary => 15,
31
+ :subjects => 5,
32
+ :download => 7,
33
+ :excerpt => 16,
34
+ :cover => 17,
35
+ :thumb => 18,
36
+ }
37
+
38
+ def initialize(metadata_file, agency, header = true, ebook_regex = nil)
39
+ begin
40
+ @metadata = Spreadsheet.open(metadata_file).worksheet 0
41
+ rescue Exception => ex
42
+ raise READ_ERR
43
+ end
44
+ @agency = agency
45
+ @ebook_regex = ebook_regex.nil? ? '(ebook|epub|kindle|pdf)' : ebook_regex
46
+ @records = []
47
+ @count = 0
48
+ @header = header
49
+ @content_rec = {}
50
+ end
51
+
52
+ def map
53
+ @metadata.each do |row|
54
+ if @header
55
+ @header = false
56
+ next
57
+ end
58
+
59
+ begin
60
+ @records << create_record(row)
61
+ rescue Exception => ex
62
+ puts "#{@count.to_s}\t#{ex.message}"
63
+ next
64
+ end
65
+
66
+ end
67
+
68
+ @records.compact!
69
+ @records
70
+ end
71
+
72
+ def create_record(data)
73
+ @count += 1
74
+ field = package_data(data)
75
+
76
+ if @content_rec.has_key? field[:download]
77
+ record = @content_rec[field[:download]]
78
+ format = MARC::DataField.new('500', ' ', ' ', ['a', "#{field[:format]} (file size: #{field[:filesize]} MB)."])
79
+ record.fields.insert(record.fields.index { |f| f.tag == '500' }, format)
80
+
81
+ unless field[:excerpt].empty?
82
+ excerpt = MARC::DataField.new('856', '4', '0', ['u', field[:excerpt]], ['y', "Excerpt (#{field[:format]})."])
83
+ record.fields.insert(record.fields.index { |f| f.tag == '856' }, excerpt)
84
+ end
85
+ return nil
86
+ end
87
+
88
+ r = field[:format].match(/#{@ebook_regex}/i) ? EBook.new : EAudioBook.new
89
+ r.make_control_field('001', field[:oclc])
90
+ r.make_006
91
+ r.make_007
92
+ r.make_fixed_field(field[:year], field[:month], field[:day])
93
+ r.make_data_field('020', ' ', ' ', {'a' => field[:isbn] + ' ' + r.isbn}) unless field[:isbn].empty?
94
+ r.make_data_field('037', ' ', ' ', {'b' => OD_ORG, 'n' => OD_URL})
95
+ r.make_data_field('040', ' ', ' ', {'a' => @agency, 'c' => @agency})
96
+ r.make_data_field('100', '1', ' ', {'a' => normalize_author(field[:author])})
97
+ r.make_title(field[:title], field[:author])
98
+ r.make_publication(field[:place], field[:publisher], field[:year])
99
+ r.make_physical(field[:hours], field[:minutes])
100
+ r.make_data_field('306', ' ', ' ', {'a' => field[:hours] + field[:minutes] + field[:seconds]})
101
+ r.make_data_field('538', ' ', ' ', {'a' => ACCESS})
102
+ r.make_data_field('538', ' ', ' ', {'a' => 'Requires ' + field[:requires] + '.'})
103
+ r.make_data_field('500', ' ', ' ', {'a' => "#{field[:format]} (file size: #{field[:filesize]} MB)."})
104
+ r.make_data_field('511', '0', ' ', {'a' => "Read by #{field[:reader]}."}) unless field[:reader].empty?
105
+ r.make_data_field('520', ' ', ' ', {'a' => field[:summary]}) unless field[:summary].match(/^#+$/)
106
+ r.make_data_field('500', ' ', ' ', {'a' => "Title from: #{field[:title_src]}."})
107
+
108
+ if r.is_a? EAudioBook
109
+ r.make_data_field('500', ' ', ' ', {'a' => 'Unabridged.'})
110
+ r.make_data_field('500', ' ', ' ', {'a' => "Duration: #{field[:hours]} hr., #{field[:minutes]} min."})
111
+ end
112
+
113
+ field[:subjects].each { |s| r.make_data_field('655', ' ', '7', {'a' => clean_string(s).strip + '.', '2' => 'local'}) }
114
+ r.make_data_field('655', ' ', '7', {'a' => r.subject, '2' => 'local'})
115
+ r.make_data_field('700', '1', ' ', {'a' => normalize_author(field[:reader])})
116
+ r.make_data_field('856', '4', '0', {'u' => field[:excerpt], 'y' => "Excerpt (#{field[:format]})."})
117
+ r.make_data_field('856', '4', '0', {'u' => field[:download], 'y' => URL_MSG})
118
+
119
+ if @agency == 'JTH'
120
+ r.make_data_field('856', '4', '2', {'u' => field[:cover], 'y' => "<img class=\"scl_mwthumb\" src=\"#{field[:thumb]}\" alt=\"Artwork for this title - #{field[:title].gsub(/[^A-Za-z ]/, '')}\" />"})
121
+ r.make_data_field('907', ' ', ' ', {'a' => 'ER'})
122
+ end
123
+
124
+ r.make_data_field('991', ' ', ' ', {'a' => DISCLAIM})
125
+
126
+ @content_rec[field[:download]] = r.record
127
+ return r.record
128
+ end
129
+
130
+ def package_data(data)
131
+ values = {}
132
+ values[:isbn] = data[HEADERS[:isbn]]
133
+ values[:date] = data[HEADERS[:date]]
134
+ values[:place] = data[HEADERS[:place]]
135
+ values[:publisher] = data[HEADERS[:publisher]]
136
+ values[:month] = ''
137
+ values[:day] = ''
138
+ if values[:date].match(/\d{1,2}\/\d{1,2}\/\d{2,4}/)
139
+ month, day, year = values[:date].split '/'
140
+ values[:month] = month
141
+ values[:day] = day
142
+ values[:year] = year.size == 4 ? year : "20#{year}"
143
+ end
144
+ values[:year] = values[:date].match(/\d{4}/).to_s unless year # fall-back
145
+ values[:time] = data[HEADERS[:time]]
146
+ hr, mn, sc = values[:time].split ':'
147
+ values[:hours] = hr ? hr : ''
148
+ values[:minutes] = mn ? mn : ''
149
+ values[:seconds] = sc ? sc : ''
150
+ values[:author] = clean_string data[HEADERS[:author]]
151
+ values[:title] = clean_string data[HEADERS[:title]]
152
+ values[:title_src] = data[HEADERS[:title_src]]
153
+ values[:reader] = clean_string data[HEADERS[:reader]]
154
+ values[:requires] = data[HEADERS[:requires]]
155
+ values[:format] = data[HEADERS[:format]]
156
+ values[:filesize] = kb_to_mb(data[HEADERS[:filesize]])
157
+ values[:summary] = clean_string data[HEADERS[:summary]]
158
+ values[:subjects] = data[HEADERS[:subjects]].split(',') rescue []
159
+ values[:download] = data[HEADERS[:download]]
160
+ values[:excerpt] = data[HEADERS[:excerpt]]
161
+ values[:thumb] = data[HEADERS[:thumb]]
162
+ values[:cover] = data[HEADERS[:cover]]
163
+ values[:oclc] = data[HEADERS[:oclc]].to_s.empty? ? 'ovr' + make_id(values[:download]) : 'ocn' + data[HEADERS[:oclc]]
164
+ values.each { |k, v| values[k] = '' if v.nil? }
165
+ return values
166
+ end
167
+
168
+ def make_id(id_string)
169
+ return id_string[-9..-1].gsub(/\W/, '')
170
+ end
171
+
172
+ def normalize_author(author)
173
+ return author if author.empty?
174
+ author = author.split(',')[0]
175
+ names = author.split ' '
176
+ surname = names.last + ', '
177
+ fullname = surname + names[0 .. names.length - 2].join(' ')
178
+ fullname += '.' unless fullname[-1] == '.'
179
+ return fullname
180
+ end
181
+
182
+ def clean_string(input_str)
183
+ return input_str.gsub(/&lt;.*&gt;/, '').gsub(/&amp;/, '&').gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&#160;/, '').gsub(/&#235;/, 'e').gsub(/<\/?[^>]*>/, '').gsub(/\s{2}+/, ' ').strip rescue ''
184
+ end
185
+
186
+ def kb_to_mb(size)
187
+ return (size.to_f / 1024 + 1).to_i.to_s
188
+ end
189
+
190
+ class ERecord
191
+
192
+ GMD = '[electronic resource]'
193
+ DATE_ERR = 'Date information not present for fixed field'
194
+ FIXF_ERR = 'Invalid fixed field created'
195
+ TITL_ERR = 'Title data is missing for record'
196
+
197
+ attr_reader :record
198
+
199
+ def initialize
200
+ @record = MARC::Record.new
201
+ @ldr = record.leader
202
+ @ldr[5] = 'n'
203
+ @ldr[7] = 'm'
204
+ @ldr[17] = 'M'
205
+ @ldr[18] = 'a'
206
+ @fixed_field = ''
207
+ end
208
+
209
+ def make_control_field(tag, value)
210
+ return nil if value.empty?
211
+ @record.append MARC::ControlField.new(tag, value)
212
+ end
213
+
214
+ def make_data_field(tag, ind1, ind2, subfields)
215
+ s = []
216
+ subfields.each do |k,v|
217
+ return nil if v.nil? or v.empty?
218
+ s << MARC::Subfield.new(k, v)
219
+ end
220
+ @record.append MARC::DataField.new(tag, ind1, ind2, *s)
221
+ end
222
+
223
+ def make_fixed_field(year, month, day)
224
+ raise DATE_ERR if year.empty?
225
+ fixed_field = @fixed_field
226
+ unless month.empty? and day.empty?
227
+ month = '0' + month if month.length == 1
228
+ day = '0' + day if day.length == 1
229
+ fixed_field[0..5] = year[2..3] + month + day
230
+ fixed_field[7..10] = year
231
+ else
232
+ fixed_field[7..10] = year
233
+ end
234
+ raise FIXF_ERR unless fixed_field.length == 40
235
+ make_control_field('008', fixed_field)
236
+ end
237
+
238
+ def make_title(title, sor)
239
+ raise TITL_ERR if title.empty?
240
+ t_ind1 = sor.empty? ? '0' : '1'
241
+ t_ind2 = non_filing_characters title
242
+ subfields = {}
243
+ subfields['a'] = title
244
+ subfields['h'] = GMD + ' /'
245
+ unless sor.empty?
246
+ value = sor[-1] == '.' ? "by #{sor}" : "by #{sor}."
247
+ subfields['c'] = value
248
+ else
249
+ subfields['h'].gsub!(/\s+\/$/, '.')
250
+ end
251
+ make_data_field('245', t_ind1, t_ind2, subfields)
252
+ end
253
+
254
+ def make_publication(place, publisher, year)
255
+ return nil if place.empty? or publisher.empty? or year.empty?
256
+ make_data_field('260', ' ', ' ', {'a' => "#{place} :", 'b' => "#{publisher},", 'c' => "#{year}."})
257
+ end
258
+
259
+ def non_filing_characters(title)
260
+ return case
261
+ when title.match(/^The /)
262
+ '4'
263
+ when title.match(/^An /)
264
+ '3'
265
+ when title.match(/^A /)
266
+ '2'
267
+ else
268
+ '0'
269
+ end
270
+ end
271
+
272
+ end
273
+
274
+ class EBook < ERecord
275
+
276
+ attr_reader :isbn, :subject
277
+
278
+ def initialize
279
+ super
280
+ @ldr[6] = 'a'
281
+ @fixed_field = ' s xxu|||| s 0||| eng d'
282
+ @isbn = '(electronic bk. : OverDrive Electronic Book)'
283
+ @subject = 'Downloadable ebooks.'
284
+ end
285
+
286
+ def make_006
287
+ make_control_field('006', 'm d ')
288
+ end
289
+
290
+ def make_007
291
+ make_control_field('007', 'cr nnu---|||||')
292
+ end
293
+
294
+ def make_physical(*args)
295
+ make_data_field('300', ' ', ' ', {'a' => "1 online resource."})
296
+ end
297
+
298
+ end
299
+
300
+ class EAudioBook < ERecord
301
+
302
+ attr_reader :isbn, :subject
303
+
304
+ def initialize
305
+ super
306
+ @ldr[6] = 'i'
307
+ @fixed_field = ' s xxunnnn s eng d'
308
+ @isbn = '(sound recording : OverDrive Audio Book)'
309
+ @subject = 'Downloadable audiobooks.'
310
+ end
311
+
312
+ def make_006
313
+ make_control_field('006', 'm h ')
314
+ end
315
+
316
+ def make_007
317
+ make_control_field('007', 'sz usnnnnnnned')
318
+ make_control_field('007', 'cr nna |||||')
319
+ end
320
+
321
+ def make_physical(hours, minutes)
322
+ return nil if hours.empty? or minutes.empty?
323
+ make_data_field('300', ' ', ' ', {'a' => "1 sound file (ca. #{hours} hr., #{minutes} min.) :", 'b' => 'digital.'})
324
+ end
325
+
326
+ end
327
+
342
328
  end
@@ -1,45 +1,44 @@
1
- #!/usr/bin/env gem build
2
- # encoding: utf-8
3
-
4
- require "base64"
5
-
6
- Gem::Specification.new do |s|
7
- s.name = %q{overdrive_metadata}
8
- s.version = '1.0.2.2'
9
- s.authors = ["Mark Cooper"]
10
- s.date = %q{2011-11-22}
11
- s.homepage = %q{http://www.libcode.net}
12
- s.email = Base64.decode64("bWFya2NocmlzdG9waGVyY29vcGVyQGdtYWlsLmNvbQ==\n")
13
-
14
- s.summary = "Generate marc records from Overdrive provided metadata spreadsheets."
15
- s.description = "#{s.summary}"
16
- s.cert_chain = nil
17
- s.has_rdoc = true
18
-
19
- # files
20
- s.files = `git ls-files`.split("\n")
21
- s.test_files = `git ls-files test`.split("\n")
22
- s.extra_rdoc_files = ["README.txt"]
23
- s.rdoc_options = ["--main", "README.txt"]
24
-
25
- Dir["bin/*"].map(&File.method(:basename))
26
- # s.default_executable = "overdrive_metadata"
27
- s.require_paths = ["lib"]
28
-
29
- # Ruby version
30
- s.required_ruby_version = ::Gem::Requirement.new("~> 1.9")
31
-
32
- # dependencies
33
- s.add_development_dependency "bundler"
34
-
35
- begin
36
- require "changelog"
37
- rescue LoadError
38
- warn "You have to have changelog gem installed for post install message"
39
- else
40
- s.post_install_message = CHANGELOG.new.version_changes
41
- end
42
-
43
- # RubyForge
44
- s.rubyforge_project = %q{overdrive_metadata}
45
- end
1
+ #!/usr/bin/env gem build
2
+ # encoding: utf-8
3
+
4
+ require "base64"
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{overdrive_metadata}
8
+ s.version = '1.0.2.3'
9
+ s.authors = ["Mark Cooper"]
10
+ s.date = %q{2011-11-22}
11
+ s.homepage = %q{http://www.libcode.net}
12
+ s.email = Base64.decode64("bWFya2NocmlzdG9waGVyY29vcGVyQGdtYWlsLmNvbQ==\n")
13
+
14
+ s.summary = "Generate marc records from Overdrive provided metadata spreadsheets."
15
+ s.description = "#{s.summary}"
16
+ s.has_rdoc = true
17
+
18
+ # files
19
+ s.files = `git ls-files`.split("\n")
20
+ s.test_files = `git ls-files test`.split("\n")
21
+ s.extra_rdoc_files = ["README.txt"]
22
+ s.rdoc_options = ["--main", "README.txt"]
23
+
24
+ Dir["bin/*"].map(&File.method(:basename))
25
+ # s.default_executable = "overdrive_metadata"
26
+ s.require_paths = ["lib"]
27
+
28
+ # Ruby version
29
+ s.required_ruby_version = ::Gem::Requirement.new("~> 1.9")
30
+
31
+ # dependencies
32
+ s.add_development_dependency "bundler"
33
+
34
+ begin
35
+ require "changelog"
36
+ rescue LoadError
37
+ warn "You have to have changelog gem installed for post install message"
38
+ else
39
+ s.post_install_message = CHANGELOG.new.version_changes
40
+ end
41
+
42
+ # RubyForge
43
+ s.rubyforge_project = %q{overdrive_metadata}
44
+ end
@@ -1,16 +1,16 @@
1
- require "shoulda"
2
- require "overdrive_metadata"
3
-
4
- class TestOverdriveMetadata < Test::Unit::TestCase
5
-
6
- context "Creating Overdrive records" do
7
-
8
- setup do
9
- @o = OverdriveMetadata.new('raw/test.xls')
10
- end
11
-
12
- # Write some tests ...
13
-
14
- end
15
-
16
- end
1
+ require "shoulda"
2
+ require "overdrive_metadata"
3
+
4
+ class TestOverdriveMetadata < Test::Unit::TestCase
5
+
6
+ context "Creating Overdrive records" do
7
+
8
+ setup do
9
+ @o = OverdriveMetadata.new('raw/test.xls')
10
+ end
11
+
12
+ # Write some tests ...
13
+
14
+ end
15
+
16
+ end
metadata CHANGED
@@ -1,20 +1,19 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: overdrive_metadata
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2.2
4
+ version: 1.0.2.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Mark Cooper
9
9
  autorequire:
10
10
  bindir: bin
11
- cert_chain:
12
- date: 2011-11-22 00:00:00.000000000 -08:00
13
- default_executable:
11
+ cert_chain: []
12
+ date: 2011-11-22 00:00:00.000000000 Z
14
13
  dependencies:
15
14
  - !ruby/object:Gem::Dependency
16
15
  name: bundler
17
- requirement: &10219932 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
18
17
  none: false
19
18
  requirements:
20
19
  - - ! '>='
@@ -22,9 +21,15 @@ dependencies:
22
21
  version: '0'
23
22
  type: :development
24
23
  prerelease: false
25
- version_requirements: *10219932
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
26
30
  description: Generate marc records from Overdrive provided metadata spreadsheets.
27
- email: markchristophercooper@gmail.com
31
+ email: !binary |-
32
+ bWFya2NocmlzdG9waGVyY29vcGVyQGdtYWlsLmNvbQ==
28
33
  executables: []
29
34
  extensions: []
30
35
  extra_rdoc_files:
@@ -38,7 +43,6 @@ files:
38
43
  - overdrive_metadata.gemspec
39
44
  - raw/test.xls
40
45
  - test/test_overdrive_metadata.rb
41
- has_rdoc: true
42
46
  homepage: http://www.libcode.net
43
47
  licenses: []
44
48
  post_install_message:
@@ -61,7 +65,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
65
  version: '0'
62
66
  requirements: []
63
67
  rubyforge_project: overdrive_metadata
64
- rubygems_version: 1.5.2
68
+ rubygems_version: 1.8.24
65
69
  signing_key:
66
70
  specification_version: 3
67
71
  summary: Generate marc records from Overdrive provided metadata spreadsheets.