enju_loc 0.1.0.pre3 → 0.1.0.pre4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad30cc6720622987403d9a1fa651d5ef300b33bf
4
- data.tar.gz: 89ff96361a63d8501792ce07083619941fbc6582
3
+ metadata.gz: 1d6f7ddad0b22f2b23741d4e91c73f27ad22827c
4
+ data.tar.gz: daa82be27e7d64aa3fef0a31d3b46ec5294721dc
5
5
  SHA512:
6
- metadata.gz: f6f3baeff7a8b82f8c7d2cd552b39f1bf42a64e8378a5759984ae3becfd9f1744a8e29cbff89438848caeab60dfac1fb1be4c53424518588022542c0274c27e1
7
- data.tar.gz: 62b0e0829caab8c2658e3c33ac88834cffbfa8d813fb74526bca1be9c4f4bf136b0b87a4c1b7665b45fb6daef61a1774cee9caee715ebb091b9ee09b6bb761e3
6
+ metadata.gz: 34eec96ca78e77b7f9304d1644bed1ca1ca7eb0e522ae6760e2f2e6cf4cc819ff4666f70cb0a2cd30162e13f5dd41dc5c97b547dd55671d0697265aea49fbe3e
7
+ data.tar.gz: 3971a831b8c62dee85e6ce3afde3901be6228371d78c5bbb44fdd5a4f5bb488495ea84f64401ecfc44d19b01bb3fa8d7919e9d615407fc14e3a47b6af8c0e0bc
@@ -1,3 +1,11 @@
1
1
  = EnjuLoc
2
+ {<img src="https://secure.travis-ci.org/next-l/enju_loc.png?branch=1.1" alt="Build Status" />}[http://travis-ci.org/next-l/enju_loc]
3
+ {<img src="https://coveralls.io/repos/next-l/enju_loc/badge.png?branch=1.1" alt="Coverage Status" />}[https://coveralls.io/r/next-l/enju_loc?branch=1.1]
4
+ {<img src="https://hakiri.io/github/next-l/enju_loc/1.1.svg" alt="security" />}[https://hakiri.io/github/next-l/enju_loc/1.1]
2
5
 
3
- This project rocks and uses MIT-LICENSE.
6
+ This project rocks and uses MIT-LICENSE.
7
+
8
+ == 製作者・貢献者 (Authors and contributors)
9
+ * {TAKAKU, Masao}[https://github.com/masao] ({@tmasao}[https://twitter.com/tmasao])
10
+ * {TANABE, Kosuke}[https://github.com/nabeta] ({@nabeta}[https://twitter.com/nabeta])
11
+ * {Project Next-L}[http://www.next-l.jp] ({@ProjectNextL}[https://twitter.com/ProjectNextL])
@@ -85,7 +85,9 @@ class LocSearch
85
85
  end
86
86
 
87
87
  def self.import_from_sru_response(lccn)
88
- identifier = Identifier.where(body: lccn, identifier_type_id: IdentifierType.where(name: 'lccn').first_or_create.id).first
88
+ identifier_type_lccn = IdentifierType.where(name: 'lccn').first
89
+ identifier_type_lccn = IdentifierType.create!(name: 'lccn') unless identifier_type_lccn
90
+ identifier = Identifier.where(body: lccn, identifier_type_id: identifier_type_lccn.id).first
89
91
  return if identifier
90
92
  url = make_sru_request_uri("bath.lccn=#{ lccn }")
91
93
  response = Nokogiri::XML(Faraday.get(url).body).at( '//zs:recordData', {"zs" => "http://www.loc.gov/zing/srw/"} )
@@ -1,4 +1,3 @@
1
- require 'enju_seed'
2
1
  require 'nokogiri'
3
2
  require 'faraday'
4
3
 
@@ -21,8 +21,8 @@ module EnjuLoc
21
21
 
22
22
  def import_record_from_loc_isbn(options)
23
23
  #if options[:isbn]
24
- lisbn = Lisbn.new(options[:isbn])
25
- raise EnjuLoc::InvalidIsbn unless lisbn.valid?
24
+ lisbn = Lisbn.new(options[:isbn])
25
+ raise EnjuLoc::InvalidIsbn unless lisbn.valid?
26
26
  #end
27
27
 
28
28
  manifestation = Manifestation.find_by_isbn(lisbn.isbn)
@@ -36,7 +36,9 @@ module EnjuLoc
36
36
  NS = {"mods"=>"http://www.loc.gov/mods/v3"}
37
37
  def import_record_from_loc( doc )
38
38
  record_identifier = doc.at( '//mods:recordInfo/mods:recordIdentifier', NS ).try(:content)
39
- loc_identifier = Identifier.where(:body => record_identifier, :identifier_type_id => IdentifierType.where(:name => 'loc_identifier').first_or_create.id).first
39
+ identifier_type = IdentifierType.where(name: 'loc_identifier').first
40
+ identifier_type = IdentifierType.create!(name: 'loc_identifier') unless identifier_type
41
+ loc_identifier = Identifier.where(:body => record_identifier, :identifier_type_id => identifier_type.id).first
40
42
  return loc_identifier.manifestation if loc_identifier
41
43
 
42
44
  publishers = []
@@ -67,25 +69,25 @@ module EnjuLoc
67
69
  issn = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn"]',NS).try(:content).to_s)
68
70
  issn_l = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn-l"]',NS).try(:content).to_s)
69
71
 
70
- types = get_mods_carrier_and_content_types( doc )
71
- content_type = types[ :content_type ]
72
- carrier_type = types[ :carrier_type ]
72
+ types = get_mods_carrier_and_content_types( doc )
73
+ content_type = types[ :content_type ]
74
+ carrier_type = types[ :carrier_type ]
73
75
 
74
- record_identifier = doc.at('//mods:recordInfo/mods:recordIdentifier',NS).try(:content)
76
+ record_identifier = doc.at('//mods:recordInfo/mods:recordIdentifier',NS).try(:content)
75
77
  description = doc.xpath('//mods:abstract',NS).collect(&:content).join("\n")
76
78
  edition_string = doc.at('//mods:edition',NS).try(:content)
77
79
  extent = get_mods_extent(doc)
78
- note = get_mods_note(doc)
80
+ note = get_mods_note(doc)
79
81
  frequency = get_mods_frequency(doc)
80
- issuance = doc.at('//mods:issuance',NS).try(:content)
81
- is_serial = true if issuance == "serial"
82
+ issuance = doc.at('//mods:issuance',NS).try(:content)
83
+ is_serial = true if issuance == "serial"
82
84
  statement_of_responsibility = get_mods_statement_of_responsibility(doc)
83
- access_address = get_mods_access_address(doc)
84
- publication_place = get_mods_publication_place(doc)
85
+ access_address = get_mods_access_address(doc)
86
+ publication_place = get_mods_publication_place(doc)
85
87
 
86
88
  manifestation = nil
87
89
  Agent.transaction do
88
- creator_agents = Agent.import_agents(creators)
90
+ creator_agents = Agent.import_agents(creators)
89
91
  publisher_agents = Agent.import_agents(publishers)
90
92
 
91
93
  manifestation = Manifestation.new(
@@ -95,51 +97,51 @@ module EnjuLoc
95
97
  :language_id => language_id,
96
98
  :pub_date => date,
97
99
  :description => description,
98
- :edition_string => edition_string,
100
+ :edition_string => edition_string,
99
101
  :statement_of_responsibility => statement_of_responsibility,
100
102
  :start_page => extent[:start_page],
101
103
  :end_page => extent[:end_page],
102
104
  :height => extent[:height],
103
- :access_address => access_address,
104
- :note => note,
105
- :publication_place => publication_place,
106
- :serial => is_serial,
105
+ :access_address => access_address,
106
+ :note => note,
107
+ :publication_place => publication_place,
108
+ :serial => is_serial,
107
109
  )
108
110
  identifier = {}
109
111
  if isbn
110
112
  identifier[:isbn] = Identifier.new(:body => isbn)
111
- identifier[:isbn].identifier_type = IdentifierType.where(:name => 'isbn').first_or_create
113
+ identifier[:isbn].identifier_type = IdentifierType.where(name: 'isbn').first || IdentifierType.create!(name: 'isbn')
112
114
  end
113
115
  if loc_identifier
114
116
  identifier[:loc_identifier] = Identifier.new(:body => loc_identifier)
115
- identifier[:loc_identifier].identifier_type = IdentifierType.where(:name => 'loc_identifier').first_or_create
117
+ identifier[:loc_identifier].identifier_type = IdentifierType.where(name: 'loc_identifier').first || IdentifierType.create!(name: 'loc_identifier')
116
118
  end
117
119
  if lccn
118
120
  identifier[:lccn] = Identifier.new(:body => lccn)
119
- identifier[:lccn].identifier_type = IdentifierType.where(:name => 'lccn').first_or_create
121
+ identifier[:lccn].identifier_type = IdentifierType.where(name: 'lccn').first || IdentifierType.create!(name: 'lccn')
120
122
  end
121
123
  if issn
122
124
  identifier[:issn] = Identifier.new(:body => issn)
123
- identifier[:issn].identifier_type = IdentifierType.where(:name => 'issn').first_or_create
125
+ identifier[:issn].identifier_type = IdentifierType.where(name: 'issn').first || IdentifierType.create!(name: 'issn')
124
126
  end
125
127
  if issn_l
126
128
  identifier[:issn_l] = Identifier.new(:body => issn_l)
127
- identifier[:issn_l].identifier_type = IdentifierType.where(:name => 'issn_l').first_or_create
129
+ identifier[:issn_l].identifier_type = IdentifierType.where(name: 'issn_l').first || IdentifierType.create!(name: 'issn_l')
128
130
  end
129
131
  manifestation.carrier_type = carrier_type if carrier_type
130
132
  manifestation.manifestation_content_type = content_type if content_type
131
- manifestation.frequency = frequency if frequency
133
+ manifestation.frequency = frequency if frequency
132
134
  if manifestation.save
133
135
  identifier.each do |k, v|
134
136
  manifestation.identifiers << v if v.valid?
135
137
  end
136
138
  manifestation.publishers << publisher_agents
137
- manifestation.creators << creator_agents
138
- create_subject_related_elements(doc, manifestation)
139
+ manifestation.creators << creator_agents
140
+ create_subject_related_elements(doc, manifestation)
139
141
  create_series_statement(doc, manifestation)
140
- if is_serial
142
+ if is_serial
141
143
  create_series_master(doc, manifestation)
142
- end
144
+ end
143
145
  end
144
146
  end
145
147
  return manifestation
@@ -147,31 +149,31 @@ module EnjuLoc
147
149
 
148
150
  private
149
151
  def create_subject_related_elements(doc, manifestation)
150
- subjects = get_mods_subjects(doc)
151
- classifications = get_mods_classifications(doc)
152
- if defined?(EnjuSubject)
153
- subject_heading_type = SubjectHeadingType.where(:name => 'lcsh').first_or_create
152
+ subjects = get_mods_subjects(doc)
153
+ classifications = get_mods_classifications(doc)
154
+ if defined?(EnjuSubject)
155
+ subject_heading_type = SubjectHeadingType.where(name: 'lcsh').first || SubjectHeadingType.create!(name: 'lcsh')
154
156
  subjects.each do |term|
155
157
  subject = Subject.where(:term => term[:term]).first
156
158
  unless subject
157
159
  subject = Subject.new(term)
158
160
  subject.subject_heading_type = subject_heading_type
159
- subject.subject_type = SubjectType.where(:name => 'concept').first_or_create
161
+ subject.subject_type = SubjectType.where(name: 'concept').first || SubjectType.create!(name: 'concept')
160
162
  end
161
163
  manifestation.subjects << subject
162
164
  end
163
165
  if classifications
164
- classification_type = ClassificationType.where(:name => 'ddc').first_or_create
165
- classifications.each do |ddc|
166
+ classification_type = ClassificationType.where(name: 'ddc').first || ClassificationType.create!(name: 'ddc')
167
+ classifications.each do |ddc|
166
168
  classification = Classification.where(:category => ddc).first
167
- unless classification
169
+ unless classification
168
170
  classification = Classification.new(:category => ddc)
169
171
  classification.classification_type = classification_type
170
- end
172
+ end
171
173
  manifestation.classifications << classification if classification.valid?
172
174
  end
173
175
  end
174
- end
176
+ end
175
177
  end
176
178
 
177
179
  def create_series_statement(doc, manifestation)
@@ -181,7 +183,7 @@ module EnjuLoc
181
183
  series_title = title.split(';')[0].strip
182
184
  end
183
185
  if series_title
184
- series_statement = SeriesStatement.where(:original_title => series_title).first_or_create
186
+ series_statement = SeriesStatement.where(:original_title => series_title).first || SeriesStatement.create!(original_title: series_title)
185
187
  if series_statement.try(:save)
186
188
  manifestation.series_statements << series_statement
187
189
  end
@@ -191,335 +193,337 @@ module EnjuLoc
191
193
 
192
194
  def create_series_master(doc, manifestation)
193
195
  titles = get_mods_titles(doc)
194
- series_statement = SeriesStatement.new(
195
- :original_title => titles[:original_title],
196
- :title_alternative => titles[:title_alternative],
197
- :series_master => true,
198
- )
199
- if series_statement.try(:save)
200
- manifestation.series_statements << series_statement
201
- end
196
+ series_statement = SeriesStatement.new(
197
+ :original_title => titles[:original_title],
198
+ :title_alternative => titles[:title_alternative],
199
+ :series_master => true,
200
+ )
201
+ if series_statement.try(:save)
202
+ manifestation.series_statements << series_statement
203
+ end
202
204
  end
203
205
 
204
206
  def get_mods_titles(doc)
205
- original_title = ""
206
- title_alternatives = []
207
- doc.xpath('//mods:mods/mods:titleInfo',NS).each do |e|
208
- type = e.attributes["type"].try(:content)
209
- case type
210
- when "alternative", "translated", "abbreviated", "uniform"
211
- title_alternatives << e.at('./mods:title',NS).content
212
- else
213
- nonsort = e.at('./mods:nonSort',NS).try(:content)
214
- original_title << nonsort if nonsort
215
- original_title << e.at('./mods:title',NS).try(:content)
216
- subtitle = e.at('./mods:subTitle',NS).try(:content)
217
- original_title << " : #{ subtitle }" if subtitle
218
- partnumber = e.at('./mods:partNumber',NS).try(:content)
219
- partname = e.at('./mods:partName',NS).try(:content)
220
- partname = [ partnumber, partname ].compact.join( ": " )
221
- original_title << ". #{ partname }" unless partname.blank?
222
- end
223
- end
224
- { :original_title => original_title, :title_alternative => title_alternatives.join( " ; " ) }
207
+ original_title = ""
208
+ title_alternatives = []
209
+ doc.xpath('//mods:mods/mods:titleInfo',NS).each do |e|
210
+ type = e.attributes["type"].try(:content)
211
+ case type
212
+ when "alternative", "translated", "abbreviated", "uniform"
213
+ title_alternatives << e.at('./mods:title',NS).content
214
+ else
215
+ nonsort = e.at('./mods:nonSort',NS).try(:content)
216
+ original_title << nonsort if nonsort
217
+ original_title << e.at('./mods:title',NS).try(:content)
218
+ subtitle = e.at('./mods:subTitle',NS).try(:content)
219
+ original_title << " : #{ subtitle }" if subtitle
220
+ partnumber = e.at('./mods:partNumber',NS).try(:content)
221
+ partname = e.at('./mods:partName',NS).try(:content)
222
+ partname = [ partnumber, partname ].compact.join( ": " )
223
+ original_title << ". #{ partname }" unless partname.blank?
224
+ end
225
225
  end
226
+ { :original_title => original_title, :title_alternative => title_alternatives.join( " ; " ) }
227
+ end
226
228
 
227
- def get_mods_language(doc)
228
- language = doc.at('//mods:language/mods:languageTerm[@authority="iso639-2b"]',NS).try(:content)
229
- end
229
+ def get_mods_language(doc)
230
+ language = doc.at('//mods:language/mods:languageTerm[@authority="iso639-2b"]',NS).try(:content)
231
+ end
230
232
 
231
- def get_mods_access_address(doc)
232
- access_address = nil
233
- url = doc.at('//mods:location/mods:url',NS)
234
- if url
235
- usage = url.attributes["usage"].try(:content)
236
- case usage
237
- when "primary display", "primary"
238
- access_address = url.try(:content)
239
- end
240
- end
241
- access_address
233
+ def get_mods_access_address(doc)
234
+ access_address = nil
235
+ url = doc.at('//mods:location/mods:url',NS)
236
+ if url
237
+ usage = url.attributes["usage"].try(:content)
238
+ case usage
239
+ when "primary display", "primary"
240
+ access_address = url.try(:content)
242
241
  end
242
+ end
243
+ access_address
244
+ end
243
245
 
244
- def get_mods_publication_place(doc)
245
- place = doc.at('//mods:originInfo/mods:place/mods:placeTerm[@type="text"]',NS).try(:content)
246
- end
246
+ def get_mods_publication_place(doc)
247
+ place = doc.at('//mods:originInfo/mods:place/mods:placeTerm[@type="text"]',NS).try(:content)
248
+ end
247
249
 
248
- def get_mods_extent(doc)
249
- extent = doc.at('//mods:extent',NS).try(:content)
250
- value = {:start_page => nil, :end_page => nil, :height => nil}
251
- if extent
252
- extent = extent.split(';')
253
- page = extent[0].try(:strip)
254
- if page =~ /(\d+)\s*(p|page)/
255
- value[:start_page] = 1
256
- value[:end_page] = $1.dup.to_i
257
- end
258
- height = extent[1].try(:strip)
259
- if height =~ /(\d+)\s*cm/
260
- value[:height] = $1.dup.to_i
261
- end
262
- end
263
- value
250
+ def get_mods_extent(doc)
251
+ extent = doc.at('//mods:extent',NS).try(:content)
252
+ value = {:start_page => nil, :end_page => nil, :height => nil}
253
+ if extent
254
+ extent = extent.split(';')
255
+ page = extent[0].try(:strip)
256
+ if page =~ /(\d+)\s*(p|page)/
257
+ value[:start_page] = 1
258
+ value[:end_page] = $1.dup.to_i
264
259
  end
265
-
266
- def get_mods_statement_of_responsibility(doc)
267
- note = doc.at('//mods:note[@type="statement of responsibility"]',NS).try(:content)
268
- if note.blank?
269
- note = get_mods_creators(doc).map{|e| e[:full_name] }.join( " ; " )
270
- end
271
- note
260
+ height = extent[1].try(:strip)
261
+ if height =~ /(\d+)\s*cm/
262
+ value[:height] = $1.dup.to_i
272
263
  end
273
- def get_mods_note(doc)
274
- notes = []
275
- doc.xpath('//mods:note',NS).each do |note|
276
- type = note.attributes['type'].try(:content)
277
- next if type == "statement of responsibility"
278
- note_s = note.try( :content )
279
- notes << note_s unless note_s.blank?
280
- end
281
- if notes.empty?
282
- nil
283
- else
284
- notes.join( ";\n" )
285
- end
264
+ end
265
+ value
266
+ end
267
+
268
+ def get_mods_statement_of_responsibility(doc)
269
+ note = doc.at('//mods:note[@type="statement of responsibility"]',NS).try(:content)
270
+ if note.blank?
271
+ note = get_mods_creators(doc).map{|e| e[:full_name] }.join( " ; " )
272
+ end
273
+ note
274
+ end
275
+
276
+ def get_mods_note(doc)
277
+ notes = []
278
+ doc.xpath('//mods:note',NS).each do |note|
279
+ type = note.attributes['type'].try(:content)
280
+ next if type == "statement of responsibility"
281
+ note_s = note.try( :content )
282
+ notes << note_s unless note_s.blank?
283
+ end
284
+ if notes.empty?
285
+ nil
286
+ else
287
+ notes.join( ";\n" )
288
+ end
289
+ end
290
+
291
+ def get_mods_date_of_publication(doc)
292
+ dates = []
293
+ doc.xpath('//mods:dateIssued',NS).each do |pub_date|
294
+ pub_date = pub_date.content.sub( /\A[cp]/, '' )
295
+ next unless pub_date =~ /^\d+(-\d\d?){0,2}$/
296
+ date = pub_date.split('-')
297
+ if date[0] and date[1]
298
+ dates << sprintf("%04d-%02d", date[0].to_i, date[1].to_i) # to_i first: sprintf("%d", "08") raises ArgumentError because the String is parsed as octal
299
+ else
300
+ dates << pub_date
286
301
  end
287
- def get_mods_date_of_publication(doc)
288
- dates = []
289
- doc.xpath('//mods:dateIssued',NS).each do |pub_date|
290
- pub_date = pub_date.content.sub( /\A[cp]/, '' )
291
- next unless pub_date =~ /^\d+(-\d\d?){0,2}$/
292
- date = pub_date.split('-')
293
- if date[0] and date[1]
294
- dates << sprintf("%04d-%02d", date[0], date[1])
295
- else
296
- dates << pub_date
297
- end
302
+ end
303
+ dates.compact.first
304
+ end
305
+
306
+ # derived from marcfrequency: http://www.loc.gov/standards/valuelist/marcfrequency.html
307
+ MARCFREQUENCY = [
308
+ "Continuously updated",
309
+ "Daily",
310
+ "Semiweekly",
311
+ "Three times a week",
312
+ "Weekly",
313
+ "Biweekly",
314
+ "Three times a month",
315
+ "Semimonthly",
316
+ "Monthly",
317
+ "Bimonthly",
318
+ "Quarterly",
319
+ "Three times a year",
320
+ "Semiannual",
321
+ "Annual",
322
+ "Biennial",
323
+ "Triennial",
324
+ "Completely irregular",
325
+ ]
326
+ def get_mods_frequency(doc)
327
+ frequencies = []
328
+ doc.xpath('//mods:frequency',NS).each do |freq|
329
+ frequency = freq.try(:content)
330
+ MARCFREQUENCY.each do |freq_regex|
331
+ if /\A(#{freq_regex})/ =~ frequency
332
+ frequency_name = freq_regex.downcase.gsub( /\s+/, "_" )
333
+ frequencies << Frequency.where( name: frequency_name ).first
298
334
  end
299
- dates.compact.first
300
335
  end
336
+ end
337
+ frequencies.compact.first
338
+ end
301
339
 
302
- # derived from marcfrequency: http://www.loc.gov/standards/valuelist/marcfrequency.html
303
- MARCFREQUENCY = [
304
- "Continuously updated",
305
- "Daily",
306
- "Semiweekly",
307
- "Three times a week",
308
- "Weekly",
309
- "Biweekly",
310
- "Three times a month",
311
- "Semimonthly",
312
- "Monthly",
313
- "Bimonthly",
314
- "Quarterly",
315
- "Three times a year",
316
- "Semiannual",
317
- "Annual",
318
- "Biennial",
319
- "Triennial",
320
- "Completely irregular",
321
- ]
322
- def get_mods_frequency(doc)
323
- frequencies = []
324
- doc.xpath('//mods:frequency',NS).each do |freq|
325
- frequency = freq.try(:content)
326
- MARCFREQUENCY.each do |freq_regex|
327
- if /\A(#{freq_regex})/ =~ frequency
328
- frequency_name = freq_regex.downcase.gsub( /\s+/, "_" )
329
- frequencies << Frequency.where( :name => frequency_name ).first
330
- end
331
- end
332
- end
333
- frequencies.compact.first
334
- end
340
+ def get_mods_creators(doc)
341
+ creators = []
342
+ doc.xpath('/mods:mods/mods:name',NS).each do |creator|
343
+ creators << {
344
+ :full_name => creator.xpath('./mods:namePart',NS).collect(&:content).join( ", " ),
345
+ }
346
+ end
347
+ creators.uniq
348
+ end
335
349
 
336
- def get_mods_creators(doc)
337
- creators = []
338
- doc.xpath('/mods:mods/mods:name',NS).each do |creator|
339
- creators << {
340
- :full_name => creator.xpath('./mods:namePart',NS).collect(&:content).join( ", " ),
341
- }
350
+ # TODO: only LCSH subjects (authority="lcsh") are parsed; other subject authorities are ignored.
351
+ def get_mods_subjects(doc)
352
+ subjects = []
353
+ doc.xpath('//mods:subject[@authority="lcsh"]',NS).each do |s|
354
+ subject = []
355
+ s.xpath('./*',NS).each do |subelement|
356
+ type = subelement.name
357
+ case subelement.name
358
+ when "topic", "geographic", "genre", "temporal"
359
+ subject << { :type => type , :term => subelement.try(:content) }
360
+ when "titleInfo"
361
+ subject << { :type => type, :term => subelement.at('./mods:title',NS).try(:content) }
362
+ when "name"
363
+ name = subelement.xpath('./mods:namePart',NS).map{|e| e.try(:content) }.join( ", " )
364
+ subject << { :type => type, :term => name }
342
365
  end
343
- creators.uniq
344
366
  end
345
-
346
- # TODO:only LCSH-based parsing...
347
- def get_mods_subjects(doc)
348
- subjects = []
349
- doc.xpath('//mods:subject[@authority="lcsh"]',NS).each do |s|
350
- subject = []
351
- s.xpath('./*',NS).each do |subelement|
352
- type = subelement.name
353
- case subelement.name
354
- when "topic", "geographic", "genre", "temporal"
355
- subject << { :type => type , :term => subelement.try(:content) }
356
- when "titleInfo"
357
- subject << { :type => type, :term => subelement.at('./mods:title',NS).try(:content) }
358
- when "name"
359
- name = subelement.xpath('./mods:namePart',NS).map{|e| e.try(:content) }.join( ", " )
360
- subject << { :type => type, :term => name }
361
- end
362
- end
363
- next if subject.compact.empty?
364
- if subject.size > 1 and subject[0][:type] == "name" and subject[1][:type] == "titleInfo"
365
- subject[0..1] = { :term => subject[0..1].map{|e|e[:term]}.join( ". " ) }
366
- end
367
- subjects << {
368
- :term => subject.map{|e|e[:term]}.compact.join( "--" )
369
- }
370
- end
371
- subjects
367
+ next if subject.compact.empty?
368
+ if subject.size > 1 and subject[0][:type] == "name" and subject[1][:type] == "titleInfo"
369
+ subject[0..1] = { :term => subject[0..1].map{|e|e[:term]}.join( ". " ) }
372
370
  end
371
+ subjects << {
372
+ :term => subject.map{|e|e[:term]}.compact.join( "--" )
373
+ }
374
+ end
375
+ subjects
376
+ end
373
377
 
374
- # TODO:support only DDC.
375
- def get_mods_classifications(doc)
376
- classifications = []
377
- doc.xpath('//mods:classification[@authority="ddc"]',NS).each do|c|
378
- ddc = c.content
379
- if ddc
380
- classifications << ddc.split(/[^\d\.]/).first.try(:strip)
381
- end
382
- end
383
- classifications.compact
378
+ # TODO: only DDC classifications (authority="ddc") are supported.
379
+ def get_mods_classifications(doc)
380
+ classifications = []
381
+ doc.xpath('//mods:classification[@authority="ddc"]',NS).each do|c|
382
+ ddc = c.content
383
+ if ddc
384
+ classifications << ddc.split(/[^\d\.]/).first.try(:strip)
384
385
  end
386
+ end
387
+ classifications.compact
388
+ end
385
389
 
386
- def get_mods_carrier_and_content_types(doc)
387
- carrier_type = content_type = nil
388
- doc.xpath('//mods:form',NS).each do |e|
389
- authority = e.attributes['authority'].try(:content)
390
- case authority
391
- when "gmd"
392
- case e.content
393
- when "electronic resource"
394
- carrier_type = CarrierType.where(:name => 'online_resource').first
395
- when "videorecording", "motion picture", "game"
396
- content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
397
- when "sound recording"
398
- content_type = ContentType.where(:name => 'performed_music').first
399
- when "graphic", "picture"
400
- content_type = ContentType.where(:name => 'still_image').first
401
- #TODO: Enju needs more specific mappings...
402
- when "art original",
403
- "microscope slides",
404
- "art reproduction",
405
- "model",
406
- "chart",
407
- "diorama",
408
- "realia",
409
- "filmstrip",
410
- "slide",
411
- "flash card",
412
- "technical drawing",
413
- "toy",
414
- "kit",
415
- "transparency",
416
- "microform"
417
- content_type = ContentType.where(:name => 'other').first
418
- end
419
- when "marcsmd" # cf.http://www.loc.gov/standards/valuelist/marcsmd.html
420
- case e.content
421
- when "text", "large print", "regular print", "text in looseleaf binder"
422
- carrier_type = CarrierType.where(:name => 'volume').first
423
- content_type = ContentType.where(:name => 'text').first
424
- when "braille"
425
- carrier_type = CarrierType.where(:name => 'volume').first
426
- content_type = ContentType.where(:name => 'tactile_text').first
427
- when "videodisc"
428
- carrier_type = CarrierType.where(:name => 'videodisc').first
429
- content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
430
- when "videorecording", "videocartridge", "videocassette", "videoreel"
431
- carrier_type = CarrierType.where(:name => 'other').first
432
- content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
433
- when "electronic resource"
434
- carrier_type = CarrierType.where(:name => 'online_resource').first
435
- when "chip cartridge", "computer optical disc cartridge", "magnetic disk", "magneto-optical disc", "optical disc", "remote", "tape cartridge", "tape cassette", "tape reel"
436
- #carrier_type = CarrierType.where(:name => 'other').first
437
- when "motion picture", "film cartridge", "film cassette", "film reel"
438
- content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
439
- when "sound recording", "cylinder", "roll", "sound cartridge", "sound cassette","sound-tape reel", "sound-track film", "wire recording"
440
- content_type = ContentType.where(:name => 'performed_music').first
441
- when "sound disc"
442
- content_type = ContentType.where(:name => 'performed_music').first
443
- carrier_type = CarrierType.where(:name => 'audio_disc').first
444
- when "nonprojected graphic", "chart", "collage", "drawing", "flash card", "painting", "photomechanical print", "photonegative", "photoprint", "picture", "print", "technical drawing", "projected graphic", "filmslip", "filmstrip cartridge", "filmstrip roll", "other filmstrip type ", "slide", "transparency"
445
- content_type = ContentType.where(:name => 'still_image').first
446
- when "tactile material", "braille", "tactile, with no writing system"
447
- content_type = ContentType.where(:name => 'tactile_text').first
448
- #TODO: Enju needs more specific mappings...
449
- when "globe",
450
- "celestial globe",
451
- "earth moon globe",
452
- "planetary or lunar globe",
453
- "terrestrial globe",
454
- "map",
455
- "atlas",
456
- "diagram",
457
- "map",
458
- "model",
459
- "profile",
460
- "remote-sensing image",
461
- "section",
462
- "view",
463
- "microform",
464
- "aperture card",
465
- "microfiche",
466
- "microfiche cassette",
467
- "microfilm cartridge",
468
- "microfilm cassette",
469
- "microfilm reel",
470
- "microopaque",
471
- "combination",
472
- "moon"
473
- content_type = ContentType.where(:name => 'other').first
474
- end
475
- when "marcform" # cf. http://www.loc.gov/standards/valuelist/marcform.html
476
- case e.content
477
- when "print", "large print"
478
- carrier_type = CarrierType.where(:name => 'volume').first
479
- content_type = ContentType.where(:name => 'text').first
480
- when "electronic"
481
- carrier_type = CarrierType.where(:name => 'online_resource').first
482
- when "braille"
483
- content_type = ContentType.where(:name => 'tactile_text').first
484
- #TODO: Enju needs more specific mappings...
485
- when "microfiche", "microfilm"
486
- content_type = ContentType.where(:name => 'other').first
487
- end
488
- end
489
- end
390
+ def get_mods_carrier_and_content_types(doc)
391
+ carrier_type = content_type = nil
392
+ doc.xpath('//mods:form',NS).each do |e|
393
+ authority = e.attributes['authority'].try(:content)
394
+ case authority
395
+ when "gmd"
396
+ case e.content
397
+ when "electronic resource"
398
+ carrier_type = CarrierType.where(name: 'online_resource').first
399
+ when "videorecording", "motion picture", "game"
400
+ content_type = ContentType.where(name: 'two_dimensional_moving_image').first
401
+ when "sound recording"
402
+ content_type = ContentType.where(name: 'performed_music').first
403
+ when "graphic", "picture"
404
+ content_type = ContentType.where(name: 'still_image').first
405
+ #TODO: Enju needs more specific mappings...
406
+ when "art original",
407
+ "microscope slides",
408
+ "art reproduction",
409
+ "model",
410
+ "chart",
411
+ "diorama",
412
+ "realia",
413
+ "filmstrip",
414
+ "slide",
415
+ "flash card",
416
+ "technical drawing",
417
+ "toy",
418
+ "kit",
419
+ "transparency",
420
+ "microform"
421
+ content_type = ContentType.where(name: 'other').first
422
+ end
423
+ when "marcsmd" # cf.http://www.loc.gov/standards/valuelist/marcsmd.html
424
+ case e.content
425
+ when "text", "large print", "regular print", "text in looseleaf binder"
426
+ carrier_type = CarrierType.where(name: 'volume').first
427
+ content_type = ContentType.where(name: 'text').first
428
+ when "braille"
429
+ carrier_type = CarrierType.where(name: 'volume').first
430
+ content_type = ContentType.where(name: 'tactile_text').first
431
+ when "videodisc"
432
+ carrier_type = CarrierType.where(name: 'videodisc').first
433
+ content_type = ContentType.where(name: 'two_dimensional_moving_image').first
434
+ when "videorecording", "videocartridge", "videocassette", "videoreel"
435
+ carrier_type = CarrierType.where(name: 'other').first
436
+ content_type = ContentType.where(name: 'two_dimensional_moving_image').first
437
+ when "electronic resource"
438
+ carrier_type = CarrierType.where(name: 'online_resource').first
439
+ when "chip cartridge", "computer optical disc cartridge", "magnetic disk", "magneto-optical disc", "optical disc", "remote", "tape cartridge", "tape cassette", "tape reel"
440
+ #carrier_type = CarrierType.where(name: 'other').first
441
+ when "motion picture", "film cartridge", "film cassette", "film reel"
442
+ content_type = ContentType.where(name: 'two_dimensional_moving_image').first
443
+ when "sound recording", "cylinder", "roll", "sound cartridge", "sound cassette","sound-tape reel", "sound-track film", "wire recording"
444
+ content_type = ContentType.where(name: 'performed_music').first
445
+ when "sound disc"
446
+ content_type = ContentType.where(name: 'performed_music').first
447
+ carrier_type = CarrierType.where(name: 'audio_disc').first
448
+ when "nonprojected graphic", "chart", "collage", "drawing", "flash card", "painting", "photomechanical print", "photonegative", "photoprint", "picture", "print", "technical drawing", "projected graphic", "filmslip", "filmstrip cartridge", "filmstrip roll", "other filmstrip type ", "slide", "transparency"
449
+ content_type = ContentType.where(name: 'still_image').first
450
+ when "tactile material", "braille", "tactile, with no writing system"
451
+ content_type = ContentType.where(name: 'tactile_text').first
452
+ #TODO: Enju needs more specific mappings...
453
+ when "globe",
454
+ "celestial globe",
455
+ "earth moon globe",
456
+ "planetary or lunar globe",
457
+ "terrestrial globe",
458
+ "map",
459
+ "atlas",
460
+ "diagram",
461
+ "map",
462
+ "model",
463
+ "profile",
464
+ "remote-sensing image",
465
+ "section",
466
+ "view",
467
+ "microform",
468
+ "aperture card",
469
+ "microfiche",
470
+ "microfiche cassette",
471
+ "microfilm cartridge",
472
+ "microfilm cassette",
473
+ "microfilm reel",
474
+ "microopaque",
475
+ "combination",
476
+ "moon"
477
+ content_type = ContentType.where(name: 'other').first
478
+ end
479
+ when "marcform" # cf. http://www.loc.gov/standards/valuelist/marcform.html
480
+ case e.content
481
+ when "print", "large print"
482
+ carrier_type = CarrierType.where(name: 'volume').first
483
+ content_type = ContentType.where(name: 'text').first
484
+ when "electronic"
485
+ carrier_type = CarrierType.where(name: 'online_resource').first
486
+ when "braille"
487
+ content_type = ContentType.where(name: 'tactile_text').first
488
+ #TODO: Enju needs more specific mappings...
489
+ when "microfiche", "microfilm"
490
+ content_type = ContentType.where(name: 'other').first
491
+ end
492
+ end
493
+ end
490
494
  doc.xpath('//mods:genre',NS).each do |e|
491
495
  authority = e.attributes['authority'].try(:content)
492
- case authority
493
- when "rdacontent"
494
- content_type = ContentType.where(:name => e.content.gsub(/\W+/, "_")).first
495
- content_type = ContentType.where(:name => 'other').first unless content_type
496
+ case authority
497
+ when "rdacontent"
498
+ content_type = ContentType.where(name: e.content.gsub(/\W+/, "_")).first
499
+ content_type = ContentType.where(name: 'other').first unless content_type
496
500
  end
497
501
  end
498
- type = doc.at('//mods:typeOfResource',NS).try(:content)
499
- case type
500
- when "text"
501
- content_type = ContentType.where(:name => 'text').first
502
- when "sound recording"
503
- content_type = ContentType.where(:name => 'sounds').first
504
- when"sound recording-musical"
505
- content_type = ContentType.where(:name => 'performed_music').first
506
- when"sound recording-nonmusical"
507
- content_type = ContentType.where(:name => 'spoken_word').first
508
- when "moving image"
509
- content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
510
- when "software, multimedia"
511
- content_type = ContentType.where(:name => 'other').first
512
- when "cartographic "
513
- content_type = ContentType.where(:name => 'cartographic_image').first
514
- when "notated music"
515
- content_type = ContentType.where(:name => 'notated_music').first
516
- when "still image"
517
- content_type = ContentType.where(:name => 'still_image').first
518
- when "three dimensional object"
519
- content_type = ContentType.where(:name => 'other').first
520
- when "mixed material"
521
- content_type = ContentType.where(:name => 'other').first
522
- end
502
+ type = doc.at('//mods:typeOfResource',NS).try(:content)
503
+ case type
504
+ when "text"
505
+ content_type = ContentType.where(name: 'text').first
506
+ when "sound recording"
507
+ content_type = ContentType.where(name: 'sounds').first
508
+ when"sound recording-musical"
509
+ content_type = ContentType.where(name: 'performed_music').first
510
+ when"sound recording-nonmusical"
511
+ content_type = ContentType.where(name: 'spoken_word').first
512
+ when "moving image"
513
+ content_type = ContentType.where(name: 'two_dimensional_moving_image').first
514
+ when "software, multimedia"
515
+ content_type = ContentType.where(name: 'other').first
516
+ when "cartographic "
517
+ content_type = ContentType.where(name: 'cartographic_image').first
518
+ when "notated music"
519
+ content_type = ContentType.where(name: 'notated_music').first
520
+ when "still image"
521
+ content_type = ContentType.where(name: 'still_image').first
522
+ when "three dimensional object"
523
+ content_type = ContentType.where(name: 'other').first
524
+ when "mixed material"
525
+ content_type = ContentType.where(name: 'other').first
526
+ end
523
527
  { :carrier_type => carrier_type, :content_type => content_type }
524
528
  end
525
529
  end