enju_loc 0.1.0.pre3 → 0.1.0.pre4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.rdoc +9 -1
- data/app/models/loc_search.rb +3 -1
- data/lib/enju_loc/engine.rb +0 -1
- data/lib/enju_loc/loc_search.rb +346 -342
- data/lib/enju_loc/version.rb +1 -1
- data/spec/dummy/bin/bundle +3 -0
- data/spec/dummy/bin/rails +4 -0
- data/spec/dummy/bin/rake +4 -0
- data/spec/dummy/bin/setup +29 -0
- data/spec/dummy/config/database.yml +21 -16
- data/spec/dummy/db/migrate/005_create_manifestations.rb +0 -1
- data/spec/dummy/db/migrate/20140519170214_create_resource_import_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140519171220_create_import_request_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140524020735_create_agent_import_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140524074813_create_user_import_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140614141500_create_resource_export_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140709113905_create_user_export_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20141003181336_add_full_name_transcription_to_profile.rb +5 -0
- data/spec/dummy/db/migrate/20141003182825_add_date_of_birth_to_profile.rb +5 -0
- data/spec/dummy/db/schema.rb +414 -413
- data/spec/fixtures/users.yml +1 -1
- data/spec/spec_helper.rb +2 -0
- metadata +42 -20
- data/spec/dummy/db/test.sqlite3 +0 -0
- data/spec/dummy/script/rails +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d6f7ddad0b22f2b23741d4e91c73f27ad22827c
|
4
|
+
data.tar.gz: daa82be27e7d64aa3fef0a31d3b46ec5294721dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34eec96ca78e77b7f9304d1644bed1ca1ca7eb0e522ae6760e2f2e6cf4cc819ff4666f70cb0a2cd30162e13f5dd41dc5c97b547dd55671d0697265aea49fbe3e
|
7
|
+
data.tar.gz: 3971a831b8c62dee85e6ce3afde3901be6228371d78c5bbb44fdd5a4f5bb488495ea84f64401ecfc44d19b01bb3fa8d7919e9d615407fc14e3a47b6af8c0e0bc
|
data/README.rdoc
CHANGED
@@ -1,3 +1,11 @@
|
|
1
1
|
= EnjuLoc
|
2
|
+
{<img src="https://secure.travis-ci.org/next-l/enju_loc.png?branch=1.1" alt="Build Status" />}[http://travis-ci.org/next-l/enju_loc]
|
3
|
+
{<img src="https://coveralls.io/repos/next-l/enju_loc/badge.png?branch=1.1" alt="Coverage Status" />}[https://coveralls.io/r/next-l/enju_loc?branch=1.1]
|
4
|
+
{<img src="https://hakiri.io/github/next-l/enju_loc/1.1.svg" alt="security" />}[https://hakiri.io/github/next-l/enju_loc/1.1]
|
2
5
|
|
3
|
-
This project rocks and uses MIT-LICENSE.
|
6
|
+
This project rocks and uses MIT-LICENSE.
|
7
|
+
|
8
|
+
== 製作者・貢献者 (Authors and contributors)
|
9
|
+
* {TAKAKU, Masao}[https://github.com/masao] ({@tmasao}[https://twitter.com/tmasao])
|
10
|
+
* {TANABE, Kosuke}[https://github.com/nabeta] ({@nabeta}[https://twitter.com/nabeta])
|
11
|
+
* {Project Next-L}[http://www.next-l.jp] ({@ProjectNextL}[https://twitter.com/ProjectNextL])
|
data/app/models/loc_search.rb
CHANGED
@@ -85,7 +85,9 @@ class LocSearch
|
|
85
85
|
end
|
86
86
|
|
87
87
|
def self.import_from_sru_response(lccn)
|
88
|
-
|
88
|
+
identifier_type_lccn = IdentifierType.where(name: 'lccn').first
|
89
|
+
identifier_type_lccn = IdentifierType.create!(name: 'lccn') unless identifier_type_lccn
|
90
|
+
identifier = Identifier.where(body: lccn, identifier_type_id: identifier_type_lccn.id).first
|
89
91
|
return if identifier
|
90
92
|
url = make_sru_request_uri("bath.lccn=#{ lccn }")
|
91
93
|
response = Nokogiri::XML(Faraday.get(url).body).at( '//zs:recordData', {"zs" => "http://www.loc.gov/zing/srw/"} )
|
data/lib/enju_loc/engine.rb
CHANGED
data/lib/enju_loc/loc_search.rb
CHANGED
@@ -21,8 +21,8 @@ module EnjuLoc
|
|
21
21
|
|
22
22
|
def import_record_from_loc_isbn(options)
|
23
23
|
#if options[:isbn]
|
24
|
-
|
25
|
-
|
24
|
+
lisbn = Lisbn.new(options[:isbn])
|
25
|
+
raise EnjuLoc::InvalidIsbn unless lisbn.valid?
|
26
26
|
#end
|
27
27
|
|
28
28
|
manifestation = Manifestation.find_by_isbn(lisbn.isbn)
|
@@ -36,7 +36,9 @@ module EnjuLoc
|
|
36
36
|
NS = {"mods"=>"http://www.loc.gov/mods/v3"}
|
37
37
|
def import_record_from_loc( doc )
|
38
38
|
record_identifier = doc.at( '//mods:recordInfo/mods:recordIdentifier', NS ).try(:content)
|
39
|
-
|
39
|
+
identifier_type = IdentifierType.where(name: 'loc_identifier').first
|
40
|
+
identifier_type = IdentifierType.create!(name: 'loc_identifier') unless identifier_type
|
41
|
+
loc_identifier = Identifier.where(:body => record_identifier, :identifier_type_id => identifier_type.id).first
|
40
42
|
return loc_identifier.manifestation if loc_identifier
|
41
43
|
|
42
44
|
publishers = []
|
@@ -67,25 +69,25 @@ module EnjuLoc
|
|
67
69
|
issn = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn"]',NS).try(:content).to_s)
|
68
70
|
issn_l = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn-l"]',NS).try(:content).to_s)
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
72
|
+
types = get_mods_carrier_and_content_types( doc )
|
73
|
+
content_type = types[ :content_type ]
|
74
|
+
carrier_type = types[ :carrier_type ]
|
73
75
|
|
74
|
-
|
76
|
+
record_identifier = doc.at('//mods:recordInfo/mods:recordIdentifier',NS).try(:content)
|
75
77
|
description = doc.xpath('//mods:abstract',NS).collect(&:content).join("\n")
|
76
78
|
edition_string = doc.at('//mods:edition',NS).try(:content)
|
77
79
|
extent = get_mods_extent(doc)
|
78
|
-
|
80
|
+
note = get_mods_note(doc)
|
79
81
|
frequency = get_mods_frequency(doc)
|
80
|
-
|
81
|
-
|
82
|
+
issuance = doc.at('//mods:issuance',NS).try(:content)
|
83
|
+
is_serial = true if issuance == "serial"
|
82
84
|
statement_of_responsibility = get_mods_statement_of_responsibility(doc)
|
83
|
-
|
84
|
-
|
85
|
+
access_address = get_mods_access_address(doc)
|
86
|
+
publication_place = get_mods_publication_place(doc)
|
85
87
|
|
86
88
|
manifestation = nil
|
87
89
|
Agent.transaction do
|
88
|
-
|
90
|
+
creator_agents = Agent.import_agents(creators)
|
89
91
|
publisher_agents = Agent.import_agents(publishers)
|
90
92
|
|
91
93
|
manifestation = Manifestation.new(
|
@@ -95,51 +97,51 @@ module EnjuLoc
|
|
95
97
|
:language_id => language_id,
|
96
98
|
:pub_date => date,
|
97
99
|
:description => description,
|
98
|
-
|
100
|
+
:edition_string => edition_string,
|
99
101
|
:statement_of_responsibility => statement_of_responsibility,
|
100
102
|
:start_page => extent[:start_page],
|
101
103
|
:end_page => extent[:end_page],
|
102
104
|
:height => extent[:height],
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
105
|
+
:access_address => access_address,
|
106
|
+
:note => note,
|
107
|
+
:publication_place => publication_place,
|
108
|
+
:serial => is_serial,
|
107
109
|
)
|
108
110
|
identifier = {}
|
109
111
|
if isbn
|
110
112
|
identifier[:isbn] = Identifier.new(:body => isbn)
|
111
|
-
identifier[:isbn].identifier_type = IdentifierType.where(:name
|
113
|
+
identifier[:isbn].identifier_type = IdentifierType.where(name: 'isbn').first || IdentifierType.create!(name: 'isbn')
|
112
114
|
end
|
113
115
|
if loc_identifier
|
114
116
|
identifier[:loc_identifier] = Identifier.new(:body => loc_identifier)
|
115
|
-
identifier[:loc_identifier].identifier_type = IdentifierType.where(:name
|
117
|
+
identifier[:loc_identifier].identifier_type = IdentifierType.where(name: 'loc_identifier').first || IdentifierType.create!(name: 'loc_identifier')
|
116
118
|
end
|
117
119
|
if lccn
|
118
120
|
identifier[:lccn] = Identifier.new(:body => lccn)
|
119
|
-
identifier[:lccn].identifier_type = IdentifierType.where(:name
|
121
|
+
identifier[:lccn].identifier_type = IdentifierType.where(name: 'lccn').first || IdentifierType.create!(name: 'lccn')
|
120
122
|
end
|
121
123
|
if issn
|
122
124
|
identifier[:issn] = Identifier.new(:body => issn)
|
123
|
-
identifier[:issn].identifier_type = IdentifierType.where(:name
|
125
|
+
identifier[:issn].identifier_type = IdentifierType.where(name: 'issn').first || IdentifierType.create!(name: 'issn')
|
124
126
|
end
|
125
127
|
if issn_l
|
126
128
|
identifier[:issn_l] = Identifier.new(:body => issn_l)
|
127
|
-
identifier[:issn_l].identifier_type = IdentifierType.where(:name
|
129
|
+
identifier[:issn_l].identifier_type = IdentifierType.where(name: 'issn_l').first || IdentifierType.create!(name: 'issn_l')
|
128
130
|
end
|
129
131
|
manifestation.carrier_type = carrier_type if carrier_type
|
130
132
|
manifestation.manifestation_content_type = content_type if content_type
|
131
|
-
|
133
|
+
manifestation.frequency = frequency if frequency
|
132
134
|
if manifestation.save
|
133
135
|
identifier.each do |k, v|
|
134
136
|
manifestation.identifiers << v if v.valid?
|
135
137
|
end
|
136
138
|
manifestation.publishers << publisher_agents
|
137
|
-
|
138
|
-
|
139
|
+
manifestation.creators << creator_agents
|
140
|
+
create_subject_related_elements(doc, manifestation)
|
139
141
|
create_series_statement(doc, manifestation)
|
140
|
-
|
142
|
+
if is_serial
|
141
143
|
create_series_master(doc, manifestation)
|
142
|
-
|
144
|
+
end
|
143
145
|
end
|
144
146
|
end
|
145
147
|
return manifestation
|
@@ -147,31 +149,31 @@ module EnjuLoc
|
|
147
149
|
|
148
150
|
private
|
149
151
|
def create_subject_related_elements(doc, manifestation)
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
subject_heading_type = SubjectHeadingType.where(:name
|
152
|
+
subjects = get_mods_subjects(doc)
|
153
|
+
classifications = get_mods_classifications(doc)
|
154
|
+
if defined?(EnjuSubject)
|
155
|
+
subject_heading_type = SubjectHeadingType.where(name: 'lcsh').first || SubjectHeadingType.create!(name: 'lcsh')
|
154
156
|
subjects.each do |term|
|
155
157
|
subject = Subject.where(:term => term[:term]).first
|
156
158
|
unless subject
|
157
159
|
subject = Subject.new(term)
|
158
160
|
subject.subject_heading_type = subject_heading_type
|
159
|
-
subject.subject_type = SubjectType.where(:name
|
161
|
+
subject.subject_type = SubjectType.where(name: 'concept').first || SubjectType.create!(name: 'concept')
|
160
162
|
end
|
161
163
|
manifestation.subjects << subject
|
162
164
|
end
|
163
165
|
if classifications
|
164
|
-
classification_type = ClassificationType.where(:name
|
165
|
-
|
166
|
+
classification_type = ClassificationType.where(name: 'ddc').first || ClassificationType.create!(name: 'ddc')
|
167
|
+
classifications.each do |ddc|
|
166
168
|
classification = Classification.where(:category => ddc).first
|
167
|
-
|
169
|
+
unless classification
|
168
170
|
classification = Classification.new(:category => ddc)
|
169
171
|
classification.classification_type = classification_type
|
170
|
-
|
172
|
+
end
|
171
173
|
manifestation.classifications << classification if classification.valid?
|
172
174
|
end
|
173
175
|
end
|
174
|
-
|
176
|
+
end
|
175
177
|
end
|
176
178
|
|
177
179
|
def create_series_statement(doc, manifestation)
|
@@ -181,7 +183,7 @@ module EnjuLoc
|
|
181
183
|
series_title = title.split(';')[0].strip
|
182
184
|
end
|
183
185
|
if series_title
|
184
|
-
series_statement = SeriesStatement.where(:original_title => series_title).
|
186
|
+
series_statement = SeriesStatement.where(:original_title => series_title).first || SeriesStatement.create!(original_title: series_title)
|
185
187
|
if series_statement.try(:save)
|
186
188
|
manifestation.series_statements << series_statement
|
187
189
|
end
|
@@ -191,335 +193,337 @@ module EnjuLoc
|
|
191
193
|
|
192
194
|
def create_series_master(doc, manifestation)
|
193
195
|
titles = get_mods_titles(doc)
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
196
|
+
series_statement = SeriesStatement.new(
|
197
|
+
:original_title => titles[:original_title],
|
198
|
+
:title_alternative => titles[:title_alternative],
|
199
|
+
:series_master => true,
|
200
|
+
)
|
201
|
+
if series_statement.try(:save)
|
202
|
+
manifestation.series_statements << series_statement
|
203
|
+
end
|
202
204
|
end
|
203
205
|
|
204
206
|
def get_mods_titles(doc)
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
end
|
224
|
-
{ :original_title => original_title, :title_alternative => title_alternatives.join( " ; " ) }
|
207
|
+
original_title = ""
|
208
|
+
title_alternatives = []
|
209
|
+
doc.xpath('//mods:mods/mods:titleInfo',NS).each do |e|
|
210
|
+
type = e.attributes["type"].try(:content)
|
211
|
+
case type
|
212
|
+
when "alternative", "translated", "abbreviated", "uniform"
|
213
|
+
title_alternatives << e.at('./mods:title',NS).content
|
214
|
+
else
|
215
|
+
nonsort = e.at('./mods:nonSort',NS).try(:content)
|
216
|
+
original_title << nonsort if nonsort
|
217
|
+
original_title << e.at('./mods:title',NS).try(:content)
|
218
|
+
subtitle = e.at('./mods:subTitle',NS).try(:content)
|
219
|
+
original_title << " : #{ subtitle }" if subtitle
|
220
|
+
partnumber = e.at('./mods:partNumber',NS).try(:content)
|
221
|
+
partname = e.at('./mods:partName',NS).try(:content)
|
222
|
+
partname = [ partnumber, partname ].compact.join( ": " )
|
223
|
+
original_title << ". #{ partname }" unless partname.blank?
|
224
|
+
end
|
225
225
|
end
|
226
|
+
{ :original_title => original_title, :title_alternative => title_alternatives.join( " ; " ) }
|
227
|
+
end
|
226
228
|
|
227
|
-
|
228
|
-
|
229
|
-
|
229
|
+
def get_mods_language(doc)
|
230
|
+
language = doc.at('//mods:language/mods:languageTerm[@authority="iso639-2b"]',NS).try(:content)
|
231
|
+
end
|
230
232
|
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
end
|
240
|
-
end
|
241
|
-
access_address
|
233
|
+
def get_mods_access_address(doc)
|
234
|
+
access_address = nil
|
235
|
+
url = doc.at('//mods:location/mods:url',NS)
|
236
|
+
if url
|
237
|
+
usage = url.attributes["usage"].try(:content)
|
238
|
+
case usage
|
239
|
+
when "primary display", "primary"
|
240
|
+
access_address = url.try(:content)
|
242
241
|
end
|
242
|
+
end
|
243
|
+
access_address
|
244
|
+
end
|
243
245
|
|
244
|
-
|
245
|
-
|
246
|
-
|
246
|
+
def get_mods_publication_place(doc)
|
247
|
+
place = doc.at('//mods:originInfo/mods:place/mods:placeTerm[@type="text"]',NS).try(:content)
|
248
|
+
end
|
247
249
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
end
|
258
|
-
height = extent[1].try(:strip)
|
259
|
-
if height =~ /(\d+)\s*cm/
|
260
|
-
value[:height] = $1.dup.to_i
|
261
|
-
end
|
262
|
-
end
|
263
|
-
value
|
250
|
+
def get_mods_extent(doc)
|
251
|
+
extent = doc.at('//mods:extent',NS).try(:content)
|
252
|
+
value = {:start_page => nil, :end_page => nil, :height => nil}
|
253
|
+
if extent
|
254
|
+
extent = extent.split(';')
|
255
|
+
page = extent[0].try(:strip)
|
256
|
+
if page =~ /(\d+)\s*(p|page)/
|
257
|
+
value[:start_page] = 1
|
258
|
+
value[:end_page] = $1.dup.to_i
|
264
259
|
end
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
if note.blank?
|
269
|
-
note = get_mods_creators(doc).map{|e| e[:full_name] }.join( " ; " )
|
270
|
-
end
|
271
|
-
note
|
260
|
+
height = extent[1].try(:strip)
|
261
|
+
if height =~ /(\d+)\s*cm/
|
262
|
+
value[:height] = $1.dup.to_i
|
272
263
|
end
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
264
|
+
end
|
265
|
+
value
|
266
|
+
end
|
267
|
+
|
268
|
+
def get_mods_statement_of_responsibility(doc)
|
269
|
+
note = doc.at('//mods:note[@type="statement of responsibility"]',NS).try(:content)
|
270
|
+
if note.blank?
|
271
|
+
note = get_mods_creators(doc).map{|e| e[:full_name] }.join( " ; " )
|
272
|
+
end
|
273
|
+
note
|
274
|
+
end
|
275
|
+
|
276
|
+
def get_mods_note(doc)
|
277
|
+
notes = []
|
278
|
+
doc.xpath('//mods:note',NS).each do |note|
|
279
|
+
type = note.attributes['type'].try(:content)
|
280
|
+
next if type == "statement of responsibility"
|
281
|
+
note_s = note.try( :content )
|
282
|
+
notes << note_s unless note_s.blank?
|
283
|
+
end
|
284
|
+
if notes.empty?
|
285
|
+
nil
|
286
|
+
else
|
287
|
+
notes.join( ";\n" )
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
def get_mods_date_of_publication(doc)
|
292
|
+
dates = []
|
293
|
+
doc.xpath('//mods:dateIssued',NS).each do |pub_date|
|
294
|
+
pub_date = pub_date.content.sub( /\A[cp]/, '' )
|
295
|
+
next unless pub_date =~ /^\d+(-\d\d?){0,2}$/
|
296
|
+
date = pub_date.split('-')
|
297
|
+
if date[0] and date[1]
|
298
|
+
dates << sprintf("%04d-%02d", date[0], date[1])
|
299
|
+
else
|
300
|
+
dates << pub_date
|
286
301
|
end
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
302
|
+
end
|
303
|
+
dates.compact.first
|
304
|
+
end
|
305
|
+
|
306
|
+
# derived from marcfrequency: http://www.loc.gov/standards/valuelist/marcfrequency.html
|
307
|
+
MARCFREQUENCY = [
|
308
|
+
"Continuously updated",
|
309
|
+
"Daily",
|
310
|
+
"Semiweekly",
|
311
|
+
"Three times a week",
|
312
|
+
"Weekly",
|
313
|
+
"Biweekly",
|
314
|
+
"Three times a month",
|
315
|
+
"Semimonthly",
|
316
|
+
"Monthly",
|
317
|
+
"Bimonthly",
|
318
|
+
"Quarterly",
|
319
|
+
"Three times a year",
|
320
|
+
"Semiannual",
|
321
|
+
"Annual",
|
322
|
+
"Biennial",
|
323
|
+
"Triennial",
|
324
|
+
"Completely irregular",
|
325
|
+
]
|
326
|
+
def get_mods_frequency(doc)
|
327
|
+
frequencies = []
|
328
|
+
doc.xpath('//mods:frequency',NS).each do |freq|
|
329
|
+
frequency = freq.try(:content)
|
330
|
+
MARCFREQUENCY.each do |freq_regex|
|
331
|
+
if /\A(#{freq_regex})/ =~ frequency
|
332
|
+
frequency_name = freq_regex.downcase.gsub( /\s+/, "_" )
|
333
|
+
frequencies << Frequency.where( name: frequency_name ).first
|
298
334
|
end
|
299
|
-
dates.compact.first
|
300
335
|
end
|
336
|
+
end
|
337
|
+
frequencies.compact.first
|
338
|
+
end
|
301
339
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
"
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
"Semimonthly",
|
312
|
-
"Monthly",
|
313
|
-
"Bimonthly",
|
314
|
-
"Quarterly",
|
315
|
-
"Three times a year",
|
316
|
-
"Semiannual",
|
317
|
-
"Annual",
|
318
|
-
"Biennial",
|
319
|
-
"Triennial",
|
320
|
-
"Completely irregular",
|
321
|
-
]
|
322
|
-
def get_mods_frequency(doc)
|
323
|
-
frequencies = []
|
324
|
-
doc.xpath('//mods:frequency',NS).each do |freq|
|
325
|
-
frequency = freq.try(:content)
|
326
|
-
MARCFREQUENCY.each do |freq_regex|
|
327
|
-
if /\A(#{freq_regex})/ =~ frequency
|
328
|
-
frequency_name = freq_regex.downcase.gsub( /\s+/, "_" )
|
329
|
-
frequencies << Frequency.where( :name => frequency_name ).first
|
330
|
-
end
|
331
|
-
end
|
332
|
-
end
|
333
|
-
frequencies.compact.first
|
334
|
-
end
|
340
|
+
def get_mods_creators(doc)
|
341
|
+
creators = []
|
342
|
+
doc.xpath('/mods:mods/mods:name',NS).each do |creator|
|
343
|
+
creators << {
|
344
|
+
:full_name => creator.xpath('./mods:namePart',NS).collect(&:content).join( ", " ),
|
345
|
+
}
|
346
|
+
end
|
347
|
+
creators.uniq
|
348
|
+
end
|
335
349
|
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
350
|
+
# TODO:only LCSH-based parsing...
|
351
|
+
def get_mods_subjects(doc)
|
352
|
+
subjects = []
|
353
|
+
doc.xpath('//mods:subject[@authority="lcsh"]',NS).each do |s|
|
354
|
+
subject = []
|
355
|
+
s.xpath('./*',NS).each do |subelement|
|
356
|
+
type = subelement.name
|
357
|
+
case subelement.name
|
358
|
+
when "topic", "geographic", "genre", "temporal"
|
359
|
+
subject << { :type => type , :term => subelement.try(:content) }
|
360
|
+
when "titleInfo"
|
361
|
+
subject << { :type => type, :term => subelement.at('./mods:title',NS).try(:content) }
|
362
|
+
when "name"
|
363
|
+
name = subelement.xpath('./mods:namePart',NS).map{|e| e.try(:content) }.join( ", " )
|
364
|
+
subject << { :type => type, :term => name }
|
342
365
|
end
|
343
|
-
creators.uniq
|
344
366
|
end
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
subjects = []
|
349
|
-
doc.xpath('//mods:subject[@authority="lcsh"]',NS).each do |s|
|
350
|
-
subject = []
|
351
|
-
s.xpath('./*',NS).each do |subelement|
|
352
|
-
type = subelement.name
|
353
|
-
case subelement.name
|
354
|
-
when "topic", "geographic", "genre", "temporal"
|
355
|
-
subject << { :type => type , :term => subelement.try(:content) }
|
356
|
-
when "titleInfo"
|
357
|
-
subject << { :type => type, :term => subelement.at('./mods:title',NS).try(:content) }
|
358
|
-
when "name"
|
359
|
-
name = subelement.xpath('./mods:namePart',NS).map{|e| e.try(:content) }.join( ", " )
|
360
|
-
subject << { :type => type, :term => name }
|
361
|
-
end
|
362
|
-
end
|
363
|
-
next if subject.compact.empty?
|
364
|
-
if subject.size > 1 and subject[0][:type] == "name" and subject[1][:type] == "titleInfo"
|
365
|
-
subject[0..1] = { :term => subject[0..1].map{|e|e[:term]}.join( ". " ) }
|
366
|
-
end
|
367
|
-
subjects << {
|
368
|
-
:term => subject.map{|e|e[:term]}.compact.join( "--" )
|
369
|
-
}
|
370
|
-
end
|
371
|
-
subjects
|
367
|
+
next if subject.compact.empty?
|
368
|
+
if subject.size > 1 and subject[0][:type] == "name" and subject[1][:type] == "titleInfo"
|
369
|
+
subject[0..1] = { :term => subject[0..1].map{|e|e[:term]}.join( ". " ) }
|
372
370
|
end
|
371
|
+
subjects << {
|
372
|
+
:term => subject.map{|e|e[:term]}.compact.join( "--" )
|
373
|
+
}
|
374
|
+
end
|
375
|
+
subjects
|
376
|
+
end
|
373
377
|
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
end
|
382
|
-
end
|
383
|
-
classifications.compact
|
378
|
+
# TODO:support only DDC.
|
379
|
+
def get_mods_classifications(doc)
|
380
|
+
classifications = []
|
381
|
+
doc.xpath('//mods:classification[@authority="ddc"]',NS).each do|c|
|
382
|
+
ddc = c.content
|
383
|
+
if ddc
|
384
|
+
classifications << ddc.split(/[^\d\.]/).first.try(:strip)
|
384
385
|
end
|
386
|
+
end
|
387
|
+
classifications.compact
|
388
|
+
end
|
385
389
|
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
carrier_type = CarrierType.where(:
|
479
|
-
content_type = ContentType.where(:
|
480
|
-
|
481
|
-
carrier_type = CarrierType.where(:
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
390
|
+
def get_mods_carrier_and_content_types(doc)
|
391
|
+
carrier_type = content_type = nil
|
392
|
+
doc.xpath('//mods:form',NS).each do |e|
|
393
|
+
authority = e.attributes['authority'].try(:content)
|
394
|
+
case authority
|
395
|
+
when "gmd"
|
396
|
+
case e.content
|
397
|
+
when "electronic resource"
|
398
|
+
carrier_type = CarrierType.where(name: 'online_resource').first
|
399
|
+
when "videorecording", "motion picture", "game"
|
400
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
401
|
+
when "sound recording"
|
402
|
+
content_type = ContentType.where(name: 'performed_music').first
|
403
|
+
when "graphic", "picture"
|
404
|
+
content_type = ContentType.where(name: 'still_image').first
|
405
|
+
#TODO: Enju needs more specific mappings...
|
406
|
+
when "art original",
|
407
|
+
"microscope slides",
|
408
|
+
"art reproduction",
|
409
|
+
"model",
|
410
|
+
"chart",
|
411
|
+
"diorama",
|
412
|
+
"realia",
|
413
|
+
"filmstrip",
|
414
|
+
"slide",
|
415
|
+
"flash card",
|
416
|
+
"technical drawing",
|
417
|
+
"toy",
|
418
|
+
"kit",
|
419
|
+
"transparency",
|
420
|
+
"microform"
|
421
|
+
content_type = ContentType.where(name: 'other').first
|
422
|
+
end
|
423
|
+
when "marcsmd" # cf.http://www.loc.gov/standards/valuelist/marcsmd.html
|
424
|
+
case e.content
|
425
|
+
when "text", "large print", "regular print", "text in looseleaf binder"
|
426
|
+
carrier_type = CarrierType.where(name: 'volume').first
|
427
|
+
content_type = ContentType.where(name: 'text').first
|
428
|
+
when "braille"
|
429
|
+
carrier_type = CarrierType.where(name: 'volume').first
|
430
|
+
content_type = ContentType.where(name: 'tactile_text').first
|
431
|
+
when "videodisc"
|
432
|
+
carrier_type = CarrierType.where(name: 'videodisc').first
|
433
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
434
|
+
when "videorecording", "videocartridge", "videocassette", "videoreel"
|
435
|
+
carrier_type = CarrierType.where(name: 'other').first
|
436
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
437
|
+
when "electronic resource"
|
438
|
+
carrier_type = CarrierType.where(name: 'online_resource').first
|
439
|
+
when "chip cartridge", "computer optical disc cartridge", "magnetic disk", "magneto-optical disc", "optical disc", "remote", "tape cartridge", "tape cassette", "tape reel"
|
440
|
+
#carrier_type = CarrierType.where(name: 'other').first
|
441
|
+
when "motion picture", "film cartridge", "film cassette", "film reel"
|
442
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
443
|
+
when "sound recording", "cylinder", "roll", "sound cartridge", "sound cassette","sound-tape reel", "sound-track film", "wire recording"
|
444
|
+
content_type = ContentType.where(name: 'performed_music').first
|
445
|
+
when "sound disc"
|
446
|
+
content_type = ContentType.where(name: 'performed_music').first
|
447
|
+
carrier_type = CarrierType.where(name: 'audio_disc').first
|
448
|
+
when "nonprojected graphic", "chart", "collage", "drawing", "flash card", "painting", "photomechanical print", "photonegative", "photoprint", "picture", "print", "technical drawing", "projected graphic", "filmslip", "filmstrip cartridge", "filmstrip roll", "other filmstrip type", "slide", "transparency"
|
449
|
+
content_type = ContentType.where(name: 'still_image').first
|
450
|
+
when "tactile material", "braille", "tactile, with no writing system"
|
451
|
+
content_type = ContentType.where(name: 'tactile_text').first
|
452
|
+
#TODO: Enju needs more specific mappings...
|
453
|
+
when "globe",
|
454
|
+
"celestial globe",
|
455
|
+
"earth moon globe",
|
456
|
+
"planetary or lunar globe",
|
457
|
+
"terrestrial globe",
|
458
|
+
"map",
|
459
|
+
"atlas",
|
460
|
+
"diagram",
|
461
|
+
"map",
|
462
|
+
"model",
|
463
|
+
"profile",
|
464
|
+
"remote-sensing image",
|
465
|
+
"section",
|
466
|
+
"view",
|
467
|
+
"microform",
|
468
|
+
"aperture card",
|
469
|
+
"microfiche",
|
470
|
+
"microfiche cassette",
|
471
|
+
"microfilm cartridge",
|
472
|
+
"microfilm cassette",
|
473
|
+
"microfilm reel",
|
474
|
+
"microopaque",
|
475
|
+
"combination",
|
476
|
+
"moon"
|
477
|
+
content_type = ContentType.where(name: 'other').first
|
478
|
+
end
|
479
|
+
when "marcform" # cf. http://www.loc.gov/standards/valuelist/marcform.html
|
480
|
+
case e.content
|
481
|
+
when "print", "large print"
|
482
|
+
carrier_type = CarrierType.where(name: 'volume').first
|
483
|
+
content_type = ContentType.where(name: 'text').first
|
484
|
+
when "electronic"
|
485
|
+
carrier_type = CarrierType.where(name: 'online_resource').first
|
486
|
+
when "braille"
|
487
|
+
content_type = ContentType.where(name: 'tactile_text').first
|
488
|
+
#TODO: Enju needs more specific mappings...
|
489
|
+
when "microfiche", "microfilm"
|
490
|
+
content_type = ContentType.where(name: 'other').first
|
491
|
+
end
|
492
|
+
end
|
493
|
+
end
|
490
494
|
doc.xpath('//mods:genre',NS).each do |e|
|
491
495
|
authority = e.attributes['authority'].try(:content)
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
+
case authority
|
497
|
+
when "rdacontent"
|
498
|
+
content_type = ContentType.where(name: e.content.gsub(/\W+/, "_")).first
|
499
|
+
content_type = ContentType.where(name: 'other').first unless content_type
|
496
500
|
end
|
497
501
|
end
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
502
|
+
type = doc.at('//mods:typeOfResource',NS).try(:content)
|
503
|
+
case type
|
504
|
+
when "text"
|
505
|
+
content_type = ContentType.where(name: 'text').first
|
506
|
+
when "sound recording"
|
507
|
+
content_type = ContentType.where(name: 'sounds').first
|
508
|
+
when"sound recording-musical"
|
509
|
+
content_type = ContentType.where(name: 'performed_music').first
|
510
|
+
when"sound recording-nonmusical"
|
511
|
+
content_type = ContentType.where(name: 'spoken_word').first
|
512
|
+
when "moving image"
|
513
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
514
|
+
when "software, multimedia"
|
515
|
+
content_type = ContentType.where(name: 'other').first
|
516
|
+
when "cartographic"
|
517
|
+
content_type = ContentType.where(name: 'cartographic_image').first
|
518
|
+
when "notated music"
|
519
|
+
content_type = ContentType.where(name: 'notated_music').first
|
520
|
+
when "still image"
|
521
|
+
content_type = ContentType.where(name: 'still_image').first
|
522
|
+
when "three dimensional object"
|
523
|
+
content_type = ContentType.where(name: 'other').first
|
524
|
+
when "mixed material"
|
525
|
+
content_type = ContentType.where(name: 'other').first
|
526
|
+
end
|
523
527
|
{ :carrier_type => carrier_type, :content_type => content_type }
|
524
528
|
end
|
525
529
|
end
|