enju_loc 0.1.0.pre3 → 0.1.0.pre4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.rdoc +9 -1
- data/app/models/loc_search.rb +3 -1
- data/lib/enju_loc/engine.rb +0 -1
- data/lib/enju_loc/loc_search.rb +346 -342
- data/lib/enju_loc/version.rb +1 -1
- data/spec/dummy/bin/bundle +3 -0
- data/spec/dummy/bin/rails +4 -0
- data/spec/dummy/bin/rake +4 -0
- data/spec/dummy/bin/setup +29 -0
- data/spec/dummy/config/database.yml +21 -16
- data/spec/dummy/db/migrate/005_create_manifestations.rb +0 -1
- data/spec/dummy/db/migrate/20140519170214_create_resource_import_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140519171220_create_import_request_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140524020735_create_agent_import_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140524074813_create_user_import_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140614141500_create_resource_export_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20140709113905_create_user_export_file_transitions.rb +5 -1
- data/spec/dummy/db/migrate/20141003181336_add_full_name_transcription_to_profile.rb +5 -0
- data/spec/dummy/db/migrate/20141003182825_add_date_of_birth_to_profile.rb +5 -0
- data/spec/dummy/db/schema.rb +414 -413
- data/spec/fixtures/users.yml +1 -1
- data/spec/spec_helper.rb +2 -0
- metadata +42 -20
- data/spec/dummy/db/test.sqlite3 +0 -0
- data/spec/dummy/script/rails +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d6f7ddad0b22f2b23741d4e91c73f27ad22827c
|
4
|
+
data.tar.gz: daa82be27e7d64aa3fef0a31d3b46ec5294721dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34eec96ca78e77b7f9304d1644bed1ca1ca7eb0e522ae6760e2f2e6cf4cc819ff4666f70cb0a2cd30162e13f5dd41dc5c97b547dd55671d0697265aea49fbe3e
|
7
|
+
data.tar.gz: 3971a831b8c62dee85e6ce3afde3901be6228371d78c5bbb44fdd5a4f5bb488495ea84f64401ecfc44d19b01bb3fa8d7919e9d615407fc14e3a47b6af8c0e0bc
|
data/README.rdoc
CHANGED
@@ -1,3 +1,11 @@
|
|
1
1
|
= EnjuLoc
|
2
|
+
{<img src="https://secure.travis-ci.org/next-l/enju_loc.png?branch=1.1" alt="Build Status" />}[http://travis-ci.org/next-l/enju_loc]
|
3
|
+
{<img src="https://coveralls.io/repos/next-l/enju_loc/badge.png?branch=1.1" alt="Coverage Status" />}[https://coveralls.io/r/next-l/enju_loc?branch=1.1]
|
4
|
+
{<img src="https://hakiri.io/github/next-l/enju_loc/1.1.svg" alt="security" />}[https://hakiri.io/github/next-l/enju_loc/1.1]
|
2
5
|
|
3
|
-
This project rocks and uses MIT-LICENSE.
|
6
|
+
This project rocks and uses MIT-LICENSE.
|
7
|
+
|
8
|
+
== 製作者・貢献者 (Authors and contributors)
|
9
|
+
* {TAKAKU, Masao}[https://github.com/masao] ({@tmasao}[https://twitter.com/tmasao])
|
10
|
+
* {TANABE, Kosuke}[https://github.com/nabeta] ({@nabeta}[https://twitter.com/nabeta])
|
11
|
+
* {Project Next-L}[http://www.next-l.jp] ({@ProjectNextL}[https://twitter.com/ProjectNextL])
|
data/app/models/loc_search.rb
CHANGED
@@ -85,7 +85,9 @@ class LocSearch
|
|
85
85
|
end
|
86
86
|
|
87
87
|
def self.import_from_sru_response(lccn)
|
88
|
-
|
88
|
+
identifier_type_lccn = IdentifierType.where(name: 'lccn').first
|
89
|
+
identifier_type_lccn = IdentifierType.create!(name: 'lccn') unless identifier_type_lccn
|
90
|
+
identifier = Identifier.where(body: lccn, identifier_type_id: identifier_type_lccn.id).first
|
89
91
|
return if identifier
|
90
92
|
url = make_sru_request_uri("bath.lccn=#{ lccn }")
|
91
93
|
response = Nokogiri::XML(Faraday.get(url).body).at( '//zs:recordData', {"zs" => "http://www.loc.gov/zing/srw/"} )
|
data/lib/enju_loc/engine.rb
CHANGED
data/lib/enju_loc/loc_search.rb
CHANGED
@@ -21,8 +21,8 @@ module EnjuLoc
|
|
21
21
|
|
22
22
|
def import_record_from_loc_isbn(options)
|
23
23
|
#if options[:isbn]
|
24
|
-
|
25
|
-
|
24
|
+
lisbn = Lisbn.new(options[:isbn])
|
25
|
+
raise EnjuLoc::InvalidIsbn unless lisbn.valid?
|
26
26
|
#end
|
27
27
|
|
28
28
|
manifestation = Manifestation.find_by_isbn(lisbn.isbn)
|
@@ -36,7 +36,9 @@ module EnjuLoc
|
|
36
36
|
NS = {"mods"=>"http://www.loc.gov/mods/v3"}
|
37
37
|
def import_record_from_loc( doc )
|
38
38
|
record_identifier = doc.at( '//mods:recordInfo/mods:recordIdentifier', NS ).try(:content)
|
39
|
-
|
39
|
+
identifier_type = IdentifierType.where(name: 'loc_identifier').first
|
40
|
+
identifier_type = IdentifierType.create!(name: 'loc_identifier') unless identifier_type
|
41
|
+
loc_identifier = Identifier.where(:body => record_identifier, :identifier_type_id => identifier_type.id).first
|
40
42
|
return loc_identifier.manifestation if loc_identifier
|
41
43
|
|
42
44
|
publishers = []
|
@@ -67,25 +69,25 @@ module EnjuLoc
|
|
67
69
|
issn = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn"]',NS).try(:content).to_s)
|
68
70
|
issn_l = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn-l"]',NS).try(:content).to_s)
|
69
71
|
|
70
|
-
|
71
|
-
|
72
|
-
|
72
|
+
types = get_mods_carrier_and_content_types( doc )
|
73
|
+
content_type = types[ :content_type ]
|
74
|
+
carrier_type = types[ :carrier_type ]
|
73
75
|
|
74
|
-
|
76
|
+
record_identifier = doc.at('//mods:recordInfo/mods:recordIdentifier',NS).try(:content)
|
75
77
|
description = doc.xpath('//mods:abstract',NS).collect(&:content).join("\n")
|
76
78
|
edition_string = doc.at('//mods:edition',NS).try(:content)
|
77
79
|
extent = get_mods_extent(doc)
|
78
|
-
|
80
|
+
note = get_mods_note(doc)
|
79
81
|
frequency = get_mods_frequency(doc)
|
80
|
-
|
81
|
-
|
82
|
+
issuance = doc.at('//mods:issuance',NS).try(:content)
|
83
|
+
is_serial = true if issuance == "serial"
|
82
84
|
statement_of_responsibility = get_mods_statement_of_responsibility(doc)
|
83
|
-
|
84
|
-
|
85
|
+
access_address = get_mods_access_address(doc)
|
86
|
+
publication_place = get_mods_publication_place(doc)
|
85
87
|
|
86
88
|
manifestation = nil
|
87
89
|
Agent.transaction do
|
88
|
-
|
90
|
+
creator_agents = Agent.import_agents(creators)
|
89
91
|
publisher_agents = Agent.import_agents(publishers)
|
90
92
|
|
91
93
|
manifestation = Manifestation.new(
|
@@ -95,51 +97,51 @@ module EnjuLoc
|
|
95
97
|
:language_id => language_id,
|
96
98
|
:pub_date => date,
|
97
99
|
:description => description,
|
98
|
-
|
100
|
+
:edition_string => edition_string,
|
99
101
|
:statement_of_responsibility => statement_of_responsibility,
|
100
102
|
:start_page => extent[:start_page],
|
101
103
|
:end_page => extent[:end_page],
|
102
104
|
:height => extent[:height],
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
105
|
+
:access_address => access_address,
|
106
|
+
:note => note,
|
107
|
+
:publication_place => publication_place,
|
108
|
+
:serial => is_serial,
|
107
109
|
)
|
108
110
|
identifier = {}
|
109
111
|
if isbn
|
110
112
|
identifier[:isbn] = Identifier.new(:body => isbn)
|
111
|
-
identifier[:isbn].identifier_type = IdentifierType.where(:name
|
113
|
+
identifier[:isbn].identifier_type = IdentifierType.where(name: 'isbn').first || IdnetifierType.create!(name: 'isbn')
|
112
114
|
end
|
113
115
|
if loc_identifier
|
114
116
|
identifier[:loc_identifier] = Identifier.new(:body => loc_identifier)
|
115
|
-
identifier[:loc_identifier].identifier_type = IdentifierType.where(:name
|
117
|
+
identifier[:loc_identifier].identifier_type = IdentifierType.where(name: 'loc_identifier').first || IdnetifierType.create!(name: 'loc_identifier')
|
116
118
|
end
|
117
119
|
if lccn
|
118
120
|
identifier[:lccn] = Identifier.new(:body => lccn)
|
119
|
-
identifier[:lccn].identifier_type = IdentifierType.where(:name
|
121
|
+
identifier[:lccn].identifier_type = IdentifierType.where(name: 'lccn').first || IdentifierType.create!(name: 'lccn')
|
120
122
|
end
|
121
123
|
if issn
|
122
124
|
identifier[:issn] = Identifier.new(:body => issn)
|
123
|
-
identifier[:issn].identifier_type = IdentifierType.where(:name
|
125
|
+
identifier[:issn].identifier_type = IdentifierType.where(name: 'issn').first || IdentifierType.create!(name: 'issn')
|
124
126
|
end
|
125
127
|
if issn_l
|
126
128
|
identifier[:issn_l] = Identifier.new(:body => issn_l)
|
127
|
-
identifier[:issn_l].identifier_type = IdentifierType.where(:name
|
129
|
+
identifier[:issn_l].identifier_type = IdentifierType.where(name: 'issn_l').first || IdentifierType.create!(name: 'issn_l')
|
128
130
|
end
|
129
131
|
manifestation.carrier_type = carrier_type if carrier_type
|
130
132
|
manifestation.manifestation_content_type = content_type if content_type
|
131
|
-
|
133
|
+
manifestation.frequency = frequency if frequency
|
132
134
|
if manifestation.save
|
133
135
|
identifier.each do |k, v|
|
134
136
|
manifestation.identifiers << v if v.valid?
|
135
137
|
end
|
136
138
|
manifestation.publishers << publisher_agents
|
137
|
-
|
138
|
-
|
139
|
+
manifestation.creators << creator_agents
|
140
|
+
create_subject_related_elements(doc, manifestation)
|
139
141
|
create_series_statement(doc, manifestation)
|
140
|
-
|
142
|
+
if is_serial
|
141
143
|
create_series_master(doc, manifestation)
|
142
|
-
|
144
|
+
end
|
143
145
|
end
|
144
146
|
end
|
145
147
|
return manifestation
|
@@ -147,31 +149,31 @@ module EnjuLoc
|
|
147
149
|
|
148
150
|
private
|
149
151
|
def create_subject_related_elements(doc, manifestation)
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
subject_heading_type = SubjectHeadingType.where(:name
|
152
|
+
subjects = get_mods_subjects(doc)
|
153
|
+
classifications = get_mods_classifications(doc)
|
154
|
+
if defined?(EnjuSubject)
|
155
|
+
subject_heading_type = SubjectHeadingType.where(name: 'lcsh').first || SubjectHeadingType.create!(name: 'lcsh')
|
154
156
|
subjects.each do |term|
|
155
157
|
subject = Subject.where(:term => term[:term]).first
|
156
158
|
unless subject
|
157
159
|
subject = Subject.new(term)
|
158
160
|
subject.subject_heading_type = subject_heading_type
|
159
|
-
subject.subject_type = SubjectType.where(:name
|
161
|
+
subject.subject_type = SubjectType.where(name: 'concept').first || SubjectType.create!(name: 'concept')
|
160
162
|
end
|
161
163
|
manifestation.subjects << subject
|
162
164
|
end
|
163
165
|
if classifications
|
164
|
-
classification_type = ClassificationType.where(:name
|
165
|
-
|
166
|
+
classification_type = ClassificationType.where(name: 'ddc').first || ClassificationType.create!(name: 'ddc')
|
167
|
+
classifications.each do |ddc|
|
166
168
|
classification = Classification.where(:category => ddc).first
|
167
|
-
|
169
|
+
unless classification
|
168
170
|
classification = Classification.new(:category => ddc)
|
169
171
|
classification.classification_type = classification_type
|
170
|
-
|
172
|
+
end
|
171
173
|
manifestation.classifications << classification if classification.valid?
|
172
174
|
end
|
173
175
|
end
|
174
|
-
|
176
|
+
end
|
175
177
|
end
|
176
178
|
|
177
179
|
def create_series_statement(doc, manifestation)
|
@@ -181,7 +183,7 @@ module EnjuLoc
|
|
181
183
|
series_title = title.split(';')[0].strip
|
182
184
|
end
|
183
185
|
if series_title
|
184
|
-
series_statement = SeriesStatement.where(:original_title => series_title).
|
186
|
+
series_statement = SeriesStatement.where(:original_title => series_title).first || SeriesStatement.create!(original_title: series_title)
|
185
187
|
if series_statement.try(:save)
|
186
188
|
manifestation.series_statements << series_statement
|
187
189
|
end
|
@@ -191,335 +193,337 @@ module EnjuLoc
|
|
191
193
|
|
192
194
|
def create_series_master(doc, manifestation)
|
193
195
|
titles = get_mods_titles(doc)
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
196
|
+
series_statement = SeriesStatement.new(
|
197
|
+
:original_title => titles[:original_title],
|
198
|
+
:title_alternative => titles[:title_alternative],
|
199
|
+
:series_master => true,
|
200
|
+
)
|
201
|
+
if series_statement.try(:save)
|
202
|
+
manifestation.series_statements << series_statement
|
203
|
+
end
|
202
204
|
end
|
203
205
|
|
204
206
|
def get_mods_titles(doc)
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
end
|
224
|
-
{ :original_title => original_title, :title_alternative => title_alternatives.join( " ; " ) }
|
207
|
+
original_title = ""
|
208
|
+
title_alternatives = []
|
209
|
+
doc.xpath('//mods:mods/mods:titleInfo',NS).each do |e|
|
210
|
+
type = e.attributes["type"].try(:content)
|
211
|
+
case type
|
212
|
+
when "alternative", "translated", "abbreviated", "uniform"
|
213
|
+
title_alternatives << e.at('./mods:title',NS).content
|
214
|
+
else
|
215
|
+
nonsort = e.at('./mods:nonSort',NS).try(:content)
|
216
|
+
original_title << nonsort if nonsort
|
217
|
+
original_title << e.at('./mods:title',NS).try(:content)
|
218
|
+
subtitle = e.at('./mods:subTitle',NS).try(:content)
|
219
|
+
original_title << " : #{ subtitle }" if subtitle
|
220
|
+
partnumber = e.at('./mods:partNumber',NS).try(:content)
|
221
|
+
partname = e.at('./mods:partName',NS).try(:content)
|
222
|
+
partname = [ partnumber, partname ].compact.join( ": " )
|
223
|
+
original_title << ". #{ partname }" unless partname.blank?
|
224
|
+
end
|
225
225
|
end
|
226
|
+
{ :original_title => original_title, :title_alternative => title_alternatives.join( " ; " ) }
|
227
|
+
end
|
226
228
|
|
227
|
-
|
228
|
-
|
229
|
-
|
229
|
+
def get_mods_language(doc)
|
230
|
+
language = doc.at('//mods:language/mods:languageTerm[@authority="iso639-2b"]',NS).try(:content)
|
231
|
+
end
|
230
232
|
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
end
|
240
|
-
end
|
241
|
-
access_address
|
233
|
+
def get_mods_access_address(doc)
|
234
|
+
access_address = nil
|
235
|
+
url = doc.at('//mods:location/mods:url',NS)
|
236
|
+
if url
|
237
|
+
usage = url.attributes["usage"].try(:content)
|
238
|
+
case usage
|
239
|
+
when "primary display", "primary"
|
240
|
+
access_address = url.try(:content)
|
242
241
|
end
|
242
|
+
end
|
243
|
+
access_address
|
244
|
+
end
|
243
245
|
|
244
|
-
|
245
|
-
|
246
|
-
|
246
|
+
def get_mods_publication_place(doc)
|
247
|
+
place = doc.at('//mods:originInfo/mods:place/mods:placeTerm[@type="text"]',NS).try(:content)
|
248
|
+
end
|
247
249
|
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
end
|
258
|
-
height = extent[1].try(:strip)
|
259
|
-
if height =~ /(\d+)\s*cm/
|
260
|
-
value[:height] = $1.dup.to_i
|
261
|
-
end
|
262
|
-
end
|
263
|
-
value
|
250
|
+
def get_mods_extent(doc)
|
251
|
+
extent = doc.at('//mods:extent',NS).try(:content)
|
252
|
+
value = {:start_page => nil, :end_page => nil, :height => nil}
|
253
|
+
if extent
|
254
|
+
extent = extent.split(';')
|
255
|
+
page = extent[0].try(:strip)
|
256
|
+
if page =~ /(\d+)\s*(p|page)/
|
257
|
+
value[:start_page] = 1
|
258
|
+
value[:end_page] = $1.dup.to_i
|
264
259
|
end
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
if note.blank?
|
269
|
-
note = get_mods_creators(doc).map{|e| e[:full_name] }.join( " ; " )
|
270
|
-
end
|
271
|
-
note
|
260
|
+
height = extent[1].try(:strip)
|
261
|
+
if height =~ /(\d+)\s*cm/
|
262
|
+
value[:height] = $1.dup.to_i
|
272
263
|
end
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
264
|
+
end
|
265
|
+
value
|
266
|
+
end
|
267
|
+
|
268
|
+
def get_mods_statement_of_responsibility(doc)
|
269
|
+
note = doc.at('//mods:note[@type="statement of responsibility"]',NS).try(:content)
|
270
|
+
if note.blank?
|
271
|
+
note = get_mods_creators(doc).map{|e| e[:full_name] }.join( " ; " )
|
272
|
+
end
|
273
|
+
note
|
274
|
+
end
|
275
|
+
|
276
|
+
def get_mods_note(doc)
|
277
|
+
notes = []
|
278
|
+
doc.xpath('//mods:note',NS).each do |note|
|
279
|
+
type = note.attributes['type'].try(:content)
|
280
|
+
next if type == "statement of responsibility"
|
281
|
+
note_s = note.try( :content )
|
282
|
+
notes << note_s unless note_s.blank?
|
283
|
+
end
|
284
|
+
if notes.empty?
|
285
|
+
nil
|
286
|
+
else
|
287
|
+
notes.join( ";\n" )
|
288
|
+
end
|
289
|
+
end
|
290
|
+
|
291
|
+
def get_mods_date_of_publication(doc)
|
292
|
+
dates = []
|
293
|
+
doc.xpath('//mods:dateIssued',NS).each do |pub_date|
|
294
|
+
pub_date = pub_date.content.sub( /\A[cp]/, '' )
|
295
|
+
next unless pub_date =~ /^\d+(-\d\d?){0,2}$/
|
296
|
+
date = pub_date.split('-')
|
297
|
+
if date[0] and date[1]
|
298
|
+
dates << sprintf("%04d-%02d", date[0], date[1])
|
299
|
+
else
|
300
|
+
dates << pub_date
|
286
301
|
end
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
302
|
+
end
|
303
|
+
dates.compact.first
|
304
|
+
end
|
305
|
+
|
306
|
+
# derived from marcfrequency: http://www.loc.gov/standards/valuelist/marcfrequency.html
|
307
|
+
MARCFREQUENCY = [
|
308
|
+
"Continuously updated",
|
309
|
+
"Daily",
|
310
|
+
"Semiweekly",
|
311
|
+
"Three times a week",
|
312
|
+
"Weekly",
|
313
|
+
"Biweekly",
|
314
|
+
"Three times a month",
|
315
|
+
"Semimonthly",
|
316
|
+
"Monthly",
|
317
|
+
"Bimonthly",
|
318
|
+
"Quarterly",
|
319
|
+
"Three times a year",
|
320
|
+
"Semiannual",
|
321
|
+
"Annual",
|
322
|
+
"Biennial",
|
323
|
+
"Triennial",
|
324
|
+
"Completely irregular",
|
325
|
+
]
|
326
|
+
def get_mods_frequency(doc)
|
327
|
+
frequencies = []
|
328
|
+
doc.xpath('//mods:frequency',NS).each do |freq|
|
329
|
+
frequency = freq.try(:content)
|
330
|
+
MARCFREQUENCY.each do |freq_regex|
|
331
|
+
if /\A(#{freq_regex})/ =~ frequency
|
332
|
+
frequency_name = freq_regex.downcase.gsub( /\s+/, "_" )
|
333
|
+
frequencies << Frequency.where( name: frequency_name ).first
|
298
334
|
end
|
299
|
-
dates.compact.first
|
300
335
|
end
|
336
|
+
end
|
337
|
+
frequencies.compact.first
|
338
|
+
end
|
301
339
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
"
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
"Semimonthly",
|
312
|
-
"Monthly",
|
313
|
-
"Bimonthly",
|
314
|
-
"Quarterly",
|
315
|
-
"Three times a year",
|
316
|
-
"Semiannual",
|
317
|
-
"Annual",
|
318
|
-
"Biennial",
|
319
|
-
"Triennial",
|
320
|
-
"Completely irregular",
|
321
|
-
]
|
322
|
-
def get_mods_frequency(doc)
|
323
|
-
frequencies = []
|
324
|
-
doc.xpath('//mods:frequency',NS).each do |freq|
|
325
|
-
frequency = freq.try(:content)
|
326
|
-
MARCFREQUENCY.each do |freq_regex|
|
327
|
-
if /\A(#{freq_regex})/ =~ frequency
|
328
|
-
frequency_name = freq_regex.downcase.gsub( /\s+/, "_" )
|
329
|
-
frequencies << Frequency.where( :name => frequency_name ).first
|
330
|
-
end
|
331
|
-
end
|
332
|
-
end
|
333
|
-
frequencies.compact.first
|
334
|
-
end
|
340
|
+
def get_mods_creators(doc)
|
341
|
+
creators = []
|
342
|
+
doc.xpath('/mods:mods/mods:name',NS).each do |creator|
|
343
|
+
creators << {
|
344
|
+
:full_name => creator.xpath('./mods:namePart',NS).collect(&:content).join( ", " ),
|
345
|
+
}
|
346
|
+
end
|
347
|
+
creators.uniq
|
348
|
+
end
|
335
349
|
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
350
|
+
# TODO:only LCSH-based parsing...
|
351
|
+
def get_mods_subjects(doc)
|
352
|
+
subjects = []
|
353
|
+
doc.xpath('//mods:subject[@authority="lcsh"]',NS).each do |s|
|
354
|
+
subject = []
|
355
|
+
s.xpath('./*',NS).each do |subelement|
|
356
|
+
type = subelement.name
|
357
|
+
case subelement.name
|
358
|
+
when "topic", "geographic", "genre", "temporal"
|
359
|
+
subject << { :type => type , :term => subelement.try(:content) }
|
360
|
+
when "titleInfo"
|
361
|
+
subject << { :type => type, :term => subelement.at('./mods:title',NS).try(:content) }
|
362
|
+
when "name"
|
363
|
+
name = subelement.xpath('./mods:namePart',NS).map{|e| e.try(:content) }.join( ", " )
|
364
|
+
subject << { :type => type, :term => name }
|
342
365
|
end
|
343
|
-
creators.uniq
|
344
366
|
end
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
subjects = []
|
349
|
-
doc.xpath('//mods:subject[@authority="lcsh"]',NS).each do |s|
|
350
|
-
subject = []
|
351
|
-
s.xpath('./*',NS).each do |subelement|
|
352
|
-
type = subelement.name
|
353
|
-
case subelement.name
|
354
|
-
when "topic", "geographic", "genre", "temporal"
|
355
|
-
subject << { :type => type , :term => subelement.try(:content) }
|
356
|
-
when "titleInfo"
|
357
|
-
subject << { :type => type, :term => subelement.at('./mods:title',NS).try(:content) }
|
358
|
-
when "name"
|
359
|
-
name = subelement.xpath('./mods:namePart',NS).map{|e| e.try(:content) }.join( ", " )
|
360
|
-
subject << { :type => type, :term => name }
|
361
|
-
end
|
362
|
-
end
|
363
|
-
next if subject.compact.empty?
|
364
|
-
if subject.size > 1 and subject[0][:type] == "name" and subject[1][:type] == "titleInfo"
|
365
|
-
subject[0..1] = { :term => subject[0..1].map{|e|e[:term]}.join( ". " ) }
|
366
|
-
end
|
367
|
-
subjects << {
|
368
|
-
:term => subject.map{|e|e[:term]}.compact.join( "--" )
|
369
|
-
}
|
370
|
-
end
|
371
|
-
subjects
|
367
|
+
next if subject.compact.empty?
|
368
|
+
if subject.size > 1 and subject[0][:type] == "name" and subject[1][:type] == "titleInfo"
|
369
|
+
subject[0..1] = { :term => subject[0..1].map{|e|e[:term]}.join( ". " ) }
|
372
370
|
end
|
371
|
+
subjects << {
|
372
|
+
:term => subject.map{|e|e[:term]}.compact.join( "--" )
|
373
|
+
}
|
374
|
+
end
|
375
|
+
subjects
|
376
|
+
end
|
373
377
|
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
end
|
382
|
-
end
|
383
|
-
classifications.compact
|
378
|
+
# TODO:support only DDC.
|
379
|
+
def get_mods_classifications(doc)
|
380
|
+
classifications = []
|
381
|
+
doc.xpath('//mods:classification[@authority="ddc"]',NS).each do|c|
|
382
|
+
ddc = c.content
|
383
|
+
if ddc
|
384
|
+
classifications << ddc.split(/[^\d\.]/).first.try(:strip)
|
384
385
|
end
|
386
|
+
end
|
387
|
+
classifications.compact
|
388
|
+
end
|
385
389
|
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
carrier_type = CarrierType.where(:
|
479
|
-
content_type = ContentType.where(:
|
480
|
-
|
481
|
-
carrier_type = CarrierType.where(:
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
390
|
+
def get_mods_carrier_and_content_types(doc)
|
391
|
+
carrier_type = content_type = nil
|
392
|
+
doc.xpath('//mods:form',NS).each do |e|
|
393
|
+
authority = e.attributes['authority'].try(:content)
|
394
|
+
case authority
|
395
|
+
when "gmd"
|
396
|
+
case e.content
|
397
|
+
when "electronic resource"
|
398
|
+
carrier_type = CarrierType.where(name: 'online_resource').first
|
399
|
+
when "videorecording", "motion picture", "game"
|
400
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
401
|
+
when "sound recording"
|
402
|
+
content_type = ContentType.where(name: 'performed_music').first
|
403
|
+
when "graphic", "picture"
|
404
|
+
content_type = ContentType.where(name: 'still_image').first
|
405
|
+
#TODO: Enju needs more specific mappings...
|
406
|
+
when "art original",
|
407
|
+
"microscope slides",
|
408
|
+
"art reproduction",
|
409
|
+
"model",
|
410
|
+
"chart",
|
411
|
+
"diorama",
|
412
|
+
"realia",
|
413
|
+
"filmstrip",
|
414
|
+
"slide",
|
415
|
+
"flash card",
|
416
|
+
"technical drawing",
|
417
|
+
"toy",
|
418
|
+
"kit",
|
419
|
+
"transparency",
|
420
|
+
"microform"
|
421
|
+
content_type = ContentType.where(name: 'other').first
|
422
|
+
end
|
423
|
+
when "marcsmd" # cf.http://www.loc.gov/standards/valuelist/marcsmd.html
|
424
|
+
case e.content
|
425
|
+
when "text", "large print", "regular print", "text in looseleaf binder"
|
426
|
+
carrier_type = CarrierType.where(name: 'volume').first
|
427
|
+
content_type = ContentType.where(name: 'text').first
|
428
|
+
when "braille"
|
429
|
+
carrier_type = CarrierType.where(name: 'volume').first
|
430
|
+
content_type = ContentType.where(name: 'tactile_text').first
|
431
|
+
when "videodisc"
|
432
|
+
carrier_type = CarrierType.where(name: 'videodisc').first
|
433
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
434
|
+
when "videorecording", "videocartridge", "videocassette", "videoreel"
|
435
|
+
carrier_type = CarrierType.where(name: 'other').first
|
436
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
437
|
+
when "electronic resource"
|
438
|
+
carrier_type = CarrierType.where(name: 'online_resource').first
|
439
|
+
when "chip cartridge", "computer optical disc cartridge", "magnetic disk", "magneto-optical disc", "optical disc", "remote", "tape cartridge", "tape cassette", "tape reel"
|
440
|
+
#carrier_type = CarrierType.where(name: 'other').first
|
441
|
+
when "motion picture", "film cartridge", "film cassette", "film reel"
|
442
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
443
|
+
when "sound recording", "cylinder", "roll", "sound cartridge", "sound cassette","sound-tape reel", "sound-track film", "wire recording"
|
444
|
+
content_type = ContentType.where(name: 'performed_music').first
|
445
|
+
when "sound disc"
|
446
|
+
content_type = ContentType.where(name: 'performed_music').first
|
447
|
+
carrier_type = CarrierType.where(name: 'audio_disc').first
|
448
|
+
when "nonprojected graphic", "chart", "collage", "drawing", "flash card", "painting", "photomechanical print", "photonegative", "photoprint", "picture", "print", "technical drawing", "projected graphic", "filmslip", "filmstrip cartridge", "filmstrip roll", "other filmstrip type ", "slide", "transparency"
|
449
|
+
content_type = ContentType.where(name: 'still_image').first
|
450
|
+
when "tactile material", "braille", "tactile, with no writing system"
|
451
|
+
content_type = ContentType.where(name: 'tactile_text').first
|
452
|
+
#TODO: Enju needs more specific mappings...
|
453
|
+
when "globe",
|
454
|
+
"celestial globe",
|
455
|
+
"earth moon globe",
|
456
|
+
"planetary or lunar globe",
|
457
|
+
"terrestrial globe",
|
458
|
+
"map",
|
459
|
+
"atlas",
|
460
|
+
"diagram",
|
461
|
+
"map",
|
462
|
+
"model",
|
463
|
+
"profile",
|
464
|
+
"remote-sensing image",
|
465
|
+
"section",
|
466
|
+
"view",
|
467
|
+
"microform",
|
468
|
+
"aperture card",
|
469
|
+
"microfiche",
|
470
|
+
"microfiche cassette",
|
471
|
+
"microfilm cartridge",
|
472
|
+
"microfilm cassette",
|
473
|
+
"microfilm reel",
|
474
|
+
"microopaque",
|
475
|
+
"combination",
|
476
|
+
"moon"
|
477
|
+
content_type = ContentType.where(name: 'other').first
|
478
|
+
end
|
479
|
+
when "marcform" # cf. http://www.loc.gov/standards/valuelist/marcform.html
|
480
|
+
case e.content
|
481
|
+
when "print", "large print"
|
482
|
+
carrier_type = CarrierType.where(name: 'volume').first
|
483
|
+
content_type = ContentType.where(name: 'text').first
|
484
|
+
when "electronic"
|
485
|
+
carrier_type = CarrierType.where(name: 'online_resource').first
|
486
|
+
when "braille"
|
487
|
+
content_type = ContentType.where(name: 'tactile_text').first
|
488
|
+
#TODO: Enju needs more specific mappings...
|
489
|
+
when "microfiche", "microfilm"
|
490
|
+
content_type = ContentType.where(name: 'other').first
|
491
|
+
end
|
492
|
+
end
|
493
|
+
end
|
490
494
|
doc.xpath('//mods:genre',NS).each do |e|
|
491
495
|
authority = e.attributes['authority'].try(:content)
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
+
case authority
|
497
|
+
when "rdacontent"
|
498
|
+
content_type = ContentType.where(name: e.content.gsub(/\W+/, "_")).first
|
499
|
+
content_type = ContentType.where(name: 'other').first unless content_type
|
496
500
|
end
|
497
501
|
end
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
502
|
+
type = doc.at('//mods:typeOfResource',NS).try(:content)
|
503
|
+
case type
|
504
|
+
when "text"
|
505
|
+
content_type = ContentType.where(name: 'text').first
|
506
|
+
when "sound recording"
|
507
|
+
content_type = ContentType.where(name: 'sounds').first
|
508
|
+
when"sound recording-musical"
|
509
|
+
content_type = ContentType.where(name: 'performed_music').first
|
510
|
+
when"sound recording-nonmusical"
|
511
|
+
content_type = ContentType.where(name: 'spoken_word').first
|
512
|
+
when "moving image"
|
513
|
+
content_type = ContentType.where(name: 'two_dimensional_moving_image').first
|
514
|
+
when "software, multimedia"
|
515
|
+
content_type = ContentType.where(name: 'other').first
|
516
|
+
when "cartographic "
|
517
|
+
content_type = ContentType.where(name: 'cartographic_image').first
|
518
|
+
when "notated music"
|
519
|
+
content_type = ContentType.where(name: 'notated_music').first
|
520
|
+
when "still image"
|
521
|
+
content_type = ContentType.where(name: 'still_image').first
|
522
|
+
when "three dimensional object"
|
523
|
+
content_type = ContentType.where(name: 'other').first
|
524
|
+
when "mixed material"
|
525
|
+
content_type = ContentType.where(name: 'other').first
|
526
|
+
end
|
523
527
|
{ :carrier_type => carrier_type, :content_type => content_type }
|
524
528
|
end
|
525
529
|
end
|