enju_loc 0.1.0.pre1 → 0.1.0.pre2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/models/loc_search.rb +3 -0
- data/app/views/loc_search/index.html.erb +5 -0
- data/lib/enju_loc/loc_search.rb +124 -134
- data/lib/enju_loc/version.rb +1 -1
- data/spec/cassette_library/LocSearch/_import_from_sru_response/should_import_e-resource_packaged_.yml +71 -0
- data/spec/cassette_library/LocSearch/_import_from_sru_response/should_import_notated_music.yml +106 -0
- data/spec/dummy/db/development.sqlite3 +0 -0
- data/spec/dummy/db/migrate/20140817155043_add_extent_of_text_to_manifestation.rb +5 -0
- data/spec/dummy/db/schema.rb +2 -3
- data/spec/dummy/db/test.sqlite3 +0 -0
- data/spec/dummy/log/development.log +844 -1455
- data/spec/dummy/log/test.log +52932 -19630
- data/spec/fixtures/carrier_types.yml +19 -31
- data/spec/fixtures/content_types.yml +72 -6
- data/spec/models/loc_search_spec.rb +21 -8
- metadata +26 -8
- data/spec/dummy/db/migrate/20121116033446_add_doi_to_manifestation.rb +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a96fc7e67fe0bd59c8d602abbc252a3e4640e9d4
|
4
|
+
data.tar.gz: cc88398cf5dc307bedbaba6ae79b2bc73569e84c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1676940930988bf9125d05633c08256cac1aa6529596c9d4235ff27e566e444024ee254287aa155f7e21a96062a39808b8517765fa110006f4efe73bbc02aed2
|
7
|
+
data.tar.gz: ff731a127de5d1c42b77d88b1e491d694ad97bf5a91ed0932ba96d4074bbd603cde6cb4413d7ab295cc907e7935f956f8c9907d8f84319dc54e6253a22ae4f9f
|
data/app/models/loc_search.rb
CHANGED
@@ -13,6 +13,9 @@ class LocSearch
|
|
13
13
|
def lccn
|
14
14
|
@node.xpath( './/mods:mods/mods:identifier[@type="lccn"]', MODS_NS ).first.try( :content )
|
15
15
|
end
|
16
|
+
def isbn
|
17
|
+
@node.xpath( './/mods:mods/mods:identifier[@type="isbn"]', MODS_NS ).first.try( :content )
|
18
|
+
end
|
16
19
|
def creator
|
17
20
|
statement_of_responsibility = @node.at('.//mods:note[@type="statement of responsibility"]',MODS_NS).try(:content)
|
18
21
|
if statement_of_responsibility
|
@@ -24,8 +24,13 @@
|
|
24
24
|
<td>
|
25
25
|
<strong><%= link_to_unless( book.lccn.blank?, book.title, "http://lccn.loc.gov/#{ book.lccn }" ) do book.title end %></strong><br />
|
26
26
|
<%=h book.creator -%><br />
|
27
|
+
<% if book.publisher.present? %>
|
27
28
|
<%=h book.publisher -%>,
|
29
|
+
<% end %>
|
28
30
|
<%=h book.pubyear -%>
|
31
|
+
<% if book.isbn.present? %>
|
32
|
+
(ISBN: <%=h book.isbn -%>)
|
33
|
+
<% end %>
|
29
34
|
</td>
|
30
35
|
</tr>
|
31
36
|
<% end -%>
|
data/lib/enju_loc/loc_search.rb
CHANGED
@@ -47,15 +47,15 @@ module EnjuLoc
|
|
47
47
|
}
|
48
48
|
end
|
49
49
|
|
50
|
-
creators =
|
50
|
+
creators = get_mods_creators(doc)
|
51
51
|
|
52
52
|
# title
|
53
|
-
titles =
|
53
|
+
titles = get_mods_titles(doc)
|
54
54
|
|
55
55
|
# date of publication
|
56
|
-
date =
|
56
|
+
date = get_mods_date_of_publication(doc)
|
57
57
|
|
58
|
-
language = Language.where(:iso_639_2 =>
|
58
|
+
language = Language.where(:iso_639_2 => get_mods_language(doc)).first
|
59
59
|
if language
|
60
60
|
language_id = language.id
|
61
61
|
else
|
@@ -67,21 +67,21 @@ module EnjuLoc
|
|
67
67
|
issn = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn"]',NS).try(:content).to_s)
|
68
68
|
issn_l = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn-l"]',NS).try(:content).to_s)
|
69
69
|
|
70
|
-
types =
|
70
|
+
types = get_mods_carrier_and_content_types( doc )
|
71
71
|
content_type = types[ :content_type ]
|
72
72
|
carrier_type = types[ :carrier_type ]
|
73
73
|
|
74
74
|
record_identifier = doc.at('//mods:recordInfo/mods:recordIdentifier',NS).try(:content)
|
75
75
|
description = doc.xpath('//mods:abstract',NS).collect(&:content).join("\n")
|
76
76
|
edition_string = doc.at('//mods:edition',NS).try(:content)
|
77
|
-
extent =
|
78
|
-
note =
|
79
|
-
frequency =
|
77
|
+
extent = get_mods_extent(doc)
|
78
|
+
note = get_mods_note(doc)
|
79
|
+
frequency = get_mods_frequency(doc)
|
80
80
|
issuance = doc.at('//mods:issuance',NS).try(:content)
|
81
81
|
is_serial = true if issuance == "serial"
|
82
|
-
statement_of_responsibility =
|
83
|
-
access_address =
|
84
|
-
publication_place =
|
82
|
+
statement_of_responsibility = get_mods_statement_of_responsibility(doc)
|
83
|
+
access_address = get_mods_access_address(doc)
|
84
|
+
publication_place = get_mods_publication_place(doc)
|
85
85
|
|
86
86
|
manifestation = nil
|
87
87
|
Agent.transaction do
|
@@ -147,8 +147,8 @@ module EnjuLoc
|
|
147
147
|
|
148
148
|
private
|
149
149
|
def create_subject_related_elements(doc, manifestation)
|
150
|
-
subjects =
|
151
|
-
classifications =
|
150
|
+
subjects = get_mods_subjects(doc)
|
151
|
+
classifications = get_mods_classifications(doc)
|
152
152
|
if defined?(EnjuSubject)
|
153
153
|
subject_heading_type = SubjectHeadingType.where(:name => 'lcsh').first_or_create
|
154
154
|
subjects.each do |term|
|
@@ -190,7 +190,7 @@ module EnjuLoc
|
|
190
190
|
end
|
191
191
|
|
192
192
|
def create_series_master(doc, manifestation)
|
193
|
-
titles =
|
193
|
+
titles = get_mods_titles(doc)
|
194
194
|
series_statement = SeriesStatement.new(
|
195
195
|
:original_title => titles[:original_title],
|
196
196
|
:title_alternative => titles[:title_alternative],
|
@@ -201,7 +201,7 @@ module EnjuLoc
|
|
201
201
|
end
|
202
202
|
end
|
203
203
|
|
204
|
-
def
|
204
|
+
def get_mods_titles(doc)
|
205
205
|
original_title = ""
|
206
206
|
title_alternatives = []
|
207
207
|
doc.xpath('//mods:mods/mods:titleInfo',NS).each do |e|
|
@@ -224,11 +224,11 @@ module EnjuLoc
|
|
224
224
|
{ :original_title => original_title, :title_alternative => title_alternatives.join( " ; " ) }
|
225
225
|
end
|
226
226
|
|
227
|
-
def
|
227
|
+
def get_mods_language(doc)
|
228
228
|
language = doc.at('//mods:language/mods:languageTerm[@authority="iso639-2b"]',NS).try(:content)
|
229
229
|
end
|
230
230
|
|
231
|
-
def
|
231
|
+
def get_mods_access_address(doc)
|
232
232
|
access_address = nil
|
233
233
|
url = doc.at('//mods:location/mods:url',NS)
|
234
234
|
if url
|
@@ -241,11 +241,11 @@ module EnjuLoc
|
|
241
241
|
access_address
|
242
242
|
end
|
243
243
|
|
244
|
-
def
|
244
|
+
def get_mods_publication_place(doc)
|
245
245
|
place = doc.at('//mods:originInfo/mods:place/mods:placeTerm[@type="text"]',NS).try(:content)
|
246
246
|
end
|
247
247
|
|
248
|
-
def
|
248
|
+
def get_mods_extent(doc)
|
249
249
|
extent = doc.at('//mods:extent',NS).try(:content)
|
250
250
|
value = {:start_page => nil, :end_page => nil, :height => nil}
|
251
251
|
if extent
|
@@ -263,17 +263,14 @@ module EnjuLoc
|
|
263
263
|
value
|
264
264
|
end
|
265
265
|
|
266
|
-
def
|
266
|
+
def get_mods_statement_of_responsibility(doc)
|
267
267
|
note = doc.at('//mods:note[@type="statement of responsibility"]',NS).try(:content)
|
268
|
-
if note
|
269
|
-
note
|
270
|
-
else
|
271
|
-
doc.xpath('/mods:mods/mods:name',NS).map do |n|
|
272
|
-
n.at('./mods:namePart',NS).try(:content)
|
273
|
-
end.join( "; " )
|
268
|
+
if note.blank?
|
269
|
+
note = get_mods_creators(doc).map{|e| e[:full_name] }.join( " ; " )
|
274
270
|
end
|
271
|
+
note
|
275
272
|
end
|
276
|
-
def
|
273
|
+
def get_mods_note(doc)
|
277
274
|
notes = []
|
278
275
|
doc.xpath('//mods:note',NS).each do |note|
|
279
276
|
type = note.attributes['type'].try(:content)
|
@@ -287,7 +284,7 @@ module EnjuLoc
|
|
287
284
|
notes.join( ";\n" )
|
288
285
|
end
|
289
286
|
end
|
290
|
-
def
|
287
|
+
def get_mods_date_of_publication(doc)
|
291
288
|
dates = []
|
292
289
|
doc.xpath('//mods:dateIssued',NS).each do |pub_date|
|
293
290
|
pub_date = pub_date.content.sub( /\A[cp]/, '' )
|
@@ -322,7 +319,7 @@ module EnjuLoc
|
|
322
319
|
"Triennial",
|
323
320
|
"Completely irregular",
|
324
321
|
]
|
325
|
-
def
|
322
|
+
def get_mods_frequency(doc)
|
326
323
|
frequencies = []
|
327
324
|
doc.xpath('//mods:frequency',NS).each do |freq|
|
328
325
|
frequency = freq.try(:content)
|
@@ -336,7 +333,7 @@ module EnjuLoc
|
|
336
333
|
frequencies.compact.first
|
337
334
|
end
|
338
335
|
|
339
|
-
def
|
336
|
+
def get_mods_creators(doc)
|
340
337
|
creators = []
|
341
338
|
doc.xpath('/mods:mods/mods:name',NS).each do |creator|
|
342
339
|
creators << {
|
@@ -347,7 +344,7 @@ module EnjuLoc
|
|
347
344
|
end
|
348
345
|
|
349
346
|
# TODO:only LCSH-based parsing...
|
350
|
-
def
|
347
|
+
def get_mods_subjects(doc)
|
351
348
|
subjects = []
|
352
349
|
doc.xpath('//mods:subject[@authority="lcsh"]',NS).each do |s|
|
353
350
|
subject = []
|
@@ -375,7 +372,7 @@ module EnjuLoc
|
|
375
372
|
end
|
376
373
|
|
377
374
|
# TODO:support only DDC.
|
378
|
-
def
|
375
|
+
def get_mods_classifications(doc)
|
379
376
|
classifications = []
|
380
377
|
doc.xpath('//mods:classification[@authority="ddc"]',NS).each do|c|
|
381
378
|
ddc = c.content
|
@@ -386,7 +383,7 @@ module EnjuLoc
|
|
386
383
|
classifications.compact
|
387
384
|
end
|
388
385
|
|
389
|
-
def
|
386
|
+
def get_mods_carrier_and_content_types(doc)
|
390
387
|
carrier_type = content_type = nil
|
391
388
|
doc.xpath('//mods:form',NS).each do |e|
|
392
389
|
authority = e.attributes['authority'].try(:content)
|
@@ -394,86 +391,99 @@ module EnjuLoc
|
|
394
391
|
when "gmd"
|
395
392
|
case e.content
|
396
393
|
when "electronic resource"
|
397
|
-
carrier_type = CarrierType.where(:name => '
|
398
|
-
when "videorecording"
|
399
|
-
content_type = ContentType.where(:name => '
|
400
|
-
#TODO: Enju needs more specific mappings...
|
401
|
-
when "art original"
|
402
|
-
when "microscope slides"
|
403
|
-
when "art reproduction"
|
404
|
-
when "model"
|
405
|
-
when "chart"
|
406
|
-
when "motion picture"
|
407
|
-
when "diorama"
|
408
|
-
when "picture"
|
409
|
-
when "realia"
|
410
|
-
when "filmstrip"
|
411
|
-
when "slide"
|
412
|
-
when "flash card"
|
394
|
+
carrier_type = CarrierType.where(:name => 'online_resource').first
|
395
|
+
when "videorecording", "motion picture", "game"
|
396
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
413
397
|
when "sound recording"
|
414
|
-
|
415
|
-
when "
|
416
|
-
|
417
|
-
|
418
|
-
when "
|
419
|
-
|
420
|
-
|
398
|
+
content_type = ContentType.where(:name => 'performed_music').first
|
399
|
+
when "graphic", "picture"
|
400
|
+
content_type = ContentType.where(:name => 'still_image').first
|
401
|
+
#TODO: Enju needs more specific mappings...
|
402
|
+
when "art original",
|
403
|
+
"microscope slides",
|
404
|
+
"art reproduction",
|
405
|
+
"model",
|
406
|
+
"chart",
|
407
|
+
"diorama",
|
408
|
+
"realia",
|
409
|
+
"filmstrip",
|
410
|
+
"slide",
|
411
|
+
"flash card",
|
412
|
+
"technical drawing",
|
413
|
+
"toy",
|
414
|
+
"kit",
|
415
|
+
"transparency",
|
416
|
+
"microform"
|
417
|
+
content_type = ContentType.where(:name => 'other').first
|
421
418
|
end
|
422
419
|
when "marcsmd" # cf.http://www.loc.gov/standards/valuelist/marcsmd.html
|
423
420
|
case e.content
|
424
|
-
when "text", "
|
425
|
-
carrier_type = CarrierType.where(:name => '
|
421
|
+
when "text", "large print", "regular print", "text in looseleaf binder"
|
422
|
+
carrier_type = CarrierType.where(:name => 'volume').first
|
426
423
|
content_type = ContentType.where(:name => 'text').first
|
427
|
-
when "
|
428
|
-
|
429
|
-
|
430
|
-
|
424
|
+
when "braille"
|
425
|
+
carrier_type = CarrierType.where(:name => 'volume').first
|
426
|
+
content_type = ContentType.where(:name => 'tactile_text').first
|
427
|
+
when "videodisc"
|
428
|
+
carrier_type = CarrierType.where(:name => 'videodisc').first
|
429
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
430
|
+
when "videorecording", "videocartridge", "videocassette", "videoreel"
|
431
|
+
carrier_type = CarrierType.where(:name => 'other').first
|
432
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
433
|
+
when "electronic resource"
|
434
|
+
carrier_type = CarrierType.where(:name => 'online_resource').first
|
435
|
+
when "chip cartridge", "computer optical disc cartridge", "magnetic disk", "magneto-optical disc", "optical disc", "remote", "tape cartridge", "tape cassette", "tape reel"
|
436
|
+
#carrier_type = CarrierType.where(:name => 'other').first
|
431
437
|
when "motion picture", "film cartridge", "film cassette", "film reel"
|
432
|
-
content_type = ContentType.where(:name => '
|
433
|
-
when "sound recording", "cylinder", "roll
|
434
|
-
|
435
|
-
|
436
|
-
|
438
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
439
|
+
when "sound recording", "cylinder", "roll", "sound cartridge", "sound cassette","sound-tape reel", "sound-track film", "wire recording"
|
440
|
+
content_type = ContentType.where(:name => 'performed_music').first
|
441
|
+
when "sound disc"
|
442
|
+
content_type = ContentType.where(:name => 'performed_music').first
|
443
|
+
carrier_type = CarrierType.where(:name => 'audio_disc').first
|
444
|
+
when "nonprojected graphic", "chart", "collage", "drawing", "flash card", "painting", "photomechanical print", "photonegative", "photoprint", "picture", "print", "technical drawing", "projected graphic", "filmslip", "filmstrip cartridge", "filmstrip roll", "other filmstrip type ", "slide", "transparency"
|
445
|
+
content_type = ContentType.where(:name => 'still_image').first
|
446
|
+
when "tactile material", "braille", "tactile, with no writing system"
|
447
|
+
content_type = ContentType.where(:name => 'tactile_text').first
|
437
448
|
#TODO: Enju needs more specific mappings...
|
438
|
-
when "globe"
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
when "moon"
|
464
|
-
when "tactile, with no writing system"
|
449
|
+
when "globe",
|
450
|
+
"celestial globe",
|
451
|
+
"earth moon globe",
|
452
|
+
"planetary or lunar globe",
|
453
|
+
"terrestrial globe",
|
454
|
+
"map",
|
455
|
+
"atlas",
|
456
|
+
"diagram",
|
457
|
+
"map",
|
458
|
+
"model",
|
459
|
+
"profile",
|
460
|
+
"remote-sensing image",
|
461
|
+
"section",
|
462
|
+
"view",
|
463
|
+
"microform",
|
464
|
+
"aperture card",
|
465
|
+
"microfiche",
|
466
|
+
"microfiche cassette",
|
467
|
+
"microfilm cartridge",
|
468
|
+
"microfilm cassette",
|
469
|
+
"microfilm reel",
|
470
|
+
"microopaque",
|
471
|
+
"combination",
|
472
|
+
"moon"
|
473
|
+
content_type = ContentType.where(:name => 'other').first
|
465
474
|
end
|
466
475
|
when "marcform" # cf. http://www.loc.gov/standards/valuelist/marcform.html
|
467
476
|
case e.content
|
468
477
|
when "print", "large print"
|
469
|
-
carrier_type = CarrierType.where(:name => '
|
478
|
+
carrier_type = CarrierType.where(:name => 'volume').first
|
470
479
|
content_type = ContentType.where(:name => 'text').first
|
471
480
|
when "electronic"
|
472
|
-
carrier_type = CarrierType.where(:name => '
|
473
|
-
#TODO: Enju needs more specific mappings...
|
474
|
-
when "microfiche"
|
481
|
+
carrier_type = CarrierType.where(:name => 'online_resource').first
|
475
482
|
when "braille"
|
476
|
-
|
483
|
+
content_type = ContentType.where(:name => 'tactile_text').first
|
484
|
+
#TODO: Enju needs more specific mappings...
|
485
|
+
when "microfiche", "microfilm"
|
486
|
+
content_type = ContentType.where(:name => 'other').first
|
477
487
|
end
|
478
488
|
end
|
479
489
|
end
|
@@ -481,54 +491,34 @@ module EnjuLoc
|
|
481
491
|
authority = e.attributes['authority'].try(:content)
|
482
492
|
case authority
|
483
493
|
when "rdacontent"
|
484
|
-
|
485
|
-
|
486
|
-
content_type = ContentType.where(:name => 'file').first
|
487
|
-
when "sounds", "spoken word"
|
488
|
-
content_type = ContentType.where(:name => 'audio').first
|
489
|
-
when "text"
|
490
|
-
content_type = ContentType.where(:name => 'text').first
|
491
|
-
when "two-dimensional moving image"
|
492
|
-
content_type = ContentType.where(:name => 'video').first
|
493
|
-
#TODO: Enju needs more specific mappings...
|
494
|
-
when "cartographic dataset"
|
495
|
-
when "cartographic image"
|
496
|
-
when "cartographic moving image"
|
497
|
-
when "cartographic tactile image"
|
498
|
-
when "cartographic tactile three-dimensional form"
|
499
|
-
when "cartographic three-dimensional form"
|
500
|
-
when "notated movement"
|
501
|
-
when "notated music"
|
502
|
-
when "performed music"
|
503
|
-
when "still image"
|
504
|
-
when "tactile image"
|
505
|
-
when "tactile notated music"
|
506
|
-
when "tactile notated movement"
|
507
|
-
when "tactile text"
|
508
|
-
when "tactile three-dimensional form"
|
509
|
-
when "three-dimensional form"
|
510
|
-
when "three-dimensional moving image"
|
511
|
-
when "other"
|
512
|
-
when "unspecified"
|
513
|
-
end
|
494
|
+
content_type = ContentType.where(:name => e.content.gsub(/\W+/, "_")).first
|
495
|
+
content_type = ContentType.where(:name => 'other').first unless content_type
|
514
496
|
end
|
515
497
|
end
|
516
498
|
type = doc.at('//mods:typeOfResource',NS).try(:content)
|
517
499
|
case type
|
518
500
|
when "text"
|
519
501
|
content_type = ContentType.where(:name => 'text').first
|
520
|
-
when "sound recording"
|
521
|
-
content_type = ContentType.where(:name => '
|
502
|
+
when "sound recording"
|
503
|
+
content_type = ContentType.where(:name => 'sounds').first
|
504
|
+
when"sound recording-musical"
|
505
|
+
content_type = ContentType.where(:name => 'performed_music').first
|
506
|
+
when"sound recording-nonmusical"
|
507
|
+
content_type = ContentType.where(:name => 'spoken_word').first
|
522
508
|
when "moving image"
|
523
|
-
content_type = ContentType.where(:name => '
|
509
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
524
510
|
when "software, multimedia"
|
525
|
-
|
526
|
-
#TODO: Enju needs more specific mappings...
|
511
|
+
content_type = ContentType.where(:name => 'other').first
|
527
512
|
when "cartographic "
|
513
|
+
content_type = ContentType.where(:name => 'cartographic_image').first
|
528
514
|
when "notated music"
|
515
|
+
content_type = ContentType.where(:name => 'notated_music').first
|
529
516
|
when "still image"
|
517
|
+
content_type = ContentType.where(:name => 'still_image').first
|
530
518
|
when "three dimensional object"
|
519
|
+
content_type = ContentType.where(:name => 'other').first
|
531
520
|
when "mixed material"
|
521
|
+
content_type = ContentType.where(:name => 'other').first
|
532
522
|
end
|
533
523
|
{ :carrier_type => carrier_type, :content_type => content_type }
|
534
524
|
end
|
data/lib/enju_loc/version.rb
CHANGED