enju_loc 0.1.0.pre1 → 0.1.0.pre2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/models/loc_search.rb +3 -0
- data/app/views/loc_search/index.html.erb +5 -0
- data/lib/enju_loc/loc_search.rb +124 -134
- data/lib/enju_loc/version.rb +1 -1
- data/spec/cassette_library/LocSearch/_import_from_sru_response/should_import_e-resource_packaged_.yml +71 -0
- data/spec/cassette_library/LocSearch/_import_from_sru_response/should_import_notated_music.yml +106 -0
- data/spec/dummy/db/development.sqlite3 +0 -0
- data/spec/dummy/db/migrate/20140817155043_add_extent_of_text_to_manifestation.rb +5 -0
- data/spec/dummy/db/schema.rb +2 -3
- data/spec/dummy/db/test.sqlite3 +0 -0
- data/spec/dummy/log/development.log +844 -1455
- data/spec/dummy/log/test.log +52932 -19630
- data/spec/fixtures/carrier_types.yml +19 -31
- data/spec/fixtures/content_types.yml +72 -6
- data/spec/models/loc_search_spec.rb +21 -8
- metadata +26 -8
- data/spec/dummy/db/migrate/20121116033446_add_doi_to_manifestation.rb +0 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a96fc7e67fe0bd59c8d602abbc252a3e4640e9d4
|
4
|
+
data.tar.gz: cc88398cf5dc307bedbaba6ae79b2bc73569e84c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1676940930988bf9125d05633c08256cac1aa6529596c9d4235ff27e566e444024ee254287aa155f7e21a96062a39808b8517765fa110006f4efe73bbc02aed2
|
7
|
+
data.tar.gz: ff731a127de5d1c42b77d88b1e491d694ad97bf5a91ed0932ba96d4074bbd603cde6cb4413d7ab295cc907e7935f956f8c9907d8f84319dc54e6253a22ae4f9f
|
data/app/models/loc_search.rb
CHANGED
@@ -13,6 +13,9 @@ class LocSearch
|
|
13
13
|
def lccn
|
14
14
|
@node.xpath( './/mods:mods/mods:identifier[@type="lccn"]', MODS_NS ).first.try( :content )
|
15
15
|
end
|
16
|
+
def isbn
|
17
|
+
@node.xpath( './/mods:mods/mods:identifier[@type="isbn"]', MODS_NS ).first.try( :content )
|
18
|
+
end
|
16
19
|
def creator
|
17
20
|
statement_of_responsibility = @node.at('.//mods:note[@type="statement of responsibility"]',MODS_NS).try(:content)
|
18
21
|
if statement_of_responsibility
|
@@ -24,8 +24,13 @@
|
|
24
24
|
<td>
|
25
25
|
<strong><%= link_to_unless( book.lccn.blank?, book.title, "http://lccn.loc.gov/#{ book.lccn }" ) do book.title end %></strong><br />
|
26
26
|
<%=h book.creator -%><br />
|
27
|
+
<% if book.publisher.present? %>
|
27
28
|
<%=h book.publisher -%>,
|
29
|
+
<% end %>
|
28
30
|
<%=h book.pubyear -%>
|
31
|
+
<% if book.isbn.present? %>
|
32
|
+
(ISBN: <%=h book.isbn -%>)
|
33
|
+
<% end %>
|
29
34
|
</td>
|
30
35
|
</tr>
|
31
36
|
<% end -%>
|
data/lib/enju_loc/loc_search.rb
CHANGED
@@ -47,15 +47,15 @@ module EnjuLoc
|
|
47
47
|
}
|
48
48
|
end
|
49
49
|
|
50
|
-
creators =
|
50
|
+
creators = get_mods_creators(doc)
|
51
51
|
|
52
52
|
# title
|
53
|
-
titles =
|
53
|
+
titles = get_mods_titles(doc)
|
54
54
|
|
55
55
|
# date of publication
|
56
|
-
date =
|
56
|
+
date = get_mods_date_of_publication(doc)
|
57
57
|
|
58
|
-
language = Language.where(:iso_639_2 =>
|
58
|
+
language = Language.where(:iso_639_2 => get_mods_language(doc)).first
|
59
59
|
if language
|
60
60
|
language_id = language.id
|
61
61
|
else
|
@@ -67,21 +67,21 @@ module EnjuLoc
|
|
67
67
|
issn = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn"]',NS).try(:content).to_s)
|
68
68
|
issn_l = StdNum::ISSN.normalize(doc.at('/mods:mods/mods:identifier[@type="issn-l"]',NS).try(:content).to_s)
|
69
69
|
|
70
|
-
types =
|
70
|
+
types = get_mods_carrier_and_content_types( doc )
|
71
71
|
content_type = types[ :content_type ]
|
72
72
|
carrier_type = types[ :carrier_type ]
|
73
73
|
|
74
74
|
record_identifier = doc.at('//mods:recordInfo/mods:recordIdentifier',NS).try(:content)
|
75
75
|
description = doc.xpath('//mods:abstract',NS).collect(&:content).join("\n")
|
76
76
|
edition_string = doc.at('//mods:edition',NS).try(:content)
|
77
|
-
extent =
|
78
|
-
note =
|
79
|
-
frequency =
|
77
|
+
extent = get_mods_extent(doc)
|
78
|
+
note = get_mods_note(doc)
|
79
|
+
frequency = get_mods_frequency(doc)
|
80
80
|
issuance = doc.at('//mods:issuance',NS).try(:content)
|
81
81
|
is_serial = true if issuance == "serial"
|
82
|
-
statement_of_responsibility =
|
83
|
-
access_address =
|
84
|
-
publication_place =
|
82
|
+
statement_of_responsibility = get_mods_statement_of_responsibility(doc)
|
83
|
+
access_address = get_mods_access_address(doc)
|
84
|
+
publication_place = get_mods_publication_place(doc)
|
85
85
|
|
86
86
|
manifestation = nil
|
87
87
|
Agent.transaction do
|
@@ -147,8 +147,8 @@ module EnjuLoc
|
|
147
147
|
|
148
148
|
private
|
149
149
|
def create_subject_related_elements(doc, manifestation)
|
150
|
-
subjects =
|
151
|
-
classifications =
|
150
|
+
subjects = get_mods_subjects(doc)
|
151
|
+
classifications = get_mods_classifications(doc)
|
152
152
|
if defined?(EnjuSubject)
|
153
153
|
subject_heading_type = SubjectHeadingType.where(:name => 'lcsh').first_or_create
|
154
154
|
subjects.each do |term|
|
@@ -190,7 +190,7 @@ module EnjuLoc
|
|
190
190
|
end
|
191
191
|
|
192
192
|
def create_series_master(doc, manifestation)
|
193
|
-
titles =
|
193
|
+
titles = get_mods_titles(doc)
|
194
194
|
series_statement = SeriesStatement.new(
|
195
195
|
:original_title => titles[:original_title],
|
196
196
|
:title_alternative => titles[:title_alternative],
|
@@ -201,7 +201,7 @@ module EnjuLoc
|
|
201
201
|
end
|
202
202
|
end
|
203
203
|
|
204
|
-
def
|
204
|
+
def get_mods_titles(doc)
|
205
205
|
original_title = ""
|
206
206
|
title_alternatives = []
|
207
207
|
doc.xpath('//mods:mods/mods:titleInfo',NS).each do |e|
|
@@ -224,11 +224,11 @@ module EnjuLoc
|
|
224
224
|
{ :original_title => original_title, :title_alternative => title_alternatives.join( " ; " ) }
|
225
225
|
end
|
226
226
|
|
227
|
-
def
|
227
|
+
def get_mods_language(doc)
|
228
228
|
language = doc.at('//mods:language/mods:languageTerm[@authority="iso639-2b"]',NS).try(:content)
|
229
229
|
end
|
230
230
|
|
231
|
-
def
|
231
|
+
def get_mods_access_address(doc)
|
232
232
|
access_address = nil
|
233
233
|
url = doc.at('//mods:location/mods:url',NS)
|
234
234
|
if url
|
@@ -241,11 +241,11 @@ module EnjuLoc
|
|
241
241
|
access_address
|
242
242
|
end
|
243
243
|
|
244
|
-
def
|
244
|
+
def get_mods_publication_place(doc)
|
245
245
|
place = doc.at('//mods:originInfo/mods:place/mods:placeTerm[@type="text"]',NS).try(:content)
|
246
246
|
end
|
247
247
|
|
248
|
-
def
|
248
|
+
def get_mods_extent(doc)
|
249
249
|
extent = doc.at('//mods:extent',NS).try(:content)
|
250
250
|
value = {:start_page => nil, :end_page => nil, :height => nil}
|
251
251
|
if extent
|
@@ -263,17 +263,14 @@ module EnjuLoc
|
|
263
263
|
value
|
264
264
|
end
|
265
265
|
|
266
|
-
def
|
266
|
+
def get_mods_statement_of_responsibility(doc)
|
267
267
|
note = doc.at('//mods:note[@type="statement of responsibility"]',NS).try(:content)
|
268
|
-
if note
|
269
|
-
note
|
270
|
-
else
|
271
|
-
doc.xpath('/mods:mods/mods:name',NS).map do |n|
|
272
|
-
n.at('./mods:namePart',NS).try(:content)
|
273
|
-
end.join( "; " )
|
268
|
+
if note.blank?
|
269
|
+
note = get_mods_creators(doc).map{|e| e[:full_name] }.join( " ; " )
|
274
270
|
end
|
271
|
+
note
|
275
272
|
end
|
276
|
-
def
|
273
|
+
def get_mods_note(doc)
|
277
274
|
notes = []
|
278
275
|
doc.xpath('//mods:note',NS).each do |note|
|
279
276
|
type = note.attributes['type'].try(:content)
|
@@ -287,7 +284,7 @@ module EnjuLoc
|
|
287
284
|
notes.join( ";\n" )
|
288
285
|
end
|
289
286
|
end
|
290
|
-
def
|
287
|
+
def get_mods_date_of_publication(doc)
|
291
288
|
dates = []
|
292
289
|
doc.xpath('//mods:dateIssued',NS).each do |pub_date|
|
293
290
|
pub_date = pub_date.content.sub( /\A[cp]/, '' )
|
@@ -322,7 +319,7 @@ module EnjuLoc
|
|
322
319
|
"Triennial",
|
323
320
|
"Completely irregular",
|
324
321
|
]
|
325
|
-
def
|
322
|
+
def get_mods_frequency(doc)
|
326
323
|
frequencies = []
|
327
324
|
doc.xpath('//mods:frequency',NS).each do |freq|
|
328
325
|
frequency = freq.try(:content)
|
@@ -336,7 +333,7 @@ module EnjuLoc
|
|
336
333
|
frequencies.compact.first
|
337
334
|
end
|
338
335
|
|
339
|
-
def
|
336
|
+
def get_mods_creators(doc)
|
340
337
|
creators = []
|
341
338
|
doc.xpath('/mods:mods/mods:name',NS).each do |creator|
|
342
339
|
creators << {
|
@@ -347,7 +344,7 @@ module EnjuLoc
|
|
347
344
|
end
|
348
345
|
|
349
346
|
# TODO:only LCSH-based parsing...
|
350
|
-
def
|
347
|
+
def get_mods_subjects(doc)
|
351
348
|
subjects = []
|
352
349
|
doc.xpath('//mods:subject[@authority="lcsh"]',NS).each do |s|
|
353
350
|
subject = []
|
@@ -375,7 +372,7 @@ module EnjuLoc
|
|
375
372
|
end
|
376
373
|
|
377
374
|
# TODO:support only DDC.
|
378
|
-
def
|
375
|
+
def get_mods_classifications(doc)
|
379
376
|
classifications = []
|
380
377
|
doc.xpath('//mods:classification[@authority="ddc"]',NS).each do|c|
|
381
378
|
ddc = c.content
|
@@ -386,7 +383,7 @@ module EnjuLoc
|
|
386
383
|
classifications.compact
|
387
384
|
end
|
388
385
|
|
389
|
-
def
|
386
|
+
def get_mods_carrier_and_content_types(doc)
|
390
387
|
carrier_type = content_type = nil
|
391
388
|
doc.xpath('//mods:form',NS).each do |e|
|
392
389
|
authority = e.attributes['authority'].try(:content)
|
@@ -394,86 +391,99 @@ module EnjuLoc
|
|
394
391
|
when "gmd"
|
395
392
|
case e.content
|
396
393
|
when "electronic resource"
|
397
|
-
carrier_type = CarrierType.where(:name => '
|
398
|
-
when "videorecording"
|
399
|
-
content_type = ContentType.where(:name => '
|
400
|
-
#TODO: Enju needs more specific mappings...
|
401
|
-
when "art original"
|
402
|
-
when "microscope slides"
|
403
|
-
when "art reproduction"
|
404
|
-
when "model"
|
405
|
-
when "chart"
|
406
|
-
when "motion picture"
|
407
|
-
when "diorama"
|
408
|
-
when "picture"
|
409
|
-
when "realia"
|
410
|
-
when "filmstrip"
|
411
|
-
when "slide"
|
412
|
-
when "flash card"
|
394
|
+
carrier_type = CarrierType.where(:name => 'online_resource').first
|
395
|
+
when "videorecording", "motion picture", "game"
|
396
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
413
397
|
when "sound recording"
|
414
|
-
|
415
|
-
when "
|
416
|
-
|
417
|
-
|
418
|
-
when "
|
419
|
-
|
420
|
-
|
398
|
+
content_type = ContentType.where(:name => 'performed_music').first
|
399
|
+
when "graphic", "picture"
|
400
|
+
content_type = ContentType.where(:name => 'still_image').first
|
401
|
+
#TODO: Enju needs more specific mappings...
|
402
|
+
when "art original",
|
403
|
+
"microscope slides",
|
404
|
+
"art reproduction",
|
405
|
+
"model",
|
406
|
+
"chart",
|
407
|
+
"diorama",
|
408
|
+
"realia",
|
409
|
+
"filmstrip",
|
410
|
+
"slide",
|
411
|
+
"flash card",
|
412
|
+
"technical drawing",
|
413
|
+
"toy",
|
414
|
+
"kit",
|
415
|
+
"transparency",
|
416
|
+
"microform"
|
417
|
+
content_type = ContentType.where(:name => 'other').first
|
421
418
|
end
|
422
419
|
when "marcsmd" # cf.http://www.loc.gov/standards/valuelist/marcsmd.html
|
423
420
|
case e.content
|
424
|
-
when "text", "
|
425
|
-
carrier_type = CarrierType.where(:name => '
|
421
|
+
when "text", "large print", "regular print", "text in looseleaf binder"
|
422
|
+
carrier_type = CarrierType.where(:name => 'volume').first
|
426
423
|
content_type = ContentType.where(:name => 'text').first
|
427
|
-
when "
|
428
|
-
|
429
|
-
|
430
|
-
|
424
|
+
when "braille"
|
425
|
+
carrier_type = CarrierType.where(:name => 'volume').first
|
426
|
+
content_type = ContentType.where(:name => 'tactile_text').first
|
427
|
+
when "videodisc"
|
428
|
+
carrier_type = CarrierType.where(:name => 'videodisc').first
|
429
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
430
|
+
when "videorecording", "videocartridge", "videocassette", "videoreel"
|
431
|
+
carrier_type = CarrierType.where(:name => 'other').first
|
432
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
433
|
+
when "electronic resource"
|
434
|
+
carrier_type = CarrierType.where(:name => 'online_resource').first
|
435
|
+
when "chip cartridge", "computer optical disc cartridge", "magnetic disk", "magneto-optical disc", "optical disc", "remote", "tape cartridge", "tape cassette", "tape reel"
|
436
|
+
#carrier_type = CarrierType.where(:name => 'other').first
|
431
437
|
when "motion picture", "film cartridge", "film cassette", "film reel"
|
432
|
-
content_type = ContentType.where(:name => '
|
433
|
-
when "sound recording", "cylinder", "roll
|
434
|
-
|
435
|
-
|
436
|
-
|
438
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
439
|
+
when "sound recording", "cylinder", "roll", "sound cartridge", "sound cassette","sound-tape reel", "sound-track film", "wire recording"
|
440
|
+
content_type = ContentType.where(:name => 'performed_music').first
|
441
|
+
when "sound disc"
|
442
|
+
content_type = ContentType.where(:name => 'performed_music').first
|
443
|
+
carrier_type = CarrierType.where(:name => 'audio_disc').first
|
444
|
+
when "nonprojected graphic", "chart", "collage", "drawing", "flash card", "painting", "photomechanical print", "photonegative", "photoprint", "picture", "print", "technical drawing", "projected graphic", "filmslip", "filmstrip cartridge", "filmstrip roll", "other filmstrip type ", "slide", "transparency"
|
445
|
+
content_type = ContentType.where(:name => 'still_image').first
|
446
|
+
when "tactile material", "braille", "tactile, with no writing system"
|
447
|
+
content_type = ContentType.where(:name => 'tactile_text').first
|
437
448
|
#TODO: Enju needs more specific mappings...
|
438
|
-
when "globe"
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
when "moon"
|
464
|
-
when "tactile, with no writing system"
|
449
|
+
when "globe",
|
450
|
+
"celestial globe",
|
451
|
+
"earth moon globe",
|
452
|
+
"planetary or lunar globe",
|
453
|
+
"terrestrial globe",
|
454
|
+
"map",
|
455
|
+
"atlas",
|
456
|
+
"diagram",
|
457
|
+
"map",
|
458
|
+
"model",
|
459
|
+
"profile",
|
460
|
+
"remote-sensing image",
|
461
|
+
"section",
|
462
|
+
"view",
|
463
|
+
"microform",
|
464
|
+
"aperture card",
|
465
|
+
"microfiche",
|
466
|
+
"microfiche cassette",
|
467
|
+
"microfilm cartridge",
|
468
|
+
"microfilm cassette",
|
469
|
+
"microfilm reel",
|
470
|
+
"microopaque",
|
471
|
+
"combination",
|
472
|
+
"moon"
|
473
|
+
content_type = ContentType.where(:name => 'other').first
|
465
474
|
end
|
466
475
|
when "marcform" # cf. http://www.loc.gov/standards/valuelist/marcform.html
|
467
476
|
case e.content
|
468
477
|
when "print", "large print"
|
469
|
-
carrier_type = CarrierType.where(:name => '
|
478
|
+
carrier_type = CarrierType.where(:name => 'volume').first
|
470
479
|
content_type = ContentType.where(:name => 'text').first
|
471
480
|
when "electronic"
|
472
|
-
carrier_type = CarrierType.where(:name => '
|
473
|
-
#TODO: Enju needs more specific mappings...
|
474
|
-
when "microfiche"
|
481
|
+
carrier_type = CarrierType.where(:name => 'online_resource').first
|
475
482
|
when "braille"
|
476
|
-
|
483
|
+
content_type = ContentType.where(:name => 'tactile_text').first
|
484
|
+
#TODO: Enju needs more specific mappings...
|
485
|
+
when "microfiche", "microfilm"
|
486
|
+
content_type = ContentType.where(:name => 'other').first
|
477
487
|
end
|
478
488
|
end
|
479
489
|
end
|
@@ -481,54 +491,34 @@ module EnjuLoc
|
|
481
491
|
authority = e.attributes['authority'].try(:content)
|
482
492
|
case authority
|
483
493
|
when "rdacontent"
|
484
|
-
|
485
|
-
|
486
|
-
content_type = ContentType.where(:name => 'file').first
|
487
|
-
when "sounds", "spoken word"
|
488
|
-
content_type = ContentType.where(:name => 'audio').first
|
489
|
-
when "text"
|
490
|
-
content_type = ContentType.where(:name => 'text').first
|
491
|
-
when "two-dimensional moving image"
|
492
|
-
content_type = ContentType.where(:name => 'video').first
|
493
|
-
#TODO: Enju needs more specific mappings...
|
494
|
-
when "cartographic dataset"
|
495
|
-
when "cartographic image"
|
496
|
-
when "cartographic moving image"
|
497
|
-
when "cartographic tactile image"
|
498
|
-
when "cartographic tactile three-dimensional form"
|
499
|
-
when "cartographic three-dimensional form"
|
500
|
-
when "notated movement"
|
501
|
-
when "notated music"
|
502
|
-
when "performed music"
|
503
|
-
when "still image"
|
504
|
-
when "tactile image"
|
505
|
-
when "tactile notated music"
|
506
|
-
when "tactile notated movement"
|
507
|
-
when "tactile text"
|
508
|
-
when "tactile three-dimensional form"
|
509
|
-
when "three-dimensional form"
|
510
|
-
when "three-dimensional moving image"
|
511
|
-
when "other"
|
512
|
-
when "unspecified"
|
513
|
-
end
|
494
|
+
content_type = ContentType.where(:name => e.content.gsub(/\W+/, "_")).first
|
495
|
+
content_type = ContentType.where(:name => 'other').first unless content_type
|
514
496
|
end
|
515
497
|
end
|
516
498
|
type = doc.at('//mods:typeOfResource',NS).try(:content)
|
517
499
|
case type
|
518
500
|
when "text"
|
519
501
|
content_type = ContentType.where(:name => 'text').first
|
520
|
-
when "sound recording"
|
521
|
-
content_type = ContentType.where(:name => '
|
502
|
+
when "sound recording"
|
503
|
+
content_type = ContentType.where(:name => 'sounds').first
|
504
|
+
when"sound recording-musical"
|
505
|
+
content_type = ContentType.where(:name => 'performed_music').first
|
506
|
+
when"sound recording-nonmusical"
|
507
|
+
content_type = ContentType.where(:name => 'spoken_word').first
|
522
508
|
when "moving image"
|
523
|
-
content_type = ContentType.where(:name => '
|
509
|
+
content_type = ContentType.where(:name => 'two_dimensional_moving_image').first
|
524
510
|
when "software, multimedia"
|
525
|
-
|
526
|
-
#TODO: Enju needs more specific mappings...
|
511
|
+
content_type = ContentType.where(:name => 'other').first
|
527
512
|
when "cartographic "
|
513
|
+
content_type = ContentType.where(:name => 'cartographic_image').first
|
528
514
|
when "notated music"
|
515
|
+
content_type = ContentType.where(:name => 'notated_music').first
|
529
516
|
when "still image"
|
517
|
+
content_type = ContentType.where(:name => 'still_image').first
|
530
518
|
when "three dimensional object"
|
519
|
+
content_type = ContentType.where(:name => 'other').first
|
531
520
|
when "mixed material"
|
521
|
+
content_type = ContentType.where(:name => 'other').first
|
532
522
|
end
|
533
523
|
{ :carrier_type => carrier_type, :content_type => content_type }
|
534
524
|
end
|
data/lib/enju_loc/version.rb
CHANGED