stanford-mods 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +3 -1
- data/lib/stanford-mods/searchworks.rb +432 -1
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/searchworks_gdor_spec.rb +634 -0
- metadata +7 -5
data/.travis.yml
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'stanford-mods/searchworks_languages'
|
3
|
-
|
3
|
+
require 'date'
require 'logger'
|
4
4
|
# SearchWorks specific wranglings of MODS metadata as a mixin to the Stanford::Mods::Record object
|
5
5
|
module Stanford
|
6
6
|
module Mods
|
@@ -182,6 +182,437 @@ module Stanford
|
|
182
182
|
result
|
183
183
|
end
|
184
184
|
|
185
|
+
# Values are the contents of:
|
186
|
+
# mods/genre
|
187
|
+
# mods/subject/topic
|
188
|
+
# @return [Array<String>] values for the topic_search Solr field for this document or nil if none
|
189
|
+
def topic_search
  @topic_search ||= begin
    # Work on a copy: concat below must not alter the array handed back by
    # term_values (the other *_search methods build their own array too).
    vals = (term_values(:genre) || []).dup
    vals.concat(subject_topics) if subject_topics
    # nil (not []) signals "no values"; a nil result is not cached by ||=
    vals.empty? ? nil : vals
  end
end
|
196
|
+
# Values of the [:origin_info, :place, :placeTerm] term, exactly as
# term_values returns them.
def place
  term_values([:origin_info, :place, :placeTerm])
end
|
200
|
+
# Picks a main author display value (with date) from plain_name:
# the first name carrying a marcrelator role whose value includes 'Creator'
# or 'Author'; failing that, the first name that has no role at all.
# Returns nil when neither exists.
def main_author_w_date_test
  author = nil
  roleless = nil
  plain_name.each do |name|
    roleless ||= name if name.role.size.zero?
    name.role.each do |role|
      next unless role.authority.include?('marcrelator')
      next unless role.value.include?('Creator') || role.value.include?('Author')
      author ||= name.display_value_w_date
    end
  end
  author ||= roleless.display_value_w_date if roleless
  author
end
|
219
|
+
#remove trailing commas
|
220
|
+
def sw_full_title_without_commas
  title = sw_full_title
  # /,$/ with gsub strips a comma sitting at the end of any line of the title
  title ? title.gsub(/,$/, '') : title
end
|
227
|
+
|
228
|
+
# Logger used by the SearchWorks mixin methods; built on first use
# (writing to STDOUT) and reused afterwards.
def sw_logger
  return @logger if @logger

  @logger = Logger.new(STDOUT)
end
|
231
|
+
# Values are the contents of:
|
232
|
+
# subject/geographic
|
233
|
+
# subject/hierarchicalGeographic
|
234
|
+
# subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
|
235
|
+
# @return [Array<String>] values for the geographic_search Solr field for this document or nil if none
|
236
|
+
def geographic_search
  @geographic_search ||= begin
    geo_vals = sw_geographic_search

    # TODO: this should go into stanford-mods ... but then we have to set that gem up with a Logger
    # When there are more geographicCode values than translated values, log
    # each code element whose authority is neither marcgac nor marccountry.
    translated = subject.geographicCode.translated_value
    raw_codes = term_values([:subject, :geographicCode])
    if raw_codes && raw_codes.size > translated.size
      subject.geographicCode.each do |code_node|
        next if code_node.authority == 'marcgac' || code_node.authority == 'marccountry'

        sw_logger.info("#{druid} has subject geographicCode element with untranslated encoding (#{code_node.authority}): #{code_node.to_xml}")
      end
    end

    # FIXME: stanford-mods should be returning [], not nil ...
    return nil unless geo_vals && !geo_vals.empty?

    geo_vals
  end
end
|
257
|
+
|
258
|
+
# Values are the contents of:
|
259
|
+
# subject/name
|
260
|
+
# subject/occupation - no subelements
|
261
|
+
# subject/titleInfo
|
262
|
+
# @return [Array<String>] values for the subject_other_search Solr field for this document or nil if none
|
263
|
+
def subject_other_search
  @subject_other_search ||= begin
    # occupations first, then names, then titles -- each only when present
    combined = []
    occupations = subject_occupations
    combined.concat(occupations) if occupations
    names = subject_names
    combined.concat(names) if names
    titles = subject_titles
    combined.concat(titles) if titles
    combined unless combined.empty?
  end
end
|
271
|
+
|
272
|
+
# Values are the contents of:
|
273
|
+
# subject/temporal
|
274
|
+
# subject/genre
|
275
|
+
# @return [Array<String>] values for the subject_other_subvy_search Solr field for this document or nil if none
|
276
|
+
def subject_other_subvy_search
  @subject_other_subvy_search ||= begin
    collected = []
    temporal_vals = subject_temporal
    collected.concat(temporal_vals) if temporal_vals
    genre_vals = term_values([:subject, :genre])
    collected.concat(genre_vals) if genre_vals

    # print a message for any temporal encodings
    subject.temporal.each do |node|
      next if node.encoding.empty?

      sw_logger.info("#{druid} has subject temporal element with untranslated encoding: #{node.to_xml}")
    end

    collected unless collected.empty?
  end
end
|
290
|
+
# @return [Array<String>] values for the pub_date_group_facet
|
291
|
+
def pub_date_groups(year)
  return nil unless year

  pub = year.to_i
  now = Time.new.year.to_i
  # Exactly one group is chosen: the first (most recent) bucket the year fits.
  label =
    if pub >= now - 1
      "This year"
    elsif pub >= now - 3
      "Last 3 years"
    elsif pub >= now - 10
      "Last 10 years"
    elsif pub >= now - 50
      "Last 50 years"
    else
      "More than 50 years ago"
    end
  [label]
end
|
316
|
+
|
317
|
+
# select one or more format values from the controlled vocabulary here:
|
318
|
+
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
|
319
|
+
# based on the dor_content_type
|
320
|
+
# @return [Array<String>] value(s) in the SearchWorks controlled vocabulary
|
321
|
+
def format
  # typeOfResource value => SearchWorks format value
  known = {
    'still image' => 'Image',
    'mixed material' => 'Manuscript/Archive',
    'moving image' => 'Video',
    'three dimensional object' => 'Other',
    'cartographic' => 'Map/Globe',
    'sound recording-musical' => 'Music-Recording',
    'sound recording-nonmusical' => 'Sound Recording',
    'software, multimedia' => 'Computer File'
  }

  vals = []
  (term_values(:typeOfResource) || []).each do |form|
    if known.key?(form)
      vals << known[form]
    else
      sw_logger.warn "#{druid} has an unknown typeOfResource #{form}"
    end
  end
  return vals.uniq unless vals.empty?

  if !typeOfResource || typeOfResource.length == 0
    sw_logger.warn "#{druid} has no valid typeOfResource"
  end
  # Always hand back an Array. Before, the method fell through to nil when a
  # typeOfResource element was present but none of its values was recognised,
  # while returning [] when the element was missing altogether.
  []
end
|
356
|
+
|
357
|
+
# Values are the contents of:
|
358
|
+
# all subject subelements except subject/cartographic plus genre top level element
|
359
|
+
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
|
360
|
+
def subject_all_search
  merged = []
  # same order as the individual fields: topic, geographic, other, other_subvy
  [topic_search, geographic_search, subject_other_search, subject_other_subvy_search].each do |part|
    merged.concat(part) if part
  end
  merged.empty? ? nil : merged
end
|
367
|
+
# First publication date value as returned by pub_dates, or nil when
# pub_dates has nothing.
def pub_date_display
  # pub_dates is not memoized, so evaluate it once instead of twice
  dates = pub_dates
  dates ? dates.first : nil
end
|
374
|
+
#get the dates from dateIssued, and dateCreated merged into 1 array.
|
375
|
+
# @return [Array<String>] values for the issue_date_display Solr field for this document or nil if none
|
376
|
+
def pub_dates
  issued = term_values([:origin_info, :dateIssued])
  created = term_values([:origin_info, :dateCreated])

  # Collect into a fresh array (dateIssued values first, then dateCreated) so
  # the arrays handed back by term_values are never modified.
  dates = []
  dates.concat(issued) if issued
  dates.concat(created) if created
  dates.empty? ? nil : dates
end
|
385
|
+
# @param object anything Kernel#Integer might accept
# @return [Boolean] true when Kernel#Integer can convert the object, false otherwise
def is_number?(object)
  Integer(object)
  true
rescue ArgumentError, TypeError, RangeError
  # Only the conversion failures Integer() raises are treated as "not a
  # number" (RangeError covers NaN/Infinity); other errors are not swallowed.
  false
end
|
388
|
+
# @param object [String] candidate date text
# @return [Boolean] true when Date.parse accepts the object, false otherwise
def is_date?(object)
  # Date lives in the stdlib 'date' library, which must be loaded; with the
  # former catch-all inline rescue a missing Date constant silently produced
  # false for every input.
  Date.parse(object)
  true
rescue ArgumentError, TypeError
  # unparseable text raises ArgumentError (Date::Error), non-strings TypeError
  false
end
|
391
|
+
|
392
|
+
# Get the publish year from mods
|
393
|
+
#@return [String] 4 character year or nil if no valid date was found
|
394
|
+
def pub_year
  # use the cached year if there is one; '' is the cached marker for
  # "already searched, nothing usable found"
  if @pub_year
    return @pub_year == '' ? nil : @pub_year
  end

  dates = pub_dates
  if dates
    # remove ? and [ ] from every date string
    cleaned = dates.map { |raw| raw.delete('?[]') }
    # try the most normal date formats first and progress to more wonky ones
    @pub_year = get_plain_four_digit_year(cleaned) ||
                get_double_digit_century(cleaned) ||
                get_three_digit_year(cleaned) ||
                get_single_digit_century(cleaned)
    return @pub_year if @pub_year
  end

  @pub_year = ''
  sw_logger.info("#{druid} no valid pub date found in '#{dates.to_s}'")
  nil
end
|
424
|
+
#creates a date suitable for sorting. Guaranteed to be 4 digits or nil
|
425
|
+
def pub_date_sort
  sortable = pub_date
  return nil unless sortable

  # left-pad 3 character values, then turn the century marker -- into 00
  sortable = '0' + sortable if sortable.length == 3
  sortable = sortable.gsub('--', '00')
  if sortable.length != 4
    raise "pub_date_sort was about to return a non 4 digit value #{sortable}!"
  end

  sortable
end
|
437
|
+
#The year the object was published, filtered based on max_pub_date and min_pub_date from the config file
|
438
|
+
#@return [String] 4 character year or nil
|
439
|
+
def pub_date
  # hand back pub_year when it has a value, otherwise nil
  pub_year || nil
end
|
446
|
+
#Values for the pub date facet. This is less strict than the 4 year date requirements for pub_date
|
447
|
+
#@return [String] value for the pub date facet (the pub_date itself, or an 'NNth century' label when it contains '--'), or nil if there is no pub_date
|
448
|
+
def pub_date_facet
  date = pub_date
  return nil unless date
  return date unless date.include?('--')

  # a value containing '--' is a century marker: leading digits + 1 names the century
  (date[0, 2].to_i + 1).to_s + 'th century'
end
|
462
|
+
|
463
|
+
# Values are the contents of:
|
464
|
+
# subject/topic
|
465
|
+
# subject/name
|
466
|
+
# subject/title
|
467
|
+
# subject/occupation
|
468
|
+
# with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
469
|
+
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
|
470
|
+
def topic_facet
  facet_vals = []
  # topics, names, titles, occupations -- in that order, skipping absent groups
  [subject_topics, subject_names, subject_titles, subject_occupations].each do |group|
    next unless group

    group.each do |raw|
      # drop one trailing backslash, comma or semicolon, then surrounding whitespace
      facet_vals << raw.sub(/[\\,;]$/, '').strip
    end
  end
  facet_vals.empty? ? nil : facet_vals
end
|
481
|
+
|
482
|
+
# geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
483
|
+
# @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
|
484
|
+
def geographic_facet
  geo_vals = geographic_search
  return nil unless geo_vals

  geo_vals.map { |val| val.sub(/[\\,;]$/, '').strip }
end
|
487
|
+
|
488
|
+
# subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
489
|
+
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
|
490
|
+
def era_facet
  eras = subject_temporal
  return nil unless eras

  eras.map { |val| val.sub(/[\\,;]$/, '').strip }
end
|
493
|
+
# @return [String] value with the numeric catkey in it, or nil if none exists
|
494
|
+
def catkey
  identifiers = term_values([:record_info, :recordIdentifier])
  return nil unless identifiers && identifiers.length > 0

  identifiers.first.gsub('a', '') # need to ensure catkey is numeric only
end
|
501
|
+
# Stores the druid that the log messages of this mixin refer to.
def druid=(new_druid)
  @druid = new_druid
end
|
504
|
+
# The stored druid, or the placeholder 'Unknown item' when none has been set.
def druid
  @druid || 'Unknown item'
end
|
507
|
+
|
508
|
+
# protected ----------------------------------------------------------
|
509
|
+
|
510
|
+
# convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
|
511
|
+
def subject_names
  return @subject_names if @subject_names

  @subject_names = sw_subject_names
end
|
514
|
+
|
515
|
+
# convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
|
516
|
+
def subject_occupations
  return @subject_occupations if @subject_occupations

  @subject_occupations = term_values([:subject, :occupation])
end
|
519
|
+
|
520
|
+
# convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
|
521
|
+
def subject_temporal
  return @subject_temporal if @subject_temporal

  @subject_temporal = term_values([:subject, :temporal])
end
|
524
|
+
|
525
|
+
# convenience method for subject/titleInfo values (to avoid parsing the mods for the same thing multiple times)
|
526
|
+
def subject_titles
  return @subject_titles if @subject_titles

  @subject_titles = sw_subject_titles
end
|
529
|
+
|
530
|
+
# convenience method for subject/topic values (to avoid parsing the mods for the same thing multiple times)
|
531
|
+
def subject_topics
  return @subject_topics if @subject_topics

  @subject_topics = term_values([:subject, :topic])
end
|
534
|
+
|
535
|
+
#get a 4 digit year like 1865 from the date array
|
536
|
+
def get_plain_four_digit_year(dates)
  dates.each do |f_date|
    matches = f_date.scan(/\d{4}/)
    if matches.length == 1
      @pub_year = matches.first
      return matches.first
    end

    # if there are multiples (or none), check for ones with CE after them
    matches.each do |match|
      # look for things like '1684 CE' or '1865-6 CE'. =~ gives the match
      # offset and 0 is truthy in Ruby, so a year at the very start of the
      # string counts (the former `pos > 0` test rejected it).
      next unless f_date.include?("#{match} CE") || f_date =~ /#{match}...CE/

      @pub_year = match
      return match
    end
  end
  nil
end
|
557
|
+
|
558
|
+
#get a double digit century like '12th century' from the date array
|
559
|
+
def get_double_digit_century(dates)
  dates.each do |f_date|
    matches = f_date.scan(/\d{2}th/)
    if matches.length == 1
      # '12th' => '11--'
      @pub_year = "#{matches.first[0, 2].to_i - 1}--"
      return @pub_year
    end

    # if there are multiples, check for ones with CE after them
    matches.each do |match|
      # =~ gives the match offset and 0 is truthy in Ruby, so a century at the
      # very start of the string counts (the former `pos > 0` test rejected it).
      marked_ce = f_date.include?("#{match} CE") ||
                  f_date =~ /#{match}...CE/ ||
                  f_date =~ /#{match} century CE/
      next unless marked_ce

      @pub_year = "#{match[0, 2].to_i - 1}--"
      return @pub_year
    end
  end
  nil
end
|
581
|
+
|
582
|
+
#get a 3 digit year like 965 from the date array
|
583
|
+
def get_three_digit_year(dates)
  dates.each do |candidate|
    # first run of three digits in this date string, if any
    found = candidate[/\d{3}/]
    return found if found
  end
  nil
end
|
592
|
+
|
593
|
+
#get a single digit century like '9th century' from the date array
|
594
|
+
def get_single_digit_century(dates)
  dates.each do |f_date|
    matches = f_date.scan(/\d{1}th/)
    if matches.length == 1
      # '9th' => '8--' (only the single leading digit is the century number)
      @pub_year = "#{matches.first[0, 1].to_i - 1}--"
      return @pub_year
    end

    # if there are multiples, check for ones with CE after them
    matches.each do |match|
      # =~ gives the match offset and 0 is truthy in Ruby, so a century at the
      # very start of the string counts (the former `pos > 0` test rejected it).
      marked_ce = f_date.include?("#{match} CE") ||
                  f_date =~ /#{match}...CE/ ||
                  f_date =~ /#{match} century CE/
      next unless marked_ce

      @pub_year = "#{match[0, 1].to_i - 1}--"
      return @pub_year
    end
  end
  nil
end
|
185
616
|
end # class Record
|
186
617
|
end # Module Mods
|
187
618
|
end # Module Stanford
|
@@ -0,0 +1,634 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe "Searchworks mixin for Stanford::Mods::Record" do
|
5
|
+
|
6
|
+
before(:all) do
|
7
|
+
@smods_rec = Stanford::Mods::Record.new
|
8
|
+
@ns_decl = "xmlns='#{Mods::MODS_NS}'"
|
9
|
+
end
|
10
|
+
before :each do
|
11
|
+
@genre = 'genre top level'
|
12
|
+
@cart_coord = '6 00 S, 71 30 E'
|
13
|
+
@s_genre = 'genre in subject'
|
14
|
+
@geo = 'Somewhere'
|
15
|
+
@geo_code = 'us'
|
16
|
+
@hier_geo_country = 'France'
|
17
|
+
@s_name = 'name in subject'
|
18
|
+
@occupation = 'worker bee'
|
19
|
+
@temporal = 'temporal'
|
20
|
+
@s_title = 'title in subject'
|
21
|
+
@topic = 'topic'
|
22
|
+
m = "<mods #{@ns_decl}>
|
23
|
+
<genre>#{@genre}</genre>
|
24
|
+
<subject><cartographics><coordinates>#{@cart_coord}</coordinates></cartographics></subject>
|
25
|
+
<subject><genre>#{@s_genre}</genre></subject>
|
26
|
+
<subject><geographic>#{@geo}</geographic></subject>
|
27
|
+
<subject><geographicCode authority='iso3166'>#{@geo_code}</geographicCode></subject>
|
28
|
+
<subject><hierarchicalGeographic><country>#{@hier_geo_country}</country></hierarchicalGeographic></subject>
|
29
|
+
<subject><name><namePart>#{@s_name}</namePart></name></subject>
|
30
|
+
<subject><occupation>#{@occupation}</occupation></subject>
|
31
|
+
<subject><temporal>#{@temporal}</temporal></subject>
|
32
|
+
<subject><titleInfo><title>#{@s_title}</title></titleInfo></subject>
|
33
|
+
<subject><topic>#{@topic}</topic></subject>
|
34
|
+
</mods>"
|
35
|
+
@smods_rec = Stanford::Mods::Record.new
|
36
|
+
@smods_rec.from_str(m)
|
37
|
+
@ng_mods = Nokogiri::XML(m)
|
38
|
+
m_no_subject = "<mods #{@ns_decl}><note>notit</note></mods>"
|
39
|
+
@ng_mods_no_subject = Nokogiri::XML(m_no_subject)
|
40
|
+
end
|
41
|
+
|
42
|
+
context "sw author methods" do
|
43
|
+
before(:all) do
|
44
|
+
m = "<mods #{@ns_decl}>
|
45
|
+
|
46
|
+
</mods>"
|
47
|
+
@smods_rec = Stanford::Mods::Record.new
|
48
|
+
@smods_rec.from_str(m)
|
49
|
+
end
|
50
|
+
it 'should choose a date ending with CE if there are multiple dates' do
|
51
|
+
m = "<mods #{@ns_decl}><originInfo><dateIssued>7192 AM (li-Adam) / 1684 CE</dateIssued><issuance>monographic</issuance></originInfo>"
|
52
|
+
@smods_rec = Stanford::Mods::Record.new
|
53
|
+
@smods_rec.from_str(m)
|
54
|
+
@smods_rec.pub_date.should == '1684'
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
context "search fields" do
|
60
|
+
|
61
|
+
context "topic_search" do
|
62
|
+
before :each do
|
63
|
+
@genre = 'genre top level'
|
64
|
+
@cart_coord = '6 00 S, 71 30 E'
|
65
|
+
@s_genre = 'genre in subject'
|
66
|
+
@geo = 'Somewhere'
|
67
|
+
@geo_code = 'us'
|
68
|
+
@hier_geo_country = 'France'
|
69
|
+
@s_name = 'name in subject'
|
70
|
+
@occupation = 'worker bee'
|
71
|
+
@temporal = 'temporal'
|
72
|
+
@s_title = 'title in subject'
|
73
|
+
@topic = 'topic'
|
74
|
+
m = "<mods #{@ns_decl}>
|
75
|
+
<genre>#{@genre}</genre>
|
76
|
+
<subject><cartographics><coordinates>#{@cart_coord}</coordinates></cartographics></subject>
|
77
|
+
<subject><genre>#{@s_genre}</genre></subject>
|
78
|
+
<subject><geographic>#{@geo}</geographic></subject>
|
79
|
+
<subject><geographicCode authority='iso3166'>#{@geo_code}</geographicCode></subject>
|
80
|
+
<subject><hierarchicalGeographic><country>#{@hier_geo_country}</country></hierarchicalGeographic></subject>
|
81
|
+
<subject><name><namePart>#{@s_name}</namePart></name></subject>
|
82
|
+
<subject><occupation>#{@occupation}</occupation></subject>
|
83
|
+
<subject><temporal>#{@temporal}</temporal></subject>
|
84
|
+
<subject><titleInfo><title>#{@s_title}</title></titleInfo></subject>
|
85
|
+
<subject><topic>#{@topic}</topic></subject>
|
86
|
+
</mods>"
|
87
|
+
@smods_rec = Stanford::Mods::Record.new
|
88
|
+
@smods_rec.from_str(m)
|
89
|
+
@ng_mods = Nokogiri::XML(m)
|
90
|
+
m_no_subject = "<mods #{@ns_decl}><note>notit</note></mods>"
|
91
|
+
@ng_mods_no_subject = Nokogiri::XML(m_no_subject)
|
92
|
+
end
|
93
|
+
it "should be nil if there are no values in the MODS" do
|
94
|
+
m = "<mods #{@ns_decl}></mods>"
|
95
|
+
@smods_rec = Stanford::Mods::Record.new
|
96
|
+
@smods_rec.from_str(m)
|
97
|
+
@smods_rec.topic_search.should == nil
|
98
|
+
end
|
99
|
+
it "should contain subject <topic> subelement data" do
|
100
|
+
@smods_rec.topic_search.should include(@topic)
|
101
|
+
end
|
102
|
+
it "should contain top level <genre> element data" do
|
103
|
+
@smods_rec.topic_search.should include(@genre)
|
104
|
+
end
|
105
|
+
it "should not contain other subject element data" do
|
106
|
+
@smods_rec.topic_search.should_not include(@cart_coord)
|
107
|
+
@smods_rec.topic_search.should_not include(@s_genre)
|
108
|
+
@smods_rec.topic_search.should_not include(@geo)
|
109
|
+
@smods_rec.topic_search.should_not include(@geo_code)
|
110
|
+
@smods_rec.topic_search.should_not include(@hier_geo_country)
|
111
|
+
@smods_rec.topic_search.should_not include(@s_name)
|
112
|
+
@smods_rec.topic_search.should_not include(@occupation)
|
113
|
+
@smods_rec.topic_search.should_not include(@temporal)
|
114
|
+
@smods_rec.topic_search.should_not include(@s_title)
|
115
|
+
end
|
116
|
+
it "should not be nil if there are only subject/topic elements (no <genre>)" do
|
117
|
+
m = "<mods #{@ns_decl}><subject><topic>#{@topic}</topic></subject></mods>"
|
118
|
+
@smods_rec = Stanford::Mods::Record.new
|
119
|
+
@smods_rec.from_str(m)
|
120
|
+
@smods_rec.topic_search.should == [@topic]
|
121
|
+
end
|
122
|
+
it "should not be nil if there are only <genre> elements (no subject/topic elements)" do
|
123
|
+
m = "<mods #{@ns_decl}><genre>#{@genre}</genre></mods>"
|
124
|
+
@smods_rec = Stanford::Mods::Record.new
|
125
|
+
@smods_rec.from_str(m)
|
126
|
+
@smods_rec.topic_search.should == [@genre]
|
127
|
+
end
|
128
|
+
context "topic subelement" do
|
129
|
+
it "should have a separate value for each topic element" do
|
130
|
+
m = "<mods #{@ns_decl}>
|
131
|
+
<subject>
|
132
|
+
<topic>first</topic>
|
133
|
+
<topic>second</topic>
|
134
|
+
</subject>
|
135
|
+
<subject><topic>third</topic></subject>
|
136
|
+
</mods>"
|
137
|
+
@smods_rec = Stanford::Mods::Record.new
|
138
|
+
@smods_rec.from_str(m)
|
139
|
+
@smods_rec.topic_search.should == ['first', 'second', 'third']
|
140
|
+
end
|
141
|
+
it "should be nil if there are only empty values in the MODS" do
|
142
|
+
m = "<mods #{@ns_decl}><subject><topic/></subject><note>notit</note></mods>"
|
143
|
+
@smods_rec = Stanford::Mods::Record.new
|
144
|
+
@smods_rec.from_str(m)
|
145
|
+
@smods_rec.topic_search.should == nil
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end # topic_search
|
149
|
+
|
150
|
+
context "geographic_search" do
|
151
|
+
it "should call sw_geographic_search (from stanford-mods gem)" do
|
152
|
+
m = "<mods #{@ns_decl}><subject><geographic>#{@geo}</geographic></subject></mods>"
|
153
|
+
@smods_rec = Stanford::Mods::Record.new
|
154
|
+
@smods_rec.from_str(m)
|
155
|
+
@smods_rec.should_receive(:sw_geographic_search)
|
156
|
+
@smods_rec.geographic_search
|
157
|
+
end
|
158
|
+
it "should log an info message when it encounters a geographicCode encoding it doesn't translate" do
|
159
|
+
m = "<mods #{@ns_decl}><subject><geographicCode authority='iso3166'>ca</geographicCode></subject></mods>"
|
160
|
+
@smods_rec = Stanford::Mods::Record.new
|
161
|
+
@smods_rec.from_str(m)
|
162
|
+
@smods_rec.sw_logger.should_receive(:info).with(/#{@fake_druid} has subject geographicCode element with untranslated encoding \(iso3166\): <geographicCode authority=.*>ca<\/geographicCode>/)
|
163
|
+
@smods_rec.geographic_search
|
164
|
+
end
|
165
|
+
end # geographic_search
|
166
|
+
|
167
|
+
context "subject_other_search" do
|
168
|
+
|
169
|
+
it "should call sw_subject_names (from stanford-mods gem)" do
|
170
|
+
@smods_rec.should_receive(:sw_subject_names)
|
171
|
+
@smods_rec.subject_other_search
|
172
|
+
end
|
173
|
+
it "should call sw_subject_titles (from stanford-mods gem)" do
|
174
|
+
@smods_rec.should_receive(:sw_subject_titles)
|
175
|
+
@smods_rec.subject_other_search
|
176
|
+
end
|
177
|
+
it "should be nil if there are no values in the MODS" do
|
178
|
+
m = "<mods #{@ns_decl}></mods>"
|
179
|
+
@smods_rec = Stanford::Mods::Record.new
|
180
|
+
@smods_rec.from_str(m)
|
181
|
+
@smods_rec.subject_other_search.should == nil
|
182
|
+
end
|
183
|
+
it "should contain subject <name> SUBelement data" do
|
184
|
+
@smods_rec.subject_other_search.should include(@s_name)
|
185
|
+
end
|
186
|
+
it "should contain subject <occupation> subelement data" do
|
187
|
+
@smods_rec.subject_other_search.should include(@occupation)
|
188
|
+
end
|
189
|
+
it "should contain subject <titleInfo> SUBelement data" do
|
190
|
+
@smods_rec.subject_other_search.should include(@s_title)
|
191
|
+
end
|
192
|
+
it "should not contain other subject element data" do
|
193
|
+
@smods_rec.subject_other_search.should_not include(@genre)
|
194
|
+
@smods_rec.subject_other_search.should_not include(@cart_coord)
|
195
|
+
@smods_rec.subject_other_search.should_not include(@s_genre)
|
196
|
+
@smods_rec.subject_other_search.should_not include(@geo)
|
197
|
+
@smods_rec.subject_other_search.should_not include(@geo_code)
|
198
|
+
@smods_rec.subject_other_search.should_not include(@hier_geo_country)
|
199
|
+
@smods_rec.subject_other_search.should_not include(@temporal)
|
200
|
+
@smods_rec.subject_other_search.should_not include(@topic)
|
201
|
+
end
|
202
|
+
it "should not be nil if there are only subject/name elements" do
|
203
|
+
m = "<mods #{@ns_decl}><subject><name><namePart>#{@s_name}</namePart></name></subject></mods>"
|
204
|
+
@smods_rec = Stanford::Mods::Record.new
|
205
|
+
@smods_rec.from_str(m)
|
206
|
+
@smods_rec.subject_other_search.should == [@s_name]
|
207
|
+
end
|
208
|
+
it "should not be nil if there are only subject/occupation elements" do
|
209
|
+
m = "<mods #{@ns_decl}><subject><occupation>#{@occupation}</occupation></subject></mods>"
|
210
|
+
@smods_rec = Stanford::Mods::Record.new
|
211
|
+
@smods_rec.from_str(m)
|
212
|
+
|
213
|
+
|
214
|
+
@smods_rec. subject_other_search.should == [@occupation]
|
215
|
+
end
|
216
|
+
it "should not be nil if there are only subject/titleInfo elements" do
|
217
|
+
m = "<mods #{@ns_decl}><subject><titleInfo><title>#{@s_title}</title></titleInfo></subject></mods>"
|
218
|
+
@smods_rec = Stanford::Mods::Record.new
|
219
|
+
@smods_rec.from_str(m)
|
220
|
+
|
221
|
+
|
222
|
+
@smods_rec. subject_other_search.should == [@s_title]
|
223
|
+
end
|
224
|
+
context "occupation subelement" do
|
225
|
+
it "should have a separate value for each occupation element" do
|
226
|
+
m = "<mods #{@ns_decl}>
|
227
|
+
<subject>
|
228
|
+
<occupation>first</occupation>
|
229
|
+
<occupation>second</occupation>
|
230
|
+
</subject>
|
231
|
+
<subject><occupation>third</occupation></subject>
|
232
|
+
</mods>"
|
233
|
+
@smods_rec = Stanford::Mods::Record.new
|
234
|
+
@smods_rec.from_str(m)
|
235
|
+
|
236
|
+
@smods_rec.subject_other_search.should == ['first', 'second', 'third']
|
237
|
+
end
|
238
|
+
it "should be nil if there are only empty values in the MODS" do
|
239
|
+
m = "<mods #{@ns_decl}><subject><occupation/></subject><note>notit</note></mods>"
|
240
|
+
@smods_rec = Stanford::Mods::Record.new
|
241
|
+
@smods_rec.from_str(m)
|
242
|
+
|
243
|
+
@smods_rec. subject_other_search.should == nil
|
244
|
+
end
|
245
|
+
end
|
246
|
+
end # subject_other_search
|
247
|
+
|
248
|
+
context "subject_other_subvy_search" do
|
249
|
+
it "should be nil if there are no values in the MODS" do
|
250
|
+
@smods_rec = Stanford::Mods::Record.new
|
251
|
+
@smods_rec.from_str(@ng_mods_no_subject.to_s)
|
252
|
+
|
253
|
+
@smods_rec. subject_other_subvy_search.should == nil
|
254
|
+
end
|
255
|
+
it "should contain subject <temporal> subelement data" do
|
256
|
+
@smods_rec.subject_other_subvy_search.should include(@temporal)
|
257
|
+
end
|
258
|
+
it "should contain subject <genre> SUBelement data" do
|
259
|
+
@smods_rec.subject_other_subvy_search.should include(@s_genre)
|
260
|
+
end
|
261
|
+
it "should not contain other subject element data" do
|
262
|
+
@smods_rec.subject_other_subvy_search.should_not include(@genre)
|
263
|
+
@smods_rec.subject_other_subvy_search.should_not include(@cart_coord)
|
264
|
+
@smods_rec.subject_other_subvy_search.should_not include(@geo)
|
265
|
+
@smods_rec.subject_other_subvy_search.should_not include(@geo_code)
|
266
|
+
@smods_rec.subject_other_subvy_search.should_not include(@hier_geo_country)
|
267
|
+
@smods_rec.subject_other_subvy_search.should_not include(@s_name)
|
268
|
+
@smods_rec.subject_other_subvy_search.should_not include(@occupation)
|
269
|
+
@smods_rec.subject_other_subvy_search.should_not include(@topic)
|
270
|
+
@smods_rec.subject_other_subvy_search.should_not include(@s_title)
|
271
|
+
end
|
272
|
+
it "should not be nil if there are only subject/temporal elements (no subject/genre)" do
|
273
|
+
m = "<mods #{@ns_decl}><subject><temporal>#{@temporal}</temporal></subject></mods>"
|
274
|
+
@smods_rec = Stanford::Mods::Record.new
|
275
|
+
@smods_rec.from_str(m)
|
276
|
+
|
277
|
+
|
278
|
+
@smods_rec. subject_other_subvy_search.should == [@temporal]
|
279
|
+
end
|
280
|
+
it "should not be nil if there are only subject/genre elements (no subject/temporal)" do
|
281
|
+
m = "<mods #{@ns_decl}><subject><genre>#{@s_genre}</genre></subject></mods>"
|
282
|
+
@smods_rec = Stanford::Mods::Record.new
|
283
|
+
@smods_rec.from_str(m)
|
284
|
+
|
285
|
+
|
286
|
+
@smods_rec. subject_other_subvy_search.should == [@s_genre]
|
287
|
+
end
|
288
|
+
context "temporal subelement" do
  it "should have a separate value for each temporal element" do
    # Two <temporal> values share one <subject>; a third sits in its own <subject>.
    temporal_xml = [
      "<mods #{@ns_decl}>",
      "<subject>",
      "<temporal>1890-1910</temporal>",
      "<temporal>20th century</temporal>",
      "</subject>",
      "<subject><temporal>another</temporal></subject>",
      "</mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(temporal_xml)
    @smods_rec.subject_other_subvy_search.should eq(['1890-1910', '20th century', 'another'])
  end

  it "should log an info message when it encounters an encoding it doesn't translate" do
    encoded_xml = "<mods #{@ns_decl}><subject><temporal encoding='iso8601'>197505</temporal></subject></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(encoded_xml)
    # The message expectation has to be registered before the method under test runs.
    expected_message = /#{@fake_druid} has subject temporal element with untranslated encoding: <temporal encoding=.*>197505<\/temporal>/
    @smods_rec.sw_logger.should_receive(:info).with(expected_message)
    @smods_rec.subject_other_subvy_search
  end

  it "should be nil if there are only empty values in the MODS" do
    # An empty <temporal/> plus an unrelated <note> must not yield any values.
    empty_temporal_xml = "<mods #{@ns_decl}><subject><temporal/></subject><note>notit</note></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(empty_temporal_xml)
    @smods_rec.subject_other_subvy_search.should be_nil
  end
end
|
318
|
+
context "genre subelement" do
  it "should have a separate value for each genre element" do
    # Two <genre> values share one <subject>; a third sits in its own <subject>.
    genre_xml = [
      "<mods #{@ns_decl}>",
      "<subject>",
      "<genre>first</genre>",
      "<genre>second</genre>",
      "</subject>",
      "<subject><genre>third</genre></subject>",
      "</mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(genre_xml)
    @smods_rec.subject_other_subvy_search.should eq(['first', 'second', 'third'])
  end

  it "should be nil if there are only empty values in the MODS" do
    # An empty <genre/> plus an unrelated <note> must not yield any values.
    empty_genre_xml = "<mods #{@ns_decl}><subject><genre/></subject><note>notit</note></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(empty_genre_xml)
    @smods_rec.subject_other_subvy_search.should be_nil
  end
end
|
340
|
+
end # subject_other_subvy_search
|
341
|
+
|
342
|
+
context "subject_all_search" do
  it "should be nil if there are no values in the MODS" do
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(@ng_mods_no_subject.to_s)
    @smods_rec.subject_all_search.should be_nil
  end

  # The examples below do not build their own record; they use the @smods_rec
  # that is already in place when the example starts (its setup is not visible here).
  it "should contain top level <genre> element data" do
    @smods_rec.subject_all_search.should include(@genre)
  end

  it "should not contain cartographic sub element" do
    @smods_rec.subject_all_search.should_not include(@cart_coord)
  end

  it "should not include codes from hierarchicalGeographic sub element" do
    @smods_rec.subject_all_search.should_not include(@geo_code)
  end

  it "should contain all other subject subelement data" do
    # Each value is checked against a fresh call, exactly one call per expectation.
    expected_values = [
      @s_genre, @geo, @hier_geo_country, @s_name,
      @occupation, @temporal, @s_title, @topic
    ]
    expected_values.each do |value|
      @smods_rec.subject_all_search.should include(value)
    end
  end
end # subject_all_search
|
369
|
+
end # subject search fields
|
370
|
+
|
371
|
+
context "facet fields" do
|
372
|
+
context "topic_facet" do
  # The four "should include" examples use the @smods_rec already in place when
  # the example starts (its setup is not visible here).
  it "should include topic subelement" do
    @smods_rec.topic_facet.should include(@topic)
  end

  it "should include sw_subject_names" do
    @smods_rec.topic_facet.should include(@s_name)
  end

  it "should include sw_subject_titles" do
    @smods_rec.topic_facet.should include(@s_title)
  end

  it "should include occupation subelement" do
    @smods_rec.topic_facet.should include(@occupation)
  end

  it "should have the trailing punctuation removed" do
    # Each child ends in a different punctuation mark; the name keeps its
    # internal punctuation because only trailing characters are stripped.
    punctuated_xml = [
      "<mods #{@ns_decl}><subject>",
      "<topic>comma,</topic>",
      "<occupation>semicolon;</occupation>",
      "<titleInfo><title>backslash \\</title></titleInfo>",
      "<name><namePart>internal, punct;uation</namePart></name>",
      "</subject></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(punctuated_xml)
    ['comma', 'semicolon', 'backslash', 'internal, punct;uation'].each do |cleaned|
      @smods_rec.topic_facet.should include(cleaned)
    end
  end

  it "should be nil if there are no values" do
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(@ng_mods_no_subject.to_s)
    @smods_rec.topic_facet.should be_nil
  end
end
|
407
|
+
context "geographic_facet" do
  it "should call geographic_search" do
    # Message expectation: geographic_facet must delegate to geographic_search.
    @smods_rec.should_receive(:geographic_search)
    @smods_rec.geographic_facet
  end

  it "should be like geographic_search with the trailing punctuation (and preceding spaces) removed" do
    # Each <geographic> ends in a different punctuation mark; the last one only
    # has internal punctuation, which must survive.
    punctuated_xml = [
      "<mods #{@ns_decl}><subject>",
      "<geographic>comma,</geographic>",
      "<geographic>semicolon;</geographic>",
      "<geographic>backslash \\</geographic>",
      "<geographic>internal, punct;uation</geographic>",
      "</subject></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(punctuated_xml)
    ['comma', 'semicolon', 'backslash', 'internal, punct;uation'].each do |cleaned|
      @smods_rec.geographic_facet.should include(cleaned)
    end
  end

  it "should be nil if there are no values" do
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(@ng_mods_no_subject.to_s)
    @smods_rec.geographic_facet.should be_nil
  end
end
|
434
|
+
context "era_facet" do
  it "should be temporal subelement with the trailing punctuation removed" do
    # Each <temporal> ends in a different punctuation mark; the last one only
    # has internal punctuation, which must survive.
    punctuated_xml = [
      "<mods #{@ns_decl}><subject>",
      "<temporal>comma,</temporal>",
      "<temporal>semicolon;</temporal>",
      "<temporal>backslash \\</temporal>",
      "<temporal>internal, punct;uation</temporal>",
      "</subject></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(punctuated_xml)
    ['comma', 'semicolon', 'backslash', 'internal, punct;uation'].each do |cleaned|
      @smods_rec.era_facet.should include(cleaned)
    end
  end

  it "should be nil if there are no values" do
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(@ng_mods_no_subject.to_s)
    @smods_rec.era_facet.should be_nil
  end
end # era_facet
|
458
|
+
context "pub_dates" do
  # pub_dates returns every date value, not a single chosen one: the expectation
  # below lists the dateIssued value first, followed by both dateCreated values.
  it "should return all dateIssued values followed by all dateCreated values" do
    xml = [
      "<mods #{@ns_decl}><originInfo>",
      "<dateCreated>1904</dateCreated>",
      "<dateCreated>1904</dateCreated>",
      "<dateIssued>1906</dateIssued>",
      "</originInfo></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_dates.should == ['1906', '1904', '1904']
  end
end
|
471
|
+
end # context sw subject methods
|
472
|
+
context "pub_date" do
  # Every fixture below is a complete, well-formed <mods> document so the
  # examples do not depend on the XML parser recovering from a missing end tag.
  it "should choose the first date" do
    xml = [
      "<mods #{@ns_decl}><originInfo>",
      "<dateCreated>1904</dateCreated>",
      "</originInfo></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_date.should == '1904'
  end

  it "should parse a date" do
    xml = [
      "<mods #{@ns_decl}><originInfo>",
      "<dateCreated>Aug. 3rd, 1886</dateCreated>",
      "</originInfo></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_date.should == '1886'
  end

  it "should remove question marks and brackets" do
    xml = [
      "<mods #{@ns_decl}><originInfo>",
      "<dateCreated>Aug. 3rd, [18]86?</dateCreated>",
      "</originInfo></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_date.should == '1886'
  end

  it 'should handle an s after the decade' do
    xml = [
      "<mods #{@ns_decl}><originInfo>",
      "<dateCreated>early 1890s</dateCreated>",
      "</originInfo></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_date.should == '1890'
  end

  it 'should choose a date ending with CE if there are multiple dates' do
    xml = "<mods #{@ns_decl}><originInfo><dateIssued>7192 AM (li-Adam) / 1684 CE</dateIssued><issuance>monographic</issuance></originInfo></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_date.should == '1684'
  end

  it 'should handle hyphenated range dates' do
    xml = "<mods #{@ns_decl}><originInfo><dateIssued>1282 AH / 1865-6 CE</dateIssued><issuance>monographic</issuance></originInfo></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_date.should == '1865'
  end

  it 'should handle century based dates' do
    xml = "<mods #{@ns_decl}><originInfo><dateIssued>13th century AH / 19th CE</dateIssued><issuance>monographic</issuance></originInfo></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    # A "19th" century CE date is expected as the 18-- placeholder, sorting as 1800.
    @smods_rec.pub_date_facet.should == '19th century'
    @smods_rec.pub_date_sort.should == '1800'
    @smods_rec.pub_date.should == '18--'
  end

  it 'should handle multiple CE dates' do
    xml = "<mods #{@ns_decl}><originInfo><dateIssued>6 Dhu al-Hijjah 923 AH / 1517 CE -- 7 Rabi I 924 AH / 1518 CE</dateIssued><issuance>monographic</issuance></originInfo></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    # The earlier of the two CE years is the one expected in all three fields.
    @smods_rec.pub_date.should == '1517'
    @smods_rec.pub_date_sort.should == '1517'
    @smods_rec.pub_date_facet.should == '1517'
  end

  it 'should handle this case from walters' do
    xml = "<mods #{@ns_decl}><originInfo><dateIssued>Late 14th or early 15th century CE</dateIssued><issuance>monographic</issuance></originInfo></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_date.should == '14--'
    @smods_rec.pub_date_sort.should == '1400'
    @smods_rec.pub_date_facet.should == '15th century'
  end

  it 'should work on 3 digit dates' do
    xml = "<mods #{@ns_decl}><originInfo><dateIssued>966 CE</dateIssued><issuance>monographic</issuance></originInfo></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    # The sort value is left-padded with a zero to four digits.
    @smods_rec.pub_date.should == '966'
    @smods_rec.pub_date_sort.should == '0966'
    @smods_rec.pub_date_facet.should == '966'
  end

  # Renamed from a duplicate of the previous description: this example covers
  # a single-digit century ("9th CE"), not a plain three digit year.
  it 'should work on 3 digit century dates' do
    xml = "<mods #{@ns_decl}><originInfo><dateIssued>3rd century AH / 9th CE</dateIssued><issuance>monographic</issuance></originInfo></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.pub_date.should == '8--'
    @smods_rec.pub_date_sort.should == '0800'
    @smods_rec.pub_date_facet.should == '9th century'
  end
end # context pub_date
|
564
|
+
context 'pub_date_sort' do
  # Every example gets a freshly parsed record; pub_date itself is stubbed in
  # each example, so only pub_date_sort's handling of the stubbed value is tested.
  before(:each) do
    origin_xml = [
      "<mods #{@ns_decl}><originInfo>",
      "<dateCreated>Aug. 3rd, 1886</dateCreated>",
      "</originInfo></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(origin_xml)
  end

  it 'should work on normal dates' do
    @smods_rec.stub(:pub_date).and_return('1945')
    @smods_rec.pub_date_sort.should eq('1945')
  end

  it 'should work on 3 digit dates' do
    # Expected value is left-padded with a zero to four characters.
    @smods_rec.stub(:pub_date).and_return('945')
    @smods_rec.pub_date_sort.should eq('0945')
  end

  it 'should work on century dates' do
    # The "--" placeholder is expected to become "00".
    @smods_rec.stub(:pub_date).and_return('16--')
    @smods_rec.pub_date_sort.should eq('1600')
  end

  it 'should work on 3 digit century dates' do
    @smods_rec.stub(:pub_date).and_return('9--')
    @smods_rec.pub_date_sort.should eq('0900')
  end
end
|
590
|
+
|
591
|
+
context "format" do
  it "should choose the format" do
    # The closing tag now matches the opening <typeOfResource>; it was previously
    # misspelled, which made the fixture malformed XML.
    xml = "<mods #{@ns_decl}><typeOfResource>still image</typeOfResource></mods>"
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.format.should == ['Image']
  end

  it "should return nothing if there is no format info" do
    # Only originInfo is present, so there is no typeOfResource to map.
    xml = [
      "<mods #{@ns_decl}><originInfo>",
      "<dateCreated>1904</dateCreated>",
      "</originInfo></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
    @smods_rec.format.should == []
  end
end # context format
|
607
|
+
context "pub_date_groups" do
  # All examples pass the year explicitly, so one shared fixture record suffices.
  before(:each) do
    xml = [
      "<mods #{@ns_decl}><originInfo>",
      "<dateCreated>1904</dateCreated>",
      "</originInfo></mods>"
    ].join("\n")
    @smods_rec = Stanford::Mods::Record.new
    @smods_rec.from_str(xml)
  end

  it 'should generate the groups' do
    @smods_rec.pub_date_groups(1904).should == ['More than 50 years ago']
  end

  it 'should work for a modern date too' do
    # Uses the current year instead of a hard-coded 2013, which could only be
    # "This year" while the suite ran close to 2013.
    # NOTE(review): assumes pub_date_groups buckets relative to the current
    # calendar year; the implementation is not visible here, so confirm.
    @smods_rec.pub_date_groups(Time.now.year).should == ["This year"]
  end

  it 'should work ok given a nil date' do
    @smods_rec.pub_date_groups(nil).should == nil
  end
end # context pub date groups
|
634
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.13
|
4
|
+
version: 0.0.14
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2013-04
|
13
|
+
date: 2013-06-04 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: mods
|
@@ -151,6 +151,7 @@ files:
|
|
151
151
|
- lib/stanford-mods/version.rb
|
152
152
|
- spec/kolb_spec.rb
|
153
153
|
- spec/name_spec.rb
|
154
|
+
- spec/searchworks_gdor_spec.rb
|
154
155
|
- spec/searchworks_spec.rb
|
155
156
|
- spec/searchworks_subject_spec.rb
|
156
157
|
- spec/spec_helper.rb
|
@@ -169,7 +170,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
169
170
|
version: '0'
|
170
171
|
segments:
|
171
172
|
- 0
|
172
|
-
hash:
|
173
|
+
hash: 823695280464572683
|
173
174
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
174
175
|
none: false
|
175
176
|
requirements:
|
@@ -178,16 +179,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
179
|
version: '0'
|
179
180
|
segments:
|
180
181
|
- 0
|
181
|
-
hash:
|
182
|
+
hash: 823695280464572683
|
182
183
|
requirements: []
|
183
184
|
rubyforge_project:
|
184
|
-
rubygems_version: 1.8.
|
185
|
+
rubygems_version: 1.8.25
|
185
186
|
signing_key:
|
186
187
|
specification_version: 3
|
187
188
|
summary: Stanford specific wrangling of MODS metadata
|
188
189
|
test_files:
|
189
190
|
- spec/kolb_spec.rb
|
190
191
|
- spec/name_spec.rb
|
192
|
+
- spec/searchworks_gdor_spec.rb
|
191
193
|
- spec/searchworks_spec.rb
|
192
194
|
- spec/searchworks_subject_spec.rb
|
193
195
|
- spec/spec_helper.rb
|