stanford-mods 1.3.3 → 1.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +1 -0
- data/.rubocop.yml +4 -0
- data/Gemfile +1 -0
- data/lib/stanford-mods.rb +5 -5
- data/lib/stanford-mods/date_parsing.rb +245 -0
- data/lib/stanford-mods/origin_info.rb +411 -0
- data/lib/stanford-mods/searchworks.rb +23 -474
- data/lib/stanford-mods/searchworks_subjects.rb +208 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +746 -0
- data/spec/fixtures/spotlight_pub_date_data.rb +316 -0
- data/spec/origin_info_spec.rb +449 -0
- data/spec/searchworks_pub_dates_spec.rb +166 -163
- data/spec/spec_helper.rb +16 -5
- data/stanford-mods.gemspec +2 -0
- metadata +25 -2
@@ -1,5 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'stanford-mods/searchworks_languages'
|
3
|
+
require 'stanford-mods/searchworks_subjects'
|
3
4
|
require 'logger'
|
4
5
|
require 'mods'
|
5
6
|
|
@@ -207,289 +208,11 @@ module Stanford
|
|
207
208
|
# ---- end TITLE ----
|
208
209
|
|
209
210
|
# ---- SUBJECT ----
|
210
|
-
|
211
|
-
# Values are the contents of:
|
212
|
-
# subject/geographic
|
213
|
-
# subject/hierarchicalGeographic
|
214
|
-
# subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
|
215
|
-
# @param [String] sep - the separator string for joining hierarchicalGeographic sub elements
|
216
|
-
# @return [Array<String>] values for geographic_search Solr field for this document or [] if none
|
217
|
-
def sw_geographic_search(sep = ' ')
|
218
|
-
result = term_values([:subject, :geographic]) || []
|
219
|
-
|
220
|
-
# hierarchicalGeographic has sub elements
|
221
|
-
@mods_ng_xml.subject.hierarchicalGeographic.each { |hg_node|
|
222
|
-
hg_vals = []
|
223
|
-
hg_node.element_children.each { |e|
|
224
|
-
hg_vals << e.text unless e.text.empty?
|
225
|
-
}
|
226
|
-
result << hg_vals.join(sep) unless hg_vals.empty?
|
227
|
-
}
|
228
|
-
|
229
|
-
trans_code_vals = @mods_ng_xml.subject.geographicCode.translated_value
|
230
|
-
if trans_code_vals
|
231
|
-
trans_code_vals.each { |val|
|
232
|
-
result << val if !result.include?(val)
|
233
|
-
}
|
234
|
-
end
|
235
|
-
|
236
|
-
result
|
237
|
-
end
|
238
|
-
|
239
|
-
# Values are the contents of:
|
240
|
-
# subject/name/namePart
|
241
|
-
# "Values from namePart subelements should be concatenated in the order they appear (e.g. "Shakespeare, William, 1564-1616")"
|
242
|
-
# @param [String] sep - the separator string for joining namePart sub elements
|
243
|
-
# @return [Array<String>] values for names inside subject elements or [] if none
|
244
|
-
def sw_subject_names(sep = ', ')
|
245
|
-
result = []
|
246
|
-
@mods_ng_xml.subject.name_el.select { |n_el| n_el.namePart }.each { |name_el_w_np|
|
247
|
-
parts = name_el_w_np.namePart.map { |npn| npn.text unless npn.text.empty? }.compact
|
248
|
-
result << parts.join(sep).strip unless parts.empty?
|
249
|
-
}
|
250
|
-
result
|
251
|
-
end
|
252
|
-
|
253
|
-
# Values are the contents of:
|
254
|
-
# subject/titleInfo/(subelements)
|
255
|
-
# @param [String] sep - the separator string for joining titleInfo sub elements
|
256
|
-
# @return [Array<String>] values for titles inside subject elements or [] if none
|
257
|
-
def sw_subject_titles(sep = ' ')
|
258
|
-
result = []
|
259
|
-
@mods_ng_xml.subject.titleInfo.each { |ti_el|
|
260
|
-
parts = ti_el.element_children.map { |el| el.text unless el.text.empty? }.compact
|
261
|
-
result << parts.join(sep).strip unless parts.empty?
|
262
|
-
}
|
263
|
-
result
|
264
|
-
end
|
265
|
-
|
266
|
-
# Values are the contents of:
|
267
|
-
# mods/genre
|
268
|
-
# mods/subject/topic
|
269
|
-
# @return [Array<String>] values for the topic_search Solr field for this document or nil if none
|
270
|
-
def topic_search
|
271
|
-
@topic_search ||= begin
|
272
|
-
vals = self.term_values(:genre) || []
|
273
|
-
vals.concat(subject_topics) if subject_topics
|
274
|
-
vals.empty? ? nil : vals
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
# Values are the contents of:
|
279
|
-
# subject/topic
|
280
|
-
# subject/name
|
281
|
-
# subject/title
|
282
|
-
# subject/occupation
|
283
|
-
# with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
284
|
-
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
|
285
|
-
def topic_facet
|
286
|
-
vals = subject_topics ? Array.new(subject_topics) : []
|
287
|
-
vals.concat(subject_names) if subject_names
|
288
|
-
vals.concat(subject_titles) if subject_titles
|
289
|
-
vals.concat(subject_occupations) if subject_occupations
|
290
|
-
vals.map! { |val|
|
291
|
-
v = val.sub(/[\\,;]$/, '')
|
292
|
-
v.strip
|
293
|
-
}
|
294
|
-
vals.empty? ? nil : vals
|
295
|
-
end
|
296
|
-
|
297
|
-
# geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
298
|
-
# @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
|
299
|
-
def geographic_facet
|
300
|
-
geographic_search.map { |val| val.sub(/[\\,;]$/, '').strip } unless !geographic_search
|
301
|
-
end
|
302
|
-
|
303
|
-
# subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
304
|
-
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
|
305
|
-
def era_facet
|
306
|
-
subject_temporal.map { |val| val.sub(/[\\,;]$/, '').strip } unless !subject_temporal
|
307
|
-
end
|
308
|
-
|
309
|
-
# Values are the contents of:
|
310
|
-
# subject/geographic
|
311
|
-
# subject/hierarchicalGeographic
|
312
|
-
# subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
|
313
|
-
# @return [Array<String>] values for the geographic_search Solr field for this document or nil if none
|
314
|
-
def geographic_search
|
315
|
-
@geographic_search ||= begin
|
316
|
-
result = self.sw_geographic_search
|
317
|
-
|
318
|
-
# TODO: this should go into stanford-mods ... but then we have to set that gem up with a Logger
|
319
|
-
# print a message for any unrecognized encodings
|
320
|
-
xvals = self.subject.geographicCode.translated_value
|
321
|
-
codes = self.term_values([:subject, :geographicCode])
|
322
|
-
if codes && codes.size > xvals.size
|
323
|
-
self.subject.geographicCode.each { |n|
|
324
|
-
if n.authority != 'marcgac' && n.authority != 'marccountry'
|
325
|
-
sw_logger.info("#{druid} has subject geographicCode element with untranslated encoding (#{n.authority}): #{n.to_xml}")
|
326
|
-
end
|
327
|
-
}
|
328
|
-
end
|
329
|
-
|
330
|
-
# FIXME: stanford-mods should be returning [], not nil ...
|
331
|
-
return nil if !result || result.empty?
|
332
|
-
result
|
333
|
-
end
|
334
|
-
end
|
335
|
-
|
336
|
-
# Values are the contents of:
|
337
|
-
# subject/name
|
338
|
-
# subject/occupation - no subelements
|
339
|
-
# subject/titleInfo
|
340
|
-
# @return [Array<String>] values for the subject_other_search Solr field for this document or nil if none
|
341
|
-
def subject_other_search
|
342
|
-
@subject_other_search ||= begin
|
343
|
-
vals = subject_occupations ? Array.new(subject_occupations) : []
|
344
|
-
vals.concat(subject_names) if subject_names
|
345
|
-
vals.concat(subject_titles) if subject_titles
|
346
|
-
vals.empty? ? nil : vals
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
# Values are the contents of:
|
351
|
-
# subject/temporal
|
352
|
-
# subject/genre
|
353
|
-
# @return [Array<String>] values for the subject_other_subvy_search Solr field for this document or nil if none
|
354
|
-
def subject_other_subvy_search
|
355
|
-
@subject_other_subvy_search ||= begin
|
356
|
-
vals = subject_temporal ? Array.new(subject_temporal) : []
|
357
|
-
gvals = self.term_values([:subject, :genre])
|
358
|
-
vals.concat(gvals) if gvals
|
359
|
-
|
360
|
-
# print a message for any temporal encodings
|
361
|
-
self.subject.temporal.each { |n|
|
362
|
-
sw_logger.info("#{druid} has subject temporal element with untranslated encoding: #{n.to_xml}") if !n.encoding.empty?
|
363
|
-
}
|
364
|
-
|
365
|
-
vals.empty? ? nil : vals
|
366
|
-
end
|
367
|
-
end
|
368
|
-
|
369
|
-
# Values are the contents of:
|
370
|
-
# all subject subelements except subject/cartographic plus genre top level element
|
371
|
-
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
|
372
|
-
def subject_all_search
|
373
|
-
vals = topic_search ? Array.new(topic_search) : []
|
374
|
-
vals.concat(geographic_search) if geographic_search
|
375
|
-
vals.concat(subject_other_search) if subject_other_search
|
376
|
-
vals.concat(subject_other_subvy_search) if subject_other_subvy_search
|
377
|
-
vals.empty? ? nil : vals
|
378
|
-
end
|
379
|
-
|
211
|
+
# see searchworks_subjects.rb
|
380
212
|
# ---- end SUBJECT ----
|
381
213
|
|
382
214
|
# ---- PUBLICATION (place, year) ----
|
383
|
-
|
384
|
-
vals = self.term_values([:origin_info,:place,:placeTerm])
|
385
|
-
vals
|
386
|
-
end
|
387
|
-
|
388
|
-
# For the date display only, the first place to look is in the dates without encoding=marc array.
|
389
|
-
# If no such dates, select the first date in the dates_marc_encoding array. Otherwise return nil
|
390
|
-
# @return [String] value for the pub_date_display Solr field for this document or nil if none
|
391
|
-
def pub_date_display
|
392
|
-
return dates_no_marc_encoding.first unless dates_no_marc_encoding.empty?
|
393
|
-
return dates_marc_encoding.first unless dates_marc_encoding.empty?
|
394
|
-
return nil
|
395
|
-
end
|
396
|
-
|
397
|
-
# For the date indexing, sorting and faceting, the first place to look is in the dates with encoding=marc array.
|
398
|
-
# If that doesn't exist, look in the dates without encoding=marc array. Otherwise return nil
|
399
|
-
# @return [Array<String>] values for the date Solr field for this document or nil if none
|
400
|
-
def pub_dates
|
401
|
-
return dates_marc_encoding unless dates_marc_encoding.empty?
|
402
|
-
return dates_no_marc_encoding unless dates_no_marc_encoding.empty?
|
403
|
-
return nil
|
404
|
-
end
|
405
|
-
|
406
|
-
def is_number?(object)
|
407
|
-
true if Integer(object) rescue false
|
408
|
-
end
|
409
|
-
def is_date?(object)
|
410
|
-
true if Date.parse(object) rescue false
|
411
|
-
end
|
412
|
-
|
413
|
-
# Get the publish year from mods
|
414
|
-
# @return [String] 4 character year or nil if no valid date was found
|
415
|
-
def pub_year
|
416
|
-
#use the cached year if there is one
|
417
|
-
if @pub_year
|
418
|
-
if @pub_year == ''
|
419
|
-
return nil
|
420
|
-
end
|
421
|
-
return @pub_year
|
422
|
-
end
|
423
|
-
dates = pub_dates
|
424
|
-
if dates
|
425
|
-
pruned_dates = []
|
426
|
-
dates.each do |f_date|
|
427
|
-
#remove ? and []
|
428
|
-
if (f_date.length == 4 && f_date.end_with?('?'))
|
429
|
-
pruned_dates << f_date.gsub('?','0')
|
430
|
-
else
|
431
|
-
pruned_dates << f_date.gsub('?','').gsub('[','').gsub(']','')
|
432
|
-
end
|
433
|
-
end
|
434
|
-
#try to find a date starting with the most normal date formats and progressing to more wonky ones
|
435
|
-
@pub_year = get_plain_four_digit_year pruned_dates
|
436
|
-
return @pub_year if @pub_year
|
437
|
-
# Check for years in u notation, e.g., 198u
|
438
|
-
@pub_year = get_u_year pruned_dates
|
439
|
-
return @pub_year if @pub_year
|
440
|
-
@pub_year = get_double_digit_century pruned_dates
|
441
|
-
return @pub_year if @pub_year
|
442
|
-
@pub_year = get_bc_year pruned_dates
|
443
|
-
return @pub_year if @pub_year
|
444
|
-
@pub_year = get_three_digit_year pruned_dates
|
445
|
-
return @pub_year if @pub_year
|
446
|
-
@pub_year = get_single_digit_century pruned_dates
|
447
|
-
return @pub_year if @pub_year
|
448
|
-
end
|
449
|
-
@pub_year=''
|
450
|
-
return nil
|
451
|
-
end
|
452
|
-
|
453
|
-
#creates a date suitable for sorting. Guarnteed to be 4 digits or nil
|
454
|
-
def pub_date_sort
|
455
|
-
pd=nil
|
456
|
-
if pub_date
|
457
|
-
pd=pub_date
|
458
|
-
if pd.length == 3
|
459
|
-
pd='0'+pd
|
460
|
-
end
|
461
|
-
pd=pd.gsub('--','00')
|
462
|
-
end
|
463
|
-
raise "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd and pd.length !=4
|
464
|
-
pd
|
465
|
-
end
|
466
|
-
|
467
|
-
#The year the object was published, , filtered based on max_pub_date and min_pub_date from the config file
|
468
|
-
#@return [String] 4 character year or nil
|
469
|
-
def pub_date
|
470
|
-
pub_year || nil
|
471
|
-
end
|
472
|
-
|
473
|
-
#Values for the pub date facet. This is less strict than the 4 year date requirements for pub_date
|
474
|
-
#@return <Array[String]> with values for the pub date facet
|
475
|
-
def pub_date_facet
|
476
|
-
if pub_date
|
477
|
-
if pub_date.start_with?('-')
|
478
|
-
return (pub_date.to_i + 1000).to_s + ' B.C.'
|
479
|
-
end
|
480
|
-
if pub_date.include? '--'
|
481
|
-
cent=pub_date[0,2].to_i
|
482
|
-
cent+=1
|
483
|
-
cent=cent.to_s+'th century'
|
484
|
-
return cent
|
485
|
-
else
|
486
|
-
return pub_date
|
487
|
-
end
|
488
|
-
else
|
489
|
-
nil
|
490
|
-
end
|
491
|
-
end
|
492
|
-
|
215
|
+
# see origin_info.rb (as all this information comes from top level originInfo element)
|
493
216
|
# ---- end PUBLICATION (place, year) ----
|
494
217
|
|
495
218
|
def sw_logger
|
@@ -525,23 +248,23 @@ module Stanford
|
|
525
248
|
when 'still image'
|
526
249
|
val << 'Image'
|
527
250
|
when 'text'
|
528
|
-
val << 'Book' if issuance
|
251
|
+
val << 'Book' if issuance && issuance.include?('monographic')
|
529
252
|
book_genres = ['book chapter', 'Book chapter', 'Book Chapter',
|
530
253
|
'issue brief', 'Issue brief', 'Issue Brief',
|
531
254
|
'librettos', 'Librettos',
|
532
255
|
'project report', 'Project report', 'Project Report',
|
533
256
|
'technical report', 'Technical report', 'Technical Report',
|
534
257
|
'working paper', 'Working paper', 'Working Paper']
|
535
|
-
val << 'Book' if genres
|
258
|
+
val << 'Book' if genres && !(genres & book_genres).empty?
|
536
259
|
conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
|
537
|
-
val << 'Conference Proceedings' if genres
|
538
|
-
val << 'Journal/Periodical' if issuance
|
260
|
+
val << 'Conference Proceedings' if genres && !(genres & conf_pub).empty?
|
261
|
+
val << 'Journal/Periodical' if issuance && issuance.include?('continuing')
|
539
262
|
article = ['article', 'Article']
|
540
|
-
val << 'Journal/Periodical' if genres
|
263
|
+
val << 'Journal/Periodical' if genres && !(genres & article).empty?
|
541
264
|
stu_proj_rpt = ['student project report', 'Student project report', 'Student Project report', 'Student Project Report']
|
542
|
-
val << 'Other' if genres
|
265
|
+
val << 'Other' if genres && !(genres & stu_proj_rpt).empty?
|
543
266
|
thesis = ['thesis', 'Thesis']
|
544
|
-
val << 'Thesis' if genres
|
267
|
+
val << 'Thesis' if genres && !(genres & thesis).empty?
|
545
268
|
when 'three dimensional object'
|
546
269
|
val << 'Other'
|
547
270
|
end
|
@@ -571,7 +294,7 @@ module Stanford
|
|
571
294
|
]
|
572
295
|
if types
|
573
296
|
genres = self.term_values(:genre)
|
574
|
-
issuance = self.term_values([:origin_info
|
297
|
+
issuance = self.term_values([:origin_info, :issuance])
|
575
298
|
types.each do |type|
|
576
299
|
case type
|
577
300
|
when 'cartographic'
|
@@ -583,7 +306,7 @@ module Stanford
|
|
583
306
|
when 'notated music'
|
584
307
|
val << 'Music score'
|
585
308
|
when 'software, multimedia'
|
586
|
-
if genres
|
309
|
+
if genres && (genres.include?('dataset') || genres.include?('Dataset'))
|
587
310
|
val << 'Dataset'
|
588
311
|
else
|
589
312
|
val << 'Software/Multimedia'
|
@@ -595,10 +318,10 @@ module Stanford
|
|
595
318
|
when 'still image'
|
596
319
|
val << 'Image'
|
597
320
|
when 'text'
|
598
|
-
val << 'Book' if genres
|
599
|
-
val << 'Book' if issuance
|
600
|
-
val << 'Book' if genres
|
601
|
-
val << 'Journal/Periodical' if issuance
|
321
|
+
val << 'Book' if genres && !(genres & article_genres).empty?
|
322
|
+
val << 'Book' if issuance && issuance.include?('monographic')
|
323
|
+
val << 'Book' if genres && !(genres & book_genres).empty?
|
324
|
+
val << 'Journal/Periodical' if issuance && issuance.include?('continuing')
|
602
325
|
when 'three dimensional object'
|
603
326
|
val << 'Object'
|
604
327
|
end
|
@@ -633,195 +356,21 @@ module Stanford
|
|
633
356
|
|
634
357
|
# @return [String] value with the numeric catkey in it, or nil if none exists
|
635
358
|
def catkey
|
636
|
-
catkey=self.term_values([:record_info
|
637
|
-
if catkey
|
638
|
-
return catkey.first.
|
359
|
+
catkey = self.term_values([:record_info, :recordIdentifier])
|
360
|
+
if catkey && catkey.length > 0
|
361
|
+
return catkey.first.tr('a', '') # ensure catkey is numeric only
|
639
362
|
end
|
640
363
|
nil
|
641
364
|
end
|
642
|
-
def druid= new_druid
|
643
|
-
@druid=new_druid
|
644
|
-
end
|
645
|
-
def druid
|
646
|
-
@druid ? @druid : 'Unknown item'
|
647
|
-
end
|
648
|
-
|
649
|
-
# protected ----------------------------------------------------------
|
650
|
-
|
651
|
-
# convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
|
652
|
-
def subject_names
|
653
|
-
@subject_names ||= self.sw_subject_names
|
654
|
-
end
|
655
|
-
|
656
|
-
# convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
|
657
|
-
def subject_occupations
|
658
|
-
@subject_occupations ||= self.term_values([:subject, :occupation])
|
659
|
-
end
|
660
|
-
|
661
|
-
# convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
|
662
|
-
def subject_temporal
|
663
|
-
@subject_temporal ||= self.term_values([:subject, :temporal])
|
664
|
-
end
|
665
365
|
|
666
|
-
|
667
|
-
|
668
|
-
@subject_titles ||= self.sw_subject_titles
|
366
|
+
def druid=(new_druid)
|
367
|
+
@druid = new_druid
|
669
368
|
end
|
670
369
|
|
671
|
-
|
672
|
-
|
673
|
-
@subject_topics ||= self.term_values([:subject, :topic])
|
674
|
-
end
|
675
|
-
|
676
|
-
#get a 4 digit year like 1865 from the date array
|
677
|
-
def get_plain_four_digit_year dates
|
678
|
-
dates.each do |f_date|
|
679
|
-
matches=f_date.scan(/\d{4}/)
|
680
|
-
if matches.length == 1
|
681
|
-
@pub_year=matches.first
|
682
|
-
return matches.first
|
683
|
-
else
|
684
|
-
#if there are multiples, check for ones with CE after them
|
685
|
-
matches.each do |match|
|
686
|
-
#look for things like '1865-6 CE'
|
687
|
-
pos = f_date.index(Regexp.new(match+'...CE'))
|
688
|
-
pos = pos ? pos.to_i : 0
|
689
|
-
if f_date.include?(match+' CE') or pos > 0
|
690
|
-
@pub_year=match
|
691
|
-
return match
|
692
|
-
end
|
693
|
-
end
|
694
|
-
return matches.first
|
695
|
-
end
|
696
|
-
end
|
697
|
-
return nil
|
698
|
-
end
|
699
|
-
|
700
|
-
# If a year has a "u" in it, replace instances of u with 0
|
701
|
-
# @param [String] dates
|
702
|
-
# @return String
|
703
|
-
def get_u_year dates
|
704
|
-
dates.each do |f_date|
|
705
|
-
# Single digit u notation
|
706
|
-
matches = f_date.scan(/\d{3}u/)
|
707
|
-
if matches.length == 1
|
708
|
-
return matches.first.gsub('u','0')
|
709
|
-
end
|
710
|
-
# Double digit u notation
|
711
|
-
matches = f_date.scan(/\d{2}u{2}/)
|
712
|
-
if matches.length == 1
|
713
|
-
return matches.first.gsub('u','-')
|
714
|
-
end
|
715
|
-
end
|
716
|
-
return nil
|
717
|
-
end
|
718
|
-
|
719
|
-
#get a double digit century like '12th century' from the date array
|
720
|
-
def get_double_digit_century dates
|
721
|
-
dates.each do |f_date|
|
722
|
-
matches=f_date.scan(/\d{2}th/)
|
723
|
-
if matches.length == 1
|
724
|
-
@pub_year=((matches.first[0,2].to_i)-1).to_s+'--'
|
725
|
-
return @pub_year
|
726
|
-
end
|
727
|
-
#if there are multiples, check for ones with CE after them
|
728
|
-
if matches.length > 0
|
729
|
-
matches.each do |match|
|
730
|
-
pos = f_date.index(Regexp.new(match+'...CE'))
|
731
|
-
pos = pos ? pos.to_i : f_date.index(Regexp.new(match+' century CE'))
|
732
|
-
pos = pos ? pos.to_i : 0
|
733
|
-
if f_date.include?(match+' CE') or pos > 0
|
734
|
-
@pub_year=((match[0,2].to_i) - 1).to_s+'--'
|
735
|
-
return @pub_year
|
736
|
-
end
|
737
|
-
end
|
738
|
-
end
|
739
|
-
end
|
740
|
-
return nil
|
741
|
-
end
|
742
|
-
|
743
|
-
#get a 3 digit year like 965 from the date array
|
744
|
-
def get_three_digit_year dates
|
745
|
-
dates.each do |f_date|
|
746
|
-
matches=f_date.scan(/\d{3}/)
|
747
|
-
if matches.length > 0
|
748
|
-
return matches.first
|
749
|
-
end
|
750
|
-
end
|
751
|
-
return nil
|
752
|
-
end
|
753
|
-
#get the 3 digit BC year, return it as a negative, so -700 for 300 BC. Other methods will translate it to proper display, this is good for sorting.
|
754
|
-
def get_bc_year dates
|
755
|
-
dates.each do |f_date|
|
756
|
-
matches=f_date.scan(/\d{3} B.C./)
|
757
|
-
if matches.length > 0
|
758
|
-
bc_year=matches.first[0..2]
|
759
|
-
return (bc_year.to_i-1000).to_s
|
760
|
-
end
|
761
|
-
end
|
762
|
-
return nil
|
763
|
-
end
|
764
|
-
|
765
|
-
#get a single digit century like '9th century' from the date array
|
766
|
-
def get_single_digit_century dates
|
767
|
-
dates.each do |f_date|
|
768
|
-
matches=f_date.scan(/\d{1}th/)
|
769
|
-
if matches.length == 1
|
770
|
-
@pub_year=((matches.first[0,2].to_i)-1).to_s+'--'
|
771
|
-
return @pub_year
|
772
|
-
end
|
773
|
-
#if there are multiples, check for ones with CE after them
|
774
|
-
if matches.length > 0
|
775
|
-
matches.each do |match|
|
776
|
-
pos = f_date.index(Regexp.new(match+'...CE'))
|
777
|
-
pos = pos ? pos.to_i : f_date.index(Regexp.new(match+' century CE'))
|
778
|
-
pos = pos ? pos.to_i : 0
|
779
|
-
if f_date.include?(match+' CE') or pos > 0
|
780
|
-
@pub_year=((match[0,1].to_i) - 1).to_s+'--'
|
781
|
-
return @pub_year
|
782
|
-
end
|
783
|
-
end
|
784
|
-
end
|
785
|
-
end
|
786
|
-
return nil
|
787
|
-
end
|
788
|
-
|
789
|
-
# @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding="marc"
|
790
|
-
def dates_marc_encoding
|
791
|
-
@dates_marc_encoding ||= begin
|
792
|
-
parse_dates_from_originInfo
|
793
|
-
@dates_marc_encoding
|
794
|
-
end
|
795
|
-
end
|
796
|
-
|
797
|
-
# @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding not "marc"
|
798
|
-
def dates_no_marc_encoding
|
799
|
-
@dates_no_marc_encoding ||= begin
|
800
|
-
parse_dates_from_originInfo
|
801
|
-
@dates_no_marc_encoding
|
802
|
-
end
|
370
|
+
def druid
|
371
|
+
@druid ? @druid : 'Unknown item'
|
803
372
|
end
|
804
373
|
|
805
|
-
# Populate @dates_marc_encoding and @dates_no_marc_encoding from dateIssued and dateCreated tags from origin_info
|
806
|
-
# with and without encoding=marc
|
807
|
-
def parse_dates_from_originInfo
|
808
|
-
@dates_marc_encoding = []
|
809
|
-
@dates_no_marc_encoding = []
|
810
|
-
self.origin_info.dateIssued.each { |di|
|
811
|
-
if di.encoding == "marc"
|
812
|
-
@dates_marc_encoding << di.text
|
813
|
-
else
|
814
|
-
@dates_no_marc_encoding << di.text
|
815
|
-
end
|
816
|
-
}
|
817
|
-
self.origin_info.dateCreated.each { |dc|
|
818
|
-
if dc.encoding == "marc"
|
819
|
-
@dates_marc_encoding << dc.text
|
820
|
-
else
|
821
|
-
@dates_no_marc_encoding << dc.text
|
822
|
-
end
|
823
|
-
}
|
824
|
-
end
|
825
374
|
end # class Record
|
826
375
|
end # Module Mods
|
827
376
|
end # Module Stanford
|