stanford-mods 1.3.3 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +1 -0
- data/.rubocop.yml +4 -0
- data/Gemfile +1 -0
- data/lib/stanford-mods.rb +5 -5
- data/lib/stanford-mods/date_parsing.rb +245 -0
- data/lib/stanford-mods/origin_info.rb +411 -0
- data/lib/stanford-mods/searchworks.rb +23 -474
- data/lib/stanford-mods/searchworks_subjects.rb +208 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +746 -0
- data/spec/fixtures/spotlight_pub_date_data.rb +316 -0
- data/spec/origin_info_spec.rb +449 -0
- data/spec/searchworks_pub_dates_spec.rb +166 -163
- data/spec/spec_helper.rb +16 -5
- data/stanford-mods.gemspec +2 -0
- metadata +25 -2
@@ -1,5 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
require 'stanford-mods/searchworks_languages'
|
3
|
+
require 'stanford-mods/searchworks_subjects'
|
3
4
|
require 'logger'
|
4
5
|
require 'mods'
|
5
6
|
|
@@ -207,289 +208,11 @@ module Stanford
|
|
207
208
|
# ---- end TITLE ----
|
208
209
|
|
209
210
|
# ---- SUBJECT ----
|
210
|
-
|
211
|
-
# Values are the contents of:
|
212
|
-
# subject/geographic
|
213
|
-
# subject/hierarchicalGeographic
|
214
|
-
# subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
|
215
|
-
# @param [String] sep - the separator string for joining hierarchicalGeographic sub elements
|
216
|
-
# @return [Array<String>] values for geographic_search Solr field for this document or [] if none
|
217
|
-
def sw_geographic_search(sep = ' ')
|
218
|
-
result = term_values([:subject, :geographic]) || []
|
219
|
-
|
220
|
-
# hierarchicalGeographic has sub elements
|
221
|
-
@mods_ng_xml.subject.hierarchicalGeographic.each { |hg_node|
|
222
|
-
hg_vals = []
|
223
|
-
hg_node.element_children.each { |e|
|
224
|
-
hg_vals << e.text unless e.text.empty?
|
225
|
-
}
|
226
|
-
result << hg_vals.join(sep) unless hg_vals.empty?
|
227
|
-
}
|
228
|
-
|
229
|
-
trans_code_vals = @mods_ng_xml.subject.geographicCode.translated_value
|
230
|
-
if trans_code_vals
|
231
|
-
trans_code_vals.each { |val|
|
232
|
-
result << val if !result.include?(val)
|
233
|
-
}
|
234
|
-
end
|
235
|
-
|
236
|
-
result
|
237
|
-
end
|
238
|
-
|
239
|
-
# Values are the contents of:
|
240
|
-
# subject/name/namePart
|
241
|
-
# "Values from namePart subelements should be concatenated in the order they appear (e.g. "Shakespeare, William, 1564-1616")"
|
242
|
-
# @param [String] sep - the separator string for joining namePart sub elements
|
243
|
-
# @return [Array<String>] values for names inside subject elements or [] if none
|
244
|
-
def sw_subject_names(sep = ', ')
|
245
|
-
result = []
|
246
|
-
@mods_ng_xml.subject.name_el.select { |n_el| n_el.namePart }.each { |name_el_w_np|
|
247
|
-
parts = name_el_w_np.namePart.map { |npn| npn.text unless npn.text.empty? }.compact
|
248
|
-
result << parts.join(sep).strip unless parts.empty?
|
249
|
-
}
|
250
|
-
result
|
251
|
-
end
|
252
|
-
|
253
|
-
# Values are the contents of:
|
254
|
-
# subject/titleInfo/(subelements)
|
255
|
-
# @param [String] sep - the separator string for joining titleInfo sub elements
|
256
|
-
# @return [Array<String>] values for titles inside subject elements or [] if none
|
257
|
-
def sw_subject_titles(sep = ' ')
|
258
|
-
result = []
|
259
|
-
@mods_ng_xml.subject.titleInfo.each { |ti_el|
|
260
|
-
parts = ti_el.element_children.map { |el| el.text unless el.text.empty? }.compact
|
261
|
-
result << parts.join(sep).strip unless parts.empty?
|
262
|
-
}
|
263
|
-
result
|
264
|
-
end
|
265
|
-
|
266
|
-
# Values are the contents of:
|
267
|
-
# mods/genre
|
268
|
-
# mods/subject/topic
|
269
|
-
# @return [Array<String>] values for the topic_search Solr field for this document or nil if none
|
270
|
-
def topic_search
|
271
|
-
@topic_search ||= begin
|
272
|
-
vals = self.term_values(:genre) || []
|
273
|
-
vals.concat(subject_topics) if subject_topics
|
274
|
-
vals.empty? ? nil : vals
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
# Values are the contents of:
|
279
|
-
# subject/topic
|
280
|
-
# subject/name
|
281
|
-
# subject/title
|
282
|
-
# subject/occupation
|
283
|
-
# with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
284
|
-
# @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
|
285
|
-
def topic_facet
|
286
|
-
vals = subject_topics ? Array.new(subject_topics) : []
|
287
|
-
vals.concat(subject_names) if subject_names
|
288
|
-
vals.concat(subject_titles) if subject_titles
|
289
|
-
vals.concat(subject_occupations) if subject_occupations
|
290
|
-
vals.map! { |val|
|
291
|
-
v = val.sub(/[\\,;]$/, '')
|
292
|
-
v.strip
|
293
|
-
}
|
294
|
-
vals.empty? ? nil : vals
|
295
|
-
end
|
296
|
-
|
297
|
-
# geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
298
|
-
# @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
|
299
|
-
def geographic_facet
|
300
|
-
geographic_search.map { |val| val.sub(/[\\,;]$/, '').strip } unless !geographic_search
|
301
|
-
end
|
302
|
-
|
303
|
-
# subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
|
304
|
-
# @return [Array<String>] values for the era_facet Solr field for this document or nil if none
|
305
|
-
def era_facet
|
306
|
-
subject_temporal.map { |val| val.sub(/[\\,;]$/, '').strip } unless !subject_temporal
|
307
|
-
end
|
308
|
-
|
309
|
-
# Values are the contents of:
|
310
|
-
# subject/geographic
|
311
|
-
# subject/hierarchicalGeographic
|
312
|
-
# subject/geographicCode (only include the translated value if it isn't already present from other mods geo fields)
|
313
|
-
# @return [Array<String>] values for the geographic_search Solr field for this document or nil if none
|
314
|
-
def geographic_search
|
315
|
-
@geographic_search ||= begin
|
316
|
-
result = self.sw_geographic_search
|
317
|
-
|
318
|
-
# TODO: this should go into stanford-mods ... but then we have to set that gem up with a Logger
|
319
|
-
# print a message for any unrecognized encodings
|
320
|
-
xvals = self.subject.geographicCode.translated_value
|
321
|
-
codes = self.term_values([:subject, :geographicCode])
|
322
|
-
if codes && codes.size > xvals.size
|
323
|
-
self.subject.geographicCode.each { |n|
|
324
|
-
if n.authority != 'marcgac' && n.authority != 'marccountry'
|
325
|
-
sw_logger.info("#{druid} has subject geographicCode element with untranslated encoding (#{n.authority}): #{n.to_xml}")
|
326
|
-
end
|
327
|
-
}
|
328
|
-
end
|
329
|
-
|
330
|
-
# FIXME: stanford-mods should be returning [], not nil ...
|
331
|
-
return nil if !result || result.empty?
|
332
|
-
result
|
333
|
-
end
|
334
|
-
end
|
335
|
-
|
336
|
-
# Values are the contents of:
|
337
|
-
# subject/name
|
338
|
-
# subject/occupation - no subelements
|
339
|
-
# subject/titleInfo
|
340
|
-
# @return [Array<String>] values for the subject_other_search Solr field for this document or nil if none
|
341
|
-
def subject_other_search
|
342
|
-
@subject_other_search ||= begin
|
343
|
-
vals = subject_occupations ? Array.new(subject_occupations) : []
|
344
|
-
vals.concat(subject_names) if subject_names
|
345
|
-
vals.concat(subject_titles) if subject_titles
|
346
|
-
vals.empty? ? nil : vals
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
# Values are the contents of:
|
351
|
-
# subject/temporal
|
352
|
-
# subject/genre
|
353
|
-
# @return [Array<String>] values for the subject_other_subvy_search Solr field for this document or nil if none
|
354
|
-
def subject_other_subvy_search
|
355
|
-
@subject_other_subvy_search ||= begin
|
356
|
-
vals = subject_temporal ? Array.new(subject_temporal) : []
|
357
|
-
gvals = self.term_values([:subject, :genre])
|
358
|
-
vals.concat(gvals) if gvals
|
359
|
-
|
360
|
-
# print a message for any temporal encodings
|
361
|
-
self.subject.temporal.each { |n|
|
362
|
-
sw_logger.info("#{druid} has subject temporal element with untranslated encoding: #{n.to_xml}") if !n.encoding.empty?
|
363
|
-
}
|
364
|
-
|
365
|
-
vals.empty? ? nil : vals
|
366
|
-
end
|
367
|
-
end
|
368
|
-
|
369
|
-
# Values are the contents of:
|
370
|
-
# all subject subelements except subject/cartographic plus genre top level element
|
371
|
-
# @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
|
372
|
-
def subject_all_search
|
373
|
-
vals = topic_search ? Array.new(topic_search) : []
|
374
|
-
vals.concat(geographic_search) if geographic_search
|
375
|
-
vals.concat(subject_other_search) if subject_other_search
|
376
|
-
vals.concat(subject_other_subvy_search) if subject_other_subvy_search
|
377
|
-
vals.empty? ? nil : vals
|
378
|
-
end
|
379
|
-
|
211
|
+
# see searchworks_subjects.rb
|
380
212
|
# ---- end SUBJECT ----
|
381
213
|
|
382
214
|
# ---- PUBLICATION (place, year) ----
|
383
|
-
|
384
|
-
vals = self.term_values([:origin_info,:place,:placeTerm])
|
385
|
-
vals
|
386
|
-
end
|
387
|
-
|
388
|
-
# For the date display only, the first place to look is in the dates without encoding=marc array.
|
389
|
-
# If no such dates, select the first date in the dates_marc_encoding array. Otherwise return nil
|
390
|
-
# @return [String] value for the pub_date_display Solr field for this document or nil if none
|
391
|
-
def pub_date_display
|
392
|
-
return dates_no_marc_encoding.first unless dates_no_marc_encoding.empty?
|
393
|
-
return dates_marc_encoding.first unless dates_marc_encoding.empty?
|
394
|
-
return nil
|
395
|
-
end
|
396
|
-
|
397
|
-
# For the date indexing, sorting and faceting, the first place to look is in the dates with encoding=marc array.
|
398
|
-
# If that doesn't exist, look in the dates without encoding=marc array. Otherwise return nil
|
399
|
-
# @return [Array<String>] values for the date Solr field for this document or nil if none
|
400
|
-
def pub_dates
|
401
|
-
return dates_marc_encoding unless dates_marc_encoding.empty?
|
402
|
-
return dates_no_marc_encoding unless dates_no_marc_encoding.empty?
|
403
|
-
return nil
|
404
|
-
end
|
405
|
-
|
406
|
-
def is_number?(object)
|
407
|
-
true if Integer(object) rescue false
|
408
|
-
end
|
409
|
-
def is_date?(object)
|
410
|
-
true if Date.parse(object) rescue false
|
411
|
-
end
|
412
|
-
|
413
|
-
# Get the publish year from mods
|
414
|
-
# @return [String] 4 character year or nil if no valid date was found
|
415
|
-
def pub_year
|
416
|
-
#use the cached year if there is one
|
417
|
-
if @pub_year
|
418
|
-
if @pub_year == ''
|
419
|
-
return nil
|
420
|
-
end
|
421
|
-
return @pub_year
|
422
|
-
end
|
423
|
-
dates = pub_dates
|
424
|
-
if dates
|
425
|
-
pruned_dates = []
|
426
|
-
dates.each do |f_date|
|
427
|
-
#remove ? and []
|
428
|
-
if (f_date.length == 4 && f_date.end_with?('?'))
|
429
|
-
pruned_dates << f_date.gsub('?','0')
|
430
|
-
else
|
431
|
-
pruned_dates << f_date.gsub('?','').gsub('[','').gsub(']','')
|
432
|
-
end
|
433
|
-
end
|
434
|
-
#try to find a date starting with the most normal date formats and progressing to more wonky ones
|
435
|
-
@pub_year = get_plain_four_digit_year pruned_dates
|
436
|
-
return @pub_year if @pub_year
|
437
|
-
# Check for years in u notation, e.g., 198u
|
438
|
-
@pub_year = get_u_year pruned_dates
|
439
|
-
return @pub_year if @pub_year
|
440
|
-
@pub_year = get_double_digit_century pruned_dates
|
441
|
-
return @pub_year if @pub_year
|
442
|
-
@pub_year = get_bc_year pruned_dates
|
443
|
-
return @pub_year if @pub_year
|
444
|
-
@pub_year = get_three_digit_year pruned_dates
|
445
|
-
return @pub_year if @pub_year
|
446
|
-
@pub_year = get_single_digit_century pruned_dates
|
447
|
-
return @pub_year if @pub_year
|
448
|
-
end
|
449
|
-
@pub_year=''
|
450
|
-
return nil
|
451
|
-
end
|
452
|
-
|
453
|
-
#creates a date suitable for sorting. Guarnteed to be 4 digits or nil
|
454
|
-
def pub_date_sort
|
455
|
-
pd=nil
|
456
|
-
if pub_date
|
457
|
-
pd=pub_date
|
458
|
-
if pd.length == 3
|
459
|
-
pd='0'+pd
|
460
|
-
end
|
461
|
-
pd=pd.gsub('--','00')
|
462
|
-
end
|
463
|
-
raise "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd and pd.length !=4
|
464
|
-
pd
|
465
|
-
end
|
466
|
-
|
467
|
-
#The year the object was published, , filtered based on max_pub_date and min_pub_date from the config file
|
468
|
-
#@return [String] 4 character year or nil
|
469
|
-
def pub_date
|
470
|
-
pub_year || nil
|
471
|
-
end
|
472
|
-
|
473
|
-
#Values for the pub date facet. This is less strict than the 4 year date requirements for pub_date
|
474
|
-
#@return <Array[String]> with values for the pub date facet
|
475
|
-
def pub_date_facet
|
476
|
-
if pub_date
|
477
|
-
if pub_date.start_with?('-')
|
478
|
-
return (pub_date.to_i + 1000).to_s + ' B.C.'
|
479
|
-
end
|
480
|
-
if pub_date.include? '--'
|
481
|
-
cent=pub_date[0,2].to_i
|
482
|
-
cent+=1
|
483
|
-
cent=cent.to_s+'th century'
|
484
|
-
return cent
|
485
|
-
else
|
486
|
-
return pub_date
|
487
|
-
end
|
488
|
-
else
|
489
|
-
nil
|
490
|
-
end
|
491
|
-
end
|
492
|
-
|
215
|
+
# see origin_info.rb (as all this information comes from top level originInfo element)
|
493
216
|
# ---- end PUBLICATION (place, year) ----
|
494
217
|
|
495
218
|
def sw_logger
|
@@ -525,23 +248,23 @@ module Stanford
|
|
525
248
|
when 'still image'
|
526
249
|
val << 'Image'
|
527
250
|
when 'text'
|
528
|
-
val << 'Book' if issuance
|
251
|
+
val << 'Book' if issuance && issuance.include?('monographic')
|
529
252
|
book_genres = ['book chapter', 'Book chapter', 'Book Chapter',
|
530
253
|
'issue brief', 'Issue brief', 'Issue Brief',
|
531
254
|
'librettos', 'Librettos',
|
532
255
|
'project report', 'Project report', 'Project Report',
|
533
256
|
'technical report', 'Technical report', 'Technical Report',
|
534
257
|
'working paper', 'Working paper', 'Working Paper']
|
535
|
-
val << 'Book' if genres
|
258
|
+
val << 'Book' if genres && !(genres & book_genres).empty?
|
536
259
|
conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
|
537
|
-
val << 'Conference Proceedings' if genres
|
538
|
-
val << 'Journal/Periodical' if issuance
|
260
|
+
val << 'Conference Proceedings' if genres && !(genres & conf_pub).empty?
|
261
|
+
val << 'Journal/Periodical' if issuance && issuance.include?('continuing')
|
539
262
|
article = ['article', 'Article']
|
540
|
-
val << 'Journal/Periodical' if genres
|
263
|
+
val << 'Journal/Periodical' if genres && !(genres & article).empty?
|
541
264
|
stu_proj_rpt = ['student project report', 'Student project report', 'Student Project report', 'Student Project Report']
|
542
|
-
val << 'Other' if genres
|
265
|
+
val << 'Other' if genres && !(genres & stu_proj_rpt).empty?
|
543
266
|
thesis = ['thesis', 'Thesis']
|
544
|
-
val << 'Thesis' if genres
|
267
|
+
val << 'Thesis' if genres && !(genres & thesis).empty?
|
545
268
|
when 'three dimensional object'
|
546
269
|
val << 'Other'
|
547
270
|
end
|
@@ -571,7 +294,7 @@ module Stanford
|
|
571
294
|
]
|
572
295
|
if types
|
573
296
|
genres = self.term_values(:genre)
|
574
|
-
issuance = self.term_values([:origin_info
|
297
|
+
issuance = self.term_values([:origin_info, :issuance])
|
575
298
|
types.each do |type|
|
576
299
|
case type
|
577
300
|
when 'cartographic'
|
@@ -583,7 +306,7 @@ module Stanford
|
|
583
306
|
when 'notated music'
|
584
307
|
val << 'Music score'
|
585
308
|
when 'software, multimedia'
|
586
|
-
if genres
|
309
|
+
if genres && (genres.include?('dataset') || genres.include?('Dataset'))
|
587
310
|
val << 'Dataset'
|
588
311
|
else
|
589
312
|
val << 'Software/Multimedia'
|
@@ -595,10 +318,10 @@ module Stanford
|
|
595
318
|
when 'still image'
|
596
319
|
val << 'Image'
|
597
320
|
when 'text'
|
598
|
-
val << 'Book' if genres
|
599
|
-
val << 'Book' if issuance
|
600
|
-
val << 'Book' if genres
|
601
|
-
val << 'Journal/Periodical' if issuance
|
321
|
+
val << 'Book' if genres && !(genres & article_genres).empty?
|
322
|
+
val << 'Book' if issuance && issuance.include?('monographic')
|
323
|
+
val << 'Book' if genres && !(genres & book_genres).empty?
|
324
|
+
val << 'Journal/Periodical' if issuance && issuance.include?('continuing')
|
602
325
|
when 'three dimensional object'
|
603
326
|
val << 'Object'
|
604
327
|
end
|
@@ -633,195 +356,21 @@ module Stanford
|
|
633
356
|
|
634
357
|
# @return [String] value with the numeric catkey in it, or nil if none exists
|
635
358
|
def catkey
|
636
|
-
catkey=self.term_values([:record_info
|
637
|
-
if catkey
|
638
|
-
return catkey.first.
|
359
|
+
catkey = self.term_values([:record_info, :recordIdentifier])
|
360
|
+
if catkey && catkey.length > 0
|
361
|
+
return catkey.first.tr('a', '') # ensure catkey is numeric only
|
639
362
|
end
|
640
363
|
nil
|
641
364
|
end
|
642
|
-
def druid= new_druid
|
643
|
-
@druid=new_druid
|
644
|
-
end
|
645
|
-
def druid
|
646
|
-
@druid ? @druid : 'Unknown item'
|
647
|
-
end
|
648
|
-
|
649
|
-
# protected ----------------------------------------------------------
|
650
|
-
|
651
|
-
# convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
|
652
|
-
def subject_names
|
653
|
-
@subject_names ||= self.sw_subject_names
|
654
|
-
end
|
655
|
-
|
656
|
-
# convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
|
657
|
-
def subject_occupations
|
658
|
-
@subject_occupations ||= self.term_values([:subject, :occupation])
|
659
|
-
end
|
660
|
-
|
661
|
-
# convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
|
662
|
-
def subject_temporal
|
663
|
-
@subject_temporal ||= self.term_values([:subject, :temporal])
|
664
|
-
end
|
665
365
|
|
666
|
-
|
667
|
-
|
668
|
-
@subject_titles ||= self.sw_subject_titles
|
366
|
+
def druid=(new_druid)
|
367
|
+
@druid = new_druid
|
669
368
|
end
|
670
369
|
|
671
|
-
|
672
|
-
|
673
|
-
@subject_topics ||= self.term_values([:subject, :topic])
|
674
|
-
end
|
675
|
-
|
676
|
-
#get a 4 digit year like 1865 from the date array
|
677
|
-
def get_plain_four_digit_year dates
|
678
|
-
dates.each do |f_date|
|
679
|
-
matches=f_date.scan(/\d{4}/)
|
680
|
-
if matches.length == 1
|
681
|
-
@pub_year=matches.first
|
682
|
-
return matches.first
|
683
|
-
else
|
684
|
-
#if there are multiples, check for ones with CE after them
|
685
|
-
matches.each do |match|
|
686
|
-
#look for things like '1865-6 CE'
|
687
|
-
pos = f_date.index(Regexp.new(match+'...CE'))
|
688
|
-
pos = pos ? pos.to_i : 0
|
689
|
-
if f_date.include?(match+' CE') or pos > 0
|
690
|
-
@pub_year=match
|
691
|
-
return match
|
692
|
-
end
|
693
|
-
end
|
694
|
-
return matches.first
|
695
|
-
end
|
696
|
-
end
|
697
|
-
return nil
|
698
|
-
end
|
699
|
-
|
700
|
-
# If a year has a "u" in it, replace instances of u with 0
|
701
|
-
# @param [String] dates
|
702
|
-
# @return String
|
703
|
-
def get_u_year dates
|
704
|
-
dates.each do |f_date|
|
705
|
-
# Single digit u notation
|
706
|
-
matches = f_date.scan(/\d{3}u/)
|
707
|
-
if matches.length == 1
|
708
|
-
return matches.first.gsub('u','0')
|
709
|
-
end
|
710
|
-
# Double digit u notation
|
711
|
-
matches = f_date.scan(/\d{2}u{2}/)
|
712
|
-
if matches.length == 1
|
713
|
-
return matches.first.gsub('u','-')
|
714
|
-
end
|
715
|
-
end
|
716
|
-
return nil
|
717
|
-
end
|
718
|
-
|
719
|
-
#get a double digit century like '12th century' from the date array
|
720
|
-
def get_double_digit_century dates
|
721
|
-
dates.each do |f_date|
|
722
|
-
matches=f_date.scan(/\d{2}th/)
|
723
|
-
if matches.length == 1
|
724
|
-
@pub_year=((matches.first[0,2].to_i)-1).to_s+'--'
|
725
|
-
return @pub_year
|
726
|
-
end
|
727
|
-
#if there are multiples, check for ones with CE after them
|
728
|
-
if matches.length > 0
|
729
|
-
matches.each do |match|
|
730
|
-
pos = f_date.index(Regexp.new(match+'...CE'))
|
731
|
-
pos = pos ? pos.to_i : f_date.index(Regexp.new(match+' century CE'))
|
732
|
-
pos = pos ? pos.to_i : 0
|
733
|
-
if f_date.include?(match+' CE') or pos > 0
|
734
|
-
@pub_year=((match[0,2].to_i) - 1).to_s+'--'
|
735
|
-
return @pub_year
|
736
|
-
end
|
737
|
-
end
|
738
|
-
end
|
739
|
-
end
|
740
|
-
return nil
|
741
|
-
end
|
742
|
-
|
743
|
-
#get a 3 digit year like 965 from the date array
|
744
|
-
def get_three_digit_year dates
|
745
|
-
dates.each do |f_date|
|
746
|
-
matches=f_date.scan(/\d{3}/)
|
747
|
-
if matches.length > 0
|
748
|
-
return matches.first
|
749
|
-
end
|
750
|
-
end
|
751
|
-
return nil
|
752
|
-
end
|
753
|
-
#get the 3 digit BC year, return it as a negative, so -700 for 300 BC. Other methods will translate it to proper display, this is good for sorting.
|
754
|
-
def get_bc_year dates
|
755
|
-
dates.each do |f_date|
|
756
|
-
matches=f_date.scan(/\d{3} B.C./)
|
757
|
-
if matches.length > 0
|
758
|
-
bc_year=matches.first[0..2]
|
759
|
-
return (bc_year.to_i-1000).to_s
|
760
|
-
end
|
761
|
-
end
|
762
|
-
return nil
|
763
|
-
end
|
764
|
-
|
765
|
-
#get a single digit century like '9th century' from the date array
|
766
|
-
def get_single_digit_century dates
|
767
|
-
dates.each do |f_date|
|
768
|
-
matches=f_date.scan(/\d{1}th/)
|
769
|
-
if matches.length == 1
|
770
|
-
@pub_year=((matches.first[0,2].to_i)-1).to_s+'--'
|
771
|
-
return @pub_year
|
772
|
-
end
|
773
|
-
#if there are multiples, check for ones with CE after them
|
774
|
-
if matches.length > 0
|
775
|
-
matches.each do |match|
|
776
|
-
pos = f_date.index(Regexp.new(match+'...CE'))
|
777
|
-
pos = pos ? pos.to_i : f_date.index(Regexp.new(match+' century CE'))
|
778
|
-
pos = pos ? pos.to_i : 0
|
779
|
-
if f_date.include?(match+' CE') or pos > 0
|
780
|
-
@pub_year=((match[0,1].to_i) - 1).to_s+'--'
|
781
|
-
return @pub_year
|
782
|
-
end
|
783
|
-
end
|
784
|
-
end
|
785
|
-
end
|
786
|
-
return nil
|
787
|
-
end
|
788
|
-
|
789
|
-
# @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding="marc"
|
790
|
-
def dates_marc_encoding
|
791
|
-
@dates_marc_encoding ||= begin
|
792
|
-
parse_dates_from_originInfo
|
793
|
-
@dates_marc_encoding
|
794
|
-
end
|
795
|
-
end
|
796
|
-
|
797
|
-
# @return [Array<String>] dates from dateIssued and dateCreated tags from origin_info with encoding not "marc"
|
798
|
-
def dates_no_marc_encoding
|
799
|
-
@dates_no_marc_encoding ||= begin
|
800
|
-
parse_dates_from_originInfo
|
801
|
-
@dates_no_marc_encoding
|
802
|
-
end
|
370
|
+
def druid
|
371
|
+
@druid ? @druid : 'Unknown item'
|
803
372
|
end
|
804
373
|
|
805
|
-
# Populate @dates_marc_encoding and @dates_no_marc_encoding from dateIssued and dateCreated tags from origin_info
|
806
|
-
# with and without encoding=marc
|
807
|
-
def parse_dates_from_originInfo
|
808
|
-
@dates_marc_encoding = []
|
809
|
-
@dates_no_marc_encoding = []
|
810
|
-
self.origin_info.dateIssued.each { |di|
|
811
|
-
if di.encoding == "marc"
|
812
|
-
@dates_marc_encoding << di.text
|
813
|
-
else
|
814
|
-
@dates_no_marc_encoding << di.text
|
815
|
-
end
|
816
|
-
}
|
817
|
-
self.origin_info.dateCreated.each { |dc|
|
818
|
-
if dc.encoding == "marc"
|
819
|
-
@dates_marc_encoding << dc.text
|
820
|
-
else
|
821
|
-
@dates_no_marc_encoding << dc.text
|
822
|
-
end
|
823
|
-
}
|
824
|
-
end
|
825
374
|
end # class Record
|
826
375
|
end # Module Mods
|
827
376
|
end # Module Stanford
|