stanford-mods 2.2.0 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop_todo.yml +358 -3
- data/.travis.yml +1 -4
- data/Gemfile +3 -4
- data/Rakefile +3 -3
- data/lib/marc_countries.rb +381 -381
- data/lib/stanford-mods.rb +1 -0
- data/lib/stanford-mods/coordinate.rb +14 -36
- data/lib/stanford-mods/date_parsing.rb +7 -8
- data/lib/stanford-mods/geo_spatial.rb +32 -12
- data/lib/stanford-mods/geo_utils.rb +28 -0
- data/lib/stanford-mods/imprint.rb +1 -1
- data/lib/stanford-mods/name.rb +7 -15
- data/lib/stanford-mods/origin_info.rb +35 -37
- data/lib/stanford-mods/physical_location.rb +6 -4
- data/lib/stanford-mods/searchworks.rb +149 -182
- data/lib/stanford-mods/searchworks_languages.rb +3 -3
- data/lib/stanford-mods/searchworks_subjects.rb +16 -18
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +17 -19
- data/spec/fixtures/searchworks_imprint_data.rb +1 -1
- data/spec/fixtures/searchworks_pub_date_data.rb +1 -1
- data/spec/fixtures/spotlight_pub_date_data.rb +1 -1
- data/spec/geo_spatial_spec.rb +51 -5
- data/spec/imprint_spec.rb +4 -6
- data/spec/lib/stanford-mods/coordinate_spec.rb +0 -2
- data/spec/name_spec.rb +0 -2
- data/spec/origin_info_spec.rb +26 -28
- data/spec/physical_location_spec.rb +0 -3
- data/spec/searchworks_basic_spec.rb +1 -2
- data/spec/searchworks_format_spec.rb +35 -39
- data/spec/searchworks_pub_dates_spec.rb +0 -5
- data/spec/searchworks_spec.rb +1 -7
- data/spec/searchworks_subject_raw_spec.rb +0 -5
- data/spec/searchworks_subject_spec.rb +2 -9
- data/spec/searchworks_title_spec.rb +0 -2
- data/spec/spec_helper.rb +2 -5
- data/spec/sw_publication_spec.rb +1 -2
- data/stanford-mods.gemspec +1 -2
- metadata +4 -4
data/lib/stanford-mods.rb
CHANGED
@@ -4,26 +4,28 @@ module Stanford
|
|
4
4
|
##
|
5
5
|
# Geospatial coordinate parsing
|
6
6
|
class Coordinate
|
7
|
+
require 'stanford-mods/geo_utils'
|
8
|
+
include ::Stanford::Mods::GeoUtils
|
9
|
+
|
7
10
|
attr_reader :value
|
8
11
|
|
9
12
|
def initialize(value)
|
10
13
|
@value = value
|
11
14
|
end
|
12
15
|
|
13
|
-
#
|
16
|
+
# @return [String] the coordinate in WKT/CQL ENVELOPE representation
|
14
17
|
def as_envelope
|
15
18
|
return unless valid?
|
16
|
-
|
17
19
|
"ENVELOPE(#{bounds[:min_x]}, #{bounds[:max_x]}, #{bounds[:max_y]}, #{bounds[:min_y]})"
|
18
20
|
end
|
19
21
|
|
20
|
-
#
|
22
|
+
# @return [String] the coordinate in Solr 4.x+ bbox-format representation
|
21
23
|
def as_bbox
|
22
24
|
return unless valid?
|
23
|
-
|
24
25
|
"#{bounds[:min_x]} #{bounds[:min_y]} #{bounds[:max_x]} #{bounds[:max_y]}"
|
25
26
|
end
|
26
27
|
|
28
|
+
# @return [Boolean] true iff the coordinates are geographically valid
|
27
29
|
def valid?
|
28
30
|
return false if bounds.empty?
|
29
31
|
|
@@ -40,42 +42,18 @@ module Stanford
|
|
40
42
|
|
41
43
|
def bounds
|
42
44
|
@bounds ||= begin
|
43
|
-
matches =
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
{ min_x: min_x, min_y: min_y, max_x: max_x, max_y: max_y }
|
50
|
-
else
|
51
|
-
{}
|
52
|
-
end
|
45
|
+
matches = cleaner_coordinate(value).match %r{\A(?<lat>[EW].+-+.+)\s*/\s*(?<lng>[NS].+-+.+)\Z}
|
46
|
+
return {} unless matches
|
47
|
+
min_x, max_x = matches['lat'].split(/-+/).map { |x| coord_to_decimal(x) }.minmax
|
48
|
+
min_y, max_y = matches['lng'].split(/-+/).map { |y| coord_to_decimal(y) }.minmax
|
49
|
+
{ min_x: min_x, min_y: min_y, max_x: max_x, max_y: max_y }
|
53
50
|
end
|
54
51
|
end
|
55
52
|
|
53
|
+
# @deprecated see GeoUtils
|
56
54
|
def coord
|
57
|
-
|
58
|
-
|
59
|
-
if matches
|
60
|
-
matches[1]
|
61
|
-
else
|
62
|
-
value
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def coord_to_decimal(point)
|
67
|
-
regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
|
68
|
-
match = regex.match(point)
|
69
|
-
|
70
|
-
return Float::INFINITY unless match
|
71
|
-
|
72
|
-
dec = match['deg'].to_i
|
73
|
-
dec += match['min'].to_f / 60
|
74
|
-
dec += match['sec'].to_f / 60 / 60
|
75
|
-
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
76
|
-
|
77
|
-
dec
|
55
|
+
cleaner_coordinate(value)
|
78
56
|
end
|
79
57
|
end
|
80
58
|
end
|
81
|
-
end
|
59
|
+
end
|
@@ -6,7 +6,6 @@ module Stanford
|
|
6
6
|
# - we may want an integer or date sort field as well as lexical
|
7
7
|
# - we could add methods like my_date.bc?
|
8
8
|
class DateParsing
|
9
|
-
|
10
9
|
# get display value for year, generally an explicit year or "17th century" or "5 B.C." or "1950s" or '845 A.D.'
|
11
10
|
# @return [String, nil] display value for year if we could parse one, nil otherwise
|
12
11
|
def self.date_str_for_display(date_str)
|
@@ -196,7 +195,7 @@ module Stanford
|
|
196
195
|
# @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
197
196
|
def sortable_year_for_century
|
198
197
|
return unless orig_date_str
|
199
|
-
return if orig_date_str
|
198
|
+
return if orig_date_str =~ /B\.C\./
|
200
199
|
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
201
200
|
if century_matches
|
202
201
|
return $1 + '00' if $1.length == 2
|
@@ -215,7 +214,7 @@ module Stanford
|
|
215
214
|
# @return [String, nil] yy(th) Century if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
216
215
|
def display_str_for_century
|
217
216
|
return unless orig_date_str
|
218
|
-
return if orig_date_str
|
217
|
+
return if orig_date_str =~ /B\.C\./
|
219
218
|
century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
|
220
219
|
return century_str_matches.to_s if century_str_matches
|
221
220
|
|
@@ -263,7 +262,7 @@ module Stanford
|
|
263
262
|
# @return [String, nil] String sortable -ddd if orig_date_str matches pattern; nil otherwise
|
264
263
|
def sortable_year_str_for_early_numeric
|
265
264
|
return unless orig_date_str.match(EARLY_NUMERIC)
|
266
|
-
if orig_date_str
|
265
|
+
if orig_date_str =~ /^\-/
|
267
266
|
# negative number becomes x - 1000 for sorting; -005 for -995
|
268
267
|
num = orig_date_str[1..-1].to_i - 1000
|
269
268
|
return '-' + num.to_s[1..-1].rjust(3, '0')
|
@@ -276,7 +275,7 @@ module Stanford
|
|
276
275
|
# @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
|
277
276
|
def sortable_year_int_for_early_numeric
|
278
277
|
return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
|
279
|
-
orig_date_str.to_i if orig_date_str
|
278
|
+
orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
|
280
279
|
end
|
281
280
|
|
282
281
|
# get display value for date String containing yyy, yy, y, -y, -yy, -yyy
|
@@ -290,7 +289,7 @@ module Stanford
|
|
290
289
|
# return 1 B.C. when the date is 0 since there is no 0 year
|
291
290
|
return '1 B.C.' if orig_date_str == '0'
|
292
291
|
# negative number becomes B.C.
|
293
|
-
return
|
292
|
+
return "#{orig_date_str[1..-1].to_i + 1} B.C." if orig_date_str =~ /^\-/
|
294
293
|
# remove leading 0s from early dates
|
295
294
|
"#{orig_date_str.to_i} A.D."
|
296
295
|
end
|
@@ -301,10 +300,10 @@ module Stanford
|
|
301
300
|
# has made this method bogus.
|
302
301
|
# @return [String, nil] sortable 4 digit year (e.g. 1865, 0950) if orig_date_str is parseable via ruby Date, nil otherwise
|
303
302
|
def year_via_ruby_parsing
|
304
|
-
return unless orig_date_str
|
303
|
+
return unless orig_date_str =~ /\d\d/ # need at least 2 digits
|
305
304
|
# need more in string than only 2 digits
|
306
305
|
return if orig_date_str.match(/^\d\d$/) || orig_date_str.match(/^\D*\d\d\D*$/)
|
307
|
-
return if orig_date_str
|
306
|
+
return if orig_date_str =~ /\d\s*B.C./ # skip B.C. dates
|
308
307
|
date_obj = Date.parse(orig_date_str)
|
309
308
|
date_obj.year.to_s
|
310
309
|
rescue ArgumentError
|
@@ -1,29 +1,49 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require 'logger'
|
3
2
|
require 'mods'
|
4
3
|
|
5
4
|
module Stanford
|
6
5
|
module Mods
|
7
6
|
# NON-SearchWorks specific wranglings of MODS cartographics metadata
|
8
7
|
class Record < ::Mods::Record
|
8
|
+
GMLNS = 'http://www.opengis.net/gml/3.2/'.freeze
|
9
|
+
|
10
|
+
# @return [Array{String}] subject cartographic coordinates values
|
9
11
|
def coordinates
|
10
|
-
Array(
|
12
|
+
Array(mods_ng_xml.subject.cartographics.coordinates).map(&:text)
|
11
13
|
end
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
-
|
15
|
+
# @return [Array{String}] values suitable for solr SRPT fields, like "ENVELOPE(-16.0, 28.0, 13.0, -15.0)"
|
16
|
+
# @note example xml leaf nodes
|
17
|
+
# <gml:lowerCorner>-122.191292 37.4063388</gml:lowerCorner>
|
18
|
+
# <gml:upperCorner>-122.149475 37.4435369</gml:upperCorner>
|
19
|
+
def geo_extensions_as_envelope
|
20
|
+
mods_ng_xml.extension
|
21
|
+
.xpath('//rdf:RDF/rdf:Description/gml:boundedBy/gml:Envelope',
|
22
|
+
'gml' => GMLNS,
|
23
|
+
'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
24
|
+
).map do |v|
|
25
|
+
uppers = v.xpath('gml:upperCorner', 'gml' => GMLNS).text.split
|
26
|
+
lowers = v.xpath('gml:lowerCorner', 'gml' => GMLNS).text.split
|
27
|
+
"ENVELOPE(#{lowers[0]}, #{uppers[0]}, #{uppers[1]}, #{lowers[1]})"
|
28
|
+
end
|
29
|
+
rescue RuntimeError => e
|
30
|
+
logger.warn "failure parsing <extension> element: #{e.message}"
|
31
|
+
[]
|
32
|
+
end
|
16
33
|
|
17
|
-
|
18
|
-
|
34
|
+
# @return [Array{Stanford::Mods::Coordinate}] valid coordinates as objects
|
35
|
+
def coordinates_objects
|
36
|
+
coordinates.map { |n| Stanford::Mods::Coordinate.new(n) }.select(&:valid?)
|
19
37
|
end
|
20
38
|
|
21
|
-
|
22
|
-
|
23
|
-
|
39
|
+
# @return [Array{String}] values suitable for solr SRPT fields, like "ENVELOPE(-16.0, 28.0, 13.0, -15.0)"
|
40
|
+
def coordinates_as_envelope
|
41
|
+
coordinates_objects.map(&:as_envelope).compact
|
42
|
+
end
|
24
43
|
|
25
|
-
|
26
|
-
|
44
|
+
# @return [Array{String}] with 4-part space-delimited strings, like "-16.0 -15.0 28.0 13.0"
|
45
|
+
def coordinates_as_bbox
|
46
|
+
coordinates_objects.map(&:as_bbox).compact
|
27
47
|
end
|
28
48
|
|
29
49
|
alias point_bbox coordinates_as_bbox
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Stanford
|
3
|
+
module Mods
|
4
|
+
# Abstract geo methods usable to several classes
|
5
|
+
module GeoUtils
|
6
|
+
# @param [String] val Coordinates value
|
7
|
+
# @return [String] cleaned value (strips parens and period), or the original value
|
8
|
+
def cleaner_coordinate(val)
|
9
|
+
matches = val.match(/^\(?([^)]+)\)?\.?$/)
|
10
|
+
matches ? matches[1] : val
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param [String] point coordinate point in degrees notation
|
14
|
+
# @return [Float] converted value in decimal notation
|
15
|
+
def coord_to_decimal(point)
|
16
|
+
regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
|
17
|
+
match = regex.match(point)
|
18
|
+
return Float::INFINITY unless match
|
19
|
+
|
20
|
+
dec = match['deg'].to_i
|
21
|
+
dec += match['min'].to_f / 60
|
22
|
+
dec += match['sec'].to_f / 60 / 60
|
23
|
+
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
24
|
+
dec
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -354,7 +354,7 @@ module Stanford
|
|
354
354
|
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
|
355
355
|
if century_matches
|
356
356
|
require 'active_support/core_ext/integer/inflections'
|
357
|
-
new_century_str =
|
357
|
+
new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
|
358
358
|
my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
|
359
359
|
else
|
360
360
|
my_ng_date_element.content
|
data/lib/stanford-mods/name.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require 'logger'
|
3
2
|
require 'mods'
|
4
3
|
|
5
4
|
# NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
|
6
5
|
module Stanford
|
7
6
|
module Mods
|
8
|
-
|
9
7
|
class Record < ::Mods::Record
|
10
|
-
|
11
8
|
# the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
|
12
9
|
# if no marcrelator 'Creator' or 'Author', the first name without a role.
|
13
10
|
# if no name without a role, then nil
|
@@ -16,10 +13,8 @@ module Stanford
|
|
16
13
|
def main_author_w_date
|
17
14
|
result = nil
|
18
15
|
first_wo_role = nil
|
19
|
-
|
20
|
-
if n.role.
|
21
|
-
first_wo_role ||= n
|
22
|
-
end
|
16
|
+
mods_ng_xml.plain_name.each { |n|
|
17
|
+
first_wo_role ||= n if n.role.empty?
|
23
18
|
n.role.each { |r|
|
24
19
|
if r.authority.include?('marcrelator') &&
|
25
20
|
(r.value.include?('Creator') || r.value.include?('Author'))
|
@@ -27,9 +22,7 @@ module Stanford
|
|
27
22
|
end
|
28
23
|
}
|
29
24
|
}
|
30
|
-
if !result && first_wo_role
|
31
|
-
result = first_wo_role.display_value_w_date
|
32
|
-
end
|
25
|
+
result = first_wo_role.display_value_w_date if !result && first_wo_role
|
33
26
|
result
|
34
27
|
end # main_author
|
35
28
|
|
@@ -38,7 +31,7 @@ module Stanford
|
|
38
31
|
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
39
32
|
def additional_authors_w_dates
|
40
33
|
results = []
|
41
|
-
|
34
|
+
mods_ng_xml.plain_name.each { |n|
|
42
35
|
results << n.display_value_w_date
|
43
36
|
}
|
44
37
|
results.delete(main_author_w_date)
|
@@ -50,7 +43,7 @@ module Stanford
|
|
50
43
|
# FIXME: this is broken if there are multiple role codes and some of them are not marcrelator
|
51
44
|
def non_collector_person_authors
|
52
45
|
result = []
|
53
|
-
|
46
|
+
mods_ng_xml.personal_name.map do |n|
|
54
47
|
next if n.role.size.zero?
|
55
48
|
n.role.each { |r|
|
56
49
|
result << n.display_value_w_date unless includes_marc_relator_collector_role?(r)
|
@@ -63,7 +56,7 @@ module Stanford
|
|
63
56
|
# a personal name with the role of Collector (see mods gem nom_terminology for display value algorithm)
|
64
57
|
def collectors_w_dates
|
65
58
|
result = []
|
66
|
-
|
59
|
+
mods_ng_xml.personal_name.each do |n|
|
67
60
|
next if n.role.size.zero?
|
68
61
|
n.role.each { |r|
|
69
62
|
result << n.display_value_w_date if includes_marc_relator_collector_role?(r)
|
@@ -72,7 +65,7 @@ module Stanford
|
|
72
65
|
result unless result.empty?
|
73
66
|
end
|
74
67
|
|
75
|
-
COLLECTOR_ROLE_URI = 'http://id.loc.gov/vocabulary/relators/col'
|
68
|
+
COLLECTOR_ROLE_URI = 'http://id.loc.gov/vocabulary/relators/col'.freeze
|
76
69
|
|
77
70
|
# @param Nokogiri::XML::Node role_node the role node from a parent name node
|
78
71
|
# @return true if there is a MARC relator collector role assigned
|
@@ -80,7 +73,6 @@ module Stanford
|
|
80
73
|
(role_node.authority.include?('marcrelator') && role_node.value.include?('Collector')) ||
|
81
74
|
role_node.roleTerm.valueURI.first == COLLECTOR_ROLE_URI
|
82
75
|
end
|
83
|
-
|
84
76
|
end # class Record
|
85
77
|
end # Module Mods
|
86
78
|
end # Module Stanford
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'logger'
|
2
1
|
require 'mods'
|
3
2
|
|
4
3
|
# Parsing MODS /originInfo for Publication/Imprint data:
|
@@ -11,7 +10,6 @@ require 'mods'
|
|
11
10
|
module Stanford
|
12
11
|
module Mods
|
13
12
|
class Record < ::Mods::Record
|
14
|
-
|
15
13
|
# return pub year as an Integer
|
16
14
|
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
17
15
|
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
@@ -56,26 +54,26 @@ module Stanford
|
|
56
54
|
# get_main_title_date
|
57
55
|
# https://github.com/sul-dlss/SearchWorks/blob/7d4d870a9d450fed8b081c38dc3dbd590f0b706e/app/helpers/results_document_helper.rb#L8-L46
|
58
56
|
|
59
|
-
#"publication_year_isi" => "Publication date", <-- do it already
|
60
|
-
#"beginning_year_isi" => "Beginning date",
|
61
|
-
#"earliest_year_isi" => "Earliest date",
|
62
|
-
#"earliest_poss_year_isi" => "Earliest possible date",
|
63
|
-
#"ending_year_isi" => "Ending date",
|
64
|
-
#"latest_year_isi" => "Latest date",
|
65
|
-
#"latest_poss_year_isi" => "Latest possible date",
|
66
|
-
#"production_year_isi" => "Production date",
|
67
|
-
#"original_year_isi" => "Original date",
|
68
|
-
#"copyright_year_isi" => "Copyright date"} %>
|
57
|
+
# "publication_year_isi" => "Publication date", <-- do it already
|
58
|
+
# "beginning_year_isi" => "Beginning date",
|
59
|
+
# "earliest_year_isi" => "Earliest date",
|
60
|
+
# "earliest_poss_year_isi" => "Earliest possible date",
|
61
|
+
# "ending_year_isi" => "Ending date",
|
62
|
+
# "latest_year_isi" => "Latest date",
|
63
|
+
# "latest_poss_year_isi" => "Latest possible date",
|
64
|
+
# "production_year_isi" => "Production date",
|
65
|
+
# "original_year_isi" => "Original date",
|
66
|
+
# "copyright_year_isi" => "Copyright date"} %>
|
69
67
|
|
70
|
-
#"creation_year_isi" => "Creation date", <-- do it already
|
71
|
-
#{}"release_year_isi" => "Release date",
|
72
|
-
#{}"reprint_year_isi" => "Reprint/reissue date",
|
73
|
-
#{}"other_year_isi" => "Date",
|
68
|
+
# "creation_year_isi" => "Creation date", <-- do it already
|
69
|
+
# {}"release_year_isi" => "Release date",
|
70
|
+
# {}"reprint_year_isi" => "Reprint/reissue date",
|
71
|
+
# {}"other_year_isi" => "Date",
|
74
72
|
end
|
75
73
|
|
76
74
|
# @return [String] single String containing imprint information for display
|
77
75
|
def imprint_display_str
|
78
|
-
imp = Stanford::Mods::Imprint.new(
|
76
|
+
imp = Stanford::Mods::Imprint.new(origin_info)
|
79
77
|
imp.display_str
|
80
78
|
end
|
81
79
|
|
@@ -116,8 +114,8 @@ module Stanford
|
|
116
114
|
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
117
115
|
# should be excluded; false if approximate dates should be included
|
118
116
|
# @return [Array<Nokogiri::XML::Element>]
|
119
|
-
def date_created_elements(ignore_approximate=false)
|
120
|
-
date_created_nodeset =
|
117
|
+
def date_created_elements(ignore_approximate = false)
|
118
|
+
date_created_nodeset = mods_ng_xml.origin_info.dateCreated
|
121
119
|
return self.class.remove_approximate(date_created_nodeset) if ignore_approximate
|
122
120
|
date_created_nodeset.to_a
|
123
121
|
end
|
@@ -126,8 +124,8 @@ module Stanford
|
|
126
124
|
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
127
125
|
# should be excluded; false if approximate dates should be included
|
128
126
|
# @return [Array<Nokogiri::XML::Element>]
|
129
|
-
def date_issued_elements(ignore_approximate=false)
|
130
|
-
date_issued_nodeset =
|
127
|
+
def date_issued_elements(ignore_approximate = false)
|
128
|
+
date_issued_nodeset = mods_ng_xml.origin_info.dateIssued
|
131
129
|
return self.class.remove_approximate(date_issued_nodeset) if ignore_approximate
|
132
130
|
date_issued_nodeset.to_a
|
133
131
|
end
|
@@ -188,7 +186,7 @@ module Stanford
|
|
188
186
|
result = send(method_sym, date_issued_elements(ignore_approximate))
|
189
187
|
result ||= send(method_sym, date_created_elements(ignore_approximate))
|
190
188
|
# dateCaptured for web archive seed records
|
191
|
-
result ||= send(method_sym,
|
189
|
+
result ||= send(method_sym, mods_ng_xml.origin_info.dateCaptured.to_a)
|
192
190
|
result
|
193
191
|
end
|
194
192
|
|
@@ -208,6 +206,7 @@ module Stanford
|
|
208
206
|
|
209
207
|
class << self
|
210
208
|
private
|
209
|
+
|
211
210
|
# get earliest parseable year from the passed date elements
|
212
211
|
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
213
212
|
# @param [Symbol] method_sym method name in DateParsing, as a symbol
|
@@ -226,11 +225,10 @@ module Stanford
|
|
226
225
|
end
|
227
226
|
end
|
228
227
|
|
229
|
-
|
230
228
|
# ---- old date parsing methods used downstream of gem; will be deprecated/replaced with new date parsing methods
|
231
229
|
|
232
230
|
def place
|
233
|
-
vals =
|
231
|
+
vals = term_values([:origin_info, :place, :placeTerm])
|
234
232
|
vals
|
235
233
|
end
|
236
234
|
|
@@ -355,14 +353,14 @@ module Stanford
|
|
355
353
|
def parse_dates_from_originInfo
|
356
354
|
@dates_marc_encoding = []
|
357
355
|
@dates_no_marc_encoding = []
|
358
|
-
|
356
|
+
origin_info.dateIssued.each { |di|
|
359
357
|
if di.encoding == "marc"
|
360
358
|
@dates_marc_encoding << di.text
|
361
359
|
else
|
362
360
|
@dates_no_marc_encoding << di.text
|
363
361
|
end
|
364
362
|
}
|
365
|
-
|
363
|
+
origin_info.dateCreated.each { |dc|
|
366
364
|
if dc.encoding == "marc"
|
367
365
|
@dates_marc_encoding << dc.text
|
368
366
|
else
|
@@ -393,7 +391,7 @@ module Stanford
|
|
393
391
|
# look for things like '1865-6 CE'
|
394
392
|
pos = f_date.index(Regexp.new(match + '...CE'))
|
395
393
|
pos = pos ? pos.to_i : 0
|
396
|
-
if f_date.include?(match+' CE')
|
394
|
+
if f_date.include?(match + ' CE') || pos > 0
|
397
395
|
@pub_year = match
|
398
396
|
return match
|
399
397
|
end
|
@@ -409,7 +407,7 @@ module Stanford
|
|
409
407
|
def get_three_digit_year(dates)
|
410
408
|
dates.each do |f_date|
|
411
409
|
matches = f_date.scan(/\d{3}/)
|
412
|
-
return matches.first
|
410
|
+
return matches.first unless matches.empty?
|
413
411
|
end
|
414
412
|
nil
|
415
413
|
end
|
@@ -420,7 +418,7 @@ module Stanford
|
|
420
418
|
def get_bc_year(dates)
|
421
419
|
dates.each do |f_date|
|
422
420
|
matches = f_date.scan(/\d{3} B.C./)
|
423
|
-
|
421
|
+
unless matches.empty?
|
424
422
|
bc_year = matches.first[0..2]
|
425
423
|
return (bc_year.to_i - 1000).to_s
|
426
424
|
end
|
@@ -434,9 +432,9 @@ module Stanford
|
|
434
432
|
def get_single_digit_century(dates)
|
435
433
|
dates.each do |f_date|
|
436
434
|
matches = f_date.scan(/\d{1}th/)
|
437
|
-
next if matches.
|
435
|
+
next if matches.empty?
|
438
436
|
if matches.length == 1
|
439
|
-
@pub_year = (
|
437
|
+
@pub_year = (matches.first[0, 2].to_i - 1).to_s + '--'
|
440
438
|
return @pub_year
|
441
439
|
else
|
442
440
|
# when there are multiple matches, check for ones with CE after them
|
@@ -445,7 +443,7 @@ module Stanford
|
|
445
443
|
pos = pos ? pos.to_i : f_date.index(Regexp.new(match + ' century CE'))
|
446
444
|
pos = pos ? pos.to_i : 0
|
447
445
|
if f_date.include?(match + ' CE') || pos > 0
|
448
|
-
@pub_year = (
|
446
|
+
@pub_year = (match[0, 1].to_i - 1).to_s + '--'
|
449
447
|
return @pub_year
|
450
448
|
end
|
451
449
|
end
|
@@ -460,9 +458,9 @@ module Stanford
|
|
460
458
|
def get_double_digit_century(dates)
|
461
459
|
dates.each do |f_date|
|
462
460
|
matches = f_date.scan(/\d{2}th/)
|
463
|
-
next if matches.
|
461
|
+
next if matches.empty?
|
464
462
|
if matches.length == 1
|
465
|
-
@pub_year=(
|
463
|
+
@pub_year = (matches.first[0, 2].to_i - 1).to_s + '--'
|
466
464
|
return @pub_year
|
467
465
|
else
|
468
466
|
# when there are multiple matches, check for ones with CE after them
|
@@ -470,8 +468,8 @@ module Stanford
|
|
470
468
|
pos = f_date.index(Regexp.new(match + '...CE'))
|
471
469
|
pos = pos ? pos.to_i : f_date.index(Regexp.new(match + ' century CE'))
|
472
470
|
pos = pos ? pos.to_i : 0
|
473
|
-
if f_date.include?(match+' CE')
|
474
|
-
@pub_year = (
|
471
|
+
if f_date.include?(match + ' CE') || pos > 0
|
472
|
+
@pub_year = (match[0, 2].to_i - 1).to_s + '--'
|
475
473
|
return @pub_year
|
476
474
|
end
|
477
475
|
end
|
@@ -497,4 +495,4 @@ module Stanford
|
|
497
495
|
end
|
498
496
|
end # class Record
|
499
497
|
end
|
500
|
-
end
|
498
|
+
end
|