stanford-mods 2.2.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop_todo.yml +358 -3
- data/.travis.yml +1 -4
- data/Gemfile +3 -4
- data/Rakefile +3 -3
- data/lib/marc_countries.rb +381 -381
- data/lib/stanford-mods.rb +1 -0
- data/lib/stanford-mods/coordinate.rb +14 -36
- data/lib/stanford-mods/date_parsing.rb +7 -8
- data/lib/stanford-mods/geo_spatial.rb +32 -12
- data/lib/stanford-mods/geo_utils.rb +28 -0
- data/lib/stanford-mods/imprint.rb +1 -1
- data/lib/stanford-mods/name.rb +7 -15
- data/lib/stanford-mods/origin_info.rb +35 -37
- data/lib/stanford-mods/physical_location.rb +6 -4
- data/lib/stanford-mods/searchworks.rb +149 -182
- data/lib/stanford-mods/searchworks_languages.rb +3 -3
- data/lib/stanford-mods/searchworks_subjects.rb +16 -18
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +17 -19
- data/spec/fixtures/searchworks_imprint_data.rb +1 -1
- data/spec/fixtures/searchworks_pub_date_data.rb +1 -1
- data/spec/fixtures/spotlight_pub_date_data.rb +1 -1
- data/spec/geo_spatial_spec.rb +51 -5
- data/spec/imprint_spec.rb +4 -6
- data/spec/lib/stanford-mods/coordinate_spec.rb +0 -2
- data/spec/name_spec.rb +0 -2
- data/spec/origin_info_spec.rb +26 -28
- data/spec/physical_location_spec.rb +0 -3
- data/spec/searchworks_basic_spec.rb +1 -2
- data/spec/searchworks_format_spec.rb +35 -39
- data/spec/searchworks_pub_dates_spec.rb +0 -5
- data/spec/searchworks_spec.rb +1 -7
- data/spec/searchworks_subject_raw_spec.rb +0 -5
- data/spec/searchworks_subject_spec.rb +2 -9
- data/spec/searchworks_title_spec.rb +0 -2
- data/spec/spec_helper.rb +2 -5
- data/spec/sw_publication_spec.rb +1 -2
- data/stanford-mods.gemspec +1 -2
- metadata +4 -4
data/lib/stanford-mods.rb
CHANGED
@@ -4,26 +4,28 @@ module Stanford
|
|
4
4
|
##
|
5
5
|
# Geospatial coordinate parsing
|
6
6
|
class Coordinate
|
7
|
+
require 'stanford-mods/geo_utils'
|
8
|
+
include ::Stanford::Mods::GeoUtils
|
9
|
+
|
7
10
|
attr_reader :value
|
8
11
|
|
9
12
|
def initialize(value)
|
10
13
|
@value = value
|
11
14
|
end
|
12
15
|
|
13
|
-
#
|
16
|
+
# @return [String] the coordinate in WKT/CQL ENVELOPE representation
|
14
17
|
def as_envelope
|
15
18
|
return unless valid?
|
16
|
-
|
17
19
|
"ENVELOPE(#{bounds[:min_x]}, #{bounds[:max_x]}, #{bounds[:max_y]}, #{bounds[:min_y]})"
|
18
20
|
end
|
19
21
|
|
20
|
-
#
|
22
|
+
# @return [String] the coordinate in Solr 4.x+ bbox-format representation
|
21
23
|
def as_bbox
|
22
24
|
return unless valid?
|
23
|
-
|
24
25
|
"#{bounds[:min_x]} #{bounds[:min_y]} #{bounds[:max_x]} #{bounds[:max_y]}"
|
25
26
|
end
|
26
27
|
|
28
|
+
# @return [Boolean] true iff the coordinates are geographically valid
|
27
29
|
def valid?
|
28
30
|
return false if bounds.empty?
|
29
31
|
|
@@ -40,42 +42,18 @@ module Stanford
|
|
40
42
|
|
41
43
|
def bounds
|
42
44
|
@bounds ||= begin
|
43
|
-
matches =
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
{ min_x: min_x, min_y: min_y, max_x: max_x, max_y: max_y }
|
50
|
-
else
|
51
|
-
{}
|
52
|
-
end
|
45
|
+
matches = cleaner_coordinate(value).match %r{\A(?<lat>[EW].+-+.+)\s*/\s*(?<lng>[NS].+-+.+)\Z}
|
46
|
+
return {} unless matches
|
47
|
+
min_x, max_x = matches['lat'].split(/-+/).map { |x| coord_to_decimal(x) }.minmax
|
48
|
+
min_y, max_y = matches['lng'].split(/-+/).map { |y| coord_to_decimal(y) }.minmax
|
49
|
+
{ min_x: min_x, min_y: min_y, max_x: max_x, max_y: max_y }
|
53
50
|
end
|
54
51
|
end
|
55
52
|
|
53
|
+
# @deprecated see GeoUtils
|
56
54
|
def coord
|
57
|
-
|
58
|
-
|
59
|
-
if matches
|
60
|
-
matches[1]
|
61
|
-
else
|
62
|
-
value
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
-
def coord_to_decimal(point)
|
67
|
-
regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
|
68
|
-
match = regex.match(point)
|
69
|
-
|
70
|
-
return Float::INFINITY unless match
|
71
|
-
|
72
|
-
dec = match['deg'].to_i
|
73
|
-
dec += match['min'].to_f / 60
|
74
|
-
dec += match['sec'].to_f / 60 / 60
|
75
|
-
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
76
|
-
|
77
|
-
dec
|
55
|
+
cleaner_coordinate(value)
|
78
56
|
end
|
79
57
|
end
|
80
58
|
end
|
81
|
-
end
|
59
|
+
end
|
@@ -6,7 +6,6 @@ module Stanford
|
|
6
6
|
# - we may want an integer or date sort field as well as lexical
|
7
7
|
# - we could add methods like my_date.bc?
|
8
8
|
class DateParsing
|
9
|
-
|
10
9
|
# get display value for year, generally an explicit year or "17th century" or "5 B.C." or "1950s" or '845 A.D.'
|
11
10
|
# @return [String, nil] display value for year if we could parse one, nil otherwise
|
12
11
|
def self.date_str_for_display(date_str)
|
@@ -196,7 +195,7 @@ module Stanford
|
|
196
195
|
# @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
197
196
|
def sortable_year_for_century
|
198
197
|
return unless orig_date_str
|
199
|
-
return if orig_date_str
|
198
|
+
return if orig_date_str =~ /B\.C\./
|
200
199
|
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
201
200
|
if century_matches
|
202
201
|
return $1 + '00' if $1.length == 2
|
@@ -215,7 +214,7 @@ module Stanford
|
|
215
214
|
# @return [String, nil] yy(th) Century if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
216
215
|
def display_str_for_century
|
217
216
|
return unless orig_date_str
|
218
|
-
return if orig_date_str
|
217
|
+
return if orig_date_str =~ /B\.C\./
|
219
218
|
century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
|
220
219
|
return century_str_matches.to_s if century_str_matches
|
221
220
|
|
@@ -263,7 +262,7 @@ module Stanford
|
|
263
262
|
# @return [String, nil] String sortable -ddd if orig_date_str matches pattern; nil otherwise
|
264
263
|
def sortable_year_str_for_early_numeric
|
265
264
|
return unless orig_date_str.match(EARLY_NUMERIC)
|
266
|
-
if orig_date_str
|
265
|
+
if orig_date_str =~ /^\-/
|
267
266
|
# negative number becomes x - 1000 for sorting; -005 for -995
|
268
267
|
num = orig_date_str[1..-1].to_i - 1000
|
269
268
|
return '-' + num.to_s[1..-1].rjust(3, '0')
|
@@ -276,7 +275,7 @@ module Stanford
|
|
276
275
|
# @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
|
277
276
|
def sortable_year_int_for_early_numeric
|
278
277
|
return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
|
279
|
-
orig_date_str.to_i if orig_date_str
|
278
|
+
orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
|
280
279
|
end
|
281
280
|
|
282
281
|
# get display value for date String containing yyy, yy, y, -y, -yy, -yyy
|
@@ -290,7 +289,7 @@ module Stanford
|
|
290
289
|
# return 1 B.C. when the date is 0 since there is no 0 year
|
291
290
|
return '1 B.C.' if orig_date_str == '0'
|
292
291
|
# negative number becomes B.C.
|
293
|
-
return
|
292
|
+
return "#{orig_date_str[1..-1].to_i + 1} B.C." if orig_date_str =~ /^\-/
|
294
293
|
# remove leading 0s from early dates
|
295
294
|
"#{orig_date_str.to_i} A.D."
|
296
295
|
end
|
@@ -301,10 +300,10 @@ module Stanford
|
|
301
300
|
# has made this method bogus.
|
302
301
|
# @return [String, nil] sortable 4 digit year (e.g. 1865, 0950) if orig_date_str is parseable via ruby Date, nil otherwise
|
303
302
|
def year_via_ruby_parsing
|
304
|
-
return unless orig_date_str
|
303
|
+
return unless orig_date_str =~ /\d\d/ # need at least 2 digits
|
305
304
|
# need more in string than only 2 digits
|
306
305
|
return if orig_date_str.match(/^\d\d$/) || orig_date_str.match(/^\D*\d\d\D*$/)
|
307
|
-
return if orig_date_str
|
306
|
+
return if orig_date_str =~ /\d\s*B.C./ # skip B.C. dates
|
308
307
|
date_obj = Date.parse(orig_date_str)
|
309
308
|
date_obj.year.to_s
|
310
309
|
rescue ArgumentError
|
@@ -1,29 +1,49 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require 'logger'
|
3
2
|
require 'mods'
|
4
3
|
|
5
4
|
module Stanford
|
6
5
|
module Mods
|
7
6
|
# NON-SearchWorks specific wranglings of MODS cartographics metadata
|
8
7
|
class Record < ::Mods::Record
|
8
|
+
GMLNS = 'http://www.opengis.net/gml/3.2/'.freeze
|
9
|
+
|
10
|
+
# @return [Array{String}] subject cartographic coordinates values
|
9
11
|
def coordinates
|
10
|
-
Array(
|
12
|
+
Array(mods_ng_xml.subject.cartographics.coordinates).map(&:text)
|
11
13
|
end
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
-
|
15
|
+
# @return [Array{String}] values suitable for solr SRPT fields, like "ENVELOPE(-16.0, 28.0, 13.0, -15.0)"
|
16
|
+
# @note example xml leaf nodes
|
17
|
+
# <gml:lowerCorner>-122.191292 37.4063388</gml:lowerCorner>
|
18
|
+
# <gml:upperCorner>-122.149475 37.4435369</gml:upperCorner>
|
19
|
+
def geo_extensions_as_envelope
|
20
|
+
mods_ng_xml.extension
|
21
|
+
.xpath('//rdf:RDF/rdf:Description/gml:boundedBy/gml:Envelope',
|
22
|
+
'gml' => GMLNS,
|
23
|
+
'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
24
|
+
).map do |v|
|
25
|
+
uppers = v.xpath('gml:upperCorner', 'gml' => GMLNS).text.split
|
26
|
+
lowers = v.xpath('gml:lowerCorner', 'gml' => GMLNS).text.split
|
27
|
+
"ENVELOPE(#{lowers[0]}, #{uppers[0]}, #{uppers[1]}, #{lowers[1]})"
|
28
|
+
end
|
29
|
+
rescue RuntimeError => e
|
30
|
+
logger.warn "failure parsing <extension> element: #{e.message}"
|
31
|
+
[]
|
32
|
+
end
|
16
33
|
|
17
|
-
|
18
|
-
|
34
|
+
# @return [Array{Stanford::Mods::Coordinate}] valid coordinates as objects
|
35
|
+
def coordinates_objects
|
36
|
+
coordinates.map { |n| Stanford::Mods::Coordinate.new(n) }.select(&:valid?)
|
19
37
|
end
|
20
38
|
|
21
|
-
|
22
|
-
|
23
|
-
|
39
|
+
# @return [Array{String}] values suitable for solr SRPT fields, like "ENVELOPE(-16.0, 28.0, 13.0, -15.0)"
|
40
|
+
def coordinates_as_envelope
|
41
|
+
coordinates_objects.map(&:as_envelope).compact
|
42
|
+
end
|
24
43
|
|
25
|
-
|
26
|
-
|
44
|
+
# @return [Array{String}] with 4-part space-delimited strings, like "-16.0 -15.0 28.0 13.0"
|
45
|
+
def coordinates_as_bbox
|
46
|
+
coordinates_objects.map(&:as_bbox).compact
|
27
47
|
end
|
28
48
|
|
29
49
|
alias point_bbox coordinates_as_bbox
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
module Stanford
|
3
|
+
module Mods
|
4
|
+
# Abstract geo methods usable by several classes
|
5
|
+
module GeoUtils
|
6
|
+
# @param [String] val Coordinates value
|
7
|
+
# @return [String] cleaned value (strips parens and period), or the original value
|
8
|
+
def cleaner_coordinate(val)
|
9
|
+
matches = val.match(/^\(?([^)]+)\)?\.?$/)
|
10
|
+
matches ? matches[1] : val
|
11
|
+
end
|
12
|
+
|
13
|
+
# @param [String] point coordinate point in degrees notation
|
14
|
+
# @return [Float] converted value in decimal notation
|
15
|
+
def coord_to_decimal(point)
|
16
|
+
regex = /(?<dir>[NESW])\s*(?<deg>\d+)[°⁰º](?:(?<min>\d+)[ʹ'])?(?:(?<sec>\d+)[ʺ"])?/
|
17
|
+
match = regex.match(point)
|
18
|
+
return Float::INFINITY unless match
|
19
|
+
|
20
|
+
dec = match['deg'].to_i
|
21
|
+
dec += match['min'].to_f / 60
|
22
|
+
dec += match['sec'].to_f / 60 / 60
|
23
|
+
dec = -1 * dec if match['dir'] == 'W' || match['dir'] == 'S'
|
24
|
+
dec
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -354,7 +354,7 @@ module Stanford
|
|
354
354
|
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
|
355
355
|
if century_matches
|
356
356
|
require 'active_support/core_ext/integer/inflections'
|
357
|
-
new_century_str =
|
357
|
+
new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
|
358
358
|
my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
|
359
359
|
else
|
360
360
|
my_ng_date_element.content
|
data/lib/stanford-mods/name.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require 'logger'
|
3
2
|
require 'mods'
|
4
3
|
|
5
4
|
# NON-SearchWorks specific wranglings of MODS <name> metadata as a mixin to the Stanford::Mods::Record object
|
6
5
|
module Stanford
|
7
6
|
module Mods
|
8
|
-
|
9
7
|
class Record < ::Mods::Record
|
10
|
-
|
11
8
|
# the first encountered <mods><name> element with marcrelator flavor role of 'Creator' or 'Author'.
|
12
9
|
# if no marcrelator 'Creator' or 'Author', the first name without a role.
|
13
10
|
# if no name without a role, then nil
|
@@ -16,10 +13,8 @@ module Stanford
|
|
16
13
|
def main_author_w_date
|
17
14
|
result = nil
|
18
15
|
first_wo_role = nil
|
19
|
-
|
20
|
-
if n.role.
|
21
|
-
first_wo_role ||= n
|
22
|
-
end
|
16
|
+
mods_ng_xml.plain_name.each { |n|
|
17
|
+
first_wo_role ||= n if n.role.empty?
|
23
18
|
n.role.each { |r|
|
24
19
|
if r.authority.include?('marcrelator') &&
|
25
20
|
(r.value.include?('Creator') || r.value.include?('Author'))
|
@@ -27,9 +22,7 @@ module Stanford
|
|
27
22
|
end
|
28
23
|
}
|
29
24
|
}
|
30
|
-
if !result && first_wo_role
|
31
|
-
result = first_wo_role.display_value_w_date
|
32
|
-
end
|
25
|
+
result = first_wo_role.display_value_w_date if !result && first_wo_role
|
33
26
|
result
|
34
27
|
end # main_author
|
35
28
|
|
@@ -38,7 +31,7 @@ module Stanford
|
|
38
31
|
# see Mods::Record.name in nom_terminology for details on the display_value algorithm
|
39
32
|
def additional_authors_w_dates
|
40
33
|
results = []
|
41
|
-
|
34
|
+
mods_ng_xml.plain_name.each { |n|
|
42
35
|
results << n.display_value_w_date
|
43
36
|
}
|
44
37
|
results.delete(main_author_w_date)
|
@@ -50,7 +43,7 @@ module Stanford
|
|
50
43
|
# FIXME: this is broken if there are multiple role codes and some of them are not marcrelator
|
51
44
|
def non_collector_person_authors
|
52
45
|
result = []
|
53
|
-
|
46
|
+
mods_ng_xml.personal_name.map do |n|
|
54
47
|
next if n.role.size.zero?
|
55
48
|
n.role.each { |r|
|
56
49
|
result << n.display_value_w_date unless includes_marc_relator_collector_role?(r)
|
@@ -63,7 +56,7 @@ module Stanford
|
|
63
56
|
# a personal name with the role of Collector (see mods gem nom_terminology for display value algorithm)
|
64
57
|
def collectors_w_dates
|
65
58
|
result = []
|
66
|
-
|
59
|
+
mods_ng_xml.personal_name.each do |n|
|
67
60
|
next if n.role.size.zero?
|
68
61
|
n.role.each { |r|
|
69
62
|
result << n.display_value_w_date if includes_marc_relator_collector_role?(r)
|
@@ -72,7 +65,7 @@ module Stanford
|
|
72
65
|
result unless result.empty?
|
73
66
|
end
|
74
67
|
|
75
|
-
COLLECTOR_ROLE_URI = 'http://id.loc.gov/vocabulary/relators/col'
|
68
|
+
COLLECTOR_ROLE_URI = 'http://id.loc.gov/vocabulary/relators/col'.freeze
|
76
69
|
|
77
70
|
# @param Nokogiri::XML::Node role_node the role node from a parent name node
|
78
71
|
# @return true if there is a MARC relator collector role assigned
|
@@ -80,7 +73,6 @@ module Stanford
|
|
80
73
|
(role_node.authority.include?('marcrelator') && role_node.value.include?('Collector')) ||
|
81
74
|
role_node.roleTerm.valueURI.first == COLLECTOR_ROLE_URI
|
82
75
|
end
|
83
|
-
|
84
76
|
end # class Record
|
85
77
|
end # Module Mods
|
86
78
|
end # Module Stanford
|
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'logger'
|
2
1
|
require 'mods'
|
3
2
|
|
4
3
|
# Parsing MODS /originInfo for Publication/Imprint data:
|
@@ -11,7 +10,6 @@ require 'mods'
|
|
11
10
|
module Stanford
|
12
11
|
module Mods
|
13
12
|
class Record < ::Mods::Record
|
14
|
-
|
15
13
|
# return pub year as an Integer
|
16
14
|
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
17
15
|
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
@@ -56,26 +54,26 @@ module Stanford
|
|
56
54
|
# get_main_title_date
|
57
55
|
# https://github.com/sul-dlss/SearchWorks/blob/7d4d870a9d450fed8b081c38dc3dbd590f0b706e/app/helpers/results_document_helper.rb#L8-L46
|
58
56
|
|
59
|
-
#"publication_year_isi" => "Publication date", <-- do it already
|
60
|
-
#"beginning_year_isi" => "Beginning date",
|
61
|
-
#"earliest_year_isi" => "Earliest date",
|
62
|
-
#"earliest_poss_year_isi" => "Earliest possible date",
|
63
|
-
#"ending_year_isi" => "Ending date",
|
64
|
-
#"latest_year_isi" => "Latest date",
|
65
|
-
#"latest_poss_year_isi" => "Latest possible date",
|
66
|
-
#"production_year_isi" => "Production date",
|
67
|
-
#"original_year_isi" => "Original date",
|
68
|
-
#"copyright_year_isi" => "Copyright date"} %>
|
57
|
+
# "publication_year_isi" => "Publication date", <-- do it already
|
58
|
+
# "beginning_year_isi" => "Beginning date",
|
59
|
+
# "earliest_year_isi" => "Earliest date",
|
60
|
+
# "earliest_poss_year_isi" => "Earliest possible date",
|
61
|
+
# "ending_year_isi" => "Ending date",
|
62
|
+
# "latest_year_isi" => "Latest date",
|
63
|
+
# "latest_poss_year_isi" => "Latest possible date",
|
64
|
+
# "production_year_isi" => "Production date",
|
65
|
+
# "original_year_isi" => "Original date",
|
66
|
+
# "copyright_year_isi" => "Copyright date"} %>
|
69
67
|
|
70
|
-
#"creation_year_isi" => "Creation date", <-- do it already
|
71
|
-
#{}"release_year_isi" => "Release date",
|
72
|
-
#{}"reprint_year_isi" => "Reprint/reissue date",
|
73
|
-
#{}"other_year_isi" => "Date",
|
68
|
+
# "creation_year_isi" => "Creation date", <-- do it already
|
69
|
+
# {}"release_year_isi" => "Release date",
|
70
|
+
# {}"reprint_year_isi" => "Reprint/reissue date",
|
71
|
+
# {}"other_year_isi" => "Date",
|
74
72
|
end
|
75
73
|
|
76
74
|
# @return [String] single String containing imprint information for display
|
77
75
|
def imprint_display_str
|
78
|
-
imp = Stanford::Mods::Imprint.new(
|
76
|
+
imp = Stanford::Mods::Imprint.new(origin_info)
|
79
77
|
imp.display_str
|
80
78
|
end
|
81
79
|
|
@@ -116,8 +114,8 @@ module Stanford
|
|
116
114
|
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
117
115
|
# should be excluded; false if approximate dates should be included
|
118
116
|
# @return [Array<Nokogiri::XML::Element>]
|
119
|
-
def date_created_elements(ignore_approximate=false)
|
120
|
-
date_created_nodeset =
|
117
|
+
def date_created_elements(ignore_approximate = false)
|
118
|
+
date_created_nodeset = mods_ng_xml.origin_info.dateCreated
|
121
119
|
return self.class.remove_approximate(date_created_nodeset) if ignore_approximate
|
122
120
|
date_created_nodeset.to_a
|
123
121
|
end
|
@@ -126,8 +124,8 @@ module Stanford
|
|
126
124
|
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
127
125
|
# should be excluded; false if approximate dates should be included
|
128
126
|
# @return [Array<Nokogiri::XML::Element>]
|
129
|
-
def date_issued_elements(ignore_approximate=false)
|
130
|
-
date_issued_nodeset =
|
127
|
+
def date_issued_elements(ignore_approximate = false)
|
128
|
+
date_issued_nodeset = mods_ng_xml.origin_info.dateIssued
|
131
129
|
return self.class.remove_approximate(date_issued_nodeset) if ignore_approximate
|
132
130
|
date_issued_nodeset.to_a
|
133
131
|
end
|
@@ -188,7 +186,7 @@ module Stanford
|
|
188
186
|
result = send(method_sym, date_issued_elements(ignore_approximate))
|
189
187
|
result ||= send(method_sym, date_created_elements(ignore_approximate))
|
190
188
|
# dateCaptured for web archive seed records
|
191
|
-
result ||= send(method_sym,
|
189
|
+
result ||= send(method_sym, mods_ng_xml.origin_info.dateCaptured.to_a)
|
192
190
|
result
|
193
191
|
end
|
194
192
|
|
@@ -208,6 +206,7 @@ module Stanford
|
|
208
206
|
|
209
207
|
class << self
|
210
208
|
private
|
209
|
+
|
211
210
|
# get earliest parseable year from the passed date elements
|
212
211
|
# @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
|
213
212
|
# @param [Symbol] method_sym method name in DateParsing, as a symbol
|
@@ -226,11 +225,10 @@ module Stanford
|
|
226
225
|
end
|
227
226
|
end
|
228
227
|
|
229
|
-
|
230
228
|
# ---- old date parsing methods used downstream of gem; will be deprecated/replaced with new date parsing methods
|
231
229
|
|
232
230
|
def place
|
233
|
-
vals =
|
231
|
+
vals = term_values([:origin_info, :place, :placeTerm])
|
234
232
|
vals
|
235
233
|
end
|
236
234
|
|
@@ -355,14 +353,14 @@ module Stanford
|
|
355
353
|
def parse_dates_from_originInfo
|
356
354
|
@dates_marc_encoding = []
|
357
355
|
@dates_no_marc_encoding = []
|
358
|
-
|
356
|
+
origin_info.dateIssued.each { |di|
|
359
357
|
if di.encoding == "marc"
|
360
358
|
@dates_marc_encoding << di.text
|
361
359
|
else
|
362
360
|
@dates_no_marc_encoding << di.text
|
363
361
|
end
|
364
362
|
}
|
365
|
-
|
363
|
+
origin_info.dateCreated.each { |dc|
|
366
364
|
if dc.encoding == "marc"
|
367
365
|
@dates_marc_encoding << dc.text
|
368
366
|
else
|
@@ -393,7 +391,7 @@ module Stanford
|
|
393
391
|
# look for things like '1865-6 CE'
|
394
392
|
pos = f_date.index(Regexp.new(match + '...CE'))
|
395
393
|
pos = pos ? pos.to_i : 0
|
396
|
-
if f_date.include?(match+' CE')
|
394
|
+
if f_date.include?(match + ' CE') || pos > 0
|
397
395
|
@pub_year = match
|
398
396
|
return match
|
399
397
|
end
|
@@ -409,7 +407,7 @@ module Stanford
|
|
409
407
|
def get_three_digit_year(dates)
|
410
408
|
dates.each do |f_date|
|
411
409
|
matches = f_date.scan(/\d{3}/)
|
412
|
-
return matches.first
|
410
|
+
return matches.first unless matches.empty?
|
413
411
|
end
|
414
412
|
nil
|
415
413
|
end
|
@@ -420,7 +418,7 @@ module Stanford
|
|
420
418
|
def get_bc_year(dates)
|
421
419
|
dates.each do |f_date|
|
422
420
|
matches = f_date.scan(/\d{3} B.C./)
|
423
|
-
|
421
|
+
unless matches.empty?
|
424
422
|
bc_year = matches.first[0..2]
|
425
423
|
return (bc_year.to_i - 1000).to_s
|
426
424
|
end
|
@@ -434,9 +432,9 @@ module Stanford
|
|
434
432
|
def get_single_digit_century(dates)
|
435
433
|
dates.each do |f_date|
|
436
434
|
matches = f_date.scan(/\d{1}th/)
|
437
|
-
next if matches.
|
435
|
+
next if matches.empty?
|
438
436
|
if matches.length == 1
|
439
|
-
@pub_year = (
|
437
|
+
@pub_year = (matches.first[0, 2].to_i - 1).to_s + '--'
|
440
438
|
return @pub_year
|
441
439
|
else
|
442
440
|
# when there are multiple matches, check for ones with CE after them
|
@@ -445,7 +443,7 @@ module Stanford
|
|
445
443
|
pos = pos ? pos.to_i : f_date.index(Regexp.new(match + ' century CE'))
|
446
444
|
pos = pos ? pos.to_i : 0
|
447
445
|
if f_date.include?(match + ' CE') || pos > 0
|
448
|
-
@pub_year = (
|
446
|
+
@pub_year = (match[0, 1].to_i - 1).to_s + '--'
|
449
447
|
return @pub_year
|
450
448
|
end
|
451
449
|
end
|
@@ -460,9 +458,9 @@ module Stanford
|
|
460
458
|
def get_double_digit_century(dates)
|
461
459
|
dates.each do |f_date|
|
462
460
|
matches = f_date.scan(/\d{2}th/)
|
463
|
-
next if matches.
|
461
|
+
next if matches.empty?
|
464
462
|
if matches.length == 1
|
465
|
-
@pub_year=(
|
463
|
+
@pub_year = (matches.first[0, 2].to_i - 1).to_s + '--'
|
466
464
|
return @pub_year
|
467
465
|
else
|
468
466
|
# when there are multiple matches, check for ones with CE after them
|
@@ -470,8 +468,8 @@ module Stanford
|
|
470
468
|
pos = f_date.index(Regexp.new(match + '...CE'))
|
471
469
|
pos = pos ? pos.to_i : f_date.index(Regexp.new(match + ' century CE'))
|
472
470
|
pos = pos ? pos.to_i : 0
|
473
|
-
if f_date.include?(match+' CE')
|
474
|
-
@pub_year = (
|
471
|
+
if f_date.include?(match + ' CE') || pos > 0
|
472
|
+
@pub_year = (match[0, 2].to_i - 1).to_s + '--'
|
475
473
|
return @pub_year
|
476
474
|
end
|
477
475
|
end
|
@@ -497,4 +495,4 @@ module Stanford
|
|
497
495
|
end
|
498
496
|
end # class Record
|
499
497
|
end
|
500
|
-
end
|
498
|
+
end
|