stanford-mods 2.6.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +24 -0
- data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
- data/lib/stanford-mods/concerns/name.rb +57 -0
- data/lib/stanford-mods/concerns/origin_info.rb +113 -0
- data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
- data/lib/stanford-mods/concerns/searchworks.rb +125 -0
- data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
- data/lib/stanford-mods/concerns/title.rb +87 -0
- data/lib/stanford-mods/coordinate.rb +21 -3
- data/lib/stanford-mods/date_parsing.rb +32 -288
- data/lib/stanford-mods/imprint.rb +149 -325
- data/lib/stanford-mods/record.rb +20 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +2 -0
- data/lib/stanford-mods.rb +13 -11
- data/spec/fixtures/searchworks_imprint_data.rb +38 -39
- data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
- data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
- data/spec/geo_spatial_spec.rb +1 -6
- data/spec/imprint_spec.rb +238 -207
- data/spec/name_spec.rb +28 -232
- data/spec/origin_info_spec.rb +34 -300
- data/spec/searchworks_basic_spec.rb +1 -3
- data/spec/searchworks_pub_dates_spec.rb +0 -215
- data/spec/searchworks_spec.rb +0 -21
- data/spec/searchworks_subject_raw_spec.rb +106 -105
- data/spec/searchworks_subject_spec.rb +19 -55
- data/spec/searchworks_title_spec.rb +5 -5
- data/stanford-mods.gemspec +1 -1
- metadata +24 -20
- data/.travis.yml +0 -17
- data/lib/marc_countries.rb +0 -387
- data/lib/stanford-mods/geo_utils.rb +0 -28
- data/lib/stanford-mods/name.rb +0 -80
- data/lib/stanford-mods/origin_info.rb +0 -489
- data/lib/stanford-mods/searchworks.rb +0 -333
- data/lib/stanford-mods/searchworks_subjects.rb +0 -196
- data/spec/date_parsing_spec.rb +0 -905
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'mods/marc_country_codes'
|
2
|
+
|
1
3
|
module Stanford
|
2
4
|
module Mods
|
3
5
|
##
|
@@ -8,77 +10,33 @@ module Stanford
|
|
8
10
|
# however, the date_parsing class only does years, and this does finer tuned dates and also
|
9
11
|
# reformats them according to the encoding.
|
10
12
|
class Imprint
|
11
|
-
|
12
|
-
def initialize(originInfo_ng_nodeset)
|
13
|
-
@originInfo_ng_nodeset = originInfo_ng_nodeset
|
14
|
-
end
|
13
|
+
attr_reader :element
|
15
14
|
|
16
|
-
|
15
|
+
# @param element [Nokogiri::XML::Node] an originInfo node
|
16
|
+
def initialize(element)
|
17
|
+
@element = element
|
18
|
+
end
|
17
19
|
|
18
|
-
# @return Array<String> each String is an imprint statement from a single originInfo element
|
19
20
|
def imprint_statements
|
20
|
-
|
21
|
-
@originInfo_ng_nodeset.each do |origin_info_node|
|
22
|
-
edition = edition_vals_str(origin_info_node)
|
23
|
-
place = place_vals_str(origin_info_node)
|
24
|
-
publisher = publisher_vals_str(origin_info_node)
|
25
|
-
dates = date_str(origin_info_node)
|
26
|
-
|
27
|
-
place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
|
28
|
-
edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
|
29
|
-
ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
|
30
|
-
|
31
|
-
results << ed_place_pub_dates unless ed_place_pub_dates.empty?
|
32
|
-
end
|
33
|
-
results
|
21
|
+
display_str
|
34
22
|
end
|
35
23
|
|
24
|
+
# @return [String] an imprint statement from a single originInfo element
|
36
25
|
def display_str
|
37
|
-
|
38
|
-
|
26
|
+
edition = edition_vals_str
|
27
|
+
place = place_vals_str
|
28
|
+
publisher = publisher_vals_str
|
29
|
+
dates = date_str
|
39
30
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
else
|
46
|
-
date_field_keys.map do |date_field|
|
47
|
-
next unless origin_info_node.respond_to?(date_field)
|
48
|
-
|
49
|
-
date_elements = origin_info_node.send(date_field)
|
50
|
-
date_elements.map(&:as_object).map(&:first) if date_elements.any?
|
51
|
-
end.compact.first
|
52
|
-
end
|
53
|
-
|
54
|
-
if date_elements.nil? || date_elements.none?
|
55
|
-
[]
|
56
|
-
elsif date_elements.find(&:start?) &&
|
57
|
-
date_elements.find(&:start?).as_range &&
|
58
|
-
date_elements.find(&:end?) &&
|
59
|
-
date_elements.find(&:end?).as_range
|
60
|
-
start_date = date_elements.find(&:start?)
|
61
|
-
end_date = date_elements.find(&:end?)
|
62
|
-
|
63
|
-
(start_date.as_range.min.year..end_date.as_range.max.year).to_a
|
64
|
-
elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
|
65
|
-
start_date = date_elements.find(&:start?)
|
66
|
-
|
67
|
-
(start_date.as_range.min.year..Time.now.year).to_a
|
68
|
-
elsif date_elements.one?
|
69
|
-
date_elements.first.to_a.map(&:year)
|
70
|
-
else
|
71
|
-
date_elements.map { |v| v.to_a.map(&:year) }.flatten
|
72
|
-
end
|
73
|
-
end.flatten
|
31
|
+
place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
|
32
|
+
edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
|
33
|
+
ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
|
34
|
+
|
35
|
+
ed_place_pub_dates
|
74
36
|
end
|
75
37
|
|
76
38
|
private
|
77
39
|
|
78
|
-
def extract_year(el)
|
79
|
-
DateParsing.year_int_from_date_str(el.text)
|
80
|
-
end
|
81
|
-
|
82
40
|
def compact_and_join_with_delimiter(values, delimiter)
|
83
41
|
compact_values = values.compact.reject { |v| v.strip.empty? }
|
84
42
|
return compact_values.join(delimiter) if compact_values.length == 1 ||
|
@@ -98,16 +56,16 @@ module Stanford
|
|
98
56
|
value.strip.end_with?('.', ',', ':', ';')
|
99
57
|
end
|
100
58
|
|
101
|
-
def edition_vals_str
|
102
|
-
|
59
|
+
def edition_vals_str
|
60
|
+
element.edition.reject do |e|
|
103
61
|
e.text.strip.empty?
|
104
62
|
end.map(&:text).join(' ').strip
|
105
63
|
end
|
106
64
|
|
107
|
-
def publisher_vals_str
|
108
|
-
return if
|
65
|
+
def publisher_vals_str
|
66
|
+
return if element.publisher.text.strip.empty?
|
109
67
|
|
110
|
-
publishers =
|
68
|
+
publishers = element.publisher.reject do |p|
|
111
69
|
p.text.strip.empty?
|
112
70
|
end.map(&:text)
|
113
71
|
compact_and_join_with_delimiter(publishers, ' : ')
|
@@ -115,10 +73,10 @@ module Stanford
|
|
115
73
|
|
116
74
|
# PLACE processing methods ------
|
117
75
|
|
118
|
-
def place_vals_str
|
119
|
-
return if
|
76
|
+
def place_vals_str
|
77
|
+
return if element.place.text.strip.empty?
|
120
78
|
|
121
|
-
places = place_terms
|
79
|
+
places = place_terms.reject do |p|
|
122
80
|
p.text.strip.empty?
|
123
81
|
end.map(&:text)
|
124
82
|
compact_and_join_with_delimiter(places, ' : ')
|
@@ -131,25 +89,26 @@ module Stanford
|
|
131
89
|
end
|
132
90
|
end
|
133
91
|
|
134
|
-
def place_terms
|
135
|
-
return [] unless
|
136
|
-
|
92
|
+
def place_terms
|
93
|
+
return [] unless element.respond_to?(:place) &&
|
94
|
+
element.place.respond_to?(:placeTerm)
|
137
95
|
|
138
|
-
if unencoded_place_terms?(
|
139
|
-
|
96
|
+
if unencoded_place_terms?(element)
|
97
|
+
element.place.placeTerm.select do |term|
|
140
98
|
!term.attributes['type'].respond_to?(:value) ||
|
141
99
|
term.attributes['type'].value == 'text'
|
142
100
|
end.compact
|
143
101
|
else
|
144
|
-
|
102
|
+
element.place.placeTerm.map do |term|
|
145
103
|
next unless term.attributes['type'].respond_to?(:value) &&
|
146
104
|
term.attributes['type'].value == 'code' &&
|
147
105
|
term.attributes['authority'].respond_to?(:value) &&
|
148
106
|
term.attributes['authority'].value == 'marccountry' &&
|
149
|
-
|
107
|
+
!['xx', 'vp'].include?(term.text.strip) &&
|
108
|
+
MARC_COUNTRY.include?(term.text.strip)
|
150
109
|
|
151
110
|
term = term.clone
|
152
|
-
term.content =
|
111
|
+
term.content = MARC_COUNTRY[term.text.strip]
|
153
112
|
term
|
154
113
|
end.compact
|
155
114
|
end
|
@@ -157,308 +116,173 @@ module Stanford
|
|
157
116
|
|
158
117
|
# DATE processing methods ------
|
159
118
|
|
160
|
-
def date_str
|
161
|
-
date_vals = origin_info_date_vals
|
119
|
+
def date_str
|
120
|
+
date_vals = origin_info_date_vals
|
162
121
|
return if date_vals.empty?
|
163
|
-
|
164
122
|
date_vals.map(&:strip).join(' ')
|
165
123
|
end
|
166
124
|
|
167
|
-
def origin_info_date_vals
|
125
|
+
def origin_info_date_vals
|
168
126
|
date_field_keys.map do |date_field|
|
169
|
-
next unless
|
127
|
+
next unless element.respond_to?(date_field)
|
170
128
|
|
171
|
-
date_elements =
|
172
|
-
|
129
|
+
date_elements = element.send(date_field)
|
130
|
+
parse_dates(date_elements) if date_elements.present?
|
173
131
|
end.compact.flatten
|
174
132
|
end
|
175
133
|
|
176
|
-
def date_elements_display_vals(ng_date_elements)
|
177
|
-
apply_date_qualifier_decoration(
|
178
|
-
dedup_dates(
|
179
|
-
join_date_ranges(
|
180
|
-
process_decade_century_dates(
|
181
|
-
process_bc_ad_dates(
|
182
|
-
process_encoded_dates(ignore_bad_dates(ng_date_elements))
|
183
|
-
)
|
184
|
-
)
|
185
|
-
)
|
186
|
-
)
|
187
|
-
)
|
188
|
-
end
|
189
|
-
|
190
134
|
def date_field_keys
|
191
135
|
[:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
|
192
136
|
end
|
193
137
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
val != '9999' && val != '0000-00-00' && val != 'uuuu'
|
198
|
-
end
|
199
|
-
end
|
138
|
+
class DateValue
|
139
|
+
attr_reader :value
|
140
|
+
delegate :text, :date, :point, :qualifier, :encoding, to: :value
|
200
141
|
|
201
|
-
|
202
|
-
|
203
|
-
if date_is_w3cdtf?(ng_date_element)
|
204
|
-
process_w3cdtf_date(ng_date_element)
|
205
|
-
elsif date_is_iso8601?(ng_date_element)
|
206
|
-
process_iso8601_date(ng_date_element)
|
207
|
-
else
|
208
|
-
ng_date_element
|
209
|
-
end
|
142
|
+
def initialize(value)
|
143
|
+
@value = value
|
210
144
|
end
|
211
|
-
end
|
212
145
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
# See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
|
217
|
-
def process_bc_ad_dates(ng_date_elements)
|
218
|
-
ng_date_elements.map do |ng_date_element|
|
219
|
-
case
|
220
|
-
when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
|
221
|
-
ng_date_element.content = "1 B.C."
|
222
|
-
when date_is_bc_edtf?(ng_date_element)
|
223
|
-
year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
|
224
|
-
ng_date_element.content = "#{year} B.C."
|
225
|
-
when date_is_ad?(ng_date_element)
|
226
|
-
ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
|
227
|
-
end
|
228
|
-
ng_date_element
|
146
|
+
# True if the element text isn't blank or one of the placeholders "9999", "0000-00-00", or "uuuu".
|
147
|
+
def valid?
|
148
|
+
text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
|
229
149
|
end
|
230
|
-
end
|
231
150
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
process_century_date(ng_date_element)
|
238
|
-
else
|
239
|
-
ng_date_element
|
151
|
+
# Element text reduced to digits and hyphen. Captures date ranges and
|
152
|
+
# negative (B.C.) dates. Used for comparison/deduping.
|
153
|
+
def base_value
|
154
|
+
if text =~ /^\[?1\d{3}-\d{2}\??\]?$/
|
155
|
+
return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
|
240
156
|
end
|
157
|
+
|
158
|
+
text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
|
241
159
|
end
|
242
|
-
end
|
243
160
|
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
if normalize_date(date.text) == normalize_date(start_date.text)
|
251
|
-
date.content = [start_date.text, end_date.text].join(' - ')
|
252
|
-
date
|
253
|
-
elsif normalize_date(date.text) != normalize_date(end_date.text)
|
254
|
-
date
|
255
|
-
end
|
256
|
-
end.compact
|
257
|
-
elsif dates_are_open_range?(ng_date_elements)
|
258
|
-
start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
|
259
|
-
ng_date_elements.map do |date|
|
260
|
-
date = date.clone # clone the date object so we don't append the same one
|
261
|
-
date.content = "#{start_date.text}-" if date.text == start_date.text
|
262
|
-
date
|
161
|
+
# Decoded version of the date, if it was encoded. Strips leading zeroes.
|
162
|
+
def decoded_value
|
163
|
+
return text.strip unless date
|
164
|
+
|
165
|
+
unless encoding.present?
|
166
|
+
return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
|
263
167
|
end
|
264
|
-
else
|
265
|
-
ng_date_elements
|
266
|
-
end
|
267
|
-
end
|
268
168
|
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
169
|
+
# Delegate to the appropriate decoding method, if any
|
170
|
+
case value.precision
|
171
|
+
when :day
|
172
|
+
date.strftime('%B %e, %Y')
|
173
|
+
when :month
|
174
|
+
date.strftime('%B %Y')
|
175
|
+
when :year
|
176
|
+
year = date.year
|
177
|
+
if year < 1
|
178
|
+
"#{year.abs + 1} B.C."
|
179
|
+
# Any dates before the year 1000 are explicitly marked A.D.
|
180
|
+
elsif year > 1 && year < 1000
|
181
|
+
"#{year} A.D."
|
182
|
+
else
|
183
|
+
year.to_s
|
184
|
+
end
|
185
|
+
when :century
|
186
|
+
return "#{(date.to_s[0..1].to_i + 1).ordinalize} century"
|
187
|
+
when :decade
|
188
|
+
return "#{date.year}s"
|
276
189
|
else
|
277
|
-
|
190
|
+
text.strip
|
278
191
|
end
|
279
|
-
else
|
280
|
-
ng_date_elements
|
281
192
|
end
|
282
|
-
end
|
283
193
|
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
date.content = "[ca. #{date.text}]"
|
289
|
-
elsif date_is_questionable?(date)
|
290
|
-
date.content = "[#{date.text}?]"
|
291
|
-
elsif date_is_inferred?(date)
|
292
|
-
date.content = "[#{date.text}]"
|
293
|
-
end
|
294
|
-
date
|
295
|
-
end
|
296
|
-
return_fields.map(&:text)
|
297
|
-
end
|
194
|
+
# Decoded date with "B.C." or "A.D." and qualifier markers. See (outdated):
|
195
|
+
# https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
|
196
|
+
def qualified_value
|
197
|
+
date = decoded_value
|
298
198
|
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
ng_date_element.attributes['qualifier'].value == 'approximate'
|
303
|
-
end
|
304
|
-
|
305
|
-
def date_is_questionable?(ng_date_element)
|
306
|
-
ng_date_element.attributes['qualifier'] &&
|
307
|
-
ng_date_element.attributes['qualifier'].respond_to?(:value) &&
|
308
|
-
ng_date_element.attributes['qualifier'].value == 'questionable'
|
309
|
-
end
|
310
|
-
|
311
|
-
def date_is_inferred?(ng_date_element)
|
312
|
-
ng_date_element.attributes['qualifier'] &&
|
313
|
-
ng_date_element.attributes['qualifier'].respond_to?(:value) &&
|
314
|
-
ng_date_element.attributes['qualifier'].value == 'inferred'
|
315
|
-
end
|
199
|
+
return "[ca. #{date}]" if qualifier == 'approximate'
|
200
|
+
return "[#{date}?]" if qualifier == 'questionable'
|
201
|
+
return "[#{date}]" if qualifier == 'inferred'
|
316
202
|
|
317
|
-
|
318
|
-
ng_date_elements.any? do |element|
|
319
|
-
element.attributes['point'] &&
|
320
|
-
element.attributes['point'].respond_to?(:value) &&
|
321
|
-
element.attributes['point'].value == 'start'
|
322
|
-
end && !ng_date_elements.any? do |element|
|
323
|
-
element.attributes['point'] &&
|
324
|
-
element.attributes['point'].respond_to?(:value) &&
|
325
|
-
element.attributes['point'].value == 'end'
|
203
|
+
date
|
326
204
|
end
|
327
205
|
end
|
328
206
|
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
end
|
207
|
+
class DateRange
|
208
|
+
def initialize(start: nil, stop: nil)
|
209
|
+
@start = start
|
210
|
+
@stop = stop
|
334
211
|
end
|
335
|
-
attributes.include?('start') &&
|
336
|
-
attributes.include?('end')
|
337
|
-
end
|
338
212
|
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
if ng_date_element.text.strip =~ /^\d{4}-\d{2}-\d{2}$/
|
343
|
-
Date.parse(ng_date_element.text).strftime(full_date_format)
|
344
|
-
elsif ng_date_element.text.strip =~ /^\d{4}-\d{2}$/
|
345
|
-
Date.parse("#{ng_date_element.text}-01").strftime(short_date_format)
|
346
|
-
else
|
347
|
-
ng_date_element.content
|
348
|
-
end
|
349
|
-
rescue
|
350
|
-
ng_date_element.content
|
213
|
+
# Base value as hyphen-joined string. Used for comparison/deduping.
|
214
|
+
def base_value
|
215
|
+
"#{@start&.base_value}-#{@stop&.base_value}"
|
351
216
|
end
|
352
|
-
ng_date_element
|
353
|
-
end
|
354
217
|
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
if ng_date_element.text.strip =~ /^\d{8,}$/
|
359
|
-
Date.parse(ng_date_element.text).strftime(full_date_format)
|
360
|
-
else
|
361
|
-
ng_date_element.content
|
362
|
-
end
|
363
|
-
rescue
|
364
|
-
ng_date_element.content
|
218
|
+
# Base values as array. Used for comparison/deduping of individual dates.
|
219
|
+
def base_values
|
220
|
+
[@start&.base_value, @stop&.base_value].compact
|
365
221
|
end
|
366
|
-
ng_date_element
|
367
|
-
end
|
368
222
|
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
def process_decade_date(ng_date_element)
|
373
|
-
my_ng_date_element = ng_date_element.clone
|
374
|
-
my_ng_date_element.content = begin
|
375
|
-
orig_date_str = ng_date_element.text.strip
|
376
|
-
# note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
|
377
|
-
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
378
|
-
if decade_matches
|
379
|
-
decade_str = decade_matches[2]
|
380
|
-
changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
|
381
|
-
zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
|
382
|
-
new_decade_str = "#{zeroth_year}s" if zeroth_year
|
383
|
-
my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
|
384
|
-
else
|
385
|
-
my_ng_date_element.content
|
386
|
-
end
|
387
|
-
rescue
|
388
|
-
my_ng_date_element.content
|
223
|
+
# The encoding value for the start of the range, or stop if not present.
|
224
|
+
def encoding
|
225
|
+
@start&.encoding || @stop&.encoding
|
389
226
|
end
|
390
|
-
my_ng_date_element
|
391
|
-
end
|
392
227
|
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
|
405
|
-
my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
|
228
|
+
# Decoded dates with "B.C." or "A.D." and qualifier markers applied to
|
229
|
+
# the entire range, or individually if the qualifiers differ.
|
230
|
+
def qualified_value
|
231
|
+
if @start&.qualifier == @stop&.qualifier
|
232
|
+
qualifier = @start&.qualifier || @stop&.qualifier
|
233
|
+
date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
|
234
|
+
return "[ca. #{date}]" if qualifier == 'approximate'
|
235
|
+
return "[#{date}?]" if qualifier == 'questionable'
|
236
|
+
return "[#{date}]" if qualifier == 'inferred'
|
237
|
+
|
238
|
+
date
|
406
239
|
else
|
407
|
-
|
240
|
+
"#{@start&.qualified_value} - #{@stop&.qualified_value}"
|
408
241
|
end
|
409
|
-
rescue
|
410
|
-
my_ng_date_element.content
|
411
242
|
end
|
412
|
-
my_ng_date_element
|
413
|
-
end
|
414
|
-
|
415
|
-
def field_is_encoded?(ng_element, encoding)
|
416
|
-
ng_element.attributes['encoding'] &&
|
417
|
-
ng_element.attributes['encoding'].respond_to?(:value) &&
|
418
|
-
ng_element.attributes['encoding'].value.downcase == encoding
|
419
|
-
end
|
420
|
-
|
421
|
-
def date_is_bc_edtf?(ng_date_element)
|
422
|
-
ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
|
423
243
|
end
|
424
244
|
|
425
|
-
def
|
426
|
-
|
427
|
-
|
428
|
-
|
245
|
+
def parse_dates(elements)
|
246
|
+
# convert to DateValue objects and keep only valid ones
|
247
|
+
dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
|
248
|
+
# join any date ranges into DateRange objects
|
249
|
+
point, nonpoint = dates.partition(&:point)
|
250
|
+
if point.any?
|
251
|
+
range = DateRange.new(start: point.find { |date| date.point == 'start' },
|
252
|
+
stop: point.find { |date| date.point == 'end' })
|
253
|
+
nonpoint.unshift(range)
|
254
|
+
end
|
255
|
+
dates = nonpoint
|
429
256
|
|
430
|
-
|
431
|
-
|
432
|
-
|
257
|
+
# ensure dates are unique with respect to their base values
|
258
|
+
dates = dates.group_by(&:base_value).map do |_value, group|
|
259
|
+
next group.first if group.one?
|
433
260
|
|
434
|
-
|
435
|
-
|
436
|
-
|
261
|
+
# if one of the duplicates wasn't encoded, use that one. see:
|
262
|
+
# https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
|
263
|
+
if group.reject(&:encoding).any?
|
264
|
+
group.reject(&:encoding).first
|
437
265
|
|
438
|
-
|
439
|
-
|
440
|
-
|
266
|
+
# otherwise just arbitrarily pick the last in the group
|
267
|
+
else
|
268
|
+
group.last
|
269
|
+
end
|
270
|
+
end
|
441
271
|
|
442
|
-
|
443
|
-
|
444
|
-
ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
|
445
|
-
end
|
272
|
+
# compare the remaining dates against one part or the other of a range
|
273
|
+
date_ranges = dates.select { |date| date.is_a?(DateRange) }
|
446
274
|
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
end
|
275
|
+
# remove any range that duplicates an unencoded date that includes that range
|
276
|
+
duplicated_ranges = dates.flat_map do |date|
|
277
|
+
next if date.is_a?(DateRange) || date.encoding.present?
|
451
278
|
|
452
|
-
|
453
|
-
|
454
|
-
end
|
279
|
+
date_ranges.select { |r| r.base_values.include?(date.base_value) }
|
280
|
+
end
|
455
281
|
|
456
|
-
|
457
|
-
@short_date_format ||= short_date_format
|
458
|
-
end
|
282
|
+
dates = dates - duplicated_ranges
|
459
283
|
|
460
|
-
|
461
|
-
|
284
|
+
# output formatted dates with qualifiers, A.D./B.C., etc.
|
285
|
+
dates.map(&:qualified_value)
|
462
286
|
end
|
463
287
|
end
|
464
288
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
|
2
|
+
module Stanford
|
3
|
+
module Mods
|
4
|
+
class Record < ::Mods::Record
|
5
|
+
include Stanford::Mods::Geospatial
|
6
|
+
include Stanford::Mods::Name
|
7
|
+
include Stanford::Mods::OriginInfo
|
8
|
+
include Stanford::Mods::PhysicalLocation
|
9
|
+
include Stanford::Mods::SearchworksSubjects
|
10
|
+
include Stanford::Mods::Searchworks
|
11
|
+
include Stanford::Mods::Title
|
12
|
+
|
13
|
+
attr_writer :druid
|
14
|
+
|
15
|
+
def druid
|
16
|
+
@druid || 'Unknown item'
|
17
|
+
end
|
18
|
+
end # Record class
|
19
|
+
end # Mods module
|
20
|
+
end # Stanford module
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# Language Values used by SearchWorks
|
2
2
|
# From https://github.com/solrmarc/stanford-solr-marc/blob/master/stanford-sw/translation_maps/language_map.properties
|
3
3
|
# code 'ase' from iso639-3
|
4
|
+
# code egy-Egyd from rfc5646
|
4
5
|
SEARCHWORKS_LANGUAGES = {
|
5
6
|
'aaa' => 'Afar',
|
6
7
|
'abk' => 'Abkhaz',
|
@@ -125,6 +126,7 @@ SEARCHWORKS_LANGUAGES = {
|
|
125
126
|
'dzo' => 'Dzongkha',
|
126
127
|
'efi' => 'Efik',
|
127
128
|
'egy' => 'Egyptian',
|
129
|
+
'egy-Egyd' => 'Egyptian, Demotic',
|
128
130
|
'eka' => 'Ekajuk',
|
129
131
|
'elx' => 'Elamite',
|
130
132
|
'eng' => 'English',
|