stanford-mods 2.6.4 → 3.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
- data/lib/stanford-mods/concerns/name.rb +57 -0
- data/lib/stanford-mods/concerns/origin_info.rb +109 -0
- data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
- data/lib/stanford-mods/concerns/searchworks.rb +125 -0
- data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
- data/lib/stanford-mods/concerns/title.rb +79 -0
- data/lib/stanford-mods/coordinate.rb +21 -3
- data/lib/stanford-mods/date_parsing.rb +32 -289
- data/lib/stanford-mods/imprint.rb +148 -325
- data/lib/stanford-mods/record.rb +20 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
- data/lib/stanford-mods.rb +12 -11
- data/spec/fixtures/searchworks_imprint_data.rb +38 -39
- data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
- data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
- data/spec/geo_spatial_spec.rb +1 -6
- data/spec/imprint_spec.rb +238 -207
- data/spec/name_spec.rb +26 -230
- data/spec/origin_info_spec.rb +34 -300
- data/spec/searchworks_basic_spec.rb +1 -3
- data/spec/searchworks_pub_dates_spec.rb +0 -215
- data/spec/searchworks_spec.rb +0 -21
- data/spec/searchworks_subject_raw_spec.rb +106 -105
- data/spec/searchworks_subject_spec.rb +19 -55
- data/spec/searchworks_title_spec.rb +1 -1
- data/stanford-mods.gemspec +1 -1
- metadata +21 -17
- data/lib/marc_countries.rb +0 -387
- data/lib/stanford-mods/geo_utils.rb +0 -28
- data/lib/stanford-mods/name.rb +0 -80
- data/lib/stanford-mods/origin_info.rb +0 -489
- data/lib/stanford-mods/searchworks.rb +0 -333
- data/lib/stanford-mods/searchworks_subjects.rb +0 -196
- data/spec/date_parsing_spec.rb +0 -905
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'mods/marc_country_codes'
|
2
2
|
|
3
3
|
module Stanford
|
4
4
|
module Mods
|
@@ -10,77 +10,33 @@ module Stanford
|
|
10
10
|
# however, the date_parsing class only does years, and this does finer tuned dates and also
|
11
11
|
# reformats them according to the encoding.
|
12
12
|
class Imprint
|
13
|
-
|
14
|
-
def initialize(originInfo_ng_nodeset)
|
15
|
-
@originInfo_ng_nodeset = originInfo_ng_nodeset
|
16
|
-
end
|
13
|
+
attr_reader :element
|
17
14
|
|
18
|
-
|
15
|
+
# @param element [Nokogiri::XML::Node] an originInfo node
|
16
|
+
def initialize(element)
|
17
|
+
@element = element
|
18
|
+
end
|
19
19
|
|
20
|
-
# @return Array<String> each String is an imprint statement from a single originInfo element
|
21
20
|
def imprint_statements
|
22
|
-
|
23
|
-
@originInfo_ng_nodeset.each do |origin_info_node|
|
24
|
-
edition = edition_vals_str(origin_info_node)
|
25
|
-
place = place_vals_str(origin_info_node)
|
26
|
-
publisher = publisher_vals_str(origin_info_node)
|
27
|
-
dates = date_str(origin_info_node)
|
28
|
-
|
29
|
-
place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
|
30
|
-
edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
|
31
|
-
ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
|
32
|
-
|
33
|
-
results << ed_place_pub_dates unless ed_place_pub_dates.empty?
|
34
|
-
end
|
35
|
-
results
|
21
|
+
display_str
|
36
22
|
end
|
37
23
|
|
24
|
+
# @return [String] an imprint statement from a single originInfo element
|
38
25
|
def display_str
|
39
|
-
|
40
|
-
|
26
|
+
edition = edition_vals_str
|
27
|
+
place = place_vals_str
|
28
|
+
publisher = publisher_vals_str
|
29
|
+
dates = date_str
|
41
30
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
else
|
48
|
-
date_field_keys.map do |date_field|
|
49
|
-
next unless origin_info_node.respond_to?(date_field)
|
50
|
-
|
51
|
-
date_elements = origin_info_node.send(date_field)
|
52
|
-
date_elements.map(&:as_object).map(&:first) if date_elements.any?
|
53
|
-
end.compact.first
|
54
|
-
end
|
55
|
-
|
56
|
-
if date_elements.nil? || date_elements.none?
|
57
|
-
[]
|
58
|
-
elsif date_elements.find(&:start?) &&
|
59
|
-
date_elements.find(&:start?).as_range &&
|
60
|
-
date_elements.find(&:end?) &&
|
61
|
-
date_elements.find(&:end?).as_range
|
62
|
-
start_date = date_elements.find(&:start?)
|
63
|
-
end_date = date_elements.find(&:end?)
|
64
|
-
|
65
|
-
(start_date.as_range.min.year..end_date.as_range.max.year).to_a
|
66
|
-
elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
|
67
|
-
start_date = date_elements.find(&:start?)
|
68
|
-
|
69
|
-
(start_date.as_range.min.year..Time.now.year).to_a
|
70
|
-
elsif date_elements.one?
|
71
|
-
date_elements.first.to_a.map(&:year)
|
72
|
-
else
|
73
|
-
date_elements.map { |v| v.to_a.map(&:year) }.flatten
|
74
|
-
end
|
75
|
-
end.flatten
|
31
|
+
place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
|
32
|
+
edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
|
33
|
+
ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
|
34
|
+
|
35
|
+
ed_place_pub_dates
|
76
36
|
end
|
77
37
|
|
78
38
|
private
|
79
39
|
|
80
|
-
def extract_year(el)
|
81
|
-
DateParsing.year_int_from_date_str(el.text)
|
82
|
-
end
|
83
|
-
|
84
40
|
def compact_and_join_with_delimiter(values, delimiter)
|
85
41
|
compact_values = values.compact.reject { |v| v.strip.empty? }
|
86
42
|
return compact_values.join(delimiter) if compact_values.length == 1 ||
|
@@ -100,16 +56,16 @@ module Stanford
|
|
100
56
|
value.strip.end_with?('.', ',', ':', ';')
|
101
57
|
end
|
102
58
|
|
103
|
-
def edition_vals_str
|
104
|
-
|
59
|
+
def edition_vals_str
|
60
|
+
element.edition.reject do |e|
|
105
61
|
e.text.strip.empty?
|
106
62
|
end.map(&:text).join(' ').strip
|
107
63
|
end
|
108
64
|
|
109
|
-
def publisher_vals_str
|
110
|
-
return if
|
65
|
+
def publisher_vals_str
|
66
|
+
return if element.publisher.text.strip.empty?
|
111
67
|
|
112
|
-
publishers =
|
68
|
+
publishers = element.publisher.reject do |p|
|
113
69
|
p.text.strip.empty?
|
114
70
|
end.map(&:text)
|
115
71
|
compact_and_join_with_delimiter(publishers, ' : ')
|
@@ -117,10 +73,10 @@ module Stanford
|
|
117
73
|
|
118
74
|
# PLACE processing methods ------
|
119
75
|
|
120
|
-
def place_vals_str
|
121
|
-
return if
|
76
|
+
def place_vals_str
|
77
|
+
return if element.place.text.strip.empty?
|
122
78
|
|
123
|
-
places = place_terms
|
79
|
+
places = place_terms.reject do |p|
|
124
80
|
p.text.strip.empty?
|
125
81
|
end.map(&:text)
|
126
82
|
compact_and_join_with_delimiter(places, ' : ')
|
@@ -133,25 +89,26 @@ module Stanford
|
|
133
89
|
end
|
134
90
|
end
|
135
91
|
|
136
|
-
def place_terms
|
137
|
-
return [] unless
|
138
|
-
|
92
|
+
def place_terms
|
93
|
+
return [] unless element.respond_to?(:place) &&
|
94
|
+
element.place.respond_to?(:placeTerm)
|
139
95
|
|
140
|
-
if unencoded_place_terms?(
|
141
|
-
|
96
|
+
if unencoded_place_terms?(element)
|
97
|
+
element.place.placeTerm.select do |term|
|
142
98
|
!term.attributes['type'].respond_to?(:value) ||
|
143
99
|
term.attributes['type'].value == 'text'
|
144
100
|
end.compact
|
145
101
|
else
|
146
|
-
|
102
|
+
element.place.placeTerm.map do |term|
|
147
103
|
next unless term.attributes['type'].respond_to?(:value) &&
|
148
104
|
term.attributes['type'].value == 'code' &&
|
149
105
|
term.attributes['authority'].respond_to?(:value) &&
|
150
106
|
term.attributes['authority'].value == 'marccountry' &&
|
151
|
-
|
107
|
+
!['xx', 'vp'].include?(term.text.strip) &&
|
108
|
+
MARC_COUNTRY.include?(term.text.strip)
|
152
109
|
|
153
110
|
term = term.clone
|
154
|
-
term.content =
|
111
|
+
term.content = MARC_COUNTRY[term.text.strip]
|
155
112
|
term
|
156
113
|
end.compact
|
157
114
|
end
|
@@ -159,307 +116,173 @@ module Stanford
|
|
159
116
|
|
160
117
|
# DATE processing methods ------
|
161
118
|
|
162
|
-
def date_str
|
163
|
-
date_vals = origin_info_date_vals
|
119
|
+
def date_str
|
120
|
+
date_vals = origin_info_date_vals
|
164
121
|
return if date_vals.empty?
|
165
|
-
|
166
122
|
date_vals.map(&:strip).join(' ')
|
167
123
|
end
|
168
124
|
|
169
|
-
def origin_info_date_vals
|
125
|
+
def origin_info_date_vals
|
170
126
|
date_field_keys.map do |date_field|
|
171
|
-
next unless
|
127
|
+
next unless element.respond_to?(date_field)
|
172
128
|
|
173
|
-
date_elements =
|
174
|
-
|
129
|
+
date_elements = element.send(date_field)
|
130
|
+
parse_dates(date_elements) if date_elements.present?
|
175
131
|
end.compact.flatten
|
176
132
|
end
|
177
133
|
|
178
|
-
def date_elements_display_vals(ng_date_elements)
|
179
|
-
apply_date_qualifier_decoration(
|
180
|
-
dedup_dates(
|
181
|
-
join_date_ranges(
|
182
|
-
process_decade_century_dates(
|
183
|
-
process_bc_ad_dates(
|
184
|
-
process_encoded_dates(ignore_bad_dates(ng_date_elements))
|
185
|
-
)
|
186
|
-
)
|
187
|
-
)
|
188
|
-
)
|
189
|
-
)
|
190
|
-
end
|
191
|
-
|
192
134
|
def date_field_keys
|
193
135
|
[:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
|
194
136
|
end
|
195
137
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
val != '9999' && val != '0000-00-00' && val != 'uuuu'
|
200
|
-
end
|
201
|
-
end
|
138
|
+
class DateValue
|
139
|
+
attr_reader :value
|
140
|
+
delegate :text, :date, :point, :qualifier, :encoding, to: :value
|
202
141
|
|
203
|
-
|
204
|
-
|
205
|
-
if date_is_w3cdtf?(ng_date_element)
|
206
|
-
process_w3cdtf_date(ng_date_element)
|
207
|
-
elsif date_is_iso8601?(ng_date_element)
|
208
|
-
process_iso8601_date(ng_date_element)
|
209
|
-
else
|
210
|
-
ng_date_element
|
211
|
-
end
|
142
|
+
def initialize(value)
|
143
|
+
@value = value
|
212
144
|
end
|
213
|
-
end
|
214
145
|
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
# See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
|
219
|
-
def process_bc_ad_dates(ng_date_elements)
|
220
|
-
ng_date_elements.map do |ng_date_element|
|
221
|
-
case
|
222
|
-
when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
|
223
|
-
ng_date_element.content = "1 B.C."
|
224
|
-
when date_is_bc_edtf?(ng_date_element)
|
225
|
-
year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
|
226
|
-
ng_date_element.content = "#{year} B.C."
|
227
|
-
when date_is_ad?(ng_date_element)
|
228
|
-
ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
|
229
|
-
end
|
230
|
-
ng_date_element
|
146
|
+
# True if the element text isn't blank or one of the placeholders "9999", "0000-00-00", or "uuuu".
|
147
|
+
def valid?
|
148
|
+
text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
|
231
149
|
end
|
232
|
-
end
|
233
150
|
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
process_century_date(ng_date_element)
|
240
|
-
else
|
241
|
-
ng_date_element
|
151
|
+
# Element text reduced to digits and hyphen. Captures date ranges and
|
152
|
+
# negative (B.C.) dates. Used for comparison/deduping.
|
153
|
+
def base_value
|
154
|
+
if text =~ /^\[?1\d{3}-\d{2}\??\]?$/
|
155
|
+
return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
|
242
156
|
end
|
243
|
-
end
|
244
|
-
end
|
245
157
|
|
246
|
-
|
247
|
-
if dates_are_range?(ng_date_elements)
|
248
|
-
start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
|
249
|
-
end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
|
250
|
-
ng_date_elements.map do |date|
|
251
|
-
date = date.clone # clone the date object so we don't append the same one
|
252
|
-
if normalize_date(date.text) == normalize_date(start_date.text)
|
253
|
-
date.content = [start_date.text, end_date.text].join(' - ')
|
254
|
-
date
|
255
|
-
elsif normalize_date(date.text) != normalize_date(end_date.text)
|
256
|
-
date
|
257
|
-
end
|
258
|
-
end.compact
|
259
|
-
elsif dates_are_open_range?(ng_date_elements)
|
260
|
-
start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
|
261
|
-
ng_date_elements.map do |date|
|
262
|
-
date = date.clone # clone the date object so we don't append the same one
|
263
|
-
date.content = "#{start_date.text}-" if date.text == start_date.text
|
264
|
-
date
|
265
|
-
end
|
266
|
-
else
|
267
|
-
ng_date_elements
|
158
|
+
text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
|
268
159
|
end
|
269
|
-
end
|
270
160
|
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
[ng_date_elements.find { |d| !d.attributes['encoding'] }]
|
278
|
-
else
|
279
|
-
[ng_date_elements.first]
|
161
|
+
# Decoded version of the date, if it was encoded. Strips leading zeroes.
|
162
|
+
def decoded_value
|
163
|
+
return text.strip unless date
|
164
|
+
|
165
|
+
unless encoding.present?
|
166
|
+
return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
|
280
167
|
end
|
281
|
-
else
|
282
|
-
ng_date_elements
|
283
|
-
end
|
284
|
-
end
|
285
168
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
169
|
+
# Delegate to the appropriate decoding method, if any
|
170
|
+
case value.precision
|
171
|
+
when :day
|
172
|
+
date.strftime('%B %e, %Y')
|
173
|
+
when :month
|
174
|
+
date.strftime('%B %Y')
|
175
|
+
when :year
|
176
|
+
year = date.year
|
177
|
+
if year < 1
|
178
|
+
"#{year.abs + 1} B.C."
|
179
|
+
# Any dates before the year 1000 are explicitly marked A.D.
|
180
|
+
elsif year > 1 && year < 1000
|
181
|
+
"#{year} A.D."
|
182
|
+
else
|
183
|
+
year.to_s
|
184
|
+
end
|
185
|
+
when :century
|
186
|
+
return "#{(date.to_s[0..1].to_i + 1).ordinalize} century"
|
187
|
+
when :decade
|
188
|
+
return "#{date.year}s"
|
189
|
+
else
|
190
|
+
text.strip
|
295
191
|
end
|
296
|
-
date
|
297
192
|
end
|
298
|
-
return_fields.map(&:text)
|
299
|
-
end
|
300
193
|
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
end
|
194
|
+
# Decoded date with "B.C." or "A.D." and qualifier markers. See (outdated):
|
195
|
+
# https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
|
196
|
+
def qualified_value
|
197
|
+
date = decoded_value
|
306
198
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
ng_date_element.attributes['qualifier'].value == 'questionable'
|
311
|
-
end
|
312
|
-
|
313
|
-
def date_is_inferred?(ng_date_element)
|
314
|
-
ng_date_element.attributes['qualifier'] &&
|
315
|
-
ng_date_element.attributes['qualifier'].respond_to?(:value) &&
|
316
|
-
ng_date_element.attributes['qualifier'].value == 'inferred'
|
317
|
-
end
|
199
|
+
return "[ca. #{date}]" if qualifier == 'approximate'
|
200
|
+
return "[#{date}?]" if qualifier == 'questionable'
|
201
|
+
return "[#{date}]" if qualifier == 'inferred'
|
318
202
|
|
319
|
-
|
320
|
-
ng_date_elements.any? do |element|
|
321
|
-
element.attributes['point'] &&
|
322
|
-
element.attributes['point'].respond_to?(:value) &&
|
323
|
-
element.attributes['point'].value == 'start'
|
324
|
-
end && !ng_date_elements.any? do |element|
|
325
|
-
element.attributes['point'] &&
|
326
|
-
element.attributes['point'].respond_to?(:value) &&
|
327
|
-
element.attributes['point'].value == 'end'
|
203
|
+
date
|
328
204
|
end
|
329
205
|
end
|
330
206
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
end
|
207
|
+
class DateRange
|
208
|
+
def initialize(start: nil, stop: nil)
|
209
|
+
@start = start
|
210
|
+
@stop = stop
|
336
211
|
end
|
337
|
-
attributes.include?('start') &&
|
338
|
-
attributes.include?('end')
|
339
|
-
end
|
340
212
|
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
if ng_date_element.text.strip =~ /^\d{4}-\d{2}-\d{2}$/
|
345
|
-
Date.parse(ng_date_element.text).strftime(full_date_format)
|
346
|
-
elsif ng_date_element.text.strip =~ /^\d{4}-\d{2}$/
|
347
|
-
Date.parse("#{ng_date_element.text}-01").strftime(short_date_format)
|
348
|
-
else
|
349
|
-
ng_date_element.content
|
350
|
-
end
|
351
|
-
rescue
|
352
|
-
ng_date_element.content
|
213
|
+
# Base value as hyphen-joined string. Used for comparison/deduping.
|
214
|
+
def base_value
|
215
|
+
"#{@start&.base_value}-#{@stop&.base_value}"
|
353
216
|
end
|
354
|
-
ng_date_element
|
355
|
-
end
|
356
217
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
if ng_date_element.text.strip =~ /^\d{8,}$/
|
361
|
-
Date.parse(ng_date_element.text).strftime(full_date_format)
|
362
|
-
else
|
363
|
-
ng_date_element.content
|
364
|
-
end
|
365
|
-
rescue
|
366
|
-
ng_date_element.content
|
218
|
+
# Base values as array. Used for comparison/deduping of individual dates.
|
219
|
+
def base_values
|
220
|
+
[@start&.base_value, @stop&.base_value].compact
|
367
221
|
end
|
368
|
-
ng_date_element
|
369
|
-
end
|
370
222
|
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
def process_decade_date(ng_date_element)
|
375
|
-
my_ng_date_element = ng_date_element.clone
|
376
|
-
my_ng_date_element.content = begin
|
377
|
-
orig_date_str = ng_date_element.text.strip
|
378
|
-
# note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
|
379
|
-
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
380
|
-
if decade_matches
|
381
|
-
decade_str = decade_matches[2]
|
382
|
-
changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
|
383
|
-
zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
|
384
|
-
new_decade_str = "#{zeroth_year}s" if zeroth_year
|
385
|
-
my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
|
386
|
-
else
|
387
|
-
my_ng_date_element.content
|
388
|
-
end
|
389
|
-
rescue
|
390
|
-
my_ng_date_element.content
|
223
|
+
# The encoding value for the start of the range, or stop if not present.
|
224
|
+
def encoding
|
225
|
+
@start&.encoding || @stop&.encoding
|
391
226
|
end
|
392
|
-
my_ng_date_element
|
393
|
-
end
|
394
227
|
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
|
228
|
+
# Decoded dates with "B.C." or "A.D." and qualifier markers applied to
|
229
|
+
# the entire range, or individually if their qualifiers differ.
|
230
|
+
def qualified_value
|
231
|
+
if @start&.qualifier == @stop&.qualifier
|
232
|
+
qualifier = @start&.qualifier || @stop&.qualifier
|
233
|
+
date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
|
234
|
+
return "[ca. #{date}]" if qualifier == 'approximate'
|
235
|
+
return "[#{date}?]" if qualifier == 'questionable'
|
236
|
+
return "[#{date}]" if qualifier == 'inferred'
|
237
|
+
|
238
|
+
date
|
407
239
|
else
|
408
|
-
|
240
|
+
"#{@start&.qualified_value} - #{@stop&.qualified_value}"
|
409
241
|
end
|
410
|
-
rescue
|
411
|
-
my_ng_date_element.content
|
412
242
|
end
|
413
|
-
my_ng_date_element
|
414
|
-
end
|
415
|
-
|
416
|
-
def field_is_encoded?(ng_element, encoding)
|
417
|
-
ng_element.attributes['encoding'] &&
|
418
|
-
ng_element.attributes['encoding'].respond_to?(:value) &&
|
419
|
-
ng_element.attributes['encoding'].value.downcase == encoding
|
420
|
-
end
|
421
|
-
|
422
|
-
def date_is_bc_edtf?(ng_date_element)
|
423
|
-
ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
|
424
243
|
end
|
425
244
|
|
426
|
-
def
|
427
|
-
|
428
|
-
|
429
|
-
|
245
|
+
def parse_dates(elements)
|
246
|
+
# convert to DateValue objects and keep only valid ones
|
247
|
+
dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
|
248
|
+
# join any date ranges into DateRange objects
|
249
|
+
point, nonpoint = dates.partition(&:point)
|
250
|
+
if point.any?
|
251
|
+
range = DateRange.new(start: point.find { |date| date.point == 'start' },
|
252
|
+
stop: point.find { |date| date.point == 'end' })
|
253
|
+
nonpoint.unshift(range)
|
254
|
+
end
|
255
|
+
dates = nonpoint
|
430
256
|
|
431
|
-
|
432
|
-
|
433
|
-
|
257
|
+
# ensure dates are unique with respect to their base values
|
258
|
+
dates = dates.group_by(&:base_value).map do |_value, group|
|
259
|
+
next group.first if group.one?
|
434
260
|
|
435
|
-
|
436
|
-
|
437
|
-
|
261
|
+
# if one of the duplicates wasn't encoded, use that one. see:
|
262
|
+
# https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
|
263
|
+
if group.reject(&:encoding).any?
|
264
|
+
group.reject(&:encoding).first
|
438
265
|
|
439
|
-
|
440
|
-
|
441
|
-
|
266
|
+
# otherwise fall back to the last date in the group
|
267
|
+
else
|
268
|
+
group.last
|
269
|
+
end
|
270
|
+
end
|
442
271
|
|
443
|
-
|
444
|
-
|
445
|
-
ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
|
446
|
-
end
|
272
|
+
# compare the remaining dates against one part or the other of a range
|
273
|
+
date_ranges = dates.select { |date| date.is_a?(DateRange) }
|
447
274
|
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
end
|
275
|
+
# remove any range that duplicates an unencoded date that includes that range
|
276
|
+
duplicated_ranges = dates.flat_map do |date|
|
277
|
+
next if date.is_a?(DateRange) || date.encoding.present?
|
452
278
|
|
453
|
-
|
454
|
-
|
455
|
-
end
|
279
|
+
date_ranges.select { |r| r.base_values.include?(date.base_value) }
|
280
|
+
end
|
456
281
|
|
457
|
-
|
458
|
-
@short_date_format ||= short_date_format
|
459
|
-
end
|
282
|
+
dates = dates - duplicated_ranges
|
460
283
|
|
461
|
-
|
462
|
-
|
284
|
+
# output formatted dates with qualifiers, A.D./B.C., etc.
|
285
|
+
dates.map(&:qualified_value)
|
463
286
|
end
|
464
287
|
end
|
465
288
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
|
2
|
+
module Stanford
|
3
|
+
module Mods
|
4
|
+
class Record < ::Mods::Record
|
5
|
+
include Stanford::Mods::Geospatial
|
6
|
+
include Stanford::Mods::Name
|
7
|
+
include Stanford::Mods::OriginInfo
|
8
|
+
include Stanford::Mods::PhysicalLocation
|
9
|
+
include Stanford::Mods::SearchworksSubjects
|
10
|
+
include Stanford::Mods::Searchworks
|
11
|
+
include Stanford::Mods::Title
|
12
|
+
|
13
|
+
attr_writer :druid
|
14
|
+
|
15
|
+
def druid
|
16
|
+
@druid || 'Unknown item'
|
17
|
+
end
|
18
|
+
end # Record class
|
19
|
+
end # Mods module
|
20
|
+
end # Stanford module
|
File without changes
|
data/lib/stanford-mods.rb
CHANGED
@@ -1,20 +1,21 @@
|
|
1
1
|
require 'active_support'
|
2
|
+
require 'active_support/core_ext/integer/inflections'
|
2
3
|
require 'mods'
|
3
4
|
require 'stanford-mods/date_parsing'
|
4
5
|
require 'stanford-mods/coordinate'
|
5
|
-
require 'stanford-mods/geo_spatial'
|
6
|
-
require 'stanford-mods/geo_utils'
|
7
6
|
require 'stanford-mods/imprint'
|
8
|
-
require 'stanford-mods/
|
9
|
-
require 'stanford-mods/
|
10
|
-
require 'stanford-mods/
|
11
|
-
require 'stanford-mods/
|
7
|
+
require 'stanford-mods/vocabularies/searchworks_languages'
|
8
|
+
require 'stanford-mods/concerns/geo_spatial'
|
9
|
+
require 'stanford-mods/concerns/name'
|
10
|
+
require 'stanford-mods/concerns/origin_info'
|
11
|
+
require 'stanford-mods/concerns/physical_location'
|
12
|
+
require 'stanford-mods/concerns/searchworks'
|
13
|
+
require 'stanford-mods/concerns/searchworks_subjects'
|
14
|
+
require 'stanford-mods/concerns/title'
|
15
|
+
require 'stanford-mods/record'
|
12
16
|
require 'stanford-mods/version'
|
13
17
|
|
14
|
-
# Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
|
15
18
|
module Stanford
|
16
19
|
module Mods
|
17
|
-
|
18
|
-
|
19
|
-
end # Mods module
|
20
|
-
end # Stanford module
|
20
|
+
end
|
21
|
+
end
|