stanford-mods 2.6.4 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +1 -1
  3. data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
  4. data/lib/stanford-mods/concerns/name.rb +57 -0
  5. data/lib/stanford-mods/concerns/origin_info.rb +113 -0
  6. data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
  7. data/lib/stanford-mods/concerns/searchworks.rb +125 -0
  8. data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
  9. data/lib/stanford-mods/concerns/title.rb +87 -0
  10. data/lib/stanford-mods/coordinate.rb +24 -3
  11. data/lib/stanford-mods/date_parsing.rb +32 -289
  12. data/lib/stanford-mods/imprint.rb +170 -322
  13. data/lib/stanford-mods/record.rb +20 -0
  14. data/lib/stanford-mods/version.rb +1 -1
  15. data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
  16. data/lib/stanford-mods.rb +12 -11
  17. data/spec/fixtures/searchworks_imprint_data.rb +38 -39
  18. data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
  19. data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
  20. data/spec/geo_spatial_spec.rb +1 -6
  21. data/spec/imprint_spec.rb +263 -207
  22. data/spec/lib/stanford-mods/coordinate_spec.rb +3 -5
  23. data/spec/name_spec.rb +26 -230
  24. data/spec/origin_info_spec.rb +34 -300
  25. data/spec/searchworks_basic_spec.rb +1 -3
  26. data/spec/searchworks_pub_dates_spec.rb +0 -215
  27. data/spec/searchworks_spec.rb +0 -21
  28. data/spec/searchworks_subject_raw_spec.rb +106 -105
  29. data/spec/searchworks_subject_spec.rb +19 -55
  30. data/spec/searchworks_title_spec.rb +5 -5
  31. data/stanford-mods.gemspec +1 -1
  32. metadata +19 -15
  33. data/lib/marc_countries.rb +0 -387
  34. data/lib/stanford-mods/geo_utils.rb +0 -28
  35. data/lib/stanford-mods/name.rb +0 -80
  36. data/lib/stanford-mods/origin_info.rb +0 -489
  37. data/lib/stanford-mods/searchworks.rb +0 -333
  38. data/lib/stanford-mods/searchworks_subjects.rb +0 -196
  39. data/spec/date_parsing_spec.rb +0 -905
@@ -1,4 +1,4 @@
1
- require 'active_support/core_ext/integer/inflections'
1
+ require 'mods/marc_country_codes'
2
2
 
3
3
  module Stanford
4
4
  module Mods
@@ -10,77 +10,33 @@ module Stanford
10
10
  # however, the date_parsing class only does years, and this does finer tuned dates and also
11
11
  # reformats them according to the encoding.
12
12
  class Imprint
13
- # @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
14
- def initialize(originInfo_ng_nodeset)
15
- @originInfo_ng_nodeset = originInfo_ng_nodeset
16
- end
13
+ attr_reader :element
17
14
 
18
- require 'marc_countries'
15
+ # @param element [Nokogiri::XML::Node] an originInfo node
16
+ def initialize(element)
17
+ @element = element
18
+ end
19
19
 
20
- # @return Array<String> each String is an imprint statement from a single originInfo element
21
20
  def imprint_statements
22
- results = []
23
- @originInfo_ng_nodeset.each do |origin_info_node|
24
- edition = edition_vals_str(origin_info_node)
25
- place = place_vals_str(origin_info_node)
26
- publisher = publisher_vals_str(origin_info_node)
27
- dates = date_str(origin_info_node)
28
-
29
- place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
30
- edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
31
- ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
32
-
33
- results << ed_place_pub_dates unless ed_place_pub_dates.empty?
34
- end
35
- results
21
+ display_str
36
22
  end
37
23
 
24
+ # @return [String] an imprint statement from a single originInfo element
38
25
  def display_str
39
- imprint_statements.join('; ') if imprint_statements.present?
40
- end
26
+ edition = edition_vals_str
27
+ place = place_vals_str
28
+ publisher = publisher_vals_str
29
+ dates = date_str
41
30
 
42
- # @return Array<Integer> an array of publication years for the resource
43
- def publication_date_for_slider
44
- @originInfo_ng_nodeset.map do |origin_info_node|
45
- date_elements = if origin_info_node.as_object.first.key_dates.any?
46
- origin_info_node.as_object.first.key_dates.map(&:as_object).map(&:first)
47
- else
48
- date_field_keys.map do |date_field|
49
- next unless origin_info_node.respond_to?(date_field)
50
-
51
- date_elements = origin_info_node.send(date_field)
52
- date_elements.map(&:as_object).map(&:first) if date_elements.any?
53
- end.compact.first
54
- end
55
-
56
- if date_elements.nil? || date_elements.none?
57
- []
58
- elsif date_elements.find(&:start?) &&
59
- date_elements.find(&:start?).as_range &&
60
- date_elements.find(&:end?) &&
61
- date_elements.find(&:end?).as_range
62
- start_date = date_elements.find(&:start?)
63
- end_date = date_elements.find(&:end?)
64
-
65
- (start_date.as_range.min.year..end_date.as_range.max.year).to_a
66
- elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
67
- start_date = date_elements.find(&:start?)
68
-
69
- (start_date.as_range.min.year..Time.now.year).to_a
70
- elsif date_elements.one?
71
- date_elements.first.to_a.map(&:year)
72
- else
73
- date_elements.map { |v| v.to_a.map(&:year) }.flatten
74
- end
75
- end.flatten
31
+ place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
32
+ edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
33
+ ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
34
+
35
+ ed_place_pub_dates
76
36
  end
77
37
 
78
38
  private
79
39
 
80
- def extract_year(el)
81
- DateParsing.year_int_from_date_str(el.text)
82
- end
83
-
84
40
  def compact_and_join_with_delimiter(values, delimiter)
85
41
  compact_values = values.compact.reject { |v| v.strip.empty? }
86
42
  return compact_values.join(delimiter) if compact_values.length == 1 ||
@@ -100,16 +56,16 @@ module Stanford
100
56
  value.strip.end_with?('.', ',', ':', ';')
101
57
  end
102
58
 
103
- def edition_vals_str(origin_info_node)
104
- origin_info_node.edition.reject do |e|
59
+ def edition_vals_str
60
+ element.edition.reject do |e|
105
61
  e.text.strip.empty?
106
62
  end.map(&:text).join(' ').strip
107
63
  end
108
64
 
109
- def publisher_vals_str(origin_info_node)
110
- return if origin_info_node.publisher.text.strip.empty?
65
+ def publisher_vals_str
66
+ return if element.publisher.text.strip.empty?
111
67
 
112
- publishers = origin_info_node.publisher.reject do |p|
68
+ publishers = element.publisher.reject do |p|
113
69
  p.text.strip.empty?
114
70
  end.map(&:text)
115
71
  compact_and_join_with_delimiter(publishers, ' : ')
@@ -117,10 +73,10 @@ module Stanford
117
73
 
118
74
  # PLACE processing methods ------
119
75
 
120
- def place_vals_str(origin_info_node)
121
- return if origin_info_node.place.text.strip.empty?
76
+ def place_vals_str
77
+ return if element.place.text.strip.empty?
122
78
 
123
- places = place_terms(origin_info_node).reject do |p|
79
+ places = place_terms.reject do |p|
124
80
  p.text.strip.empty?
125
81
  end.map(&:text)
126
82
  compact_and_join_with_delimiter(places, ' : ')
@@ -133,25 +89,26 @@ module Stanford
133
89
  end
134
90
  end
135
91
 
136
- def place_terms(origin_info_element)
137
- return [] unless origin_info_element.respond_to?(:place) &&
138
- origin_info_element.place.respond_to?(:placeTerm)
92
+ def place_terms
93
+ return [] unless element.respond_to?(:place) &&
94
+ element.place.respond_to?(:placeTerm)
139
95
 
140
- if unencoded_place_terms?(origin_info_element)
141
- origin_info_element.place.placeTerm.select do |term|
96
+ if unencoded_place_terms?(element)
97
+ element.place.placeTerm.select do |term|
142
98
  !term.attributes['type'].respond_to?(:value) ||
143
99
  term.attributes['type'].value == 'text'
144
100
  end.compact
145
101
  else
146
- origin_info_element.place.placeTerm.map do |term|
102
+ element.place.placeTerm.map do |term|
147
103
  next unless term.attributes['type'].respond_to?(:value) &&
148
104
  term.attributes['type'].value == 'code' &&
149
105
  term.attributes['authority'].respond_to?(:value) &&
150
106
  term.attributes['authority'].value == 'marccountry' &&
151
- MARC_COUNTRIES.include?(term.text.strip)
107
+ !['xx', 'vp'].include?(term.text.strip) &&
108
+ MARC_COUNTRY.include?(term.text.strip)
152
109
 
153
110
  term = term.clone
154
- term.content = MARC_COUNTRIES[term.text.strip]
111
+ term.content = MARC_COUNTRY[term.text.strip]
155
112
  term
156
113
  end.compact
157
114
  end
@@ -159,307 +116,198 @@ module Stanford
159
116
 
160
117
  # DATE processing methods ------
161
118
 
162
- def date_str(origin_info_node)
163
- date_vals = origin_info_date_vals(origin_info_node)
119
+ def date_str
120
+ date_vals = origin_info_date_vals
164
121
  return if date_vals.empty?
165
-
166
122
  date_vals.map(&:strip).join(' ')
167
123
  end
168
124
 
169
- def origin_info_date_vals(origin_info_node)
125
+ def origin_info_date_vals
170
126
  date_field_keys.map do |date_field|
171
- next unless origin_info_node.respond_to?(date_field)
127
+ next unless element.respond_to?(date_field)
172
128
 
173
- date_elements = origin_info_node.send(date_field)
174
- date_elements_display_vals(date_elements) if date_elements.present?
129
+ date_elements = element.send(date_field)
130
+ parse_dates(date_elements) if date_elements.present?
175
131
  end.compact.flatten
176
132
  end
177
133
 
178
- def date_elements_display_vals(ng_date_elements)
179
- apply_date_qualifier_decoration(
180
- dedup_dates(
181
- join_date_ranges(
182
- process_decade_century_dates(
183
- process_bc_ad_dates(
184
- process_encoded_dates(ignore_bad_dates(ng_date_elements))
185
- )
186
- )
187
- )
188
- )
189
- )
190
- end
191
-
192
134
  def date_field_keys
193
135
  [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
194
136
  end
195
137
 
196
- def ignore_bad_dates(ng_date_elements)
197
- ng_date_elements.select do |ng_date_element|
198
- val = ng_date_element.text.strip
199
- val != '9999' && val != '0000-00-00' && val != 'uuuu'
138
+ class DateValue
139
+ attr_reader :value
140
+ delegate :text, :date, :point, :qualifier, :encoding, to: :value
141
+
142
+ def initialize(value)
143
+ @value = value
200
144
  end
201
- end
202
145
 
203
- def process_encoded_dates(ng_date_elements)
204
- ng_date_elements.map do |ng_date_element|
205
- if date_is_w3cdtf?(ng_date_element)
206
- process_w3cdtf_date(ng_date_element)
207
- elsif date_is_iso8601?(ng_date_element)
208
- process_iso8601_date(ng_date_element)
209
- else
210
- ng_date_element
211
- end
146
+ # True if the element text isn't blank or one of the placeholders "9999", "0000-00-00", or "uuuu".
147
+ def valid?
148
+ text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
212
149
  end
213
- end
214
150
 
215
- # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
216
- # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
217
- # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
218
- # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
219
- def process_bc_ad_dates(ng_date_elements)
220
- ng_date_elements.map do |ng_date_element|
221
- case
222
- when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
223
- ng_date_element.content = "1 B.C."
224
- when date_is_bc_edtf?(ng_date_element)
225
- year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
226
- ng_date_element.content = "#{year} B.C."
227
- when date_is_ad?(ng_date_element)
228
- ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
151
+ # Element text reduced to digits and hyphen. Captures date ranges and
152
+ # negative (B.C.) dates. Used for comparison/deduping.
153
+ def base_value
154
+ if text =~ /^\[?1\d{3}-\d{2}\??\]?$/
155
+ return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
229
156
  end
230
- ng_date_element
157
+
158
+ text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
231
159
  end
232
- end
233
160
 
234
- def process_decade_century_dates(ng_date_elements)
235
- ng_date_elements.map do |ng_date_element|
236
- if date_is_decade?(ng_date_element)
237
- process_decade_date(ng_date_element)
238
- elsif date_is_century?(ng_date_element)
239
- process_century_date(ng_date_element)
240
- else
241
- ng_date_element
161
+ # Decoded version of the date, if it was encoded. Strips leading zeroes.
162
+ def decoded_value
163
+ return text.strip unless date
164
+
165
+ unless encoding.present?
166
+ return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
242
167
  end
243
- end
244
- end
245
168
 
246
- def join_date_ranges(ng_date_elements)
247
- if dates_are_range?(ng_date_elements)
248
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
249
- end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
250
- ng_date_elements.map do |date|
251
- date = date.clone # clone the date object so we don't append the same one
252
- if normalize_date(date.text) == normalize_date(start_date.text)
253
- date.content = [start_date.text, end_date.text].join(' - ')
254
- date
255
- elsif normalize_date(date.text) != normalize_date(end_date.text)
256
- date
169
+ if date.is_a?(EDTF::Interval)
170
+ if value.precision == :century || value.precision == :decade
171
+ return format_date(date, value.precision)
257
172
  end
258
- end.compact
259
- elsif dates_are_open_range?(ng_date_elements)
260
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
261
- ng_date_elements.map do |date|
262
- date = date.clone # clone the date object so we don't append the same one
263
- date.content = "#{start_date.text}-" if date.text == start_date.text
264
- date
265
- end
266
- else
267
- ng_date_elements
268
- end
269
- end
270
173
 
271
- def dedup_dates(ng_date_elements)
272
- date_text = ng_date_elements.map { |d| normalize_date(d.text) }
273
- if date_text != date_text.uniq
274
- if ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }
275
- [ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }]
276
- elsif ng_date_elements.find { |d| !d.attributes['encoding'] }
277
- [ng_date_elements.find { |d| !d.attributes['encoding'] }]
174
+ range = [
175
+ format_date(date.min, date.min.precision),
176
+ format_date(date.max, date.max.precision)
177
+ ].uniq.compact
178
+
179
+ return text.strip if range.empty?
180
+
181
+ range.join(' - ')
278
182
  else
279
- [ng_date_elements.first]
183
+ format_date(date, value.precision) || text.strip
280
184
  end
281
- else
282
- ng_date_elements
283
185
  end
284
- end
285
186
 
286
- def apply_date_qualifier_decoration(ng_date_elements)
287
- return_fields = ng_date_elements.map do |date|
288
- date = date.clone
289
- if date_is_approximate?(date)
290
- date.content = "[ca. #{date.text}]"
291
- elsif date_is_questionable?(date)
292
- date.content = "[#{date.text}?]"
293
- elsif date_is_inferred?(date)
294
- date.content = "[#{date.text}]"
187
+ def format_date(date, precision)
188
+ case precision
189
+ when :day
190
+ date.strftime('%B %e, %Y')
191
+ when :month
192
+ date.strftime('%B %Y')
193
+ when :year
194
+ year = date.year
195
+ if year < 1
196
+ "#{year.abs + 1} B.C."
197
+ # Any dates before the year 1000 are explicitly marked A.D.
198
+ elsif year > 1 && year < 1000
199
+ "#{year} A.D."
200
+ else
201
+ year.to_s
202
+ end
203
+ when :century
204
+ if date.year.negative?
205
+ "#{((date.year / 100).abs + 1).ordinalize} century B.C."
206
+ else
207
+ "#{((date.year / 100) + 1).ordinalize} century"
208
+ end
209
+ when :decade
210
+ "#{date.year}s"
295
211
  end
296
- date
297
212
  end
298
- return_fields.map(&:text)
299
- end
300
213
 
301
- def date_is_approximate?(ng_date_element)
302
- ng_date_element.attributes['qualifier'] &&
303
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
304
- ng_date_element.attributes['qualifier'].value == 'approximate'
305
- end
306
-
307
- def date_is_questionable?(ng_date_element)
308
- ng_date_element.attributes['qualifier'] &&
309
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
310
- ng_date_element.attributes['qualifier'].value == 'questionable'
311
- end
312
-
313
- def date_is_inferred?(ng_date_element)
314
- ng_date_element.attributes['qualifier'] &&
315
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
316
- ng_date_element.attributes['qualifier'].value == 'inferred'
317
- end
214
+ # Decoded date with "B.C." or "A.D." and qualifier markers. See (outdated):
215
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
216
+ def qualified_value
217
+ qualified_format = case qualifier
218
+ when 'approximate'
219
+ '[ca. %s]'
220
+ when 'questionable'
221
+ '[%s?]'
222
+ when 'inferred'
223
+ '[%s]'
224
+ else
225
+ '%s'
226
+ end
318
227
 
319
- def dates_are_open_range?(ng_date_elements)
320
- ng_date_elements.any? do |element|
321
- element.attributes['point'] &&
322
- element.attributes['point'].respond_to?(:value) &&
323
- element.attributes['point'].value == 'start'
324
- end && !ng_date_elements.any? do |element|
325
- element.attributes['point'] &&
326
- element.attributes['point'].respond_to?(:value) &&
327
- element.attributes['point'].value == 'end'
228
+ format(qualified_format, decoded_value)
328
229
  end
329
230
  end
330
231
 
331
- def dates_are_range?(ng_date_elements)
332
- attributes = ng_date_elements.map do |date|
333
- if date.attributes['point'].respond_to?(:value)
334
- date.attributes['point'].value
335
- end
232
+ class DateRange
233
+ def initialize(start: nil, stop: nil)
234
+ @start = start
235
+ @stop = stop
336
236
  end
337
- attributes.include?('start') &&
338
- attributes.include?('end')
339
- end
340
237
 
341
- def process_w3cdtf_date(ng_date_element)
342
- ng_date_element = ng_date_element.clone
343
- ng_date_element.content = begin
344
- if ng_date_element.text.strip =~ /^\d{4}-\d{2}-\d{2}$/
345
- Date.parse(ng_date_element.text).strftime(full_date_format)
346
- elsif ng_date_element.text.strip =~ /^\d{4}-\d{2}$/
347
- Date.parse("#{ng_date_element.text}-01").strftime(short_date_format)
348
- else
349
- ng_date_element.content
350
- end
351
- rescue
352
- ng_date_element.content
238
+ # Base value as hyphen-joined string. Used for comparison/deduping.
239
+ def base_value
240
+ "#{@start&.base_value}-#{@stop&.base_value}"
353
241
  end
354
- ng_date_element
355
- end
356
242
 
357
- def process_iso8601_date(ng_date_element)
358
- ng_date_element = ng_date_element.clone
359
- ng_date_element.content = begin
360
- if ng_date_element.text.strip =~ /^\d{8,}$/
361
- Date.parse(ng_date_element.text).strftime(full_date_format)
362
- else
363
- ng_date_element.content
364
- end
365
- rescue
366
- ng_date_element.content
243
+ # Base values as array. Used for comparison/deduping of individual dates.
244
+ def base_values
245
+ [@start&.base_value, @stop&.base_value].compact
367
246
  end
368
- ng_date_element
369
- end
370
247
 
371
- DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')
372
-
373
- # strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content
374
- def process_decade_date(ng_date_element)
375
- my_ng_date_element = ng_date_element.clone
376
- my_ng_date_element.content = begin
377
- orig_date_str = ng_date_element.text.strip
378
- # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
379
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
380
- if decade_matches
381
- decade_str = decade_matches[2]
382
- changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
383
- zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
384
- new_decade_str = "#{zeroth_year}s" if zeroth_year
385
- my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
386
- else
387
- my_ng_date_element.content
388
- end
389
- rescue
390
- my_ng_date_element.content
248
+ # The encoding value for the start of the range, or stop if not present.
249
+ def encoding
250
+ @start&.encoding || @stop&.encoding
391
251
  end
392
- my_ng_date_element
393
- end
394
252
 
395
- CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')
396
-
397
- # strings like 18uu, 18-- become '19th century' in the ng_date_element content
398
- def process_century_date(ng_date_element)
399
- my_ng_date_element = ng_date_element.clone
400
- my_ng_date_element.content = begin
401
- orig_date_str = ng_date_element.text.strip
402
- # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
403
- century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
404
- if century_matches
405
- new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
406
- my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
253
+ # Decoded dates with "B.C." or "A.D." and qualifier markers applied to
254
+ # the entire range, or individually if the start and stop qualifiers differ.
255
+ def qualified_value
256
+ if @start&.qualifier == @stop&.qualifier
257
+ qualifier = @start&.qualifier || @stop&.qualifier
258
+ date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
259
+ return "[ca. #{date}]" if qualifier == 'approximate'
260
+ return "[#{date}?]" if qualifier == 'questionable'
261
+ return "[#{date}]" if qualifier == 'inferred'
262
+
263
+ date
407
264
  else
408
- my_ng_date_element.content
265
+ "#{@start&.qualified_value} - #{@stop&.qualified_value}"
409
266
  end
410
- rescue
411
- my_ng_date_element.content
412
267
  end
413
- my_ng_date_element
414
- end
415
-
416
- def field_is_encoded?(ng_element, encoding)
417
- ng_element.attributes['encoding'] &&
418
- ng_element.attributes['encoding'].respond_to?(:value) &&
419
- ng_element.attributes['encoding'].value.downcase == encoding
420
- end
421
-
422
- def date_is_bc_edtf?(ng_date_element)
423
- ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
424
268
  end
425
269
 
426
- def date_is_ad?(ng_date_element)
427
- str = ng_date_element.text.strip.gsub(/^0*/, '')
428
- str.present? && str.length < 4 && !str.match('A.D.')
429
- end
270
+ def parse_dates(elements)
271
+ # convert to DateValue objects and keep only valid ones
272
+ dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
273
+ # join any date ranges into DateRange objects
274
+ point, nonpoint = dates.partition(&:point)
275
+ if point.any?
276
+ range = DateRange.new(start: point.find { |date| date.point == 'start' },
277
+ stop: point.find { |date| date.point == 'end' })
278
+ nonpoint.unshift(range)
279
+ end
280
+ dates = nonpoint
430
281
 
431
- def date_is_edtf?(ng_date_element)
432
- field_is_encoded?(ng_date_element, 'edtf')
433
- end
282
+ # ensure dates are unique with respect to their base values
283
+ dates = dates.group_by(&:base_value).map do |_value, group|
284
+ next group.first if group.one?
434
285
 
435
- def date_is_w3cdtf?(ng_date_element)
436
- field_is_encoded?(ng_date_element, 'w3cdtf')
437
- end
286
+ # if one of the duplicates wasn't encoded, use that one. see:
287
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
288
+ if group.reject(&:encoding).any?
289
+ group.reject(&:encoding).first
438
290
 
439
- def date_is_iso8601?(ng_date_element)
440
- field_is_encoded?(ng_date_element, 'iso8601')
441
- end
291
+ # otherwise arbitrarily pick the last in the group
292
+ else
293
+ group.last
294
+ end
295
+ end
442
296
 
443
- # @return true if decade string needs tweaking for display
444
- def date_is_decade?(ng_date_element)
445
- ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
446
- end
297
+ # compare the remaining dates against one part or the other of a range
298
+ date_ranges = dates.select { |date| date.is_a?(DateRange) }
447
299
 
448
- # @return true if century string needs tweaking for display
449
- def date_is_century?(ng_date_element)
450
- ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
451
- end
300
+ # remove any range that duplicates an unencoded date that includes that range
301
+ duplicated_ranges = dates.flat_map do |date|
302
+ next if date.is_a?(DateRange) || date.encoding.present?
452
303
 
453
- def full_date_format(full_date_format = '%B %-d, %Y')
454
- @full_date_format ||= full_date_format
455
- end
304
+ date_ranges.select { |r| r.base_values.include?(date.base_value) }
305
+ end
456
306
 
457
- def short_date_format(short_date_format = '%B %Y')
458
- @short_date_format ||= short_date_format
459
- end
307
+ dates = dates - duplicated_ranges
460
308
 
461
- def normalize_date(date_str)
462
- date_str.strip.gsub(/^\[*ca\.\s*|c|\[|\]|\?/, '')
309
+ # output formatted dates with qualifiers, A.D./B.C., etc.
310
+ dates.map(&:qualified_value)
463
311
  end
464
312
  end
465
313
  end
@@ -0,0 +1,20 @@
1
+ # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
2
+ module Stanford
3
+ module Mods
4
+ class Record < ::Mods::Record
5
+ include Stanford::Mods::Geospatial
6
+ include Stanford::Mods::Name
7
+ include Stanford::Mods::OriginInfo
8
+ include Stanford::Mods::PhysicalLocation
9
+ include Stanford::Mods::SearchworksSubjects
10
+ include Stanford::Mods::Searchworks
11
+ include Stanford::Mods::Title
12
+
13
+ attr_writer :druid
14
+
15
+ def druid
16
+ @druid || 'Unknown item'
17
+ end
18
+ end # Record class
19
+ end # Mods module
20
+ end # Stanford module
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = '2.6.4'.freeze
4
+ VERSION = '3.1.0'.freeze
5
5
  end
6
6
  end