stanford-mods 2.6.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +24 -0
  3. data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
  4. data/lib/stanford-mods/concerns/name.rb +57 -0
  5. data/lib/stanford-mods/concerns/origin_info.rb +113 -0
  6. data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
  7. data/lib/stanford-mods/concerns/searchworks.rb +125 -0
  8. data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
  9. data/lib/stanford-mods/concerns/title.rb +87 -0
  10. data/lib/stanford-mods/coordinate.rb +21 -3
  11. data/lib/stanford-mods/date_parsing.rb +32 -288
  12. data/lib/stanford-mods/imprint.rb +149 -325
  13. data/lib/stanford-mods/record.rb +20 -0
  14. data/lib/stanford-mods/version.rb +1 -1
  15. data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +2 -0
  16. data/lib/stanford-mods.rb +13 -11
  17. data/spec/fixtures/searchworks_imprint_data.rb +38 -39
  18. data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
  19. data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
  20. data/spec/geo_spatial_spec.rb +1 -6
  21. data/spec/imprint_spec.rb +238 -207
  22. data/spec/name_spec.rb +28 -232
  23. data/spec/origin_info_spec.rb +34 -300
  24. data/spec/searchworks_basic_spec.rb +1 -3
  25. data/spec/searchworks_pub_dates_spec.rb +0 -215
  26. data/spec/searchworks_spec.rb +0 -21
  27. data/spec/searchworks_subject_raw_spec.rb +106 -105
  28. data/spec/searchworks_subject_spec.rb +19 -55
  29. data/spec/searchworks_title_spec.rb +5 -5
  30. data/stanford-mods.gemspec +1 -1
  31. metadata +24 -20
  32. data/.travis.yml +0 -17
  33. data/lib/marc_countries.rb +0 -387
  34. data/lib/stanford-mods/geo_utils.rb +0 -28
  35. data/lib/stanford-mods/name.rb +0 -80
  36. data/lib/stanford-mods/origin_info.rb +0 -489
  37. data/lib/stanford-mods/searchworks.rb +0 -333
  38. data/lib/stanford-mods/searchworks_subjects.rb +0 -196
  39. data/spec/date_parsing_spec.rb +0 -905
@@ -1,3 +1,5 @@
1
+ require 'mods/marc_country_codes'
2
+
1
3
  module Stanford
2
4
  module Mods
3
5
  ##
@@ -8,77 +10,33 @@ module Stanford
8
10
  # however, the date_parsing class only does years, and this does finer tuned dates and also
9
11
  # reformats them according to the encoding.
10
12
  class Imprint
11
- # @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
12
- def initialize(originInfo_ng_nodeset)
13
- @originInfo_ng_nodeset = originInfo_ng_nodeset
14
- end
13
+ attr_reader :element
15
14
 
16
- require 'marc_countries'
15
+ # @param [Nokogiri::XML::Node] element an originInfo node
16
+ def initialize(element)
17
+ @element = element
18
+ end
17
19
 
18
- # @return Array<String> each String is an imprint statement from a single originInfo element
19
20
  def imprint_statements
20
- results = []
21
- @originInfo_ng_nodeset.each do |origin_info_node|
22
- edition = edition_vals_str(origin_info_node)
23
- place = place_vals_str(origin_info_node)
24
- publisher = publisher_vals_str(origin_info_node)
25
- dates = date_str(origin_info_node)
26
-
27
- place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
28
- edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
29
- ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
30
-
31
- results << ed_place_pub_dates unless ed_place_pub_dates.empty?
32
- end
33
- results
21
+ display_str
34
22
  end
35
23
 
24
+ # @return [String] an imprint statement from a single originInfo element
36
25
  def display_str
37
- imprint_statements.join('; ') if imprint_statements.present?
38
- end
26
+ edition = edition_vals_str
27
+ place = place_vals_str
28
+ publisher = publisher_vals_str
29
+ dates = date_str
39
30
 
40
- # @return Array<Integer> an array of publication years for the resource
41
- def publication_date_for_slider
42
- @originInfo_ng_nodeset.map do |origin_info_node|
43
- date_elements = if origin_info_node.as_object.first.key_dates.any?
44
- origin_info_node.as_object.first.key_dates.map(&:as_object).map(&:first)
45
- else
46
- date_field_keys.map do |date_field|
47
- next unless origin_info_node.respond_to?(date_field)
48
-
49
- date_elements = origin_info_node.send(date_field)
50
- date_elements.map(&:as_object).map(&:first) if date_elements.any?
51
- end.compact.first
52
- end
53
-
54
- if date_elements.nil? || date_elements.none?
55
- []
56
- elsif date_elements.find(&:start?) &&
57
- date_elements.find(&:start?).as_range &&
58
- date_elements.find(&:end?) &&
59
- date_elements.find(&:end?).as_range
60
- start_date = date_elements.find(&:start?)
61
- end_date = date_elements.find(&:end?)
62
-
63
- (start_date.as_range.min.year..end_date.as_range.max.year).to_a
64
- elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
65
- start_date = date_elements.find(&:start?)
66
-
67
- (start_date.as_range.min.year..Time.now.year).to_a
68
- elsif date_elements.one?
69
- date_elements.first.to_a.map(&:year)
70
- else
71
- date_elements.map { |v| v.to_a.map(&:year) }.flatten
72
- end
73
- end.flatten
31
+ place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
32
+ edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
33
+ ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
34
+
35
+ ed_place_pub_dates
74
36
  end
75
37
 
76
38
  private
77
39
 
78
- def extract_year(el)
79
- DateParsing.year_int_from_date_str(el.text)
80
- end
81
-
82
40
  def compact_and_join_with_delimiter(values, delimiter)
83
41
  compact_values = values.compact.reject { |v| v.strip.empty? }
84
42
  return compact_values.join(delimiter) if compact_values.length == 1 ||
@@ -98,16 +56,16 @@ module Stanford
98
56
  value.strip.end_with?('.', ',', ':', ';')
99
57
  end
100
58
 
101
- def edition_vals_str(origin_info_node)
102
- origin_info_node.edition.reject do |e|
59
+ def edition_vals_str
60
+ element.edition.reject do |e|
103
61
  e.text.strip.empty?
104
62
  end.map(&:text).join(' ').strip
105
63
  end
106
64
 
107
- def publisher_vals_str(origin_info_node)
108
- return if origin_info_node.publisher.text.strip.empty?
65
+ def publisher_vals_str
66
+ return if element.publisher.text.strip.empty?
109
67
 
110
- publishers = origin_info_node.publisher.reject do |p|
68
+ publishers = element.publisher.reject do |p|
111
69
  p.text.strip.empty?
112
70
  end.map(&:text)
113
71
  compact_and_join_with_delimiter(publishers, ' : ')
@@ -115,10 +73,10 @@ module Stanford
115
73
 
116
74
  # PLACE processing methods ------
117
75
 
118
- def place_vals_str(origin_info_node)
119
- return if origin_info_node.place.text.strip.empty?
76
+ def place_vals_str
77
+ return if element.place.text.strip.empty?
120
78
 
121
- places = place_terms(origin_info_node).reject do |p|
79
+ places = place_terms.reject do |p|
122
80
  p.text.strip.empty?
123
81
  end.map(&:text)
124
82
  compact_and_join_with_delimiter(places, ' : ')
@@ -131,25 +89,26 @@ module Stanford
131
89
  end
132
90
  end
133
91
 
134
- def place_terms(origin_info_element)
135
- return [] unless origin_info_element.respond_to?(:place) &&
136
- origin_info_element.place.respond_to?(:placeTerm)
92
+ def place_terms
93
+ return [] unless element.respond_to?(:place) &&
94
+ element.place.respond_to?(:placeTerm)
137
95
 
138
- if unencoded_place_terms?(origin_info_element)
139
- origin_info_element.place.placeTerm.select do |term|
96
+ if unencoded_place_terms?(element)
97
+ element.place.placeTerm.select do |term|
140
98
  !term.attributes['type'].respond_to?(:value) ||
141
99
  term.attributes['type'].value == 'text'
142
100
  end.compact
143
101
  else
144
- origin_info_element.place.placeTerm.map do |term|
102
+ element.place.placeTerm.map do |term|
145
103
  next unless term.attributes['type'].respond_to?(:value) &&
146
104
  term.attributes['type'].value == 'code' &&
147
105
  term.attributes['authority'].respond_to?(:value) &&
148
106
  term.attributes['authority'].value == 'marccountry' &&
149
- MARC_COUNTRIES.include?(term.text.strip)
107
+ !['xx', 'vp'].include?(term.text.strip) &&
108
+ MARC_COUNTRY.include?(term.text.strip)
150
109
 
151
110
  term = term.clone
152
- term.content = MARC_COUNTRIES[term.text.strip]
111
+ term.content = MARC_COUNTRY[term.text.strip]
153
112
  term
154
113
  end.compact
155
114
  end
@@ -157,308 +116,173 @@ module Stanford
157
116
 
158
117
  # DATE processing methods ------
159
118
 
160
- def date_str(origin_info_node)
161
- date_vals = origin_info_date_vals(origin_info_node)
119
+ def date_str
120
+ date_vals = origin_info_date_vals
162
121
  return if date_vals.empty?
163
-
164
122
  date_vals.map(&:strip).join(' ')
165
123
  end
166
124
 
167
- def origin_info_date_vals(origin_info_node)
125
+ def origin_info_date_vals
168
126
  date_field_keys.map do |date_field|
169
- next unless origin_info_node.respond_to?(date_field)
127
+ next unless element.respond_to?(date_field)
170
128
 
171
- date_elements = origin_info_node.send(date_field)
172
- date_elements_display_vals(date_elements) if date_elements.present?
129
+ date_elements = element.send(date_field)
130
+ parse_dates(date_elements) if date_elements.present?
173
131
  end.compact.flatten
174
132
  end
175
133
 
176
- def date_elements_display_vals(ng_date_elements)
177
- apply_date_qualifier_decoration(
178
- dedup_dates(
179
- join_date_ranges(
180
- process_decade_century_dates(
181
- process_bc_ad_dates(
182
- process_encoded_dates(ignore_bad_dates(ng_date_elements))
183
- )
184
- )
185
- )
186
- )
187
- )
188
- end
189
-
190
134
  def date_field_keys
191
135
  [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
192
136
  end
193
137
 
194
- def ignore_bad_dates(ng_date_elements)
195
- ng_date_elements.select do |ng_date_element|
196
- val = ng_date_element.text.strip
197
- val != '9999' && val != '0000-00-00' && val != 'uuuu'
198
- end
199
- end
138
+ class DateValue
139
+ attr_reader :value
140
+ delegate :text, :date, :point, :qualifier, :encoding, to: :value
200
141
 
201
- def process_encoded_dates(ng_date_elements)
202
- ng_date_elements.map do |ng_date_element|
203
- if date_is_w3cdtf?(ng_date_element)
204
- process_w3cdtf_date(ng_date_element)
205
- elsif date_is_iso8601?(ng_date_element)
206
- process_iso8601_date(ng_date_element)
207
- else
208
- ng_date_element
209
- end
142
+ def initialize(value)
143
+ @value = value
210
144
  end
211
- end
212
145
 
213
- # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
214
- # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
215
- # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
216
- # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
217
- def process_bc_ad_dates(ng_date_elements)
218
- ng_date_elements.map do |ng_date_element|
219
- case
220
- when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
221
- ng_date_element.content = "1 B.C."
222
- when date_is_bc_edtf?(ng_date_element)
223
- year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
224
- ng_date_element.content = "#{year} B.C."
225
- when date_is_ad?(ng_date_element)
226
- ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
227
- end
228
- ng_date_element
146
+ # True if the element text isn't blank and isn't one of the placeholders "9999", "0000-00-00", or "uuuu".
147
+ def valid?
148
+ text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
229
149
  end
230
- end
231
150
 
232
- def process_decade_century_dates(ng_date_elements)
233
- ng_date_elements.map do |ng_date_element|
234
- if date_is_decade?(ng_date_element)
235
- process_decade_date(ng_date_element)
236
- elsif date_is_century?(ng_date_element)
237
- process_century_date(ng_date_element)
238
- else
239
- ng_date_element
151
+ # Element text reduced to digits and hyphen. Captures date ranges and
152
+ # negative (B.C.) dates. Used for comparison/deduping.
153
+ def base_value
154
+ if text =~ /^\[?1\d{3}-\d{2}\??\]?$/
155
+ return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
240
156
  end
157
+
158
+ text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
241
159
  end
242
- end
243
160
 
244
- def join_date_ranges(ng_date_elements)
245
- if dates_are_range?(ng_date_elements)
246
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
247
- end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
248
- ng_date_elements.map do |date|
249
- date = date.clone # clone the date object so we don't append the same one
250
- if normalize_date(date.text) == normalize_date(start_date.text)
251
- date.content = [start_date.text, end_date.text].join(' - ')
252
- date
253
- elsif normalize_date(date.text) != normalize_date(end_date.text)
254
- date
255
- end
256
- end.compact
257
- elsif dates_are_open_range?(ng_date_elements)
258
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
259
- ng_date_elements.map do |date|
260
- date = date.clone # clone the date object so we don't append the same one
261
- date.content = "#{start_date.text}-" if date.text == start_date.text
262
- date
161
+ # Decoded version of the date, if it was encoded. Strips leading zeroes.
162
+ def decoded_value
163
+ return text.strip unless date
164
+
165
+ unless encoding.present?
166
+ return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
263
167
  end
264
- else
265
- ng_date_elements
266
- end
267
- end
268
168
 
269
- def dedup_dates(ng_date_elements)
270
- date_text = ng_date_elements.map { |d| normalize_date(d.text) }
271
- if date_text != date_text.uniq
272
- if ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }
273
- [ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }]
274
- elsif ng_date_elements.find { |d| !d.attributes['encoding'] }
275
- [ng_date_elements.find { |d| !d.attributes['encoding'] }]
169
+ # Delegate to the appropriate decoding method, if any
170
+ case value.precision
171
+ when :day
172
+ date.strftime('%B %e, %Y')
173
+ when :month
174
+ date.strftime('%B %Y')
175
+ when :year
176
+ year = date.year
177
+ if year < 1
178
+ "#{year.abs + 1} B.C."
179
+ # Any dates before the year 1000 are explicitly marked A.D.
180
+ elsif year > 1 && year < 1000
181
+ "#{year} A.D."
182
+ else
183
+ year.to_s
184
+ end
185
+ when :century
186
+ return "#{(date.to_s[0..1].to_i + 1).ordinalize} century"
187
+ when :decade
188
+ return "#{date.year}s"
276
189
  else
277
- [ng_date_elements.first]
190
+ text.strip
278
191
  end
279
- else
280
- ng_date_elements
281
192
  end
282
- end
283
193
 
284
- def apply_date_qualifier_decoration(ng_date_elements)
285
- return_fields = ng_date_elements.map do |date|
286
- date = date.clone
287
- if date_is_approximate?(date)
288
- date.content = "[ca. #{date.text}]"
289
- elsif date_is_questionable?(date)
290
- date.content = "[#{date.text}?]"
291
- elsif date_is_inferred?(date)
292
- date.content = "[#{date.text}]"
293
- end
294
- date
295
- end
296
- return_fields.map(&:text)
297
- end
194
+ # Decoded date with "B.C." or "A.D." and qualifier markers. See (outdated):
195
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
196
+ def qualified_value
197
+ date = decoded_value
298
198
 
299
- def date_is_approximate?(ng_date_element)
300
- ng_date_element.attributes['qualifier'] &&
301
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
302
- ng_date_element.attributes['qualifier'].value == 'approximate'
303
- end
304
-
305
- def date_is_questionable?(ng_date_element)
306
- ng_date_element.attributes['qualifier'] &&
307
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
308
- ng_date_element.attributes['qualifier'].value == 'questionable'
309
- end
310
-
311
- def date_is_inferred?(ng_date_element)
312
- ng_date_element.attributes['qualifier'] &&
313
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
314
- ng_date_element.attributes['qualifier'].value == 'inferred'
315
- end
199
+ return "[ca. #{date}]" if qualifier == 'approximate'
200
+ return "[#{date}?]" if qualifier == 'questionable'
201
+ return "[#{date}]" if qualifier == 'inferred'
316
202
 
317
- def dates_are_open_range?(ng_date_elements)
318
- ng_date_elements.any? do |element|
319
- element.attributes['point'] &&
320
- element.attributes['point'].respond_to?(:value) &&
321
- element.attributes['point'].value == 'start'
322
- end && !ng_date_elements.any? do |element|
323
- element.attributes['point'] &&
324
- element.attributes['point'].respond_to?(:value) &&
325
- element.attributes['point'].value == 'end'
203
+ date
326
204
  end
327
205
  end
328
206
 
329
- def dates_are_range?(ng_date_elements)
330
- attributes = ng_date_elements.map do |date|
331
- if date.attributes['point'].respond_to?(:value)
332
- date.attributes['point'].value
333
- end
207
+ class DateRange
208
+ def initialize(start: nil, stop: nil)
209
+ @start = start
210
+ @stop = stop
334
211
  end
335
- attributes.include?('start') &&
336
- attributes.include?('end')
337
- end
338
212
 
339
- def process_w3cdtf_date(ng_date_element)
340
- ng_date_element = ng_date_element.clone
341
- ng_date_element.content = begin
342
- if ng_date_element.text.strip =~ /^\d{4}-\d{2}-\d{2}$/
343
- Date.parse(ng_date_element.text).strftime(full_date_format)
344
- elsif ng_date_element.text.strip =~ /^\d{4}-\d{2}$/
345
- Date.parse("#{ng_date_element.text}-01").strftime(short_date_format)
346
- else
347
- ng_date_element.content
348
- end
349
- rescue
350
- ng_date_element.content
213
+ # Base value as hyphen-joined string. Used for comparison/deduping.
214
+ def base_value
215
+ "#{@start&.base_value}-#{@stop&.base_value}"
351
216
  end
352
- ng_date_element
353
- end
354
217
 
355
- def process_iso8601_date(ng_date_element)
356
- ng_date_element = ng_date_element.clone
357
- ng_date_element.content = begin
358
- if ng_date_element.text.strip =~ /^\d{8,}$/
359
- Date.parse(ng_date_element.text).strftime(full_date_format)
360
- else
361
- ng_date_element.content
362
- end
363
- rescue
364
- ng_date_element.content
218
+ # Base values as array. Used for comparison/deduping of individual dates.
219
+ def base_values
220
+ [@start&.base_value, @stop&.base_value].compact
365
221
  end
366
- ng_date_element
367
- end
368
222
 
369
- DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')
370
-
371
- # strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content
372
- def process_decade_date(ng_date_element)
373
- my_ng_date_element = ng_date_element.clone
374
- my_ng_date_element.content = begin
375
- orig_date_str = ng_date_element.text.strip
376
- # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
377
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
378
- if decade_matches
379
- decade_str = decade_matches[2]
380
- changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
381
- zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
382
- new_decade_str = "#{zeroth_year}s" if zeroth_year
383
- my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
384
- else
385
- my_ng_date_element.content
386
- end
387
- rescue
388
- my_ng_date_element.content
223
+ # The encoding value for the start of the range, or stop if not present.
224
+ def encoding
225
+ @start&.encoding || @stop&.encoding
389
226
  end
390
- my_ng_date_element
391
- end
392
227
 
393
- CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')
394
-
395
- # strings like 18uu, 18-- become '19th century' in the ng_date_element content
396
- def process_century_date(ng_date_element)
397
- my_ng_date_element = ng_date_element.clone
398
- my_ng_date_element.content = begin
399
- orig_date_str = ng_date_element.text.strip
400
- # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
401
- century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
402
- if century_matches
403
- require 'active_support/core_ext/integer/inflections'
404
- new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
405
- my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
228
+ # Decoded dates with "B.C." or "A.D." and qualifier markers applied to
229
+ # the entire range, or individually if dates differ.
230
+ def qualified_value
231
+ if @start&.qualifier == @stop&.qualifier
232
+ qualifier = @start&.qualifier || @stop&.qualifier
233
+ date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
234
+ return "[ca. #{date}]" if qualifier == 'approximate'
235
+ return "[#{date}?]" if qualifier == 'questionable'
236
+ return "[#{date}]" if qualifier == 'inferred'
237
+
238
+ date
406
239
  else
407
- my_ng_date_element.content
240
+ "#{@start&.qualified_value} - #{@stop&.qualified_value}"
408
241
  end
409
- rescue
410
- my_ng_date_element.content
411
242
  end
412
- my_ng_date_element
413
- end
414
-
415
- def field_is_encoded?(ng_element, encoding)
416
- ng_element.attributes['encoding'] &&
417
- ng_element.attributes['encoding'].respond_to?(:value) &&
418
- ng_element.attributes['encoding'].value.downcase == encoding
419
- end
420
-
421
- def date_is_bc_edtf?(ng_date_element)
422
- ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
423
243
  end
424
244
 
425
- def date_is_ad?(ng_date_element)
426
- str = ng_date_element.text.strip.gsub(/^0*/, '')
427
- str.present? && str.length < 4 && !str.match('A.D.')
428
- end
245
+ def parse_dates(elements)
246
+ # convert to DateValue objects and keep only valid ones
247
+ dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
248
+ # join any date ranges into DateRange objects
249
+ point, nonpoint = dates.partition(&:point)
250
+ if point.any?
251
+ range = DateRange.new(start: point.find { |date| date.point == 'start' },
252
+ stop: point.find { |date| date.point == 'end' })
253
+ nonpoint.unshift(range)
254
+ end
255
+ dates = nonpoint
429
256
 
430
- def date_is_edtf?(ng_date_element)
431
- field_is_encoded?(ng_date_element, 'edtf')
432
- end
257
+ # ensure dates are unique with respect to their base values
258
+ dates = dates.group_by(&:base_value).map do |_value, group|
259
+ next group.first if group.one?
433
260
 
434
- def date_is_w3cdtf?(ng_date_element)
435
- field_is_encoded?(ng_date_element, 'w3cdtf')
436
- end
261
+ # if one of the duplicates wasn't encoded, use that one. see:
262
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
263
+ if group.reject(&:encoding).any?
264
+ group.reject(&:encoding).first
437
265
 
438
- def date_is_iso8601?(ng_date_element)
439
- field_is_encoded?(ng_date_element, 'iso8601')
440
- end
266
+ # otherwise just arbitrarily pick the last in the group
267
+ else
268
+ group.last
269
+ end
270
+ end
441
271
 
442
- # @return true if decade string needs tweaking for display
443
- def date_is_decade?(ng_date_element)
444
- ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
445
- end
272
+ # compare the remaining dates against one part or the other of a range
273
+ date_ranges = dates.select { |date| date.is_a?(DateRange) }
446
274
 
447
- # @return true if century string needs tweaking for display
448
- def date_is_century?(ng_date_element)
449
- ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
450
- end
275
+ # remove any range that duplicates an unencoded date that includes that range
276
+ duplicated_ranges = dates.flat_map do |date|
277
+ next if date.is_a?(DateRange) || date.encoding.present?
451
278
 
452
- def full_date_format(full_date_format = '%B %-d, %Y')
453
- @full_date_format ||= full_date_format
454
- end
279
+ date_ranges.select { |r| r.base_values.include?(date.base_value) }
280
+ end
455
281
 
456
- def short_date_format(short_date_format = '%B %Y')
457
- @short_date_format ||= short_date_format
458
- end
282
+ dates = dates - duplicated_ranges
459
283
 
460
- def normalize_date(date_str)
461
- date_str.strip.gsub(/^\[*ca\.\s*|c|\[|\]|\?/, '')
284
+ # output formatted dates with qualifiers, A.D./B.C., etc.
285
+ dates.map(&:qualified_value)
462
286
  end
463
287
  end
464
288
  end
@@ -0,0 +1,20 @@
1
+ # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
2
+ module Stanford
3
+ module Mods
4
+ class Record < ::Mods::Record
5
+ include Stanford::Mods::Geospatial
6
+ include Stanford::Mods::Name
7
+ include Stanford::Mods::OriginInfo
8
+ include Stanford::Mods::PhysicalLocation
9
+ include Stanford::Mods::SearchworksSubjects
10
+ include Stanford::Mods::Searchworks
11
+ include Stanford::Mods::Title
12
+
13
+ attr_writer :druid
14
+
15
+ def druid
16
+ @druid || 'Unknown item'
17
+ end
18
+ end # Record class
19
+ end # Mods module
20
+ end # Stanford module
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = '2.6.2'.freeze
4
+ VERSION = '3.0.0'.freeze
5
5
  end
6
6
  end
@@ -1,6 +1,7 @@
1
1
  # Language Values used by SearchWorks
2
2
  # From https://github.com/solrmarc/stanford-solr-marc/blob/master/stanford-sw/translation_maps/language_map.properties
3
3
  # code 'ase' from iso639-3
4
+ # code egy-Egyd from rfc5646
4
5
  SEARCHWORKS_LANGUAGES = {
5
6
  'aaa' => 'Afar',
6
7
  'abk' => 'Abkhaz',
@@ -125,6 +126,7 @@ SEARCHWORKS_LANGUAGES = {
125
126
  'dzo' => 'Dzongkha',
126
127
  'efi' => 'Efik',
127
128
  'egy' => 'Egyptian',
129
+ 'egy-Egyd' => 'Egyptian, Demotic',
128
130
  'eka' => 'Ekajuk',
129
131
  'elx' => 'Elamite',
130
132
  'eng' => 'English',