stanford-mods 2.6.2 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +24 -0
  3. data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
  4. data/lib/stanford-mods/concerns/name.rb +57 -0
  5. data/lib/stanford-mods/concerns/origin_info.rb +113 -0
  6. data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
  7. data/lib/stanford-mods/concerns/searchworks.rb +125 -0
  8. data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
  9. data/lib/stanford-mods/concerns/title.rb +87 -0
  10. data/lib/stanford-mods/coordinate.rb +21 -3
  11. data/lib/stanford-mods/date_parsing.rb +32 -288
  12. data/lib/stanford-mods/imprint.rb +149 -325
  13. data/lib/stanford-mods/record.rb +20 -0
  14. data/lib/stanford-mods/version.rb +1 -1
  15. data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +2 -0
  16. data/lib/stanford-mods.rb +13 -11
  17. data/spec/fixtures/searchworks_imprint_data.rb +38 -39
  18. data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
  19. data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
  20. data/spec/geo_spatial_spec.rb +1 -6
  21. data/spec/imprint_spec.rb +238 -207
  22. data/spec/name_spec.rb +28 -232
  23. data/spec/origin_info_spec.rb +34 -300
  24. data/spec/searchworks_basic_spec.rb +1 -3
  25. data/spec/searchworks_pub_dates_spec.rb +0 -215
  26. data/spec/searchworks_spec.rb +0 -21
  27. data/spec/searchworks_subject_raw_spec.rb +106 -105
  28. data/spec/searchworks_subject_spec.rb +19 -55
  29. data/spec/searchworks_title_spec.rb +5 -5
  30. data/stanford-mods.gemspec +1 -1
  31. metadata +24 -20
  32. data/.travis.yml +0 -17
  33. data/lib/marc_countries.rb +0 -387
  34. data/lib/stanford-mods/geo_utils.rb +0 -28
  35. data/lib/stanford-mods/name.rb +0 -80
  36. data/lib/stanford-mods/origin_info.rb +0 -489
  37. data/lib/stanford-mods/searchworks.rb +0 -333
  38. data/lib/stanford-mods/searchworks_subjects.rb +0 -196
  39. data/spec/date_parsing_spec.rb +0 -905
@@ -1,3 +1,5 @@
1
+ require 'mods/marc_country_codes'
2
+
1
3
  module Stanford
2
4
  module Mods
3
5
  ##
@@ -8,77 +10,33 @@ module Stanford
8
10
  # however, the date_parsing class only does years, and this does finer tuned dates and also
9
11
  # reformats them according to the encoding.
10
12
  class Imprint
11
- # @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
12
- def initialize(originInfo_ng_nodeset)
13
- @originInfo_ng_nodeset = originInfo_ng_nodeset
14
- end
13
+ attr_reader :element
15
14
 
16
- require 'marc_countries'
15
+ # @param [Nokogiri::XML::Node] element an originInfo node
16
+ def initialize(element)
17
+ @element = element
18
+ end
17
19
 
18
- # @return Array<String> each String is an imprint statement from a single originInfo element
19
20
  def imprint_statements
20
- results = []
21
- @originInfo_ng_nodeset.each do |origin_info_node|
22
- edition = edition_vals_str(origin_info_node)
23
- place = place_vals_str(origin_info_node)
24
- publisher = publisher_vals_str(origin_info_node)
25
- dates = date_str(origin_info_node)
26
-
27
- place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
28
- edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
29
- ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
30
-
31
- results << ed_place_pub_dates unless ed_place_pub_dates.empty?
32
- end
33
- results
21
+ display_str
34
22
  end
35
23
 
24
+ # @return [String] an imprint statement from a single originInfo element
36
25
  def display_str
37
- imprint_statements.join('; ') if imprint_statements.present?
38
- end
26
+ edition = edition_vals_str
27
+ place = place_vals_str
28
+ publisher = publisher_vals_str
29
+ dates = date_str
39
30
 
40
- # @return Array<Integer> an array of publication years for the resource
41
- def publication_date_for_slider
42
- @originInfo_ng_nodeset.map do |origin_info_node|
43
- date_elements = if origin_info_node.as_object.first.key_dates.any?
44
- origin_info_node.as_object.first.key_dates.map(&:as_object).map(&:first)
45
- else
46
- date_field_keys.map do |date_field|
47
- next unless origin_info_node.respond_to?(date_field)
48
-
49
- date_elements = origin_info_node.send(date_field)
50
- date_elements.map(&:as_object).map(&:first) if date_elements.any?
51
- end.compact.first
52
- end
53
-
54
- if date_elements.nil? || date_elements.none?
55
- []
56
- elsif date_elements.find(&:start?) &&
57
- date_elements.find(&:start?).as_range &&
58
- date_elements.find(&:end?) &&
59
- date_elements.find(&:end?).as_range
60
- start_date = date_elements.find(&:start?)
61
- end_date = date_elements.find(&:end?)
62
-
63
- (start_date.as_range.min.year..end_date.as_range.max.year).to_a
64
- elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
65
- start_date = date_elements.find(&:start?)
66
-
67
- (start_date.as_range.min.year..Time.now.year).to_a
68
- elsif date_elements.one?
69
- date_elements.first.to_a.map(&:year)
70
- else
71
- date_elements.map { |v| v.to_a.map(&:year) }.flatten
72
- end
73
- end.flatten
31
+ place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
32
+ edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
33
+ ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
34
+
35
+ ed_place_pub_dates
74
36
  end
75
37
 
76
38
  private
77
39
 
78
- def extract_year(el)
79
- DateParsing.year_int_from_date_str(el.text)
80
- end
81
-
82
40
  def compact_and_join_with_delimiter(values, delimiter)
83
41
  compact_values = values.compact.reject { |v| v.strip.empty? }
84
42
  return compact_values.join(delimiter) if compact_values.length == 1 ||
@@ -98,16 +56,16 @@ module Stanford
98
56
  value.strip.end_with?('.', ',', ':', ';')
99
57
  end
100
58
 
101
- def edition_vals_str(origin_info_node)
102
- origin_info_node.edition.reject do |e|
59
+ def edition_vals_str
60
+ element.edition.reject do |e|
103
61
  e.text.strip.empty?
104
62
  end.map(&:text).join(' ').strip
105
63
  end
106
64
 
107
- def publisher_vals_str(origin_info_node)
108
- return if origin_info_node.publisher.text.strip.empty?
65
+ def publisher_vals_str
66
+ return if element.publisher.text.strip.empty?
109
67
 
110
- publishers = origin_info_node.publisher.reject do |p|
68
+ publishers = element.publisher.reject do |p|
111
69
  p.text.strip.empty?
112
70
  end.map(&:text)
113
71
  compact_and_join_with_delimiter(publishers, ' : ')
@@ -115,10 +73,10 @@ module Stanford
115
73
 
116
74
  # PLACE processing methods ------
117
75
 
118
- def place_vals_str(origin_info_node)
119
- return if origin_info_node.place.text.strip.empty?
76
+ def place_vals_str
77
+ return if element.place.text.strip.empty?
120
78
 
121
- places = place_terms(origin_info_node).reject do |p|
79
+ places = place_terms.reject do |p|
122
80
  p.text.strip.empty?
123
81
  end.map(&:text)
124
82
  compact_and_join_with_delimiter(places, ' : ')
@@ -131,25 +89,26 @@ module Stanford
131
89
  end
132
90
  end
133
91
 
134
- def place_terms(origin_info_element)
135
- return [] unless origin_info_element.respond_to?(:place) &&
136
- origin_info_element.place.respond_to?(:placeTerm)
92
+ def place_terms
93
+ return [] unless element.respond_to?(:place) &&
94
+ element.place.respond_to?(:placeTerm)
137
95
 
138
- if unencoded_place_terms?(origin_info_element)
139
- origin_info_element.place.placeTerm.select do |term|
96
+ if unencoded_place_terms?(element)
97
+ element.place.placeTerm.select do |term|
140
98
  !term.attributes['type'].respond_to?(:value) ||
141
99
  term.attributes['type'].value == 'text'
142
100
  end.compact
143
101
  else
144
- origin_info_element.place.placeTerm.map do |term|
102
+ element.place.placeTerm.map do |term|
145
103
  next unless term.attributes['type'].respond_to?(:value) &&
146
104
  term.attributes['type'].value == 'code' &&
147
105
  term.attributes['authority'].respond_to?(:value) &&
148
106
  term.attributes['authority'].value == 'marccountry' &&
149
- MARC_COUNTRIES.include?(term.text.strip)
107
+ !['xx', 'vp'].include?(term.text.strip) &&
108
+ MARC_COUNTRY.include?(term.text.strip)
150
109
 
151
110
  term = term.clone
152
- term.content = MARC_COUNTRIES[term.text.strip]
111
+ term.content = MARC_COUNTRY[term.text.strip]
153
112
  term
154
113
  end.compact
155
114
  end
@@ -157,308 +116,173 @@ module Stanford
157
116
 
158
117
  # DATE processing methods ------
159
118
 
160
- def date_str(origin_info_node)
161
- date_vals = origin_info_date_vals(origin_info_node)
119
+ def date_str
120
+ date_vals = origin_info_date_vals
162
121
  return if date_vals.empty?
163
-
164
122
  date_vals.map(&:strip).join(' ')
165
123
  end
166
124
 
167
- def origin_info_date_vals(origin_info_node)
125
+ def origin_info_date_vals
168
126
  date_field_keys.map do |date_field|
169
- next unless origin_info_node.respond_to?(date_field)
127
+ next unless element.respond_to?(date_field)
170
128
 
171
- date_elements = origin_info_node.send(date_field)
172
- date_elements_display_vals(date_elements) if date_elements.present?
129
+ date_elements = element.send(date_field)
130
+ parse_dates(date_elements) if date_elements.present?
173
131
  end.compact.flatten
174
132
  end
175
133
 
176
- def date_elements_display_vals(ng_date_elements)
177
- apply_date_qualifier_decoration(
178
- dedup_dates(
179
- join_date_ranges(
180
- process_decade_century_dates(
181
- process_bc_ad_dates(
182
- process_encoded_dates(ignore_bad_dates(ng_date_elements))
183
- )
184
- )
185
- )
186
- )
187
- )
188
- end
189
-
190
134
  def date_field_keys
191
135
  [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
192
136
  end
193
137
 
194
- def ignore_bad_dates(ng_date_elements)
195
- ng_date_elements.select do |ng_date_element|
196
- val = ng_date_element.text.strip
197
- val != '9999' && val != '0000-00-00' && val != 'uuuu'
198
- end
199
- end
138
+ class DateValue
139
+ attr_reader :value
140
+ delegate :text, :date, :point, :qualifier, :encoding, to: :value
200
141
 
201
- def process_encoded_dates(ng_date_elements)
202
- ng_date_elements.map do |ng_date_element|
203
- if date_is_w3cdtf?(ng_date_element)
204
- process_w3cdtf_date(ng_date_element)
205
- elsif date_is_iso8601?(ng_date_element)
206
- process_iso8601_date(ng_date_element)
207
- else
208
- ng_date_element
209
- end
142
+ def initialize(value)
143
+ @value = value
210
144
  end
211
- end
212
145
 
213
- # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
214
- # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
215
- # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
216
- # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
217
- def process_bc_ad_dates(ng_date_elements)
218
- ng_date_elements.map do |ng_date_element|
219
- case
220
- when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
221
- ng_date_element.content = "1 B.C."
222
- when date_is_bc_edtf?(ng_date_element)
223
- year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
224
- ng_date_element.content = "#{year} B.C."
225
- when date_is_ad?(ng_date_element)
226
- ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
227
- end
228
- ng_date_element
146
+ # True if the element text isn't blank or one of the placeholders "9999", "0000-00-00", or "uuuu".
147
+ def valid?
148
+ text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
229
149
  end
230
- end
231
150
 
232
- def process_decade_century_dates(ng_date_elements)
233
- ng_date_elements.map do |ng_date_element|
234
- if date_is_decade?(ng_date_element)
235
- process_decade_date(ng_date_element)
236
- elsif date_is_century?(ng_date_element)
237
- process_century_date(ng_date_element)
238
- else
239
- ng_date_element
151
+ # Element text reduced to digits and hyphen. Captures date ranges and
152
+ # negative (B.C.) dates. Used for comparison/deduping.
153
+ def base_value
154
+ if text =~ /^\[?1\d{3}-\d{2}\??\]?$/
155
+ return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
240
156
  end
157
+
158
+ text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
241
159
  end
242
- end
243
160
 
244
- def join_date_ranges(ng_date_elements)
245
- if dates_are_range?(ng_date_elements)
246
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
247
- end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
248
- ng_date_elements.map do |date|
249
- date = date.clone # clone the date object so we don't append the same one
250
- if normalize_date(date.text) == normalize_date(start_date.text)
251
- date.content = [start_date.text, end_date.text].join(' - ')
252
- date
253
- elsif normalize_date(date.text) != normalize_date(end_date.text)
254
- date
255
- end
256
- end.compact
257
- elsif dates_are_open_range?(ng_date_elements)
258
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
259
- ng_date_elements.map do |date|
260
- date = date.clone # clone the date object so we don't append the same one
261
- date.content = "#{start_date.text}-" if date.text == start_date.text
262
- date
161
+ # Decoded version of the date, if it was encoded. Strips leading zeroes.
162
+ def decoded_value
163
+ return text.strip unless date
164
+
165
+ unless encoding.present?
166
+ return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
263
167
  end
264
- else
265
- ng_date_elements
266
- end
267
- end
268
168
 
269
- def dedup_dates(ng_date_elements)
270
- date_text = ng_date_elements.map { |d| normalize_date(d.text) }
271
- if date_text != date_text.uniq
272
- if ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }
273
- [ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }]
274
- elsif ng_date_elements.find { |d| !d.attributes['encoding'] }
275
- [ng_date_elements.find { |d| !d.attributes['encoding'] }]
169
+ # Delegate to the appropriate decoding method, if any
170
+ case value.precision
171
+ when :day
172
+ date.strftime('%B %e, %Y')
173
+ when :month
174
+ date.strftime('%B %Y')
175
+ when :year
176
+ year = date.year
177
+ if year < 1
178
+ "#{year.abs + 1} B.C."
179
+ # Any dates before the year 1000 are explicitly marked A.D.
180
+ elsif year > 1 && year < 1000
181
+ "#{year} A.D."
182
+ else
183
+ year.to_s
184
+ end
185
+ when :century
186
+ return "#{(date.to_s[0..1].to_i + 1).ordinalize} century"
187
+ when :decade
188
+ return "#{date.year}s"
276
189
  else
277
- [ng_date_elements.first]
190
+ text.strip
278
191
  end
279
- else
280
- ng_date_elements
281
192
  end
282
- end
283
193
 
284
- def apply_date_qualifier_decoration(ng_date_elements)
285
- return_fields = ng_date_elements.map do |date|
286
- date = date.clone
287
- if date_is_approximate?(date)
288
- date.content = "[ca. #{date.text}]"
289
- elsif date_is_questionable?(date)
290
- date.content = "[#{date.text}?]"
291
- elsif date_is_inferred?(date)
292
- date.content = "[#{date.text}]"
293
- end
294
- date
295
- end
296
- return_fields.map(&:text)
297
- end
194
+ # Decoded date with "B.C." or "A.D." and qualifier markers. See (outdated):
195
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
196
+ def qualified_value
197
+ date = decoded_value
298
198
 
299
- def date_is_approximate?(ng_date_element)
300
- ng_date_element.attributes['qualifier'] &&
301
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
302
- ng_date_element.attributes['qualifier'].value == 'approximate'
303
- end
304
-
305
- def date_is_questionable?(ng_date_element)
306
- ng_date_element.attributes['qualifier'] &&
307
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
308
- ng_date_element.attributes['qualifier'].value == 'questionable'
309
- end
310
-
311
- def date_is_inferred?(ng_date_element)
312
- ng_date_element.attributes['qualifier'] &&
313
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
314
- ng_date_element.attributes['qualifier'].value == 'inferred'
315
- end
199
+ return "[ca. #{date}]" if qualifier == 'approximate'
200
+ return "[#{date}?]" if qualifier == 'questionable'
201
+ return "[#{date}]" if qualifier == 'inferred'
316
202
 
317
- def dates_are_open_range?(ng_date_elements)
318
- ng_date_elements.any? do |element|
319
- element.attributes['point'] &&
320
- element.attributes['point'].respond_to?(:value) &&
321
- element.attributes['point'].value == 'start'
322
- end && !ng_date_elements.any? do |element|
323
- element.attributes['point'] &&
324
- element.attributes['point'].respond_to?(:value) &&
325
- element.attributes['point'].value == 'end'
203
+ date
326
204
  end
327
205
  end
328
206
 
329
- def dates_are_range?(ng_date_elements)
330
- attributes = ng_date_elements.map do |date|
331
- if date.attributes['point'].respond_to?(:value)
332
- date.attributes['point'].value
333
- end
207
+ class DateRange
208
+ def initialize(start: nil, stop: nil)
209
+ @start = start
210
+ @stop = stop
334
211
  end
335
- attributes.include?('start') &&
336
- attributes.include?('end')
337
- end
338
212
 
339
- def process_w3cdtf_date(ng_date_element)
340
- ng_date_element = ng_date_element.clone
341
- ng_date_element.content = begin
342
- if ng_date_element.text.strip =~ /^\d{4}-\d{2}-\d{2}$/
343
- Date.parse(ng_date_element.text).strftime(full_date_format)
344
- elsif ng_date_element.text.strip =~ /^\d{4}-\d{2}$/
345
- Date.parse("#{ng_date_element.text}-01").strftime(short_date_format)
346
- else
347
- ng_date_element.content
348
- end
349
- rescue
350
- ng_date_element.content
213
+ # Base value as hyphen-joined string. Used for comparison/deduping.
214
+ def base_value
215
+ "#{@start&.base_value}-#{@stop&.base_value}"
351
216
  end
352
- ng_date_element
353
- end
354
217
 
355
- def process_iso8601_date(ng_date_element)
356
- ng_date_element = ng_date_element.clone
357
- ng_date_element.content = begin
358
- if ng_date_element.text.strip =~ /^\d{8,}$/
359
- Date.parse(ng_date_element.text).strftime(full_date_format)
360
- else
361
- ng_date_element.content
362
- end
363
- rescue
364
- ng_date_element.content
218
+ # Base values as array. Used for comparison/deduping of individual dates.
219
+ def base_values
220
+ [@start&.base_value, @stop&.base_value].compact
365
221
  end
366
- ng_date_element
367
- end
368
222
 
369
- DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')
370
-
371
- # strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content
372
- def process_decade_date(ng_date_element)
373
- my_ng_date_element = ng_date_element.clone
374
- my_ng_date_element.content = begin
375
- orig_date_str = ng_date_element.text.strip
376
- # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
377
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
378
- if decade_matches
379
- decade_str = decade_matches[2]
380
- changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
381
- zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
382
- new_decade_str = "#{zeroth_year}s" if zeroth_year
383
- my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
384
- else
385
- my_ng_date_element.content
386
- end
387
- rescue
388
- my_ng_date_element.content
223
+ # The encoding value for the start of the range, or stop if not present.
224
+ def encoding
225
+ @start&.encoding || @stop&.encoding
389
226
  end
390
- my_ng_date_element
391
- end
392
227
 
393
- CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')
394
-
395
- # strings like 18uu, 18-- become '19th century' in the ng_date_element content
396
- def process_century_date(ng_date_element)
397
- my_ng_date_element = ng_date_element.clone
398
- my_ng_date_element.content = begin
399
- orig_date_str = ng_date_element.text.strip
400
- # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
401
- century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
402
- if century_matches
403
- require 'active_support/core_ext/integer/inflections'
404
- new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
405
- my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
228
+ # Decoded dates with "B.C." or "A.D." and qualifier markers applied to
229
+ # the entire range, or individually if their qualifiers differ.
230
+ def qualified_value
231
+ if @start&.qualifier == @stop&.qualifier
232
+ qualifier = @start&.qualifier || @stop&.qualifier
233
+ date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
234
+ return "[ca. #{date}]" if qualifier == 'approximate'
235
+ return "[#{date}?]" if qualifier == 'questionable'
236
+ return "[#{date}]" if qualifier == 'inferred'
237
+
238
+ date
406
239
  else
407
- my_ng_date_element.content
240
+ "#{@start&.qualified_value} - #{@stop&.qualified_value}"
408
241
  end
409
- rescue
410
- my_ng_date_element.content
411
242
  end
412
- my_ng_date_element
413
- end
414
-
415
- def field_is_encoded?(ng_element, encoding)
416
- ng_element.attributes['encoding'] &&
417
- ng_element.attributes['encoding'].respond_to?(:value) &&
418
- ng_element.attributes['encoding'].value.downcase == encoding
419
- end
420
-
421
- def date_is_bc_edtf?(ng_date_element)
422
- ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
423
243
  end
424
244
 
425
- def date_is_ad?(ng_date_element)
426
- str = ng_date_element.text.strip.gsub(/^0*/, '')
427
- str.present? && str.length < 4 && !str.match('A.D.')
428
- end
245
+ def parse_dates(elements)
246
+ # convert to DateValue objects and keep only valid ones
247
+ dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
248
+ # join any date ranges into DateRange objects
249
+ point, nonpoint = dates.partition(&:point)
250
+ if point.any?
251
+ range = DateRange.new(start: point.find { |date| date.point == 'start' },
252
+ stop: point.find { |date| date.point == 'end' })
253
+ nonpoint.unshift(range)
254
+ end
255
+ dates = nonpoint
429
256
 
430
- def date_is_edtf?(ng_date_element)
431
- field_is_encoded?(ng_date_element, 'edtf')
432
- end
257
+ # ensure dates are unique with respect to their base values
258
+ dates = dates.group_by(&:base_value).map do |_value, group|
259
+ next group.first if group.one?
433
260
 
434
- def date_is_w3cdtf?(ng_date_element)
435
- field_is_encoded?(ng_date_element, 'w3cdtf')
436
- end
261
+ # if one of the duplicates wasn't encoded, use that one. see:
262
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
263
+ if group.reject(&:encoding).any?
264
+ group.reject(&:encoding).first
437
265
 
438
- def date_is_iso8601?(ng_date_element)
439
- field_is_encoded?(ng_date_element, 'iso8601')
440
- end
266
+ # otherwise just arbitrarily pick the last in the group
267
+ else
268
+ group.last
269
+ end
270
+ end
441
271
 
442
- # @return true if decade string needs tweaking for display
443
- def date_is_decade?(ng_date_element)
444
- ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
445
- end
272
+ # compare the remaining dates against one part or the other of a range
273
+ date_ranges = dates.select { |date| date.is_a?(DateRange) }
446
274
 
447
- # @return true if century string needs tweaking for display
448
- def date_is_century?(ng_date_element)
449
- ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
450
- end
275
+ # remove any range that duplicates an unencoded date that includes that range
276
+ duplicated_ranges = dates.flat_map do |date|
277
+ next if date.is_a?(DateRange) || date.encoding.present?
451
278
 
452
- def full_date_format(full_date_format = '%B %-d, %Y')
453
- @full_date_format ||= full_date_format
454
- end
279
+ date_ranges.select { |r| r.base_values.include?(date.base_value) }
280
+ end
455
281
 
456
- def short_date_format(short_date_format = '%B %Y')
457
- @short_date_format ||= short_date_format
458
- end
282
+ dates = dates - duplicated_ranges
459
283
 
460
- def normalize_date(date_str)
461
- date_str.strip.gsub(/^\[*ca\.\s*|c|\[|\]|\?/, '')
284
+ # output formatted dates with qualifiers, A.D./B.C., etc.
285
+ dates.map(&:qualified_value)
462
286
  end
463
287
  end
464
288
  end
@@ -0,0 +1,20 @@
1
+ # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
2
+ module Stanford
3
+ module Mods
4
+ class Record < ::Mods::Record
5
+ include Stanford::Mods::Geospatial
6
+ include Stanford::Mods::Name
7
+ include Stanford::Mods::OriginInfo
8
+ include Stanford::Mods::PhysicalLocation
9
+ include Stanford::Mods::SearchworksSubjects
10
+ include Stanford::Mods::Searchworks
11
+ include Stanford::Mods::Title
12
+
13
+ attr_writer :druid
14
+
15
+ def druid
16
+ @druid || 'Unknown item'
17
+ end
18
+ end # Record class
19
+ end # Mods module
20
+ end # Stanford module
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = '2.6.2'.freeze
4
+ VERSION = '3.0.0'.freeze
5
5
  end
6
6
  end
@@ -1,6 +1,7 @@
1
1
  # Language Values used by SearchWorks
2
2
  # From https://github.com/solrmarc/stanford-solr-marc/blob/master/stanford-sw/translation_maps/language_map.properties
3
3
  # code 'ase' from iso639-3
4
+ # code egy-Egyd from rfc5646
4
5
  SEARCHWORKS_LANGUAGES = {
5
6
  'aaa' => 'Afar',
6
7
  'abk' => 'Abkhaz',
@@ -125,6 +126,7 @@ SEARCHWORKS_LANGUAGES = {
125
126
  'dzo' => 'Dzongkha',
126
127
  'efi' => 'Efik',
127
128
  'egy' => 'Egyptian',
129
+ 'egy-Egyd' => 'Egyptian, Demotic',
128
130
  'eka' => 'Ekajuk',
129
131
  'elx' => 'Elamite',
130
132
  'eng' => 'English',