stanford-mods 2.6.4 → 3.0.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +1 -1
  3. data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
  4. data/lib/stanford-mods/concerns/name.rb +57 -0
  5. data/lib/stanford-mods/concerns/origin_info.rb +109 -0
  6. data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
  7. data/lib/stanford-mods/concerns/searchworks.rb +125 -0
  8. data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
  9. data/lib/stanford-mods/concerns/title.rb +79 -0
  10. data/lib/stanford-mods/coordinate.rb +21 -3
  11. data/lib/stanford-mods/date_parsing.rb +32 -289
  12. data/lib/stanford-mods/imprint.rb +148 -325
  13. data/lib/stanford-mods/record.rb +20 -0
  14. data/lib/stanford-mods/version.rb +1 -1
  15. data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
  16. data/lib/stanford-mods.rb +12 -11
  17. data/spec/fixtures/searchworks_imprint_data.rb +38 -39
  18. data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
  19. data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
  20. data/spec/geo_spatial_spec.rb +1 -6
  21. data/spec/imprint_spec.rb +238 -207
  22. data/spec/name_spec.rb +26 -230
  23. data/spec/origin_info_spec.rb +34 -300
  24. data/spec/searchworks_basic_spec.rb +1 -3
  25. data/spec/searchworks_pub_dates_spec.rb +0 -215
  26. data/spec/searchworks_spec.rb +0 -21
  27. data/spec/searchworks_subject_raw_spec.rb +106 -105
  28. data/spec/searchworks_subject_spec.rb +19 -55
  29. data/spec/searchworks_title_spec.rb +1 -1
  30. data/stanford-mods.gemspec +1 -1
  31. metadata +21 -17
  32. data/lib/marc_countries.rb +0 -387
  33. data/lib/stanford-mods/geo_utils.rb +0 -28
  34. data/lib/stanford-mods/name.rb +0 -80
  35. data/lib/stanford-mods/origin_info.rb +0 -489
  36. data/lib/stanford-mods/searchworks.rb +0 -333
  37. data/lib/stanford-mods/searchworks_subjects.rb +0 -196
  38. data/spec/date_parsing_spec.rb +0 -905
@@ -1,4 +1,4 @@
1
- require 'active_support/core_ext/integer/inflections'
1
+ require 'mods/marc_country_codes'
2
2
 
3
3
  module Stanford
4
4
  module Mods
@@ -10,77 +10,33 @@ module Stanford
10
10
  # however, the date_parsing class only does years, and this does finer tuned dates and also
11
11
  # reformats them according to the encoding.
12
12
  class Imprint
13
- # @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
14
- def initialize(originInfo_ng_nodeset)
15
- @originInfo_ng_nodeset = originInfo_ng_nodeset
16
- end
13
+ attr_reader :element
17
14
 
18
- require 'marc_countries'
15
+ # @param [Nokogiri::XML::Node] element an originInfo node
16
+ def initialize(element)
17
+ @element = element
18
+ end
19
19
 
20
- # @return Array<String> each String is an imprint statement from a single originInfo element
21
20
  def imprint_statements
22
- results = []
23
- @originInfo_ng_nodeset.each do |origin_info_node|
24
- edition = edition_vals_str(origin_info_node)
25
- place = place_vals_str(origin_info_node)
26
- publisher = publisher_vals_str(origin_info_node)
27
- dates = date_str(origin_info_node)
28
-
29
- place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
30
- edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
31
- ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
32
-
33
- results << ed_place_pub_dates unless ed_place_pub_dates.empty?
34
- end
35
- results
21
+ display_str
36
22
  end
37
23
 
24
+ # @return [String] an imprint statement from a single originInfo element
38
25
  def display_str
39
- imprint_statements.join('; ') if imprint_statements.present?
40
- end
26
+ edition = edition_vals_str
27
+ place = place_vals_str
28
+ publisher = publisher_vals_str
29
+ dates = date_str
41
30
 
42
- # @return Array<Integer> an array of publication years for the resource
43
- def publication_date_for_slider
44
- @originInfo_ng_nodeset.map do |origin_info_node|
45
- date_elements = if origin_info_node.as_object.first.key_dates.any?
46
- origin_info_node.as_object.first.key_dates.map(&:as_object).map(&:first)
47
- else
48
- date_field_keys.map do |date_field|
49
- next unless origin_info_node.respond_to?(date_field)
50
-
51
- date_elements = origin_info_node.send(date_field)
52
- date_elements.map(&:as_object).map(&:first) if date_elements.any?
53
- end.compact.first
54
- end
55
-
56
- if date_elements.nil? || date_elements.none?
57
- []
58
- elsif date_elements.find(&:start?) &&
59
- date_elements.find(&:start?).as_range &&
60
- date_elements.find(&:end?) &&
61
- date_elements.find(&:end?).as_range
62
- start_date = date_elements.find(&:start?)
63
- end_date = date_elements.find(&:end?)
64
-
65
- (start_date.as_range.min.year..end_date.as_range.max.year).to_a
66
- elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
67
- start_date = date_elements.find(&:start?)
68
-
69
- (start_date.as_range.min.year..Time.now.year).to_a
70
- elsif date_elements.one?
71
- date_elements.first.to_a.map(&:year)
72
- else
73
- date_elements.map { |v| v.to_a.map(&:year) }.flatten
74
- end
75
- end.flatten
31
+ place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
32
+ edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
33
+ ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
34
+
35
+ ed_place_pub_dates
76
36
  end
77
37
 
78
38
  private
79
39
 
80
- def extract_year(el)
81
- DateParsing.year_int_from_date_str(el.text)
82
- end
83
-
84
40
  def compact_and_join_with_delimiter(values, delimiter)
85
41
  compact_values = values.compact.reject { |v| v.strip.empty? }
86
42
  return compact_values.join(delimiter) if compact_values.length == 1 ||
@@ -100,16 +56,16 @@ module Stanford
100
56
  value.strip.end_with?('.', ',', ':', ';')
101
57
  end
102
58
 
103
- def edition_vals_str(origin_info_node)
104
- origin_info_node.edition.reject do |e|
59
+ def edition_vals_str
60
+ element.edition.reject do |e|
105
61
  e.text.strip.empty?
106
62
  end.map(&:text).join(' ').strip
107
63
  end
108
64
 
109
- def publisher_vals_str(origin_info_node)
110
- return if origin_info_node.publisher.text.strip.empty?
65
+ def publisher_vals_str
66
+ return if element.publisher.text.strip.empty?
111
67
 
112
- publishers = origin_info_node.publisher.reject do |p|
68
+ publishers = element.publisher.reject do |p|
113
69
  p.text.strip.empty?
114
70
  end.map(&:text)
115
71
  compact_and_join_with_delimiter(publishers, ' : ')
@@ -117,10 +73,10 @@ module Stanford
117
73
 
118
74
  # PLACE processing methods ------
119
75
 
120
- def place_vals_str(origin_info_node)
121
- return if origin_info_node.place.text.strip.empty?
76
+ def place_vals_str
77
+ return if element.place.text.strip.empty?
122
78
 
123
- places = place_terms(origin_info_node).reject do |p|
79
+ places = place_terms.reject do |p|
124
80
  p.text.strip.empty?
125
81
  end.map(&:text)
126
82
  compact_and_join_with_delimiter(places, ' : ')
@@ -133,25 +89,26 @@ module Stanford
133
89
  end
134
90
  end
135
91
 
136
- def place_terms(origin_info_element)
137
- return [] unless origin_info_element.respond_to?(:place) &&
138
- origin_info_element.place.respond_to?(:placeTerm)
92
+ def place_terms
93
+ return [] unless element.respond_to?(:place) &&
94
+ element.place.respond_to?(:placeTerm)
139
95
 
140
- if unencoded_place_terms?(origin_info_element)
141
- origin_info_element.place.placeTerm.select do |term|
96
+ if unencoded_place_terms?(element)
97
+ element.place.placeTerm.select do |term|
142
98
  !term.attributes['type'].respond_to?(:value) ||
143
99
  term.attributes['type'].value == 'text'
144
100
  end.compact
145
101
  else
146
- origin_info_element.place.placeTerm.map do |term|
102
+ element.place.placeTerm.map do |term|
147
103
  next unless term.attributes['type'].respond_to?(:value) &&
148
104
  term.attributes['type'].value == 'code' &&
149
105
  term.attributes['authority'].respond_to?(:value) &&
150
106
  term.attributes['authority'].value == 'marccountry' &&
151
- MARC_COUNTRIES.include?(term.text.strip)
107
+ !['xx', 'vp'].include?(term.text.strip) &&
108
+ MARC_COUNTRY.include?(term.text.strip)
152
109
 
153
110
  term = term.clone
154
- term.content = MARC_COUNTRIES[term.text.strip]
111
+ term.content = MARC_COUNTRY[term.text.strip]
155
112
  term
156
113
  end.compact
157
114
  end
@@ -159,307 +116,173 @@ module Stanford
159
116
 
160
117
  # DATE processing methods ------
161
118
 
162
- def date_str(origin_info_node)
163
- date_vals = origin_info_date_vals(origin_info_node)
119
+ def date_str
120
+ date_vals = origin_info_date_vals
164
121
  return if date_vals.empty?
165
-
166
122
  date_vals.map(&:strip).join(' ')
167
123
  end
168
124
 
169
- def origin_info_date_vals(origin_info_node)
125
+ def origin_info_date_vals
170
126
  date_field_keys.map do |date_field|
171
- next unless origin_info_node.respond_to?(date_field)
127
+ next unless element.respond_to?(date_field)
172
128
 
173
- date_elements = origin_info_node.send(date_field)
174
- date_elements_display_vals(date_elements) if date_elements.present?
129
+ date_elements = element.send(date_field)
130
+ parse_dates(date_elements) if date_elements.present?
175
131
  end.compact.flatten
176
132
  end
177
133
 
178
- def date_elements_display_vals(ng_date_elements)
179
- apply_date_qualifier_decoration(
180
- dedup_dates(
181
- join_date_ranges(
182
- process_decade_century_dates(
183
- process_bc_ad_dates(
184
- process_encoded_dates(ignore_bad_dates(ng_date_elements))
185
- )
186
- )
187
- )
188
- )
189
- )
190
- end
191
-
192
134
  def date_field_keys
193
135
  [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
194
136
  end
195
137
 
196
- def ignore_bad_dates(ng_date_elements)
197
- ng_date_elements.select do |ng_date_element|
198
- val = ng_date_element.text.strip
199
- val != '9999' && val != '0000-00-00' && val != 'uuuu'
200
- end
201
- end
138
+ class DateValue
139
+ attr_reader :value
140
+ delegate :text, :date, :point, :qualifier, :encoding, to: :value
202
141
 
203
- def process_encoded_dates(ng_date_elements)
204
- ng_date_elements.map do |ng_date_element|
205
- if date_is_w3cdtf?(ng_date_element)
206
- process_w3cdtf_date(ng_date_element)
207
- elsif date_is_iso8601?(ng_date_element)
208
- process_iso8601_date(ng_date_element)
209
- else
210
- ng_date_element
211
- end
142
+ def initialize(value)
143
+ @value = value
212
144
  end
213
- end
214
145
 
215
- # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
216
- # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
217
- # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
218
- # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
219
- def process_bc_ad_dates(ng_date_elements)
220
- ng_date_elements.map do |ng_date_element|
221
- case
222
- when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
223
- ng_date_element.content = "1 B.C."
224
- when date_is_bc_edtf?(ng_date_element)
225
- year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
226
- ng_date_element.content = "#{year} B.C."
227
- when date_is_ad?(ng_date_element)
228
- ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
229
- end
230
- ng_date_element
146
+ # True if the element text isn't blank or one of the placeholder values "9999", "0000-00-00" or "uuuu".
147
+ def valid?
148
+ text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
231
149
  end
232
- end
233
150
 
234
- def process_decade_century_dates(ng_date_elements)
235
- ng_date_elements.map do |ng_date_element|
236
- if date_is_decade?(ng_date_element)
237
- process_decade_date(ng_date_element)
238
- elsif date_is_century?(ng_date_element)
239
- process_century_date(ng_date_element)
240
- else
241
- ng_date_element
151
+ # Element text reduced to digits and hyphen. Captures date ranges and
152
+ # negative (B.C.) dates. Used for comparison/deduping.
153
+ def base_value
154
+ if text =~ /^\[?1\d{3}-\d{2}\??\]?$/
155
+ return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
242
156
  end
243
- end
244
- end
245
157
 
246
- def join_date_ranges(ng_date_elements)
247
- if dates_are_range?(ng_date_elements)
248
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
249
- end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
250
- ng_date_elements.map do |date|
251
- date = date.clone # clone the date object so we don't append the same one
252
- if normalize_date(date.text) == normalize_date(start_date.text)
253
- date.content = [start_date.text, end_date.text].join(' - ')
254
- date
255
- elsif normalize_date(date.text) != normalize_date(end_date.text)
256
- date
257
- end
258
- end.compact
259
- elsif dates_are_open_range?(ng_date_elements)
260
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
261
- ng_date_elements.map do |date|
262
- date = date.clone # clone the date object so we don't append the same one
263
- date.content = "#{start_date.text}-" if date.text == start_date.text
264
- date
265
- end
266
- else
267
- ng_date_elements
158
+ text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
268
159
  end
269
- end
270
160
 
271
- def dedup_dates(ng_date_elements)
272
- date_text = ng_date_elements.map { |d| normalize_date(d.text) }
273
- if date_text != date_text.uniq
274
- if ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }
275
- [ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }]
276
- elsif ng_date_elements.find { |d| !d.attributes['encoding'] }
277
- [ng_date_elements.find { |d| !d.attributes['encoding'] }]
278
- else
279
- [ng_date_elements.first]
161
+ # Decoded version of the date, if it was encoded. Strips leading zeroes.
162
+ def decoded_value
163
+ return text.strip unless date
164
+
165
+ unless encoding.present?
166
+ return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
280
167
  end
281
- else
282
- ng_date_elements
283
- end
284
- end
285
168
 
286
- def apply_date_qualifier_decoration(ng_date_elements)
287
- return_fields = ng_date_elements.map do |date|
288
- date = date.clone
289
- if date_is_approximate?(date)
290
- date.content = "[ca. #{date.text}]"
291
- elsif date_is_questionable?(date)
292
- date.content = "[#{date.text}?]"
293
- elsif date_is_inferred?(date)
294
- date.content = "[#{date.text}]"
169
+ # Delegate to the appropriate decoding method, if any
170
+ case value.precision
171
+ when :day
172
+ date.strftime('%B %e, %Y')
173
+ when :month
174
+ date.strftime('%B %Y')
175
+ when :year
176
+ year = date.year
177
+ if year < 1
178
+ "#{year.abs + 1} B.C."
179
+ # Any dates before the year 1000 are explicitly marked A.D.
180
+ elsif year > 1 && year < 1000
181
+ "#{year} A.D."
182
+ else
183
+ year.to_s
184
+ end
185
+ when :century
186
+ return "#{(date.to_s[0..1].to_i + 1).ordinalize} century"
187
+ when :decade
188
+ return "#{date.year}s"
189
+ else
190
+ text.strip
295
191
  end
296
- date
297
192
  end
298
- return_fields.map(&:text)
299
- end
300
193
 
301
- def date_is_approximate?(ng_date_element)
302
- ng_date_element.attributes['qualifier'] &&
303
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
304
- ng_date_element.attributes['qualifier'].value == 'approximate'
305
- end
194
+ # Decoded date with "B.C." or "A.D." and qualifier markers. See (outdated):
195
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
196
+ def qualified_value
197
+ date = decoded_value
306
198
 
307
- def date_is_questionable?(ng_date_element)
308
- ng_date_element.attributes['qualifier'] &&
309
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
310
- ng_date_element.attributes['qualifier'].value == 'questionable'
311
- end
312
-
313
- def date_is_inferred?(ng_date_element)
314
- ng_date_element.attributes['qualifier'] &&
315
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
316
- ng_date_element.attributes['qualifier'].value == 'inferred'
317
- end
199
+ return "[ca. #{date}]" if qualifier == 'approximate'
200
+ return "[#{date}?]" if qualifier == 'questionable'
201
+ return "[#{date}]" if qualifier == 'inferred'
318
202
 
319
- def dates_are_open_range?(ng_date_elements)
320
- ng_date_elements.any? do |element|
321
- element.attributes['point'] &&
322
- element.attributes['point'].respond_to?(:value) &&
323
- element.attributes['point'].value == 'start'
324
- end && !ng_date_elements.any? do |element|
325
- element.attributes['point'] &&
326
- element.attributes['point'].respond_to?(:value) &&
327
- element.attributes['point'].value == 'end'
203
+ date
328
204
  end
329
205
  end
330
206
 
331
- def dates_are_range?(ng_date_elements)
332
- attributes = ng_date_elements.map do |date|
333
- if date.attributes['point'].respond_to?(:value)
334
- date.attributes['point'].value
335
- end
207
+ class DateRange
208
+ def initialize(start: nil, stop: nil)
209
+ @start = start
210
+ @stop = stop
336
211
  end
337
- attributes.include?('start') &&
338
- attributes.include?('end')
339
- end
340
212
 
341
- def process_w3cdtf_date(ng_date_element)
342
- ng_date_element = ng_date_element.clone
343
- ng_date_element.content = begin
344
- if ng_date_element.text.strip =~ /^\d{4}-\d{2}-\d{2}$/
345
- Date.parse(ng_date_element.text).strftime(full_date_format)
346
- elsif ng_date_element.text.strip =~ /^\d{4}-\d{2}$/
347
- Date.parse("#{ng_date_element.text}-01").strftime(short_date_format)
348
- else
349
- ng_date_element.content
350
- end
351
- rescue
352
- ng_date_element.content
213
+ # Base value as hyphen-joined string. Used for comparison/deduping.
214
+ def base_value
215
+ "#{@start&.base_value}-#{@stop&.base_value}"
353
216
  end
354
- ng_date_element
355
- end
356
217
 
357
- def process_iso8601_date(ng_date_element)
358
- ng_date_element = ng_date_element.clone
359
- ng_date_element.content = begin
360
- if ng_date_element.text.strip =~ /^\d{8,}$/
361
- Date.parse(ng_date_element.text).strftime(full_date_format)
362
- else
363
- ng_date_element.content
364
- end
365
- rescue
366
- ng_date_element.content
218
+ # Base values as array. Used for comparison/deduping of individual dates.
219
+ def base_values
220
+ [@start&.base_value, @stop&.base_value].compact
367
221
  end
368
- ng_date_element
369
- end
370
222
 
371
- DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')
372
-
373
- # strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content
374
- def process_decade_date(ng_date_element)
375
- my_ng_date_element = ng_date_element.clone
376
- my_ng_date_element.content = begin
377
- orig_date_str = ng_date_element.text.strip
378
- # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
379
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
380
- if decade_matches
381
- decade_str = decade_matches[2]
382
- changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
383
- zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
384
- new_decade_str = "#{zeroth_year}s" if zeroth_year
385
- my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
386
- else
387
- my_ng_date_element.content
388
- end
389
- rescue
390
- my_ng_date_element.content
223
+ # The encoding value for the start of the range, or stop if not present.
224
+ def encoding
225
+ @start&.encoding || @stop&.encoding
391
226
  end
392
- my_ng_date_element
393
- end
394
227
 
395
- CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')
396
-
397
- # strings like 18uu, 18-- become '19th century' in the ng_date_element content
398
- def process_century_date(ng_date_element)
399
- my_ng_date_element = ng_date_element.clone
400
- my_ng_date_element.content = begin
401
- orig_date_str = ng_date_element.text.strip
402
- # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
403
- century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
404
- if century_matches
405
- new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
406
- my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
228
+ # Decoded dates with "B.C." or "A.D." and qualifier markers applied to
229
+ # the entire range, or individually if dates differ.
230
+ def qualified_value
231
+ if @start&.qualifier == @stop&.qualifier
232
+ qualifier = @start&.qualifier || @stop&.qualifier
233
+ date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
234
+ return "[ca. #{date}]" if qualifier == 'approximate'
235
+ return "[#{date}?]" if qualifier == 'questionable'
236
+ return "[#{date}]" if qualifier == 'inferred'
237
+
238
+ date
407
239
  else
408
- my_ng_date_element.content
240
+ "#{@start&.qualified_value} - #{@stop&.qualified_value}"
409
241
  end
410
- rescue
411
- my_ng_date_element.content
412
242
  end
413
- my_ng_date_element
414
- end
415
-
416
- def field_is_encoded?(ng_element, encoding)
417
- ng_element.attributes['encoding'] &&
418
- ng_element.attributes['encoding'].respond_to?(:value) &&
419
- ng_element.attributes['encoding'].value.downcase == encoding
420
- end
421
-
422
- def date_is_bc_edtf?(ng_date_element)
423
- ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
424
243
  end
425
244
 
426
- def date_is_ad?(ng_date_element)
427
- str = ng_date_element.text.strip.gsub(/^0*/, '')
428
- str.present? && str.length < 4 && !str.match('A.D.')
429
- end
245
+ def parse_dates(elements)
246
+ # convert to DateValue objects and keep only valid ones
247
+ dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
248
+ # join any date ranges into DateRange objects
249
+ point, nonpoint = dates.partition(&:point)
250
+ if point.any?
251
+ range = DateRange.new(start: point.find { |date| date.point == 'start' },
252
+ stop: point.find { |date| date.point == 'end' })
253
+ nonpoint.unshift(range)
254
+ end
255
+ dates = nonpoint
430
256
 
431
- def date_is_edtf?(ng_date_element)
432
- field_is_encoded?(ng_date_element, 'edtf')
433
- end
257
+ # ensure dates are unique with respect to their base values
258
+ dates = dates.group_by(&:base_value).map do |_value, group|
259
+ next group.first if group.one?
434
260
 
435
- def date_is_w3cdtf?(ng_date_element)
436
- field_is_encoded?(ng_date_element, 'w3cdtf')
437
- end
261
+ # if one of the duplicates wasn't encoded, use that one. see:
262
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
263
+ if group.reject(&:encoding).any?
264
+ group.reject(&:encoding).first
438
265
 
439
- def date_is_iso8601?(ng_date_element)
440
- field_is_encoded?(ng_date_element, 'iso8601')
441
- end
266
+ # otherwise just arbitrarily pick the last in the group
267
+ else
268
+ group.last
269
+ end
270
+ end
442
271
 
443
- # @return true if decade string needs tweaking for display
444
- def date_is_decade?(ng_date_element)
445
- ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
446
- end
272
+ # compare the remaining dates against one part or the other of a range
273
+ date_ranges = dates.select { |date| date.is_a?(DateRange) }
447
274
 
448
- # @return true if century string needs tweaking for display
449
- def date_is_century?(ng_date_element)
450
- ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
451
- end
275
+ # remove any range that duplicates an unencoded date that includes that range
276
+ duplicated_ranges = dates.flat_map do |date|
277
+ next if date.is_a?(DateRange) || date.encoding.present?
452
278
 
453
- def full_date_format(full_date_format = '%B %-d, %Y')
454
- @full_date_format ||= full_date_format
455
- end
279
+ date_ranges.select { |r| r.base_values.include?(date.base_value) }
280
+ end
456
281
 
457
- def short_date_format(short_date_format = '%B %Y')
458
- @short_date_format ||= short_date_format
459
- end
282
+ dates = dates - duplicated_ranges
460
283
 
461
- def normalize_date(date_str)
462
- date_str.strip.gsub(/^\[*ca\.\s*|c|\[|\]|\?/, '')
284
+ # output formatted dates with qualifiers, A.D./B.C., etc.
285
+ dates.map(&:qualified_value)
463
286
  end
464
287
  end
465
288
  end
@@ -0,0 +1,20 @@
1
+ # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
2
+ module Stanford
3
+ module Mods
4
+ class Record < ::Mods::Record
5
+ include Stanford::Mods::Geospatial
6
+ include Stanford::Mods::Name
7
+ include Stanford::Mods::OriginInfo
8
+ include Stanford::Mods::PhysicalLocation
9
+ include Stanford::Mods::SearchworksSubjects
10
+ include Stanford::Mods::Searchworks
11
+ include Stanford::Mods::Title
12
+
13
+ attr_writer :druid
14
+
15
+ def druid
16
+ @druid || 'Unknown item'
17
+ end
18
+ end # Record class
19
+ end # Mods module
20
+ end # Stanford module
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = '2.6.4'.freeze
4
+ VERSION = '3.0.0.alpha1'.freeze
5
5
  end
6
6
  end
data/lib/stanford-mods.rb CHANGED
@@ -1,20 +1,21 @@
1
1
  require 'active_support'
2
+ require 'active_support/core_ext/integer/inflections'
2
3
  require 'mods'
3
4
  require 'stanford-mods/date_parsing'
4
5
  require 'stanford-mods/coordinate'
5
- require 'stanford-mods/geo_spatial'
6
- require 'stanford-mods/geo_utils'
7
6
  require 'stanford-mods/imprint'
8
- require 'stanford-mods/name'
9
- require 'stanford-mods/origin_info'
10
- require 'stanford-mods/physical_location'
11
- require 'stanford-mods/searchworks'
7
+ require 'stanford-mods/vocabularies/searchworks_languages'
8
+ require 'stanford-mods/concerns/geo_spatial'
9
+ require 'stanford-mods/concerns/name'
10
+ require 'stanford-mods/concerns/origin_info'
11
+ require 'stanford-mods/concerns/physical_location'
12
+ require 'stanford-mods/concerns/searchworks'
13
+ require 'stanford-mods/concerns/searchworks_subjects'
14
+ require 'stanford-mods/concerns/title'
15
+ require 'stanford-mods/record'
12
16
  require 'stanford-mods/version'
13
17
 
14
- # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
15
18
  module Stanford
16
19
  module Mods
17
- class Record < ::Mods::Record
18
- end # Record class
19
- end # Mods module
20
- end # Stanford module
20
+ end
21
+ end