stanford-mods 2.6.4 → 3.0.0.alpha1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +1 -1
  3. data/lib/stanford-mods/{geo_spatial.rb → concerns/geo_spatial.rb} +3 -5
  4. data/lib/stanford-mods/concerns/name.rb +57 -0
  5. data/lib/stanford-mods/concerns/origin_info.rb +109 -0
  6. data/lib/stanford-mods/{physical_location.rb → concerns/physical_location.rb} +2 -2
  7. data/lib/stanford-mods/concerns/searchworks.rb +125 -0
  8. data/lib/stanford-mods/concerns/searchworks_subjects.rb +126 -0
  9. data/lib/stanford-mods/concerns/title.rb +79 -0
  10. data/lib/stanford-mods/coordinate.rb +21 -3
  11. data/lib/stanford-mods/date_parsing.rb +32 -289
  12. data/lib/stanford-mods/imprint.rb +148 -325
  13. data/lib/stanford-mods/record.rb +20 -0
  14. data/lib/stanford-mods/version.rb +1 -1
  15. data/lib/stanford-mods/{searchworks_languages.rb → vocabularies/searchworks_languages.rb} +0 -0
  16. data/lib/stanford-mods.rb +12 -11
  17. data/spec/fixtures/searchworks_imprint_data.rb +38 -39
  18. data/spec/fixtures/searchworks_pub_date_data.rb +7 -7
  19. data/spec/fixtures/spotlight_pub_date_data.rb +7 -7
  20. data/spec/geo_spatial_spec.rb +1 -6
  21. data/spec/imprint_spec.rb +238 -207
  22. data/spec/name_spec.rb +26 -230
  23. data/spec/origin_info_spec.rb +34 -300
  24. data/spec/searchworks_basic_spec.rb +1 -3
  25. data/spec/searchworks_pub_dates_spec.rb +0 -215
  26. data/spec/searchworks_spec.rb +0 -21
  27. data/spec/searchworks_subject_raw_spec.rb +106 -105
  28. data/spec/searchworks_subject_spec.rb +19 -55
  29. data/spec/searchworks_title_spec.rb +1 -1
  30. data/stanford-mods.gemspec +1 -1
  31. metadata +21 -17
  32. data/lib/marc_countries.rb +0 -387
  33. data/lib/stanford-mods/geo_utils.rb +0 -28
  34. data/lib/stanford-mods/name.rb +0 -80
  35. data/lib/stanford-mods/origin_info.rb +0 -489
  36. data/lib/stanford-mods/searchworks.rb +0 -333
  37. data/lib/stanford-mods/searchworks_subjects.rb +0 -196
  38. data/spec/date_parsing_spec.rb +0 -905
@@ -1,4 +1,4 @@
1
- require 'active_support/core_ext/integer/inflections'
1
+ require 'mods/marc_country_codes'
2
2
 
3
3
  module Stanford
4
4
  module Mods
@@ -10,77 +10,33 @@ module Stanford
10
10
  # however, the date_parsing class only does years, and this does finer tuned dates and also
11
11
  # reformats them according to the encoding.
12
12
  class Imprint
13
- # @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
14
- def initialize(originInfo_ng_nodeset)
15
- @originInfo_ng_nodeset = originInfo_ng_nodeset
16
- end
13
+ attr_reader :element
17
14
 
18
- require 'marc_countries'
15
+ # @param [Nokogiri::XML::Node] element an originInfo node
16
+ def initialize(element)
17
+ @element = element
18
+ end
19
19
 
20
- # @return Array<String> each String is an imprint statement from a single originInfo element
21
20
  def imprint_statements
22
- results = []
23
- @originInfo_ng_nodeset.each do |origin_info_node|
24
- edition = edition_vals_str(origin_info_node)
25
- place = place_vals_str(origin_info_node)
26
- publisher = publisher_vals_str(origin_info_node)
27
- dates = date_str(origin_info_node)
28
-
29
- place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
30
- edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
31
- ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
32
-
33
- results << ed_place_pub_dates unless ed_place_pub_dates.empty?
34
- end
35
- results
21
+ display_str
36
22
  end
37
23
 
24
+ # @return [String] an imprint statement from a single originInfo element
38
25
  def display_str
39
- imprint_statements.join('; ') if imprint_statements.present?
40
- end
26
+ edition = edition_vals_str
27
+ place = place_vals_str
28
+ publisher = publisher_vals_str
29
+ dates = date_str
41
30
 
42
- # @return Array<Integer> an array of publication years for the resource
43
- def publication_date_for_slider
44
- @originInfo_ng_nodeset.map do |origin_info_node|
45
- date_elements = if origin_info_node.as_object.first.key_dates.any?
46
- origin_info_node.as_object.first.key_dates.map(&:as_object).map(&:first)
47
- else
48
- date_field_keys.map do |date_field|
49
- next unless origin_info_node.respond_to?(date_field)
50
-
51
- date_elements = origin_info_node.send(date_field)
52
- date_elements.map(&:as_object).map(&:first) if date_elements.any?
53
- end.compact.first
54
- end
55
-
56
- if date_elements.nil? || date_elements.none?
57
- []
58
- elsif date_elements.find(&:start?) &&
59
- date_elements.find(&:start?).as_range &&
60
- date_elements.find(&:end?) &&
61
- date_elements.find(&:end?).as_range
62
- start_date = date_elements.find(&:start?)
63
- end_date = date_elements.find(&:end?)
64
-
65
- (start_date.as_range.min.year..end_date.as_range.max.year).to_a
66
- elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
67
- start_date = date_elements.find(&:start?)
68
-
69
- (start_date.as_range.min.year..Time.now.year).to_a
70
- elsif date_elements.one?
71
- date_elements.first.to_a.map(&:year)
72
- else
73
- date_elements.map { |v| v.to_a.map(&:year) }.flatten
74
- end
75
- end.flatten
31
+ place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
32
+ edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
33
+ ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
34
+
35
+ ed_place_pub_dates
76
36
  end
77
37
 
78
38
  private
79
39
 
80
- def extract_year(el)
81
- DateParsing.year_int_from_date_str(el.text)
82
- end
83
-
84
40
  def compact_and_join_with_delimiter(values, delimiter)
85
41
  compact_values = values.compact.reject { |v| v.strip.empty? }
86
42
  return compact_values.join(delimiter) if compact_values.length == 1 ||
@@ -100,16 +56,16 @@ module Stanford
100
56
  value.strip.end_with?('.', ',', ':', ';')
101
57
  end
102
58
 
103
- def edition_vals_str(origin_info_node)
104
- origin_info_node.edition.reject do |e|
59
+ def edition_vals_str
60
+ element.edition.reject do |e|
105
61
  e.text.strip.empty?
106
62
  end.map(&:text).join(' ').strip
107
63
  end
108
64
 
109
- def publisher_vals_str(origin_info_node)
110
- return if origin_info_node.publisher.text.strip.empty?
65
+ def publisher_vals_str
66
+ return if element.publisher.text.strip.empty?
111
67
 
112
- publishers = origin_info_node.publisher.reject do |p|
68
+ publishers = element.publisher.reject do |p|
113
69
  p.text.strip.empty?
114
70
  end.map(&:text)
115
71
  compact_and_join_with_delimiter(publishers, ' : ')
@@ -117,10 +73,10 @@ module Stanford
117
73
 
118
74
  # PLACE processing methods ------
119
75
 
120
- def place_vals_str(origin_info_node)
121
- return if origin_info_node.place.text.strip.empty?
76
+ def place_vals_str
77
+ return if element.place.text.strip.empty?
122
78
 
123
- places = place_terms(origin_info_node).reject do |p|
79
+ places = place_terms.reject do |p|
124
80
  p.text.strip.empty?
125
81
  end.map(&:text)
126
82
  compact_and_join_with_delimiter(places, ' : ')
@@ -133,25 +89,26 @@ module Stanford
133
89
  end
134
90
  end
135
91
 
136
- def place_terms(origin_info_element)
137
- return [] unless origin_info_element.respond_to?(:place) &&
138
- origin_info_element.place.respond_to?(:placeTerm)
92
+ def place_terms
93
+ return [] unless element.respond_to?(:place) &&
94
+ element.place.respond_to?(:placeTerm)
139
95
 
140
- if unencoded_place_terms?(origin_info_element)
141
- origin_info_element.place.placeTerm.select do |term|
96
+ if unencoded_place_terms?(element)
97
+ element.place.placeTerm.select do |term|
142
98
  !term.attributes['type'].respond_to?(:value) ||
143
99
  term.attributes['type'].value == 'text'
144
100
  end.compact
145
101
  else
146
- origin_info_element.place.placeTerm.map do |term|
102
+ element.place.placeTerm.map do |term|
147
103
  next unless term.attributes['type'].respond_to?(:value) &&
148
104
  term.attributes['type'].value == 'code' &&
149
105
  term.attributes['authority'].respond_to?(:value) &&
150
106
  term.attributes['authority'].value == 'marccountry' &&
151
- MARC_COUNTRIES.include?(term.text.strip)
107
+ !['xx', 'vp'].include?(term.text.strip) &&
108
+ MARC_COUNTRY.include?(term.text.strip)
152
109
 
153
110
  term = term.clone
154
- term.content = MARC_COUNTRIES[term.text.strip]
111
+ term.content = MARC_COUNTRY[term.text.strip]
155
112
  term
156
113
  end.compact
157
114
  end
@@ -159,307 +116,173 @@ module Stanford
159
116
 
160
117
  # DATE processing methods ------
161
118
 
162
- def date_str(origin_info_node)
163
- date_vals = origin_info_date_vals(origin_info_node)
119
+ def date_str
120
+ date_vals = origin_info_date_vals
164
121
  return if date_vals.empty?
165
-
166
122
  date_vals.map(&:strip).join(' ')
167
123
  end
168
124
 
169
- def origin_info_date_vals(origin_info_node)
125
+ def origin_info_date_vals
170
126
  date_field_keys.map do |date_field|
171
- next unless origin_info_node.respond_to?(date_field)
127
+ next unless element.respond_to?(date_field)
172
128
 
173
- date_elements = origin_info_node.send(date_field)
174
- date_elements_display_vals(date_elements) if date_elements.present?
129
+ date_elements = element.send(date_field)
130
+ parse_dates(date_elements) if date_elements.present?
175
131
  end.compact.flatten
176
132
  end
177
133
 
178
- def date_elements_display_vals(ng_date_elements)
179
- apply_date_qualifier_decoration(
180
- dedup_dates(
181
- join_date_ranges(
182
- process_decade_century_dates(
183
- process_bc_ad_dates(
184
- process_encoded_dates(ignore_bad_dates(ng_date_elements))
185
- )
186
- )
187
- )
188
- )
189
- )
190
- end
191
-
192
134
  def date_field_keys
193
135
  [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
194
136
  end
195
137
 
196
- def ignore_bad_dates(ng_date_elements)
197
- ng_date_elements.select do |ng_date_element|
198
- val = ng_date_element.text.strip
199
- val != '9999' && val != '0000-00-00' && val != 'uuuu'
200
- end
201
- end
138
+ class DateValue
139
+ attr_reader :value
140
+ delegate :text, :date, :point, :qualifier, :encoding, to: :value
202
141
 
203
- def process_encoded_dates(ng_date_elements)
204
- ng_date_elements.map do |ng_date_element|
205
- if date_is_w3cdtf?(ng_date_element)
206
- process_w3cdtf_date(ng_date_element)
207
- elsif date_is_iso8601?(ng_date_element)
208
- process_iso8601_date(ng_date_element)
209
- else
210
- ng_date_element
211
- end
142
+ def initialize(value)
143
+ @value = value
212
144
  end
213
- end
214
145
 
215
- # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
216
- # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
217
- # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
218
- # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
219
- def process_bc_ad_dates(ng_date_elements)
220
- ng_date_elements.map do |ng_date_element|
221
- case
222
- when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
223
- ng_date_element.content = "1 B.C."
224
- when date_is_bc_edtf?(ng_date_element)
225
- year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
226
- ng_date_element.content = "#{year} B.C."
227
- when date_is_ad?(ng_date_element)
228
- ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
229
- end
230
- ng_date_element
146
+ # True if the element text isn't blank or a placeholder value ("9999", "0000-00-00" or "uuuu").
147
+ def valid?
148
+ text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
231
149
  end
232
- end
233
150
 
234
- def process_decade_century_dates(ng_date_elements)
235
- ng_date_elements.map do |ng_date_element|
236
- if date_is_decade?(ng_date_element)
237
- process_decade_date(ng_date_element)
238
- elsif date_is_century?(ng_date_element)
239
- process_century_date(ng_date_element)
240
- else
241
- ng_date_element
151
+ # Element text reduced to digits and hyphen. Captures date ranges and
152
+ # negative (B.C.) dates. Used for comparison/deduping.
153
+ def base_value
154
+ if text =~ /^\[?1\d{3}-\d{2}\??\]?$/
155
+ return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
242
156
  end
243
- end
244
- end
245
157
 
246
- def join_date_ranges(ng_date_elements)
247
- if dates_are_range?(ng_date_elements)
248
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
249
- end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
250
- ng_date_elements.map do |date|
251
- date = date.clone # clone the date object so we don't append the same one
252
- if normalize_date(date.text) == normalize_date(start_date.text)
253
- date.content = [start_date.text, end_date.text].join(' - ')
254
- date
255
- elsif normalize_date(date.text) != normalize_date(end_date.text)
256
- date
257
- end
258
- end.compact
259
- elsif dates_are_open_range?(ng_date_elements)
260
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
261
- ng_date_elements.map do |date|
262
- date = date.clone # clone the date object so we don't append the same one
263
- date.content = "#{start_date.text}-" if date.text == start_date.text
264
- date
265
- end
266
- else
267
- ng_date_elements
158
+ text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
268
159
  end
269
- end
270
160
 
271
- def dedup_dates(ng_date_elements)
272
- date_text = ng_date_elements.map { |d| normalize_date(d.text) }
273
- if date_text != date_text.uniq
274
- if ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }
275
- [ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }]
276
- elsif ng_date_elements.find { |d| !d.attributes['encoding'] }
277
- [ng_date_elements.find { |d| !d.attributes['encoding'] }]
278
- else
279
- [ng_date_elements.first]
161
+ # Decoded version of the date, if it was encoded. Strips leading zeroes.
162
+ def decoded_value
163
+ return text.strip unless date
164
+
165
+ unless encoding.present?
166
+ return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
280
167
  end
281
- else
282
- ng_date_elements
283
- end
284
- end
285
168
 
286
- def apply_date_qualifier_decoration(ng_date_elements)
287
- return_fields = ng_date_elements.map do |date|
288
- date = date.clone
289
- if date_is_approximate?(date)
290
- date.content = "[ca. #{date.text}]"
291
- elsif date_is_questionable?(date)
292
- date.content = "[#{date.text}?]"
293
- elsif date_is_inferred?(date)
294
- date.content = "[#{date.text}]"
169
+ # Delegate to the appropriate decoding method, if any
170
+ case value.precision
171
+ when :day
172
+ date.strftime('%B %e, %Y')
173
+ when :month
174
+ date.strftime('%B %Y')
175
+ when :year
176
+ year = date.year
177
+ if year < 1
178
+ "#{year.abs + 1} B.C."
179
+ # Any dates before the year 1000 are explicitly marked A.D.
180
+ elsif year > 1 && year < 1000
181
+ "#{year} A.D."
182
+ else
183
+ year.to_s
184
+ end
185
+ when :century
186
+ return "#{(date.to_s[0..1].to_i + 1).ordinalize} century"
187
+ when :decade
188
+ return "#{date.year}s"
189
+ else
190
+ text.strip
295
191
  end
296
- date
297
192
  end
298
- return_fields.map(&:text)
299
- end
300
193
 
301
- def date_is_approximate?(ng_date_element)
302
- ng_date_element.attributes['qualifier'] &&
303
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
304
- ng_date_element.attributes['qualifier'].value == 'approximate'
305
- end
194
+ # Decoded date with "B.C." or "A.D." and qualifier markers. See (outdated):
195
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
196
+ def qualified_value
197
+ date = decoded_value
306
198
 
307
- def date_is_questionable?(ng_date_element)
308
- ng_date_element.attributes['qualifier'] &&
309
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
310
- ng_date_element.attributes['qualifier'].value == 'questionable'
311
- end
312
-
313
- def date_is_inferred?(ng_date_element)
314
- ng_date_element.attributes['qualifier'] &&
315
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
316
- ng_date_element.attributes['qualifier'].value == 'inferred'
317
- end
199
+ return "[ca. #{date}]" if qualifier == 'approximate'
200
+ return "[#{date}?]" if qualifier == 'questionable'
201
+ return "[#{date}]" if qualifier == 'inferred'
318
202
 
319
- def dates_are_open_range?(ng_date_elements)
320
- ng_date_elements.any? do |element|
321
- element.attributes['point'] &&
322
- element.attributes['point'].respond_to?(:value) &&
323
- element.attributes['point'].value == 'start'
324
- end && !ng_date_elements.any? do |element|
325
- element.attributes['point'] &&
326
- element.attributes['point'].respond_to?(:value) &&
327
- element.attributes['point'].value == 'end'
203
+ date
328
204
  end
329
205
  end
330
206
 
331
- def dates_are_range?(ng_date_elements)
332
- attributes = ng_date_elements.map do |date|
333
- if date.attributes['point'].respond_to?(:value)
334
- date.attributes['point'].value
335
- end
207
+ class DateRange
208
+ def initialize(start: nil, stop: nil)
209
+ @start = start
210
+ @stop = stop
336
211
  end
337
- attributes.include?('start') &&
338
- attributes.include?('end')
339
- end
340
212
 
341
- def process_w3cdtf_date(ng_date_element)
342
- ng_date_element = ng_date_element.clone
343
- ng_date_element.content = begin
344
- if ng_date_element.text.strip =~ /^\d{4}-\d{2}-\d{2}$/
345
- Date.parse(ng_date_element.text).strftime(full_date_format)
346
- elsif ng_date_element.text.strip =~ /^\d{4}-\d{2}$/
347
- Date.parse("#{ng_date_element.text}-01").strftime(short_date_format)
348
- else
349
- ng_date_element.content
350
- end
351
- rescue
352
- ng_date_element.content
213
+ # Base value as hyphen-joined string. Used for comparison/deduping.
214
+ def base_value
215
+ "#{@start&.base_value}-#{@stop&.base_value}"
353
216
  end
354
- ng_date_element
355
- end
356
217
 
357
- def process_iso8601_date(ng_date_element)
358
- ng_date_element = ng_date_element.clone
359
- ng_date_element.content = begin
360
- if ng_date_element.text.strip =~ /^\d{8,}$/
361
- Date.parse(ng_date_element.text).strftime(full_date_format)
362
- else
363
- ng_date_element.content
364
- end
365
- rescue
366
- ng_date_element.content
218
+ # Base values as array. Used for comparison/deduping of individual dates.
219
+ def base_values
220
+ [@start&.base_value, @stop&.base_value].compact
367
221
  end
368
- ng_date_element
369
- end
370
222
 
371
- DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')
372
-
373
- # strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content
374
- def process_decade_date(ng_date_element)
375
- my_ng_date_element = ng_date_element.clone
376
- my_ng_date_element.content = begin
377
- orig_date_str = ng_date_element.text.strip
378
- # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
379
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
380
- if decade_matches
381
- decade_str = decade_matches[2]
382
- changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
383
- zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
384
- new_decade_str = "#{zeroth_year}s" if zeroth_year
385
- my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
386
- else
387
- my_ng_date_element.content
388
- end
389
- rescue
390
- my_ng_date_element.content
223
+ # The encoding value for the start of the range, or stop if not present.
224
+ def encoding
225
+ @start&.encoding || @stop&.encoding
391
226
  end
392
- my_ng_date_element
393
- end
394
227
 
395
- CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')
396
-
397
- # strings like 18uu, 18-- become '19th century' in the ng_date_element content
398
- def process_century_date(ng_date_element)
399
- my_ng_date_element = ng_date_element.clone
400
- my_ng_date_element.content = begin
401
- orig_date_str = ng_date_element.text.strip
402
- # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
403
- century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
404
- if century_matches
405
- new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
406
- my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
228
+ # Decoded dates with "B.C." or "A.D." and qualifier markers applied to
229
+ # the entire range, or individually if dates differ.
230
+ def qualified_value
231
+ if @start&.qualifier == @stop&.qualifier
232
+ qualifier = @start&.qualifier || @stop&.qualifier
233
+ date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
234
+ return "[ca. #{date}]" if qualifier == 'approximate'
235
+ return "[#{date}?]" if qualifier == 'questionable'
236
+ return "[#{date}]" if qualifier == 'inferred'
237
+
238
+ date
407
239
  else
408
- my_ng_date_element.content
240
+ "#{@start&.qualified_value} - #{@stop&.qualified_value}"
409
241
  end
410
- rescue
411
- my_ng_date_element.content
412
242
  end
413
- my_ng_date_element
414
- end
415
-
416
- def field_is_encoded?(ng_element, encoding)
417
- ng_element.attributes['encoding'] &&
418
- ng_element.attributes['encoding'].respond_to?(:value) &&
419
- ng_element.attributes['encoding'].value.downcase == encoding
420
- end
421
-
422
- def date_is_bc_edtf?(ng_date_element)
423
- ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
424
243
  end
425
244
 
426
- def date_is_ad?(ng_date_element)
427
- str = ng_date_element.text.strip.gsub(/^0*/, '')
428
- str.present? && str.length < 4 && !str.match('A.D.')
429
- end
245
+ def parse_dates(elements)
246
+ # convert to DateValue objects and keep only valid ones
247
+ dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
248
+ # join any date ranges into DateRange objects
249
+ point, nonpoint = dates.partition(&:point)
250
+ if point.any?
251
+ range = DateRange.new(start: point.find { |date| date.point == 'start' },
252
+ stop: point.find { |date| date.point == 'end' })
253
+ nonpoint.unshift(range)
254
+ end
255
+ dates = nonpoint
430
256
 
431
- def date_is_edtf?(ng_date_element)
432
- field_is_encoded?(ng_date_element, 'edtf')
433
- end
257
+ # ensure dates are unique with respect to their base values
258
+ dates = dates.group_by(&:base_value).map do |_value, group|
259
+ next group.first if group.one?
434
260
 
435
- def date_is_w3cdtf?(ng_date_element)
436
- field_is_encoded?(ng_date_element, 'w3cdtf')
437
- end
261
+ # if one of the duplicates wasn't encoded, use that one. see:
262
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
263
+ if group.reject(&:encoding).any?
264
+ group.reject(&:encoding).first
438
265
 
439
- def date_is_iso8601?(ng_date_element)
440
- field_is_encoded?(ng_date_element, 'iso8601')
441
- end
266
+ # otherwise just arbitrarily pick the last in the group
267
+ else
268
+ group.last
269
+ end
270
+ end
442
271
 
443
- # @return true if decade string needs tweaking for display
444
- def date_is_decade?(ng_date_element)
445
- ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
446
- end
272
+ # compare the remaining dates against one part of the other of a range
273
+ date_ranges = dates.select { |date| date.is_a?(DateRange) }
447
274
 
448
- # @return true if century string needs tweaking for display
449
- def date_is_century?(ng_date_element)
450
- ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
451
- end
275
+ # remove any range that duplicates an unencoded date that includes that range
276
+ duplicated_ranges = dates.flat_map do |date|
277
+ next if date.is_a?(DateRange) || date.encoding.present?
452
278
 
453
- def full_date_format(full_date_format = '%B %-d, %Y')
454
- @full_date_format ||= full_date_format
455
- end
279
+ date_ranges.select { |r| r.base_values.include?(date.base_value) }
280
+ end
456
281
 
457
- def short_date_format(short_date_format = '%B %Y')
458
- @short_date_format ||= short_date_format
459
- end
282
+ dates = dates - duplicated_ranges
460
283
 
461
- def normalize_date(date_str)
462
- date_str.strip.gsub(/^\[*ca\.\s*|c|\[|\]|\?/, '')
284
+ # output formatted dates with qualifiers, A.D./B.C., etc.
285
+ dates.map(&:qualified_value)
463
286
  end
464
287
  end
465
288
  end
@@ -0,0 +1,20 @@
1
+ # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
2
+ module Stanford
3
+ module Mods
4
+ class Record < ::Mods::Record
5
+ include Stanford::Mods::Geospatial
6
+ include Stanford::Mods::Name
7
+ include Stanford::Mods::OriginInfo
8
+ include Stanford::Mods::PhysicalLocation
9
+ include Stanford::Mods::SearchworksSubjects
10
+ include Stanford::Mods::Searchworks
11
+ include Stanford::Mods::Title
12
+
13
+ attr_writer :druid
14
+
15
+ def druid
16
+ @druid || 'Unknown item'
17
+ end
18
+ end # Record class
19
+ end # Mods module
20
+ end # Stanford module
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = '2.6.4'.freeze
4
+ VERSION = '3.0.0.alpha1'.freeze
5
5
  end
6
6
  end
data/lib/stanford-mods.rb CHANGED
@@ -1,20 +1,21 @@
1
1
  require 'active_support'
2
+ require 'active_support/core_ext/integer/inflections'
2
3
  require 'mods'
3
4
  require 'stanford-mods/date_parsing'
4
5
  require 'stanford-mods/coordinate'
5
- require 'stanford-mods/geo_spatial'
6
- require 'stanford-mods/geo_utils'
7
6
  require 'stanford-mods/imprint'
8
- require 'stanford-mods/name'
9
- require 'stanford-mods/origin_info'
10
- require 'stanford-mods/physical_location'
11
- require 'stanford-mods/searchworks'
7
+ require 'stanford-mods/vocabularies/searchworks_languages'
8
+ require 'stanford-mods/concerns/geo_spatial'
9
+ require 'stanford-mods/concerns/name'
10
+ require 'stanford-mods/concerns/origin_info'
11
+ require 'stanford-mods/concerns/physical_location'
12
+ require 'stanford-mods/concerns/searchworks'
13
+ require 'stanford-mods/concerns/searchworks_subjects'
14
+ require 'stanford-mods/concerns/title'
15
+ require 'stanford-mods/record'
12
16
  require 'stanford-mods/version'
13
17
 
14
- # Stanford specific wranglings of MODS metadata as an extension of the Mods::Record object
15
18
  module Stanford
16
19
  module Mods
17
- class Record < ::Mods::Record
18
- end # Record class
19
- end # Mods module
20
- end # Stanford module
20
+ end
21
+ end