stanford-mods 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57865ef64be3774919a58f9771243fb7f5090b09867fae098d3fa5ae6e2523cf
4
- data.tar.gz: 1a09641f450a3739c9c61598ec4ad7a4f1f7b410c804d12df6d50530fb249cb7
3
+ metadata.gz: c9c63c6699eef72da80bbf9e15d892248dfc21a7550588aac09843ef26d1869f
4
+ data.tar.gz: f820451452017ec653c4eabd48d19cb23a36fcfafbad814272dd26c9cc9b9d42
5
5
  SHA512:
6
- metadata.gz: c86f9171a032d1349068b43f6cf1272fbdf4a24419a58f66040132d7de2ea745853707b803608ea3344a137e544eff0e7e291a7b9c3922c09fe0568c68409a5e
7
- data.tar.gz: d031a52bb328ae6efa0e42e7050fc4f990295509e4ee0a4b4c1f93f51b2412049b16c87091f27b95a02ed95be1278ccd5cbcc11771f59272bf77f82d73bd3bb2
6
+ metadata.gz: '0817296bebb438d882509919ff0b6fb2ec0e1accd71b379209f3b3b5f4cbd684d8208d8aab7ff3ce5f745860c4331af9098afadb5d066bd406016625c63d3701'
7
+ data.tar.gz: 4ed8b47a3186c3f241d2b543358138c71bea5f7128fc483e1d78117af1224b75db1fb4524763c726b011f414524b7d732a54096dd07d38a909c13a05116cc3ca
@@ -16,14 +16,22 @@ module Stanford
16
16
  # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
17
17
  # @return [Integer] publication year as an Integer
18
18
  # @note for sorting: 5 BCE => -5; 666 BCE => -666
19
- def pub_year_int(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
20
- fields.each do |date_key|
21
- values = mods_ng_xml.origin_info.send(date_key)
22
- values = values.reject(&method(:is_approximate)) if ignore_approximate
23
-
24
- earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
25
- return earliest_date.year_int_from_date_str if earliest_date&.year_int_from_date_str
26
- end; nil
19
+ def pub_year_int(ignore_approximate: false)
20
+ date = earliest_preferred_date(ignore_approximate: ignore_approximate)
21
+
22
+ return unless date
23
+
24
+ if date.is_a? Stanford::Mods::Imprint::DateRange
25
+ date = date.start || date.stop
26
+ end
27
+
28
+ edtf_date = date.date
29
+
30
+ if edtf_date.is_a?(EDTF::Interval)
31
+ edtf_date.from.year
32
+ else
33
+ edtf_date.year
34
+ end
27
35
  end
28
36
 
29
37
  # return a single string intended for lexical sorting for pub date
@@ -33,37 +41,17 @@ module Stanford
33
41
  # @return [String] single String containing publication year for lexical sorting
34
42
  # @note for string sorting 5 BCE = -5 => -995; 6 BCE => -994, so 6 BCE sorts before 5 BCE
35
43
  # @deprecated use pub_year_int
36
- def pub_year_sort_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
37
- fields.each do |date_key|
38
- values = mods_ng_xml.origin_info.send(date_key)
39
- values = values.reject(&method(:is_approximate)) if ignore_approximate
40
-
41
- earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
42
- return earliest_date.sortable_year_string_from_date_str if earliest_date&.sortable_year_string_from_date_str
43
- end; nil
44
+ def pub_year_sort_str(ignore_approximate: false)
45
+ earliest_preferred_date(ignore_approximate: ignore_approximate)&.sort_key
44
46
  end
45
47
 
46
- # return a single string intended for display of pub year
47
- # 0 < year < 1000: add CE suffix
48
- # year < 0: add BCE suffix. ('-5' => '5 BCE', '700 BCE' => '700 BCE')
49
- # 195u => 195x
50
- # 19uu => 19xx
51
- # '-5' => '5 BCE'
52
- # '700 BCE' => '700 BCE'
53
- # '7th century' => '7th century'
54
- # date ranges?
55
- # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
56
- # look for a keyDate and use it if there is one; otherwise pick earliest date
48
+ # return a single string intended for display of pub year (or year range)
49
+ #
50
+ # @param [Array<Symbol>] fields array of field types to use to look for dates.
57
51
  # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
58
52
  # should be ignored; false if approximate dates should be included
59
- def pub_year_display_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
60
- fields.each do |date_key|
61
- values = mods_ng_xml.origin_info.send(date_key)
62
- values = values.reject(&method(:is_approximate)) if ignore_approximate
63
-
64
- earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
65
- return earliest_date.date_str_for_display if earliest_date&.date_str_for_display
66
- end; nil
53
+ def pub_year_display_str(ignore_approximate: false)
54
+ earliest_preferred_date(ignore_approximate: ignore_approximate)&.decoded_value(allowed_precisions: [:year, :decade, :century], ignore_unparseable: true, display_original_text: false)
67
55
  end
68
56
 
69
57
  # @return [Array<Stanford::Mods::Imprint>] array of imprint objects
@@ -81,32 +69,39 @@ module Stanford
81
69
  imprints.map(&:display_str).reject(&:empty?).join('; ')
82
70
  end
83
71
 
84
- # remove Elements from NodeSet if they have a qualifier attribute of 'approximate' or 'questionable'
85
- # @param [Nokogiri::XML::Element] node the date element
86
- # @return [Boolean]
87
- # @private
88
- def is_approximate(node)
89
- qualifier = node["qualifier"] if node.respond_to?('[]')
90
- qualifier == 'approximate' || qualifier == 'questionable'
91
- end
72
+ private
73
+
74
+ # The rules for which date to pick are a little complicated:
75
+ #
76
+ # 1) Examine the date elements of the provided fields.
77
+ # 2) Discard any we can't parse a year out of.
78
+ # 3) (if ignore_approximate is true, used only by exhibits for Feigenbaum), throw out any qualified dates (or ranges if either the start or end is qualified)
79
+ # 4) If that set of date elements has elements with a keyDate, prefer those.
80
+ # 5) If there were encoded dates, prefer those.
81
+ # 6) Choose the earliest date (or starting date of a range).
82
+ #
83
+ # Finally, format the date or range of an encoded date, or just pluck out the year from an unencoded one.
84
+ def earliest_preferred_date(fields: [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
85
+ local_imprints = imprints
86
+
87
+ fields.each do |field_name|
88
+ potential_dates = local_imprints.flat_map do |imprint|
89
+ dates = imprint.dates([field_name])
90
+ dates = dates.select(&:parsed_date?)
91
+ dates = dates.reject(&:qualified?) if ignore_approximate
92
+
93
+ dates
94
+ end
92
95
 
93
- # get earliest parseable year from the passed date elements
94
- # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
95
- # @return [Stanford::Mods::DateParsing]
96
- def self.best_or_earliest_year(date_el_array)
97
- key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
96
+ preferred_dates = potential_dates.select(&:key_date?).presence || potential_dates
97
+ best_dates = (preferred_dates.select { |x| x.encoding.present? }.presence || preferred_dates)
98
98
 
99
- sortable_dates = key_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str)
100
- sortable_dates = other_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str) if sortable_dates.empty?
101
- results = {}
99
+ earliest_date = best_dates.min_by(&:sort_key)
102
100
 
103
- # this is a little weird; instead of just the earliest sorting date, if there are multiple
104
- # dates with the same sort key, we want to make sure we get the last occurring one?
105
- sortable_dates.each do |v|
106
- results[v.sortable_year_string_from_date_str] = v
101
+ return earliest_date if earliest_date
107
102
  end
108
103
 
109
- results[results.keys.min]
104
+ nil
110
105
  end
111
106
  end # class Record
112
107
  end
@@ -35,6 +35,17 @@ module Stanford
35
35
  ed_place_pub_dates
36
36
  end
37
37
 
38
+ # array of parsed but unformatted date values, for a given list of
39
+ # elements to pull data from
40
+ def dates(date_field_keys = [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate])
41
+ date_field_keys.map do |date_field|
42
+ next unless element.respond_to?(date_field)
43
+
44
+ date_elements = element.send(date_field)
45
+ parse_dates(date_elements) if date_elements.present?
46
+ end.compact.flatten
47
+ end
48
+
38
49
  private
39
50
 
40
51
  def compact_and_join_with_delimiter(values, delimiter)
@@ -117,22 +128,10 @@ module Stanford
117
128
  # DATE processing methods ------
118
129
 
119
130
  def date_str
120
- date_vals = origin_info_date_vals
131
+ date_vals = unique_dates_for_display(dates).map(&:qualified_value)
121
132
  return if date_vals.empty?
122
- date_vals.map(&:strip).join(' ')
123
- end
124
-
125
- def origin_info_date_vals
126
- date_field_keys.map do |date_field|
127
- next unless element.respond_to?(date_field)
128
-
129
- date_elements = element.send(date_field)
130
- parse_dates(date_elements) if date_elements.present?
131
- end.compact.flatten
132
- end
133
133
 
134
- def date_field_keys
135
- [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
134
+ date_vals.map(&:strip).join(' ')
136
135
  end
137
136
 
138
137
  class DateValue
@@ -148,6 +147,41 @@ module Stanford
148
147
  text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
149
148
  end
150
149
 
150
+ def key_date?
151
+ value.key?
152
+ end
153
+
154
+ def qualified?
155
+ qualifier.present?
156
+ end
157
+
158
+ def parsed_date?
159
+ date.present?
160
+ end
161
+
162
+ def sort_key
163
+ year = if date.is_a?(EDTF::Interval)
164
+ date.from.year
165
+ else
166
+ date.year
167
+ end
168
+
169
+ str = if year < 1
170
+ (-1 * year - 1000).to_s
171
+ else
172
+ year.to_s
173
+ end
174
+
175
+ case value.precision
176
+ when :decade
177
+ str[0..2] + "-"
178
+ when :century
179
+ str[0..1] + "--"
180
+ else
181
+ str.rjust(4, "0")
182
+ end
183
+ end
184
+
151
185
  # Element text reduced to digits and hyphen. Captures date ranges and
152
186
  # negative (BCE) dates. Used for comparison/deduping.
153
187
  def base_value
@@ -155,36 +189,44 @@ module Stanford
155
189
  return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
156
190
  end
157
191
 
158
- text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
192
+ text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{Regexp.last_match(1)}#{'0' * Regexp.last_match(2).length}" }.scan(/[\d-]/).join
159
193
  end
160
194
 
161
195
  # Decoded version of the date, if it was encoded. Strips leading zeroes.
162
- def decoded_value
196
+ def decoded_value(allowed_precisions: [:day, :month, :year, :decade, :century], ignore_unparseable: false, display_original_text: true)
197
+ return if ignore_unparseable && !date
163
198
  return text.strip unless date
164
199
 
165
- unless encoding.present?
166
- return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
200
+ if display_original_text
201
+ unless encoding.present?
202
+ return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
203
+ end
167
204
  end
168
205
 
169
206
  if date.is_a?(EDTF::Interval)
170
207
  if value.precision == :century || value.precision == :decade
171
- return format_date(date, value.precision)
208
+ return format_date(date, value.precision, allowed_precisions)
172
209
  end
173
210
 
174
211
  range = [
175
- format_date(date.min, date.min.precision),
176
- format_date(date.max, date.max.precision)
212
+ format_date(date.min, date.min.precision, allowed_precisions),
213
+ format_date(date.max, date.max.precision, allowed_precisions)
177
214
  ].uniq.compact
178
215
 
179
216
  return text.strip if range.empty?
180
217
 
181
218
  range.join(' - ')
182
219
  else
183
- format_date(date, value.precision) || text.strip
220
+ format_date(date, value.precision, allowed_precisions) || text.strip
184
221
  end
185
222
  end
186
223
 
187
- def format_date(date, precision)
224
+ # Returns the date in the format specified by the precision.
225
+ # Allowed_precisions should be ordered by granularity and supports e.g.
226
+ # getting a year precision when the actual date is more precise.
227
+ def format_date(date, precision, allowed_precisions)
228
+ precision = allowed_precisions.first unless allowed_precisions.include?(precision)
229
+
188
230
  case precision
189
231
  when :day
190
232
  date.strftime('%B %e, %Y')
@@ -200,14 +242,14 @@ module Stanford
200
242
  else
201
243
  year.to_s
202
244
  end
245
+ when :decade
246
+ "#{date.year}s"
203
247
  when :century
204
248
  if date.year.negative?
205
249
  "#{((date.year / 100).abs + 1).ordinalize} century BCE"
206
250
  else
207
251
  "#{((date.year / 100) + 1).ordinalize} century"
208
252
  end
209
- when :decade
210
- "#{date.year}s"
211
253
  end
212
254
  end
213
255
 
@@ -215,26 +257,32 @@ module Stanford
215
257
  # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
216
258
  def qualified_value
217
259
  qualified_format = case qualifier
218
- when 'approximate'
219
- '[ca. %s]'
220
- when 'questionable'
221
- '[%s?]'
222
- when 'inferred'
223
- '[%s]'
224
- else
225
- '%s'
226
- end
260
+ when 'approximate'
261
+ '[ca. %s]'
262
+ when 'questionable'
263
+ '[%s?]'
264
+ when 'inferred'
265
+ '[%s]'
266
+ else
267
+ '%s'
268
+ end
227
269
 
228
270
  format(qualified_format, decoded_value)
229
271
  end
230
272
  end
231
273
 
232
274
  class DateRange
275
+ attr_reader :start, :stop
276
+
233
277
  def initialize(start: nil, stop: nil)
234
278
  @start = start
235
279
  @stop = stop
236
280
  end
237
281
 
282
+ def sort_key
283
+ @start&.sort_key || @stop&.sort_key
284
+ end
285
+
238
286
  # Base value as hyphen-joined string. Used for comparison/deduping.
239
287
  def base_value
240
288
  "#{@start&.base_value}-#{@stop&.base_value}"
@@ -250,12 +298,34 @@ module Stanford
250
298
  @start&.encoding || @stop&.encoding
251
299
  end
252
300
 
301
+ # If either date in the range is qualified in any way
302
+ def qualified?
303
+ @start&.qualified? || @stop&.qualified?
304
+ end
305
+
306
+ # If either date in the range is a key date
307
+ def key_date?
308
+ @start&.key_date? || @stop&.key_date?
309
+ end
310
+
311
+ # If either date in the range was successfully parsed
312
+ def parsed_date?
313
+ @start&.parsed_date? || @stop&.parsed_date?
314
+ end
315
+
316
+ def decoded_value(**kwargs)
317
+ [
318
+ @start&.decoded_value(**kwargs),
319
+ @stop&.decoded_value(**kwargs)
320
+ ].uniq.join(' - ')
321
+ end
322
+
253
323
  # Decoded dates with "BCE" or "CE" and qualifier markers applied to
254
324
  # the entire range, or individually if dates differ.
255
325
  def qualified_value
256
326
  if @start&.qualifier == @stop&.qualifier
257
327
  qualifier = @start&.qualifier || @stop&.qualifier
258
- date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
328
+ date = decoded_value
259
329
  return "[ca. #{date}]" if qualifier == 'approximate'
260
330
  return "[#{date}?]" if qualifier == 'questionable'
261
331
  return "[#{date}]" if qualifier == 'inferred'
@@ -270,15 +340,19 @@ module Stanford
270
340
  def parse_dates(elements)
271
341
  # convert to DateValue objects and keep only valid ones
272
342
  dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
343
+
273
344
  # join any date ranges into DateRange objects
274
- point, nonpoint = dates.partition(&:point)
275
- if point.any?
276
- range = DateRange.new(start: point.find { |date| date.point == 'start' },
277
- stop: point.find { |date| date.point == 'end' })
278
- nonpoint.unshift(range)
345
+ point_dates, dates = dates.partition(&:point)
346
+ if point_dates.any?
347
+ range = DateRange.new(start: point_dates.find { |date| date.point == 'start' },
348
+ stop: point_dates.find { |date| date.point == 'end' })
349
+ dates.unshift(range)
350
+ else
351
+ dates
279
352
  end
280
- dates = nonpoint
353
+ end
281
354
 
355
+ def unique_dates_for_display(dates)
282
356
  # ensure dates are unique with respect to their base values
283
357
  dates = dates.group_by(&:base_value).map do |_value, group|
284
358
  next group.first if group.one?
@@ -304,10 +378,7 @@ module Stanford
304
378
  date_ranges.select { |r| r.base_values.include?(date.base_value) }
305
379
  end
306
380
 
307
- dates = dates - duplicated_ranges
308
-
309
- # output formatted dates with qualifiers, CE/BCE, etc.
310
- dates.map(&:qualified_value)
381
+ dates - duplicated_ranges
311
382
  end
312
383
  end
313
384
  end
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = '3.2.0'.freeze
4
+ VERSION = '3.3.0'.freeze
5
5
  end
6
6
  end
data/lib/stanford-mods.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'active_support'
2
2
  require 'active_support/core_ext/integer/inflections'
3
3
  require 'mods'
4
- require 'stanford-mods/date_parsing'
5
4
  require 'stanford-mods/coordinate'
6
5
  require 'stanford-mods/imprint'
7
6
  require 'stanford-mods/vocabularies/searchworks_languages'
@@ -18,4 +17,4 @@ require 'stanford-mods/version'
18
17
  module Stanford
19
18
  module Mods
20
19
  end
21
- end
20
+ end