stanford-mods 3.2.0 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57865ef64be3774919a58f9771243fb7f5090b09867fae098d3fa5ae6e2523cf
4
- data.tar.gz: 1a09641f450a3739c9c61598ec4ad7a4f1f7b410c804d12df6d50530fb249cb7
3
+ metadata.gz: c9c63c6699eef72da80bbf9e15d892248dfc21a7550588aac09843ef26d1869f
4
+ data.tar.gz: f820451452017ec653c4eabd48d19cb23a36fcfafbad814272dd26c9cc9b9d42
5
5
  SHA512:
6
- metadata.gz: c86f9171a032d1349068b43f6cf1272fbdf4a24419a58f66040132d7de2ea745853707b803608ea3344a137e544eff0e7e291a7b9c3922c09fe0568c68409a5e
7
- data.tar.gz: d031a52bb328ae6efa0e42e7050fc4f990295509e4ee0a4b4c1f93f51b2412049b16c87091f27b95a02ed95be1278ccd5cbcc11771f59272bf77f82d73bd3bb2
6
+ metadata.gz: '0817296bebb438d882509919ff0b6fb2ec0e1accd71b379209f3b3b5f4cbd684d8208d8aab7ff3ce5f745860c4331af9098afadb5d066bd406016625c63d3701'
7
+ data.tar.gz: 4ed8b47a3186c3f241d2b543358138c71bea5f7128fc483e1d78117af1224b75db1fb4524763c726b011f414524b7d732a54096dd07d38a909c13a05116cc3ca
@@ -16,14 +16,22 @@ module Stanford
16
16
  # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
17
17
  # @return [Integer] publication year as an Integer
18
18
  # @note for sorting: 5 BCE => -5; 666 BCE => -666
19
- def pub_year_int(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
20
- fields.each do |date_key|
21
- values = mods_ng_xml.origin_info.send(date_key)
22
- values = values.reject(&method(:is_approximate)) if ignore_approximate
23
-
24
- earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
25
- return earliest_date.year_int_from_date_str if earliest_date&.year_int_from_date_str
26
- end; nil
19
+ def pub_year_int(ignore_approximate: false)
20
+ date = earliest_preferred_date(ignore_approximate: ignore_approximate)
21
+
22
+ return unless date
23
+
24
+ if date.is_a? Stanford::Mods::Imprint::DateRange
25
+ date = date.start || date.stop
26
+ end
27
+
28
+ edtf_date = date.date
29
+
30
+ if edtf_date.is_a?(EDTF::Interval)
31
+ edtf_date.from.year
32
+ else
33
+ edtf_date.year
34
+ end
27
35
  end
28
36
 
29
37
  # return a single string intended for lexical sorting for pub date
@@ -33,37 +41,17 @@ module Stanford
33
41
  # @return [String] single String containing publication year for lexical sorting
34
42
  # @note for string sorting 5 BCE = -5 => -995; 6 BCE => -994, so 6 BCE sorts before 5 BCE
35
43
  # @deprecated use pub_year_int
36
- def pub_year_sort_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
37
- fields.each do |date_key|
38
- values = mods_ng_xml.origin_info.send(date_key)
39
- values = values.reject(&method(:is_approximate)) if ignore_approximate
40
-
41
- earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
42
- return earliest_date.sortable_year_string_from_date_str if earliest_date&.sortable_year_string_from_date_str
43
- end; nil
44
+ def pub_year_sort_str(ignore_approximate: false)
45
+ earliest_preferred_date(ignore_approximate: ignore_approximate)&.sort_key
44
46
  end
45
47
 
46
- # return a single string intended for display of pub year
47
- # 0 < year < 1000: add CE suffix
48
- # year < 0: add BCE suffix. ('-5' => '5 BCE', '700 BCE' => '700 BCE')
49
- # 195u => 195x
50
- # 19uu => 19xx
51
- # '-5' => '5 BCE'
52
- # '700 BCE' => '700 BCE'
53
- # '7th century' => '7th century'
54
- # date ranges?
55
- # prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
56
- # look for a keyDate and use it if there is one; otherwise pick earliest date
48
+ # return a single string intended for display of pub year (or year range)
49
+ #
50
+ # @param [Array<Symbol>] fields array of field types to use to look for dates.
57
51
  # @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
58
52
  # should be ignored; false if approximate dates should be included
59
- def pub_year_display_str(fields = [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
60
- fields.each do |date_key|
61
- values = mods_ng_xml.origin_info.send(date_key)
62
- values = values.reject(&method(:is_approximate)) if ignore_approximate
63
-
64
- earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
65
- return earliest_date.date_str_for_display if earliest_date&.date_str_for_display
66
- end; nil
53
+ def pub_year_display_str(ignore_approximate: false)
54
+ earliest_preferred_date(ignore_approximate: ignore_approximate)&.decoded_value(allowed_precisions: [:year, :decade, :century], ignore_unparseable: true, display_original_text: false)
67
55
  end
68
56
 
69
57
  # @return [Array<Stanford::Mods::Imprint>] array of imprint objects
@@ -81,32 +69,39 @@ module Stanford
81
69
  imprints.map(&:display_str).reject(&:empty?).join('; ')
82
70
  end
83
71
 
84
- # remove Elements from NodeSet if they have a qualifier attribute of 'approximate' or 'questionable'
85
- # @param [Nokogiri::XML::Element] node the date element
86
- # @return [Boolean]
87
- # @private
88
- def is_approximate(node)
89
- qualifier = node["qualifier"] if node.respond_to?('[]')
90
- qualifier == 'approximate' || qualifier == 'questionable'
91
- end
72
+ private
73
+
74
+ # The rules for which date to pick are a little complicated:
75
+ #
76
+ # 1) Examine the date elements of the provided fields.
77
+ # 2) Discard any we can't parse a year out of.
78
+ # 3) (if ignore_approximate is true, used only by exhibits for Feigenbaum), throw out any qualified dates (or ranges if either the start or end is qualified)
79
+ # 4) If that set of date elements has elements with a keyDate, prefer those.
80
+ # 5) If there were encoded dates, prefer those.
81
+ # 6) Choose the earliest date (or starting date of a range).
82
+ #
83
+ # Finally, format the date or range of an encoded date, or just pluck out the year from an unencoded one.
84
+ def earliest_preferred_date(fields: [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
85
+ local_imprints = imprints
86
+
87
+ fields.each do |field_name|
88
+ potential_dates = local_imprints.flat_map do |imprint|
89
+ dates = imprint.dates([field_name])
90
+ dates = dates.select(&:parsed_date?)
91
+ dates = dates.reject(&:qualified?) if ignore_approximate
92
+
93
+ dates
94
+ end
92
95
 
93
- # get earliest parseable year from the passed date elements
94
- # @param [Array<Nokogiri::XML::Element>] date_el_array the elements from which to select a pub date
95
- # @return [Stanford::Mods::DateParsing]
96
- def self.best_or_earliest_year(date_el_array)
97
- key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
96
+ preferred_dates = potential_dates.select(&:key_date?).presence || potential_dates
97
+ best_dates = (preferred_dates.select { |x| x.encoding.present? }.presence || preferred_dates)
98
98
 
99
- sortable_dates = key_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str)
100
- sortable_dates = other_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str) if sortable_dates.empty?
101
- results = {}
99
+ earliest_date = best_dates.min_by(&:sort_key)
102
100
 
103
- # this is a little weird; instead of just the earliest sorting date, if there are multiple
104
- # dates with the same sort key, we want to make sure we get the last occurring one?
105
- sortable_dates.each do |v|
106
- results[v.sortable_year_string_from_date_str] = v
101
+ return earliest_date if earliest_date
107
102
  end
108
103
 
109
- results[results.keys.min]
104
+ nil
110
105
  end
111
106
  end # class Record
112
107
  end
@@ -35,6 +35,17 @@ module Stanford
35
35
  ed_place_pub_dates
36
36
  end
37
37
 
38
+ # array of parsed but unformatted date values, for a given list of
39
+ # elements to pull data from
40
+ def dates(date_field_keys = [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate])
41
+ date_field_keys.map do |date_field|
42
+ next unless element.respond_to?(date_field)
43
+
44
+ date_elements = element.send(date_field)
45
+ parse_dates(date_elements) if date_elements.present?
46
+ end.compact.flatten
47
+ end
48
+
38
49
  private
39
50
 
40
51
  def compact_and_join_with_delimiter(values, delimiter)
@@ -117,22 +128,10 @@ module Stanford
117
128
  # DATE processing methods ------
118
129
 
119
130
  def date_str
120
- date_vals = origin_info_date_vals
131
+ date_vals = unique_dates_for_display(dates).map(&:qualified_value)
121
132
  return if date_vals.empty?
122
- date_vals.map(&:strip).join(' ')
123
- end
124
-
125
- def origin_info_date_vals
126
- date_field_keys.map do |date_field|
127
- next unless element.respond_to?(date_field)
128
-
129
- date_elements = element.send(date_field)
130
- parse_dates(date_elements) if date_elements.present?
131
- end.compact.flatten
132
- end
133
133
 
134
- def date_field_keys
135
- [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
134
+ date_vals.map(&:strip).join(' ')
136
135
  end
137
136
 
138
137
  class DateValue
@@ -148,6 +147,41 @@ module Stanford
148
147
  text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
149
148
  end
150
149
 
150
+ def key_date?
151
+ value.key?
152
+ end
153
+
154
+ def qualified?
155
+ qualifier.present?
156
+ end
157
+
158
+ def parsed_date?
159
+ date.present?
160
+ end
161
+
162
+ def sort_key
163
+ year = if date.is_a?(EDTF::Interval)
164
+ date.from.year
165
+ else
166
+ date.year
167
+ end
168
+
169
+ str = if year < 1
170
+ (-1 * year - 1000).to_s
171
+ else
172
+ year.to_s
173
+ end
174
+
175
+ case value.precision
176
+ when :decade
177
+ str[0..2] + "-"
178
+ when :century
179
+ str[0..1] + "--"
180
+ else
181
+ str.rjust(4, "0")
182
+ end
183
+ end
184
+
151
185
  # Element text reduced to digits and hyphen. Captures date ranges and
152
186
  # negative (BCE) dates. Used for comparison/deduping.
153
187
  def base_value
@@ -155,36 +189,44 @@ module Stanford
155
189
  return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
156
190
  end
157
191
 
158
- text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
192
+ text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{Regexp.last_match(1)}#{'0' * Regexp.last_match(2).length}" }.scan(/[\d-]/).join
159
193
  end
160
194
 
161
195
  # Decoded version of the date, if it was encoded. Strips leading zeroes.
162
- def decoded_value
196
+ def decoded_value(allowed_precisions: [:day, :month, :year, :decade, :century], ignore_unparseable: false, display_original_text: true)
197
+ return if ignore_unparseable && !date
163
198
  return text.strip unless date
164
199
 
165
- unless encoding.present?
166
- return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
200
+ if display_original_text
201
+ unless encoding.present?
202
+ return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
203
+ end
167
204
  end
168
205
 
169
206
  if date.is_a?(EDTF::Interval)
170
207
  if value.precision == :century || value.precision == :decade
171
- return format_date(date, value.precision)
208
+ return format_date(date, value.precision, allowed_precisions)
172
209
  end
173
210
 
174
211
  range = [
175
- format_date(date.min, date.min.precision),
176
- format_date(date.max, date.max.precision)
212
+ format_date(date.min, date.min.precision, allowed_precisions),
213
+ format_date(date.max, date.max.precision, allowed_precisions)
177
214
  ].uniq.compact
178
215
 
179
216
  return text.strip if range.empty?
180
217
 
181
218
  range.join(' - ')
182
219
  else
183
- format_date(date, value.precision) || text.strip
220
+ format_date(date, value.precision, allowed_precisions) || text.strip
184
221
  end
185
222
  end
186
223
 
187
- def format_date(date, precision)
224
+ # Returns the date in the format specified by the precision.
225
+ # Allowed_precisions should be ordered by granularity and supports e.g.
226
+ # getting a year precision when the actual date is more precise.
227
+ def format_date(date, precision, allowed_precisions)
228
+ precision = allowed_precisions.first unless allowed_precisions.include?(precision)
229
+
188
230
  case precision
189
231
  when :day
190
232
  date.strftime('%B %e, %Y')
@@ -200,14 +242,14 @@ module Stanford
200
242
  else
201
243
  year.to_s
202
244
  end
245
+ when :decade
246
+ "#{date.year}s"
203
247
  when :century
204
248
  if date.year.negative?
205
249
  "#{((date.year / 100).abs + 1).ordinalize} century BCE"
206
250
  else
207
251
  "#{((date.year / 100) + 1).ordinalize} century"
208
252
  end
209
- when :decade
210
- "#{date.year}s"
211
253
  end
212
254
  end
213
255
 
@@ -215,26 +257,32 @@ module Stanford
215
257
  # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
216
258
  def qualified_value
217
259
  qualified_format = case qualifier
218
- when 'approximate'
219
- '[ca. %s]'
220
- when 'questionable'
221
- '[%s?]'
222
- when 'inferred'
223
- '[%s]'
224
- else
225
- '%s'
226
- end
260
+ when 'approximate'
261
+ '[ca. %s]'
262
+ when 'questionable'
263
+ '[%s?]'
264
+ when 'inferred'
265
+ '[%s]'
266
+ else
267
+ '%s'
268
+ end
227
269
 
228
270
  format(qualified_format, decoded_value)
229
271
  end
230
272
  end
231
273
 
232
274
  class DateRange
275
+ attr_reader :start, :stop
276
+
233
277
  def initialize(start: nil, stop: nil)
234
278
  @start = start
235
279
  @stop = stop
236
280
  end
237
281
 
282
+ def sort_key
283
+ @start&.sort_key || @stop&.sort_key
284
+ end
285
+
238
286
  # Base value as hyphen-joined string. Used for comparison/deduping.
239
287
  def base_value
240
288
  "#{@start&.base_value}-#{@stop&.base_value}"
@@ -250,12 +298,34 @@ module Stanford
250
298
  @start&.encoding || @stop&.encoding
251
299
  end
252
300
 
301
+ # If either date in the range is qualified in any way
302
+ def qualified?
303
+ @start&.qualified? || @stop&.qualified?
304
+ end
305
+
306
+ # If either date in the range is a key date
307
+ def key_date?
308
+ @start&.key_date? || @stop&.key_date?
309
+ end
310
+
311
+ # If either date in the range was successfully parsed
312
+ def parsed_date?
313
+ @start&.parsed_date? || @stop&.parsed_date?
314
+ end
315
+
316
+ def decoded_value(**kwargs)
317
+ [
318
+ @start&.decoded_value(**kwargs),
319
+ @stop&.decoded_value(**kwargs)
320
+ ].uniq.join(' - ')
321
+ end
322
+
253
323
  # Decoded dates with "BCE" or "CE" and qualifier markers applied to
254
324
  # the entire range, or individually if dates differ.
255
325
  def qualified_value
256
326
  if @start&.qualifier == @stop&.qualifier
257
327
  qualifier = @start&.qualifier || @stop&.qualifier
258
- date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
328
+ date = decoded_value
259
329
  return "[ca. #{date}]" if qualifier == 'approximate'
260
330
  return "[#{date}?]" if qualifier == 'questionable'
261
331
  return "[#{date}]" if qualifier == 'inferred'
@@ -270,15 +340,19 @@ module Stanford
270
340
  def parse_dates(elements)
271
341
  # convert to DateValue objects and keep only valid ones
272
342
  dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
343
+
273
344
  # join any date ranges into DateRange objects
274
- point, nonpoint = dates.partition(&:point)
275
- if point.any?
276
- range = DateRange.new(start: point.find { |date| date.point == 'start' },
277
- stop: point.find { |date| date.point == 'end' })
278
- nonpoint.unshift(range)
345
+ point_dates, dates = dates.partition(&:point)
346
+ if point_dates.any?
347
+ range = DateRange.new(start: point_dates.find { |date| date.point == 'start' },
348
+ stop: point_dates.find { |date| date.point == 'end' })
349
+ dates.unshift(range)
350
+ else
351
+ dates
279
352
  end
280
- dates = nonpoint
353
+ end
281
354
 
355
+ def unique_dates_for_display(dates)
282
356
  # ensure dates are unique with respect to their base values
283
357
  dates = dates.group_by(&:base_value).map do |_value, group|
284
358
  next group.first if group.one?
@@ -304,10 +378,7 @@ module Stanford
304
378
  date_ranges.select { |r| r.base_values.include?(date.base_value) }
305
379
  end
306
380
 
307
- dates = dates - duplicated_ranges
308
-
309
- # output formatted dates with qualifiers, CE/BCE, etc.
310
- dates.map(&:qualified_value)
381
+ dates - duplicated_ranges
311
382
  end
312
383
  end
313
384
  end
@@ -1,6 +1,6 @@
1
1
  module Stanford
2
2
  module Mods
3
3
  # this is the Ruby Gem version
4
- VERSION = '3.2.0'.freeze
4
+ VERSION = '3.3.0'.freeze
5
5
  end
6
6
  end
data/lib/stanford-mods.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  require 'active_support'
2
2
  require 'active_support/core_ext/integer/inflections'
3
3
  require 'mods'
4
- require 'stanford-mods/date_parsing'
5
4
  require 'stanford-mods/coordinate'
6
5
  require 'stanford-mods/imprint'
7
6
  require 'stanford-mods/vocabularies/searchworks_languages'
@@ -18,4 +17,4 @@ require 'stanford-mods/version'
18
17
  module Stanford
19
18
  module Mods
20
19
  end
21
- end
20
+ end