stanford-mods 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/stanford-mods/concerns/origin_info.rb +51 -56
- data/lib/stanford-mods/imprint.rb +116 -45
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods.rb +1 -2
- data/spec/origin_info_spec.rb +560 -208
- data/spec/sw_publication_spec.rb +0 -24
- metadata +2 -7
- data/lib/stanford-mods/date_parsing.rb +0 -70
- data/spec/fixtures/searchworks_pub_date_data.rb +0 -979
- data/spec/fixtures/spotlight_pub_date_data.rb +0 -316
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9c63c6699eef72da80bbf9e15d892248dfc21a7550588aac09843ef26d1869f
|
4
|
+
data.tar.gz: f820451452017ec653c4eabd48d19cb23a36fcfafbad814272dd26c9cc9b9d42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0817296bebb438d882509919ff0b6fb2ec0e1accd71b379209f3b3b5f4cbd684d8208d8aab7ff3ce5f745860c4331af9098afadb5d066bd406016625c63d3701'
|
7
|
+
data.tar.gz: 4ed8b47a3186c3f241d2b543358138c71bea5f7128fc483e1d78117af1224b75db1fb4524763c726b011f414524b7d732a54096dd07d38a909c13a05116cc3ca
|
@@ -16,14 +16,22 @@ module Stanford
|
|
16
16
|
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute) should be ignored; false if approximate dates should be included
|
17
17
|
# @return [Integer] publication year as an Integer
|
18
18
|
# @note for sorting: 5 BCE => -5; 666 BCE => -666
|
19
|
-
def pub_year_int(
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
end
|
19
|
+
def pub_year_int(ignore_approximate: false)
|
20
|
+
date = earliest_preferred_date(ignore_approximate: ignore_approximate)
|
21
|
+
|
22
|
+
return unless date
|
23
|
+
|
24
|
+
if date.is_a? Stanford::Mods::Imprint::DateRange
|
25
|
+
date = date.start || date.stop
|
26
|
+
end
|
27
|
+
|
28
|
+
edtf_date = date.date
|
29
|
+
|
30
|
+
if edtf_date.is_a?(EDTF::Interval)
|
31
|
+
edtf_date.from.year
|
32
|
+
else
|
33
|
+
edtf_date.year
|
34
|
+
end
|
27
35
|
end
|
28
36
|
|
29
37
|
# return a single string intended for lexical sorting for pub date
|
@@ -33,37 +41,17 @@ module Stanford
|
|
33
41
|
# @return [String] single String containing publication year for lexical sorting
|
34
42
|
# @note for string sorting 5 BCE = -5 => -995; 6 BCE => -994, so 6 BCE sorts before 5 BCE
|
35
43
|
# @deprecated use pub_year_int
|
36
|
-
def pub_year_sort_str(
|
37
|
-
|
38
|
-
values = mods_ng_xml.origin_info.send(date_key)
|
39
|
-
values = values.reject(&method(:is_approximate)) if ignore_approximate
|
40
|
-
|
41
|
-
earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
|
42
|
-
return earliest_date.sortable_year_string_from_date_str if earliest_date&.sortable_year_string_from_date_str
|
43
|
-
end; nil
|
44
|
+
def pub_year_sort_str(ignore_approximate: false)
|
45
|
+
earliest_preferred_date(ignore_approximate: ignore_approximate)&.sort_key
|
44
46
|
end
|
45
47
|
|
46
|
-
# return a single string intended for display of pub year
|
47
|
-
#
|
48
|
-
#
|
49
|
-
# 195u => 195x
|
50
|
-
# 19uu => 19xx
|
51
|
-
# '-5' => '5 BCE'
|
52
|
-
# '700 BCE' => '700 BCE'
|
53
|
-
# '7th century' => '7th century'
|
54
|
-
# date ranges?
|
55
|
-
# prefer dateIssued (any) before dateCreated (any) before dateCaptured (any)
|
56
|
-
# look for a keyDate and use it if there is one; otherwise pick earliest date
|
48
|
+
# return a single string intended for display of pub year (or year range)
|
49
|
+
#
|
50
|
+
# @param [Array<Symbol>] fields array of field types to use to look for dates.
|
57
51
|
# @param [Boolean] ignore_approximate true if approximate dates (per qualifier attribute)
|
58
52
|
# should be ignored; false if approximate dates should be included
|
59
|
-
def pub_year_display_str(
|
60
|
-
|
61
|
-
values = mods_ng_xml.origin_info.send(date_key)
|
62
|
-
values = values.reject(&method(:is_approximate)) if ignore_approximate
|
63
|
-
|
64
|
-
earliest_date = Stanford::Mods::OriginInfo.best_or_earliest_year(values)
|
65
|
-
return earliest_date.date_str_for_display if earliest_date&.date_str_for_display
|
66
|
-
end; nil
|
53
|
+
def pub_year_display_str(ignore_approximate: false)
|
54
|
+
earliest_preferred_date(ignore_approximate: ignore_approximate)&.decoded_value(allowed_precisions: [:year, :decade, :century], ignore_unparseable: true, display_original_text: false)
|
67
55
|
end
|
68
56
|
|
69
57
|
# @return [Array<Stanford::Mods::Imprint>] array of imprint objects
|
@@ -81,32 +69,39 @@ module Stanford
|
|
81
69
|
imprints.map(&:display_str).reject(&:empty?).join('; ')
|
82
70
|
end
|
83
71
|
|
84
|
-
|
85
|
-
|
86
|
-
#
|
87
|
-
#
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
72
|
+
private
|
73
|
+
|
74
|
+
# The rules for which date to pick are a little complicated:
|
75
|
+
#
|
76
|
+
# 1) Examine the date elements of the provided fields.
|
77
|
+
# 2) Discard any we can't parse a year out of.
|
78
|
+
# 3) (if ignore_approximate is true, used only by exhibits for Feigenbaum), throw out any qualified dates (or ranges if either the start or end is qualified)
|
79
|
+
# 4) If that set of date elements has elements with a keyDate, prefer those.
|
80
|
+
# 5) If there were encoded dates, prefer those.
|
81
|
+
# 6) Choose the earliest date (or starting date of a range).
|
82
|
+
#
|
83
|
+
# Finally, format the date or range of an encoded date, or just pluck out the year from an unencoded one.
|
84
|
+
def earliest_preferred_date(fields: [:dateIssued, :dateCreated, :dateCaptured], ignore_approximate: false)
|
85
|
+
local_imprints = imprints
|
86
|
+
|
87
|
+
fields.each do |field_name|
|
88
|
+
potential_dates = local_imprints.flat_map do |imprint|
|
89
|
+
dates = imprint.dates([field_name])
|
90
|
+
dates = dates.select(&:parsed_date?)
|
91
|
+
dates = dates.reject(&:qualified?) if ignore_approximate
|
92
|
+
|
93
|
+
dates
|
94
|
+
end
|
92
95
|
|
93
|
-
|
94
|
-
|
95
|
-
# @return [Stanford::Mods::DateParsing]
|
96
|
-
def self.best_or_earliest_year(date_el_array)
|
97
|
-
key_dates, other_dates = date_el_array.partition { |node| node['keyDate'] == 'yes' }
|
96
|
+
preferred_dates = potential_dates.select(&:key_date?).presence || potential_dates
|
97
|
+
best_dates = (preferred_dates.select { |x| x.encoding.present? }.presence || preferred_dates)
|
98
98
|
|
99
|
-
|
100
|
-
sortable_dates = other_dates.map { |x| DateParsing.new(x) }.select(&:sortable_year_string_from_date_str) if sortable_dates.empty?
|
101
|
-
results = {}
|
99
|
+
earliest_date = best_dates.min_by(&:sort_key)
|
102
100
|
|
103
|
-
|
104
|
-
# dates with the same sort key, we want to make sure we get the last occurring one?
|
105
|
-
sortable_dates.each do |v|
|
106
|
-
results[v.sortable_year_string_from_date_str] = v
|
101
|
+
return earliest_date if earliest_date
|
107
102
|
end
|
108
103
|
|
109
|
-
|
104
|
+
nil
|
110
105
|
end
|
111
106
|
end # class Record
|
112
107
|
end
|
@@ -35,6 +35,17 @@ module Stanford
|
|
35
35
|
ed_place_pub_dates
|
36
36
|
end
|
37
37
|
|
38
|
+
# array of parsed but unformatted date values, for a given list of
|
39
|
+
# elements to pull data from
|
40
|
+
def dates(date_field_keys = [:dateIssued, :dateCreated, :dateCaptured, :copyrightDate])
|
41
|
+
date_field_keys.map do |date_field|
|
42
|
+
next unless element.respond_to?(date_field)
|
43
|
+
|
44
|
+
date_elements = element.send(date_field)
|
45
|
+
parse_dates(date_elements) if date_elements.present?
|
46
|
+
end.compact.flatten
|
47
|
+
end
|
48
|
+
|
38
49
|
private
|
39
50
|
|
40
51
|
def compact_and_join_with_delimiter(values, delimiter)
|
@@ -117,22 +128,10 @@ module Stanford
|
|
117
128
|
# DATE processing methods ------
|
118
129
|
|
119
130
|
def date_str
|
120
|
-
date_vals =
|
131
|
+
date_vals = unique_dates_for_display(dates).map(&:qualified_value)
|
121
132
|
return if date_vals.empty?
|
122
|
-
date_vals.map(&:strip).join(' ')
|
123
|
-
end
|
124
|
-
|
125
|
-
def origin_info_date_vals
|
126
|
-
date_field_keys.map do |date_field|
|
127
|
-
next unless element.respond_to?(date_field)
|
128
|
-
|
129
|
-
date_elements = element.send(date_field)
|
130
|
-
parse_dates(date_elements) if date_elements.present?
|
131
|
-
end.compact.flatten
|
132
|
-
end
|
133
133
|
|
134
|
-
|
135
|
-
[:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
|
134
|
+
date_vals.map(&:strip).join(' ')
|
136
135
|
end
|
137
136
|
|
138
137
|
class DateValue
|
@@ -148,6 +147,41 @@ module Stanford
|
|
148
147
|
text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
|
149
148
|
end
|
150
149
|
|
150
|
+
def key_date?
|
151
|
+
value.key?
|
152
|
+
end
|
153
|
+
|
154
|
+
def qualified?
|
155
|
+
qualifier.present?
|
156
|
+
end
|
157
|
+
|
158
|
+
def parsed_date?
|
159
|
+
date.present?
|
160
|
+
end
|
161
|
+
|
162
|
+
def sort_key
|
163
|
+
year = if date.is_a?(EDTF::Interval)
|
164
|
+
date.from.year
|
165
|
+
else
|
166
|
+
date.year
|
167
|
+
end
|
168
|
+
|
169
|
+
str = if year < 1
|
170
|
+
(-1 * year - 1000).to_s
|
171
|
+
else
|
172
|
+
year.to_s
|
173
|
+
end
|
174
|
+
|
175
|
+
case value.precision
|
176
|
+
when :decade
|
177
|
+
str[0..2] + "-"
|
178
|
+
when :century
|
179
|
+
str[0..1] + "--"
|
180
|
+
else
|
181
|
+
str.rjust(4, "0")
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
151
185
|
# Element text reduced to digits and hyphen. Captures date ranges and
|
152
186
|
# negative (BCE) dates. Used for comparison/deduping.
|
153
187
|
def base_value
|
@@ -155,36 +189,44 @@ module Stanford
|
|
155
189
|
return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
|
156
190
|
end
|
157
191
|
|
158
|
-
text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{
|
192
|
+
text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{Regexp.last_match(1)}#{'0' * Regexp.last_match(2).length}" }.scan(/[\d-]/).join
|
159
193
|
end
|
160
194
|
|
161
195
|
# Decoded version of the date, if it was encoded. Strips leading zeroes.
|
162
|
-
def decoded_value
|
196
|
+
def decoded_value(allowed_precisions: [:day, :month, :year, :decade, :century], ignore_unparseable: false, display_original_text: true)
|
197
|
+
return if ignore_unparseable && !date
|
163
198
|
return text.strip unless date
|
164
199
|
|
165
|
-
|
166
|
-
|
200
|
+
if display_original_text
|
201
|
+
unless encoding.present?
|
202
|
+
return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
|
203
|
+
end
|
167
204
|
end
|
168
205
|
|
169
206
|
if date.is_a?(EDTF::Interval)
|
170
207
|
if value.precision == :century || value.precision == :decade
|
171
|
-
return format_date(date, value.precision)
|
208
|
+
return format_date(date, value.precision, allowed_precisions)
|
172
209
|
end
|
173
210
|
|
174
211
|
range = [
|
175
|
-
format_date(date.min, date.min.precision),
|
176
|
-
format_date(date.max, date.max.precision)
|
212
|
+
format_date(date.min, date.min.precision, allowed_precisions),
|
213
|
+
format_date(date.max, date.max.precision, allowed_precisions)
|
177
214
|
].uniq.compact
|
178
215
|
|
179
216
|
return text.strip if range.empty?
|
180
217
|
|
181
218
|
range.join(' - ')
|
182
219
|
else
|
183
|
-
format_date(date, value.precision) || text.strip
|
220
|
+
format_date(date, value.precision, allowed_precisions) || text.strip
|
184
221
|
end
|
185
222
|
end
|
186
223
|
|
187
|
-
|
224
|
+
# Returns the date in the format specified by the precision.
|
225
|
+
# Allowed_precisions should be ordered by granularity and supports e.g.
|
226
|
+
# getting a year precision when the actual date is more precise.
|
227
|
+
def format_date(date, precision, allowed_precisions)
|
228
|
+
precision = allowed_precisions.first unless allowed_precisions.include?(precision)
|
229
|
+
|
188
230
|
case precision
|
189
231
|
when :day
|
190
232
|
date.strftime('%B %e, %Y')
|
@@ -200,14 +242,14 @@ module Stanford
|
|
200
242
|
else
|
201
243
|
year.to_s
|
202
244
|
end
|
245
|
+
when :decade
|
246
|
+
"#{date.year}s"
|
203
247
|
when :century
|
204
248
|
if date.year.negative?
|
205
249
|
"#{((date.year / 100).abs + 1).ordinalize} century BCE"
|
206
250
|
else
|
207
251
|
"#{((date.year / 100) + 1).ordinalize} century"
|
208
252
|
end
|
209
|
-
when :decade
|
210
|
-
"#{date.year}s"
|
211
253
|
end
|
212
254
|
end
|
213
255
|
|
@@ -215,26 +257,32 @@ module Stanford
|
|
215
257
|
# https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
|
216
258
|
def qualified_value
|
217
259
|
qualified_format = case qualifier
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
260
|
+
when 'approximate'
|
261
|
+
'[ca. %s]'
|
262
|
+
when 'questionable'
|
263
|
+
'[%s?]'
|
264
|
+
when 'inferred'
|
265
|
+
'[%s]'
|
266
|
+
else
|
267
|
+
'%s'
|
268
|
+
end
|
227
269
|
|
228
270
|
format(qualified_format, decoded_value)
|
229
271
|
end
|
230
272
|
end
|
231
273
|
|
232
274
|
class DateRange
|
275
|
+
attr_reader :start, :stop
|
276
|
+
|
233
277
|
def initialize(start: nil, stop: nil)
|
234
278
|
@start = start
|
235
279
|
@stop = stop
|
236
280
|
end
|
237
281
|
|
282
|
+
def sort_key
|
283
|
+
@start&.sort_key || @stop&.sort_key
|
284
|
+
end
|
285
|
+
|
238
286
|
# Base value as hyphen-joined string. Used for comparison/deduping.
|
239
287
|
def base_value
|
240
288
|
"#{@start&.base_value}-#{@stop&.base_value}"
|
@@ -250,12 +298,34 @@ module Stanford
|
|
250
298
|
@start&.encoding || @stop&.encoding
|
251
299
|
end
|
252
300
|
|
301
|
+
# If either date in the range is qualified in any way
|
302
|
+
def qualified?
|
303
|
+
@start&.qualified? || @stop&.qualified?
|
304
|
+
end
|
305
|
+
|
306
|
+
# If either date in the range is a key date
|
307
|
+
def key_date?
|
308
|
+
@start&.key_date? || @stop&.key_date?
|
309
|
+
end
|
310
|
+
|
311
|
+
# If either date in the range was successfully parsed
|
312
|
+
def parsed_date?
|
313
|
+
@start&.parsed_date? || @stop&.parsed_date?
|
314
|
+
end
|
315
|
+
|
316
|
+
def decoded_value(**kwargs)
|
317
|
+
[
|
318
|
+
@start&.decoded_value(**kwargs),
|
319
|
+
@stop&.decoded_value(**kwargs)
|
320
|
+
].uniq.join(' - ')
|
321
|
+
end
|
322
|
+
|
253
323
|
# Decoded dates with "BCE" or "CE" and qualifier markers applied to
|
254
324
|
# the entire range, or individually if dates differ.
|
255
325
|
def qualified_value
|
256
326
|
if @start&.qualifier == @stop&.qualifier
|
257
327
|
qualifier = @start&.qualifier || @stop&.qualifier
|
258
|
-
date =
|
328
|
+
date = decoded_value
|
259
329
|
return "[ca. #{date}]" if qualifier == 'approximate'
|
260
330
|
return "[#{date}?]" if qualifier == 'questionable'
|
261
331
|
return "[#{date}]" if qualifier == 'inferred'
|
@@ -270,15 +340,19 @@ module Stanford
|
|
270
340
|
def parse_dates(elements)
|
271
341
|
# convert to DateValue objects and keep only valid ones
|
272
342
|
dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
|
343
|
+
|
273
344
|
# join any date ranges into DateRange objects
|
274
|
-
|
275
|
-
if
|
276
|
-
range = DateRange.new(start:
|
277
|
-
stop:
|
278
|
-
|
345
|
+
point_dates, dates = dates.partition(&:point)
|
346
|
+
if point_dates.any?
|
347
|
+
range = DateRange.new(start: point_dates.find { |date| date.point == 'start' },
|
348
|
+
stop: point_dates.find { |date| date.point == 'end' })
|
349
|
+
dates.unshift(range)
|
350
|
+
else
|
351
|
+
dates
|
279
352
|
end
|
280
|
-
|
353
|
+
end
|
281
354
|
|
355
|
+
def unique_dates_for_display(dates)
|
282
356
|
# ensure dates are unique with respect to their base values
|
283
357
|
dates = dates.group_by(&:base_value).map do |_value, group|
|
284
358
|
next group.first if group.one?
|
@@ -304,10 +378,7 @@ module Stanford
|
|
304
378
|
date_ranges.select { |r| r.base_values.include?(date.base_value) }
|
305
379
|
end
|
306
380
|
|
307
|
-
dates
|
308
|
-
|
309
|
-
# output formatted dates with qualifiers, CE/BCE, etc.
|
310
|
-
dates.map(&:qualified_value)
|
381
|
+
dates - duplicated_ranges
|
311
382
|
end
|
312
383
|
end
|
313
384
|
end
|
data/lib/stanford-mods.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'active_support'
|
2
2
|
require 'active_support/core_ext/integer/inflections'
|
3
3
|
require 'mods'
|
4
|
-
require 'stanford-mods/date_parsing'
|
5
4
|
require 'stanford-mods/coordinate'
|
6
5
|
require 'stanford-mods/imprint'
|
7
6
|
require 'stanford-mods/vocabularies/searchworks_languages'
|
@@ -18,4 +17,4 @@ require 'stanford-mods/version'
|
|
18
17
|
module Stanford
|
19
18
|
module Mods
|
20
19
|
end
|
21
|
-
end
|
20
|
+
end
|