cocina_display 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,688 @@
1
+ require "edtf"
2
+
3
+ require "active_support"
4
+ require "active_support/core_ext/object/blank"
5
+ require "active_support/core_ext/integer/inflections"
6
+
7
+ module CocinaDisplay
8
+ module Dates
9
# A single date value taken from Cocina data, parsed into a date object when possible.
# This class doubles as the fallback parser used when no declared encoding or
# known textual format applies; encoding- and format-specific parsers subclass
# it and override {parse_date} and/or {normalize_to_edtf}.
class Date
  # List of values that we shouldn't even attempt to parse.
  UNPARSABLE_VALUES = ["0000-00-00", "9999", "uuuu", "[uuuu]"].freeze

  # Digit-inversion table used to sort BCE dates correctly in lexicographic order.
  BCE_CHAR_SORT_MAP = {"0" => "9", "1" => "8", "2" => "7", "3" => "6", "4" => "5", "5" => "4", "6" => "3", "7" => "2", "8" => "1", "9" => "0"}.freeze

  # Construct a Date from parsed Cocina data.
  # @param cocina [Hash] Cocina date data
  # @return [CocinaDisplay::Dates::Date] an instance of this class or one of
  #   its subclasses; when "structuredValue" is present, whatever
  #   DateRange.from_cocina returns instead.
  def self.from_cocina(cocina)
    # Create a DateRange instead if structuredValue(s) are present
    return DateRange.from_cocina(cocina) if cocina["structuredValue"].present?

    # If an encoding was declared, use it. Cocina validates this
    case cocina.dig("encoding", "code")
    when "w3cdtf"
      W3cdtfFormat.new(cocina)
    when "iso8601"
      Iso8601Format.new(cocina)
    when "marc"
      MarcFormat.new(cocina)
    when "edtf"
      EdtfFormat.new(cocina)
    else # No declared encoding, or unknown encoding
      value = cocina["value"]

      # Don't bother with weird unparseable values (Hebrew text, leading hyphen)
      date_class = UnparseableDate if value =~ /\p{Hebrew}/ || value =~ /^-/

      # Try to match against known date formats using their regexes
      # Order matters here; more specific formats should be checked first
      date_class ||= [
        MMDDYYYYFormat,
        MMDDYYFormat,
        YearRangeFormat,
        DecadeAsYearDashFormat,
        DecadeStringFormat,
        EmbeddedBCYearFormat,
        EmbeddedYearFormat,
        EmbeddedThreeDigitYearFormat,
        EmbeddedYearWithBracketsFormat,
        MysteryCenturyFormat,
        CenturyFormat,
        RomanNumeralCenturyFormat,
        RomanNumeralYearFormat,
        OneOrTwoDigitYearFormat
      ].find { |klass| klass.supports?(value) }

      # If no specific format matched, use the base class
      date_class ||= CocinaDisplay::Dates::Date

      date_class.new(cocina)
    end
  end

  # Parse a string to a Date object according to the given encoding.
  # Delegates to the parser subclass {normalize_to_edtf} method.
  # @param value [String] the date value to parse
  # @return [Date]
  # @return [nil] if the date is blank or invalid
  def self.parse_date(value)
    ::Date.edtf(normalize_to_edtf(value))
  end

  # Apply any encoding-specific munging or text extraction logic.
  # @note This is the "fallback" version when no other parser matches.
  # @param value [String] the date value to modify
  # @return [String] the value without leading "[" or trailing "." / "]",
  #   zero-padded to four digits when what remains is a three-digit year
  def self.normalize_to_edtf(value)
    sanitized = value.gsub(/^[\[]+/, "").gsub(/[\.\]]+$/, "")
    # Pad the sanitized text (not the raw value) so that "[966]" and "966."
    # become "0966" just like a bare "966" does.
    sanitized = sanitized.rjust(4, "0") if /^\d{3}$/.match?(sanitized)

    sanitized
  end

  class << self
    # Returns the date in the format specified by the precision.
    # Supports e.g. retrieving year precision when the actual date is more precise.
    # This is a public class method: it is called with an explicit receiver
    # from {#decoded_value}.
    # @param date [Date] The date to format.
    # @param precision [Symbol] The precision to format the date at, e.g. :month
    # @param allowed_precisions [Array<Symbol>] List of allowed precisions for the output.
    #   Options are [:day, :month, :year, :decade, :century].
    # @note allowed_precisions should be ordered by granularity, with most specific first.
    # @return [String, nil] nil when the resulting precision is not one of the options above
    def format_date(date, precision, allowed_precisions)
      # Fall back to the first allowed precision when the requested one is not allowed
      precision = allowed_precisions.first unless allowed_precisions.include?(precision)

      case precision
      when :day
        date.strftime("%B %e, %Y")
      when :month
        date.strftime("%B %Y")
      when :year
        year = date.year
        if year < 1
          # Year 0 is displayed as 1 BCE, -1 as 2 BCE, etc.
          "#{year.abs + 1} BCE"
        # Any dates before the year 1000 are explicitly marked CE
        elsif year >= 1 && year < 1000
          "#{year} CE"
        else
          year.to_s
        end
      when :decade
        "#{EDTF::Decade.new(date.year).year}s"
      when :century
        if date.year.negative?
          "#{((date.year / 100).abs + 1).ordinalize} century BCE"
        else
          "#{((date.year / 100) + 1).ordinalize} century"
        end
      end
    end
  end

  attr_reader :cocina, :date

  # @param cocina [Hash] Cocina date data; its "value" is parsed immediately
  def initialize(cocina)
    @cocina = cocina
    @date = self.class.parse_date(cocina["value"])
  end

  # Compare this date to another {Date} or {DateRange} using its {sort_key}.
  # @return [Integer, nil] nil when other is neither a Date nor a DateRange
  def <=>(other)
    sort_key <=> other.sort_key if other.is_a?(Date) || other.is_a?(DateRange)
  end

  # The text representation of the date, as stored in Cocina.
  # @return [String]
  def value
    cocina["value"]
  end

  # The type of this date, if any, such as "creation", "publication", etc.
  # @return [String, nil]
  def type
    cocina["type"]
  end

  # The qualifier for this date, if any, such as "approximate", "inferred", etc.
  # @return [String, nil]
  def qualifier
    cocina["qualifier"]
  end

  # Does this date have a qualifier? E.g. "approximate", "inferred", etc.
  # @return [Boolean]
  def qualified?
    qualifier.present?
  end

  # The encoding of this date, if specified.
  # @example
  #   date.encoding #=> "iso8601"
  # @return [String, nil]
  def encoding
    cocina.dig("encoding", "code")
  end

  # Was an encoding declared for this date?
  # @return [Boolean]
  def encoding?
    encoding.present?
  end

  # Is this the start date in a range?
  # @return [Boolean]
  def start?
    type == "start"
  end

  # Is this the end date in a range?
  # @return [Boolean]
  def end?
    type == "end"
  end

  # Was the date marked as approximate?
  # @return [Boolean]
  def approximate?
    qualifier == "approximate"
  end

  # Was the date marked as inferred?
  # @return [Boolean]
  def inferred?
    qualifier == "inferred"
  end

  # Was the date marked as questionable?
  # @return [Boolean]
  def questionable?
    qualifier == "questionable"
  end

  # Was the date marked as primary?
  # @note In MODS XML, this corresponds to the +keyDate+ attribute.
  # @return [Boolean]
  def primary?
    cocina["status"] == "primary"
  end

  # Is the value present and not a known unparsable value like "9999"?
  # @return [Boolean]
  def parsable?
    value.present? && !UNPARSABLE_VALUES.include?(value)
  end

  # Did we successfully parse a date from the Cocina data?
  # @return [Boolean]
  def parsed_date?
    date.present?
  end

  # How precise is the parsed date information?
  # @return [Symbol] :year, :month, :day, :decade, :century, or :unknown
  def precision
    return :unknown unless date_range || date

    if date_range.is_a? EDTF::Century
      :century
    elsif date_range.is_a? EDTF::Decade
      :decade
    elsif date.is_a? EDTF::Season
      :month
    elsif date.is_a? EDTF::Interval
      date.precision
    else
      # Unspecified parts reduce the effective precision
      case date.precision
      when :month
        date.unspecified.unspecified?(:month) ? :year : :month
      when :day
        d = date.unspecified.unspecified?(:day) ? :month : :day
        date.unspecified.unspecified?(:month) ? :year : d
      else
        date.precision
      end
    end
  end

  # Key used to sort this date. Respects BCE/CE ordering and precision.
  # @return [String] empty string when no date was parsed
  def sort_key
    # Even if not parsed, we might need to sort it for display later
    return "" unless parsed_date?

    # Use the start of an interval for sorting
    sort_date = date.is_a?(EDTF::Interval) ? date.from : date

    # Get the parsed year, month, and day values
    year, month, day = if sort_date.respond_to?(:values)
      sort_date.values
    else
      [sort_date.year, nil, nil]
    end

    # Format year into sortable string
    year_str = if year > 0
      # for CE dates, we can just pad them out to 4 digits and sort normally...
      year.to_s.rjust(4, "0")
    else
      # ... but for BCE, because we're sorting lexically, we need to invert the digits (replacing 0 with 9, 1 with 8, etc.),
      # we prefix it with a hyphen (which will sort before any digit) and the number of digits (also inverted) to get
      # it to sort correctly.
      inverted_year = year.abs.to_s.chars.map { |c| BCE_CHAR_SORT_MAP[c] }.join
      length_prefix = BCE_CHAR_SORT_MAP[inverted_year.length.to_s]
      "-#{length_prefix}#{inverted_year}"
    end

    # Format month and day into sortable strings, pad to 2 digits
    month_str = month ? month.to_s.rjust(2, "0") : "00"
    day_str = day ? day.to_s.rjust(2, "0") : "00"

    # Join into a sortable string; add hyphens so decade/century sort first
    case precision
    when :decade
      [year_str[0...-1], "-", month_str, day_str].join
    when :century
      [year_str[0...-2], "--", month_str, day_str].join
    else
      [year_str, month_str, day_str].join
    end
  end

  # Value reduced to digits and hyphen. Used for comparison/deduping.
  # @note This is important for uniqueness checks in Imprint display.
  # @return [String]
  def base_value
    # Expand abbreviated ranges such as "1920-21" to "1920-1921"
    if value =~ /^\[?1\d{3}-\d{2}\??\]?$/
      return value.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
    end

    # Replace x/u/- placeholders following 1-3 digits with zeroes, then keep only digits and hyphens
    value.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{Regexp.last_match(1)}#{"0" * Regexp.last_match(2).length}" }.scan(/[\d-]/).join
  end

  # Decoded version of the date with "BCE" or "CE". Strips leading zeroes.
  # @param allowed_precisions [Array<Symbol>] List of allowed precisions for the output.
  #   Defaults to [:day, :month, :year, :decade, :century].
  # @param ignore_unparseable [Boolean] Return nil instead of the original value if it couldn't be parsed
  # @param display_original_value [Boolean] Return the original value if it was not encoded
  # @return [String, nil]
  def decoded_value(allowed_precisions: [:day, :month, :year, :decade, :century], ignore_unparseable: false, display_original_value: true)
    return if ignore_unparseable && !parsed_date?
    return value.strip unless parsed_date?

    # Unencoded values are shown verbatim unless they are a bare number or a
    # four-character year pattern such as "19XX"
    if display_original_value && !encoding?
      return value.strip unless value =~ /^-?\d+$/ || value =~ /^[\dXxu?-]{4}$/
    end

    if date.is_a?(EDTF::Interval)
      range = [
        Date.format_date(date.min, date.min.precision, allowed_precisions),
        Date.format_date(date.max, date.max.precision, allowed_precisions)
      ].uniq.compact

      return value.strip if range.empty?

      range.join(" - ")
    else
      Date.format_date(date, precision, allowed_precisions) || value.strip
    end
  end

  # Decoded date with "BCE" or "CE" and qualifier markers applied.
  # @see decoded_value
  # @see https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
  # @return [String]
  def qualified_value
    qualified_format = case qualifier
    when "approximate"
      "[ca. %s]"
    when "questionable"
      "[%s?]"
    when "inferred"
      "[%s]"
    else
      "%s"
    end

    format(qualified_format, decoded_value)
  end

  # Range between earliest possible date and latest possible date.
  # @note Some encodings support disjoint sets of ranges, so this method could be less accurate than {#to_a}.
  # @return [Range, nil] nil when either bound is unavailable
  def as_range
    return unless earliest_date && latest_date

    earliest_date..latest_date
  end

  # Array of all dates that fall into the range of possible dates in the data.
  # @note Some encodings support disjoint sets of ranges, so this method could be more accurate than {#as_range}.
  # @return [Array]
  def to_a
    case date
    when EDTF::Set
      date.to_a
    else
      as_range.to_a
    end
  end

  private

  # Earliest possible date encoded in data, respecting unspecified/imprecise info.
  # @return [Date, nil]
  def earliest_date
    return nil if date.nil?

    case date_range
    when EDTF::Unknown
      nil
    when EDTF::Epoch, EDTF::Interval, EDTF::Season
      date_range.min
    when EDTF::Set
      date_range.to_a.first
    else
      d = date.dup
      d = d.change(month: 1, day: 1) if date.precision == :year
      d = d.change(day: 1) if date.precision == :month
      d = d.change(month: 1) if date.unspecified.unspecified? :month
      d = d.change(day: 1) if date.unspecified.unspecified? :day
      d
    end
  end

  # Latest possible date encoded in data, respecting unspecified/imprecise info.
  # @return [Date, nil]
  def latest_date
    return nil if date.nil?

    case date_range
    when EDTF::Unknown
      nil
    when EDTF::Epoch, EDTF::Interval, EDTF::Season
      date_range.max
    when EDTF::Set
      date_range.to_a.last.change(month: 12, day: 31)
    else
      d = date.dup
      d = d.change(month: 12, day: 31) if date.precision == :year
      d = d.change(day: days_in_month(d.month, d.year)) if date.precision == :month
      d = d.change(month: 12) if date.unspecified.unspecified? :month
      # Use the (possibly just adjusted) month of d so the last day always
      # belongs to the month that ends up in the result.
      d = d.change(day: days_in_month(d.month, d.year)) if date.unspecified.unspecified? :day
      d
    end
  end

  # Expand placeholders like "19XX" into an object representing the full range.
  # @note This is different from dates with an explicit start/end in the Cocina.
  # @see CocinaDisplay::Dates::DateRange
  # @return [Date]
  def date_range
    # Only values containing "u" are re-parsed; falls back to the parsed date
    @date_range ||= if /u/.match?(value)
      ::Date.edtf(value.tr("u", "x").tr("X", "x")) || date
    else
      date
    end
  end

  # Helper for calculating days in a month, accounting for leap years.
  # @param [Integer] month
  # @param [Integer] year
  # @return [Integer] Number of days in the month
  def days_in_month(month, year)
    if month == 2 && ::Date.gregorian_leap?(year)
      29
    else
      [nil, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31][month]
    end
  end
end
443
+
444
# ISO8601-encoded date parser, backed by Ruby's ::Date.parse rather than EDTF.
class Iso8601Format < Date
  # Parse the value with ::Date.parse after the inherited normalization.
  # @param value [String] the date value to parse
  # @return [::Date]
  # @return [nil] if the value cannot be parsed as a date
  def self.parse_date(value)
    ::Date.parse(normalize_to_edtf(value))
  rescue ArgumentError
    # ::Date.parse raises on invalid input (Date::Error is an ArgumentError);
    # report an unparsed date as nil, as the other parsers do, instead of
    # failing construction of the date object.
    nil
  end
end
450
+
451
# Parser for W3CDTF-encoded dates; tolerates "-00" placeholder segments.
class W3cdtfFormat < Date
  # Run the inherited normalization, then drop every "-00" occurrence
  # (e.g. "2020-00-00" becomes "2020").
  # @param value [String] the date value to modify
  # @return [String]
  def self.normalize_to_edtf(value)
    normalized = super
    normalized.gsub("-00", "")
  end
end
457
+
458
# Parser for values declared as EDTF.
class EdtfFormat < Date
  attr_reader :date

  # Zero-pad short years to four digits so they can be parsed as EDTF:
  # "0" (ignoring surrounding whitespace) becomes "0000", one to three
  # digits are left-padded, and a leading hyphen is kept in front of the
  # padded digits. Anything else is returned unchanged.
  # @param value [String] the date value to modify
  # @return [String]
  def self.normalize_to_edtf(value)
    return "0000" if value.strip == "0"

    if /^\d{1,3}$/.match?(value)
      value.rjust(4, "0")
    elsif /^-\d{1,3}$/.match?(value)
      digits = value.sub(/^-/, "")
      "-#{digits.rjust(4, "0")}"
    else
      value
    end
  end
end
475
+
476
# Parser for MARC-encoded dates; like the base parser, plus a few
# MARC-specific placeholder values.
class MarcFormat < Date
  # The placeholders "9999", "uuuu" and "||||" yield nil; every other value
  # goes through the inherited normalization.
  # @param value [String] the date value to modify
  # @return [String, nil]
  def self.normalize_to_edtf(value)
    return nil if %w[9999 uuuu ||||].include?(value)

    super
  end

  private

  # "1uuu" is treated as starting on 1000-01-01; otherwise defer to the parent.
  def earliest_date
    return super unless value == "1uuu"

    ::Date.parse("1000-01-01")
  end

  # "1uuu" is treated as ending on 1999-12-31; otherwise defer to the parent.
  def latest_date
    return super unless value == "1uuu"

    ::Date.parse("1999-12-31")
  end
end
502
+
503
# Parent of the regex-driven date formats. Each subclass is expected to
# define its own REGEX constant.
class ExtractorDateFormat < Date
  class << self
    # Match the value against the receiving class's REGEX.
    # @param value [String] the date value to test
    # @return [MatchData, nil] the match, or nil when the value does not match
    def supports?(value)
      pattern = self::REGEX
      value.match(pattern)
    end
  end
end
509
+
510
# Stand-in format for values that cannot be parsed or recognized.
class UnparseableDate < ExtractorDateFormat
  class << self
    # Never attempts to parse; the value is ignored.
    # @param value [String] the date value (unused)
    # @return [nil]
    def parse_date(value)
      nil
    end
  end
end
516
+
517
# Extracts dates written as MM/DD/YYYY or MM/DD/YYY.
class MMDDYYYYFormat < ExtractorDateFormat
  REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{3,4})/

  # Rearrange the matched parts into zero-padded "YYYY-MM-DD".
  # @param value [String] text containing the date
  # @return [String]
  def self.normalize_to_edtf(value)
    parts = value.match(self::REGEX)
    year = parts[:year].rjust(4, "0")
    month = parts[:month].rjust(2, "0")
    day = parts[:day].rjust(2, "0")
    [year, month, day].join("-")
  end
end
526
+
527
# Extracts dates written as MM/DD/YY.
class MMDDYYFormat < ExtractorDateFormat
  REGEX = /(?<month>\d{1,2})\/(?<day>\d{1,2})\/(?<year>\d{2})/

  # Rearrange the matched parts into "YYYY-MM-DD", expanding the two-digit
  # year via {munge_to_yyyy}.
  # @param value [String] text containing the date
  # @return [String]
  def self.normalize_to_edtf(value)
    parts = value.match(self::REGEX)
    full_year = munge_to_yyyy(parts[:year])
    month = parts[:month].rjust(2, "0")
    day = parts[:day].rjust(2, "0")
    [full_year, month, day].join("-")
  end

  # Expand a two-digit year to four digits. A year that would land after the
  # current year is assumed to belong to the 1900s: 12/1/99 -> 1999
  # @param year [String] the two-digit year
  # @return [String]
  def self.munge_to_yyyy(year)
    century = (year.to_i > (::Date.current.year - 2000)) ? "19" : "20"
    "#{century}#{year}"
  end
end
547
+
548
# Extracts a year written in (uppercase, optionally dotted) Roman numerals.
class RomanNumeralYearFormat < ExtractorDateFormat
  REGEX = /(?<![A-Za-z\.])(?<year>[MCDLXVI\.]+)(?![A-Za-z])/

  # Convert the matched Roman numeral to its decimal string.
  # @param text [String] text containing the numeral
  # @return [String]
  def self.normalize_to_edtf(text)
    numeral = text.match(REGEX)[:year]
    roman_to_int(numeral.upcase).to_s
  end

  # Convert a Roman numeral to an integer, ignoring any dots.
  # Symbols are consumed greedily from the front, largest value first.
  # @param value [String] the Roman numeral
  # @return [Integer]
  def self.roman_to_int(value)
    remaining = value.tr(".", "")
    symbols = {"M" => 1000, "CM" => 900, "D" => 500, "CD" => 400, "C" => 100, "XC" => 90, "L" => 50, "XL" => 40, "X" => 10, "IX" => 9, "V" => 5, "IV" => 4, "I" => 1}
    symbols.reduce(0) do |total, (symbol, amount)|
      while remaining.start_with?(symbol)
        total += amount
        remaining = remaining[symbol.length..-1]
      end
      total
    end
  end
end
570
+
571
# Extracts a century written in lowercase Roman numerals. The numeral may be
# followed by "t" (as in "xvith"), which is why the look-ahead excludes
# every lowercase letter except "t".
class RomanNumeralCenturyFormat < RomanNumeralYearFormat
  REGEX = /(?<![a-z])(?<century>[xvi]+)(?![a-su-z])/

  # Convert the matched numeral into a century pattern such as "15xx".
  # @param text [String] text containing the numeral
  # @return [String]
  def self.normalize_to_edtf(text)
    munge_to_yyyy(text.match(REGEX)[:century])
  end

  # Turn a Roman-numeral century into "NNxx", where NN is the century
  # number minus one, left-padded to two characters.
  # @param text [String] the Roman numeral
  # @return [String]
  def self.munge_to_yyyy(text)
    leading_digits = roman_to_int(text.upcase) - 1
    "#{leading_digits.to_s.rjust(2, "0")}xx"
  end
end
586
+
587
# Extracts centuries written as two digits followed by two hyphens
# (e.g. "19--"), an encoding of unknown origin found in Stanford data.
class MysteryCenturyFormat < ExtractorDateFormat
  REGEX = /(?<century>\d{2})--/

  # Replace the two hyphens with "xx".
  # @param text [String] text containing the century
  # @return [String]
  def self.normalize_to_edtf(text)
    century = text.match(REGEX)[:century]
    century + "xx"
  end
end
596
+
597
# Extractor for dates given as centuries, e.g. "19th century" or "19th C".
class CenturyFormat < ExtractorDateFormat
  REGEX = /(?<century>\d{2})th C(?:entury)?/i

  # Convert the matched century into a four-character pattern such as "18xx".
  # @param text [String] text containing the century
  # @return [String]
  def self.normalize_to_edtf(text)
    matches = text.match(REGEX)
    # Pad to two digits so that e.g. "10th century" yields "09xx" rather than
    # the three-character "9xx".
    "#{(matches[:century].to_i - 1).to_s.rjust(2, "0")}xx"
  end
end
606
+
607
# Extracts year ranges written as YYYY-YYYY or YYY-YYY.
class YearRangeFormat < ExtractorDateFormat
  REGEX = /(?<start>\d{3,4})-(?<end>\d{3,4})/

  # Join the zero-padded start and end years with "/".
  # @param text [String] text containing the range
  # @return [String]
  def self.normalize_to_edtf(text)
    bounds = text.match(REGEX)
    [bounds[:start], bounds[:end]].map { |year| year.rjust(4, "0") }.join("/")
  end
end
616
+
617
# Extracts decades written as three digits plus a placeholder character
# (one of "-", "_", "x", "u" or "?"), e.g. "192-".
class DecadeAsYearDashFormat < ExtractorDateFormat
  REGEX = /(?<!\d)(?<year>\d{3})[-_xu?](?!\d)/

  # Replace the placeholder character with "x".
  # @param text [String] text containing the decade
  # @return [String]
  def self.normalize_to_edtf(text)
    decade_digits = text.match(REGEX)[:year]
    decade_digits + "x"
  end
end
626
+
627
# Extracts decades written as YYY0s, e.g. "1920s".
class DecadeStringFormat < ExtractorDateFormat
  REGEX = /(?<!\d)(?<year>\d{3})0s(?!\d)/

  # Keep the first three digits and append "x".
  # @param text [String] text containing the decade
  # @return [String]
  def self.normalize_to_edtf(text)
    found = text.match(REGEX)
    format("%sx", found[:year])
  end
end
636
+
637
# Picks out a three- or four-digit year followed by a "BC" marker anywhere in the text.
class EmbeddedBCYearFormat < ExtractorDateFormat
  REGEX = /(?<year>\d{3,4})\s?B\.?C\.?/i

  # Produce a negative, zero-padded year that is one less than the written
  # BC year (e.g. "300 B.C." becomes "-0299").
  # @param text [String] text containing the BC year
  # @return [String]
  def self.normalize_to_edtf(text)
    bc_year = text.match(REGEX)[:year].to_i
    shifted = (bc_year - 1).to_s.rjust(4, "0")
    "-#{shifted}"
  end
end
646
+
647
# Picks out the first standalone four-digit year anywhere in the text.
class EmbeddedYearFormat < ExtractorDateFormat
  REGEX = /(?<!\d)(?<year>\d{4})(?!\d)/

  # Return the matched year, left-padded to four characters.
  # @param text [String] text containing the year
  # @return [String]
  def self.normalize_to_edtf(text)
    year = text.match(REGEX)[:year]
    year.rjust(4, "0")
  end
end
656
+
657
# Picks out the first standalone three-digit year anywhere in the text.
class EmbeddedThreeDigitYearFormat < ExtractorDateFormat
  REGEX = /(?<!\d)(?<year>\d{3})(?!\d)(?!\d)/

  # Return the matched year, zero-padded to four digits.
  # @param text [String] text containing the year
  # @return [String]
  def self.normalize_to_edtf(text)
    found = text.match(REGEX)
    short_year = found[:year]
    short_year.rjust(4, "0")
  end
end
666
+
667
# Matches a value consisting solely of a one- or two-digit year.
class OneOrTwoDigitYearFormat < ExtractorDateFormat
  REGEX = /^(?<year>\d{1,2})$/

  # Return the matched year, zero-padded to four digits.
  # @param text [String] the year text
  # @return [String]
  def self.normalize_to_edtf(text)
    digits = text.match(REGEX)[:year]
    digits.rjust(4, "0")
  end
end
676
+
677
# Picks out a year whose digits are partly wrapped in square brackets,
# anywhere in the text.
class EmbeddedYearWithBracketsFormat < ExtractorDateFormat
  # [YYY]Y Y[YYY] [YY]YY Y[YY]Y YY[YY] YYY[Y] YY[Y]Y Y[Y]YY [Y]YYY
  REGEX = /(?<year>[\d\[\]]{6})(?!\d)/

  # Return the matched text with all square brackets removed.
  # @param text [String] text containing the bracketed year
  # @return [String]
  def self.normalize_to_edtf(text)
    bracketed = text.match(REGEX)[:year]
    bracketed.delete("[]")
  end
end
687
+ end
688
+ end