parse_date 0.3.3 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 42b7b5f7405ee4a79e7eece920b10fe2852a03c996b32da8c2b8edc58a6e3ede
4
- data.tar.gz: eeba668fd1ee75994f743a010dbe37af4ffdcb8ca0c4143ca329c811739c4b28
3
+ metadata.gz: 60c444ed79ea6842f49bfb44d2adc5bd1a6790d483fe43e0bd4a0880001a6005
4
+ data.tar.gz: a62f906a4c908c4b774ef9fe1dae2a047deee79ee6569688d1c2db9796bb3b2e
5
5
  SHA512:
6
- metadata.gz: a22f1e23899efc15e5fd13ab1aac724cddc39ddf9986d0295e2f4360b639483da116ef4540bb35fc0c03cf201c0d29f4e0ac6729339140663ff7622ed8a7a623
7
- data.tar.gz: df518ca44ed63ea2925976fe49a11e28cd4ecc40b66752c7db11934d4bba6fe7ade795aca93402cb0f556816cf076e40006e08294162b8f01c527377a4603e51
6
+ metadata.gz: a949b307fb3259f1a20fa279aa2af395137b514b35b15dbce8c63701047462b06b5b6cb31723f25dbef2c8f1db249cf8e95914d92c113dd4439ed6737267878f
7
+ data.tar.gz: e1e3e154a6bc4851a7aafa199da3ee66e0d4b7d5d27537528cee36d2f0b2f80761fbbffffa1d2a544ca4a204f4fd7816f85a6d0b975824fff06cead4d64cf3f2
@@ -1,4 +1,12 @@
1
1
  ## Why was this change made?
2
2
 
3
3
 
4
- ## Was the documentation (README, API, wiki, ...) updated?
4
+
5
+ ## How was this change tested?
6
+
7
+
8
+
9
+ ## Which documentation and/or configurations were updated?
10
+
11
+
12
+
data/.rubocop.yml CHANGED
@@ -1,7 +1,7 @@
1
1
  inherit_from: .rubocop_todo.yml
2
2
 
3
- AllCops:
4
- TargetRubyVersion: 2.6
3
+ Gemspec/RequiredRubyVersion:
4
+ Enabled: false
5
5
 
6
6
  Layout/EmptyLinesAroundClassBody:
7
7
  Enabled: false
@@ -9,15 +9,27 @@ Layout/EmptyLinesAroundClassBody:
9
9
  Layout/EmptyLinesAroundModuleBody:
10
10
  Enabled: false
11
11
 
12
+ Layout/LineLength:
13
+ Exclude:
14
+ - parse_date.gemspec
15
+
16
+ Lint/MixedRegexpCaptureTypes:
17
+ Enabled: false
18
+
12
19
  Metrics/BlockLength:
13
20
  Exclude:
14
21
  - spec/**/*
15
22
 
16
- Metrics/LineLength:
17
- Max: 120
18
-
19
23
  Metrics/MethodLength:
20
- Max: 25
24
+ Max: 15
25
+
26
+ Metrics/ModuleLength:
27
+ Exclude:
28
+ - lib/parse_date/int_from_string.rb
29
+
30
+ Style/Documentation:
31
+ Exclude:
32
+ - lib/parse_date.rb
21
33
 
22
34
  Style/NumericLiterals:
23
35
  Enabled: false
@@ -35,3 +47,64 @@ Style/WordArray:
35
47
 
36
48
  Style/YodaCondition:
37
49
  Enabled: false
50
+
51
+ Gemspec/DateAssignment: # (new in 1.10)
52
+ Enabled: true
53
+ Layout/SpaceBeforeBrackets: # (new in 1.7)
54
+ Enabled: true
55
+ Lint/AmbiguousAssignment: # (new in 1.7)
56
+ Enabled: true
57
+ Lint/DeprecatedConstants: # (new in 1.8)
58
+ Enabled: true
59
+ Lint/DuplicateBranch: # (new in 1.3)
60
+ Enabled: true
61
+ Lint/DuplicateRegexpCharacterClassElement: # (new in 1.1)
62
+ Enabled: true
63
+ Lint/EmptyBlock: # (new in 1.1)
64
+ Enabled: true
65
+ Lint/EmptyClass: # (new in 1.3)
66
+ Enabled: true
67
+ Lint/LambdaWithoutLiteralBlock: # (new in 1.8)
68
+ Enabled: true
69
+ Lint/NoReturnInBeginEndBlocks: # (new in 1.2)
70
+ Enabled: true
71
+ Lint/NumberedParameterAssignment: # (new in 1.9)
72
+ Enabled: true
73
+ Lint/OrAssignmentToConstant: # (new in 1.9)
74
+ Enabled: true
75
+ Lint/RedundantDirGlobSort: # (new in 1.8)
76
+ Enabled: true
77
+ Lint/SymbolConversion: # (new in 1.9)
78
+ Enabled: true
79
+ Lint/ToEnumArguments: # (new in 1.1)
80
+ Enabled: true
81
+ Lint/TripleQuotes: # (new in 1.9)
82
+ Enabled: true
83
+ Lint/UnexpectedBlockArity: # (new in 1.5)
84
+ Enabled: true
85
+ Lint/UnmodifiedReduceAccumulator: # (new in 1.1)
86
+ Enabled: true
87
+ Style/ArgumentsForwarding: # (new in 1.1)
88
+ Enabled: true
89
+ Style/CollectionCompact: # (new in 1.2)
90
+ Enabled: true
91
+ Style/DocumentDynamicEvalDefinition: # (new in 1.1)
92
+ Enabled: true
93
+ Style/EndlessMethod: # (new in 1.8)
94
+ Enabled: true
95
+ Style/HashConversion: # (new in 1.10)
96
+ Enabled: true
97
+ Style/HashExcept: # (new in 1.7)
98
+ Enabled: true
99
+ Style/IfWithBooleanLiteralBranches: # (new in 1.9)
100
+ Enabled: true
101
+ Style/NegatedIfElseCondition: # (new in 1.2)
102
+ Enabled: true
103
+ Style/NilLambda: # (new in 1.3)
104
+ Enabled: true
105
+ Style/RedundantArgument: # (new in 1.4)
106
+ Enabled: true
107
+ Style/StringChars: # (new in 1.12)
108
+ Enabled: true
109
+ Style/SwapValues: # (new in 1.1)
110
+ Enabled: true
data/.rubocop_todo.yml CHANGED
@@ -1,47 +1,7 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2019-10-28 09:19:23 -0700 using RuboCop version 0.74.0.
3
+ # on 2021-03-26 19:20:09 UTC using RuboCop version 1.12.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
-
9
- # Offense count: 5
10
- Metrics/AbcSize:
11
- Max: 41
12
-
13
- # Offense count: 4
14
- Metrics/CyclomaticComplexity:
15
- Max: 12
16
-
17
- # Offense count: 1
18
- # Configuration parameters: CountComments.
19
- Metrics/ModuleLength:
20
- Max: 178
21
-
22
- # Offense count: 4
23
- Metrics/PerceivedComplexity:
24
- Max: 12
25
-
26
- # Offense count: 2
27
- Style/Documentation:
28
- Exclude:
29
- - 'spec/**/*'
30
- - 'test/**/*'
31
- - 'lib/parse_date.rb'
32
-
33
- # Offense count: 1
34
- # Cop supports --auto-correct.
35
- # Configuration parameters: EnforcedOctalStyle.
36
- # SupportedOctalStyles: zero_with_o, zero_only
37
- Style/NumericLiteralPrefix:
38
- Exclude:
39
- - 'spec/parse_date/int_from_string_spec.rb'
40
-
41
- # Offense count: 1
42
- # Cop supports --auto-correct.
43
- # Configuration parameters: EnforcedStyle, AllowInnerSlashes.
44
- # SupportedStyles: slashes, percent_r, mixed
45
- Style/RegexpLiteral:
46
- Exclude:
47
- - 'lib/parse_date/int_from_string.rb'
data/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  [![Gem Version](https://badge.fury.io/rb/parse_date.svg)](https://badge.fury.io/rb/parse_date)
2
- [![Build Status](https://travis-ci.org/sul-dlss/parse_date.svg?branch=master)](https://travis-ci.org/sul-dlss/parse_date)
2
+ [![Build Status](https://travis-ci.com/sul-dlss/parse_date.svg?branch=main)](https://travis-ci.com/sul-dlss/parse_date)
3
3
  [![Maintainability](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/maintainability)](https://codeclimate.com/github/sul-dlss/parse_date/maintainability)
4
4
  [![Test Coverage](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/test_coverage)](https://codeclimate.com/github/sul-dlss/parse_date/test_coverage)
5
5
 
@@ -40,6 +40,14 @@ ParseDate.parse_range('195-') # (1950..1959).to_a
40
40
  ParseDate.parse_range('199u') # (1990..1999).to_a
41
41
  ParseDate.parse_range('197?') # (1970..1979).to_a
42
42
  ParseDate.parse_range('196x') # (1960..1969).to_a
43
+ ParseDate.parse_range('1990s') # (1990..1999).to_a
44
+ ParseDate.parse_range('1990s?') # (1990..1999).to_a
45
+ ParseDate.parse_range('ca. 1930s') # (1930..1939).to_a
46
+ ParseDate.parse_range('1928-1980s') # (1928..1989).to_a
47
+ ParseDate.parse_range('1940s-1990') # (1940..1990).to_a
48
+ ParseDate.parse_range('1980s-1990s') # (1980..1999).to_a
49
+ ParseDate.parse_range('1675-7') # [1675, 1676, 1677]
50
+ ParseDate.parse_range('1040–1 CE') # [1040, 1041]
43
51
  ParseDate.parse_range('18th century CE') # (1700..1799).to_a
44
52
  ParseDate.parse_range('17uu') # (1700..1799).to_a
45
53
  ParseDate.parse_range('between 1694 and 1799') # (1694..1799).to_a
@@ -52,19 +60,26 @@ ParseDate.parse_range('17--?-18--?') # (1700..1899).to_a
52
60
  ParseDate.parse_range('1835 or 1836') # [1835, 1836]
53
61
  ParseDate.parse_range('17-- or 18--?') # (1700..1899).to_a
54
62
  ParseDate.parse_range('-2 or 1?') # (-2..1).to_a
63
+ ParseDate.parse_range('1500? to 1582') # (1500..1582).to_a
55
64
  ParseDate.parse_range('17th or 18th century?') # (1600..1799).to_a
56
65
  ParseDate.parse_range('ca. 5th–6th century A.D.') # (400..599).to_a
57
66
  ParseDate.parse_range('ca. 9th–8th century B.C.') # (-999..-800).to_a
58
67
  ParseDate.parse_range('ca. 13th–12th century B.C.') # (-1399..-1200).to_a
59
68
  ParseDate.parse_range('5th century B.C.') # (-599..-500).to_a
60
69
  ParseDate.parse_range('502-504') # [502, 503, 504]
70
+ ParseDate.parse_range('950-60') # (950..960).to_a
71
+ ParseDate.parse_range('-0150 - -0100') # (-150..-100).to_a
61
72
  ParseDate.parse_range('-2100 - -2000') # (-2100..-2000).to_a
73
+ ParseDate.parse_range('1230—1239 CE') # (1230..1239).to_a (alternate hyphen char)
74
+ ParseDate.parse_range('996–1021 CE') # (996..1021).to_a (diff alternate hyphen char)
62
75
  ParseDate.parse_range('1975 - 1905') # last year < first year, raises error
63
76
  ParseDate.parse_range('-100 - -150') # last year < first year, raises error
64
77
  ParseDate.parse_range('1975 or 1905') # last year < first year, raises error
65
- ParseDate.parse_range('2050') # year later than current year + 1, raises error
66
- ParseDate.parse_range('random text') # can't parse years, raises error
67
- ParseDate.parse_range(nil) # can't parse years, raises error
78
+ ParseDate.parse_range('1975 - 2050') # single invalid year endpoint, raises error
79
+ ParseDate.parse_range('2050') # nil - only invalid year endpoints present
80
+ ParseDate.parse_range('2045 - 2050') # nil - only invalid year endpoints present
81
+ ParseDate.parse_range('random text') # nil - can't parse years
82
+ ParseDate.parse_range(nil) # nil - can't parse years
68
83
 
69
84
  ParseDate.earliest_year('12/25/00') # 2000
70
85
  ParseDate.earliest_year('5-1-21') # 1921
@@ -82,7 +97,10 @@ ParseDate.earliest_year('17uu') # 1700
82
97
  ParseDate.earliest_year('between 1694 and 1799') # 1694
83
98
  ParseDate.earliest_year('between 1 and 5') # 1
84
99
  ParseDate.earliest_year('between 300 and 150 B.C.') # -300
100
+ ParseDate.earliest_year('1500? to 1582') # 1500
85
101
  ParseDate.earliest_year('1496-1499') # 1496
102
+ ParseDate.earliest_year('1230—1239 CE') # 1230 (alternate hyphen char)
103
+ ParseDate.earliest_year('996–1021 CE') # 996 (diff alternate hyphen char)
86
104
  ParseDate.earliest_year('1750?-1867') # 1750
87
105
  ParseDate.earliest_year('17--?-18--?') # 1700
88
106
  ParseDate.earliest_year('1835 or 1836') # 1835
@@ -93,6 +111,8 @@ ParseDate.earliest_year('ca. 9th–8th century B.C.') # -999
93
111
  ParseDate.earliest_year('ca. 13th–12th century B.C.') # -1399
94
112
  ParseDate.earliest_year('5th century B.C.') # -599
95
113
  ParseDate.earliest_year('502-504') # 502
114
+ ParseDate.earliest_year('950-60') # 950
115
+ ParseDate.earliest_year('-0150 - -0100') # -150
96
116
  ParseDate.earliest_year('-2100 - -2000') # -2100
97
117
 
98
118
  ParseDate.latest_year('20000222') # 2000
@@ -100,12 +120,23 @@ ParseDate.latest_year('195-') # 1959
100
120
  ParseDate.latest_year('199u') # 1999
101
121
  ParseDate.latest_year('197?') # 1979
102
122
  ParseDate.latest_year('196x') # 1969
123
+ ParseDate.latest_year('1990s') # 1999
124
+ ParseDate.latest_year('1990s?') # 1999
125
+ ParseDate.latest_year('ca. 1930s') # 1939
126
+ ParseDate.latest_year('1928-1980s') # 1989
127
+ ParseDate.latest_year('1940s-1990') # 1990
128
+ ParseDate.latest_year('1980s-1990s') # 1999
129
+ ParseDate.latest_year('1675-7') # 1677
130
+ ParseDate.latest_year('1040–1 CE') # 1041
103
131
  ParseDate.latest_year('18th century CE') # 1799
104
132
  ParseDate.latest_year('17uu') # 1799
105
133
  ParseDate.latest_year('between 1694 and 1799') # 1799
106
134
  ParseDate.latest_year('between 1 and 5') # 5
107
135
  ParseDate.latest_year('between 300 and 150 B.C.') # -150
136
+ ParseDate.latest_year('1500? to 1582') # 1582
108
137
  ParseDate.latest_year('1496-1499') # 1499
138
+ ParseDate.latest_year('1230—1239 CE') # 1239 (alternate hyphen char)
139
+ ParseDate.latest_year('996–1021 CE') # 1021 (diff alternate hyphen char)
109
140
  ParseDate.latest_year('1750?-1867') # 1867
110
141
  ParseDate.latest_year('17--?-18--?') # 1899
111
142
  ParseDate.latest_year('1757-58') # 1758
@@ -119,6 +150,8 @@ ParseDate.latest_year('ca. 13th–12th century B.C.') # -1200
119
150
  ParseDate.latest_year('5th century B.C.') # -500
120
151
  ParseDate.latest_year('-5 - 3') # 3
121
152
  ParseDate.latest_year('502-504') # 504
153
+ ParseDate.latest_year('950-60') # 960
154
+ ParseDate.latest_year('-0150 - -0100') # -100
122
155
  ParseDate.latest_year('-2100 - -2000') # -2000
123
156
 
124
157
  ParseDate.range_array('1993', '1995') # [1993, 1994, 1995]
data/lib/parse_date.rb CHANGED
@@ -42,6 +42,7 @@ class ParseDate
42
42
  def self.parse_range(date_str)
43
43
  first = earliest_year(date_str)
44
44
  last = latest_year(date_str)
45
+ return nil unless first || last
45
46
  raise ParseDate::Error, "Unable to parse range from '#{date_str}'" unless year_range_valid?(first, last)
46
47
 
47
48
  range_array(first, last)
@@ -64,6 +65,8 @@ class ParseDate
64
65
  # @param [Integer, String] first_year, expecting integer or parseable string for .to_i
65
66
  # @param [Integer, String] last_year, expecting integer or parseable string for .to_i
66
67
  # @return [Array] array of Integer year values from first to last, inclusive
68
+ # rubocop:disable Metrics/CyclomaticComplexity
69
+ # rubocop:disable Metrics/PerceivedComplexity
67
70
  def self.range_array(first_year, last_year)
68
71
  first_year = first_year.to_i if first_year.is_a?(String) && first_year.match?(/^-?\d+$/)
69
72
  last_year = last_year.to_i if last_year.is_a?(String) && last_year.match?(/^-?\d+$/)
@@ -71,9 +74,11 @@ class ParseDate
71
74
  return [] unless last_year || first_year
72
75
  return [first_year] if last_year.nil? && first_year
73
76
  return [last_year] if first_year.nil? && last_year
74
- raise(StandardError, "unable to create year range array from #{first_year}, #{last_year}") unless
77
+ raise(ParseDate::Error, "unable to create year range array from #{first_year}, #{last_year}") unless
75
78
  year_range_valid?(first_year, last_year)
76
79
 
77
80
  Range.new(first_year, last_year).to_a
78
81
  end
82
+ # rubocop:enable Metrics/CyclomaticComplexity
83
+ # rubocop:enable Metrics/PerceivedComplexity
79
84
  end
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'date' # so upstream callers don't have to require it
4
-
5
3
  class ParseDate
6
4
 
7
5
  # Parse (Year) Integers from Date Strings
@@ -20,23 +18,15 @@ class ParseDate
20
18
  return if date_str == '0000-00-00' # shpc collection has these useless dates
21
19
 
22
20
  # B.C. first (match longest string first)
23
- return ParseDate.send(:earliest_century_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
24
- return ParseDate.send(:between_bc_earliest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
25
- return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(YEAR_BC_REGEX)
21
+ bc_result = ParseDate.send(:earliest_year_bc_parsing, date_str)
22
+ return bc_result if bc_result
26
23
 
27
- result ||= ParseDate.send(:between_earliest_year, date_str)
28
- result ||= ParseDate.send(:negative_first_four_digits, date_str)
29
- result ||= ParseDate.send(:first_four_digits, date_str)
30
- result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
31
- result ||= ParseDate.send(:first_year_for_decade, date_str) # 198x or 201x
32
- result ||= ParseDate.send(:first_year_for_century, date_str) # includes BC
33
- result ||= ParseDate.send(:year_for_early_numeric, date_str)
34
- unless result
35
- # try removing brackets between digits in case we have 169[5] or [18]91
36
- no_brackets = ParseDate.send(:remove_brackets, date_str)
37
- return earliest_year(no_brackets) if no_brackets
38
- end
39
- result.to_i if result && year_int_valid?(result.to_i)
24
+ result = ParseDate.send(:earliest_year_parsing, date_str)
25
+ return result if result
26
+
27
+ # try removing brackets between digits in case we have 169[5] or [18]91
28
+ no_brackets = ParseDate.send(:remove_brackets, date_str)
29
+ earliest_year(no_brackets) if no_brackets
40
30
  end
41
31
 
42
32
  # latest year as Integer if we can parse one from date_str
@@ -52,30 +42,15 @@ class ParseDate
52
42
  return if date_str == '0000-00-00' # shpc collection has these useless dates
53
43
 
54
44
  # B.C. first (match longest string first)
55
- return ParseDate.send(:last_year_mult_centuries_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
56
- return ParseDate.send(:between_bc_latest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
57
- return ParseDate.send(:last_year_for_bc_century, date_str) if date_str.match(BC_CENTURY_REGEX)
58
- return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(BC_REGEX)
45
+ bc_result = ParseDate.send(:latest_year_bc_parsing, date_str)
46
+ return bc_result if bc_result
59
47
 
60
- result ||= ParseDate.send(:between_latest_year, date_str)
61
- result ||= ParseDate.send(:hyphen_4digit_latest_year, date_str)
62
- result ||= ParseDate.send(:hyphen_2digit_latest_year, date_str)
63
- result ||= ParseDate.send(:yyuu_after_hyphen, date_str)
64
- result ||= ParseDate.send(:year_after_or, date_str)
65
- result ||= ParseDate.send(:negative_4digits_after_hyphen, date_str)
66
- result ||= ParseDate.send(:negative_first_four_digits, date_str)
67
- result ||= ParseDate.send(:first_four_digits, date_str)
68
- result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
69
- result ||= ParseDate.send(:last_year_for_decade, date_str) # 198x or 201x
70
- result ||= ParseDate.send(:last_year_mult_centuries, date_str) # nth-nth century
71
- result ||= ParseDate.send(:last_year_for_century, date_str)
72
- result ||= ParseDate.send(:last_year_for_early_numeric, date_str)
73
- unless result
74
- # try removing brackets between digits in case we have 169[5] or [18]91
75
- no_brackets = ParseDate.send(:remove_brackets, date_str)
76
- return earliest_year(no_brackets) if no_brackets
77
- end
78
- result.to_i if result && year_int_valid?(result.to_i)
48
+ result = ParseDate.send(:latest_year_parsing, date_str)
49
+ return result if result
50
+
51
+ # try removing brackets between digits in case we have 169[5] or [18]91
52
+ no_brackets = ParseDate.send(:remove_brackets, date_str)
53
+ latest_year(no_brackets) if no_brackets
79
54
  end
80
55
 
81
56
  # true if the year is between -9999 and (current year + 1), inclusive
@@ -88,24 +63,95 @@ class ParseDate
88
63
 
89
64
  protected
90
65
 
66
+ def earliest_year_bc_parsing(date_str)
67
+ return ParseDate.send(:earliest_century_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
68
+ return ParseDate.send(:between_bc_earliest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
69
+ return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(YEAR_BC_REGEX)
70
+ end
71
+
72
+ def earliest_year_parsing(date_str)
73
+ [
74
+ # longest string first, more or less
75
+ :between_earliest_year,
76
+ :hyphen_4digit_earliest_year,
77
+ :negative_first_four_digits,
78
+ :first_four_digits,
79
+ :year_from_mm_dd_yy,
80
+ :first_year_for_decade, # 198x or 201x
81
+ :first_year_for_century, # includes some BC
82
+ :year_for_early_numeric
83
+ ].each do |method_name|
84
+ result = ParseDate.send(method_name, date_str)
85
+ return result.to_i if result && year_int_valid?(result.to_i)
86
+ end
87
+ nil
88
+ end
89
+
90
+ def latest_year_bc_parsing(date_str)
91
+ return ParseDate.send(:last_year_mult_centuries_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
92
+ return ParseDate.send(:between_bc_latest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
93
+ return ParseDate.send(:last_year_for_bc_century, date_str) if date_str.match(BC_CENTURY_REGEX)
94
+ return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(BC_REGEX)
95
+ end
96
+
97
+ # rubocop:disable Metrics/MethodLength
98
+ def latest_year_parsing(date_str)
99
+ result = nil
100
+ [
101
+ # longest string first, more or less
102
+ :between_latest_year,
103
+ :hyphen_4digit_latest_year,
104
+ :hyphen_2digit_latest_year,
105
+ :hyphen_1digit_latest_year,
106
+ :yyuu_after_hyphen,
107
+ :year_after_or,
108
+ :negative_4digits_after_hyphen,
109
+ :negative_first_four_digits,
110
+ :last_year_for_0s_decade,
111
+ :first_four_digits,
112
+ :year_from_mm_dd_yy,
113
+ :last_year_for_decade, # 198x or 201x
114
+ :last_year_mult_centuries, # nth-nth century
115
+ :last_year_for_century,
116
+ :last_year_for_early_numeric
117
+ ].each do |method|
118
+ result ||= ParseDate.send(method, date_str)
119
+ return result.to_i if result && year_int_valid?(result.to_i)
120
+ end
121
+ nil
122
+ end
123
+ # rubocop:enable Metrics/MethodLength
124
+
91
125
  REGEX_OPTS = Regexp::IGNORECASE | Regexp::MULTILINE
92
126
  BC_REGEX = Regexp.new(/\s*B\.?\s*C\.?/im)
93
- BRACKETS_BETWEEN_DIGITS_REGEX = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
127
+ BRACKETS_BETWEEN_DIGITS_REGEX = Regexp.new("\\d[#{Regexp.escape('[]')}]\\d")
94
128
 
95
129
  # removes brackets between digits such as 169[5] or [18]91
96
130
  def remove_brackets(date_str)
97
131
  date_str.delete('[]') if date_str.match(BRACKETS_BETWEEN_DIGITS_REGEX)
98
132
  end
99
133
 
100
- YYYY_HYPHEN_YYYY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*[-—]\s*(?<last>\d{4})\??/m)
134
+ YYYY_HYPHEN_YYYY_REGEX = Regexp.new(/(?<first>\d{3,4})s?\??\s*(-|—|–|to)\s*(?<last>\d{4}s?)\??/m)
135
+
136
+ # Integer value for earliest year if we have "yyyy-yyyy" pattern
137
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
138
+ def hyphen_4digit_earliest_year(date_str)
139
+ Regexp.last_match(:first).to_i if date_str.match(YYYY_HYPHEN_YYYY_REGEX)
140
+ end
101
141
 
102
142
  # Integer value for latest year if we have "yyyy-yyyy" pattern
103
143
  # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
104
144
  def hyphen_4digit_latest_year(date_str)
105
- Regexp.last_match(:last).to_i if date_str.match(YYYY_HYPHEN_YYYY_REGEX)
145
+ latest = Regexp.last_match(:last) if date_str.match(YYYY_HYPHEN_YYYY_REGEX)
146
+ if ParseDate.year_int_valid?(latest.to_i)
147
+ ParseDate.latest_year(latest) # accommodates '1980s - 1990s'
148
+ else
149
+ # return the bad value; parse_range might need to complain about it
150
+ latest
151
+ end
106
152
  end
107
153
 
108
- YYYY_HYPHEN_YY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*[-—]\s*(?<last>\d{2})\??([^-0-9].*)?$/)
154
+ YYYY_HYPHEN_YY_REGEX = Regexp.new(/(?<first>\d{3,4})\??\s*(-|—|–|to)\s*(?<last>\d{2})\??([^-0-9].*)?$/)
109
155
 
110
156
  # Integer value for latest year if we have "yyyy-yy" pattern
111
157
  # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
@@ -114,14 +160,28 @@ class ParseDate
114
160
  return unless matches
115
161
 
116
162
  first = Regexp.last_match(:first)
117
- century = first[0, 2]
163
+ century = first[0..-3] # whatever is before the last 2 digits
118
164
  last = "#{century}#{Regexp.last_match(:last)}"
119
165
  last.to_i if ParseDate.year_range_valid?(first.to_i, last.to_i)
120
166
  end
121
167
 
168
+ YYYY_HYPHEN_Y_REGEX = Regexp.new(/(?<first>\d{3,4})\??\s*(-|—|–|to)\s*(?<last>\d{1})\??([^-0-9].*)?$/)
169
+
170
+ # Integer value for latest year if we have "yyyy-y" pattern
171
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
172
+ def hyphen_1digit_latest_year(date_str)
173
+ matches = date_str.match(YYYY_HYPHEN_Y_REGEX)
174
+ return unless matches
175
+
176
+ first = Regexp.last_match(:first)
177
+ decade = first[0..-2] # whatever is before the last digit
178
+ last = "#{decade}#{Regexp.last_match(:last)}"
179
+ last.to_i if ParseDate.year_range_valid?(first.to_i, last.to_i)
180
+ end
181
+
122
182
  YYUU = '\\d{1,2}[u\\-]{2}'
123
183
  YYuu_HYPHEN_YYuu_REGEX =
124
- Regexp.new("(?<first>#{YYUU})\\??\\s*[-—]\\s*(?<last>#{YYUU})\\??([^u\\-]|$)??", REGEX_OPTS)
184
+ Regexp.new("(?<first>#{YYUU})\\??\\s*(-|—|–|to)\\s*(?<last>#{YYUU})\\??([^u\\-]|$)??", REGEX_OPTS)
125
185
 
126
186
  # Integer value for latest year if we have "yyuu-yyuu" pattern
127
187
  # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
@@ -139,7 +199,8 @@ class ParseDate
139
199
  end
140
200
 
141
201
  # NOTE: some actual data seemed to have a diff hyphen char. (slightly longer)
142
- YY_YY_CENTURY_REGEX = Regexp.new(/(?<first>\d{1,2})[a-z]{2}?\s*(-|–|or)\s*(?<last>\d{1,2})[a-z]{2}?\s+centur.*/im)
202
+ YY_YY_CENTURY_REGEX =
203
+ Regexp.new(/(?<first>\d{1,2})[a-z]{2}?\s*(-|–|–|or|to)\s*(?<last>\d{1,2})[a-z]{2}?\s+centur.*/im)
143
204
 
144
205
  # Integer value for latest year if we have nth-nth century pattern
145
206
  # @return [Integer, nil] yy99 if date_str matches pattern; nil otherwise
@@ -176,13 +237,13 @@ class ParseDate
176
237
  # looks for -yyyy at beginning of date_str and returns if found
177
238
  # @return [String, nil] negative 4 digit year (e.g. -1865) if date_str has -yyyy, nil otherwise
178
239
  def negative_first_four_digits(date_str)
179
- Regexp.last_match(1) if date_str.match(/^(\-\d{4})/)
240
+ Regexp.last_match(1) if date_str.match(/^(-\d{4})/)
180
241
  end
181
242
 
182
243
  # looks for -yyyy after hyphen and returns if found
183
244
  # @return [String, nil] negative 4 digit year (e.g. -1865) if date_str has -yyyy - -yyyy, nil otherwise
184
245
  def negative_4digits_after_hyphen(date_str)
185
- Regexp.last_match(1) if date_str.match(/\-\d{4}\s*\-\s*(\-\d{4})/)
246
+ Regexp.last_match(1) if date_str.match(/-\d{4}\s*(?:-|–|–|or|to)\s*(-\d{4})/)
186
247
  end
187
248
 
188
249
  # looks for 4 consecutive digits in date_str and returns first occurrence if found
@@ -198,7 +259,7 @@ class ParseDate
198
259
  # 1/1/27 -> 1927
199
260
  # @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str matches pattern, nil otherwise
200
261
  def year_from_mm_dd_yy(date_str)
201
- slash_matches = date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
262
+ slash_matches = date_str.match(%r{\d{1,2}/\d{1,2}/\d{2}})
202
263
  if slash_matches
203
264
  date_obj = Date.strptime(date_str, '%m/%d/%y')
204
265
  else
@@ -211,6 +272,16 @@ class ParseDate
211
272
  nil # explicitly want nil if date won't parse
212
273
  end
213
274
 
275
+ DECADE_0S_REGEX = Regexp.new('(^|\D)\d{3}0\'?s($|\D)', REGEX_OPTS)
276
+
277
+ # last year of decade (as String) if we have: yyy0s flavor pattern
278
+ # @return [String, nil] 4 digit year (e.g. 1869, 1959) if date_str matches pattern, nil otherwise
279
+ def last_year_for_0s_decade(date_str)
280
+ decade_matches = date_str.match(DECADE_0S_REGEX)
281
+ changed_to_nine = decade_matches.to_s.sub(/0'?s/, '9') if decade_matches
282
+ ParseDate.first_four_digits(changed_to_nine) if changed_to_nine
283
+ end
284
+
214
285
  DECADE_4CHAR_REGEX = Regexp.new('(^|\D)\d{3}[u\-?x]($|\D)', REGEX_OPTS)
215
286
 
216
287
  # first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
@@ -237,12 +308,14 @@ class ParseDate
237
308
 
238
309
  # first year of century if we have: yyuu, yy--, yy--? or xxth century pattern; handles B.C.
239
310
  # @return [Integer, nil] yy00 if date_str matches pattern, nil otherwise
311
+ # rubocop:disable Metrics/AbcSize
240
312
  def first_year_for_century(date_str)
241
313
  return Regexp.last_match(1).to_i * -100 - 99 if date_str.match(BC_CENTURY_REGEX)
242
314
  return Regexp.last_match(1).to_i * 100 if date_str.match(CENTURY_4CHAR_REGEX)
243
315
  return (Regexp.last_match(:first).to_i - 1) * 100 if date_str.match(YY_YY_CENTURY_REGEX)
244
316
  return (Regexp.last_match(1).to_i - 1) * 100 if date_str.match(CENTURY_WORD_REGEX)
245
317
  end
318
+ # rubocop:enable Metrics/AbcSize
246
319
 
247
320
  # last year of century if we have: yyuu, yy--, yy--? or xxth century pattern
248
321
  # @return [Integer, nil] yy99 if date_str matches pattern, nil otherwise; also nil if B.C. in pattern
@@ -304,7 +377,7 @@ class ParseDate
304
377
  end
305
378
 
306
379
  FIRST_LAST_EARLY_NUMERIC_REGEX =
307
- Regexp.new(/^(?<first>\-?\d{1,3})\??\s*(-|–|or)\s*(?<last>\-?\d{1,4})\??([^\du\-\[]|$)/im)
380
+ Regexp.new(/^(?<first>-?\d{1,3})\??\s*(-|–|–|or|to)\s*(?<last>-?\d{1,4})\??([^\du\-\[]|$)/im)
308
381
 
309
382
  # Integer value for latest year if we have early numeric year range or single early numeric year
310
383
  # @return [Integer, nil] year if date_str matches pattern; nil otherwise
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class ParseDate
4
- VERSION = '0.3.3'
4
+ VERSION = '0.4.3'
5
5
  end
data/parse_date.gemspec CHANGED
@@ -32,8 +32,9 @@ Gem::Specification.new do |spec|
32
32
 
33
33
  spec.add_development_dependency 'bundler', '~> 2.0'
34
34
  spec.add_development_dependency 'pry-byebug'
35
- spec.add_development_dependency 'rake', '~> 10.0'
35
+ spec.add_development_dependency 'rake', '~> 13.0.3'
36
36
  spec.add_development_dependency 'rspec', '~> 3.0'
37
- spec.add_development_dependency 'rubocop', '~> 0.74.0'
38
- spec.add_development_dependency 'simplecov'
37
+ spec.add_development_dependency 'rubocop', '~> 1.12'
38
+ spec.add_development_dependency 'rubocop-rspec'
39
+ spec.add_development_dependency 'simplecov', '~> 0.17.1' # CodeClimate cannot use SimpleCov >= 0.18.0 for generating test coverage
39
40
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_date
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-28 00:00:00.000000000 Z
11
+ date: 2021-03-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '10.0'
61
+ version: 13.0.3
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '10.0'
68
+ version: 13.0.3
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -86,16 +86,16 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: 0.74.0
89
+ version: '1.12'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: 0.74.0
96
+ version: '1.12'
97
97
  - !ruby/object:Gem::Dependency
98
- name: simplecov
98
+ name: rubocop-rspec
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
101
  - - ">="
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: simplecov
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.17.1
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.17.1
111
125
  description: Get normalized date values for searching, faceting and display (e.g.
112
126
  in Solr search engine)
113
127
  email:
@@ -138,7 +152,7 @@ metadata:
138
152
  allowed_push_host: https://rubygems.org/
139
153
  homepage_uri: https://github.com/sul-dlss/parse_date
140
154
  source_code_uri: https://github.com/sul-dlss/parse_date
141
- post_install_message:
155
+ post_install_message:
142
156
  rdoc_options: []
143
157
  require_paths:
144
158
  - lib
@@ -153,8 +167,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
153
167
  - !ruby/object:Gem::Version
154
168
  version: '0'
155
169
  requirements: []
156
- rubygems_version: 3.0.3
157
- signing_key:
170
+ rubygems_version: 3.2.3
171
+ signing_key:
158
172
  specification_version: 4
159
173
  summary: parse date values out of strings and normalize them
160
174
  test_files: []