parse_date 0.3.3 → 0.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/pull_request_template.md +9 -1
- data/.rubocop.yml +79 -6
- data/.rubocop_todo.yml +1 -41
- data/README.md +37 -4
- data/lib/parse_date.rb +6 -1
- data/lib/parse_date/int_from_string.rb +125 -52
- data/lib/parse_date/version.rb +1 -1
- data/parse_date.gemspec +4 -3
- metadata +25 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 60c444ed79ea6842f49bfb44d2adc5bd1a6790d483fe43e0bd4a0880001a6005
|
4
|
+
data.tar.gz: a62f906a4c908c4b774ef9fe1dae2a047deee79ee6569688d1c2db9796bb3b2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a949b307fb3259f1a20fa279aa2af395137b514b35b15dbce8c63701047462b06b5b6cb31723f25dbef2c8f1db249cf8e95914d92c113dd4439ed6737267878f
|
7
|
+
data.tar.gz: e1e3e154a6bc4851a7aafa199da3ee66e0d4b7d5d27537528cee36d2f0b2f80761fbbffffa1d2a544ca4a204f4fd7816f85a6d0b975824fff06cead4d64cf3f2
|
data/.rubocop.yml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
inherit_from: .rubocop_todo.yml
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
Gemspec/RequiredRubyVersion:
|
4
|
+
Enabled: false
|
5
5
|
|
6
6
|
Layout/EmptyLinesAroundClassBody:
|
7
7
|
Enabled: false
|
@@ -9,15 +9,27 @@ Layout/EmptyLinesAroundClassBody:
|
|
9
9
|
Layout/EmptyLinesAroundModuleBody:
|
10
10
|
Enabled: false
|
11
11
|
|
12
|
+
Layout/LineLength:
|
13
|
+
Exclude:
|
14
|
+
- parse_date.gemspec
|
15
|
+
|
16
|
+
Lint/MixedRegexpCaptureTypes:
|
17
|
+
Enabled: false
|
18
|
+
|
12
19
|
Metrics/BlockLength:
|
13
20
|
Exclude:
|
14
21
|
- spec/**/*
|
15
22
|
|
16
|
-
Metrics/LineLength:
|
17
|
-
Max: 120
|
18
|
-
|
19
23
|
Metrics/MethodLength:
|
20
|
-
Max:
|
24
|
+
Max: 15
|
25
|
+
|
26
|
+
Metrics/ModuleLength:
|
27
|
+
Exclude:
|
28
|
+
- lib/parse_date/int_from_string.rb
|
29
|
+
|
30
|
+
Style/Documentation:
|
31
|
+
Exclude:
|
32
|
+
- lib/parse_date.rb
|
21
33
|
|
22
34
|
Style/NumericLiterals:
|
23
35
|
Enabled: false
|
@@ -35,3 +47,64 @@ Style/WordArray:
|
|
35
47
|
|
36
48
|
Style/YodaCondition:
|
37
49
|
Enabled: false
|
50
|
+
|
51
|
+
Gemspec/DateAssignment: # (new in 1.10)
|
52
|
+
Enabled: true
|
53
|
+
Layout/SpaceBeforeBrackets: # (new in 1.7)
|
54
|
+
Enabled: true
|
55
|
+
Lint/AmbiguousAssignment: # (new in 1.7)
|
56
|
+
Enabled: true
|
57
|
+
Lint/DeprecatedConstants: # (new in 1.8)
|
58
|
+
Enabled: true
|
59
|
+
Lint/DuplicateBranch: # (new in 1.3)
|
60
|
+
Enabled: true
|
61
|
+
Lint/DuplicateRegexpCharacterClassElement: # (new in 1.1)
|
62
|
+
Enabled: true
|
63
|
+
Lint/EmptyBlock: # (new in 1.1)
|
64
|
+
Enabled: true
|
65
|
+
Lint/EmptyClass: # (new in 1.3)
|
66
|
+
Enabled: true
|
67
|
+
Lint/LambdaWithoutLiteralBlock: # (new in 1.8)
|
68
|
+
Enabled: true
|
69
|
+
Lint/NoReturnInBeginEndBlocks: # (new in 1.2)
|
70
|
+
Enabled: true
|
71
|
+
Lint/NumberedParameterAssignment: # (new in 1.9)
|
72
|
+
Enabled: true
|
73
|
+
Lint/OrAssignmentToConstant: # (new in 1.9)
|
74
|
+
Enabled: true
|
75
|
+
Lint/RedundantDirGlobSort: # (new in 1.8)
|
76
|
+
Enabled: true
|
77
|
+
Lint/SymbolConversion: # (new in 1.9)
|
78
|
+
Enabled: true
|
79
|
+
Lint/ToEnumArguments: # (new in 1.1)
|
80
|
+
Enabled: true
|
81
|
+
Lint/TripleQuotes: # (new in 1.9)
|
82
|
+
Enabled: true
|
83
|
+
Lint/UnexpectedBlockArity: # (new in 1.5)
|
84
|
+
Enabled: true
|
85
|
+
Lint/UnmodifiedReduceAccumulator: # (new in 1.1)
|
86
|
+
Enabled: true
|
87
|
+
Style/ArgumentsForwarding: # (new in 1.1)
|
88
|
+
Enabled: true
|
89
|
+
Style/CollectionCompact: # (new in 1.2)
|
90
|
+
Enabled: true
|
91
|
+
Style/DocumentDynamicEvalDefinition: # (new in 1.1)
|
92
|
+
Enabled: true
|
93
|
+
Style/EndlessMethod: # (new in 1.8)
|
94
|
+
Enabled: true
|
95
|
+
Style/HashConversion: # (new in 1.10)
|
96
|
+
Enabled: true
|
97
|
+
Style/HashExcept: # (new in 1.7)
|
98
|
+
Enabled: true
|
99
|
+
Style/IfWithBooleanLiteralBranches: # (new in 1.9)
|
100
|
+
Enabled: true
|
101
|
+
Style/NegatedIfElseCondition: # (new in 1.2)
|
102
|
+
Enabled: true
|
103
|
+
Style/NilLambda: # (new in 1.3)
|
104
|
+
Enabled: true
|
105
|
+
Style/RedundantArgument: # (new in 1.4)
|
106
|
+
Enabled: true
|
107
|
+
Style/StringChars: # (new in 1.12)
|
108
|
+
Enabled: true
|
109
|
+
Style/SwapValues: # (new in 1.1)
|
110
|
+
Enabled: true
|
data/.rubocop_todo.yml
CHANGED
@@ -1,47 +1,7 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on
|
3
|
+
# on 2021-03-26 19:20:09 UTC using RuboCop version 1.12.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
|
-
|
9
|
-
# Offense count: 5
|
10
|
-
Metrics/AbcSize:
|
11
|
-
Max: 41
|
12
|
-
|
13
|
-
# Offense count: 4
|
14
|
-
Metrics/CyclomaticComplexity:
|
15
|
-
Max: 12
|
16
|
-
|
17
|
-
# Offense count: 1
|
18
|
-
# Configuration parameters: CountComments.
|
19
|
-
Metrics/ModuleLength:
|
20
|
-
Max: 178
|
21
|
-
|
22
|
-
# Offense count: 4
|
23
|
-
Metrics/PerceivedComplexity:
|
24
|
-
Max: 12
|
25
|
-
|
26
|
-
# Offense count: 2
|
27
|
-
Style/Documentation:
|
28
|
-
Exclude:
|
29
|
-
- 'spec/**/*'
|
30
|
-
- 'test/**/*'
|
31
|
-
- 'lib/parse_date.rb'
|
32
|
-
|
33
|
-
# Offense count: 1
|
34
|
-
# Cop supports --auto-correct.
|
35
|
-
# Configuration parameters: EnforcedOctalStyle.
|
36
|
-
# SupportedOctalStyles: zero_with_o, zero_only
|
37
|
-
Style/NumericLiteralPrefix:
|
38
|
-
Exclude:
|
39
|
-
- 'spec/parse_date/int_from_string_spec.rb'
|
40
|
-
|
41
|
-
# Offense count: 1
|
42
|
-
# Cop supports --auto-correct.
|
43
|
-
# Configuration parameters: EnforcedStyle, AllowInnerSlashes.
|
44
|
-
# SupportedStyles: slashes, percent_r, mixed
|
45
|
-
Style/RegexpLiteral:
|
46
|
-
Exclude:
|
47
|
-
- 'lib/parse_date/int_from_string.rb'
|
data/README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
[![Gem Version](https://badge.fury.io/rb/parse_date.svg)](https://badge.fury.io/rb/parse_date)
|
2
|
-
[![Build Status](https://travis-ci.
|
2
|
+
[![Build Status](https://travis-ci.com/sul-dlss/parse_date.svg?branch=main)](https://travis-ci.com/sul-dlss/parse_date)
|
3
3
|
[![Maintainability](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/maintainability)](https://codeclimate.com/github/sul-dlss/parse_date/maintainability)
|
4
4
|
[![Test Coverage](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/test_coverage)](https://codeclimate.com/github/sul-dlss/parse_date/test_coverage)
|
5
5
|
|
@@ -40,6 +40,14 @@ ParseDate.parse_range('195-') # (1950..1959).to_a
|
|
40
40
|
ParseDate.parse_range('199u') # (1990..1999).to_a
|
41
41
|
ParseDate.parse_range('197?') # (1970..1979).to_a
|
42
42
|
ParseDate.parse_range('196x') # (1960..1969).to_a
|
43
|
+
ParseDate.parse_range('1990s') # (1990..1999).to_a
|
44
|
+
ParseDate.parse_range('1990s?') # (1990..1999).to_a
|
45
|
+
ParseDate.parse_range('ca. 1930s') # (1930..1939).to_a
|
46
|
+
ParseDate.parse_range('1928-1980s') # (1928..1989).to_a
|
47
|
+
ParseDate.parse_range('1940s-1990') # (1940..1990).to_a
|
48
|
+
ParseDate.parse_range('1980s-1990s') # (1980..1999).to_a
|
49
|
+
ParseDate.parse_range('1675-7') # [1675, 1676, 1677]
|
50
|
+
ParseDate.parse_range('1040–1 CE') # [1040, 1041]
|
43
51
|
ParseDate.parse_range('18th century CE') # (1700..1799).to_a
|
44
52
|
ParseDate.parse_range('17uu') # (1700..1799).to_a
|
45
53
|
ParseDate.parse_range('between 1694 and 1799') # (1694..1799).to_a
|
@@ -52,19 +60,26 @@ ParseDate.parse_range('17--?-18--?') # (1700..1899).to_a
|
|
52
60
|
ParseDate.parse_range('1835 or 1836') # [1835, 1836]
|
53
61
|
ParseDate.parse_range('17-- or 18--?') # (1700..1899).to_a
|
54
62
|
ParseDate.parse_range('-2 or 1?') # (-2..1).to_a
|
63
|
+
ParseDate.parse_range('1500? to 1582') # (1500..1582).to_a
|
55
64
|
ParseDate.parse_range('17th or 18th century?') # (1600..1799).to_a
|
56
65
|
ParseDate.parse_range('ca. 5th–6th century A.D.') # (400..599).to_a
|
57
66
|
ParseDate.parse_range('ca. 9th–8th century B.C.') # (-999..-800).to_a
|
58
67
|
ParseDate.parse_range('ca. 13th–12th century B.C.') # (-1399..-1200).to_a
|
59
68
|
ParseDate.parse_range('5th century B.C.') # (-599..-500).to_a
|
60
69
|
ParseDate.parse_range('502-504') # [502, 503, 504]
|
70
|
+
ParseDate.parse_range('950-60') # (950..960).to_a
|
71
|
+
ParseDate.parse_range('-0150 - -0100') # (-150..-100).to_a
|
61
72
|
ParseDate.parse_range('-2100 - -2000') # (-2100..-2000).to_a
|
73
|
+
ParseDate.parse_range('1230—1239 CE') # (1230..1239).to_a (alternate hyphen char)
|
74
|
+
ParseDate.parse_range('996–1021 CE') # (996..1021).to_a (diff alternate hyphen char)
|
62
75
|
ParseDate.parse_range('1975 - 1905') # last year < first year, raises error
|
63
76
|
ParseDate.parse_range('-100 - -150') # last year < first year, raises error
|
64
77
|
ParseDate.parse_range('1975 or 1905') # last year < first year, raises error
|
65
|
-
ParseDate.parse_range('2050')
|
66
|
-
ParseDate.parse_range('
|
67
|
-
ParseDate.parse_range(
|
78
|
+
ParseDate.parse_range('1975 - 2050') # single invalid year endpoint, raises error
|
79
|
+
ParseDate.parse_range('2050') # nil - only invalid year endpoints present
|
80
|
+
ParseDate.parse_range('2045 - 2050') # nil - only invalid year endpoints present
|
81
|
+
ParseDate.parse_range('random text') # nil - can't parse years
|
82
|
+
ParseDate.parse_range(nil) # nil - can't parse years
|
68
83
|
|
69
84
|
ParseDate.earliest_year('12/25/00') # 2000
|
70
85
|
ParseDate.earliest_year('5-1-21') # 1921
|
@@ -82,7 +97,10 @@ ParseDate.earliest_year('17uu') # 1700
|
|
82
97
|
ParseDate.earliest_year('between 1694 and 1799') # 1694
|
83
98
|
ParseDate.earliest_year('between 1 and 5') # 1
|
84
99
|
ParseDate.earliest_year('between 300 and 150 B.C.') # -300
|
100
|
+
ParseDate.earliest_year('1500? to 1582') # 1500
|
85
101
|
ParseDate.earliest_year('1496-1499') # 1496
|
102
|
+
ParseDate.earliest_year('1230—1239 CE') # 1230 (alternate hyphen char)
|
103
|
+
ParseDate.earliest_year('996–1021 CE') # 996 (diff alternate hyphen char)
|
86
104
|
ParseDate.earliest_year('1750?-1867') # 1750
|
87
105
|
ParseDate.earliest_year('17--?-18--?') # 1700
|
88
106
|
ParseDate.earliest_year('1835 or 1836') # 1835
|
@@ -93,6 +111,8 @@ ParseDate.earliest_year('ca. 9th–8th century B.C.') # -999
|
|
93
111
|
ParseDate.earliest_year('ca. 13th–12th century B.C.') # -1399
|
94
112
|
ParseDate.earliest_year('5th century B.C.') # -599
|
95
113
|
ParseDate.earliest_year('502-504') # 502
|
114
|
+
ParseDate.earliest_year('950-60') # 950
|
115
|
+
ParseDate.earliest_year('-0150 - -0100') # -150
|
96
116
|
ParseDate.earliest_year('-2100 - -2000') # -2100
|
97
117
|
|
98
118
|
ParseDate.latest_year('20000222') # 2000
|
@@ -100,12 +120,23 @@ ParseDate.latest_year('195-') # 1959
|
|
100
120
|
ParseDate.latest_year('199u') # 1999
|
101
121
|
ParseDate.latest_year('197?') # 1979
|
102
122
|
ParseDate.latest_year('196x') # 1969
|
123
|
+
ParseDate.latest_year('1990s') # 1999
|
124
|
+
ParseDate.latest_year('1990s?') # 1999
|
125
|
+
ParseDate.latest_year('ca. 1930s') # 1939
|
126
|
+
ParseDate.latest_year('1928-1980s') # 1989
|
127
|
+
ParseDate.latest_year('1940s-1990') # 1990
|
128
|
+
ParseDate.latest_year('1980s-1990s') # 1999
|
129
|
+
ParseDate.latest_year('1675-7') # 1677
|
130
|
+
ParseDate.latest_year('1040–1 CE') # 1041
|
103
131
|
ParseDate.latest_year('18th century CE') # 1799
|
104
132
|
ParseDate.latest_year('17uu') # 1799
|
105
133
|
ParseDate.latest_year('between 1694 and 1799') # 1799
|
106
134
|
ParseDate.latest_year('between 1 and 5') # 5
|
107
135
|
ParseDate.latest_year('between 300 and 150 B.C.') # -150
|
136
|
+
ParseDate.latest_year('1500? to 1582') # 1582
|
108
137
|
ParseDate.latest_year('1496-1499') # 1499
|
138
|
+
ParseDate.latest_year('1230—1239 CE') # 1239 (alternate hyphen char)
|
139
|
+
ParseDate.latest_year('996–1021 CE') # 1021 (diff alternate hyphen char)
|
109
140
|
ParseDate.latest_year('1750?-1867') # 1867
|
110
141
|
ParseDate.latest_year('17--?-18--?') # 1899
|
111
142
|
ParseDate.latest_year('1757-58') # 1758
|
@@ -119,6 +150,8 @@ ParseDate.latest_year('ca. 13th–12th century B.C.') # -1200
|
|
119
150
|
ParseDate.latest_year('5th century B.C.') # -500
|
120
151
|
ParseDate.latest_year('-5 - 3') # 3
|
121
152
|
ParseDate.latest_year('502-504') # 504
|
153
|
+
ParseDate.latest_year('950-60') # 960
|
154
|
+
ParseDate.latest_year('-0150 - -0100') # -100
|
122
155
|
ParseDate.latest_year('-2100 - -2000') # -2000
|
123
156
|
|
124
157
|
ParseDate.range_array('1993', '1995') # [1993, 1994, 1995]
|
data/lib/parse_date.rb
CHANGED
@@ -42,6 +42,7 @@ class ParseDate
|
|
42
42
|
def self.parse_range(date_str)
|
43
43
|
first = earliest_year(date_str)
|
44
44
|
last = latest_year(date_str)
|
45
|
+
return nil unless first || last
|
45
46
|
raise ParseDate::Error, "Unable to parse range from '#{date_str}'" unless year_range_valid?(first, last)
|
46
47
|
|
47
48
|
range_array(first, last)
|
@@ -64,6 +65,8 @@ class ParseDate
|
|
64
65
|
# @param [Integer, String] first_year, expecting integer or parseable string for .to_i
|
65
66
|
# @param [Integer, String] last_year, expecting integer or parseable string for .to_i
|
66
67
|
# @return [Array] array of Integer year values from first to last, inclusive
|
68
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
69
|
+
# rubocop:disable Metrics/PerceivedComplexity
|
67
70
|
def self.range_array(first_year, last_year)
|
68
71
|
first_year = first_year.to_i if first_year.is_a?(String) && first_year.match?(/^-?\d+$/)
|
69
72
|
last_year = last_year.to_i if last_year.is_a?(String) && last_year.match?(/^-?\d+$/)
|
@@ -71,9 +74,11 @@ class ParseDate
|
|
71
74
|
return [] unless last_year || first_year
|
72
75
|
return [first_year] if last_year.nil? && first_year
|
73
76
|
return [last_year] if first_year.nil? && last_year
|
74
|
-
raise(
|
77
|
+
raise(ParseDate::Error, "unable to create year range array from #{first_year}, #{last_year}") unless
|
75
78
|
year_range_valid?(first_year, last_year)
|
76
79
|
|
77
80
|
Range.new(first_year, last_year).to_a
|
78
81
|
end
|
82
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
83
|
+
# rubocop:enable Metrics/PerceivedComplexity
|
79
84
|
end
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'date' # so upstream callers don't have to require it
|
4
|
-
|
5
3
|
class ParseDate
|
6
4
|
|
7
5
|
# Parse (Year) Integers from Date Strings
|
@@ -20,23 +18,15 @@ class ParseDate
|
|
20
18
|
return if date_str == '0000-00-00' # shpc collection has these useless dates
|
21
19
|
|
22
20
|
# B.C. first (match longest string first)
|
23
|
-
|
24
|
-
return
|
25
|
-
return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(YEAR_BC_REGEX)
|
21
|
+
bc_result = ParseDate.send(:earliest_year_bc_parsing, date_str)
|
22
|
+
return bc_result if bc_result
|
26
23
|
|
27
|
-
result
|
28
|
-
result
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
result ||= ParseDate.send(:year_for_early_numeric, date_str)
|
34
|
-
unless result
|
35
|
-
# try removing brackets between digits in case we have 169[5] or [18]91
|
36
|
-
no_brackets = ParseDate.send(:remove_brackets, date_str)
|
37
|
-
return earliest_year(no_brackets) if no_brackets
|
38
|
-
end
|
39
|
-
result.to_i if result && year_int_valid?(result.to_i)
|
24
|
+
result = ParseDate.send(:earliest_year_parsing, date_str)
|
25
|
+
return result if result
|
26
|
+
|
27
|
+
# try removing brackets between digits in case we have 169[5] or [18]91
|
28
|
+
no_brackets = ParseDate.send(:remove_brackets, date_str)
|
29
|
+
earliest_year(no_brackets) if no_brackets
|
40
30
|
end
|
41
31
|
|
42
32
|
# latest year as Integer if we can parse one from date_str
|
@@ -52,30 +42,15 @@ class ParseDate
|
|
52
42
|
return if date_str == '0000-00-00' # shpc collection has these useless dates
|
53
43
|
|
54
44
|
# B.C. first (match longest string first)
|
55
|
-
|
56
|
-
return
|
57
|
-
return ParseDate.send(:last_year_for_bc_century, date_str) if date_str.match(BC_CENTURY_REGEX)
|
58
|
-
return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(BC_REGEX)
|
45
|
+
bc_result = ParseDate.send(:latest_year_bc_parsing, date_str)
|
46
|
+
return bc_result if bc_result
|
59
47
|
|
60
|
-
result
|
61
|
-
result
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
result ||= ParseDate.send(:negative_first_four_digits, date_str)
|
67
|
-
result ||= ParseDate.send(:first_four_digits, date_str)
|
68
|
-
result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
|
69
|
-
result ||= ParseDate.send(:last_year_for_decade, date_str) # 198x or 201x
|
70
|
-
result ||= ParseDate.send(:last_year_mult_centuries, date_str) # nth-nth century
|
71
|
-
result ||= ParseDate.send(:last_year_for_century, date_str)
|
72
|
-
result ||= ParseDate.send(:last_year_for_early_numeric, date_str)
|
73
|
-
unless result
|
74
|
-
# try removing brackets between digits in case we have 169[5] or [18]91
|
75
|
-
no_brackets = ParseDate.send(:remove_brackets, date_str)
|
76
|
-
return earliest_year(no_brackets) if no_brackets
|
77
|
-
end
|
78
|
-
result.to_i if result && year_int_valid?(result.to_i)
|
48
|
+
result = ParseDate.send(:latest_year_parsing, date_str)
|
49
|
+
return result if result
|
50
|
+
|
51
|
+
# try removing brackets between digits in case we have 169[5] or [18]91
|
52
|
+
no_brackets = ParseDate.send(:remove_brackets, date_str)
|
53
|
+
latest_year(no_brackets) if no_brackets
|
79
54
|
end
|
80
55
|
|
81
56
|
# true if the year is between -9999 and (current year + 1), inclusive
|
@@ -88,24 +63,95 @@ class ParseDate
|
|
88
63
|
|
89
64
|
protected
|
90
65
|
|
66
|
+
def earliest_year_bc_parsing(date_str)
|
67
|
+
return ParseDate.send(:earliest_century_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
|
68
|
+
return ParseDate.send(:between_bc_earliest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
|
69
|
+
return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(YEAR_BC_REGEX)
|
70
|
+
end
|
71
|
+
|
72
|
+
def earliest_year_parsing(date_str)
|
73
|
+
[
|
74
|
+
# longest string first, more or less
|
75
|
+
:between_earliest_year,
|
76
|
+
:hyphen_4digit_earliest_year,
|
77
|
+
:negative_first_four_digits,
|
78
|
+
:first_four_digits,
|
79
|
+
:year_from_mm_dd_yy,
|
80
|
+
:first_year_for_decade, # 198x or 201x
|
81
|
+
:first_year_for_century, # includes some BC
|
82
|
+
:year_for_early_numeric
|
83
|
+
].each do |method_name|
|
84
|
+
result = ParseDate.send(method_name, date_str)
|
85
|
+
return result.to_i if result && year_int_valid?(result.to_i)
|
86
|
+
end
|
87
|
+
nil
|
88
|
+
end
|
89
|
+
|
90
|
+
def latest_year_bc_parsing(date_str)
|
91
|
+
return ParseDate.send(:last_year_mult_centuries_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
|
92
|
+
return ParseDate.send(:between_bc_latest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
|
93
|
+
return ParseDate.send(:last_year_for_bc_century, date_str) if date_str.match(BC_CENTURY_REGEX)
|
94
|
+
return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(BC_REGEX)
|
95
|
+
end
|
96
|
+
|
97
|
+
# rubocop:disable Metrics/MethodLength
|
98
|
+
def latest_year_parsing(date_str)
|
99
|
+
result = nil
|
100
|
+
[
|
101
|
+
# longest string first, more or less
|
102
|
+
:between_latest_year,
|
103
|
+
:hyphen_4digit_latest_year,
|
104
|
+
:hyphen_2digit_latest_year,
|
105
|
+
:hyphen_1digit_latest_year,
|
106
|
+
:yyuu_after_hyphen,
|
107
|
+
:year_after_or,
|
108
|
+
:negative_4digits_after_hyphen,
|
109
|
+
:negative_first_four_digits,
|
110
|
+
:last_year_for_0s_decade,
|
111
|
+
:first_four_digits,
|
112
|
+
:year_from_mm_dd_yy,
|
113
|
+
:last_year_for_decade, # 198x or 201x
|
114
|
+
:last_year_mult_centuries, # nth-nth century
|
115
|
+
:last_year_for_century,
|
116
|
+
:last_year_for_early_numeric
|
117
|
+
].each do |method|
|
118
|
+
result ||= ParseDate.send(method, date_str)
|
119
|
+
return result.to_i if result && year_int_valid?(result.to_i)
|
120
|
+
end
|
121
|
+
nil
|
122
|
+
end
|
123
|
+
# rubocop:enable Metrics/MethodLength
|
124
|
+
|
91
125
|
REGEX_OPTS = Regexp::IGNORECASE | Regexp::MULTILINE
|
92
126
|
BC_REGEX = Regexp.new(/\s*B\.?\s*C\.?/im)
|
93
|
-
BRACKETS_BETWEEN_DIGITS_REGEX = Regexp.new(
|
127
|
+
BRACKETS_BETWEEN_DIGITS_REGEX = Regexp.new("\\d[#{Regexp.escape('[]')}]\\d")
|
94
128
|
|
95
129
|
# removes brackets between digits such as 169[5] or [18]91
|
96
130
|
def remove_brackets(date_str)
|
97
131
|
date_str.delete('[]') if date_str.match(BRACKETS_BETWEEN_DIGITS_REGEX)
|
98
132
|
end
|
99
133
|
|
100
|
-
YYYY_HYPHEN_YYYY_REGEX = Regexp.new(/(?<first>\d{4})
|
134
|
+
YYYY_HYPHEN_YYYY_REGEX = Regexp.new(/(?<first>\d{3,4})s?\??\s*(-|—|–|to)\s*(?<last>\d{4}s?)\??/m)
|
135
|
+
|
136
|
+
# Integer value for earliest year if we have "yyyy-yyyy" pattern
|
137
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
138
|
+
def hyphen_4digit_earliest_year(date_str)
|
139
|
+
Regexp.last_match(:first).to_i if date_str.match(YYYY_HYPHEN_YYYY_REGEX)
|
140
|
+
end
|
101
141
|
|
102
142
|
# Integer value for latest year if we have "yyyy-yyyy" pattern
|
103
143
|
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
104
144
|
def hyphen_4digit_latest_year(date_str)
|
105
|
-
Regexp.last_match(:last)
|
145
|
+
latest = Regexp.last_match(:last) if date_str.match(YYYY_HYPHEN_YYYY_REGEX)
|
146
|
+
if ParseDate.year_int_valid?(latest.to_i)
|
147
|
+
ParseDate.latest_year(latest) # accommodates '1980s - 1990s'
|
148
|
+
else
|
149
|
+
# return the bad value; parse_range might need to complain about it
|
150
|
+
latest
|
151
|
+
end
|
106
152
|
end
|
107
153
|
|
108
|
-
YYYY_HYPHEN_YY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*
|
154
|
+
YYYY_HYPHEN_YY_REGEX = Regexp.new(/(?<first>\d{3,4})\??\s*(-|—|–|to)\s*(?<last>\d{2})\??([^-0-9].*)?$/)
|
109
155
|
|
110
156
|
# Integer value for latest year if we have "yyyy-yy" pattern
|
111
157
|
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
@@ -114,14 +160,28 @@ class ParseDate
|
|
114
160
|
return unless matches
|
115
161
|
|
116
162
|
first = Regexp.last_match(:first)
|
117
|
-
century = first[0
|
163
|
+
century = first[0..-3] # whatever is before the last 2 digits
|
118
164
|
last = "#{century}#{Regexp.last_match(:last)}"
|
119
165
|
last.to_i if ParseDate.year_range_valid?(first.to_i, last.to_i)
|
120
166
|
end
|
121
167
|
|
168
|
+
YYYY_HYPHEN_Y_REGEX = Regexp.new(/(?<first>\d{3,4})\??\s*(-|—|–|to)\s*(?<last>\d{1})\??([^-0-9].*)?$/)
|
169
|
+
|
170
|
+
# Integer value for latest year if we have "yyyy-y" pattern
|
171
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
172
|
+
def hyphen_1digit_latest_year(date_str)
|
173
|
+
matches = date_str.match(YYYY_HYPHEN_Y_REGEX)
|
174
|
+
return unless matches
|
175
|
+
|
176
|
+
first = Regexp.last_match(:first)
|
177
|
+
decade = first[0..-2] # whatever is before the last digit
|
178
|
+
last = "#{decade}#{Regexp.last_match(:last)}"
|
179
|
+
last.to_i if ParseDate.year_range_valid?(first.to_i, last.to_i)
|
180
|
+
end
|
181
|
+
|
122
182
|
YYUU = '\\d{1,2}[u\\-]{2}'
|
123
183
|
YYuu_HYPHEN_YYuu_REGEX =
|
124
|
-
Regexp.new("(?<first>#{YYUU})\\??\\s*
|
184
|
+
Regexp.new("(?<first>#{YYUU})\\??\\s*(-|—|–|to)\\s*(?<last>#{YYUU})\\??([^u\\-]|$)??", REGEX_OPTS)
|
125
185
|
|
126
186
|
# Integer value for latest year if we have "yyuu-yyuu" pattern
|
127
187
|
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
@@ -139,7 +199,8 @@ class ParseDate
|
|
139
199
|
end
|
140
200
|
|
141
201
|
# NOTE: some actual data seemed to have a diff hyphen char. (slightly longer)
|
142
|
-
YY_YY_CENTURY_REGEX =
|
202
|
+
YY_YY_CENTURY_REGEX =
|
203
|
+
Regexp.new(/(?<first>\d{1,2})[a-z]{2}?\s*(-|–|–|or|to)\s*(?<last>\d{1,2})[a-z]{2}?\s+centur.*/im)
|
143
204
|
|
144
205
|
# Integer value for latest year if we have nth-nth century pattern
|
145
206
|
# @return [Integer, nil] yy99 if date_str matches pattern; nil otherwise
|
@@ -176,13 +237,13 @@ class ParseDate
|
|
176
237
|
# looks for -yyyy at beginning of date_str and returns if found
|
177
238
|
# @return [String, nil] negative 4 digit year (e.g. -1865) if date_str has -yyyy, nil otherwise
|
178
239
|
def negative_first_four_digits(date_str)
|
179
|
-
Regexp.last_match(1) if date_str.match(/^(
|
240
|
+
Regexp.last_match(1) if date_str.match(/^(-\d{4})/)
|
180
241
|
end
|
181
242
|
|
182
243
|
# looks for -yyyy after hyphen and returns if found
|
183
244
|
# @return [String, nil] negative 4 digit year (e.g. -1865) if date_str has -yyyy - -yyyy, nil otherwise
|
184
245
|
def negative_4digits_after_hyphen(date_str)
|
185
|
-
Regexp.last_match(1) if date_str.match(
|
246
|
+
Regexp.last_match(1) if date_str.match(/-\d{4}\s*(?:-|–|–|or|to)\s*(-\d{4})/)
|
186
247
|
end
|
187
248
|
|
188
249
|
# looks for 4 consecutive digits in date_str and returns first occurrence if found
|
@@ -198,7 +259,7 @@ class ParseDate
|
|
198
259
|
# 1/1/27 -> 1927
|
199
260
|
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str matches pattern, nil otherwise
|
200
261
|
def year_from_mm_dd_yy(date_str)
|
201
|
-
slash_matches = date_str.match(
|
262
|
+
slash_matches = date_str.match(%r{\d{1,2}/\d{1,2}/\d{2}})
|
202
263
|
if slash_matches
|
203
264
|
date_obj = Date.strptime(date_str, '%m/%d/%y')
|
204
265
|
else
|
@@ -211,6 +272,16 @@ class ParseDate
|
|
211
272
|
nil # explicitly want nil if date won't parse
|
212
273
|
end
|
213
274
|
|
275
|
+
DECADE_0S_REGEX = Regexp.new('(^|\D)\d{3}0\'?s($|\D)', REGEX_OPTS)
|
276
|
+
|
277
|
+
# last year of decade (as String) if we have: yyy0s flavor pattern
|
278
|
+
# @return [String, nil] 4 digit year (e.g. 1869, 1959) if date_str matches pattern, nil otherwise
|
279
|
+
def last_year_for_0s_decade(date_str)
|
280
|
+
decade_matches = date_str.match(DECADE_0S_REGEX)
|
281
|
+
changed_to_nine = decade_matches.to_s.sub(/0'?s/, '9') if decade_matches
|
282
|
+
ParseDate.first_four_digits(changed_to_nine) if changed_to_nine
|
283
|
+
end
|
284
|
+
|
214
285
|
DECADE_4CHAR_REGEX = Regexp.new('(^|\D)\d{3}[u\-?x]($|\D)', REGEX_OPTS)
|
215
286
|
|
216
287
|
# first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
|
@@ -237,12 +308,14 @@ class ParseDate
|
|
237
308
|
|
238
309
|
# first year of century if we have: yyuu, yy--, yy--? or xxth century pattern; handles B.C.
|
239
310
|
# @return [Integer, nil] yy00 if date_str matches pattern, nil otherwise
|
311
|
+
# rubocop:disable Metrics/AbcSize
|
240
312
|
def first_year_for_century(date_str)
|
241
313
|
return Regexp.last_match(1).to_i * -100 - 99 if date_str.match(BC_CENTURY_REGEX)
|
242
314
|
return Regexp.last_match(1).to_i * 100 if date_str.match(CENTURY_4CHAR_REGEX)
|
243
315
|
return (Regexp.last_match(:first).to_i - 1) * 100 if date_str.match(YY_YY_CENTURY_REGEX)
|
244
316
|
return (Regexp.last_match(1).to_i - 1) * 100 if date_str.match(CENTURY_WORD_REGEX)
|
245
317
|
end
|
318
|
+
# rubocop:enable Metrics/AbcSize
|
246
319
|
|
247
320
|
# last year of century if we have: yyuu, yy--, yy--? or xxth century pattern
|
248
321
|
# @return [Integer, nil] yy99 if date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
@@ -304,7 +377,7 @@ class ParseDate
|
|
304
377
|
end
|
305
378
|
|
306
379
|
FIRST_LAST_EARLY_NUMERIC_REGEX =
|
307
|
-
Regexp.new(/^(?<first
|
380
|
+
Regexp.new(/^(?<first>-?\d{1,3})\??\s*(-|–|–|or|to)\s*(?<last>-?\d{1,4})\??([^\du\-\[]|$)/im)
|
308
381
|
|
309
382
|
# Integer value for latest year if we have early numeric year range or single early numeric year
|
310
383
|
# @return [Integer, nil] year if date_str matches pattern; nil otherwise
|
data/lib/parse_date/version.rb
CHANGED
data/parse_date.gemspec
CHANGED
@@ -32,8 +32,9 @@ Gem::Specification.new do |spec|
|
|
32
32
|
|
33
33
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
34
34
|
spec.add_development_dependency 'pry-byebug'
|
35
|
-
spec.add_development_dependency 'rake', '~>
|
35
|
+
spec.add_development_dependency 'rake', '~> 13.0.3'
|
36
36
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
37
|
-
spec.add_development_dependency 'rubocop', '~>
|
38
|
-
spec.add_development_dependency '
|
37
|
+
spec.add_development_dependency 'rubocop', '~> 1.12'
|
38
|
+
spec.add_development_dependency 'rubocop-rspec'
|
39
|
+
spec.add_development_dependency 'simplecov', '~> 0.17.1' # CodeClimate cannot use SimpleCov >= 0.18.0 for generating test coverage
|
39
40
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_date
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: zeitwerk
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 13.0.3
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 13.0.3
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,16 +86,16 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
89
|
+
version: '1.12'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
96
|
+
version: '1.12'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: rubocop-rspec
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: simplecov
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.17.1
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.17.1
|
111
125
|
description: Get normalized date values for searching, faceting and display (e.g.
|
112
126
|
in Solr search engine)
|
113
127
|
email:
|
@@ -138,7 +152,7 @@ metadata:
|
|
138
152
|
allowed_push_host: https://rubygems.org/
|
139
153
|
homepage_uri: https://github.com/sul-dlss/parse_date
|
140
154
|
source_code_uri: https://github.com/sul-dlss/parse_date
|
141
|
-
post_install_message:
|
155
|
+
post_install_message:
|
142
156
|
rdoc_options: []
|
143
157
|
require_paths:
|
144
158
|
- lib
|
@@ -153,8 +167,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
153
167
|
- !ruby/object:Gem::Version
|
154
168
|
version: '0'
|
155
169
|
requirements: []
|
156
|
-
rubygems_version: 3.
|
157
|
-
signing_key:
|
170
|
+
rubygems_version: 3.2.3
|
171
|
+
signing_key:
|
158
172
|
specification_version: 4
|
159
173
|
summary: parse date values out of strings and normalize them
|
160
174
|
test_files: []
|