parse_date 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -1
- data/.rubocop_todo.yml +15 -23
- data/README.md +121 -34
- data/lib/parse_date.rb +45 -6
- data/lib/parse_date/int_from_string.rb +209 -82
- data/lib/parse_date/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3927636e715e52bf2f38a8c0f3f384b06dc29971000c4b700a5e9f8a128c144b
|
4
|
+
data.tar.gz: 1f6b9fcb0f3bbf37f5d8417ee21c92ee19c87604394453dbe60a4f16b722e59c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c8d350764a5d813e5daa7e5abc0b17d9167b81b249322b52f181e7ea5ccba58f468883f30f6bdbd91e3a43f4da03270cd62581ea2e23ae5ac2a45261ecdf5e24
|
7
|
+
data.tar.gz: 6e421ee0b10f44f2cee021dc198cddb74a8640639b7c66ccb74f515ae891c4be6cadd47d6b5b12af1f4a9d6f3698f1fc3fe224c6be5aed03ce98eb5376c15d8f
|
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -1,34 +1,33 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2019-10-
|
3
|
+
# on 2019-10-21 14:32:14 -0700 using RuboCop version 0.74.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count:
|
10
|
-
# Cop supports --auto-correct.
|
11
|
-
Lint/StringConversionInInterpolation:
|
12
|
-
Exclude:
|
13
|
-
- 'lib/parse_date/int_from_string.rb'
|
14
|
-
|
15
|
-
# Offense count: 3
|
9
|
+
# Offense count: 5
|
16
10
|
Metrics/AbcSize:
|
17
|
-
Max:
|
11
|
+
Max: 37
|
18
12
|
|
19
|
-
# Offense count:
|
13
|
+
# Offense count: 8
|
20
14
|
# Configuration parameters: CountComments, ExcludedMethods.
|
21
15
|
# ExcludedMethods: refine
|
22
16
|
Metrics/BlockLength:
|
23
|
-
Max:
|
17
|
+
Max: 812
|
24
18
|
|
25
|
-
# Offense count:
|
19
|
+
# Offense count: 4
|
26
20
|
Metrics/CyclomaticComplexity:
|
27
|
-
Max:
|
21
|
+
Max: 12
|
28
22
|
|
29
23
|
# Offense count: 1
|
24
|
+
# Configuration parameters: CountComments.
|
25
|
+
Metrics/ModuleLength:
|
26
|
+
Max: 168
|
27
|
+
|
28
|
+
# Offense count: 4
|
30
29
|
Metrics/PerceivedComplexity:
|
31
|
-
Max:
|
30
|
+
Max: 12
|
32
31
|
|
33
32
|
# Offense count: 2
|
34
33
|
Style/Documentation:
|
@@ -53,18 +52,11 @@ Style/RegexpLiteral:
|
|
53
52
|
Exclude:
|
54
53
|
- 'lib/parse_date/int_from_string.rb'
|
55
54
|
|
56
|
-
# Offense count:
|
55
|
+
# Offense count: 3
|
57
56
|
# Cop supports --auto-correct.
|
58
57
|
# Configuration parameters: EnforcedStyleForMultiline.
|
59
58
|
# SupportedStylesForMultiline: comma, consistent_comma, no_comma
|
60
59
|
Style/TrailingCommaInArrayLiteral:
|
61
60
|
Exclude:
|
62
61
|
- 'spec/parse_date/int_from_string_spec.rb'
|
63
|
-
|
64
|
-
# Offense count: 1
|
65
|
-
# Cop supports --auto-correct.
|
66
|
-
# Configuration parameters: EnforcedStyleForMultiline.
|
67
|
-
# SupportedStylesForMultiline: comma, consistent_comma, no_comma
|
68
|
-
Style/TrailingCommaInHashLiteral:
|
69
|
-
Exclude:
|
70
|
-
- 'spec/parse_date/int_from_string_spec.rb'
|
62
|
+
- 'spec/parse_date_spec.rb'
|
data/README.md
CHANGED
@@ -30,40 +30,127 @@ ParseDate has class methods for date string parsing.
|
|
30
30
|
```
|
31
31
|
require 'parse_date'
|
32
32
|
|
33
|
-
ParseDate.
|
34
|
-
ParseDate.
|
35
|
-
ParseDate.
|
36
|
-
ParseDate.
|
37
|
-
ParseDate.
|
38
|
-
ParseDate.
|
39
|
-
ParseDate.
|
40
|
-
ParseDate.
|
41
|
-
ParseDate.
|
42
|
-
ParseDate.
|
43
|
-
ParseDate.
|
44
|
-
ParseDate.
|
45
|
-
|
46
|
-
ParseDate.
|
47
|
-
ParseDate.
|
48
|
-
ParseDate.
|
49
|
-
ParseDate.
|
50
|
-
ParseDate.
|
51
|
-
ParseDate.
|
52
|
-
|
53
|
-
ParseDate.
|
54
|
-
ParseDate.
|
55
|
-
ParseDate.
|
56
|
-
ParseDate.
|
57
|
-
ParseDate.
|
58
|
-
ParseDate.
|
59
|
-
ParseDate.
|
60
|
-
ParseDate.
|
61
|
-
ParseDate.
|
62
|
-
ParseDate.
|
63
|
-
ParseDate.
|
64
|
-
ParseDate.
|
65
|
-
ParseDate.
|
66
|
-
ParseDate.
|
33
|
+
ParseDate.parse_range('12/25/00') # [2000]
|
34
|
+
ParseDate.parse_range('5-1-25') # [1925]
|
35
|
+
ParseDate.parse_range('1666 B.C.') # [-1666]
|
36
|
+
ParseDate.parse_range('-914') # [-914]
|
37
|
+
ParseDate.parse_range('[c1926]') # [1926]
|
38
|
+
ParseDate.parse_range('ca. 1558') # [1558]
|
39
|
+
ParseDate.parse_range('195-') # (1950..1959).to_a
|
40
|
+
ParseDate.parse_range('199u') # (1990..1999).to_a
|
41
|
+
ParseDate.parse_range('197?') # (1970..1979).to_a
|
42
|
+
ParseDate.parse_range('196x') # (1960..1969).to_a
|
43
|
+
ParseDate.parse_range('18th century CE') # (1700..1799).to_a
|
44
|
+
ParseDate.parse_range('17uu') # (1700..1799).to_a
|
45
|
+
ParseDate.parse_range('between 1694 and 1799') # (1694..1799).to_a
|
46
|
+
ParseDate.parse_range('between 1 and 5') # (1..5).to_a
|
47
|
+
ParseDate.parse_range('between 300 and 150 B.C.') # (-300..-150).to_a
|
48
|
+
ParseDate.parse_range('-5 - 3') # (-5..3).to_a
|
49
|
+
ParseDate.parse_range('1496-1499') # (1496..1499).to_a
|
50
|
+
ParseDate.parse_range('1750?-1867') # (1750..1867).to_a
|
51
|
+
ParseDate.parse_range('17--?-18--?') # (1700..1899).to_a
|
52
|
+
ParseDate.parse_range('1835 or 1836') # [1835, 1836]
|
53
|
+
ParseDate.parse_range('17-- or 18--?') # (1700..1899).to_a
|
54
|
+
ParseDate.parse_range('-2 or 1?') # (-2..1).to_a
|
55
|
+
ParseDate.parse_range('17th or 18th century?') # (1600..1799).to_a
|
56
|
+
ParseDate.parse_range('ca. 5th–6th century A.D.') # (400..599).to_a
|
57
|
+
ParseDate.parse_range('ca. 9th–8th century B.C.') # (-999..-800).to_a
|
58
|
+
ParseDate.parse_range('ca. 13th–12th century B.C.') # (-1399..-1200).to_a
|
59
|
+
ParseDate.parse_range('5th century B.C.') # (-599..-500).to_a
|
60
|
+
ParseDate.parse_range('1975 - 1905') # first year > last year, raises error
|
61
|
+
ParseDate.parse_range('-100 - -150') # first year > last year, raises error
|
62
|
+
ParseDate.parse_range('1975 or 1905') # first year > last year, raises error
|
63
|
+
ParseDate.parse_range('2050') # year later than current year + 1, raises error
|
64
|
+
ParseDate.parse_range('12345') # year later than current year + 1, raises error
|
65
|
+
ParseDate.parse_range('random text') # can't parse years, raises error
|
66
|
+
ParseDate.parse_range(nil) # can't parse years, raises error
|
67
|
+
|
68
|
+
ParseDate.earliest_year('12/25/00') # 2000
|
69
|
+
ParseDate.earliest_year('5-1-21') # 1921
|
70
|
+
ParseDate.earliest_year('1666 B.C.') # -1666
|
71
|
+
ParseDate.earliest_year('-914') # -914
|
72
|
+
ParseDate.earliest_year('[c1926]') # 1926
|
73
|
+
ParseDate.earliest_year('ca. 1558') # 1558
|
74
|
+
ParseDate.earliest_year('195-') # 1950
|
75
|
+
ParseDate.earliest_year('199u') # 1990
|
76
|
+
ParseDate.earliest_year('197?') # 1970
|
77
|
+
ParseDate.earliest_year('196x') # 1960
|
78
|
+
ParseDate.earliest_year('18th century CE') # 1700
|
79
|
+
ParseDate.earliest_year('17uu') # 1700
|
80
|
+
ParseDate.earliest_year('between 1694 and 1799') # 1694
|
81
|
+
ParseDate.earliest_year('between 1 and 5') # 1
|
82
|
+
ParseDate.earliest_year('between 300 and 150 B.C.') # -300
|
83
|
+
ParseDate.earliest_year('1496-1499') # 1496
|
84
|
+
ParseDate.earliest_year('1750?-1867') # 1750
|
85
|
+
ParseDate.earliest_year('17--?-18--?') # 1700
|
86
|
+
ParseDate.earliest_year('1835 or 1836') # 1835
|
87
|
+
ParseDate.earliest_year('17-- or 18--?') # 1700
|
88
|
+
ParseDate.earliest_year('17th or 18th century?') # 1600
|
89
|
+
ParseDate.earliest_year('ca. 5th–6th century A.D.') # 400
|
90
|
+
ParseDate.earliest_year('ca. 9th–8th century B.C.') # -999
|
91
|
+
ParseDate.earliest_year('ca. 13th–12th century B.C.') # -1399
|
92
|
+
ParseDate.earliest_year('5th century B.C.') # -599
|
93
|
+
|
94
|
+
ParseDate.latest_year('195-') # 1959
|
95
|
+
ParseDate.latest_year('199u') # 1999
|
96
|
+
ParseDate.latest_year('197?') # 1979
|
97
|
+
ParseDate.latest_year('196x') # 1969
|
98
|
+
ParseDate.latest_year('18th century CE') # 1799
|
99
|
+
ParseDate.latest_year('17uu') # 1799
|
100
|
+
ParseDate.latest_year('between 1694 and 1799') # 1799
|
101
|
+
ParseDate.latest_year('between 1 and 5') # 5
|
102
|
+
ParseDate.latest_year('between 300 and 150 B.C.') # -150
|
103
|
+
ParseDate.latest_year('1496-1499') # 1499
|
104
|
+
ParseDate.latest_year('1750?-1867') # 1867
|
105
|
+
ParseDate.latest_year('17--?-18--?') # 1899
|
106
|
+
ParseDate.latest_year('1757-58') # 1758
|
107
|
+
ParseDate.latest_year('1975-05') # 1975 (range invalid)
|
108
|
+
ParseDate.latest_year('1835 or 1836') # 1836
|
109
|
+
ParseDate.latest_year('17-- or 18--?') # 1899
|
110
|
+
ParseDate.latest_year('17th or 18th century?') # 1799
|
111
|
+
ParseDate.latest_year('ca. 5th–6th century A.D.') # 599
|
112
|
+
ParseDate.latest_year('ca. 9th–8th century B.C.') # -800
|
113
|
+
ParseDate.latest_year('ca. 13th–12th century B.C.') # -1200
|
114
|
+
ParseDate.latest_year('5th century B.C.') # -500
|
115
|
+
ParseDate.latest_year('-5 - 3') # 3
|
116
|
+
|
117
|
+
ParseDate.range_array('1993', '1995') # [1993, 1994, 1995]
|
118
|
+
ParseDate.range_array(1993, 1995) # [1993, 1994, 1995]
|
119
|
+
ParseDate.range_array(0, '0001') # [0, 1]
|
120
|
+
ParseDate.range_array('-0003', '0000') # [-3, -2, -1, 0]
|
121
|
+
ParseDate.range_array(-1, 1) # [-1, 0, 1]
|
122
|
+
ParseDate.range_array(15, 15) # [15]
|
123
|
+
ParseDate.range_array(-100, '-99') # [-100, -99]
|
124
|
+
ParseDate.range_array('98', 101) # [98, 99, 100, 101]
|
125
|
+
ParseDate.range_array('word1', 'word2') # throws ArgumentError
|
126
|
+
ParseDate.range_array('1993', 1990) # throws StandardError - bad range
|
127
|
+
ParseDate.range_array('12345', 12345) # throws StandardError - bad range
|
128
|
+
|
129
|
+
ParseDate.year_range_valid?(1975, 1905) # false, first year > last year
|
130
|
+
ParseDate.year_range_valid?(-100, -150) # false, first year > last year
|
131
|
+
ParseDate.year_range_valid?(2050, 2070) # false, year later than current year + 1
|
132
|
+
ParseDate.year_range_valid?(2007, 2050) # false, year later than current year + 1
|
133
|
+
ParseDate.year_range_valid?(2007, 2009) # true
|
134
|
+
ParseDate.year_range_valid?(75, 150) # true
|
135
|
+
ParseDate.year_range_valid?(-3, 2) # true
|
136
|
+
ParseDate.year_range_valid?(-100, -50) # true
|
137
|
+
ParseDate.year_range_valid?(-1500, -1499) # true
|
138
|
+
ParseDate.year_range_valid?(-15000, -14999) # true
|
139
|
+
|
140
|
+
ParseDate.year_int_valid?(0) # true
|
141
|
+
ParseDate.year_int_valid?(5) # true
|
142
|
+
ParseDate.year_int_valid?(33) # true
|
143
|
+
ParseDate.year_int_valid?(150) # true
|
144
|
+
ParseDate.year_int_valid?(2019) # true
|
145
|
+
ParseDate.year_int_valid?(Date.today.year + 1) # true
|
146
|
+
ParseDate.year_int_valid?(-3) # true
|
147
|
+
ParseDate.year_int_valid?(-35) # true
|
148
|
+
ParseDate.year_int_valid?(-999) # true
|
149
|
+
ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
|
150
|
+
ParseDate.year_int_valid?('165x') # false
|
151
|
+
ParseDate.year_int_valid?('198-') # false
|
152
|
+
ParseDate.year_int_valid?('random text') # false
|
153
|
+
ParseDate.year_int_valid?(nil) # false
|
67
154
|
```
|
68
155
|
|
69
156
|
## Development
|
data/lib/parse_date.rb
CHANGED
@@ -26,15 +26,54 @@ class ParseDate
|
|
26
26
|
extend ParseDate::IntFromString
|
27
27
|
|
28
28
|
# class method delegation
|
29
|
-
def self.earliest_year(
|
30
|
-
ParseDate::IntFromString.earliest_year(
|
29
|
+
def self.earliest_year(date_str)
|
30
|
+
ParseDate::IntFromString.earliest_year(date_str)
|
31
31
|
end
|
32
32
|
|
33
|
-
def self.latest_year(
|
34
|
-
ParseDate::IntFromString.latest_year(
|
33
|
+
def self.latest_year(date_str)
|
34
|
+
ParseDate::IntFromString.latest_year(date_str)
|
35
35
|
end
|
36
36
|
|
37
|
-
def self.year_int_valid?(
|
38
|
-
ParseDate::IntFromString.year_int_valid?(
|
37
|
+
def self.year_int_valid?(date_str)
|
38
|
+
ParseDate::IntFromString.year_int_valid?(date_str)
|
39
|
+
end
|
40
|
+
|
41
|
+
# @return [Array] array of Integer year values from earliest year to latest year, inclusive
|
42
|
+
def self.parse_range(date_str)
|
43
|
+
first = earliest_year(date_str)
|
44
|
+
last = latest_year(date_str)
|
45
|
+
raise ParseDate::Error, "Unable to parse range from '#{date_str}'" unless year_range_valid?(first, last)
|
46
|
+
|
47
|
+
range_array(first, last)
|
48
|
+
rescue StandardError => e
|
49
|
+
raise ParseDate::Error, "Unable to parse range from '#{date_str}': #{e.message}"
|
50
|
+
end
|
51
|
+
|
52
|
+
# true if:
|
53
|
+
# both years are not newer than (current year + 1)
|
54
|
+
# first_year <= last_year
|
55
|
+
# false otherwise
|
56
|
+
def self.year_range_valid?(first_year, last_year)
|
57
|
+
upper_bound = Date.today.year + 2
|
58
|
+
return false if first_year > upper_bound || last_year > upper_bound
|
59
|
+
return false if first_year > last_year
|
60
|
+
|
61
|
+
true
|
62
|
+
end
|
63
|
+
|
64
|
+
# @param [Integer, String] first_year, expecting integer or parseable string for .to_i
|
65
|
+
# @param [Integer, String] last_year, expecting integer or parseable string for .to_i
|
66
|
+
# @return [Array] array of Integer year values from first to last, inclusive
|
67
|
+
def self.range_array(first_year, last_year)
|
68
|
+
first_year = first_year.to_i if first_year.is_a?(String) && first_year.match?(/^-?\d+$/)
|
69
|
+
last_year = last_year.to_i if last_year.is_a?(String) && last_year.match?(/^-?\d+$/)
|
70
|
+
|
71
|
+
return [] unless last_year || first_year
|
72
|
+
return [first_year] if last_year.nil? && first_year
|
73
|
+
return [last_year] if first_year.nil? && last_year
|
74
|
+
raise(StandardError, "unable to create year range array from #{first_year}, #{last_year}") unless
|
75
|
+
year_range_valid?(first_year, last_year)
|
76
|
+
|
77
|
+
Range.new(first_year, last_year).to_a
|
39
78
|
end
|
40
79
|
end
|
@@ -7,7 +7,7 @@ class ParseDate
|
|
7
7
|
# Parse (Year) Integers from Date Strings
|
8
8
|
module IntFromString
|
9
9
|
|
10
|
-
# earliest year as Integer if we can parse one from
|
10
|
+
# earliest year as Integer if we can parse one from date_str
|
11
11
|
# e.g. if 17uu, result is 1700
|
12
12
|
# NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
|
13
13
|
# found in our actual date strings in stanford-mods records), then
|
@@ -15,25 +15,30 @@ class ParseDate
|
|
15
15
|
# 1/1/17 -> 2017
|
16
16
|
# 1/1/27 -> 1927
|
17
17
|
# @return [Integer, nil] Integer year if we could parse one, nil otherwise
|
18
|
-
def self.earliest_year(
|
19
|
-
return
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
result ||= ParseDate.send(:
|
18
|
+
def self.earliest_year(date_str)
|
19
|
+
return unless date_str && !date_str.empty?
|
20
|
+
return if date_str == '0000-00-00' # shpc collection has these useless dates
|
21
|
+
|
22
|
+
# B.C. first (match longest string first)
|
23
|
+
return ParseDate.send(:earliest_century_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
|
24
|
+
return ParseDate.send(:between_bc_earliest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
|
25
|
+
return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(YEAR_BC_REGEX)
|
26
|
+
|
27
|
+
result ||= ParseDate.send(:between_earliest_year, date_str)
|
28
|
+
result ||= ParseDate.send(:first_four_digits, date_str)
|
29
|
+
result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
|
30
|
+
result ||= ParseDate.send(:first_year_for_decade, date_str) # 198x or 201x
|
31
|
+
result ||= ParseDate.send(:first_year_for_century, date_str) # includes BC
|
32
|
+
result ||= ParseDate.send(:year_for_early_numeric, date_str)
|
28
33
|
unless result
|
29
34
|
# try removing brackets between digits in case we have 169[5] or [18]91
|
30
|
-
no_brackets = ParseDate.send(:remove_brackets,
|
35
|
+
no_brackets = ParseDate.send(:remove_brackets, date_str)
|
31
36
|
return earliest_year(no_brackets) if no_brackets
|
32
37
|
end
|
33
38
|
result.to_i if result && year_int_valid?(result.to_i)
|
34
39
|
end
|
35
40
|
|
36
|
-
# latest year as Integer if we can parse one from
|
41
|
+
# latest year as Integer if we can parse one from date_str
|
37
42
|
# e.g. if 17uu, result is 1799
|
38
43
|
# NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
|
39
44
|
# found in our actual date strings in stanford-mods records), then
|
@@ -41,29 +46,36 @@ class ParseDate
|
|
41
46
|
# 1/1/17 -> 2017
|
42
47
|
# 1/1/27 -> 1927
|
43
48
|
# @return [Integer, nil] Integer year if we could parse one, nil otherwise
|
44
|
-
def self.latest_year(
|
45
|
-
return
|
46
|
-
|
47
|
-
|
48
|
-
#
|
49
|
-
return ParseDate.send(:
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
result ||= ParseDate.send(:
|
55
|
-
|
56
|
-
result ||= ParseDate.send(:
|
57
|
-
result ||= ParseDate.send(:
|
49
|
+
def self.latest_year(date_str)
|
50
|
+
return unless date_str && !date_str.empty?
|
51
|
+
return if date_str == '0000-00-00' # shpc collection has these useless dates
|
52
|
+
|
53
|
+
# B.C. first (match longest string first)
|
54
|
+
return ParseDate.send(:last_year_mult_centuries_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
|
55
|
+
return ParseDate.send(:between_bc_latest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
|
56
|
+
return ParseDate.send(:last_year_for_bc_century, date_str) if date_str.match(BC_CENTURY_REGEX)
|
57
|
+
return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(BC_REGEX)
|
58
|
+
|
59
|
+
result ||= ParseDate.send(:between_latest_year, date_str)
|
60
|
+
result ||= ParseDate.send(:hyphen_4digit_latest_year, date_str)
|
61
|
+
result ||= ParseDate.send(:hyphen_2digit_latest_year, date_str)
|
62
|
+
result ||= ParseDate.send(:yyuu_after_hyphen, date_str)
|
63
|
+
result ||= ParseDate.send(:year_after_or, date_str)
|
64
|
+
result ||= ParseDate.send(:first_four_digits, date_str)
|
65
|
+
result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
|
66
|
+
result ||= ParseDate.send(:last_year_for_decade, date_str) # 198x or 201x
|
67
|
+
result ||= ParseDate.send(:last_year_mult_centuries, date_str) # nth-nth century
|
68
|
+
result ||= ParseDate.send(:last_year_for_century, date_str)
|
69
|
+
result ||= ParseDate.send(:last_year_for_early_numeric, date_str)
|
58
70
|
unless result
|
59
71
|
# try removing brackets between digits in case we have 169[5] or [18]91
|
60
|
-
no_brackets = ParseDate.send(:remove_brackets,
|
72
|
+
no_brackets = ParseDate.send(:remove_brackets, date_str)
|
61
73
|
return earliest_year(no_brackets) if no_brackets
|
62
74
|
end
|
63
75
|
result.to_i if result && year_int_valid?(result.to_i)
|
64
76
|
end
|
65
77
|
|
66
|
-
# true if the year is between -999 and (current year +
|
78
|
+
# true if the year is between -999 and (current year + 1), inclusive
|
67
79
|
# @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
|
68
80
|
def self.year_int_valid?(year)
|
69
81
|
return false unless year.is_a? Integer
|
@@ -73,18 +85,94 @@ class ParseDate
|
|
73
85
|
|
74
86
|
protected
|
75
87
|
|
76
|
-
|
88
|
+
REGEX_OPTS = Regexp::IGNORECASE | Regexp::MULTILINE
|
89
|
+
BC_REGEX = Regexp.new(/\s*B\.?\s*C\.?/im)
|
90
|
+
BRACKETS_BETWEEN_DIGITS_REGEX = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
|
77
91
|
|
78
92
|
# removes brackets between digits such as 169[5] or [18]91
|
79
|
-
def remove_brackets(
|
80
|
-
|
93
|
+
def remove_brackets(date_str)
|
94
|
+
date_str.delete('[]') if date_str.match(BRACKETS_BETWEEN_DIGITS_REGEX)
|
95
|
+
end
|
96
|
+
|
97
|
+
YYYY_HYPHEN_YYYY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*-\s*(?<last>\d{4})\??/m)
|
98
|
+
|
99
|
+
# Integer value for latest year if we have "yyyy-yyyy" pattern
|
100
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
101
|
+
def hyphen_4digit_latest_year(date_str)
|
102
|
+
Regexp.last_match(:last).to_i if date_str.match(YYYY_HYPHEN_YYYY_REGEX)
|
103
|
+
end
|
104
|
+
|
105
|
+
YYYY_HYPHEN_YY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*-\s*(?<last>\d{2})\??([^-0-9].*)?$/)
|
106
|
+
|
107
|
+
# Integer value for latest year if we have "yyyy-yy" pattern
|
108
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
109
|
+
def hyphen_2digit_latest_year(date_str)
|
110
|
+
matches = date_str.match(YYYY_HYPHEN_YY_REGEX)
|
111
|
+
return unless matches
|
112
|
+
|
113
|
+
first = Regexp.last_match(:first)
|
114
|
+
century = first[0, 2]
|
115
|
+
last = "#{century}#{Regexp.last_match(:last)}"
|
116
|
+
last.to_i if ParseDate.year_range_valid?(first.to_i, last.to_i)
|
117
|
+
end
|
118
|
+
|
119
|
+
YYUU = '\\d{1,2}[u\\-]{2}'
|
120
|
+
YYuu_HYPHEN_YYuu_REGEX = Regexp.new("(?<first>#{YYUU})\\??\\s*-\\s*(?<last>#{YYUU})\\??([^u\\-]|$)??", REGEX_OPTS)
|
121
|
+
|
122
|
+
# Integer value for latest year if we have "yyuu-yyuu" pattern
|
123
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
124
|
+
def yyuu_after_hyphen(date_str)
|
125
|
+
last_year_for_century(Regexp.last_match(:last)).to_i if date_str.match(YYuu_HYPHEN_YYuu_REGEX)
|
126
|
+
end
|
127
|
+
|
128
|
+
YYXX = '\\d{1,2}[u\\-\\d]{2}'
|
129
|
+
YExx_OR_YExx_REGEX = Regexp.new("(?<first>#{YYXX})\\??\\s*or\\s*(?<last>#{YYXX})\\??([^u\\-]|$)??", REGEX_OPTS)
|
130
|
+
|
131
|
+
# Integer value for latest year if we have "yyyy or yyyy" pattern
|
132
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
133
|
+
def year_after_or(date_str)
|
134
|
+
latest_year(Regexp.last_match(:last)).to_i if date_str.match(YExx_OR_YExx_REGEX)
|
135
|
+
end
|
136
|
+
|
137
|
+
# NOTE: some actual data seemed to have a different hyphen character (an en dash, slightly longer)
|
138
|
+
YY_YY_CENTURY_REGEX = Regexp.new(/(?<first>\d{1,2})[a-z]{2}?\s*(-|–|or)\s*(?<last>\d{1,2})[a-z]{2}?\s+centur.*/im)
|
139
|
+
|
140
|
+
# Integer value for latest year if we have nth-nth century pattern
|
141
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
142
|
+
def last_year_mult_centuries(date_str)
|
143
|
+
matches = date_str.match(YY_YY_CENTURY_REGEX)
|
144
|
+
return unless matches
|
145
|
+
|
146
|
+
nth = Regexp.last_match(:last).to_i
|
147
|
+
(nth - 1) * 100 + 99
|
148
|
+
end
|
149
|
+
|
150
|
+
YY_YY_CENTURY_BC_REGEX = Regexp.new("#{YY_YY_CENTURY_REGEX}#{BC_REGEX}", REGEX_OPTS)
|
151
|
+
|
152
|
+
# Integer value for earliest year if we have nth-nth century BC pattern
|
153
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
154
|
+
def earliest_century_bc(date_str)
|
155
|
+
matches = date_str.match(YY_YY_CENTURY_BC_REGEX)
|
156
|
+
return unless matches
|
157
|
+
|
158
|
+
nth = Regexp.last_match(:first).to_i
|
159
|
+
nth * -100 - 99
|
160
|
+
end
|
161
|
+
|
162
|
+
# Integer value for latest year if we have nth-nth century BC pattern
|
163
|
+
# @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
|
164
|
+
def last_year_mult_centuries_bc(date_str)
|
165
|
+
matches = date_str.match(YY_YY_CENTURY_BC_REGEX)
|
166
|
+
return unless matches
|
167
|
+
|
168
|
+
nth = Regexp.last_match(:last).to_i
|
169
|
+
nth * -100
|
81
170
|
end
|
82
171
|
|
83
|
-
# looks for 4 consecutive digits in
|
84
|
-
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if
|
85
|
-
def first_four_digits(
|
86
|
-
|
87
|
-
matches&.to_s
|
172
|
+
# looks for 4 consecutive digits in date_str and returns first occurrence if found
|
173
|
+
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str has yyyy, nil otherwise
|
174
|
+
def first_four_digits(date_str)
|
175
|
+
Regexp.last_match(1) if date_str.match(/(\d{4})([^\d]|$)/im)
|
88
176
|
end
|
89
177
|
|
90
178
|
# returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
|
@@ -92,16 +180,14 @@ class ParseDate
|
|
92
180
|
# we use 20 as century digits unless it is greater than current year:
|
93
181
|
# 1/1/17 -> 2017
|
94
182
|
# 1/1/27 -> 1927
|
95
|
-
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if
|
96
|
-
def year_from_mm_dd_yy(
|
97
|
-
|
98
|
-
|
99
|
-
slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
|
183
|
+
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str matches pattern, nil otherwise
|
184
|
+
def year_from_mm_dd_yy(date_str)
|
185
|
+
slash_matches = date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
|
100
186
|
if slash_matches
|
101
|
-
date_obj = Date.strptime(
|
187
|
+
date_obj = Date.strptime(date_str, '%m/%d/%y')
|
102
188
|
else
|
103
|
-
hyphen_matches =
|
104
|
-
date_obj = Date.strptime(
|
189
|
+
hyphen_matches = date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
|
190
|
+
date_obj = Date.strptime(date_str, '%m-%d-%y') if hyphen_matches
|
105
191
|
end
|
106
192
|
date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday) if date_obj && date_obj > Date.today
|
107
193
|
date_obj.year.to_s if date_obj
|
@@ -109,66 +195,107 @@ class ParseDate
|
|
109
195
|
nil # explicitly want nil if date won't parse
|
110
196
|
end
|
111
197
|
|
112
|
-
|
198
|
+
DECADE_4CHAR_REGEX = Regexp.new('(^|\D)\d{3}[u\-?x]', REGEX_OPTS)
|
113
199
|
|
114
200
|
# first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
|
115
201
|
# note that these are the only decade patterns found in our actual date strings in MODS records
|
116
|
-
# @return [String, nil] 4 digit year (e.g. 1860, 1950) if
|
117
|
-
def first_year_for_decade(
|
118
|
-
decade_matches =
|
202
|
+
# @return [String, nil] 4 digit year (e.g. 1860, 1950) if date_str matches pattern, nil otherwise
|
203
|
+
def first_year_for_decade(date_str)
|
204
|
+
decade_matches = date_str.match(DECADE_4CHAR_REGEX)
|
119
205
|
changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
|
120
206
|
ParseDate.first_four_digits(changed_to_zero) if changed_to_zero
|
121
207
|
end
|
122
208
|
|
123
209
|
# last year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
|
124
210
|
# note that these are the only decade patterns found in our actual date strings in MODS records
|
125
|
-
# @return [String, nil] 4 digit year (e.g.
|
126
|
-
def last_year_for_decade(
|
127
|
-
decade_matches =
|
211
|
+
# @return [String, nil] 4 digit year (e.g. 1869, 1959) if date_str matches pattern, nil otherwise
|
212
|
+
def last_year_for_decade(date_str)
|
213
|
+
decade_matches = date_str.match(DECADE_4CHAR_REGEX)
|
128
214
|
changed_to_nine = decade_matches.to_s.tr('u\-?x', '9') if decade_matches
|
129
215
|
ParseDate.first_four_digits(changed_to_nine) if changed_to_nine
|
130
216
|
end
|
131
217
|
|
132
|
-
|
133
|
-
|
218
|
+
CENTURY_WORD_REGEX = Regexp.new('(\d{1,2})[a-z]{2}?\s*century', REGEX_OPTS)
|
219
|
+
CENTURY_4CHAR_REGEX = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)', REGEX_OPTS)
|
220
|
+
BC_CENTURY_REGEX = Regexp.new("#{CENTURY_WORD_REGEX}\\s+#{BC_REGEX}", REGEX_OPTS)
|
221
|
+
|
222
|
+
# first year of century if we have: yyuu, yy--, yy--? or xxth century pattern; handles B.C.
|
223
|
+
# @return [Integer, nil] yy00 if date_str matches pattern, nil otherwise
|
224
|
+
def first_year_for_century(date_str)
|
225
|
+
return Regexp.last_match(1).to_i * -100 - 99 if date_str.match(BC_CENTURY_REGEX)
|
226
|
+
return Regexp.last_match(1).to_i * 100 if date_str.match(CENTURY_4CHAR_REGEX)
|
227
|
+
return (Regexp.last_match(:first).to_i - 1) * 100 if date_str.match(YY_YY_CENTURY_REGEX)
|
228
|
+
return (Regexp.last_match(1).to_i - 1) * 100 if date_str.match(CENTURY_WORD_REGEX)
|
229
|
+
end
|
230
|
+
|
231
|
+
# last year of century if we have: yyuu, yy--, yy--? or xxth century pattern
|
232
|
+
# @return [Integer, nil] yy99 if date_str matches pattern, nil otherwise; B.C. is not checked here (latest_year matches B.C. patterns first)
|
233
|
+
def last_year_for_century(date_str)
|
234
|
+
return Regexp.last_match(1).to_i * 100 + 99 if date_str.match(CENTURY_4CHAR_REGEX)
|
235
|
+
return (Regexp.last_match(1).to_i - 1) * 100 + 99 if date_str.match(CENTURY_WORD_REGEX)
|
236
|
+
end
|
237
|
+
|
238
|
+
# last year of century (as Integer) if we have: nth century BC
|
239
|
+
# @return [Integer, nil] negative year n * -100 (e.g. -500 for '5th century B.C.') if date_str matches pattern, nil otherwise
|
240
|
+
def last_year_for_bc_century(date_str)
|
241
|
+
Regexp.last_match(1).to_i * -100 if date_str.match(BC_CENTURY_REGEX)
|
242
|
+
end
|
243
|
+
|
244
|
+
BETWEEN_Yn_AND_Yn_REGEX = Regexp.new(/between\s+(?<first>\d{1,4})\??\s+and\s+(?<last>\d{1,4})\??/im)
|
134
245
|
|
135
|
-
#
|
136
|
-
#
|
137
|
-
# @return [
|
138
|
-
def
|
139
|
-
|
140
|
-
return if orig_date_str =~ /B\.C\./
|
141
|
-
return "#{Regexp.last_match(1)}00" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
142
|
-
return "#{(Regexp.last_match(1).to_i - 1).to_s}00" if orig_date_str.match(CENTURY_WORD_REGEXP)
|
246
|
+
# Integer value for earliest if we have "between y and y" pattern
|
247
|
+
# NOTE: must match for BC first with between_bc_earliest_year
|
248
|
+
# @return [Integer, nil] year if date_str matches pattern; nil otherwise
|
249
|
+
def between_earliest_year(date_str)
|
250
|
+
Regexp.last_match(:first).to_i if date_str.match(BETWEEN_Yn_AND_Yn_REGEX)
|
143
251
|
end
|
144
252
|
|
145
|
-
#
|
146
|
-
#
|
147
|
-
# @return [
|
148
|
-
def
|
149
|
-
|
150
|
-
|
151
|
-
|
253
|
+
# Integer value for latest year if we have "between y and y" pattern
|
254
|
+
# NOTE: must match for BC first with between_bc_latest_year
|
255
|
+
# @return [Integer, nil] year if date_str matches pattern; nil otherwise
|
256
|
+
def between_latest_year(date_str)
|
257
|
+
Regexp.last_match(:last).to_i if date_str.match(BETWEEN_Yn_AND_Yn_REGEX)
|
258
|
+
end
|
259
|
+
|
260
|
+
YEAR_BC_REGEX = Regexp.new("(\\d{1,4})#{BC_REGEX}", REGEX_OPTS)
|
152
261
|
|
153
|
-
|
154
|
-
|
262
|
+
# Integer value for B.C. if we have B.C. pattern
|
263
|
+
# @return [Integer, nil] -ddd if B.C. in pattern; nil otherwise
|
264
|
+
def year_int_for_bc(date_str)
|
265
|
+
"-#{Regexp.last_match(1)}".to_i if date_str.match(YEAR_BC_REGEX)
|
155
266
|
end
|
156
267
|
|
157
|
-
|
268
|
+
BETWEEN_Yn_AND_Yn_BC_REGEX = Regexp.new("#{BETWEEN_Yn_AND_Yn_REGEX}#{BC_REGEX}", REGEX_OPTS)
|
158
269
|
|
159
|
-
# Integer
|
160
|
-
# @return [Integer, nil]
|
161
|
-
def
|
162
|
-
|
163
|
-
"-#{Regexp.last_match(1)}".to_i if bc_matches
|
270
|
+
# Integer value for earliest year if we have "between y and y B.C." pattern
|
271
|
+
# @return [Integer, nil] -ddd if date_str matches pattern; nil otherwise
|
272
|
+
def between_bc_earliest_year(date_str)
|
273
|
+
"-#{Regexp.last_match(:first)}".to_i if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
|
164
274
|
end
|
165
275
|
|
166
|
-
|
276
|
+
# Integer value for latest year if we have "between y and y B.C." pattern
|
277
|
+
# @return [Integer, nil] -ddd if date_str matches pattern; nil otherwise
|
278
|
+
def between_bc_latest_year(date_str)
|
279
|
+
"-#{Regexp.last_match(:last)}".to_i if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
|
280
|
+
end
|
281
|
+
|
282
|
+
EARLY_NUMERIC_REGEX = Regexp.new('^\-?\d{1,3}([^\du\-\[]|$)', REGEX_OPTS)
|
283
|
+
|
284
|
+
# year if date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
|
285
|
+
# @return [Integer, nil] year if date_str matches pattern; nil otherwise
|
286
|
+
def year_for_early_numeric(date_str)
|
287
|
+
date_str.to_i if date_str.match(EARLY_NUMERIC_REGEX) || date_str =~ /^-\d{4}([^\du\-\[]|$)$/
|
288
|
+
end
|
289
|
+
|
290
|
+
FIRST_LAST_EARLY_NUMERIC_REGEX =
|
291
|
+
Regexp.new(/^(?<first>\-?\d{1,3})\??\s*(-|–|or)\s*(?<last>\-?\d{1,4})\??([^\du\-\[]|$)/im)
|
292
|
+
|
293
|
+
# Integer value for latest year if we have early numeric year range or single early numeric year
|
294
|
+
# @return [Integer, nil] year if date_str matches pattern; nil otherwise
|
295
|
+
def last_year_for_early_numeric(date_str)
|
296
|
+
return Regexp.last_match(:last).to_i if date_str.match(FIRST_LAST_EARLY_NUMERIC_REGEX)
|
167
297
|
|
168
|
-
|
169
|
-
# @return [String, nil] -ddd if orig_date_str matches pattern; nil otherwise
|
170
|
-
def year_for_early_numeric(orig_date_str)
|
171
|
-
orig_date_str if orig_date_str.match(EARLY_NUMERIC) || orig_date_str =~ /^-\d{4}$/
|
298
|
+
year_for_early_numeric(date_str) # if single year, not matched above
|
172
299
|
end
|
173
300
|
end
|
174
301
|
end
|
data/lib/parse_date/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_date
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: zeitwerk
|