parse_date 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 446afc4299ca93634d6689b20d3a32bdb28460da5627701682733e18bb1f0a16
4
- data.tar.gz: b5f6bbceab8542cc3c977c1c1ea8a4e250f82a9ae3a1eed2b335050067145703
3
+ metadata.gz: 3927636e715e52bf2f38a8c0f3f384b06dc29971000c4b700a5e9f8a128c144b
4
+ data.tar.gz: 1f6b9fcb0f3bbf37f5d8417ee21c92ee19c87604394453dbe60a4f16b722e59c
5
5
  SHA512:
6
- metadata.gz: 052f5d35a64c52f5bd74af2e487b70c8b03c37252cce33554aa3e7d6ba8141d9f0edd7c719aa487dbddb2e465b76d2d1ab222f057e68c7e6fa322eef313a35e6
7
- data.tar.gz: 880e66962d42d0c3f2824510a94edcf9256cc6ba8298815721a2a720e8f9ef2158793820ff71fc3a22a0ad0f3397db3f8e2361753ee661494ed1371117562eb7
6
+ metadata.gz: c8d350764a5d813e5daa7e5abc0b17d9167b81b249322b52f181e7ea5ccba58f468883f30f6bdbd91e3a43f4da03270cd62581ea2e23ae5ac2a45261ecdf5e24
7
+ data.tar.gz: 6e421ee0b10f44f2cee021dc198cddb74a8640639b7c66ccb74f515ae891c4be6cadd47d6b5b12af1f4a9d6f3698f1fc3fe224c6be5aed03ce98eb5376c15d8f
data/.rubocop.yml CHANGED
@@ -13,7 +13,14 @@ Metrics/LineLength:
13
13
  Max: 120
14
14
 
15
15
  Metrics/MethodLength:
16
- Max: 15
16
+ Max: 22
17
+
18
+ Style/NumericLiterals:
19
+ Enabled: false
20
+
21
+ Style/TrailingCommaInHashLiteral:
22
+ Exclude:
23
+ - spec/**/*
17
24
 
18
25
  Style/WordArray:
19
26
  Enabled: false
data/.rubocop_todo.yml CHANGED
@@ -1,34 +1,33 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2019-10-09 15:35:45 -0700 using RuboCop version 0.74.0.
3
+ # on 2019-10-21 14:32:14 -0700 using RuboCop version 0.74.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 2
10
- # Cop supports --auto-correct.
11
- Lint/StringConversionInInterpolation:
12
- Exclude:
13
- - 'lib/parse_date/int_from_string.rb'
14
-
15
- # Offense count: 3
9
+ # Offense count: 5
16
10
  Metrics/AbcSize:
17
- Max: 18
11
+ Max: 37
18
12
 
19
- # Offense count: 4
13
+ # Offense count: 8
20
14
  # Configuration parameters: CountComments, ExcludedMethods.
21
15
  # ExcludedMethods: refine
22
16
  Metrics/BlockLength:
23
- Max: 561
17
+ Max: 812
24
18
 
25
- # Offense count: 3
19
+ # Offense count: 4
26
20
  Metrics/CyclomaticComplexity:
27
- Max: 8
21
+ Max: 12
28
22
 
29
23
  # Offense count: 1
24
+ # Configuration parameters: CountComments.
25
+ Metrics/ModuleLength:
26
+ Max: 168
27
+
28
+ # Offense count: 4
30
29
  Metrics/PerceivedComplexity:
31
- Max: 9
30
+ Max: 12
32
31
 
33
32
  # Offense count: 2
34
33
  Style/Documentation:
@@ -53,18 +52,11 @@ Style/RegexpLiteral:
53
52
  Exclude:
54
53
  - 'lib/parse_date/int_from_string.rb'
55
54
 
56
- # Offense count: 1
55
+ # Offense count: 3
57
56
  # Cop supports --auto-correct.
58
57
  # Configuration parameters: EnforcedStyleForMultiline.
59
58
  # SupportedStylesForMultiline: comma, consistent_comma, no_comma
60
59
  Style/TrailingCommaInArrayLiteral:
61
60
  Exclude:
62
61
  - 'spec/parse_date/int_from_string_spec.rb'
63
-
64
- # Offense count: 1
65
- # Cop supports --auto-correct.
66
- # Configuration parameters: EnforcedStyleForMultiline.
67
- # SupportedStylesForMultiline: comma, consistent_comma, no_comma
68
- Style/TrailingCommaInHashLiteral:
69
- Exclude:
70
- - 'spec/parse_date/int_from_string_spec.rb'
62
+ - 'spec/parse_date_spec.rb'
data/README.md CHANGED
@@ -30,40 +30,127 @@ ParseDate has class methods for date string parsing.
30
30
  ```
31
31
  require 'parse_date'
32
32
 
33
- ParseDate.earliest_year('12/25/00') # 2000
34
- ParseDate.earliest_year('5-1-21') # 1921
35
- ParseDate.earliest_year('1666 B.C.') # -1666
36
- ParseDate.earliest_year('-914') # -914
37
- ParseDate.earliest_year('[c1926]') # 1926
38
- ParseDate.earliest_year('ca. 1558') # 1558
39
- ParseDate.earliest_year('195-') # 1950
40
- ParseDate.earliest_year('199u') # 1990
41
- ParseDate.earliest_year('197?') # 1970
42
- ParseDate.earliest_year('196x') # 1960
43
- ParseDate.earliest_year('18th century CE') # 1700
44
- ParseDate.earliest_year('17uu') # 1700
45
-
46
- ParseDate.latest_year('195-') # 1959
47
- ParseDate.latest_year('199u') # 1999
48
- ParseDate.latest_year('197?') # 1979
49
- ParseDate.latest_year('196x') # 1969
50
- ParseDate.latest_year('18th century CE') # 1799
51
- ParseDate.latest_year('17uu') # 1799
52
-
53
- ParseDate.year_int_valid?(0) # true
54
- ParseDate.year_int_valid?(5) # true
55
- ParseDate.year_int_valid?(33) # true
56
- ParseDate.year_int_valid?(150) # true
57
- ParseDate.year_int_valid?(2019) # true
58
- ParseDate.year_int_valid?(Date.today.year + 1) # true
59
- ParseDate.year_int_valid?(-3) # true
60
- ParseDate.year_int_valid?(-35) # true
61
- ParseDate.year_int_valid?(-999) # true
62
- ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
63
- ParseDate.year_int_valid?(165x) # false
64
- ParseDate.year_int_valid?(198-) # false
65
- ParseDate.year_int_valid?('random text') # false
66
- ParseDate.year_int_valid?(nil) # false
33
+ ParseDate.parse_range('12/25/00') # [2000]
34
+ ParseDate.parse_range('5-1-25') # [1925]
35
+ ParseDate.parse_range('1666 B.C.') # [-1666]
36
+ ParseDate.parse_range('-914') # [-914]
37
+ ParseDate.parse_range('[c1926]') # [1926]
38
+ ParseDate.parse_range('ca. 1558') # [1558]
39
+ ParseDate.parse_range('195-') # (1950..1959).to_a
40
+ ParseDate.parse_range('199u') # (1990..1999).to_a
41
+ ParseDate.parse_range('197?') # (1970..1979).to_a
42
+ ParseDate.parse_range('196x') # (1960..1969).to_a
43
+ ParseDate.parse_range('18th century CE') # (1700..1799).to_a
44
+ ParseDate.parse_range('17uu') # (1700..1799).to_a
45
+ ParseDate.parse_range('between 1694 and 1799') # (1694..1799).to_a
46
+ ParseDate.parse_range('between 1 and 5') # (1..5).to_a
47
+ ParseDate.parse_range('between 300 and 150 B.C.') # (-300..-150).to_a
48
+ ParseDate.parse_range('-5 - 3') # (-5..3).to_a
49
+ ParseDate.parse_range('1496-1499') # (1496..1499).to_a
50
+ ParseDate.parse_range('1750?-1867') # (1750..1867).to_a
51
+ ParseDate.parse_range('17--?-18--?') # (1700..1899).to_a
52
+ ParseDate.parse_range('1835 or 1836') # [1835, 1836]
53
+ ParseDate.parse_range('17-- or 18--?') # (1700..1899).to_a
54
+ ParseDate.parse_range('-2 or 1?') # (-2..1).to_a
55
+ ParseDate.parse_range('17th or 18th century?') # (1600..1799).to_a
56
+ ParseDate.parse_range('ca. 5th–6th century A.D.') # (400..599).to_a
57
+ ParseDate.parse_range('ca. 9th–8th century B.C.') # (-999..-800).to_a
58
+ ParseDate.parse_range('ca. 13th–12th century B.C.') # (-1399..-1200).to_a
59
+ ParseDate.parse_range('5th century B.C.') # (-599..-500).to_a
60
+ ParseDate.parse_range('1975 - 1905') # first year > last year, raises error
61
+ ParseDate.parse_range('-100 - -150') # first year > last year, raises error
62
+ ParseDate.parse_range('1975 or 1905') # first year > last year, raises error
63
+ ParseDate.parse_range('2050') # year later than current year + 1, raises error
64
+ ParseDate.parse_range('12345') # year later than current year + 1, raises error
65
+ ParseDate.parse_range('random text') # can't parse years, raises error
66
+ ParseDate.parse_range(nil) # can't parse years, raises error
67
+
68
+ ParseDate.earliest_year('12/25/00') # 2000
69
+ ParseDate.earliest_year('5-1-21') # 1921
70
+ ParseDate.earliest_year('1666 B.C.') # -1666
71
+ ParseDate.earliest_year('-914') # -914
72
+ ParseDate.earliest_year('[c1926]') # 1926
73
+ ParseDate.earliest_year('ca. 1558') # 1558
74
+ ParseDate.earliest_year('195-') # 1950
75
+ ParseDate.earliest_year('199u') # 1990
76
+ ParseDate.earliest_year('197?') # 1970
77
+ ParseDate.earliest_year('196x') # 1960
78
+ ParseDate.earliest_year('18th century CE') # 1700
79
+ ParseDate.earliest_year('17uu') # 1700
80
+ ParseDate.earliest_year('between 1694 and 1799') # 1694
81
+ ParseDate.earliest_year('between 1 and 5') # 1
82
+ ParseDate.earliest_year('between 300 and 150 B.C.') # -300
83
+ ParseDate.earliest_year('1496-1499') # 1496
84
+ ParseDate.earliest_year('1750?-1867') # 1750
85
+ ParseDate.earliest_year('17--?-18--?') # 1700
86
+ ParseDate.earliest_year('1835 or 1836') # 1835
87
+ ParseDate.earliest_year('17-- or 18--?') # 1700
88
+ ParseDate.earliest_year('17th or 18th century?') # 1600
89
+ ParseDate.earliest_year('ca. 5th–6th century A.D.') # 400
90
+ ParseDate.earliest_year('ca. 9th–8th century B.C.') # -999
91
+ ParseDate.earliest_year('ca. 13th–12th century B.C.') # -1399
92
+ ParseDate.earliest_year('5th century B.C.') # -599
93
+
94
+ ParseDate.latest_year('195-') # 1959
95
+ ParseDate.latest_year('199u') # 1999
96
+ ParseDate.latest_year('197?') # 1979
97
+ ParseDate.latest_year('196x') # 1969
98
+ ParseDate.latest_year('18th century CE') # 1799
99
+ ParseDate.latest_year('17uu') # 1799
100
+ ParseDate.latest_year('between 1694 and 1799') # 1799
101
+ ParseDate.latest_year('between 1 and 5') # 5
102
+ ParseDate.latest_year('between 300 and 150 B.C.') # -150
103
+ ParseDate.latest_year('1496-1499') # 1499
104
+ ParseDate.latest_year('1750?-1867') # 1867
105
+ ParseDate.latest_year('17--?-18--?') # 1899
106
+ ParseDate.latest_year('1757-58') # 1758
107
+ ParseDate.latest_year('1975-05') # 1975 (range invalid)
108
+ ParseDate.latest_year('1835 or 1836') # 1836
109
+ ParseDate.latest_year('17-- or 18--?') # 1899
110
+ ParseDate.latest_year('17th or 18th century?') # 1799
111
+ ParseDate.latest_year('ca. 5th–6th century A.D.') # 599
112
+ ParseDate.latest_year('ca. 9th–8th century B.C.') # -800
113
+ ParseDate.latest_year('ca. 13th–12th century B.C.') # -1200
114
+ ParseDate.latest_year('5th century B.C.') # -500
115
+ ParseDate.latest_year('-5 - 3') # 3
116
+
117
+ ParseDate.range_array('1993', '1995') # [1993, 1994, 1995]
118
+ ParseDate.range_array(1993, 1995) # [1993, 1994, 1995]
119
+ ParseDate.range_array(0, '0001') # [0, 1]
120
+ ParseDate.range_array('-0003', '0000') # [-3, -2, -1, 0]
121
+ ParseDate.range_array(-1, 1) # [-1, 0, 1]
122
+ ParseDate.range_array(15, 15) # [15]
123
+ ParseDate.range_array(-100, '-99') # [-100, -99]
124
+ ParseDate.range_array('98', 101) # [98, 99, 100, 101]
125
+ ParseDate.range_array('word1', 'word2') # throws ArgumentError
126
+ ParseDate.range_array('1993', 1990) # throws StandardError - bad range
127
+ ParseDate.range_array('12345', 12345) # throws StandardError - bad range
128
+
129
+ ParseDate.year_range_valid?(1975, 1905) # false, first year > last year
130
+ ParseDate.year_range_valid?(-100, -150) # false, first year > last year
131
+ ParseDate.year_range_valid?(2050, 2070) # false, year later than current year + 1
132
+ ParseDate.year_range_valid?(2007, 2050) # false, year later than current year + 1
133
+ ParseDate.year_range_valid?(2007, 2009) # true
134
+ ParseDate.year_range_valid?(75, 150) # true
135
+ ParseDate.year_range_valid?(-3, 2) # true
136
+ ParseDate.year_range_valid?(-100, -50) # true
137
+ ParseDate.year_range_valid?(-1500, -1499) # true
138
+ ParseDate.year_range_valid?(-15000, -14999) # true
139
+
140
+ ParseDate.year_int_valid?(0) # true
141
+ ParseDate.year_int_valid?(5) # true
142
+ ParseDate.year_int_valid?(33) # true
143
+ ParseDate.year_int_valid?(150) # true
144
+ ParseDate.year_int_valid?(2019) # true
145
+ ParseDate.year_int_valid?(Date.today.year + 1) # true
146
+ ParseDate.year_int_valid?(-3) # true
147
+ ParseDate.year_int_valid?(-35) # true
148
+ ParseDate.year_int_valid?(-999) # true
149
+ ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
150
+ ParseDate.year_int_valid?('165x') # false
151
+ ParseDate.year_int_valid?('198-') # false
152
+ ParseDate.year_int_valid?('random text') # false
153
+ ParseDate.year_int_valid?(nil) # false
67
154
  ```
68
155
 
69
156
  ## Development
data/lib/parse_date.rb CHANGED
@@ -26,15 +26,54 @@ class ParseDate
26
26
  extend ParseDate::IntFromString
27
27
 
28
28
  # class method delegation
29
- def self.earliest_year(orig_date_str)
30
- ParseDate::IntFromString.earliest_year(orig_date_str)
29
+ def self.earliest_year(date_str)
30
+ ParseDate::IntFromString.earliest_year(date_str)
31
31
  end
32
32
 
33
- def self.latest_year(orig_date_str)
34
- ParseDate::IntFromString.latest_year(orig_date_str)
33
+ def self.latest_year(date_str)
34
+ ParseDate::IntFromString.latest_year(date_str)
35
35
  end
36
36
 
37
- def self.year_int_valid?(orig_date_str)
38
- ParseDate::IntFromString.year_int_valid?(orig_date_str)
37
+ def self.year_int_valid?(date_str)
38
+ ParseDate::IntFromString.year_int_valid?(date_str)
39
+ end
40
+
41
+ # @return [Array] array of Integer year values from earliest year to latest year, inclusive
42
+ def self.parse_range(date_str)
43
+ first = earliest_year(date_str)
44
+ last = latest_year(date_str)
45
+ raise ParseDate::Error, "Unable to parse range from '#{date_str}'" unless year_range_valid?(first, last)
46
+
47
+ range_array(first, last)
48
+ rescue StandardError => e
49
+ raise ParseDate::Error, "Unable to parse range from '#{date_str}': #{e.message}"
50
+ end
51
+
52
+ # true if:
53
+ # both years are not newer than (current year + 1)
54
+ # first_year <= last_year
55
+ # false otherwise
56
+ def self.year_range_valid?(first_year, last_year)
57
+ upper_bound = Date.today.year + 2
58
+ return false if first_year > upper_bound || last_year > upper_bound
59
+ return false if first_year > last_year
60
+
61
+ true
62
+ end
63
+
64
+ # @param [Integer, String] first_year, expecting integer or parseable string for .to_i
65
+ # @param [Integer, String] last_year, expecting integer or parseable string for .to_i
66
+ # @return [Array] array of Integer year values from first to last, inclusive
67
+ def self.range_array(first_year, last_year)
68
+ first_year = first_year.to_i if first_year.is_a?(String) && first_year.match?(/^-?\d+$/)
69
+ last_year = last_year.to_i if last_year.is_a?(String) && last_year.match?(/^-?\d+$/)
70
+
71
+ return [] unless last_year || first_year
72
+ return [first_year] if last_year.nil? && first_year
73
+ return [last_year] if first_year.nil? && last_year
74
+ raise(StandardError, "unable to create year range array from #{first_year}, #{last_year}") unless
75
+ year_range_valid?(first_year, last_year)
76
+
77
+ Range.new(first_year, last_year).to_a
39
78
  end
40
79
  end
@@ -7,7 +7,7 @@ class ParseDate
7
7
  # Parse (Year) Integers from Date Strings
8
8
  module IntFromString
9
9
 
10
- # earliest year as Integer if we can parse one from orig_date_str
10
+ # earliest year as Integer if we can parse one from date_str
11
11
  # e.g. if 17uu, result is 1700
12
12
  # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
13
13
  # found in our actual date strings in stanford-mods records), then
@@ -15,25 +15,30 @@ class ParseDate
15
15
  # 1/1/17 -> 2017
16
16
  # 1/1/27 -> 1927
17
17
  # @return [Integer, nil] Integer year if we could parse one, nil otherwise
18
- def self.earliest_year(orig_date_str)
19
- return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
20
- # B.C. first in case there are 4 digits, e.g. 1600 B.C.
21
- return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
22
-
23
- result = ParseDate.send(:first_four_digits, orig_date_str)
24
- result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
25
- result ||= ParseDate.send(:first_year_for_decade, orig_date_str) # 19xx or 20xx
26
- result ||= ParseDate.send(:first_year_for_century, orig_date_str)
27
- result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
18
+ def self.earliest_year(date_str)
19
+ return unless date_str && !date_str.empty?
20
+ return if date_str == '0000-00-00' # shpc collection has these useless dates
21
+
22
+ # B.C. first (match longest string first)
23
+ return ParseDate.send(:earliest_century_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
24
+ return ParseDate.send(:between_bc_earliest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
25
+ return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(YEAR_BC_REGEX)
26
+
27
+ result ||= ParseDate.send(:between_earliest_year, date_str)
28
+ result ||= ParseDate.send(:first_four_digits, date_str)
29
+ result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
30
+ result ||= ParseDate.send(:first_year_for_decade, date_str) # 198x or 201x
31
+ result ||= ParseDate.send(:first_year_for_century, date_str) # includes BC
32
+ result ||= ParseDate.send(:year_for_early_numeric, date_str)
28
33
  unless result
29
34
  # try removing brackets between digits in case we have 169[5] or [18]91
30
- no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
35
+ no_brackets = ParseDate.send(:remove_brackets, date_str)
31
36
  return earliest_year(no_brackets) if no_brackets
32
37
  end
33
38
  result.to_i if result && year_int_valid?(result.to_i)
34
39
  end
35
40
 
36
- # latest year as Integer if we can parse one from orig_date_str
41
+ # latest year as Integer if we can parse one from date_str
37
42
  # e.g. if 17uu, result is 1799
38
43
  # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
39
44
  # found in our actual date strings in stanford-mods records), then
@@ -41,29 +46,36 @@ class ParseDate
41
46
  # 1/1/17 -> 2017
42
47
  # 1/1/27 -> 1927
43
48
  # @return [Integer, nil] Integer year if we could parse one, nil otherwise
44
- def self.latest_year(orig_date_str)
45
- return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
46
-
47
- # B.C. first in case there are 4 digits, e.g. 1600 B.C.
48
- # NOTE: may want to parse for last occurence of 4 consecutive digits
49
- return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
50
-
51
- # NOTE: may want to parse for last occurence of 4 consecutive digits
52
- result = ParseDate.send(:first_four_digits, orig_date_str)
53
- result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
54
- result ||= ParseDate.send(:last_year_for_decade, orig_date_str) # 19xx or 20xx
55
- # NOTE: may want to parse for last occurence of consecutive digits
56
- result ||= ParseDate.send(:last_year_for_century, orig_date_str)
57
- result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
49
+ def self.latest_year(date_str)
50
+ return unless date_str && !date_str.empty?
51
+ return if date_str == '0000-00-00' # shpc collection has these useless dates
52
+
53
+ # B.C. first (match longest string first)
54
+ return ParseDate.send(:last_year_mult_centuries_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
55
+ return ParseDate.send(:between_bc_latest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
56
+ return ParseDate.send(:last_year_for_bc_century, date_str) if date_str.match(BC_CENTURY_REGEX)
57
+ return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(BC_REGEX)
58
+
59
+ result ||= ParseDate.send(:between_latest_year, date_str)
60
+ result ||= ParseDate.send(:hyphen_4digit_latest_year, date_str)
61
+ result ||= ParseDate.send(:hyphen_2digit_latest_year, date_str)
62
+ result ||= ParseDate.send(:yyuu_after_hyphen, date_str)
63
+ result ||= ParseDate.send(:year_after_or, date_str)
64
+ result ||= ParseDate.send(:first_four_digits, date_str)
65
+ result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
66
+ result ||= ParseDate.send(:last_year_for_decade, date_str) # 198x or 201x
67
+ result ||= ParseDate.send(:last_year_mult_centuries, date_str) # nth-nth century
68
+ result ||= ParseDate.send(:last_year_for_century, date_str)
69
+ result ||= ParseDate.send(:last_year_for_early_numeric, date_str)
58
70
  unless result
59
71
  # try removing brackets between digits in case we have 169[5] or [18]91
60
- no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
72
+ no_brackets = ParseDate.send(:remove_brackets, date_str)
61
73
  return earliest_year(no_brackets) if no_brackets
62
74
  end
63
75
  result.to_i if result && year_int_valid?(result.to_i)
64
76
  end
65
77
 
66
- # true if the year is between -999 and (current year + 2)
78
+ # true if the year is between -999 and (current year + 1), inclusive
67
79
  # @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
68
80
  def self.year_int_valid?(year)
69
81
  return false unless year.is_a? Integer
@@ -73,18 +85,94 @@ class ParseDate
73
85
 
74
86
  protected
75
87
 
76
- BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
88
+ REGEX_OPTS = Regexp::IGNORECASE | Regexp::MULTILINE
89
+ BC_REGEX = Regexp.new(/\s*B\.?\s*C\.?/im)
90
+ BRACKETS_BETWEEN_DIGITS_REGEX = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
77
91
 
78
92
  # removes brackets between digits such as 169[5] or [18]91
79
- def remove_brackets(orig_date_str)
80
- orig_date_str.delete('[]') if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
93
+ def remove_brackets(date_str)
94
+ date_str.delete('[]') if date_str.match(BRACKETS_BETWEEN_DIGITS_REGEX)
95
+ end
96
+
97
+ YYYY_HYPHEN_YYYY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*-\s*(?<last>\d{4})\??/m)
98
+
99
+ # Integer value for latest year if we have "yyyy-yyyy" pattern
100
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
101
+ def hyphen_4digit_latest_year(date_str)
102
+ Regexp.last_match(:last).to_i if date_str.match(YYYY_HYPHEN_YYYY_REGEX)
103
+ end
104
+
105
+ YYYY_HYPHEN_YY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*-\s*(?<last>\d{2})\??([^-0-9].*)?$/)
106
+
107
+ # Integer value for latest year if we have "yyyy-yy" pattern
108
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
109
+ def hyphen_2digit_latest_year(date_str)
110
+ matches = date_str.match(YYYY_HYPHEN_YY_REGEX)
111
+ return unless matches
112
+
113
+ first = Regexp.last_match(:first)
114
+ century = first[0, 2]
115
+ last = "#{century}#{Regexp.last_match(:last)}"
116
+ last.to_i if ParseDate.year_range_valid?(first.to_i, last.to_i)
117
+ end
118
+
119
+ YYUU = '\\d{1,2}[u\\-]{2}'
120
+ YYuu_HYPHEN_YYuu_REGEX = Regexp.new("(?<first>#{YYUU})\\??\\s*-\\s*(?<last>#{YYUU})\\??([^u\\-]|$)??", REGEX_OPTS)
121
+
122
+ # Integer value for latest year if we have "yyuu-yyuu" pattern
123
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
124
+ def yyuu_after_hyphen(date_str)
125
+ last_year_for_century(Regexp.last_match(:last)).to_i if date_str.match(YYuu_HYPHEN_YYuu_REGEX)
126
+ end
127
+
128
+ YYXX = '\\d{1,2}[u\\-\\d]{2}'
129
+ YExx_OR_YExx_REGEX = Regexp.new("(?<first>#{YYXX})\\??\\s*or\\s*(?<last>#{YYXX})\\??([^u\\-]|$)??", REGEX_OPTS)
130
+
131
+ # Integer value for latest year if we have "yyyy or yyyy" pattern
132
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
133
+ def year_after_or(date_str)
134
+ latest_year(Regexp.last_match(:last)).to_i if date_str.match(YExx_OR_YExx_REGEX)
135
+ end
136
+
137
+ # NOTE: some actual data seemed to have a diff hyphen char. (slightly longer)
138
+ YY_YY_CENTURY_REGEX = Regexp.new(/(?<first>\d{1,2})[a-z]{2}?\s*(-|–|or)\s*(?<last>\d{1,2})[a-z]{2}?\s+centur.*/im)
139
+
140
+ # Integer value for latest year if we have nth-nth century pattern
141
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
142
+ def last_year_mult_centuries(date_str)
143
+ matches = date_str.match(YY_YY_CENTURY_REGEX)
144
+ return unless matches
145
+
146
+ nth = Regexp.last_match(:last).to_i
147
+ (nth - 1) * 100 + 99
148
+ end
149
+
150
+ YY_YY_CENTURY_BC_REGEX = Regexp.new("#{YY_YY_CENTURY_REGEX}#{BC_REGEX}", REGEX_OPTS)
151
+
152
+ # Integer value for earliest year if we have nth-nth century BC pattern
153
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
154
+ def earliest_century_bc(date_str)
155
+ matches = date_str.match(YY_YY_CENTURY_BC_REGEX)
156
+ return unless matches
157
+
158
+ nth = Regexp.last_match(:first).to_i
159
+ nth * -100 - 99
160
+ end
161
+
162
+ # Integer value for latest year if we have nth-nth century BC pattern
163
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
164
+ def last_year_mult_centuries_bc(date_str)
165
+ matches = date_str.match(YY_YY_CENTURY_BC_REGEX)
166
+ return unless matches
167
+
168
+ nth = Regexp.last_match(:last).to_i
169
+ nth * -100
81
170
  end
82
171
 
83
- # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
84
- # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
85
- def first_four_digits(orig_date_str)
86
- matches = orig_date_str.match(/\d{4}/) if orig_date_str
87
- matches&.to_s
172
+ # looks for 4 consecutive digits in date_str and returns first occurrence if found
173
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str has yyyy, nil otherwise
174
+ def first_four_digits(date_str)
175
+ Regexp.last_match(1) if date_str.match(/(\d{4})([^\d]|$)/im)
88
176
  end
89
177
 
90
178
  # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
@@ -92,16 +180,14 @@ class ParseDate
92
180
  # we use 20 as century digits unless it is greater than current year:
93
181
  # 1/1/17 -> 2017
94
182
  # 1/1/27 -> 1927
95
- # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
96
- def year_from_mm_dd_yy(orig_date_str)
97
- return unless orig_date_str
98
-
99
- slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
183
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str matches pattern, nil otherwise
184
+ def year_from_mm_dd_yy(date_str)
185
+ slash_matches = date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
100
186
  if slash_matches
101
- date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
187
+ date_obj = Date.strptime(date_str, '%m/%d/%y')
102
188
  else
103
- hyphen_matches = orig_date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
104
- date_obj = Date.strptime(orig_date_str, '%m-%d-%y') if hyphen_matches
189
+ hyphen_matches = date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
190
+ date_obj = Date.strptime(date_str, '%m-%d-%y') if hyphen_matches
105
191
  end
106
192
  date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday) if date_obj && date_obj > Date.today
107
193
  date_obj.year.to_s if date_obj
@@ -109,66 +195,107 @@ class ParseDate
109
195
  nil # explicitly want nil if date won't parse
110
196
  end
111
197
 
112
- DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
198
+ DECADE_4CHAR_REGEX = Regexp.new('(^|\D)\d{3}[u\-?x]', REGEX_OPTS)
113
199
 
114
200
  # first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
115
201
  # note that these are the only decade patterns found in our actual date strings in MODS records
116
- # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
117
- def first_year_for_decade(orig_date_str)
118
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
202
+ # @return [String, nil] 4 digit year (e.g. 1860, 1950) if date_str matches pattern, nil otherwise
203
+ def first_year_for_decade(date_str)
204
+ decade_matches = date_str.match(DECADE_4CHAR_REGEX)
119
205
  changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
120
206
  ParseDate.first_four_digits(changed_to_zero) if changed_to_zero
121
207
  end
122
208
 
123
209
  # last year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
124
210
  # note that these are the only decade patterns found in our actual date strings in MODS records
125
- # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
126
- def last_year_for_decade(orig_date_str)
127
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
211
+ # @return [String, nil] 4 digit year (e.g. 1869, 1959) if date_str matches pattern, nil otherwise
212
+ def last_year_for_decade(date_str)
213
+ decade_matches = date_str.match(DECADE_4CHAR_REGEX)
128
214
  changed_to_nine = decade_matches.to_s.tr('u\-?x', '9') if decade_matches
129
215
  ParseDate.first_four_digits(changed_to_nine) if changed_to_nine
130
216
  end
131
217
 
132
- CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
133
- CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
218
+ CENTURY_WORD_REGEX = Regexp.new('(\d{1,2})[a-z]{2}?\s*century', REGEX_OPTS)
219
+ CENTURY_4CHAR_REGEX = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)', REGEX_OPTS)
220
+ BC_CENTURY_REGEX = Regexp.new("#{CENTURY_WORD_REGEX}\\s+#{BC_REGEX}", REGEX_OPTS)
221
+
222
+ # first year of century if we have: yyuu, yy--, yy--? or xxth century pattern; handles B.C.
223
+ # @return [Integer, nil] yy00 if date_str matches pattern, nil otherwise
224
+ def first_year_for_century(date_str)
225
+ return Regexp.last_match(1).to_i * -100 - 99 if date_str.match(BC_CENTURY_REGEX)
226
+ return Regexp.last_match(1).to_i * 100 if date_str.match(CENTURY_4CHAR_REGEX)
227
+ return (Regexp.last_match(:first).to_i - 1) * 100 if date_str.match(YY_YY_CENTURY_REGEX)
228
+ return (Regexp.last_match(1).to_i - 1) * 100 if date_str.match(CENTURY_WORD_REGEX)
229
+ end
230
+
231
+ # last year of century if we have: yyuu, yy--, yy--? or xxth century pattern
232
+ # @return [Integer, nil] yy99 if date_str matches pattern, nil otherwise
233
+ def last_year_for_century(date_str)
234
+ return Regexp.last_match(1).to_i * 100 + 99 if date_str.match(CENTURY_4CHAR_REGEX)
235
+ return (Regexp.last_match(1).to_i - 1) * 100 + 99 if date_str.match(CENTURY_WORD_REGEX)
236
+ end
237
+
238
+ # last year of century (as Integer) if we have: nth century BC
239
+ # @return [Integer, nil] negative year (n * -100) if date_str matches pattern, nil otherwise
240
+ def last_year_for_bc_century(date_str)
241
+ Regexp.last_match(1).to_i * -100 if date_str.match(BC_CENTURY_REGEX)
242
+ end
243
+
244
+ BETWEEN_Yn_AND_Yn_REGEX = Regexp.new(/between\s+(?<first>\d{1,4})\??\s+and\s+(?<last>\d{1,4})\??/im)
134
245
 
135
- # first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
136
- # note that these are the only century patterns found in our actual date strings in MODS records
137
- # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
138
- def first_year_for_century(orig_date_str)
139
- return unless orig_date_str
140
- return if orig_date_str =~ /B\.C\./
141
- return "#{Regexp.last_match(1)}00" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
142
- return "#{(Regexp.last_match(1).to_i - 1).to_s}00" if orig_date_str.match(CENTURY_WORD_REGEXP)
246
+ # Integer value for earliest if we have "between y and y" pattern
247
+ # NOTE: must match for BC first with between_bc_earliest_year
248
+ # @return [Integer, nil] year if date_str matches pattern; nil otherwise
249
+ def between_earliest_year(date_str)
250
+ Regexp.last_match(:first).to_i if date_str.match(BETWEEN_Yn_AND_Yn_REGEX)
143
251
  end
144
252
 
145
- # last year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
146
- # note that these are the only century patterns found in our actual date strings in MODS records
147
- # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
148
- def last_year_for_century(orig_date_str)
149
- return unless orig_date_str
150
- return if orig_date_str =~ /B\.C\./
151
- return "#{Regexp.last_match(1)}99" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
253
+ # Integer value for latest year if we have "between y and y" pattern
254
+ # NOTE: must match for BC first with between_bc_latest_year
255
+ # @return [Integer, nil] year if date_str matches pattern; nil otherwise
256
+ def between_latest_year(date_str)
257
+ Regexp.last_match(:last).to_i if date_str.match(BETWEEN_Yn_AND_Yn_REGEX)
258
+ end
259
+
260
+ YEAR_BC_REGEX = Regexp.new("(\\d{1,4})#{BC_REGEX}", REGEX_OPTS)
152
261
 
153
- # TODO: do we want to look for the very last match of digits before "century" instead of the first one?
154
- return "#{(Regexp.last_match(1).to_i - 1).to_s}99" if orig_date_str.match(CENTURY_WORD_REGEXP)
262
+ # Integer value for B.C. if we have B.C. pattern
263
+ # @return [Integer, nil] -ddd if B.C. in pattern; nil otherwise
264
+ def year_int_for_bc(date_str)
265
+ "-#{Regexp.last_match(1)}".to_i if date_str.match(YEAR_BC_REGEX)
155
266
  end
156
267
 
157
- BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
268
+ BETWEEN_Yn_AND_Yn_BC_REGEX = Regexp.new("#{BETWEEN_Yn_AND_Yn_REGEX}#{BC_REGEX}", REGEX_OPTS)
158
269
 
159
- # Integer sortable value for B.C. if we have B.C. pattern
160
- # @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
161
- def year_int_for_bc(orig_date_str)
162
- bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
163
- "-#{Regexp.last_match(1)}".to_i if bc_matches
270
+ # Integer value for earliest year if we have "between y and y B.C." pattern
271
+ # @return [Integer, nil] -ddd if date_str matches pattern; nil otherwise
272
+ def between_bc_earliest_year(date_str)
273
+ "-#{Regexp.last_match(:first)}".to_i if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
164
274
  end
165
275
 
166
- EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
276
+ # Integer value for latest year if we have "between y and y B.C." pattern
277
+ # @return [Integer, nil] -ddd if date_str matches pattern; nil otherwise
278
+ def between_bc_latest_year(date_str)
279
+ "-#{Regexp.last_match(:last)}".to_i if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
280
+ end
281
+
282
+ EARLY_NUMERIC_REGEX = Regexp.new('^\-?\d{1,3}([^\du\-\[]|$)', REGEX_OPTS)
283
+
284
+ # year if date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
285
+ # @return [Integer, nil] year if date_str matches pattern; nil otherwise
286
+ def year_for_early_numeric(date_str)
287
+ date_str.to_i if date_str.match(EARLY_NUMERIC_REGEX) || date_str =~ /^-\d{4}([^\du\-\[]|$)$/
288
+ end
289
+
290
+ FIRST_LAST_EARLY_NUMERIC_REGEX =
291
+ Regexp.new(/^(?<first>\-?\d{1,3})\??\s*(-|–|or)\s*(?<last>\-?\d{1,4})\??([^\du\-\[]|$)/im)
292
+
293
+ # Integer value for latest year if we have early numeric year range or single early numeric year
294
+ # @return [Integer, nil] year if date_str matches pattern; nil otherwise
295
+ def last_year_for_early_numeric(date_str)
296
+ return Regexp.last_match(:last).to_i if date_str.match(FIRST_LAST_EARLY_NUMERIC_REGEX)
167
297
 
168
- # year if orig_date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
169
- # @return [String, nil] -ddd if orig_date_str matches pattern; nil otherwise
170
- def year_for_early_numeric(orig_date_str)
171
- orig_date_str if orig_date_str.match(EARLY_NUMERIC) || orig_date_str =~ /^-\d{4}$/
298
+ year_for_early_numeric(date_str) # if single year, not matched above
172
299
  end
173
300
  end
174
301
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class ParseDate
4
- VERSION = '0.2.0'
4
+ VERSION = '0.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_date
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-09 00:00:00.000000000 Z
11
+ date: 2019-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk