parse_date 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 446afc4299ca93634d6689b20d3a32bdb28460da5627701682733e18bb1f0a16
4
- data.tar.gz: b5f6bbceab8542cc3c977c1c1ea8a4e250f82a9ae3a1eed2b335050067145703
3
+ metadata.gz: 3927636e715e52bf2f38a8c0f3f384b06dc29971000c4b700a5e9f8a128c144b
4
+ data.tar.gz: 1f6b9fcb0f3bbf37f5d8417ee21c92ee19c87604394453dbe60a4f16b722e59c
5
5
  SHA512:
6
- metadata.gz: 052f5d35a64c52f5bd74af2e487b70c8b03c37252cce33554aa3e7d6ba8141d9f0edd7c719aa487dbddb2e465b76d2d1ab222f057e68c7e6fa322eef313a35e6
7
- data.tar.gz: 880e66962d42d0c3f2824510a94edcf9256cc6ba8298815721a2a720e8f9ef2158793820ff71fc3a22a0ad0f3397db3f8e2361753ee661494ed1371117562eb7
6
+ metadata.gz: c8d350764a5d813e5daa7e5abc0b17d9167b81b249322b52f181e7ea5ccba58f468883f30f6bdbd91e3a43f4da03270cd62581ea2e23ae5ac2a45261ecdf5e24
7
+ data.tar.gz: 6e421ee0b10f44f2cee021dc198cddb74a8640639b7c66ccb74f515ae891c4be6cadd47d6b5b12af1f4a9d6f3698f1fc3fe224c6be5aed03ce98eb5376c15d8f
data/.rubocop.yml CHANGED
@@ -13,7 +13,14 @@ Metrics/LineLength:
13
13
  Max: 120
14
14
 
15
15
  Metrics/MethodLength:
16
- Max: 15
16
+ Max: 22
17
+
18
+ Style/NumericLiterals:
19
+ Enabled: false
20
+
21
+ Style/TrailingCommaInHashLiteral:
22
+ Exclude:
23
+ - spec/**/*
17
24
 
18
25
  Style/WordArray:
19
26
  Enabled: false
data/.rubocop_todo.yml CHANGED
@@ -1,34 +1,33 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2019-10-09 15:35:45 -0700 using RuboCop version 0.74.0.
3
+ # on 2019-10-21 14:32:14 -0700 using RuboCop version 0.74.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 2
10
- # Cop supports --auto-correct.
11
- Lint/StringConversionInInterpolation:
12
- Exclude:
13
- - 'lib/parse_date/int_from_string.rb'
14
-
15
- # Offense count: 3
9
+ # Offense count: 5
16
10
  Metrics/AbcSize:
17
- Max: 18
11
+ Max: 37
18
12
 
19
- # Offense count: 4
13
+ # Offense count: 8
20
14
  # Configuration parameters: CountComments, ExcludedMethods.
21
15
  # ExcludedMethods: refine
22
16
  Metrics/BlockLength:
23
- Max: 561
17
+ Max: 812
24
18
 
25
- # Offense count: 3
19
+ # Offense count: 4
26
20
  Metrics/CyclomaticComplexity:
27
- Max: 8
21
+ Max: 12
28
22
 
29
23
  # Offense count: 1
24
+ # Configuration parameters: CountComments.
25
+ Metrics/ModuleLength:
26
+ Max: 168
27
+
28
+ # Offense count: 4
30
29
  Metrics/PerceivedComplexity:
31
- Max: 9
30
+ Max: 12
32
31
 
33
32
  # Offense count: 2
34
33
  Style/Documentation:
@@ -53,18 +52,11 @@ Style/RegexpLiteral:
53
52
  Exclude:
54
53
  - 'lib/parse_date/int_from_string.rb'
55
54
 
56
- # Offense count: 1
55
+ # Offense count: 3
57
56
  # Cop supports --auto-correct.
58
57
  # Configuration parameters: EnforcedStyleForMultiline.
59
58
  # SupportedStylesForMultiline: comma, consistent_comma, no_comma
60
59
  Style/TrailingCommaInArrayLiteral:
61
60
  Exclude:
62
61
  - 'spec/parse_date/int_from_string_spec.rb'
63
-
64
- # Offense count: 1
65
- # Cop supports --auto-correct.
66
- # Configuration parameters: EnforcedStyleForMultiline.
67
- # SupportedStylesForMultiline: comma, consistent_comma, no_comma
68
- Style/TrailingCommaInHashLiteral:
69
- Exclude:
70
- - 'spec/parse_date/int_from_string_spec.rb'
62
+ - 'spec/parse_date_spec.rb'
data/README.md CHANGED
@@ -30,40 +30,127 @@ ParseDate has class methods for date string parsing.
30
30
  ```
31
31
  require 'parse_date'
32
32
 
33
- ParseDate.earliest_year('12/25/00') # 2000
34
- ParseDate.earliest_year('5-1-21') # 1921
35
- ParseDate.earliest_year('1666 B.C.') # -1666
36
- ParseDate.earliest_year('-914') # -914
37
- ParseDate.earliest_year('[c1926]') # 1926
38
- ParseDate.earliest_year('ca. 1558') # 1558
39
- ParseDate.earliest_year('195-') # 1950
40
- ParseDate.earliest_year('199u') # 1990
41
- ParseDate.earliest_year('197?') # 1970
42
- ParseDate.earliest_year('196x') # 1960
43
- ParseDate.earliest_year('18th century CE') # 1700
44
- ParseDate.earliest_year('17uu') # 1700
45
-
46
- ParseDate.latest_year('195-') # 1959
47
- ParseDate.latest_year('199u') # 1999
48
- ParseDate.latest_year('197?') # 1979
49
- ParseDate.latest_year('196x') # 1969
50
- ParseDate.latest_year('18th century CE') # 1799
51
- ParseDate.latest_year('17uu') # 1799
52
-
53
- ParseDate.year_int_valid?(0) # true
54
- ParseDate.year_int_valid?(5) # true
55
- ParseDate.year_int_valid?(33) # true
56
- ParseDate.year_int_valid?(150) # true
57
- ParseDate.year_int_valid?(2019) # true
58
- ParseDate.year_int_valid?(Date.today.year + 1) # true
59
- ParseDate.year_int_valid?(-3) # true
60
- ParseDate.year_int_valid?(-35) # true
61
- ParseDate.year_int_valid?(-999) # true
62
- ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
63
- ParseDate.year_int_valid?(165x) # false
64
- ParseDate.year_int_valid?(198-) # false
65
- ParseDate.year_int_valid?('random text') # false
66
- ParseDate.year_int_valid?(nil) # false
33
+ ParseDate.parse_range('12/25/00') # [2000]
34
+ ParseDate.parse_range('5-1-25') # [1925]
35
+ ParseDate.parse_range('1666 B.C.') # [-1666]
36
+ ParseDate.parse_range('-914') # [-914]
37
+ ParseDate.parse_range('[c1926]') # [1926]
38
+ ParseDate.parse_range('ca. 1558') # [1558]
39
+ ParseDate.parse_range('195-') # (1950..1959).to_a
40
+ ParseDate.parse_range('199u') # (1990..1999).to_a
41
+ ParseDate.parse_range('197?') # (1970..1979).to_a
42
+ ParseDate.parse_range('196x') # (1960..1969).to_a
43
+ ParseDate.parse_range('18th century CE') # (1700..1799).to_a
44
+ ParseDate.parse_range('17uu') # (1700..1799).to_a
45
+ ParseDate.parse_range('between 1694 and 1799') # (1694..1799).to_a
46
+ ParseDate.parse_range('between 1 and 5') # (1..5).to_a
47
+ ParseDate.parse_range('between 300 and 150 B.C.') # (-300..-150).to_a
48
+ ParseDate.parse_range('-5 - 3') # (-5..3).to_a
49
+ ParseDate.parse_range('1496-1499') # (1496..1499).to_a
50
+ ParseDate.parse_range('1750?-1867') # (1750..1867).to_a
51
+ ParseDate.parse_range('17--?-18--?') # (1700..1899).to_a
52
+ ParseDate.parse_range('1835 or 1836') # [1835, 1836]
53
+ ParseDate.parse_range('17-- or 18--?') # (1700..1899).to_a
54
+ ParseDate.parse_range('-2 or 1?') # (-2..1).to_a
55
+ ParseDate.parse_range('17th or 18th century?') # (1600..1799).to_a
56
+ ParseDate.parse_range('ca. 5th–6th century A.D.') # (400..599).to_a
57
+ ParseDate.parse_range('ca. 9th–8th century B.C.') # (-999..-800).to_a
58
+ ParseDate.parse_range('ca. 13th–12th century B.C.') # (-1399..-1200).to_a
59
+ ParseDate.parse_range('5th century B.C.') # (-599..-500).to_a
60
+ ParseDate.parse_range('1975 - 1905') # first year > last year, raises error
61
+ ParseDate.parse_range('-100 - -150') # first year > last year, raises error
62
+ ParseDate.parse_range('1975 or 1905') # first year > last year, raises error
63
+ ParseDate.parse_range('2050') # year later than current year + 1, raises error
64
+ ParseDate.parse_range('12345') # year later than current year + 1, raises error
65
+ ParseDate.parse_range('random text') # can't parse years, raises error
66
+ ParseDate.parse_range(nil) # can't parse years, raises error
67
+
68
+ ParseDate.earliest_year('12/25/00') # 2000
69
+ ParseDate.earliest_year('5-1-21') # 1921
70
+ ParseDate.earliest_year('1666 B.C.') # -1666
71
+ ParseDate.earliest_year('-914') # -914
72
+ ParseDate.earliest_year('[c1926]') # 1926
73
+ ParseDate.earliest_year('ca. 1558') # 1558
74
+ ParseDate.earliest_year('195-') # 1950
75
+ ParseDate.earliest_year('199u') # 1990
76
+ ParseDate.earliest_year('197?') # 1970
77
+ ParseDate.earliest_year('196x') # 1960
78
+ ParseDate.earliest_year('18th century CE') # 1700
79
+ ParseDate.earliest_year('17uu') # 1700
80
+ ParseDate.earliest_year('between 1694 and 1799') # 1694
81
+ ParseDate.earliest_year('between 1 and 5') # 1
82
+ ParseDate.earliest_year('between 300 and 150 B.C.') # -300
83
+ ParseDate.earliest_year('1496-1499') # 1496
84
+ ParseDate.earliest_year('1750?-1867') # 1750
85
+ ParseDate.earliest_year('17--?-18--?') # 1700
86
+ ParseDate.earliest_year('1835 or 1836') # 1835
87
+ ParseDate.earliest_year('17-- or 18--?') # 1700
88
+ ParseDate.earliest_year('17th or 18th century?') # 1600
89
+ ParseDate.earliest_year('ca. 5th–6th century A.D.') # 400
90
+ ParseDate.earliest_year('ca. 9th–8th century B.C.') # -999
91
+ ParseDate.earliest_year('ca. 13th–12th century B.C.') # -1399
92
+ ParseDate.earliest_year('5th century B.C.') # -599
93
+
94
+ ParseDate.latest_year('195-') # 1959
95
+ ParseDate.latest_year('199u') # 1999
96
+ ParseDate.latest_year('197?') # 1979
97
+ ParseDate.latest_year('196x') # 1969
98
+ ParseDate.latest_year('18th century CE') # 1799
99
+ ParseDate.latest_year('17uu') # 1799
100
+ ParseDate.latest_year('between 1694 and 1799') # 1799
101
+ ParseDate.latest_year('between 1 and 5') # 5
102
+ ParseDate.latest_year('between 300 and 150 B.C.') # -150
103
+ ParseDate.latest_year('1496-1499') # 1499
104
+ ParseDate.latest_year('1750?-1867') # 1867
105
+ ParseDate.latest_year('17--?-18--?') # 1899
106
+ ParseDate.latest_year('1757-58') # 1758
107
+ ParseDate.latest_year('1975-05') # 1975 (range invalid)
108
+ ParseDate.latest_year('1835 or 1836') # 1836
109
+ ParseDate.latest_year('17-- or 18--?') # 1899
110
+ ParseDate.latest_year('17th or 18th century?') # 1799
111
+ ParseDate.latest_year('ca. 5th–6th century A.D.') # 599
112
+ ParseDate.latest_year('ca. 9th–8th century B.C.') # -800
113
+ ParseDate.latest_year('ca. 13th–12th century B.C.') # -1200
114
+ ParseDate.latest_year('5th century B.C.') # -500
115
+ ParseDate.latest_year('-5 - 3') # 3
116
+
117
+ ParseDate.range_array('1993', '1995') # [1993, 1994, 1995]
118
+ ParseDate.range_array(1993, 1995) # [1993, 1994, 1995]
119
+ ParseDate.range_array(0, '0001') # [0, 1]
120
+ ParseDate.range_array('-0003', '0000') # [-3, -2, -1, 0]
121
+ ParseDate.range_array(-1, 1) # [-1, 0, 1]
122
+ ParseDate.range_array(15, 15) # [15]
123
+ ParseDate.range_array(-100, '-99') # [-100, -99]
124
+ ParseDate.range_array('98', 101) # [98, 99, 100, 101]
125
+ ParseDate.range_array('word1', 'word2') # throws ArgumentError
126
+ ParseDate.range_array('1993', 1990) # throws StandardError - bad range
127
+ ParseDate.range_array('12345', 12345) # throws StandardError - bad range
128
+
129
+ ParseDate.year_range_valid?(1975, 1905) # false, first year > last year
130
+ ParseDate.year_range_valid?(-100, -150) # false, first year > last year
131
+ ParseDate.year_range_valid?(2050, 2070) # false, year later than current year + 1
132
+ ParseDate.year_range_valid?(2007, 2050) # false, year later than current year + 1
133
+ ParseDate.year_range_valid?(2007, 2009) # true
134
+ ParseDate.year_range_valid?(75, 150) # true
135
+ ParseDate.year_range_valid?(-3, 2) # true
136
+ ParseDate.year_range_valid?(-100, -50) # true
137
+ ParseDate.year_range_valid?(-1500, -1499) # true
138
+ ParseDate.year_range_valid?(-15000, -14999) # true
139
+
140
+ ParseDate.year_int_valid?(0) # true
141
+ ParseDate.year_int_valid?(5) # true
142
+ ParseDate.year_int_valid?(33) # true
143
+ ParseDate.year_int_valid?(150) # true
144
+ ParseDate.year_int_valid?(2019) # true
145
+ ParseDate.year_int_valid?(Date.today.year + 1) # true
146
+ ParseDate.year_int_valid?(-3) # true
147
+ ParseDate.year_int_valid?(-35) # true
148
+ ParseDate.year_int_valid?(-999) # true
149
+ ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
150
+ ParseDate.year_int_valid?('165x') # false
151
+ ParseDate.year_int_valid?('198-') # false
152
+ ParseDate.year_int_valid?('random text') # false
153
+ ParseDate.year_int_valid?(nil) # false
67
154
  ```
68
155
 
69
156
  ## Development
data/lib/parse_date.rb CHANGED
@@ -26,15 +26,54 @@ class ParseDate
26
26
  extend ParseDate::IntFromString
27
27
 
28
28
  # class method delegation
29
- def self.earliest_year(orig_date_str)
30
- ParseDate::IntFromString.earliest_year(orig_date_str)
29
+ def self.earliest_year(date_str)
30
+ ParseDate::IntFromString.earliest_year(date_str)
31
31
  end
32
32
 
33
- def self.latest_year(orig_date_str)
34
- ParseDate::IntFromString.latest_year(orig_date_str)
33
+ def self.latest_year(date_str)
34
+ ParseDate::IntFromString.latest_year(date_str)
35
35
  end
36
36
 
37
- def self.year_int_valid?(orig_date_str)
38
- ParseDate::IntFromString.year_int_valid?(orig_date_str)
37
+ def self.year_int_valid?(date_str)
38
+ ParseDate::IntFromString.year_int_valid?(date_str)
39
+ end
40
+
41
+ # @return [Array] array of Integer year values from earliest year to latest year, inclusive
42
+ def self.parse_range(date_str)
43
+ first = earliest_year(date_str)
44
+ last = latest_year(date_str)
45
+ raise ParseDate::Error, "Unable to parse range from '#{date_str}'" unless year_range_valid?(first, last)
46
+
47
+ range_array(first, last)
48
+ rescue StandardError => e
49
+ raise ParseDate::Error, "Unable to parse range from '#{date_str}': #{e.message}"
50
+ end
51
+
52
+ # true if:
53
+ # both years are not newer than (current year + 1)
54
+ # first_year <= last_year
55
+ # false otherwise
56
+ def self.year_range_valid?(first_year, last_year)
57
+ upper_bound = Date.today.year + 2
58
+ return false if first_year > upper_bound || last_year > upper_bound
59
+ return false if first_year > last_year
60
+
61
+ true
62
+ end
63
+
64
+ # @param [Integer, String] first_year, expecting integer or parseable string for .to_i
65
+ # @param [Integer, String] last_year, expecting integer or parseable string for .to_i
66
+ # @return [Array] array of Integer year values from first to last, inclusive
67
+ def self.range_array(first_year, last_year)
68
+ first_year = first_year.to_i if first_year.is_a?(String) && first_year.match?(/^-?\d+$/)
69
+ last_year = last_year.to_i if last_year.is_a?(String) && last_year.match?(/^-?\d+$/)
70
+
71
+ return [] unless last_year || first_year
72
+ return [first_year] if last_year.nil? && first_year
73
+ return [last_year] if first_year.nil? && last_year
74
+ raise(StandardError, "unable to create year range array from #{first_year}, #{last_year}") unless
75
+ year_range_valid?(first_year, last_year)
76
+
77
+ Range.new(first_year, last_year).to_a
39
78
  end
40
79
  end
@@ -7,7 +7,7 @@ class ParseDate
7
7
  # Parse (Year) Integers from Date Strings
8
8
  module IntFromString
9
9
 
10
- # earliest year as Integer if we can parse one from orig_date_str
10
+ # earliest year as Integer if we can parse one from date_str
11
11
  # e.g. if 17uu, result is 1700
12
12
  # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
13
13
  # found in our actual date strings in stanford-mods records), then
@@ -15,25 +15,30 @@ class ParseDate
15
15
  # 1/1/17 -> 2017
16
16
  # 1/1/27 -> 1927
17
17
  # @return [Integer, nil] Integer year if we could parse one, nil otherwise
18
- def self.earliest_year(orig_date_str)
19
- return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
20
- # B.C. first in case there are 4 digits, e.g. 1600 B.C.
21
- return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
22
-
23
- result = ParseDate.send(:first_four_digits, orig_date_str)
24
- result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
25
- result ||= ParseDate.send(:first_year_for_decade, orig_date_str) # 19xx or 20xx
26
- result ||= ParseDate.send(:first_year_for_century, orig_date_str)
27
- result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
18
+ def self.earliest_year(date_str)
19
+ return unless date_str && !date_str.empty?
20
+ return if date_str == '0000-00-00' # shpc collection has these useless dates
21
+
22
+ # B.C. first (match longest string first)
23
+ return ParseDate.send(:earliest_century_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
24
+ return ParseDate.send(:between_bc_earliest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
25
+ return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(YEAR_BC_REGEX)
26
+
27
+ result ||= ParseDate.send(:between_earliest_year, date_str)
28
+ result ||= ParseDate.send(:first_four_digits, date_str)
29
+ result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
30
+ result ||= ParseDate.send(:first_year_for_decade, date_str) # 198x or 201x
31
+ result ||= ParseDate.send(:first_year_for_century, date_str) # includes BC
32
+ result ||= ParseDate.send(:year_for_early_numeric, date_str)
28
33
  unless result
29
34
  # try removing brackets between digits in case we have 169[5] or [18]91
30
- no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
35
+ no_brackets = ParseDate.send(:remove_brackets, date_str)
31
36
  return earliest_year(no_brackets) if no_brackets
32
37
  end
33
38
  result.to_i if result && year_int_valid?(result.to_i)
34
39
  end
35
40
 
36
- # latest year as Integer if we can parse one from orig_date_str
41
+ # latest year as Integer if we can parse one from date_str
37
42
  # e.g. if 17uu, result is 1799
38
43
  # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
39
44
  # found in our actual date strings in stanford-mods records), then
@@ -41,29 +46,36 @@ class ParseDate
41
46
  # 1/1/17 -> 2017
42
47
  # 1/1/27 -> 1927
43
48
  # @return [Integer, nil] Integer year if we could parse one, nil otherwise
44
- def self.latest_year(orig_date_str)
45
- return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
46
-
47
- # B.C. first in case there are 4 digits, e.g. 1600 B.C.
48
- # NOTE: may want to parse for last occurence of 4 consecutive digits
49
- return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
50
-
51
- # NOTE: may want to parse for last occurence of 4 consecutive digits
52
- result = ParseDate.send(:first_four_digits, orig_date_str)
53
- result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
54
- result ||= ParseDate.send(:last_year_for_decade, orig_date_str) # 19xx or 20xx
55
- # NOTE: may want to parse for last occurence of consecutive digits
56
- result ||= ParseDate.send(:last_year_for_century, orig_date_str)
57
- result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
49
+ def self.latest_year(date_str)
50
+ return unless date_str && !date_str.empty?
51
+ return if date_str == '0000-00-00' # shpc collection has these useless dates
52
+
53
+ # B.C. first (match longest string first)
54
+ return ParseDate.send(:last_year_mult_centuries_bc, date_str) if date_str.match(YY_YY_CENTURY_BC_REGEX)
55
+ return ParseDate.send(:between_bc_latest_year, date_str) if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
56
+ return ParseDate.send(:last_year_for_bc_century, date_str) if date_str.match(BC_CENTURY_REGEX)
57
+ return ParseDate.send(:year_int_for_bc, date_str) if date_str.match(BC_REGEX)
58
+
59
+ result ||= ParseDate.send(:between_latest_year, date_str)
60
+ result ||= ParseDate.send(:hyphen_4digit_latest_year, date_str)
61
+ result ||= ParseDate.send(:hyphen_2digit_latest_year, date_str)
62
+ result ||= ParseDate.send(:yyuu_after_hyphen, date_str)
63
+ result ||= ParseDate.send(:year_after_or, date_str)
64
+ result ||= ParseDate.send(:first_four_digits, date_str)
65
+ result ||= ParseDate.send(:year_from_mm_dd_yy, date_str)
66
+ result ||= ParseDate.send(:last_year_for_decade, date_str) # 198x or 201x
67
+ result ||= ParseDate.send(:last_year_mult_centuries, date_str) # nth-nth century
68
+ result ||= ParseDate.send(:last_year_for_century, date_str)
69
+ result ||= ParseDate.send(:last_year_for_early_numeric, date_str)
58
70
  unless result
59
71
  # try removing brackets between digits in case we have 169[5] or [18]91
60
- no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
72
+ no_brackets = ParseDate.send(:remove_brackets, date_str)
61
73
  return earliest_year(no_brackets) if no_brackets
62
74
  end
63
75
  result.to_i if result && year_int_valid?(result.to_i)
64
76
  end
65
77
 
66
- # true if the year is between -999 and (current year + 2)
78
+ # true if the year is between -999 and (current year + 1), inclusive
67
79
  # @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
68
80
  def self.year_int_valid?(year)
69
81
  return false unless year.is_a? Integer
@@ -73,18 +85,94 @@ class ParseDate
73
85
 
74
86
  protected
75
87
 
76
- BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
88
+ REGEX_OPTS = Regexp::IGNORECASE | Regexp::MULTILINE
89
+ BC_REGEX = Regexp.new(/\s*B\.?\s*C\.?/im)
90
+ BRACKETS_BETWEEN_DIGITS_REGEX = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
77
91
 
78
92
  # removes brackets between digits such as 169[5] or [18]91
79
- def remove_brackets(orig_date_str)
80
- orig_date_str.delete('[]') if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
93
+ def remove_brackets(date_str)
94
+ date_str.delete('[]') if date_str.match(BRACKETS_BETWEEN_DIGITS_REGEX)
95
+ end
96
+
97
+ YYYY_HYPHEN_YYYY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*-\s*(?<last>\d{4})\??/m)
98
+
99
+ # Integer value for latest year if we have "yyyy-yyyy" pattern
100
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
101
+ def hyphen_4digit_latest_year(date_str)
102
+ Regexp.last_match(:last).to_i if date_str.match(YYYY_HYPHEN_YYYY_REGEX)
103
+ end
104
+
105
+ YYYY_HYPHEN_YY_REGEX = Regexp.new(/(?<first>\d{4})\??\s*-\s*(?<last>\d{2})\??([^-0-9].*)?$/)
106
+
107
+ # Integer value for latest year if we have "yyyy-yy" pattern
108
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
109
+ def hyphen_2digit_latest_year(date_str)
110
+ matches = date_str.match(YYYY_HYPHEN_YY_REGEX)
111
+ return unless matches
112
+
113
+ first = Regexp.last_match(:first)
114
+ century = first[0, 2]
115
+ last = "#{century}#{Regexp.last_match(:last)}"
116
+ last.to_i if ParseDate.year_range_valid?(first.to_i, last.to_i)
117
+ end
118
+
119
+ YYUU = '\\d{1,2}[u\\-]{2}'
120
+ YYuu_HYPHEN_YYuu_REGEX = Regexp.new("(?<first>#{YYUU})\\??\\s*-\\s*(?<last>#{YYUU})\\??([^u\\-]|$)??", REGEX_OPTS)
121
+
122
+ # Integer value for latest year if we have "yyuu-yyuu" pattern
123
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
124
+ def yyuu_after_hyphen(date_str)
125
+ last_year_for_century(Regexp.last_match(:last)).to_i if date_str.match(YYuu_HYPHEN_YYuu_REGEX)
126
+ end
127
+
128
+ YYXX = '\\d{1,2}[u\\-\\d]{2}'
129
+ YExx_OR_YExx_REGEX = Regexp.new("(?<first>#{YYXX})\\??\\s*or\\s*(?<last>#{YYXX})\\??([^u\\-]|$)??", REGEX_OPTS)
130
+
131
+ # Integer value for latest year if we have "yyyy or yyyy" pattern
132
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
133
+ def year_after_or(date_str)
134
+ latest_year(Regexp.last_match(:last)).to_i if date_str.match(YExx_OR_YExx_REGEX)
135
+ end
136
+
137
+ # NOTE: some actual data seemed to have a different hyphen character (an en dash, slightly longer)
138
+ YY_YY_CENTURY_REGEX = Regexp.new(/(?<first>\d{1,2})[a-z]{2}?\s*(-|–|or)\s*(?<last>\d{1,2})[a-z]{2}?\s+centur.*/im)
139
+
140
+ # Integer value for latest year if we have nth-nth century pattern
141
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
142
+ def last_year_mult_centuries(date_str)
143
+ matches = date_str.match(YY_YY_CENTURY_REGEX)
144
+ return unless matches
145
+
146
+ nth = Regexp.last_match(:last).to_i
147
+ (nth - 1) * 100 + 99
148
+ end
149
+
150
+ YY_YY_CENTURY_BC_REGEX = Regexp.new("#{YY_YY_CENTURY_REGEX}#{BC_REGEX}", REGEX_OPTS)
151
+
152
+ # Integer value for earliest year if we have nth-nth century BC pattern
153
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
154
+ def earliest_century_bc(date_str)
155
+ matches = date_str.match(YY_YY_CENTURY_BC_REGEX)
156
+ return unless matches
157
+
158
+ nth = Regexp.last_match(:first).to_i
159
+ nth * -100 - 99
160
+ end
161
+
162
+ # Integer value for latest year if we have nth-nth century BC pattern
163
+ # @return [Integer, nil] yyyy if date_str matches pattern; nil otherwise
164
+ def last_year_mult_centuries_bc(date_str)
165
+ matches = date_str.match(YY_YY_CENTURY_BC_REGEX)
166
+ return unless matches
167
+
168
+ nth = Regexp.last_match(:last).to_i
169
+ nth * -100
81
170
  end
82
171
 
83
- # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
84
- # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
85
- def first_four_digits(orig_date_str)
86
- matches = orig_date_str.match(/\d{4}/) if orig_date_str
87
- matches&.to_s
172
+ # looks for 4 consecutive digits in date_str and returns first occurrence if found
173
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str has yyyy, nil otherwise
174
+ def first_four_digits(date_str)
175
+ Regexp.last_match(1) if date_str.match(/(\d{4})([^\d]|$)/im)
88
176
  end
89
177
 
90
178
  # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
@@ -92,16 +180,14 @@ class ParseDate
92
180
  # we use 20 as century digits unless it is greater than current year:
93
181
  # 1/1/17 -> 2017
94
182
  # 1/1/27 -> 1927
95
- # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
96
- def year_from_mm_dd_yy(orig_date_str)
97
- return unless orig_date_str
98
-
99
- slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
183
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if date_str matches pattern, nil otherwise
184
+ def year_from_mm_dd_yy(date_str)
185
+ slash_matches = date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
100
186
  if slash_matches
101
- date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
187
+ date_obj = Date.strptime(date_str, '%m/%d/%y')
102
188
  else
103
- hyphen_matches = orig_date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
104
- date_obj = Date.strptime(orig_date_str, '%m-%d-%y') if hyphen_matches
189
+ hyphen_matches = date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
190
+ date_obj = Date.strptime(date_str, '%m-%d-%y') if hyphen_matches
105
191
  end
106
192
  date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday) if date_obj && date_obj > Date.today
107
193
  date_obj.year.to_s if date_obj
@@ -109,66 +195,107 @@ class ParseDate
109
195
  nil # explicitly want nil if date won't parse
110
196
  end
111
197
 
112
- DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
198
+ DECADE_4CHAR_REGEX = Regexp.new('(^|\D)\d{3}[u\-?x]', REGEX_OPTS)
113
199
 
114
200
  # first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
115
201
  # note that these are the only decade patterns found in our actual date strings in MODS records
116
- # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
117
- def first_year_for_decade(orig_date_str)
118
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
202
+ # @return [String, nil] 4 digit year (e.g. 1860, 1950) if date_str matches pattern, nil otherwise
203
+ def first_year_for_decade(date_str)
204
+ decade_matches = date_str.match(DECADE_4CHAR_REGEX)
119
205
  changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
120
206
  ParseDate.first_four_digits(changed_to_zero) if changed_to_zero
121
207
  end
122
208
 
123
209
  # last year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
124
210
  # note that these are the only decade patterns found in our actual date strings in MODS records
125
- # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
126
- def last_year_for_decade(orig_date_str)
127
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
211
+ # @return [String, nil] 4 digit year (e.g. 1869, 1959) if date_str matches pattern, nil otherwise
212
+ def last_year_for_decade(date_str)
213
+ decade_matches = date_str.match(DECADE_4CHAR_REGEX)
128
214
  changed_to_nine = decade_matches.to_s.tr('u\-?x', '9') if decade_matches
129
215
  ParseDate.first_four_digits(changed_to_nine) if changed_to_nine
130
216
  end
131
217
 
132
- CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
133
- CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
218
+ CENTURY_WORD_REGEX = Regexp.new('(\d{1,2})[a-z]{2}?\s*century', REGEX_OPTS)
219
+ CENTURY_4CHAR_REGEX = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)', REGEX_OPTS)
220
+ BC_CENTURY_REGEX = Regexp.new("#{CENTURY_WORD_REGEX}\\s+#{BC_REGEX}", REGEX_OPTS)
221
+
222
+ # first year of century if we have: yyuu, yy--, yy--? or xxth century pattern; handles B.C.
223
+ # @return [Integer, nil] yy00 if date_str matches pattern, nil otherwise
224
+ def first_year_for_century(date_str)
225
+ return Regexp.last_match(1).to_i * -100 - 99 if date_str.match(BC_CENTURY_REGEX)
226
+ return Regexp.last_match(1).to_i * 100 if date_str.match(CENTURY_4CHAR_REGEX)
227
+ return (Regexp.last_match(:first).to_i - 1) * 100 if date_str.match(YY_YY_CENTURY_REGEX)
228
+ return (Regexp.last_match(1).to_i - 1) * 100 if date_str.match(CENTURY_WORD_REGEX)
229
+ end
230
+
231
+ # last year of century if we have: yyuu, yy--, yy--? or xxth century pattern
232
+ # @return [Integer, nil] yy99 if date_str matches pattern, nil otherwise; B.C. is not checked here
233
+ def last_year_for_century(date_str)
234
+ return Regexp.last_match(1).to_i * 100 + 99 if date_str.match(CENTURY_4CHAR_REGEX)
235
+ return (Regexp.last_match(1).to_i - 1) * 100 + 99 if date_str.match(CENTURY_WORD_REGEX)
236
+ end
237
+
238
+ # last year of century (as Integer) if we have: nth century BC
239
+ # @return [Integer, nil] -(n * 100) if date_str matches nth century B.C. pattern, nil otherwise
240
+ def last_year_for_bc_century(date_str)
241
+ Regexp.last_match(1).to_i * -100 if date_str.match(BC_CENTURY_REGEX)
242
+ end
243
+
244
+ BETWEEN_Yn_AND_Yn_REGEX = Regexp.new(/between\s+(?<first>\d{1,4})\??\s+and\s+(?<last>\d{1,4})\??/im)
134
245
 
135
- # first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
136
- # note that these are the only century patterns found in our actual date strings in MODS records
137
- # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
138
- def first_year_for_century(orig_date_str)
139
- return unless orig_date_str
140
- return if orig_date_str =~ /B\.C\./
141
- return "#{Regexp.last_match(1)}00" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
142
- return "#{(Regexp.last_match(1).to_i - 1).to_s}00" if orig_date_str.match(CENTURY_WORD_REGEXP)
246
+ # Integer value for earliest if we have "between y and y" pattern
247
+ # NOTE: must match for BC first with between_bc_earliest_year
248
+ # @return [Integer, nil] year if date_str matches pattern; nil otherwise
249
+ def between_earliest_year(date_str)
250
+ Regexp.last_match(:first).to_i if date_str.match(BETWEEN_Yn_AND_Yn_REGEX)
143
251
  end
144
252
 
145
- # last year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
146
- # note that these are the only century patterns found in our actual date strings in MODS records
147
- # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
148
- def last_year_for_century(orig_date_str)
149
- return unless orig_date_str
150
- return if orig_date_str =~ /B\.C\./
151
- return "#{Regexp.last_match(1)}99" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
253
+ # Integer value for latest year if we have "between y and y" pattern
254
+ # NOTE: must match for BC first with between_bc_latest_year
255
+ # @return [Integer, nil] year if date_str matches pattern; nil otherwise
256
+ def between_latest_year(date_str)
257
+ Regexp.last_match(:last).to_i if date_str.match(BETWEEN_Yn_AND_Yn_REGEX)
258
+ end
259
+
260
+ YEAR_BC_REGEX = Regexp.new("(\\d{1,4})#{BC_REGEX}", REGEX_OPTS)
152
261
 
153
- # TODO: do we want to look for the very last match of digits before "century" instead of the first one?
154
- return "#{(Regexp.last_match(1).to_i - 1).to_s}99" if orig_date_str.match(CENTURY_WORD_REGEXP)
262
+ # Integer value for B.C. if we have B.C. pattern
263
+ # @return [Integer, nil] -ddd if B.C. in pattern; nil otherwise
264
+ def year_int_for_bc(date_str)
265
+ "-#{Regexp.last_match(1)}".to_i if date_str.match(YEAR_BC_REGEX)
155
266
  end
156
267
 
157
- BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
268
+ BETWEEN_Yn_AND_Yn_BC_REGEX = Regexp.new("#{BETWEEN_Yn_AND_Yn_REGEX}#{BC_REGEX}", REGEX_OPTS)
158
269
 
159
- # Integer sortable value for B.C. if we have B.C. pattern
160
- # @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
161
- def year_int_for_bc(orig_date_str)
162
- bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
163
- "-#{Regexp.last_match(1)}".to_i if bc_matches
270
+ # Integer value for earliest year if we have "between y and y B.C." pattern
271
+ # @return [Integer, nil] -ddd if date_str matches pattern; nil otherwise
272
+ def between_bc_earliest_year(date_str)
273
+ "-#{Regexp.last_match(:first)}".to_i if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
164
274
  end
165
275
 
166
- EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
276
+ # Integer value for latest year if we have "between y and y B.C." pattern
277
+ # @return [Integer, nil] -ddd if date_str matches pattern; nil otherwise
278
+ def between_bc_latest_year(date_str)
279
+ "-#{Regexp.last_match(:last)}".to_i if date_str.match(BETWEEN_Yn_AND_Yn_BC_REGEX)
280
+ end
281
+
282
+ EARLY_NUMERIC_REGEX = Regexp.new('^\-?\d{1,3}([^\du\-\[]|$)', REGEX_OPTS)
283
+
284
+ # year if date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
285
+ # @return [Integer, nil] year if date_str matches pattern; nil otherwise
286
+ def year_for_early_numeric(date_str)
287
+ date_str.to_i if date_str.match(EARLY_NUMERIC_REGEX) || date_str =~ /^-\d{4}([^\du\-\[]|$)$/
288
+ end
289
+
290
+ FIRST_LAST_EARLY_NUMERIC_REGEX =
291
+ Regexp.new(/^(?<first>\-?\d{1,3})\??\s*(-|–|or)\s*(?<last>\-?\d{1,4})\??([^\du\-\[]|$)/im)
292
+
293
+ # Integer value for latest year if we have early numeric year range or single early numeric year
294
+ # @return [Integer, nil] year if date_str matches pattern; nil otherwise
295
+ def last_year_for_early_numeric(date_str)
296
+ return Regexp.last_match(:last).to_i if date_str.match(FIRST_LAST_EARLY_NUMERIC_REGEX)
167
297
 
168
- # year if orig_date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
169
- # @return [String, nil] -ddd if orig_date_str matches pattern; nil otherwise
170
- def year_for_early_numeric(orig_date_str)
171
- orig_date_str if orig_date_str.match(EARLY_NUMERIC) || orig_date_str =~ /^-\d{4}$/
298
+ year_for_early_numeric(date_str) # if single year, not matched above
172
299
  end
173
300
  end
174
301
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class ParseDate
4
- VERSION = '0.2.0'
4
+ VERSION = '0.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_date
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-09 00:00:00.000000000 Z
11
+ date: 2019-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk