parse_date 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 67507d7ddc6be0cff379b210ca3985316153e96823ee573f6292298746fd25f7
4
- data.tar.gz: 87ffbb778c68a34a472f99429252172bb676738d48a6776e3a5195b871397afd
3
+ metadata.gz: 446afc4299ca93634d6689b20d3a32bdb28460da5627701682733e18bb1f0a16
4
+ data.tar.gz: b5f6bbceab8542cc3c977c1c1ea8a4e250f82a9ae3a1eed2b335050067145703
5
5
  SHA512:
6
- metadata.gz: a89b5cd3b4712bae4a0d82e5624b2463a10719243b19cc5d4b28d6d95db2ed097691fcc4c0a39a6e319db46cb0d6a6972b2bdda9530b6fc3d75d5e52e7ba6fcf
7
- data.tar.gz: 634e22955684ec3176217d4ebfe16434bcffca287b8d64f30a53332c93e2b992cd995997956799aec53e0b7a68560de49ba23cdf6efdaf332dd4bad6ef91e18e
6
+ metadata.gz: 052f5d35a64c52f5bd74af2e487b70c8b03c37252cce33554aa3e7d6ba8141d9f0edd7c719aa487dbddb2e465b76d2d1ab222f057e68c7e6fa322eef313a35e6
7
+ data.tar.gz: 880e66962d42d0c3f2824510a94edcf9256cc6ba8298815721a2a720e8f9ef2158793820ff71fc3a22a0ad0f3397db3f8e2361753ee661494ed1371117562eb7
@@ -1,26 +1,32 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2019-10-01 18:08:41 -0700 using RuboCop version 0.74.0.
3
+ # on 2019-10-09 15:35:45 -0700 using RuboCop version 0.74.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
+ # Offense count: 2
10
+ # Cop supports --auto-correct.
11
+ Lint/StringConversionInInterpolation:
12
+ Exclude:
13
+ - 'lib/parse_date/int_from_string.rb'
14
+
9
15
  # Offense count: 3
10
16
  Metrics/AbcSize:
11
- Max: 22
17
+ Max: 18
12
18
 
13
- # Offense count: 2
19
+ # Offense count: 4
14
20
  # Configuration parameters: CountComments, ExcludedMethods.
15
21
  # ExcludedMethods: refine
16
22
  Metrics/BlockLength:
17
- Max: 512
23
+ Max: 561
18
24
 
19
25
  # Offense count: 3
20
26
  Metrics/CyclomaticComplexity:
21
- Max: 9
27
+ Max: 8
22
28
 
23
- # Offense count: 2
29
+ # Offense count: 1
24
30
  Metrics/PerceivedComplexity:
25
31
  Max: 9
26
32
 
@@ -54,3 +60,11 @@ Style/RegexpLiteral:
54
60
  Style/TrailingCommaInArrayLiteral:
55
61
  Exclude:
56
62
  - 'spec/parse_date/int_from_string_spec.rb'
63
+
64
+ # Offense count: 1
65
+ # Cop supports --auto-correct.
66
+ # Configuration parameters: EnforcedStyleForMultiline.
67
+ # SupportedStylesForMultiline: comma, consistent_comma, no_comma
68
+ Style/TrailingCommaInHashLiteral:
69
+ Exclude:
70
+ - 'spec/parse_date/int_from_string_spec.rb'
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Gem Version](https://badge.fury.io/rb/parse_date.svg)](https://badge.fury.io/rb/preservation-client)
1
+ [![Gem Version](https://badge.fury.io/rb/parse_date.svg)](https://badge.fury.io/rb/parse_date)
2
2
  [![Build Status](https://travis-ci.org/sul-dlss/parse_date.svg?branch=master)](https://travis-ci.org/sul-dlss/parse_date)
3
3
  [![Maintainability](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/maintainability)](https://codeclimate.com/github/sul-dlss/parse_date/maintainability)
4
4
  [![Test Coverage](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/test_coverage)](https://codeclimate.com/github/sul-dlss/parse_date/test_coverage)
@@ -30,14 +30,25 @@ ParseDate has class methods for date string parsing.
30
30
  ```
31
31
  require 'parse_date'
32
32
 
33
- ParseDate.year_int_from_date_str('12/25/00') # 2000
34
- ParseDate.year_int_from_date_str('5-1-21') # 1921
35
- ParseDate.year_int_from_date_str('18th century CE') # 1700
36
- ParseDate.year_int_from_date_str('1666 B.C.') # -1666
37
- ParseDate.year_int_from_date_str('17uu') # 1700
38
- ParseDate.year_int_from_date_str('-914') # -914
39
- ParseDate.year_int_from_date_str('[c1926]') # 1926
40
- ParseDate.year_int_from_date_str('ca. 1558') # 1558
33
+ ParseDate.earliest_year('12/25/00') # 2000
34
+ ParseDate.earliest_year('5-1-21') # 1921
35
+ ParseDate.earliest_year('1666 B.C.') # -1666
36
+ ParseDate.earliest_year('-914') # -914
37
+ ParseDate.earliest_year('[c1926]') # 1926
38
+ ParseDate.earliest_year('ca. 1558') # 1558
39
+ ParseDate.earliest_year('195-') # 1950
40
+ ParseDate.earliest_year('199u') # 1990
41
+ ParseDate.earliest_year('197?') # 1970
42
+ ParseDate.earliest_year('196x') # 1960
43
+ ParseDate.earliest_year('18th century CE') # 1700
44
+ ParseDate.earliest_year('17uu') # 1700
45
+
46
+ ParseDate.latest_year('195-') # 1959
47
+ ParseDate.latest_year('199u') # 1999
48
+ ParseDate.latest_year('197?') # 1979
49
+ ParseDate.latest_year('196x') # 1969
50
+ ParseDate.latest_year('18th century CE') # 1799
51
+ ParseDate.latest_year('17uu') # 1799
41
52
 
42
53
  ParseDate.year_int_valid?(0) # true
43
54
  ParseDate.year_int_valid?(5) # true
@@ -25,12 +25,15 @@ class ParseDate
25
25
  include Singleton
26
26
  extend ParseDate::IntFromString
27
27
 
28
- # class method delegation for ParseDate.year_int_from_date_str
29
- def self.year_int_from_date_str(orig_date_str)
30
- ParseDate::IntFromString.year_int_from_date_str(orig_date_str)
28
+ # class method delegation
29
+ def self.earliest_year(orig_date_str)
30
+ ParseDate::IntFromString.earliest_year(orig_date_str)
31
+ end
32
+
33
+ def self.latest_year(orig_date_str)
34
+ ParseDate::IntFromString.latest_year(orig_date_str)
31
35
  end
32
36
 
33
- # class method delegation for ParseDate.year_int_valid?
34
37
  def self.year_int_valid?(orig_date_str)
35
38
  ParseDate::IntFromString.year_int_valid?(orig_date_str)
36
39
  end
@@ -7,31 +7,63 @@ class ParseDate
7
7
  # Parse (Year) Integers from Date Strings
8
8
  module IntFromString
9
9
 
10
- # get Integer year if we can parse date_str to get a year.
10
+ # earliest year as Integer if we can parse one from orig_date_str
11
+ # e.g. if 17uu, result is 1700
11
12
  # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
12
13
  # found in our actual date strings in stanford-mods records), then
13
14
  # we use 20 as century digits unless it is greater than current year:
14
15
  # 1/1/17 -> 2017
15
16
  # 1/1/27 -> 1927
16
17
  # @return [Integer, nil] Integer year if we could parse one, nil otherwise
17
- def self.year_int_from_date_str(orig_date_str)
18
+ def self.earliest_year(orig_date_str)
18
19
  return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
19
20
  # B.C. first in case there are 4 digits, e.g. 1600 B.C.
20
- return ParseDate.send(:sortable_year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
21
+ return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
21
22
 
22
- result = ParseDate.send(:sortable_year_for_yyyy_or_yy, orig_date_str)
23
- result ||= ParseDate.send(:sortable_year_for_decade, orig_date_str) # 19xx or 20xx
24
- result ||= ParseDate.send(:sortable_year_for_century, orig_date_str)
25
- result ||= ParseDate.send(:sortable_year_int_for_early_numeric, orig_date_str)
23
+ result = ParseDate.send(:first_four_digits, orig_date_str)
24
+ result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
25
+ result ||= ParseDate.send(:first_year_for_decade, orig_date_str) # 19xx or 20xx
26
+ result ||= ParseDate.send(:first_year_for_century, orig_date_str)
27
+ result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
26
28
  unless result
27
29
  # try removing brackets between digits in case we have 169[5] or [18]91
28
30
  no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
29
- return year_int_from_date_str(no_brackets) if no_brackets
31
+ return earliest_year(no_brackets) if no_brackets
30
32
  end
31
33
  result.to_i if result && year_int_valid?(result.to_i)
32
34
  end
33
35
 
34
- # true if the year is between -999 and (current year + 1)
36
+ # latest year as Integer if we can parse one from orig_date_str
37
+ # e.g. if 17uu, result is 1799
38
+ # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
39
+ # found in our actual date strings in stanford-mods records), then
40
+ # we use 20 as century digits unless it is greater than current year:
41
+ # 1/1/17 -> 2017
42
+ # 1/1/27 -> 1927
43
+ # @return [Integer, nil] Integer year if we could parse one, nil otherwise
44
+ def self.latest_year(orig_date_str)
45
+ return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
46
+
47
+ # B.C. first in case there are 4 digits, e.g. 1600 B.C.
48
+ # NOTE: may want to parse for last occurrence of 4 consecutive digits
49
+ return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
50
+
51
+ # NOTE: may want to parse for last occurrence of 4 consecutive digits
52
+ result = ParseDate.send(:first_four_digits, orig_date_str)
53
+ result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
54
+ result ||= ParseDate.send(:last_year_for_decade, orig_date_str) # yyyu, yyy-, yyy? or yyyx
55
+ # NOTE: may want to parse for last occurrence of consecutive digits
56
+ result ||= ParseDate.send(:last_year_for_century, orig_date_str)
57
+ result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
58
+ unless result
59
+ # retry without brackets between digits (e.g. 169[5] or [18]91), still asking for the latest year
60
+ no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
61
+ return latest_year(no_brackets) if no_brackets
62
+ end
63
+ result.to_i if result && year_int_valid?(result.to_i)
64
+ end
65
+
66
+ # true if the year is between -999 and (current year + 2)
35
67
  # @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
36
68
  def self.year_int_valid?(year)
37
69
  return false unless year.is_a? Integer
@@ -41,16 +73,6 @@ class ParseDate
41
73
 
42
74
  protected
43
75
 
44
- # get String sortable value year if we can parse date_str to get a year.
45
- # @return [String, nil] String sortable year if we could parse one, nil otherwise
46
- # note that these values must *lexically* sort to create a chronological sort.
47
- def sortable_year_for_yyyy_or_yy(orig_date_str)
48
- # most date strings have a four digit year
49
- result = ParseDate.sortable_year_for_yyyy(orig_date_str)
50
- result ||= ParseDate.sortable_year_for_yy(orig_date_str) # 19xx or 20xx
51
- result
52
- end
53
-
54
76
  BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
55
77
 
56
78
  # removes brackets between digits such as 169[5] or [18]91
@@ -60,18 +82,18 @@ class ParseDate
60
82
 
61
83
  # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
62
84
  # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
63
- def sortable_year_for_yyyy(orig_date_str)
85
+ def first_four_digits(orig_date_str)
64
86
  matches = orig_date_str.match(/\d{4}/) if orig_date_str
65
87
  matches&.to_s
66
88
  end
67
89
 
68
90
  # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
69
- # note that these are the only 2 digit year patterns found in our actual date strings in stanford-mods records
91
+ # note that these are the only 2 digit year patterns found in stanford-mods date fields
70
92
  # we use 20 as century digits unless it is greater than current year:
71
93
  # 1/1/17 -> 2017
72
94
  # 1/1/27 -> 1927
73
95
  # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
74
- def sortable_year_for_yy(orig_date_str)
96
+ def year_from_mm_dd_yy(orig_date_str)
75
97
  return unless orig_date_str
76
98
 
77
99
  slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
@@ -89,57 +111,64 @@ class ParseDate
89
111
 
90
112
  DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
91
113
 
92
- # get first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
114
+ # first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
93
115
  # note that these are the only decade patterns found in our actual date strings in MODS records
94
116
  # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
95
- def sortable_year_for_decade(orig_date_str)
117
+ def first_year_for_decade(orig_date_str)
96
118
  decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
97
119
  changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
98
- ParseDate.sortable_year_for_yyyy(changed_to_zero) if changed_to_zero
120
+ ParseDate.first_four_digits(changed_to_zero) if changed_to_zero
121
+ end
122
+
123
+ # last year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
124
+ # note that these are the only decade patterns found in our actual date strings in MODS records
125
+ # @return [String, nil] 4 digit year (e.g. 1869, 1959) if orig_date_str matches pattern, nil otherwise
126
+ def last_year_for_decade(orig_date_str)
127
+ decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
128
+ changed_to_nine = decade_matches.to_s.tr('u\-?x', '9') if decade_matches
129
+ ParseDate.first_four_digits(changed_to_nine) if changed_to_nine
99
130
  end
100
131
 
101
132
  CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
102
133
  CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
103
134
 
104
- # get first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
135
+ # first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
105
136
  # note that these are the only century patterns found in our actual date strings in MODS records
106
137
  # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
107
- def sortable_year_for_century(orig_date_str)
138
+ def first_year_for_century(orig_date_str)
108
139
  return unless orig_date_str
109
140
  return if orig_date_str =~ /B\.C\./
141
+ return "#{Regexp.last_match(1)}00" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
142
+ return "#{(Regexp.last_match(1).to_i - 1).to_s}00" if orig_date_str.match(CENTURY_WORD_REGEXP)
143
+ end
110
144
 
111
- century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
112
- if century_matches
113
- m = Regexp.last_match(1)
114
- return m + '00' if m.length == 2
115
- return '0' + m + '00' if m.length == 1
116
- end
117
-
118
- century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
119
- return unless century_str_matches
145
+ # last year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
146
+ # note that these are the only century patterns found in our actual date strings in MODS records
147
+ # @return [String, nil] yy99 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
148
+ def last_year_for_century(orig_date_str)
149
+ return unless orig_date_str
150
+ return if orig_date_str =~ /B\.C\./
151
+ return "#{Regexp.last_match(1)}99" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
120
152
 
121
- yy = (Regexp.last_match(1).to_i - 1).to_s
122
- return yy + '00' if yy.length == 2
123
- return '0' + yy + '00' if yy.length == 1
153
+ # TODO: do we want to look for the very last match of digits before "century" instead of the first one?
154
+ return "#{(Regexp.last_match(1).to_i - 1).to_s}99" if orig_date_str.match(CENTURY_WORD_REGEXP)
124
155
  end
125
156
 
126
157
  BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
127
158
 
128
- # get Integer sortable value for B.C. if we have B.C. pattern
159
+ # Integer sortable value for B.C. if we have B.C. pattern
129
160
  # @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
130
- def sortable_year_int_for_bc(orig_date_str)
161
+ def year_int_for_bc(orig_date_str)
131
162
  bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
132
163
  "-#{Regexp.last_match(1)}".to_i if bc_matches
133
164
  end
134
165
 
135
166
  EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
136
167
 
137
- # get Integer sortable value from date String containing yyy, yy, y, -y, -yy, -yyy, -yyyy
138
- # @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
139
- def sortable_year_int_for_early_numeric(orig_date_str)
140
- return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
141
-
142
- orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
168
+ # year if orig_date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
169
+ # @return [String, nil] -ddd if orig_date_str matches pattern; nil otherwise
170
+ def year_for_early_numeric(orig_date_str)
171
+ orig_date_str if orig_date_str.match(EARLY_NUMERIC) || orig_date_str =~ /^-\d{4}$/
143
172
  end
144
173
  end
145
174
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class ParseDate
4
- VERSION = '0.1.0'
4
+ VERSION = '0.2.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_date
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-07 00:00:00.000000000 Z
11
+ date: 2019-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk