parse_date 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 67507d7ddc6be0cff379b210ca3985316153e96823ee573f6292298746fd25f7
4
- data.tar.gz: 87ffbb778c68a34a472f99429252172bb676738d48a6776e3a5195b871397afd
3
+ metadata.gz: 446afc4299ca93634d6689b20d3a32bdb28460da5627701682733e18bb1f0a16
4
+ data.tar.gz: b5f6bbceab8542cc3c977c1c1ea8a4e250f82a9ae3a1eed2b335050067145703
5
5
  SHA512:
6
- metadata.gz: a89b5cd3b4712bae4a0d82e5624b2463a10719243b19cc5d4b28d6d95db2ed097691fcc4c0a39a6e319db46cb0d6a6972b2bdda9530b6fc3d75d5e52e7ba6fcf
7
- data.tar.gz: 634e22955684ec3176217d4ebfe16434bcffca287b8d64f30a53332c93e2b992cd995997956799aec53e0b7a68560de49ba23cdf6efdaf332dd4bad6ef91e18e
6
+ metadata.gz: 052f5d35a64c52f5bd74af2e487b70c8b03c37252cce33554aa3e7d6ba8141d9f0edd7c719aa487dbddb2e465b76d2d1ab222f057e68c7e6fa322eef313a35e6
7
+ data.tar.gz: 880e66962d42d0c3f2824510a94edcf9256cc6ba8298815721a2a720e8f9ef2158793820ff71fc3a22a0ad0f3397db3f8e2361753ee661494ed1371117562eb7
@@ -1,26 +1,32 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2019-10-01 18:08:41 -0700 using RuboCop version 0.74.0.
3
+ # on 2019-10-09 15:35:45 -0700 using RuboCop version 0.74.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
+ # Offense count: 2
10
+ # Cop supports --auto-correct.
11
+ Lint/StringConversionInInterpolation:
12
+ Exclude:
13
+ - 'lib/parse_date/int_from_string.rb'
14
+
9
15
  # Offense count: 3
10
16
  Metrics/AbcSize:
11
- Max: 22
17
+ Max: 18
12
18
 
13
- # Offense count: 2
19
+ # Offense count: 4
14
20
  # Configuration parameters: CountComments, ExcludedMethods.
15
21
  # ExcludedMethods: refine
16
22
  Metrics/BlockLength:
17
- Max: 512
23
+ Max: 561
18
24
 
19
25
  # Offense count: 3
20
26
  Metrics/CyclomaticComplexity:
21
- Max: 9
27
+ Max: 8
22
28
 
23
- # Offense count: 2
29
+ # Offense count: 1
24
30
  Metrics/PerceivedComplexity:
25
31
  Max: 9
26
32
 
@@ -54,3 +60,11 @@ Style/RegexpLiteral:
54
60
  Style/TrailingCommaInArrayLiteral:
55
61
  Exclude:
56
62
  - 'spec/parse_date/int_from_string_spec.rb'
63
+
64
+ # Offense count: 1
65
+ # Cop supports --auto-correct.
66
+ # Configuration parameters: EnforcedStyleForMultiline.
67
+ # SupportedStylesForMultiline: comma, consistent_comma, no_comma
68
+ Style/TrailingCommaInHashLiteral:
69
+ Exclude:
70
+ - 'spec/parse_date/int_from_string_spec.rb'
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Gem Version](https://badge.fury.io/rb/parse_date.svg)](https://badge.fury.io/rb/preservation-client)
1
+ [![Gem Version](https://badge.fury.io/rb/parse_date.svg)](https://badge.fury.io/rb/parse_date)
2
2
  [![Build Status](https://travis-ci.org/sul-dlss/parse_date.svg?branch=master)](https://travis-ci.org/sul-dlss/parse_date)
3
3
  [![Maintainability](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/maintainability)](https://codeclimate.com/github/sul-dlss/parse_date/maintainability)
4
4
  [![Test Coverage](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/test_coverage)](https://codeclimate.com/github/sul-dlss/parse_date/test_coverage)
@@ -30,14 +30,25 @@ ParseDate has class methods for date string parsing.
30
30
  ```
31
31
  require 'parse_date'
32
32
 
33
- ParseDate.year_int_from_date_str('12/25/00') # 2000
34
- ParseDate.year_int_from_date_str('5-1-21') # 1921
35
- ParseDate.year_int_from_date_str('18th century CE') # 1700
36
- ParseDate.year_int_from_date_str('1666 B.C.') # -1666
37
- ParseDate.year_int_from_date_str('17uu') # 1700
38
- ParseDate.year_int_from_date_str('-914') # -914
39
- ParseDate.year_int_from_date_str('[c1926]') # 1926
40
- ParseDate.year_int_from_date_str('ca. 1558') # 1558
33
+ ParseDate.earliest_year('12/25/00') # 2000
34
+ ParseDate.earliest_year('5-1-21') # 1921
35
+ ParseDate.earliest_year('1666 B.C.') # -1666
36
+ ParseDate.earliest_year('-914') # -914
37
+ ParseDate.earliest_year('[c1926]') # 1926
38
+ ParseDate.earliest_year('ca. 1558') # 1558
39
+ ParseDate.earliest_year('195-') # 1950
40
+ ParseDate.earliest_year('199u') # 1990
41
+ ParseDate.earliest_year('197?') # 1970
42
+ ParseDate.earliest_year('196x') # 1960
43
+ ParseDate.earliest_year('18th century CE') # 1700
44
+ ParseDate.earliest_year('17uu') # 1700
45
+
46
+ ParseDate.latest_year('195-') # 1959
47
+ ParseDate.latest_year('199u') # 1999
48
+ ParseDate.latest_year('197?') # 1979
49
+ ParseDate.latest_year('196x') # 1969
50
+ ParseDate.latest_year('18th century CE') # 1799
51
+ ParseDate.latest_year('17uu') # 1799
41
52
 
42
53
  ParseDate.year_int_valid?(0) # true
43
54
  ParseDate.year_int_valid?(5) # true
@@ -25,12 +25,15 @@ class ParseDate
25
25
  include Singleton
26
26
  extend ParseDate::IntFromString
27
27
 
28
- # class method delegation for ParseDate.year_int_from_date_str
29
- def self.year_int_from_date_str(orig_date_str)
30
- ParseDate::IntFromString.year_int_from_date_str(orig_date_str)
28
+ # class method delegation
29
+ def self.earliest_year(orig_date_str)
30
+ ParseDate::IntFromString.earliest_year(orig_date_str)
31
+ end
32
+
33
+ def self.latest_year(orig_date_str)
34
+ ParseDate::IntFromString.latest_year(orig_date_str)
31
35
  end
32
36
 
33
- # class method delegation for ParseDate.year_int_valid?
34
37
  def self.year_int_valid?(orig_date_str)
35
38
  ParseDate::IntFromString.year_int_valid?(orig_date_str)
36
39
  end
@@ -7,31 +7,63 @@ class ParseDate
7
7
  # Parse (Year) Integers from Date Strings
8
8
  module IntFromString
9
9
 
10
- # get Integer year if we can parse date_str to get a year.
10
+ # earliest year as Integer if we can parse one from orig_date_str
11
+ # e.g. if 17uu, result is 1700
11
12
  # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
12
13
  # found in our actual date strings in stanford-mods records), then
13
14
  # we use 20 as century digits unless it is greater than current year:
14
15
  # 1/1/17 -> 2017
15
16
  # 1/1/27 -> 1927
16
17
  # @return [Integer, nil] Integer year if we could parse one, nil otherwise
17
- def self.year_int_from_date_str(orig_date_str)
18
+ def self.earliest_year(orig_date_str)
18
19
  return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
19
20
  # B.C. first in case there are 4 digits, e.g. 1600 B.C.
20
- return ParseDate.send(:sortable_year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
21
+ return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
21
22
 
22
- result = ParseDate.send(:sortable_year_for_yyyy_or_yy, orig_date_str)
23
- result ||= ParseDate.send(:sortable_year_for_decade, orig_date_str) # 19xx or 20xx
24
- result ||= ParseDate.send(:sortable_year_for_century, orig_date_str)
25
- result ||= ParseDate.send(:sortable_year_int_for_early_numeric, orig_date_str)
23
+ result = ParseDate.send(:first_four_digits, orig_date_str)
24
+ result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
25
+ result ||= ParseDate.send(:first_year_for_decade, orig_date_str) # 19xx or 20xx
26
+ result ||= ParseDate.send(:first_year_for_century, orig_date_str)
27
+ result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
26
28
  unless result
27
29
  # try removing brackets between digits in case we have 169[5] or [18]91
28
30
  no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
29
- return year_int_from_date_str(no_brackets) if no_brackets
31
+ return earliest_year(no_brackets) if no_brackets
30
32
  end
31
33
  result.to_i if result && year_int_valid?(result.to_i)
32
34
  end
33
35
 
34
- # true if the year is between -999 and (current year + 1)
36
+ # latest year as Integer if we can parse one from orig_date_str
37
+ # e.g. if 17uu, result is 1799
38
+ # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
39
+ # found in our actual date strings in stanford-mods records), then
40
+ # we use 20 as century digits unless it is greater than current year:
41
+ # 1/1/17 -> 2017
42
+ # 1/1/27 -> 1927
43
+ # @return [Integer, nil] Integer year if we could parse one, nil otherwise
44
+ def self.latest_year(orig_date_str)
45
+ return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
46
+
47
+ # B.C. first in case there are 4 digits, e.g. 1600 B.C.
48
+ # NOTE: may want to parse for last occurrence of 4 consecutive digits
49
+ return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
50
+
51
+ # NOTE: may want to parse for last occurrence of 4 consecutive digits
52
+ result = ParseDate.send(:first_four_digits, orig_date_str)
53
+ result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
54
+ result ||= ParseDate.send(:last_year_for_decade, orig_date_str) # 19xx or 20xx
55
+ # NOTE: may want to parse for last occurrence of consecutive digits
56
+ result ||= ParseDate.send(:last_year_for_century, orig_date_str)
57
+ result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
58
+ unless result
59
+ # try removing brackets between digits in case we have 169[5] or [18]91
60
+ no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
61
+ return earliest_year(no_brackets) if no_brackets
62
+ end
63
+ result.to_i if result && year_int_valid?(result.to_i)
64
+ end
65
+
66
+ # true if the year is between -999 and (current year + 2)
35
67
  # @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
36
68
  def self.year_int_valid?(year)
37
69
  return false unless year.is_a? Integer
@@ -41,16 +73,6 @@ class ParseDate
41
73
 
42
74
  protected
43
75
 
44
- # get String sortable value year if we can parse date_str to get a year.
45
- # @return [String, nil] String sortable year if we could parse one, nil otherwise
46
- # note that these values must *lexically* sort to create a chronological sort.
47
- def sortable_year_for_yyyy_or_yy(orig_date_str)
48
- # most date strings have a four digit year
49
- result = ParseDate.sortable_year_for_yyyy(orig_date_str)
50
- result ||= ParseDate.sortable_year_for_yy(orig_date_str) # 19xx or 20xx
51
- result
52
- end
53
-
54
76
  BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
55
77
 
56
78
  # removes brackets between digits such as 169[5] or [18]91
@@ -60,18 +82,18 @@ class ParseDate
60
82
 
61
83
  # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
62
84
  # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
63
- def sortable_year_for_yyyy(orig_date_str)
85
+ def first_four_digits(orig_date_str)
64
86
  matches = orig_date_str.match(/\d{4}/) if orig_date_str
65
87
  matches&.to_s
66
88
  end
67
89
 
68
90
  # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
69
- # note that these are the only 2 digit year patterns found in our actual date strings in stanford-mods records
91
+ # note that these are the only 2 digit year patterns found in stanford-mods date fields
70
92
  # we use 20 as century digits unless it is greater than current year:
71
93
  # 1/1/17 -> 2017
72
94
  # 1/1/27 -> 1927
73
95
  # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
74
- def sortable_year_for_yy(orig_date_str)
96
+ def year_from_mm_dd_yy(orig_date_str)
75
97
  return unless orig_date_str
76
98
 
77
99
  slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
@@ -89,57 +111,64 @@ class ParseDate
89
111
 
90
112
  DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
91
113
 
92
- # get first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
114
+ # first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
93
115
  # note that these are the only decade patterns found in our actual date strings in MODS records
94
116
  # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
95
- def sortable_year_for_decade(orig_date_str)
117
+ def first_year_for_decade(orig_date_str)
96
118
  decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
97
119
  changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
98
- ParseDate.sortable_year_for_yyyy(changed_to_zero) if changed_to_zero
120
+ ParseDate.first_four_digits(changed_to_zero) if changed_to_zero
121
+ end
122
+
123
+ # last year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
124
+ # note that these are the only decade patterns found in our actual date strings in MODS records
125
+ # @return [String, nil] 4 digit year (e.g. 1869, 1959) if orig_date_str matches pattern, nil otherwise
126
+ def last_year_for_decade(orig_date_str)
127
+ decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
128
+ changed_to_nine = decade_matches.to_s.tr('u\-?x', '9') if decade_matches
129
+ ParseDate.first_four_digits(changed_to_nine) if changed_to_nine
99
130
  end
100
131
 
101
132
  CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
102
133
  CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
103
134
 
104
- # get first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
135
+ # first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
105
136
  # note that these are the only century patterns found in our actual date strings in MODS records
106
137
  # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
107
- def sortable_year_for_century(orig_date_str)
138
+ def first_year_for_century(orig_date_str)
108
139
  return unless orig_date_str
109
140
  return if orig_date_str =~ /B\.C\./
141
+ return "#{Regexp.last_match(1)}00" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
142
+ return "#{(Regexp.last_match(1).to_i - 1).to_s}00" if orig_date_str.match(CENTURY_WORD_REGEXP)
143
+ end
110
144
 
111
- century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
112
- if century_matches
113
- m = Regexp.last_match(1)
114
- return m + '00' if m.length == 2
115
- return '0' + m + '00' if m.length == 1
116
- end
117
-
118
- century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
119
- return unless century_str_matches
145
+ # last year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
146
+ # note that these are the only century patterns found in our actual date strings in MODS records
147
+ # @return [String, nil] yy99 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
148
+ def last_year_for_century(orig_date_str)
149
+ return unless orig_date_str
150
+ return if orig_date_str =~ /B\.C\./
151
+ return "#{Regexp.last_match(1)}99" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
120
152
 
121
- yy = (Regexp.last_match(1).to_i - 1).to_s
122
- return yy + '00' if yy.length == 2
123
- return '0' + yy + '00' if yy.length == 1
153
+ # TODO: do we want to look for the very last match of digits before "century" instead of the first one?
154
+ return "#{(Regexp.last_match(1).to_i - 1).to_s}99" if orig_date_str.match(CENTURY_WORD_REGEXP)
124
155
  end
125
156
 
126
157
  BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
127
158
 
128
- # get Integer sortable value for B.C. if we have B.C. pattern
159
+ # Integer sortable value for B.C. if we have B.C. pattern
129
160
  # @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
130
- def sortable_year_int_for_bc(orig_date_str)
161
+ def year_int_for_bc(orig_date_str)
131
162
  bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
132
163
  "-#{Regexp.last_match(1)}".to_i if bc_matches
133
164
  end
134
165
 
135
166
  EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
136
167
 
137
- # get Integer sortable value from date String containing yyy, yy, y, -y, -yy, -yyy, -yyyy
138
- # @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
139
- def sortable_year_int_for_early_numeric(orig_date_str)
140
- return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
141
-
142
- orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
168
+ # year if orig_date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
169
+ # @return [String, nil] -ddd if orig_date_str matches pattern; nil otherwise
170
+ def year_for_early_numeric(orig_date_str)
171
+ orig_date_str if orig_date_str.match(EARLY_NUMERIC) || orig_date_str =~ /^-\d{4}$/
143
172
  end
144
173
  end
145
174
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class ParseDate
4
- VERSION = '0.1.0'
4
+ VERSION = '0.2.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_date
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-07 00:00:00.000000000 Z
11
+ date: 2019-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk