parse_date 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 501a6ce6db3b6f30cf1447079027cc3e1f2d80fc2410636190483aadbddb7d7b
4
- data.tar.gz: 649cf13aa5b94b32d54b7bdba6b6199021ec885ec49cb0899e9b8ef5a5319f94
3
+ metadata.gz: 67507d7ddc6be0cff379b210ca3985316153e96823ee573f6292298746fd25f7
4
+ data.tar.gz: 87ffbb778c68a34a472f99429252172bb676738d48a6776e3a5195b871397afd
5
5
  SHA512:
6
- metadata.gz: 4287a85f179120c0de4be292e8a2eb73c7966884c4ce03031e87772c0a66a5f8254e3c2055a0e187b4cb764b991e23f39380e362848bc5d87219d66c634fde0a
7
- data.tar.gz: 4ba900b4300aa88900ad803a1d8acaf4c180c48260546c73d7fdfc4a78e20090a5a01308307d0c657287f33fe8546798bfa84e3a1b50341d482dcd66db046204
6
+ metadata.gz: a89b5cd3b4712bae4a0d82e5624b2463a10719243b19cc5d4b28d6d95db2ed097691fcc4c0a39a6e319db46cb0d6a6972b2bdda9530b6fc3d75d5e52e7ba6fcf
7
+ data.tar.gz: 634e22955684ec3176217d4ebfe16434bcffca287b8d64f30a53332c93e2b992cd995997956799aec53e0b7a68560de49ba23cdf6efdaf332dd4bad6ef91e18e
@@ -6,16 +6,17 @@ AllCops:
6
6
  Layout/EmptyLinesAroundClassBody:
7
7
  Enabled: false
8
8
 
9
+ Layout/EmptyLinesAroundModuleBody:
10
+ Enabled: false
11
+
9
12
  Metrics/LineLength:
10
13
  Max: 120
11
14
 
12
15
  Metrics/MethodLength:
13
16
  Max: 15
14
17
 
15
- Style/Documentation:
16
- Exclude:
17
- - 'spec/**/*'
18
- - 'lib/parse_date.rb'
19
-
20
18
  Style/WordArray:
21
19
  Enabled: false
20
+
21
+ Style/YodaCondition:
22
+ Enabled: false
@@ -1,7 +1,56 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2019-09-27 15:27:21 -0700 using RuboCop version 0.74.0.
3
+ # on 2019-10-01 18:08:41 -0700 using RuboCop version 0.74.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 3
10
+ Metrics/AbcSize:
11
+ Max: 22
12
+
13
+ # Offense count: 2
14
+ # Configuration parameters: CountComments, ExcludedMethods.
15
+ # ExcludedMethods: refine
16
+ Metrics/BlockLength:
17
+ Max: 512
18
+
19
+ # Offense count: 3
20
+ Metrics/CyclomaticComplexity:
21
+ Max: 9
22
+
23
+ # Offense count: 2
24
+ Metrics/PerceivedComplexity:
25
+ Max: 9
26
+
27
+ # Offense count: 2
28
+ Style/Documentation:
29
+ Exclude:
30
+ - 'spec/**/*'
31
+ - 'test/**/*'
32
+ - 'lib/parse_date.rb'
33
+
34
+ # Offense count: 1
35
+ # Cop supports --auto-correct.
36
+ # Configuration parameters: EnforcedOctalStyle.
37
+ # SupportedOctalStyles: zero_with_o, zero_only
38
+ Style/NumericLiteralPrefix:
39
+ Exclude:
40
+ - 'spec/parse_date/int_from_string_spec.rb'
41
+
42
+ # Offense count: 1
43
+ # Cop supports --auto-correct.
44
+ # Configuration parameters: EnforcedStyle, AllowInnerSlashes.
45
+ # SupportedStyles: slashes, percent_r, mixed
46
+ Style/RegexpLiteral:
47
+ Exclude:
48
+ - 'lib/parse_date/int_from_string.rb'
49
+
50
+ # Offense count: 1
51
+ # Cop supports --auto-correct.
52
+ # Configuration parameters: EnforcedStyleForMultiline.
53
+ # SupportedStylesForMultiline: comma, consistent_comma, no_comma
54
+ Style/TrailingCommaInArrayLiteral:
55
+ Exclude:
56
+ - 'spec/parse_date/int_from_string_spec.rb'
data/README.md CHANGED
@@ -25,7 +25,35 @@ Or install it yourself as:
25
25
 
26
26
  ## Usage
27
27
 
28
- TODO: Write usage instructions here
28
+ ParseDate has class methods for date string parsing.
29
+
30
+ ```
31
+ require 'parse_date'
32
+
33
+ ParseDate.year_int_from_date_str('12/25/00') # 2000
34
+ ParseDate.year_int_from_date_str('5-1-21') # 1921
35
+ ParseDate.year_int_from_date_str('18th century CE') # 1700
36
+ ParseDate.year_int_from_date_str('1666 B.C.') # -1666
37
+ ParseDate.year_int_from_date_str('17uu') # 1700
38
+ ParseDate.year_int_from_date_str('-914') # -914
39
+ ParseDate.year_int_from_date_str('[c1926]') # 1926
40
+ ParseDate.year_int_from_date_str('ca. 1558') # 1558
41
+
42
+ ParseDate.year_int_valid?(0) # true
43
+ ParseDate.year_int_valid?(5) # true
44
+ ParseDate.year_int_valid?(33) # true
45
+ ParseDate.year_int_valid?(150) # true
46
+ ParseDate.year_int_valid?(2019) # true
47
+ ParseDate.year_int_valid?(Date.today.year + 1) # true
48
+ ParseDate.year_int_valid?(-3) # true
49
+ ParseDate.year_int_valid?(-35) # true
50
+ ParseDate.year_int_valid?(-999) # true
51
+ ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
52
+ ParseDate.year_int_valid?('165x') # false
53
+ ParseDate.year_int_valid?('198-') # false
54
+ ParseDate.year_int_valid?('random text') # false
55
+ ParseDate.year_int_valid?(nil) # false
56
+ ```
29
57
 
30
58
  ## Development
31
59
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'singleton'
3
4
  require 'zeitwerk'
4
5
 
5
6
  class ParseDateInflector < Zeitwerk::Inflector
@@ -18,7 +19,19 @@ loader.inflector = ParseDateInflector.new
18
19
  loader.push_dir(File.absolute_path("#{__FILE__}/.."))
19
20
  loader.setup
20
21
 
21
- module ParseDate
22
+ class ParseDate
22
23
  class Error < StandardError; end
23
- # Your code goes here...
24
+
25
+ include Singleton
26
+ extend ParseDate::IntFromString
27
+
28
+ # class method delegation for ParseDate.year_int_from_date_str
29
+ def self.year_int_from_date_str(orig_date_str)
30
+ ParseDate::IntFromString.year_int_from_date_str(orig_date_str)
31
+ end
32
+
33
+ # class method delegation for ParseDate.year_int_valid?
34
+ def self.year_int_valid?(orig_date_str)
35
+ ParseDate::IntFromString.year_int_valid?(orig_date_str)
36
+ end
24
37
  end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date' # so upstream callers don't have to require it
4
+
5
+ class ParseDate
6
+
7
+ # Parse (Year) Integers from Date Strings
8
+ module IntFromString
9
+
10
+ # get Integer year if we can parse date_str to get a year.
11
+ # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
12
+ # found in our actual date strings in stanford-mods records), then
13
+ # we use 20 as century digits unless that yields a date after today, in which case we go back 100 years:
14
+ # 1/1/17 -> 2017
15
+ # 1/1/27 -> 1927
16
+ # @return [Integer, nil] Integer year if we could parse one, nil otherwise
17
+ def self.year_int_from_date_str(orig_date_str)
18
+ return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
19
+ # B.C. first in case there are 4 digits, e.g. 1600 B.C.
20
+ return ParseDate.send(:sortable_year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
21
+
22
+ result = ParseDate.send(:sortable_year_for_yyyy_or_yy, orig_date_str)
23
+ result ||= ParseDate.send(:sortable_year_for_decade, orig_date_str) # yyyu, yyy-, yyy? or yyyx
24
+ result ||= ParseDate.send(:sortable_year_for_century, orig_date_str)
25
+ result ||= ParseDate.send(:sortable_year_int_for_early_numeric, orig_date_str)
26
+ unless result
27
+ # try removing brackets between digits in case we have 169[5] or [18]91
28
+ no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
29
+ return year_int_from_date_str(no_brackets) if no_brackets
30
+ end
31
+ result.to_i if result && year_int_valid?(result.to_i)
32
+ end
33
+
34
+ # true if the year is between -999 and (current year + 1)
35
+ # @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
36
+ def self.year_int_valid?(year)
37
+ return false unless year.is_a? Integer
38
+
39
+ (-1000 < year.to_i) && (year < Date.today.year + 2)
40
+ end
41
+
42
+ protected
43
+
44
+ # get String sortable value year if we can parse date_str to get a year.
45
+ # @return [String, nil] String sortable year if we could parse one, nil otherwise
46
+ # note that these values must *lexically* sort to create a chronological sort.
47
+ def sortable_year_for_yyyy_or_yy(orig_date_str)
48
+ # most date strings have a four digit year
49
+ result = ParseDate.sortable_year_for_yyyy(orig_date_str)
50
+ result ||= ParseDate.sortable_year_for_yy(orig_date_str) # 19xx or 20xx
51
+ result
52
+ end
53
+
54
+ BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
55
+
56
+ # removes brackets between digits such as 169[5] or [18]91
57
+ def remove_brackets(orig_date_str)
58
+ orig_date_str.delete('[]') if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
59
+ end
60
+
61
+ # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
62
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
63
+ def sortable_year_for_yyyy(orig_date_str)
64
+ matches = orig_date_str.match(/\d{4}/) if orig_date_str
65
+ matches&.to_s
66
+ end
67
+
68
+ # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
69
+ # note that these are the only 2 digit year patterns found in our actual date strings in stanford-mods records
70
+ # we use 20 as century digits unless that yields a date after today, in which case we go back 100 years:
71
+ # 1/1/17 -> 2017
72
+ # 1/1/27 -> 1927
73
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
74
+ def sortable_year_for_yy(orig_date_str)
75
+ return unless orig_date_str
76
+
77
+ slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
78
+ if slash_matches
79
+ date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
80
+ else
81
+ hyphen_matches = orig_date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
82
+ date_obj = Date.strptime(orig_date_str, '%m-%d-%y') if hyphen_matches
83
+ end
84
+ date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday) if date_obj && date_obj > Date.today
85
+ date_obj.year.to_s if date_obj
86
+ rescue ArgumentError
87
+ nil # explicitly want nil if date won't parse
88
+ end
89
+
90
+ DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
91
+
92
+ # get first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
93
+ # note that these are the only decade patterns found in our actual date strings in MODS records
94
+ # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
95
+ def sortable_year_for_decade(orig_date_str)
96
+ decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
97
+ changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
98
+ ParseDate.sortable_year_for_yyyy(changed_to_zero) if changed_to_zero
99
+ end
100
+
101
+ CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
102
+ CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
103
+
104
+ # get first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
105
+ # note that these are the only century patterns found in our actual date strings in MODS records
106
+ # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
107
+ def sortable_year_for_century(orig_date_str)
108
+ return unless orig_date_str
109
+ return if orig_date_str =~ /B\.C\./
110
+
111
+ century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
112
+ if century_matches
113
+ m = Regexp.last_match(1)
114
+ return m + '00' if m.length == 2
115
+ return '0' + m + '00' if m.length == 1
116
+ end
117
+
118
+ century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
119
+ return unless century_str_matches
120
+
121
+ yy = (Regexp.last_match(1).to_i - 1).to_s
122
+ return yy + '00' if yy.length == 2
123
+ return '0' + yy + '00' if yy.length == 1
124
+ end
125
+
126
+ BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
127
+
128
+ # get Integer sortable value for B.C. if we have B.C. pattern
129
+ # @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
130
+ def sortable_year_int_for_bc(orig_date_str)
131
+ bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
132
+ "-#{Regexp.last_match(1)}".to_i if bc_matches
133
+ end
134
+
135
+ EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
136
+
137
+ # get Integer sortable value from date String containing yyy, yy, y, -y, -yy, -yyy, -yyyy
138
+ # @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
139
+ def sortable_year_int_for_early_numeric(orig_date_str)
140
+ return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
141
+
142
+ orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
143
+ end
144
+ end
145
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module ParseDate
4
- VERSION = '0.0.1'
3
+ class ParseDate
4
+ VERSION = '0.1.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_date
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-27 00:00:00.000000000 Z
11
+ date: 2019-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk
@@ -129,6 +129,7 @@ files:
129
129
  - bin/console
130
130
  - bin/setup
131
131
  - lib/parse_date.rb
132
+ - lib/parse_date/int_from_string.rb
132
133
  - lib/parse_date/version.rb
133
134
  - parse_date.gemspec
134
135
  homepage: https://github.com/sul-dlss/parse_date