parse_date 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 501a6ce6db3b6f30cf1447079027cc3e1f2d80fc2410636190483aadbddb7d7b
4
- data.tar.gz: 649cf13aa5b94b32d54b7bdba6b6199021ec885ec49cb0899e9b8ef5a5319f94
3
+ metadata.gz: 67507d7ddc6be0cff379b210ca3985316153e96823ee573f6292298746fd25f7
4
+ data.tar.gz: 87ffbb778c68a34a472f99429252172bb676738d48a6776e3a5195b871397afd
5
5
  SHA512:
6
- metadata.gz: 4287a85f179120c0de4be292e8a2eb73c7966884c4ce03031e87772c0a66a5f8254e3c2055a0e187b4cb764b991e23f39380e362848bc5d87219d66c634fde0a
7
- data.tar.gz: 4ba900b4300aa88900ad803a1d8acaf4c180c48260546c73d7fdfc4a78e20090a5a01308307d0c657287f33fe8546798bfa84e3a1b50341d482dcd66db046204
6
+ metadata.gz: a89b5cd3b4712bae4a0d82e5624b2463a10719243b19cc5d4b28d6d95db2ed097691fcc4c0a39a6e319db46cb0d6a6972b2bdda9530b6fc3d75d5e52e7ba6fcf
7
+ data.tar.gz: 634e22955684ec3176217d4ebfe16434bcffca287b8d64f30a53332c93e2b992cd995997956799aec53e0b7a68560de49ba23cdf6efdaf332dd4bad6ef91e18e
@@ -6,16 +6,17 @@ AllCops:
6
6
  Layout/EmptyLinesAroundClassBody:
7
7
  Enabled: false
8
8
 
9
+ Layout/EmptyLinesAroundModuleBody:
10
+ Enabled: false
11
+
9
12
  Metrics/LineLength:
10
13
  Max: 120
11
14
 
12
15
  Metrics/MethodLength:
13
16
  Max: 15
14
17
 
15
- Style/Documentation:
16
- Exclude:
17
- - 'spec/**/*'
18
- - 'lib/parse_date.rb'
19
-
20
18
  Style/WordArray:
21
19
  Enabled: false
20
+
21
+ Style/YodaCondition:
22
+ Enabled: false
@@ -1,7 +1,56 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2019-09-27 15:27:21 -0700 using RuboCop version 0.74.0.
3
+ # on 2019-10-01 18:08:41 -0700 using RuboCop version 0.74.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 3
10
+ Metrics/AbcSize:
11
+ Max: 22
12
+
13
+ # Offense count: 2
14
+ # Configuration parameters: CountComments, ExcludedMethods.
15
+ # ExcludedMethods: refine
16
+ Metrics/BlockLength:
17
+ Max: 512
18
+
19
+ # Offense count: 3
20
+ Metrics/CyclomaticComplexity:
21
+ Max: 9
22
+
23
+ # Offense count: 2
24
+ Metrics/PerceivedComplexity:
25
+ Max: 9
26
+
27
+ # Offense count: 2
28
+ Style/Documentation:
29
+ Exclude:
30
+ - 'spec/**/*'
31
+ - 'test/**/*'
32
+ - 'lib/parse_date.rb'
33
+
34
+ # Offense count: 1
35
+ # Cop supports --auto-correct.
36
+ # Configuration parameters: EnforcedOctalStyle.
37
+ # SupportedOctalStyles: zero_with_o, zero_only
38
+ Style/NumericLiteralPrefix:
39
+ Exclude:
40
+ - 'spec/parse_date/int_from_string_spec.rb'
41
+
42
+ # Offense count: 1
43
+ # Cop supports --auto-correct.
44
+ # Configuration parameters: EnforcedStyle, AllowInnerSlashes.
45
+ # SupportedStyles: slashes, percent_r, mixed
46
+ Style/RegexpLiteral:
47
+ Exclude:
48
+ - 'lib/parse_date/int_from_string.rb'
49
+
50
+ # Offense count: 1
51
+ # Cop supports --auto-correct.
52
+ # Configuration parameters: EnforcedStyleForMultiline.
53
+ # SupportedStylesForMultiline: comma, consistent_comma, no_comma
54
+ Style/TrailingCommaInArrayLiteral:
55
+ Exclude:
56
+ - 'spec/parse_date/int_from_string_spec.rb'
data/README.md CHANGED
@@ -25,7 +25,35 @@ Or install it yourself as:
25
25
 
26
26
  ## Usage
27
27
 
28
- TODO: Write usage instructions here
28
+ ParseDate has class methods for date string parsing.
29
+
30
+ ```
31
+ require 'parse_date'
32
+
33
+ ParseDate.year_int_from_date_str('12/25/00') # 2000
34
+ ParseDate.year_int_from_date_str('5-1-21') # 1921
35
+ ParseDate.year_int_from_date_str('18th century CE') # 1700
36
+ ParseDate.year_int_from_date_str('1666 B.C.') # -1666
37
+ ParseDate.year_int_from_date_str('17uu') # 1700
38
+ ParseDate.year_int_from_date_str('-914') # -914
39
+ ParseDate.year_int_from_date_str('[c1926]') # 1926
40
+ ParseDate.year_int_from_date_str('ca. 1558') # 1558
41
+
42
+ ParseDate.year_int_valid?(0) # true
43
+ ParseDate.year_int_valid?(5) # true
44
+ ParseDate.year_int_valid?(33) # true
45
+ ParseDate.year_int_valid?(150) # true
46
+ ParseDate.year_int_valid?(2019) # true
47
+ ParseDate.year_int_valid?(Date.today.year + 1) # true
48
+ ParseDate.year_int_valid?(-3) # true
49
+ ParseDate.year_int_valid?(-35) # true
50
+ ParseDate.year_int_valid?(-999) # true
51
+ ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
52
+ ParseDate.year_int_valid?('165x') # false
53
+ ParseDate.year_int_valid?('198-') # false
54
+ ParseDate.year_int_valid?('random text') # false
55
+ ParseDate.year_int_valid?(nil) # false
56
+ ```
29
57
 
30
58
  ## Development
31
59
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'singleton'
3
4
  require 'zeitwerk'
4
5
 
5
6
  class ParseDateInflector < Zeitwerk::Inflector
@@ -18,7 +19,19 @@ loader.inflector = ParseDateInflector.new
18
19
  loader.push_dir(File.absolute_path("#{__FILE__}/.."))
19
20
  loader.setup
20
21
 
21
- module ParseDate
22
+ class ParseDate
22
23
  class Error < StandardError; end
23
- # Your code goes here...
24
+
25
+ include Singleton
26
+ extend ParseDate::IntFromString
27
+
28
+ # class method delegation for ParseDate.year_int_from_date_str
29
+ def self.year_int_from_date_str(orig_date_str)
30
+ ParseDate::IntFromString.year_int_from_date_str(orig_date_str)
31
+ end
32
+
33
+ # class method delegation for ParseDate.year_int_valid?
34
+ def self.year_int_valid?(orig_date_str)
35
+ ParseDate::IntFromString.year_int_valid?(orig_date_str)
36
+ end
24
37
  end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'date' # so upstream callers don't have to require it
4
+
5
+ class ParseDate
6
+
7
+ # Parse (Year) Integers from Date Strings
8
+ module IntFromString
9
+
10
+ # get Integer year if we can parse date_str to get a year.
11
+ # NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
12
+ # found in our actual date strings in stanford-mods records), then
13
+ # we use 20 as century digits unless it is greater than current year:
14
+ # 1/1/17 -> 2017
15
+ # 1/1/27 -> 1927
16
+ # @return [Integer, nil] Integer year if we could parse one, nil otherwise
17
+ def self.year_int_from_date_str(orig_date_str)
18
+ return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
19
+ # B.C. first in case there are 4 digits, e.g. 1600 B.C.
20
+ return ParseDate.send(:sortable_year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
21
+
22
+ result = ParseDate.send(:sortable_year_for_yyyy_or_yy, orig_date_str)
23
+ result ||= ParseDate.send(:sortable_year_for_decade, orig_date_str) # 19xx or 20xx
24
+ result ||= ParseDate.send(:sortable_year_for_century, orig_date_str)
25
+ result ||= ParseDate.send(:sortable_year_int_for_early_numeric, orig_date_str)
26
+ unless result
27
+ # try removing brackets between digits in case we have 169[5] or [18]91
28
+ no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
29
+ return year_int_from_date_str(no_brackets) if no_brackets
30
+ end
31
+ result.to_i if result && year_int_valid?(result.to_i)
32
+ end
33
+
34
+ # true if the year is between -999 and (current year + 1)
35
+ # @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
36
+ def self.year_int_valid?(year)
37
+ return false unless year.is_a? Integer
38
+
39
+ (-1000 < year.to_i) && (year < Date.today.year + 2)
40
+ end
41
+
42
+ protected
43
+
44
+ # get String sortable value year if we can parse date_str to get a year.
45
+ # @return [String, nil] String sortable year if we could parse one, nil otherwise
46
+ # note that these values must *lexically* sort to create a chronological sort.
47
+ def sortable_year_for_yyyy_or_yy(orig_date_str)
48
+ # most date strings have a four digit year
49
+ result = ParseDate.sortable_year_for_yyyy(orig_date_str)
50
+ result ||= ParseDate.sortable_year_for_yy(orig_date_str) # 19xx or 20xx
51
+ result
52
+ end
53
+
54
+ BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
55
+
56
+ # removes brackets between digits such as 169[5] or [18]91
57
+ def remove_brackets(orig_date_str)
58
+ orig_date_str.delete('[]') if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
59
+ end
60
+
61
+ # looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
62
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
63
+ def sortable_year_for_yyyy(orig_date_str)
64
+ matches = orig_date_str.match(/\d{4}/) if orig_date_str
65
+ matches&.to_s
66
+ end
67
+
68
+ # returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
69
+ # note that these are the only 2 digit year patterns found in our actual date strings in stanford-mods records
70
+ # we use 20 as century digits unless it is greater than current year:
71
+ # 1/1/17 -> 2017
72
+ # 1/1/27 -> 1927
73
+ # @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
74
+ def sortable_year_for_yy(orig_date_str)
75
+ return unless orig_date_str
76
+
77
+ slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
78
+ if slash_matches
79
+ date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
80
+ else
81
+ hyphen_matches = orig_date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
82
+ date_obj = Date.strptime(orig_date_str, '%m-%d-%y') if hyphen_matches
83
+ end
84
+ date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday) if date_obj && date_obj > Date.today
85
+ date_obj.year.to_s if date_obj
86
+ rescue ArgumentError
87
+ nil # explicitly want nil if date won't parse
88
+ end
89
+
90
+ DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
91
+
92
+ # get first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
93
+ # note that these are the only decade patterns found in our actual date strings in MODS records
94
+ # @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
95
+ def sortable_year_for_decade(orig_date_str)
96
+ decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
97
+ changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
98
+ ParseDate.sortable_year_for_yyyy(changed_to_zero) if changed_to_zero
99
+ end
100
+
101
+ CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
102
+ CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
103
+
104
+ # get first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
105
+ # note that these are the only century patterns found in our actual date strings in MODS records
106
+ # @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
107
+ def sortable_year_for_century(orig_date_str)
108
+ return unless orig_date_str
109
+ return if orig_date_str =~ /B\.C\./
110
+
111
+ century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
112
+ if century_matches
113
+ m = Regexp.last_match(1)
114
+ return m + '00' if m.length == 2
115
+ return '0' + m + '00' if m.length == 1
116
+ end
117
+
118
+ century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
119
+ return unless century_str_matches
120
+
121
+ yy = (Regexp.last_match(1).to_i - 1).to_s
122
+ return yy + '00' if yy.length == 2
123
+ return '0' + yy + '00' if yy.length == 1
124
+ end
125
+
126
+ BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
127
+
128
+ # get Integer sortable value for B.C. if we have B.C. pattern
129
+ # @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
130
+ def sortable_year_int_for_bc(orig_date_str)
131
+ bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
132
+ "-#{Regexp.last_match(1)}".to_i if bc_matches
133
+ end
134
+
135
+ EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
136
+
137
+ # get Integer sortable value from date String containing yyy, yy, y, -y, -yy, -yyy, -yyyy
138
+ # @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
139
+ def sortable_year_int_for_early_numeric(orig_date_str)
140
+ return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
141
+
142
+ orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
143
+ end
144
+ end
145
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module ParseDate
4
- VERSION = '0.0.1'
3
+ class ParseDate
4
+ VERSION = '0.1.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_date
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Naomi Dushay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-27 00:00:00.000000000 Z
11
+ date: 2019-10-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk
@@ -129,6 +129,7 @@ files:
129
129
  - bin/console
130
130
  - bin/setup
131
131
  - lib/parse_date.rb
132
+ - lib/parse_date/int_from_string.rb
132
133
  - lib/parse_date/version.rb
133
134
  - parse_date.gemspec
134
135
  homepage: https://github.com/sul-dlss/parse_date