parse_date 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -5
- data/.rubocop_todo.yml +50 -1
- data/README.md +29 -1
- data/lib/parse_date.rb +15 -2
- data/lib/parse_date/int_from_string.rb +145 -0
- data/lib/parse_date/version.rb +2 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67507d7ddc6be0cff379b210ca3985316153e96823ee573f6292298746fd25f7
|
4
|
+
data.tar.gz: 87ffbb778c68a34a472f99429252172bb676738d48a6776e3a5195b871397afd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a89b5cd3b4712bae4a0d82e5624b2463a10719243b19cc5d4b28d6d95db2ed097691fcc4c0a39a6e319db46cb0d6a6972b2bdda9530b6fc3d75d5e52e7ba6fcf
|
7
|
+
data.tar.gz: 634e22955684ec3176217d4ebfe16434bcffca287b8d64f30a53332c93e2b992cd995997956799aec53e0b7a68560de49ba23cdf6efdaf332dd4bad6ef91e18e
|
data/.rubocop.yml
CHANGED
@@ -6,16 +6,17 @@ AllCops:
|
|
6
6
|
Layout/EmptyLinesAroundClassBody:
|
7
7
|
Enabled: false
|
8
8
|
|
9
|
+
Layout/EmptyLinesAroundModuleBody:
|
10
|
+
Enabled: false
|
11
|
+
|
9
12
|
Metrics/LineLength:
|
10
13
|
Max: 120
|
11
14
|
|
12
15
|
Metrics/MethodLength:
|
13
16
|
Max: 15
|
14
17
|
|
15
|
-
Style/Documentation:
|
16
|
-
Exclude:
|
17
|
-
- 'spec/**/*'
|
18
|
-
- 'lib/parse_date.rb'
|
19
|
-
|
20
18
|
Style/WordArray:
|
21
19
|
Enabled: false
|
20
|
+
|
21
|
+
Style/YodaCondition:
|
22
|
+
Enabled: false
|
data/.rubocop_todo.yml
CHANGED
@@ -1,7 +1,56 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2019-
|
3
|
+
# on 2019-10-01 18:08:41 -0700 using RuboCop version 0.74.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 3
|
10
|
+
Metrics/AbcSize:
|
11
|
+
Max: 22
|
12
|
+
|
13
|
+
# Offense count: 2
|
14
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
15
|
+
# ExcludedMethods: refine
|
16
|
+
Metrics/BlockLength:
|
17
|
+
Max: 512
|
18
|
+
|
19
|
+
# Offense count: 3
|
20
|
+
Metrics/CyclomaticComplexity:
|
21
|
+
Max: 9
|
22
|
+
|
23
|
+
# Offense count: 2
|
24
|
+
Metrics/PerceivedComplexity:
|
25
|
+
Max: 9
|
26
|
+
|
27
|
+
# Offense count: 2
|
28
|
+
Style/Documentation:
|
29
|
+
Exclude:
|
30
|
+
- 'spec/**/*'
|
31
|
+
- 'test/**/*'
|
32
|
+
- 'lib/parse_date.rb'
|
33
|
+
|
34
|
+
# Offense count: 1
|
35
|
+
# Cop supports --auto-correct.
|
36
|
+
# Configuration parameters: EnforcedOctalStyle.
|
37
|
+
# SupportedOctalStyles: zero_with_o, zero_only
|
38
|
+
Style/NumericLiteralPrefix:
|
39
|
+
Exclude:
|
40
|
+
- 'spec/parse_date/int_from_string_spec.rb'
|
41
|
+
|
42
|
+
# Offense count: 1
|
43
|
+
# Cop supports --auto-correct.
|
44
|
+
# Configuration parameters: EnforcedStyle, AllowInnerSlashes.
|
45
|
+
# SupportedStyles: slashes, percent_r, mixed
|
46
|
+
Style/RegexpLiteral:
|
47
|
+
Exclude:
|
48
|
+
- 'lib/parse_date/int_from_string.rb'
|
49
|
+
|
50
|
+
# Offense count: 1
|
51
|
+
# Cop supports --auto-correct.
|
52
|
+
# Configuration parameters: EnforcedStyleForMultiline.
|
53
|
+
# SupportedStylesForMultiline: comma, consistent_comma, no_comma
|
54
|
+
Style/TrailingCommaInArrayLiteral:
|
55
|
+
Exclude:
|
56
|
+
- 'spec/parse_date/int_from_string_spec.rb'
|
data/README.md
CHANGED
@@ -25,7 +25,35 @@ Or install it yourself as:
|
|
25
25
|
|
26
26
|
## Usage
|
27
27
|
|
28
|
-
|
28
|
+
ParseDate has class methods for date string parsing.
|
29
|
+
|
30
|
+
```
|
31
|
+
require 'parse_date'
|
32
|
+
|
33
|
+
ParseDate.year_int_from_date_str('12/25/00') # 2000
|
34
|
+
ParseDate.year_int_from_date_str('5-1-21') # 1921 while 2021-05-01 is still in the future; 2021 after that
|
35
|
+
ParseDate.year_int_from_date_str('18th century CE') # 1700
|
36
|
+
ParseDate.year_int_from_date_str('1666 B.C.') # -1666
|
37
|
+
ParseDate.year_int_from_date_str('17uu') # 1700
|
38
|
+
ParseDate.year_int_from_date_str('-914') # -914
|
39
|
+
ParseDate.year_int_from_date_str('[c1926]') # 1926
|
40
|
+
ParseDate.year_int_from_date_str('ca. 1558') # 1558
|
41
|
+
|
42
|
+
ParseDate.year_int_valid?(0) # true
|
43
|
+
ParseDate.year_int_valid?(5) # true
|
44
|
+
ParseDate.year_int_valid?(33) # true
|
45
|
+
ParseDate.year_int_valid?(150) # true
|
46
|
+
ParseDate.year_int_valid?(2019) # true
|
47
|
+
ParseDate.year_int_valid?(Date.today.year + 1) # true
|
48
|
+
ParseDate.year_int_valid?(-3) # true
|
49
|
+
ParseDate.year_int_valid?(-35) # true
|
50
|
+
ParseDate.year_int_valid?(-999) # true
|
51
|
+
ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
|
52
|
+
ParseDate.year_int_valid?('165x') # false
|
53
|
+
ParseDate.year_int_valid?('198-') # false
|
54
|
+
ParseDate.year_int_valid?('random text') # false
|
55
|
+
ParseDate.year_int_valid?(nil) # false
|
56
|
+
```
|
29
57
|
|
30
58
|
## Development
|
31
59
|
|
data/lib/parse_date.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'singleton'
|
3
4
|
require 'zeitwerk'
|
4
5
|
|
5
6
|
class ParseDateInflector < Zeitwerk::Inflector
|
@@ -18,7 +19,19 @@ loader.inflector = ParseDateInflector.new
|
|
18
19
|
loader.push_dir(File.absolute_path("#{__FILE__}/.."))
|
19
20
|
loader.setup
|
20
21
|
|
21
|
-
|
22
|
+
class ParseDate
  class Error < StandardError; end

  include Singleton
  extend ParseDate::IntFromString

  # The public parsing entry points are singleton methods of
  # ParseDate::IntFromString, which `extend` does not copy over, so they are
  # forwarded explicitly here.
  class << self
    # forwards to ParseDate::IntFromString.year_int_from_date_str
    def year_int_from_date_str(orig_date_str)
      ParseDate::IntFromString.year_int_from_date_str(orig_date_str)
    end

    # forwards to ParseDate::IntFromString.year_int_valid?
    def year_int_valid?(orig_date_str)
      ParseDate::IntFromString.year_int_valid?(orig_date_str)
    end
  end
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'date' # so upstream callers don't have to require it
|
4
|
+
|
5
|
+
class ParseDate
|
6
|
+
|
7
|
+
# Parse (Year) Integers from Date Strings
|
8
|
+
module IntFromString
|
9
|
+
|
10
|
+
# get Integer year if we can parse date_str to get a year.
|
11
|
+
# NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
|
12
|
+
# found in our actual date strings in stanford-mods records), then
|
13
|
+
# we use 20 as century digits unless it is greater than current year:
|
14
|
+
# 1/1/17 -> 2017
|
15
|
+
# 1/1/27 -> 1927
|
16
|
+
# @return [Integer, nil] Integer year if we could parse one, nil otherwise
|
17
|
+
def self.year_int_from_date_str(orig_date_str)
|
18
|
+
return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
|
19
|
+
# B.C. first in case there are 4 digits, e.g. 1600 B.C.
|
20
|
+
return ParseDate.send(:sortable_year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
|
21
|
+
|
22
|
+
result = ParseDate.send(:sortable_year_for_yyyy_or_yy, orig_date_str)
|
23
|
+
result ||= ParseDate.send(:sortable_year_for_decade, orig_date_str) # 19xx or 20xx
|
24
|
+
result ||= ParseDate.send(:sortable_year_for_century, orig_date_str)
|
25
|
+
result ||= ParseDate.send(:sortable_year_int_for_early_numeric, orig_date_str)
|
26
|
+
unless result
|
27
|
+
# try removing brackets between digits in case we have 169[5] or [18]91
|
28
|
+
no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
|
29
|
+
return year_int_from_date_str(no_brackets) if no_brackets
|
30
|
+
end
|
31
|
+
result.to_i if result && year_int_valid?(result.to_i)
|
32
|
+
end
|
33
|
+
|
34
|
+
# true if the year is between -999 and (current year + 1)
|
35
|
+
# @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
|
36
|
+
def self.year_int_valid?(year)
|
37
|
+
return false unless year.is_a? Integer
|
38
|
+
|
39
|
+
(-1000 < year.to_i) && (year < Date.today.year + 2)
|
40
|
+
end
|
41
|
+
|
42
|
+
protected
|
43
|
+
|
44
|
+
# get String sortable value year if we can parse date_str to get a year.
|
45
|
+
# @return [String, nil] String sortable year if we could parse one, nil otherwise
|
46
|
+
# note that these values must *lexically* sort to create a chronological sort.
|
47
|
+
def sortable_year_for_yyyy_or_yy(orig_date_str)
|
48
|
+
# most date strings have a four digit year
|
49
|
+
result = ParseDate.sortable_year_for_yyyy(orig_date_str)
|
50
|
+
result ||= ParseDate.sortable_year_for_yy(orig_date_str) # 19xx or 20xx
|
51
|
+
result
|
52
|
+
end
|
53
|
+
|
54
|
+
BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
|
55
|
+
|
56
|
+
# removes brackets between digits such as 169[5] or [18]91
|
57
|
+
def remove_brackets(orig_date_str)
|
58
|
+
orig_date_str.delete('[]') if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
|
59
|
+
end
|
60
|
+
|
61
|
+
# looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
|
62
|
+
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
|
63
|
+
def sortable_year_for_yyyy(orig_date_str)
|
64
|
+
matches = orig_date_str.match(/\d{4}/) if orig_date_str
|
65
|
+
matches&.to_s
|
66
|
+
end
|
67
|
+
|
68
|
+
# returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
|
69
|
+
# note that these are the only 2 digit year patterns found in our actual date strings in stanford-mods records
|
70
|
+
# we use 20 as century digits unless it is greater than current year:
|
71
|
+
# 1/1/17 -> 2017
|
72
|
+
# 1/1/27 -> 1927
|
73
|
+
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
|
74
|
+
def sortable_year_for_yy(orig_date_str)
|
75
|
+
return unless orig_date_str
|
76
|
+
|
77
|
+
slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
|
78
|
+
if slash_matches
|
79
|
+
date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
|
80
|
+
else
|
81
|
+
hyphen_matches = orig_date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
|
82
|
+
date_obj = Date.strptime(orig_date_str, '%m-%d-%y') if hyphen_matches
|
83
|
+
end
|
84
|
+
date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday) if date_obj && date_obj > Date.today
|
85
|
+
date_obj.year.to_s if date_obj
|
86
|
+
rescue ArgumentError
|
87
|
+
nil # explicitly want nil if date won't parse
|
88
|
+
end
|
89
|
+
|
90
|
+
DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
|
91
|
+
|
92
|
+
# get first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
|
93
|
+
# note that these are the only decade patterns found in our actual date strings in MODS records
|
94
|
+
# @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
|
95
|
+
def sortable_year_for_decade(orig_date_str)
|
96
|
+
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
97
|
+
changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
|
98
|
+
ParseDate.sortable_year_for_yyyy(changed_to_zero) if changed_to_zero
|
99
|
+
end
|
100
|
+
|
101
|
+
CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
|
102
|
+
CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
|
103
|
+
|
104
|
+
# get first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
|
105
|
+
# note that these are the only century patterns found in our actual date strings in MODS records
|
106
|
+
# @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
107
|
+
def sortable_year_for_century(orig_date_str)
|
108
|
+
return unless orig_date_str
|
109
|
+
return if orig_date_str =~ /B\.C\./
|
110
|
+
|
111
|
+
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
112
|
+
if century_matches
|
113
|
+
m = Regexp.last_match(1)
|
114
|
+
return m + '00' if m.length == 2
|
115
|
+
return '0' + m + '00' if m.length == 1
|
116
|
+
end
|
117
|
+
|
118
|
+
century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
|
119
|
+
return unless century_str_matches
|
120
|
+
|
121
|
+
yy = (Regexp.last_match(1).to_i - 1).to_s
|
122
|
+
return yy + '00' if yy.length == 2
|
123
|
+
return '0' + yy + '00' if yy.length == 1
|
124
|
+
end
|
125
|
+
|
126
|
+
BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
|
127
|
+
|
128
|
+
# get Integer sortable value for B.C. if we have B.C. pattern
|
129
|
+
# @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
|
130
|
+
def sortable_year_int_for_bc(orig_date_str)
|
131
|
+
bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
|
132
|
+
"-#{Regexp.last_match(1)}".to_i if bc_matches
|
133
|
+
end
|
134
|
+
|
135
|
+
EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
|
136
|
+
|
137
|
+
# get Integer sortable value from date String containing yyy, yy, y, -y, -yy, -yyy, -yyyy
|
138
|
+
# @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
|
139
|
+
def sortable_year_int_for_early_numeric(orig_date_str)
|
140
|
+
return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
|
141
|
+
|
142
|
+
orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
data/lib/parse_date/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_date
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: zeitwerk
|
@@ -129,6 +129,7 @@ files:
|
|
129
129
|
- bin/console
|
130
130
|
- bin/setup
|
131
131
|
- lib/parse_date.rb
|
132
|
+
- lib/parse_date/int_from_string.rb
|
132
133
|
- lib/parse_date/version.rb
|
133
134
|
- parse_date.gemspec
|
134
135
|
homepage: https://github.com/sul-dlss/parse_date
|