parse_date 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -5
- data/.rubocop_todo.yml +50 -1
- data/README.md +29 -1
- data/lib/parse_date.rb +15 -2
- data/lib/parse_date/int_from_string.rb +145 -0
- data/lib/parse_date/version.rb +2 -2
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67507d7ddc6be0cff379b210ca3985316153e96823ee573f6292298746fd25f7
|
4
|
+
data.tar.gz: 87ffbb778c68a34a472f99429252172bb676738d48a6776e3a5195b871397afd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a89b5cd3b4712bae4a0d82e5624b2463a10719243b19cc5d4b28d6d95db2ed097691fcc4c0a39a6e319db46cb0d6a6972b2bdda9530b6fc3d75d5e52e7ba6fcf
|
7
|
+
data.tar.gz: 634e22955684ec3176217d4ebfe16434bcffca287b8d64f30a53332c93e2b992cd995997956799aec53e0b7a68560de49ba23cdf6efdaf332dd4bad6ef91e18e
|
data/.rubocop.yml
CHANGED
@@ -6,16 +6,17 @@ AllCops:
|
|
6
6
|
Layout/EmptyLinesAroundClassBody:
|
7
7
|
Enabled: false
|
8
8
|
|
9
|
+
Layout/EmptyLinesAroundModuleBody:
|
10
|
+
Enabled: false
|
11
|
+
|
9
12
|
Metrics/LineLength:
|
10
13
|
Max: 120
|
11
14
|
|
12
15
|
Metrics/MethodLength:
|
13
16
|
Max: 15
|
14
17
|
|
15
|
-
Style/Documentation:
|
16
|
-
Exclude:
|
17
|
-
- 'spec/**/*'
|
18
|
-
- 'lib/parse_date.rb'
|
19
|
-
|
20
18
|
Style/WordArray:
|
21
19
|
Enabled: false
|
20
|
+
|
21
|
+
Style/YodaCondition:
|
22
|
+
Enabled: false
|
data/.rubocop_todo.yml
CHANGED
@@ -1,7 +1,56 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2019-
|
3
|
+
# on 2019-10-01 18:08:41 -0700 using RuboCop version 0.74.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 3
|
10
|
+
Metrics/AbcSize:
|
11
|
+
Max: 22
|
12
|
+
|
13
|
+
# Offense count: 2
|
14
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
15
|
+
# ExcludedMethods: refine
|
16
|
+
Metrics/BlockLength:
|
17
|
+
Max: 512
|
18
|
+
|
19
|
+
# Offense count: 3
|
20
|
+
Metrics/CyclomaticComplexity:
|
21
|
+
Max: 9
|
22
|
+
|
23
|
+
# Offense count: 2
|
24
|
+
Metrics/PerceivedComplexity:
|
25
|
+
Max: 9
|
26
|
+
|
27
|
+
# Offense count: 2
|
28
|
+
Style/Documentation:
|
29
|
+
Exclude:
|
30
|
+
- 'spec/**/*'
|
31
|
+
- 'test/**/*'
|
32
|
+
- 'lib/parse_date.rb'
|
33
|
+
|
34
|
+
# Offense count: 1
|
35
|
+
# Cop supports --auto-correct.
|
36
|
+
# Configuration parameters: EnforcedOctalStyle.
|
37
|
+
# SupportedOctalStyles: zero_with_o, zero_only
|
38
|
+
Style/NumericLiteralPrefix:
|
39
|
+
Exclude:
|
40
|
+
- 'spec/parse_date/int_from_string_spec.rb'
|
41
|
+
|
42
|
+
# Offense count: 1
|
43
|
+
# Cop supports --auto-correct.
|
44
|
+
# Configuration parameters: EnforcedStyle, AllowInnerSlashes.
|
45
|
+
# SupportedStyles: slashes, percent_r, mixed
|
46
|
+
Style/RegexpLiteral:
|
47
|
+
Exclude:
|
48
|
+
- 'lib/parse_date/int_from_string.rb'
|
49
|
+
|
50
|
+
# Offense count: 1
|
51
|
+
# Cop supports --auto-correct.
|
52
|
+
# Configuration parameters: EnforcedStyleForMultiline.
|
53
|
+
# SupportedStylesForMultiline: comma, consistent_comma, no_comma
|
54
|
+
Style/TrailingCommaInArrayLiteral:
|
55
|
+
Exclude:
|
56
|
+
- 'spec/parse_date/int_from_string_spec.rb'
|
data/README.md
CHANGED
@@ -25,7 +25,35 @@ Or install it yourself as:
|
|
25
25
|
|
26
26
|
## Usage
|
27
27
|
|
28
|
-
|
28
|
+
ParseDate has class methods for date string parsing.
|
29
|
+
|
30
|
+
```
|
31
|
+
require 'parse_date'
|
32
|
+
|
33
|
+
ParseDate.year_int_from_date_str('12/25/00') # 2000
|
34
|
+
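ParseDate.year_int_from_date_str('5-1-21') # 1921 if run before 2021-05-01 (a two-digit year that lands in the future rolls back a century), otherwise 2021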
|
35
|
+
ParseDate.year_int_from_date_str('18th century CE') # 1700
|
36
|
+
ParseDate.year_int_from_date_str('1666 B.C.') # -1666
|
37
|
+
ParseDate.year_int_from_date_str('17uu') # 1700
|
38
|
+
ParseDate.year_int_from_date_str('-914') # -914
|
39
|
+
ParseDate.year_int_from_date_str('[c1926]') # 1926
|
40
|
+
ParseDate.year_int_from_date_str('ca. 1558') # 1558
|
41
|
+
|
42
|
+
ParseDate.year_int_valid?(0) # true
|
43
|
+
ParseDate.year_int_valid?(5) # true
|
44
|
+
ParseDate.year_int_valid?(33) # true
|
45
|
+
ParseDate.year_int_valid?(150) # true
|
46
|
+
ParseDate.year_int_valid?(2019) # true
|
47
|
+
ParseDate.year_int_valid?(Date.today.year + 1) # true
|
48
|
+
ParseDate.year_int_valid?(-3) # true
|
49
|
+
ParseDate.year_int_valid?(-35) # true
|
50
|
+
ParseDate.year_int_valid?(-999) # true
|
51
|
+
ParseDate.year_int_valid?(-1666) # false - four digit negative years not considered valid here
|
52
|
+
ParseDate.year_int_valid?('165x') # false
|
53
|
+
ParseDate.year_int_valid?('198-') # false
|
54
|
+
ParseDate.year_int_valid?('random text') # false
|
55
|
+
ParseDate.year_int_valid?(nil) # false
|
56
|
+
```
|
29
57
|
|
30
58
|
## Development
|
31
59
|
|
data/lib/parse_date.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'singleton'
|
3
4
|
require 'zeitwerk'
|
4
5
|
|
5
6
|
class ParseDateInflector < Zeitwerk::Inflector
|
@@ -18,7 +19,19 @@ loader.inflector = ParseDateInflector.new
|
|
18
19
|
loader.push_dir(File.absolute_path("#{__FILE__}/.."))
|
19
20
|
loader.setup
|
20
21
|
|
21
|
-
|
22
|
+
class ParseDate
|
22
23
|
class Error < StandardError; end
|
23
|
-
|
24
|
+
|
25
|
+
include Singleton
|
26
|
+
extend ParseDate::IntFromString
|
27
|
+
|
28
|
+
# class method delegation for ParseDate.year_int_from_date_str
|
29
|
+
def self.year_int_from_date_str(orig_date_str)
|
30
|
+
ParseDate::IntFromString.year_int_from_date_str(orig_date_str)
|
31
|
+
end
|
32
|
+
|
33
|
+
# class method delegation for ParseDate.year_int_valid?
|
34
|
+
def self.year_int_valid?(orig_date_str)
|
35
|
+
ParseDate::IntFromString.year_int_valid?(orig_date_str)
|
36
|
+
end
|
24
37
|
end
|
@@ -0,0 +1,145 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'date' # so upstream callers don't have to require it
|
4
|
+
|
5
|
+
class ParseDate
|
6
|
+
|
7
|
+
# Parse (Year) Integers from Date Strings
|
8
|
+
module IntFromString
|
9
|
+
|
10
|
+
# get Integer year if we can parse date_str to get a year.
|
11
|
+
# NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
|
12
|
+
# found in our actual date strings in stanford-mods records), then
|
13
|
+
# we use 20 as century digits unless it is greater than current year:
|
14
|
+
# 1/1/17 -> 2017
|
15
|
+
# 1/1/27 -> 1927
|
16
|
+
# @return [Integer, nil] Integer year if we could parse one, nil otherwise
|
17
|
+
def self.year_int_from_date_str(orig_date_str)
|
18
|
+
return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
|
19
|
+
# B.C. first in case there are 4 digits, e.g. 1600 B.C.
|
20
|
+
return ParseDate.send(:sortable_year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
|
21
|
+
|
22
|
+
result = ParseDate.send(:sortable_year_for_yyyy_or_yy, orig_date_str)
|
23
|
+
result ||= ParseDate.send(:sortable_year_for_decade, orig_date_str) # 19xx or 20xx
|
24
|
+
result ||= ParseDate.send(:sortable_year_for_century, orig_date_str)
|
25
|
+
result ||= ParseDate.send(:sortable_year_int_for_early_numeric, orig_date_str)
|
26
|
+
unless result
|
27
|
+
# try removing brackets between digits in case we have 169[5] or [18]91
|
28
|
+
no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
|
29
|
+
return year_int_from_date_str(no_brackets) if no_brackets
|
30
|
+
end
|
31
|
+
result.to_i if result && year_int_valid?(result.to_i)
|
32
|
+
end
|
33
|
+
|
34
|
+
# true if the year is between -999 and (current year + 1)
|
35
|
+
# @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
|
36
|
+
def self.year_int_valid?(year)
|
37
|
+
return false unless year.is_a? Integer
|
38
|
+
|
39
|
+
(-1000 < year.to_i) && (year < Date.today.year + 2)
|
40
|
+
end
|
41
|
+
|
42
|
+
protected
|
43
|
+
|
44
|
+
# get String sortable value year if we can parse date_str to get a year.
|
45
|
+
# @return [String, nil] String sortable year if we could parse one, nil otherwise
|
46
|
+
# note that these values must *lexically* sort to create a chronological sort.
|
47
|
+
def sortable_year_for_yyyy_or_yy(orig_date_str)
|
48
|
+
# most date strings have a four digit year
|
49
|
+
result = ParseDate.sortable_year_for_yyyy(orig_date_str)
|
50
|
+
result ||= ParseDate.sortable_year_for_yy(orig_date_str) # 19xx or 20xx
|
51
|
+
result
|
52
|
+
end
|
53
|
+
|
54
|
+
BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
|
55
|
+
|
56
|
+
# removes brackets between digits such as 169[5] or [18]91
|
57
|
+
def remove_brackets(orig_date_str)
|
58
|
+
orig_date_str.delete('[]') if orig_date_str.match(BRACKETS_BETWEEN_DIGITS_REXEXP)
|
59
|
+
end
|
60
|
+
|
61
|
+
# looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
|
62
|
+
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
|
63
|
+
def sortable_year_for_yyyy(orig_date_str)
|
64
|
+
matches = orig_date_str.match(/\d{4}/) if orig_date_str
|
65
|
+
matches&.to_s
|
66
|
+
end
|
67
|
+
|
68
|
+
# returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
|
69
|
+
# note that these are the only 2 digit year patterns found in our actual date strings in stanford-mods records
|
70
|
+
# we use 20 as century digits unless it is greater than current year:
|
71
|
+
# 1/1/17 -> 2017
|
72
|
+
# 1/1/27 -> 1927
|
73
|
+
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
|
74
|
+
def sortable_year_for_yy(orig_date_str)
|
75
|
+
return unless orig_date_str
|
76
|
+
|
77
|
+
slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
|
78
|
+
if slash_matches
|
79
|
+
date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
|
80
|
+
else
|
81
|
+
hyphen_matches = orig_date_str.match(/\d{1,2}-\d{1,2}-\d{2}/)
|
82
|
+
date_obj = Date.strptime(orig_date_str, '%m-%d-%y') if hyphen_matches
|
83
|
+
end
|
84
|
+
date_obj = Date.new(date_obj.year - 100, date_obj.month, date_obj.mday) if date_obj && date_obj > Date.today
|
85
|
+
date_obj.year.to_s if date_obj
|
86
|
+
rescue ArgumentError
|
87
|
+
nil # explicitly want nil if date won't parse
|
88
|
+
end
|
89
|
+
|
90
|
+
DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
|
91
|
+
|
92
|
+
# get first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
|
93
|
+
# note that these are the only decade patterns found in our actual date strings in MODS records
|
94
|
+
# @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
|
95
|
+
def sortable_year_for_decade(orig_date_str)
|
96
|
+
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
97
|
+
changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
|
98
|
+
ParseDate.sortable_year_for_yyyy(changed_to_zero) if changed_to_zero
|
99
|
+
end
|
100
|
+
|
101
|
+
CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
|
102
|
+
CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
|
103
|
+
|
104
|
+
# get first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
|
105
|
+
# note that these are the only century patterns found in our actual date strings in MODS records
|
106
|
+
# @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
107
|
+
def sortable_year_for_century(orig_date_str)
|
108
|
+
return unless orig_date_str
|
109
|
+
return if orig_date_str =~ /B\.C\./
|
110
|
+
|
111
|
+
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
112
|
+
if century_matches
|
113
|
+
m = Regexp.last_match(1)
|
114
|
+
return m + '00' if m.length == 2
|
115
|
+
return '0' + m + '00' if m.length == 1
|
116
|
+
end
|
117
|
+
|
118
|
+
century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
|
119
|
+
return unless century_str_matches
|
120
|
+
|
121
|
+
yy = (Regexp.last_match(1).to_i - 1).to_s
|
122
|
+
return yy + '00' if yy.length == 2
|
123
|
+
return '0' + yy + '00' if yy.length == 1
|
124
|
+
end
|
125
|
+
|
126
|
+
BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
|
127
|
+
|
128
|
+
# get Integer sortable value for B.C. if we have B.C. pattern
|
129
|
+
# @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
|
130
|
+
def sortable_year_int_for_bc(orig_date_str)
|
131
|
+
bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
|
132
|
+
"-#{Regexp.last_match(1)}".to_i if bc_matches
|
133
|
+
end
|
134
|
+
|
135
|
+
EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
|
136
|
+
|
137
|
+
# get Integer sortable value from date String containing yyy, yy, y, -y, -yy, -yyy, -yyyy
|
138
|
+
# @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
|
139
|
+
def sortable_year_int_for_early_numeric(orig_date_str)
|
140
|
+
return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
|
141
|
+
|
142
|
+
orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
data/lib/parse_date/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_date
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-10-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: zeitwerk
|
@@ -129,6 +129,7 @@ files:
|
|
129
129
|
- bin/console
|
130
130
|
- bin/setup
|
131
131
|
- lib/parse_date.rb
|
132
|
+
- lib/parse_date/int_from_string.rb
|
132
133
|
- lib/parse_date/version.rb
|
133
134
|
- parse_date.gemspec
|
134
135
|
homepage: https://github.com/sul-dlss/parse_date
|