parse_date 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +20 -6
- data/README.md +20 -9
- data/lib/parse_date.rb +7 -4
- data/lib/parse_date/int_from_string.rb +76 -47
- data/lib/parse_date/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 446afc4299ca93634d6689b20d3a32bdb28460da5627701682733e18bb1f0a16
|
4
|
+
data.tar.gz: b5f6bbceab8542cc3c977c1c1ea8a4e250f82a9ae3a1eed2b335050067145703
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 052f5d35a64c52f5bd74af2e487b70c8b03c37252cce33554aa3e7d6ba8141d9f0edd7c719aa487dbddb2e465b76d2d1ab222f057e68c7e6fa322eef313a35e6
|
7
|
+
data.tar.gz: 880e66962d42d0c3f2824510a94edcf9256cc6ba8298815721a2a720e8f9ef2158793820ff71fc3a22a0ad0f3397db3f8e2361753ee661494ed1371117562eb7
|
data/.rubocop_todo.yml
CHANGED
@@ -1,26 +1,32 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2019-10-
|
3
|
+
# on 2019-10-09 15:35:45 -0700 using RuboCop version 0.74.0.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
+
# Offense count: 2
|
10
|
+
# Cop supports --auto-correct.
|
11
|
+
Lint/StringConversionInInterpolation:
|
12
|
+
Exclude:
|
13
|
+
- 'lib/parse_date/int_from_string.rb'
|
14
|
+
|
9
15
|
# Offense count: 3
|
10
16
|
Metrics/AbcSize:
|
11
|
-
Max:
|
17
|
+
Max: 18
|
12
18
|
|
13
|
-
# Offense count:
|
19
|
+
# Offense count: 4
|
14
20
|
# Configuration parameters: CountComments, ExcludedMethods.
|
15
21
|
# ExcludedMethods: refine
|
16
22
|
Metrics/BlockLength:
|
17
|
-
Max:
|
23
|
+
Max: 561
|
18
24
|
|
19
25
|
# Offense count: 3
|
20
26
|
Metrics/CyclomaticComplexity:
|
21
|
-
Max:
|
27
|
+
Max: 8
|
22
28
|
|
23
|
-
# Offense count:
|
29
|
+
# Offense count: 1
|
24
30
|
Metrics/PerceivedComplexity:
|
25
31
|
Max: 9
|
26
32
|
|
@@ -54,3 +60,11 @@ Style/RegexpLiteral:
|
|
54
60
|
Style/TrailingCommaInArrayLiteral:
|
55
61
|
Exclude:
|
56
62
|
- 'spec/parse_date/int_from_string_spec.rb'
|
63
|
+
|
64
|
+
# Offense count: 1
|
65
|
+
# Cop supports --auto-correct.
|
66
|
+
# Configuration parameters: EnforcedStyleForMultiline.
|
67
|
+
# SupportedStylesForMultiline: comma, consistent_comma, no_comma
|
68
|
+
Style/TrailingCommaInHashLiteral:
|
69
|
+
Exclude:
|
70
|
+
- 'spec/parse_date/int_from_string_spec.rb'
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
[![Gem Version](https://badge.fury.io/rb/parse_date.svg)](https://badge.fury.io/rb/
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/parse_date.svg)](https://badge.fury.io/rb/parse_date)
|
2
2
|
[![Build Status](https://travis-ci.org/sul-dlss/parse_date.svg?branch=master)](https://travis-ci.org/sul-dlss/parse_date)
|
3
3
|
[![Maintainability](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/maintainability)](https://codeclimate.com/github/sul-dlss/parse_date/maintainability)
|
4
4
|
[![Test Coverage](https://api.codeclimate.com/v1/badges/2d006b4ccb3100434f4a/test_coverage)](https://codeclimate.com/github/sul-dlss/parse_date/test_coverage)
|
@@ -30,14 +30,25 @@ ParseDate has class methods for date string parsing.
|
|
30
30
|
```
|
31
31
|
require 'parse_date'
|
32
32
|
|
33
|
-
ParseDate.
|
34
|
-
ParseDate.
|
35
|
-
ParseDate.
|
36
|
-
ParseDate.
|
37
|
-
ParseDate.
|
38
|
-
ParseDate.
|
39
|
-
ParseDate.
|
40
|
-
ParseDate.
|
33
|
+
ParseDate.earliest_year('12/25/00') # 2000
|
34
|
+
ParseDate.earliest_year('5-1-21') # 1921
|
35
|
+
ParseDate.earliest_year('1666 B.C.') # -1666
|
36
|
+
ParseDate.earliest_year('-914') # -914
|
37
|
+
ParseDate.earliest_year('[c1926]') # 1926
|
38
|
+
ParseDate.earliest_year('ca. 1558') # 1558
|
39
|
+
ParseDate.earliest_year('195-') # 1950
|
40
|
+
ParseDate.earliest_year('199u') # 1990
|
41
|
+
ParseDate.earliest_year('197?') # 1970
|
42
|
+
ParseDate.earliest_year('196x') # 1960
|
43
|
+
ParseDate.earliest_year('18th century CE') # 1700
|
44
|
+
ParseDate.earliest_year('17uu') # 1700
|
45
|
+
|
46
|
+
ParseDate.latest_year('195-') # 1959
|
47
|
+
ParseDate.latest_year('199u') # 1999
|
48
|
+
ParseDate.latest_year('197?') # 1979
|
49
|
+
ParseDate.latest_year('196x') # 1969
|
50
|
+
ParseDate.latest_year('18th century CE') # 1799
|
51
|
+
ParseDate.latest_year('17uu') # 1799
|
41
52
|
|
42
53
|
ParseDate.year_int_valid?(0) # true
|
43
54
|
ParseDate.year_int_valid?(5) # true
|
data/lib/parse_date.rb
CHANGED
@@ -25,12 +25,15 @@ class ParseDate
|
|
25
25
|
include Singleton
|
26
26
|
extend ParseDate::IntFromString
|
27
27
|
|
28
|
-
# class method delegation
|
29
|
-
def self.
|
30
|
-
ParseDate::IntFromString.
|
28
|
+
# class method delegation
|
29
|
+
def self.earliest_year(orig_date_str)
|
30
|
+
ParseDate::IntFromString.earliest_year(orig_date_str)
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.latest_year(orig_date_str)
|
34
|
+
ParseDate::IntFromString.latest_year(orig_date_str)
|
31
35
|
end
|
32
36
|
|
33
|
-
# class method delegation for ParseDate.year_int_valid?
|
34
37
|
def self.year_int_valid?(orig_date_str)
|
35
38
|
ParseDate::IntFromString.year_int_valid?(orig_date_str)
|
36
39
|
end
|
@@ -7,31 +7,63 @@ class ParseDate
|
|
7
7
|
# Parse (Year) Integers from Date Strings
|
8
8
|
module IntFromString
|
9
9
|
|
10
|
-
#
|
10
|
+
# earliest year as Integer if we can parse one from orig_date_str
|
11
|
+
# e.g. if 17uu, result is 1700
|
11
12
|
# NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
|
12
13
|
# found in our actual date strings in stanford-mods records), then
|
13
14
|
# we use 20 as century digits unless it is greater than current year:
|
14
15
|
# 1/1/17 -> 2017
|
15
16
|
# 1/1/27 -> 1927
|
16
17
|
# @return [Integer, nil] Integer year if we could parse one, nil otherwise
|
17
|
-
def self.
|
18
|
+
def self.earliest_year(orig_date_str)
|
18
19
|
return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
|
19
20
|
# B.C. first in case there are 4 digits, e.g. 1600 B.C.
|
20
|
-
return ParseDate.send(:
|
21
|
+
return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
|
21
22
|
|
22
|
-
result = ParseDate.send(:
|
23
|
-
result ||= ParseDate.send(:
|
24
|
-
result ||= ParseDate.send(:
|
25
|
-
result ||= ParseDate.send(:
|
23
|
+
result = ParseDate.send(:first_four_digits, orig_date_str)
|
24
|
+
result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
|
25
|
+
result ||= ParseDate.send(:first_year_for_decade, orig_date_str) # 19xx or 20xx
|
26
|
+
result ||= ParseDate.send(:first_year_for_century, orig_date_str)
|
27
|
+
result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
|
26
28
|
unless result
|
27
29
|
# try removing brackets between digits in case we have 169[5] or [18]91
|
28
30
|
no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
|
29
|
-
return
|
31
|
+
return earliest_year(no_brackets) if no_brackets
|
30
32
|
end
|
31
33
|
result.to_i if result && year_int_valid?(result.to_i)
|
32
34
|
end
|
33
35
|
|
34
|
-
#
|
36
|
+
# latest year as Integer if we can parse one from orig_date_str
|
37
|
+
# e.g. if 17uu, result is 1799
|
38
|
+
# NOTE: if we have a x/x/yy or x-x-yy pattern (the only 2 digit year patterns
|
39
|
+
# found in our actual date strings in stanford-mods records), then
|
40
|
+
# we use 20 as century digits unless it is greater than current year:
|
41
|
+
# 1/1/17 -> 2017
|
42
|
+
# 1/1/27 -> 1927
|
43
|
+
# @return [Integer, nil] Integer year if we could parse one, nil otherwise
|
44
|
+
def self.latest_year(orig_date_str)
|
45
|
+
return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
|
46
|
+
|
47
|
+
# B.C. first in case there are 4 digits, e.g. 1600 B.C.
|
48
|
+
# NOTE: may want to parse for last occurrence of 4 consecutive digits
|
49
|
+
return ParseDate.send(:year_int_for_bc, orig_date_str) if orig_date_str.match(BC_REGEX)
|
50
|
+
|
51
|
+
# NOTE: may want to parse for last occurrence of 4 consecutive digits
|
52
|
+
result = ParseDate.send(:first_four_digits, orig_date_str)
|
53
|
+
result ||= ParseDate.send(:year_from_mm_dd_yy, orig_date_str)
|
54
|
+
result ||= ParseDate.send(:last_year_for_decade, orig_date_str) # 19xx or 20xx
|
55
|
+
# NOTE: may want to parse for last occurrence of consecutive digits
|
56
|
+
result ||= ParseDate.send(:last_year_for_century, orig_date_str)
|
57
|
+
result ||= ParseDate.send(:year_for_early_numeric, orig_date_str)
|
58
|
+
unless result
|
59
|
+
# try removing brackets between digits in case we have 169[5] or [18]91
|
60
|
+
no_brackets = ParseDate.send(:remove_brackets, orig_date_str)
|
61
|
+
return earliest_year(no_brackets) if no_brackets
|
62
|
+
end
|
63
|
+
result.to_i if result && year_int_valid?(result.to_i)
|
64
|
+
end
|
65
|
+
|
66
|
+
# true if the year is between -999 and (current year + 2)
|
35
67
|
# @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
|
36
68
|
def self.year_int_valid?(year)
|
37
69
|
return false unless year.is_a? Integer
|
@@ -41,16 +73,6 @@ class ParseDate
|
|
41
73
|
|
42
74
|
protected
|
43
75
|
|
44
|
-
# get String sortable value year if we can parse date_str to get a year.
|
45
|
-
# @return [String, nil] String sortable year if we could parse one, nil otherwise
|
46
|
-
# note that these values must *lexically* sort to create a chronological sort.
|
47
|
-
def sortable_year_for_yyyy_or_yy(orig_date_str)
|
48
|
-
# most date strings have a four digit year
|
49
|
-
result = ParseDate.sortable_year_for_yyyy(orig_date_str)
|
50
|
-
result ||= ParseDate.sortable_year_for_yy(orig_date_str) # 19xx or 20xx
|
51
|
-
result
|
52
|
-
end
|
53
|
-
|
54
76
|
BRACKETS_BETWEEN_DIGITS_REXEXP = Regexp.new('\d[' + Regexp.escape('[]') + ']\d')
|
55
77
|
|
56
78
|
# removes brackets between digits such as 169[5] or [18]91
|
@@ -60,18 +82,18 @@ class ParseDate
|
|
60
82
|
|
61
83
|
# looks for 4 consecutive digits in orig_date_str and returns first occurrence if found
|
62
84
|
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str has yyyy, nil otherwise
|
63
|
-
def
|
85
|
+
def first_four_digits(orig_date_str)
|
64
86
|
matches = orig_date_str.match(/\d{4}/) if orig_date_str
|
65
87
|
matches&.to_s
|
66
88
|
end
|
67
89
|
|
68
90
|
# returns 4 digit year as String if we have a x/x/yy or x-x-yy pattern
|
69
|
-
# note that these are the only 2 digit year patterns found in
|
91
|
+
# note that these are the only 2 digit year patterns found in stanford-mods date fields
|
70
92
|
# we use 20 as century digits unless it is greater than current year:
|
71
93
|
# 1/1/17 -> 2017
|
72
94
|
# 1/1/27 -> 1927
|
73
95
|
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
|
74
|
-
def
|
96
|
+
def year_from_mm_dd_yy(orig_date_str)
|
75
97
|
return unless orig_date_str
|
76
98
|
|
77
99
|
slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
|
@@ -89,57 +111,64 @@ class ParseDate
|
|
89
111
|
|
90
112
|
DECADE_4CHAR_REGEXP = Regexp.new('(^|\D)\d{3}[u\-?x]')
|
91
113
|
|
92
|
-
#
|
114
|
+
# first year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
|
93
115
|
# note that these are the only decade patterns found in our actual date strings in MODS records
|
94
116
|
# @return [String, nil] 4 digit year (e.g. 1860, 1950) if orig_date_str matches pattern, nil otherwise
|
95
|
-
def
|
117
|
+
def first_year_for_decade(orig_date_str)
|
96
118
|
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
97
119
|
changed_to_zero = decade_matches.to_s.tr('u\-?x', '0') if decade_matches
|
98
|
-
ParseDate.
|
120
|
+
ParseDate.first_four_digits(changed_to_zero) if changed_to_zero
|
121
|
+
end
|
122
|
+
|
123
|
+
# last year of decade (as String) if we have: yyyu, yyy-, yyy? or yyyx pattern
|
124
|
+
# note that these are the only decade patterns found in our actual date strings in MODS records
|
125
|
+
# @return [String, nil] 4 digit year (e.g. 1869, 1959) if orig_date_str matches pattern, nil otherwise
|
126
|
+
def last_year_for_decade(orig_date_str)
|
127
|
+
decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
|
128
|
+
changed_to_nine = decade_matches.to_s.tr('u\-?x', '9') if decade_matches
|
129
|
+
ParseDate.first_four_digits(changed_to_nine) if changed_to_nine
|
99
130
|
end
|
100
131
|
|
101
132
|
CENTURY_WORD_REGEXP = Regexp.new('(\d{1,2}).*century')
|
102
133
|
CENTURY_4CHAR_REGEXP = Regexp.new('(\d{1,2})[u\-]{2}([^u\-]|$)')
|
103
134
|
|
104
|
-
#
|
135
|
+
# first year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
|
105
136
|
# note that these are the only century patterns found in our actual date strings in MODS records
|
106
137
|
# @return [String, nil] yy00 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
107
|
-
def
|
138
|
+
def first_year_for_century(orig_date_str)
|
108
139
|
return unless orig_date_str
|
109
140
|
return if orig_date_str =~ /B\.C\./
|
141
|
+
return "#{Regexp.last_match(1)}00" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
142
|
+
return "#{(Regexp.last_match(1).to_i - 1).to_s}00" if orig_date_str.match(CENTURY_WORD_REGEXP)
|
143
|
+
end
|
110
144
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
|
119
|
-
return unless century_str_matches
|
145
|
+
# last year of century (as String) if we have: yyuu, yy--, yy--? or xxth century pattern
|
146
|
+
# note that these are the only century patterns found in our actual date strings in MODS records
|
147
|
+
# @return [String, nil] yy99 if orig_date_str matches pattern, nil otherwise; also nil if B.C. in pattern
|
148
|
+
def last_year_for_century(orig_date_str)
|
149
|
+
return unless orig_date_str
|
150
|
+
return if orig_date_str =~ /B\.C\./
|
151
|
+
return "#{Regexp.last_match(1)}99" if orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
120
152
|
|
121
|
-
|
122
|
-
return
|
123
|
-
return '0' + yy + '00' if yy.length == 1
|
153
|
+
# TODO: do we want to look for the very last match of digits before "century" instead of the first one?
|
154
|
+
return "#{(Regexp.last_match(1).to_i - 1).to_s}99" if orig_date_str.match(CENTURY_WORD_REGEXP)
|
124
155
|
end
|
125
156
|
|
126
157
|
BC_REGEX = Regexp.new('(\d{1,4}).*' + Regexp.escape('B.C.'))
|
127
158
|
|
128
|
-
#
|
159
|
+
# Integer sortable value for B.C. if we have B.C. pattern
|
129
160
|
# @return [Integer, nil] Integer sortable -ddd if B.C. in pattern; nil otherwise
|
130
|
-
def
|
161
|
+
def year_int_for_bc(orig_date_str)
|
131
162
|
bc_matches = orig_date_str.match(BC_REGEX) if orig_date_str
|
132
163
|
"-#{Regexp.last_match(1)}".to_i if bc_matches
|
133
164
|
end
|
134
165
|
|
135
166
|
EARLY_NUMERIC = Regexp.new('^\-?\d{1,3}$')
|
136
167
|
|
137
|
-
#
|
138
|
-
# @return [
|
139
|
-
def
|
140
|
-
|
141
|
-
|
142
|
-
orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
|
168
|
+
# year if orig_date_str contains yyy, yy, y, -y, -yy, -yyy, -yyyy
|
169
|
+
# @return [String, nil] -ddd if orig_date_str matches pattern; nil otherwise
|
170
|
+
def year_for_early_numeric(orig_date_str)
|
171
|
+
orig_date_str if orig_date_str.match(EARLY_NUMERIC) || orig_date_str =~ /^-\d{4}$/
|
143
172
|
end
|
144
173
|
end
|
145
174
|
end
|
data/lib/parse_date/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_date
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Naomi Dushay
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: zeitwerk
|