date-formats 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/date-formats.rb +13 -27
- data/lib/date-formats/date.rb +214 -120
- data/lib/date-formats/formats.rb +35 -60
- data/lib/date-formats/reader.rb +18 -31
- data/lib/date-formats/source.rb +23 -13
- data/lib/date-formats/version.rb +2 -2
- data/test/test_date.rb +34 -35
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81aad612afdba8f85e731f97c1bc119499a8211d
|
4
|
+
data.tar.gz: e013e9bdac91e1938149d395b946c535b2c75714
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 718656e27eab527b211337e3a9fef08030017d2b31f7c59fa89b2292df61275a8b9f1ca247e5f9a5eacad03782d83890cd1a37f5d2d6c5ff541ce0ef67c579bc
|
7
|
+
data.tar.gz: c9470e38606e1b909230e8e62187cb1ba2f29f4bd4d99a48fc8125445d80b08e7ac37e37c7d08e463739d7bca767526ab535e0f0016d1f8abc546e16a55cb551
|
data/lib/date-formats.rb
CHANGED
@@ -11,40 +11,26 @@ require 'logutils'
|
|
11
11
|
###
|
12
12
|
# our own code
|
13
13
|
require 'date-formats/version' # let version always go first
|
14
|
-
require 'date-formats/source'
|
15
14
|
require 'date-formats/reader'
|
15
|
+
require 'date-formats/source'
|
16
16
|
|
17
17
|
|
18
18
|
module DateFormats
|
19
19
|
|
20
|
+
#############
|
21
|
+
# helpers for building format regex patterns
|
22
|
+
MONTH_EN = build_names( MONTH_NAMES[:en] )
|
23
|
+
# e.g. Jan|Feb|March|Mar|April|Apr|May|June|Jun|...
|
24
|
+
DAY_EN = build_names( DAY_NAMES[:en] )
|
25
|
+
# e.g.
|
20
26
|
|
21
|
-
|
22
|
-
|
23
|
-
MONTH_EN = build_re( MONTH_EN_LINES )
|
24
|
-
|
25
|
-
WEEKDAY_EN_LINES = Reader.parse_weekday( Source::WEEKDAY_EN )
|
26
|
-
WEEKDAY_EN = build_re( WEEKDAY_EN_LINES )
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
MONTH_FR_LINES = Reader.parse_month( Source::MONTH_FR )
|
31
|
-
MONTH_FR_TO_MM = build_map( MONTH_FR_LINES )
|
32
|
-
MONTH_FR = build_re( MONTH_FR_LINES )
|
33
|
-
|
34
|
-
WEEKDAY_FR_LINES = Reader.parse_weekday( Source::WEEKDAY_FR )
|
35
|
-
WEEKDAY_FR = build_re( WEEKDAY_FR_LINES )
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
MONTH_ES_LINES = Reader.parse_month( Source::MONTH_ES )
|
40
|
-
MONTH_ES_TO_MM = build_map( MONTH_ES_LINES )
|
41
|
-
MONTH_ES = build_re( MONTH_ES_LINES )
|
42
|
-
|
43
|
-
|
27
|
+
MONTH_FR = build_names( MONTH_NAMES[:fr] )
|
28
|
+
DAY_FR = build_names( DAY_NAMES[:fr] )
|
44
29
|
|
45
|
-
|
46
|
-
|
47
|
-
MONTH_DE
|
30
|
+
MONTH_ES = build_names( MONTH_NAMES[:es] )
|
31
|
+
MONTH_PT = build_names( MONTH_NAMES[:pt] )
|
32
|
+
MONTH_DE = build_names( MONTH_NAMES[:de] )
|
33
|
+
MONTH_IT = build_names( MONTH_NAMES[:it] )
|
48
34
|
|
49
35
|
end # module DateFormats
|
50
36
|
|
data/lib/date-formats/date.rb
CHANGED
@@ -1,154 +1,248 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
|
3
4
|
module DateFormats
|
4
5
|
|
5
6
|
|
6
|
-
|
7
|
+
def self.lang
|
8
|
+
@@lang ||= :en ## defaults to english (:en)
|
9
|
+
end
|
10
|
+
def self.lang=( value )
|
11
|
+
@@lang = value.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
|
12
|
+
end
|
7
13
|
|
8
|
-
private
|
9
|
-
def calc_year( month, day, start: ) ## note: start required param for now on!!!
|
10
14
|
|
11
|
-
|
15
|
+
def self.parser( lang: ) ## find parser
|
16
|
+
lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
|
12
17
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
else
|
17
|
-
# assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
|
18
|
-
start.year+1
|
19
|
-
end
|
18
|
+
## note: cache all "built-in" lang versions (e.g. formats == nil)
|
19
|
+
@@parser ||= {}
|
20
|
+
parser = @@parser[ lang ] ||= DateParser.new( lang: lang )
|
20
21
|
end
|
21
22
|
|
23
|
+
def self.parse( line,
|
24
|
+
lang: self.class.lang,
|
25
|
+
start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
|
26
|
+
)
|
27
|
+
parser( lang: lang ).parse( line, start: start )
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.find!( line,
|
31
|
+
lang: self.class.lang,
|
32
|
+
start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
|
33
|
+
)
|
34
|
+
parser( lang: lang ).find!( line, start: start )
|
35
|
+
end
|
22
36
|
|
23
|
-
def parse_date_time( match_data, start: )
|
24
37
|
|
25
|
-
# convert regex match_data captures to hash
|
26
|
-
# - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
|
27
|
-
h = {}
|
28
|
-
# - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
|
29
|
-
match_data.names.each { |name| h[name.to_sym] = match_data[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
|
30
38
|
|
31
|
-
|
32
|
-
## pp h
|
33
|
-
logger.debug " [parse_date_time] hash: >#{h.inspect}<"
|
39
|
+
class DateParser
|
34
40
|
|
35
|
-
|
36
|
-
h[ :month ] = MONTH_ES_TO_MM[ h[:month_es] ] if h[:month_es]
|
37
|
-
h[ :month ] = MONTH_FR_TO_MM[ h[:month_fr] ] if h[:month_fr]
|
38
|
-
h[ :month ] = MONTH_DE_TO_MM[ h[:month_de] ] if h[:month_de]
|
39
|
-
h[ :month ] = MONTH_IT_TO_MM[ h[:month_it] ] if h[:month_it]
|
40
|
-
h[ :month ] = MONTH_PT_TO_MM[ h[:month_pt] ] if h[:month_pt]
|
41
|
+
include LogUtils::Logging
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
def initialize( lang:,
|
44
|
+
formats: nil, month_names: nil, day_names: nil
|
45
|
+
)
|
46
|
+
@lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
|
45
47
|
|
46
|
-
|
47
|
-
|
48
|
+
if formats
|
49
|
+
@formats = formats
|
50
|
+
else
|
51
|
+
@formats = FORMATS[ @lang ]
|
52
|
+
if @formats
|
53
|
+
month_names = MONTH_NAMES[ @lang ]
|
54
|
+
day_names = DAY_NAMES[ @lang ]
|
55
|
+
else
|
56
|
+
## fallback to english if lang not available
|
57
|
+
## todo/fix: add/issue warning!!!!!
|
58
|
+
@formats = FORMATS[ :en ]
|
59
|
+
month_names = MONTH_NAMES[ :en ]
|
60
|
+
day_names = DAY_NAMES[ :en ]
|
61
|
+
end
|
62
|
+
end
|
48
63
|
|
49
|
-
|
50
|
-
|
64
|
+
## convert month_names and day_names to map if present
|
65
|
+
@month_names = month_names ? build_map( month_names ) : nil
|
66
|
+
@day_names = day_names ? build_map( day_names ) : nil
|
67
|
+
end
|
51
68
|
|
52
|
-
DateTime.strptime( value, '%Y-%m-%d %H:%M' )
|
53
|
-
end
|
54
69
|
|
55
|
-
|
70
|
+
def parse( line, start: )
|
71
|
+
date = nil
|
72
|
+
@formats.each do |format|
|
73
|
+
re = format[0]
|
74
|
+
m = re.match( line )
|
75
|
+
if m
|
76
|
+
date = parse_matchdata( m, start: start )
|
77
|
+
break
|
78
|
+
end
|
79
|
+
# no match; continue; try next regex pattern
|
80
|
+
end
|
56
81
|
|
82
|
+
## todo/fix - raise ArgumentError - invalid date; no format match found
|
83
|
+
date # note: nil if no match found
|
84
|
+
end
|
57
85
|
|
58
86
|
|
59
|
-
|
87
|
+
def find!( line, start: )
|
88
|
+
# fix: use more lookahead for all required trailing spaces!!!!!
|
89
|
+
# fix: use <name capturing group> for month,day,year etc.!!!
|
90
|
+
|
91
|
+
#
|
92
|
+
# fix: !!!!
|
93
|
+
# date in [] will become [[DATE.DE4]] - when getting removed will keep ]!!!!
|
94
|
+
# fix: change regex to \[[A-Z0-9.]\] !!!!!! plus add unit test too!!!
|
95
|
+
#
|
96
|
+
|
97
|
+
date = nil
|
98
|
+
@formats.each do |format|
|
99
|
+
re = format[0]
|
100
|
+
tag = format[1]
|
101
|
+
m = re.match( line )
|
102
|
+
if m
|
103
|
+
date = parse_matchdata( m, start: start )
|
104
|
+
## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
|
105
|
+
## fix: use md.begin(0), md.end(0)
|
106
|
+
line.sub!( m[0], tag )
|
107
|
+
## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
|
108
|
+
break
|
109
|
+
end
|
110
|
+
# no match; continue; try next regex pattern
|
111
|
+
end
|
112
|
+
date # note: nil if no match found
|
113
|
+
end
|
60
114
|
|
61
|
-
|
115
|
+
private
|
116
|
+
def calc_year( month, day, start: )
|
62
117
|
|
118
|
+
logger.debug " [calc_year] ????-#{month}-#{day} -- start: #{start}"
|
63
119
|
|
64
|
-
|
65
|
-
|
120
|
+
if month >= start.month
|
121
|
+
# assume same year as start_at event (e.g. 2013 for 2013/14 season)
|
122
|
+
start.year
|
123
|
+
else
|
124
|
+
# assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
|
125
|
+
start.year+1
|
126
|
+
end
|
127
|
+
end
|
66
128
|
|
67
129
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
130
|
+
def parse_matchdata( m, start: )
|
131
|
+
# convert regex match_data captures to hash
|
132
|
+
# - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
|
133
|
+
h = {}
|
134
|
+
# - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
|
135
|
+
m.names.each { |name| h[name.to_sym] = m[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
|
136
|
+
|
137
|
+
## puts "[parse_date_time] match_data:"
|
138
|
+
## pp h
|
139
|
+
logger.debug " [parse_matchdata] hash: >#{h.inspect}<"
|
140
|
+
|
141
|
+
if h[:month_name]
|
142
|
+
## todo/fix: issue error if no month names defined !!!
|
143
|
+
if @month_names
|
144
|
+
h[ :month ] = @month_names[ h[:month_name] ]
|
145
|
+
else
|
146
|
+
## todo/fix: change to ArgumentError( "invalid date; ")
|
147
|
+
puts "** !!! ERROR !!! - no month names defined for lang #{@lang}; cannot match:"
|
148
|
+
pp m
|
149
|
+
exit 1
|
150
|
+
end
|
151
|
+
end
|
74
152
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
@formats.each do |format|
|
87
|
-
tag = format[0]
|
88
|
-
pattern = format[1]
|
89
|
-
m=pattern.match( line )
|
90
|
-
if m
|
91
|
-
date = parse_date_time( m, start: start_at )
|
92
|
-
## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
|
93
|
-
## fix: use md.begin(0), md.end(0)
|
94
|
-
line.sub!( m[0], tag )
|
95
|
-
## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
|
96
|
-
return date
|
153
|
+
if h[:day_name]
|
154
|
+
if @day_names
|
155
|
+
## note: use cwday in ruby date to get days from 1-7 (Monday (1) to Sunday (7))
|
156
|
+
## wday gives you 0-6 (Sunday (0), Monday (1) to Saturday (6))
|
157
|
+
h[ :cwday ] = @day_names[ h[:day_name ] ]
|
158
|
+
else
|
159
|
+
## todo/fix: change to ArgumentError( "invalid date; ")
|
160
|
+
puts "** !!! ERROR !!! - no day names defined for lang #{@lang}; cannot match:"
|
161
|
+
pp m
|
162
|
+
exit 1
|
163
|
+
end
|
97
164
|
end
|
98
|
-
# no match; continue; try next pattern
|
99
|
-
end
|
100
165
|
|
101
|
-
|
102
|
-
|
166
|
+
month = h[:month]
|
167
|
+
day = h[:day]
|
168
|
+
year = h[:year] || calc_year( month.to_i, day.to_i, start: start ).to_s
|
169
|
+
|
170
|
+
if h[:hours] || h[:minutes] ## check time (hours or minutes) is present (otherwise asume just Date and NOT DateTime)
|
171
|
+
hours = h[:hours] || '00' # default to 00:00 for HH:MM (hours:minutes)
|
172
|
+
minutes = h[:minutes] || '00'
|
103
173
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
def find!( line, start_at: ) ## todo/fix: change start_at to start only!!!
|
136
|
-
# fix: use more lookahead for all required trailing spaces!!!!!
|
137
|
-
# fix: use <name capturing group> for month,day,year etc.!!!
|
138
|
-
|
139
|
-
tag = '[EN_MONTH_DD]'
|
140
|
-
pattern = EN__MONTH_DD__DATE_RE
|
141
|
-
m = pattern.match( line )
|
142
|
-
if m
|
143
|
-
date = parse_date_time( m, start: start_at )
|
144
|
-
## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
|
145
|
-
## fix: use md.begin(0), md.end(0)
|
146
|
-
line.sub!( m[0], tag )
|
147
|
-
## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
|
148
|
-
return date
|
174
|
+
value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i]
|
175
|
+
logger.debug " datetime: >#{value}<"
|
176
|
+
|
177
|
+
date = DateTime.strptime( value, '%Y-%m-%d %H:%M' )
|
178
|
+
else
|
179
|
+
value = '%d-%02d-%02d' % [year.to_i, month.to_i, day.to_i]
|
180
|
+
logger.debug " date: >#{value}<"
|
181
|
+
|
182
|
+
date = Date.strptime( value, '%Y-%m-%d' )
|
183
|
+
end
|
184
|
+
|
185
|
+
## check/assert cwday if present!!!!
|
186
|
+
date
|
187
|
+
end # method parse
|
188
|
+
|
189
|
+
##################
|
190
|
+
# helpers
|
191
|
+
private
|
192
|
+
def build_map( lines )
|
193
|
+
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
194
|
+
## note: index is a string too
|
195
|
+
## {"January" => "1", "Jan" => "1",
|
196
|
+
## "February" => "2", "Feb" => "2",
|
197
|
+
## "March" => "3", "Mar" => "3",
|
198
|
+
## "April" => "4", "Apr" => "4",
|
199
|
+
## "May" => "5",
|
200
|
+
## "June" => "6", "Jun" => "6", ...
|
201
|
+
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
202
|
+
line.each { |name| h[ name ] = (i+1).to_s } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
203
|
+
h
|
204
|
+
end
|
149
205
|
end
|
150
|
-
|
151
|
-
|
152
|
-
|
206
|
+
end # class DateParser
|
207
|
+
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
class RsssfDateParser < DateParser
|
212
|
+
|
213
|
+
MONTH_NAMES = DateFormats.parse_month( <<TXT )
|
214
|
+
Jan
|
215
|
+
Feb
|
216
|
+
March Mar
|
217
|
+
April Apr
|
218
|
+
May
|
219
|
+
June Jun
|
220
|
+
July Jul
|
221
|
+
Aug
|
222
|
+
Sept Sep
|
223
|
+
Oct
|
224
|
+
Nov
|
225
|
+
Dec
|
226
|
+
TXT
|
227
|
+
|
228
|
+
MONTH_EN = DateFormats.build_names( MONTH_NAMES ) ## re helper e.g. Jan|Feb|March|Mar|...
|
229
|
+
|
230
|
+
## e.g.
|
231
|
+
## [Jun 7] or [Aug 12] etc. - note: MUST include brackets e.g. []
|
232
|
+
##
|
233
|
+
## check add \b at the beginning and end - why?? why not?? working??
|
234
|
+
EN__MONTH_DD__DATE_RE = /\[
|
235
|
+
(?<month_name>#{MONTH_EN})
|
236
|
+
\s
|
237
|
+
(?<day>\d{1,2})
|
238
|
+
\]/x
|
239
|
+
|
240
|
+
def initialize
|
241
|
+
super( lang: 'en',
|
242
|
+
formats: [[EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]']],
|
243
|
+
month_names: MONTH_NAMES
|
244
|
+
)
|
245
|
+
end
|
246
|
+
end ## class RsssfDateParser
|
153
247
|
|
154
248
|
end # module DateFormats
|
data/lib/date-formats/formats.rb
CHANGED
@@ -18,7 +18,7 @@ DB__DATE_TIME_RE = /\b
|
|
18
18
|
(?<minutes>\d{2})
|
19
19
|
\b/x
|
20
20
|
|
21
|
-
# e.g. 2012-09-14
|
21
|
+
# e.g. 2012-09-14 => YYYY-MM-DD
|
22
22
|
# note: allow 2012-9-3 e.g. no leading zero required
|
23
23
|
# regex_db2
|
24
24
|
DB__DATE_RE = /\b
|
@@ -61,7 +61,7 @@ DD_MM__DATE_TIME_RE = /\b
|
|
61
61
|
(?<minutes>\d{2})
|
62
62
|
\b/x
|
63
63
|
|
64
|
-
# e.g. 14.09.2012 => DD.MM.YYYY
|
64
|
+
# e.g. 14.09.2012 => DD.MM.YYYY
|
65
65
|
# regex_de3
|
66
66
|
DD_MM_YYYY__DATE_RE = /\b
|
67
67
|
(?<day>\d{1,2})
|
@@ -71,7 +71,7 @@ DD_MM_YYYY__DATE_RE = /\b
|
|
71
71
|
(?<year>\d{4})
|
72
72
|
\b/x
|
73
73
|
|
74
|
-
# e.g. 14.09. => DD.MM. w/ implied year
|
74
|
+
# e.g. 14.09. => DD.MM. w/ implied year
|
75
75
|
# note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
|
76
76
|
# note: we use a lookahead for last part e.g. (?:\s+|$|[\]]) - do NOT consume
|
77
77
|
# regex_de4 (use lookahead assert)
|
@@ -89,7 +89,7 @@ DD_MM__DATE_RE = /\b
|
|
89
89
|
EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
|
90
90
|
(?<day>\d{1,2})
|
91
91
|
\s
|
92
|
-
(?<
|
92
|
+
(?<month_name>#{MONTH_EN})
|
93
93
|
\s
|
94
94
|
(?<year>\d{4})
|
95
95
|
\s+
|
@@ -98,22 +98,16 @@ EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
|
|
98
98
|
(?<minutes>\d{2})
|
99
99
|
\b/x
|
100
100
|
|
101
|
-
|
102
|
-
# fix: pass in lang (e.g. en or es)
|
103
|
-
# only process format for lang plus fallback to en?
|
104
|
-
# e.g. EN__DD_MONTH and ES__DD_MONTH depend on order for match (first listed will match)
|
105
|
-
|
106
|
-
# e.g. 12 May => D|DD.MMM w/ implied year and implied hours
|
101
|
+
# e.g. 12 May => D|DD.MMM w/ implied year
|
107
102
|
EN__DD_MONTH__DATE_RE = /\b
|
108
103
|
(?<day>\d{1,2})
|
109
104
|
\s
|
110
|
-
(?<
|
105
|
+
(?<month_name>#{MONTH_EN})
|
111
106
|
\b/x
|
112
107
|
|
113
|
-
|
114
108
|
# e.g. Jun/12 2011 14:00
|
115
109
|
EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
|
116
|
-
(?<
|
110
|
+
(?<month_name>#{MONTH_EN})
|
117
111
|
\/
|
118
112
|
(?<day>\d{1,2})
|
119
113
|
\s
|
@@ -126,7 +120,7 @@ EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
|
|
126
120
|
|
127
121
|
# e.g. Jun/12 14:00 w/ implied year H|HH:MM
|
128
122
|
EN__MONTH_DD__DATE_TIME_RE = /\b
|
129
|
-
(?<
|
123
|
+
(?<month_name>#{MONTH_EN})
|
130
124
|
\/
|
131
125
|
(?<day>\d{1,2})
|
132
126
|
\s+
|
@@ -135,9 +129,9 @@ EN__MONTH_DD__DATE_TIME_RE = /\b
|
|
135
129
|
(?<minutes>\d{2})
|
136
130
|
\b/x
|
137
131
|
|
138
|
-
# e.g. Jun/12 2013
|
132
|
+
# e.g. Jun/12 2013
|
139
133
|
EN__MONTH_DD_YYYY__DATE_RE = /\b
|
140
|
-
(?<
|
134
|
+
(?<month_name>#{MONTH_EN})
|
141
135
|
\/
|
142
136
|
(?<day>\d{1,2})
|
143
137
|
\s
|
@@ -150,85 +144,66 @@ EN__MONTH_DD_YYYY__DATE_RE = /\b
|
|
150
144
|
# fix: remove space again for now - and use simple en date reader or something!!!
|
151
145
|
## was [\/ ] changed back to \/
|
152
146
|
EN__MONTH_DD__DATE_RE = /\b
|
153
|
-
(?<
|
147
|
+
(?<month_name>#{MONTH_EN})
|
154
148
|
\/
|
155
149
|
(?<day>\d{1,2})
|
156
150
|
\b/x
|
157
151
|
|
158
152
|
|
159
|
-
# e.g. 12 Ene w/ implied year
|
153
|
+
# e.g. 12 Ene w/ implied year
|
160
154
|
ES__DD_MONTH__DATE_RE = /\b
|
161
155
|
(?<day>\d{1,2})
|
162
156
|
\s
|
163
|
-
(?<
|
157
|
+
(?<month_name>#{MONTH_ES})
|
164
158
|
\b/x
|
165
159
|
|
166
160
|
# e.g. Ven 8 Août or [Ven 8 Août] or Ven 8. Août or [Ven 8. Août]
|
167
161
|
### note: do NOT consume [] in regex (use lookahead assert)
|
168
|
-
|
169
|
-
(
|
162
|
+
FR__DAY_DD_MONTH__DATE_RE = /\b
|
163
|
+
(?<day_name>#{DAY_FR})
|
170
164
|
\s+
|
171
165
|
(?<day>\d{1,2})
|
172
166
|
\.? # note: make dot optional
|
173
167
|
\s+
|
174
|
-
(?<
|
168
|
+
(?<month_name>#{MONTH_FR})
|
175
169
|
(?=\s+|$|[\]])/x ## note: allow end-of-string/line too
|
176
170
|
|
177
171
|
|
178
172
|
|
179
|
-
|
180
|
-
# map
|
181
|
-
## todo/fix: remove (move to attic)??? always use lang specific - why? why not?
|
182
|
-
FORMATS_ALL = [
|
183
|
-
[ '[YYYY_MM_DD_hh_mm]', DB__DATE_TIME_RE ],
|
184
|
-
[ '[YYYY_MM_DD]', DB__DATE_RE ],
|
185
|
-
[ '[DD_MM_YYYY_hh_mm]', DD_MM_YYYY__DATE_TIME_RE ],
|
186
|
-
[ '[DD_MM_hh_mm]', DD_MM__DATE_TIME_RE ],
|
187
|
-
[ '[DD_MM_YYYY]', DD_MM_YYYY__DATE_RE ],
|
188
|
-
[ '[DD_MM]', DD_MM__DATE_RE ],
|
189
|
-
[ '[FR_WEEKDAY_DD_MONTH]', FR__WEEKDAY_DD_MONTH__DATE_RE ],
|
190
|
-
[ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_RE ],
|
191
|
-
[ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_RE ],
|
192
|
-
[ '[EN_MONTH_DD_hh_mm]', EN__MONTH_DD__DATE_TIME_RE ],
|
193
|
-
[ '[EN_MONTH_DD_YYYY]', EN__MONTH_DD_YYYY__DATE_RE ],
|
194
|
-
[ '[EN_MONTH_DD]', EN__MONTH_DD__DATE_RE ],
|
195
|
-
[ '[EN_DD_MONTH]', EN__DD_MONTH__DATE_RE ],
|
196
|
-
[ '[ES_DD_MONTH]', ES__DD_MONTH__DATE_RE ]
|
197
|
-
]
|
198
|
-
|
173
|
+
#############################################
|
174
|
+
# map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served
|
199
175
|
|
200
176
|
FORMATS_BASE = [ ### all numbers (no month names or weekday) - find a better name?
|
201
|
-
[ '[YYYY_MM_DD_hh_mm]'
|
202
|
-
[ '[YYYY_MM_DD]'
|
203
|
-
[ '[DD_MM_YYYY_hh_mm]'
|
204
|
-
[ '[DD_MM_hh_mm]'
|
205
|
-
[ '[DD_MM_YYYY]'
|
206
|
-
[ '[DD_MM]'
|
177
|
+
[ DB__DATE_TIME_RE, '[YYYY_MM_DD_hh_mm]' ],
|
178
|
+
[ DB__DATE_RE, '[YYYY_MM_DD]' ],
|
179
|
+
[ DD_MM_YYYY__DATE_TIME_RE, '[DD_MM_YYYY_hh_mm]' ],
|
180
|
+
[ DD_MM__DATE_TIME_RE, '[DD_MM_hh_mm]' ],
|
181
|
+
[ DD_MM_YYYY__DATE_RE, '[DD_MM_YYYY]' ],
|
182
|
+
[ DD_MM__DATE_RE, '[DD_MM]' ],
|
207
183
|
]
|
208
184
|
|
209
185
|
FORMATS_EN = [
|
210
|
-
[ '[EN_DD_MONTH_YYYY_hh_mm]'
|
211
|
-
[ '[EN_MONTH_DD_YYYY_hh_mm]'
|
212
|
-
[ '[EN_MONTH_DD_hh_mm]'
|
213
|
-
[ '[EN_MONTH_DD_YYYY]'
|
214
|
-
[ '[EN_MONTH_DD]'
|
215
|
-
[ '[EN_DD_MONTH]'
|
186
|
+
[ EN__DD_MONTH_YYYY__DATE_TIME_RE, '[EN_DD_MONTH_YYYY_hh_mm]' ],
|
187
|
+
[ EN__MONTH_DD_YYYY__DATE_TIME_RE, '[EN_MONTH_DD_YYYY_hh_mm]' ],
|
188
|
+
[ EN__MONTH_DD__DATE_TIME_RE, '[EN_MONTH_DD_hh_mm]' ],
|
189
|
+
[ EN__MONTH_DD_YYYY__DATE_RE, '[EN_MONTH_DD_YYYY]' ],
|
190
|
+
[ EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]' ],
|
191
|
+
[ EN__DD_MONTH__DATE_RE, '[EN_DD_MONTH]' ],
|
216
192
|
]
|
217
193
|
|
218
194
|
FORMATS_FR = [
|
219
|
-
[ '[
|
195
|
+
[ FR__DAY_DD_MONTH__DATE_RE, '[FR_DAY_DD_MONTH]' ],
|
220
196
|
]
|
221
197
|
|
222
198
|
FORMATS_ES = [
|
223
|
-
[ '[ES_DD_MONTH]'
|
199
|
+
[ ES__DD_MONTH__DATE_RE, '[ES_DD_MONTH]' ],
|
224
200
|
]
|
225
201
|
|
226
202
|
|
227
203
|
FORMATS = {
|
228
|
-
|
229
|
-
|
230
|
-
|
204
|
+
en: FORMATS_BASE+FORMATS_EN,
|
205
|
+
fr: FORMATS_BASE+FORMATS_FR,
|
206
|
+
es: FORMATS_BASE+FORMATS_ES,
|
231
207
|
}
|
232
208
|
|
233
|
-
|
234
209
|
end # module DateFormats
|
data/lib/date-formats/reader.rb
CHANGED
@@ -34,45 +34,32 @@ class Reader ## todo/check: rename to WordReader or something for easy (re)use
|
|
34
34
|
end
|
35
35
|
lines
|
36
36
|
end # method parse
|
37
|
+
end # class Reader
|
38
|
+
|
37
39
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
lines
|
40
|
+
|
41
|
+
def self.parse_month( txt )
|
42
|
+
lines = Reader.parse( txt )
|
43
|
+
if lines.size != 12
|
44
|
+
puts "*** !!! ERROR !!! reading month names; got #{lines.size} lines - expected 12"
|
45
|
+
exit 1
|
45
46
|
end
|
47
|
+
lines
|
48
|
+
end
|
46
49
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
end
|
53
|
-
lines
|
50
|
+
def self.parse_day( txt )
|
51
|
+
lines = Reader.parse( txt )
|
52
|
+
if lines.size != 7
|
53
|
+
puts "*** !!! ERROR !!! reading day names; got #{lines.size} lines - expected 7"
|
54
|
+
exit 1
|
54
55
|
end
|
55
|
-
|
56
|
+
lines
|
57
|
+
end
|
56
58
|
|
57
59
|
|
58
|
-
def self.
|
60
|
+
def self.build_names( lines )
|
59
61
|
## join all words together into a single string e.g.
|
60
62
|
## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
|
61
63
|
lines.map { |line| line.join('|') }.join('|')
|
62
64
|
end
|
63
|
-
|
64
|
-
def self.build_map( lines )
|
65
|
-
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
66
|
-
## note: index is a string too
|
67
|
-
## {"January" => "1", "Jan" => "1",
|
68
|
-
## "February" => "2", "Feb" => "2",
|
69
|
-
## "March" => "3", "Mar" => "3",
|
70
|
-
## "April" => "4", "Apr" => "4",
|
71
|
-
## "May" => "5",
|
72
|
-
## "June" => "6", "Jun" => "6", ...
|
73
|
-
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
74
|
-
line.each { |name| h[ name ] = (i+1).to_s } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
75
|
-
h
|
76
|
-
end
|
77
|
-
end
|
78
65
|
end # module DateFormats
|
data/lib/date-formats/source.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
module DateFormats
|
2
|
-
module Source
|
3
2
|
|
4
3
|
# todo: make more generic for reuse
|
5
4
|
### fix:
|
6
5
|
## use date/en.txt or en.txt etc. -- why? why not?
|
7
6
|
|
8
|
-
|
7
|
+
## note: always sort lines with longest words, abbreviations first!!!!
|
8
|
+
## todo/fix: add/split into MONTH_NAMES and MONTH_ABBREVS (and DAY_NAMES and DAY_ABBREVS) - why? why not?
|
9
|
+
MONTH_NAMES = {}
|
10
|
+
DAY_NAMES = {}
|
11
|
+
|
12
|
+
|
13
|
+
MONTH_NAMES[:en] = <<TXT
|
9
14
|
January Jan
|
10
15
|
February Feb
|
11
16
|
March Mar
|
@@ -20,11 +25,12 @@ November Nov
|
|
20
25
|
December Dec
|
21
26
|
TXT
|
22
27
|
|
23
|
-
|
28
|
+
|
29
|
+
DAY_NAMES[:en] = <<TXT
|
24
30
|
Monday Mon
|
25
|
-
Tuesday
|
31
|
+
Tuesday Tues Tue Tu
|
26
32
|
Wednesday Wed
|
27
|
-
Thursday
|
33
|
+
Thursday Thurs Thur Thu Th
|
28
34
|
Friday Fri
|
29
35
|
Saturday Sat
|
30
36
|
Sunday Sun
|
@@ -32,7 +38,7 @@ TXT
|
|
32
38
|
|
33
39
|
|
34
40
|
|
35
|
-
|
41
|
+
MONTH_NAMES[:fr] = <<TXT
|
36
42
|
Janvier Janv Jan ## check janv in use??
|
37
43
|
Février Févr Fév ## check fevr in use???
|
38
44
|
Mars Mar
|
@@ -47,7 +53,7 @@ Novembre Nove Nov ## check nove in use??
|
|
47
53
|
Décembre Déce Déc ## check dece in use??
|
48
54
|
TXT
|
49
55
|
|
50
|
-
|
56
|
+
DAY_NAMES[:fr] = <<TXT
|
51
57
|
Lundi Lun L
|
52
58
|
Mardi Mar Ma
|
53
59
|
Mercredi Mer Me
|
@@ -59,7 +65,7 @@ TXT
|
|
59
65
|
|
60
66
|
|
61
67
|
|
62
|
-
|
68
|
+
MONTH_NAMES[:es] = <<TXT
|
63
69
|
Enero Ene
|
64
70
|
Febrero Feb
|
65
71
|
Marzo Mar
|
@@ -74,7 +80,7 @@ Noviembre Nov
|
|
74
80
|
Diciembre Dic
|
75
81
|
TXT
|
76
82
|
|
77
|
-
|
83
|
+
MONTH_NAMES[:de] = <<TXT
|
78
84
|
Jänner Januar Jan Jän # note: in Austria - Jänner; in Deutschland Januar allow both ??
|
79
85
|
Feber Februar Feb
|
80
86
|
März Mär
|
@@ -89,7 +95,7 @@ November Nov
|
|
89
95
|
Dezember Dez
|
90
96
|
TXT
|
91
97
|
|
92
|
-
|
98
|
+
MONTH_NAMES[:it] = <<TXT
|
93
99
|
Gennaio
|
94
100
|
Febbraio
|
95
101
|
Marzo
|
@@ -104,7 +110,7 @@ Novembre
|
|
104
110
|
Dicembre
|
105
111
|
TXT
|
106
112
|
|
107
|
-
|
113
|
+
MONTH_NAMES[:pt] = <<TXT
|
108
114
|
Janeiro
|
109
115
|
Fevereiro
|
110
116
|
Março
|
@@ -119,7 +125,7 @@ Novembro
|
|
119
125
|
Dezembro
|
120
126
|
TXT
|
121
127
|
|
122
|
-
|
128
|
+
MONTH_NAMES[:ro] = <<TXT
|
123
129
|
Ianuarie
|
124
130
|
Februarie
|
125
131
|
Martie
|
@@ -134,5 +140,9 @@ Noiembrie
|
|
134
140
|
Decembrie
|
135
141
|
TXT
|
136
142
|
|
137
|
-
|
143
|
+
############################################
|
144
|
+
## convert (unparsed) text to (parsed) lines with words
|
145
|
+
MONTH_NAMES.each {|k,v| MONTH_NAMES[k] = parse_month(v) }
|
146
|
+
DAY_NAMES.each {|k,v| DAY_NAMES[k] = parse_day(v) }
|
147
|
+
|
138
148
|
end # module DateFormats
|
data/lib/date-formats/version.rb
CHANGED
data/test/test_date.rb
CHANGED
@@ -15,7 +15,7 @@ class TestDate < MiniTest::Test
|
|
15
15
|
[ '21.01.2013 21.30', '2013-01-21 21:30', '[DD_MM_YYYY_hh_mm]' ],
|
16
16
|
[ '26.01.2013', '2013-01-26', '[DD_MM_YYYY]' ],
|
17
17
|
[ '[26.01.2013]', '2013-01-26', '[[DD_MM_YYYY]]' ],
|
18
|
-
[ '[21.1.]', '2013-01-21
|
18
|
+
[ '[21.1.]', '2013-01-21', '[[DD_MM]]' ]
|
19
19
|
]
|
20
20
|
|
21
21
|
assert_dates( data, start: Date.new( 2013, 1, 1 ) )
|
@@ -23,15 +23,15 @@ class TestDate < MiniTest::Test
|
|
23
23
|
|
24
24
|
def test_date_fr
|
25
25
|
data = [
|
26
|
-
[ '[Ven 08. Août]', '2014-08-08' ],
|
27
|
-
[ 'Ven 08. Août', '2014-08-08' ],
|
28
|
-
[ 'Ven 8. Août', '2014-08-08' ],
|
29
|
-
[ '[Sam 9. Août]', '2014-08-09' ],
|
30
|
-
[ '[Dim 10. Août]', '2014-08-10' ],
|
31
|
-
[ '[Sam 31. Janv]', '2015-01-31' ],
|
32
|
-
[ '[Sam 7. Févr]', '2015-02-07' ],
|
33
|
-
[ '[Sam 31. Jan]', '2015-01-31' ],
|
34
|
-
[ '[Sam 7. Fév]', '2015-02-07' ],
|
26
|
+
[ '[Ven 08. Août]', '2014-08-08', '[[FR_DAY_DD_MONTH]]' ],
|
27
|
+
[ 'Ven 08. Août', '2014-08-08', '[FR_DAY_DD_MONTH]' ],
|
28
|
+
[ 'Ven 8. Août', '2014-08-08', '[FR_DAY_DD_MONTH]' ],
|
29
|
+
[ '[Sam 9. Août]', '2014-08-09', '[[FR_DAY_DD_MONTH]]' ],
|
30
|
+
[ '[Dim 10. Août]', '2014-08-10', '[[FR_DAY_DD_MONTH]]' ],
|
31
|
+
[ '[Sam 31. Janv]', '2015-01-31', '[[FR_DAY_DD_MONTH]]' ],
|
32
|
+
[ '[Sam 7. Févr]', '2015-02-07', '[[FR_DAY_DD_MONTH]]' ],
|
33
|
+
[ '[Sam 31. Jan]', '2015-01-31', '[[FR_DAY_DD_MONTH]]' ],
|
34
|
+
[ '[Sam 7. Fév]', '2015-02-07', '[[FR_DAY_DD_MONTH]]' ],
|
35
35
|
]
|
36
36
|
|
37
37
|
assert_dates( data, start: Date.new( 2014, 8, 1 ), lang: 'fr' )
|
@@ -39,21 +39,21 @@ class TestDate < MiniTest::Test
|
|
39
39
|
|
40
40
|
def test_date_en
|
41
41
|
data = [
|
42
|
-
[ 'Jun/12 2011 14:00', '2011-06-12 14:00' ],
|
43
|
-
[ 'Oct/12 2013 16:00', '2013-10-12 16:00' ],
|
42
|
+
[ 'Jun/12 2011 14:00', '2011-06-12 14:00', '[EN_MONTH_DD_YYYY_hh_mm]' ],
|
43
|
+
[ 'Oct/12 2013 16:00', '2013-10-12 16:00', '[EN_MONTH_DD_YYYY_hh_mm]' ],
|
44
44
|
|
45
|
-
[ 'Jan/26 2011', '2011-01-26' ],
|
46
|
-
[ 'Jan/26 2011', '2011-01-26
|
45
|
+
[ 'Jan/26 2011', '2011-01-26', '[EN_MONTH_DD_YYYY]' ],
|
46
|
+
[ 'Jan/26 2011', '2011-01-26', '[EN_MONTH_DD_YYYY]' ],
|
47
47
|
|
48
|
-
[ 'Jan/26', '2013-01-26' ],
|
49
|
-
[ 'Jan/26', '2013-01-26
|
50
|
-
[ '26 January', '2013-01-26' ],
|
51
|
-
[ '26 January', '2013-01-26
|
48
|
+
[ 'Jan/26', '2013-01-26', '[EN_MONTH_DD]' ],
|
49
|
+
[ 'Jan/26', '2013-01-26', '[EN_MONTH_DD]' ],
|
50
|
+
[ '26 January', '2013-01-26', '[EN_DD_MONTH]' ],
|
51
|
+
[ '26 January', '2013-01-26', '[EN_DD_MONTH]' ],
|
52
52
|
|
53
|
-
[ 'Jun/13', '2013-06-13' ],
|
54
|
-
[ 'Jun/13', '2013-06-13
|
55
|
-
[ '13 June', '2013-06-13' ],
|
56
|
-
[ '13 June', '2013-06-13
|
53
|
+
[ 'Jun/13', '2013-06-13', '[EN_MONTH_DD]' ],
|
54
|
+
[ 'Jun/13', '2013-06-13', '[EN_MONTH_DD]' ],
|
55
|
+
[ '13 June', '2013-06-13', '[EN_DD_MONTH]' ],
|
56
|
+
[ '13 June', '2013-06-13', '[EN_DD_MONTH]' ]
|
57
57
|
]
|
58
58
|
|
59
59
|
assert_dates( data, start: Date.new( 2013, 1, 1 ), lang: 'en' )
|
@@ -66,7 +66,11 @@ private
|
|
66
66
|
data.each do |rec|
|
67
67
|
line = rec[0]
|
68
68
|
str = rec[1]
|
69
|
-
|
69
|
+
|
70
|
+
## note: test / use parse and find! -- parse MUST go first
|
71
|
+
values = []
|
72
|
+
values << DateFormats.parse( line, start: start, lang: lang )
|
73
|
+
values << DateFormats.find!( line, start: start, lang: lang )
|
70
74
|
|
71
75
|
tagged_line = rec[2] ## optinal tagged line
|
72
76
|
if tagged_line ## note: line gets tagged inplace!!! (no new string)
|
@@ -74,14 +78,17 @@ private
|
|
74
78
|
puts "#{line} == #{tagged_line}"
|
75
79
|
end
|
76
80
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
+
values.each do |value|
|
82
|
+
if str.index( ':' )
|
83
|
+
assert_datetime( DateTime.strptime( str, '%Y-%m-%d %H:%M' ), value )
|
84
|
+
else
|
85
|
+
assert_date( Date.strptime( str, '%Y-%m-%d' ), value )
|
86
|
+
end
|
81
87
|
end
|
82
88
|
end
|
83
89
|
end
|
84
90
|
|
91
|
+
|
85
92
|
## todo: check if assert_datetime or assert_date exist already? what is the best practice to check dates ???
|
86
93
|
def assert_date( exp, value )
|
87
94
|
assert_equal exp.year, value.year
|
@@ -98,12 +105,4 @@ private
|
|
98
105
|
assert_date( exp, value )
|
99
106
|
assert_time( exp, value )
|
100
107
|
end
|
101
|
-
|
102
|
-
|
103
|
-
def parse_date( line, start:, lang: )
|
104
|
-
# e.g. lets you pass in opts[:start_at] ???
|
105
|
-
finder = DateFormats::DateFinder.new( lang: lang )
|
106
|
-
finder.find!( line, start_at: start )
|
107
|
-
end
|
108
|
-
|
109
108
|
end # class TestDate
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: date-formats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: logutils
|