date-formats 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/date-formats.rb +13 -27
- data/lib/date-formats/date.rb +214 -120
- data/lib/date-formats/formats.rb +35 -60
- data/lib/date-formats/reader.rb +18 -31
- data/lib/date-formats/source.rb +23 -13
- data/lib/date-formats/version.rb +2 -2
- data/test/test_date.rb +34 -35
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81aad612afdba8f85e731f97c1bc119499a8211d
|
4
|
+
data.tar.gz: e013e9bdac91e1938149d395b946c535b2c75714
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 718656e27eab527b211337e3a9fef08030017d2b31f7c59fa89b2292df61275a8b9f1ca247e5f9a5eacad03782d83890cd1a37f5d2d6c5ff541ce0ef67c579bc
|
7
|
+
data.tar.gz: c9470e38606e1b909230e8e62187cb1ba2f29f4bd4d99a48fc8125445d80b08e7ac37e37c7d08e463739d7bca767526ab535e0f0016d1f8abc546e16a55cb551
|
data/lib/date-formats.rb
CHANGED
@@ -11,40 +11,26 @@ require 'logutils'
|
|
11
11
|
###
|
12
12
|
# our own code
|
13
13
|
require 'date-formats/version' # let version always go first
|
14
|
-
require 'date-formats/source'
|
15
14
|
require 'date-formats/reader'
|
15
|
+
require 'date-formats/source'
|
16
16
|
|
17
17
|
|
18
18
|
module DateFormats
|
19
19
|
|
20
|
+
#############
|
21
|
+
# helpers for building format regex patterns
|
22
|
+
MONTH_EN = build_names( MONTH_NAMES[:en] )
|
23
|
+
# e.g. Jan|Feb|March|Mar|April|Apr|May|June|Jun|...
|
24
|
+
DAY_EN = build_names( DAY_NAMES[:en] )
|
25
|
+
# e.g.
|
20
26
|
|
21
|
-
|
22
|
-
|
23
|
-
MONTH_EN = build_re( MONTH_EN_LINES )
|
24
|
-
|
25
|
-
WEEKDAY_EN_LINES = Reader.parse_weekday( Source::WEEKDAY_EN )
|
26
|
-
WEEKDAY_EN = build_re( WEEKDAY_EN_LINES )
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
MONTH_FR_LINES = Reader.parse_month( Source::MONTH_FR )
|
31
|
-
MONTH_FR_TO_MM = build_map( MONTH_FR_LINES )
|
32
|
-
MONTH_FR = build_re( MONTH_FR_LINES )
|
33
|
-
|
34
|
-
WEEKDAY_FR_LINES = Reader.parse_weekday( Source::WEEKDAY_FR )
|
35
|
-
WEEKDAY_FR = build_re( WEEKDAY_FR_LINES )
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
MONTH_ES_LINES = Reader.parse_month( Source::MONTH_ES )
|
40
|
-
MONTH_ES_TO_MM = build_map( MONTH_ES_LINES )
|
41
|
-
MONTH_ES = build_re( MONTH_ES_LINES )
|
42
|
-
|
43
|
-
|
27
|
+
MONTH_FR = build_names( MONTH_NAMES[:fr] )
|
28
|
+
DAY_FR = build_names( DAY_NAMES[:fr] )
|
44
29
|
|
45
|
-
|
46
|
-
|
47
|
-
MONTH_DE
|
30
|
+
MONTH_ES = build_names( MONTH_NAMES[:es] )
|
31
|
+
MONTH_PT = build_names( MONTH_NAMES[:pt] )
|
32
|
+
MONTH_DE = build_names( MONTH_NAMES[:de] )
|
33
|
+
MONTH_IT = build_names( MONTH_NAMES[:it] )
|
48
34
|
|
49
35
|
end # module DateFormats
|
50
36
|
|
data/lib/date-formats/date.rb
CHANGED
@@ -1,154 +1,248 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
|
3
4
|
module DateFormats
|
4
5
|
|
5
6
|
|
6
|
-
|
7
|
+
def self.lang
|
8
|
+
@@lang ||= :en ## defaults to english (:en)
|
9
|
+
end
|
10
|
+
def self.lang=( value )
|
11
|
+
@@lang = value.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
|
12
|
+
end
|
7
13
|
|
8
|
-
private
|
9
|
-
def calc_year( month, day, start: ) ## note: start required param for now on!!!
|
10
14
|
|
11
|
-
|
15
|
+
def self.parser( lang: ) ## find parser
|
16
|
+
lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
|
12
17
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
else
|
17
|
-
# assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
|
18
|
-
start.year+1
|
19
|
-
end
|
18
|
+
## note: cache all "built-in" lang versions (e.g. formats == nil)
|
19
|
+
@@parser ||= {}
|
20
|
+
parser = @@parser[ lang ] ||= DateParser.new( lang: lang )
|
20
21
|
end
|
21
22
|
|
23
|
+
def self.parse( line,
|
24
|
+
lang: self.class.lang,
|
25
|
+
start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
|
26
|
+
)
|
27
|
+
parser( lang: lang ).parse( line, start: start )
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.find!( line,
|
31
|
+
lang: self.class.lang,
|
32
|
+
start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
|
33
|
+
)
|
34
|
+
parser( lang: lang ).find!( line, start: start )
|
35
|
+
end
|
22
36
|
|
23
|
-
def parse_date_time( match_data, start: )
|
24
37
|
|
25
|
-
# convert regex match_data captures to hash
|
26
|
-
# - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
|
27
|
-
h = {}
|
28
|
-
# - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
|
29
|
-
match_data.names.each { |name| h[name.to_sym] = match_data[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
|
30
38
|
|
31
|
-
|
32
|
-
## pp h
|
33
|
-
logger.debug " [parse_date_time] hash: >#{h.inspect}<"
|
39
|
+
class DateParser
|
34
40
|
|
35
|
-
|
36
|
-
h[ :month ] = MONTH_ES_TO_MM[ h[:month_es] ] if h[:month_es]
|
37
|
-
h[ :month ] = MONTH_FR_TO_MM[ h[:month_fr] ] if h[:month_fr]
|
38
|
-
h[ :month ] = MONTH_DE_TO_MM[ h[:month_de] ] if h[:month_de]
|
39
|
-
h[ :month ] = MONTH_IT_TO_MM[ h[:month_it] ] if h[:month_it]
|
40
|
-
h[ :month ] = MONTH_PT_TO_MM[ h[:month_pt] ] if h[:month_pt]
|
41
|
+
include LogUtils::Logging
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
def initialize( lang:,
|
44
|
+
formats: nil, month_names: nil, day_names: nil
|
45
|
+
)
|
46
|
+
@lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
|
45
47
|
|
46
|
-
|
47
|
-
|
48
|
+
if formats
|
49
|
+
@formats = formats
|
50
|
+
else
|
51
|
+
@formats = FORMATS[ @lang ]
|
52
|
+
if @formats
|
53
|
+
month_names = MONTH_NAMES[ @lang ]
|
54
|
+
day_names = DAY_NAMES[ @lang ]
|
55
|
+
else
|
56
|
+
## fallback to english if lang not available
|
57
|
+
## todo/fix: add/issue warning!!!!!
|
58
|
+
@formats = FORMATS[ :en ]
|
59
|
+
month_names = MONTH_NAMES[ :en ]
|
60
|
+
day_names = DAY_NAMES[ :en ]
|
61
|
+
end
|
62
|
+
end
|
48
63
|
|
49
|
-
|
50
|
-
|
64
|
+
## convert month_names and day_names to map if present
|
65
|
+
@month_names = month_names ? build_map( month_names ) : nil
|
66
|
+
@day_names = day_names ? build_map( day_names ) : nil
|
67
|
+
end
|
51
68
|
|
52
|
-
DateTime.strptime( value, '%Y-%m-%d %H:%M' )
|
53
|
-
end
|
54
69
|
|
55
|
-
|
70
|
+
def parse( line, start: )
|
71
|
+
date = nil
|
72
|
+
@formats.each do |format|
|
73
|
+
re = format[0]
|
74
|
+
m = re.match( line )
|
75
|
+
if m
|
76
|
+
date = parse_matchdata( m, start: start )
|
77
|
+
break
|
78
|
+
end
|
79
|
+
# no match; continue; try next regex pattern
|
80
|
+
end
|
56
81
|
|
82
|
+
## todo/fix - raise ArgumentError - invalid date; no format match found
|
83
|
+
date # note: nil if no match found
|
84
|
+
end
|
57
85
|
|
58
86
|
|
59
|
-
|
87
|
+
def find!( line, start: )
|
88
|
+
# fix: use more lookahead for all required trailing spaces!!!!!
|
89
|
+
# fix: use <name capturing group> for month,day,year etc.!!!
|
90
|
+
|
91
|
+
#
|
92
|
+
# fix: !!!!
|
93
|
+
# date in [] will become [[DATE.DE4]] - when getting removed will keep ]!!!!
|
94
|
+
# fix: change regex to \[[A-Z0-9.]\] !!!!!! plus add unit test too!!!
|
95
|
+
#
|
96
|
+
|
97
|
+
date = nil
|
98
|
+
@formats.each do |format|
|
99
|
+
re = format[0]
|
100
|
+
tag = format[1]
|
101
|
+
m = re.match( line )
|
102
|
+
if m
|
103
|
+
date = parse_matchdata( m, start: start )
|
104
|
+
## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
|
105
|
+
## fix: use md.begin(0), md.end(0)
|
106
|
+
line.sub!( m[0], tag )
|
107
|
+
## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
|
108
|
+
break
|
109
|
+
end
|
110
|
+
# no match; continue; try next regex pattern
|
111
|
+
end
|
112
|
+
date # note: nil if no match found
|
113
|
+
end
|
60
114
|
|
61
|
-
|
115
|
+
private
|
116
|
+
def calc_year( month, day, start: )
|
62
117
|
|
118
|
+
logger.debug " [calc_year] ????-#{month}-#{day} -- start: #{start}"
|
63
119
|
|
64
|
-
|
65
|
-
|
120
|
+
if month >= start.month
|
121
|
+
# assume same year as start_at event (e.g. 2013 for 2013/14 season)
|
122
|
+
start.year
|
123
|
+
else
|
124
|
+
# assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
|
125
|
+
start.year+1
|
126
|
+
end
|
127
|
+
end
|
66
128
|
|
67
129
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
130
|
+
def parse_matchdata( m, start: )
|
131
|
+
# convert regex match_data captures to hash
|
132
|
+
# - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
|
133
|
+
h = {}
|
134
|
+
# - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
|
135
|
+
m.names.each { |name| h[name.to_sym] = m[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
|
136
|
+
|
137
|
+
## puts "[parse_date_time] match_data:"
|
138
|
+
## pp h
|
139
|
+
logger.debug " [parse_matchdata] hash: >#{h.inspect}<"
|
140
|
+
|
141
|
+
if h[:month_name]
|
142
|
+
## todo/fix: issue error if no month names defined !!!
|
143
|
+
if @month_names
|
144
|
+
h[ :month ] = @month_names[ h[:month_name] ]
|
145
|
+
else
|
146
|
+
## todo/fix: change to ArgumentError( "invalid date; ")
|
147
|
+
puts "** !!! ERROR !!! - no month names defined for lang #{@lang}; cannot match:"
|
148
|
+
pp m
|
149
|
+
exit 1
|
150
|
+
end
|
151
|
+
end
|
74
152
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
@formats.each do |format|
|
87
|
-
tag = format[0]
|
88
|
-
pattern = format[1]
|
89
|
-
m=pattern.match( line )
|
90
|
-
if m
|
91
|
-
date = parse_date_time( m, start: start_at )
|
92
|
-
## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
|
93
|
-
## fix: use md.begin(0), md.end(0)
|
94
|
-
line.sub!( m[0], tag )
|
95
|
-
## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
|
96
|
-
return date
|
153
|
+
if h[:day_name]
|
154
|
+
if @day_names
|
155
|
+
## note: use cwday in ruby date to get days from 1-7 (Monday (1) to Sunday (7))
|
156
|
+
## wday gives you 0-6 (Sunday (0), Monday (1) to Saturday (6))
|
157
|
+
h[ :cwday ] = @day_names[ h[:day_name ] ]
|
158
|
+
else
|
159
|
+
## todo/fix: change to ArgumentError( "invalid date; ")
|
160
|
+
puts "** !!! ERROR !!! - no day names defined for lang #{@lang}; cannot match:"
|
161
|
+
pp m
|
162
|
+
exit 1
|
163
|
+
end
|
97
164
|
end
|
98
|
-
# no match; continue; try next pattern
|
99
|
-
end
|
100
165
|
|
101
|
-
|
102
|
-
|
166
|
+
month = h[:month]
|
167
|
+
day = h[:day]
|
168
|
+
year = h[:year] || calc_year( month.to_i, day.to_i, start: start ).to_s
|
169
|
+
|
170
|
+
if h[:hours] || h[:minutes] ## check time (hours or minutes) is present (otherwise assume just Date and NOT DateTime)
|
171
|
+
hours = h[:hours] || '00' # default to 00:00 for HH:MM (hours:minutes)
|
172
|
+
minutes = h[:minutes] || '00'
|
103
173
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
def find!( line, start_at: ) ## todo/fix: change start_at to start only!!!
|
136
|
-
# fix: use more lookahead for all required trailing spaces!!!!!
|
137
|
-
# fix: use <name capturing group> for month,day,year etc.!!!
|
138
|
-
|
139
|
-
tag = '[EN_MONTH_DD]'
|
140
|
-
pattern = EN__MONTH_DD__DATE_RE
|
141
|
-
m = pattern.match( line )
|
142
|
-
if m
|
143
|
-
date = parse_date_time( m, start: start_at )
|
144
|
-
## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
|
145
|
-
## fix: use md.begin(0), md.end(0)
|
146
|
-
line.sub!( m[0], tag )
|
147
|
-
## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
|
148
|
-
return date
|
174
|
+
value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i]
|
175
|
+
logger.debug " datetime: >#{value}<"
|
176
|
+
|
177
|
+
date = DateTime.strptime( value, '%Y-%m-%d %H:%M' )
|
178
|
+
else
|
179
|
+
value = '%d-%02d-%02d' % [year.to_i, month.to_i, day.to_i]
|
180
|
+
logger.debug " date: >#{value}<"
|
181
|
+
|
182
|
+
date = Date.strptime( value, '%Y-%m-%d' )
|
183
|
+
end
|
184
|
+
|
185
|
+
## check/assert cwday if present!!!!
|
186
|
+
date
|
187
|
+
end # method parse
|
188
|
+
|
189
|
+
##################
|
190
|
+
# helpers
|
191
|
+
private
|
192
|
+
def build_map( lines )
|
193
|
+
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
194
|
+
## note: index is a string too
|
195
|
+
## {"January" => "1", "Jan" => "1",
|
196
|
+
## "February" => "2", "Feb" => "2",
|
197
|
+
## "March" => "3", "Mar" => "3",
|
198
|
+
## "April" => "4", "Apr" => "4",
|
199
|
+
## "May" => "5",
|
200
|
+
## "June" => "6", "Jun" => "6", ...
|
201
|
+
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
202
|
+
line.each { |name| h[ name ] = (i+1).to_s } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
203
|
+
h
|
204
|
+
end
|
149
205
|
end
|
150
|
-
|
151
|
-
|
152
|
-
|
206
|
+
end # class DateParser
|
207
|
+
|
208
|
+
|
209
|
+
|
210
|
+
|
211
|
+
class RsssfDateParser < DateParser
|
212
|
+
|
213
|
+
MONTH_NAMES = DateFormats.parse_month( <<TXT )
|
214
|
+
Jan
|
215
|
+
Feb
|
216
|
+
March Mar
|
217
|
+
April Apr
|
218
|
+
May
|
219
|
+
June Jun
|
220
|
+
July Jul
|
221
|
+
Aug
|
222
|
+
Sept Sep
|
223
|
+
Oct
|
224
|
+
Nov
|
225
|
+
Dec
|
226
|
+
TXT
|
227
|
+
|
228
|
+
MONTH_EN = DateFormats.build_names( MONTH_NAMES ) ## re helper e.g. Jan|Feb|March|Mar|...
|
229
|
+
|
230
|
+
## e.g.
|
231
|
+
## [Jun 7] or [Aug 12] etc. - note: MUST include brackets e.g. []
|
232
|
+
##
|
233
|
+
## check add \b at the beginning and end - why?? why not?? working??
|
234
|
+
EN__MONTH_DD__DATE_RE = /\[
|
235
|
+
(?<month_name>#{MONTH_EN})
|
236
|
+
\s
|
237
|
+
(?<day>\d{1,2})
|
238
|
+
\]/x
|
239
|
+
|
240
|
+
def initialize
|
241
|
+
super( lang: 'en',
|
242
|
+
formats: [[EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]']],
|
243
|
+
month_names: MONTH_NAMES
|
244
|
+
)
|
245
|
+
end
|
246
|
+
end ## class RsssfDateParser
|
153
247
|
|
154
248
|
end # module DateFormats
|
data/lib/date-formats/formats.rb
CHANGED
@@ -18,7 +18,7 @@ DB__DATE_TIME_RE = /\b
|
|
18
18
|
(?<minutes>\d{2})
|
19
19
|
\b/x
|
20
20
|
|
21
|
-
# e.g. 2012-09-14
|
21
|
+
# e.g. 2012-09-14 => YYYY-MM-DD
|
22
22
|
# note: allow 2012-9-3 e.g. no leading zero required
|
23
23
|
# regex_db2
|
24
24
|
DB__DATE_RE = /\b
|
@@ -61,7 +61,7 @@ DD_MM__DATE_TIME_RE = /\b
|
|
61
61
|
(?<minutes>\d{2})
|
62
62
|
\b/x
|
63
63
|
|
64
|
-
# e.g. 14.09.2012 => DD.MM.YYYY
|
64
|
+
# e.g. 14.09.2012 => DD.MM.YYYY
|
65
65
|
# regex_de3
|
66
66
|
DD_MM_YYYY__DATE_RE = /\b
|
67
67
|
(?<day>\d{1,2})
|
@@ -71,7 +71,7 @@ DD_MM_YYYY__DATE_RE = /\b
|
|
71
71
|
(?<year>\d{4})
|
72
72
|
\b/x
|
73
73
|
|
74
|
-
# e.g. 14.09. => DD.MM. w/ implied year
|
74
|
+
# e.g. 14.09. => DD.MM. w/ implied year
|
75
75
|
# note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
|
76
76
|
# note: we use a lookahead for last part e.g. (?:\s+|$|[\]]) - do NOT consume
|
77
77
|
# regex_de4 (use lookahead assert)
|
@@ -89,7 +89,7 @@ DD_MM__DATE_RE = /\b
|
|
89
89
|
EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
|
90
90
|
(?<day>\d{1,2})
|
91
91
|
\s
|
92
|
-
(?<
|
92
|
+
(?<month_name>#{MONTH_EN})
|
93
93
|
\s
|
94
94
|
(?<year>\d{4})
|
95
95
|
\s+
|
@@ -98,22 +98,16 @@ EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
|
|
98
98
|
(?<minutes>\d{2})
|
99
99
|
\b/x
|
100
100
|
|
101
|
-
|
102
|
-
# fix: pass in lang (e.g. en or es)
|
103
|
-
# only process format for lang plus fallback to en?
|
104
|
-
# e.g. EN__DD_MONTH and ES__DD_MONTH depend on order for match (first listed will match)
|
105
|
-
|
106
|
-
# e.g. 12 May => D|DD.MMM w/ implied year and implied hours
|
101
|
+
# e.g. 12 May => D|DD.MMM w/ implied year
|
107
102
|
EN__DD_MONTH__DATE_RE = /\b
|
108
103
|
(?<day>\d{1,2})
|
109
104
|
\s
|
110
|
-
(?<
|
105
|
+
(?<month_name>#{MONTH_EN})
|
111
106
|
\b/x
|
112
107
|
|
113
|
-
|
114
108
|
# e.g. Jun/12 2011 14:00
|
115
109
|
EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
|
116
|
-
(?<
|
110
|
+
(?<month_name>#{MONTH_EN})
|
117
111
|
\/
|
118
112
|
(?<day>\d{1,2})
|
119
113
|
\s
|
@@ -126,7 +120,7 @@ EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
|
|
126
120
|
|
127
121
|
# e.g. Jun/12 14:00 w/ implied year H|HH:MM
|
128
122
|
EN__MONTH_DD__DATE_TIME_RE = /\b
|
129
|
-
(?<
|
123
|
+
(?<month_name>#{MONTH_EN})
|
130
124
|
\/
|
131
125
|
(?<day>\d{1,2})
|
132
126
|
\s+
|
@@ -135,9 +129,9 @@ EN__MONTH_DD__DATE_TIME_RE = /\b
|
|
135
129
|
(?<minutes>\d{2})
|
136
130
|
\b/x
|
137
131
|
|
138
|
-
# e.g. Jun/12 2013
|
132
|
+
# e.g. Jun/12 2013
|
139
133
|
EN__MONTH_DD_YYYY__DATE_RE = /\b
|
140
|
-
(?<
|
134
|
+
(?<month_name>#{MONTH_EN})
|
141
135
|
\/
|
142
136
|
(?<day>\d{1,2})
|
143
137
|
\s
|
@@ -150,85 +144,66 @@ EN__MONTH_DD_YYYY__DATE_RE = /\b
|
|
150
144
|
# fix: remove space again for now - and use simple en date reader or something!!!
|
151
145
|
## was [\/ ] changed back to \/
|
152
146
|
EN__MONTH_DD__DATE_RE = /\b
|
153
|
-
(?<
|
147
|
+
(?<month_name>#{MONTH_EN})
|
154
148
|
\/
|
155
149
|
(?<day>\d{1,2})
|
156
150
|
\b/x
|
157
151
|
|
158
152
|
|
159
|
-
# e.g. 12 Ene w/ implied year
|
153
|
+
# e.g. 12 Ene w/ implied year
|
160
154
|
ES__DD_MONTH__DATE_RE = /\b
|
161
155
|
(?<day>\d{1,2})
|
162
156
|
\s
|
163
|
-
(?<
|
157
|
+
(?<month_name>#{MONTH_ES})
|
164
158
|
\b/x
|
165
159
|
|
166
160
|
# e.g. Ven 8 Août or [Ven 8 Août] or Ven 8. Août or [Ven 8. Août]
|
167
161
|
### note: do NOT consume [] in regex (use lookahead assert)
|
168
|
-
|
169
|
-
(
|
162
|
+
FR__DAY_DD_MONTH__DATE_RE = /\b
|
163
|
+
(?<day_name>#{DAY_FR})
|
170
164
|
\s+
|
171
165
|
(?<day>\d{1,2})
|
172
166
|
\.? # note: make dot optional
|
173
167
|
\s+
|
174
|
-
(?<
|
168
|
+
(?<month_name>#{MONTH_FR})
|
175
169
|
(?=\s+|$|[\]])/x ## note: allow end-of-string/line too
|
176
170
|
|
177
171
|
|
178
172
|
|
179
|
-
|
180
|
-
# map
|
181
|
-
## todo/fix: remove (move to attic)??? always use lang specific - why? why not?
|
182
|
-
FORMATS_ALL = [
|
183
|
-
[ '[YYYY_MM_DD_hh_mm]', DB__DATE_TIME_RE ],
|
184
|
-
[ '[YYYY_MM_DD]', DB__DATE_RE ],
|
185
|
-
[ '[DD_MM_YYYY_hh_mm]', DD_MM_YYYY__DATE_TIME_RE ],
|
186
|
-
[ '[DD_MM_hh_mm]', DD_MM__DATE_TIME_RE ],
|
187
|
-
[ '[DD_MM_YYYY]', DD_MM_YYYY__DATE_RE ],
|
188
|
-
[ '[DD_MM]', DD_MM__DATE_RE ],
|
189
|
-
[ '[FR_WEEKDAY_DD_MONTH]', FR__WEEKDAY_DD_MONTH__DATE_RE ],
|
190
|
-
[ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_RE ],
|
191
|
-
[ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_RE ],
|
192
|
-
[ '[EN_MONTH_DD_hh_mm]', EN__MONTH_DD__DATE_TIME_RE ],
|
193
|
-
[ '[EN_MONTH_DD_YYYY]', EN__MONTH_DD_YYYY__DATE_RE ],
|
194
|
-
[ '[EN_MONTH_DD]', EN__MONTH_DD__DATE_RE ],
|
195
|
-
[ '[EN_DD_MONTH]', EN__DD_MONTH__DATE_RE ],
|
196
|
-
[ '[ES_DD_MONTH]', ES__DD_MONTH__DATE_RE ]
|
197
|
-
]
|
198
|
-
|
173
|
+
#############################################
|
174
|
+
# map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served
|
199
175
|
|
200
176
|
FORMATS_BASE = [ ### all numbers (no month names or weekday) - find a better name?
|
201
|
-
[ '[YYYY_MM_DD_hh_mm]'
|
202
|
-
[ '[YYYY_MM_DD]'
|
203
|
-
[ '[DD_MM_YYYY_hh_mm]'
|
204
|
-
[ '[DD_MM_hh_mm]'
|
205
|
-
[ '[DD_MM_YYYY]'
|
206
|
-
[ '[DD_MM]'
|
177
|
+
[ DB__DATE_TIME_RE, '[YYYY_MM_DD_hh_mm]' ],
|
178
|
+
[ DB__DATE_RE, '[YYYY_MM_DD]' ],
|
179
|
+
[ DD_MM_YYYY__DATE_TIME_RE, '[DD_MM_YYYY_hh_mm]' ],
|
180
|
+
[ DD_MM__DATE_TIME_RE, '[DD_MM_hh_mm]' ],
|
181
|
+
[ DD_MM_YYYY__DATE_RE, '[DD_MM_YYYY]' ],
|
182
|
+
[ DD_MM__DATE_RE, '[DD_MM]' ],
|
207
183
|
]
|
208
184
|
|
209
185
|
FORMATS_EN = [
|
210
|
-
[ '[EN_DD_MONTH_YYYY_hh_mm]'
|
211
|
-
[ '[EN_MONTH_DD_YYYY_hh_mm]'
|
212
|
-
[ '[EN_MONTH_DD_hh_mm]'
|
213
|
-
[ '[EN_MONTH_DD_YYYY]'
|
214
|
-
[ '[EN_MONTH_DD]'
|
215
|
-
[ '[EN_DD_MONTH]'
|
186
|
+
[ EN__DD_MONTH_YYYY__DATE_TIME_RE, '[EN_DD_MONTH_YYYY_hh_mm]' ],
|
187
|
+
[ EN__MONTH_DD_YYYY__DATE_TIME_RE, '[EN_MONTH_DD_YYYY_hh_mm]' ],
|
188
|
+
[ EN__MONTH_DD__DATE_TIME_RE, '[EN_MONTH_DD_hh_mm]' ],
|
189
|
+
[ EN__MONTH_DD_YYYY__DATE_RE, '[EN_MONTH_DD_YYYY]' ],
|
190
|
+
[ EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]' ],
|
191
|
+
[ EN__DD_MONTH__DATE_RE, '[EN_DD_MONTH]' ],
|
216
192
|
]
|
217
193
|
|
218
194
|
FORMATS_FR = [
|
219
|
-
[ '[
|
195
|
+
[ FR__DAY_DD_MONTH__DATE_RE, '[FR_DAY_DD_MONTH]' ],
|
220
196
|
]
|
221
197
|
|
222
198
|
FORMATS_ES = [
|
223
|
-
[ '[ES_DD_MONTH]'
|
199
|
+
[ ES__DD_MONTH__DATE_RE, '[ES_DD_MONTH]' ],
|
224
200
|
]
|
225
201
|
|
226
202
|
|
227
203
|
FORMATS = {
|
228
|
-
|
229
|
-
|
230
|
-
|
204
|
+
en: FORMATS_BASE+FORMATS_EN,
|
205
|
+
fr: FORMATS_BASE+FORMATS_FR,
|
206
|
+
es: FORMATS_BASE+FORMATS_ES,
|
231
207
|
}
|
232
208
|
|
233
|
-
|
234
209
|
end # module DateFormats
|
data/lib/date-formats/reader.rb
CHANGED
@@ -34,45 +34,32 @@ class Reader ## todo/check: rename to WordReader or something for easy (re)use
|
|
34
34
|
end
|
35
35
|
lines
|
36
36
|
end # method parse
|
37
|
+
end # class Reader
|
38
|
+
|
37
39
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
lines
|
40
|
+
|
41
|
+
def self.parse_month( txt )
|
42
|
+
lines = Reader.parse( txt )
|
43
|
+
if lines.size != 12
|
44
|
+
puts "*** !!! ERROR !!! reading month names; got #{lines.size} lines - expected 12"
|
45
|
+
exit 1
|
45
46
|
end
|
47
|
+
lines
|
48
|
+
end
|
46
49
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
end
|
53
|
-
lines
|
50
|
+
def self.parse_day( txt )
|
51
|
+
lines = Reader.parse( txt )
|
52
|
+
if lines.size != 7
|
53
|
+
puts "*** !!! ERROR !!! reading day names; got #{lines.size} lines - expected 7"
|
54
|
+
exit 1
|
54
55
|
end
|
55
|
-
|
56
|
+
lines
|
57
|
+
end
|
56
58
|
|
57
59
|
|
58
|
-
def self.
|
60
|
+
def self.build_names( lines )
|
59
61
|
## join all words together into a single string e.g.
|
60
62
|
## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
|
61
63
|
lines.map { |line| line.join('|') }.join('|')
|
62
64
|
end
|
63
|
-
|
64
|
-
def self.build_map( lines )
|
65
|
-
## build a lookup map that maps the word to the index (line no) plus 1 e.g.
|
66
|
-
## note: index is a string too
|
67
|
-
## {"January" => "1", "Jan" => "1",
|
68
|
-
## "February" => "2", "Feb" => "2",
|
69
|
-
## "March" => "3", "Mar" => "3",
|
70
|
-
## "April" => "4", "Apr" => "4",
|
71
|
-
## "May" => "5",
|
72
|
-
## "June" => "6", "Jun" => "6", ...
|
73
|
-
lines.each_with_index.reduce( {} ) do |h,(line,i)|
|
74
|
-
line.each { |name| h[ name ] = (i+1).to_s } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
|
75
|
-
h
|
76
|
-
end
|
77
|
-
end
|
78
65
|
end # module DateFormats
|
data/lib/date-formats/source.rb
CHANGED
@@ -1,11 +1,16 @@
|
|
1
1
|
module DateFormats
|
2
|
-
module Source
|
3
2
|
|
4
3
|
# todo: make more generic for reuse
|
5
4
|
### fix:
|
6
5
|
## use date/en.txt or en.txt etc. -- why? why not?
|
7
6
|
|
8
|
-
|
7
|
+
## note: always sort lines with longest words, abbreviations first!!!!
|
8
|
+
## todo/fix: add/split into MONTH_NAMES and MONTH_ABBREVS (and DAY_NAMES and DAY_ABBREVS) - why? why not?
|
9
|
+
MONTH_NAMES = {}
|
10
|
+
DAY_NAMES = {}
|
11
|
+
|
12
|
+
|
13
|
+
MONTH_NAMES[:en] = <<TXT
|
9
14
|
January Jan
|
10
15
|
February Feb
|
11
16
|
March Mar
|
@@ -20,11 +25,12 @@ November Nov
|
|
20
25
|
December Dec
|
21
26
|
TXT
|
22
27
|
|
23
|
-
|
28
|
+
|
29
|
+
DAY_NAMES[:en] = <<TXT
|
24
30
|
Monday Mon
|
25
|
-
Tuesday
|
31
|
+
Tuesday Tues Tue Tu
|
26
32
|
Wednesday Wed
|
27
|
-
Thursday
|
33
|
+
Thursday Thurs Thur Thu Th
|
28
34
|
Friday Fri
|
29
35
|
Saturday Sat
|
30
36
|
Sunday Sun
|
@@ -32,7 +38,7 @@ TXT
|
|
32
38
|
|
33
39
|
|
34
40
|
|
35
|
-
|
41
|
+
MONTH_NAMES[:fr] = <<TXT
|
36
42
|
Janvier Janv Jan ## check janv in use??
|
37
43
|
Février Févr Fév ## check fevr in use???
|
38
44
|
Mars Mar
|
@@ -47,7 +53,7 @@ Novembre Nove Nov ## check nove in use??
|
|
47
53
|
Décembre Déce Déc ## check dece in use??
|
48
54
|
TXT
|
49
55
|
|
50
|
-
|
56
|
+
DAY_NAMES[:fr] = <<TXT
|
51
57
|
Lundi Lun L
|
52
58
|
Mardi Mar Ma
|
53
59
|
Mercredi Mer Me
|
@@ -59,7 +65,7 @@ TXT
|
|
59
65
|
|
60
66
|
|
61
67
|
|
62
|
-
|
68
|
+
MONTH_NAMES[:es] = <<TXT
|
63
69
|
Enero Ene
|
64
70
|
Febrero Feb
|
65
71
|
Marzo Mar
|
@@ -74,7 +80,7 @@ Noviembre Nov
|
|
74
80
|
Diciembre Dic
|
75
81
|
TXT
|
76
82
|
|
77
|
-
|
83
|
+
MONTH_NAMES[:de] = <<TXT
|
78
84
|
Jänner Januar Jan Jän # note: in Austria - Jänner; in Deutschland Januar allow both ??
|
79
85
|
Feber Februar Feb
|
80
86
|
März Mär
|
@@ -89,7 +95,7 @@ November Nov
|
|
89
95
|
Dezember Dez
|
90
96
|
TXT
|
91
97
|
|
92
|
-
|
98
|
+
MONTH_NAMES[:it] = <<TXT
|
93
99
|
Gennaio
|
94
100
|
Febbraio
|
95
101
|
Marzo
|
@@ -104,7 +110,7 @@ Novembre
|
|
104
110
|
Dicembre
|
105
111
|
TXT
|
106
112
|
|
107
|
-
|
113
|
+
MONTH_NAMES[:pt] = <<TXT
|
108
114
|
Janeiro
|
109
115
|
Fevereiro
|
110
116
|
Março
|
@@ -119,7 +125,7 @@ Novembro
|
|
119
125
|
Dezembro
|
120
126
|
TXT
|
121
127
|
|
122
|
-
|
128
|
+
MONTH_NAMES[:ro] = <<TXT
|
123
129
|
Ianuarie
|
124
130
|
Februarie
|
125
131
|
Martie
|
@@ -134,5 +140,9 @@ Noiembrie
|
|
134
140
|
Decembrie
|
135
141
|
TXT
|
136
142
|
|
137
|
-
|
143
|
+
############################################
|
144
|
+
## convert (unparsed) text to (parsed) lines with words
|
145
|
+
MONTH_NAMES.each {|k,v| MONTH_NAMES[k] = parse_month(v) }
|
146
|
+
DAY_NAMES.each {|k,v| DAY_NAMES[k] = parse_day(v) }
|
147
|
+
|
138
148
|
end # module DateFormats
|
data/lib/date-formats/version.rb
CHANGED
data/test/test_date.rb
CHANGED
@@ -15,7 +15,7 @@ class TestDate < MiniTest::Test
|
|
15
15
|
[ '21.01.2013 21.30', '2013-01-21 21:30', '[DD_MM_YYYY_hh_mm]' ],
|
16
16
|
[ '26.01.2013', '2013-01-26', '[DD_MM_YYYY]' ],
|
17
17
|
[ '[26.01.2013]', '2013-01-26', '[[DD_MM_YYYY]]' ],
|
18
|
-
[ '[21.1.]', '2013-01-21
|
18
|
+
[ '[21.1.]', '2013-01-21', '[[DD_MM]]' ]
|
19
19
|
]
|
20
20
|
|
21
21
|
assert_dates( data, start: Date.new( 2013, 1, 1 ) )
|
@@ -23,15 +23,15 @@ class TestDate < MiniTest::Test
|
|
23
23
|
|
24
24
|
def test_date_fr
|
25
25
|
data = [
|
26
|
-
[ '[Ven 08. Août]', '2014-08-08' ],
|
27
|
-
[ 'Ven 08. Août', '2014-08-08' ],
|
28
|
-
[ 'Ven 8. Août', '2014-08-08' ],
|
29
|
-
[ '[Sam 9. Août]', '2014-08-09' ],
|
30
|
-
[ '[Dim 10. Août]', '2014-08-10' ],
|
31
|
-
[ '[Sam 31. Janv]', '2015-01-31' ],
|
32
|
-
[ '[Sam 7. Févr]', '2015-02-07' ],
|
33
|
-
[ '[Sam 31. Jan]', '2015-01-31' ],
|
34
|
-
[ '[Sam 7. Fév]', '2015-02-07' ],
|
26
|
+
[ '[Ven 08. Août]', '2014-08-08', '[[FR_DAY_DD_MONTH]]' ],
|
27
|
+
[ 'Ven 08. Août', '2014-08-08', '[FR_DAY_DD_MONTH]' ],
|
28
|
+
[ 'Ven 8. Août', '2014-08-08', '[FR_DAY_DD_MONTH]' ],
|
29
|
+
[ '[Sam 9. Août]', '2014-08-09', '[[FR_DAY_DD_MONTH]]' ],
|
30
|
+
[ '[Dim 10. Août]', '2014-08-10', '[[FR_DAY_DD_MONTH]]' ],
|
31
|
+
[ '[Sam 31. Janv]', '2015-01-31', '[[FR_DAY_DD_MONTH]]' ],
|
32
|
+
[ '[Sam 7. Févr]', '2015-02-07', '[[FR_DAY_DD_MONTH]]' ],
|
33
|
+
[ '[Sam 31. Jan]', '2015-01-31', '[[FR_DAY_DD_MONTH]]' ],
|
34
|
+
[ '[Sam 7. Fév]', '2015-02-07', '[[FR_DAY_DD_MONTH]]' ],
|
35
35
|
]
|
36
36
|
|
37
37
|
assert_dates( data, start: Date.new( 2014, 8, 1 ), lang: 'fr' )
|
@@ -39,21 +39,21 @@ class TestDate < MiniTest::Test
|
|
39
39
|
|
40
40
|
def test_date_en
|
41
41
|
data = [
|
42
|
-
[ 'Jun/12 2011 14:00', '2011-06-12 14:00' ],
|
43
|
-
[ 'Oct/12 2013 16:00', '2013-10-12 16:00' ],
|
42
|
+
[ 'Jun/12 2011 14:00', '2011-06-12 14:00', '[EN_MONTH_DD_YYYY_hh_mm]' ],
|
43
|
+
[ 'Oct/12 2013 16:00', '2013-10-12 16:00', '[EN_MONTH_DD_YYYY_hh_mm]' ],
|
44
44
|
|
45
|
-
[ 'Jan/26 2011', '2011-01-26' ],
|
46
|
-
[ 'Jan/26 2011', '2011-01-26
|
45
|
+
[ 'Jan/26 2011', '2011-01-26', '[EN_MONTH_DD_YYYY]' ],
|
46
|
+
[ 'Jan/26 2011', '2011-01-26', '[EN_MONTH_DD_YYYY]' ],
|
47
47
|
|
48
|
-
[ 'Jan/26', '2013-01-26' ],
|
49
|
-
[ 'Jan/26', '2013-01-26
|
50
|
-
[ '26 January', '2013-01-26' ],
|
51
|
-
[ '26 January', '2013-01-26
|
48
|
+
[ 'Jan/26', '2013-01-26', '[EN_MONTH_DD]' ],
|
49
|
+
[ 'Jan/26', '2013-01-26', '[EN_MONTH_DD]' ],
|
50
|
+
[ '26 January', '2013-01-26', '[EN_DD_MONTH]' ],
|
51
|
+
[ '26 January', '2013-01-26', '[EN_DD_MONTH]' ],
|
52
52
|
|
53
|
-
[ 'Jun/13', '2013-06-13' ],
|
54
|
-
[ 'Jun/13', '2013-06-13
|
55
|
-
[ '13 June', '2013-06-13' ],
|
56
|
-
[ '13 June', '2013-06-13
|
53
|
+
[ 'Jun/13', '2013-06-13', '[EN_MONTH_DD]' ],
|
54
|
+
[ 'Jun/13', '2013-06-13', '[EN_MONTH_DD]' ],
|
55
|
+
[ '13 June', '2013-06-13', '[EN_DD_MONTH]' ],
|
56
|
+
[ '13 June', '2013-06-13', '[EN_DD_MONTH]' ]
|
57
57
|
]
|
58
58
|
|
59
59
|
assert_dates( data, start: Date.new( 2013, 1, 1 ), lang: 'en' )
|
@@ -66,7 +66,11 @@ private
|
|
66
66
|
data.each do |rec|
|
67
67
|
line = rec[0]
|
68
68
|
str = rec[1]
|
69
|
-
|
69
|
+
|
70
|
+
## note: test / use parse and find! -- parse MUST go first
|
71
|
+
values = []
|
72
|
+
values << DateFormats.parse( line, start: start, lang: lang )
|
73
|
+
values << DateFormats.find!( line, start: start, lang: lang )
|
70
74
|
|
71
75
|
tagged_line = rec[2] ## optional tagged line
|
72
76
|
if tagged_line ## note: line gets tagged inplace!!! (no new string)
|
@@ -74,14 +78,17 @@ private
|
|
74
78
|
puts "#{line} == #{tagged_line}"
|
75
79
|
end
|
76
80
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
+
values.each do |value|
|
82
|
+
if str.index( ':' )
|
83
|
+
assert_datetime( DateTime.strptime( str, '%Y-%m-%d %H:%M' ), value )
|
84
|
+
else
|
85
|
+
assert_date( Date.strptime( str, '%Y-%m-%d' ), value )
|
86
|
+
end
|
81
87
|
end
|
82
88
|
end
|
83
89
|
end
|
84
90
|
|
91
|
+
|
85
92
|
## todo: check if assert_datetime or assert_date exist already? what is the best practice to check dates ???
|
86
93
|
def assert_date( exp, value )
|
87
94
|
assert_equal exp.year, value.year
|
@@ -98,12 +105,4 @@ private
|
|
98
105
|
assert_date( exp, value )
|
99
106
|
assert_time( exp, value )
|
100
107
|
end
|
101
|
-
|
102
|
-
|
103
|
-
def parse_date( line, start:, lang: )
|
104
|
-
# e.g. lets you pass in opts[:start_at] ???
|
105
|
-
finder = DateFormats::DateFinder.new( lang: lang )
|
106
|
-
finder.find!( line, start_at: start )
|
107
|
-
end
|
108
|
-
|
109
108
|
end # class TestDate
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: date-formats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-11-
|
11
|
+
date: 2019-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: logutils
|