date-formats 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c60c4e7804fd0a6a4b11e70c35daccfb6db9acbb
4
- data.tar.gz: b78c75c1941eeafd29333b7872fdd62facb93cd1
3
+ metadata.gz: 81aad612afdba8f85e731f97c1bc119499a8211d
4
+ data.tar.gz: e013e9bdac91e1938149d395b946c535b2c75714
5
5
  SHA512:
6
- metadata.gz: d9f1630097d9f0d1ee5e64963b156093969791ba6626c16421dafe46b33b74966a45875a830f5c61a23d8c76afd471b9715d84fa9de52e68c6ef8aefc0b829c6
7
- data.tar.gz: 436c8c910c926d09126f0bd4142d3e1b783f6f3c83d6af18463504470d477f2886be119ff0e03709c892eeafa021ed038b2c84c263b1cd1aafc406357333b1a1
6
+ metadata.gz: 718656e27eab527b211337e3a9fef08030017d2b31f7c59fa89b2292df61275a8b9f1ca247e5f9a5eacad03782d83890cd1a37f5d2d6c5ff541ce0ef67c579bc
7
+ data.tar.gz: c9470e38606e1b909230e8e62187cb1ba2f29f4bd4d99a48fc8125445d80b08e7ac37e37c7d08e463739d7bca767526ab535e0f0016d1f8abc546e16a55cb551
@@ -11,40 +11,26 @@ require 'logutils'
11
11
  ###
12
12
  # our own code
13
13
  require 'date-formats/version' # let version always go first
14
- require 'date-formats/source'
15
14
  require 'date-formats/reader'
15
+ require 'date-formats/source'
16
16
 
17
17
 
18
18
  module DateFormats
19
19
 
20
+ #############
21
+ # helpers for building format regex patterns
22
+ MONTH_EN = build_names( MONTH_NAMES[:en] )
23
+ # e.g. Jan|Feb|March|Mar|April|Apr|May|June|Jun|...
24
+ DAY_EN = build_names( DAY_NAMES[:en] )
25
+ # e.g. Monday|Mon|Tuesday|Tues|Tue|Tu|Wednesday|Wed|...
20
26
 
21
- MONTH_EN_LINES = Reader.parse_month( Source::MONTH_EN )
22
- MONTH_EN_TO_MM = build_map( MONTH_EN_LINES )
23
- MONTH_EN = build_re( MONTH_EN_LINES )
24
-
25
- WEEKDAY_EN_LINES = Reader.parse_weekday( Source::WEEKDAY_EN )
26
- WEEKDAY_EN = build_re( WEEKDAY_EN_LINES )
27
-
28
-
29
-
30
- MONTH_FR_LINES = Reader.parse_month( Source::MONTH_FR )
31
- MONTH_FR_TO_MM = build_map( MONTH_FR_LINES )
32
- MONTH_FR = build_re( MONTH_FR_LINES )
33
-
34
- WEEKDAY_FR_LINES = Reader.parse_weekday( Source::WEEKDAY_FR )
35
- WEEKDAY_FR = build_re( WEEKDAY_FR_LINES )
36
-
37
-
38
-
39
- MONTH_ES_LINES = Reader.parse_month( Source::MONTH_ES )
40
- MONTH_ES_TO_MM = build_map( MONTH_ES_LINES )
41
- MONTH_ES = build_re( MONTH_ES_LINES )
42
-
43
-
27
+ MONTH_FR = build_names( MONTH_NAMES[:fr] )
28
+ DAY_FR = build_names( DAY_NAMES[:fr] )
44
29
 
45
- MONTH_DE_LINES = Reader.parse_month( Source::MONTH_DE )
46
- MONTH_DE_TO_MM = build_map( MONTH_DE_LINES )
47
- MONTH_DE = build_re( MONTH_DE_LINES )
30
+ MONTH_ES = build_names( MONTH_NAMES[:es] )
31
+ MONTH_PT = build_names( MONTH_NAMES[:pt] )
32
+ MONTH_DE = build_names( MONTH_NAMES[:de] )
33
+ MONTH_IT = build_names( MONTH_NAMES[:it] )
48
34
 
49
35
  end # module DateFormats
50
36
 
@@ -1,154 +1,248 @@
1
1
  # encoding: utf-8
2
2
 
3
+
3
4
  module DateFormats
4
5
 
5
6
 
6
- class DateFinderBase
7
+ def self.lang
8
+ @@lang ||= :en ## defaults to english (:en)
9
+ end
10
+ def self.lang=( value )
11
+ @@lang = value.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
12
+ end
7
13
 
8
- private
9
- def calc_year( month, day, start: ) ## note: start required param for now on!!!
10
14
 
11
- logger.debug " [calc_year] ????-#{month}-#{day} -- start: #{start}"
15
+ def self.parser( lang: ) ## find parser
16
+ lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
12
17
 
13
- if month >= start.month
14
- # assume same year as start_at event (e.g. 2013 for 2013/14 season)
15
- start.year
16
- else
17
- # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
18
- start.year+1
19
- end
18
+ ## note: cache all "built-in" lang versions (e.g. formats == nil)
19
+ @@parser ||= {}
20
+ parser = @@parser[ lang ] ||= DateParser.new( lang: lang )
20
21
  end
21
22
 
23
+ def self.parse( line,
24
+ lang: self.class.lang,
25
+ start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
26
+ )
27
+ parser( lang: lang ).parse( line, start: start )
28
+ end
29
+
30
+ def self.find!( line,
31
+ lang: self.class.lang,
32
+ start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
33
+ )
34
+ parser( lang: lang ).find!( line, start: start )
35
+ end
22
36
 
23
- def parse_date_time( match_data, start: )
24
37
 
25
- # convert regex match_data captures to hash
26
- # - note: cannont use match_data like a hash (e.g. raises exception if key/name not present/found)
27
- h = {}
28
- # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
29
- match_data.names.each { |name| h[name.to_sym] = match_data[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
30
38
 
31
- ## puts "[parse_date_time] match_data:"
32
- ## pp h
33
- logger.debug " [parse_date_time] hash: >#{h.inspect}<"
39
+ class DateParser
34
40
 
35
- h[ :month ] = MONTH_EN_TO_MM[ h[:month_en] ] if h[:month_en]
36
- h[ :month ] = MONTH_ES_TO_MM[ h[:month_es] ] if h[:month_es]
37
- h[ :month ] = MONTH_FR_TO_MM[ h[:month_fr] ] if h[:month_fr]
38
- h[ :month ] = MONTH_DE_TO_MM[ h[:month_de] ] if h[:month_de]
39
- h[ :month ] = MONTH_IT_TO_MM[ h[:month_it] ] if h[:month_it]
40
- h[ :month ] = MONTH_PT_TO_MM[ h[:month_pt] ] if h[:month_pt]
41
+ include LogUtils::Logging
41
42
 
42
- month = h[:month]
43
- day = h[:day]
44
- year = h[:year] || calc_year( month.to_i, day.to_i, start: start ).to_s
43
+ def initialize( lang:,
44
+ formats: nil, month_names: nil, day_names: nil
45
+ )
46
+ @lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
45
47
 
46
- hours = h[:hours] || '00' # default to 00:00 for HH:MM (hours:minutes)
47
- minutes = h[:minutes] || '00'
48
+ if formats
49
+ @formats = formats
50
+ else
51
+ @formats = FORMATS[ @lang ]
52
+ if @formats
53
+ month_names = MONTH_NAMES[ @lang ]
54
+ day_names = DAY_NAMES[ @lang ]
55
+ else
56
+ ## fallback to english if lang not available
57
+ ## todo/fix: add/issue warning!!!!!
58
+ @formats = FORMATS[ :en ]
59
+ month_names = MONTH_NAMES[ :en ]
60
+ day_names = DAY_NAMES[ :en ]
61
+ end
62
+ end
48
63
 
49
- value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i]
50
- logger.debug " date: >#{value}<"
64
+ ## convert month_names and day_names to map if present
65
+ @month_names = month_names ? build_map( month_names ) : nil
66
+ @day_names = day_names ? build_map( day_names ) : nil
67
+ end
51
68
 
52
- DateTime.strptime( value, '%Y-%m-%d %H:%M' )
53
- end
54
69
 
55
- end # class DateFinderBase
70
+ def parse( line, start: )
71
+ date = nil
72
+ @formats.each do |format|
73
+ re = format[0]
74
+ m = re.match( line )
75
+ if m
76
+ date = parse_matchdata( m, start: start )
77
+ break
78
+ end
79
+ # no match; continue; try next regex pattern
80
+ end
56
81
 
82
+ ## todo/fix - raise ArgumentError - invalid date; no format match found
83
+ date # note: nil if no match found
84
+ end
57
85
 
58
86
 
59
- class DateFinder < DateFinderBase
87
+ def find!( line, start: )
88
+ # fix: use more lookahead for all required trailing spaces!!!!!
89
+ # fix: use <name capturing group> for month,day,year etc.!!!
90
+
91
+ #
92
+ # fix: !!!!
93
+ # date in [] will become [[DATE.DE4]] - when getting removed will keep ]!!!!
94
+ # fix: change regex to \[[A-Z0-9.]\] !!!!!! plus add unit test too!!!
95
+ #
96
+
97
+ date = nil
98
+ @formats.each do |format|
99
+ re = format[0]
100
+ tag = format[1]
101
+ m = re.match( line )
102
+ if m
103
+ date = parse_matchdata( m, start: start )
104
+ ## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
105
+ ## fix: use md.begin(0), md.end(0)
106
+ line.sub!( m[0], tag )
107
+ ## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
108
+ break
109
+ end
110
+ # no match; continue; try next regex pattern
111
+ end
112
+ date # note: nil if no match found
113
+ end
60
114
 
61
- include LogUtils::Logging
115
+ private
116
+ def calc_year( month, day, start: )
62
117
 
118
+ logger.debug " [calc_year] ????-#{month}-#{day} -- start: #{start}"
63
119
 
64
- def self.lang() @@lang ||= 'en'; end ## defaults to english (en)
65
- def self.lang=(value) @@lang = value; end
120
+ if month >= start.month
121
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
122
+ start.year
123
+ else
124
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
125
+ start.year+1
126
+ end
127
+ end
66
128
 
67
129
 
68
- def initialize( lang: self.class.lang )
69
- @lang = lang.to_s
70
- ## fallback to english if lang not available
71
- ## todo/fix: add/issue warning!!!!!
72
- @formats = FORMATS[ @lang ] || FORMATS['en']
73
- end
130
+ def parse_matchdata( m, start: )
131
+ # convert regex match_data captures to hash
132
+ # - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
133
+ h = {}
134
+ # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
135
+ m.names.each { |name| h[name.to_sym] = m[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
136
+
137
+ ## puts "[parse_date_time] match_data:"
138
+ ## pp h
139
+ logger.debug " [parse_matchdata] hash: >#{h.inspect}<"
140
+
141
+ if h[:month_name]
142
+ ## todo/fix: issue error if no month names defined !!!
143
+ if @month_names
144
+ h[ :month ] = @month_names[ h[:month_name] ]
145
+ else
146
+ ## todo/fix: change to ArgumentError( "invalid date; ")
147
+ puts "** !!! ERROR !!! - no month names defined for lang #{@lang}; cannot match:"
148
+ pp m
149
+ exit 1
150
+ end
151
+ end
74
152
 
75
- def find!( line, start_at: ) ## todo/fix: change start_at to start only!!!
76
- # fix: use more lookahead for all required trailing spaces!!!!!
77
- # fix: use <name capturing group> for month,day,year etc.!!!
78
-
79
- #
80
- # fix: !!!!
81
- # date in [] will become [[DATE.DE4]] - when getting removed will keep ]!!!!
82
- # fix: change regex to \[[A-Z0-9.]\] !!!!!! plus add unit test too!!!
83
- #
84
-
85
- m = nil
86
- @formats.each do |format|
87
- tag = format[0]
88
- pattern = format[1]
89
- m=pattern.match( line )
90
- if m
91
- date = parse_date_time( m, start: start_at )
92
- ## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
93
- ## fix: use md.begin(0), md.end(0)
94
- line.sub!( m[0], tag )
95
- ## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
96
- return date
153
+ if h[:day_name]
154
+ if @day_names
155
+ ## note: use cwday in ruby date to get days from 1-7 (Monday (1) to Sunday (7))
156
+ ## wday gives you 0-6 (Sunday (0), Monday (1) to Saturday (6))
157
+ h[ :cwday ] = @day_names[ h[:day_name ] ]
158
+ else
159
+ ## todo/fix: change to ArgumentError( "invalid date; ")
160
+ puts "** !!! ERROR !!! - no day names defined for lang #{@lang}; cannot match:"
161
+ pp m
162
+ exit 1
163
+ end
97
164
  end
98
- # no match; continue; try next pattern
99
- end
100
165
 
101
- return nil # no match found
102
- end
166
+ month = h[:month]
167
+ day = h[:day]
168
+ year = h[:year] || calc_year( month.to_i, day.to_i, start: start ).to_s
169
+
170
+ if h[:hours] || h[:minutes] ## check time (hours or minutes) is present (otherwise asume just Date and NOT DateTime)
171
+ hours = h[:hours] || '00' # default to 00:00 for HH:MM (hours:minutes)
172
+ minutes = h[:minutes] || '00'
103
173
 
104
- end # class DateFinder
105
-
106
-
107
-
108
- class RsssfDateFinder < DateFinderBase
109
-
110
- include LogUtils::Logging
111
-
112
- MONTH_EN = 'Jan|'+
113
- 'Feb|'+
114
- 'March|Mar|'+
115
- 'April|Apr|'+
116
- 'May|'+
117
- 'June|Jun|'+
118
- 'July|Jul|'+
119
- 'Aug|'+
120
- 'Sept|Sep|'+
121
- 'Oct|'+
122
- 'Nov|'+
123
- 'Dec'
124
-
125
- ## e.g.
126
- ## [Jun 7] or [Aug 12] etc. - not MUST include brackets e.g. []
127
- ##
128
- ## check add \b at the beginning and end - why?? why not?? working??
129
- EN__MONTH_DD__DATE_RE = /\[
130
- (?<month_en>#{MONTH_EN})
131
- \s
132
- (?<day>\d{1,2})
133
- \]/x
134
-
135
- def find!( line, start_at: ) ## todo/fix: change start_at to start only!!!
136
- # fix: use more lookahead for all required trailing spaces!!!!!
137
- # fix: use <name capturing group> for month,day,year etc.!!!
138
-
139
- tag = '[EN_MONTH_DD]'
140
- pattern = EN__MONTH_DD__DATE_RE
141
- m = pattern.match( line )
142
- if m
143
- date = parse_date_time( m, start: start_at )
144
- ## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
145
- ## fix: use md.begin(0), md.end(0)
146
- line.sub!( m[0], tag )
147
- ## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
148
- return date
174
+ value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i]
175
+ logger.debug " datetime: >#{value}<"
176
+
177
+ date = DateTime.strptime( value, '%Y-%m-%d %H:%M' )
178
+ else
179
+ value = '%d-%02d-%02d' % [year.to_i, month.to_i, day.to_i]
180
+ logger.debug " date: >#{value}<"
181
+
182
+ date = Date.strptime( value, '%Y-%m-%d' )
183
+ end
184
+
185
+ ## check/assert cwday if present!!!!
186
+ date
187
+ end # method parse
188
+
189
+ ##################
190
+ # helpers
191
+ private
192
+ def build_map( lines )
193
+ ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
194
+ ## note: index is a string too
195
+ ## {"January" => "1", "Jan" => "1",
196
+ ## "February" => "2", "Feb" => "2",
197
+ ## "March" => "3", "Mar" => "3",
198
+ ## "April" => "4", "Apr" => "4",
199
+ ## "May" => "5",
200
+ ## "June" => "6", "Jun" => "6", ...
201
+ lines.each_with_index.reduce( {} ) do |h,(line,i)|
202
+ line.each { |name| h[ name ] = (i+1).to_s } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
203
+ h
204
+ end
149
205
  end
150
- return nil # no match found
151
- end
152
- end ## class RsssfDateFinder
206
+ end # class DateParser
207
+
208
+
209
+
210
+
211
+ class RsssfDateParser < DateParser
212
+
213
+ MONTH_NAMES = DateFormats.parse_month( <<TXT )
214
+ Jan
215
+ Feb
216
+ March Mar
217
+ April Apr
218
+ May
219
+ June Jun
220
+ July Jul
221
+ Aug
222
+ Sept Sep
223
+ Oct
224
+ Nov
225
+ Dec
226
+ TXT
227
+
228
+ MONTH_EN = DateFormats.build_names( MONTH_NAMES ) ## re helper e.g. Jan|Feb|March|Mar|...
229
+
230
+ ## e.g.
231
+ ## [Jun 7] or [Aug 12] etc. - note: MUST include brackets e.g. []
232
+ ##
233
+ ## check add \b at the beginning and end - why?? why not?? working??
234
+ EN__MONTH_DD__DATE_RE = /\[
235
+ (?<month_name>#{MONTH_EN})
236
+ \s
237
+ (?<day>\d{1,2})
238
+ \]/x
239
+
240
+ def initialize
241
+ super( lang: 'en',
242
+ formats: [[EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]']],
243
+ month_names: MONTH_NAMES
244
+ )
245
+ end
246
+ end ## class RsssfDateParser
153
247
 
154
248
  end # module DateFormats
@@ -18,7 +18,7 @@ DB__DATE_TIME_RE = /\b
18
18
  (?<minutes>\d{2})
19
19
  \b/x
20
20
 
21
- # e.g. 2012-09-14 w/ implied hours (set to 12:00)
21
+ # e.g. 2012-09-14 => YYYY-MM-DD
22
22
  # note: allow 2012-9-3 e.g. no leading zero required
23
23
  # regex_db2
24
24
  DB__DATE_RE = /\b
@@ -61,7 +61,7 @@ DD_MM__DATE_TIME_RE = /\b
61
61
  (?<minutes>\d{2})
62
62
  \b/x
63
63
 
64
- # e.g. 14.09.2012 => DD.MM.YYYY w/ implied hours (set to 12:00)
64
+ # e.g. 14.09.2012 => DD.MM.YYYY
65
65
  # regex_de3
66
66
  DD_MM_YYYY__DATE_RE = /\b
67
67
  (?<day>\d{1,2})
@@ -71,7 +71,7 @@ DD_MM_YYYY__DATE_RE = /\b
71
71
  (?<year>\d{4})
72
72
  \b/x
73
73
 
74
- # e.g. 14.09. => DD.MM. w/ implied year and implied hours (set to 12:00)
74
+ # e.g. 14.09. => DD.MM. w/ implied year
75
75
  # note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
76
76
  # note: we use a lookahead for last part e.g. (?:\s+|$|[\]]) - do NOT consume
77
77
  # regex_de4 (use lookahead assert)
@@ -89,7 +89,7 @@ DD_MM__DATE_RE = /\b
89
89
  EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
90
90
  (?<day>\d{1,2})
91
91
  \s
92
- (?<month_en>#{MONTH_EN})
92
+ (?<month_name>#{MONTH_EN})
93
93
  \s
94
94
  (?<year>\d{4})
95
95
  \s+
@@ -98,22 +98,16 @@ EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
98
98
  (?<minutes>\d{2})
99
99
  \b/x
100
100
 
101
- ###
102
- # fix: pass in lang (e.g. en or es)
103
- # only process format for lang plus fallback to en?
104
- # e.g. EN__DD_MONTH and ES__DD_MONTH depend on order for match (first listed will match)
105
-
106
- # e.g. 12 May => D|DD.MMM w/ implied year and implied hours
101
+ # e.g. 12 May => D|DD.MMM w/ implied year
107
102
  EN__DD_MONTH__DATE_RE = /\b
108
103
  (?<day>\d{1,2})
109
104
  \s
110
- (?<month_en>#{MONTH_EN})
105
+ (?<month_name>#{MONTH_EN})
111
106
  \b/x
112
107
 
113
-
114
108
  # e.g. Jun/12 2011 14:00
115
109
  EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
116
- (?<month_en>#{MONTH_EN})
110
+ (?<month_name>#{MONTH_EN})
117
111
  \/
118
112
  (?<day>\d{1,2})
119
113
  \s
@@ -126,7 +120,7 @@ EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
126
120
 
127
121
  # e.g. Jun/12 14:00 w/ implied year H|HH:MM
128
122
  EN__MONTH_DD__DATE_TIME_RE = /\b
129
- (?<month_en>#{MONTH_EN})
123
+ (?<month_name>#{MONTH_EN})
130
124
  \/
131
125
  (?<day>\d{1,2})
132
126
  \s+
@@ -135,9 +129,9 @@ EN__MONTH_DD__DATE_TIME_RE = /\b
135
129
  (?<minutes>\d{2})
136
130
  \b/x
137
131
 
138
- # e.g. Jun/12 2013 w/ implied hours (set to 12:00)
132
+ # e.g. Jun/12 2013
139
133
  EN__MONTH_DD_YYYY__DATE_RE = /\b
140
- (?<month_en>#{MONTH_EN})
134
+ (?<month_name>#{MONTH_EN})
141
135
  \/
142
136
  (?<day>\d{1,2})
143
137
  \s
@@ -150,85 +144,66 @@ EN__MONTH_DD_YYYY__DATE_RE = /\b
150
144
  # fix: remove space again for now - and use simple en date reader or something!!!
151
145
  ## was [\/ ] changed back to \/
152
146
  EN__MONTH_DD__DATE_RE = /\b
153
- (?<month_en>#{MONTH_EN})
147
+ (?<month_name>#{MONTH_EN})
154
148
  \/
155
149
  (?<day>\d{1,2})
156
150
  \b/x
157
151
 
158
152
 
159
- # e.g. 12 Ene w/ implied year and implied hours (set to 12:00)
153
+ # e.g. 12 Ene w/ implied year
160
154
  ES__DD_MONTH__DATE_RE = /\b
161
155
  (?<day>\d{1,2})
162
156
  \s
163
- (?<month_es>#{MONTH_ES})
157
+ (?<month_name>#{MONTH_ES})
164
158
  \b/x
165
159
 
166
160
  # e.g. Ven 8 Août or [Ven 8 Août] or Ven 8. Août or [Ven 8. Août]
167
161
  ### note: do NOT consume [] in regex (use lookahead assert)
168
- FR__WEEKDAY_DD_MONTH__DATE_RE = /\b
169
- (?:#{WEEKDAY_FR}) # note: skip weekday for now; do NOT capture
162
+ FR__DAY_DD_MONTH__DATE_RE = /\b
163
+ (?<day_name>#{DAY_FR})
170
164
  \s+
171
165
  (?<day>\d{1,2})
172
166
  \.? # note: make dot optional
173
167
  \s+
174
- (?<month_fr>#{MONTH_FR})
168
+ (?<month_name>#{MONTH_FR})
175
169
  (?=\s+|$|[\]])/x ## note: allow end-of-string/line too
176
170
 
177
171
 
178
172
 
179
- #
180
- # map table - 1) tag, 2) regex - note: order matters; first come-first matched/served
181
- ## todo/fix: remove (move to attic)??? always use lang specific - why? why not?
182
- FORMATS_ALL = [
183
- [ '[YYYY_MM_DD_hh_mm]', DB__DATE_TIME_RE ],
184
- [ '[YYYY_MM_DD]', DB__DATE_RE ],
185
- [ '[DD_MM_YYYY_hh_mm]', DD_MM_YYYY__DATE_TIME_RE ],
186
- [ '[DD_MM_hh_mm]', DD_MM__DATE_TIME_RE ],
187
- [ '[DD_MM_YYYY]', DD_MM_YYYY__DATE_RE ],
188
- [ '[DD_MM]', DD_MM__DATE_RE ],
189
- [ '[FR_WEEKDAY_DD_MONTH]', FR__WEEKDAY_DD_MONTH__DATE_RE ],
190
- [ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_RE ],
191
- [ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_RE ],
192
- [ '[EN_MONTH_DD_hh_mm]', EN__MONTH_DD__DATE_TIME_RE ],
193
- [ '[EN_MONTH_DD_YYYY]', EN__MONTH_DD_YYYY__DATE_RE ],
194
- [ '[EN_MONTH_DD]', EN__MONTH_DD__DATE_RE ],
195
- [ '[EN_DD_MONTH]', EN__DD_MONTH__DATE_RE ],
196
- [ '[ES_DD_MONTH]', ES__DD_MONTH__DATE_RE ]
197
- ]
198
-
173
+ #############################################
174
+ # map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served
199
175
 
200
176
  FORMATS_BASE = [ ### all numbers (no month names or weekday) - find a better name?
201
- [ '[YYYY_MM_DD_hh_mm]', DB__DATE_TIME_RE ],
202
- [ '[YYYY_MM_DD]', DB__DATE_RE ],
203
- [ '[DD_MM_YYYY_hh_mm]', DD_MM_YYYY__DATE_TIME_RE ],
204
- [ '[DD_MM_hh_mm]', DD_MM__DATE_TIME_RE ],
205
- [ '[DD_MM_YYYY]', DD_MM_YYYY__DATE_RE ],
206
- [ '[DD_MM]', DD_MM__DATE_RE ],
177
+ [ DB__DATE_TIME_RE, '[YYYY_MM_DD_hh_mm]' ],
178
+ [ DB__DATE_RE, '[YYYY_MM_DD]' ],
179
+ [ DD_MM_YYYY__DATE_TIME_RE, '[DD_MM_YYYY_hh_mm]' ],
180
+ [ DD_MM__DATE_TIME_RE, '[DD_MM_hh_mm]' ],
181
+ [ DD_MM_YYYY__DATE_RE, '[DD_MM_YYYY]' ],
182
+ [ DD_MM__DATE_RE, '[DD_MM]' ],
207
183
  ]
208
184
 
209
185
  FORMATS_EN = [
210
- [ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_RE ],
211
- [ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_RE ],
212
- [ '[EN_MONTH_DD_hh_mm]', EN__MONTH_DD__DATE_TIME_RE ],
213
- [ '[EN_MONTH_DD_YYYY]', EN__MONTH_DD_YYYY__DATE_RE ],
214
- [ '[EN_MONTH_DD]', EN__MONTH_DD__DATE_RE ],
215
- [ '[EN_DD_MONTH]', EN__DD_MONTH__DATE_RE ],
186
+ [ EN__DD_MONTH_YYYY__DATE_TIME_RE, '[EN_DD_MONTH_YYYY_hh_mm]' ],
187
+ [ EN__MONTH_DD_YYYY__DATE_TIME_RE, '[EN_MONTH_DD_YYYY_hh_mm]' ],
188
+ [ EN__MONTH_DD__DATE_TIME_RE, '[EN_MONTH_DD_hh_mm]' ],
189
+ [ EN__MONTH_DD_YYYY__DATE_RE, '[EN_MONTH_DD_YYYY]' ],
190
+ [ EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]' ],
191
+ [ EN__DD_MONTH__DATE_RE, '[EN_DD_MONTH]' ],
216
192
  ]
217
193
 
218
194
  FORMATS_FR = [
219
- [ '[FR_WEEKDAY_DD_MONTH]', FR__WEEKDAY_DD_MONTH__DATE_RE ],
195
+ [ FR__DAY_DD_MONTH__DATE_RE, '[FR_DAY_DD_MONTH]' ],
220
196
  ]
221
197
 
222
198
  FORMATS_ES = [
223
- [ '[ES_DD_MONTH]', ES__DD_MONTH__DATE_RE ],
199
+ [ ES__DD_MONTH__DATE_RE, '[ES_DD_MONTH]' ],
224
200
  ]
225
201
 
226
202
 
227
203
  FORMATS = {
228
- 'en' => FORMATS_BASE+FORMATS_EN,
229
- 'fr' => FORMATS_BASE+FORMATS_FR,
230
- 'es' => FORMATS_BASE+FORMATS_ES,
204
+ en: FORMATS_BASE+FORMATS_EN,
205
+ fr: FORMATS_BASE+FORMATS_FR,
206
+ es: FORMATS_BASE+FORMATS_ES,
231
207
  }
232
208
 
233
-
234
209
  end # module DateFormats
@@ -34,45 +34,32 @@ class Reader ## todo/check: rename to WordReader or something for easy (re)use
34
34
  end
35
35
  lines
36
36
  end # method parse
37
+ end # class Reader
38
+
37
39
 
38
- def self.parse_month( txt )
39
- lines = parse( txt )
40
- if lines.size != 12
41
- puts "*** !!! ERROR !!! reading month names; got #{lines.size} lines - expected 12"
42
- exit 1
43
- end
44
- lines
40
+
41
+ def self.parse_month( txt )
42
+ lines = Reader.parse( txt )
43
+ if lines.size != 12
44
+ puts "*** !!! ERROR !!! reading month names; got #{lines.size} lines - expected 12"
45
+ exit 1
45
46
  end
47
+ lines
48
+ end
46
49
 
47
- def self.parse_weekday( txt )
48
- lines = parse( txt )
49
- if lines.size != 7
50
- puts "*** !!! ERROR !!! reading weekday names; got #{lines.size} lines - expected 7"
51
- exit 1
52
- end
53
- lines
50
+ def self.parse_day( txt )
51
+ lines = Reader.parse( txt )
52
+ if lines.size != 7
53
+ puts "*** !!! ERROR !!! reading day names; got #{lines.size} lines - expected 7"
54
+ exit 1
54
55
  end
55
- end # class Reader
56
+ lines
57
+ end
56
58
 
57
59
 
58
- def self.build_re( lines )
60
+ def self.build_names( lines )
59
61
  ## join all words together into a single string e.g.
60
62
  ## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
61
63
  lines.map { |line| line.join('|') }.join('|')
62
64
  end
63
-
64
- def self.build_map( lines )
65
- ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
66
- ## note: index is a string too
67
- ## {"January" => "1", "Jan" => "1",
68
- ## "February" => "2", "Feb" => "2",
69
- ## "March" => "3", "Mar" => "3",
70
- ## "April" => "4", "Apr" => "4",
71
- ## "May" => "5",
72
- ## "June" => "6", "Jun" => "6", ...
73
- lines.each_with_index.reduce( {} ) do |h,(line,i)|
74
- line.each { |name| h[ name ] = (i+1).to_s } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
75
- h
76
- end
77
- end
78
65
  end # module DateFormats
@@ -1,11 +1,16 @@
1
1
  module DateFormats
2
- module Source
3
2
 
4
3
  # todo: make more generic for reuse
5
4
  ### fix:
6
5
  ## use date/en.txt or en.txt etc. -- why? why not?
7
6
 
8
- MONTH_EN = <<TXT
7
+ ## note: always sort lines with longest words, abbreviations first!!!!
8
+ ## todo/fix: add/split into MONTH_NAMES and MONTH_ABBREVS (and DAY_NAMES and DAY_ABBREVS) - why? why not?
9
+ MONTH_NAMES = {}
10
+ DAY_NAMES = {}
11
+
12
+
13
+ MONTH_NAMES[:en] = <<TXT
9
14
  January Jan
10
15
  February Feb
11
16
  March Mar
@@ -20,11 +25,12 @@ November Nov
20
25
  December Dec
21
26
  TXT
22
27
 
23
- WEEKDAY_EN = <<TXT
28
+
29
+ DAY_NAMES[:en] = <<TXT
24
30
  Monday Mon
25
- Tuesday Tu Tue Tues
31
+ Tuesday Tues Tue Tu
26
32
  Wednesday Wed
27
- Thursday Th Thu Thur Thurs
33
+ Thursday Thurs Thur Thu Th
28
34
  Friday Fri
29
35
  Saturday Sat
30
36
  Sunday Sun
@@ -32,7 +38,7 @@ TXT
32
38
 
33
39
 
34
40
 
35
- MONTH_FR = <<TXT
41
+ MONTH_NAMES[:fr] = <<TXT
36
42
  Janvier Janv Jan ## check janv in use??
37
43
  Février Févr Fév ## check fevr in use???
38
44
  Mars Mar
@@ -47,7 +53,7 @@ Novembre Nove Nov ## check nove in use??
47
53
  Décembre Déce Déc ## check dece in use??
48
54
  TXT
49
55
 
50
- WEEKDAY_FR = <<TXT
56
+ DAY_NAMES[:fr] = <<TXT
51
57
  Lundi Lun L
52
58
  Mardi Mar Ma
53
59
  Mercredi Mer Me
@@ -59,7 +65,7 @@ TXT
59
65
 
60
66
 
61
67
 
62
- MONTH_ES = <<TXT
68
+ MONTH_NAMES[:es] = <<TXT
63
69
  Enero Ene
64
70
  Febrero Feb
65
71
  Marzo Mar
@@ -74,7 +80,7 @@ Noviembre Nov
74
80
  Diciembre Dic
75
81
  TXT
76
82
 
77
- MONTH_DE = <<TXT
83
+ MONTH_NAMES[:de] = <<TXT
78
84
  Jänner Januar Jan Jän # note: in Austria - Jänner; in Deutschland Januar allow both ??
79
85
  Feber Februar Feb
80
86
  März Mär
@@ -89,7 +95,7 @@ November Nov
89
95
  Dezember Dez
90
96
  TXT
91
97
 
92
- MONTH_IT = <<TXT
98
+ MONTH_NAMES[:it] = <<TXT
93
99
  Gennaio
94
100
  Febbraio
95
101
  Marzo
@@ -104,7 +110,7 @@ Novembre
104
110
  Dicembre
105
111
  TXT
106
112
 
107
- MONTH_PT = <<TXT
113
+ MONTH_NAMES[:pt] = <<TXT
108
114
  Janeiro
109
115
  Fevereiro
110
116
  Março
@@ -119,7 +125,7 @@ Novembro
119
125
  Dezembro
120
126
  TXT
121
127
 
122
- MONTH_RO = <<TXT
128
+ MONTH_NAMES[:ro] = <<TXT
123
129
  Ianuarie
124
130
  Februarie
125
131
  Martie
@@ -134,5 +140,9 @@ Noiembrie
134
140
  Decembrie
135
141
  TXT
136
142
 
137
- end # module Source
143
+ ############################################
144
+ ## convert (unparsed) text to (parsed) lines with words
145
+ MONTH_NAMES.each {|k,v| MONTH_NAMES[k] = parse_month(v) }
146
+ DAY_NAMES.each {|k,v| DAY_NAMES[k] = parse_day(v) }
147
+
138
148
  end # module DateFormats
@@ -3,8 +3,8 @@
3
3
 
4
4
  module DateFormats
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 1
7
- PATCH = 1
6
+ MINOR = 2
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -15,7 +15,7 @@ class TestDate < MiniTest::Test
15
15
  [ '21.01.2013 21.30', '2013-01-21 21:30', '[DD_MM_YYYY_hh_mm]' ],
16
16
  [ '26.01.2013', '2013-01-26', '[DD_MM_YYYY]' ],
17
17
  [ '[26.01.2013]', '2013-01-26', '[[DD_MM_YYYY]]' ],
18
- [ '[21.1.]', '2013-01-21 00:00', '[[DD_MM]]' ]
18
+ [ '[21.1.]', '2013-01-21', '[[DD_MM]]' ]
19
19
  ]
20
20
 
21
21
  assert_dates( data, start: Date.new( 2013, 1, 1 ) )
@@ -23,15 +23,15 @@ class TestDate < MiniTest::Test
23
23
 
24
24
  def test_date_fr
25
25
  data = [
26
- [ '[Ven 08. Août]', '2014-08-08' ],
27
- [ 'Ven 08. Août', '2014-08-08' ],
28
- [ 'Ven 8. Août', '2014-08-08' ],
29
- [ '[Sam 9. Août]', '2014-08-09' ],
30
- [ '[Dim 10. Août]', '2014-08-10' ],
31
- [ '[Sam 31. Janv]', '2015-01-31' ],
32
- [ '[Sam 7. Févr]', '2015-02-07' ],
33
- [ '[Sam 31. Jan]', '2015-01-31' ],
34
- [ '[Sam 7. Fév]', '2015-02-07' ],
26
+ [ '[Ven 08. Août]', '2014-08-08', '[[FR_DAY_DD_MONTH]]' ],
27
+ [ 'Ven 08. Août', '2014-08-08', '[FR_DAY_DD_MONTH]' ],
28
+ [ 'Ven 8. Août', '2014-08-08', '[FR_DAY_DD_MONTH]' ],
29
+ [ '[Sam 9. Août]', '2014-08-09', '[[FR_DAY_DD_MONTH]]' ],
30
+ [ '[Dim 10. Août]', '2014-08-10', '[[FR_DAY_DD_MONTH]]' ],
31
+ [ '[Sam 31. Janv]', '2015-01-31', '[[FR_DAY_DD_MONTH]]' ],
32
+ [ '[Sam 7. Févr]', '2015-02-07', '[[FR_DAY_DD_MONTH]]' ],
33
+ [ '[Sam 31. Jan]', '2015-01-31', '[[FR_DAY_DD_MONTH]]' ],
34
+ [ '[Sam 7. Fév]', '2015-02-07', '[[FR_DAY_DD_MONTH]]' ],
35
35
  ]
36
36
 
37
37
  assert_dates( data, start: Date.new( 2014, 8, 1 ), lang: 'fr' )
@@ -39,21 +39,21 @@ class TestDate < MiniTest::Test
39
39
 
40
40
  def test_date_en
41
41
  data = [
42
- [ 'Jun/12 2011 14:00', '2011-06-12 14:00' ],
43
- [ 'Oct/12 2013 16:00', '2013-10-12 16:00' ],
42
+ [ 'Jun/12 2011 14:00', '2011-06-12 14:00', '[EN_MONTH_DD_YYYY_hh_mm]' ],
43
+ [ 'Oct/12 2013 16:00', '2013-10-12 16:00', '[EN_MONTH_DD_YYYY_hh_mm]' ],
44
44
 
45
- [ 'Jan/26 2011', '2011-01-26' ],
46
- [ 'Jan/26 2011', '2011-01-26 00:00' ],
45
+ [ 'Jan/26 2011', '2011-01-26', '[EN_MONTH_DD_YYYY]' ],
46
+ [ 'Jan/26 2011', '2011-01-26', '[EN_MONTH_DD_YYYY]' ],
47
47
 
48
- [ 'Jan/26', '2013-01-26' ],
49
- [ 'Jan/26', '2013-01-26 00:00' ],
50
- [ '26 January', '2013-01-26' ],
51
- [ '26 January', '2013-01-26 00:00' ],
48
+ [ 'Jan/26', '2013-01-26', '[EN_MONTH_DD]' ],
49
+ [ 'Jan/26', '2013-01-26', '[EN_MONTH_DD]' ],
50
+ [ '26 January', '2013-01-26', '[EN_DD_MONTH]' ],
51
+ [ '26 January', '2013-01-26', '[EN_DD_MONTH]' ],
52
52
 
53
- [ 'Jun/13', '2013-06-13' ],
54
- [ 'Jun/13', '2013-06-13 00:00' ],
55
- [ '13 June', '2013-06-13' ],
56
- [ '13 June', '2013-06-13 00:00' ]
53
+ [ 'Jun/13', '2013-06-13', '[EN_MONTH_DD]' ],
54
+ [ 'Jun/13', '2013-06-13', '[EN_MONTH_DD]' ],
55
+ [ '13 June', '2013-06-13', '[EN_DD_MONTH]' ],
56
+ [ '13 June', '2013-06-13', '[EN_DD_MONTH]' ]
57
57
  ]
58
58
 
59
59
  assert_dates( data, start: Date.new( 2013, 1, 1 ), lang: 'en' )
@@ -66,7 +66,11 @@ private
66
66
  data.each do |rec|
67
67
  line = rec[0]
68
68
  str = rec[1]
69
- value = parse_date( line, start: start, lang: lang )
69
+
70
+ ## note: test / use parse and find! -- parse MUST go first
71
+ values = []
72
+ values << DateFormats.parse( line, start: start, lang: lang )
73
+ values << DateFormats.find!( line, start: start, lang: lang )
70
74
 
71
75
  tagged_line = rec[2] ## optinal tagged line
72
76
  if tagged_line ## note: line gets tagged inplace!!! (no new string)
@@ -74,14 +78,17 @@ private
74
78
  puts "#{line} == #{tagged_line}"
75
79
  end
76
80
 
77
- if str.index( ':' )
78
- assert_datetime( DateTime.strptime( str, '%Y-%m-%d %H:%M' ), value )
79
- else
80
- assert_date( DateTime.strptime( str, '%Y-%m-%d' ), value )
81
+ values.each do |value|
82
+ if str.index( ':' )
83
+ assert_datetime( DateTime.strptime( str, '%Y-%m-%d %H:%M' ), value )
84
+ else
85
+ assert_date( Date.strptime( str, '%Y-%m-%d' ), value )
86
+ end
81
87
  end
82
88
  end
83
89
  end
84
90
 
91
+
85
92
  ## todo: check if assert_datetime or assert_date exist already? what is the best practice to check dates ???
86
93
  def assert_date( exp, value )
87
94
  assert_equal exp.year, value.year
@@ -98,12 +105,4 @@ private
98
105
  assert_date( exp, value )
99
106
  assert_time( exp, value )
100
107
  end
101
-
102
-
103
- def parse_date( line, start:, lang: )
104
- # e.g. lets you pass in opts[:start_at] ???
105
- finder = DateFormats::DateFinder.new( lang: lang )
106
- finder.find!( line, start_at: start )
107
- end
108
-
109
108
  end # class TestDate
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: date-formats
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-08 00:00:00.000000000 Z
11
+ date: 2019-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logutils