date-formats 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c60c4e7804fd0a6a4b11e70c35daccfb6db9acbb
4
- data.tar.gz: b78c75c1941eeafd29333b7872fdd62facb93cd1
3
+ metadata.gz: 81aad612afdba8f85e731f97c1bc119499a8211d
4
+ data.tar.gz: e013e9bdac91e1938149d395b946c535b2c75714
5
5
  SHA512:
6
- metadata.gz: d9f1630097d9f0d1ee5e64963b156093969791ba6626c16421dafe46b33b74966a45875a830f5c61a23d8c76afd471b9715d84fa9de52e68c6ef8aefc0b829c6
7
- data.tar.gz: 436c8c910c926d09126f0bd4142d3e1b783f6f3c83d6af18463504470d477f2886be119ff0e03709c892eeafa021ed038b2c84c263b1cd1aafc406357333b1a1
6
+ metadata.gz: 718656e27eab527b211337e3a9fef08030017d2b31f7c59fa89b2292df61275a8b9f1ca247e5f9a5eacad03782d83890cd1a37f5d2d6c5ff541ce0ef67c579bc
7
+ data.tar.gz: c9470e38606e1b909230e8e62187cb1ba2f29f4bd4d99a48fc8125445d80b08e7ac37e37c7d08e463739d7bca767526ab535e0f0016d1f8abc546e16a55cb551
@@ -11,40 +11,26 @@ require 'logutils'
11
11
  ###
12
12
  # our own code
13
13
  require 'date-formats/version' # let version always go first
14
- require 'date-formats/source'
15
14
  require 'date-formats/reader'
15
+ require 'date-formats/source'
16
16
 
17
17
 
18
18
  module DateFormats
19
19
 
20
+ #############
21
+ # helpers for building format regex patterns
22
+ MONTH_EN = build_names( MONTH_NAMES[:en] )
23
+ # e.g. January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
24
+ DAY_EN = build_names( DAY_NAMES[:en] )
25
+ # e.g. Monday|Mon|Tuesday|Tues|Tue|Tu|Wednesday|Wed|...
20
26
 
21
- MONTH_EN_LINES = Reader.parse_month( Source::MONTH_EN )
22
- MONTH_EN_TO_MM = build_map( MONTH_EN_LINES )
23
- MONTH_EN = build_re( MONTH_EN_LINES )
24
-
25
- WEEKDAY_EN_LINES = Reader.parse_weekday( Source::WEEKDAY_EN )
26
- WEEKDAY_EN = build_re( WEEKDAY_EN_LINES )
27
-
28
-
29
-
30
- MONTH_FR_LINES = Reader.parse_month( Source::MONTH_FR )
31
- MONTH_FR_TO_MM = build_map( MONTH_FR_LINES )
32
- MONTH_FR = build_re( MONTH_FR_LINES )
33
-
34
- WEEKDAY_FR_LINES = Reader.parse_weekday( Source::WEEKDAY_FR )
35
- WEEKDAY_FR = build_re( WEEKDAY_FR_LINES )
36
-
37
-
38
-
39
- MONTH_ES_LINES = Reader.parse_month( Source::MONTH_ES )
40
- MONTH_ES_TO_MM = build_map( MONTH_ES_LINES )
41
- MONTH_ES = build_re( MONTH_ES_LINES )
42
-
43
-
27
+ MONTH_FR = build_names( MONTH_NAMES[:fr] )
28
+ DAY_FR = build_names( DAY_NAMES[:fr] )
44
29
 
45
- MONTH_DE_LINES = Reader.parse_month( Source::MONTH_DE )
46
- MONTH_DE_TO_MM = build_map( MONTH_DE_LINES )
47
- MONTH_DE = build_re( MONTH_DE_LINES )
30
+ MONTH_ES = build_names( MONTH_NAMES[:es] )
31
+ MONTH_PT = build_names( MONTH_NAMES[:pt] )
32
+ MONTH_DE = build_names( MONTH_NAMES[:de] )
33
+ MONTH_IT = build_names( MONTH_NAMES[:it] )
48
34
 
49
35
  end # module DateFormats
50
36
 
@@ -1,154 +1,248 @@
1
1
  # encoding: utf-8
2
2
 
3
+
3
4
  module DateFormats
4
5
 
5
6
 
6
- class DateFinderBase
7
+ def self.lang
8
+ @@lang ||= :en ## defaults to english (:en)
9
+ end
10
+ def self.lang=( value )
11
+ @@lang = value.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
12
+ end
7
13
 
8
- private
9
- def calc_year( month, day, start: ) ## note: start required param for now on!!!
10
14
 
11
- logger.debug " [calc_year] ????-#{month}-#{day} -- start: #{start}"
15
+ def self.parser( lang: ) ## find parser
16
+ lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
12
17
 
13
- if month >= start.month
14
- # assume same year as start_at event (e.g. 2013 for 2013/14 season)
15
- start.year
16
- else
17
- # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
18
- start.year+1
19
- end
18
+ ## note: cache all "built-in" lang versions (e.g. formats == nil)
19
+ @@parser ||= {}
20
+ parser = @@parser[ lang ] ||= DateParser.new( lang: lang )
20
21
  end
21
22
 
23
+ def self.parse( line,
24
+ lang: self.class.lang,
25
+ start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
26
+ )
27
+ parser( lang: lang ).parse( line, start: start )
28
+ end
29
+
30
+ def self.find!( line,
31
+ lang: self.class.lang,
32
+ start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
33
+ )
34
+ parser( lang: lang ).find!( line, start: start )
35
+ end
22
36
 
23
- def parse_date_time( match_data, start: )
24
37
 
25
- # convert regex match_data captures to hash
26
- # - note: cannont use match_data like a hash (e.g. raises exception if key/name not present/found)
27
- h = {}
28
- # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
29
- match_data.names.each { |name| h[name.to_sym] = match_data[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
30
38
 
31
- ## puts "[parse_date_time] match_data:"
32
- ## pp h
33
- logger.debug " [parse_date_time] hash: >#{h.inspect}<"
39
+ class DateParser
34
40
 
35
- h[ :month ] = MONTH_EN_TO_MM[ h[:month_en] ] if h[:month_en]
36
- h[ :month ] = MONTH_ES_TO_MM[ h[:month_es] ] if h[:month_es]
37
- h[ :month ] = MONTH_FR_TO_MM[ h[:month_fr] ] if h[:month_fr]
38
- h[ :month ] = MONTH_DE_TO_MM[ h[:month_de] ] if h[:month_de]
39
- h[ :month ] = MONTH_IT_TO_MM[ h[:month_it] ] if h[:month_it]
40
- h[ :month ] = MONTH_PT_TO_MM[ h[:month_pt] ] if h[:month_pt]
41
+ include LogUtils::Logging
41
42
 
42
- month = h[:month]
43
- day = h[:day]
44
- year = h[:year] || calc_year( month.to_i, day.to_i, start: start ).to_s
43
+ def initialize( lang:,
44
+ formats: nil, month_names: nil, day_names: nil
45
+ )
46
+ @lang = lang.to_sym ## note: make sure lang is always a symbol for now (NOT a string)
45
47
 
46
- hours = h[:hours] || '00' # default to 00:00 for HH:MM (hours:minutes)
47
- minutes = h[:minutes] || '00'
48
+ if formats
49
+ @formats = formats
50
+ else
51
+ @formats = FORMATS[ @lang ]
52
+ if @formats
53
+ month_names = MONTH_NAMES[ @lang ]
54
+ day_names = DAY_NAMES[ @lang ]
55
+ else
56
+ ## fallback to english if lang not available
57
+ ## todo/fix: add/issue warning!!!!!
58
+ @formats = FORMATS[ :en ]
59
+ month_names = MONTH_NAMES[ :en ]
60
+ day_names = DAY_NAMES[ :en ]
61
+ end
62
+ end
48
63
 
49
- value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i]
50
- logger.debug " date: >#{value}<"
64
+ ## convert month_names and day_names to map if present
65
+ @month_names = month_names ? build_map( month_names ) : nil
66
+ @day_names = day_names ? build_map( day_names ) : nil
67
+ end
51
68
 
52
- DateTime.strptime( value, '%Y-%m-%d %H:%M' )
53
- end
54
69
 
55
- end # class DateFinderBase
70
+ def parse( line, start: )
71
+ date = nil
72
+ @formats.each do |format|
73
+ re = format[0]
74
+ m = re.match( line )
75
+ if m
76
+ date = parse_matchdata( m, start: start )
77
+ break
78
+ end
79
+ # no match; continue; try next regex pattern
80
+ end
56
81
 
82
+ ## todo/fix - raise ArgumentError - invalid date; no format match found
83
+ date # note: nil if no match found
84
+ end
57
85
 
58
86
 
59
- class DateFinder < DateFinderBase
87
+ def find!( line, start: )
88
+ # fix: use more lookahead for all required trailing spaces!!!!!
89
+ # fix: use <name capturing group> for month,day,year etc.!!!
90
+
91
+ #
92
+ # fix: !!!!
93
+ # date in [] will become [[DATE.DE4]] - when getting removed will keep ]!!!!
94
+ # fix: change regex to \[[A-Z0-9.]\] !!!!!! plus add unit test too!!!
95
+ #
96
+
97
+ date = nil
98
+ @formats.each do |format|
99
+ re = format[0]
100
+ tag = format[1]
101
+ m = re.match( line )
102
+ if m
103
+ date = parse_matchdata( m, start: start )
104
+ ## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
105
+ ## fix: use md.begin(0), md.end(0)
106
+ line.sub!( m[0], tag )
107
+ ## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
108
+ break
109
+ end
110
+ # no match; continue; try next regex pattern
111
+ end
112
+ date # note: nil if no match found
113
+ end
60
114
 
61
- include LogUtils::Logging
115
+ private
116
+ def calc_year( month, day, start: )
62
117
 
118
+ logger.debug " [calc_year] ????-#{month}-#{day} -- start: #{start}"
63
119
 
64
- def self.lang() @@lang ||= 'en'; end ## defaults to english (en)
65
- def self.lang=(value) @@lang = value; end
120
+ if month >= start.month
121
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
122
+ start.year
123
+ else
124
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
125
+ start.year+1
126
+ end
127
+ end
66
128
 
67
129
 
68
- def initialize( lang: self.class.lang )
69
- @lang = lang.to_s
70
- ## fallback to english if lang not available
71
- ## todo/fix: add/issue warning!!!!!
72
- @formats = FORMATS[ @lang ] || FORMATS['en']
73
- end
130
+ def parse_matchdata( m, start: )
131
+ # convert regex match_data captures to hash
132
+ # - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
133
+ h = {}
134
+ # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
135
+ m.names.each { |name| h[name.to_sym] = m[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
136
+
137
+ ## puts "[parse_date_time] match_data:"
138
+ ## pp h
139
+ logger.debug " [parse_matchdata] hash: >#{h.inspect}<"
140
+
141
+ if h[:month_name]
142
+ ## todo/fix: issue error if no month names defined !!!
143
+ if @month_names
144
+ h[ :month ] = @month_names[ h[:month_name] ]
145
+ else
146
+ ## todo/fix: change to ArgumentError( "invalid date; ")
147
+ puts "** !!! ERROR !!! - no month names defined for lang #{@lang}; cannot match:"
148
+ pp m
149
+ exit 1
150
+ end
151
+ end
74
152
 
75
- def find!( line, start_at: ) ## todo/fix: change start_at to start only!!!
76
- # fix: use more lookahead for all required trailing spaces!!!!!
77
- # fix: use <name capturing group> for month,day,year etc.!!!
78
-
79
- #
80
- # fix: !!!!
81
- # date in [] will become [[DATE.DE4]] - when getting removed will keep ]!!!!
82
- # fix: change regex to \[[A-Z0-9.]\] !!!!!! plus add unit test too!!!
83
- #
84
-
85
- m = nil
86
- @formats.each do |format|
87
- tag = format[0]
88
- pattern = format[1]
89
- m=pattern.match( line )
90
- if m
91
- date = parse_date_time( m, start: start_at )
92
- ## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
93
- ## fix: use md.begin(0), md.end(0)
94
- line.sub!( m[0], tag )
95
- ## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
96
- return date
153
+ if h[:day_name]
154
+ if @day_names
155
+ ## note: use cwday in ruby date to get days from 1-7 (Monday (1) to Sunday (7))
156
+ ## wday gives you 0-6 (Sunday (0), Monday (1) to Saturday (6))
157
+ h[ :cwday ] = @day_names[ h[:day_name ] ]
158
+ else
159
+ ## todo/fix: change to ArgumentError( "invalid date; ")
160
+ puts "** !!! ERROR !!! - no day names defined for lang #{@lang}; cannot match:"
161
+ pp m
162
+ exit 1
163
+ end
97
164
  end
98
- # no match; continue; try next pattern
99
- end
100
165
 
101
- return nil # no match found
102
- end
166
+ month = h[:month]
167
+ day = h[:day]
168
+ year = h[:year] || calc_year( month.to_i, day.to_i, start: start ).to_s
169
+
170
+ if h[:hours] || h[:minutes] ## check time (hours or minutes) is present (otherwise assume just Date and NOT DateTime)
171
+ hours = h[:hours] || '00' # default to 00:00 for HH:MM (hours:minutes)
172
+ minutes = h[:minutes] || '00'
103
173
 
104
- end # class DateFinder
105
-
106
-
107
-
108
- class RsssfDateFinder < DateFinderBase
109
-
110
- include LogUtils::Logging
111
-
112
- MONTH_EN = 'Jan|'+
113
- 'Feb|'+
114
- 'March|Mar|'+
115
- 'April|Apr|'+
116
- 'May|'+
117
- 'June|Jun|'+
118
- 'July|Jul|'+
119
- 'Aug|'+
120
- 'Sept|Sep|'+
121
- 'Oct|'+
122
- 'Nov|'+
123
- 'Dec'
124
-
125
- ## e.g.
126
- ## [Jun 7] or [Aug 12] etc. - not MUST include brackets e.g. []
127
- ##
128
- ## check add \b at the beginning and end - why?? why not?? working??
129
- EN__MONTH_DD__DATE_RE = /\[
130
- (?<month_en>#{MONTH_EN})
131
- \s
132
- (?<day>\d{1,2})
133
- \]/x
134
-
135
- def find!( line, start_at: ) ## todo/fix: change start_at to start only!!!
136
- # fix: use more lookahead for all required trailing spaces!!!!!
137
- # fix: use <name capturing group> for month,day,year etc.!!!
138
-
139
- tag = '[EN_MONTH_DD]'
140
- pattern = EN__MONTH_DD__DATE_RE
141
- m = pattern.match( line )
142
- if m
143
- date = parse_date_time( m, start: start_at )
144
- ## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
145
- ## fix: use md.begin(0), md.end(0)
146
- line.sub!( m[0], tag )
147
- ## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
148
- return date
174
+ value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i]
175
+ logger.debug " datetime: >#{value}<"
176
+
177
+ date = DateTime.strptime( value, '%Y-%m-%d %H:%M' )
178
+ else
179
+ value = '%d-%02d-%02d' % [year.to_i, month.to_i, day.to_i]
180
+ logger.debug " date: >#{value}<"
181
+
182
+ date = Date.strptime( value, '%Y-%m-%d' )
183
+ end
184
+
185
+ ## check/assert cwday if present!!!!
186
+ date
187
+ end # method parse
188
+
189
+ ##################
190
+ # helpers
191
+ private
192
+ def build_map( lines )
193
+ ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
194
+ ## note: index is a string too
195
+ ## {"January" => "1", "Jan" => "1",
196
+ ## "February" => "2", "Feb" => "2",
197
+ ## "March" => "3", "Mar" => "3",
198
+ ## "April" => "4", "Apr" => "4",
199
+ ## "May" => "5",
200
+ ## "June" => "6", "Jun" => "6", ...
201
+ lines.each_with_index.reduce( {} ) do |h,(line,i)|
202
+ line.each { |name| h[ name ] = (i+1).to_s } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
203
+ h
204
+ end
149
205
  end
150
- return nil # no match found
151
- end
152
- end ## class RsssfDateFinder
206
+ end # class DateParser
207
+
208
+
209
+
210
+
211
+ class RsssfDateParser < DateParser
212
+
213
+ MONTH_NAMES = DateFormats.parse_month( <<TXT )
214
+ Jan
215
+ Feb
216
+ March Mar
217
+ April Apr
218
+ May
219
+ June Jun
220
+ July Jul
221
+ Aug
222
+ Sept Sep
223
+ Oct
224
+ Nov
225
+ Dec
226
+ TXT
227
+
228
+ MONTH_EN = DateFormats.build_names( MONTH_NAMES ) ## re helper e.g. Jan|Feb|March|Mar|...
229
+
230
+ ## e.g.
231
+ ## [Jun 7] or [Aug 12] etc. - not MUST include brackets e.g. []
232
+ ##
233
+ ## check add \b at the beginning and end - why?? why not?? working??
234
+ EN__MONTH_DD__DATE_RE = /\[
235
+ (?<month_name>#{MONTH_EN})
236
+ \s
237
+ (?<day>\d{1,2})
238
+ \]/x
239
+
240
+ def initialize
241
+ super( lang: 'en',
242
+ formats: [[EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]']],
243
+ month_names: MONTH_NAMES
244
+ )
245
+ end
246
+ end ## class RsssfDateParser
153
247
 
154
248
  end # module DateFormats
@@ -18,7 +18,7 @@ DB__DATE_TIME_RE = /\b
18
18
  (?<minutes>\d{2})
19
19
  \b/x
20
20
 
21
- # e.g. 2012-09-14 w/ implied hours (set to 12:00)
21
+ # e.g. 2012-09-14 => YYYY-MM-DD
22
22
  # note: allow 2012-9-3 e.g. no leading zero required
23
23
  # regex_db2
24
24
  DB__DATE_RE = /\b
@@ -61,7 +61,7 @@ DD_MM__DATE_TIME_RE = /\b
61
61
  (?<minutes>\d{2})
62
62
  \b/x
63
63
 
64
- # e.g. 14.09.2012 => DD.MM.YYYY w/ implied hours (set to 12:00)
64
+ # e.g. 14.09.2012 => DD.MM.YYYY
65
65
  # regex_de3
66
66
  DD_MM_YYYY__DATE_RE = /\b
67
67
  (?<day>\d{1,2})
@@ -71,7 +71,7 @@ DD_MM_YYYY__DATE_RE = /\b
71
71
  (?<year>\d{4})
72
72
  \b/x
73
73
 
74
- # e.g. 14.09. => DD.MM. w/ implied year and implied hours (set to 12:00)
74
+ # e.g. 14.09. => DD.MM. w/ implied year
75
75
  # note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
76
76
  # note: we use a lookahead for last part e.g. (?:\s+|$|[\]]) - do NOT consume
77
77
  # regex_de4 (use lookahead assert)
@@ -89,7 +89,7 @@ DD_MM__DATE_RE = /\b
89
89
  EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
90
90
  (?<day>\d{1,2})
91
91
  \s
92
- (?<month_en>#{MONTH_EN})
92
+ (?<month_name>#{MONTH_EN})
93
93
  \s
94
94
  (?<year>\d{4})
95
95
  \s+
@@ -98,22 +98,16 @@ EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
98
98
  (?<minutes>\d{2})
99
99
  \b/x
100
100
 
101
- ###
102
- # fix: pass in lang (e.g. en or es)
103
- # only process format for lang plus fallback to en?
104
- # e.g. EN__DD_MONTH and ES__DD_MONTH depend on order for match (first listed will match)
105
-
106
- # e.g. 12 May => D|DD.MMM w/ implied year and implied hours
101
+ # e.g. 12 May => D|DD.MMM w/ implied year
107
102
  EN__DD_MONTH__DATE_RE = /\b
108
103
  (?<day>\d{1,2})
109
104
  \s
110
- (?<month_en>#{MONTH_EN})
105
+ (?<month_name>#{MONTH_EN})
111
106
  \b/x
112
107
 
113
-
114
108
  # e.g. Jun/12 2011 14:00
115
109
  EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
116
- (?<month_en>#{MONTH_EN})
110
+ (?<month_name>#{MONTH_EN})
117
111
  \/
118
112
  (?<day>\d{1,2})
119
113
  \s
@@ -126,7 +120,7 @@ EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
126
120
 
127
121
  # e.g. Jun/12 14:00 w/ implied year H|HH:MM
128
122
  EN__MONTH_DD__DATE_TIME_RE = /\b
129
- (?<month_en>#{MONTH_EN})
123
+ (?<month_name>#{MONTH_EN})
130
124
  \/
131
125
  (?<day>\d{1,2})
132
126
  \s+
@@ -135,9 +129,9 @@ EN__MONTH_DD__DATE_TIME_RE = /\b
135
129
  (?<minutes>\d{2})
136
130
  \b/x
137
131
 
138
- # e.g. Jun/12 2013 w/ implied hours (set to 12:00)
132
+ # e.g. Jun/12 2013
139
133
  EN__MONTH_DD_YYYY__DATE_RE = /\b
140
- (?<month_en>#{MONTH_EN})
134
+ (?<month_name>#{MONTH_EN})
141
135
  \/
142
136
  (?<day>\d{1,2})
143
137
  \s
@@ -150,85 +144,66 @@ EN__MONTH_DD_YYYY__DATE_RE = /\b
150
144
  # fix: remove space again for now - and use simple en date reader or something!!!
151
145
  ## was [\/ ] changed back to \/
152
146
  EN__MONTH_DD__DATE_RE = /\b
153
- (?<month_en>#{MONTH_EN})
147
+ (?<month_name>#{MONTH_EN})
154
148
  \/
155
149
  (?<day>\d{1,2})
156
150
  \b/x
157
151
 
158
152
 
159
- # e.g. 12 Ene w/ implied year and implied hours (set to 12:00)
153
+ # e.g. 12 Ene w/ implied year
160
154
  ES__DD_MONTH__DATE_RE = /\b
161
155
  (?<day>\d{1,2})
162
156
  \s
163
- (?<month_es>#{MONTH_ES})
157
+ (?<month_name>#{MONTH_ES})
164
158
  \b/x
165
159
 
166
160
  # e.g. Ven 8 Août or [Ven 8 Août] or Ven 8. Août or [Ven 8. Août]
167
161
  ### note: do NOT consume [] in regex (use lookahead assert)
168
- FR__WEEKDAY_DD_MONTH__DATE_RE = /\b
169
- (?:#{WEEKDAY_FR}) # note: skip weekday for now; do NOT capture
162
+ FR__DAY_DD_MONTH__DATE_RE = /\b
163
+ (?<day_name>#{DAY_FR})
170
164
  \s+
171
165
  (?<day>\d{1,2})
172
166
  \.? # note: make dot optional
173
167
  \s+
174
- (?<month_fr>#{MONTH_FR})
168
+ (?<month_name>#{MONTH_FR})
175
169
  (?=\s+|$|[\]])/x ## note: allow end-of-string/line too
176
170
 
177
171
 
178
172
 
179
- #
180
- # map table - 1) tag, 2) regex - note: order matters; first come-first matched/served
181
- ## todo/fix: remove (move to attic)??? always use lang specific - why? why not?
182
- FORMATS_ALL = [
183
- [ '[YYYY_MM_DD_hh_mm]', DB__DATE_TIME_RE ],
184
- [ '[YYYY_MM_DD]', DB__DATE_RE ],
185
- [ '[DD_MM_YYYY_hh_mm]', DD_MM_YYYY__DATE_TIME_RE ],
186
- [ '[DD_MM_hh_mm]', DD_MM__DATE_TIME_RE ],
187
- [ '[DD_MM_YYYY]', DD_MM_YYYY__DATE_RE ],
188
- [ '[DD_MM]', DD_MM__DATE_RE ],
189
- [ '[FR_WEEKDAY_DD_MONTH]', FR__WEEKDAY_DD_MONTH__DATE_RE ],
190
- [ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_RE ],
191
- [ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_RE ],
192
- [ '[EN_MONTH_DD_hh_mm]', EN__MONTH_DD__DATE_TIME_RE ],
193
- [ '[EN_MONTH_DD_YYYY]', EN__MONTH_DD_YYYY__DATE_RE ],
194
- [ '[EN_MONTH_DD]', EN__MONTH_DD__DATE_RE ],
195
- [ '[EN_DD_MONTH]', EN__DD_MONTH__DATE_RE ],
196
- [ '[ES_DD_MONTH]', ES__DD_MONTH__DATE_RE ]
197
- ]
198
-
173
+ #############################################
174
+ # map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served
199
175
 
200
176
  FORMATS_BASE = [ ### all numbers (no month names or weekday) - find a better name?
201
- [ '[YYYY_MM_DD_hh_mm]', DB__DATE_TIME_RE ],
202
- [ '[YYYY_MM_DD]', DB__DATE_RE ],
203
- [ '[DD_MM_YYYY_hh_mm]', DD_MM_YYYY__DATE_TIME_RE ],
204
- [ '[DD_MM_hh_mm]', DD_MM__DATE_TIME_RE ],
205
- [ '[DD_MM_YYYY]', DD_MM_YYYY__DATE_RE ],
206
- [ '[DD_MM]', DD_MM__DATE_RE ],
177
+ [ DB__DATE_TIME_RE, '[YYYY_MM_DD_hh_mm]' ],
178
+ [ DB__DATE_RE, '[YYYY_MM_DD]' ],
179
+ [ DD_MM_YYYY__DATE_TIME_RE, '[DD_MM_YYYY_hh_mm]' ],
180
+ [ DD_MM__DATE_TIME_RE, '[DD_MM_hh_mm]' ],
181
+ [ DD_MM_YYYY__DATE_RE, '[DD_MM_YYYY]' ],
182
+ [ DD_MM__DATE_RE, '[DD_MM]' ],
207
183
  ]
208
184
 
209
185
  FORMATS_EN = [
210
- [ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_RE ],
211
- [ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_RE ],
212
- [ '[EN_MONTH_DD_hh_mm]', EN__MONTH_DD__DATE_TIME_RE ],
213
- [ '[EN_MONTH_DD_YYYY]', EN__MONTH_DD_YYYY__DATE_RE ],
214
- [ '[EN_MONTH_DD]', EN__MONTH_DD__DATE_RE ],
215
- [ '[EN_DD_MONTH]', EN__DD_MONTH__DATE_RE ],
186
+ [ EN__DD_MONTH_YYYY__DATE_TIME_RE, '[EN_DD_MONTH_YYYY_hh_mm]' ],
187
+ [ EN__MONTH_DD_YYYY__DATE_TIME_RE, '[EN_MONTH_DD_YYYY_hh_mm]' ],
188
+ [ EN__MONTH_DD__DATE_TIME_RE, '[EN_MONTH_DD_hh_mm]' ],
189
+ [ EN__MONTH_DD_YYYY__DATE_RE, '[EN_MONTH_DD_YYYY]' ],
190
+ [ EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]' ],
191
+ [ EN__DD_MONTH__DATE_RE, '[EN_DD_MONTH]' ],
216
192
  ]
217
193
 
218
194
  FORMATS_FR = [
219
- [ '[FR_WEEKDAY_DD_MONTH]', FR__WEEKDAY_DD_MONTH__DATE_RE ],
195
+ [ FR__DAY_DD_MONTH__DATE_RE, '[FR_DAY_DD_MONTH]' ],
220
196
  ]
221
197
 
222
198
  FORMATS_ES = [
223
- [ '[ES_DD_MONTH]', ES__DD_MONTH__DATE_RE ],
199
+ [ ES__DD_MONTH__DATE_RE, '[ES_DD_MONTH]' ],
224
200
  ]
225
201
 
226
202
 
227
203
  FORMATS = {
228
- 'en' => FORMATS_BASE+FORMATS_EN,
229
- 'fr' => FORMATS_BASE+FORMATS_FR,
230
- 'es' => FORMATS_BASE+FORMATS_ES,
204
+ en: FORMATS_BASE+FORMATS_EN,
205
+ fr: FORMATS_BASE+FORMATS_FR,
206
+ es: FORMATS_BASE+FORMATS_ES,
231
207
  }
232
208
 
233
-
234
209
  end # module DateFormats
@@ -34,45 +34,32 @@ class Reader ## todo/check: rename to WordReader or something for easy (re)use
34
34
  end
35
35
  lines
36
36
  end # method parse
37
+ end # class Reader
38
+
37
39
 
38
- def self.parse_month( txt )
39
- lines = parse( txt )
40
- if lines.size != 12
41
- puts "*** !!! ERROR !!! reading month names; got #{lines.size} lines - expected 12"
42
- exit 1
43
- end
44
- lines
40
+
41
+ def self.parse_month( txt )
42
+ lines = Reader.parse( txt )
43
+ if lines.size != 12
44
+ puts "*** !!! ERROR !!! reading month names; got #{lines.size} lines - expected 12"
45
+ exit 1
45
46
  end
47
+ lines
48
+ end
46
49
 
47
- def self.parse_weekday( txt )
48
- lines = parse( txt )
49
- if lines.size != 7
50
- puts "*** !!! ERROR !!! reading weekday names; got #{lines.size} lines - expected 7"
51
- exit 1
52
- end
53
- lines
50
+ def self.parse_day( txt )
51
+ lines = Reader.parse( txt )
52
+ if lines.size != 7
53
+ puts "*** !!! ERROR !!! reading day names; got #{lines.size} lines - expected 7"
54
+ exit 1
54
55
  end
55
- end # class Reader
56
+ lines
57
+ end
56
58
 
57
59
 
58
- def self.build_re( lines )
60
+ def self.build_names( lines )
59
61
  ## join all words together into a single string e.g.
60
62
  ## January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
61
63
  lines.map { |line| line.join('|') }.join('|')
62
64
  end
63
-
64
- def self.build_map( lines )
65
- ## build a lookup map that maps the word to the index (line no) plus 1 e.g.
66
- ## note: index is a string too
67
- ## {"January" => "1", "Jan" => "1",
68
- ## "February" => "2", "Feb" => "2",
69
- ## "March" => "3", "Mar" => "3",
70
- ## "April" => "4", "Apr" => "4",
71
- ## "May" => "5",
72
- ## "June" => "6", "Jun" => "6", ...
73
- lines.each_with_index.reduce( {} ) do |h,(line,i)|
74
- line.each { |name| h[ name ] = (i+1).to_s } ## note: start mapping with 1 (and NOT zero-based, that is, 0)
75
- h
76
- end
77
- end
78
65
  end # module DateFormats
@@ -1,11 +1,16 @@
1
1
  module DateFormats
2
- module Source
3
2
 
4
3
  # todo: make more generic for reuse
5
4
  ### fix:
6
5
  ## use date/en.txt or en.txt etc. -- why? why not?
7
6
 
8
- MONTH_EN = <<TXT
7
+ ## note: always sort lines with longest words, abbreviations first!!!!
8
+ ## todo/fix: add/split into MONTH_NAMES and MONTH_ABBREVS (and DAY_NAMES and DAY_ABBREVS) - why? why not?
9
+ MONTH_NAMES = {}
10
+ DAY_NAMES = {}
11
+
12
+
13
+ MONTH_NAMES[:en] = <<TXT
9
14
  January Jan
10
15
  February Feb
11
16
  March Mar
@@ -20,11 +25,12 @@ November Nov
20
25
  December Dec
21
26
  TXT
22
27
 
23
- WEEKDAY_EN = <<TXT
28
+
29
+ DAY_NAMES[:en] = <<TXT
24
30
  Monday Mon
25
- Tuesday Tu Tue Tues
31
+ Tuesday Tues Tue Tu
26
32
  Wednesday Wed
27
- Thursday Th Thu Thur Thurs
33
+ Thursday Thurs Thur Thu Th
28
34
  Friday Fri
29
35
  Saturday Sat
30
36
  Sunday Sun
@@ -32,7 +38,7 @@ TXT
32
38
 
33
39
 
34
40
 
35
- MONTH_FR = <<TXT
41
+ MONTH_NAMES[:fr] = <<TXT
36
42
  Janvier Janv Jan ## check janv in use??
37
43
  Février Févr Fév ## check fevr in use???
38
44
  Mars Mar
@@ -47,7 +53,7 @@ Novembre Nove Nov ## check nove in use??
47
53
  Décembre Déce Déc ## check dece in use??
48
54
  TXT
49
55
 
50
- WEEKDAY_FR = <<TXT
56
+ DAY_NAMES[:fr] = <<TXT
51
57
  Lundi Lun L
52
58
  Mardi Mar Ma
53
59
  Mercredi Mer Me
@@ -59,7 +65,7 @@ TXT
59
65
 
60
66
 
61
67
 
62
- MONTH_ES = <<TXT
68
+ MONTH_NAMES[:es] = <<TXT
63
69
  Enero Ene
64
70
  Febrero Feb
65
71
  Marzo Mar
@@ -74,7 +80,7 @@ Noviembre Nov
74
80
  Diciembre Dic
75
81
  TXT
76
82
 
77
- MONTH_DE = <<TXT
83
+ MONTH_NAMES[:de] = <<TXT
78
84
  Jänner Januar Jan Jän # note: in Austria - Jänner; in Deutschland Januar allow both ??
79
85
  Feber Februar Feb
80
86
  März Mär
@@ -89,7 +95,7 @@ November Nov
89
95
  Dezember Dez
90
96
  TXT
91
97
 
92
- MONTH_IT = <<TXT
98
+ MONTH_NAMES[:it] = <<TXT
93
99
  Gennaio
94
100
  Febbraio
95
101
  Marzo
@@ -104,7 +110,7 @@ Novembre
104
110
  Dicembre
105
111
  TXT
106
112
 
107
- MONTH_PT = <<TXT
113
+ MONTH_NAMES[:pt] = <<TXT
108
114
  Janeiro
109
115
  Fevereiro
110
116
  Março
@@ -119,7 +125,7 @@ Novembro
119
125
  Dezembro
120
126
  TXT
121
127
 
122
- MONTH_RO = <<TXT
128
+ MONTH_NAMES[:ro] = <<TXT
123
129
  Ianuarie
124
130
  Februarie
125
131
  Martie
@@ -134,5 +140,9 @@ Noiembrie
134
140
  Decembrie
135
141
  TXT
136
142
 
137
- end # module Source
143
+ ############################################
144
+ ## convert (unparsed) text to (parsed) lines with words
145
+ MONTH_NAMES.each {|k,v| MONTH_NAMES[k] = parse_month(v) }
146
+ DAY_NAMES.each {|k,v| DAY_NAMES[k] = parse_day(v) }
147
+
138
148
  end # module DateFormats
@@ -3,8 +3,8 @@
3
3
 
4
4
  module DateFormats
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 1
7
- PATCH = 1
6
+ MINOR = 2
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -15,7 +15,7 @@ class TestDate < MiniTest::Test
15
15
  [ '21.01.2013 21.30', '2013-01-21 21:30', '[DD_MM_YYYY_hh_mm]' ],
16
16
  [ '26.01.2013', '2013-01-26', '[DD_MM_YYYY]' ],
17
17
  [ '[26.01.2013]', '2013-01-26', '[[DD_MM_YYYY]]' ],
18
- [ '[21.1.]', '2013-01-21 00:00', '[[DD_MM]]' ]
18
+ [ '[21.1.]', '2013-01-21', '[[DD_MM]]' ]
19
19
  ]
20
20
 
21
21
  assert_dates( data, start: Date.new( 2013, 1, 1 ) )
@@ -23,15 +23,15 @@ class TestDate < MiniTest::Test
23
23
 
24
24
  def test_date_fr
25
25
  data = [
26
- [ '[Ven 08. Août]', '2014-08-08' ],
27
- [ 'Ven 08. Août', '2014-08-08' ],
28
- [ 'Ven 8. Août', '2014-08-08' ],
29
- [ '[Sam 9. Août]', '2014-08-09' ],
30
- [ '[Dim 10. Août]', '2014-08-10' ],
31
- [ '[Sam 31. Janv]', '2015-01-31' ],
32
- [ '[Sam 7. Févr]', '2015-02-07' ],
33
- [ '[Sam 31. Jan]', '2015-01-31' ],
34
- [ '[Sam 7. Fév]', '2015-02-07' ],
26
+ [ '[Ven 08. Août]', '2014-08-08', '[[FR_DAY_DD_MONTH]]' ],
27
+ [ 'Ven 08. Août', '2014-08-08', '[FR_DAY_DD_MONTH]' ],
28
+ [ 'Ven 8. Août', '2014-08-08', '[FR_DAY_DD_MONTH]' ],
29
+ [ '[Sam 9. Août]', '2014-08-09', '[[FR_DAY_DD_MONTH]]' ],
30
+ [ '[Dim 10. Août]', '2014-08-10', '[[FR_DAY_DD_MONTH]]' ],
31
+ [ '[Sam 31. Janv]', '2015-01-31', '[[FR_DAY_DD_MONTH]]' ],
32
+ [ '[Sam 7. Févr]', '2015-02-07', '[[FR_DAY_DD_MONTH]]' ],
33
+ [ '[Sam 31. Jan]', '2015-01-31', '[[FR_DAY_DD_MONTH]]' ],
34
+ [ '[Sam 7. Fév]', '2015-02-07', '[[FR_DAY_DD_MONTH]]' ],
35
35
  ]
36
36
 
37
37
  assert_dates( data, start: Date.new( 2014, 8, 1 ), lang: 'fr' )
@@ -39,21 +39,21 @@ class TestDate < MiniTest::Test
39
39
 
40
40
  def test_date_en
41
41
  data = [
42
- [ 'Jun/12 2011 14:00', '2011-06-12 14:00' ],
43
- [ 'Oct/12 2013 16:00', '2013-10-12 16:00' ],
42
+ [ 'Jun/12 2011 14:00', '2011-06-12 14:00', '[EN_MONTH_DD_YYYY_hh_mm]' ],
43
+ [ 'Oct/12 2013 16:00', '2013-10-12 16:00', '[EN_MONTH_DD_YYYY_hh_mm]' ],
44
44
 
45
- [ 'Jan/26 2011', '2011-01-26' ],
46
- [ 'Jan/26 2011', '2011-01-26 00:00' ],
45
+ [ 'Jan/26 2011', '2011-01-26', '[EN_MONTH_DD_YYYY]' ],
46
+ [ 'Jan/26 2011', '2011-01-26', '[EN_MONTH_DD_YYYY]' ],
47
47
 
48
- [ 'Jan/26', '2013-01-26' ],
49
- [ 'Jan/26', '2013-01-26 00:00' ],
50
- [ '26 January', '2013-01-26' ],
51
- [ '26 January', '2013-01-26 00:00' ],
48
+ [ 'Jan/26', '2013-01-26', '[EN_MONTH_DD]' ],
49
+ [ 'Jan/26', '2013-01-26', '[EN_MONTH_DD]' ],
50
+ [ '26 January', '2013-01-26', '[EN_DD_MONTH]' ],
51
+ [ '26 January', '2013-01-26', '[EN_DD_MONTH]' ],
52
52
 
53
- [ 'Jun/13', '2013-06-13' ],
54
- [ 'Jun/13', '2013-06-13 00:00' ],
55
- [ '13 June', '2013-06-13' ],
56
- [ '13 June', '2013-06-13 00:00' ]
53
+ [ 'Jun/13', '2013-06-13', '[EN_MONTH_DD]' ],
54
+ [ 'Jun/13', '2013-06-13', '[EN_MONTH_DD]' ],
55
+ [ '13 June', '2013-06-13', '[EN_DD_MONTH]' ],
56
+ [ '13 June', '2013-06-13', '[EN_DD_MONTH]' ]
57
57
  ]
58
58
 
59
59
  assert_dates( data, start: Date.new( 2013, 1, 1 ), lang: 'en' )
@@ -66,7 +66,11 @@ private
66
66
  data.each do |rec|
67
67
  line = rec[0]
68
68
  str = rec[1]
69
- value = parse_date( line, start: start, lang: lang )
69
+
70
+ ## note: test / use parse and find! -- parse MUST go first
71
+ values = []
72
+ values << DateFormats.parse( line, start: start, lang: lang )
73
+ values << DateFormats.find!( line, start: start, lang: lang )
70
74
 
71
75
  tagged_line = rec[2] ## optional tagged line
72
76
  if tagged_line ## note: line gets tagged inplace!!! (no new string)
@@ -74,14 +78,17 @@ private
74
78
  puts "#{line} == #{tagged_line}"
75
79
  end
76
80
 
77
- if str.index( ':' )
78
- assert_datetime( DateTime.strptime( str, '%Y-%m-%d %H:%M' ), value )
79
- else
80
- assert_date( DateTime.strptime( str, '%Y-%m-%d' ), value )
81
+ values.each do |value|
82
+ if str.index( ':' )
83
+ assert_datetime( DateTime.strptime( str, '%Y-%m-%d %H:%M' ), value )
84
+ else
85
+ assert_date( Date.strptime( str, '%Y-%m-%d' ), value )
86
+ end
81
87
  end
82
88
  end
83
89
  end
84
90
 
91
+
85
92
  ## todo: check if assert_datetime or assert_date exist already? what is the best practice to check dates ???
86
93
  def assert_date( exp, value )
87
94
  assert_equal exp.year, value.year
@@ -98,12 +105,4 @@ private
98
105
  assert_date( exp, value )
99
106
  assert_time( exp, value )
100
107
  end
101
-
102
-
103
- def parse_date( line, start:, lang: )
104
- # e.g. lets you pass in opts[:start_at] ???
105
- finder = DateFormats::DateFinder.new( lang: lang )
106
- finder.find!( line, start_at: start )
107
- end
108
-
109
108
  end # class TestDate
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: date-formats
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-08 00:00:00.000000000 Z
11
+ date: 2019-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: logutils