date-formats 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/Rakefile +4 -3
- data/lib/date-formats.rb +120 -2
- data/lib/date-formats/date.rb +150 -0
- data/lib/date-formats/formats.rb +231 -1
- data/lib/date-formats/version.rb +2 -2
- data/test/test_date.rb +102 -0
- metadata +18 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d66ff71b3025105325d5411ef4ea1e1787cb6c5c
|
4
|
+
data.tar.gz: e3a61033de8bd61b13cf5dbb3482b6a2fec4217e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6cfec2ec58486a7dde4af6a55aafd4a6609017ef58f97388f29f4c1c7f83716d835b85d49cc822d041bde5c449b7aca7865e48fa6fddb3007f970ce02ce460d3
|
7
|
+
data.tar.gz: 5cef395ab1d13386395b13c7621bd7bfdb4981c44fcbd7c09cc126c086c230115dbae202f85ed550cb9d0b196f6646f6d484c28141dc3cd37fc44e333050baec
|
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
@@ -19,10 +19,11 @@ Hoe.spec 'date-formats' do
|
|
19
19
|
|
20
20
|
self.licenses = ['Public Domain']
|
21
21
|
|
22
|
-
self.extra_deps = [
|
22
|
+
self.extra_deps = [
|
23
|
+
['logutils', '>= 0.6.1'],
|
24
|
+
]
|
23
25
|
|
24
26
|
self.spec_extras = {
|
25
|
-
|
27
|
+
required_ruby_version: '>= 2.2.2'
|
26
28
|
}
|
27
|
-
|
28
29
|
end
|
data/lib/date-formats.rb
CHANGED
@@ -1,15 +1,133 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
+
require 'pp'
|
3
4
|
require 'time'
|
4
5
|
require 'date'
|
5
|
-
|
6
|
+
|
7
|
+
## 3rd party libs/gems
|
8
|
+
require 'logutils'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
module DateFormats
|
13
|
+
|
14
|
+
MONTH_EN_TO_MM = {
|
15
|
+
'Jan' => '1', 'January' => '1',
|
16
|
+
'Feb' => '2', 'February' => '2',
|
17
|
+
'Mar' => '3', 'March' => '3',
|
18
|
+
'Apr' => '4', 'April' => '4',
|
19
|
+
'May' => '5',
|
20
|
+
'Jun' => '6', 'June' => '6',
|
21
|
+
'Jul' => '7', 'July' => '7',
|
22
|
+
'Aug' => '8', 'August' => '8',
|
23
|
+
'Sep' => '9', 'Sept' => '9', 'September' => '9',
|
24
|
+
'Oct' => '10', 'October' => '10',
|
25
|
+
'Nov' => '11', 'November' => '11',
|
26
|
+
'Dec' => '12', 'December' =>'12' }
|
27
|
+
|
28
|
+
MONTH_EN = 'January|Jan|'+
|
29
|
+
'February|Feb|'+
|
30
|
+
'March|Mar|'+
|
31
|
+
'April|Apr|'+
|
32
|
+
'May|'+
|
33
|
+
'June|Jun|'+
|
34
|
+
'July|Jul|'+
|
35
|
+
'August|Aug|'+
|
36
|
+
'September|Sept|Sep|'+
|
37
|
+
'October|Oct|'+
|
38
|
+
'November|Nov|'+
|
39
|
+
'December|Dec'
|
40
|
+
|
41
|
+
###
|
42
|
+
## todo: add days
|
43
|
+
## 1. Sunday - Sun. 2. Monday - Mon.
|
44
|
+
## 3. Tuesday - Tu., Tue., or Tues. 4. Wednesday - Wed.
|
45
|
+
## 5. Thursday - Th., Thu., Thur., or Thurs. 6. Friday - Fri.
|
46
|
+
## 7. Saturday - Sat.
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
MONTH_FR_TO_MM = {
|
51
|
+
'Janvier' => '1', 'Janv' => '1', 'Jan' => '1', ## check janv in use??
|
52
|
+
'Février' => '2', 'Févr' => '2', 'Fév' => '2', ## check fevr in use???
|
53
|
+
'Mars' => '3', 'Mar' => '3',
|
54
|
+
'Avril' => '4', 'Avri' => '4', 'Avr' => '4', ## check avri in use??? if not remove
|
55
|
+
'Mai' => '5',
|
56
|
+
'Juin' => '6',
|
57
|
+
'Juillet' => '7', 'Juil' => '7',
|
58
|
+
'Août' => '8',
|
59
|
+
'Septembre' => '9', 'Sept' => '9',
|
60
|
+
'Octobre' => '10', 'Octo' => '10', 'Oct' => '10', ### check octo in use??
|
61
|
+
'Novembre' => '11', 'Nove' => '11', 'Nov' => '11', ## check nove in use??
|
62
|
+
'Décembre' => '12', 'Déce' => '12', 'Déc' => '12' } ## check dece in use??
|
63
|
+
|
64
|
+
MONTH_FR = 'Janvier|Janv|Jan|' +
|
65
|
+
'Février|Févr|Fév|' +
|
66
|
+
'Mars|Mar|' +
|
67
|
+
'Avril|Avri|Avr|' +
|
68
|
+
'Mai|' +
|
69
|
+
'Juin|' +
|
70
|
+
'Juillet|Juil|' +
|
71
|
+
'Août|' +
|
72
|
+
'Septembre|Sept|' +
|
73
|
+
'Octobre|Octo|Oct|' +
|
74
|
+
'Novembre|Nove|Nov|' +
|
75
|
+
'Décembre|Déce|Déc'
|
76
|
+
|
77
|
+
WEEKDAY_FR = 'Lundi|Lun|L|' +
|
78
|
+
'Mardi|Mar|Ma|' +
|
79
|
+
'Mercredi|Mer|Me|' +
|
80
|
+
'Jeudi|Jeu|J|' +
|
81
|
+
'Vendredi|Ven|V|' +
|
82
|
+
'Samedi|Sam|S|' +
|
83
|
+
'Dimanche|Dim|D|'
|
84
|
+
|
85
|
+
|
86
|
+
MONTH_ES_TO_MM = {
|
87
|
+
'Ene' => '1', 'Enero' => '1',
|
88
|
+
'Feb' => '2',
|
89
|
+
'Mar' => '3', 'Marzo' => '3',
|
90
|
+
'Abr' => '4', 'Abril' => '4',
|
91
|
+
'May' => '5', 'Mayo' => '5',
|
92
|
+
'Jun' => '6', 'Junio' => '6',
|
93
|
+
'Jul' => '7', 'Julio' => '7',
|
94
|
+
'Ago' => '8', 'Agosto' => '8',
|
95
|
+
'Sep' => '9', 'Set' => '9', 'Sept' => '9',
|
96
|
+
'Oct' => '10',
|
97
|
+
'Nov' => '11',
|
98
|
+
'Dic' => '12' }
|
99
|
+
|
100
|
+
MONTH_ES = 'Enero|Ene|'+
|
101
|
+
'Feb|'+
|
102
|
+
'Marzo|Mar|'+
|
103
|
+
'Abril|Abr|'+
|
104
|
+
'Mayo|May|'+
|
105
|
+
'Junio|Jun|'+
|
106
|
+
'Julio|Jul|'+
|
107
|
+
'Agosto|Ago|'+
|
108
|
+
'Sept|Set|Sep|'+
|
109
|
+
'Oct|'+
|
110
|
+
'Nov|'+
|
111
|
+
'Dic'
|
112
|
+
|
113
|
+
|
114
|
+
# todo: make more generic for reuse
|
115
|
+
### fix:
|
116
|
+
## use date/fr.yml en.yml etc. -- why? why not?
|
117
|
+
|
118
|
+
# todo/fix - add de and es too!!
|
119
|
+
# note: in Austria - Jänner - in Deutschland Januar allow both ??
|
120
|
+
# MONTH_DE = 'J[aä]n|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez'
|
121
|
+
|
122
|
+
end # module DateFormats
|
123
|
+
|
6
124
|
|
7
125
|
|
8
126
|
###
|
9
127
|
# our own code
|
10
128
|
require 'date-formats/version' # let version always go first
|
11
129
|
require 'date-formats/formats'
|
12
|
-
|
130
|
+
require 'date-formats/date'
|
13
131
|
|
14
132
|
|
15
133
|
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module DateFormats
|
4
|
+
|
5
|
+
|
6
|
+
class DateFinderBase
|
7
|
+
|
8
|
+
private
|
9
|
+
def calc_year( month, day, start_at: ) ## note: start_at required param for now on!!!
|
10
|
+
|
11
|
+
logger.debug " [calc_year] ????-#{month}-#{day} -- start_at: #{start_at}"
|
12
|
+
|
13
|
+
if month >= start_at.month
|
14
|
+
# assume same year as start_at event (e.g. 2013 for 2013/14 season)
|
15
|
+
start_at.year
|
16
|
+
else
|
17
|
+
# assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
|
18
|
+
start_at.year+1
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
|
23
|
+
def parse_date_time( match_data, start_at: )
|
24
|
+
|
25
|
+
# convert regex match_data captures to hash
|
26
|
+
# - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
|
27
|
+
h = {}
|
28
|
+
# - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
|
29
|
+
match_data.names.each { |name| h[name.to_sym] = match_data[name] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
|
30
|
+
|
31
|
+
## puts "[parse_date_time] match_data:"
|
32
|
+
## pp h
|
33
|
+
logger.debug " [parse_date_time] hash: >#{h.inspect}<"
|
34
|
+
|
35
|
+
h[ :month ] = MONTH_EN_TO_MM[ h[:month_en] ] if h[:month_en]
|
36
|
+
h[ :month ] = MONTH_ES_TO_MM[ h[:month_es] ] if h[:month_es]
|
37
|
+
h[ :month ] = MONTH_FR_TO_MM[ h[:month_fr] ] if h[:month_fr]
|
38
|
+
|
39
|
+
month = h[:month]
|
40
|
+
day = h[:day]
|
41
|
+
year = h[:year] || calc_year( month.to_i, day.to_i, start_at: start_at ).to_s
|
42
|
+
|
43
|
+
hours = h[:hours] || '00' # default to 00:00 for HH:MM (hours:minutes)
|
44
|
+
minutes = h[:minutes] || '00'
|
45
|
+
|
46
|
+
value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i]
|
47
|
+
logger.debug " date: >#{value}<"
|
48
|
+
|
49
|
+
DateTime.strptime( value, '%Y-%m-%d %H:%M' )
|
50
|
+
end
|
51
|
+
|
52
|
+
end # class DateFinderBase
|
53
|
+
|
54
|
+
|
55
|
+
class DateFinder < DateFinderBase
|
56
|
+
|
57
|
+
include LogUtils::Logging
|
58
|
+
|
59
|
+
|
60
|
+
def self.lang() @@lang ||= 'en'; end ## defaults to english (en)
|
61
|
+
def self.lang=(value) @@lang = value; end
|
62
|
+
|
63
|
+
|
64
|
+
def initialize( lang: self.class.lang )
|
65
|
+
@lang = lang.to_s
|
66
|
+
## fallback to english if lang not available
|
67
|
+
## todo/fix: add/issue warning!!!!!
|
68
|
+
@formats = FORMATS[ @lang ] || FORMATS['en']
|
69
|
+
end
|
70
|
+
|
71
|
+
def find!( line, start_at: )
|
72
|
+
# fix: use more lookahead for all required trailing spaces!!!!!
|
73
|
+
# fix: use <name capturing group> for month,day,year etc.!!!
|
74
|
+
|
75
|
+
#
|
76
|
+
# fix: !!!!
|
77
|
+
# date in [] will become [[DATE.DE4]] - when getting removed will keep ]!!!!
|
78
|
+
# fix: change regex to \[[A-Z0-9.]\] !!!!!! plus add unit test too!!!
|
79
|
+
#
|
80
|
+
|
81
|
+
m = nil
|
82
|
+
@formats.each do |format|
|
83
|
+
tag = format[0]
|
84
|
+
pattern = format[1]
|
85
|
+
m=pattern.match( line )
|
86
|
+
if m
|
87
|
+
date = parse_date_time( m, start_at: start_at )
|
88
|
+
## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
|
89
|
+
## fix: use md.begin(0), md.end(0)
|
90
|
+
line.sub!( m[0], tag )
|
91
|
+
## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
|
92
|
+
return date
|
93
|
+
end
|
94
|
+
# no match; continue; try next pattern
|
95
|
+
end
|
96
|
+
|
97
|
+
return nil # no match found
|
98
|
+
end
|
99
|
+
|
100
|
+
end # class DateFinder
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
class RsssfDateFinder < DateFinderBase
|
105
|
+
|
106
|
+
include LogUtils::Logging
|
107
|
+
|
108
|
+
MONTH_EN = 'Jan|'+
|
109
|
+
'Feb|'+
|
110
|
+
'March|Mar|'+
|
111
|
+
'April|Apr|'+
|
112
|
+
'May|'+
|
113
|
+
'June|Jun|'+
|
114
|
+
'July|Jul|'+
|
115
|
+
'Aug|'+
|
116
|
+
'Sept|Sep|'+
|
117
|
+
'Oct|'+
|
118
|
+
'Nov|'+
|
119
|
+
'Dec'
|
120
|
+
|
121
|
+
## e.g.
|
122
|
+
## [Jun 7] or [Aug 12] etc. - note: MUST include brackets e.g. []
|
123
|
+
##
|
124
|
+
## check add \b at the beginning and end - why?? why not?? working??
|
125
|
+
EN__MONTH_DD__DATE_RE = /\[
|
126
|
+
(?<month_en>#{MONTH_EN})
|
127
|
+
\s
|
128
|
+
(?<day>\d{1,2})
|
129
|
+
\]/x
|
130
|
+
|
131
|
+
def find!( line, start_at: )
|
132
|
+
# fix: use more lookahead for all required trailing spaces!!!!!
|
133
|
+
# fix: use <name capturing group> for month,day,year etc.!!!
|
134
|
+
|
135
|
+
tag = '[EN_MONTH_DD]'
|
136
|
+
pattern = EN__MONTH_DD__DATE_RE
|
137
|
+
m = pattern.match( line )
|
138
|
+
if m
|
139
|
+
date = parse_date_time( m, start_at: start_at )
|
140
|
+
## fix: use md[0] e.g. match for sub! instead of using regex again - why? why not???
|
141
|
+
## fix: use md.begin(0), md.end(0)
|
142
|
+
line.sub!( m[0], tag )
|
143
|
+
## todo/fix: make sure match data will not get changed (e.g. using sub! before parse_date_time)
|
144
|
+
return date
|
145
|
+
end
|
146
|
+
return nil # no match found
|
147
|
+
end
|
148
|
+
end ## class RsssfDateFinder
|
149
|
+
|
150
|
+
end # module DateFormats
|
data/lib/date-formats/formats.rb
CHANGED
@@ -1,4 +1,234 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
|
4
|
-
|
4
|
+
module DateFormats
|
5
|
+
|
6
|
+
# e.g. 2012-09-14 20:30 => YYYY-MM-DD HH:MM
|
7
|
+
# note: allow 2012-9-3 7:30 e.g. no leading zero required
|
8
|
+
# regex_db
|
9
|
+
DB__DATE_TIME_RE = /\b
|
10
|
+
(?<year>\d{4})
|
11
|
+
-
|
12
|
+
(?<month>\d{1,2})
|
13
|
+
-
|
14
|
+
(?<day>\d{1,2})
|
15
|
+
\s+
|
16
|
+
(?<hours>\d{1,2})
|
17
|
+
:
|
18
|
+
(?<minutes>\d{2})
|
19
|
+
\b/x
|
20
|
+
|
21
|
+
# e.g. 2012-09-14 w/ implied hours (set to 00:00)
|
22
|
+
# note: allow 2012-9-3 e.g. no leading zero required
|
23
|
+
# regex_db2
|
24
|
+
DB__DATE_RE = /\b
|
25
|
+
(?<year>\d{4})
|
26
|
+
-
|
27
|
+
(?<month>\d{1,2})
|
28
|
+
-
|
29
|
+
(?<day>\d{1,2})
|
30
|
+
\b/x
|
31
|
+
|
32
|
+
|
33
|
+
# e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM
|
34
|
+
# note: allow 2.3.2012 e.g. no leading zero required
|
35
|
+
# note: allow hour as 20.30
|
36
|
+
# regex_de
|
37
|
+
DD_MM_YYYY__DATE_TIME_RE = /\b
|
38
|
+
(?<day>\d{1,2})
|
39
|
+
\.
|
40
|
+
(?<month>\d{1,2})
|
41
|
+
\.
|
42
|
+
(?<year>\d{4})
|
43
|
+
\s+
|
44
|
+
(?<hours>\d{1,2})
|
45
|
+
[:.]
|
46
|
+
(?<minutes>\d{2})
|
47
|
+
\b/x
|
48
|
+
|
49
|
+
# e.g. 14.09. 20:30 => DD.MM. HH:MM
|
50
|
+
# note: allow 2.3. e.g. no leading zero required
|
51
|
+
# note: allow hour as 20.30 or 3.30 instead of 03.30
|
52
|
+
# regex_de2
|
53
|
+
DD_MM__DATE_TIME_RE = /\b
|
54
|
+
(?<day>\d{1,2})
|
55
|
+
\.
|
56
|
+
(?<month>\d{1,2})
|
57
|
+
\.
|
58
|
+
\s+
|
59
|
+
(?<hours>\d{1,2})
|
60
|
+
[:.]
|
61
|
+
(?<minutes>\d{2})
|
62
|
+
\b/x
|
63
|
+
|
64
|
+
# e.g. 14.09.2012 => DD.MM.YYYY w/ implied hours (set to 00:00)
|
65
|
+
# regex_de3
|
66
|
+
DD_MM_YYYY__DATE_RE = /\b
|
67
|
+
(?<day>\d{1,2})
|
68
|
+
\.
|
69
|
+
(?<month>\d{1,2})
|
70
|
+
\.
|
71
|
+
(?<year>\d{4})
|
72
|
+
\b/x
|
73
|
+
|
74
|
+
# e.g. 14.09. => DD.MM. w/ implied year and implied hours (set to 00:00)
|
75
|
+
# note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
|
76
|
+
# note: we use a lookahead for last part e.g. (?=\s+|$|[\]]) - do NOT consume
|
77
|
+
# regex_de4 (use lookahead assert)
|
78
|
+
DD_MM__DATE_RE = /\b
|
79
|
+
(?<day>\d{1,2})
|
80
|
+
\.
|
81
|
+
(?<month>\d{1,2})
|
82
|
+
\.
|
83
|
+
(?=\s+|$|[\]])/x ## note: allow end-of-string/line too
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
##
|
88
|
+
# e.g. 12 May 2013 14:00 => D|DD.MMM.YYYY H|HH:MM
|
89
|
+
EN__DD_MONTH_YYYY__DATE_TIME_RE = /\b
|
90
|
+
(?<day>\d{1,2})
|
91
|
+
\s
|
92
|
+
(?<month_en>#{MONTH_EN})
|
93
|
+
\s
|
94
|
+
(?<year>\d{4})
|
95
|
+
\s+
|
96
|
+
(?<hours>\d{1,2})
|
97
|
+
:
|
98
|
+
(?<minutes>\d{2})
|
99
|
+
\b/x
|
100
|
+
|
101
|
+
###
|
102
|
+
# fix: pass in lang (e.g. en or es)
|
103
|
+
# only process format for lang plus fallback to en?
|
104
|
+
# e.g. EN__DD_MONTH and ES__DD_MONTH depend on order for match (first listed will match)
|
105
|
+
|
106
|
+
# e.g. 12 May => D|DD.MMM w/ implied year and implied hours
|
107
|
+
EN__DD_MONTH__DATE_RE = /\b
|
108
|
+
(?<day>\d{1,2})
|
109
|
+
\s
|
110
|
+
(?<month_en>#{MONTH_EN})
|
111
|
+
\b/x
|
112
|
+
|
113
|
+
|
114
|
+
# e.g. Jun/12 2011 14:00
|
115
|
+
EN__MONTH_DD_YYYY__DATE_TIME_RE = /\b
|
116
|
+
(?<month_en>#{MONTH_EN})
|
117
|
+
\/
|
118
|
+
(?<day>\d{1,2})
|
119
|
+
\s
|
120
|
+
(?<year>\d{4})
|
121
|
+
\s+
|
122
|
+
(?<hours>\d{1,2})
|
123
|
+
:
|
124
|
+
(?<minutes>\d{2})
|
125
|
+
\b/x
|
126
|
+
|
127
|
+
# e.g. Jun/12 14:00 w/ implied year H|HH:MM
|
128
|
+
EN__MONTH_DD__DATE_TIME_RE = /\b
|
129
|
+
(?<month_en>#{MONTH_EN})
|
130
|
+
\/
|
131
|
+
(?<day>\d{1,2})
|
132
|
+
\s+
|
133
|
+
(?<hours>\d{1,2})
|
134
|
+
:
|
135
|
+
(?<minutes>\d{2})
|
136
|
+
\b/x
|
137
|
+
|
138
|
+
# e.g. Jun/12 2013 w/ implied hours (set to 00:00)
|
139
|
+
EN__MONTH_DD_YYYY__DATE_RE = /\b
|
140
|
+
(?<month_en>#{MONTH_EN})
|
141
|
+
\/
|
142
|
+
(?<day>\d{1,2})
|
143
|
+
\s
|
144
|
+
(?<year>\d{4})
|
145
|
+
\b/x
|
146
|
+
|
147
|
+
# e.g. Jun/12 w/ implied year and implied hours (set to 00:00)
|
148
|
+
# note: allow space too e.g Jun 12 -- check if conflicts w/ other formats??? (added for rsssf reader)
|
149
|
+
# -- fix: might eat french weekday mar 12 is mardi (mar) !!! see FR__ pattern
|
150
|
+
# fix: remove space again for now - and use simple en date reader or something!!!
|
151
|
+
## was [\/ ] changed back to \/
|
152
|
+
EN__MONTH_DD__DATE_RE = /\b
|
153
|
+
(?<month_en>#{MONTH_EN})
|
154
|
+
\/
|
155
|
+
(?<day>\d{1,2})
|
156
|
+
\b/x
|
157
|
+
|
158
|
+
|
159
|
+
# e.g. 12 Ene w/ implied year and implied hours (set to 00:00)
|
160
|
+
ES__DD_MONTH__DATE_RE = /\b
|
161
|
+
(?<day>\d{1,2})
|
162
|
+
\s
|
163
|
+
(?<month_es>#{MONTH_ES})
|
164
|
+
\b/x
|
165
|
+
|
166
|
+
# e.g. Ven 8 Août or [Ven 8 Août] or Ven 8. Août or [Ven 8. Août]
|
167
|
+
### note: do NOT consume [] in regex (use lookahead assert)
|
168
|
+
FR__WEEKDAY_DD_MONTH__DATE_RE = /\b
|
169
|
+
(?:#{WEEKDAY_FR}) # note: skip weekday for now; do NOT capture
|
170
|
+
\s+
|
171
|
+
(?<day>\d{1,2})
|
172
|
+
\.? # note: make dot optional
|
173
|
+
\s+
|
174
|
+
(?<month_fr>#{MONTH_FR})
|
175
|
+
(?=\s+|$|[\]])/x ## note: allow end-of-string/line too
|
176
|
+
|
177
|
+
|
178
|
+
|
179
|
+
#
|
180
|
+
# map table - 1) tag, 2) regex - note: order matters; first come-first matched/served
|
181
|
+
## todo/fix: remove (move to attic)??? always use lang specific - why? why not?
|
182
|
+
FORMATS_ALL = [
|
183
|
+
[ '[YYYY_MM_DD_hh_mm]', DB__DATE_TIME_RE ],
|
184
|
+
[ '[YYYY_MM_DD]', DB__DATE_RE ],
|
185
|
+
[ '[DD_MM_YYYY_hh_mm]', DD_MM_YYYY__DATE_TIME_RE ],
|
186
|
+
[ '[DD_MM_hh_mm]', DD_MM__DATE_TIME_RE ],
|
187
|
+
[ '[DD_MM_YYYY]', DD_MM_YYYY__DATE_RE ],
|
188
|
+
[ '[DD_MM]', DD_MM__DATE_RE ],
|
189
|
+
[ '[FR_WEEKDAY_DD_MONTH]', FR__WEEKDAY_DD_MONTH__DATE_RE ],
|
190
|
+
[ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_RE ],
|
191
|
+
[ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_RE ],
|
192
|
+
[ '[EN_MONTH_DD_hh_mm]', EN__MONTH_DD__DATE_TIME_RE ],
|
193
|
+
[ '[EN_MONTH_DD_YYYY]', EN__MONTH_DD_YYYY__DATE_RE ],
|
194
|
+
[ '[EN_MONTH_DD]', EN__MONTH_DD__DATE_RE ],
|
195
|
+
[ '[EN_DD_MONTH]', EN__DD_MONTH__DATE_RE ],
|
196
|
+
[ '[ES_DD_MONTH]', ES__DD_MONTH__DATE_RE ]
|
197
|
+
]
|
198
|
+
|
199
|
+
|
200
|
+
FORMATS_BASE = [ ### all numbers (no month names or weekday) - find a better name?
|
201
|
+
[ '[YYYY_MM_DD_hh_mm]', DB__DATE_TIME_RE ],
|
202
|
+
[ '[YYYY_MM_DD]', DB__DATE_RE ],
|
203
|
+
[ '[DD_MM_YYYY_hh_mm]', DD_MM_YYYY__DATE_TIME_RE ],
|
204
|
+
[ '[DD_MM_hh_mm]', DD_MM__DATE_TIME_RE ],
|
205
|
+
[ '[DD_MM_YYYY]', DD_MM_YYYY__DATE_RE ],
|
206
|
+
[ '[DD_MM]', DD_MM__DATE_RE ],
|
207
|
+
]
|
208
|
+
|
209
|
+
FORMATS_EN = [
|
210
|
+
[ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_RE ],
|
211
|
+
[ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_RE ],
|
212
|
+
[ '[EN_MONTH_DD_hh_mm]', EN__MONTH_DD__DATE_TIME_RE ],
|
213
|
+
[ '[EN_MONTH_DD_YYYY]', EN__MONTH_DD_YYYY__DATE_RE ],
|
214
|
+
[ '[EN_MONTH_DD]', EN__MONTH_DD__DATE_RE ],
|
215
|
+
[ '[EN_DD_MONTH]', EN__DD_MONTH__DATE_RE ],
|
216
|
+
]
|
217
|
+
|
218
|
+
FORMATS_FR = [
|
219
|
+
[ '[FR_WEEKDAY_DD_MONTH]', FR__WEEKDAY_DD_MONTH__DATE_RE ],
|
220
|
+
]
|
221
|
+
|
222
|
+
FORMATS_ES = [
|
223
|
+
[ '[ES_DD_MONTH]', ES__DD_MONTH__DATE_RE ],
|
224
|
+
]
|
225
|
+
|
226
|
+
|
227
|
+
FORMATS = {
|
228
|
+
'en' => FORMATS_BASE+FORMATS_EN,
|
229
|
+
'fr' => FORMATS_BASE+FORMATS_FR,
|
230
|
+
'es' => FORMATS_BASE+FORMATS_ES,
|
231
|
+
}
|
232
|
+
|
233
|
+
|
234
|
+
end # module DateFormats
|
data/lib/date-formats/version.rb
CHANGED
data/test/test_date.rb
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_date.rb
|
6
|
+
|
7
|
+
require 'helper'
|
8
|
+
|
9
|
+
class TestDate < MiniTest::Test
|
10
|
+
|
11
|
+
def test_date
|
12
|
+
data = [
|
13
|
+
[ '19.01.2013 22.00', '2013-01-19 22:00' ],
|
14
|
+
[ '21.01.2013 21.30', '2013-01-21 21:30' ],
|
15
|
+
[ '26.01.2013', '2013-01-26' ],
|
16
|
+
[ '[26.01.2013]', '2013-01-26' ],
|
17
|
+
[ '[21.1.]', '2013-01-21 00:00' ]
|
18
|
+
]
|
19
|
+
|
20
|
+
assert_dates( data, start_at: Date.new( 2013, 1, 1 ) )
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_date_fr
|
24
|
+
data = [
|
25
|
+
[ '[Ven 08. Août]', '2014-08-08' ],
|
26
|
+
[ 'Ven 08. Août', '2014-08-08' ],
|
27
|
+
[ 'Ven 8. Août', '2014-08-08' ],
|
28
|
+
[ '[Sam 9. Août]', '2014-08-09' ],
|
29
|
+
[ '[Dim 10. Août]', '2014-08-10' ],
|
30
|
+
[ '[Sam 31. Janv]', '2015-01-31' ],
|
31
|
+
[ '[Sam 7. Févr]', '2015-02-07' ],
|
32
|
+
[ '[Sam 31. Jan]', '2015-01-31' ],
|
33
|
+
[ '[Sam 7. Fév]', '2015-02-07' ],
|
34
|
+
]
|
35
|
+
|
36
|
+
assert_dates( data, start_at: Date.new( 2014, 8, 1 ), lang: 'fr' )
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_date_en
|
40
|
+
data = [
|
41
|
+
[ 'Jun/12 2011 14:00', '2011-06-12 14:00' ],
|
42
|
+
[ 'Oct/12 2013 16:00', '2013-10-12 16:00' ],
|
43
|
+
|
44
|
+
[ 'Jan/26 2011', '2011-01-26' ],
|
45
|
+
[ 'Jan/26 2011', '2011-01-26 00:00' ],
|
46
|
+
|
47
|
+
[ 'Jan/26', '2013-01-26' ],
|
48
|
+
[ 'Jan/26', '2013-01-26 00:00' ],
|
49
|
+
[ '26 January', '2013-01-26' ],
|
50
|
+
[ '26 January', '2013-01-26 00:00' ],
|
51
|
+
|
52
|
+
[ 'Jun/13', '2013-06-13' ],
|
53
|
+
[ 'Jun/13', '2013-06-13 00:00' ],
|
54
|
+
[ '13 June', '2013-06-13' ],
|
55
|
+
[ '13 June', '2013-06-13 00:00' ]
|
56
|
+
]
|
57
|
+
|
58
|
+
assert_dates( data, start_at: Date.new( 2013, 1, 1 ), lang: 'en' )
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
private
|
64
|
+
def assert_dates( data, start_at:, lang: 'en' )
|
65
|
+
data.each do |rec|
|
66
|
+
line = rec[0]
|
67
|
+
str = rec[1]
|
68
|
+
value = parse_date( line, start_at: start_at, lang: lang )
|
69
|
+
|
70
|
+
if str.index( ':' )
|
71
|
+
assert_datetime( DateTime.strptime( str, '%Y-%m-%d %H:%M' ), value )
|
72
|
+
else
|
73
|
+
assert_date( DateTime.strptime( str, '%Y-%m-%d' ), value )
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
## todo: check if assert_datetime or assert_date exist already? what is the best practice to check dates ???
|
79
|
+
def assert_date( exp, value )
|
80
|
+
assert_equal exp.year, value.year
|
81
|
+
assert_equal exp.month, value.month
|
82
|
+
assert_equal exp.day, value.day
|
83
|
+
end
|
84
|
+
|
85
|
+
def assert_time( exp, value )
|
86
|
+
assert_equal exp.hour, value.hour
|
87
|
+
assert_equal exp.min, value.min
|
88
|
+
end
|
89
|
+
|
90
|
+
def assert_datetime( exp, value )
|
91
|
+
assert_date( exp, value )
|
92
|
+
assert_time( exp, value )
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
def parse_date( line, start_at:, lang: )
|
97
|
+
# e.g. lets you pass in opts[:start_at] ???
|
98
|
+
finder = DateFormats::DateFinder.new( lang: lang )
|
99
|
+
finder.find!( line, start_at: start_at )
|
100
|
+
end
|
101
|
+
|
102
|
+
end # class TestDate
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: date-formats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: logutils
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.6.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.1
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rdoc
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -53,9 +67,11 @@ files:
|
|
53
67
|
- README.md
|
54
68
|
- Rakefile
|
55
69
|
- lib/date-formats.rb
|
70
|
+
- lib/date-formats/date.rb
|
56
71
|
- lib/date-formats/formats.rb
|
57
72
|
- lib/date-formats/version.rb
|
58
73
|
- test/helper.rb
|
74
|
+
- test/test_date.rb
|
59
75
|
- test/test_formats.rb
|
60
76
|
homepage: https://github.com/sportdb/sport.db
|
61
77
|
licenses:
|