sportdb-models 1.18.0 → 1.18.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +0 -0
- data/Manifest.txt +1 -16
- data/Rakefile +6 -7
- data/lib/sportdb/models.rb +8 -30
- data/lib/sportdb/version.rb +1 -1
- data/test/test_assoc_reader.rb +1 -3
- data/test/test_changes.rb +9 -5
- data/test/test_cursor.rb +6 -1
- data/test/test_event_meta_reader.rb +0 -3
- data/test/test_event_reader.rb +2 -6
- data/test/test_event_table_reader.rb +1 -5
- metadata +24 -25
- data/config/fixtures/de.yml +0 -46
- data/config/fixtures/en.yml +0 -54
- data/config/fixtures/es.yml +0 -48
- data/config/fixtures/fr.yml +0 -53
- data/config/fixtures/it.yml +0 -55
- data/config/fixtures/pt.yml +0 -46
- data/config/fixtures/ro.yml +0 -55
- data/data/seasons.txt +0 -74
- data/data/setups/all.txt +0 -5
- data/lib/sportdb/csv_reader.rb +0 -241
- data/lib/sportdb/finders/date.rb +0 -446
- data/lib/sportdb/lang.rb +0 -216
- data/test/test_csv_reader.rb +0 -45
- data/test/test_date.rb +0 -100
- data/test/test_lang.rb +0 -130
data/lib/sportdb/csv_reader.rb
DELETED
@@ -1,241 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
|
4
|
-
module SportDb
|
5
|
-
|
6
|
-
|
7
|
-
# Imports the games (fixtures/results) of a single event from CSV text.
#
# The CSV text must have a header row; the columns read are
# Round, Date, Team 1, Team 2, FT (full time score) and HT (half time score).
# For every row the matching round and game records get created or updated.
# Once all rows are processed, start_at/end_at of every touched round get
# recalculated from the play_at dates of the round's games.
#
# Note: assumes an active ActiveRecord connection.
class CsvGameReader

  include LogUtils::Logging

  ## make models available by default with namespace
  #  e.g. lets you use Usage instead of Model::Usage
  include Models

  ##
  ## todo: add from_file and from_zip too

  # Convenience constructor; same as new( event_key, text ).
  def self.from_string( event_key, text )
    ### fix - fix -fix:
    ##   change event to event_or_event_key !!!!! - allow event_key as string passed in
    new( event_key, text )
  end

  # event_key - key of the event to import into (looked up in #read)
  # text      - CSV text (incl. header row)
  def initialize( event_key, text )
    ### fix - fix -fix:
    ##   change event to event_or_event_key !!!!! - allow event_key as string passed in

    ## todo/fix: how to add opts={} ???
    @event_key = event_key
    @text      = text
  end

  # Runs the import.
  #
  # Raises (via Event.find_by!) if no event with the given key exists.
  def read
    ## note: assume active activerecord connection
    @event = Event.find_by!( key: @event_key )

    logger.debug "Event #{@event.key} >#{@event.title}<"

    @team_mapper = TextUtils::TitleMapper.new( @event.teams, 'team' )

    ## reset cached values
    @patch_round_ids = []

    @last_round = nil   ## remove last round ?? - always required - why? why not?
    @last_date  = nil   ## remove last date  ?? - always required - why? why not?

    parse_fixtures
  end # method read

  # Finds the round of the current event by position or creates it.
  #
  # round_pos_str - round position as read from the CSV (converted with to_i)
  #
  # A new round gets the title "Round <pos>" and placeholder start_at/end_at
  # dates (1911-11-11); the real dates get patched in parse_fixtures.
  # An existing round is saved unchanged.
  def handle_round( round_pos_str )
    round_pos = round_pos_str.to_i

    round_attribs = {}

    round = Round.find_by( event_id: @event.id,
                           pos:      round_pos )

    if round.present?
      logger.debug "update round #{round.id}:"
    else
      logger.debug "create round:"
      round = Round.new

      round_attribs = round_attribs.merge( {
        event_id: @event.id,
        pos:      round_pos,
        title:    "Round #{round_pos}",
        title2:   nil,
        knockout: false,
        start_at: Date.parse('1911-11-11'),
        end_at:   Date.parse('1911-11-11')
      })
    end

    logger.debug round_attribs.to_json

    ## note: update! replaces update_attributes! (deprecated in Rails 6.0, removed in 6.1)
    round.update!( round_attribs )

    ### store list of round ids for patching start_at/end_at at the end
    @patch_round_ids << round.id
    @last_round = round     ## keep track of last seen round for matches that follow etc.
  end

  # Creates or updates the game of the two teams in the last seen round.
  #
  # date_str  - play date as YYYY-MM-DD; if missing or blank the last seen date gets (re)used
  # team1_str - title of team 1
  # team2_str - title of team 2
  # ft_str    - full time score e.g. "2-1"; missing or blank => no score
  # ht_str    - half time score e.g. "1-0"; missing or blank => no score
  #
  # Raises RuntimeError if the two teams cannot be mapped to team keys.
  def handle_game( date_str, team1_str, team2_str, ft_str, ht_str )

    ## todo/fix: make more "efficient"
    ##  - e.g. add new support method for mapping single team/reference - why? why not??
    ## NOTE(review): presumably map_titles! marks known team titles in line and
    ##   find_key! returns (and consumes) the next marked key - confirm against textutils
    line = "#{team1_str} - #{team2_str}"
    @team_mapper.map_titles!( line )
    team1_key = @team_mapper.find_key!( line )
    team2_key = @team_mapper.find_key!( line )

    ## note: two teams are required; otherwise there is no game to import
    if team1_key.nil? || team2_key.nil?
      fail " no game match (two teams required) found for line: >#{line}<"
    end

    ## note: a blank (e.g. quoted empty) date cell is handled like a missing one
    if date_str.present?
      date = DateTime.strptime( date_str, '%Y-%m-%d' )   ## (always) use DateTime - why? why not??
      @last_date = date    # keep a reference for later use
    else
      date = @last_date    # no date found; (re)use last seen date
    end

    ##
    ## todo: support for awarded, abandoned, a.e.t, pen. etc. - why?? why not??
    ##

    score1i, score2i = parse_score( ht_str )   ## half time scores
    score1,  score2  = parse_score( ft_str )   ## full time scores

    ### todo: cache team lookups in hash? - why? why not??
    team1 = Team.find_by!( key: team1_key )
    team2 = Team.find_by!( key: team2_key )

    round = @last_round

    ### check if game exists
    ##   with these teams in this round; if yes only update
    game = Game.find_by( round_id: round.id,
                         team1_id: team1.id,
                         team2_id: team2.id )

    game_attribs = {
      score1:     score1,
      score2:     score2,
      score1i:    score1i,
      score2i:    score2i,
      play_at:    date,
      play_at_v2: nil,
      postponed:  false,
      knockout:   false,   ## round.knockout, ## note: for now always use knockout flag from round - why? why not??
      ground_id:  nil,
      group_id:   nil
    }

    if game.present?
      logger.debug "update game #{game.id}:"
    else
      logger.debug "create game:"
      game = Game.new

      ## Note: use round.games.count for pos
      ##  lets us add games out of order if later needed
      game_attribs = game_attribs.merge( {
        round_id: round.id,
        team1_id: team1.id,
        team2_id: team2.id,
        pos:      round.games.count+1
      })
    end

    logger.debug game_attribs.to_json
    game.update!( game_attribs )
  end # method handle_game

  # Parses the CSV text row by row and imports rounds and games;
  # afterwards patches start_at/end_at of all rounds seen.
  #
  # Raises RuntimeError if a row has no (or a blank) Round value.
  def parse_fixtures

    CSV.parse( @text, headers: true ) do |row|
      ## note: log via logger; do NOT print debug output to stdout
      logger.debug row.inspect

      round = row['Round']
      date  = row['Date']
      team1 = row['Team 1']
      team2 = row['Team 2']
      ft    = row['FT']
      ht    = row['HT']

      ## find round by pos
      ##  note: a blank round cell would end up as round 0; treat as missing
      fail "round required for import; sorry"  unless round.present?

      handle_round( round )
      handle_game( date, team1, team2, ft, ht )
    end

    ###########################
    # backtrack and patch round dates (start_at/end_at)

    unless @patch_round_ids.empty?
      ###
      # note: use uniq - to allow multiple round headers (possible?)

      Round.find( @patch_round_ids.uniq ).each do |r|
        logger.debug "patch round start_at/end_at date for #{r.title}:"

        ## note:
        ## will add "scope" pos first e.g
        #
        ## SELECT "games".* FROM "games" WHERE "games"."round_id" = ?
        #    ORDER BY pos, play_at asc  [["round_id", 7]]
        #  thus will NOT order by play_at but by pos first!!!
        #  =>
        #  need to unscope pos!!! or use unordered_games - games_by_play_at_date etc.??
        #   thus use reorder()!!! - not just order('play_at asc')

        games = r.games.reorder( 'play_at asc' ).all

        ## skip rounds w/ no games

        ## todo/check/fix: what's the best way for checking assoc w/ 0 recs?
        next if games.size == 0

        # note: make sure start_at/end_at is date only (e.g. use play_at.to_date)
        #   sqlite3 saves datetime in date field as datetime, for example (will break date compares later!)

        round_attribs = {
          start_at: games[0].play_at.to_date,    # use games.first ?
          end_at:   games[-1].play_at.to_date    # use games.last ? why? why not?
        }

        logger.debug round_attribs.to_json
        r.update!( round_attribs )
      end
    end
  end # method parse_fixtures

private

  # Splits a score such as "2-1" or "2 - 1" into two integers.
  #
  # Returns [nil, nil] if score_str is nil or blank; a missing side
  # (e.g. "2-") ends up as 0 (nil.to_i).
  def parse_score( score_str )
    return [nil, nil]  unless score_str.present?

    parts = score_str.delete( ' ' ).split( '-' )   ## note: remove all (inline) spaces first
    [parts[0].to_i, parts[1].to_i]
  end

end # class CsvGameReader
|
241
|
-
end # module SportDb
|
data/lib/sportdb/finders/date.rb
DELETED
@@ -1,446 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
#### fix: move to textutils for reuse !!!!!!!!!! - why?? why not ??
|
4
|
-
|
5
|
-
|
6
|
-
module SportDb
|
7
|
-
|
8
|
-
|
9
|
-
# Shared helpers for the date finders: month name lookup tables plus the
# conversion of regex match data into a DateTime.
#
# Note: the helper methods call logger but this class does not define it;
#  the subclasses in this file mix in LogUtils::Logging.
class DateFinderBase

  # month name (or abbreviation) => month number (as string)
  MONTH_EN_TO_MM = {
    'Jan' => '1',  'January'   => '1',
    'Feb' => '2',  'February'  => '2',
    'Mar' => '3',  'March'     => '3',
    'Apr' => '4',  'April'     => '4',
    'May' => '5',
    'Jun' => '6',  'June'      => '6',
    'Jul' => '7',  'July'      => '7',
    'Aug' => '8',  'August'    => '8',
    'Sep' => '9',  'Sept'      => '9',  'September' => '9',
    'Oct' => '10', 'October'   => '10',
    'Nov' => '11', 'November'  => '11',
    'Dec' => '12', 'December'  => '12' }.freeze

  MONTH_FR_TO_MM = {
    'Janvier'   => '1',  'Janv' => '1',  'Jan' => '1',   ## check janv in use??
    'Février'   => '2',  'Févr' => '2',  'Fév' => '2',   ## check fevr in use???
    'Mars'      => '3',  'Mar'  => '3',
    'Avril'     => '4',  'Avri' => '4',  'Avr' => '4',   ## check avri in use??? if not remove
    'Mai'       => '5',
    'Juin'      => '6',
    'Juillet'   => '7',  'Juil' => '7',
    'Août'      => '8',
    'Septembre' => '9',  'Sept' => '9',
    'Octobre'   => '10', 'Octo' => '10', 'Oct' => '10',  ### check octo in use??
    'Novembre'  => '11', 'Nove' => '11', 'Nov' => '11',  ## check nove in use??
    'Décembre'  => '12', 'Déce' => '12', 'Déc' => '12' }.freeze   ## check dece in use??

  MONTH_ES_TO_MM = {
    'Ene' => '1',  'Enero'  => '1',
    'Feb' => '2',
    'Mar' => '3',  'Marzo'  => '3',
    'Abr' => '4',  'Abril'  => '4',
    'May' => '5',  'Mayo'   => '5',
    'Jun' => '6',  'Junio'  => '6',
    'Jul' => '7',  'Julio'  => '7',
    'Ago' => '8',  'Agosto' => '8',
    'Sep' => '9',  'Set'    => '9',  'Sept' => '9',
    'Oct' => '10',
    'Nov' => '11',
    'Dic' => '12' }.freeze

private

  # Calculates the (implied) year for a date given without year.
  #
  # month - month number (Integer)
  # day   - day of month (Integer); only used for logging
  # opts  - options hash; opts[:start_at] (required) must respond to month and year
  #
  # Returns start_at.year if month >= start_at.month, otherwise start_at.year+1.
  # Raises ArgumentError if opts[:start_at] is missing.
  def calc_year( month, day, opts )
    start_at = opts[:start_at]

    ## note: fail with a clear message (instead of NoMethodError on nil)
    if start_at.nil?
      raise ArgumentError, "start_at option required to calculate year for date w/o year (month: #{month}, day: #{day})"
    end

    logger.debug " [calc_year] ????-#{month}-#{day} -- start_at: #{start_at}"

    if month >= start_at.month
      # assume same year as start_at event (e.g. 2013 for 2013/14 season)
      start_at.year
    else
      # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
      start_at.year+1
    end
  end

  # Builds a DateTime from the named captures of a date regex match.
  #
  # match_data - MatchData; named groups used are year, month (or month_en,
  #              month_es, month_fr), day, hours and minutes
  # opts       - options hash; passed on to calc_year if the match has no year
  #
  # Missing hours/minutes default to 12:00.
  def parse_date_time( match_data, opts={} )

    # convert regex match_data captures to hash
    # - note: cannot use match_data like a hash (e.g. raises exception if key/name not present/found)
    # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
    h = {}
    match_data.names.each { |name| h[name.to_sym] = match_data[name] }

    logger.debug " [parse_date_time] hash: >#{h.inspect}<"

    ## map month name (if any) to month number
    h[ :month ] = MONTH_EN_TO_MM[ h[:month_en] ]  if h[:month_en]
    h[ :month ] = MONTH_ES_TO_MM[ h[:month_es] ]  if h[:month_es]
    h[ :month ] = MONTH_FR_TO_MM[ h[:month_fr] ]  if h[:month_fr]

    month = h[:month]
    day   = h[:day]
    year  = h[:year] || calc_year( month.to_i, day.to_i, opts ).to_s

    hours   = h[:hours]   || '12'    # default to 12:00 for HH:MM (hours:minutes)
    minutes = h[:minutes] || '00'

    value = '%d-%02d-%02d %02d:%02d' % [year.to_i, month.to_i, day.to_i, hours.to_i, minutes.to_i]
    logger.debug " date: >#{value}<"

    DateTime.strptime( value, '%Y-%m-%d %H:%M' )
  end

end # class DateFinderBase
|
98
|
-
|
99
|
-
|
100
|
-
# Finds the first date (w/ or w/o time) in a line of text using a list of
# known formats (see FORMATS) and replaces the matched text with a format tag.
class DateFinder < DateFinderBase

  include LogUtils::Logging

  # todo: make more generic for reuse
  ### fix:
  ### move to textutils
  ##    date/fr.yml en.yml etc. ???
  ##    why? why not?

  ## note: the name lists get joined with | and used as regex alternations;
  ##   order matters (longer names first)

  MONTH_FR = %w[
    Janvier Janv Jan
    Février Févr Fév
    Mars Mar
    Avril Avri Avr
    Mai
    Juin
    Juillet Juil
    Août
    Septembre Sept
    Octobre Octo Oct
    Novembre Nove Nov
    Décembre Déce Déc
  ].join( '|' ).freeze

  ## note: no trailing | - an empty alternative would make the weekday optional
  WEEKDAY_FR = %w[
    Lundi Lun L
    Mardi Mar Ma
    Mercredi Mer Me
    Jeudi Jeu J
    Vendredi Ven V
    Samedi Sam S
    Dimanche Dim D
  ].join( '|' ).freeze

  MONTH_EN = %w[
    January Jan
    February Feb
    March Mar
    April Apr
    May
    June Jun
    July Jul
    August Aug
    September Sept Sep
    October Oct
    November Nov
    December Dec
  ].join( '|' ).freeze

  ###
  ## todo: add days
  ##  1. Sunday - Sun.  2. Monday - Mon.
  ##  3. Tuesday - Tu., Tue., or Tues.  4. Wednesday - Wed.
  ##  5. Thursday - Th., Thu., Thur., or Thurs.  6. Friday - Fri.
  ##  7. Saturday - Sat.

  MONTH_ES = %w[
    Enero Ene
    Feb
    Marzo Mar
    Abril Abr
    Mayo May
    Junio Jun
    Julio Jul
    Agosto Ago
    Sept Set Sep
    Oct
    Nov
    Dic
  ].join( '|' ).freeze

  # todo/fix - add de and es too!!
  #  note: in Austria - Jänner - in Deutschland Januar allow both ??
  #  MONTH_DE = 'J[aä]n|Feb|Mär|Apr|Mai|Jun|Jul|Aug|Sep|Okt|Nov|Dez'

  # e.g. 2012-09-14 20:30   => YYYY-MM-DD HH:MM
  #  nb: allow 2012-9-3 7:30 e.g. no leading zero required
  # regex_db
  DB__DATE_TIME_REGEX = /\b
                          (?<year>\d{4})
                          -
                          (?<month>\d{1,2})
                          -
                          (?<day>\d{1,2})
                          \s+
                          (?<hours>\d{1,2})
                          :
                          (?<minutes>\d{2})
                        \b/x

  # e.g. 2012-09-14  w/ implied hours (set to 12:00)
  #  nb: allow 2012-9-3 e.g. no leading zero required
  # regex_db2
  DB__DATE_REGEX = /\b
                     (?<year>\d{4})
                     -
                     (?<month>\d{1,2})
                     -
                     (?<day>\d{1,2})
                   \b/x

  # e.g. 14.09.2012 20:30   => DD.MM.YYYY HH:MM
  #  nb: allow 2.3.2012 e.g. no leading zero required
  #  nb: allow hour as 20.30
  # regex_de
  DD_MM_YYYY__DATE_TIME_REGEX = /\b
                                  (?<day>\d{1,2})
                                  \.
                                  (?<month>\d{1,2})
                                  \.
                                  (?<year>\d{4})
                                  \s+
                                  (?<hours>\d{1,2})
                                  [:.]
                                  (?<minutes>\d{2})
                                \b/x

  # e.g. 14.09. 20:30  => DD.MM. HH:MM
  #  nb: allow 2.3. e.g. no leading zero required
  #  nb: allow hour as 20.30  or 3.30 instead of 03.30
  # regex_de2
  DD_MM__DATE_TIME_REGEX = /\b
                             (?<day>\d{1,2})
                             \.
                             (?<month>\d{1,2})
                             \.
                             \s+
                             (?<hours>\d{1,2})
                             [:.]
                             (?<minutes>\d{2})
                           \b/x

  # e.g. 14.09.2012  => DD.MM.YYYY w/ implied hours (set to 12:00)
  # regex_de3
  DD_MM_YYYY__DATE_REGEX = /\b
                             (?<day>\d{1,2})
                             \.
                             (?<month>\d{1,2})
                             \.
                             (?<year>\d{4})
                           \b/x

  # e.g. 14.09.  => DD.MM. w/ implied year and implied hours (set to 12:00)
  #  note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
  #  note: we use a lookahead for last part e.g. (?:\s+|$|[\]]) - do NOT consume
  # regex_de4 (use lookahead assert)
  DD_MM__DATE_REGEX = /\b
                        (?<day>\d{1,2})
                        \.
                        (?<month>\d{1,2})
                        \.
                      (?=\s+|$|[\]])/x    ## note: allow end-of-string/line too

  # e.g. 12 May 2013 14:00  => D|DD.MMM.YYYY H|HH:MM
  EN__DD_MONTH_YYYY__DATE_TIME_REGEX = /\b
                                         (?<day>\d{1,2})
                                         \s
                                         (?<month_en>#{MONTH_EN})
                                         \s
                                         (?<year>\d{4})
                                         \s+
                                         (?<hours>\d{1,2})
                                         :
                                         (?<minutes>\d{2})
                                       \b/x

  ###
  # fix: pass in lang (e.g. en or es)
  #  only process format for lang plus fallback to en?
  #  e.g. EN__DD_MONTH and ES__DD_MONTH depend on order for match (first listed will match)

  # e.g. 12 May  => D|DD.MMM w/ implied year and implied hours
  EN__DD_MONTH__DATE_REGEX = /\b
                               (?<day>\d{1,2})
                               \s
                               (?<month_en>#{MONTH_EN})
                             \b/x

  # e.g. Jun/12 2011 14:00
  EN__MONTH_DD_YYYY__DATE_TIME_REGEX = /\b
                                         (?<month_en>#{MONTH_EN})
                                         \/
                                         (?<day>\d{1,2})
                                         \s
                                         (?<year>\d{4})
                                         \s+
                                         (?<hours>\d{1,2})
                                         :
                                         (?<minutes>\d{2})
                                       \b/x

  # e.g. Jun/12 14:00  w/ implied year H|HH:MM
  EN__MONTH_DD__DATE_TIME_REGEX = /\b
                                    (?<month_en>#{MONTH_EN})
                                    \/
                                    (?<day>\d{1,2})
                                    \s+
                                    (?<hours>\d{1,2})
                                    :
                                    (?<minutes>\d{2})
                                  \b/x

  # e.g. Jun/12 2013  w/ implied hours (set to 12:00)
  EN__MONTH_DD_YYYY__DATE_REGEX = /\b
                                    (?<month_en>#{MONTH_EN})
                                    \/
                                    (?<day>\d{1,2})
                                    \s
                                    (?<year>\d{4})
                                  \b/x

  # e.g. Jun/12  w/ implied year and implied hours (set to 12:00)
  #  note: allow space too e.g Jun 12 -- check if conflicts w/ other formats??? (added for rsssf reader)
  #   -- fix: might eat french weekday mar 12 is mardi (mar) !!! see FR__ pattern
  #  fix: remove space again for now - and use simple en date reader or something!!!
  ##   was [\/ ] changed back to \/
  EN__MONTH_DD__DATE_REGEX = /\b
                               (?<month_en>#{MONTH_EN})
                               \/
                               (?<day>\d{1,2})
                             \b/x

  # e.g. 12 Ene  w/ implied year and implied hours (set to 12:00)
  ES__DD_MONTH__DATE_REGEX = /\b
                               (?<day>\d{1,2})
                               \s
                               (?<month_es>#{MONTH_ES})
                             \b/x

  # e.g. Ven 8 Août  or [Ven 8 Août] or Ven 8. Août  or [Ven 8. Août]
  ### note: do NOT consume [] in regex (use lookahead assert)
  FR__WEEKDAY_DD_MONTH__DATE_REGEX = /\b
                                       (?:#{WEEKDAY_FR})   # note: skip weekday for now; do NOT capture
                                       \s+
                                       (?<day>\d{1,2})
                                       \.?                 # note: make dot optional
                                       \s+
                                       (?<month_fr>#{MONTH_FR})
                                     (?=\s+|$|[\]])/x    ## note: allow end-of-string/line too

  # map table - 1) tag, 2) regex - note: order matters; first come-first matched/served
  FORMATS = [
    [ '[YYYY_MM_DD_hh_mm]',       DB__DATE_TIME_REGEX ],
    [ '[YYYY_MM_DD]',             DB__DATE_REGEX ],
    [ '[DD_MM_YYYY_hh_mm]',       DD_MM_YYYY__DATE_TIME_REGEX ],
    [ '[DD_MM_hh_mm]',            DD_MM__DATE_TIME_REGEX ],
    [ '[DD_MM_YYYY]',             DD_MM_YYYY__DATE_REGEX ],
    [ '[DD_MM]',                  DD_MM__DATE_REGEX ],
    [ '[FR_WEEKDAY_DD_MONTH]',    FR__WEEKDAY_DD_MONTH__DATE_REGEX ],
    [ '[EN_DD_MONTH_YYYY_hh_mm]', EN__DD_MONTH_YYYY__DATE_TIME_REGEX ],
    [ '[EN_MONTH_DD_YYYY_hh_mm]', EN__MONTH_DD_YYYY__DATE_TIME_REGEX ],
    [ '[EN_MONTH_DD_hh_mm]',      EN__MONTH_DD__DATE_TIME_REGEX ],
    [ '[EN_MONTH_DD_YYYY]',       EN__MONTH_DD_YYYY__DATE_REGEX ],
    [ '[EN_MONTH_DD]',            EN__MONTH_DD__DATE_REGEX ],
    [ '[EN_DD_MONTH]',            EN__DD_MONTH__DATE_REGEX ],
    [ '[ES_DD_MONTH]',            ES__DD_MONTH__DATE_REGEX ]
  ].freeze

  def initialize
    # nothing here for now
  end

  # Finds the first date in line (trying FORMATS in order) and replaces
  # the matched text in place with the tag of the format.
  #
  # line - String; gets changed in place on match
  # opts - options hash passed on to parse_date_time
  #        (opts[:start_at] is required for dates w/o year)
  #
  # Returns the date found (DateTime) or nil if no format matches.
  def find!( line, opts={} )
    # fix: use more lookahead for all required trailing spaces!!!!!

    #
    # fix: !!!!
    # date in [] will become [[DATE.DE4]] - when getting removed will keep ]!!!!
    #  fix: change regex to \[[A-Z0-9.]\] !!!!!! plus add unit test too!!!
    #

    FORMATS.each do |tag, pattern|
      md = pattern.match( line )
      next unless md    # no match; continue; try next pattern

      ## note: parse first - before the line (match source) gets changed
      date = parse_date_time( md, opts )

      ## note: replace the exact span matched; sub!( md[0], tag ) would replace
      ##   the first occurrence of the matched text - possibly a different
      ##   span that the pattern (w/ \b or lookahead) did NOT match
      line[ md.begin(0)...md.end(0) ] = tag
      return date
    end

    nil   # no match found
  end

end # class DateFinder
|
396
|
-
|
397
|
-
|
398
|
-
# Date finder for rsssf-style dates, that is,
# an english month name plus day in brackets e.g. [Jun 7] or [Aug 12].
class RsssfDateFinder < DateFinderBase

  include LogUtils::Logging

  ## month names (alternation for use in regex)
  MONTH_EN = %w[
    Jan
    Feb
    March Mar
    April Apr
    May
    June Jun
    July Jul
    Aug
    Sept Sep
    Oct
    Nov
    Dec
  ].join( '|' )

  ## e.g.
  ##  [Jun 7] or [Aug 12] etc.  - note: MUST include brackets e.g. []
  ##
  ##  check add \b at the beginning and end - why?? why not?? working??
  EN__MONTH_DD__DATE_REGEX = /\[ (?<month_en>#{MONTH_EN}) \s (?<day>\d{1,2}) \]/x

  # Looks for a date in brackets in line; on match replaces the matched
  # text (incl. brackets) in place with the tag [EN_MONTH_DD].
  #
  # line - String; gets changed in place on match
  # opts - options hash passed on to parse_date_time
  #
  # Returns the date as returned by parse_date_time or nil if no match found.
  def find!( line, opts={} )
    # fix: use more lookahead for all required trailing spaces!!!!!

    md = EN__MONTH_DD__DATE_REGEX.match( line )
    return nil  if md.nil?    # no match found

    ## note: parse first - before the line gets changed
    found = parse_date_time( md, opts )

    ## fix: use md.begin(0), md.end(0)
    line.sub!( md[0], '[EN_MONTH_DD]' )
    found
  end

end ## class RsssfDateFinder
|
445
|
-
|
446
|
-
end # module SportDb
|