sportdb-formats 1.2.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,38 +1,37 @@
1
1
 
2
2
  module SportDb
3
3
 
4
- class MatchParser ## simple match parser for team match schedules
4
+ class MatchParser ## simple match parser for team match schedules
5
5
 
6
- def self.parse( lines, teams, start: )
6
+
7
+ def self.parse( lines, start: )
7
8
  ## todo/fix: add support for txt and lines
8
9
  ## check if lines_or_txt is an array or just a string
9
10
  ## use teams: like start: why? why not?
10
- parser = new( lines, teams, start )
11
+ parser = new( lines, start )
11
12
  parser.parse
12
13
  end
13
14
 
14
15
 
15
16
  include Logging ## e.g. logger#debug, logger#info, etc.
16
- include ParserHelper ## e.g. read_lines, etc.
17
17
 
18
+ def self.debug=(value) @@debug = value; end
19
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
20
+ def debug?() self.class.debug?; end
18
21
 
19
- def initialize( lines, teams, start )
20
- # for convenience split string into lines
21
- ## note: removes/strips empty lines
22
- ## todo/check: change to text instead of array of lines - why? why not?
23
-
24
- ## note - wrap in enumerator/iterator a.k.a lines reader
25
- @lines = LinesReader.new( lines.is_a?( String ) ?
26
- read_lines( lines ) :
27
- lines
28
- )
29
-
30
- @mapper_teams = TeamMapper.new( teams )
31
- @start = start
32
- end
22
+ def _read_lines( txt ) ## todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
23
+ ## returns an array of lines with comments and empty lines stripped / removed
24
+ lines = []
25
+ txt.each_line do |line| ## preprocess
26
+ line = line.strip
33
27
 
28
+ next if line.empty? || line.start_with?('#') ### skip empty lines and comments
29
+ line = line.sub( /#.*/, '' ).strip ### cut-off end-of line comments too
30
+ lines << line
31
+ end
32
+ lines
33
+ end
34
34
 
35
-
36
35
 
37
36
  ## note: colon (:) MUST be followed by one (or more) spaces
38
37
  ## make sure mon feb 12 18:10 will not match
@@ -45,7 +44,7 @@ class MatchParser ## simple match parser for team match schedules
45
44
  ## Group B: - remove colon
46
45
  ## or lookup first
47
46
 
48
- ATTRIB_REGEX = /^
47
+ ATTRIB_RE = %r{^
49
48
  [ ]*? # slurp leading spaces
50
49
  (?<key>[^:|\]\[()\/; -]
51
50
  [^:|\]\[()\/;]{0,30}
@@ -55,96 +54,185 @@ class MatchParser ## simple match parser for team match schedules
55
54
  (?<value>.+)
56
55
  [ ]*? # slurp trailing spaces
57
56
  $
58
- /ix
57
+ }ix
58
+
59
+ #
60
+ # todo/fix: change start to start: too!!!
61
+ # might be optional in the future!! - why? why not?
62
+
63
+ def initialize( lines, start )
64
+ # for convenience split string into lines
65
+ ## note: removes/strips empty lines
66
+ ## todo/check: change to text instead of array of lines - why? why not?
67
+
68
+ ## note - wrap in enumerator/iterator a.k.a lines reader
69
+ @lines = lines.is_a?( String ) ?
70
+ _read_lines( lines ) : lines
71
+
72
+ @start = start
73
+ end
59
74
 
60
75
 
61
76
  def parse
62
77
  @last_date = nil
78
+ @last_time = nil
63
79
  @last_round = nil
64
80
  @last_group = nil
65
81
 
82
+ ## last_goals - rename to (longer) @last_team_goals or such - why? why not?
83
+ @last_goals = 1 ## toggle between 1|2 - hacky (quick & dirty) support for multi-line goals, fix soon!
84
+
85
+ @teams = Hash.new(0) ## track counts (only) for now for (interal) team stats - why? why not?
66
86
  @rounds = {}
67
87
  @groups = {}
68
88
  @matches = []
69
89
 
70
90
  @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
71
91
 
72
- ## todo/fix - use @lines.rewind first here - why? why not?
73
- @lines.each do |line|
74
-
75
- if is_round_def?( line )
92
+
93
+
94
+ @parser = Parser.new
95
+
96
+ @errors = []
97
+ @tree = []
98
+
99
+ attrib_found = false
100
+
101
+ @lines.each_with_index do |line,i|
102
+
103
+ if debug?
104
+ puts
105
+ puts "line >#{line}<"
106
+ end
107
+
108
+ ## skip new (experimental attrib syntax)
109
+ if attrib_found == false &&
110
+ ATTRIB_RE.match?( line )
111
+ ## note: check attrib regex AFTER group def e.g.:
112
+ ## Group A:
113
+ ## Group B: etc.
114
+ ## todo/fix - change Group A: to Group A etc.
115
+ ## Group B: to Group B
116
+ attrib_found = true
117
+ ## logger.debug "skipping key/value line - >#{line}<"
118
+ next
119
+ end
120
+
121
+ if attrib_found
122
+ ## check if line ends with dot
123
+ ## if not slurp up lines to the next dot!!!
124
+ ## logger.debug "skipping key/value line - >#{line}<"
125
+ attrib_found = false if line.end_with?( '.' )
126
+ # logger.debug "skipping key/value line (cont.) - >#{line}<"
127
+ next
128
+ end
129
+
130
+ t, error_messages = @parser.parse_with_errors( line )
131
+
132
+
133
+ if error_messages.size > 0
134
+ ## add to "global" error list
135
+ ## make a triplet tuple (file / msg / line text)
136
+ error_messages.each do |msg|
137
+ @errors << [ '<file>', ## add filename here
138
+ msg,
139
+ line
140
+ ]
141
+ end
142
+ end
143
+
144
+ pp t if debug?
145
+
146
+ @tree << t
147
+ end # each lines
148
+
149
+ ## pp @tree
150
+
151
+ ## report parse errors here - why? why not?
152
+
153
+
154
+
155
+ @tree.each do |nodes|
156
+
157
+ node_type = nodes[0][0] ## get node type of first/head node
158
+
159
+ if node_type == :round_def
76
160
  ## todo/fix: add round definition (w begin n end date)
77
161
  ## todo: do not patch rounds with definition (already assume begin/end date is good)
78
162
  ## -- how to deal with matches that get rescheduled/postponed?
79
- parse_round_def( line )
80
- elsif is_round?( line )
81
- parse_round_header( line )
82
- elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
163
+ parse_round_def( nodes )
164
+ elsif node_type == :group_def ## NB: group goes after round (round may contain group marker too)
83
165
  ### todo: add pipe (|) marker (required)
84
- parse_group_def( line )
85
- elsif is_group?( line )
86
- ## -- lets you set group e.g. Group A etc.
87
- parse_group_header( line )
88
-
89
- elsif m=ATTRIB_REGEX.match( line )
90
- ## note: check attrib regex AFTER group def e.g.:
91
- ## Group A:
92
- ## Group B: etc.
93
- ## todo/fix - change Group A: to Group A etc.
94
- ## Group B: to Group B
95
-
96
- ## check if line ends with dot
97
- ## if not slurp up lines to the next do!!!
98
- logger.debug "skipping key/value line - >#{line}<"
99
- while !line.end_with?( '.' ) || line.nil? do
100
- line = @lines.next
101
- logger.debug "skipping key/value line (cont.) - >#{line}<"
102
- end
103
- elsif is_goals?( line )
104
- ## note - goals must be AFTER attributes!!!
105
- logger.debug "matched goals line: >#{line}<"
106
- logger.debug " try parse:"
107
-
108
- goals = GoalsFinder.new.find!( line )
109
- pp goals
110
- ## quick & dirty - auto add goals to last match
111
- match = @matches[-1]
112
- match.goals = goals
113
-
114
- elsif try_parse_game( line )
115
- # do nothing here
116
- elsif try_parse_date_header( line )
117
- # do nothing here
118
- else
119
- logger.warn "skipping line (no match found): >#{line}<"
120
- @warns << line
166
+ parse_group_def( nodes )
167
+
168
+ elsif node_type == :player ||
169
+ node_type == :none # e.g [[:none], [:";"], [:player, "Xhaka"],...]
170
+ ## note - for now goals line MUST start with player!!
171
+ parse_goals( nodes )
172
+ else
173
+ ## try to be liberal/flexible
174
+ ## eat-up nodes as we go
175
+ ## assume match with group / round header
176
+ ## etc. on its own line or not
177
+
178
+ ## preprocess possible before match nodes
179
+
180
+ while !nodes.empty? do
181
+ node_type = nodes[0][0] ## get node type of first/head node
182
+ if node_type == :round
183
+ node = nodes.shift ## eat-up
184
+ parse_round_header( node )
185
+ elsif node_type == :leg
186
+ node = nodes.shift ## eat-up
187
+ ## ignore (round) leg for now - add later leg - 1|2|3 etc!!!
188
+ ## needs to get added to db/schema too!!!!
189
+ ## add @last_leg = nil or 1|2|3 etc.
190
+ elsif node_type == :group
191
+ ## -- lets you set group e.g. Group A etc.
192
+ node = nodes.shift ## eat-up
193
+ parse_group_header( node )
194
+ elsif node_type == :date
195
+ node = nodes.shift ## eat-up
196
+ parse_date_header( node )
197
+ ## add time here too - why? why not?
198
+ ## add skip comma separator here too - why? why not?
199
+ ## "slurp-up" in upstream parser?
200
+ ## e.g. round, group or group, round ?
201
+ else
202
+ break
203
+ end
204
+ end
205
+ next if nodes.empty?
206
+
207
+ ## rename to try_parse_match - why? why not?
208
+ parse_match( nodes )
121
209
  end
122
- end # lines.each
123
210
 
124
- [@matches, @rounds.values, @groups.values]
211
+ end # tree.each
212
+
213
+ ## note - team keys are names and values are "internal" stats!!
214
+ ## and NOT team/club/nat_team structs!!
215
+ [@teams.keys, @matches, @rounds.values, @groups.values]
125
216
  end # method parse
126
217
 
127
218
 
128
219
 
129
- def parse_group_header( line )
130
- logger.debug "parsing group header line: >#{line}<"
220
+ def parse_group_header( node )
221
+ logger.debug "parsing group header: >#{node}<"
131
222
 
132
223
  # note: group header resets (last) round (allows, for example):
133
224
  # e.g.
134
225
  # Group Playoffs/Replays -- round header
135
226
  # team1 team2 -- match
136
- # Group B: -- group header
227
+ # Group B -- group header
137
228
  # team1 team2 - match (will get new auto-matchday! not last round)
138
229
  @last_round = nil
139
230
 
140
- name = find_group_name!( line )
141
-
142
- logger.debug " name: >#{name}<"
143
- logger.debug " line: >#{line}<"
231
+ name = node[1]
144
232
 
145
233
  group = @groups[ name ]
146
234
  if group.nil?
147
- puts "!! ERROR - no group def found for >#{name}<"
235
+ puts "!! PARSE ERROR - no group def found for >#{name}<"
148
236
  exit 1
149
237
  end
150
238
 
@@ -152,63 +240,99 @@ class MatchParser ## simple match parser for team match schedules
152
240
  @last_group = group
153
241
  end
154
242
 
155
- def parse_group_def( line )
156
- logger.debug "parsing group def line: >#{line}<"
157
243
 
158
- @mapper_teams.map_teams!( line )
159
- teams = @mapper_teams.find_teams!( line )
244
+ def parse_group_def( nodes )
245
+ logger.debug "parsing group def: >#{nodes}<"
246
+
247
+ ## e.g
248
+ ## [:group_def, "Group A"],
249
+ ## [:team, "Germany"],
250
+ ## [:team, "Scotland"],
251
+ ## [:team, "Hungary"],
252
+ ## [:team, "Switzerland"]
160
253
 
161
- name = find_group_name!( line )
254
+ node = nodes[0]
255
+ name = node[1] ## group name
162
256
 
163
- logger.debug " line: >#{line}<"
257
+ teams = nodes[1..-1].map do |node|
258
+ if node[0] == :team
259
+ team = node[1]
260
+ @teams[ team ] += 1
261
+ team
262
+ else
263
+ puts "!! PARSE ERROR - only teams expected in group def; got:"
264
+ pp nodes
265
+ exit 1
266
+ end
267
+ end
164
268
 
165
269
  ## todo/check/fix: add back group key - why? why not?
166
270
  group = Import::Group.new( name: name,
167
- teams: teams.map {|team| team.name } )
271
+ teams: teams )
168
272
 
169
273
  @groups[ name ] = group
170
274
  end
171
275
 
172
276
 
173
- def find_group_name!( line )
174
- ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
175
- ## nb: (?:) = is for non-capturing group(ing)
176
-
177
- ## fix:
178
- ## get Group|Gruppe|Grupo from lang!!!! do NOT hardcode in place
179
-
180
- ## todo:
181
- ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
182
- regex = /\b
183
- (?:
184
- (Group | Gruppe | Grupo)
185
- [ ]+
186
- (\d+ | [A-Z]+)
187
- )
188
- \b/x
277
+ def _build_date( m:, d:, y:, start: )
189
278
 
190
- m = regex.match( line )
191
279
 
192
- return nil if m.nil?
280
+ ## quick debug hack
281
+ if m == 2 && d == 29
282
+ puts "quick check feb/29 dates"
283
+ pp [d,m,y]
284
+ pp start
285
+ end
193
286
 
194
- name = m[0]
287
+ if y.nil? ## try to calculate year
288
+ y = if m > start.month ||
289
+ (m == start.month && d >= start.day)
290
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
291
+ start.year
292
+ else
293
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
294
+ start.year+1
295
+ end
296
+ end
195
297
 
196
- logger.debug " name: >#{name}<"
197
298
 
198
- line.sub!( name, '[GROUP.NAME]' )
199
299
 
200
- name
300
+ Date.new( y,m,d ) ## y,m,d
201
301
  end
202
302
 
303
+ def parse_round_def( nodes )
304
+ logger.debug "parsing round def: >#{nodes}<"
203
305
 
204
- def parse_round_def( line )
205
- logger.debug "parsing round def line: >#{line}<"
206
-
207
- start_date = find_date!( line, start: @start )
208
- end_date = find_date!( line, start: @start )
306
+ ## e.g. [[:round_def, "Matchday 1"], [:duration, "Fri Jun/14 - Tue Jun/18"]]
307
+ ## [[:round_def, "Matchday 2"], [:duration, "Wed Jun/19 - Sat Jun/22"]]
308
+ ## [[:round_def, "Matchday 3"], [:duration, "Sun Jun/23 - Wed Jun/26"]]
209
309
 
210
- # note: if end_date missing -- assume start_date is (==) end_at
211
- end_date = start_date if end_date.nil?
310
+ node = nodes[0]
311
+ name = node[1]
312
+ # NB: use extracted round name for knockout check
313
+ # knockout_flag = is_knockout_round?( name )
314
+
315
+ node = nodes[1]
316
+ node_type = node[0]
317
+ if node_type == :date
318
+ start_date = end_date = _build_date( m: node[2][:m],
319
+ d: node[2][:d],
320
+ y: node[2][:y],
321
+ start: @start)
322
+ elsif node_type == :duration
323
+ start_date = _build_date( m: node[2][:start][:m],
324
+ d: node[2][:start][:d],
325
+ y: node[2][:start][:y],
326
+ start: @start)
327
+ end_date = _build_date( m: node[2][:end][:m],
328
+ d: node[2][:end][:d],
329
+ y: node[2][:end][:y],
330
+ start: @start)
331
+ else
332
+ puts "!! PARSE ERROR - expected date or duration for round def; got:"
333
+ pp nodes
334
+ exit 1
335
+ end
212
336
 
213
337
  # note: - NOT needed; start_at and end_at are saved as date only (NOT datetime)
214
338
  # set hours,minutes,secs to beginning and end of day (do NOT use default 12.00)
@@ -218,13 +342,15 @@ class MatchParser ## simple match parser for team match schedules
218
342
 
219
343
  # note: make sure start_at/end_at is date only (e.g. use start_at.to_date)
220
344
  # sqlite3 saves datetime in date field as datetime, for example (will break date compares later!)
221
- start_date = start_date.to_date
222
- end_date = end_date.to_date
223
345
 
346
+ # note - _build_date always returns Date for now - no longer needed!!
347
+ # start_date = start_date.to_date
348
+ # end_date = end_date.to_date
224
349
 
225
- name = find_round_def_name!( line )
226
- # NB: use extracted round name for knockout check
227
- knockout_flag = is_knockout_round?( name )
350
+
351
+ ## fix:
352
+ ## remove knockout_flag - why? why not?
353
+ knockout_flag = false
228
354
 
229
355
 
230
356
  logger.debug " start_date: #{start_date}"
@@ -232,8 +358,6 @@ class MatchParser ## simple match parser for team match schedules
232
358
  logger.debug " name: >#{name}<"
233
359
  logger.debug " knockout_flag: #{knockout_flag}"
234
360
 
235
- logger.debug " line: >#{line}<"
236
-
237
361
  round = Import::Round.new( name: name,
238
362
  start_date: start_date,
239
363
  end_date: end_date,
@@ -244,178 +368,278 @@ class MatchParser ## simple match parser for team match schedules
244
368
  end
245
369
 
246
370
 
371
+ def parse_round_header( node )
372
+ logger.debug "parsing round header: >#{node}<"
247
373
 
248
- def find_round_def_name!( line )
249
- # assume everything before pipe (\) is the round name
250
- # strip [ROUND.POS], todo:?? [ROUND.NAME2]
251
-
252
- # todo/fix: add name2 w/ // or / why? why not?
253
- # -- strip / or / chars
374
+ name = node[1]
254
375
 
255
- buf = line.dup
256
- logger.debug " find_round_def_name! line-before: >>#{buf}<<"
376
+ # name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
377
+ # name = name.strip
257
378
 
258
- ## cut-off everything after (including) pipe (|)
259
- buf = buf[ 0...buf.index('|') ]
260
- buf.strip!
261
-
262
- logger.debug " find_round_def_name! line-after: >>#{buf}<<"
379
+ round = @rounds[ name ]
380
+ if round.nil? ## auto-add / create if missing
381
+ ## todo/check: add num (was pos) if present - why? why not?
382
+ round = Import::Round.new( name: name )
383
+ @rounds[ name ] = round
384
+ end
263
385
 
264
- logger.debug " name: >>#{buf}<<"
265
- line.sub!( buf, '[ROUND.NAME]' )
386
+ ## todo/check: if pos match (MUST always match for now)
387
+ @last_round = round
388
+ @last_group = nil # note: reset group to no group - why? why not?
266
389
 
267
- buf
390
+ ## todo/fix/check
391
+ ## make round a scope for date(time) - why? why not?
392
+ ## reset date/time e.g. @last_date = nil !!!!
268
393
  end
269
394
 
395
+ def parse_date_header( node )
396
+ logger.debug( "date header: >#{node}<")
270
397
 
271
- ## split by or || or |||
272
- ## or ++ or +++
273
- ## or -- or ---
274
- ## or // or ///
275
- ## note: allow Final | First Leg as ONE name same as
276
- ## Final - First Leg or
277
- ## Final, First Leg
278
- ## for cut-off always MUST be more than two chars
279
- ##
280
- ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
281
- ## todo/fix: move to parser utils and add a method split_name or such?
282
- HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
283
- (?:\|{2,} |
284
- \+{2,} |
285
- -{2,} |
286
- \/{2,}
287
- )
288
- [ ]* ## allow (strip) trailing spaces
289
- /x
398
+ date = _build_date( m: node[2][:m],
399
+ d: node[2][:d],
400
+ y: node[2][:y],
401
+ start: @start )
290
402
 
291
- def find_round_header_name!( line )
292
- # assume everything left is the round name
293
- # extract all other items first (round name2, round pos, group name n pos, etc.)
294
-
295
- buf = line.dup
296
- logger.debug " find_round_header_name! line-before: >>#{buf}<<"
403
+ logger.debug( " date: #{date} with start: #{@start}")
297
404
 
405
+ @last_date = date # keep a reference for later use
406
+ @last_time = nil
298
407
 
299
- parts = buf.split( HEADER_SEP_RE )
300
- buf = parts[0]
408
+ ### quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
409
+ ## find a better way??
410
+ ## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
411
+ ## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
412
+ ### todo/check: just turn on for 2019/20 season or always? why? why not?
301
413
 
302
- buf.strip! # remove leading and trailing whitespace
414
+ ## todo/fix: add switch back to old @start_org
415
+ ## if year is date.year == @start.year-1 -- possible when full date with year set!!!
416
+ =begin
417
+ if @start.month != 1
418
+ if date.year == @start.year+1
419
+ logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
420
+ @start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
421
+ @start = Date.new( @start.year+1, 1, 1 )
422
+ end
423
+ end
424
+ =end
425
+ end
303
426
 
304
- logger.debug " find_round_name! line-after: >>#{buf}<<"
427
+ def parse_minutes( nodes )
428
+ ## parse goals by player
429
+ ## may have multiple minutes!!
430
+ goals = []
305
431
 
306
- ### bingo - assume what's left is the round name
432
+ node = nodes.shift ## get player
433
+ name = node[1]
307
434
 
308
- logger.debug " name: >>#{buf}<<"
309
- line.sub!( buf, '[ROUND.NAME]' )
435
+ loop do
436
+ goal = {}
437
+ goal[:name] = name
310
438
 
311
- buf
312
- end
439
+ node_type = nodes[0][0]
440
+ if node_type != :minute
441
+ puts "!! PARSE ERROR - minute expected to follow player (in goal); got #{node_type}:"
442
+ pp nodes
443
+ exit 1
444
+ end
313
445
 
314
- ## quick hack- collect all "fillwords" by language!!!!
315
- ## change later and add to sportdb-langs!!!!
316
- ##
317
- ## strip all "fillwords" e.g.:
318
- ## Nachtrag/Postponed/Addition/Supplemento names
319
- ##
320
- ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
321
- ROUND_EXTRA_WORDS_RE = /\b(?:
322
- Nachtrag | ## de
323
- Postponed | ## en
324
- Addition | ## en
325
- Supplemento ## es
326
- )
327
- \b/ix
446
+ node = nodes.shift
447
+ goal[:minute] = node[2][:m]
448
+ goal[:offset] = node[2][:offset] if node[2][:offset]
449
+
450
+ ## check for own goal or penalty or such
451
+ if !nodes.empty?
452
+ node_type = nodes[0][0]
453
+ if node_type == :og
454
+ nodes.shift
455
+ goal[:og] = true
456
+ elsif node_type == :pen
457
+ nodes.shift
458
+ goal[:pen] = true
459
+ else
460
+ # do nothing
461
+ end
462
+ end
328
463
 
329
- def parse_round_header( line )
330
- logger.debug "parsing round header line: >#{line}<"
464
+ goals << goal
331
465
 
332
- name = find_round_header_name!( line )
466
+ ## check if another minute ahead; otherwise break
467
+ break if nodes.empty?
333
468
 
334
- logger.debug " line: >#{line}<"
469
+ node_type = nodes[0][0]
335
470
 
336
- name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
337
- name = name.strip
471
+ ## Kane 39', 62', 67'
472
+ ## consume/eat-up (optional?) commas
473
+ if node_type == :','
474
+ nodes.shift
475
+ node_type = nodes[0][0]
476
+ end
338
477
 
339
- round = @rounds[ name ]
340
- if round.nil? ## auto-add / create if missing
341
- ## todo/check: add num (was pos) if present - why? why not?
342
- round = Import::Round.new( name: name )
343
- @rounds[ name ] = round
478
+ break if node_type != :minute
344
479
  end
345
480
 
346
- ## todo/check: if pos match (MUST always match for now)
347
- @last_round = round
348
- @last_group = nil # note: reset group to no group - why? why not?
349
- end
350
-
351
-
352
- def find_score!( line )
353
- # note: always call after find_dates !!!
354
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
355
- # -- note: score might have two digits too
356
481
 
357
- ScoreFormats.find!( line )
482
+ goals
358
483
  end
359
484
 
360
- def find_status!( line )
361
- StatusParser.find!( line )
362
- end
363
485
 
486
+ def parse_goals( nodes )
487
+ logger.debug "parse goals: >#{nodes}<"
488
+
489
+ goals1 = []
490
+ goals2 = []
491
+
492
+ while !nodes.empty?
493
+ node_type = nodes[0][0]
494
+ if node_type == :player
495
+ more_goals = parse_minutes( nodes )
496
+ ## hacky multi-line support for goals
497
+ ## using last_goal (1|2)
498
+ @last_goals == 2 ? goals2 += more_goals :
499
+ goals1 += more_goals
500
+ elsif node_type == :';' ## team separator
501
+ nodes.shift # eat-up
502
+ @last_goals = 2
503
+ elsif node_type == :none
504
+ nodes.shift # eat-up
505
+ else
506
+ puts "!! PARSE ERROR - unexpected node type in goals;; got #{node_type}:"
507
+ pp nodes
508
+ exit 1
509
+ end
510
+ end
364
511
 
365
- def try_parse_game( line )
366
- # note: clone line; for possible test do NOT modify in place for now
367
- # note: returns true if parsed, false if no match
368
- parse_game( line.dup )
369
- end
512
+ pp [goals1,goals2]
370
513
 
514
+ ## wrap in struct and add/append to match
515
+ =begin
516
+ class GoalStruct
517
+ ######
518
+ # flat struct for goals - one entry per goals
519
+ attr_accessor :name
520
+ attr_accessor :team # 1 or 2 ? check/todo: add team1 or team2 flag?
521
+ attr_accessor :minute, :offset
522
+ attr_accessor :penalty, :owngoal
523
+ attr_accessor :score1, :score2 # gets calculated
524
+ =end
371
525
 
372
- def parse_game( line )
373
- logger.debug "parsing game (fixture) line: >#{line}<"
374
-
375
- ## split by geo (@)
376
- ## split into parts e.g. break using @ !!!
377
- values = line.split( '@' )
378
-
379
- ## for now pass along ground, city (timezone) as string as is
380
- ## parse (map) later - why? why not??
381
- ### check for ground/stadium and cities
382
- ground = if values.size == 1
383
- nil ## no stadium
384
- elsif values.size == 2 # bingo!!!
385
- ## process stadium, city (timezone) etc.
386
- ## for now keep it simple - pass along "unparsed" all-in-one
387
- values[1].gsub( /[ \t]+/, ' ').strip ## squish
388
- else
389
- puts "!! ERROR - too many @-markers found in line:"
390
- puts line
391
- exit 1
392
- end
526
+ goals = []
527
+ goals1.each do |rec|
528
+ goal = Import::Goal.new(
529
+ player: rec[:name],
530
+ team: 1,
531
+ minute: rec[:minute],
532
+ offset: rec[:offset],
533
+ penalty: rec[:pen] || false, # note: pass along/use false NOT nil
534
+ owngoal: rec[:og] || false
535
+ )
536
+ goals << goal
537
+ end
538
+ goals2.each do |rec|
539
+ goal = Import::Goal.new(
540
+ player: rec[:name],
541
+ team: 2,
542
+ minute: rec[:minute],
543
+ offset: rec[:offset],
544
+ penalty: rec[:pen] || false, # note: pass along/use false NOT nil
545
+ owngoal: rec[:og] || false
546
+ )
547
+ goals << goal
548
+ end
393
549
 
550
+ pp goals
394
551
 
395
- line = values[0]
552
+ ## quick & dirty - auto add goals to last match
553
+ ## note - for hacky (quick& dirty) multi-line support
554
+ ## always append for now
555
+ match = @matches[-1]
556
+ match.goals ||= []
557
+ match.goals += goals
396
558
 
397
- @mapper_teams.map_teams!( line ) ### todo/fix: limit mapping to two(2) teams - why? why not? might avoid matching @ Barcelona ??
398
- teams = @mapper_teams.find_teams!( line )
399
- team1 = teams[0]
400
- team2 = teams[1]
559
+ ## todo/fix
560
+ ## sort by minute
561
+ ## PLUS auto-fill score1,score2 - why? why not?
562
+ end
401
563
 
402
- ## note: if we do NOT find two teams; return false - no match found
403
- if team1.nil? || team2.nil?
404
- logger.debug " no game match (two teams required) found for line: >#{line}<"
405
- return false
406
- end
407
564
 
408
- ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
409
- ## or [cancelled] or [postponed] etc.
410
- status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
565
+ def parse_match( nodes )
566
+ logger.debug( "parse match: >#{nodes}<" )
567
+
568
+ ## collect (possible) nodes by type
569
+ num = nil
570
+ date = nil
571
+ time = nil
572
+ teams = []
573
+ score = nil
574
+ more = []
575
+
576
+ while !nodes.empty?
577
+ node = nodes.shift
578
+ node_type = node[0]
579
+
580
+ if node_type == :num
581
+ num = node[1]
582
+ elsif node_type == :date
583
+ ## note: date wipes out/clear time
584
+ ## time MUST always come after date
585
+ time = nil
586
+ date = _build_date( m: node[2][:m],
587
+ d: node[2][:d],
588
+ y: node[2][:y],
589
+ start: @start )
590
+ elsif node_type == :time
591
+ ## note - there's no time (-only) type in ruby
592
+ ## use string (e.g. '14:56', '1:44')
593
+ ## use 01:44 or 1:44 ?
594
+ ## check for 0:00 or 24:00 possible?
595
+ time = '%d:%02d' % [node[2][:h], node[2][:m]]
596
+ elsif node_type == :team
597
+ teams << node[1]
598
+ elsif node_type == :score
599
+ ### todo/fix
600
+ ## add keywords (e.g. ht, ft or such) to Score.new - why? why not?
601
+ ## or use new Score.build( ht:, ft:, ) or such - why? why not?
602
+ ht = node[2][:ht] || [nil,nil]
603
+ ft = node[2][:ft] || [nil,nil]
604
+ et = node[2][:et] || [nil,nil]
605
+ p = node[2][:p] || [nil,nil]
606
+ values = [*ht, *ft, *et, *p]
607
+ ## pp values
608
+
609
+ score = Score.new( *values )
610
+ ## pp score
611
+ elsif node_type == :vs
612
+ ## skip; do nothing
613
+ ##
614
+ ## todo - add ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
615
+ ## or [cancelled] or [postponed] etc.
616
+ ## status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
617
+
618
+ elsif node_type == :'@' ||
619
+ node_type == :',' ||
620
+ node_type == :geo
621
+ ## e.g.
622
+ ## [:"@"], [:geo, "Stade de France"], [:","], [:geo, "Saint-Denis"]]
623
+ more << node[1] if node_type == :geo
624
+ else
625
+ puts "!! PARSE ERROR - unexpected node type #{node_type} in match line; got:"
626
+ pp node
627
+ exit 1
628
+ end
629
+ end
411
630
 
412
- ## pos = find_game_pos!( line )
413
631
 
414
- date = find_date!( line, start: @start ) ## date or datetime (but NOT time!)
632
+ if teams.size != 2
633
+ puts "!! PARSE ERROR - expected two teams; got #{teams.size}:"
634
+ pp teams
635
+ exit 1
636
+ end
415
637
 
416
- ## todo/fix:
417
- ## add support for find_time! e.g. 21.00 (or 21:00 ?)
638
+ team1 = teams[0]
639
+ team2 = teams[1]
418
640
 
641
+ @teams[ team1 ] += 1
642
+ @teams[ team2 ] += 1
419
643
 
420
644
 
421
645
  ###
@@ -424,14 +648,17 @@ class MatchParser ## simple match parser for team match schedules
424
648
  if date
425
649
  ### check: use date_v2 if present? why? why not?
426
650
  @last_date = date # keep a reference for later use
651
+ @last_time = nil
652
+ # @last_time = nil
427
653
  else
428
654
  date = @last_date # no date found; (re)use last seen date
429
655
  end
430
656
 
431
-
432
- score = find_score!( line )
433
-
434
- logger.debug " line: >#{line}<"
657
+ if time
658
+ @last_time = time
659
+ else
660
+ time = @last_time
661
+ end
435
662
 
436
663
 
437
664
  round = nil
@@ -451,20 +678,19 @@ class MatchParser ## simple match parser for team match schedules
451
678
  end
452
679
  end
453
680
  if round.nil?
454
- puts "!! ERROR - no matching round found for match date:"
681
+ puts "!! PARSE ERROR - no matching round found for match date:"
455
682
  pp date
456
683
  exit 1
457
684
  end
458
685
  end
459
686
  end
460
687
 
461
-
462
688
  ## todo/check: scores are integers or strings?
463
689
 
464
690
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
465
691
 
466
-
467
692
  ## split date in date & time if DateTime
693
+ =begin
468
694
  time_str = nil
469
695
  date_str = nil
470
696
  if date.is_a?( DateTime )
@@ -473,9 +699,21 @@ class MatchParser ## simple match parser for team match schedules
473
699
  elsif date.is_a?( Date )
474
700
  date_str = date.strftime('%Y-%m-%d')
475
701
  else # assume date is nil
476
- end
702
+ end
703
+ =end
704
+
705
+ time_str = nil
706
+ date_str = nil
707
+
708
+ date_str = date.strftime('%Y-%m-%d') if date
709
+ time_str = time if date && time
710
+
711
+
712
+ status = nil
713
+ ground = nil
477
714
 
478
- @matches << Import::Match.new( date: date_str,
715
+ @matches << Import::Match.new( num: num,
716
+ date: date_str,
479
717
  time: time_str,
480
718
  team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
481
719
  team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
@@ -486,212 +724,10 @@ class MatchParser ## simple match parser for team match schedules
486
724
  ground: ground )
487
725
  ### todo: cache team lookups in hash?
488
726
 
489
- =begin
490
- team1 = Team.find_by_key!( team1_key )
491
- team2 = Team.find_by_key!( team2_key )
492
-
493
- @last_team1 = team1 # store for later use for goals etc.
494
- @last_team2 = team2
495
-
496
-
497
- if @round.nil?
498
- ## no round header found; calculate round from date
499
-
500
- ###
501
- ## todo/fix: add some unit tests for round look up
502
- # fix: use date_v2 if present!! (old/original date; otherwise use date)
503
-
504
- #
505
- # fix: check - what to do with hours e.g. start_at use 00:00 and for end_at use 23.59 ??
506
- # -- for now - remove hours (e.g. use end_of_day and beginning_of_day)
507
-
508
- ##
509
- # note: start_at and end_at are dates ONLY (NOT datetime)
510
- # - do NOT pass in hours etc. in query
511
- # again use --> date.end_of_day, date.beginning_of_day
512
- # new: not working: date.to_date, date.to_date
513
- # will not find round if start_at same as date !! (in theory hours do not matter)
514
-
515
- ###
516
- # hack:
517
- # special case for sqlite3 (date compare not working reliably; use casts)
518
- # fix: move to adapter_name to activerecord_utils as sqlite? or similar?
519
-
520
- if ActiveRecord::Base.connection.adapter_name.downcase.starts_with?( 'sqlite' )
521
- logger.debug( " [sqlite] using sqlite-specific query for date compare for rounds finder" )
522
- round = Round.where( 'event_id = ? AND ( julianday(start_at) <= julianday(?)'+
523
- 'AND julianday(end_at) >= julianday(?))',
524
- @event.id, date.to_date, date.to_date).first
525
- else # all other dbs (postgresql, mysql, etc.)
526
- round = Round.where( 'event_id = ? AND (start_at <= ? AND end_at >= ?)',
527
- @event.id, date.to_date, date.to_date).first
528
- end
529
-
530
- pp round
531
- if round.nil?
532
- logger.warn( " !!!! no round match found for date #{date}" )
533
- pp Round.all
534
-
535
- ###################################
536
- # -- try auto-adding matchday
537
- round = Round.new
538
-
539
- round_attribs = {
540
- event_id: @event.id,
541
- name: "Matchday #{date.to_date}",
542
- pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
543
- start_at: date.to_date,
544
- end_at: date.to_date
545
- }
546
-
547
- logger.info( " auto-add round >Matchday #{date.to_date}<" )
548
- logger.debug round_attribs.to_json
549
-
550
- round.update_attributes!( round_attribs )
551
-
552
- @patch_round_ids_pos << round.id # todo/check - add just id or "full" record as now - why? why not?
553
- end
554
-
555
- # store pos for auto-number next round if missing
556
- # - note: only if greater/bigger than last; use max
557
- # - note: last_round_pos might be nil - thus set to 0
558
- if round.pos > 999000
559
- # note: do NOT update last_round_pos for to-be-patched rounds
560
- else
561
- @last_round_pos = [round.pos,@last_round_pos||0].max
562
- end
563
-
564
- ## note: will crash (round.pos) if round is nil
565
- logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
566
- else
567
- ## use round from last round header
568
- round = @round
569
- end
570
-
571
-
572
- ### check if games exists
573
- ## with these teams in this round; if yes, only update
574
- game = Game.find_by_round_id_and_team1_id_and_team2_id(
575
- round.id, team1.id, team2.id
576
- )
577
-
578
- game_attribs = {
579
- score1i: scores[0],
580
- score2i: scores[1],
581
- score1: scores[2],
582
- score2: scores[3],
583
- score1et: scores[4],
584
- score2et: scores[5],
585
- score1p: scores[6],
586
- score2p: scores[7],
587
- play_at: date,
588
- play_at_v2: date_v2,
589
- postponed: postponed,
590
- knockout: round.knockout, ## note: for now always use knockout flag from round - why? why not??
591
- ground_id: ground.present? ? ground.id : nil,
592
- group_id: @group.present? ? @group.id : nil
593
- }
594
-
595
- game_attribs[ :pos ] = pos if pos.present?
596
-
597
- ####
598
- # note: only update if any changes (or create if new record)
599
- if game.present? &&
600
- game.check_for_changes( game_attribs ) == false
601
- logger.debug " skip update game #{game.id}; no changes found"
602
- else
603
- if game.present?
604
- logger.debug "update game #{game.id}:"
605
- else
606
- logger.debug "create game:"
607
- game = Game.new
608
-
609
- more_game_attribs = {
610
- round_id: round.id,
611
- team1_id: team1.id,
612
- team2_id: team2.id
613
- }
614
-
615
- ## NB: use round.games.count for pos
616
- ## lets us add games out of order if later needed
617
- more_game_attribs[ :pos ] = round.games.count+1 if pos.nil?
618
-
619
- game_attribs = game_attribs.merge( more_game_attribs )
620
- end
621
-
622
- logger.debug game_attribs.to_json
623
- game.update_attributes!( game_attribs )
624
- end
625
-
626
- @last_game = game # store for later reference (e.g. used for goals etc.)
627
- =end
628
-
629
- return true # game match found
630
- end # method parse_game
631
-
632
-
633
-
634
- def try_parse_date_header( line )
635
- # note: clone line; for possible test do NOT modify in place for now
636
- # note: returns true if parsed, false if no match
637
- parse_date_header( line.dup )
638
- end
639
-
640
- def find_date!( line, start: )
641
- ## NB: lets us pass in start_at/end_at date (for event)
642
- # for auto-complete year
643
-
644
- # extract date from line
645
- # and return it
646
- # NB: side effect - removes date from line string
647
- DateFormats.find!( line, start: start )
727
+ ## hacky goals support
728
+ ### reset/toggle 1/2
729
+ @last_goals = 1
648
730
  end
649
-
650
-
651
- def parse_date_header( line )
652
- # note: returns true if parsed, false if no match
653
-
654
- # line with NO teams plus include date e.g.
655
- # [Fri Jun/17] or
656
- # Jun/17 or
657
- # Jun/17: etc.
658
-
659
- @mapper_teams.map_teams!( line )
660
- teams = @mapper_teams.find_teams!( line )
661
- team1 = teams[0]
662
- team2 = teams[1]
663
-
664
- date = find_date!( line, start: @start )
665
-
666
- if date && team1.nil? && team2.nil?
667
- logger.debug( "date header line found: >#{line}<")
668
- logger.debug( " date: #{date} with start: #{@start}")
669
-
670
- @last_date = date # keep a reference for later use
671
-
672
- ### quick "corona" hack - support seasons going beyond 12 months (see swiss league 2019/20 and others!!)
673
- ## find a better way??
674
- ## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
675
- ## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
676
- ### todo/check: just turn on for 2019/20 season or always? why? why not?
677
-
678
- ## todo/fix: add switch back to old @start_org
679
- ## if year is date.year == @start.year-1 -- possible when full date with year set!!!
680
- if @start.month != 1
681
- if date.year == @start.year+1
682
- logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
683
- @start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
684
- @start = Date.new( @start.year+1, 1, 1 )
685
- end
686
- end
687
-
688
- true
689
- else
690
- false
691
- end
692
- end
693
-
694
-
695
-
696
731
  end # class MatchParser
697
732
  end # module SportDb
733
+