sportdb-formats 1.2.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,38 +1,37 @@
1
1
 
2
2
  module SportDb
3
3
 
4
- class MatchParser ## simple match parser for team match schedules
4
+ class MatchParser ## simple match parser for team match schedules
5
5
 
6
- def self.parse( lines, teams, start: )
6
+
7
+ def self.parse( lines, start: )
7
8
  ## todo/fix: add support for txt and lines
8
9
  ## check if lines_or_txt is an array or just a string
9
10
  ## use teams: like start: why? why not?
10
- parser = new( lines, teams, start )
11
+ parser = new( lines, start )
11
12
  parser.parse
12
13
  end
13
14
 
14
15
 
15
16
  include Logging ## e.g. logger#debug, logger#info, etc.
16
- include ParserHelper ## e.g. read_lines, etc.
17
17
 
18
+ def self.debug=(value) @@debug = value; end
19
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
20
+ def debug?() self.class.debug?; end
18
21
 
19
- def initialize( lines, teams, start )
20
- # for convenience split string into lines
21
- ## note: removes/strips empty lines
22
- ## todo/check: change to text instead of array of lines - why? why not?
23
-
24
- ## note - wrap in enumerator/iterator a.k.a lines reader
25
- @lines = LinesReader.new( lines.is_a?( String ) ?
26
- read_lines( lines ) :
27
- lines
28
- )
29
-
30
- @mapper_teams = TeamMapper.new( teams )
31
- @start = start
32
- end
22
+ def _read_lines( txt ) ## todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
23
+ ## returns an array of lines with comments and empty lines stripped / removed
24
+ lines = []
25
+ txt.each_line do |line| ## preprocess
26
+ line = line.strip
33
27
 
28
+ next if line.empty? || line.start_with?('#') ### skip empty lines and comments
29
+ line = line.sub( /#.*/, '' ).strip ### cut-off end-of line comments too
30
+ lines << line
31
+ end
32
+ lines
33
+ end
34
34
 
35
-
36
35
 
37
36
  ## note: colon (:) MUST be followed by one (or more) spaces
38
37
  ## make sure mon feb 12 18:10 will not match
@@ -45,7 +44,7 @@ class MatchParser ## simple match parser for team match schedules
45
44
  ## Group B: - remove colon
46
45
  ## or lookup first
47
46
 
48
- ATTRIB_REGEX = /^
47
+ ATTRIB_RE = %r{^
49
48
  [ ]*? # slurp leading spaces
50
49
  (?<key>[^:|\]\[()\/; -]
51
50
  [^:|\]\[()\/;]{0,30}
@@ -55,96 +54,185 @@ class MatchParser ## simple match parser for team match schedules
55
54
  (?<value>.+)
56
55
  [ ]*? # slurp trailing spaces
57
56
  $
58
- /ix
57
+ }ix
58
+
59
+ #
60
+ # todo/fix: change start to start: too!!!
61
+ # might be optional in the future!! - why? why not?
62
+
63
+ def initialize( lines, start )
64
+ # for convenience split string into lines
65
+ ## note: removes/strips empty lines
66
+ ## todo/check: change to text instead of array of lines - why? why not?
67
+
68
+ ## note - wrap in enumerator/iterator a.k.a lines reader
69
+ @lines = lines.is_a?( String ) ?
70
+ _read_lines( lines ) : lines
71
+
72
+ @start = start
73
+ end
59
74
 
60
75
 
61
76
  def parse
62
77
  @last_date = nil
78
+ @last_time = nil
63
79
  @last_round = nil
64
80
  @last_group = nil
65
81
 
82
+ ## last_goals - rename to (longer) @last_team_goals or such - why? why not?
83
+ @last_goals = 1 ## toggle between 1|2 - hacky (quick & dirty) support for multi-line goals, fix soon!
84
+
85
+ @teams = Hash.new(0) ## track counts (only) for now for (internal) team stats - why? why not?
66
86
  @rounds = {}
67
87
  @groups = {}
68
88
  @matches = []
69
89
 
70
90
  @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
71
91
 
72
- ## todo/fix - use @lines.rewind first here - why? why not?
73
- @lines.each do |line|
74
-
75
- if is_round_def?( line )
92
+
93
+
94
+ @parser = Parser.new
95
+
96
+ @errors = []
97
+ @tree = []
98
+
99
+ attrib_found = false
100
+
101
+ @lines.each_with_index do |line,i|
102
+
103
+ if debug?
104
+ puts
105
+ puts "line >#{line}<"
106
+ end
107
+
108
+ ## skip new (experimental attrib syntax)
109
+ if attrib_found == false &&
110
+ ATTRIB_RE.match?( line )
111
+ ## note: check attrib regex AFTER group def e.g.:
112
+ ## Group A:
113
+ ## Group B: etc.
114
+ ## todo/fix - change Group A: to Group A etc.
115
+ ## Group B: to Group B
116
+ attrib_found = true
117
+ ## logger.debug "skipping key/value line - >#{line}<"
118
+ next
119
+ end
120
+
121
+ if attrib_found
122
+ ## check if line ends with dot
123
+ ## if not slurp up lines to the next dot!!!
124
+ ## logger.debug "skipping key/value line - >#{line}<"
125
+ attrib_found = false if line.end_with?( '.' )
126
+ # logger.debug "skipping key/value line (cont.) - >#{line}<"
127
+ next
128
+ end
129
+
130
+ t, error_messages = @parser.parse_with_errors( line )
131
+
132
+
133
+ if error_messages.size > 0
134
+ ## add to "global" error list
135
+ ## make a triplet tuple (file / msg / line text)
136
+ error_messages.each do |msg|
137
+ @errors << [ '<file>', ## add filename here
138
+ msg,
139
+ line
140
+ ]
141
+ end
142
+ end
143
+
144
+ pp t if debug?
145
+
146
+ @tree << t
147
+ end # each lines
148
+
149
+ ## pp @tree
150
+
151
+ ## report parse errors here - why? why not?
152
+
153
+
154
+
155
+ @tree.each do |nodes|
156
+
157
+ node_type = nodes[0][0] ## get node type of first/head node
158
+
159
+ if node_type == :round_def
76
160
  ## todo/fix: add round definition (w begin n end date)
77
161
  ## todo: do not patch rounds with definition (already assume begin/end date is good)
78
162
  ## -- how to deal with matches that get rescheduled/postponed?
79
- parse_round_def( line )
80
- elsif is_round?( line )
81
- parse_round_header( line )
82
- elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
163
+ parse_round_def( nodes )
164
+ elsif node_type == :group_def ## NB: group goes after round (round may contain group marker too)
83
165
  ### todo: add pipe (|) marker (required)
84
- parse_group_def( line )
85
- elsif is_group?( line )
86
- ## -- lets you set group e.g. Group A etc.
87
- parse_group_header( line )
88
-
89
- elsif m=ATTRIB_REGEX.match( line )
90
- ## note: check attrib regex AFTER group def e.g.:
91
- ## Group A:
92
- ## Group B: etc.
93
- ## todo/fix - change Group A: to Group A etc.
94
- ## Group B: to Group B
95
-
96
- ## check if line ends with dot
97
- ## if not slurp up lines to the next do!!!
98
- logger.debug "skipping key/value line - >#{line}<"
99
- while !line.end_with?( '.' ) || line.nil? do
100
- line = @lines.next
101
- logger.debug "skipping key/value line (cont.) - >#{line}<"
102
- end
103
- elsif is_goals?( line )
104
- ## note - goals must be AFTER attributes!!!
105
- logger.debug "matched goals line: >#{line}<"
106
- logger.debug " try parse:"
107
-
108
- goals = GoalsFinder.new.find!( line )
109
- pp goals
110
- ## quick & dirty - auto add goals to last match
111
- match = @matches[-1]
112
- match.goals = goals
113
-
114
- elsif try_parse_game( line )
115
- # do nothing here
116
- elsif try_parse_date_header( line )
117
- # do nothing here
118
- else
119
- logger.warn "skipping line (no match found): >#{line}<"
120
- @warns << line
166
+ parse_group_def( nodes )
167
+
168
+ elsif node_type == :player ||
169
+ node_type == :none # e.g [[:none], [:";"], [:player, "Xhaka"],...]
170
+ ## note - for now goals line MUST start with player!!
171
+ parse_goals( nodes )
172
+ else
173
+ ## try to be liberal/flexible
174
+ ## eat-up nodes as we go
175
+ ## assume match with group / round header
176
+ ## etc. on its own line or not
177
+
178
+ ## preprocess possible before match nodes
179
+
180
+ while !nodes.empty? do
181
+ node_type = nodes[0][0] ## get node type of first/head node
182
+ if node_type == :round
183
+ node = nodes.shift ## eat-up
184
+ parse_round_header( node )
185
+ elsif node_type == :leg
186
+ node = nodes.shift ## eat-up
187
+ ## ignore (round) leg for now - add later leg - 1|2|3 etc!!!
188
+ ## needs to get added to db/schema too!!!!
189
+ ## add @last_leg = nil or 1|2|3 etc.
190
+ elsif node_type == :group
191
+ ## -- lets you set group e.g. Group A etc.
192
+ node = nodes.shift ## eat-up
193
+ parse_group_header( node )
194
+ elsif node_type == :date
195
+ node = nodes.shift ## eat-up
196
+ parse_date_header( node )
197
+ ## add time here too - why? why not?
198
+ ## add skip comma separator here too - why? why not?
199
+ ## "slurp-up" in upstream parser?
200
+ ## e.g. round, group or group, round ?
201
+ else
202
+ break
203
+ end
204
+ end
205
+ next if nodes.empty?
206
+
207
+ ## rename to try_parse_match - why? why not?
208
+ parse_match( nodes )
121
209
  end
122
- end # lines.each
123
210
 
124
- [@matches, @rounds.values, @groups.values]
211
+ end # tree.each
212
+
213
+ ## note - team keys are names and values are "internal" stats!!
214
+ ## and NOT team/club/nat_team structs!!
215
+ [@teams.keys, @matches, @rounds.values, @groups.values]
125
216
  end # method parse
126
217
 
127
218
 
128
219
 
129
- def parse_group_header( line )
130
- logger.debug "parsing group header line: >#{line}<"
220
+ def parse_group_header( node )
221
+ logger.debug "parsing group header: >#{node}<"
131
222
 
132
223
  # note: group header resets (last) round (allows, for example):
133
224
  # e.g.
134
225
  # Group Playoffs/Replays -- round header
135
226
  # team1 team2 -- match
136
- # Group B: -- group header
227
+ # Group B -- group header
137
228
  # team1 team2 - match (will get new auto-matchday! not last round)
138
229
  @last_round = nil
139
230
 
140
- name = find_group_name!( line )
141
-
142
- logger.debug " name: >#{name}<"
143
- logger.debug " line: >#{line}<"
231
+ name = node[1]
144
232
 
145
233
  group = @groups[ name ]
146
234
  if group.nil?
147
- puts "!! ERROR - no group def found for >#{name}<"
235
+ puts "!! PARSE ERROR - no group def found for >#{name}<"
148
236
  exit 1
149
237
  end
150
238
 
@@ -152,63 +240,99 @@ class MatchParser ## simple match parser for team match schedules
152
240
  @last_group = group
153
241
  end
154
242
 
155
- def parse_group_def( line )
156
- logger.debug "parsing group def line: >#{line}<"
157
243
 
158
- @mapper_teams.map_teams!( line )
159
- teams = @mapper_teams.find_teams!( line )
244
+ def parse_group_def( nodes )
245
+ logger.debug "parsing group def: >#{nodes}<"
246
+
247
+ ## e.g
248
+ ## [:group_def, "Group A"],
249
+ ## [:team, "Germany"],
250
+ ## [:team, "Scotland"],
251
+ ## [:team, "Hungary"],
252
+ ## [:team, "Switzerland"]
160
253
 
161
- name = find_group_name!( line )
254
+ node = nodes[0]
255
+ name = node[1] ## group name
162
256
 
163
- logger.debug " line: >#{line}<"
257
+ teams = nodes[1..-1].map do |node|
258
+ if node[0] == :team
259
+ team = node[1]
260
+ @teams[ team ] += 1
261
+ team
262
+ else
263
+ puts "!! PARSE ERROR - only teams expected in group def; got:"
264
+ pp nodes
265
+ exit 1
266
+ end
267
+ end
164
268
 
165
269
  ## todo/check/fix: add back group key - why? why not?
166
270
  group = Import::Group.new( name: name,
167
- teams: teams.map {|team| team.name } )
271
+ teams: teams )
168
272
 
169
273
  @groups[ name ] = group
170
274
  end
171
275
 
172
276
 
173
- def find_group_name!( line )
174
- ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
175
- ## nb: (?:) = is for non-capturing group(ing)
176
-
177
- ## fix:
178
- ## get Group|Gruppe|Grupo from lang!!!! do NOT hardcode in place
179
-
180
- ## todo:
181
- ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
182
- regex = /\b
183
- (?:
184
- (Group | Gruppe | Grupo)
185
- [ ]+
186
- (\d+ | [A-Z]+)
187
- )
188
- \b/x
277
+ def _build_date( m:, d:, y:, start: )
189
278
 
190
- m = regex.match( line )
191
279
 
192
- return nil if m.nil?
280
+ ## quick debug hack
281
+ if m == 2 && d == 29
282
+ puts "quick check feb/29 dates"
283
+ pp [d,m,y]
284
+ pp start
285
+ end
193
286
 
194
- name = m[0]
287
+ if y.nil? ## try to calculate year
288
+ y = if m > start.month ||
289
+ (m == start.month && d >= start.day)
290
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
291
+ start.year
292
+ else
293
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
294
+ start.year+1
295
+ end
296
+ end
195
297
 
196
- logger.debug " name: >#{name}<"
197
298
 
198
- line.sub!( name, '[GROUP.NAME]' )
199
299
 
200
- name
300
+ Date.new( y,m,d ) ## y,m,d
201
301
  end
202
302
 
303
+ def parse_round_def( nodes )
304
+ logger.debug "parsing round def: >#{nodes}<"
203
305
 
204
- def parse_round_def( line )
205
- logger.debug "parsing round def line: >#{line}<"
206
-
207
- start_date = find_date!( line, start: @start )
208
- end_date = find_date!( line, start: @start )
306
+ ## e.g. [[:round_def, "Matchday 1"], [:duration, "Fri Jun/14 - Tue Jun/18"]]
307
+ ## [[:round_def, "Matchday 2"], [:duration, "Wed Jun/19 - Sat Jun/22"]]
308
+ ## [[:round_def, "Matchday 3"], [:duration, "Sun Jun/23 - Wed Jun/26"]]
209
309
 
210
- # note: if end_date missing -- assume start_date is (==) end_at
211
- end_date = start_date if end_date.nil?
310
+ node = nodes[0]
311
+ name = node[1]
312
+ # NB: use extracted round name for knockout check
313
+ # knockout_flag = is_knockout_round?( name )
314
+
315
+ node = nodes[1]
316
+ node_type = node[0]
317
+ if node_type == :date
318
+ start_date = end_date = _build_date( m: node[2][:m],
319
+ d: node[2][:d],
320
+ y: node[2][:y],
321
+ start: @start)
322
+ elsif node_type == :duration
323
+ start_date = _build_date( m: node[2][:start][:m],
324
+ d: node[2][:start][:d],
325
+ y: node[2][:start][:y],
326
+ start: @start)
327
+ end_date = _build_date( m: node[2][:end][:m],
328
+ d: node[2][:end][:d],
329
+ y: node[2][:end][:y],
330
+ start: @start)
331
+ else
332
+ puts "!! PARSE ERROR - expected date or duration for round def; got:"
333
+ pp nodes
334
+ exit 1
335
+ end
212
336
 
213
337
  # note: - NOT needed; start_at and end_at are saved as date only (NOT datetime)
214
338
  # set hours,minutes,secs to beginning and end of day (do NOT use default 12.00)
@@ -218,13 +342,15 @@ class MatchParser ## simple match parser for team match schedules
218
342
 
219
343
  # note: make sure start_at/end_at is date only (e.g. use start_at.to_date)
220
344
  # sqlite3 saves datetime in date field as datetime, for example (will break date compares later!)
221
- start_date = start_date.to_date
222
- end_date = end_date.to_date
223
345
 
346
+ # note - _build_date always returns Date for now - no longer needed!!
347
+ # start_date = start_date.to_date
348
+ # end_date = end_date.to_date
224
349
 
225
- name = find_round_def_name!( line )
226
- # NB: use extracted round name for knockout check
227
- knockout_flag = is_knockout_round?( name )
350
+
351
+ ## fix:
352
+ ## remove knockout_flag - why? why not?
353
+ knockout_flag = false
228
354
 
229
355
 
230
356
  logger.debug " start_date: #{start_date}"
@@ -232,8 +358,6 @@ class MatchParser ## simple match parser for team match schedules
232
358
  logger.debug " name: >#{name}<"
233
359
  logger.debug " knockout_flag: #{knockout_flag}"
234
360
 
235
- logger.debug " line: >#{line}<"
236
-
237
361
  round = Import::Round.new( name: name,
238
362
  start_date: start_date,
239
363
  end_date: end_date,
@@ -244,204 +368,278 @@ class MatchParser ## simple match parser for team match schedules
244
368
  end
245
369
 
246
370
 
371
+ def parse_round_header( node )
372
+ logger.debug "parsing round header: >#{node}<"
247
373
 
248
- def find_round_def_name!( line )
249
- # assume everything before pipe (\) is the round name
250
- # strip [ROUND.POS], todo:?? [ROUND.NAME2]
251
-
252
- # todo/fix: add name2 w/ // or / why? why not?
253
- # -- strip / or / chars
374
+ name = node[1]
254
375
 
255
- buf = line.dup
256
- logger.debug " find_round_def_name! line-before: >>#{buf}<<"
376
+ # name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
377
+ # name = name.strip
257
378
 
258
- ## cut-off everything after (including) pipe (|)
259
- buf = buf[ 0...buf.index('|') ]
260
- buf.strip!
261
-
262
- logger.debug " find_round_def_name! line-after: >>#{buf}<<"
379
+ round = @rounds[ name ]
380
+ if round.nil? ## auto-add / create if missing
381
+ ## todo/check: add num (was pos) if present - why? why not?
382
+ round = Import::Round.new( name: name )
383
+ @rounds[ name ] = round
384
+ end
263
385
 
264
- logger.debug " name: >>#{buf}<<"
265
- line.sub!( buf, '[ROUND.NAME]' )
386
+ ## todo/check: if pos match (MUST always match for now)
387
+ @last_round = round
388
+ @last_group = nil # note: reset group to no group - why? why not?
266
389
 
267
- buf
390
+ ## todo/fix/check
391
+ ## make round a scope for date(time) - why? why not?
392
+ ## reset date/time e.g. @last_date = nil !!!!
268
393
  end
269
394
 
395
+ def parse_date_header( node )
396
+ logger.debug( "date header: >#{node}<")
270
397
 
271
- ## split by or || or |||
272
- ## or ++ or +++
273
- ## or -- or ---
274
- ## or // or ///
275
- ## note: allow Final | First Leg as ONE name same as
276
- ## Final - First Leg or
277
- ## Final, First Leg
278
- ## for cut-off always MUST be more than two chars
279
- ##
280
- ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
281
- ## todo/fix: move to parser utils and add a method split_name or such?
282
- HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
283
- (?:\|{2,} |
284
- \+{2,} |
285
- -{2,} |
286
- \/{2,}
287
- )
288
- [ ]* ## allow (strip) trailing spaces
289
- /x
398
+ date = _build_date( m: node[2][:m],
399
+ d: node[2][:d],
400
+ y: node[2][:y],
401
+ start: @start )
290
402
 
291
- def find_round_header_name!( line )
292
- # assume everything left is the round name
293
- # extract all other items first (round name2, round pos, group name n pos, etc.)
294
-
295
- buf = line.dup
296
- logger.debug " find_round_header_name! line-before: >>#{buf}<<"
403
+ logger.debug( " date: #{date} with start: #{@start}")
297
404
 
405
+ @last_date = date # keep a reference for later use
406
+ @last_time = nil
298
407
 
299
- parts = buf.split( HEADER_SEP_RE )
300
- buf = parts[0]
408
+ ### quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
409
+ ## find a better way??
410
+ ## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
411
+ ## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
412
+ ### todo/check: just turn on for 2019/20 season or always? why? why not?
301
413
 
302
- buf.strip! # remove leading and trailing whitespace
414
+ ## todo/fix: add switch back to old @start_org
415
+ ## if year is date.year == @start.year-1 -- possible when full date with year set!!!
416
+ =begin
417
+ if @start.month != 1
418
+ if date.year == @start.year+1
419
+ logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
420
+ @start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
421
+ @start = Date.new( @start.year+1, 1, 1 )
422
+ end
423
+ end
424
+ =end
425
+ end
303
426
 
304
- logger.debug " find_round_name! line-after: >>#{buf}<<"
427
+ def parse_minutes( nodes )
428
+ ## parse goals by player
429
+ ## may have multiple minutes!!
430
+ goals = []
305
431
 
306
- ### bingo - assume what's left is the round name
432
+ node = nodes.shift ## get player
433
+ name = node[1]
307
434
 
308
- logger.debug " name: >>#{buf}<<"
309
- line.sub!( buf, '[ROUND.NAME]' )
435
+ loop do
436
+ goal = {}
437
+ goal[:name] = name
310
438
 
311
- buf
312
- end
439
+ node_type = nodes[0][0]
440
+ if node_type != :minute
441
+ puts "!! PARSE ERROR - minute expected to follow player (in goal); got #{node_type}:"
442
+ pp nodes
443
+ exit 1
444
+ end
313
445
 
314
- ## quick hack- collect all "fillwords" by language!!!!
315
- ## change later and add to sportdb-langs!!!!
316
- ##
317
- ## strip all "fillwords" e.g.:
318
- ## Nachtrag/Postponed/Addition/Supplemento names
319
- ##
320
- ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
321
- ROUND_EXTRA_WORDS_RE = /\b(?:
322
- Nachtrag | ## de
323
- Postponed | ## en
324
- Addition | ## en
325
- Supplemento ## es
326
- )
327
- \b/ix
446
+ node = nodes.shift
447
+ goal[:minute] = node[2][:m]
448
+ goal[:offset] = node[2][:offset] if node[2][:offset]
449
+
450
+ ## check for own goal or penalty or such
451
+ if !nodes.empty?
452
+ node_type = nodes[0][0]
453
+ if node_type == :og
454
+ nodes.shift
455
+ goal[:og] = true
456
+ elsif node_type == :pen
457
+ nodes.shift
458
+ goal[:pen] = true
459
+ else
460
+ # do nothing
461
+ end
462
+ end
328
463
 
329
- def parse_round_header( line )
330
- logger.debug "parsing round header line: >#{line}<"
464
+ goals << goal
331
465
 
332
- name = find_round_header_name!( line )
466
+ ## check if another minute ahead; otherwise break
467
+ break if nodes.empty?
333
468
 
334
- logger.debug " line: >#{line}<"
469
+ node_type = nodes[0][0]
335
470
 
336
- name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
337
- name = name.strip
471
+ ## Kane 39', 62', 67'
472
+ ## consume/eat-up (optional?) commas
473
+ if node_type == :','
474
+ nodes.shift
475
+ node_type = nodes[0][0]
476
+ end
338
477
 
339
- round = @rounds[ name ]
340
- if round.nil? ## auto-add / create if missing
341
- ## todo/check: add num (was pos) if present - why? why not?
342
- round = Import::Round.new( name: name )
343
- @rounds[ name ] = round
478
+ break if node_type != :minute
344
479
  end
345
480
 
346
- ## todo/check: if pos match (MUST always match for now)
347
- @last_round = round
348
- @last_group = nil # note: reset group to no group - why? why not?
481
+
482
+ goals
349
483
  end
350
484
 
351
485
 
352
- def find_score!( line )
353
- # note: always call after find_dates !!!
354
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
355
- # -- note: score might have two digits too
486
+ def parse_goals( nodes )
487
+ logger.debug "parse goals: >#{nodes}<"
488
+
489
+ goals1 = []
490
+ goals2 = []
491
+
492
+ while !nodes.empty?
493
+ node_type = nodes[0][0]
494
+ if node_type == :player
495
+ more_goals = parse_minutes( nodes )
496
+ ## hacky multi-line support for goals
497
+ ## using last_goal (1|2)
498
+ @last_goals == 2 ? goals2 += more_goals :
499
+ goals1 += more_goals
500
+ elsif node_type == :';' ## team separator
501
+ nodes.shift # eat-up
502
+ @last_goals = 2
503
+ elsif node_type == :none
504
+ nodes.shift # eat-up
505
+ else
506
+ puts "!! PARSE ERROR - unexpected node type in goals;; got #{node_type}:"
507
+ pp nodes
508
+ exit 1
509
+ end
510
+ end
356
511
 
357
- ScoreFormats.find!( line )
358
- end
512
+ pp [goals1,goals2]
359
513
 
360
- def find_status!( line )
361
- StatusParser.find!( line )
362
- end
514
+ ## wrap in struct and add/append to match
515
+ =begin
516
+ class GoalStruct
517
+ ######
518
+ # flat struct for goals - one entry per goals
519
+ attr_accessor :name
520
+ attr_accessor :team # 1 or 2 ? check/todo: add team1 or team2 flag?
521
+ attr_accessor :minute, :offset
522
+ attr_accessor :penalty, :owngoal
523
+ attr_accessor :score1, :score2 # gets calculated
524
+ =end
363
525
 
526
+ goals = []
527
+ goals1.each do |rec|
528
+ goal = Import::Goal.new(
529
+ player: rec[:name],
530
+ team: 1,
531
+ minute: rec[:minute],
532
+ offset: rec[:offset],
533
+ penalty: rec[:pen] || false, # note: pass along/use false NOT nil
534
+ owngoal: rec[:og] || false
535
+ )
536
+ goals << goal
537
+ end
538
+ goals2.each do |rec|
539
+ goal = Import::Goal.new(
540
+ player: rec[:name],
541
+ team: 2,
542
+ minute: rec[:minute],
543
+ offset: rec[:offset],
544
+ penalty: rec[:pen] || false, # note: pass along/use false NOT nil
545
+ owngoal: rec[:og] || false
546
+ )
547
+ goals << goal
548
+ end
364
549
 
365
- ### todo/check - include (optional) leading space in regex - why? why not?
366
- NUM_RE = /^[ ]*\(
367
- (?<num>[0-9]{1,3})
368
- \)
369
- /x
370
-
371
- def find_num!( line )
372
- ## check for leading match number e.g.
373
- ## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
374
- m = line.match( NUM_RE )
375
- if m
376
- num = m[:num].to_i(10) ## allows 01/02/07 etc. -- why? why not?
377
- match_str = m[0]
378
- line.sub!( match_str, '[NUM]' )
379
- num
380
- else
381
- nil
382
- end
383
- end
550
+ pp goals
384
551
 
552
+ ## quick & dirty - auto add goals to last match
553
+ ## note - for hacky (quick& dirty) multi-line support
554
+ ## always append for now
555
+ match = @matches[-1]
556
+ match.goals ||= []
557
+ match.goals += goals
385
558
 
386
- def try_parse_game( line )
387
- # note: clone line; for possible test do NOT modify in place for now
388
- # note: returns true if parsed, false if no match
389
- parse_game( line.dup )
559
+ ## todo/fix
560
+ ## sort by minute
561
+ ## PLUS auto-fill score1,score2 - why? why not?
390
562
  end
391
563
 
392
564
 
393
- def parse_game( line )
394
- logger.debug "parsing game (fixture) line: >#{line}<"
395
-
396
- ## split by geo (@)
397
- ## split into parts e.g. break using @ !!!
398
- values = line.split( '@' )
399
-
400
- ## for now pass along ground, city (timezone) as string as is
401
- ## parse (map) later - why? why not??
402
- ### check for ground/stadium and cities
403
- ground = if values.size == 1
404
- nil ## no stadium
405
- elsif values.size == 2 # bingo!!!
406
- ## process stadium, city (timezone) etc.
407
- ## for now keep it simple - pass along "unparsed" all-in-one
408
- values[1].gsub( /[ \t]+/, ' ').strip ## squish
409
- else
410
- puts "!! ERROR - too many @-markers found in line:"
411
- puts line
412
- exit 1
413
- end
565
+ def parse_match( nodes )
566
+ logger.debug( "parse match: >#{nodes}<" )
567
+
568
+ ## collect (possible) nodes by type
569
+ num = nil
570
+ date = nil
571
+ time = nil
572
+ teams = []
573
+ score = nil
574
+ more = []
575
+
576
+ while !nodes.empty?
577
+ node = nodes.shift
578
+ node_type = node[0]
579
+
580
+ if node_type == :num
581
+ num = node[1]
582
+ elsif node_type == :date
583
+ ## note: date wipes out/clear time
584
+ ## time MUST always come after date
585
+ time = nil
586
+ date = _build_date( m: node[2][:m],
587
+ d: node[2][:d],
588
+ y: node[2][:y],
589
+ start: @start )
590
+ elsif node_type == :time
591
+ ## note - there's no time (-only) type in ruby
592
+ ## use string (e.g. '14:56', '1:44')
593
+ ## use 01:44 or 1:44 ?
594
+ ## check for 0:00 or 24:00 possible?
595
+ time = '%d:%02d' % [node[2][:h], node[2][:m]]
596
+ elsif node_type == :team
597
+ teams << node[1]
598
+ elsif node_type == :score
599
+ ### todo/fix
600
+ ## add keywords (e.g. ht, ft or such) to Score.new - why? why not?
601
+ ## or use new Score.build( ht:, ft:, ) or such - why? why not?
602
+ ht = node[2][:ht] || [nil,nil]
603
+ ft = node[2][:ft] || [nil,nil]
604
+ et = node[2][:et] || [nil,nil]
605
+ p = node[2][:p] || [nil,nil]
606
+ values = [*ht, *ft, *et, *p]
607
+ ## pp values
608
+
609
+ score = Score.new( *values )
610
+ ## pp score
611
+ elsif node_type == :vs
612
+ ## skip; do nothing
613
+ ##
614
+ ## todo - add ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
615
+ ## or [cancelled] or [postponed] etc.
616
+ ## status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
617
+
618
+ elsif node_type == :'@' ||
619
+ node_type == :',' ||
620
+ node_type == :geo
621
+ ## e.g.
622
+ ## [:"@"], [:geo, "Stade de France"], [:","], [:geo, "Saint-Denis"]]
623
+ more << node[1] if node_type == :geo
624
+ else
625
+ puts "!! PARSE ERROR - unexpected node type #{node_type} in match line; got:"
626
+ pp node
627
+ exit 1
628
+ end
629
+ end
414
630
 
415
631
 
416
- line = values[0]
632
+ if teams.size != 2
633
+ puts "!! PARSE ERROR - expected two teams; got #{teams.size}:"
634
+ pp teams
635
+ exit 1
636
+ end
417
637
 
418
- @mapper_teams.map_teams!( line ) ### todo/fix: limit mapping to two(2) teams - why? why not? might avoid matching @ Barcelona ??
419
- teams = @mapper_teams.find_teams!( line )
420
638
  team1 = teams[0]
421
639
  team2 = teams[1]
422
640
 
423
- ## note: if we do NOT find two teams; return false - no match found
424
- if team1.nil? || team2.nil?
425
- logger.debug " no game match (two teams required) found for line: >#{line}<"
426
- return false
427
- end
428
-
429
-
430
- ## try optional match number e.g.
431
- ## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
432
- num = find_num!( line )
433
- ## pos = find_game_pos!( line )
434
-
435
- ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
436
- ## or [cancelled] or [postponed] etc.
437
- status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
438
-
439
-
440
- date = find_date!( line, start: @start ) ## date or datetime (but NOT time!)
441
-
442
- ## todo/fix:
443
- ## add support for find_time! e.g. 21.00 (or 21:00 ?)
444
-
641
+ @teams[ team1 ] += 1
642
+ @teams[ team2 ] += 1
445
643
 
446
644
 
447
645
  ###
@@ -450,14 +648,17 @@ class MatchParser ## simple match parser for team match schedules
450
648
  if date
451
649
  ### check: use date_v2 if present? why? why not?
452
650
  @last_date = date # keep a reference for later use
651
+ @last_time = nil
652
+ # @last_time = nil
453
653
  else
454
654
  date = @last_date # no date found; (re)use last seen date
455
655
  end
456
656
 
457
-
458
- score = find_score!( line )
459
-
460
- logger.debug " line: >#{line}<"
657
+ if time
658
+ @last_time = time
659
+ else
660
+ time = @last_time
661
+ end
461
662
 
462
663
 
463
664
  round = nil
@@ -477,20 +678,19 @@ class MatchParser ## simple match parser for team match schedules
477
678
  end
478
679
  end
479
680
  if round.nil?
480
- puts "!! ERROR - no matching round found for match date:"
681
+ puts "!! PARSE ERROR - no matching round found for match date:"
481
682
  pp date
482
683
  exit 1
483
684
  end
484
685
  end
485
686
  end
486
687
 
487
-
488
688
  ## todo/check: scores are integers or strings?
489
689
 
490
690
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
491
691
 
492
-
493
692
  ## split date in date & time if DateTime
693
+ =begin
494
694
  time_str = nil
495
695
  date_str = nil
496
696
  if date.is_a?( DateTime )
@@ -499,7 +699,18 @@ class MatchParser ## simple match parser for team match schedules
499
699
  elsif date.is_a?( Date )
500
700
  date_str = date.strftime('%Y-%m-%d')
501
701
  else # assume date is nil
502
- end
702
+ end
703
+ =end
704
+
705
+ time_str = nil
706
+ date_str = nil
707
+
708
+ date_str = date.strftime('%Y-%m-%d') if date
709
+ time_str = time if date && time
710
+
711
+
712
+ status = nil
713
+ ground = nil
503
714
 
504
715
  @matches << Import::Match.new( num: num,
505
716
  date: date_str,
@@ -513,212 +724,10 @@ class MatchParser ## simple match parser for team match schedules
513
724
  ground: ground )
514
725
  ### todo: cache team lookups in hash?
515
726
 
516
- =begin
517
- team1 = Team.find_by_key!( team1_key )
518
- team2 = Team.find_by_key!( team2_key )
519
-
520
- @last_team1 = team1 # store for later use for goals etc.
521
- @last_team2 = team2
522
-
523
-
524
- if @round.nil?
525
- ## no round header found; calculate round from date
526
-
527
- ###
528
- ## todo/fix: add some unit tests for round look up
529
- # fix: use date_v2 if present!! (old/original date; otherwise use date)
530
-
531
- #
532
- # fix: check - what to do with hours e.g. start_at use 00:00 and for end_at use 23.59 ??
533
- # -- for now - remove hours (e.g. use end_of_day and beginning_of_day)
534
-
535
- ##
536
- # note: start_at and end_at are dates ONLY (not datetime)
537
- # - do NOT pass in hours etc. in query
538
- # again use --> date.end_of_day, date.beginning_of_day
539
- # new: not working: date.to_date, date.to_date
540
- # will not find round if start_at same as date !! (in theory hours do not matter)
541
-
542
- ###
543
- # hack:
544
- # special case for sqlite3 (date compare not working reliably; use casts)
545
- # fix: move to adapter_name to activerecord_utils as sqlite? or similar?
546
-
547
- if ActiveRecord::Base.connection.adapter_name.downcase.starts_with?( 'sqlite' )
548
- logger.debug( " [sqlite] using sqlite-specific query for date compare for rounds finder" )
549
- round = Round.where( 'event_id = ? AND ( julianday(start_at) <= julianday(?)'+
550
- 'AND julianday(end_at) >= julianday(?))',
551
- @event.id, date.to_date, date.to_date).first
552
- else # all other dbs (postgresql, mysql, etc.)
553
- round = Round.where( 'event_id = ? AND (start_at <= ? AND end_at >= ?)',
554
- @event.id, date.to_date, date.to_date).first
555
- end
556
-
557
- pp round
558
- if round.nil?
559
- logger.warn( " !!!! no round match found for date #{date}" )
560
- pp Round.all
561
-
562
- ###################################
563
- # -- try auto-adding matchday
564
- round = Round.new
565
-
566
- round_attribs = {
567
- event_id: @event.id,
568
- name: "Matchday #{date.to_date}",
569
- pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
570
- start_at: date.to_date,
571
- end_at: date.to_date
572
- }
573
-
574
- logger.info( " auto-add round >Matchday #{date.to_date}<" )
575
- logger.debug round_attribs.to_json
576
-
577
- round.update_attributes!( round_attribs )
578
-
579
- @patch_round_ids_pos << round.id # todo/check - add just id or "full" record as now - why? why not?
580
- end
581
-
582
- # store pos for auto-number next round if missing
583
- # - note: only if greater/bigger than last; use max
584
- # - note: last_round_pos might be nil - thus set to 0
585
- if round.pos > 999000
586
- # note: do NOT update last_round_pos for to-be-patched rounds
587
- else
588
- @last_round_pos = [round.pos,@last_round_pos||0].max
589
- end
590
-
591
- ## note: will crash (round.pos) if round is nil
592
- logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
593
- else
594
- ## use round from last round header
595
- round = @round
596
- end
597
-
598
-
599
- ### check if games exists
600
- ## with these teams in this round; if yes, only update
601
- game = Game.find_by_round_id_and_team1_id_and_team2_id(
602
- round.id, team1.id, team2.id
603
- )
604
-
605
- game_attribs = {
606
- score1i: scores[0],
607
- score2i: scores[1],
608
- score1: scores[2],
609
- score2: scores[3],
610
- score1et: scores[4],
611
- score2et: scores[5],
612
- score1p: scores[6],
613
- score2p: scores[7],
614
- play_at: date,
615
- play_at_v2: date_v2,
616
- postponed: postponed,
617
- knockout: round.knockout, ## note: for now always use knockout flag from round - why? why not??
618
- ground_id: ground.present? ? ground.id : nil,
619
- group_id: @group.present? ? @group.id : nil
620
- }
621
-
622
- game_attribs[ :pos ] = pos if pos.present?
623
-
624
- ####
625
- # note: only update if any changes (or create if new record)
626
- if game.present? &&
627
- game.check_for_changes( game_attribs ) == false
628
- logger.debug " skip update game #{game.id}; no changes found"
629
- else
630
- if game.present?
631
- logger.debug "update game #{game.id}:"
632
- else
633
- logger.debug "create game:"
634
- game = Game.new
635
-
636
- more_game_attribs = {
637
- round_id: round.id,
638
- team1_id: team1.id,
639
- team2_id: team2.id
640
- }
641
-
642
- ## NB: use round.games.count for pos
643
- ## lets us add games out of order if later needed
644
- more_game_attribs[ :pos ] = round.games.count+1 if pos.nil?
645
-
646
- game_attribs = game_attribs.merge( more_game_attribs )
647
- end
648
-
649
- logger.debug game_attribs.to_json
650
- game.update_attributes!( game_attribs )
651
- end
652
-
653
- @last_game = game # store for later reference (e.g. used for goals etc.)
654
- =end
655
-
656
- return true # game match found
657
- end # method parse_game
658
-
659
-
660
-
661
- def try_parse_date_header( line )
662
- # note: clone line; for possible test do NOT modify in place for now
663
- # note: returns true if parsed, false if no match
664
- parse_date_header( line.dup )
665
- end
666
-
667
- def find_date!( line, start: )
668
- ## NB: lets us pass in start_at/end_at date (for event)
669
- # for auto-complete year
670
-
671
- # extract date from line
672
- # and return it
673
- # NB: side effect - removes date from line string
674
- DateFormats.find!( line, start: start )
727
+ ## hacky goals support
728
+ ### reset/toggle 1/2
729
+ @last_goals = 1
675
730
  end
676
-
677
-
678
- def parse_date_header( line )
679
- # note: returns true if parsed, false if no match
680
-
681
- # line with NO teams plus include date e.g.
682
- # [Fri Jun/17] or
683
- # Jun/17 or
684
- # Jun/17: etc.
685
-
686
- @mapper_teams.map_teams!( line )
687
- teams = @mapper_teams.find_teams!( line )
688
- team1 = teams[0]
689
- team2 = teams[1]
690
-
691
- date = find_date!( line, start: @start )
692
-
693
- if date && team1.nil? && team2.nil?
694
- logger.debug( "date header line found: >#{line}<")
695
- logger.debug( " date: #{date} with start: #{@start}")
696
-
697
- @last_date = date # keep a reference for later use
698
-
699
- ### quick "corona" hack - support seasons going beyond 12 months (see swiss league 2019/20 and others!!)
700
- ## find a better way??
701
- ## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
702
- ## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
703
- ### todo/check: just turn on for 2019/20 season or always? why? why not?
704
-
705
- ## todo/fix: add switch back to old @start_org
706
- ## if year is date.year == @start.year-1 -- possible when full date with year set!!!
707
- if @start.month != 1
708
- if date.year == @start.year+1
709
- logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
710
- @start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
711
- @start = Date.new( @start.year+1, 1, 1 )
712
- end
713
- end
714
-
715
- true
716
- else
717
- false
718
- end
719
- end
720
-
721
-
722
-
723
731
  end # class MatchParser
724
732
  end # module SportDb
733
+