sportdb-formats 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,38 +1,37 @@
1
1
 
2
2
  module SportDb
3
3
 
4
- class MatchParser ## simple match parser for team match schedules
4
+ class MatchParser ## simple match parser for team match schedules
5
5
 
6
- def self.parse( lines, teams, start: )
6
+
7
+ def self.parse( lines, start: )
7
8
  ## todo/fix: add support for txt and lines
8
9
  ## check if lines_or_txt is an array or just a string
9
10
  ## use teams: like start: why? why not?
10
- parser = new( lines, teams, start )
11
+ parser = new( lines, start )
11
12
  parser.parse
12
13
  end
13
14
 
14
15
 
15
16
  include Logging ## e.g. logger#debug, logger#info, etc.
16
- include ParserHelper ## e.g. read_lines, etc.
17
17
 
18
+ def self.debug=(value) @@debug = value; end
19
+ def self.debug?() @@debug ||= false; end ## note: default is FALSE
20
+ def debug?() self.class.debug?; end
18
21
 
19
- def initialize( lines, teams, start )
20
- # for convenience split string into lines
21
- ## note: removes/strips empty lines
22
- ## todo/check: change to text instead of array of lines - why? why not?
23
-
24
- ## note - wrap in enumerator/iterator a.k.a lines reader
25
- @lines = LinesReader.new( lines.is_a?( String ) ?
26
- read_lines( lines ) :
27
- lines
28
- )
29
-
30
- @mapper_teams = TeamMapper.new( teams )
31
- @start = start
32
- end
22
+ def _read_lines( txt ) ## todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
23
+ ## returns an array of lines with comments and empty lines stripped / removed
24
+ lines = []
25
+ txt.each_line do |line| ## preprocess
26
+ line = line.strip
33
27
 
28
+ next if line.empty? || line.start_with?('#') ### skip empty lines and comments
29
+ line = line.sub( /#.*/, '' ).strip ### cut-off end-of line comments too
30
+ lines << line
31
+ end
32
+ lines
33
+ end
34
34
 
35
-
36
35
 
37
36
  ## note: colon (:) MUST be followed by one (or more) spaces
38
37
  ## make sure mon feb 12 18:10 will not match
@@ -45,7 +44,7 @@ class MatchParser ## simple match parser for team match schedules
45
44
  ## Group B: - remove colon
46
45
  ## or lookup first
47
46
 
48
- ATTRIB_REGEX = /^
47
+ ATTRIB_RE = %r{^
49
48
  [ ]*? # slurp leading spaces
50
49
  (?<key>[^:|\]\[()\/; -]
51
50
  [^:|\]\[()\/;]{0,30}
@@ -55,96 +54,185 @@ class MatchParser ## simple match parser for team match schedules
55
54
  (?<value>.+)
56
55
  [ ]*? # slurp trailing spaces
57
56
  $
58
- /ix
57
+ }ix
58
+
59
+ #
60
+ # todo/fix: change start to start: too!!!
61
+ # might be optional in the future!! - why? why not?
62
+
63
+ def initialize( lines, start )
64
+ # for convenience split string into lines
65
+ ## note: removes/strips empty lines
66
+ ## todo/check: change to text instead of array of lines - why? why not?
67
+
68
+ ## note - wrap in enumerator/iterator a.k.a lines reader
69
+ @lines = lines.is_a?( String ) ?
70
+ _read_lines( lines ) : lines
71
+
72
+ @start = start
73
+ end
59
74
 
60
75
 
61
76
  def parse
62
77
  @last_date = nil
78
+ @last_time = nil
63
79
  @last_round = nil
64
80
  @last_group = nil
65
81
 
82
+ ## last_goals - rename to (longer) @last_team_goals or such - why? why not?
83
+ @last_goals = 1 ## toggle between 1|2 - hacky (quick & dirty) support for multi-line goals, fix soon!
84
+
85
+ @teams = Hash.new(0) ## track counts (only) for now for (interal) team stats - why? why not?
66
86
  @rounds = {}
67
87
  @groups = {}
68
88
  @matches = []
69
89
 
70
90
  @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
71
91
 
72
- ## todo/fix - use @lines.rewind first here - why? why not?
73
- @lines.each do |line|
74
-
75
- if is_round_def?( line )
92
+
93
+
94
+ @parser = Parser.new
95
+
96
+ @errors = []
97
+ @tree = []
98
+
99
+ attrib_found = false
100
+
101
+ @lines.each_with_index do |line,i|
102
+
103
+ if debug?
104
+ puts
105
+ puts "line >#{line}<"
106
+ end
107
+
108
+ ## skip new (experimental attrib syntax)
109
+ if attrib_found == false &&
110
+ ATTRIB_RE.match?( line )
111
+ ## note: check attrib regex AFTER group def e.g.:
112
+ ## Group A:
113
+ ## Group B: etc.
114
+ ## todo/fix - change Group A: to Group A etc.
115
+ ## Group B: to Group B
116
+ attrib_found = true
117
+ ## logger.debug "skipping key/value line - >#{line}<"
118
+ next
119
+ end
120
+
121
+ if attrib_found
122
+ ## check if line ends with dot
123
+ ## if not slurp up lines to the next do!!!
124
+ ## logger.debug "skipping key/value line - >#{line}<"
125
+ attrib_found = false if line.end_with?( '.' )
126
+ # logger.debug "skipping key/value line (cont.) - >#{line}<"
127
+ next
128
+ end
129
+
130
+ t, error_messages = @parser.parse_with_errors( line )
131
+
132
+
133
+ if error_messages.size > 0
134
+ ## add to "global" error list
135
+ ## make a triplet tuple (file / msg / line text)
136
+ error_messages.each do |msg|
137
+ @errors << [ '<file>', ## add filename here
138
+ msg,
139
+ line
140
+ ]
141
+ end
142
+ end
143
+
144
+ pp t if debug?
145
+
146
+ @tree << t
147
+ end # each lines
148
+
149
+ ## pp @tree
150
+
151
+ ## report parse errors here - why? why not?
152
+
153
+
154
+
155
+ @tree.each do |nodes|
156
+
157
+ node_type = nodes[0][0] ## get node type of first/head node
158
+
159
+ if node_type == :round_def
76
160
  ## todo/fix: add round definition (w begin n end date)
77
161
  ## todo: do not patch rounds with definition (already assume begin/end date is good)
78
162
  ## -- how to deal with matches that get rescheduled/postponed?
79
- parse_round_def( line )
80
- elsif is_round?( line )
81
- parse_round_header( line )
82
- elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
163
+ parse_round_def( nodes )
164
+ elsif node_type == :group_def ## NB: group goes after round (round may contain group marker too)
83
165
  ### todo: add pipe (|) marker (required)
84
- parse_group_def( line )
85
- elsif is_group?( line )
86
- ## -- lets you set group e.g. Group A etc.
87
- parse_group_header( line )
88
-
89
- elsif m=ATTRIB_REGEX.match( line )
90
- ## note: check attrib regex AFTER group def e.g.:
91
- ## Group A:
92
- ## Group B: etc.
93
- ## todo/fix - change Group A: to Group A etc.
94
- ## Group B: to Group B
95
-
96
- ## check if line ends with dot
97
- ## if not slurp up lines to the next do!!!
98
- logger.debug "skipping key/value line - >#{line}<"
99
- while !line.end_with?( '.' ) || line.nil? do
100
- line = @lines.next
101
- logger.debug "skipping key/value line (cont.) - >#{line}<"
102
- end
103
- elsif is_goals?( line )
104
- ## note - goals must be AFTER attributes!!!
105
- logger.debug "matched goals line: >#{line}<"
106
- logger.debug " try parse:"
107
-
108
- goals = GoalsFinder.new.find!( line )
109
- pp goals
110
- ## quick & dirty - auto add goals to last match
111
- match = @matches[-1]
112
- match.goals = goals
113
-
114
- elsif try_parse_game( line )
115
- # do nothing here
116
- elsif try_parse_date_header( line )
117
- # do nothing here
118
- else
119
- logger.warn "skipping line (no match found): >#{line}<"
120
- @warns << line
166
+ parse_group_def( nodes )
167
+
168
+ elsif node_type == :player ||
169
+ node_type == :none # e.g [[:none], [:";"], [:player, "Xhaka"],...]
170
+ ## note - for now goals line MUST start with player!!
171
+ parse_goals( nodes )
172
+ else
173
+ ## try to be liberal/flexible
174
+ ## eat-up nodes as we go
175
+ ## assume match with group / round header
176
+ ## etc. on its own line or not
177
+
178
+ ## preprocess possible before match nodes
179
+
180
+ while !nodes.empty? do
181
+ node_type = nodes[0][0] ## get node type of first/head node
182
+ if node_type == :round
183
+ node = nodes.shift ## eat-up
184
+ parse_round_header( node )
185
+ elsif node_type == :leg
186
+ node = nodes.shift ## eat-up
187
+ ## ignore (round) leg for now - add later leg - 1|2|3 etc!!!
188
+ ## needs to get added to db/schema too!!!!
189
+ ## add @last_leg = nil or 1|2|3 etc.
190
+ elsif node_type == :group
191
+ ## -- lets you set group e.g. Group A etc.
192
+ node = nodes.shift ## eat-up
193
+ parse_group_header( node )
194
+ elsif node_type == :date
195
+ node = nodes.shift ## eat-up
196
+ parse_date_header( node )
197
+ ## add time here too - why? why not?
198
+ ## add skip comma separator here too - why? why not?
199
+ ## "slurp-up" in upstream parser?
200
+ ## e.g. round, group or group, round ?
201
+ else
202
+ break
203
+ end
204
+ end
205
+ next if nodes.empty?
206
+
207
+ ## rename to try_parse_match - why? why not?
208
+ parse_match( nodes )
121
209
  end
122
- end # lines.each
123
210
 
124
- [@matches, @rounds.values, @groups.values]
211
+ end # tree.each
212
+
213
+ ## note - team keys are names and values are "internal" stats!!
214
+ ## and NOT team/club/nat_team structs!!
215
+ [@teams.keys, @matches, @rounds.values, @groups.values]
125
216
  end # method parse
126
217
 
127
218
 
128
219
 
129
- def parse_group_header( line )
130
- logger.debug "parsing group header line: >#{line}<"
220
+ def parse_group_header( node )
221
+ logger.debug "parsing group header: >#{node}<"
131
222
 
132
223
  # note: group header resets (last) round (allows, for example):
133
224
  # e.g.
134
225
  # Group Playoffs/Replays -- round header
135
226
  # team1 team2 -- match
136
- # Group B: -- group header
227
+ # Group B -- group header
137
228
  # team1 team2 - match (will get new auto-matchday! not last round)
138
229
  @last_round = nil
139
230
 
140
- name = find_group_name!( line )
141
-
142
- logger.debug " name: >#{name}<"
143
- logger.debug " line: >#{line}<"
231
+ name = node[1]
144
232
 
145
233
  group = @groups[ name ]
146
234
  if group.nil?
147
- puts "!! ERROR - no group def found for >#{name}<"
235
+ puts "!! PARSE ERROR - no group def found for >#{name}<"
148
236
  exit 1
149
237
  end
150
238
 
@@ -152,63 +240,99 @@ class MatchParser ## simple match parser for team match schedules
152
240
  @last_group = group
153
241
  end
154
242
 
155
- def parse_group_def( line )
156
- logger.debug "parsing group def line: >#{line}<"
157
243
 
158
- @mapper_teams.map_teams!( line )
159
- teams = @mapper_teams.find_teams!( line )
244
+ def parse_group_def( nodes )
245
+ logger.debug "parsing group def: >#{nodes}<"
246
+
247
+ ## e.g
248
+ ## [:group_def, "Group A"],
249
+ ## [:team, "Germany"],
250
+ ## [:team, "Scotland"],
251
+ ## [:team, "Hungary"],
252
+ ## [:team, "Switzerland"]
160
253
 
161
- name = find_group_name!( line )
254
+ node = nodes[0]
255
+ name = node[1] ## group name
162
256
 
163
- logger.debug " line: >#{line}<"
257
+ teams = nodes[1..-1].map do |node|
258
+ if node[0] == :team
259
+ team = node[1]
260
+ @teams[ team ] += 1
261
+ team
262
+ else
263
+ puts "!! PARSE ERROR - only teams expected in group def; got:"
264
+ pp nodes
265
+ exit 1
266
+ end
267
+ end
164
268
 
165
269
  ## todo/check/fix: add back group key - why? why not?
166
270
  group = Import::Group.new( name: name,
167
- teams: teams.map {|team| team.name } )
271
+ teams: teams )
168
272
 
169
273
  @groups[ name ] = group
170
274
  end
171
275
 
172
276
 
173
- def find_group_name!( line )
174
- ## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
175
- ## nb: (?:) = is for non-capturing group(ing)
176
-
177
- ## fix:
178
- ## get Group|Gruppe|Grupo from lang!!!! do NOT hardcode in place
179
-
180
- ## todo:
181
- ## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
182
- regex = /\b
183
- (?:
184
- (Group | Gruppe | Grupo)
185
- [ ]+
186
- (\d+ | [A-Z]+)
187
- )
188
- \b/x
277
+ def _build_date( m:, d:, y:, start: )
189
278
 
190
- m = regex.match( line )
191
279
 
192
- return nil if m.nil?
280
+ ## quick debug hack
281
+ if m == 2 && d == 29
282
+ puts "quick check feb/29 dates"
283
+ pp [d,m,y]
284
+ pp start
285
+ end
193
286
 
194
- name = m[0]
287
+ if y.nil? ## try to calculate year
288
+ y = if m > start.month ||
289
+ (m == start.month && d >= start.day)
290
+ # assume same year as start_at event (e.g. 2013 for 2013/14 season)
291
+ start.year
292
+ else
293
+ # assume year+1 as start_at event (e.g. 2014 for 2013/14 season)
294
+ start.year+1
295
+ end
296
+ end
195
297
 
196
- logger.debug " name: >#{name}<"
197
298
 
198
- line.sub!( name, '[GROUP.NAME]' )
199
299
 
200
- name
300
+ Date.new( y,m,d ) ## y,m,d
201
301
  end
202
302
 
303
+ def parse_round_def( nodes )
304
+ logger.debug "parsing round def: >#{nodes}<"
203
305
 
204
- def parse_round_def( line )
205
- logger.debug "parsing round def line: >#{line}<"
206
-
207
- start_date = find_date!( line, start: @start )
208
- end_date = find_date!( line, start: @start )
306
+ ## e.g. [[:round_def, "Matchday 1"], [:duration, "Fri Jun/14 - Tue Jun/18"]]
307
+ ## [[:round_def, "Matchday 2"], [:duration, "Wed Jun/19 - Sat Jun/22"]]
308
+ ## [[:round_def, "Matchday 3"], [:duration, "Sun Jun/23 - Wed Jun/26"]]
209
309
 
210
- # note: if end_date missing -- assume start_date is (==) end_at
211
- end_date = start_date if end_date.nil?
310
+ node = nodes[0]
311
+ name = node[1]
312
+ # NB: use extracted round name for knockout check
313
+ # knockout_flag = is_knockout_round?( name )
314
+
315
+ node = nodes[1]
316
+ node_type = node[0]
317
+ if node_type == :date
318
+ start_date = end_date = _build_date( m: node[2][:m],
319
+ d: node[2][:d],
320
+ y: node[2][:y],
321
+ start: @start)
322
+ elsif node_type == :duration
323
+ start_date = _build_date( m: node[2][:start][:m],
324
+ d: node[2][:start][:d],
325
+ y: node[2][:start][:y],
326
+ start: @start)
327
+ end_date = _build_date( m: node[2][:end][:m],
328
+ d: node[2][:end][:d],
329
+ y: node[2][:end][:y],
330
+ start: @start)
331
+ else
332
+ puts "!! PARSE ERROR - expected date or duration for round def; got:"
333
+ pp nodes
334
+ exit 1
335
+ end
212
336
 
213
337
  # note: - NOT needed; start_at and end_at are saved as date only (NOT datetime)
214
338
  # set hours,minutes,secs to beginning and end of day (do NOT use default 12.00)
@@ -218,13 +342,15 @@ class MatchParser ## simple match parser for team match schedules
218
342
 
219
343
  # note: make sure start_at/end_at is date only (e.g. use start_at.to_date)
220
344
  # sqlite3 saves datetime in date field as datetime, for example (will break date compares later!)
221
- start_date = start_date.to_date
222
- end_date = end_date.to_date
223
345
 
346
+ # note - _build_date always returns Date for now - no longer needed!!
347
+ # start_date = start_date.to_date
348
+ # end_date = end_date.to_date
224
349
 
225
- name = find_round_def_name!( line )
226
- # NB: use extracted round name for knockout check
227
- knockout_flag = is_knockout_round?( name )
350
+
351
+ ## fix:
352
+ ## remove knockout_flag - why? why not?
353
+ knockout_flag = false
228
354
 
229
355
 
230
356
  logger.debug " start_date: #{start_date}"
@@ -232,8 +358,6 @@ class MatchParser ## simple match parser for team match schedules
232
358
  logger.debug " name: >#{name}<"
233
359
  logger.debug " knockout_flag: #{knockout_flag}"
234
360
 
235
- logger.debug " line: >#{line}<"
236
-
237
361
  round = Import::Round.new( name: name,
238
362
  start_date: start_date,
239
363
  end_date: end_date,
@@ -244,178 +368,278 @@ class MatchParser ## simple match parser for team match schedules
244
368
  end
245
369
 
246
370
 
371
+ def parse_round_header( node )
372
+ logger.debug "parsing round header: >#{node}<"
247
373
 
248
- def find_round_def_name!( line )
249
- # assume everything before pipe (\) is the round name
250
- # strip [ROUND.POS], todo:?? [ROUND.NAME2]
251
-
252
- # todo/fix: add name2 w/ // or / why? why not?
253
- # -- strip / or / chars
374
+ name = node[1]
254
375
 
255
- buf = line.dup
256
- logger.debug " find_round_def_name! line-before: >>#{buf}<<"
376
+ # name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
377
+ # name = name.strip
257
378
 
258
- ## cut-off everything after (including) pipe (|)
259
- buf = buf[ 0...buf.index('|') ]
260
- buf.strip!
261
-
262
- logger.debug " find_round_def_name! line-after: >>#{buf}<<"
379
+ round = @rounds[ name ]
380
+ if round.nil? ## auto-add / create if missing
381
+ ## todo/check: add num (was pos) if present - why? why not?
382
+ round = Import::Round.new( name: name )
383
+ @rounds[ name ] = round
384
+ end
263
385
 
264
- logger.debug " name: >>#{buf}<<"
265
- line.sub!( buf, '[ROUND.NAME]' )
386
+ ## todo/check: if pos match (MUST always match for now)
387
+ @last_round = round
388
+ @last_group = nil # note: reset group to no group - why? why not?
266
389
 
267
- buf
390
+ ## todo/fix/check
391
+ ## make round a scope for date(time) - why? why not?
392
+ ## reset date/time e.g. @last_date = nil !!!!
268
393
  end
269
394
 
395
+ def parse_date_header( node )
396
+ logger.debug( "date header: >#{node}<")
270
397
 
271
- ## split by or || or |||
272
- ## or ++ or +++
273
- ## or -- or ---
274
- ## or // or ///
275
- ## note: allow Final | First Leg as ONE name same as
276
- ## Final - First Leg or
277
- ## Final, First Leg
278
- ## for cut-off always MUST be more than two chars
279
- ##
280
- ## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
281
- ## todo/fix: move to parser utils and add a method split_name or such?
282
- HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
283
- (?:\|{2,} |
284
- \+{2,} |
285
- -{2,} |
286
- \/{2,}
287
- )
288
- [ ]* ## allow (strip) trailing spaces
289
- /x
398
+ date = _build_date( m: node[2][:m],
399
+ d: node[2][:d],
400
+ y: node[2][:y],
401
+ start: @start )
290
402
 
291
- def find_round_header_name!( line )
292
- # assume everything left is the round name
293
- # extract all other items first (round name2, round pos, group name n pos, etc.)
294
-
295
- buf = line.dup
296
- logger.debug " find_round_header_name! line-before: >>#{buf}<<"
403
+ logger.debug( " date: #{date} with start: #{@start}")
297
404
 
405
+ @last_date = date # keep a reference for later use
406
+ @last_time = nil
298
407
 
299
- parts = buf.split( HEADER_SEP_RE )
300
- buf = parts[0]
408
+ ### quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
409
+ ## find a better way??
410
+ ## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
411
+ ## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
412
+ ### todo/check: just turn on for 2019/20 season or always? why? why not?
301
413
 
302
- buf.strip! # remove leading and trailing whitespace
414
+ ## todo/fix: add switch back to old @start_org
415
+ ## if year is date.year == @start.year-1 -- possible when full date with year set!!!
416
+ =begin
417
+ if @start.month != 1
418
+ if date.year == @start.year+1
419
+ logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
420
+ @start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
421
+ @start = Date.new( @start.year+1, 1, 1 )
422
+ end
423
+ end
424
+ =end
425
+ end
303
426
 
304
- logger.debug " find_round_name! line-after: >>#{buf}<<"
427
+ def parse_minutes( nodes )
428
+ ## parse goals by player
429
+ ## may have multiple minutes!!
430
+ goals = []
305
431
 
306
- ### bingo - assume what's left is the round name
432
+ node = nodes.shift ## get player
433
+ name = node[1]
307
434
 
308
- logger.debug " name: >>#{buf}<<"
309
- line.sub!( buf, '[ROUND.NAME]' )
435
+ loop do
436
+ goal = {}
437
+ goal[:name] = name
310
438
 
311
- buf
312
- end
439
+ node_type = nodes[0][0]
440
+ if node_type != :minute
441
+ puts "!! PARSE ERROR - minute expected to follow player (in goal); got #{node_type}:"
442
+ pp nodes
443
+ exit 1
444
+ end
313
445
 
314
- ## quick hack- collect all "fillwords" by language!!!!
315
- ## change later and add to sportdb-langs!!!!
316
- ##
317
- ## strip all "fillwords" e.g.:
318
- ## Nachtrag/Postponed/Addition/Supplemento names
319
- ##
320
- ## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
321
- ROUND_EXTRA_WORDS_RE = /\b(?:
322
- Nachtrag | ## de
323
- Postponed | ## en
324
- Addition | ## en
325
- Supplemento ## es
326
- )
327
- \b/ix
446
+ node = nodes.shift
447
+ goal[:minute] = node[2][:m]
448
+ goal[:offset] = node[2][:offset] if node[2][:offset]
449
+
450
+ ## check for own goal or penalty or such
451
+ if !nodes.empty?
452
+ node_type = nodes[0][0]
453
+ if node_type == :og
454
+ nodes.shift
455
+ goal[:og] = true
456
+ elsif node_type == :pen
457
+ nodes.shift
458
+ goal[:pen] = true
459
+ else
460
+ # do nothing
461
+ end
462
+ end
328
463
 
329
- def parse_round_header( line )
330
- logger.debug "parsing round header line: >#{line}<"
464
+ goals << goal
331
465
 
332
- name = find_round_header_name!( line )
466
+ ## check if another minute ahead; otherwise break
467
+ break if nodes.empty?
333
468
 
334
- logger.debug " line: >#{line}<"
469
+ node_type = nodes[0][0]
335
470
 
336
- name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
337
- name = name.strip
471
+ ## Kane 39', 62', 67'
472
+ ## consume/eat-up (optional?) commas
473
+ if node_type == :','
474
+ nodes.shift
475
+ node_type = nodes[0][0]
476
+ end
338
477
 
339
- round = @rounds[ name ]
340
- if round.nil? ## auto-add / create if missing
341
- ## todo/check: add num (was pos) if present - why? why not?
342
- round = Import::Round.new( name: name )
343
- @rounds[ name ] = round
478
+ break if node_type != :minute
344
479
  end
345
480
 
346
- ## todo/check: if pos match (MUST always match for now)
347
- @last_round = round
348
- @last_group = nil # note: reset group to no group - why? why not?
349
- end
350
-
351
-
352
- def find_score!( line )
353
- # note: always call after find_dates !!!
354
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
355
- # -- note: score might have two digits too
356
481
 
357
- ScoreFormats.find!( line )
482
+ goals
358
483
  end
359
484
 
360
- def find_status!( line )
361
- StatusParser.find!( line )
362
- end
363
485
 
486
+ def parse_goals( nodes )
487
+ logger.debug "parse goals: >#{nodes}<"
488
+
489
+ goals1 = []
490
+ goals2 = []
491
+
492
+ while !nodes.empty?
493
+ node_type = nodes[0][0]
494
+ if node_type == :player
495
+ more_goals = parse_minutes( nodes )
496
+ ## hacky multi-line support for goals
497
+ ## using last_goal (1|2)
498
+ @last_goals == 2 ? goals2 += more_goals :
499
+ goals1 += more_goals
500
+ elsif node_type == :';' ## team separator
501
+ nodes.shift # eat-up
502
+ @last_goals = 2
503
+ elsif node_type == :none
504
+ nodes.shift # eat-up
505
+ else
506
+ puts "!! PARSE ERROR - unexpected node type in goals;; got #{node_type}:"
507
+ pp nodes
508
+ exit 1
509
+ end
510
+ end
364
511
 
365
- def try_parse_game( line )
366
- # note: clone line; for possible test do NOT modify in place for now
367
- # note: returns true if parsed, false if no match
368
- parse_game( line.dup )
369
- end
512
+ pp [goals1,goals2]
370
513
 
514
+ ## wrap in struct andd add/append to match
515
+ =begin
516
+ class GoalStruct
517
+ ######
518
+ # flat struct for goals - one entry per goals
519
+ attr_accessor :name
520
+ attr_accessor :team # 1 or 2 ? check/todo: add team1 or team2 flag?
521
+ attr_accessor :minute, :offset
522
+ attr_accessor :penalty, :owngoal
523
+ attr_accessor :score1, :score2 # gets calculated
524
+ =end
371
525
 
372
- def parse_game( line )
373
- logger.debug "parsing game (fixture) line: >#{line}<"
374
-
375
- ## split by geo (@)
376
- ## split into parts e.g. break using @ !!!
377
- values = line.split( '@' )
378
-
379
- ## for now pass along ground, city (timezone) as string as is
380
- ## parse (map) later - why? why not??
381
- ### check for ground/stadium and cities
382
- ground = if values.size == 1
383
- nil ## no stadium
384
- elsif values.size == 2 # bingo!!!
385
- ## process stadium, city (timezone) etc.
386
- ## for now keep it simple - pass along "unparsed" all-in-one
387
- values[1].gsub( /[ \t]+/, ' ').strip ## squish
388
- else
389
- puts "!! ERROR - too many @-markers found in line:"
390
- puts line
391
- exit 1
392
- end
526
+ goals = []
527
+ goals1.each do |rec|
528
+ goal = Import::Goal.new(
529
+ player: rec[:name],
530
+ team: 1,
531
+ minute: rec[:minute],
532
+ offset: rec[:offset],
533
+ penalty: rec[:pen] || false, # note: pass along/use false NOT nil
534
+ owngoal: rec[:og] || false
535
+ )
536
+ goals << goal
537
+ end
538
+ goals2.each do |rec|
539
+ goal = Import::Goal.new(
540
+ player: rec[:name],
541
+ team: 2,
542
+ minute: rec[:minute],
543
+ offset: rec[:offset],
544
+ penalty: rec[:pen] || false, # note: pass along/use false NOT nil
545
+ owngoal: rec[:og] || false
546
+ )
547
+ goals << goal
548
+ end
393
549
 
550
+ pp goals
394
551
 
395
- line = values[0]
552
+ ## quick & dirty - auto add goals to last match
553
+ ## note - for hacky (quick& dirty) multi-line support
554
+ ## always append for now
555
+ match = @matches[-1]
556
+ match.goals ||= []
557
+ match.goals += goals
396
558
 
397
- @mapper_teams.map_teams!( line ) ### todo/fix: limit mapping to two(2) teams - why? why not? might avoid matching @ Barcelona ??
398
- teams = @mapper_teams.find_teams!( line )
399
- team1 = teams[0]
400
- team2 = teams[1]
559
+ ## todo/fix
560
+ ## sort by minute
561
+ ## PLUS auto-fill score1,score2 - why? why not?
562
+ end
401
563
 
402
- ## note: if we do NOT find two teams; return false - no match found
403
- if team1.nil? || team2.nil?
404
- logger.debug " no game match (two teams required) found for line: >#{line}<"
405
- return false
406
- end
407
564
 
408
- ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
409
- ## or [cancelled] or [postponed] etc.
410
- status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
565
+ def parse_match( nodes )
566
+ logger.debug( "parse match: >#{nodes}<" )
567
+
568
+ ## collect (possible) nodes by type
569
+ num = nil
570
+ date = nil
571
+ time = nil
572
+ teams = []
573
+ score = nil
574
+ more = []
575
+
576
+ while !nodes.empty?
577
+ node = nodes.shift
578
+ node_type = node[0]
579
+
580
+ if node_type == :num
581
+ num = node[1]
582
+ elsif node_type == :date
583
+ ## note: date wipes out/clear time
584
+ ## time MUST always come after date
585
+ time = nil
586
+ date = _build_date( m: node[2][:m],
587
+ d: node[2][:d],
588
+ y: node[2][:y],
589
+ start: @start )
590
+ elsif node_type == :time
591
+ ## note - there's no time (-only) type in ruby
592
+ ## use string (e.g. '14:56', '1:44')
593
+ ## use 01:44 or 1:44 ?
594
+ ## check for 0:00 or 24:00 possible?
595
+ time = '%d:%02d' % [node[2][:h], node[2][:m]]
596
+ elsif node_type == :team
597
+ teams << node[1]
598
+ elsif node_type == :score
599
+ ### todo/fix
600
+ ## add keywords (e.g. ht, ft or such) to Score.new - why? why not?
601
+ ## or use new Score.build( ht:, ft:, ) or such - why? why not?
602
+ ht = node[2][:ht] || [nil,nil]
603
+ ft = node[2][:ft] || [nil,nil]
604
+ et = node[2][:et] || [nil,nil]
605
+ p = node[2][:p] || [nil,nil]
606
+ values = [*ht, *ft, *et, *p]
607
+ ## pp values
608
+
609
+ score = Score.new( *values )
610
+ ## pp score
611
+ elsif node_type == :vs
612
+ ## skip; do nothing
613
+ ##
614
+ ## todo - add ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
615
+ ## or [cancelled] or [postponed] etc.
616
+ ## status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
617
+
618
+ elsif node_type == :'@' ||
619
+ node_type == :',' ||
620
+ node_type == :geo
621
+ ## e.g.
622
+ ## [:"@"], [:geo, "Stade de France"], [:","], [:geo, "Saint-Denis"]]
623
+ more << node[1] if node_type == :geo
624
+ else
625
+ puts "!! PARSE ERROR - unexpected node type #{node_type} in match line; got:"
626
+ pp node
627
+ exit 1
628
+ end
629
+ end
411
630
 
412
- ## pos = find_game_pos!( line )
413
631
 
414
- date = find_date!( line, start: @start ) ## date or datetime (but NOT time!)
632
+ if teams.size != 2
633
+ puts "!! PARSE ERROR - expected two teams; got #{teams.size}:"
634
+ pp teams
635
+ exit 1
636
+ end
415
637
 
416
- ## todo/fix:
417
- ## add support for find_time! e.g. 21.00 (or 21:00 ?)
638
+ team1 = teams[0]
639
+ team2 = teams[1]
418
640
 
641
+ @teams[ team1 ] += 1
642
+ @teams[ team2 ] += 1
419
643
 
420
644
 
421
645
  ###
@@ -424,14 +648,17 @@ class MatchParser ## simple match parser for team match schedules
424
648
  if date
425
649
  ### check: use date_v2 if present? why? why not?
426
650
  @last_date = date # keep a reference for later use
651
+ @last_time = nil
652
+ # @last_time = nil
427
653
  else
428
654
  date = @last_date # no date found; (re)use last seen date
429
655
  end
430
656
 
431
-
432
- score = find_score!( line )
433
-
434
- logger.debug " line: >#{line}<"
657
+ if time
658
+ @last_time = time
659
+ else
660
+ time = @last_time
661
+ end
435
662
 
436
663
 
437
664
  round = nil
@@ -451,20 +678,19 @@ class MatchParser ## simple match parser for team match schedules
451
678
  end
452
679
  end
453
680
  if round.nil?
454
- puts "!! ERROR - no matching round found for match date:"
681
+ puts "!! PARSE ERROR - no matching round found for match date:"
455
682
  pp date
456
683
  exit 1
457
684
  end
458
685
  end
459
686
  end
460
687
 
461
-
462
688
  ## todo/check: scores are integers or strings?
463
689
 
464
690
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
465
691
 
466
-
467
692
  ## split date in date & time if DateTime
693
+ =begin
468
694
  time_str = nil
469
695
  date_str = nil
470
696
  if date.is_a?( DateTime )
@@ -473,9 +699,21 @@ class MatchParser ## simple match parser for team match schedules
473
699
  elsif date.is_a?( Date )
474
700
  date_str = date.strftime('%Y-%m-%d')
475
701
  else # assume date is nil
476
- end
702
+ end
703
+ =end
704
+
705
+ time_str = nil
706
+ date_str = nil
707
+
708
+ date_str = date.strftime('%Y-%m-%d') if date
709
+ time_str = time if date && time
710
+
711
+
712
+ status = nil
713
+ ground = nil
477
714
 
478
- @matches << Import::Match.new( date: date_str,
715
+ @matches << Import::Match.new( num: num,
716
+ date: date_str,
479
717
  time: time_str,
480
718
  team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
481
719
  team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
@@ -486,212 +724,10 @@ class MatchParser ## simple match parser for team match schedules
486
724
  ground: ground )
487
725
  ### todo: cache team lookups in hash?
488
726
 
489
- =begin
490
- team1 = Team.find_by_key!( team1_key )
491
- team2 = Team.find_by_key!( team2_key )
492
-
493
- @last_team1 = team1 # store for later use for goals etc.
494
- @last_team2 = team2
495
-
496
-
497
- if @round.nil?
498
- ## no round header found; calculate round from date
499
-
500
- ###
501
- ## todo/fix: add some unit tests for round look up
502
- # fix: use date_v2 if present!! (old/original date; otherwise use date)
503
-
504
- #
505
- # fix: check - what to do with hours e.g. start_at use 00:00 and for end_at use 23.59 ??
506
- # -- for now - remove hours (e.g. use end_of_day and beginnig_of_day)
507
-
508
- ##
509
- # note: start_at and end_at are dates ONLY (note datetime)
510
- # - do NOT pass in hours etc. in query
511
- # again use --> date.end_of_day, date.beginning_of_day
512
- # new: not working: date.to_date, date.to_date
513
- # will not find round if start_at same as date !! (in theory hours do not matter)
514
-
515
- ###
516
- # hack:
517
- # special case for sqlite3 (date compare not working reliable; use casts)
518
- # fix: move to adapter_name to activerecord_utils as sqlite? or similar?
519
-
520
- if ActiveRecord::Base.connection.adapter_name.downcase.starts_with?( 'sqlite' )
521
- logger.debug( " [sqlite] using sqlite-specific query for date compare for rounds finder" )
522
- round = Round.where( 'event_id = ? AND ( julianday(start_at) <= julianday(?)'+
523
- 'AND julianday(end_at) >= julianday(?))',
524
- @event.id, date.to_date, date.to_date).first
525
- else # all other dbs (postgresql, mysql, etc.)
526
- round = Round.where( 'event_id = ? AND (start_at <= ? AND end_at >= ?)',
527
- @event.id, date.to_date, date.to_date).first
528
- end
529
-
530
- pp round
531
- if round.nil?
532
- logger.warn( " !!!! no round match found for date #{date}" )
533
- pp Round.all
534
-
535
- ###################################
536
- # -- try auto-adding matchday
537
- round = Round.new
538
-
539
- round_attribs = {
540
- event_id: @event.id,
541
- name: "Matchday #{date.to_date}",
542
- pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
543
- start_at: date.to_date,
544
- end_at: date.to_date
545
- }
546
-
547
- logger.info( " auto-add round >Matchday #{date.to_date}<" )
548
- logger.debug round_attribs.to_json
549
-
550
- round.update_attributes!( round_attribs )
551
-
552
- @patch_round_ids_pos << round.id # todo/check - add just id or "full" record as now - why? why not?
553
- end
554
-
555
- # store pos for auto-number next round if missing
556
- # - note: only if greater/bigger than last; use max
557
- # - note: last_round_pos might be nil - thus set to 0
558
- if round.pos > 999000
559
- # note: do NOT update last_round_pos for to-be-patched rounds
560
- else
561
- @last_round_pos = [round.pos,@last_round_pos||0].max
562
- end
563
-
564
- ## note: will crash (round.pos) if round is nil
565
- logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
566
- else
567
- ## use round from last round header
568
- round = @round
569
- end
570
-
571
-
572
- ### check if games exists
573
- ## with this teams in this round if yes only update
574
- game = Game.find_by_round_id_and_team1_id_and_team2_id(
575
- round.id, team1.id, team2.id
576
- )
577
-
578
- game_attribs = {
579
- score1i: scores[0],
580
- score2i: scores[1],
581
- score1: scores[2],
582
- score2: scores[3],
583
- score1et: scores[4],
584
- score2et: scores[5],
585
- score1p: scores[6],
586
- score2p: scores[7],
587
- play_at: date,
588
- play_at_v2: date_v2,
589
- postponed: postponed,
590
- knockout: round.knockout, ## note: for now always use knockout flag from round - why? why not??
591
- ground_id: ground.present? ? ground.id : nil,
592
- group_id: @group.present? ? @group.id : nil
593
- }
594
-
595
- game_attribs[ :pos ] = pos if pos.present?
596
-
597
- ####
598
- # note: only update if any changes (or create if new record)
599
- if game.present? &&
600
- game.check_for_changes( game_attribs ) == false
601
- logger.debug " skip update game #{game.id}; no changes found"
602
- else
603
- if game.present?
604
- logger.debug "update game #{game.id}:"
605
- else
606
- logger.debug "create game:"
607
- game = Game.new
608
-
609
- more_game_attribs = {
610
- round_id: round.id,
611
- team1_id: team1.id,
612
- team2_id: team2.id
613
- }
614
-
615
- ## NB: use round.games.count for pos
616
- ## lets us add games out of order if later needed
617
- more_game_attribs[ :pos ] = round.games.count+1 if pos.nil?
618
-
619
- game_attribs = game_attribs.merge( more_game_attribs )
620
- end
621
-
622
- logger.debug game_attribs.to_json
623
- game.update_attributes!( game_attribs )
624
- end
625
-
626
- @last_game = game # store for later reference (e.g. used for goals etc.)
627
- =end
628
-
629
- return true # game match found
630
- end # method parse_game
631
-
632
-
633
-
634
- def try_parse_date_header( line )
635
- # note: clone line; for possible test do NOT modify in place for now
636
- # note: returns true if parsed, false if no match
637
- parse_date_header( line.dup )
638
- end
639
-
640
- def find_date!( line, start: )
641
- ## NB: lets us pass in start_at/end_at date (for event)
642
- # for auto-complete year
643
-
644
- # extract date from line
645
- # and return it
646
- # NB: side effect - removes date from line string
647
- DateFormats.find!( line, start: start )
727
+ ## hacky goals support
728
+ ### reset/toggle 1/2
729
+ @last_goals = 1
648
730
  end
649
-
650
-
651
- def parse_date_header( line )
652
- # note: returns true if parsed, false if no match
653
-
654
- # line with NO teams plus include date e.g.
655
- # [Fri Jun/17] or
656
- # Jun/17 or
657
- # Jun/17: etc.
658
-
659
- @mapper_teams.map_teams!( line )
660
- teams = @mapper_teams.find_teams!( line )
661
- team1 = teams[0]
662
- team2 = teams[1]
663
-
664
- date = find_date!( line, start: @start )
665
-
666
- if date && team1.nil? && team2.nil?
667
- logger.debug( "date header line found: >#{line}<")
668
- logger.debug( " date: #{date} with start: #{@start}")
669
-
670
- @last_date = date # keep a reference for later use
671
-
672
- ### quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
673
- ## find a better way??
674
- ## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
675
- ## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
676
- ### todo/check: just turn on for 2019/20 season or always? why? why not?
677
-
678
- ## todo/fix: add switch back to old @start_org
679
- ## if year is date.year == @start.year-1 -- possible when full date with year set!!!
680
- if @start.month != 1
681
- if date.year == @start.year+1
682
- logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
683
- @start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
684
- @start = Date.new( @start.year+1, 1, 1 )
685
- end
686
- end
687
-
688
- true
689
- else
690
- false
691
- end
692
- end
693
-
694
-
695
-
696
731
  end # class MatchParser
697
732
  end # module SportDb
733
+