sportdb-formats 2.0.2 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,735 +0,0 @@
1
-
2
- module SportDb
3
-
4
- class MatchParser ## simple match parser for team match schedules
5
-
6
-
7
## Convenience entry point - builds a parser for the schedule and runs it.
##
##  lines - the schedule; handed to the constructor as is
##          (the constructor accepts a text string or an array of lines)
##  start - reference date handed to the constructor (required keyword)
##
##  returns whatever the instance-level parse returns
##
##  todo/check: use teams: like start: - why? why not?
def self.parse( lines, start: )
  new( lines, start ).parse
end
14
-
15
-
16
- include Logging ## e.g. logger#debug, logger#info, etc.
17
-
18
## Turns debug output on/off; the flag lives in a class variable.
def self.debug=( value )
  @@debug = value
end

## Returns the debug flag; note: default is FALSE (if never set).
def self.debug?
  @@debug ||= false
end

## Instance-level shortcut - asks the class for the debug flag.
def debug?
  self.class.debug?
end
21
-
22
## Splits a text into cleaned-up lines.
##
##   - leading and trailing whitespace gets stripped
##   - empty lines and comment lines (starting with #) get dropped
##   - end-of-line comments (everything from the first # on) get cut off
##
##  returns an array of strings
##
##  todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
def _read_lines( txt )
  txt.each_line
     .map    { |raw|  raw.strip }
     .reject { |text| text.empty? || text.start_with?( '#' ) }
     .map    { |text| text.sub( /#.*/, '' ).strip }
end
34
-
35
-
36
## Key/value ("attribute") line, e.g.   Venue: Wembley Stadium
##
##   - the colon (:) MUST be followed by one (or more) spaces,
##       that is, a time such as  mon feb 12 18:10  will NOT match
##   - keys may include dots, digits & intl chars, e.g.  1. FC Köln:  or  Mainz 05:
##   - keys may NOT include any of  : | [ ] ( ) / ;
##       and may NOT start with a space or a dash (-)
##   - keys are limited in length (first char plus up to 30 more)
##
##  todo/fix: group headers with colon (e.g. Group A: or Group B:) match too -
##              remove the colon or lookup the group first
ATTRIB_RE = %r{^
                [ ]*?      # optional leading spaces
                (?<key>[^:|\]\[()\/; -]
                       [^:|\]\[()\/;]{0,30}
                )
                [ ]*?      # optional spaces before the colon
                :[ ]+
                (?<value>.+)
                [ ]*?      # optional trailing spaces
                $
              }ix
58
-
59
- #
60
- # todo/fix: change start to start: too!!!
61
- # might be optional in the future!! - why? why not?
62
-
63
## Sets up the parser.
##
##  lines - the schedule; for convenience a string gets split into lines
##          via _read_lines (strips comments & empty lines),
##          anything else (e.g. an array of lines) is kept as is
##  start - reference date; kept in @start for building dates later
##
##  todo/check: change to text instead of array of lines - why? why not?
def initialize( lines, start )
  @lines = if lines.is_a?( String )
             _read_lines( lines )
           else
             lines
           end

  @start = start
end
74
-
75
-
76
## Parses all lines; works in two passes:
##
##   1) every line (except skipped attribute lines) gets tokenized via
##        Parser#parse_with_errors into a list of nodes;
##        reported error messages get collected in @errors
##        as [file, msg, line] triplets
##   2) the node lists get dispatched by the type of their first/head node to
##        parse_round_def, parse_group_def, parse_goals or -
##        after eating-up leading round/leg/group/date headers - parse_match
##
##  returns [team names, matches, rounds, groups]
##
##  note - team names are the keys of the (internal) team stats!!
##           and NOT team/club/nat_team structs!!
##  note - @errors gets collected but is NOT part of the return value (for now)
def parse
  @last_date    = nil
  @last_time    = nil
  @last_round   = nil
  @last_group   = nil

  ## last_goals - rename to (longer) @last_team_goals or such - why? why not?
  @last_goals   = 1     ## toggle between 1|2 - hacky (quick & dirty) support for multi-line goals, fix soon!

  @teams        = Hash.new(0)   ## track counts (only) for now for (internal) team stats - why? why not?
  @rounds       = {}
  @groups       = {}
  @matches      = []

  @warns        = []    ## track list of warnings (unmatched lines) too - why? why not?

  @parser = Parser.new

  @errors = []
  @tree   = []

  attrib_found = false   ## true while inside a (multi-line) attribute

  @lines.each do |line|
    if debug?
      puts
      puts "line >#{line}<"
    end

    ## skip new (experimental attrib syntax)
    ##   note: group headers with colon (e.g. Group A:) match too
    ##   todo/fix - change Group A: to Group A etc.
    if !attrib_found && ATTRIB_RE.match?( line )
      ## fix: an attribute ending with a dot on its very first line is
      ##   complete - do NOT slurp up the lines that follow
      attrib_found = !line.end_with?( '.' )
      next
    end

    if attrib_found
      ## attribute continues; slurp up lines up to (and incl.) the line ending with a dot
      attrib_found = false   if line.end_with?( '.' )
      next
    end

    t, error_messages = @parser.parse_with_errors( line )

    ## add to "global" error list
    ##   make a triplet tuple (file / msg / line text)
    error_messages.each do |msg|
      @errors << [ '<file>',    ## add filename here
                   msg,
                   line
                 ]
    end

    pp t   if debug?

    @tree << t
  end  # each lines

  ## report parse errors here - why? why not?

  @tree.each do |nodes|
    ## fix: skip lines that did not result in any nodes;
    ##   nodes[0][0] would fail with a NoMethodError otherwise
    ##   NOTE(review): presumably only possible for lines with parse errors - confirm
    next if nodes.nil? || nodes.empty?

    case nodes[0][0]     ## node type of first/head node
    when :round_def
      ## todo/fix: add round definition (w begin n end date)
      ## todo: do not patch rounds with definition (already assume begin/end date is good)
      ##       -- how to deal with matches that get rescheduled/postponed?
      parse_round_def( nodes )
    when :group_def    ## NB: group goes after round (round may contain group marker too)
      ### todo: add pipe (|) marker (required)
      parse_group_def( nodes )
    when :player, :none    # e.g [[:none], [:";"], [:player, "Xhaka"],...]
      ## note - for now goals line MUST start with player!!
      parse_goals( nodes )
    else
      ## try to be liberal/flexible
      ##   eat-up nodes as we go;
      ##   assume match with group / round header etc. on its own line or not
      until nodes.empty?
        case nodes[0][0]
        when :round
          parse_round_header( nodes.shift )
        when :leg
          ## ignore (round) leg for now - add later leg - 1|2|3 etc!!!
          ##   needs to get added to db/schema too!!!!
          nodes.shift
        when :group    ## lets you set group e.g. Group A etc.
          parse_group_header( nodes.shift )
        when :date
          parse_date_header( nodes.shift )
        else
          break     ## first non-header node; the rest is the match
        end
      end

      ## rename to try_parse_match - why? why not?
      parse_match( nodes )   unless nodes.empty?
    end
  end  # tree.each

  [@teams.keys, @matches, @rounds.values, @groups.values]
end # method parse
217
-
218
-
219
-
220
## Handles a group header, e.g. [:group, "Group A"] -
##   makes the (already defined) group the current group for the matches to follow.
##
##  note: a group header resets the (last) round; allows, for example:
##     Group Playoffs/Replays   -- round header
##       team1 team2            -- match
##     Group B                  -- group header
##       team1 team2            -- match (will NOT get the last round)
##
##  prints a parse error and exits (1) if there is no group def for the name
def parse_group_header( node )
  logger.debug "parsing group header: >#{node}<"

  @last_round = nil

  group_name = node[1]
  found      = @groups[ group_name ]

  if found.nil?
    puts "!! PARSE ERROR - no group def found for >#{group_name}<"
    exit 1
  end

  @last_group = found    # set group for games
end
242
-
243
-
244
## Handles a group definition, e.g.
##    [[:group_def, "Group A"],
##     [:team, "Germany"],
##     [:team, "Scotland"],
##     [:team, "Hungary"],
##     [:team, "Switzerland"]]
##
##  registers the group (by name) in @groups and
##   bumps the counter in @teams for every team listed;
##  prints a parse error and exits (1) on any node other than a team
##
##  todo/check/fix: add back group key - why? why not?
def parse_group_def( nodes )
  logger.debug "parsing group def: >#{nodes}<"

  head, *rest = nodes
  group_name  = head[1]

  team_names = rest.map do |entry|
    unless entry[0] == :team
      puts "!! PARSE ERROR - only teams expected in group def; got:"
      pp nodes
      exit 1
    end

    @teams[ entry[1] ] += 1
    entry[1]
  end

  @groups[ group_name ] = Import::Group.new( name:  group_name,
                                             teams: team_names )
end
275
-
276
-
277
## Builds a Date from month (m), day (d) and (optional) year (y).
##
##  if the year is missing (nil) it gets calculated from the start date:
##   - month/day on or after the start month/day => same year as start
##        (e.g. 2013 for a 2013/14 season)
##   - otherwise                                 => year after start
##        (e.g. 2014 for a 2013/14 season)
##
##  returns a Date; note - Date.new raises on invalid dates
##    (e.g. feb/29 in a calculated non-leap year)
def _build_date( m:, d:, y:, start: )
  ## quick debug hack for feb/29 dates
  ##   fix: print only if debug is turned on
  ##        (was always printed to stdout)
  if debug? && m == 2 && d == 29
    puts "quick check feb/29 dates"
    pp [d,m,y]
    pp start
  end

  if y.nil?   ## try to calculate year
    on_or_after_start = m > start.month ||
                        (m == start.month && d >= start.day)

    y = on_or_after_start ? start.year : start.year+1
  end

  Date.new( y,m,d )
end
302
-
303
## Handles a round definition, e.g.
##   [[:round_def, "Matchday 1"], [:duration, "Fri Jun/14 - Tue Jun/18"]]
##   [[:round_def, "Matchday 2"], [:duration, "Wed Jun/19 - Sat Jun/22"]]
##
##  the second node MUST be a date (start date equals end date) or
##    a duration (start & end date);
##    prints a parse error and exits (1) otherwise
##
##  registers the round (by name) in @rounds with auto: false
##
##  note - _build_date always returns Date (NOT datetime) for now;
##           no start_date.to_date / end_date.to_date needed
def parse_round_def( nodes )
  logger.debug "parsing round def: >#{nodes}<"

  name = nodes[0][1]

  node      = nodes[1]
  ## fix: a round def without any date/duration node
  ##        ends up in the parse error below (was NoMethodError on nil)
  node_type = node && node[0]

  build = ->( ymd ) { _build_date( m: ymd[:m],
                                   d: ymd[:d],
                                   y: ymd[:y],
                                   start: @start ) }

  if node_type == :date
    start_date = end_date = build.call( node[2] )
  elsif node_type == :duration
    start_date = build.call( node[2][:start] )
    end_date   = build.call( node[2][:end] )
  else
    puts "!! PARSE ERROR - expected date or duration for round def; got:"
    pp nodes
    exit 1
  end

  ## fix:
  ##   remove knockout_flag - why? why not?
  ##   was: knockout_flag = is_knockout_round?( name )
  knockout_flag = false

  logger.debug "    start_date: #{start_date}"
  logger.debug "    end_date:   #{end_date}"
  logger.debug "    name:    >#{name}<"
  logger.debug "    knockout_flag:   #{knockout_flag}"

  round = Import::Round.new( name:       name,
                             start_date: start_date,
                             end_date:   end_date,
                             knockout:   knockout_flag,
                             auto:       false )

  @rounds[ name ] = round
end
369
-
370
-
371
## Handles a round header, e.g. [:round, "Matchday 1"] -
##   makes the round the current round for the matches to follow;
##   rounds without a definition get auto-added (by name only)
##
##  note: resets the (last) group to no group - why? why not?
##
##  todo/check: add num (was pos) if present - why? why not?
##  todo/fix/check: make round a scope for date(time) - why? why not?
##                    reset date/time e.g. @last_date = nil !!!!
def parse_round_header( node )
  logger.debug "parsing round header: >#{node}<"

  round_name = node[1]

  ## auto-add / create if missing
  @rounds[ round_name ] = Import::Round.new( name: round_name )  if @rounds[ round_name ].nil?

  @last_round = @rounds[ round_name ]
  @last_group = nil
end
394
-
395
## Handles a date header - builds the date (relative to @start) and
##   keeps it as the (last) date for the matches to follow;
##   note: the (last) time gets reset
##
##  todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
##
##  note: the quick "corona" hack - support for seasons going beyond
##    12 months (see swiss league 2019/20 and others!!) - is turned off;
##    it moved @start to jan/1 of the next year (keeping the original
##    in @start_org) once a date in @start.year+1 showed up
##    (if @start was not in january).
##    find a better way??
def parse_date_header( node )
  logger.debug( "date header: >#{node}<")

  ymd = node[2]
  @last_date = _build_date( m: ymd[:m],
                            d: ymd[:d],
                            y: ymd[:y],
                            start: @start )

  logger.debug( "    date: #{@last_date} with start: #{@start}")

  @last_time = nil
end
426
-
427
## Parses the goals of one player; may have multiple minutes!! e.g.
##    Kane 39', 62', 67'
##
##  eats-up the player node and all minute nodes that follow
##    (incl. own goal / penalty markers and separating commas)
##    from the front of nodes
##
##  returns an array of goal records (hashes) with
##    :name, :minute and (if present) :offset, :og, :pen
##
##  prints a parse error and exits (1)
##    if no minute follows the player
def parse_minutes( nodes )
  goals = []

  player = nodes.shift    ## get player
  name   = player[1]

  loop do
    ## fix: use nil-safe lookup of the head node type; a player without
    ##   any minute ends up in the parse error (was NoMethodError on nil)
    node_type = nodes.dig( 0, 0 )
    if node_type != :minute
      puts "!! PARSE ERROR - minute expected to follow player (in goal); got #{node_type}:"
      pp nodes
      exit 1
    end

    node = nodes.shift
    goal = { name:   name,
             minute: node[2][:m] }
    goal[:offset] = node[2][:offset]   if node[2][:offset]

    ## check for own goal or penalty or such
    case nodes.dig( 0, 0 )
    when :og
      nodes.shift
      goal[:og] = true
    when :pen
      nodes.shift
      goal[:pen] = true
    end

    goals << goal

    ## consume/eat-up (optional?) commas
    ##   fix: a trailing comma (nothing follows) no longer fails
    nodes.shift   if nodes.dig( 0, 0 ) == :','

    ## check if another minute ahead; otherwise break
    break   if nodes.dig( 0, 0 ) != :minute
  end

  goals
end
484
-
485
-
486
## Handles a goals line, e.g.
##    [[:player, "Kane"], [:minute, ...], [:";"], [:player, "Xhaka"], [:minute, ...]]
##
##  goals before the team separator (;) count for team 1,
##    goals after for team 2; for hacky (quick & dirty) multi-line
##    support the team (1|2) is kept in @last_goals across lines
##    (and gets reset by the next match)
##
##  the goals always get appended to the last match (for now);
##    prints a parse error and exits (1) on an unexpected node or
##    if there is no match (yet) to add the goals to
##
##  todo/fix: sort by minute
##              PLUS auto-fill score1,score2 - why? why not?
def parse_goals( nodes )
  logger.debug "parse goals: >#{nodes}<"

  recs_by_team = { 1 => [], 2 => [] }

  until nodes.empty?
    node_type = nodes[0][0]
    case node_type
    when :player
      team = @last_goals == 2 ? 2 : 1
      recs_by_team[ team ] += parse_minutes( nodes )
    when :';'     ## team separator
      nodes.shift   # eat-up
      @last_goals = 2
    when :none
      nodes.shift   # eat-up
    else
      puts "!! PARSE ERROR - unexpected node type in goals;; got #{node_type}:"
      pp nodes
      exit 1
    end
  end

  ## fix: print only if debug is turned on (was always printed to stdout)
  pp recs_by_team.values   if debug?

  ## wrap in (flat) goal structs - one entry per goal; team 1 goals first
  goals = recs_by_team.flat_map do |team, recs|
    recs.map do |rec|
      Import::Goal.new(
        player:  rec[:name],
        team:    team,
        minute:  rec[:minute],
        offset:  rec[:offset],
        penalty: rec[:pen] || false,  # note: pass along/use false NOT nil
        owngoal: rec[:og]  || false
      )
    end
  end

  pp goals   if debug?

  ## quick & dirty - auto add goals to last match
  match = @matches[-1]
  if match.nil?
    ## fix: report goals without any match (was NoMethodError on nil)
    puts "!! PARSE ERROR - no match found for goals; got:"
    pp goals
    exit 1
  end

  match.goals ||= []
  match.goals += goals
end
563
-
564
-
565
## Handles a match line; collects the nodes by type
##   (num, date, time, teams, score) and adds a match to @matches.
##
##   - date and time are "sticky" - if missing the last seen
##       date / time gets (re)used; a new date resets the time
##   - the round is the last round header or (if none) the first defined
##       round with start/end dates that include the match date;
##       matches WITHOUT rounds are allowed if no rounds are defined at all
##   - the group is the last group header (if any)
##   - round and group get passed along as strings (names)
##   - status and ground are always nil (for now); geo nodes get skipped
##
##  prints a parse error and exits (1) on an unexpected node,
##    if not exactly two teams are found or if no round matches
##
##  todo - find (optional) match status e.g. [abandoned] or [replay] or [awarded]
##             or [cancelled] or [postponed] etc.
##  todo/check: scores are integers or strings?
def parse_match( nodes )
  logger.debug( "parse match: >#{nodes}<" )

  ## collect (possible) nodes by type
  num   = nil
  date  = nil
  time  = nil
  teams = []
  score = nil

  until nodes.empty?
    node      = nodes.shift
    node_type = node[0]

    case node_type
    when :num
      num = node[1]
    when :date
      ## note: date wipes out/clear time
      ##         time MUST always come after date
      time = nil
      date = _build_date( m: node[2][:m],
                          d: node[2][:d],
                          y: node[2][:y],
                          start: @start )
    when :time
      ## note - there's no time (-only) type in ruby
      ##          use string (e.g. '14:56', '1:44')
      time = '%d:%02d' % [node[2][:h], node[2][:m]]
    when :team
      teams << node[1]
    when :score
      ### todo/fix
      ##   add keywords (e.g. ht, ft or such) to Score.new - why? why not?
      ht = node[2][:ht] || [nil,nil]
      ft = node[2][:ft] || [nil,nil]
      et = node[2][:et] || [nil,nil]
      p  = node[2][:p]  || [nil,nil]

      score = Score.new( *ht, *ft, *et, *p )
    when :vs, :'@', :',', :geo, :timezone
      ## skip; do nothing, e.g.
      ##  [:"@"], [:geo, "Stade de France"], [:","], [:geo, "Saint-Denis"]]
      ##  [:"@"], [:geo, "Arena de São Paulo"], [:","], [:geo, "São Paulo"], [:timezone, "(UTC-3)"]
    else
      puts "!! PARSE ERROR - unexpected node type #{node_type} in match line; got:"
      pp node
      exit 1
    end
  end

  if teams.size != 2
    puts "!! PARSE ERROR - expected two teams; got #{teams.size}:"
    pp teams
    exit 1
  end

  team1, team2 = teams

  @teams[ team1 ] += 1
  @teams[ team2 ] += 1

  if date
    @last_date = date    # keep a reference for later use
    @last_time = nil
  else
    date = @last_date    # no date found; (re)use last seen date
  end

  if time
    @last_time = time
  else
    time = @last_time
  end

  round = @last_round
  if !round && @rounds.size > 0
    ## find (first) matching round by date if rounds / matchdays defined
    ##   fix: a missing date (nil) matches no round and
    ##          ends up in the parse error below (was NoMethodError on nil)
    round = @rounds.values.find do |round_rec|
      date &&
        round_rec.start_date && round_rec.end_date &&
        date.to_date >= round_rec.start_date &&
        date.to_date <= round_rec.end_date
    end

    if round.nil?
      puts "!! PARSE ERROR - no matching round found for match date:"
      pp date
      exit 1
    end
  end

  date_str = date ? date.strftime('%Y-%m-%d') : nil
  time_str = date && time ? time : nil

  status = nil
  ground = nil

  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
  @matches << Import::Match.new( num:    num,
                                 date:   date_str,
                                 time:   time_str,
                                 team1:  team1,
                                 team2:  team2,
                                 score:  score,
                                 round:  round       ? round.name       : nil,   ## note: for now always use string (assume unique canonical name for event)
                                 group:  @last_group ? @last_group.name : nil,   ## note: for now always use string (assume unique canonical name for event)
                                 status: status,
                                 ground: ground )

  ## hacky goals support
  ### reset/toggle 1/2
  @last_goals = 1
end
733
- end # class MatchParser
734
- end # module SportDb
735
-