sportdb-formats 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,733 +0,0 @@
1
-
2
- module SportDb
3
-
4
- class MatchParser ## simple match parser for team match schedules
5
-
6
-
7
## Convenience entry point - builds a parser for lines and runs it.
##
##   lines - match schedule as a String or as an already split list of lines
##   start - (required keyword) start date of the season/event;
##           handed on (positionally) to the constructor
##
## Returns whatever the instance-level parse returns.
def self.parse( lines, start: )
  ## todo/fix: add support for txt and lines
  ##   check if lines_or_txt is an array or just a string
  ##   use teams: like start: why? why not?
  new( lines, start ).parse
end
14
-
15
-
16
- include Logging ## e.g. logger#debug, logger#info, etc.
17
-
18
## class-wide debug switch (note: default is FALSE)
def self.debug=(value)
  @@debug = value
end

def self.debug?
  @@debug ||= false
end

## instance-level shortcut - asks the class-wide switch
def debug?
  self.class.debug?
end
21
-
22
## Splits txt into an array of cleaned-up lines.
##
##  - every line gets stripped (leading / trailing whitespace)
##  - empty lines and lines starting with # are dropped
##  - end-of-line comments (# to end of line) are cut off
##
## todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
def _read_lines( txt )
  txt.each_line.each_with_object( [] ) do |raw, cleaned|
    stripped = raw.strip

    ## skip empty lines and comments
    next if stripped.empty? || stripped.start_with?('#')

    ## cut-off end-of line comments too
    cleaned << stripped.sub( /#.*/, '' ).strip
  end
end
34
-
35
-
36
- ## note: colon (:) MUST be followed by one (or more) spaces
37
- ## make sure mon feb 12 18:10 will not match
38
- ## allow 1. FC Köln etc.
39
- ## Mainz 05:
40
- ## limit to 30 chars max
41
- ## only allow chars incl. intl buut (NOT ()[]/;)
42
- ##
43
- ## Group A:
44
- ## Group B: - remove colon
45
- ## or lookup first
46
-
47
- ATTRIB_RE = %r{^
48
- [ ]*? # slurp leading spaces
49
- (?<key>[^:|\]\[()\/; -]
50
- [^:|\]\[()\/;]{0,30}
51
- )
52
- [ ]*? # slurp trailing spaces
53
- :[ ]+
54
- (?<value>.+)
55
- [ ]*? # slurp trailing spaces
56
- $
57
- }ix
58
-
59
- #
60
- # todo/fix: change start to start: too!!!
61
- # might be optional in the future!! - why? why not?
62
-
63
## Sets up a parser for one match schedule.
##
##   lines - a String (gets split & cleaned via _read_lines) or
##           an already prepared list of lines (used as-is)
##   start - kept as @start; passed along as start: when building dates
##
## todo/check: change to text instead of array of lines - why? why not?
def initialize( lines, start )
  ## for convenience split string into lines
  ##   note: removes/strips empty lines
  @lines = if lines.is_a?( String )
             _read_lines( lines )
           else
             lines
           end

  @start = start
end
74
-
75
-
76
## Parses all lines (see @lines) in two passes:
##
##   1) every line gets tokenized via Parser#parse_with_errors into a list
##      of nodes (collected in @tree); error messages get recorded in
##      @errors as [file, msg, line] triplets
##   2) every node list gets dispatched on the type of its first node
##      (round def, group def, goals or - the fallback - a match line
##       with optional round / leg / group / date headers upfront)
##
## Returns an array of [team names, matches, rounds, groups].
def parse
  @last_date    = nil
  @last_time    = nil
  @last_round   = nil
  @last_group   = nil

  ## last_goals - rename to (longer) @last_team_goals or such - why? why not?
  @last_goals   = 1    ## toggle between 1|2 - hacky (quick & dirty) support for multi-line goals, fix soon!

  @teams   = Hash.new(0)   ## track counts (only) for now for (internal) team stats - why? why not?
  @rounds  = {}
  @groups  = {}
  @matches = []

  @warns   = []    ## track list of warnings (unmatched lines) too - why? why not?

  @parser = Parser.new

  @errors = []
  @tree   = []

  attrib_found = false

  @lines.each do |line|
    if debug?
      puts
      puts "line >#{line}<"
    end

    ## skip new (experimental attrib syntax)
    ##  note: check attrib regex AFTER group def e.g.:
    ##         Group A:
    ##         Group B:  etc.
    ##     todo/fix - change Group A: to Group A etc.
    if attrib_found == false && ATTRIB_RE.match?( line )
      attrib_found = true
      next
    end

    ## attrib continuation - keep skipping lines
    ##   up to (and including) the first line ending with a dot
    if attrib_found
      attrib_found = false   if line.end_with?( '.' )
      next
    end

    t, error_messages = @parser.parse_with_errors( line )

    ## add to "global" error list
    ##   make a triplet tuple (file / msg / line text)
    error_messages.each do |msg|
      @errors << [ '<file>',    ## add filename here
                   msg,
                   line
                 ]
    end

    pp t   if debug?

    @tree << t
  end

  ## report parse errors here - why? why not?

  @tree.each do |nodes|
    ## note: a line may yield no nodes at all (e.g. only errors);
    ##         nothing to dispatch on - skip
    next if nodes.nil? || nodes.empty?

    case nodes[0][0]    ## node type of first/head node
    when :round_def
      ## todo/fix: add round definition (w begin n end date)
      ## todo: do not patch rounds with definition (already assume begin/end date is good)
      ##  -- how to deal with matches that get rescheduled/postponed?
      parse_round_def( nodes )
    when :group_def    ## NB: group goes after round (round may contain group marker too)
      ### todo: add pipe (|) marker (required)
      parse_group_def( nodes )
    when :player, :none   # e.g [[:none], [:";"], [:player, "Xhaka"],...]
      ## note - for now goals line MUST start with player!!
      parse_goals( nodes )
    else
      ## try to be liberal/flexible
      ##   eat-up header nodes as we go
      ##   assume match with group / round header
      ##   etc. on its own line or not
      until nodes.empty?
        case nodes[0][0]
        when :round
          parse_round_header( nodes.shift )
        when :leg
          ## ignore (round) leg for now - add later leg - 1|2|3 etc!!!
          ##   needs to get added to db/schema too!!!!
          nodes.shift
        when :group
          ## -- lets you set group  e.g. Group A etc.
          parse_group_header( nodes.shift )
        when :date
          parse_date_header( nodes.shift )
        else
          break    ## first non-header node - the rest is the match
        end
      end
      next if nodes.empty?

      ## rename to try_parse_match - why? why not?
      parse_match( nodes )
    end
  end

  ## note - team keys are names and values are "internal" stats!!
  ##   and NOT team/club/nat_team structs!!
  [@teams.keys, @matches, @rounds.values, @groups.values]
end # method parse
217
-
218
-
219
-
220
## Handles a group header node (group name in node[1]).
##
## Looks up the group in @groups and makes it the current group
##   (@last_group); prints a parse error and exits (1) if no
##   group with that name got defined.
def parse_group_header( node )
  logger.debug "parsing group header: >#{node}<"

  # note: group header resets (last) round  (allows, for example):
  #  e.g.
  #  Group Playoffs/Replays       -- round header
  #    team1 team2                -- match
  #  Group B                      -- group header
  #    team1 team2 - match  (will get new auto-matchday! not last round)
  @last_round = nil

  group_name = node[1]
  found      = @groups[ group_name ]

  if found.nil?
    puts "!! PARSE ERROR - no group def found for >#{group_name}<"
    exit 1
  end

  # set group for games
  @last_group = found
end
242
-
243
-
244
## Handles a group definition.
##
##  e.g
##  [:group_def, "Group A"],
##    [:team, "Germany"],
##    [:team, "Scotland"],
##    [:team, "Hungary"],
##    [:team, "Switzerland"]
##
## The head node holds the group name; all nodes that follow
##   MUST be teams (otherwise prints a parse error and exits).
##   Bumps the counter in @teams for every team and
##   registers the group by name in @groups.
def parse_group_def( nodes )
  logger.debug "parsing group def: >#{nodes}<"

  head, *rest = nodes
  group_name  = head[1]

  team_names = rest.map do |entry|
    unless entry[0] == :team
      puts "!! PARSE ERROR - only teams expected in group def; got:"
      pp nodes
      exit 1
    end

    @teams[ entry[1] ] += 1
    entry[1]
  end

  ## todo/check/fix: add back group key - why? why not?
  @groups[ group_name ] = Import::Group.new( name:  group_name,
                                             teams: team_names )
end
275
-
276
-
277
## Builds a Date from month (m), day (d) and (optional) year (y).
##
##   m, d  - month and day (integers)
##   y     - year or nil; if nil the year gets calculated from start:
##            on or after start's month/day => same year as start
##                                (e.g. 2013 for 2013/14 season)
##            before                        => year+1
##                                (e.g. 2014 for 2013/14 season)
##   start - start date of the event/season (only used if y is nil)
##
## Returns a Date (Date.new raises if y/m/d is not a valid date).
def _build_date( m:, d:, y:, start: )
  ## quick debug hack - leap day diagnostics
  ##   note: only print if debugging is switched on
  ##           (and do NOT write to stdout on every feb/29 date)
  if debug? && m == 2 && d == 29
    puts "quick check feb/29 dates"
    pp [d,m,y]
    pp start
  end

  if y.nil?   ## try to calculate year
    on_or_after_start = m > start.month ||
                        (m == start.month && d >= start.day)
    y = on_or_after_start ? start.year : start.year+1
  end

  Date.new( y, m, d )
end
302
-
303
## Handles a round definition (round name plus date or duration).
##
##  e.g. [[:round_def, "Matchday 1"], [:duration, "Fri Jun/14 - Tue Jun/18"]]
##       [[:round_def, "Matchday 2"], [:duration, "Wed Jun/19 - Sat Jun/22"]]
##       [[:round_def, "Matchday 3"], [:duration, "Sun Jun/23 - Wed Jun/26"]]
##
## The second node MUST be a :date (start and end date are the same) or
##   a :duration (with :start and :end parts); anything else - incl.
##   a missing second node - prints a parse error and exits (1).
## Registers the new round (auto: false) by name in @rounds.
def parse_round_def( nodes )
  logger.debug "parsing round def: >#{nodes}<"

  name = nodes[0][1]
  # NB: use extracted round name for knockout check
  # knockout_flag = is_knockout_round?( name )

  node      = nodes[1]
  node_type = node ? node[0] : nil   ## note: nil if no date/duration follows

  ## helper - build date from parts hash (with :m, :d and optional :y)
  to_date = ->(parts) { _build_date( m: parts[:m],
                                     d: parts[:d],
                                     y: parts[:y],
                                     start: @start ) }

  case node_type
  when :date
    start_date = end_date = to_date.call( node[2] )
  when :duration
    start_date = to_date.call( node[2][:start] )
    end_date   = to_date.call( node[2][:end] )
  else
    puts "!! PARSE ERROR - expected date or duration for round def; got:"
    pp nodes
    exit 1
  end

  # note - _build_date always returns Date for now
  #          no date-only conversion (e.g. to_date) needed!!

  ## fix:
  ##  remove knockout_flag - why? why not?
  knockout_flag = false

  logger.debug "    start_date: #{start_date}"
  logger.debug "    end_date:   #{end_date}"
  logger.debug "    name:    >#{name}<"
  logger.debug "    knockout_flag:   #{knockout_flag}"

  round = Import::Round.new( name:       name,
                             start_date: start_date,
                             end_date:   end_date,
                             knockout:   knockout_flag,
                             auto:       false )

  @rounds[ name ] = round
end
369
-
370
-
371
## Handles a round header node (round name in node[1]).
##
## Makes the round the current round (@last_round) - the round gets
##   auto-added to @rounds if not yet known - and resets
##   the current group (@last_group).
def parse_round_header( node )
  logger.debug "parsing round header: >#{node}<"

  round_name = node[1]

  # round_name = round_name.sub( ROUND_EXTRA_WORDS_RE, '' )
  # round_name = round_name.strip

  ## auto-add / create if missing
  ##   todo/check: add num (was pos) if present - why? why not?
  if @rounds[ round_name ].nil?
    @rounds[ round_name ] = Import::Round.new( name: round_name )
  end

  ## todo/check: if pos match (MUST always match for now)
  @last_round = @rounds[ round_name ]
  @last_group = nil   # note: reset group to no group - why? why not?

  ## todo/fix/check
  ##  make round a scope for date(time) - why? why not?
  ##   reset date/time e.g. @last_date = nil !!!!
end
394
-
395
## Handles a date header node (date parts - :m, :d, :y - in node[2]).
##
## Builds the date (relative to @start) and keeps it as the
##   current date (@last_date); the current time (@last_time) gets reset.
def parse_date_header( node )
  logger.debug( "date header: >#{node}<")

  parts = node[2]
  @last_date = _build_date( m: parts[:m],
                            d: parts[:d],
                            y: parts[:y],
                            start: @start )   # keep a reference for later use

  logger.debug( "    date: #{@last_date} with start: #{@start}")

  @last_time = nil

  ### quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
  ##    find a better way??
  ##  set @start date to full year (e.g. 1.1.) if date.year  is @start.year+1
  ##   todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
  ###  todo/check: just turn on for 2019/20 season or always? why? why not?
  ##
  ## todo/fix: add switch back to old @start_org
  ##   if year is date.year == @start.year-1    -- possible when full date with year set!!!
  ##
  ## note: the hack itself is disabled (commented out) for now:
  ##
  ##   if @start.month != 1
  ##     if date.year == @start.year+1
  ##       logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronological - always moving forward" )
  ##       @start_org = @start   ## keep a copy of the original (old) start date - why? why not? - not used for now
  ##       @start = Date.new( @start.year+1, 1, 1 )
  ##     end
  ##   end
end
426
-
427
## Parses the goals of one player (may have multiple minutes!!)
##
##   e.g.  Kane 39', 62', 67'
##
## Eats-up (shifts) the player node, all minute nodes that follow
##   (incl. own goal / penalty markers and separating commas) from nodes
##   and stops at the first node that is not a minute.
##
## Returns an array of goal hashes (one per minute) with
##   :name, :minute and - only if present - :offset, :og, :pen.
## Prints a parse error and exits (1) if no minute follows the player.
def parse_minutes( nodes )
  goals = []

  player = nodes.shift   ## get player
  name   = player[1]

  loop do
    ## note: nodes may be used up here (e.g. player without any minute);
    ##         report as parse error (do NOT crash on nil)
    node_type = nodes.empty? ? nil : nodes[0][0]
    if node_type != :minute
      puts "!! PARSE ERROR - minute expected to follow player (in goal); got #{node_type}:"
      pp nodes
      exit 1
    end

    node = nodes.shift
    goal = { name:   name,
             minute: node[2][:m] }
    goal[:offset] = node[2][:offset]   if node[2][:offset]

    ## check for own goal or penalty or such
    case nodes.empty? ? nil : nodes[0][0]
    when :og
      nodes.shift
      goal[:og] = true
    when :pen
      nodes.shift
      goal[:pen] = true
    end

    goals << goal

    ## consume/eat-up (optional?) commas
    nodes.shift   if !nodes.empty? && nodes[0][0] == :','

    ## check if another minute ahead; otherwise break
    ##   note: incl. list ending with a (trailing) comma
    break if nodes.empty? || nodes[0][0] != :minute
  end

  goals
end
484
-
485
-
486
## Parses a goals line and appends the goals to the last match.
##
##   e.g. [[:none], [:";"], [:player, "Xhaka"],...]
##
## Players (with minutes) get parsed via parse_minutes; the team separator
##   (;) switches @last_goals to 2, that is, goals that follow get
##   recorded for team 2 (hacky multi-line support - the toggle stays
##   on 2 for follow-up goal lines).
##
## Prints a parse error and exits (1) on an unexpected node type or
##   if there is no match yet to attach the goals to.
def parse_goals( nodes )
  logger.debug "parse goals: >#{nodes}<"

  ## quick & dirty - auto add goals to last match
  match = @matches[-1]
  if match.nil?
    puts "!! PARSE ERROR - goals found but no match (yet) to add goals to; got:"
    pp nodes
    exit 1
  end

  goals1 = []
  goals2 = []

  until nodes.empty?
    node_type = nodes[0][0]
    case node_type
    when :player
      more_goals = parse_minutes( nodes )
      ## hacky multi-line support for goals
      ##   using last_goal (1|2)
      if @last_goals == 2
        goals2 += more_goals
      else
        goals1 += more_goals
      end
    when :';'     ## team separator
      nodes.shift   # eat-up
      @last_goals = 2
    when :none
      nodes.shift   # eat-up
    else
      puts "!! PARSE ERROR - unexpected node type in goals;; got #{node_type}:"
      pp nodes
      exit 1
    end
  end

  pp [goals1,goals2]   if debug?

  ## wrap in struct and add/append to match
  ##   flat list of goals - one entry per goal; team is 1 or 2
  goals = [[1, goals1], [2, goals2]].flat_map do |team, recs|
    recs.map do |rec|
      Import::Goal.new(
        player:  rec[:name],
        team:    team,
        minute:  rec[:minute],
        offset:  rec[:offset],
        penalty: rec[:pen] || false,   # note: pass along/use false NOT nil
        owngoal: rec[:og]  || false
      )
    end
  end

  pp goals   if debug?

  ## note - for hacky (quick& dirty) multi-line support
  ##   always append for now
  match.goals ||= []
  match.goals += goals

  ## todo/fix
  ##   sort by minute
  ##   PLUS auto-fill score1,score2 - why? why not?
end
563
-
564
-
565
## Parses a match line (list of nodes) and appends the match to @matches.
##
## Collects match num, date, time, the two teams and the score from the
##   nodes; date and time fall back to the last seen date / time.
##   The round is the current round (@last_round) or - if none is set and
##   rounds are defined - the first round whose date range includes
##   the match date. Yes, matches WITHOUT rounds are allowed
##   (if no rounds are defined at all).
##
## Prints a parse error and exits (1) on an unexpected node type,
##   if not exactly two teams are found or if no matching round is found
##   (incl. no date known to look up the round).
def parse_match( nodes )
  logger.debug( "parse match: >#{nodes}<" )

  ## collect (possible) nodes by type
  num   = nil
  date  = nil
  time  = nil
  teams = []
  score = nil
  more  = []    ## geo parts - collected only; not (yet) passed along

  until nodes.empty?
    node      = nodes.shift
    node_type = node[0]

    case node_type
    when :num
      num = node[1]
    when :date
      ## note: date wipes out/clear time
      ##    time MUST always come after date
      time = nil
      date = _build_date( m: node[2][:m],
                          d: node[2][:d],
                          y: node[2][:y],
                          start: @start )
    when :time
      ## note - there's no time (-only) type in ruby
      ##   use string (e.g. '14:56', '1:44')
      time = '%d:%02d' % [node[2][:h], node[2][:m]]
    when :team
      teams << node[1]
    when :score
      ### todo/fix
      ## add keywords (e.g. ht, ft or such) to Score.new - why? why not?
      ##  note: missing parts get passed along as nil,nil pairs
      ht  = node[2][:ht] || [nil,nil]
      ft  = node[2][:ft] || [nil,nil]
      et  = node[2][:et] || [nil,nil]
      pen = node[2][:p]  || [nil,nil]
      score = Score.new( *ht, *ft, *et, *pen )
    when :vs
      ## skip; do nothing
      ##  todo - find (optional) match status e.g. [abandoned] or [replay] or [awarded]
      ##           or [cancelled] or [postponed] etc.
    when :'@', :',', :geo
      ## e.g.
      ## [:"@"], [:geo, "Stade de France"], [:","], [:geo, "Saint-Denis"]]
      more << node[1]   if node_type == :geo
    else
      puts "!! PARSE ERROR - unexpected node type #{node_type} in match line; got:"
      pp node
      exit 1
    end
  end

  if teams.size != 2
    puts "!! PARSE ERROR - expected two teams; got #{teams.size}:"
    pp teams
    exit 1
  end

  team1, team2 = teams

  @teams[ team1 ] += 1
  @teams[ team2 ] += 1

  ###
  # check if date found?
  #   note: ruby falsey is nil & false only (not 0 or empty array etc.)
  if date
    @last_date = date    # keep a reference for later use
    @last_time = nil
  else
    date = @last_date    # no date found; (re)use last seen date
  end

  if time
    @last_time = time
  else
    time = @last_time
  end

  round = nil
  if @last_round
    round = @last_round
  elsif @rounds.size > 0
    ## find (first) matching round by date if rounds / matchdays defined
    ##   note: without a date there is nothing to match;
    ##           report as parse error (do NOT crash on nil)
    if date
      ## note: convert date to date only (no time) with to_date!!!
      round = @rounds.values.find do |round_rec|
        (round_rec.start_date && round_rec.end_date) &&
        (date.to_date >= round_rec.start_date &&
         date.to_date <= round_rec.end_date)
      end
    end

    if round.nil?
      puts "!! PARSE ERROR - no matching round found for match date:"
      pp date
      exit 1
    end
  end

  ## todo/check: scores are integers or strings?
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?

  date_str = date ? date.strftime('%Y-%m-%d') : nil
  time_str = (date && time) ? time : nil

  status = nil
  ground = nil

  @matches << Import::Match.new( num:    num,
                                 date:   date_str,
                                 time:   time_str,
                                 team1:  team1,
                                 team2:  team2,
                                 score:  score,
                                 round:  round       ? round.name       : nil,   ## note: for now always use string (assume unique canonical name for event)
                                 group:  @last_group ? @last_group.name : nil,   ## note: for now always use string (assume unique canonical name for event)
                                 status: status,
                                 ground: ground )
  ### todo: cache team lookups in hash?

  ## hacky goals support
  ### reset/toggle 1/2
  @last_goals = 1
end
731
- end # class MatchParser
732
- end # module SportDb
733
-