sportdb-formats 1.1.6 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +53 -9
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +130 -13
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  module SportDb
4
3
 
@@ -21,13 +20,44 @@ class MatchParser ## simple match parser for team match schedules
21
20
  # for convenience split string into lines
22
21
  ## note: removes/strips empty lines
23
22
  ## todo/check: change to text instead of array of lines - why? why not?
24
- @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
25
-
23
+
24
+ ## note - wrap in enumerator/iterator a.k.a lines reader
25
+ @lines = LinesReader.new( lines.is_a?( String ) ?
26
+ read_lines( lines ) :
27
+ lines
28
+ )
29
+
26
30
  @mapper_teams = TeamMapper.new( teams )
27
31
  @start = start
28
32
  end
29
33
 
30
34
 
35
+
36
+
37
+ ## note: colon (:) MUST be followed by one (or more) spaces
38
+ ## make sure mon feb 12 18:10 will not match
39
+ ## allow 1. FC Köln etc.
40
+ ## Mainz 05:
41
+ ## limit to 30 chars max
42
+ ## only allow chars incl. intl but (NOT ()[]/;)
43
+ ##
44
+ ## Group A:
45
+ ## Group B: - remove colon
46
+ ## or lookup first
47
+
48
+ ATTRIB_REGEX = /^
49
+ [ ]*? # slurp leading spaces
50
+ (?<key>[^:|\]\[()\/; -]
51
+ [^:|\]\[()\/;]{0,30}
52
+ )
53
+ [ ]*? # slurp trailing spaces
54
+ :[ ]+
55
+ (?<value>.+)
56
+ [ ]*? # slurp trailing spaces
57
+ $
58
+ /ix
59
+
60
+
31
61
  def parse
32
62
  @last_date = nil
33
63
  @last_round = nil
@@ -39,11 +69,10 @@ class MatchParser ## simple match parser for team match schedules
39
69
 
40
70
  @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
41
71
 
42
-
72
+ ## todo/fix - use @lines.rewind first here - why? why not?
43
73
  @lines.each do |line|
44
- if is_goals?( line )
45
- logger.debug "skipping matched goals line: >#{line}<"
46
- elsif is_round_def?( line )
74
+
75
+ if is_round_def?( line )
47
76
  ## todo/fix: add round definition (w begin n end date)
48
77
  ## todo: do not patch rounds with definition (already assume begin/end date is good)
49
78
  ## -- how to deal with matches that get rescheduled/postponed?
@@ -56,6 +85,32 @@ class MatchParser ## simple match parser for team match schedules
56
85
  elsif is_group?( line )
57
86
  ## -- lets you set group e.g. Group A etc.
58
87
  parse_group_header( line )
88
+
89
+ elsif m=ATTRIB_REGEX.match( line )
90
+ ## note: check attrib regex AFTER group def e.g.:
91
+ ## Group A:
92
+ ## Group B: etc.
93
+ ## todo/fix - change Group A: to Group A etc.
94
+ ## Group B: to Group B
95
+
96
+ ## check if line ends with dot
97
+ ## if not slurp up lines to the next dot!!!
98
+ logger.debug "skipping key/value line - >#{line}<"
99
+ while !line.end_with?( '.' ) || line.nil? do
100
+ line = @lines.next
101
+ logger.debug "skipping key/value line (cont.) - >#{line}<"
102
+ end
103
+ elsif is_goals?( line )
104
+ ## note - goals must be AFTER attributes!!!
105
+ logger.debug "matched goals line: >#{line}<"
106
+ logger.debug " try parse:"
107
+
108
+ goals = GoalsFinder.new.find!( line )
109
+ pp goals
110
+ ## quick & dirty - auto add goals to last match
111
+ match = @matches[-1]
112
+ match.goals = goals
113
+
59
114
  elsif try_parse_game( line )
60
115
  # do nothing here
61
116
  elsif try_parse_date_header( line )
@@ -307,6 +362,27 @@ class MatchParser ## simple match parser for team match schedules
307
362
  end
308
363
 
309
364
 
365
+ ### todo/check - include (optional) leading space in regex - why? why not?
366
+ NUM_RE = /^[ ]*\(
367
+ (?<num>[0-9]{1,3})
368
+ \)
369
+ /x
370
+
371
+ def find_num!( line )
372
+ ## check for leading match number e.g.
373
+ ## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
374
+ m = line.match( NUM_RE )
375
+ if m
376
+ num = m[:num].to_i(10) ## allows 01/02/07 etc. -- why? why not?
377
+ match_str = m[0]
378
+ line.sub!( match_str, '[NUM]' )
379
+ num
380
+ else
381
+ nil
382
+ end
383
+ end
384
+
385
+
310
386
  def try_parse_game( line )
311
387
  # note: clone line; for possible test do NOT modify in place for now
312
388
  # note: returns true if parsed, false if no match
@@ -317,11 +393,27 @@ class MatchParser ## simple match parser for team match schedules
317
393
  def parse_game( line )
318
394
  logger.debug "parsing game (fixture) line: >#{line}<"
319
395
 
320
- ## split by geo (@) - remove for now
396
+ ## split by geo (@)
321
397
  ## split into parts e.g. break using @ !!!
322
398
  values = line.split( '@' )
323
- line = values[0]
324
399
 
400
+ ## for now pass along ground, city (timezone) as string as is
401
+ ## parse (map) later - why? why not??
402
+ ### check for ground/stadium and cities
403
+ ground = if values.size == 1
404
+ nil ## no stadium
405
+ elsif values.size == 2 # bingo!!!
406
+ ## process stadium, city (timezone) etc.
407
+ ## for now keep it simple - pass along "unparsed" all-in-one
408
+ values[1].gsub( /[ \t]+/, ' ').strip ## squish
409
+ else
410
+ puts "!! ERROR - too many @-markers found in line:"
411
+ puts line
412
+ exit 1
413
+ end
414
+
415
+
416
+ line = values[0]
325
417
 
326
418
  @mapper_teams.map_teams!( line ) ### todo/fix: limit mapping to two(2) teams - why? why not? might avoid matching @ Barcelona ??
327
419
  teams = @mapper_teams.find_teams!( line )
@@ -334,13 +426,23 @@ class MatchParser ## simple match parser for team match schedules
334
426
  return false
335
427
  end
336
428
 
429
+
430
+ ## try optional match number e.g.
431
+ ## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
432
+ num = find_num!( line )
433
+ ## pos = find_game_pos!( line )
434
+
337
435
  ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
338
436
  ## or [cancelled] or [postponed] etc.
339
437
  status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
340
438
 
341
- ## pos = find_game_pos!( line )
342
439
 
343
- date = find_date!( line, start: @start )
440
+ date = find_date!( line, start: @start ) ## date or datetime (but NOT time!)
441
+
442
+ ## todo/fix:
443
+ ## add support for find_time! e.g. 21.00 (or 21:00 ?)
444
+
445
+
344
446
 
345
447
  ###
346
448
  # check if date found?
@@ -387,13 +489,28 @@ class MatchParser ## simple match parser for team match schedules
387
489
 
388
490
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
389
491
 
390
- @matches << Import::Match.new( date: date,
492
+
493
+ ## split date in date & time if DateTime
494
+ time_str = nil
495
+ date_str = nil
496
+ if date.is_a?( DateTime )
497
+ date_str = date.strftime('%Y-%m-%d')
498
+ time_str = date.strftime('%H:%M')
499
+ elsif date.is_a?( Date )
500
+ date_str = date.strftime('%Y-%m-%d')
501
+ else # assume date is nil
502
+ end
503
+
504
+ @matches << Import::Match.new( num: num,
505
+ date: date_str,
506
+ time: time_str,
391
507
  team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
392
508
  team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
393
509
  score: score,
394
510
  round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
395
511
  group: @last_group ? @last_group.name : nil, ## note: for now always use string (assume unique canonical name for event)
396
- status: status )
512
+ status: status,
513
+ ground: ground )
397
514
  ### todo: cache team lookups in hash?
398
515
 
399
516
  =begin