sportdb-formats 1.1.6 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +53 -9
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +130 -13
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  module SportDb
4
3
 
@@ -21,13 +20,44 @@ class MatchParser ## simple match parser for team match schedules
21
20
  # for convenience split string into lines
22
21
  ## note: removes/strips empty lines
23
22
  ## todo/check: change to text instead of array of lines - why? why not?
24
- @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
25
-
23
+
24
+ ## note - wrap in enumerator/iterator a.k.a lines reader
25
+ @lines = LinesReader.new( lines.is_a?( String ) ?
26
+ read_lines( lines ) :
27
+ lines
28
+ )
29
+
26
30
  @mapper_teams = TeamMapper.new( teams )
27
31
  @start = start
28
32
  end
29
33
 
30
34
 
35
+
36
+
37
+ ## note: colon (:) MUST be followed by one (or more) spaces
38
+ ## make sure mon feb 12 18:10 will not match
39
+ ## allow 1. FC Köln etc.
40
+ ## Mainz 05:
41
+ ## limit to 30 chars max
42
+ ## only allow chars incl. intl but (NOT ()[]/;)
43
+ ##
44
+ ## Group A:
45
+ ## Group B: - remove colon
46
+ ## or lookup first
47
+
48
+ ATTRIB_REGEX = /^
49
+ [ ]*? # slurp leading spaces
50
+ (?<key>[^:|\]\[()\/; -]
51
+ [^:|\]\[()\/;]{0,30}
52
+ )
53
+ [ ]*? # slurp trailing spaces
54
+ :[ ]+
55
+ (?<value>.+)
56
+ [ ]*? # slurp trailing spaces
57
+ $
58
+ /ix
59
+
60
+
31
61
  def parse
32
62
  @last_date = nil
33
63
  @last_round = nil
@@ -39,11 +69,10 @@ class MatchParser ## simple match parser for team match schedules
39
69
 
40
70
  @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
41
71
 
42
-
72
+ ## todo/fix - use @lines.rewind first here - why? why not?
43
73
  @lines.each do |line|
44
- if is_goals?( line )
45
- logger.debug "skipping matched goals line: >#{line}<"
46
- elsif is_round_def?( line )
74
+
75
+ if is_round_def?( line )
47
76
  ## todo/fix: add round definition (w begin n end date)
48
77
  ## todo: do not patch rounds with definition (already assume begin/end date is good)
49
78
  ## -- how to deal with matches that get rescheduled/postponed?
@@ -56,6 +85,32 @@ class MatchParser ## simple match parser for team match schedules
56
85
  elsif is_group?( line )
57
86
  ## -- lets you set group e.g. Group A etc.
58
87
  parse_group_header( line )
88
+
89
+ elsif m=ATTRIB_REGEX.match( line )
90
+ ## note: check attrib regex AFTER group def e.g.:
91
+ ## Group A:
92
+ ## Group B: etc.
93
+ ## todo/fix - change Group A: to Group A etc.
94
+ ## Group B: to Group B
95
+
96
+ ## check if line ends with dot
97
+ ## if not slurp up lines to the next dot!!!
98
+ logger.debug "skipping key/value line - >#{line}<"
99
+ while !line.end_with?( '.' ) || line.nil? do
100
+ line = @lines.next
101
+ logger.debug "skipping key/value line (cont.) - >#{line}<"
102
+ end
103
+ elsif is_goals?( line )
104
+ ## note - goals must be AFTER attributes!!!
105
+ logger.debug "matched goals line: >#{line}<"
106
+ logger.debug " try parse:"
107
+
108
+ goals = GoalsFinder.new.find!( line )
109
+ pp goals
110
+ ## quick & dirty - auto add goals to last match
111
+ match = @matches[-1]
112
+ match.goals = goals
113
+
59
114
  elsif try_parse_game( line )
60
115
  # do nothing here
61
116
  elsif try_parse_date_header( line )
@@ -307,6 +362,27 @@ class MatchParser ## simple match parser for team match schedules
307
362
  end
308
363
 
309
364
 
365
+ ### todo/check - include (optional) leading space in regex - why? why not?
366
+ NUM_RE = /^[ ]*\(
367
+ (?<num>[0-9]{1,3})
368
+ \)
369
+ /x
370
+
371
+ def find_num!( line )
372
+ ## check for leading match number e.g.
373
+ ## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
374
+ m = line.match( NUM_RE )
375
+ if m
376
+ num = m[:num].to_i(10) ## allows 01/02/07 etc. -- why? why not?
377
+ match_str = m[0]
378
+ line.sub!( match_str, '[NUM]' )
379
+ num
380
+ else
381
+ nil
382
+ end
383
+ end
384
+
385
+
310
386
  def try_parse_game( line )
311
387
  # note: clone line; for possible test do NOT modify in place for now
312
388
  # note: returns true if parsed, false if no match
@@ -317,11 +393,27 @@ class MatchParser ## simple match parser for team match schedules
317
393
  def parse_game( line )
318
394
  logger.debug "parsing game (fixture) line: >#{line}<"
319
395
 
320
- ## split by geo (@) - remove for now
396
+ ## split by geo (@)
321
397
  ## split into parts e.g. break using @ !!!
322
398
  values = line.split( '@' )
323
- line = values[0]
324
399
 
400
+ ## for now pass along ground, city (timezone) as string as is
401
+ ## parse (map) later - why? why not??
402
+ ### check for ground/stadium and cities
403
+ ground = if values.size == 1
404
+ nil ## no stadium
405
+ elsif values.size == 2 # bingo!!!
406
+ ## process stadium, city (timezone) etc.
407
+ ## for now keep it simple - pass along "unparsed" all-in-one
408
+ values[1].gsub( /[ \t]+/, ' ').strip ## squish
409
+ else
410
+ puts "!! ERROR - too many @-markers found in line:"
411
+ puts line
412
+ exit 1
413
+ end
414
+
415
+
416
+ line = values[0]
325
417
 
326
418
  @mapper_teams.map_teams!( line ) ### todo/fix: limit mapping to two(2) teams - why? why not? might avoid matching @ Barcelona ??
327
419
  teams = @mapper_teams.find_teams!( line )
@@ -334,13 +426,23 @@ class MatchParser ## simple match parser for team match schedules
334
426
  return false
335
427
  end
336
428
 
429
+
430
+ ## try optional match number e.g.
431
+ ## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
432
+ num = find_num!( line )
433
+ ## pos = find_game_pos!( line )
434
+
337
435
  ## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
338
436
  ## or [cancelled] or [postponed] etc.
339
437
  status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
340
438
 
341
- ## pos = find_game_pos!( line )
342
439
 
343
- date = find_date!( line, start: @start )
440
+ date = find_date!( line, start: @start ) ## date or datetime (but NOT time!)
441
+
442
+ ## todo/fix:
443
+ ## add support for find_time! e.g. 21.00 (or 21:00 ?)
444
+
445
+
344
446
 
345
447
  ###
346
448
  # check if date found?
@@ -387,13 +489,28 @@ class MatchParser ## simple match parser for team match schedules
387
489
 
388
490
  ## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
389
491
 
390
- @matches << Import::Match.new( date: date,
492
+
493
+ ## split date in date & time if DateTime
494
+ time_str = nil
495
+ date_str = nil
496
+ if date.is_a?( DateTime )
497
+ date_str = date.strftime('%Y-%m-%d')
498
+ time_str = date.strftime('%H:%M')
499
+ elsif date.is_a?( Date )
500
+ date_str = date.strftime('%Y-%m-%d')
501
+ else # assume date is nil
502
+ end
503
+
504
+ @matches << Import::Match.new( num: num,
505
+ date: date_str,
506
+ time: time_str,
391
507
  team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
392
508
  team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
393
509
  score: score,
394
510
  round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
395
511
  group: @last_group ? @last_group.name : nil, ## note: for now always use string (assume unique canonical name for event)
396
- status: status )
512
+ status: status,
513
+ ground: ground )
397
514
  ### todo: cache team lookups in hash?
398
515
 
399
516
  =begin