sportdb-formats 1.1.6 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +53 -9
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +130 -13
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -1,4 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
module SportDb
|
4
3
|
|
@@ -21,13 +20,44 @@ class MatchParser ## simple match parser for team match schedules
|
|
21
20
|
# for convenience split string into lines
|
22
21
|
## note: removes/strips empty lines
|
23
22
|
## todo/check: change to text instead of array of lines - why? why not?
|
24
|
-
|
25
|
-
|
23
|
+
|
24
|
+
## note - wrap in enumerator/iterator a.k.a lines reader
|
25
|
+
@lines = LinesReader.new( lines.is_a?( String ) ?
|
26
|
+
read_lines( lines ) :
|
27
|
+
lines
|
28
|
+
)
|
29
|
+
|
26
30
|
@mapper_teams = TeamMapper.new( teams )
|
27
31
|
@start = start
|
28
32
|
end
|
29
33
|
|
30
34
|
|
35
|
+
|
36
|
+
|
37
|
+
## note: colon (:) MUST be followed by one (or more) spaces
|
38
|
+
## make sure mon feb 12 18:10 will not match
|
39
|
+
## allow 1. FC Köln etc.
|
40
|
+
## Mainz 05:
|
41
|
+
## limit to 30 chars max
|
42
|
+
## only allow chars incl. intl but (NOT ()[]/;)
|
43
|
+
##
|
44
|
+
## Group A:
|
45
|
+
## Group B: - remove colon
|
46
|
+
## or lookup first
|
47
|
+
|
48
|
+
ATTRIB_REGEX = /^
|
49
|
+
[ ]*? # slurp leading spaces
|
50
|
+
(?<key>[^:|\]\[()\/; -]
|
51
|
+
[^:|\]\[()\/;]{0,30}
|
52
|
+
)
|
53
|
+
[ ]*? # slurp trailing spaces
|
54
|
+
:[ ]+
|
55
|
+
(?<value>.+)
|
56
|
+
[ ]*? # slurp trailing spaces
|
57
|
+
$
|
58
|
+
/ix
|
59
|
+
|
60
|
+
|
31
61
|
def parse
|
32
62
|
@last_date = nil
|
33
63
|
@last_round = nil
|
@@ -39,11 +69,10 @@ class MatchParser ## simple match parser for team match schedules
|
|
39
69
|
|
40
70
|
@warns = [] ## track list of warnings (unmatched lines) too - why? why not?
|
41
71
|
|
42
|
-
|
72
|
+
## todo/fix - use @lines.rewind first here - why? why not?
|
43
73
|
@lines.each do |line|
|
44
|
-
|
45
|
-
|
46
|
-
elsif is_round_def?( line )
|
74
|
+
|
75
|
+
if is_round_def?( line )
|
47
76
|
## todo/fix: add round definition (w begin n end date)
|
48
77
|
## todo: do not patch rounds with definition (already assume begin/end date is good)
|
49
78
|
## -- how to deal with matches that get rescheduled/postponed?
|
@@ -56,6 +85,32 @@ class MatchParser ## simple match parser for team match schedules
|
|
56
85
|
elsif is_group?( line )
|
57
86
|
## -- lets you set group e.g. Group A etc.
|
58
87
|
parse_group_header( line )
|
88
|
+
|
89
|
+
elsif m=ATTRIB_REGEX.match( line )
|
90
|
+
## note: check attrib regex AFTER group def e.g.:
|
91
|
+
## Group A:
|
92
|
+
## Group B: etc.
|
93
|
+
## todo/fix - change Group A: to Group A etc.
|
94
|
+
## Group B: to Group B
|
95
|
+
|
96
|
+
## check if line ends with dot
|
97
|
+
## if not slurp up lines to the next dot!!!
|
98
|
+
logger.debug "skipping key/value line - >#{line}<"
|
99
|
+
while !line.end_with?( '.' ) || line.nil? do
|
100
|
+
line = @lines.next
|
101
|
+
logger.debug "skipping key/value line (cont.) - >#{line}<"
|
102
|
+
end
|
103
|
+
elsif is_goals?( line )
|
104
|
+
## note - goals must be AFTER attributes!!!
|
105
|
+
logger.debug "matched goals line: >#{line}<"
|
106
|
+
logger.debug " try parse:"
|
107
|
+
|
108
|
+
goals = GoalsFinder.new.find!( line )
|
109
|
+
pp goals
|
110
|
+
## quick & dirty - auto add goals to last match
|
111
|
+
match = @matches[-1]
|
112
|
+
match.goals = goals
|
113
|
+
|
59
114
|
elsif try_parse_game( line )
|
60
115
|
# do nothing here
|
61
116
|
elsif try_parse_date_header( line )
|
@@ -307,6 +362,27 @@ class MatchParser ## simple match parser for team match schedules
|
|
307
362
|
end
|
308
363
|
|
309
364
|
|
365
|
+
### todo/check - include (optional) leading space in regex - why? why not?
|
366
|
+
NUM_RE = /^[ ]*\(
|
367
|
+
(?<num>[0-9]{1,3})
|
368
|
+
\)
|
369
|
+
/x
|
370
|
+
|
371
|
+
def find_num!( line )
|
372
|
+
## check for leading match number e.g.
|
373
|
+
## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
|
374
|
+
m = line.match( NUM_RE )
|
375
|
+
if m
|
376
|
+
num = m[:num].to_i(10) ## allows 01/02/07 etc. -- why? why not?
|
377
|
+
match_str = m[0]
|
378
|
+
line.sub!( match_str, '[NUM]' )
|
379
|
+
num
|
380
|
+
else
|
381
|
+
nil
|
382
|
+
end
|
383
|
+
end
|
384
|
+
|
385
|
+
|
310
386
|
def try_parse_game( line )
|
311
387
|
# note: clone line; for possible test do NOT modify in place for now
|
312
388
|
# note: returns true if parsed, false if no match
|
@@ -317,11 +393,27 @@ class MatchParser ## simple match parser for team match schedules
|
|
317
393
|
def parse_game( line )
|
318
394
|
logger.debug "parsing game (fixture) line: >#{line}<"
|
319
395
|
|
320
|
-
## split by geo (@)
|
396
|
+
## split by geo (@)
|
321
397
|
## split into parts e.g. break using @ !!!
|
322
398
|
values = line.split( '@' )
|
323
|
-
line = values[0]
|
324
399
|
|
400
|
+
## for now pass along ground, city (timezone) as string as is
|
401
|
+
## parse (map) later - why? why not??
|
402
|
+
### check for ground/stadium and cities
|
403
|
+
ground = if values.size == 1
|
404
|
+
nil ## no stadium
|
405
|
+
elsif values.size == 2 # bingo!!!
|
406
|
+
## process stadium, city (timezone) etc.
|
407
|
+
## for now keep it simple - pass along "unparsed" all-in-one
|
408
|
+
values[1].gsub( /[ \t]+/, ' ').strip ## squish
|
409
|
+
else
|
410
|
+
puts "!! ERROR - too many @-markers found in line:"
|
411
|
+
puts line
|
412
|
+
exit 1
|
413
|
+
end
|
414
|
+
|
415
|
+
|
416
|
+
line = values[0]
|
325
417
|
|
326
418
|
@mapper_teams.map_teams!( line ) ### todo/fix: limit mapping to two(2) teams - why? why not? might avoid matching @ Barcelona ??
|
327
419
|
teams = @mapper_teams.find_teams!( line )
|
@@ -334,13 +426,23 @@ class MatchParser ## simple match parser for team match schedules
|
|
334
426
|
return false
|
335
427
|
end
|
336
428
|
|
429
|
+
|
430
|
+
## try optional match number e.g.
|
431
|
+
## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
|
432
|
+
num = find_num!( line )
|
433
|
+
## pos = find_game_pos!( line )
|
434
|
+
|
337
435
|
## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
|
338
436
|
## or [cancelled] or [postponed] etc.
|
339
437
|
status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
|
340
438
|
|
341
|
-
## pos = find_game_pos!( line )
|
342
439
|
|
343
|
-
date = find_date!( line, start: @start )
|
440
|
+
date = find_date!( line, start: @start ) ## date or datetime (but NOT time!)
|
441
|
+
|
442
|
+
## todo/fix:
|
443
|
+
## add support for find_time! e.g. 21.00 (or 21:00 ?)
|
444
|
+
|
445
|
+
|
344
446
|
|
345
447
|
###
|
346
448
|
# check if date found?
|
@@ -387,13 +489,28 @@ class MatchParser ## simple match parser for team match schedules
|
|
387
489
|
|
388
490
|
## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
|
389
491
|
|
390
|
-
|
492
|
+
|
493
|
+
## split date in date & time if DateTime
|
494
|
+
time_str = nil
|
495
|
+
date_str = nil
|
496
|
+
if date.is_a?( DateTime )
|
497
|
+
date_str = date.strftime('%Y-%m-%d')
|
498
|
+
time_str = date.strftime('%H:%M')
|
499
|
+
elsif date.is_a?( Date )
|
500
|
+
date_str = date.strftime('%Y-%m-%d')
|
501
|
+
else # assume date is nil
|
502
|
+
end
|
503
|
+
|
504
|
+
@matches << Import::Match.new( num: num,
|
505
|
+
date: date_str,
|
506
|
+
time: time_str,
|
391
507
|
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
|
392
508
|
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
393
509
|
score: score,
|
394
510
|
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
395
511
|
group: @last_group ? @last_group.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
396
|
-
status: status
|
512
|
+
status: status,
|
513
|
+
ground: ground )
|
397
514
|
### todo: cache team lookups in hash?
|
398
515
|
|
399
516
|
=begin
|