sportdb-formats 1.1.6 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +53 -9
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +130 -13
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -1,4 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
module SportDb
|
4
3
|
|
@@ -21,13 +20,44 @@ class MatchParser ## simple match parser for team match schedules
|
|
21
20
|
# for convenience split string into lines
|
22
21
|
## note: removes/strips empty lines
|
23
22
|
## todo/check: change to text instead of array of lines - why? why not?
|
24
|
-
|
25
|
-
|
23
|
+
|
24
|
+
## note - wrap in enumerator/iterator a.k.a lines reader
|
25
|
+
@lines = LinesReader.new( lines.is_a?( String ) ?
|
26
|
+
read_lines( lines ) :
|
27
|
+
lines
|
28
|
+
)
|
29
|
+
|
26
30
|
@mapper_teams = TeamMapper.new( teams )
|
27
31
|
@start = start
|
28
32
|
end
|
29
33
|
|
30
34
|
|
35
|
+
|
36
|
+
|
37
|
+
## note: colon (:) MUST be followed by one (or more) spaces
|
38
|
+
## make sure mon feb 12 18:10 will not match
|
39
|
+
## allow 1. FC Köln etc.
|
40
|
+
## Mainz 05:
|
41
|
+
## limit to 30 chars max
|
42
|
+
## only allow chars incl. intl but (NOT ()[]/;)
|
43
|
+
##
|
44
|
+
## Group A:
|
45
|
+
## Group B: - remove colon
|
46
|
+
## or lookup first
|
47
|
+
|
48
|
+
ATTRIB_REGEX = /^
|
49
|
+
[ ]*? # slurp leading spaces
|
50
|
+
(?<key>[^:|\]\[()\/; -]
|
51
|
+
[^:|\]\[()\/;]{0,30}
|
52
|
+
)
|
53
|
+
[ ]*? # slurp trailing spaces
|
54
|
+
:[ ]+
|
55
|
+
(?<value>.+)
|
56
|
+
[ ]*? # slurp trailing spaces
|
57
|
+
$
|
58
|
+
/ix
|
59
|
+
|
60
|
+
|
31
61
|
def parse
|
32
62
|
@last_date = nil
|
33
63
|
@last_round = nil
|
@@ -39,11 +69,10 @@ class MatchParser ## simple match parser for team match schedules
|
|
39
69
|
|
40
70
|
@warns = [] ## track list of warnings (unmatched lines) too - why? why not?
|
41
71
|
|
42
|
-
|
72
|
+
## todo/fix - use @lines.rewind first here - why? why not?
|
43
73
|
@lines.each do |line|
|
44
|
-
|
45
|
-
|
46
|
-
elsif is_round_def?( line )
|
74
|
+
|
75
|
+
if is_round_def?( line )
|
47
76
|
## todo/fix: add round definition (w begin n end date)
|
48
77
|
## todo: do not patch rounds with definition (already assume begin/end date is good)
|
49
78
|
## -- how to deal with matches that get rescheduled/postponed?
|
@@ -56,6 +85,32 @@ class MatchParser ## simple match parser for team match schedules
|
|
56
85
|
elsif is_group?( line )
|
57
86
|
## -- lets you set group e.g. Group A etc.
|
58
87
|
parse_group_header( line )
|
88
|
+
|
89
|
+
elsif m=ATTRIB_REGEX.match( line )
|
90
|
+
## note: check attrib regex AFTER group def e.g.:
|
91
|
+
## Group A:
|
92
|
+
## Group B: etc.
|
93
|
+
## todo/fix - change Group A: to Group A etc.
|
94
|
+
## Group B: to Group B
|
95
|
+
|
96
|
+
## check if line ends with dot
|
97
|
+
## if not, slurp up lines to the next dot!!!
|
98
|
+
logger.debug "skipping key/value line - >#{line}<"
|
99
|
+
while !line.end_with?( '.' ) || line.nil? do
|
100
|
+
line = @lines.next
|
101
|
+
logger.debug "skipping key/value line (cont.) - >#{line}<"
|
102
|
+
end
|
103
|
+
elsif is_goals?( line )
|
104
|
+
## note - goals must be AFTER attributes!!!
|
105
|
+
logger.debug "matched goals line: >#{line}<"
|
106
|
+
logger.debug " try parse:"
|
107
|
+
|
108
|
+
goals = GoalsFinder.new.find!( line )
|
109
|
+
pp goals
|
110
|
+
## quick & dirty - auto add goals to last match
|
111
|
+
match = @matches[-1]
|
112
|
+
match.goals = goals
|
113
|
+
|
59
114
|
elsif try_parse_game( line )
|
60
115
|
# do nothing here
|
61
116
|
elsif try_parse_date_header( line )
|
@@ -307,6 +362,27 @@ class MatchParser ## simple match parser for team match schedules
|
|
307
362
|
end
|
308
363
|
|
309
364
|
|
365
|
+
### todo/check - include (optional) leading space in regex - why? why not?
|
366
|
+
NUM_RE = /^[ ]*\(
|
367
|
+
(?<num>[0-9]{1,3})
|
368
|
+
\)
|
369
|
+
/x
|
370
|
+
|
371
|
+
def find_num!( line )
|
372
|
+
## check for leading match number e.g.
|
373
|
+
## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
|
374
|
+
m = line.match( NUM_RE )
|
375
|
+
if m
|
376
|
+
num = m[:num].to_i(10) ## allows 01/02/07 etc. -- why? why not?
|
377
|
+
match_str = m[0]
|
378
|
+
line.sub!( match_str, '[NUM]' )
|
379
|
+
num
|
380
|
+
else
|
381
|
+
nil
|
382
|
+
end
|
383
|
+
end
|
384
|
+
|
385
|
+
|
310
386
|
def try_parse_game( line )
|
311
387
|
# note: clone line; for possible test do NOT modify in place for now
|
312
388
|
# note: returns true if parsed, false if no match
|
@@ -317,11 +393,27 @@ class MatchParser ## simple match parser for team match schedules
|
|
317
393
|
def parse_game( line )
|
318
394
|
logger.debug "parsing game (fixture) line: >#{line}<"
|
319
395
|
|
320
|
-
## split by geo (@)
|
396
|
+
## split by geo (@)
|
321
397
|
## split into parts e.g. break using @ !!!
|
322
398
|
values = line.split( '@' )
|
323
|
-
line = values[0]
|
324
399
|
|
400
|
+
## for now pass along ground, city (timezone) as string as is
|
401
|
+
## parse (map) later - why? why not??
|
402
|
+
### check for ground/stadium and cities
|
403
|
+
ground = if values.size == 1
|
404
|
+
nil ## no stadium
|
405
|
+
elsif values.size == 2 # bingo!!!
|
406
|
+
## process stadium, city (timezone) etc.
|
407
|
+
## for now keep it simple - pass along "unparsed" all-in-one
|
408
|
+
values[1].gsub( /[ \t]+/, ' ').strip ## squish
|
409
|
+
else
|
410
|
+
puts "!! ERROR - too many @-markers found in line:"
|
411
|
+
puts line
|
412
|
+
exit 1
|
413
|
+
end
|
414
|
+
|
415
|
+
|
416
|
+
line = values[0]
|
325
417
|
|
326
418
|
@mapper_teams.map_teams!( line ) ### todo/fix: limit mapping to two(2) teams - why? why not? might avoid matching @ Barcelona ??
|
327
419
|
teams = @mapper_teams.find_teams!( line )
|
@@ -334,13 +426,23 @@ class MatchParser ## simple match parser for team match schedules
|
|
334
426
|
return false
|
335
427
|
end
|
336
428
|
|
429
|
+
|
430
|
+
## try optional match number e.g.
|
431
|
+
## (1) Fri Jun/14 21:00 Germany 5-1 (3-0) Scotland
|
432
|
+
num = find_num!( line )
|
433
|
+
## pos = find_game_pos!( line )
|
434
|
+
|
337
435
|
## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
|
338
436
|
## or [cancelled] or [postponed] etc.
|
339
437
|
status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
|
340
438
|
|
341
|
-
## pos = find_game_pos!( line )
|
342
439
|
|
343
|
-
date = find_date!( line, start: @start )
|
440
|
+
date = find_date!( line, start: @start ) ## date or datetime (but NOT time!)
|
441
|
+
|
442
|
+
## todo/fix:
|
443
|
+
## add support for find_time! e.g. 21.00 (or 21:00 ?)
|
444
|
+
|
445
|
+
|
344
446
|
|
345
447
|
###
|
346
448
|
# check if date found?
|
@@ -387,13 +489,28 @@ class MatchParser ## simple match parser for team match schedules
|
|
387
489
|
|
388
490
|
## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
|
389
491
|
|
390
|
-
|
492
|
+
|
493
|
+
## split date in date & time if DateTime
|
494
|
+
time_str = nil
|
495
|
+
date_str = nil
|
496
|
+
if date.is_a?( DateTime )
|
497
|
+
date_str = date.strftime('%Y-%m-%d')
|
498
|
+
time_str = date.strftime('%H:%M')
|
499
|
+
elsif date.is_a?( Date )
|
500
|
+
date_str = date.strftime('%Y-%m-%d')
|
501
|
+
else # assume date is nil
|
502
|
+
end
|
503
|
+
|
504
|
+
@matches << Import::Match.new( num: num,
|
505
|
+
date: date_str,
|
506
|
+
time: time_str,
|
391
507
|
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
|
392
508
|
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
393
509
|
score: score,
|
394
510
|
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
395
511
|
group: @last_group ? @last_group.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
396
|
-
status: status
|
512
|
+
status: status,
|
513
|
+
ground: ground )
|
397
514
|
### todo: cache team lookups in hash?
|
398
515
|
|
399
516
|
=begin
|