sportdb-formats 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -19
- data/Rakefile +2 -7
- data/bin/fbchk +166 -0
- data/lib/sportdb/formats/quick_match_linter.rb +195 -0
- data/lib/sportdb/formats/version.rb +2 -2
- data/lib/sportdb/formats.rb +10 -269
- metadata +10 -82
- data/lib/sportdb/formats/country/country_reader.rb +0 -142
- data/lib/sportdb/formats/csv/goal.rb +0 -192
- data/lib/sportdb/formats/csv/goal_parser_csv.rb +0 -28
- data/lib/sportdb/formats/csv/match_parser_csv.rb +0 -490
- data/lib/sportdb/formats/csv/match_status_parser.rb +0 -90
- data/lib/sportdb/formats/datafile.rb +0 -59
- data/lib/sportdb/formats/event/event_reader.rb +0 -119
- data/lib/sportdb/formats/ground/ground_reader.rb +0 -289
- data/lib/sportdb/formats/league/league_outline_reader.rb +0 -176
- data/lib/sportdb/formats/league/league_reader.rb +0 -152
- data/lib/sportdb/formats/match/conf_parser.rb +0 -132
- data/lib/sportdb/formats/match/match_parser.rb +0 -733
- data/lib/sportdb/formats/search/sport.rb +0 -372
- data/lib/sportdb/formats/search/structs.rb +0 -116
- data/lib/sportdb/formats/search/world.rb +0 -157
- data/lib/sportdb/formats/team/club_reader.rb +0 -318
- data/lib/sportdb/formats/team/club_reader_history.rb +0 -203
- data/lib/sportdb/formats/team/club_reader_props.rb +0 -90
- data/lib/sportdb/formats/team/wiki_reader.rb +0 -108
@@ -1,176 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module SportDb
|
5
|
-
|
6
|
-
## shared "higher-level" outline reader
|
7
|
-
## todo: add CountryOutlineReader - why? why not?
|
8
|
-
|
9
|
-
class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeaguePageOutlineReader - why? why not?
|
10
|
-
|
11
|
-
## Read the file at +path+ (decoded as utf-8) and hand the text on to parse.
##
## path   - path of the outline text file to read
## season - optional season filter; passed through to parse unchanged
##
## Returns whatever parse returns for the file's text.
def self.read( path, season: nil )   ## use - rename to read_file or from_file etc. - why? why not?
  text = File.read( path, encoding: 'utf-8' )
  parse( text, season: season )
end
|
15
|
-
|
16
|
-
## Convenience entry point: build a reader for +txt+ and run its parse.
##
## txt    - outline text
## season - optional season filter; passed through to the instance parse
def self.parse( txt, season: nil )
  reader = new( txt )
  reader.parse( season: season )
end
|
19
|
-
|
20
|
-
|
21
|
-
## Keep the outline text for a later call to parse.
##
## txt - outline text (stored as-is, nothing is parsed here)
def initialize( txt )
  @txt = txt
end
|
24
|
-
|
25
|
-
## Split the outline text into league sections, optionally filter the
## sections by season and resolve the season and league of every section.
##
## season - optional filter (single season, array or range; see norm_seasons);
##          sections whose season key is not in the filter get dropped
##
## Returns an array of section hashes with the keys
##   :league - result of catalog.leagues.find! for the heading's league name
##   :season - result of Season.parse for the heading's season
##   :stage  - second heading value or nil if not present
##   :lines  - all paragraph lines collected below the heading
##
## Note: prints a message and exits (status 1) on a heading without
##   league + season, on an unknown node type or on an unknown stage.
def parse( season: nil )
  secs = []     # sec(tion)s

  ## pass 1 - collect one section per heading 1 (with its lines)
  OutlineReader.parse( @txt ).each do |node|
    case node[0]
    when :h1
      ## check for league (and stage) and season
      heading = node[1]
      values  = split_league( heading )

      ## note: values is empty for an empty heading (nothing to split);
      ##   guard against nil to end up in the error branch below
      ##   (and not in a NoMethodError on nil)
      m = values[0] && values[0].match( LEAGUE_SEASON_HEADING_RE )
      if m
        puts "league >#{m[:league]}<, season >#{m[:season]}<"

        secs << { league: m[:league],
                  season: m[:season],
                  stage:  values[1],     ## note: defaults to nil if not present
                  lines:  []
                }
      else
        puts "** !!! ERROR - cannot match league and season in heading; season missing?"
        pp heading
        exit 1
      end
    when :p     ## paragraph with (text) lines
      lines = node[1]
      ## note: skip lines if no heading seen
      if secs.empty?
        puts "** !!! WARN - skipping lines (no heading):"
        pp lines
      else
        ## todo/check: unroll paragraphs into lines or pass along paragraphs - why? why not?
        secs[-1][:lines] += lines
      end
    else
      puts "** !!! ERROR - unknown line type; for now only heading 1 for leagues supported; sorry:"
      pp node
      exit 1
    end
  end

  ## pass 2 - filter seasons if filter present
  if season
    filter = norm_seasons( season )
    secs = secs.select do |sec|
      matched = filter.include?( Season.parse( sec[:season] ).key )
      puts " skipping season >#{sec[:season]}< NOT matched by filter"   unless matched
      matched
    end
  end

  ## pass 3 - check & map; replace inline (string with data struct record)
  secs.each do |sec|
    sec[:season] = Season.parse( sec[:season] )
    sec[:league] = catalog.leagues.find!( sec[:league] )

    check_stage( sec[:stage] )   if sec[:stage]   ## note: only check for now (no remapping etc.)
  end

  secs
end # method parse
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
## shortcut convenience helper - hands back Import.catalog
def catalog
  Import.catalog
end
|
91
|
-
|
92
|
-
## heading matcher - splits a heading into league name + season
##   e.g. Österr. Bundesliga 2015/16    ## or 2015-16
##        World Cup 2018
##
##  named captures:
##    league - everything before the season (non-greedy)
##    season - four-digit year with an optional second part (1 to 4 digits)
##             separated by a slash (/) or dash (-)
LEAGUE_SEASON_HEADING_RE = %r{^
      (?<league>.+?)     ## non-greedy
         \s+
      (?<season>\d{4}
         (?:[\/-]\d{1,4})?    ## optional 2nd year in season
      )
    $}x
|
102
|
-
|
103
|
-
|
104
|
-
## Turn a season filter into an array of season keys.
##
## season_or_seasons - an array (used as is), a range (expanded with to_a)
##                     or anything else (treated as a single entry)
##
## Returns an array with Season( entry ).key for every entry.
def norm_seasons( season_or_seasons )  ## todo/check: add alias norm_seasons - why? why not?
  seasons = case season_or_seasons
            when Array then season_or_seasons           # is it an array already
            when Range then season_or_seasons.to_a      # e.g. Season(1999)..Season(2001) or such
            else            [season_or_seasons]         ## assume - single entry - wrap in array
            end

  seasons.map { |season| Season( season ).key }
end
|
116
|
-
|
117
|
-
|
118
|
-
## Split a heading into its parts (league+season / stage / ...) e.g.
##   Österr. Bundesliga 2018/19, Regular Season
##   Österr. Bundesliga 2018/19, Championship Round
##
## str - heading text
##
## Returns an array of the parts with leading and trailing whitespace removed.
def split_league( str )   ## todo/check: rename to parse_league(s) - why? why not?
  ## note: allow , > < or › ‹ as separators for now
  str.split( /[,<>‹›]/ ).map( &:strip )
end
|
127
|
-
|
128
|
-
|
129
|
-
## (builtin) table of known league stage names
##   note: the names get stored normalized, that is,
##          downcased with all non a-z chars removed (e.g. space, dash, digits etc.);
##         the table is frozen (no changes at runtime)
KNOWN_STAGES = [
  'Regular Season',
  'Regular Stage',
  'Championship Round',
  'Championship Playoff',    # or Championship play-off
  'Relegation Round',
  'Relegation Playoff',
  'Play-offs',
  'Playoff Stage',
  'Grunddurchgang',
  'Finaldurchgang - Qualifikationsgruppe',
  'Finaldurchgang - Qualifikation',
  'Finaldurchgang - Meistergruppe',
  'Finaldurchgang - Meister',
  'EL Play-off',
  'Europa League Play-off',
  'Europa-League-Play-offs',
  'Europa League Finals',
  'Playoffs - Championship',
  'Playoffs - Europa League',
  'Playoffs - Europa League - Finals',
  'Playoffs - Relegation',
  'Playoffs - Challenger',
  'Finals',
  'Match 6th Place',     # e.g. Super League Greece 2012/13

  'Apertura',
  'Apertura - Liguilla',
  'Clausura',
  'Clausura - Liguilla',

].map { |name| name.downcase.gsub( /[^a-z]/, '' ) }.freeze
|
162
|
-
|
163
|
-
|
164
|
-
## Check that the stage name is listed in the (builtin) stages table.
##
## name - stage name; gets normalized before the lookup
##        (downcase and remove all non a-z chars e.g. space, dash, etc.)
##
## Returns nil if the stage is known;
##   otherwise prints an error message and exits (status 1).
def check_stage( name )
  key = name.downcase.gsub( /[^a-z]/, '' )
  return if KNOWN_STAGES.include?( key )    ## everything ok

  puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
  exit 1
end
|
173
|
-
|
174
|
-
end # class LeagueOutlineReader
|
175
|
-
|
176
|
-
end # module SportDb
|
@@ -1,152 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module Import
|
4
|
-
|
5
|
-
|
6
|
-
class LeagueReader
|
7
|
-
|
8
|
-
## shortcut convenience helper - hands back Import.world
def world
  Import.world
end
|
9
|
-
|
10
|
-
|
11
|
-
## Read the file at +path+ (decoded as utf-8) and hand the text on to parse.
##
## path - path of the leagues text file to read
##
## Returns whatever parse returns for the file's text.
def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
  text = File.read( path, encoding: 'utf-8' )
  parse( text )
end
|
15
|
-
|
16
|
-
## Convenience entry point: build a reader for +txt+ and run its parse.
def self.parse( txt )
  reader = new( txt )
  reader.parse
end
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
include Logging
|
23
|
-
|
24
|
-
## Keep the leagues text for a later call to parse.
##
## txt - leagues text (stored as-is, nothing is parsed here)
def initialize( txt )
  @txt = txt
end
|
27
|
-
|
28
|
-
## Parse the leagues text into league records.
##
## Headings (level 1 only) set the context for the lines that follow:
##   - heading matching "national team"        => int'l, national teams, no country
##   - heading matching "international|int'l"  => int'l, clubs, no country
##   - any other heading                       => country (looked up via world.countries.parse)
##
## Paragraph lines are either
##   - a league line, that is, key followed by name e.g.  1  Bundesliga   or
##   - a line starting with a pipe (|) with alternative names
##     for the league of the last league line
##
## Returns an array of League records (in the order of the league lines).
##
## Note: prints a message and exits (status 1) on an unsupported heading level,
##   an unknown country, an alternative names line without a league before,
##   a league line without key or an unknown node type.
def parse
  recs     = []
  last_rec = nil

  country  = nil      # last country
  intl     = false    # is international (league/tournament/cup/competition)
  clubs    = true     # or clubs|national teams

  OutlineReader.parse( @txt ).each do |node|
    if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
      heading_level = node[0][1].to_i
      heading       = node[1]

      logger.debug "heading #{heading_level} >#{heading}<"

      if heading_level != 1
        puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
        ## note: dump the node; there is no line (variable) in the heading branch
        pp node
        exit 1
      else
        logger.debug "heading (#{heading_level}) >#{heading}<"
        ## map to country or international / int'l or national teams
        if heading =~ /national team/i     ## national team tournament
          country = nil
          intl    = true
          clubs   = false
        elsif heading =~ /international|int'l/i    ## int'l club tournament
          country = nil
          intl    = true
          clubs   = true
        else
          ## assume country in heading; allow all "formats" supported by parse e.g.
          ##   Österreich • Austria (at)
          ##   Österreich • Austria
          ##   Austria
          ##   Deutschland (de) • Germany
          country = world.countries.parse( heading )
          intl    = false
          clubs   = true

          ## check country code - MUST exist for now!!!!
          if country.nil?
            puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
            exit 1
          end
        end
      end
    elsif node[0] == :p    ## paragraph with (text) lines
      lines = node[1]
      lines.each do |line|

        if line.start_with?( '|' )
          ## assume continuation with line of alternative names
          ## note: there must be a league line before to attach the names to
          if last_rec.nil?
            puts "** !!! ERROR !!! [league reader] - alternative names line without league before:"
            pp line
            exit 1
          end

          ## note: skip leading pipe
          values = line[1..-1].split( '|' )   # team names - allow/use pipe(|)
          values = values.map { |value| _norm( value ) }   ## squish/strip etc.

          logger.debug "alt_names: #{values.join( '|' )}"

          last_rec.alt_names += values
        else
          ## assume "regular" line
          ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
          if line =~ /^([a-z0-9][a-z0-9.]*)[ ]+(.+)$/
            league_key  = $1
            ## 1) strip (commercial) sponsor markers/tags e.g $$
            ## 2) strip and squish (white)spaces
            league_name = _norm( $2 )

            logger.debug "key: >#{league_key}<, name: >#{league_name}<"

            ## prepend country key/code if country present
            ## todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
            ##   why? lets you "overwrite" key if desired - use it - why? why not?
            if country
              league_key = "#{country.key}.#{league_key}"
            end

            rec = League.new( key:     league_key,
                              name:    league_name,
                              country: country,
                              intl:    intl,
                              clubs:   clubs )
            recs << rec
            last_rec = rec
          else
            puts "** !!! ERROR !!! missing key for (canonical) league name"
            exit 1
          end
        end
      end  # each line
    else
      puts "** !!! ERROR !!! [league reader] - unknown line type:"
      pp node
      exit 1
    end
  end
  recs
end # method parse
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
#######################
|
134
|
-
### helpers
|
135
|
-
|
136
|
-
## norm(alize) helper
##   1) drop all dollar signs ($$$)
##   2) hand the rest on to _squish (squish spaces
##        and remove leading and trailing spaces)
def _norm( str )
  without_dollars = str.delete( '$' )   ## only extra clean-up of dollars for now ($$$)
  _squish( without_dollars )
end
|
143
|
-
|
144
|
-
## squish helper - replace every run of spaces, tabs and
##   non-breaking spaces (u00a0) with a single space
##   and remove leading and trailing whitespace
def _squish( str )
  collapsed = str.gsub( /[ \t\u00a0]+/, ' ' )
  collapsed.strip
end
|
147
|
-
|
148
|
-
|
149
|
-
end # class LeagueReader
|
150
|
-
|
151
|
-
end ## module Import
|
152
|
-
end ## module SportDb
|
@@ -1,132 +0,0 @@
|
|
1
|
-
module SportDb
|
2
|
-
|
3
|
-
class ConfParser
|
4
|
-
|
5
|
-
## Convenience entry point: build a parser for +lines+ and run its parse.
##
## lines - text (string) or array of lines; see initialize
def self.parse( lines )
  new( lines ).parse
end
|
9
|
-
|
10
|
-
include Logging ## e.g. logger#debug, logger#info, etc.
|
11
|
-
|
12
|
-
## Preprocess text into lines.
##
## txt - text (string)
##
## Returns an array of lines with leading and trailing whitespace removed,
##   empty lines and comment lines (starting with #) skipped and
##   end-of-line comments (# ...) cut off.
def _read_lines( txt )   ## todo/check: add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
  txt.each_line
     .map    { |raw| raw.strip }
     .reject { |stripped| stripped.empty? || stripped.start_with?( '#' ) }   ### skip empty lines and comments
     .map    { |stripped| stripped.sub( /#.*/, '' ).strip }                  ### cut-off end-of line comments too
end
|
24
|
-
|
25
|
-
|
26
|
-
## Keep the lines for a later call to parse.
##
## lines - array of lines (stored as is) or, for convenience,
##         a string that gets split into lines via _read_lines
##         (note: removes/strips empty lines and comments)
##
## todo/check: change to text instead of array of lines - why? why not?
def initialize( lines )
  @lines = if lines.is_a?( String )
             _read_lines( lines )
           else
             lines
           end
end
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
## country code matcher e.g.  , ENG  or  › AUT
##   - starts with one of the separators < > ‹ › or comma (,)
##   - followed by optional spaces and
##   - a code of two to four letters (named capture: country)
##       ending on a word boundary
##   note: the match is case-insensitive (i flag)
COUNTRY_RE = %r{ [<>‹›,]
                 [ ]*
                 (?<country>[A-Z]{2,4})   ## todo/check: allow one-letter (motor vehicle plates) or 5 letter possible?
                 \b}xi
|
39
|
-
|
40
|
-
|
41
|
-
## standings table row regex matcher e.g.
##    1 Manchester City 38 32 4 2 106-27 100
## or 1. Manchester City 38 32 4 2 106:27 100
##
##  named captures:
##    rank (missing if the row starts with a dash), team,
##    pld, w, d, l, gf, ga, gd (optional), pts, deduction (optional)
TABLE_RE = %r{ ^
                (?:
                   (?<rank>\d+)\.?
                   |
                   [-]
                )
                [ ]+
                (?<team>.+?)     ## note: let's use non-greedy (MINIMUM length) match for now
                [ ]+
                (?<pld>\d+)      ## (pl)aye(d)
                [ ]+
                (?<w>\d+)        ## (w)ins
                [ ]+
                (?<d>\d+)        ## (d)raws
                [ ]+
                (?<l>\d+)        ## (l)ost
                [ ]+
                (?<gf>\d+)       ## (g)oal (f)or
                [ ]*
                [:-]             ## note: allow 10-10 or 10:10 or 10 - 10 or 10 : 10 etc.
                [ ]*
                (?<ga>\d+)       ## (g)oal (a)gainst
                (?:              ## allow optional (g)oal (d)ifference
                   [ ]+
                   (?<gd>[±+-]?\d+)    ## (g)oal (d)ifference
                )?
                [ ]+
                (?<pts>\d+)      ## (p)oin(ts)
                (?:              ## allow optional deductions e.g. [-7]
                   [ ]+
                   \[(?<deduction>-\d+)\]
                )?
              $}x
|
77
|
-
|
78
|
-
## Convert the lines into a teams hash.
##
## Every line is either a standings table row (see TABLE_RE) or
## a plain team name (the full line); decorative lines
## with dashes only get skipped. An (optional) country code
## (see COUNTRY_RE) gets cut out of the line first.
##
## Returns a hash keyed by team name; the value is a hash with the (optional) keys
##   :country  - country code (string) if present in the line
##   :rank     - rank (integer) if the table row starts with a rank
##   :standing - hash with :pld, :w, :d, :l, :gf, :ga, :pts and
##               (if present) :gd and :deduction (all integers)
##
## Note: all numbers get parsed as decimals (base 10), that is,
##   numbers with leading zeros (e.g. 08 or 010) are NOT read as octals.
def parse
  teams = {}    ## convert lines to teams

  @lines.each do |line|
    next if line =~ /^[ -]+$/   ## skip decorative lines with dash only (e.g. ---- or - - - -) etc.

    ## quick hack - check for/extract (optional) country code (for teams) first
    ##   allow as separators <>‹›,  NOTE: includes (,) comma for now too
    country = nil
    if (m = COUNTRY_RE.match( line ))
      country = m[:country]
      line    = line.sub( m[0], '' )   ## replace match with nothing for now
    end

    if (m = TABLE_RE.match( line ))
      puts " matching table entry >#{line}<"

      name = m[:team]
      rank = m[:rank] ? Integer( m[:rank], 10 ) : nil

      standing = {
        pld: Integer( m[:pld], 10 ),
        w:   Integer( m[:w],   10 ),
        d:   Integer( m[:d],   10 ),
        l:   Integer( m[:l],   10 ),
        gf:  Integer( m[:gf],  10 ),
        ga:  Integer( m[:ga],  10 ),
      }
      ## note: remove the plus (or plus-minus) sign; keep the minus sign
      standing[ :gd ]        = Integer( m[:gd].gsub( /[±+]/, '' ), 10 )   if m[:gd]
      standing[ :pts ]       = Integer( m[:pts], 10 )
      standing[ :deduction ] = Integer( m[:deduction], 10 )   if m[:deduction]

      ## todo/fix: track double usage - why? why not? report/raise error/exception on duplicates?
      team = teams[ name ] ||= {}
      team[ :country ]  = country   if country
      team[ :rank ]     = rank      if rank
      team[ :standing ] = standing
    else
      ## assume team is full line
      name = line.strip   # note: strip leading and trailing spaces

      team = teams[ name ] ||= {}
      team[ :country ] = country   if country
    end
  end

  teams
end # method parse
|
130
|
-
|
131
|
-
end # class ConfParser
|
132
|
-
end # module SportDb
|