sportdb-formats 2.0.2 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -20
- data/Rakefile +2 -7
- data/bin/fbchk +173 -0
- data/lib/sportdb/formats/quick_match_linter.rb +201 -0
- data/lib/sportdb/formats/version.rb +2 -2
- data/lib/sportdb/formats.rb +10 -269
- metadata +11 -85
- data/bin/fbx +0 -146
- data/lib/sportdb/formats/country/country_reader.rb +0 -142
- data/lib/sportdb/formats/csv/goal.rb +0 -192
- data/lib/sportdb/formats/csv/goal_parser_csv.rb +0 -28
- data/lib/sportdb/formats/csv/match_parser_csv.rb +0 -490
- data/lib/sportdb/formats/csv/match_status_parser.rb +0 -90
- data/lib/sportdb/formats/datafile.rb +0 -59
- data/lib/sportdb/formats/event/event_reader.rb +0 -119
- data/lib/sportdb/formats/ground/ground_reader.rb +0 -289
- data/lib/sportdb/formats/league/league_outline_reader.rb +0 -176
- data/lib/sportdb/formats/league/league_reader.rb +0 -152
- data/lib/sportdb/formats/match/conf_parser.rb +0 -132
- data/lib/sportdb/formats/match/match_parser.rb +0 -735
- data/lib/sportdb/formats/search/sport.rb +0 -372
- data/lib/sportdb/formats/search/structs.rb +0 -116
- data/lib/sportdb/formats/search/world.rb +0 -157
- data/lib/sportdb/formats/team/club_reader.rb +0 -318
- data/lib/sportdb/formats/team/club_reader_history.rb +0 -203
- data/lib/sportdb/formats/team/club_reader_props.rb +0 -90
- data/lib/sportdb/formats/team/wiki_reader.rb +0 -108
@@ -1,152 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
module Import

##
# Reads league definitions from an outline-style text and turns them
# into League records.
#
# As used by #parse below, the outline (as returned by OutlineReader.parse)
# is a list of nodes where node[0] is the node type (:h1..:h6 or :p) and
# node[1] is the heading text (for headings) or an array of text lines
# (for paragraphs).
#
#   - a level-1 heading switches the "context" for the leagues that follow:
#     national team tournaments, international club tournaments
#     or a country (looked up via world.countries.parse)
#   - a paragraph line starting with a key, e.g. "1  Bundesliga",
#     defines a league
#   - a paragraph line starting with a pipe (|) adds alternative names
#     to the league defined last
#
# Note: all errors are reported on stdout and end the process with exit 1.
class LeagueReader

  def world() Import.world; end


  ## read the file at path (as utf-8) and parse its content;
  ##   returns the array of league records (see #parse)
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## parse the text passed in; returns the array of league records (see #parse)
  def self.parse( txt )
    new( txt ).parse
  end



  include Logging

  def initialize( txt )
    @txt = txt
  end

  ## walk the outline node by node and collect the league records
  ##   returns an array of League records (in order of appearance)
  def parse
    recs     = []
    last_rec = nil    # last league added - target for alt(ernative) names lines

    country  = nil    # last country
    intl     = false  # is international (league/tournament/cup/competition)
    clubs    = true   # or clubs|national teams

    OutlineReader.parse( @txt ).each do |node|
      if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
        heading_level = node[0][1].to_i    ## e.g. :h1 => 1
        heading       = node[1]

        logger.debug "heading #{heading_level} >#{heading}<"

        if heading_level != 1
          puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
          pp node   ## note: print the offending node (no line in scope here)
          exit 1
        else
          ## map to country or international / int'l or national teams
          if heading =~ /national team/i   ## national team tournament
            country = nil
            intl    = true
            clubs   = false
          elsif heading =~ /international|int'l/i   ## int'l club tournament
            country = nil
            intl    = true
            clubs   = true
          else
            ## assume country in heading; allow all "formats" supported by parse e.g.
            ##   Österreich • Austria (at)
            ##   Österreich • Austria
            ##   Austria
            ##   Deutschland (de) • Germany
            country = world.countries.parse( heading )
            intl    = false
            clubs   = true

            ## check country code - MUST exist for now!!!!
            if country.nil?
              puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
              exit 1
            end
          end
        end
      elsif node[0] == :p   ## paragraph with (text) lines
        lines = node[1]
        lines.each do |line|

          if line.start_with?( '|' )
            ## assume continuation with line of alternative names
            ##  note: skip leading pipe
            values = line[1..-1].split( '|' )   # league names - allow/use pipe(|)
            values = values.map {|value| _norm(value) }   ## squish/strip etc.

            logger.debug "alt_names: #{values.join( '|' )}"

            ## note: alt names MUST follow a league line;
            ##         report (instead of crashing on nil) if there is none yet
            if last_rec.nil?
              puts "** !!! ERROR !!! [league reader] - alt names line without preceding league:"
              pp line
              exit 1
            end

            last_rec.alt_names += values
          else
            ## assume "regular" line
            ##  check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
            m = /^([a-z0-9][a-z0-9.]*)[ ]+(.+)$/.match( line )
            if m
              league_key = m[1]
              ## 1) strip (commercial) sponsor markers/tags e.g $$
              ## 2) strip and squish (white)spaces
              league_name = _norm( m[2] )

              logger.debug "key: >#{league_key}<, name: >#{league_name}<"


              ## prepend country key/code if country present
              ##   todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
              ##     why? lets you "overwrite" key if desired - use it - why? why not?
              if country
                league_key = "#{country.key}.#{league_key}"
              end

              rec = League.new( key:     league_key,
                                name:    league_name,
                                country: country,
                                intl:    intl,
                                clubs:   clubs)
              recs << rec
              last_rec = rec
            else
              puts "** !!! ERROR !!! missing key for (canonical) league name"
              exit 1
            end
          end
        end  # each line
      else
        puts "** !!! ERROR !!! [league reader] - unknown line type:"
        pp node
        exit 1
      end
    end
    recs
  end  # method parse



  #######################
  ###   helpers

  ## norm(alize) helper - squish (spaces)
  ##   and remove dollars ($$$)
  ##   and remove leading and trailing spaces
  def _norm( str )
    ## only extra clean-up of dollars for now ($$$)
    _squish( str.gsub( '$', '' ) )
  end

  ## collapse runs of spaces, tabs and non-breaking spaces (\u00a0)
  ##   into a single space and strip leading and trailing whitespace
  def _squish( str )
    str.gsub( /[ \t\u00a0]+/, ' ' ).strip
  end


end  # class LeagueReader

end   ## module Import
end   ## module SportDb
|
@@ -1,132 +0,0 @@
|
|
1
|
-
module SportDb

##
# Parses a "conf(iguration)" listing of teams - one team per line -
# into a hash keyed by team name.
#
# A line is either a standings table row, e.g.
#
#    1  Manchester City         38  32  4  2  106-27  100
#
# or a plain team name. In both cases an optional country code
# (see COUNTRY_RE) gets cut out of the line first.
class ConfParser

  ## convenience shortcut for new( lines ).parse
  def self.parse( lines )
    parser = new( lines )
    parser.parse
  end

  include Logging         ## e.g. logger#debug, logger#info, etc.

  def _read_lines( txt )   ## todo/check:  add alias preproc_lines or build_lines or prep_lines etc. - why? why not?
    ## returns an array of lines with comments and empty lines stripped / removed
    lines = []
    txt.each_line do |line|    ## preprocess
      line = line.strip

      next if line.empty? || line.start_with?('#')   ###  skip empty lines and comments
      line = line.sub( /#.*/, '' ).strip             ###  cut-off end-of line comments too
      lines << line
    end
    lines
  end


  ## lines may be a string (split into cleaned-up lines via _read_lines)
  ##   or anything else (stored as is and iterated with each in parse)
  def initialize( lines )
    # for convenience split string into lines
    ##    note: removes/strips empty lines
    ## todo/check: change to text instead of array of lines - why? why not?
    @lines = lines.is_a?( String ) ? _read_lines( lines ) : lines
  end



  ## country code preceded by one of the separators <>‹›,  e.g. ", ENG" or "› AUT"
  COUNTRY_RE = %r{ [<>‹›,]
                   [ ]*
                   (?<country>[A-Z]{2,4})   ## todo/check: allow one-letter (motor vehicle plates) or 5 letter possible?
                   \b}xi


  ## standings table row regex matcher e.g.
  ##    1  Manchester City         38  32  4  2  106-27  100
  ## or 1. Manchester City         38  32  4  2  106:27  100
  TABLE_RE = %r{ ^
                  (?:
                    (?<rank>\d+)\.?
                    |
                    [-]
                  )
                  [ ]+
                  (?<team>.+?)   ## note: let's use non-greedy (MINIMUM length) match for now
                  [ ]+
                  (?<pld>\d+)    ## (pl)aye(d)
                  [ ]+
                  (?<w>\d+)      ## (w)ins
                  [ ]+
                  (?<d>\d+)      ## (d)raws
                  [ ]+
                  (?<l>\d+)      ## (l)ost
                  [ ]+
                  (?<gf>\d+)     ## (g)oal (f)or
                  [ ]*
                  [:-]           ## note: allow 10-10 or 10:10 or 10 - 10 or 10 : 10 etc.
                  [ ]*
                  (?<ga>\d+)     ## (g)oal (a)gainst
                  (?:            ## allow optional (g)oal (d)ifference
                    [ ]+
                    (?<gd>[±+-]?\d+)   ## (g)oal (d)ifference
                  )?
                  [ ]+
                  (?<pts>\d+)    ## (p)oin(ts)
                  (?:            ## allow optional deductions e.g. [-7]
                    [ ]+
                    \[(?<deduction>-\d+)\]
                  )?
                $}x

  ## convert the lines to teams
  ##   returns a hash mapping the team name to a hash with the (optional) keys
  ##     :country  - country code as written in the line
  ##     :rank     - rank (integer) - table rows with a rank only
  ##     :standing - hash with :pld, :w, :d, :l, :gf, :ga, :pts
  ##                   and the optional :gd and :deduction - table rows only
  def parse
    teams = {}   ## convert lines to teams

    @lines.each do |line|
      next if line =~ /^[ -]+$/   ## skip decorative lines with dash only (e.g. ---- or - - - -) etc.


      ## quick hack - check for/extract (optional) country code (for teams) first
      ##  allow as separators <>‹›,  NOTE: includes (,) comma for now too
      country = nil
      m = COUNTRY_RE.match( line )
      if m
        country = m[:country]
        ## cut out the match exactly where the regex matched;
        ##   note: a literal sub( m[0], '' ) might hit an earlier occurrence
        ##           of the same text e.g. inside a team name
        line = m.pre_match + m.post_match
      end

      m = TABLE_RE.match( line )
      if m
        logger.debug "  matching table entry >#{line}<"

        name = m[:team]
        rank = m[:rank] ? Integer(m[:rank]) : nil   ## note: no rank if row starts with a dash (-)

        standing = {
          pld: Integer(m[:pld]),
          w:   Integer(m[:w]),
          d:   Integer(m[:d]),
          l:   Integer(m[:l]),
          gf:  Integer(m[:gf]),
          ga:  Integer(m[:ga]),
        }
        ## note: strip plus signs (±+) - keep minus for negative goal difference
        standing[ :gd ]        = Integer(m[:gd].gsub(/[±+]/,''))  if m[:gd]
        standing[ :pts ]       = Integer(m[:pts])
        standing[ :deduction ] = Integer(m[:deduction])  if m[:deduction]


        ## todo/fix: track double usage - why? why not? report/raise error/exception on duplicates?
        team = teams[ name ] ||= { }
        team[ :country ]  = country   if country

        team[ :rank ]     = rank      if rank
        team[ :standing ] = standing
      else
        ## assume team is full line
        name = line.strip  # note: strip leading and trailing spaces

        team = teams[ name ] ||= { }
        team[ :country ] = country   if country
      end
    end

    teams
  end # method parse

end # class ConfParser
end # module SportDb
|