sportdb-readers 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -3
- data/NOTES.md +35 -0
- data/Rakefile +1 -3
- data/lib/sportdb/readers.rb +21 -60
- data/lib/sportdb/readers/conf_reader.rb +69 -57
- data/lib/sportdb/readers/match_reader.rb +109 -77
- data/lib/sportdb/readers/package.rb +22 -83
- data/lib/sportdb/readers/version.rb +2 -2
- data/test/helper.rb +11 -7
- data/test/test_conf_reader.rb +78 -0
- data/test/test_match_reader_champs.rb +487 -0
- data/test/test_match_reader_eng.rb +1 -1
- data/test/test_match_reader_euro.rb +156 -0
- data/test/test_match_reader_mu.rb +5 -5
- data/test/test_reader_champs.rb +187 -0
- metadata +10 -35
- data/lib/sportdb/readers/conf_linter.rb +0 -73
- data/lib/sportdb/readers/league_outline_reader.rb +0 -146
- data/lib/sportdb/readers/match_linter.rb +0 -30
@@ -1,73 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
|
5
|
-
|
6
|
-
class ConfLinter
|
7
|
-
|
8
|
-
def self.config() Import.config; end ## shortcut convenience helper
|
9
|
-
|
10
|
-
|
11
|
-
def self.read( path, season: nil ) ## use - rename to read_file or from_file etc. - why? why not?
|
12
|
-
puts "reading conf(iguration) datafile >#{path}<..."
|
13
|
-
txt = File.open( path, 'r:utf-8' ).read
|
14
|
-
parse( txt, season: season )
|
15
|
-
end
|
16
|
-
|
17
|
-
def self.parse( txt, season: nil )
|
18
|
-
recs = LeagueOutlineReader.parse( txt, season: season )
|
19
|
-
|
20
|
-
if recs.empty? ## todo: check for season filter - why? why not?
|
21
|
-
puts " ** !!! WARN !!! - no league headings found"
|
22
|
-
else
|
23
|
-
puts " found #{recs.size} league (+season+stage) headings"
|
24
|
-
recs.each do |rec|
|
25
|
-
## rec[:league] )
|
26
|
-
## rec[:season] )
|
27
|
-
## rec[:stage]
|
28
|
-
puts " league: >#{rec[:league]}<, season: >#{rec[:season]}<, stage: >#{rec[:stage]}<"
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
## pass 2 - check & map; replace inline (string with record)
|
33
|
-
recs.each do |rec|
|
34
|
-
league = rec[:league]
|
35
|
-
clubs = [] ## convert lines to clubs
|
36
|
-
rec[:lines].each do |line|
|
37
|
-
|
38
|
-
next if line =~ /^[ -]+$/ ## skip decorative lines with dash only (e.g. ---- or - - - -) etc.
|
39
|
-
|
40
|
-
scan = StringScanner.new( line )
|
41
|
-
|
42
|
-
if scan.check( /\d{1,2}[ ]+/ ) ## entry with standing starts with ranking e.g. 1,2,3, etc.
|
43
|
-
puts " table entry >#{line}<"
|
44
|
-
rank = scan.scan( /\d{1,2}[ ]+/ ).strip # note: strip trailing spaces
|
45
|
-
|
46
|
-
## note: uses look ahead scan until we hit at least two spaces
|
47
|
-
## or the end of string (standing records for now optional)
|
48
|
-
name = scan.scan_until( /(?=\s{2})|$/ )
|
49
|
-
if scan.eos?
|
50
|
-
standing = nil
|
51
|
-
else
|
52
|
-
standing = scan.rest.strip # note: strip leading and trailing spaces
|
53
|
-
end
|
54
|
-
puts " rank: >#{rank}<, name: >#{name}<, standing: >#{standing}<"
|
55
|
-
|
56
|
-
## note: rank and standing gets ignored (not used) for now
|
57
|
-
else
|
58
|
-
## assume club is full line
|
59
|
-
name = line
|
60
|
-
end
|
61
|
-
|
62
|
-
clubs << config.clubs.find_by!( name: name, country: league.country )
|
63
|
-
end
|
64
|
-
|
65
|
-
rec[:clubs] = clubs
|
66
|
-
rec.delete( :lines ) ## remove lines entry
|
67
|
-
end
|
68
|
-
|
69
|
-
recs
|
70
|
-
end # method parse
|
71
|
-
|
72
|
-
end # class ConfLinter
|
73
|
-
end # module SportDb
|
@@ -1,146 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module SportDb
|
5
|
-
|
6
|
-
## shared "higher-level" outline reader
|
7
|
-
## todo: add CountryOutlineReader - why? why not?
|
8
|
-
|
9
|
-
class LeagueOutlineReader
|
10
|
-
|
11
|
-
def self.config() Import.config; end ## shortcut convenience helper
|
12
|
-
|
13
|
-
## split into league + season
|
14
|
-
## e.g. Österr. Bundesliga 2015/16 ## or 2015-16
|
15
|
-
## World Cup 2018
|
16
|
-
LEAGUE_SEASON_HEADING_REGEX = /^
|
17
|
-
(?<league>.+?) ## non-greedy
|
18
|
-
\s+
|
19
|
-
(?<season>\d{4}
|
20
|
-
(?:[\/-]\d{2})? ## optional 2nd year in season
|
21
|
-
)
|
22
|
-
$/x
|
23
|
-
|
24
|
-
|
25
|
-
def self.parse( txt, season: nil )
|
26
|
-
recs=[]
|
27
|
-
OutlineReader.parse( txt ).each do |node|
|
28
|
-
if node[0] == :h1
|
29
|
-
## check for league (and stage) and season
|
30
|
-
heading = node[1]
|
31
|
-
values = split_league( heading )
|
32
|
-
if m=values[0].match( LEAGUE_SEASON_HEADING_REGEX )
|
33
|
-
puts "league >#{m[:league]}<, season >#{m[:season]}<"
|
34
|
-
|
35
|
-
recs << { league: m[:league],
|
36
|
-
season: m[:season],
|
37
|
-
stage: values[1], ## note: defaults to nil if not present
|
38
|
-
lines: []
|
39
|
-
}
|
40
|
-
else
|
41
|
-
puts "** !!! ERROR !!! - CANNOT match league and season in heading; season missing?"
|
42
|
-
pp heading
|
43
|
-
exit 1
|
44
|
-
end
|
45
|
-
elsif node[0] == :l ## regular (text) line
|
46
|
-
line = node[1]
|
47
|
-
## note: skip lines if no heading seen
|
48
|
-
if recs.empty?
|
49
|
-
puts "** !! WARN !! - skipping line (no heading) >#{line}<"
|
50
|
-
else
|
51
|
-
recs[-1][:lines] << line
|
52
|
-
end
|
53
|
-
else
|
54
|
-
puts "** !!! ERROR !!! unknown line type; for now only heading 1 for leagues supported; sorry:"
|
55
|
-
pp node
|
56
|
-
exit 1
|
57
|
-
end
|
58
|
-
end
|
59
|
-
|
60
|
-
|
61
|
-
## pass 2 - filter seasons if filter present
|
62
|
-
if season
|
63
|
-
filtered_recs = []
|
64
|
-
filter = normalize_seasons( season )
|
65
|
-
recs.each do |rec|
|
66
|
-
if filter.include?( SeasonUtils.key( rec[:season] ))
|
67
|
-
filtered_recs << rec
|
68
|
-
else
|
69
|
-
puts " skipping season >#{rec[:season]}< NOT matched by filter"
|
70
|
-
end
|
71
|
-
end
|
72
|
-
recs = filtered_recs
|
73
|
-
end
|
74
|
-
|
75
|
-
## pass 3 - check & map; replace inline (string with data record)
|
76
|
-
recs.each do |rec|
|
77
|
-
league = find_league( rec[:league] )
|
78
|
-
rec[:league] = league
|
79
|
-
|
80
|
-
check_stage( rec[:stage] ) if rec[:stage] ## note: only check for now (no remapping etc.)
|
81
|
-
end
|
82
|
-
|
83
|
-
recs
|
84
|
-
end # method parse
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
def self.normalize_seasons( season_or_seasons ) ## todo/check: add alias norm_seasons - why? why not?
|
89
|
-
seasons = if season_or_seasons.is_a? String ## wrap in array
|
90
|
-
[season_or_seasons]
|
91
|
-
else ## assume it's an array already
|
92
|
-
season_or_seasons
|
93
|
-
end
|
94
|
-
|
95
|
-
seasons.map { |season| SeasonUtils.key( season ) }
|
96
|
-
end
|
97
|
-
|
98
|
-
|
99
|
-
def self.split_league( str ) ## todo/check: rename to parse_league(s) - why? why not?
|
100
|
-
## split into league / stage / ... e.g.
|
101
|
-
## => Österr. Bundesliga 2018/19, Regular Season
|
102
|
-
## => Österr. Bundesliga 2018/19, Championship Round
|
103
|
-
## etc.
|
104
|
-
values = str.split( /[,<>‹›]/ ) ## note: allow , > < or › ‹ for now
|
105
|
-
values = values.map { |value| value.strip } ## strip leading and trailing whitespace
|
106
|
-
values
|
107
|
-
end
|
108
|
-
|
109
|
-
def self.check_stage( name )
|
110
|
-
known_stages = ['regular season',
|
111
|
-
'championship round',
|
112
|
-
'relegation round',
|
113
|
-
'play-offs'
|
114
|
-
]
|
115
|
-
|
116
|
-
if known_stages.include?( name.downcase )
|
117
|
-
## everything ok
|
118
|
-
else
|
119
|
-
puts "** !!! ERROR !!! no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
|
120
|
-
exit 1
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
|
125
|
-
### fix/todo: move find_league to sportdb-league index use find_by! and find_by !!!!
|
126
|
-
def self.find_league( name )
|
127
|
-
league = nil
|
128
|
-
m = config.leagues.match( name )
|
129
|
-
# pp m
|
130
|
-
|
131
|
-
if m.nil?
|
132
|
-
puts "** !!! ERROR !!! no league match found for >#{name}<, add to leagues table; sorry"
|
133
|
-
exit 1
|
134
|
-
elsif m.size > 1
|
135
|
-
puts "** !!! ERROR !!! ambigious league name; too many leagues (#{m.size}) found:"
|
136
|
-
pp m
|
137
|
-
exit 1
|
138
|
-
else
|
139
|
-
league = m[0]
|
140
|
-
end
|
141
|
-
|
142
|
-
league
|
143
|
-
end
|
144
|
-
end # class LeagueOutlineReader
|
145
|
-
|
146
|
-
end # module SportDb
|
@@ -1,30 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
|
5
|
-
class MatchLinter
|
6
|
-
|
7
|
-
def self.read( path, season: nil ) ## use - rename to read_file or from_file etc. - why? why not?
|
8
|
-
puts "reading match datafile >#{path}<..."
|
9
|
-
txt = File.open( path, 'r:utf-8' ).read
|
10
|
-
parse( txt, season: season )
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.parse( txt, season: nil )
|
14
|
-
recs = LeagueOutlineReader.parse( txt, season: season )
|
15
|
-
|
16
|
-
if recs.empty? ## todo - check for filter - why? why not?
|
17
|
-
puts " ** !!! WARN !!! - no league headings found"
|
18
|
-
else
|
19
|
-
puts " found #{recs.size} league (+season+stage) headings"
|
20
|
-
recs.each do |rec|
|
21
|
-
## rec[:league] )
|
22
|
-
## rec[:season] )
|
23
|
-
## rec[:stage]
|
24
|
-
puts " league: >#{rec[:league]}<, season: >#{rec[:season]}<, stage: >#{rec[:stage]}<"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
recs
|
28
|
-
end # method parse
|
29
|
-
end # class MatchLinter
|
30
|
-
end # module SportDb
|