sportdb-readers 0.5.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -3
- data/NOTES.md +35 -0
- data/README.md +86 -20
- data/Rakefile +1 -3
- data/lib/sportdb/readers.rb +21 -60
- data/lib/sportdb/readers/conf_reader.rb +71 -59
- data/lib/sportdb/readers/match_reader.rb +111 -80
- data/lib/sportdb/readers/package.rb +23 -83
- data/lib/sportdb/readers/version.rb +2 -2
- data/test/helper.rb +11 -7
- data/test/test_conf_reader.rb +78 -0
- data/test/test_match_reader_champs.rb +487 -0
- data/test/test_match_reader_eng.rb +3 -3
- data/test/test_match_reader_euro.rb +156 -0
- data/test/test_match_reader_mu.rb +6 -6
- data/test/test_reader.rb +2 -2
- data/test/test_reader_champs.rb +187 -0
- metadata +10 -35
- data/lib/sportdb/readers/conf_linter.rb +0 -73
- data/lib/sportdb/readers/league_outline_reader.rb +0 -146
- data/lib/sportdb/readers/match_linter.rb +0 -30
@@ -2,120 +2,151 @@
|
|
2
2
|
|
3
3
|
module SportDb
|
4
4
|
|
5
|
-
class
|
6
|
-
|
7
|
-
def self.config() Import.config; end
|
8
|
-
|
9
|
-
|
5
|
+
class MatchReader ## todo/check: rename to MatchReaderV2 (use plural?) why? why not?
|
10
6
|
|
11
7
|
def self.read( path, season: nil ) ## use - rename to read_file or from_file etc. - why? why not?
|
12
|
-
txt = File.open( path, 'r:utf-8' ).read
|
8
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read }
|
13
9
|
parse( txt, season: season )
|
14
10
|
end
|
15
11
|
|
16
12
|
def self.parse( txt, season: nil )
|
17
|
-
|
18
|
-
|
13
|
+
new( txt ).parse( season: season )
|
14
|
+
end
|
15
|
+
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
include Logging
|
18
|
+
|
19
|
+
def initialize( txt )
|
20
|
+
@txt = txt
|
21
|
+
end
|
22
|
+
|
23
|
+
def parse( season: nil )
|
24
|
+
secs = LeagueOutlineReader.parse( @txt, season: season )
|
25
|
+
pp secs
|
26
|
+
|
27
|
+
|
28
|
+
###
|
29
|
+
## todo/check/fix: move to LeagueOutlineReader for (re)use - why? why not?
|
30
|
+
## use sec[:lang] or something?
|
31
|
+
langs = { ## map country keys to lang codes
|
32
|
+
'de' => 'de', ## de - Deutsch (German)
|
33
|
+
'at' => 'de',
|
34
|
+
'fr' => 'fr', ## fr - French
|
35
|
+
'it' => 'it', ## it - Italian
|
36
|
+
'es' => 'es', ## es - Español (Spanish)
|
37
|
+
'mx' => 'es',
|
38
|
+
'pt' => 'pt', ## pt - Português (Portuguese)
|
39
|
+
'br' => 'br'
|
40
|
+
}
|
41
|
+
|
42
|
+
secs.each do |sec| ## sec(tion)s
|
43
|
+
season = sec[:season]
|
44
|
+
league = sec[:league]
|
45
|
+
stage = sec[:stage]
|
46
|
+
lines = sec[:lines]
|
23
47
|
|
24
48
|
## hack for now: switch lang
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
DateFormats.lang = 'fr'
|
31
|
-
elsif ['it'].include?( league.country.key )
|
32
|
-
SportDb.lang.lang = 'it'
|
33
|
-
DateFormats.lang = 'it'
|
34
|
-
elsif ['es', 'mx'].include?( league.country.key )
|
35
|
-
SportDb.lang.lang = 'es'
|
36
|
-
DateFormats.lang = 'es'
|
37
|
-
elsif ['pt', 'br'].include?( league.country.key )
|
38
|
-
SportDb.lang.lang = 'pt'
|
39
|
-
DateFormats.lang = 'pt'
|
40
|
-
else
|
41
|
-
SportDb.lang.lang = 'en'
|
42
|
-
DateFormats.lang = 'en'
|
49
|
+
## todo/fix: set lang for now depending on league country!!!
|
50
|
+
if league.intl? ## todo/fix: add intl? to ActiveRecord league!!!
|
51
|
+
Import.config.lang = 'en'
|
52
|
+
else ## assume national/domestic
|
53
|
+
Import.config.lang = langs[ league.country.key ] || 'en'
|
43
54
|
end
|
44
55
|
|
45
56
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
57
|
+
start = if season.year?
|
58
|
+
Date.new( season.start_year, 1, 1 )
|
59
|
+
else
|
60
|
+
Date.new( season.start_year, 7, 1 )
|
61
|
+
end
|
50
62
|
|
51
|
-
|
63
|
+
auto_conf_teams, _ = AutoConfParser.parse( lines,
|
64
|
+
start: start )
|
52
65
|
|
53
|
-
|
54
|
-
Sync::Stage.find_or_create( rec[:stage], event: event )
|
55
|
-
else
|
56
|
-
nil
|
57
|
-
end
|
66
|
+
## step 1: map/find teams
|
58
67
|
|
68
|
+
## note: loop over keys (holding the names); values hold the usage counter!! e.g. 'Arsenal' => 2, etc.
|
69
|
+
mods = nil
|
70
|
+
if league.clubs? && league.intl? ## todo/fix: add intl? to ActiveRecord league!!!
|
71
|
+
### quick hack mods for popular/known ambiguous club names
|
72
|
+
## todo/fix: make more generic / reusable!!!!
|
73
|
+
mods = {}
|
74
|
+
## europa league uses same mods as champions league
|
75
|
+
mods[ 'uefa.el' ] = mods[ 'uefa.cl' ] = catalog.clubs.build_mods(
|
76
|
+
{ 'Liverpool | Liverpool FC' => 'Liverpool FC, ENG',
|
77
|
+
'Arsenal | Arsenal FC' => 'Arsenal FC, ENG',
|
78
|
+
'Barcelona' => 'FC Barcelona, ESP',
|
79
|
+
'Valencia' => 'Valencia CF, ESP' })
|
80
|
+
end
|
59
81
|
|
60
|
-
|
61
|
-
|
82
|
+
teams = catalog.teams.find_by!( name: auto_conf_teams.keys,
|
83
|
+
league: league,
|
84
|
+
mods: mods )
|
62
85
|
|
63
|
-
|
64
|
-
|
65
|
-
club_mapping = {} ## name => database (ActiveRecord) record
|
86
|
+
## build mapping - name => team struct record
|
87
|
+
team_mapping = auto_conf_teams.keys.zip( teams ).to_h
|
66
88
|
|
67
|
-
## note: loop over keys (holding the names); values hold the usage counter!! e.g. 'Arsenal' => 2, etc.
|
68
|
-
country = league.country
|
69
|
-
auto_conf_clubs.keys.each do |name|
|
70
|
-
club_rec = config.clubs.find_by!( name: name, country: country )
|
71
|
-
club_recs << club_rec
|
72
89
|
|
73
|
-
|
74
|
-
|
75
|
-
|
90
|
+
parser = MatchParser.new( lines,
|
91
|
+
team_mapping,
|
92
|
+
start ) ## note: keep season start_at date for now (no need for a more specific stage date yet)
|
76
93
|
|
94
|
+
matches, rounds, groups = parser.parse
|
77
95
|
|
78
|
-
|
79
|
-
|
80
|
-
clubs = club_mapping.values.uniq
|
96
|
+
pp rounds
|
97
|
+
pp groups
|
81
98
|
|
82
99
|
|
83
|
-
|
84
|
-
|
85
|
-
team_ids = stage ? stage.team_ids : event.team_ids
|
100
|
+
######################################################
|
101
|
+
## step 2: add to database
|
86
102
|
|
87
|
-
|
88
|
-
|
89
|
-
## for now check if team is alreay included
|
90
|
-
## todo/fix: clear/destroy_all first - why? why not!!!
|
103
|
+
event_rec = Sync::Event.find_or_create_by( league: league,
|
104
|
+
season: season )
|
91
105
|
|
92
|
-
|
93
|
-
|
106
|
+
stage_rec = if stage
|
107
|
+
Sync::Stage.find_or_create( stage, event: event_rec )
|
108
|
+
else
|
109
|
+
nil
|
110
|
+
end
|
94
111
|
|
112
|
+
team_recs = stage_rec ? stage_rec.teams : event_rec.teams
|
113
|
+
team_ids = stage_rec ? stage_rec.team_ids : event_rec.team_ids
|
95
114
|
|
115
|
+
## todo/fix: check if all teams are unique
|
116
|
+
## check if uniq works for club record (struct) - yes,no ??
|
117
|
+
new_team_recs = Sync::Team.find_or_create( team_mapping.values.uniq )
|
96
118
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
119
|
+
new_team_recs.each do |team_rec|
|
120
|
+
## add teams to event
|
121
|
+
## for now check if team is already included
|
122
|
+
## todo/fix: clear/destroy_all first - why? why not!!!
|
123
|
+
team_recs << team_rec unless team_ids.include?( team_rec.id )
|
124
|
+
end
|
101
125
|
|
102
|
-
match_recs, round_recs = parser.parse
|
103
126
|
|
104
|
-
|
127
|
+
rounds.each do |round|
|
128
|
+
round_rec = Sync::Round.find_or_create( round, event: event_rec ) ## check: use/rename to EventRound why? why not?
|
129
|
+
end
|
105
130
|
|
106
|
-
|
107
|
-
|
108
|
-
round_rec.pos = 999 if round_rec.pos.nil?
|
109
|
-
round = Sync::Round.find_or_create( round_rec, event: event ) ## check: use/rename to EventRound why? why not?
|
131
|
+
groups.each do |group|
|
132
|
+
group_rec = Sync::Group.find_or_create( group, event: event_rec ) ## check: use/rename to EventGroup why? why not?
|
110
133
|
end
|
111
134
|
|
112
|
-
|
113
|
-
##
|
114
|
-
match =
|
135
|
+
matches.each do |match|
|
136
|
+
## note: pass along stage (if present): stage - optional from heading!!!!
|
137
|
+
match = match.update( stage: stage ) if stage
|
138
|
+
match_rec = Sync::Match.create_or_update( match, event: event_rec )
|
115
139
|
end
|
116
140
|
end
|
117
141
|
|
118
|
-
|
119
|
-
end # method
|
120
|
-
|
142
|
+
true ## success/ok
|
143
|
+
end # method parse
|
144
|
+
|
145
|
+
|
146
|
+
######################
|
147
|
+
# (convenience) helpers
|
148
|
+
|
149
|
+
def catalog() Import.catalog; end
|
150
|
+
|
151
|
+
end # class MatchReader
|
121
152
|
end # module SportDb
|
@@ -2,130 +2,70 @@
|
|
2
2
|
module SportDb
|
3
3
|
class Package
|
4
4
|
|
5
|
-
|
6
|
-
CLUB_PROPS_RE = Datafile::CLUB_PROPS_RE
|
7
|
-
LEAGUES_RE = Datafile::LEAGUES_RE
|
8
|
-
CLUBS_RE = Datafile::CLUBS_RE
|
9
|
-
|
10
|
-
|
11
|
-
## note: if pattern includes directory add here (otherwise move to more "generic" datafile) - why? why not?
|
12
|
-
MATCH_RE = %r{ /(?: \d{4}-\d{2} ## season folder e.g. /2019-20
|
13
|
-
| \d{4} ## season year-only folder e.g. /2019
|
14
|
-
)
|
15
|
-
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
16
|
-
}x
|
17
|
-
|
18
|
-
|
19
|
-
attr_reader :pack ## allow access to embedded ("low-level") delegate package
|
20
|
-
|
21
|
-
def initialize( path_or_pack )
|
22
|
-
if path_or_pack.is_a?( Datafile::Package )
|
23
|
-
@pack = path_or_pack
|
24
|
-
else ## assume it's a (string) path
|
25
|
-
path = path_or_pack
|
26
|
-
if !File.exist?( path ) ## file or directory
|
27
|
-
puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
|
28
|
-
exit 1
|
29
|
-
end
|
30
|
-
|
31
|
-
if File.directory?( path )
|
32
|
-
@pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
|
33
|
-
elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
|
34
|
-
@pack = Datafile::ZipPackage.new( path )
|
35
|
-
else
|
36
|
-
puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
|
37
|
-
exit 1
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def each_conf( &blk ) @pack.each( pattern: CONF_RE, &blk ); end
|
43
|
-
def each_match( &blk ) @pack.each( pattern: MATCH_RE, &blk ); end
|
44
|
-
def each_club_props( &blk ) @pack.each( pattern: CLUB_PROPS_RE, &blk ); end
|
45
|
-
|
46
|
-
def each_leagues( &blk ) @pack.each( pattern: LEAGUES_RE, &blk ); end
|
47
|
-
def each_clubs( &blk ) @pack.each( pattern: CLUBS_RE, &blk ); end
|
48
|
-
|
5
|
+
## note: add readers here; for full class def see the source in sportdb-formats!!!
|
49
6
|
|
50
7
|
def read_leagues
|
51
|
-
each_leagues
|
52
|
-
SportDb.parse_leagues( entry.read )
|
53
|
-
end
|
8
|
+
each_leagues { |entry| SportDb.parse_leagues( entry.read ) }
|
54
9
|
end
|
55
10
|
|
56
11
|
def read_clubs
|
57
|
-
each_clubs
|
58
|
-
SportDb.parse_clubs( entry.read )
|
59
|
-
end
|
12
|
+
each_clubs { |entry| SportDb.parse_clubs( entry.read ) }
|
60
13
|
end
|
61
14
|
|
62
|
-
|
63
|
-
|
64
|
-
each_club_props do |entry|
|
65
|
-
SportDb.parse_club_props( entry.read, sync: sync )
|
66
|
-
end
|
15
|
+
def read_club_props
|
16
|
+
each_club_props { |entry| SportDb.parse_club_props( entry.read ) }
|
67
17
|
end
|
68
18
|
|
69
|
-
|
70
|
-
|
19
|
+
|
20
|
+
def read_conf( *names, season: nil )
|
71
21
|
if names.empty? ## no (entry) names passed in; read in all
|
72
22
|
each_conf do |entry|
|
73
|
-
SportDb.parse_conf( entry.read, season: season
|
23
|
+
SportDb.parse_conf( entry.read, season: season )
|
74
24
|
end
|
75
25
|
else
|
76
26
|
names.each do |name|
|
77
27
|
entry = @pack.find( name )
|
78
|
-
SportDb.parse_conf( entry.read, season: season
|
28
|
+
SportDb.parse_conf( entry.read, season: season )
|
79
29
|
end
|
80
30
|
end
|
81
31
|
end
|
82
32
|
|
83
|
-
def read_match( *names,
|
84
|
-
season: nil, sync: true )
|
33
|
+
def read_match( *names, season: nil )
|
85
34
|
if names.empty? ## no (entry) names passed in; read in all
|
86
35
|
each_match do |entry|
|
87
|
-
SportDb.parse_match( entry.read, season: season
|
36
|
+
SportDb.parse_match( entry.read, season: season )
|
88
37
|
end
|
89
38
|
else
|
90
39
|
names.each do |name|
|
91
40
|
entry = @pack.find( name )
|
92
|
-
SportDb.parse_match( entry.read, season: season
|
41
|
+
SportDb.parse_match( entry.read, season: season )
|
93
42
|
end
|
94
43
|
end
|
95
44
|
end
|
96
45
|
|
97
46
|
|
98
|
-
def read( *names,
|
99
|
-
season: nil, sync: true )
|
47
|
+
def read( *names, season: nil )
|
100
48
|
if names.empty? ## read all datafiles
|
101
49
|
read_leagues()
|
102
50
|
read_clubs()
|
103
|
-
read_club_props(
|
104
|
-
|
105
|
-
|
51
|
+
read_club_props()
|
52
|
+
## note: skip conf(iguration)s for now!!!!!!!
|
53
|
+
## read_conf( season: season )
|
54
|
+
read_match( season: season )
|
106
55
|
else
|
107
56
|
names.each do |name|
|
108
57
|
entry = @pack.find( name )
|
109
58
|
## fix/todo: add read_leagues, read_clubs too!!!
|
110
|
-
if
|
111
|
-
SportDb.parse_conf( entry.read, season: season
|
112
|
-
elsif
|
113
|
-
SportDb.parse_club_props( entry.read
|
114
|
-
else ## assume "regular" match datafile
|
115
|
-
SportDb.parse_match( entry.read, season: season
|
59
|
+
if match_conf?( name ) ## check if datafile matches conf(iguration) naming (e.g. .conf.txt)
|
60
|
+
SportDb.parse_conf( entry.read, season: season )
|
61
|
+
elsif match_club_props?( name )
|
62
|
+
SportDb.parse_club_props( entry.read )
|
63
|
+
else ## assume "regular" match datafile or check pattern and report error on fail - why? why not?
|
64
|
+
SportDb.parse_match( entry.read, season: season )
|
116
65
|
end
|
117
66
|
end
|
118
67
|
end
|
119
68
|
end
|
120
69
|
end # class Package
|
121
70
|
|
122
|
-
|
123
|
-
class DirPackage < Package
|
124
|
-
def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
|
125
|
-
end
|
126
|
-
|
127
|
-
class ZipPackage < Package
|
128
|
-
def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
|
129
|
-
end
|
130
|
-
|
131
71
|
end # module SportDb
|
@@ -4,8 +4,8 @@
|
|
4
4
|
module SportDb
|
5
5
|
module Readers
|
6
6
|
|
7
|
-
MAJOR =
|
8
|
-
MINOR =
|
7
|
+
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
8
|
+
MINOR = 1
|
9
9
|
PATCH = 0
|
10
10
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
11
11
|
|
data/test/helper.rb
CHANGED
@@ -1,19 +1,23 @@
|
|
1
|
-
##
|
1
|
+
## note: use the local version of sportdb gems
|
2
|
+
$LOAD_PATH.unshift( File.expand_path( '../sportdb-formats/lib' ))
|
3
|
+
$LOAD_PATH.unshift( File.expand_path( '../sportdb-config/lib' ))
|
4
|
+
$LOAD_PATH.unshift( File.expand_path( '../sportdb-models/lib' ))
|
5
|
+
$LOAD_PATH.unshift( File.expand_path( '../sportdb-sync/lib' ))
|
6
|
+
|
2
7
|
|
3
8
|
## minitest setup
|
4
9
|
require 'minitest/autorun'
|
5
10
|
|
6
11
|
|
7
|
-
## note: use the local version of sportdb gems
|
8
|
-
$LOAD_PATH.unshift( File.expand_path( '../sportdb-match-formats/lib' ))
|
9
|
-
|
10
12
|
|
11
13
|
## our own code
|
12
14
|
require 'sportdb/readers'
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
16
|
## use (switch to) "external" datasets
|
18
17
|
SportDb::Import.config.leagues_dir = "../../../openfootball/leagues"
|
19
18
|
SportDb::Import.config.clubs_dir = "../../../openfootball/clubs"
|
19
|
+
|
20
|
+
|
21
|
+
COUNTRIES = SportDb::Import.catalog.countries
|
22
|
+
LEAGUES = SportDb::Import.catalog.leagues
|
23
|
+
CLUBS = SportDb::Import.catalog.clubs
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_conf_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestConfReader < MiniTest::Test
|
12
|
+
|
13
|
+
def setup
|
14
|
+
SportDb.connect( adapter: 'sqlite3',
|
15
|
+
database: ':memory:' )
|
16
|
+
SportDb.create_all ## build schema
|
17
|
+
|
18
|
+
## turn on logging to console
|
19
|
+
ActiveRecord::Base.logger = Logger.new(STDOUT)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_read
|
23
|
+
# path = "../../../openfootball/austria/2018-19/.conf.txt"
|
24
|
+
path = "../../../openfootball/england/2015-16/.conf.txt"
|
25
|
+
# path = "../../../openfootball/england/2017-18/.conf.txt"
|
26
|
+
# path = "../../../openfootball/england/2018-19/.conf.txt"
|
27
|
+
# path = "../../../openfootball/england/2019-20/.conf.txt"
|
28
|
+
SportDb::ConfReader.read( path )
|
29
|
+
end # method test_read
|
30
|
+
|
31
|
+
|
32
|
+
def test_read_champs
|
33
|
+
txt =<<TXT
|
34
|
+
= UEFA Champions League 2017/18
|
35
|
+
|
36
|
+
Manchester United › ENG
|
37
|
+
Liverpool › ENG
|
38
|
+
Chelsea › ENG
|
39
|
+
Manchester City › ENG
|
40
|
+
Tottenham Hotspur › ENG
|
41
|
+
|
42
|
+
Atlético Madrid › ESP
|
43
|
+
Barcelona › ESP
|
44
|
+
Sevilla › ESP
|
45
|
+
Real Madrid › ESP
|
46
|
+
|
47
|
+
Roma › ITA
|
48
|
+
Juventus › ITA
|
49
|
+
Napoli › ITA
|
50
|
+
|
51
|
+
Bayern München › GER
|
52
|
+
Borussia Dortmund › GER
|
53
|
+
RB Leipzig › GER
|
54
|
+
|
55
|
+
Benfica › POR
|
56
|
+
Sporting CP › POR
|
57
|
+
Porto › POR
|
58
|
+
|
59
|
+
CSKA Moscow › RUS
|
60
|
+
Spartak Moscow › RUS
|
61
|
+
|
62
|
+
Paris Saint-Germain › FRA
|
63
|
+
Basel › SUI
|
64
|
+
Celtic › SCO
|
65
|
+
Anderlecht › BEL
|
66
|
+
Qarabağ › AZE
|
67
|
+
Olympiacos › GRE
|
68
|
+
Maribor › SVN
|
69
|
+
Shakhtar Donetsk › UKR
|
70
|
+
Feyenoord › NED
|
71
|
+
Beşiktaş › TUR
|
72
|
+
Monaco › MCO
|
73
|
+
APOEL › CYP
|
74
|
+
TXT
|
75
|
+
|
76
|
+
SportDb::ConfReader.parse( txt )
|
77
|
+
end
|
78
|
+
end # class TestConfReader
|