sportdb-readers 0.5.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -3
- data/NOTES.md +35 -0
- data/README.md +86 -20
- data/Rakefile +1 -3
- data/lib/sportdb/readers.rb +21 -60
- data/lib/sportdb/readers/conf_reader.rb +71 -59
- data/lib/sportdb/readers/match_reader.rb +111 -80
- data/lib/sportdb/readers/package.rb +23 -83
- data/lib/sportdb/readers/version.rb +2 -2
- data/test/helper.rb +11 -7
- data/test/test_conf_reader.rb +78 -0
- data/test/test_match_reader_champs.rb +487 -0
- data/test/test_match_reader_eng.rb +3 -3
- data/test/test_match_reader_euro.rb +156 -0
- data/test/test_match_reader_mu.rb +6 -6
- data/test/test_reader.rb +2 -2
- data/test/test_reader_champs.rb +187 -0
- metadata +10 -35
- data/lib/sportdb/readers/conf_linter.rb +0 -73
- data/lib/sportdb/readers/league_outline_reader.rb +0 -146
- data/lib/sportdb/readers/match_linter.rb +0 -30
@@ -2,120 +2,151 @@
|
|
2
2
|
|
3
3
|
module SportDb
|
4
4
|
|
5
|
-
class
|
6
|
-
|
7
|
-
def self.config() Import.config; end
|
8
|
-
|
9
|
-
|
5
|
+
class MatchReader ## todo/check: rename to MatchReaderV2 (use plural?) why? why not?
|
10
6
|
|
11
7
|
def self.read( path, season: nil ) ## use - rename to read_file or from_file etc. - why? why not?
|
12
|
-
txt = File.open( path, 'r:utf-8' ).read
|
8
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read }
|
13
9
|
parse( txt, season: season )
|
14
10
|
end
|
15
11
|
|
16
12
|
def self.parse( txt, season: nil )
|
17
|
-
|
18
|
-
|
13
|
+
new( txt ).parse( season: season )
|
14
|
+
end
|
15
|
+
|
19
16
|
|
20
|
-
|
21
|
-
|
22
|
-
|
17
|
+
include Logging
|
18
|
+
|
19
|
+
def initialize( txt )
|
20
|
+
@txt = txt
|
21
|
+
end
|
22
|
+
|
23
|
+
def parse( season: nil )
|
24
|
+
secs = LeagueOutlineReader.parse( @txt, season: season )
|
25
|
+
pp secs
|
26
|
+
|
27
|
+
|
28
|
+
###
|
29
|
+
## todo/check/fix: move to LeagueOutlineReader for (re)use - why? why not?
|
30
|
+
## use sec[:lang] or something?
|
31
|
+
langs = { ## map country keys to lang codes
|
32
|
+
'de' => 'de', ## de - Deutsch (German)
|
33
|
+
'at' => 'de',
|
34
|
+
'fr' => 'fr', ## fr - French
|
35
|
+
'it' => 'it', ## it - Italian
|
36
|
+
'es' => 'es', ## es - Español (Spanish)
|
37
|
+
'mx' => 'es',
|
38
|
+
'pt' => 'pt', ## pt - Português (Portuguese)
|
39
|
+
'br' => 'br'
|
40
|
+
}
|
41
|
+
|
42
|
+
secs.each do |sec| ## sec(tion)s
|
43
|
+
season = sec[:season]
|
44
|
+
league = sec[:league]
|
45
|
+
stage = sec[:stage]
|
46
|
+
lines = sec[:lines]
|
23
47
|
|
24
48
|
## hack for now: switch lang
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
DateFormats.lang = 'fr'
|
31
|
-
elsif ['it'].include?( league.country.key )
|
32
|
-
SportDb.lang.lang = 'it'
|
33
|
-
DateFormats.lang = 'it'
|
34
|
-
elsif ['es', 'mx'].include?( league.country.key )
|
35
|
-
SportDb.lang.lang = 'es'
|
36
|
-
DateFormats.lang = 'es'
|
37
|
-
elsif ['pt', 'br'].include?( league.country.key )
|
38
|
-
SportDb.lang.lang = 'pt'
|
39
|
-
DateFormats.lang = 'pt'
|
40
|
-
else
|
41
|
-
SportDb.lang.lang = 'en'
|
42
|
-
DateFormats.lang = 'en'
|
49
|
+
## todo/fix: set lang for now depending on league country!!!
|
50
|
+
if league.intl? ## todo/fix: add intl? to ActiveRecord league!!!
|
51
|
+
Import.config.lang = 'en'
|
52
|
+
else ## assume national/domestic
|
53
|
+
Import.config.lang = langs[ league.country.key ] || 'en'
|
43
54
|
end
|
44
55
|
|
45
56
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
57
|
+
start = if season.year?
|
58
|
+
Date.new( season.start_year, 1, 1 )
|
59
|
+
else
|
60
|
+
Date.new( season.start_year, 7, 1 )
|
61
|
+
end
|
50
62
|
|
51
|
-
|
63
|
+
auto_conf_teams, _ = AutoConfParser.parse( lines,
|
64
|
+
start: start )
|
52
65
|
|
53
|
-
|
54
|
-
Sync::Stage.find_or_create( rec[:stage], event: event )
|
55
|
-
else
|
56
|
-
nil
|
57
|
-
end
|
66
|
+
## step 1: map/find teams
|
58
67
|
|
68
|
+
## note: loop over keys (holding the names); values hold the usage counter!! e.g. 'Arsenal' => 2, etc.
|
69
|
+
mods = nil
|
70
|
+
if league.clubs? && league.intl? ## todo/fix: add intl? to ActiveRecord league!!!
|
71
|
+
### quick hack mods for popular/known ambiguous club names
|
72
|
+
## todo/fix: make more generic / reusable!!!!
|
73
|
+
mods = {}
|
74
|
+
## europa league uses same mods as champions league
|
75
|
+
mods[ 'uefa.el' ] = mods[ 'uefa.cl' ] = catalog.clubs.build_mods(
|
76
|
+
{ 'Liverpool | Liverpool FC' => 'Liverpool FC, ENG',
|
77
|
+
'Arsenal | Arsenal FC' => 'Arsenal FC, ENG',
|
78
|
+
'Barcelona' => 'FC Barcelona, ESP',
|
79
|
+
'Valencia' => 'Valencia CF, ESP' })
|
80
|
+
end
|
59
81
|
|
60
|
-
|
61
|
-
|
82
|
+
teams = catalog.teams.find_by!( name: auto_conf_teams.keys,
|
83
|
+
league: league,
|
84
|
+
mods: mods )
|
62
85
|
|
63
|
-
|
64
|
-
|
65
|
-
club_mapping = {} ## name => database (ActiveRecord) record
|
86
|
+
## build mapping - name => team struct record
|
87
|
+
team_mapping = auto_conf_teams.keys.zip( teams ).to_h
|
66
88
|
|
67
|
-
## note: loop over keys (holding the names); values hold the usage counter!! e.g. 'Arsenal' => 2, etc.
|
68
|
-
country = league.country
|
69
|
-
auto_conf_clubs.keys.each do |name|
|
70
|
-
club_rec = config.clubs.find_by!( name: name, country: country )
|
71
|
-
club_recs << club_rec
|
72
89
|
|
73
|
-
|
74
|
-
|
75
|
-
|
90
|
+
parser = MatchParser.new( lines,
|
91
|
+
team_mapping,
|
92
|
+
start ) ## note: keep season start_at date for now (no need for more specific stage date need for now)
|
76
93
|
|
94
|
+
matches, rounds, groups = parser.parse
|
77
95
|
|
78
|
-
|
79
|
-
|
80
|
-
clubs = club_mapping.values.uniq
|
96
|
+
pp rounds
|
97
|
+
pp groups
|
81
98
|
|
82
99
|
|
83
|
-
|
84
|
-
|
85
|
-
team_ids = stage ? stage.team_ids : event.team_ids
|
100
|
+
######################################################
|
101
|
+
## step 2: add to database
|
86
102
|
|
87
|
-
|
88
|
-
|
89
|
-
## for now check if team is already included
|
90
|
-
## todo/fix: clear/destroy_all first - why? why not!!!
|
103
|
+
event_rec = Sync::Event.find_or_create_by( league: league,
|
104
|
+
season: season )
|
91
105
|
|
92
|
-
|
93
|
-
|
106
|
+
stage_rec = if stage
|
107
|
+
Sync::Stage.find_or_create( stage, event: event_rec )
|
108
|
+
else
|
109
|
+
nil
|
110
|
+
end
|
94
111
|
|
112
|
+
team_recs = stage_rec ? stage_rec.teams : event_rec.teams
|
113
|
+
team_ids = stage_rec ? stage_rec.team_ids : event_rec.team_ids
|
95
114
|
|
115
|
+
## todo/fix: check if all teams are unique
|
116
|
+
## check if uniq works for club record (struct) - yes,no ??
|
117
|
+
new_team_recs = Sync::Team.find_or_create( team_mapping.values.uniq )
|
96
118
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
119
|
+
new_team_recs.each do |team_rec|
|
120
|
+
## add teams to event
|
121
|
+
## for now check if team is already included
|
122
|
+
## todo/fix: clear/destroy_all first - why? why not!!!
|
123
|
+
team_recs << team_rec unless team_ids.include?( team_rec.id )
|
124
|
+
end
|
101
125
|
|
102
|
-
match_recs, round_recs = parser.parse
|
103
126
|
|
104
|
-
|
127
|
+
rounds.each do |round|
|
128
|
+
round_rec = Sync::Round.find_or_create( round, event: event_rec ) ## check: use/rename to EventRound why? why not?
|
129
|
+
end
|
105
130
|
|
106
|
-
|
107
|
-
|
108
|
-
round_rec.pos = 999 if round_rec.pos.nil?
|
109
|
-
round = Sync::Round.find_or_create( round_rec, event: event ) ## check: use/rename to EventRound why? why not?
|
131
|
+
groups.each do |group|
|
132
|
+
group_rec = Sync::Group.find_or_create( group, event: event_rec ) ## check: use/rename to EventGroup why? why not?
|
110
133
|
end
|
111
134
|
|
112
|
-
|
113
|
-
##
|
114
|
-
match =
|
135
|
+
matches.each do |match|
|
136
|
+
## note: pass along stage (if present): stage - optional from heading!!!!
|
137
|
+
match = match.update( stage: stage ) if stage
|
138
|
+
match_rec = Sync::Match.create_or_update( match, event: event_rec )
|
115
139
|
end
|
116
140
|
end
|
117
141
|
|
118
|
-
|
119
|
-
end # method
|
120
|
-
|
142
|
+
true ## success/ok
|
143
|
+
end # method parse
|
144
|
+
|
145
|
+
|
146
|
+
######################
|
147
|
+
# (convenience) helpers
|
148
|
+
|
149
|
+
def catalog() Import.catalog; end
|
150
|
+
|
151
|
+
end # class MatchReader
|
121
152
|
end # module SportDb
|
@@ -2,130 +2,70 @@
|
|
2
2
|
module SportDb
|
3
3
|
class Package
|
4
4
|
|
5
|
-
|
6
|
-
CLUB_PROPS_RE = Datafile::CLUB_PROPS_RE
|
7
|
-
LEAGUES_RE = Datafile::LEAGUES_RE
|
8
|
-
CLUBS_RE = Datafile::CLUBS_RE
|
9
|
-
|
10
|
-
|
11
|
-
## note: if pattern includes directory add here (otherwise move to more "generic" datafile) - why? why not?
|
12
|
-
MATCH_RE = %r{ /(?: \d{4}-\d{2} ## season folder e.g. /2019-20
|
13
|
-
| \d{4} ## season year-only folder e.g. /2019
|
14
|
-
)
|
15
|
-
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
16
|
-
}x
|
17
|
-
|
18
|
-
|
19
|
-
attr_reader :pack ## allow access to embedded ("low-level") delegate package
|
20
|
-
|
21
|
-
def initialize( path_or_pack )
|
22
|
-
if path_or_pack.is_a?( Datafile::Package )
|
23
|
-
@pack = path_or_pack
|
24
|
-
else ## assume it's a (string) path
|
25
|
-
path = path_or_pack
|
26
|
-
if !File.exist?( path ) ## file or directory
|
27
|
-
puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
|
28
|
-
exit 1
|
29
|
-
end
|
30
|
-
|
31
|
-
if File.directory?( path )
|
32
|
-
@pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
|
33
|
-
elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
|
34
|
-
@pack = Datafile::ZipPackage.new( path )
|
35
|
-
else
|
36
|
-
puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
|
37
|
-
exit 1
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
def each_conf( &blk ) @pack.each( pattern: CONF_RE, &blk ); end
|
43
|
-
def each_match( &blk ) @pack.each( pattern: MATCH_RE, &blk ); end
|
44
|
-
def each_club_props( &blk ) @pack.each( pattern: CLUB_PROPS_RE, &blk ); end
|
45
|
-
|
46
|
-
def each_leagues( &blk ) @pack.each( pattern: LEAGUES_RE, &blk ); end
|
47
|
-
def each_clubs( &blk ) @pack.each( pattern: CLUBS_RE, &blk ); end
|
48
|
-
|
5
|
+
## note: add readers here; for full class def see the source in sportdb-formats!!!
|
49
6
|
|
50
7
|
def read_leagues
|
51
|
-
each_leagues
|
52
|
-
SportDb.parse_leagues( entry.read )
|
53
|
-
end
|
8
|
+
each_leagues { |entry| SportDb.parse_leagues( entry.read ) }
|
54
9
|
end
|
55
10
|
|
56
11
|
def read_clubs
|
57
|
-
each_clubs
|
58
|
-
SportDb.parse_clubs( entry.read )
|
59
|
-
end
|
12
|
+
each_clubs { |entry| SportDb.parse_clubs( entry.read ) }
|
60
13
|
end
|
61
14
|
|
62
|
-
|
63
|
-
|
64
|
-
each_club_props do |entry|
|
65
|
-
SportDb.parse_club_props( entry.read, sync: sync )
|
66
|
-
end
|
15
|
+
def read_club_props
|
16
|
+
each_club_props { |entry| SportDb.parse_club_props( entry.read ) }
|
67
17
|
end
|
68
18
|
|
69
|
-
|
70
|
-
|
19
|
+
|
20
|
+
def read_conf( *names, season: nil )
|
71
21
|
if names.empty? ## no (entry) names passed in; read in all
|
72
22
|
each_conf do |entry|
|
73
|
-
SportDb.parse_conf( entry.read, season: season
|
23
|
+
SportDb.parse_conf( entry.read, season: season )
|
74
24
|
end
|
75
25
|
else
|
76
26
|
names.each do |name|
|
77
27
|
entry = @pack.find( name )
|
78
|
-
SportDb.parse_conf( entry.read, season: season
|
28
|
+
SportDb.parse_conf( entry.read, season: season )
|
79
29
|
end
|
80
30
|
end
|
81
31
|
end
|
82
32
|
|
83
|
-
def read_match( *names,
|
84
|
-
season: nil, sync: true )
|
33
|
+
def read_match( *names, season: nil )
|
85
34
|
if names.empty? ## no (entry) names passed in; read in all
|
86
35
|
each_match do |entry|
|
87
|
-
SportDb.parse_match( entry.read, season: season
|
36
|
+
SportDb.parse_match( entry.read, season: season )
|
88
37
|
end
|
89
38
|
else
|
90
39
|
names.each do |name|
|
91
40
|
entry = @pack.find( name )
|
92
|
-
SportDb.parse_match( entry.read, season: season
|
41
|
+
SportDb.parse_match( entry.read, season: season )
|
93
42
|
end
|
94
43
|
end
|
95
44
|
end
|
96
45
|
|
97
46
|
|
98
|
-
def read( *names,
|
99
|
-
season: nil, sync: true )
|
47
|
+
def read( *names, season: nil )
|
100
48
|
if names.empty? ## read all datafiles
|
101
49
|
read_leagues()
|
102
50
|
read_clubs()
|
103
|
-
read_club_props(
|
104
|
-
|
105
|
-
|
51
|
+
read_club_props()
|
52
|
+
## note: skip conf(iguration)s for now!!!!!!!
|
53
|
+
## read_conf( season: season )
|
54
|
+
read_match( season: season )
|
106
55
|
else
|
107
56
|
names.each do |name|
|
108
57
|
entry = @pack.find( name )
|
109
58
|
## fix/todo: add read_leagues, read_clubs too!!!
|
110
|
-
if
|
111
|
-
SportDb.parse_conf( entry.read, season: season
|
112
|
-
elsif
|
113
|
-
SportDb.parse_club_props( entry.read
|
114
|
-
else ## assume "regular" match datafile
|
115
|
-
SportDb.parse_match( entry.read, season: season
|
59
|
+
if match_conf?( name ) ## check if datafile matches conf(iguration) naming (e.g. .conf.txt)
|
60
|
+
SportDb.parse_conf( entry.read, season: season )
|
61
|
+
elsif match_club_props?( name )
|
62
|
+
SportDb.parse_club_props( entry.read )
|
63
|
+
else ## assume "regular" match datafile or check pattern and report error on fail - why? why not?
|
64
|
+
SportDb.parse_match( entry.read, season: season )
|
116
65
|
end
|
117
66
|
end
|
118
67
|
end
|
119
68
|
end
|
120
69
|
end # class Package
|
121
70
|
|
122
|
-
|
123
|
-
class DirPackage < Package
|
124
|
-
def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
|
125
|
-
end
|
126
|
-
|
127
|
-
class ZipPackage < Package
|
128
|
-
def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
|
129
|
-
end
|
130
|
-
|
131
71
|
end # module SportDb
|
@@ -4,8 +4,8 @@
|
|
4
4
|
module SportDb
|
5
5
|
module Readers
|
6
6
|
|
7
|
-
MAJOR =
|
8
|
-
MINOR =
|
7
|
+
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
8
|
+
MINOR = 1
|
9
9
|
PATCH = 0
|
10
10
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
11
11
|
|
data/test/helper.rb
CHANGED
@@ -1,19 +1,23 @@
|
|
1
|
-
##
|
1
|
+
## note: use the local version of sportdb gems
|
2
|
+
$LOAD_PATH.unshift( File.expand_path( '../sportdb-formats/lib' ))
|
3
|
+
$LOAD_PATH.unshift( File.expand_path( '../sportdb-config/lib' ))
|
4
|
+
$LOAD_PATH.unshift( File.expand_path( '../sportdb-models/lib' ))
|
5
|
+
$LOAD_PATH.unshift( File.expand_path( '../sportdb-sync/lib' ))
|
6
|
+
|
2
7
|
|
3
8
|
## minitest setup
|
4
9
|
require 'minitest/autorun'
|
5
10
|
|
6
11
|
|
7
|
-
## note: use the local version of sportdb gems
|
8
|
-
$LOAD_PATH.unshift( File.expand_path( '../sportdb-match-formats/lib' ))
|
9
|
-
|
10
12
|
|
11
13
|
## our own code
|
12
14
|
require 'sportdb/readers'
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
16
|
## use (switch to) "external" datasets
|
18
17
|
SportDb::Import.config.leagues_dir = "../../../openfootball/leagues"
|
19
18
|
SportDb::Import.config.clubs_dir = "../../../openfootball/clubs"
|
19
|
+
|
20
|
+
|
21
|
+
COUNTRIES = SportDb::Import.catalog.countries
|
22
|
+
LEAGUES = SportDb::Import.catalog.leagues
|
23
|
+
CLUBS = SportDb::Import.catalog.clubs
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_conf_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestConfReader < MiniTest::Test
|
12
|
+
|
13
|
+
def setup
|
14
|
+
SportDb.connect( adapter: 'sqlite3',
|
15
|
+
database: ':memory:' )
|
16
|
+
SportDb.create_all ## build schema
|
17
|
+
|
18
|
+
## turn on logging to console
|
19
|
+
ActiveRecord::Base.logger = Logger.new(STDOUT)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_read
|
23
|
+
# path = "../../../openfootball/austria/2018-19/.conf.txt"
|
24
|
+
path = "../../../openfootball/england/2015-16/.conf.txt"
|
25
|
+
# path = "../../../openfootball/england/2017-18/.conf.txt"
|
26
|
+
# path = "../../../openfootball/england/2018-19/.conf.txt"
|
27
|
+
# path = "../../../openfootball/england/2019-20/.conf.txt"
|
28
|
+
SportDb::ConfReader.read( path )
|
29
|
+
end # method test_read
|
30
|
+
|
31
|
+
|
32
|
+
def test_read_champs
|
33
|
+
txt =<<TXT
|
34
|
+
= UEFA Champions League 2017/18
|
35
|
+
|
36
|
+
Manchester United › ENG
|
37
|
+
Liverpool › ENG
|
38
|
+
Chelsea › ENG
|
39
|
+
Manchester City › ENG
|
40
|
+
Tottenham Hotspur › ENG
|
41
|
+
|
42
|
+
Atlético Madrid › ESP
|
43
|
+
Barcelona › ESP
|
44
|
+
Sevilla › ESP
|
45
|
+
Real Madrid › ESP
|
46
|
+
|
47
|
+
Roma › ITA
|
48
|
+
Juventus › ITA
|
49
|
+
Napoli › ITA
|
50
|
+
|
51
|
+
Bayern München › GER
|
52
|
+
Borussia Dortmund › GER
|
53
|
+
RB Leipzig › GER
|
54
|
+
|
55
|
+
Benfica › POR
|
56
|
+
Sporting CP › POR
|
57
|
+
Porto › POR
|
58
|
+
|
59
|
+
CSKA Moscow › RUS
|
60
|
+
Spartak Moscow › RUS
|
61
|
+
|
62
|
+
Paris Saint-Germain › FRA
|
63
|
+
Basel › SUI
|
64
|
+
Celtic › SCO
|
65
|
+
Anderlecht › BEL
|
66
|
+
Qarabağ › AZE
|
67
|
+
Olympiacos › GRE
|
68
|
+
Maribor › SVN
|
69
|
+
Shakhtar Donetsk › UKR
|
70
|
+
Feyenoord › NED
|
71
|
+
Beşiktaş › TUR
|
72
|
+
Monaco › MCO
|
73
|
+
APOEL › CYP
|
74
|
+
TXT
|
75
|
+
|
76
|
+
SportDb::ConfReader.parse( txt )
|
77
|
+
end
|
78
|
+
end # class TestConfReader
|