sportdb-readers 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Manifest.txt +14 -0
- data/README.md +26 -0
- data/Rakefile +31 -0
- data/lib/sportdb/readers.rb +22 -0
- data/lib/sportdb/readers/event_reader.rb +104 -0
- data/lib/sportdb/readers/match_parser.rb +466 -0
- data/lib/sportdb/readers/match_reader.rb +952 -0
- data/lib/sportdb/readers/outline_reader.rb +83 -0
- data/lib/sportdb/readers/sync.rb +208 -0
- data/lib/sportdb/readers/version.rb +25 -0
- data/test/helper.rb +21 -0
- data/test/test_match_parser.rb +101 -0
- data/test/test_reader.rb +35 -0
- metadata +118 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
|
6
|
+
## shared "higher-level" outline reader
|
7
|
+
## todo: add CountryOutlineReader - why? why not?
|
8
|
+
class LeagueOutlineReader
  ## Turns an outline text into one record per league + season heading.
  ##  Every record is a hash of the form { league:, season:, lines: [] }
  ##  where lines collects the plain text lines following the heading.

  ## split heading into league + season
  ##  e.g. Österr. Bundesliga 2015/16    ## or 2015-16
  ##       World Cup 2018
  LEAGUE_SEASON_HEADING_REGEX = /^
         (?<league>.+?)     ## non-greedy
            \s+
         (?<season>\d{4}
            (?:[\/-]\d{2})?     ## optional 2nd year in season
         )
           $/x

  ## Parse the outline in txt (as understood by OutlineReader.parse).
  ##
  ##  Nodes are read as pairs - node[0] is the type, node[1] the text:
  ##   - :h1  - heading; must match LEAGUE_SEASON_HEADING_REGEX and
  ##            starts a new record
  ##   - :l   - regular (text) line; gets appended to the latest record
  ##            (or skipped with a warning if no heading seen yet)
  ##  Any other node type or a heading without a season prints an error
  ##  and terminates the process (exit 1).
  ##
  ##  Returns an array of records; in a second pass the league name (string)
  ##  of every record gets replaced with the result of find_league.
  def self.parse( txt )
    recs = []

    OutlineReader.parse( txt ).each do |node|
      text = node[1]

      case node[0]
      when :h1
        ## check for league and season
        m = text.match( LEAGUE_SEASON_HEADING_REGEX )
        if m.nil?
          puts "** !!! ERROR !!! - CANNOT match league and season in heading; season missing?"
          pp text
          exit 1
        end

        puts "league >#{m[:league]}<, season >#{m[:season]}<"

        recs << { league: m[:league],
                  season: m[:season],
                  lines:  [] }
      when :l     ## regular (text) line
        ## note: skip lines if no heading seen
        if recs.empty?
          puts "** !! WARN !! - skipping line (no heading) >#{text}<"
        else
          recs.last[:lines] << text
        end
      else
        puts "** !!! ERROR !!! unknown line type; for now only heading 1 for leagues supported; sorry:"
        pp node
        exit 1
      end
    end

    ## pass 2 - check & map; replace inline (string with data record)
    recs.each { |rec| rec[:league] = find_league( rec[:league] ) }

    recs
  end # method parse


  ## Map a league name to exactly one entry of the LEAGUES index.
  ##
  ##  LEAGUES.match( name ) is expected to return nil (no match) or a
  ##  collection responding to size and [] - the single match gets returned.
  ##  No match or more than one match prints an error and terminates
  ##  the process (exit 1).
  ##
  ##  NOTE(review): LEAGUES is a top-level constant that is NOT defined by
  ##   this class - make sure it gets set up before calling parse.
  def self.find_league( name )
    m = LEAGUES.match( name )
    # pp m

    if m.nil?
      puts "** !!! ERROR !!! no league match found for >#{name}<, add to leagues table; sorry"
      exit 1
    elsif m.size > 1
      puts "** !!! ERROR !!! ambiguous league name; too many leagues (#{m.size}) found:"
      pp m
      exit 1
    end

    m[0]
  end
end # class LeagueOutlineReader
|
82
|
+
|
83
|
+
end # module SportDb
|
@@ -0,0 +1,208 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
|
6
|
+
module Sync
|
7
|
+
class Country
  ## Return the WorldDb country record with the same key as country;
  ##  insert a new record first if none is found.
  ##
  ##  country must respond to key, name and fifa.
  def self.find_or_create( country )
    existing = WorldDb::Model::Country.find_by( key: country.key )
    return existing unless existing.nil?

    fields = {
      key:  country.key,
      name: country.name,
      code: country.fifa,    ## fix: uses fifa code now (should be iso-alpha3 if available)
      fifa: country.fifa,
      area: 1,               ## presumably a placeholder value - TODO confirm
      pop:  1                ## presumably a placeholder value - TODO confirm
    }
    WorldDb::Model::Country.create!( fields )
  end
end # class Country
|
24
|
+
|
25
|
+
|
26
|
+
class League
  ## Look up the db record by league.key; returns nil if not found.
  def self.find( league )
    SportDb::Model::League.find_by( key: league.key )
  end

  ## Same as find but prints an error and terminates the process (exit 1)
  ##  if no record is found.
  def self.find!( league )
    found = find( league )
    return found unless found.nil?

    puts "** !!!ERROR!!! db sync - no league match found for:"
    pp league
    exit 1
  end

  ## Return the existing db record or insert a new one built from
  ##  league.key and league.name (plus the country if present).
  def self.find_or_create( league )
    found = find( league )
    return found unless found.nil?

    ## use title and not name - why? why not?
    ##  quick fix: change name to title
    fields = { key: league.key, title: league.name }
    fields[ :country_id ] = Country.find_or_create( league.country ).id  if league.country

    SportDb::Model::League.create!( fields )
  end
end # class League
|
56
|
+
|
57
|
+
class Season
  ## helper for season key (rename to norm_key ???)
  ##  note: "normalize" season key
  ##   always use 2017/18 (and not 2017-18 or 2017-2018 or 2017/2018)
  def self.normalize_key( key )
    ## 1) change 2017-18 to 2017/18
    norm = key.tr( '-', '/' )
    ## 2) check for 2017/2018 (nine chars) - change to 2017/18
    norm.length == 9 ? "#{norm[0..3]}/#{norm[7..8]}" : norm
  end

  ## Look up the db record by the normalized key; returns nil if not found.
  def self.find( key )
    SportDb::Model::Season.find_by( key: normalize_key( key ) )
  end

  ## Same as find but prints an error and terminates the process (exit 1)
  ##  if no record is found.
  def self.find!( key )
    found = find( key )
    return found unless found.nil?

    puts "** !!!ERROR!!! db sync - no season match found for >#{normalize_key(key)}<:"
    pp key
    exit 1
  end

  ## Return the existing db record or insert a new one;
  ##  the normalized key is used for both key and title.
  def self.find_or_create( key )  ## e.g. key = '2017/18'
    found = find( key )
    return found unless found.nil?

    ## note: do NOT forget to normalize key e.g. always use slash (2019/20) etc.
    norm = normalize_key( key )
    SportDb::Model::Season.create!( { key: norm, title: norm } )
  end
end # class Season
|
95
|
+
|
96
|
+
class Club
  ## Return the team db record with the title club.name;
  ##  insert a new (club) team record first if none is found.
  ##
  ##  club must respond to name, country and alt_names.
  def self.find_or_create( club )
    found = SportDb::Model::Team.find_by( title: club.name )
    return found unless found.nil?

    ## key - remove all non-ascii a-z chars (from the downcased name)
    slug = club.name.downcase.gsub( /[^a-z]/, '' )
    puts "add club: #{slug}, #{club.name}, #{club.country.name} (#{club.country.key})"

    fields = {
      key:        slug,
      title:      club.name,
      country_id: Country.find_or_create( club.country ).id,
      club:       true,
      national:   false    ## check -is default anyway - use - why? why not?
      ## todo/fix: add city if present - why? why not?
    }
    ## alternate names get stored as a single pipe-separated string
    fields[:synonyms] = club.alt_names.join('|')  unless club.alt_names.empty?

    SportDb::Model::Team.create!( fields )
  end
end # class Club
|
121
|
+
|
122
|
+
class Event
  ## Look up the event db record for the league + season pair;
  ##  returns nil if not found.
  def self.find( league:, season: )
    SportDb::Model::Event.find_by( league_id: league.id, season_id: season.id )
  end

  ## Same as find but prints an error and terminates the process (exit 1)
  ##  if no record is found.
  def self.find!( league:, season: )
    rec = find( league: league, season: season )
    if rec.nil?
      puts "** !!!ERROR!!! db sync - no event match found for:"
      pp league
      pp season
      exit 1
    end
    rec
  end

  ## Return the existing event db record or insert a new one.
  ##
  ##  league and season must respond to id; season also to key
  ##  (e.g. '2018' or '2014/15') - the key is used to derive
  ##  a "fake" start date for new records.
  def self.find_or_create( league:, season: )
    rec = find( league: league, season: season )
    if rec.nil?
      ## quick hack/change later !!
      ##   add "fake" start_at date for now
      ## note: size is an integer - compare with 4 (and NOT with the string '4'
      ##   which never matches and always ended up in the else branch)
      if season.key.size == 4     ## e.g. assume 2018 etc.
        year = season.key.to_i
        start_at = Date.new( year, 1, 1 )
      else                        ## assume 2014/15 etc.
        year = season.key[0..3].to_i
        start_at = Date.new( year, 7, 1 )
      end

      attribs = {
        league_id: league.id,
        season_id: season.id,
        start_at:  start_at }

      rec = SportDb::Model::Event.create!( attribs )
    end
    rec
  end
end # class Event
|
164
|
+
|
165
|
+
class Round
  ## Return the round db record matching round.title within the event;
  ##  insert a new record first if none is found.
  ##
  ##  New records get the event's start date (event.start_at.to_date)
  ##  as their own start_at.
  def self.find_or_create( round, event: )
    found = SportDb::Model::Round.find_by( title: round.title, event_id: event.id )
    return found unless found.nil?

    fields = {
      event_id: event.id,
      title:    round.title,
      pos:      round.pos,
      start_at: event.start_at.to_date
    }
    SportDb::Model::Round.create!( fields )
  end
end # class Round
|
179
|
+
|
180
|
+
class Match   ## todo/check: add alias for Game class - why? why not?
  ## Return the game db record for the match (same round, team1 and team2);
  ##  insert a new record first if none is found.
  ##
  ##  note: an existing record gets returned as is - updating is still a todo.
  def self.create_or_update( match, event: )
    ## note: MUST find round, thus, use bang (!)
    round_rec = SportDb::Model::Round.find_by!( event_id: event.id,
                                                title:    match.round.title )

    lookup = { round_id: round_rec.id,
               team1_id: match.team1.id,
               team2_id: match.team2.id }

    existing = SportDb::Model::Game.find_by( **lookup )
    return existing unless existing.nil?     # update - todo

    fields = lookup.merge(
      pos:     999,    ## make optional why? why not? - change to num?
      play_at: match.date.to_date,
      score1:  match.score1,
      score2:  match.score2,
      score1i: match.score1i,
      score2i: match.score2i )

    SportDb::Model::Game.create!( fields )
  end
end # class Match
|
206
|
+
|
207
|
+
end # module Sync
|
208
|
+
end # module SportDb
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
module Readers
  ## version numbers & helpers

  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
  MINOR = 0
  PATCH = 1
  VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

  class << self
    ## version string e.g. 0.0.1
    def version
      VERSION
    end

    ## one-line banner - gem name/version plus ruby version, release date and platform
    def banner
      "sportdb-readers/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
    end

    ## absolute path of the directory four levels up from this file
    def root
      top = 4.times.inject( __FILE__ ) { |path, _| File.dirname( path ) }
      File.expand_path( top )
    end
  end

end # module Readers
|
25
|
+
end # module SportDb
|
data/test/helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
## minitest setup
|
4
|
+
require 'minitest/autorun'
|
5
|
+
|
6
|
+
|
7
|
+
## our own code
|
8
|
+
require 'sportdb/readers'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
## use (switch to) "external" datasets
##  note: relative paths - get resolved against the current working directory
{ clubs_dir:   "../../../openfootball/clubs",
  leagues_dir: "../../../openfootball/leagues" }.each do |setting, dir|
  SportDb::Import.config.public_send( "#{setting}=", dir )
end


## top-level shortcuts for the import indexes
LEAGUES   = SportDb::Import.config.leagues
CLUBS     = SportDb::Import.config.clubs
COUNTRIES = SportDb::Import.config.countries
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_match_parser.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
## note: use Minitest::Test (and not the legacy MiniTest::Test alias);
##  newer minitest versions no longer load the MiniTest alias by default
class TestMatchParser < Minitest::Test

  ## build ActiveRecord-like club records/structs
  Club = Struct.new( :key, :title, :synonyms )

  ## Read one club per line in the format
  ##    key, title [, alt name]*
  ##  and return an array of Club structs;
  ##  alt names (if any) get joined into a single pipe-separated string
  ##  otherwise synonyms is nil.
  def Club.read( txt )
    txt.each_line.map do |line|
      key, title, *alt_names = line.split( ',' ).map( &:strip )
      Club.new( key, title, alt_names.empty? ? nil : alt_names.join( '|' ) )
    end
  end


  ## note: smoke test only for now - parses two matchdays and
  ##  pretty prints the results (no assertions yet)
  def test_parse
    txt = <<TXT
Matchday 1

[Fri Aug/11]
Arsenal FC 4-3 Leicester City
[Sat Aug/12]
Watford FC 3-3 Liverpool FC
Chelsea FC 2-3 Burnley FC
Crystal Palace 0-3 Huddersfield Town
Everton FC 1-0 Stoke City
Southampton FC 0-0 Swansea City
West Bromwich Albion 1-0 AFC Bournemouth
Brighton & Hove Albion 0-2 Manchester City
[Sun Aug/13]
Newcastle United 0-2 Tottenham Hotspur
Manchester United 4-0 West Ham United


Matchday 2

[Sat Aug/19]
Swansea City 0-4 Manchester United
AFC Bournemouth 0-2 Watford FC
Burnley FC 0-1 West Bromwich Albion
Leicester City 2-0 Brighton & Hove Albion
Liverpool FC 1-0 Crystal Palace
Southampton FC 3-2 West Ham United
Stoke City 1-0 Arsenal FC
[Sun Aug/20]
Huddersfield Town 1-0 Newcastle United
Tottenham Hotspur 1-2 Chelsea FC
[Mon Aug/21]
Manchester City 1-1 Everton FC
TXT

    clubs_txt = <<TXT
arsenalfc, Arsenal FC, Arsenal, FC Arsenal
leicestercityfc, Leicester City FC, Leicester, Leicester City
watfordfc, Watford FC, Watford, FC Watford
liverpoolfc, Liverpool FC, Liverpool, FC Liverpool
chelseafc, Chelsea FC, Chelsea, FC Chelsea
burnleyfc, Burnley FC, Burnley, FC Burnley
crystalpalacefc, Crystal Palace FC, Crystal Palace, C Palace, Palace, Crystal P
huddersfieldtownafc, Huddersfield Town AFC, Huddersfield, Huddersfield Town
evertonfc, Everton FC, Everton, FC Everton
stokecityfc, Stoke City FC, Stoke, Stoke City
southamptonfc, Southampton FC, Southampton, FC Southampton
swanseacityfc, Swansea City FC, Swansea, Swansea City, Swansea City AFC
westbromwichalbionfc, West Bromwich Albion FC, West Brom, West Bromwich Albion, West Bromwich, Albion
afcbournemouth, AFC Bournemouth, Bournemouth, A.F.C. Bournemouth, Bournemouth FC
brightonhovealbionfc, Brighton & Hove Albion FC, Brighton, Brighton & Hove, Brighton & Hove Albion
manchestercityfc, Manchester City FC, Man City, Manchester City, Man. City, Manchester C
newcastleunitedfc, Newcastle United FC, Newcastle, Newcastle Utd, Newcastle United
tottenhamhotspurfc, Tottenham Hotspur FC, Tottenham, Tottenham Hotspur, Spurs
manchesterunitedfc, Manchester United FC, Man Utd, Man. United, Manchester U., Manchester Utd, Manchester United
westhamunitedfc, West Ham United FC, West Ham, West Ham United
TXT


    clubs = Club.read( clubs_txt )
    pp clubs

    lines = txt.split( /\n+/ )    # note: removes/strips empty lines
    pp lines

    start_at = Date.new( 2017, 7, 1 )


    parser = SportDb::MatchParserSimpleV2.new( lines, clubs, start_at )
    rounds, matches = parser.parse
    pp rounds
    pp matches
  end  # method test_parse
end # class TestMatchParser
|
data/test/test_reader.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
## note: use Minitest::Test (and not the legacy MiniTest::Test alias);
##  newer minitest versions no longer load the MiniTest alias by default
class TestReader < Minitest::Test

  ## note: smoke test only for now (no assertions yet) -
  ##  sets up an in-memory sqlite database and reads in
  ##  an event config and a match schedule from the "external"
  ##  openfootball datasets (paths are relative to the working directory)
  def test_read

    SportDb.connect( adapter: 'sqlite3', database: ':memory:' )
    SportDb.create_all   ## build schema

    ## turn on logging to console
    ActiveRecord::Base.logger = Logger.new(STDOUT)


    path = "../../../openfootball/england/2015-16/.conf.txt"
    # path = "../../../openfootball/england/2017-18/.conf.txt"
    # path = "../../../openfootball/england/2018-19/.conf.txt"
    # path = "../../../openfootball/england/2019-20/.conf.txt"
    recs = SportDb::EventReaderV2.read( path )
    path = "../../../openfootball/england/2015-16/1-premierleague-i.txt"
    # path = "../../../openfootball/england/2017-18/1-premierleague-i.txt"
    # path = "../../../openfootball/england/2018-19/1-premierleague.txt"
    # path = "../../../openfootball/england/2019-20/1-premierleague.txt"
    recs = SportDb::MatchReaderV2.read( path )
    # path = "../../../openfootball/england/2017-18/1-premierleague-ii.txt"
    #recs = SportDb::MatchReaderV2.read( path )
  end  # method test_read
end # class TestReader
|