sportdb-readers 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Manifest.txt +14 -0
- data/README.md +26 -0
- data/Rakefile +31 -0
- data/lib/sportdb/readers.rb +22 -0
- data/lib/sportdb/readers/event_reader.rb +104 -0
- data/lib/sportdb/readers/match_parser.rb +466 -0
- data/lib/sportdb/readers/match_reader.rb +952 -0
- data/lib/sportdb/readers/outline_reader.rb +83 -0
- data/lib/sportdb/readers/sync.rb +208 -0
- data/lib/sportdb/readers/version.rb +25 -0
- data/test/helper.rb +21 -0
- data/test/test_match_parser.rb +101 -0
- data/test/test_reader.rb +35 -0
- metadata +118 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
|
6
|
+
## shared "higher-level" outline reader
|
7
|
+
## todo: add CountryOutlineReader - why? why not?
|
8
|
+
class LeagueOutlineReader

  ## Pattern for a heading made up of a league name followed by a season
  ##   e.g. Österr. Bundesliga 2015/16    ## or 2015-16
  ##        World Cup 2018
  ##  named captures: league, season
  LEAGUE_SEASON_HEADING_REGEX = /^
         (?<league>.+?)     ## non-greedy
            \s+
         (?<season>\d{4}
            (?:[\/-]\d{2})?     ## optional 2nd year in season
         )
            $/x

  ## Turns outline text into an array of section records (hashes).
  ##
  ##  Every heading 1 node (:h1) opens a new record with
  ##    league: the league looked up via find_league
  ##    season: the season string captured from the heading
  ##    lines:  all text lines (:l nodes) following the heading
  ##
  ##  Text lines before the first heading get skipped (with a warning).
  ##  A heading without a league + season or any other node type
  ##  prints an error and exits (status 1).
  def self.parse( txt )
    sections = []

    ## pass 1 - collect sections (heading plus its lines)
    OutlineReader.parse( txt ).each do |node|
      case node[0]
      when :h1
        ## check for league and season
        heading = node[1]
        m = heading.match( LEAGUE_SEASON_HEADING_REGEX )
        if m.nil?
          puts "** !!! ERROR !!! - CANNOT match league and season in heading; season missing?"
          pp heading
          exit 1
        end

        puts "league >#{m[:league]}<, season >#{m[:season]}<"
        sections << { league: m[:league],
                      season: m[:season],
                      lines:  [] }
      when :l    ## regular (text) line
        line = node[1]
        ## note: skip lines if no heading seen
        if sections.empty?
          puts "** !! WARN !! - skipping line (no heading) >#{line}<"
        else
          sections.last[:lines] << line
        end
      else
        puts "** !!! ERROR !!! unknown line type; for now only heading 1 for leagues supported; sorry:"
        pp node
        exit 1
      end
    end

    ## pass 2 - check & map; replace inline (string with data record)
    sections.each { |section| section[:league] = find_league( section[:league] ) }

    sections
  end # method parse


  ## Looks up the league name in the LEAGUES index and returns the
  ##  first (and only) entry of the match result.
  ##  Prints an error and exits (status 1) if the lookup returns nil
  ##  or more than one entry.
  def self.find_league( name )
    matches = LEAGUES.match( name )
    # pp matches

    if matches.nil?
      puts "** !!! ERROR !!! no league match found for >#{name}<, add to leagues table; sorry"
      exit 1
    end

    if matches.size > 1
      puts "** !!! ERROR !!! ambigious league name; too many leagues (#{matches.size}) found:"
      pp matches
      exit 1
    end

    matches[0]
  end
end # class LeagueOutlineReader
|
82
|
+
|
83
|
+
end # module SportDb
|
@@ -0,0 +1,208 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
|
6
|
+
module Sync
|
7
|
+
class Country
  ## Returns the WorldDb country record with the same key as the
  ##  passed-in country; creates (and returns) a new record if
  ##  no record with that key is found.
  def self.find_or_create( country )
    model = WorldDb::Model::Country

    found = model.find_by( key: country.key )
    return found unless found.nil?

    model.create!( {
      key:  country.key,
      name: country.name,
      code: country.fifa,    ## fix: uses fifa code now (should be iso-alpha3 if available)
      fifa: country.fifa,
      area: 1,               ## note: hard-coded value (presumably a placeholder - confirm)
      pop:  1                ## note: hard-coded value (presumably a placeholder - confirm)
    } )
  end
end # class Country
|
24
|
+
|
25
|
+
|
26
|
+
class League
  ## Returns the league record with the same key or nil.
  def self.find( league )
    SportDb::Model::League.find_by( key: league.key )
  end

  ## Same as find but prints an error (plus the league)
  ##  and exits (status 1) if no record is found.
  def self.find!( league )
    found = find( league )
    return found unless found.nil?

    puts "** !!!ERROR!!! db sync - no league match found for:"
    pp league
    exit 1
  end

  ## Returns the league record with the same key; creates it first
  ##  if missing. The league's country (if any) gets found or created
  ##  too and linked via country_id.
  def self.find_or_create( league )
    found = find( league )
    return found unless found.nil?

    ## use title and not name - why? why not?
    ##  quick fix:  change name to title
    attribs = { key: league.key, title: league.name }

    country = league.country
    attribs[ :country_id ] = Country.find_or_create( country ).id   if country

    SportDb::Model::League.create!( attribs )
  end
end # class League
|
56
|
+
|
57
|
+
class Season
  ## Helper for season key (rename to norm_key ???)
  ##  "normalizes" the season key - always use 2017/18
  ##   (and not 2017-18 or 2017-2018 or 2017/2018)
  def self.normalize_key( key )
    ## 1) change 2017-18 to 2017/18
    slashed = key.tr( '-', '/' )
    ## 2) check for 2017/2018 (nine chars) - change to 2017/18
    return slashed unless slashed.length == 9

    "#{slashed[0,4]}/#{slashed[7,2]}"
  end

  ## Returns the season record for the (normalized) key or nil.
  def self.find( key )
    SportDb::Model::Season.find_by( key: normalize_key( key ) )
  end

  ## Same as find but prints an error and exits (status 1)
  ##  if no record is found.
  def self.find!( key )
    found = find( key )
    return found unless found.nil?

    puts "** !!!ERROR!!! db sync - no season match found for >#{normalize_key(key)}<:"
    pp key
    exit 1
  end

  ## Returns the season record for the key e.g. '2017/18';
  ##  creates it first (using the normalized key for key and title) if missing.
  def self.find_or_create( key )
    found = find( key )
    return found unless found.nil?

    ## note: do NOT forget to normalize key e.g. always use slash (2019/20) etc.
    norm_key = normalize_key( key )
    SportDb::Model::Season.create!( { key: norm_key, title: norm_key } )
  end
end # class Season
|
95
|
+
|
96
|
+
class Club
  ## Returns the team record with a title matching the club's name;
  ##  creates (and returns) a new team record if none is found.
  ##  The new record is keyed by the lowercased name stripped of
  ##  everything but a-z, gets linked to the club's country
  ##  (found or created) and stores the alternate names (if any)
  ##  joined by | in synonyms.
  def self.find_or_create( club )
    found = SportDb::Model::Team.find_by( title: club.name )
    return found unless found.nil?

    ## remove all non-ascii a-z chars
    key = club.name.downcase.gsub( /[^a-z]/, '' )
    puts "add club: #{key}, #{club.name}, #{club.country.name} (#{club.country.key})"

    attribs = { key:        key,
                title:      club.name,
                country_id: Country.find_or_create( club.country ).id,
                club:       true,
                national:   false   ## check -is default anyway - use - why? why not?
                ## todo/fix: add city if present - why? why not?
              }

    attribs[:synonyms] = club.alt_names.join('|')   unless club.alt_names.empty?

    SportDb::Model::Team.create!( attribs )
  end
end # class Club
|
121
|
+
|
122
|
+
class Event
  ## Returns the event record for the league + season pair
  ##  (matched by league_id and season_id) or nil.
  def self.find( league:, season: )
    SportDb::Model::Event.find_by( league_id: league.id, season_id: season.id )
  end

  ## Same as find but prints an error (plus league and season)
  ##  and exits (status 1) if no record is found.
  def self.find!( league:, season: )
    rec = find( league: league, season: season )
    if rec.nil?
      puts "** !!!ERROR!!! db sync - no event match found for:"
      pp league
      pp season
      exit 1
    end
    rec
  end

  ## Returns the event record for the league + season pair;
  ##  creates it first if missing.
  ##
  ##  note: a new event gets a "fake" start_at date derived from the season key:
  ##    - single year key e.g. 2018   => Date.new( 2018, 1, 1 )
  ##    - any other key   e.g. 2014/15 => Date.new( 2014, 7, 1 )
  def self.find_or_create( league:, season: )
    rec = find( league: league, season: season )
    if rec.nil?
      ## quick hack/change later !!
      ##  hack:  fix/todo1!!
      ##   add "fake" start_at date for now
      ## note: compare the key size with the integer 4 (a comparison with
      ##         the string '4' is always false and, thus,
      ##         single-year seasons would always end up with July 1st)
      if season.key.size == 4     ## e.g. assume 2018 etc.
        year     = season.key.to_i
        start_at = Date.new( year, 1, 1 )
      else                        ## assume 2014/15 etc.
        year     = season.key[0..3].to_i
        start_at = Date.new( year, 7, 1 )
      end

      attribs = {
        league_id: league.id,
        season_id: season.id,
        start_at:  start_at }

      rec = SportDb::Model::Event.create!( attribs )
    end
    rec
  end
end # class Event
|
164
|
+
|
165
|
+
class Round
  ## Returns the round record with the same title for the event;
  ##  creates (and returns) a new record if none is found.
  ##  note: a new round uses the event's start_at (as date) for its own start_at.
  def self.find_or_create( round, event: )
    model = SportDb::Model::Round

    found = model.find_by( title: round.title, event_id: event.id )
    return found unless found.nil?

    model.create!( { event_id: event.id,
                     title:    round.title,
                     pos:      round.pos,
                     start_at: event.start_at.to_date } )
  end
end # class Round
|
179
|
+
|
180
|
+
class Match   ## todo/check: add alias for Game class - why? why not?
  ## Returns the game record for the match (looked up by round and
  ##  the two team ids); creates (and returns) a new record if none is found.
  ##  note: an existing record is returned as is (update is still a todo).
  def self.create_or_update( match, event: )
    ## note: MUST find round, thus, use bang (!)
    round_rec = SportDb::Model::Round.find_by!( event_id: event.id,
                                                title:    match.round.title )

    ## the three ids that identify a game
    ids = { round_id: round_rec.id,
            team1_id: match.team1.id,
            team2_id: match.team2.id }

    found = SportDb::Model::Game.find_by( **ids )
    # update - todo
    return found unless found.nil?

    attribs = ids.merge( pos:     999,     ## make optional why? why not? - change to num?
                         play_at: match.date.to_date,
                         score1:  match.score1,
                         score2:  match.score2,
                         score1i: match.score1i,
                         score2i: match.score2i )

    SportDb::Model::Game.create!( attribs )
  end
end # class Match
|
206
|
+
|
207
|
+
end # module Sync
|
208
|
+
end # module SportDb
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
module Readers

  ## version number parts
  MAJOR = 0    ## todo: namespace inside version or something - why? why not??
  MINOR = 0
  PATCH = 1
  VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

  ## Returns the version string (MAJOR.MINOR.PATCH).
  def self.version
    VERSION
  end

  ## Returns a one-line banner with the library version plus
  ##  the ruby version, release date and platform.
  def self.banner
    "sportdb-readers/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  end

  ## Returns the (expanded) path of the directory four levels up from this file.
  def self.root
    dir = __FILE__
    4.times { dir = File.dirname( dir ) }
    File.expand_path( dir )
  end

end # module Readers
|
25
|
+
end # module SportDb
|
data/test/helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
## minitest setup
|
4
|
+
require 'minitest/autorun'
|
5
|
+
|
6
|
+
|
7
|
+
## our own code
|
8
|
+
require 'sportdb/readers'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
## use (switch to) "external" datasets
##   note: assumes SportDb::Import.config hands back the same
##          configuration object on every call - TODO confirm
config = SportDb::Import.config
config.clubs_dir   = "../../../openfootball/clubs"
config.leagues_dir = "../../../openfootball/leagues"


## shortcuts (constants) for the indexes built from the datasets
LEAGUES   = config.leagues
CLUBS     = config.clubs
COUNTRIES = config.countries
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_match_parser.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
## Smoke test for the match parser - parses two matchdays and
##  pretty prints the results (no assertions on the parsed records yet).
##
## note: uses Minitest::Test (and not the old MiniTest alias,
##         which is no longer defined by default in newer minitest versions)
class TestMatchParser < Minitest::Test

  ## build ActiveRecord-like club records/structs
  Club = Struct.new( :key, :title, :synonyms )

  ## Reads club records from text - one record per line with
  ##  comma-separated values:
  ##    key, title, and (optional) alternate names
  ##  alternate names get joined by | into synonyms (nil if none).
  def Club.read( txt )
    recs = []
    txt.each_line do |line|
      values = line.split( ',' )
      values = values.map {|value| value.strip }
      recs << Club.new( values[0], values[1], values.size > 2 ? values[2..-1].join('|') : nil )
    end
    recs
  end


  def test_parse
    txt = <<TXT
Matchday 1

[Fri Aug/11]
Arsenal FC 4-3 Leicester City
[Sat Aug/12]
Watford FC 3-3 Liverpool FC
Chelsea FC 2-3 Burnley FC
Crystal Palace 0-3 Huddersfield Town
Everton FC 1-0 Stoke City
Southampton FC 0-0 Swansea City
West Bromwich Albion 1-0 AFC Bournemouth
Brighton & Hove Albion 0-2 Manchester City
[Sun Aug/13]
Newcastle United 0-2 Tottenham Hotspur
Manchester United 4-0 West Ham United


Matchday 2

[Sat Aug/19]
Swansea City 0-4 Manchester United
AFC Bournemouth 0-2 Watford FC
Burnley FC 0-1 West Bromwich Albion
Leicester City 2-0 Brighton & Hove Albion
Liverpool FC 1-0 Crystal Palace
Southampton FC 3-2 West Ham United
Stoke City 1-0 Arsenal FC
[Sun Aug/20]
Huddersfield Town 1-0 Newcastle United
Tottenham Hotspur 1-2 Chelsea FC
[Mon Aug/21]
Manchester City 1-1 Everton FC
TXT

    clubs_txt = <<TXT
arsenalfc, Arsenal FC, Arsenal, FC Arsenal
leicestercityfc, Leicester City FC, Leicester, Leicester City
watfordfc, Watford FC, Watford, FC Watford
liverpoolfc, Liverpool FC, Liverpool, FC Liverpool
chelseafc, Chelsea FC, Chelsea, FC Chelsea
burnleyfc, Burnley FC, Burnley, FC Burnley
crystalpalacefc, Crystal Palace FC, Crystal Palace, C Palace, Palace, Crystal P
huddersfieldtownafc, Huddersfield Town AFC, Huddersfield, Huddersfield Town
evertonfc, Everton FC, Everton, FC Everton
stokecityfc, Stoke City FC, Stoke, Stoke City
southamptonfc, Southampton FC, Southampton, FC Southampton
swanseacityfc, Swansea City FC, Swansea, Swansea City, Swansea City AFC
westbromwichalbionfc, West Bromwich Albion FC, West Brom, West Bromwich Albion, West Bromwich, Albion
afcbournemouth, AFC Bournemouth, Bournemouth, A.F.C. Bournemouth, Bournemouth FC
brightonhovealbionfc, Brighton & Hove Albion FC, Brighton, Brighton & Hove, Brighton & Hove Albion
manchestercityfc, Manchester City FC, Man City, Manchester City, Man. City, Manchester C
newcastleunitedfc, Newcastle United FC, Newcastle, Newcastle Utd, Newcastle United
tottenhamhotspurfc, Tottenham Hotspur FC, Tottenham, Tottenham Hotspur, Spurs
manchesterunitedfc, Manchester United FC, Man Utd, Man. United, Manchester U., Manchester Utd, Manchester United
westhamunitedfc, West Ham United FC, West Ham, West Ham United
TXT


    clubs = Club.read( clubs_txt )
    pp clubs

    lines = txt.split( /\n+/ )    # note: removes/strips empty lines
    pp lines

    start_at = Date.new( 2017, 7, 1 )


    parser = SportDb::MatchParserSimpleV2.new( lines, clubs, start_at )
    rounds, matches = parser.parse
    pp rounds
    pp matches
  end   # method test_parse
end   # class TestMatchParser
|
data/test/test_reader.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
## Smoke test for the event and match readers - sets up an in-memory
##  database and reads in one season from the "external" england dataset
##  (no assertions on the records read yet).
##
## note: uses Minitest::Test (and not the old MiniTest alias,
##         which is no longer defined by default in newer minitest versions)
class TestReader < Minitest::Test

  def test_read

    SportDb.connect( adapter: 'sqlite3', database: ':memory:' )
    SportDb.create_all   ## build schema

    ## turn on logging to console
    ActiveRecord::Base.logger = Logger.new(STDOUT)


    ## step 1 - read the event (season) configuration
    path = "../../../openfootball/england/2015-16/.conf.txt"
    # path = "../../../openfootball/england/2017-18/.conf.txt"
    # path = "../../../openfootball/england/2018-19/.conf.txt"
    # path = "../../../openfootball/england/2019-20/.conf.txt"
    SportDb::EventReaderV2.read( path )

    ## step 2 - read the matches
    path = "../../../openfootball/england/2015-16/1-premierleague-i.txt"
    # path = "../../../openfootball/england/2017-18/1-premierleague-i.txt"
    # path = "../../../openfootball/england/2018-19/1-premierleague.txt"
    # path = "../../../openfootball/england/2019-20/1-premierleague.txt"
    SportDb::MatchReaderV2.read( path )
    # path = "../../../openfootball/england/2017-18/1-premierleague-ii.txt"
    # SportDb::MatchReaderV2.read( path )
  end  # method test_read
end # class TestReader
|