sportdb-formats 1.0.5 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +8 -11
- data/Rakefile +1 -1
- data/lib/sportdb/formats.rb +19 -0
- data/lib/sportdb/formats/country/country_index.rb +2 -2
- data/lib/sportdb/formats/event/event_index.rb +141 -0
- data/lib/sportdb/formats/event/event_reader.rb +183 -0
- data/lib/sportdb/formats/league/league_index.rb +22 -18
- data/lib/sportdb/formats/league/league_outline_reader.rb +27 -7
- data/lib/sportdb/formats/league/league_reader.rb +7 -1
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +141 -193
- data/lib/sportdb/formats/match/match_parser_csv.rb +169 -25
- data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
- data/lib/sportdb/formats/name_helper.rb +4 -1
- data/lib/sportdb/formats/package.rb +57 -9
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_formats.rb +19 -0
- data/lib/sportdb/formats/score/score_parser.rb +10 -2
- data/lib/sportdb/formats/season_utils.rb +0 -11
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/match.rb +7 -1
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/season.rb +114 -45
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +8 -2
- data/lib/sportdb/formats/team/club_index.rb +13 -11
- data/lib/sportdb/formats/team/club_index_history.rb +138 -0
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
- data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +48 -81
- data/test/test_club_index_history.rb +107 -0
- data/test/test_club_reader_history.rb +212 -0
- data/test/test_country_reader.rb +2 -2
- data/test/test_datafile_package.rb +1 -1
- data/test/test_match_status_parser.rb +49 -0
- data/test/test_regex.rb +25 -7
- data/test/test_scores.rb +2 -0
- data/test/test_season.rb +68 -19
- metadata +12 -15
- data/test/test_conf.rb +0 -65
- data/test/test_csv_match_parser.rb +0 -114
- data/test/test_csv_match_parser_utils.rb +0 -20
- data/test/test_match_auto.rb +0 -72
- data/test/test_match_auto_champs.rb +0 -45
- data/test/test_match_auto_euro.rb +0 -37
- data/test/test_match_auto_worldcup.rb +0 -61
- data/test/test_match_champs.rb +0 -27
- data/test/test_match_eng.rb +0 -26
- data/test/test_match_euro.rb +0 -27
- data/test/test_match_worldcup.rb +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 666fc1f16808dddbc988aa073c0d3b47c08a5934
|
4
|
+
data.tar.gz: 97437435e1f37341c9f2cc15c8b4e67adf0efe61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9498c5e378feab9fa1e80a8287f33ab608e606ecde6ae914997483850faffbbbc7695ef07d2cea1e04a529e724b01612e8231fe8625c19d0af34f3fabffd0a14
|
7
|
+
data.tar.gz: 28146d5c35a061f6c128a001c190681eb76de5471cde8124435e77d93ccca090bf0a84a7a6a09ccb67372f8edef0198e8f09390126629d14072901ff5fcb7743
|
data/Manifest.txt
CHANGED
@@ -8,6 +8,8 @@ lib/sportdb/formats/country/country_index.rb
|
|
8
8
|
lib/sportdb/formats/country/country_reader.rb
|
9
9
|
lib/sportdb/formats/datafile.rb
|
10
10
|
lib/sportdb/formats/datafile_package.rb
|
11
|
+
lib/sportdb/formats/event/event_index.rb
|
12
|
+
lib/sportdb/formats/event/event_reader.rb
|
11
13
|
lib/sportdb/formats/goals.rb
|
12
14
|
lib/sportdb/formats/league/league_index.rb
|
13
15
|
lib/sportdb/formats/league/league_outline_reader.rb
|
@@ -18,6 +20,7 @@ lib/sportdb/formats/match/mapper_teams.rb
|
|
18
20
|
lib/sportdb/formats/match/match_parser.rb
|
19
21
|
lib/sportdb/formats/match/match_parser_auto_conf.rb
|
20
22
|
lib/sportdb/formats/match/match_parser_csv.rb
|
23
|
+
lib/sportdb/formats/match/match_status_parser.rb
|
21
24
|
lib/sportdb/formats/name_helper.rb
|
22
25
|
lib/sportdb/formats/outline_reader.rb
|
23
26
|
lib/sportdb/formats/package.rb
|
@@ -36,7 +39,9 @@ lib/sportdb/formats/structs/standings.rb
|
|
36
39
|
lib/sportdb/formats/structs/team.rb
|
37
40
|
lib/sportdb/formats/structs/team_usage.rb
|
38
41
|
lib/sportdb/formats/team/club_index.rb
|
42
|
+
lib/sportdb/formats/team/club_index_history.rb
|
39
43
|
lib/sportdb/formats/team/club_reader.rb
|
44
|
+
lib/sportdb/formats/team/club_reader_history.rb
|
40
45
|
lib/sportdb/formats/team/club_reader_props.rb
|
41
46
|
lib/sportdb/formats/team/national_team_index.rb
|
42
47
|
lib/sportdb/formats/team/team_index.rb
|
@@ -44,14 +49,13 @@ lib/sportdb/formats/team/wiki_reader.rb
|
|
44
49
|
lib/sportdb/formats/version.rb
|
45
50
|
test/helper.rb
|
46
51
|
test/test_club_index.rb
|
52
|
+
test/test_club_index_history.rb
|
47
53
|
test/test_club_reader.rb
|
54
|
+
test/test_club_reader_history.rb
|
48
55
|
test/test_club_reader_props.rb
|
49
56
|
test/test_clubs.rb
|
50
|
-
test/test_conf.rb
|
51
57
|
test/test_country_index.rb
|
52
58
|
test/test_country_reader.rb
|
53
|
-
test/test_csv_match_parser.rb
|
54
|
-
test/test_csv_match_parser_utils.rb
|
55
59
|
test/test_csv_reader.rb
|
56
60
|
test/test_datafile.rb
|
57
61
|
test/test_datafile_package.rb
|
@@ -60,14 +64,7 @@ test/test_league_index.rb
|
|
60
64
|
test/test_league_outline_reader.rb
|
61
65
|
test/test_league_reader.rb
|
62
66
|
test/test_match.rb
|
63
|
-
test/
|
64
|
-
test/test_match_auto_champs.rb
|
65
|
-
test/test_match_auto_euro.rb
|
66
|
-
test/test_match_auto_worldcup.rb
|
67
|
-
test/test_match_champs.rb
|
68
|
-
test/test_match_eng.rb
|
69
|
-
test/test_match_euro.rb
|
70
|
-
test/test_match_worldcup.rb
|
67
|
+
test/test_match_status_parser.rb
|
71
68
|
test/test_name_helper.rb
|
72
69
|
test/test_outline_reader.rb
|
73
70
|
test/test_package.rb
|
data/Rakefile
CHANGED
data/lib/sportdb/formats.rb
CHANGED
@@ -75,10 +75,12 @@ require 'sportdb/formats/goals'
|
|
75
75
|
|
76
76
|
require 'sportdb/formats/match/mapper'
|
77
77
|
require 'sportdb/formats/match/mapper_teams'
|
78
|
+
require 'sportdb/formats/match/match_status_parser'
|
78
79
|
require 'sportdb/formats/match/match_parser'
|
79
80
|
require 'sportdb/formats/match/match_parser_auto_conf'
|
80
81
|
require 'sportdb/formats/match/conf_parser'
|
81
82
|
|
83
|
+
|
82
84
|
require 'sportdb/formats/match/match_parser_csv'
|
83
85
|
|
84
86
|
require 'sportdb/formats/country/country_reader'
|
@@ -119,6 +121,9 @@ require 'sportdb/formats/team/wiki_reader'
|
|
119
121
|
require 'sportdb/formats/team/national_team_index'
|
120
122
|
require 'sportdb/formats/team/team_index'
|
121
123
|
|
124
|
+
require 'sportdb/formats/team/club_reader_history'
|
125
|
+
require 'sportdb/formats/team/club_index_history'
|
126
|
+
|
122
127
|
|
123
128
|
###
|
124
129
|
# add convenience helpers / shortcuts
|
@@ -136,6 +141,20 @@ end # module Import
|
|
136
141
|
end # module SportDb
|
137
142
|
|
138
143
|
|
144
|
+
require 'sportdb/formats/event/event_reader'
|
145
|
+
require 'sportdb/formats/event/event_index'
|
146
|
+
|
147
|
+
## add convenience helper
|
148
|
+
module SportDb
|
149
|
+
module Import
|
150
|
+
class EventInfo
  ## convenience shortcuts - lets callers use EventInfo.read / EventInfo.parse
  ##   instead of going through the reader class directly
  class << self
    ## hands path on to EventInfoReader.read and returns its result
    def read( path )
      EventInfoReader.read( path )
    end

    ## hands txt on to EventInfoReader.parse and returns its result
    def parse( txt )
      EventInfoReader.parse( txt )
    end
  end
end   # class EventInfo
|
154
|
+
end # module Import
|
155
|
+
end # module SportDb
|
156
|
+
|
157
|
+
|
139
158
|
|
140
159
|
|
141
160
|
|
@@ -109,12 +109,12 @@ class CountryIndex
|
|
109
109
|
@countries_by_name[ name ]
|
110
110
|
end
|
111
111
|
|
112
|
-
def
|
112
|
+
## look up a country by key - the code lookup is tried first;
##   only if that returns nil the name lookup is used as a fallback
def find( key )
  by_code = find_by_code( key )
  return by_code   unless by_code.nil?

  find_by_name( key )     ## try lookup / find by (normalized) name
end
|
117
|
-
alias_method :
|
117
|
+
alias_method :[], :find
|
118
118
|
|
119
119
|
|
120
120
|
###
|
@@ -0,0 +1,141 @@
|
|
1
|
+
module SportDb
|
2
|
+
module Import
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
## Index of event (info) records.
##  Keeps all records in a "linear" list (see events) plus
##  a two-level lookup hash: league key => season key => record.
class EventIndex

  ## Build an index from a package.
  ##   path - handed on to Package.new
  ## Every entry yielded by Package#each_seasons gets parsed
  ##  with EventInfoReader.parse; all resulting records are added.
  ## Returns the populated index.
  def self.build( path )
    pack = Package.new( path )   ## lets us use directory or zip archive

    recs = []
    pack.each_seasons do |entry|
      ## append in place - no need to rebuild the array for every datafile
      recs.concat( EventInfoReader.parse( entry.read ) )
    end

    index = new
    index.add( recs )
    index
  end


  attr_reader :events

  def initialize
    @events  = []
    @leagues = {}
  end

  ## Add records; every record must respond to league and season
  ##  (and both must respond to key).
  ##  note: a later record with the same league & season key
  ##         replaces the earlier one in the lookup hash
  ##         (but both stay in the "linear" events list)
  def add( recs )
    @events += recs   ## add to "linear" records

    ## build search index by leagues (and season)
    recs.each do |rec|
      seasons = @leagues[ rec.league.key ] ||= {}
      seasons[ rec.season.key ] = rec
    end
  end

  ## Find the record for a league and season.
  ##   league - key (String) or an object responding to key
  ##   season - key (String) or an object responding to key
  ## Returns the record or nil if the league or season is unknown.
  def find_by( league:, season: )
    league_key = league.is_a?( String ) ? league : league.key
    season_key = season.is_a?( String ) ? season : season.key

    seasons = @leagues[ league_key ]
    seasons && seasons[ season_key ]
  end # method find_by
end ## class EventIndex
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
## Lookup index: league key => year => list of event records.
##  Lets you find the season (key) for a date and league.
class SeasonIndex

  ## Expects exactly one argument - an EventIndex;
  ##  all of its events get (auto-)added.
  ##  Raises ArgumentError for anything else.
  def initialize( *args )
    @leagues = {}   ## use a league hash by years for now; change later

    if args.size == 1 && args[0].is_a?( EventIndex )
      ## convenience setup/hookup
      ##  (auto-)add all events from event index
      add( args[0].events )
    else
      pp args
      raise ArgumentError.new( 'unsupported arguments' )
    end
  end

  ## Add event records to the by-year lookup.
  ##  Seasons answering true to year? get filed under the start year only;
  ##  all others under the start year AND the end year.
  def add( recs )
    ## use a lookup index by year for now
    ##  todo - find something better/more generic for searching/matching date periods!!!
    recs.each do |rec|
      season = rec.season
      years  = @leagues[ rec.league.key ] ||= {}

      keys = season.year? ? [season.start_year] : [season.start_year, season.end_year]
      keys.each { |year| years[ year ] ||= [] }
      keys.each { |year| years[ year ] << rec }
    end
  end # method add

  ## Find the season key for a date in a league.
  ##   date   - date object or a String in the format YYYY-MM-DD
  ##   league - key (String) or an object responding to key
  ##
  ## Returns
  ##   - nil if the league is unknown or has no events filed under the date's year
  ##   - the season key of the first event that includes the date
  ##   - otherwise (with a warning printed) the season key of the first event
  ##      with an end date before the date ("overflow" at the end)
  ##
  ## NOTE: if no event qualifies for the overflow either, an error gets
  ##        printed and the process is terminated via exit 1.
  def find_by( date:, league: )
    date       = Date.strptime( date, '%Y-%m-%d' )   if date.is_a?( String )
    league_key = league.is_a?( String ) ? league : league.key

    years = @leagues[ league_key ]
    return nil   unless years     ## no league defined / found

    events = years[ date.year ]
    return nil   unless events    ## no year defined / found for league

    ## todo/check: rename/use between? instead of include? - why? why not?
    match = events.find { |event| event.include?( date ) }
    return match.season.key   if match

    puts "!! WARN: date >#{date}< out-of-seasons for year #{date.year} in league #{league_key}:"
    events.each do |event|
      puts " #{event.season.key} | #{event.start_date} - #{event.end_date}"
    end

    ## retry again and pick season with "overflow" at the end (date is greater than end_date)
    ##  note: events without an end date cannot overflow - skip them
    ##         (comparing a date with nil would raise an ArgumentError)
    overflow = events.find { |event| event.end_date && date > event.end_date }
    if overflow
      diff_in_days = date.to_date.jd - overflow.end_date.to_date.jd
      puts " +#{diff_in_days} days - adding overflow to #{overflow.season.key} ending on #{overflow.end_date} ++ #{date}"
      return overflow.season.key
    end

    ## exit now for sure - if still empty!!!!
    puts "!! ERROR: CANNOT auto-fix / (auto-)append date at the end of an event; check season setup - sorry"
    exit 1
  end # method find_by

end # class SeasonIndex
|
138
|
+
|
139
|
+
|
140
|
+
end # module Import
|
141
|
+
end # module SportDb
|
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Import
|
4
|
+
|
5
|
+
|
6
|
+
class EventInfo
  ##  "high level" info (summary) about event  (like a "wikipedia infobox")
  ##    use for checking dataset imports; lets you check e.g.
  ##    - dates within range
  ##    - number of teams e.g. 20
  ##    - matches played  e.g. 380
  ##    - goals scored    e.g. 937
  ##    etc.

  attr_reader :league,
              :season,
              :teams,
              :matches,
              :goals,
              :start_date,
              :end_date

  ## league and season are required; everything else is optional (defaults to nil)
  def initialize( league:, season:,
                  start_date: nil, end_date: nil,
                  teams:   nil,
                  matches: nil,
                  goals:   nil )

    @league      = league
    @season      = season

    @start_date  = start_date
    @end_date    = end_date

    @teams       = teams    ## todo/check: rename/use teams_count ??
    @matches     = matches  ## todo/check: rename/use match_count ??
    @goals       = goals
  end

  ## Check if date falls into the event period.
  ##
  ##  The start and the end get checked independently:
  ##   - if the start (or end) date is known it is used
  ##      (with one day of tolerance)
  ##   - if not, a generic rule based on the season is used as a fallback
  ##
  ##  That is, an event with a start date only (no end date) no longer
  ##   ignores its start date.
  ##
  ## todo/fix: add options e.g.
  ##  - add delta/off_by_one or such?
  ##  - add strict (for) only return true if date range (really) defined (no generic auto-rules)
  def include?( date )
    on_or_after_start?( date ) && on_or_before_end?( date )
  end # method include?
  alias_method :between?, :include?


  private

  ### note: for now allow off by one error (via timezone/local time errors)
  ##    todo/fix: issue warning if off by one!!!!
  def on_or_after_start?( date )
    if @start_date
      date >= (@start_date-1)
    elsif @season.year?
      ## assume generic rule - same year e.g. Jan 1 - Dec 31
      date.year >= @season.start_year
    else
      ## assume generic rule - July 1 - June 30 (Y+1)
      ##  - todo/check -start for some countries/leagues in June 1 or August 1 ????
      date >= Date.new( @season.start_year, 7, 1 )
    end
  end

  def on_or_before_end?( date )
    if @end_date
      date <= (@end_date+1)
    elsif @season.year?
      ## assume generic rule - same year e.g. Jan 1 - Dec 31
      date.year <= @season.start_year
    else
      ## assume generic rule - July 1 - June 30 (Y+1)
      date <= Date.new( @season.end_year, 6, 30 )
    end
  end
end # class EventInfo
|
66
|
+
|
67
|
+
|
68
|
+
## Reads event info records from tabular (csv) text.
##  Columns used (looked up by header):
##    League, Season (or Year), Dates, Clubs (or Teams), Matches, Goals
class EventInfoReader
  def catalog() Import.catalog; end


  ## read the file at path (as utf-8) and parse it; returns an array of EventInfo records
  def self.read( path )
    txt = File.open( path, 'r:utf-8') {|f| f.read }
    new( txt ).parse
  end

  ## parse the text; returns an array of EventInfo records
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end

  ## Turn every row into an EventInfo record.
  ##
  ## NOTE: prints progress/debug info to stdout and
  ##        terminates the process via exit 1 on an invalid dates column
  ##        (see parse_dates).
  def parse
    recs = []

    parse_csv( @txt ).each do |row|
      season = Import::Season.new( row['Season'] || row['Year'] )
      league = catalog.leagues.find!( row['League'] )

      dates  = parse_dates( row['Dates'], league: league, season: season )

      recs << EventInfo.new( league:     league,
                             season:     season,
                             start_date: dates[0],
                             end_date:   dates[1],
                             teams:      parse_count( row['Clubs'] || row['Teams'] ),
                             matches:    parse_matches( row['Matches'] ),
                             goals:      parse_count( row['Goals'] )
                           )
    end # each row
    recs
  end # method parse


  private

  ## Parse the dates column into an array with
  ##   no dates (empty array), the start date only or the start and end date.
  ##
  ##  The year of the season is handed to the date parser via start:
  ##   (season start year for the first date;
  ##    season end year - if present - for the second date).
  def parse_dates( dates_col, league:, season: )
    ## no dates - keep dates array empty
    ##   note: a whitespace-only column counts as no dates too
    return []   if dates_col.nil? || dates_col.strip.empty?

    dates_col = dates_col.gsub( /[ ]{2,}/, ' ' )   ## squish/fold spaces

    puts "#{league.name} (#{league.key}) | #{season.key} | #{dates_col}"

    ### todo/check: check what parts "Aug 15" return ???
    ###  short form for "Aug 15 -" - works?

    ## todo/fix!!! - check EventInfo.include?
    ##   now allow dates with only start_date too!! (WITHOUT end_date)
    parts = dates_col.split( /[ ]*[–-][ ]*/ )

    unless parts.size == 1 || parts.size == 2
      puts "!! ERROR - expected data range / period - one or two dates; got #{parts.size}:"
      pp dates_col
      pp parts
      exit 1
    end

    pp parts
    dates = []
    dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
    if parts.size == 2
      dates << DateFormats.parse( parts[1], start: Date.new( season.end_year || season.start_year, 1, 1 ), lang: 'en' )
    end
    pp dates

    if dates.size == 2
      ## assert/check if period is less than 365 days for now
      diff = dates[1].to_date.jd - dates[0].to_date.jd
      puts "#{diff}d"
      if diff > 365
        puts "!! ERROR - date range / period assertion failed; expected diff < 365 days"
        exit 1
      end
    end

    dates
  end

  ## Parse a count column e.g. 370 goals, 20 clubs, etc.
  ##   note: remove (and allow) all non-digits
  ##  Returns nil if the column is missing or has no digits.
  def parse_count( col )
    return nil   if col.nil?

    digits = col.gsub( /[^0-9]/, '' )
    digits.empty? ? nil : digits.to_i
  end

  ## Parse the matches (played) column.
  ##   note: support additions e.g.
  ##     132 + 63 Play-off-Spiele    => 195
  ##  Returns nil if the column is missing or has no digits (and no plus sign).
  def parse_matches( col )
    return nil   if col.nil?

    expr = col.gsub( /[^0-9+]/, '' )
    return nil   if expr.empty?

    ## note: for now only supports additions
    ##   (a single number is just a sum with one part)
    expr.split( '+' ).inject( 0 ) { |sum,str| sum + str.to_i }
  end
end # class EventInfoReader
|
179
|
+
|
180
|
+
|
181
|
+
end ## module Import
|
182
|
+
end ## module SportDb
|
183
|
+
|