sportdb-formats 1.1.6 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +53 -9
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +130 -13
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -1,183 +1,184 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module Import
|
4
|
-
|
5
|
-
|
6
|
-
class EventInfo
|
7
|
-
## "high level" info (summary) about event
|
8
|
-
##
|
9
|
-
##
|
10
|
-
## -
|
11
|
-
## -
|
12
|
-
## -
|
13
|
-
##
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
@
|
31
|
-
|
32
|
-
|
33
|
-
@
|
34
|
-
|
35
|
-
|
36
|
-
@
|
37
|
-
@
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
##
|
43
|
-
## - add
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
if
|
48
|
-
|
49
|
-
date
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
##
|
59
|
-
|
60
|
-
date
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
###
|
108
|
-
|
109
|
-
|
110
|
-
##
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
dates << DateFormats.parse( parts[
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
pp
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
end ## module
|
183
|
-
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Import
|
4
|
+
|
5
|
+
|
6
|
+
class EventInfo
|
7
|
+
## "high level" info (summary) about event
|
8
|
+
## (like a "wikipedia infobox")
|
9
|
+
## use for checking dataset imports; lets you check e.g.
|
10
|
+
## - dates within range
|
11
|
+
## - number of teams e.g. 20
|
12
|
+
## - matches played e.g. 380
|
13
|
+
## - goals scored e.g. 937
|
14
|
+
## etc.
|
15
|
+
|
16
|
+
attr_reader :league,
|
17
|
+
:season,
|
18
|
+
:teams,
|
19
|
+
:matches,
|
20
|
+
:goals,
|
21
|
+
:start_date,
|
22
|
+
:end_date
|
23
|
+
|
24
|
+
def initialize( league:, season:,
|
25
|
+
start_date: nil, end_date: nil,
|
26
|
+
teams: nil,
|
27
|
+
matches: nil,
|
28
|
+
goals: nil )
|
29
|
+
|
30
|
+
@league = league
|
31
|
+
@season = season
|
32
|
+
|
33
|
+
@start_date = start_date
|
34
|
+
@end_date = end_date
|
35
|
+
|
36
|
+
@teams = teams ## todo/check: rename/use teams_count ??
|
37
|
+
@matches = matches ## todo/check: rename/use match_count ??
|
38
|
+
@goals = goals
|
39
|
+
end
|
40
|
+
|
41
|
+
def include?( date )
|
42
|
+
## todo/fix: add options e.g.
|
43
|
+
## - add delta/off_by_one or such?
|
44
|
+
## - add strict (for) only return true if date range (really) defined (no generic auto-rules)
|
45
|
+
|
46
|
+
### note: for now allow off by one error (via timezone/local time errors)
|
47
|
+
## todo/fix: issue warning if off by one!!!!
|
48
|
+
if @start_date && @end_date
|
49
|
+
date >= (@start_date-1) &&
|
50
|
+
date <= (@end_date+1)
|
51
|
+
else
|
52
|
+
if @season.year?
|
53
|
+
# assume generic rule
|
54
|
+
## same year e.g. Jan 1 - Dec 31; always true for now
|
55
|
+
date.year == @season.start_year
|
56
|
+
else
|
57
|
+
# assume generic rule
|
58
|
+
## July 1 - June 30 (Y+1)
|
59
|
+
## - todo/check -start for some countries/leagues in June 1 or August 1 ????
|
60
|
+
date >= Date.new( @season.start_year, 7, 1 ) &&
|
61
|
+
date <= Date.new( @season.end_year, 6, 30 )
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end # method include?
|
65
|
+
alias_method :between?, :include?
|
66
|
+
end # class EventInfo
|
67
|
+
|
68
|
+
|
69
|
+
class EventInfoReader
|
70
|
+
def catalog() Import.catalog; end
|
71
|
+
|
72
|
+
|
73
|
+
def self.read( path )
|
74
|
+
txt = File.open( path, 'r:utf-8') {|f| f.read }
|
75
|
+
new( txt ).parse
|
76
|
+
end
|
77
|
+
|
78
|
+
def self.parse( txt )
|
79
|
+
new( txt ).parse
|
80
|
+
end
|
81
|
+
|
82
|
+
def initialize( txt )
|
83
|
+
@txt = txt
|
84
|
+
end
|
85
|
+
|
86
|
+
def parse
|
87
|
+
recs = []
|
88
|
+
|
89
|
+
parse_csv( @txt ).each do |row|
|
90
|
+
league_col = row['League']
|
91
|
+
season_col = row['Season'] || row['Year']
|
92
|
+
dates_col = row['Dates']
|
93
|
+
|
94
|
+
season = Season.parse( season_col )
|
95
|
+
league = catalog.leagues.find!( league_col )
|
96
|
+
|
97
|
+
|
98
|
+
dates = []
|
99
|
+
if dates_col.nil? || dates_col.empty?
|
100
|
+
## do nothing; no dates - keep dates array empty
|
101
|
+
else
|
102
|
+
## squish spaces
|
103
|
+
dates_col = dates_col.gsub( /[ ]{2,}/, ' ' ) ## squish/fold spaces
|
104
|
+
|
105
|
+
puts "#{league.name} (#{league.key}) | #{season.key} | #{dates_col}"
|
106
|
+
|
107
|
+
### todo/check: check what parts "Aug 15" return ???
|
108
|
+
### short form for "Aug 15 -" - works?
|
109
|
+
|
110
|
+
## todo/fix!!! - check EventInfo.include?
|
111
|
+
## now allow dates with only start_date too!! (WITHOUT end_date)
|
112
|
+
parts = dates_col.split( /[ ]*[–-][ ]*/ )
|
113
|
+
if parts.size == 1
|
114
|
+
pp parts
|
115
|
+
dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
|
116
|
+
pp dates
|
117
|
+
elsif parts.size == 2
|
118
|
+
pp parts
|
119
|
+
dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
|
120
|
+
dates << DateFormats.parse( parts[1], start: Date.new( season.end_year ? season.end_year : season.start_year, 1, 1 ), lang: 'en' )
|
121
|
+
pp dates
|
122
|
+
|
123
|
+
## assert/check if period is less than 365 days for now
|
124
|
+
diff = dates[1].to_date.jd - dates[0].to_date.jd
|
125
|
+
puts "#{diff}d"
|
126
|
+
if diff > 365
|
127
|
+
puts "!! ERROR - date range / period assertion failed; expected diff < 365 days"
|
128
|
+
exit 1
|
129
|
+
end
|
130
|
+
else
|
131
|
+
puts "!! ERRROR - expected data range / period - one or two dates; got #{parts.size}:"
|
132
|
+
pp dates_col
|
133
|
+
pp parts
|
134
|
+
exit 1
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
|
139
|
+
teams_col = row['Clubs'] || row['Teams']
|
140
|
+
goals_col = row['Goals']
|
141
|
+
|
142
|
+
## note: remove (and allow) all non-digits e.g. 370 goals, 20 clubs, etc.
|
143
|
+
teams_col = teams_col.gsub( /[^0-9]/, '' ) if teams_col
|
144
|
+
goals_col = goals_col.gsub( /[^0-9]/, '' ) if goals_col
|
145
|
+
|
146
|
+
teams = (teams_col.nil? || teams_col.empty?) ? nil : teams_col.to_i
|
147
|
+
goals = (goals_col.nil? || goals_col.empty?) ? nil : goals_col.to_i
|
148
|
+
|
149
|
+
matches_col = row['Matches']
|
150
|
+
## note: support additions in matches (played) e.g.
|
151
|
+
# 132 + 63 Play-off-Spiele
|
152
|
+
matches_col = matches_col.gsub( /[^0-9+]/, '' ) if matches_col
|
153
|
+
|
154
|
+
matches = if matches_col.nil? || matches_col.empty?
|
155
|
+
nil
|
156
|
+
else
|
157
|
+
if matches_col.index( '+' ) ### check for calculations
|
158
|
+
## note: for now only supports additions
|
159
|
+
matches_col.split( '+' ).reduce( 0 ) do |sum,str|
|
160
|
+
sum + str.to_i
|
161
|
+
end
|
162
|
+
else ## assume single (integer) number
|
163
|
+
matches_col.to_i
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
rec = EventInfo.new( league: league,
|
168
|
+
season: season,
|
169
|
+
start_date: dates[0],
|
170
|
+
end_date: dates[1],
|
171
|
+
teams: teams,
|
172
|
+
matches: matches,
|
173
|
+
goals: goals
|
174
|
+
)
|
175
|
+
recs << rec
|
176
|
+
end # each row
|
177
|
+
recs
|
178
|
+
end # method parse
|
179
|
+
end # class EventInfoReader
|
180
|
+
|
181
|
+
|
182
|
+
end ## module Import
|
183
|
+
end ## module SportDb
|
184
|
+
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
module SportDb
|
4
3
|
|
@@ -15,6 +14,15 @@ class GoalsPlayerStruct
|
|
15
14
|
def initialize
|
16
15
|
@minutes = []
|
17
16
|
end
|
17
|
+
|
18
|
+
def pretty_print( printer )
|
19
|
+
buf = String.new
|
20
|
+
buf << "<GoalsPlayerStruct: #{@name} "
|
21
|
+
buf << @minutes.pretty_print_inspect
|
22
|
+
buf << ">"
|
23
|
+
|
24
|
+
printer.text( buf )
|
25
|
+
end
|
18
26
|
end
|
19
27
|
|
20
28
|
|
@@ -27,6 +35,18 @@ class GoalsMinuteStruct
|
|
27
35
|
@penalty = false
|
28
36
|
@owngoal = false
|
29
37
|
end
|
38
|
+
|
39
|
+
def pretty_print( printer )
|
40
|
+
buf = String.new
|
41
|
+
buf << "<GoalsMinuteStruct: #{@minute}"
|
42
|
+
buf << "+#{@offset}" if @offset && @offset > 0
|
43
|
+
buf << "'"
|
44
|
+
buf << " (o.g.)" if @owngoal
|
45
|
+
buf << " (pen.)" if @penalty
|
46
|
+
buf << ">"
|
47
|
+
|
48
|
+
printer.text( buf )
|
49
|
+
end
|
30
50
|
end
|
31
51
|
|
32
52
|
|
@@ -59,6 +79,22 @@ class GoalStruct
|
|
59
79
|
def state
|
60
80
|
[@name, @team, @minute, @offset, @penalty, @owngoal, @score1, @score2]
|
61
81
|
end
|
82
|
+
|
83
|
+
|
84
|
+
def pretty_print( printer )
|
85
|
+
buf = String.new
|
86
|
+
buf << "<GoalStruct: #{@score1}-#{@score2} #{@name} #{@minute}"
|
87
|
+
buf << "+#{@offset}" if @offset && @offset > 0
|
88
|
+
buf << "'"
|
89
|
+
buf << " (o.g.)" if @owngoal
|
90
|
+
buf << " (pen.)" if @penalty
|
91
|
+
buf << " for #{@team}" ### team 1 or 2 - use home/away
|
92
|
+
buf << ">"
|
93
|
+
|
94
|
+
printer.text( buf )
|
95
|
+
end
|
96
|
+
|
97
|
+
|
62
98
|
end
|
63
99
|
|
64
100
|
|
@@ -173,6 +209,10 @@ class GoalsParser
|
|
173
209
|
include LogUtils::Logging
|
174
210
|
|
175
211
|
|
212
|
+
### todo/fix:
|
213
|
+
## let's use stringscanner for parsing line - why? why not?
|
214
|
+
|
215
|
+
|
176
216
|
# note: use ^ for start of string only!!!
|
177
217
|
# - for now slurp everything up to digits (incl. spaces - use strip to remove)
|
178
218
|
# todo/check: use/rename to NAME_UNTIL_REGEX ??? ( add lookahead for spaces?)
|
@@ -184,18 +224,21 @@ class GoalsParser
|
|
184
224
|
# todo/check: change to MINUTE_REGEX ??
|
185
225
|
# add MINUTE_SKIP_REGEX or MINUTE_SEP_REGEX /^[ ,]+/
|
186
226
|
# todo/fix: split out penalty and owngoal flag in PATTERN constant for reuse
|
227
|
+
# note - offset 90+10 possible!!!!
|
228
|
+
# note - allow p/pen./pen or o.g. or og
|
187
229
|
MINUTES_REGEX = /^ # note: use ^ for start of string only!!!
|
188
230
|
(?<minute>[0-9]{1,3})
|
189
231
|
(?:\+
|
190
|
-
(?<offset>[
|
232
|
+
(?<offset>[0-9]{1,2})
|
191
233
|
)?
|
192
234
|
'
|
193
235
|
(?:[ ]*
|
194
236
|
\(
|
195
|
-
(?<type>
|
237
|
+
(?<type>p|pen\.?|
|
238
|
+
og|o\.g\.)
|
196
239
|
\)
|
197
240
|
)?
|
198
|
-
/
|
241
|
+
/ix
|
199
242
|
|
200
243
|
|
201
244
|
|
@@ -220,22 +263,21 @@ class GoalsParser
|
|
220
263
|
player = GoalsPlayerStruct.new
|
221
264
|
player.name = name
|
222
265
|
|
223
|
-
minute_hash =
|
224
|
-
while minute_hash
|
266
|
+
minute_hash = nil
|
267
|
+
while minute_hash=get_minute_hash!( line ) ## note: returns nil if no (regex) match
|
225
268
|
logger.debug " found minutes >#{minute_hash.inspect}< - remaining >#{line}<"
|
226
269
|
|
227
270
|
minute = GoalsMinuteStruct.new
|
228
271
|
minute.minute = minute_hash[:minute].to_i
|
229
272
|
minute.offset = minute_hash[:offset].to_i if minute_hash[:offset]
|
230
273
|
if minute_hash[:type]
|
231
|
-
minute.owngoal = true if minute_hash[:type] =~ /o\.g\./
|
232
|
-
minute.penalty = true if minute_hash[:type] =~ /
|
274
|
+
minute.owngoal = true if minute_hash[:type] =~ /og|o\.g\./i
|
275
|
+
minute.penalty = true if minute_hash[:type] =~ /p|pen\.?/i
|
233
276
|
end
|
234
277
|
player.minutes << minute
|
235
278
|
|
236
279
|
# remove commas and spaces (note: use ^ for start of string only!!!)
|
237
280
|
line.sub!( /^[ ,]+/, '' )
|
238
|
-
minute_hash = get_minute_hash!( line )
|
239
281
|
end
|
240
282
|
|
241
283
|
players << player
|
@@ -261,6 +303,8 @@ private
|
|
261
303
|
m = MINUTES_REGEX.match( line ) # note: use ^ for start of string only!!!
|
262
304
|
if m
|
263
305
|
h = {}
|
306
|
+
## todo/fix - hash conversion no longer needed in ruby 3+!!
|
307
|
+
## double check - and remove (simplify) !!!!
|
264
308
|
# - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
|
265
309
|
m.names.each { |n| h[n.to_sym] = m[n] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
|
266
310
|
|