sportdb-formats 1.1.6 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +53 -9
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +130 -13
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -1,183 +1,184 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module Import
|
4
|
-
|
5
|
-
|
6
|
-
class EventInfo
|
7
|
-
## "high level" info (summary) about event
|
8
|
-
##
|
9
|
-
##
|
10
|
-
## -
|
11
|
-
## -
|
12
|
-
## -
|
13
|
-
##
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
:
|
18
|
-
:
|
19
|
-
:
|
20
|
-
:
|
21
|
-
:
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
@
|
31
|
-
|
32
|
-
|
33
|
-
@
|
34
|
-
|
35
|
-
|
36
|
-
@
|
37
|
-
@
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
##
|
43
|
-
## - add
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
if
|
48
|
-
|
49
|
-
date
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
##
|
59
|
-
|
60
|
-
date
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
###
|
108
|
-
|
109
|
-
|
110
|
-
##
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
dates << DateFormats.parse( parts[
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
pp
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
end ## module
|
183
|
-
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Import
|
4
|
+
|
5
|
+
|
6
|
+
class EventInfo
|
7
|
+
## "high level" info (summary) about event
|
8
|
+
## (like a "wikipedia infobox")
|
9
|
+
## use for checking dataset imports; lets you check e.g.
|
10
|
+
## - dates within range
|
11
|
+
## - number of teams e.g. 20
|
12
|
+
## - matches played e.g. 380
|
13
|
+
## - goals scored e.g. 937
|
14
|
+
## etc.
|
15
|
+
|
16
|
+
attr_reader :league,
|
17
|
+
:season,
|
18
|
+
:teams,
|
19
|
+
:matches,
|
20
|
+
:goals,
|
21
|
+
:start_date,
|
22
|
+
:end_date
|
23
|
+
|
24
|
+
def initialize( league:, season:,
|
25
|
+
start_date: nil, end_date: nil,
|
26
|
+
teams: nil,
|
27
|
+
matches: nil,
|
28
|
+
goals: nil )
|
29
|
+
|
30
|
+
@league = league
|
31
|
+
@season = season
|
32
|
+
|
33
|
+
@start_date = start_date
|
34
|
+
@end_date = end_date
|
35
|
+
|
36
|
+
@teams = teams ## todo/check: rename/use teams_count ??
|
37
|
+
@matches = matches ## todo/check: rename/use match_count ??
|
38
|
+
@goals = goals
|
39
|
+
end
|
40
|
+
|
41
|
+
def include?( date )
|
42
|
+
## todo/fix: add options e.g.
|
43
|
+
## - add delta/off_by_one or such?
|
44
|
+
## - add strict (for) only return true if date range (really) defined (no generic auto-rules)
|
45
|
+
|
46
|
+
### note: for now allow off by one error (via timezone/local time errors)
|
47
|
+
## todo/fix: issue warning if off by one!!!!
|
48
|
+
if @start_date && @end_date
|
49
|
+
date >= (@start_date-1) &&
|
50
|
+
date <= (@end_date+1)
|
51
|
+
else
|
52
|
+
if @season.year?
|
53
|
+
# assume generic rule
|
54
|
+
## same year e.g. Jan 1 - Dec 31; always true for now
|
55
|
+
date.year == @season.start_year
|
56
|
+
else
|
57
|
+
# assume generic rule
|
58
|
+
## July 1 - June 30 (Y+1)
|
59
|
+
## - todo/check -start for some countries/leagues in June 1 or August 1 ????
|
60
|
+
date >= Date.new( @season.start_year, 7, 1 ) &&
|
61
|
+
date <= Date.new( @season.end_year, 6, 30 )
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end # method include?
|
65
|
+
alias_method :between?, :include?
|
66
|
+
end # class EventInfo
|
67
|
+
|
68
|
+
|
69
|
+
class EventInfoReader
|
70
|
+
def catalog() Import.catalog; end
|
71
|
+
|
72
|
+
|
73
|
+
def self.read( path )
|
74
|
+
txt = File.open( path, 'r:utf-8') {|f| f.read }
|
75
|
+
new( txt ).parse
|
76
|
+
end
|
77
|
+
|
78
|
+
def self.parse( txt )
|
79
|
+
new( txt ).parse
|
80
|
+
end
|
81
|
+
|
82
|
+
def initialize( txt )
|
83
|
+
@txt = txt
|
84
|
+
end
|
85
|
+
|
86
|
+
def parse
|
87
|
+
recs = []
|
88
|
+
|
89
|
+
parse_csv( @txt ).each do |row|
|
90
|
+
league_col = row['League']
|
91
|
+
season_col = row['Season'] || row['Year']
|
92
|
+
dates_col = row['Dates']
|
93
|
+
|
94
|
+
season = Season.parse( season_col )
|
95
|
+
league = catalog.leagues.find!( league_col )
|
96
|
+
|
97
|
+
|
98
|
+
dates = []
|
99
|
+
if dates_col.nil? || dates_col.empty?
|
100
|
+
## do nothing; no dates - keep dates array empty
|
101
|
+
else
|
102
|
+
## squish spaces
|
103
|
+
dates_col = dates_col.gsub( /[ ]{2,}/, ' ' ) ## squish/fold spaces
|
104
|
+
|
105
|
+
puts "#{league.name} (#{league.key}) | #{season.key} | #{dates_col}"
|
106
|
+
|
107
|
+
### todo/check: check what parts "Aug 15" return ???
|
108
|
+
### short form for "Aug 15 -" - works?
|
109
|
+
|
110
|
+
## todo/fix!!! - check EventInfo.include?
|
111
|
+
## now allow dates with only start_date too!! (WITHOUT end_date)
|
112
|
+
parts = dates_col.split( /[ ]*[–-][ ]*/ )
|
113
|
+
if parts.size == 1
|
114
|
+
pp parts
|
115
|
+
dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
|
116
|
+
pp dates
|
117
|
+
elsif parts.size == 2
|
118
|
+
pp parts
|
119
|
+
dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
|
120
|
+
dates << DateFormats.parse( parts[1], start: Date.new( season.end_year ? season.end_year : season.start_year, 1, 1 ), lang: 'en' )
|
121
|
+
pp dates
|
122
|
+
|
123
|
+
## assert/check if period is less than 365 days for now
|
124
|
+
diff = dates[1].to_date.jd - dates[0].to_date.jd
|
125
|
+
puts "#{diff}d"
|
126
|
+
if diff > 365
|
127
|
+
puts "!! ERROR - date range / period assertion failed; expected diff < 365 days"
|
128
|
+
exit 1
|
129
|
+
end
|
130
|
+
else
|
131
|
+
puts "!! ERRROR - expected data range / period - one or two dates; got #{parts.size}:"
|
132
|
+
pp dates_col
|
133
|
+
pp parts
|
134
|
+
exit 1
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
|
139
|
+
teams_col = row['Clubs'] || row['Teams']
|
140
|
+
goals_col = row['Goals']
|
141
|
+
|
142
|
+
## note: remove (and allow) all non-digits e.g. 370 goals, 20 clubs, etc.
|
143
|
+
teams_col = teams_col.gsub( /[^0-9]/, '' ) if teams_col
|
144
|
+
goals_col = goals_col.gsub( /[^0-9]/, '' ) if goals_col
|
145
|
+
|
146
|
+
teams = (teams_col.nil? || teams_col.empty?) ? nil : teams_col.to_i
|
147
|
+
goals = (goals_col.nil? || goals_col.empty?) ? nil : goals_col.to_i
|
148
|
+
|
149
|
+
matches_col = row['Matches']
|
150
|
+
## note: support additions in matches (played) e.g.
|
151
|
+
# 132 + 63 Play-off-Spiele
|
152
|
+
matches_col = matches_col.gsub( /[^0-9+]/, '' ) if matches_col
|
153
|
+
|
154
|
+
matches = if matches_col.nil? || matches_col.empty?
|
155
|
+
nil
|
156
|
+
else
|
157
|
+
if matches_col.index( '+' ) ### check for calculations
|
158
|
+
## note: for now only supports additions
|
159
|
+
matches_col.split( '+' ).reduce( 0 ) do |sum,str|
|
160
|
+
sum + str.to_i
|
161
|
+
end
|
162
|
+
else ## assume single (integer) number
|
163
|
+
matches_col.to_i
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
rec = EventInfo.new( league: league,
|
168
|
+
season: season,
|
169
|
+
start_date: dates[0],
|
170
|
+
end_date: dates[1],
|
171
|
+
teams: teams,
|
172
|
+
matches: matches,
|
173
|
+
goals: goals
|
174
|
+
)
|
175
|
+
recs << rec
|
176
|
+
end # each row
|
177
|
+
recs
|
178
|
+
end # method parse
|
179
|
+
end # class EventInfoReader
|
180
|
+
|
181
|
+
|
182
|
+
end ## module Import
|
183
|
+
end ## module SportDb
|
184
|
+
|
@@ -1,4 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
module SportDb
|
4
3
|
|
@@ -15,6 +14,15 @@ class GoalsPlayerStruct
|
|
15
14
|
def initialize
|
16
15
|
@minutes = []
|
17
16
|
end
|
17
|
+
|
18
|
+
def pretty_print( printer )
|
19
|
+
buf = String.new
|
20
|
+
buf << "<GoalsPlayerStruct: #{@name} "
|
21
|
+
buf << @minutes.pretty_print_inspect
|
22
|
+
buf << ">"
|
23
|
+
|
24
|
+
printer.text( buf )
|
25
|
+
end
|
18
26
|
end
|
19
27
|
|
20
28
|
|
@@ -27,6 +35,18 @@ class GoalsMinuteStruct
|
|
27
35
|
@penalty = false
|
28
36
|
@owngoal = false
|
29
37
|
end
|
38
|
+
|
39
|
+
def pretty_print( printer )
|
40
|
+
buf = String.new
|
41
|
+
buf << "<GoalsMinuteStruct: #{@minute}"
|
42
|
+
buf << "+#{@offset}" if @offset && @offset > 0
|
43
|
+
buf << "'"
|
44
|
+
buf << " (o.g.)" if @owngoal
|
45
|
+
buf << " (pen.)" if @penalty
|
46
|
+
buf << ">"
|
47
|
+
|
48
|
+
printer.text( buf )
|
49
|
+
end
|
30
50
|
end
|
31
51
|
|
32
52
|
|
@@ -59,6 +79,22 @@ class GoalStruct
|
|
59
79
|
def state
|
60
80
|
[@name, @team, @minute, @offset, @penalty, @owngoal, @score1, @score2]
|
61
81
|
end
|
82
|
+
|
83
|
+
|
84
|
+
def pretty_print( printer )
|
85
|
+
buf = String.new
|
86
|
+
buf << "<GoalStruct: #{@score1}-#{@score2} #{@name} #{@minute}"
|
87
|
+
buf << "+#{@offset}" if @offset && @offset > 0
|
88
|
+
buf << "'"
|
89
|
+
buf << " (o.g.)" if @owngoal
|
90
|
+
buf << " (pen.)" if @penalty
|
91
|
+
buf << " for #{@team}" ### team 1 or 2 - use home/away
|
92
|
+
buf << ">"
|
93
|
+
|
94
|
+
printer.text( buf )
|
95
|
+
end
|
96
|
+
|
97
|
+
|
62
98
|
end
|
63
99
|
|
64
100
|
|
@@ -173,6 +209,10 @@ class GoalsParser
|
|
173
209
|
include LogUtils::Logging
|
174
210
|
|
175
211
|
|
212
|
+
### todo/fix:
|
213
|
+
## let's use stringscanner for parsing line - why? why not?
|
214
|
+
|
215
|
+
|
176
216
|
# note: use ^ for start of string only!!!
|
177
217
|
# - for now slurp everything up to digits (inlc. spaces - use strip to remove)
|
178
218
|
# todo/check: use/rename to NAME_UNTIL_REGEX ??? ( add lookahead for spaces?)
|
@@ -184,18 +224,21 @@ class GoalsParser
|
|
184
224
|
# todo/check: change to MINUTE_REGEX ??
|
185
225
|
# add MINUTE_SKIP_REGEX or MINUTE_SEP_REGEX /^[ ,]+/
|
186
226
|
# todo/fix: split out penalty and owngoal flag in PATTERN constant for reuse
|
227
|
+
# note - offset 90+10 possible!!!!
|
228
|
+
# note - allow p/pen./pen or o.g. or og
|
187
229
|
MINUTES_REGEX = /^ # note: use ^ for start of string only!!!
|
188
230
|
(?<minute>[0-9]{1,3})
|
189
231
|
(?:\+
|
190
|
-
(?<offset>[
|
232
|
+
(?<offset>[0-9]{1,2})
|
191
233
|
)?
|
192
234
|
'
|
193
235
|
(?:[ ]*
|
194
236
|
\(
|
195
|
-
(?<type>
|
237
|
+
(?<type>p|pen\.?|
|
238
|
+
og|o\.g\.)
|
196
239
|
\)
|
197
240
|
)?
|
198
|
-
/
|
241
|
+
/ix
|
199
242
|
|
200
243
|
|
201
244
|
|
@@ -220,22 +263,21 @@ class GoalsParser
|
|
220
263
|
player = GoalsPlayerStruct.new
|
221
264
|
player.name = name
|
222
265
|
|
223
|
-
minute_hash =
|
224
|
-
while minute_hash
|
266
|
+
minute_hash = nil
|
267
|
+
while minute_hash=get_minute_hash!( line ) ## note: returns nil if no (regex) match
|
225
268
|
logger.debug " found minutes >#{minute_hash.inspect}< - remaining >#{line}<"
|
226
269
|
|
227
270
|
minute = GoalsMinuteStruct.new
|
228
271
|
minute.minute = minute_hash[:minute].to_i
|
229
272
|
minute.offset = minute_hash[:offset].to_i if minute_hash[:offset]
|
230
273
|
if minute_hash[:type]
|
231
|
-
minute.owngoal = true if minute_hash[:type] =~ /o\.g\./
|
232
|
-
minute.penalty = true if minute_hash[:type] =~ /
|
274
|
+
minute.owngoal = true if minute_hash[:type] =~ /og|o\.g\./i
|
275
|
+
minute.penalty = true if minute_hash[:type] =~ /p|pen\.?/i
|
233
276
|
end
|
234
277
|
player.minutes << minute
|
235
278
|
|
236
279
|
# remove commas and spaces (note: use ^ for start of string only!!!)
|
237
280
|
line.sub!( /^[ ,]+/, '' )
|
238
|
-
minute_hash = get_minute_hash!( line )
|
239
281
|
end
|
240
282
|
|
241
283
|
players << player
|
@@ -261,6 +303,8 @@ private
|
|
261
303
|
m = MINUTES_REGEX.match( line ) # note: use ^ for start of string only!!!
|
262
304
|
if m
|
263
305
|
h = {}
|
306
|
+
## todo/fix - hash conversion no longer need in ruby 3+!!
|
307
|
+
## double check - and remove (simplify) !!!!
|
264
308
|
# - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
|
265
309
|
m.names.each { |n| h[n.to_sym] = m[n] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
|
266
310
|
|