sportdb-formats 1.1.6 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +53 -9
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +130 -13
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
@@ -1,183 +1,184 @@
1
-
2
- module SportDb
3
- module Import
4
-
5
-
6
- class EventInfo
7
- ## "high level" info (summary) about event (like a "wikipedia infobox")
8
- ## use for checking dataset imports; lets you check e.g.
9
- ## - dates within range
10
- ## - number of teams e.g. 20
11
- ## - matches played e.g. 380
12
- ## - goals scored e.g. 937
13
- ## etc.
14
-
15
- attr_reader :league,
16
- :season,
17
- :teams,
18
- :matches,
19
- :goals,
20
- :start_date,
21
- :end_date
22
-
23
- def initialize( league:, season:,
24
- start_date: nil, end_date: nil,
25
- teams: nil,
26
- matches: nil,
27
- goals: nil )
28
-
29
- @league = league
30
- @season = season
31
-
32
- @start_date = start_date
33
- @end_date = end_date
34
-
35
- @teams = teams ## todo/check: rename/use teams_count ??
36
- @matches = matches ## todo/check: rename/use match_count ??
37
- @goals = goals
38
- end
39
-
40
- def include?( date )
41
- ## todo/fix: add options e.g.
42
- ## - add delta/off_by_one or such?
43
- ## - add strict (for) only return true if date range (really) defined (no generic auto-rules)
44
-
45
- ### note: for now allow off by one error (via timezone/local time errors)
46
- ## todo/fix: issue warning if off by one!!!!
47
- if @start_date && @end_date
48
- date >= (@start_date-1) &&
49
- date <= (@end_date+1)
50
- else
51
- if @season.year?
52
- # assume generic rule
53
- ## same year e.g. Jan 1 - Dec 31; always true for now
54
- date.year == @season.start_year
55
- else
56
- # assume generic rule
57
- ## July 1 - June 30 (Y+1)
58
- ## - todo/check -start for some countries/leagues in June 1 or August 1 ????
59
- date >= Date.new( @season.start_year, 7, 1 ) &&
60
- date <= Date.new( @season.end_year, 6, 30 )
61
- end
62
- end
63
- end # method include?
64
- alias_method :between?, :include?
65
- end # class EventInfo
66
-
67
-
68
- class EventInfoReader
69
- def catalog() Import.catalog; end
70
-
71
-
72
- def self.read( path )
73
- txt = File.open( path, 'r:utf-8') {|f| f.read }
74
- new( txt ).parse
75
- end
76
-
77
- def self.parse( txt )
78
- new( txt ).parse
79
- end
80
-
81
- def initialize( txt )
82
- @txt = txt
83
- end
84
-
85
- def parse
86
- recs = []
87
-
88
- parse_csv( @txt ).each do |row|
89
- league_col = row['League']
90
- season_col = row['Season'] || row['Year']
91
- dates_col = row['Dates']
92
-
93
- season = Import::Season.parse( season_col )
94
- league = catalog.leagues.find!( league_col )
95
-
96
-
97
- dates = []
98
- if dates_col.nil? || dates_col.empty?
99
- ## do nothing; no dates - keep dates array empty
100
- else
101
- ## squish spaces
102
- dates_col = dates_col.gsub( /[ ]{2,}/, ' ' ) ## squish/fold spaces
103
-
104
- puts "#{league.name} (#{league.key}) | #{season.key} | #{dates_col}"
105
-
106
- ### todo/check: check what parts "Aug 15" return ???
107
- ### short form for "Aug 15 -" - works?
108
-
109
- ## todo/fix!!! - check EventInfo.include?
110
- ## now allow dates with only start_date too!! (WITHOUT end_date)
111
- parts = dates_col.split( /[ ]*[–-][ ]*/ )
112
- if parts.size == 1
113
- pp parts
114
- dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
115
- pp dates
116
- elsif parts.size == 2
117
- pp parts
118
- dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
119
- dates << DateFormats.parse( parts[1], start: Date.new( season.end_year ? season.end_year : season.start_year, 1, 1 ), lang: 'en' )
120
- pp dates
121
-
122
- ## assert/check if period is less than 365 days for now
123
- diff = dates[1].to_date.jd - dates[0].to_date.jd
124
- puts "#{diff}d"
125
- if diff > 365
126
- puts "!! ERROR - date range / period assertion failed; expected diff < 365 days"
127
- exit 1
128
- end
129
- else
130
- puts "!! ERRROR - expected data range / period - one or two dates; got #{parts.size}:"
131
- pp dates_col
132
- pp parts
133
- exit 1
134
- end
135
- end
136
-
137
-
138
- teams_col = row['Clubs'] || row['Teams']
139
- goals_col = row['Goals']
140
-
141
- ## note: remove (and allow) all non-digits e.g. 370 goals, 20 clubs, etc.
142
- teams_col = teams_col.gsub( /[^0-9]/, '' ) if teams_col
143
- goals_col = goals_col.gsub( /[^0-9]/, '' ) if goals_col
144
-
145
- teams = (teams_col.nil? || teams_col.empty?) ? nil : teams_col.to_i
146
- goals = (goals_col.nil? || goals_col.empty?) ? nil : goals_col.to_i
147
-
148
- matches_col = row['Matches']
149
- ## note: support additions in matches (played) e.g.
150
- # 132 + 63 Play-off-Spiele
151
- matches_col = matches_col.gsub( /[^0-9+]/, '' ) if matches_col
152
-
153
- matches = if matches_col.nil? || matches_col.empty?
154
- nil
155
- else
156
- if matches_col.index( '+' ) ### check for calculations
157
- ## note: for now only supports additions
158
- matches_col.split( '+' ).reduce( 0 ) do |sum,str|
159
- sum + str.to_i
160
- end
161
- else ## assume single (integer) number
162
- matches_col.to_i
163
- end
164
- end
165
-
166
- rec = EventInfo.new( league: league,
167
- season: season,
168
- start_date: dates[0],
169
- end_date: dates[1],
170
- teams: teams,
171
- matches: matches,
172
- goals: goals
173
- )
174
- recs << rec
175
- end # each row
176
- recs
177
- end # method parse
178
- end # class EventInfoReader
179
-
180
-
181
- end ## module Import
182
- end ## module SportDb
183
-
1
+
2
+ module SportDb
3
+ module Import
4
+
5
+
6
+ class EventInfo
7
+ ## "high level" info (summary) about event
8
+ ## (like a "wikipedia infobox")
9
+ ## use for checking dataset imports; lets you check e.g.
10
+ ## - dates within range
11
+ ## - number of teams e.g. 20
12
+ ## - matches played e.g. 380
13
+ ## - goals scored e.g. 937
14
+ ## etc.
15
+
16
+ attr_reader :league,
17
+ :season,
18
+ :teams,
19
+ :matches,
20
+ :goals,
21
+ :start_date,
22
+ :end_date
23
+
24
+ def initialize( league:, season:,
25
+ start_date: nil, end_date: nil,
26
+ teams: nil,
27
+ matches: nil,
28
+ goals: nil )
29
+
30
+ @league = league
31
+ @season = season
32
+
33
+ @start_date = start_date
34
+ @end_date = end_date
35
+
36
+ @teams = teams ## todo/check: rename/use teams_count ??
37
+ @matches = matches ## todo/check: rename/use match_count ??
38
+ @goals = goals
39
+ end
40
+
41
+ def include?( date )
42
+ ## todo/fix: add options e.g.
43
+ ## - add delta/off_by_one or such?
44
+ ## - add strict (for) only return true if date range (really) defined (no generic auto-rules)
45
+
46
+ ### note: for now allow off by one error (via timezone/local time errors)
47
+ ## todo/fix: issue warning if off by one!!!!
48
+ if @start_date && @end_date
49
+ date >= (@start_date-1) &&
50
+ date <= (@end_date+1)
51
+ else
52
+ if @season.year?
53
+ # assume generic rule
54
+ ## same year e.g. Jan 1 - Dec 31; always true for now
55
+ date.year == @season.start_year
56
+ else
57
+ # assume generic rule
58
+ ## July 1 - June 30 (Y+1)
59
+ ## - todo/check -start for some countries/leagues in June 1 or August 1 ????
60
+ date >= Date.new( @season.start_year, 7, 1 ) &&
61
+ date <= Date.new( @season.end_year, 6, 30 )
62
+ end
63
+ end
64
+ end # method include?
65
+ alias_method :between?, :include?
66
+ end # class EventInfo
67
+
68
+
69
+ class EventInfoReader
70
+ def catalog() Import.catalog; end
71
+
72
+
73
+ def self.read( path )
74
+ txt = File.open( path, 'r:utf-8') {|f| f.read }
75
+ new( txt ).parse
76
+ end
77
+
78
+ def self.parse( txt )
79
+ new( txt ).parse
80
+ end
81
+
82
+ def initialize( txt )
83
+ @txt = txt
84
+ end
85
+
86
+ def parse
87
+ recs = []
88
+
89
+ parse_csv( @txt ).each do |row|
90
+ league_col = row['League']
91
+ season_col = row['Season'] || row['Year']
92
+ dates_col = row['Dates']
93
+
94
+ season = Season.parse( season_col )
95
+ league = catalog.leagues.find!( league_col )
96
+
97
+
98
+ dates = []
99
+ if dates_col.nil? || dates_col.empty?
100
+ ## do nothing; no dates - keep dates array empty
101
+ else
102
+ ## squish spaces
103
+ dates_col = dates_col.gsub( /[ ]{2,}/, ' ' ) ## squish/fold spaces
104
+
105
+ puts "#{league.name} (#{league.key}) | #{season.key} | #{dates_col}"
106
+
107
+ ### todo/check: check what parts "Aug 15" return ???
108
+ ### short form for "Aug 15 -" - works?
109
+
110
+ ## todo/fix!!! - check EventInfo.include?
111
+ ## now allow dates with only start_date too!! (WITHOUT end_date)
112
+ parts = dates_col.split( /[ ]*[–-][ ]*/ )
113
+ if parts.size == 1
114
+ pp parts
115
+ dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
116
+ pp dates
117
+ elsif parts.size == 2
118
+ pp parts
119
+ dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
120
+ dates << DateFormats.parse( parts[1], start: Date.new( season.end_year ? season.end_year : season.start_year, 1, 1 ), lang: 'en' )
121
+ pp dates
122
+
123
+ ## assert/check if period is less than 365 days for now
124
+ diff = dates[1].to_date.jd - dates[0].to_date.jd
125
+ puts "#{diff}d"
126
+ if diff > 365
127
+ puts "!! ERROR - date range / period assertion failed; expected diff < 365 days"
128
+ exit 1
129
+ end
130
+ else
131
+ puts "!! ERRROR - expected data range / period - one or two dates; got #{parts.size}:"
132
+ pp dates_col
133
+ pp parts
134
+ exit 1
135
+ end
136
+ end
137
+
138
+
139
+ teams_col = row['Clubs'] || row['Teams']
140
+ goals_col = row['Goals']
141
+
142
+ ## note: remove (and allow) all non-digits e.g. 370 goals, 20 clubs, etc.
143
+ teams_col = teams_col.gsub( /[^0-9]/, '' ) if teams_col
144
+ goals_col = goals_col.gsub( /[^0-9]/, '' ) if goals_col
145
+
146
+ teams = (teams_col.nil? || teams_col.empty?) ? nil : teams_col.to_i
147
+ goals = (goals_col.nil? || goals_col.empty?) ? nil : goals_col.to_i
148
+
149
+ matches_col = row['Matches']
150
+ ## note: support additions in matches (played) e.g.
151
+ # 132 + 63 Play-off-Spiele
152
+ matches_col = matches_col.gsub( /[^0-9+]/, '' ) if matches_col
153
+
154
+ matches = if matches_col.nil? || matches_col.empty?
155
+ nil
156
+ else
157
+ if matches_col.index( '+' ) ### check for calculations
158
+ ## note: for now only supports additions
159
+ matches_col.split( '+' ).reduce( 0 ) do |sum,str|
160
+ sum + str.to_i
161
+ end
162
+ else ## assume single (integer) number
163
+ matches_col.to_i
164
+ end
165
+ end
166
+
167
+ rec = EventInfo.new( league: league,
168
+ season: season,
169
+ start_date: dates[0],
170
+ end_date: dates[1],
171
+ teams: teams,
172
+ matches: matches,
173
+ goals: goals
174
+ )
175
+ recs << rec
176
+ end # each row
177
+ recs
178
+ end # method parse
179
+ end # class EventInfoReader
180
+
181
+
182
+ end ## module Import
183
+ end ## module SportDb
184
+
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  module SportDb
4
3
 
@@ -15,6 +14,15 @@ class GoalsPlayerStruct
15
14
  def initialize
16
15
  @minutes = []
17
16
  end
17
+
18
+ def pretty_print( printer )
19
+ buf = String.new
20
+ buf << "<GoalsPlayerStruct: #{@name} "
21
+ buf << @minutes.pretty_print_inspect
22
+ buf << ">"
23
+
24
+ printer.text( buf )
25
+ end
18
26
  end
19
27
 
20
28
 
@@ -27,6 +35,18 @@ class GoalsMinuteStruct
27
35
  @penalty = false
28
36
  @owngoal = false
29
37
  end
38
+
39
+ def pretty_print( printer )
40
+ buf = String.new
41
+ buf << "<GoalsMinuteStruct: #{@minute}"
42
+ buf << "+#{@offset}" if @offset && @offset > 0
43
+ buf << "'"
44
+ buf << " (o.g.)" if @owngoal
45
+ buf << " (pen.)" if @penalty
46
+ buf << ">"
47
+
48
+ printer.text( buf )
49
+ end
30
50
  end
31
51
 
32
52
 
@@ -59,6 +79,22 @@ class GoalStruct
59
79
  def state
60
80
  [@name, @team, @minute, @offset, @penalty, @owngoal, @score1, @score2]
61
81
  end
82
+
83
+
84
+ def pretty_print( printer )
85
+ buf = String.new
86
+ buf << "<GoalStruct: #{@score1}-#{@score2} #{@name} #{@minute}"
87
+ buf << "+#{@offset}" if @offset && @offset > 0
88
+ buf << "'"
89
+ buf << " (o.g.)" if @owngoal
90
+ buf << " (pen.)" if @penalty
91
+ buf << " for #{@team}" ### team 1 or 2 - use home/away
92
+ buf << ">"
93
+
94
+ printer.text( buf )
95
+ end
96
+
97
+
62
98
  end
63
99
 
64
100
 
@@ -173,6 +209,10 @@ class GoalsParser
173
209
  include LogUtils::Logging
174
210
 
175
211
 
212
+ ### todo/fix:
213
+ ## let's use stringscanner for parsing line - why? why not?
214
+
215
+
176
216
  # note: use ^ for start of string only!!!
177
217
  # - for now slurp everything up to digits (incl. spaces - use strip to remove)
178
218
  # todo/check: use/rename to NAME_UNTIL_REGEX ??? ( add lookahead for spaces?)
@@ -184,18 +224,21 @@ class GoalsParser
184
224
  # todo/check: change to MINUTE_REGEX ??
185
225
  # add MINUTE_SKIP_REGEX or MINUTE_SEP_REGEX /^[ ,]+/
186
226
  # todo/fix: split out penalty and owngoal flag in PATTERN constant for reuse
227
+ # note - offset 90+10 possible!!!!
228
+ # note - allow p/pen./pen or o.g. or og
187
229
  MINUTES_REGEX = /^ # note: use ^ for start of string only!!!
188
230
  (?<minute>[0-9]{1,3})
189
231
  (?:\+
190
- (?<offset>[1-9]{1})
232
+ (?<offset>[0-9]{1,2})
191
233
  )?
192
234
  '
193
235
  (?:[ ]*
194
236
  \(
195
- (?<type>P|pen\.|o\.g\.)
237
+ (?<type>p|pen\.?|
238
+ og|o\.g\.)
196
239
  \)
197
240
  )?
198
- /x
241
+ /ix
199
242
 
200
243
 
201
244
 
@@ -220,22 +263,21 @@ class GoalsParser
220
263
  player = GoalsPlayerStruct.new
221
264
  player.name = name
222
265
 
223
- minute_hash = get_minute_hash!( line )
224
- while minute_hash
266
+ minute_hash = nil
267
+ while minute_hash=get_minute_hash!( line ) ## note: returns nil if no (regex) match
225
268
  logger.debug " found minutes >#{minute_hash.inspect}< - remaining >#{line}<"
226
269
 
227
270
  minute = GoalsMinuteStruct.new
228
271
  minute.minute = minute_hash[:minute].to_i
229
272
  minute.offset = minute_hash[:offset].to_i if minute_hash[:offset]
230
273
  if minute_hash[:type]
231
- minute.owngoal = true if minute_hash[:type] =~ /o\.g\./
232
- minute.penalty = true if minute_hash[:type] =~ /P|pen\./
274
+ minute.owngoal = true if minute_hash[:type] =~ /og|o\.g\./i
275
+ minute.penalty = true if minute_hash[:type] =~ /p|pen\.?/i
233
276
  end
234
277
  player.minutes << minute
235
278
 
236
279
  # remove commas and spaces (note: use ^ for start of string only!!!)
237
280
  line.sub!( /^[ ,]+/, '' )
238
- minute_hash = get_minute_hash!( line )
239
281
  end
240
282
 
241
283
  players << player
@@ -261,6 +303,8 @@ private
261
303
  m = MINUTES_REGEX.match( line ) # note: use ^ for start of string only!!!
262
304
  if m
263
305
  h = {}
306
+ ## todo/fix - hash conversion no longer needed in ruby 3+!!
307
+ ## double check - and remove (simplify) !!!!
264
308
  # - note: do NOT forget to turn name into symbol for lookup in new hash (name.to_sym)
265
309
  m.names.each { |n| h[n.to_sym] = m[n] } # or use match_data.names.zip( match_data.captures ) - more cryptic but "elegant"??
266
310