sportdb-writers 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,25 @@
1
+
2
module SportDb
  module Module
    module Writers
      ## version number parts
      ##  todo: namespace inside version or something - why? why not??
      MAJOR = 0
      MINOR = 0
      PATCH = 1

      ## dotted version string e.g. "0.0.1";
      ##  frozen so callers of version cannot mutate the shared constant in place
      VERSION = [MAJOR, MINOR, PATCH].join('.').freeze

      # Returns the version string (same object as VERSION).
      def self.version
        VERSION
      end

      # Returns a one-line banner with this library's version plus
      # the running Ruby's version, release date and platform.
      def self.banner
        "sportdb-writers/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
      end

      # Returns the absolute path of the directory four levels above this file
      #  (presumably the gem's root folder - confirm against the file layout).
      def self.root
        File.expand_path( File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) )
      end

    end # module Writers
  end # module Module
end # module SportDb
@@ -0,0 +1,367 @@
1
+
2
+ module Writer
3
+
4
+
5
## registry of known (input) sources - maps a source key to
##   path:    the source directory (relative path as configured here)
##   format:  (optional) season directory layout passed on to Season#to_path
##               e.g. century => 1800s/1888-89; if missing the default layout is used
SOURCES = {
  'one'         => { path: '../../stage/one' },
  'one/o'       => { path: '../apis/o' },                      ## "o" debug version

  'two'         => { path: '../../stage/two' },
  'two/o'       => { path: '../cache.weltfussball/o' },        ## "o" debug version
  'two/tmp'     => { path: '../cache.weltfussball/tmp' },      ## "tmp" debug version

  'leagues'     => { path: '../../../footballcsv/cache.leagues' },
  'leagues/o'   => { path: '../cache.leagues/o' },             ## "o" debug version

  'soccerdata'  => { path:   '../../../footballcsv/cache.soccerdata',
                     format: 'century' },                      # e.g. 1800s/1888-89
}
20
+
21
+
22
+
23
+ def self.merge_goals( matches, goals )
24
+ goals_by_match = goals.group_by { |rec| rec.match_id }
25
+ puts "match goal reports - #{goals_by_match.size} records"
26
+
27
+ ## lets group by date for easier lookup
28
+ matches_by_date = matches.group_by { |rec| rec.date }
29
+
30
+
31
+ ## note: "shadow / reuse" matches and goals vars for now in loop
32
+ ## find better names to avoid confusion!!
33
+ goals_by_match.each_with_index do |(match_id, goals),i|
34
+ ## split match_id
35
+ team_str, more_str = match_id.split( '|' )
36
+ team1_str, team2_str = team_str.split( ' - ' )
37
+
38
+ more_str = more_str.strip
39
+ team1_str = team1_str.strip
40
+ team2_str = team2_str.strip
41
+
42
+ ## for now assume date in more (and not round or something else)
43
+ date_str = more_str # e.g. in 2019-07-26 format
44
+
45
+ puts ">#{team1_str}< - >#{team2_str}< | #{date_str}, #{goals.size} goals"
46
+
47
+ ## try a join - find matching match
48
+ matches = matches_by_date[ date_str ]
49
+ if matches.nil?
50
+ puts "!! ERROR: no match found for date >#{date_str}<"
51
+ exit 1
52
+ end
53
+
54
+ found_matches = matches.select {|match| match.team1 == team1_str &&
55
+ match.team2 == team2_str }
56
+
57
+ if found_matches.size == 1
58
+ match = found_matches[0]
59
+ match.goals = SportDb::Import::Goal.build( goals )
60
+ else
61
+ puts "!!! ERROR: found #{found_matches.size} in #{matches.size} matches for date >#{date_str}<:"
62
+ matches.each do |match|
63
+ puts " >#{match.team1}< - >#{match.team2}<"
64
+ end
65
+ exit 1
66
+ end
67
+ end
68
+ end
69
+
70
+
71
+
72
+
73
+ ########
74
+ # helpers
75
+ # normalize team names
76
+ #
77
+ # todo/fix: for reuse move to sportdb-catalogs
78
+ # use normalize - add to module/class ??
79
+ ##
80
+ ## todo/fix: check league - if is national_team or clubs or intl etc.!!!!
81
+
82
+
83
# Replace the team names of all matches (in place) with the names
# of the clubs found in the catalog.
#
# matches - records responding to team1, team2 and update
# league  - league key / name passed on to catalog.leagues.find!;
#             the league's country is used for the club lookup
# season  - if given use club.name_by_season( season ) otherwise club.name
#
# Prints every changed name (old => new) and returns the matches passed in.
def self.normalize( matches, league:, season: nil )
  country = SportDb::Import.catalog.leagues.find!( league ).country

  ## todo/fix: cache name lookups - why? why not?
  find_name = ->( name ) do
    club = SportDb::Import.catalog.clubs.find_by!( name: name, country: country )
    season ? club.name_by_season( season ) : club.name
  end

  matches.each do |match|
    new_team1 = find_name.call( match.team1 )
    new_team2 = find_name.call( match.team2 )

    puts "#{match.team1} => #{new_team1}" unless match.team1 == new_team1
    puts "#{match.team2} => #{new_team2}" unless match.team2 == new_team2

    match.update( team1: new_team1 )
    match.update( team2: new_team2 )
  end

  matches
end
110
+
111
+
112
+
113
+
114
# Split matches into two lists by the calendar year of the match date.
#
# matches - records responding to date (string in 2019-07-26 format)
# season  - responds to start_year and end_year
#
# Matches dated in season.start_year go into the first list,
# matches dated in season.end_year into the second list (checked in this order).
# Any other year prints an error (plus season, date and match) and exits (status 1).
#
# Returns a two-element array [matches_in_start_year, matches_in_end_year].
def self.split_matches( matches, season: )
  first_half  = []
  second_half = []

  matches.each do |match|
    date = Date.strptime( match.date, '%Y-%m-%d' )

    case date.year
    when season.start_year then first_half  << match
    when season.end_year   then second_half << match
    else
      puts "!! ERROR: match date-out-of-range for season:"
      pp season
      pp date
      pp match
      exit 1
    end
  end

  [first_half, second_half]
end
133
+
134
+
135
+
136
+ ###
137
+ # todo/check: use Writer.open() or FileWriter.open() or such - why? why not?
138
# Write buffer helper - writes buf to the file at path (utf-8, overwriting
# any existing file).
#
# For convenience all missing parent folders/directories get created first.
# note: FileUtils.mkdir_p is a no-op if the directory exists, thus,
#         no (racy) exist check upfront is required
#
# Returns the value of the write call (number of bytes written).
def self.write_buf( path, buf )
  FileUtils.mkdir_p( File.dirname( path ))

  File.open( path, 'w:utf-8' ) { |f| f.write( buf ) }
end
146
+
147
+
148
+
149
# Read the matches of a league season (in .csv) from a source and
# write them out in the text format.
#
# league    - league key e.g. br.1; looked up in LEAGUES (defined outside of this section)
# season    - season; normalized via Season( season )
# source:   - key of a registered source (see SOURCES) or a source directory
#               (assumed if starting with ./ or ../ or /)
# extra:    - optional extra output path inserted before the season path e.g. archive/2000s
# split:    - if true write two files (<basename>-i.txt and <basename>-ii.txt)
#               split by calendar year (ignored if the league uses stages)
# normalize: - if true normalize team names first (see normalize)
# rounds:   - passed on to the text writer; also sort by round if true
#
# Input:    <source path>/<season path>/<league>.csv  plus
#           <source path>/<season path>/<league>~goals.csv  (optional goals - get merged if present)
# Output:   <config.out_dir>/<league path>/[<extra>/]<season path>/<basename>.txt
#
# Prints an error and exits (status 1) if the league or source is unknown
# or if the source directory does not exist.
def self.write( league, season, source:,
                extra: nil,
                split: false,
                normalize: true,
                rounds: true )
  season = Season( season )  ## normalize season

  league_info = LEAGUES[ league ]
  if league_info.nil?
    puts "!! ERROR - no league found for >#{league}<; sorry"
    exit 1
  end

  ## check - if source is directory (assume if starting ./ or ../ or /)
  if source.start_with?( './', '../', '/' )
    ## check if directory exists
    ##  note: use File.directory? (not File.exist?) - a plain file is no valid source dir
    unless File.directory?( source )
      puts "!! ERROR: source dir >#{source}< does not exist"
      exit 1
    end
    source_info = { path: source }  ## wrap in "plain" source dir in source info
  else
    source_info = SOURCES[ source ]
    if source_info.nil?
      puts "!! ERROR - no source found for >#{source}<; sorry"
      exit 1
    end
  end

  source_path = source_info[:path]

  ## format lets you specify directory layout
  ##   default   = 1888-89
  ##   century   = 1800s/1888-89
  ##   ...
  in_season_path = season.to_path( (source_info[:format] || 'default').to_sym )
  in_path = "#{source_path}/#{in_season_path}/#{league}.csv"   # e.g. ../stage/one/2020/br.1.csv

  matches = SportDb::CsvMatchParser.read( in_path )
  puts "matches- #{matches.size} records"

  ## check for goals
  in_path_goals = "#{source_path}/#{in_season_path}/#{league}~goals.csv"   # e.g. ../stage/one/2020/br.1~goals.csv
  if File.exist?( in_path_goals )
    goals = SportDb::CsvGoalParser.read( in_path_goals )
    puts "goals - #{goals.size} records"
    pp goals[0]

    puts
    puts "merge goals:"
    merge_goals( matches, goals )
  end

  pp matches[0]

  ## note: normalize (the keyword argument) is a boolean flag;
  ##         normalize(...) with arguments calls the method of the same name
  matches = normalize( matches, league: league, season: season ) if normalize

  league_name = league_info[ :name ]      # e.g. Brasileiro Série A
  basename    = league_info[ :basename ]  # e.g. 1-seriea

  league_name = league_name.call( season ) if league_name.is_a?( Proc )  ## is proc/func - name depends on season
  basename    = basename.call( season )    if basename.is_a?( Proc )     ## is proc/func - name depends on season

  lang      = league_info[ :lang ] || 'en_AU'  ## default / fallback to en_AU (always use rounds NOT matchday for now)
  repo_path = league_info[ :path ]             # e.g. brazil or world/europe/portugal etc.

  ## note: allow extra path for output!!!! e.g. archive/2000s etc.
  out_season_path = extra ? "#{extra}/#{season.path}" : season.path
  out_dir         = "#{config.out_dir}/#{repo_path}/#{out_season_path}"

  title = "#{league_name} #{season.key}"

  ## check for stages
  stages = league_info[ :stages ]
  stages = stages.call( season ) if stages.is_a?( Proc )  ## is proc/func - stages depends on season

  if stages
    ## split into stages / files e.g. four stages / two files
    ##  - Grunddurchgang
    ##  - Finaldurchgang - Meister
    ##  - Finaldurchgang - Qualifikation
    ##  - Europa League Play-off

    matches_by_stage = matches.group_by { |match| match.stage }
    pp matches_by_stage.keys

    ## stages = prepare_stages( stages )
    pp stages

    romans = %w[I II III IIII V VI VII VIII VIIII X XI]  ## note: use "simple" romans without -1 rule e.g. iv or ix

    stages.each_with_index do |stage, i|
      ## assume "extended" style / syntax
      if stage.is_a?( Hash ) && stage.has_key?( :names )
        stage_names    = stage[ :names ]
        ## add search/replace {basename} - why? why not?
        stage_basename = stage[ :basename ].sub( '{basename}', basename )
      else  ## assume simple style (array of strings OR hash mapping of string => string)
        stage_names    = stage
        stage_basename = if stages.size == 1
                           "#{basename}"                          ## use basename as is 1:1
                         else
                           "#{basename}-#{romans[i].downcase}"    ## append i,ii,etc.
                         end
      end

      buf = build_stage( matches_by_stage, stages: stage_names,
                                           name:   title,
                                           lang:   lang )

      ## note: might be empty!!! if no matches skip (do NOT write)
      write_buf( "#{out_dir}/#{stage_basename}.txt", buf ) unless buf.empty?
    end
  else  ## no stages - assume "regular" plain vanilla season

    ## always (auto-) sort for now - why? why not?
    matches = matches.sort do |l,r|
      ## first by date (older first)
      ## next by time (if both present)
      ## next by matchday (lower first)
      res = l.date  <=> r.date
      res = l.time  <=> r.time   if res == 0 && l.time && r.time
      res = l.round <=> r.round  if res == 0 && rounds
      res
    end

    ## one place for the (shared) text writer call
    write_txt = ->( file_basename, recs ) do
      SportDb::TxtMatchWriter.write( "#{out_dir}/#{file_basename}.txt", recs,
                                     name:   title,
                                     lang:   lang,
                                     rounds: rounds )
    end

    if split
      matches_i, matches_ii = split_matches( matches, season: season )

      write_txt.call( "#{basename}-i",  matches_i )
      write_txt.call( "#{basename}-ii", matches_ii )
    else
      write_txt.call( basename, matches )
    end
  end
end
314
+
315
+
316
+ =begin
317
+ def prepare_stages( stages )
318
+ if stages.is_a?( Array )
319
+ if stages[0].is_a?( Array ) ## is array of array
320
+ ## convert inner array shortcuts to hash - stage input is same as stage output
321
+ stages.map {|ary| ary.reduce({}) {|h,stage| h[stage]=stage; h }}
322
+ elsif stages[0].is_a?( Hash ) ## assume array of hashes
323
+ stages ## pass through as is ("canonical") format!!!
324
+ else ## assume array of strings
325
+ ## assume single array shortcut; convert to hash - stage input is same as stage output name
326
+ stages = stages.reduce({}) {|h,stage| h[stage]=stage; h }
327
+ [stages] ## return hash wrapped in array
328
+ end
329
+ else ## assume (single) hash
330
+ [stages] ## always return array of hashes
331
+ end
332
+ end
333
+ =end
334
+
335
+
336
+
337
# Build the text for one output file from one or more stages.
#
# matches_by_stage - hash mapping stage name (as found in the matches) to matches
# stages:          - hash mapping stage name in (lookup key) to stage name out (heading) or
#                     array of stage names (shortcut if name in is the same as name out)
# name:            - heading prefix e.g. league name plus season
# lang:            - passed on to SportDb::TxtMatchWriter.build
#
# Every stage with matches adds a "= <name>, <stage out>" heading followed
# by its matches sorted by date; stages without matches get skipped and
# stage sections get separated by two newlines.
#
# Returns the text buffer (string) - empty if no stage has any matches.
def self.build_stage( matches_by_stage, stages:, name:, lang: )
  ## note: allow convenience shortcut - assume stage_in is stage_out - auto-convert
  if stages.is_a?( Array )
    stages = stages.each_with_object( {} ) { |stage, h| h[stage] = stage }
  end

  stages.each_with_object( +'' ) do |(stage_in, stage_out), out|
    stage_matches = matches_by_stage[ stage_in ]   ## todo/fix: report error if no matches found!!!
    next if stage_matches.nil? || stage_matches.empty?

    ## (auto-)sort matches by date
    sorted = stage_matches.sort { |l, r| l.date <=> r.date }

    ## note: the buffer only is non-empty if an earlier stage got added
    out << "\n\n" unless out.empty?

    out << "= #{name}, #{stage_out}\n"
    out << SportDb::TxtMatchWriter.build( sorted, lang: lang )

    puts out
  end
end
365
+
366
+
367
+ end # module Writer
@@ -0,0 +1,19 @@
1
+ ## note: use the local version of sportdb gems
2
+
3
+ # todo/fix: use SPORTDB_DIR or such (for reuse) in boot!!!!!!!!
4
+
5
## put the lib folders of the local sportdb gems in front of the load path
##  note: paths get expanded relative to the current working directory;
##        the folder listed last ends up first in the load path
[
  '../../../sportdb/sport.db/sportdb-formats/lib',
  '../../../sportdb/sport.db/sportdb-config/lib',
].each do |lib_dir|
  $LOAD_PATH.unshift( File.expand_path( lib_dir ))
end


## minitest setup
require 'minitest/autorun'


## our own code
require 'sportdb/writers'


## use (switch to) "external" datasets
SportDb::Import.config.clubs_dir   = "../../../openfootball/clubs"
SportDb::Import.config.leagues_dir = "../../../openfootball/leagues"
@@ -0,0 +1,124 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_txt_writer.rb
4
+
5
+
6
+ require 'helper'
7
+
8
+
9
# Smoke tests for the text match writer - reads the matches of a league season
# from the staged datasets (in .csv), normalizes the team names
# and writes out the matches in the text format to ./tmp.
#
# note: use Minitest::Test (MiniTest is the legacy / old name of the module)
class TestTxtWriter < Minitest::Test

  TxtMatchWriter = SportDb::TxtMatchWriter


  def test_eng
    write_league( '../../stage/one/2019-20/eng.1.csv', './tmp/pl.txt',
                  league_name: 'English Premier League',
                  round:       'Matchday',
                  lang:        'en' )
  end

  def test_es
    write_league( '../../stage/one/2019-20/es.1.csv', './tmp/liga.txt',
                  league_name: 'Primera División de España',
                  round:       'Jornada',
                  lang:        'es' )
  end

  def test_it
    write_league( '../../stage/one/2019-20/it.1.csv', './tmp/seriea.txt',
                  league_name: 'Italian Serie A',
                  round:       ->(round) { "%s^ Giornata" % round },
                  lang:        'it' )
  end

  #####
  # note: fix sort order e.g. cover
  #
  # 17^ Giornata
  # [Mer. 18.12.]
  #   UC Sampdoria             1-2  Juventus
  #
  # 7^ Giornata
  # [Mer. 18.12.]
  #   Brescia                  0-2  US Sassuolo Calcio
  #
  # 17^ Giornata
  # [Ven. 20.12.]
  #   ACF Fiorentina           1-4  AS Roma


  private

  ########
  # helper
  #   read the matches from in_path, normalize and write out to out_path
  #
  #  NOTE(review): the writer gets called here with the title: and round: keywords;
  #    other code in this package calls TxtMatchWriter.write with name:, lang: and rounds: -
  #    confirm against the TxtMatchWriter.write signature
  def write_league( in_path, out_path, league_name:, round:, lang:, season_key: '2019/20' )
    matches = SportDb::CsvMatchParser.read( in_path )

    puts
    pp matches[0]
    puts "#{matches.size} matches"

    matches = normalize( matches, league: league_name )

    TxtMatchWriter.write( out_path, matches,
                          title: "#{league_name} #{season_key}",
                          round: round,
                          lang:  lang )

    assert File.exist?( out_path ), "expected output file >#{out_path}< to exist"
  end

  ########
  # helper
  #   sort matches and normalize team names
  #
  #  returns a new (sorted) array; the match records get updated in place
  def normalize( matches, league: )
    matches = matches.sort do |l,r|
      ## first by date (older first)
      ## next by matchday (lower first)
      res = l.date <=> r.date
      res = l.round <=> r.round   if res == 0
      res
    end


    league  = SportDb::Import.catalog.leagues.find!( league )
    country = league.country

    ## todo/fix: cache name lookups - why? why not?
    matches.each do |match|
      team1 = SportDb::Import.catalog.clubs.find_by!( name: match.team1,
                                                      country: country )
      team2 = SportDb::Import.catalog.clubs.find_by!( name: match.team2,
                                                      country: country )

      puts "#{match.team1} => #{team1.name}"  if match.team1 != team1.name
      puts "#{match.team2} => #{team2.name}"  if match.team2 != team2.name

      match.update( team1: team1.name )
      match.update( team2: team2.name )
    end
    matches
  end

end # class TestTxtWriter