sportdb-writers 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+
2
## Version metadata for the sportdb-writers package.
module SportDb
  module Module
    module Writers
      MAJOR = 0    ## todo: namespace inside version or something - why? why not??
      MINOR = 0
      PATCH = 1
      VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

      class << self
        ## version string in MAJOR.MINOR.PATCH format
        def version
          VERSION
        end

        ## one-line banner with the package version and the running ruby
        def banner
          "sportdb-writers/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
        end

        ## absolute path found by walking four directory levels up from this file
        def root
          dir = 4.times.reduce( __FILE__ ) { |path, _| File.dirname( path ) }
          File.expand_path( dir )
        end
      end
    end # module Writers
  end # module Module
end # module SportDb
@@ -0,0 +1,367 @@
1
+
2
+ module Writer
3
+
4
+
5
## Known input sources by short name. Every entry has
##   :path   - base directory of the source
##   :format - (optional) season directory layout handed on to season.to_path;
##             if missing 'default' is used (see write)
SOURCES = {
  'one'        => { path: '../../stage/one' },
  'one/o'      => { path: '../apis/o' },                     ## "o" debug version

  'two'        => { path: '../../stage/two' },
  'two/o'      => { path: '../cache.weltfussball/o' },       ## "o" debug version
  'two/tmp'    => { path: '../cache.weltfussball/tmp' },     ## "tmp" debug version

  'leagues'    => { path: '../../../footballcsv/cache.leagues' },
  'leagues/o'  => { path: '../cache.leagues/o' },            ## "o" debug version

  'soccerdata' => {
    path:   '../../../footballcsv/cache.soccerdata',
    format: 'century'                                        # e.g. 1800s/1888-89
  }
}
20
+
21
+
22
+
23
## Attach goal records to their matches (in place).
##
## Goal records are grouped by match_id; the id is split at '|' into a
## "team1 - team2" part and a date part (e.g. 2019-07-26). The match is
## looked up by date first and then by both team names; exactly one match
## must be found or the process exits with status 1.
##
## Returns the goal records grouped by match_id.
def self.merge_goals( matches, goals )
  goals_by_match = goals.group_by(&:match_id)
  puts "match goal reports - #{goals_by_match.size} records"

  ## lets group by date for easier lookup
  matches_by_date = matches.group_by(&:date)

  goals_by_match.each do |match_id, match_goals|
    ## split match_id
    team_str, more_str   = match_id.split( '|' )
    team1_str, team2_str = team_str.split( ' - ' )

    ## for now assume date in more (and not round or something else)
    date_str  = more_str.strip   # e.g. in 2019-07-26 format
    team1_str = team1_str.strip
    team2_str = team2_str.strip

    puts ">#{team1_str}< - >#{team2_str}< | #{date_str}, #{match_goals.size} goals"

    ## try a join - find matching match
    candidates = matches_by_date[ date_str ]
    if candidates.nil?
      puts "!! ERROR: no match found for date >#{date_str}<"
      exit 1
    end

    hits = candidates.select do |candidate|
      candidate.team1 == team1_str && candidate.team2 == team2_str
    end

    unless hits.size == 1
      puts "!!! ERROR: found #{hits.size} in #{candidates.size} matches for date >#{date_str}<:"
      candidates.each do |candidate|
        puts " >#{candidate.team1}< - >#{candidate.team2}<"
      end
      exit 1
    end

    hits[0].goals = SportDb::Import::Goal.build( match_goals )
  end
end
69
+
70
+
71
+
72
+
73
+ ########
74
+ # helpers
75
+ # normalize team names
76
+ #
77
+ # todo/fix: for reuse move to sportdb-catalogs
78
+ # use normalize - add to module/class ??
79
+ ##
80
+ ## todo/fix: check league - if is national_team or clubs or intl etc.!!!!
81
+
82
+
83
## Replace the team names of all matches (in place) with the club names
## found in the catalog.
##
## league - passed to catalog.leagues.find!; its country narrows the club lookup
## season - (optional) if given, use club.name_by_season( season )
##          instead of club.name
##
## Prints every name that gets changed and returns the (same) matches.
def self.normalize( matches, league:, season: nil )
  country = SportDb::Import.catalog.leagues.find!( league ).country

  ## todo/fix: cache name lookups - why? why not?
  find_club = ->(name) do
    SportDb::Import.catalog.clubs.find_by!( name: name, country: country )
  end

  matches.each do |match|
    club1 = find_club.call( match.team1 )
    club2 = find_club.call( match.team2 )

    name1, name2 = [club1, club2].map do |club|
      season ? club.name_by_season( season ) : club.name
    end

    puts "#{match.team1} => #{name1}" if match.team1 != name1
    puts "#{match.team2} => #{name2}" if match.team2 != name2

    match.update( team1: name1 )
    match.update( team2: name2 )
  end

  matches
end
110
+
111
+
112
+
113
+
114
## Split matches into two lists by calendar year of the match date
## (match.date is parsed as a %Y-%m-%d string):
##   first list  - year is season.start_year
##   second list - year is season.end_year
##
## A match with any other year gets reported and the process exits with status 1.
def self.split_matches( matches, season: )
  first_part  = []
  second_part = []

  matches.each do |match|
    date = Date.strptime( match.date, '%Y-%m-%d' )

    bucket = if date.year == season.start_year
               first_part
             elsif date.year == season.end_year
               second_part
             end

    if bucket.nil?
      puts "!! ERROR: match date-out-of-range for season:"
      pp season
      pp date
      pp match
      exit 1
    end

    bucket << match
  end

  [first_part, second_part]
end
133
+
134
+
135
+
136
+ ###
137
+ # todo/check: use Writer.open() or FileWriter.open() or such - why? why not?
138
## write buffer helper - saves buf to path (utf-8);
##   missing parent folders/directories get created for convenience
def self.write_buf( path, buf )
  parent = File.dirname( path )
  FileUtils.mkdir_p( parent ) unless Dir.exist?( parent )

  File.open( path, 'w:utf-8' ) { |file| file.write( buf ) }
end
146
+
147
+
148
+
149
## Read the matches of one league season from a source (in .csv)
## and write them out in the text format below config.out_dir.
##
## league     - key into LEAGUES; also the basename of the input datafile
##              i.e. <source>/<season>/<league>.csv
## season     - passed through Season() for normalization
## source:    - a directory if starting with ./ or ../ or /
##              otherwise a key into SOURCES
## extra:     - (optional) extra output path put in front of the season path
##              e.g. archive/2000s
## split:     - if true write two files (-i and -ii) split by calendar year
##              (ignored if the league has stages)
## normalize: - if true run team names through normalize
## rounds:    - if true sort by round too (after date and time);
##              also handed on to the text writer
##
## If <league>~goals.csv exists next to the matches it gets merged in.
## Unknown leagues or sources are reported and the process exits with status 1.
def self.write( league, season, source:,
                extra: nil,
                split: false,
                normalize: true,
                rounds: true )
  season = Season( season )   ## normalize season

  league_info = LEAGUES[ league ]
  if league_info.nil?
    puts "!! ERROR - no league found for >#{league}<; sorry"
    exit 1
  end

  ## check - if source is directory (assume if starting ./ or ../ or /)
  if source.start_with?( './', '../', '/' )
    ## note: check with File.directory? (and NOT File.exist?) -
    ##   a plain file is no valid source dir and would only fail later on read
    unless File.directory?( source )
      puts "!! ERROR: source dir >#{source}< does not exist"
      exit 1
    end
    source_info = { path: source }   ## wrap in "plain" source dir in source info
  else
    source_info = SOURCES[ source ]
    if source_info.nil?
      puts "!! ERROR - no source found for >#{source}<; sorry"
      exit 1
    end
  end

  source_path = source_info[:path]

  ## format lets you specify directory layout
  ##   default = 1888-89
  ##   century = 1800s/1888-89
  ##   ...
  season_path = season.to_path( (source_info[:format] || 'default').to_sym )
  in_path     = "#{source_path}/#{season_path}/#{league}.csv"   # e.g. ../stage/one/2020/br.1.csv

  matches = SportDb::CsvMatchParser.read( in_path )
  puts "matches- #{matches.size} records"

  ## check for goals
  in_path_goals = "#{source_path}/#{season_path}/#{league}~goals.csv"   # e.g. ../stage/one/2020/br.1~goals.csv
  if File.exist?( in_path_goals )
    goals = SportDb::CsvGoalParser.read( in_path_goals )
    puts "goals - #{goals.size} records"
    pp goals[0]

    puts
    puts "merge goals:"
    merge_goals( matches, goals )
  end

  pp matches[0]

  ## note: normalize(...) with arguments calls the method;
  ##       the bare normalize in the condition is the keyword flag
  matches = normalize( matches, league: league, season: season ) if normalize

  league_name = league_info[ :name ]       # e.g. Brasileiro Série A
  basename    = league_info[ :basename ]   # e.g. 1-seriea

  ## is proc/func - name depends on season
  league_name = league_name.call( season ) if league_name.is_a?( Proc )
  basename    = basename.call( season )    if basename.is_a?( Proc )

  lang      = league_info[ :lang ] || 'en_AU'   ## default / fallback to en_AU (always use rounds NOT matchday for now)
  repo_path = league_info[ :path ]              # e.g. brazil or world/europe/portugal etc.

  ## note: from here on season_path is the OUTPUT path;
  ##   allow extra path for output!!!! e.g. archive/2000s etc.
  season_path = String.new('')
  season_path << "#{extra}/" if extra
  season_path << season.path

  title = "#{league_name} #{season.key}"

  ## check for stages
  stages = league_info[ :stages ]
  stages = stages.call( season ) if stages.is_a?( Proc )   ## is proc/func - stages depends on season

  if stages
    ## split into stages / files e.g.
    ##   - Grunddurchgang
    ##   - Finaldurchgang - Meister
    ##   - Finaldurchgang - Qualifikation
    ##   - Europa League Play-off

    matches_by_stage = matches.group_by { |match| match.stage }
    pp matches_by_stage.keys

    ## stages = prepare_stages( stages )
    pp stages

    romans = %w[I II III IIII V VI VII VIII VIIII X XI]   ## note: use "simple" romans without -1 rule e.g. iv or ix

    stages.each_with_index do |stage, i|
      if stage.is_a?( Hash ) && stage.has_key?( :names )
        ## assume "extended" style / syntax
        stage_names    = stage[ :names ]
        ## add search/replace {basename} - why? why not?
        stage_basename = stage[ :basename ].sub( '{basename}', basename )
      else
        ## assume simple style (array of strings OR hash mapping of string => string)
        stage_names    = stage
        stage_basename = if stages.size == 1
                           "#{basename}"   ## use basename as is 1:1
                         else
                           "#{basename}-#{romans[i].downcase}"   ## append i,ii,etc.
                         end
      end

      buf = build_stage( matches_by_stage, stages: stage_names,
                                           name:   title,
                                           lang:   lang )

      ## note: might be empty!!! if no matches skip (do NOT write)
      write_buf( "#{config.out_dir}/#{repo_path}/#{season_path}/#{stage_basename}.txt", buf ) unless buf.empty?
    end
  else   ## no stages - assume "regular" plain vanilla season

    ## always (auto-) sort for now - why? why not?
    matches = matches.sort do |l,r|
      ## first by date (older first)
      ## next by time (only if both present)
      ## next by matchday (lower first)
      res = l.date <=> r.date
      res = l.time <=> r.time    if res == 0 && l.time && r.time
      res = l.round <=> r.round  if res == 0 && rounds
      res
    end

    out_base = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}"
    txt_opts = { name: title, lang: lang, rounds: rounds }

    if split
      matches_i, matches_ii = split_matches( matches, season: season )

      SportDb::TxtMatchWriter.write( "#{out_base}-i.txt",  matches_i,  **txt_opts )
      SportDb::TxtMatchWriter.write( "#{out_base}-ii.txt", matches_ii, **txt_opts )
    else
      SportDb::TxtMatchWriter.write( "#{out_base}.txt", matches, **txt_opts )
    end
  end
end
314
+
315
+
316
+ =begin
317
+ def prepare_stages( stages )
318
+ if stages.is_a?( Array )
319
+ if stages[0].is_a?( Array ) ## is array of array
320
+ ## convert inner array shortcuts to hash - stage input is same as stage output
321
+ stages.map {|ary| ary.reduce({}) {|h,stage| h[stage]=stage; h }}
322
+ elsif stages[0].is_a?( Hash ) ## assume array of hashes
323
+ stages ## pass through as is ("canonical") format!!!
324
+ else ## assume array of strings
325
+ ## assume single array shortcut; convert to hash - stage input is same as stage output name
326
+ stages = stages.reduce({}) {|h,stage| h[stage]=stage; h }
327
+ [stages] ## return hash wrapped in array
328
+ end
329
+ else ## assume (single) hash
330
+ [stages] ## always return array of hashes
331
+ end
332
+ end
333
+ =end
334
+
335
+
336
+
337
## Build the text for one output file made up of one or more stages.
##
## matches_by_stage - matches grouped by (input) stage name
## stages:          - hash mapping input stage name => output (heading) name,
##                    or an array of names (input name is output name)
## name:            - heading prefix; every stage starts with "= <name>, <stage>"
## lang:            - handed on to SportDb::TxtMatchWriter.build
##
## Stages without matches are skipped; returns the text (empty if nothing matched).
def self.build_stage( matches_by_stage, stages:, name:, lang: )
  ## note: allow convenience shortcut - assume stage_in is stage_out - auto-convert
  if stages.is_a?( Array )
    stages = stages.each_with_object( {} ) { |stage, mapping| mapping[stage] = stage }
  end

  out = String.new('')

  stages.each do |stage_in, stage_out|
    stage_matches = matches_by_stage[ stage_in ]   ## todo/fix: report error if no matches found!!!
    next if stage_matches.nil? || stage_matches.empty?

    ## (auto-)sort matches by date
    sorted = stage_matches.sort { |a, b| a.date <=> b.date }

    out << "\n\n" unless out.empty?   ## blank lines between stages
    out << "= #{name}, #{stage_out}\n"
    out << SportDb::TxtMatchWriter.build( sorted, lang: lang )

    puts out
  end

  out
end
365
+
366
+
367
+ end # module Writer
@@ -0,0 +1,19 @@
1
+ ## note: use the local version of sportdb gems
2
+
3
+ # todo/fix: use SPORTDB_DIR or such (for reuse) in boot!!!!!!!!
4
+
5
## put the local lib folders in front of the load path
##   (one by one - the last one listed ends up first)
[
  '../../../sportdb/sport.db/sportdb-formats/lib',
  '../../../sportdb/sport.db/sportdb-config/lib',
].each { |lib_dir| $LOAD_PATH.unshift( File.expand_path( lib_dir )) }


## minitest setup
require 'minitest/autorun'


## our own code
require 'sportdb/writers'


## use (switch to) "external" datasets
SportDb::Import.config.clubs_dir   = "../../../openfootball/clubs"
SportDb::Import.config.leagues_dir = "../../../openfootball/leagues"
@@ -0,0 +1,124 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_txt_writer.rb
4
+
5
+
6
+ require 'helper'
7
+
8
+
9
## Smoke tests for SportDb::TxtMatchWriter - read a staged .csv datafile,
##  normalize the team names and write the matches to ./tmp.
##
## note: use Minitest::Test - the old MiniTest alias is no longer loaded
##         by default in newer minitest versions
class TestTxtWriter < Minitest::Test

  TxtMatchWriter = SportDb::TxtMatchWriter


  def test_eng
    write_league( '../../stage/one/2019-20/eng.1.csv', './tmp/pl.txt',
                  league_name: 'English Premier League',
                  round:       'Matchday',
                  lang:        'en' )
  end

  def test_es
    write_league( '../../stage/one/2019-20/es.1.csv', './tmp/liga.txt',
                  league_name: 'Primera División de España',
                  round:       'Jornada',
                  lang:        'es' )
  end

  def test_it
    write_league( '../../stage/one/2019-20/it.1.csv', './tmp/seriea.txt',
                  league_name: 'Italian Serie A',
                  round:       ->(round) { "%s^ Giornata" % round },
                  lang:        'it' )
  end

  #####
  # note: fix sort order e.g. cover
  #
  # 17^ Giornata
  # [Mer. 18.12.]
  #   UC Sampdoria 1-2 Juventus
  #
  # 7^ Giornata
  # [Mer. 18.12.]
  #   Brescia 0-2 US Sassuolo Calcio
  #
  # 17^ Giornata
  # [Ven. 20.12.]
  #   ACF Fiorentina 1-4 AS Roma


  ########
  # helper
  #   sort matches and normalize team names;
  #   returns the sorted matches with team names replaced (in place)
  #   by the club names found in the catalog
  def normalize( matches, league: )
    matches = matches.sort do |l,r|
      ## first by date (older first)
      ## next by matchday (lower first)
      res = l.date <=> r.date
      res = l.round <=> r.round if res == 0
      res
    end


    league  = SportDb::Import.catalog.leagues.find!( league )
    country = league.country

    ## todo/fix: cache name lookups - why? why not?
    matches.each do |match|
      team1 = SportDb::Import.catalog.clubs.find_by!( name: match.team1,
                                                      country: country )
      team2 = SportDb::Import.catalog.clubs.find_by!( name: match.team2,
                                                      country: country )

      puts "#{match.team1} => #{team1.name}" if match.team1 != team1.name
      puts "#{match.team2} => #{team2.name}" if match.team2 != team2.name

      match.update( team1: team1.name )
      match.update( team2: team2.name )
    end
    matches
  end


  private

  ## shared test body - read in_path, print a sample, normalize and write to out_path
  ##
  ## NOTE(review): the title: and round: options used here differ from
  ##   the name: and rounds: options passed by Writer.write in this package -
  ##   confirm against the TxtMatchWriter.write signature
  def write_league( in_path, out_path, league_name:, round:, lang:, season_key: '2019/20' )
    matches = SportDb::CsvMatchParser.read( in_path )

    puts
    pp matches[0]
    puts "#{matches.size} matches"

    matches = normalize( matches, league: league_name )

    TxtMatchWriter.write( out_path, matches,
                          title: "#{league_name} #{season_key}",
                          round: round,
                          lang:  lang )
  end

end # class TestTxtWriter