sportdb-writers 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,367 +1,269 @@
1
-
2
- module Writer
3
-
4
-
5
- SOURCES = {
6
- 'one' => { path: '../../stage/one' },
7
- 'one/o' => { path: '../apis/o' }, ## "o" debug version
8
-
9
- 'two' => { path: '../../stage/two' },
10
- 'two/o' => { path: '../cache.weltfussball/o' }, ## "o" debug version
11
- 'two/tmp' => { path: '../cache.weltfussball/tmp' }, ## "tmp" debug version
12
-
13
- 'leagues' => { path: '../../../footballcsv/cache.leagues' },
14
- 'leagues/o' => { path: '../cache.leagues/o' }, ## "o" debug version
15
-
16
- 'soccerdata' => { path: '../../../footballcsv/cache.soccerdata',
17
- format: 'century', # e.g. 1800s/1888-89
18
- }
19
- }
20
-
21
-
22
-
23
- def self.merge_goals( matches, goals )
24
- goals_by_match = goals.group_by { |rec| rec.match_id }
25
- puts "match goal reports - #{goals_by_match.size} records"
26
-
27
- ## lets group by date for easier lookup
28
- matches_by_date = matches.group_by { |rec| rec.date }
29
-
30
-
31
- ## note: "shadow / reuse" matches and goals vars for now in loop
32
- ## find better names to avoid confusion!!
33
- goals_by_match.each_with_index do |(match_id, goals),i|
34
- ## split match_id
35
- team_str, more_str = match_id.split( '|' )
36
- team1_str, team2_str = team_str.split( ' - ' )
37
-
38
- more_str = more_str.strip
39
- team1_str = team1_str.strip
40
- team2_str = team2_str.strip
41
-
42
- ## for now assume date in more (and not round or something else)
43
- date_str = more_str # e.g. in 2019-07-26 format
44
-
45
- puts ">#{team1_str}< - >#{team2_str}< | #{date_str}, #{goals.size} goals"
46
-
47
- ## try a join - find matching match
48
- matches = matches_by_date[ date_str ]
49
- if matches.nil?
50
- puts "!! ERROR: no match found for date >#{date_str}<"
51
- exit 1
52
- end
53
-
54
- found_matches = matches.select {|match| match.team1 == team1_str &&
55
- match.team2 == team2_str }
56
-
57
- if found_matches.size == 1
58
- match = found_matches[0]
59
- match.goals = SportDb::Import::Goal.build( goals )
60
- else
61
- puts "!!! ERROR: found #{found_matches.size} in #{matches.size} matches for date >#{date_str}<:"
62
- matches.each do |match|
63
- puts " >#{match.team1}< - >#{match.team2}<"
64
- end
65
- exit 1
66
- end
67
- end
68
- end
69
-
70
-
71
-
72
-
73
- ########
74
- # helpers
75
- # normalize team names
76
- #
77
- # todo/fix: for reuse move to sportdb-catalogs
78
- # use normalize - add to module/class ??
79
- ##
80
- ## todo/fix: check league - if is national_team or clubs or intl etc.!!!!
81
-
82
-
83
- def self.normalize( matches, league:, season: nil )
84
- league = SportDb::Import.catalog.leagues.find!( league )
85
- country = league.country
86
-
87
- ## todo/fix: cache name lookups - why? why not?
88
- matches.each do |match|
89
- team1 = SportDb::Import.catalog.clubs.find_by!( name: match.team1,
90
- country: country )
91
- team2 = SportDb::Import.catalog.clubs.find_by!( name: match.team2,
92
- country: country )
93
-
94
- if season
95
- team1_name = team1.name_by_season( season )
96
- team2_name = team2.name_by_season( season )
97
- else
98
- team1_name = team1.name
99
- team2_name = team2.name
100
- end
101
-
102
- puts "#{match.team1} => #{team1_name}" if match.team1 != team1_name
103
- puts "#{match.team2} => #{team2_name}" if match.team2 != team2_name
104
-
105
- match.update( team1: team1_name )
106
- match.update( team2: team2_name )
107
- end
108
- matches
109
- end
110
-
111
-
112
-
113
-
114
- def self.split_matches( matches, season: )
115
- matches_i = []
116
- matches_ii = []
117
- matches.each do |match|
118
- date = Date.strptime( match.date, '%Y-%m-%d' )
119
- if date.year == season.start_year
120
- matches_i << match
121
- elsif date.year == season.end_year
122
- matches_ii << match
123
- else
124
- puts "!! ERROR: match date-out-of-range for season:"
125
- pp season
126
- pp date
127
- pp match
128
- exit 1
129
- end
130
- end
131
- [matches_i, matches_ii]
132
- end
133
-
134
-
135
-
136
- ###
137
- # todo/check: use Writer.open() or FileWriter.open() or such - why? why not?
138
- def self.write_buf( path, buf ) ## write buffer helper
139
- ## for convenience - make sure parent folders/directories exist
140
- FileUtils.mkdir_p( File.dirname( path )) unless Dir.exist?( File.dirname( path ))
141
-
142
- File.open( path, 'w:utf-8' ) do |f|
143
- f.write( buf )
144
- end
145
- end
146
-
147
-
148
-
149
- def self.write( league, season, source:,
150
- extra: nil,
151
- split: false,
152
- normalize: true,
153
- rounds: true )
154
- season = Season( season ) ## normalize season
155
-
156
- league_info = LEAGUES[ league ]
157
- if league_info.nil?
158
- puts "!! ERROR - no league found for >#{league}<; sorry"
159
- exit 1
160
- end
161
-
162
- ## check - if source is directory (assume if starting ./ or ../ or /)
163
- if source.start_with?( './') ||
164
- source.start_with?( '../') ||
165
- source.start_with?( '/')
166
- ## check if directory exists
167
- unless File.exist?( source )
168
- puts "!! ERROR: source dir >#{source}< does not exist"
169
- exit 1
170
- end
171
- source_info = { path: source } ## wrap in "plain" source dir in source info
172
- else
173
- source_info = SOURCES[ source ]
174
- if source_info.nil?
175
- puts "!! ERROR - no source found for >#{source}<; sorry"
176
- exit 1
177
- end
178
- end
179
-
180
- source_path = source_info[:path]
181
-
182
- ## format lets you specify directory layout
183
- ## default = 1888-89
184
- ## century = 1800s/1888-89
185
- ## ...
186
- season_path = season.to_path( (source_info[:format] || 'default').to_sym )
187
- in_path = "#{source_path}/#{season_path}/#{league}.csv" # e.g. ../stage/one/2020/br.1.csv
188
-
189
-
190
- matches = SportDb::CsvMatchParser.read( in_path )
191
- puts "matches- #{matches.size} records"
192
-
193
-
194
- ## check for goals
195
- in_path_goals = "#{source_path}/#{season_path}/#{league}~goals.csv" # e.g. ../stage/one/2020/br.1~goals.csv
196
- if File.exist?( in_path_goals )
197
- goals = SportDb::CsvGoalParser.read( in_path_goals )
198
- puts "goals - #{goals.size} records"
199
- pp goals[0]
200
-
201
- puts
202
- puts "merge goals:"
203
- merge_goals( matches, goals )
204
- end
205
-
206
-
207
- pp matches[0]
208
-
209
-
210
- matches = normalize( matches, league: league, season: season ) if normalize
211
-
212
-
213
-
214
- league_name = league_info[ :name ] # e.g. Brasileiro Série A
215
- basename = league_info[ :basename] #.e.g 1-seriea
216
-
217
- league_name = league_name.call( season ) if league_name.is_a?( Proc ) ## is proc/func - name depends on season
218
- basename = basename.call( season ) if basename.is_a?( Proc ) ## is proc/func - name depends on season
219
-
220
- lang = league_info[ :lang ] || 'en_AU' ## default / fallback to en_AU (always use rounds NOT matchday for now)
221
- repo_path = league_info[ :path ] # e.g. brazil or world/europe/portugal etc.
222
-
223
-
224
- season_path = String.new('') ## note: allow extra path for output!!!! e.g. archive/2000s etc.
225
- season_path << "#{extra}/" if extra
226
- season_path << season.path
227
-
228
-
229
- ## check for stages
230
- stages = league_info[ :stages ]
231
- stages = stages.call( season ) if stages.is_a?( Proc ) ## is proc/func - stages depends on season
232
-
233
-
234
- if stages
235
-
236
- ## split into four stages / two files
237
- ## - Grunddurchgang
238
- ## - Finaldurchgang - Meister
239
- ## - Finaldurchgang - Qualifikation
240
- ## - Europa League Play-off
241
-
242
- matches_by_stage = matches.group_by { |match| match.stage }
243
- pp matches_by_stage.keys
244
-
245
-
246
- ## stages = prepare_stages( stages )
247
- pp stages
248
-
249
-
250
- romans = %w[I II III IIII V VI VII VIII VIIII X XI] ## note: use "simple" romans without -1 rule e.g. iv or ix
251
-
252
- stages.each_with_index do |stage, i|
253
-
254
- ## assume "extended" style / syntax
255
- if stage.is_a?( Hash ) && stage.has_key?( :names )
256
- stage_names = stage[ :names ]
257
- stage_basename = stage[ :basename ]
258
- ## add search/replace {basename} - why? why not?
259
- stage_basename = stage_basename.sub( '{basename}', basename )
260
- else ## assume simple style (array of strings OR hash mapping of string => string)
261
- stage_names = stage
262
- stage_basename = if stages.size == 1
263
- "#{basename}" ## use basename as is 1:1
264
- else
265
- "#{basename}-#{romans[i].downcase}" ## append i,ii,etc.
266
- end
267
- end
268
-
269
- buf = build_stage( matches_by_stage, stages: stage_names,
270
- name: "#{league_name} #{season.key}",
271
- lang: lang )
272
-
273
- ## note: might be empty!!! if no matches skip (do NOT write)
274
- write_buf( "#{config.out_dir}/#{repo_path}/#{season_path}/#{stage_basename}.txt", buf ) unless buf.empty?
275
- end
276
- else ## no stages - assume "regular" plain vanilla season
277
-
278
- ## always (auto-) sort for now - why? why not?
279
- matches = matches.sort do |l,r|
280
- ## first by date (older first)
281
- ## next by matchday (lower first)
282
- res = l.date <=> r.date
283
- res = l.time <=> r.time if res == 0 && l.time && r.time
284
- res = l.round <=> r.round if res == 0 && rounds
285
- res
286
- end
287
-
288
- if split
289
- matches_i, matches_ii = split_matches( matches, season: season )
290
-
291
- out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-i.txt"
292
-
293
- SportDb::TxtMatchWriter.write( out_path, matches_i,
294
- name: "#{league_name} #{season.key}",
295
- lang: lang,
296
- rounds: rounds )
297
-
298
- out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-ii.txt"
299
-
300
- SportDb::TxtMatchWriter.write( out_path, matches_ii,
301
- name: "#{league_name} #{season.key}",
302
- lang: lang,
303
- rounds: rounds )
304
- else
305
- out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}.txt"
306
-
307
- SportDb::TxtMatchWriter.write( out_path, matches,
308
- name: "#{league_name} #{season.key}",
309
- lang: lang,
310
- rounds: rounds )
311
- end
312
- end
313
- end
314
-
315
-
316
- =begin
317
- def prepare_stages( stages )
318
- if stages.is_a?( Array )
319
- if stages[0].is_a?( Array ) ## is array of array
320
- ## convert inner array shortcuts to hash - stage input is same as stage output
321
- stages.map {|ary| ary.reduce({}) {|h,stage| h[stage]=stage; h }}
322
- elsif stages[0].is_a?( Hash ) ## assume array of hashes
323
- stages ## pass through as is ("canonical") format!!!
324
- else ## assume array of strings
325
- ## assume single array shortcut; convert to hash - stage input is same as stage output name
326
- stages = stages.reduce({}) {|h,stage| h[stage]=stage; h }
327
- [stages] ## return hash wrapped in array
328
- end
329
- else ## assume (single) hash
330
- [stages] ## always return array of hashes
331
- end
332
- end
333
- =end
334
-
335
-
336
-
337
- def self.build_stage( matches_by_stage, stages:, name:, lang: )
338
- buf = String.new('')
339
-
340
- ## note: allow convenience shortcut - assume stage_in is stage_out - auto-convert
341
- stages = stages.reduce({}) {|h,stage| h[stage]=stage; h } if stages.is_a?( Array )
342
-
343
- stages.each_with_index do |(stage_in, stage_out),i|
344
- matches = matches_by_stage[ stage_in ] ## todo/fix: report error if no matches found!!!
345
-
346
- next if matches.nil? || matches.empty?
347
-
348
- ## (auto-)sort matches by
349
- ## 1) date
350
- matches = matches.sort do |l,r|
351
- result = l.date <=> r.date
352
- result
353
- end
354
-
355
- buf << "\n\n" if i > 0 && buf.size > 0
356
-
357
- buf << "= #{name}, #{stage_out}\n"
358
- buf << SportDb::TxtMatchWriter.build( matches, lang: lang )
359
-
360
- puts buf
361
- end
362
-
363
- buf
364
- end
365
-
366
-
367
- end # module Writer
1
+
2
+ module Writer
3
+
4
+
5
+ class Job ## todo/check: use a module (and NOT a class) - why? why not?
6
+ def self.write( datasets, source:,
7
+ normalize: false )
8
+ datasets.each_with_index do |dataset,i|
9
+ league = dataset[0]
10
+ seasons = dataset[1]
11
+
12
+ puts "writing [#{i+1}/#{datasets.size}] #{league}..."
13
+ seasons.each_with_index do |season,j|
14
+ puts " season [#{j+1}/#{season.size}] #{league} #{season}..."
15
+ Writer.write( league: league,
16
+ season: season,
17
+ source: source,
18
+ normalize: normalize )
19
+ end
20
+ end
21
+ end
22
+ end # class Job
23
+
24
+
25
+
26
+
27
+ def self.split_matches( matches, season: ) ## split matches into two lists by the calendar year of match.date; returns [matches_i, matches_ii]
28
+ matches_i = [] ## matches dated in season.start_year
29
+ matches_ii = [] ## matches dated in season.end_year
30
+ matches.each do |match|
31
+ date = Date.strptime( match.date, '%Y-%m-%d' ) ## match.date is parsed as a YYYY-MM-DD string
32
+ if date.year == season.start_year
33
+ matches_i << match
34
+ elsif date.year == season.end_year
35
+ matches_ii << match
36
+ else ## year matches neither start nor end year - report and abort the whole process
37
+ puts "!! ERROR: match date-out-of-range for season:"
38
+ pp season
39
+ pp date
40
+ pp match
41
+ exit 1
42
+ end
43
+ end
44
+ [matches_i, matches_ii]
45
+ end
46
+
47
+
48
+
49
+ ##
50
+ ## note: default - do NOT normalize any more
51
+
52
+ def self.write( league:, season:, ## read <source>/<season path>/<league>.csv and write the matches as .txt file(s) below config.out_dir
53
+ source:, ## path that must exist (checked with File.exist? below)
54
+ extra: nil, ## optional extra path segment put in front of the season path for output
55
+ split: false, ## if true (and no stages) write two files <basename>-i.txt / <basename>-ii.txt
56
+ normalize: false, ## if truthy must be a Proc; called with (matches, league:, season:)
57
+ rounds: true ) ## used as sort tie-breaker and passed on to TxtMatchWriter.write
58
+ season = Season( season ) ## normalize season
59
+
60
+ league_info = LEAGUES[ league ] ## NOTE(review): LEAGUES is not defined in the code shown here - defined elsewhere
61
+ if league_info.nil?
62
+ puts "!! ERROR - no league found for >#{league}<; sorry"
63
+ exit 1
64
+ end
65
+
66
+ ## source is always used as a path as-is (no check for a leading ./ or ../ or / here)
67
+ ## check if directory exists
68
+ ## todo/fix - use Dir.exist? why? why not?
69
+ unless File.exist?( source )
70
+ puts "!! ERROR: source dir >#{source}< does not exist"
71
+ exit 1
72
+ end
73
+ source_info = { path: source } ## wrap in "plain" source dir in source info
74
+
75
+ source_path = source_info[:path]
76
+
77
+ ## format lets you specify directory layout
78
+ ## default = 1888-89
79
+ ## century = 1800s/1888-89
80
+ ## ...
81
+ season_path = season.to_path( (source_info[:format] || 'default').to_sym ) ## NOTE(review): source_info only has :path here, so this always resolves to :default
82
+ in_path = "#{source_path}/#{season_path}/#{league}.csv" # e.g. ../stage/one/2020/br.1.csv
83
+
84
+
85
+ matches = SportDb::CsvMatchParser.read( in_path )
86
+ puts "matches- #{matches.size} records"
87
+
88
+
89
+ ## check for goals
90
+ in_path_goals = "#{source_path}/#{season_path}/#{league}~goals.csv" # e.g. ../stage/one/2020/br.1~goals.csv
91
+ if File.exist?( in_path_goals ) ## goals file is optional - only merged if present
92
+ goals = SportDb::CsvGoalParser.read( in_path_goals )
93
+ puts "goals - #{goals.size} records"
94
+ pp goals[0]
95
+
96
+ puts
97
+ puts "merge goals:"
98
+ merge_goals( matches, goals ) ## NOTE(review): merge_goals is not among the added (+) definitions shown here - confirm it still exists elsewhere
99
+ end
100
+
101
+
102
+ pp matches[0]
103
+
104
+
105
+ if normalize
106
+ if normalize.is_a?(Proc)
107
+ matches = normalize.call( matches, league: league,
108
+ season: season )
109
+ else ## any truthy non-Proc value (e.g. true) is rejected
110
+ puts "!! ERROR - normalize; expected proc got #{normalize.inspect}"
111
+ exit 1
112
+ end
113
+ end
114
+
115
+
116
+
117
+ league_name = league_info[ :name ] # e.g. Brasileiro Série A
118
+ basename = league_info[ :basename] # e.g. 1-seriea
119
+
120
+ league_name = league_name.call( season ) if league_name.is_a?( Proc ) ## is proc/func - name depends on season
121
+ basename = basename.call( season ) if basename.is_a?( Proc ) ## is proc/func - name depends on season
122
+
123
+ ## note - repo_path moved!!!
124
+ ## repo_path = league_info[ :path ] # e.g. brazil or world/europe/portugal etc.
125
+ repo_path = SportDb::GitHubSync::REPOS[ league ] ## NOTE(review): no nil check - an unknown league here would end up as an empty path segment in the output path
126
+
127
+
128
+ season_path = String.new ## note: allow extra path for output!!!! e.g. archive/2000s etc.
129
+ season_path << "#{extra}/" if extra
130
+ season_path << season.path ## from here on season_path is the output path (the input path above used season.to_path)
131
+
132
+
133
+ ## check for stages
134
+ stages = league_info[ :stages ]
135
+ stages = stages.call( season ) if stages.is_a?( Proc ) ## is proc/func - stages depends on season
136
+
137
+
138
+ if stages
139
+
140
+ ## split into four stages / two files
141
+ ## - Grunddurchgang
142
+ ## - Finaldurchgang - Meister
143
+ ## - Finaldurchgang - Qualifikation
144
+ ## - Europa League Play-off
145
+
146
+ matches_by_stage = matches.group_by { |match| match.stage }
147
+ pp matches_by_stage.keys
148
+
149
+
150
+ ## stages = prepare_stages( stages )
151
+ pp stages
152
+
153
+
154
+ romans = %w[I II III IIII V VI VII VIII VIIII X XI] ## note: use "simple" romans without -1 rule e.g. iv or ix
155
+
156
+ stages.each_with_index do |stage, i| ## one output file per entry in stages
157
+
158
+ ## assume "extended" style / syntax
159
+ if stage.is_a?( Hash ) && stage.has_key?( :names )
160
+ stage_names = stage[ :names ]
161
+ stage_basename = stage[ :basename ]
162
+ ## add search/replace {basename} - why? why not?
163
+ stage_basename = stage_basename.sub( '{basename}', basename )
164
+ else ## assume simple style (array of strings OR hash mapping of string => string)
165
+ stage_names = stage
166
+ stage_basename = if stages.size == 1
167
+ "#{basename}" ## use basename as is 1:1
168
+ else
169
+ "#{basename}-#{romans[i].downcase}" ## append i,ii,etc. - NOTE(review): romans has 11 entries; romans[i] is nil beyond that
170
+ end
171
+ end
172
+
173
+ buf = build_stage( matches_by_stage, stages: stage_names,
174
+ name: "#{league_name} #{season.key}"
175
+ )
176
+
177
+ ## note: might be empty!!! if no matches skip (do NOT write)
178
+ write_text( "#{config.out_dir}/#{repo_path}/#{season_path}/#{stage_basename}.txt",
179
+ buf ) unless buf.empty?
180
+ end
181
+ else ## no stages - assume "regular" plain vanilla season
182
+
183
+ ## always (auto-) sort for now - why? why not?
184
+ matches = matches.sort do |l,r|
185
+ ## first by date (older first)
186
+ ## next by matchday (lower first)
187
+ res = l.date <=> r.date
188
+ res = l.time <=> r.time if res == 0 && l.time && r.time ## time only compared if both matches have one
189
+ res = l.round <=> r.round if res == 0 && rounds ## round only used as tie-breaker if rounds is set
190
+ res
191
+ end
192
+
193
+ if split ## two files: start-year matches in -i, end-year matches in -ii
194
+ matches_i, matches_ii = split_matches( matches, season: season )
195
+
196
+ out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-i.txt"
197
+
198
+ SportDb::TxtMatchWriter.write( out_path, matches_i,
199
+ name: "#{league_name} #{season.key}",
200
+ rounds: rounds )
201
+
202
+ out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-ii.txt"
203
+
204
+ SportDb::TxtMatchWriter.write( out_path, matches_ii,
205
+ name: "#{league_name} #{season.key}",
206
+ rounds: rounds )
207
+ else ## single file with all matches
208
+ out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}.txt"
209
+
210
+ SportDb::TxtMatchWriter.write( out_path, matches,
211
+ name: "#{league_name} #{season.key}",
212
+ rounds: rounds )
213
+ end
214
+ end
215
+ end
216
+
217
+
218
+ =begin
219
+ def prepare_stages( stages )
220
+ if stages.is_a?( Array )
221
+ if stages[0].is_a?( Array ) ## is array of array
222
+ ## convert inner array shortcuts to hash - stage input is same as stage output
223
+ stages.map {|ary| ary.reduce({}) {|h,stage| h[stage]=stage; h }}
224
+ elsif stages[0].is_a?( Hash ) ## assume array of hashes
225
+ stages ## pass through as is ("canonical") format!!!
226
+ else ## assume array of strings
227
+ ## assume single array shortcut; convert to hash - stage input is same as stage output name
228
+ stages = stages.reduce({}) {|h,stage| h[stage]=stage; h }
229
+ [stages] ## return hash wrapped in array
230
+ end
231
+ else ## assume (single) hash
232
+ [stages] ## always return array of hashes
233
+ end
234
+ end
235
+ =end
236
+
237
+
238
+
239
+ def self.build_stage( matches_by_stage, stages:, name: ) ## build and return the text for the given stages; returns an empty string if no stage has matches
240
+ buf = String.new
241
+
242
+ ## note: allow convenience shortcut - assume stage_in is stage_out - auto-convert
243
+ stages = stages.reduce({}) {|h,stage| h[stage]=stage; h } if stages.is_a?( Array )
244
+
245
+ stages.each_with_index do |(stage_in, stage_out),i| ## stage_in = lookup key in matches_by_stage, stage_out = name used in the heading
246
+ matches = matches_by_stage[ stage_in ] ## todo/fix: report error if no matches found!!!
247
+
248
+ next if matches.nil? || matches.empty? ## stages without matches are skipped silently
249
+
250
+ ## (auto-)sort matches by
251
+ ## 1) date
252
+ matches = matches.sort do |l,r|
253
+ result = l.date <=> r.date
254
+ result
255
+ end
256
+
257
+ buf << "\n\n" if i > 0 && buf.size > 0 ## separator only between stages that were actually written
258
+
259
+ buf << "= #{name}, #{stage_out}\n"
260
+ buf << SportDb::TxtMatchWriter.build( matches )
261
+
262
+ puts buf ## note: prints the whole buffer accumulated so far (not just this stage)
263
+ end
264
+
265
+ buf
266
+ end
267
+
268
+
269
+ end # module Writer