sportdb-writers 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,367 +1,269 @@
1
-
2
- module Writer
3
-
4
-
5
- SOURCES = {
6
- 'one' => { path: '../../stage/one' },
7
- 'one/o' => { path: '../apis/o' }, ## "o" debug version
8
-
9
- 'two' => { path: '../../stage/two' },
10
- 'two/o' => { path: '../cache.weltfussball/o' }, ## "o" debug version
11
- 'two/tmp' => { path: '../cache.weltfussball/tmp' }, ## "tmp" debug version
12
-
13
- 'leagues' => { path: '../../../footballcsv/cache.leagues' },
14
- 'leagues/o' => { path: '../cache.leagues/o' }, ## "o" debug version
15
-
16
- 'soccerdata' => { path: '../../../footballcsv/cache.soccerdata',
17
- format: 'century', # e.g. 1800s/1888-89
18
- }
19
- }
20
-
21
-
22
-
23
- def self.merge_goals( matches, goals )
24
- goals_by_match = goals.group_by { |rec| rec.match_id }
25
- puts "match goal reports - #{goals_by_match.size} records"
26
-
27
- ## lets group by date for easier lookup
28
- matches_by_date = matches.group_by { |rec| rec.date }
29
-
30
-
31
- ## note: "shadow / reuse" matches and goals vars for now in loop
32
- ## find better names to avoid confusion!!
33
- goals_by_match.each_with_index do |(match_id, goals),i|
34
- ## split match_id
35
- team_str, more_str = match_id.split( '|' )
36
- team1_str, team2_str = team_str.split( ' - ' )
37
-
38
- more_str = more_str.strip
39
- team1_str = team1_str.strip
40
- team2_str = team2_str.strip
41
-
42
- ## for now assume date in more (and not round or something else)
43
- date_str = more_str # e.g. in 2019-07-26 format
44
-
45
- puts ">#{team1_str}< - >#{team2_str}< | #{date_str}, #{goals.size} goals"
46
-
47
- ## try a join - find matching match
48
- matches = matches_by_date[ date_str ]
49
- if matches.nil?
50
- puts "!! ERROR: no match found for date >#{date_str}<"
51
- exit 1
52
- end
53
-
54
- found_matches = matches.select {|match| match.team1 == team1_str &&
55
- match.team2 == team2_str }
56
-
57
- if found_matches.size == 1
58
- match = found_matches[0]
59
- match.goals = SportDb::Import::Goal.build( goals )
60
- else
61
- puts "!!! ERROR: found #{found_matches.size} in #{matches.size} matches for date >#{date_str}<:"
62
- matches.each do |match|
63
- puts " >#{match.team1}< - >#{match.team2}<"
64
- end
65
- exit 1
66
- end
67
- end
68
- end
69
-
70
-
71
-
72
-
73
- ########
74
- # helpers
75
- # normalize team names
76
- #
77
- # todo/fix: for reuse move to sportdb-catalogs
78
- # use normalize - add to module/class ??
79
- ##
80
- ## todo/fix: check league - if is national_team or clubs or intl etc.!!!!
81
-
82
-
83
- def self.normalize( matches, league:, season: nil )
84
- league = SportDb::Import.catalog.leagues.find!( league )
85
- country = league.country
86
-
87
- ## todo/fix: cache name lookups - why? why not?
88
- matches.each do |match|
89
- team1 = SportDb::Import.catalog.clubs.find_by!( name: match.team1,
90
- country: country )
91
- team2 = SportDb::Import.catalog.clubs.find_by!( name: match.team2,
92
- country: country )
93
-
94
- if season
95
- team1_name = team1.name_by_season( season )
96
- team2_name = team2.name_by_season( season )
97
- else
98
- team1_name = team1.name
99
- team2_name = team2.name
100
- end
101
-
102
- puts "#{match.team1} => #{team1_name}" if match.team1 != team1_name
103
- puts "#{match.team2} => #{team2_name}" if match.team2 != team2_name
104
-
105
- match.update( team1: team1_name )
106
- match.update( team2: team2_name )
107
- end
108
- matches
109
- end
110
-
111
-
112
-
113
-
114
- def self.split_matches( matches, season: )
115
- matches_i = []
116
- matches_ii = []
117
- matches.each do |match|
118
- date = Date.strptime( match.date, '%Y-%m-%d' )
119
- if date.year == season.start_year
120
- matches_i << match
121
- elsif date.year == season.end_year
122
- matches_ii << match
123
- else
124
- puts "!! ERROR: match date-out-of-range for season:"
125
- pp season
126
- pp date
127
- pp match
128
- exit 1
129
- end
130
- end
131
- [matches_i, matches_ii]
132
- end
133
-
134
-
135
-
136
- ###
137
- # todo/check: use Writer.open() or FileWriter.open() or such - why? why not?
138
- def self.write_buf( path, buf ) ## write buffer helper
139
- ## for convenience - make sure parent folders/directories exist
140
- FileUtils.mkdir_p( File.dirname( path )) unless Dir.exist?( File.dirname( path ))
141
-
142
- File.open( path, 'w:utf-8' ) do |f|
143
- f.write( buf )
144
- end
145
- end
146
-
147
-
148
-
149
- def self.write( league, season, source:,
150
- extra: nil,
151
- split: false,
152
- normalize: true,
153
- rounds: true )
154
- season = Season( season ) ## normalize season
155
-
156
- league_info = LEAGUES[ league ]
157
- if league_info.nil?
158
- puts "!! ERROR - no league found for >#{league}<; sorry"
159
- exit 1
160
- end
161
-
162
- ## check - if source is directory (assume if starting ./ or ../ or /)
163
- if source.start_with?( './') ||
164
- source.start_with?( '../') ||
165
- source.start_with?( '/')
166
- ## check if directory exists
167
- unless File.exist?( source )
168
- puts "!! ERROR: source dir >#{source}< does not exist"
169
- exit 1
170
- end
171
- source_info = { path: source } ## wrap in "plain" source dir in source info
172
- else
173
- source_info = SOURCES[ source ]
174
- if source_info.nil?
175
- puts "!! ERROR - no source found for >#{source}<; sorry"
176
- exit 1
177
- end
178
- end
179
-
180
- source_path = source_info[:path]
181
-
182
- ## format lets you specify directory layout
183
- ## default = 1888-89
184
- ## century = 1800s/1888-89
185
- ## ...
186
- season_path = season.to_path( (source_info[:format] || 'default').to_sym )
187
- in_path = "#{source_path}/#{season_path}/#{league}.csv" # e.g. ../stage/one/2020/br.1.csv
188
-
189
-
190
- matches = SportDb::CsvMatchParser.read( in_path )
191
- puts "matches- #{matches.size} records"
192
-
193
-
194
- ## check for goals
195
- in_path_goals = "#{source_path}/#{season_path}/#{league}~goals.csv" # e.g. ../stage/one/2020/br.1~goals.csv
196
- if File.exist?( in_path_goals )
197
- goals = SportDb::CsvGoalParser.read( in_path_goals )
198
- puts "goals - #{goals.size} records"
199
- pp goals[0]
200
-
201
- puts
202
- puts "merge goals:"
203
- merge_goals( matches, goals )
204
- end
205
-
206
-
207
- pp matches[0]
208
-
209
-
210
- matches = normalize( matches, league: league, season: season ) if normalize
211
-
212
-
213
-
214
- league_name = league_info[ :name ] # e.g. Brasileiro Série A
215
- basename = league_info[ :basename] #.e.g 1-seriea
216
-
217
- league_name = league_name.call( season ) if league_name.is_a?( Proc ) ## is proc/func - name depends on season
218
- basename = basename.call( season ) if basename.is_a?( Proc ) ## is proc/func - name depends on season
219
-
220
- lang = league_info[ :lang ] || 'en_AU' ## default / fallback to en_AU (always use rounds NOT matchday for now)
221
- repo_path = league_info[ :path ] # e.g. brazil or world/europe/portugal etc.
222
-
223
-
224
- season_path = String.new('') ## note: allow extra path for output!!!! e.g. archive/2000s etc.
225
- season_path << "#{extra}/" if extra
226
- season_path << season.path
227
-
228
-
229
- ## check for stages
230
- stages = league_info[ :stages ]
231
- stages = stages.call( season ) if stages.is_a?( Proc ) ## is proc/func - stages depends on season
232
-
233
-
234
- if stages
235
-
236
- ## split into four stages / two files
237
- ## - Grunddurchgang
238
- ## - Finaldurchgang - Meister
239
- ## - Finaldurchgang - Qualifikation
240
- ## - Europa League Play-off
241
-
242
- matches_by_stage = matches.group_by { |match| match.stage }
243
- pp matches_by_stage.keys
244
-
245
-
246
- ## stages = prepare_stages( stages )
247
- pp stages
248
-
249
-
250
- romans = %w[I II III IIII V VI VII VIII VIIII X XI] ## note: use "simple" romans without -1 rule e.g. iv or ix
251
-
252
- stages.each_with_index do |stage, i|
253
-
254
- ## assume "extended" style / syntax
255
- if stage.is_a?( Hash ) && stage.has_key?( :names )
256
- stage_names = stage[ :names ]
257
- stage_basename = stage[ :basename ]
258
- ## add search/replace {basename} - why? why not?
259
- stage_basename = stage_basename.sub( '{basename}', basename )
260
- else ## assume simple style (array of strings OR hash mapping of string => string)
261
- stage_names = stage
262
- stage_basename = if stages.size == 1
263
- "#{basename}" ## use basename as is 1:1
264
- else
265
- "#{basename}-#{romans[i].downcase}" ## append i,ii,etc.
266
- end
267
- end
268
-
269
- buf = build_stage( matches_by_stage, stages: stage_names,
270
- name: "#{league_name} #{season.key}",
271
- lang: lang )
272
-
273
- ## note: might be empty!!! if no matches skip (do NOT write)
274
- write_buf( "#{config.out_dir}/#{repo_path}/#{season_path}/#{stage_basename}.txt", buf ) unless buf.empty?
275
- end
276
- else ## no stages - assume "regular" plain vanilla season
277
-
278
- ## always (auto-) sort for now - why? why not?
279
- matches = matches.sort do |l,r|
280
- ## first by date (older first)
281
- ## next by matchday (lower first)
282
- res = l.date <=> r.date
283
- res = l.time <=> r.time if res == 0 && l.time && r.time
284
- res = l.round <=> r.round if res == 0 && rounds
285
- res
286
- end
287
-
288
- if split
289
- matches_i, matches_ii = split_matches( matches, season: season )
290
-
291
- out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-i.txt"
292
-
293
- SportDb::TxtMatchWriter.write( out_path, matches_i,
294
- name: "#{league_name} #{season.key}",
295
- lang: lang,
296
- rounds: rounds )
297
-
298
- out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-ii.txt"
299
-
300
- SportDb::TxtMatchWriter.write( out_path, matches_ii,
301
- name: "#{league_name} #{season.key}",
302
- lang: lang,
303
- rounds: rounds )
304
- else
305
- out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}.txt"
306
-
307
- SportDb::TxtMatchWriter.write( out_path, matches,
308
- name: "#{league_name} #{season.key}",
309
- lang: lang,
310
- rounds: rounds )
311
- end
312
- end
313
- end
314
-
315
-
316
- =begin
317
- def prepare_stages( stages )
318
- if stages.is_a?( Array )
319
- if stages[0].is_a?( Array ) ## is array of array
320
- ## convert inner array shortcuts to hash - stage input is same as stage output
321
- stages.map {|ary| ary.reduce({}) {|h,stage| h[stage]=stage; h }}
322
- elsif stages[0].is_a?( Hash ) ## assume array of hashes
323
- stages ## pass through as is ("canonical") format!!!
324
- else ## assume array of strings
325
- ## assume single array shortcut; convert to hash - stage input is same as stage output name
326
- stages = stages.reduce({}) {|h,stage| h[stage]=stage; h }
327
- [stages] ## return hash wrapped in array
328
- end
329
- else ## assume (single) hash
330
- [stages] ## always return array of hashes
331
- end
332
- end
333
- =end
334
-
335
-
336
-
337
- def self.build_stage( matches_by_stage, stages:, name:, lang: )
338
- buf = String.new('')
339
-
340
- ## note: allow convenience shortcut - assume stage_in is stage_out - auto-convert
341
- stages = stages.reduce({}) {|h,stage| h[stage]=stage; h } if stages.is_a?( Array )
342
-
343
- stages.each_with_index do |(stage_in, stage_out),i|
344
- matches = matches_by_stage[ stage_in ] ## todo/fix: report error if no matches found!!!
345
-
346
- next if matches.nil? || matches.empty?
347
-
348
- ## (auto-)sort matches by
349
- ## 1) date
350
- matches = matches.sort do |l,r|
351
- result = l.date <=> r.date
352
- result
353
- end
354
-
355
- buf << "\n\n" if i > 0 && buf.size > 0
356
-
357
- buf << "= #{name}, #{stage_out}\n"
358
- buf << SportDb::TxtMatchWriter.build( matches, lang: lang )
359
-
360
- puts buf
361
- end
362
-
363
- buf
364
- end
365
-
366
-
367
- end # module Writer
1
+
2
+ module Writer
3
+
4
+
5
+ class Job ## todo/check: use a module (and NOT a class) - why? why not?
6
+ def self.write( datasets, source:,
7
+ normalize: false )
8
+ datasets.each_with_index do |dataset,i|
9
+ league = dataset[0]
10
+ seasons = dataset[1]
11
+
12
+ puts "writing [#{i+1}/#{datasets.size}] #{league}..."
13
+ seasons.each_with_index do |season,j|
14
+ puts " season [#{j+1}/#{season.size}] #{league} #{season}..."
15
+ Writer.write( league: league,
16
+ season: season,
17
+ source: source,
18
+ normalize: normalize )
19
+ end
20
+ end
21
+ end
22
+ end # class Job
23
+
24
+
25
+
26
+
27
+ def self.split_matches( matches, season: )
28
+ matches_i = []
29
+ matches_ii = []
30
+ matches.each do |match|
31
+ date = Date.strptime( match.date, '%Y-%m-%d' )
32
+ if date.year == season.start_year
33
+ matches_i << match
34
+ elsif date.year == season.end_year
35
+ matches_ii << match
36
+ else
37
+ puts "!! ERROR: match date-out-of-range for season:"
38
+ pp season
39
+ pp date
40
+ pp match
41
+ exit 1
42
+ end
43
+ end
44
+ [matches_i, matches_ii]
45
+ end
46
+
47
+
48
+
49
+ ##
50
+ ## note: default - do NOT normalize any more
51
+
52
+ def self.write( league:, season:,
53
+ source:,
54
+ extra: nil,
55
+ split: false,
56
+ normalize: false,
57
+ rounds: true )
58
+ season = Season( season ) ## normalize season
59
+
60
+ league_info = LEAGUES[ league ]
61
+ if league_info.nil?
62
+ puts "!! ERROR - no league found for >#{league}<; sorry"
63
+ exit 1
64
+ end
65
+
66
+ ## check - if source is directory (assume if starting ./ or ../ or /)
67
+ ## check if directory exists
68
+ ## todo/fix - use Dir.exist? why? why not?
69
+ unless File.exist?( source )
70
+ puts "!! ERROR: source dir >#{source}< does not exist"
71
+ exit 1
72
+ end
73
+ source_info = { path: source } ## wrap in "plain" source dir in source info
74
+
75
+ source_path = source_info[:path]
76
+
77
+ ## format lets you specify directory layout
78
+ ## default = 1888-89
79
+ ## century = 1800s/1888-89
80
+ ## ...
81
+ season_path = season.to_path( (source_info[:format] || 'default').to_sym )
82
+ in_path = "#{source_path}/#{season_path}/#{league}.csv" # e.g. ../stage/one/2020/br.1.csv
83
+
84
+
85
+ matches = SportDb::CsvMatchParser.read( in_path )
86
+ puts "matches- #{matches.size} records"
87
+
88
+
89
+ ## check for goals
90
+ in_path_goals = "#{source_path}/#{season_path}/#{league}~goals.csv" # e.g. ../stage/one/2020/br.1~goals.csv
91
+ if File.exist?( in_path_goals )
92
+ goals = SportDb::CsvGoalParser.read( in_path_goals )
93
+ puts "goals - #{goals.size} records"
94
+ pp goals[0]
95
+
96
+ puts
97
+ puts "merge goals:"
98
+ merge_goals( matches, goals )
99
+ end
100
+
101
+
102
+ pp matches[0]
103
+
104
+
105
+ if normalize
106
+ if normalize.is_a?(Proc)
107
+ matches = normalize.call( matches, league: league,
108
+ season: season )
109
+ else
110
+ puts "!! ERROR - normalize; expected proc got #{normalize.inspect}"
111
+ exit 1
112
+ end
113
+ end
114
+
115
+
116
+
117
+ league_name = league_info[ :name ] # e.g. Brasileiro Série A
118
+ basename = league_info[ :basename] #.e.g 1-seriea
119
+
120
+ league_name = league_name.call( season ) if league_name.is_a?( Proc ) ## is proc/func - name depends on season
121
+ basename = basename.call( season ) if basename.is_a?( Proc ) ## is proc/func - name depends on season
122
+
123
+ ## note - repo_path moved!!!
124
+ ## repo_path = league_info[ :path ] # e.g. brazil or world/europe/portugal etc.
125
+ repo_path = SportDb::GitHubSync::REPOS[ league ]
126
+
127
+
128
+ season_path = String.new ## note: allow extra path for output!!!! e.g. archive/2000s etc.
129
+ season_path << "#{extra}/" if extra
130
+ season_path << season.path
131
+
132
+
133
+ ## check for stages
134
+ stages = league_info[ :stages ]
135
+ stages = stages.call( season ) if stages.is_a?( Proc ) ## is proc/func - stages depends on season
136
+
137
+
138
+ if stages
139
+
140
+ ## split into four stages / two files
141
+ ## - Grunddurchgang
142
+ ## - Finaldurchgang - Meister
143
+ ## - Finaldurchgang - Qualifikation
144
+ ## - Europa League Play-off
145
+
146
+ matches_by_stage = matches.group_by { |match| match.stage }
147
+ pp matches_by_stage.keys
148
+
149
+
150
+ ## stages = prepare_stages( stages )
151
+ pp stages
152
+
153
+
154
+ romans = %w[I II III IIII V VI VII VIII VIIII X XI] ## note: use "simple" romans without -1 rule e.g. iv or ix
155
+
156
+ stages.each_with_index do |stage, i|
157
+
158
+ ## assume "extended" style / syntax
159
+ if stage.is_a?( Hash ) && stage.has_key?( :names )
160
+ stage_names = stage[ :names ]
161
+ stage_basename = stage[ :basename ]
162
+ ## add search/replace {basename} - why? why not?
163
+ stage_basename = stage_basename.sub( '{basename}', basename )
164
+ else ## assume simple style (array of strings OR hash mapping of string => string)
165
+ stage_names = stage
166
+ stage_basename = if stages.size == 1
167
+ "#{basename}" ## use basename as is 1:1
168
+ else
169
+ "#{basename}-#{romans[i].downcase}" ## append i,ii,etc.
170
+ end
171
+ end
172
+
173
+ buf = build_stage( matches_by_stage, stages: stage_names,
174
+ name: "#{league_name} #{season.key}"
175
+ )
176
+
177
+ ## note: might be empty!!! if no matches skip (do NOT write)
178
+ write_text( "#{config.out_dir}/#{repo_path}/#{season_path}/#{stage_basename}.txt",
179
+ buf ) unless buf.empty?
180
+ end
181
+ else ## no stages - assume "regular" plain vanilla season
182
+
183
+ ## always (auto-) sort for now - why? why not?
184
+ matches = matches.sort do |l,r|
185
+ ## first by date (older first)
186
+ ## next by matchday (lower first)
187
+ res = l.date <=> r.date
188
+ res = l.time <=> r.time if res == 0 && l.time && r.time
189
+ res = l.round <=> r.round if res == 0 && rounds
190
+ res
191
+ end
192
+
193
+ if split
194
+ matches_i, matches_ii = split_matches( matches, season: season )
195
+
196
+ out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-i.txt"
197
+
198
+ SportDb::TxtMatchWriter.write( out_path, matches_i,
199
+ name: "#{league_name} #{season.key}",
200
+ rounds: rounds )
201
+
202
+ out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-ii.txt"
203
+
204
+ SportDb::TxtMatchWriter.write( out_path, matches_ii,
205
+ name: "#{league_name} #{season.key}",
206
+ rounds: rounds )
207
+ else
208
+ out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}.txt"
209
+
210
+ SportDb::TxtMatchWriter.write( out_path, matches,
211
+ name: "#{league_name} #{season.key}",
212
+ rounds: rounds )
213
+ end
214
+ end
215
+ end
216
+
217
+
218
+ =begin
219
+ def prepare_stages( stages )
220
+ if stages.is_a?( Array )
221
+ if stages[0].is_a?( Array ) ## is array of array
222
+ ## convert inner array shortcuts to hash - stage input is same as stage output
223
+ stages.map {|ary| ary.reduce({}) {|h,stage| h[stage]=stage; h }}
224
+ elsif stages[0].is_a?( Hash ) ## assume array of hashes
225
+ stages ## pass through as is ("canonical") format!!!
226
+ else ## assume array of strings
227
+ ## assume single array shortcut; convert to hash - stage input is same as stage output name
228
+ stages = stages.reduce({}) {|h,stage| h[stage]=stage; h }
229
+ [stages] ## return hash wrapped in array
230
+ end
231
+ else ## assume (single) hash
232
+ [stages] ## always return array of hashes
233
+ end
234
+ end
235
+ =end
236
+
237
+
238
+
239
+ def self.build_stage( matches_by_stage, stages:, name: )
240
+ buf = String.new
241
+
242
+ ## note: allow convenience shortcut - assume stage_in is stage_out - auto-convert
243
+ stages = stages.reduce({}) {|h,stage| h[stage]=stage; h } if stages.is_a?( Array )
244
+
245
+ stages.each_with_index do |(stage_in, stage_out),i|
246
+ matches = matches_by_stage[ stage_in ] ## todo/fix: report error if no matches found!!!
247
+
248
+ next if matches.nil? || matches.empty?
249
+
250
+ ## (auto-)sort matches by
251
+ ## 1) date
252
+ matches = matches.sort do |l,r|
253
+ result = l.date <=> r.date
254
+ result
255
+ end
256
+
257
+ buf << "\n\n" if i > 0 && buf.size > 0
258
+
259
+ buf << "= #{name}, #{stage_out}\n"
260
+ buf << SportDb::TxtMatchWriter.build( matches )
261
+
262
+ puts buf
263
+ end
264
+
265
+ buf
266
+ end
267
+
268
+
269
+ end # module Writer