sportdb-writers 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -4
- data/Manifest.txt +2 -4
- data/README.md +26 -28
- data/Rakefile +32 -30
- data/lib/sportdb/leagues/leagues_at.rb +35 -39
- data/lib/sportdb/leagues/leagues_de.rb +21 -29
- data/lib/sportdb/leagues/leagues_eng.rb +58 -70
- data/lib/sportdb/leagues/leagues_es.rb +15 -19
- data/lib/sportdb/leagues/leagues_europe.rb +178 -213
- data/lib/sportdb/leagues/leagues_it.rb +16 -20
- data/lib/sportdb/leagues/leagues_mx.rb +23 -25
- data/lib/sportdb/leagues/leagues_south_america.rb +17 -21
- data/lib/sportdb/leagues/leagues_world.rb +14 -16
- data/lib/sportdb/writers/github.rb +195 -0
- data/lib/sportdb/writers/goals.rb +57 -0
- data/lib/sportdb/writers/txt_writer.rb +218 -407
- data/lib/sportdb/writers/version.rb +24 -24
- data/lib/sportdb/writers/write.rb +269 -367
- data/lib/sportdb/writers.rb +97 -31
- metadata +42 -17
- data/lib/sportdb/writers/config.rb +0 -18
- data/test/helper.rb +0 -19
- data/test/test_txt_writer.rb +0 -124
- data/test/test_version.rb +0 -16
@@ -1,367 +1,269 @@
|
|
1
|
-
|
2
|
-
module Writer
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
##
|
80
|
-
##
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
##
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
##
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
name: "#{league_name} #{season.key}",
|
271
|
-
lang: lang )
|
272
|
-
|
273
|
-
## note: might be empty!!! if no matches skip (do NOT write)
|
274
|
-
write_buf( "#{config.out_dir}/#{repo_path}/#{season_path}/#{stage_basename}.txt", buf ) unless buf.empty?
|
275
|
-
end
|
276
|
-
else ## no stages - assume "regular" plain vanilla season
|
277
|
-
|
278
|
-
## always (auto-) sort for now - why? why not?
|
279
|
-
matches = matches.sort do |l,r|
|
280
|
-
## first by date (older first)
|
281
|
-
## next by matchday (lower first)
|
282
|
-
res = l.date <=> r.date
|
283
|
-
res = l.time <=> r.time if res == 0 && l.time && r.time
|
284
|
-
res = l.round <=> r.round if res == 0 && rounds
|
285
|
-
res
|
286
|
-
end
|
287
|
-
|
288
|
-
if split
|
289
|
-
matches_i, matches_ii = split_matches( matches, season: season )
|
290
|
-
|
291
|
-
out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-i.txt"
|
292
|
-
|
293
|
-
SportDb::TxtMatchWriter.write( out_path, matches_i,
|
294
|
-
name: "#{league_name} #{season.key}",
|
295
|
-
lang: lang,
|
296
|
-
rounds: rounds )
|
297
|
-
|
298
|
-
out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-ii.txt"
|
299
|
-
|
300
|
-
SportDb::TxtMatchWriter.write( out_path, matches_ii,
|
301
|
-
name: "#{league_name} #{season.key}",
|
302
|
-
lang: lang,
|
303
|
-
rounds: rounds )
|
304
|
-
else
|
305
|
-
out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}.txt"
|
306
|
-
|
307
|
-
SportDb::TxtMatchWriter.write( out_path, matches,
|
308
|
-
name: "#{league_name} #{season.key}",
|
309
|
-
lang: lang,
|
310
|
-
rounds: rounds )
|
311
|
-
end
|
312
|
-
end
|
313
|
-
end
|
314
|
-
|
315
|
-
|
316
|
-
=begin
|
317
|
-
def prepare_stages( stages )
|
318
|
-
if stages.is_a?( Array )
|
319
|
-
if stages[0].is_a?( Array ) ## is array of array
|
320
|
-
## convert inner array shortcuts to hash - stage input is same as stage output
|
321
|
-
stages.map {|ary| ary.reduce({}) {|h,stage| h[stage]=stage; h }}
|
322
|
-
elsif stages[0].is_a?( Hash ) ## assume array of hashes
|
323
|
-
stages ## pass through as is ("canonical") format!!!
|
324
|
-
else ## assume array of strings
|
325
|
-
## assume single array shortcut; convert to hash - stage input is same as stage output name
|
326
|
-
stages = stages.reduce({}) {|h,stage| h[stage]=stage; h }
|
327
|
-
[stages] ## return hash wrapped in array
|
328
|
-
end
|
329
|
-
else ## assume (single) hash
|
330
|
-
[stages] ## always return array of hashes
|
331
|
-
end
|
332
|
-
end
|
333
|
-
=end
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
def self.build_stage( matches_by_stage, stages:, name:, lang: )
|
338
|
-
buf = String.new('')
|
339
|
-
|
340
|
-
## note: allow convenience shortcut - assume stage_in is stage_out - auto-convert
|
341
|
-
stages = stages.reduce({}) {|h,stage| h[stage]=stage; h } if stages.is_a?( Array )
|
342
|
-
|
343
|
-
stages.each_with_index do |(stage_in, stage_out),i|
|
344
|
-
matches = matches_by_stage[ stage_in ] ## todo/fix: report error if no matches found!!!
|
345
|
-
|
346
|
-
next if matches.nil? || matches.empty?
|
347
|
-
|
348
|
-
## (auto-)sort matches by
|
349
|
-
## 1) date
|
350
|
-
matches = matches.sort do |l,r|
|
351
|
-
result = l.date <=> r.date
|
352
|
-
result
|
353
|
-
end
|
354
|
-
|
355
|
-
buf << "\n\n" if i > 0 && buf.size > 0
|
356
|
-
|
357
|
-
buf << "= #{name}, #{stage_out}\n"
|
358
|
-
buf << SportDb::TxtMatchWriter.build( matches, lang: lang )
|
359
|
-
|
360
|
-
puts buf
|
361
|
-
end
|
362
|
-
|
363
|
-
buf
|
364
|
-
end
|
365
|
-
|
366
|
-
|
367
|
-
end # module Writer
|
1
|
+
|
2
|
+
module Writer
|
3
|
+
|
4
|
+
|
5
|
+
class Job    ## todo/check: use a module (and NOT a class) - why? why not?
  ## Batch entry point - write every season of every league in datasets.
  ##
  ## datasets:  list of [league, seasons] pairs; seasons is a list of seasons
  ## source:    passed on as is to Writer.write
  ## normalize: passed on as is to Writer.write (default: false)
  ##
  ## Prints a progress line per league and per season.
  def self.write( datasets, source:,
                            normalize: false )
    datasets.each_with_index do |dataset,i|
      league  = dataset[0]
      seasons = dataset[1]

      puts "writing [#{i+1}/#{datasets.size}] #{league}..."
      seasons.each_with_index do |season,j|
        ## note: the total is the number of seasons (NOT the size of the season itself)
        puts " season [#{j+1}/#{seasons.size}] #{league} #{season}..."
        Writer.write( league:    league,
                      season:    season,
                      source:    source,
                      normalize: normalize )
      end
    end
  end
end  # class Job
|
23
|
+
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
## Split matches by calendar year into two lists.
##
## matches: list of records responding to date; date may be
##          a string in the format YYYY-MM-DD or a Date object
## season:  record responding to start_year and end_year
##
## Returns [matches_i, matches_ii] - matches played in the season's
## start year and matches played in the season's end year.
## note: if start and end year are the same, all matches end up in matches_i.
##
## Prints an error and exits (status 1) on the first match
## with a date outside of both years.
def self.split_matches( matches, season: )
  matches_i  = []
  matches_ii = []

  matches.each do |match|
    ## note: accept ready-to-use dates too (Date.strptime works on strings only)
    date = case match.date
           when Date then match.date
           else           Date.strptime( match.date, '%Y-%m-%d' )
           end

    if date.year == season.start_year
      matches_i << match
    elsif date.year == season.end_year
      matches_ii << match
    else
      puts "!! ERROR: match date-out-of-range for season:"
      pp season
      pp date
      pp match
      exit 1
    end
  end

  [matches_i, matches_ii]
end
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
##
|
50
|
+
## note: default - do NOT normalize any more
|
51
|
+
|
52
|
+
## Read the matches of one league season from a (csv) source directory
## and write them out as text (.txt) file(s).
##
## league:    league key - used for the LEAGUES lookup, the repo path lookup
##            and the csv file name (e.g. br.1 => br.1.csv)
## season:    season - gets normalized via Season()
## source:    path of the source directory
## extra:     optional extra path prefixed to the season output path
##            (e.g. archive/2000s)
## split:     if true, write two files (-i and -ii) split by calendar year;
##            only used for leagues without stages
## normalize: false (default) or a proc called with
##            (matches, league:, season:) returning the matches to write
## rounds:    passed on to TxtMatchWriter.write; if set, the round is used
##            as the last sort tie-breaker too
##
## Prints an error and exits (status 1) if the league is unknown,
## the source directory is missing or normalize is not a proc.
def self.write( league:, season:,
                source:,
                extra: nil,
                split: false,
                normalize: false,
                rounds: true )
  season = Season( season )   ## normalize season

  league_info = LEAGUES[ league ]
  if league_info.nil?
    puts "!! ERROR - no league found for >#{league}<; sorry"
    exit 1
  end

  ## note: source must be an existing directory - a plain file
  ##        with the same name does NOT count
  unless File.directory?( source )
    puts "!! ERROR: source dir >#{source}< does not exist"
    exit 1
  end
  source_info = { path: source }  ## wrap "plain" source dir in source info

  source_path = source_info[:path]

  ## format lets you specify directory layout
  ##   default  = 1888-89
  ##   century  = 1800s/1888-89
  ##   ...
  season_path = season.to_path( (source_info[:format] || 'default').to_sym )
  in_path = "#{source_path}/#{season_path}/#{league}.csv"   # e.g. ../stage/one/2020/br.1.csv


  matches = SportDb::CsvMatchParser.read( in_path )
  puts "matches- #{matches.size} records"


  ## check for goals (optional companion file)
  in_path_goals = "#{source_path}/#{season_path}/#{league}~goals.csv"   # e.g. ../stage/one/2020/br.1~goals.csv
  if File.exist?( in_path_goals )
    goals = SportDb::CsvGoalParser.read( in_path_goals )
    puts "goals - #{goals.size} records"
    pp goals[0]

    puts
    puts "merge goals:"
    merge_goals( matches, goals )
  end


  pp matches[0]


  if normalize
    if normalize.is_a?(Proc)
      matches = normalize.call( matches, league: league,
                                         season: season )
    else
      puts "!! ERROR - normalize; expected proc got #{normalize.inspect}"
      exit 1
    end
  end


  league_name = league_info[ :name ]      # e.g. Brasileiro Série A
  basename    = league_info[ :basename ]  # e.g. 1-seriea

  league_name = league_name.call( season )  if league_name.is_a?( Proc )  ## is proc/func - name depends on season
  basename    = basename.call( season )     if basename.is_a?( Proc )     ## is proc/func - name depends on season

  ## note - repo_path moved!!!
  ## repo_path = league_info[ :path ]   # e.g. brazil or world/europe/portugal etc.
  repo_path = SportDb::GitHubSync::REPOS[ league ]


  season_path = String.new    ## note: allow extra path for output!!!! e.g. archive/2000s etc.
  season_path << "#{extra}/"   if extra
  season_path << season.path

  ## title / heading shared by all output files
  title = "#{league_name} #{season.key}"


  ## check for stages
  stages = league_info[ :stages ]
  stages = stages.call( season )  if stages.is_a?( Proc )  ## is proc/func - stages depends on season


  if stages

    ## split into four stages / two files
    ## - Grunddurchgang
    ## - Finaldurchgang - Meister
    ## - Finaldurchgang - Qualifikation
    ## - Europa League Play-off

    matches_by_stage = matches.group_by { |match| match.stage }
    pp matches_by_stage.keys


    ## stages = prepare_stages( stages )
    pp stages


    romans = %w[I II III IIII V VI VII VIII VIIII X XI]   ## note: use "simple" romans without -1 rule e.g. iv or ix

    stages.each_with_index do |stage, i|

      ## assume "extended" style / syntax
      if stage.is_a?( Hash ) && stage.has_key?( :names )
        stage_names    = stage[ :names ]
        stage_basename = stage[ :basename ]
        ## add search/replace {basename} - why? why not?
        stage_basename = stage_basename.sub( '{basename}', basename )
      else  ## assume simple style (array of strings OR hash mapping of string => string)
        stage_names    = stage
        stage_basename = if stages.size == 1
                           "#{basename}"   ## use basename as is 1:1
                         else
                           "#{basename}-#{romans[i].downcase}"  ## append i,ii,etc.
                         end
      end

      buf = build_stage( matches_by_stage, stages: stage_names,
                                           name: title )

      ## note: might be empty!!! if no matches skip (do NOT write)
      write_text( "#{config.out_dir}/#{repo_path}/#{season_path}/#{stage_basename}.txt",
                  buf ) unless buf.empty?
    end
  else  ## no stages - assume "regular" plain vanilla season

    ## always (auto-) sort for now - why? why not?
    matches = matches.sort do |l,r|
      ## first by date (older first)
      ## next by time (if both present)
      ## next by matchday (lower first)
      res = l.date <=> r.date
      res = l.time <=> r.time     if res == 0 && l.time && r.time
      res = l.round <=> r.round   if res == 0 && rounds
      res
    end

    if split
      matches_i, matches_ii = split_matches( matches, season: season )

      out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-i.txt"

      SportDb::TxtMatchWriter.write( out_path, matches_i,
                                     name: title,
                                     rounds: rounds )

      out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}-ii.txt"

      SportDb::TxtMatchWriter.write( out_path, matches_ii,
                                     name: title,
                                     rounds: rounds )
    else
      out_path = "#{config.out_dir}/#{repo_path}/#{season_path}/#{basename}.txt"

      SportDb::TxtMatchWriter.write( out_path, matches,
                                     name: title,
                                     rounds: rounds )
    end
  end
end
|
216
|
+
|
217
|
+
|
218
|
+
=begin
|
219
|
+
def prepare_stages( stages )
|
220
|
+
if stages.is_a?( Array )
|
221
|
+
if stages[0].is_a?( Array ) ## is array of array
|
222
|
+
## convert inner array shortcuts to hash - stage input is same as stage output
|
223
|
+
stages.map {|ary| ary.reduce({}) {|h,stage| h[stage]=stage; h }}
|
224
|
+
elsif stages[0].is_a?( Hash ) ## assume array of hashes
|
225
|
+
stages ## pass through as is ("canonical") format!!!
|
226
|
+
else ## assume array of strings
|
227
|
+
## assume single array shortcut; convert to hash - stage input is same as stage output name
|
228
|
+
stages = stages.reduce({}) {|h,stage| h[stage]=stage; h }
|
229
|
+
[stages] ## return hash wrapped in array
|
230
|
+
end
|
231
|
+
else ## assume (single) hash
|
232
|
+
[stages] ## always return array of hashes
|
233
|
+
end
|
234
|
+
end
|
235
|
+
=end
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
## Build the text for one output file made up of one or more stages.
##
## matches_by_stage: hash mapping stage (input) name => list of matches
## stages:           hash mapping stage input name => stage output name, or
##                   (convenience shortcut) an array of names where the
##                   input name is used as the output name too
## name:             heading prefix, written as "= <name>, <stage output name>"
##
## Returns the text buffer (string); empty if no stage has any matches.
## Stages without matches get skipped.
def self.build_stage( matches_by_stage, stages:, name: )
  buf = String.new

  ## note: allow convenience shortcut - assume stage_in is stage_out - auto-convert
  if stages.is_a?( Array )
    stages = stages.each_with_object( {} ) { |stage,h| h[stage] = stage }
  end

  stages.each do |stage_in, stage_out|
    matches = matches_by_stage[ stage_in ]   ## todo/fix: report error if no matches found!!!

    next if matches.nil? || matches.empty?

    ## (auto-)sort matches by
    ##  1) date
    ##  2) original position - note: sort is NOT stable in ruby; the index
    ##       tie-breaker keeps same-date matches in their incoming order
    matches = matches.each_with_index
                     .sort_by { |match,idx| [match.date, idx] }
                     .map     { |match,_idx| match }

    ## separate from previous stage (if any written so far)
    buf << "\n\n"   unless buf.empty?

    buf << "= #{name}, #{stage_out}\n"
    buf << SportDb::TxtMatchWriter.build( matches )

    puts buf   ## note: echoes the complete buffer built so far (debug output)
  end

  buf
end
|
267
|
+
|
268
|
+
|
269
|
+
end # module Writer
|