sportdb-formats 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +4 -0
- data/Rakefile +2 -2
- data/lib/sportdb/formats.rb +17 -5
- data/lib/sportdb/formats/datafile_package.rb +10 -7
- data/lib/sportdb/formats/match/match_parser_csv.rb +314 -0
- data/lib/sportdb/formats/package.rb +137 -7
- data/lib/sportdb/formats/score/score_formats.rb +41 -1
- data/lib/sportdb/formats/team/club_reader_props.rb +3 -3
- data/lib/sportdb/formats/version.rb +3 -1
- data/test/helper.rb +1 -1
- data/test/test_club_reader_props.rb +2 -2
- data/test/test_csv_match_parser.rb +114 -0
- data/test/test_csv_match_parser_utils.rb +20 -0
- data/test/test_csv_reader.rb +5 -5
- data/test/test_datafile.rb +0 -32
- data/test/test_datafile_package.rb +46 -0
- data/test/test_package.rb +60 -28
- data/test/test_package_match.rb +27 -3
- data/test/test_scores.rb +58 -49
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a27c364d2003ece8da886c892d34d595ffcffcc
|
4
|
+
data.tar.gz: 81530dfec5decf5d9476a3f2be90bcbeb0195824
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a2aabb65968939d55f2000c64e4ac5df4cbedfe6b3786fd12ff86d26bd5e6af12c32ea129b59a7ddf30736afa6f093e0e4f2e97f2c6f0b23bffd6901a83ca91
|
7
|
+
data.tar.gz: 63f4ca42ebb537173334a931a02331b8cbdc27d9e953052c8126d9e683e54a3103eb496103fea400283c6b0b08fa7c6e738466f215e26f9567a3b822a80c172d
|
data/Manifest.txt
CHANGED
@@ -17,6 +17,7 @@ lib/sportdb/formats/match/mapper.rb
|
|
17
17
|
lib/sportdb/formats/match/mapper_teams.rb
|
18
18
|
lib/sportdb/formats/match/match_parser.rb
|
19
19
|
lib/sportdb/formats/match/match_parser_auto_conf.rb
|
20
|
+
lib/sportdb/formats/match/match_parser_csv.rb
|
20
21
|
lib/sportdb/formats/name_helper.rb
|
21
22
|
lib/sportdb/formats/outline_reader.rb
|
22
23
|
lib/sportdb/formats/package.rb
|
@@ -49,8 +50,11 @@ test/test_clubs.rb
|
|
49
50
|
test/test_conf.rb
|
50
51
|
test/test_country_index.rb
|
51
52
|
test/test_country_reader.rb
|
53
|
+
test/test_csv_match_parser.rb
|
54
|
+
test/test_csv_match_parser_utils.rb
|
52
55
|
test/test_csv_reader.rb
|
53
56
|
test/test_datafile.rb
|
57
|
+
test/test_datafile_package.rb
|
54
58
|
test/test_goals.rb
|
55
59
|
test/test_league_index.rb
|
56
60
|
test/test_league_outline_reader.rb
|
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require './lib/sportdb/formats/version.rb'
|
|
3
3
|
|
4
4
|
Hoe.spec 'sportdb-formats' do
|
5
5
|
|
6
|
-
self.version = SportDb::Formats::VERSION
|
6
|
+
self.version = SportDb::Module::Formats::VERSION
|
7
7
|
|
8
8
|
self.summary = "sportdb-formats - sport.db format and text utilities"
|
9
9
|
self.description = summary
|
@@ -21,7 +21,7 @@ Hoe.spec 'sportdb-formats' do
|
|
21
21
|
|
22
22
|
self.extra_deps = [
|
23
23
|
['alphabets', '>= 1.0.0'],
|
24
|
-
['date-formats', '>= 1.0.
|
24
|
+
['date-formats', '>= 1.0.1'],
|
25
25
|
['csvreader', '>= 1.2.4'],
|
26
26
|
['sportdb-langs', '>= 0.1.0'],
|
27
27
|
|
data/lib/sportdb/formats.rb
CHANGED
@@ -10,15 +10,26 @@ require 'zip' ## todo/check: if zip is alreay included in a required module
|
|
10
10
|
|
11
11
|
|
12
12
|
|
13
|
-
def read_csv( path
|
14
|
-
|
13
|
+
## Reads the csv file at +path+ by delegating to CsvHash.read.
##
##   sep:              passed on as :sep - only if truthy
##   symbolize_names:  if truthy, passes header_converters: :symbol
##
## Options that are not set (nil/false) are NOT passed on at all,
##   that is, the csv library defaults stay in effect.
## Returns whatever CsvHash.read returns.
def read_csv( path, sep: nil, symbolize_names: nil )
  csv_opts = {
    sep:               sep,
    header_converters: (:symbol if symbolize_names),
  }.select { |_name, setting| setting }    ## drop all unset (nil/false) options

  CsvHash.read( path, **csv_opts )
end
|
16
21
|
|
17
|
-
def parse_csv( txt
|
18
|
-
|
22
|
+
## Parses the csv text +txt+ by delegating to CsvHash.parse.
##
##   sep:              passed on as :sep - only if truthy
##   symbolize_names:  if truthy, passes header_converters: :symbol
##
## Options that are not set (nil/false) are NOT passed on at all,
##   that is, the csv library defaults stay in effect.
## Returns whatever CsvHash.parse returns.
def parse_csv( txt, sep: nil, symbolize_names: nil )
  csv_opts = {
    sep:               sep,
    header_converters: (:symbol if symbolize_names),
  }.select { |_name, setting| setting }    ## drop all unset (nil/false) options

  CsvHash.parse( txt, **csv_opts )
end
|
20
30
|
|
21
31
|
|
32
|
+
|
22
33
|
## more sportdb libs/gems
|
23
34
|
require 'sportdb/langs'
|
24
35
|
|
@@ -68,6 +79,7 @@ require 'sportdb/formats/match/match_parser'
|
|
68
79
|
require 'sportdb/formats/match/match_parser_auto_conf'
|
69
80
|
require 'sportdb/formats/match/conf_parser'
|
70
81
|
|
82
|
+
require 'sportdb/formats/match/match_parser_csv'
|
71
83
|
|
72
84
|
require 'sportdb/formats/country/country_reader'
|
73
85
|
require 'sportdb/formats/country/country_index'
|
@@ -147,4 +159,4 @@ end # module SportDb
|
|
147
159
|
|
148
160
|
|
149
161
|
|
150
|
-
puts SportDb::Formats.banner # say hello
|
162
|
+
puts SportDb::Module::Formats.banner # say hello
|
@@ -30,7 +30,7 @@ class Entry
|
|
30
30
|
@name = path[ pack.path.length+1..-1 ]
|
31
31
|
end
|
32
32
|
def name() @name; end
|
33
|
-
def read() File.open( @path, 'r:utf-8' ).read; end
|
33
|
+
## Returns the complete content of the entry's file as a string
##   (opened read-only with utf-8 as encoding);
##   note: the block form makes sure the file handle gets closed again.
def read
  File.open( @path, 'r:utf-8', &:read )
end
|
34
34
|
end # class DirPackage::Entry
|
35
35
|
|
36
36
|
|
@@ -44,17 +44,20 @@ end # class DirPackage::Entry
|
|
44
44
|
@name = basename
|
45
45
|
end
|
46
46
|
|
47
|
-
|
47
|
+
## todo/check: change pattern: to re: - why? why not?
|
48
|
+
## Walks all files below the package root (@path) and yields an Entry
##   for every file path matching +pattern+ (anything responding to match).
##
## todo/check: rename to glob or something - why? why not?
## todo/check/fix: is there a better (simpler) glob pattern? yes? no?
##   use just .* for extension or remove and check if File.file?
##   and skip File.directory? - why? why not?
def each( pattern: )
  ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
  candidates = Dir.glob( "#{@path}/**/{*,.*}.*" )

  candidates.each do |candidate|
    next  if File.directory?( candidate )     ## always skip directories / folders
    next  if EXCLUDE_RE.match( candidate )    ## note: skip dot dirs (e.g. .build/, .git/, etc.)
    next  unless pattern.match( candidate )   ## not a datafile of interest

    yield( Entry.new( self, candidate ))
  end
end
|
@@ -0,0 +1,314 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
  ##
  #  Parses match records (fixtures / results) in the comma-separated values (csv)
  #  format into Import::Match structs.
  #
  #  If no headers mapping gets passed in, two header layouts get auto-detected:
  #   - "Team 1" / "Team 2" / "Date" / "FT" / "HT" / "Round" / "Stage"
  #       (our own football.csv format, see github.com/footballcsv)
  #   - "HomeTeam" / "AwayTeam" / "FTHG" / "FTAG" / "HTHG" / "HTAG" and variants
  #       (footballdata.uk and others)
  #
  #  note: depends on the "global" parse_csv / read_csv helpers
  #          (defined outside of this class).
  class CsvMatchParser

    #############
    # helpers

    ## Counts the rows per season in the csv file at +path+ and
    ##   returns all season names found (in order of first appearance).
    ##
    ##   col:      name of the season column (defaults to 'Season')
    ##   sep:      passed on to read_csv
    ##   headers:  optional headers mapping; headers[:season] has priority over col
    def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )

      ## check if headers incl. season; if yes, has priority over col mapping
      ##  e.g. no need to specify twice (if using headers)
      col = headers[:season]  if headers && headers[:season]

      seasons = Hash.new( 0 )   ## default value is 0

      ## todo/fix: yes, use CsvHash.foreach - why? why not?
      ##   use read_csv with block to switch to foreach!!!!
      rows = read_csv( path, sep: sep )

      rows.each_with_index do |row,i|
        puts "[#{i}] " + row.inspect  if i < 2    ## debug - print first two rows only

        seasons[ row[ col ] ] += 1
      end

      pp seasons

      ## note: only return season keys/names (not hash with usage counter)
      seasons.keys
    end


    ##########
    # main machinery

    ## Reads the file at +path+ (always assuming utf-8) and parses it;
    ##   see #parse for the options and the return value.
    def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
      txt = File.open( path, 'r:utf-8' ) {|f| f.read }   ## note: make sure to use (assume) utf-8
      parse( txt, headers:    headers,
                  filters:    filters,
                  converters: converters,
                  sep:        sep )
    end

    ## Parses the csv text +txt+; shortcut for new( txt ).parse( ... ).
    def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
      new( txt ).parse( headers:    headers,
                        filters:    filters,
                        converters: converters,
                        sep:        sep )
    end


    def initialize( txt )
      @txt = txt
    end

    ## Parses the csv text and returns an array of Import::Match
    ##   (or an empty array if there are no rows).
    ##
    ##   headers:     optional mapping of field to column name
    ##                  e.g. { team1: 'Home', team2: 'Away', date: 'Date' };
    ##                  if missing the mapping gets auto-detected from the first row
    ##   filters:     optional hash of column name => value; a row gets skipped
    ##                  unless ALL values match e.g. { 'Season' => '2017/2018' }
    ##   converters:  optional proc (or array of procs); every converter gets
    ##                  called with the row and MUST return the (new) row;
    ##                  note: converters run after the filters
    ##   sep:         passed on to parse_csv
    ##
    ## Rows missing a team get skipped (with a warning);
    ##   a missing / empty / unknown (-, ?) date results in a nil date.
    ##
    ## NOTE: an unknown date format prints an error message and
    ##         exits the process (exit 1).
    def parse( headers: nil, filters: nil, converters: nil, sep: nil )

      rows = parse_csv( @txt, sep: sep )

      return []  if rows.empty?     ## no rows / empty?

      ## fix/todo: use logger!!!!

      headers_mapping = if headers    ## use user supplied headers if present
                          {}.merge( headers )
                        else
                          ## note: assume all rows have the same columns/fields!!!
                          guess_headers_mapping( rows[0].keys )
                        end

      pp headers_mapping

      ### todo/fix: check headers - how?
      ##   if present HomeTeam or HT required etc.
      ##   issue error/warn if not present

      ## convert single proc shortcut to array with single converter
      ##   (once and upfront - no need to check again for every row)
      converters = [converters]  if converters.is_a?( Proc )

      matches = []

      rows.each_with_index do |row,i|

        next  unless filters_match?( row, filters )

        ## note: add converters after filters for now (why not before filters?)
        ##   assumes array of procs
        converters.each { |converter| row = converter.call( row ) }   if converters

        team1 = row[ headers_mapping[ :team1 ]]
        team2 = row[ headers_mapping[ :team2 ]]

        ## check if data present - if not skip (might be empty row)
        ##  note: skip if ANY team is missing
        ##          (a single nil team would crash the clean-up below)
        if team1.nil? || team2.nil?
          puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
          pp row
          next
        end

        team1 = clean_team( team1 )
        team2 = clean_team( team2 )

        date = parse_date( row[ headers_mapping[ :date ]] )

        ## check for (optional) round / matchday
        round = nil
        if headers_mapping[ :round ]
          ## todo: issue warning if not ? or - (and just empty string) why? why not
          round = parse_number( row[ headers_mapping[ :round ]] )
        end

        score1,  score2  = nil, nil
        score1i, score2i = nil, nil

        ## check for full time scores ?
        if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
          ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
          score1 = parse_number( row[ headers_mapping[ :score1 ]] )
          score2 = parse_number( row[ headers_mapping[ :score2 ]] )
        end

        ## check for half time scores ?
        if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
          score1i = parse_number( row[ headers_mapping[ :score1i ]] )
          score2i = parse_number( row[ headers_mapping[ :score2i ]] )
        end

        ## check for all-in-one full time scores?
        if headers_mapping[ :score ]
          ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
          ft = parse_score( row[ headers_mapping[ :score ]] )
          score1, score2 = ft   if ft
        end

        ## check for all-in-one half time scores?
        if headers_mapping[ :scorei ]
          ht = parse_score( row[ headers_mapping[ :scorei ]] )
          score1i, score2i = ht   if ht
        end

        ## try some optional headings / columns
        stage = nil
        if headers_mapping[ :stage ]
          stage = parse_stage( row[ headers_mapping[ :stage ]] )
        end

        matches << Import::Match.new( date:    date,
                                      team1:   team1,   team2:   team2,
                                      score1:  score1,  score2:  score2,
                                      score1i: score1i, score2i: score2i,
                                      round:   round,
                                      stage:   stage )
      end

      matches
    end


    private

    ## Returns the first candidate included in headers
    ##   or nil if no matching header found.
    def find_header( headers, candidates )
      candidates.find { |candidate| headers.include?( candidate ) }
    end

    ## Auto-detects the headers mapping (field => column name)
    ##   from the column names (+headers+) of the first row.
    ##  note: fields with no matching column get mapped to nil.
    def guess_headers_mapping( headers )
      pp headers

      mapping = {}

      # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
      #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now

      if find_header( headers, ['Team 1'] ) && find_header( headers, ['Team 2'] )
        ## assume our own football.csv format, see github.com/footballcsv
        mapping[:team1]  = find_header( headers, ['Team 1'] )
        mapping[:team2]  = find_header( headers, ['Team 2'] )
        mapping[:date]   = find_header( headers, ['Date'] )

        ## check for all-in-one full time (ft) and half time (ht) scores?
        mapping[:score]  = find_header( headers, ['FT'] )
        mapping[:scorei] = find_header( headers, ['HT'] )

        mapping[:round]  = find_header( headers, ['Round'] )

        ## optional headers - note: find_header returns nil if header NOT found
        header_stage = find_header( headers, ['Stage'] )
        mapping[:stage]  = header_stage   if header_stage
      else
        ## else try footballdata.uk and others
        mapping[:team1]   = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
        mapping[:team2]   = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
        mapping[:date]    = find_header( headers, ['Date'] )

        ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
        mapping[:score1]  = find_header( headers, ['FTHG', 'HG'] )
        mapping[:score2]  = find_header( headers, ['FTAG', 'AG'] )

        ## check for half time scores ?
        ##   note: HT = Half Time
        mapping[:score1i] = find_header( headers, ['HTHG'] )
        mapping[:score2i] = find_header( headers, ['HTAG'] )
      end

      mapping
    end

    ## Returns true if the row passes ALL filters (or if there are no filters)
    ##   e.g. row['Season'] == '2017/2018'
    def filters_match?( row, filters )
      return true  unless filters

      filters.all? { |header, value| row[ header ] == value }
    end

    ## remove possible match played counter e.g. (4) (11) etc.
    def clean_team( name )
      name.sub( /\(\d+\)/, '' ).strip
    end

    ## Converts the date column to a date string in the format YYYY-MM-DD or
    ##   to nil if the date is missing (nil), empty or unknown (-, ?).
    ##
    ## NOTE: prints an error message and exits the process (exit 1)
    ##         on an unknown date format.
    def parse_date( value )
      ## note: use to_s - allow a missing date column / value (nil) too
      col = value.to_s.strip     # make sure no leading or trailing spaces left over

      ## note: allow missing / unknown date for match
      return nil  if col.empty? || col == '-' || col == '?'

      ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
      col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
      col = col.sub( /\(\w+\)/, '' )        ## e.g. (Fri), (Fr) etc.
      col = col.strip     # make sure no leading or trailing spaces left over

      date_fmt = if col =~ /^\d{2}\/\d{2}\/\d{4}$/
                   '%d/%m/%Y'      # e.g. 17/08/2002
                 elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
                   '%d/%m/%y'      # e.g. 17/08/02
                 elsif col =~ /^\d{4}-\d{2}-\d{2}$/     ## "standard" / default date format
                   '%Y-%m-%d'      # e.g. 1995-08-04
                 elsif col =~ /^\d{1,2} \w{3} \d{4}$/
                   '%d %b %Y'      # e.g. 8 Jul 2017
                 else
                   puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
                   ## todo/fix: add to errors/warns list - why? why not?
                   exit 1
                 end

      ## todo/check: use date object (keep string?) - why? why not?
      ##  todo/fix: yes!! use date object!!!! do NOT use string
      Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
    end

    ## Converts a one or two digit number (e.g. round or goals) to an integer;
    ##   returns nil for everything else - e.g. ignore ? or - or such non-numbers for now
    def parse_number( value )
      value.to_i  if value =~ /^\d{1,2}$/
    end

    ## Converts an all-in-one score e.g. 1-1 or 1:1 to a pair of integers [1,1];
    ##   returns nil if the value does NOT match the score format
    def parse_score( value )
      return nil  unless value =~ /^\d{1,2}[\-:]\d{1,2}$/    ## sanity check scores format

      value.split( /[\-:]/ ).map( &:to_i )   ## allow 1-1 and 1:1
    end

    ## Normalizes the (optional) stage column.
    def parse_stage( value )
      if value.nil? || value.empty? || value == '-' || value == 'n/a'
        ## note: allow missing stage for match / defaults to "regular"
        nil
      elsif value == '?'
        ## note: default explicit unknown to unknown for now AND not regular - why? why not?
        '?'   ## todo/check: use unknown and NOT ?  - why? why not?
      else
        value
      end
    end

  end # class CsvMatchParser
end # module SportDb
|
314
|
+
|
@@ -2,6 +2,9 @@
|
|
2
2
|
module SportDb
|
3
3
|
class Package
|
4
4
|
|
5
|
+
## todo/fix: make all regexes case-insensitive with /i option - why? why not?
|
6
|
+
## e.g. .TXT and .txt
|
7
|
+
|
5
8
|
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
6
9
|
\.conf\.txt$
|
7
10
|
}x
|
@@ -26,14 +29,33 @@ module SportDb
|
|
26
29
|
clubs\.props\.txt$
|
27
30
|
}x
|
28
31
|
|
32
|
+
|
33
|
+
### season folder:
|
34
|
+
## e.g. /2019-20 or
|
35
|
+
## year-only e.g. /2019 or
|
36
|
+
## /2016--france
|
37
|
+
SEASON_RE = %r{ (?:
|
38
|
+
\d{4}-\d{2}
|
39
|
+
| \d{4}(--[^/]+)?
|
40
|
+
)
|
41
|
+
}x
|
42
|
+
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
43
|
+
|
44
|
+
|
29
45
|
## note: if pattern includes directory add here
|
30
46
|
## (otherwise move to more "generic" datafile) - why? why not?
|
31
|
-
MATCH_RE = %r{
|
32
|
-
|
33
|
-
|
34
|
-
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
47
|
+
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
48
|
+
#{SEASON}
|
49
|
+
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
35
50
|
}x
|
36
51
|
|
52
|
+
MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
53
|
+
#{SEASON}
|
54
|
+
/[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
55
|
+
}x
|
56
|
+
|
57
|
+
|
58
|
+
|
37
59
|
## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
|
38
60
|
|
39
61
|
def self.find( path, pattern )
|
@@ -41,7 +63,7 @@ module SportDb
|
|
41
63
|
|
42
64
|
## check all txt files
|
43
65
|
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
44
|
-
candidates = Dir.glob( "#{path}/**/{*,.*}
|
66
|
+
candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
|
45
67
|
pp candidates
|
46
68
|
candidates.each do |candidate|
|
47
69
|
datafiles << candidate if pattern.match( candidate )
|
@@ -66,6 +88,15 @@ module SportDb
|
|
66
88
|
def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
|
67
89
|
def self.match_conf( path ) CONF_RE.match( path ); end
|
68
90
|
|
91
|
+
## Finds all match datafiles below +path+ by delegating to find;
##   uses MATCH_CSV_RE if format is 'csv',
##   otherwise always assumes txt (MATCH_RE) for now.
def self.find_match( path, format: 'txt' )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  find( path, pattern )
end
|
98
|
+
## add match_match and match_match_csv - why? why not?
|
99
|
+
|
69
100
|
class << self
|
70
101
|
alias_method :match_clubs?, :match_clubs
|
71
102
|
alias_method :clubs?, :match_clubs
|
@@ -149,7 +180,14 @@ module SportDb
|
|
149
180
|
end
|
150
181
|
|
151
182
|
def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
|
152
|
-
def each_match(
|
183
|
+
## Passes the block on to each with the match datafile pattern:
##   MATCH_CSV_RE if format is 'csv', otherwise (always) MATCH_RE.
def each_match( format: 'txt', &blk )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  each( pattern: pattern, &blk )
end
|
190
|
+
## Passes the block on to each with the csv match datafile pattern (MATCH_CSV_RE).
def each_match_csv( &blk )
  each( pattern: MATCH_CSV_RE, &blk )
end
|
153
191
|
def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
|
154
192
|
|
155
193
|
def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
|
@@ -157,8 +195,100 @@ module SportDb
|
|
157
195
|
def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
|
158
196
|
|
159
197
|
## return all match datafile entries
|
160
|
-
def match(
|
198
|
+
## Collects everything each_match yields for the format
##   into an array and returns it.
def match( format: 'txt' )
  entries = []
  each_match( format: format ) { |entry| entries.push( entry ) }
  entries
end
|
161
201
|
alias_method :matches, :match
|
202
|
+
|
203
|
+
|
204
|
+
## todo/check: rename/change to match_by_dir - why? why not?
|
205
|
+
## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
|
206
|
+
## Groups all match datafile entries by the directory of the entry name
##  and returns an array of [directory, entries] pairs
##  (in order of first appearance) e.g.
##
##    [["1950s/1956-57",
##       ["1950s/1956-57/1-division1.csv",
##        "1950s/1956-57/2-division2.csv",
##        "1950s/1956-57/3a-division3n.csv",
##        "1950s/1956-57/3b-division3s.csv"]],
##      ...]
##
## todo/fix: - add sort entries by name - why? why not?
##   note: assume 1-,2- etc. gets us back sorted leagues
##     - use sort. (will not sort by default?)
def match_by_season_dir( format: 'txt' )
  by_dir = match( format: format ).group_by { |entry| File.dirname( entry.name ) }

  by_dir.to_a    ## return as array (or keep hash) - why? why not?
end # method match_by_season_dir
|
229
|
+
|
230
|
+
## Groups all match datafile entries by (normalized) season key
##  and returns an array of [season key, entries] pairs
##  sorted by season key in descending order, e.g.
##
##    [["2017/18", ["2017-18/1-proleague.csv"]],
##     ["2016/17", ["2016-17/1-proleague.csv"]],
##     ["2015/16", ["2015-16/1-proleague.csv"]],
##      ...]
##
##  note: the last directory of the entry name is assumed to be the season;
##   same seasons in different directories, e.g.
##     1950s/1957-58/1-division1.csv   and
##     archives/1950s/1957-58/1-division1.csv
##   get folded into the same list.
##
##  start:  optional first season to include;
##            all seasons with an earlier start year get skipped
##
## todo/fix: (re)use a more generic season filter instead of start - why? why not?
## todo/fix: - add sort entries by name - why? why not?
def match_by_season( format: 'txt', start: nil )
  first_season = start ? Import::Season.new( start ) : nil

  by_season = match( format: format ).each_with_object( {} ) do |entry, seasons|
    ## note: assume last directory in datafile path is the season part/key
    season_dir = File.basename( File.dirname( entry.name ))
    season     = Import::Season.new( season_dir )  ## normalize season

    ## skip if this season starts before the start season
    next  if first_season && first_season.start_year > season.start_year

    (seasons[ season.key ] ||= []) << entry
  end

  ## sort by season - latest / newest first (and oldest last)
  by_season.to_a.sort { |left, right| right[0] <=> left[0] }
end # method match_by_season
|
162
292
|
end # class Package
|
163
293
|
|
164
294
|
|