sportdb-formats 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +4 -0
- data/Rakefile +2 -2
- data/lib/sportdb/formats.rb +17 -5
- data/lib/sportdb/formats/datafile_package.rb +10 -7
- data/lib/sportdb/formats/match/match_parser_csv.rb +314 -0
- data/lib/sportdb/formats/package.rb +137 -7
- data/lib/sportdb/formats/score/score_formats.rb +41 -1
- data/lib/sportdb/formats/team/club_reader_props.rb +3 -3
- data/lib/sportdb/formats/version.rb +3 -1
- data/test/helper.rb +1 -1
- data/test/test_club_reader_props.rb +2 -2
- data/test/test_csv_match_parser.rb +114 -0
- data/test/test_csv_match_parser_utils.rb +20 -0
- data/test/test_csv_reader.rb +5 -5
- data/test/test_datafile.rb +0 -32
- data/test/test_datafile_package.rb +46 -0
- data/test/test_package.rb +60 -28
- data/test/test_package_match.rb +27 -3
- data/test/test_scores.rb +58 -49
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a27c364d2003ece8da886c892d34d595ffcffcc
|
4
|
+
data.tar.gz: 81530dfec5decf5d9476a3f2be90bcbeb0195824
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a2aabb65968939d55f2000c64e4ac5df4cbedfe6b3786fd12ff86d26bd5e6af12c32ea129b59a7ddf30736afa6f093e0e4f2e97f2c6f0b23bffd6901a83ca91
|
7
|
+
data.tar.gz: 63f4ca42ebb537173334a931a02331b8cbdc27d9e953052c8126d9e683e54a3103eb496103fea400283c6b0b08fa7c6e738466f215e26f9567a3b822a80c172d
|
data/Manifest.txt
CHANGED
@@ -17,6 +17,7 @@ lib/sportdb/formats/match/mapper.rb
|
|
17
17
|
lib/sportdb/formats/match/mapper_teams.rb
|
18
18
|
lib/sportdb/formats/match/match_parser.rb
|
19
19
|
lib/sportdb/formats/match/match_parser_auto_conf.rb
|
20
|
+
lib/sportdb/formats/match/match_parser_csv.rb
|
20
21
|
lib/sportdb/formats/name_helper.rb
|
21
22
|
lib/sportdb/formats/outline_reader.rb
|
22
23
|
lib/sportdb/formats/package.rb
|
@@ -49,8 +50,11 @@ test/test_clubs.rb
|
|
49
50
|
test/test_conf.rb
|
50
51
|
test/test_country_index.rb
|
51
52
|
test/test_country_reader.rb
|
53
|
+
test/test_csv_match_parser.rb
|
54
|
+
test/test_csv_match_parser_utils.rb
|
52
55
|
test/test_csv_reader.rb
|
53
56
|
test/test_datafile.rb
|
57
|
+
test/test_datafile_package.rb
|
54
58
|
test/test_goals.rb
|
55
59
|
test/test_league_index.rb
|
56
60
|
test/test_league_outline_reader.rb
|
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require './lib/sportdb/formats/version.rb'
|
|
3
3
|
|
4
4
|
Hoe.spec 'sportdb-formats' do
|
5
5
|
|
6
|
-
self.version = SportDb::Formats::VERSION
|
6
|
+
self.version = SportDb::Module::Formats::VERSION
|
7
7
|
|
8
8
|
self.summary = "sportdb-formats - sport.db format and text utilities"
|
9
9
|
self.description = summary
|
@@ -21,7 +21,7 @@ Hoe.spec 'sportdb-formats' do
|
|
21
21
|
|
22
22
|
self.extra_deps = [
|
23
23
|
['alphabets', '>= 1.0.0'],
|
24
|
-
['date-formats', '>= 1.0.
|
24
|
+
['date-formats', '>= 1.0.1'],
|
25
25
|
['csvreader', '>= 1.2.4'],
|
26
26
|
['sportdb-langs', '>= 0.1.0'],
|
27
27
|
|
data/lib/sportdb/formats.rb
CHANGED
@@ -10,15 +10,26 @@ require 'zip' ## todo/check: if zip is alreay included in a required module
|
|
10
10
|
|
11
11
|
|
12
12
|
|
13
|
-
def read_csv( path
|
14
|
-
|
13
|
+
def read_csv( path, sep: nil,
|
14
|
+
symbolize_names: nil )
|
15
|
+
opts = {}
|
16
|
+
opts[:sep] = sep if sep
|
17
|
+
opts[:header_converters] = :symbol if symbolize_names
|
18
|
+
|
19
|
+
CsvHash.read( path, **opts )
|
15
20
|
end
|
16
21
|
|
17
|
-
def parse_csv( txt
|
18
|
-
|
22
|
+
def parse_csv( txt, sep: nil,
|
23
|
+
symbolize_names: nil )
|
24
|
+
opts = {}
|
25
|
+
opts[:sep] = sep if sep
|
26
|
+
opts[:header_converters] = :symbol if symbolize_names
|
27
|
+
|
28
|
+
CsvHash.parse( txt, **opts )
|
19
29
|
end
|
20
30
|
|
21
31
|
|
32
|
+
|
22
33
|
## more sportdb libs/gems
|
23
34
|
require 'sportdb/langs'
|
24
35
|
|
@@ -68,6 +79,7 @@ require 'sportdb/formats/match/match_parser'
|
|
68
79
|
require 'sportdb/formats/match/match_parser_auto_conf'
|
69
80
|
require 'sportdb/formats/match/conf_parser'
|
70
81
|
|
82
|
+
require 'sportdb/formats/match/match_parser_csv'
|
71
83
|
|
72
84
|
require 'sportdb/formats/country/country_reader'
|
73
85
|
require 'sportdb/formats/country/country_index'
|
@@ -147,4 +159,4 @@ end # module SportDb
|
|
147
159
|
|
148
160
|
|
149
161
|
|
150
|
-
puts SportDb::Formats.banner # say hello
|
162
|
+
puts SportDb::Module::Formats.banner # say hello
|
@@ -30,7 +30,7 @@ class Entry
|
|
30
30
|
@name = path[ pack.path.length+1..-1 ]
|
31
31
|
end
|
32
32
|
def name() @name; end
|
33
|
-
def read() File.open( @path, 'r:utf-8' ).read; end
|
33
|
+
def read() File.open( @path, 'r:utf-8' ) {|f| f.read }; end
|
34
34
|
end # class DirPackage::Entry
|
35
35
|
|
36
36
|
|
@@ -44,17 +44,20 @@ end # class DirPackage::Entry
|
|
44
44
|
@name = basename
|
45
45
|
end
|
46
46
|
|
47
|
-
|
47
|
+
## todo/check: change pattern: to re: - why? why not?
|
48
|
+
def each( pattern: ) ## todo/check: rename to glob or something - why? why not?
|
48
49
|
## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
|
49
50
|
## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
|
50
|
-
|
51
|
-
|
52
|
-
if
|
53
|
-
|
51
|
+
## todo/check/fix: is there a better (simpler) glob pattern? yes? no?
|
52
|
+
Dir.glob( "#{@path}/**/{*,.*}.*" ).each do |path|
|
53
|
+
if File.directory?( path )
|
54
|
+
## always skip directories / folders
|
55
|
+
elsif EXCLUDE_RE.match( path )
|
56
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
54
57
|
elsif pattern.match( path )
|
55
58
|
yield( Entry.new( self, path ))
|
56
59
|
else
|
57
|
-
|
60
|
+
## puts " skipping >#{path}<"
|
58
61
|
end
|
59
62
|
end
|
60
63
|
end
|
@@ -0,0 +1,314 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
class CsvMatchParser
|
6
|
+
|
7
|
+
#############
|
8
|
+
# helpers
|
9
|
+
def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
|
10
|
+
|
11
|
+
## check if headers incl. season; if yes, it has priority over the col mapping
|
12
|
+
## e.g. no need to specify twice (if using headers)
|
13
|
+
col = headers[:season] if headers && headers[:season]
|
14
|
+
|
15
|
+
seasons = Hash.new( 0 ) ## default value is 0
|
16
|
+
|
17
|
+
## todo/fix: yes, use CsvHash.foreach - why? why not?
|
18
|
+
## use read_csv with block to switch to foreach!!!!
|
19
|
+
rows = read_csv( path, sep: sep )
|
20
|
+
|
21
|
+
rows.each_with_index do |row,i|
|
22
|
+
puts "[#{i}] " + row.inspect if i < 2
|
23
|
+
|
24
|
+
season = row[ col ] ## column name defaults to 'Season'
|
25
|
+
seasons[ season ] += 1
|
26
|
+
end
|
27
|
+
|
28
|
+
pp seasons
|
29
|
+
|
30
|
+
## note: only return season keys/names (not hash with usage counter)
|
31
|
+
seasons.keys
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
##########
|
36
|
+
# main machinery
|
37
|
+
|
38
|
+
## todo/fix: use a generic "global" parse_csv method - why? why not?
|
39
|
+
## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
|
40
|
+
## ## note: do NOT symbolize keys - keep them as is!!!!!!
|
41
|
+
## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
|
42
|
+
## CsvHash.parse( text, sep: sep )
|
43
|
+
## end
|
44
|
+
|
45
|
+
def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
|
46
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
47
|
+
parse( txt, headers: headers,
|
48
|
+
filters: filters,
|
49
|
+
converters: converters,
|
50
|
+
sep: sep )
|
51
|
+
end
|
52
|
+
|
53
|
+
def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
|
54
|
+
new( txt ).parse( headers: headers,
|
55
|
+
filters: filters,
|
56
|
+
converters: converters,
|
57
|
+
sep: sep )
|
58
|
+
end
|
59
|
+
|
60
|
+
|
61
|
+
def initialize( txt )
|
62
|
+
@txt = txt
|
63
|
+
end
|
64
|
+
|
65
|
+
def parse( headers: nil, filters: nil, converters: nil, sep: nil )
|
66
|
+
|
67
|
+
headers_mapping = {}
|
68
|
+
|
69
|
+
rows = parse_csv( @txt, sep: sep )
|
70
|
+
|
71
|
+
return [] if rows.empty? ## no rows / empty?
|
72
|
+
|
73
|
+
|
74
|
+
## fix/todo: use logger!!!!
|
75
|
+
## pp csv
|
76
|
+
|
77
|
+
if headers ## use user supplied headers if present
|
78
|
+
headers_mapping = headers_mapping.merge( headers )
|
79
|
+
else
|
80
|
+
|
81
|
+
## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
|
82
|
+
headers = rows[0].keys
|
83
|
+
pp headers
|
84
|
+
|
85
|
+
# note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
|
86
|
+
# e.g. row['HomeTeam'] || row['HT'] will NOT work for now
|
87
|
+
|
88
|
+
if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
|
89
|
+
## assume our own football.csv format, see github.com/footballcsv
|
90
|
+
headers_mapping[:team1] = find_header( headers, ['Team 1'] )
|
91
|
+
headers_mapping[:team2] = find_header( headers, ['Team 2'] )
|
92
|
+
headers_mapping[:date] = find_header( headers, ['Date'] )
|
93
|
+
|
94
|
+
## check for all-in-one full time (ft) and half time (ht) scores?
|
95
|
+
headers_mapping[:score] = find_header( headers, ['FT'] )
|
96
|
+
headers_mapping[:scorei] = find_header( headers, ['HT'] )
|
97
|
+
|
98
|
+
headers_mapping[:round] = find_header( headers, ['Round'] )
|
99
|
+
|
100
|
+
## optional headers - note: find_header returns nil if header NOT found
|
101
|
+
header_stage = find_header( headers, ['Stage'] )
|
102
|
+
headers_mapping[:stage] = header_stage if header_stage
|
103
|
+
else
|
104
|
+
## else try footballdata.uk and others
|
105
|
+
headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
|
106
|
+
headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
|
107
|
+
headers_mapping[:date] = find_header( headers, ['Date'] )
|
108
|
+
|
109
|
+
## note: FT = Full Time, HG = Home Goal, AG = Away Goal
|
110
|
+
headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
|
111
|
+
headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
|
112
|
+
|
113
|
+
## check for half time scores ?
|
114
|
+
## note: HT = Half Time
|
115
|
+
headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
|
116
|
+
headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
pp headers_mapping
|
121
|
+
|
122
|
+
### todo/fix: check headers - how?
|
123
|
+
## if present HomeTeam or HT required etc.
|
124
|
+
## issue error/warn if not present
|
125
|
+
##
|
126
|
+
## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
|
127
|
+
## exit 1
|
128
|
+
##
|
129
|
+
|
130
|
+
matches = []
|
131
|
+
|
132
|
+
rows.each_with_index do |row,i|
|
133
|
+
|
134
|
+
## fix/todo: use logger!!!!
|
135
|
+
## puts "[#{i}] " + row.inspect if i < 2
|
136
|
+
|
137
|
+
|
138
|
+
## todo/fix: move to its own (helper) method - filter or such!!!!
|
139
|
+
if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
|
140
|
+
skip = false
|
141
|
+
filters.each do |header, value|
|
142
|
+
if row[ header ] != value ## e.g. row['Season']
|
143
|
+
skip = true
|
144
|
+
break
|
145
|
+
end
|
146
|
+
end
|
147
|
+
next if skip ## if header values NOT matching
|
148
|
+
end
|
149
|
+
|
150
|
+
|
151
|
+
## note:
|
152
|
+
## add converters after filters for now (why not before filters?)
|
153
|
+
if converters ## any converters defined?
|
154
|
+
## convert single proc shortcut to array with single converter
|
155
|
+
converters = [converters] if converters.is_a?( Proc )
|
156
|
+
|
157
|
+
## assumes array of procs
|
158
|
+
converters.each do |converter|
|
159
|
+
row = converter.call( row )
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
team1 = row[ headers_mapping[ :team1 ]]
|
166
|
+
team2 = row[ headers_mapping[ :team2 ]]
|
167
|
+
|
168
|
+
|
169
|
+
## check if data present - if not skip (might be empty row)
|
170
|
+
if team1.nil? && team2.nil?
|
171
|
+
puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
|
172
|
+
pp row
|
173
|
+
next
|
174
|
+
end
|
175
|
+
|
176
|
+
## remove possible match played counters e.g. (4) (11) etc.
|
177
|
+
team1 = team1.sub( /\(\d+\)/, '' ).strip
|
178
|
+
team2 = team2.sub( /\(\d+\)/, '' ).strip
|
179
|
+
|
180
|
+
|
181
|
+
|
182
|
+
col = row[ headers_mapping[ :date ]]
|
183
|
+
col = col.strip # make sure not leading or trailing spaces left over
|
184
|
+
|
185
|
+
if col.empty? || col == '-' || col == '?'
|
186
|
+
## note: allow missing / unknown date for match
|
187
|
+
date = nil
|
188
|
+
else
|
189
|
+
## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
|
190
|
+
col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
|
191
|
+
col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
|
192
|
+
col = col.strip # make sure not leading or trailing spaces left over
|
193
|
+
|
194
|
+
if col =~ /^\d{2}\/\d{2}\/\d{4}$/
|
195
|
+
date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
|
196
|
+
elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
|
197
|
+
date_fmt = '%d/%m/%y' # e.g. 17/08/02
|
198
|
+
elsif col =~ /^\d{4}-\d{2}-\d{2}$/ ## "standard" / default date format
|
199
|
+
date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
|
200
|
+
elsif col =~ /^\d{1,2} \w{3} \d{4}$/
|
201
|
+
date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
|
202
|
+
else
|
203
|
+
puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
|
204
|
+
## todo/fix: add to errors/warns list - why? why not?
|
205
|
+
exit 1
|
206
|
+
end
|
207
|
+
|
208
|
+
## todo/check: use date object (keep string?) - why? why not?
|
209
|
+
## todo/fix: yes!! use date object!!!! do NOT use string
|
210
|
+
date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
|
211
|
+
end
|
212
|
+
|
213
|
+
|
214
|
+
round = nil
|
215
|
+
## check for (optional) round / matchday
|
216
|
+
if headers_mapping[ :round ]
|
217
|
+
col = row[ headers_mapping[ :round ]]
|
218
|
+
## todo: issue warning if not ? or - (and just empty string) why? why not
|
219
|
+
round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
|
220
|
+
end
|
221
|
+
|
222
|
+
|
223
|
+
score1 = nil
|
224
|
+
score2 = nil
|
225
|
+
score1i = nil
|
226
|
+
score2i = nil
|
227
|
+
|
228
|
+
## check for full time scores ?
|
229
|
+
if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
|
230
|
+
ft = [ row[ headers_mapping[ :score1 ]],
|
231
|
+
row[ headers_mapping[ :score2 ]] ]
|
232
|
+
|
233
|
+
## todo/fix: issue warning if not ? or - (and just empty string) why? why not
|
234
|
+
score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
|
235
|
+
score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
|
236
|
+
end
|
237
|
+
|
238
|
+
## check for half time scores ?
|
239
|
+
if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
|
240
|
+
ht = [ row[ headers_mapping[ :score1i ]],
|
241
|
+
row[ headers_mapping[ :score2i ]] ]
|
242
|
+
|
243
|
+
## todo/fix: issue warning if not ? or - (and just empty string) why? why not
|
244
|
+
score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
|
245
|
+
score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
|
246
|
+
end
|
247
|
+
|
248
|
+
## check for all-in-one full time scores?
|
249
|
+
if headers_mapping[ :score ]
|
250
|
+
ft = row[ headers_mapping[ :score ] ]
|
251
|
+
if ft =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
|
252
|
+
scores = ft.split( /[\-:]/ )
|
253
|
+
score1 = scores[0].to_i
|
254
|
+
score2 = scores[1].to_i
|
255
|
+
end
|
256
|
+
## todo/fix: issue warning if non-empty!!! and not matching format!!!!
|
257
|
+
end
|
258
|
+
|
259
|
+
if headers_mapping[ :scorei ]
|
260
|
+
ht = row[ headers_mapping[ :scorei ] ]
|
261
|
+
if ht =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
|
262
|
+
scores = ht.split( /[\-:]/) ## allow 1-1 and 1:1
|
263
|
+
score1i = scores[0].to_i
|
264
|
+
score2i = scores[1].to_i
|
265
|
+
end
|
266
|
+
## todo/fix: issue warning if non-empty!!! and not matching format!!!!
|
267
|
+
end
|
268
|
+
|
269
|
+
|
270
|
+
## try some optional headings / columns
|
271
|
+
stage = nil
|
272
|
+
if headers_mapping[ :stage ]
|
273
|
+
col = row[ headers_mapping[ :stage ]]
|
274
|
+
## todo/fix: check can col be nil e.g. col.nil? possible?
|
275
|
+
stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
|
276
|
+
## note: allow missing stage for match / defaults to "regular"
|
277
|
+
nil
|
278
|
+
elsif col == '?'
|
279
|
+
## note: default explicit unknown to unknown for now AND not regular - why? why not?
|
280
|
+
'?' ## todo/check: use unkown and NOT ? - why? why not?
|
281
|
+
else
|
282
|
+
col
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
|
287
|
+
match = Import::Match.new( date: date,
|
288
|
+
team1: team1, team2: team2,
|
289
|
+
score1: score1, score2: score2,
|
290
|
+
score1i: score1i, score2i: score2i,
|
291
|
+
round: round,
|
292
|
+
stage: stage )
|
293
|
+
matches << match
|
294
|
+
end
|
295
|
+
|
296
|
+
## pp matches
|
297
|
+
matches
|
298
|
+
end
|
299
|
+
|
300
|
+
|
301
|
+
private
|
302
|
+
|
303
|
+
def find_header( headers, candidates )
|
304
|
+
## todo/fix: use find (find first match) from Enumerable or similar ?! - why? more idiomatic code?
|
305
|
+
|
306
|
+
candidates.each do |candidate|
|
307
|
+
return candidate if headers.include?( candidate ) ## bingo!!!
|
308
|
+
end
|
309
|
+
nil ## no matching header found!!!
|
310
|
+
end
|
311
|
+
|
312
|
+
end # class CsvMatchParser
|
313
|
+
end # module SportDb
|
314
|
+
|
@@ -2,6 +2,9 @@
|
|
2
2
|
module SportDb
|
3
3
|
class Package
|
4
4
|
|
5
|
+
## todo/fix: make all regexes case-insensitive with /i option - why? why not?
|
6
|
+
## e.g. .TXT and .txt
|
7
|
+
|
5
8
|
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
6
9
|
\.conf\.txt$
|
7
10
|
}x
|
@@ -26,14 +29,33 @@ module SportDb
|
|
26
29
|
clubs\.props\.txt$
|
27
30
|
}x
|
28
31
|
|
32
|
+
|
33
|
+
### season folder:
|
34
|
+
## e.g. /2019-20 or
|
35
|
+
## year-only e.g. /2019 or
|
36
|
+
## /2016--france
|
37
|
+
SEASON_RE = %r{ (?:
|
38
|
+
\d{4}-\d{2}
|
39
|
+
| \d{4}(--[^/]+)?
|
40
|
+
)
|
41
|
+
}x
|
42
|
+
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
43
|
+
|
44
|
+
|
29
45
|
## note: if pattern includes directory add here
|
30
46
|
## (otherwise move to more "generic" datafile) - why? why not?
|
31
|
-
MATCH_RE = %r{
|
32
|
-
|
33
|
-
|
34
|
-
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
47
|
+
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
48
|
+
#{SEASON}
|
49
|
+
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
35
50
|
}x
|
36
51
|
|
52
|
+
MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
53
|
+
#{SEASON}
|
54
|
+
/[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
55
|
+
}x
|
56
|
+
|
57
|
+
|
58
|
+
|
37
59
|
## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
|
38
60
|
|
39
61
|
def self.find( path, pattern )
|
@@ -41,7 +63,7 @@ module SportDb
|
|
41
63
|
|
42
64
|
## check all txt files
|
43
65
|
## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
|
44
|
-
candidates = Dir.glob( "#{path}/**/{*,.*}
|
66
|
+
candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
|
45
67
|
pp candidates
|
46
68
|
candidates.each do |candidate|
|
47
69
|
datafiles << candidate if pattern.match( candidate )
|
@@ -66,6 +88,15 @@ module SportDb
|
|
66
88
|
def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
|
67
89
|
def self.match_conf( path ) CONF_RE.match( path ); end
|
68
90
|
|
91
|
+
def self.find_match( path, format: 'txt' )
|
92
|
+
if format == 'csv'
|
93
|
+
find( path, MATCH_CSV_RE )
|
94
|
+
else ## otherwise always assume txt for now
|
95
|
+
find( path, MATCH_RE )
|
96
|
+
end
|
97
|
+
end
|
98
|
+
## add match_match and match_match_csv - why? why not?
|
99
|
+
|
69
100
|
class << self
|
70
101
|
alias_method :match_clubs?, :match_clubs
|
71
102
|
alias_method :clubs?, :match_clubs
|
@@ -149,7 +180,14 @@ module SportDb
|
|
149
180
|
end
|
150
181
|
|
151
182
|
def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
|
152
|
-
def each_match(
|
183
|
+
def each_match( format: 'txt', &blk )
|
184
|
+
if format == 'csv'
|
185
|
+
each( pattern: MATCH_CSV_RE, &blk );
|
186
|
+
else
|
187
|
+
each( pattern: MATCH_RE, &blk );
|
188
|
+
end
|
189
|
+
end
|
190
|
+
def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
|
153
191
|
def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
|
154
192
|
|
155
193
|
def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
|
@@ -157,8 +195,100 @@ module SportDb
|
|
157
195
|
def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
|
158
196
|
|
159
197
|
## return all match datafile entries
|
160
|
-
def match(
|
198
|
+
def match( format: 'txt' )
|
199
|
+
ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
|
200
|
+
end
|
161
201
|
alias_method :matches, :match
|
202
|
+
|
203
|
+
|
204
|
+
## todo/check: rename/change to match_by_dir - why? why not?
|
205
|
+
## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
|
206
|
+
def match_by_season_dir( format: 'txt' )
|
207
|
+
##
|
208
|
+
## [["1950s/1956-57",
|
209
|
+
## ["1950s/1956-57/1-division1.csv",
|
210
|
+
## "1950s/1956-57/2-division2.csv",
|
211
|
+
## "1950s/1956-57/3a-division3n.csv",
|
212
|
+
## "1950s/1956-57/3b-division3s.csv"]],
|
213
|
+
## ...]
|
214
|
+
|
215
|
+
h = {}
|
216
|
+
match( format: format ).each do |entry|
|
217
|
+
season_path = File.dirname( entry.name )
|
218
|
+
|
219
|
+
h[ season_path ] ||= []
|
220
|
+
h[ season_path ] << entry
|
221
|
+
end
|
222
|
+
|
223
|
+
## todo/fix: - add sort entries by name - why? why not?
|
224
|
+
## note: assume 1-,2- etc. gets us back sorted leagues
|
225
|
+
## - use sort. (will not sort by default?)
|
226
|
+
|
227
|
+
h.to_a ## return as array (or keep hash) - why? why not?
|
228
|
+
end # method match_by_season_dir
|
229
|
+
|
230
|
+
def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
|
231
|
+
|
232
|
+
## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
|
233
|
+
|
234
|
+
## note: fold all same seasons (even if in different directories)
|
235
|
+
## into same datafile list e.g.
|
236
|
+
## ["1957/58",
|
237
|
+
## ["1950s/1957-58/1-division1.csv",
|
238
|
+
## "1950s/1957-58/2-division2.csv",
|
239
|
+
## "1950s/1957-58/3a-division3n.csv",
|
240
|
+
## "1950s/1957-58/3b-division3s.csv"]],
|
241
|
+
## and
|
242
|
+
## ["1957/58",
|
243
|
+
## ["archives/1950s/1957-58/1-division1.csv",
|
244
|
+
## "archives/1950s/1957-58/2-division2.csv",
|
245
|
+
## "archives/1950s/1957-58/3a-division3n.csv",
|
246
|
+
## "archives/1950s/1957-58/3b-division3s.csv"]],
|
247
|
+
## should be together - why? why not?
|
248
|
+
|
249
|
+
####
|
250
|
+
# Example package:
|
251
|
+
# [["2012/13", ["2012-13/1-proleague.csv"]],
|
252
|
+
# ["2013/14", ["2013-14/1-proleague.csv"]],
|
253
|
+
# ["2014/15", ["2014-15/1-proleague.csv"]],
|
254
|
+
# ["2015/16", ["2015-16/1-proleague.csv"]],
|
255
|
+
# ["2016/17", ["2016-17/1-proleague.csv"]],
|
256
|
+
# ["2017/18", ["2017-18/1-proleague.csv"]]]
|
257
|
+
|
258
|
+
## todo/fix: (re)use a more generic filter instead of start for start of season only
|
259
|
+
|
260
|
+
## todo/fix: use a "generic" filter_season helper for easy reuse
|
261
|
+
## filter_season( clause, season_key )
|
262
|
+
## or better filter = SeasonFilter.new( clause )
|
263
|
+
## filter.skip? filter.include? ( season_key )?
|
264
|
+
## filter.before?( season_key ) etc.
|
265
|
+
## find some good method names!!!!
|
266
|
+
season_start = start ? Import::Season.new( start ) : nil
|
267
|
+
|
268
|
+
h = {}
|
269
|
+
match( format: format ).each do |entry|
|
270
|
+
## note: assume last directory in datafile path is the season part/key
|
271
|
+
season_q = File.basename( File.dirname( entry.name ))
|
272
|
+
season = Import::Season.new( season_q ) ## normalize season
|
273
|
+
|
274
|
+
## skip if this season starts before the (requested) start season
|
275
|
+
next if season_start && season_start.start_year > season.start_year
|
276
|
+
|
277
|
+
h[ season.key ] ||= []
|
278
|
+
h[ season.key ] << entry
|
279
|
+
end
|
280
|
+
|
281
|
+
## todo/fix: - add sort entries by name - why? why not?
|
282
|
+
## note: assume 1-,2- etc. gets us back sorted leagues
|
283
|
+
## - use sort. (will not sort by default?)
|
284
|
+
|
285
|
+
## sort by season
|
286
|
+
## latest / newest first (and oldest last)
|
287
|
+
|
288
|
+
h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
|
289
|
+
r[0] <=> l[0]
|
290
|
+
end
|
291
|
+
end # method match_by_season
|
162
292
|
end # class Package
|
163
293
|
|
164
294
|
|