sportdb-formats 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7b1daae3f04961c06c34b98e84438ee39a80c5ca
4
- data.tar.gz: 872154cb339bb6b6c8f440a392f250dc7d7387d9
3
+ metadata.gz: 4a27c364d2003ece8da886c892d34d595ffcffcc
4
+ data.tar.gz: 81530dfec5decf5d9476a3f2be90bcbeb0195824
5
5
  SHA512:
6
- metadata.gz: 853b0362fde6f9d8b17db7036ae954768500ab681acff7bf859b8cbf59b7d8bc4bec9d153f21e3c7ac069b6486b10386ce99fe151d4229df815c006a27dccbe0
7
- data.tar.gz: e356061bfb0f15ff8274651ce107ec3be00818996d0c29a2ffb26bd5b71c385a3bd1464eea42e2e11ab60dc24ce96663a701e28fa6a8aebd27d434525c71a1f4
6
+ metadata.gz: 4a2aabb65968939d55f2000c64e4ac5df4cbedfe6b3786fd12ff86d26bd5e6af12c32ea129b59a7ddf30736afa6f093e0e4f2e97f2c6f0b23bffd6901a83ca91
7
+ data.tar.gz: 63f4ca42ebb537173334a931a02331b8cbdc27d9e953052c8126d9e683e54a3103eb496103fea400283c6b0b08fa7c6e738466f215e26f9567a3b822a80c172d
@@ -17,6 +17,7 @@ lib/sportdb/formats/match/mapper.rb
17
17
  lib/sportdb/formats/match/mapper_teams.rb
18
18
  lib/sportdb/formats/match/match_parser.rb
19
19
  lib/sportdb/formats/match/match_parser_auto_conf.rb
20
+ lib/sportdb/formats/match/match_parser_csv.rb
20
21
  lib/sportdb/formats/name_helper.rb
21
22
  lib/sportdb/formats/outline_reader.rb
22
23
  lib/sportdb/formats/package.rb
@@ -49,8 +50,11 @@ test/test_clubs.rb
49
50
  test/test_conf.rb
50
51
  test/test_country_index.rb
51
52
  test/test_country_reader.rb
53
+ test/test_csv_match_parser.rb
54
+ test/test_csv_match_parser_utils.rb
52
55
  test/test_csv_reader.rb
53
56
  test/test_datafile.rb
57
+ test/test_datafile_package.rb
54
58
  test/test_goals.rb
55
59
  test/test_league_index.rb
56
60
  test/test_league_outline_reader.rb
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require './lib/sportdb/formats/version.rb'
3
3
 
4
4
  Hoe.spec 'sportdb-formats' do
5
5
 
6
- self.version = SportDb::Formats::VERSION
6
+ self.version = SportDb::Module::Formats::VERSION
7
7
 
8
8
  self.summary = "sportdb-formats - sport.db format and text utilities"
9
9
  self.description = summary
@@ -21,7 +21,7 @@ Hoe.spec 'sportdb-formats' do
21
21
 
22
22
  self.extra_deps = [
23
23
  ['alphabets', '>= 1.0.0'],
24
- ['date-formats', '>= 1.0.0'],
24
+ ['date-formats', '>= 1.0.1'],
25
25
  ['csvreader', '>= 1.2.4'],
26
26
  ['sportdb-langs', '>= 0.1.0'],
27
27
 
@@ -10,15 +10,26 @@ require 'zip' ## todo/check: if zip is alreay included in a required module
10
10
 
11
11
 
12
12
 
13
- def read_csv( path )
14
- CsvHash.read( path, :header_converters => :symbol )
13
+ def read_csv( path, sep: nil,
14
+ symbolize_names: nil )
15
+ opts = {}
16
+ opts[:sep] = sep if sep
17
+ opts[:header_converters] = :symbol if symbolize_names
18
+
19
+ CsvHash.read( path, **opts )
15
20
  end
16
21
 
17
- def parse_csv( txt )
18
- CsvHash.parse( txt, :header_converters => :symbol )
22
+ def parse_csv( txt, sep: nil,
23
+ symbolize_names: nil )
24
+ opts = {}
25
+ opts[:sep] = sep if sep
26
+ opts[:header_converters] = :symbol if symbolize_names
27
+
28
+ CsvHash.parse( txt, **opts )
19
29
  end
20
30
 
21
31
 
32
+
22
33
  ## more sportdb libs/gems
23
34
  require 'sportdb/langs'
24
35
 
@@ -68,6 +79,7 @@ require 'sportdb/formats/match/match_parser'
68
79
  require 'sportdb/formats/match/match_parser_auto_conf'
69
80
  require 'sportdb/formats/match/conf_parser'
70
81
 
82
+ require 'sportdb/formats/match/match_parser_csv'
71
83
 
72
84
  require 'sportdb/formats/country/country_reader'
73
85
  require 'sportdb/formats/country/country_index'
@@ -147,4 +159,4 @@ end # module SportDb
147
159
 
148
160
 
149
161
 
150
- puts SportDb::Formats.banner # say hello
162
+ puts SportDb::Module::Formats.banner # say hello
@@ -30,7 +30,7 @@ class Entry
30
30
  @name = path[ pack.path.length+1..-1 ]
31
31
  end
32
32
  def name() @name; end
33
- def read() File.open( @path, 'r:utf-8' ).read; end
33
+ def read() File.open( @path, 'r:utf-8' ) {|f| f.read }; end
34
34
  end # class DirPackage::Entry
35
35
 
36
36
 
@@ -44,17 +44,20 @@ end # class DirPackage::Entry
44
44
  @name = basename
45
45
  end
46
46
 
47
- def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
47
+ ## todo/check: change pattern: to re: - why? why not?
48
+ def each( pattern: ) ## todo/check: rename to glob or something - why? why not?
48
49
  ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
49
50
  ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
50
- Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
- ## todo/fix: (auto) skip and check for directories
52
- if EXCLUDE_RE.match( path )
53
- ## note: skip dot dirs (e.g. .build/, .git/, etc.)
51
+ ## todo/check/fix: is there a better (simpler) glob pattern? yes? no?
52
+ Dir.glob( "#{@path}/**/{*,.*}.*" ).each do |path|
53
+ if File.directory?( path )
54
+ ## always skip directories / folders
55
+ elsif EXCLUDE_RE.match( path )
56
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
54
57
  elsif pattern.match( path )
55
58
  yield( Entry.new( self, path ))
56
59
  else
57
- ## puts " skipping >#{path}<"
60
+ ## puts " skipping >#{path}<"
58
61
  end
59
62
  end
60
63
  end
@@ -0,0 +1,314 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ class CsvMatchParser
6
+
7
+ #############
8
+ # helpers
9
+ def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
10
+
11
+ ## check if headers incl. season; if yes, it has priority over the col mapping
12
+ ## e.g. no need to specify twice (if using headers)
13
+ col = headers[:season] if headers && headers[:season]
14
+
15
+ seasons = Hash.new( 0 ) ## default value is 0
16
+
17
+ ## todo/fix: yes, use CsvHash.foreach - why? why not?
18
+ ## use read_csv with block to switch to foreach!!!!
19
+ rows = read_csv( path, sep: sep )
20
+
21
+ rows.each_with_index do |row,i|
22
+ puts "[#{i}] " + row.inspect if i < 2
23
+
24
+ season = row[ col ] ## column name defaults to 'Season'
25
+ seasons[ season ] += 1
26
+ end
27
+
28
+ pp seasons
29
+
30
+ ## note: only return season keys/names (not hash with usage counter)
31
+ seasons.keys
32
+ end
33
+
34
+
35
+ ##########
36
+ # main machinery
37
+
38
+ ## todo/fix: use a generic "global" parse_csv method - why? why not?
39
+ ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
40
+ ## ## note: do NOT symbolize keys - keep them as is!!!!!!
41
+ ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
42
+ ## CsvHash.parse( text, sep: sep )
43
+ ## end
44
+
45
+ def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
46
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
47
+ parse( txt, headers: headers,
48
+ filters: filters,
49
+ converters: converters,
50
+ sep: sep )
51
+ end
52
+
53
+ def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
54
+ new( txt ).parse( headers: headers,
55
+ filters: filters,
56
+ converters: converters,
57
+ sep: sep )
58
+ end
59
+
60
+
61
+ def initialize( txt )
62
+ @txt = txt
63
+ end
64
+
65
+ def parse( headers: nil, filters: nil, converters: nil, sep: nil )
66
+
67
+ headers_mapping = {}
68
+
69
+ rows = parse_csv( @txt, sep: sep )
70
+
71
+ return [] if rows.empty? ## no rows / empty?
72
+
73
+
74
+ ## fix/todo: use logger!!!!
75
+ ## pp csv
76
+
77
+ if headers ## use user supplied headers if present
78
+ headers_mapping = headers_mapping.merge( headers )
79
+ else
80
+
81
+ ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
82
+ headers = rows[0].keys
83
+ pp headers
84
+
85
+ # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
86
+ # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
87
+
88
+ if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
89
+ ## assume our own football.csv format, see github.com/footballcsv
90
+ headers_mapping[:team1] = find_header( headers, ['Team 1'] )
91
+ headers_mapping[:team2] = find_header( headers, ['Team 2'] )
92
+ headers_mapping[:date] = find_header( headers, ['Date'] )
93
+
94
+ ## check for all-in-one full time (ft) and half time (ht) scores?
95
+ headers_mapping[:score] = find_header( headers, ['FT'] )
96
+ headers_mapping[:scorei] = find_header( headers, ['HT'] )
97
+
98
+ headers_mapping[:round] = find_header( headers, ['Round'] )
99
+
100
+ ## optional headers - note: find_header returns nil if header NOT found
101
+ header_stage = find_header( headers, ['Stage'] )
102
+ headers_mapping[:stage] = header_stage if header_stage
103
+ else
104
+ ## else try footballdata.uk and others
105
+ headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
106
+ headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
107
+ headers_mapping[:date] = find_header( headers, ['Date'] )
108
+
109
+ ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
110
+ headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
111
+ headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
112
+
113
+ ## check for half time scores ?
114
+ ## note: HT = Half Time
115
+ headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
116
+ headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
117
+ end
118
+ end
119
+
120
+ pp headers_mapping
121
+
122
+ ### todo/fix: check headers - how?
123
+ ## if present HomeTeam or HT required etc.
124
+ ## issue error/warn if not present
125
+ ##
126
+ ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
127
+ ## exit 1
128
+ ##
129
+
130
+ matches = []
131
+
132
+ rows.each_with_index do |row,i|
133
+
134
+ ## fix/todo: use logger!!!!
135
+ ## puts "[#{i}] " + row.inspect if i < 2
136
+
137
+
138
+ ## todo/fix: move to its own (helper) method - filter or such!!!!
139
+ if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
140
+ skip = false
141
+ filters.each do |header, value|
142
+ if row[ header ] != value ## e.g. row['Season']
143
+ skip = true
144
+ break
145
+ end
146
+ end
147
+ next if skip ## if header values NOT matching
148
+ end
149
+
150
+
151
+ ## note:
152
+ ## add converters after filters for now (why not before filters?)
153
+ if converters ## any converters defined?
154
+ ## convert single proc shortcut to array with single converter
155
+ converters = [converters] if converters.is_a?( Proc )
156
+
157
+ ## assumes array of procs
158
+ converters.each do |converter|
159
+ row = converter.call( row )
160
+ end
161
+ end
162
+
163
+
164
+
165
+ team1 = row[ headers_mapping[ :team1 ]]
166
+ team2 = row[ headers_mapping[ :team2 ]]
167
+
168
+
169
+ ## check if data present - if not skip (might be empty row)
170
+ if team1.nil? && team2.nil?
171
+ puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
172
+ pp row
173
+ next
174
+ end
175
+
176
+ ## remove possible match played counters e.g. (4) (11) etc.
177
+ team1 = team1.sub( /\(\d+\)/, '' ).strip
178
+ team2 = team2.sub( /\(\d+\)/, '' ).strip
179
+
180
+
181
+
182
+ col = row[ headers_mapping[ :date ]]
183
+ col = col.strip # make sure not leading or trailing spaces left over
184
+
185
+ if col.empty? || col == '-' || col == '?'
186
+ ## note: allow missing / unknown date for match
187
+ date = nil
188
+ else
189
+ ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
190
+ col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
191
+ col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
192
+ col = col.strip # make sure not leading or trailing spaces left over
193
+
194
+ if col =~ /^\d{2}\/\d{2}\/\d{4}$/
195
+ date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
196
+ elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
197
+ date_fmt = '%d/%m/%y' # e.g. 17/08/02
198
+ elsif col =~ /^\d{4}-\d{2}-\d{2}$/ ## "standard" / default date format
199
+ date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
200
+ elsif col =~ /^\d{1,2} \w{3} \d{4}$/
201
+ date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
202
+ else
203
+ puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
204
+ ## todo/fix: add to errors/warns list - why? why not?
205
+ exit 1
206
+ end
207
+
208
+ ## todo/check: use date object (keep string?) - why? why not?
209
+ ## todo/fix: yes!! use date object!!!! do NOT use string
210
+ date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
211
+ end
212
+
213
+
214
+ round = nil
215
+ ## check for (optional) round / matchday
216
+ if headers_mapping[ :round ]
217
+ col = row[ headers_mapping[ :round ]]
218
+ ## todo: issue warning if not ? or - (and just empty string) why? why not
219
+ round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
220
+ end
221
+
222
+
223
+ score1 = nil
224
+ score2 = nil
225
+ score1i = nil
226
+ score2i = nil
227
+
228
+ ## check for full time scores ?
229
+ if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
230
+ ft = [ row[ headers_mapping[ :score1 ]],
231
+ row[ headers_mapping[ :score2 ]] ]
232
+
233
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
234
+ score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
235
+ score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
236
+ end
237
+
238
+ ## check for half time scores ?
239
+ if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
240
+ ht = [ row[ headers_mapping[ :score1i ]],
241
+ row[ headers_mapping[ :score2i ]] ]
242
+
243
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
244
+ score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
245
+ score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
246
+ end
247
+
248
+ ## check for all-in-one full time scores?
249
+ if headers_mapping[ :score ]
250
+ ft = row[ headers_mapping[ :score ] ]
251
+ if ft =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
252
+ scores = ft.split( /[\-:]/ )
253
+ score1 = scores[0].to_i
254
+ score2 = scores[1].to_i
255
+ end
256
+ ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
257
+ end
258
+
259
+ if headers_mapping[ :scorei ]
260
+ ht = row[ headers_mapping[ :scorei ] ]
261
+ if ht =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
262
+ scores = ht.split( /[\-:]/) ## allow 1-1 and 1:1
263
+ score1i = scores[0].to_i
264
+ score2i = scores[1].to_i
265
+ end
266
+ ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
267
+ end
268
+
269
+
270
+ ## try some optional headings / columns
271
+ stage = nil
272
+ if headers_mapping[ :stage ]
273
+ col = row[ headers_mapping[ :stage ]]
274
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
275
+ stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
276
+ ## note: allow missing stage for match / defaults to "regular"
277
+ nil
278
+ elsif col == '?'
279
+ ## note: default explicit unknown to unknown for now AND not regular - why? why not?
280
+ '?' ## todo/check: use unkown and NOT ? - why? why not?
281
+ else
282
+ col
283
+ end
284
+ end
285
+
286
+
287
+ match = Import::Match.new( date: date,
288
+ team1: team1, team2: team2,
289
+ score1: score1, score2: score2,
290
+ score1i: score1i, score2i: score2i,
291
+ round: round,
292
+ stage: stage )
293
+ matches << match
294
+ end
295
+
296
+ ## pp matches
297
+ matches
298
+ end
299
+
300
+
301
+ private
302
+
303
+ def find_header( headers, candidates )
304
+ ## todo/fix: use Enumerable#find or similar ?! - why? more idiomatic code?
305
+
306
+ candidates.each do |candidate|
307
+ return candidate if headers.include?( candidate ) ## bingo!!!
308
+ end
309
+ nil ## no matching header found!!!
310
+ end
311
+
312
+ end # class CsvMatchParser
313
+ end # module SportDb
314
+
@@ -2,6 +2,9 @@
2
2
  module SportDb
3
3
  class Package
4
4
 
5
+ ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
+ ## e.g. .TXT and .txt
7
+
5
8
  CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
6
9
  \.conf\.txt$
7
10
  }x
@@ -26,14 +29,33 @@ module SportDb
26
29
  clubs\.props\.txt$
27
30
  }x
28
31
 
32
+
33
+ ### season folder:
34
+ ## e.g. /2019-20 or
35
+ ## year-only e.g. /2019 or
36
+ ## /2016--france
37
+ SEASON_RE = %r{ (?:
38
+ \d{4}-\d{2}
39
+ | \d{4}(--[^/]+)?
40
+ )
41
+ }x
42
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
43
+
44
+
29
45
  ## note: if pattern includes directory add here
30
46
  ## (otherwise move to more "generic" datafile) - why? why not?
31
- MATCH_RE = %r{ /(?: \d{4}-\d{2} ## season folder e.g. /2019-20
32
- | \d{4}(--[^/]+)? ## season year-only folder e.g. /2019 or /2016--france
33
- )
34
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
47
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
48
+ #{SEASON}
49
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
35
50
  }x
36
51
 
52
+ MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
53
+ #{SEASON}
54
+ /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
55
+ }x
56
+
57
+
58
+
37
59
  ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
38
60
 
39
61
  def self.find( path, pattern )
@@ -41,7 +63,7 @@ module SportDb
41
63
 
42
64
  ## check all txt files
43
65
  ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
44
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
66
+ candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
45
67
  pp candidates
46
68
  candidates.each do |candidate|
47
69
  datafiles << candidate if pattern.match( candidate )
@@ -66,6 +88,15 @@ module SportDb
66
88
  def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
67
89
  def self.match_conf( path ) CONF_RE.match( path ); end
68
90
 
91
+ def self.find_match( path, format: 'txt' )
92
+ if format == 'csv'
93
+ find( path, MATCH_CSV_RE )
94
+ else ## otherwise always assume txt for now
95
+ find( path, MATCH_RE )
96
+ end
97
+ end
98
+ ## add match_match and match_match_csv - why? why not?
99
+
69
100
  class << self
70
101
  alias_method :match_clubs?, :match_clubs
71
102
  alias_method :clubs?, :match_clubs
@@ -149,7 +180,14 @@ module SportDb
149
180
  end
150
181
 
151
182
  def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
152
- def each_match( &blk ) each( pattern: MATCH_RE, &blk ); end
183
+ def each_match( format: 'txt', &blk )
184
+ if format == 'csv'
185
+ each( pattern: MATCH_CSV_RE, &blk );
186
+ else
187
+ each( pattern: MATCH_RE, &blk );
188
+ end
189
+ end
190
+ def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
153
191
  def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
154
192
 
155
193
  def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
@@ -157,8 +195,100 @@ module SportDb
157
195
  def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
158
196
 
159
197
  ## return all match datafile entries
160
- def match() ary=[]; each_match {|entry| ary << entry }; ary; end
198
+ def match( format: 'txt' )
199
+ ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
200
+ end
161
201
  alias_method :matches, :match
202
+
203
+
204
+ ## todo/check: rename/change to match_by_dir - why? why not?
205
+ ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
206
+ def match_by_season_dir( format: 'txt' )
207
+ ##
208
+ ## [["1950s/1956-57",
209
+ ## ["1950s/1956-57/1-division1.csv",
210
+ ## "1950s/1956-57/2-division2.csv",
211
+ ## "1950s/1956-57/3a-division3n.csv",
212
+ ## "1950s/1956-57/3b-division3s.csv"]],
213
+ ## ...]
214
+
215
+ h = {}
216
+ match( format: format ).each do |entry|
217
+ season_path = File.dirname( entry.name )
218
+
219
+ h[ season_path ] ||= []
220
+ h[ season_path ] << entry
221
+ end
222
+
223
+ ## todo/fix: - add sort entries by name - why? why not?
224
+ ## note: assume 1-,2- etc. gets us back sorted leagues
225
+ ## - use sort. (will not sort by default?)
226
+
227
+ h.to_a ## return as array (or keep hash) - why? why not?
228
+ end # method match_by_season_dir
229
+
230
+ def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
231
+
232
+ ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
233
+
234
+ ## note: fold all same seasons (even if in different directories)
235
+ ## into same datafile list e.g.
236
+ ## ["1957/58",
237
+ ## ["1950s/1957-58/1-division1.csv",
238
+ ## "1950s/1957-58/2-division2.csv",
239
+ ## "1950s/1957-58/3a-division3n.csv",
240
+ ## "1950s/1957-58/3b-division3s.csv"]],
241
+ ## and
242
+ ## ["1957/58",
243
+ ## ["archives/1950s/1957-58/1-division1.csv",
244
+ ## "archives/1950s/1957-58/2-division2.csv",
245
+ ## "archives/1950s/1957-58/3a-division3n.csv",
246
+ ## "archives/1950s/1957-58/3b-division3s.csv"]],
247
+ ## should be together - why? why not?
248
+
249
+ ####
250
+ # Example package:
251
+ # [["2012/13", ["2012-13/1-proleague.csv"]],
252
+ # ["2013/14", ["2013-14/1-proleague.csv"]],
253
+ # ["2014/15", ["2014-15/1-proleague.csv"]],
254
+ # ["2015/16", ["2015-16/1-proleague.csv"]],
255
+ # ["2016/17", ["2016-17/1-proleague.csv"]],
256
+ # ["2017/18", ["2017-18/1-proleague.csv"]]]
257
+
258
+ ## todo/fix: (re)use a more generic filter instead of start for start of season only
259
+
260
+ ## todo/fix: use a "generic" filter_season helper for easy reuse
261
+ ## filter_season( clause, season_key )
262
+ ## or better filter = SeasonFilter.new( clause )
263
+ ## filter.skip? filter.include?( season_key )?
264
+ ## filter.before?( season_key ) etc.
265
+ ## find some good method names!!!!
266
+ season_start = start ? Import::Season.new( start ) : nil
267
+
268
+ h = {}
269
+ match( format: format ).each do |entry|
270
+ ## note: assume last directory in datafile path is the season part/key
271
+ season_q = File.basename( File.dirname( entry.name ))
272
+ season = Import::Season.new( season_q ) ## normalize season
273
+
274
+ ## skip if this season starts before the requested start season
275
+ next if season_start && season_start.start_year > season.start_year
276
+
277
+ h[ season.key ] ||= []
278
+ h[ season.key ] << entry
279
+ end
280
+
281
+ ## todo/fix: - add sort entries by name - why? why not?
282
+ ## note: assume 1-,2- etc. gets us back sorted leagues
283
+ ## - use sort. (will not sort by default?)
284
+
285
+ ## sort by season
286
+ ## latest / newest first (and oldest last)
287
+
288
+ h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
289
+ r[0] <=> l[0]
290
+ end
291
+ end # method match_by_season
162
292
  end # class Package
163
293
 
164
294