sportdb-formats 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7b1daae3f04961c06c34b98e84438ee39a80c5ca
4
- data.tar.gz: 872154cb339bb6b6c8f440a392f250dc7d7387d9
3
+ metadata.gz: 4a27c364d2003ece8da886c892d34d595ffcffcc
4
+ data.tar.gz: 81530dfec5decf5d9476a3f2be90bcbeb0195824
5
5
  SHA512:
6
- metadata.gz: 853b0362fde6f9d8b17db7036ae954768500ab681acff7bf859b8cbf59b7d8bc4bec9d153f21e3c7ac069b6486b10386ce99fe151d4229df815c006a27dccbe0
7
- data.tar.gz: e356061bfb0f15ff8274651ce107ec3be00818996d0c29a2ffb26bd5b71c385a3bd1464eea42e2e11ab60dc24ce96663a701e28fa6a8aebd27d434525c71a1f4
6
+ metadata.gz: 4a2aabb65968939d55f2000c64e4ac5df4cbedfe6b3786fd12ff86d26bd5e6af12c32ea129b59a7ddf30736afa6f093e0e4f2e97f2c6f0b23bffd6901a83ca91
7
+ data.tar.gz: 63f4ca42ebb537173334a931a02331b8cbdc27d9e953052c8126d9e683e54a3103eb496103fea400283c6b0b08fa7c6e738466f215e26f9567a3b822a80c172d
@@ -17,6 +17,7 @@ lib/sportdb/formats/match/mapper.rb
17
17
  lib/sportdb/formats/match/mapper_teams.rb
18
18
  lib/sportdb/formats/match/match_parser.rb
19
19
  lib/sportdb/formats/match/match_parser_auto_conf.rb
20
+ lib/sportdb/formats/match/match_parser_csv.rb
20
21
  lib/sportdb/formats/name_helper.rb
21
22
  lib/sportdb/formats/outline_reader.rb
22
23
  lib/sportdb/formats/package.rb
@@ -49,8 +50,11 @@ test/test_clubs.rb
49
50
  test/test_conf.rb
50
51
  test/test_country_index.rb
51
52
  test/test_country_reader.rb
53
+ test/test_csv_match_parser.rb
54
+ test/test_csv_match_parser_utils.rb
52
55
  test/test_csv_reader.rb
53
56
  test/test_datafile.rb
57
+ test/test_datafile_package.rb
54
58
  test/test_goals.rb
55
59
  test/test_league_index.rb
56
60
  test/test_league_outline_reader.rb
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require './lib/sportdb/formats/version.rb'
3
3
 
4
4
  Hoe.spec 'sportdb-formats' do
5
5
 
6
- self.version = SportDb::Formats::VERSION
6
+ self.version = SportDb::Module::Formats::VERSION
7
7
 
8
8
  self.summary = "sportdb-formats - sport.db format and text utilities"
9
9
  self.description = summary
@@ -21,7 +21,7 @@ Hoe.spec 'sportdb-formats' do
21
21
 
22
22
  self.extra_deps = [
23
23
  ['alphabets', '>= 1.0.0'],
24
- ['date-formats', '>= 1.0.0'],
24
+ ['date-formats', '>= 1.0.1'],
25
25
  ['csvreader', '>= 1.2.4'],
26
26
  ['sportdb-langs', '>= 0.1.0'],
27
27
 
@@ -10,15 +10,26 @@ require 'zip' ## todo/check: if zip is already included in a required module
10
10
 
11
11
 
12
12
 
13
- def read_csv( path )
14
- CsvHash.read( path, :header_converters => :symbol )
13
+ def read_csv( path, sep: nil,
14
+ symbolize_names: nil )
15
+ opts = {}
16
+ opts[:sep] = sep if sep
17
+ opts[:header_converters] = :symbol if symbolize_names
18
+
19
+ CsvHash.read( path, **opts )
15
20
  end
16
21
 
17
- def parse_csv( txt )
18
- CsvHash.parse( txt, :header_converters => :symbol )
22
+ def parse_csv( txt, sep: nil,
23
+ symbolize_names: nil )
24
+ opts = {}
25
+ opts[:sep] = sep if sep
26
+ opts[:header_converters] = :symbol if symbolize_names
27
+
28
+ CsvHash.parse( txt, **opts )
19
29
  end
20
30
 
21
31
 
32
+
22
33
  ## more sportdb libs/gems
23
34
  require 'sportdb/langs'
24
35
 
@@ -68,6 +79,7 @@ require 'sportdb/formats/match/match_parser'
68
79
  require 'sportdb/formats/match/match_parser_auto_conf'
69
80
  require 'sportdb/formats/match/conf_parser'
70
81
 
82
+ require 'sportdb/formats/match/match_parser_csv'
71
83
 
72
84
  require 'sportdb/formats/country/country_reader'
73
85
  require 'sportdb/formats/country/country_index'
@@ -147,4 +159,4 @@ end # module SportDb
147
159
 
148
160
 
149
161
 
150
- puts SportDb::Formats.banner # say hello
162
+ puts SportDb::Module::Formats.banner # say hello
@@ -30,7 +30,7 @@ class Entry
30
30
  @name = path[ pack.path.length+1..-1 ]
31
31
  end
32
32
  def name() @name; end
33
- def read() File.open( @path, 'r:utf-8' ).read; end
33
+ def read() File.open( @path, 'r:utf-8' ) {|f| f.read }; end
34
34
  end # class DirPackage::Entry
35
35
 
36
36
 
@@ -44,17 +44,20 @@ end # class DirPackage::Entry
44
44
  @name = basename
45
45
  end
46
46
 
47
- def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
47
+ ## todo/check: change pattern: to re: - why? why not?
48
+ def each( pattern: ) ## todo/check: rename to glob or something - why? why not?
48
49
  ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
49
50
  ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
50
- Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
- ## todo/fix: (auto) skip and check for directories
52
- if EXCLUDE_RE.match( path )
53
- ## note: skip dot dirs (e.g. .build/, .git/, etc.)
51
+ ## todo/check/fix: is there a better (simpler) glob pattern? yes? no?
52
+ Dir.glob( "#{@path}/**/{*,.*}.*" ).each do |path|
53
+ if File.directory?( path )
54
+ ## always skip directories / folders
55
+ elsif EXCLUDE_RE.match( path )
56
+ ## note: skip dot dirs (e.g. .build/, .git/, etc.)
54
57
  elsif pattern.match( path )
55
58
  yield( Entry.new( self, path ))
56
59
  else
57
- ## puts " skipping >#{path}<"
60
+ ## puts " skipping >#{path}<"
58
61
  end
59
62
  end
60
63
  end
@@ -0,0 +1,314 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ class CsvMatchParser
6
+
7
+ #############
8
+ # helpers
9
+ def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
10
+
11
+ ## check if headers incl. season; if yes, it has priority over col mapping
12
+ ## e.g. no need to specify twice (if using headers)
13
+ col = headers[:season] if headers && headers[:season]
14
+
15
+ seasons = Hash.new( 0 ) ## default value is 0
16
+
17
+ ## todo/fix: yes, use CsvHash.foreach - why? why not?
18
+ ## use read_csv with block to switch to foreach!!!!
19
+ rows = read_csv( path, sep: sep )
20
+
21
+ rows.each_with_index do |row,i|
22
+ puts "[#{i}] " + row.inspect if i < 2
23
+
24
+ season = row[ col ] ## column name defaults to 'Season'
25
+ seasons[ season ] += 1
26
+ end
27
+
28
+ pp seasons
29
+
30
+ ## note: only return season keys/names (not hash with usage counter)
31
+ seasons.keys
32
+ end
33
+
34
+
35
+ ##########
36
+ # main machinery
37
+
38
+ ## todo/fix: use a generic "global" parse_csv method - why? why not?
39
+ ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
40
+ ## ## note: do NOT symbolize keys - keep them as is!!!!!!
41
+ ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
42
+ ## CsvHash.parse( text, sep: sep )
43
+ ## end
44
+
45
+ def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
46
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
47
+ parse( txt, headers: headers,
48
+ filters: filters,
49
+ converters: converters,
50
+ sep: sep )
51
+ end
52
+
53
+ def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
54
+ new( txt ).parse( headers: headers,
55
+ filters: filters,
56
+ converters: converters,
57
+ sep: sep )
58
+ end
59
+
60
+
61
+ def initialize( txt )
62
+ @txt = txt
63
+ end
64
+
65
+ def parse( headers: nil, filters: nil, converters: nil, sep: nil )
66
+
67
+ headers_mapping = {}
68
+
69
+ rows = parse_csv( @txt, sep: sep )
70
+
71
+ return [] if rows.empty? ## no rows / empty?
72
+
73
+
74
+ ## fix/todo: use logger!!!!
75
+ ## pp csv
76
+
77
+ if headers ## use user supplied headers if present
78
+ headers_mapping = headers_mapping.merge( headers )
79
+ else
80
+
81
+ ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
82
+ headers = rows[0].keys
83
+ pp headers
84
+
85
+ # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
86
+ # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
87
+
88
+ if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
89
+ ## assume our own football.csv format, see github.com/footballcsv
90
+ headers_mapping[:team1] = find_header( headers, ['Team 1'] )
91
+ headers_mapping[:team2] = find_header( headers, ['Team 2'] )
92
+ headers_mapping[:date] = find_header( headers, ['Date'] )
93
+
94
+ ## check for all-in-one full time (ft) and half time (ht) scores?
95
+ headers_mapping[:score] = find_header( headers, ['FT'] )
96
+ headers_mapping[:scorei] = find_header( headers, ['HT'] )
97
+
98
+ headers_mapping[:round] = find_header( headers, ['Round'] )
99
+
100
+ ## optional headers - note: find_header returns nil if header NOT found
101
+ header_stage = find_header( headers, ['Stage'] )
102
+ headers_mapping[:stage] = header_stage if header_stage
103
+ else
104
+ ## else try footballdata.uk and others
105
+ headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
106
+ headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
107
+ headers_mapping[:date] = find_header( headers, ['Date'] )
108
+
109
+ ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
110
+ headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
111
+ headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
112
+
113
+ ## check for half time scores ?
114
+ ## note: HT = Half Time
115
+ headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
116
+ headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
117
+ end
118
+ end
119
+
120
+ pp headers_mapping
121
+
122
+ ### todo/fix: check headers - how?
123
+ ## if present HomeTeam or HT required etc.
124
+ ## issue error/warn if not present
125
+ ##
126
+ ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
127
+ ## exit 1
128
+ ##
129
+
130
+ matches = []
131
+
132
+ rows.each_with_index do |row,i|
133
+
134
+ ## fix/todo: use logger!!!!
135
+ ## puts "[#{i}] " + row.inspect if i < 2
136
+
137
+
138
+ ## todo/fix: move to its own (helper) method - filter or such!!!!
139
+ if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
140
+ skip = false
141
+ filters.each do |header, value|
142
+ if row[ header ] != value ## e.g. row['Season']
143
+ skip = true
144
+ break
145
+ end
146
+ end
147
+ next if skip ## if header values NOT matching
148
+ end
149
+
150
+
151
+ ## note:
152
+ ## add converters after filters for now (why not before filters?)
153
+ if converters ## any converters defined?
154
+ ## convert single proc shortcut to array with single converter
155
+ converters = [converters] if converters.is_a?( Proc )
156
+
157
+ ## assumes array of procs
158
+ converters.each do |converter|
159
+ row = converter.call( row )
160
+ end
161
+ end
162
+
163
+
164
+
165
+ team1 = row[ headers_mapping[ :team1 ]]
166
+ team2 = row[ headers_mapping[ :team2 ]]
167
+
168
+
169
+ ## check if data present - if not skip (might be empty row)
170
+ if team1.nil? && team2.nil?
171
+ puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
172
+ pp row
173
+ next
174
+ end
175
+
176
+ ## remove possible match played counters e.g. (4) (11) etc.
177
+ team1 = team1.sub( /\(\d+\)/, '' ).strip
178
+ team2 = team2.sub( /\(\d+\)/, '' ).strip
179
+
180
+
181
+
182
+ col = row[ headers_mapping[ :date ]]
183
+ col = col.strip # make sure not leading or trailing spaces left over
184
+
185
+ if col.empty? || col == '-' || col == '?'
186
+ ## note: allow missing / unknown date for match
187
+ date = nil
188
+ else
189
+ ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
190
+ col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
191
+ col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
192
+ col = col.strip # make sure not leading or trailing spaces left over
193
+
194
+ if col =~ /^\d{2}\/\d{2}\/\d{4}$/
195
+ date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
196
+ elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
197
+ date_fmt = '%d/%m/%y' # e.g. 17/08/02
198
+ elsif col =~ /^\d{4}-\d{2}-\d{2}$/ ## "standard" / default date format
199
+ date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
200
+ elsif col =~ /^\d{1,2} \w{3} \d{4}$/
201
+ date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
202
+ else
203
+ puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
204
+ ## todo/fix: add to errors/warns list - why? why not?
205
+ exit 1
206
+ end
207
+
208
+ ## todo/check: use date object (keep string?) - why? why not?
209
+ ## todo/fix: yes!! use date object!!!! do NOT use string
210
+ date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
211
+ end
212
+
213
+
214
+ round = nil
215
+ ## check for (optional) round / matchday
216
+ if headers_mapping[ :round ]
217
+ col = row[ headers_mapping[ :round ]]
218
+ ## todo: issue warning if not ? or - (and just empty string) why? why not
219
+ round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
220
+ end
221
+
222
+
223
+ score1 = nil
224
+ score2 = nil
225
+ score1i = nil
226
+ score2i = nil
227
+
228
+ ## check for full time scores ?
229
+ if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
230
+ ft = [ row[ headers_mapping[ :score1 ]],
231
+ row[ headers_mapping[ :score2 ]] ]
232
+
233
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
234
+ score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
235
+ score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
236
+ end
237
+
238
+ ## check for half time scores ?
239
+ if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
240
+ ht = [ row[ headers_mapping[ :score1i ]],
241
+ row[ headers_mapping[ :score2i ]] ]
242
+
243
+ ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
244
+ score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
245
+ score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
246
+ end
247
+
248
+ ## check for all-in-one full time scores?
249
+ if headers_mapping[ :score ]
250
+ ft = row[ headers_mapping[ :score ] ]
251
+ if ft =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
252
+ scores = ft.split( /[\-:]/ )
253
+ score1 = scores[0].to_i
254
+ score2 = scores[1].to_i
255
+ end
256
+ ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
257
+ end
258
+
259
+ if headers_mapping[ :scorei ]
260
+ ht = row[ headers_mapping[ :scorei ] ]
261
+ if ht =~ /^\d{1,2}[\-:]\d{1,2}$/ ## sanity check scores format
262
+ scores = ht.split( /[\-:]/) ## allow 1-1 and 1:1
263
+ score1i = scores[0].to_i
264
+ score2i = scores[1].to_i
265
+ end
266
+ ## todo/fix: issue warning if non-empty!!! and not matching format!!!!
267
+ end
268
+
269
+
270
+ ## try some optional headings / columns
271
+ stage = nil
272
+ if headers_mapping[ :stage ]
273
+ col = row[ headers_mapping[ :stage ]]
274
+ ## todo/fix: check can col be nil e.g. col.nil? possible?
275
+ stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
276
+ ## note: allow missing stage for match / defaults to "regular"
277
+ nil
278
+ elsif col == '?'
279
+ ## note: default explicit unknown to unknown for now AND not regular - why? why not?
280
+ '?' ## todo/check: use unknown and NOT ? - why? why not?
281
+ else
282
+ col
283
+ end
284
+ end
285
+
286
+
287
+ match = Import::Match.new( date: date,
288
+ team1: team1, team2: team2,
289
+ score1: score1, score2: score2,
290
+ score1i: score1i, score2i: score2i,
291
+ round: round,
292
+ stage: stage )
293
+ matches << match
294
+ end
295
+
296
+ ## pp matches
297
+ matches
298
+ end
299
+
300
+
301
+ private
302
+
303
+ def find_header( headers, candidates )
304
+ ## todo/fix: use find_first from Enumerable or similar ?! - why? more idiomatic code?
305
+
306
+ candidates.each do |candidate|
307
+ return candidate if headers.include?( candidate ) ## bingo!!!
308
+ end
309
+ nil ## no matching header found!!!
310
+ end
311
+
312
+ end # class CsvMatchParser
313
+ end # module SportDb
314
+
@@ -2,6 +2,9 @@
2
2
  module SportDb
3
3
  class Package
4
4
 
5
+ ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
+ ## e.g. .TXT and .txt
7
+
5
8
  CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
6
9
  \.conf\.txt$
7
10
  }x
@@ -26,14 +29,33 @@ module SportDb
26
29
  clubs\.props\.txt$
27
30
  }x
28
31
 
32
+
33
+ ### season folder:
34
+ ## e.g. /2019-20 or
35
+ ## year-only e.g. /2019 or
36
+ ## /2016--france
37
+ SEASON_RE = %r{ (?:
38
+ \d{4}-\d{2}
39
+ | \d{4}(--[^/]+)?
40
+ )
41
+ }x
42
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
43
+
44
+
29
45
  ## note: if pattern includes directory add here
30
46
  ## (otherwise move to more "generic" datafile) - why? why not?
31
- MATCH_RE = %r{ /(?: \d{4}-\d{2} ## season folder e.g. /2019-20
32
- | \d{4}(--[^/]+)? ## season year-only folder e.g. /2019 or /2016--france
33
- )
34
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
47
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
48
+ #{SEASON}
49
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
35
50
  }x
36
51
 
52
+ MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
53
+ #{SEASON}
54
+ /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
55
+ }x
56
+
57
+
58
+
37
59
  ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
38
60
 
39
61
  def self.find( path, pattern )
@@ -41,7 +63,7 @@ module SportDb
41
63
 
42
64
  ## check all txt files
43
65
  ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
44
- candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
66
+ candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
45
67
  pp candidates
46
68
  candidates.each do |candidate|
47
69
  datafiles << candidate if pattern.match( candidate )
@@ -66,6 +88,15 @@ module SportDb
66
88
  def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
67
89
  def self.match_conf( path ) CONF_RE.match( path ); end
68
90
 
91
+ def self.find_match( path, format: 'txt' )
92
+ if format == 'csv'
93
+ find( path, MATCH_CSV_RE )
94
+ else ## otherwise always assume txt for now
95
+ find( path, MATCH_RE )
96
+ end
97
+ end
98
+ ## add match_match and match_match_csv - why? why not?
99
+
69
100
  class << self
70
101
  alias_method :match_clubs?, :match_clubs
71
102
  alias_method :clubs?, :match_clubs
@@ -149,7 +180,14 @@ module SportDb
149
180
  end
150
181
 
151
182
  def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
152
- def each_match( &blk ) each( pattern: MATCH_RE, &blk ); end
183
+ def each_match( format: 'txt', &blk )
184
+ if format == 'csv'
185
+ each( pattern: MATCH_CSV_RE, &blk );
186
+ else
187
+ each( pattern: MATCH_RE, &blk );
188
+ end
189
+ end
190
+ def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
153
191
  def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
154
192
 
155
193
  def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
@@ -157,8 +195,100 @@ module SportDb
157
195
  def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
158
196
 
159
197
  ## return all match datafile entries
160
- def match() ary=[]; each_match {|entry| ary << entry }; ary; end
198
+ def match( format: 'txt' )
199
+ ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
200
+ end
161
201
  alias_method :matches, :match
202
+
203
+
204
+ ## todo/check: rename/change to match_by_dir - why? why not?
205
+ ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
206
+ def match_by_season_dir( format: 'txt' )
207
+ ##
208
+ ## [["1950s/1956-57",
209
+ ## ["1950s/1956-57/1-division1.csv",
210
+ ## "1950s/1956-57/2-division2.csv",
211
+ ## "1950s/1956-57/3a-division3n.csv",
212
+ ## "1950s/1956-57/3b-division3s.csv"]],
213
+ ## ...]
214
+
215
+ h = {}
216
+ match( format: format ).each do |entry|
217
+ season_path = File.dirname( entry.name )
218
+
219
+ h[ season_path ] ||= []
220
+ h[ season_path ] << entry
221
+ end
222
+
223
+ ## todo/fix: - add sort entries by name - why? why not?
224
+ ## note: assume 1-,2- etc. gets us back sorted leagues
225
+ ## - use sort. (will not sort by default?)
226
+
227
+ h.to_a ## return as array (or keep hash) - why? why not?
228
+ end # method match_by_season_dir
229
+
230
+ def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
231
+
232
+ ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
233
+
234
+ ## note: fold all same seasons (even if in different directories)
235
+ ## into same datafile list e.g.
236
+ ## ["1957/58",
237
+ ## ["1950s/1957-58/1-division1.csv",
238
+ ## "1950s/1957-58/2-division2.csv",
239
+ ## "1950s/1957-58/3a-division3n.csv",
240
+ ## "1950s/1957-58/3b-division3s.csv"]],
241
+ ## and
242
+ ## ["1957/58",
243
+ ## ["archives/1950s/1957-58/1-division1.csv",
244
+ ## "archives/1950s/1957-58/2-division2.csv",
245
+ ## "archives/1950s/1957-58/3a-division3n.csv",
246
+ ## "archives/1950s/1957-58/3b-division3s.csv"]],
247
+ ## should be together - why? why not?
248
+
249
+ ####
250
+ # Example package:
251
+ # [["2012/13", ["2012-13/1-proleague.csv"]],
252
+ # ["2013/14", ["2013-14/1-proleague.csv"]],
253
+ # ["2014/15", ["2014-15/1-proleague.csv"]],
254
+ # ["2015/16", ["2015-16/1-proleague.csv"]],
255
+ # ["2016/17", ["2016-17/1-proleague.csv"]],
256
+ # ["2017/18", ["2017-18/1-proleague.csv"]]]
257
+
258
+ ## todo/fix: (re)use a more generic filter instead of start for start of season only
259
+
260
+ ## todo/fix: use a "generic" filter_season helper for easy reuse
261
+ ## filter_season( clause, season_key )
262
+ ## or better filter = SeasonFilter.new( clause )
263
+ ## filter.skip? filter.include? ( season_key )?
264
+ ## filter.before?( season_key ) etc.
265
+ ## find some good method names!!!!
266
+ season_start = start ? Import::Season.new( start ) : nil
267
+
268
+ h = {}
269
+ match( format: format ).each do |entry|
270
+ ## note: assume last directory in datafile path is the season part/key
271
+ season_q = File.basename( File.dirname( entry.name ))
272
+ season = Import::Season.new( season_q ) ## normalize season
273
+
274
+ ## skip if start season before this season
275
+ next if season_start && season_start.start_year > season.start_year
276
+
277
+ h[ season.key ] ||= []
278
+ h[ season.key ] << entry
279
+ end
280
+
281
+ ## todo/fix: - add sort entries by name - why? why not?
282
+ ## note: assume 1-,2- etc. gets us back sorted leagues
283
+ ## - use sort. (will not sort by default?)
284
+
285
+ ## sort by season
286
+ ## latest / newest first (and oldest last)
287
+
288
+ h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
289
+ r[0] <=> l[0]
290
+ end
291
+ end # method match_by_season
162
292
  end # class Package
163
293
 
164
294