sportdb-formats 1.1.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +37 -1
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +102 -12
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
data/lib/sportdb/formats/package.rb
@@ -1,374 +1,394 @@
1
-
2
- module SportDb
3
- class Package
4
-
5
- ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
- ## e.g. .TXT and .txt
7
- ## yes!! use /i option!!!!!
8
-
9
- CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
- \.conf\.txt$
11
- }x
12
-
13
- ## leagues.txt or leagues_en.txt
14
- ## remove support for en.leagues.txt - why? why not?
15
- LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
- leagues
18
- (?:_[a-z0-9_-]+)?
19
- \.txt$
20
- }x
21
-
22
- ## seasons.txt or seasons_en.txt
23
- ## remove support for br.seasons.txt - why? why not?
24
- SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
- seasons
27
- (?:_[a-z0-9_-]+)?
28
- \.txt$
29
- }x
30
-
31
-
32
- ## clubs.txt or clubs_en.txt
33
- ## remove support for en.clubs.txt - why? why not?
34
- CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
35
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
36
- clubs
37
- (?:_[a-z0-9_-]+)?
38
- \.txt$
39
- }x
40
-
41
- CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
42
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
43
- clubs
44
- (?:_[a-z0-9_-]+)?
45
- \.wiki\.txt$
46
- }x
47
-
48
- ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
- CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
50
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
51
- clubs
52
- (?:_[a-z0-9_-]+)?
53
- \.props\.txt$
54
- }x
55
- CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
-
57
-
58
- CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
- clubs
61
- (?:_[a-z0-9_-]+)?
62
- \.history\.txt$
63
- }x
64
-
65
- ## teams.txt or teams_history.txt
66
- TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
67
- teams
68
- (?:_[a-z0-9_-]+)?
69
- \.txt$
70
- }x
71
-
72
-
73
- ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
74
- ### season folder:
75
- ## e.g. /2019-20 or
76
- ## year-only e.g. /2019 or
77
- ## /2016--france
78
- SEASON_RE = %r{ (?:
79
- \d{4}-\d{2}
80
- | \d{4}(--[a-z0-9_-]+)?
81
- )
82
- }x
83
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
84
-
85
-
86
- ## note: if pattern includes directory add here
87
- ## (otherwise move to more "generic" datafile) - why? why not?
88
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
89
- #{SEASON}
90
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
91
- }x
92
-
93
- MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
94
- #{SEASON}
95
- /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
96
- }x
97
-
98
- ### add "generic" pattern to find all csv datafiles
99
- CSV_RE = %r{ (?: ^|/ )
100
- [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
101
- }x
102
-
103
-
104
- ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
105
-
106
- def self.find( path, pattern )
107
- datafiles = []
108
-
109
- ## check all txt files
110
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
111
- candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
112
- pp candidates
113
- candidates.each do |candidate|
114
- datafiles << candidate if pattern.match( candidate )
115
- end
116
-
117
- pp datafiles
118
- datafiles
119
- end
120
-
121
-
122
- def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
123
- def self.match_teams( path ) TEAMS_RE.match( path ); end
124
-
125
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
- def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
128
-
129
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
- def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
- def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
133
-
134
- def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
135
- def self.match_leagues( path ) LEAGUES_RE.match( path ); end
136
-
137
- def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
138
- def self.match_seasons( path ) SEASONS_RE.match( path ); end
139
-
140
-
141
- def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
142
- def self.match_conf( path ) CONF_RE.match( path ); end
143
-
144
- def self.find_match( path, format: 'txt' )
145
- if format == 'csv'
146
- find( path, MATCH_CSV_RE )
147
- else ## otherwise always assume txt for now
148
- find( path, MATCH_RE )
149
- end
150
- end
151
- ## add match_match and match_match_csv - why? why not?
152
-
153
-
154
- class << self
155
- alias_method :match_teams?, :match_teams
156
- alias_method :teams?, :match_teams
157
-
158
- alias_method :match_clubs?, :match_clubs
159
- alias_method :clubs?, :match_clubs
160
-
161
- alias_method :match_clubs_wiki?, :match_clubs_wiki
162
- alias_method :clubs_wiki?, :match_clubs_wiki
163
-
164
- alias_method :match_clubs_history?, :match_clubs_history
165
- alias_method :clubs_history?, :match_clubs_history
166
-
167
- alias_method :match_club_props, :match_clubs_props
168
- alias_method :match_club_props?, :match_clubs_props
169
- alias_method :club_props?, :match_clubs_props
170
- alias_method :match_clubs_props?, :match_clubs_props
171
- alias_method :clubs_props?, :match_clubs_props
172
-
173
- alias_method :match_leagues?, :match_leagues
174
- alias_method :leagues?, :match_leagues
175
-
176
- alias_method :match_seasons?, :match_seasons
177
- alias_method :seasons?, :match_seasons
178
-
179
- alias_method :match_conf?, :match_conf
180
- alias_method :conf?, :match_conf
181
- end
182
-
183
-
184
- ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
185
- attr_accessor :include, :exclude
186
-
187
- ## private helpers - like select returns true for keeping and false for skipping entry
188
- def filter_clause( filter, entry )
189
- if filter.is_a?( String )
190
- entry.name.index( filter ) ? true : false
191
- elsif filter.is_a?( Regexp )
192
- filter.match( entry.name ) ? true : false
193
- else ## assume
194
- ## todo/check: pass in entry (and NOT entry.name) - why? why not?
195
- filter.call( entry )
196
- end
197
- end
198
-
199
- def filter( entry )
200
- if @include
201
- if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
202
- true ## todo/check: check for exclude here too - why? why not?
203
- else
204
- false
205
- end
206
- else
207
- if @exclude && filter_clause( @exclude, entry )
208
- false
209
- else
210
- true
211
- end
212
- end
213
- end
214
-
215
-
216
- def initialize( path_or_pack )
217
- @include = nil
218
- @exclude = nil
219
-
220
- if path_or_pack.is_a?( Datafile::Package )
221
- @pack = path_or_pack
222
- else ## assume it's a (string) path
223
- path = path_or_pack
224
- if !File.exist?( path ) ## file or directory
225
- puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
226
- exit 1
227
- end
228
-
229
- if File.directory?( path )
230
- @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
231
- elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
232
- @pack = Datafile::ZipPackage.new( path )
233
- else
234
- puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
235
- exit 1
236
- end
237
- end
238
- end
239
-
240
-
241
- def each( pattern:, &blk )
242
- @pack.each( pattern: pattern ) do |entry|
243
- next unless filter( entry ) ## lets you use include/exclude filters
244
- blk.call( entry )
245
- end
246
- end
247
-
248
- def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
249
- def each_match( format: 'txt', &blk )
250
- if format == 'csv'
251
- each( pattern: MATCH_CSV_RE, &blk );
252
- else
253
- each( pattern: MATCH_RE, &blk );
254
- end
255
- end
256
- def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
257
- def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
258
-
259
- def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
260
-
261
- def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
262
- def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
263
- def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
264
- def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
265
-
266
- def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
267
-
268
-
269
- ## return all match datafile entries
270
- def match( format: 'txt' )
271
- ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
272
- end
273
- alias_method :matches, :match
274
-
275
-
276
- ## todo/check: rename/change to match_by_dir - why? why not?
277
- ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
278
- def match_by_season_dir( format: 'txt' )
279
- ##
280
- ## [["1950s/1956-57",
281
- ## ["1950s/1956-57/1-division1.csv",
282
- ## "1950s/1956-57/2-division2.csv",
283
- ## "1950s/1956-57/3a-division3n.csv",
284
- ## "1950s/1956-57/3b-division3s.csv"]],
285
- ## ...]
286
-
287
- h = {}
288
- match( format: format ).each do |entry|
289
- season_path = File.dirname( entry.name )
290
-
291
- h[ season_path ] ||= []
292
- h[ season_path ] << entry
293
- end
294
-
295
- ## todo/fix: - add sort entries by name - why? why not?
296
- ## note: assume 1-,2- etc. gets us back sorted leagues
297
- ## - use sort. (will not sort by default?)
298
-
299
- h.to_a ## return as array (or keep hash) - why? why not?
300
- end # method match_by_season_dir
301
-
302
- def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
303
-
304
- ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
305
-
306
- ## note: fold all sames seasons (even if in different directories)
307
- ## into same datafile list e.g.
308
- ## ["1957/58",
309
- ## ["1950s/1957-58/1-division1.csv",
310
- ## "1950s/1957-58/2-division2.csv",
311
- ## "1950s/1957-58/3a-division3n.csv",
312
- ## "1950s/1957-58/3b-division3s.csv"]],
313
- ## and
314
- ## ["1957/58",
315
- ## ["archives/1950s/1957-58/1-division1.csv",
316
- ## "archives/1950s/1957-58/2-division2.csv",
317
- ## "archives/1950s/1957-58/3a-division3n.csv",
318
- ## "archives/1950s/1957-58/3b-division3s.csv"]],
319
- ## should be together - why? why not?
320
-
321
- ####
322
- # Example package:
323
- # [["2012/13", ["2012-13/1-proleague.csv"]],
324
- # ["2013/14", ["2013-14/1-proleague.csv"]],
325
- # ["2014/15", ["2014-15/1-proleague.csv"]],
326
- # ["2015/16", ["2015-16/1-proleague.csv"]],
327
- # ["2016/17", ["2016-17/1-proleague.csv"]],
328
- # ["2017/18", ["2017-18/1-proleague.csv"]]]
329
-
330
- ## todo/fix: (re)use a more generic filter instead of start for start of season only
331
-
332
- ## todo/fix: use a "generic" filter_season helper for easy reuse
333
- ## filter_season( clause, season_key )
334
- ## or better filter = SeasonFilter.new( clause )
335
- ## filter.skip? filter.include? ( season_sason_key )?
336
- ## fiteer.before?( season_key ) etc.
337
- ## find some good method names!!!!
338
- season_start = start ? Season( start ) : nil
339
-
340
- h = {}
341
- match( format: format ).each do |entry|
342
- ## note: assume last directory in datafile path is the season part/key
343
- season_q = File.basename( File.dirname( entry.name ))
344
- season = Season.parse( season_q ) ## normalize season
345
-
346
- ## skip if start season before this season
347
- next if season_start && season_start.start_year > season.start_year
348
-
349
- h[ season.key ] ||= []
350
- h[ season.key ] << entry
351
- end
352
-
353
- ## todo/fix: - add sort entries by name - why? why not?
354
- ## note: assume 1-,2- etc. gets us back sorted leagues
355
- ## - use sort. (will not sort by default?)
356
-
357
- ## sort by season
358
- ## latest / newest first (and oldest last)
359
-
360
- h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
361
- r[0] <=> l[0]
362
- end
363
- end # method match_by_season
364
- end # class Package
365
-
366
-
367
- class DirPackage < Package
368
- def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
369
- end
370
-
371
- class ZipPackage < Package
372
- def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
373
- end
374
- end # module SportDb
1
+
2
+ module SportDb
3
+ class Package
4
+
5
+ ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
+ ## e.g. .TXT and .txt
7
+ ## yes!! use /i option!!!!!
8
+
9
+ CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
+ \.conf\.txt$
11
+ }x
12
+
13
+ ## leagues.txt or leagues_en.txt
14
+ ## remove support for en.leagues.txt - why? why not?
15
+ LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
+ leagues
18
+ (?:_[a-z0-9_-]+)?
19
+ \.txt$
20
+ }x
21
+
22
+ ## seasons.txt or seasons_en.txt
23
+ ## remove support for br.seasons.txt - why? why not?
24
+ SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
+ seasons
27
+ (?:_[a-z0-9_-]+)?
28
+ \.txt$
29
+ }x
30
+
31
+
32
+ ####
33
+ # de.stadiums.txt or stadiums.txt or stadiums_de.txt
34
+ GROUNDS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
35
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
36
+ stadiums
37
+ (?:_[a-z0-9_-]+)?
38
+ \.txt$
39
+ }x
40
+
41
+ PLAYERS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
42
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
43
+ players
44
+ (?:_[a-z0-9_-]+)?
45
+ \.txt$
46
+ }x
47
+
48
+
49
+ ## clubs.txt or clubs_en.txt
50
+ ## remove support for en.clubs.txt - why? why not?
51
+ CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
52
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
53
+ clubs
54
+ (?:_[a-z0-9_-]+)?
55
+ \.txt$
56
+ }x
57
+
58
+ CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
59
+ (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
60
+ clubs
61
+ (?:_[a-z0-9_-]+)?
62
+ \.wiki\.txt$
63
+ }x
64
+
65
+ ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
66
+ CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
67
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
68
+ clubs
69
+ (?:_[a-z0-9_-]+)?
70
+ \.props\.txt$
71
+ }x
72
+ CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
73
+
74
+
75
+ CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
76
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
77
+ clubs
78
+ (?:_[a-z0-9_-]+)?
79
+ \.history\.txt$
80
+ }x
81
+
82
+ ## teams.txt or teams_history.txt
83
+ TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
84
+ teams
85
+ (?:_[a-z0-9_-]+)?
86
+ \.txt$
87
+ }x
88
+
89
+
90
+ ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
91
+ ### season folder:
92
+ ## e.g. /2019-20 or
93
+ ## year-only e.g. /2019 or
94
+ ## /2016--france
95
+ SEASON_RE = %r{ (?:
96
+ \d{4}-\d{2}
97
+ | \d{4}(--[a-z0-9_-]+)?
98
+ )
99
+ }x
100
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
101
+
102
+
103
+ ## note: if pattern includes directory add here
104
+ ## (otherwise move to more "generic" datafile) - why? why not?
105
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
106
+ #{SEASON}
107
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
108
+ }x
109
+
110
+ MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
111
+ #{SEASON}
112
+ /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
113
+ }x
114
+
115
+ ### add "generic" pattern to find all csv datafiles
116
+ CSV_RE = %r{ (?: ^|/ )
117
+ [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
118
+ }x
119
+
120
+
121
+ ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
122
+
123
+ def self.find( path, pattern )
124
+ datafiles = []
125
+
126
+ ## check all txt files
127
+ ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
128
+ candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
129
+ pp candidates
130
+ candidates.each do |candidate|
131
+ datafiles << candidate if pattern.match( candidate )
132
+ end
133
+
134
+ pp datafiles
135
+ datafiles
136
+ end
137
+
138
+
139
+ def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
140
+ def self.match_teams( path ) TEAMS_RE.match( path ); end
141
+
142
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
143
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
144
+ def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
145
+
146
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
147
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
148
+ def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
149
+ def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
150
+
151
+ def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
152
+ def self.match_leagues( path ) LEAGUES_RE.match( path ); end
153
+
154
+ def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
155
+ def self.match_seasons( path ) SEASONS_RE.match( path ); end
156
+
157
+
158
+ def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
159
+ def self.match_conf( path ) CONF_RE.match( path ); end
160
+
161
+ def self.find_match( path, format: 'txt' )
162
+ if format == 'csv'
163
+ find( path, MATCH_CSV_RE )
164
+ else ## otherwise always assume txt for now
165
+ find( path, MATCH_RE )
166
+ end
167
+ end
168
+ ## add match_match and match_match_csv - why? why not?
169
+
170
+
171
+ class << self
172
+ alias_method :match_teams?, :match_teams
173
+ alias_method :teams?, :match_teams
174
+
175
+ alias_method :match_clubs?, :match_clubs
176
+ alias_method :clubs?, :match_clubs
177
+
178
+ alias_method :match_clubs_wiki?, :match_clubs_wiki
179
+ alias_method :clubs_wiki?, :match_clubs_wiki
180
+
181
+ alias_method :match_clubs_history?, :match_clubs_history
182
+ alias_method :clubs_history?, :match_clubs_history
183
+
184
+ alias_method :match_club_props, :match_clubs_props
185
+ alias_method :match_club_props?, :match_clubs_props
186
+ alias_method :club_props?, :match_clubs_props
187
+ alias_method :match_clubs_props?, :match_clubs_props
188
+ alias_method :clubs_props?, :match_clubs_props
189
+
190
+ alias_method :match_leagues?, :match_leagues
191
+ alias_method :leagues?, :match_leagues
192
+
193
+ alias_method :match_seasons?, :match_seasons
194
+ alias_method :seasons?, :match_seasons
195
+
196
+ alias_method :match_conf?, :match_conf
197
+ alias_method :conf?, :match_conf
198
+ end
199
+
200
+
201
+ ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
202
+ attr_accessor :include, :exclude
203
+
204
+ ## private helpers - like select returns true for keeping and false for skipping entry
205
+ def filter_clause( filter, entry )
206
+ if filter.is_a?( String )
207
+ entry.name.index( filter ) ? true : false
208
+ elsif filter.is_a?( Regexp )
209
+ filter.match( entry.name ) ? true : false
210
+ else ## assume
211
+ ## todo/check: pass in entry (and NOT entry.name) - why? why not?
212
+ filter.call( entry )
213
+ end
214
+ end
215
+
216
+ def filter( entry )
217
+ if @include
218
+ if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
219
+ true ## todo/check: check for exclude here too - why? why not?
220
+ else
221
+ false
222
+ end
223
+ else
224
+ if @exclude && filter_clause( @exclude, entry )
225
+ false
226
+ else
227
+ true
228
+ end
229
+ end
230
+ end
231
+
232
+
233
+ def initialize( path_or_pack )
234
+ @include = nil
235
+ @exclude = nil
236
+
237
+ if path_or_pack.is_a?( Datafile::Package )
238
+ @pack = path_or_pack
239
+ else ## assume it's a (string) path
240
+ path = path_or_pack
241
+ if !File.exist?( path ) ## file or directory
242
+ puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
243
+ exit 1
244
+ end
245
+
246
+ if File.directory?( path )
247
+ @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
248
+ elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
249
+ @pack = Datafile::ZipPackage.new( path )
250
+ else
251
+ puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
252
+ exit 1
253
+ end
254
+ end
255
+ end
256
+
257
+
258
+ def each( pattern:, &blk )
259
+ @pack.each( pattern: pattern ) do |entry|
260
+ next unless filter( entry ) ## lets you use include/exclude filters
261
+ blk.call( entry )
262
+ end
263
+ end
264
+
265
+ def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
266
+ def each_match( format: 'txt', &blk )
267
+ if format == 'csv'
268
+ each( pattern: MATCH_CSV_RE, &blk );
269
+ else
270
+ each( pattern: MATCH_RE, &blk );
271
+ end
272
+ end
273
+ def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
274
+ def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
275
+
276
+ def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
277
+
278
+ def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
279
+ def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
280
+ def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
281
+ def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
282
+
283
+ def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
284
+
285
+
286
+ def each_grounds( &blk ) each( pattern: GROUNDS_RE, &blk ); end
287
+ def each_players( &blk ) each( pattern: PLAYERS_RE, &blk ); end
288
+
289
+ ## return all match datafile entries
290
+ def match( format: 'txt' )
291
+ ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
292
+ end
293
+ alias_method :matches, :match
294
+
295
+
296
+ ## todo/check: rename/change to match_by_dir - why? why not?
297
+ ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
298
+ def match_by_season_dir( format: 'txt' )
299
+ ##
300
+ ## [["1950s/1956-57",
301
+ ## ["1950s/1956-57/1-division1.csv",
302
+ ## "1950s/1956-57/2-division2.csv",
303
+ ## "1950s/1956-57/3a-division3n.csv",
304
+ ## "1950s/1956-57/3b-division3s.csv"]],
305
+ ## ...]
306
+
307
+ h = {}
308
+ match( format: format ).each do |entry|
309
+ season_path = File.dirname( entry.name )
310
+
311
+ h[ season_path ] ||= []
312
+ h[ season_path ] << entry
313
+ end
314
+
315
+ ## todo/fix: - add sort entries by name - why? why not?
316
+ ## note: assume 1-,2- etc. gets us back sorted leagues
317
+ ## - use sort. (will not sort by default?)
318
+
319
+ h.to_a ## return as array (or keep hash) - why? why not?
320
+ end # method match_by_season_dir
321
+
322
+ def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
323
+
324
+ ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
325
+
326
+ ## note: fold all sames seasons (even if in different directories)
327
+ ## into same datafile list e.g.
328
+ ## ["1957/58",
329
+ ## ["1950s/1957-58/1-division1.csv",
330
+ ## "1950s/1957-58/2-division2.csv",
331
+ ## "1950s/1957-58/3a-division3n.csv",
332
+ ## "1950s/1957-58/3b-division3s.csv"]],
333
+ ## and
334
+ ## ["1957/58",
335
+ ## ["archives/1950s/1957-58/1-division1.csv",
336
+ ## "archives/1950s/1957-58/2-division2.csv",
337
+ ## "archives/1950s/1957-58/3a-division3n.csv",
338
+ ## "archives/1950s/1957-58/3b-division3s.csv"]],
339
+ ## should be together - why? why not?
340
+
341
+ ####
342
+ # Example package:
343
+ # [["2012/13", ["2012-13/1-proleague.csv"]],
344
+ # ["2013/14", ["2013-14/1-proleague.csv"]],
345
+ # ["2014/15", ["2014-15/1-proleague.csv"]],
346
+ # ["2015/16", ["2015-16/1-proleague.csv"]],
347
+ # ["2016/17", ["2016-17/1-proleague.csv"]],
348
+ # ["2017/18", ["2017-18/1-proleague.csv"]]]
349
+
350
+ ## todo/fix: (re)use a more generic filter instead of start for start of season only
351
+
352
+ ## todo/fix: use a "generic" filter_season helper for easy reuse
353
+ ## filter_season( clause, season_key )
354
+ ## or better filter = SeasonFilter.new( clause )
355
+ ## filter.skip? filter.include? ( season_sason_key )?
356
+ ## fiteer.before?( season_key ) etc.
357
+ ## find some good method names!!!!
358
+ season_start = start ? Season( start ) : nil
359
+
360
+ h = {}
361
+ match( format: format ).each do |entry|
362
+ ## note: assume last directory in datafile path is the season part/key
363
+ season_q = File.basename( File.dirname( entry.name ))
364
+ season = Season.parse( season_q ) ## normalize season
365
+
366
+ ## skip if start season before this season
367
+ next if season_start && season_start.start_year > season.start_year
368
+
369
+ h[ season.key ] ||= []
370
+ h[ season.key ] << entry
371
+ end
372
+
373
+ ## todo/fix: - add sort entries by name - why? why not?
374
+ ## note: assume 1-,2- etc. gets us back sorted leagues
375
+ ## - use sort. (will not sort by default?)
376
+
377
+ ## sort by season
378
+ ## latest / newest first (and oldest last)
379
+
380
+ h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
381
+ r[0] <=> l[0]
382
+ end
383
+ end # method match_by_season
384
+ end # class Package
385
+
386
+
387
+ class DirPackage < Package
388
+ def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
389
+ end
390
+
391
+ class ZipPackage < Package
392
+ def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
393
+ end
394
+ end # module SportDb