sportdb-formats 1.1.6 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +53 -9
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +130 -13
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
@@ -1,374 +1,394 @@
1
-
2
- module SportDb
3
- class Package
4
-
5
- ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
- ## e.g. .TXT and .txt
7
- ## yes!! use /i option!!!!!
8
-
9
- CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
- \.conf\.txt$
11
- }x
12
-
13
- ## leagues.txt or leagues_en.txt
14
- ## remove support for en.leagues.txt - why? why not?
15
- LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
- leagues
18
- (?:_[a-z0-9_-]+)?
19
- \.txt$
20
- }x
21
-
22
- ## seasons.txt or seasons_en.txt
23
- ## remove support for br.seasons.txt - why? why not?
24
- SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
- seasons
27
- (?:_[a-z0-9_-]+)?
28
- \.txt$
29
- }x
30
-
31
-
32
- ## clubs.txt or clubs_en.txt
33
- ## remove support for en.clubs.txt - why? why not?
34
- CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
35
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
36
- clubs
37
- (?:_[a-z0-9_-]+)?
38
- \.txt$
39
- }x
40
-
41
- CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
42
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
43
- clubs
44
- (?:_[a-z0-9_-]+)?
45
- \.wiki\.txt$
46
- }x
47
-
48
- ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
- CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
50
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
51
- clubs
52
- (?:_[a-z0-9_-]+)?
53
- \.props\.txt$
54
- }x
55
- CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
-
57
-
58
- CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
- clubs
61
- (?:_[a-z0-9_-]+)?
62
- \.history\.txt$
63
- }x
64
-
65
- ## teams.txt or teams_history.txt
66
- TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
67
- teams
68
- (?:_[a-z0-9_-]+)?
69
- \.txt$
70
- }x
71
-
72
-
73
- ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
74
- ### season folder:
75
- ## e.g. /2019-20 or
76
- ## year-only e.g. /2019 or
77
- ## /2016--france
78
- SEASON_RE = %r{ (?:
79
- \d{4}-\d{2}
80
- | \d{4}(--[a-z0-9_-]+)?
81
- )
82
- }x
83
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
84
-
85
-
86
- ## note: if pattern includes directory add here
87
- ## (otherwise move to more "generic" datafile) - why? why not?
88
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
89
- #{SEASON}
90
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
91
- }x
92
-
93
- MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
94
- #{SEASON}
95
- /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
96
- }x
97
-
98
- ### add "generic" pattern to find all csv datafiles
99
- CSV_RE = %r{ (?: ^|/ )
100
- [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
101
- }x
102
-
103
-
104
- ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
105
-
106
- def self.find( path, pattern )
107
- datafiles = []
108
-
109
- ## check all txt files
110
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
111
- candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
112
- pp candidates
113
- candidates.each do |candidate|
114
- datafiles << candidate if pattern.match( candidate )
115
- end
116
-
117
- pp datafiles
118
- datafiles
119
- end
120
-
121
-
122
- def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
123
- def self.match_teams( path ) TEAMS_RE.match( path ); end
124
-
125
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
- def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
128
-
129
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
- def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
- def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
133
-
134
- def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
135
- def self.match_leagues( path ) LEAGUES_RE.match( path ); end
136
-
137
- def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
138
- def self.match_seasons( path ) SEASONS_RE.match( path ); end
139
-
140
-
141
- def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
142
- def self.match_conf( path ) CONF_RE.match( path ); end
143
-
144
- def self.find_match( path, format: 'txt' )
145
- if format == 'csv'
146
- find( path, MATCH_CSV_RE )
147
- else ## otherwise always assume txt for now
148
- find( path, MATCH_RE )
149
- end
150
- end
151
- ## add match_match and match_match_csv - why? why not?
152
-
153
-
154
- class << self
155
- alias_method :match_teams?, :match_teams
156
- alias_method :teams?, :match_teams
157
-
158
- alias_method :match_clubs?, :match_clubs
159
- alias_method :clubs?, :match_clubs
160
-
161
- alias_method :match_clubs_wiki?, :match_clubs_wiki
162
- alias_method :clubs_wiki?, :match_clubs_wiki
163
-
164
- alias_method :match_clubs_history?, :match_clubs_history
165
- alias_method :clubs_history?, :match_clubs_history
166
-
167
- alias_method :match_club_props, :match_clubs_props
168
- alias_method :match_club_props?, :match_clubs_props
169
- alias_method :club_props?, :match_clubs_props
170
- alias_method :match_clubs_props?, :match_clubs_props
171
- alias_method :clubs_props?, :match_clubs_props
172
-
173
- alias_method :match_leagues?, :match_leagues
174
- alias_method :leagues?, :match_leagues
175
-
176
- alias_method :match_seasons?, :match_seasons
177
- alias_method :seasons?, :match_seasons
178
-
179
- alias_method :match_conf?, :match_conf
180
- alias_method :conf?, :match_conf
181
- end
182
-
183
-
184
- ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
185
- attr_accessor :include, :exclude
186
-
187
- ## private helpers - like select returns true for keeping and false for skipping entry
188
- def filter_clause( filter, entry )
189
- if filter.is_a?( String )
190
- entry.name.index( filter ) ? true : false
191
- elsif filter.is_a?( Regexp )
192
- filter.match( entry.name ) ? true : false
193
- else ## assume
194
- ## todo/check: pass in entry (and NOT entry.name) - why? why not?
195
- filter.call( entry )
196
- end
197
- end
198
-
199
- def filter( entry )
200
- if @include
201
- if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
202
- true ## todo/check: check for exclude here too - why? why not?
203
- else
204
- false
205
- end
206
- else
207
- if @exclude && filter_clause( @exclude, entry )
208
- false
209
- else
210
- true
211
- end
212
- end
213
- end
214
-
215
-
216
- def initialize( path_or_pack )
217
- @include = nil
218
- @exclude = nil
219
-
220
- if path_or_pack.is_a?( Datafile::Package )
221
- @pack = path_or_pack
222
- else ## assume it's a (string) path
223
- path = path_or_pack
224
- if !File.exist?( path ) ## file or directory
225
- puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
226
- exit 1
227
- end
228
-
229
- if File.directory?( path )
230
- @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
231
- elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
232
- @pack = Datafile::ZipPackage.new( path )
233
- else
234
- puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
235
- exit 1
236
- end
237
- end
238
- end
239
-
240
-
241
- def each( pattern:, &blk )
242
- @pack.each( pattern: pattern ) do |entry|
243
- next unless filter( entry ) ## lets you use include/exclude filters
244
- blk.call( entry )
245
- end
246
- end
247
-
248
- def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
249
- def each_match( format: 'txt', &blk )
250
- if format == 'csv'
251
- each( pattern: MATCH_CSV_RE, &blk );
252
- else
253
- each( pattern: MATCH_RE, &blk );
254
- end
255
- end
256
- def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
257
- def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
258
-
259
- def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
260
-
261
- def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
262
- def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
263
- def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
264
- def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
265
-
266
- def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
267
-
268
-
269
- ## return all match datafile entries
270
- def match( format: 'txt' )
271
- ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
272
- end
273
- alias_method :matches, :match
274
-
275
-
276
- ## todo/check: rename/change to match_by_dir - why? why not?
277
- ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
278
- def match_by_season_dir( format: 'txt' )
279
- ##
280
- ## [["1950s/1956-57",
281
- ## ["1950s/1956-57/1-division1.csv",
282
- ## "1950s/1956-57/2-division2.csv",
283
- ## "1950s/1956-57/3a-division3n.csv",
284
- ## "1950s/1956-57/3b-division3s.csv"]],
285
- ## ...]
286
-
287
- h = {}
288
- match( format: format ).each do |entry|
289
- season_path = File.dirname( entry.name )
290
-
291
- h[ season_path ] ||= []
292
- h[ season_path ] << entry
293
- end
294
-
295
- ## todo/fix: - add sort entries by name - why? why not?
296
- ## note: assume 1-,2- etc. gets us back sorted leagues
297
- ## - use sort. (will not sort by default?)
298
-
299
- h.to_a ## return as array (or keep hash) - why? why not?
300
- end # method match_by_season_dir
301
-
302
- def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
303
-
304
- ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
305
-
306
- ## note: fold all sames seasons (even if in different directories)
307
- ## into same datafile list e.g.
308
- ## ["1957/58",
309
- ## ["1950s/1957-58/1-division1.csv",
310
- ## "1950s/1957-58/2-division2.csv",
311
- ## "1950s/1957-58/3a-division3n.csv",
312
- ## "1950s/1957-58/3b-division3s.csv"]],
313
- ## and
314
- ## ["1957/58",
315
- ## ["archives/1950s/1957-58/1-division1.csv",
316
- ## "archives/1950s/1957-58/2-division2.csv",
317
- ## "archives/1950s/1957-58/3a-division3n.csv",
318
- ## "archives/1950s/1957-58/3b-division3s.csv"]],
319
- ## should be together - why? why not?
320
-
321
- ####
322
- # Example package:
323
- # [["2012/13", ["2012-13/1-proleague.csv"]],
324
- # ["2013/14", ["2013-14/1-proleague.csv"]],
325
- # ["2014/15", ["2014-15/1-proleague.csv"]],
326
- # ["2015/16", ["2015-16/1-proleague.csv"]],
327
- # ["2016/17", ["2016-17/1-proleague.csv"]],
328
- # ["2017/18", ["2017-18/1-proleague.csv"]]]
329
-
330
- ## todo/fix: (re)use a more generic filter instead of start for start of season only
331
-
332
- ## todo/fix: use a "generic" filter_season helper for easy reuse
333
- ## filter_season( clause, season_key )
334
- ## or better filter = SeasonFilter.new( clause )
335
- ## filter.skip? filter.include? ( season_sason_key )?
336
- ## fiteer.before?( season_key ) etc.
337
- ## find some good method names!!!!
338
- season_start = start ? Season( start ) : nil
339
-
340
- h = {}
341
- match( format: format ).each do |entry|
342
- ## note: assume last directory in datafile path is the season part/key
343
- season_q = File.basename( File.dirname( entry.name ))
344
- season = Season.parse( season_q ) ## normalize season
345
-
346
- ## skip if start season before this season
347
- next if season_start && season_start.start_year > season.start_year
348
-
349
- h[ season.key ] ||= []
350
- h[ season.key ] << entry
351
- end
352
-
353
- ## todo/fix: - add sort entries by name - why? why not?
354
- ## note: assume 1-,2- etc. gets us back sorted leagues
355
- ## - use sort. (will not sort by default?)
356
-
357
- ## sort by season
358
- ## latest / newest first (and oldest last)
359
-
360
- h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
361
- r[0] <=> l[0]
362
- end
363
- end # method match_by_season
364
- end # class Package
365
-
366
-
367
- class DirPackage < Package
368
- def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
369
- end
370
-
371
- class ZipPackage < Package
372
- def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
373
- end
374
- end # module SportDb
1
+
2
+ module SportDb
3
+ class Package
4
+
5
+ ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
+ ## e.g. .TXT and .txt
7
+ ## yes!! use /i option!!!!!
8
+
9
+ CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
+ \.conf\.txt$
11
+ }x
12
+
13
+ ## leagues.txt or leagues_en.txt
14
+ ## remove support for en.leagues.txt - why? why not?
15
+ LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
+ leagues
18
+ (?:_[a-z0-9_-]+)?
19
+ \.txt$
20
+ }x
21
+
22
+ ## seasons.txt or seasons_en.txt
23
+ ## remove support for br.seasons.txt - why? why not?
24
+ SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
+ seasons
27
+ (?:_[a-z0-9_-]+)?
28
+ \.txt$
29
+ }x
30
+
31
+
32
+ ####
33
+ # de.stadiums.txt or stadiums.txt or stadiums_de.txt
34
+ GROUNDS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
35
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.stadiums.txt
36
+ stadiums
37
+ (?:_[a-z0-9_-]+)?
38
+ \.txt$
39
+ }x
40
+
41
+ PLAYERS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
42
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.players.txt
43
+ players
44
+ (?:_[a-z0-9_-]+)?
45
+ \.txt$
46
+ }x
47
+
48
+
49
+ ## clubs.txt or clubs_en.txt
50
+ ## remove support for en.clubs.txt - why? why not?
51
+ CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
52
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
53
+ clubs
54
+ (?:_[a-z0-9_-]+)?
55
+ \.txt$
56
+ }x
57
+
58
+ CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
59
+ (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
60
+ clubs
61
+ (?:_[a-z0-9_-]+)?
62
+ \.wiki\.txt$
63
+ }x
64
+
65
+ ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
66
+ CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
67
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
68
+ clubs
69
+ (?:_[a-z0-9_-]+)?
70
+ \.props\.txt$
71
+ }x
72
+ CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
73
+
74
+
75
+ CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
76
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
77
+ clubs
78
+ (?:_[a-z0-9_-]+)?
79
+ \.history\.txt$
80
+ }x
81
+
82
+ ## teams.txt or teams_history.txt
83
+ TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
84
+ teams
85
+ (?:_[a-z0-9_-]+)?
86
+ \.txt$
87
+ }x
88
+
89
+
90
+ ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
91
+ ### season folder:
92
+ ## e.g. /2019-20 or
93
+ ## year-only e.g. /2019 or
94
+ ## /2016--france
95
+ SEASON_RE = %r{ (?:
96
+ \d{4}-\d{2}
97
+ | \d{4}(--[a-z0-9_-]+)?
98
+ )
99
+ }x
100
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
101
+
102
+
103
+ ## note: if pattern includes directory add here
104
+ ## (otherwise move to more "generic" datafile) - why? why not?
105
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
106
+ #{SEASON}
107
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
108
+ }x
109
+
110
+ MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
111
+ #{SEASON}
112
+ /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
113
+ }x
114
+
115
+ ### add "generic" pattern to find all csv datafiles
116
+ CSV_RE = %r{ (?: ^|/ )
117
+ [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
118
+ }x
119
+
120
+
121
+ ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
122
+
123
+ def self.find( path, pattern )
124
+ datafiles = []
125
+
126
+ ## check all txt files
127
+ ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
128
+ candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
129
+ pp candidates
130
+ candidates.each do |candidate|
131
+ datafiles << candidate if pattern.match( candidate )
132
+ end
133
+
134
+ pp datafiles
135
+ datafiles
136
+ end
137
+
138
+
139
+ def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
140
+ def self.match_teams( path ) TEAMS_RE.match( path ); end
141
+
142
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
143
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
144
+ def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
145
+
146
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
147
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
148
+ def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
149
+ def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
150
+
151
+ def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
152
+ def self.match_leagues( path ) LEAGUES_RE.match( path ); end
153
+
154
+ def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
155
+ def self.match_seasons( path ) SEASONS_RE.match( path ); end
156
+
157
+
158
+ def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
159
+ def self.match_conf( path ) CONF_RE.match( path ); end
160
+
161
+ def self.find_match( path, format: 'txt' )
162
+ if format == 'csv'
163
+ find( path, MATCH_CSV_RE )
164
+ else ## otherwise always assume txt for now
165
+ find( path, MATCH_RE )
166
+ end
167
+ end
168
+ ## add match_match and match_match_csv - why? why not?
169
+
170
+
171
+ class << self
172
+ alias_method :match_teams?, :match_teams
173
+ alias_method :teams?, :match_teams
174
+
175
+ alias_method :match_clubs?, :match_clubs
176
+ alias_method :clubs?, :match_clubs
177
+
178
+ alias_method :match_clubs_wiki?, :match_clubs_wiki
179
+ alias_method :clubs_wiki?, :match_clubs_wiki
180
+
181
+ alias_method :match_clubs_history?, :match_clubs_history
182
+ alias_method :clubs_history?, :match_clubs_history
183
+
184
+ alias_method :match_club_props, :match_clubs_props
185
+ alias_method :match_club_props?, :match_clubs_props
186
+ alias_method :club_props?, :match_clubs_props
187
+ alias_method :match_clubs_props?, :match_clubs_props
188
+ alias_method :clubs_props?, :match_clubs_props
189
+
190
+ alias_method :match_leagues?, :match_leagues
191
+ alias_method :leagues?, :match_leagues
192
+
193
+ alias_method :match_seasons?, :match_seasons
194
+ alias_method :seasons?, :match_seasons
195
+
196
+ alias_method :match_conf?, :match_conf
197
+ alias_method :conf?, :match_conf
198
+ end
199
+
200
+
201
+ ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
202
+ attr_accessor :include, :exclude
203
+
204
+ ## private helpers - like select returns true for keeping and false for skipping entry
205
+ def filter_clause( filter, entry )
206
+ if filter.is_a?( String )
207
+ entry.name.index( filter ) ? true : false
208
+ elsif filter.is_a?( Regexp )
209
+ filter.match( entry.name ) ? true : false
210
+ else ## assume
211
+ ## todo/check: pass in entry (and NOT entry.name) - why? why not?
212
+ filter.call( entry )
213
+ end
214
+ end
215
+
216
+ def filter( entry )
217
+ if @include
218
+ if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
219
+ true ## todo/check: check for exclude here too - why? why not?
220
+ else
221
+ false
222
+ end
223
+ else
224
+ if @exclude && filter_clause( @exclude, entry )
225
+ false
226
+ else
227
+ true
228
+ end
229
+ end
230
+ end
231
+
232
+
233
+ def initialize( path_or_pack )
234
+ @include = nil
235
+ @exclude = nil
236
+
237
+ if path_or_pack.is_a?( Datafile::Package )
238
+ @pack = path_or_pack
239
+ else ## assume it's a (string) path
240
+ path = path_or_pack
241
+ if !File.exist?( path ) ## file or directory
242
+ puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
243
+ exit 1
244
+ end
245
+
246
+ if File.directory?( path )
247
+ @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
248
+ elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
249
+ @pack = Datafile::ZipPackage.new( path )
250
+ else
251
+ puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
252
+ exit 1
253
+ end
254
+ end
255
+ end
256
+
257
+
258
+ def each( pattern:, &blk )
259
+ @pack.each( pattern: pattern ) do |entry|
260
+ next unless filter( entry ) ## lets you use include/exclude filters
261
+ blk.call( entry )
262
+ end
263
+ end
264
+
265
+ def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
266
+ def each_match( format: 'txt', &blk )
267
+ if format == 'csv'
268
+ each( pattern: MATCH_CSV_RE, &blk );
269
+ else
270
+ each( pattern: MATCH_RE, &blk );
271
+ end
272
+ end
273
+ def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
274
+ def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
275
+
276
+ def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
277
+
278
+ def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
279
+ def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
280
+ def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
281
+ def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
282
+
283
+ def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
284
+
285
+
286
+ def each_grounds( &blk ) each( pattern: GROUNDS_RE, &blk ); end
287
+ def each_players( &blk ) each( pattern: PLAYERS_RE, &blk ); end
288
+
289
+ ## return all match datafile entries
290
+ def match( format: 'txt' )
291
+ ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
292
+ end
293
+ alias_method :matches, :match
294
+
295
+
296
+ ## todo/check: rename/change to match_by_dir - why? why not?
297
+ ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
298
+ def match_by_season_dir( format: 'txt' )
299
+ ##
300
+ ## [["1950s/1956-57",
301
+ ## ["1950s/1956-57/1-division1.csv",
302
+ ## "1950s/1956-57/2-division2.csv",
303
+ ## "1950s/1956-57/3a-division3n.csv",
304
+ ## "1950s/1956-57/3b-division3s.csv"]],
305
+ ## ...]
306
+
307
+ h = {}
308
+ match( format: format ).each do |entry|
309
+ season_path = File.dirname( entry.name )
310
+
311
+ h[ season_path ] ||= []
312
+ h[ season_path ] << entry
313
+ end
314
+
315
+ ## todo/fix: - add sort entries by name - why? why not?
316
+ ## note: assume 1-,2- etc. gets us back sorted leagues
317
+ ## - use sort. (will not sort by default?)
318
+
319
+ h.to_a ## return as array (or keep hash) - why? why not?
320
+ end # method match_by_season_dir
321
+
322
+ def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
323
+
324
+ ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
325
+
326
+ ## note: fold all same seasons (even if in different directories)
327
+ ## into same datafile list e.g.
328
+ ## ["1957/58",
329
+ ## ["1950s/1957-58/1-division1.csv",
330
+ ## "1950s/1957-58/2-division2.csv",
331
+ ## "1950s/1957-58/3a-division3n.csv",
332
+ ## "1950s/1957-58/3b-division3s.csv"]],
333
+ ## and
334
+ ## ["1957/58",
335
+ ## ["archives/1950s/1957-58/1-division1.csv",
336
+ ## "archives/1950s/1957-58/2-division2.csv",
337
+ ## "archives/1950s/1957-58/3a-division3n.csv",
338
+ ## "archives/1950s/1957-58/3b-division3s.csv"]],
339
+ ## should be together - why? why not?
340
+
341
+ ####
342
+ # Example package:
343
+ # [["2012/13", ["2012-13/1-proleague.csv"]],
344
+ # ["2013/14", ["2013-14/1-proleague.csv"]],
345
+ # ["2014/15", ["2014-15/1-proleague.csv"]],
346
+ # ["2015/16", ["2015-16/1-proleague.csv"]],
347
+ # ["2016/17", ["2016-17/1-proleague.csv"]],
348
+ # ["2017/18", ["2017-18/1-proleague.csv"]]]
349
+
350
+ ## todo/fix: (re)use a more generic filter instead of start for start of season only
351
+
352
+ ## todo/fix: use a "generic" filter_season helper for easy reuse
353
+ ## filter_season( clause, season_key )
354
+ ## or better filter = SeasonFilter.new( clause )
355
+ ## filter.skip? filter.include? ( season_key )?
356
+ ## filter.before?( season_key ) etc.
357
+ ## find some good method names!!!!
358
+ season_start = start ? Season( start ) : nil
359
+
360
+ h = {}
361
+ match( format: format ).each do |entry|
362
+ ## note: assume last directory in datafile path is the season part/key
363
+ season_q = File.basename( File.dirname( entry.name ))
364
+ season = Season.parse( season_q ) ## normalize season
365
+
366
+ ## skip if this season is before the start season
367
+ next if season_start && season_start.start_year > season.start_year
368
+
369
+ h[ season.key ] ||= []
370
+ h[ season.key ] << entry
371
+ end
372
+
373
+ ## todo/fix: - add sort entries by name - why? why not?
374
+ ## note: assume 1-,2- etc. gets us back sorted leagues
375
+ ## - use sort. (will not sort by default?)
376
+
377
+ ## sort by season
378
+ ## latest / newest first (and oldest last)
379
+
380
+ h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
381
+ r[0] <=> l[0]
382
+ end
383
+ end # method match_by_season
384
+ end # class Package
385
+
386
+
387
+ class DirPackage < Package
388
+ def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
389
+ end
390
+
391
+ class ZipPackage < Package
392
+ def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
393
+ end
394
+ end # module SportDb