sportdb-formats 1.1.5 → 1.1.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,202 +1,202 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
-
5
-
6
- class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
7
-
8
- def self.parse( lines, start: )
9
- ## todo/fix: add support for txt and lines
10
- ## check if lines_or_txt is an array or just a string
11
- parser = new( lines, start )
12
- parser.parse
13
- end
14
-
15
-
16
- include Logging ## e.g. logger#debug, logger#info, etc.
17
- include ParserHelper ## e.g. read_lines, etc.
18
-
19
-
20
- def initialize( lines, start )
21
- # for convenience split string into lines
22
- ## note: removes/strips empty lines
23
- ## todo/check: change to text instead of array of lines - why? why not?
24
- @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
25
- @start = start
26
- end
27
-
28
- def parse
29
- ## try to find all teams in match schedule
30
- @last_round = nil
31
- @last_group = nil
32
-
33
- ## definitions/defs
34
- @round_defs = Hash.new(0)
35
- @group_defs = Hash.new(0)
36
-
37
- ## usage/refs
38
- @rounds = {} ## track usage counter and match (two teams) counter
39
- @groups = {} ## -"-
40
- @teams = Hash.new(0) ## keep track of usage counter
41
-
42
- @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
43
-
44
-
45
- @lines.each do |line|
46
- if is_goals?( line )
47
- logger.debug "skipping matched goals line: >#{line}<"
48
- elsif is_round_def?( line )
49
- ## todo/fix: add round definition (w begin n end date)
50
- ## todo: do not patch rounds with definition (already assume begin/end date is good)
51
- ## -- how to deal with matches that get rescheduled/postponed?
52
- logger.debug "skipping matched round def line: >#{line}<"
53
- @round_defs[ line ] += 1
54
- elsif is_round?( line )
55
- logger.debug "skipping matched round line: >#{line}<"
56
-
57
- round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
58
- round[:count] +=1
59
- @last_round = round
60
- elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
61
- ### todo: add pipe (|) marker (required)
62
- logger.debug "skipping matched group def line: >#{line}<"
63
- @group_defs[ line ] += 1
64
- elsif is_group?( line )
65
- ## -- lets you set group e.g. Group A etc.
66
- logger.debug "skipping matched group line: >#{line}<"
67
-
68
- group = @groups[ line ] ||= {count: 0, match_count: 0}
69
- group[:count] +=1
70
- @last_group = group
71
- ## todo/fix: parse group line!!!
72
- elsif try_parse_game( line )
73
- # do nothing here
74
- else
75
- logger.warn "skipping line (no match found): >#{line}<"
76
- @warns << line
77
- end
78
- end # lines.each
79
-
80
- [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
81
- end
82
-
83
-
84
- def try_parse_game( line )
85
- # note: clone line; for possible test do NOT modify in place for now
86
- # note: returns true if parsed, false if no match
87
- parse_game( line.dup )
88
- end
89
-
90
- def parse_game( line )
91
- logger.debug "parsing game (fixture) line: >#{line}<"
92
-
93
- ## remove all protected text runs e.g. []
94
- ## fix: add [ to end-of-line too
95
- ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
96
-
97
- line = line.gsub( /\[
98
- [^\]]+?
99
- \]/x, '' ).strip
100
- return true if line.empty? ## note: return true (for valid line with no match/teams)
101
-
102
-
103
- ## split by geo (@) - remove for now
104
- values = line.split( '@' )
105
- line = values[0]
106
-
107
-
108
- ## try find date
109
- date = find_date!( line, start: @start )
110
- if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
111
- line = line.sub( /\[
112
- [^\]]+?
113
- \]/x, '' ).strip
114
-
115
- else
116
- ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
117
- ## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
118
- line = line.sub( %r{^ ## MUST be anchored to beginning of line
119
- [012]?[0-9]
120
- [.:hH]
121
- [0-9][0-9]
122
- (?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
123
- }x, '' ).strip
124
- end
125
-
126
- return true if line.empty? ## note: return true (for valid line with no match/teams)
127
-
128
-
129
- score = find_score!( line )
130
-
131
- logger.debug " line: >#{line}<"
132
-
133
- line = line.sub( /\[
134
- [^\]]+?
135
- \]/x, '$$' ) # note: replace first score tag with $$
136
- line = line.gsub( /\[
137
- [^\]]+?
138
- \]/x, '' ) # note: replace/remove all other score tags with nothing
139
-
140
- ## clean-up remove all text run inside () or empty () too
141
- line = line.gsub( /\(
142
- [^)]*?
143
- \)/x, '' )
144
-
145
-
146
- ## check for more match separators e.g. - or vs for now
147
- line = line.sub( / \s+
148
- ( -
149
- | v
150
- | vs\.? # note: allow optional dot eg. vs.
151
- )
152
- \s+
153
- /ix, '$$' )
154
-
155
- values = line.split( '$$' )
156
- values = values.map { |value| value.strip } ## strip spaces
157
- values = values.select { |value| !value.empty? } ## remove empty strings
158
-
159
- return true if values.size == 0 ## note: return true (for valid line with no match/teams)
160
-
161
- if values.size == 1
162
- puts "(auto config) try matching teams separated by spaces (2+):"
163
- pp values
164
-
165
- values = values[0].split( /[ ]{2,}/ )
166
- pp values
167
- end
168
-
169
- return false if values.size != 2
170
-
171
- puts "(auto config) try matching teams:"
172
- pp values
173
-
174
- @teams[ values[0] ] += 1 ## update usage counters
175
- @teams[ values[1] ] += 1
176
-
177
- @last_round[ :match_count ] += 1 if @last_round
178
- @last_group[ :match_count ] += 1 if @last_group
179
-
180
- true
181
- end
182
-
183
-
184
-
185
- def find_score!( line )
186
- # note: always call after find_dates !!!
187
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
188
- # -- note: score might have two digits too
189
- ScoreFormats.find!( line )
190
- end
191
-
192
- def find_date!( line, start: )
193
- ## NB: lets us pass in start_at/end_at date (for event)
194
- # for auto-complete year
195
-
196
- # extract date from line
197
- # and return it
198
- # NB: side effect - removes date from line string
199
- DateFormats.find!( line, start: start )
200
- end
201
- end # class AutoConfParser
202
- end # module SportDb
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+
5
+
6
+ class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
7
+
8
+ def self.parse( lines, start: )
9
+ ## todo/fix: add support for txt and lines
10
+ ## check if lines_or_txt is an array or just a string
11
+ parser = new( lines, start )
12
+ parser.parse
13
+ end
14
+
15
+
16
+ include Logging ## e.g. logger#debug, logger#info, etc.
17
+ include ParserHelper ## e.g. read_lines, etc.
18
+
19
+
20
+ def initialize( lines, start )
21
+ # for convenience split string into lines
22
+ ## note: removes/strips empty lines
23
+ ## todo/check: change to text instead of array of lines - why? why not?
24
+ @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
25
+ @start = start
26
+ end
27
+
28
+ def parse
29
+ ## try to find all teams in match schedule
30
+ @last_round = nil
31
+ @last_group = nil
32
+
33
+ ## definitions/defs
34
+ @round_defs = Hash.new(0)
35
+ @group_defs = Hash.new(0)
36
+
37
+ ## usage/refs
38
+ @rounds = {} ## track usage counter and match (two teams) counter
39
+ @groups = {} ## -"-
40
+ @teams = Hash.new(0) ## keep track of usage counter
41
+
42
+ @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
43
+
44
+
45
+ @lines.each do |line|
46
+ if is_goals?( line )
47
+ logger.debug "skipping matched goals line: >#{line}<"
48
+ elsif is_round_def?( line )
49
+ ## todo/fix: add round definition (w begin n end date)
50
+ ## todo: do not patch rounds with definition (already assume begin/end date is good)
51
+ ## -- how to deal with matches that get rescheduled/postponed?
52
+ logger.debug "skipping matched round def line: >#{line}<"
53
+ @round_defs[ line ] += 1
54
+ elsif is_round?( line )
55
+ logger.debug "skipping matched round line: >#{line}<"
56
+
57
+ round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
58
+ round[:count] +=1
59
+ @last_round = round
60
+ elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
61
+ ### todo: add pipe (|) marker (required)
62
+ logger.debug "skipping matched group def line: >#{line}<"
63
+ @group_defs[ line ] += 1
64
+ elsif is_group?( line )
65
+ ## -- lets you set group e.g. Group A etc.
66
+ logger.debug "skipping matched group line: >#{line}<"
67
+
68
+ group = @groups[ line ] ||= {count: 0, match_count: 0}
69
+ group[:count] +=1
70
+ @last_group = group
71
+ ## todo/fix: parse group line!!!
72
+ elsif try_parse_game( line )
73
+ # do nothing here
74
+ else
75
+ logger.warn "skipping line (no match found): >#{line}<"
76
+ @warns << line
77
+ end
78
+ end # lines.each
79
+
80
+ [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
81
+ end
82
+
83
+
84
+ def try_parse_game( line )
85
+ # note: clone line; for possible test do NOT modify in place for now
86
+ # note: returns true if parsed, false if no match
87
+ parse_game( line.dup )
88
+ end
89
+
90
+ def parse_game( line )
91
+ logger.debug "parsing game (fixture) line: >#{line}<"
92
+
93
+ ## remove all protected text runs e.g. []
94
+ ## fix: add [ to end-of-line too
95
+ ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
96
+
97
+ line = line.gsub( /\[
98
+ [^\]]+?
99
+ \]/x, '' ).strip
100
+ return true if line.empty? ## note: return true (for valid line with no match/teams)
101
+
102
+
103
+ ## split by geo (@) - remove for now
104
+ values = line.split( '@' )
105
+ line = values[0]
106
+
107
+
108
+ ## try find date
109
+ date = find_date!( line, start: @start )
110
+ if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
111
+ line = line.sub( /\[
112
+ [^\]]+?
113
+ \]/x, '' ).strip
114
+
115
+ else
116
+ ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
117
+ ## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
118
+ line = line.sub( %r{^ ## MUST be anchored to beginning of line
119
+ [012]?[0-9]
120
+ [.:hH]
121
+ [0-9][0-9]
122
+ (?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
123
+ }x, '' ).strip
124
+ end
125
+
126
+ return true if line.empty? ## note: return true (for valid line with no match/teams)
127
+
128
+
129
+ score = find_score!( line )
130
+
131
+ logger.debug " line: >#{line}<"
132
+
133
+ line = line.sub( /\[
134
+ [^\]]+?
135
+ \]/x, '$$' ) # note: replace first score tag with $$
136
+ line = line.gsub( /\[
137
+ [^\]]+?
138
+ \]/x, '' ) # note: replace/remove all other score tags with nothing
139
+
140
+ ## clean-up remove all text run inside () or empty () too
141
+ line = line.gsub( /\(
142
+ [^)]*?
143
+ \)/x, '' )
144
+
145
+
146
+ ## check for more match separators e.g. - or vs for now
147
+ line = line.sub( / \s+
148
+ ( -
149
+ | v
150
+ | vs\.? # note: allow optional dot eg. vs.
151
+ )
152
+ \s+
153
+ /ix, '$$' )
154
+
155
+ values = line.split( '$$' )
156
+ values = values.map { |value| value.strip } ## strip spaces
157
+ values = values.select { |value| !value.empty? } ## remove empty strings
158
+
159
+ return true if values.size == 0 ## note: return true (for valid line with no match/teams)
160
+
161
+ if values.size == 1
162
+ puts "(auto config) try matching teams separated by spaces (2+):"
163
+ pp values
164
+
165
+ values = values[0].split( /[ ]{2,}/ )
166
+ pp values
167
+ end
168
+
169
+ return false if values.size != 2
170
+
171
+ puts "(auto config) try matching teams:"
172
+ pp values
173
+
174
+ @teams[ values[0] ] += 1 ## update usage counters
175
+ @teams[ values[1] ] += 1
176
+
177
+ @last_round[ :match_count ] += 1 if @last_round
178
+ @last_group[ :match_count ] += 1 if @last_group
179
+
180
+ true
181
+ end
182
+
183
+
184
+
185
+ def find_score!( line )
186
+ # note: always call after find_dates !!!
187
+ # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
188
+ # -- note: score might have two digits too
189
+ ScoreFormats.find!( line )
190
+ end
191
+
192
+ def find_date!( line, start: )
193
+ ## NB: lets us pass in start_at/end_at date (for event)
194
+ # for auto-complete year
195
+
196
+ # extract date from line
197
+ # and return it
198
+ # NB: side effect - removes date from line string
199
+ DateFormats.find!( line, start: start )
200
+ end
201
+ end # class AutoConfParser
202
+ end # module SportDb
@@ -1,374 +1,374 @@
1
-
2
- module SportDb
3
- class Package
4
-
5
- ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
- ## e.g. .TXT and .txt
7
- ## yes!! use /i option!!!!!
8
-
9
- CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
- \.conf\.txt$
11
- }x
12
-
13
- ## leagues.txt or leagues_en.txt
14
- ## remove support for en.leagues.txt - why? why not?
15
- LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
- leagues
18
- (?:_[a-z0-9_-]+)?
19
- \.txt$
20
- }x
21
-
22
- ## seasons.txt or seasons_en.txt
23
- ## remove support for br.seasons.txt - why? why not?
24
- SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
- seasons
27
- (?:_[a-z0-9_-]+)?
28
- \.txt$
29
- }x
30
-
31
-
32
- ## clubs.txt or clubs_en.txt
33
- ## remove support for en.clubs.txt - why? why not?
34
- CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
35
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
36
- clubs
37
- (?:_[a-z0-9_-]+)?
38
- \.txt$
39
- }x
40
-
41
- CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
42
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
43
- clubs
44
- (?:_[a-z0-9_-]+)?
45
- \.wiki\.txt$
46
- }x
47
-
48
- ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
- CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
50
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
51
- clubs
52
- (?:_[a-z0-9_-]+)?
53
- \.props\.txt$
54
- }x
55
- CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
-
57
-
58
- CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
- clubs
61
- (?:_[a-z0-9_-]+)?
62
- \.history\.txt$
63
- }x
64
-
65
- ## teams.txt or teams_history.txt
66
- TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
67
- teams
68
- (?:_[a-z0-9_-]+)?
69
- \.txt$
70
- }x
71
-
72
-
73
- ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
74
- ### season folder:
75
- ## e.g. /2019-20 or
76
- ## year-only e.g. /2019 or
77
- ## /2016--france
78
- SEASON_RE = %r{ (?:
79
- \d{4}-\d{2}
80
- | \d{4}(--[a-z0-9_-]+)?
81
- )
82
- }x
83
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
84
-
85
-
86
- ## note: if pattern includes directory add here
87
- ## (otherwise move to more "generic" datafile) - why? why not?
88
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
89
- #{SEASON}
90
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
91
- }x
92
-
93
- MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
94
- #{SEASON}
95
- /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
96
- }x
97
-
98
- ### add "generic" pattern to find all csv datafiles
99
- CSV_RE = %r{ (?: ^|/ )
100
- [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
101
- }x
102
-
103
-
104
- ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
105
-
106
- def self.find( path, pattern )
107
- datafiles = []
108
-
109
- ## check all txt files
110
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
111
- candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
112
- pp candidates
113
- candidates.each do |candidate|
114
- datafiles << candidate if pattern.match( candidate )
115
- end
116
-
117
- pp datafiles
118
- datafiles
119
- end
120
-
121
-
122
- def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
123
- def self.match_teams( path ) TEAMS_RE.match( path ); end
124
-
125
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
- def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
128
-
129
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
- def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
- def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
133
-
134
- def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
135
- def self.match_leagues( path ) LEAGUES_RE.match( path ); end
136
-
137
- def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
138
- def self.match_seasons( path ) SEASONS_RE.match( path ); end
139
-
140
-
141
- def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
142
- def self.match_conf( path ) CONF_RE.match( path ); end
143
-
144
- def self.find_match( path, format: 'txt' )
145
- if format == 'csv'
146
- find( path, MATCH_CSV_RE )
147
- else ## otherwise always assume txt for now
148
- find( path, MATCH_RE )
149
- end
150
- end
151
- ## add match_match and match_match_csv - why? why not?
152
-
153
-
154
- class << self
155
- alias_method :match_teams?, :match_teams
156
- alias_method :teams?, :match_teams
157
-
158
- alias_method :match_clubs?, :match_clubs
159
- alias_method :clubs?, :match_clubs
160
-
161
- alias_method :match_clubs_wiki?, :match_clubs_wiki
162
- alias_method :clubs_wiki?, :match_clubs_wiki
163
-
164
- alias_method :match_clubs_history?, :match_clubs_history
165
- alias_method :clubs_history?, :match_clubs_history
166
-
167
- alias_method :match_club_props, :match_clubs_props
168
- alias_method :match_club_props?, :match_clubs_props
169
- alias_method :club_props?, :match_clubs_props
170
- alias_method :match_clubs_props?, :match_clubs_props
171
- alias_method :clubs_props?, :match_clubs_props
172
-
173
- alias_method :match_leagues?, :match_leagues
174
- alias_method :leagues?, :match_leagues
175
-
176
- alias_method :match_seasons?, :match_seasons
177
- alias_method :seasons?, :match_seasons
178
-
179
- alias_method :match_conf?, :match_conf
180
- alias_method :conf?, :match_conf
181
- end
182
-
183
-
184
- ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
185
- attr_accessor :include, :exclude
186
-
187
- ## private helpers - like select returns true for keeping and false for skipping entry
188
- def filter_clause( filter, entry )
189
- if filter.is_a?( String )
190
- entry.name.index( filter ) ? true : false
191
- elsif filter.is_a?( Regexp )
192
- filter.match( entry.name ) ? true : false
193
- else ## assume
194
- ## todo/check: pass in entry (and NOT entry.name) - why? why not?
195
- filter.call( entry )
196
- end
197
- end
198
-
199
- def filter( entry )
200
- if @include
201
- if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
202
- true ## todo/check: check for exclude here too - why? why not?
203
- else
204
- false
205
- end
206
- else
207
- if @exclude && filter_clause( @exclude, entry )
208
- false
209
- else
210
- true
211
- end
212
- end
213
- end
214
-
215
-
216
- def initialize( path_or_pack )
217
- @include = nil
218
- @exclude = nil
219
-
220
- if path_or_pack.is_a?( Datafile::Package )
221
- @pack = path_or_pack
222
- else ## assume it's a (string) path
223
- path = path_or_pack
224
- if !File.exist?( path ) ## file or directory
225
- puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
226
- exit 1
227
- end
228
-
229
- if File.directory?( path )
230
- @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
231
- elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
232
- @pack = Datafile::ZipPackage.new( path )
233
- else
234
- puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
235
- exit 1
236
- end
237
- end
238
- end
239
-
240
-
241
- def each( pattern:, &blk )
242
- @pack.each( pattern: pattern ) do |entry|
243
- next unless filter( entry ) ## lets you use include/exclude filters
244
- blk.call( entry )
245
- end
246
- end
247
-
248
- def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
249
- def each_match( format: 'txt', &blk )
250
- if format == 'csv'
251
- each( pattern: MATCH_CSV_RE, &blk );
252
- else
253
- each( pattern: MATCH_RE, &blk );
254
- end
255
- end
256
- def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
257
- def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
258
-
259
- def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
260
-
261
- def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
262
- def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
263
- def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
264
- def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
265
-
266
- def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
267
-
268
-
269
- ## return all match datafile entries
270
- def match( format: 'txt' )
271
- ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
272
- end
273
- alias_method :matches, :match
274
-
275
-
276
- ## todo/check: rename/change to match_by_dir - why? why not?
277
- ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
278
- def match_by_season_dir( format: 'txt' )
279
- ##
280
- ## [["1950s/1956-57",
281
- ## ["1950s/1956-57/1-division1.csv",
282
- ## "1950s/1956-57/2-division2.csv",
283
- ## "1950s/1956-57/3a-division3n.csv",
284
- ## "1950s/1956-57/3b-division3s.csv"]],
285
- ## ...]
286
-
287
- h = {}
288
- match( format: format ).each do |entry|
289
- season_path = File.dirname( entry.name )
290
-
291
- h[ season_path ] ||= []
292
- h[ season_path ] << entry
293
- end
294
-
295
- ## todo/fix: - add sort entries by name - why? why not?
296
- ## note: assume 1-,2- etc. gets us back sorted leagues
297
- ## - use sort. (will not sort by default?)
298
-
299
- h.to_a ## return as array (or keep hash) - why? why not?
300
- end # method match_by_season_dir
301
-
302
- def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
303
-
304
- ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
305
-
306
- ## note: fold all sames seasons (even if in different directories)
307
- ## into same datafile list e.g.
308
- ## ["1957/58",
309
- ## ["1950s/1957-58/1-division1.csv",
310
- ## "1950s/1957-58/2-division2.csv",
311
- ## "1950s/1957-58/3a-division3n.csv",
312
- ## "1950s/1957-58/3b-division3s.csv"]],
313
- ## and
314
- ## ["1957/58",
315
- ## ["archives/1950s/1957-58/1-division1.csv",
316
- ## "archives/1950s/1957-58/2-division2.csv",
317
- ## "archives/1950s/1957-58/3a-division3n.csv",
318
- ## "archives/1950s/1957-58/3b-division3s.csv"]],
319
- ## should be together - why? why not?
320
-
321
- ####
322
- # Example package:
323
- # [["2012/13", ["2012-13/1-proleague.csv"]],
324
- # ["2013/14", ["2013-14/1-proleague.csv"]],
325
- # ["2014/15", ["2014-15/1-proleague.csv"]],
326
- # ["2015/16", ["2015-16/1-proleague.csv"]],
327
- # ["2016/17", ["2016-17/1-proleague.csv"]],
328
- # ["2017/18", ["2017-18/1-proleague.csv"]]]
329
-
330
- ## todo/fix: (re)use a more generic filter instead of start for start of season only
331
-
332
- ## todo/fix: use a "generic" filter_season helper for easy reuse
333
- ## filter_season( clause, season_key )
334
- ## or better filter = SeasonFilter.new( clause )
335
- ## filter.skip? filter.include? ( season_sason_key )?
336
- ## fiteer.before?( season_key ) etc.
337
- ## find some good method names!!!!
338
- season_start = start ? Season( start ) : nil
339
-
340
- h = {}
341
- match( format: format ).each do |entry|
342
- ## note: assume last directory in datafile path is the season part/key
343
- season_q = File.basename( File.dirname( entry.name ))
344
- season = Season.parse( season_q ) ## normalize season
345
-
346
- ## skip if start season before this season
347
- next if season_start && season_start.start_year > season.start_year
348
-
349
- h[ season.key ] ||= []
350
- h[ season.key ] << entry
351
- end
352
-
353
- ## todo/fix: - add sort entries by name - why? why not?
354
- ## note: assume 1-,2- etc. gets us back sorted leagues
355
- ## - use sort. (will not sort by default?)
356
-
357
- ## sort by season
358
- ## latest / newest first (and oldest last)
359
-
360
- h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
361
- r[0] <=> l[0]
362
- end
363
- end # method match_by_season
364
- end # class Package
365
-
366
-
367
- class DirPackage < Package
368
- def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
369
- end
370
-
371
- class ZipPackage < Package
372
- def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
373
- end
374
- end # module SportDb
1
+
2
+ module SportDb
3
+ class Package
4
+
5
+ ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
+ ## e.g. .TXT and .txt
7
+ ## yes!! use /i option!!!!!
8
+
9
+ CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
+ \.conf\.txt$
11
+ }x
12
+
13
+ ## leagues.txt or leagues_en.txt
14
+ ## remove support for en.leagues.txt - why? why not?
15
+ LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
+ leagues
18
+ (?:_[a-z0-9_-]+)?
19
+ \.txt$
20
+ }x
21
+
22
+ ## seasons.txt or seasons_en.txt
23
+ ## remove support for br.seasons.txt - why? why not?
24
+ SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
+ seasons
27
+ (?:_[a-z0-9_-]+)?
28
+ \.txt$
29
+ }x
30
+
31
+
32
+ ## clubs.txt or clubs_en.txt
33
+ ## remove support for en.clubs.txt - why? why not?
34
+ CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
35
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
36
+ clubs
37
+ (?:_[a-z0-9_-]+)?
38
+ \.txt$
39
+ }x
40
+
41
+ CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
42
+ (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
43
+ clubs
44
+ (?:_[a-z0-9_-]+)?
45
+ \.wiki\.txt$
46
+ }x
47
+
48
+ ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
+ CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
50
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
51
+ clubs
52
+ (?:_[a-z0-9_-]+)?
53
+ \.props\.txt$
54
+ }x
55
+ CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
+
57
+
58
+ CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
+ clubs
61
+ (?:_[a-z0-9_-]+)?
62
+ \.history\.txt$
63
+ }x
64
+
65
+ ## teams.txt or teams_history.txt
66
+ TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
67
+ teams
68
+ (?:_[a-z0-9_-]+)?
69
+ \.txt$
70
+ }x
71
+
72
+
73
+ ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
74
+ ### season folder:
75
+ ## e.g. /2019-20 or
76
+ ## year-only e.g. /2019 or
77
+ ## /2016--france
78
+ SEASON_RE = %r{ (?:
79
+ \d{4}-\d{2}
80
+ | \d{4}(--[a-z0-9_-]+)?
81
+ )
82
+ }x
83
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
84
+
85
+
86
+ ## note: if pattern includes directory add here
87
+ ## (otherwise move to more "generic" datafile) - why? why not?
88
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
89
+ #{SEASON}
90
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
91
+ }x
92
+
93
+ MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
94
+ #{SEASON}
95
+ /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
96
+ }x
97
+
98
+ ### add "generic" pattern to find all csv datafiles
99
+ CSV_RE = %r{ (?: ^|/ )
100
+ [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
101
+ }x
102
+
103
+
104
+ ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
105
+
106
+ def self.find( path, pattern )
107
+ datafiles = []
108
+
109
+ ## check all txt files
110
+ ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
111
+ candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
112
+ pp candidates
113
+ candidates.each do |candidate|
114
+ datafiles << candidate if pattern.match( candidate )
115
+ end
116
+
117
+ pp datafiles
118
+ datafiles
119
+ end
120
+
121
+
122
+ def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
123
+ def self.match_teams( path ) TEAMS_RE.match( path ); end
124
+
125
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
+ def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
128
+
129
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
+ def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
+ def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
133
+
134
+ def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
135
+ def self.match_leagues( path ) LEAGUES_RE.match( path ); end
136
+
137
+ def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
138
+ def self.match_seasons( path ) SEASONS_RE.match( path ); end
139
+
140
+
141
+ def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
142
+ def self.match_conf( path ) CONF_RE.match( path ); end
143
+
144
+ def self.find_match( path, format: 'txt' )
145
+ if format == 'csv'
146
+ find( path, MATCH_CSV_RE )
147
+ else ## otherwise always assume txt for now
148
+ find( path, MATCH_RE )
149
+ end
150
+ end
151
+ ## add match_match and match_match_csv - why? why not?
152
+
153
+
154
class << self
  ## Convenience aliases for the match_* class methods:
  ##   target method => list of alias names (predicate-style and short forms).
  ##  note: the aliases return exactly what the target returns
  ##        (they are NOT converted to strict true/false).
  {
    match_teams:         %i[match_teams?         teams?],
    match_clubs:         %i[match_clubs?         clubs?],
    match_clubs_wiki:    %i[match_clubs_wiki?    clubs_wiki?],
    match_clubs_history: %i[match_clubs_history? clubs_history?],
    match_clubs_props:   %i[match_club_props
                            match_club_props?    club_props?
                            match_clubs_props?   clubs_props?],
    match_leagues:       %i[match_leagues?       leagues?],
    match_seasons:       %i[match_seasons?       seasons?],
    match_conf:          %i[match_conf?          conf?],
  }.each do |target, names|
    names.each { |name| alias_method name, target }
  end
end
182
+
183
+
184
+ ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
185
+ attr_accessor :include, :exclude   ## optional entry filters (String, Regexp or callable) checked by filter; both start out as nil (no filtering) and include wins over exclude
186
+
187
+ ## private helpers - like select returns true for keeping and false for skipping entry
188
## Evaluate a single filter against a datafile entry.
##   String  - true if entry.name contains the string
##   Regexp  - true if the regexp matches entry.name
##   other   - assumed to be callable; gets passed the entry (and NOT entry.name)
##             and its result is returned as is
def filter_clause( filter, entry )
  case filter
  when String then !entry.name.index( filter ).nil?
  when Regexp then !filter.match( entry.name ).nil?
  else             filter.call( entry )
  end
end
198
+
199
## Decide if an entry gets kept (true) or skipped (false).
##  note: if an include filter is set it decides alone
##        (the exclude filter is NOT checked in that case);
##        otherwise the entry is kept unless the exclude filter matches.
def filter( entry )
  return( filter_clause( @include, entry ) ? true : false )  if @include

  !( @exclude && filter_clause( @exclude, entry ) )
end
214
+
215
+
216
## Wrap a "low-level" datafile package.
##   path_or_pack - either a ready-to-use Datafile::Package (used as is) or
##                  a path to a directory or to a file with .zip extension
##
##  note: on a missing path or an unsupported file type
##        an error message gets printed and the process exits with status 1.
def initialize( path_or_pack )
  @include = nil
  @exclude = nil

  if path_or_pack.is_a?( Datafile::Package )
    @pack = path_or_pack
  else   ## assume it's a (string) path
    path = path_or_pack
    if !File.exist?( path )   ## file or directory
      puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
      exit 1
    end

    if File.directory?( path )
      @pack = Datafile::DirPackage.new( path )   ## delegate to "generic" package
    elsif File.file?( path ) && File.extname( path ).downcase == '.zip'
      ## note: extname includes dot (.) e.g. .zip;
      ##       compare case-insensitive to accept .ZIP, .Zip, etc. too
      @pack = Datafile::ZipPackage.new( path )
    else
      puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
      exit 1
    end
  end
end
239
+
240
+
241
## Iterate over all package entries matching pattern
##  and pass on the ones that make it through the include/exclude filters.
def each( pattern:, &blk )
  @pack.each( pattern: pattern ) do |entry|
    blk.call( entry )  if filter( entry )
  end
end
247
+
248
## Iterate over all conf(iguration) datafile entries (pattern CONF_RE).
def each_conf( &blk )
  each( pattern: CONF_RE, &blk )
end
249
## Iterate over all match datafile entries.
##  format: 'csv' selects MATCH_CSV_RE; any other value uses MATCH_RE.
def each_match( format: 'txt', &blk )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  each( pattern: pattern, &blk )
end
256
## Iterate over all match datafile entries in csv format (pattern MATCH_CSV_RE).
def each_match_csv( &blk )
  each( pattern: MATCH_CSV_RE, &blk )
end
257
## Iterate over all entries matching the CSV_RE pattern.
def each_csv( &blk )
  each( pattern: CSV_RE, &blk )
end
258
+
259
## Iterate over all club props datafile entries (pattern CLUB_PROPS_RE).
##  NOTE(review): uses CLUB_PROPS_RE (singular) while match_clubs_props
##                defaults to CLUBS_PROPS_RE (plural) - confirm both constants
##                are defined and refer to the same pattern.
def each_club_props( &blk )
  each( pattern: CLUB_PROPS_RE, &blk )
end
260
+
261
## Iterate over all leagues datafile entries (pattern LEAGUES_RE).
def each_leagues( &blk )
  each( pattern: LEAGUES_RE, &blk )
end
262
## Iterate over all clubs datafile entries (pattern CLUBS_RE).
def each_clubs( &blk )
  each( pattern: CLUBS_RE, &blk )
end
263
## Iterate over all clubs wiki datafile entries (pattern CLUBS_WIKI_RE).
def each_clubs_wiki( &blk )
  each( pattern: CLUBS_WIKI_RE, &blk )
end
264
## Iterate over all clubs history datafile entries (pattern CLUBS_HISTORY_RE).
def each_clubs_history( &blk )
  each( pattern: CLUBS_HISTORY_RE, &blk )
end
265
+
266
## Iterate over all seasons datafile entries (pattern SEASONS_RE).
def each_seasons( &blk )
  each( pattern: SEASONS_RE, &blk )
end
267
+
268
+
269
+ ## return all match datafile entries
270
def match( format: 'txt' )
  ## collect everything each_match yields (in iteration order) into an array
  entries = []
  each_match( format: format ) { |entry| entries.push( entry ) }
  entries
end
273
+ alias_method :matches, :match   ## plural alias - matches( format: ) is the same method as match( format: )
274
+
275
+
276
+ ## todo/check: rename/change to match_by_dir - why? why not?
277
+ ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
278
def match_by_season_dir( format: 'txt' )
  ## Group all match datafile entries by the directory of the entry name.
  ##  Returns an array of [directory, entries] pairs, e.g.
  ##
  ##   [["1950s/1956-57",
  ##      ["1950s/1956-57/1-division1.csv",
  ##       "1950s/1956-57/2-division2.csv",
  ##       "1950s/1956-57/3a-division3n.csv",
  ##       "1950s/1956-57/3b-division3s.csv"]],
  ##    ...]
  ##
  ##  note: directories and entries keep the order in which match returns them;
  ##        no sorting is done here
  ##        (assumes that 1-,2- etc. gets us back sorted leagues).
  ##  todo/fix: add sort entries by name - why? why not?

  match( format: format )
    .group_by { |entry| File.dirname( entry.name ) }
    .to_a    ## return as array (or keep hash) - why? why not?
end # method match_by_season_dir
301
+
302
def match_by_season( format: 'txt', start: nil )   ## change/rename to by_season_key - why? why not?
  ## Group all match datafile entries by (normalized) season key.
  ##  Returns an array of [season_key, entries] pairs
  ##  sorted by season key - latest / newest first (and oldest last), e.g.
  ##
  ##   [["2017/18", ["2017-18/1-proleague.csv"]],
  ##    ["2016/17", ["2016-17/1-proleague.csv"]],
  ##    ...]
  ##
  ##  note: the last directory in the datafile path is assumed to be
  ##        the season part/key; same seasons get folded together
  ##        (even if in different directories) e.g.
  ##          "1950s/1957-58/1-division1.csv"  and
  ##          "archives/1950s/1957-58/1-division1.csv"
  ##        both end up in the "1957/58" list.
  ##
  ##  start: optional first season to include;
  ##         seasons with a start year before it get skipped.
  ##
  ##  todo/note: in the future - season might be anything
  ##             (e.g. part of a filename and NOT a directory) - why? why not?
  ##  todo/fix: (re)use a more generic filter instead of start for start of season only
  ##            e.g. filter_season( clause, season_key )
  ##            or better filter = SeasonFilter.new( clause )
  ##              filter.skip?  filter.include?( season_key )
  ##              filter.before?( season_key ) etc.
  ##            find some good method names!!!!
  ##  todo/fix: add sort entries by name - why? why not?
  ##            note: assume 1-,2- etc. gets us back sorted leagues

  first_season = start ? Season( start ) : nil

  by_season = {}
  match( format: format ).each do |entry|
    season_dir = File.basename( File.dirname( entry.name ))
    season     = Season.parse( season_dir )    ## normalize season

    ## skip seasons starting before the requested start season
    next if first_season && first_season.start_year > season.start_year

    (by_season[ season.key ] ||= []) << entry
  end

  ## return as array (or keep hash) - why? why not?
  by_season.to_a.sort { |(lkey, _), (rkey, _)| rkey <=> lkey }
end # method match_by_season
364
+ end # class Package
365
+
366
+
367
## Package backed by a directory.
class DirPackage < Package
  ## wraps path into a Datafile::DirPackage and hands it to Package#initialize
  def initialize( path )
    super( Datafile::DirPackage.new( path ) )
  end
end
370
+
371
## Package backed by a zip archive.
class ZipPackage < Package
  ## wraps path into a Datafile::ZipPackage and hands it to Package#initialize
  def initialize( path )
    super( Datafile::ZipPackage.new( path ) )
  end
end
374
+ end # module SportDb