sportdb-formats 1.1.5 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,202 +1,202 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
-
5
-
6
- class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
7
-
8
- def self.parse( lines, start: )
9
- ## todo/fix: add support for txt and lines
10
- ## check if lines_or_txt is an array or just a string
11
- parser = new( lines, start )
12
- parser.parse
13
- end
14
-
15
-
16
- include Logging ## e.g. logger#debug, logger#info, etc.
17
- include ParserHelper ## e.g. read_lines, etc.
18
-
19
-
20
- def initialize( lines, start )
21
- # for convenience split string into lines
22
- ## note: removes/strips empty lines
23
- ## todo/check: change to text instead of array of lines - why? why not?
24
- @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
25
- @start = start
26
- end
27
-
28
- def parse
29
- ## try to find all teams in match schedule
30
- @last_round = nil
31
- @last_group = nil
32
-
33
- ## definitions/defs
34
- @round_defs = Hash.new(0)
35
- @group_defs = Hash.new(0)
36
-
37
- ## usage/refs
38
- @rounds = {} ## track usage counter and match (two teams) counter
39
- @groups = {} ## -"-
40
- @teams = Hash.new(0) ## keep track of usage counter
41
-
42
- @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
43
-
44
-
45
- @lines.each do |line|
46
- if is_goals?( line )
47
- logger.debug "skipping matched goals line: >#{line}<"
48
- elsif is_round_def?( line )
49
- ## todo/fix: add round definition (w begin n end date)
50
- ## todo: do not patch rounds with definition (already assume begin/end date is good)
51
- ## -- how to deal with matches that get rescheduled/postponed?
52
- logger.debug "skipping matched round def line: >#{line}<"
53
- @round_defs[ line ] += 1
54
- elsif is_round?( line )
55
- logger.debug "skipping matched round line: >#{line}<"
56
-
57
- round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
58
- round[:count] +=1
59
- @last_round = round
60
- elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
61
- ### todo: add pipe (|) marker (required)
62
- logger.debug "skipping matched group def line: >#{line}<"
63
- @group_defs[ line ] += 1
64
- elsif is_group?( line )
65
- ## -- lets you set group e.g. Group A etc.
66
- logger.debug "skipping matched group line: >#{line}<"
67
-
68
- group = @groups[ line ] ||= {count: 0, match_count: 0}
69
- group[:count] +=1
70
- @last_group = group
71
- ## todo/fix: parse group line!!!
72
- elsif try_parse_game( line )
73
- # do nothing here
74
- else
75
- logger.warn "skipping line (no match found): >#{line}<"
76
- @warns << line
77
- end
78
- end # lines.each
79
-
80
- [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
81
- end
82
-
83
-
84
- def try_parse_game( line )
85
- # note: clone line; for possible test do NOT modify in place for now
86
- # note: returns true if parsed, false if no match
87
- parse_game( line.dup )
88
- end
89
-
90
- def parse_game( line )
91
- logger.debug "parsing game (fixture) line: >#{line}<"
92
-
93
- ## remove all protected text runs e.g. []
94
- ## fix: add [ to end-of-line too
95
- ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
96
-
97
- line = line.gsub( /\[
98
- [^\]]+?
99
- \]/x, '' ).strip
100
- return true if line.empty? ## note: return true (for valid line with no match/teams)
101
-
102
-
103
- ## split by geo (@) - remove for now
104
- values = line.split( '@' )
105
- line = values[0]
106
-
107
-
108
- ## try find date
109
- date = find_date!( line, start: @start )
110
- if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
111
- line = line.sub( /\[
112
- [^\]]+?
113
- \]/x, '' ).strip
114
-
115
- else
116
- ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
117
- ## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
118
- line = line.sub( %r{^ ## MUST be anchored to beginning of line
119
- [012]?[0-9]
120
- [.:hH]
121
- [0-9][0-9]
122
- (?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
123
- }x, '' ).strip
124
- end
125
-
126
- return true if line.empty? ## note: return true (for valid line with no match/teams)
127
-
128
-
129
- score = find_score!( line )
130
-
131
- logger.debug " line: >#{line}<"
132
-
133
- line = line.sub( /\[
134
- [^\]]+?
135
- \]/x, '$$' ) # note: replace first score tag with $$
136
- line = line.gsub( /\[
137
- [^\]]+?
138
- \]/x, '' ) # note: replace/remove all other score tags with nothing
139
-
140
- ## clean-up remove all text run inside () or empty () too
141
- line = line.gsub( /\(
142
- [^)]*?
143
- \)/x, '' )
144
-
145
-
146
- ## check for more match separators e.g. - or vs for now
147
- line = line.sub( / \s+
148
- ( -
149
- | v
150
- | vs\.? # note: allow optional dot eg. vs.
151
- )
152
- \s+
153
- /ix, '$$' )
154
-
155
- values = line.split( '$$' )
156
- values = values.map { |value| value.strip } ## strip spaces
157
- values = values.select { |value| !value.empty? } ## remove empty strings
158
-
159
- return true if values.size == 0 ## note: return true (for valid line with no match/teams)
160
-
161
- if values.size == 1
162
- puts "(auto config) try matching teams separated by spaces (2+):"
163
- pp values
164
-
165
- values = values[0].split( /[ ]{2,}/ )
166
- pp values
167
- end
168
-
169
- return false if values.size != 2
170
-
171
- puts "(auto config) try matching teams:"
172
- pp values
173
-
174
- @teams[ values[0] ] += 1 ## update usage counters
175
- @teams[ values[1] ] += 1
176
-
177
- @last_round[ :match_count ] += 1 if @last_round
178
- @last_group[ :match_count ] += 1 if @last_group
179
-
180
- true
181
- end
182
-
183
-
184
-
185
- def find_score!( line )
186
- # note: always call after find_dates !!!
187
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
188
- # -- note: score might have two digits too
189
- ScoreFormats.find!( line )
190
- end
191
-
192
- def find_date!( line, start: )
193
- ## NB: lets us pass in start_at/end_at date (for event)
194
- # for auto-complete year
195
-
196
- # extract date from line
197
- # and return it
198
- # NB: side effect - removes date from line string
199
- DateFormats.find!( line, start: start )
200
- end
201
- end # class AutoConfParser
202
- end # module SportDb
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+
5
+
6
+ class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
7
+
8
+ def self.parse( lines, start: )
9
+ ## todo/fix: add support for txt and lines
10
+ ## check if lines_or_txt is an array or just a string
11
+ parser = new( lines, start )
12
+ parser.parse
13
+ end
14
+
15
+
16
+ include Logging ## e.g. logger#debug, logger#info, etc.
17
+ include ParserHelper ## e.g. read_lines, etc.
18
+
19
+
20
+ def initialize( lines, start )
21
+ # for convenience split string into lines
22
+ ## note: removes/strips empty lines
23
+ ## todo/check: change to text instead of array of lines - why? why not?
24
+ @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
25
+ @start = start
26
+ end
27
+
28
+ def parse
29
+ ## try to find all teams in match schedule
30
+ @last_round = nil
31
+ @last_group = nil
32
+
33
+ ## definitions/defs
34
+ @round_defs = Hash.new(0)
35
+ @group_defs = Hash.new(0)
36
+
37
+ ## usage/refs
38
+ @rounds = {} ## track usage counter and match (two teams) counter
39
+ @groups = {} ## -"-
40
+ @teams = Hash.new(0) ## keep track of usage counter
41
+
42
+ @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
43
+
44
+
45
+ @lines.each do |line|
46
+ if is_goals?( line )
47
+ logger.debug "skipping matched goals line: >#{line}<"
48
+ elsif is_round_def?( line )
49
+ ## todo/fix: add round definition (w begin n end date)
50
+ ## todo: do not patch rounds with definition (already assume begin/end date is good)
51
+ ## -- how to deal with matches that get rescheduled/postponed?
52
+ logger.debug "skipping matched round def line: >#{line}<"
53
+ @round_defs[ line ] += 1
54
+ elsif is_round?( line )
55
+ logger.debug "skipping matched round line: >#{line}<"
56
+
57
+ round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
58
+ round[:count] +=1
59
+ @last_round = round
60
+ elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
61
+ ### todo: add pipe (|) marker (required)
62
+ logger.debug "skipping matched group def line: >#{line}<"
63
+ @group_defs[ line ] += 1
64
+ elsif is_group?( line )
65
+ ## -- lets you set group e.g. Group A etc.
66
+ logger.debug "skipping matched group line: >#{line}<"
67
+
68
+ group = @groups[ line ] ||= {count: 0, match_count: 0}
69
+ group[:count] +=1
70
+ @last_group = group
71
+ ## todo/fix: parse group line!!!
72
+ elsif try_parse_game( line )
73
+ # do nothing here
74
+ else
75
+ logger.warn "skipping line (no match found): >#{line}<"
76
+ @warns << line
77
+ end
78
+ end # lines.each
79
+
80
+ [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
81
+ end
82
+
83
+
84
+ def try_parse_game( line )
85
+ # note: clone line; for possible test do NOT modify in place for now
86
+ # note: returns true if parsed, false if no match
87
+ parse_game( line.dup )
88
+ end
89
+
90
+ def parse_game( line )
91
+ logger.debug "parsing game (fixture) line: >#{line}<"
92
+
93
+ ## remove all protected text runs e.g. []
94
+ ## fix: add [ to end-of-line too
95
+ ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
96
+
97
+ line = line.gsub( /\[
98
+ [^\]]+?
99
+ \]/x, '' ).strip
100
+ return true if line.empty? ## note: return true (for valid line with no match/teams)
101
+
102
+
103
+ ## split by geo (@) - remove for now
104
+ values = line.split( '@' )
105
+ line = values[0]
106
+
107
+
108
+ ## try find date
109
+ date = find_date!( line, start: @start )
110
+ if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
111
+ line = line.sub( /\[
112
+ [^\]]+?
113
+ \]/x, '' ).strip
114
+
115
+ else
116
+ ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
117
+ ## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
118
+ line = line.sub( %r{^ ## MUST be anchored to beginning of line
119
+ [012]?[0-9]
120
+ [.:hH]
121
+ [0-9][0-9]
122
+ (?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
123
+ }x, '' ).strip
124
+ end
125
+
126
+ return true if line.empty? ## note: return true (for valid line with no match/teams)
127
+
128
+
129
+ score = find_score!( line )
130
+
131
+ logger.debug " line: >#{line}<"
132
+
133
+ line = line.sub( /\[
134
+ [^\]]+?
135
+ \]/x, '$$' ) # note: replace first score tag with $$
136
+ line = line.gsub( /\[
137
+ [^\]]+?
138
+ \]/x, '' ) # note: replace/remove all other score tags with nothing
139
+
140
+ ## clean-up remove all text run inside () or empty () too
141
+ line = line.gsub( /\(
142
+ [^)]*?
143
+ \)/x, '' )
144
+
145
+
146
+ ## check for more match separators e.g. - or vs for now
147
+ line = line.sub( / \s+
148
+ ( -
149
+ | v
150
+ | vs\.? # note: allow optional dot eg. vs.
151
+ )
152
+ \s+
153
+ /ix, '$$' )
154
+
155
+ values = line.split( '$$' )
156
+ values = values.map { |value| value.strip } ## strip spaces
157
+ values = values.select { |value| !value.empty? } ## remove empty strings
158
+
159
+ return true if values.size == 0 ## note: return true (for valid line with no match/teams)
160
+
161
+ if values.size == 1
162
+ puts "(auto config) try matching teams separated by spaces (2+):"
163
+ pp values
164
+
165
+ values = values[0].split( /[ ]{2,}/ )
166
+ pp values
167
+ end
168
+
169
+ return false if values.size != 2
170
+
171
+ puts "(auto config) try matching teams:"
172
+ pp values
173
+
174
+ @teams[ values[0] ] += 1 ## update usage counters
175
+ @teams[ values[1] ] += 1
176
+
177
+ @last_round[ :match_count ] += 1 if @last_round
178
+ @last_group[ :match_count ] += 1 if @last_group
179
+
180
+ true
181
+ end
182
+
183
+
184
+
185
+ def find_score!( line )
186
+ # note: always call after find_dates !!!
187
+ # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
188
+ # -- note: score might have two digits too
189
+ ScoreFormats.find!( line )
190
+ end
191
+
192
+ def find_date!( line, start: )
193
+ ## NB: lets us pass in start_at/end_at date (for event)
194
+ # for auto-complete year
195
+
196
+ # extract date from line
197
+ # and return it
198
+ # NB: side effect - removes date from line string
199
+ DateFormats.find!( line, start: start )
200
+ end
201
+ end # class AutoConfParser
202
+ end # module SportDb
@@ -1,374 +1,374 @@
1
-
2
- module SportDb
3
- class Package
4
-
5
- ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
- ## e.g. .TXT and .txt
7
- ## yes!! use /i option!!!!!
8
-
9
- CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
- \.conf\.txt$
11
- }x
12
-
13
- ## leagues.txt or leagues_en.txt
14
- ## remove support for en.leagues.txt - why? why not?
15
- LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
- leagues
18
- (?:_[a-z0-9_-]+)?
19
- \.txt$
20
- }x
21
-
22
- ## seasons.txt or seasons_en.txt
23
- ## remove support for br.seasons.txt - why? why not?
24
- SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
- seasons
27
- (?:_[a-z0-9_-]+)?
28
- \.txt$
29
- }x
30
-
31
-
32
- ## clubs.txt or clubs_en.txt
33
- ## remove support for en.clubs.txt - why? why not?
34
- CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
35
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
36
- clubs
37
- (?:_[a-z0-9_-]+)?
38
- \.txt$
39
- }x
40
-
41
- CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
42
- (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
43
- clubs
44
- (?:_[a-z0-9_-]+)?
45
- \.wiki\.txt$
46
- }x
47
-
48
- ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
- CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
50
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
51
- clubs
52
- (?:_[a-z0-9_-]+)?
53
- \.props\.txt$
54
- }x
55
- CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
-
57
-
58
- CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
- (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
- clubs
61
- (?:_[a-z0-9_-]+)?
62
- \.history\.txt$
63
- }x
64
-
65
- ## teams.txt or teams_history.txt
66
- TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
67
- teams
68
- (?:_[a-z0-9_-]+)?
69
- \.txt$
70
- }x
71
-
72
-
73
- ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
74
- ### season folder:
75
- ## e.g. /2019-20 or
76
- ## year-only e.g. /2019 or
77
- ## /2016--france
78
- SEASON_RE = %r{ (?:
79
- \d{4}-\d{2}
80
- | \d{4}(--[a-z0-9_-]+)?
81
- )
82
- }x
83
- SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
84
-
85
-
86
- ## note: if pattern includes directory add here
87
- ## (otherwise move to more "generic" datafile) - why? why not?
88
- MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
89
- #{SEASON}
90
- /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
91
- }x
92
-
93
- MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
94
- #{SEASON}
95
- /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
96
- }x
97
-
98
- ### add "generic" pattern to find all csv datafiles
99
- CSV_RE = %r{ (?: ^|/ )
100
- [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
101
- }x
102
-
103
-
104
- ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
105
-
106
- def self.find( path, pattern )
107
- datafiles = []
108
-
109
- ## check all txt files
110
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
111
- candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
112
- pp candidates
113
- candidates.each do |candidate|
114
- datafiles << candidate if pattern.match( candidate )
115
- end
116
-
117
- pp datafiles
118
- datafiles
119
- end
120
-
121
-
122
- def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
123
- def self.match_teams( path ) TEAMS_RE.match( path ); end
124
-
125
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
- def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
128
-
129
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
- def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
- def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
133
-
134
- def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
135
- def self.match_leagues( path ) LEAGUES_RE.match( path ); end
136
-
137
- def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
138
- def self.match_seasons( path ) SEASONS_RE.match( path ); end
139
-
140
-
141
- def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
142
- def self.match_conf( path ) CONF_RE.match( path ); end
143
-
144
- def self.find_match( path, format: 'txt' )
145
- if format == 'csv'
146
- find( path, MATCH_CSV_RE )
147
- else ## otherwise always assume txt for now
148
- find( path, MATCH_RE )
149
- end
150
- end
151
- ## add match_match and match_match_csv - why? why not?
152
-
153
-
154
- class << self
155
- alias_method :match_teams?, :match_teams
156
- alias_method :teams?, :match_teams
157
-
158
- alias_method :match_clubs?, :match_clubs
159
- alias_method :clubs?, :match_clubs
160
-
161
- alias_method :match_clubs_wiki?, :match_clubs_wiki
162
- alias_method :clubs_wiki?, :match_clubs_wiki
163
-
164
- alias_method :match_clubs_history?, :match_clubs_history
165
- alias_method :clubs_history?, :match_clubs_history
166
-
167
- alias_method :match_club_props, :match_clubs_props
168
- alias_method :match_club_props?, :match_clubs_props
169
- alias_method :club_props?, :match_clubs_props
170
- alias_method :match_clubs_props?, :match_clubs_props
171
- alias_method :clubs_props?, :match_clubs_props
172
-
173
- alias_method :match_leagues?, :match_leagues
174
- alias_method :leagues?, :match_leagues
175
-
176
- alias_method :match_seasons?, :match_seasons
177
- alias_method :seasons?, :match_seasons
178
-
179
- alias_method :match_conf?, :match_conf
180
- alias_method :conf?, :match_conf
181
- end
182
-
183
-
184
- ## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
185
- attr_accessor :include, :exclude
186
-
187
- ## private helpers - like select returns true for keeping and false for skipping entry
188
- def filter_clause( filter, entry )
189
- if filter.is_a?( String )
190
- entry.name.index( filter ) ? true : false
191
- elsif filter.is_a?( Regexp )
192
- filter.match( entry.name ) ? true : false
193
- else ## assume
194
- ## todo/check: pass in entry (and NOT entry.name) - why? why not?
195
- filter.call( entry )
196
- end
197
- end
198
-
199
- def filter( entry )
200
- if @include
201
- if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
202
- true ## todo/check: check for exclude here too - why? why not?
203
- else
204
- false
205
- end
206
- else
207
- if @exclude && filter_clause( @exclude, entry )
208
- false
209
- else
210
- true
211
- end
212
- end
213
- end
214
-
215
-
216
- def initialize( path_or_pack )
217
- @include = nil
218
- @exclude = nil
219
-
220
- if path_or_pack.is_a?( Datafile::Package )
221
- @pack = path_or_pack
222
- else ## assume it's a (string) path
223
- path = path_or_pack
224
- if !File.exist?( path ) ## file or directory
225
- puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
226
- exit 1
227
- end
228
-
229
- if File.directory?( path )
230
- @pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
231
- elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
232
- @pack = Datafile::ZipPackage.new( path )
233
- else
234
- puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
235
- exit 1
236
- end
237
- end
238
- end
239
-
240
-
241
- def each( pattern:, &blk )
242
- @pack.each( pattern: pattern ) do |entry|
243
- next unless filter( entry ) ## lets you use include/exclude filters
244
- blk.call( entry )
245
- end
246
- end
247
-
248
- def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
249
- def each_match( format: 'txt', &blk )
250
- if format == 'csv'
251
- each( pattern: MATCH_CSV_RE, &blk );
252
- else
253
- each( pattern: MATCH_RE, &blk );
254
- end
255
- end
256
- def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
257
- def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
258
-
259
- def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
260
-
261
- def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
262
- def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
263
- def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
264
- def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
265
-
266
- def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
267
-
268
-
269
- ## return all match datafile entries
270
- def match( format: 'txt' )
271
- ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
272
- end
273
- alias_method :matches, :match
274
-
275
-
276
- ## todo/check: rename/change to match_by_dir - why? why not?
277
- ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
278
- def match_by_season_dir( format: 'txt' )
279
- ##
280
- ## [["1950s/1956-57",
281
- ## ["1950s/1956-57/1-division1.csv",
282
- ## "1950s/1956-57/2-division2.csv",
283
- ## "1950s/1956-57/3a-division3n.csv",
284
- ## "1950s/1956-57/3b-division3s.csv"]],
285
- ## ...]
286
-
287
- h = {}
288
- match( format: format ).each do |entry|
289
- season_path = File.dirname( entry.name )
290
-
291
- h[ season_path ] ||= []
292
- h[ season_path ] << entry
293
- end
294
-
295
- ## todo/fix: - add sort entries by name - why? why not?
296
- ## note: assume 1-,2- etc. gets us back sorted leagues
297
- ## - use sort. (will not sort by default?)
298
-
299
- h.to_a ## return as array (or keep hash) - why? why not?
300
- end # method match_by_season_dir
301
-
302
- def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
303
-
304
- ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
305
-
306
- ## note: fold all sames seasons (even if in different directories)
307
- ## into same datafile list e.g.
308
- ## ["1957/58",
309
- ## ["1950s/1957-58/1-division1.csv",
310
- ## "1950s/1957-58/2-division2.csv",
311
- ## "1950s/1957-58/3a-division3n.csv",
312
- ## "1950s/1957-58/3b-division3s.csv"]],
313
- ## and
314
- ## ["1957/58",
315
- ## ["archives/1950s/1957-58/1-division1.csv",
316
- ## "archives/1950s/1957-58/2-division2.csv",
317
- ## "archives/1950s/1957-58/3a-division3n.csv",
318
- ## "archives/1950s/1957-58/3b-division3s.csv"]],
319
- ## should be together - why? why not?
320
-
321
- ####
322
- # Example package:
323
- # [["2012/13", ["2012-13/1-proleague.csv"]],
324
- # ["2013/14", ["2013-14/1-proleague.csv"]],
325
- # ["2014/15", ["2014-15/1-proleague.csv"]],
326
- # ["2015/16", ["2015-16/1-proleague.csv"]],
327
- # ["2016/17", ["2016-17/1-proleague.csv"]],
328
- # ["2017/18", ["2017-18/1-proleague.csv"]]]
329
-
330
- ## todo/fix: (re)use a more generic filter instead of start for start of season only
331
-
332
- ## todo/fix: use a "generic" filter_season helper for easy reuse
333
- ## filter_season( clause, season_key )
334
- ## or better filter = SeasonFilter.new( clause )
335
- ## filter.skip? filter.include? ( season_sason_key )?
336
- ## fiteer.before?( season_key ) etc.
337
- ## find some good method names!!!!
338
- season_start = start ? Season( start ) : nil
339
-
340
- h = {}
341
- match( format: format ).each do |entry|
342
- ## note: assume last directory in datafile path is the season part/key
343
- season_q = File.basename( File.dirname( entry.name ))
344
- season = Season.parse( season_q ) ## normalize season
345
-
346
- ## skip if start season before this season
347
- next if season_start && season_start.start_year > season.start_year
348
-
349
- h[ season.key ] ||= []
350
- h[ season.key ] << entry
351
- end
352
-
353
- ## todo/fix: - add sort entries by name - why? why not?
354
- ## note: assume 1-,2- etc. gets us back sorted leagues
355
- ## - use sort. (will not sort by default?)
356
-
357
- ## sort by season
358
- ## latest / newest first (and oldest last)
359
-
360
- h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
361
- r[0] <=> l[0]
362
- end
363
- end # method match_by_season
364
- end # class Package
365
-
366
-
367
- class DirPackage < Package
368
- def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
369
- end
370
-
371
- class ZipPackage < Package
372
- def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
373
- end
374
- end # module SportDb
1
+
2
+ module SportDb
3
+ class Package
4
+
5
+ ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
6
+ ## e.g. .TXT and .txt
7
+ ## yes!! use /i option!!!!!
8
+
9
+ CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
10
+ \.conf\.txt$
11
+ }x
12
+
13
+ ## leagues.txt or leagues_en.txt
14
+ ## remove support for en.leagues.txt - why? why not?
15
+ LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
16
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
17
+ leagues
18
+ (?:_[a-z0-9_-]+)?
19
+ \.txt$
20
+ }x
21
+
22
+ ## seasons.txt or seasons_en.txt
23
+ ## remove support for br.seasons.txt - why? why not?
24
+ SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
25
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
26
+ seasons
27
+ (?:_[a-z0-9_-]+)?
28
+ \.txt$
29
+ }x
30
+
31
+
32
+ ## clubs.txt or clubs_en.txt
33
+ ## remove support for en.clubs.txt - why? why not?
34
+ CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
35
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
36
+ clubs
37
+ (?:_[a-z0-9_-]+)?
38
+ \.txt$
39
+ }x
40
+
41
+ CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
42
+ (?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
43
+ clubs
44
+ (?:_[a-z0-9_-]+)?
45
+ \.wiki\.txt$
46
+ }x
47
+
48
+ ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
+ CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
50
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
51
+ clubs
52
+ (?:_[a-z0-9_-]+)?
53
+ \.props\.txt$
54
+ }x
55
+ CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
+
57
+
58
+ CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
+ clubs
61
+ (?:_[a-z0-9_-]+)?
62
+ \.history\.txt$
63
+ }x
64
+
65
+ ## teams.txt or teams_history.txt
66
+ TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
67
+ teams
68
+ (?:_[a-z0-9_-]+)?
69
+ \.txt$
70
+ }x
71
+
72
+
73
+ ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
74
+ ### season folder:
75
+ ## e.g. /2019-20 or
76
+ ## year-only e.g. /2019 or
77
+ ## /2016--france
78
+ SEASON_RE = %r{ (?:
79
+ \d{4}-\d{2}
80
+ | \d{4}(--[a-z0-9_-]+)?
81
+ )
82
+ }x
83
+ SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
84
+
85
+
86
+ ## note: if pattern includes directory add here
87
+ ## (otherwise move to more "generic" datafile) - why? why not?
88
+ MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
89
+ #{SEASON}
90
+ /[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
91
+ }x
92
+
93
+ MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
94
+ #{SEASON}
95
+ /[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
96
+ }x
97
+
98
+ ### add "generic" pattern to find all csv datafiles
99
+ CSV_RE = %r{ (?: ^|/ )
100
+ [a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
101
+ }x
102
+
103
+
104
+ ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
105
+
106
+ def self.find( path, pattern )
107
+ datafiles = []
108
+
109
+ ## check all txt files
110
+ ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
111
+ candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
112
+ pp candidates
113
+ candidates.each do |candidate|
114
+ datafiles << candidate if pattern.match( candidate )
115
+ end
116
+
117
+ pp datafiles
118
+ datafiles
119
+ end
120
+
121
+
122
+ def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
123
+ def self.match_teams( path ) TEAMS_RE.match( path ); end
124
+
125
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
+ def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
128
+
129
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
+ def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
+ def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
133
+
134
+ def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
135
+ def self.match_leagues( path ) LEAGUES_RE.match( path ); end
136
+
137
+ def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
138
+ def self.match_seasons( path ) SEASONS_RE.match( path ); end
139
+
140
+
141
+ def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
142
+ def self.match_conf( path ) CONF_RE.match( path ); end
143
+
144
+ def self.find_match( path, format: 'txt' )
145
+ if format == 'csv'
146
+ find( path, MATCH_CSV_RE )
147
+ else ## otherwise always assume txt for now
148
+ find( path, MATCH_RE )
149
+ end
150
+ end
151
+ ## add match_match and match_match_csv - why? why not?
152
+
153
+
154
class << self
  ## predicate-style and shorthand names for the match_* class methods
  ##  note: every alias returns exactly what the aliased matcher returns
  ##         (that is, NOT a strict true/false)
  alias match_teams?          match_teams
  alias teams?                match_teams

  alias match_clubs?          match_clubs
  alias clubs?                match_clubs

  alias match_clubs_wiki?     match_clubs_wiki
  alias clubs_wiki?           match_clubs_wiki

  alias match_clubs_history?  match_clubs_history
  alias clubs_history?        match_clubs_history

  ## singular (club) and plural (clubs) spellings are both supported for props
  alias match_club_props      match_clubs_props
  alias match_club_props?     match_clubs_props
  alias club_props?           match_clubs_props
  alias match_clubs_props?    match_clubs_props
  alias clubs_props?          match_clubs_props

  alias match_leagues?        match_leagues
  alias leagues?              match_leagues

  alias match_seasons?        match_seasons
  alias seasons?              match_seasons

  alias match_conf?           match_conf
  alias conf?                 match_conf
end
182
+
183
+
184
## attr_reader :pack   ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?

## optional entry filters consulted by filter;
##   each may be a String, a Regexp or an object responding to call (see filter_clause)
attr_accessor :include
attr_accessor :exclude
186
+
187
+ ## private helpers - like select returns true for keeping and false for skipping entry
188
## Evaluate a single filter against a datafile entry - works like a select predicate
##  (truthy keeps the entry, falsy skips it).
##   String filter  - true if entry.name contains the string
##   Regexp filter  - true if the regex matches entry.name
##   anything else  - assumed to be callable; gets passed the entry itself
##                     (NOT entry.name) and its result is returned as is
def filter_clause( filter, entry )
  case filter
  when String then entry.name.include?( filter )
  when Regexp then !filter.match( entry.name ).nil?
  else
    ## todo/check: pass in entry (and NOT entry.name) - why? why not?
    filter.call( entry )
  end
end
198
+
199
## Decide if an entry gets kept (true) or skipped (false).
##   - with an include filter set, only the include filter decides
##      (the exclude filter is NOT consulted)
##   - otherwise the entry is dropped only if an exclude filter is set and matches
##   - with no filters at all every entry is kept
def filter( entry )
  ## todo/check: is include a reserved keyword????
  ## todo/check: check for exclude here too - why? why not?
  return ( filter_clause( @include, entry ) ? true : false )  if @include

  return false  if @exclude && filter_clause( @exclude, entry )

  true
end
214
+
215
+
216
## Wrap a datafile package.
##  path_or_pack - either a ready-to-use Datafile::Package (used as is)
##                 or a path to a directory or to a file with .zip extension
##  note: for a missing path or an unsupported file type
##         an error message is printed and the process exits with status 1
def initialize( path_or_pack )
  @include = nil
  @exclude = nil

  if path_or_pack.is_a?( Datafile::Package )
    @pack = path_or_pack
    return
  end

  ## assume it's a (string) path
  path = path_or_pack
  unless File.exist?( path )   ## file or directory
    puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
    exit 1
  end

  @pack = if File.directory?( path )
            Datafile::DirPackage.new( path )    ## delegate to "generic" package
          elsif File.file?( path ) && File.extname( path ) == '.zip'   # note: includes dot (.) eg .zip
            Datafile::ZipPackage.new( path )
          else
            puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
            exit 1
          end
end
239
+
240
+
241
## Walk all entries of the wrapped package matching pattern
##  and pass every entry that survives the include/exclude filters to the block.
def each( pattern:, &blk )
  @pack.each( pattern: pattern ) do |entry|
    ## lets you use include/exclude filters
    blk.call( entry )  if filter( entry )
  end
end
247
+
248
## Yield every (filtered) entry matching CONF_RE to the block - see each.
def each_conf( &blk )
  each( pattern: CONF_RE, &blk )
end
249
## Yield every (filtered) match datafile entry to the block.
##  format - 'csv' selects MATCH_CSV_RE; any other value selects MATCH_RE
def each_match( format: 'txt', &blk )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  each( pattern: pattern, &blk )
end
256
## Yield every (filtered) entry matching MATCH_CSV_RE to the block - see each.
def each_match_csv( &blk )
  each( pattern: MATCH_CSV_RE, &blk )
end

## Yield every (filtered) entry matching CSV_RE to the block - see each.
def each_csv( &blk )
  each( pattern: CSV_RE, &blk )
end
258
+
259
## Yield every (filtered) entry matching CLUB_PROPS_RE to the block - see each.
##  note(review): uses CLUB_PROPS_RE (singular) while match_clubs_props
##                 defaults to CLUBS_PROPS_RE (plural) - confirm both constants are defined
def each_club_props( &blk )
  each( pattern: CLUB_PROPS_RE, &blk )
end
260
+
261
## Iterators for leagues and the clubs datafile family;
##  each one yields the (filtered) entries matching its *_RE pattern - see each.
def each_leagues( &blk )
  each( pattern: LEAGUES_RE, &blk )
end

def each_clubs( &blk )
  each( pattern: CLUBS_RE, &blk )
end

def each_clubs_wiki( &blk )
  each( pattern: CLUBS_WIKI_RE, &blk )
end

def each_clubs_history( &blk )
  each( pattern: CLUBS_HISTORY_RE, &blk )
end
265
+
266
## Yield every (filtered) entry matching SEASONS_RE to the block - see each.
def each_seasons( &blk )
  each( pattern: SEASONS_RE, &blk )
end
267
+
268
+
269
+ ## return all match datafile entries
270
## Collect all (filtered) match datafile entries into an array.
##  format - passed on to each_match ('csv' or txt)
def match( format: 'txt' )
  entries = []
  each_match( format: format ) { |entry| entries << entry }
  entries
end
alias_method :matches, :match
274
+
275
+
276
+ ## todo/check: rename/change to match_by_dir - why? why not?
277
+ ## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
278
## Group all match datafile entries by the directory of their entry name.
##  returns an array of [directory, entries] pairs
##   (in order of first appearance), e.g.
##
##  [["1950s/1956-57",
##    ["1950s/1956-57/1-division1.csv",
##     "1950s/1956-57/2-division2.csv",
##     "1950s/1956-57/3a-division3n.csv",
##     "1950s/1956-57/3b-division3s.csv"]],
##   ...]
def match_by_season_dir( format: 'txt' )
  by_dir = match( format: format ).group_by do |entry|
    File.dirname( entry.name )    ## season path
  end

  ## todo/fix: - add sort entries by name - why? why not?
  ## note: assume 1-,2- etc. gets us back sorted leagues
  ##  - use sort. (will not sort by default?)

  by_dir.to_a    ## return as array (or keep hash) - why? why not?
end # method match_by_season_dir
301
+
302
## Group all match datafile entries by (normalized) season key.
##  format - passed on to match ('csv' or txt)
##  start  - optional first season to include;
##            seasons with an earlier start year get skipped
##  returns an array of [season_key, entries] pairs
##   sorted by season key - latest / newest first (and oldest last)
def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?

  ## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?

  ## note: fold all same seasons (even if in different directories)
  ##        into the same datafile list e.g.
  ##   ["1957/58",
  ##     ["1950s/1957-58/1-division1.csv",
  ##      "1950s/1957-58/2-division2.csv",
  ##      "1950s/1957-58/3a-division3n.csv",
  ##      "1950s/1957-58/3b-division3s.csv"]],
  ##  and
  ##   ["1957/58",
  ##     ["archives/1950s/1957-58/1-division1.csv",
  ##      "archives/1950s/1957-58/2-division2.csv",
  ##      "archives/1950s/1957-58/3a-division3n.csv",
  ##      "archives/1950s/1957-58/3b-division3s.csv"]],
  ##  should be together - why? why not?

  ####
  # Example package:
  # [["2012/13", ["2012-13/1-proleague.csv"]],
  #  ["2013/14", ["2013-14/1-proleague.csv"]],
  #  ["2014/15", ["2014-15/1-proleague.csv"]],
  #  ["2015/16", ["2015-16/1-proleague.csv"]],
  #  ["2016/17", ["2016-17/1-proleague.csv"]],
  #  ["2017/18", ["2017-18/1-proleague.csv"]]]

  ## todo/fix: (re)use a more generic filter instead of start for start of season only

  ## todo/fix: use a "generic" filter_season helper for easy reuse
  ##     filter_season( clause, season_key )
  ##   or better filter = SeasonFilter.new( clause )
  ##     filter.skip?  filter.include?( season_key )?
  ##     filter.before?( season_key )  etc.
  ##      find some good method names!!!!
  season_start = start ? Season( start ) : nil

  by_season = match( format: format ).each_with_object( {} ) do |entry, h|
    ## note: assume last directory in datafile path is the season part/key
    season_q = File.basename( File.dirname( entry.name ))
    season   = Season.parse( season_q )   ## normalize season

    ## skip seasons that begin before the requested start season
    next if season_start && season_start.start_year > season.start_year

    key = season.key
    ( h[ key ] ||= [] ) << entry
  end

  ## todo/fix: - add sort entries by name - why? why not?
  ## note: assume 1-,2- etc. gets us back sorted leagues
  ##  - use sort. (will not sort by default?)

  ## sort by season
  ##   latest / newest first (and oldest last)
  ##  return as array (or keep hash) - why? why not?
  by_season.sort { |l,r| r[0] <=> l[0] }
end # method match_by_season
364
+ end # class Package
365
+
366
+
367
## Package variant that always wraps a Datafile::DirPackage built from path.
class DirPackage < Package
  def initialize( path )
    super( Datafile::DirPackage.new( path ) )
  end
end
370
+
371
## Package variant that always wraps a Datafile::ZipPackage built from path.
class ZipPackage < Package
  def initialize( path )
    super( Datafile::ZipPackage.new( path ) )
  end
end
374
+ end # module SportDb