sportdb-formats 1.1.5 → 1.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_index.rb +141 -141
- data/lib/sportdb/formats/event/event_reader.rb +183 -183
- data/lib/sportdb/formats/league/league_outline_reader.rb +1 -0
- data/lib/sportdb/formats/league/league_reader.rb +168 -168
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -202
- data/lib/sportdb/formats/package.rb +374 -374
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +350 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +1 -1
- data/test/test_club_index_history.rb +107 -107
- data/test/test_club_reader.rb +201 -201
- data/test/test_club_reader_history.rb +212 -212
- data/test/test_country_reader.rb +89 -89
- data/test/test_league_outline_reader.rb +55 -55
- data/test/test_league_reader.rb +72 -72
- data/test/test_outline_reader.rb +31 -31
- data/test/test_regex.rb +67 -67
- data/test/test_wiki_reader.rb +77 -77
- metadata +12 -6
@@ -1,202 +1,202 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb

  # Scans the lines of a match schedule and collects usage statistics
  # (teams, rounds, groups, round/group definitions) plus the list of
  # lines that could not be classified.
  #
  # Line classification (is_goals?, is_round?, etc.) and read_lines come
  # from ParserHelper; logger comes from Logging.
  class AutoConfParser   ## todo/check: rename/change to MatchAutoConfParser - why? why not?

    # Convenience entry point - builds a parser and runs it.
    #
    # @param lines [String, Array<String>] schedule text or schedule lines
    # @param start [Object] passed through to DateFormats.find! (start: keyword)
    # @return [Array] see #parse
    def self.parse( lines, start: )
      ## todo/fix: add support for txt and lines
      new( lines, start ).parse
    end

    include Logging         ## e.g. logger#debug, logger#info, etc.
    include ParserHelper    ## e.g. read_lines, etc.

    ## (non-empty) bracketed text run e.g. [...]
    TAG_RE = /\[ [^\]]+? \]/x

    ## text run inside () or empty () too
    PAREN_RE = /\( [^)]*? \)/x

    ## leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
    ##   todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
    LEADING_HOUR_RE = %r{^             ## MUST be anchored to beginning of line
                         [012]?[0-9]
                         [.:hH]
                         [0-9][0-9]
                         (?=[ ])       ## must be followed by space for now
                        }x

    ## more match separators e.g. - or v or vs for now
    SEPARATOR_RE = / \s+
                     (?: -
                       | v
                       | vs\.?   # note: allow optional dot eg. vs.
                     )
                     \s+
                   /ix

    private_constant :TAG_RE, :PAREN_RE, :LEADING_HOUR_RE, :SEPARATOR_RE


    # @param lines [String, Array<String>] a String gets split into lines
    #   via read_lines (per the original note this also strips empty lines)
    # @param start [Object] kept and handed to find_date! for every game line
    def initialize( lines, start )
      ## todo/check: change to text instead of array of lines - why? why not?
      @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
      @start = start
    end

    # Classifies every line and updates the counters.
    #
    # @return [Array] [teams, rounds, groups, round_defs, group_defs, warns]
    #   where teams/round_defs/group_defs map text => usage count,
    #   rounds/groups map line => {count:, match_count:} and
    #   warns lists the lines that matched nothing
    def parse
      ## try to find all teams in match schedule
      @last_round = nil
      @last_group = nil

      ## definitions/defs
      @round_defs = Hash.new(0)
      @group_defs = Hash.new(0)

      ## usage/refs
      @rounds = {}             ## track usage counter and match (two teams) counter
      @groups = {}             ##  -"-
      @teams  = Hash.new(0)    ## keep track of usage counter

      @warns  = []             ## track list of warnings (unmatched lines) too

      @lines.each do |line|
        if is_goals?( line )
          logger.debug "skipping matched goals line: >#{line}<"
        elsif is_round_def?( line )
          ## todo/fix: add round definition (w begin n end date)
          logger.debug "skipping matched round def line: >#{line}<"
          @round_defs[ line ] += 1
        elsif is_round?( line )
          logger.debug "skipping matched round line: >#{line}<"

          round = @rounds[ line ] ||= {count: 0, match_count: 0}    ## usage counter, match counter
          round[:count] += 1
          @last_round = round
        elsif is_group_def?( line )    ## NB: group goes after round (round may contain group marker too)
          ### todo: add pipe (|) marker (required)
          logger.debug "skipping matched group def line: >#{line}<"
          @group_defs[ line ] += 1
        elsif is_group?( line )
          ## -- lets you set group  e.g. Group A etc.
          logger.debug "skipping matched group line: >#{line}<"

          group = @groups[ line ] ||= {count: 0, match_count: 0}
          group[:count] += 1
          @last_group = group
          ## todo/fix: parse group line!!!
        elsif try_parse_game( line )
          # do nothing here
        else
          logger.warn "skipping line (no match found): >#{line}<"
          @warns << line
        end
      end # lines.each

      [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
    end


    # Runs parse_game on a copy so that the caller's line stays untouched.
    #
    # @return [Boolean] true if parsed, false if no match
    def try_parse_game( line )
      parse_game( line.dup )
    end

    # Tries to read a game (fixture) line and, on success, bumps the usage
    # counters of both teams and the match counters of the last seen
    # round and group.
    #
    # @param line [String] working copy of the line
    # @return [Boolean] true for a valid line (two teams found or nothing
    #   left after clean-up), false if the line does not split into two teams
    def parse_game( line )
      logger.debug "parsing game (fixture) line: >#{line}<"

      ## remove all protected text runs e.g. []
      ##   todo/fix: move remove protected text runs AFTER find date!! - why? why not?
      line = line.gsub( TAG_RE, '' ).strip
      return true  if line.empty?    ## note: return true (for valid line with no match/teams)

      ## split by geo (@) - remove for now
      ##   note: partition always hands back a string for the head;
      ##         split('@')[0] is nil if the line holds nothing but @
      line = line.partition( '@' ).first

      ## try find date; find_date! replaces the date in place
      if find_date!( line, start: @start )
        ## if found remove tagged run too; note using singular sub (NOT global gsub)
        line = line.sub( TAG_RE, '' ).strip
      else
        line = line.sub( LEADING_HOUR_RE, '' ).strip
      end

      return true  if line.empty?    ## note: return true (for valid line with no match/teams)

      find_score!( line )    ## note: called for its side effect on line only

      logger.debug " line: >#{line}<"

      line = line.sub(  TAG_RE, '$$' )    # note: replace first score tag with $$
      line = line.gsub( TAG_RE, '' )      # note: replace/remove all other score tags with nothing

      ## clean-up remove all text run inside () or empty () too
      line = line.gsub( PAREN_RE, '' )

      ## check for more match separators e.g. - or vs for now
      line = line.sub( SEPARATOR_RE, '$$' )

      values = line.split( '$$' ).map( &:strip ).reject( &:empty? )

      return true  if values.empty?    ## note: return true (for valid line with no match/teams)

      if values.size == 1
        logger.debug "(auto config) try matching teams separated by spaces (2+): #{values.inspect}"
        values = values[0].split( /[ ]{2,}/ )
        logger.debug "  #{values.inspect}"
      end

      return false  if values.size != 2

      logger.debug "(auto config) try matching teams: #{values.inspect}"

      @teams[ values[0] ] += 1    ## update usage counters
      @teams[ values[1] ] += 1

      @last_round[ :match_count ] += 1   if @last_round
      @last_group[ :match_count ] += 1   if @last_group

      true
    end


    # Delegates to ScoreFormats.find!
    #
    # note: always call after find_date! - scores match date-like
    #       patterns e.g. 10-11 or 10:00 etc.
    def find_score!( line )
      ScoreFormats.find!( line )
    end

    # Delegates to DateFormats.find! and returns its result.
    #
    # NB: side effect - removes date from line string
    def find_date!( line, start: )
      DateFormats.find!( line, start: start )
    end
  end # class AutoConfParser
end # module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb

  # Scans the lines of a match schedule and collects usage statistics
  # (teams, rounds, groups, round/group definitions) plus the list of
  # lines that could not be classified.
  #
  # Line classification (is_goals?, is_round?, etc.) and read_lines come
  # from ParserHelper; logger comes from Logging.
  class AutoConfParser   ## todo/check: rename/change to MatchAutoConfParser - why? why not?

    # Convenience entry point - builds a parser and runs it.
    #
    # @param lines [String, Array<String>] schedule text or schedule lines
    # @param start [Object] passed through to DateFormats.find! (start: keyword)
    # @return [Array] see #parse
    def self.parse( lines, start: )
      ## todo/fix: add support for txt and lines
      new( lines, start ).parse
    end

    include Logging         ## e.g. logger#debug, logger#info, etc.
    include ParserHelper    ## e.g. read_lines, etc.

    ## (non-empty) bracketed text run e.g. [...]
    TAG_RE = /\[ [^\]]+? \]/x

    ## text run inside () or empty () too
    PAREN_RE = /\( [^)]*? \)/x

    ## leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
    ##   todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
    LEADING_HOUR_RE = %r{^             ## MUST be anchored to beginning of line
                         [012]?[0-9]
                         [.:hH]
                         [0-9][0-9]
                         (?=[ ])       ## must be followed by space for now
                        }x

    ## more match separators e.g. - or v or vs for now
    SEPARATOR_RE = / \s+
                     (?: -
                       | v
                       | vs\.?   # note: allow optional dot eg. vs.
                     )
                     \s+
                   /ix

    private_constant :TAG_RE, :PAREN_RE, :LEADING_HOUR_RE, :SEPARATOR_RE


    # @param lines [String, Array<String>] a String gets split into lines
    #   via read_lines (per the original note this also strips empty lines)
    # @param start [Object] kept and handed to find_date! for every game line
    def initialize( lines, start )
      ## todo/check: change to text instead of array of lines - why? why not?
      @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
      @start = start
    end

    # Classifies every line and updates the counters.
    #
    # @return [Array] [teams, rounds, groups, round_defs, group_defs, warns]
    #   where teams/round_defs/group_defs map text => usage count,
    #   rounds/groups map line => {count:, match_count:} and
    #   warns lists the lines that matched nothing
    def parse
      ## try to find all teams in match schedule
      @last_round = nil
      @last_group = nil

      ## definitions/defs
      @round_defs = Hash.new(0)
      @group_defs = Hash.new(0)

      ## usage/refs
      @rounds = {}             ## track usage counter and match (two teams) counter
      @groups = {}             ##  -"-
      @teams  = Hash.new(0)    ## keep track of usage counter

      @warns  = []             ## track list of warnings (unmatched lines) too

      @lines.each do |line|
        if is_goals?( line )
          logger.debug "skipping matched goals line: >#{line}<"
        elsif is_round_def?( line )
          ## todo/fix: add round definition (w begin n end date)
          logger.debug "skipping matched round def line: >#{line}<"
          @round_defs[ line ] += 1
        elsif is_round?( line )
          logger.debug "skipping matched round line: >#{line}<"

          round = @rounds[ line ] ||= {count: 0, match_count: 0}    ## usage counter, match counter
          round[:count] += 1
          @last_round = round
        elsif is_group_def?( line )    ## NB: group goes after round (round may contain group marker too)
          ### todo: add pipe (|) marker (required)
          logger.debug "skipping matched group def line: >#{line}<"
          @group_defs[ line ] += 1
        elsif is_group?( line )
          ## -- lets you set group  e.g. Group A etc.
          logger.debug "skipping matched group line: >#{line}<"

          group = @groups[ line ] ||= {count: 0, match_count: 0}
          group[:count] += 1
          @last_group = group
          ## todo/fix: parse group line!!!
        elsif try_parse_game( line )
          # do nothing here
        else
          logger.warn "skipping line (no match found): >#{line}<"
          @warns << line
        end
      end # lines.each

      [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
    end


    # Runs parse_game on a copy so that the caller's line stays untouched.
    #
    # @return [Boolean] true if parsed, false if no match
    def try_parse_game( line )
      parse_game( line.dup )
    end

    # Tries to read a game (fixture) line and, on success, bumps the usage
    # counters of both teams and the match counters of the last seen
    # round and group.
    #
    # @param line [String] working copy of the line
    # @return [Boolean] true for a valid line (two teams found or nothing
    #   left after clean-up), false if the line does not split into two teams
    def parse_game( line )
      logger.debug "parsing game (fixture) line: >#{line}<"

      ## remove all protected text runs e.g. []
      ##   todo/fix: move remove protected text runs AFTER find date!! - why? why not?
      line = line.gsub( TAG_RE, '' ).strip
      return true  if line.empty?    ## note: return true (for valid line with no match/teams)

      ## split by geo (@) - remove for now
      ##   note: partition always hands back a string for the head;
      ##         split('@')[0] is nil if the line holds nothing but @
      line = line.partition( '@' ).first

      ## try find date; find_date! replaces the date in place
      if find_date!( line, start: @start )
        ## if found remove tagged run too; note using singular sub (NOT global gsub)
        line = line.sub( TAG_RE, '' ).strip
      else
        line = line.sub( LEADING_HOUR_RE, '' ).strip
      end

      return true  if line.empty?    ## note: return true (for valid line with no match/teams)

      find_score!( line )    ## note: called for its side effect on line only

      logger.debug " line: >#{line}<"

      line = line.sub(  TAG_RE, '$$' )    # note: replace first score tag with $$
      line = line.gsub( TAG_RE, '' )      # note: replace/remove all other score tags with nothing

      ## clean-up remove all text run inside () or empty () too
      line = line.gsub( PAREN_RE, '' )

      ## check for more match separators e.g. - or vs for now
      line = line.sub( SEPARATOR_RE, '$$' )

      values = line.split( '$$' ).map( &:strip ).reject( &:empty? )

      return true  if values.empty?    ## note: return true (for valid line with no match/teams)

      if values.size == 1
        logger.debug "(auto config) try matching teams separated by spaces (2+): #{values.inspect}"
        values = values[0].split( /[ ]{2,}/ )
        logger.debug "  #{values.inspect}"
      end

      return false  if values.size != 2

      logger.debug "(auto config) try matching teams: #{values.inspect}"

      @teams[ values[0] ] += 1    ## update usage counters
      @teams[ values[1] ] += 1

      @last_round[ :match_count ] += 1   if @last_round
      @last_group[ :match_count ] += 1   if @last_group

      true
    end


    # Delegates to ScoreFormats.find!
    #
    # note: always call after find_date! - scores match date-like
    #       patterns e.g. 10-11 or 10:00 etc.
    def find_score!( line )
      ScoreFormats.find!( line )
    end

    # Delegates to DateFormats.find! and returns its result.
    #
    # NB: side effect - removes date from line string
    def find_date!( line, start: )
      DateFormats.find!( line, start: start )
    end
  end # class AutoConfParser
end # module SportDb
|
@@ -1,374 +1,374 @@
|
|
1
|
-
|
2
|
-
module SportDb

  # Wraps a "generic" datafile package (directory or zip archive) and adds
  # sport-specific finders (clubs, leagues, seasons, match datafiles, etc.)
  # plus optional include/exclude entry filters.
  class Package

    ## todo/fix: make all regexes case-insensitive with /i option - why? why not?
    ##   e.g. .TXT and .txt

    CONF_RE = %r{ (?: ^|/ )      # beginning (^) or beginning of path (/)
                   \.conf\.txt$
                }x

    ## leagues.txt or leagues_en.txt
    ##   remove support for en.leagues.txt - why? why not?
    LEAGUES_RE = %r{ (?: ^|/ )               # beginning (^) or beginning of path (/)
                      (?: [a-z]{1,4}\. )?    # optional country code/key e.g. eng.leagues.txt
                      leagues
                      (?:_[a-z0-9_-]+)?
                      \.txt$
                   }x

    ## seasons.txt or seasons_en.txt
    ##   remove support for br.seasons.txt - why? why not?
    SEASONS_RE = %r{ (?: ^|/ )               # beginning (^) or beginning of path (/)
                      (?: [a-z]{1,4}\. )?    # optional country code/key e.g. eng.seasons.txt
                      seasons
                      (?:_[a-z0-9_-]+)?
                      \.txt$
                   }x

    ## clubs.txt or clubs_en.txt
    ##   remove support for en.clubs.txt - why? why not?
    CLUBS_RE = %r{ (?: ^|/ )               # beginning (^) or beginning of path (/)
                    (?: [a-z]{1,4}\. )?    # optional country code/key e.g. eng.clubs.txt
                    clubs
                    (?:_[a-z0-9_-]+)?
                    \.txt$
                 }x

    CLUBS_WIKI_RE = %r{ (?:^|/)               # beginning (^) or beginning of path (/)
                         (?:[a-z]{1,4}\.)?    # optional country code/key e.g. eng.clubs.wiki.txt
                         clubs
                         (?:_[a-z0-9_-]+)?
                         \.wiki\.txt$
                      }x

    CLUBS_PROPS_RE = %r{ (?: ^|/ )               # beginning (^) or beginning of path (/)
                          (?: [a-z]{1,4}\. )?    # optional country code/key e.g. eng.clubs.props.txt
                          clubs
                          (?:_[a-z0-9_-]+)?
                          \.props\.txt$
                       }x
    CLUB_PROPS_RE = CLUBS_PROPS_RE    ## add alias for now (fix later - why? why not?)

    CLUBS_HISTORY_RE = %r{ (?: ^|/ )               # beginning (^) or beginning of path (/)
                            (?: [a-z]{1,4}\. )?    # optional country code/key e.g. eng.clubs.history.txt
                            clubs
                            (?:_[a-z0-9_-]+)?
                            \.history\.txt$
                         }x

    ## teams.txt or teams_history.txt
    TEAMS_RE = %r{ (?: ^|/ )     # beginning (^) or beginning of path (/)
                    teams
                    (?:_[a-z0-9_-]+)?
                    \.txt$
                 }x

    ### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not?
    ### season folder:
    ##    e.g. /2019-20   or
    ##    year-only e.g. /2019 or
    ##    /2016--france
    SEASON_RE = %r{ (?:
                       \d{4}-\d{2}
                     | \d{4}(--[a-z0-9_-]+)?
                    )
                  }x
    SEASON = SEASON_RE.source    ## "inline" helper for embedding in other regexes - keep? why? why not?

    ## note: if pattern includes directory add here
    ##   (otherwise move to more "generic" datafile) - why? why not?
    MATCH_RE = %r{ (?: ^|/ )     # beginning (^) or beginning of path (/)
                    #{SEASON}
                    /[a-z0-9_-]+\.txt$   ## txt e.g /1-premierleague.txt
                 }x

    MATCH_CSV_RE = %r{ (?: ^|/ )     # beginning (^) or beginning of path (/)
                        #{SEASON}
                        /[a-z0-9_.-]+\.csv$   ## note: allow dot (.) too e.g /eng.1.csv
                     }x

    ### add "generic" pattern to find all csv datafiles
    CSV_RE = %r{ (?: ^|/ )
                  [a-z0-9_.-]+\.csv$   ## note: allow dot (.) too e.g /eng.1.csv
               }x


    ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?

    # Collects all files below path (any depth) whose full path matches pattern.
    #
    # note: the finder no longer dumps the candidate and result lists to
    #       stdout - it only returns the matches.
    #
    # @param path [String] root directory (interpolated into a glob as is)
    # @param pattern [Regexp] (or anything that responds to match)
    # @return [Array<String>] matching paths (as returned by Dir.glob)
    def self.find( path, pattern )
      ## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
      Dir.glob( "#{path}/**/{*,.*}.*" ).select { |candidate| pattern.match( candidate ) }
    end

    def self.find_teams( path, pattern: TEAMS_RE )  find( path, pattern ); end
    def self.match_teams( path )                    TEAMS_RE.match( path ); end

    def self.find_clubs( path, pattern: CLUBS_RE )                   find( path, pattern ); end
    def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )         find( path, pattern ); end
    def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE )   find( path, pattern ); end

    def self.match_clubs( path )          CLUBS_RE.match( path ); end
    def self.match_clubs_wiki( path )     CLUBS_WIKI_RE.match( path ); end
    def self.match_clubs_history( path )  CLUBS_HISTORY_RE.match( path ); end
    def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE )  pattern.match( path ); end

    def self.find_leagues( path, pattern: LEAGUES_RE )  find( path, pattern ); end
    def self.match_leagues( path )                      LEAGUES_RE.match( path ); end

    def self.find_seasons( path, pattern: SEASONS_RE )  find( path, pattern ); end
    def self.match_seasons( path )                      SEASONS_RE.match( path ); end

    def self.find_conf( path, pattern: CONF_RE )  find( path, pattern ); end
    def self.match_conf( path )                   CONF_RE.match( path ); end

    # Finds all match datafiles below path.
    #
    # @param format [String] 'csv' selects MATCH_CSV_RE;
    #   anything else is treated as txt (MATCH_RE)
    def self.find_match( path, format: 'txt' )
      find( path, format == 'csv' ? MATCH_CSV_RE : MATCH_RE )
    end
    ## add match_match and match_match_csv - why? why not?

    class << self
      alias_method :match_teams?, :match_teams
      alias_method :teams?,       :match_teams

      alias_method :match_clubs?, :match_clubs
      alias_method :clubs?,       :match_clubs

      alias_method :match_clubs_wiki?, :match_clubs_wiki
      alias_method :clubs_wiki?,       :match_clubs_wiki

      alias_method :match_clubs_history?, :match_clubs_history
      alias_method :clubs_history?,       :match_clubs_history

      alias_method :match_club_props,   :match_clubs_props
      alias_method :match_club_props?,  :match_clubs_props
      alias_method :club_props?,        :match_clubs_props
      alias_method :match_clubs_props?, :match_clubs_props
      alias_method :clubs_props?,       :match_clubs_props

      alias_method :match_leagues?, :match_leagues
      alias_method :leagues?,       :match_leagues

      alias_method :match_seasons?, :match_seasons
      alias_method :seasons?,       :match_seasons

      alias_method :match_conf?, :match_conf
      alias_method :conf?,       :match_conf
    end


    ## attr_reader :pack    ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?

    ## optional entry filters - a String (substring of entry name),
    ##   a Regexp (matched against entry name) or a callable (gets the entry)
    attr_accessor :include, :exclude

    # Evaluates a single filter against an entry.
    #
    # @return [Boolean, Object] true/false for String and Regexp filters;
    #   for any other filter whatever filter.call( entry ) returns
    def filter_clause( filter, entry )
      case filter
      when String then entry.name.index( filter ) ? true : false
      when Regexp then filter.match( entry.name ) ? true : false
      else             filter.call( entry )   ## todo/check: pass in entry (and NOT entry.name) - why? why not?
      end
    end

    # Like select - returns true for keeping and false for skipping the entry.
    #
    # An entry is kept if it passes the include filter (if set) and is
    # NOT hit by the exclude filter (if set). With only one of the two
    # filters set the outcome is the same as before; with both set the
    # exclude filter is no longer silently ignored.
    #
    # @return [Boolean]
    def filter( entry )
      return false  if @include && !filter_clause( @include, entry )
      return false  if @exclude && filter_clause( @exclude, entry )
      true
    end


    # @param path_or_pack [Datafile::Package, String] a ready-to-use
    #   package or the path to a directory or a file with .zip extension
    #
    # note: prints an error message and exits (status 1) if the path does
    #       not exist or is neither a directory nor a .zip file
    def initialize( path_or_pack )
      @include = nil
      @exclude = nil

      if path_or_pack.is_a?( Datafile::Package )
        @pack = path_or_pack
      else   ## assume it's a (string) path
        path = path_or_pack
        if !File.exist?( path )   ## file or directory
          puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
          exit 1
        end

        if File.directory?( path )
          @pack = Datafile::DirPackage.new( path )   ## delegate to "generic" package
        elsif File.file?( path ) && File.extname( path ) == '.zip'   # note: includes dot (.) eg .zip
          @pack = Datafile::ZipPackage.new( path )
        else
          puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
          exit 1
        end
      end
    end


    # Yields every entry of the wrapped package that matches pattern
    # and passes the include/exclude filters.
    def each( pattern:, &blk )
      @pack.each( pattern: pattern ) do |entry|
        next unless filter( entry )   ## lets you use include/exclude filters
        blk.call( entry )
      end
    end

    def each_conf( &blk )  each( pattern: CONF_RE, &blk ); end

    # @param format [String] 'csv' selects MATCH_CSV_RE;
    #   anything else is treated as txt (MATCH_RE)
    def each_match( format: 'txt', &blk )
      each( pattern: (format == 'csv' ? MATCH_CSV_RE : MATCH_RE), &blk )
    end
    def each_match_csv( &blk )  each( pattern: MATCH_CSV_RE, &blk ); end
    def each_csv( &blk )        each( pattern: CSV_RE, &blk ); end

    def each_club_props( &blk )  each( pattern: CLUB_PROPS_RE, &blk ); end

    def each_leagues( &blk )        each( pattern: LEAGUES_RE, &blk ); end
    def each_clubs( &blk )          each( pattern: CLUBS_RE, &blk ); end
    def each_clubs_wiki( &blk )     each( pattern: CLUBS_WIKI_RE, &blk ); end
    def each_clubs_history( &blk )  each( pattern: CLUBS_HISTORY_RE, &blk ); end

    def each_seasons( &blk )  each( pattern: SEASONS_RE, &blk ); end


    # @return [Array] all match datafile entries (in package order)
    def match( format: 'txt' )
      entries = []
      each_match( format: format ) { |entry| entries << entry }
      entries
    end
    alias_method :matches, :match


    ## todo/check: rename/change to match_by_dir - why? why not?
    ##   still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?

    # Groups the match datafile entries by the directory of the entry name.
    #
    # @return [Array<Array>] pairs of [dir, entries] in first-seen order e.g.
    #   [["1950s/1956-57",
    #      ["1950s/1956-57/1-division1.csv",
    #       "1950s/1956-57/2-division2.csv"]],
    #    ...]
    def match_by_season_dir( format: 'txt' )
      ## todo/fix: - add sort entries by name - why? why not?
      ##   note: assume 1-,2- etc. gets us back sorted leagues
      match( format: format )
        .group_by { |entry| File.dirname( entry.name ) }
        .to_a    ## return as array (or keep hash) - why? why not?
    end

    # Groups the match datafile entries by (normalized) season key;
    # same seasons get folded together even if in different directories.
    #
    # @param start [Object, nil] if given, converted via Season( start );
    #   seasons with an earlier start year get skipped
    # @return [Array<Array>] pairs of [season key, entries] sorted by key -
    #   latest / newest first (and oldest last)
    def match_by_season( format: 'txt', start: nil )   ## change/rename to by_season_key - why? why not?
      ## todo/fix: (re)use a more generic filter instead of start for start of season only
      season_start = start ? Season( start ) : nil

      by_season = {}
      match( format: format ).each do |entry|
        ## note: assume last directory in datafile path is the season part/key
        season_q = File.basename( File.dirname( entry.name ))
        season   = Season.parse( season_q )  ## normalize season

        ## skip if start season before this season
        next if season_start && season_start.start_year > season.start_year

        (by_season[ season.key ] ||= []) << entry
      end

      by_season.to_a.sort { |l,r| r[0] <=> l[0] }
    end
  end  # class Package


  class DirPackage < Package
    def initialize( path )  super( Datafile::DirPackage.new( path ) ); end
  end

  class ZipPackage < Package
    def initialize( path )  super( Datafile::ZipPackage.new( path ) ); end
  end
end   # module SportDb
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class Package
|
4
|
+
|
5
|
+
## todo/fix: make all regexes case-insensitive with /i option - why? why not?
|
6
|
+
## e.g. .TXT and .txt
|
7
|
+
## yes!! use /i option!!!!!
|
8
|
+
|
9
|
+
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
10
|
+
\.conf\.txt$
|
11
|
+
}x
|
12
|
+
|
13
|
+
## leagues.txt or leagues_en.txt
|
14
|
+
## remove support for en.leagues.txt - why? why not?
|
15
|
+
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
16
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
|
17
|
+
leagues
|
18
|
+
(?:_[a-z0-9_-]+)?
|
19
|
+
\.txt$
|
20
|
+
}x
|
21
|
+
|
22
|
+
## seasons.txt or seasons_en.txt
|
23
|
+
## remove support for br.seasons.txt - why? why not?
|
24
|
+
SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
25
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
|
26
|
+
seasons
|
27
|
+
(?:_[a-z0-9_-]+)?
|
28
|
+
\.txt$
|
29
|
+
}x
|
30
|
+
|
31
|
+
|
32
|
+
## clubs.txt or clubs_en.txt
|
33
|
+
## remove support for en.clubs.txt - why? why not?
|
34
|
+
CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
35
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
|
36
|
+
clubs
|
37
|
+
(?:_[a-z0-9_-]+)?
|
38
|
+
\.txt$
|
39
|
+
}x
|
40
|
+
|
41
|
+
CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
42
|
+
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
43
|
+
clubs
|
44
|
+
(?:_[a-z0-9_-]+)?
|
45
|
+
\.wiki\.txt$
|
46
|
+
}x
|
47
|
+
|
48
|
+
## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
|
49
|
+
CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
50
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
|
51
|
+
clubs
|
52
|
+
(?:_[a-z0-9_-]+)?
|
53
|
+
\.props\.txt$
|
54
|
+
}x
|
55
|
+
CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
|
56
|
+
|
57
|
+
|
58
|
+
CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
59
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
|
60
|
+
clubs
|
61
|
+
(?:_[a-z0-9_-]+)?
|
62
|
+
\.history\.txt$
|
63
|
+
}x
|
64
|
+
|
65
|
+
## teams.txt or teams_history.txt
|
66
|
+
TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
67
|
+
teams
|
68
|
+
(?:_[a-z0-9_-]+)?
|
69
|
+
\.txt$
|
70
|
+
}x
|
71
|
+
|
72
|
+
|
73
|
+
### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
|
74
|
+
### season folder:
|
75
|
+
## e.g. /2019-20 or
|
76
|
+
## year-only e.g. /2019 or
|
77
|
+
## /2016--france
|
78
|
+
SEASON_RE = %r{ (?:
|
79
|
+
\d{4}-\d{2}
|
80
|
+
| \d{4}(--[a-z0-9_-]+)?
|
81
|
+
)
|
82
|
+
}x
|
83
|
+
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
84
|
+
|
85
|
+
|
86
|
+
## note: if pattern includes directory add here
|
87
|
+
## (otherwise move to more "generic" datafile) - why? why not?
|
88
|
+
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
89
|
+
#{SEASON}
|
90
|
+
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
91
|
+
}x
|
92
|
+
|
93
|
+
MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
94
|
+
#{SEASON}
|
95
|
+
/[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
96
|
+
}x
|
97
|
+
|
98
|
+
### add "generic" pattern to find all csv datafiles
|
99
|
+
CSV_RE = %r{ (?: ^|/ )
|
100
|
+
[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
101
|
+
}x
|
102
|
+
|
103
|
+
|
104
|
+
## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
|
105
|
+
|
106
|
+
## Collects all files below path (recursively) whose path matches pattern.
##
##   path    - root directory to scan
##   pattern - regex (or anything responding to match) tested against every candidate path
##
## Returns an array with the matching candidate paths (in Dir.glob order).
##
## note: incl. files starting with dot (.) as candidates (normally excluded with just *)
## note: no more debug dumps (pp) of candidates / datafiles - keeps stdout clean
def self.find( path, pattern )
  candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
  candidates.select { |candidate| pattern.match( candidate ) }
end
|
120
|
+
|
121
|
+
|
122
|
+
## find all teams datafiles below path - uses TEAMS_RE unless another pattern gets passed in
def self.find_teams( path, pattern: TEAMS_RE )
  find( path, pattern )
end
|
123
|
+
## match path against TEAMS_RE - returns the match data (or nil if no match)
def self.match_teams( path )
  TEAMS_RE.match( path )
end
|
124
|
+
|
125
|
+
## find all clubs datafiles below path - uses CLUBS_RE unless another pattern gets passed in
def self.find_clubs( path, pattern: CLUBS_RE )
  find( path, pattern )
end
|
126
|
+
## find all clubs wiki datafiles below path - uses CLUBS_WIKI_RE unless another pattern gets passed in
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )
  find( path, pattern )
end
|
127
|
+
## find all clubs history datafiles below path - uses CLUBS_HISTORY_RE unless another pattern gets passed in
def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE )
  find( path, pattern )
end
|
128
|
+
|
129
|
+
## match path against CLUBS_RE - returns the match data (or nil if no match)
def self.match_clubs( path )
  CLUBS_RE.match( path )
end
|
130
|
+
## match path against CLUBS_WIKI_RE - returns the match data (or nil if no match)
def self.match_clubs_wiki( path )
  CLUBS_WIKI_RE.match( path )
end
|
131
|
+
## match path against CLUBS_HISTORY_RE - returns the match data (or nil if no match)
def self.match_clubs_history( path )
  CLUBS_HISTORY_RE.match( path )
end
|
132
|
+
## match path against pattern (CLUBS_PROPS_RE by default) - returns the match data (or nil if no match)
def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE )
  pattern.match( path )
end
|
133
|
+
|
134
|
+
## find all leagues datafiles below path - uses LEAGUES_RE unless another pattern gets passed in
def self.find_leagues( path, pattern: LEAGUES_RE )
  find( path, pattern )
end
|
135
|
+
## match path against LEAGUES_RE - returns the match data (or nil if no match)
def self.match_leagues( path )
  LEAGUES_RE.match( path )
end
|
136
|
+
|
137
|
+
## find all seasons datafiles below path - uses SEASONS_RE unless another pattern gets passed in
def self.find_seasons( path, pattern: SEASONS_RE )
  find( path, pattern )
end
|
138
|
+
## match path against SEASONS_RE - returns the match data (or nil if no match)
def self.match_seasons( path )
  SEASONS_RE.match( path )
end
|
139
|
+
|
140
|
+
|
141
|
+
## find all conf datafiles below path - uses CONF_RE unless another pattern gets passed in
def self.find_conf( path, pattern: CONF_RE )
  find( path, pattern )
end
|
142
|
+
## match path against CONF_RE - returns the match data (or nil if no match)
def self.match_conf( path )
  CONF_RE.match( path )
end
|
143
|
+
|
144
|
+
## find all match datafiles below path
##   format - 'csv' selects MATCH_CSV_RE; anything else is treated as txt (MATCH_RE) for now
def self.find_match( path, format: 'txt' )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  find( path, pattern )
end
|
151
|
+
## add match_match and match_match_csv - why? why not?
|
152
|
+
|
153
|
+
|
154
|
+
class << self
  ## predicate-style (and shorthand) aliases for the class-level match_* helpers
  ##   maps the existing method => the alias names pointing to it
  {
    match_teams:         %i[match_teams? teams?],
    match_clubs:         %i[match_clubs? clubs?],
    match_clubs_wiki:    %i[match_clubs_wiki? clubs_wiki?],
    match_clubs_history: %i[match_clubs_history? clubs_history?],
    match_clubs_props:   %i[match_club_props match_club_props? club_props?
                            match_clubs_props? clubs_props?],
    match_leagues:       %i[match_leagues? leagues?],
    match_seasons:       %i[match_seasons? seasons?],
    match_conf:          %i[match_conf? conf?],
  }.each do |target, names|
    names.each { |name| alias_method name, target }
  end
end
|
182
|
+
|
183
|
+
|
184
|
+
## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
|
185
|
+
attr_accessor :include, :exclude
|
186
|
+
|
187
|
+
## private helpers - like select returns true for keeping and false for skipping entry
|
188
|
+
## private helper - works like select: returns true for keeping and false for skipping the entry
##   filter - a String (looked up as a substring in entry.name),
##            a Regexp (matched against entry.name) or
##            anything else, which gets called with the entry itself
def filter_clause( filter, entry )
  case filter
  when String then entry.name.include?( filter )
  when Regexp then !filter.match( entry.name ).nil?
  else
    ## todo/check: pass in entry (and NOT entry.name) - why? why not?
    filter.call( entry )
  end
end
|
198
|
+
|
199
|
+
## Returns true if the entry passes the include/exclude filters (and false otherwise).
##   note: if include is set, it decides on its own (exclude is NOT checked)
##         todo/check: check for exclude here too - why? why not?
def filter( entry )
  ## todo/check: is include a reserved keyword????
  return ( filter_clause( @include, entry ) ? true : false )  if @include

  !( @exclude && filter_clause( @exclude, entry ) )
end
|
214
|
+
|
215
|
+
|
216
|
+
## Sets up the package with the include/exclude filters unset.
##   path_or_pack - a ready-to-use Datafile::Package or
##                  a (string) path to a directory or to a file with .zip extension
## note: prints an error message and exits (with status 1) if the path cannot be opened
def initialize( path_or_pack )
  @include = nil
  @exclude = nil

  if path_or_pack.is_a?( Datafile::Package )
    @pack = path_or_pack
    return
  end

  path = path_or_pack    ## assume it's a (string) path
  unless File.exist?( path )    ## file or directory
    puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
    exit 1
  end

  @pack = if File.directory?( path )
            Datafile::DirPackage.new( path )    ## delegate to "generic" package
          elsif File.file?( path ) && File.extname( path ) == '.zip'   # note: includes dot (.) eg .zip
            Datafile::ZipPackage.new( path )
          else
            puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
            exit 1
          end
end
|
239
|
+
|
240
|
+
|
241
|
+
## Passes every entry of the underlying package matching pattern on to the block -
##   entries rejected by the include/exclude filters get skipped
def each( pattern:, &blk )
  @pack.each( pattern: pattern ) do |entry|
    blk.call( entry )  if filter( entry )   ## lets you use include/exclude filters
  end
end
|
247
|
+
|
248
|
+
## each with the pattern fixed to CONF_RE
def each_conf( &blk )
  each( pattern: CONF_RE, &blk )
end
|
249
|
+
## each for match datafiles
##   format - 'csv' selects MATCH_CSV_RE; anything else falls back to MATCH_RE (txt)
def each_match( format: 'txt', &blk )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  each( pattern: pattern, &blk )
end
|
256
|
+
## each with the pattern fixed to MATCH_CSV_RE
def each_match_csv( &blk )
  each( pattern: MATCH_CSV_RE, &blk )
end
|
257
|
+
## each with the pattern fixed to the "generic" CSV_RE
def each_csv( &blk )
  each( pattern: CSV_RE, &blk )
end
|
258
|
+
|
259
|
+
## each with the pattern fixed to CLUB_PROPS_RE
def each_club_props( &blk )
  each( pattern: CLUB_PROPS_RE, &blk )
end
|
260
|
+
|
261
|
+
## each with the pattern fixed to LEAGUES_RE
def each_leagues( &blk )
  each( pattern: LEAGUES_RE, &blk )
end
|
262
|
+
## each with the pattern fixed to CLUBS_RE
def each_clubs( &blk )
  each( pattern: CLUBS_RE, &blk )
end
|
263
|
+
## each with the pattern fixed to CLUBS_WIKI_RE
def each_clubs_wiki( &blk )
  each( pattern: CLUBS_WIKI_RE, &blk )
end
|
264
|
+
## each with the pattern fixed to CLUBS_HISTORY_RE
def each_clubs_history( &blk )
  each( pattern: CLUBS_HISTORY_RE, &blk )
end
|
265
|
+
|
266
|
+
## each with the pattern fixed to SEASONS_RE
def each_seasons( &blk )
  each( pattern: SEASONS_RE, &blk )
end
|
267
|
+
|
268
|
+
|
269
|
+
## return all match datafile entries
|
270
|
+
## collects everything each_match yields (for the format passed in) into an array
def match( format: 'txt' )
  entries = []
  each_match( format: format ) do |entry|
    entries << entry
  end
  entries
end
|
273
|
+
alias_method :matches, :match
|
274
|
+
|
275
|
+
|
276
|
+
## todo/check: rename/change to match_by_dir - why? why not?
|
277
|
+
## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
|
278
|
+
## Groups all match datafile entries by the directory of the entry name.
##
## Returns an array of [season_path, entries] pairs e.g.
##   [["1950s/1956-57",
##      ["1950s/1956-57/1-division1.csv",
##       "1950s/1956-57/2-division2.csv",
##       "1950s/1956-57/3a-division3n.csv",
##       "1950s/1956-57/3b-division3s.csv"]],
##    ...]
##
## todo/fix: - add sort entries by name - why? why not?
##    note: assume 1-,2- etc. gets us back sorted leagues
##      - use sort. (will not sort by default?)
def match_by_season_dir( format: 'txt' )
  by_dir = match( format: format ).group_by { |entry| File.dirname( entry.name ) }
  by_dir.to_a    ## return as array (or keep hash) - why? why not?
end
|
301
|
+
|
302
|
+
## Groups all match datafile entries by (normalized) season key -
##   sorted by season key, latest / newest first (and oldest last).
##
##   format - passed on to match
##   start  - optional first season; seasons with an earlier start year get skipped
##
## note: the last directory in the datafile path is assumed to be the season part/key
##   todo/note: in the future - season might be anything
##     (e.g. part of a filename and NOT a directory) - why? why not?
##
## note: all same seasons (even if in different directories) get folded
##   into the same datafile list e.g.
##     1950s/1957-58/1-division1.csv  and  archives/1950s/1957-58/1-division1.csv
##   both end up in ["1957/58", [...]] - should be together - why? why not?
##
## Example package:
##   [["2012/13", ["2012-13/1-proleague.csv"]],
##    ["2013/14", ["2013-14/1-proleague.csv"]],
##    ["2014/15", ["2014-15/1-proleague.csv"]],
##    ["2015/16", ["2015-16/1-proleague.csv"]],
##    ["2016/17", ["2016-17/1-proleague.csv"]],
##    ["2017/18", ["2017-18/1-proleague.csv"]]]
##
## todo/fix: (re)use a more generic filter instead of start for start of season only
## todo/fix: use a "generic" filter_season helper for easy reuse
##   filter_season( clause, season_key )
##   or better filter = SeasonFilter.new( clause )
##     filter.skip?  filter.include?( season_key )?
##     filter.before?( season_key ) etc.
##     find some good method names!!!!
##
## todo/fix: - add sort entries by name - why? why not?
##    note: assume 1-,2- etc. gets us back sorted leagues
##      - use sort. (will not sort by default?)
def match_by_season( format: 'txt', start: nil )   ## change/rename to by_season_key - why? why not?
  season_start = start ? Season( start ) : nil

  by_season = match( format: format ).each_with_object( {} ) do |entry, memo|
    season_dir = File.basename( File.dirname( entry.name ))
    season     = Season.parse( season_dir )   ## normalize season

    ## skip if start season before this season
    next if season_start && season_start.start_year > season.start_year

    memo[ season.key ] ||= []
    memo[ season.key ] << entry
  end

  ## return as array (or keep hash) - why? why not?
  by_season.to_a.sort { |left, right| right[0] <=> left[0] }
end
|
364
|
+
end # class Package
|
365
|
+
|
366
|
+
|
367
|
+
## package that wraps a Datafile::DirPackage built from path
class DirPackage < Package
  def initialize( path )
    super( Datafile::DirPackage.new( path ) )
  end
end
|
370
|
+
|
371
|
+
## package that wraps a Datafile::ZipPackage built from path
class ZipPackage < Package
  def initialize( path )
    super( Datafile::ZipPackage.new( path ) )
  end
end
|
374
|
+
end # module SportDb
|