sportdb-formats 1.1.5 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_index.rb +141 -141
- data/lib/sportdb/formats/event/event_reader.rb +183 -183
- data/lib/sportdb/formats/league/league_outline_reader.rb +1 -0
- data/lib/sportdb/formats/league/league_reader.rb +168 -168
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -202
- data/lib/sportdb/formats/package.rb +374 -374
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +350 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +1 -1
- data/test/test_club_index_history.rb +107 -107
- data/test/test_club_reader.rb +201 -201
- data/test/test_club_reader_history.rb +212 -212
- data/test/test_country_reader.rb +89 -89
- data/test/test_league_outline_reader.rb +55 -55
- data/test/test_league_reader.rb +72 -72
- data/test/test_outline_reader.rb +31 -31
- data/test/test_regex.rb +67 -67
- data/test/test_wiki_reader.rb +77 -77
- metadata +12 -6
@@ -1,202 +1,202 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
|
5
|
-
|
6
|
-
class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
|
7
|
-
|
8
|
-
def self.parse( lines, start: )
|
9
|
-
## todo/fix: add support for txt and lines
|
10
|
-
## check if lines_or_txt is an array or just a string
|
11
|
-
parser = new( lines, start )
|
12
|
-
parser.parse
|
13
|
-
end
|
14
|
-
|
15
|
-
|
16
|
-
include Logging ## e.g. logger#debug, logger#info, etc.
|
17
|
-
include ParserHelper ## e.g. read_lines, etc.
|
18
|
-
|
19
|
-
|
20
|
-
def initialize( lines, start )
|
21
|
-
# for convenience split string into lines
|
22
|
-
## note: removes/strips empty lines
|
23
|
-
## todo/check: change to text instead of array of lines - why? why not?
|
24
|
-
@lines = lines.is_a?( String ) ? read_lines( lines ) : lines
|
25
|
-
@start = start
|
26
|
-
end
|
27
|
-
|
28
|
-
def parse
|
29
|
-
## try to find all teams in match schedule
|
30
|
-
@last_round = nil
|
31
|
-
@last_group = nil
|
32
|
-
|
33
|
-
## definitions/defs
|
34
|
-
@round_defs = Hash.new(0)
|
35
|
-
@group_defs = Hash.new(0)
|
36
|
-
|
37
|
-
## usage/refs
|
38
|
-
@rounds = {} ## track usage counter and match (two teams) counter
|
39
|
-
@groups = {} ## -"-
|
40
|
-
@teams = Hash.new(0) ## keep track of usage counter
|
41
|
-
|
42
|
-
@warns = [] ## track list of warnings (unmatched lines) too - why? why not?
|
43
|
-
|
44
|
-
|
45
|
-
@lines.each do |line|
|
46
|
-
if is_goals?( line )
|
47
|
-
logger.debug "skipping matched goals line: >#{line}<"
|
48
|
-
elsif is_round_def?( line )
|
49
|
-
## todo/fix: add round definition (w begin n end date)
|
50
|
-
## todo: do not patch rounds with definition (already assume begin/end date is good)
|
51
|
-
## -- how to deal with matches that get rescheduled/postponed?
|
52
|
-
logger.debug "skipping matched round def line: >#{line}<"
|
53
|
-
@round_defs[ line ] += 1
|
54
|
-
elsif is_round?( line )
|
55
|
-
logger.debug "skipping matched round line: >#{line}<"
|
56
|
-
|
57
|
-
round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
|
58
|
-
round[:count] +=1
|
59
|
-
@last_round = round
|
60
|
-
elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
|
61
|
-
### todo: add pipe (|) marker (required)
|
62
|
-
logger.debug "skipping matched group def line: >#{line}<"
|
63
|
-
@group_defs[ line ] += 1
|
64
|
-
elsif is_group?( line )
|
65
|
-
## -- lets you set group e.g. Group A etc.
|
66
|
-
logger.debug "skipping matched group line: >#{line}<"
|
67
|
-
|
68
|
-
group = @groups[ line ] ||= {count: 0, match_count: 0}
|
69
|
-
group[:count] +=1
|
70
|
-
@last_group = group
|
71
|
-
## todo/fix: parse group line!!!
|
72
|
-
elsif try_parse_game( line )
|
73
|
-
# do nothing here
|
74
|
-
else
|
75
|
-
logger.warn "skipping line (no match found): >#{line}<"
|
76
|
-
@warns << line
|
77
|
-
end
|
78
|
-
end # lines.each
|
79
|
-
|
80
|
-
[@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
|
81
|
-
end
|
82
|
-
|
83
|
-
|
84
|
-
def try_parse_game( line )
|
85
|
-
# note: clone line; for possible test do NOT modify in place for now
|
86
|
-
# note: returns true if parsed, false if no match
|
87
|
-
parse_game( line.dup )
|
88
|
-
end
|
89
|
-
|
90
|
-
def parse_game( line )
|
91
|
-
logger.debug "parsing game (fixture) line: >#{line}<"
|
92
|
-
|
93
|
-
## remove all protected text runs e.g. []
|
94
|
-
## fix: add [ to end-of-line too
|
95
|
-
## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
|
96
|
-
|
97
|
-
line = line.gsub( /\[
|
98
|
-
[^\]]+?
|
99
|
-
\]/x, '' ).strip
|
100
|
-
return true if line.empty? ## note: return true (for valid line with no match/teams)
|
101
|
-
|
102
|
-
|
103
|
-
## split by geo (@) - remove for now
|
104
|
-
values = line.split( '@' )
|
105
|
-
line = values[0]
|
106
|
-
|
107
|
-
|
108
|
-
## try find date
|
109
|
-
date = find_date!( line, start: @start )
|
110
|
-
if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
|
111
|
-
line = line.sub( /\[
|
112
|
-
[^\]]+?
|
113
|
-
\]/x, '' ).strip
|
114
|
-
|
115
|
-
else
|
116
|
-
## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
|
117
|
-
## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
|
118
|
-
line = line.sub( %r{^ ## MUST be anchored to beginning of line
|
119
|
-
[012]?[0-9]
|
120
|
-
[.:hH]
|
121
|
-
[0-9][0-9]
|
122
|
-
(?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
|
123
|
-
}x, '' ).strip
|
124
|
-
end
|
125
|
-
|
126
|
-
return true if line.empty? ## note: return true (for valid line with no match/teams)
|
127
|
-
|
128
|
-
|
129
|
-
score = find_score!( line )
|
130
|
-
|
131
|
-
logger.debug " line: >#{line}<"
|
132
|
-
|
133
|
-
line = line.sub( /\[
|
134
|
-
[^\]]+?
|
135
|
-
\]/x, '$$' ) # note: replace first score tag with $$
|
136
|
-
line = line.gsub( /\[
|
137
|
-
[^\]]+?
|
138
|
-
\]/x, '' ) # note: replace/remove all other score tags with nothing
|
139
|
-
|
140
|
-
## clean-up remove all text run inside () or empty () too
|
141
|
-
line = line.gsub( /\(
|
142
|
-
[^)]*?
|
143
|
-
\)/x, '' )
|
144
|
-
|
145
|
-
|
146
|
-
## check for more match separators e.g. - or vs for now
|
147
|
-
line = line.sub( / \s+
|
148
|
-
( -
|
149
|
-
| v
|
150
|
-
| vs\.? # note: allow optional dot eg. vs.
|
151
|
-
)
|
152
|
-
\s+
|
153
|
-
/ix, '$$' )
|
154
|
-
|
155
|
-
values = line.split( '$$' )
|
156
|
-
values = values.map { |value| value.strip } ## strip spaces
|
157
|
-
values = values.select { |value| !value.empty? } ## remove empty strings
|
158
|
-
|
159
|
-
return true if values.size == 0 ## note: return true (for valid line with no match/teams)
|
160
|
-
|
161
|
-
if values.size == 1
|
162
|
-
puts "(auto config) try matching teams separated by spaces (2+):"
|
163
|
-
pp values
|
164
|
-
|
165
|
-
values = values[0].split( /[ ]{2,}/ )
|
166
|
-
pp values
|
167
|
-
end
|
168
|
-
|
169
|
-
return false if values.size != 2
|
170
|
-
|
171
|
-
puts "(auto config) try matching teams:"
|
172
|
-
pp values
|
173
|
-
|
174
|
-
@teams[ values[0] ] += 1 ## update usage counters
|
175
|
-
@teams[ values[1] ] += 1
|
176
|
-
|
177
|
-
@last_round[ :match_count ] += 1 if @last_round
|
178
|
-
@last_group[ :match_count ] += 1 if @last_group
|
179
|
-
|
180
|
-
true
|
181
|
-
end
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
def find_score!( line )
|
186
|
-
# note: always call after find_dates !!!
|
187
|
-
# scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
|
188
|
-
# -- note: score might have two digits too
|
189
|
-
ScoreFormats.find!( line )
|
190
|
-
end
|
191
|
-
|
192
|
-
def find_date!( line, start: )
|
193
|
-
## NB: lets us pass in start_at/end_at date (for event)
|
194
|
-
# for auto-complete year
|
195
|
-
|
196
|
-
# extract date from line
|
197
|
-
# and return it
|
198
|
-
# NB: side effect - removes date from line string
|
199
|
-
DateFormats.find!( line, start: start )
|
200
|
-
end
|
201
|
-
end # class AutoConfParser
|
202
|
-
end # module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
|
5
|
+
|
6
|
+
class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
|
7
|
+
|
8
|
+
def self.parse( lines, start: )
|
9
|
+
## todo/fix: add support for txt and lines
|
10
|
+
## check if lines_or_txt is an array or just a string
|
11
|
+
parser = new( lines, start )
|
12
|
+
parser.parse
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
include Logging ## e.g. logger#debug, logger#info, etc.
|
17
|
+
include ParserHelper ## e.g. read_lines, etc.
|
18
|
+
|
19
|
+
|
20
|
+
def initialize( lines, start )
|
21
|
+
# for convenience split string into lines
|
22
|
+
## note: removes/strips empty lines
|
23
|
+
## todo/check: change to text instead of array of lines - why? why not?
|
24
|
+
@lines = lines.is_a?( String ) ? read_lines( lines ) : lines
|
25
|
+
@start = start
|
26
|
+
end
|
27
|
+
|
28
|
+
def parse
|
29
|
+
## try to find all teams in match schedule
|
30
|
+
@last_round = nil
|
31
|
+
@last_group = nil
|
32
|
+
|
33
|
+
## definitions/defs
|
34
|
+
@round_defs = Hash.new(0)
|
35
|
+
@group_defs = Hash.new(0)
|
36
|
+
|
37
|
+
## usage/refs
|
38
|
+
@rounds = {} ## track usage counter and match (two teams) counter
|
39
|
+
@groups = {} ## -"-
|
40
|
+
@teams = Hash.new(0) ## keep track of usage counter
|
41
|
+
|
42
|
+
@warns = [] ## track list of warnings (unmatched lines) too - why? why not?
|
43
|
+
|
44
|
+
|
45
|
+
@lines.each do |line|
|
46
|
+
if is_goals?( line )
|
47
|
+
logger.debug "skipping matched goals line: >#{line}<"
|
48
|
+
elsif is_round_def?( line )
|
49
|
+
## todo/fix: add round definition (w begin n end date)
|
50
|
+
## todo: do not patch rounds with definition (already assume begin/end date is good)
|
51
|
+
## -- how to deal with matches that get rescheduled/postponed?
|
52
|
+
logger.debug "skipping matched round def line: >#{line}<"
|
53
|
+
@round_defs[ line ] += 1
|
54
|
+
elsif is_round?( line )
|
55
|
+
logger.debug "skipping matched round line: >#{line}<"
|
56
|
+
|
57
|
+
round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
|
58
|
+
round[:count] +=1
|
59
|
+
@last_round = round
|
60
|
+
elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
|
61
|
+
### todo: add pipe (|) marker (required)
|
62
|
+
logger.debug "skipping matched group def line: >#{line}<"
|
63
|
+
@group_defs[ line ] += 1
|
64
|
+
elsif is_group?( line )
|
65
|
+
## -- lets you set group e.g. Group A etc.
|
66
|
+
logger.debug "skipping matched group line: >#{line}<"
|
67
|
+
|
68
|
+
group = @groups[ line ] ||= {count: 0, match_count: 0}
|
69
|
+
group[:count] +=1
|
70
|
+
@last_group = group
|
71
|
+
## todo/fix: parse group line!!!
|
72
|
+
elsif try_parse_game( line )
|
73
|
+
# do nothing here
|
74
|
+
else
|
75
|
+
logger.warn "skipping line (no match found): >#{line}<"
|
76
|
+
@warns << line
|
77
|
+
end
|
78
|
+
end # lines.each
|
79
|
+
|
80
|
+
[@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
def try_parse_game( line )
|
85
|
+
# note: clone line; for possible test do NOT modify in place for now
|
86
|
+
# note: returns true if parsed, false if no match
|
87
|
+
parse_game( line.dup )
|
88
|
+
end
|
89
|
+
|
90
|
+
def parse_game( line )
|
91
|
+
logger.debug "parsing game (fixture) line: >#{line}<"
|
92
|
+
|
93
|
+
## remove all protected text runs e.g. []
|
94
|
+
## fix: add [ to end-of-line too
|
95
|
+
## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
|
96
|
+
|
97
|
+
line = line.gsub( /\[
|
98
|
+
[^\]]+?
|
99
|
+
\]/x, '' ).strip
|
100
|
+
return true if line.empty? ## note: return true (for valid line with no match/teams)
|
101
|
+
|
102
|
+
|
103
|
+
## split by geo (@) - remove for now
|
104
|
+
values = line.split( '@' )
|
105
|
+
line = values[0]
|
106
|
+
|
107
|
+
|
108
|
+
## try find date
|
109
|
+
date = find_date!( line, start: @start )
|
110
|
+
if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
|
111
|
+
line = line.sub( /\[
|
112
|
+
[^\]]+?
|
113
|
+
\]/x, '' ).strip
|
114
|
+
|
115
|
+
else
|
116
|
+
## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
|
117
|
+
## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
|
118
|
+
line = line.sub( %r{^ ## MUST be anchored to beginning of line
|
119
|
+
[012]?[0-9]
|
120
|
+
[.:hH]
|
121
|
+
[0-9][0-9]
|
122
|
+
(?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
|
123
|
+
}x, '' ).strip
|
124
|
+
end
|
125
|
+
|
126
|
+
return true if line.empty? ## note: return true (for valid line with no match/teams)
|
127
|
+
|
128
|
+
|
129
|
+
score = find_score!( line )
|
130
|
+
|
131
|
+
logger.debug " line: >#{line}<"
|
132
|
+
|
133
|
+
line = line.sub( /\[
|
134
|
+
[^\]]+?
|
135
|
+
\]/x, '$$' ) # note: replace first score tag with $$
|
136
|
+
line = line.gsub( /\[
|
137
|
+
[^\]]+?
|
138
|
+
\]/x, '' ) # note: replace/remove all other score tags with nothing
|
139
|
+
|
140
|
+
## clean-up remove all text run inside () or empty () too
|
141
|
+
line = line.gsub( /\(
|
142
|
+
[^)]*?
|
143
|
+
\)/x, '' )
|
144
|
+
|
145
|
+
|
146
|
+
## check for more match separators e.g. - or vs for now
|
147
|
+
line = line.sub( / \s+
|
148
|
+
( -
|
149
|
+
| v
|
150
|
+
| vs\.? # note: allow optional dot eg. vs.
|
151
|
+
)
|
152
|
+
\s+
|
153
|
+
/ix, '$$' )
|
154
|
+
|
155
|
+
values = line.split( '$$' )
|
156
|
+
values = values.map { |value| value.strip } ## strip spaces
|
157
|
+
values = values.select { |value| !value.empty? } ## remove empty strings
|
158
|
+
|
159
|
+
return true if values.size == 0 ## note: return true (for valid line with no match/teams)
|
160
|
+
|
161
|
+
if values.size == 1
|
162
|
+
puts "(auto config) try matching teams separated by spaces (2+):"
|
163
|
+
pp values
|
164
|
+
|
165
|
+
values = values[0].split( /[ ]{2,}/ )
|
166
|
+
pp values
|
167
|
+
end
|
168
|
+
|
169
|
+
return false if values.size != 2
|
170
|
+
|
171
|
+
puts "(auto config) try matching teams:"
|
172
|
+
pp values
|
173
|
+
|
174
|
+
@teams[ values[0] ] += 1 ## update usage counters
|
175
|
+
@teams[ values[1] ] += 1
|
176
|
+
|
177
|
+
@last_round[ :match_count ] += 1 if @last_round
|
178
|
+
@last_group[ :match_count ] += 1 if @last_group
|
179
|
+
|
180
|
+
true
|
181
|
+
end
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
def find_score!( line )
|
186
|
+
# note: always call after find_dates !!!
|
187
|
+
# scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
|
188
|
+
# -- note: score might have two digits too
|
189
|
+
ScoreFormats.find!( line )
|
190
|
+
end
|
191
|
+
|
192
|
+
def find_date!( line, start: )
|
193
|
+
## NB: lets us pass in start_at/end_at date (for event)
|
194
|
+
# for auto-complete year
|
195
|
+
|
196
|
+
# extract date from line
|
197
|
+
# and return it
|
198
|
+
# NB: side effect - removes date from line string
|
199
|
+
DateFormats.find!( line, start: start )
|
200
|
+
end
|
201
|
+
end # class AutoConfParser
|
202
|
+
end # module SportDb
|
@@ -1,374 +1,374 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
class Package
|
4
|
-
|
5
|
-
## todo/fix: make all regexes case-insensitive with /i option - why? why not?
|
6
|
-
## e.g. .TXT and .txt
|
7
|
-
## yes!! use /i option!!!!!
|
8
|
-
|
9
|
-
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
10
|
-
\.conf\.txt$
|
11
|
-
}x
|
12
|
-
|
13
|
-
## leagues.txt or leagues_en.txt
|
14
|
-
## remove support for en.leagues.txt - why? why not?
|
15
|
-
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
16
|
-
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
|
17
|
-
leagues
|
18
|
-
(?:_[a-z0-9_-]+)?
|
19
|
-
\.txt$
|
20
|
-
}x
|
21
|
-
|
22
|
-
## seasons.txt or seasons_en.txt
|
23
|
-
## remove support for br.seasons.txt - why? why not?
|
24
|
-
SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
25
|
-
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
|
26
|
-
seasons
|
27
|
-
(?:_[a-z0-9_-]+)?
|
28
|
-
\.txt$
|
29
|
-
}x
|
30
|
-
|
31
|
-
|
32
|
-
## clubs.txt or clubs_en.txt
|
33
|
-
## remove support for en.clubs.txt - why? why not?
|
34
|
-
CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
35
|
-
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
|
36
|
-
clubs
|
37
|
-
(?:_[a-z0-9_-]+)?
|
38
|
-
\.txt$
|
39
|
-
}x
|
40
|
-
|
41
|
-
CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
42
|
-
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
43
|
-
clubs
|
44
|
-
(?:_[a-z0-9_-]+)?
|
45
|
-
\.wiki\.txt$
|
46
|
-
}x
|
47
|
-
|
48
|
-
## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
|
49
|
-
CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
50
|
-
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
|
51
|
-
clubs
|
52
|
-
(?:_[a-z0-9_-]+)?
|
53
|
-
\.props\.txt$
|
54
|
-
}x
|
55
|
-
CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
|
56
|
-
|
57
|
-
|
58
|
-
CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
59
|
-
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
|
60
|
-
clubs
|
61
|
-
(?:_[a-z0-9_-]+)?
|
62
|
-
\.history\.txt$
|
63
|
-
}x
|
64
|
-
|
65
|
-
## teams.txt or teams_history.txt
|
66
|
-
TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
67
|
-
teams
|
68
|
-
(?:_[a-z0-9_-]+)?
|
69
|
-
\.txt$
|
70
|
-
}x
|
71
|
-
|
72
|
-
|
73
|
-
### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
|
74
|
-
### season folder:
|
75
|
-
## e.g. /2019-20 or
|
76
|
-
## year-only e.g. /2019 or
|
77
|
-
## /2016--france
|
78
|
-
SEASON_RE = %r{ (?:
|
79
|
-
\d{4}-\d{2}
|
80
|
-
| \d{4}(--[a-z0-9_-]+)?
|
81
|
-
)
|
82
|
-
}x
|
83
|
-
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
84
|
-
|
85
|
-
|
86
|
-
## note: if pattern includes directory add here
|
87
|
-
## (otherwise move to more "generic" datafile) - why? why not?
|
88
|
-
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
89
|
-
#{SEASON}
|
90
|
-
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
91
|
-
}x
|
92
|
-
|
93
|
-
MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
94
|
-
#{SEASON}
|
95
|
-
/[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
96
|
-
}x
|
97
|
-
|
98
|
-
### add "generic" pattern to find all csv datafiles
|
99
|
-
CSV_RE = %r{ (?: ^|/ )
|
100
|
-
[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
101
|
-
}x
|
102
|
-
|
103
|
-
|
104
|
-
## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
|
105
|
-
|
106
|
-
def self.find( path, pattern )
|
107
|
-
datafiles = []
|
108
|
-
|
109
|
-
## check all txt files
|
110
|
-
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
111
|
-
candidates = Dir.glob( "#{path}/**/{*,.*}.*" )
|
112
|
-
pp candidates
|
113
|
-
candidates.each do |candidate|
|
114
|
-
datafiles << candidate if pattern.match( candidate )
|
115
|
-
end
|
116
|
-
|
117
|
-
pp datafiles
|
118
|
-
datafiles
|
119
|
-
end
|
120
|
-
|
121
|
-
|
122
|
-
def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
|
123
|
-
def self.match_teams( path ) TEAMS_RE.match( path ); end
|
124
|
-
|
125
|
-
def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
|
126
|
-
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
|
127
|
-
def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
|
128
|
-
|
129
|
-
def self.match_clubs( path ) CLUBS_RE.match( path ); end
|
130
|
-
def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
|
131
|
-
def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
|
132
|
-
def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
|
133
|
-
|
134
|
-
def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
|
135
|
-
def self.match_leagues( path ) LEAGUES_RE.match( path ); end
|
136
|
-
|
137
|
-
def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
|
138
|
-
def self.match_seasons( path ) SEASONS_RE.match( path ); end
|
139
|
-
|
140
|
-
|
141
|
-
def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
|
142
|
-
def self.match_conf( path ) CONF_RE.match( path ); end
|
143
|
-
|
144
|
-
def self.find_match( path, format: 'txt' )
|
145
|
-
if format == 'csv'
|
146
|
-
find( path, MATCH_CSV_RE )
|
147
|
-
else ## otherwise always assume txt for now
|
148
|
-
find( path, MATCH_RE )
|
149
|
-
end
|
150
|
-
end
|
151
|
-
## add match_match and match_match_csv - why? why not?
|
152
|
-
|
153
|
-
|
154
|
-
class << self
|
155
|
-
alias_method :match_teams?, :match_teams
|
156
|
-
alias_method :teams?, :match_teams
|
157
|
-
|
158
|
-
alias_method :match_clubs?, :match_clubs
|
159
|
-
alias_method :clubs?, :match_clubs
|
160
|
-
|
161
|
-
alias_method :match_clubs_wiki?, :match_clubs_wiki
|
162
|
-
alias_method :clubs_wiki?, :match_clubs_wiki
|
163
|
-
|
164
|
-
alias_method :match_clubs_history?, :match_clubs_history
|
165
|
-
alias_method :clubs_history?, :match_clubs_history
|
166
|
-
|
167
|
-
alias_method :match_club_props, :match_clubs_props
|
168
|
-
alias_method :match_club_props?, :match_clubs_props
|
169
|
-
alias_method :club_props?, :match_clubs_props
|
170
|
-
alias_method :match_clubs_props?, :match_clubs_props
|
171
|
-
alias_method :clubs_props?, :match_clubs_props
|
172
|
-
|
173
|
-
alias_method :match_leagues?, :match_leagues
|
174
|
-
alias_method :leagues?, :match_leagues
|
175
|
-
|
176
|
-
alias_method :match_seasons?, :match_seasons
|
177
|
-
alias_method :seasons?, :match_seasons
|
178
|
-
|
179
|
-
alias_method :match_conf?, :match_conf
|
180
|
-
alias_method :conf?, :match_conf
|
181
|
-
end
|
182
|
-
|
183
|
-
|
184
|
-
## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
|
185
|
-
attr_accessor :include, :exclude
|
186
|
-
|
187
|
-
## private helpers - like select returns true for keeping and false for skipping entry
|
188
|
-
def filter_clause( filter, entry )
|
189
|
-
if filter.is_a?( String )
|
190
|
-
entry.name.index( filter ) ? true : false
|
191
|
-
elsif filter.is_a?( Regexp )
|
192
|
-
filter.match( entry.name ) ? true : false
|
193
|
-
else ## assume
|
194
|
-
## todo/check: pass in entry (and NOT entry.name) - why? why not?
|
195
|
-
filter.call( entry )
|
196
|
-
end
|
197
|
-
end
|
198
|
-
|
199
|
-
def filter( entry )
|
200
|
-
if @include
|
201
|
-
if filter_clause( @include, entry ) ## todo/check: is include a reserved keyword????
|
202
|
-
true ## todo/check: check for exclude here too - why? why not?
|
203
|
-
else
|
204
|
-
false
|
205
|
-
end
|
206
|
-
else
|
207
|
-
if @exclude && filter_clause( @exclude, entry )
|
208
|
-
false
|
209
|
-
else
|
210
|
-
true
|
211
|
-
end
|
212
|
-
end
|
213
|
-
end
|
214
|
-
|
215
|
-
|
216
|
-
def initialize( path_or_pack )
|
217
|
-
@include = nil
|
218
|
-
@exclude = nil
|
219
|
-
|
220
|
-
if path_or_pack.is_a?( Datafile::Package )
|
221
|
-
@pack = path_or_pack
|
222
|
-
else ## assume it's a (string) path
|
223
|
-
path = path_or_pack
|
224
|
-
if !File.exist?( path ) ## file or directory
|
225
|
-
puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
|
226
|
-
exit 1
|
227
|
-
end
|
228
|
-
|
229
|
-
if File.directory?( path )
|
230
|
-
@pack = Datafile::DirPackage.new( path ) ## delegate to "generic" package
|
231
|
-
elsif File.file?( path ) && File.extname( path ) == '.zip' # note: includes dot (.) eg .zip
|
232
|
-
@pack = Datafile::ZipPackage.new( path )
|
233
|
-
else
|
234
|
-
puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
|
235
|
-
exit 1
|
236
|
-
end
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
|
241
|
-
def each( pattern:, &blk )
|
242
|
-
@pack.each( pattern: pattern ) do |entry|
|
243
|
-
next unless filter( entry ) ## lets you use include/exclude filters
|
244
|
-
blk.call( entry )
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
def each_conf( &blk ) each( pattern: CONF_RE, &blk ); end
|
249
|
-
def each_match( format: 'txt', &blk )
|
250
|
-
if format == 'csv'
|
251
|
-
each( pattern: MATCH_CSV_RE, &blk );
|
252
|
-
else
|
253
|
-
each( pattern: MATCH_RE, &blk );
|
254
|
-
end
|
255
|
-
end
|
256
|
-
def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
|
257
|
-
def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
|
258
|
-
|
259
|
-
def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
|
260
|
-
|
261
|
-
def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
|
262
|
-
def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
|
263
|
-
def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
|
264
|
-
def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
|
265
|
-
|
266
|
-
def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
|
267
|
-
|
268
|
-
|
269
|
-
## return all match datafile entries
|
270
|
-
def match( format: 'txt' )
|
271
|
-
ary=[]; each_match( format: format ) {|entry| ary << entry }; ary;
|
272
|
-
end
|
273
|
-
alias_method :matches, :match
|
274
|
-
|
275
|
-
|
276
|
-
## todo/check: rename/change to match_by_dir - why? why not?
|
277
|
-
## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
|
278
|
-
def match_by_season_dir( format: 'txt' )
|
279
|
-
##
|
280
|
-
## [["1950s/1956-57",
|
281
|
-
## ["1950s/1956-57/1-division1.csv",
|
282
|
-
## "1950s/1956-57/2-division2.csv",
|
283
|
-
## "1950s/1956-57/3a-division3n.csv",
|
284
|
-
## "1950s/1956-57/3b-division3s.csv"]],
|
285
|
-
## ...]
|
286
|
-
|
287
|
-
h = {}
|
288
|
-
match( format: format ).each do |entry|
|
289
|
-
season_path = File.dirname( entry.name )
|
290
|
-
|
291
|
-
h[ season_path ] ||= []
|
292
|
-
h[ season_path ] << entry
|
293
|
-
end
|
294
|
-
|
295
|
-
## todo/fix: - add sort entries by name - why? why not?
|
296
|
-
## note: assume 1-,2- etc. gets us back sorted leagues
|
297
|
-
## - use sort. (will not sort by default?)
|
298
|
-
|
299
|
-
h.to_a ## return as array (or keep hash) - why? why not?
|
300
|
-
end # method match_by_season_dir
|
301
|
-
|
302
|
-
def match_by_season( format: 'txt', start: nil ) ## change/rename to by_season_key - why? why not?
|
303
|
-
|
304
|
-
## todo/note: in the future - season might be anything (e.g. part of a filename and NOT a directory) - why? why not?
|
305
|
-
|
306
|
-
## note: fold all sames seasons (even if in different directories)
|
307
|
-
## into same datafile list e.g.
|
308
|
-
## ["1957/58",
|
309
|
-
## ["1950s/1957-58/1-division1.csv",
|
310
|
-
## "1950s/1957-58/2-division2.csv",
|
311
|
-
## "1950s/1957-58/3a-division3n.csv",
|
312
|
-
## "1950s/1957-58/3b-division3s.csv"]],
|
313
|
-
## and
|
314
|
-
## ["1957/58",
|
315
|
-
## ["archives/1950s/1957-58/1-division1.csv",
|
316
|
-
## "archives/1950s/1957-58/2-division2.csv",
|
317
|
-
## "archives/1950s/1957-58/3a-division3n.csv",
|
318
|
-
## "archives/1950s/1957-58/3b-division3s.csv"]],
|
319
|
-
## should be together - why? why not?
|
320
|
-
|
321
|
-
####
|
322
|
-
# Example package:
|
323
|
-
# [["2012/13", ["2012-13/1-proleague.csv"]],
|
324
|
-
# ["2013/14", ["2013-14/1-proleague.csv"]],
|
325
|
-
# ["2014/15", ["2014-15/1-proleague.csv"]],
|
326
|
-
# ["2015/16", ["2015-16/1-proleague.csv"]],
|
327
|
-
# ["2016/17", ["2016-17/1-proleague.csv"]],
|
328
|
-
# ["2017/18", ["2017-18/1-proleague.csv"]]]
|
329
|
-
|
330
|
-
## todo/fix: (re)use a more generic filter instead of start for start of season only
|
331
|
-
|
332
|
-
## todo/fix: use a "generic" filter_season helper for easy reuse
|
333
|
-
## filter_season( clause, season_key )
|
334
|
-
## or better filter = SeasonFilter.new( clause )
|
335
|
-
## filter.skip? filter.include? ( season_sason_key )?
|
336
|
-
## fiteer.before?( season_key ) etc.
|
337
|
-
## find some good method names!!!!
|
338
|
-
season_start = start ? Season( start ) : nil
|
339
|
-
|
340
|
-
h = {}
|
341
|
-
match( format: format ).each do |entry|
|
342
|
-
## note: assume last directory in datafile path is the season part/key
|
343
|
-
season_q = File.basename( File.dirname( entry.name ))
|
344
|
-
season = Season.parse( season_q ) ## normalize season
|
345
|
-
|
346
|
-
## skip if start season before this season
|
347
|
-
next if season_start && season_start.start_year > season.start_year
|
348
|
-
|
349
|
-
h[ season.key ] ||= []
|
350
|
-
h[ season.key ] << entry
|
351
|
-
end
|
352
|
-
|
353
|
-
## todo/fix: - add sort entries by name - why? why not?
|
354
|
-
## note: assume 1-,2- etc. gets us back sorted leagues
|
355
|
-
## - use sort. (will not sort by default?)
|
356
|
-
|
357
|
-
## sort by season
|
358
|
-
## latest / newest first (and oldest last)
|
359
|
-
|
360
|
-
h.to_a.sort do |l,r| ## return as array (or keep hash) - why? why not?
|
361
|
-
r[0] <=> l[0]
|
362
|
-
end
|
363
|
-
end # method match_by_season
|
364
|
-
end # class Package
|
365
|
-
|
366
|
-
|
367
|
-
class DirPackage < Package
|
368
|
-
def initialize( path ) super( Datafile::DirPackage.new( path ) ); end
|
369
|
-
end
|
370
|
-
|
371
|
-
class ZipPackage < Package
|
372
|
-
def initialize( path ) super( Datafile::ZipPackage.new( path ) ); end
|
373
|
-
end
|
374
|
-
end # module SportDb
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class Package
|
4
|
+
|
5
|
+
## todo/fix: make all regexes case-insensitive with /i option - why? why not?
|
6
|
+
## e.g. .TXT and .txt
|
7
|
+
## yes!! use /i option!!!!!
|
8
|
+
|
9
|
+
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
10
|
+
\.conf\.txt$
|
11
|
+
}x
|
12
|
+
|
13
|
+
## leagues.txt or leagues_en.txt
|
14
|
+
## remove support for en.leagues.txt - why? why not?
|
15
|
+
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
16
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
|
17
|
+
leagues
|
18
|
+
(?:_[a-z0-9_-]+)?
|
19
|
+
\.txt$
|
20
|
+
}x
|
21
|
+
|
22
|
+
## seasons.txt or seasons_en.txt
|
23
|
+
## remove support for br.seasons.txt - why? why not?
|
24
|
+
SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
25
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
|
26
|
+
seasons
|
27
|
+
(?:_[a-z0-9_-]+)?
|
28
|
+
\.txt$
|
29
|
+
}x
|
30
|
+
|
31
|
+
|
32
|
+
## clubs.txt or clubs_en.txt
|
33
|
+
## remove support for en.clubs.txt - why? why not?
|
34
|
+
CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
35
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
|
36
|
+
clubs
|
37
|
+
(?:_[a-z0-9_-]+)?
|
38
|
+
\.txt$
|
39
|
+
}x
|
40
|
+
|
41
|
+
CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
42
|
+
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
43
|
+
clubs
|
44
|
+
(?:_[a-z0-9_-]+)?
|
45
|
+
\.wiki\.txt$
|
46
|
+
}x
|
47
|
+
|
48
|
+
## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
|
49
|
+
CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
50
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
|
51
|
+
clubs
|
52
|
+
(?:_[a-z0-9_-]+)?
|
53
|
+
\.props\.txt$
|
54
|
+
}x
|
55
|
+
CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
|
56
|
+
|
57
|
+
|
58
|
+
CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
59
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
|
60
|
+
clubs
|
61
|
+
(?:_[a-z0-9_-]+)?
|
62
|
+
\.history\.txt$
|
63
|
+
}x
|
64
|
+
|
65
|
+
## teams.txt or teams_history.txt
|
66
|
+
TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
67
|
+
teams
|
68
|
+
(?:_[a-z0-9_-]+)?
|
69
|
+
\.txt$
|
70
|
+
}x
|
71
|
+
|
72
|
+
|
73
|
+
### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
|
74
|
+
### season folder:
|
75
|
+
## e.g. /2019-20 or
|
76
|
+
## year-only e.g. /2019 or
|
77
|
+
## /2016--france
|
78
|
+
SEASON_RE = %r{ (?:
|
79
|
+
\d{4}-\d{2}
|
80
|
+
| \d{4}(--[a-z0-9_-]+)?
|
81
|
+
)
|
82
|
+
}x
|
83
|
+
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
84
|
+
|
85
|
+
|
86
|
+
## note: if pattern includes directory add here
|
87
|
+
## (otherwise move to more "generic" datafile) - why? why not?
|
88
|
+
MATCH_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
89
|
+
#{SEASON}
|
90
|
+
/[a-z0-9_-]+\.txt$ ## txt e.g /1-premierleague.txt
|
91
|
+
}x
|
92
|
+
|
93
|
+
MATCH_CSV_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
94
|
+
#{SEASON}
|
95
|
+
/[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
96
|
+
}x
|
97
|
+
|
98
|
+
### add "generic" pattern to find all csv datafiles
|
99
|
+
CSV_RE = %r{ (?: ^|/ )
|
100
|
+
[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
101
|
+
}x
|
102
|
+
|
103
|
+
|
104
|
+
## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
|
105
|
+
|
106
|
+
## Collect all datafiles below path whose (full) file path matches pattern.
##
##   path    - root directory to scan (recursively)
##   pattern - pattern (e.g. CLUBS_RE) matched against every candidate path
##
## returns an array of matching paths (empty if nothing matches)
def self.find( path, pattern )
  ## note: incl. files starting with dot (.) as candidates
  ##        (normally excluded with just *);
  ##        the glob only picks up names that include a dot (.)
  candidates = Dir.glob( "#{path}/**/{*,.*}.*" )

  ## note: no debug dumps (pp) here - keep stdout clean for library users
  candidates.select { |candidate| pattern.match( candidate ) }
end
|
120
|
+
|
121
|
+
|
122
|
+
## teams datafiles - find below path / match a single path
def self.find_teams( path, pattern: TEAMS_RE )
  find( path, pattern )
end

def self.match_teams( path )
  TEAMS_RE.match( path )
end
|
124
|
+
|
125
|
+
## clubs datafiles (incl. wiki and history variants) - find below path
def self.find_clubs( path, pattern: CLUBS_RE )
  find( path, pattern )
end

def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )
  find( path, pattern )
end

def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE )
  find( path, pattern )
end

## clubs datafiles - match a single path (returns match data or nil)
def self.match_clubs( path )
  CLUBS_RE.match( path )
end

def self.match_clubs_wiki( path )
  CLUBS_WIKI_RE.match( path )
end

def self.match_clubs_history( path )
  CLUBS_HISTORY_RE.match( path )
end

def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE )
  pattern.match( path )
end
|
133
|
+
|
134
|
+
## leagues datafiles - find below path / match a single path
def self.find_leagues( path, pattern: LEAGUES_RE )
  find( path, pattern )
end

def self.match_leagues( path )
  LEAGUES_RE.match( path )
end
|
136
|
+
|
137
|
+
## seasons datafiles - find below path / match a single path
def self.find_seasons( path, pattern: SEASONS_RE )
  find( path, pattern )
end

def self.match_seasons( path )
  SEASONS_RE.match( path )
end
|
139
|
+
|
140
|
+
|
141
|
+
## conf datafiles (.conf.txt) - find below path / match a single path
def self.find_conf( path, pattern: CONF_RE )
  find( path, pattern )
end

def self.match_conf( path )
  CONF_RE.match( path )
end
|
143
|
+
|
144
|
+
## Find all match datafiles below path.
##   format - 'csv' selects MATCH_CSV_RE;
##            anything else is treated as txt (MATCH_RE) for now
def self.find_match( path, format: 'txt' )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  find( path, pattern )
end
|
151
|
+
## add match_match and match_match_csv - why? why not?
|
152
|
+
|
153
|
+
|
154
|
+
class << self
  ## predicate-style (and short) aliases for the class-level match_* helpers
  ##   format:  target method => alias names
  {
    match_teams:         %i[match_teams? teams?],
    match_clubs:         %i[match_clubs? clubs?],
    match_clubs_wiki:    %i[match_clubs_wiki? clubs_wiki?],
    match_clubs_history: %i[match_clubs_history? clubs_history?],
    match_clubs_props:   %i[match_club_props match_club_props? club_props?
                            match_clubs_props? clubs_props?],
    match_leagues:       %i[match_leagues? leagues?],
    match_seasons:       %i[match_seasons? seasons?],
    match_conf:          %i[match_conf? conf?],
  }.each do |target, names|
    names.each { |name| alias_method name, target }
  end
end
|
182
|
+
|
183
|
+
|
184
|
+
## attr_reader :pack ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
|
185
|
+
attr_accessor :include, :exclude
|
186
|
+
|
187
|
+
## private helpers - like select returns true for keeping and false for skipping entry
|
188
|
+
## Check a single include/exclude clause against an entry.
##
##   filter - String (substring of entry.name),
##            Regexp (matched against entry.name) or
##            anything else responding to call (gets passed the entry itself)
##   entry  - datafile entry (must respond to name)
##
## returns always a strict boolean - true if the clause matches, false otherwise
def filter_clause( filter, entry )
  matched = case filter
            when String then entry.name.include?( filter )
            when Regexp then filter.match( entry.name )
            else
              ## todo/check: pass in entry (and NOT entry.name) - why? why not?
              filter.call( entry )
            end

  ## note: normalize to true/false in all branches
  ##         (callables may return any truthy/falsy value)
  matched ? true : false
end
|
198
|
+
|
199
|
+
## Decide if an entry gets kept (true) or skipped (false).
##   - with an include clause set, only the include clause decides
##     (todo/check: check for exclude here too - why? why not?)
##   - otherwise the entry is kept unless an exclude clause is set and matches
def filter( entry )
  return (filter_clause( @include, entry ) ? true : false)  if @include

  excluded = @exclude && filter_clause( @exclude, entry )
  excluded ? false : true
end
|
214
|
+
|
215
|
+
|
216
|
+
## Open a package.
##   path_or_pack - a ready-to-use Datafile::Package or
##                  a (string) path to a directory or a .zip file
##
## note: prints an error message and exits (status 1) if the path does not exist
##       or is neither a directory nor a file with the .zip extension
def initialize( path_or_pack )
  @include = nil
  @exclude = nil

  if path_or_pack.is_a?( Datafile::Package )
    @pack = path_or_pack
    return
  end

  ## otherwise assume it's a (string) path
  path = path_or_pack
  unless File.exist?( path )   ## file or directory
    puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
    exit 1
  end

  ## delegate to the matching "generic" package
  @pack = if File.directory?( path )
            Datafile::DirPackage.new( path )
          elsif File.file?( path ) && File.extname( path ) == '.zip'   # note: extname includes dot (.) eg .zip
            Datafile::ZipPackage.new( path )
          else
            puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
            exit 1
          end
end
|
239
|
+
|
240
|
+
|
241
|
+
## Walk all package entries matching pattern and
##  pass on the ones that get through the include/exclude filters.
def each( pattern:, &blk )
  @pack.each( pattern: pattern ) do |entry|
    blk.call( entry )  if filter( entry )
  end
end
|
247
|
+
|
248
|
+
## iterate over all conf datafiles (CONF_RE)
def each_conf( &blk )
  each( pattern: CONF_RE, &blk )
end
|
249
|
+
## iterate over all match datafiles
##   format - 'csv' selects MATCH_CSV_RE; anything else uses MATCH_RE (txt)
def each_match( format: 'txt', &blk )
  pattern = format == 'csv' ? MATCH_CSV_RE : MATCH_RE
  each( pattern: pattern, &blk )
end
|
256
|
+
## iterate over match datafiles in csv (MATCH_CSV_RE)
def each_match_csv( &blk )
  each( pattern: MATCH_CSV_RE, &blk )
end

## iterate over all csv datafiles (CSV_RE)
def each_csv( &blk )
  each( pattern: CSV_RE, &blk )
end
|
258
|
+
|
259
|
+
## iterate over all club props datafiles (CLUB_PROPS_RE)
def each_club_props( &blk )
  each( pattern: CLUB_PROPS_RE, &blk )
end
|
260
|
+
|
261
|
+
## iterate over all leagues datafiles (LEAGUES_RE)
def each_leagues( &blk )
  each( pattern: LEAGUES_RE, &blk )
end

## iterate over all clubs datafiles (CLUBS_RE)
def each_clubs( &blk )
  each( pattern: CLUBS_RE, &blk )
end

## iterate over all clubs wiki datafiles (CLUBS_WIKI_RE)
def each_clubs_wiki( &blk )
  each( pattern: CLUBS_WIKI_RE, &blk )
end

## iterate over all clubs history datafiles (CLUBS_HISTORY_RE)
def each_clubs_history( &blk )
  each( pattern: CLUBS_HISTORY_RE, &blk )
end
|
265
|
+
|
266
|
+
## iterate over all seasons datafiles (SEASONS_RE)
def each_seasons( &blk )
  each( pattern: SEASONS_RE, &blk )
end
|
267
|
+
|
268
|
+
|
269
|
+
## return all match datafile entries
|
270
|
+
## collect all match datafile entries (as yielded by each_match) into an array
def match( format: 'txt' )
  entries = []
  each_match( format: format ) { |entry| entries << entry }
  entries
end
|
273
|
+
alias_method :matches, :match
|
274
|
+
|
275
|
+
|
276
|
+
## todo/check: rename/change to match_by_dir - why? why not?
|
277
|
+
## still in use somewhere? move to attic? use match_by_season and delete by_season_dir? - why? why not?
|
278
|
+
## Group all match datafile entries by the directory of the entry name.
##
## returns an array of [directory, entries] pairs e.g.
##   [["1950s/1956-57",
##      ["1950s/1956-57/1-division1.csv",
##       "1950s/1956-57/2-division2.csv",
##       "1950s/1956-57/3a-division3n.csv",
##       "1950s/1956-57/3b-division3s.csv"]],
##    ...]
def match_by_season_dir( format: 'txt' )
  ## todo/fix: - add sort entries by name - why? why not?
  ##   note: assume 1-,2- etc. gets us back sorted leagues
  ##    - use sort. (will not sort by default?)
  by_dir = match( format: format ).group_by do |entry|
    File.dirname( entry.name )
  end

  by_dir.to_a   ## return as array (or keep hash) - why? why not?
end # method match_by_season_dir
|
301
|
+
|
302
|
+
## Group all match datafile entries by (normalized) season key.
##
##   format - passed on to match (e.g. 'txt' or 'csv')
##   start  - optional first season; seasons with an earlier start year get skipped
##
## returns an array of [season key, entries] pairs sorted by season key,
##   latest / newest first (and oldest last)
##
## todo/note: in the future - season might be anything
##             (e.g. part of a filename and NOT a directory) - why? why not?
##
## note: folds all same seasons (even if in different directories)
##        into the same datafile list e.g.
##   ["1957/58",
##     ["1950s/1957-58/1-division1.csv",
##      "1950s/1957-58/2-division2.csv",
##      "1950s/1957-58/3a-division3n.csv",
##      "1950s/1957-58/3b-division3s.csv"]],
##  and
##   ["1957/58",
##     ["archives/1950s/1957-58/1-division1.csv",
##      "archives/1950s/1957-58/2-division2.csv",
##      "archives/1950s/1957-58/3a-division3n.csv",
##      "archives/1950s/1957-58/3b-division3s.csv"]],
##  should be together - why? why not?
##
## Example package:
##   [["2012/13", ["2012-13/1-proleague.csv"]],
##    ["2013/14", ["2013-14/1-proleague.csv"]],
##    ["2014/15", ["2014-15/1-proleague.csv"]],
##    ["2015/16", ["2015-16/1-proleague.csv"]],
##    ["2016/17", ["2016-17/1-proleague.csv"]],
##    ["2017/18", ["2017-18/1-proleague.csv"]]]
def match_by_season( format: 'txt', start: nil )  ## change/rename to by_season_key - why? why not?
  ## todo/fix: (re)use a more generic filter instead of start for start of season only
  ## todo/fix: use a "generic" filter_season helper for easy reuse
  ##     filter_season( clause, season_key )
  ##   or better filter = SeasonFilter.new( clause )
  ##     filter.skip? filter.include? ( season_key )?
  ##     filter.before?( season_key ) etc.
  ##   find some good method names!!!!
  season_start = start ? Season( start ) : nil

  by_season = {}
  match( format: format ).each do |entry|
    ## note: assume last directory in datafile path is the season part/key
    season_dir = File.basename( File.dirname( entry.name ))
    season     = Season.parse( season_dir )  ## normalize season

    ## skip seasons starting before the requested start season
    next if season_start && season_start.start_year > season.start_year

    (by_season[ season.key ] ||= []) << entry
  end

  ## todo/fix: - add sort entries by name - why? why not?
  ##   note: assume 1-,2- etc. gets us back sorted leagues
  ##    - use sort. (will not sort by default?)

  ## sort by season key - descending
  by_season.to_a.sort { |l,r| r[0] <=> l[0] }   ## return as array (or keep hash) - why? why not?
end # method match_by_season
|
364
|
+
end # class Package
|
365
|
+
|
366
|
+
|
367
|
+
## Package backed by a directory (wraps the "generic" Datafile::DirPackage).
class DirPackage < Package
  ## path - directory handed on to Datafile::DirPackage
  def initialize( path )
    super( Datafile::DirPackage.new( path ) )
  end
end
|
370
|
+
|
371
|
+
## Package backed by a zip archive (wraps the "generic" Datafile::ZipPackage).
class ZipPackage < Package
  ## path - zip file handed on to Datafile::ZipPackage
  def initialize( path )
    super( Datafile::ZipPackage.new( path ) )
  end
end
|
374
|
+
end # module SportDb
|