sportdb-formats 0.4.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +24 -4
- data/Rakefile +3 -3
- data/lib/sportdb/formats.rb +25 -2
- data/lib/sportdb/formats/config.rb +40 -0
- data/lib/sportdb/formats/datafile.rb +42 -62
- data/lib/sportdb/formats/datafile_package.rb +160 -0
- data/lib/sportdb/formats/match/conf_parser.rb +120 -0
- data/lib/sportdb/formats/match/mapper.rb +319 -0
- data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
- data/lib/sportdb/formats/match/match_parser.rb +659 -0
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
- data/lib/sportdb/formats/name_helper.rb +84 -0
- data/lib/sportdb/formats/outline_reader.rb +53 -15
- data/lib/sportdb/formats/package.rb +172 -160
- data/lib/sportdb/formats/parser_helper.rb +81 -0
- data/lib/sportdb/formats/score/score_formats.rb +180 -0
- data/lib/sportdb/formats/score/score_parser.rb +196 -0
- data/lib/sportdb/formats/structs/country.rb +1 -43
- data/lib/sportdb/formats/structs/group.rb +25 -0
- data/lib/sportdb/formats/structs/league.rb +7 -26
- data/lib/sportdb/formats/structs/match.rb +72 -51
- data/lib/sportdb/formats/structs/round.rb +14 -4
- data/lib/sportdb/formats/structs/season.rb +3 -0
- data/lib/sportdb/formats/structs/team.rb +144 -0
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +83 -1
- data/test/test_clubs.rb +3 -3
- data/test/test_conf.rb +65 -0
- data/test/test_datafile.rb +21 -30
- data/test/test_match.rb +0 -6
- data/test/test_match_auto.rb +72 -0
- data/test/test_match_auto_champs.rb +45 -0
- data/test/test_match_auto_euro.rb +37 -0
- data/test/test_match_auto_worldcup.rb +61 -0
- data/test/test_match_champs.rb +27 -0
- data/test/test_match_eng.rb +26 -0
- data/test/test_match_euro.rb +27 -0
- data/test/test_match_worldcup.rb +27 -0
- data/test/test_name_helper.rb +67 -0
- data/test/test_outline_reader.rb +3 -3
- data/test/test_package.rb +21 -2
- data/test/test_package_match.rb +78 -0
- data/test/test_scores.rb +67 -51
- metadata +32 -12
- data/lib/sportdb/formats/scores.rb +0 -253
- data/lib/sportdb/formats/structs/club.rb +0 -213
- data/test/test_club_helpers.rb +0 -63
- data/test/test_datafile_match.rb +0 -65
@@ -0,0 +1,202 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb

## Scans a match schedule line by line and tallies what it finds:
##   team names, round and group headings, round and group definitions
##   and all lines that could not be matched (warnings).
##
## note: nothing gets declared upfront - that is the "auto conf(ig)" part;
##   all team names get picked up from the match (fixture) lines.
class AutoConfParser   ## todo/check: rename/change to MatchAutoConfParser - why? why not?

  ## matches a single bracketed text run e.g. [...]
  ##   note: non-greedy and requires at least one character inside the brackets
  TAG_RE = %r{\[
                [^\]]+?
              \]}x

  ## convenience helper - builds a parser and runs it
  ##   lines - match schedule as an array of lines or as a single string (text)
  ##   start - passed along as-is to the date finder (see find_date!)
  ## returns the array documented for the parse instance method
  def self.parse( lines, start: )
    ## todo/fix: add support for txt and lines
    ##  check if lines_or_txt is an array or just a string
    parser = new( lines, start )
    parser.parse
  end


  include Logging         ## e.g. logger#debug, logger#info, etc.
  include ParserHelper    ## e.g. read_lines, etc.


  def initialize( lines, start )
    # for convenience split string into lines
    ##    note: removes/strips empty lines
    ## todo/check: change to text instead of array of lines - why? why not?
    @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
    @start = start
  end

  ## walks all lines and classifies every line as
  ##   goals, round def, round, group def, group or game (in this order);
  ##   note: the is_*? checks are not defined in this class
  ##         (presumably mixed-in via ParserHelper - to be confirmed)
  ##
  ## returns an array with six entries:
  ##   [teams, rounds, groups, round_defs, group_defs, warns]
  ##   - teams, round_defs, group_defs: hash of name/line => usage counter
  ##   - rounds, groups: hash of line => {count:, match_count:}
  ##   - warns: array of all lines with no match
  def parse
    ## try to find all teams in match schedule
    @last_round = nil
    @last_group = nil

    ## definitions/defs
    @round_defs = Hash.new(0)
    @group_defs = Hash.new(0)

    ## usage/refs
    @rounds     = {}            ## track usage counter and match (two teams) counter
    @groups     = {}            ##  -"-
    @teams      = Hash.new(0)   ## keep track of usage counter

    @warns      = []    ## track list of warnings (unmatched lines)  too - why? why not?


    @lines.each do |line|
      if is_goals?( line )
        logger.debug "skipping matched goals line: >#{line}<"
      elsif is_round_def?( line )
        ## todo/fix:  add round definition (w begin n end date)
        ## todo: do not patch rounds with definition (already assume begin/end date is good)
        ##  -- how to deal with matches that get rescheduled/postponed?
        logger.debug "skipping matched round def line: >#{line}<"
        @round_defs[ line ] += 1
      elsif is_round?( line )
        logger.debug "skipping matched round line: >#{line}<"

        round = @rounds[ line ] ||= {count: 0, match_count: 0}    ## usage counter, match counter
        round[:count] +=1
        @last_round = round     ## note: all following games count for this round
      elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
        ### todo: add pipe (|) marker (required)
        logger.debug "skipping matched group def line: >#{line}<"
        @group_defs[ line ] += 1
      elsif is_group?( line )
        ##  -- lets you set group  e.g. Group A etc.
        logger.debug "skipping matched group line: >#{line}<"

        group = @groups[ line ] ||= {count: 0, match_count: 0}
        group[:count] +=1
        @last_group = group     ## note: all following games count for this group
        ## todo/fix:  parse group line!!!
      elsif try_parse_game( line )
        # do nothing here
      else
        logger.warn "skipping line (no match found): >#{line}<"
        @warns << line
      end
    end # lines.each

    [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
  end


  def try_parse_game( line )
    # note: clone line; for possible test do NOT modify in place for now
    # note: returns true if parsed, false if no match
    parse_game( line.dup )
  end

  ## tries to read a line as a game (fixture) with two teams
  ##   on success updates the team usage counters and
  ##   the match counters of the last seen round and group (if any)
  ##
  ## returns true  - for a line with two teams or
  ##                 for a valid line with nothing (no teams) left after clean-up
  ##         false - if the line does not split into exactly two team names
  def parse_game( line )
    logger.debug "parsing game (fixture) line: >#{line}<"

    ## remove all protected text runs e.g. []
    ##   fix: add [ to end-of-line too
    ##  todo/fix: move remove protected text runs AFTER find date!! - why? why not?

    line = line.gsub( TAG_RE, '' ).strip
    return true if line.empty?    ## note: return true (for valid line with no match/teams)


    ## split by geo (@) - remove for now
    ##   note: keep text before the first @ only;
    ##     the limit (2) makes sure split always returns a first entry -
    ##     without a limit a geo-only line e.g. "@" splits into an empty array
    ##     and the line would turn into nil (and crash further down)
    line = line.split( '@', 2 )[0]


    ## try find date
    date = find_date!( line, start: @start )
    if date  ## if found remove tagged run too; note using singular sub (NOT global gsub)
      line = line.sub( TAG_RE, '' ).strip
    else
      ## check for leading hours only e.g.  20.30 or 20:30 or 20h30 or 20H30 or 09h00
      ##   todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
      line = line.sub( %r{^   ## MUST be anchored to beginning of line
                          [012]?[0-9]
                          [.:hH]
                          [0-9][0-9]
                          (?=[ ])   ## must be followed by space for now (add end of line too - why? why not?)
                        }x, '' ).strip
    end

    return true if line.empty?    ## note: return true (for valid line with no match/teams)


    ## note: return value not used; called for its side effect on line only
    ##   (presumably replaces the score with a bracketed tag - to be confirmed)
    find_score!( line )

    logger.debug "  line: >#{line}<"

    line = line.sub( TAG_RE, '$$' )  # note: replace first score tag with $$
    line = line.gsub( TAG_RE, '' )   # note: replace/remove all other score tags with nothing

    ## clean-up remove all text run inside () or empty () too
    line = line.gsub( /\(
                          [^)]*?
                       \)/x, '' )


    ## check for more match separators e.g. - or vs for now
    line = line.sub( / \s+
                        ( -
                          | v
                          | vs\.?   # note: allow optional dot eg. vs.
                        )
                       \s+
                     /ix, '$$' )

    values = line.split( '$$' )
    values = values.map { |value| value.strip }        ## strip spaces
    values = values.select { |value| !value.empty? }   ## remove empty strings

    return true if values.size == 0    ## note: return true (for valid line with no match/teams)

    if values.size == 1
      puts "(auto config) try matching teams separated by spaces (2+):"
      pp values

      values = values[0].split( /[ ]{2,}/ )
      pp values
    end

    return false if values.size != 2

    puts "(auto config) try matching teams:"
    pp values

    @teams[ values[0] ] += 1    ## update usage counters
    @teams[ values[1] ] += 1

    @last_round[ :match_count ] += 1    if @last_round
    @last_group[ :match_count ] += 1    if @last_group

    true
  end



  def find_score!( line )
    # note: always call after find_dates !!!
    #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
    #   -- note: score might have two digits too
    ScoreFormats.find!( line )
  end

  def find_date!( line, start: )
    ## NB: lets us pass in start_at/end_at date (for event)
    #   for auto-complete year

    # extract date from line
    # and return it
    # NB: side effect - removes date from line string
    DateFormats.find!( line, start: start )
  end
end # class AutoConfParser
end # module SportDb
|
@@ -0,0 +1,84 @@
|
|
1
|
+
|
2
|
+
module SportDb
  ## Helpers for cleaning up names:
  ##   strip year and language markers (sanitize) and
  ##   build a compact lookup key (normalize).
  module NameHelper


    ## note: allow placeholder years to e.g. (-___) or (-????)
    ##    for marking missing (to be filled in) years
    ##  e.g. (1887-1911), (-2013),
    ##       (1946-2001, 2013-) etc.
    ##  todo/check: make more strict  e.g. only accept 4-digit years? - why? why not?
    YEAR_RE = %r{\(
                   [0-9, ?_-]+?     # note: non-greedy (minimum/first) match
                 \)}x

    ## removes ALL year markers e.g. (1887-1911), (-2013), (1946-2001, 2013-) etc.
    ##   and trims leading and trailing spaces
    ##  todo/check: only sub once (not global) - why? why not?
    def strip_year( name )
      without_years = name.gsub( YEAR_RE, '' )
      without_years.strip
    end

    ## returns the position of the first year marker or nil if none found
    def has_year?( name )
      name =~ YEAR_RE
    end


    LANG_RE = %r{\[
                   [a-z]{1,2}     # note also allow single-letter [a] or [d] or [e] - why? why not?
                 \]}x

    ## removes ALL lang codes e.g. [en], [fr], etc.
    ##   and trims leading and trailing spaces
    def strip_lang( name )
      without_langs = name.gsub( LANG_RE, '' )
      without_langs.strip
    end

    ## returns the position of the first lang code or nil if none found
    def has_lang?( name )
      name =~ LANG_RE
    end


    ## strips the year(s) first e.g. (1887-1911), (-2013), (1946-2001,2013-) etc.
    ##   and the lang codes second e.g. [en], [fr], etc.
    def sanitize( name )
      strip_lang( strip_year( name ) )
    end


    ## note: also add (),’,−  etc. e.g.
    ##   Estudiantes (LP)            => Estudiantes LP
    ##   Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
    ##   Myllykosken Pallo −47       => Myllykosken Pallo 47

    NORM_RE =  %r{
                    [.'’º/()_−-]
                  }x   # note: in [] dash (-) if last doesn't need to get escaped
    ## note: remove all dots (.), dash (-), ', º, /, etc.
    #     .  U+002E (46) - FULL STOP
    #     '  U+0027 (39) - APOSTROPHE
    #     ’  U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
    #     º  U+00BA (186) - MASCULINE ORDINAL INDICATOR
    #     /  U+002F (47) - SOLIDUS
    #     (  U+0028 (40) - LEFT PARENTHESIS
    #     )  U+0029 (41) - RIGHT PARENTHESIS
    #     −  U+2212 (8722) - MINUS SIGN
    #     -  U+002D (45) - HYPHEN-MINUS

    ## for norm(alizing) names - drops all characters listed in NORM_RE
    ##   note: spaces are kept as is
    def strip_norm( name )
      name.gsub( NORM_RE, '' )
    end

    ## builds the lookup key for a name:
    ##   no punctuation, no spaces, all lowercase
    ## note: do NOT call sanitize here (keep normalize "atomic" for reuse)
    def normalize( name )
      compact = strip_norm( name ).gsub( ' ', '' )   # note: also remove all spaces!!!

      ## todo/check: use our own downcase - why? why not?
      downcase_i18n( compact )     ## do NOT care about upper and lowercase for now
    end


    ## note: just forwards to Variant.find (not defined here)
    def variants( name )
      Variant.find( name )
    end

  end # module NameHelper
end # module SportDb
|
84
|
+
|
@@ -5,17 +5,45 @@ module SportDb
|
|
5
5
|
class OutlineReader
|
6
6
|
|
7
7
|
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
8
|
-
txt = File.open( path, 'r:utf-8' ).read
|
8
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read }
|
9
9
|
parse( txt )
|
10
10
|
end
|
11
11
|
|
12
12
|
def self.parse( txt )
|
13
|
+
new( txt ).parse
|
14
|
+
end
|
15
|
+
|
16
|
+
def initialize( txt )
|
17
|
+
@txt = txt
|
18
|
+
end
|
19
|
+
|
20
|
+
## note: skip "decorative" only heading e.g. ========
|
21
|
+
## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
|
22
|
+
HEADING_BLANK_RE = %r{\A
|
23
|
+
={1,}
|
24
|
+
\z}x
|
25
|
+
|
26
|
+
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
27
|
+
HEADING_RE = %r{\A
|
28
|
+
(?<marker>={1,}) ## 1. leading ======
|
29
|
+
[ ]*
|
30
|
+
(?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
|
31
|
+
[ ]*
|
32
|
+
=* ## 3. (optional) trailing ====
|
33
|
+
\z}x
|
34
|
+
|
35
|
+
def parse
|
13
36
|
outline=[] ## outline structure
|
37
|
+
start_para = true ## start new para(graph) on new text line?
|
14
38
|
|
15
|
-
txt.each_line do |line|
|
39
|
+
@txt.each_line do |line|
|
16
40
|
line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
|
17
41
|
|
18
|
-
|
42
|
+
if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
|
43
|
+
start_para = true
|
44
|
+
next
|
45
|
+
end
|
46
|
+
|
19
47
|
break if line == '__END__'
|
20
48
|
|
21
49
|
next if line.start_with?( '#' ) ## skip comments too
|
@@ -27,23 +55,33 @@ class OutlineReader
|
|
27
55
|
line = line.sub( /#.*/, '' ).strip
|
28
56
|
pp line
|
29
57
|
|
30
|
-
|
58
|
+
## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
|
59
|
+
next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
|
31
60
|
|
32
61
|
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
heading_level = $1.length ## count number of = for heading level
|
40
|
-
heading = $2.strip
|
62
|
+
if m=HEADING_RE.match( line )
|
63
|
+
start_para = true
|
64
|
+
|
65
|
+
heading_marker = m[:marker]
|
66
|
+
heading_level = m[:marker].length ## count number of = for heading level
|
67
|
+
heading = m[:text].strip
|
41
68
|
|
42
69
|
puts "heading #{heading_level} >#{heading}<"
|
43
70
|
outline << [:"h#{heading_level}", heading]
|
44
|
-
else
|
45
|
-
|
46
|
-
|
71
|
+
else ## assume it's a (plain/regular) text line
|
72
|
+
if start_para
|
73
|
+
outline << [:p, [line]]
|
74
|
+
start_para = false
|
75
|
+
else
|
76
|
+
node = outline[-1] ## get last entry
|
77
|
+
if node[0] == :p ## assert it's a p(aragraph) node!!!
|
78
|
+
node[1] << line ## add line to p(aragraph)
|
79
|
+
else
|
80
|
+
puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
|
81
|
+
pp node
|
82
|
+
exit 1
|
83
|
+
end
|
84
|
+
end
|
47
85
|
end
|
48
86
|
end
|
49
87
|
outline
|
@@ -1,160 +1,172 @@
|
|
1
|
-
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
##
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
end
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
end
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
class
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
end
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
1
|
+
|
2
|
+
module SportDb
  ## Sport datafile package - wraps a "generic" datafile package
  ##   (directory or zip archive) and adds
  ##   - the path patterns for the known datafile kinds
  ##       (conf, leagues, clubs, clubs wiki, club props, match)
  ##   - optional include / exclude entry filters
  class Package

    CONF_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                     \.conf\.txt$
                }x

    LEAGUES_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                       (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.leagues.txt
                        leagues\.txt$
                   }x

    CLUBS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                     (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.txt
                      clubs\.txt$
                 }x

    CLUBS_WIKI_RE = %r{  (?:^|/)               # beginning (^) or beginning of path (/)
                         (?:[a-z]{1,4}\.)?   # optional country code/key e.g. eng.clubs.wiki.txt
                          clubs\.wiki\.txt$
                      }x

    CLUB_PROPS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                         (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.props.txt
                          clubs\.props\.txt$
                      }x

    ## note: if pattern includes directory add here
    ##     (otherwise move to more "generic" datafile) - why? why not?
    MATCH_RE = %r{ /(?: \d{4}-\d{2}        ## season folder e.g. /2019-20
                      | \d{4}(--[^/]+)?    ## season year-only folder e.g. /2019 or /2016--france
                    )
                    /[a-z0-9_-]+\.txt$  ## txt e.g /1-premierleague.txt
                 }x

    ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?

    ## returns all .txt files below path (any depth) that match pattern
    ##   note: dumps (pp) all candidates and all matches to stdout
    def self.find( path, pattern )
      ## check all txt files
      ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
      candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
      pp candidates

      datafiles = candidates.select { |candidate| pattern.match( candidate ) }

      pp datafiles
      datafiles
    end


    ## find_*  - search a directory (tree) for datafiles of a kind
    ## match_* - check a single path; returns match data or nil
    def self.find_clubs( path, pattern: CLUBS_RE )            find( path, pattern ); end
    def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )  find( path, pattern ); end

    def self.match_clubs( path )       CLUBS_RE.match( path ); end
    def self.match_clubs_wiki( path )  CLUBS_WIKI_RE.match( path ); end
    def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end

    def self.find_leagues( path, pattern: LEAGUES_RE )  find( path, pattern ); end
    def self.match_leagues( path )  LEAGUES_RE.match( path ); end

    def self.find_conf( path, pattern: CONF_RE )  find( path, pattern ); end
    def self.match_conf( path )  CONF_RE.match( path ); end

    class << self
      alias_method :match_clubs?, :match_clubs
      alias_method :clubs?,       :match_clubs

      alias_method :match_clubs_wiki?, :match_clubs_wiki
      alias_method :clubs_wiki?,       :match_clubs_wiki

      alias_method :match_club_props?, :match_club_props
      alias_method :club_props?,       :match_club_props

      alias_method :match_leagues?, :match_leagues
      alias_method :leagues?,       :match_leagues

      alias_method :match_conf?, :match_conf
      alias_method :conf?,       :match_conf
    end


    ## attr_reader :pack    ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
    attr_accessor :include, :exclude

    ## private helpers - like select returns true for keeping and false for skipping entry
    ##   filter may be a
    ##   - string: checks if the entry name includes the string
    ##   - regex:  checks if the entry name matches
    ##   - anything else: gets called with the entry (e.g. proc / lambda);
    ##       note: the result gets returned as is
    def filter_clause( filter, entry )
      case filter
      when String then entry.name.include?( filter )
      when Regexp then filter.match?( entry.name )
      else             ## assume
        ## todo/check: pass in entry (and NOT entry.name) - why? why not?
        filter.call( entry )
      end
    end

    ## returns true for keeping and false for skipping the entry
    ##   note: if include is set, then exclude gets ignored
    def filter( entry )
      if @include
        ## todo/check: check for exclude here too - why? why not?
        filter_clause( @include, entry ) ? true : false   ## todo/check: is include a reserved keyword????
      else
        !(@exclude && filter_clause( @exclude, entry ))
      end
    end


    ## path_or_pack - an (already opened) Datafile::Package or
    ##                a (string) path to a directory or to a .zip file
    ## note: prints an error and exits (exit 1)
    ##         if the path does not exist or is not a directory or .zip file
    def initialize( path_or_pack )
      @include = nil
      @exclude = nil

      if path_or_pack.is_a?( Datafile::Package )
        @pack = path_or_pack
      else   ## assume it's a (string) path
        path = path_or_pack
        if !File.exist?( path )  ## file or directory
          puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
          exit 1
        end

        if File.directory?( path )
          @pack = Datafile::DirPackage.new( path )   ## delegate to "generic" package
        elsif File.file?( path ) && File.extname( path ) == '.zip'  # note: includes dot (.) eg .zip
          @pack = Datafile::ZipPackage.new( path )
        else
          puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
          exit 1
        end
      end
    end


    ## calls the block for every entry of the wrapped package
    ##   that matches pattern and passes the include / exclude filter
    ## note: returns an enumerator if no block is given
    ##   (before a call without a block crashed on the first entry)
    def each( pattern:, &blk )
      return enum_for( :each, pattern: pattern )  unless blk

      @pack.each( pattern: pattern ) do |entry|
        next unless filter( entry )   ## lets you use include/exclude filters
        blk.call( entry )
      end
    end

    def each_conf( &blk )        each( pattern: CONF_RE, &blk ); end
    def each_match( &blk )       each( pattern: MATCH_RE, &blk ); end
    def each_club_props( &blk )  each( pattern: CLUB_PROPS_RE, &blk ); end

    def each_leagues( &blk )     each( pattern: LEAGUES_RE, &blk ); end
    def each_clubs( &blk )       each( pattern: CLUBS_RE, &blk ); end
    def each_clubs_wiki( &blk )  each( pattern: CLUBS_WIKI_RE, &blk ); end

    ## return all match datafile entries
    def match() each_match.to_a; end
    alias_method :matches, :match
  end  # class Package


  ## package for a directory - path gets passed on to Datafile::DirPackage
  class DirPackage < Package
    def initialize( path )  super( Datafile::DirPackage.new( path ) ); end
  end

  ## package for a zip archive - path gets passed on to Datafile::ZipPackage
  class ZipPackage < Package
    def initialize( path )  super( Datafile::ZipPackage.new( path ) ); end
  end
end   # module SportDb
|