sportdb-formats 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +24 -4
- data/Rakefile +3 -3
- data/lib/sportdb/formats.rb +25 -2
- data/lib/sportdb/formats/config.rb +40 -0
- data/lib/sportdb/formats/datafile.rb +42 -62
- data/lib/sportdb/formats/datafile_package.rb +160 -0
- data/lib/sportdb/formats/match/conf_parser.rb +120 -0
- data/lib/sportdb/formats/match/mapper.rb +319 -0
- data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
- data/lib/sportdb/formats/match/match_parser.rb +659 -0
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
- data/lib/sportdb/formats/name_helper.rb +84 -0
- data/lib/sportdb/formats/outline_reader.rb +53 -15
- data/lib/sportdb/formats/package.rb +172 -160
- data/lib/sportdb/formats/parser_helper.rb +81 -0
- data/lib/sportdb/formats/score/score_formats.rb +180 -0
- data/lib/sportdb/formats/score/score_parser.rb +196 -0
- data/lib/sportdb/formats/structs/country.rb +1 -43
- data/lib/sportdb/formats/structs/group.rb +25 -0
- data/lib/sportdb/formats/structs/league.rb +7 -26
- data/lib/sportdb/formats/structs/match.rb +72 -51
- data/lib/sportdb/formats/structs/round.rb +14 -4
- data/lib/sportdb/formats/structs/season.rb +3 -0
- data/lib/sportdb/formats/structs/team.rb +144 -0
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +83 -1
- data/test/test_clubs.rb +3 -3
- data/test/test_conf.rb +65 -0
- data/test/test_datafile.rb +21 -30
- data/test/test_match.rb +0 -6
- data/test/test_match_auto.rb +72 -0
- data/test/test_match_auto_champs.rb +45 -0
- data/test/test_match_auto_euro.rb +37 -0
- data/test/test_match_auto_worldcup.rb +61 -0
- data/test/test_match_champs.rb +27 -0
- data/test/test_match_eng.rb +26 -0
- data/test/test_match_euro.rb +27 -0
- data/test/test_match_worldcup.rb +27 -0
- data/test/test_name_helper.rb +67 -0
- data/test/test_outline_reader.rb +3 -3
- data/test/test_package.rb +21 -2
- data/test/test_package_match.rb +78 -0
- data/test/test_scores.rb +67 -51
- metadata +32 -12
- data/lib/sportdb/formats/scores.rb +0 -253
- data/lib/sportdb/formats/structs/club.rb +0 -213
- data/test/test_club_helpers.rb +0 -63
- data/test/test_datafile_match.rb +0 -65
@@ -0,0 +1,202 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb

## Auto-configuration pass over a match schedule: walks the schedule line
##  by line and tallies the team names, rounds and groups in use -
##  no match records get built. Lines that fit no known pattern are
##  collected as warnings.
##
## note: the line classifiers (is_goals?, is_round_def?, is_round?,
##  is_group_def?, is_group?) are NOT defined in this class -
##  presumably mixed in via ParserHelper (todo/check: confirm)
class AutoConfParser     ## todo/check: rename/change to MatchAutoConfParser - why? why not?

  include Logging         ## e.g. logger#debug, logger#info, etc.
  include ParserHelper    ## e.g. read_lines, etc.

  ## any (non-empty) text run in square brackets e.g. [...]
  BRACKET_RUN_RE = /\[
                      [^\]]+?
                    \]/x

  ## leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
  ##   todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
  LEADING_HOURS_RE = %r{^           ## MUST be anchored to beginning of line
                         [012]?[0-9]
                         [.:hH]
                         [0-9][0-9]
                         (?=[ ])    ## must be followed by space for now (add end of line too - why? why not?)
                       }x

  ## any text run inside () or empty () too
  PAREN_RUN_RE = /\(
                    [^)]*?
                  \)/x

  ## more match separators e.g. - or v or vs for now
  TEAM_SEP_RE = / \s+
                  ( -
                   | v
                   | vs\.?   # note: allow optional dot eg. vs.
                  )
                  \s+
                /ix


  ## Convenience shortcut for new( lines, start ).parse
  ##
  ##  lines - array of lines (or a single string that gets split into lines)
  ##  start - passed along to the date finder (see find_date!)
  ##
  ##  returns the six-element array documented on #parse
  def self.parse( lines, start: )
    ##  todo/fix: add support for txt and lines
    ##    check if lines_or_txt is an array or just a string
    new( lines, start ).parse
  end


  def initialize( lines, start )
    # for convenience split string into lines
    ##    note: removes/strips empty lines
    ## todo/check: change to text instead of array of lines - why? why not?
    @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
    @start = start
  end


  ## Scans all lines and updates the usage counters.
  ##
  ##  returns [teams, rounds, groups, round_defs, group_defs, warns] where
  ##   - teams, round_defs and group_defs are hashes (key => usage count)
  ##   - rounds and groups are hashes (line => {count:, match_count:})
  ##   - warns is an array of all lines that did not match anything
  def parse
    ## try to find all teams in match schedule
    @last_round = nil
    @last_group = nil

    ## definitions/defs
    @round_defs = Hash.new(0)
    @group_defs = Hash.new(0)

    ## usage/refs
    @rounds = {}            ## track usage counter and match (two teams) counter
    @groups = {}            ##  -"-
    @teams  = Hash.new(0)   ## keep track of usage counter

    @warns  = []            ## track list of warnings (unmatched lines) too - why? why not?

    ## note: the order of the checks matters - first match wins
    @lines.each do |line|
      if is_goals?( line )
        logger.debug "skipping matched goals line: >#{line}<"
      elsif is_round_def?( line )
        ## todo/fix:  add round definition (w begin n end date)
        ## todo: do not patch rounds with definition (already assume begin/end date is good)
        ##  -- how to deal with matches that get rescheduled/postponed?
        logger.debug "skipping matched round def line: >#{line}<"
        @round_defs[ line ] += 1
      elsif is_round?( line )
        logger.debug "skipping matched round line: >#{line}<"

        round = @rounds[ line ] ||= { count: 0, match_count: 0 }   ## usage counter, match counter
        round[:count] += 1
        @last_round = round    ## matches that follow get counted for this round
      elsif is_group_def?( line )    ## NB: group goes after round (round may contain group marker too)
        ### todo: add pipe (|) marker (required)
        logger.debug "skipping matched group def line: >#{line}<"
        @group_defs[ line ] += 1
      elsif is_group?( line )
        ##  -- lets you set group  e.g. Group A etc.
        logger.debug "skipping matched group line: >#{line}<"

        group = @groups[ line ] ||= { count: 0, match_count: 0 }
        group[:count] += 1
        @last_group = group    ## matches that follow get counted for this group
        ## todo/fix: parse group line!!!
      elsif try_parse_game( line )
        # do nothing here
      else
        logger.warn "skipping line (no match found): >#{line}<"
        @warns << line
      end
    end # lines.each

    [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
  end


  def try_parse_game( line )
    # note: clone line; for possible test do NOT modify in place for now
    # note: returns true if parsed, false if no match
    parse_game( line.dup )
  end

  ## Tries to read a match (fixture) line and count its two team names.
  ##
  ##  returns true if the line is accepted (two teams found or
  ##   nothing left once tags, date, score, etc. are removed) and
  ##   false if the line does not split into exactly two team names
  def parse_game( line )
    logger.debug "parsing game (fixture) line: >#{line}<"

    ## remove all protected text runs e.g. []
    ##   fix: add [ to end-of-line too
    ##  todo/fix: move remove protected text runs AFTER find date!! - why? why not?
    line = line.gsub( BRACKET_RUN_RE, '' ).strip
    return true  if line.empty?    ## note: return true (for valid line with no match/teams)

    ## split by geo (@) - remove for now
    ##   note: use split with limit - without "@".split( '@' ) returns
    ##     an empty array and the line would end up nil (and crash below)
    line = line.split( '@', 2 )[0]

    ## try find date
    ##   note: the tagged run removed next is presumably the tag
    ##     left behind in the line by the date finder (todo/check: confirm)
    date = find_date!( line, start: @start )
    line = if date
             ## if found remove tagged run too; note using singular sub (NOT global gsub)
             line.sub( BRACKET_RUN_RE, '' ).strip
           else
             ## check for leading hours only
             line.sub( LEADING_HOURS_RE, '' ).strip
           end
    return true  if line.empty?    ## note: return true (for valid line with no match/teams)

    ## note: called for its side effect on line only (result not used here);
    ##   the tag replaced with $$ below is presumably the score tag left behind
    find_score!( line )
    logger.debug "  line: >#{line}<"

    line = line.sub( BRACKET_RUN_RE, '$$' )   # note: replace first score tag with $$
    line = line.gsub( BRACKET_RUN_RE, '' )    # note: replace/remove all other score tags with nothing

    ## clean-up remove all text run inside () or empty () too
    line = line.gsub( PAREN_RUN_RE, '' )

    ## check for more match separators e.g. - or vs for now
    line = line.sub( TEAM_SEP_RE, '$$' )

    values = line.split( '$$' )
                 .map    { |value| value.strip }     ## strip spaces
                 .reject { |value| value.empty? }    ## remove empty strings

    return true  if values.empty?    ## note: return true (for valid line with no match/teams)

    if values.size == 1
      ## note: use logger (like everywhere else in this class) - do NOT print to stdout
      logger.debug "(auto config) try matching teams separated by spaces (2+): #{values.inspect}"
      values = values[0].split( /[ ]{2,}/ )
      logger.debug "  #{values.inspect}"
    end

    return false  if values.size != 2

    logger.debug "(auto config) try matching teams: #{values.inspect}"

    @teams[ values[0] ] += 1    ## update usage counters
    @teams[ values[1] ] += 1

    @last_round[ :match_count ] += 1    if @last_round
    @last_group[ :match_count ] += 1    if @last_group

    true
  end


  def find_score!( line )
    # note: always call after find_dates !!!
    #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
    #   -- note: score might have two digits too
    ScoreFormats.find!( line )
  end

  def find_date!( line, start: )
    ## NB: lets us pass in start_at/end_at date (for event)
    #   for auto-complete year

    # extract date from line
    # and return it
    # NB: side effect - removes date from line string
    DateFormats.find!( line, start: start )
  end
end # class AutoConfParser
end # module SportDb
|
@@ -0,0 +1,84 @@
|
|
1
|
+
|
2
|
+
module SportDb
  ## Helpers for cleaning up and normalizing (club, league, etc.) names.
  module NameHelper

    ## year (range) marker in parentheses
    ##   e.g. (1887-1911), (-2013), (1946-2001, 2013-) etc.
    ##  note: placeholders for missing (to be filled in) years
    ##    are allowed too e.g. (-___) or (-????)
    ##  todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
    YEAR_RE = %r{\(
                  [0-9, ?_-]+?     # note: non-greedy (minimum/first) match
                \)}x

    ## language code marker in square brackets e.g. [en], [fr], etc.
    LANG_RE = %r{\[
                 [a-z]{1,2}     # note also allow single-letter [a] or [d] or [e] - why? why not?
               \]}x

    ## punctuation dropped when norm(alizing) names e.g.
    ##   Estudiantes (LP)             => Estudiantes LP
    ##   Saint Patrick’s Athletic FC  => Saint Patricks Athletic FC
    ##   Myllykosken Pallo −47        => Myllykosken Pallo 47
    ##
    ##   . U+002E (46) - FULL STOP
    ##   ' U+0027 (39) - APOSTROPHE
    ##   ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
    ##   º U+00BA (186) - MASCULINE ORDINAL INDICATOR
    ##   / U+002F (47) - SOLIDUS
    ##   ( U+0028 (40) - LEFT PARENTHESIS
    ##   ) U+0029 (41) - RIGHT PARENTHESIS
    ##   − U+2212 (8722) - MINUS SIGN
    ##   - U+002D (45) - HYPHEN-MINUS
    NORM_RE = %r{
                  [.'’º/()_−-]
              }x   # note: in [] dash (-) if last doesn't need to get escaped


    ## returns the name with all year markers cut out
    ##   (and leading and trailing spaces stripped)
    ##  todo/check: only sub once (not global) - why? why not?
    def strip_year( name )
      without_years = name.gsub( YEAR_RE, '' )
      without_years.strip
    end

    ## returns the position of the first year marker or nil if there is none
    def has_year?( name )
      name =~ YEAR_RE
    end

    ## returns the name with all language code markers cut out
    ##   (and leading and trailing spaces stripped)
    def strip_lang( name )
      without_langs = name.gsub( LANG_RE, '' )
      without_langs.strip
    end

    ## returns the position of the first language code marker or nil if there is none
    def has_lang?( name )
      name =~ LANG_RE
    end

    ## cuts out year markers first and language code markers second
    def sanitize( name )
      strip_lang( strip_year( name ) )
    end

    ## returns the name with all punctuation listed in NORM_RE removed
    def strip_norm( name )
      name.gsub( NORM_RE, '' )
    end

    ## returns the name without punctuation and without spaces, in lowercase
    ##   note: does NOT call sanitize (keep normalize "atomic" for reuse)
    ##   todo/check: use our own downcase - why? why not?
    def normalize( name )
      squeezed = strip_norm( name ).delete( ' ' )   # note: also remove all spaces!!!
      downcase_i18n( squeezed )                     # do NOT care about upper and lowercase for now
    end

    ## hands the name over to Variant.find
    def variants( name )
      Variant.find( name )
    end

  end # module NameHelper
end # module SportDb
|
84
|
+
|
@@ -5,17 +5,45 @@ module SportDb
|
|
5
5
|
class OutlineReader
|
6
6
|
|
7
7
|
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
8
|
-
txt = File.open( path, 'r:utf-8' ).read
|
8
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read }
|
9
9
|
parse( txt )
|
10
10
|
end
|
11
11
|
|
12
12
|
def self.parse( txt )
|
13
|
+
new( txt ).parse
|
14
|
+
end
|
15
|
+
|
16
|
+
def initialize( txt )
|
17
|
+
@txt = txt
|
18
|
+
end
|
19
|
+
|
20
|
+
## note: skip "decorative" only heading e.g. ========
|
21
|
+
## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
|
22
|
+
HEADING_BLANK_RE = %r{\A
|
23
|
+
={1,}
|
24
|
+
\z}x
|
25
|
+
|
26
|
+
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
27
|
+
HEADING_RE = %r{\A
|
28
|
+
(?<marker>={1,}) ## 1. leading ======
|
29
|
+
[ ]*
|
30
|
+
(?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
|
31
|
+
[ ]*
|
32
|
+
=* ## 3. (optional) trailing ====
|
33
|
+
\z}x
|
34
|
+
|
35
|
+
def parse
|
13
36
|
outline=[] ## outline structure
|
37
|
+
start_para = true ## start new para(graph) on new text line?
|
14
38
|
|
15
|
-
txt.each_line do |line|
|
39
|
+
@txt.each_line do |line|
|
16
40
|
line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
|
17
41
|
|
18
|
-
|
42
|
+
if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
|
43
|
+
start_para = true
|
44
|
+
next
|
45
|
+
end
|
46
|
+
|
19
47
|
break if line == '__END__'
|
20
48
|
|
21
49
|
next if line.start_with?( '#' ) ## skip comments too
|
@@ -27,23 +55,33 @@ class OutlineReader
|
|
27
55
|
line = line.sub( /#.*/, '' ).strip
|
28
56
|
pp line
|
29
57
|
|
30
|
-
|
58
|
+
## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
|
59
|
+
next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
|
31
60
|
|
32
61
|
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
heading_level = $1.length ## count number of = for heading level
|
40
|
-
heading = $2.strip
|
62
|
+
if m=HEADING_RE.match( line )
|
63
|
+
start_para = true
|
64
|
+
|
65
|
+
heading_marker = m[:marker]
|
66
|
+
heading_level = m[:marker].length ## count number of = for heading level
|
67
|
+
heading = m[:text].strip
|
41
68
|
|
42
69
|
puts "heading #{heading_level} >#{heading}<"
|
43
70
|
outline << [:"h#{heading_level}", heading]
|
44
|
-
else
|
45
|
-
|
46
|
-
|
71
|
+
else ## assume it's a (plain/regular) text line
|
72
|
+
if start_para
|
73
|
+
outline << [:p, [line]]
|
74
|
+
start_para = false
|
75
|
+
else
|
76
|
+
node = outline[-1] ## get last entry
|
77
|
+
if node[0] == :p ## assert it's a p(aragraph) node!!!
|
78
|
+
node[1] << line ## add line to p(aragraph)
|
79
|
+
else
|
80
|
+
puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
|
81
|
+
pp node
|
82
|
+
exit 1
|
83
|
+
end
|
84
|
+
end
|
47
85
|
end
|
48
86
|
end
|
49
87
|
outline
|
@@ -1,160 +1,172 @@
|
|
1
|
-
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
##
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
end
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
end
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
class
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
end
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
1
|
+
|
2
|
+
module SportDb

## Sports datafile package: wraps a "generic" datafile package (directory or
##  zip archive), knows the naming patterns for conf, leagues, clubs and match
##  datafiles and lets you narrow down entries with include / exclude filters.
class Package

  CONF_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                     \.conf\.txt$
                }x

  LEAGUES_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                     (?: [a-z]{1,4}\. )?    # optional country code/key e.g. eng.leagues.txt
                      leagues\.txt$
                }x

  CLUBS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                   (?: [a-z]{1,4}\. )?    # optional country code/key e.g. eng.clubs.txt
                    clubs\.txt$
               }x

  CLUBS_WIKI_RE = %r{  (?:^|/)               # beginning (^) or beginning of path (/)
                        (?:[a-z]{1,4}\.)?    # optional country code/key e.g. eng.clubs.wiki.txt
                         clubs\.wiki\.txt$
                    }x

  CLUB_PROPS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                        (?: [a-z]{1,4}\. )?    # optional country code/key e.g. eng.clubs.props.txt
                         clubs\.props\.txt$
                    }x

  ## note: if pattern includes directory add here
  ##     (otherwise move to more "generic" datafile) - why? why not?
  MATCH_RE = %r{ /(?: \d{4}-\d{2}          ## season folder e.g. /2019-20
                    | \d{4}(--[^/]+)?      ## season year-only folder e.g. /2019 or /2016--france
                  )
                   /[a-z0-9_-]+\.txt$      ## txt e.g /1-premierleague.txt
               }x

  ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?

  ## Collects all .txt files below path whose (full) path matches pattern.
  ##
  ##  returns an array of paths (empty if nothing matches)
  ##
  ##  note: does NOT print the candidates and results to stdout (anymore)
  def self.find( path, pattern )
    ## check all txt files
    ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
    candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
    candidates.select { |candidate| pattern.match( candidate ) }
  end


  def self.find_clubs( path, pattern: CLUBS_RE )            find( path, pattern ); end
  def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )  find( path, pattern ); end

  ## note: the match_* helpers return match data (or nil) and NOT true / false
  def self.match_clubs( path )                              CLUBS_RE.match( path ); end
  def self.match_clubs_wiki( path )                         CLUBS_WIKI_RE.match( path ); end
  def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end

  def self.find_leagues( path, pattern: LEAGUES_RE )        find( path, pattern ); end
  def self.match_leagues( path )                            LEAGUES_RE.match( path ); end

  def self.find_conf( path, pattern: CONF_RE )              find( path, pattern ); end
  def self.match_conf( path )                               CONF_RE.match( path ); end

  class << self
    alias_method :match_clubs?,      :match_clubs
    alias_method :clubs?,            :match_clubs

    alias_method :match_clubs_wiki?, :match_clubs_wiki
    alias_method :clubs_wiki?,       :match_clubs_wiki

    alias_method :match_club_props?, :match_club_props
    alias_method :club_props?,       :match_club_props

    alias_method :match_leagues?,    :match_leagues
    alias_method :leagues?,          :match_leagues

    alias_method :match_conf?,       :match_conf
    alias_method :conf?,             :match_conf
  end


  ## attr_reader :pack    ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?

  ## filters - a string (substring of entry name), a regex (matched against
  ##   entry name) or anything that responds to call (gets passed the entry)
  attr_accessor :include, :exclude

  ## private helpers - like select returns true for keeping and false for skipping entry
  def filter_clause( filter, entry )
    case filter
    when String
      entry.name.include?( filter )
    when Regexp
      filter.match( entry.name ) ? true : false
    else  ## assume callable (e.g. proc / lambda) - result gets returned as is
      ## todo/check: pass in entry (and NOT entry.name) - why? why not?
      filter.call( entry )
    end
  end

  ## returns true for keeping and false for skipping the entry
  ##   note: include wins - exclude gets checked only if no include filter is set
  def filter( entry )
    if @include    ## todo/check: is include a reserved keyword????
      ## todo/check: check for exclude here too - why? why not?
      filter_clause( @include, entry ) ? true : false
    else
      !( @exclude && filter_clause( @exclude, entry ) )
    end
  end


  ## path_or_pack - a ready-to-use Datafile::Package or
  ##   the path to a directory or to a file with .zip extension
  ##
  ##  NOTE(review): prints an error and exits the process (exit 1)
  ##    if the path does not exist or is not a directory / .zip file
  def initialize( path_or_pack )
    @include = nil
    @exclude = nil

    if path_or_pack.is_a?( Datafile::Package )
      @pack = path_or_pack
    else   ## assume it's a (string) path
      path = path_or_pack
      if !File.exist?( path )   ## file or directory
        puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
        exit 1
      end

      if File.directory?( path )
        @pack = Datafile::DirPackage.new( path )   ## delegate to "generic" package
      elsif File.file?( path ) && File.extname( path ) == '.zip'   # note: includes dot (.) eg .zip
        @pack = Datafile::ZipPackage.new( path )
      else
        puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
        exit 1
      end
    end
  end


  ## Passes every entry matching pattern (that gets past the
  ##   include / exclude filters) on to the block.
  ##
  ##  note: returns an enumerator if called without a block
  def each( pattern:, &blk )
    return enum_for( :each, pattern: pattern )  unless blk

    @pack.each( pattern: pattern ) do |entry|
      next unless filter( entry )   ## lets you use include/exclude filters
      blk.call( entry )
    end
  end

  def each_conf( &blk )       each( pattern: CONF_RE, &blk ); end
  def each_match( &blk )      each( pattern: MATCH_RE, &blk ); end
  def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end

  def each_leagues( &blk )    each( pattern: LEAGUES_RE, &blk ); end
  def each_clubs( &blk )      each( pattern: CLUBS_RE, &blk ); end
  def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end

  ## return all match datafile entries
  def match
    entries = []
    each_match { |entry| entries << entry }
    entries
  end
  alias_method :matches, :match
end  # class Package


class DirPackage < Package
  def initialize( path )    super( Datafile::DirPackage.new( path ) ); end
end

class ZipPackage < Package
  def initialize( path )    super( Datafile::ZipPackage.new( path ) ); end
end
end  # module SportDb
|