sportdb-formats 0.4.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +24 -4
  3. data/Rakefile +3 -3
  4. data/lib/sportdb/formats.rb +25 -2
  5. data/lib/sportdb/formats/config.rb +40 -0
  6. data/lib/sportdb/formats/datafile.rb +42 -62
  7. data/lib/sportdb/formats/datafile_package.rb +160 -0
  8. data/lib/sportdb/formats/match/conf_parser.rb +120 -0
  9. data/lib/sportdb/formats/match/mapper.rb +319 -0
  10. data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
  11. data/lib/sportdb/formats/match/match_parser.rb +659 -0
  12. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
  13. data/lib/sportdb/formats/name_helper.rb +84 -0
  14. data/lib/sportdb/formats/outline_reader.rb +53 -15
  15. data/lib/sportdb/formats/package.rb +172 -160
  16. data/lib/sportdb/formats/parser_helper.rb +81 -0
  17. data/lib/sportdb/formats/score/score_formats.rb +180 -0
  18. data/lib/sportdb/formats/score/score_parser.rb +196 -0
  19. data/lib/sportdb/formats/structs/country.rb +1 -43
  20. data/lib/sportdb/formats/structs/group.rb +25 -0
  21. data/lib/sportdb/formats/structs/league.rb +7 -26
  22. data/lib/sportdb/formats/structs/match.rb +72 -51
  23. data/lib/sportdb/formats/structs/round.rb +14 -4
  24. data/lib/sportdb/formats/structs/season.rb +3 -0
  25. data/lib/sportdb/formats/structs/team.rb +144 -0
  26. data/lib/sportdb/formats/version.rb +2 -2
  27. data/test/helper.rb +83 -1
  28. data/test/test_clubs.rb +3 -3
  29. data/test/test_conf.rb +65 -0
  30. data/test/test_datafile.rb +21 -30
  31. data/test/test_match.rb +0 -6
  32. data/test/test_match_auto.rb +72 -0
  33. data/test/test_match_auto_champs.rb +45 -0
  34. data/test/test_match_auto_euro.rb +37 -0
  35. data/test/test_match_auto_worldcup.rb +61 -0
  36. data/test/test_match_champs.rb +27 -0
  37. data/test/test_match_eng.rb +26 -0
  38. data/test/test_match_euro.rb +27 -0
  39. data/test/test_match_worldcup.rb +27 -0
  40. data/test/test_name_helper.rb +67 -0
  41. data/test/test_outline_reader.rb +3 -3
  42. data/test/test_package.rb +21 -2
  43. data/test/test_package_match.rb +78 -0
  44. data/test/test_scores.rb +67 -51
  45. metadata +32 -12
  46. data/lib/sportdb/formats/scores.rb +0 -253
  47. data/lib/sportdb/formats/structs/club.rb +0 -213
  48. data/test/test_club_helpers.rb +0 -63
  49. data/test/test_datafile_match.rb +0 -65
@@ -0,0 +1,202 @@
1
+ # encoding: utf-8
2
+
3
module SportDb

##
# Scans the lines of a match schedule and collects usage statistics:
# which team names, round lines and group lines show up and how often.
#
# Nothing is resolved against a database here; the parser only counts.
# The line classifiers (is_goals?, is_round_def?, is_round?, is_group_def?,
# is_group?) and read_lines are not defined in this class - presumably they
# come from the ParserHelper mixin (TODO confirm).
class AutoConfParser   ## todo/check: rename/change to MatchAutoConfParser - why? why not?

  ##
  # Convenience entry point: build a parser and run it.
  #
  # lines - a String (split into lines via read_lines) or an Array of lines
  # start - passed through to DateFormats.find! as start: (the original
  #         comments say it is used to auto-complete the year)
  #
  # Returns the six-element array documented on #parse.
  def self.parse( lines, start: )
    ## todo/fix: add support for txt and lines
    ##  check if lines_or_txt is an array or just a string
    parser = new( lines, start )
    parser.parse
  end


  include Logging         ## e.g. logger#debug, logger#info, etc.
  include ParserHelper    ## e.g. read_lines, etc.


  ## text run in square brackets e.g. [...]; used for protected text runs
  ##  and for the runs tagged by find_date! / find_score!
  PROTECTED_RUN_RE = /\[
                        [^\]]+?
                      \]/x

  ## leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
  ##  todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
  LEADING_HOURS_RE = %r{^           ## MUST be anchored to beginning of line
                        [012]?[0-9]
                        [.:hH]
                        [0-9][0-9]
                        (?=[ ])     ## must be followed by space for now (add end of line too - why? why not?)
                       }x

  ## text run inside () or empty () too
  TEXT_IN_PARENS_RE = /\(
                         [^)]*?
                       \)/x

  ## match separators between two teams e.g. - or v or vs
  TEAM_SEPARATOR_RE = / \s+
                         ( -
                           | v
                           | vs\.?   # note: allow optional dot eg. vs.
                         )
                        \s+
                      /ix

  ## marker used internally to split a line into the two team names
  TEAM_SPLIT_MARKER = '$$'


  def initialize( lines, start )
    # for convenience split string into lines
    ##    note: removes/strips empty lines
    ## todo/check: change to text instead of array of lines - why? why not?
    @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
    @start = start
  end

  ##
  # Walks all lines and updates the counters.
  #
  # Returns [teams, rounds, groups, round_defs, group_defs, warns] where
  #   teams / round_defs / group_defs  - Hash of text => usage count
  #   rounds / groups                  - Hash of line => {count:, match_count:}
  #   warns                            - Array of lines nothing matched
  def parse
    ## try to find all teams in match schedule
    @last_round = nil
    @last_group = nil

    ## definitions/defs
    @round_defs = Hash.new(0)
    @group_defs = Hash.new(0)

    ## usage/refs
    @rounds = {}            ## track usage counter and match (two teams) counter
    @groups = {}            ##  -"-
    @teams  = Hash.new(0)   ## keep track of usage counter

    @warns = []             ## track list of warnings (unmatched lines) too - why? why not?


    @lines.each do |line|
      if is_goals?( line )
        logger.debug "skipping matched goals line: >#{line}<"
      elsif is_round_def?( line )
        ## todo/fix: add round definition (w begin n end date)
        ## todo: do not patch rounds with definition (already assume begin/end date is good)
        ##  -- how to deal with matches that get rescheduled/postponed?
        logger.debug "skipping matched round def line: >#{line}<"
        @round_defs[ line ] += 1
      elsif is_round?( line )
        logger.debug "skipping matched round line: >#{line}<"

        round = @rounds[ line ] ||= {count: 0, match_count: 0}   ## usage counter, match counter
        round[:count] += 1
        @last_round = round
      elsif is_group_def?( line )   ## NB: group goes after round (round may contain group marker too)
        ### todo: add pipe (|) marker (required)
        logger.debug "skipping matched group def line: >#{line}<"
        @group_defs[ line ] += 1
      elsif is_group?( line )
        ##  -- lets you set group  e.g. Group A etc.
        logger.debug "skipping matched group line: >#{line}<"

        group = @groups[ line ] ||= {count: 0, match_count: 0}
        group[:count] += 1
        @last_group = group
        ## todo/fix: parse group line!!!
      elsif try_parse_game( line )
        # do nothing here
      else
        logger.warn "skipping line (no match found): >#{line}<"
        @warns << line
      end
    end # lines.each

    [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
  end


  ##
  # Non-destructive wrapper around #parse_game.
  # note: works on a copy of the line; do NOT modify the caller's line in place for now
  # note: returns true if parsed, false if no match
  def try_parse_game( line )
    parse_game( line.dup )
  end

  ##
  # Tries to read a game (fixture) line and update the team counters
  # (plus the match counters of the last seen round and group).
  #
  # Returns true for a line with two team names AND for a valid line that
  # has no teams left after clean-up; returns false if the line cannot be
  # split into exactly two team names.
  def parse_game( line )
    logger.debug "parsing game (fixture) line: >#{line}<"

    ## remove all protected text runs e.g. []
    ##   fix: add [ to end-of-line too
    ##  todo/fix: move remove protected text runs AFTER find date!! - why? why not?
    line = line.gsub( PROTECTED_RUN_RE, '' ).strip
    return true  if line.empty?    ## note: return true (for valid line with no match/teams)


    ## split by geo (@) - remove for now
    ##  note: with a limit split always returns a first element for a
    ##        non-empty line (a line with nothing but "@" used to yield nil here
    ##        and crash in the line.sub below)
    line = line.split( '@', 2 ).first.to_s


    ## try find date
    ##   note: find_date! is expected to change line in place (see #find_date!)
    date = find_date!( line, start: @start )
    if date  ## if found remove tagged run too; note using singular sub (NOT global gsub)
      line = line.sub( PROTECTED_RUN_RE, '' ).strip
    else
      ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
      line = line.sub( LEADING_HOURS_RE, '' ).strip
    end

    return true  if line.empty?    ## note: return true (for valid line with no match/teams)


    ## note: return value not used; called for its effect on line
    ##       (the following lines assume the score gets tagged in line - TODO confirm)
    find_score!( line )

    logger.debug " line: >#{line}<"

    line = line.sub( PROTECTED_RUN_RE, TEAM_SPLIT_MARKER )   # note: replace first score tag with $$
    line = line.gsub( PROTECTED_RUN_RE, '' )                 # note: replace/remove all other score tags with nothing

    ## clean-up remove all text run inside () or empty () too
    line = line.gsub( TEXT_IN_PARENS_RE, '' )


    ## check for more match separators e.g. - or vs for now
    line = line.sub( TEAM_SEPARATOR_RE, TEAM_SPLIT_MARKER )

    values = line.split( TEAM_SPLIT_MARKER )
                 .map { |value| value.strip }        ## strip spaces
                 .reject { |value| value.empty? }    ## remove empty strings

    return true  if values.empty?    ## note: return true (for valid line with no match/teams)

    if values.size == 1
      ## last resort - try team names separated by two or more spaces
      logger.debug "(auto config) try matching teams separated by spaces (2+): #{values.inspect}"

      values = values[0].split( /[ ]{2,}/ )
      logger.debug "(auto config)   split into: #{values.inspect}"
    end

    return false  if values.size != 2

    logger.debug "(auto config) try matching teams: #{values.inspect}"

    @teams[ values[0] ] += 1    ## update usage counters
    @teams[ values[1] ] += 1

    @last_round[ :match_count ] += 1    if @last_round
    @last_group[ :match_count ] += 1    if @last_group

    true
  end



  ##
  # Delegates to ScoreFormats.find!
  # note: always call after find_dates !!!
  #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
  #   -- note: score might have two digits too
  def find_score!( line )
    ScoreFormats.find!( line )
  end

  ##
  # Delegates to DateFormats.find! and returns its result.
  ## NB: lets us pass in start_at/end_at date (for event)
  #   for auto-complete year
  #
  # NB: side effect - removes date from line string
  def find_date!( line, start: )
    DateFormats.find!( line, start: start )
  end
end # class AutoConfParser
end # module SportDb
@@ -0,0 +1,84 @@
1
+
2
module SportDb
  ##
  # Mixin with helpers for cleaning up (club, league, etc.) names:
  # strip year and language tags, drop punctuation for normalized keys.
  module NameHelper

    ## year tag in parentheses e.g. (1887-1911), (-2013),
    ##                              (1946-2001, 2013-) etc.
    ##  note: placeholder years are allowed too e.g. (-___) or (-????)
    ##        for marking missing (to be filled in) years
    ##  todo/check: make more strict  e.g. only accept 4-digit years? - why? why not?
    YEAR_RE = %r{\(
                   [0-9, ?_-]+?     # note: non-greedy (minimum/first) match
                 \)}x

    ## Returns name with all year tags removed (and outer whitespace stripped).
    ##  todo/check: only sub once (not global) - why? why not?
    def strip_year( name )
      without_years = name.gsub( YEAR_RE, '' )
      without_years.strip
    end

    ## Returns the index of the first year tag or nil if there is none.
    def has_year?( name )
      name =~ YEAR_RE
    end


    ## language tag in square brackets e.g. [en], [fr], etc.
    LANG_RE = %r{\[
                   [a-z]{1,2}     # note also allow single-letter [a] or [d] or [e] - why? why not?
                 \]}x

    ## Returns name with all language tags removed (and outer whitespace stripped).
    def strip_lang( name )
      without_langs = name.gsub( LANG_RE, '' )
      without_langs.strip
    end

    ## Returns the index of the first language tag or nil if there is none.
    def has_lang?( name )
      name =~ LANG_RE
    end


    ## Removes year tags first e.g. (1887-1911), (-2013), (1946-2001,2013-)
    ##  and then lang codes e.g. [en], [fr], etc.
    def sanitize( name )
      strip_lang( strip_year( name ))
    end


    ## characters dropped when normalizing names e.g.
    ##   Estudiantes (LP)            => Estudiantes LP
    ##   Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
    ##   Myllykosken Pallo −47       => Myllykosken Pallo 47
    ##
    ##   .  U+002E (46)   - FULL STOP
    ##   '  U+0027 (39)   - APOSTROPHE
    ##   ’  U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
    ##   º  U+00BA (186)  - MASCULINE ORDINAL INDICATOR
    ##   /  U+002F (47)   - SOLIDUS
    ##   (  U+0028 (40)   - LEFT PARENTHESIS
    ##   )  U+0029 (41)   - RIGHT PARENTHESIS
    ##   −  U+2212 (8722) - MINUS SIGN
    ##   -  U+002D (45)   - HYPHEN-MINUS
    NORM_RE = %r{
                  [.'’º/()_−-]
                }x   # note: in [] dash (-) if last doesn't need to get escaped

    ## Returns name with all NORM_RE characters removed; spaces are kept.
    def strip_norm( name )
      name.gsub( NORM_RE, '' )
    end

    ## Builds a normalized form: punctuation and all spaces removed, then downcased.
    ##  note: does NOT call sanitize (keep normalize "atomic" for reuse)
    ##  todo/check: use our own downcase - why? why not?
    def normalize( name )
      squeezed = strip_norm( name ).gsub( ' ', '' )   # note: also remove all spaces!!!
      downcase_i18n( squeezed )    ## do NOT care about upper and lowercase for now
    end


    ## Delegates to Variant.find
    def variants( name )
      Variant.find( name )
    end

  end # module NameHelper
end # module SportDb
84
+
@@ -5,17 +5,45 @@ module SportDb
5
5
  class OutlineReader
6
6
 
7
7
  def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
8
- txt = File.open( path, 'r:utf-8' ).read
8
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read }
9
9
  parse( txt )
10
10
  end
11
11
 
12
12
  def self.parse( txt )
13
+ new( txt ).parse
14
+ end
15
+
16
+ def initialize( txt )
17
+ @txt = txt
18
+ end
19
+
20
+ ## note: skip "decorative" only heading e.g. ========
21
+ ## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
22
+ HEADING_BLANK_RE = %r{\A
23
+ ={1,}
24
+ \z}x
25
+
26
+ ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
27
+ HEADING_RE = %r{\A
28
+ (?<marker>={1,}) ## 1. leading ======
29
+ [ ]*
30
+ (?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
31
+ [ ]*
32
+ =* ## 3. (optional) trailing ====
33
+ \z}x
34
+
35
+ def parse
13
36
  outline=[] ## outline structure
37
+ start_para = true ## start new para(graph) on new text line?
14
38
 
15
- txt.each_line do |line|
39
+ @txt.each_line do |line|
16
40
  line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
17
41
 
18
- next if line.empty? ## todo/fix: keep blank line nodes e.g. just remove comments and process headings?! - why? why not?
42
+ if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
43
+ start_para = true
44
+ next
45
+ end
46
+
19
47
  break if line == '__END__'
20
48
 
21
49
  next if line.start_with?( '#' ) ## skip comments too
@@ -27,23 +55,33 @@ class OutlineReader
27
55
  line = line.sub( /#.*/, '' ).strip
28
56
  pp line
29
57
 
30
- next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
58
+ ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
59
+ next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
31
60
 
32
61
  ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
33
- ## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
34
- if line =~ /^(={1,}) ## leading ======
35
- ([^=]+?) ## text (note: for now no "inline" = allowed)
36
- =* ## (optional) trailing ====
37
- $/x
38
- heading_marker = $1
39
- heading_level = $1.length ## count number of = for heading level
40
- heading = $2.strip
62
+ if m=HEADING_RE.match( line )
63
+ start_para = true
64
+
65
+ heading_marker = m[:marker]
66
+ heading_level = m[:marker].length ## count number of = for heading level
67
+ heading = m[:text].strip
41
68
 
42
69
  puts "heading #{heading_level} >#{heading}<"
43
70
  outline << [:"h#{heading_level}", heading]
44
- else
45
- ## assume it's a (plain/regular) text line
46
- outline << [:l, line]
71
+ else ## assume it's a (plain/regular) text line
72
+ if start_para
73
+ outline << [:p, [line]]
74
+ start_para = false
75
+ else
76
+ node = outline[-1] ## get last entry
77
+ if node[0] == :p ## assert it's a p(aragraph) node!!!
78
+ node[1] << line ## add line to p(aragraph)
79
+ else
80
+ puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
81
+ pp node
82
+ exit 1
83
+ end
84
+ end
47
85
  end
48
86
  end
49
87
  outline
@@ -1,160 +1,172 @@
1
-
2
- module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
3
-
4
-
5
- ZIP_RE = %r{ \.zip$
6
- }x
7
- def self.match_zip( path, pattern: ZIP_RE ) pattern.match( path ); end
8
-
9
-
10
-
11
- ## exclude pattern
12
- ## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
13
- ## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
14
- EXCLUDE_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
15
- \.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
16
- /
17
- }x
18
- def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
19
-
20
-
21
-
22
- class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
23
-
24
- class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
25
- class Entry
26
- def initialize( pack, path )
27
- @pack = pack ## parent package
28
- @path = path
29
- ## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
30
- @name = path
31
- end
32
- def name() @name; end
33
- def read() File.open( @path, 'r:utf-8' ).read; end
34
- end # class DirPackage::Entry
35
-
36
-
37
- attr_reader :name, :path
38
-
39
- def initialize( path )
40
- ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
41
- @path = path ## rename to root_path or base_path or somehting - why? why not?
42
-
43
- basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
44
- @name = basename
45
- end
46
-
47
- def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
48
- ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
49
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
50
- Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
- ## todo/fix: (auto) skip and check for directories
52
- if EXCLUDE_RE.match( path )
53
- ## note: skip dot dirs (e.g. .build/, .git/, etc.)
54
- elsif pattern.match( path )
55
- yield( Entry.new( self, path ))
56
- else
57
- ## puts " skipping >#{path}<"
58
- end
59
- end
60
- end
61
-
62
- def find( name )
63
- Entry.new( self, "#{@path}/#{name}" )
64
- end
65
- end # class DirPackage
66
-
67
-
68
- ## helper wrapper for datafiles in zips
69
- class ZipPackage < Package
70
- class Entry
71
- def initialize( pack, entry )
72
- @pack = pack
73
- @entry = entry
74
- end
75
-
76
- def name() @entry.name; end
77
- def read
78
- txt = @entry.get_input_stream.read
79
- ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
80
- txt = txt.force_encoding( Encoding::UTF_8 )
81
- txt
82
- end
83
- end # class ZipPackage::Entry
84
-
85
- attr_reader :name, :path
86
-
87
- def initialize( path )
88
- @path = path
89
-
90
- extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
91
- basename = File.basename( path, extname )
92
- @name = basename
93
- end
94
-
95
- def each( pattern: )
96
- Zip::File.open( @path ) do |zipfile|
97
- zipfile.each do |entry|
98
- if entry.directory?
99
- next ## skip
100
- elsif entry.file?
101
- if EXCLUDE_RE.match( entry.name )
102
- ## note: skip dot dirs (e.g. .build/, .git/, etc.)
103
- elsif pattern.match( entry.name )
104
- yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
105
- else
106
- ## puts " skipping >#{entry.name}<"
107
- end
108
- else
109
- puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
110
- exit 1
111
- end
112
- end
113
- end
114
- end
115
-
116
- def find( name )
117
- entries = match_entry( name )
118
- if entries.empty?
119
- puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
120
- exit 1
121
- elsif entries.size > 1
122
- puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
123
- pp entries
124
- exit 1
125
- else
126
- Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
127
- end
128
- end
129
-
130
- private
131
- def match_entry( name )
132
- ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
133
-
134
- pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
135
- $
136
- }x
137
-
138
- entries = []
139
- Zip::File.open( @path ) do |zipfile|
140
- zipfile.each do |entry|
141
- if entry.directory?
142
- next ## skip
143
- elsif entry.file?
144
- if EXCLUDE_RE.match( entry.name )
145
- ## note: skip dot dirs (e.g. .build/, .git/, etc.)
146
- elsif pattern.match( entry.name )
147
- entries << entry
148
- else
149
- ## no match; skip too
150
- end
151
- else
152
- puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
153
- exit 1
154
- end
155
- end
156
- end
157
- entries
158
- end
159
- end # class ZipPackage
160
- end # module Datafile
1
+
2
module SportDb
  ##
  # Sport datafile package: wraps a "generic" datafile package (directory or
  # zip archive) and adds the well-known datafile name patterns plus optional
  # include/exclude filters on the entries.
  class Package

    CONF_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                     \.conf\.txt$
                }x

    LEAGUES_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                       (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.leagues.txt
                        leagues\.txt$
                   }x

    CLUBS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                     (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.txt
                      clubs\.txt$
                 }x

    CLUBS_WIKI_RE = %r{  (?:^|/)               # beginning (^) or beginning of path (/)
                          (?:[a-z]{1,4}\.)?   # optional country code/key e.g. eng.clubs.wiki.txt
                           clubs\.wiki\.txt$
                      }x

    CLUB_PROPS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                          (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.props.txt
                           clubs\.props\.txt$
                      }x

    ## note: if pattern includes directory add here
    ##    (otherwise move to more "generic" datafile) - why? why not?
    MATCH_RE = %r{ /(?: \d{4}-\d{2}          ## season folder e.g. /2019-20
                       | \d{4}(--[^/]+)?    ## season year-only folder e.g. /2019 or /2016--france
                    )
                    /[a-z0-9_-]+\.txt$      ## txt e.g /1-premierleague.txt
                 }x

    ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?

    ##
    # Returns all .txt file paths below path (directory) that match pattern.
    # note: prints the candidates and the matches (via pp) as it goes
    def self.find( path, pattern )
      ## check all txt files
      ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
      candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
      pp candidates

      datafiles = candidates.select { |candidate| pattern.match( candidate ) }
      pp datafiles
      datafiles
    end


    def self.find_clubs( path, pattern: CLUBS_RE )            find( path, pattern ); end
    def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )  find( path, pattern ); end

    ## note: the match_* helpers return MatchData or nil
    def self.match_clubs( path )       CLUBS_RE.match( path ); end
    def self.match_clubs_wiki( path )  CLUBS_WIKI_RE.match( path ); end
    def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end

    def self.find_leagues( path, pattern: LEAGUES_RE )  find( path, pattern ); end
    def self.match_leagues( path )     LEAGUES_RE.match( path ); end

    def self.find_conf( path, pattern: CONF_RE )  find( path, pattern ); end
    def self.match_conf( path )       CONF_RE.match( path ); end

    class << self
      alias_method :match_clubs?, :match_clubs
      alias_method :clubs?,       :match_clubs

      alias_method :match_clubs_wiki?, :match_clubs_wiki
      alias_method :clubs_wiki?,       :match_clubs_wiki

      alias_method :match_club_props?, :match_club_props
      alias_method :club_props?,       :match_club_props

      alias_method :match_leagues?, :match_leagues
      alias_method :leagues?,       :match_leagues

      alias_method :match_conf?, :match_conf
      alias_method :conf?,       :match_conf
    end


    ## attr_reader :pack     ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?

    ## include / exclude filters; each may be a String (substring of the entry name),
    ##   a Regexp (matched against the entry name) or anything that responds to call( entry )
    attr_accessor :include, :exclude

    ## private helpers - like select returns true for keeping and false for skipping entry
    def filter_clause( filter, entry )
      case filter
      when String
        entry.name.include?( filter )
      when Regexp
        filter.match( entry.name ) ? true : false
      else  ## assume callable e.g. proc/lambda
        ## todo/check: pass in entry (and NOT entry.name) - why? why not?
        filter.call( entry )
      end
    end

    ##
    # Returns true if entry should be kept, false if it should get skipped.
    # note: if an include filter is set, the exclude filter is NOT checked
    def filter( entry )
      if @include    ## todo/check: is include a reserved keyword????
        ## todo/check: check for exclude here too - why? why not?
        return filter_clause( @include, entry ) ? true : false
      end

      !( @exclude && filter_clause( @exclude, entry ) )
    end


    ##
    # path_or_pack - a Datafile::Package (used as is) or a path to a directory
    #                or to a file with .zip extension
    #
    # note: prints an error message and exits (exit 1) if the path does not exist
    #       or is neither a directory nor a .zip file
    def initialize( path_or_pack )
      @include = nil
      @exclude = nil

      if path_or_pack.is_a?( Datafile::Package )
        @pack = path_or_pack
      else   ## assume it's a (string) path
        path = path_or_pack
        if !File.exist?( path )   ## file or directory
          puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
          exit 1
        end

        if File.directory?( path )
          @pack = Datafile::DirPackage.new( path )   ## delegate to "generic" package
        elsif File.file?( path ) && File.extname( path ) == '.zip'  # note: includes dot (.) eg .zip
          @pack = Datafile::ZipPackage.new( path )
        else
          puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
          exit 1
        end
      end
    end


    ##
    # Calls the block with every entry of the wrapped package that matches
    # pattern and passes the include/exclude filters.
    #
    # note: without a block an Enumerator over the same entries gets returned
    #       (before, calling each without a block raised on the first entry)
    def each( pattern:, &blk )
      unless blk
        return Enumerator.new do |yielder|
                 each( pattern: pattern ) { |entry| yielder << entry }
               end
      end

      @pack.each( pattern: pattern ) do |entry|
        next unless filter( entry )   ## lets you use include/exclude filters
        blk.call( entry )
      end
    end

    def each_conf( &blk )       each( pattern: CONF_RE, &blk ); end
    def each_match( &blk )      each( pattern: MATCH_RE, &blk ); end
    def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end

    def each_leagues( &blk )    each( pattern: LEAGUES_RE, &blk ); end
    def each_clubs( &blk )      each( pattern: CLUBS_RE, &blk ); end
    def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end

    ## return all match datafile entries (as an array)
    def match() each_match.to_a; end
    alias_method :matches, :match
  end  # class Package


  ## convenience wrappers - always use a directory / zip "generic" package
  class DirPackage < Package
    def initialize( path )  super( Datafile::DirPackage.new( path ) ); end
  end

  class ZipPackage < Package
    def initialize( path )  super( Datafile::ZipPackage.new( path ) ); end
  end
end   # module SportDb