sportdb-formats 0.4.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +24 -4
  3. data/Rakefile +3 -3
  4. data/lib/sportdb/formats.rb +25 -2
  5. data/lib/sportdb/formats/config.rb +40 -0
  6. data/lib/sportdb/formats/datafile.rb +42 -62
  7. data/lib/sportdb/formats/datafile_package.rb +160 -0
  8. data/lib/sportdb/formats/match/conf_parser.rb +120 -0
  9. data/lib/sportdb/formats/match/mapper.rb +319 -0
  10. data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
  11. data/lib/sportdb/formats/match/match_parser.rb +659 -0
  12. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
  13. data/lib/sportdb/formats/name_helper.rb +84 -0
  14. data/lib/sportdb/formats/outline_reader.rb +53 -15
  15. data/lib/sportdb/formats/package.rb +172 -160
  16. data/lib/sportdb/formats/parser_helper.rb +81 -0
  17. data/lib/sportdb/formats/score/score_formats.rb +180 -0
  18. data/lib/sportdb/formats/score/score_parser.rb +196 -0
  19. data/lib/sportdb/formats/structs/country.rb +1 -43
  20. data/lib/sportdb/formats/structs/group.rb +25 -0
  21. data/lib/sportdb/formats/structs/league.rb +7 -26
  22. data/lib/sportdb/formats/structs/match.rb +72 -51
  23. data/lib/sportdb/formats/structs/round.rb +14 -4
  24. data/lib/sportdb/formats/structs/season.rb +3 -0
  25. data/lib/sportdb/formats/structs/team.rb +144 -0
  26. data/lib/sportdb/formats/version.rb +2 -2
  27. data/test/helper.rb +83 -1
  28. data/test/test_clubs.rb +3 -3
  29. data/test/test_conf.rb +65 -0
  30. data/test/test_datafile.rb +21 -30
  31. data/test/test_match.rb +0 -6
  32. data/test/test_match_auto.rb +72 -0
  33. data/test/test_match_auto_champs.rb +45 -0
  34. data/test/test_match_auto_euro.rb +37 -0
  35. data/test/test_match_auto_worldcup.rb +61 -0
  36. data/test/test_match_champs.rb +27 -0
  37. data/test/test_match_eng.rb +26 -0
  38. data/test/test_match_euro.rb +27 -0
  39. data/test/test_match_worldcup.rb +27 -0
  40. data/test/test_name_helper.rb +67 -0
  41. data/test/test_outline_reader.rb +3 -3
  42. data/test/test_package.rb +21 -2
  43. data/test/test_package_match.rb +78 -0
  44. data/test/test_scores.rb +67 -51
  45. metadata +32 -12
  46. data/lib/sportdb/formats/scores.rb +0 -253
  47. data/lib/sportdb/formats/structs/club.rb +0 -213
  48. data/test/test_club_helpers.rb +0 -63
  49. data/test/test_datafile_match.rb +0 -65
@@ -0,0 +1,202 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+
5
+
6
class AutoConfParser    ## todo/check: rename/change to MatchAutoConfParser - why? why not?
  ## Scans the lines of a match schedule and collects usage statistics
  ##  (team names, round/group headers, round/group definitions) plus all
  ##  lines that could not be classified.
  ##
  ##  Line classification (is_goals?, is_round?, is_group?, etc.) comes from
  ##  the ParserHelper mixin; date and score detection is delegated to
  ##  DateFormats.find! and ScoreFormats.find!.

  ## Convenience entry point - builds a parser and runs it.
  ##
  ##  lines - array of lines or a single string (split via read_lines)
  ##  start - start date passed on to the date finder (for auto-completing years)
  ##
  ##  Returns the six-element array documented on #parse.
  def self.parse( lines, start: )
    ## todo/fix: add support for txt and lines
    ##  check if lines_or_txt is an array or just a string
    parser = new( lines, start )
    parser.parse
  end


  include Logging         ## e.g. logger#debug, logger#info, etc.
  include ParserHelper    ## e.g. read_lines, etc.


  ## a bracketed text run e.g. [...] - also used for the tags that
  ##  (presumably) find_date! / find_score! leave behind in the line
  TAG_RE = /\[
              [^\]]+?
            \]/x

  ## any text run inside () or empty () too
  PAREN_RE = /\(
                [^)]*?
              \)/x

  ## leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
  ##  todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
  HOUR_RE = %r{^                ## MUST be anchored to beginning of line
                [012]?[0-9]
                [.:hH]
                [0-9][0-9]
                (?=[ ])         ## must be followed by space for now (add end of line too - why? why not?)
              }x

  ## more match separators e.g. - or v or vs for now
  SEPARATOR_RE = / \s+
                    ( -
                     | v
                     | vs\.?    # note: allow optional dot eg. vs.
                    )
                   \s+
                 /ix


  def initialize( lines, start )
    # for convenience split string into lines
    ##  note: removes/strips empty lines
    ## todo/check: change to text instead of array of lines - why? why not?
    @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
    @start = start
  end

  ## Walks all lines once and classifies each one.
  ##
  ##  Returns [teams, rounds, groups, round_defs, group_defs, warns] where
  ##   teams / round_defs / group_defs map a name or line to a usage count,
  ##   rounds / groups map a line to {count:, match_count:} and
  ##   warns lists all lines that matched nothing.
  def parse
    ## try to find all teams in match schedule
    @last_round = nil
    @last_group = nil

    ## definitions/defs
    @round_defs = Hash.new(0)
    @group_defs = Hash.new(0)

    ## usage/refs
    @rounds = {}            ## track usage counter and match (two teams) counter
    @groups = {}            ##  -"-
    @teams  = Hash.new(0)   ## keep track of usage counter

    @warns  = []            ## track list of warnings (unmatched lines) too - why? why not?

    @lines.each do |line|
      if is_goals?( line )
        logger.debug "skipping matched goals line: >#{line}<"
      elsif is_round_def?( line )
        ## todo/fix: add round definition (w begin n end date)
        ## todo: do not patch rounds with definition (already assume begin/end date is good)
        ##  -- how to deal with matches that get rescheduled/postponed?
        logger.debug "skipping matched round def line: >#{line}<"
        @round_defs[ line ] += 1
      elsif is_round?( line )
        logger.debug "skipping matched round line: >#{line}<"

        round = @rounds[ line ] ||= {count: 0, match_count: 0}   ## usage counter, match counter
        round[:count] += 1
        @last_round = round
      elsif is_group_def?( line )   ## NB: group goes after round (round may contain group marker too)
        ### todo: add pipe (|) marker (required)
        logger.debug "skipping matched group def line: >#{line}<"
        @group_defs[ line ] += 1
      elsif is_group?( line )
        ##  -- lets you set group e.g. Group A etc.
        logger.debug "skipping matched group line: >#{line}<"

        group = @groups[ line ] ||= {count: 0, match_count: 0}
        group[:count] += 1
        @last_group = group
        ## todo/fix: parse group line!!!
      elsif try_parse_game( line )
        # do nothing here
      else
        logger.warn "skipping line (no match found): >#{line}<"
        @warns << line
      end
    end # lines.each

    [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
  end


  ## Returns true if the line was accepted as a game/fixture line
  ##  (or is a valid line without teams), false if nothing matched.
  def try_parse_game( line )
    # note: clone line; for possible test do NOT modify in place for now
    parse_game( line.dup )
  end

  ## Tries to extract exactly two team names from a game/fixture line and
  ##  updates the team usage counters and the match counters of the
  ##  last seen round / group.
  ##
  ##  Returns true for a two-team line and for lines that turn out empty
  ##  once tags, geo, date/hour and scores are removed; returns false if
  ##  the number of team names found is not two.
  def parse_game( line )
    logger.debug "parsing game (fixture) line: >#{line}<"

    ## remove all protected text runs e.g. []
    ##   fix: add [ to end-of-line too
    ##  todo/fix: move remove protected text runs AFTER find date!! - why? why not?
    line = line.gsub( TAG_RE, '' ).strip
    return true if line.empty?    ## note: return true (for valid line with no match/teams)

    ## cut-off geo (@) part - remove for now
    ##  note: keeps everything before the first @; always a string
    ##        (split( '@' )[0] is nil for a line made up of @ only)
    line = line[ /\A[^@]*/ ]

    ## try find date
    date = find_date!( line, start: @start )
    if date   ## if found remove tagged run too; note using singular sub (NOT global gsub)
      line = line.sub( TAG_RE, '' ).strip
    else
      line = line.sub( HOUR_RE, '' ).strip
    end

    return true if line.empty?    ## note: return true (for valid line with no match/teams)

    ## note: only called for its effect on line; the score itself is not used here
    find_score!( line )

    logger.debug "  line: >#{line}<"

    line = line.sub(  TAG_RE, '$$' )   # note: replace first score tag with $$
    line = line.gsub( TAG_RE, '' )     # note: replace/remove all other score tags with nothing

    ## clean-up remove all text run inside () or empty () too
    line = line.gsub( PAREN_RE, '' )

    ## check for more match separators e.g. - or vs for now
    line = line.sub( SEPARATOR_RE, '$$' )

    values = line.split( '$$' ).map( &:strip ).reject( &:empty? )

    return true if values.empty?  ## note: return true (for valid line with no match/teams)

    if values.size == 1
      logger.debug "(auto config) try matching teams separated by spaces (2+): #{values.inspect}"

      values = values[0].split( /[ ]{2,}/ )
      logger.debug "  #{values.inspect}"
    end

    return false if values.size != 2

    logger.debug "(auto config) try matching teams: #{values.inspect}"

    @teams[ values[0] ] += 1    ## update usage counters
    @teams[ values[1] ] += 1

    @last_round[ :match_count ] += 1  if @last_round
    @last_group[ :match_count ] += 1  if @last_group

    true
  end


  def find_score!( line )
    # note: always call after find_dates !!!
    #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
    #   -- note: score might have two digits too
    ScoreFormats.find!( line )
  end

  def find_date!( line, start: )
    ## NB: lets us pass in start_at/end_at date (for event)
    #   for auto-complete year

    # extract date from line
    # and return it
    # NB: side effect - removes date from line string
    DateFormats.find!( line, start: start )
  end
end # class AutoConfParser
202
+ end # module SportDb
@@ -0,0 +1,84 @@
1
+
2
+ module SportDb
3
module NameHelper

  ## Parenthesized year note e.g. (1887-1911), (-2013), (1946-2001, 2013-)
  ##  note: placeholder years such as (-___) or (-????) are allowed too
  ##        for marking missing (to be filled in) years
  ##  todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
  YEAR_RE = %r{\(
                [0-9, ?_-]+?      # note: non-greedy (minimum/first) match
              \)}x

  ## Returns name with every year note removed and outer spaces stripped.
  ##  todo/check: only sub once (not global) - why? why not?
  def strip_year( name )
    without_years = name.gsub( YEAR_RE, '' )
    without_years.strip
  end

  ## Returns the match position of the first year note or nil if there is none.
  def has_year?( name )
    name =~ YEAR_RE
  end


  ## Bracketed language code e.g. [en], [fr]
  LANG_RE = %r{\[
                [a-z]{1,2}     # note also allow single-letter [a] or [d] or [e] - why? why not?
              \]}x

  ## Returns name with every language code removed and outer spaces stripped.
  def strip_lang( name )
    without_langs = name.gsub( LANG_RE, '' )
    without_langs.strip
  end

  ## Returns the match position of the first language code or nil if there is none.
  def has_lang?( name )
    name =~ LANG_RE
  end


  ## Removes year notes first e.g. (1887-1911), (-2013), (1946-2001,2013-)
  ##  and language codes second e.g. [en], [fr], etc.
  def sanitize( name )
    strip_lang( strip_year( name ) )
  end


  ## Punctuation dropped when norm(alizing) names e.g.
  ##   Estudiantes (LP)            => Estudiantes LP
  ##   Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
  ##   Myllykosken Pallo −47       => Myllykosken Pallo 47
  ##
  ##   .  U+002E (46)   - FULL STOP
  ##   '  U+0027 (39)   - APOSTROPHE
  ##   ’  U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
  ##   º  U+00BA (186)  - MASCULINE ORDINAL INDICATOR
  ##   /  U+002F (47)   - SOLIDUS
  ##   (  U+0028 (40)   - LEFT PARENTHESIS
  ##   )  U+0029 (41)   - RIGHT PARENTHESIS
  ##   _  underscore (listed in the character class too)
  ##   −  U+2212 (8722) - MINUS SIGN
  ##   -  U+002D (45)   - HYPHEN-MINUS
  NORM_RE = %r{
                [.'’º/()_−-]
            }x   # note: in [] dash (-) if last doesn't need to get escaped

  ## Returns name with all NORM_RE characters removed (spaces are kept).
  def strip_norm( name )
    name.gsub( NORM_RE, '' )
  end

  ## Returns the lookup form of name: punctuation removed, ALL spaces removed
  ##  and downcased via downcase_i18n (defined outside this module).
  ##  note: does NOT call sanitize (keep normalize "atomic" for reuse)
  ##  todo/check: use our own downcase - why? why not?
  def normalize( name )
    compact = strip_norm( name ).gsub( ' ', '' )   # note: also remove all spaces!!!
    downcase_i18n( compact )    ## do NOT care about upper and lowercase for now
  end


  ## Delegates to Variant.find (defined outside this module).
  def variants( name )
    Variant.find( name )
  end

end # module NameHelper
83
+ end # module SportDb
84
+
@@ -5,17 +5,45 @@ module SportDb
5
5
  class OutlineReader
6
6
 
7
7
  def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
8
- txt = File.open( path, 'r:utf-8' ).read
8
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read }
9
9
  parse( txt )
10
10
  end
11
11
 
12
12
  def self.parse( txt )
13
+ new( txt ).parse
14
+ end
15
+
16
+ def initialize( txt )
17
+ @txt = txt
18
+ end
19
+
20
+ ## note: skip "decorative" only heading e.g. ========
21
+ ## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
22
+ HEADING_BLANK_RE = %r{\A
23
+ ={1,}
24
+ \z}x
25
+
26
+ ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
27
+ HEADING_RE = %r{\A
28
+ (?<marker>={1,}) ## 1. leading ======
29
+ [ ]*
30
+ (?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
31
+ [ ]*
32
+ =* ## 3. (optional) trailing ====
33
+ \z}x
34
+
35
+ def parse
13
36
  outline=[] ## outline structure
37
+ start_para = true ## start new para(graph) on new text line?
14
38
 
15
- txt.each_line do |line|
39
+ @txt.each_line do |line|
16
40
  line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
17
41
 
18
- next if line.empty? ## todo/fix: keep blank line nodes e.g. just remove comments and process headings?! - why? why not?
42
+ if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
43
+ start_para = true
44
+ next
45
+ end
46
+
19
47
  break if line == '__END__'
20
48
 
21
49
  next if line.start_with?( '#' ) ## skip comments too
@@ -27,23 +55,33 @@ class OutlineReader
27
55
  line = line.sub( /#.*/, '' ).strip
28
56
  pp line
29
57
 
30
- next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
58
+ ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
59
+ next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
31
60
 
32
61
  ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
33
- ## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
34
- if line =~ /^(={1,}) ## leading ======
35
- ([^=]+?) ## text (note: for now no "inline" = allowed)
36
- =* ## (optional) trailing ====
37
- $/x
38
- heading_marker = $1
39
- heading_level = $1.length ## count number of = for heading level
40
- heading = $2.strip
62
+ if m=HEADING_RE.match( line )
63
+ start_para = true
64
+
65
+ heading_marker = m[:marker]
66
+ heading_level = m[:marker].length ## count number of = for heading level
67
+ heading = m[:text].strip
41
68
 
42
69
  puts "heading #{heading_level} >#{heading}<"
43
70
  outline << [:"h#{heading_level}", heading]
44
- else
45
- ## assume it's a (plain/regular) text line
46
- outline << [:l, line]
71
+ else ## assume it's a (plain/regular) text line
72
+ if start_para
73
+ outline << [:p, [line]]
74
+ start_para = false
75
+ else
76
+ node = outline[-1] ## get last entry
77
+ if node[0] == :p ## assert it's a p(aragraph) node!!!
78
+ node[1] << line ## add line to p(aragraph)
79
+ else
80
+ puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
81
+ pp node
82
+ exit 1
83
+ end
84
+ end
47
85
  end
48
86
  end
49
87
  outline
@@ -1,160 +1,172 @@
1
-
2
- module Datafile # note: keep Datafile in its own top-level module/namespace for now - why? why not?
3
-
4
-
5
- ZIP_RE = %r{ \.zip$
6
- }x
7
- def self.match_zip( path, pattern: ZIP_RE ) pattern.match( path ); end
8
-
9
-
10
-
11
- ## exclude pattern
12
- ## for now exclude all files in directories starting with a dot (e.g. .git/ or .github/ or .build/ etc.)
13
- ## todo/check: rename to EXCLUDE_DOT_DIRS_RE - why? why not?
14
- EXCLUDE_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
15
- \.[a-zA-Z0-9_-]+ ## (almost) any name BUT must start with dot e.g. .git, .build, etc.
16
- /
17
- }x
18
- def self.match_exclude( path, pattern: EXCLUDE_RE ) pattern.match( path ); end
19
-
20
-
21
-
22
- class Package; end ## use a shared base class for DirPackage, ZipPackage, etc.
23
-
24
- class DirPackage < Package ## todo/check: find a better name e.g. UnzippedPackage, FilesystemPackage, etc. - why? why not?
25
- class Entry
26
- def initialize( pack, path )
27
- @pack = pack ## parent package
28
- @path = path
29
- ## todo/fix!!!!: calculate @name (cut-off pack.path!!!)
30
- @name = path
31
- end
32
- def name() @name; end
33
- def read() File.open( @path, 'r:utf-8' ).read; end
34
- end # class DirPackage::Entry
35
-
36
-
37
- attr_reader :name, :path
38
-
39
- def initialize( path )
40
- ## todo/fix: expand_path ?! - why? why not? if you pass in ./ basename will be . and NOT directory name, for example!!!
41
- @path = path ## rename to root_path or base_path or somehting - why? why not?
42
-
43
- basename = File.basename( path ) ## note: ALWAYS keeps "extension"-like name if present (e.g. ./austria.zip => austria.zip)
44
- @name = basename
45
- end
46
-
47
- def each( pattern:, extension: 'txt' ) ## todo/check: rename to glob or something - why? why not?
48
- ## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
49
- ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
50
- Dir.glob( "#{@path}/**/{*,.*}.#{extension}" ).each do |path|
51
- ## todo/fix: (auto) skip and check for directories
52
- if EXCLUDE_RE.match( path )
53
- ## note: skip dot dirs (e.g. .build/, .git/, etc.)
54
- elsif pattern.match( path )
55
- yield( Entry.new( self, path ))
56
- else
57
- ## puts " skipping >#{path}<"
58
- end
59
- end
60
- end
61
-
62
- def find( name )
63
- Entry.new( self, "#{@path}/#{name}" )
64
- end
65
- end # class DirPackage
66
-
67
-
68
- ## helper wrapper for datafiles in zips
69
- class ZipPackage < Package
70
- class Entry
71
- def initialize( pack, entry )
72
- @pack = pack
73
- @entry = entry
74
- end
75
-
76
- def name() @entry.name; end
77
- def read
78
- txt = @entry.get_input_stream.read
79
- ## puts "** encoding: #{txt.encoding}" #=> encoding: ASCII-8BIT
80
- txt = txt.force_encoding( Encoding::UTF_8 )
81
- txt
82
- end
83
- end # class ZipPackage::Entry
84
-
85
- attr_reader :name, :path
86
-
87
- def initialize( path )
88
- @path = path
89
-
90
- extname = File.extname( path ) ## todo/check: double check if extension is .zip - why? why not?
91
- basename = File.basename( path, extname )
92
- @name = basename
93
- end
94
-
95
- def each( pattern: )
96
- Zip::File.open( @path ) do |zipfile|
97
- zipfile.each do |entry|
98
- if entry.directory?
99
- next ## skip
100
- elsif entry.file?
101
- if EXCLUDE_RE.match( entry.name )
102
- ## note: skip dot dirs (e.g. .build/, .git/, etc.)
103
- elsif pattern.match( entry.name )
104
- yield( Entry.new( self, entry ) ) # wrap entry in uniform access interface / api
105
- else
106
- ## puts " skipping >#{entry.name}<"
107
- end
108
- else
109
- puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
110
- exit 1
111
- end
112
- end
113
- end
114
- end
115
-
116
- def find( name )
117
- entries = match_entry( name )
118
- if entries.empty?
119
- puts "** !!! ERROR !!! zip entry >#{name}< not found in >#{@path}<; sorry"
120
- exit 1
121
- elsif entries.size > 1
122
- puts "** !!! ERROR !!! ambigious zip entry >#{name}<; found #{entries.size} entries in >#{@path}<:"
123
- pp entries
124
- exit 1
125
- else
126
- Entry.new( self, entries[0] ) # wrap entry in uniform access interface / api
127
- end
128
- end
129
-
130
- private
131
- def match_entry( name )
132
- ## todo/fix: use Zip::File.glob or find_entry or something better/faster? why? why not?
133
-
134
- pattern = %r{ #{Regexp.escape( name )} ## match string if ends with name
135
- $
136
- }x
137
-
138
- entries = []
139
- Zip::File.open( @path ) do |zipfile|
140
- zipfile.each do |entry|
141
- if entry.directory?
142
- next ## skip
143
- elsif entry.file?
144
- if EXCLUDE_RE.match( entry.name )
145
- ## note: skip dot dirs (e.g. .build/, .git/, etc.)
146
- elsif pattern.match( entry.name )
147
- entries << entry
148
- else
149
- ## no match; skip too
150
- end
151
- else
152
- puts "** !!! ERROR !!! #{entry.name} is unknown zip file type in >#{@path}<, sorry"
153
- exit 1
154
- end
155
- end
156
- end
157
- entries
158
- end
159
- end # class ZipPackage
160
- end # module Datafile
1
+
2
+ module SportDb
3
class Package
  ## Sport datafile package - wraps a "generic" Datafile package
  ##  (directory or zip) and adds finders / iterators for the well-known
  ##  datafile kinds (conf, leagues, clubs, clubs wiki, club props, matches)
  ##  plus optional include / exclude entry filters.

  CONF_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                 \.conf\.txt$
              }x

  LEAGUES_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                     (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.leagues.txt
                     leagues\.txt$
                 }x

  CLUBS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                   (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.txt
                   clubs\.txt$
               }x

  CLUBS_WIKI_RE = %r{  (?:^|/)               # beginning (^) or beginning of path (/)
                        (?:[a-z]{1,4}\.)?   # optional country code/key e.g. eng.clubs.wiki.txt
                        clubs\.wiki\.txt$
                    }x

  CLUB_PROPS_RE = %r{  (?: ^|/ )               # beginning (^) or beginning of path (/)
                        (?: [a-z]{1,4}\. )?   # optional country code/key e.g. eng.clubs.props.txt
                        clubs\.props\.txt$
                    }x

  ## note: if pattern includes directory add here
  ##  (otherwise move to more "generic" datafile) - why? why not?
  MATCH_RE = %r{ /(?: \d{4}-\d{2}          ## season folder e.g. /2019-20
                    | \d{4}(--[^/]+)?      ## season year-only folder e.g. /2019 or /2016--france
                  )
                 /[a-z0-9_-]+\.txt$        ## txt e.g /1-premierleague.txt
               }x

  ## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?

  ## Returns the paths of all .txt files below path (incl. dot files)
  ##  that match pattern.
  ##  note: pretty prints the candidates and the result (debug output kept as is)
  def self.find( path, pattern )
    ## check all txt files
    ## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
    candidates = Dir.glob( "#{path}/**/{*,.*}.txt" )
    pp candidates

    datafiles = candidates.select { |candidate| pattern.match( candidate ) }
    pp datafiles
    datafiles
  end


  def self.find_clubs( path, pattern: CLUBS_RE )            find( path, pattern ); end
  def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )  find( path, pattern ); end

  ## note: all match_* helpers accept an optional pattern: override
  ##        (same as the find_* helpers); returns MatchData or nil
  def self.match_clubs( path, pattern: CLUBS_RE )            pattern.match( path ); end
  def self.match_clubs_wiki( path, pattern: CLUBS_WIKI_RE )  pattern.match( path ); end
  def self.match_club_props( path, pattern: CLUB_PROPS_RE )  pattern.match( path ); end

  def self.find_leagues( path, pattern: LEAGUES_RE )   find( path, pattern ); end
  def self.match_leagues( path, pattern: LEAGUES_RE )  pattern.match( path ); end

  def self.find_conf( path, pattern: CONF_RE )   find( path, pattern ); end
  def self.match_conf( path, pattern: CONF_RE )  pattern.match( path ); end

  class << self
    alias_method :match_clubs?, :match_clubs
    alias_method :clubs?,       :match_clubs

    alias_method :match_clubs_wiki?, :match_clubs_wiki
    alias_method :clubs_wiki?,       :match_clubs_wiki

    alias_method :match_club_props?, :match_club_props
    alias_method :club_props?,       :match_club_props

    alias_method :match_leagues?, :match_leagues
    alias_method :leagues?,       :match_leagues

    alias_method :match_conf?, :match_conf
    alias_method :conf?,       :match_conf
  end


  ## attr_reader :pack    ## allow access to embedded ("low-level") delegate package (or hide!?) - why? why not?
  attr_accessor :include, :exclude

  ## helpers - like select returns true for keeping and false for skipping entry
  ##  note: documented as private helpers but public (kept public for compatibility)
  ##
  ##  filter may be a String (substring of entry.name), a Regexp (matched
  ##  against entry.name) or anything that responds to call (gets the entry)
  def filter_clause( filter, entry )
    case filter
    when String
      entry.name.include?( filter )
    when Regexp
      filter.match( entry.name ) ? true : false
    else  ## assume callable
      ## todo/check: pass in entry (and NOT entry.name) - why? why not?
      filter.call( entry )
    end
  end

  ## Returns true if entry passes the include / exclude filters.
  ##  note: if include is set, exclude gets ignored
  ##  todo/check: check for exclude too if include matches - why? why not?
  def filter( entry )
    if @include
      filter_clause( @include, entry ) ? true : false
    else
      !( @exclude && filter_clause( @exclude, entry ) )
    end
  end


  ## path_or_pack - a Datafile::Package (used as is) or a (string) path
  ##                to a directory or a file with .zip extension
  ##
  ##  note: prints an error and exits (exit 1) if the path does not exist
  ##        or is neither a directory nor a .zip file
  def initialize( path_or_pack )
    @include = nil
    @exclude = nil

    if path_or_pack.is_a?( Datafile::Package )
      @pack = path_or_pack
    else   ## assume it's a (string) path
      path = path_or_pack
      if !File.exist?( path )   ## file or directory
        puts "** !!! ERROR !!! file NOT found >#{path}<; cannot open package"
        exit 1
      end

      if File.directory?( path )
        @pack = Datafile::DirPackage.new( path )   ## delegate to "generic" package
      elsif File.file?( path ) && File.extname( path ) == '.zip'   # note: includes dot (.) eg .zip
        @pack = Datafile::ZipPackage.new( path )
      else
        puts "** !!! ERROR !!! cannot open package - directory or file with .zip extension required"
        exit 1
      end
    end
  end


  ## Calls the block with every entry of the wrapped package that matches
  ##  pattern and passes the include / exclude filters.
  ##  note: returns an Enumerator if called without a block
  ##        (true for all each_* helpers below too)
  def each( pattern:, &blk )
    return to_enum( :each, pattern: pattern )  unless blk

    @pack.each( pattern: pattern ) do |entry|
      next unless filter( entry )   ## lets you use include/exclude filters
      blk.call( entry )
    end
  end

  def each_conf( &blk )       each( pattern: CONF_RE, &blk ); end
  def each_match( &blk )      each( pattern: MATCH_RE, &blk ); end
  def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end

  def each_leagues( &blk )    each( pattern: LEAGUES_RE, &blk ); end
  def each_clubs( &blk )      each( pattern: CLUBS_RE, &blk ); end
  def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end

  ## return all match datafile entries
  def match() each_match.to_a; end
  alias_method :matches, :match
end  # class Package
163
+
164
+
165
class DirPackage < Package
  ## Package backed by a directory - always wraps a "generic"
  ##  Datafile::DirPackage for the given path.
  def initialize( path )
    generic_pack = Datafile::DirPackage.new( path )
    super( generic_pack )
  end
end
168
+
169
class ZipPackage < Package
  ## Package backed by a zip archive - always wraps a "generic"
  ##  Datafile::ZipPackage for the given path.
  def initialize( path )
    generic_pack = Datafile::ZipPackage.new( path )
    super( generic_pack )
  end
end
172
+ end # module SportDb