sportdb-formats 1.1.6 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +53 -9
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +130 -13
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
@@ -1,202 +1,270 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
-
5
-
6
- class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
7
-
8
- def self.parse( lines, start: )
9
- ## todo/fix: add support for txt and lines
10
- ## check if lines_or_txt is an array or just a string
11
- parser = new( lines, start )
12
- parser.parse
13
- end
14
-
15
-
16
- include Logging ## e.g. logger#debug, logger#info, etc.
17
- include ParserHelper ## e.g. read_lines, etc.
18
-
19
-
20
- def initialize( lines, start )
21
- # for convenience split string into lines
22
- ## note: removes/strips empty lines
23
- ## todo/check: change to text instead of array of lines - why? why not?
24
- @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
25
- @start = start
26
- end
27
-
28
- def parse
29
- ## try to find all teams in match schedule
30
- @last_round = nil
31
- @last_group = nil
32
-
33
- ## definitions/defs
34
- @round_defs = Hash.new(0)
35
- @group_defs = Hash.new(0)
36
-
37
- ## usage/refs
38
- @rounds = {} ## track usage counter and match (two teams) counter
39
- @groups = {} ## -"-
40
- @teams = Hash.new(0) ## keep track of usage counter
41
-
42
- @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
43
-
44
-
45
- @lines.each do |line|
46
- if is_goals?( line )
47
- logger.debug "skipping matched goals line: >#{line}<"
48
- elsif is_round_def?( line )
49
- ## todo/fix: add round definition (w begin n end date)
50
- ## todo: do not patch rounds with definition (already assume begin/end date is good)
51
- ## -- how to deal with matches that get rescheduled/postponed?
52
- logger.debug "skipping matched round def line: >#{line}<"
53
- @round_defs[ line ] += 1
54
- elsif is_round?( line )
55
- logger.debug "skipping matched round line: >#{line}<"
56
-
57
- round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
58
- round[:count] +=1
59
- @last_round = round
60
- elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
61
- ### todo: add pipe (|) marker (required)
62
- logger.debug "skipping matched group def line: >#{line}<"
63
- @group_defs[ line ] += 1
64
- elsif is_group?( line )
65
- ## -- lets you set group e.g. Group A etc.
66
- logger.debug "skipping matched group line: >#{line}<"
67
-
68
- group = @groups[ line ] ||= {count: 0, match_count: 0}
69
- group[:count] +=1
70
- @last_group = group
71
- ## todo/fix: parse group line!!!
72
- elsif try_parse_game( line )
73
- # do nothing here
74
- else
75
- logger.warn "skipping line (no match found): >#{line}<"
76
- @warns << line
77
- end
78
- end # lines.each
79
-
80
- [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
81
- end
82
-
83
-
84
- def try_parse_game( line )
85
- # note: clone line; for possible test do NOT modify in place for now
86
- # note: returns true if parsed, false if no match
87
- parse_game( line.dup )
88
- end
89
-
90
- def parse_game( line )
91
- logger.debug "parsing game (fixture) line: >#{line}<"
92
-
93
- ## remove all protected text runs e.g. []
94
- ## fix: add [ to end-of-line too
95
- ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
96
-
97
- line = line.gsub( /\[
98
- [^\]]+?
99
- \]/x, '' ).strip
100
- return true if line.empty? ## note: return true (for valid line with no match/teams)
101
-
102
-
103
- ## split by geo (@) - remove for now
104
- values = line.split( '@' )
105
- line = values[0]
106
-
107
-
108
- ## try find date
109
- date = find_date!( line, start: @start )
110
- if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
111
- line = line.sub( /\[
112
- [^\]]+?
113
- \]/x, '' ).strip
114
-
115
- else
116
- ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
117
- ## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
118
- line = line.sub( %r{^ ## MUST be anchored to beginning of line
119
- [012]?[0-9]
120
- [.:hH]
121
- [0-9][0-9]
122
- (?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
123
- }x, '' ).strip
124
- end
125
-
126
- return true if line.empty? ## note: return true (for valid line with no match/teams)
127
-
128
-
129
- score = find_score!( line )
130
-
131
- logger.debug " line: >#{line}<"
132
-
133
- line = line.sub( /\[
134
- [^\]]+?
135
- \]/x, '$$' ) # note: replace first score tag with $$
136
- line = line.gsub( /\[
137
- [^\]]+?
138
- \]/x, '' ) # note: replace/remove all other score tags with nothing
139
-
140
- ## clean-up remove all text run inside () or empty () too
141
- line = line.gsub( /\(
142
- [^)]*?
143
- \)/x, '' )
144
-
145
-
146
- ## check for more match separators e.g. - or vs for now
147
- line = line.sub( / \s+
148
- ( -
149
- | v
150
- | vs\.? # note: allow optional dot eg. vs.
151
- )
152
- \s+
153
- /ix, '$$' )
154
-
155
- values = line.split( '$$' )
156
- values = values.map { |value| value.strip } ## strip spaces
157
- values = values.select { |value| !value.empty? } ## remove empty strings
158
-
159
- return true if values.size == 0 ## note: return true (for valid line with no match/teams)
160
-
161
- if values.size == 1
162
- puts "(auto config) try matching teams separated by spaces (2+):"
163
- pp values
164
-
165
- values = values[0].split( /[ ]{2,}/ )
166
- pp values
167
- end
168
-
169
- return false if values.size != 2
170
-
171
- puts "(auto config) try matching teams:"
172
- pp values
173
-
174
- @teams[ values[0] ] += 1 ## update usage counters
175
- @teams[ values[1] ] += 1
176
-
177
- @last_round[ :match_count ] += 1 if @last_round
178
- @last_group[ :match_count ] += 1 if @last_group
179
-
180
- true
181
- end
182
-
183
-
184
-
185
- def find_score!( line )
186
- # note: always call after find_dates !!!
187
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
188
- # -- note: score might have two digits too
189
- ScoreFormats.find!( line )
190
- end
191
-
192
- def find_date!( line, start: )
193
- ## NB: lets us pass in start_at/end_at date (for event)
194
- # for auto-complete year
195
-
196
- # extract date from line
197
- # and return it
198
- # NB: side effect - removes date from line string
199
- DateFormats.find!( line, start: start )
200
- end
201
- end # class AutoConfParser
202
- end # module SportDb
1
+
2
+ module SportDb
3
+
4
+
5
+ class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
6
+
7
+ def self.parse( lines, start: )
8
+ ## todo/fix: add support for txt and lines
9
+ ## check if lines_or_txt is an array or just a string
10
+ parser = new( lines, start )
11
+ parser.parse
12
+ end
13
+
14
+
15
+ include Logging ## e.g. logger#debug, logger#info, etc.
16
+ include ParserHelper ## e.g. read_lines, etc.
17
+
18
+
19
+ def initialize( lines, start )
20
+ # for convenience split string into lines
21
+ ## note: removes/strips empty lines
22
+ ## todo/check: change to text instead of array of lines - why? why not?
23
+
24
+ ## note - wrap in enumerator/iterator a.k.a lines reader
25
+ @lines = LinesReader.new( lines.is_a?( String ) ?
26
+ read_lines( lines ) :
27
+ lines
28
+ )
29
+
30
+ @start = start
31
+ end
32
+
33
+
34
+ ## note: colon (:) MUST be followed by one (or more) spaces
35
+ ## make sure mon feb 12 18:10 will not match
36
+ ## allow 1. FC Köln etc.
37
+ ## Mainz 05:
38
+ ## limit to 30 chars max
39
+ ## only allow chars incl. intl but (NOT ()[]/;)
40
+ ##
41
+ ## Group A:
42
+ ## Group B: - remove colon
43
+ ## or lookup first
44
+
45
+ ATTRIB_REGEX = /^
46
+ [ ]*? # slurp leading spaces
47
+ (?<key>[^:|\]\[()\/; -]
48
+ [^:|\]\[()\/;]{0,30}
49
+ )
50
+ [ ]*? # slurp trailing spaces
51
+ :[ ]+
52
+ (?<value>.+)
53
+ [ ]*? # slurp trailing spaces
54
+ $
55
+ /ix
56
+
57
+
58
+ def parse
59
+ ## try to find all teams in match schedule
60
+ @last_round = nil
61
+ @last_group = nil
62
+
63
+ ## definitions/defs
64
+ @round_defs = Hash.new(0)
65
+ @group_defs = Hash.new(0)
66
+
67
+ ## usage/refs
68
+ @rounds = {} ## track usage counter and match (two teams) counter
69
+ @groups = {} ## -"-
70
+
71
+ @teams = Hash.new(0) ## keep track of usage counter
72
+
73
+ ## note: ground incl. optional city (timezone) etc. - why? why not?
74
+ @grounds = Hash.new(0)
75
+
76
+ @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
77
+
78
+
79
+ ## todo/fix - use @lines.rewind first here - why? why not?
80
+ @lines.each do |line|
81
+ if is_round_def?( line )
82
+ ## todo/fix: add round definition (w begin n end date)
83
+ ## todo: do not patch rounds with definition (already assume begin/end date is good)
84
+ ## -- how to deal with matches that get rescheduled/postponed?
85
+ logger.debug "skipping matched round def line: >#{line}<"
86
+ @round_defs[ line ] += 1
87
+ elsif is_round?( line )
88
+ logger.debug "skipping matched round line: >#{line}<"
89
+
90
+ round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
91
+ round[:count] +=1
92
+ @last_round = round
93
+ elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
94
+ ### todo: add pipe (|) marker (required)
95
+ logger.debug "skipping matched group def line: >#{line}<"
96
+ @group_defs[ line ] += 1
97
+ elsif is_group?( line )
98
+ ## -- lets you set group e.g. Group A etc.
99
+ logger.debug "skipping matched group line: >#{line}<"
100
+
101
+ group = @groups[ line ] ||= {count: 0, match_count: 0}
102
+ group[:count] +=1
103
+ @last_group = group
104
+ ## todo/fix: parse group line!!!
105
+ elsif m=ATTRIB_REGEX.match( line )
106
+ ## note: check attrib regex AFTER group def e.g.:
107
+ ## Group A:
108
+ ## Group B: etc.
109
+ ## todo/fix - change Group A: to Group A etc.
110
+ ## Group B: to Group B
111
+
112
+ ## check if line ends with dot
113
+ ## if not slurp up lines to the next dot!!!
114
+ logger.debug "skipping key/value line - >#{line}<"
115
+ while !line.end_with?( '.' ) || line.nil? do
116
+ line = @lines.next
117
+ logger.debug "skipping key/value line (cont.) - >#{line}<"
118
+ end
119
+ elsif is_goals?( line )
120
+ ## note - goals must be AFTER attributes!!!
121
+ logger.debug "skipping matched goals line: >#{line}<"
122
+ elsif try_parse_game( line )
123
+ # do nothing here
124
+ else
125
+ logger.warn "skipping line (no match found): >#{line}<"
126
+ @warns << line
127
+ end
128
+ end # lines.each
129
+
130
+ ## new - add grounds and cities
131
+ [@teams, @rounds, @groups, @round_defs, @group_defs,
132
+ @grounds, ## note: ground incl. optional city (timezone) etc.
133
+ @warns]
134
+ end
135
+
136
+
137
+ def try_parse_game( line )
138
+ # note: clone line; for possible test do NOT modify in place for now
139
+ # note: returns true if parsed, false if no match
140
+ parse_game( line.dup )
141
+ end
142
+
143
+ def parse_game( line )
144
+ logger.debug "parsing game (fixture) line: >#{line}<"
145
+
146
+ ## remove all protected text runs e.g. []
147
+ ## fix: add [ to end-of-line too
148
+ ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
149
+
150
+ line = line.gsub( /\[
151
+ [^\]]+?
152
+ \]/x, '' ).strip
153
+ return true if line.empty? ## note: return true (for valid line with no match/teams)
154
+
155
+
156
+ ## split by geo (@) - remove for now
157
+ values = line.split( '@' )
158
+
159
+ ### check for ground/stadium and cities
160
+ if values.size == 1
161
+ ## no stadium
162
+ elsif values.size == 2 # bingo!!!
163
+ ## process stadium, city (timezone) etc.
164
+ ## for now keep it simple - pass along "unparsed" all-in-one
165
+ ground = values[1].gsub( /[ \t]+/, ' ').strip ## squish
166
+ @grounds[ ground ] += 1
167
+ else
168
+ puts "!! ERROR - too many @-markers found in line:"
169
+ puts line
170
+ exit 1
171
+ end
172
+
173
+ line = values[0]
174
+
175
+
176
+ ## try find date
177
+ date = find_date!( line, start: @start )
178
+ if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
179
+ line = line.sub( /\[
180
+ [^\]]+?
181
+ \]/x, '' ).strip
182
+
183
+ else
184
+ ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
185
+ ## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
186
+ line = line.sub( %r{^ ## MUST be anchored to beginning of line
187
+ [012]?[0-9]
188
+ [.:hH]
189
+ [0-9][0-9]
190
+ (?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
191
+ }x, '' ).strip
192
+ end
193
+
194
+ return true if line.empty? ## note: return true (for valid line with no match/teams)
195
+
196
+
197
+ score = find_score!( line )
198
+
199
+ logger.debug " line: >#{line}<"
200
+
201
+ line = line.sub( /\[
202
+ [^\]]+?
203
+ \]/x, '$$' ) # note: replace first score tag with $$
204
+ line = line.gsub( /\[
205
+ [^\]]+?
206
+ \]/x, '' ) # note: replace/remove all other score tags with nothing
207
+
208
+ ## clean-up remove all text run inside () or empty () too
209
+ line = line.gsub( /\(
210
+ [^)]*?
211
+ \)/x, '' )
212
+
213
+
214
+ ## check for more match separators e.g. - or vs for now
215
+ line = line.sub( / \s+
216
+ ( -
217
+ | v
218
+ | vs\.? # note: allow optional dot eg. vs.
219
+ )
220
+ \s+
221
+ /ix, '$$' )
222
+
223
+ values = line.split( '$$' )
224
+ values = values.map { |value| value.strip } ## strip spaces
225
+ values = values.select { |value| !value.empty? } ## remove empty strings
226
+
227
+ return true if values.size == 0 ## note: return true (for valid line with no match/teams)
228
+
229
+ if values.size == 1
230
+ puts "(auto config) try matching teams separated by spaces (2+):"
231
+ pp values
232
+
233
+ values = values[0].split( /[ ]{2,}/ )
234
+ pp values
235
+ end
236
+
237
+ return false if values.size != 2
238
+
239
+ puts "(auto config) try matching teams:"
240
+ pp values
241
+
242
+ @teams[ values[0] ] += 1 ## update usage counters
243
+ @teams[ values[1] ] += 1
244
+
245
+ @last_round[ :match_count ] += 1 if @last_round
246
+ @last_group[ :match_count ] += 1 if @last_group
247
+
248
+ true
249
+ end
250
+
251
+
252
+
253
+ def find_score!( line )
254
+ # note: always call after find_dates !!!
255
+ # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
256
+ # -- note: score might have two digits too
257
+ ScoreFormats.find!( line )
258
+ end
259
+
260
+ def find_date!( line, start: )
261
+ ## NB: lets us pass in start_at/end_at date (for event)
262
+ # for auto-complete year
263
+
264
+ # extract date from line
265
+ # and return it
266
+ # NB: side effect - removes date from line string
267
+ DateFormats.find!( line, start: start )
268
+ end
269
+ end # class AutoConfParser
270
+ end # module SportDb
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  module SportDb
4
3