sportdb-formats 1.1.6 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +53 -9
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +130 -13
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
@@ -1,202 +1,270 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
-
5
-
6
- class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
7
-
8
- def self.parse( lines, start: )
9
- ## todo/fix: add support for txt and lines
10
- ## check if lines_or_txt is an array or just a string
11
- parser = new( lines, start )
12
- parser.parse
13
- end
14
-
15
-
16
- include Logging ## e.g. logger#debug, logger#info, etc.
17
- include ParserHelper ## e.g. read_lines, etc.
18
-
19
-
20
- def initialize( lines, start )
21
- # for convenience split string into lines
22
- ## note: removes/strips empty lines
23
- ## todo/check: change to text instead of array of lines - why? why not?
24
- @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
25
- @start = start
26
- end
27
-
28
- def parse
29
- ## try to find all teams in match schedule
30
- @last_round = nil
31
- @last_group = nil
32
-
33
- ## definitions/defs
34
- @round_defs = Hash.new(0)
35
- @group_defs = Hash.new(0)
36
-
37
- ## usage/refs
38
- @rounds = {} ## track usage counter and match (two teams) counter
39
- @groups = {} ## -"-
40
- @teams = Hash.new(0) ## keep track of usage counter
41
-
42
- @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
43
-
44
-
45
- @lines.each do |line|
46
- if is_goals?( line )
47
- logger.debug "skipping matched goals line: >#{line}<"
48
- elsif is_round_def?( line )
49
- ## todo/fix: add round definition (w begin n end date)
50
- ## todo: do not patch rounds with definition (already assume begin/end date is good)
51
- ## -- how to deal with matches that get rescheduled/postponed?
52
- logger.debug "skipping matched round def line: >#{line}<"
53
- @round_defs[ line ] += 1
54
- elsif is_round?( line )
55
- logger.debug "skipping matched round line: >#{line}<"
56
-
57
- round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
58
- round[:count] +=1
59
- @last_round = round
60
- elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
61
- ### todo: add pipe (|) marker (required)
62
- logger.debug "skipping matched group def line: >#{line}<"
63
- @group_defs[ line ] += 1
64
- elsif is_group?( line )
65
- ## -- lets you set group e.g. Group A etc.
66
- logger.debug "skipping matched group line: >#{line}<"
67
-
68
- group = @groups[ line ] ||= {count: 0, match_count: 0}
69
- group[:count] +=1
70
- @last_group = group
71
- ## todo/fix: parse group line!!!
72
- elsif try_parse_game( line )
73
- # do nothing here
74
- else
75
- logger.warn "skipping line (no match found): >#{line}<"
76
- @warns << line
77
- end
78
- end # lines.each
79
-
80
- [@teams, @rounds, @groups, @round_defs, @group_defs, @warns]
81
- end
82
-
83
-
84
- def try_parse_game( line )
85
- # note: clone line; for possible test do NOT modify in place for now
86
- # note: returns true if parsed, false if no match
87
- parse_game( line.dup )
88
- end
89
-
90
- def parse_game( line )
91
- logger.debug "parsing game (fixture) line: >#{line}<"
92
-
93
- ## remove all protected text runs e.g. []
94
- ## fix: add [ to end-of-line too
95
- ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
96
-
97
- line = line.gsub( /\[
98
- [^\]]+?
99
- \]/x, '' ).strip
100
- return true if line.empty? ## note: return true (for valid line with no match/teams)
101
-
102
-
103
- ## split by geo (@) - remove for now
104
- values = line.split( '@' )
105
- line = values[0]
106
-
107
-
108
- ## try find date
109
- date = find_date!( line, start: @start )
110
- if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
111
- line = line.sub( /\[
112
- [^\]]+?
113
- \]/x, '' ).strip
114
-
115
- else
116
- ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
117
- ## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
118
- line = line.sub( %r{^ ## MUST be anchored to beginning of line
119
- [012]?[0-9]
120
- [.:hH]
121
- [0-9][0-9]
122
- (?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
123
- }x, '' ).strip
124
- end
125
-
126
- return true if line.empty? ## note: return true (for valid line with no match/teams)
127
-
128
-
129
- score = find_score!( line )
130
-
131
- logger.debug " line: >#{line}<"
132
-
133
- line = line.sub( /\[
134
- [^\]]+?
135
- \]/x, '$$' ) # note: replace first score tag with $$
136
- line = line.gsub( /\[
137
- [^\]]+?
138
- \]/x, '' ) # note: replace/remove all other score tags with nothing
139
-
140
- ## clean-up remove all text run inside () or empty () too
141
- line = line.gsub( /\(
142
- [^)]*?
143
- \)/x, '' )
144
-
145
-
146
- ## check for more match separators e.g. - or vs for now
147
- line = line.sub( / \s+
148
- ( -
149
- | v
150
- | vs\.? # note: allow optional dot eg. vs.
151
- )
152
- \s+
153
- /ix, '$$' )
154
-
155
- values = line.split( '$$' )
156
- values = values.map { |value| value.strip } ## strip spaces
157
- values = values.select { |value| !value.empty? } ## remove empty strings
158
-
159
- return true if values.size == 0 ## note: return true (for valid line with no match/teams)
160
-
161
- if values.size == 1
162
- puts "(auto config) try matching teams separated by spaces (2+):"
163
- pp values
164
-
165
- values = values[0].split( /[ ]{2,}/ )
166
- pp values
167
- end
168
-
169
- return false if values.size != 2
170
-
171
- puts "(auto config) try matching teams:"
172
- pp values
173
-
174
- @teams[ values[0] ] += 1 ## update usage counters
175
- @teams[ values[1] ] += 1
176
-
177
- @last_round[ :match_count ] += 1 if @last_round
178
- @last_group[ :match_count ] += 1 if @last_group
179
-
180
- true
181
- end
182
-
183
-
184
-
185
- def find_score!( line )
186
- # note: always call after find_dates !!!
187
- # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
188
- # -- note: score might have two digits too
189
- ScoreFormats.find!( line )
190
- end
191
-
192
- def find_date!( line, start: )
193
- ## NB: lets us pass in start_at/end_at date (for event)
194
- # for auto-complete year
195
-
196
- # extract date from line
197
- # and return it
198
- # NB: side effect - removes date from line string
199
- DateFormats.find!( line, start: start )
200
- end
201
- end # class AutoConfParser
202
- end # module SportDb
1
+
2
+ module SportDb
3
+
4
+
5
+ class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
6
+
7
+ def self.parse( lines, start: )
8
+ ## todo/fix: add support for txt and lines
9
+ ## check if lines_or_txt is an array or just a string
10
+ parser = new( lines, start )
11
+ parser.parse
12
+ end
13
+
14
+
15
+ include Logging ## e.g. logger#debug, logger#info, etc.
16
+ include ParserHelper ## e.g. read_lines, etc.
17
+
18
+
19
+ def initialize( lines, start )
20
+ # for convenience split string into lines
21
+ ## note: removes/strips empty lines
22
+ ## todo/check: change to text instead of array of lines - why? why not?
23
+
24
+ ## note - wrap in enumerator/iterator a.k.a lines reader
25
+ @lines = LinesReader.new( lines.is_a?( String ) ?
26
+ read_lines( lines ) :
27
+ lines
28
+ )
29
+
30
+ @start = start
31
+ end
32
+
33
+
34
+ ## note: colon (:) MUST be followed by one (or more) spaces
35
+ ## make sure mon feb 12 18:10 will not match
36
+ ## allow 1. FC Köln etc.
37
+ ## Mainz 05:
38
+ ## limit to 30 chars max
39
+ ## only allow chars incl. intl but (NOT ()[]/;)
40
+ ##
41
+ ## Group A:
42
+ ## Group B: - remove colon
43
+ ## or lookup first
44
+
45
+ ATTRIB_REGEX = /^
46
+ [ ]*? # slurp leading spaces
47
+ (?<key>[^:|\]\[()\/; -]
48
+ [^:|\]\[()\/;]{0,30}
49
+ )
50
+ [ ]*? # slurp trailing spaces
51
+ :[ ]+
52
+ (?<value>.+)
53
+ [ ]*? # slurp trailing spaces
54
+ $
55
+ /ix
56
+
57
+
58
+ def parse
59
+ ## try to find all teams in match schedule
60
+ @last_round = nil
61
+ @last_group = nil
62
+
63
+ ## definitions/defs
64
+ @round_defs = Hash.new(0)
65
+ @group_defs = Hash.new(0)
66
+
67
+ ## usage/refs
68
+ @rounds = {} ## track usage counter and match (two teams) counter
69
+ @groups = {} ## -"-
70
+
71
+ @teams = Hash.new(0) ## keep track of usage counter
72
+
73
+ ## note: ground incl. optional city (timezone) etc. - why? why not?
74
+ @grounds = Hash.new(0)
75
+
76
+ @warns = [] ## track list of warnings (unmatched lines) too - why? why not?
77
+
78
+
79
+ ## todo/fix - use @lines.rewind first here - why? why not?
80
+ @lines.each do |line|
81
+ if is_round_def?( line )
82
+ ## todo/fix: add round definition (w begin n end date)
83
+ ## todo: do not patch rounds with definition (already assume begin/end date is good)
84
+ ## -- how to deal with matches that get rescheduled/postponed?
85
+ logger.debug "skipping matched round def line: >#{line}<"
86
+ @round_defs[ line ] += 1
87
+ elsif is_round?( line )
88
+ logger.debug "skipping matched round line: >#{line}<"
89
+
90
+ round = @rounds[ line ] ||= {count: 0, match_count: 0} ## usage counter, match counter
91
+ round[:count] +=1
92
+ @last_round = round
93
+ elsif is_group_def?( line ) ## NB: group goes after round (round may contain group marker too)
94
+ ### todo: add pipe (|) marker (required)
95
+ logger.debug "skipping matched group def line: >#{line}<"
96
+ @group_defs[ line ] += 1
97
+ elsif is_group?( line )
98
+ ## -- lets you set group e.g. Group A etc.
99
+ logger.debug "skipping matched group line: >#{line}<"
100
+
101
+ group = @groups[ line ] ||= {count: 0, match_count: 0}
102
+ group[:count] +=1
103
+ @last_group = group
104
+ ## todo/fix: parse group line!!!
105
+ elsif m=ATTRIB_REGEX.match( line )
106
+ ## note: check attrib regex AFTER group def e.g.:
107
+ ## Group A:
108
+ ## Group B: etc.
109
+ ## todo/fix - change Group A: to Group A etc.
110
+ ## Group B: to Group B
111
+
112
+ ## check if line ends with dot
113
+ ## if not slurp up lines to the next dot!!!
114
+ logger.debug "skipping key/value line - >#{line}<"
115
+ while !line.end_with?( '.' ) || line.nil? do
116
+ line = @lines.next
117
+ logger.debug "skipping key/value line (cont.) - >#{line}<"
118
+ end
119
+ elsif is_goals?( line )
120
+ ## note - goals must be AFTER attributes!!!
121
+ logger.debug "skipping matched goals line: >#{line}<"
122
+ elsif try_parse_game( line )
123
+ # do nothing here
124
+ else
125
+ logger.warn "skipping line (no match found): >#{line}<"
126
+ @warns << line
127
+ end
128
+ end # lines.each
129
+
130
+ ## new - add grounds and cities
131
+ [@teams, @rounds, @groups, @round_defs, @group_defs,
132
+ @grounds, ## note: ground incl. optional city (timezone) etc.
133
+ @warns]
134
+ end
135
+
136
+
137
+ def try_parse_game( line )
138
+ # note: clone line; for possible test do NOT modify in place for now
139
+ # note: returns true if parsed, false if no match
140
+ parse_game( line.dup )
141
+ end
142
+
143
+ def parse_game( line )
144
+ logger.debug "parsing game (fixture) line: >#{line}<"
145
+
146
+ ## remove all protected text runs e.g. []
147
+ ## fix: add [ to end-of-line too
148
+ ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
149
+
150
+ line = line.gsub( /\[
151
+ [^\]]+?
152
+ \]/x, '' ).strip
153
+ return true if line.empty? ## note: return true (for valid line with no match/teams)
154
+
155
+
156
+ ## split by geo (@) - remove for now
157
+ values = line.split( '@' )
158
+
159
+ ### check for ground/stadium and cities
160
+ if values.size == 1
161
+ ## no stadium
162
+ elsif values.size == 2 # bingo!!!
163
+ ## process stadium, city (timezone) etc.
164
+ ## for now keep it simple - pass along "unparsed" all-in-one
165
+ ground = values[1].gsub( /[ \t]+/, ' ').strip ## squish
166
+ @grounds[ ground ] += 1
167
+ else
168
+ puts "!! ERROR - too many @-markers found in line:"
169
+ puts line
170
+ exit 1
171
+ end
172
+
173
+ line = values[0]
174
+
175
+
176
+ ## try find date
177
+ date = find_date!( line, start: @start )
178
+ if date ## if found remove tagged run too; note using singular sub (NOT global gsub)
179
+ line = line.sub( /\[
180
+ [^\]]+?
181
+ \]/x, '' ).strip
182
+
183
+ else
184
+ ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
185
+ ## todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
186
+ line = line.sub( %r{^ ## MUST be anchored to beginning of line
187
+ [012]?[0-9]
188
+ [.:hH]
189
+ [0-9][0-9]
190
+ (?=[ ]) ## must be followed by space for now (add end of line too - why? why not?)
191
+ }x, '' ).strip
192
+ end
193
+
194
+ return true if line.empty? ## note: return true (for valid line with no match/teams)
195
+
196
+
197
+ score = find_score!( line )
198
+
199
+ logger.debug " line: >#{line}<"
200
+
201
+ line = line.sub( /\[
202
+ [^\]]+?
203
+ \]/x, '$$' ) # note: replace first score tag with $$
204
+ line = line.gsub( /\[
205
+ [^\]]+?
206
+ \]/x, '' ) # note: replace/remove all other score tags with nothing
207
+
208
+ ## clean-up remove all text run inside () or empty () too
209
+ line = line.gsub( /\(
210
+ [^)]*?
211
+ \)/x, '' )
212
+
213
+
214
+ ## check for more match separators e.g. - or vs for now
215
+ line = line.sub( / \s+
216
+ ( -
217
+ | v
218
+ | vs\.? # note: allow optional dot eg. vs.
219
+ )
220
+ \s+
221
+ /ix, '$$' )
222
+
223
+ values = line.split( '$$' )
224
+ values = values.map { |value| value.strip } ## strip spaces
225
+ values = values.select { |value| !value.empty? } ## remove empty strings
226
+
227
+ return true if values.size == 0 ## note: return true (for valid line with no match/teams)
228
+
229
+ if values.size == 1
230
+ puts "(auto config) try matching teams separated by spaces (2+):"
231
+ pp values
232
+
233
+ values = values[0].split( /[ ]{2,}/ )
234
+ pp values
235
+ end
236
+
237
+ return false if values.size != 2
238
+
239
+ puts "(auto config) try matching teams:"
240
+ pp values
241
+
242
+ @teams[ values[0] ] += 1 ## update usage counters
243
+ @teams[ values[1] ] += 1
244
+
245
+ @last_round[ :match_count ] += 1 if @last_round
246
+ @last_group[ :match_count ] += 1 if @last_group
247
+
248
+ true
249
+ end
250
+
251
+
252
+
253
+ def find_score!( line )
254
+ # note: always call after find_dates !!!
255
+ # scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
256
+ # -- note: score might have two digits too
257
+ ScoreFormats.find!( line )
258
+ end
259
+
260
+ def find_date!( line, start: )
261
+ ## NB: lets us pass in start_at/end_at date (for event)
262
+ # for auto-complete year
263
+
264
+ # extract date from line
265
+ # and return it
266
+ # NB: side effect - removes date from line string
267
+ DateFormats.find!( line, start: start )
268
+ end
269
+ end # class AutoConfParser
270
+ end # module SportDb
@@ -1,4 +1,3 @@
1
- # encoding: utf-8
2
1
 
3
2
  module SportDb
4
3