sportdb-formats 0.4.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +24 -4
  3. data/Rakefile +3 -3
  4. data/lib/sportdb/formats.rb +25 -2
  5. data/lib/sportdb/formats/config.rb +40 -0
  6. data/lib/sportdb/formats/datafile.rb +42 -62
  7. data/lib/sportdb/formats/datafile_package.rb +160 -0
  8. data/lib/sportdb/formats/match/conf_parser.rb +120 -0
  9. data/lib/sportdb/formats/match/mapper.rb +319 -0
  10. data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
  11. data/lib/sportdb/formats/match/match_parser.rb +659 -0
  12. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
  13. data/lib/sportdb/formats/name_helper.rb +84 -0
  14. data/lib/sportdb/formats/outline_reader.rb +53 -15
  15. data/lib/sportdb/formats/package.rb +172 -160
  16. data/lib/sportdb/formats/parser_helper.rb +81 -0
  17. data/lib/sportdb/formats/score/score_formats.rb +180 -0
  18. data/lib/sportdb/formats/score/score_parser.rb +196 -0
  19. data/lib/sportdb/formats/structs/country.rb +1 -43
  20. data/lib/sportdb/formats/structs/group.rb +25 -0
  21. data/lib/sportdb/formats/structs/league.rb +7 -26
  22. data/lib/sportdb/formats/structs/match.rb +72 -51
  23. data/lib/sportdb/formats/structs/round.rb +14 -4
  24. data/lib/sportdb/formats/structs/season.rb +3 -0
  25. data/lib/sportdb/formats/structs/team.rb +144 -0
  26. data/lib/sportdb/formats/version.rb +2 -2
  27. data/test/helper.rb +83 -1
  28. data/test/test_clubs.rb +3 -3
  29. data/test/test_conf.rb +65 -0
  30. data/test/test_datafile.rb +21 -30
  31. data/test/test_match.rb +0 -6
  32. data/test/test_match_auto.rb +72 -0
  33. data/test/test_match_auto_champs.rb +45 -0
  34. data/test/test_match_auto_euro.rb +37 -0
  35. data/test/test_match_auto_worldcup.rb +61 -0
  36. data/test/test_match_champs.rb +27 -0
  37. data/test/test_match_eng.rb +26 -0
  38. data/test/test_match_euro.rb +27 -0
  39. data/test/test_match_worldcup.rb +27 -0
  40. data/test/test_name_helper.rb +67 -0
  41. data/test/test_outline_reader.rb +3 -3
  42. data/test/test_package.rb +21 -2
  43. data/test/test_package_match.rb +78 -0
  44. data/test/test_scores.rb +67 -51
  45. metadata +32 -12
  46. data/lib/sportdb/formats/scores.rb +0 -253
  47. data/lib/sportdb/formats/structs/club.rb +0 -213
  48. data/test/test_club_helpers.rb +0 -63
  49. data/test/test_datafile_match.rb +0 -65
@@ -0,0 +1,120 @@
1
module SportDb

  ##
  ## Parses a "conf(iguration)" block of team lines into a hash of teams.
  ##
  ## Every (non-decorative) line is either
  ##   - a standings table row e.g.
  ##        1  Manchester City   38  32  4  2  106-27  100
  ##     or
  ##   - a plain team name (the full line is used as the name)
  ##
  ## A line may carry an (optional) country code after one of the
  ## separators <>‹›, e.g. "Arsenal, ENG".
  class ConfParser

    ## Convenience shortcut for new( lines ).parse
    def self.parse( lines )
      parser = new( lines )
      parser.parse
    end

    include Logging         ## e.g. logger#debug, logger#info, etc.
    include ParserHelper    ## e.g. read_lines, etc.


    ## lines - text block (String) or array of lines
    def initialize( lines )
      # for convenience split string into lines
      ##    note: removes/strips empty lines
      ## todo/check: change to text instead of array of lines - why? why not?
      @lines = lines.is_a?( String ) ? read_lines( lines ) : lines
    end



    COUNTRY_RE = %r{ [<>‹›,]
                     [ ]*
                     (?<country>[A-Z]{2,4})   ## todo/check: allow one-letter (motor vehicle plates) or 5 letter possible?
                     \b}xi


    ## standings table row regex matcher e.g.
    ##    1  Manchester City   38  32  4  2  106-27  100
    ## or 1. Manchester City   38  32  4  2  106:27  100
    TABLE_RE = %r{ ^
                   (?:
                     (?<rank>\d+)\.?
                     |
                     [-]
                   )
                   [ ]+
                   (?<team>.+?)     ## note: let's use non-greedy (MINIMUM length) match for now
                   [ ]+
                   (?<pld>\d+)      ## (pl)aye(d)
                   [ ]+
                   (?<w>\d+)        ## (w)ins
                   [ ]+
                   (?<d>\d+)        ## (d)raws
                   [ ]+
                   (?<l>\d+)        ## (l)ost
                   [ ]+
                   (?<gf>\d+)       ## (g)oal (f)or
                   [ ]*
                   [:-]             ## note: allow 10-10 or 10:10 or 10 - 10 or 10 : 10 etc.
                   [ ]*
                   (?<ga>\d+)       ## (g)oal (a)gainst
                   (?:              ## allow optional (g)oal (d)ifference
                     [ ]+
                     (?<gd>[±+-]?\d+)   ## (g)oal (d)ifference
                   )?
                   [ ]+
                   (?<pts>\d+)      ## (p)oin(ts)
                   (?:              ## allow optional deductions e.g. [-7]
                     [ ]+
                     \[(?<deduction>-\d+)\]
                   )?
                   $}x

    ## Converts the lines into teams.
    ##
    ## Returns a hash mapping the team name (String) to a hash with the
    ## optional entries
    ##   :country   - country code as found in the line (String)
    ##   :rank      - rank / position in the table (Integer)
    ##   :standing  - hash with :pld, :w, :d, :l, :gf, :ga, :pts and
    ##                (if present) :gd and :deduction (all Integer)
    ##
    ## Note: a team name listed more than once is merged into one entry
    ##       (later lines overwrite earlier values).
    def parse
      teams = {}      ## convert lines to teams

      @lines.each do |line|
        next if line.strip.empty?     ## skip blank lines (possible if an array of lines gets passed in)
        next if line =~ /^[ -]+$/     ## skip decorative lines with dash only (e.g. ---- or - - - -) etc.

        ## quick hack - check for/extract (optional) country code (for teams) first
        ##   allow as separators <>‹›,  NOTE: includes (,) comma for now too
        country = nil
        if (m = COUNTRY_RE.match( line ))
          country = m[:country]
          ## cut out exactly the matched span;
          ##   note: replacing the matched text as a plain string might remove
          ##         an earlier identical-looking substring instead
          line = m.pre_match + m.post_match
        end

        if (m = TABLE_RE.match( line ))
          logger.debug "  matching table entry >#{line}<"

          name = m[:team]

          ## todo/fix: track double usage - why? why not? report/raise error/exception on duplicates?
          team = teams[ name ] ||= {}
          team[ :country ]  = country if country
          team[ :rank ]     = Integer( m[:rank], 10 ) if m[:rank]
          team[ :standing ] = build_standing( m )
        else
          ## assume team is full line
          name = line.strip    # note: strip leading and trailing spaces

          team = teams[ name ] ||= {}
          team[ :country ] = country if country
        end
      end

      teams
    end # method parse


    private

    ## Builds the standing hash from a TABLE_RE match.
    ##
    ## note: always convert with an explicit base 10 -
    ##   Integer( '010' ) would read the digits as octal (8) and
    ##   Integer( '08' ) would raise an ArgumentError
    def build_standing( m )
      standing = {
        pld: Integer( m[:pld], 10 ),
        w:   Integer( m[:w],   10 ),
        d:   Integer( m[:d],   10 ),
        l:   Integer( m[:l],   10 ),
        gf:  Integer( m[:gf],  10 ),
        ga:  Integer( m[:ga],  10 ),
      }
      standing[ :gd ]        = Integer( m[:gd].gsub( /[±+]/, '' ), 10 ) if m[:gd]
      standing[ :pts ]       = Integer( m[:pts], 10 )
      standing[ :deduction ] = Integer( m[:deduction], 10 ) if m[:deduction]
      standing
    end

  end # class ConfParser
end # module SportDb
@@ -0,0 +1,319 @@
1
+ # encoding: utf-8
2
+
3
module SportDb

  ##
  ## note: this was/is a cut-n-paste (inline) copy of TextUtils::TitleMapper2
  ##   see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
  ##
  ## Maps known titles/names (e.g. FC Augsburg) found in a line of text to
  ## record keys (e.g. augsburg) in two steps:
  ##   1) map_titles!  - replaces every known title with a @@oo{key}oo@@ marker
  ##   2) find_rec! / find_recs! - replaces the markers with a [TAG] placeholder
  ##        and returns the record(s) looked up by key
  ##
  ## Note: all methods ending in ! change the passed-in line in place.

  class MapperV2    ## todo/check: rename to NameMapper/TitleMapper ? why? why not??

    include Logging

    attr_reader :known_titles    ## rename to mapping or mappings or just titles - why? why not?

    ########
    ##  key:   e.g. augsburg
    ##  title: e.g. FC Augsburg
    ##  length (of title(!!) - not regex pattern): e.g. 11   -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
    ##  pattern: regex source (string) matching the title
    MappingStruct = Struct.new( :key, :title, :length, :pattern )   ## todo/check: use (rename to) TitleStruct - why? why not??

    ######
    ## convenience helper - (auto)build ActiveRecord-like team records/structs
    Record = Struct.new( :key, :title, :synonyms )

    ## Accented / special chars and the regex alternation to use instead
    ##   e.g. match accented char with or without accents
    ## todo: add some more
    ##   see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
    TITLE_ALTERNATIVES = [
      ['-', '(-| )'],    ## e.g. Blau-Weiß Linz == Blau Weiß Linz
      ['æ', '(æ|ae)'],   ## e.g.
      ['ä', '(ä|ae)'],   ## e.g.
      ['Ö', '(Ö|Oe)'],   ## e.g. Österreich
      ['ö', '(ö|oe)'],   ## e.g. Mönchengladbach
      ['ß', '(ß|ss)'],   ## e.g. Blau-Weiß Linz
      ['ü', '(ü|ue)'],   ## e.g.

      ['á', '(á|a)'],    ## e.g. Bogotá, Sársfield
      ['ã', '(ã|a)'],    ## e.g São Paulo
      ['ç', '(ç|c)'],    ## e.g. Fenerbahçe
      ['é', '(é|e)'],    ## e.g. Vélez
      ['ê', '(ê|e)'],    ## e.g. Grêmio
      ['ï', '(ï|i)'],    ## e.g. El Djazaïr
      ['ñ', '(ñ|n)'],    ## e.g. Porteño
      ['ň', '(ň|n)'],    ## e.g. Plzeň
      ['ó', '(ó|o)'],    ## e.g. Colón
      ['ō', '(ō|o)'],    ## e.g. Tōkyō
      ['ș', '(ș|s)'],    ## e.g. Bucarești
      ['ú', '(ú|u)']     ## e.g. Fútbol
    ].freeze


    ## Builds Record structs from text - one record per line in the format
    ##    title | synonym | synonym ...
    ##
    ## txt_or_lines - text block (String) or array of lines
    ##
    ## Lines get stripped; blank lines, comment lines (starting with #) and
    ## lines without a title are skipped - for both kinds of input.
    ##
    ## Returns an array of records; the key gets auto-generated from the title.
    def build_records( txt_or_lines )
      ## todo/fix: use ParserHelper read_lines !!! ????
      lines = txt_or_lines.is_a?( String ) ? txt_or_lines.each_line : txt_or_lines

      recs = []
      lines.each do |line|
        line = line.strip
        next if line.empty? || line.start_with?( '#' )   ## note: skip empty and comment lines

        values = line.split( '|' ).map { |value| value.strip }

        title = values[0]
        ## note: skip lines without title (e.g. "| Synonym") -
        ##   an empty title would end up as an empty (match-everything) pattern
        next if title.nil? || title.empty?

        ## note: quick hack - auto-generate key, that is, downcase and remove all chars but a-z
        key      = title.downcase.gsub( /[^a-z]/, '' )
        synonyms = values.size > 1 ? values[1..-1].join( '|' ) : nil

        recs << Record.new( key, title, synonyms )
      end
      recs
    end


    ## records_or_mapping - one of
    ##     - text block (String) or array of lines (Strings), see build_records
    ##     - array of records (responding to key, title, synonyms and optionally code)
    ##     - "custom" mapping hash (title/name => record)
    ## tag - tag name (String) used for the placeholders e.g. team => [TEAM]
    def initialize( records_or_mapping, tag )
      ## for convenience allow easy (auto-)convert text (lines) to records
      ##   as 1) text block/string  or
      ##      2) array of lines/strings
      if records_or_mapping.is_a?( String ) ||
         (records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
        records_or_mapping = build_records( records_or_mapping )
      end

      ## build mapping lookup table
      @known_titles = if records_or_mapping.is_a?( Hash )   ## assume "custom" mapping hash table (title/name=>record)
                        build_title_table_for_mapping( records_or_mapping )
                      else   ## assume array of records
                        build_title_table_for_records( records_or_mapping )
                      end

      ## build lookup hash by record (e.g. team/club/etc.) key
      records = if records_or_mapping.is_a?( Array )
                  records_or_mapping
                else   ## assume hash (uses values assuming to be all records - note might include duplicates)
                  records_or_mapping.values
                end

      @records = records.each_with_object( {} ) { |rec,h| h[rec.key] = rec }

      ## todo: rename tag to attrib or attrib_name - why ?? why not ???
      @tag = tag   # e.g. tag name use for @@brewery@@ @@team@@ etc.
    end



    ## Replaces all known titles in line (in place) with @@oo{key}oo@@ markers.
    def map_titles!( line )   ## rename to just map! - why?? why not???
      ## note: one title gets replaced per pass; repeat until nothing matches anymore
      loop do
        break unless map_title_for!( @tag, line, @known_titles )
      end
    end

    ## Replaces the first marker in line (in place) with the [TAG] placeholder.
    ##  Returns the record for the marker's key or nil (no marker or no record for key).
    def find_rec!( line )
      find_rec_for!( @tag, line, @records )
    end

    ## Replaces markers in line (in place) with numbered placeholders e.g. [TEAM1], [TEAM2], etc.
    ##  Returns an array of records; stops at the first marker without a record (or if no marker is left).
    def find_recs!( line )   # note: recs (plural!) - will return array
      recs    = []
      counter = 1

      while (rec = find_rec_for!( "#{@tag}#{counter}", line, @records ))
        recs << rec
        counter += 1
      end
      recs
    end


    private

    ## Builds the known titles table for a "custom" mapping hash (title/name => record).
    def build_title_table_for_mapping( mapping )
      known_titles = []

      mapping.each do |title, rec|
        next if title.empty?   ## note: an empty pattern would match everywhere

        m = MappingStruct.new
        m.key     = rec.key
        m.title   = title
        m.length  = title.length
        m.pattern = Regexp.escape( title )   ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)

        known_titles << m
      end

      ## note: sort here by length (largest goes first - best match)
      known_titles.sort { |l,r| r.length <=> l.length }
    end

    ## Builds the known titles table for an array of records.
    def build_title_table_for_records( records )

      ## build known titles table w/ synonyms e.g.
      #
      # [[ 'wolfsburg', 'VfL Wolfsburg'],
      #  [ 'augsburg',  'FC Augsburg'],
      #  [ 'augsburg',  'Augi2'],
      #  [ 'augsburg',  'Augi3' ],
      #  [ 'stuttgart', 'VfB Stuttgart']]

      known_titles = []

      records.each_with_index do |rec,index|

        title_candidates = []
        title_candidates << rec.title

        title_candidates += rec.synonyms.split( '|' )  if rec.synonyms && !rec.synonyms.empty?


        ## check if title includes subtitle e.g. Grand Prix Japan (Suzuka Circuit)
        #    make subtitle optional by adding title w/o subtitle e.g. Grand Prix Japan
        ##
        ## note: skip empty titles (e.g. from "A||B" or "(Subtitle only)") -
        ##   an empty pattern matches at every word boundary and
        ##   would keep map_titles! looping forever

        titles = []
        title_candidates.each do |t|
          titles << t   unless t.empty?
          if t =~ /\(.+\)/
            extra_title = t.gsub( /\(.+\)/, '' )   # remove/delete subtitles
            # note: strip leading n trailing whitespaces too!
            #   -- todo: add squish or something if () is inline e.g. leaves two spaces?
            extra_title.strip!
            titles << extra_title   unless extra_title.empty?
          end
        end

        titles.each do |t|
          m = MappingStruct.new
          m.key     = rec.key
          m.title   = t
          m.length  = t.length
          ## note: escape for regex plus allow subs for special chars/accents
          m.pattern = title_esc_regex( t )

          known_titles << m
        end

        logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{titles.join('|')}<"

        ## note: only include code field - if defined
        if rec.respond_to?(:code) && rec.code && !rec.code.empty?
          m = MappingStruct.new
          m.key     = rec.key
          m.title   = rec.code
          m.length  = rec.code.length
          ## note: use code for now as is (no variants allowed for now);
          ##   escape to make sure regex special chars (if any) match literally
          m.pattern = Regexp.escape( rec.code )

          known_titles << m
        end
      end

      ## note: sort here by length (largest goes first - best match)
      #   exclude code and key (key should always go last)
      known_titles.sort { |l,r| r.length <=> l.length }
    end



    ## Replaces the first (that is, longest) known title found in line (in place)
    ##  with its @@oo{key}oo@@ marker.
    ##  Returns true if a title got replaced, otherwise false.
    def map_title_for!( tag, line, mappings )
      mappings.each do |mapping|
        key     = mapping.key
        pattern = mapping.pattern
        ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
        ##   (thus add it, allows match for Benfica Lis. for example - note . at the end)

        ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
        re = /\b#{pattern}(\b| |\t|$)/   # wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
        if line =~ re
          logger.debug " match for #{tag.downcase} >#{key}< >#{pattern}<"
          # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
          # note: use block form - the replacement gets used as is (no backslash escapes)
          line.sub!( re ) { "@@oo#{key}oo@@ " }   # NB: add one space char at end
          return true    # break out after first match (do NOT continue)
        end
      end

      false
    end


    ## Replaces the first @@oo{key}oo@@ marker in line (in place) with [TAG].
    ##  Returns records[ key ] or nil if no marker is found.
    def find_rec_for!( tag, line, records )
      re = /@@oo([^@]+?)oo@@/   # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])

      m = re.match( line )
      return nil unless m

      key = m[1]
      logger.debug " #{tag.downcase}: >#{key}<"

      line.sub!( re, "[#{tag.upcase}]" )

      records[ key ]   ## note: map key to record (using records hash table mapping)
    end # method find_rec_for!


    ####
    # title helper cut-n-paste copy from TextUtils
    ##  see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
    ##
    ## Converts a title into a regex source (string):
    ##  escapes regex special chars and
    ##  allows alternative spellings for accented chars, see TITLE_ALTERNATIVES.
    def title_esc_regex( title_unescaped )

      ## escape regex special chars e.g.
      #  . to \. and
      #  ( to \(
      #  ) to \)
      #  ? to \?  -- zero or one
      #  * to \*  -- zero or more
      #  + to \+  -- one or more
      #  $ to \$  -- end of line
      #  ^ to \^  -- start of line etc.
      #  plus [ ] { } | and \ (backslash)

      # e.g. Benfica Lis.
      # e.g. Club Atlético Colón (Santa Fe)
      # e.g. Bauer Anton (????)

      ## NB: cannot use Regexp.escape! will escape - (dash) to \- too
      ##   and, thus, break the (-| ) alternative added below
      title = title_unescaped.gsub( /[\\.()?*+$^\[\]{}|]/ ) { |ch| "\\#{ch}" }

      ## match accented char with or without accents
      ##  add (ü|ue) etc.
      ## also make - optional change to (-| ) e.g. Blau-Weiss == Blau Weiss
      ##
      ## reuse for all readers!

      ### fix/todo: check for dot+space e.g. . and make dot optional
      ##
      # e.g. make dot (.) optional plus allow alternative optional space e.g.
      #  -- for U.S.A. => allow USA or U S A
      #
      ## e.g. U. de G. or U de G or U.de G. ??
      ##   collect some more (real-world) examples first!!!!!

      TITLE_ALTERNATIVES.each do |char, alternation|
        title = title.gsub( char, alternation )
      end

      title
    end

  end # class MapperV2
end # module SportDb
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+
3
module SportDb

  ##
  ## Thin convenience wrapper around MapperV2 using the fixed tag 'team'.
  ##
  ## Typical usage is in two steps (both change the passed-in line in place):
  ##   1) map_teams!                 - mark up all known team names in the line
  ##   2) find_team! or find_teams!  - fetch the record(s) for the marked up names
  class TeamMapper

    ## records_or_mapping - passed on as is to MapperV2.new
    def initialize( records_or_mapping )
      @mapper = MapperV2.new( records_or_mapping, 'team' )
    end

    ## Marks up all known team names in line; delegates to MapperV2#map_titles!
    def map_teams!( line )
      @mapper.map_titles!( line )
    end

    ## Delegates to MapperV2#find_rec!
    ##  Note: returns a single team record or nil (NOT the key)
    def find_team!( line )
      @mapper.find_rec!( line )
    end

    ## Delegates to MapperV2#find_recs!
    ##  Note: returns an array of team records - note: plural! (teamsssss)
    def find_teams!( line )
      @mapper.find_recs!( line )
    end

  end # class TeamMapper

end # module SportDb