sportdb-formats 1.0.3 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +1 -0
- data/lib/sportdb/formats/country/country_index.rb +7 -7
- data/lib/sportdb/formats/country/country_reader.rb +26 -6
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/league/league_reader.rb +3 -3
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +99 -180
- data/lib/sportdb/formats/match/match_parser_csv.rb +19 -5
- data/lib/sportdb/formats/package.rb +36 -5
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_parser.rb +6 -0
- data/lib/sportdb/formats/structs/country.rb +6 -3
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/match.rb +5 -1
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +1 -2
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +1 -0
- data/test/test_country_index.rb +4 -4
- data/test/test_country_reader.rb +34 -4
- data/test/test_match_auto_relegation.rb +41 -0
- data/test/test_regex.rb +25 -7
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cdd2bc410771494ed506a24d384ca3c8b1c9684
|
4
|
+
data.tar.gz: 066f5288da503a00efe280369f57d6cd65bf4bf7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f61edee9495047fc49dfb5720c7afc1e0e316e5fab024d8fc0b1bd5fcdad70524f6e22d24d8fad0aa679380565e0a0c7e36fae79c170655f8a7a496dee170aca
|
7
|
+
data.tar.gz: b378202d2c8152ac46386618d3c69c03d802b1439e74175f8d84fad304ce111edb0bb1cd848fc969848ce55e0cf8de15a46bec322351d5d199652ed16d41b164
|
data/Manifest.txt
CHANGED
@@ -30,11 +30,11 @@ class CountryIndex
|
|
30
30
|
## auto-fill countries
|
31
31
|
## pp recs
|
32
32
|
recs.each do |rec|
|
33
|
-
## rec e.g. { key:'af',
|
33
|
+
## rec e.g. { key:'af', code:'AFG', name:'Afghanistan'}
|
34
34
|
|
35
35
|
@countries << rec
|
36
36
|
|
37
|
-
## add codes lookups - key,
|
37
|
+
## add codes lookups - key, code, ...
|
38
38
|
if @countries_by_code[ rec.key ]
|
39
39
|
puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
|
40
40
|
exit 1
|
@@ -42,13 +42,13 @@ class CountryIndex
|
|
42
42
|
@countries_by_code[ rec.key ] = rec
|
43
43
|
end
|
44
44
|
|
45
|
-
## add
|
46
|
-
if rec.key != rec.
|
47
|
-
if @countries_by_code[ rec.
|
48
|
-
puts "** !! ERROR !! country code
|
45
|
+
## add code (only) if different from key
|
46
|
+
if rec.key != rec.code.downcase
|
47
|
+
if @countries_by_code[ rec.code.downcase ]
|
48
|
+
puts "** !! ERROR !! country code >#{rec.code.downcase}< already exits!!"
|
49
49
|
exit 1
|
50
50
|
else
|
51
|
-
@countries_by_code[ rec.
|
51
|
+
@countries_by_code[ rec.code.downcase ] = rec
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
@@ -43,11 +43,31 @@ def parse
|
|
43
43
|
# e.g. East Germany (-1989) => East Germany (-1989)
|
44
44
|
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
45
45
|
last_country.alt_names += values
|
46
|
+
elsif line =~ /^-[ ]*(\d{4})
|
47
|
+
[ ]+
|
48
|
+
(.+)$
|
49
|
+
/x ## check for historic lines e.g. -1989
|
50
|
+
year = $1.to_i
|
51
|
+
parts = $2.split( /=>|⇒/ )
|
52
|
+
values = parts[0].split( ',' )
|
53
|
+
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
54
|
+
|
55
|
+
name = values[0]
|
56
|
+
code = values[1]
|
57
|
+
|
58
|
+
last_country = country = Country.new( name: "#{name} (-#{year})",
|
59
|
+
code: code )
|
60
|
+
## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
|
61
|
+
|
62
|
+
countries << country
|
63
|
+
## todo/fix: add reference to country today (in parts[1] !!!!)
|
46
64
|
else
|
47
65
|
## assume "regular" line
|
48
66
|
## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
|
49
67
|
## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
|
50
|
-
if line =~ /^([a-z]{2,4})
|
68
|
+
if line =~ /^([a-z]{2,4})
|
69
|
+
[ ]+
|
70
|
+
(.+)$/x
|
51
71
|
key = $1
|
52
72
|
values = $2.split( ',' )
|
53
73
|
## strip and squish (white)spaces
|
@@ -60,14 +80,14 @@ def parse
|
|
60
80
|
geos = split_geo( values[0] )
|
61
81
|
name = geos[0] ## note: ignore all other geos for now
|
62
82
|
|
63
|
-
## note: allow
|
64
|
-
|
83
|
+
## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
|
84
|
+
code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
|
65
85
|
values[1]
|
66
86
|
else
|
67
87
|
if values[1]
|
68
|
-
puts "** !!! ERROR !!! wrong
|
88
|
+
puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
|
69
89
|
else
|
70
|
-
puts "** !!! ERROR !!! missing
|
90
|
+
puts "** !!! ERROR !!! missing code for (canonical) country name"
|
71
91
|
end
|
72
92
|
exit 1
|
73
93
|
end
|
@@ -80,7 +100,7 @@ def parse
|
|
80
100
|
|
81
101
|
last_country = country = Country.new( key: key,
|
82
102
|
name: name,
|
83
|
-
|
103
|
+
code: code,
|
84
104
|
tags: tags )
|
85
105
|
countries << country
|
86
106
|
else
|
@@ -121,14 +121,31 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
121
121
|
values
|
122
122
|
end
|
123
123
|
|
124
|
-
def check_stage( name )
|
125
|
-
known_stages = ['regular season',
|
126
|
-
'championship round',
|
127
|
-
'relegation round',
|
128
|
-
'play-offs'
|
129
|
-
]
|
130
124
|
|
131
|
-
|
125
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
126
|
+
KNOWN_STAGES = [
|
127
|
+
'Regular Season',
|
128
|
+
'Regular Stage',
|
129
|
+
'Championship Round',
|
130
|
+
'Championship Playoff',
|
131
|
+
'Relegation Round',
|
132
|
+
'Relegation Playoff',
|
133
|
+
'Play-offs',
|
134
|
+
'Playoff Stage',
|
135
|
+
'Grunddurchgang',
|
136
|
+
'Finaldurchgang - Qualifikationsgruppe',
|
137
|
+
'Finaldurchgang - Qualifikation',
|
138
|
+
'Finaldurchgang - Meistergruppe',
|
139
|
+
'Finaldurchgang - Meister',
|
140
|
+
'EL Play-off',
|
141
|
+
'Europa League Play-off',
|
142
|
+
'Europa-League-Play-offs',
|
143
|
+
].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
|
144
|
+
|
145
|
+
|
146
|
+
def check_stage( name )
|
147
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
148
|
+
if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
|
132
149
|
## everything ok
|
133
150
|
else
|
134
151
|
puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
|
@@ -113,9 +113,9 @@ def parse
|
|
113
113
|
## add a list of (auto-)excluded country codes with conflicts? why? why not?
|
114
114
|
## cl - a) Chile b) Champions League
|
115
115
|
alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
116
|
-
if country.key.upcase != country.
|
117
|
-
alt_names_auto << "#{country.
|
118
|
-
alt_names_auto << "#{country.
|
116
|
+
if country.key.upcase != country.code
|
117
|
+
alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
|
118
|
+
alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
119
119
|
end
|
120
120
|
alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
|
121
121
|
else ## assume int'l (no country) e.g. champions league, etc.
|
@@ -7,21 +7,21 @@ module SportDb
|
|
7
7
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
|
8
8
|
|
9
9
|
|
10
|
-
class MapperV2 ## todo/check: rename to NameMapper
|
10
|
+
class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
|
11
11
|
|
12
12
|
include Logging
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
|
15
15
|
|
16
16
|
########
|
17
17
|
## key: e.g. augsburg
|
18
|
-
##
|
19
|
-
## length (of
|
20
|
-
MappingStruct = Struct.new( :key, :
|
18
|
+
## name: e.g. FC Augsburg
|
19
|
+
## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
|
20
|
+
MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
|
21
21
|
|
22
22
|
######
|
23
23
|
## convenience helper - (auto)build ActiveRecord-like team records/structs
|
24
|
-
Record = Struct.new( :key, :
|
24
|
+
Record = Struct.new( :key, :name, :alt_names )
|
25
25
|
def build_records( txt_or_lines )
|
26
26
|
recs = []
|
27
27
|
|
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
44
44
|
values = line.split( '|' )
|
45
45
|
values = values.map { |value| value.strip }
|
46
46
|
|
47
|
-
|
47
|
+
name = values[0]
|
48
48
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
49
|
-
key
|
50
|
-
|
49
|
+
key = name.downcase.gsub( /[^a-z]/, '' )
|
50
|
+
alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
|
51
51
|
|
52
|
-
recs << Record.new( key,
|
52
|
+
recs << Record.new( key, name, alt_names )
|
53
53
|
end
|
54
54
|
recs
|
55
55
|
end
|
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
63
63
|
(records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
|
64
64
|
|
65
65
|
## build mapping lookup table
|
66
|
-
@
|
67
|
-
|
66
|
+
@known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
|
67
|
+
build_name_table_for_mapping( records_or_mapping )
|
68
68
|
else ## assume array of records
|
69
|
-
|
69
|
+
build_name_table_for_records( records_or_mapping )
|
70
70
|
end
|
71
71
|
|
72
72
|
## build lookup hash by record (e.g. team/club/etc.) key
|
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
85
85
|
|
86
86
|
|
87
87
|
|
88
|
-
def
|
88
|
+
def map_names!( line ) ## rename to just map! - why?? why not???
|
89
89
|
begin
|
90
|
-
found =
|
90
|
+
found = map_name_for!( @tag, line, @known_names )
|
91
91
|
end while found
|
92
92
|
end
|
93
93
|
|
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
110
110
|
|
111
111
|
|
112
112
|
private
|
113
|
-
def
|
114
|
-
|
113
|
+
def build_name_table_for_mapping( mapping )
|
114
|
+
known_names = []
|
115
115
|
|
116
|
-
mapping.each do |
|
116
|
+
mapping.each do |name, rec|
|
117
117
|
m = MappingStruct.new
|
118
118
|
m.key = rec.key
|
119
|
-
m.
|
120
|
-
m.length =
|
121
|
-
m.pattern = Regexp.escape(
|
119
|
+
m.name = name
|
120
|
+
m.length = name.length
|
121
|
+
m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
|
122
122
|
|
123
|
-
|
123
|
+
known_names << m
|
124
124
|
end
|
125
125
|
|
126
126
|
## note: sort here by length (largest goes first - best match)
|
127
|
-
|
128
|
-
|
127
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
128
|
+
known_names
|
129
129
|
end
|
130
130
|
|
131
|
-
def
|
131
|
+
def build_name_table_for_records( records )
|
132
132
|
|
133
|
-
## build known tracks table w/
|
133
|
+
## build known tracks table w/ alt names e.g.
|
134
134
|
#
|
135
135
|
# [[ 'wolfsburg', 'VfL Wolfsburg'],
|
136
136
|
# [ 'augsburg', 'FC Augsburg'],
|
@@ -138,65 +138,65 @@ private
|
|
138
138
|
# [ 'augsburg', 'Augi3' ],
|
139
139
|
# [ 'stuttgart', 'VfB Stuttgart']]
|
140
140
|
|
141
|
-
|
141
|
+
known_names = []
|
142
142
|
|
143
143
|
records.each_with_index do |rec,index|
|
144
144
|
|
145
|
-
|
146
|
-
|
145
|
+
name_candidates = []
|
146
|
+
name_candidates << rec.name
|
147
147
|
|
148
|
-
|
148
|
+
name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
|
149
149
|
|
150
150
|
|
151
|
-
## check if
|
152
|
-
# make
|
151
|
+
## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
|
152
|
+
# make subname optional by adding name w/o subname e.g. Grand Prix Japan
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
154
|
+
names = []
|
155
|
+
name_candidates.each do |t|
|
156
|
+
names << t
|
157
157
|
if t =~ /\(.+\)/
|
158
|
-
|
158
|
+
extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
|
159
159
|
# note: strip leading and trailing whitespaces too!
|
160
160
|
# -- todo: add squish or something if () is inline e.g. leaves two spaces?
|
161
|
-
|
162
|
-
|
161
|
+
extra_name.strip!
|
162
|
+
names << extra_name
|
163
163
|
end
|
164
164
|
end
|
165
165
|
|
166
|
-
|
166
|
+
names.each do |name|
|
167
167
|
m = MappingStruct.new
|
168
168
|
m.key = rec.key
|
169
|
-
m.
|
170
|
-
m.length =
|
169
|
+
m.name = name
|
170
|
+
m.length = name.length
|
171
171
|
## note: escape for regex plus allow subs for special chars/accents
|
172
|
-
m.pattern =
|
172
|
+
m.pattern = name_esc_regex( name )
|
173
173
|
|
174
|
-
|
174
|
+
known_names << m
|
175
175
|
end
|
176
176
|
|
177
|
-
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{
|
177
|
+
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
|
178
178
|
|
179
179
|
## note: only include code field - if defined
|
180
180
|
if rec.respond_to?(:code) && rec.code && !rec.code.empty?
|
181
181
|
m = MappingStruct.new
|
182
182
|
m.key = rec.key
|
183
|
-
m.
|
183
|
+
m.name = rec.code
|
184
184
|
m.length = rec.code.length
|
185
185
|
m.pattern = rec.code ## note: use code for now as is (no variants allowed for now)
|
186
186
|
|
187
|
-
|
187
|
+
known_names << m
|
188
188
|
end
|
189
189
|
end
|
190
190
|
|
191
191
|
## note: sort here by length (largest goes first - best match)
|
192
192
|
# exclude code and key (key should always go last)
|
193
|
-
|
194
|
-
|
193
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
194
|
+
known_names
|
195
195
|
end
|
196
196
|
|
197
197
|
|
198
198
|
|
199
|
-
def
|
199
|
+
def map_name_for!( tag, line, mappings )
|
200
200
|
mappings.each do |mapping|
|
201
201
|
key = mapping.key
|
202
202
|
pattern = mapping.pattern
|
@@ -234,9 +234,9 @@ private
|
|
234
234
|
|
235
235
|
|
236
236
|
####
|
237
|
-
#
|
237
|
+
# name helper cut-n-paste copy from TextUtils
|
238
238
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
|
239
|
-
def
|
239
|
+
def name_esc_regex( name_unescaped )
|
240
240
|
|
241
241
|
## escape regex special chars e.g.
|
242
242
|
# . to \. and
|
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
|
|
257
257
|
# e.g. Club Atlético Colón (Santa Fe)
|
258
258
|
# e.g. Bauer Anton (????)
|
259
259
|
|
260
|
-
##
|
261
|
-
##
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
260
|
+
## note: cannot use Regexp.escape! will escape space ' ' to '\ '
|
261
|
+
## name = Regexp.escape( name_unescaped )
|
262
|
+
name = name_unescaped.gsub( '.', '\.' )
|
263
|
+
name = name.gsub( '(', '\(' )
|
264
|
+
name = name.gsub( ')', '\)' )
|
265
|
+
name = name.gsub( '?', '\?' )
|
266
|
+
name = name.gsub( '*', '\*' )
|
267
|
+
name = name.gsub( '+', '\+' )
|
268
|
+
name = name.gsub( '$', '\$' )
|
269
|
+
name = name.gsub( '^', '\^' )
|
270
270
|
|
271
271
|
## match accented char with or without accents
|
272
272
|
## add (ü|ue) etc.
|
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
|
|
309
309
|
## collect some more (real-world) examples first!!!!!
|
310
310
|
|
311
311
|
alternatives.each do |alt|
|
312
|
-
|
312
|
+
name = name.gsub( alt[0], alt[1] )
|
313
313
|
end
|
314
314
|
|
315
|
-
|
315
|
+
name
|
316
316
|
end
|
317
317
|
|
318
318
|
end # class MapperV2
|
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
|
|
82
82
|
# team1 team2 - match (will get new auto-matchday! not last round)
|
83
83
|
@last_round = nil
|
84
84
|
|
85
|
-
|
85
|
+
name = find_group_name!( line )
|
86
86
|
|
87
|
-
logger.debug "
|
88
|
-
logger.debug " pos: >#{pos}<"
|
87
|
+
logger.debug " name: >#{name}<"
|
89
88
|
logger.debug " line: >#{line}<"
|
90
89
|
|
91
|
-
group = @groups[
|
90
|
+
group = @groups[ name ]
|
92
91
|
if group.nil?
|
93
|
-
puts "!! ERROR - no group def found for >#{
|
92
|
+
puts "!! ERROR - no group def found for >#{name}<"
|
94
93
|
exit 1
|
95
94
|
end
|
96
95
|
|
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
|
|
104
103
|
@mapper_teams.map_teams!( line )
|
105
104
|
teams = @mapper_teams.find_teams!( line )
|
106
105
|
|
107
|
-
|
106
|
+
name = find_group_name!( line )
|
108
107
|
|
109
108
|
logger.debug " line: >#{line}<"
|
110
109
|
|
111
|
-
group
|
112
|
-
|
113
|
-
teams: teams.map {|team| team.
|
110
|
+
## todo/check/fix: add back group key - why? why not?
|
111
|
+
group = Import::Group.new( name: name,
|
112
|
+
teams: teams.map {|team| team.name } )
|
114
113
|
|
115
|
-
@groups[
|
114
|
+
@groups[ name ] = group
|
116
115
|
end
|
117
116
|
|
118
117
|
|
119
|
-
def
|
118
|
+
def find_group_name!( line )
|
120
119
|
## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
|
121
120
|
## nb: (?:) = is for non-capturing group(ing)
|
122
121
|
|
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
|
|
125
124
|
|
126
125
|
## todo:
|
127
126
|
## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
|
128
|
-
regex =
|
127
|
+
regex = /\b
|
128
|
+
(?:
|
129
|
+
(Group | Gruppe | Grupo)
|
130
|
+
[ ]+
|
131
|
+
(\d+ | [A-Z]+)
|
132
|
+
)
|
133
|
+
\b/x
|
129
134
|
|
130
135
|
m = regex.match( line )
|
131
136
|
|
132
|
-
return
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
when 'G' then 7
|
142
|
-
when 'H' then 8
|
143
|
-
when 'I' then 9
|
144
|
-
when 'J' then 10
|
145
|
-
when 'K' then 11
|
146
|
-
when 'L' then 12
|
147
|
-
when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
|
148
|
-
else m[1].to_i
|
149
|
-
end
|
150
|
-
|
151
|
-
title = m[0]
|
152
|
-
|
153
|
-
logger.debug " title: >#{title}<"
|
154
|
-
logger.debug " pos: >#{pos}<"
|
155
|
-
|
156
|
-
line.sub!( regex, '[GROUP.TITLE+POS]' )
|
157
|
-
|
158
|
-
[title,pos]
|
137
|
+
return nil if m.nil?
|
138
|
+
|
139
|
+
name = m[0]
|
140
|
+
|
141
|
+
logger.debug " name: >#{name}<"
|
142
|
+
|
143
|
+
line.sub!( name, '[GROUP.NAME]' )
|
144
|
+
|
145
|
+
name
|
159
146
|
end
|
160
147
|
|
161
148
|
|
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
|
|
180
167
|
end_date = end_date.to_date
|
181
168
|
|
182
169
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
knockout_flag = is_knockout_round?( title )
|
170
|
+
name = find_round_def_name!( line )
|
171
|
+
# NB: use extracted round name for knockout check
|
172
|
+
knockout_flag = is_knockout_round?( name )
|
187
173
|
|
188
174
|
|
189
175
|
logger.debug " start_date: #{start_date}"
|
190
176
|
logger.debug " end_date: #{end_date}"
|
191
|
-
logger.debug "
|
192
|
-
logger.debug " title: >#{title}<"
|
177
|
+
logger.debug " name: >#{name}<"
|
193
178
|
logger.debug " knockout_flag: #{knockout_flag}"
|
194
179
|
|
195
180
|
logger.debug " line: >#{line}<"
|
196
181
|
|
197
|
-
|
198
|
-
# todo/fix: add auto flag is false !!!! - why? why not?
|
199
|
-
round = Import::Round.new( pos: pos,
|
200
|
-
title: title,
|
182
|
+
round = Import::Round.new( name: name,
|
201
183
|
start_date: start_date,
|
202
184
|
end_date: end_date,
|
203
185
|
knockout: knockout_flag,
|
204
186
|
auto: false )
|
205
187
|
|
206
|
-
@rounds[
|
188
|
+
@rounds[ name ] = round
|
207
189
|
end
|
208
190
|
|
209
191
|
|
210
192
|
|
211
|
-
def
|
212
|
-
#
|
213
|
-
#
|
214
|
-
regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
|
215
|
-
|
216
|
-
# pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
|
217
|
-
# note: /\b(\d{1,3})\b/
|
218
|
-
# will match -12
|
219
|
-
# thus, use space required - will NOT match -2 e.g. Group-2 Play-off
|
220
|
-
# note: allow 1. Runde n
|
221
|
-
# 1^ Giornata
|
222
|
-
regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
|
223
|
-
|
224
|
-
if line =~ regex_pos
|
225
|
-
logger.debug " pos: >#{$1}<"
|
226
|
-
|
227
|
-
line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
|
228
|
-
return $1.to_i
|
229
|
-
elsif line =~ regex_num
|
230
|
-
## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
|
231
|
-
## NB: do NOT remove pos from string (will get removed by round title)
|
232
|
-
|
233
|
-
num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
|
234
|
-
|
235
|
-
#### fix:
|
236
|
-
# use/make keywords required
|
237
|
-
# e.g. Round of 16 -> should NOT match 16!
|
238
|
-
# Spiel um Platz 3 (or 5) etc -> should NOT match 3!
|
239
|
-
# Round 16 - ok
|
240
|
-
# thus, check for required keywords
|
193
|
+
def find_round_def_name!( line )
|
194
|
+
# assume everything before pipe (|) is the round name
|
195
|
+
# strip [ROUND.POS], todo:?? [ROUND.NAME2]
|
241
196
|
|
242
|
-
|
243
|
-
# todo: mask match e.g. Round of xxx ... and try again - might include something
|
244
|
-
# reuse pattern for Group XX Replays for example
|
245
|
-
if line =~ /^\s*Round of \d{1,3}\b/
|
246
|
-
return nil
|
247
|
-
end
|
248
|
-
|
249
|
-
logger.debug " pos: >#{num}<"
|
250
|
-
return num
|
251
|
-
else
|
252
|
-
## fix: add logger.warn no round pos found in line
|
253
|
-
return nil
|
254
|
-
end
|
255
|
-
end # method find_round_pos!
|
256
|
-
|
257
|
-
def find_round_def_title!( line )
|
258
|
-
# assume everything before pipe (\) is the round title
|
259
|
-
# strip [ROUND.POS], todo:?? [ROUND.TITLE2]
|
260
|
-
|
261
|
-
# todo/fix: add title2 w/ // or / why? why not?
|
197
|
+
# todo/fix: add name2 w/ // or / why? why not?
|
262
198
|
# -- strip / or / chars
|
263
199
|
|
264
200
|
buf = line.dup
|
265
|
-
logger.debug "
|
201
|
+
logger.debug " find_round_def_name! line-before: >>#{buf}<<"
|
266
202
|
|
267
203
|
## cut-off everything after (including) pipe (|)
|
268
204
|
buf = buf[ 0...buf.index('|') ]
|
269
|
-
|
270
|
-
# e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
271
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
|
272
|
-
# remove leading and trailing whitespace
|
273
205
|
buf.strip!
|
274
206
|
|
275
|
-
logger.debug "
|
207
|
+
logger.debug " find_round_def_name! line-after: >>#{buf}<<"
|
276
208
|
|
277
|
-
logger.debug "
|
278
|
-
line.sub!( buf, '[ROUND.
|
209
|
+
logger.debug " name: >>#{buf}<<"
|
210
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
279
211
|
|
280
212
|
buf
|
281
213
|
end
|
282
214
|
|
283
|
-
def find_round_header_title!( line )
|
284
|
-
# assume everything left is the round title
|
285
|
-
# extract all other items first (round title2, round pos, group title n pos, etc.)
|
286
215
|
|
287
|
-
##
|
288
|
-
##
|
289
|
-
##
|
290
|
-
##
|
291
|
-
##
|
216
|
+
## split by or || or |||
|
217
|
+
## or ++ or +++
|
218
|
+
## or -- or ---
|
219
|
+
## or // or ///
|
220
|
+
## note: allow Final | First Leg as ONE name same as
|
221
|
+
## Final - First Leg or
|
222
|
+
## Final, First Leg
|
223
|
+
## for cut-off always MUST be more than two chars
|
224
|
+
##
|
225
|
+
## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
|
226
|
+
## todo/fix: move to parser utils and add a method split_name or such?
|
227
|
+
HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
|
228
|
+
(?:\|{2,} |
|
229
|
+
\+{2,} |
|
230
|
+
-{2,} |
|
231
|
+
\/{2,}
|
232
|
+
)
|
233
|
+
[ ]* ## allow (strip) trailing spaces
|
234
|
+
/x
|
235
|
+
|
236
|
+
def find_round_header_name!( line )
|
237
|
+
# assume everything left is the round name
|
238
|
+
# extract all other items first (round name2, round pos, group name n pos, etc.)
|
292
239
|
|
293
240
|
buf = line.dup
|
294
|
-
logger.debug "
|
241
|
+
logger.debug " find_round_header_name! line-before: >>#{buf}<<"
|
242
|
+
|
243
|
+
|
244
|
+
parts = buf.split( HEADER_SEP_RE )
|
245
|
+
buf = parts[0]
|
295
246
|
|
296
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
297
247
|
buf.strip! # remove leading and trailing whitespace
|
298
248
|
|
299
|
-
logger.debug "
|
249
|
+
logger.debug " find_round_name! line-after: >>#{buf}<<"
|
300
250
|
|
301
|
-
### bingo - assume what's left is the round
|
251
|
+
### bingo - assume what's left is the round name
|
302
252
|
|
303
|
-
logger.debug "
|
304
|
-
line.sub!( buf, '[ROUND.
|
253
|
+
logger.debug " name: >>#{buf}<<"
|
254
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
305
255
|
|
306
256
|
buf
|
307
257
|
end
|
308
258
|
|
259
|
+
## quick hack- collect all "fillwords" by language!!!!
|
260
|
+
## change later and add to sportdb-langs!!!!
|
261
|
+
##
|
262
|
+
## strip all "fillwords" e.g.:
|
263
|
+
## Nachtrag/Postponed/Addition/Supplemento names
|
264
|
+
##
|
265
|
+
## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
|
266
|
+
ROUND_EXTRA_WORDS_RE = /\b(?:
|
267
|
+
Nachtrag | ## de
|
268
|
+
Postponed | ## en
|
269
|
+
Addition | ## en
|
270
|
+
Supplemento ## es
|
271
|
+
)
|
272
|
+
\b/ix
|
309
273
|
|
310
274
|
def parse_round_header( line )
|
311
275
|
logger.debug "parsing round header line: >#{line}<"
|
312
276
|
|
313
|
-
|
314
|
-
# make sure Round of 16 will not return pos 16 -- how? possible?
|
315
|
-
# add unit test too to verify
|
316
|
-
pos = find_round_pos!( line )
|
317
|
-
|
318
|
-
title = find_round_header_title!( line )
|
277
|
+
name = find_round_header_name!( line )
|
319
278
|
|
320
279
|
logger.debug " line: >#{line}<"
|
321
280
|
|
281
|
+
name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
|
282
|
+
name = name.strip
|
322
283
|
|
323
|
-
round = @rounds[
|
284
|
+
round = @rounds[ name ]
|
324
285
|
if round.nil? ## auto-add / create if missing
|
325
|
-
|
326
|
-
|
327
|
-
@rounds[
|
286
|
+
## todo/check: add num (was pos) if present - why? why not?
|
287
|
+
round = Import::Round.new( name: name )
|
288
|
+
@rounds[ name ] = round
|
328
289
|
end
|
329
290
|
|
330
291
|
## todo/check: if pos match (MUST always match for now)
|
331
292
|
@last_round = round
|
332
293
|
@last_group = nil # note: reset group to no group - why? why not?
|
333
|
-
|
334
|
-
|
335
|
-
## NB: dummy/placeholder start_at, end_at date
|
336
|
-
## replace/patch after adding all games for round
|
337
|
-
|
338
|
-
=begin
|
339
|
-
round_attribs = {
|
340
|
-
title: title,
|
341
|
-
title2: title2,
|
342
|
-
knockout: knockout_flag
|
343
|
-
}
|
344
|
-
|
345
|
-
if pos > 999000
|
346
|
-
# no pos (e.g. will get autonumbered later) - try match by title for now
|
347
|
-
# e.g. lets us use title 'Group Replays', for example, multiple times
|
348
|
-
@round = Round.find_by_event_id_and_title( @event.id, title )
|
349
|
-
else
|
350
|
-
@round = Round.find_by_event_id_and_pos( @event.id, pos )
|
351
|
-
end
|
352
|
-
|
353
|
-
if @round.present?
|
354
|
-
logger.debug "update round #{@round.id}:"
|
355
|
-
else
|
356
|
-
logger.debug "create round:"
|
357
|
-
@round = Round.new
|
358
|
-
|
359
|
-
round_attribs = round_attribs.merge( {
|
360
|
-
event_id: @event.id,
|
361
|
-
pos: pos,
|
362
|
-
start_at: Date.parse('1911-11-11'),
|
363
|
-
end_at: Date.parse('1911-11-11')
|
364
|
-
})
|
365
|
-
end
|
366
|
-
|
367
|
-
logger.debug round_attribs.to_json
|
368
|
-
|
369
|
-
@round.update_attributes!( round_attribs )
|
370
|
-
|
371
|
-
@patch_round_ids_pos << @round.id if pos > 999000
|
372
|
-
### store list of round ids for patching start_at/end_at at the end
|
373
|
-
@patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
|
374
|
-
=end
|
375
294
|
end
|
376
295
|
|
377
296
|
|
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
|
|
457
376
|
## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
|
458
377
|
|
459
378
|
@matches << Import::Match.new( date: date,
|
460
|
-
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.
|
461
|
-
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.
|
379
|
+
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
|
380
|
+
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
462
381
|
score: score,
|
463
|
-
round: round ? round.
|
464
|
-
group: @last_group ? @last_group.
|
382
|
+
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
383
|
+
group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
|
465
384
|
|
466
385
|
### todo: cache team lookups in hash?
|
467
386
|
|
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
517
436
|
|
518
437
|
round_attribs = {
|
519
438
|
event_id: @event.id,
|
520
|
-
|
439
|
+
name: "Matchday #{date.to_date}",
|
521
440
|
pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
|
522
441
|
start_at: date.to_date,
|
523
442
|
end_at: date.to_date
|
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
541
460
|
end
|
542
461
|
|
543
462
|
## note: will crash (round.pos) if round is nil
|
544
|
-
logger.debug( " using round #{round.pos} >#{round.
|
463
|
+
logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
|
545
464
|
else
|
546
465
|
## use round from last round header
|
547
466
|
round = @round
|