sportdb-formats 1.0.3 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +1 -0
- data/lib/sportdb/formats/country/country_index.rb +7 -7
- data/lib/sportdb/formats/country/country_reader.rb +26 -6
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/league/league_reader.rb +3 -3
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +99 -180
- data/lib/sportdb/formats/match/match_parser_csv.rb +19 -5
- data/lib/sportdb/formats/package.rb +36 -5
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_parser.rb +6 -0
- data/lib/sportdb/formats/structs/country.rb +6 -3
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/match.rb +5 -1
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +1 -2
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +1 -0
- data/test/test_country_index.rb +4 -4
- data/test/test_country_reader.rb +34 -4
- data/test/test_match_auto_relegation.rb +41 -0
- data/test/test_regex.rb +25 -7
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4cdd2bc410771494ed506a24d384ca3c8b1c9684
|
4
|
+
data.tar.gz: 066f5288da503a00efe280369f57d6cd65bf4bf7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f61edee9495047fc49dfb5720c7afc1e0e316e5fab024d8fc0b1bd5fcdad70524f6e22d24d8fad0aa679380565e0a0c7e36fae79c170655f8a7a496dee170aca
|
7
|
+
data.tar.gz: b378202d2c8152ac46386618d3c69c03d802b1439e74175f8d84fad304ce111edb0bb1cd848fc969848ce55e0cf8de15a46bec322351d5d199652ed16d41b164
|
data/Manifest.txt
CHANGED
data/lib/sportdb/formats/country/country_index.rb
CHANGED
@@ -30,11 +30,11 @@ class CountryIndex
|
|
30
30
|
## auto-fill countries
|
31
31
|
## pp recs
|
32
32
|
recs.each do |rec|
|
33
|
-
## rec e.g. { key:'af',
|
33
|
+
## rec e.g. { key:'af', code:'AFG', name:'Afghanistan'}
|
34
34
|
|
35
35
|
@countries << rec
|
36
36
|
|
37
|
-
## add codes lookups - key,
|
37
|
+
## add codes lookups - key, code, ...
|
38
38
|
if @countries_by_code[ rec.key ]
|
39
39
|
puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
|
40
40
|
exit 1
|
@@ -42,13 +42,13 @@ class CountryIndex
|
|
42
42
|
@countries_by_code[ rec.key ] = rec
|
43
43
|
end
|
44
44
|
|
45
|
-
## add
|
46
|
-
if rec.key != rec.
|
47
|
-
if @countries_by_code[ rec.
|
48
|
-
puts "** !! ERROR !! country code
|
45
|
+
## add code (only) if different from key
|
46
|
+
if rec.key != rec.code.downcase
|
47
|
+
if @countries_by_code[ rec.code.downcase ]
|
48
|
+
puts "** !! ERROR !! country code >#{rec.code.downcase}< already exits!!"
|
49
49
|
exit 1
|
50
50
|
else
|
51
|
-
@countries_by_code[ rec.
|
51
|
+
@countries_by_code[ rec.code.downcase ] = rec
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
data/lib/sportdb/formats/country/country_reader.rb
CHANGED
@@ -43,11 +43,31 @@ def parse
|
|
43
43
|
# e.g. East Germany (-1989) => East Germany (-1989)
|
44
44
|
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
45
45
|
last_country.alt_names += values
|
46
|
+
elsif line =~ /^-[ ]*(\d{4})
|
47
|
+
[ ]+
|
48
|
+
(.+)$
|
49
|
+
/x ## check for historic lines e.g. -1989
|
50
|
+
year = $1.to_i
|
51
|
+
parts = $2.split( /=>|⇒/ )
|
52
|
+
values = parts[0].split( ',' )
|
53
|
+
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
54
|
+
|
55
|
+
name = values[0]
|
56
|
+
code = values[1]
|
57
|
+
|
58
|
+
last_country = country = Country.new( name: "#{name} (-#{year})",
|
59
|
+
code: code )
|
60
|
+
## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
|
61
|
+
|
62
|
+
countries << country
|
63
|
+
## todo/fix: add reference to country today (in parts[1] !!!!)
|
46
64
|
else
|
47
65
|
## assume "regular" line
|
48
66
|
## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
|
49
67
|
## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
|
50
|
-
if line =~ /^([a-z]{2,4})
|
68
|
+
if line =~ /^([a-z]{2,4})
|
69
|
+
[ ]+
|
70
|
+
(.+)$/x
|
51
71
|
key = $1
|
52
72
|
values = $2.split( ',' )
|
53
73
|
## strip and squish (white)spaces
|
@@ -60,14 +80,14 @@ def parse
|
|
60
80
|
geos = split_geo( values[0] )
|
61
81
|
name = geos[0] ## note: ignore all other geos for now
|
62
82
|
|
63
|
-
## note: allow
|
64
|
-
|
83
|
+
## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
|
84
|
+
code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
|
65
85
|
values[1]
|
66
86
|
else
|
67
87
|
if values[1]
|
68
|
-
puts "** !!! ERROR !!! wrong
|
88
|
+
puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
|
69
89
|
else
|
70
|
-
puts "** !!! ERROR !!! missing
|
90
|
+
puts "** !!! ERROR !!! missing code for (canonical) country name"
|
71
91
|
end
|
72
92
|
exit 1
|
73
93
|
end
|
@@ -80,7 +100,7 @@ def parse
|
|
80
100
|
|
81
101
|
last_country = country = Country.new( key: key,
|
82
102
|
name: name,
|
83
|
-
|
103
|
+
code: code,
|
84
104
|
tags: tags )
|
85
105
|
countries << country
|
86
106
|
else
|
data/lib/sportdb/formats/league/league_outline_reader.rb
CHANGED
@@ -121,14 +121,31 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
121
121
|
values
|
122
122
|
end
|
123
123
|
|
124
|
-
def check_stage( name )
|
125
|
-
known_stages = ['regular season',
|
126
|
-
'championship round',
|
127
|
-
'relegation round',
|
128
|
-
'play-offs'
|
129
|
-
]
|
130
124
|
|
131
|
-
|
125
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
126
|
+
KNOWN_STAGES = [
|
127
|
+
'Regular Season',
|
128
|
+
'Regular Stage',
|
129
|
+
'Championship Round',
|
130
|
+
'Championship Playoff',
|
131
|
+
'Relegation Round',
|
132
|
+
'Relegation Playoff',
|
133
|
+
'Play-offs',
|
134
|
+
'Playoff Stage',
|
135
|
+
'Grunddurchgang',
|
136
|
+
'Finaldurchgang - Qualifikationsgruppe',
|
137
|
+
'Finaldurchgang - Qualifikation',
|
138
|
+
'Finaldurchgang - Meistergruppe',
|
139
|
+
'Finaldurchgang - Meister',
|
140
|
+
'EL Play-off',
|
141
|
+
'Europa League Play-off',
|
142
|
+
'Europa-League-Play-offs',
|
143
|
+
].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
|
144
|
+
|
145
|
+
|
146
|
+
def check_stage( name )
|
147
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
148
|
+
if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
|
132
149
|
## everything ok
|
133
150
|
else
|
134
151
|
puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
|
data/lib/sportdb/formats/league/league_reader.rb
CHANGED
@@ -113,9 +113,9 @@ def parse
|
|
113
113
|
## add a list of (auto-)excluded country codes with conflicts? why? why not?
|
114
114
|
## cl - a) Chile b) Champions League
|
115
115
|
alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
116
|
-
if country.key.upcase != country.
|
117
|
-
alt_names_auto << "#{country.
|
118
|
-
alt_names_auto << "#{country.
|
116
|
+
if country.key.upcase != country.code
|
117
|
+
alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
|
118
|
+
alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
119
119
|
end
|
120
120
|
alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
|
121
121
|
else ## assume int'l (no country) e.g. champions league, etc.
|
data/lib/sportdb/formats/match/mapper.rb
CHANGED
@@ -7,21 +7,21 @@ module SportDb
|
|
7
7
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
|
8
8
|
|
9
9
|
|
10
|
-
class MapperV2 ## todo/check: rename to NameMapper
|
10
|
+
class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
|
11
11
|
|
12
12
|
include Logging
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
|
15
15
|
|
16
16
|
########
|
17
17
|
## key: e.g. augsburg
|
18
|
-
##
|
19
|
-
## length (of
|
20
|
-
MappingStruct = Struct.new( :key, :
|
18
|
+
## name: e.g. FC Augsburg
|
19
|
+
## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
|
20
|
+
MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
|
21
21
|
|
22
22
|
######
|
23
23
|
## convenience helper - (auto)build ActiveRecord-like team records/structs
|
24
|
-
Record = Struct.new( :key, :
|
24
|
+
Record = Struct.new( :key, :name, :alt_names )
|
25
25
|
def build_records( txt_or_lines )
|
26
26
|
recs = []
|
27
27
|
|
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
44
44
|
values = line.split( '|' )
|
45
45
|
values = values.map { |value| value.strip }
|
46
46
|
|
47
|
-
|
47
|
+
name = values[0]
|
48
48
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
49
|
-
key
|
50
|
-
|
49
|
+
key = name.downcase.gsub( /[^a-z]/, '' )
|
50
|
+
alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
|
51
51
|
|
52
|
-
recs << Record.new( key,
|
52
|
+
recs << Record.new( key, name, alt_names )
|
53
53
|
end
|
54
54
|
recs
|
55
55
|
end
|
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
63
63
|
(records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
|
64
64
|
|
65
65
|
## build mapping lookup table
|
66
|
-
@
|
67
|
-
|
66
|
+
@known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
|
67
|
+
build_name_table_for_mapping( records_or_mapping )
|
68
68
|
else ## assume array of records
|
69
|
-
|
69
|
+
build_name_table_for_records( records_or_mapping )
|
70
70
|
end
|
71
71
|
|
72
72
|
## build lookup hash by record (e.g. team/club/etc.) key
|
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
85
85
|
|
86
86
|
|
87
87
|
|
88
|
-
def
|
88
|
+
def map_names!( line ) ## rename to just map! - why?? why not???
|
89
89
|
begin
|
90
|
-
found =
|
90
|
+
found = map_name_for!( @tag, line, @known_names )
|
91
91
|
end while found
|
92
92
|
end
|
93
93
|
|
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
110
110
|
|
111
111
|
|
112
112
|
private
|
113
|
-
def
|
114
|
-
|
113
|
+
def build_name_table_for_mapping( mapping )
|
114
|
+
known_names = []
|
115
115
|
|
116
|
-
mapping.each do |
|
116
|
+
mapping.each do |name, rec|
|
117
117
|
m = MappingStruct.new
|
118
118
|
m.key = rec.key
|
119
|
-
m.
|
120
|
-
m.length =
|
121
|
-
m.pattern = Regexp.escape(
|
119
|
+
m.name = name
|
120
|
+
m.length = name.length
|
121
|
+
m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
|
122
122
|
|
123
|
-
|
123
|
+
known_names << m
|
124
124
|
end
|
125
125
|
|
126
126
|
## note: sort here by length (largest goes first - best match)
|
127
|
-
|
128
|
-
|
127
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
128
|
+
known_names
|
129
129
|
end
|
130
130
|
|
131
|
-
def
|
131
|
+
def build_name_table_for_records( records )
|
132
132
|
|
133
|
-
## build known tracks table w/
|
133
|
+
## build known tracks table w/ alt names e.g.
|
134
134
|
#
|
135
135
|
# [[ 'wolfsbrug', 'VfL Wolfsburg'],
|
136
136
|
# [ 'augsburg', 'FC Augsburg'],
|
@@ -138,65 +138,65 @@ private
|
|
138
138
|
# [ 'augsburg', 'Augi3' ],
|
139
139
|
# [ 'stuttgart', 'VfB Stuttgart']]
|
140
140
|
|
141
|
-
|
141
|
+
known_names = []
|
142
142
|
|
143
143
|
records.each_with_index do |rec,index|
|
144
144
|
|
145
|
-
|
146
|
-
|
145
|
+
name_candidates = []
|
146
|
+
name_candidates << rec.name
|
147
147
|
|
148
|
-
|
148
|
+
name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
|
149
149
|
|
150
150
|
|
151
|
-
## check if
|
152
|
-
# make
|
151
|
+
## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
|
152
|
+
# make subname optional by adding name w/o subname e.g. Grand Prix Japan
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
154
|
+
names = []
|
155
|
+
name_candidates.each do |t|
|
156
|
+
names << t
|
157
157
|
if t =~ /\(.+\)/
|
158
|
-
|
158
|
+
extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
|
159
159
|
# note: strip leading n trailing withspaces too!
|
160
160
|
# -- todo: add squish or something if () is inline e.g. leaves two spaces?
|
161
|
-
|
162
|
-
|
161
|
+
extra_name.strip!
|
162
|
+
names << extra_name
|
163
163
|
end
|
164
164
|
end
|
165
165
|
|
166
|
-
|
166
|
+
names.each do |name|
|
167
167
|
m = MappingStruct.new
|
168
168
|
m.key = rec.key
|
169
|
-
m.
|
170
|
-
m.length =
|
169
|
+
m.name = name
|
170
|
+
m.length = name.length
|
171
171
|
## note: escape for regex plus allow subs for special chars/accents
|
172
|
-
m.pattern =
|
172
|
+
m.pattern = name_esc_regex( name )
|
173
173
|
|
174
|
-
|
174
|
+
known_names << m
|
175
175
|
end
|
176
176
|
|
177
|
-
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{
|
177
|
+
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
|
178
178
|
|
179
179
|
## note: only include code field - if defined
|
180
180
|
if rec.respond_to?(:code) && rec.code && !rec.code.empty?
|
181
181
|
m = MappingStruct.new
|
182
182
|
m.key = rec.key
|
183
|
-
m.
|
183
|
+
m.name = rec.code
|
184
184
|
m.length = rec.code.length
|
185
185
|
m.pattern = rec.code ## note: use code for now as is (no variants allowed fow now)
|
186
186
|
|
187
|
-
|
187
|
+
known_names << m
|
188
188
|
end
|
189
189
|
end
|
190
190
|
|
191
191
|
## note: sort here by length (largest goes first - best match)
|
192
192
|
# exclude code and key (key should always go last)
|
193
|
-
|
194
|
-
|
193
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
194
|
+
known_names
|
195
195
|
end
|
196
196
|
|
197
197
|
|
198
198
|
|
199
|
-
def
|
199
|
+
def map_name_for!( tag, line, mappings )
|
200
200
|
mappings.each do |mapping|
|
201
201
|
key = mapping.key
|
202
202
|
pattern = mapping.pattern
|
@@ -234,9 +234,9 @@ private
|
|
234
234
|
|
235
235
|
|
236
236
|
####
|
237
|
-
#
|
237
|
+
# name helper cut-n-paste copy from TextUtils
|
238
238
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
|
239
|
-
def
|
239
|
+
def name_esc_regex( name_unescaped )
|
240
240
|
|
241
241
|
## escape regex special chars e.g.
|
242
242
|
# . to \. and
|
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
|
|
257
257
|
# e.g. Club Atlético Colón (Santa Fe)
|
258
258
|
# e.g. Bauer Anton (????)
|
259
259
|
|
260
|
-
##
|
261
|
-
##
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
260
|
+
## note: cannot use Regexp.escape! will escape space '' to '\ '
|
261
|
+
## name = Regexp.escape( name_unescaped )
|
262
|
+
name = name_unescaped.gsub( '.', '\.' )
|
263
|
+
name = name.gsub( '(', '\(' )
|
264
|
+
name = name.gsub( ')', '\)' )
|
265
|
+
name = name.gsub( '?', '\?' )
|
266
|
+
name = name.gsub( '*', '\*' )
|
267
|
+
name = name.gsub( '+', '\+' )
|
268
|
+
name = name.gsub( '$', '\$' )
|
269
|
+
name = name.gsub( '^', '\^' )
|
270
270
|
|
271
271
|
## match accented char with or without accents
|
272
272
|
## add (ü|ue) etc.
|
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
|
|
309
309
|
## collect some more (real-world) examples first!!!!!
|
310
310
|
|
311
311
|
alternatives.each do |alt|
|
312
|
-
|
312
|
+
name = name.gsub( alt[0], alt[1] )
|
313
313
|
end
|
314
314
|
|
315
|
-
|
315
|
+
name
|
316
316
|
end
|
317
317
|
|
318
318
|
end # class MapperV2
|
data/lib/sportdb/formats/match/match_parser.rb
CHANGED
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
|
|
82
82
|
# team1 team2 - match (will get new auto-matchday! not last round)
|
83
83
|
@last_round = nil
|
84
84
|
|
85
|
-
|
85
|
+
name = find_group_name!( line )
|
86
86
|
|
87
|
-
logger.debug "
|
88
|
-
logger.debug " pos: >#{pos}<"
|
87
|
+
logger.debug " name: >#{name}<"
|
89
88
|
logger.debug " line: >#{line}<"
|
90
89
|
|
91
|
-
group = @groups[
|
90
|
+
group = @groups[ name ]
|
92
91
|
if group.nil?
|
93
|
-
puts "!! ERROR - no group def found for >#{
|
92
|
+
puts "!! ERROR - no group def found for >#{name}<"
|
94
93
|
exit 1
|
95
94
|
end
|
96
95
|
|
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
|
|
104
103
|
@mapper_teams.map_teams!( line )
|
105
104
|
teams = @mapper_teams.find_teams!( line )
|
106
105
|
|
107
|
-
|
106
|
+
name = find_group_name!( line )
|
108
107
|
|
109
108
|
logger.debug " line: >#{line}<"
|
110
109
|
|
111
|
-
group
|
112
|
-
|
113
|
-
teams: teams.map {|team| team.
|
110
|
+
## todo/check/fix: add back group key - why? why not?
|
111
|
+
group = Import::Group.new( name: name,
|
112
|
+
teams: teams.map {|team| team.name } )
|
114
113
|
|
115
|
-
@groups[
|
114
|
+
@groups[ name ] = group
|
116
115
|
end
|
117
116
|
|
118
117
|
|
119
|
-
def
|
118
|
+
def find_group_name!( line )
|
120
119
|
## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
|
121
120
|
## nb: (?:) = is for non-capturing group(ing)
|
122
121
|
|
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
|
|
125
124
|
|
126
125
|
## todo:
|
127
126
|
## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
|
128
|
-
regex =
|
127
|
+
regex = /\b
|
128
|
+
(?:
|
129
|
+
(Group | Gruppe | Grupo)
|
130
|
+
[ ]+
|
131
|
+
(\d+ | [A-Z]+)
|
132
|
+
)
|
133
|
+
\b/x
|
129
134
|
|
130
135
|
m = regex.match( line )
|
131
136
|
|
132
|
-
return
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
when 'G' then 7
|
142
|
-
when 'H' then 8
|
143
|
-
when 'I' then 9
|
144
|
-
when 'J' then 10
|
145
|
-
when 'K' then 11
|
146
|
-
when 'L' then 12
|
147
|
-
when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
|
148
|
-
else m[1].to_i
|
149
|
-
end
|
150
|
-
|
151
|
-
title = m[0]
|
152
|
-
|
153
|
-
logger.debug " title: >#{title}<"
|
154
|
-
logger.debug " pos: >#{pos}<"
|
155
|
-
|
156
|
-
line.sub!( regex, '[GROUP.TITLE+POS]' )
|
157
|
-
|
158
|
-
[title,pos]
|
137
|
+
return nil if m.nil?
|
138
|
+
|
139
|
+
name = m[0]
|
140
|
+
|
141
|
+
logger.debug " name: >#{name}<"
|
142
|
+
|
143
|
+
line.sub!( name, '[GROUP.NAME]' )
|
144
|
+
|
145
|
+
name
|
159
146
|
end
|
160
147
|
|
161
148
|
|
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
|
|
180
167
|
end_date = end_date.to_date
|
181
168
|
|
182
169
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
knockout_flag = is_knockout_round?( title )
|
170
|
+
name = find_round_def_name!( line )
|
171
|
+
# NB: use extracted round name for knockout check
|
172
|
+
knockout_flag = is_knockout_round?( name )
|
187
173
|
|
188
174
|
|
189
175
|
logger.debug " start_date: #{start_date}"
|
190
176
|
logger.debug " end_date: #{end_date}"
|
191
|
-
logger.debug "
|
192
|
-
logger.debug " title: >#{title}<"
|
177
|
+
logger.debug " name: >#{name}<"
|
193
178
|
logger.debug " knockout_flag: #{knockout_flag}"
|
194
179
|
|
195
180
|
logger.debug " line: >#{line}<"
|
196
181
|
|
197
|
-
|
198
|
-
# todo/fix: add auto flag is false !!!! - why? why not?
|
199
|
-
round = Import::Round.new( pos: pos,
|
200
|
-
title: title,
|
182
|
+
round = Import::Round.new( name: name,
|
201
183
|
start_date: start_date,
|
202
184
|
end_date: end_date,
|
203
185
|
knockout: knockout_flag,
|
204
186
|
auto: false )
|
205
187
|
|
206
|
-
@rounds[
|
188
|
+
@rounds[ name ] = round
|
207
189
|
end
|
208
190
|
|
209
191
|
|
210
192
|
|
211
|
-
def
|
212
|
-
#
|
213
|
-
#
|
214
|
-
regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
|
215
|
-
|
216
|
-
# pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
|
217
|
-
# note: /\b(\d{1,3})\b/
|
218
|
-
# will match -12
|
219
|
-
# thus, use space required - will NOT match -2 e.g. Group-2 Play-off
|
220
|
-
# note: allow 1. Runde n
|
221
|
-
# 1^ Giornata
|
222
|
-
regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
|
223
|
-
|
224
|
-
if line =~ regex_pos
|
225
|
-
logger.debug " pos: >#{$1}<"
|
226
|
-
|
227
|
-
line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
|
228
|
-
return $1.to_i
|
229
|
-
elsif line =~ regex_num
|
230
|
-
## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
|
231
|
-
## NB: do NOT remove pos from string (will get removed by round title)
|
232
|
-
|
233
|
-
num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
|
234
|
-
|
235
|
-
#### fix:
|
236
|
-
# use/make keywords required
|
237
|
-
# e.g. Round of 16 -> should NOT match 16!
|
238
|
-
# Spiel um Platz 3 (or 5) etc -> should NOT match 3!
|
239
|
-
# Round 16 - ok
|
240
|
-
# thus, check for required keywords
|
193
|
+
def find_round_def_name!( line )
|
194
|
+
# assume everything before pipe (\) is the round name
|
195
|
+
# strip [ROUND.POS], todo:?? [ROUND.NAME2]
|
241
196
|
|
242
|
-
|
243
|
-
# todo: mask match e.g. Round of xxx ... and try again - might include something
|
244
|
-
# reuse pattern for Group XX Replays for example
|
245
|
-
if line =~ /^\s*Round of \d{1,3}\b/
|
246
|
-
return nil
|
247
|
-
end
|
248
|
-
|
249
|
-
logger.debug " pos: >#{num}<"
|
250
|
-
return num
|
251
|
-
else
|
252
|
-
## fix: add logger.warn no round pos found in line
|
253
|
-
return nil
|
254
|
-
end
|
255
|
-
end # method find_round_pos!
|
256
|
-
|
257
|
-
def find_round_def_title!( line )
|
258
|
-
# assume everything before pipe (\) is the round title
|
259
|
-
# strip [ROUND.POS], todo:?? [ROUND.TITLE2]
|
260
|
-
|
261
|
-
# todo/fix: add title2 w/ // or / why? why not?
|
197
|
+
# todo/fix: add name2 w/ // or / why? why not?
|
262
198
|
# -- strip / or / chars
|
263
199
|
|
264
200
|
buf = line.dup
|
265
|
-
logger.debug "
|
201
|
+
logger.debug " find_round_def_name! line-before: >>#{buf}<<"
|
266
202
|
|
267
203
|
## cut-off everything after (including) pipe (|)
|
268
204
|
buf = buf[ 0...buf.index('|') ]
|
269
|
-
|
270
|
-
# e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
271
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
|
272
|
-
# remove leading and trailing whitespace
|
273
205
|
buf.strip!
|
274
206
|
|
275
|
-
logger.debug "
|
207
|
+
logger.debug " find_round_def_name! line-after: >>#{buf}<<"
|
276
208
|
|
277
|
-
logger.debug "
|
278
|
-
line.sub!( buf, '[ROUND.
|
209
|
+
logger.debug " name: >>#{buf}<<"
|
210
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
279
211
|
|
280
212
|
buf
|
281
213
|
end
|
282
214
|
|
283
|
-
def find_round_header_title!( line )
|
284
|
-
# assume everything left is the round title
|
285
|
-
# extract all other items first (round title2, round pos, group title n pos, etc.)
|
286
215
|
|
287
|
-
##
|
288
|
-
##
|
289
|
-
##
|
290
|
-
##
|
291
|
-
##
|
216
|
+
## split by or || or |||
|
217
|
+
## or ++ or +++
|
218
|
+
## or -- or ---
|
219
|
+
## or // or ///
|
220
|
+
## note: allow Final | First Leg as ONE name same as
|
221
|
+
## Final - First Leg or
|
222
|
+
## Final, First Leg
|
223
|
+
## for cut-off always MUST be more than two chars
|
224
|
+
##
|
225
|
+
## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
|
226
|
+
## todo/fix: move to parser utils and add a method split_name or such?
|
227
|
+
HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
|
228
|
+
(?:\|{2,} |
|
229
|
+
\+{2,} |
|
230
|
+
-{2,} |
|
231
|
+
\/{2,}
|
232
|
+
)
|
233
|
+
[ ]* ## allow (strip) trailing spaces
|
234
|
+
/x
|
235
|
+
|
236
|
+
def find_round_header_name!( line )
|
237
|
+
# assume everything left is the round name
|
238
|
+
# extract all other items first (round name2, round pos, group name n pos, etc.)
|
292
239
|
|
293
240
|
buf = line.dup
|
294
|
-
logger.debug "
|
241
|
+
logger.debug " find_round_header_name! line-before: >>#{buf}<<"
|
242
|
+
|
243
|
+
|
244
|
+
parts = buf.split( HEADER_SEP_RE )
|
245
|
+
buf = parts[0]
|
295
246
|
|
296
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
297
247
|
buf.strip! # remove leading and trailing whitespace
|
298
248
|
|
299
|
-
logger.debug "
|
249
|
+
logger.debug " find_round_name! line-after: >>#{buf}<<"
|
300
250
|
|
301
|
-
### bingo - assume what's left is the round
|
251
|
+
### bingo - assume what's left is the round name
|
302
252
|
|
303
|
-
logger.debug "
|
304
|
-
line.sub!( buf, '[ROUND.
|
253
|
+
logger.debug " name: >>#{buf}<<"
|
254
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
305
255
|
|
306
256
|
buf
|
307
257
|
end
|
308
258
|
|
259
|
+
## quick hack- collect all "fillwords" by language!!!!
|
260
|
+
## change later and add to sportdb-langs!!!!
|
261
|
+
##
|
262
|
+
## strip all "fillwords" e.g.:
|
263
|
+
## Nachtrag/Postponed/Addition/Supplemento names
|
264
|
+
##
|
265
|
+
## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
|
266
|
+
ROUND_EXTRA_WORDS_RE = /\b(?:
|
267
|
+
Nachtrag | ## de
|
268
|
+
Postponed | ## en
|
269
|
+
Addition | ## en
|
270
|
+
Supplemento ## es
|
271
|
+
)
|
272
|
+
\b/ix
|
309
273
|
|
310
274
|
def parse_round_header( line )
|
311
275
|
logger.debug "parsing round header line: >#{line}<"
|
312
276
|
|
313
|
-
|
314
|
-
# make sure Round of 16 will not return pos 16 -- how? possible?
|
315
|
-
# add unit test too to verify
|
316
|
-
pos = find_round_pos!( line )
|
317
|
-
|
318
|
-
title = find_round_header_title!( line )
|
277
|
+
name = find_round_header_name!( line )
|
319
278
|
|
320
279
|
logger.debug " line: >#{line}<"
|
321
280
|
|
281
|
+
name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
|
282
|
+
name = name.strip
|
322
283
|
|
323
|
-
round = @rounds[
|
284
|
+
round = @rounds[ name ]
|
324
285
|
if round.nil? ## auto-add / create if missing
|
325
|
-
|
326
|
-
|
327
|
-
@rounds[
|
286
|
+
## todo/check: add num (was pos) if present - why? why not?
|
287
|
+
round = Import::Round.new( name: name )
|
288
|
+
@rounds[ name ] = round
|
328
289
|
end
|
329
290
|
|
330
291
|
## todo/check: if pos match (MUST always match for now)
|
331
292
|
@last_round = round
|
332
293
|
@last_group = nil # note: reset group to no group - why? why not?
|
333
|
-
|
334
|
-
|
335
|
-
## NB: dummy/placeholder start_at, end_at date
|
336
|
-
## replace/patch after adding all games for round
|
337
|
-
|
338
|
-
=begin
|
339
|
-
round_attribs = {
|
340
|
-
title: title,
|
341
|
-
title2: title2,
|
342
|
-
knockout: knockout_flag
|
343
|
-
}
|
344
|
-
|
345
|
-
if pos > 999000
|
346
|
-
# no pos (e.g. will get autonumbered later) - try match by title for now
|
347
|
-
# e.g. lets us use title 'Group Replays', for example, multiple times
|
348
|
-
@round = Round.find_by_event_id_and_title( @event.id, title )
|
349
|
-
else
|
350
|
-
@round = Round.find_by_event_id_and_pos( @event.id, pos )
|
351
|
-
end
|
352
|
-
|
353
|
-
if @round.present?
|
354
|
-
logger.debug "update round #{@round.id}:"
|
355
|
-
else
|
356
|
-
logger.debug "create round:"
|
357
|
-
@round = Round.new
|
358
|
-
|
359
|
-
round_attribs = round_attribs.merge( {
|
360
|
-
event_id: @event.id,
|
361
|
-
pos: pos,
|
362
|
-
start_at: Date.parse('1911-11-11'),
|
363
|
-
end_at: Date.parse('1911-11-11')
|
364
|
-
})
|
365
|
-
end
|
366
|
-
|
367
|
-
logger.debug round_attribs.to_json
|
368
|
-
|
369
|
-
@round.update_attributes!( round_attribs )
|
370
|
-
|
371
|
-
@patch_round_ids_pos << @round.id if pos > 999000
|
372
|
-
### store list of round ids for patching start_at/end_at at the end
|
373
|
-
@patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
|
374
|
-
=end
|
375
294
|
end
|
376
295
|
|
377
296
|
|
@@ -457,11 +376,11 @@ class MatchParser ## simple match parser for team match schedules
|
|
457
376
|
## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
|
458
377
|
|
459
378
|
@matches << Import::Match.new( date: date,
|
460
|
-
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.
|
461
|
-
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.
|
379
|
+
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
|
380
|
+
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
462
381
|
score: score,
|
463
|
-
round: round ? round.
|
464
|
-
group: @last_group ? @last_group.
|
382
|
+
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
383
|
+
group: @last_group ? @last_group.name : nil ) ## note: for now always use string (assume unique canonical name for event)
|
465
384
|
|
466
385
|
### todo: cache team lookups in hash?
|
467
386
|
|
@@ -517,7 +436,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
517
436
|
|
518
437
|
round_attribs = {
|
519
438
|
event_id: @event.id,
|
520
|
-
|
439
|
+
name: "Matchday #{date.to_date}",
|
521
440
|
pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
|
522
441
|
start_at: date.to_date,
|
523
442
|
end_at: date.to_date
|
@@ -541,7 +460,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
541
460
|
end
|
542
461
|
|
543
462
|
## note: will crash (round.pos) if round is nil
|
544
|
-
logger.debug( " using round #{round.pos} >#{round.
|
463
|
+
logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
|
545
464
|
else
|
546
465
|
## use round from last round header
|
547
466
|
round = @round
|