sportdb-formats 1.0.5 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +8 -11
- data/Rakefile +1 -1
- data/lib/sportdb/formats.rb +19 -0
- data/lib/sportdb/formats/country/country_index.rb +2 -2
- data/lib/sportdb/formats/event/event_index.rb +141 -0
- data/lib/sportdb/formats/event/event_reader.rb +183 -0
- data/lib/sportdb/formats/league/league_index.rb +22 -18
- data/lib/sportdb/formats/league/league_outline_reader.rb +27 -7
- data/lib/sportdb/formats/league/league_reader.rb +7 -1
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +141 -193
- data/lib/sportdb/formats/match/match_parser_csv.rb +169 -25
- data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
- data/lib/sportdb/formats/name_helper.rb +4 -1
- data/lib/sportdb/formats/package.rb +57 -9
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_formats.rb +19 -0
- data/lib/sportdb/formats/score/score_parser.rb +10 -2
- data/lib/sportdb/formats/season_utils.rb +0 -11
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/match.rb +7 -1
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/season.rb +114 -45
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +8 -2
- data/lib/sportdb/formats/team/club_index.rb +13 -11
- data/lib/sportdb/formats/team/club_index_history.rb +138 -0
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
- data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +48 -81
- data/test/test_club_index_history.rb +107 -0
- data/test/test_club_reader_history.rb +212 -0
- data/test/test_country_reader.rb +2 -2
- data/test/test_datafile_package.rb +1 -1
- data/test/test_match_status_parser.rb +49 -0
- data/test/test_regex.rb +25 -7
- data/test/test_scores.rb +2 -0
- data/test/test_season.rb +68 -19
- metadata +12 -15
- data/test/test_conf.rb +0 -65
- data/test/test_csv_match_parser.rb +0 -114
- data/test/test_csv_match_parser_utils.rb +0 -20
- data/test/test_match_auto.rb +0 -72
- data/test/test_match_auto_champs.rb +0 -45
- data/test/test_match_auto_euro.rb +0 -37
- data/test/test_match_auto_worldcup.rb +0 -61
- data/test/test_match_champs.rb +0 -27
- data/test/test_match_eng.rb +0 -26
- data/test/test_match_euro.rb +0 -27
- data/test/test_match_worldcup.rb +0 -27
@@ -95,36 +95,40 @@ class LeagueIndex
|
|
95
95
|
end # method add
|
96
96
|
|
97
97
|
|
98
|
+
## helper to always convert (possible) country key to existing country record
|
99
|
+
## todo: make private - why? why not?
|
100
|
+
def country( country )
|
101
|
+
if country.is_a?( String ) || country.is_a?( Symbol )
|
102
|
+
## note: use own "global" countries index setting for ClubIndex - why? why not?
|
103
|
+
rec = catalog.countries.find( country.to_s )
|
104
|
+
if rec.nil?
|
105
|
+
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
106
|
+
exit 1
|
107
|
+
end
|
108
|
+
rec
|
109
|
+
else
|
110
|
+
country ## (re)use country struct - no need to run lookup again
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
|
98
115
|
def match( name )
|
99
|
-
##
|
116
|
+
## note: returns empty array if no match and NOT nil
|
100
117
|
name = normalize( name )
|
101
|
-
@leagues_by_name[ name ]
|
118
|
+
@leagues_by_name[ name ] || []
|
102
119
|
end
|
103
120
|
|
104
|
-
|
105
121
|
def match_by( name:, country: )
|
106
122
|
## note: match must for now always include name
|
107
123
|
m = match( name )
|
108
|
-
if
|
124
|
+
if country ## filter by country
|
109
125
|
## note: country assumes / allows the country key or fifa code for now
|
110
|
-
|
111
126
|
## note: allow passing in of country struct too
|
112
|
-
country_rec =
|
113
|
-
country ## (re)use country struct - no need to run lookup again
|
114
|
-
else
|
115
|
-
## note: use own "global" countries index setting for ClubIndex - why? why not?
|
116
|
-
rec = catalog.countries.find( country )
|
117
|
-
if rec.nil?
|
118
|
-
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
119
|
-
exit 1
|
120
|
-
end
|
121
|
-
rec
|
122
|
-
end
|
127
|
+
country_rec = country( country )
|
123
128
|
|
124
129
|
## note: also skip international leagues & cups (e.g. champions league etc.) for now - why? why not?
|
125
130
|
m = m.select { |league| league.country &&
|
126
131
|
league.country.key == country_rec.key }
|
127
|
-
m = nil if m.empty? ## note: reset to nil if no more matches
|
128
132
|
end
|
129
133
|
m
|
130
134
|
end
|
@@ -144,7 +148,7 @@ class LeagueIndex
|
|
144
148
|
m = match( name )
|
145
149
|
# pp m
|
146
150
|
|
147
|
-
if m.
|
151
|
+
if m.empty?
|
148
152
|
## fall through/do nothing
|
149
153
|
elsif m.size > 1
|
150
154
|
puts "** !!! ERROR - ambigious league name; too many leagues (#{m.size}) found:"
|
@@ -121,14 +121,34 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
121
121
|
values
|
122
122
|
end
|
123
123
|
|
124
|
-
def check_stage( name )
|
125
|
-
known_stages = ['regular season',
|
126
|
-
'championship round',
|
127
|
-
'relegation round',
|
128
|
-
'play-offs'
|
129
|
-
]
|
130
124
|
|
131
|
-
|
125
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
126
|
+
KNOWN_STAGES = [
|
127
|
+
'Regular Season',
|
128
|
+
'Regular Stage',
|
129
|
+
'Championship Round',
|
130
|
+
'Championship Playoff', # or Championship play-off
|
131
|
+
'Relegation Round',
|
132
|
+
'Relegation Playoff',
|
133
|
+
'Play-offs',
|
134
|
+
'Playoff Stage',
|
135
|
+
'Grunddurchgang',
|
136
|
+
'Finaldurchgang - Qualifikationsgruppe',
|
137
|
+
'Finaldurchgang - Qualifikation',
|
138
|
+
'Finaldurchgang - Meistergruppe',
|
139
|
+
'Finaldurchgang - Meister',
|
140
|
+
'EL Play-off',
|
141
|
+
'Europa League Play-off',
|
142
|
+
'Europa-League-Play-offs',
|
143
|
+
'Playoffs - Championship',
|
144
|
+
'Playoffs - Relegation',
|
145
|
+
'Finals',
|
146
|
+
].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
|
147
|
+
|
148
|
+
|
149
|
+
def check_stage( name )
|
150
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
151
|
+
if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
|
132
152
|
## everything ok
|
133
153
|
else
|
134
154
|
puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
|
@@ -118,12 +118,18 @@ def parse
|
|
118
118
|
alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
119
119
|
end
|
120
120
|
alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
|
121
|
+
|
122
|
+
## auto-add with country prepended
|
123
|
+
## e.g. England Premier League, Austria Bundesliga etc.
|
124
|
+
## todo/check: also add variants with country alt name if present!!!
|
125
|
+
## todo/check: exclude cups or such from country + league name auto-add - why? why not?
|
126
|
+
alt_names_auto << "#{country.name} #{league_name}"
|
121
127
|
else ## assume int'l (no country) e.g. champions league, etc.
|
122
128
|
## only auto-add key (e.g. CL, EL, etc.)
|
123
129
|
alt_names_auto << league_key.upcase.gsub('.', ' ') ## note: no country code (prefix/leading) used
|
124
130
|
end
|
125
131
|
|
126
|
-
pp alt_names_auto
|
132
|
+
## pp alt_names_auto
|
127
133
|
|
128
134
|
## prepend country key/code if country present
|
129
135
|
## todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
|
@@ -7,21 +7,21 @@ module SportDb
|
|
7
7
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
|
8
8
|
|
9
9
|
|
10
|
-
class MapperV2 ## todo/check: rename to NameMapper
|
10
|
+
class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
|
11
11
|
|
12
12
|
include Logging
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
|
15
15
|
|
16
16
|
########
|
17
17
|
## key: e.g. augsburg
|
18
|
-
##
|
19
|
-
## length (of
|
20
|
-
MappingStruct = Struct.new( :key, :
|
18
|
+
## name: e.g. FC Augsburg
|
19
|
+
## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
|
20
|
+
MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
|
21
21
|
|
22
22
|
######
|
23
23
|
## convenience helper - (auto)build ActiveRecord-like team records/structs
|
24
|
-
Record = Struct.new( :key, :
|
24
|
+
Record = Struct.new( :key, :name, :alt_names )
|
25
25
|
def build_records( txt_or_lines )
|
26
26
|
recs = []
|
27
27
|
|
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
44
44
|
values = line.split( '|' )
|
45
45
|
values = values.map { |value| value.strip }
|
46
46
|
|
47
|
-
|
47
|
+
name = values[0]
|
48
48
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
49
|
-
key
|
50
|
-
|
49
|
+
key = name.downcase.gsub( /[^a-z]/, '' )
|
50
|
+
alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
|
51
51
|
|
52
|
-
recs << Record.new( key,
|
52
|
+
recs << Record.new( key, name, alt_names )
|
53
53
|
end
|
54
54
|
recs
|
55
55
|
end
|
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
63
63
|
(records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
|
64
64
|
|
65
65
|
## build mapping lookup table
|
66
|
-
@
|
67
|
-
|
66
|
+
@known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
|
67
|
+
build_name_table_for_mapping( records_or_mapping )
|
68
68
|
else ## assume array of records
|
69
|
-
|
69
|
+
build_name_table_for_records( records_or_mapping )
|
70
70
|
end
|
71
71
|
|
72
72
|
## build lookup hash by record (e.g. team/club/etc.) key
|
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
85
85
|
|
86
86
|
|
87
87
|
|
88
|
-
def
|
88
|
+
def map_names!( line ) ## rename to just map! - why?? why not???
|
89
89
|
begin
|
90
|
-
found =
|
90
|
+
found = map_name_for!( @tag, line, @known_names )
|
91
91
|
end while found
|
92
92
|
end
|
93
93
|
|
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
110
110
|
|
111
111
|
|
112
112
|
private
|
113
|
-
def
|
114
|
-
|
113
|
+
def build_name_table_for_mapping( mapping )
|
114
|
+
known_names = []
|
115
115
|
|
116
|
-
mapping.each do |
|
116
|
+
mapping.each do |name, rec|
|
117
117
|
m = MappingStruct.new
|
118
118
|
m.key = rec.key
|
119
|
-
m.
|
120
|
-
m.length =
|
121
|
-
m.pattern = Regexp.escape(
|
119
|
+
m.name = name
|
120
|
+
m.length = name.length
|
121
|
+
m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
|
122
122
|
|
123
|
-
|
123
|
+
known_names << m
|
124
124
|
end
|
125
125
|
|
126
126
|
## note: sort here by length (largest goes first - best match)
|
127
|
-
|
128
|
-
|
127
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
128
|
+
known_names
|
129
129
|
end
|
130
130
|
|
131
|
-
def
|
131
|
+
def build_name_table_for_records( records )
|
132
132
|
|
133
|
-
## build known tracks table w/
|
133
|
+
## build known tracks table w/ alt names e.g.
|
134
134
|
#
|
135
135
|
# [[ 'wolfsbrug', 'VfL Wolfsburg'],
|
136
136
|
# [ 'augsburg', 'FC Augsburg'],
|
@@ -138,65 +138,65 @@ private
|
|
138
138
|
# [ 'augsburg', 'Augi3' ],
|
139
139
|
# [ 'stuttgart', 'VfB Stuttgart']]
|
140
140
|
|
141
|
-
|
141
|
+
known_names = []
|
142
142
|
|
143
143
|
records.each_with_index do |rec,index|
|
144
144
|
|
145
|
-
|
146
|
-
|
145
|
+
name_candidates = []
|
146
|
+
name_candidates << rec.name
|
147
147
|
|
148
|
-
|
148
|
+
name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
|
149
149
|
|
150
150
|
|
151
|
-
## check if
|
152
|
-
# make
|
151
|
+
## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
|
152
|
+
# make subname optional by adding name w/o subname e.g. Grand Prix Japan
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
154
|
+
names = []
|
155
|
+
name_candidates.each do |t|
|
156
|
+
names << t
|
157
157
|
if t =~ /\(.+\)/
|
158
|
-
|
158
|
+
extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
|
159
159
|
# note: strip leading n trailing withspaces too!
|
160
160
|
# -- todo: add squish or something if () is inline e.g. leaves two spaces?
|
161
|
-
|
162
|
-
|
161
|
+
extra_name.strip!
|
162
|
+
names << extra_name
|
163
163
|
end
|
164
164
|
end
|
165
165
|
|
166
|
-
|
166
|
+
names.each do |name|
|
167
167
|
m = MappingStruct.new
|
168
168
|
m.key = rec.key
|
169
|
-
m.
|
170
|
-
m.length =
|
169
|
+
m.name = name
|
170
|
+
m.length = name.length
|
171
171
|
## note: escape for regex plus allow subs for special chars/accents
|
172
|
-
m.pattern =
|
172
|
+
m.pattern = name_esc_regex( name )
|
173
173
|
|
174
|
-
|
174
|
+
known_names << m
|
175
175
|
end
|
176
176
|
|
177
|
-
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{
|
177
|
+
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
|
178
178
|
|
179
179
|
## note: only include code field - if defined
|
180
180
|
if rec.respond_to?(:code) && rec.code && !rec.code.empty?
|
181
181
|
m = MappingStruct.new
|
182
182
|
m.key = rec.key
|
183
|
-
m.
|
183
|
+
m.name = rec.code
|
184
184
|
m.length = rec.code.length
|
185
185
|
m.pattern = rec.code ## note: use code for now as is (no variants allowed fow now)
|
186
186
|
|
187
|
-
|
187
|
+
known_names << m
|
188
188
|
end
|
189
189
|
end
|
190
190
|
|
191
191
|
## note: sort here by length (largest goes first - best match)
|
192
192
|
# exclude code and key (key should always go last)
|
193
|
-
|
194
|
-
|
193
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
194
|
+
known_names
|
195
195
|
end
|
196
196
|
|
197
197
|
|
198
198
|
|
199
|
-
def
|
199
|
+
def map_name_for!( tag, line, mappings )
|
200
200
|
mappings.each do |mapping|
|
201
201
|
key = mapping.key
|
202
202
|
pattern = mapping.pattern
|
@@ -234,9 +234,9 @@ private
|
|
234
234
|
|
235
235
|
|
236
236
|
####
|
237
|
-
#
|
237
|
+
# name helper cut-n-paste copy from TextUtils
|
238
238
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
|
239
|
-
def
|
239
|
+
def name_esc_regex( name_unescaped )
|
240
240
|
|
241
241
|
## escape regex special chars e.g.
|
242
242
|
# . to \. and
|
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
|
|
257
257
|
# e.g. Club Atlético Colón (Santa Fe)
|
258
258
|
# e.g. Bauer Anton (????)
|
259
259
|
|
260
|
-
##
|
261
|
-
##
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
260
|
+
## note: cannot use Regexp.escape! will escape space ' ' to '\ '
|
261
|
+
## name = Regexp.escape( name_unescaped )
|
262
|
+
name = name_unescaped.gsub( '.', '\.' )
|
263
|
+
name = name.gsub( '(', '\(' )
|
264
|
+
name = name.gsub( ')', '\)' )
|
265
|
+
name = name.gsub( '?', '\?' )
|
266
|
+
name = name.gsub( '*', '\*' )
|
267
|
+
name = name.gsub( '+', '\+' )
|
268
|
+
name = name.gsub( '$', '\$' )
|
269
|
+
name = name.gsub( '^', '\^' )
|
270
270
|
|
271
271
|
## match accented char with or without accents
|
272
272
|
## add (ü|ue) etc.
|
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
|
|
309
309
|
## collect some more (real-world) examples first!!!!!
|
310
310
|
|
311
311
|
alternatives.each do |alt|
|
312
|
-
|
312
|
+
name = name.gsub( alt[0], alt[1] )
|
313
313
|
end
|
314
314
|
|
315
|
-
|
315
|
+
name
|
316
316
|
end
|
317
317
|
|
318
318
|
end # class MapperV2
|
@@ -82,15 +82,14 @@ class MatchParser ## simple match parser for team match schedules
|
|
82
82
|
# team1 team2 - match (will get new auto-matchday! not last round)
|
83
83
|
@last_round = nil
|
84
84
|
|
85
|
-
|
85
|
+
name = find_group_name!( line )
|
86
86
|
|
87
|
-
logger.debug "
|
88
|
-
logger.debug " pos: >#{pos}<"
|
87
|
+
logger.debug " name: >#{name}<"
|
89
88
|
logger.debug " line: >#{line}<"
|
90
89
|
|
91
|
-
group = @groups[
|
90
|
+
group = @groups[ name ]
|
92
91
|
if group.nil?
|
93
|
-
puts "!! ERROR - no group def found for >#{
|
92
|
+
puts "!! ERROR - no group def found for >#{name}<"
|
94
93
|
exit 1
|
95
94
|
end
|
96
95
|
|
@@ -104,19 +103,19 @@ class MatchParser ## simple match parser for team match schedules
|
|
104
103
|
@mapper_teams.map_teams!( line )
|
105
104
|
teams = @mapper_teams.find_teams!( line )
|
106
105
|
|
107
|
-
|
106
|
+
name = find_group_name!( line )
|
108
107
|
|
109
108
|
logger.debug " line: >#{line}<"
|
110
109
|
|
111
|
-
group
|
112
|
-
|
113
|
-
teams: teams.map {|team| team.
|
110
|
+
## todo/check/fix: add back group key - why? why not?
|
111
|
+
group = Import::Group.new( name: name,
|
112
|
+
teams: teams.map {|team| team.name } )
|
114
113
|
|
115
|
-
@groups[
|
114
|
+
@groups[ name ] = group
|
116
115
|
end
|
117
116
|
|
118
117
|
|
119
|
-
def
|
118
|
+
def find_group_name!( line )
|
120
119
|
## group pos - for now support single digit e.g 1,2,3 or letter e.g. A,B,C or HEX
|
121
120
|
## nb: (?:) = is for non-capturing group(ing)
|
122
121
|
|
@@ -125,37 +124,25 @@ class MatchParser ## simple match parser for team match schedules
|
|
125
124
|
|
126
125
|
## todo:
|
127
126
|
## check if Group A: or [Group A] works e.g. : or ] get matched by \b ???
|
128
|
-
regex =
|
127
|
+
regex = /\b
|
128
|
+
(?:
|
129
|
+
(Group | Gruppe | Grupo)
|
130
|
+
[ ]+
|
131
|
+
(\d+ | [A-Z]+)
|
132
|
+
)
|
133
|
+
\b/x
|
129
134
|
|
130
135
|
m = regex.match( line )
|
131
136
|
|
132
|
-
return
|
133
|
-
|
134
|
-
pos = case m[1]
|
135
|
-
when 'A' then 1
|
136
|
-
when 'B' then 2
|
137
|
-
when 'C' then 3
|
138
|
-
when 'D' then 4
|
139
|
-
when 'E' then 5
|
140
|
-
when 'F' then 6
|
141
|
-
when 'G' then 7
|
142
|
-
when 'H' then 8
|
143
|
-
when 'I' then 9
|
144
|
-
when 'J' then 10
|
145
|
-
when 'K' then 11
|
146
|
-
when 'L' then 12
|
147
|
-
when 'HEX' then 666 # HEX for Hexagonal - todo/check: map to something else ??
|
148
|
-
else m[1].to_i
|
149
|
-
end
|
137
|
+
return nil if m.nil?
|
150
138
|
|
151
|
-
|
139
|
+
name = m[0]
|
152
140
|
|
153
|
-
logger.debug "
|
154
|
-
logger.debug " pos: >#{pos}<"
|
141
|
+
logger.debug " name: >#{name}<"
|
155
142
|
|
156
|
-
line.sub!(
|
143
|
+
line.sub!( name, '[GROUP.NAME]' )
|
157
144
|
|
158
|
-
|
145
|
+
name
|
159
146
|
end
|
160
147
|
|
161
148
|
|
@@ -180,198 +167,130 @@ class MatchParser ## simple match parser for team match schedules
|
|
180
167
|
end_date = end_date.to_date
|
181
168
|
|
182
169
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
knockout_flag = is_knockout_round?( title )
|
170
|
+
name = find_round_def_name!( line )
|
171
|
+
# NB: use extracted round name for knockout check
|
172
|
+
knockout_flag = is_knockout_round?( name )
|
187
173
|
|
188
174
|
|
189
175
|
logger.debug " start_date: #{start_date}"
|
190
176
|
logger.debug " end_date: #{end_date}"
|
191
|
-
logger.debug "
|
192
|
-
logger.debug " title: >#{title}<"
|
177
|
+
logger.debug " name: >#{name}<"
|
193
178
|
logger.debug " knockout_flag: #{knockout_flag}"
|
194
179
|
|
195
180
|
logger.debug " line: >#{line}<"
|
196
181
|
|
197
|
-
|
198
|
-
# todo/fix: add auto flag is false !!!! - why? why not?
|
199
|
-
round = Import::Round.new( pos: pos,
|
200
|
-
title: title,
|
182
|
+
round = Import::Round.new( name: name,
|
201
183
|
start_date: start_date,
|
202
184
|
end_date: end_date,
|
203
185
|
knockout: knockout_flag,
|
204
186
|
auto: false )
|
205
187
|
|
206
|
-
@rounds[
|
188
|
+
@rounds[ name ] = round
|
207
189
|
end
|
208
190
|
|
209
191
|
|
210
192
|
|
211
|
-
def
|
212
|
-
#
|
213
|
-
#
|
214
|
-
regex_pos = /^[ \t]*\((\d{1,3})\)[ \t]+/
|
215
|
-
|
216
|
-
# pass #2) find free standing number e.g. Matchday 3 or Round 5 or 3. Spieltag etc.
|
217
|
-
# note: /\b(\d{1,3})\b/
|
218
|
-
# will match -12
|
219
|
-
# thus, use space required - will NOT match -2 e.g. Group-2 Play-off
|
220
|
-
# note: allow 1. Runde n
|
221
|
-
# 1^ Giornata
|
222
|
-
regex_num = /(?:^|\s)(\d{1,3})(?:[.\^\s]|$)/
|
223
|
-
|
224
|
-
if line =~ regex_pos
|
225
|
-
logger.debug " pos: >#{$1}<"
|
226
|
-
|
227
|
-
line.sub!( regex_pos, '[ROUND.POS] ' ) ## NB: add back trailing space that got swallowed w/ regex -> [ \t]+
|
228
|
-
return $1.to_i
|
229
|
-
elsif line =~ regex_num
|
230
|
-
## assume number in title is pos (e.g. Jornada 3, 3 Runde etc.)
|
231
|
-
## NB: do NOT remove pos from string (will get removed by round title)
|
232
|
-
|
233
|
-
num = $1.to_i # note: clone capture; keep a copy (another regex follows; will redefine $1)
|
234
|
-
|
235
|
-
#### fix:
|
236
|
-
# use/make keywords required
|
237
|
-
# e.g. Round of 16 -> should NOT match 16!
|
238
|
-
# Spiel um Platz 3 (or 5) etc -> should NOT match 3!
|
239
|
-
# Round 16 - ok
|
240
|
-
# thus, check for required keywords
|
241
|
-
|
242
|
-
## quick hack for round of 16
|
243
|
-
# todo: mask match e.g. Round of xxx ... and try again - might include something
|
244
|
-
# reuse pattern for Group XX Replays for example
|
245
|
-
if line =~ /^\s*Round of \d{1,3}\b/
|
246
|
-
return nil
|
247
|
-
end
|
248
|
-
|
249
|
-
logger.debug " pos: >#{num}<"
|
250
|
-
return num
|
251
|
-
else
|
252
|
-
## fix: add logger.warn no round pos found in line
|
253
|
-
return nil
|
254
|
-
end
|
255
|
-
end # method find_round_pos!
|
256
|
-
|
257
|
-
def find_round_def_title!( line )
|
258
|
-
# assume everything before pipe (\) is the round title
|
259
|
-
# strip [ROUND.POS], todo:?? [ROUND.TITLE2]
|
193
|
+
def find_round_def_name!( line )
|
194
|
+
# assume everything before pipe (|) is the round name
|
195
|
+
# strip [ROUND.POS], todo:?? [ROUND.NAME2]
|
260
196
|
|
261
|
-
# todo/fix: add
|
197
|
+
# todo/fix: add name2 w/ // or / why? why not?
|
262
198
|
# -- strip / or / chars
|
263
199
|
|
264
200
|
buf = line.dup
|
265
|
-
logger.debug "
|
201
|
+
logger.debug " find_round_def_name! line-before: >>#{buf}<<"
|
266
202
|
|
267
203
|
## cut-off everything after (including) pipe (|)
|
268
204
|
buf = buf[ 0...buf.index('|') ]
|
269
|
-
|
270
|
-
# e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
271
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) ## fix: use helper for (re)use e.g. remove_match_placeholder/marker or similar?
|
272
|
-
# remove leading and trailing whitespace
|
273
205
|
buf.strip!
|
274
206
|
|
275
|
-
logger.debug "
|
207
|
+
logger.debug " find_round_def_name! line-after: >>#{buf}<<"
|
276
208
|
|
277
|
-
logger.debug "
|
278
|
-
line.sub!( buf, '[ROUND.
|
209
|
+
logger.debug " name: >>#{buf}<<"
|
210
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
279
211
|
|
280
212
|
buf
|
281
213
|
end
|
282
214
|
|
283
|
-
def find_round_header_title!( line )
|
284
|
-
# assume everything left is the round title
|
285
|
-
# extract all other items first (round title2, round pos, group title n pos, etc.)
|
286
215
|
|
287
|
-
##
|
288
|
-
##
|
289
|
-
##
|
290
|
-
##
|
291
|
-
##
|
216
|
+
## split by || or |||
|
217
|
+
## or ++ or +++
|
218
|
+
## or -- or ---
|
219
|
+
## or // or ///
|
220
|
+
## note: allow Final | First Leg as ONE name same as
|
221
|
+
## Final - First Leg or
|
222
|
+
## Final, First Leg
|
223
|
+
## for cut-off the separator MUST always be two or more chars
|
224
|
+
##
|
225
|
+
## todo/check: find a better name than HEADER_SEP(ARATOR) - why? why not?
|
226
|
+
## todo/fix: move to parser utils and add a method split_name or such?
|
227
|
+
HEADER_SEP_RE = / [ ]* ## allow (strip) leading spaces
|
228
|
+
(?:\|{2,} |
|
229
|
+
\+{2,} |
|
230
|
+
-{2,} |
|
231
|
+
\/{2,}
|
232
|
+
)
|
233
|
+
[ ]* ## allow (strip) trailing spaces
|
234
|
+
/x
|
235
|
+
|
236
|
+
def find_round_header_name!( line )
|
237
|
+
# assume everything left is the round name
|
238
|
+
# extract all other items first (round name2, round pos, group name n pos, etc.)
|
292
239
|
|
293
240
|
buf = line.dup
|
294
|
-
logger.debug "
|
241
|
+
logger.debug " find_round_header_name! line-before: >>#{buf}<<"
|
242
|
+
|
243
|
+
|
244
|
+
parts = buf.split( HEADER_SEP_RE )
|
245
|
+
buf = parts[0]
|
295
246
|
|
296
|
-
buf.gsub!( /\[[^\]]+\]/, '' ) # e.g. remove [ROUND.POS], [ROUND.TITLE2], [GROUP.TITLE+POS] etc.
|
297
247
|
buf.strip! # remove leading and trailing whitespace
|
298
248
|
|
299
|
-
logger.debug "
|
249
|
+
logger.debug " find_round_name! line-after: >>#{buf}<<"
|
300
250
|
|
301
|
-
### bingo - assume what's left is the round
|
251
|
+
### bingo - assume what's left is the round name
|
302
252
|
|
303
|
-
logger.debug "
|
304
|
-
line.sub!( buf, '[ROUND.
|
253
|
+
logger.debug " name: >>#{buf}<<"
|
254
|
+
line.sub!( buf, '[ROUND.NAME]' )
|
305
255
|
|
306
256
|
buf
|
307
257
|
end
|
308
258
|
|
259
|
+
## quick hack- collect all "fillwords" by language!!!!
|
260
|
+
## change later and add to sportdb-langs!!!!
|
261
|
+
##
|
262
|
+
## strip all "fillwords" e.g.:
|
263
|
+
## Nachtrag/Postponed/Addition/Supplemento names
|
264
|
+
##
|
265
|
+
## todo/change: find a better name for ROUND_EXTRA_WORDS - why? why not?
|
266
|
+
ROUND_EXTRA_WORDS_RE = /\b(?:
|
267
|
+
Nachtrag | ## de
|
268
|
+
Postponed | ## en
|
269
|
+
Addition | ## en
|
270
|
+
Supplemento ## es
|
271
|
+
)
|
272
|
+
\b/ix
|
309
273
|
|
310
274
|
def parse_round_header( line )
|
311
275
|
logger.debug "parsing round header line: >#{line}<"
|
312
276
|
|
313
|
-
|
314
|
-
# make sure Round of 16 will not return pos 16 -- how? possible?
|
315
|
-
# add unit test too to verify
|
316
|
-
pos = find_round_pos!( line )
|
317
|
-
|
318
|
-
title = find_round_header_title!( line )
|
277
|
+
name = find_round_header_name!( line )
|
319
278
|
|
320
279
|
logger.debug " line: >#{line}<"
|
321
280
|
|
281
|
+
name = name.sub( ROUND_EXTRA_WORDS_RE, '' )
|
282
|
+
name = name.strip
|
322
283
|
|
323
|
-
round = @rounds[
|
284
|
+
round = @rounds[ name ]
|
324
285
|
if round.nil? ## auto-add / create if missing
|
325
|
-
|
326
|
-
|
327
|
-
@rounds[
|
286
|
+
## todo/check: add num (was pos) if present - why? why not?
|
287
|
+
round = Import::Round.new( name: name )
|
288
|
+
@rounds[ name ] = round
|
328
289
|
end
|
329
290
|
|
330
291
|
## todo/check: if pos match (MUST always match for now)
|
331
292
|
@last_round = round
|
332
293
|
@last_group = nil # note: reset group to no group - why? why not?
|
333
|
-
|
334
|
-
|
335
|
-
## NB: dummy/placeholder start_at, end_at date
|
336
|
-
## replace/patch after adding all games for round
|
337
|
-
|
338
|
-
=begin
|
339
|
-
round_attribs = {
|
340
|
-
title: title,
|
341
|
-
title2: title2,
|
342
|
-
knockout: knockout_flag
|
343
|
-
}
|
344
|
-
|
345
|
-
if pos > 999000
|
346
|
-
# no pos (e.g. will get autonumbered later) - try match by title for now
|
347
|
-
# e.g. lets us use title 'Group Replays', for example, multiple times
|
348
|
-
@round = Round.find_by_event_id_and_title( @event.id, title )
|
349
|
-
else
|
350
|
-
@round = Round.find_by_event_id_and_pos( @event.id, pos )
|
351
|
-
end
|
352
|
-
|
353
|
-
if @round.present?
|
354
|
-
logger.debug "update round #{@round.id}:"
|
355
|
-
else
|
356
|
-
logger.debug "create round:"
|
357
|
-
@round = Round.new
|
358
|
-
|
359
|
-
round_attribs = round_attribs.merge( {
|
360
|
-
event_id: @event.id,
|
361
|
-
pos: pos,
|
362
|
-
start_at: Date.parse('1911-11-11'),
|
363
|
-
end_at: Date.parse('1911-11-11')
|
364
|
-
})
|
365
|
-
end
|
366
|
-
|
367
|
-
logger.debug round_attribs.to_json
|
368
|
-
|
369
|
-
@round.update_attributes!( round_attribs )
|
370
|
-
|
371
|
-
@patch_round_ids_pos << @round.id if pos > 999000
|
372
|
-
### store list of round ids for patching start_at/end_at at the end
|
373
|
-
@patch_round_ids_dates << @round.id # todo/fix/check: check if round has definition (do NOT patch if definition (not auto-added) present)
|
374
|
-
=end
|
375
294
|
end
|
376
295
|
|
377
296
|
|
@@ -383,6 +302,11 @@ class MatchParser ## simple match parser for team match schedules
|
|
383
302
|
ScoreFormats.find!( line )
|
384
303
|
end
|
385
304
|
|
305
|
+
def find_status!( line )
|
306
|
+
StatusParser.find!( line )
|
307
|
+
end
|
308
|
+
|
309
|
+
|
386
310
|
def try_parse_game( line )
|
387
311
|
# note: clone line; for possible test do NOT modify in place for now
|
388
312
|
# note: returns true if parsed, false if no match
|
@@ -410,6 +334,10 @@ class MatchParser ## simple match parser for team match schedules
|
|
410
334
|
return false
|
411
335
|
end
|
412
336
|
|
337
|
+
## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
|
338
|
+
## or [cancelled] or [postponed] etc.
|
339
|
+
status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
|
340
|
+
|
413
341
|
## pos = find_game_pos!( line )
|
414
342
|
|
415
343
|
date = find_date!( line, start: @start )
|
@@ -434,20 +362,23 @@ class MatchParser ## simple match parser for team match schedules
|
|
434
362
|
if @last_round
|
435
363
|
round = @last_round
|
436
364
|
else
|
437
|
-
## find (first) matching round by date
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
365
|
+
## find (first) matching round by date if rounds / matchdays defined
|
366
|
+
## if no rounds / matchdays defined - YES, allow matches WITHOUT rounds!!!
|
367
|
+
if @rounds.size > 0
|
368
|
+
@rounds.values.each do |round_rec|
|
369
|
+
## note: convert date to date only (no time) with to_date!!!
|
370
|
+
if (round_rec.start_date && round_rec.end_date) &&
|
371
|
+
(date.to_date >= round_rec.start_date &&
|
372
|
+
date.to_date <= round_rec.end_date)
|
373
|
+
round = round_rec
|
374
|
+
break
|
375
|
+
end
|
376
|
+
end
|
377
|
+
if round.nil?
|
378
|
+
puts "!! ERROR - no matching round found for match date:"
|
379
|
+
pp date
|
380
|
+
exit 1
|
445
381
|
end
|
446
|
-
end
|
447
|
-
if round.nil?
|
448
|
-
puts "!! ERROR - no matching round found for match date:"
|
449
|
-
pp date
|
450
|
-
exit 1
|
451
382
|
end
|
452
383
|
end
|
453
384
|
|
@@ -457,12 +388,12 @@ class MatchParser ## simple match parser for team match schedules
|
|
457
388
|
## todo/check: pass along round and group refs or just string (canonical names) - why? why not?
|
458
389
|
|
459
390
|
@matches << Import::Match.new( date: date,
|
460
|
-
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.
|
461
|
-
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.
|
391
|
+
team1: team1, ## note: for now always use mapping value e.g. rec (NOT string e.g. team1.name)
|
392
|
+
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
462
393
|
score: score,
|
463
|
-
round: round ? round.
|
464
|
-
group: @last_group ? @last_group.
|
465
|
-
|
394
|
+
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
395
|
+
group: @last_group ? @last_group.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
396
|
+
status: status )
|
466
397
|
### todo: cache team lookups in hash?
|
467
398
|
|
468
399
|
=begin
|
@@ -517,7 +448,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
517
448
|
|
518
449
|
round_attribs = {
|
519
450
|
event_id: @event.id,
|
520
|
-
|
451
|
+
name: "Matchday #{date.to_date}",
|
521
452
|
pos: 999001+@patch_round_ids_pos.length, # e.g. 999<count> - 999001,999002,etc.
|
522
453
|
start_at: date.to_date,
|
523
454
|
end_at: date.to_date
|
@@ -541,7 +472,7 @@ class MatchParser ## simple match parser for team match schedules
|
|
541
472
|
end
|
542
473
|
|
543
474
|
## note: will crash (round.pos) if round is nil
|
544
|
-
logger.debug( " using round #{round.pos} >#{round.
|
475
|
+
logger.debug( " using round #{round.pos} >#{round.name}< start_at: #{round.start_at}, end_at: #{round.end_at}" )
|
545
476
|
else
|
546
477
|
## use round from last round header
|
547
478
|
round = @round
|
@@ -644,12 +575,29 @@ class MatchParser ## simple match parser for team match schedules
|
|
644
575
|
|
645
576
|
if date && team1.nil? && team2.nil?
|
646
577
|
logger.debug( "date header line found: >#{line}<")
|
647
|
-
logger.debug( " date: #{date}")
|
578
|
+
logger.debug( " date: #{date} with start: #{@start}")
|
648
579
|
|
649
580
|
@last_date = date # keep a reference for later use
|
650
|
-
|
581
|
+
|
582
|
+
### quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
|
583
|
+
## find a better way??
|
584
|
+
## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
|
585
|
+
## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
|
586
|
+
### todo/check: just turn on for 2019/20 season or always? why? why not?
|
587
|
+
|
588
|
+
## todo/fix: add switch back to old @start_org
|
589
|
+
## if year is date.year == @start.year-1 -- possible when full date with year set!!!
|
590
|
+
if @start.month != 1
|
591
|
+
if date.year == @start.year+1
|
592
|
+
logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
|
593
|
+
@start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
|
594
|
+
@start = Date.new( @start.year+1, 1, 1 )
|
595
|
+
end
|
596
|
+
end
|
597
|
+
|
598
|
+
true
|
651
599
|
else
|
652
|
-
|
600
|
+
false
|
653
601
|
end
|
654
602
|
end
|
655
603
|
|