sportdb-formats 1.0.4 → 1.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +4 -0
- data/lib/sportdb/formats.rb +14 -0
- data/lib/sportdb/formats/country/country_reader.rb +1 -1
- data/lib/sportdb/formats/event/event_index.rb +143 -0
- data/lib/sportdb/formats/event/event_reader.rb +183 -0
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +119 -183
- data/lib/sportdb/formats/match/match_parser_csv.rb +23 -6
- data/lib/sportdb/formats/package.rb +27 -1
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_parser.rb +6 -0
- data/lib/sportdb/formats/season_utils.rb +0 -11
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/match.rb +5 -1
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/season.rb +114 -45
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +1 -2
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +1 -0
- data/test/test_country_reader.rb +2 -2
- data/test/test_match_auto_relegation.rb +41 -0
- data/test/test_match_start_date.rb +44 -0
- data/test/test_regex.rb +25 -7
- data/test/test_season.rb +68 -19
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 89c3ae173bed0c7a68f30a897e4cf34da8d72d6e
|
4
|
+
data.tar.gz: f3898e8b03177b62075cced2a90be2502555bf63
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c938af88f6dd86f1736532c9d8271a3e56f1769b955a31ce6d84a0b1fcf5a6eec7cfa42e0c019bd7072351d777a1988d6637c4297bee0dad9592bb8ec7450e8
|
7
|
+
data.tar.gz: a80df68f64e13eeb9a85931e2d2dd4381f204f94a4ea9254c89b4b28346db4c1542321aed4fd123aef5ddedc7e4cd2c08731dd340326fae5dc94b57fa6797835
|
data/Manifest.txt
CHANGED
@@ -8,6 +8,8 @@ lib/sportdb/formats/country/country_index.rb
|
|
8
8
|
lib/sportdb/formats/country/country_reader.rb
|
9
9
|
lib/sportdb/formats/datafile.rb
|
10
10
|
lib/sportdb/formats/datafile_package.rb
|
11
|
+
lib/sportdb/formats/event/event_index.rb
|
12
|
+
lib/sportdb/formats/event/event_reader.rb
|
11
13
|
lib/sportdb/formats/goals.rb
|
12
14
|
lib/sportdb/formats/league/league_index.rb
|
13
15
|
lib/sportdb/formats/league/league_outline_reader.rb
|
@@ -63,10 +65,12 @@ test/test_match.rb
|
|
63
65
|
test/test_match_auto.rb
|
64
66
|
test/test_match_auto_champs.rb
|
65
67
|
test/test_match_auto_euro.rb
|
68
|
+
test/test_match_auto_relegation.rb
|
66
69
|
test/test_match_auto_worldcup.rb
|
67
70
|
test/test_match_champs.rb
|
68
71
|
test/test_match_eng.rb
|
69
72
|
test/test_match_euro.rb
|
73
|
+
test/test_match_start_date.rb
|
70
74
|
test/test_match_worldcup.rb
|
71
75
|
test/test_name_helper.rb
|
72
76
|
test/test_outline_reader.rb
|
data/lib/sportdb/formats.rb
CHANGED
@@ -136,6 +136,20 @@ end # module Import
|
|
136
136
|
end # module SportDb
|
137
137
|
|
138
138
|
|
139
|
+
require 'sportdb/formats/event/event_reader'
|
140
|
+
require 'sportdb/formats/event/event_index'
|
141
|
+
|
142
|
+
## add convenience helper
|
143
|
+
module SportDb
|
144
|
+
module Import
|
145
|
+
class EventInfo
|
146
|
+
def self.read( path ) EventInfoReader.read( path ); end
|
147
|
+
def self.parse( txt ) EventInfoReader.parse( txt ); end
|
148
|
+
end # class EventInfo
|
149
|
+
end # module Import
|
150
|
+
end # module SportDb
|
151
|
+
|
152
|
+
|
139
153
|
|
140
154
|
|
141
155
|
|
@@ -57,7 +57,7 @@ def parse
|
|
57
57
|
|
58
58
|
last_country = country = Country.new( name: "#{name} (-#{year})",
|
59
59
|
code: code )
|
60
|
-
country.alt_names << name ## note: do NOT
|
60
|
+
## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
|
61
61
|
|
62
62
|
countries << country
|
63
63
|
## todo/fix: add reference to country today (in parts[1] !!!!)
|
@@ -0,0 +1,143 @@
|
|
1
|
+
module SportDb
|
2
|
+
module Import
|
3
|
+
|
4
|
+
|
5
|
+
class EventIndex
|
6
|
+
|
7
|
+
def self.build( path )
|
8
|
+
datafiles = Package.find_seasons( path )
|
9
|
+
|
10
|
+
puts
|
11
|
+
puts "#{datafiles.size} seasons datafile(s):"
|
12
|
+
pp datafiles
|
13
|
+
|
14
|
+
index = new
|
15
|
+
datafiles.each do |datafile|
|
16
|
+
recs = EventInfoReader.read( datafile )
|
17
|
+
# pp recs
|
18
|
+
|
19
|
+
index.add( recs )
|
20
|
+
end
|
21
|
+
|
22
|
+
index
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
attr_reader :events
|
27
|
+
def initialize
|
28
|
+
@events = []
|
29
|
+
@leagues = {}
|
30
|
+
end
|
31
|
+
|
32
|
+
def add( recs )
|
33
|
+
@events += recs ## add to "linear" records
|
34
|
+
|
35
|
+
recs.each do |rec|
|
36
|
+
league = rec.league
|
37
|
+
season = rec.season
|
38
|
+
|
39
|
+
seasons = @leagues[ league.key ] ||= {}
|
40
|
+
seasons[season.key] = rec
|
41
|
+
end
|
42
|
+
## build search index by leagues (and season)
|
43
|
+
end
|
44
|
+
|
45
|
+
def find_by( league:, season: )
|
46
|
+
league_key = league.is_a?( String ) ? league : league.key
|
47
|
+
season_key = season.is_a?( String ) ? season : season.key
|
48
|
+
|
49
|
+
seasons = @leagues[ league_key ]
|
50
|
+
if seasons
|
51
|
+
seasons[ season_key ]
|
52
|
+
else
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
end # method find_by
|
56
|
+
end ## class EventIndex
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
class SeasonIndex
|
61
|
+
def initialize( *args )
|
62
|
+
@leagues = {} ## use a league hash by years for now; change later
|
63
|
+
|
64
|
+
if args.size == 1 && args[0].is_a?( EventIndex )
|
65
|
+
## convenience setup/hookup
|
66
|
+
## (auto-)add all events from event index
|
67
|
+
add( args[0].events )
|
68
|
+
else
|
69
|
+
pp args
|
70
|
+
raise ArgumentError.new( 'unsupported arguments' )
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def add( recs )
|
75
|
+
## use a lookup index by year for now
|
76
|
+
## todo - find something better/more generic for searching/matching date periods!!!
|
77
|
+
recs.each do |rec|
|
78
|
+
league = rec.league
|
79
|
+
season = rec.season
|
80
|
+
|
81
|
+
years = @leagues[ league.key ] ||= {}
|
82
|
+
if season.year?
|
83
|
+
years[season.start_year] ||= []
|
84
|
+
years[season.start_year] << rec
|
85
|
+
else
|
86
|
+
years[season.start_year] ||= []
|
87
|
+
years[season.end_year] ||= []
|
88
|
+
years[season.start_year] << rec
|
89
|
+
years[season.end_year] << rec
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end # method add
|
93
|
+
|
94
|
+
def find_by( date:, league: )
|
95
|
+
date = Date.strptime( date, '%Y-%m-%d' ) if date.is_a?( String )
|
96
|
+
league_key = league.is_a?( String ) ? league : league.key
|
97
|
+
|
98
|
+
years = @leagues[ league_key ]
|
99
|
+
if years
|
100
|
+
year = years[ date.year ]
|
101
|
+
if year
|
102
|
+
season_key = nil
|
103
|
+
year.each do |event|
|
104
|
+
## todo/check: rename/use between? instead of include? - why? why not?
|
105
|
+
if event.include?( date )
|
106
|
+
season_key = event.season.key
|
107
|
+
break
|
108
|
+
end
|
109
|
+
end
|
110
|
+
if season_key.nil?
|
111
|
+
puts "!! WARN: date >#{date}< out-of-seasons for year #{date.year} in league #{league_key}:"
|
112
|
+
year.each do |event|
|
113
|
+
puts " #{event.season.key} | #{event.start_date} - #{event.end_date}"
|
114
|
+
end
|
115
|
+
## retry again and pick season with "overflow" at the end (date is great end_date)
|
116
|
+
year.each do |event|
|
117
|
+
if date > event.end_date
|
118
|
+
diff_in_days = date.to_date.jd - event.end_date.to_date.jd
|
119
|
+
puts " +#{diff_in_days} days - adding overflow to #{event.season.key} ending on #{event.end_date} ++ #{date}"
|
120
|
+
season_key = event.season.key
|
121
|
+
break
|
122
|
+
end
|
123
|
+
end
|
124
|
+
## exit now for sure - if still empty!!!!
|
125
|
+
if season_key.nil?
|
126
|
+
puts "!! ERROR: CANNOT auto-fix / (auto-)append date at the end of an event; check season setup - sorry"
|
127
|
+
exit 1
|
128
|
+
end
|
129
|
+
end
|
130
|
+
season_key
|
131
|
+
else
|
132
|
+
nil ## no year defined / found for league
|
133
|
+
end
|
134
|
+
else
|
135
|
+
nil ## no league defined / found
|
136
|
+
end
|
137
|
+
end # method find
|
138
|
+
|
139
|
+
end # class SeasonIndex
|
140
|
+
|
141
|
+
|
142
|
+
end # module Import
|
143
|
+
end # module SportDb
|
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Import
|
4
|
+
|
5
|
+
|
6
|
+
class EventInfo
|
7
|
+
## "high level" info (summary) about event (like a "wikipedia infobox")
|
8
|
+
## use for checking dataset imports; lets you check e.g.
|
9
|
+
## - dates within range
|
10
|
+
## - number of teams e.g. 20
|
11
|
+
## - matches played e.g. 380
|
12
|
+
## - goals scored e.g. 937
|
13
|
+
## etc.
|
14
|
+
|
15
|
+
attr_reader :league,
|
16
|
+
:season,
|
17
|
+
:teams,
|
18
|
+
:matches,
|
19
|
+
:goals,
|
20
|
+
:start_date,
|
21
|
+
:end_date
|
22
|
+
|
23
|
+
def initialize( league:, season:,
|
24
|
+
start_date: nil, end_date: nil,
|
25
|
+
teams: nil,
|
26
|
+
matches: nil,
|
27
|
+
goals: nil )
|
28
|
+
|
29
|
+
@league = league
|
30
|
+
@season = season
|
31
|
+
|
32
|
+
@start_date = start_date
|
33
|
+
@end_date = end_date
|
34
|
+
|
35
|
+
@teams = teams ## todo/check: rename/use teams_count ??
|
36
|
+
@matches = matches ## todo/check: rename/use match_count ??
|
37
|
+
@goals = goals
|
38
|
+
end
|
39
|
+
|
40
|
+
def include?( date )
|
41
|
+
## todo/fix: add options e.g.
|
42
|
+
## - add delta/off_by_one or such?
|
43
|
+
## - add strict (for) only return true if date range (really) defined (no generic auto-rules)
|
44
|
+
|
45
|
+
### note: for now allow off by one error (via timezone/local time errors)
|
46
|
+
## todo/fix: issue warning if off by one!!!!
|
47
|
+
if @start_date && @end_date
|
48
|
+
date >= (@start_date-1) &&
|
49
|
+
date <= (@end_date+1)
|
50
|
+
else
|
51
|
+
if @season.year?
|
52
|
+
# assume generic rule
|
53
|
+
## same year e.g. Jan 1 - Dec 31; always true for now
|
54
|
+
date.year == @season.start_year
|
55
|
+
else
|
56
|
+
# assume generic rule
|
57
|
+
## July 1 - June 30 (Y+1)
|
58
|
+
## - todo/check -start for some countries/leagues in June 1 or August 1 ????
|
59
|
+
date >= Date.new( @season.start_year, 7, 1 ) &&
|
60
|
+
date <= Date.new( @season.end_year, 6, 30 )
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end # method include?
|
64
|
+
alias_method :between?, :include?
|
65
|
+
end # class EventInfo
|
66
|
+
|
67
|
+
|
68
|
+
class EventInfoReader
|
69
|
+
def catalog() Import.catalog; end
|
70
|
+
|
71
|
+
|
72
|
+
def self.read( path )
|
73
|
+
txt = File.open( path, 'r:utf-8') {|f| f.read }
|
74
|
+
new( txt ).parse
|
75
|
+
end
|
76
|
+
|
77
|
+
def self.parse( txt )
|
78
|
+
new( txt ).parse
|
79
|
+
end
|
80
|
+
|
81
|
+
def initialize( txt )
|
82
|
+
@txt = txt
|
83
|
+
end
|
84
|
+
|
85
|
+
def parse
|
86
|
+
recs = []
|
87
|
+
|
88
|
+
parse_csv( @txt ).each do |row|
|
89
|
+
league_col = row['League']
|
90
|
+
season_col = row['Season'] || row['Year']
|
91
|
+
dates_col = row['Dates']
|
92
|
+
|
93
|
+
season = Import::Season.new( season_col )
|
94
|
+
league = catalog.leagues.find!( league_col )
|
95
|
+
|
96
|
+
|
97
|
+
dates = []
|
98
|
+
if dates_col.nil? || dates_col.empty?
|
99
|
+
## do nothing; no dates - keep dates array empty
|
100
|
+
else
|
101
|
+
## squish spaces
|
102
|
+
dates_col = dates_col.gsub( /[ ]{2,}/, ' ' ) ## squish/fold spaces
|
103
|
+
|
104
|
+
puts "#{league.name} (#{league.key}) | #{season.key} | #{dates_col}"
|
105
|
+
|
106
|
+
### todo/check: check what parts "Aug 15" return ???
|
107
|
+
### short form for "Aug 15 -" - works?
|
108
|
+
|
109
|
+
## todo/fix!!! - check EventInfo.include?
|
110
|
+
## now allow dates with only start_date too!! (WITHOUT end_date)
|
111
|
+
parts = dates_col.split( /[ ]*[–-][ ]*/ )
|
112
|
+
if parts.size == 1
|
113
|
+
pp parts
|
114
|
+
dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
|
115
|
+
pp dates
|
116
|
+
elsif parts.size == 2
|
117
|
+
pp parts
|
118
|
+
dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
|
119
|
+
dates << DateFormats.parse( parts[1], start: Date.new( season.end_year ? season.end_year : season.start_year, 1, 1 ), lang: 'en' )
|
120
|
+
pp dates
|
121
|
+
|
122
|
+
## assert/check if period is less than 365 days for now
|
123
|
+
diff = dates[1].to_date.jd - dates[0].to_date.jd
|
124
|
+
puts "#{diff}d"
|
125
|
+
if diff > 365
|
126
|
+
puts "!! ERROR - date range / period assertion failed; expected diff < 365 days"
|
127
|
+
exit 1
|
128
|
+
end
|
129
|
+
else
|
130
|
+
puts "!! ERRROR - expected data range / period - one or two dates; got #{parts.size}:"
|
131
|
+
pp dates_col
|
132
|
+
pp parts
|
133
|
+
exit 1
|
134
|
+
end
|
135
|
+
end
|
136
|
+
|
137
|
+
|
138
|
+
teams_col = row['Clubs'] || row['Teams']
|
139
|
+
goals_col = row['Goals']
|
140
|
+
|
141
|
+
## note: remove (and allow) all non-digits e.g. 370 goals, 20 clubs, etc.
|
142
|
+
teams_col = teams_col.gsub( /[^0-9]/, '' ) if teams_col
|
143
|
+
goals_col = goals_col.gsub( /[^0-9]/, '' ) if goals_col
|
144
|
+
|
145
|
+
teams = (teams_col.nil? || teams_col.empty?) ? nil : teams_col.to_i
|
146
|
+
goals = (goals_col.nil? || goals_col.empty?) ? nil : goals_col.to_i
|
147
|
+
|
148
|
+
matches_col = row['Matches']
|
149
|
+
## note: support additions in matches (played) e.g.
|
150
|
+
# 132 + 63 Play-off-Spiele
|
151
|
+
matches_col = matches_col.gsub( /[^0-9+]/, '' ) if matches_col
|
152
|
+
|
153
|
+
matches = if matches_col.nil? || matches_col.empty?
|
154
|
+
nil
|
155
|
+
else
|
156
|
+
if matches_col.index( '+' ) ### check for calculations
|
157
|
+
## note: for now only supports additions
|
158
|
+
matches_col.split( '+' ).reduce( 0 ) do |sum,str|
|
159
|
+
sum + str.to_i
|
160
|
+
end
|
161
|
+
else ## assume single (integer) number
|
162
|
+
matches_col.to_i
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
rec = EventInfo.new( league: league,
|
167
|
+
season: season,
|
168
|
+
start_date: dates[0],
|
169
|
+
end_date: dates[1],
|
170
|
+
teams: teams,
|
171
|
+
matches: matches,
|
172
|
+
goals: goals
|
173
|
+
)
|
174
|
+
recs << rec
|
175
|
+
end # each row
|
176
|
+
recs
|
177
|
+
end # method parse
|
178
|
+
end # class EventInfoReader
|
179
|
+
|
180
|
+
|
181
|
+
end ## module Import
|
182
|
+
end ## module SportDb
|
183
|
+
|
@@ -121,14 +121,31 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
121
121
|
values
|
122
122
|
end
|
123
123
|
|
124
|
-
def check_stage( name )
|
125
|
-
known_stages = ['regular season',
|
126
|
-
'championship round',
|
127
|
-
'relegation round',
|
128
|
-
'play-offs'
|
129
|
-
]
|
130
124
|
|
131
|
-
|
125
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
126
|
+
KNOWN_STAGES = [
|
127
|
+
'Regular Season',
|
128
|
+
'Regular Stage',
|
129
|
+
'Championship Round',
|
130
|
+
'Championship Playoff',
|
131
|
+
'Relegation Round',
|
132
|
+
'Relegation Playoff',
|
133
|
+
'Play-offs',
|
134
|
+
'Playoff Stage',
|
135
|
+
'Grunddurchgang',
|
136
|
+
'Finaldurchgang - Qualifikationsgruppe',
|
137
|
+
'Finaldurchgang - Qualifikation',
|
138
|
+
'Finaldurchgang - Meistergruppe',
|
139
|
+
'Finaldurchgang - Meister',
|
140
|
+
'EL Play-off',
|
141
|
+
'Europa League Play-off',
|
142
|
+
'Europa-League-Play-offs',
|
143
|
+
].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
|
144
|
+
|
145
|
+
|
146
|
+
def check_stage( name )
|
147
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
148
|
+
if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
|
132
149
|
## everything ok
|
133
150
|
else
|
134
151
|
puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
|
@@ -7,21 +7,21 @@ module SportDb
|
|
7
7
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
|
8
8
|
|
9
9
|
|
10
|
-
class MapperV2 ## todo/check: rename to NameMapper
|
10
|
+
class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
|
11
11
|
|
12
12
|
include Logging
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
|
15
15
|
|
16
16
|
########
|
17
17
|
## key: e.g. augsburg
|
18
|
-
##
|
19
|
-
## length (of
|
20
|
-
MappingStruct = Struct.new( :key, :
|
18
|
+
## name: e.g. FC Augsburg
|
19
|
+
## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
|
20
|
+
MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
|
21
21
|
|
22
22
|
######
|
23
23
|
## convenience helper - (auto)build ActiveRecord-like team records/structs
|
24
|
-
Record = Struct.new( :key, :
|
24
|
+
Record = Struct.new( :key, :name, :alt_names )
|
25
25
|
def build_records( txt_or_lines )
|
26
26
|
recs = []
|
27
27
|
|
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
44
44
|
values = line.split( '|' )
|
45
45
|
values = values.map { |value| value.strip }
|
46
46
|
|
47
|
-
|
47
|
+
name = values[0]
|
48
48
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
49
|
-
key
|
50
|
-
|
49
|
+
key = name.downcase.gsub( /[^a-z]/, '' )
|
50
|
+
alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
|
51
51
|
|
52
|
-
recs << Record.new( key,
|
52
|
+
recs << Record.new( key, name, alt_names )
|
53
53
|
end
|
54
54
|
recs
|
55
55
|
end
|
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
63
63
|
(records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
|
64
64
|
|
65
65
|
## build mapping lookup table
|
66
|
-
@
|
67
|
-
|
66
|
+
@known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
|
67
|
+
build_name_table_for_mapping( records_or_mapping )
|
68
68
|
else ## assume array of records
|
69
|
-
|
69
|
+
build_name_table_for_records( records_or_mapping )
|
70
70
|
end
|
71
71
|
|
72
72
|
## build lookup hash by record (e.g. team/club/etc.) key
|
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
85
85
|
|
86
86
|
|
87
87
|
|
88
|
-
def
|
88
|
+
def map_names!( line ) ## rename to just map! - why?? why not???
|
89
89
|
begin
|
90
|
-
found =
|
90
|
+
found = map_name_for!( @tag, line, @known_names )
|
91
91
|
end while found
|
92
92
|
end
|
93
93
|
|
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
110
110
|
|
111
111
|
|
112
112
|
private
|
113
|
-
def
|
114
|
-
|
113
|
+
def build_name_table_for_mapping( mapping )
|
114
|
+
known_names = []
|
115
115
|
|
116
|
-
mapping.each do |
|
116
|
+
mapping.each do |name, rec|
|
117
117
|
m = MappingStruct.new
|
118
118
|
m.key = rec.key
|
119
|
-
m.
|
120
|
-
m.length =
|
121
|
-
m.pattern = Regexp.escape(
|
119
|
+
m.name = name
|
120
|
+
m.length = name.length
|
121
|
+
m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
|
122
122
|
|
123
|
-
|
123
|
+
known_names << m
|
124
124
|
end
|
125
125
|
|
126
126
|
## note: sort here by length (largest goes first - best match)
|
127
|
-
|
128
|
-
|
127
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
128
|
+
known_names
|
129
129
|
end
|
130
130
|
|
131
|
-
def
|
131
|
+
def build_name_table_for_records( records )
|
132
132
|
|
133
|
-
## build known tracks table w/
|
133
|
+
## build known tracks table w/ alt names e.g.
|
134
134
|
#
|
135
135
|
# [[ 'wolfsbrug', 'VfL Wolfsburg'],
|
136
136
|
# [ 'augsburg', 'FC Augsburg'],
|
@@ -138,65 +138,65 @@ private
|
|
138
138
|
# [ 'augsburg', 'Augi3' ],
|
139
139
|
# [ 'stuttgart', 'VfB Stuttgart']]
|
140
140
|
|
141
|
-
|
141
|
+
known_names = []
|
142
142
|
|
143
143
|
records.each_with_index do |rec,index|
|
144
144
|
|
145
|
-
|
146
|
-
|
145
|
+
name_candidates = []
|
146
|
+
name_candidates << rec.name
|
147
147
|
|
148
|
-
|
148
|
+
name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
|
149
149
|
|
150
150
|
|
151
|
-
## check if
|
152
|
-
# make
|
151
|
+
## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
|
152
|
+
# make subname optional by adding name w/o subname e.g. Grand Prix Japan
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
154
|
+
names = []
|
155
|
+
name_candidates.each do |t|
|
156
|
+
names << t
|
157
157
|
if t =~ /\(.+\)/
|
158
|
-
|
158
|
+
extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
|
159
159
|
# note: strip leading n trailing withspaces too!
|
160
160
|
# -- todo: add squish or something if () is inline e.g. leaves two spaces?
|
161
|
-
|
162
|
-
|
161
|
+
extra_name.strip!
|
162
|
+
names << extra_name
|
163
163
|
end
|
164
164
|
end
|
165
165
|
|
166
|
-
|
166
|
+
names.each do |name|
|
167
167
|
m = MappingStruct.new
|
168
168
|
m.key = rec.key
|
169
|
-
m.
|
170
|
-
m.length =
|
169
|
+
m.name = name
|
170
|
+
m.length = name.length
|
171
171
|
## note: escape for regex plus allow subs for special chars/accents
|
172
|
-
m.pattern =
|
172
|
+
m.pattern = name_esc_regex( name )
|
173
173
|
|
174
|
-
|
174
|
+
known_names << m
|
175
175
|
end
|
176
176
|
|
177
|
-
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{
|
177
|
+
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
|
178
178
|
|
179
179
|
## note: only include code field - if defined
|
180
180
|
if rec.respond_to?(:code) && rec.code && !rec.code.empty?
|
181
181
|
m = MappingStruct.new
|
182
182
|
m.key = rec.key
|
183
|
-
m.
|
183
|
+
m.name = rec.code
|
184
184
|
m.length = rec.code.length
|
185
185
|
m.pattern = rec.code ## note: use code for now as is (no variants allowed fow now)
|
186
186
|
|
187
|
-
|
187
|
+
known_names << m
|
188
188
|
end
|
189
189
|
end
|
190
190
|
|
191
191
|
## note: sort here by length (largest goes first - best match)
|
192
192
|
# exclude code and key (key should always go last)
|
193
|
-
|
194
|
-
|
193
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
194
|
+
known_names
|
195
195
|
end
|
196
196
|
|
197
197
|
|
198
198
|
|
199
|
-
def
|
199
|
+
def map_name_for!( tag, line, mappings )
|
200
200
|
mappings.each do |mapping|
|
201
201
|
key = mapping.key
|
202
202
|
pattern = mapping.pattern
|
@@ -234,9 +234,9 @@ private
|
|
234
234
|
|
235
235
|
|
236
236
|
####
|
237
|
-
#
|
237
|
+
# name helper cut-n-paste copy from TextUtils
|
238
238
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
|
239
|
-
def
|
239
|
+
def name_esc_regex( name_unescaped )
|
240
240
|
|
241
241
|
## escape regex special chars e.g.
|
242
242
|
# . to \. and
|
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
|
|
257
257
|
# e.g. Club Atlético Colón (Santa Fe)
|
258
258
|
# e.g. Bauer Anton (????)
|
259
259
|
|
260
|
-
##
|
261
|
-
##
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
260
|
+
## note: cannot use Regexp.escape! will escape space '' to '\ '
|
261
|
+
## name = Regexp.escape( name_unescaped )
|
262
|
+
name = name_unescaped.gsub( '.', '\.' )
|
263
|
+
name = name.gsub( '(', '\(' )
|
264
|
+
name = name.gsub( ')', '\)' )
|
265
|
+
name = name.gsub( '?', '\?' )
|
266
|
+
name = name.gsub( '*', '\*' )
|
267
|
+
name = name.gsub( '+', '\+' )
|
268
|
+
name = name.gsub( '$', '\$' )
|
269
|
+
name = name.gsub( '^', '\^' )
|
270
270
|
|
271
271
|
## match accented char with or without accents
|
272
272
|
## add (ü|ue) etc.
|
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
|
|
309
309
|
## collect some more (real-world) examples first!!!!!
|
310
310
|
|
311
311
|
alternatives.each do |alt|
|
312
|
-
|
312
|
+
name = name.gsub( alt[0], alt[1] )
|
313
313
|
end
|
314
314
|
|
315
|
-
|
315
|
+
name
|
316
316
|
end
|
317
317
|
|
318
318
|
end # class MapperV2
|