sportdb-formats 1.0.2 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -0
- data/Rakefile +2 -2
- data/lib/sportdb/formats.rb +17 -5
- data/lib/sportdb/formats/country/country_index.rb +7 -7
- data/lib/sportdb/formats/country/country_reader.rb +26 -6
- data/lib/sportdb/formats/datafile_package.rb +10 -7
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/league/league_reader.rb +3 -3
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +99 -180
- data/lib/sportdb/formats/match/match_parser_csv.rb +321 -0
- data/lib/sportdb/formats/package.rb +165 -11
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_formats.rb +41 -1
- data/lib/sportdb/formats/score/score_parser.rb +6 -0
- data/lib/sportdb/formats/structs/country.rb +6 -3
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +1 -2
- data/lib/sportdb/formats/team/club_reader_props.rb +3 -3
- data/lib/sportdb/formats/version.rb +4 -2
- data/test/helper.rb +2 -1
- data/test/test_club_reader_props.rb +2 -2
- data/test/test_country_index.rb +4 -4
- data/test/test_country_reader.rb +34 -4
- data/test/test_csv_match_parser.rb +114 -0
- data/test/test_csv_match_parser_utils.rb +20 -0
- data/test/test_csv_reader.rb +5 -5
- data/test/test_datafile.rb +0 -32
- data/test/test_datafile_package.rb +46 -0
- data/test/test_match_auto_relegation.rb +41 -0
- data/test/test_package.rb +60 -28
- data/test/test_package_match.rb +27 -3
- data/test/test_regex.rb +25 -7
- data/test/test_scores.rb +58 -49
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 216e15369403af5707b3d2c6d82fcd6c72e2712d
|
4
|
+
data.tar.gz: 5c73e0e7ce7309b2c8fb86e38d90d1d566f80688
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8033e4db4d7b047b37ec8b3be61657d15df72b7a6c6295317ca8c9cd19c0f578f2ecd75c25cf11b0dcb9218c1ff3e8d2a8e0efb0c59ae235b568b353ef55c9d6
|
7
|
+
data.tar.gz: e5b510b45d4c28c6d83d6baa43ef954e78ae64af116c8305a983b5c54d2fda486425a793b48c8332dcf8cf5c441da37f7183602d50a7e07066049995b5940f0f
|
data/Manifest.txt
CHANGED
@@ -17,6 +17,7 @@ lib/sportdb/formats/match/mapper.rb
|
|
17
17
|
lib/sportdb/formats/match/mapper_teams.rb
|
18
18
|
lib/sportdb/formats/match/match_parser.rb
|
19
19
|
lib/sportdb/formats/match/match_parser_auto_conf.rb
|
20
|
+
lib/sportdb/formats/match/match_parser_csv.rb
|
20
21
|
lib/sportdb/formats/name_helper.rb
|
21
22
|
lib/sportdb/formats/outline_reader.rb
|
22
23
|
lib/sportdb/formats/package.rb
|
@@ -49,8 +50,11 @@ test/test_clubs.rb
|
|
49
50
|
test/test_conf.rb
|
50
51
|
test/test_country_index.rb
|
51
52
|
test/test_country_reader.rb
|
53
|
+
test/test_csv_match_parser.rb
|
54
|
+
test/test_csv_match_parser_utils.rb
|
52
55
|
test/test_csv_reader.rb
|
53
56
|
test/test_datafile.rb
|
57
|
+
test/test_datafile_package.rb
|
54
58
|
test/test_goals.rb
|
55
59
|
test/test_league_index.rb
|
56
60
|
test/test_league_outline_reader.rb
|
@@ -59,6 +63,7 @@ test/test_match.rb
|
|
59
63
|
test/test_match_auto.rb
|
60
64
|
test/test_match_auto_champs.rb
|
61
65
|
test/test_match_auto_euro.rb
|
66
|
+
test/test_match_auto_relegation.rb
|
62
67
|
test/test_match_auto_worldcup.rb
|
63
68
|
test/test_match_champs.rb
|
64
69
|
test/test_match_eng.rb
|
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require './lib/sportdb/formats/version.rb'
|
|
3
3
|
|
4
4
|
Hoe.spec 'sportdb-formats' do
|
5
5
|
|
6
|
-
self.version = SportDb::Formats::VERSION
|
6
|
+
self.version = SportDb::Module::Formats::VERSION
|
7
7
|
|
8
8
|
self.summary = "sportdb-formats - sport.db format and text utilities"
|
9
9
|
self.description = summary
|
@@ -21,7 +21,7 @@ Hoe.spec 'sportdb-formats' do
|
|
21
21
|
|
22
22
|
self.extra_deps = [
|
23
23
|
['alphabets', '>= 1.0.0'],
|
24
|
-
['date-formats', '>= 1.0.
|
24
|
+
['date-formats', '>= 1.0.1'],
|
25
25
|
['csvreader', '>= 1.2.4'],
|
26
26
|
['sportdb-langs', '>= 0.1.0'],
|
27
27
|
|
data/lib/sportdb/formats.rb
CHANGED
@@ -10,15 +10,26 @@ require 'zip' ## todo/check: if zip is alreay included in a required module
|
|
10
10
|
|
11
11
|
|
12
12
|
|
13
|
-
def read_csv( path
|
14
|
-
|
13
|
+
def read_csv( path, sep: nil,
|
14
|
+
symbolize_names: nil )
|
15
|
+
opts = {}
|
16
|
+
opts[:sep] = sep if sep
|
17
|
+
opts[:header_converters] = :symbol if symbolize_names
|
18
|
+
|
19
|
+
CsvHash.read( path, **opts )
|
15
20
|
end
|
16
21
|
|
17
|
-
def parse_csv( txt
|
18
|
-
|
22
|
+
def parse_csv( txt, sep: nil,
|
23
|
+
symbolize_names: nil )
|
24
|
+
opts = {}
|
25
|
+
opts[:sep] = sep if sep
|
26
|
+
opts[:header_converters] = :symbol if symbolize_names
|
27
|
+
|
28
|
+
CsvHash.parse( txt, **opts )
|
19
29
|
end
|
20
30
|
|
21
31
|
|
32
|
+
|
22
33
|
## more sportdb libs/gems
|
23
34
|
require 'sportdb/langs'
|
24
35
|
|
@@ -68,6 +79,7 @@ require 'sportdb/formats/match/match_parser'
|
|
68
79
|
require 'sportdb/formats/match/match_parser_auto_conf'
|
69
80
|
require 'sportdb/formats/match/conf_parser'
|
70
81
|
|
82
|
+
require 'sportdb/formats/match/match_parser_csv'
|
71
83
|
|
72
84
|
require 'sportdb/formats/country/country_reader'
|
73
85
|
require 'sportdb/formats/country/country_index'
|
@@ -147,4 +159,4 @@ end # module SportDb
|
|
147
159
|
|
148
160
|
|
149
161
|
|
150
|
-
puts SportDb::Formats.banner # say hello
|
162
|
+
puts SportDb::Module::Formats.banner # say hello
|
@@ -30,11 +30,11 @@ class CountryIndex
|
|
30
30
|
## auto-fill countries
|
31
31
|
## pp recs
|
32
32
|
recs.each do |rec|
|
33
|
-
## rec e.g. { key:'af',
|
33
|
+
## rec e.g. { key:'af', code:'AFG', name:'Afghanistan'}
|
34
34
|
|
35
35
|
@countries << rec
|
36
36
|
|
37
|
-
## add codes lookups - key,
|
37
|
+
## add codes lookups - key, code, ...
|
38
38
|
if @countries_by_code[ rec.key ]
|
39
39
|
puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
|
40
40
|
exit 1
|
@@ -42,13 +42,13 @@ class CountryIndex
|
|
42
42
|
@countries_by_code[ rec.key ] = rec
|
43
43
|
end
|
44
44
|
|
45
|
-
## add
|
46
|
-
if rec.key != rec.
|
47
|
-
if @countries_by_code[ rec.
|
48
|
-
puts "** !! ERROR !! country code
|
45
|
+
## add code (only) if different from key
|
46
|
+
if rec.key != rec.code.downcase
|
47
|
+
if @countries_by_code[ rec.code.downcase ]
|
48
|
+
puts "** !! ERROR !! country code >#{rec.code.downcase}< already exits!!"
|
49
49
|
exit 1
|
50
50
|
else
|
51
|
-
@countries_by_code[ rec.
|
51
|
+
@countries_by_code[ rec.code.downcase ] = rec
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
@@ -43,11 +43,31 @@ def parse
|
|
43
43
|
# e.g. East Germany (-1989) => East Germany (-1989)
|
44
44
|
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
45
45
|
last_country.alt_names += values
|
46
|
+
elsif line =~ /^-[ ]*(\d{4})
|
47
|
+
[ ]+
|
48
|
+
(.+)$
|
49
|
+
/x ## check for historic lines e.g. -1989
|
50
|
+
year = $1.to_i
|
51
|
+
parts = $2.split( /=>|⇒/ )
|
52
|
+
values = parts[0].split( ',' )
|
53
|
+
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
54
|
+
|
55
|
+
name = values[0]
|
56
|
+
code = values[1]
|
57
|
+
|
58
|
+
last_country = country = Country.new( name: "#{name} (-#{year})",
|
59
|
+
code: code )
|
60
|
+
## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
|
61
|
+
|
62
|
+
countries << country
|
63
|
+
## todo/fix: add reference to country today (in parts[1] !!!!)
|
46
64
|
else
|
47
65
|
## assume "regular" line
|
48
66
|
## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
|
49
67
|
## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
|
50
|
-
if line =~ /^([a-z]{2,4})
|
68
|
+
if line =~ /^([a-z]{2,4})
|
69
|
+
[ ]+
|
70
|
+
(.+)$/x
|
51
71
|
key = $1
|
52
72
|
values = $2.split( ',' )
|
53
73
|
## strip and squish (white)spaces
|
@@ -60,14 +80,14 @@ def parse
|
|
60
80
|
geos = split_geo( values[0] )
|
61
81
|
name = geos[0] ## note: ignore all other geos for now
|
62
82
|
|
63
|
-
## note: allow
|
64
|
-
|
83
|
+
## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
|
84
|
+
code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
|
65
85
|
values[1]
|
66
86
|
else
|
67
87
|
if values[1]
|
68
|
-
puts "** !!! ERROR !!! wrong
|
88
|
+
puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
|
69
89
|
else
|
70
|
-
puts "** !!! ERROR !!! missing
|
90
|
+
puts "** !!! ERROR !!! missing code for (canonical) country name"
|
71
91
|
end
|
72
92
|
exit 1
|
73
93
|
end
|
@@ -80,7 +100,7 @@ def parse
|
|
80
100
|
|
81
101
|
last_country = country = Country.new( key: key,
|
82
102
|
name: name,
|
83
|
-
|
103
|
+
code: code,
|
84
104
|
tags: tags )
|
85
105
|
countries << country
|
86
106
|
else
|
@@ -30,7 +30,7 @@ class Entry
|
|
30
30
|
@name = path[ pack.path.length+1..-1 ]
|
31
31
|
end
|
32
32
|
def name() @name; end
|
33
|
-
def read() File.open( @path, 'r:utf-8' ).read; end
|
33
|
+
def read() File.open( @path, 'r:utf-8' ) {|f| f.read }; end
|
34
34
|
end # class DirPackage::Entry
|
35
35
|
|
36
36
|
|
@@ -44,17 +44,20 @@ end # class DirPackage::Entry
|
|
44
44
|
@name = basename
|
45
45
|
end
|
46
46
|
|
47
|
-
|
47
|
+
## todo/check: change pattern: to re: - why? why not?
|
48
|
+
def each( pattern: ) ## todo/check: rename to glob or something - why? why not?
|
48
49
|
## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
|
49
50
|
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
50
|
-
|
51
|
-
|
52
|
-
if
|
53
|
-
|
51
|
+
## todo/check/fix: is there a better (simpler) glob pattern? yes? no?
|
52
|
+
Dir.glob( "#{@path}/**/{*,.*}.*" ).each do |path|
|
53
|
+
if File.directory?( path )
|
54
|
+
## always skip directories / folders
|
55
|
+
elsif EXCLUDE_RE.match( path )
|
56
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
54
57
|
elsif pattern.match( path )
|
55
58
|
yield( Entry.new( self, path ))
|
56
59
|
else
|
57
|
-
|
60
|
+
## puts " skipping >#{path}<"
|
58
61
|
end
|
59
62
|
end
|
60
63
|
end
|
@@ -121,14 +121,31 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
121
121
|
values
|
122
122
|
end
|
123
123
|
|
124
|
-
def check_stage( name )
|
125
|
-
known_stages = ['regular season',
|
126
|
-
'championship round',
|
127
|
-
'relegation round',
|
128
|
-
'play-offs'
|
129
|
-
]
|
130
124
|
|
131
|
-
|
125
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
126
|
+
KNOWN_STAGES = [
|
127
|
+
'Regular Season',
|
128
|
+
'Regular Stage',
|
129
|
+
'Championship Round',
|
130
|
+
'Championship Playoff',
|
131
|
+
'Relegation Round',
|
132
|
+
'Relegation Playoff',
|
133
|
+
'Play-offs',
|
134
|
+
'Playoff Stage',
|
135
|
+
'Grunddurchgang',
|
136
|
+
'Finaldurchgang - Qualifikationsgruppe',
|
137
|
+
'Finaldurchgang - Qualifikation',
|
138
|
+
'Finaldurchgang - Meistergruppe',
|
139
|
+
'Finaldurchgang - Meister',
|
140
|
+
'EL Play-off',
|
141
|
+
'Europa League Play-off',
|
142
|
+
'Europa-League-Play-offs',
|
143
|
+
].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
|
144
|
+
|
145
|
+
|
146
|
+
def check_stage( name )
|
147
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
148
|
+
if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
|
132
149
|
## everything ok
|
133
150
|
else
|
134
151
|
puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
|
@@ -113,9 +113,9 @@ def parse
|
|
113
113
|
## add a list of (auto-)excluded country codes with conflicts? why? why not?
|
114
114
|
## cl - a) Chile b) Champions League
|
115
115
|
alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
116
|
-
if country.key.upcase != country.
|
117
|
-
alt_names_auto << "#{country.
|
118
|
-
alt_names_auto << "#{country.
|
116
|
+
if country.key.upcase != country.code
|
117
|
+
alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
|
118
|
+
alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
119
119
|
end
|
120
120
|
alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
|
121
121
|
else ## assume int'l (no country) e.g. champions league, etc.
|
@@ -7,21 +7,21 @@ module SportDb
|
|
7
7
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
|
8
8
|
|
9
9
|
|
10
|
-
class MapperV2 ## todo/check: rename to NameMapper
|
10
|
+
class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
|
11
11
|
|
12
12
|
include Logging
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
|
15
15
|
|
16
16
|
########
|
17
17
|
## key: e.g. augsburg
|
18
|
-
##
|
19
|
-
## length (of
|
20
|
-
MappingStruct = Struct.new( :key, :
|
18
|
+
## name: e.g. FC Augsburg
|
19
|
+
## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
|
20
|
+
MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
|
21
21
|
|
22
22
|
######
|
23
23
|
## convenience helper - (auto)build ActiveRecord-like team records/structs
|
24
|
-
Record = Struct.new( :key, :
|
24
|
+
Record = Struct.new( :key, :name, :alt_names )
|
25
25
|
def build_records( txt_or_lines )
|
26
26
|
recs = []
|
27
27
|
|
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
44
44
|
values = line.split( '|' )
|
45
45
|
values = values.map { |value| value.strip }
|
46
46
|
|
47
|
-
|
47
|
+
name = values[0]
|
48
48
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
49
|
-
key
|
50
|
-
|
49
|
+
key = name.downcase.gsub( /[^a-z]/, '' )
|
50
|
+
alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
|
51
51
|
|
52
|
-
recs << Record.new( key,
|
52
|
+
recs << Record.new( key, name, alt_names )
|
53
53
|
end
|
54
54
|
recs
|
55
55
|
end
|
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
63
63
|
(records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
|
64
64
|
|
65
65
|
## build mapping lookup table
|
66
|
-
@
|
67
|
-
|
66
|
+
@known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
|
67
|
+
build_name_table_for_mapping( records_or_mapping )
|
68
68
|
else ## assume array of records
|
69
|
-
|
69
|
+
build_name_table_for_records( records_or_mapping )
|
70
70
|
end
|
71
71
|
|
72
72
|
## build lookup hash by record (e.g. team/club/etc.) key
|
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
85
85
|
|
86
86
|
|
87
87
|
|
88
|
-
def
|
88
|
+
def map_names!( line ) ## rename to just map! - why?? why not???
|
89
89
|
begin
|
90
|
-
found =
|
90
|
+
found = map_name_for!( @tag, line, @known_names )
|
91
91
|
end while found
|
92
92
|
end
|
93
93
|
|
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
110
110
|
|
111
111
|
|
112
112
|
private
|
113
|
-
def
|
114
|
-
|
113
|
+
def build_name_table_for_mapping( mapping )
|
114
|
+
known_names = []
|
115
115
|
|
116
|
-
mapping.each do |
|
116
|
+
mapping.each do |name, rec|
|
117
117
|
m = MappingStruct.new
|
118
118
|
m.key = rec.key
|
119
|
-
m.
|
120
|
-
m.length =
|
121
|
-
m.pattern = Regexp.escape(
|
119
|
+
m.name = name
|
120
|
+
m.length = name.length
|
121
|
+
m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
|
122
122
|
|
123
|
-
|
123
|
+
known_names << m
|
124
124
|
end
|
125
125
|
|
126
126
|
## note: sort here by length (largest goes first - best match)
|
127
|
-
|
128
|
-
|
127
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
128
|
+
known_names
|
129
129
|
end
|
130
130
|
|
131
|
-
def
|
131
|
+
def build_name_table_for_records( records )
|
132
132
|
|
133
|
-
## build known tracks table w/
|
133
|
+
## build known tracks table w/ alt names e.g.
|
134
134
|
#
|
135
135
|
# [[ 'wolfsbrug', 'VfL Wolfsburg'],
|
136
136
|
# [ 'augsburg', 'FC Augsburg'],
|
@@ -138,65 +138,65 @@ private
|
|
138
138
|
# [ 'augsburg', 'Augi3' ],
|
139
139
|
# [ 'stuttgart', 'VfB Stuttgart']]
|
140
140
|
|
141
|
-
|
141
|
+
known_names = []
|
142
142
|
|
143
143
|
records.each_with_index do |rec,index|
|
144
144
|
|
145
|
-
|
146
|
-
|
145
|
+
name_candidates = []
|
146
|
+
name_candidates << rec.name
|
147
147
|
|
148
|
-
|
148
|
+
name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
|
149
149
|
|
150
150
|
|
151
|
-
## check if
|
152
|
-
# make
|
151
|
+
## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
|
152
|
+
# make subname optional by adding name w/o subname e.g. Grand Prix Japan
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
154
|
+
names = []
|
155
|
+
name_candidates.each do |t|
|
156
|
+
names << t
|
157
157
|
if t =~ /\(.+\)/
|
158
|
-
|
158
|
+
extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
|
159
159
|
# note: strip leading n trailing withspaces too!
|
160
160
|
# -- todo: add squish or something if () is inline e.g. leaves two spaces?
|
161
|
-
|
162
|
-
|
161
|
+
extra_name.strip!
|
162
|
+
names << extra_name
|
163
163
|
end
|
164
164
|
end
|
165
165
|
|
166
|
-
|
166
|
+
names.each do |name|
|
167
167
|
m = MappingStruct.new
|
168
168
|
m.key = rec.key
|
169
|
-
m.
|
170
|
-
m.length =
|
169
|
+
m.name = name
|
170
|
+
m.length = name.length
|
171
171
|
## note: escape for regex plus allow subs for special chars/accents
|
172
|
-
m.pattern =
|
172
|
+
m.pattern = name_esc_regex( name )
|
173
173
|
|
174
|
-
|
174
|
+
known_names << m
|
175
175
|
end
|
176
176
|
|
177
|
-
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{
|
177
|
+
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
|
178
178
|
|
179
179
|
## note: only include code field - if defined
|
180
180
|
if rec.respond_to?(:code) && rec.code && !rec.code.empty?
|
181
181
|
m = MappingStruct.new
|
182
182
|
m.key = rec.key
|
183
|
-
m.
|
183
|
+
m.name = rec.code
|
184
184
|
m.length = rec.code.length
|
185
185
|
m.pattern = rec.code ## note: use code for now as is (no variants allowed fow now)
|
186
186
|
|
187
|
-
|
187
|
+
known_names << m
|
188
188
|
end
|
189
189
|
end
|
190
190
|
|
191
191
|
## note: sort here by length (largest goes first - best match)
|
192
192
|
# exclude code and key (key should always go last)
|
193
|
-
|
194
|
-
|
193
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
194
|
+
known_names
|
195
195
|
end
|
196
196
|
|
197
197
|
|
198
198
|
|
199
|
-
def
|
199
|
+
def map_name_for!( tag, line, mappings )
|
200
200
|
mappings.each do |mapping|
|
201
201
|
key = mapping.key
|
202
202
|
pattern = mapping.pattern
|
@@ -234,9 +234,9 @@ private
|
|
234
234
|
|
235
235
|
|
236
236
|
####
|
237
|
-
#
|
237
|
+
# name helper cut-n-paste copy from TextUtils
|
238
238
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
|
239
|
-
def
|
239
|
+
def name_esc_regex( name_unescaped )
|
240
240
|
|
241
241
|
## escape regex special chars e.g.
|
242
242
|
# . to \. and
|
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
|
|
257
257
|
# e.g. Club Atlético Colón (Santa Fe)
|
258
258
|
# e.g. Bauer Anton (????)
|
259
259
|
|
260
|
-
##
|
261
|
-
##
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
260
|
+
## note: cannot use Regexp.escape! will escape space '' to '\ '
|
261
|
+
## name = Regexp.escape( name_unescaped )
|
262
|
+
name = name_unescaped.gsub( '.', '\.' )
|
263
|
+
name = name.gsub( '(', '\(' )
|
264
|
+
name = name.gsub( ')', '\)' )
|
265
|
+
name = name.gsub( '?', '\?' )
|
266
|
+
name = name.gsub( '*', '\*' )
|
267
|
+
name = name.gsub( '+', '\+' )
|
268
|
+
name = name.gsub( '$', '\$' )
|
269
|
+
name = name.gsub( '^', '\^' )
|
270
270
|
|
271
271
|
## match accented char with or without accents
|
272
272
|
## add (ü|ue) etc.
|
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
|
|
309
309
|
## collect some more (real-world) examples first!!!!!
|
310
310
|
|
311
311
|
alternatives.each do |alt|
|
312
|
-
|
312
|
+
name = name.gsub( alt[0], alt[1] )
|
313
313
|
end
|
314
314
|
|
315
|
-
|
315
|
+
name
|
316
316
|
end
|
317
317
|
|
318
318
|
end # class MapperV2
|