sportdb-formats 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -0
- data/Rakefile +2 -2
- data/lib/sportdb/formats.rb +17 -5
- data/lib/sportdb/formats/country/country_index.rb +7 -7
- data/lib/sportdb/formats/country/country_reader.rb +26 -6
- data/lib/sportdb/formats/datafile_package.rb +10 -7
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/league/league_reader.rb +3 -3
- data/lib/sportdb/formats/match/mapper.rb +63 -63
- data/lib/sportdb/formats/match/mapper_teams.rb +1 -1
- data/lib/sportdb/formats/match/match_parser.rb +99 -180
- data/lib/sportdb/formats/match/match_parser_csv.rb +321 -0
- data/lib/sportdb/formats/package.rb +165 -11
- data/lib/sportdb/formats/parser_helper.rb +11 -2
- data/lib/sportdb/formats/score/score_formats.rb +41 -1
- data/lib/sportdb/formats/score/score_parser.rb +6 -0
- data/lib/sportdb/formats/structs/country.rb +6 -3
- data/lib/sportdb/formats/structs/group.rb +5 -12
- data/lib/sportdb/formats/structs/round.rb +6 -13
- data/lib/sportdb/formats/structs/standings.rb +30 -9
- data/lib/sportdb/formats/structs/team.rb +1 -2
- data/lib/sportdb/formats/team/club_reader_props.rb +3 -3
- data/lib/sportdb/formats/version.rb +4 -2
- data/test/helper.rb +2 -1
- data/test/test_club_reader_props.rb +2 -2
- data/test/test_country_index.rb +4 -4
- data/test/test_country_reader.rb +34 -4
- data/test/test_csv_match_parser.rb +114 -0
- data/test/test_csv_match_parser_utils.rb +20 -0
- data/test/test_csv_reader.rb +5 -5
- data/test/test_datafile.rb +0 -32
- data/test/test_datafile_package.rb +46 -0
- data/test/test_match_auto_relegation.rb +41 -0
- data/test/test_package.rb +60 -28
- data/test/test_package_match.rb +27 -3
- data/test/test_regex.rb +25 -7
- data/test/test_scores.rb +58 -49
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 216e15369403af5707b3d2c6d82fcd6c72e2712d
|
4
|
+
data.tar.gz: 5c73e0e7ce7309b2c8fb86e38d90d1d566f80688
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8033e4db4d7b047b37ec8b3be61657d15df72b7a6c6295317ca8c9cd19c0f578f2ecd75c25cf11b0dcb9218c1ff3e8d2a8e0efb0c59ae235b568b353ef55c9d6
|
7
|
+
data.tar.gz: e5b510b45d4c28c6d83d6baa43ef954e78ae64af116c8305a983b5c54d2fda486425a793b48c8332dcf8cf5c441da37f7183602d50a7e07066049995b5940f0f
|
data/Manifest.txt
CHANGED
@@ -17,6 +17,7 @@ lib/sportdb/formats/match/mapper.rb
|
|
17
17
|
lib/sportdb/formats/match/mapper_teams.rb
|
18
18
|
lib/sportdb/formats/match/match_parser.rb
|
19
19
|
lib/sportdb/formats/match/match_parser_auto_conf.rb
|
20
|
+
lib/sportdb/formats/match/match_parser_csv.rb
|
20
21
|
lib/sportdb/formats/name_helper.rb
|
21
22
|
lib/sportdb/formats/outline_reader.rb
|
22
23
|
lib/sportdb/formats/package.rb
|
@@ -49,8 +50,11 @@ test/test_clubs.rb
|
|
49
50
|
test/test_conf.rb
|
50
51
|
test/test_country_index.rb
|
51
52
|
test/test_country_reader.rb
|
53
|
+
test/test_csv_match_parser.rb
|
54
|
+
test/test_csv_match_parser_utils.rb
|
52
55
|
test/test_csv_reader.rb
|
53
56
|
test/test_datafile.rb
|
57
|
+
test/test_datafile_package.rb
|
54
58
|
test/test_goals.rb
|
55
59
|
test/test_league_index.rb
|
56
60
|
test/test_league_outline_reader.rb
|
@@ -59,6 +63,7 @@ test/test_match.rb
|
|
59
63
|
test/test_match_auto.rb
|
60
64
|
test/test_match_auto_champs.rb
|
61
65
|
test/test_match_auto_euro.rb
|
66
|
+
test/test_match_auto_relegation.rb
|
62
67
|
test/test_match_auto_worldcup.rb
|
63
68
|
test/test_match_champs.rb
|
64
69
|
test/test_match_eng.rb
|
data/Rakefile
CHANGED
@@ -3,7 +3,7 @@ require './lib/sportdb/formats/version.rb'
|
|
3
3
|
|
4
4
|
Hoe.spec 'sportdb-formats' do
|
5
5
|
|
6
|
-
self.version = SportDb::Formats::VERSION
|
6
|
+
self.version = SportDb::Module::Formats::VERSION
|
7
7
|
|
8
8
|
self.summary = "sportdb-formats - sport.db format and text utilities"
|
9
9
|
self.description = summary
|
@@ -21,7 +21,7 @@ Hoe.spec 'sportdb-formats' do
|
|
21
21
|
|
22
22
|
self.extra_deps = [
|
23
23
|
['alphabets', '>= 1.0.0'],
|
24
|
-
['date-formats', '>= 1.0.
|
24
|
+
['date-formats', '>= 1.0.1'],
|
25
25
|
['csvreader', '>= 1.2.4'],
|
26
26
|
['sportdb-langs', '>= 0.1.0'],
|
27
27
|
|
data/lib/sportdb/formats.rb
CHANGED
@@ -10,15 +10,26 @@ require 'zip' ## todo/check: if zip is alreay included in a required module
|
|
10
10
|
|
11
11
|
|
12
12
|
|
13
|
-
def read_csv( path
|
14
|
-
|
13
|
+
def read_csv( path, sep: nil,
|
14
|
+
symbolize_names: nil )
|
15
|
+
opts = {}
|
16
|
+
opts[:sep] = sep if sep
|
17
|
+
opts[:header_converters] = :symbol if symbolize_names
|
18
|
+
|
19
|
+
CsvHash.read( path, **opts )
|
15
20
|
end
|
16
21
|
|
17
|
-
def parse_csv( txt
|
18
|
-
|
22
|
+
def parse_csv( txt, sep: nil,
|
23
|
+
symbolize_names: nil )
|
24
|
+
opts = {}
|
25
|
+
opts[:sep] = sep if sep
|
26
|
+
opts[:header_converters] = :symbol if symbolize_names
|
27
|
+
|
28
|
+
CsvHash.parse( txt, **opts )
|
19
29
|
end
|
20
30
|
|
21
31
|
|
32
|
+
|
22
33
|
## more sportdb libs/gems
|
23
34
|
require 'sportdb/langs'
|
24
35
|
|
@@ -68,6 +79,7 @@ require 'sportdb/formats/match/match_parser'
|
|
68
79
|
require 'sportdb/formats/match/match_parser_auto_conf'
|
69
80
|
require 'sportdb/formats/match/conf_parser'
|
70
81
|
|
82
|
+
require 'sportdb/formats/match/match_parser_csv'
|
71
83
|
|
72
84
|
require 'sportdb/formats/country/country_reader'
|
73
85
|
require 'sportdb/formats/country/country_index'
|
@@ -147,4 +159,4 @@ end # module SportDb
|
|
147
159
|
|
148
160
|
|
149
161
|
|
150
|
-
puts SportDb::Formats.banner # say hello
|
162
|
+
puts SportDb::Module::Formats.banner # say hello
|
@@ -30,11 +30,11 @@ class CountryIndex
|
|
30
30
|
## auto-fill countries
|
31
31
|
## pp recs
|
32
32
|
recs.each do |rec|
|
33
|
-
## rec e.g. { key:'af',
|
33
|
+
## rec e.g. { key:'af', code:'AFG', name:'Afghanistan'}
|
34
34
|
|
35
35
|
@countries << rec
|
36
36
|
|
37
|
-
## add codes lookups - key,
|
37
|
+
## add codes lookups - key, code, ...
|
38
38
|
if @countries_by_code[ rec.key ]
|
39
39
|
puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
|
40
40
|
exit 1
|
@@ -42,13 +42,13 @@ class CountryIndex
|
|
42
42
|
@countries_by_code[ rec.key ] = rec
|
43
43
|
end
|
44
44
|
|
45
|
-
## add
|
46
|
-
if rec.key != rec.
|
47
|
-
if @countries_by_code[ rec.
|
48
|
-
puts "** !! ERROR !! country code
|
45
|
+
## add code (only) if different from key
|
46
|
+
if rec.key != rec.code.downcase
|
47
|
+
if @countries_by_code[ rec.code.downcase ]
|
48
|
+
puts "** !! ERROR !! country code >#{rec.code.downcase}< already exits!!"
|
49
49
|
exit 1
|
50
50
|
else
|
51
|
-
@countries_by_code[ rec.
|
51
|
+
@countries_by_code[ rec.code.downcase ] = rec
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
@@ -43,11 +43,31 @@ def parse
|
|
43
43
|
# e.g. East Germany (-1989) => East Germany (-1989)
|
44
44
|
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
45
45
|
last_country.alt_names += values
|
46
|
+
elsif line =~ /^-[ ]*(\d{4})
|
47
|
+
[ ]+
|
48
|
+
(.+)$
|
49
|
+
/x ## check for historic lines e.g. -1989
|
50
|
+
year = $1.to_i
|
51
|
+
parts = $2.split( /=>|⇒/ )
|
52
|
+
values = parts[0].split( ',' )
|
53
|
+
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
54
|
+
|
55
|
+
name = values[0]
|
56
|
+
code = values[1]
|
57
|
+
|
58
|
+
last_country = country = Country.new( name: "#{name} (-#{year})",
|
59
|
+
code: code )
|
60
|
+
## country.alt_names << name ## note: for now do NOT add name without year to alt_names - gets auto-add by index!!!
|
61
|
+
|
62
|
+
countries << country
|
63
|
+
## todo/fix: add reference to country today (in parts[1] !!!!)
|
46
64
|
else
|
47
65
|
## assume "regular" line
|
48
66
|
## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
|
49
67
|
## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
|
50
|
-
if line =~ /^([a-z]{2,4})
|
68
|
+
if line =~ /^([a-z]{2,4})
|
69
|
+
[ ]+
|
70
|
+
(.+)$/x
|
51
71
|
key = $1
|
52
72
|
values = $2.split( ',' )
|
53
73
|
## strip and squish (white)spaces
|
@@ -60,14 +80,14 @@ def parse
|
|
60
80
|
geos = split_geo( values[0] )
|
61
81
|
name = geos[0] ## note: ignore all other geos for now
|
62
82
|
|
63
|
-
## note: allow
|
64
|
-
|
83
|
+
## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
|
84
|
+
code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
|
65
85
|
values[1]
|
66
86
|
else
|
67
87
|
if values[1]
|
68
|
-
puts "** !!! ERROR !!! wrong
|
88
|
+
puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
|
69
89
|
else
|
70
|
-
puts "** !!! ERROR !!! missing
|
90
|
+
puts "** !!! ERROR !!! missing code for (canonical) country name"
|
71
91
|
end
|
72
92
|
exit 1
|
73
93
|
end
|
@@ -80,7 +100,7 @@ def parse
|
|
80
100
|
|
81
101
|
last_country = country = Country.new( key: key,
|
82
102
|
name: name,
|
83
|
-
|
103
|
+
code: code,
|
84
104
|
tags: tags )
|
85
105
|
countries << country
|
86
106
|
else
|
@@ -30,7 +30,7 @@ class Entry
|
|
30
30
|
@name = path[ pack.path.length+1..-1 ]
|
31
31
|
end
|
32
32
|
def name() @name; end
|
33
|
-
def read() File.open( @path, 'r:utf-8' ).read; end
|
33
|
+
def read() File.open( @path, 'r:utf-8' ) {|f| f.read }; end
|
34
34
|
end # class DirPackage::Entry
|
35
35
|
|
36
36
|
|
@@ -44,17 +44,20 @@ end # class DirPackage::Entry
|
|
44
44
|
@name = basename
|
45
45
|
end
|
46
46
|
|
47
|
-
|
47
|
+
## todo/check: change pattern: to re: - why? why not?
|
48
|
+
def each( pattern: ) ## todo/check: rename to glob or something - why? why not?
|
48
49
|
## use just .* for extension or remove and check if File.file? and skip File.directory? - why? why not?
|
49
50
|
## note: incl. files starting with dot (.)) as candidates (normally excluded with just *)
|
50
|
-
|
51
|
-
|
52
|
-
if
|
53
|
-
|
51
|
+
## todo/check/fix: is there a better (simpler) glob pattern? yes? no?
|
52
|
+
Dir.glob( "#{@path}/**/{*,.*}.*" ).each do |path|
|
53
|
+
if File.directory?( path )
|
54
|
+
## always skip directories / folders
|
55
|
+
elsif EXCLUDE_RE.match( path )
|
56
|
+
## note: skip dot dirs (e.g. .build/, .git/, etc.)
|
54
57
|
elsif pattern.match( path )
|
55
58
|
yield( Entry.new( self, path ))
|
56
59
|
else
|
57
|
-
|
60
|
+
## puts " skipping >#{path}<"
|
58
61
|
end
|
59
62
|
end
|
60
63
|
end
|
@@ -121,14 +121,31 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
121
121
|
values
|
122
122
|
end
|
123
123
|
|
124
|
-
def check_stage( name )
|
125
|
-
known_stages = ['regular season',
|
126
|
-
'championship round',
|
127
|
-
'relegation round',
|
128
|
-
'play-offs'
|
129
|
-
]
|
130
124
|
|
131
|
-
|
125
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
126
|
+
KNOWN_STAGES = [
|
127
|
+
'Regular Season',
|
128
|
+
'Regular Stage',
|
129
|
+
'Championship Round',
|
130
|
+
'Championship Playoff',
|
131
|
+
'Relegation Round',
|
132
|
+
'Relegation Playoff',
|
133
|
+
'Play-offs',
|
134
|
+
'Playoff Stage',
|
135
|
+
'Grunddurchgang',
|
136
|
+
'Finaldurchgang - Qualifikationsgruppe',
|
137
|
+
'Finaldurchgang - Qualifikation',
|
138
|
+
'Finaldurchgang - Meistergruppe',
|
139
|
+
'Finaldurchgang - Meister',
|
140
|
+
'EL Play-off',
|
141
|
+
'Europa League Play-off',
|
142
|
+
'Europa-League-Play-offs',
|
143
|
+
].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
|
144
|
+
|
145
|
+
|
146
|
+
def check_stage( name )
|
147
|
+
# note: normalize names e.g. downcase and remove all non a-z chars (e.g. space, dash, etc.)
|
148
|
+
if KNOWN_STAGES.include?( name.downcase.gsub( /[^a-z]/, '' ) )
|
132
149
|
## everything ok
|
133
150
|
else
|
134
151
|
puts "** !!! ERROR - no (league) stage match found for >#{name}<, add to (builtin) stages table; sorry"
|
@@ -113,9 +113,9 @@ def parse
|
|
113
113
|
## add a list of (auto-)excluded country codes with conflicts? why? why not?
|
114
114
|
## cl - a) Chile b) Champions League
|
115
115
|
alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
116
|
-
if country.key.upcase != country.
|
117
|
-
alt_names_auto << "#{country.
|
118
|
-
alt_names_auto << "#{country.
|
116
|
+
if country.key.upcase != country.code
|
117
|
+
alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
|
118
|
+
alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
119
119
|
end
|
120
120
|
alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
|
121
121
|
else ## assume int'l (no country) e.g. champions league, etc.
|
@@ -7,21 +7,21 @@ module SportDb
|
|
7
7
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
|
8
8
|
|
9
9
|
|
10
|
-
class MapperV2 ## todo/check: rename to NameMapper
|
10
|
+
class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
|
11
11
|
|
12
12
|
include Logging
|
13
13
|
|
14
|
-
attr_reader :
|
14
|
+
attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
|
15
15
|
|
16
16
|
########
|
17
17
|
## key: e.g. augsburg
|
18
|
-
##
|
19
|
-
## length (of
|
20
|
-
MappingStruct = Struct.new( :key, :
|
18
|
+
## name: e.g. FC Augsburg
|
19
|
+
## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
|
20
|
+
MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
|
21
21
|
|
22
22
|
######
|
23
23
|
## convenience helper - (auto)build ActiveRecord-like team records/structs
|
24
|
-
Record = Struct.new( :key, :
|
24
|
+
Record = Struct.new( :key, :name, :alt_names )
|
25
25
|
def build_records( txt_or_lines )
|
26
26
|
recs = []
|
27
27
|
|
@@ -44,12 +44,12 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
44
44
|
values = line.split( '|' )
|
45
45
|
values = values.map { |value| value.strip }
|
46
46
|
|
47
|
-
|
47
|
+
name = values[0]
|
48
48
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
49
|
-
key
|
50
|
-
|
49
|
+
key = name.downcase.gsub( /[^a-z]/, '' )
|
50
|
+
alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil
|
51
51
|
|
52
|
-
recs << Record.new( key,
|
52
|
+
recs << Record.new( key, name, alt_names )
|
53
53
|
end
|
54
54
|
recs
|
55
55
|
end
|
@@ -63,10 +63,10 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
63
63
|
(records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
|
64
64
|
|
65
65
|
## build mapping lookup table
|
66
|
-
@
|
67
|
-
|
66
|
+
@known_names = if records_or_mapping.is_a?( Hash ) ## assume "custom" mapping hash table (name=>record)
|
67
|
+
build_name_table_for_mapping( records_or_mapping )
|
68
68
|
else ## assume array of records
|
69
|
-
|
69
|
+
build_name_table_for_records( records_or_mapping )
|
70
70
|
end
|
71
71
|
|
72
72
|
## build lookup hash by record (e.g. team/club/etc.) key
|
@@ -85,9 +85,9 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
85
85
|
|
86
86
|
|
87
87
|
|
88
|
-
def
|
88
|
+
def map_names!( line ) ## rename to just map! - why?? why not???
|
89
89
|
begin
|
90
|
-
found =
|
90
|
+
found = map_name_for!( @tag, line, @known_names )
|
91
91
|
end while found
|
92
92
|
end
|
93
93
|
|
@@ -110,27 +110,27 @@ class MapperV2 ## todo/check: rename to NameMapper/TitleMapper ? why? why n
|
|
110
110
|
|
111
111
|
|
112
112
|
private
|
113
|
-
def
|
114
|
-
|
113
|
+
def build_name_table_for_mapping( mapping )
|
114
|
+
known_names = []
|
115
115
|
|
116
|
-
mapping.each do |
|
116
|
+
mapping.each do |name, rec|
|
117
117
|
m = MappingStruct.new
|
118
118
|
m.key = rec.key
|
119
|
-
m.
|
120
|
-
m.length =
|
121
|
-
m.pattern = Regexp.escape(
|
119
|
+
m.name = name
|
120
|
+
m.length = name.length
|
121
|
+
m.pattern = Regexp.escape( name ) ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
|
122
122
|
|
123
|
-
|
123
|
+
known_names << m
|
124
124
|
end
|
125
125
|
|
126
126
|
## note: sort here by length (largest goes first - best match)
|
127
|
-
|
128
|
-
|
127
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
128
|
+
known_names
|
129
129
|
end
|
130
130
|
|
131
|
-
def
|
131
|
+
def build_name_table_for_records( records )
|
132
132
|
|
133
|
-
## build known tracks table w/
|
133
|
+
## build known tracks table w/ alt names e.g.
|
134
134
|
#
|
135
135
|
# [[ 'wolfsbrug', 'VfL Wolfsburg'],
|
136
136
|
# [ 'augsburg', 'FC Augsburg'],
|
@@ -138,65 +138,65 @@ private
|
|
138
138
|
# [ 'augsburg', 'Augi3' ],
|
139
139
|
# [ 'stuttgart', 'VfB Stuttgart']]
|
140
140
|
|
141
|
-
|
141
|
+
known_names = []
|
142
142
|
|
143
143
|
records.each_with_index do |rec,index|
|
144
144
|
|
145
|
-
|
146
|
-
|
145
|
+
name_candidates = []
|
146
|
+
name_candidates << rec.name
|
147
147
|
|
148
|
-
|
148
|
+
name_candidates += rec.alt_names.split('|') if rec.alt_names && !rec.alt_names.empty?
|
149
149
|
|
150
150
|
|
151
|
-
## check if
|
152
|
-
# make
|
151
|
+
## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
|
152
|
+
# make subname optional by adding name w/o subname e.g. Grand Prix Japan
|
153
153
|
|
154
|
-
|
155
|
-
|
156
|
-
|
154
|
+
names = []
|
155
|
+
name_candidates.each do |t|
|
156
|
+
names << t
|
157
157
|
if t =~ /\(.+\)/
|
158
|
-
|
158
|
+
extra_name = t.gsub( /\(.+\)/, '' ) # remove/delete subnames
|
159
159
|
# note: strip leading n trailing withspaces too!
|
160
160
|
# -- todo: add squish or something if () is inline e.g. leaves two spaces?
|
161
|
-
|
162
|
-
|
161
|
+
extra_name.strip!
|
162
|
+
names << extra_name
|
163
163
|
end
|
164
164
|
end
|
165
165
|
|
166
|
-
|
166
|
+
names.each do |name|
|
167
167
|
m = MappingStruct.new
|
168
168
|
m.key = rec.key
|
169
|
-
m.
|
170
|
-
m.length =
|
169
|
+
m.name = name
|
170
|
+
m.length = name.length
|
171
171
|
## note: escape for regex plus allow subs for special chars/accents
|
172
|
-
m.pattern =
|
172
|
+
m.pattern = name_esc_regex( name )
|
173
173
|
|
174
|
-
|
174
|
+
known_names << m
|
175
175
|
end
|
176
176
|
|
177
|
-
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{
|
177
|
+
logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"
|
178
178
|
|
179
179
|
## note: only include code field - if defined
|
180
180
|
if rec.respond_to?(:code) && rec.code && !rec.code.empty?
|
181
181
|
m = MappingStruct.new
|
182
182
|
m.key = rec.key
|
183
|
-
m.
|
183
|
+
m.name = rec.code
|
184
184
|
m.length = rec.code.length
|
185
185
|
m.pattern = rec.code ## note: use code for now as is (no variants allowed fow now)
|
186
186
|
|
187
|
-
|
187
|
+
known_names << m
|
188
188
|
end
|
189
189
|
end
|
190
190
|
|
191
191
|
## note: sort here by length (largest goes first - best match)
|
192
192
|
# exclude code and key (key should always go last)
|
193
|
-
|
194
|
-
|
193
|
+
known_names = known_names.sort { |l,r| r.length <=> l.length }
|
194
|
+
known_names
|
195
195
|
end
|
196
196
|
|
197
197
|
|
198
198
|
|
199
|
-
def
|
199
|
+
def map_name_for!( tag, line, mappings )
|
200
200
|
mappings.each do |mapping|
|
201
201
|
key = mapping.key
|
202
202
|
pattern = mapping.pattern
|
@@ -234,9 +234,9 @@ private
|
|
234
234
|
|
235
235
|
|
236
236
|
####
|
237
|
-
#
|
237
|
+
# name helper cut-n-paste copy from TextUtils
|
238
238
|
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
|
239
|
-
def
|
239
|
+
def name_esc_regex( name_unescaped )
|
240
240
|
|
241
241
|
## escape regex special chars e.g.
|
242
242
|
# . to \. and
|
@@ -257,16 +257,16 @@ def title_esc_regex( title_unescaped )
|
|
257
257
|
# e.g. Club Atlético Colón (Santa Fe)
|
258
258
|
# e.g. Bauer Anton (????)
|
259
259
|
|
260
|
-
##
|
261
|
-
##
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
260
|
+
## note: cannot use Regexp.escape! will escape space '' to '\ '
|
261
|
+
## name = Regexp.escape( name_unescaped )
|
262
|
+
name = name_unescaped.gsub( '.', '\.' )
|
263
|
+
name = name.gsub( '(', '\(' )
|
264
|
+
name = name.gsub( ')', '\)' )
|
265
|
+
name = name.gsub( '?', '\?' )
|
266
|
+
name = name.gsub( '*', '\*' )
|
267
|
+
name = name.gsub( '+', '\+' )
|
268
|
+
name = name.gsub( '$', '\$' )
|
269
|
+
name = name.gsub( '^', '\^' )
|
270
270
|
|
271
271
|
## match accented char with or without accents
|
272
272
|
## add (ü|ue) etc.
|
@@ -309,10 +309,10 @@ def title_esc_regex( title_unescaped )
|
|
309
309
|
## collect some more (real-world) examples first!!!!!
|
310
310
|
|
311
311
|
alternatives.each do |alt|
|
312
|
-
|
312
|
+
name = name.gsub( alt[0], alt[1] )
|
313
313
|
end
|
314
314
|
|
315
|
-
|
315
|
+
name
|
316
316
|
end
|
317
317
|
|
318
318
|
end # class MapperV2
|