sportdb-formats 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +1 -1
- data/README.md +6 -5
- data/Rakefile +8 -7
- data/bin/fbx +146 -0
- data/lib/sportdb/formats/event/event_reader.rb +0 -65
- data/lib/sportdb/formats/match/match_parser.rb +3 -1
- data/lib/sportdb/formats/version.rb +1 -1
- data/lib/sportdb/formats.rb +73 -2
- metadata +23 -8
- data/lib/sportdb/formats/name_helper.rb +0 -87
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67f0ea27fe2c7a39512eab9728167881fb1b99b919492b3d513cb1a1ff46c787
|
4
|
+
data.tar.gz: a93978a81ec8cb8c9b639ff4842e5eb46d7ebc018031e4b37e8361b7ed80e8af
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 94d114bc6772b18e972449cd7ed0f0a890d737203e8e020655e446e4496e8b8e99985ac33c6da2579e27a52a863602ea561e2275f2e1852762355228b3d9383d
|
7
|
+
data.tar.gz: 4e645afc07ca5c73cda0308fe82211293160e2ebd3857770dc959ea41c684cbc2794e1c514f1fcf9e103f22294cc5d7879446632a9be06d2a04399b11daef5ed
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -2,6 +2,7 @@ CHANGELOG.md
|
|
2
2
|
Manifest.txt
|
3
3
|
README.md
|
4
4
|
Rakefile
|
5
|
+
bin/fbx
|
5
6
|
lib/sportdb/formats.rb
|
6
7
|
lib/sportdb/formats/country/country_reader.rb
|
7
8
|
lib/sportdb/formats/csv/goal.rb
|
@@ -16,7 +17,6 @@ lib/sportdb/formats/league/league_outline_reader.rb
|
|
16
17
|
lib/sportdb/formats/league/league_reader.rb
|
17
18
|
lib/sportdb/formats/match/conf_parser.rb
|
18
19
|
lib/sportdb/formats/match/match_parser.rb
|
19
|
-
lib/sportdb/formats/name_helper.rb
|
20
20
|
lib/sportdb/formats/package.rb
|
21
21
|
lib/sportdb/formats/search/sport.rb
|
22
22
|
lib/sportdb/formats/search/structs.rb
|
data/README.md
CHANGED
@@ -1,11 +1,10 @@
|
|
1
|
-
# sportdb-formats - sport.db format and text utilities
|
1
|
+
# sportdb-formats - sport.db format and text utilities
|
2
2
|
|
3
3
|
|
4
4
|
* home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
|
5
5
|
* bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
|
6
6
|
* gem :: [rubygems.org/gems/sportdb-formats](https://rubygems.org/gems/sportdb-formats)
|
7
7
|
* rdoc :: [rubydoc.info/gems/sportdb-formats](http://rubydoc.info/gems/sportdb-formats)
|
8
|
-
* forum :: [opensport](http://groups.google.com/group/opensport)
|
9
8
|
|
10
9
|
|
11
10
|
|
@@ -19,8 +18,10 @@ The `sportdb-formats` scripts are dedicated to the public domain.
|
|
19
18
|
Use it as you please with no restrictions whatsoever.
|
20
19
|
|
21
20
|
|
21
|
+
|
22
22
|
## Questions? Comments?
|
23
23
|
|
24
|
-
|
25
|
-
[
|
26
|
-
|
24
|
+
Yes, you can. More than welcome.
|
25
|
+
See [Help & Support »](https://github.com/openfootball/help)
|
26
|
+
|
27
|
+
|
data/Rakefile
CHANGED
@@ -20,13 +20,14 @@ Hoe.spec 'sportdb-formats' do
|
|
20
20
|
self.licenses = ['Public Domain']
|
21
21
|
|
22
22
|
self.extra_deps = [
|
23
|
-
['sportdb-structs',
|
24
|
-
['sportdb-parser',
|
25
|
-
['
|
26
|
-
['
|
27
|
-
['
|
28
|
-
|
29
|
-
|
23
|
+
['sportdb-structs', '>= 0.3.1'],
|
24
|
+
['sportdb-parser', '>= 0.2.1'],
|
25
|
+
['sportdb-catalogs', '>= 1.2.2'],
|
26
|
+
['date-formats', '>= 1.0.2'],
|
27
|
+
['cocos', '>= 0.4.0'],
|
28
|
+
['logutils', '>= 0.6.1'],
|
29
|
+
|
30
|
+
['rubyzip', '>= 2.3.2' ],
|
30
31
|
]
|
31
32
|
|
32
33
|
self.spec_extras = {
|
data/bin/fbx
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib -I ../parser/lib bin/fbx
|
5
|
+
|
6
|
+
|
7
|
+
## our own code
|
8
|
+
require 'sportdb/formats'
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
require 'optparse'
|
13
|
+
|
14
|
+
##
|
15
|
+
## read textfile
|
16
|
+
## and dump match parse results
|
17
|
+
##
|
18
|
+
## fbx ../openfootball/.../euro.txt
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
args = ARGV
|
24
|
+
opts = { debug: false,
|
25
|
+
outline: false }
|
26
|
+
|
27
|
+
parser = OptionParser.new do |parser|
|
28
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
29
|
+
|
30
|
+
##
|
31
|
+
## check if git has an offline option?? (use same)
|
32
|
+
## check for other tools - why? why not?
|
33
|
+
|
34
|
+
|
35
|
+
parser.on( "--verbose", "--debug",
|
36
|
+
"turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
|
37
|
+
opts[:debug] = debug
|
38
|
+
end
|
39
|
+
|
40
|
+
parser.on( "--outline",
|
41
|
+
"turn on outline (only) output (default: #{opts[:outline]})" ) do |outline|
|
42
|
+
opts[:outline] = outline
|
43
|
+
end
|
44
|
+
end
|
45
|
+
parser.parse!( args )
|
46
|
+
|
47
|
+
puts "OPTS:"
|
48
|
+
p opts
|
49
|
+
puts "ARGV:"
|
50
|
+
p args
|
51
|
+
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
paths = if args.empty?
|
57
|
+
[
|
58
|
+
'../../../openfootball/euro/2021--europe/euro.txt',
|
59
|
+
'../../../openfootball/euro/2024--germany/euro.txt',
|
60
|
+
]
|
61
|
+
else
|
62
|
+
## check for directories
|
63
|
+
## and auto-expand
|
64
|
+
|
65
|
+
SportDb::Parser::Opts.expand_args( args )
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
|
74
|
+
SportDb::MatchParser.debug = true if opts[:debug]
|
75
|
+
|
76
|
+
|
77
|
+
## errors = []
|
78
|
+
|
79
|
+
|
80
|
+
paths.each_with_index do |path,i|
|
81
|
+
puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
|
82
|
+
|
83
|
+
txt = read_text( path )
|
84
|
+
secs = SportDb::LeagueOutlineReader.parse( txt )
|
85
|
+
## pp secs
|
86
|
+
|
87
|
+
secs.each_with_index do |sec,j| ## sec(tion)s
|
88
|
+
season = sec[:season]
|
89
|
+
league = sec[:league]
|
90
|
+
stage = sec[:stage]
|
91
|
+
lines = sec[:lines]
|
92
|
+
|
93
|
+
puts " section #{j+1}/#{secs.size} - #{league.name} #{season}, #{stage} - #{lines.size} line(s)"
|
94
|
+
|
95
|
+
next if opts[:outline]
|
96
|
+
|
97
|
+
=begin
|
98
|
+
### check if event info available - use start_date;
|
99
|
+
## otherwise we have to guess (use a "synthetic" start_date)
|
100
|
+
event_info = catalog.events.find_by( season: season,
|
101
|
+
league: league )
|
102
|
+
|
103
|
+
start = if event_info && event_info.start_date
|
104
|
+
puts "event info found:"
|
105
|
+
puts " using start date from event: "
|
106
|
+
pp event_info
|
107
|
+
pp event_info.start_date
|
108
|
+
event_info.start_date
|
109
|
+
else
|
110
|
+
=end
|
111
|
+
start = if season.year?
|
112
|
+
Date.new( season.start_year, 1, 1 )
|
113
|
+
else
|
114
|
+
Date.new( season.start_year, 7, 1 )
|
115
|
+
end
|
116
|
+
|
117
|
+
parser = SportDb::MatchParser.new( lines,
|
118
|
+
start ) ## note: keep season start_at date for now (no need for more specific stage date need for now)
|
119
|
+
|
120
|
+
auto_conf_teams, matches, rounds, groups = parser.parse
|
121
|
+
|
122
|
+
puts ">>> #{auto_conf_teams.size} teams:"
|
123
|
+
pp auto_conf_teams
|
124
|
+
puts ">>> #{matches.size} matches:"
|
125
|
+
## pp matches
|
126
|
+
puts ">>> #{rounds.size} rounds:"
|
127
|
+
pp rounds
|
128
|
+
puts ">>> #{groups.size} groups:"
|
129
|
+
pp groups
|
130
|
+
end # each secs
|
131
|
+
end # each paths
|
132
|
+
|
133
|
+
=begin
|
134
|
+
if errors.size > 0
|
135
|
+
puts
|
136
|
+
pp errors
|
137
|
+
puts
|
138
|
+
puts "!! #{errors.size} parse error(s) in #{paths.size} datafile(s)"
|
139
|
+
else
|
140
|
+
puts
|
141
|
+
puts "OK no parse errors found in #{paths.size} datafile(s)"
|
142
|
+
end
|
143
|
+
=end
|
144
|
+
|
145
|
+
puts "bye"
|
146
|
+
|
@@ -1,71 +1,6 @@
|
|
1
1
|
|
2
2
|
module SportDb
|
3
3
|
module Import
|
4
|
-
|
5
|
-
|
6
|
-
class EventInfo
|
7
|
-
## "high level" info (summary) about event
|
8
|
-
## (like a "wikipedia infobox")
|
9
|
-
## use for checking dataset imports; lets you check e.g.
|
10
|
-
## - dates within range
|
11
|
-
## - number of teams e.g. 20
|
12
|
-
## - matches played e.g. 380
|
13
|
-
## - goals scored e.g. 937
|
14
|
-
## etc.
|
15
|
-
|
16
|
-
attr_reader :league,
|
17
|
-
:season,
|
18
|
-
:teams,
|
19
|
-
:matches,
|
20
|
-
:goals,
|
21
|
-
:start_date,
|
22
|
-
:end_date
|
23
|
-
|
24
|
-
def initialize( league:, season:,
|
25
|
-
start_date: nil, end_date: nil,
|
26
|
-
teams: nil,
|
27
|
-
matches: nil,
|
28
|
-
goals: nil )
|
29
|
-
|
30
|
-
@league = league
|
31
|
-
@season = season
|
32
|
-
|
33
|
-
@start_date = start_date
|
34
|
-
@end_date = end_date
|
35
|
-
|
36
|
-
@teams = teams ## todo/check: rename/use teams_count ??
|
37
|
-
@matches = matches ## todo/check: rename/use match_count ??
|
38
|
-
@goals = goals
|
39
|
-
end
|
40
|
-
|
41
|
-
def include?( date )
|
42
|
-
## todo/fix: add options e.g.
|
43
|
-
## - add delta/off_by_one or such?
|
44
|
-
## - add strict (for) only return true if date range (really) defined (no generic auto-rules)
|
45
|
-
|
46
|
-
### note: for now allow off by one error (via timezone/local time errors)
|
47
|
-
## todo/fix: issue warning if off by one!!!!
|
48
|
-
if @start_date && @end_date
|
49
|
-
date >= (@start_date-1) &&
|
50
|
-
date <= (@end_date+1)
|
51
|
-
else
|
52
|
-
if @season.year?
|
53
|
-
# assume generic rule
|
54
|
-
## same year e.g. Jan 1 - Dec 31; always true for now
|
55
|
-
date.year == @season.start_year
|
56
|
-
else
|
57
|
-
# assume generic rule
|
58
|
-
## July 1 - June 30 (Y+1)
|
59
|
-
## - todo/check -start for some countries/leagues in June 1 or August 1 ????
|
60
|
-
date >= Date.new( @season.start_year, 7, 1 ) &&
|
61
|
-
date <= Date.new( @season.end_year, 6, 30 )
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end # method include?
|
65
|
-
alias_method :between?, :include?
|
66
|
-
end # class EventInfo
|
67
|
-
|
68
|
-
|
69
4
|
class EventInfoReader
|
70
5
|
def catalog() Import.catalog; end
|
71
6
|
|
@@ -617,9 +617,11 @@ class GoalStruct
|
|
617
617
|
|
618
618
|
elsif node_type == :'@' ||
|
619
619
|
node_type == :',' ||
|
620
|
-
node_type == :geo
|
620
|
+
node_type == :geo ||
|
621
|
+
node_type == :timezone
|
621
622
|
## e.g.
|
622
623
|
## [:"@"], [:geo, "Stade de France"], [:","], [:geo, "Saint-Denis"]]
|
624
|
+
## [:"@"], [:geo, "Arena de São Paulo"], [:","], [:geo, "São Paulo"], [:timezone, "(UTC-3)"]
|
623
625
|
more << node[1] if node_type == :geo
|
624
626
|
else
|
625
627
|
puts "!! PARSE ERROR - unexpected node type #{node_type} in match line; got:"
|
data/lib/sportdb/formats.rb
CHANGED
@@ -30,8 +30,6 @@ require_relative 'formats/datafile'
|
|
30
30
|
require_relative 'formats/datafile_package'
|
31
31
|
require_relative 'formats/package'
|
32
32
|
|
33
|
-
require_relative 'formats/name_helper'
|
34
|
-
|
35
33
|
|
36
34
|
|
37
35
|
## let's put test configuration in its own namespace / module
|
@@ -151,6 +149,8 @@ module SportDb
|
|
151
149
|
|
152
150
|
Player = ::Sports::Player
|
153
151
|
|
152
|
+
EventInfo = ::Sports::EventInfo
|
153
|
+
|
154
154
|
|
155
155
|
class Team
|
156
156
|
## add convenience lookup helper / method for name by season for now
|
@@ -243,4 +243,75 @@ require_relative 'formats/ground/ground_reader'
|
|
243
243
|
|
244
244
|
|
245
245
|
|
246
|
+
|
247
|
+
|
248
|
+
### auto-configure builtin lookups via catalog.db(s)
|
249
|
+
require 'sportdb/catalogs'
|
250
|
+
|
251
|
+
|
252
|
+
module SportDb
|
253
|
+
module Import
|
254
|
+
|
255
|
+
class Configuration
|
256
|
+
## note: add more configs (open class), see sportdb-structs for original config!!!
|
257
|
+
|
258
|
+
###
|
259
|
+
# find a better name for setting - why? why not?
|
260
|
+
# how about catalogdb or ???
|
261
|
+
attr_reader :catalog_path
|
262
|
+
def catalog_path=(path)
|
263
|
+
@catalog_path = path
|
264
|
+
########
|
265
|
+
# reset database here to new path
|
266
|
+
CatalogDb::Metal::Record.database = path
|
267
|
+
|
268
|
+
## plus automagically set world search too (to use CatalogDb)
|
269
|
+
self.world = WorldSearch.new(
|
270
|
+
countries: CatalogDb::Metal::Country,
|
271
|
+
cities: CatalogDb::Metal::City,
|
272
|
+
)
|
273
|
+
|
274
|
+
@catalog_path
|
275
|
+
end
|
276
|
+
|
277
|
+
def catalog
|
278
|
+
@catalog ||= SportSearch.new(
|
279
|
+
leagues: CatalogDb::Metal::League,
|
280
|
+
national_teams: CatalogDb::Metal::NationalTeam,
|
281
|
+
clubs: CatalogDb::Metal::Club,
|
282
|
+
grounds: CatalogDb::Metal::Ground,
|
283
|
+
events: CatalogDb::Metal::EventInfo,
|
284
|
+
players: CatalogDb::Metal::Player, # note - via players.db !!!
|
285
|
+
)
|
286
|
+
end
|
287
|
+
|
288
|
+
###
|
289
|
+
# find a better name for setting - why? why not?
|
290
|
+
# how about playersdb or ???
|
291
|
+
attr_reader :players_path
|
292
|
+
def players_path=(path)
|
293
|
+
@players_path = path
|
294
|
+
########
|
295
|
+
# reset database here to new path
|
296
|
+
CatalogDb::Metal::PlayerRecord.database = path
|
297
|
+
|
298
|
+
@players_path
|
299
|
+
end
|
300
|
+
end # class Configuration
|
301
|
+
|
302
|
+
|
303
|
+
## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
|
304
|
+
def self.catalog() config.catalog; end
|
305
|
+
end # module Import
|
306
|
+
end # module SportDb
|
307
|
+
|
308
|
+
|
309
|
+
###
|
310
|
+
## add default/built-in catalog here - why? why not?
|
311
|
+
## todo/fix - set catalog_path on demand
|
312
|
+
## note: for now required for world search setup etc.
|
313
|
+
SportDb::Import.config.catalog_path = "#{FootballDb::Data.data_dir}/catalog.db"
|
314
|
+
|
315
|
+
|
316
|
+
|
246
317
|
puts SportDb::Module::Formats.banner # say hello
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-formats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-08-
|
11
|
+
date: 2024-08-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sportdb-structs
|
@@ -16,28 +16,42 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.3.
|
19
|
+
version: 0.3.1
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.3.
|
26
|
+
version: 0.3.1
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: sportdb-parser
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.2.
|
33
|
+
version: 0.2.1
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.2.
|
40
|
+
version: 0.2.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: sportdb-catalogs
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.2.2
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.2.2
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: date-formats
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -130,7 +144,8 @@ dependencies:
|
|
130
144
|
version: '4.1'
|
131
145
|
description: sportdb-formats - sport.db format and text utilities
|
132
146
|
email: gerald.bauer@gmail.com
|
133
|
-
executables:
|
147
|
+
executables:
|
148
|
+
- fbx
|
134
149
|
extensions: []
|
135
150
|
extra_rdoc_files:
|
136
151
|
- CHANGELOG.md
|
@@ -141,6 +156,7 @@ files:
|
|
141
156
|
- Manifest.txt
|
142
157
|
- README.md
|
143
158
|
- Rakefile
|
159
|
+
- bin/fbx
|
144
160
|
- lib/sportdb/formats.rb
|
145
161
|
- lib/sportdb/formats/country/country_reader.rb
|
146
162
|
- lib/sportdb/formats/csv/goal.rb
|
@@ -155,7 +171,6 @@ files:
|
|
155
171
|
- lib/sportdb/formats/league/league_reader.rb
|
156
172
|
- lib/sportdb/formats/match/conf_parser.rb
|
157
173
|
- lib/sportdb/formats/match/match_parser.rb
|
158
|
-
- lib/sportdb/formats/name_helper.rb
|
159
174
|
- lib/sportdb/formats/package.rb
|
160
175
|
- lib/sportdb/formats/search/sport.rb
|
161
176
|
- lib/sportdb/formats/search/structs.rb
|
@@ -1,87 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module NameHelper
|
4
|
-
|
5
|
-
|
6
|
-
## note: allow placeholder years to e.g. (-___) or (-????)
|
7
|
-
## for marking missing (to be filled in) years
|
8
|
-
## e.g. (1887-1911), (-2013),
|
9
|
-
## (1946-2001, 2013-) etc.
|
10
|
-
## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
|
11
|
-
YEAR_RE = %r{\(
|
12
|
-
[0-9, ?_-]+? # note: non-greedy (minimum/first) match
|
13
|
-
\)}x
|
14
|
-
|
15
|
-
def strip_year( name )
|
16
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
17
|
-
## (1946-2001, 2013-) etc.
|
18
|
-
## todo/check: only sub once (not global) - why? why not?
|
19
|
-
name.gsub( YEAR_RE, '' ).strip
|
20
|
-
end
|
21
|
-
|
22
|
-
def has_year?( name ) name =~ YEAR_RE; end
|
23
|
-
|
24
|
-
|
25
|
-
LANG_RE = %r{\[
|
26
|
-
[a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
|
27
|
-
\]}x
|
28
|
-
def strip_lang( name )
|
29
|
-
name.gsub( LANG_RE, '' ).strip
|
30
|
-
end
|
31
|
-
|
32
|
-
def has_lang?( name ) name =~ LANG_RE; end
|
33
|
-
|
34
|
-
|
35
|
-
def sanitize( name )
|
36
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
37
|
-
## (1946-2001,2013-) etc.
|
38
|
-
name = strip_year( name )
|
39
|
-
## check lang codes e.g. [en], [fr], etc.
|
40
|
-
name = strip_lang( name )
|
41
|
-
name
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
## note: also add (),’,− etc. e.g.
|
46
|
-
## Estudiantes (LP) => Estudiantes LP
|
47
|
-
## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
|
48
|
-
## Myllykosken Pallo −47 => Myllykosken Pallo 47
|
49
|
-
##
|
50
|
-
## add & too!!
|
51
|
-
## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
|
52
|
-
|
53
|
-
NORM_RE = %r{
|
54
|
-
[.'’º/()&_−-]
|
55
|
-
}x # note: in [] dash (-) if last doesn't need to get escaped
|
56
|
-
## note: remove all dots (.), dash (-), ', º, /, etc.
|
57
|
-
# . U+002E (46) - FULL STOP
|
58
|
-
# ' U+0027 (39) - APOSTROPHE
|
59
|
-
# ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
|
60
|
-
# º U+00BA (186) - MASCULINE ORDINAL INDICATOR
|
61
|
-
# / U+002F (47) - SOLIDUS
|
62
|
-
# ( U+0028 (40) - LEFT PARENTHESIS
|
63
|
-
# ) U+0029 (41) - RIGHT PARENTHESIS
|
64
|
-
# − U+2212 (8722) - MINUS SIGN
|
65
|
-
# - U+002D (45) - HYPHEN-MINUS
|
66
|
-
|
67
|
-
## for norm(alizing) names
|
68
|
-
def strip_norm( name )
|
69
|
-
name.gsub( NORM_RE, '' )
|
70
|
-
end
|
71
|
-
|
72
|
-
def normalize( name )
|
73
|
-
# note: do NOT call sanitize here (keep normalize "atomic" for reuse)
|
74
|
-
name = strip_norm( name )
|
75
|
-
name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
|
76
|
-
|
77
|
-
## todo/check: use our own downcase - why? why not?
|
78
|
-
name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
|
79
|
-
name
|
80
|
-
end
|
81
|
-
|
82
|
-
|
83
|
-
def variants( name ) Variant.find( name ); end
|
84
|
-
|
85
|
-
end # module NameHelper
|
86
|
-
end # module SportDb
|
87
|
-
|