sportdb-structs 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/sportdb/structs/config.rb +39 -39
- data/lib/sportdb/structs/goal_parser_csv.rb +28 -28
- data/lib/sportdb/structs/match_status_parser.rb +90 -90
- data/lib/sportdb/structs/name_helper.rb +87 -87
- data/lib/sportdb/structs/structs/goal.rb +231 -231
- data/lib/sportdb/structs/structs/match.rb +1 -1
- data/lib/sportdb/structs/structs/standings.rb +271 -271
- data/lib/sportdb/structs/structs/team.rb +1 -1
- data/lib/sportdb/structs/structs/team_usage.rb +84 -84
- data/lib/sportdb/structs/version.rb +1 -1
- data/test/helper.rb +13 -13
- data/test/test_match_status_parser.rb +57 -57
- metadata +8 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9569b6727737da1a3c241e4acae88a19ea61e7cc132e752b1d391628424db916
|
4
|
+
data.tar.gz: 37ad5ee8f954dfdb3221060b500eed898461ce957ed257d2dd58e6b6467bae7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4eb9e97280e9a390f188409fd8dab427b96dcc9e0ed9d81de20546254dc02903b9d7c28ab22efbaea0fb7f13cf9ea685c81ab1f88fc67cd69f9d8b017579d3ab
|
7
|
+
data.tar.gz: 4bcb33bdc129b67439817eea7204bd42aaf26a20b0b1962a046eb182d8d48746b38452aa5a177a01c7f522e3bcab203053778d42e75212bde5128405eeb37bf0
|
@@ -1,39 +1,39 @@
|
|
1
|
-
module SportDb
|
2
|
-
module Import
|
3
|
-
|
4
|
-
class Configuration
|
5
|
-
##
|
6
|
-
## todo: allow configure of countries_dir like clubs_dir
|
7
|
-
## "fallback" and use a default built-in world/countries.txt
|
8
|
-
|
9
|
-
attr_accessor :catalog
|
10
|
-
|
11
|
-
attr_reader :lang
|
12
|
-
def lang=(value)
|
13
|
-
## check/todo: always use to_sym - why? needed?
|
14
|
-
DateFormats.lang = value
|
15
|
-
ScoreFormats.lang = value
|
16
|
-
SportDb.lang.lang = value
|
17
|
-
|
18
|
-
## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or someting !!!!
|
19
|
-
## use Sport.lang only as a read-only shortcut a la catalog for config.lang!!!!
|
20
|
-
end
|
21
|
-
|
22
|
-
end # class Configuration
|
23
|
-
|
24
|
-
|
25
|
-
## lets you use
|
26
|
-
## SportDb::Import.configure do |config|
|
27
|
-
## config.lang = 'it'
|
28
|
-
## end
|
29
|
-
|
30
|
-
def self.configure() yield( config ); end
|
31
|
-
|
32
|
-
def self.config() @config ||= Configuration.new; end
|
33
|
-
|
34
|
-
## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
|
35
|
-
def self.catalog() config.catalog; end
|
36
|
-
|
37
|
-
end # module Import
|
38
|
-
end # module SportDb
|
39
|
-
|
1
|
+
module SportDb
|
2
|
+
module Import
|
3
|
+
|
4
|
+
class Configuration
|
5
|
+
##
|
6
|
+
## todo: allow configure of countries_dir like clubs_dir
|
7
|
+
## "fallback" and use a default built-in world/countries.txt
|
8
|
+
|
9
|
+
attr_accessor :catalog
|
10
|
+
|
11
|
+
attr_reader :lang
|
12
|
+
def lang=(value)
|
13
|
+
## check/todo: always use to_sym - why? needed?
|
14
|
+
DateFormats.lang = value
|
15
|
+
ScoreFormats.lang = value
|
16
|
+
SportDb.lang.lang = value
|
17
|
+
|
18
|
+
## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or someting !!!!
|
19
|
+
## use Sport.lang only as a read-only shortcut a la catalog for config.lang!!!!
|
20
|
+
end
|
21
|
+
|
22
|
+
end # class Configuration
|
23
|
+
|
24
|
+
|
25
|
+
## lets you use
|
26
|
+
## SportDb::Import.configure do |config|
|
27
|
+
## config.lang = 'it'
|
28
|
+
## end
|
29
|
+
|
30
|
+
def self.configure() yield( config ); end
|
31
|
+
|
32
|
+
def self.config() @config ||= Configuration.new; end
|
33
|
+
|
34
|
+
## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
|
35
|
+
def self.catalog() config.catalog; end
|
36
|
+
|
37
|
+
end # module Import
|
38
|
+
end # module SportDb
|
39
|
+
|
@@ -1,28 +1,28 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
class CsvGoalParser
|
4
|
-
|
5
|
-
|
6
|
-
def self.read( path )
|
7
|
-
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
8
|
-
parse( txt )
|
9
|
-
end
|
10
|
-
|
11
|
-
def self.parse( txt )
|
12
|
-
new( txt ).parse
|
13
|
-
end
|
14
|
-
|
15
|
-
|
16
|
-
def initialize( txt )
|
17
|
-
@txt = txt
|
18
|
-
end
|
19
|
-
|
20
|
-
def parse
|
21
|
-
rows = parse_csv( @txt )
|
22
|
-
recs = rows.map { |row| Sports::GoalEvent.build( row ) }
|
23
|
-
## pp recs[0]
|
24
|
-
recs
|
25
|
-
end
|
26
|
-
|
27
|
-
end # class CsvGoalParser
|
28
|
-
end # module Sports
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class CsvGoalParser
|
4
|
+
|
5
|
+
|
6
|
+
def self.read( path )
|
7
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
8
|
+
parse( txt )
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.parse( txt )
|
12
|
+
new( txt ).parse
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
def initialize( txt )
|
17
|
+
@txt = txt
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse
|
21
|
+
rows = parse_csv( @txt )
|
22
|
+
recs = rows.map { |row| Sports::GoalEvent.build( row ) }
|
23
|
+
## pp recs[0]
|
24
|
+
recs
|
25
|
+
end
|
26
|
+
|
27
|
+
end # class CsvGoalParser
|
28
|
+
end # module Sports
|
@@ -1,90 +1,90 @@
|
|
1
|
-
#####################
|
2
|
-
# helpers for parsing & finding match status e.g.
|
3
|
-
# - cancelled / canceled
|
4
|
-
# - awarded
|
5
|
-
# - abandoned
|
6
|
-
# - replay
|
7
|
-
# etc.
|
8
|
-
|
9
|
-
|
10
|
-
module SportDb
|
11
|
-
|
12
|
-
|
13
|
-
### todo/fix: move Status inside Match struct - why? why not?
|
14
|
-
|
15
|
-
class Status
|
16
|
-
# note: use a class as an "enum"-like namespace for now - why? why not?
|
17
|
-
# move class into Match e.g. Match::Status - why? why not?
|
18
|
-
CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
|
19
|
-
AWARDED = 'AWARDED'
|
20
|
-
POSTPONED = 'POSTPONED'
|
21
|
-
ABANDONED = 'ABANDONED'
|
22
|
-
REPLAY = 'REPLAY'
|
23
|
-
end # class Status
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
class StatusParser
|
28
|
-
|
29
|
-
def self.parse( str )
|
30
|
-
## note: returns nil if no match found
|
31
|
-
## note: english usage - cancelled (in UK), canceled (in US)
|
32
|
-
if str =~ /^(cancelled|
|
33
|
-
canceled|
|
34
|
-
can\.
|
35
|
-
)/xi
|
36
|
-
Status::CANCELLED
|
37
|
-
elsif str =~ /^(awarded|
|
38
|
-
awd\.
|
39
|
-
)/xi
|
40
|
-
Status::AWARDED
|
41
|
-
elsif str =~ /^(postponed
|
42
|
-
)/xi
|
43
|
-
Status::POSTPONED
|
44
|
-
elsif str =~ /^(abandoned|
|
45
|
-
abd\.
|
46
|
-
)/xi
|
47
|
-
Status::ABANDONED
|
48
|
-
elsif str =~ /^(replay
|
49
|
-
)/xi
|
50
|
-
Status::REPLAY
|
51
|
-
else
|
52
|
-
# no match
|
53
|
-
nil
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
RUN_RE = /\[
|
59
|
-
(?<text>[^\]]+)
|
60
|
-
\]
|
61
|
-
/x
|
62
|
-
def self.find!( line )
|
63
|
-
## for now check all "protected" text run blocks e.g. []
|
64
|
-
## puts "line: >#{line}<"
|
65
|
-
|
66
|
-
status = nil
|
67
|
-
|
68
|
-
str = line
|
69
|
-
while m = str.match( RUN_RE )
|
70
|
-
str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
|
71
|
-
|
72
|
-
## check for status match
|
73
|
-
match_str = m[0] ## keep a copy of the match string (for later sub)
|
74
|
-
text = m[:text].strip
|
75
|
-
## puts " text: >#{text}<"
|
76
|
-
|
77
|
-
status = parse( text )
|
78
|
-
|
79
|
-
if status
|
80
|
-
line.sub!( match_str, "[STATUS.#{status}]" )
|
81
|
-
break
|
82
|
-
end
|
83
|
-
end # while match
|
84
|
-
|
85
|
-
status
|
86
|
-
end # method find!
|
87
|
-
end # class StatusParser
|
88
|
-
|
89
|
-
end # module SportDb
|
90
|
-
|
1
|
+
#####################
|
2
|
+
# helpers for parsing & finding match status e.g.
|
3
|
+
# - cancelled / canceled
|
4
|
+
# - awarded
|
5
|
+
# - abandoned
|
6
|
+
# - replay
|
7
|
+
# etc.
|
8
|
+
|
9
|
+
|
10
|
+
module SportDb
|
11
|
+
|
12
|
+
|
13
|
+
### todo/fix: move Status inside Match struct - why? why not?
|
14
|
+
|
15
|
+
class Status
|
16
|
+
# note: use a class as an "enum"-like namespace for now - why? why not?
|
17
|
+
# move class into Match e.g. Match::Status - why? why not?
|
18
|
+
CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
|
19
|
+
AWARDED = 'AWARDED'
|
20
|
+
POSTPONED = 'POSTPONED'
|
21
|
+
ABANDONED = 'ABANDONED'
|
22
|
+
REPLAY = 'REPLAY'
|
23
|
+
end # class Status
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
class StatusParser
|
28
|
+
|
29
|
+
def self.parse( str )
|
30
|
+
## note: returns nil if no match found
|
31
|
+
## note: english usage - cancelled (in UK), canceled (in US)
|
32
|
+
if str =~ /^(cancelled|
|
33
|
+
canceled|
|
34
|
+
can\.
|
35
|
+
)/xi
|
36
|
+
Status::CANCELLED
|
37
|
+
elsif str =~ /^(awarded|
|
38
|
+
awd\.
|
39
|
+
)/xi
|
40
|
+
Status::AWARDED
|
41
|
+
elsif str =~ /^(postponed
|
42
|
+
)/xi
|
43
|
+
Status::POSTPONED
|
44
|
+
elsif str =~ /^(abandoned|
|
45
|
+
abd\.
|
46
|
+
)/xi
|
47
|
+
Status::ABANDONED
|
48
|
+
elsif str =~ /^(replay
|
49
|
+
)/xi
|
50
|
+
Status::REPLAY
|
51
|
+
else
|
52
|
+
# no match
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
RUN_RE = /\[
|
59
|
+
(?<text>[^\]]+)
|
60
|
+
\]
|
61
|
+
/x
|
62
|
+
def self.find!( line )
|
63
|
+
## for now check all "protected" text run blocks e.g. []
|
64
|
+
## puts "line: >#{line}<"
|
65
|
+
|
66
|
+
status = nil
|
67
|
+
|
68
|
+
str = line
|
69
|
+
while m = str.match( RUN_RE )
|
70
|
+
str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
|
71
|
+
|
72
|
+
## check for status match
|
73
|
+
match_str = m[0] ## keep a copy of the match string (for later sub)
|
74
|
+
text = m[:text].strip
|
75
|
+
## puts " text: >#{text}<"
|
76
|
+
|
77
|
+
status = parse( text )
|
78
|
+
|
79
|
+
if status
|
80
|
+
line.sub!( match_str, "[STATUS.#{status}]" )
|
81
|
+
break
|
82
|
+
end
|
83
|
+
end # while match
|
84
|
+
|
85
|
+
status
|
86
|
+
end # method find!
|
87
|
+
end # class StatusParser
|
88
|
+
|
89
|
+
end # module SportDb
|
90
|
+
|
@@ -1,87 +1,87 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module NameHelper
|
4
|
-
|
5
|
-
|
6
|
-
## note: allow placeholder years to e.g. (-___) or (-????)
|
7
|
-
## for marking missing (to be filled in) years
|
8
|
-
## e.g. (1887-1911), (-2013),
|
9
|
-
## (1946-2001, 2013-) etc.
|
10
|
-
## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
|
11
|
-
YEAR_RE = %r{\(
|
12
|
-
[0-9, ?_-]+? # note: non-greedy (minimum/first) match
|
13
|
-
\)}x
|
14
|
-
|
15
|
-
def strip_year( name )
|
16
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
17
|
-
## (1946-2001, 2013-) etc.
|
18
|
-
## todo/check: only sub once (not global) - why? why not?
|
19
|
-
name.gsub( YEAR_RE, '' ).strip
|
20
|
-
end
|
21
|
-
|
22
|
-
def has_year?( name ) name =~ YEAR_RE; end
|
23
|
-
|
24
|
-
|
25
|
-
LANG_RE = %r{\[
|
26
|
-
[a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
|
27
|
-
\]}x
|
28
|
-
def strip_lang( name )
|
29
|
-
name.gsub( LANG_RE, '' ).strip
|
30
|
-
end
|
31
|
-
|
32
|
-
def has_lang?( name ) name =~ LANG_RE; end
|
33
|
-
|
34
|
-
|
35
|
-
def sanitize( name )
|
36
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
37
|
-
## (1946-2001,2013-) etc.
|
38
|
-
name = strip_year( name )
|
39
|
-
## check lang codes e.g. [en], [fr], etc.
|
40
|
-
name = strip_lang( name )
|
41
|
-
name
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
## note: also add (),’,− etc. e.g.
|
46
|
-
## Estudiantes (LP) => Estudiantes LP
|
47
|
-
## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
|
48
|
-
## Myllykosken Pallo −47 => Myllykosken Pallo 47
|
49
|
-
##
|
50
|
-
## add & too!!
|
51
|
-
## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
|
52
|
-
|
53
|
-
NORM_RE = %r{
|
54
|
-
[.'’º/()&_−-]
|
55
|
-
}x # note: in [] dash (-) if last doesn't need to get escaped
|
56
|
-
## note: remove all dots (.), dash (-), ', º, /, etc.
|
57
|
-
# . U+002E (46) - FULL STOP
|
58
|
-
# ' U+0027 (39) - APOSTROPHE
|
59
|
-
# ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
|
60
|
-
# º U+00BA (186) - MASCULINE ORDINAL INDICATOR
|
61
|
-
# / U+002F (47) - SOLIDUS
|
62
|
-
# ( U+0028 (40) - LEFT PARENTHESIS
|
63
|
-
# ) U+0029 (41) - RIGHT PARENTHESIS
|
64
|
-
# − U+2212 (8722) - MINUS SIGN
|
65
|
-
# - U+002D (45) - HYPHEN-MINUS
|
66
|
-
|
67
|
-
## for norm(alizing) names
|
68
|
-
def strip_norm( name )
|
69
|
-
name.gsub( NORM_RE, '' )
|
70
|
-
end
|
71
|
-
|
72
|
-
def normalize( name )
|
73
|
-
# note: do NOT call sanitize here (keep normalize "atomic" for reuse)
|
74
|
-
name = strip_norm( name )
|
75
|
-
name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
|
76
|
-
|
77
|
-
## todo/check: use our own downcase - why? why not?
|
78
|
-
name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
|
79
|
-
name
|
80
|
-
end
|
81
|
-
|
82
|
-
|
83
|
-
def variants( name ) Variant.find( name ); end
|
84
|
-
|
85
|
-
end # module NameHelper
|
86
|
-
end # module SportDb
|
87
|
-
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module NameHelper
|
4
|
+
|
5
|
+
|
6
|
+
## note: allow placeholder years to e.g. (-___) or (-????)
|
7
|
+
## for marking missing (to be filled in) years
|
8
|
+
## e.g. (1887-1911), (-2013),
|
9
|
+
## (1946-2001, 2013-) etc.
|
10
|
+
## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
|
11
|
+
YEAR_RE = %r{\(
|
12
|
+
[0-9, ?_-]+? # note: non-greedy (minimum/first) match
|
13
|
+
\)}x
|
14
|
+
|
15
|
+
def strip_year( name )
|
16
|
+
## check for year(s) e.g. (1887-1911), (-2013),
|
17
|
+
## (1946-2001, 2013-) etc.
|
18
|
+
## todo/check: only sub once (not global) - why? why not?
|
19
|
+
name.gsub( YEAR_RE, '' ).strip
|
20
|
+
end
|
21
|
+
|
22
|
+
def has_year?( name ) name =~ YEAR_RE; end
|
23
|
+
|
24
|
+
|
25
|
+
LANG_RE = %r{\[
|
26
|
+
[a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
|
27
|
+
\]}x
|
28
|
+
def strip_lang( name )
|
29
|
+
name.gsub( LANG_RE, '' ).strip
|
30
|
+
end
|
31
|
+
|
32
|
+
def has_lang?( name ) name =~ LANG_RE; end
|
33
|
+
|
34
|
+
|
35
|
+
def sanitize( name )
|
36
|
+
## check for year(s) e.g. (1887-1911), (-2013),
|
37
|
+
## (1946-2001,2013-) etc.
|
38
|
+
name = strip_year( name )
|
39
|
+
## check lang codes e.g. [en], [fr], etc.
|
40
|
+
name = strip_lang( name )
|
41
|
+
name
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
## note: also add (),’,− etc. e.g.
|
46
|
+
## Estudiantes (LP) => Estudiantes LP
|
47
|
+
## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
|
48
|
+
## Myllykosken Pallo −47 => Myllykosken Pallo 47
|
49
|
+
##
|
50
|
+
## add & too!!
|
51
|
+
## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
|
52
|
+
|
53
|
+
NORM_RE = %r{
|
54
|
+
[.'’º/()&_−-]
|
55
|
+
}x # note: in [] dash (-) if last doesn't need to get escaped
|
56
|
+
## note: remove all dots (.), dash (-), ', º, /, etc.
|
57
|
+
# . U+002E (46) - FULL STOP
|
58
|
+
# ' U+0027 (39) - APOSTROPHE
|
59
|
+
# ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
|
60
|
+
# º U+00BA (186) - MASCULINE ORDINAL INDICATOR
|
61
|
+
# / U+002F (47) - SOLIDUS
|
62
|
+
# ( U+0028 (40) - LEFT PARENTHESIS
|
63
|
+
# ) U+0029 (41) - RIGHT PARENTHESIS
|
64
|
+
# − U+2212 (8722) - MINUS SIGN
|
65
|
+
# - U+002D (45) - HYPHEN-MINUS
|
66
|
+
|
67
|
+
## for norm(alizing) names
|
68
|
+
def strip_norm( name )
|
69
|
+
name.gsub( NORM_RE, '' )
|
70
|
+
end
|
71
|
+
|
72
|
+
def normalize( name )
|
73
|
+
# note: do NOT call sanitize here (keep normalize "atomic" for reuse)
|
74
|
+
name = strip_norm( name )
|
75
|
+
name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
|
76
|
+
|
77
|
+
## todo/check: use our own downcase - why? why not?
|
78
|
+
name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
|
79
|
+
name
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
def variants( name ) Variant.find( name ); end
|
84
|
+
|
85
|
+
end # module NameHelper
|
86
|
+
end # module SportDb
|
87
|
+
|