sportdb-structs 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/sportdb/structs/config.rb +39 -39
- data/lib/sportdb/structs/goal_parser_csv.rb +28 -28
- data/lib/sportdb/structs/match_status_parser.rb +90 -90
- data/lib/sportdb/structs/name_helper.rb +87 -87
- data/lib/sportdb/structs/structs/goal.rb +231 -231
- data/lib/sportdb/structs/structs/match.rb +1 -1
- data/lib/sportdb/structs/structs/standings.rb +271 -271
- data/lib/sportdb/structs/structs/team.rb +1 -1
- data/lib/sportdb/structs/structs/team_usage.rb +84 -84
- data/lib/sportdb/structs/version.rb +1 -1
- data/test/helper.rb +13 -13
- data/test/test_match_status_parser.rb +57 -57
- metadata +8 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9569b6727737da1a3c241e4acae88a19ea61e7cc132e752b1d391628424db916
|
4
|
+
data.tar.gz: 37ad5ee8f954dfdb3221060b500eed898461ce957ed257d2dd58e6b6467bae7c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4eb9e97280e9a390f188409fd8dab427b96dcc9e0ed9d81de20546254dc02903b9d7c28ab22efbaea0fb7f13cf9ea685c81ab1f88fc67cd69f9d8b017579d3ab
|
7
|
+
data.tar.gz: 4bcb33bdc129b67439817eea7204bd42aaf26a20b0b1962a046eb182d8d48746b38452aa5a177a01c7f522e3bcab203053778d42e75212bde5128405eeb37bf0
|
@@ -1,39 +1,39 @@
|
|
1
|
-
module SportDb
|
2
|
-
module Import
|
3
|
-
|
4
|
-
class Configuration
|
5
|
-
##
|
6
|
-
## todo: allow configure of countries_dir like clubs_dir
|
7
|
-
## "fallback" and use a default built-in world/countries.txt
|
8
|
-
|
9
|
-
attr_accessor :catalog
|
10
|
-
|
11
|
-
attr_reader :lang
|
12
|
-
def lang=(value)
|
13
|
-
## check/todo: always use to_sym - why? needed?
|
14
|
-
DateFormats.lang = value
|
15
|
-
ScoreFormats.lang = value
|
16
|
-
SportDb.lang.lang = value
|
17
|
-
|
18
|
-
## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
|
19
|
-
## use SportDb.lang only as a read-only shortcut a la catalog for config.lang!!!!
|
20
|
-
end
|
21
|
-
|
22
|
-
end # class Configuration
|
23
|
-
|
24
|
-
|
25
|
-
## lets you use
|
26
|
-
## SportDb::Import.configure do |config|
|
27
|
-
## config.lang = 'it'
|
28
|
-
## end
|
29
|
-
|
30
|
-
def self.configure() yield( config ); end
|
31
|
-
|
32
|
-
def self.config() @config ||= Configuration.new; end
|
33
|
-
|
34
|
-
## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
|
35
|
-
def self.catalog() config.catalog; end
|
36
|
-
|
37
|
-
end # module Import
|
38
|
-
end # module SportDb
|
39
|
-
|
1
|
+
module SportDb
|
2
|
+
module Import
|
3
|
+
|
4
|
+
class Configuration
|
5
|
+
##
|
6
|
+
## todo: allow configure of countries_dir like clubs_dir
|
7
|
+
## "fallback" and use a default built-in world/countries.txt
|
8
|
+
|
9
|
+
attr_accessor :catalog
|
10
|
+
|
11
|
+
attr_reader :lang
|
12
|
+
def lang=(value)
|
13
|
+
## check/todo: always use to_sym - why? needed?
|
14
|
+
DateFormats.lang = value
|
15
|
+
ScoreFormats.lang = value
|
16
|
+
SportDb.lang.lang = value
|
17
|
+
|
18
|
+
## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
|
19
|
+
## use SportDb.lang only as a read-only shortcut a la catalog for config.lang!!!!
|
20
|
+
end
|
21
|
+
|
22
|
+
end # class Configuration
|
23
|
+
|
24
|
+
|
25
|
+
## lets you use
|
26
|
+
## SportDb::Import.configure do |config|
|
27
|
+
## config.lang = 'it'
|
28
|
+
## end
|
29
|
+
|
30
|
+
def self.configure() yield( config ); end
|
31
|
+
|
32
|
+
def self.config() @config ||= Configuration.new; end
|
33
|
+
|
34
|
+
## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
|
35
|
+
def self.catalog() config.catalog; end
|
36
|
+
|
37
|
+
end # module Import
|
38
|
+
end # module SportDb
|
39
|
+
|
@@ -1,28 +1,28 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
class CsvGoalParser
|
4
|
-
|
5
|
-
|
6
|
-
def self.read( path )
|
7
|
-
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
8
|
-
parse( txt )
|
9
|
-
end
|
10
|
-
|
11
|
-
def self.parse( txt )
|
12
|
-
new( txt ).parse
|
13
|
-
end
|
14
|
-
|
15
|
-
|
16
|
-
def initialize( txt )
|
17
|
-
@txt = txt
|
18
|
-
end
|
19
|
-
|
20
|
-
def parse
|
21
|
-
rows = parse_csv( @txt )
|
22
|
-
recs = rows.map { |row| Sports::GoalEvent.build( row ) }
|
23
|
-
## pp recs[0]
|
24
|
-
recs
|
25
|
-
end
|
26
|
-
|
27
|
-
end # class CsvGoalParser
|
28
|
-
end # module SportDb
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class CsvGoalParser
|
4
|
+
|
5
|
+
|
6
|
+
def self.read( path )
|
7
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
8
|
+
parse( txt )
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.parse( txt )
|
12
|
+
new( txt ).parse
|
13
|
+
end
|
14
|
+
|
15
|
+
|
16
|
+
def initialize( txt )
|
17
|
+
@txt = txt
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse
|
21
|
+
rows = parse_csv( @txt )
|
22
|
+
recs = rows.map { |row| Sports::GoalEvent.build( row ) }
|
23
|
+
## pp recs[0]
|
24
|
+
recs
|
25
|
+
end
|
26
|
+
|
27
|
+
end # class CsvGoalParser
|
28
|
+
end # module SportDb
|
@@ -1,90 +1,90 @@
|
|
1
|
-
#####################
|
2
|
-
# helpers for parsing & finding match status e.g.
|
3
|
-
# - cancelled / canceled
|
4
|
-
# - awarded
|
5
|
-
# - abandoned
|
6
|
-
# - replay
|
7
|
-
# etc.
|
8
|
-
|
9
|
-
|
10
|
-
module SportDb
|
11
|
-
|
12
|
-
|
13
|
-
### todo/fix: move Status inside Match struct - why? why not?
|
14
|
-
|
15
|
-
class Status
|
16
|
-
# note: use a class as an "enum"-like namespace for now - why? why not?
|
17
|
-
# move class into Match e.g. Match::Status - why? why not?
|
18
|
-
CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
|
19
|
-
AWARDED = 'AWARDED'
|
20
|
-
POSTPONED = 'POSTPONED'
|
21
|
-
ABANDONED = 'ABANDONED'
|
22
|
-
REPLAY = 'REPLAY'
|
23
|
-
end # class Status
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
class StatusParser
|
28
|
-
|
29
|
-
def self.parse( str )
|
30
|
-
## note: returns nil if no match found
|
31
|
-
## note: english usage - cancelled (in UK), canceled (in US)
|
32
|
-
if str =~ /^(cancelled|
|
33
|
-
canceled|
|
34
|
-
can\.
|
35
|
-
)/xi
|
36
|
-
Status::CANCELLED
|
37
|
-
elsif str =~ /^(awarded|
|
38
|
-
awd\.
|
39
|
-
)/xi
|
40
|
-
Status::AWARDED
|
41
|
-
elsif str =~ /^(postponed
|
42
|
-
)/xi
|
43
|
-
Status::POSTPONED
|
44
|
-
elsif str =~ /^(abandoned|
|
45
|
-
abd\.
|
46
|
-
)/xi
|
47
|
-
Status::ABANDONED
|
48
|
-
elsif str =~ /^(replay
|
49
|
-
)/xi
|
50
|
-
Status::REPLAY
|
51
|
-
else
|
52
|
-
# no match
|
53
|
-
nil
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
RUN_RE = /\[
|
59
|
-
(?<text>[^\]]+)
|
60
|
-
\]
|
61
|
-
/x
|
62
|
-
def self.find!( line )
|
63
|
-
## for now check all "protected" text run blocks e.g. []
|
64
|
-
## puts "line: >#{line}<"
|
65
|
-
|
66
|
-
status = nil
|
67
|
-
|
68
|
-
str = line
|
69
|
-
while m = str.match( RUN_RE )
|
70
|
-
str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
|
71
|
-
|
72
|
-
## check for status match
|
73
|
-
match_str = m[0] ## keep a copy of the match string (for later sub)
|
74
|
-
text = m[:text].strip
|
75
|
-
## puts " text: >#{text}<"
|
76
|
-
|
77
|
-
status = parse( text )
|
78
|
-
|
79
|
-
if status
|
80
|
-
line.sub!( match_str, "[STATUS.#{status}]" )
|
81
|
-
break
|
82
|
-
end
|
83
|
-
end # while match
|
84
|
-
|
85
|
-
status
|
86
|
-
end # method find!
|
87
|
-
end # class StatusParser
|
88
|
-
|
89
|
-
end # module SportDb
|
90
|
-
|
1
|
+
#####################
|
2
|
+
# helpers for parsing & finding match status e.g.
|
3
|
+
# - cancelled / canceled
|
4
|
+
# - awarded
|
5
|
+
# - abandoned
|
6
|
+
# - replay
|
7
|
+
# etc.
|
8
|
+
|
9
|
+
|
10
|
+
module SportDb
|
11
|
+
|
12
|
+
|
13
|
+
### todo/fix: move Status inside Match struct - why? why not?
|
14
|
+
|
15
|
+
class Status
|
16
|
+
# note: use a class as an "enum"-like namespace for now - why? why not?
|
17
|
+
# move class into Match e.g. Match::Status - why? why not?
|
18
|
+
CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
|
19
|
+
AWARDED = 'AWARDED'
|
20
|
+
POSTPONED = 'POSTPONED'
|
21
|
+
ABANDONED = 'ABANDONED'
|
22
|
+
REPLAY = 'REPLAY'
|
23
|
+
end # class Status
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
class StatusParser
|
28
|
+
|
29
|
+
def self.parse( str )
|
30
|
+
## note: returns nil if no match found
|
31
|
+
## note: english usage - cancelled (in UK), canceled (in US)
|
32
|
+
if str =~ /^(cancelled|
|
33
|
+
canceled|
|
34
|
+
can\.
|
35
|
+
)/xi
|
36
|
+
Status::CANCELLED
|
37
|
+
elsif str =~ /^(awarded|
|
38
|
+
awd\.
|
39
|
+
)/xi
|
40
|
+
Status::AWARDED
|
41
|
+
elsif str =~ /^(postponed
|
42
|
+
)/xi
|
43
|
+
Status::POSTPONED
|
44
|
+
elsif str =~ /^(abandoned|
|
45
|
+
abd\.
|
46
|
+
)/xi
|
47
|
+
Status::ABANDONED
|
48
|
+
elsif str =~ /^(replay
|
49
|
+
)/xi
|
50
|
+
Status::REPLAY
|
51
|
+
else
|
52
|
+
# no match
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
RUN_RE = /\[
|
59
|
+
(?<text>[^\]]+)
|
60
|
+
\]
|
61
|
+
/x
|
62
|
+
def self.find!( line )
|
63
|
+
## for now check all "protected" text run blocks e.g. []
|
64
|
+
## puts "line: >#{line}<"
|
65
|
+
|
66
|
+
status = nil
|
67
|
+
|
68
|
+
str = line
|
69
|
+
while m = str.match( RUN_RE )
|
70
|
+
str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
|
71
|
+
|
72
|
+
## check for status match
|
73
|
+
match_str = m[0] ## keep a copy of the match string (for later sub)
|
74
|
+
text = m[:text].strip
|
75
|
+
## puts " text: >#{text}<"
|
76
|
+
|
77
|
+
status = parse( text )
|
78
|
+
|
79
|
+
if status
|
80
|
+
line.sub!( match_str, "[STATUS.#{status}]" )
|
81
|
+
break
|
82
|
+
end
|
83
|
+
end # while match
|
84
|
+
|
85
|
+
status
|
86
|
+
end # method find!
|
87
|
+
end # class StatusParser
|
88
|
+
|
89
|
+
end # module SportDb
|
90
|
+
|
@@ -1,87 +1,87 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module NameHelper
|
4
|
-
|
5
|
-
|
6
|
-
## note: allow placeholder years to e.g. (-___) or (-????)
|
7
|
-
## for marking missing (to be filled in) years
|
8
|
-
## e.g. (1887-1911), (-2013),
|
9
|
-
## (1946-2001, 2013-) etc.
|
10
|
-
## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
|
11
|
-
YEAR_RE = %r{\(
|
12
|
-
[0-9, ?_-]+? # note: non-greedy (minimum/first) match
|
13
|
-
\)}x
|
14
|
-
|
15
|
-
def strip_year( name )
|
16
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
17
|
-
## (1946-2001, 2013-) etc.
|
18
|
-
## todo/check: only sub once (not global) - why? why not?
|
19
|
-
name.gsub( YEAR_RE, '' ).strip
|
20
|
-
end
|
21
|
-
|
22
|
-
def has_year?( name ) name =~ YEAR_RE; end
|
23
|
-
|
24
|
-
|
25
|
-
LANG_RE = %r{\[
|
26
|
-
[a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
|
27
|
-
\]}x
|
28
|
-
def strip_lang( name )
|
29
|
-
name.gsub( LANG_RE, '' ).strip
|
30
|
-
end
|
31
|
-
|
32
|
-
def has_lang?( name ) name =~ LANG_RE; end
|
33
|
-
|
34
|
-
|
35
|
-
def sanitize( name )
|
36
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
37
|
-
## (1946-2001,2013-) etc.
|
38
|
-
name = strip_year( name )
|
39
|
-
## check lang codes e.g. [en], [fr], etc.
|
40
|
-
name = strip_lang( name )
|
41
|
-
name
|
42
|
-
end
|
43
|
-
|
44
|
-
|
45
|
-
## note: also add (),’,− etc. e.g.
|
46
|
-
## Estudiantes (LP) => Estudiantes LP
|
47
|
-
## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
|
48
|
-
## Myllykosken Pallo −47 => Myllykosken Pallo 47
|
49
|
-
##
|
50
|
-
## add & too!!
|
51
|
-
## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
|
52
|
-
|
53
|
-
NORM_RE = %r{
|
54
|
-
[.'’º/()&_−-]
|
55
|
-
}x # note: in [] dash (-) if last doesn't need to get escaped
|
56
|
-
## note: remove all dots (.), dash (-), ', º, /, etc.
|
57
|
-
# . U+002E (46) - FULL STOP
|
58
|
-
# ' U+0027 (39) - APOSTROPHE
|
59
|
-
# ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
|
60
|
-
# º U+00BA (186) - MASCULINE ORDINAL INDICATOR
|
61
|
-
# / U+002F (47) - SOLIDUS
|
62
|
-
# ( U+0028 (40) - LEFT PARENTHESIS
|
63
|
-
# ) U+0029 (41) - RIGHT PARENTHESIS
|
64
|
-
# − U+2212 (8722) - MINUS SIGN
|
65
|
-
# - U+002D (45) - HYPHEN-MINUS
|
66
|
-
|
67
|
-
## for norm(alizing) names
|
68
|
-
def strip_norm( name )
|
69
|
-
name.gsub( NORM_RE, '' )
|
70
|
-
end
|
71
|
-
|
72
|
-
def normalize( name )
|
73
|
-
# note: do NOT call sanitize here (keep normalize "atomic" for reuse)
|
74
|
-
name = strip_norm( name )
|
75
|
-
name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
|
76
|
-
|
77
|
-
## todo/check: use our own downcase - why? why not?
|
78
|
-
name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
|
79
|
-
name
|
80
|
-
end
|
81
|
-
|
82
|
-
|
83
|
-
def variants( name ) Variant.find( name ); end
|
84
|
-
|
85
|
-
end # module NameHelper
|
86
|
-
end # module SportDb
|
87
|
-
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module NameHelper
|
4
|
+
|
5
|
+
|
6
|
+
## note: allow placeholder years to e.g. (-___) or (-????)
|
7
|
+
## for marking missing (to be filled in) years
|
8
|
+
## e.g. (1887-1911), (-2013),
|
9
|
+
## (1946-2001, 2013-) etc.
|
10
|
+
## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
|
11
|
+
YEAR_RE = %r{\(
|
12
|
+
[0-9, ?_-]+? # note: non-greedy (minimum/first) match
|
13
|
+
\)}x
|
14
|
+
|
15
|
+
def strip_year( name )
|
16
|
+
## check for year(s) e.g. (1887-1911), (-2013),
|
17
|
+
## (1946-2001, 2013-) etc.
|
18
|
+
## todo/check: only sub once (not global) - why? why not?
|
19
|
+
name.gsub( YEAR_RE, '' ).strip
|
20
|
+
end
|
21
|
+
|
22
|
+
def has_year?( name ) name =~ YEAR_RE; end
|
23
|
+
|
24
|
+
|
25
|
+
LANG_RE = %r{\[
|
26
|
+
[a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
|
27
|
+
\]}x
|
28
|
+
def strip_lang( name )
|
29
|
+
name.gsub( LANG_RE, '' ).strip
|
30
|
+
end
|
31
|
+
|
32
|
+
def has_lang?( name ) name =~ LANG_RE; end
|
33
|
+
|
34
|
+
|
35
|
+
def sanitize( name )
|
36
|
+
## check for year(s) e.g. (1887-1911), (-2013),
|
37
|
+
## (1946-2001,2013-) etc.
|
38
|
+
name = strip_year( name )
|
39
|
+
## check lang codes e.g. [en], [fr], etc.
|
40
|
+
name = strip_lang( name )
|
41
|
+
name
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
## note: also add (),’,− etc. e.g.
|
46
|
+
## Estudiantes (LP) => Estudiantes LP
|
47
|
+
## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
|
48
|
+
## Myllykosken Pallo −47 => Myllykosken Pallo 47
|
49
|
+
##
|
50
|
+
## add & too!!
|
51
|
+
## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
|
52
|
+
|
53
|
+
NORM_RE = %r{
|
54
|
+
[.'’º/()&_−-]
|
55
|
+
}x # note: in [] dash (-) if last doesn't need to get escaped
|
56
|
+
## note: remove all dots (.), dash (-), ', º, /, etc.
|
57
|
+
# . U+002E (46) - FULL STOP
|
58
|
+
# ' U+0027 (39) - APOSTROPHE
|
59
|
+
# ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
|
60
|
+
# º U+00BA (186) - MASCULINE ORDINAL INDICATOR
|
61
|
+
# / U+002F (47) - SOLIDUS
|
62
|
+
# ( U+0028 (40) - LEFT PARENTHESIS
|
63
|
+
# ) U+0029 (41) - RIGHT PARENTHESIS
|
64
|
+
# − U+2212 (8722) - MINUS SIGN
|
65
|
+
# - U+002D (45) - HYPHEN-MINUS
|
66
|
+
|
67
|
+
## for norm(alizing) names
|
68
|
+
def strip_norm( name )
|
69
|
+
name.gsub( NORM_RE, '' )
|
70
|
+
end
|
71
|
+
|
72
|
+
def normalize( name )
|
73
|
+
# note: do NOT call sanitize here (keep normalize "atomic" for reuse)
|
74
|
+
name = strip_norm( name )
|
75
|
+
name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
|
76
|
+
|
77
|
+
## todo/check: use our own downcase - why? why not?
|
78
|
+
name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
|
79
|
+
name
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
def variants( name ) Variant.find( name ); end
|
84
|
+
|
85
|
+
end # module NameHelper
|
86
|
+
end # module SportDb
|
87
|
+
|