sportdb-structs 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 558329ee3ae366f713dc5e20cdd61de62d5f190d
4
- data.tar.gz: 05a89470a83b3ded621a2c0ef0125be847e4e14a
2
+ SHA256:
3
+ metadata.gz: 9569b6727737da1a3c241e4acae88a19ea61e7cc132e752b1d391628424db916
4
+ data.tar.gz: 37ad5ee8f954dfdb3221060b500eed898461ce957ed257d2dd58e6b6467bae7c
5
5
  SHA512:
6
- metadata.gz: dcad6e831a704229f9e7926a22b64d0b54f5855e320ef77fa48972dcd2759f241c7db837e6320981857b66cd109155385e21e92d65caf105b6296bd22751b97a
7
- data.tar.gz: 36b338d8654a1d9ffda30ea85e77caa099a71a411e38bcb69c1a11bacf0f30c6ed9daaf77bbba66e8849726a0e827844fc4ac6b4d74ae9ab9ec73e33e5175bd5
6
+ metadata.gz: 4eb9e97280e9a390f188409fd8dab427b96dcc9e0ed9d81de20546254dc02903b9d7c28ab22efbaea0fb7f13cf9ea685c81ab1f88fc67cd69f9d8b017579d3ab
7
+ data.tar.gz: 4bcb33bdc129b67439817eea7204bd42aaf26a20b0b1962a046eb182d8d48746b38452aa5a177a01c7f522e3bcab203053778d42e75212bde5128405eeb37bf0
@@ -1,39 +1,39 @@
1
- module SportDb
2
- module Import
3
-
4
- class Configuration
5
- ##
6
- ## todo: allow configure of countries_dir like clubs_dir
7
- ## "fallback" and use a default built-in world/countries.txt
8
-
9
- attr_accessor :catalog
10
-
11
- attr_reader :lang
12
- def lang=(value)
13
- ## check/todo: always use to_sym - why? needed?
14
- DateFormats.lang = value
15
- ScoreFormats.lang = value
16
- SportDb.lang.lang = value
17
-
18
- ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or someting !!!!
19
- ## use Sport.lang only as a read-only shortcut a la catalog for config.lang!!!!
20
- end
21
-
22
- end # class Configuration
23
-
24
-
25
- ## lets you use
26
- ## SportDb::Import.configure do |config|
27
- ## config.lang = 'it'
28
- ## end
29
-
30
- def self.configure() yield( config ); end
31
-
32
- def self.config() @config ||= Configuration.new; end
33
-
34
- ## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
35
- def self.catalog() config.catalog; end
36
-
37
- end # module Import
38
- end # module SportDb
39
-
1
+ module SportDb
2
+ module Import
3
+
4
+ class Configuration
5
+ ##
6
+ ## todo: allow configure of countries_dir like clubs_dir
7
+ ## "fallback" and use a default built-in world/countries.txt
8
+
9
+ attr_accessor :catalog
10
+
11
+ attr_reader :lang
12
+ def lang=(value)
13
+ ## check/todo: always use to_sym - why? needed?
14
+ DateFormats.lang = value
15
+ ScoreFormats.lang = value
16
+ SportDb.lang.lang = value
17
+
18
+ ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
19
+ ## use SportDb.lang only as a read-only shortcut a la catalog for config.lang!!!!
20
+ end
21
+
22
+ end # class Configuration
23
+
24
+
25
+ ## lets you use
26
+ ## SportDb::Import.configure do |config|
27
+ ## config.lang = 'it'
28
+ ## end
29
+
30
+ def self.configure() yield( config ); end
31
+
32
+ def self.config() @config ||= Configuration.new; end
33
+
34
+ ## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
35
+ def self.catalog() config.catalog; end
36
+
37
+ end # module Import
38
+ end # module SportDb
39
+
@@ -1,28 +1,28 @@
1
-
2
- module SportDb
3
- class CsvGoalParser
4
-
5
-
6
- def self.read( path )
7
- txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
8
- parse( txt )
9
- end
10
-
11
- def self.parse( txt )
12
- new( txt ).parse
13
- end
14
-
15
-
16
- def initialize( txt )
17
- @txt = txt
18
- end
19
-
20
- def parse
21
- rows = parse_csv( @txt )
22
- recs = rows.map { |row| Sports::GoalEvent.build( row ) }
23
- ## pp recs[0]
24
- recs
25
- end
26
-
27
- end # class CsvGoalParser
28
- end # module Sports
1
+
2
+ module SportDb
3
+ class CsvGoalParser
4
+
5
+
6
+ def self.read( path )
7
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
8
+ parse( txt )
9
+ end
10
+
11
+ def self.parse( txt )
12
+ new( txt ).parse
13
+ end
14
+
15
+
16
+ def initialize( txt )
17
+ @txt = txt
18
+ end
19
+
20
+ def parse
21
+ rows = parse_csv( @txt )
22
+ recs = rows.map { |row| Sports::GoalEvent.build( row ) }
23
+ ## pp recs[0]
24
+ recs
25
+ end
26
+
27
+ end # class CsvGoalParser
28
+ end # module SportDb
@@ -1,90 +1,90 @@
1
- #####################
2
- # helpers for parsing & finding match status e.g.
3
- # - cancelled / canceled
4
- # - awarded
5
- # - abandoned
6
- # - replay
7
- # etc.
8
-
9
-
10
- module SportDb
11
-
12
-
13
- ### todo/fix: move Status inside Match struct - why? why not?
14
-
15
- class Status
16
- # note: use a class as an "enum"-like namespace for now - why? why not?
17
- # move class into Match e.g. Match::Status - why? why not?
18
- CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
19
- AWARDED = 'AWARDED'
20
- POSTPONED = 'POSTPONED'
21
- ABANDONED = 'ABANDONED'
22
- REPLAY = 'REPLAY'
23
- end # class Status
24
-
25
-
26
-
27
- class StatusParser
28
-
29
- def self.parse( str )
30
- ## note: returns nil if no match found
31
- ## note: english usage - cancelled (in UK), canceled (in US)
32
- if str =~ /^(cancelled|
33
- canceled|
34
- can\.
35
- )/xi
36
- Status::CANCELLED
37
- elsif str =~ /^(awarded|
38
- awd\.
39
- )/xi
40
- Status::AWARDED
41
- elsif str =~ /^(postponed
42
- )/xi
43
- Status::POSTPONED
44
- elsif str =~ /^(abandoned|
45
- abd\.
46
- )/xi
47
- Status::ABANDONED
48
- elsif str =~ /^(replay
49
- )/xi
50
- Status::REPLAY
51
- else
52
- # no match
53
- nil
54
- end
55
- end
56
-
57
-
58
- RUN_RE = /\[
59
- (?<text>[^\]]+)
60
- \]
61
- /x
62
- def self.find!( line )
63
- ## for now check all "protected" text run blocks e.g. []
64
- ## puts "line: >#{line}<"
65
-
66
- status = nil
67
-
68
- str = line
69
- while m = str.match( RUN_RE )
70
- str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
71
-
72
- ## check for status match
73
- match_str = m[0] ## keep a copy of the match string (for later sub)
74
- text = m[:text].strip
75
- ## puts " text: >#{text}<"
76
-
77
- status = parse( text )
78
-
79
- if status
80
- line.sub!( match_str, "[STATUS.#{status}]" )
81
- break
82
- end
83
- end # while match
84
-
85
- status
86
- end # method find!
87
- end # class StatusParser
88
-
89
- end # module SportDb
90
-
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
+ module SportDb
11
+
12
+
13
+ ### todo/fix: move Status inside Match struct - why? why not?
14
+
15
+ class Status
16
+ # note: use a class as an "enum"-like namespace for now - why? why not?
17
+ # move class into Match e.g. Match::Status - why? why not?
18
+ CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
19
+ AWARDED = 'AWARDED'
20
+ POSTPONED = 'POSTPONED'
21
+ ABANDONED = 'ABANDONED'
22
+ REPLAY = 'REPLAY'
23
+ end # class Status
24
+
25
+
26
+
27
+ class StatusParser
28
+
29
+ def self.parse( str )
30
+ ## note: returns nil if no match found
31
+ ## note: english usage - cancelled (in UK), canceled (in US)
32
+ if str =~ /^(cancelled|
33
+ canceled|
34
+ can\.
35
+ )/xi
36
+ Status::CANCELLED
37
+ elsif str =~ /^(awarded|
38
+ awd\.
39
+ )/xi
40
+ Status::AWARDED
41
+ elsif str =~ /^(postponed
42
+ )/xi
43
+ Status::POSTPONED
44
+ elsif str =~ /^(abandoned|
45
+ abd\.
46
+ )/xi
47
+ Status::ABANDONED
48
+ elsif str =~ /^(replay
49
+ )/xi
50
+ Status::REPLAY
51
+ else
52
+ # no match
53
+ nil
54
+ end
55
+ end
56
+
57
+
58
+ RUN_RE = /\[
59
+ (?<text>[^\]]+)
60
+ \]
61
+ /x
62
+ def self.find!( line )
63
+ ## for now check all "protected" text run blocks e.g. []
64
+ ## puts "line: >#{line}<"
65
+
66
+ status = nil
67
+
68
+ str = line
69
+ while m = str.match( RUN_RE )
70
+ str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
71
+
72
+ ## check for status match
73
+ match_str = m[0] ## keep a copy of the match string (for later sub)
74
+ text = m[:text].strip
75
+ ## puts " text: >#{text}<"
76
+
77
+ status = parse( text )
78
+
79
+ if status
80
+ line.sub!( match_str, "[STATUS.#{status}]" )
81
+ break
82
+ end
83
+ end # while match
84
+
85
+ status
86
+ end # method find!
87
+ end # class StatusParser
88
+
89
+ end # module SportDb
90
+
@@ -1,87 +1,87 @@
1
-
2
- module SportDb
3
- module NameHelper
4
-
5
-
6
- ## note: allow placeholder years to e.g. (-___) or (-????)
7
- ## for marking missing (to be filled in) years
8
- ## e.g. (1887-1911), (-2013),
9
- ## (1946-2001, 2013-) etc.
10
- ## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
11
- YEAR_RE = %r{\(
12
- [0-9, ?_-]+? # note: non-greedy (minimum/first) match
13
- \)}x
14
-
15
- def strip_year( name )
16
- ## check for year(s) e.g. (1887-1911), (-2013),
17
- ## (1946-2001, 2013-) etc.
18
- ## todo/check: only sub once (not global) - why? why not?
19
- name.gsub( YEAR_RE, '' ).strip
20
- end
21
-
22
- def has_year?( name ) name =~ YEAR_RE; end
23
-
24
-
25
- LANG_RE = %r{\[
26
- [a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
27
- \]}x
28
- def strip_lang( name )
29
- name.gsub( LANG_RE, '' ).strip
30
- end
31
-
32
- def has_lang?( name ) name =~ LANG_RE; end
33
-
34
-
35
- def sanitize( name )
36
- ## check for year(s) e.g. (1887-1911), (-2013),
37
- ## (1946-2001,2013-) etc.
38
- name = strip_year( name )
39
- ## check lang codes e.g. [en], [fr], etc.
40
- name = strip_lang( name )
41
- name
42
- end
43
-
44
-
45
- ## note: also add (),’,− etc. e.g.
46
- ## Estudiantes (LP) => Estudiantes LP
47
- ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
48
- ## Myllykosken Pallo −47 => Myllykosken Pallo 47
49
- ##
50
- ## add & too!!
51
- ## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
52
-
53
- NORM_RE = %r{
54
- [.'’º/()&_−-]
55
- }x # note: in [] dash (-) if last doesn't need to get escaped
56
- ## note: remove all dots (.), dash (-), ', º, /, etc.
57
- # . U+002E (46) - FULL STOP
58
- # ' U+0027 (39) - APOSTROPHE
59
- # ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
60
- # º U+00BA (186) - MASCULINE ORDINAL INDICATOR
61
- # / U+002F (47) - SOLIDUS
62
- # ( U+0028 (40) - LEFT PARENTHESIS
63
- # ) U+0029 (41) - RIGHT PARENTHESIS
64
- # − U+2212 (8722) - MINUS SIGN
65
- # - U+002D (45) - HYPHEN-MINUS
66
-
67
- ## for norm(alizing) names
68
- def strip_norm( name )
69
- name.gsub( NORM_RE, '' )
70
- end
71
-
72
- def normalize( name )
73
- # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
74
- name = strip_norm( name )
75
- name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
76
-
77
- ## todo/check: use our own downcase - why? why not?
78
- name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
79
- name
80
- end
81
-
82
-
83
- def variants( name ) Variant.find( name ); end
84
-
85
- end # module NameHelper
86
- end # module SportDb
87
-
1
+
2
+ module SportDb
3
+ module NameHelper
4
+
5
+
6
+ ## note: allow placeholder years too, e.g. (-___) or (-????)
7
+ ## for marking missing (to be filled in) years
8
+ ## e.g. (1887-1911), (-2013),
9
+ ## (1946-2001, 2013-) etc.
10
+ ## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
11
+ YEAR_RE = %r{\(
12
+ [0-9, ?_-]+? # note: non-greedy (minimum/first) match
13
+ \)}x
14
+
15
+ def strip_year( name )
16
+ ## check for year(s) e.g. (1887-1911), (-2013),
17
+ ## (1946-2001, 2013-) etc.
18
+ ## todo/check: only sub once (not global) - why? why not?
19
+ name.gsub( YEAR_RE, '' ).strip
20
+ end
21
+
22
+ def has_year?( name ) name =~ YEAR_RE; end
23
+
24
+
25
+ LANG_RE = %r{\[
26
+ [a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
27
+ \]}x
28
+ def strip_lang( name )
29
+ name.gsub( LANG_RE, '' ).strip
30
+ end
31
+
32
+ def has_lang?( name ) name =~ LANG_RE; end
33
+
34
+
35
+ def sanitize( name )
36
+ ## check for year(s) e.g. (1887-1911), (-2013),
37
+ ## (1946-2001,2013-) etc.
38
+ name = strip_year( name )
39
+ ## check lang codes e.g. [en], [fr], etc.
40
+ name = strip_lang( name )
41
+ name
42
+ end
43
+
44
+
45
+ ## note: also add (),’,− etc. e.g.
46
+ ## Estudiantes (LP) => Estudiantes LP
47
+ ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
48
+ ## Myllykosken Pallo −47 => Myllykosken Pallo 47
49
+ ##
50
+ ## add & too!!
51
+ ## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
52
+
53
+ NORM_RE = %r{
54
+ [.'’º/()&_−-]
55
+ }x # note: in [] dash (-) if last doesn't need to get escaped
56
+ ## note: remove all dots (.), dash (-), ', º, /, etc.
57
+ # . U+002E (46) - FULL STOP
58
+ # ' U+0027 (39) - APOSTROPHE
59
+ # ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
60
+ # º U+00BA (186) - MASCULINE ORDINAL INDICATOR
61
+ # / U+002F (47) - SOLIDUS
62
+ # ( U+0028 (40) - LEFT PARENTHESIS
63
+ # ) U+0029 (41) - RIGHT PARENTHESIS
64
+ # − U+2212 (8722) - MINUS SIGN
65
+ # - U+002D (45) - HYPHEN-MINUS
66
+
67
+ ## for norm(alizing) names
68
+ def strip_norm( name )
69
+ name.gsub( NORM_RE, '' )
70
+ end
71
+
72
+ def normalize( name )
73
+ # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
74
+ name = strip_norm( name )
75
+ name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
76
+
77
+ ## todo/check: use our own downcase - why? why not?
78
+ name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
79
+ name
80
+ end
81
+
82
+
83
+ def variants( name ) Variant.find( name ); end
84
+
85
+ end # module NameHelper
86
+ end # module SportDb
87
+