sportdb-structs 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 558329ee3ae366f713dc5e20cdd61de62d5f190d
4
- data.tar.gz: 05a89470a83b3ded621a2c0ef0125be847e4e14a
2
+ SHA256:
3
+ metadata.gz: 9569b6727737da1a3c241e4acae88a19ea61e7cc132e752b1d391628424db916
4
+ data.tar.gz: 37ad5ee8f954dfdb3221060b500eed898461ce957ed257d2dd58e6b6467bae7c
5
5
  SHA512:
6
- metadata.gz: dcad6e831a704229f9e7926a22b64d0b54f5855e320ef77fa48972dcd2759f241c7db837e6320981857b66cd109155385e21e92d65caf105b6296bd22751b97a
7
- data.tar.gz: 36b338d8654a1d9ffda30ea85e77caa099a71a411e38bcb69c1a11bacf0f30c6ed9daaf77bbba66e8849726a0e827844fc4ac6b4d74ae9ab9ec73e33e5175bd5
6
+ metadata.gz: 4eb9e97280e9a390f188409fd8dab427b96dcc9e0ed9d81de20546254dc02903b9d7c28ab22efbaea0fb7f13cf9ea685c81ab1f88fc67cd69f9d8b017579d3ab
7
+ data.tar.gz: 4bcb33bdc129b67439817eea7204bd42aaf26a20b0b1962a046eb182d8d48746b38452aa5a177a01c7f522e3bcab203053778d42e75212bde5128405eeb37bf0
@@ -1,39 +1,39 @@
1
- module SportDb
2
- module Import
3
-
4
- class Configuration
5
- ##
6
- ## todo: allow configure of countries_dir like clubs_dir
7
- ## "fallback" and use a default built-in world/countries.txt
8
-
9
- attr_accessor :catalog
10
-
11
- attr_reader :lang
12
- def lang=(value)
13
- ## check/todo: always use to_sym - why? needed?
14
- DateFormats.lang = value
15
- ScoreFormats.lang = value
16
- SportDb.lang.lang = value
17
-
18
- ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or someting !!!!
19
- ## use Sport.lang only as a read-only shortcut a la catalog for config.lang!!!!
20
- end
21
-
22
- end # class Configuration
23
-
24
-
25
- ## lets you use
26
- ## SportDb::Import.configure do |config|
27
- ## config.lang = 'it'
28
- ## end
29
-
30
- def self.configure() yield( config ); end
31
-
32
- def self.config() @config ||= Configuration.new; end
33
-
34
- ## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
35
- def self.catalog() config.catalog; end
36
-
37
- end # module Import
38
- end # module SportDb
39
-
1
+ module SportDb
2
+ module Import
3
+
4
+ class Configuration
5
+ ##
6
+ ## todo: allow configure of countries_dir like clubs_dir
7
+ ## "fallback" and use a default built-in world/countries.txt
8
+
9
+ attr_accessor :catalog
10
+
11
+ attr_reader :lang
12
+ def lang=(value)
13
+ ## check/todo: always use to_sym - why? needed?
14
+ DateFormats.lang = value
15
+ ScoreFormats.lang = value
16
+ SportDb.lang.lang = value
17
+
18
+ ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
19
+ ## use SportDb.lang only as a read-only shortcut a la catalog for config.lang!!!!
20
+ end
21
+
22
+ end # class Configuration
23
+
24
+
25
+ ## lets you use
26
+ ## SportDb::Import.configure do |config|
27
+ ## config.lang = 'it'
28
+ ## end
29
+
30
+ def self.configure() yield( config ); end
31
+
32
+ def self.config() @config ||= Configuration.new; end
33
+
34
+ ## e.g. use config.catalog -- keep Import.catalog as a shortcut (for "read-only" access)
35
+ def self.catalog() config.catalog; end
36
+
37
+ end # module Import
38
+ end # module SportDb
39
+
@@ -1,28 +1,28 @@
1
-
2
- module SportDb
3
- class CsvGoalParser
4
-
5
-
6
- def self.read( path )
7
- txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
8
- parse( txt )
9
- end
10
-
11
- def self.parse( txt )
12
- new( txt ).parse
13
- end
14
-
15
-
16
- def initialize( txt )
17
- @txt = txt
18
- end
19
-
20
- def parse
21
- rows = parse_csv( @txt )
22
- recs = rows.map { |row| Sports::GoalEvent.build( row ) }
23
- ## pp recs[0]
24
- recs
25
- end
26
-
27
- end # class CsvGoalParser
28
- end # module Sports
1
+
2
+ module SportDb
3
+ class CsvGoalParser
4
+
5
+
6
+ def self.read( path )
7
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
8
+ parse( txt )
9
+ end
10
+
11
+ def self.parse( txt )
12
+ new( txt ).parse
13
+ end
14
+
15
+
16
+ def initialize( txt )
17
+ @txt = txt
18
+ end
19
+
20
+ def parse
21
+ rows = parse_csv( @txt )
22
+ recs = rows.map { |row| Sports::GoalEvent.build( row ) }
23
+ ## pp recs[0]
24
+ recs
25
+ end
26
+
27
+ end # class CsvGoalParser
28
+ end # module SportDb
@@ -1,90 +1,90 @@
1
- #####################
2
- # helpers for parsing & finding match status e.g.
3
- # - cancelled / canceled
4
- # - awarded
5
- # - abandoned
6
- # - replay
7
- # etc.
8
-
9
-
10
- module SportDb
11
-
12
-
13
- ### todo/fix: move Status inside Match struct - why? why not?
14
-
15
- class Status
16
- # note: use a class as an "enum"-like namespace for now - why? why not?
17
- # move class into Match e.g. Match::Status - why? why not?
18
- CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
19
- AWARDED = 'AWARDED'
20
- POSTPONED = 'POSTPONED'
21
- ABANDONED = 'ABANDONED'
22
- REPLAY = 'REPLAY'
23
- end # class Status
24
-
25
-
26
-
27
- class StatusParser
28
-
29
- def self.parse( str )
30
- ## note: returns nil if no match found
31
- ## note: english usage - cancelled (in UK), canceled (in US)
32
- if str =~ /^(cancelled|
33
- canceled|
34
- can\.
35
- )/xi
36
- Status::CANCELLED
37
- elsif str =~ /^(awarded|
38
- awd\.
39
- )/xi
40
- Status::AWARDED
41
- elsif str =~ /^(postponed
42
- )/xi
43
- Status::POSTPONED
44
- elsif str =~ /^(abandoned|
45
- abd\.
46
- )/xi
47
- Status::ABANDONED
48
- elsif str =~ /^(replay
49
- )/xi
50
- Status::REPLAY
51
- else
52
- # no match
53
- nil
54
- end
55
- end
56
-
57
-
58
- RUN_RE = /\[
59
- (?<text>[^\]]+)
60
- \]
61
- /x
62
- def self.find!( line )
63
- ## for now check all "protected" text run blocks e.g. []
64
- ## puts "line: >#{line}<"
65
-
66
- status = nil
67
-
68
- str = line
69
- while m = str.match( RUN_RE )
70
- str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
71
-
72
- ## check for status match
73
- match_str = m[0] ## keep a copy of the match string (for later sub)
74
- text = m[:text].strip
75
- ## puts " text: >#{text}<"
76
-
77
- status = parse( text )
78
-
79
- if status
80
- line.sub!( match_str, "[STATUS.#{status}]" )
81
- break
82
- end
83
- end # while match
84
-
85
- status
86
- end # method find!
87
- end # class StatusParser
88
-
89
- end # module SportDb
90
-
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
+ module SportDb
11
+
12
+
13
+ ### todo/fix: move Status inside Match struct - why? why not?
14
+
15
+ class Status
16
+ # note: use a class as an "enum"-like namespace for now - why? why not?
17
+ # move class into Match e.g. Match::Status - why? why not?
18
+ CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
19
+ AWARDED = 'AWARDED'
20
+ POSTPONED = 'POSTPONED'
21
+ ABANDONED = 'ABANDONED'
22
+ REPLAY = 'REPLAY'
23
+ end # class Status
24
+
25
+
26
+
27
+ class StatusParser
28
+
29
+ def self.parse( str )
30
+ ## note: returns nil if no match found
31
+ ## note: english usage - cancelled (in UK), canceled (in US)
32
+ if str =~ /^(cancelled|
33
+ canceled|
34
+ can\.
35
+ )/xi
36
+ Status::CANCELLED
37
+ elsif str =~ /^(awarded|
38
+ awd\.
39
+ )/xi
40
+ Status::AWARDED
41
+ elsif str =~ /^(postponed
42
+ )/xi
43
+ Status::POSTPONED
44
+ elsif str =~ /^(abandoned|
45
+ abd\.
46
+ )/xi
47
+ Status::ABANDONED
48
+ elsif str =~ /^(replay
49
+ )/xi
50
+ Status::REPLAY
51
+ else
52
+ # no match
53
+ nil
54
+ end
55
+ end
56
+
57
+
58
+ RUN_RE = /\[
59
+ (?<text>[^\]]+)
60
+ \]
61
+ /x
62
+ def self.find!( line )
63
+ ## for now check all "protected" text run blocks e.g. []
64
+ ## puts "line: >#{line}<"
65
+
66
+ status = nil
67
+
68
+ str = line
69
+ while m = str.match( RUN_RE )
70
+ str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
71
+
72
+ ## check for status match
73
+ match_str = m[0] ## keep a copy of the match string (for later sub)
74
+ text = m[:text].strip
75
+ ## puts " text: >#{text}<"
76
+
77
+ status = parse( text )
78
+
79
+ if status
80
+ line.sub!( match_str, "[STATUS.#{status}]" )
81
+ break
82
+ end
83
+ end # while match
84
+
85
+ status
86
+ end # method find!
87
+ end # class StatusParser
88
+
89
+ end # module SportDb
90
+
@@ -1,87 +1,87 @@
1
-
2
- module SportDb
3
- module NameHelper
4
-
5
-
6
- ## note: allow placeholder years to e.g. (-___) or (-????)
7
- ## for marking missing (to be filled in) years
8
- ## e.g. (1887-1911), (-2013),
9
- ## (1946-2001, 2013-) etc.
10
- ## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
11
- YEAR_RE = %r{\(
12
- [0-9, ?_-]+? # note: non-greedy (minimum/first) match
13
- \)}x
14
-
15
- def strip_year( name )
16
- ## check for year(s) e.g. (1887-1911), (-2013),
17
- ## (1946-2001, 2013-) etc.
18
- ## todo/check: only sub once (not global) - why? why not?
19
- name.gsub( YEAR_RE, '' ).strip
20
- end
21
-
22
- def has_year?( name ) name =~ YEAR_RE; end
23
-
24
-
25
- LANG_RE = %r{\[
26
- [a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
27
- \]}x
28
- def strip_lang( name )
29
- name.gsub( LANG_RE, '' ).strip
30
- end
31
-
32
- def has_lang?( name ) name =~ LANG_RE; end
33
-
34
-
35
- def sanitize( name )
36
- ## check for year(s) e.g. (1887-1911), (-2013),
37
- ## (1946-2001,2013-) etc.
38
- name = strip_year( name )
39
- ## check lang codes e.g. [en], [fr], etc.
40
- name = strip_lang( name )
41
- name
42
- end
43
-
44
-
45
- ## note: also add (),’,− etc. e.g.
46
- ## Estudiantes (LP) => Estudiantes LP
47
- ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
48
- ## Myllykosken Pallo −47 => Myllykosken Pallo 47
49
- ##
50
- ## add & too!!
51
- ## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
52
-
53
- NORM_RE = %r{
54
- [.'’º/()&_−-]
55
- }x # note: in [] dash (-) if last doesn't need to get escaped
56
- ## note: remove all dots (.), dash (-), ', º, /, etc.
57
- # . U+002E (46) - FULL STOP
58
- # ' U+0027 (39) - APOSTROPHE
59
- # ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
60
- # º U+00BA (186) - MASCULINE ORDINAL INDICATOR
61
- # / U+002F (47) - SOLIDUS
62
- # ( U+0028 (40) - LEFT PARENTHESIS
63
- # ) U+0029 (41) - RIGHT PARENTHESIS
64
- # − U+2212 (8722) - MINUS SIGN
65
- # - U+002D (45) - HYPHEN-MINUS
66
-
67
- ## for norm(alizing) names
68
- def strip_norm( name )
69
- name.gsub( NORM_RE, '' )
70
- end
71
-
72
- def normalize( name )
73
- # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
74
- name = strip_norm( name )
75
- name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
76
-
77
- ## todo/check: use our own downcase - why? why not?
78
- name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
79
- name
80
- end
81
-
82
-
83
- def variants( name ) Variant.find( name ); end
84
-
85
- end # module NameHelper
86
- end # module SportDb
87
-
1
+
2
+ module SportDb
3
+ module NameHelper
4
+
5
+
6
+ ## note: allow placeholder years too, e.g. (-___) or (-????)
7
+ ## for marking missing (to be filled in) years
8
+ ## e.g. (1887-1911), (-2013),
9
+ ## (1946-2001, 2013-) etc.
10
+ ## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
11
+ YEAR_RE = %r{\(
12
+ [0-9, ?_-]+? # note: non-greedy (minimum/first) match
13
+ \)}x
14
+
15
+ def strip_year( name )
16
+ ## check for year(s) e.g. (1887-1911), (-2013),
17
+ ## (1946-2001, 2013-) etc.
18
+ ## todo/check: only sub once (not global) - why? why not?
19
+ name.gsub( YEAR_RE, '' ).strip
20
+ end
21
+
22
+ def has_year?( name ) name =~ YEAR_RE; end
23
+
24
+
25
+ LANG_RE = %r{\[
26
+ [a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
27
+ \]}x
28
+ def strip_lang( name )
29
+ name.gsub( LANG_RE, '' ).strip
30
+ end
31
+
32
+ def has_lang?( name ) name =~ LANG_RE; end
33
+
34
+
35
+ def sanitize( name )
36
+ ## check for year(s) e.g. (1887-1911), (-2013),
37
+ ## (1946-2001,2013-) etc.
38
+ name = strip_year( name )
39
+ ## check lang codes e.g. [en], [fr], etc.
40
+ name = strip_lang( name )
41
+ name
42
+ end
43
+
44
+
45
+ ## note: also add (),’,− etc. e.g.
46
+ ## Estudiantes (LP) => Estudiantes LP
47
+ ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
48
+ ## Myllykosken Pallo −47 => Myllykosken Pallo 47
49
+ ##
50
+ ## add & too!!
51
+ ## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
52
+
53
+ NORM_RE = %r{
54
+ [.'’º/()&_−-]
55
+ }x # note: in [] dash (-) if last doesn't need to get escaped
56
+ ## note: remove all dots (.), dash (-), ', º, /, etc.
57
+ # . U+002E (46) - FULL STOP
58
+ # ' U+0027 (39) - APOSTROPHE
59
+ # ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
60
+ # º U+00BA (186) - MASCULINE ORDINAL INDICATOR
61
+ # / U+002F (47) - SOLIDUS
62
+ # ( U+0028 (40) - LEFT PARENTHESIS
63
+ # ) U+0029 (41) - RIGHT PARENTHESIS
64
+ # − U+2212 (8722) - MINUS SIGN
65
+ # - U+002D (45) - HYPHEN-MINUS
66
+
67
+ ## for norm(alizing) names
68
+ def strip_norm( name )
69
+ name.gsub( NORM_RE, '' )
70
+ end
71
+
72
+ def normalize( name )
73
+ # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
74
+ name = strip_norm( name )
75
+ name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
76
+
77
+ ## todo/check: use our own downcase - why? why not?
78
+ name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
79
+ name
80
+ end
81
+
82
+
83
+ def variants( name ) Variant.find( name ); end
84
+
85
+ end # module NameHelper
86
+ end # module SportDb
87
+