sportdb-structs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,90 @@
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
+ module SportDb
11
+
12
+
13
+ ### todo/fix: move Status inside Match struct - why? why not?
14
+
15
# Enum-like namespace holding the known match status values.
#
# Every constant holds its own name as a string. The strings are frozen so
# that a caller cannot accidentally mutate a shared constant in place.
#
# todo/check: move the class into Match e.g. Match::Status - why? why not?
class Status
  CANCELLED = 'CANCELLED'.freeze   # canceled (US spelling), cancelled (UK spelling)
  AWARDED   = 'AWARDED'.freeze
  POSTPONED = 'POSTPONED'.freeze
  ABANDONED = 'ABANDONED'.freeze
  REPLAY    = 'REPLAY'.freeze
end # class Status
24
+
25
+
26
+
27
# Recognizes match status markers (cancelled, awarded, postponed, abandoned,
# replay) in free text.
class StatusParser
  # Status keywords / abbreviations, matched case-insensitively as a prefix.
  # Anchored with \A (start of string) rather than ^ (start of any line) so
  # that a keyword on a later line of a multi-line string does not count.
  CANCELLED_RE = /\A(?:cancelled|canceled|can\.)/i   # cancelled (UK), canceled (US)
  AWARDED_RE   = /\A(?:awarded|awd\.)/i
  POSTPONED_RE = /\A(?:postponed)/i
  ABANDONED_RE = /\A(?:abandoned|abd\.)/i
  REPLAY_RE    = /\A(?:replay)/i

  # Maps a text to one of the Status constants.
  #
  # str - text expected to start with a status keyword or abbreviation
  #
  # Returns the matching Status constant or nil if no keyword matches
  # (including when str is nil or not text at all).
  def self.parse(str)
    case str
    when CANCELLED_RE then Status::CANCELLED
    when AWARDED_RE   then Status::AWARDED
    when POSTPONED_RE then Status::POSTPONED
    when ABANDONED_RE then Status::ABANDONED
    when REPLAY_RE    then Status::REPLAY
    end
  end

  # A "protected" text run, that is, anything enclosed in square brackets.
  RUN_RE = /\[(?<text>[^\]]+)\]/

  # Scans all bracketed text runs of line (left to right) for a status.
  # The first run that parses as a status gets replaced IN PLACE by
  # "[STATUS.<status>]"; later runs are left untouched.
  #
  # line - the (mutable) string to scan and update
  #
  # Returns the Status constant found or nil (line is unchanged then).
  def self.find!(line)
    pos = 0
    while (m = RUN_RE.match(line, pos))
      status = parse(m[:text].strip)
      if status
        # replace exactly the run that matched (by position)
        line[m.begin(0)...m.end(0)] = "[STATUS.#{status}]"
        return status
      end
      pos = m.end(0)   # keep on processing the rest of the line
    end

    nil
  end # method find!
end # class StatusParser
88
+
89
+ end # module SportDb
90
+
@@ -0,0 +1,87 @@
1
+
2
+ module SportDb
3
# Mixin with helpers for cleaning up and normalizing (club, league, etc.) names.
module NameHelper
  ## note: allow placeholder years too e.g. (-___) or (-????)
  ##         for marking missing (to be filled in) years
  ##   e.g. (1887-1911), (-2013),
  ##        (1946-2001, 2013-) etc.
  ## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
  YEAR_RE = %r{\([0-9, ?_-]+?\)}

  # Removes all year markers e.g. (1887-1911) and strips surrounding whitespace.
  def strip_year( name )
    ## todo/check: only sub once (not global) - why? why not?
    name.gsub( YEAR_RE, '' ).strip
  end

  # Returns true if name includes a year marker, false otherwise.
  def has_year?( name ) YEAR_RE.match?( name ); end

  ## language codes e.g. [en], [fr], etc.
  ##   note: also allow single-letter [a] or [d] or [e] - why? why not?
  LANG_RE = %r{\[[a-z]{1,2}\]}

  # Removes all language markers e.g. [en] and strips surrounding whitespace.
  def strip_lang( name )
    name.gsub( LANG_RE, '' ).strip
  end

  # Returns true if name includes a language marker, false otherwise.
  def has_lang?( name ) LANG_RE.match?( name ); end

  # Removes year markers first and language markers second.
  def sanitize( name )
    strip_lang( strip_year( name ))
  end

  ## note: also includes (),’,− etc. e.g.
  ##   Estudiantes (LP)            => Estudiantes LP
  ##   Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
  ##   Myllykosken Pallo −47       => Myllykosken Pallo 47
  ##
  ##  and & too e.g.
  ##   Brighton & Hove Albion => Brighton  Hove Albion
  ##
  ##  characters removed:
  ##   .  U+002E (46)   - FULL STOP
  ##   '  U+0027 (39)   - APOSTROPHE
  ##   ’  U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
  ##   º  U+00BA (186)  - MASCULINE ORDINAL INDICATOR
  ##   /  U+002F (47)   - SOLIDUS
  ##   (  U+0028 (40)   - LEFT PARENTHESIS
  ##   )  U+0029 (41)   - RIGHT PARENTHESIS
  ##   &  U+0026 (38)   - AMPERSAND
  ##   _  U+005F (95)   - LOW LINE
  ##   −  U+2212 (8722) - MINUS SIGN
  ##   -  U+002D (45)   - HYPHEN-MINUS
  ##  note: in [] the dash (-) does not need to get escaped if last
  NORM_RE = %r{[.'’º/()&_−-]}

  # Removes all punctuation listed in NORM_RE (spaces are kept).
  def strip_norm( name )
    name.gsub( NORM_RE, '' )
  end

  # Builds the normalized form of name used for lookups / comparisons:
  # punctuation and ALL spaces removed, then passed through downcase_i18n.
  #
  # note: does NOT call sanitize (keep normalize "atomic" for reuse)
  # note: downcase_i18n is not defined in this module; it must be provided
  #       by the including class / elsewhere
  def normalize( name )
    name = strip_norm( name )
    name = name.gsub( ' ', '' )   # note: also remove all spaces!!!

    ## todo/check: use our own downcase - why? why not?
    downcase_i18n( name )         ## do NOT care about upper and lowercase for now
  end

  # Forwards to Variant.find (defined elsewhere).
  def variants( name ) Variant.find( name ); end
end # module NameHelper
86
+ end # module SportDb
87
+
@@ -0,0 +1,199 @@
1
+
2
+ ### note: make Season like Date a "top-level" / "generic" class
3
+
4
+
5
# A football (or similar) season: either a single calendar year (e.g. 2011)
# or a split season spanning two consecutive years (e.g. 2011/12).
#
# Seasons are comparable and support ranges (via succ).
#
# todo: add (optional) start_date and end_date - why? why not?
# todo: add unicode (dash) variants too - why? why not?
class Season
  include Comparable

  YYYY_YYYY_RE = %r{^(\d{4})[/-](\d{4})$}   ## e.g. 2011-2012 or 2011/2012
  YYYY_YY_RE   = %r{^(\d{4})[/-](\d{2})$}   ## e.g. 2011-12   or 2011/12
  YYYY_Y_RE    = %r{^(\d{4})[/-](\d{1})$}   ## e.g. 2011-2    or 2011/2
  YYYY_RE      = %r{^(\d{4})$}              ## e.g. 2011

  # Parses a season string e.g. "2011", "2011/12", "2011-2" or "2011/2012".
  #
  # Returns a new Season.
  # Raises ArgumentError if the format is unknown or the end year is not
  # the start year plus one.
  def self.parse( str )
    new( *_parse( str ))
  end

  # "Internal" parse helper.
  #
  # Returns an array with the start year and (for split seasons) the end
  # year as integers. Raises ArgumentError on unknown formats or if an
  # abbreviated end year does not match the start year plus one.
  def self._parse( str )
    case str
    when YYYY_YYYY_RE            ## e.g. 2011/2012
      [Regexp.last_match( 1 ).to_i, Regexp.last_match( 2 ).to_i]
    when YYYY_YY_RE, YYYY_Y_RE   ## e.g. 2011/12 or 2011/2
      fst = Regexp.last_match( 1 ).to_i
      snd = Regexp.last_match( 2 )
      ## double check the abbreviated end year: compare the last one or two
      ##   digits (zero-padded) of the expected end year e.g. 00 == 00, 2 == 2
      snd_exp = format( "%0#{snd.size}d", (fst+1) % (10**snd.size) )
      raise ArgumentError, "[Season.parse] invalid year in season >>#{str}<<; expected #{snd_exp} but got #{snd}"  if snd_exp != snd
      [fst, fst+1]
    when YYYY_RE                 ## e.g. 2011
      [Regexp.last_match( 1 ).to_i]
    else
      raise ArgumentError, "[Season.parse] unkown season format >>#{str}<<; sorry cannot parse"
    end
  end

  # Converts the arguments into a Season (used by the Kernel method Season()).
  #
  #   Season                  - passed through as is
  #   String                  - parsed, see Season.parse
  #   single Integer > 9999   - convenience "hack": digits after the first
  #                             four are the end year e.g.
  #                               202021   or 2020_21   => 2020/21
  #                               20201    or 2020_1    => 2020/21
  #                               20202021 or 2020_2021 => 2020/21
  #   anything else           - handed to Season.new e.g. 2020 or 2020, 2021
  def self.convert( *args )
    first = args[0]
    if args.size == 1 && first.is_a?( Season )
      first
    elsif args.size == 1 && first.is_a?( String )
      parse( first )
    elsif args.size == 1 && first.is_a?( Integer ) && first > 9999
      digits = first.to_s
      parse( "#{digits[0..3]}/#{digits[4..-1]}" )
    else
      new( *args )
    end
  end

  attr_reader :start_year,
              :end_year

  # Creates a season from a start year and an optional end year (integers).
  #
  # Raises ArgumentError if the arguments are not one or two integers or
  # if the end year is not the start year plus one.
  def initialize( *args )
    unless [1, 2].include?( args.size ) && args.all? { |arg| arg.is_a?( Integer ) }
      raise ArgumentError, "[Season] expected season start year (integer) with opt. end year"
    end

    @start_year = args[0]
    @end_year   = args[1] || args[0]

    if args.size == 2
      end_year_exp = @start_year+1
      raise ArgumentError, "[Season] invalid year in season >>#{to_s}<<; expected #{end_year_exp} but got #{@end_year}"  if end_year_exp != @end_year
    end
  end

  # Generates a "generic / synthetic" start date:
  # January 1st for single-year seasons, July 1st for split seasons.
  #
  # note: requires the date standard library to be loaded
  def start_date
    if year?
      Date.new( start_year, 1, 1 )
    else
      Date.new( start_year, 7, 1 )
    end
  end

  ## single-year season e.g. 2011 if start_year is end_year - todo - find a better name?
  def year?() @start_year == @end_year; end

  # Returns the previous season (same kind, that is, single-year or split).
  def prev
    if year?
      Season.new( @start_year-1 )
    else
      Season.new( @start_year-1, @end_year-1 )
    end
  end

  # Returns the next season (same kind, that is, single-year or split).
  def next
    if year?
      Season.new( @start_year+1 )
    else
      Season.new( @start_year+1, @end_year+1 )
    end
  end
  alias_method :succ, :next   ## add support for ranges

  # Orders seasons by start year and then by end year
  # (so 2010 sorts before 2010/11).
  #
  # Returns -1, 0 or 1, or nil if other is not a Season (Comparable then
  # reports == as false and raises ArgumentError for <, > and friends).
  def <=>(other)
    return nil unless other.is_a?( Season )

    [@start_year, @end_year] <=> [other.start_year, other.end_year]
  end

  # Formats the season as a string.
  #
  # format - :default, :short, :s  => e.g. 1999/00   or 2019/20
  #          :long, :l             => e.g. 1999/2000 or 2019/2020
  # sep    - separator between the years (default: '/')
  #
  # note: single-year seasons always format as the plain year e.g. 2011
  #       (the format is not checked then)
  #
  # Raises ArgumentError on unsupported formats (split seasons only).
  def to_formatted_s( format=:default, sep: '/' )
    return '%d' % @start_year  if year?

    case format
    when :default, :short, :s
      "%d#{sep}%02d" % [@start_year, @end_year % 100]
    when :long, :l
      "%d#{sep}%d" % [@start_year, @end_year]
    else
      raise ArgumentError, "[Season.to_s] unsupported format >#{format}<"
    end
  end
  alias_method :to_s, :to_formatted_s

  # Returns the short form e.g. 2019/20 (or 2019 for single-year seasons).
  def key() to_s( :short ); end
  alias_method :to_key, :key
  alias_method :name,   :key
  alias_method :title,  :key

  alias_method :inspect, :key   ## note: change debug output to the short string!!

  # Formats the season for use in file paths (with a dash as separator).
  #
  # format - :default, :short, :s   => e.g. 1999-00         or 2019-20
  #          :long, :l              => e.g. 1999-2000       or 2019-2020
  #          :archive, :decade, :d  => e.g. 1990s/1999-00   or 2010s/2019-20
  #          :century, :c           => e.g. 1900s/1999-00   or 2000s/2019-20
  #
  # Raises ArgumentError on unsupported formats.
  def to_path( format=:default )
    case format
    when :default, :short, :s
      to_s( :short, sep: '-' )
    when :long, :l
      to_s( :long, sep: '-' )
    when :archive, :decade, :d
      "%3d0s/%s" % [@start_year / 10, to_s( :short, sep: '-' )]
    when :century, :c
      "%2d00s/%s" % [@start_year / 100, to_s( :short, sep: '-' )]
    else
      raise ArgumentError, "[Season.to_path] unsupported format >#{format}<"
    end
  end # method to_path
  alias_method :directory, :to_path   ## keep "legacy" directory alias - why? why not?
  alias_method :path,      :to_path
end # class Season
192
+
193
+
194
+
195
+ ### note: add a convenience "shortcut" Season() kernel conversion method
196
+ ## use like Season( '2012/3' ) or such
197
module Kernel
  # Conversion shortcut; hands all arguments over to Season.convert
  # and returns its result.
  def Season( *args )
    Season.convert( *args )
  end
end
@@ -0,0 +1,26 @@
1
+ module Sports
2
+
3
+ ##
4
+ # note: check that shape/structure/fields/attributes match
5
+ # the ActiveRecord model !!!!
6
+
7
# Plain country record.
#
# note: check that shape/structure/fields/attributes match
#       the ActiveRecord model !!!!
class Country
  ## note: read-only for now (except alt_names) - why? why not?
  ##   add cities (array/list) - why? why not?
  attr_reader   :key, :name, :code, :tags
  attr_accessor :alt_names

  # key  - optional; if missing (nil) gets auto-generated from the name:
  #        the downcased name with everything except the letters a-z removed
  # name - country name (required)
  # code - country code (required)
  # tags - optional list of tags (default: empty array)
  def initialize( key: nil, name:, code:, tags: [] )
    @name      = name
    @code      = code
    @tags      = tags
    @alt_names = []   # always starts out empty
    @key       = key ? key : name.downcase.delete( '^a-z' )
  end
end # class Country
24
+
25
+ end # module Sports
26
+
@@ -0,0 +1,231 @@
1
+
2
+ module Sports
3
+
4
+
5
+ ## "free-standing" goal event - for import/export in separate event / goal datafiles
6
+ ## returned by CsvGoalParser and others
7
# "Free-standing" goal event - for import/export in separate event / goal
# datafiles.
class GoalEvent
  # Goal minute with optional added-time offset, followed by ' or . at the
  # end e.g. 45', 45. or 90 +3' or 90+3'
  #   note: the space(s) before the plus are optional; if they were required
  #         90+3' would get (mis)read as minute 3
  MINUTE_RE = %r{([0-9]+)
                 (?:[ ]*
                   \+([0-9]+)
                 )?
                 ['.]
                $}x

  # Builds a goal event from a row (anything that responds to [] with the
  # string keys Match, Score, Minute, Player, Extra and Notes).
  #
  #   Match  - "team1 - team2 | date or round"; the part after the | is a
  #            date if it looks like "Apr 4" or "Apr 4 2019", a round otherwise
  #   Score  - the score after the goal e.g. 1-0
  #   Minute - see MINUTE_RE
  #   Extra  - (og) or (o.g.) marks an own goal, (pen) or (pen.) a penalty
  #   Notes  - optional; empty notes become nil
  #
  # Returns a new GoalEvent.
  # Raises ArgumentError on an unsupported Match or Minute format.
  def self.build( row )   ## rename to parse or such - why? why not?
    ## split match_id
    team_str, more_str   = row['Match'].to_s.split( '|' )
    team1_str, team2_str = team_str.to_s.split( ' - ' )

    if team1_str.nil? || team2_str.nil? || more_str.nil?
      raise ArgumentError, "[GoalEvent.build] unsupported match format >#{row['Match']}<; expected >team1 - team2 | date or round<"
    end

    more_str  = more_str.strip
    team1_str = team1_str.strip
    team2_str = team2_str.strip

    # check if more_str is a date otherwise assume round
    date_fmt = case more_str
               when /^[A-Z]{3} [0-9]{1,2}$/i           then '%b %d'      ## Apr 4
               when /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i  then '%b %d %Y'   ## Apr 4 2019
               end

    date  = date_fmt ? Date.strptime( more_str, date_fmt ) : nil
    round = date_fmt ? nil : more_str

    score1, score2 = row['Score'].split( '-' ).map { |value| value.strip.to_i }
    ## note: a missing second value counts as 0 (like nil.to_i)
    score2 ||= 0

    m = MINUTE_RE.match( row['Minute'] )
    if m.nil?
      raise ArgumentError, "[GoalEvent.build] unsupported minute (goal) format >#{row['Minute']}<"
    end

    new( team1:   team1_str,
         team2:   team2_str,
         date:    date,
         round:   round,
         score1:  score1,
         score2:  score2,
         minute:  m[1].to_i,
         offset:  m[2] ? m[2].to_i : nil,
         player:  row['Player'],
         owngoal: ['(og)', '(o.g.)'].include?( row['Extra'] ),
         penalty: ['(pen)', '(pen.)'].include?( row['Extra'] ),
         notes:   (row['Notes'].nil? || row['Notes'].empty?) ? nil : row['Notes'] )
  end

  ## match id
  attr_reader :team1,
              :team2,
              :round,   ## optional
              :date     ## optional

  ## main attributes
  attr_reader :score1,
              :score2,
              :player,
              :minute,
              :offset,
              :owngoal,
              :penalty,
              :notes

  # Returns the match identifier "team1 - team2 | round" or, if there is
  # no round, "team1 - team2 | date".
  ## todo/check: or just use match.hash or such if match mapping known - why? why not?
  def match_id
    if round
      "#{@team1} - #{@team2} | #{@round}"
    else
      "#{@team1} - #{@team2} | #{@date}"
    end
  end

  def owngoal?() @owngoal==true; end
  def penalty?() @penalty==true; end

  def initialize( team1:,
                  team2:,
                  round:   nil,
                  date:    nil,
                  score1:,
                  score2:,
                  player:,
                  minute:,
                  offset:  nil,
                  owngoal: false,
                  penalty: false,
                  notes:   nil )
    @team1   = team1
    @team2   = team2
    @round   = round
    @date    = date

    @score1  = score1
    @score2  = score2
    @player  = player
    @minute  = minute
    @offset  = offset
    @owngoal = owngoal
    @penalty = penalty
    @notes   = notes
  end

  # Replaces team1 and/or team2 (only if the keyword is given); all other
  # keywords are ignored.
  ## note: lets you use normalize teams or such - acts like a Match struct
  ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
  def update( **kwargs )
    @team1 = kwargs[:team1]  if kwargs.has_key?( :team1 )
    @team2 = kwargs[:team2]  if kwargs.has_key?( :team2 )
  end
end # class GoalEvent
142
+
143
+
144
+
145
+
146
# Goal nested (non-freestanding) inside a match (the match is the parent).
class Goal
  # Builds an array of goals from goal events (in scoring order).
  #
  # The scoring team (1 or 2) gets derived from the running score: every
  # event must advance the score of exactly one team by exactly one goal,
  # starting from 0-0.
  #
  # events - objects responding to score1, score2, minute, offset, player,
  #          owngoal, penalty and notes
  #
  # Returns an array of Goal.
  # Raises ArgumentError on an unexpected score advance.
  def self.build( events )   ## check/todo - rename to build_from_event/row or such - why? why not?
    last_score1 = 0
    last_score2 = 0

    events.map do |event|
      team = if last_score1+1 == event.score1 && last_score2 == event.score2
               1
             elsif last_score2+1 == event.score2 && last_score1 == event.score1
               2
             else
               raise ArgumentError, "[Goal.build] unexpected score advance (one goal at a time expected): " \
                                    "#{last_score1}-#{last_score2} => #{event.score1}-#{event.score2}"
             end

      last_score1 = event.score1
      last_score2 = event.score2

      new( score1:  event.score1,
           score2:  event.score2,
           team:    team,
           minute:  event.minute,
           offset:  event.offset,
           player:  event.player,
           owngoal: event.owngoal,
           penalty: event.penalty,
           notes:   event.notes )
    end
  end

  attr_reader :score1,
              :score2,
              :team,      # 1 or 2
              :player,
              :minute,
              :offset,
              :owngoal,
              :penalty,
              :notes

  def owngoal?() @owngoal==true; end
  def penalty?() @penalty==true; end
  def team1?()   @team == 1; end
  def team2?()   @team == 2; end

  def initialize( score1:,
                  score2:,
                  team:,
                  player:,
                  minute:,
                  offset:  nil,
                  owngoal: false,
                  penalty: false,
                  notes:   nil )
    @score1  = score1
    @score2  = score2
    @team    = team    # 1 or 2
    @player  = player
    @minute  = minute
    @offset  = offset
    @owngoal = owngoal
    @penalty = penalty
    @notes   = notes
  end
end # class Goal
228
+
229
+
230
+ end # module Sports
231
+