sportdb-formats 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: '08b90c09533628f176c7c08778634c557898b5ca'
4
- data.tar.gz: 434a9655f2d11614016322bb7829b975866aa9ca
3
+ metadata.gz: 4ebc60eb9651185cf5aa0ba1c7707fc8678f7803
4
+ data.tar.gz: 67d24f30f4a8586f54ea73135fd58239420e1e22
5
5
  SHA512:
6
- metadata.gz: 6dc601d34ae8d5bb66b181cb09228d40792c2af2a360b82203f711fdfae309353bf1d3f01b79f5c0d4d41e0a64b0d69709193b553065950364a7d4e7bf5a72d3
7
- data.tar.gz: a583d31ff5fd7d6201c7f85499b448e0f73111a71d679511ca13b82d798ba3a12a40a57be42a821760a3dbafeafe9ab16a8c48b7d46c7f582df27c4f43de175c
6
+ metadata.gz: 6c2fdcdf34f4ea95ee765d1b1cfedd9aef84787836fb7fb2c21c10d4000e3b8417109f5ec9ce4d5ae552a0a63d5e91a9c976847fd33b175cccad5aa4d5ea4762
7
+ data.tar.gz: 4ef8c4ba7538c738e2c44b6f16aafbdc11d640485ecf99a603a764c069b038e290ea320918ec16ecd3405f321e6b4bba5f8b350c6835c5f3a70808954da93350
data/Manifest.txt CHANGED
@@ -9,13 +9,22 @@ lib/sportdb/formats/outline_reader.rb
9
9
  lib/sportdb/formats/package.rb
10
10
  lib/sportdb/formats/scores.rb
11
11
  lib/sportdb/formats/season_utils.rb
12
+ lib/sportdb/formats/structs/club.rb
13
+ lib/sportdb/formats/structs/match.rb
14
+ lib/sportdb/formats/structs/matchlist.rb
15
+ lib/sportdb/formats/structs/season.rb
16
+ lib/sportdb/formats/structs/standings.rb
17
+ lib/sportdb/formats/structs/team_usage.rb
12
18
  lib/sportdb/formats/version.rb
13
19
  test/helper.rb
20
+ test/test_club_helpers.rb
21
+ test/test_clubs.rb
14
22
  test/test_csv_reader.rb
15
23
  test/test_datafile.rb
16
24
  test/test_datafile_match.rb
17
25
  test/test_goals.rb
26
+ test/test_match.rb
18
27
  test/test_outline_reader.rb
19
28
  test/test_package.rb
20
29
  test/test_scores.rb
21
- test/test_season_utils.rb
30
+ test/test_season.rb
data/Rakefile CHANGED
@@ -20,7 +20,7 @@ Hoe.spec 'sportdb-formats' do
20
20
  self.licenses = ['Public Domain']
21
21
 
22
22
  self.extra_deps = [
23
- ['alphabets', '>= 0.1.0'],
23
+ ['alphabets', '>= 0.1.3'],
24
24
  ['date-formats', '>= 0.2.4'],
25
25
  ['csvreader', '>= 1.2.4'],
26
26
  ['sportdb-langs', '>= 0.0.1'],
@@ -2,110 +2,21 @@
2
2
 
3
3
 
4
4
  module SeasonHelper ## use Helpers why? why not?
5
- def prev( season )
6
- ## todo: add 1964-1965 format too!!!
7
- if season =~ /^(\d{4})-(\d{2})$/ ## season format is 1964-65
8
- fst = $1.to_i - 1
9
- snd = (100 + $2.to_i - 1) % 100 ## note: add 100 to turn 00 => 99
10
- "%4d-%02d" % [fst,snd]
11
- elsif season =~ /^(\d{4})$/
12
- fst = $1.to_i - 1
13
- "%4d" % [fst]
14
- else
15
- puts "*** !!!! wrong season format >>#{season}<<; exit; sorry"
16
- exit 1
17
- end
18
- end # method prev
19
5
 
6
+ ##############################################
7
+ ### deprecated!!! use new Season class!!!
8
+ ## this code will get removed!!!!
9
+ ###################################################
20
10
 
21
- def key( basename )
22
- if basename =~ /^(\d{4})[\-\/](\d{4})$/ ## e.g. 2011-2012 or 2011/2012 => 2011/12
23
- "%4d/%02d" % [$1.to_i, $2.to_i % 100]
24
- elsif basename =~ /^(\d{4})[\-\/](\d{2})$/ ## e.g. 2011-12 or 2011/12 => 2011/12
25
- "#{$1}/#{$2}"
26
- elsif basename =~ /^(\d{4})$/
27
- $1
28
- else
29
- puts "*** !!!! wrong season format >>#{basename}<<; exit; sorry"
30
- exit 1
31
- end
32
- end # method key
11
+ def prev( str ) SportDb::Import::Season.new( str ).prev; end
12
+ def key( str ) SportDb::Import::Season.new( str ).key; end
13
+ def directory( str, format: nil ) SportDb::Import::Season.new( str ).directory( format: format ); end
33
14
 
34
-
35
- def directory( season, format: nil )
36
- ## todo: find better names for formats - why? why not?:
37
- ## long | archive | decade(?) => 1980s/1988-89, 2010s/2017-18, ...
38
- ## short | std(?) => 1988-89, 2017-18, ...
39
-
40
- ## convert season name to "standard" season name for directory
41
-
42
- ## todo/fix: move to parse / validate season (for (re)use)!!!! - why? why not?
43
- if season =~ /^(\d{4})[\-\/](\d{4})$/ ## e.g. 2011-2012 or 2011/2012 => 2011-12
44
- years = [$1.to_i, $2.to_i]
45
- elsif season =~ /^(\d{4})[\-\/](\d{2})$/ ## e.g. 2011-12 or 2011/12 => 2011-12
46
- years = [$1.to_i, $1.to_i+1]
47
- ## note: check that season end year is (always) season start year + 1
48
- if ($1.to_i+1) % 100 != $2.to_i
49
- puts "*** !!!! wrong season format >>#{season}<<; season end year MUST (always) equal season start year + 1; exit; sorry"
50
- exit 1
51
- end
52
- elsif season =~ /^(\d{4})$/
53
- years = [$1.to_i]
54
- else
55
- puts "*** !!!! wrong season format >>#{season}<<; exit; sorry"
56
- exit 1
57
- end
58
-
59
-
60
- if ['l', 'long', 'archive' ].include?( format.to_s ) ## note: allow passing in of symbol to e.g. 'long' or :long
61
- if years.size == 2
62
- "%3d0s/%4d-%02d" % [years[0] / 10, years[0], years[1] % 100] ## e.g. 2000s/2001-02
63
- else ## assume size 1 (single year season)
64
- "%3d0s/%4d" % [years[0] / 10, years[0]]
65
- end
66
- else ## default 'short' format / fallback
67
- if years.size == 2
68
- "%4d-%02d" % [years[0], years[1] % 100] ## e.g. 2001-02
69
- else ## assume size 1 (single year season)
70
- "%4d" % years[0]
71
- end
72
- end
73
- end # method directory
74
-
75
-
76
-
77
- def start_year( season ) ## get start year
78
- ## convert season name to "standard" season name for directory
79
-
80
- ## todo/check: just return year from first for chars - keep it simple - why? why not?
81
- if season =~ /^(\d{4})[\-\/](\d{4})$/ ## e.g. 2011-2010 or 2011/2011 => 2011-10
82
- $1
83
- elsif season =~ /^(\d{4})[\-\/](\d{2})$/
84
- $1
85
- elsif season =~ /^(\d{4})$/
86
- $1
87
- else
88
- puts "*** !!!! wrong season format >>#{season}<<; exit; sorry"
89
- exit 1
90
- end
91
- end
92
-
93
- def end_year( season ) ## get end year
94
- ## convert season name to "standard" season name for directory
95
- if season =~ /^(\d{4})[\-\/](\d{4})$/ ## e.g. 2011-2010 or 2011/2011 => 2011-10
96
- $2
97
- elsif season =~ /^(\d{4})[\-\/](\d{2})$/
98
- ## note: assume second year is always +1
99
- ## todo/fix: add assert/check - why? why not?
100
- ## eg. 1999-00 => 2000 or 1899-00 => 1900
101
- ($1.to_i+1).to_s
102
- elsif season =~ /^(\d{4})$/
103
- $1
104
- else
105
- puts "*** !!!! wrong season format >>#{season}<<; exit; sorry"
106
- exit 1
107
- end
108
- end
15
+ ## note: new start_year now returns an integer number (no longer a string)!!!
16
+ def start_year( str ) SportDb::Import::Season.new( str ).start_year; end
17
+ ## note: new end_year now returns an integer number (no longer a string)!!!
18
+ ## if no end_year (year? == true) then returns nil (no longer the start_year "as fallback")!!!
19
+ def end_year( str ) SportDb::Import::Season.new( str ).end_year; end
109
20
  end # module SeasonHelper
110
21
 
111
22
 
@@ -0,0 +1,221 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+ ##
7
+ # note: use our own (internal) club struct for now - why? why not?
8
+ # - check that shape/structure/fields/attributes match
9
+ # the Team struct in sportdb-text (in SportDb::Struct::Team) !!!!
10
+
11
+
12
+ ## more attribs - todo/fix - also add "upstream" to struct & model!!!!!
13
+ # district, geos, year_end, country, etc.
14
+
15
+
16
+
17
+
18
+ class Club
19
+
20
+ def self.create( **kwargs )
21
+ new.update( kwargs )
22
+ end
23
+
24
+ def update( **kwargs )
25
+ @name = kwargs[:name] if kwargs.has_key?( :name )
26
+ @alt_names = kwargs[:alt_names] if kwargs.has_key?( :alt_names )
27
+ @city = kwargs[:city] if kwargs.has_key?( :city )
28
+ ## todo/fix: use city struct - why? why not?
29
+ ## todo/fix: add country too or report unused keywords / attributes - why? why not?
30
+
31
+ self ## note - MUST return self for chaining
32
+ end
33
+
34
+
35
+ ## todo: use just names for alt_names - why? why not?
36
+ attr_accessor :key, :name, :alt_names,
37
+ :code, ## code == abbreviation e.g. ARS etc.
38
+ :year, :year_end, ## todo/fix: change year_end to end_year (like in season)!!!
39
+ :ground
40
+
41
+
42
+ alias_method :title, :name ## add alias/compat - why? why not
43
+
44
+ def names
45
+ ## todo/check: add alt_names_auto too? - why? why not?
46
+ [@name] + @alt_names
47
+ end ## all names
48
+
49
+
50
+ ## special import only attribs
51
+ attr_accessor :alt_names_auto ## auto-generated alt names
52
+ attr_accessor :wikipedia # wikipedia page name (for english (en))
53
+
54
+
55
+ def historic?() @year_end ? true : false; end
56
+ alias_method :past?, :historic?
57
+
58
+
59
+ attr_accessor :a, :b
60
+ def a?() @a == nil; end ## is a (1st) team / club (i)? if a is NOT set
61
+ def b?() @a != nil; end ## is b (2nd/reserve/jr) team / club (ii) if a is set
62
+
63
+ ## note: delegate/forward all geo attributes for team b for now (to team a) - keep - why? why not?
64
+ attr_writer :city, :district, :country, :geos
65
+ def city() @a == nil ? @city : @a.city; end
66
+ def district() @a == nil ? @district : @a.district; end
67
+ def country() @a == nil ? @country : @a.country; end
68
+ def geos() @a == nil ? @geos : @a.geos; end
69
+
70
+
71
+ def wikipedia?() @wikipedia; end
72
+ def wikipedia_url
73
+ if @wikipedia
74
+ ## note: replace spaces with underscore (_)
75
+ ## e.g. Club Brugge KV => Club_Brugge_KV
76
+ ## todo/check/fix:
77
+ ## check if "plain" dash (-) needs to get replaced with typographic dash??
78
+ "https://en.wikipedia.org/wiki/#{@wikipedia.gsub(' ','_')}"
79
+ else
80
+ nil
81
+ end
82
+ end
83
+
84
+
85
+ def initialize
86
+ @alt_names = []
87
+ @alt_names_auto = []
88
+ end
89
+
90
+
91
+ ## helper methods for import only
92
+ ## check for duplicates
93
+ def duplicates?
94
+ names = [name] + alt_names + alt_names_auto
95
+ names = names.map { |name| normalize( sanitize(name) ) }
96
+
97
+ names.size != names.uniq.size
98
+ end
99
+
100
+ def duplicates
101
+ names = [name] + alt_names + alt_names_auto
102
+
103
+ ## calculate (count) frequency and select if greater than one
104
+ names.reduce( Hash.new ) do |h,name|
105
+ norm = normalize( sanitize(name) )
106
+ h[norm] ||= []
107
+ h[norm] << name; h
108
+ end.select { |norm,names| names.size > 1 }
109
+ end
110
+
111
+ def add_variants( name_or_names )
112
+ names = name_or_names.is_a?(Array) ? name_or_names : [name_or_names]
113
+ names.each do |name|
114
+ name = sanitize( name )
115
+ self.alt_names_auto += variants( name )
116
+ end
117
+ end
118
+
119
+
120
+ ###################################
121
+ # "global" helper - move to ___ ? why? why not?
122
+
123
+ ## note: allow placeholder years to e.g. (-___) or (-????)
124
+ ## for marking missing (to be filled in) years
125
+ YEAR_REGEX = /\([0-9, ?_-]+?\)/ # note: non-greedy (minimum/first) match
126
+ def self.strip_year( name )
127
+ ## check for year(s) e.g. (1887-1911), (-2013),
128
+ ## (1946-2001, 2013-) etc.
129
+ name.gsub( YEAR_REGEX, '' ).strip
130
+ end
131
+
132
+ def self.has_year?( name ) name =~ YEAR_REGEX; end
133
+
134
+ LANG_REGEX = /\[[a-z]{1,2}\]/ ## note also allow [a] or [d] or [e] - why? why not?
135
+ def self.strip_lang( name )
136
+ name.gsub( LANG_REGEX, '' ).strip
137
+ end
138
+
139
+ def self.has_lang?( name ) name =~ LANG_REGEX; end
140
+
141
+ def self.sanitize( name )
142
+ ## check for year(s) e.g. (1887-1911), (-2013),
143
+ ## (1946-2001,2013-) etc.
144
+ name = strip_year( name )
145
+ ## check lang codes e.g. [en], [fr], etc.
146
+ name = strip_lang( name )
147
+ name
148
+ end
149
+
150
+
151
+ ## note: also add (),’,− etc. e.g.
152
+ ## Estudiantes (LP) => Estudiantes LP
153
+ ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
154
+ ## Myllykosken Pallo −47 => Myllykosken Pallo 47
155
+
156
+ NORM_REGEX = %r{
157
+ [.'’º/()−-]
158
+ }x # note: in [] dash (-) if last doesn't need to get escaped
159
+ ## note: remove all dots (.), dash (-), ', º, /, etc.
160
+ # . U+002E (46) - FULL STOP
161
+ # ' U+0027 (39) - APOSTROPHE
162
+ # ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
163
+ # º U+00BA (186) - MASCULINE ORDINAL INDICATOR
164
+ # / U+002F (47) - SOLIDUS
165
+ # ( U+0028 (40) - LEFT PARENTHESIS
166
+ # ) U+0029 (41) - RIGHT PARENTHESIS
167
+ # − U+2212 (8722) - MINUS SIGN
168
+ # - U+002D (45) - HYPHEN-MINUS
169
+
170
+ ## for norm(alizing) names
171
+ def self.strip_norm( name )
172
+ name.gsub( NORM_REGEX, '' )
173
+ end
174
+
175
+ def self.normalize( name )
176
+ # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
177
+ name = strip_norm( name )
178
+ name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
179
+
180
+ ## todo/fix: use our own downcase - why? why not?
181
+ name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
182
+ name
183
+ end
184
+
185
+
186
+ def self.strip_wiki( name ) # todo/check: rename to strip_wikipedia_en - why? why not?
187
+ ## note: strip disambiguation qualifier from wikipedia page name if present
188
+ ## note: only remove year and foot... for now
189
+ ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
190
+ ## Willem II (football club) => Willem II
191
+ ##
192
+ ## e.g. do NOT strip others !! e.g.
193
+ ## América Futebol Clube (MG)
194
+ ## only add more "special" cases on demand (that is, if we find more)
195
+ name = name.gsub( /\([12][^\)]+?\)/, '' ).strip ## starting with a digit 1 or 2 (assuming year)
196
+ name = name.gsub( /\(foot[^\)]+?\)/, '' ).strip ## starting with foot (assuming football ...)
197
+ name
198
+ end
199
+
200
+
201
+ private
202
+ ## private "shortcut" convenience helpers
203
+ def sanitize( name ) self.class.sanitize( name ); end
204
+ def normalize( name ) self.class.normalize( name ); end
205
+
206
+ def variants( name ) Variant.find( name ); end
207
+ end # class Club
208
+
209
+
210
+
211
+ ############
212
+ # convenience
213
+ # Club and Team are aliases for now
214
+ # in the future make
215
+ # Club > Team
216
+ # NationalTeam > Team - why? why not?
217
+ Team = Club
218
+
219
+
220
+ end # module Import
221
+ end # module SportDb
@@ -0,0 +1,131 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # note: add all "former" structs to the SportDb::Import module / namespace
5
+
6
+ module SportDb
7
+ module Import
8
+
9
+
10
+ class Match
11
+
12
+ def self.create( **kwargs ) ## keep using create why? why not?
13
+ new.update( kwargs )
14
+ end
15
+
16
+ def update( **kwargs )
17
+ ## note: check with has_key? because value might be nil!!!
18
+ @date = kwargs[:date] if kwargs.has_key? :date
19
+ @team1 = kwargs[:team1] if kwargs.has_key? :team1
20
+ @team2 = kwargs[:team2] if kwargs.has_key? :team2
21
+ @conf1 = kwargs[:conf1] if kwargs.has_key? :conf1
22
+ @conf2 = kwargs[:conf2] if kwargs.has_key? :conf2
23
+ @country1 = kwargs[:country1] if kwargs.has_key? :country1
24
+ @country2 = kwargs[:country2] if kwargs.has_key? :country2
25
+
26
+ ## note: round is a string!!! e.g. '1', '2' for matchday or 'Final', 'Semi-final', etc.
27
+ ## todo: use to_s - why? why not?
28
+ @round = kwargs[:round] if kwargs.has_key? :round
29
+ @stage = kwargs[:stage] if kwargs.has_key? :stage
30
+ @leg = kwargs[:leg] if kwargs.has_key? :leg
31
+ @group = kwargs[:group] if kwargs.has_key? :group
32
+ @comments = kwargs[:comments] if kwargs.has_key? :comments
33
+
34
+
35
+ @score1 = kwargs[:score1] if kwargs.has_key? :score1
36
+ @score1i = kwargs[:score1i] if kwargs.has_key? :score1i
37
+ @score1et = kwargs[:score1et] if kwargs.has_key? :score1et
38
+ @score1p = kwargs[:score1p] if kwargs.has_key? :score1p
39
+ @score1agg = kwargs[:score1agg] if kwargs.has_key? :score1agg
40
+
41
+ @score2 = kwargs[:score2] if kwargs.has_key? :score2
42
+ @score2i = kwargs[:score2i] if kwargs.has_key? :score2i
43
+ @score2et = kwargs[:score2et] if kwargs.has_key? :score2et
44
+ @score2p = kwargs[:score2p] if kwargs.has_key? :score2p
45
+ @score2agg = kwargs[:score2agg] if kwargs.has_key? :score2agg
46
+
47
+ ## note: (always) (auto-)convert scores to integers
48
+ @score1 = @score1.to_i if @score1
49
+ @score1i = @score1i.to_i if @score1i
50
+ @score1et = @score1et.to_i if @score1et
51
+ @score1p = @score1p.to_i if @score1p
52
+ @score1agg = @score1agg.to_i if @score1agg
53
+
54
+ @score2 = @score2.to_i if @score2
55
+ @score2i = @score2i.to_i if @score2i
56
+ @score2et = @score2et.to_i if @score2et
57
+ @score2p = @score2p.to_i if @score2p
58
+ @score2agg = @score2agg.to_i if @score2agg
59
+
60
+
61
+ ## todo/fix:
62
+ ## gr-greece/2014-15/G1.csv:
63
+ ## G1,10/05/15,Niki Volos,OFI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
64
+ ##
65
+
66
+ ## for now score1 and score2 must be present
67
+ if @score1.nil? || @score2.nil?
68
+ puts "*** missing scores for match:"
69
+ pp kwargs
70
+ ## exit 1
71
+ end
72
+
73
+ ## todo/fix: auto-calculate winner
74
+ # return 1,2,0 1 => team1, 2 => team2, 0 => draw/tie
75
+ ### calculate winner - use 1,2,0
76
+ if @score1 && @score2
77
+ if @score1 > @score2
78
+ @winner = 1
79
+ elsif @score2 > @score1
80
+ @winner = 2
81
+ elsif @score1 == @score2
82
+ @winner = 0
83
+ else
84
+ end
85
+ else
86
+ @winner = nil # unknown / undefined
87
+ end
88
+
89
+ self ## note - MUST return self for chaining
90
+ end
91
+
92
+
93
+ attr_reader :date,
94
+ :team1, :team2, ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
95
+ :score1, :score2, ## full time
96
+ :score1i, :score2i, ## half time (first (i) part)
97
+ :score1et, :score2et, ## extra time
98
+ :score1p, :score2p, ## penalty
99
+ :score1agg, :score2agg, ## full time (all legs) aggregated
100
+ :winner, # return 1,2,0 1 => team1, 2 => team2, 0 => draw/tie
101
+ :round, ## todo/fix: use round_num or similar - for compat with db activerecord version? why? why not?
102
+ :leg, ## e.g. '1','2','3','replay', etc. - use leg for marking **replay** too - keep/make leg numeric?! - why? why not?
103
+ :stage,
104
+ :group,
105
+ :conf1, :conf2, ## special case for mls e.g. conference1, conference2 (e.g. west, east, central)
106
+ :country1, :country2, ## special case for champions league etc. - uses FIFA country code
107
+ :comments
108
+
109
+
110
+
111
+ def initialize( **kwargs )
112
+ update( kwargs ) unless kwargs.empty?
113
+ end
114
+
115
+
116
+ def over?() true; end ## for now all matches are over - in the future check date!!!
117
+ def complete?() true; end ## for now all scores are complete - in the future check scores; might be missing - not yet entered
118
+
119
+
120
+ def score_str # pretty print (full time) scores; convenience method
121
+ "#{@score1}-#{@score2}"
122
+ end
123
+
124
+ def scorei_str # pretty print (half time) scores; convenience method
125
+ "#{@score1i}-#{@score2i}"
126
+ end
127
+
128
+ end # class Match
129
+ end # module Import
130
+
131
+ end # module SportDb