sportdb-formats 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: '08b90c09533628f176c7c08778634c557898b5ca'
4
- data.tar.gz: 434a9655f2d11614016322bb7829b975866aa9ca
3
+ metadata.gz: 4ebc60eb9651185cf5aa0ba1c7707fc8678f7803
4
+ data.tar.gz: 67d24f30f4a8586f54ea73135fd58239420e1e22
5
5
  SHA512:
6
- metadata.gz: 6dc601d34ae8d5bb66b181cb09228d40792c2af2a360b82203f711fdfae309353bf1d3f01b79f5c0d4d41e0a64b0d69709193b553065950364a7d4e7bf5a72d3
7
- data.tar.gz: a583d31ff5fd7d6201c7f85499b448e0f73111a71d679511ca13b82d798ba3a12a40a57be42a821760a3dbafeafe9ab16a8c48b7d46c7f582df27c4f43de175c
6
+ metadata.gz: 6c2fdcdf34f4ea95ee765d1b1cfedd9aef84787836fb7fb2c21c10d4000e3b8417109f5ec9ce4d5ae552a0a63d5e91a9c976847fd33b175cccad5aa4d5ea4762
7
+ data.tar.gz: 4ef8c4ba7538c738e2c44b6f16aafbdc11d640485ecf99a603a764c069b038e290ea320918ec16ecd3405f321e6b4bba5f8b350c6835c5f3a70808954da93350
data/Manifest.txt CHANGED
@@ -9,13 +9,22 @@ lib/sportdb/formats/outline_reader.rb
9
9
  lib/sportdb/formats/package.rb
10
10
  lib/sportdb/formats/scores.rb
11
11
  lib/sportdb/formats/season_utils.rb
12
+ lib/sportdb/formats/structs/club.rb
13
+ lib/sportdb/formats/structs/match.rb
14
+ lib/sportdb/formats/structs/matchlist.rb
15
+ lib/sportdb/formats/structs/season.rb
16
+ lib/sportdb/formats/structs/standings.rb
17
+ lib/sportdb/formats/structs/team_usage.rb
12
18
  lib/sportdb/formats/version.rb
13
19
  test/helper.rb
20
+ test/test_club_helpers.rb
21
+ test/test_clubs.rb
14
22
  test/test_csv_reader.rb
15
23
  test/test_datafile.rb
16
24
  test/test_datafile_match.rb
17
25
  test/test_goals.rb
26
+ test/test_match.rb
18
27
  test/test_outline_reader.rb
19
28
  test/test_package.rb
20
29
  test/test_scores.rb
21
- test/test_season_utils.rb
30
+ test/test_season.rb
data/Rakefile CHANGED
@@ -20,7 +20,7 @@ Hoe.spec 'sportdb-formats' do
20
20
  self.licenses = ['Public Domain']
21
21
 
22
22
  self.extra_deps = [
23
- ['alphabets', '>= 0.1.0'],
23
+ ['alphabets', '>= 0.1.3'],
24
24
  ['date-formats', '>= 0.2.4'],
25
25
  ['csvreader', '>= 1.2.4'],
26
26
  ['sportdb-langs', '>= 0.0.1'],
@@ -2,110 +2,21 @@
2
2
 
3
3
 
4
4
  module SeasonHelper ## use Helpers why? why not?
5
- def prev( season )
6
- ## todo: add 1964-1965 format too!!!
7
- if season =~ /^(\d{4})-(\d{2})$/ ## season format is 1964-65
8
- fst = $1.to_i - 1
9
- snd = (100 + $2.to_i - 1) % 100 ## note: add 100 to turn 00 => 99
10
- "%4d-%02d" % [fst,snd]
11
- elsif season =~ /^(\d{4})$/
12
- fst = $1.to_i - 1
13
- "%4d" % [fst]
14
- else
15
- puts "*** !!!! wrong season format >>#{season}<<; exit; sorry"
16
- exit 1
17
- end
18
- end # method prev
19
5
 
6
+ ##############################################
7
+ ### deprecated!!! use new Season class!!!
8
+ ## this code will get removed!!!!
9
+ ###################################################
20
10
 
21
- def key( basename )
22
- if basename =~ /^(\d{4})[\-\/](\d{4})$/ ## e.g. 2011-2012 or 2011/2012 => 2011/12
23
- "%4d/%02d" % [$1.to_i, $2.to_i % 100]
24
- elsif basename =~ /^(\d{4})[\-\/](\d{2})$/ ## e.g. 2011-12 or 2011/12 => 2011/12
25
- "#{$1}/#{$2}"
26
- elsif basename =~ /^(\d{4})$/
27
- $1
28
- else
29
- puts "*** !!!! wrong season format >>#{basename}<<; exit; sorry"
30
- exit 1
31
- end
32
- end # method key
11
+ def prev( str ) SportDb::Import::Season.new( str ).prev; end
12
+ def key( str ) SportDb::Import::Season.new( str ).key; end
13
+ def directory( str, format: nil ) SportDb::Import::Season.new( str ).directory( format: format ); end
33
14
 
34
-
35
- def directory( season, format: nil )
36
- ## todo: find better names for formats - why? why not?:
37
- ## long | archive | decade(?) => 1980s/1988-89, 2010s/2017-18, ...
38
- ## short | std(?) => 1988-89, 2017-18, ...
39
-
40
- ## convert season name to "standard" season name for directory
41
-
42
- ## todo/fix: move to parse / validate season (for (re)use)!!!! - why? why not?
43
- if season =~ /^(\d{4})[\-\/](\d{4})$/ ## e.g. 2011-2012 or 2011/2012 => 2011-12
44
- years = [$1.to_i, $2.to_i]
45
- elsif season =~ /^(\d{4})[\-\/](\d{2})$/ ## e.g. 2011-12 or 2011/12 => 2011-12
46
- years = [$1.to_i, $1.to_i+1]
47
- ## note: check that season end year is (always) season start year + 1
48
- if ($1.to_i+1) % 100 != $2.to_i
49
- puts "*** !!!! wrong season format >>#{season}<<; season end year MUST (always) equal season start year + 1; exit; sorry"
50
- exit 1
51
- end
52
- elsif season =~ /^(\d{4})$/
53
- years = [$1.to_i]
54
- else
55
- puts "*** !!!! wrong season format >>#{season}<<; exit; sorry"
56
- exit 1
57
- end
58
-
59
-
60
- if ['l', 'long', 'archive' ].include?( format.to_s ) ## note: allow passing in of symbol to e.g. 'long' or :long
61
- if years.size == 2
62
- "%3d0s/%4d-%02d" % [years[0] / 10, years[0], years[1] % 100] ## e.g. 2000s/2001-02
63
- else ## assume size 1 (single year season)
64
- "%3d0s/%4d" % [years[0] / 10, years[0]]
65
- end
66
- else ## default 'short' format / fallback
67
- if years.size == 2
68
- "%4d-%02d" % [years[0], years[1] % 100] ## e.g. 2001-02
69
- else ## assume size 1 (single year season)
70
- "%4d" % years[0]
71
- end
72
- end
73
- end # method directory
74
-
75
-
76
-
77
- def start_year( season ) ## get start year
78
- ## convert season name to "standard" season name for directory
79
-
80
- ## todo/check: just return year from first for chars - keep it simple - why? why not?
81
- if season =~ /^(\d{4})[\-\/](\d{4})$/ ## e.g. 2011-2010 or 2011/2011 => 2011-10
82
- $1
83
- elsif season =~ /^(\d{4})[\-\/](\d{2})$/
84
- $1
85
- elsif season =~ /^(\d{4})$/
86
- $1
87
- else
88
- puts "*** !!!! wrong season format >>#{season}<<; exit; sorry"
89
- exit 1
90
- end
91
- end
92
-
93
- def end_year( season ) ## get end year
94
- ## convert season name to "standard" season name for directory
95
- if season =~ /^(\d{4})[\-\/](\d{4})$/ ## e.g. 2011-2010 or 2011/2011 => 2011-10
96
- $2
97
- elsif season =~ /^(\d{4})[\-\/](\d{2})$/
98
- ## note: assume second year is always +1
99
- ## todo/fix: add assert/check - why? why not?
100
- ## eg. 1999-00 => 2000 or 1899-00 => 1900
101
- ($1.to_i+1).to_s
102
- elsif season =~ /^(\d{4})$/
103
- $1
104
- else
105
- puts "*** !!!! wrong season format >>#{season}<<; exit; sorry"
106
- exit 1
107
- end
108
- end
15
+ ## note: new start_year now returns an integer number (no longer a string)!!!
16
+ def start_year( str ) SportDb::Import::Season.new( str ).start_year; end
17
+ ## note: new end_year now returns an integer number (no longer a string)!!!
18
+ ## if there is no end_year (year? == true) then returns nil (no longer the start_year "as fallback")!!!
19
+ def end_year( str ) SportDb::Import::Season.new( str ).end_year; end
109
20
  end # module SeasonHelper
110
21
 
111
22
 
@@ -0,0 +1,221 @@
1
+ # encoding: utf-8
2
+
3
module SportDb
  module Import

    ##
    # Club (team) record used by the import tooling.
    #
    # note: use our own (internal) club struct for now - why? why not?
    #   - check that shape/structure/fields/attributes match
    #     the Team struct in sportdb-text (in SportDb::Struct::Team) !!!!
    #
    # more attribs - todo/fix - also add "upstream" to struct & model!!!!!
    #   district, geos, year_end, country, etc.
    class Club

      ## Build a new club and populate it from keyword arguments (see #update).
      def self.create( **kwargs )
        ## note: forward with ** - passing the hash as a positional argument
        ##   raises ArgumentError on ruby 3+ (update accepts keywords only)
        new.update( **kwargs )
      end

      ## Start with empty (non-nil) name lists so that #names, #duplicates, etc.
      ##   work on a freshly created club.
      def initialize
        @alt_names      = []
        @alt_names_auto = []
      end

      ## Set name, alt_names and/or city from keyword arguments.
      ##   note: only keys that are present get assigned (a value might be nil)
      ##         all other keywords are silently ignored (for now).
      ## Returns self.
      def update( **kwargs )
        @name      = kwargs[:name]       if kwargs.key?( :name )
        @alt_names = kwargs[:alt_names]  if kwargs.key?( :alt_names )
        @city      = kwargs[:city]       if kwargs.key?( :city )
        ## todo/fix: use city struct - why? why not?
        ## todo/fix: add country too or report unused keywords / attributes - why? why not?

        self   ## note - MUST return self for chaining
      end


      ##  todo: use just names for alt_names - why? why not?
      attr_accessor :key, :name, :alt_names,
                    :code,    ## code == abbreviation e.g. ARS etc.
                    :year, :year_end,   ## todo/fix: change year_end to end_year (like in season)!!!
                    :ground

      alias_method :title, :name  ## add alias/compat - why? why not

      ## All names, that is, the name followed by all alternate names.
      def names
        ## todo/check: add alt_names_auto too? - why? why not?
        [@name] + @alt_names
      end


      ## special import only attribs
      attr_accessor :alt_names_auto    ## auto-generated alt names
      attr_accessor :wikipedia         # wikipedia page name (for english (en))


      ## true if a year_end is set.
      def historic?()  @year_end ? true : false; end
      alias_method :past?, :historic?


      attr_accessor :a, :b
      def a?()  @a.nil?;  end    ## is a (1st) team / club (i)?  if a is NOT set
      def b?()  !@a.nil?; end    ## is b (2nd/reserve/jr) team / club (ii) if a is set

      ## note: delegate/forward all geo attributes for team b for now (to team a) - keep - why? why not?
      attr_writer :city, :district, :country, :geos
      def city()     @a.nil? ? @city     : @a.city;     end
      def district() @a.nil? ? @district : @a.district; end
      def country()  @a.nil? ? @country  : @a.country;  end
      def geos()     @a.nil? ? @geos     : @a.geos;     end


      ## note: returns the wikipedia page name itself (truthy) or nil - not a strict boolean
      def wikipedia?()  @wikipedia; end

      ## Link to the (english) wikipedia page or nil if no page name is set.
      def wikipedia_url
        return nil unless @wikipedia

        ##  note: replace spaces with underscore (_)
        ##          e.g. Club Brugge KV => Club_Brugge_KV
        ##  todo/check/fix:
        ##    check if "plain" dash (-) needs to get replaced with typographic dash??
        "https://en.wikipedia.org/wiki/#{@wikipedia.gsub( ' ', '_' )}"
      end


      ## helper methods for import only

      ## check for duplicates - true if two (or more) of the names
      ##   (name + alt names + auto-generated alt names)
      ##   are the same once sanitized and normalized
      def duplicates?
        norms = ([name] + alt_names + alt_names_auto).map { |n| normalize( sanitize( n ) ) }

        norms.size != norms.uniq.size
      end

      ## Returns a hash of normalized name => [original names, ...]
      ##   for all normalized names that show up more than once.
      def duplicates
        all_names = [name] + alt_names + alt_names_auto

        ## group by normalized name and select if more than one (original) name
        all_names.group_by { |n| normalize( sanitize( n ) ) }
                 .select   { |_norm, originals| originals.size > 1 }
      end

      ## Add the variants found for the passed-in name (or array of names)
      ##   to the auto-generated alt names.
      def add_variants( name_or_names )
        names = name_or_names.is_a?( Array ) ? name_or_names : [name_or_names]
        names.each do |name|
          self.alt_names_auto += variants( sanitize( name ) )
        end
      end


      ###################################
      # "global" helper - move to ___ ? why? why not?

      ## note: allow placeholder years to e.g. (-___) or (-????)
      ##    for marking missing (to be filled in) years
      YEAR_REGEX = /\([0-9, ?_-]+?\)/    # note: non-greedy (minimum/first) match

      ## check for year(s) e.g. (1887-1911), (-2013),
      ##                        (1946-2001, 2013-) etc.
      def self.strip_year( name )
        name.gsub( YEAR_REGEX, '' ).strip
      end

      ## note: returns the match position (truthy) or nil - not a strict boolean
      def self.has_year?( name ) name =~ YEAR_REGEX; end


      LANG_REGEX = /\[[a-z]{1,2}\]/   ## note also allow [a] or [d] or [e] - why? why not?

      ## remove lang codes e.g. [en], [fr], etc.
      def self.strip_lang( name )
        name.gsub( LANG_REGEX, '' ).strip
      end

      ## note: returns the match position (truthy) or nil - not a strict boolean
      def self.has_lang?( name ) name =~ LANG_REGEX; end


      ## Strip year(s) e.g. (1887-1911), (-2013), (1946-2001,2013-) etc.
      ##   and lang codes e.g. [en], [fr], etc.
      def self.sanitize( name )
        strip_lang( strip_year( name ) )
      end


      ## note: also add (),’,−  etc. e.g.
      ##   Estudiantes (LP) => Estudiantes LP
      ##   Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
      ##   Myllykosken Pallo −47 => Myllykosken Pallo 47

      NORM_REGEX = %r{
                        [.'’º/()−-]
                     }x   # note: in [] dash (-) if last doesn't need to get escaped
      ## note: remove all dots (.), dash (-), ', º, /, etc.
      #   .  U+002E (46) - FULL STOP
      #   '  U+0027 (39) - APOSTROPHE
      #   ’  U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
      #   º  U+00BA (186) - MASCULINE ORDINAL INDICATOR
      #   /  U+002F (47) - SOLIDUS
      #   (  U+0028 (40) - LEFT PARENTHESIS
      #   )  U+0029 (41) - RIGHT PARENTHESIS
      #   −  U+2212 (8722) - MINUS SIGN
      #   -  U+002D (45) - HYPHEN-MINUS

      ## for norm(alizing) names
      def self.strip_norm( name )
        name.gsub( NORM_REGEX, '' )
      end

      ## Strip punctuation (see NORM_REGEX), remove all spaces and downcase.
      def self.normalize( name )
        # note: do NOT call sanitize here (keep normalize "atomic" for reuse)
        name = strip_norm( name )
        name = name.gsub( ' ', '' )    # note: also remove all spaces!!!

        ## todo/fix: use our own downcase - why? why not?
        ## note: downcase_i18n is NOT defined here - must get provided by the including library
        downcase_i18n( name )     ## do NOT care about upper and lowercase for now
      end


      def self.strip_wiki( name )     # todo/check: rename to strip_wikipedia_en - why? why not?
        ## note: strip disambiguation qualifier from wikipedia page name if present
        ##        note: only remove year and foot... for now
        ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
        ##      Willem II (football club)  => Willem II
        ##
        ## e.g. do NOT strip others !! e.g.
        ##   América Futebol Clube (MG)
        ##  only add more "special" cases on demand (that, is) if we find more
        name = name.gsub( /\([12][^\)]+?\)/, '' ).strip  ## starting with a digit 1 or 2 (assuming year)
        name = name.gsub( /\(foot[^\)]+?\)/, '' ).strip  ## starting with foot (assuming football ...)
        name
      end


      private
      ## private "shortcut" convenience helpers
      def sanitize( name )  self.class.sanitize( name ); end
      def normalize( name ) self.class.normalize( name ); end

      ## note: Variant is NOT defined here - must get provided by the including library
      def variants( name )  Variant.find( name ); end
    end # class Club



    ############
    #  convenience
    #   Club and Team are for now alias
    #     in the future make
    #       Club > Team
    #       NationalTeam > Team  - why? why not?
    Team = Club


  end # module Import
end # module SportDb
@@ -0,0 +1,131 @@
1
+ # encoding: utf-8
2
+
3
+ ##
4
+ # note: add all "former" structs to the SportDb::Import module / namespace
5
+
6
module SportDb
  module Import


    ##
    # Match record (teams, date, round, scores, etc.) used by the import tooling.
    class Match

      ## "plain" attributes - assigned as is
      INFO_ATTRIBUTES = [:date,
                         :team1,    :team2,
                         :conf1,    :conf2,
                         :country1, :country2,
                         ## note: round is a string!!!  e.g. '1', '2' for matchday or 'Final', 'Semi-final', etc.
                         ##   todo: use to_s - why? why not?
                         :round, :stage, :leg, :group, :comments].freeze

      ## score attributes - (always) get (auto-)converted to integers
      SCORE_ATTRIBUTES = [:score1, :score1i, :score1et, :score1p, :score1agg,
                          :score2, :score2i, :score2et, :score2p, :score2agg].freeze

      private_constant :INFO_ATTRIBUTES, :SCORE_ATTRIBUTES


      ## Build a new match and populate it from keyword arguments (see #update).
      def self.create( **kwargs )    ## keep using create why? why not?
        ## note: forward with ** - passing the hash as a positional argument
        ##   raises ArgumentError on ruby 3+ (update accepts keywords only)
        new.update( **kwargs )
      end

      ## note: with no keyword arguments nothing gets set (all attributes read as nil)
      def initialize( **kwargs )
        update( **kwargs )  unless kwargs.empty?
      end


      ## Assign all passed-in attributes, convert scores to integers
      ##   and (re)calculate the winner.
      ##   note: prints a report to stdout if score1 or score2 is missing.
      ## Returns self.
      def update( **kwargs )
        ## note: check with key? because value might be nil!!!
        (INFO_ATTRIBUTES + SCORE_ATTRIBUTES).each do |attr|
          instance_variable_set( "@#{attr}", kwargs[attr] )  if kwargs.key?( attr )
        end

        ## note: (always) (auto-)convert scores to integers
        SCORE_ATTRIBUTES.each do |attr|
          value = instance_variable_get( "@#{attr}" )
          instance_variable_set( "@#{attr}", value.to_i )  if value
        end


        ## todo/fix:
        ##  gr-greece/2014-15/G1.csv:
        ##     G1,10/05/15,Niki Volos,OFI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
        ##

        ##  for now score1 and score2 must be present
        if @score1.nil? || @score2.nil?
          puts "*** missing scores for match:"
          pp kwargs
          ## exit 1
        end

        ## todo/fix: auto-calculate winner
        # return 1,2,0   1 => team1, 2 => team2, 0 => draw/tie
        ### calculate winner - use 1,2,0
        @winner = if @score1 && @score2
                    if    @score1 > @score2 then 1
                    elsif @score2 > @score1 then 2
                    else                         0
                    end
                  else
                    nil   # unknown / undefined
                  end

        self   ## note - MUST return self for chaining
      end


      attr_reader :date,
                  :team1,     :team2,      ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
                  :score1,    :score2,     ## full time
                  :score1i,   :score2i,    ## half time (first (i) part)
                  :score1et,  :score2et,   ## extra time
                  :score1p,   :score2p,    ## penalty
                  :score1agg, :score2agg,  ## full time (all legs) aggregated
                  :winner,    # return 1,2,0   1 => team1, 2 => team2, 0 => draw/tie
                  :round,     ## todo/fix:  use round_num or similar - for compat with db activerecord version? why? why not?
                  :leg,       ## e.g. '1','2','3','replay', etc.   - use leg for marking **replay** too - keep/make leg numeric?! - why? why not?
                  :stage,
                  :group,
                  :conf1,     :conf2,      ## special case for mls e.g. conference1, conference2 (e.g. west, east, central)
                  :country1,  :country2,   ## special case for champions league etc. - uses FIFA country code
                  :comments


      def over?()      true; end  ## for now all matches are over - in the future check date!!!
      def complete?()  true; end  ## for now all scores are complete - in the future check scores; might be missing - not yet entered


      def score_str    # pretty print (full time) scores; convenience method
        "#{@score1}-#{@score2}"
      end

      def scorei_str   # pretty print (half time) scores; convenience method
        "#{@score1i}-#{@score2i}"
      end

    end  # class Match
  end # module Import

end # module SportDb