sportdb-formats 1.1.2 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -13
  3. data/Rakefile +1 -1
  4. data/lib/sportdb/formats.rb +5 -0
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +9 -11
  7. data/lib/sportdb/formats/league/league_index.rb +22 -18
  8. data/lib/sportdb/formats/league/league_outline_reader.rb +4 -1
  9. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  10. data/lib/sportdb/formats/match/match_parser.rb +27 -15
  11. data/lib/sportdb/formats/match/match_parser_csv.rb +148 -21
  12. data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
  13. data/lib/sportdb/formats/name_helper.rb +4 -1
  14. data/lib/sportdb/formats/package.rb +30 -8
  15. data/lib/sportdb/formats/score/score_formats.rb +19 -0
  16. data/lib/sportdb/formats/score/score_parser.rb +4 -2
  17. data/lib/sportdb/formats/structs/match.rb +2 -0
  18. data/lib/sportdb/formats/structs/team.rb +7 -0
  19. data/lib/sportdb/formats/team/club_index.rb +13 -11
  20. data/lib/sportdb/formats/team/club_index_history.rb +138 -0
  21. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  22. data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
  23. data/lib/sportdb/formats/version.rb +1 -1
  24. data/test/helper.rb +47 -81
  25. data/test/test_club_index_history.rb +107 -0
  26. data/test/test_club_reader_history.rb +212 -0
  27. data/test/test_datafile_package.rb +1 -1
  28. data/test/test_match_status_parser.rb +49 -0
  29. data/test/test_scores.rb +2 -0
  30. metadata +10 -17
  31. data/test/test_conf.rb +0 -65
  32. data/test/test_csv_match_parser.rb +0 -114
  33. data/test/test_csv_match_parser_utils.rb +0 -20
  34. data/test/test_match_auto.rb +0 -72
  35. data/test/test_match_auto_champs.rb +0 -45
  36. data/test/test_match_auto_euro.rb +0 -37
  37. data/test/test_match_auto_relegation.rb +0 -41
  38. data/test/test_match_auto_worldcup.rb +0 -61
  39. data/test/test_match_champs.rb +0 -27
  40. data/test/test_match_eng.rb +0 -26
  41. data/test/test_match_euro.rb +0 -27
  42. data/test/test_match_start_date.rb +0 -44
  43. data/test/test_match_worldcup.rb +0 -27
@@ -0,0 +1,86 @@
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
+ module SportDb
11
+
12
# Enum-like namespace holding the match status values.
#
# note: use a class as an "enum"-like namespace for now - why? why not?
#       move class into Match e.g. Match::Status - why? why not?
class Status
  ## note: values are frozen - the constants are shared by all callers
  ##       and must not get changed in place
  CANCELLED = 'CANCELLED'.freeze   # canceled (US spelling), cancelled (UK spelling) - what to use?
  AWARDED   = 'AWARDED'.freeze
  POSTPONED = 'POSTPONED'.freeze
  ABANDONED = 'ABANDONED'.freeze
  REPLAY    = 'REPLAY'.freeze
end # class Status
21
+
22
+
23
+
24
# Helpers for parsing a status text and for finding (and marking)
# a status inside a line.
class StatusParser

  # Maps a status text to one of the Status constants.
  #
  # str - the text to check; only the start of the text gets checked,
  #       that is, any text following the status keyword is allowed
  #
  # Returns the matching Status constant or nil if no match found.
  def self.parse( str )
    ## note: english usage - cancelled (in UK), canceled (in US)
    ## note: anchor with \A (start of string) and NOT with ^ (start of any line)
    ##       otherwise a keyword on a second line of the text would match too
    case str
    when /\A(?:cancelled|canceled|can\.)/i then Status::CANCELLED
    when /\A(?:awarded|awd\.)/i            then Status::AWARDED
    when /\Apostponed/i                    then Status::POSTPONED
    when /\A(?:abandoned|abd\.)/i          then Status::ABANDONED
    when /\Areplay/i                       then Status::REPLAY
    end   # note: no when clause matching results in nil
  end


  ## a "protected" text run block e.g. [cancelled]
  RUN_RE = /\[
             (?<text>[^\]]+)
           \]
          /x

  # Finds the first text run block e.g. [] in line that holds a status
  # and replaces the block IN PLACE with a [STATUS.<status>] marker.
  #
  # line - the string to scan; gets changed in place if a status is found
  #
  # Returns the Status constant found or nil if no block holds a status
  # (line is left untouched in this case).
  def self.find!( line )
    ## for now check all "protected" text run blocks e.g. []
    rest = line
    while (m = RUN_RE.match( rest ))
      rest = m.post_match   ## keep on processing rest of line (a.k.a. post match string)

      status = parse( m[:text].strip )
      next unless status

      ## note: all blocks before this one did not hold a status, thus,
      ##       the first occurrence of the matched string is the block found;
      ##       use the block form to insert the marker as a literal string
      line.sub!( m[0] ) { "[STATUS.#{status}]" }
      return status
    end # while match

    nil
  end # method find!
end # class StatusParser
85
+
86
+ end # module SportDb
@@ -46,9 +46,12 @@ module SportDb
46
46
  ## Estudiantes (LP) => Estudiantes LP
47
47
  ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
48
48
  ## Myllykosken Pallo −47 => Myllykosken Pallo 47
49
+ ##
50
+ ## add & too!!
51
+ ## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
49
52
 
50
53
  NORM_RE = %r{
51
- [.'’º/()_−-]
54
+ [.'’º/()&_−-]
52
55
  }x # note: in [] dash (-) if last doesn't need to get escaped
53
56
  ## note: remove all dots (.), dash (-), ', º, /, etc.
54
57
  # . U+002E (46) - FULL STOP
@@ -45,12 +45,22 @@ module SportDb
45
45
  \.wiki\.txt$
46
46
  }x
47
47
 
48
- CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
48
+ ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
+ CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
49
50
  (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
50
51
  clubs
51
52
  (?:_[a-z0-9_-]+)?
52
53
  \.props\.txt$
53
54
  }x
55
+ CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
+
57
+
58
+ CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
+ clubs
61
+ (?:_[a-z0-9_-]+)?
62
+ \.history\.txt$
63
+ }x
54
64
 
55
65
  ## teams.txt or teams_history.txt
56
66
  TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
@@ -112,12 +122,14 @@ module SportDb
112
122
  def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
113
123
  def self.match_teams( path ) TEAMS_RE.match( path ); end
114
124
 
115
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
116
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
125
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
+ def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
117
128
 
118
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
119
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
120
- def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
129
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
+ def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
+ def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
121
133
 
122
134
  def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
123
135
  def self.match_leagues( path ) LEAGUES_RE.match( path ); end
@@ -149,8 +161,14 @@ module SportDb
149
161
  alias_method :match_clubs_wiki?, :match_clubs_wiki
150
162
  alias_method :clubs_wiki?, :match_clubs_wiki
151
163
 
152
- alias_method :match_club_props?, :match_club_props
153
- alias_method :club_props?, :match_club_props
164
+ alias_method :match_clubs_history?, :match_clubs_history
165
+ alias_method :clubs_history?, :match_clubs_history
166
+
167
+ alias_method :match_club_props, :match_clubs_props
168
+ alias_method :match_club_props?, :match_clubs_props
169
+ alias_method :club_props?, :match_clubs_props
170
+ alias_method :match_clubs_props?, :match_clubs_props
171
+ alias_method :clubs_props?, :match_clubs_props
154
172
 
155
173
  alias_method :match_leagues?, :match_leagues
156
174
  alias_method :leagues?, :match_leagues
@@ -243,6 +261,10 @@ module SportDb
243
261
  def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
244
262
  def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
245
263
  def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
264
+ def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
265
+
266
+ def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
267
+
246
268
 
247
269
  ## return all match datafile entries
248
270
  def match( format: 'txt' )
@@ -9,6 +9,24 @@ module ScoreFormats
9
9
  ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
10
10
 
11
11
 
12
+ ## note: allow SPECIAL cases WITHOUT full time scores (just a.e.t or pen. + a.e.t.)
13
+ ## 3-4 pen. 2-2 a.e.t.
14
+ ## 2-2 a.e.t.
15
+ EN__P_ET__RE = /\b
16
+ (?:
17
+ (?<score1p>\d{1,2})
18
+ [ ]* - [ ]* # note: sep in optional block; CANNOT use a reference
19
+ (?<score2p>\d{1,2})
20
+ [ ]* #{P_EN} [ ]*
21
+ )? # note: make penalty (P) score optional for now
22
+ (?<score1et>\d{1,2})
23
+ [ ]* - [ ]*
24
+ (?<score2et>\d{1,2})
25
+ [ ]* #{ET_EN}
26
+ (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
27
+ ## note: \b works only after non-alphanum e.g. )
28
+
29
+
12
30
  ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
13
31
  ## 3-4 pen. 2-2 a.e.t. (1-1, ) or
14
32
  ## 3-4 pen. 2-2 a.e.t. (1-1) or
@@ -203,6 +221,7 @@ module ScoreFormats
203
221
  FORMATS_EN = [
204
222
  [ EN__P_ET_FT_HT__RE, '[SCORE.EN__P?_ET_(FT_HT?)]' ], # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
205
223
  [ EN__P_FT_HT__RE, '[SCORE.EN__P_(FT_HT?)]' ], # e.g. 5-1 pen. (1-1)
224
+ [ EN__P_ET__RE, '[SCORE.EN__P?_ET]' ], # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
206
225
  [ EN__FT_HT__RE, '[SCORE.EN__FT_(HT)?]' ], # e.g. 1-1 (1-0)
207
226
  ]
208
227
 
@@ -175,8 +175,10 @@ private
175
175
  score2i = h[:score2i].to_i
176
176
  end
177
177
 
178
- score1 = h[:score1].to_i
179
- score2 = h[:score2].to_i
178
+ if h[:score1] && h[:score2] ## note: full time (FT) score can be optional too!!!
179
+ score1 = h[:score1].to_i
180
+ score2 = h[:score2].to_i
181
+ end
180
182
 
181
183
  if h[:score1et] && h[:score2et]
182
184
  score1et = h[:score1et].to_i
@@ -18,6 +18,7 @@ class Match
18
18
  :leg, ## e.g. '1','2','3','replay', etc. - use leg for marking **replay** too - keep/make leg numeric?! - why? why not?
19
19
  :stage,
20
20
  :group,
21
+ :status, ## e.g. replay, cancelled, awarded, abandoned, postponed, etc.
21
22
  :conf1, :conf2, ## special case for mls e.g. conference1, conference2 (e.g. west, east, central)
22
23
  :country1, :country2, ## special case for champions league etc. - uses FIFA country code
23
24
  :comments,
@@ -46,6 +47,7 @@ class Match
46
47
  @stage = kwargs[:stage] if kwargs.has_key? :stage
47
48
  @leg = kwargs[:leg] if kwargs.has_key? :leg
48
49
  @group = kwargs[:group] if kwargs.has_key? :group
50
+ @status = kwargs[:status] if kwargs.has_key? :status
49
51
  @comments = kwargs[:comments] if kwargs.has_key? :comments
50
52
 
51
53
  @league = kwargs[:league] if kwargs.has_key? :league
@@ -65,6 +65,13 @@ class Team
65
65
  end
66
66
 
67
67
 
68
+ ## add convenience lookup helper / method for name by season for now
69
+ ## use clubs history - for now kept separate from struct - why? why not?
70
+ def name_by_season( season )
71
+ ## note: returns / fallback to "regular" name if no records found in history
72
+ SportDb::Import.catalog.clubs_history.find_name_by( name: name, season: season ) || name
73
+ end
74
+
68
75
  ## helper methods for import only
69
76
  ## check for duplicates
70
77
  include NameHelper
@@ -54,6 +54,7 @@ class ClubIndex
54
54
  ## normalize( name )
55
55
 
56
56
  def strip_wiki( name ) # todo/check: rename to strip_wikipedia_en - why? why not?
57
+ ## change/rename to strip_wiki_qualifier or such - why? why not?
57
58
  ## note: strip disambiguation qualifier from wikipedia page name if present
58
59
  ## note: only remove year and foot... for now
59
60
  ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
@@ -178,22 +179,24 @@ class ClubIndex
178
179
  ## todo/fix/check: use rename to find_canon or find_canonical() or something??
179
180
  ## remove (getting used?) - why? why not?
180
181
  def []( name ) ## lookup by canoncial name only; todo/fix: add find alias why? why not?
182
+ puts "WARN!! do not use ClubIndex#[] for lookup >#{name}< - will get removed!!!"
181
183
  @clubs[ name ]
182
184
  end
183
185
 
184
186
 
185
- ## todo/fix/check: return empty array if no match!!!
186
- ## and NOT nil (add || []) - why? why not?
187
187
  def match( name )
188
+ # note: returns empty array (e.g. []) if no match and NOT nil
188
189
  name = normalize( name )
189
- m = @clubs_by_name[ name ]
190
+ m = @clubs_by_name[ name ] || []
190
191
 
191
192
  ## no match - retry with unaccented variant if different
192
193
  ## e.g. example is Preussen Münster (with mixed accent and unaccented letters) that would go unmatched for now
193
194
  ## Preussen Münster => preussenmünster (norm) => preussenmunster (norm+unaccent)
194
- if m.nil?
195
+ if m.empty?
195
196
  name2 = unaccent( name )
196
- m = @clubs_by_name[ name2 ] if name2 != name
197
+ if name2 != name
198
+ m = @clubs_by_name[ name2 ] || []
199
+ end
197
200
  end
198
201
  m
199
202
  end
@@ -227,10 +230,8 @@ class ClubIndex
227
230
  country = country( country )
228
231
 
229
232
  ## note: match must for now always include name
230
- if m ## filter by country
231
- m = m.select { |club| club.country.key == country.key }
232
- m = nil if m.empty? ## note: reset to nil if no more matches
233
- end
233
+ ## filter by country
234
+ m = m.select { |club| club.country.key == country.key }
234
235
  end
235
236
  m
236
237
  end
@@ -263,7 +264,7 @@ class ClubIndex
263
264
 
264
265
  m = match_by( name: name, country: country )
265
266
 
266
- if m.nil?
267
+ if m.empty?
267
268
  ## (re)try with second country - quick hacks for known leagues
268
269
  ## todo/fix: add league flag to activate!!! - why? why not
269
270
  m = match_by( name: name, country: 'wal' ) if country.key == 'eng'
@@ -272,6 +273,7 @@ class ClubIndex
272
273
  m = match_by( name: name, country: 'mc' ) if country.key == 'fr'
273
274
  m = match_by( name: name, country: 'li' ) if country.key == 'ch'
274
275
  m = match_by( name: name, country: 'ca' ) if country.key == 'us'
276
+ m = match_by( name: name, country: 'nz' ) if country.key == 'au'
275
277
  end
276
278
  else ## try "global" search - no country passed in
277
279
  m = match( name )
@@ -279,7 +281,7 @@ class ClubIndex
279
281
 
280
282
 
281
283
  club = nil
282
- if m.nil?
284
+ if m.empty?
283
285
  ## puts "** !!! WARN !!! no match for club >#{name}<"
284
286
  elsif m.size > 1
285
287
  puts "** !!! ERROR - too many matches (#{m.size}) for club >#{name}<:"
@@ -0,0 +1,138 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
# Index of club name histories (by season) keyed by canonical club name.
#
# note: keep name history for now separate from club struct - why? why not?
#       later yes, yes, yes, merge name history into club struct!!!!!
#
#       for now the name history is experimental
class ClubHistoryIndex

  # Builds an index from all clubs history datafiles found in path.
  #
  # path - passed on to Package.new
  #
  # Returns the new index.
  def self.build( path )
    pack = Package.new( path )   ## lets us use directory or zip archive

    recs = []
    pack.each_clubs_history do |entry|
      recs.concat( ClubHistoryReader.parse( entry.read ) )
    end

    index = new
    index.add( recs )
    index
  end


  def catalog() Import.catalog; end


  def initialize
    @clubs  = {}   ## clubs (indexed) by canonical name
    @errors = []
  end

  attr_reader :errors
  def errors?() !@errors.empty?; end

  def mappings() @clubs; end   ## todo/check: rename to records or histories or something - why? why not?


  # Adds a history entry for the club; entries are stored as
  #   [season, [keyword, args]]
  # in the list kept for the club's (canonical) name.
  #
  # Returns the (re)sorted list of entries for the club.
  def add_history( club_rec, keyword, season, args )
    ## note use season obj for now (and NOT key) - why? why not?
    rec = @clubs[ club_rec.name ] ||= []

    rec << [season, [keyword, args]]

    ## note: always keep records sorted by season for now (newest season first)
    ##   check if 2010 and 2010/11 is in order using alpha sort?? (see argentina)
    rec.sort! { |l,r| r[0] <=> l[0] }
  end


  # Adds one or more history records.
  #
  # rec_or_recs - a single record or an array of records; a record is
  #               read as [keyword, season_key, [name, country], [name, country]]
  #               (assumes records are arrays - the code below indexes them as such)
  def add( rec_or_recs )
    ## note: a record is an array itself, thus, checking for an array is
    ##       NOT enough to tell a single record from a list of records;
    ##       a list of records starts with an array, a record with a keyword
    single = !rec_or_recs.is_a?( Array ) ||
             (!rec_or_recs.empty? && !rec_or_recs.first.is_a?( Array ))
    recs = single ? [rec_or_recs] : rec_or_recs   ## wrap (single) rec in array

    recs.each do |rec|

      keyword    = rec[0]
      season_key = rec[1]
      args       = rec[2..-1]   ## get rest of args e.g. one, two or more

      ## note: for now only add (re)name history season records,
      ##         that is, skip MERGE and BANKRUPT for now
      ##         and incl. only RENAME, REFORM, MOVE for now
      next if ['MERGE', 'BANKRUPT'].include?( keyword )


      name_old = strip_geo( args[0][0] )   ## note: strip optional geo part from name
      name_new = strip_geo( args[1][0] )

      country_old = args[0][1]
      country_new = args[1][1]

      club_old = catalog.clubs.find_by!( name: name_old, country: country_old )
      club_new = catalog.clubs.find_by!( name: name_new, country: country_new )

      ## note use season obj for now (and NOT key) - why? why not?
      season = Season.new( season_key )

      ## todo/check:
      ##   check if club_old and club_new reference different club record!!
      ##    examples - RB II -> Liefering ??  or
      ##               FC Pasching -> OOE Juniors ??
      ##               Austria Salzburg -> RB Salzburg ??
      ##   for now always add name history to both - why? why not?

      add_history( club_old, keyword, season, args )
      ## note: allow for now different club references
      ##   but maybe warn later - why? why not?
      ##   add history to both for now
      add_history( club_new, keyword, season, args ) if club_old != club_new
    end # each rec
  end # method add


  #### todo/check: move as method to club struct later - to always use club reference
  ##   returns (simply) name as string for now or nil - why? why not?
  #
  # history entry example
  #  "Arsenal FC"=>
  #   [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
  #    [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
  #    [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
  #
  # name   - the (canonical) club name used as key for the history lookup
  # season - a season object or anything accepted by Season.new
  #
  # Returns the name (geo part stripped) or nil if no history is on record.
  def find_name_by( name:, season: )
    recs = @clubs[ name ]
    return nil unless recs

    wanted = season( season )   ## make sure season is a season obj (and NOT a string)

    ## check season records for name; use linear search (assume only few records)
    ##  note: records are sorted newest season first
    hit = recs.find { |rec| wanted >= rec[0] }

    if hit
      _, (_, args) = hit
      strip_geo( args[1][0] )   # use second arg (a.k.a. new name)
    else
      ## if we get here the season is before the oldest record - use last name
      _, (_, args) = recs.last
      strip_geo( args[0][0] )   # use first arg (a.k.a. old name)
    end
  end

  ##################
  ## helpers
  def season( season )
    season.is_a?( Season ) ? season : Season.new( season )
  end

  def strip_geo( name )
    ## e.g. Arsenal, London => Arsenal
    name.split(',')[0].strip
  end
end # class ClubHistoryIndex
136
+
137
+ end # module Import
138
+ end # module SportDb