sportdb-formats 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -13
  3. data/Rakefile +1 -1
  4. data/lib/sportdb/formats.rb +5 -0
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +9 -11
  7. data/lib/sportdb/formats/league/league_index.rb +22 -18
  8. data/lib/sportdb/formats/league/league_outline_reader.rb +4 -1
  9. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  10. data/lib/sportdb/formats/match/match_parser.rb +27 -15
  11. data/lib/sportdb/formats/match/match_parser_csv.rb +148 -21
  12. data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
  13. data/lib/sportdb/formats/name_helper.rb +4 -1
  14. data/lib/sportdb/formats/package.rb +30 -8
  15. data/lib/sportdb/formats/score/score_formats.rb +19 -0
  16. data/lib/sportdb/formats/score/score_parser.rb +4 -2
  17. data/lib/sportdb/formats/structs/match.rb +2 -0
  18. data/lib/sportdb/formats/structs/team.rb +7 -0
  19. data/lib/sportdb/formats/team/club_index.rb +13 -11
  20. data/lib/sportdb/formats/team/club_index_history.rb +138 -0
  21. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  22. data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
  23. data/lib/sportdb/formats/version.rb +1 -1
  24. data/test/helper.rb +47 -81
  25. data/test/test_club_index_history.rb +107 -0
  26. data/test/test_club_reader_history.rb +212 -0
  27. data/test/test_datafile_package.rb +1 -1
  28. data/test/test_match_status_parser.rb +49 -0
  29. data/test/test_scores.rb +2 -0
  30. metadata +10 -17
  31. data/test/test_conf.rb +0 -65
  32. data/test/test_csv_match_parser.rb +0 -114
  33. data/test/test_csv_match_parser_utils.rb +0 -20
  34. data/test/test_match_auto.rb +0 -72
  35. data/test/test_match_auto_champs.rb +0 -45
  36. data/test/test_match_auto_euro.rb +0 -37
  37. data/test/test_match_auto_relegation.rb +0 -41
  38. data/test/test_match_auto_worldcup.rb +0 -61
  39. data/test/test_match_champs.rb +0 -27
  40. data/test/test_match_eng.rb +0 -26
  41. data/test/test_match_euro.rb +0 -27
  42. data/test/test_match_start_date.rb +0 -44
  43. data/test/test_match_worldcup.rb +0 -27
@@ -0,0 +1,86 @@
1
+ #####################
2
+ # helpers for parsing & finding match status e.g.
3
+ # - cancelled / canceled
4
+ # - awarded
5
+ # - abandoned
6
+ # - replay
7
+ # etc.
8
+
9
+
10
+ module SportDb
11
+
12
class Status
  ## Namespace holding the known match status values.
  ##
  ## note: use a class as an "enum"-like namespace for now - why? why not?
  ##   move class into Match e.g. Match::Status - why? why not?
  ##
  ## All values are frozen strings, so a status handed out to callers
  ## cannot be changed in place (which would alter the shared constant).
  CANCELLED = 'CANCELLED'.freeze   # canceled (US spelling), cancelled (UK spelling) - what to use?
  AWARDED   = 'AWARDED'.freeze
  POSTPONED = 'POSTPONED'.freeze
  ABANDONED = 'ABANDONED'.freeze
  REPLAY    = 'REPLAY'.freeze
end # class Status
21
+
22
+
23
+
24
class StatusParser

  ## Status keywords and abbreviations - matched case-insensitively
  ## at the very start of the text (prefix match).
  ##
  ## note: anchored with \A (start of string) and NOT ^ (start of any line),
  ##   so text spanning several lines is only checked at its very beginning.
  ## note: english usage - cancelled (in UK), canceled (in US)
  CANCELLED_RE = /\A(?:cancelled|canceled|can\.)/i
  AWARDED_RE   = /\A(?:awarded|awd\.)/i
  POSTPONED_RE = /\A(?:postponed)/i
  ABANDONED_RE = /\A(?:abandoned|abd\.)/i
  REPLAY_RE    = /\A(?:replay)/i

  ## Maps a text to one of the Status constants.
  ##
  ## str - text to check; must start with the status keyword / abbreviation
  ##
  ## Returns the matching Status value
  ## or nil if no match found (incl. if nil gets passed in).
  def self.parse( str )
    case str
    when CANCELLED_RE then Status::CANCELLED
    when AWARDED_RE   then Status::AWARDED
    when POSTPONED_RE then Status::POSTPONED
    when ABANDONED_RE then Status::ABANDONED
    when REPLAY_RE    then Status::REPLAY
    else                   nil     # no match
    end
  end


  ## a "protected" text run block e.g. [cancelled]
  RUN_RE = /\[
              (?<text>[^\]]+)
            \]
           /x

  ## Searches all text run blocks e.g. [] in line (left to right)
  ## for a match status.
  ##
  ## line - string to search; gets changed IN PLACE -
  ##          the first run with a status gets replaced
  ##          by a marker e.g. [cancelled] => [STATUS.CANCELLED]
  ##
  ## Returns the Status value found
  ## or nil if no run holds a status (line is left untouched).
  def self.find!( line )
    pos = 0
    while (m = RUN_RE.match( line, pos ))
      status = parse( m[:text].strip )

      if status
        ## note: replace by match position (and NOT by searching for the
        ##   run text again) to make sure the run that got parsed is the one
        ##   that gets replaced, e.g. if the same text shows up earlier inside
        ##   an unbalanced bracket
        line[ m.begin(0)...m.end(0) ] = "[STATUS.#{status}]"
        return status
      end

      pos = m.end(0)   ## keep on processing rest of line (after this run)
    end

    nil   # no status found
  end # method find!
end # class StatusParser
85
+
86
+ end # module SportDb
@@ -46,9 +46,12 @@ module SportDb
46
46
  ## Estudiantes (LP) => Estudiantes LP
47
47
  ## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
48
48
  ## Myllykosken Pallo −47 => Myllykosken Pallo 47
49
+ ##
50
+ ## add & too!!
51
+ ## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
49
52
 
50
53
  NORM_RE = %r{
51
- [.'’º/()_−-]
54
+ [.'’º/()&_−-]
52
55
  }x # note: in [] dash (-) if last doesn't need to get escaped
53
56
  ## note: remove all dots (.), dash (-), ', º, /, etc.
54
57
  # . U+002E (46) - FULL STOP
@@ -45,12 +45,22 @@ module SportDb
45
45
  \.wiki\.txt$
46
46
  }x
47
47
 
48
- CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
48
+ ## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
49
+ CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
49
50
  (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
50
51
  clubs
51
52
  (?:_[a-z0-9_-]+)?
52
53
  \.props\.txt$
53
54
  }x
55
+ CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
56
+
57
+
58
+ CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
59
+ (?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
60
+ clubs
61
+ (?:_[a-z0-9_-]+)?
62
+ \.history\.txt$
63
+ }x
54
64
 
55
65
  ## teams.txt or teams_history.txt
56
66
  TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
@@ -112,12 +122,14 @@ module SportDb
112
122
  def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
113
123
  def self.match_teams( path ) TEAMS_RE.match( path ); end
114
124
 
115
- def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
116
- def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
125
+ def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
126
+ def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
127
+ def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
117
128
 
118
- def self.match_clubs( path ) CLUBS_RE.match( path ); end
119
- def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
120
- def self.match_club_props( path, pattern: CLUB_PROPS_RE ) pattern.match( path ); end
129
+ def self.match_clubs( path ) CLUBS_RE.match( path ); end
130
+ def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
131
+ def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
132
+ def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
121
133
 
122
134
  def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
123
135
  def self.match_leagues( path ) LEAGUES_RE.match( path ); end
@@ -149,8 +161,14 @@ module SportDb
149
161
  alias_method :match_clubs_wiki?, :match_clubs_wiki
150
162
  alias_method :clubs_wiki?, :match_clubs_wiki
151
163
 
152
- alias_method :match_club_props?, :match_club_props
153
- alias_method :club_props?, :match_club_props
164
+ alias_method :match_clubs_history?, :match_clubs_history
165
+ alias_method :clubs_history?, :match_clubs_history
166
+
167
+ alias_method :match_club_props, :match_clubs_props
168
+ alias_method :match_club_props?, :match_clubs_props
169
+ alias_method :club_props?, :match_clubs_props
170
+ alias_method :match_clubs_props?, :match_clubs_props
171
+ alias_method :clubs_props?, :match_clubs_props
154
172
 
155
173
  alias_method :match_leagues?, :match_leagues
156
174
  alias_method :leagues?, :match_leagues
@@ -243,6 +261,10 @@ module SportDb
243
261
  def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
244
262
  def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
245
263
  def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
264
+ def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
265
+
266
+ def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
267
+
246
268
 
247
269
  ## return all match datafile entries
248
270
  def match( format: 'txt' )
@@ -9,6 +9,24 @@ module ScoreFormats
9
9
  ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
10
10
 
11
11
 
12
+ ## note: allow SPECIAL cases WITHOUT full time scores (just a.e.t or pen. + a.e.t.)
13
+ ## 3-4 pen. 2-2 a.e.t.
14
+ ## 2-2 a.e.t.
15
+ EN__P_ET__RE = /\b
16
+ (?:
17
+ (?<score1p>\d{1,2})
18
+ [ ]* - [ ]* # note: sep in optional block; CANNOT use a reference
19
+ (?<score2p>\d{1,2})
20
+ [ ]* #{P_EN} [ ]*
21
+ )? # note: make penalty (P) score optional for now
22
+ (?<score1et>\d{1,2})
23
+ [ ]* - [ ]*
24
+ (?<score2et>\d{1,2})
25
+ [ ]* #{ET_EN}
26
+ (?=[ \]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
27
+ ## note: \b works only after non-alphanum e.g. )
28
+
29
+
12
30
  ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
13
31
  ## 3-4 pen. 2-2 a.e.t. (1-1, ) or
14
32
  ## 3-4 pen. 2-2 a.e.t. (1-1) or
@@ -203,6 +221,7 @@ module ScoreFormats
203
221
  FORMATS_EN = [
204
222
  [ EN__P_ET_FT_HT__RE, '[SCORE.EN__P?_ET_(FT_HT?)]' ], # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
205
223
  [ EN__P_FT_HT__RE, '[SCORE.EN__P_(FT_HT?)]' ], # e.g. 5-1 pen. (1-1)
224
+ [ EN__P_ET__RE, '[SCORE.EN__P?_ET]' ], # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
206
225
  [ EN__FT_HT__RE, '[SCORE.EN__FT_(HT)?]' ], # e.g. 1-1 (1-0)
207
226
  ]
208
227
 
@@ -175,8 +175,10 @@ private
175
175
  score2i = h[:score2i].to_i
176
176
  end
177
177
 
178
- score1 = h[:score1].to_i
179
- score2 = h[:score2].to_i
178
+ if h[:score1] && h[:score2] ## note: full time (FT) score can be optional too!!!
179
+ score1 = h[:score1].to_i
180
+ score2 = h[:score2].to_i
181
+ end
180
182
 
181
183
  if h[:score1et] && h[:score2et]
182
184
  score1et = h[:score1et].to_i
@@ -18,6 +18,7 @@ class Match
18
18
  :leg, ## e.g. '1','2','3','replay', etc. - use leg for marking **replay** too - keep/make leg numeric?! - why? why not?
19
19
  :stage,
20
20
  :group,
21
+ :status, ## e.g. replay, cancelled, awarded, abandoned, postponed, etc.
21
22
  :conf1, :conf2, ## special case for mls e.g. conference1, conference2 (e.g. west, east, central)
22
23
  :country1, :country2, ## special case for champions league etc. - uses FIFA country code
23
24
  :comments,
@@ -46,6 +47,7 @@ class Match
46
47
  @stage = kwargs[:stage] if kwargs.has_key? :stage
47
48
  @leg = kwargs[:leg] if kwargs.has_key? :leg
48
49
  @group = kwargs[:group] if kwargs.has_key? :group
50
+ @status = kwargs[:status] if kwargs.has_key? :status
49
51
  @comments = kwargs[:comments] if kwargs.has_key? :comments
50
52
 
51
53
  @league = kwargs[:league] if kwargs.has_key? :league
@@ -65,6 +65,13 @@ class Team
65
65
  end
66
66
 
67
67
 
68
+ ## add convenience lookup helper / method for name by season for now
69
+ ## use clubs history - for now kept separate from struct - why? why not?
70
+ def name_by_season( season )
71
+ ## note: returns / fallback to "regular" name if no records found in history
72
+ SportDb::Import.catalog.clubs_history.find_name_by( name: name, season: season ) || name
73
+ end
74
+
68
75
  ## helper methods for import only
69
76
  ## check for duplicates
70
77
  include NameHelper
@@ -54,6 +54,7 @@ class ClubIndex
54
54
  ## normalize( name )
55
55
 
56
56
  def strip_wiki( name ) # todo/check: rename to strip_wikipedia_en - why? why not?
57
+ ## change/rename to strip_wiki_qualifier or such - why? why not?
57
58
  ## note: strip disambiguation qualifier from wikipedia page name if present
58
59
  ## note: only remove year and foot... for now
59
60
  ## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
@@ -178,22 +179,24 @@ class ClubIndex
178
179
  ## todo/fix/check: use rename to find_canon or find_canonical() or something??
179
180
  ## remove (getting used?) - why? why not?
180
181
  def []( name ) ## lookup by canonical name only; todo/fix: add find alias why? why not?
182
+ puts "WARN!! do not use ClubIndex#[] for lookup >#{name}< - will get removed!!!"
181
183
  @clubs[ name ]
182
184
  end
183
185
 
184
186
 
185
- ## todo/fix/check: return empty array if no match!!!
186
- ## and NOT nil (add || []) - why? why not?
187
187
  def match( name )
188
+ # note: returns empty array (e.g. []) if no match and NOT nil
188
189
  name = normalize( name )
189
- m = @clubs_by_name[ name ]
190
+ m = @clubs_by_name[ name ] || []
190
191
 
191
192
  ## no match - retry with unaccented variant if different
192
193
  ## e.g. example is Preussen Münster (with mixed accent and unaccented letters) that would go unmatched for now
193
194
  ## Preussen Münster => preussenmünster (norm) => preussenmunster (norm+unaccent)
194
- if m.nil?
195
+ if m.empty?
195
196
  name2 = unaccent( name )
196
- m = @clubs_by_name[ name2 ] if name2 != name
197
+ if name2 != name
198
+ m = @clubs_by_name[ name2 ] || []
199
+ end
197
200
  end
198
201
  m
199
202
  end
@@ -227,10 +230,8 @@ class ClubIndex
227
230
  country = country( country )
228
231
 
229
232
  ## note: match must for now always include name
230
- if m ## filter by country
231
- m = m.select { |club| club.country.key == country.key }
232
- m = nil if m.empty? ## note: reset to nil if no more matches
233
- end
233
+ ## filter by country
234
+ m = m.select { |club| club.country.key == country.key }
234
235
  end
235
236
  m
236
237
  end
@@ -263,7 +264,7 @@ class ClubIndex
263
264
 
264
265
  m = match_by( name: name, country: country )
265
266
 
266
- if m.nil?
267
+ if m.empty?
267
268
  ## (re)try with second country - quick hacks for known leagues
268
269
  ## todo/fix: add league flag to activate!!! - why? why not
269
270
  m = match_by( name: name, country: 'wal' ) if country.key == 'eng'
@@ -272,6 +273,7 @@ class ClubIndex
272
273
  m = match_by( name: name, country: 'mc' ) if country.key == 'fr'
273
274
  m = match_by( name: name, country: 'li' ) if country.key == 'ch'
274
275
  m = match_by( name: name, country: 'ca' ) if country.key == 'us'
276
+ m = match_by( name: name, country: 'nz' ) if country.key == 'au'
275
277
  end
276
278
  else ## try "global" search - no country passed in
277
279
  m = match( name )
@@ -279,7 +281,7 @@ class ClubIndex
279
281
 
280
282
 
281
283
  club = nil
282
- if m.nil?
284
+ if m.empty?
283
285
  ## puts "** !!! WARN !!! no match for club >#{name}<"
284
286
  elsif m.size > 1
285
287
  puts "** !!! ERROR - too many matches (#{m.size}) for club >#{name}<:"
@@ -0,0 +1,138 @@
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
+ class ClubHistoryIndex
8
+
9
+ def self.build( path )
10
+ pack = Package.new( path ) ## lets us use direcotry or zip archive
11
+
12
+ recs = []
13
+ pack.each_clubs_history do |entry|
14
+ recs += ClubHistoryReader.parse( entry.read )
15
+ end
16
+ recs
17
+
18
+ index = new
19
+ index.add( recs )
20
+ index
21
+ end
22
+
23
+
24
+
25
+ def catalog() Import.catalog; end
26
+
27
+ ## note: keep name history for now separate from
28
+ ## from club struct - why? why not?
29
+ ## later yes, yes, yes, merge name history into club struct!!!!!
30
+ ##
31
+ ## for now the name history is experimental
32
+
33
+
34
+ def initialize
35
+ @clubs = {} ## clubs (indexed) by canonical name
36
+ @errors = []
37
+ end
38
+
39
+ attr_reader :errors
40
+ def errors?() @errors.empty? == false; end
41
+
42
+ def mappings() @clubs; end ## todo/check: rename to records or histories or something - why? why not?
43
+
44
+
45
+ def add_history( club_rec, keyword, season, args )
46
+ ## note use season obj for now (and NOT key) - why? why not?
47
+ rec = @clubs[ club_rec.name ] ||= []
48
+
49
+ rec << [season, [keyword, args]]
50
+
51
+ ## note: always keep records sorted by season_key for now
52
+ ## check if 2010 and 2010/11 is in order using alpha sort?? (see argentina)
53
+ rec.sort! { |l,r| r[0] <=> l[0] }
54
+ end
55
+
56
+
57
+ def add( rec_or_recs ) ## add club record / alt_names
58
+ recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
59
+
60
+ recs.each do |rec|
61
+
62
+ keyword = rec[0]
63
+ season_key = rec[1]
64
+ args = rec[2..-1] ## get rest of args e.g. one, two or more
65
+
66
+ ## note: for now only add (re)name history season records,
67
+ ## that is, skip MERGE and BANKRUPT for now
68
+ ## and incl. only RENAME, REFORM, MOVE for now
69
+ next if ['MERGE', 'BANKRUPT'].include?( keyword )
70
+
71
+
72
+ name_old = strip_geo( args[0][0] ) ## note: strip optional geo part from name
73
+ name_new = strip_geo( args[1][0] )
74
+
75
+ country_old = args[0][1]
76
+ country_new = args[1][1]
77
+
78
+ club_old = catalog.clubs.find_by!( name: name_old, country: country_old )
79
+ club_new = catalog.clubs.find_by!( name: name_new, country: country_new )
80
+
81
+ ## note use season obj for now (and NOT key) - why? why not?
82
+ season = Season.new( season_key )
83
+
84
+ ## todo/check:
85
+ ## check if club_old and club_new reference different club record!!
86
+ ## examples - RB II -> Liefering ?? or
87
+ ## FC Pasching -> OOE Juniors ??
88
+ ## Austria Salzburg -> RB Salburg ??
89
+ ## for now always add name history to both - why? why not?
90
+
91
+ add_history( club_old, keyword, season, args )
92
+ ## note: allow for now different club references
93
+ ## but maybe warn later - why? why not?
94
+ ## add history to both for now
95
+ add_history( club_new, keyword, season, args ) if club_old != club_new
96
+ end # each rec
97
+ end # method add
98
+
99
+
100
+ #### todo/check: move as method to club struct later - to always use club reference
101
+ ## returns (simply) name as string for now or nil - why? why not?
102
+ #
103
+ # history entry example
104
+ # Arsenal FC"=>
105
+ # [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
106
+ # [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
107
+ # [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
108
+ def find_name_by( name:, season: )
109
+ recs = @clubs[ name ]
110
+ if recs
111
+ season = season( season ) ## make sure season is a season obj (and NOT a string)
112
+ ## check season records for name; use linear search (assume only few records)
113
+ recs.each do |rec|
114
+ if season >= rec[0]
115
+ return strip_geo( rec[1][1][1][0] ) # use second arg
116
+ end
117
+ end
118
+ ## if we get here use last name
119
+ strip_geo( recs[-1][1][1][0][0] ) # use first arg
120
+ else
121
+ nil
122
+ end
123
+ end
124
+
125
+ ##################
126
+ ## helpers
127
+ def season( season )
128
+ season.is_a?( Season ) ? season : Season.new( season )
129
+ end
130
+
131
+ def strip_geo( name )
132
+ ## e.g. Arsenal, London => Arsenal
133
+ name.split(',')[0].strip
134
+ end
135
+ end # class ClubHistoryIndex
136
+
137
+ end # module Import
138
+ end # module SportDb