sportdb-formats 1.1.0 → 1.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +6 -34
- data/Rakefile +3 -6
- data/lib/sportdb/formats.rb +54 -70
- data/lib/sportdb/formats/country/country_index.rb +2 -2
- data/lib/sportdb/formats/event/event_index.rb +141 -0
- data/lib/sportdb/formats/event/event_reader.rb +183 -0
- data/lib/sportdb/formats/league/league_index.rb +22 -18
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/league/league_reader.rb +7 -1
- data/lib/sportdb/formats/match/match_parser.rb +47 -18
- data/lib/sportdb/formats/package.rb +59 -11
- data/lib/sportdb/formats/team/club_index.rb +13 -11
- data/lib/sportdb/formats/team/club_index_history.rb +134 -0
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
- data/lib/sportdb/formats/team/club_reader_props.rb +20 -5
- data/lib/sportdb/formats/version.rb +1 -1
- data/test/helper.rb +50 -81
- data/test/test_club_index_history.rb +107 -0
- data/test/test_club_reader_history.rb +212 -0
- data/test/test_datafile_package.rb +1 -1
- metadata +11 -81
- data/lib/sportdb/formats/config.rb +0 -40
- data/lib/sportdb/formats/match/match_parser_csv.rb +0 -321
- data/lib/sportdb/formats/name_helper.rb +0 -84
- data/lib/sportdb/formats/score/score_formats.rb +0 -220
- data/lib/sportdb/formats/score/score_parser.rb +0 -202
- data/lib/sportdb/formats/season_utils.rb +0 -27
- data/lib/sportdb/formats/structs/country.rb +0 -31
- data/lib/sportdb/formats/structs/group.rb +0 -18
- data/lib/sportdb/formats/structs/league.rb +0 -37
- data/lib/sportdb/formats/structs/match.rb +0 -151
- data/lib/sportdb/formats/structs/matchlist.rb +0 -220
- data/lib/sportdb/formats/structs/round.rb +0 -25
- data/lib/sportdb/formats/structs/season.rb +0 -123
- data/lib/sportdb/formats/structs/standings.rb +0 -268
- data/lib/sportdb/formats/structs/team.rb +0 -150
- data/lib/sportdb/formats/structs/team_usage.rb +0 -88
- data/test/test_clubs.rb +0 -40
- data/test/test_conf.rb +0 -65
- data/test/test_csv_match_parser.rb +0 -114
- data/test/test_csv_match_parser_utils.rb +0 -20
- data/test/test_csv_reader.rb +0 -31
- data/test/test_match.rb +0 -30
- data/test/test_match_auto.rb +0 -72
- data/test/test_match_auto_champs.rb +0 -45
- data/test/test_match_auto_euro.rb +0 -37
- data/test/test_match_auto_relegation.rb +0 -41
- data/test/test_match_auto_worldcup.rb +0 -61
- data/test/test_match_champs.rb +0 -27
- data/test/test_match_eng.rb +0 -26
- data/test/test_match_euro.rb +0 -27
- data/test/test_match_worldcup.rb +0 -27
- data/test/test_name_helper.rb +0 -67
- data/test/test_scores.rb +0 -122
- data/test/test_season.rb +0 -62
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Import
|
4
|
+
|
5
|
+
|
6
|
+
class EventInfo
  ## "high level" info (summary) about an event (like a "wikipedia infobox")
  ##  use for checking dataset imports; lets you check e.g.
  ##   - dates within range
  ##   - number of teams   e.g. 20
  ##   - matches played    e.g. 380
  ##   - goals scored      e.g. 937
  ##  etc.

  attr_reader :league,
              :season,
              :teams,
              :matches,
              :goals,
              :start_date,
              :end_date

  ## league and season are required; all other values are optional
  ##   and default to nil (meaning "not known / not given")
  def initialize( league:, season:,
                  start_date: nil, end_date: nil,
                  teams:      nil,
                  matches:    nil,
                  goals:      nil )

    @league      = league
    @season      = season

    @start_date  = start_date
    @end_date    = end_date

    @teams       = teams      ## todo/check: rename/use teams_count ??
    @matches     = matches    ## todo/check: rename/use match_count ??
    @goals       = goals
  end

  ## check if the date falls into the event period; returns true or false
  ##
  ##  every bound (start / end) gets checked on its own:
  ##   - if the date is given (start_date / end_date) use it
  ##       note: for now allow off by one error (via timezone/local time errors)
  ##   - if the date is missing fall back to the generic season rule, that is,
  ##       for year-only seasons     - same year e.g. Jan 1 - Dec 31
  ##       for all other seasons     - July 1 - June 30 (Y+1)
  ##
  ##  note: an event with a start_date only (WITHOUT end_date) now
  ##         uses the start_date for the lower bound
  ##         (and no longer falls back to the generic rule for both bounds)
  ##
  ## todo/fix: add options e.g.
  ##  - add delta/off_by_one or such?
  ##  - add strict (for) only return true if date range (really) defined (no generic auto-rules)
  ## todo/fix: issue warning if off by one!!!!
  def include?( date )
    on_or_after_start?( date ) && on_or_before_end?( date )
  end # method include?
  alias_method :between?, :include?

  private

  ## lower bound check - explicit start date (minus one day) or generic season start
  def on_or_after_start?( date )
    if @start_date
      date >= (@start_date-1)
    elsif @season.year?
      date.year >= @season.start_year
    else
      ##  - todo/check -start for some countries/leagues in June 1 or August 1 ????
      date >= Date.new( @season.start_year, 7, 1 )
    end
  end

  ## upper bound check - explicit end date (plus one day) or generic season end
  def on_or_before_end?( date )
    if @end_date
      date <= (@end_date+1)
    elsif @season.year?
      date.year <= @season.start_year
    else
      date <= Date.new( @season.end_year, 6, 30 )
    end
  end
end # class EventInfo
|
66
|
+
|
67
|
+
|
68
|
+
class EventInfoReader
  ## reads event info (summary) records from a csv text (table)
  ##   using the columns League, Season (or Year), Dates,
  ##   Clubs (or Teams), Matches and Goals

  def catalog() Import.catalog; end


  ## read the file (in utf-8) and return the parsed records
  def self.read( path )
    txt = File.open( path, 'r:utf-8') {|f| f.read }
    new( txt ).parse
  end

  ## parse the text and return the parsed records
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end

  ## returns an array of EventInfo records - one for every (csv) row
  ##   note: prints an error message and exits (1)
  ##           if the dates column does not hold one or two dates or
  ##           if the period is longer than 365 days
  def parse
    recs = []

    parse_csv( @txt ).each do |row|
      league_col  = row['League']
      season_col  = row['Season'] || row['Year']

      season = Import::Season.parse( season_col )
      league = catalog.leagues.find!( league_col )

      dates   = parse_dates( row['Dates'], league: league, season: season )

      teams   = parse_count( row['Clubs'] || row['Teams'] )
      goals   = parse_count( row['Goals'] )
      matches = parse_match_count( row['Matches'] )

      recs << EventInfo.new( league:     league,
                             season:     season,
                             start_date: dates[0],
                             end_date:   dates[1],
                             teams:      teams,
                             matches:    matches,
                             goals:      goals
                           )
    end # each row
    recs
  end  # method parse


  private

  ## convert the dates column e.g. "Aug 15 - May 20" or "Aug 15" into
  ##   an array with none, one (start only) or two (start and end) dates
  def parse_dates( dates_col, league:, season: )
    ## no dates - keep dates array empty
    return []  if dates_col.nil? || dates_col.empty?

    dates_col = dates_col.gsub( /[ ]{2,}/, ' ' )  ## squish/fold spaces

    puts "#{league.name} (#{league.key}) | #{season.key} | #{dates_col}"

    ### todo/check:  check what parts "Aug 15" return ???
    ###    short form for "Aug 15 -" - works?

    ## todo/fix!!! - check EventInfo.include?
    ##    now allow dates with only start_date too!! (WITHOUT end_date)
    parts = dates_col.split( /[ ]*[–-][ ]*/ )

    dates = []
    case parts.size
    when 1
      pp parts
      dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
      pp dates
    when 2
      pp parts
      dates << DateFormats.parse( parts[0], start: Date.new( season.start_year, 1, 1 ), lang: 'en' )
      dates << DateFormats.parse( parts[1], start: Date.new( season.end_year ? season.end_year : season.start_year, 1, 1 ), lang: 'en' )
      pp dates

      ## assert/check if period is less than 365 days for now
      diff = dates[1].to_date.jd - dates[0].to_date.jd
      puts "#{diff}d"
      if diff > 365
        puts "!! ERROR - date range / period assertion failed; expected diff < 365 days"
        exit 1
      end
    else
      puts "!! ERROR - expected date range / period - one or two dates; got #{parts.size}:"
      pp dates_col
      pp parts
      exit 1
    end
    dates
  end

  ## convert a (simple) count column into an integer or
  ##   nil if the column is missing or holds no digits
  ##  note: remove (and allow) all non-digits e.g. 370 goals, 20 clubs, etc.
  def parse_count( col )
    return nil  if col.nil?

    digits = col.gsub( /[^0-9]/, '' )
    digits.empty? ? nil : digits.to_i
  end

  ## convert the matches (played) column into an integer or
  ##   nil if the column is missing or holds no digits
  ##  note: support additions in matches (played) e.g.
  ##     132 + 63 Play-off-Spiele
  def parse_match_count( col )
    return nil  if col.nil?

    expr = col.gsub( /[^0-9+]/, '' )
    return nil  if expr.empty?

    ## note: for now only supports additions;
    ##   a single (integer) number is a sum with one part only
    expr.split( '+' ).reduce( 0 ) { |sum,str| sum + str.to_i }
  end
end # class EventInfoReader
|
179
|
+
|
180
|
+
|
181
|
+
end ## module Import
|
182
|
+
end ## module SportDb
|
183
|
+
|
@@ -95,36 +95,40 @@ class LeagueIndex
|
|
95
95
|
end # method add
|
96
96
|
|
97
97
|
|
98
|
+
## helper to always convert (possible) country key to existing country record
|
99
|
+
## todo: make private - why? why not?
|
100
|
+
def country( country )
|
101
|
+
if country.is_a?( String ) || country.is_a?( Symbol )
|
102
|
+
## note: use own "global" countries index setting for ClubIndex - why? why not?
|
103
|
+
rec = catalog.countries.find( country.to_s )
|
104
|
+
if rec.nil?
|
105
|
+
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
106
|
+
exit 1
|
107
|
+
end
|
108
|
+
rec
|
109
|
+
else
|
110
|
+
country ## (re)use country struct - no need to run lookup again
|
111
|
+
end
|
112
|
+
end
|
113
|
+
|
114
|
+
|
98
115
|
def match( name )
|
99
|
-
##
|
116
|
+
## note: returns empty array if no match and NOT nil
|
100
117
|
name = normalize( name )
|
101
|
-
@leagues_by_name[ name ]
|
118
|
+
@leagues_by_name[ name ] || []
|
102
119
|
end
|
103
120
|
|
104
|
-
|
105
121
|
def match_by( name:, country: )
|
106
122
|
## note: match must for now always include name
|
107
123
|
m = match( name )
|
108
|
-
if
|
124
|
+
if country ## filter by country
|
109
125
|
## note: country assumes / allows the country key or fifa code for now
|
110
|
-
|
111
126
|
## note: allow passing in of country struct too
|
112
|
-
country_rec =
|
113
|
-
country ## (re)use country struct - no need to run lookup again
|
114
|
-
else
|
115
|
-
## note: use own "global" countries index setting for ClubIndex - why? why not?
|
116
|
-
rec = catalog.countries.find( country )
|
117
|
-
if rec.nil?
|
118
|
-
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
119
|
-
exit 1
|
120
|
-
end
|
121
|
-
rec
|
122
|
-
end
|
127
|
+
country_rec = country( country )
|
123
128
|
|
124
129
|
## note: also skip international leagues & cups (e.g. champions league etc.) for now - why? why not?
|
125
130
|
m = m.select { |league| league.country &&
|
126
131
|
league.country.key == country_rec.key }
|
127
|
-
m = nil if m.empty? ## note: reset to nil if no more matches
|
128
132
|
end
|
129
133
|
m
|
130
134
|
end
|
@@ -144,7 +148,7 @@ class LeagueIndex
|
|
144
148
|
m = match( name )
|
145
149
|
# pp m
|
146
150
|
|
147
|
-
if m.
|
151
|
+
if m.empty?
|
148
152
|
## fall through/do nothing
|
149
153
|
elsif m.size > 1
|
150
154
|
puts "** !!! ERROR - ambigious league name; too many leagues (#{m.size}) found:"
|
@@ -65,7 +65,7 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
65
65
|
filtered_secs = []
|
66
66
|
filter = norm_seasons( season )
|
67
67
|
secs.each do |sec|
|
68
|
-
if filter.include?(
|
68
|
+
if filter.include?( Season.parse( sec[:season] ).key )
|
69
69
|
filtered_secs << sec
|
70
70
|
else
|
71
71
|
puts " skipping season >#{sec[:season]}< NOT matched by filter"
|
@@ -76,7 +76,7 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
76
76
|
|
77
77
|
## pass 3 - check & map; replace inline (string with data struct record)
|
78
78
|
secs.each do |sec|
|
79
|
-
sec[:season] =
|
79
|
+
sec[:season] = Season.parse( sec[:season ] )
|
80
80
|
sec[:league] = catalog.leagues.find!( sec[:league] )
|
81
81
|
|
82
82
|
check_stage( sec[:stage] ) if sec[:stage] ## note: only check for now (no remapping etc.)
|
@@ -100,14 +100,18 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
100
100
|
)
|
101
101
|
$}x
|
102
102
|
|
103
|
+
|
103
104
|
def norm_seasons( season_or_seasons ) ## todo/check: add alias norm_seasons - why? why not?
|
104
|
-
|
105
|
+
|
106
|
+
seasons = if season_or_seasons.is_a?( Array ) # is it an array already
|
107
|
+
season_or_seasons
|
108
|
+
elsif season_or_seasons.is_a?( Range ) # e.g. Season(1999)..Season(2001) or such
|
109
|
+
season_or_seasons.to_a
|
110
|
+
else ## assume - single entry - wrap in array
|
105
111
|
[season_or_seasons]
|
106
|
-
else ## assume it's an array already
|
107
|
-
season_or_seasons
|
108
112
|
end
|
109
113
|
|
110
|
-
seasons.map { |season|
|
114
|
+
seasons.map { |season| Season( season ).key }
|
111
115
|
end
|
112
116
|
|
113
117
|
|
@@ -127,7 +131,7 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
127
131
|
'Regular Season',
|
128
132
|
'Regular Stage',
|
129
133
|
'Championship Round',
|
130
|
-
'Championship Playoff',
|
134
|
+
'Championship Playoff', # or Championship play-off
|
131
135
|
'Relegation Round',
|
132
136
|
'Relegation Playoff',
|
133
137
|
'Play-offs',
|
@@ -140,6 +144,19 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
140
144
|
'EL Play-off',
|
141
145
|
'Europa League Play-off',
|
142
146
|
'Europa-League-Play-offs',
|
147
|
+
'Europa League Finals',
|
148
|
+
'Playoffs - Championship',
|
149
|
+
'Playoffs - Europa League',
|
150
|
+
'Playoffs - Europa League - Finals',
|
151
|
+
'Playoffs - Relegation',
|
152
|
+
'Playoffs - Challenger',
|
153
|
+
'Finals',
|
154
|
+
|
155
|
+
'Apertura',
|
156
|
+
'Apertura - Liguilla',
|
157
|
+
'Clausura',
|
158
|
+
'Clausura - Liguilla',
|
159
|
+
|
143
160
|
].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
|
144
161
|
|
145
162
|
|
@@ -118,12 +118,18 @@ def parse
|
|
118
118
|
alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
119
119
|
end
|
120
120
|
alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
|
121
|
+
|
122
|
+
## auto-add with country prepended
|
123
|
+
## e.g. England Premier League, Austria Bundesliga etc.
|
124
|
+
## todo/check: also add variants with country alt name if present!!!
|
125
|
+
## todo/check: exclude cups or such from country + league name auto-add - why? why not?
|
126
|
+
alt_names_auto << "#{country.name} #{league_name}"
|
121
127
|
else ## assume int'l (no country) e.g. champions league, etc.
|
122
128
|
## only auto-add key (e.g. CL, EL, etc.)
|
123
129
|
alt_names_auto << league_key.upcase.gsub('.', ' ') ## note: no country code (prefix/leading) used
|
124
130
|
end
|
125
131
|
|
126
|
-
pp alt_names_auto
|
132
|
+
## pp alt_names_auto
|
127
133
|
|
128
134
|
## prepend country key/code if country present
|
129
135
|
## todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
|
@@ -302,6 +302,11 @@ class MatchParser ## simple match parser for team match schedules
|
|
302
302
|
ScoreFormats.find!( line )
|
303
303
|
end
|
304
304
|
|
305
|
+
def find_status!( line )
|
306
|
+
StatusParser.find!( line )
|
307
|
+
end
|
308
|
+
|
309
|
+
|
305
310
|
def try_parse_game( line )
|
306
311
|
# note: clone line; for possible test do NOT modify in place for now
|
307
312
|
# note: returns true if parsed, false if no match
|
@@ -329,6 +334,10 @@ class MatchParser ## simple match parser for team match schedules
|
|
329
334
|
return false
|
330
335
|
end
|
331
336
|
|
337
|
+
## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
|
338
|
+
## or [cancelled] or [postponed] etc.
|
339
|
+
status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
|
340
|
+
|
332
341
|
## pos = find_game_pos!( line )
|
333
342
|
|
334
343
|
date = find_date!( line, start: @start )
|
@@ -353,20 +362,23 @@ class MatchParser ## simple match parser for team match schedules
|
|
353
362
|
if @last_round
|
354
363
|
round = @last_round
|
355
364
|
else
|
356
|
-
## find (first) matching round by date
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
365
|
+
## find (first) matching round by date if rounds / matchdays defined
|
366
|
+
## if no rounds / matchdays defined - YES, allow matches WITHOUT rounds!!!
|
367
|
+
if @rounds.size > 0
|
368
|
+
@rounds.values.each do |round_rec|
|
369
|
+
## note: convert date to date only (no time) with to_date!!!
|
370
|
+
if (round_rec.start_date && round_rec.end_date) &&
|
371
|
+
(date.to_date >= round_rec.start_date &&
|
372
|
+
date.to_date <= round_rec.end_date)
|
373
|
+
round = round_rec
|
374
|
+
break
|
375
|
+
end
|
376
|
+
end
|
377
|
+
if round.nil?
|
378
|
+
puts "!! ERROR - no matching round found for match date:"
|
379
|
+
pp date
|
380
|
+
exit 1
|
364
381
|
end
|
365
|
-
end
|
366
|
-
if round.nil?
|
367
|
-
puts "!! ERROR - no matching round found for match date:"
|
368
|
-
pp date
|
369
|
-
exit 1
|
370
382
|
end
|
371
383
|
end
|
372
384
|
|
@@ -380,8 +392,8 @@ class MatchParser ## simple match parser for team match schedules
|
|
380
392
|
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
381
393
|
score: score,
|
382
394
|
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
383
|
-
group: @last_group ? @last_group.name : nil
|
384
|
-
|
395
|
+
group: @last_group ? @last_group.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
396
|
+
status: status )
|
385
397
|
### todo: cache team lookups in hash?
|
386
398
|
|
387
399
|
=begin
|
@@ -563,12 +575,29 @@ class MatchParser ## simple match parser for team match schedules
|
|
563
575
|
|
564
576
|
if date && team1.nil? && team2.nil?
|
565
577
|
logger.debug( "date header line found: >#{line}<")
|
566
|
-
logger.debug( " date: #{date}")
|
578
|
+
logger.debug( " date: #{date} with start: #{@start}")
|
567
579
|
|
568
580
|
@last_date = date # keep a reference for later use
|
569
|
-
|
581
|
+
|
582
|
+
### quick "corona" hack - support seasons going beyond 12 months (see swiss league 2019/20 and others!!)
|
583
|
+
## find a better way??
|
584
|
+
## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
|
585
|
+
## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
|
586
|
+
### todo/check: just turn on for 2019/20 season or always? why? why not?
|
587
|
+
|
588
|
+
## todo/fix: add switch back to old @start_org
|
589
|
+
## if year is date.year == @start.year-1 -- possible when full date with year set!!!
|
590
|
+
if @start.month != 1
|
591
|
+
if date.year == @start.year+1
|
592
|
+
logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
|
593
|
+
@start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
|
594
|
+
@start = Date.new( @start.year+1, 1, 1 )
|
595
|
+
end
|
596
|
+
end
|
597
|
+
|
598
|
+
true
|
570
599
|
else
|
571
|
-
|
600
|
+
false
|
572
601
|
end
|
573
602
|
end
|
574
603
|
|
@@ -13,12 +13,22 @@ module SportDb
|
|
13
13
|
## leagues.txt or leagues_en.txt
|
14
14
|
## remove support for en.leagues.txt - why? why not?
|
15
15
|
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
16
|
-
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.
|
16
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
|
17
17
|
leagues
|
18
18
|
(?:_[a-z0-9_-]+)?
|
19
19
|
\.txt$
|
20
20
|
}x
|
21
21
|
|
22
|
+
## seasons.txt or seasons_en.txt
|
23
|
+
## remove support for br.seasons.txt - why? why not?
|
24
|
+
SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
25
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
|
26
|
+
seasons
|
27
|
+
(?:_[a-z0-9_-]+)?
|
28
|
+
\.txt$
|
29
|
+
}x
|
30
|
+
|
31
|
+
|
22
32
|
## clubs.txt or clubs_en.txt
|
23
33
|
## remove support for en.clubs.txt - why? why not?
|
24
34
|
CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
@@ -35,12 +45,22 @@ module SportDb
|
|
35
45
|
\.wiki\.txt$
|
36
46
|
}x
|
37
47
|
|
38
|
-
|
48
|
+
## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
|
49
|
+
CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
39
50
|
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
|
40
51
|
clubs
|
41
52
|
(?:_[a-z0-9_-]+)?
|
42
53
|
\.props\.txt$
|
43
54
|
}x
|
55
|
+
CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
|
56
|
+
|
57
|
+
|
58
|
+
CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
59
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
|
60
|
+
clubs
|
61
|
+
(?:_[a-z0-9_-]+)?
|
62
|
+
\.history\.txt$
|
63
|
+
}x
|
44
64
|
|
45
65
|
## teams.txt or teams_history.txt
|
46
66
|
TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
@@ -49,6 +69,8 @@ module SportDb
|
|
49
69
|
\.txt$
|
50
70
|
}x
|
51
71
|
|
72
|
+
|
73
|
+
### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
|
52
74
|
### season folder:
|
53
75
|
## e.g. /2019-20 or
|
54
76
|
## year-only e.g. /2019 or
|
@@ -73,6 +95,10 @@ module SportDb
|
|
73
95
|
/[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
74
96
|
}x
|
75
97
|
|
98
|
+
### add "generic" pattern to find all csv datafiles
|
99
|
+
CSV_RE = %r{ (?: ^|/ )
|
100
|
+
[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
101
|
+
}x
|
76
102
|
|
77
103
|
|
78
104
|
## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
|
@@ -96,16 +122,22 @@ module SportDb
|
|
96
122
|
def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
|
97
123
|
def self.match_teams( path ) TEAMS_RE.match( path ); end
|
98
124
|
|
99
|
-
def self.find_clubs( path, pattern: CLUBS_RE )
|
100
|
-
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )
|
125
|
+
def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
|
126
|
+
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
|
127
|
+
def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
|
101
128
|
|
102
|
-
def self.match_clubs( path )
|
103
|
-
def self.match_clubs_wiki( path )
|
104
|
-
def self.
|
129
|
+
def self.match_clubs( path ) CLUBS_RE.match( path ); end
|
130
|
+
def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
|
131
|
+
def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
|
132
|
+
def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
|
105
133
|
|
106
134
|
def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
|
107
135
|
def self.match_leagues( path ) LEAGUES_RE.match( path ); end
|
108
136
|
|
137
|
+
def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
|
138
|
+
def self.match_seasons( path ) SEASONS_RE.match( path ); end
|
139
|
+
|
140
|
+
|
109
141
|
def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
|
110
142
|
def self.match_conf( path ) CONF_RE.match( path ); end
|
111
143
|
|
@@ -118,6 +150,7 @@ module SportDb
|
|
118
150
|
end
|
119
151
|
## add match_match and match_match_csv - why? why not?
|
120
152
|
|
153
|
+
|
121
154
|
class << self
|
122
155
|
alias_method :match_teams?, :match_teams
|
123
156
|
alias_method :teams?, :match_teams
|
@@ -128,12 +161,21 @@ module SportDb
|
|
128
161
|
alias_method :match_clubs_wiki?, :match_clubs_wiki
|
129
162
|
alias_method :clubs_wiki?, :match_clubs_wiki
|
130
163
|
|
131
|
-
alias_method :
|
132
|
-
alias_method :
|
164
|
+
alias_method :match_clubs_history?, :match_clubs_history
|
165
|
+
alias_method :clubs_history?, :match_clubs_history
|
166
|
+
|
167
|
+
alias_method :match_club_props, :match_clubs_props
|
168
|
+
alias_method :match_club_props?, :match_clubs_props
|
169
|
+
alias_method :club_props?, :match_clubs_props
|
170
|
+
alias_method :match_clubs_props?, :match_clubs_props
|
171
|
+
alias_method :clubs_props?, :match_clubs_props
|
133
172
|
|
134
173
|
alias_method :match_leagues?, :match_leagues
|
135
174
|
alias_method :leagues?, :match_leagues
|
136
175
|
|
176
|
+
alias_method :match_seasons?, :match_seasons
|
177
|
+
alias_method :seasons?, :match_seasons
|
178
|
+
|
137
179
|
alias_method :match_conf?, :match_conf
|
138
180
|
alias_method :conf?, :match_conf
|
139
181
|
end
|
@@ -212,11 +254,17 @@ module SportDb
|
|
212
254
|
end
|
213
255
|
end
|
214
256
|
def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
|
257
|
+
def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
|
258
|
+
|
215
259
|
def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
|
216
260
|
|
217
261
|
def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
|
218
262
|
def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
|
219
263
|
def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
|
264
|
+
def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
|
265
|
+
|
266
|
+
def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
|
267
|
+
|
220
268
|
|
221
269
|
## return all match datafile entries
|
222
270
|
def match( format: 'txt' )
|
@@ -287,13 +335,13 @@ module SportDb
|
|
287
335
|
## filter.skip? filter.include? ( season_key )?
|
288
336
|
## filter.before?( season_key ) etc.
|
289
337
|
## find some good method names!!!!
|
290
|
-
season_start = start ?
|
338
|
+
season_start = start ? Season( start ) : nil
|
291
339
|
|
292
340
|
h = {}
|
293
341
|
match( format: format ).each do |entry|
|
294
342
|
## note: assume last directory in datafile path is the season part/key
|
295
343
|
season_q = File.basename( File.dirname( entry.name ))
|
296
|
-
season =
|
344
|
+
season = Season.parse( season_q ) ## normalize season
|
297
345
|
|
298
346
|
## skip if start season before this season
|
299
347
|
next if season_start && season_start.start_year > season.start_year
|