sportdb-formats 1.1.0 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +6 -34
- data/Rakefile +3 -6
- data/lib/sportdb/formats.rb +54 -70
- data/lib/sportdb/formats/country/country_index.rb +2 -2
- data/lib/sportdb/formats/event/event_index.rb +141 -0
- data/lib/sportdb/formats/event/event_reader.rb +183 -0
- data/lib/sportdb/formats/league/league_index.rb +22 -18
- data/lib/sportdb/formats/league/league_outline_reader.rb +24 -7
- data/lib/sportdb/formats/league/league_reader.rb +7 -1
- data/lib/sportdb/formats/match/match_parser.rb +47 -18
- data/lib/sportdb/formats/package.rb +59 -11
- data/lib/sportdb/formats/team/club_index.rb +13 -11
- data/lib/sportdb/formats/team/club_index_history.rb +134 -0
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
- data/lib/sportdb/formats/team/club_reader_props.rb +20 -5
- data/lib/sportdb/formats/version.rb +1 -1
- data/test/helper.rb +50 -81
- data/test/test_club_index_history.rb +107 -0
- data/test/test_club_reader_history.rb +212 -0
- data/test/test_datafile_package.rb +1 -1
- metadata +11 -81
- data/lib/sportdb/formats/config.rb +0 -40
- data/lib/sportdb/formats/match/match_parser_csv.rb +0 -321
- data/lib/sportdb/formats/name_helper.rb +0 -84
- data/lib/sportdb/formats/score/score_formats.rb +0 -220
- data/lib/sportdb/formats/score/score_parser.rb +0 -202
- data/lib/sportdb/formats/season_utils.rb +0 -27
- data/lib/sportdb/formats/structs/country.rb +0 -31
- data/lib/sportdb/formats/structs/group.rb +0 -18
- data/lib/sportdb/formats/structs/league.rb +0 -37
- data/lib/sportdb/formats/structs/match.rb +0 -151
- data/lib/sportdb/formats/structs/matchlist.rb +0 -220
- data/lib/sportdb/formats/structs/round.rb +0 -25
- data/lib/sportdb/formats/structs/season.rb +0 -123
- data/lib/sportdb/formats/structs/standings.rb +0 -268
- data/lib/sportdb/formats/structs/team.rb +0 -150
- data/lib/sportdb/formats/structs/team_usage.rb +0 -88
- data/test/test_clubs.rb +0 -40
- data/test/test_conf.rb +0 -65
- data/test/test_csv_match_parser.rb +0 -114
- data/test/test_csv_match_parser_utils.rb +0 -20
- data/test/test_csv_reader.rb +0 -31
- data/test/test_match.rb +0 -30
- data/test/test_match_auto.rb +0 -72
- data/test/test_match_auto_champs.rb +0 -45
- data/test/test_match_auto_euro.rb +0 -37
- data/test/test_match_auto_relegation.rb +0 -41
- data/test/test_match_auto_worldcup.rb +0 -61
- data/test/test_match_champs.rb +0 -27
- data/test/test_match_eng.rb +0 -26
- data/test/test_match_euro.rb +0 -27
- data/test/test_match_worldcup.rb +0 -27
- data/test/test_name_helper.rb +0 -67
- data/test/test_scores.rb +0 -122
- data/test/test_season.rb +0 -62
@@ -0,0 +1,183 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Import
|
4
|
+
|
5
|
+
|
6
|
+
class EventInfo
  ##  "high level" info (summary) about event  (like a "wikipedia infobox")
  ##    use for checking dataset imports; lets you check e.g.
  ##    - dates within range
  ##    - number of teams e.g. 20
  ##    - matches played  e.g. 380
  ##    - goals scored    e.g. 937
  ##    etc.

  attr_reader :league,
              :season,
              :teams,
              :matches,
              :goals,
              :start_date,
              :end_date

  ## league and season are required;
  ##   all other (summary) values are optional and default to nil
  def initialize( league:, season:,
                  start_date: nil, end_date: nil,
                  teams:      nil,
                  matches:    nil,
                  goals:      nil )

    @league      = league
    @season      = season

    @start_date  = start_date
    @end_date    = end_date

    @teams       = teams    ## todo/check: rename/use teams_count ??
    @matches     = matches  ## todo/check: rename/use match_count ??
    @goals       = goals
  end

  ## check if date falls into the event period; returns true or false
  ##
  ##  - start_date AND end_date defined:
  ##      use the date range - allows off by one (day) on both ends
  ##      (via timezone/local time errors)
  ##  - ONLY start_date defined (open-ended):
  ##      use start_date (minus one day) as lower bound and
  ##      the end of the generic season rule as upper bound
  ##  - no dates defined:
  ##      use the generic season rule, that is,
  ##        same year for year (calendar) seasons or
  ##        July 1 - June 30 (Y+1) otherwise
  ##
  ## todo/fix: add options e.g.
  ##  - add delta/off_by_one or such?
  ##  - add strict (for) only return true if date range (really) defined (no generic auto-rules)
  ## todo/fix: issue warning if off by one!!!!
  def include?( date )
    if @start_date && @end_date
      date >= (@start_date-1) &&
      date <= (@end_date+1)
    elsif @start_date
      ## note: (only) start date known - do NOT ignore it
      ##   and fall back to the generic rule for the season start
      date >= (@start_date-1) &&
      before_generic_end?( date )
    else
      after_generic_start?( date ) &&
      before_generic_end?( date )
    end
  end  # method include?
  alias_method :between?, :include?

  private

  ## generic rule - lower bound:
  ##   year season => not before the season (start) year
  ##   otherwise   => July 1
  ##    - todo/check -start for some countries/leagues in June 1 or August 1 ????
  def after_generic_start?( date )
    if @season.year?
      date.year >= @season.start_year
    else
      date >= Date.new( @season.start_year, 7, 1 )
    end
  end

  ## generic rule - upper bound:
  ##   year season => not after the season (start) year
  ##   otherwise   => June 30 (Y+1)
  def before_generic_end?( date )
    if @season.year?
      date.year <= @season.start_year
    else
      date <= Date.new( @season.end_year, 6, 30 )
    end
  end
end  # class EventInfo
|
66
|
+
|
67
|
+
|
68
|
+
class EventInfoReader
  ## Reads event info (summary) records from a csv (text) table
  ##  with the columns
  ##    League, Season (or Year), Dates, Clubs (or Teams), Matches, Goals
  ##  and returns an array of EventInfo records (one per row)

  def catalog() Import.catalog; end


  ## read the text from the file in path (as utf-8) and parse
  def self.read( path )
    txt = File.open( path, 'r:utf-8') {|f| f.read }
    new( txt ).parse
  end

  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end

  ## returns an array of EventInfo records;
  ##  note: league names get looked up via catalog.leagues.find!
  ##        and a bad date range / period exits the process (exit 1)
  def parse
    recs = []

    parse_csv( @txt ).each do |row|
      season = Import::Season.parse( row['Season'] || row['Year'] )
      league = catalog.leagues.find!( row['League'] )

      ## note: dates is empty or holds one (start) or two (start & end) dates
      dates  = parse_dates( row['Dates'], league: league, season: season )

      recs << EventInfo.new( league:     league,
                             season:     season,
                             start_date: dates[0],
                             end_date:   dates[1],
                             teams:      parse_count( row['Clubs'] || row['Teams'] ),
                             matches:    parse_matches( row['Matches'] ),
                             goals:      parse_count( row['Goals'] )
                           )
    end # each row
    recs
  end  # method parse


  private

  ## parse the dates column into zero, one (start only) or two (start & end) dates
  def parse_dates( dates_col, league:, season: )
    ## no dates - keep dates array empty
    return [] if dates_col.nil? || dates_col.empty?

    dates_col = dates_col.gsub( /[ ]{2,}/, ' ' )   ## squish/fold spaces

    puts "#{league.name} (#{league.key}) | #{season.key} | #{dates_col}"

    ### todo/check: check what parts "Aug 15" return ???
    ###  short form for "Aug 15 -" - works?
    parts = dates_col.split( /[ ]*[–-][ ]*/ )

    unless parts.size == 1 || parts.size == 2
      puts "!! ERROR - expected date range / period - one or two dates; got #{parts.size}:"
      pp dates_col
      pp parts
      exit 1
    end

    pp parts
    ## note: start date uses the season start year,
    ##         end date the season end year (or start year if no end year)
    start_years = [season.start_year,
                   season.end_year || season.start_year]
    dates = parts.each_with_index.map do |part,i|
      DateFormats.parse( part, start: Date.new( start_years[i], 1, 1 ), lang: 'en' )
    end
    pp dates

    if dates.size == 2
      ## assert/check if period is less than 365 days for now
      diff = dates[1].to_date.jd - dates[0].to_date.jd
      puts "#{diff}d"
      if diff > 365
        puts "!! ERROR - date range / period assertion failed; expected diff < 365 days"
        exit 1
      end
    end

    dates
  end

  ## parse a (single) number; returns nil if no digits found
  ##  note: remove (and allow) all non-digits e.g. 370 goals, 20 clubs, etc.
  def parse_count( col )
    return nil if col.nil?

    digits = col.gsub( /[^0-9]/, '' )
    digits.empty? ? nil : digits.to_i
  end

  ## parse the matches (played) number; returns nil if no digits found
  ##  note: support additions in matches (played) e.g.
  ##          132 + 63 Play-off-Spiele
  def parse_matches( col )
    return nil if col.nil?

    expr = col.gsub( /[^0-9+]/, '' )
    return nil if expr.empty?

    ## note: for now only supports additions;
    ##         a single (integer) number is a sum with one term
    expr.split( '+' ).sum { |str| str.to_i }
  end
end  # class EventInfoReader
|
179
|
+
|
180
|
+
|
181
|
+
end ## module Import
|
182
|
+
end ## module SportDb
|
183
|
+
|
@@ -95,36 +95,40 @@ class LeagueIndex
|
|
95
95
|
end # method add
|
96
96
|
|
97
97
|
|
98
|
+
## helper - resolve a (possible) country key (string or symbol) to
##   the existing country record via catalog.countries.find;
##   anything else gets passed through (returned) as is
##  note: prints an error and exits (exit 1) if no record is found for the key
## todo: make private - why? why not?
def country( country )
  ## not a key? (re)use country struct - no need to run lookup again
  return country unless country.is_a?( String ) || country.is_a?( Symbol )

  ## note: use own "global" countries index setting for ClubIndex - why? why not?
  found = catalog.countries.find( country.to_s )
  return found unless found.nil?

  puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
  exit 1
end
|
113
|
+
|
114
|
+
|
98
115
|
def match( name )
|
99
|
-
##
|
116
|
+
## note: returns empty array if no match and NOT nil
|
100
117
|
name = normalize( name )
|
101
|
-
@leagues_by_name[ name ]
|
118
|
+
@leagues_by_name[ name ] || []
|
102
119
|
end
|
103
120
|
|
104
|
-
|
105
121
|
def match_by( name:, country: )
|
106
122
|
## note: match must for now always include name
|
107
123
|
m = match( name )
|
108
|
-
if
|
124
|
+
if country ## filter by country
|
109
125
|
## note: country assumes / allows the country key or fifa code for now
|
110
|
-
|
111
126
|
## note: allow passing in of country struct too
|
112
|
-
country_rec =
|
113
|
-
country ## (re)use country struct - no need to run lookup again
|
114
|
-
else
|
115
|
-
## note: use own "global" countries index setting for ClubIndex - why? why not?
|
116
|
-
rec = catalog.countries.find( country )
|
117
|
-
if rec.nil?
|
118
|
-
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
119
|
-
exit 1
|
120
|
-
end
|
121
|
-
rec
|
122
|
-
end
|
127
|
+
country_rec = country( country )
|
123
128
|
|
124
129
|
## note: also skip international leagues & cups (e.g. champions league etc.) for now - why? why not?
|
125
130
|
m = m.select { |league| league.country &&
|
126
131
|
league.country.key == country_rec.key }
|
127
|
-
m = nil if m.empty? ## note: reset to nil if no more matches
|
128
132
|
end
|
129
133
|
m
|
130
134
|
end
|
@@ -144,7 +148,7 @@ class LeagueIndex
|
|
144
148
|
m = match( name )
|
145
149
|
# pp m
|
146
150
|
|
147
|
-
if m.
|
151
|
+
if m.empty?
|
148
152
|
## fall through/do nothing
|
149
153
|
elsif m.size > 1
|
150
154
|
puts "** !!! ERROR - ambigious league name; too many leagues (#{m.size}) found:"
|
@@ -65,7 +65,7 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
65
65
|
filtered_secs = []
|
66
66
|
filter = norm_seasons( season )
|
67
67
|
secs.each do |sec|
|
68
|
-
if filter.include?(
|
68
|
+
if filter.include?( Season.parse( sec[:season] ).key )
|
69
69
|
filtered_secs << sec
|
70
70
|
else
|
71
71
|
puts " skipping season >#{sec[:season]}< NOT matched by filter"
|
@@ -76,7 +76,7 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
76
76
|
|
77
77
|
## pass 3 - check & map; replace inline (string with data struct record)
|
78
78
|
secs.each do |sec|
|
79
|
-
sec[:season] =
|
79
|
+
sec[:season] = Season.parse( sec[:season ] )
|
80
80
|
sec[:league] = catalog.leagues.find!( sec[:league] )
|
81
81
|
|
82
82
|
check_stage( sec[:stage] ) if sec[:stage] ## note: only check for now (no remapping etc.)
|
@@ -100,14 +100,18 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
100
100
|
)
|
101
101
|
$}x
|
102
102
|
|
103
|
+
|
103
104
|
def norm_seasons( season_or_seasons ) ## todo/check: add alias norm_seasons - why? why not?
|
104
|
-
|
105
|
+
|
106
|
+
seasons = if season_or_seasons.is_a?( Array ) # is it an array already
|
107
|
+
season_or_seasons
|
108
|
+
elsif season_or_seasons.is_a?( Range ) # e.g. Season(1999)..Season(2001) or such
|
109
|
+
season_or_seasons.to_a
|
110
|
+
else ## assume - single entry - wrap in array
|
105
111
|
[season_or_seasons]
|
106
|
-
else ## assume it's an array already
|
107
|
-
season_or_seasons
|
108
112
|
end
|
109
113
|
|
110
|
-
seasons.map { |season|
|
114
|
+
seasons.map { |season| Season( season ).key }
|
111
115
|
end
|
112
116
|
|
113
117
|
|
@@ -127,7 +131,7 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
127
131
|
'Regular Season',
|
128
132
|
'Regular Stage',
|
129
133
|
'Championship Round',
|
130
|
-
'Championship Playoff',
|
134
|
+
'Championship Playoff', # or Championship play-off
|
131
135
|
'Relegation Round',
|
132
136
|
'Relegation Playoff',
|
133
137
|
'Play-offs',
|
@@ -140,6 +144,19 @@ class LeagueOutlineReader ## todo/check - rename to LeaguePageReader / LeagueP
|
|
140
144
|
'EL Play-off',
|
141
145
|
'Europa League Play-off',
|
142
146
|
'Europa-League-Play-offs',
|
147
|
+
'Europa League Finals',
|
148
|
+
'Playoffs - Championship',
|
149
|
+
'Playoffs - Europa League',
|
150
|
+
'Playoffs - Europa League - Finals',
|
151
|
+
'Playoffs - Relegation',
|
152
|
+
'Playoffs - Challenger',
|
153
|
+
'Finals',
|
154
|
+
|
155
|
+
'Apertura',
|
156
|
+
'Apertura - Liguilla',
|
157
|
+
'Clausura',
|
158
|
+
'Clausura - Liguilla',
|
159
|
+
|
143
160
|
].map {|name| name.downcase.gsub( /[^a-z]/, '' ) }
|
144
161
|
|
145
162
|
|
@@ -118,12 +118,18 @@ def parse
|
|
118
118
|
alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
119
119
|
end
|
120
120
|
alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
|
121
|
+
|
122
|
+
## auto-add with country prepended
|
123
|
+
## e.g. England Premier League, Austria Bundesliga etc.
|
124
|
+
## todo/check: also add variants with country alt name if present!!!
|
125
|
+
## todo/check: exclude cups or such from country + league name auto-add - why? why not?
|
126
|
+
alt_names_auto << "#{country.name} #{league_name}"
|
121
127
|
else ## assume int'l (no country) e.g. champions league, etc.
|
122
128
|
## only auto-add key (e.g. CL, EL, etc.)
|
123
129
|
alt_names_auto << league_key.upcase.gsub('.', ' ') ## note: no country code (prefix/leading) used
|
124
130
|
end
|
125
131
|
|
126
|
-
pp alt_names_auto
|
132
|
+
## pp alt_names_auto
|
127
133
|
|
128
134
|
## prepend country key/code if country present
|
129
135
|
## todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
|
@@ -302,6 +302,11 @@ class MatchParser ## simple match parser for team match schedules
|
|
302
302
|
ScoreFormats.find!( line )
|
303
303
|
end
|
304
304
|
|
305
|
+
## find the (optional) match status in line;
##   hands the line over to StatusParser.find! and
##   passes on (returns) whatever the status parser returns
def find_status!( line )
  status = StatusParser.find!( line )
  status
end
|
308
|
+
|
309
|
+
|
305
310
|
def try_parse_game( line )
|
306
311
|
# note: clone line; for possible test do NOT modify in place for now
|
307
312
|
# note: returns true if parsed, false if no match
|
@@ -329,6 +334,10 @@ class MatchParser ## simple match parser for team match schedules
|
|
329
334
|
return false
|
330
335
|
end
|
331
336
|
|
337
|
+
## find (optional) match status e.g. [abandoned] or [replay] or [awarded]
|
338
|
+
## or [cancelled] or [postponed] etc.
|
339
|
+
status = find_status!( line ) ## todo/check: allow match status also in geo part (e.g. after @) - why? why not?
|
340
|
+
|
332
341
|
## pos = find_game_pos!( line )
|
333
342
|
|
334
343
|
date = find_date!( line, start: @start )
|
@@ -353,20 +362,23 @@ class MatchParser ## simple match parser for team match schedules
|
|
353
362
|
if @last_round
|
354
363
|
round = @last_round
|
355
364
|
else
|
356
|
-
## find (first) matching round by date
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
365
|
+
## find (first) matching round by date if rounds / matchdays defined
|
366
|
+
## if not rounds / matchdays defined - YES, allow matches WITHOUT rounds!!!
|
367
|
+
if @rounds.size > 0
|
368
|
+
@rounds.values.each do |round_rec|
|
369
|
+
## note: convert date to date only (no time) with to_date!!!
|
370
|
+
if (round_rec.start_date && round_rec.end_date) &&
|
371
|
+
(date.to_date >= round_rec.start_date &&
|
372
|
+
date.to_date <= round_rec.end_date)
|
373
|
+
round = round_rec
|
374
|
+
break
|
375
|
+
end
|
376
|
+
end
|
377
|
+
if round.nil?
|
378
|
+
puts "!! ERROR - no matching round found for match date:"
|
379
|
+
pp date
|
380
|
+
exit 1
|
364
381
|
end
|
365
|
-
end
|
366
|
-
if round.nil?
|
367
|
-
puts "!! ERROR - no matching round found for match date:"
|
368
|
-
pp date
|
369
|
-
exit 1
|
370
382
|
end
|
371
383
|
end
|
372
384
|
|
@@ -380,8 +392,8 @@ class MatchParser ## simple match parser for team match schedules
|
|
380
392
|
team2: team2, ## note: for now always use mapping value e.g. rec (NOT string e.g. team2.name)
|
381
393
|
score: score,
|
382
394
|
round: round ? round.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
383
|
-
group: @last_group ? @last_group.name : nil
|
384
|
-
|
395
|
+
group: @last_group ? @last_group.name : nil, ## note: for now always use string (assume unique canonical name for event)
|
396
|
+
status: status )
|
385
397
|
### todo: cache team lookups in hash?
|
386
398
|
|
387
399
|
=begin
|
@@ -563,12 +575,29 @@ class MatchParser ## simple match parser for team match schedules
|
|
563
575
|
|
564
576
|
if date && team1.nil? && team2.nil?
|
565
577
|
logger.debug( "date header line found: >#{line}<")
|
566
|
-
logger.debug( " date: #{date}")
|
578
|
+
logger.debug( " date: #{date} with start: #{@start}")
|
567
579
|
|
568
580
|
@last_date = date # keep a reference for later use
|
569
|
-
|
581
|
+
|
582
|
+
### quick "corona" hack - support seasons going beyond 12 month (see swiss league 2019/20 and others!!)
|
583
|
+
## find a better way??
|
584
|
+
## set @start date to full year (e.g. 1.1.) if date.year is @start.year+1
|
585
|
+
## todo/fix: add to linter to check for chronological dates!! - warn if NOT chronological
|
586
|
+
### todo/check: just turn on for 2019/20 season or always? why? why not?
|
587
|
+
|
588
|
+
## todo/fix: add switch back to old @start_org
|
589
|
+
## if year is date.year == @start.year-1 -- possible when full date with year set!!!
|
590
|
+
if @start.month != 1
|
591
|
+
if date.year == @start.year+1
|
592
|
+
logger.debug( "!! hack - extending start date to full (next/end) year; assumes all dates are chronologigal - always moving forward" )
|
593
|
+
@start_org = @start ## keep a copy of the original (old) start date - why? why not? - not used for now
|
594
|
+
@start = Date.new( @start.year+1, 1, 1 )
|
595
|
+
end
|
596
|
+
end
|
597
|
+
|
598
|
+
true
|
570
599
|
else
|
571
|
-
|
600
|
+
false
|
572
601
|
end
|
573
602
|
end
|
574
603
|
|
@@ -13,12 +13,22 @@ module SportDb
|
|
13
13
|
## leagues.txt or leagues_en.txt
|
14
14
|
## remove support for en.leagues.txt - why? why not?
|
15
15
|
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
16
|
-
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.
|
16
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.leagues.txt
|
17
17
|
leagues
|
18
18
|
(?:_[a-z0-9_-]+)?
|
19
19
|
\.txt$
|
20
20
|
}x
|
21
21
|
|
22
|
+
## seasons.txt or seasons_en.txt
|
23
|
+
## remove support for br.seasons.txt - why? why not?
|
24
|
+
SEASONS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
25
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.seasons.txt
|
26
|
+
seasons
|
27
|
+
(?:_[a-z0-9_-]+)?
|
28
|
+
\.txt$
|
29
|
+
}x
|
30
|
+
|
31
|
+
|
22
32
|
## clubs.txt or clubs_en.txt
|
23
33
|
## remove support for en.clubs.txt - why? why not?
|
24
34
|
CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
@@ -35,12 +45,22 @@ module SportDb
|
|
35
45
|
\.wiki\.txt$
|
36
46
|
}x
|
37
47
|
|
38
|
-
|
48
|
+
## todo/fix: rename to CLUBS too e.g. CLUBS_PROPS to reflect filename - why? why not?
|
49
|
+
CLUBS_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
39
50
|
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
|
40
51
|
clubs
|
41
52
|
(?:_[a-z0-9_-]+)?
|
42
53
|
\.props\.txt$
|
43
54
|
}x
|
55
|
+
CLUB_PROPS_RE = CLUBS_PROPS_RE ## add alias for now (fix later - why? why not?)
|
56
|
+
|
57
|
+
|
58
|
+
CLUBS_HISTORY_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
59
|
+
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.history.txt
|
60
|
+
clubs
|
61
|
+
(?:_[a-z0-9_-]+)?
|
62
|
+
\.history\.txt$
|
63
|
+
}x
|
44
64
|
|
45
65
|
## teams.txt or teams_history.txt
|
46
66
|
TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
@@ -49,6 +69,8 @@ module SportDb
|
|
49
69
|
\.txt$
|
50
70
|
}x
|
51
71
|
|
72
|
+
|
73
|
+
### todo/fix: change SEASON_RE to SEASON_KEY_RE (avoid confusion w/ SEASONS_RE for datafile?) - why? why not? !!!!!!!
|
52
74
|
### season folder:
|
53
75
|
## e.g. /2019-20 or
|
54
76
|
## year-only e.g. /2019 or
|
@@ -73,6 +95,10 @@ module SportDb
|
|
73
95
|
/[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
74
96
|
}x
|
75
97
|
|
98
|
+
### add "generic" pattern to find all csv datafiles
|
99
|
+
CSV_RE = %r{ (?: ^|/ )
|
100
|
+
[a-z0-9_.-]+\.csv$ ## note: allow dot (.) too e.g /eng.1.csv
|
101
|
+
}x
|
76
102
|
|
77
103
|
|
78
104
|
## move class-level "static" finders to DirPackage (do NOT work for now for zip packages) - why? why not?
|
@@ -96,16 +122,22 @@ module SportDb
|
|
96
122
|
def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
|
97
123
|
def self.match_teams( path ) TEAMS_RE.match( path ); end
|
98
124
|
|
99
|
-
def self.find_clubs( path, pattern: CLUBS_RE )
|
100
|
-
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE )
|
125
|
+
def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
|
126
|
+
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
|
127
|
+
def self.find_clubs_history( path, pattern: CLUBS_HISTORY_RE ) find( path, pattern ); end
|
101
128
|
|
102
|
-
def self.match_clubs( path )
|
103
|
-
def self.match_clubs_wiki( path )
|
104
|
-
def self.
|
129
|
+
def self.match_clubs( path ) CLUBS_RE.match( path ); end
|
130
|
+
def self.match_clubs_wiki( path ) CLUBS_WIKI_RE.match( path ); end
|
131
|
+
def self.match_clubs_history( path ) CLUBS_HISTORY_RE.match( path); end
|
132
|
+
def self.match_clubs_props( path, pattern: CLUBS_PROPS_RE ) pattern.match( path ); end
|
105
133
|
|
106
134
|
def self.find_leagues( path, pattern: LEAGUES_RE ) find( path, pattern ); end
|
107
135
|
def self.match_leagues( path ) LEAGUES_RE.match( path ); end
|
108
136
|
|
137
|
+
def self.find_seasons( path, pattern: SEASONS_RE ) find( path, pattern ); end
|
138
|
+
def self.match_seasons( path ) SEASONS_RE.match( path ); end
|
139
|
+
|
140
|
+
|
109
141
|
def self.find_conf( path, pattern: CONF_RE ) find( path, pattern ); end
|
110
142
|
def self.match_conf( path ) CONF_RE.match( path ); end
|
111
143
|
|
@@ -118,6 +150,7 @@ module SportDb
|
|
118
150
|
end
|
119
151
|
## add match_match and match_match_csv - why? why not?
|
120
152
|
|
153
|
+
|
121
154
|
class << self
|
122
155
|
alias_method :match_teams?, :match_teams
|
123
156
|
alias_method :teams?, :match_teams
|
@@ -128,12 +161,21 @@ module SportDb
|
|
128
161
|
alias_method :match_clubs_wiki?, :match_clubs_wiki
|
129
162
|
alias_method :clubs_wiki?, :match_clubs_wiki
|
130
163
|
|
131
|
-
alias_method :
|
132
|
-
alias_method :
|
164
|
+
alias_method :match_clubs_history?, :match_clubs_history
|
165
|
+
alias_method :clubs_history?, :match_clubs_history
|
166
|
+
|
167
|
+
alias_method :match_club_props, :match_clubs_props
|
168
|
+
alias_method :match_club_props?, :match_clubs_props
|
169
|
+
alias_method :club_props?, :match_clubs_props
|
170
|
+
alias_method :match_clubs_props?, :match_clubs_props
|
171
|
+
alias_method :clubs_props?, :match_clubs_props
|
133
172
|
|
134
173
|
alias_method :match_leagues?, :match_leagues
|
135
174
|
alias_method :leagues?, :match_leagues
|
136
175
|
|
176
|
+
alias_method :match_seasons?, :match_seasons
|
177
|
+
alias_method :seasons?, :match_seasons
|
178
|
+
|
137
179
|
alias_method :match_conf?, :match_conf
|
138
180
|
alias_method :conf?, :match_conf
|
139
181
|
end
|
@@ -212,11 +254,17 @@ module SportDb
|
|
212
254
|
end
|
213
255
|
end
|
214
256
|
def each_match_csv( &blk ) each( pattern: MATCH_CSV_RE, &blk ); end
|
257
|
+
def each_csv( &blk ) each( pattern: CSV_RE, &blk ); end
|
258
|
+
|
215
259
|
def each_club_props( &blk ) each( pattern: CLUB_PROPS_RE, &blk ); end
|
216
260
|
|
217
261
|
def each_leagues( &blk ) each( pattern: LEAGUES_RE, &blk ); end
|
218
262
|
def each_clubs( &blk ) each( pattern: CLUBS_RE, &blk ); end
|
219
263
|
def each_clubs_wiki( &blk ) each( pattern: CLUBS_WIKI_RE, &blk ); end
|
264
|
+
def each_clubs_history( &blk ) each( pattern: CLUBS_HISTORY_RE, &blk ); end
|
265
|
+
|
266
|
+
def each_seasons( &blk ) each( pattern: SEASONS_RE, &blk ); end
|
267
|
+
|
220
268
|
|
221
269
|
## return all match datafile entries
|
222
270
|
def match( format: 'txt' )
|
@@ -287,13 +335,13 @@ module SportDb
|
|
287
335
|
## filter.skip? filter.include? ( season_sason_key )?
|
288
336
|
## filter.before?( season_key ) etc.
|
289
337
|
## find some good method names!!!!
|
290
|
-
season_start = start ?
|
338
|
+
season_start = start ? Season( start ) : nil
|
291
339
|
|
292
340
|
h = {}
|
293
341
|
match( format: format ).each do |entry|
|
294
342
|
## note: assume last directory in datafile path is the season part/key
|
295
343
|
season_q = File.basename( File.dirname( entry.name ))
|
296
|
-
season =
|
344
|
+
season = Season.parse( season_q ) ## normalize season
|
297
345
|
|
298
346
|
## skip if start season before this season
|
299
347
|
next if season_start && season_start.start_year > season.start_year
|