football-sources 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -15
  3. data/README.md +142 -1
  4. data/Rakefile +1 -1
  5. data/lib/football-sources.rb +26 -15
  6. data/lib/football-sources/apis.rb +1 -77
  7. data/lib/football-sources/apis/mods.rb +20 -0
  8. data/lib/football-sources/fbref.rb +4 -0
  9. data/lib/football-sources/fbref/build.rb +96 -0
  10. data/lib/football-sources/fbref/config.rb +16 -0
  11. data/lib/football-sources/fbref/convert.rb +95 -0
  12. data/lib/football-sources/version.rb +2 -2
  13. data/lib/football-sources/worldfootball.rb +1 -6
  14. data/lib/football-sources/worldfootball/jobs.rb +76 -0
  15. metadata +10 -19
  16. data/lib/football-sources/apis/download.rb +0 -11
  17. data/lib/football-sources/worldfootball/download.rb +0 -61
  18. data/lib/football-sources/worldfootball/leagues.rb +0 -200
  19. data/lib/football-sources/worldfootball/leagues/asia.rb +0 -53
  20. data/lib/football-sources/worldfootball/leagues/europe--british_isles.rb +0 -59
  21. data/lib/football-sources/worldfootball/leagues/europe--central.rb +0 -127
  22. data/lib/football-sources/worldfootball/leagues/europe--eastern.rb +0 -82
  23. data/lib/football-sources/worldfootball/leagues/europe--northern.rb +0 -57
  24. data/lib/football-sources/worldfootball/leagues/europe--southern.rb +0 -86
  25. data/lib/football-sources/worldfootball/leagues/europe--western.rb +0 -38
  26. data/lib/football-sources/worldfootball/leagues/europe.rb +0 -13
  27. data/lib/football-sources/worldfootball/leagues/north_america.rb +0 -44
  28. data/lib/football-sources/worldfootball/leagues/pacific.rb +0 -21
  29. data/lib/football-sources/worldfootball/leagues/south_america.rb +0 -11
  30. data/lib/football-sources/worldfootball/tool.rb +0 -100
@@ -0,0 +1,16 @@
1
+ module Fbref
2
+
3
+ ### add some more config options / settings
4
+ class Configuration ## settings holder; settings are grouped in nested classes (see convert below)
5
+ #########
6
+ ## nested configuration classes - use - why? why not?
7
+ class Convert ## settings read by Fbref.convert (via config.convert)
8
+ def out_dir() @out_dir || './o'; end ## output directory; falls back to './o' if nothing was set
9
+ def out_dir=(value) @out_dir = value; end
10
+ end
11
+
12
+ def convert() @convert ||= Convert.new; end ## Convert settings - created on first access, then reused
13
+ end # class Configuration
14
+
15
+
16
+ end # module Fbref
@@ -0,0 +1,95 @@
1
+ module Fbref
2
+
3
+ def self.convert( league:, season: ) ## convert the cached schedule page for league+season into a .csv file
4
+ page = Page::Schedule.from_cache( league: league,
5
+ season: season )
6
+
7
+ puts page.title
8
+
9
+ rows = page.matches
10
+ recs = build( rows, league: league, season: season )
11
+ ## pp rows
12
+
13
+ ## reformat date (in-place, column index 2) from %Y-%m-%d to e.g. Sat Aug 7 1993
14
+ recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' ) }
15
+
16
+ recs, headers = vacuum( recs ) ## drop columns that are empty in all records; headers match the kept columns
17
+ pp recs[0..2] ## debug print of the first three records
18
+
19
+ season = Season.parse( season ) ## note: season is rebound to the result of Season.parse (needed for to_path below)
20
+ path = "#{config.convert.out_dir}/#{league}_#{season.to_path}.csv"
21
+ puts "write #{path}..."
22
+ Cache::CsvMatchWriter.write( path, recs, headers: headers )
23
+ end
24
+
25
+
26
+
27
+
28
+ #####
29
+ # vacuum helper stuff - todo/fix - (re)use - make more generic - why? why not?
30
+
31
+ MAX_HEADERS = [
32
+ 'Stage',
33
+ 'Round',
34
+ 'Date',
35
+ 'Time',
36
+ 'Team 1',
37
+ 'FT',
38
+ 'HT',
39
+ 'Team 2',
40
+ 'ET',
41
+ 'P',
42
+ 'Venue',
43
+ 'Att',
44
+ 'Comments', ## e.g. awarded, cancelled/canceled, etc.
45
+ ]
46
+
47
+ MIN_HEADERS = [ ## always keep even if all empty
48
+ 'Date',
49
+ 'Team 1',
50
+ 'FT',
51
+ 'Team 2'
52
+ ]
53
+
54
+ def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
55
+ ## check for unused columns and strip/remove
56
+ counter = Array.new( MAX_HEADERS.size, 0 )
57
+ rows.each do |row|
58
+ row.each_with_index do |col, idx|
59
+ counter[idx] += 1 unless col.nil? || col.empty?
60
+ end
61
+ end
62
+
63
+ pp counter
64
+
65
+ ## check empty columns
66
+ headers = []
67
+ indices = []
68
+ empty_headers = []
69
+ empty_indices = []
70
+
71
+ counter.each_with_index do |num, idx|
72
+ header = MAX_HEADERS[ idx ]
73
+ if num > 0 || (num == 0 && fixed_headers.include?( header ))
74
+ headers << header
75
+ indices << idx
76
+ else
77
+ empty_headers << header
78
+ empty_indices << idx
79
+ end
80
+ end
81
+
82
+ if empty_indices.size > 0
83
+ rows = rows.map do |row|
84
+ row_vacuumed = []
85
+ row.each_with_index do |col, idx|
86
+ ## todo/fix: use values or such??
87
+ row_vacuumed << col unless empty_indices.include?( idx )
88
+ end
89
+ row_vacuumed
90
+ end
91
+ end
92
+
93
+ [rows, headers]
94
+ end
95
+ end # module Fbref
@@ -1,8 +1,8 @@
1
1
 
2
2
  module FootballSources
3
3
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
4
- MINOR = 0
5
- PATCH = 1
4
+ MINOR = 1
5
+ PATCH = 0
6
6
  VERSION = [MAJOR,MINOR,PATCH].join('.')
7
7
 
8
8
  def self.version
@@ -5,12 +5,7 @@
5
5
 
6
6
 
7
7
  ## our own code
8
- require_relative 'worldfootball/leagues'
9
-
10
8
  require_relative 'worldfootball/config'
11
-
12
- require_relative 'worldfootball/download'
13
-
14
9
  require_relative 'worldfootball/mods'
15
10
  require_relative 'worldfootball/vacuum'
16
11
  require_relative 'worldfootball/build'
@@ -18,7 +13,7 @@ require_relative 'worldfootball/convert'
18
13
  require_relative 'worldfootball/convert_reports'
19
14
 
20
15
 
21
- require_relative 'worldfootball/tool'
16
+ require_relative 'worldfootball/jobs'
22
17
 
23
18
 
24
19
 
@@ -0,0 +1,76 @@
1
+
2
+
3
+ module Worldfootball
4
+ ################################
5
+ # add more helpers
6
+ # move upstream for (re)use - why? why not?
7
+
8
+ ## todo/check: what to do: if league is both included and excluded?
9
+ ## include forces include? or exclude has the last word? - why? why not?
10
+ ## Excludes match before includes,
11
+ ## meaning that something that has been excluded cannot be included again
12
+
13
+ ## todo - find "proper/classic" timezone ("winter time")
14
+
15
+ ## Brasilia - Distrito Federal, Brasil (GMT-3) -- summer time?
16
+ ## Ciudad de México, CDMX, México (GMT-5) -- summer time?
17
+ ## Londres, Reino Unido (GMT+1)
18
+ ## Madrid -- ?
19
+ ## Lisboa -- ?
20
+ ## Moscow -- ?
21
+ ##
22
+ ## todo/check - quick fix timezone offsets for leagues for now
23
+ ## - find something better - why? why not?
24
+ ## note: assume time is in GMT+1
25
+ OFFSETS = {
26
+ 'eng.1' => -1,
27
+ 'eng.2' => -1,
28
+ 'eng.3' => -1,
29
+ 'eng.4' => -1,
30
+ 'eng.5' => -1,
31
+
32
+ 'es.1' => -1,
33
+ 'es.2' => -1,
34
+
35
+ 'pt.1' => -1,
36
+ 'pt.2' => -1,
37
+
38
+ 'br.1' => -5,
39
+ 'mx.1' => -7,
40
+ }
41
+
42
+
43
+
44
+ class Job ## todo/check: use a module (NOT a class) - why? why not?
45
+ def self.download( datasets )
46
+ datasets.each_with_index do |dataset,i|
47
+ league = dataset[0]
48
+ seasons = dataset[1]
49
+
50
+ puts "downloading [#{i+1}/#{datasets.size}] #{league}..."
51
+ seasons.each_with_index do |season,j|
52
+ puts " season [#{j+1}/#{season.size}] #{league} #{season}..."
53
+ Worldfootball.schedule( league: league,
54
+ season: season )
55
+ end
56
+ end
57
+ end
58
+
59
+ def self.convert( datasets )
60
+ datasets.each_with_index do |dataset,i|
61
+ league = dataset[0]
62
+ seasons = dataset[1]
63
+
64
+ puts "converting [#{i+1}/#{datasets.size}] #{league}..."
65
+ seasons.each_with_index do |season,j|
66
+ puts " season [#{j+1}/#{season.size}] #{league} #{season}..."
67
+ Worldfootball.convert( league: league,
68
+ season: season,
69
+ offset: OFFSETS[ league ] )
70
+ end
71
+ end
72
+ end
73
+ end # class Job
74
+
75
+ end # module Worldfootball
76
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: football-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-10 00:00:00.000000000 Z
11
+ date: 2020-11-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: webget-football
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.0.1
19
+ version: 0.1.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.0.1
26
+ version: 0.1.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: sportdb-catalogs
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -91,29 +91,20 @@ files:
91
91
  - lib/football-sources/apis/config.rb
92
92
  - lib/football-sources/apis/convert.rb
93
93
  - lib/football-sources/apis/convert_cl.rb
94
- - lib/football-sources/apis/download.rb
94
+ - lib/football-sources/apis/mods.rb
95
95
  - lib/football-sources/apis/stat.rb
96
+ - lib/football-sources/fbref.rb
97
+ - lib/football-sources/fbref/build.rb
98
+ - lib/football-sources/fbref/config.rb
99
+ - lib/football-sources/fbref/convert.rb
96
100
  - lib/football-sources/version.rb
97
101
  - lib/football-sources/worldfootball.rb
98
102
  - lib/football-sources/worldfootball/build.rb
99
103
  - lib/football-sources/worldfootball/config.rb
100
104
  - lib/football-sources/worldfootball/convert.rb
101
105
  - lib/football-sources/worldfootball/convert_reports.rb
102
- - lib/football-sources/worldfootball/download.rb
103
- - lib/football-sources/worldfootball/leagues.rb
104
- - lib/football-sources/worldfootball/leagues/asia.rb
105
- - lib/football-sources/worldfootball/leagues/europe--british_isles.rb
106
- - lib/football-sources/worldfootball/leagues/europe--central.rb
107
- - lib/football-sources/worldfootball/leagues/europe--eastern.rb
108
- - lib/football-sources/worldfootball/leagues/europe--northern.rb
109
- - lib/football-sources/worldfootball/leagues/europe--southern.rb
110
- - lib/football-sources/worldfootball/leagues/europe--western.rb
111
- - lib/football-sources/worldfootball/leagues/europe.rb
112
- - lib/football-sources/worldfootball/leagues/north_america.rb
113
- - lib/football-sources/worldfootball/leagues/pacific.rb
114
- - lib/football-sources/worldfootball/leagues/south_america.rb
106
+ - lib/football-sources/worldfootball/jobs.rb
115
107
  - lib/football-sources/worldfootball/mods.rb
116
- - lib/football-sources/worldfootball/tool.rb
117
108
  - lib/football-sources/worldfootball/vacuum.rb
118
109
  - lib/football/sources.rb
119
110
  - test/helper.rb
@@ -1,11 +0,0 @@
1
-
2
-
3
- module Footballdata
4
-
5
- def self.schedule( league:, season: )
6
- season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
7
-
8
- Metal.competition( LEAGUES[ league.downcase ], season.start_year )
9
- end
10
-
11
- end # module Footballdata
@@ -1,61 +0,0 @@
1
-
2
-
3
- module Worldfootball
4
-
5
- ##
6
- ## note/fix!!!!
7
- ## do NOT allow redirects for now - report error!!!
8
- ## does NOT return 404 page not found errors; always redirects (301) to home page
9
- ## on missing pages:
10
- ## 301 Moved Permanently location=https://www.weltfussball.de/
11
- ## 301 Moved Permanently location=https://www.weltfussball.de/
12
-
13
-
14
-
15
-
16
- # url = "https://www.weltfussball.de/alle_spiele/eng-league-one-#{season}/"
17
- # url = "https://www.weltfussball.de/alle_spiele/eng-league-two-#{season}/"
18
- # https://www.weltfussball.de/alle_spiele/eng-national-league-2019-2020/
19
- # https://www.weltfussball.de/alle_spiele/eng-fa-cup-2018-2019/
20
- # https://www.weltfussball.de/alle_spiele/eng-league-cup-2019-2020/
21
-
22
- # https://www.weltfussball.de/alle_spiele/fra-ligue-2-2019-2020/
23
- # https://www.weltfussball.de/alle_spiele/ita-serie-b-2019-2020/
24
- # https://www.weltfussball.de/alle_spiele/rus-premier-liga-2019-2020/
25
- # https://www.weltfussball.de/alle_spiele/rus-1-division-2019-2020/
26
- # https://www.weltfussball.de/alle_spiele/tur-sueperlig-2019-2020/
27
- # https://www.weltfussball.de/alle_spiele/tur-1-lig-2019-2020/
28
-
29
-
30
-
31
- def self.schedule( league:, season: )
32
- season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
33
-
34
- league = find_league( league )
35
-
36
- pages = league.pages( season: season )
37
-
38
- ## if single (simple) page setup - wrap in array
39
- pages = pages.is_a?(Array) ? pages : [pages]
40
- pages.each do |page_meta|
41
- Metal.schedule( page_meta[:slug] )
42
- end # each page
43
- end
44
-
45
-
46
- def self.schedule_reports( league:, season:, cache: true ) ## todo/check: rename to reports_for_schedule or such - why? why not?
47
- season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
48
-
49
- league = find_league( league )
50
-
51
- pages = league.pages( season: season )
52
-
53
- ## if single (simple) page setup - wrap in array
54
- pages = pages.is_a?(Array) ? pages : [pages]
55
- pages.each do |page_meta|
56
- Metal.schedule_reports( page_meta[:slug], cache: cache )
57
- end # each page
58
- end
59
-
60
-
61
- end # module Worldfootball
@@ -1,200 +0,0 @@
1
-
2
-
3
- require_relative 'leagues/europe'
4
- require_relative 'leagues/north_america'
5
- require_relative 'leagues/south_america'
6
- require_relative 'leagues/pacific'
7
- require_relative 'leagues/asia'
8
-
9
-
10
- module Worldfootball
11
-
12
- LEAGUES = [LEAGUES_EUROPE,
13
- LEAGUES_NORTH_AMERICA,
14
- LEAGUES_SOUTH_AMERICA,
15
- LEAGUES_PACIFIC,
16
- LEAGUES_ASIA].reduce({}) { |mem,h| mem.merge!( h ); mem }
17
-
18
-
19
- class League
20
- def initialize( key, data )
21
- @key = key
22
- ## @data = data
23
-
24
- @pages = data[:pages]
25
- @season_proc = data[:season] || ->(season) { nil }
26
- end
27
-
28
- def key() @key; end
29
-
30
- def pages( season: )
31
- ## note: return for no stages / simple case - just a string
32
- ## and for the stages case ALWAYS an array (even if it has only one page (with stage))
33
-
34
- if @pages.is_a?( String )
35
- # assume always "simple/regular" format w/o stages
36
- slug = @pages
37
- { slug: fill_slug( slug, season: season ) }
38
- else
39
- ## check for league format / stages
40
- ## return array (of strings) or nil (for no stages - "simple" format)
41
- indices = @season_proc.call( season )
42
- if indices.nil?
43
- puts "!! ERROR - no configuration found for season >#{season}< for league >#{@key}< found; sorry"
44
- exit 1
45
- elsif indices.is_a?( Integer ) ## single number - single/regular format w/o stage
46
- # note: starting with 0 (always use idx-1) !!!
47
- slug = if @pages.is_a?( Array )
48
- @pages[indices-1]
49
- else ## assume hash (and key is page slug)
50
- @pages.keys[indices-1]
51
- end
52
- { slug: fill_slug( slug, season: season ) }
53
- else ## assume regular case - array of integers
54
- recs = []
55
- indices.each do |idx|
56
- slug = key = @pages.keys[idx-1]
57
- recs << { slug: fill_slug( slug, season: season ),
58
- stage: @pages[key] } ## note: include mapping for page to stage name!!
59
- end
60
- recs
61
- end
62
- end
63
- end # pages
64
-
65
-
66
- ######
67
- # helper method
68
- def fill_slug( slug, season: )
69
- ## note: fill-in/check for place holders too
70
- slug = if slug.index( '{season}' )
71
- slug.sub( '{season}', season.to_path( :long ) ) ## e.g. 2010-2011
72
- elsif slug.index( '{end_year}' )
73
- slug.sub( '{end_year}', season.end_year.to_s ) ## e.g. 2011
74
- else
75
- ## assume convenience fallback - append regular season
76
- "#{slug}-#{season.to_path( :long )}"
77
- end
78
-
79
- puts " slug=>#{slug}<"
80
-
81
- slug
82
- end
83
- end # class League
84
-
85
-
86
-
87
- def self.find_league( key ) ## league info lookup
88
- data = LEAGUES[ key ]
89
- if data.nil?
90
- puts "!! ERROR - no league found for >#{key}<; add to leagues tables"
91
- exit 1
92
- end
93
- League.new( key, data ) ## use a convenience wrapper for now
94
- end
95
-
96
-
97
-
98
- ### "reverse" lookup by page - returns league AND season
99
- ## note: "blind" season template para - might be season or start_year etc.
100
- ## e.g. {season} or {start_year} becomes {}
101
-
102
- PAGE_VAR_RE = /{
103
- [^}]+
104
- }/x
105
-
106
-
107
- def self.norm_slug( slug )
108
- ## assume convenience fallback - append regular season
109
- slug.index( '{' ) ? slug : "#{slug}-{season}"
110
- end
111
-
112
- PAGES ||=
113
- LEAGUES.reduce( {} ) do |pages, (key, data)|
114
- if data[:pages].is_a?( String )
115
- slug = data[:pages]
116
- slug = Worldfootball.norm_slug( slug )
117
- pages[ slug.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: slug }
118
- elsif data[:pages].is_a?( Array )
119
- data[:pages].each do |slug|
120
- slug = Worldfootball.norm_slug( slug )
121
- pages[ slug.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: slug }
122
- end
123
- ## elsif data[:pages].nil?
124
- ## todo/fix: missing pages!!!
125
- else ## assume hash
126
- ## add stage to pages too - why? why not?
127
- data[:pages].each do |slug, stage|
128
- slug = Worldfootball.norm_slug( slug )
129
- pages[ slug.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: slug, stage: stage }
130
- end
131
- end
132
- pages
133
- end
134
-
135
- # e.g. 2000 or 2000-2001
136
- SEASON_RE = /[0-9]{4}
137
- (?:
138
- -[0-9]{4}
139
- )?
140
- /x
141
-
142
-
143
- def self.find_page!( slug )
144
- page = find_page( slug )
145
- if page.nil?
146
- puts "!! ERROR: no mapping for page >#{slug}< found; sorry"
147
-
148
- season_str = nil
149
- norm = slug.sub( SEASON_RE ) do |match| ## replace season with var placeholder {}
150
- season_str = match ## keep reference to season str
151
- '{}' ## replace with {}
152
- end
153
-
154
- puts " season: >#{season_str}<"
155
- puts " slug (norm): >#{norm}<"
156
- puts
157
- ## pp PAGES
158
- exit 1
159
- end
160
- page
161
- end
162
-
163
-
164
-
165
- def self.find_page( slug )
166
- ## return league key and season
167
- season_str = nil
168
- norm = slug.sub( SEASON_RE ) do |match| ## replace season with var placeholder {}
169
- season_str = match ## keep reference to season str
170
- '{}' ## replace with {}
171
- end
172
-
173
- if season_str.nil?
174
- puts "!! ERROR: no season found in page slug >#{slug}<; sorry"
175
- exit 1
176
- end
177
-
178
- rec = PAGES[ norm ]
179
- return nil if rec.nil?
180
-
181
-
182
- league_key = rec[:league]
183
- slug_tmpl = rec[:slug]
184
- season = if slug_tmpl.index( '{start_year}' )
185
- ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
186
- Season( "#{season_str.to_i}-#{season_str.to_i+1}" )
187
- elsif slug_tmpl.index( '{end_year}' )
188
- ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
189
- Season( "#{season_str.to_i-1}-#{season_str.to_i}" )
190
- else ## assume "regular" seasson - pass through as is
191
- Season( season_str )
192
- end
193
-
194
- ## return hash table / record
195
- { league: league_key,
196
- season: season.key }
197
- end
198
-
199
-
200
- end # module Worldfootball