football-sources 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -15
  3. data/README.md +142 -1
  4. data/Rakefile +1 -1
  5. data/lib/football-sources.rb +26 -15
  6. data/lib/football-sources/apis.rb +1 -77
  7. data/lib/football-sources/apis/mods.rb +20 -0
  8. data/lib/football-sources/fbref.rb +4 -0
  9. data/lib/football-sources/fbref/build.rb +96 -0
  10. data/lib/football-sources/fbref/config.rb +16 -0
  11. data/lib/football-sources/fbref/convert.rb +95 -0
  12. data/lib/football-sources/version.rb +2 -2
  13. data/lib/football-sources/worldfootball.rb +1 -6
  14. data/lib/football-sources/worldfootball/jobs.rb +76 -0
  15. metadata +10 -19
  16. data/lib/football-sources/apis/download.rb +0 -11
  17. data/lib/football-sources/worldfootball/download.rb +0 -61
  18. data/lib/football-sources/worldfootball/leagues.rb +0 -200
  19. data/lib/football-sources/worldfootball/leagues/asia.rb +0 -53
  20. data/lib/football-sources/worldfootball/leagues/europe--british_isles.rb +0 -59
  21. data/lib/football-sources/worldfootball/leagues/europe--central.rb +0 -127
  22. data/lib/football-sources/worldfootball/leagues/europe--eastern.rb +0 -82
  23. data/lib/football-sources/worldfootball/leagues/europe--northern.rb +0 -57
  24. data/lib/football-sources/worldfootball/leagues/europe--southern.rb +0 -86
  25. data/lib/football-sources/worldfootball/leagues/europe--western.rb +0 -38
  26. data/lib/football-sources/worldfootball/leagues/europe.rb +0 -13
  27. data/lib/football-sources/worldfootball/leagues/north_america.rb +0 -44
  28. data/lib/football-sources/worldfootball/leagues/pacific.rb +0 -21
  29. data/lib/football-sources/worldfootball/leagues/south_america.rb +0 -11
  30. data/lib/football-sources/worldfootball/tool.rb +0 -100
@@ -0,0 +1,16 @@
1
module Fbref

  ### add some more config options / settings

  # Container for Fbref settings. Settings are grouped into nested
  # classes (only Convert for now) and reached through memoized readers.
  class Configuration
    #########
    ## nested configuration classes - use - why? why not?

    # Settings used by the convert step.
    class Convert
      # Assign the output directory.
      attr_writer :out_dir

      # Output directory; falls back to './o' while nothing
      # (or nil/false) has been assigned.
      def out_dir
        @out_dir || './o'
      end
    end

    # Returns the Convert settings; the object is created on first
    # access and reused afterwards.
    def convert
      @convert ||= Convert.new
    end
  end # class Configuration

end # module Fbref
@@ -0,0 +1,95 @@
1
module Fbref

  # Convert the cached schedule page of one league season into a CSV file.
  #
  # Reads the page via Page::Schedule.from_cache, turns its matches into
  # records with build, reformats the date column, strips unused columns
  # with vacuum and writes the result with Cache::CsvMatchWriter to
  # "<config.convert.out_dir>/<league>_<season path>.csv".
  #
  # league - league key, passed through to the page lookup and used in
  #          the output file name
  # season - season as accepted by Season.parse
  #
  # Prints the page title, the first records and the output path.
  # Returns whatever Cache::CsvMatchWriter.write returns.
  def self.convert( league:, season: )
    page = Page::Schedule.from_cache( league: league,
                                      season: season )

    puts page.title

    rows = page.matches
    recs = build( rows, league: league, season: season )
    ## pp rows

    ## reformat date / beautify e.g. Sat Aug 7 1993
    ##  note: the date is read from (and written back to) column 2 in place
    recs.each do |rec|
      rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' )
    end

    recs, headers = vacuum( recs )
    pp recs[0..2]

    season = Season.parse( season )
    path   = "#{config.convert.out_dir}/#{league}_#{season.to_path}.csv"
    puts "write #{path}..."
    Cache::CsvMatchWriter.write( path, recs, headers: headers )
  end


  #####
  # vacuum helper stuff - todo/fix - (re)use - make more generic - why? why not?

  ## all known columns (in record order)
  MAX_HEADERS = [
    'Stage',
    'Round',
    'Date',
    'Time',
    'Team 1',
    'FT',
    'HT',
    'Team 2',
    'ET',
    'P',
    'Venue',
    'Att',
    'Comments',    ## e.g. awarded, cancelled/canceled, etc.
  ]

  MIN_HEADERS = [   ## always keep even if all empty
    'Date',
    'Team 1',
    'FT',
    'Team 2'
  ]

  # Strip columns that are blank (nil or empty) in every row.
  #
  # rows          - array of records (arrays); column idx of a record
  #                 belongs to headers[idx]
  # headers       - header names for the columns (default: MAX_HEADERS)
  # fixed_headers - header names that are kept even if the column is
  #                 blank in all rows (default: MIN_HEADERS)
  #
  # Prints the per-column usage counter (debug output).
  #
  # Returns [rows, headers] with the unused columns removed; rows is the
  # very same object that was passed in if there was nothing to remove.
  # Blank values in columns past the header list are left untouched.
  #
  # Raises ArgumentError if a row holds a non-blank value in a column
  # that has no header.
  def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
    ## count the non-blank values per column
    counter = Array.new( headers.size, 0 )
    rows.each do |row|
      row.each_with_index do |col, idx|
        next if blank_value?( col )

        if idx >= headers.size
          raise ArgumentError,
                "value in column #{idx+1} but only #{headers.size} header(s) given: #{row.inspect}"
        end
        counter[idx] += 1
      end
    end

    pp counter

    ## split into used (or fixed) columns and empty columns
    keep_indices  = counter.each_index.select do |idx|
                      counter[idx] > 0 || fixed_headers.include?( headers[idx] )
                    end
    empty_indices = counter.each_index.to_a - keep_indices

    unless empty_indices.empty?
      rows = rows.map do |row|
        row.reject.with_index { |_col, idx| empty_indices.include?( idx ) }
      end
    end

    [rows, keep_indices.map { |idx| headers[idx] }]
  end

  # Is the value nil or empty? Values that do not respond to empty?
  # (e.g. numbers) always count as present.
  def self.blank_value?( value )
    value.nil? || (value.respond_to?( :empty? ) && value.empty?)
  end
  private_class_method :blank_value?

end # module Fbref
@@ -1,8 +1,8 @@
1
1
 
2
2
  module FootballSources
3
3
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
4
- MINOR = 0
5
- PATCH = 1
4
+ MINOR = 1
5
+ PATCH = 0
6
6
  VERSION = [MAJOR,MINOR,PATCH].join('.')
7
7
 
8
8
  def self.version
@@ -5,12 +5,7 @@
5
5
 
6
6
 
7
7
  ## our own code
8
- require_relative 'worldfootball/leagues'
9
-
10
8
  require_relative 'worldfootball/config'
11
-
12
- require_relative 'worldfootball/download'
13
-
14
9
  require_relative 'worldfootball/mods'
15
10
  require_relative 'worldfootball/vacuum'
16
11
  require_relative 'worldfootball/build'
@@ -18,7 +13,7 @@ require_relative 'worldfootball/convert'
18
13
  require_relative 'worldfootball/convert_reports'
19
14
 
20
15
 
21
- require_relative 'worldfootball/tool'
16
+ require_relative 'worldfootball/jobs'
22
17
 
23
18
 
24
19
 
@@ -0,0 +1,76 @@
1
+
2
+
3
module Worldfootball
  ################################
  # add more helpers
  #  move upstream for (re)use - why? why not?

  ## todo/check: what to do: if league is both included and excluded?
  ##   include forces include? or exclude has the last word? - why? why not?
  ##  Excludes match before includes,
  ##   meaning that something that has been excluded cannot be included again

  ## todo - find "proper/classic" timezone ("winter time")

  ##  Brasilia, Federal District, Brazil (GMT-3)  -- summer time?
  ##  Mexico City, Mexico (GMT-5)                 -- summer time?
  ##  London, United Kingdom (GMT+1)
  ##  Madrid -- ?
  ##  Lisbon -- ?
  ##  Moscow -- ?
  ##
  ## todo/check - quick fix timezone offsets for leagues for now
  ##   - find something better - why? why not?
  ## note: assume time is in GMT+1
  ##
  ## maps league key to the offset handed to Worldfootball.convert;
  ##  leagues without an entry get offset nil
  ##  (presumably hours relative to GMT+1 - todo/check: confirm in convert)
  OFFSETS = {
    'eng.1' => -1,
    'eng.2' => -1,
    'eng.3' => -1,
    'eng.4' => -1,
    'eng.5' => -1,

    'es.1'  => -1,
    'es.2'  => -1,

    'pt.1'  => -1,
    'pt.2'  => -1,

    'br.1'  => -5,
    'mx.1'  => -7,
  }


  # Batch runner for a list of datasets; a dataset is a pair of
  # league key and list of seasons e.g. ['eng.1', ['2019/20', '2020/21']].
  class Job    ## todo/check: use a module (NOT a class) - why? why not?

    # Call Worldfootball.schedule for every league season in datasets;
    # prints progress along the way. Returns datasets.
    def self.download( datasets )
      each_season( 'downloading', datasets ) do |league, season|
        Worldfootball.schedule( league: league,
                                season: season )
      end
    end

    # Call Worldfootball.convert for every league season in datasets,
    # passing along the league's entry in OFFSETS (nil if there is none);
    # prints progress along the way. Returns datasets.
    def self.convert( datasets )
      each_season( 'converting', datasets ) do |league, season|
        Worldfootball.convert( league: league,
                               season: season,
                               offset: OFFSETS[ league ] )
      end
    end

    # Shared loop: prints the progress lines and yields league and
    # season for every season of every dataset.
    def self.each_season( label, datasets )
      datasets.each_with_index do |dataset, i|
        league  = dataset[0]
        seasons = dataset[1]

        puts "#{label} [#{i+1}/#{datasets.size}] #{league}..."
        seasons.each_with_index do |season, j|
          ## note: total is the number of seasons (NOT the size of the season itself)
          puts " season [#{j+1}/#{seasons.size}] #{league} #{season}..."
          yield league, season
        end
      end
    end
    private_class_method :each_season
  end # class Job

end # module Worldfootball
76
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: football-sources
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-10 00:00:00.000000000 Z
11
+ date: 2020-11-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: webget-football
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.0.1
19
+ version: 0.1.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.0.1
26
+ version: 0.1.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: sportdb-catalogs
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -91,29 +91,20 @@ files:
91
91
  - lib/football-sources/apis/config.rb
92
92
  - lib/football-sources/apis/convert.rb
93
93
  - lib/football-sources/apis/convert_cl.rb
94
- - lib/football-sources/apis/download.rb
94
+ - lib/football-sources/apis/mods.rb
95
95
  - lib/football-sources/apis/stat.rb
96
+ - lib/football-sources/fbref.rb
97
+ - lib/football-sources/fbref/build.rb
98
+ - lib/football-sources/fbref/config.rb
99
+ - lib/football-sources/fbref/convert.rb
96
100
  - lib/football-sources/version.rb
97
101
  - lib/football-sources/worldfootball.rb
98
102
  - lib/football-sources/worldfootball/build.rb
99
103
  - lib/football-sources/worldfootball/config.rb
100
104
  - lib/football-sources/worldfootball/convert.rb
101
105
  - lib/football-sources/worldfootball/convert_reports.rb
102
- - lib/football-sources/worldfootball/download.rb
103
- - lib/football-sources/worldfootball/leagues.rb
104
- - lib/football-sources/worldfootball/leagues/asia.rb
105
- - lib/football-sources/worldfootball/leagues/europe--british_isles.rb
106
- - lib/football-sources/worldfootball/leagues/europe--central.rb
107
- - lib/football-sources/worldfootball/leagues/europe--eastern.rb
108
- - lib/football-sources/worldfootball/leagues/europe--northern.rb
109
- - lib/football-sources/worldfootball/leagues/europe--southern.rb
110
- - lib/football-sources/worldfootball/leagues/europe--western.rb
111
- - lib/football-sources/worldfootball/leagues/europe.rb
112
- - lib/football-sources/worldfootball/leagues/north_america.rb
113
- - lib/football-sources/worldfootball/leagues/pacific.rb
114
- - lib/football-sources/worldfootball/leagues/south_america.rb
106
+ - lib/football-sources/worldfootball/jobs.rb
115
107
  - lib/football-sources/worldfootball/mods.rb
116
- - lib/football-sources/worldfootball/tool.rb
117
108
  - lib/football-sources/worldfootball/vacuum.rb
118
109
  - lib/football/sources.rb
119
110
  - test/helper.rb
@@ -1,11 +0,0 @@
1
-
2
-
3
- module Footballdata
4
-
5
- def self.schedule( league:, season: )
6
- season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
7
-
8
- Metal.competition( LEAGUES[ league.downcase ], season.start_year )
9
- end
10
-
11
- end # module Footballdata
@@ -1,61 +0,0 @@
1
-
2
-
3
- module Worldfootball
4
-
5
- ##
6
- ## note/fix!!!!
7
- ## do NOT allow redirects for now - report error!!!
8
- ## does NOT return 404 page not found errors; always redirects (301) to home page
9
- ## on missing pages:
10
- ## 301 Moved Permanently location=https://www.weltfussball.de/
11
- ## 301 Moved Permanently location=https://www.weltfussball.de/
12
-
13
-
14
-
15
-
16
- # url = "https://www.weltfussball.de/alle_spiele/eng-league-one-#{season}/"
17
- # url = "https://www.weltfussball.de/alle_spiele/eng-league-two-#{season}/"
18
- # https://www.weltfussball.de/alle_spiele/eng-national-league-2019-2020/
19
- # https://www.weltfussball.de/alle_spiele/eng-fa-cup-2018-2019/
20
- # https://www.weltfussball.de/alle_spiele/eng-league-cup-2019-2020/
21
-
22
- # https://www.weltfussball.de/alle_spiele/fra-ligue-2-2019-2020/
23
- # https://www.weltfussball.de/alle_spiele/ita-serie-b-2019-2020/
24
- # https://www.weltfussball.de/alle_spiele/rus-premier-liga-2019-2020/
25
- # https://www.weltfussball.de/alle_spiele/rus-1-division-2019-2020/
26
- # https://www.weltfussball.de/alle_spiele/tur-sueperlig-2019-2020/
27
- # https://www.weltfussball.de/alle_spiele/tur-1-lig-2019-2020/
28
-
29
-
30
-
31
- def self.schedule( league:, season: )
32
- season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
33
-
34
- league = find_league( league )
35
-
36
- pages = league.pages( season: season )
37
-
38
- ## if single (simple) page setup - wrap in array
39
- pages = pages.is_a?(Array) ? pages : [pages]
40
- pages.each do |page_meta|
41
- Metal.schedule( page_meta[:slug] )
42
- end # each page
43
- end
44
-
45
-
46
- def self.schedule_reports( league:, season:, cache: true ) ## todo/check: rename to reports_for_schedule or such - why? why not?
47
- season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
48
-
49
- league = find_league( league )
50
-
51
- pages = league.pages( season: season )
52
-
53
- ## if single (simple) page setup - wrap in array
54
- pages = pages.is_a?(Array) ? pages : [pages]
55
- pages.each do |page_meta|
56
- Metal.schedule_reports( page_meta[:slug], cache: cache )
57
- end # each page
58
- end
59
-
60
-
61
- end # module Worldfootball
@@ -1,200 +0,0 @@
1
-
2
-
3
- require_relative 'leagues/europe'
4
- require_relative 'leagues/north_america'
5
- require_relative 'leagues/south_america'
6
- require_relative 'leagues/pacific'
7
- require_relative 'leagues/asia'
8
-
9
-
10
- module Worldfootball
11
-
12
- LEAGUES = [LEAGUES_EUROPE,
13
- LEAGUES_NORTH_AMERICA,
14
- LEAGUES_SOUTH_AMERICA,
15
- LEAGUES_PACIFIC,
16
- LEAGUES_ASIA].reduce({}) { |mem,h| mem.merge!( h ); mem }
17
-
18
-
19
- class League
20
- def initialize( key, data )
21
- @key = key
22
- ## @data = data
23
-
24
- @pages = data[:pages]
25
- @season_proc = data[:season] || ->(season) { nil }
26
- end
27
-
28
- def key() @key; end
29
-
30
- def pages( season: )
31
- ## note: return for no stages / simple case - just a string
32
- ## and for the stages case ALWAYS an array (even if it has only one page (with stage))
33
-
34
- if @pages.is_a?( String )
35
- # assume always "simple/regular" format w/o stages
36
- slug = @pages
37
- { slug: fill_slug( slug, season: season ) }
38
- else
39
- ## check for league format / stages
40
- ## return array (of strings) or nil (for no stages - "simple" format)
41
- indices = @season_proc.call( season )
42
- if indices.nil?
43
- puts "!! ERROR - no configuration found for season >#{season}< for league >#{@key}< found; sorry"
44
- exit 1
45
- elsif indices.is_a?( Integer ) ## single number - single/regular format w/o stage
46
- # note: starting with 0 (always use idx-1) !!!
47
- slug = if @pages.is_a?( Array )
48
- @pages[indices-1]
49
- else ## assume hash (and key is page slug)
50
- @pages.keys[indices-1]
51
- end
52
- { slug: fill_slug( slug, season: season ) }
53
- else ## assume regular case - array of integers
54
- recs = []
55
- indices.each do |idx|
56
- slug = key = @pages.keys[idx-1]
57
- recs << { slug: fill_slug( slug, season: season ),
58
- stage: @pages[key] } ## note: include mapping for page to stage name!!
59
- end
60
- recs
61
- end
62
- end
63
- end # pages
64
-
65
-
66
- ######
67
- # helper method
68
- def fill_slug( slug, season: )
69
- ## note: fill-in/check for place holders too
70
- slug = if slug.index( '{season}' )
71
- slug.sub( '{season}', season.to_path( :long ) ) ## e.g. 2010-2011
72
- elsif slug.index( '{end_year}' )
73
- slug.sub( '{end_year}', season.end_year.to_s ) ## e.g. 2011
74
- else
75
- ## assume convenience fallback - append regular season
76
- "#{slug}-#{season.to_path( :long )}"
77
- end
78
-
79
- puts " slug=>#{slug}<"
80
-
81
- slug
82
- end
83
- end # class League
84
-
85
-
86
-
87
- def self.find_league( key ) ## league info lookup
88
- data = LEAGUES[ key ]
89
- if data.nil?
90
- puts "!! ERROR - no league found for >#{key}<; add to leagues tables"
91
- exit 1
92
- end
93
- League.new( key, data ) ## use a convenience wrapper for now
94
- end
95
-
96
-
97
-
98
- ### "reverse" lookup by page - returns league AND season
99
- ## note: "blind" season template para - might be season or start_year etc.
100
- ## e.g. {season} or {start_year} becomes {}
101
-
102
- PAGE_VAR_RE = /{
103
- [^}]+
104
- }/x
105
-
106
-
107
- def self.norm_slug( slug )
108
- ## assume convenience fallback - append regular season
109
- slug.index( '{' ) ? slug : "#{slug}-{season}"
110
- end
111
-
112
- PAGES ||=
113
- LEAGUES.reduce( {} ) do |pages, (key, data)|
114
- if data[:pages].is_a?( String )
115
- slug = data[:pages]
116
- slug = Worldfootball.norm_slug( slug )
117
- pages[ slug.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: slug }
118
- elsif data[:pages].is_a?( Array )
119
- data[:pages].each do |slug|
120
- slug = Worldfootball.norm_slug( slug )
121
- pages[ slug.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: slug }
122
- end
123
- ## elsif data[:pages].nil?
124
- ## todo/fix: missing pages!!!
125
- else ## assume hash
126
- ## add stage to pages too - why? why not?
127
- data[:pages].each do |slug, stage|
128
- slug = Worldfootball.norm_slug( slug )
129
- pages[ slug.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: slug, stage: stage }
130
- end
131
- end
132
- pages
133
- end
134
-
135
- # e.g. 2000 or 2000-2001
136
- SEASON_RE = /[0-9]{4}
137
- (?:
138
- -[0-9]{4}
139
- )?
140
- /x
141
-
142
-
143
- def self.find_page!( slug )
144
- page = find_page( slug )
145
- if page.nil?
146
- puts "!! ERROR: no mapping for page >#{slug}< found; sorry"
147
-
148
- season_str = nil
149
- norm = slug.sub( SEASON_RE ) do |match| ## replace season with var placeholder {}
150
- season_str = match ## keep reference to season str
151
- '{}' ## replace with {}
152
- end
153
-
154
- puts " season: >#{season_str}<"
155
- puts " slug (norm): >#{norm}<"
156
- puts
157
- ## pp PAGES
158
- exit 1
159
- end
160
- page
161
- end
162
-
163
-
164
-
165
- def self.find_page( slug )
166
- ## return league key and season
167
- season_str = nil
168
- norm = slug.sub( SEASON_RE ) do |match| ## replace season with var placeholder {}
169
- season_str = match ## keep reference to season str
170
- '{}' ## replace with {}
171
- end
172
-
173
- if season_str.nil?
174
- puts "!! ERROR: no season found in page slug >#{slug}<; sorry"
175
- exit 1
176
- end
177
-
178
- rec = PAGES[ norm ]
179
- return nil if rec.nil?
180
-
181
-
182
- league_key = rec[:league]
183
- slug_tmpl = rec[:slug]
184
- season = if slug_tmpl.index( '{start_year}' )
185
- ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
186
- Season( "#{season_str.to_i}-#{season_str.to_i+1}" )
187
- elsif slug_tmpl.index( '{end_year}' )
188
- ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
189
- Season( "#{season_str.to_i-1}-#{season_str.to_i}" )
190
- else ## assume "regular" seasson - pass through as is
191
- Season( season_str )
192
- end
193
-
194
- ## return hash table / record
195
- { league: league_key,
196
- season: season.key }
197
- end
198
-
199
-
200
- end # module Worldfootball