football-sources 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +4 -0
  3. data/Manifest.txt +36 -0
  4. data/README.md +28 -0
  5. data/Rakefile +31 -0
  6. data/lib/football-sources.rb +46 -0
  7. data/lib/football-sources/apis.rb +86 -0
  8. data/lib/football-sources/apis/config.rb +17 -0
  9. data/lib/football-sources/apis/convert.rb +239 -0
  10. data/lib/football-sources/apis/convert_cl.rb +267 -0
  11. data/lib/football-sources/apis/download.rb +11 -0
  12. data/lib/football-sources/apis/stat.rb +59 -0
  13. data/lib/football-sources/version.rb +19 -0
  14. data/lib/football-sources/worldfootball.rb +24 -0
  15. data/lib/football-sources/worldfootball/build.rb +245 -0
  16. data/lib/football-sources/worldfootball/config.rb +16 -0
  17. data/lib/football-sources/worldfootball/convert.rb +100 -0
  18. data/lib/football-sources/worldfootball/convert_reports.rb +107 -0
  19. data/lib/football-sources/worldfootball/download.rb +61 -0
  20. data/lib/football-sources/worldfootball/leagues.rb +200 -0
  21. data/lib/football-sources/worldfootball/leagues/asia.rb +53 -0
  22. data/lib/football-sources/worldfootball/leagues/europe--british_isles.rb +59 -0
  23. data/lib/football-sources/worldfootball/leagues/europe--central.rb +127 -0
  24. data/lib/football-sources/worldfootball/leagues/europe--eastern.rb +82 -0
  25. data/lib/football-sources/worldfootball/leagues/europe--northern.rb +57 -0
  26. data/lib/football-sources/worldfootball/leagues/europe--southern.rb +86 -0
  27. data/lib/football-sources/worldfootball/leagues/europe--western.rb +38 -0
  28. data/lib/football-sources/worldfootball/leagues/europe.rb +13 -0
  29. data/lib/football-sources/worldfootball/leagues/north_america.rb +44 -0
  30. data/lib/football-sources/worldfootball/leagues/pacific.rb +21 -0
  31. data/lib/football-sources/worldfootball/leagues/south_america.rb +11 -0
  32. data/lib/football-sources/worldfootball/mods.rb +72 -0
  33. data/lib/football-sources/worldfootball/tool.rb +100 -0
  34. data/lib/football-sources/worldfootball/vacuum.rb +66 -0
  35. data/lib/football/sources.rb +6 -0
  36. data/test/helper.rb +8 -0
  37. data/test/test_version.rb +16 -0
  38. metadata +147 -0
@@ -0,0 +1,16 @@
1
+ module Worldfootball
2
+
3
+ ### add some more config options / settings
4
## Settings container - holds (nested) configuration sections.
class Configuration
  ## Settings for the convert step
  ##   (nested configuration class - use - why? why not?)
  class Convert
    ## set the output directory
    attr_writer :out_dir

    ## output directory - falls back to './o' if nothing (or nil) is set
    def out_dir
      @out_dir || './o'
    end
  end

  ## convert settings - created on first use and reused afterwards
  def convert
    @convert ||= Convert.new
  end
end # class Configuration
14
+
15
+
16
+ end # module Worldfootball
@@ -0,0 +1,100 @@
1
+
2
+ module Worldfootball
3
+
4
+
5
+
6
## Convert the cached schedule page(s) of a league season into a
##   matches datafile (.csv).
##
##  - league: league key (looked up via find_league)
##  - season: season (cast via Season(), that is, string, integer, etc. allowed)
##  - offset: optional time offset in hours (passed along to fix_date)
##
##  writes to <out_dir>/<season.path>/<league.key>.csv
def self.convert( league:, season:, offset: nil )  ## check: rename (optional) offset to time_offset or such?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

  pages = league.pages( season: season )

  ## note: assume stages if pages is an array (of hash table/records)
  ##         (and NOT a single hash table/record)
  ##       wrap single page in array to handle both setups in one loop
  staged = pages.is_a?( Array )
  pages  = [pages]   unless staged

  recs = []
  pages.each do |page_meta|
    slug = page_meta[:slug]
    ## todo/fix: report error/check if stage name is nil!!!

    print " parsing #{slug}..."

    # unless File.exist?( path )
    #   puts "!! WARN - missing stage >#{stage_name}< source - >#{path}<"
    #   next
    # end

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    ## note: pass along stage for staged setups only
    ##         (keeps the build default for the single page setup)
    opts = { season: season, league: league.key }
    opts[:stage] = page_meta[:stage]   if staged

    page_recs = build( page.matches, **opts )

    pp page_recs[0]   ## check first record
    recs += page_recs
  end

  recs = recs.map { |rec| fix_date( rec, offset ) }   if offset

  ## note: sort matches by date before saving/writing!!!!
  ## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
  ## note: assume date is third column!!! (stage/round/date/...)
  ## note: sort is NOT stable in ruby - add the index as a tie-breaker
  ##         to keep matches on the same date in page (source) order
  recs = recs.each_with_index
             .sort_by { |rec, idx| [rec[2], idx] }
             .map { |rec, _idx| rec }

  ## reformat date / beautify e.g. Sat Aug 7 1993
  recs.each do |rec|
    rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' )
  end

  ## remove unused columns (e.g. stage, et, p, etc.)
  recs, headers = vacuum( recs )

  puts headers
  pp recs[0]   ## check first record

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

  puts "write #{out_path}..."
  Cache::CsvMatchWriter.write( out_path, recs, headers: headers )
end
75
+
76
+
77
+
78
## helper to fix dates to use local timezone (and not utc/london time)
##
##  - row:    match record (array) - expects the date (e.g. 2002-08-17)
##              in row[2] and the time (e.g. 18:30) in row[3]
##  - offset: time offset in hours (e.g. -7)
##
##  note: overwrites row[2] and row[3] in-place and returns the row;
##        returns the row as is if the time is missing (nil or empty);
##        prints an error and exits (with 1) on an unknown date format
def self.fix_date( row, offset )
  return row   if row[3].nil? || row[3].empty?   ## note: time (column) required for fix

  date_fmt = '%Y-%m-%d'   # e.g. 2002-08-17

  col = row[2]
  ## note: use \A and \z (and NOT ^ and $) to match the complete string
  ##         (^ and $ match per line and let multi-line strings pass)
  unless col.is_a?( String ) && col.match?( /\A\d{4}-\d{2}-\d{2}\z/ )
    puts "!!! ERROR - wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
    ## todo/fix: add to errors/warns list - why? why not?
    exit 1
  end

  date = DateTime.strptime( "#{col} #{row[3]}", "#{date_fmt} %H:%M" )
  ## note: add offset as an exact (rational) fraction of a day e.g. -7/24
  ##         (avoids float rounding)
  date += offset.to_r / 24

  row[2] = date.strftime( date_fmt )   ## overwrite "old"
  row[3] = date.strftime( '%H:%M' )
  row   ## return row for possible pipelining - why? why not?
end
99
+
100
+ end # module Worldfootball
@@ -0,0 +1,107 @@
1
+ module Worldfootball
2
+
3
+
4
## Collect the goals from the cached match reports of a league season
##   and write them to a goals datafile (.csv).
##
##  - league: league key (looked up via find_league)
##  - season: season (cast via Season())
##
##  writes to <out_dir>/<season.path>/<league.key>~goals.csv
def self.convert_reports( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)
  league = find_league( league )

  ## note: use only first part from key for lookup
  ##   e.g. at.1  => at
  ##        eng.1 => eng   and so on
  country_key = league.key.split( '.' ).first
  mods        = MODS[ country_key ] || {}

  pages = league.pages( season: season )
  ## note: single (simple) page setup returns a record (NOT an array) - wrap it
  pages = [pages]   unless pages.is_a?( Array )

  recs = []
  pages.each do |page_meta|
    page = Page::Schedule.from_cache( page_meta[:slug] )
    print " page title=>#{page.title}<..."
    print "\n"

    matches = page.matches

    puts "matches - #{matches.size} rows:"
    pp matches[0]

    puts "#{page.generated_in_days_ago} - #{page.generated}"

    matches.each_with_index do |match, i|
      report_ref = match[:report_ref]
      if report_ref.nil?
        puts "!! WARN: no match report ref found for match:"
        pp match
        next
      end

      puts "reading #{i+1}/#{matches.size} - #{report_ref}..."
      report = Page::Report.from_cache( report_ref )

      puts
      puts report.title
      puts report.generated

      goals = report.goals
      puts "goals - #{goals.size} records"

      ## nothing to add if no goals
      next   unless goals.size > 0

      date = Date.strptime( match[:date], '%Y-%m-%d' )

      ## clean team name (e.g. remove (old))
      ##   and asciify (e.g. ’ to ' )
      ##   and use the (country-specific) replacement name if there is one
      team1, team2 = [match[:team1], match[:team2]].map do |name|
        name = norm_team( name )
        mods[ name ] || name
      end

      match_id = "#{team1} - #{team2} | #{date.strftime('%b %-d %Y')}"

      goals.each do |goal|
        extra = if goal[:owngoal]    then '(og)'    ## or use OG or O.G.- why? why not?
                elsif goal[:penalty] then '(pen)'   ## or use P or PEN - why? why not?
                else                      ''
                end

        recs << [match_id,
                 goal[:score],
                 "#{goal[:minute]}'",
                 extra,
                 goal[:player],
                 goal[:notes]]
      end
    end # each match
  end # each page

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}~goals.csv"
  headers  = ['Match', 'Score', 'Minute', 'Extra', 'Player', 'Notes']

  puts "write #{out_path}..."
  Cache::CsvMatchWriter.write( out_path, recs, headers: headers )
end
106
+ end # module Worldfootball
107
+
@@ -0,0 +1,61 @@
1
+
2
+
3
+ module Worldfootball
4
+
5
+ ##
6
+ ## note/fix!!!!
7
+ ## do NOT allow redirects for now - report error!!!
8
+ ## does NOT return 404 page not found errors; always redirects (301) to home page
9
+ ## on missing pages:
10
+ ## 301 Moved Permanently location=https://www.weltfussball.de/
11
+ ## 301 Moved Permanently location=https://www.weltfussball.de/
12
+
13
+
14
+
15
+
16
+ # url = "https://www.weltfussball.de/alle_spiele/eng-league-one-#{season}/"
17
+ # url = "https://www.weltfussball.de/alle_spiele/eng-league-two-#{season}/"
18
+ # https://www.weltfussball.de/alle_spiele/eng-national-league-2019-2020/
19
+ # https://www.weltfussball.de/alle_spiele/eng-fa-cup-2018-2019/
20
+ # https://www.weltfussball.de/alle_spiele/eng-league-cup-2019-2020/
21
+
22
+ # https://www.weltfussball.de/alle_spiele/fra-ligue-2-2019-2020/
23
+ # https://www.weltfussball.de/alle_spiele/ita-serie-b-2019-2020/
24
+ # https://www.weltfussball.de/alle_spiele/rus-premier-liga-2019-2020/
25
+ # https://www.weltfussball.de/alle_spiele/rus-1-division-2019-2020/
26
+ # https://www.weltfussball.de/alle_spiele/tur-sueperlig-2019-2020/
27
+ # https://www.weltfussball.de/alle_spiele/tur-1-lig-2019-2020/
28
+
29
+
30
+
31
## Run Metal.schedule for every schedule page (slug) of a league season.
##
##  - league: league key (looked up via find_league)
##  - season: season (cast via Season())
def self.schedule( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)
  league = find_league( league )

  pages = league.pages( season: season )
  ## note: single (simple) page setup returns a record (NOT an array) - wrap it
  pages = [pages]   unless pages.is_a?( Array )

  pages.each { |page_meta| Metal.schedule( page_meta[:slug] ) }
end
44
+
45
+
46
## Run Metal.schedule_reports for every schedule page (slug) of a league season.
##
##  - league: league key (looked up via find_league)
##  - season: season (cast via Season())
##  - cache:  passed along (as is) to Metal.schedule_reports; defaults to true
def self.schedule_reports( league:, season:, cache: true )   ## todo/check: rename to reports_for_schedule or such - why? why not?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)
  league = find_league( league )

  pages = league.pages( season: season )
  ## note: single (simple) page setup returns a record (NOT an array) - wrap it
  pages = [pages]   unless pages.is_a?( Array )

  pages.each { |page_meta| Metal.schedule_reports( page_meta[:slug], cache: cache ) }
end
59
+
60
+
61
+ end # module Worldfootball
@@ -0,0 +1,200 @@
1
+
2
+
3
+ require_relative 'leagues/europe'
4
+ require_relative 'leagues/north_america'
5
+ require_relative 'leagues/south_america'
6
+ require_relative 'leagues/pacific'
7
+ require_relative 'leagues/asia'
8
+
9
+
10
+ module Worldfootball
11
+
12
## all leagues (by key) - the regional league tables merged into one
##   note: on duplicate keys the entry from the later table wins
LEAGUES = [
  LEAGUES_EUROPE,
  LEAGUES_NORTH_AMERICA,
  LEAGUES_SOUTH_AMERICA,
  LEAGUES_PACIFIC,
  LEAGUES_ASIA,
].each_with_object( {} ) { |table, all| all.update( table ) }
17
+
18
+
19
## League page configuration - convenience wrapper for a leagues table entry.
##
##  data[:pages]  is one of
##    - a string           - single page slug (no stages)
##    - an array (of strings) - page slugs; picked by number
##    - a hash             - page slug => stage name
##  data[:season] is an (optional) proc; gets called with the season and
##    returns nil (no configuration), a page number or an array of page numbers
class League
  attr_reader :key

  def initialize( key, data )
    @key         = key
    @pages       = data[:pages]
    @season_proc = data[:season] || ->(_season) { nil }
  end

  ## Page record(s) for the season.
  ##
  ##  note: returns for no stages / simple case - a single record e.g. { slug: }
  ##        and for the stages case ALWAYS an array of records e.g. [{ slug:, stage: }]
  ##         (even if it has only one page (with stage))
  ##
  ##  prints an error and exits (with 1) if the season proc returns nil
  def pages( season: )
    ## assume always "simple/regular" format w/o stages
    return { slug: fill_slug( @pages, season: season ) }   if @pages.is_a?( String )

    ## check for league format / stages
    indices = @season_proc.call( season )
    if indices.nil?
      puts "!! ERROR - no configuration found for season >#{season}< for league >#{@key}< found; sorry"
      exit 1
    elsif indices.is_a?( Integer )   ## single number - single/regular format w/o stage
      # note: page numbers start with 1 (always use idx-1) !!!
      slug = if @pages.is_a?( Array )
               @pages[indices-1]
             else  ## assume hash (and key is page slug)
               @pages.keys[indices-1]
             end
      { slug: fill_slug( slug, season: season ) }
    else   ## assume regular case - array of integers
      indices.map do |idx|
        slug = @pages.keys[idx-1]
        { slug:  fill_slug( slug, season: season ),
          stage: @pages[slug] }   ## note: include mapping for page to stage name!!
      end
    end
  end # pages


  ######
  # helper method
  ##  fill in the season into the page slug (template); returns the slug
  ##   supported place holders:
  ##     {season}     e.g. 2010-2011
  ##     {start_year} e.g. 2010  - note: assumes season responds to start_year
  ##                                 (like end_year) - TODO confirm
  ##     {end_year}   e.g. 2011
  ##   or - if no place holder - append the season e.g. -2010-2011
  def fill_slug( slug, season: )
    slug = if slug.include?( '{season}' )
             slug.sub( '{season}', season.to_path( :long ) )       ## e.g. 2010-2011
           elsif slug.include?( '{start_year}' )
             ## note: keep in sync with find_page (reverse lookup handles {start_year} too)
             slug.sub( '{start_year}', season.start_year.to_s )    ## e.g. 2010
           elsif slug.include?( '{end_year}' )
             slug.sub( '{end_year}', season.end_year.to_s )        ## e.g. 2011
           else
             ## assume convenience fallback - append regular season
             "#{slug}-#{season.to_path( :long )}"
           end

    puts " slug=>#{slug}<"

    slug
  end
end # class League
84
+
85
+
86
+
87
## league info lookup (by key) in the leagues table
##   returns a League (convenience wrapper);
##   prints an error and exits (with 1) if there is no entry for the key
def self.find_league( key )
  data = LEAGUES[ key ]
  return League.new( key, data )   unless data.nil?   ## use a convenience wrapper for now

  puts "!! ERROR - no league found for >#{key}<; add to leagues tables"
  exit 1
end
95
+
96
+
97
+
98
+ ### "reverse" lookup by page - returns league AND season
99
+ ## note: "blind" season template para - might be season or start_year etc.
100
+ ## e.g. {season} or {start_year} becomes {}
101
+
102
## matches a template variable / place holder in curly braces e.g. {season}
PAGE_VAR_RE = /\{[^}]+\}/
105
+
106
+
107
## normalize page slug (template) - always ends up with a place holder
##   slug with a place holder (that is, an opening curly brace) gets passed through as is
##   otherwise assume convenience fallback - append regular season (-{season})
def self.norm_slug( slug )
  return slug   if slug.include?( '{' )

  "#{slug}-{season}"
end
111
+
112
## "reverse" lookup table - maps the page slug (template) with a "blind"
##   place holder e.g. {} to a record with the league key and slug (template)
##   (and - for the stages / hash setup - the stage name)
PAGES ||=
  LEAGUES.each_with_object( {} ) do |(key, data), pages|
    case data[:pages]
    when String
      slug = Worldfootball.norm_slug( data[:pages] )
      pages[ slug.sub( PAGE_VAR_RE, '{}' ) ] = { league: key, slug: slug }
    when Array
      data[:pages].each do |slug|
        slug = Worldfootball.norm_slug( slug )
        pages[ slug.sub( PAGE_VAR_RE, '{}' ) ] = { league: key, slug: slug }
      end
    ## when nil
    ##   todo/fix: missing pages!!!
    else   ## assume hash
      ## add stage to pages too - why? why not?
      data[:pages].each do |slug, stage|
        slug = Worldfootball.norm_slug( slug )
        pages[ slug.sub( PAGE_VAR_RE, '{}' ) ] = { league: key, slug: slug, stage: stage }
      end
    end
  end
134
+
135
## matches a season - a year (four digits) with an optional second year
##   e.g. 2000 or 2000-2001
SEASON_RE = /[0-9]{4}(?:-[0-9]{4})?/
141
+
142
+
143
## "reverse" lookup by page slug - same as find_page but
##   prints an error (plus the season and normalized slug for debugging)
##   and exits (with 1) if no mapping is found
def self.find_page!( slug )
  page = find_page( slug )
  return page   unless page.nil?

  puts "!! ERROR: no mapping for page >#{slug}< found; sorry"

  season_str = slug[ SEASON_RE ]             ## keep reference to season str
  norm       = slug.sub( SEASON_RE, '{}' )   ## replace season with var placeholder {}

  puts " season: >#{season_str}<"
  puts " slug (norm): >#{norm}<"
  puts
  ## pp PAGES
  exit 1
end
162
+
163
+
164
+
165
## "reverse" lookup by page slug - returns a record with the league key
##   and the season key e.g. { league:, season: } or nil if no mapping is found;
##   prints an error and exits (with 1) if the slug includes no season
def self.find_page( slug )
  season_str = slug[ SEASON_RE ]   ## keep reference to season str
  if season_str.nil?
    puts "!! ERROR: no season found in page slug >#{slug}<; sorry"
    exit 1
  end

  norm = slug.sub( SEASON_RE, '{}' )   ## replace season with var placeholder {}

  rec = PAGES[ norm ]
  return nil   if rec.nil?

  slug_tmpl = rec[:slug]
  year      = season_str.to_i
  season    = if slug_tmpl.include?( '{start_year}' )
                ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
                Season( "#{year}-#{year+1}" )
              elsif slug_tmpl.include?( '{end_year}' )
                ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
                Season( "#{year-1}-#{year}" )
              else   ## assume "regular" season - pass through as is
                Season( season_str )
              end

  ## return hash table / record
  { league: rec[:league],
    season: season.key }
end
198
+
199
+
200
+ end # module Worldfootball