football-sources 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +4 -0
  3. data/Manifest.txt +36 -0
  4. data/README.md +28 -0
  5. data/Rakefile +31 -0
  6. data/lib/football-sources.rb +46 -0
  7. data/lib/football-sources/apis.rb +86 -0
  8. data/lib/football-sources/apis/config.rb +17 -0
  9. data/lib/football-sources/apis/convert.rb +239 -0
  10. data/lib/football-sources/apis/convert_cl.rb +267 -0
  11. data/lib/football-sources/apis/download.rb +11 -0
  12. data/lib/football-sources/apis/stat.rb +59 -0
  13. data/lib/football-sources/version.rb +19 -0
  14. data/lib/football-sources/worldfootball.rb +24 -0
  15. data/lib/football-sources/worldfootball/build.rb +245 -0
  16. data/lib/football-sources/worldfootball/config.rb +16 -0
  17. data/lib/football-sources/worldfootball/convert.rb +100 -0
  18. data/lib/football-sources/worldfootball/convert_reports.rb +107 -0
  19. data/lib/football-sources/worldfootball/download.rb +61 -0
  20. data/lib/football-sources/worldfootball/leagues.rb +200 -0
  21. data/lib/football-sources/worldfootball/leagues/asia.rb +53 -0
  22. data/lib/football-sources/worldfootball/leagues/europe--british_isles.rb +59 -0
  23. data/lib/football-sources/worldfootball/leagues/europe--central.rb +127 -0
  24. data/lib/football-sources/worldfootball/leagues/europe--eastern.rb +82 -0
  25. data/lib/football-sources/worldfootball/leagues/europe--northern.rb +57 -0
  26. data/lib/football-sources/worldfootball/leagues/europe--southern.rb +86 -0
  27. data/lib/football-sources/worldfootball/leagues/europe--western.rb +38 -0
  28. data/lib/football-sources/worldfootball/leagues/europe.rb +13 -0
  29. data/lib/football-sources/worldfootball/leagues/north_america.rb +44 -0
  30. data/lib/football-sources/worldfootball/leagues/pacific.rb +21 -0
  31. data/lib/football-sources/worldfootball/leagues/south_america.rb +11 -0
  32. data/lib/football-sources/worldfootball/mods.rb +72 -0
  33. data/lib/football-sources/worldfootball/tool.rb +100 -0
  34. data/lib/football-sources/worldfootball/vacuum.rb +66 -0
  35. data/lib/football/sources.rb +6 -0
  36. data/test/helper.rb +8 -0
  37. data/test/test_version.rb +16 -0
  38. metadata +147 -0
@@ -0,0 +1,16 @@
1
module Worldfootball

  ## Settings holder - options are grouped into nested configuration objects
  ##   (only the convert group is defined here).
  class Configuration

    ## Options for the convert step.
    class Convert
      ## Setter for the output directory.
      attr_writer :out_dir

      ## Output directory; falls back to './o' as long as nothing
      ##   (or nil/false) has been assigned.
      def out_dir
        @out_dir || './o'
      end
    end

    ## Convert options - created on first access and reused afterwards.
    def convert
      @convert ||= Convert.new
    end
  end # class Configuration

end # module Worldfootball
@@ -0,0 +1,100 @@
1
+
2
+ module Worldfootball
3
+
4
+
5
+
6
## Convert the cached schedule page(s) of a league season into one
##   matches .csv file, written to
##   <config.convert.out_dir>/<season.path>/<league.key>.csv
##
##  league: - league key; resolved with find_league
##  season: - season; cast with Season() (string, integer, etc. accepted)
##  offset: - optional time offset in hours; if given every record is passed
##            through fix_date before sorting
##            check: rename (optional) offset to time_offset or such?
def self.convert( league:, season:, offset: nil )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

  pages = league.pages( season: season )

  # note: assume stages if pages is an array (of hash table/records)
  #        (and NOT a single hash table/record)
  with_stages = pages.is_a?( Array )
  page_metas  = with_stages ? pages : [pages]

  recs = []
  page_metas.each do |page_meta|
    slug = page_meta[:slug]
    ## todo/fix: report error/check if stage name is nil!!!

    print " parsing #{slug}..."

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    rows = page.matches
    ## note: only pass along stage: for the stages setup
    ##        (the single page setup calls build without it)
    page_recs = if with_stages
                  build( rows, season: season, league: league.key, stage: page_meta[:stage] )
                else
                  build( rows, season: season, league: league.key )
                end

    pp page_recs[0]   ## check first record
    recs += page_recs
  end

  recs = recs.map { |rec| fix_date( rec, offset ) }   if offset

  ## note: sort matches by date before saving/writing!!!!
  ## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
  ## note: assume date is third column!!! (stage/round/date/...)
  ## note: sort with a block is NOT stable - use the original position as
  ##        tie-breaker to keep matches on the same date in schedule order
  recs = recs.each_with_index
             .sort_by { |rec, idx| [rec[2], idx] }
             .map( &:first )

  ## reformat date / beautify e.g. Sat Aug 7 1993
  recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' ) }

  ## remove unused columns (e.g. stage, et, p, etc.)
  recs, headers = vacuum( recs )

  puts headers
  pp recs[0]   ## check first record

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

  puts "write #{out_path}..."
  Cache::CsvMatchWriter.write( out_path, recs, headers: headers )
end
75
+
76
+
77
+
78
+ ## helper to fix dates to use local timezone (and not utc/london time)
79
## Shift the date (third column) and time (fourth column) of a match row
##   by offset hours; the row gets changed in place.
##
##  row    - record (array) with date in row[2] (format 2002-08-17)
##             and time in row[3] (format 20:45)
##  offset - hours to add (negative to go back) e.g. -7
##
##  returns the (same) row; rows without a time are passed through untouched.
##  note: prints an error and exits (1) if the date is NOT in 2002-08-17 format
def self.fix_date( row, offset )
  time_str = row[3]
  return row   if time_str.nil? || time_str.empty?   ## note: time (column) required for fix

  date_str = row[2]
  unless date_str =~ /^\d{4}-\d{2}-\d{2}$/
    puts "!!! ERROR - wrong (unknown) date format >>#{date_str}<<; cannot continue; fix it; sorry"
    ## todo/fix: add to errors/warns list - why? why not?
    exit 1
  end

  date_fmt = '%Y-%m-%d'   # e.g. 2002-08-17

  ## NOTE - offset is in hours; a day is the unit for date arithmetic
  ##         MUST divide by 24.0 (float) e.g. -7/24.0 or such to work
  shifted = DateTime.strptime( "#{date_str} #{time_str}", "#{date_fmt} %H:%M" ) + (offset/24.0)

  ## overwrite "old" date and time
  row[2] = shifted.strftime( date_fmt )
  row[3] = shifted.strftime( '%H:%M' )
  row   ## return row for possible pipelining - why? why not?
end
99
+
100
+ end # module Worldfootball
@@ -0,0 +1,107 @@
1
+ module Worldfootball
2
+
3
+
4
## Collect the goals from the cached match reports of a league season
##   and write them to
##   <config.convert.out_dir>/<season.path>/<league.key>~goals.csv
##
##  league: - league key; resolved with find_league
##  season: - season; cast with Season()
def self.convert_reports( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

  ## note: use only first part from key for lookup
  ##   e.g. at.1  => at
  ##        eng.1 => eng
  ##        and so on
  lookup_key = league.key.split('.')[0]
  mods = MODS[ lookup_key ] || {}

  ## if single (simple) page setup - wrap in array
  pages = league.pages( season: season )
  pages = [pages]   unless pages.is_a?(Array)

  recs = []

  pages.each do |page_meta|
    page = Page::Schedule.from_cache( page_meta[:slug] )
    print " page title=>#{page.title}<..."
    print "\n"

    matches = page.matches

    puts "matches - #{matches.size} rows:"
    pp matches[0]

    puts "#{page.generated_in_days_ago} - #{page.generated}"

    matches.each_with_index do |match, i|
      report_ref = match[:report_ref]
      if report_ref.nil?
        puts "!! WARN: no match report ref found for match:"
        pp match
        next
      end

      puts "reading #{i+1}/#{matches.size} - #{report_ref}..."
      report = Page::Report.from_cache( report_ref )

      puts
      puts report.title
      puts report.generated

      goals = report.goals
      puts "goals - #{goals.size} records"

      ## nothing to add for matches without goals
      next   unless goals.size > 0

      date = Date.strptime( match[:date], '%Y-%m-%d')

      ## clean team name (e.g. remove (old))
      ##   and asciify (e.g. ’ to ' )
      team1 = norm_team( match[:team1] )
      team2 = norm_team( match[:team2] )

      ## apply the team name mods (if any)
      team1 = mods[ team1 ]   if mods[ team1 ]
      team2 = mods[ team2 ]   if mods[ team2 ]

      match_id = "#{team1} - #{team2} | #{date.strftime('%b %-d %Y')}"

      goals.each do |goal|
        extra = if    goal[:owngoal] then '(og)'    ## or use OG or O.G.- why? why not?
                elsif goal[:penalty] then '(pen)'   ## or use P or PEN - why? why not?
                else                      ''
                end

        recs << [match_id,
                 goal[:score],
                 "#{goal[:minute]}'",
                 extra,
                 goal[:player],
                 goal[:notes]]
      end
    end # each match
  end # each page

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}~goals.csv"

  headers = ['Match', 'Score', 'Minute', 'Extra', 'Player', 'Notes']

  puts "write #{out_path}..."
  Cache::CsvMatchWriter.write( out_path, recs, headers: headers )
end
106
+ end # module Worldfootball
107
+
@@ -0,0 +1,61 @@
1
+
2
+
3
+ module Worldfootball
4
+
5
+ ##
6
+ ## note/fix!!!!
7
+ ## do NOT allow redirects for now - report error!!!
8
+ ## does NOT return 404 page not found errors; always redirects (301) to home page
9
+ ## on missing pages:
10
+ ## 301 Moved Permanently location=https://www.weltfussball.de/
11
+ ## 301 Moved Permanently location=https://www.weltfussball.de/
12
+
13
+
14
+
15
+
16
+ # url = "https://www.weltfussball.de/alle_spiele/eng-league-one-#{season}/"
17
+ # url = "https://www.weltfussball.de/alle_spiele/eng-league-two-#{season}/"
18
+ # https://www.weltfussball.de/alle_spiele/eng-national-league-2019-2020/
19
+ # https://www.weltfussball.de/alle_spiele/eng-fa-cup-2018-2019/
20
+ # https://www.weltfussball.de/alle_spiele/eng-league-cup-2019-2020/
21
+
22
+ # https://www.weltfussball.de/alle_spiele/fra-ligue-2-2019-2020/
23
+ # https://www.weltfussball.de/alle_spiele/ita-serie-b-2019-2020/
24
+ # https://www.weltfussball.de/alle_spiele/rus-premier-liga-2019-2020/
25
+ # https://www.weltfussball.de/alle_spiele/rus-1-division-2019-2020/
26
+ # https://www.weltfussball.de/alle_spiele/tur-sueperlig-2019-2020/
27
+ # https://www.weltfussball.de/alle_spiele/tur-1-lig-2019-2020/
28
+
29
+
30
+
31
## Run Metal.schedule for every schedule page (slug) of a league season.
##
##  league: - league key; resolved with find_league
##  season: - season; cast with Season()
def self.schedule( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)
  league = find_league( league )

  ## if single (simple) page setup - wrap in array
  pages = league.pages( season: season )
  pages = [pages]   unless pages.is_a?(Array)

  pages.each { |page_meta| Metal.schedule( page_meta[:slug] ) }
end
44
+
45
+
46
## Run Metal.schedule_reports for every schedule page (slug) of a league season.
##
##  league: - league key; resolved with find_league
##  season: - season; cast with Season()
##  cache:  - passed through as is to Metal.schedule_reports (default: true)
##
## todo/check: rename to reports_for_schedule or such - why? why not?
def self.schedule_reports( league:, season:, cache: true )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)
  league = find_league( league )

  ## if single (simple) page setup - wrap in array
  pages = league.pages( season: season )
  pages = [pages]   unless pages.is_a?(Array)

  pages.each { |page_meta| Metal.schedule_reports( page_meta[:slug], cache: cache ) }
end
59
+
60
+
61
+ end # module Worldfootball
@@ -0,0 +1,200 @@
1
+
2
+
3
+ require_relative 'leagues/europe'
4
+ require_relative 'leagues/north_america'
5
+ require_relative 'leagues/south_america'
6
+ require_relative 'leagues/pacific'
7
+ require_relative 'leagues/asia'
8
+
9
+
10
+ module Worldfootball
11
+
12
## All league tables merged into one lookup table (by league key);
##   note: on duplicate keys the later table wins.
LEAGUES = [LEAGUES_EUROPE,
           LEAGUES_NORTH_AMERICA,
           LEAGUES_SOUTH_AMERICA,
           LEAGUES_PACIFIC,
           LEAGUES_ASIA].each_with_object( {} ) { |table, all| all.merge!( table ) }
17
+
18
+
19
## Convenience wrapper for a league record from the leagues tables.
##
##  data[:pages]  - page slug(s); a single string, an array of strings or
##                    a hash (page slug => stage name)
##  data[:season] - optional proc; called with the season, returns the
##                    page number(s) to use (see pages)
class League
  attr_reader :key

  def initialize( key, data )
    @key         = key
    @pages       = data[:pages]
    @season_proc = data[:season] || ->(_season) { nil }
  end

  ## Page records for the season passed in.
  ##
  ## note: returns for no stages / simple case - a single hash e.g. { slug: }
  ##   and for the stages case ALWAYS an array of hashes e.g. { slug:, stage: }
  ##   (even if it has only one page (with stage))
  ##
  ## note: prints an error and exits (1) if the season proc returns nil
  def pages( season: )
    # assume always "simple/regular" format w/o stages
    return { slug: fill_slug( @pages, season: season ) }   if @pages.is_a?( String )

    ## check for league format / stages
    ##   the season proc returns a page number, an array of page numbers or nil
    indices = @season_proc.call( season )
    if indices.nil?
      puts "!! ERROR - no configuration found for season >#{season}< for league >#{@key}< found; sorry"
      exit 1
    elsif indices.is_a?( Integer )   ## single number - single/regular format w/o stage
      # note: page numbers start with 1 (always use idx-1 for lookup) !!!
      slug = if @pages.is_a?( Array )
               @pages[indices-1]
             else   ## assume hash (and key is page slug)
               @pages.keys[indices-1]
             end
      { slug: fill_slug( slug, season: season ) }
    else   ## assume regular case - array of integers (and pages is a hash)
      indices.map do |idx|
        slug = @pages.keys[idx-1]
        { slug:  fill_slug( slug, season: season ),
          stage: @pages[slug] }   ## note: include mapping for page to stage name!!
      end
    end
  end # pages


  ######
  # helper method
  #
  # Fill in the season place holder of a page slug.
  #   supported: {season}, {end_year} and {start_year} (first match wins);
  #   slugs without any of these get the season appended
  #
  # note: {start_year} was missing here but is expected by the reverse
  #   lookup (find_page); assumes season responds to start_year
  #   (like end_year) - TODO confirm
  def fill_slug( slug, season: )
    ## note: fill-in/check for place holders too
    slug = if slug.index( '{season}' )
             slug.sub( '{season}', season.to_path( :long ) )      ## e.g. 2010-2011
           elsif slug.index( '{end_year}' )
             slug.sub( '{end_year}', season.end_year.to_s )       ## e.g. 2011
           elsif slug.index( '{start_year}' )
             slug.sub( '{start_year}', season.start_year.to_s )   ## e.g. 2010
           else
             ## assume convenience fallback - append regular season
             "#{slug}-#{season.to_path( :long )}"
           end

    puts " slug=>#{slug}<"

    slug
  end
end # class League
84
+
85
+
86
+
87
## League info lookup (by key) in the LEAGUES table.
##
##  returns the record wrapped in a League;
##  note: prints an error and exits (1) if there is no record for the key
def self.find_league( key )
  data = LEAGUES[ key ]
  ## use a convenience wrapper for now
  return League.new( key, data )   unless data.nil?

  puts "!! ERROR - no league found for >#{key}<; add to leagues tables"
  exit 1
end
95
+
96
+
97
+
98
+ ### "reverse" lookup by page - returns league AND season
99
+ ## note: "blind" season template para - might be season or start_year etc.
100
+ ## e.g. {season} or {start_year} becomes {}
101
+
102
## matches a (template) place holder in a page slug
##   that is, an opening curly bracket, one or more chars (but no closing
##   curly bracket) and a closing curly bracket e.g. {season}
PAGE_VAR_RE = /\{[^}]+\}/
105
+
106
+
107
## Make the season place holder explicit in a page slug (template):
##   slugs with a curly bracket pass through as is;
##   all others get -{season} appended (the convenience fallback).
def self.norm_slug( slug )
  return slug   if slug.include?( '{' )

  "#{slug}-{season}"
end
111
+
112
## "reverse" lookup table - maps a page slug with a "blind" season
##   place holder e.g. aut-bundesliga-{} to a record with the league key and
##   the (normalized) slug template (plus the stage name for pages with stages)
PAGES ||=
  LEAGUES.each_with_object( {} ) do |(key, data), pages|
    league_pages = data[:pages]

    case league_pages
    when String
      tmpl = Worldfootball.norm_slug( league_pages )
      pages[ tmpl.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: tmpl }
    when Array
      league_pages.each do |slug|
        tmpl = Worldfootball.norm_slug( slug )
        pages[ tmpl.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: tmpl }
      end
    ## when nil
    ##   todo/fix: missing pages!!!
    else   ## assume hash (page slug => stage name)
      ## add stage to pages too - why? why not?
      league_pages.each do |slug, stage|
        tmpl = Worldfootball.norm_slug( slug )
        pages[ tmpl.sub( PAGE_VAR_RE, '{}') ] = { league: key, slug: tmpl, stage: stage }
      end
    end
  end
134
+
135
+ # e.g. 2000 or 2000-2001
136
## four digits (year) optionally followed by a dash and four more digits
SEASON_RE = /[0-9]{4}(?:-[0-9]{4})?/
141
+
142
+
143
## Same as find_page but does NOT return nil;
##   note: prints an error (incl. the season found and the normalized slug
##         used for the lookup) and exits (1) if there is no mapping
def self.find_page!( slug )
  page = find_page( slug )
  return page   unless page.nil?

  puts "!! ERROR: no mapping for page >#{slug}< found; sorry"

  ## keep reference to season str
  ##   and replace season with var placeholder {}
  season_str = slug[ SEASON_RE ]
  norm       = slug.sub( SEASON_RE, '{}' )

  puts " season: >#{season_str}<"
  puts " slug (norm): >#{norm}<"
  puts
  ## pp PAGES
  exit 1
end
162
+
163
+
164
+
165
## "Reverse" lookup by page slug - returns a hash table / record with
##   the league key and the season key, or nil if there is no mapping
##   for the page in PAGES.
##
## note: prints an error and exits (1) if no season is found in the slug
def self.find_page( slug )
  ## keep reference to season str
  season_str = slug[ SEASON_RE ]
  if season_str.nil?
    puts "!! ERROR: no season found in page slug >#{slug}<; sorry"
    exit 1
  end

  ## replace season with var placeholder {} for lookup
  rec = PAGES[ slug.sub( SEASON_RE, '{}' ) ]
  return nil   if rec.nil?

  slug_tmpl = rec[:slug]
  year      = season_str.to_i   ## note: leading year only e.g. 2020-2021 => 2020

  season = if slug_tmpl.include?( '{start_year}' )
             ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
             Season( "#{year}-#{year+1}" )
           elsif slug_tmpl.include?( '{end_year}' )
             ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
             Season( "#{year-1}-#{year}" )
           else   ## assume "regular" season - pass through as is
             Season( season_str )
           end

  ## return hash table / record
  { league: rec[:league],
    season: season.key }
end
198
+
199
+
200
+ end # module Worldfootball