webget-football 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +4 -0
- data/Manifest.txt +18 -0
- data/README.md +28 -0
- data/Rakefile +26 -0
- data/lib/webget-football.rb +16 -0
- data/lib/webget-football/apis.rb +10 -0
- data/lib/webget-football/apis/config.rb +17 -0
- data/lib/webget-football/apis/download.rb +72 -0
- data/lib/webget-football/version.rb +22 -0
- data/lib/webget-football/worldfootball.rb +12 -0
- data/lib/webget-football/worldfootball/config.rb +17 -0
- data/lib/webget-football/worldfootball/download.rb +89 -0
- data/lib/webget-football/worldfootball/page.rb +106 -0
- data/lib/webget-football/worldfootball/page_report.rb +186 -0
- data/lib/webget-football/worldfootball/page_schedule.rb +262 -0
- data/lib/webget/football.rb +4 -0
- data/test/helper.rb +6 -0
- data/test/test_version.rb +16 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c46b38f5ebe600370371d7032f6178fb4ae22dde
|
4
|
+
data.tar.gz: fd4ca8d6d218dc140f8c63f13f9028ace9a6eda1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 23e896f122b3f8068ea0acb82f85e60ebaff68d231d86923778dbc8de6cd345d2914f7401f2cd28fd10635ad276504f9d53655808b3d222559455d8f1adfdaee
|
7
|
+
data.tar.gz: 69516f9db9e2a495c378759e5c9195f596fb4be30147cba24fdcf04bf7f55e28325324d07a0f304200cb093d3ef61d239db4275d6d2a51863a1d2eea3e17fe17
|
data/CHANGELOG.md
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
CHANGELOG.md
|
2
|
+
Manifest.txt
|
3
|
+
README.md
|
4
|
+
Rakefile
|
5
|
+
lib/webget-football.rb
|
6
|
+
lib/webget-football/apis.rb
|
7
|
+
lib/webget-football/apis/config.rb
|
8
|
+
lib/webget-football/apis/download.rb
|
9
|
+
lib/webget-football/version.rb
|
10
|
+
lib/webget-football/worldfootball.rb
|
11
|
+
lib/webget-football/worldfootball/config.rb
|
12
|
+
lib/webget-football/worldfootball/download.rb
|
13
|
+
lib/webget-football/worldfootball/page.rb
|
14
|
+
lib/webget-football/worldfootball/page_report.rb
|
15
|
+
lib/webget-football/worldfootball/page_schedule.rb
|
16
|
+
lib/webget/football.rb
|
17
|
+
test/helper.rb
|
18
|
+
test/test_version.rb
|
data/README.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# webget-football - get football data via web pages or web api (json) calls
|
2
|
+
|
3
|
+
|
4
|
+
* home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
|
5
|
+
* bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
|
6
|
+
* gem :: [rubygems.org/gems/webget-football](https://rubygems.org/gems/webget-football)
|
7
|
+
* rdoc :: [rubydoc.info/gems/webget-football](http://rubydoc.info/gems/webget-football)
|
8
|
+
* forum :: [groups.google.com/group/opensport](https://groups.google.com/group/opensport)
|
9
|
+
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
TBD
|
14
|
+
|
15
|
+
|
16
|
+
## License
|
17
|
+
|
18
|
+
![](https://publicdomainworks.github.io/buttons/zero88x31.png)
|
19
|
+
|
20
|
+
The `webget-football` scripts are dedicated to the public domain.
|
21
|
+
Use it as you please with no restrictions whatsoever.
|
22
|
+
|
23
|
+
|
24
|
+
## Questions? Comments?
|
25
|
+
|
26
|
+
Send them along to the
|
27
|
+
[Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
|
28
|
+
Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/webget-football/version.rb'
|
3
|
+
|
4
|
+
Hoe.spec 'webget-football' do
|
5
|
+
|
6
|
+
self.version = Webget::Module::Football::VERSION
|
7
|
+
|
8
|
+
self.summary = 'webget-football - get football data via web pages or web api (json) calls'
|
9
|
+
self.description = summary
|
10
|
+
|
11
|
+
self.urls = { home: 'https://github.com/sportdb/sport.db' }
|
12
|
+
|
13
|
+
self.author = 'Gerald Bauer'
|
14
|
+
self.email = 'opensport@googlegroups.com'
|
15
|
+
|
16
|
+
# switch extension to .markdown for github formatting
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'CHANGELOG.md'
|
19
|
+
|
20
|
+
self.licenses = ['Public Domain']
|
21
|
+
|
22
|
+
self.spec_extras = {
|
23
|
+
required_ruby_version: '>= 2.2.2'
|
24
|
+
}
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
## 3rd party (our own)
|
2
|
+
require 'webget' ## incl. webget, webcache, webclient, etc.
|
3
|
+
|
4
|
+
## 3rd party
|
5
|
+
require 'nokogiri'
|
6
|
+
|
7
|
+
|
8
|
+
###
|
9
|
+
# our own code
|
10
|
+
require 'webget-football/version' # let version always go first
|
11
|
+
|
12
|
+
require 'webget-football/apis'
|
13
|
+
require 'webget-football/worldfootball'
|
14
|
+
|
15
|
+
|
16
|
+
puts Webget::Module::Football.banner # say hello
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Footballdata

  ## Settings holder for the Footballdata namespace
  ##   note: declares no attributes for now
  class Configuration
  end # class Configuration


  class << self
    ## Returns the shared Configuration; built on first use
    ##   and memoized in @config
    def config
      @config ||= Configuration.new
    end

    ## Yields the shared Configuration to the passed-in block
    ##  and returns whatever the block returns; lets you use
    ##   Footballdata.configure do |config|
    ##      ## change settings on config here
    ##   end
    def configure
      yield( config )
    end
  end

end   # module Footballdata
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module Footballdata

  ## todo/check: put in Downloader namespace/class - why? why not?
  ##    or use Metal   - no "porcelain" downloaders / machinery
  ##
  ## Thin ("metal") wrapper for the football-data.org v2 web api;
  ##   builds the request urls and fetches them via Webget.call
  class Metal
    BASE_URL = 'http://api.football-data.org/v2'


    ## url for the competitions listing of a plan  e.g. 'TIER_ONE'
    def self.competitions_url( plan )
      "#{BASE_URL}/competitions?plan=#{plan}"
    end

    ## just use matches_url - why? why not?
    ## url for the matches of a competition (code) in a season (year)
    def self.competition_matches_url( code, year )
      "#{BASE_URL}/competitions/#{code}/matches?season=#{year}"
    end

    ## url for the teams of a competition (code) in a season (year)
    def self.competition_teams_url( code, year )
      "#{BASE_URL}/competitions/#{code}/teams?season=#{year}"
    end


    ## fetch the competitions listing for plan TIER_ONE
    def self.competitions_tier_one
      get( competitions_url( 'TIER_ONE' ))
    end

    ## fetch the competitions listing for plan TIER_TWO
    ##   note: was calling the misspelled (undefined) competions_url
    def self.competitions_tier_two
      get( competitions_url( 'TIER_TWO' ))
    end

    ## fetch the competitions listing for plan TIER_THREE
    ##   note: was calling the misspelled (undefined) competions_url
    def self.competitions_tier_three
      get( competitions_url( 'TIER_THREE' ))
    end

    ## fetch the matches first and then the teams
    ##   for a competition (code) in a season (year)
    def self.competition( code, year )
      get( competition_matches_url( code, year ))
      get( competition_teams_url( code, year ))
    end


    ## note: disabled for now - kept for reference
    #  def self.matches
    #    # note: Specified period must not exceed 10 days.
    #
    #    ## try query (football) week by week - tuesday to monday!!
    #    ##  note: TIER_ONE does NOT include goals!!!
    #    code       = 'FL1'
    #    start_date = '2019-08-09'
    #    end_date   = '2019-08-16'
    #
    #    get( "matches?competitions=#{code}&dateFrom=#{start_date}&dateTo=#{end_date}" )
    #  end


    ## Requests url via Webget.call with the api headers set:
    ##   X-Auth-Token is sent only if the FOOTBALLDATA env variable is set.
    ## Prints the start of the pretty printed json response (for debugging)
    ##   and exits the process with status 1 if the response status is nok.
    ## Returns nil otherwise.
    def self.get( url )
      token = ENV['FOOTBALLDATA']
      ## note: because of public workflow log - do NOT output token
      ## puts token

      headers = {}
      headers['X-Auth-Token'] = token    if token
      headers['User-Agent']   = 'ruby'
      headers['Accept']       = '*/*'

      ## note: add format: 'json' for pretty printing json (before) save in cache
      response = Webget.call( url, headers: headers )

      ## for debugging print pretty printed json first 400 chars
      puts response.json.pretty_inspect[0..400]

      exit 1  if response.status.nok?   # e.g. HTTP status code != 200
    end

  end  ## class Metal
end # module Footballdata
|
72
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class Webget   ## note: Webget is for now a class (NOT a module)
  module Module
    module Football
      ## version parts
      ##   todo: namespace inside version or something - why? why not??
      MAJOR = 0
      MINOR = 0
      PATCH = 1
      VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

      class << self
        ## the version string e.g. 0.0.1
        def version() VERSION; end

        ## one-line "hello" text with the gem version and ruby runtime info
        def banner
          "webget-football/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
        end

        ## absolute path three directory levels up from this source file
        def root
          one_up   = File.dirname( __FILE__ )
          two_up   = File.dirname( one_up )
          three_up = File.dirname( two_up )
          File.expand_path( three_up )
        end
      end
    end # module Football
  end # module Module
end # class Webget
|
@@ -0,0 +1,12 @@
|
|
1
|
+
###########################
|
2
|
+
# note: split code in two parts
|
3
|
+
# metal - "bare" basics - no ref to sportdb
|
4
|
+
# and rest / convert with sportdb references / goodies
|
5
|
+
|
6
|
+
|
7
|
+
## our own code
|
8
|
+
require_relative 'worldfootball/config'
|
9
|
+
require_relative 'worldfootball/download'
|
10
|
+
require_relative 'worldfootball/page'
|
11
|
+
require_relative 'worldfootball/page_schedule'
|
12
|
+
require_relative 'worldfootball/page_report'
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Worldfootball

  ## Settings holder for the Worldfootball namespace
  ##   note: declares no attributes for now
  class Configuration
  end # class Configuration


  class << self
    ## Returns the shared Configuration; built on first use
    ##   and memoized in @config
    def config
      @config ||= Configuration.new
    end

    ## Yields the shared Configuration to the passed-in block
    ##  and returns whatever the block returns; lets you use
    ##   Worldfootball.configure do |config|
    ##      ## change settings on config here
    ##   end
    def configure
      yield( config )
    end
  end

end   # module Worldfootball
|
@@ -0,0 +1,89 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Worldfootball

  ## todo/check: put in Downloader namespace/class - why? why not?
  ##    or use Metal   - no "porcelain" downloaders / machinery
  ##
  ## Downloads (and records in the web cache) schedule and
  ##   match report pages from weltfussball.de
  class Metal

    BASE_URL = 'https://www.weltfussball.de'

    class << self
      ## page url for the schedule (all matches) of a season (slug)
      def schedule_url( slug )
        "#{BASE_URL}/alle_spiele/#{slug}/"
      end

      ## page url for the match report (slug)
      def report_url( slug )
        "#{BASE_URL}/spielbericht/#{slug}/"
      end


      ## fetch the schedule page for slug
      def download_schedule( slug )
        get( schedule_url( slug ))
      end

      ## fetch the match report page for slug;
      ##   with cache: true an already cached page is NOT fetched again
      def download_report( slug, cache: true )
        page_url = report_url( slug )

        ## check the cache first
        unless cache && Webcache.cached?( page_url )
          return get( page_url )
        end

        puts "  reuse local (cached) copy >#{Webcache.url_to_id( page_url )}<"
      end


      ## fetch the match reports for all matches listed in the
      ##   (already cached) schedule page for slug
      ## todo/check: rename to reports_for_schedule or such - why? why not?
      def download_schedule_reports( slug, cache: true )
        page    = Page::Schedule.from_cache( slug )
        matches = page.matches
        total   = matches.size

        puts "matches - #{total} rows:"
        pp matches[0]

        puts "#{page.generated_in_days_ago}  - #{page.generated}"

        ## todo/fix: restore sleep to old value at the end!!!!
        ## Webget.config.sleep = 8    ## fetch 7-8 pages/min

        matches.each_with_index do |match,idx|
          pos = idx+1
          est = (Webget.config.sleep * (total-pos))/60.0    # estimated time left

          puts "fetching #{pos}/#{total} (#{est} min(s)) - #{match[:round]} | #{match[:team1]} v #{match[:team2]}..."

          ref = match[:report_ref]
          if ref.nil? || ref == false
            puts "!! WARN: report ref missing for match:"
            pp match
          else
            download_report( ref, cache: cache )
          end
        end
      end


      ### add some "old" (back compat) aliases - keep - why? why not?
      alias schedule         download_schedule
      alias report           download_report
      alias schedule_reports download_schedule_reports


      ##################
      #  helpers

      ## get & record/save to cache
      def get( url )
        response = Webget.page( url )  ## fetch (and cache) html page (via HTTP GET)

        ## note: exit on get / fetch error - do NOT continue for now - why? why not?
        return  unless response.status.nok?    ## e.g.  HTTP status code != 200
        exit 1
      end
    end

  end # class Metal
end # module Worldfootball
|
88
|
+
|
89
|
+
|
@@ -0,0 +1,106 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
  ## Base class for a downloaded html page; wraps the html text
  ##   and offers (lazy) access to the parsed document plus
  ##   some page-level properties (title, keywords, url, generated).
  class Page

    ## Reads the file at path (as utf-8 text) and returns a new page for it
    def self.from_file( path )
      html = File.open( path, 'r:utf-8' ) {|f| f.read }
      new( html )
    end

    ## html - the page source as a string
    def initialize( html )
      @html = html
    end

    ## Returns the parsed html document (parsed on first use only)
    def doc
      ## note: if we use a fragment and NOT a document - no access to page head (and meta elements and such)
      @doc ||= Nokogiri::HTML( @html )
    end

    ## Returns the text of the first title element
    ##   note: raises NoMethodError if no title element was found (nil)
    def title
      # <title>Bundesliga 2010/2011 » Spielplan</title>
      @title ||= doc.css( 'title' ).first
      @title.text  ## get element's text content
    end

    ## Returns the content attribute of the first meta keywords element
    ##   note: raises NoMethodError if no such meta element was found (nil)
    def keywords
      # <meta name="keywords"
      #   content="Bundesliga, 2010/2011, Spielplan, KSV Superfund, SC Magna Wiener Neustadt, SV Ried, FC Wacker Innsbruck, Austria Wien, Sturm Graz, SV Mattersburg, LASK Linz, Rapid Wien, RB Salzburg" />
      @keywords ||= doc.css( 'meta[name="keywords"]' ).first
      @keywords[:content]  ## get content attribute
      ## or      doc.xpath( '//meta[@name="keywords"]' ).first
      ## pp keywords
      # puts "  #{keywords[:content]}"

      # keywords = doc.at( 'meta[@name="Keywords"]' )
      # pp keywords
      ## check for
    end

    # <meta property="og:url"
    #       content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
    ## Returns the content attribute of the first og:url meta element
    ##   note: raises NoMethodError if no such meta element was found (nil)
    def url
      @url ||= doc.css( 'meta[property="og:url"]' ).first
      @url[:content]
    end



    ## <!-- [generated 2020-06-30 22:30:19] -->
    GENERATED_RE = %r{
                        <!--
                          [ ]+
                           \[generated
                               [ ]+
                               (?<date>\d+-\d+-\d+)
                               [ ]+
                               (?<time>\d+:\d+:\d+)
                            \]
                          [ ]+
                        -->
                     }x


    ## Returns the timestamp from the generated html comment as a DateTime
    ##   or nil (plus a warning printed once) if the page has none
    def generated
      ## note: use defined? (and NOT ||=) for memoizing, so that a
      ##   missing timestamp (nil) gets remembered too and
      ##   the page is NOT scanned (and the warning printed) again and again
      return @generated    if defined?( @generated )

      m = GENERATED_RE.match( @html )
      @generated = if m
                     DateTime.strptime( "#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S')
                   else
                     puts "!! WARN - no generated timestamp found in page"
                     nil
                   end
    end

    ### convenience helper / formatter
    ##   returns the page age in days e.g. 3d  or ? if there is no timestamp
    def generated_in_days_ago
      if generated
        diff_in_days = Date.today.jd - generated.jd
        "#{diff_in_days}d"
      else
        '?'
      end
    end

    ######################
    ## helper methods

    ## Returns a copy of str with no-break spaces turned into blanks,
    ##   whitespace runs folded into one blank and
    ##   leading and trailing blanks removed
    def squish( str )
      ## note: replace no-break spaces BEFORE stripping;
      ##   String#strip does NOT remove them - otherwise
      ##   a leading or trailing no-break space is left over as a blank
      str = str.gsub( "\u{00A0}", ' ' )  # Unicode Character 'NO-BREAK SPACE' (U+00A0)
      str = str.gsub( /[ \t\n]+/, ' ' )  ## fold whitespace to one max.
      str.strip
    end

    ## Prints msg and exits the process with status 1 if cond is false or nil
    def assert( cond, msg )
      if cond
        # do nothing
      else
        puts "!!! assert failed (in parse page) - #{msg}"
        exit 1
      end
    end

  end # class Page
end # module Worldfootball
|
@@ -0,0 +1,186 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
class Page
|
4
|
+
|
5
|
+
class Report < Page ## note: use nested class for now - why? why not?
|
6
|
+
|
7
|
+
|
8
|
+
## Returns a new page built from the cached html
##   of the match report for slug
def self.from_cache( slug )
  report_url = Worldfootball::Metal.report_url( slug )
  new( Webcache.read( report_url ))
end
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
## Returns the first table.standard_tabelle whose first row
##   starts with a Tore (goals) cell  or nil if there is none
def find_table_tore
  # <table class="" cellpadding="3" cellspacing="1">
  #  <tr>
  #   <td colspan="2" class="ueberschrift" align="center">Tore</td>
  #  </tr>

  ## get table
  ##  first table row is Tore
  tables = doc.css( 'table.standard_tabelle' )
  # puts "  found #{tables.size} table.standard_tabelle"  # e.g. found 6 table.standard_tabelle
  tables.each do |table|
    first_tr = table.css( 'tr' )[0]
    ## note: skip tables without any rows
    ##   (was failing with a NoMethodError on nil)
    next  if first_tr.nil?

    tds = first_tr.css( 'td' )
    ## puts "  found #{tds.size} tds"

    return table   if tds.size > 0 && tds[0].text == 'Tore'
  end

  nil  ## nothing found; auto-report error -why? why not?
end
|
39
|
+
|
40
|
+
## Returns the goals of the match report as an array of records (hashes)
##   with the keys
##     score:   running score after the goal e.g. 0-1
##     team:    1 or 2 (derived from which side of the score advanced)
##     player:  name of the scorer
##     minute:  minute of the goal (as string)
##   plus (if present only)  owngoal: true, penalty: true and notes:
##
##  note: prints every goal line (for debugging) and
##        exits the process with status 1 on any unexpected format
def goals
  @goals ||= begin

    # <div class="data">
    # <table class="standard_tabelle" cellpadding="3" cellspacing="1">

    # puts table.class.name  #=> Nokogiri::XML::Element
    # puts table.text

    table = find_table_tore
    ## pp table

    ## note: find_table_tore returns nil if no Tore table was found;
    ##   report an error (as for all other unexpected formats)
    ##   and do NOT fail with a NoMethodError on nil
    if table.nil?
      puts "!! ERROR - no goals table (Tore) found in match report page"
      exit 1
    end

    trs = table.css( 'tr' )
    # puts trs.size

    rows = []
    last_score1 = 0
    last_score2 = 0

    trs.each_with_index do |tr,i|

      next   if i==0      # skip Tore headline row

      break  if i==1 && tr.text.strip == 'keine'    ## assume 0:0 - no goals

      # <tr>
      #  <td class="hell" align="center" width="20%">
      #    <b>0 : 1</b>
      #  </td>
      #  <td class="hell" style="padding-left: 50px;">
      #    <a href="/spieler_profil/luis-phelipe/" title="Luis Phelipe">Luis Phelipe</a> 34. / Rechtsschuss  (<a href="/spieler_profil/alexander-prass/" title="Alexander Prass">Alexander Prass</a>)
      #  </td>
      # </tr>

      tds = tr.css( 'td' )

      score_str  = squish( tds[0].text )
      player_str = squish( tds[1].text )

      print '[%03d] ' % i
      print score_str
      print " | "
      print player_str
      print "\n"

      score_str = score_str.gsub( ':', '-' )
      score_str = score_str.gsub( ' ', '' )    ## remove all white space


      ### todo/fix: use new Score.split helper here
      ##    score1, score2 = Score.split( score_str )
      parts = score_str.split('-')
      score1 = parts[0].to_i
      score2 = parts[1].to_i

      ## exactly one side of the score must advance by one
      if last_score1+1 == score1 && last_score2 == score2
        team = 1
      elsif last_score2+1 == score2 && last_score1 == score1
        team = 2
      else
        puts "!! ERROR - unexpected score advance (one goal at a time expected):"
        puts "  #{last_score1}-#{last_score2}=> #{score1}-#{score2}"
        exit 1
      end

      last_score1 = score1
      last_score2 = score2


      if player_str.index('/')
        ## e.g. Luis Phelipe 34. / Rechtsschuss (Alexander Prass)
        parts = player_str.split('/')
        # pp parts
        notes = parts[1].strip

        if parts[0].strip =~ /^([^0-9]+)[ ]+([0-9]+)\.$/
          player_name = $1
          goal_minute = $2
          # puts "   >#{player_name}< | >#{goal_minute}<"
        else
          puts "!! ERROR - unknown goal format (in part i):"
          puts player_str
          pp parts
          exit 1
        end
      else    # (simple line with no divider (/)
        #  Andrés Andrade 88. (Nicolas Meister)
        if m = %r{^([^0-9]+)
                     [ ]+
                    ([0-9]+)\.
                     (?:
                       [ ]+
                       (\([^)]+\))
                     )?
                    $}x.match( player_str )
          player_name = m[1]
          goal_minute = m[2]
          notes       = m[3] ? m[3] : ''
        else
          puts "!! ERROR - unknown goal format:"
          puts player_str
          exit 1
        end
      end


      ## check for "flags" e.g. own goal or penalty
      ##   if found - remove from notes (use its own flag)
      owngoal = false
      penalty = false

      if notes.index( 'Eigentor' )
        owngoal = true
        notes = notes.sub('Eigentor', '' ).strip
      elsif notes.index( 'Elfmeter' )
        ## e.g. Elfmeter (Marco Hausjell)
        penalty = true
        notes = notes.sub('Elfmeter', '' ).strip
      else
        ## nothing - keep going
      end

      rec = { score:  score_str,
              team:   team,   # 1 or 2
              player: player_name,
              minute: goal_minute
            }
      rec[:owngoal] = true    if owngoal
      rec[:penalty] = true    if penalty
      rec[:notes]   = notes   unless notes.empty?

      rows << rec
    end  ## each tr
    rows
  end
end  # goals
|
180
|
+
|
181
|
+
|
182
|
+
end # class Report
|
183
|
+
|
184
|
+
|
185
|
+
end # class Page
|
186
|
+
end # module Worldfootball
|
@@ -0,0 +1,262 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
class Page
|
4
|
+
|
5
|
+
class Schedule < Page ## note: use nested class for now - why? why not?
|
6
|
+
|
7
|
+
|
8
|
+
## Returns a new page built from the cached html
##   of the schedule for slug
def self.from_cache( slug )
  schedule_url = Worldfootball::Metal.schedule_url( slug )
  new( Webcache.read( schedule_url ))
end
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
## Returns the matches listed in the schedule table as an array of
##   records (hashes) with the keys
##     round:, date: (as YYYY-MM-DD), time:, team1:, team1_ref:,
##     score:, team2:, team2_ref:, report_ref:
##
##  note: prints every row (for debugging); a row without a date
##        reuses the date of the row before
def matches
  @matches ||= begin

    # <div class="data">
    # <table class="standard_tabelle" cellpadding="3" cellspacing="1">

    ## note: use > for "strict" sibling (child without any in-betweens)
    table = doc.css( 'div.data > table.standard_tabelle' ).first    ## get table
    # puts table.class.name  #=> Nokogiri::XML::Element
    # puts table.text

    ## note: report a missing table via assert
    ##   (and do NOT fail with a NoMethodError on nil)
    assert( !table.nil?, "schedule table (div.data > table.standard_tabelle) expected; none found" )

    trs = table.css( 'tr' )
    # puts trs.size
    i = 0

    last_date_str = nil
    last_round    = nil

    rows = []

    trs.each do |tr|
      i += 1


      if tr.text.strip =~ /Spieltag/ ||
         tr.text.strip =~ /[1-9]\.[ ]Runde|
                             Qual\.[ ][1-9]\.[ ]Runde|  # see EL or CL Quali
                             Qualifikation|     # see CA Championship
                             Sechzehntelfinale|   # see EL
                             Achtelfinale|
                             Viertelfinale|
                             Halbfinale|
                             Finale|
                             Gruppe[ ][A-Z]|    # see CL
                             Playoffs           # see EL Quali
                            /x
        puts
        print '[%03d] ' % i
        ## print squish( tr.text )
        print "round >#{tr.text.strip}<"
        print "\n"

        last_round = tr.text.strip
      else   ## assume table row (tr) is match line
        tds = tr.css( 'td' )

        date_str  = squish( tds[0].text )
        time_str  = squish( tds[1].text )

        # was: team1_str = squish( tds[2].text )

        ## <td><a href="/teams/hibernian-fc/" title="Hibernian FC">Hibernian FC</a></td>
        ##  todo/check: check if tooltip title always equals text - why? why not?
        team1_anchor = tds[2].css( 'a' )[0]
        if team1_anchor   # note: <a> might be optional (and team name only be plain text)
          team1_str    = squish( team1_anchor.text )
          team1_ref    = norm_team_ref( team1_anchor[:href] )
        else
          team1_str    = squish( tds[2].text )
          team1_ref    = nil
          puts "!! WARN: no team1_ref for >#{team1_str}< found"
        end

        ## <td> - </td>
        ## e.g. -
        vs_str  = squish( tds[3].text )   ## use to assert column!!!
        assert( vs_str == '-', "- for vs. expected; got #{vs_str}")
        ## was: team2_str = squish( tds[4].text )

        ## <td><a href="/teams/st-johnstone-fc/" title="St. Johnstone FC">St. Johnstone FC</a></td>
        team2_anchor = tds[4].css( 'a' )[0]
        if team2_anchor
          team2_str    = squish( team2_anchor.text )
          team2_ref    = norm_team_ref( team2_anchor[:href] )
        else
          team2_str    = squish( tds[4].text )
          team2_ref    = nil
          puts "!! WARN: no team2_ref for >#{team2_str}< found"
        end

        ### was: score_str = squish( tds[5].text )
        ## <a href="/spielbericht/premiership-2020-2021-hibernian-fc-st-johnstone-fc/" title="Spielschema Hibernian FC - St. Johnstone FC">-:-</a>

        score_anchor = tds[5].css( 'a' )[0]
        if score_anchor    ## note: score ref (match report) is optional!!!!
          score_str    = squish( score_anchor.text )
          score_ref    = norm_score_ref( score_anchor[:href] )
        else
          score_str    = squish( tds[5].text )
          score_ref    = nil
        end


        ## todo - find a better way to check for live match
        ## check for live badge image
        ## <td>
        ##   <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
        ## </td>
        img = tds[6].css( 'img' )[0]
        ## note: use to_s - the src attribute might be missing (nil)
        if img && img[:src].to_s.index( '/live/')
          puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
          score_str = '-:-'  # note: -:- gets replaced to ---
        end


        date_str = last_date_str    if date_str.empty?

        print '[%03d]    ' % i
        print "%-10s | " % date_str
        print "%-5s | " % time_str
        print "%-22s | " % team1_str
        print "%-22s | " % team2_str
        print "%-10s | " % score_str
        print( score_ref ? score_ref : 'n/a' )
        print "\n"


        ## change  2:1 (1:1)  to 2-1 (1-1)
        score_str = score_str.gsub( ':', '-' )

        ## convert date from 25.10.2019 to 2019-10-25
        date  = Date.strptime( date_str, '%d.%m.%Y' )

        ## note: keep structure flat for now
        ##        (AND not nested e.g. team:{text:,ref:}) - why? why not?
        rows << { round:      last_round,
                  date:       date.strftime( '%Y-%m-%d' ),
                  time:       time_str,
                  team1:      team1_str,
                  team1_ref:  team1_ref,
                  score:      score_str,
                  team2:      team2_str,
                  team2_ref:  team2_ref,
                  report_ref: score_ref
                }

        last_date_str = date_str
      end
    end # each tr (table row)

    rows
  end
end  # matches
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
## Returns one record (hash) per team name found in the matches
##   with count: (number of matches), name: and ref:
##   (the ref kept is the one seen first for the name)
def teams
  @teams ||= begin
    by_name = matches.each_with_object( {} ) do |match,index|
      ## index by name/text for now NOT ref - why? why not?
      sides = [[match[:team1], match[:team1_ref]],
               [match[:team2], match[:team2_ref]]]

      sides.each do |name,ref|
        index[ name ] ||= { count: 0,
                            name:  name,
                            ref:   ref }
        index[ name ][ :count ] += 1
        ## todo/check:  check/assert that name and ref are always equal - why? why not?
      end
    end

    by_name.values
  end
end
|
182
|
+
|
183
|
+
## Returns one record (hash) per round name found in the matches
##   with count: (number of matches) and name:
def rounds
  @rounds ||= begin
    by_name = matches.each_with_object( {} ) do |match,index|
      name = match[:round]
      index[ name ] ||= { count: 0,
                          name:  name }
      index[ name ][ :count ] += 1
    end

    by_name.values
  end
end
|
195
|
+
|
196
|
+
|
197
|
+
## Returns one record (hash) with text: and ref: per option
##   of the (first) season select box
def seasons
  # <select name="saison" ...
  @seasons ||= begin
    select_box = doc.css( 'select[name="saison"]').first

    select_box.css( 'option' ).map do |option|
      { text: squish( option.text ),
        ref:  norm_season_ref( option[:value] )
      }
    end
  end
end
|
212
|
+
|
213
|
+
|
214
|
+
######
|
215
|
+
## helpers
|
216
|
+
|
217
|
+
## todo/check - rename/use HREF and not REF - why? why not?
|
218
|
+
## e.g. /spielbericht/premiership-2020-2021-hibernian-fc-st-johnstone-fc/
##   note: use \A and \z (and NOT ^ and $) to anchor the
##         whole string and NOT just a single line
REF_SCORE_RE = %r{\A/spielbericht/
                     ([a-z0-9_-]+)/\z}x

## Returns the slug of a match report (score) href;
##   prints an error and exits the process with status 1
##   if the href is NOT in the expected format
def norm_score_ref( str )
  ## check ref format / path
  if m=REF_SCORE_RE.match( str )
    m[1]
  else
    puts "!! ERROR: unexpected score href format >#{str}<"
    exit 1
  end
end
|
230
|
+
|
231
|
+
|
232
|
+
## e.g. /teams/hibernian-fc/
##   note: use \A and \z (and NOT ^ and $) to anchor the
##         whole string and NOT just a single line
REF_TEAM_RE  = %r{\A/teams/
                     ([a-z0-9_-]+)/\z}x

## Returns the slug of a team href;
##   prints an error and exits the process with status 1
##   if the href is NOT in the expected format
def norm_team_ref( str )
  ## check ref format / path
  if m=REF_TEAM_RE.match( str )
    m[1]
  else
    puts "!! ERROR: unexpected team href format >#{str}<"
    exit 1
  end
end
|
244
|
+
|
245
|
+
|
246
|
+
## e.g. /alle_spiele/aut-bundesliga-2010-2011/
##   note: use \A and \z (and NOT ^ and $) to anchor the
##         whole string and NOT just a single line
REF_SEASON_RE = %r{\A/alle_spiele/
                      ([a-z0-9_-]+)/\z}x

## Returns the slug of a season (schedule) href;
##   prints an error and exits the process with status 1
##   if the href is NOT in the expected format
def norm_season_ref( str )
  ## check ref format / path
  if m=REF_SEASON_RE.match( str )
    m[1]
  else
    puts "!! ERROR: unexpected season href format >#{str}<"
    exit 1
  end
end
|
258
|
+
end # class Schedule
|
259
|
+
|
260
|
+
|
261
|
+
end # class Page
|
262
|
+
end # module Worldfootball
|
data/test/helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_version.rb
|
4
|
+
|
5
|
+
|
6
|
+
require 'helper'
|
7
|
+
|
8
|
+
## Smoke test for the version module:
##   prints version, banner and root and
##   checks that the three agree with each other
##
##  note: use Minitest (and NOT the legacy MiniTest spelling
##         that newer minitest releases no longer define by default)
class TestVersion < Minitest::Test

  def test_version
    version = Webget::Module::Football::VERSION
    banner  = Webget::Module::Football.banner

    pp version
    pp banner
    pp Webget::Module::Football.root

    assert_match( /\A\d+\.\d+\.\d+\z/, version )
    assert_equal version, Webget::Module::Football.version
    assert banner.start_with?( "webget-football/#{version} " )
  end

end # class TestVersion
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: webget-football
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-11-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rdoc
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.0'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '7'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '4.0'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '7'
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: hoe
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '3.22'
|
40
|
+
type: :development
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '3.22'
|
47
|
+
description: webget-football - get football data via web pages or web api (json) calls
|
48
|
+
email: opensport@googlegroups.com
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files:
|
52
|
+
- CHANGELOG.md
|
53
|
+
- Manifest.txt
|
54
|
+
- README.md
|
55
|
+
files:
|
56
|
+
- CHANGELOG.md
|
57
|
+
- Manifest.txt
|
58
|
+
- README.md
|
59
|
+
- Rakefile
|
60
|
+
- lib/webget-football.rb
|
61
|
+
- lib/webget-football/apis.rb
|
62
|
+
- lib/webget-football/apis/config.rb
|
63
|
+
- lib/webget-football/apis/download.rb
|
64
|
+
- lib/webget-football/version.rb
|
65
|
+
- lib/webget-football/worldfootball.rb
|
66
|
+
- lib/webget-football/worldfootball/config.rb
|
67
|
+
- lib/webget-football/worldfootball/download.rb
|
68
|
+
- lib/webget-football/worldfootball/page.rb
|
69
|
+
- lib/webget-football/worldfootball/page_report.rb
|
70
|
+
- lib/webget-football/worldfootball/page_schedule.rb
|
71
|
+
- lib/webget/football.rb
|
72
|
+
- test/helper.rb
|
73
|
+
- test/test_version.rb
|
74
|
+
homepage: https://github.com/sportdb/sport.db
|
75
|
+
licenses:
|
76
|
+
- Public Domain
|
77
|
+
metadata: {}
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options:
|
80
|
+
- "--main"
|
81
|
+
- README.md
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 2.2.2
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.5.2
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: webget-football - get football data via web pages or web api (json) calls
|
100
|
+
test_files: []
|