webget-football 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +4 -0
- data/Manifest.txt +18 -0
- data/README.md +28 -0
- data/Rakefile +26 -0
- data/lib/webget-football.rb +16 -0
- data/lib/webget-football/apis.rb +10 -0
- data/lib/webget-football/apis/config.rb +17 -0
- data/lib/webget-football/apis/download.rb +72 -0
- data/lib/webget-football/version.rb +22 -0
- data/lib/webget-football/worldfootball.rb +12 -0
- data/lib/webget-football/worldfootball/config.rb +17 -0
- data/lib/webget-football/worldfootball/download.rb +89 -0
- data/lib/webget-football/worldfootball/page.rb +106 -0
- data/lib/webget-football/worldfootball/page_report.rb +186 -0
- data/lib/webget-football/worldfootball/page_schedule.rb +262 -0
- data/lib/webget/football.rb +4 -0
- data/test/helper.rb +6 -0
- data/test/test_version.rb +16 -0
- metadata +100 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: c46b38f5ebe600370371d7032f6178fb4ae22dde
|
4
|
+
data.tar.gz: fd4ca8d6d218dc140f8c63f13f9028ace9a6eda1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 23e896f122b3f8068ea0acb82f85e60ebaff68d231d86923778dbc8de6cd345d2914f7401f2cd28fd10635ad276504f9d53655808b3d222559455d8f1adfdaee
|
7
|
+
data.tar.gz: 69516f9db9e2a495c378759e5c9195f596fb4be30147cba24fdcf04bf7f55e28325324d07a0f304200cb093d3ef61d239db4275d6d2a51863a1d2eea3e17fe17
|
data/CHANGELOG.md
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
CHANGELOG.md
|
2
|
+
Manifest.txt
|
3
|
+
README.md
|
4
|
+
Rakefile
|
5
|
+
lib/webget-football.rb
|
6
|
+
lib/webget-football/apis.rb
|
7
|
+
lib/webget-football/apis/config.rb
|
8
|
+
lib/webget-football/apis/download.rb
|
9
|
+
lib/webget-football/version.rb
|
10
|
+
lib/webget-football/worldfootball.rb
|
11
|
+
lib/webget-football/worldfootball/config.rb
|
12
|
+
lib/webget-football/worldfootball/download.rb
|
13
|
+
lib/webget-football/worldfootball/page.rb
|
14
|
+
lib/webget-football/worldfootball/page_report.rb
|
15
|
+
lib/webget-football/worldfootball/page_schedule.rb
|
16
|
+
lib/webget/football.rb
|
17
|
+
test/helper.rb
|
18
|
+
test/test_version.rb
|
data/README.md
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# webget-football - get football data via web pages or web api (json) calls
|
2
|
+
|
3
|
+
|
4
|
+
* home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
|
5
|
+
* bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
|
6
|
+
* gem :: [rubygems.org/gems/webget-football](https://rubygems.org/gems/webget-football)
|
7
|
+
* rdoc :: [rubydoc.info/gems/webget-football](http://rubydoc.info/gems/webget-football)
|
8
|
+
* forum :: [groups.google.com/group/opensport](https://groups.google.com/group/opensport)
|
9
|
+
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
TBD
|
14
|
+
|
15
|
+
|
16
|
+
## License
|
17
|
+
|
18
|
+

|
19
|
+
|
20
|
+
The `webget-football` scripts are dedicated to the public domain.
|
21
|
+
Use it as you please with no restrictions whatsoever.
|
22
|
+
|
23
|
+
|
24
|
+
## Questions? Comments?
|
25
|
+
|
26
|
+
Send them along to the
|
27
|
+
[Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
|
28
|
+
Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'hoe'
require './lib/webget-football/version.rb'

## gem specification (built via hoe) for webget-football
Hoe.spec 'webget-football' do

  self.version = Webget::Module::Football::VERSION

  self.summary     = 'webget-football - get football data via web pages or web api (json) calls'
  self.description = summary

  self.urls = { home: 'https://github.com/sportdb/sport.db' }

  self.author = 'Gerald Bauer'
  self.email  = 'opensport@googlegroups.com'

  # switch extension to .markdown for github formatting
  self.readme_file  = 'README.md'
  self.history_file = 'CHANGELOG.md'

  ## runtime dependencies - lib/webget-football.rb requires both on load;
  ##   without these entries installing the gem does NOT pull them in
  ##   NOTE(review): no minimum versions known from here - tighten if needed
  self.extra_deps = [
    ['webget',   '>= 0'],
    ['nokogiri', '>= 0'],
  ]

  self.licenses = ['Public Domain']

  self.spec_extras = {
    required_ruby_version: '>= 2.2.2'
  }

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
## 3rd party (our own)
|
2
|
+
require 'webget' ## incl. webget, webcache, webclient, etc.
|
3
|
+
|
4
|
+
## 3rd party
|
5
|
+
require 'nokogiri'
|
6
|
+
|
7
|
+
|
8
|
+
###
|
9
|
+
# our own code
|
10
|
+
require 'webget-football/version' # let version always go first
|
11
|
+
|
12
|
+
require 'webget-football/apis'
|
13
|
+
require 'webget-football/worldfootball'
|
14
|
+
|
15
|
+
|
16
|
+
puts Webget::Module::Football.banner # say hello
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Footballdata
|
2
|
+
|
3
|
+
class Configuration
|
4
|
+
## note: nothing here for now
|
5
|
+
end # class Configuration
|
6
|
+
|
7
|
+
|
8
|
+
## lets you use
|
9
|
+
## Footballdata.configure do |config|
|
10
|
+
## config.convert.out_dir = './o'
|
11
|
+
## end
|
12
|
+
|
13
|
+
def self.configure() yield( config ); end
|
14
|
+
|
15
|
+
def self.config() @config ||= Configuration.new; end
|
16
|
+
|
17
|
+
end # module Footballdata
|
@@ -0,0 +1,72 @@
|
|
1
|
+
module Footballdata
|
2
|
+
|
3
|
+
## todo/check: put in Downloader namespace/class - why? why not?
|
4
|
+
## or use Metal - no "porcelain" downloaders / machinery
|
5
|
+
## "Metal" (bare basics) downloader for the football-data.org (v2) web api;
##   all requests go through Metal.get
class Metal
  ## NOTE(review): plain http - the X-Auth-Token header added in get
  ##   is sent along with every request; confirm if https can be used instead
  BASE_URL = 'http://api.football-data.org/v2'


  ## url builders (no network access)
  def self.competitions_url( plan )   "#{BASE_URL}/competitions?plan=#{plan}"; end

  ## just use matches_url - why? why not?
  def self.competition_matches_url( code, year )  "#{BASE_URL}/competitions/#{code}/matches?season=#{year}"; end
  def self.competition_teams_url( code, year )    "#{BASE_URL}/competitions/#{code}/teams?season=#{year}";   end


  ## fetch the competitions list for plan TIER_ONE
  def self.competitions_tier_one
    get( competitions_url( 'TIER_ONE' ))
  end

  ## fetch the competitions list for plan TIER_TWO
  ##   fix: was calling the misspelled (undefined) competions_url
  def self.competitions_tier_two
    get( competitions_url( 'TIER_TWO' ))
  end

  ## fetch the competitions list for plan TIER_THREE
  ##   fix: was calling the misspelled (undefined) competions_url
  def self.competitions_tier_three
    get( competitions_url( 'TIER_THREE' ))
  end

  ## fetch matches AND teams (two requests) for
  ##   the competition code (e.g. FL1) and season year (e.g. 2019)
  def self.competition( code, year )
    get( competition_matches_url( code, year ))
    get( competition_teams_url( code, year ))
  end


  ## note: disabled (commented out) for now
  ##
  ## def self.matches
  ##   # note: Specified period must not exceed 10 days.
  ##
  ##   ## try query (football) week by week - tuesday to monday!!
  ##   ##  note: TIER_ONE does NOT include goals!!!
  ##   code       = 'FL1'
  ##   start_date = '2019-08-09'
  ##   end_date   = '2019-08-16'
  ##
  ##   get( "matches?competitions=#{code}&dateFrom=#{start_date}&dateTo=#{end_date}" )
  ## end


  ## Issue the request for url via Webget.call.
  ##   Adds the X-Auth-Token header if the FOOTBALLDATA env variable is set,
  ##   prints the start of the pretty printed json response (for debugging) and
  ##   exits the process (with status 1) if the response status is not ok.
  def self.get( url )
    token = ENV['FOOTBALLDATA']
    ## note: because of public workflow log - do NOT output token
    ## puts token

    headers = {}
    headers['X-Auth-Token'] = token    if token
    headers['User-Agent']   = 'ruby'
    headers['Accept']       = '*/*'

    ## note: add format: 'json' for pretty printing json (before) save in cache
    response = Webget.call( url, headers: headers )

    ## for debugging print pretty printed json first 400 chars
    puts response.json.pretty_inspect[0..400]

    exit 1  if response.status.nok?   # e.g. HTTP status code != 200
  end

end  ## class Metal
|
71
|
+
end # module Footballdata
|
72
|
+
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class Webget ## note: Webget is for now a class (NOT a module)
|
2
|
+
module Module
|
3
|
+
module Football
|
4
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
5
|
+
MINOR = 0
|
6
|
+
PATCH = 1
|
7
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
8
|
+
|
9
|
+
def self.version
|
10
|
+
VERSION
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.banner
|
14
|
+
"webget-football/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.root
|
18
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
19
|
+
end
|
20
|
+
end # module Football
|
21
|
+
end # module Module
|
22
|
+
end # class Webget
|
@@ -0,0 +1,12 @@
|
|
1
|
+
###########################
|
2
|
+
# note: split code in two parts
|
3
|
+
# metal - "bare" basics - no ref to sportdb
|
4
|
+
# and rest / convert with sportdb references / goodies
|
5
|
+
|
6
|
+
|
7
|
+
## our own code
|
8
|
+
require_relative 'worldfootball/config'
|
9
|
+
require_relative 'worldfootball/download'
|
10
|
+
require_relative 'worldfootball/page'
|
11
|
+
require_relative 'worldfootball/page_schedule'
|
12
|
+
require_relative 'worldfootball/page_report'
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Worldfootball
|
2
|
+
|
3
|
+
|
4
|
+
class Configuration
|
5
|
+
# nothing here for now
|
6
|
+
end # class Configuration
|
7
|
+
|
8
|
+
|
9
|
+
## lets you use
|
10
|
+
## Worldfootball.configure do |config|
|
11
|
+
## config.convert.out_dir = './o'
|
12
|
+
## end
|
13
|
+
|
14
|
+
def self.configure() yield( config ); end
|
15
|
+
def self.config() @config ||= Configuration.new; end
|
16
|
+
|
17
|
+
end # module Worldfootball
|
@@ -0,0 +1,89 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
module Worldfootball
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
## todo/check: put in Downloader namespace/class - why? why not?
|
9
|
+
## or use Metal - no "porcelain" downloaders / machinery
|
10
|
+
class Metal
|
11
|
+
|
12
|
+
BASE_URL = 'https://www.weltfussball.de'
|
13
|
+
|
14
|
+
|
15
|
+
def self.schedule_url( slug ) "#{BASE_URL}/alle_spiele/#{slug}/"; end
|
16
|
+
def self.report_url( slug ) "#{BASE_URL}/spielbericht/#{slug}/"; end
|
17
|
+
|
18
|
+
|
19
|
+
def self.download_schedule( slug )
|
20
|
+
url = schedule_url( slug )
|
21
|
+
get( url )
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.download_report( slug, cache: true )
|
25
|
+
url = report_url( slug )
|
26
|
+
|
27
|
+
## check cache first
|
28
|
+
if cache && Webcache.cached?( url )
|
29
|
+
puts " reuse local (cached) copy >#{Webcache.url_to_id( url )}<"
|
30
|
+
else
|
31
|
+
get( url )
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
def self.download_schedule_reports( slug, cache: true ) ## todo/check: rename to reports_for_schedule or such - why? why not?
|
40
|
+
|
41
|
+
page = Page::Schedule.from_cache( slug )
|
42
|
+
matches = page.matches
|
43
|
+
|
44
|
+
puts "matches - #{matches.size} rows:"
|
45
|
+
pp matches[0]
|
46
|
+
|
47
|
+
puts "#{page.generated_in_days_ago} - #{page.generated}"
|
48
|
+
|
49
|
+
## todo/fix: restore sleep to old value at the end!!!!
|
50
|
+
## Webget.config.sleep = 8 ## fetch 7-8 pages/min
|
51
|
+
|
52
|
+
matches.each_with_index do |match,i|
|
53
|
+
est = (Webget.config.sleep * (matches.size-(i+1)))/60.0 # estimated time left
|
54
|
+
|
55
|
+
puts "fetching #{i+1}/#{matches.size} (#{est} min(s)) - #{match[:round]} | #{match[:team1]} v #{match[:team2]}..."
|
56
|
+
report_ref = match[:report_ref ]
|
57
|
+
if report_ref
|
58
|
+
download_report( report_ref, cache: cache )
|
59
|
+
else
|
60
|
+
puts "!! WARN: report ref missing for match:"
|
61
|
+
pp match
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
|
67
|
+
### add some "old" (back compat) aliases - keep - why? why not?
|
68
|
+
class << self
|
69
|
+
alias_method :schedule, :download_schedule
|
70
|
+
alias_method :report, :download_report
|
71
|
+
alias_method :schedule_reports, :download_schedule_reports
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
##################
|
76
|
+
# helpers
|
77
|
+
def self.get( url ) ## get & record/save to cache
|
78
|
+
|
79
|
+
response = Webget.page( url ) ## fetch (and cache) html page (via HTTP GET)
|
80
|
+
|
81
|
+
## note: exit on get / fetch error - do NOT continue for now - why? why not?
|
82
|
+
exit 1 if response.status.nok? ## e.g. HTTP status code != 200
|
83
|
+
end
|
84
|
+
|
85
|
+
|
86
|
+
end # class Metal
|
87
|
+
end # module Worldfootball
|
88
|
+
|
89
|
+
|
@@ -0,0 +1,106 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
class Page
|
4
|
+
|
5
|
+
def self.from_file( path )
|
6
|
+
html = File.open( path, 'r:utf-8' ) {|f| f.read }
|
7
|
+
new( html )
|
8
|
+
end
|
9
|
+
|
10
|
+
def initialize( html )
|
11
|
+
@html = html
|
12
|
+
end
|
13
|
+
|
14
|
+
def doc
|
15
|
+
## note: if we use a fragment and NOT a document - no access to page head (and meta elements and such)
|
16
|
+
@doc ||= Nokogiri::HTML( @html )
|
17
|
+
end
|
18
|
+
|
19
|
+
def title
|
20
|
+
# <title>Bundesliga 2010/2011 » Spielplan</title>
|
21
|
+
@title ||= doc.css( 'title' ).first
|
22
|
+
@title.text ## get element's text content
|
23
|
+
end
|
24
|
+
|
25
|
+
def keywords
|
26
|
+
# <meta name="keywords"
|
27
|
+
# content="Bundesliga, 2010/2011, Spielplan, KSV Superfund, SC Magna Wiener Neustadt, SV Ried, FC Wacker Innsbruck, Austria Wien, Sturm Graz, SV Mattersburg, LASK Linz, Rapid Wien, RB Salzburg" />
|
28
|
+
@keywords ||= doc.css( 'meta[name="keywords"]' ).first
|
29
|
+
@keywords[:content] ## get content attribute
|
30
|
+
## or doc.xpath( '//meta[@name="keywords"]' ).first
|
31
|
+
## pp keywords
|
32
|
+
# puts " #{keywords[:content]}"
|
33
|
+
|
34
|
+
# keywords = doc.at( 'meta[@name="Keywords"]' )
|
35
|
+
# pp keywords
|
36
|
+
## check for
|
37
|
+
end
|
38
|
+
|
39
|
+
# <meta property="og:url"
|
40
|
+
# content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
|
41
|
+
def url
|
42
|
+
@url ||= doc.css( 'meta[property="og:url"]' ).first
|
43
|
+
@url[:content]
|
44
|
+
end
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
## <!-- [generated 2020-06-30 22:30:19] -->
|
49
|
+
## <!-- [generated 2020-06-30 22:30:19] -->
|
50
|
+
## matches the page generation stamp (html comment) e.g.
##   <!-- [generated 2020-06-30 22:30:19] -->
GENERATED_RE = %r{
    <!--
      [ ]+
       \[generated
           [ ]+
           (?<date>\d+-\d+-\d+)
           [ ]+
           (?<time>\d+:\d+:\d+)
        \]
      [ ]+
    -->
}x


## Returns the page generation timestamp (as DateTime) parsed from
##   the "generated" html comment or nil (plus a printed warning) if not found.
def generated
  ## note: use defined? (and NOT ||=) to memoize the not found (nil) result too;
  ##   otherwise every call scans the page again and prints the warning again
  return @generated  if defined?( @generated )

  m = GENERATED_RE.match( @html )
  @generated = if m
                 DateTime.strptime( "#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S')
               else
                 puts "!! WARN - no generated timestamp found in page"
                 nil
               end
end
|
75
|
+
|
76
|
+
### convenience helper / formatter
|
77
|
+
def generated_in_days_ago
|
78
|
+
if generated
|
79
|
+
diff_in_days = Date.today.jd - generated.jd
|
80
|
+
"#{diff_in_days}d"
|
81
|
+
else
|
82
|
+
'?'
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
######################
|
87
|
+
## helper methods
|
88
|
+
|
89
|
+
## Normalize text from a html element:
##   turn no-break spaces into plain spaces, fold whitespace runs
##   into a single space and remove leading and trailing whitespace.
##   Returns a new string.
def squish( str )
  ## note: convert no-break spaces BEFORE strip - String#strip does NOT
  ##   remove U+00A0, thus, leading/trailing ones used to survive as spaces
  str = str.gsub( "\u{00A0}", ' ' )  # Unicode Character 'NO-BREAK SPACE' (U+00A0)
  str = str.gsub( /[ \t\r\n]+/, ' ' )   ## fold whitespace to one max.
  str.strip
end
|
95
|
+
|
96
|
+
def assert( cond, msg )
|
97
|
+
if cond
|
98
|
+
# do nothing
|
99
|
+
else
|
100
|
+
puts "!!! assert failed (in parse page) - #{msg}"
|
101
|
+
exit 1
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
end # class Page
|
106
|
+
end # module Worldfootball
|
@@ -0,0 +1,186 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
class Page
|
4
|
+
|
5
|
+
class Report < Page ## note: use nested class for now - why? why not?
|
6
|
+
|
7
|
+
|
8
|
+
def self.from_cache( slug )
|
9
|
+
url = Worldfootball::Metal.report_url( slug )
|
10
|
+
html = Webcache.read( url )
|
11
|
+
new( html )
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
## Find the goals ("Tore") table in the match report page.
##   Checks all table.standard_tabelle elements and returns the first one
##   where the first cell of the first row reads Tore - or nil if none found.
def find_table_tore
  # <table class="" cellpadding="3" cellspacing="1">
  #  <tr>
  #    <td colspan="2" class="ueberschrift" align="center">Tore</td>
  #  </tr>

  tables = doc.css( 'table.standard_tabelle' )
  # puts " found #{tables.size} table.standard_tabelle"   # e.g. found 6 table.standard_tabelle
  tables.each do |table|
    first_tr = table.css( 'tr' )[0]
    ## note: skip tables without any rows (used to crash calling css on nil)
    next if first_tr.nil?

    tds = first_tr.css( 'td' )
    return table   if tds.size > 0 && tds[0].text == 'Tore'
  end

  nil  ## nothing found; auto-report error -why? why not?
end
|
39
|
+
|
40
|
+
def goals
|
41
|
+
@goals ||= begin
|
42
|
+
|
43
|
+
# <div class="data">
|
44
|
+
# <table class="standard_tabelle" cellpadding="3" cellspacing="1">
|
45
|
+
|
46
|
+
# puts table.class.name #=> Nokogiri::XML::Element
|
47
|
+
# puts table.text
|
48
|
+
|
49
|
+
table = find_table_tore
|
50
|
+
## pp table
|
51
|
+
|
52
|
+
trs = table.css( 'tr' )
|
53
|
+
# puts trs.size
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
rows = []
|
58
|
+
last_score1 = 0
|
59
|
+
last_score2 = 0
|
60
|
+
|
61
|
+
trs.each_with_index do |tr,i|
|
62
|
+
|
63
|
+
next if i==0 # skip Tore headline row
|
64
|
+
|
65
|
+
break if i==1 && tr.text.strip == 'keine' ## assume 0:0 - no goals
|
66
|
+
|
67
|
+
# <tr>
|
68
|
+
# <td class="hell" align="center" width="20%">
|
69
|
+
# <b>0 : 1</b>
|
70
|
+
# </td>
|
71
|
+
# <td class="hell" style="padding-left: 50px;">
|
72
|
+
# <a href="/spieler_profil/luis-phelipe/" title="Luis Phelipe">Luis Phelipe</a> 34. / Rechtsschuss (<a href="/spieler_profil/alexander-prass/" title="Alexander Prass">Alexander Prass</a>)
|
73
|
+
# </td>
|
74
|
+
# </tr>
|
75
|
+
|
76
|
+
tds = tr.css( 'td' )
|
77
|
+
|
78
|
+
score_str = squish( tds[0].text )
|
79
|
+
|
80
|
+
player_str = squish( tds[1].text )
|
81
|
+
|
82
|
+
print '[%03d] ' % i
|
83
|
+
print score_str
|
84
|
+
print " | "
|
85
|
+
print player_str
|
86
|
+
print "\n"
|
87
|
+
|
88
|
+
score_str = score_str.gsub( ':', '-' )
|
89
|
+
score_str = score_str.gsub( ' ', '' ) ## remove all white space
|
90
|
+
|
91
|
+
|
92
|
+
### todo/fix: use new Score.split helper here
|
93
|
+
## score1, score2 = Score.split( score_str )
|
94
|
+
parts = score_str.split('-')
|
95
|
+
score1 = parts[0].to_i
|
96
|
+
score2 = parts[1].to_i
|
97
|
+
|
98
|
+
if last_score1+1 == score1 && last_score2 == score2
|
99
|
+
team = 1
|
100
|
+
elsif last_score2+1 == score2 && last_score1 == score1
|
101
|
+
team = 2
|
102
|
+
else
|
103
|
+
puts "!! ERROR - unexpected score advance (one goal at a time expected):"
|
104
|
+
puts " #{last_score1}-#{last_score2}=> #{score1}-#{score2}"
|
105
|
+
exit 1
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
last_score1 = score1
|
110
|
+
last_score2 = score2
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
if player_str.index('/')
|
115
|
+
parts = player_str.split('/')
|
116
|
+
# pp parts
|
117
|
+
notes = parts[1].strip
|
118
|
+
|
119
|
+
if parts[0].strip =~ /^([^0-9]+)[ ]+([0-9]+)\.$/
|
120
|
+
player_name = $1
|
121
|
+
goal_minute = $2
|
122
|
+
# puts " >#{player_name}< | >#{goal_minute}<"
|
123
|
+
else
|
124
|
+
puts "!! ERROR - unknown goal format (in part i):"
|
125
|
+
puts player_str
|
126
|
+
pp parts
|
127
|
+
exit 1
|
128
|
+
end
|
129
|
+
else # (simple line with no divider (/)
|
130
|
+
# Andrés Andrade 88. (Nicolas Meister)
|
131
|
+
if m = %r{^([^0-9]+)
|
132
|
+
[ ]+
|
133
|
+
([0-9]+)\.
|
134
|
+
(?:
|
135
|
+
[ ]+
|
136
|
+
(\([^)]+\))
|
137
|
+
)?
|
138
|
+
$}x.match( player_str )
|
139
|
+
player_name = m[1]
|
140
|
+
goal_minute = m[2]
|
141
|
+
notes = m[3] ? m[3] : ''
|
142
|
+
else
|
143
|
+
puts "!! ERROR - unknown goal format:"
|
144
|
+
puts player_str
|
145
|
+
exit 1
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
|
150
|
+
## check for "flags" e.g. own goal or penalty
|
151
|
+
## if found - remove from notes (use its own flag)
|
152
|
+
owngoal = false
|
153
|
+
penalty = false
|
154
|
+
|
155
|
+
if notes.index( 'Eigentor' )
|
156
|
+
owngoal = true
|
157
|
+
notes = notes.sub('Eigentor', '' ).strip
|
158
|
+
elsif notes.index( 'Elfmeter' )
|
159
|
+
## e.g. Elfmeter (Marco Hausjell)
|
160
|
+
penalty = true
|
161
|
+
notes = notes.sub('Elfmeter', '' ).strip
|
162
|
+
else
|
163
|
+
## nothing - keep going
|
164
|
+
end
|
165
|
+
|
166
|
+
rec = { score: score_str,
|
167
|
+
team: team, # 1 or 2
|
168
|
+
player: player_name,
|
169
|
+
minute: goal_minute
|
170
|
+
}
|
171
|
+
rec[:owngoal] = true if owngoal
|
172
|
+
rec[:penalty] = true if penalty
|
173
|
+
rec[:notes] = notes unless notes.empty?
|
174
|
+
|
175
|
+
rows << rec
|
176
|
+
end ## each tr
|
177
|
+
rows
|
178
|
+
end
|
179
|
+
end # goals
|
180
|
+
|
181
|
+
|
182
|
+
end # class Report
|
183
|
+
|
184
|
+
|
185
|
+
end # class Page
|
186
|
+
end # module Worldfootball
|
@@ -0,0 +1,262 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
class Page
|
4
|
+
|
5
|
+
class Schedule < Page ## note: use nested class for now - why? why not?
|
6
|
+
|
7
|
+
|
8
|
+
def self.from_cache( slug )
|
9
|
+
url = Worldfootball::Metal.schedule_url( slug )
|
10
|
+
html = Webcache.read( url )
|
11
|
+
new( html )
|
12
|
+
end
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
def matches
|
17
|
+
@matches ||= begin
|
18
|
+
|
19
|
+
# <div class="data">
|
20
|
+
# <table class="standard_tabelle" cellpadding="3" cellspacing="1">
|
21
|
+
|
22
|
+
## note: use > for "strict" sibling (child without any in-betweens)
|
23
|
+
table = doc.css( 'div.data > table.standard_tabelle' ).first ## get table
|
24
|
+
# puts table.class.name #=> Nokogiri::XML::Element
|
25
|
+
# puts table.text
|
26
|
+
|
27
|
+
trs = table.css( 'tr' )
|
28
|
+
# puts trs.size
|
29
|
+
i = 0
|
30
|
+
|
31
|
+
last_date_str = nil
|
32
|
+
last_round = nil
|
33
|
+
|
34
|
+
rows = []
|
35
|
+
|
36
|
+
trs.each do |tr|
|
37
|
+
i += 1
|
38
|
+
|
39
|
+
|
40
|
+
if tr.text.strip =~ /Spieltag/ ||
|
41
|
+
tr.text.strip =~ /[1-9]\.[ ]Runde|
|
42
|
+
Qual\.[ ][1-9]\.[ ]Runde| # see EL or CL Quali
|
43
|
+
Qualifikation| # see CA Championship
|
44
|
+
Sechzehntelfinale| # see EL
|
45
|
+
Achtelfinale|
|
46
|
+
Viertelfinale|
|
47
|
+
Halbfinale|
|
48
|
+
Finale|
|
49
|
+
Gruppe[ ][A-Z]| # see CL
|
50
|
+
Playoffs # see EL Quali
|
51
|
+
/x
|
52
|
+
puts
|
53
|
+
print '[%03d] ' % i
|
54
|
+
## print squish( tr.text )
|
55
|
+
print "round >#{tr.text.strip}<"
|
56
|
+
print "\n"
|
57
|
+
|
58
|
+
last_round = tr.text.strip
|
59
|
+
else ## assume table row (tr) is match line
|
60
|
+
tds = tr.css( 'td' )
|
61
|
+
|
62
|
+
date_str = squish( tds[0].text )
|
63
|
+
time_str = squish( tds[1].text )
|
64
|
+
|
65
|
+
# was: team1_str = squish( tds[2].text )
|
66
|
+
|
67
|
+
## <td><a href="/teams/hibernian-fc/" title="Hibernian FC">Hibernian FC</a></td>
|
68
|
+
## todo/check: check if tooltip title always equals text - why? why not?
|
69
|
+
team1_anchor = tds[2].css( 'a' )[0]
|
70
|
+
if team1_anchor # note: <a> might be optional (and team name only be plain text)
|
71
|
+
team1_str = squish( team1_anchor.text )
|
72
|
+
team1_ref = norm_team_ref( team1_anchor[:href] )
|
73
|
+
else
|
74
|
+
team1_str = squish( tds[2].text )
|
75
|
+
team1_ref = nil
|
76
|
+
puts "!! WARN: no team1_ref for >#{team1_str}< found"
|
77
|
+
end
|
78
|
+
|
79
|
+
## <td> - </td>
|
80
|
+
## e.g. -
|
81
|
+
vs_str = squish( tds[3].text ) ## use to assert column!!!
|
82
|
+
assert( vs_str == '-', "- for vs. expected; got #{vs_str}")
|
83
|
+
## was: team2_str = squish( tds[4].text )
|
84
|
+
|
85
|
+
## <td><a href="/teams/st-johnstone-fc/" title="St. Johnstone FC">St. Johnstone FC</a></td>
|
86
|
+
team2_anchor = tds[4].css( 'a' )[0]
|
87
|
+
if team2_anchor
|
88
|
+
team2_str = squish( team2_anchor.text )
|
89
|
+
team2_ref = norm_team_ref( team2_anchor[:href] )
|
90
|
+
else
|
91
|
+
team2_str = squish( tds[4].text )
|
92
|
+
team2_ref = nil
|
93
|
+
puts "!! WARN: no team2_ref for >#{team2_str}< found"
|
94
|
+
end
|
95
|
+
|
96
|
+
### was: score_str = squish( tds[5].text )
|
97
|
+
## <a href="/spielbericht/premiership-2020-2021-hibernian-fc-st-johnstone-fc/" title="Spielschema Hibernian FC - St. Johnstone FC">-:-</a>
|
98
|
+
|
99
|
+
score_anchor = tds[5].css( 'a' )[0]
|
100
|
+
if score_anchor ## note: score ref (match report) is optional!!!!
|
101
|
+
score_str = squish( score_anchor.text )
|
102
|
+
score_ref = norm_score_ref( score_anchor[:href] )
|
103
|
+
else
|
104
|
+
score_str = squish( tds[5].text )
|
105
|
+
score_ref = nil
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
## todo - find a better way to check for live match
|
110
|
+
## check for live badge image
|
111
|
+
## <td>
|
112
|
+
## <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
|
113
|
+
## </td>
|
114
|
+
img = tds[6].css( 'img' )[0]
|
115
|
+
if img && img[:src].index( '/live/')
|
116
|
+
puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
|
117
|
+
score_str = '-:-' # note: -:- gets replaced to ---
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
date_str = last_date_str if date_str.empty?
|
122
|
+
|
123
|
+
print '[%03d] ' % i
|
124
|
+
print "%-10s | " % date_str
|
125
|
+
print "%-5s | " % time_str
|
126
|
+
print "%-22s | " % team1_str
|
127
|
+
print "%-22s | " % team2_str
|
128
|
+
print "%-10s | " % score_str
|
129
|
+
print (score_ref ? score_ref : 'n/a')
|
130
|
+
print "\n"
|
131
|
+
|
132
|
+
|
133
|
+
## change 2:1 (1:1) to 2-1 (1-1)
|
134
|
+
score_str = score_str.gsub( ':', '-' )
|
135
|
+
|
136
|
+
## convert date from 25.10.2019 to 2019-10-25
|
137
|
+
date = Date.strptime( date_str, '%d.%m.%Y' )
|
138
|
+
|
139
|
+
## note: keep structure flat for now
|
140
|
+
## (AND not nested e.g. team:{text:,ref:}) - why? why not?
|
141
|
+
rows << { round: last_round,
|
142
|
+
date: date.strftime( '%Y-%m-%d' ),
|
143
|
+
time: time_str,
|
144
|
+
team1: team1_str,
|
145
|
+
team1_ref: team1_ref,
|
146
|
+
score: score_str,
|
147
|
+
team2: team2_str,
|
148
|
+
team2_ref: team2_ref,
|
149
|
+
report_ref: score_ref
|
150
|
+
}
|
151
|
+
|
152
|
+
last_date_str = date_str
|
153
|
+
end
|
154
|
+
end # each tr (table row)
|
155
|
+
|
156
|
+
rows
|
157
|
+
end
|
158
|
+
end # matches
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
def teams
|
163
|
+
@teams ||= begin
|
164
|
+
h = {}
|
165
|
+
matches.each do |match|
|
166
|
+
## index by name/text for now NOT ref - why? why not?
|
167
|
+
[{text: match[:team1],
|
168
|
+
ref: match[:team1_ref]},
|
169
|
+
{text: match[:team2],
|
170
|
+
ref: match[:team2_ref]}].each do |team|
|
171
|
+
rec = h[ team[:text] ] ||= { count: 0,
|
172
|
+
name: team[ :text],
|
173
|
+
ref: team[ :ref ] }
|
174
|
+
rec[ :count ] += 1
|
175
|
+
## todo/check: check/assert that name and ref are always equal - why? why not?
|
176
|
+
end
|
177
|
+
end
|
178
|
+
|
179
|
+
h.values
|
180
|
+
end
|
181
|
+
end
|
182
|
+
|
183
|
+
def rounds
|
184
|
+
@rounds ||= begin
|
185
|
+
h = {}
|
186
|
+
matches.each do |match|
|
187
|
+
rec = h[ match[:round] ] ||= { count: 0,
|
188
|
+
name: match[ :round] }
|
189
|
+
rec[ :count ] += 1
|
190
|
+
end
|
191
|
+
|
192
|
+
h.values
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
|
197
|
+
def seasons
|
198
|
+
# <select name="saison" ...
|
199
|
+
@seasons ||= begin
|
200
|
+
recs = []
|
201
|
+
season = doc.css( 'select[name="saison"]').first
|
202
|
+
options = season.css( 'option' )
|
203
|
+
|
204
|
+
options.each do |option|
|
205
|
+
recs << { text: squish( option.text ),
|
206
|
+
ref: norm_season_ref( option[:value] )
|
207
|
+
}
|
208
|
+
end
|
209
|
+
recs
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
|
214
|
+
######
|
215
|
+
## helpers
|
216
|
+
|
217
|
+
## todo/check - rename/use HREF and not REF - why? why not?
|
218
|
+
## note: use \A and \z (and NOT ^ and $) to anchor the complete string;
##   ^ and $ match per line and let a multi-line href pass the check
REF_SCORE_RE = %r{\A/spielbericht/
                     ([a-z0-9_-]+)/\z}x

REF_TEAM_RE = %r{\A/teams/
                     ([a-z0-9_-]+)/\z}x

REF_SEASON_RE = %r{\A/alle_spiele/
                     ([a-z0-9_-]+)/\z}x


## Each norm_*_ref helper checks the href format / path and
##   returns the slug (path segment) only
##   e.g. /teams/hibernian-fc/ => hibernian-fc
##   On an unexpected format prints an error and exits (with status 1).
def norm_score_ref( str )   norm_ref( REF_SCORE_RE,  'score',  str ); end
def norm_team_ref( str )    norm_ref( REF_TEAM_RE,   'team',   str ); end
def norm_season_ref( str )  norm_ref( REF_SEASON_RE, 'season', str ); end


## shared machinery - match str against re and return the first capture
##   or report the unexpected href (for kind) and exit
def norm_ref( re, kind, str )
  m = re.match( str )
  if m
    m[1]
  else
    puts "!! ERROR: unexpected #{kind} href format >#{str}<"
    exit 1
  end
end
private :norm_ref
|
258
|
+
end # class Schedule
|
259
|
+
|
260
|
+
|
261
|
+
end # class Page
|
262
|
+
end # module Worldfootball
|
data/test/helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
###
|
2
|
+
# to run use
|
3
|
+
# ruby -I ./lib -I ./test test/test_version.rb
|
4
|
+
|
5
|
+
|
6
|
+
require 'helper'
|
7
|
+
|
8
|
+
## smoke tests for the version module (version, banner, root)
##   note: use Minitest (NOT the legacy MiniTest alias) -
##     newer minitest versions do not define the alias by default
class TestVersion < Minitest::Test

  def test_version
    version = Webget::Module::Football::VERSION
    pp version

    assert_match( /\A\d+\.\d+\.\d+\z/, version )
    assert_equal version, Webget::Module::Football.version
  end

  def test_banner
    banner = Webget::Module::Football.banner
    pp banner

    assert banner.start_with?( "webget-football/#{Webget::Module::Football::VERSION} on Ruby" )
  end

  def test_root
    root = Webget::Module::Football.root
    pp root

    ## note: root is an expanded (that is, absolute) path
    assert_equal File.expand_path( root ), root
  end

end # class TestVersion
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: webget-football
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Gerald Bauer
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-11-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rdoc
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.0'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '7'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '4.0'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '7'
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: hoe
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '3.22'
|
40
|
+
type: :development
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '3.22'
|
47
|
+
description: webget-football - get football data via web pages or web api (json) calls
|
48
|
+
email: opensport@googlegroups.com
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files:
|
52
|
+
- CHANGELOG.md
|
53
|
+
- Manifest.txt
|
54
|
+
- README.md
|
55
|
+
files:
|
56
|
+
- CHANGELOG.md
|
57
|
+
- Manifest.txt
|
58
|
+
- README.md
|
59
|
+
- Rakefile
|
60
|
+
- lib/webget-football.rb
|
61
|
+
- lib/webget-football/apis.rb
|
62
|
+
- lib/webget-football/apis/config.rb
|
63
|
+
- lib/webget-football/apis/download.rb
|
64
|
+
- lib/webget-football/version.rb
|
65
|
+
- lib/webget-football/worldfootball.rb
|
66
|
+
- lib/webget-football/worldfootball/config.rb
|
67
|
+
- lib/webget-football/worldfootball/download.rb
|
68
|
+
- lib/webget-football/worldfootball/page.rb
|
69
|
+
- lib/webget-football/worldfootball/page_report.rb
|
70
|
+
- lib/webget-football/worldfootball/page_schedule.rb
|
71
|
+
- lib/webget/football.rb
|
72
|
+
- test/helper.rb
|
73
|
+
- test/test_version.rb
|
74
|
+
homepage: https://github.com/sportdb/sport.db
|
75
|
+
licenses:
|
76
|
+
- Public Domain
|
77
|
+
metadata: {}
|
78
|
+
post_install_message:
|
79
|
+
rdoc_options:
|
80
|
+
- "--main"
|
81
|
+
- README.md
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 2.2.2
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.5.2
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: webget-football - get football data via web pages or web api (json) calls
|
100
|
+
test_files: []
|