webget-football 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c46b38f5ebe600370371d7032f6178fb4ae22dde
4
+ data.tar.gz: fd4ca8d6d218dc140f8c63f13f9028ace9a6eda1
5
+ SHA512:
6
+ metadata.gz: 23e896f122b3f8068ea0acb82f85e60ebaff68d231d86923778dbc8de6cd345d2914f7401f2cd28fd10635ad276504f9d53655808b3d222559455d8f1adfdaee
7
+ data.tar.gz: 69516f9db9e2a495c378759e5c9195f596fb4be30147cba24fdcf04bf7f55e28325324d07a0f304200cb093d3ef61d239db4275d6d2a51863a1d2eea3e17fe17
@@ -0,0 +1,4 @@
1
+ ### 0.0.1 / 2020-11-09
2
+
3
+ * Everything is new. First release.
4
+
@@ -0,0 +1,18 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/webget-football.rb
6
+ lib/webget-football/apis.rb
7
+ lib/webget-football/apis/config.rb
8
+ lib/webget-football/apis/download.rb
9
+ lib/webget-football/version.rb
10
+ lib/webget-football/worldfootball.rb
11
+ lib/webget-football/worldfootball/config.rb
12
+ lib/webget-football/worldfootball/download.rb
13
+ lib/webget-football/worldfootball/page.rb
14
+ lib/webget-football/worldfootball/page_report.rb
15
+ lib/webget-football/worldfootball/page_schedule.rb
16
+ lib/webget/football.rb
17
+ test/helper.rb
18
+ test/test_version.rb
@@ -0,0 +1,28 @@
1
+ # webget-football - get football data via web pages or web api (json) calls
2
+
3
+
4
+ * home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
5
+ * bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
6
+ * gem :: [rubygems.org/gems/webget-football](https://rubygems.org/gems/webget-football)
7
+ * rdoc :: [rubydoc.info/gems/webget-football](http://rubydoc.info/gems/webget-football)
8
+ * forum :: [groups.google.com/group/opensport](https://groups.google.com/group/opensport)
9
+
10
+
11
+ ## Usage
12
+
13
+ TBD
14
+
15
+
16
+ ## License
17
+
18
+ ![](https://publicdomainworks.github.io/buttons/zero88x31.png)
19
+
20
+ The `webget-football` scripts are dedicated to the public domain.
21
+ Use them as you please with no restrictions whatsoever.
22
+
23
+
24
+ ## Questions? Comments?
25
+
26
+ Send them along to the
27
+ [Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
28
+ Thanks!
@@ -0,0 +1,26 @@
1
+ require 'hoe'
2
+ require './lib/webget-football/version.rb'
3
+
4
## gem specification (via hoe) - name, version, texts, contact and
##   the minimum ruby version required
Hoe.spec 'webget-football' do
  tagline = 'webget-football - get football data via web pages or web api (json) calls'

  self.version     = Webget::Module::Football::VERSION

  self.summary     = tagline
  self.description = summary     ## note: description is the same as the summary

  self.urls        = { home: 'https://github.com/sportdb/sport.db' }

  self.author      = 'Gerald Bauer'
  self.email       = 'opensport@googlegroups.com'

  ## note: use the markdown versions of readme and changelog (for github formatting)
  self.readme_file  = 'README.md'
  self.history_file = 'CHANGELOG.md'

  self.licenses    = ['Public Domain']

  self.spec_extras = { required_ruby_version: '>= 2.2.2' }
end
@@ -0,0 +1,16 @@
1
+ ## 3rd party (our own)
2
+ require 'webget' ## incl. webget, webcache, webclient, etc.
3
+
4
+ ## 3rd party
5
+ require 'nokogiri'
6
+
7
+
8
+ ###
9
+ # our own code
10
+ require 'webget-football/version' # let version always go first
11
+
12
+ require 'webget-football/apis'
13
+ require 'webget-football/worldfootball'
14
+
15
+
16
+ puts Webget::Module::Football.banner # say hello
@@ -0,0 +1,10 @@
1
+ ###########################
2
+ # note: split code in two parts
3
+ # metal - "bare" basics - no ref to sportdb
4
+ # and rest / convert with sportdb references / goodies
5
+
6
+ ## our own code
7
+ require_relative 'apis/config'
8
+ require_relative 'apis/download'
9
+
10
+
@@ -0,0 +1,17 @@
1
module Footballdata

  ## settings container (placeholder) - carries no options for now
  class Configuration
  end # class Configuration


  class << self
    ## returns the module-wide configuration object;
    ##   gets built on first access and reused afterwards
    def config
      @config = Configuration.new   unless @config
      @config
    end

    ## yields the (shared) configuration to the block passed in
    ##  and returns the block's result; lets you use
    ##
    ##   Footballdata.configure do |config|
    ##      ## change settings on config here
    ##   end
    def configure
      yield( config )
    end
  end # class << self

end # module Footballdata
@@ -0,0 +1,72 @@
1
module Footballdata

  ## "bare metal" access to the football-data.org (v2) web api (json);
  ##   builds the request urls and fetches them via Webget
  ##
  ## todo/check: put in Downloader namespace/class - why? why not?
  ##   or use Metal - no "porcelain" downloaders / machinery
  class Metal
    ## NOTE(review): plain http - the X-Auth-Token header (see get) travels
    ##   unencrypted; consider https (confirm first that the web cache
    ##   does not key on the url scheme)
    BASE_URL = 'http://api.football-data.org/v2'


    ## url builders - plan is e.g. TIER_ONE; code is a competition code e.g. FL1;
    ##   year is the season (start) year
    def self.competitions_url( plan )  "#{BASE_URL}/competitions?plan=#{plan}"; end

    ## just use matches_url - why? why not?
    def self.competition_matches_url( code, year )  "#{BASE_URL}/competitions/#{code}/matches?season=#{year}"; end
    def self.competition_teams_url( code, year )    "#{BASE_URL}/competitions/#{code}/teams?season=#{year}";   end


    ## fetch the list of competitions for a plan / tier
    def self.competitions_tier_one
      get( competitions_url( 'TIER_ONE' ))
    end

    def self.competitions_tier_two
      get( competitions_url( 'TIER_TWO' ))     ## fix: was misspelled competions_url (NoMethodError)
    end

    def self.competitions_tier_three
      get( competitions_url( 'TIER_THREE' ))   ## fix: was misspelled competions_url (NoMethodError)
    end

    ## fetch the matches and then the teams for a competition season;
    ##   returns the result of the last (teams) request
    def self.competition( code, year )
      get( competition_matches_url( code, year ))
      get( competition_teams_url( code, year ))
    end


    ## (disabled) sketch for fetching matches by date range
    #
    #  def self.matches
    #    # note: Specified period must not exceed 10 days.
    #
    #    ## try query (football) week by week - tuesday to monday!!
    #    ##  note: TIER_ONE does NOT include goals!!!
    #    code       = 'FL1'
    #    start_date = '2019-08-09'
    #    end_date   = '2019-08-16'
    #
    #    get( "matches?competitions=#{code}&dateFrom=#{start_date}&dateTo=#{end_date}" )
    #  end


    ## fetch url via Webget.call with the api headers;
    ##   prints the first 400 chars of the pretty printed json (for debugging)
    ##   and exits the process (status 1) on a not ok response status.
    ##   returns the response (was: nil)
    def self.get( url )
      token = ENV['FOOTBALLDATA']
      ## note: because of public workflow log - do NOT output token
      ## puts token

      headers = {}
      headers['X-Auth-Token'] = token   if token
      headers['User-Agent']   = 'ruby'
      headers['Accept']       = '*/*'

      ## note: add format: 'json' for pretty printing json (before) save in cache
      response = Webget.call( url, headers: headers )

      ## for debugging print pretty printed json first 400 chars
      puts response.json.pretty_inspect[0..400]

      exit 1   if response.status.nok?   # e.g. HTTP status code != 200

      response
    end

  end ## class Metal
end # module Footballdata
72
+
@@ -0,0 +1,22 @@
1
class Webget      ## note: Webget is for now a class (NOT a module)
  module Module
    module Football
      ## version number parts
      ##   todo: namespace inside version or something - why? why not??
      MAJOR = 0
      MINOR = 0
      PATCH = 1
      VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

      class << self
        ## the version string e.g. 0.0.1
        def version() VERSION; end

        ## one-line "say hello" text with the gem version and ruby runtime info
        def banner
          "webget-football/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
        end

        ## absolute path three directory levels up from this file
        def root
          top = 3.times.inject( __FILE__ ) { |path,_| File.dirname( path ) }
          File.expand_path( top )
        end
      end # class << self
    end # module Football
  end # module Module
end # class Webget
@@ -0,0 +1,12 @@
1
+ ###########################
2
+ # note: split code in two parts
3
+ # metal - "bare" basics - no ref to sportdb
4
+ # and rest / convert with sportdb references / goodies
5
+
6
+
7
+ ## our own code
8
+ require_relative 'worldfootball/config'
9
+ require_relative 'worldfootball/download'
10
+ require_relative 'worldfootball/page'
11
+ require_relative 'worldfootball/page_schedule'
12
+ require_relative 'worldfootball/page_report'
@@ -0,0 +1,17 @@
1
module Worldfootball

  ## settings container (placeholder) - nothing here for now
  class Configuration
  end # class Configuration


  class << self
    ## hands the (shared) configuration to the block passed in
    ##   and returns the block's result; lets you use
    ##
    ##   Worldfootball.configure do |config|
    ##      ## change settings on config here
    ##   end
    def configure
      yield( config )
    end

    ## the module-wide configuration object - created on first use, then reused
    def config
      @config = Configuration.new   unless @config
      @config
    end
  end # class << self

end # module Worldfootball
@@ -0,0 +1,89 @@
1
+
2
+
3
module Worldfootball

  ## "bare metal" page downloads from weltfussball.de via Webget
  ##
  ## todo/check: put in Downloader namespace/class - why? why not?
  ##   or use Metal - no "porcelain" downloaders / machinery
  class Metal

    BASE_URL = 'https://www.weltfussball.de'

    class << self
      ## page url builders - slug is the last path segment of the page
      def schedule_url( slug )
        "#{BASE_URL}/alle_spiele/#{slug}/"
      end

      def report_url( slug )
        "#{BASE_URL}/spielbericht/#{slug}/"
      end


      ## fetch the schedule ("alle spiele") page for slug
      def download_schedule( slug )
        get( schedule_url( slug ))
      end

      ## fetch the match report ("spielbericht") page for slug;
      ##  with cache: true (default) a page already in the web cache
      ##  gets reported and NOT fetched again
      def download_report( slug, cache: true )
        url = report_url( slug )

        ## check cache first
        return get( url )   unless cache && Webcache.cached?( url )

        puts " reuse local (cached) copy >#{Webcache.url_to_id( url )}<"
      end


      ## fetch the match reports for all matches listed
      ##   on the (cached) schedule page for slug
      ##
      ## todo/check: rename to reports_for_schedule or such - why? why not?
      def download_schedule_reports( slug, cache: true )
        page    = Page::Schedule.from_cache( slug )
        matches = page.matches
        total   = matches.size

        puts "matches - #{total} rows:"
        pp matches[0]

        puts "#{page.generated_in_days_ago} - #{page.generated}"

        ## todo/fix: restore sleep to old value at the end!!!!
        ## Webget.config.sleep = 8    ## fetch 7-8 pages/min

        matches.each.with_index( 1 ) do |match,no|
          ## estimated time left (in minutes) - sleep time x matches still to go
          est = (Webget.config.sleep * (total-no))/60.0

          puts "fetching #{no}/#{total} (#{est} min(s)) - #{match[:round]} | #{match[:team1]} v #{match[:team2]}..."

          ref = match[:report_ref]
          if ref.nil? || ref == false
            puts "!! WARN: report ref missing for match:"
            pp match
          else
            download_report( ref, cache: cache )
          end
        end
      end


      ##################
      # helpers

      ## get & record/save to cache - fetch (and cache) html page (via HTTP GET)
      def get( url )
        response = Webget.page( url )

        ## note: exit on get / fetch error - do NOT continue for now - why? why not?
        exit( 1 )   if response.status.nok?    ## e.g. HTTP status code != 200
      end


      ### add some "old" (back compat) aliases - keep - why? why not?
      alias_method :schedule,         :download_schedule
      alias_method :report,           :download_report
      alias_method :schedule_reports, :download_schedule_reports
    end # class << self

  end # class Metal
end # module Worldfootball
88
+
89
+
@@ -0,0 +1,106 @@
1
+
2
module Worldfootball
  ## base class for parsed (html) pages - wraps the html text and offers
  ##   lazy access to the parsed document plus some page meta data helpers
  class Page

    ## build a page from a local file (read as utf-8)
    def self.from_file( path )
      html = File.open( path, 'r:utf-8' ) {|f| f.read }
      new( html )
    end

    def initialize( html )
      @html = html
    end

    ## the parsed document (parsed on first access via Nokogiri::HTML)
    def doc
      ## note: if we use a fragment and NOT a document - no access to page head (and meta elements and such)
      @doc ||= Nokogiri::HTML( @html )
    end

    ## text of the title element e.g.
    ##   <title>Bundesliga 2010/2011 &raquo; Spielplan</title>
    ##  returns nil if the page has no title element (was: NoMethodError)
    def title
      @title ||= doc.css( 'title' ).first
      @title && @title.text    ## get element's text content
    end

    ## content attribute of the keywords meta element e.g.
    ##   <meta name="keywords"
    ##         content="Bundesliga, 2010/2011, Spielplan, KSV Superfund, SC Magna Wiener Neustadt, SV Ried, FC Wacker Innsbruck, Austria Wien, Sturm Graz, SV Mattersburg, LASK Linz, Rapid Wien, RB Salzburg" />
    ##  returns nil if the page has no such element (was: NoMethodError)
    def keywords
      ## or  doc.xpath( '//meta[@name="keywords"]' ).first
      @keywords ||= doc.css( 'meta[name="keywords"]' ).first
      @keywords && @keywords[:content]    ## get content attribute
    end

    ## content attribute of the og:url meta element e.g.
    ##   <meta property="og:url"
    ##         content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
    ##  returns nil if the page has no such element (was: NoMethodError)
    def url
      @url ||= doc.css( 'meta[property="og:url"]' ).first
      @url && @url[:content]
    end



    ## matches the generated timestamp comment e.g.
    ##   <!-- [generated 2020-06-30 22:30:19] -->
    GENERATED_RE = %r{
                         <!--
                            [ ]+
                            \[generated
                            [ ]+
                            (?<date>\d+-\d+-\d+)
                            [ ]+
                            (?<time>\d+:\d+:\d+)
                            \]
                            [ ]+
                          -->
                      }x


    ## the generated timestamp of the page (as DateTime)
    ##   or nil (plus a warning printed once) if there is no such comment;
    ##   the lookup runs only once per page (the nil result gets remembered too)
    def generated
      return @generated   if defined?( @generated )

      m = GENERATED_RE.match( @html )
      @generated = if m
                     DateTime.strptime( "#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S')
                   else
                     puts "!! WARN - no generated timestamp found in page"
                     nil
                   end
    end

    ### convenience helper / formatter - age of the page in days e.g. 3d
    ###   or ? if the generated timestamp is unknown
    def generated_in_days_ago
      if generated
        diff_in_days = Date.today.jd - generated.jd
        "#{diff_in_days}d"
      else
        '?'
      end
    end

    ######################
    ## helper methods

    ## normalize whitespace - turn no-break spaces into regular spaces,
    ##   fold runs of whitespace into a single space and strip both ends.
    ##   note: no-break spaces get converted BEFORE stripping, so leading and
    ##     trailing no-break spaces no longer survive as a regular space
    def squish( str )
      str = str.gsub( "\u{00A0}", ' ' )     # Unicode Character 'NO-BREAK SPACE' (U+00A0)
      str = str.gsub( /[ \t\r\n]+/, ' ' )   ## fold whitespace to one max.
      str.strip
    end

    ## print msg and exit the process (status 1) unless cond holds
    def assert( cond, msg )
      if cond
        # do nothing
      else
        puts "!!! assert failed (in parse page) - #{msg}"
        exit 1
      end
    end

  end # class Page
end # module Worldfootball
@@ -0,0 +1,186 @@
1
+
2
module Worldfootball
  class Page

    ## match report ("spielbericht") page - lets you get at the goals
    class Report < Page  ## note: use nested class for now - why? why not?

      ## build a report page from the web cache copy for slug
      def self.from_cache( slug )
        url  = Worldfootball::Metal.report_url( slug )
        html = Webcache.read( url )
        new( html )
      end


      ## find the goals ("Tore") table, that is, the first
      ##   table.standard_tabelle whose first row starts with a Tore cell e.g.
      ##
      ##   <table class="" cellpadding="3" cellspacing="1">
      ##     <tr>
      ##       <td colspan="2" class="ueberschrift" align="center">Tore</td>
      ##     </tr>
      ##
      ##  returns nil if there is no such table
      def find_table_tore
        tables = doc.css( 'table.standard_tabelle' )
        # puts " found #{tables.size} table.standard_tabelle"   # e.g. found 6 table.standard_tabelle
        tables.each do |table|
          first_tr = table.css( 'tr' )[0]
          next   unless first_tr     ## skip tables without any rows (was: NoMethodError)

          tds = first_tr.css( 'td' )
          return table   if tds.size > 0 && tds[0].text == 'Tore'
        end

        nil  ## nothing found; auto-report error -why? why not?
      end


      ## the goals of the match as a list of records (hashes) with
      ##    score:    running score after the goal e.g. 0-1
      ##    team:     1 or 2 (derived from the score advance)
      ##    player:   name of the goal scorer
      ##    minute:   goal minute (as string)
      ##    owngoal:  true  (only if flagged with Eigentor)
      ##    penalty:  true  (only if flagged with Elfmeter)
      ##    notes:    remaining notes (only if not empty)
      ##
      ##  prints every goal line and exits the process (status 1)
      ##  on an unexpected score advance or goal line format
      def goals
        @goals ||= begin
          # <div class="data">
          # <table class="standard_tabelle" cellpadding="3" cellspacing="1">
          table = find_table_tore
          trs   = table.css( 'tr' )

          rows = []
          last_score1 = 0
          last_score2 = 0

          trs.each_with_index do |tr,i|

            next if i==0   # skip Tore headline row

            break if i==1 && tr.text.strip == 'keine'   ## assume 0:0 - no goals

            # <tr>
            #   <td class="hell" align="center" width="20%">
            #     <b>0 : 1</b>
            #   </td>
            #   <td class="hell" style="padding-left: 50px;">
            #     <a href="/spieler_profil/luis-phelipe/" title="Luis Phelipe">Luis Phelipe</a> 34. / Rechtsschuss &nbsp;(<a href="/spieler_profil/alexander-prass/" title="Alexander Prass">Alexander Prass</a>)
            #   </td>
            # </tr>
            tds = tr.css( 'td' )

            score_str  = squish( tds[0].text )
            player_str = squish( tds[1].text )

            print '[%03d] ' % i
            print score_str
            print " | "
            print player_str
            print "\n"

            score_str = score_str.gsub( ':', '-' )
            score_str = score_str.gsub( ' ', '' )   ## remove all white space

            ### todo/fix: use new Score.split helper here
            ##    score1, score2 = Score.split( score_str )
            score_parts = score_str.split('-')
            score1 = score_parts[0].to_i
            score2 = score_parts[1].to_i

            team = scoring_team( last_score1, last_score2, score1, score2 )

            last_score1 = score1
            last_score2 = score2

            player_name, goal_minute, notes = parse_goal_line( player_str )

            ## check for "flags" e.g. own goal or penalty
            ##   if found - remove from notes (use its own flag)
            owngoal = false
            penalty = false

            if notes.index( 'Eigentor' )
              owngoal = true
              notes = notes.sub('Eigentor', '' ).strip
            elsif notes.index( 'Elfmeter' )
              ## e.g. Elfmeter  (Marco Hausjell)
              penalty = true
              notes = notes.sub('Elfmeter', '' ).strip
            end

            rec = { score:  score_str,
                    team:   team,   # 1 or 2
                    player: player_name,
                    minute: goal_minute
                  }
            rec[:owngoal] = true   if owngoal
            rec[:penalty] = true   if penalty
            rec[:notes]   = notes  unless notes.empty?

            rows << rec
          end  ## each tr
          rows
        end
      end  # goals


      private

      ## goal line with divider (/) - the part before the divider e.g. Luis Phelipe 34.
      GOAL_BEFORE_DIVIDER_RE = /^([^0-9]+)[ ]+([0-9]+)\.$/

      ## simple goal line with no divider (/) e.g.
      ##    Andrés Andrade 88.  (Nicolas Meister)
      GOAL_SIMPLE_RE = %r{^([^0-9]+)
                           [ ]+
                           ([0-9]+)\.
                           (?:
                             [ ]+
                             (\([^)]+\))
                           )?
                         $}x

      ## which team (1 or 2) scored? the score must advance
      ##   by exactly one goal for exactly one team;
      ##   otherwise prints an error and exits the process (status 1)
      def scoring_team( last_score1, last_score2, score1, score2 )
        if last_score1+1 == score1 && last_score2 == score2
          1
        elsif last_score2+1 == score2 && last_score1 == score1
          2
        else
          puts "!! ERROR - unexpected score advance (one goal at a time expected):"
          puts " #{last_score1}-#{last_score2}=> #{score1}-#{score2}"
          exit 1
        end
      end

      ## split a (squished) goal line into player name, goal minute and notes;
      ##   prints an error and exits the process (status 1) on unknown formats
      def parse_goal_line( player_str )
        if player_str.index('/')
          ## note: split on the FIRST divider only - keeps notes with
          ##   a divider (/) of their own intact and handles a trailing divider
          head, tail = player_str.split( '/', 2 )
          notes = tail.strip

          m = GOAL_BEFORE_DIVIDER_RE.match( head.strip )
          unless m
            puts "!! ERROR - unknown goal format (in part i):"
            puts player_str
            pp [head, tail]
            exit 1
          end
          [m[1], m[2], notes]
        else    # (simple line with no divider (/)
          m = GOAL_SIMPLE_RE.match( player_str )
          unless m
            puts "!! ERROR - unknown goal format:"
            puts player_str
            exit 1
          end
          [m[1], m[2], m[3] ? m[3] : '']
        end
      end

    end # class Report

  end # class Page
end # module Worldfootball
@@ -0,0 +1,262 @@
1
+
2
module Worldfootball
  class Page

    ## schedule ("alle spiele") page - lets you get at the matches
    ##   (and the teams and rounds derived from the matches) and the seasons listed
    class Schedule < Page  ## note: use nested class for now - why? why not?

      ## build a schedule page from the web cache copy for slug
      def self.from_cache( slug )
        url  = Worldfootball::Metal.schedule_url( slug )
        html = Webcache.read( url )
        new( html )
      end


      ## table rows with a text matching get treated as a round header
      ##   (and NOT as a match line)
      ROUND_RE = %r{ Spieltag |
                     [1-9]\.[ ]Runde |
                     Qual\.[ ][1-9]\.[ ]Runde |   # see EL or CL Quali
                     Qualifikation |              # see CA Championship
                     Sechzehntelfinale |          # see EL
                     Achtelfinale |
                     Viertelfinale |
                     Halbfinale |
                     Finale |
                     Gruppe[ ][A-Z] |             # see CL
                     Playoffs                     # see EL Quali
                   }x


      ## the matches of the schedule as a list of (flat) records (hashes) with
      ##   round, date (as YYYY-MM-DD), time, team1, team1_ref, score, team2,
      ##   team2_ref and report_ref;  the refs are nil if the page has no link.
      ##   prints every row parsed (for debugging)
      def matches
        @matches ||= begin
          # <div class="data">
          # <table class="standard_tabelle" cellpadding="3" cellspacing="1">

          ## note: use > for "strict" sibling (child without any in-betweens)
          table = doc.css( 'div.data > table.standard_tabelle' ).first    ## get table
          trs   = table.css( 'tr' )

          last_date_str = nil
          last_round    = nil

          rows = []

          trs.each_with_index do |tr,idx|
            i    = idx+1      ## row number (starting with one)
            line = tr.text.strip

            if line =~ ROUND_RE
              puts
              print '[%03d] ' % i
              print "round >#{line}<"
              print "\n"

              last_round = line
            else   ## assume table row (tr) is match line
              tds = tr.css( 'td' )

              date_str = squish( tds[0].text )
              time_str = squish( tds[1].text )

              ## <td><a href="/teams/hibernian-fc/" title="Hibernian FC">Hibernian FC</a></td>
              ##  todo/check: check if tooltip title always equals text - why? why not?
              team1_str, team1_ref = parse_team( tds[2], 'team1' )

              ## <td> - </td>
              vs_str = squish( tds[3].text )    ## use to assert column!!!
              assert( vs_str == '-', "- for vs. expected; got #{vs_str}")

              ## <td><a href="/teams/st-johnstone-fc/" title="St. Johnstone FC">St. Johnstone FC</a></td>
              team2_str, team2_ref = parse_team( tds[4], 'team2' )

              ## <a href="/spielbericht/premiership-2020-2021-hibernian-fc-st-johnstone-fc/" title="Spielschema Hibernian FC - St. Johnstone FC">-:-</a>
              score_anchor = tds[5].css( 'a' )[0]
              if score_anchor    ## note: score ref (match report) is optional!!!!
                score_str = squish( score_anchor.text )
                score_ref = norm_score_ref( score_anchor[:href] )
              else
                score_str = squish( tds[5].text )
                score_ref = nil
              end

              ## todo - find a better way to check for live match
              ## check for live badge image
              ## <td>
              ##   <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
              ## </td>
              ##  note: rows without the (seventh) badge column or images
              ##    without a src attribute get skipped (was: NoMethodError)
              badge_td = tds[6]
              img      = badge_td && badge_td.css( 'img' )[0]
              if img && img[:src].to_s.index( '/live/')
                puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
                score_str = '-:-'  # note: -:- gets replaced to ---
              end

              ## note: an empty date cell means same date as the match line before
              date_str = last_date_str    if date_str.empty?

              print '[%03d] ' % i
              print "%-10s | " % date_str
              print "%-5s | " % time_str
              print "%-22s | " % team1_str
              print "%-22s | " % team2_str
              print "%-10s | " % score_str
              print( score_ref || 'n/a' )
              print "\n"

              ## change  2:1 (1:1)  to 2-1 (1-1)
              score_str = score_str.gsub( ':', '-' )

              ## convert date from 25.10.2019 to 2019-10-25
              date = Date.strptime( date_str, '%d.%m.%Y' )

              ## note: keep structure flat for now
              ##        (AND not nested e.g. team:{text:,ref:}) - why? why not?
              rows << { round:      last_round,
                        date:       date.strftime( '%Y-%m-%d' ),
                        time:       time_str,
                        team1:      team1_str,
                        team1_ref:  team1_ref,
                        score:      score_str,
                        team2:      team2_str,
                        team2_ref:  team2_ref,
                        report_ref: score_ref
                      }

              last_date_str = date_str
            end
          end # each tr (table row)

          rows
        end
      end  # matches


      ## the teams (name, ref) with the count of matches played;
      ##   in order of first appearance
      def teams
        @teams ||= begin
          h = {}
          matches.each do |match|
            ## index by name/text for now NOT ref - why? why not?
            [{text: match[:team1], ref: match[:team1_ref]},
             {text: match[:team2], ref: match[:team2_ref]}].each do |team|
              rec = h[ team[:text] ] ||= { count: 0,
                                           name:  team[:text],
                                           ref:   team[:ref] }
              rec[ :count ] += 1
              ## todo/check:  check/assert that name and ref are always equal - why? why not?
            end
          end

          h.values
        end
      end

      ## the rounds (name) with the count of matches; in order of first appearance
      def rounds
        @rounds ||= begin
          h = {}
          matches.each do |match|
            rec = h[ match[:round] ] ||= { count: 0,
                                           name:  match[:round] }
            rec[ :count ] += 1
          end

          h.values
        end
      end


      ## the seasons (text, ref) listed in the season selector
      ##   <select name="saison" ...
      def seasons
        @seasons ||= begin
          season  = doc.css( 'select[name="saison"]').first
          options = season.css( 'option' )

          options.map do |option|
            { text: squish( option.text ),
              ref:  norm_season_ref( option[:value] )
            }
          end
        end
      end


      ######
      ## helpers

      ## todo/check - rename/use HREF and not REF - why? why not?
      ##  note: anchored with \A and \z (and NOT ^ and $) - the WHOLE string must match
      REF_SCORE_RE  = %r{\A/spielbericht/
                           ([a-z0-9_-]+)/\z}x

      REF_TEAM_RE   = %r{\A/teams/
                           ([a-z0-9_-]+)/\z}x

      REF_SEASON_RE = %r{\A/alle_spiele/
                           ([a-z0-9_-]+)/\z}x

      ## check the href format / path and return the slug;
      ##   print an error and exit the process (status 1) on unexpected formats
      def norm_score_ref( str )    norm_ref( REF_SCORE_RE,  str, 'score' );  end
      def norm_team_ref( str )     norm_ref( REF_TEAM_RE,   str, 'team' );   end
      def norm_season_ref( str )   norm_ref( REF_SEASON_RE, str, 'season' ); end


      private

      ## shared worker for the norm_*_ref helpers
      def norm_ref( re, str, kind )
        m = re.match( str )
        unless m
          puts "!! ERROR: unexpected #{kind} href format >#{str}<"
          exit 1
        end
        m[1]
      end

      ## get name and ref (slug) from a team table cell (td);
      ##   the ref is nil (plus a warning printed) if the name is plain text (no link)
      def parse_team( td, label )
        anchor = td.css( 'a' )[0]
        if anchor   # note: <a> might be optional (and team name only be plain text)
          [squish( anchor.text ), norm_team_ref( anchor[:href] )]
        else
          name = squish( td.text )
          puts "!! WARN: no #{label}_ref for >#{name}< found"
          [name, nil]
        end
      end

    end # class Schedule

  end # class Page
end # module Worldfootball
@@ -0,0 +1,4 @@
1
+ # note: allow require 'webget/football' too
2
+ # (in addition to require 'webget-football')
3
+
4
+ require_relative '../webget-football'
@@ -0,0 +1,6 @@
1
+ require 'minitest/autorun'
2
+
3
+
4
+ ## our own code
5
+ require 'webget/football' ## or require 'webget-football'
6
+
@@ -0,0 +1,16 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_version.rb
4
+
5
+
6
+ require 'helper'
7
+
8
## smoke test for the version module - prints version, banner and root
##   and checks the version string
class TestVersion < Minitest::Test    ## note: Minitest (and NOT the deprecated MiniTest alias)

  def test_version
    pp Webget::Module::Football::VERSION
    pp Webget::Module::Football.banner
    pp Webget::Module::Football.root

    ## version is expected to be major.minor.patch and
    ##   the version helper to return the very same string
    assert_match( /\A\d+\.\d+\.\d+\z/, Webget::Module::Football::VERSION )
    assert_equal Webget::Module::Football::VERSION, Webget::Module::Football.version
  end

end # class TestVersion
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: webget-football
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-11-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rdoc
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '7'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '4.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '7'
33
+ - !ruby/object:Gem::Dependency
34
+ name: hoe
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.22'
40
+ type: :development
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.22'
47
+ description: webget-football - get football data via web pages or web api (json) calls
48
+ email: opensport@googlegroups.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files:
52
+ - CHANGELOG.md
53
+ - Manifest.txt
54
+ - README.md
55
+ files:
56
+ - CHANGELOG.md
57
+ - Manifest.txt
58
+ - README.md
59
+ - Rakefile
60
+ - lib/webget-football.rb
61
+ - lib/webget-football/apis.rb
62
+ - lib/webget-football/apis/config.rb
63
+ - lib/webget-football/apis/download.rb
64
+ - lib/webget-football/version.rb
65
+ - lib/webget-football/worldfootball.rb
66
+ - lib/webget-football/worldfootball/config.rb
67
+ - lib/webget-football/worldfootball/download.rb
68
+ - lib/webget-football/worldfootball/page.rb
69
+ - lib/webget-football/worldfootball/page_report.rb
70
+ - lib/webget-football/worldfootball/page_schedule.rb
71
+ - lib/webget/football.rb
72
+ - test/helper.rb
73
+ - test/test_version.rb
74
+ homepage: https://github.com/sportdb/sport.db
75
+ licenses:
76
+ - Public Domain
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options:
80
+ - "--main"
81
+ - README.md
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 2.2.2
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.5.2
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: webget-football - get football data via web pages or web api (json) calls
100
+ test_files: []