webget-football 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c46b38f5ebe600370371d7032f6178fb4ae22dde
4
+ data.tar.gz: fd4ca8d6d218dc140f8c63f13f9028ace9a6eda1
5
+ SHA512:
6
+ metadata.gz: 23e896f122b3f8068ea0acb82f85e60ebaff68d231d86923778dbc8de6cd345d2914f7401f2cd28fd10635ad276504f9d53655808b3d222559455d8f1adfdaee
7
+ data.tar.gz: 69516f9db9e2a495c378759e5c9195f596fb4be30147cba24fdcf04bf7f55e28325324d07a0f304200cb093d3ef61d239db4275d6d2a51863a1d2eea3e17fe17
@@ -0,0 +1,4 @@
1
+ ### 0.0.1 / 2020-11-09
2
+
3
+ * Everything is new. First release.
4
+
@@ -0,0 +1,18 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ lib/webget-football.rb
6
+ lib/webget-football/apis.rb
7
+ lib/webget-football/apis/config.rb
8
+ lib/webget-football/apis/download.rb
9
+ lib/webget-football/version.rb
10
+ lib/webget-football/worldfootball.rb
11
+ lib/webget-football/worldfootball/config.rb
12
+ lib/webget-football/worldfootball/download.rb
13
+ lib/webget-football/worldfootball/page.rb
14
+ lib/webget-football/worldfootball/page_report.rb
15
+ lib/webget-football/worldfootball/page_schedule.rb
16
+ lib/webget/football.rb
17
+ test/helper.rb
18
+ test/test_version.rb
@@ -0,0 +1,28 @@
1
+ # webget-football - get football data via web pages or web api (json) calls
2
+
3
+
4
+ * home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
5
+ * bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
6
+ * gem :: [rubygems.org/gems/webget-football](https://rubygems.org/gems/webget-football)
7
+ * rdoc :: [rubydoc.info/gems/webget-football](https://rubydoc.info/gems/webget-football)
8
+ * forum :: [groups.google.com/group/opensport](https://groups.google.com/group/opensport)
9
+
10
+
11
+ ## Usage
12
+
13
+ TBD
14
+
15
+
16
+ ## License
17
+
18
+ ![](https://publicdomainworks.github.io/buttons/zero88x31.png)
19
+
20
+ The `webget-football` scripts are dedicated to the public domain.
21
+ Use it as you please with no restrictions whatsoever.
22
+
23
+
24
+ ## Questions? Comments?
25
+
26
+ Send them along to the
27
+ [Open Sports & Friends Forum/Mailing List](https://groups.google.com/group/opensport).
28
+ Thanks!
@@ -0,0 +1,26 @@
1
+ require 'hoe'
2
+ require './lib/webget-football/version.rb'
3
+
4
+ Hoe.spec 'webget-football' do
5
+
6
+ self.version = Webget::Module::Football::VERSION
7
+
8
+ self.summary = 'webget-football - get football data via web pages or web api (json) calls'
9
+ self.description = summary
10
+
11
+ self.urls = { home: 'https://github.com/sportdb/sport.db' }
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'opensport@googlegroups.com'
15
+
16
+ # switch extension to .markdown for gihub formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'CHANGELOG.md'
19
+
20
+ self.licenses = ['Public Domain']
21
+
22
+ self.spec_extras = {
23
+ required_ruby_version: '>= 2.2.2'
24
+ }
25
+
26
+ end
@@ -0,0 +1,16 @@
1
+ ## 3rd party (our own)
2
+ require 'webget' ## incl. webget, webcache, webclient, etc.
3
+
4
+ ## 3rd party
5
+ require 'nokogiri'
6
+
7
+
8
+ ###
9
+ # our own code
10
+ require 'webget-football/version' # let version always go first
11
+
12
+ require 'webget-football/apis'
13
+ require 'webget-football/worldfootball'
14
+
15
+
16
+ puts Webget::Module::Football.banner # say hello
@@ -0,0 +1,10 @@
1
+ ###########################
2
+ # note: split code in two parts
3
+ # metal - "bare" basics - no ref to sportdb
4
+ # and rest / convert with sportdb references / goodies
5
+
6
+ ## our own code
7
+ require_relative 'apis/config'
8
+ require_relative 'apis/download'
9
+
10
+
@@ -0,0 +1,17 @@
1
+ module Footballdata
2
+
3
+ class Configuration
4
+ ## note: nothing here for now
5
+ end # class Configuration
6
+
7
+
8
+ ## lets you use
9
+ ## Footballdata.configure do |config|
10
+ ## config.convert.out_dir = './o'
11
+ ## end
12
+
13
+ def self.configure() yield( config ); end
14
+
15
+ def self.config() @config ||= Configuration.new; end
16
+
17
+ end # module Footballdata
@@ -0,0 +1,72 @@
1
+ module Footballdata
2
+
3
+ ## todo/check: put in Downloader namespace/class - why? why not?
4
+ ## or use Metal - no "porcelain" downloaders / machinery
5
+ class Metal
6
+ BASE_URL = 'http://api.football-data.org/v2'
7
+
8
+
9
+ def self.competitions_url( plan ) "#{BASE_URL}/competitions?plan=#{plan}"; end
10
+
11
+ ## just use matches_url - why? why not?
12
+ def self.competition_matches_url( code, year ) "#{BASE_URL}/competitions/#{code}/matches?season=#{year}"; end
13
+ def self.competition_teams_url( code, year ) "#{BASE_URL}/competitions/#{code}/teams?season=#{year}"; end
14
+
15
+
16
+
17
+ def self.competitions_tier_one
18
+ get( competitions_url( 'TIER_ONE' ))
19
+ end
20
+
21
+ def self.competitions_tier_two
22
+ get( competions_url( 'TIER_TWO' ))
23
+ end
24
+
25
+ def self.competitions_tier_three
26
+ get( competions_url( 'TIER_THREE' ))
27
+ end
28
+
29
+ def self.competition( code, year )
30
+ get( competition_matches_url( code, year ))
31
+ get( competition_teams_url( code, year ))
32
+ end
33
+
34
+
35
+ =begin
36
+ def self.matches
37
+ # note: Specified period must not exceed 10 days.
38
+
39
+ ## try query (football) week by week - tuesday to monday!!
40
+ ## note: TIER_ONE does NOT include goals!!!
41
+ code = 'FL1'
42
+ start_date = '2019-08-09'
43
+ end_date = '2019-08-16'
44
+
45
+ get( "matches?competitions=#{code}&dateFrom=#{start_date}&dateTo=#{end_date}" )
46
+ end
47
+ =end
48
+
49
+
50
+ def self.get( url )
51
+ token = ENV['FOOTBALLDATA']
52
+ ## note: because of public workflow log - do NOT output token
53
+ ## puts token
54
+
55
+ headers = {}
56
+ headers['X-Auth-Token'] = token if token
57
+ headers['User-Agent'] = 'ruby'
58
+ headers['Accept'] = '*/*'
59
+
60
+ ## note: add format: 'json' for pretty printing json (before) save in cache
61
+ response = Webget.call( url, headers: headers )
62
+
63
+ ## for debugging print pretty printed json first 400 chars
64
+ puts response.json.pretty_inspect[0..400]
65
+
66
+ exit 1 if response.status.nok? # e.g. HTTP status code != 200
67
+ end
68
+
69
+
70
+ end ## class Metal
71
+ end # module Footballdata
72
+
@@ -0,0 +1,22 @@
1
+ class Webget ## note: Webget is for now a class (NOT a module)
2
+ module Module
3
+ module Football
4
+ MAJOR = 0 ## todo: namespace inside version or something - why? why not??
5
+ MINOR = 0
6
+ PATCH = 1
7
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
8
+
9
+ def self.version
10
+ VERSION
11
+ end
12
+
13
+ def self.banner
14
+ "webget-football/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
15
+ end
16
+
17
+ def self.root
18
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
19
+ end
20
+ end # module Football
21
+ end # module Module
22
+ end # class Webget
@@ -0,0 +1,12 @@
1
+ ###########################
2
+ # note: split code in two parts
3
+ # metal - "bare" basics - no ref to sportdb
4
+ # and rest / convert with sportdb references / goodies
5
+
6
+
7
+ ## our own code
8
+ require_relative 'worldfootball/config'
9
+ require_relative 'worldfootball/download'
10
+ require_relative 'worldfootball/page'
11
+ require_relative 'worldfootball/page_schedule'
12
+ require_relative 'worldfootball/page_report'
@@ -0,0 +1,17 @@
1
+ module Worldfootball
2
+
3
+
4
+ class Configuration
5
+ # nothing here for now
6
+ end # class Configuration
7
+
8
+
9
+ ## lets you use
10
+ ## Worldfootball.configure do |config|
11
+ ## config.convert.out_dir = './o'
12
+ ## end
13
+
14
+ def self.configure() yield( config ); end
15
+ def self.config() @config ||= Configuration.new; end
16
+
17
+ end # module Worldfootball
@@ -0,0 +1,89 @@
1
+
2
+
3
+ module Worldfootball
4
+
5
+
6
+
7
+
8
+ ## todo/check: put in Downloader namespace/class - why? why not?
9
+ ## or use Metal - no "porcelain" downloaders / machinery
10
+ class Metal
11
+
12
+ BASE_URL = 'https://www.weltfussball.de'
13
+
14
+
15
+ def self.schedule_url( slug ) "#{BASE_URL}/alle_spiele/#{slug}/"; end
16
+ def self.report_url( slug ) "#{BASE_URL}/spielbericht/#{slug}/"; end
17
+
18
+
19
+ def self.download_schedule( slug )
20
+ url = schedule_url( slug )
21
+ get( url )
22
+ end
23
+
24
+ def self.download_report( slug, cache: true )
25
+ url = report_url( slug )
26
+
27
+ ## check cache first
28
+ if cache && Webcache.cached?( url )
29
+ puts " reuse local (cached) copy >#{Webcache.url_to_id( url )}<"
30
+ else
31
+ get( url )
32
+ end
33
+ end
34
+
35
+
36
+
37
+
38
+
39
+ def self.download_schedule_reports( slug, cache: true ) ## todo/check: rename to reports_for_schedule or such - why? why not?
40
+
41
+ page = Page::Schedule.from_cache( slug )
42
+ matches = page.matches
43
+
44
+ puts "matches - #{matches.size} rows:"
45
+ pp matches[0]
46
+
47
+ puts "#{page.generated_in_days_ago} - #{page.generated}"
48
+
49
+ ## todo/fix: restore sleep to old value at the end!!!!
50
+ ## Webget.config.sleep = 8 ## fetch 7-8 pages/min
51
+
52
+ matches.each_with_index do |match,i|
53
+ est = (Webget.config.sleep * (matches.size-(i+1)))/60.0 # estimated time left
54
+
55
+ puts "fetching #{i+1}/#{matches.size} (#{est} min(s)) - #{match[:round]} | #{match[:team1]} v #{match[:team2]}..."
56
+ report_ref = match[:report_ref ]
57
+ if report_ref
58
+ download_report( report_ref, cache: cache )
59
+ else
60
+ puts "!! WARN: report ref missing for match:"
61
+ pp match
62
+ end
63
+ end
64
+ end
65
+
66
+
67
+ ### add some "old" (back compat) aliases - keep - why? why not?
68
+ class << self
69
+ alias_method :schedule, :download_schedule
70
+ alias_method :report, :download_report
71
+ alias_method :schedule_reports, :download_schedule_reports
72
+ end
73
+
74
+
75
+ ##################
76
+ # helpers
77
+ def self.get( url ) ## get & record/save to cache
78
+
79
+ response = Webget.page( url ) ## fetch (and cache) html page (via HTTP GET)
80
+
81
+ ## note: exit on get / fetch error - do NOT continue for now - why? why not?
82
+ exit 1 if response.status.nok? ## e.g. HTTP status code != 200
83
+ end
84
+
85
+
86
+ end # class Metal
87
+ end # module Worldfootball
88
+
89
+
@@ -0,0 +1,106 @@
1
+
2
+ module Worldfootball
3
+ class Page
4
+
5
+ def self.from_file( path )
6
+ html = File.open( path, 'r:utf-8' ) {|f| f.read }
7
+ new( html )
8
+ end
9
+
10
+ def initialize( html )
11
+ @html = html
12
+ end
13
+
14
+ def doc
15
+ ## note: if we use a fragment and NOT a document - no access to page head (and meta elements and such)
16
+ @doc ||= Nokogiri::HTML( @html )
17
+ end
18
+
19
+ def title
20
+ # <title>Bundesliga 2010/2011 &raquo; Spielplan</title>
21
+ @title ||= doc.css( 'title' ).first
22
+ @title.text ## get element's text content
23
+ end
24
+
25
+ def keywords
26
+ # <meta name="keywords"
27
+ # content="Bundesliga, 2010/2011, Spielplan, KSV Superfund, SC Magna Wiener Neustadt, SV Ried, FC Wacker Innsbruck, Austria Wien, Sturm Graz, SV Mattersburg, LASK Linz, Rapid Wien, RB Salzburg" />
28
+ @keywords ||= doc.css( 'meta[name="keywords"]' ).first
29
+ @keywords[:content] ## get content attribute
30
+ ## or doc.xpath( '//meta[@name="keywords"]' ).first
31
+ ## pp keywords
32
+ # puts " #{keywords[:content]}"
33
+
34
+ # keywords = doc.at( 'meta[@name="Keywords"]' )
35
+ # pp keywords
36
+ ## check for
37
+ end
38
+
39
+ # <meta property="og:url"
40
+ # content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
41
+ def url
42
+ @url ||= doc.css( 'meta[property="og:url"]' ).first
43
+ @url[:content]
44
+ end
45
+
46
+
47
+
48
+ ## <!-- [generated 2020-06-30 22:30:19] -->
49
+ ## <!-- [generated 2020-06-30 22:30:19] -->
50
+ GENERATED_RE = %r{
51
+ <!--
52
+ [ ]+
53
+ \[generated
54
+ [ ]+
55
+ (?<date>\d+-\d+-\d+)
56
+ [ ]+
57
+ (?<time>\d+:\d+:\d+)
58
+ \]
59
+ [ ]+
60
+ -->
61
+ }x
62
+
63
+
64
+ def generated
65
+ @generated ||= begin
66
+ m=GENERATED_RE.match( @html )
67
+ if m
68
+ DateTime.strptime( "#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S')
69
+ else
70
+ puts "!! WARN - no generated timestamp found in page"
71
+ nil
72
+ end
73
+ end
74
+ end
75
+
76
+ ### convenience helper / formatter
77
+ def generated_in_days_ago
78
+ if generated
79
+ diff_in_days = Date.today.jd - generated.jd
80
+ "#{diff_in_days}d"
81
+ else
82
+ '?'
83
+ end
84
+ end
85
+
86
+ ######################
87
+ ## helper methods
88
+
89
+ def squish( str )
90
+ str = str.strip
91
+ str = str.gsub( "\u{00A0}", ' ' ) # Unicode Character 'NO-BREAK SPACE' (U+00A0)
92
+ str = str.gsub( /[ \t\n]+/, ' ' ) ## fold whitespace to one max.
93
+ str
94
+ end
95
+
96
+ def assert( cond, msg )
97
+ if cond
98
+ # do nothing
99
+ else
100
+ puts "!!! assert failed (in parse page) - #{msg}"
101
+ exit 1
102
+ end
103
+ end
104
+
105
+ end # class Page
106
+ end # module Worldfootball
@@ -0,0 +1,186 @@
1
+
2
+ module Worldfootball
3
+ class Page
4
+
5
+ class Report < Page ## note: use nested class for now - why? why not?
6
+
7
+
8
+ def self.from_cache( slug )
9
+ url = Worldfootball::Metal.report_url( slug )
10
+ html = Webcache.read( url )
11
+ new( html )
12
+ end
13
+
14
+
15
+
16
+ def find_table_tore
17
+ # <table class="" cellpadding="3" cellspacing="1">
18
+ # <tr>
19
+ # <td colspan="2" class="ueberschrift" align="center">Tore</td>
20
+ # </tr>
21
+
22
+ ## get table
23
+ ## first table row is Tore
24
+ tables = doc.css( 'table.standard_tabelle' )
25
+ # puts " found #{tables.size} table.standard_tabelle" # e.g. found 6 table.standard_tabelle
26
+ tables.each do |table|
27
+ trs = table.css( 'tr' )
28
+ ## puts " found #{trs.size} trs"
29
+ tds = trs[0].css( 'td' )
30
+ ## puts " found #{tds.size} tds"
31
+
32
+ if tds.size > 0 && tds[0].text == 'Tore'
33
+ return table
34
+ end
35
+ end
36
+
37
+ nil ## nothing found; auto-report error -why? why not?
38
+ end
39
+
40
+ def goals
41
+ @goals ||= begin
42
+
43
+ # <div class="data">
44
+ # <table class="standard_tabelle" cellpadding="3" cellspacing="1">
45
+
46
+ # puts table.class.name #=> Nokogiri::XML::Element
47
+ # puts table.text
48
+
49
+ table = find_table_tore
50
+ ## pp table
51
+
52
+ trs = table.css( 'tr' )
53
+ # puts trs.size
54
+
55
+
56
+
57
+ rows = []
58
+ last_score1 = 0
59
+ last_score2 = 0
60
+
61
+ trs.each_with_index do |tr,i|
62
+
63
+ next if i==0 # skip Tore headline row
64
+
65
+ break if i==1 && tr.text.strip == 'keine' ## assume 0:0 - no goals
66
+
67
+ # <tr>
68
+ # <td class="hell" align="center" width="20%">
69
+ # <b>0 : 1</b>
70
+ # </td>
71
+ # <td class="hell" style="padding-left: 50px;">
72
+ # <a href="/spieler_profil/luis-phelipe/" title="Luis Phelipe">Luis Phelipe</a> 34. / Rechtsschuss &nbsp;(<a href="/spieler_profil/alexander-prass/" title="Alexander Prass">Alexander Prass</a>)
73
+ # </td>
74
+ # </tr>
75
+
76
+ tds = tr.css( 'td' )
77
+
78
+ score_str = squish( tds[0].text )
79
+
80
+ player_str = squish( tds[1].text )
81
+
82
+ print '[%03d] ' % i
83
+ print score_str
84
+ print " | "
85
+ print player_str
86
+ print "\n"
87
+
88
+ score_str = score_str.gsub( ':', '-' )
89
+ score_str = score_str.gsub( ' ', '' ) ## remove all white space
90
+
91
+
92
+ ### todo/fix: use new Score.split helper here
93
+ ## score1, score2 = Score.split( score_str )
94
+ parts = score_str.split('-')
95
+ score1 = parts[0].to_i
96
+ score2 = parts[1].to_i
97
+
98
+ if last_score1+1 == score1 && last_score2 == score2
99
+ team = 1
100
+ elsif last_score2+1 == score2 && last_score1 == score1
101
+ team = 2
102
+ else
103
+ puts "!! ERROR - unexpected score advance (one goal at a time expected):"
104
+ puts " #{last_score1}-#{last_score2}=> #{score1}-#{score2}"
105
+ exit 1
106
+ end
107
+
108
+
109
+ last_score1 = score1
110
+ last_score2 = score2
111
+
112
+
113
+
114
+ if player_str.index('/')
115
+ parts = player_str.split('/')
116
+ # pp parts
117
+ notes = parts[1].strip
118
+
119
+ if parts[0].strip =~ /^([^0-9]+)[ ]+([0-9]+)\.$/
120
+ player_name = $1
121
+ goal_minute = $2
122
+ # puts " >#{player_name}< | >#{goal_minute}<"
123
+ else
124
+ puts "!! ERROR - unknown goal format (in part i):"
125
+ puts player_str
126
+ pp parts
127
+ exit 1
128
+ end
129
+ else # (simple line with no divider (/)
130
+ # Andrés Andrade 88.  (Nicolas Meister)
131
+ if m = %r{^([^0-9]+)
132
+ [ ]+
133
+ ([0-9]+)\.
134
+ (?:
135
+ [ ]+
136
+ (\([^)]+\))
137
+ )?
138
+ $}x.match( player_str )
139
+ player_name = m[1]
140
+ goal_minute = m[2]
141
+ notes = m[3] ? m[3] : ''
142
+ else
143
+ puts "!! ERROR - unknown goal format:"
144
+ puts player_str
145
+ exit 1
146
+ end
147
+ end
148
+
149
+
150
+ ## check for "flags" e.g. own goal or penalty
151
+ ## if found - remove from notes (use its own flag)
152
+ owngoal = false
153
+ penalty = false
154
+
155
+ if notes.index( 'Eigentor' )
156
+ owngoal = true
157
+ notes = notes.sub('Eigentor', '' ).strip
158
+ elsif notes.index( 'Elfmeter' )
159
+ ## e.g. Elfmeter  (Marco Hausjell)
160
+ penalty = true
161
+ notes = notes.sub('Elfmeter', '' ).strip
162
+ else
163
+ ## nothing - keep going
164
+ end
165
+
166
+ rec = { score: score_str,
167
+ team: team, # 1 or 2
168
+ player: player_name,
169
+ minute: goal_minute
170
+ }
171
+ rec[:owngoal] = true if owngoal
172
+ rec[:penalty] = true if penalty
173
+ rec[:notes] = notes unless notes.empty?
174
+
175
+ rows << rec
176
+ end ## each tr
177
+ rows
178
+ end
179
+ end # goals
180
+
181
+
182
+ end # class Report
183
+
184
+
185
+ end # class Page
186
+ end # module Worldfootball
@@ -0,0 +1,262 @@
1
+
2
+ module Worldfootball
3
+ class Page
4
+
5
+ class Schedule < Page ## note: use nested class for now - why? why not?
6
+
7
+
8
+ def self.from_cache( slug )
9
+ url = Worldfootball::Metal.schedule_url( slug )
10
+ html = Webcache.read( url )
11
+ new( html )
12
+ end
13
+
14
+
15
+
16
+ def matches
17
+ @matches ||= begin
18
+
19
+ # <div class="data">
20
+ # <table class="standard_tabelle" cellpadding="3" cellspacing="1">
21
+
22
+ ## note: use > for "strict" (direct) child - no other elements in-between
23
+ table = doc.css( 'div.data > table.standard_tabelle' ).first ## get table
24
+ # puts table.class.name #=> Nokogiri::XML::Element
25
+ # puts table.text
26
+
27
+ trs = table.css( 'tr' )
28
+ # puts trs.size
29
+ i = 0
30
+
31
+ last_date_str = nil
32
+ last_round = nil
33
+
34
+ rows = []
35
+
36
+ trs.each do |tr|
37
+ i += 1
38
+
39
+
40
+ if tr.text.strip =~ /Spieltag/ ||
41
+ tr.text.strip =~ /[1-9]\.[ ]Runde|
42
+ Qual\.[ ][1-9]\.[ ]Runde| # see EL or CL Quali
43
+ Qualifikation| # see CA Championship
44
+ Sechzehntelfinale| # see EL
45
+ Achtelfinale|
46
+ Viertelfinale|
47
+ Halbfinale|
48
+ Finale|
49
+ Gruppe[ ][A-Z]| # see CL
50
+ Playoffs # see EL Quali
51
+ /x
52
+ puts
53
+ print '[%03d] ' % i
54
+ ## print squish( tr.text )
55
+ print "round >#{tr.text.strip}<"
56
+ print "\n"
57
+
58
+ last_round = tr.text.strip
59
+ else ## assume table row (tr) is match line
60
+ tds = tr.css( 'td' )
61
+
62
+ date_str = squish( tds[0].text )
63
+ time_str = squish( tds[1].text )
64
+
65
+ # was: team1_str = squish( tds[2].text )
66
+
67
+ ## <td><a href="/teams/hibernian-fc/" title="Hibernian FC">Hibernian FC</a></td>
68
+ ## todo/check: check if tooltip title always equals text - why? why not?
69
+ team1_anchor = tds[2].css( 'a' )[0]
70
+ if team1_anchor # note: <a> might be optional (and team name only be plain text)
71
+ team1_str = squish( team1_anchor.text )
72
+ team1_ref = norm_team_ref( team1_anchor[:href] )
73
+ else
74
+ team1_str = squish( tds[2].text )
75
+ team1_ref = nil
76
+ puts "!! WARN: no team1_ref for >#{team1_str}< found"
77
+ end
78
+
79
+ ## <td> - </td>
80
+ ## e.g. -
81
+ vs_str = squish( tds[3].text ) ## use to assert column!!!
82
+ assert( vs_str == '-', "- for vs. expected; got #{vs_str}")
83
+ ## was: team2_str = squish( tds[4].text )
84
+
85
+ ## <td><a href="/teams/st-johnstone-fc/" title="St. Johnstone FC">St. Johnstone FC</a></td>
86
+ team2_anchor = tds[4].css( 'a' )[0]
87
+ if team2_anchor
88
+ team2_str = squish( team2_anchor.text )
89
+ team2_ref = norm_team_ref( team2_anchor[:href] )
90
+ else
91
+ team2_str = squish( tds[4].text )
92
+ team2_ref = nil
93
+ puts "!! WARN: no team2_ref for >#{team2_str}< found"
94
+ end
95
+
96
+ ### was: score_str = squish( tds[5].text )
97
+ ## <a href="/spielbericht/premiership-2020-2021-hibernian-fc-st-johnstone-fc/" title="Spielschema Hibernian FC - St. Johnstone FC">-:-</a>
98
+
99
+ score_anchor = tds[5].css( 'a' )[0]
100
+ if score_anchor ## note: score ref (match report) is optional!!!!
101
+ score_str = squish( score_anchor.text )
102
+ score_ref = norm_score_ref( score_anchor[:href] )
103
+ else
104
+ score_str = squish( tds[5].text )
105
+ score_ref = nil
106
+ end
107
+
108
+
109
+ ## todo - find a better way to check for live match
110
+ ## check for live badge image
111
+ ## <td>
112
+ ## <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
113
+ ## </td>
114
+ img = tds[6].css( 'img' )[0]
115
+ if img && img[:src].index( '/live/')
116
+ puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
117
+ score_str = '-:-' # note: -:- gets replaced to ---
118
+ end
119
+
120
+
121
+ date_str = last_date_str if date_str.empty?
122
+
123
+ print '[%03d] ' % i
124
+ print "%-10s | " % date_str
125
+ print "%-5s | " % time_str
126
+ print "%-22s | " % team1_str
127
+ print "%-22s | " % team2_str
128
+ print "%-10s | " % score_str
129
+ print (score_ref ? score_ref : 'n/a')
130
+ print "\n"
131
+
132
+
133
+ ## change 2:1 (1:1) to 2-1 (1-1)
134
+ score_str = score_str.gsub( ':', '-' )
135
+
136
+ ## convert date from 25.10.2019 to 2019-10-25
137
+ date = Date.strptime( date_str, '%d.%m.%Y' )
138
+
139
+ ## note: keep structure flat for now
140
+ ## (AND not nested e.g. team:{text:,ref:}) - why? why not?
141
+ rows << { round: last_round,
142
+ date: date.strftime( '%Y-%m-%d' ),
143
+ time: time_str,
144
+ team1: team1_str,
145
+ team1_ref: team1_ref,
146
+ score: score_str,
147
+ team2: team2_str,
148
+ team2_ref: team2_ref,
149
+ report_ref: score_ref
150
+ }
151
+
152
+ last_date_str = date_str
153
+ end
154
+ end # each tr (table row)
155
+
156
+ rows
157
+ end
158
+ end # matches
159
+
160
+
161
+
162
+ def teams
163
+ @teams ||= begin
164
+ h = {}
165
+ matches.each do |match|
166
+ ## index by name/text for now NOT ref - why? why not?
167
+ [{text: match[:team1],
168
+ ref: match[:team1_ref]},
169
+ {text: match[:team2],
170
+ ref: match[:team2_ref]}].each do |team|
171
+ rec = h[ team[:text] ] ||= { count: 0,
172
+ name: team[ :text],
173
+ ref: team[ :ref ] }
174
+ rec[ :count ] += 1
175
+ ## todo/check: check/assert that name and ref are always equal - why? why not?
176
+ end
177
+ end
178
+
179
+ h.values
180
+ end
181
+ end
182
+
183
+ def rounds
184
+ @rounds ||= begin
185
+ h = {}
186
+ matches.each do |match|
187
+ rec = h[ match[:round] ] ||= { count: 0,
188
+ name: match[ :round] }
189
+ rec[ :count ] += 1
190
+ end
191
+
192
+ h.values
193
+ end
194
+ end
195
+
196
+
197
+ def seasons
198
+ # <select name="saison" ...
199
+ @seasons ||= begin
200
+ recs = []
201
+ season = doc.css( 'select[name="saison"]').first
202
+ options = season.css( 'option' )
203
+
204
+ options.each do |option|
205
+ recs << { text: squish( option.text ),
206
+ ref: norm_season_ref( option[:value] )
207
+ }
208
+ end
209
+ recs
210
+ end
211
+ end
212
+
213
+
214
+ ######
215
+ ## helpers
216
+
217
+ ## todo/check - rename/use HREF and not REF - why? why not?
218
+ REF_SCORE_RE = %r{^/spielbericht/
219
+ ([a-z0-9_-]+)/$}x
220
+
221
+ def norm_score_ref( str )
222
+ ## check ref format / path
223
+ if m=REF_SCORE_RE.match( str )
224
+ m[1]
225
+ else
226
+ puts "!! ERROR: unexpected score href format >#{str}<"
227
+ exit 1
228
+ end
229
+ end
230
+
231
+
232
+ REF_TEAM_RE = %r{^/teams/
233
+ ([a-z0-9_-]+)/$}x
234
+
235
+ def norm_team_ref( str )
236
+ ## check ref format / path
237
+ if m=REF_TEAM_RE.match( str )
238
+ m[1]
239
+ else
240
+ puts "!! ERROR: unexpected team href format >#{str}<"
241
+ exit 1
242
+ end
243
+ end
244
+
245
+
246
+ REF_SEASON_RE = %r{^/alle_spiele/
247
+ ([a-z0-9_-]+)/$}x
248
+
249
+ def norm_season_ref( str )
250
+ ## check ref format / path
251
+ if m=REF_SEASON_RE.match( str )
252
+ m[1]
253
+ else
254
+ puts "!! ERROR: unexpected season href format >#{str}<"
255
+ exit 1
256
+ end
257
+ end
258
+ end # class Schedule
259
+
260
+
261
+ end # class Page
262
+ end # module Worldfootball
@@ -0,0 +1,4 @@
1
+ # note: allow require 'webget/football' too
2
+ # (in addition to require 'webget-football')
3
+
4
+ require_relative '../webget-football'
@@ -0,0 +1,6 @@
1
+ require 'minitest/autorun'
2
+
3
+
4
+ ## our own code
5
+ require 'webget/football' ## or require 'webget-football'
6
+
@@ -0,0 +1,16 @@
1
+ ###
2
+ # to run use
3
+ # ruby -I ./lib -I ./test test/test_version.rb
4
+
5
+
6
+ require 'helper'
7
+
8
+ class TestVersion < MiniTest::Test
9
+
10
+ def test_version
11
+ pp Webget::Module::Football::VERSION
12
+ pp Webget::Module::Football.banner
13
+ pp Webget::Module::Football.root
14
+ end
15
+
16
+ end # class TestVersion
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: webget-football
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Gerald Bauer
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-11-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rdoc
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '7'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '4.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '7'
33
+ - !ruby/object:Gem::Dependency
34
+ name: hoe
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.22'
40
+ type: :development
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.22'
47
+ description: webget-football - get football data via web pages or web api (json) calls
48
+ email: opensport@googlegroups.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files:
52
+ - CHANGELOG.md
53
+ - Manifest.txt
54
+ - README.md
55
+ files:
56
+ - CHANGELOG.md
57
+ - Manifest.txt
58
+ - README.md
59
+ - Rakefile
60
+ - lib/webget-football.rb
61
+ - lib/webget-football/apis.rb
62
+ - lib/webget-football/apis/config.rb
63
+ - lib/webget-football/apis/download.rb
64
+ - lib/webget-football/version.rb
65
+ - lib/webget-football/worldfootball.rb
66
+ - lib/webget-football/worldfootball/config.rb
67
+ - lib/webget-football/worldfootball/download.rb
68
+ - lib/webget-football/worldfootball/page.rb
69
+ - lib/webget-football/worldfootball/page_report.rb
70
+ - lib/webget-football/worldfootball/page_schedule.rb
71
+ - lib/webget/football.rb
72
+ - test/helper.rb
73
+ - test/test_version.rb
74
+ homepage: https://github.com/sportdb/sport.db
75
+ licenses:
76
+ - Public Domain
77
+ metadata: {}
78
+ post_install_message:
79
+ rdoc_options:
80
+ - "--main"
81
+ - README.md
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: 2.2.2
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.5.2
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: webget-football - get football data via web pages or web api (json) calls
100
+ test_files: []