worldfootball 0.2.6 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,6 +12,15 @@ class Schedule < Page ## note: use nested class for now - why? why not?
12
12
  end
13
13
 
14
14
 
15
+ PLACEHOLDERS = [
16
+ 'N.N.',
17
+ 'Verlierer HF 1',
18
+ 'Verlierer HF 2',
19
+ 'Sieger HF 1',
20
+ 'Sieger HF 2',
21
+ ]
22
+ def placeholder?( str ) PLACEHOLDERS.include?( str ); end
23
+
15
24
 
16
25
  def matches
17
26
  @matches ||= begin
@@ -20,10 +29,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
20
29
  # <table class="standard_tabelle" cellpadding="3" cellspacing="1">
21
30
 
22
31
  ## note: use > for "strict" sibling (child without any in-betweens)
23
- table = doc.css( 'div.data > table.standard_tabelle' ).first ## get table
32
+ tables = doc.css( 'div.data > table.standard_tabelle' ) ## get all matching tables (exactly one expected)
24
33
  # puts table.class.name #=> Nokogiri::XML::Element
25
34
  # puts table.text
26
35
 
36
+ assert( tables.size==1, "expected one table.standard_tabelle; got #{tables.size}" )
37
+ table = tables.first
27
38
  assert( table, 'no table.standard_tabelle found in schedule page!!')
28
39
 
29
40
  trs = table.css( 'tr' )
@@ -77,8 +88,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
77
88
  ths = tr.css( 'th' )
78
89
  tds = tr.css( 'td' )
79
90
 
80
- if tr.text.strip =~ /Spieltag/ ||
81
- tr.text.strip =~ /[1-9]\.[ ]Runde|
91
+ tr_text = squish( tr.text )
92
+
93
+ if tr_text =~ /Spieltag/ ||
94
+ tr_text =~ /[1-9]\.[ ]Runde|
82
95
  Qual\.[ ][1-9]\.[ ]Runde| # see EL or CL Quali
83
96
  Qualifikation| # see CA Championship
84
97
  Sechzehntelfinale| # see EL
@@ -110,25 +123,24 @@ class Schedule < Page ## note: use nested class for now - why? why not?
110
123
  if debug?
111
124
  puts
112
125
  print '[%03d] ' % i
113
- ## print squish( tr.text )
114
- print "round >#{tr.text.strip}<"
126
+ print "round >#{tr_text}<"
115
127
  print "\n"
116
128
  end
117
129
 
118
- last_round = tr.text.strip
130
+ last_round = tr_text
119
131
  elsif ths.count > 0 &&
120
132
  tds.count == 0
121
133
  ## check for round NOT yet configured!!!
122
- puts "!! WARN: found unregistered round line >#{tr.text.strip}<"
123
- log( "!! WARN: found unregistered round line >#{tr.text.strip}< in page #{title}" )
134
+ puts "!! WARN: found unregistered round line >#{tr_text}<"
135
+ log( "!! WARN: found unregistered round line >#{tr_text}< in page #{title}" )
124
136
 
125
- last_round = tr.text.strip
137
+ last_round = tr_text
126
138
  else ## assume table row (tr) is match line
127
139
 
128
140
  date_str = squish( tds[0].text )
129
141
  time_str = squish( tds[1].text )
130
142
 
131
- date_str = last_date_str if date_str.empty?
143
+ date_str = last_date_str if date_str.empty? && last_date_str
132
144
 
133
145
  if debug?
134
146
  ## note: for debugging - print as we go along (parsing)
@@ -148,7 +160,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
148
160
  else
149
161
  team1_str = squish( tds[2].text )
150
162
  team1_ref = nil
151
- puts "!! WARN: no team1_ref for >#{team1_str}< found"
163
+ puts "!! WARN: no team1_ref for >#{team1_str}< found" unless placeholder?( team1_str )
152
164
  end
153
165
 
154
166
  if debug?
@@ -170,9 +182,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
170
182
  else
171
183
  team2_str = squish( tds[4].text )
172
184
  team2_ref = nil
173
- puts "!! WARN: no team2_ref for >#{team2_str}< found"
185
+ puts "!! WARN: no team2_ref for >#{team2_str}< found" unless placeholder?( team2_str )
174
186
  end
175
187
 
188
+
176
189
  if debug?
177
190
  ## note: for debugging - print as we go along (parsing)
178
191
  print "%-22s | " % team2_str
@@ -198,8 +211,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
198
211
  ## <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
199
212
  ## </td>
200
213
  img = tds[6].css( 'img' )[0]
214
+
215
+
216
+
201
217
  if img && img[:src].index( '/live/')
202
- puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
218
+ ## puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
219
+ print " LIVE BADGE "
203
220
  score_str = '-:-' # note: -:- gets replaced to ---
204
221
  end
205
222
 
@@ -217,8 +234,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
217
234
  ## special case for '00.00.0000'
218
235
  ## CANNOT parse
219
236
  ## use empty date - why? why not?
237
+ ## also applies if the date starts with 00.00. e.g. 00.00.1939
238
+
220
239
 
221
- date = if date_str == '00.00.0000'
240
+ date = if date_str == '00.00.0000' ||
241
+ date_str.start_with?( '00.00.' ) ||
242
+ date_str.empty?
222
243
  nil
223
244
  else
224
245
  Date.strptime( date_str, '%d.%m.%Y' )
@@ -237,7 +258,9 @@ class Schedule < Page ## note: use nested class for now - why? why not?
237
258
  report_ref: score_ref
238
259
  }
239
260
 
240
- last_date_str = date_str
261
+ ## note - only update last date if date present
262
+ ## the date might be empty (not available) for the first rows
263
+ last_date_str = date_str if !date_str.empty?
241
264
  end
242
265
  end # each tr (table row)
243
266
 
@@ -1,8 +1,8 @@
1
1
 
2
2
  module Worldfootball
3
3
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
4
- MINOR = 2
5
- PATCH = 6
4
+ MINOR = 3
5
+ PATCH = 1
6
6
  VERSION = [MAJOR,MINOR,PATCH].join('.')
7
7
 
8
8
  def self.version
data/lib/worldfootball.rb CHANGED
@@ -6,6 +6,9 @@ require 'webget' ## incl. webget, webcache, webclient, etc.
6
6
  require 'nokogiri'
7
7
 
8
8
 
9
+ require 'sportdb/writers'
10
+
11
+
9
12
  ###
10
13
  # our own code
11
14
  require_relative 'worldfootball/version'
@@ -38,6 +41,14 @@ module Worldfootball
38
41
  def self.debug=(value) @debug = value; end
39
42
  def self.debug?() @debug ||= false; end ## note: default is FALSE
40
43
 
44
+ def self.log( msg ) ### append to log
45
+ File.open( './logs.txt', 'a:utf-8' ) do |f|
46
+ f.write( msg )
47
+ f.write( "\n" )
48
+ end
49
+ end
50
+
51
+
41
52
 
42
53
 
43
54
  class Configuration
@@ -47,10 +58,16 @@ class Configuration
47
58
  def out_dir() @out_dir || './o'; end
48
59
  def out_dir=(value) @out_dir = value; end
49
60
  end
61
+ class Generate
62
+ def out_dir() @out_dir || './o'; end
63
+ def out_dir=(value) @out_dir = value; end
64
+ end
50
65
 
51
- def convert() @convert ||= Convert.new; end
66
+ def convert() @convert ||= Convert.new; end
67
+ def generate() @generate ||= Generate.new; end
52
68
  end # class Configuration
53
69
 
70
+
54
71
  ## lets you use
55
72
  ## Worldfootball.configure do |config|
56
73
  ## config.convert.out_dir = './o'
@@ -62,6 +79,60 @@ end # module Worldfootball
62
79
 
63
80
 
64
81
 
82
+ ###
83
+ # todo - move generate to its own (generate) file!!!
84
+ module Worldfootball
85
+ def self.generate( league:, season:,
86
+ overwrite: true )
87
+ season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
88
+
89
+ league = find_league!( league )
90
+ pages = league.pages!( season: season )
91
+
92
+
93
+ out_path = if season >= Season( '2000' )
94
+ "#{config.generate.out_dir}/#{season.to_path}/#{league.key}.txt"
95
+ else
96
+ decade = season.start_year - (season.start_year%10)
97
+ ## use archive-style before 2000!!!
98
+ "#{config.generate.out_dir}/archive/#{decade}s/#{season.to_path}/#{league.key}.txt"
99
+ end
100
+
101
+ ## check if output exists already
102
+ if !overwrite && File.exist?( out_path )
103
+ ## skip generation
104
+ puts " OK #{league.key} #{season} (do NOT overwrite)"
105
+ return
106
+ end
107
+
108
+
109
+ ## get matches
110
+ path = "#{config.convert.out_dir}/#{season.to_path}/#{league.key}.csv"
111
+ puts " ---> reading matches in #{path} ..."
112
+ matches = SportDb::CsvMatchParser.read( path )
113
+ puts " #{matches.size} matches"
114
+
115
+ ## build
116
+ txt = SportDb::TxtMatchWriter.build( matches )
117
+ puts txt
118
+
119
+
120
+ buf = String.new
121
+ ## note - use league key for league name for now!!
122
+ buf << "= #{league.key.upcase.gsub('.', ' ')} #{season.key}\n\n"
123
+ buf << txt
124
+
125
+ puts " writing to >#{out_path}<..."
126
+ write_text( out_path, buf )
127
+
128
+ ## add to tmp too for debugging
129
+ out_path2 = "#{config.generate.out_dir}/tmp/#{league.key}/#{season.to_path}.txt"
130
+ puts " writing to >#{out_path2}<..."
131
+ write_text( out_path2, buf )
132
+ end
133
+ end # module Worldfootball
134
+
135
+
65
136
 
66
137
 
67
138
  puts Worldfootball.banner ## say hello
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: worldfootball
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-05 00:00:00.000000000 Z
11
+ date: 2024-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: football-timezones
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: sportdb-writers
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: rdoc
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -92,19 +106,24 @@ dependencies:
92
106
  requirements:
93
107
  - - "~>"
94
108
  - !ruby/object:Gem::Version
95
- version: '4.1'
109
+ version: '4.2'
96
110
  type: :development
97
111
  prerelease: false
98
112
  version_requirements: !ruby/object:Gem::Requirement
99
113
  requirements:
100
114
  - - "~>"
101
115
  - !ruby/object:Gem::Version
102
- version: '4.1'
116
+ version: '4.2'
103
117
  description: worldfootball - get world football (leagues, cups & more) match data
104
118
  via the worldfootball.net/weltfussball.de pages
105
119
  email: gerald.bauer@gmail.com
106
120
  executables:
107
121
  - wfb
122
+ - wfbconf
123
+ - wfbconv
124
+ - wfbdump
125
+ - wfbgen
126
+ - wfbup
108
127
  extensions: []
109
128
  extra_rdoc_files:
110
129
  - CHANGELOG.md
@@ -116,12 +135,17 @@ files:
116
135
  - README.md
117
136
  - Rakefile
118
137
  - bin/wfb
119
- - config/leagues_africa.csv
120
- - config/leagues_america.csv
121
- - config/leagues_asia.csv
122
- - config/leagues_europe.csv
123
- - config/leagues_middle_east.csv
124
- - config/leagues_pacific.csv
138
+ - bin/wfbconf
139
+ - bin/wfbconv
140
+ - bin/wfbdump
141
+ - bin/wfbgen
142
+ - bin/wfbup
143
+ - config/leagues/africa.csv
144
+ - config/leagues/america.csv
145
+ - config/leagues/asia.csv
146
+ - config/leagues/europe.csv
147
+ - config/leagues/middle_east.csv
148
+ - config/leagues/pacific.csv
125
149
  - config/rounds.csv
126
150
  - config/stages.csv
127
151
  - lib/worldfootball.rb
@@ -161,7 +185,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
161
185
  - !ruby/object:Gem::Version
162
186
  version: '0'
163
187
  requirements: []
164
- rubygems_version: 3.4.10
188
+ rubygems_version: 3.5.22
165
189
  signing_key:
166
190
  specification_version: 4
167
191
  summary: worldfootball - get world football (leagues, cups & more) match data via
File without changes
File without changes