sportdb-quick 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 743ddb45d7379bdcb351178da359316544e663307b5d96c34f6ac90b9752fce3
4
- data.tar.gz: 86960f084c612aec1e04f183ba1f3a89959518355e498527734b23a265090fa9
3
+ metadata.gz: b43470a4bd201f62546d9a790b837f3bfc8210f6dedadef0fbe812dc64e7d97d
4
+ data.tar.gz: 95d9917fd58cc4b0967bcac350c5d11893acffd9f420b2d04310d3b8c9464baf
5
5
  SHA512:
6
- metadata.gz: 6068e848f99a756b9de1d4012cda3d5c6750dc99c329f773cbd692ffa9004f2cfa5835ffdb329aae055eaa30f1ffafeab42adc1a627d05ca040261d71d5476a7
7
- data.tar.gz: d143f15ecca6cc4bdf3c72e0d8883c94622198e9fd721ce14e93ebc882c127ef4300d865bb05d7c129e82e2f27d81ea25ca4894e0d254a0b80e6600911cd6ac5
6
+ metadata.gz: 89c899e6c26a654f8e799935f5c9a0521c7c985d1cd80957c60bbeacbe90304511f204bca1511d0b8fc621bf05629fad98fd438e38eb9c0f0d2d224960702f64
7
+ data.tar.gz: 57ae74125c564a83487a03bf47734f14590a35ff99e54d8b8343bbff71740d3bab6fc7edf707ca1f8cbc510502f2fb819555b0c6863f71bc4dca5d60a6f5d2bb
data/CHANGELOG.md CHANGED
@@ -1,5 +1,4 @@
1
- ### 0.2.1
2
-
1
+ ### 0.3.0
3
2
  ### 0.0.1 / 2024-08-27
4
3
 
5
4
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -3,11 +3,8 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  bin/fbt
6
+ bin/fbx
6
7
  lib/sportdb/quick.rb
7
- lib/sportdb/quick/csv/goal.rb
8
- lib/sportdb/quick/csv/goal_parser_csv.rb
9
- lib/sportdb/quick/csv/match_parser_csv.rb
10
- lib/sportdb/quick/csv/match_status_parser.rb
11
8
  lib/sportdb/quick/match_parser.rb
12
9
  lib/sportdb/quick/quick_league_outline_reader.rb
13
10
  lib/sportdb/quick/quick_match_reader.rb
data/Rakefile CHANGED
@@ -12,7 +12,7 @@ Hoe.spec 'sportdb-quick' do
12
12
  self.urls = { home: 'https://github.com/sportdb/sport.db' }
13
13
 
14
14
  self.author = 'Gerald Bauer'
15
- self.email = 'gerald.bauer@gmail.com'
15
+ self.email = 'gerald.bauer@gmail.com'
16
16
 
17
17
  # switch extension to .markdown for gihub formatting
18
18
  self.readme_file = 'README.md'
@@ -22,7 +22,7 @@ Hoe.spec 'sportdb-quick' do
22
22
 
23
23
  self.extra_deps = [
24
24
  ['sportdb-parser', '>= 0.2.2'],
25
- ['sportdb-structs', '>= 0.4.0'],
25
+ ['sportdb-structs', '>= 0.5.0'],
26
26
  ['logutils', '>= 0.6.1'],
27
27
  ]
28
28
 
data/bin/fbx ADDED
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ## tip: to test run:
4
+ ## ruby -I ./lib -I ../parser/lib bin/fbx
5
+
6
+ ##
7
+ ## todo/fix - use for testing
8
+ # reads textfile and dumps results in json
9
+ #
10
+ # check - keep fbx name or find a differnt name - why? why not?
11
+
12
+
13
+ ## our own code
14
+ require 'sportdb/quick'
15
+
16
+
17
+
18
+ require 'optparse'
19
+
20
+ ##
21
+ ## read textfile
22
+ ## and dump match parse results
23
+ ##
24
+ ## fbt ../openfootball/.../euro.txt
25
+
26
+
27
+
28
+
29
+ args = ARGV
30
+ opts = { debug: false,
31
+ outline: false }
32
+
33
+ parser = OptionParser.new do |parser|
34
+ parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
35
+
36
+ ##
37
+ ## check if git has a offline option?? (use same)
38
+ ## check for other tools - why? why not?
39
+
40
+
41
+ parser.on( "--verbose", "--debug",
42
+ "turn on verbose / debug output (default: #{opts[:debug]})" ) do |debug|
43
+ opts[:debug] = debug
44
+ end
45
+
46
+ parser.on( "--outline",
47
+ "turn on outline (only) output (default: #{opts[:outline]})" ) do |outline|
48
+ opts[:outline] = outline
49
+ end
50
+ end
51
+ parser.parse!( args )
52
+
53
+ puts "OPTS:"
54
+ p opts
55
+ puts "ARGV:"
56
+ p args
57
+
58
+
59
+
60
+
61
+
62
+ paths = if args.empty?
63
+ [
64
+ '../../../openfootball/euro/2021--europe/euro.txt',
65
+ '../../../openfootball/euro/2024--germany/euro.txt',
66
+ ]
67
+ else
68
+ ## check for directories
69
+ ## and auto-expand
70
+
71
+ SportDb::Parser::Opts.expand_args( args )
72
+ end
73
+
74
+
75
+
76
+
77
+
78
+
79
+
80
+ SportDb::MatchParser.debug = true if opts[:debug]
81
+
82
+
83
+ ## errors = []
84
+
85
+
86
+ paths.each_with_index do |path,i|
87
+ puts "==> [#{i+1}/#{paths.size}] reading >#{path}<..."
88
+
89
+ txt = read_text( path )
90
+ secs = SportDb::LeagueOutlineReader.parse( txt )
91
+ ## pp secs
92
+
93
+ secs.each_with_index do |sec,j| ## sec(tion)s
94
+ season = sec[:season]
95
+ league = sec[:league]
96
+ stage = sec[:stage]
97
+ lines = sec[:lines]
98
+
99
+ puts " section #{j+1}/#{secs.size} - #{league.name} #{season}, #{stage} - #{lines.size} line(s)"
100
+
101
+ next if opts[:outline]
102
+
103
+ =begin
104
+ ### check if event info availabe - use start_date;
105
+ ## otherwise we have to guess (use a "synthetic" start_date)
106
+ event_info = catalog.events.find_by( season: season,
107
+ league: league )
108
+
109
+ start = if event_info && event_info.start_date
110
+ puts "event info found:"
111
+ puts " using start date from event: "
112
+ pp event_info
113
+ pp event_info.start_date
114
+ event_info.start_date
115
+ else
116
+ =end
117
+ start = if season.year?
118
+ Date.new( season.start_year, 1, 1 )
119
+ else
120
+ Date.new( season.start_year, 7, 1 )
121
+ end
122
+
123
+ parser = SportDb::MatchParser.new( lines,
124
+ start ) ## note: keep season start_at date for now (no need for more specific stage date need for now)
125
+
126
+ auto_conf_teams, matches, rounds, groups = parser.parse
127
+
128
+ puts ">>> #{auto_conf_teams.size} teams:"
129
+ pp auto_conf_teams
130
+ puts ">>> #{matches.size} matches:"
131
+ ## pp matches
132
+ puts ">>> #{rounds.size} rounds:"
133
+ pp rounds
134
+ puts ">>> #{groups.size} groups:"
135
+ pp groups
136
+ end # each secs
137
+ end # each paths
138
+
139
+ =begin
140
+ if errors.size > 0
141
+ puts
142
+ pp errors
143
+ puts
144
+ puts "!! #{errors.size} parse error(s) in #{paths.size} datafiles(s)"
145
+ else
146
+ puts
147
+ puts "OK no parse errors found in #{paths.size} datafile(s)"
148
+ end
149
+ =end
150
+
151
+ puts "bye"
152
+
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Quick
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 2
7
- PATCH = 1
6
+ MINOR = 3
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
data/lib/sportdb/quick.rb CHANGED
@@ -29,38 +29,6 @@ require_relative 'quick/quick_match_reader'
29
29
 
30
30
 
31
31
 
32
-
33
- ###
34
- # csv (tabular dataset) support / machinery
35
- require_relative 'quick/csv/match_status_parser'
36
- require_relative 'quick/csv/goal'
37
- require_relative 'quick/csv/goal_parser_csv'
38
- require_relative 'quick/csv/match_parser_csv'
39
-
40
-
41
- ### add convenience shortcut helpers
42
- module Sports
43
- class Match
44
- def self.read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
45
- SportDb::CsvMatchParser.read( path,
46
- headers: headers,
47
- filters: filters,
48
- converters: converters,
49
- sep: sep )
50
- end
51
-
52
- def self.parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
53
- SportDb::CsvMatchParser.parse( txt,
54
- headers: headers,
55
- filters: filters,
56
- converters: converters,
57
- sep: sep )
58
- end
59
- end # class Match
60
- end # module Sports
61
-
62
-
63
-
64
32
  puts SportDb::Module::Quick.banner # say hello
65
33
 
66
34
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-quick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-28 00:00:00.000000000 Z
11
+ date: 2024-12-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-parser
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: 0.4.0
33
+ version: 0.5.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: 0.4.0
40
+ version: 0.5.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: logutils
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -78,18 +78,19 @@ dependencies:
78
78
  requirements:
79
79
  - - "~>"
80
80
  - !ruby/object:Gem::Version
81
- version: '4.1'
81
+ version: '4.2'
82
82
  type: :development
83
83
  prerelease: false
84
84
  version_requirements: !ruby/object:Gem::Requirement
85
85
  requirements:
86
86
  - - "~>"
87
87
  - !ruby/object:Gem::Version
88
- version: '4.1'
88
+ version: '4.2'
89
89
  description: sportdb-quick - football.txt (quick) match readers and more
90
90
  email: gerald.bauer@gmail.com
91
91
  executables:
92
92
  - fbt
93
+ - fbx
93
94
  extensions: []
94
95
  extra_rdoc_files:
95
96
  - CHANGELOG.md
@@ -101,11 +102,8 @@ files:
101
102
  - README.md
102
103
  - Rakefile
103
104
  - bin/fbt
105
+ - bin/fbx
104
106
  - lib/sportdb/quick.rb
105
- - lib/sportdb/quick/csv/goal.rb
106
- - lib/sportdb/quick/csv/goal_parser_csv.rb
107
- - lib/sportdb/quick/csv/match_parser_csv.rb
108
- - lib/sportdb/quick/csv/match_status_parser.rb
109
107
  - lib/sportdb/quick/match_parser.rb
110
108
  - lib/sportdb/quick/quick_league_outline_reader.rb
111
109
  - lib/sportdb/quick/quick_match_reader.rb
@@ -131,7 +129,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
129
  - !ruby/object:Gem::Version
132
130
  version: '0'
133
131
  requirements: []
134
- rubygems_version: 3.4.10
132
+ rubygems_version: 3.5.22
135
133
  signing_key:
136
134
  specification_version: 4
137
135
  summary: sportdb-quick - football.txt (quick) match readers and more
@@ -1,192 +0,0 @@
1
-
2
- module Sports
3
-
4
- ## "free-standing" goal event - for import/export in separate event / goal datafiles
5
- ## returned by CsvGoalParser and others
6
- class GoalEvent
7
-
8
- def self.build( row ) ## rename to parse or such - why? why not?
9
-
10
- ## split match_id
11
- team_str, more_str = row['Match'].split( '|' )
12
- team1_str, team2_str = team_str.split( ' - ' )
13
-
14
- more_str = more_str.strip
15
- team1_str = team1_str.strip
16
- team2_str = team2_str.strip
17
-
18
- # check if more_str is a date otherwise assume round
19
- date_fmt = if more_str =~ /^[A-Z]{3} [0-9]{1,2}$/i ## Apr 4
20
- '%b %d'
21
- elsif more_str =~ /^[A-Z]{3} [0-9]{1,2} [0-9]{4}$/i ## Apr 4 2019
22
- '%b %d %Y'
23
- else
24
- nil
25
- end
26
-
27
- if date_fmt
28
- date = Date.strptime( more_str, date_fmt )
29
- round = nil
30
- else
31
- date = nil
32
- round = more_str
33
- end
34
-
35
-
36
- values = row['Score'].split('-')
37
- values = values.map { |value| value.strip }
38
- score1 = values[0].to_i
39
- score2 = values[1].to_i
40
-
41
- minute = nil
42
- offset = nil
43
- if m=%r{([0-9]+)
44
- (?:[ ]+
45
- \+([0-9]+)
46
- )?
47
- ['.]
48
- $}x.match( row['Minute'])
49
- minute = m[1].to_i
50
- offset = m[2] ? m[2].to_i : nil
51
- else
52
- puts "!! ERROR - unsupported minute (goal) format >#{row['Minute']}<"
53
- exit 1
54
- end
55
-
56
- attributes = {
57
- team1: team1_str,
58
- team2: team2_str,
59
- date: date,
60
- round: round,
61
- score1: score1,
62
- score2: score2,
63
- minute: minute,
64
- offset: offset,
65
- player: row['Player'],
66
- owngoal: ['(og)', '(o.g.)'].include?( row['Extra']),
67
- penalty: ['(pen)', '(pen.)'].include?( row['Extra']),
68
- notes: (row['Notes'].nil? || row['Notes'].empty?) ? nil : row['Notes']
69
- }
70
-
71
- new( **attributes )
72
- end
73
-
74
-
75
- ## match id
76
- attr_reader :team1,
77
- :team2,
78
- :round, ## optional
79
- :date ## optional
80
-
81
- ## main attributes
82
- attr_reader :score1,
83
- :score2,
84
- :player,
85
- :minute,
86
- :offset,
87
- :owngoal,
88
- :penalty,
89
- :notes
90
-
91
-
92
- ## todo/check: or just use match.hash or such if match mapping known - why? why not?
93
- def match_id
94
- if round
95
- "#{@team1} - #{@team2} | #{@round}"
96
- else
97
- "#{@team1} - #{@team2} | #{@date}"
98
- end
99
- end
100
-
101
-
102
- def owngoal?() @owngoal==true; end
103
- def penalty?() @penalty==true; end
104
-
105
- def initialize( team1:,
106
- team2:,
107
- round: nil,
108
- date: nil,
109
- score1:,
110
- score2:,
111
- player:,
112
- minute:,
113
- offset: nil,
114
- owngoal: false,
115
- penalty: false,
116
- notes: nil
117
- )
118
- @team1 = team1
119
- @team2 = team2
120
- @round = round
121
- @date = date
122
-
123
- @score1 = score1
124
- @score2 = score2
125
- @player = player
126
- @minute = minute
127
- @offset = offset
128
- @owngoal = owngoal
129
- @penalty = penalty
130
- @notes = notes
131
- end
132
-
133
-
134
- ## note: lets you use normalize teams or such acts like a Match struct
135
- def update( **kwargs )
136
- ## todo/fix: use team1_name, team2_name or similar - for compat with db activerecord version? why? why not?
137
- @team1 = kwargs[:team1] if kwargs.has_key? :team1
138
- @team2 = kwargs[:team2] if kwargs.has_key? :team2
139
- end
140
- end # class GoalEvent
141
-
142
-
143
- ### extend "basic" goal struct with goal event build
144
- class Goal ### nested (non-freestanding) inside match (match is parent)
145
-
146
- def self.build( events ) ## check/todo - rename to build_from_event/row or such - why? why not?
147
- ## build an array of goal structs from (csv) recs
148
- recs = []
149
-
150
- last_score1 = 0
151
- last_score2 = 0
152
-
153
- events.each do |event|
154
-
155
- if last_score1+1 == event.score1 && last_score2 == event.score2
156
- team = 1
157
- elsif last_score2+1 == event.score2 && last_score1 == event.score1
158
- team = 2
159
- else
160
- puts "!! ERROR - unexpected score advance (one goal at a time expected):"
161
- puts " #{last_score1}-#{last_score2}=> #{event.score1}-#{event.score2}"
162
- exit 1
163
- end
164
-
165
- last_score1 = event.score1
166
- last_score2 = event.score2
167
-
168
-
169
- attributes = {
170
- score1: event.score1,
171
- score2: event.score2,
172
- team: team,
173
- minute: event.minute,
174
- offset: event.offset,
175
- player: event.player,
176
- owngoal: event.owngoal,
177
- penalty: event.penalty,
178
- notes: event.notes
179
- }
180
-
181
- recs << new( **attributes )
182
- end
183
-
184
- recs
185
- end
186
- end # class Goal
187
-
188
-
189
- end # module Sports
190
-
191
-
192
-
@@ -1,28 +0,0 @@
1
-
2
- module SportDb
3
- class CsvGoalParser
4
-
5
-
6
- def self.read( path )
7
- txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
8
- parse( txt )
9
- end
10
-
11
- def self.parse( txt )
12
- new( txt ).parse
13
- end
14
-
15
-
16
- def initialize( txt )
17
- @txt = txt
18
- end
19
-
20
- def parse
21
- rows = parse_csv( @txt )
22
- recs = rows.map { |row| Sports::GoalEvent.build( row ) }
23
- ## pp recs[0]
24
- recs
25
- end
26
-
27
- end # class CsvGoalParser
28
- end # module Sports
@@ -1,490 +0,0 @@
1
-
2
- module SportDb
3
- class CsvMatchParser
4
-
5
- #############
6
- # helpers
7
- def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
8
-
9
- ## check if headers incl. season if yes,has priority over col mapping
10
- ## e.g. no need to specify twice (if using headers)
11
- col = headers[:season] if headers && headers[:season]
12
-
13
- seasons = Hash.new( 0 ) ## default value is 0
14
-
15
- ## todo/fix: yes, use CsvHash.foreach - why? why not?
16
- ## use read_csv with block to switch to foreach!!!!
17
- rows = read_csv( path, sep: sep )
18
-
19
- rows.each_with_index do |row,i|
20
- puts "[#{i}] " + row.inspect if i < 2
21
-
22
- season = row[ col ] ## column name defaults to 'Season'
23
- seasons[ season ] += 1
24
- end
25
-
26
- pp seasons
27
-
28
- ## note: only return season keys/names (not hash with usage counter)
29
- seasons.keys
30
- end
31
-
32
-
33
- ##########
34
- # main machinery
35
-
36
- ## todo/fix: use a generic "global" parse_csv method - why? why not?
37
- ## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
38
- ## ## note: do NOT symbolize keys - keep them as is!!!!!!
39
- ## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
40
- ## CsvHash.parse( text, sep: sep )
41
- ## end
42
-
43
- def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
44
- txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
45
- parse( txt, headers: headers,
46
- filters: filters,
47
- converters: converters,
48
- sep: sep )
49
- end
50
-
51
- def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
52
- new( txt ).parse( headers: headers,
53
- filters: filters,
54
- converters: converters,
55
- sep: sep )
56
- end
57
-
58
-
59
- def initialize( txt )
60
- @txt = txt
61
- end
62
-
63
- def parse( headers: nil, filters: nil, converters: nil, sep: nil )
64
-
65
- headers_mapping = {}
66
-
67
- rows = parse_csv( @txt, sep: sep )
68
-
69
- return [] if rows.empty? ## no rows / empty?
70
-
71
-
72
- ## fix/todo: use logger!!!!
73
- ## pp csv
74
-
75
- if headers ## use user supplied headers if present
76
- headers_mapping = headers_mapping.merge( headers )
77
- else
78
-
79
- ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
80
- headers = rows[0].keys
81
- pp headers
82
-
83
- # note: greece 2001-02 etc. use HT - check CVS reader row['HomeTeam'] may not be nil but an empty string?
84
- # e.g. row['HomeTeam'] || row['HT'] will NOT work for now
85
-
86
- if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
87
- ## assume our own football.csv format, see github.com/footballcsv
88
- headers_mapping[:team1] = find_header( headers, ['Team 1'] )
89
- headers_mapping[:team2] = find_header( headers, ['Team 2'] )
90
- headers_mapping[:date] = find_header( headers, ['Date'] )
91
- headers_mapping[:time] = find_header( headers, ['Time'] )
92
-
93
- ## check for all-in-one full time (ft) and half time (ht9 scores?
94
- headers_mapping[:score] = find_header( headers, ['FT'] )
95
- headers_mapping[:scorei] = find_header( headers, ['HT'] )
96
-
97
- headers_mapping[:round] = find_header( headers, ['Round', 'Matchday'] )
98
-
99
- ## optional headers - note: find_header returns nil if header NOT found
100
- header_stage = find_header( headers, ['Stage'] )
101
- headers_mapping[:stage] = header_stage if header_stage
102
-
103
- header_group = find_header( headers, ['Group'] )
104
- headers_mapping[:group] = header_group if header_group
105
-
106
-
107
- header_et = find_header( headers, ['ET', 'AET'] ) ## (after) extra time
108
- headers_mapping[:score_et] = header_et if header_et
109
-
110
- header_p = find_header( headers, ['P', 'PEN'] ) ## penalties
111
- headers_mapping[:score_p] = header_p if header_p
112
-
113
- header_notes = find_header( headers, ['Notes', 'Comments'] )
114
- headers_mapping[:notes] = header_notes if header_notes
115
-
116
-
117
- header_league = find_header( headers, ['League'] )
118
- headers_mapping[:league] = header_league if header_league
119
- else
120
- ## else try footballdata.uk and others
121
- headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
122
- headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
123
- headers_mapping[:date] = find_header( headers, ['Date'] )
124
- headers_mapping[:time] = find_header( headers, ['Time'] )
125
-
126
- ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
127
- headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
128
- headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )
129
-
130
- ## check for half time scores ?
131
- ## note: HT = Half Time
132
- headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
133
- headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
134
- end
135
- end
136
-
137
- pp headers_mapping
138
-
139
- ### todo/fix: check headers - how?
140
- ## if present HomeTeam or HT required etc.
141
- ## issue error/warn is not present
142
- ##
143
- ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
144
- ## exit 1
145
- ##
146
-
147
- matches = []
148
-
149
- rows.each_with_index do |row,i|
150
-
151
- ## fix/todo: use logger!!!!
152
- ## puts "[#{i}] " + row.inspect if i < 2
153
-
154
-
155
- ## todo/fix: move to its own (helper) method - filter or such!!!!
156
- if filters ## filter MUST match if present e.g. row['Season'] == '2017/2018'
157
- skip = false
158
- filters.each do |header, value|
159
- if row[ header ] != value ## e.g. row['Season']
160
- skip = true
161
- break
162
- end
163
- end
164
- next if skip ## if header values NOT matching
165
- end
166
-
167
-
168
- ## note:
169
- ## add converters after filters for now (why not before filters?)
170
- if converters ## any converters defined?
171
- ## convert single proc shortcut to array with single converter
172
- converters = [converters] if converters.is_a?( Proc )
173
-
174
- ## assumes array of procs
175
- converters.each do |converter|
176
- row = converter.call( row )
177
- end
178
- end
179
-
180
-
181
-
182
- team1 = row[ headers_mapping[ :team1 ]]
183
- team2 = row[ headers_mapping[ :team2 ]]
184
-
185
-
186
- ## check if data present - if not skip (might be empty row)
187
- ## note: (old classic) csv reader returns nil for empty fields
188
- ## new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
189
- if (team1.nil? || team1.empty?) &&
190
- (team2.nil? || team2.empty?)
191
- puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
192
- pp row
193
- next
194
- end
195
-
196
- ## remove possible match played counters e.g. (4) (11) etc.
197
- team1 = team1.sub( /\(\d+\)/, '' ).strip
198
- team2 = team2.sub( /\(\d+\)/, '' ).strip
199
-
200
-
201
-
202
- col = row[ headers_mapping[ :time ]]
203
-
204
- if col.nil?
205
- time = nil
206
- else
207
- col = col.strip # make sure not leading or trailing spaces left over
208
-
209
- if col.empty?
210
- col =~ /^-{1,}$/ || # e.g. - or ---
211
- col =~ /^\?{1,}$/ # e.g. ? or ???
212
- ## note: allow missing / unknown date for match
213
- time = nil
214
- else
215
- if col =~ /^\d{1,2}:\d{2}$/
216
- time_fmt = '%H:%M' # e.g. 17:00 or 3:00
217
- elsif col =~ /^\d{1,2}.\d{2}$/
218
- time_fmt = '%H.%M' # e.g. 17:00 or 3:00
219
- else
220
- puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
221
- ## todo/fix: add to errors/warns list - why? why not?
222
- exit 1
223
- end
224
-
225
- ## todo/check: use date object (keep string?) - why? why not?
226
- ## todo/fix: yes!! use date object!!!! do NOT use string
227
- time = Time.strptime( col, time_fmt ).strftime( '%H:%M' )
228
- end
229
- end
230
-
231
-
232
-
233
- col = row[ headers_mapping[ :date ]]
234
- col = col.strip # make sure not leading or trailing spaces left over
235
-
236
- if col.empty? ||
237
- col =~ /^-{1,}$/ || # e.g. - or ---
238
- col =~ /^\?{1,}$/ # e.g. ? or ???
239
- ## note: allow missing / unknown date for match
240
- date = nil
241
- else
242
- ## remove possible weekday or weeknumber e.g. (Fri) (4) etc.
243
- col = col.sub( /\(W?\d{1,2}\)/, '' ) ## e.g. (W11), (4), (21) etc.
244
- col = col.sub( /\(\w+\)/, '' ) ## e.g. (Fri), (Fr) etc.
245
- col = col.strip # make sure not leading or trailing spaces left over
246
-
247
- if col =~ /^\d{2}\/\d{2}\/\d{4}$/
248
- date_fmt = '%d/%m/%Y' # e.g. 17/08/2002
249
- elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
250
- date_fmt = '%d/%m/%y' # e.g. 17/08/02
251
- elsif col =~ /^\d{4}-\d{1,2}-\d{1,2}$/ ## "standard" / default date format
252
- date_fmt = '%Y-%m-%d' # e.g. 1995-08-04
253
- elsif col =~ /^\d{1,2} \w{3} \d{4}$/
254
- date_fmt = '%d %b %Y' # e.g. 8 Jul 2017
255
- elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
256
- date_fmt = '%a %b %d %Y' # e.g. Sat Aug 7 1993
257
- else
258
- puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
259
- ## todo/fix: add to errors/warns list - why? why not?
260
- exit 1
261
- end
262
-
263
- ## todo/check: use date object (keep string?) - why? why not?
264
- ## todo/fix: yes!! use date object!!!! do NOT use string
265
- date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
266
- end
267
-
268
-
269
- ##
270
- ## todo/fix: round might not always be just a simple integer number!!!
271
- ## might be text such as Final | Leg 1 or such!!!!
272
- round = nil
273
- ## check for (optional) round / matchday
274
- if headers_mapping[ :round ]
275
- col = row[ headers_mapping[ :round ]]
276
- ## todo: issue warning if not ? or - (and just empty string) why? why not
277
- ## (old attic) was: round = col.to_i if col =~ /^\d{1,2}$/ # check format - e.g. ignore ? or - or such non-numbers for now
278
-
279
- ## note: make round always a string for now!!!! e.g. "1", "2" too!!
280
- round = if col.nil? || col.empty? || col == '-' || col == 'n/a'
281
- ## note: allow missing round for match / defaults to nil
282
- nil
283
- else
284
- col
285
- end
286
- end
287
-
288
-
289
- score1 = nil
290
- score2 = nil
291
- score1i = nil
292
- score2i = nil
293
-
294
- ## check for full time scores ?
295
- if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
296
- ft = [ row[ headers_mapping[ :score1 ]],
297
- row[ headers_mapping[ :score2 ]] ]
298
-
299
- ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
300
- score1 = ft[0].to_i if ft[0] =~ /^\d{1,2}$/
301
- score2 = ft[1].to_i if ft[1] =~ /^\d{1,2}$/
302
- end
303
-
304
- ## check for half time scores ?
305
- if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
306
- ht = [ row[ headers_mapping[ :score1i ]],
307
- row[ headers_mapping[ :score2i ]] ]
308
-
309
- ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
310
- score1i = ht[0].to_i if ht[0] =~ /^\d{1,2}$/
311
- score2i = ht[1].to_i if ht[1] =~ /^\d{1,2}$/
312
- end
313
-
314
-
315
- ## check for all-in-one full time scores?
316
- if headers_mapping[ :score ]
317
- col = row[ headers_mapping[ :score ]]
318
- score = parse_score( col )
319
- if score
320
- score1 = score[0]
321
- score2 = score[1]
322
- else
323
- puts "!! ERROR - invalid score (ft) format >#{col}<:"
324
- pp row
325
- exit 1
326
- end
327
- end
328
-
329
- if headers_mapping[ :scorei ]
330
- col = row[ headers_mapping[ :scorei ]]
331
- score = parse_score( col )
332
- if score
333
- score1i = score[0]
334
- score2i = score[1]
335
- else
336
- puts "!! ERROR - invalid score (ht) format >#{col}<:"
337
- pp row
338
- exit 1
339
- end
340
- end
341
-
342
- ####
343
- ## try optional score - extra time (et) and penalities (p/pen)
344
- score1et = nil
345
- score2et = nil
346
- score1p = nil
347
- score2p = nil
348
-
349
- if headers_mapping[ :score_et ]
350
- col = row[ headers_mapping[ :score_et ]]
351
- score = parse_score( col )
352
- if score
353
- score1et = score[0]
354
- score2et = score[1]
355
- else
356
- puts "!! ERROR - invalid score (et) format >#{col}<:"
357
- pp row
358
- exit 1
359
- end
360
- end
361
-
362
- if headers_mapping[ :score_p ]
363
- col = row[ headers_mapping[ :score_p ]]
364
- score = parse_score( col )
365
- if score
366
- score1p = score[0]
367
- score2p = score[1]
368
- else
369
- puts "!! ERROR - invalid score (p) format >#{col}<:"
370
- pp row
371
- exit 1
372
- end
373
- end
374
-
375
-
376
- ## try some optional headings / columns
377
- stage = nil
378
- if headers_mapping[ :stage ]
379
- col = row[ headers_mapping[ :stage ]]
380
- ## todo/fix: check can col be nil e.g. col.nil? possible?
381
- stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
382
- ## note: allow missing stage for match / defaults to "regular"
383
- nil
384
- elsif col == '?'
385
- ## note: default explicit unknown to unknown for now AND not regular - why? why not?
386
- '?' ## todo/check: use unkown and NOT ? - why? why not?
387
- else
388
- col
389
- end
390
- end
391
-
392
- group = nil
393
- if headers_mapping[ :group ]
394
- col = row[ headers_mapping[ :group ]]
395
- ## todo/fix: check can col be nil e.g. col.nil? possible?
396
- group = if col.nil? || col.empty? || col == '-' || col == 'n/a'
397
- ## note: allow missing stage for match / defaults to "regular"
398
- nil
399
- else
400
- col
401
- end
402
- end
403
-
404
- status = nil ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
405
- if headers_mapping[ :notes ]
406
- col = row[ headers_mapping[ :notes ]]
407
- ## check for optional (match) status in notes / comments
408
- status = if col.nil? || col.empty? || col == '-' || col == 'n/a'
409
- nil
410
- else
411
- StatusParser.parse( col ) # note: returns nil if no (match) status found
412
- end
413
- end
414
-
415
-
416
- league = nil
417
- league = row[ headers_mapping[ :league ]] if headers_mapping[ :league ]
418
-
419
-
420
- ## puts 'match attributes:'
421
- attributes = {
422
- date: date,
423
- time: time,
424
- team1: team1, team2: team2,
425
- score1: score1, score2: score2,
426
- score1i: score1i, score2i: score2i,
427
- score1et: score1et, score2et: score2et,
428
- score1p: score1p, score2p: score2p,
429
- round: round,
430
- stage: stage,
431
- group: group,
432
- status: status,
433
- league: league
434
- }
435
- ## pp attributes
436
-
437
- match = Sports::Match.new( **attributes )
438
- matches << match
439
- end
440
-
441
- ## pp matches
442
- matches
443
- end
444
-
445
-
446
- private
447
-
448
- def find_header( headers, candidates )
449
- ## todo/fix: use find_first from enumare of similar ?! - why? more idiomatic code?
450
-
451
- candidates.each do |candidate|
452
- return candidate if headers.include?( candidate ) ## bingo!!!
453
- end
454
- nil ## no matching header found!!!
455
- end
456
-
457
- ########
458
- # more helpers
459
- #
460
-
461
- def parse_score( str )
462
- if str.nil? ## todo/check: remove nil case - possible? - why? why not?
463
- [nil,nil]
464
- else
465
- ## remove (optional single) note/footnote/endnote markers
466
- ## e.g. (*) or (a), (b),
467
- ## or [*], [A], [1], etc.
468
- ## - allow (1) or maybe (*1) in the future - why? why not?
469
- str = str.sub( /\( [a-z*] \)
470
- |
471
- \[ [1-9a-z*] \]
472
- /ix, '' ).strip
473
-
474
- if str.empty? || str == '?' || str == '-' || str == 'n/a'
475
- [nil,nil]
476
- ### todo/check: use regex with named capture groups here - why? why not?
477
- elsif str =~ /^\d{1,2}[:-]\d{1,2}$/ ## sanity check scores format
478
- score = str.split( /[:-]/ )
479
- [score[0].to_i, score[1].to_i]
480
- else
481
- nil ## note: returns nil if invalid / unparseable format!!!
482
- end
483
- end
484
- end # method parse_score
485
-
486
-
487
-
488
- end # class CsvMatchParser
489
- end # module Sports
490
-
@@ -1,90 +0,0 @@
1
- #####################
2
- # helpers for parsing & finding match status e.g.
3
- # - cancelled / canceled
4
- # - awarded
5
- # - abandoned
6
- # - replay
7
- # etc.
8
-
9
-
10
- module SportDb
11
-
12
-
13
- ### todo/fix: move Status inside Match struct - why? why not?
14
-
15
- class Status
16
- # note: use a class as an "enum"-like namespace for now - why? why not?
17
- # move class into Match e.g. Match::Status - why? why not?
18
- CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
19
- AWARDED = 'AWARDED'
20
- POSTPONED = 'POSTPONED'
21
- ABANDONED = 'ABANDONED'
22
- REPLAY = 'REPLAY'
23
- end # class Status
24
-
25
-
26
-
27
- class StatusParser
28
-
29
- def self.parse( str )
30
- ## note: returns nil if no match found
31
- ## note: english usage - cancelled (in UK), canceled (in US)
32
- if str =~ /^(cancelled|
33
- canceled|
34
- can\.
35
- )/xi
36
- Status::CANCELLED
37
- elsif str =~ /^(awarded|
38
- awd\.
39
- )/xi
40
- Status::AWARDED
41
- elsif str =~ /^(postponed
42
- )/xi
43
- Status::POSTPONED
44
- elsif str =~ /^(abandoned|
45
- abd\.
46
- )/xi
47
- Status::ABANDONED
48
- elsif str =~ /^(replay
49
- )/xi
50
- Status::REPLAY
51
- else
52
- # no match
53
- nil
54
- end
55
- end
56
-
57
-
58
- RUN_RE = /\[
59
- (?<text>[^\]]+)
60
- \]
61
- /x
62
- def self.find!( line )
63
- ## for now check all "protected" text run blocks e.g. []
64
- ## puts "line: >#{line}<"
65
-
66
- status = nil
67
-
68
- str = line
69
- while m = str.match( RUN_RE )
70
- str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
71
-
72
- ## check for status match
73
- match_str = m[0] ## keep a copy of the match string (for later sub)
74
- text = m[:text].strip
75
- ## puts " text: >#{text}<"
76
-
77
- status = parse( text )
78
-
79
- if status
80
- line.sub!( match_str, "[STATUS.#{status}]" )
81
- break
82
- end
83
- end # while match
84
-
85
- status
86
- end # method find!
87
- end # class StatusParser
88
-
89
- end # module SportDb
90
-