sportdb-structs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/Manifest.txt +29 -0
- data/README.md +29 -0
- data/Rakefile +33 -0
- data/lib/sportdb/structs.rb +125 -0
- data/lib/sportdb/structs/config.rb +39 -0
- data/lib/sportdb/structs/goal_parser_csv.rb +28 -0
- data/lib/sportdb/structs/match_parser_csv.rb +490 -0
- data/lib/sportdb/structs/match_status_parser.rb +90 -0
- data/lib/sportdb/structs/name_helper.rb +87 -0
- data/lib/sportdb/structs/season.rb +199 -0
- data/lib/sportdb/structs/structs/country.rb +26 -0
- data/lib/sportdb/structs/structs/goal.rb +231 -0
- data/lib/sportdb/structs/structs/group.rb +16 -0
- data/lib/sportdb/structs/structs/league.rb +35 -0
- data/lib/sportdb/structs/structs/match.rb +180 -0
- data/lib/sportdb/structs/structs/matchlist.rb +215 -0
- data/lib/sportdb/structs/structs/round.rb +23 -0
- data/lib/sportdb/structs/structs/standings.rb +271 -0
- data/lib/sportdb/structs/structs/team.rb +147 -0
- data/lib/sportdb/structs/structs/team_usage.rb +84 -0
- data/lib/sportdb/structs/version.rb +24 -0
- data/test/helper.rb +11 -0
- data/test/test_clubs.rb +38 -0
- data/test/test_csv_reader.rb +30 -0
- data/test/test_match.rb +30 -0
- data/test/test_match_status_parser.rb +57 -0
- data/test/test_name_helper.rb +65 -0
- data/test/test_season.rb +141 -0
- metadata +177 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6c25c7b64beba8af786dfd7140966707d859dc00
|
4
|
+
data.tar.gz: acb61fc5862d1d18aebcdec9a86d249fbfcd6fcb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ec8c7ae81f43d71285e38ef1c371c8349f31466513ba64762b31ad8563a48a69788c29d3cceeaa1cce00fcd763dcc339f98a054e21f297a883652d0895ef07a5
|
7
|
+
data.tar.gz: 6309474ac08d69ded6f8af10d771a0faf83abcc41f98251bb8eae5b4851daa8d5a8c405defa8ee66c33a279151a93faca79d585351e8c256f84df300e9f2653c
|
data/CHANGELOG.md
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
CHANGELOG.md
|
2
|
+
Manifest.txt
|
3
|
+
README.md
|
4
|
+
Rakefile
|
5
|
+
lib/sportdb/structs.rb
|
6
|
+
lib/sportdb/structs/config.rb
|
7
|
+
lib/sportdb/structs/goal_parser_csv.rb
|
8
|
+
lib/sportdb/structs/match_parser_csv.rb
|
9
|
+
lib/sportdb/structs/match_status_parser.rb
|
10
|
+
lib/sportdb/structs/name_helper.rb
|
11
|
+
lib/sportdb/structs/season.rb
|
12
|
+
lib/sportdb/structs/structs/country.rb
|
13
|
+
lib/sportdb/structs/structs/goal.rb
|
14
|
+
lib/sportdb/structs/structs/group.rb
|
15
|
+
lib/sportdb/structs/structs/league.rb
|
16
|
+
lib/sportdb/structs/structs/match.rb
|
17
|
+
lib/sportdb/structs/structs/matchlist.rb
|
18
|
+
lib/sportdb/structs/structs/round.rb
|
19
|
+
lib/sportdb/structs/structs/standings.rb
|
20
|
+
lib/sportdb/structs/structs/team.rb
|
21
|
+
lib/sportdb/structs/structs/team_usage.rb
|
22
|
+
lib/sportdb/structs/version.rb
|
23
|
+
test/helper.rb
|
24
|
+
test/test_clubs.rb
|
25
|
+
test/test_csv_reader.rb
|
26
|
+
test/test_match.rb
|
27
|
+
test/test_match_status_parser.rb
|
28
|
+
test/test_name_helper.rb
|
29
|
+
test/test_season.rb
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# sportdb-structs - sport data structures for matches, scores, leagues, seasons, rounds, groups, teams, clubs and more
|
2
|
+
|
3
|
+
|
4
|
+
* home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
|
5
|
+
* bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
|
6
|
+
* gem :: [rubygems.org/gems/sportdb-structs](https://rubygems.org/gems/sportdb-structs)
|
7
|
+
* rdoc :: [rubydoc.info/gems/sportdb-structs](http://rubydoc.info/gems/sportdb-structs)
|
8
|
+
* forum :: [opensport](http://groups.google.com/group/opensport)
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
## Usage
|
13
|
+
|
14
|
+
To be done
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
## License
|
20
|
+
|
21
|
+
The `sportdb-structs` scripts are dedicated to the public domain.
|
22
|
+
Use it as you please with no restrictions whatsoever.
|
23
|
+
|
24
|
+
|
25
|
+
## Questions? Comments?
|
26
|
+
|
27
|
+
Send them along to the
|
28
|
+
[Open Sports & Friends Forum/Mailing List](http://groups.google.com/group/opensport).
|
29
|
+
Thanks!
|
data/Rakefile
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/sportdb/structs/version.rb'
|
3
|
+
|
4
|
+
Hoe.spec 'sportdb-structs' do
|
5
|
+
|
6
|
+
self.version = SportDb::Module::Structs::VERSION
|
7
|
+
|
8
|
+
self.summary = "sportdb-structs - sport data structures for matches, scores, leagues, seasons, rounds, groups, teams, clubs and more"
|
9
|
+
self.description = summary
|
10
|
+
|
11
|
+
self.urls = ['https://github.com/sportdb/sport.db']
|
12
|
+
|
13
|
+
self.author = 'Gerald Bauer'
|
14
|
+
self.email = 'opensport@googlegroups.com'
|
15
|
+
|
16
|
+
# switch extension to .markdown for github formatting
|
17
|
+
self.readme_file = 'README.md'
|
18
|
+
self.history_file = 'CHANGELOG.md'
|
19
|
+
|
20
|
+
self.licenses = ['Public Domain']
|
21
|
+
|
22
|
+
self.extra_deps = [
|
23
|
+
['alphabets', '>= 1.0.0'],
|
24
|
+
['date-formats', '>= 1.0.1'],
|
25
|
+
['score-formats', '>= 0.1.0'],
|
26
|
+
['csvreader', '>= 1.2.4'],
|
27
|
+
['sportdb-langs', '>= 0.1.1'],
|
28
|
+
]
|
29
|
+
|
30
|
+
self.spec_extras = {
|
31
|
+
required_ruby_version: '>= 2.2.2'
|
32
|
+
}
|
33
|
+
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
## 3rd party gems
|
2
|
+
require 'alphabets' # downcase_i18n, unaccent, variants, ...
|
3
|
+
require 'date/formats' # DateFormats.parse, find!, ...
|
4
|
+
require 'score/formats'
|
5
|
+
require 'csvreader'
|
6
|
+
|
7
|
+
|
8
|
+
## Reads the CSV file at path via CsvHash.read and returns its result.
##
## sep             - optional separator; only passed on if truthy
## symbolize_names - if truthy, asks CsvHash for symbol header names
##                   (header_converters: :symbol)
def read_csv( path, sep: nil, symbolize_names: nil )
  ## build the option hash in one go and drop everything not requested
  csv_options = { sep:               sep,
                  header_converters: (:symbol if symbolize_names)
                }.select { |_name, setting| setting }

  CsvHash.read( path, **csv_options )
end
|
16
|
+
|
17
|
+
## Parses the CSV text txt via CsvHash.parse and returns its result.
##
## sep             - optional separator; only passed on if truthy
## symbolize_names - if truthy, asks CsvHash for symbol header names
##                   (header_converters: :symbol)
def parse_csv( txt, sep: nil, symbolize_names: nil )
  ## build the option hash in one go and drop everything not requested
  csv_options = { sep:               sep,
                  header_converters: (:symbol if symbolize_names)
                }.select { |_name, setting| setting }

  CsvHash.parse( txt, **csv_options )
end
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
## more sportdb libs/gems
|
29
|
+
require 'sportdb/langs'
|
30
|
+
|
31
|
+
## todo/fix: move shortcut up to sportdb/langs!!!
|
32
|
+
module SportDb
|
33
|
+
Logging = LogUtils::Logging ## logging machinery shortcut; use LogUtils for now
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
###
|
38
|
+
# our own code
|
39
|
+
require 'sportdb/structs/version' # let version always go first
|
40
|
+
require 'sportdb/structs/config'
|
41
|
+
require 'sportdb/structs/season'
|
42
|
+
|
43
|
+
require 'sportdb/structs/name_helper'
|
44
|
+
|
45
|
+
require 'sportdb/structs/structs/country'
|
46
|
+
require 'sportdb/structs/structs/league'
|
47
|
+
require 'sportdb/structs/structs/team'
|
48
|
+
require 'sportdb/structs/structs/round'
|
49
|
+
require 'sportdb/structs/structs/group'
|
50
|
+
require 'sportdb/structs/structs/goal'
|
51
|
+
require 'sportdb/structs/structs/match'
|
52
|
+
require 'sportdb/structs/structs/matchlist'
|
53
|
+
require 'sportdb/structs/structs/standings'
|
54
|
+
require 'sportdb/structs/structs/team_usage'
|
55
|
+
|
56
|
+
|
57
|
+
require 'sportdb/structs/match_status_parser'
|
58
|
+
require 'sportdb/structs/match_parser_csv'
|
59
|
+
require 'sportdb/structs/goal_parser_csv'
|
60
|
+
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
### add convenience shortcut helpers
|
65
|
+
module Sports
  class Match
    class << self
      ## Convenience shortcut - reads matches from the CSV file at path;
      ##  forwards all options unchanged to SportDb::CsvMatchParser.read
      def read_csv( path, headers: nil, filters: nil, converters: nil, sep: nil )
        parser_options = { headers:    headers,
                           filters:    filters,
                           converters: converters,
                           sep:        sep }
        SportDb::CsvMatchParser.read( path, **parser_options )
      end

      ## Convenience shortcut - parses matches from the CSV text txt;
      ##  forwards all options unchanged to SportDb::CsvMatchParser.parse
      def parse_csv( txt, headers: nil, filters: nil, converters: nil, sep: nil )
        parser_options = { headers:    headers,
                           filters:    filters,
                           converters: converters,
                           sep:        sep }
        SportDb::CsvMatchParser.parse( txt, **parser_options )
      end
    end
  end # class Match
end   # module Sports
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
module Sports
  ## lets you use
  ##   Sports.configure do |config|
  ##      config.lang = 'it'
  ##   end
  ##
  ## note: everything is forwarded to the SportDb::Import configuration;
  ##   the Sports module / namespace itself stays "clean", that is,
  ##   it only includes data structures (e.g. Match, League, etc.) for now
  class << self
    ## returns the (shared) SportDb::Import configuration object
    def config
      SportDb::Import.config
    end

    ## yields the (shared) configuration object to the passed-in block
    def configure
      yield( config )
    end
  end
end   # module Sports
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
#####
|
103
|
+
# note: add Sport and Football convenience alias - why? why not?
|
104
|
+
Sport = Sports
|
105
|
+
Football = Sports
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
|
110
|
+
## let's put test configuration in its own namespace / module
|
111
|
+
module SportDb
  ## Namespace for (shared) test configuration.
  ## todo/check: works with module too? use a module - why? why not?
  class Test
    ####
    # todo/fix: find a better way to configure shared test datasets - why? why not?
    class << self
      ## Sets the directory holding the shared test datasets;
      ##  assigning nil falls back to the default again.
      attr_writer :data_dir

      ## Returns the directory holding the shared test datasets.
      ## note: uses the one-up (..) directory '../test' for now as default
      def data_dir
        @data_dir ||= '../test'
      end
    end
  end
end   # module SportDb
|
121
|
+
|
122
|
+
|
123
|
+
puts SportDb::Module::Structs.banner # say hello
|
124
|
+
|
125
|
+
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module SportDb
module Import

## Holds the import settings (catalog and language).
class Configuration
  ##
  ##  todo: allow configure of countries_dir like clubs_dir
  ##         "fallback" and use a default built-in world/countries.txt

  attr_accessor   :catalog

  ## the language last assigned via lang= (nil if never assigned)
  attr_reader     :lang

  ## Sets the language - forwards the value to DateFormats, ScoreFormats
  ##  and SportDb.lang and remembers it for the lang reader.
  def lang=(value)
    ## check/todo: always use to_sym - why? needed?
    DateFormats.lang  = value
    ScoreFormats.lang = value
    SportDb.lang.lang = value

    ## remember the value (only after all forwards went through),
    ##   otherwise the lang reader would always return nil
    @lang = value

    ## todo/fix: change SportDb.lang to SportDb.parser.lang or lang_parser or utils or something !!!!
    ##   use Sport.lang only as a read-only shortcut a la catalog for config.lang!!!!
  end

end # class Configuration


## lets you use
##   SportDb::Import.configure do |config|
##      config.lang = 'it'
##   end

## yields the (shared) configuration object to the passed-in block
def self.configure()  yield( config ); end

## returns the (shared) configuration object; created on first use
def self.config()  @config ||= Configuration.new;  end

## e.g. use config.catalog  -- keep Import.catalog as a shortcut (for "read-only" access)
def self.catalog() config.catalog;  end

end   # module Import
end   # module SportDb
|
39
|
+
|
@@ -0,0 +1,28 @@
|
|
1
|
+
|
2
|
+
module SportDb
  ## Turns CSV text into goal event records (one per row).
  class CsvGoalParser

    ## Reads the file at path (as utf-8) and parses its content.
    def self.read( path )
      content = File.open( path, 'r:utf-8', &:read )   ## note: make sure to use (assume) utf-8
      parse( content )
    end

    ## Parses the passed-in CSV text.
    def self.parse( txt )
      parser = new( txt )
      parser.parse
    end


    def initialize( txt )
      @txt = txt
    end

    ## Returns an array with one Sports::GoalEvent.build result per CSV row.
    def parse
      parse_csv( @txt ).map do |row|
        Sports::GoalEvent.build( row )
      end
    end

  end # class CsvGoalParser
end   # module SportDb
|
@@ -0,0 +1,490 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class CsvMatchParser
|
4
|
+
|
5
|
+
#############
|
6
|
+
# helpers
|
7
|
+
## Collects the season names used in the CSV file at path.
##
## col     - name of the season column (defaults to 'Season')
## headers - optional headers mapping; headers[:season] (if present)
##           has priority over col, e.g. no need to specify it twice
## sep     - optional separator passed on to read_csv
##
## Prints the first two rows and the usage counters (debug output) and
## returns the season names only (not the hash with the usage counters).
def self.find_seasons( path, col: 'Season', sep: nil, headers: nil )
  season_col = (headers && headers[:season]) || col

  usage = Hash.new( 0 )   ## default value is 0

  ## todo/fix: yes, use CsvHash.foreach - why? why not?
  ##   use read_csv with block to switch to foreach!!!!
  read_csv( path, sep: sep ).each_with_index do |row, idx|
    puts "[#{idx}] " + row.inspect  if idx < 2

    usage[ row[ season_col ] ] += 1
  end

  pp usage

  usage.keys
end
|
31
|
+
|
32
|
+
|
33
|
+
##########
|
34
|
+
# main machinery
|
35
|
+
|
36
|
+
## todo/fix: use a generic "global" parse_csv method - why? why not?
|
37
|
+
## def self.parse_csv( text, sep: ',' ) ## helper -lets you change the csv library in one place if needed/desired
|
38
|
+
## ## note: do NOT symbolize keys - keep them as is!!!!!!
|
39
|
+
## ## todo/fix: move "upstream" and remove symbolize keys too!!! - why? why not?
|
40
|
+
## CsvHash.parse( text, sep: sep )
|
41
|
+
## end
|
42
|
+
|
43
|
+
## Reads the file at path (as utf-8) and parses its content;
##  all options are passed on unchanged to parse.
def self.read( path, headers: nil, filters: nil, converters: nil, sep: nil )
  content = File.open( path, 'r:utf-8', &:read )   ## note: make sure to use (assume) utf-8

  parse( content, headers: headers, filters: filters, converters: converters, sep: sep )
end
|
50
|
+
|
51
|
+
## Builds a parser for the passed-in CSV text and runs it;
##  all options are passed on unchanged to the instance-level parse.
def self.parse( txt, headers: nil, filters: nil, converters: nil, sep: nil )
  parser = new( txt )
  parser.parse( headers: headers, filters: filters, converters: converters, sep: sep )
end
|
57
|
+
|
58
|
+
|
59
|
+
def initialize( txt )
|
60
|
+
@txt = txt
|
61
|
+
end
|
62
|
+
|
63
|
+
## Parses the CSV text passed to the constructor and builds one
## Sports::Match per (non-skipped) data row.
##
## headers    - optional hash mapping match attributes (e.g. :team1, :date)
##              to the column names used in the CSV; if nil the mapping is
##              auto-detected from the columns of the first row
## filters    - optional hash of column name => required value; rows where
##              any listed column differs are skipped
## converters - optional proc (or array of procs) called with every row
##              (after filtering); the return value replaces the row
## sep        - optional separator passed on to parse_csv
##
## Returns an array of matches (empty if there are no rows).
##
## note: prints debug output and calls exit 1 on an unknown time or date
##   format or an invalid score format.
def parse( headers: nil, filters: nil, converters: nil, sep: nil )

  headers_mapping = {}

  rows = parse_csv( @txt, sep: sep )

  return []  if rows.empty?      ## no rows / empty?


  ## fix/todo: use logger!!!!

  if headers   ## use user supplied headers if present
    headers_mapping = headers_mapping.merge( headers )
  else

    ## note: returns an array of strings (header names) - assume all rows have the same columns/fields!!!
    headers = rows[0].keys
    pp headers

    # note: greece 2001-02 etc. use HT - check CSV reader row['HomeTeam'] may not be nil but an empty string?
    #   e.g. row['HomeTeam'] || row['HT'] will NOT work for now

    if find_header( headers, ['Team 1']) && find_header( headers, ['Team 2'])
      ## assume our own football.csv format, see github.com/footballcsv
      headers_mapping[:team1]  = find_header( headers, ['Team 1'] )
      headers_mapping[:team2]  = find_header( headers, ['Team 2'] )
      headers_mapping[:date]   = find_header( headers, ['Date'] )
      headers_mapping[:time]   = find_header( headers, ['Time'] )

      ## check for all-in-one full time (ft) and half time (ht) scores?
      headers_mapping[:score]  = find_header( headers, ['FT'] )
      headers_mapping[:scorei] = find_header( headers, ['HT'] )

      headers_mapping[:round]  = find_header( headers, ['Round', 'Matchday'] )

      ## optional headers - note: find_header returns nil if header NOT found
      header_stage = find_header( headers, ['Stage'] )
      headers_mapping[:stage]  = header_stage   if header_stage

      header_group = find_header( headers, ['Group'] )
      headers_mapping[:group]  = header_group   if header_group


      header_et = find_header( headers, ['ET', 'AET'] )   ## (after) extra time
      headers_mapping[:score_et] = header_et   if header_et

      header_p  = find_header( headers, ['P', 'PEN'] )    ## penalties
      headers_mapping[:score_p] = header_p   if header_p

      header_notes = find_header( headers, ['Notes', 'Comments'] )
      headers_mapping[:notes] = header_notes   if header_notes


      header_league = find_header( headers, ['League'] )
      headers_mapping[:league] = header_league   if header_league
    else
      ## else try footballdata.uk and others
      headers_mapping[:team1]  = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
      headers_mapping[:team2]  = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
      headers_mapping[:date]   = find_header( headers, ['Date'] )
      headers_mapping[:time]   = find_header( headers, ['Time'] )

      ## note: FT = Full Time, HG = Home Goal, AG = Away Goal
      headers_mapping[:score1] = find_header( headers, ['FTHG', 'HG'] )
      headers_mapping[:score2] = find_header( headers, ['FTAG', 'AG'] )

      ## check for half time scores ?
      ##  note: HT = Half Time
      headers_mapping[:score1i] = find_header( headers, ['HTHG'] )
      headers_mapping[:score2i] = find_header( headers, ['HTAG'] )
    end
  end

  pp headers_mapping

  ### todo/fix: check headers - how?
  ##  if present HomeTeam or HT required etc.
  ##   issue error/warn if not present
  ##
  ## puts "*** !!! wrong (unknown) headers format; cannot continue; fix it; sorry"
  ##    exit 1
  ##

  ## convert single proc shortcut to array with single converter
  ##   (loop-invariant - done once up front instead of once per row)
  converters = [converters]  if converters.is_a?( Proc )

  matches = []

  rows.each_with_index do |row,i|

    ## fix/todo: use logger!!!!
    ## puts "[#{i}] " + row.inspect  if i < 2


    ## todo/fix: move to its own (helper) method - filter or such!!!!
    ##  filter MUST match if present e.g. row['Season'] == '2017/2018'
    ##    skip the row if any header value is NOT matching
    next  if filters && filters.any? { |header, value| row[ header ] != value }


    ## note:
    ##   add converters after filters for now (why not before filters?)
    if converters   ## any converters defined?
      ## assumes array of procs
      converters.each do |converter|
        row = converter.call( row )
      end
    end



    team1 = row[ headers_mapping[ :team1 ]]
    team2 = row[ headers_mapping[ :team2 ]]


    ## check if data present - if not skip (might be empty row)
    ##  note:  (old classic) csv reader returns nil for empty fields
    ##         new modern csv reader ALWAYS returns strings (and empty strings for data not available (n/a))
    if (team1.nil? || team1.empty?) &&
       (team2.nil? || team2.empty?)
      puts "*** WARN: skipping empty? row[#{i}] - no teams found:"
      pp row
      next
    end

    ## remove possible match played counters e.g. (4) (11) etc.
    team1 = team1.sub( /\(\d+\)/, '' ).strip
    team2 = team2.sub( /\(\d+\)/, '' ).strip



    col = row[ headers_mapping[ :time ]]

    if col.nil?
      time = nil
    else
      col = col.strip   # make sure no leading or trailing spaces are left over

      ## fix: the three checks must be or-ed together; otherwise
      ##   - or ? fall through to the unknown time format branch
      if col.empty? ||
         col =~ /^-{1,}$/ ||      # e.g. - or ---
         col =~ /^\?{1,}$/        # e.g. ? or ???
        ## note: allow missing / unknown time for match
        time = nil
      else
        if col =~ /^\d{1,2}:\d{2}$/
          time_fmt = '%H:%M'   # e.g. 17:00 or 3:00
        elsif col =~ /^\d{1,2}\.\d{2}$/    ## fix: dot escaped - match a literal dot only
          time_fmt = '%H.%M'   # e.g. 17.00 or 3.00
        else
          puts "*** !!! wrong (unknown) time format >>#{col}<<; cannot continue; fix it; sorry"
          ## todo/fix: add to errors/warns list - why? why not?
          exit 1
        end

        ## todo/check: use date object (keep string?) - why? why not?
        ##  todo/fix: yes!! use date object!!!! do NOT use string
        time = Time.strptime( col, time_fmt ).strftime( '%H:%M' )
      end
    end



    ## fix: to_s guards against nil (no date header mapped or
    ##   nil field) - a missing date is treated like an empty one
    col = row[ headers_mapping[ :date ]].to_s
    col = col.strip   # make sure no leading or trailing spaces are left over

    if col.empty? ||
       col =~ /^-{1,}$/ ||      # e.g. - or ---
       col =~ /^\?{1,}$/        # e.g. ? or ???
      ## note: allow missing / unknown date for match
      date = nil
    else
      ## remove possible weekday or weeknumber  e.g. (Fri) (4) etc.
      col = col.sub( /\(W?\d{1,2}\)/, '' )  ## e.g. (W11), (4), (21) etc.
      col = col.sub( /\(\w+\)/, '' )        ## e.g. (Fri), (Fr) etc.
      col = col.strip   # make sure no leading or trailing spaces are left over

      if col =~ /^\d{2}\/\d{2}\/\d{4}$/
        date_fmt = '%d/%m/%Y'   # e.g. 17/08/2002
      elsif col =~ /^\d{2}\/\d{2}\/\d{2}$/
        date_fmt = '%d/%m/%y'   # e.g. 17/08/02
      elsif col =~ /^\d{4}-\d{2}-\d{2}$/      ## "standard" / default date format
        date_fmt = '%Y-%m-%d'   # e.g. 1995-08-04
      elsif col =~ /^\d{1,2} \w{3} \d{4}$/
        date_fmt = '%d %b %Y'   # e.g. 8 Jul 2017
      elsif col =~ /^\w{3} \w{3} \d{1,2} \d{4}$/
        date_fmt = '%a %b %d %Y'   # e.g. Sat Aug 7 1993
      else
        puts "*** !!! wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
        ## todo/fix: add to errors/warns list - why? why not?
        exit 1
      end

      ## todo/check: use date object (keep string?) - why? why not?
      ##  todo/fix: yes!! use date object!!!! do NOT use string
      date = Date.strptime( col, date_fmt ).strftime( '%Y-%m-%d' )
    end


    ##
    ## todo/fix: round might not always be just a simple integer number!!!
    ##             might be text such as Final | Leg 1 or such!!!!
    round = nil
    ## check for (optional) round / matchday
    if headers_mapping[ :round ]
      col = row[ headers_mapping[ :round ]]
      ## todo: issue warning if not ? or - (and just empty string) why? why not

      ## note: make round always a string for now!!!! e.g. "1", "2" too!!
      round = if col.nil? || col.empty? || col == '-' || col == 'n/a'
                ## note: allow missing round for match / defaults to nil
                nil
              else
                col
              end
    end


    score1  = nil
    score2  = nil
    score1i = nil
    score2i = nil

    ## check for full time scores ?
    if headers_mapping[ :score1 ] && headers_mapping[ :score2 ]
      ft = [ row[ headers_mapping[ :score1 ]],
             row[ headers_mapping[ :score2 ]] ]

      ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
      score1 = ft[0].to_i  if ft[0] =~ /^\d{1,2}$/
      score2 = ft[1].to_i  if ft[1] =~ /^\d{1,2}$/
    end

    ## check for half time scores ?
    if headers_mapping[ :score1i ] && headers_mapping[ :score2i ]
      ht = [ row[ headers_mapping[ :score1i ]],
             row[ headers_mapping[ :score2i ]] ]

      ## todo/fix: issue warning if not ? or - (and just empty string) why? why not
      score1i = ht[0].to_i  if ht[0] =~ /^\d{1,2}$/
      score2i = ht[1].to_i  if ht[1] =~ /^\d{1,2}$/
    end


    ## check for all-in-one full time scores?
    if headers_mapping[ :score ]
      col = row[ headers_mapping[ :score ]]
      score = parse_score( col )
      if score
        score1 = score[0]
        score2 = score[1]
      else
        puts "!! ERROR - invalid score (ft) format >#{col}<:"
        pp row
        exit 1
      end
    end

    ## check for all-in-one half time scores?
    if headers_mapping[ :scorei ]
      col = row[ headers_mapping[ :scorei ]]
      score = parse_score( col )
      if score
        score1i = score[0]
        score2i = score[1]
      else
        puts "!! ERROR - invalid score (ht) format >#{col}<:"
        pp row
        exit 1
      end
    end

    ####
    ## try optional score - extra time (et) and penalties (p/pen)
    score1et = nil
    score2et = nil
    score1p  = nil
    score2p  = nil

    if headers_mapping[ :score_et ]
      col = row[ headers_mapping[ :score_et ]]
      score = parse_score( col )
      if score
        score1et = score[0]
        score2et = score[1]
      else
        puts "!! ERROR - invalid score (et) format >#{col}<:"
        pp row
        exit 1
      end
    end

    if headers_mapping[ :score_p ]
      col = row[ headers_mapping[ :score_p ]]
      score = parse_score( col )
      if score
        score1p = score[0]
        score2p = score[1]
      else
        puts "!! ERROR - invalid score (p) format >#{col}<:"
        pp row
        exit 1
      end
    end


    ## try some optional headings / columns
    stage = nil
    if headers_mapping[ :stage ]
      col = row[ headers_mapping[ :stage ]]
      ## todo/fix: check can col be nil e.g. col.nil? possible?
      stage = if col.nil? || col.empty? || col == '-' || col == 'n/a'
                ## note: allow missing stage for match / defaults to "regular"
                nil
              elsif col == '?'
                ## note: default explicit unknown to unknown for now AND not regular - why? why not?
                '?'   ## todo/check: use unknown and NOT ?  - why? why not?
              else
                col
              end
    end

    group = nil
    if headers_mapping[ :group ]
      col = row[ headers_mapping[ :group ]]
      ## todo/fix: check can col be nil e.g. col.nil? possible?
      group = if col.nil? || col.empty? || col == '-' || col == 'n/a'
                ## note: allow missing group for match / defaults to nil
                nil
              else
                col
              end
    end

    status = nil    ## e.g. AWARDED, CANCELLED, POSTPONED, etc.
    if headers_mapping[ :notes ]
      col = row[ headers_mapping[ :notes ]]
      ## check for optional (match) status in notes / comments
      status = if col.nil? || col.empty? || col == '-' || col == 'n/a'
                 nil
               else
                 StatusParser.parse( col )  # note: returns nil if no (match) status found
               end
    end


    league = nil
    league = row[ headers_mapping[ :league ]]   if headers_mapping[ :league ]


    attributes = {
      date:     date,
      time:     time,
      team1:    team1,    team2:    team2,
      score1:   score1,   score2:   score2,
      score1i:  score1i,  score2i:  score2i,
      score1et: score1et, score2et: score2et,
      score1p:  score1p,  score2p:  score2p,
      round:    round,
      stage:    stage,
      group:    group,
      status:   status,
      league:   league
    }
    ## pp attributes

    match = Sports::Match.new( **attributes )
    matches << match
  end

  ## pp matches
  matches
end
|
444
|
+
|
445
|
+
|
446
|
+
private
|
447
|
+
|
448
|
+
## Returns the first entry of candidates that is included in headers
##  or nil if no matching header is found.
##
## headers    - array of header names
## candidates - array of header names to look for (in order of priority)
def find_header( headers, candidates )
  candidates.find { |candidate| headers.include?( candidate ) }
end
|
456
|
+
|
457
|
+
########
|
458
|
+
# more helpers
|
459
|
+
#
|
460
|
+
|
461
|
+
## Parses an all-in-one score string e.g. 2-1 or 2:1.
##
## Returns
##   - [nil,nil]  if str is nil, empty or a not available marker (?, -, n/a)
##   - [score1,score2] (two integers) for a valid score
##   - nil        if the format is invalid / unparseable
def parse_score( str )
  ## todo/check: remove nil case - possible? - why? why not?
  return [nil,nil]  if str.nil?

  ## remove (optional single) note/footnote/endnote markers
  ##  e.g. (*) or (a), (b),
  ##    or [*], [A], [1], etc.
  ##   - allow (1) or maybe (*1) in the future - why? why not?
  str = str.sub( /\([a-z*]\)|\[[1-9a-z*]\]/i, '' ).strip

  if str.empty? || str == '?' || str == '-' || str == 'n/a'
    [nil,nil]
  elsif (m = /\A(\d{1,2})[:-](\d{1,2})\z/.match( str ))   ## sanity check scores format
    ## note: \A and \z anchor the whole string (^ and $ only anchor a line,
    ##         and, thus, would let multi-line garbage pass the check)
    [m[1].to_i, m[2].to_i]
  else
    nil  ## note: returns nil if invalid / unparseable format!!!
  end
end # method parse_score
|
485
|
+
|
486
|
+
|
487
|
+
|
488
|
+
end # class CsvMatchParser
|
489
|
+
end # module SportDb
|
490
|
+
|