sportdb-formats 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +6 -13
- data/Rakefile +1 -1
- data/lib/sportdb/formats.rb +5 -0
- data/lib/sportdb/formats/country/country_index.rb +2 -2
- data/lib/sportdb/formats/event/event_index.rb +9 -11
- data/lib/sportdb/formats/league/league_index.rb +22 -18
- data/lib/sportdb/formats/league/league_outline_reader.rb +4 -1
- data/lib/sportdb/formats/league/league_reader.rb +7 -1
- data/lib/sportdb/formats/match/match_parser.rb +27 -15
- data/lib/sportdb/formats/match/match_parser_csv.rb +148 -21
- data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
- data/lib/sportdb/formats/name_helper.rb +4 -1
- data/lib/sportdb/formats/package.rb +30 -8
- data/lib/sportdb/formats/score/score_formats.rb +19 -0
- data/lib/sportdb/formats/score/score_parser.rb +4 -2
- data/lib/sportdb/formats/structs/match.rb +2 -0
- data/lib/sportdb/formats/structs/team.rb +7 -0
- data/lib/sportdb/formats/team/club_index.rb +13 -11
- data/lib/sportdb/formats/team/club_index_history.rb +138 -0
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
- data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
- data/lib/sportdb/formats/version.rb +1 -1
- data/test/helper.rb +47 -81
- data/test/test_club_index_history.rb +107 -0
- data/test/test_club_reader_history.rb +212 -0
- data/test/test_datafile_package.rb +1 -1
- data/test/test_match_status_parser.rb +49 -0
- data/test/test_scores.rb +2 -0
- metadata +10 -17
- data/test/test_conf.rb +0 -65
- data/test/test_csv_match_parser.rb +0 -114
- data/test/test_csv_match_parser_utils.rb +0 -20
- data/test/test_match_auto.rb +0 -72
- data/test/test_match_auto_champs.rb +0 -45
- data/test/test_match_auto_euro.rb +0 -37
- data/test/test_match_auto_relegation.rb +0 -41
- data/test/test_match_auto_worldcup.rb +0 -61
- data/test/test_match_champs.rb +0 -27
- data/test/test_match_eng.rb +0 -26
- data/test/test_match_euro.rb +0 -27
- data/test/test_match_start_date.rb +0 -44
- data/test/test_match_worldcup.rb +0 -27
@@ -0,0 +1,203 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
module Import
|
6
|
+
|
7
|
+
|
8
|
+
class ClubHistoryReader
|
9
|
+
|
10
|
+
def catalog() Import.catalog; end
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
15
|
+
txt = File.open( path, 'r:utf-8' ) { |f| f.read }
|
16
|
+
parse( txt )
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.parse( txt )
|
20
|
+
new( txt ).parse
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize( txt )
|
24
|
+
@txt = txt
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
###
|
29
|
+
## RENAME/RENAMED
|
30
|
+
## MOVE/MOVED
|
31
|
+
## BANKRUPT/BANKRUPTED
|
32
|
+
## REFORM/REFORMED
|
33
|
+
## MERGE/MERGED - allow + or ++ or +++ or ; for "inline" - why? why not?
|
34
|
+
|
35
|
+
|
36
|
+
KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
|
37
|
+
MOVED?|
|
38
|
+
BANKRUPT(?:ED)?|
|
39
|
+
REFORM(?:ED)?|
|
40
|
+
MERGED?
|
41
|
+
)
|
42
|
+
[ ]+
|
43
|
+
(?<text>.*) # rest of text
|
44
|
+
$
|
45
|
+
}x
|
46
|
+
|
47
|
+
|
48
|
+
def parse
|
49
|
+
recs = []
|
50
|
+
last_rec = nil
|
51
|
+
|
52
|
+
last_country = nil
|
53
|
+
last_season = nil
|
54
|
+
last_keyword = nil
|
55
|
+
last_teams = []
|
56
|
+
|
57
|
+
OutlineReader.parse( @txt ).each do |node|
|
58
|
+
if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
|
59
|
+
heading_level = node[0][1].to_i
|
60
|
+
heading = node[1]
|
61
|
+
|
62
|
+
puts "heading #{heading_level} >#{heading}<"
|
63
|
+
|
64
|
+
|
65
|
+
if heading_level == 1
|
66
|
+
## assume country in heading; allow all "formats" supported by parse e.g.
|
67
|
+
## Österreich • Austria (at)
|
68
|
+
## Österreich • Austria
|
69
|
+
## Austria
|
70
|
+
## Deutschland (de) • Germany
|
71
|
+
country = catalog.countries.parse( heading )
|
72
|
+
## check country code - MUST exist for now!!!!
|
73
|
+
if country.nil?
|
74
|
+
puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
|
75
|
+
exit 1
|
76
|
+
end
|
77
|
+
puts " country >#{heading}< => #{country.name}, #{country.key}"
|
78
|
+
last_country = country
|
79
|
+
last_season = nil ## reset "lower levels" - season & keyword
|
80
|
+
last_keyword = nil
|
81
|
+
elsif heading_level == 2
|
82
|
+
## assume season
|
83
|
+
season = Season.new( heading )
|
84
|
+
puts " season >#{heading}< => #{season.key}"
|
85
|
+
last_season = season    ## reset "lower levels" - keyword
|
86
|
+
last_keyword = nil
|
87
|
+
else
|
88
|
+
puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
|
89
|
+
exit 1
|
90
|
+
end
|
91
|
+
|
92
|
+
elsif node[0] == :p ## paragraph with (text) lines
|
93
|
+
if last_country.nil?
|
94
|
+
puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
|
95
|
+
exit 1
|
96
|
+
end
|
97
|
+
if last_season.nil?
|
98
|
+
puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
|
99
|
+
exit 1
|
100
|
+
end
|
101
|
+
|
102
|
+
lines = node[1]
|
103
|
+
lines.each do |line|
|
104
|
+
if m=line.match(KEYWORD_LINE_RE) ## extract keyword and continue
|
105
|
+
keyword = m[:keyword]
|
106
|
+
line = m[:text].strip
|
107
|
+
|
108
|
+
puts " keyword #{keyword}"
|
109
|
+
last_keyword = case keyword ## "normalize" keywords
|
110
|
+
when 'BANKRUPT', 'BANKRUPTED'
|
111
|
+
'BANKRUPT'
|
112
|
+
when 'RENAME', 'RENAMED'
|
113
|
+
'RENAME'
|
114
|
+
when 'REFORM', 'REFORMED'
|
115
|
+
'REFORM'
|
116
|
+
when 'MOVE', 'MOVED'
|
117
|
+
'MOVE'
|
118
|
+
when 'MERGE', 'MERGED'
|
119
|
+
'MERGE'
|
120
|
+
else
|
121
|
+
puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
|
122
|
+
exit 1
|
123
|
+
end
|
124
|
+
|
125
|
+
last_teams = []
|
126
|
+
end
|
127
|
+
|
128
|
+
if last_keyword.nil?
|
129
|
+
puts "!!! ERROR [club history reader] - line with keyword expected - got:"
|
130
|
+
puts line
|
131
|
+
exit 1
|
132
|
+
end
|
133
|
+
|
134
|
+
if last_keyword == 'BANKRUPT'
|
135
|
+
## requires / expects one team in one line
|
136
|
+
recs << [ last_keyword, last_season.key,
|
137
|
+
[ squish(line), last_country.key ]
|
138
|
+
]
|
139
|
+
elsif last_keyword == 'RENAME' ||
|
140
|
+
last_keyword == 'REFORM' ||
|
141
|
+
last_keyword == 'MOVE'
|
142
|
+
## requires / expects two teams in one line (separated by ⇒ or such)
|
143
|
+
teams = line.split( '⇒' )
|
144
|
+
if teams.size != 2
|
145
|
+
puts "!!! ERROR [club history reader] - expected two teams - got:"
|
146
|
+
pp teams
|
147
|
+
exit 1
|
148
|
+
end
|
149
|
+
teams = teams.map {|team| squish(team.strip) } ## remove whitespaces
|
150
|
+
recs << [ last_keyword, last_season.key,
|
151
|
+
[ teams[0], last_country.key ],
|
152
|
+
[ teams[1], last_country.key ]
|
153
|
+
]
|
154
|
+
elsif last_keyword == 'MERGE'
|
155
|
+
## check if line starts with separator
|
156
|
+
## otherwise collect to be merged teams
|
157
|
+
if line.start_with?( '⇒' )
|
158
|
+
if last_teams.size < 2
|
159
|
+
puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
|
160
|
+
pp last_teams
|
161
|
+
exit 1
|
162
|
+
end
|
163
|
+
## auto-add country to all teams
|
164
|
+
teams = last_teams.map {|team| [team, last_country.key]}
|
165
|
+
recs << [ last_keyword, last_season.key,
|
166
|
+
teams,
|
167
|
+
[ squish(line.sub('⇒','').strip), last_country.key ]
|
168
|
+
]
|
169
|
+
|
170
|
+
last_teams = []
|
171
|
+
else
|
172
|
+
last_teams << squish(line)
|
173
|
+
end
|
174
|
+
else
|
175
|
+
puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
|
176
|
+
exit 1
|
177
|
+
end
|
178
|
+
end # each line (in paragraph)
|
179
|
+
else
|
180
|
+
puts "** !!! ERROR [club history reader] - unknown line type:"
|
181
|
+
pp node
|
182
|
+
exit 1
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
recs
|
187
|
+
end # method parse
|
188
|
+
|
189
|
+
|
190
|
+
###############
|
191
|
+
## helper
|
192
|
+
|
193
|
+
def squish( str )
|
194
|
+
## collapse each run of spaces into a single space (note: only matches the space character)
|
195
|
+
str.gsub( /[ ]+/,' ' )
|
196
|
+
end
|
197
|
+
|
198
|
+
|
199
|
+
end # class ClubHistoryReader
|
200
|
+
|
201
|
+
|
202
|
+
end ## module Import
|
203
|
+
end ## module SportDb
|
@@ -36,17 +36,16 @@ class ClubPropsReader
|
|
36
36
|
|
37
37
|
## find / match club by (canonical) name
|
38
38
|
m = catalog.clubs.match( name )
|
39
|
-
if m
|
39
|
+
if m.size > 1
|
40
40
|
puts "** !!! WARN !!! ambigious (multiple) club matches (#{m.size}) for name >#{name}< in props row:"
|
41
41
|
pp rec
|
42
42
|
pp m
|
43
43
|
|
44
44
|
## todo/fix: try filter by canonical name if more than one match
|
45
45
|
m = m.select { |club| club.name == name }
|
46
|
-
m = nil if m.empty? ## note: reset to nil if no more matches
|
47
46
|
end
|
48
47
|
|
49
|
-
if m.
|
48
|
+
if m.empty?
|
50
49
|
puts "** !!! ERROR !!! no club match for (canonical) name >#{name}< in props row:"
|
51
50
|
pp rec
|
52
51
|
exit 1
|
data/test/helper.rb
CHANGED
@@ -55,12 +55,56 @@ TXT
|
|
55
55
|
= England
|
56
56
|
|
57
57
|
Chelsea FC
|
58
|
-
Arsenal FC
|
59
58
|
Tottenham Hotspur
|
60
59
|
West Ham United
|
61
60
|
Crystal Palace
|
62
|
-
|
63
|
-
|
61
|
+
|
62
|
+
### note add move entires for testing club name history
|
63
|
+
Manchester United FC
|
64
|
+
| Manchester United
|
65
|
+
| Newton Heath FC
|
66
|
+
|
67
|
+
Manchester City FC
|
68
|
+
| Manchester City
|
69
|
+
| Ardwick FC
|
70
|
+
|
71
|
+
Arsenal FC
|
72
|
+
| The Arsenal FC
|
73
|
+
| Woolwich Arsenal FC
|
74
|
+
| Royal Arsenal FC
|
75
|
+
|
76
|
+
Gateshead FC
|
77
|
+
| South Shields FC
|
78
|
+
|
79
|
+
Sheffield Wednesday
|
80
|
+
| The Wednesday FC
|
81
|
+
|
82
|
+
Port Vale FC
|
83
|
+
| Burslem Port Vale FC
|
84
|
+
|
85
|
+
Chesterfield FC
|
86
|
+
| Chesterfield Town FC
|
87
|
+
|
88
|
+
Birmingham FC
|
89
|
+
| Small Heath FC
|
90
|
+
|
91
|
+
Burton Swifts FC
|
92
|
+
Burton Wanderers FC
|
93
|
+
Burton United FC
|
94
|
+
|
95
|
+
Blackpool FC
|
96
|
+
South Shore FC
|
97
|
+
|
98
|
+
Glossop FC
|
99
|
+
| Glossop North End FC
|
100
|
+
|
101
|
+
Walsall FC
|
102
|
+
| Walsall Town Swifts FC
|
103
|
+
|
104
|
+
|
105
|
+
Newcastle West End FC
|
106
|
+
Newcastle East End FC
|
107
|
+
Newcastle United FC
|
64
108
|
TXT
|
65
109
|
|
66
110
|
index = ClubIndex.new
|
@@ -83,81 +127,3 @@ end # module SportDb
|
|
83
127
|
|
84
128
|
|
85
129
|
|
86
|
-
################
|
87
|
-
## helper
|
88
|
-
|
89
|
-
def parse_auto_conf( txt, lang: 'en', start: nil )
|
90
|
-
start = start ? start : Date.new( 2017, 7, 1 )
|
91
|
-
|
92
|
-
SportDb::Import.config.lang = lang
|
93
|
-
|
94
|
-
parser = SportDb::AutoConfParser.new( txt, start )
|
95
|
-
parser.parse
|
96
|
-
end
|
97
|
-
|
98
|
-
def parse_conf( txt )
|
99
|
-
parser = SportDb::ConfParser.new( txt )
|
100
|
-
parser.parse
|
101
|
-
end
|
102
|
-
|
103
|
-
|
104
|
-
## note: json always returns hash tables with string keys (not symbols),
|
105
|
-
## thus, always stringify keys before comparing!!!!
|
106
|
-
class Object
|
107
|
-
def deep_stringify_keys
|
108
|
-
if self.is_a? Hash
|
109
|
-
self.reduce({}) {|memo,(k,v)| memo[k.to_s] = v.deep_stringify_keys; memo }
|
110
|
-
elsif self.is_a? Array
|
111
|
-
self.reduce([]) {|memo,v | memo << v.deep_stringify_keys; memo }
|
112
|
-
else
|
113
|
-
self
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
|
119
|
-
def read_blocks( path )
|
120
|
-
txt = File.open( path, 'r:utf-8' ).read
|
121
|
-
|
122
|
-
blocks = []
|
123
|
-
buf = String.new('')
|
124
|
-
txt.each_line do |line|
|
125
|
-
if line =~ /^[ ]*
|
126
|
-
([>]{3,} |
|
127
|
-
[<]{3,})
|
128
|
-
[ ]*
|
129
|
-
$/x ## three or more markers
|
130
|
-
blocks << buf
|
131
|
-
buf = String.new('')
|
132
|
-
else
|
133
|
-
buf << line
|
134
|
-
end
|
135
|
-
end
|
136
|
-
blocks << buf
|
137
|
-
blocks
|
138
|
-
end
|
139
|
-
|
140
|
-
|
141
|
-
def parse_json( str )
|
142
|
-
## note: allow empty string; fall back to empty hash
|
143
|
-
if str.strip.empty?
|
144
|
-
{}
|
145
|
-
else
|
146
|
-
JSON.parse( str )
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
def read_test( path )
|
151
|
-
blocks = read_blocks( "#{SportDb::Test.data_dir}/football.txt/#{path}" )
|
152
|
-
|
153
|
-
if blocks.size == 2
|
154
|
-
[blocks[0], parse_json( blocks[1] )]
|
155
|
-
elsif blocks.size == 3
|
156
|
-
## note: returned in different order
|
157
|
-
## optional option block that comes first returned last!
|
158
|
-
[blocks[1], parse_json( blocks[2] ), blocks[0]]
|
159
|
-
else
|
160
|
-
puts "!! ERROR: expected two or three text blocks in >#{path}<; got #{blocks.size}"
|
161
|
-
exit 1
|
162
|
-
end
|
163
|
-
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_club_index_history.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestClubHistoryIndex < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
txt =<<TXT
|
14
|
+
= England
|
15
|
+
|
16
|
+
## note: use history log by season (instead of by year) - why? why not?
|
17
|
+
##
|
18
|
+
## note/warn/remember !! a line starting with arrow (=>)
|
19
|
+
## will get turned into a heading 1!!!
|
20
|
+
## as an ascii-alternative to ⇒ use >> or -> or ??? - why? why not?
|
21
|
+
|
22
|
+
== 1930/1
|
23
|
+
MOVE South Shields FC, South Shields ⇒ Gateshead FC, Gateshead
|
24
|
+
|
25
|
+
|
26
|
+
== 1929/30
|
27
|
+
RENAME The Wednesday FC, Sheffield ⇒ Sheffield Wednesday
|
28
|
+
|
29
|
+
== 1927/8
|
30
|
+
RENAME The Arsenal FC, London ⇒ Arsenal FC
|
31
|
+
|
32
|
+
|
33
|
+
== 1914/5
|
34
|
+
RENAME Woolwich Arsenal FC, London ⇒ The Arsenal FC
|
35
|
+
|
36
|
+
|
37
|
+
== 1911/2
|
38
|
+
REFORM Burslem Port Vale FC, Burslem ⇒ Port Vale FC, Stoke-on-Trent
|
39
|
+
## the towns of Burslem having been merged in 1910 with the towns of Fenton, Hanley,
|
40
|
+
## Longton, Stoke-upon-Trent and Tunstall as the city of Stoke-on-Trent
|
41
|
+
|
42
|
+
|
43
|
+
== 1909/10
|
44
|
+
RENAME Chesterfield Town FC, Chesterfield ⇒ Chesterfield FC
|
45
|
+
|
46
|
+
|
47
|
+
== 1905/6
|
48
|
+
RENAME Chesterfield FC, Chesterfield ⇒ Chesterfield Town FC
|
49
|
+
Small Heath FC, Birmingham ⇒ Birmingham FC
|
50
|
+
|
51
|
+
== 1902/3
|
52
|
+
REFORM Newton Heath FC, Manchester ⇒ Manchester United
|
53
|
+
|
54
|
+
== 1901/2
|
55
|
+
MERGE Burton Swifts FC, Burton-upon-Trent
|
56
|
+
Burton Wanderers FC, Burton-upon-Trent
|
57
|
+
⇒ Burton United FC
|
58
|
+
|
59
|
+
BANKRUPT Newton Heath FC, Manchester
|
60
|
+
|
61
|
+
|
62
|
+
== 1899/00
|
63
|
+
MERGE Blackpool FC, Blackpool
|
64
|
+
South Shore FC, Blackpool
|
65
|
+
⇒ Blackpool FC
|
66
|
+
|
67
|
+
== 1898/9
|
68
|
+
RENAME Glossop North End FC, Glossop ⇒ Glossop FC
|
69
|
+
|
70
|
+
|
71
|
+
== 1895/6
|
72
|
+
RENAME Walsall Town Swifts FC, Walsall ⇒ Walsall FC
|
73
|
+
|
74
|
+
|
75
|
+
== 1894/5
|
76
|
+
REFORM Ardwick FC, Manchester ⇒ Manchester City FC
|
77
|
+
|
78
|
+
== 1893/4
|
79
|
+
BANKRUPT Ardwick FC, Manchester
|
80
|
+
|
81
|
+
MERGE Newcastle West End FC, Newcastle-upon-Tyne
|
82
|
+
Newcastle East End FC, Newcastle-upon-Tyne
|
83
|
+
⇒ Newcastle United FC
|
84
|
+
|
85
|
+
== 1892/3
|
86
|
+
RENAME Royal Arsenal FC, London ⇒ Woolwich Arsenal FC
|
87
|
+
TXT
|
88
|
+
|
89
|
+
recs = SportDb::Import::ClubHistoryReader.parse( txt )
|
90
|
+
|
91
|
+
history = SportDb::Import::ClubHistoryIndex.new
|
92
|
+
history.add( recs )
|
93
|
+
|
94
|
+
pp history.errors
|
95
|
+
pp history.mappings
|
96
|
+
|
97
|
+
# [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
|
98
|
+
# [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
|
99
|
+
# [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
|
100
|
+
assert_equal 'Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '2000/1' )
|
101
|
+
assert_equal 'Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1927/8' )
|
102
|
+
assert_equal 'The Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1926/7' )
|
103
|
+
assert_equal 'Woolwich Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1913/4' )
|
104
|
+
assert_equal 'Royal Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1891/2' )
|
105
|
+
end
|
106
|
+
|
107
|
+
end # class TestClubHistoryIndex
|