sportdb-formats 1.1.2 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +6 -13
- data/Rakefile +1 -1
- data/lib/sportdb/formats.rb +5 -0
- data/lib/sportdb/formats/country/country_index.rb +2 -2
- data/lib/sportdb/formats/event/event_index.rb +9 -11
- data/lib/sportdb/formats/league/league_index.rb +22 -18
- data/lib/sportdb/formats/league/league_outline_reader.rb +4 -1
- data/lib/sportdb/formats/league/league_reader.rb +7 -1
- data/lib/sportdb/formats/match/match_parser.rb +27 -15
- data/lib/sportdb/formats/match/match_parser_csv.rb +148 -21
- data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
- data/lib/sportdb/formats/name_helper.rb +4 -1
- data/lib/sportdb/formats/package.rb +30 -8
- data/lib/sportdb/formats/score/score_formats.rb +19 -0
- data/lib/sportdb/formats/score/score_parser.rb +4 -2
- data/lib/sportdb/formats/structs/match.rb +2 -0
- data/lib/sportdb/formats/structs/team.rb +7 -0
- data/lib/sportdb/formats/team/club_index.rb +13 -11
- data/lib/sportdb/formats/team/club_index_history.rb +138 -0
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
- data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
- data/lib/sportdb/formats/version.rb +1 -1
- data/test/helper.rb +47 -81
- data/test/test_club_index_history.rb +107 -0
- data/test/test_club_reader_history.rb +212 -0
- data/test/test_datafile_package.rb +1 -1
- data/test/test_match_status_parser.rb +49 -0
- data/test/test_scores.rb +2 -0
- metadata +10 -17
- data/test/test_conf.rb +0 -65
- data/test/test_csv_match_parser.rb +0 -114
- data/test/test_csv_match_parser_utils.rb +0 -20
- data/test/test_match_auto.rb +0 -72
- data/test/test_match_auto_champs.rb +0 -45
- data/test/test_match_auto_euro.rb +0 -37
- data/test/test_match_auto_relegation.rb +0 -41
- data/test/test_match_auto_worldcup.rb +0 -61
- data/test/test_match_champs.rb +0 -27
- data/test/test_match_eng.rb +0 -26
- data/test/test_match_euro.rb +0 -27
- data/test/test_match_start_date.rb +0 -44
- data/test/test_match_worldcup.rb +0 -27
@@ -0,0 +1,203 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
module Import

##
# Reads a club history log written in outline format and turns it into
# a flat list of records.
#
# Expected document layout (as handled by #parse):
#   - heading 1  => country (resolved via catalog.countries.parse)
#   - heading 2  => season  (wrapped in Season.new)
#   - paragraphs => one or more lines; a line may start with a keyword
#                   (RENAME, MOVE, BANKRUPT, REFORM, MERGE or the
#                   past-tense form); lines without a keyword continue
#                   with the last keyword seen
#
# Record shapes returned by #parse:
#   ['BANKRUPT', season_key, [team, country_key]]
#   ['RENAME' | 'REFORM' | 'MOVE', season_key, [team_old, country_key], [team_new, country_key]]
#   ['MERGE', season_key, [[team, country_key], ...], [team_new, country_key]]
#
# Note: on any format error the reader prints a message and
#       terminates the process with exit 1.
class ClubHistoryReader

  ## shortcut to the shared import catalog (used for the country lookup)
  def catalog() Import.catalog; end


  ## Reads the file at path (as utf-8) and parses its content.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parses the passed-in text; returns an array of records (see class notes).
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end


  ###
  ## RENAME/RENAMED
  ## MOVE/MOVED
  ## BANKRUPT/BANKRUPTED
  ## REFORM/REFORMED
  ## MERGE/MERGED  - allow + or ++ or +++ or ; for "inline" - why? why not?


  KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
                                    MOVED?|
                                    BANKRUPT(?:ED)?|
                                    REFORM(?:ED)?|
                                    MERGED?
                         )
                         [ ]+
                         (?<text>.*)    # rest of text
                         $
                      }x

  ## maps every accepted keyword spelling to its "normalized" form
  KEYWORD_ALIASES = {
    'BANKRUPT' => 'BANKRUPT',  'BANKRUPTED' => 'BANKRUPT',
    'RENAME'   => 'RENAME',    'RENAMED'    => 'RENAME',
    'REFORM'   => 'REFORM',    'REFORMED'   => 'REFORM',
    'MOVE'     => 'MOVE',      'MOVED'      => 'MOVE',
    'MERGE'    => 'MERGE',     'MERGED'     => 'MERGE',
  }.freeze


  ## Walks all outline nodes and collects the history records.
  ##   Returns an array of records (see class notes for the shapes).
  def parse
    recs = []

    last_country = nil
    last_season  = nil
    last_keyword = nil
    last_teams   = []     ## teams collected so far for a (multi-line) MERGE

    OutlineReader.parse( @txt ).each do |node|
      if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
        heading_level = node[0][1].to_i    ## e.g. :h2 => 2
        heading       = node[1]

        puts "heading #{heading_level} >#{heading}<"


        if heading_level == 1
          ## assume country in heading; allow all "formats" supported by parse e.g.
          ##   Österreich • Austria (at)
          ##   Österreich • Austria
          ##   Austria
          ##   Deutschland (de) • Germany
          country = catalog.countries.parse( heading )
          ## check country code - MUST exist for now!!!!
          if country.nil?
            puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
            exit 1
          end
          puts " country >#{heading}< => #{country.name}, #{country.key}"
          last_country = country
          last_season  = nil    ## reset "lower levels" - season & keyword
          last_keyword = nil
        elsif heading_level == 2
          ## assume season
          season = Season.new( heading )
          puts " season >#{heading}< => #{season.key}"
          last_season  = season    ## reset "lower levels" - keyword
          last_keyword = nil
        else
          puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
          exit 1
        end

      elsif node[0] == :p    ## paragraph with (text) lines
        if last_country.nil?
          puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
          exit 1
        end
        if last_season.nil?
          puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
          exit 1
        end

        lines = node[1]
        lines.each do |line|
          if m=line.match(KEYWORD_LINE_RE)    ## extract keyword and continue
            keyword = m[:keyword]
            line    = m[:text].strip

            puts " keyword #{keyword}"
            last_keyword = normalize_keyword( keyword )

            last_teams = []    ## note: a new keyword always starts a fresh team list
          end

          if last_keyword.nil?
            puts "!!! ERROR [club history reader] - line with keyword expected - got:"
            puts line
            exit 1
          end

          if last_keyword == 'BANKRUPT'
            ## requires / expects one team in one line
            recs << [ last_keyword, last_season.key,
                      [ squish(line), last_country.key ]
                    ]
          elsif last_keyword == 'RENAME' ||
                last_keyword == 'REFORM' ||
                last_keyword == 'MOVE'
            ## requires / expects two teams in one line (separated by ⇒ or such)
            teams = line.split( '⇒' )
            if teams.size != 2
              puts "!!! ERROR [club history reader] - expected two teams - got:"
              pp teams
              exit 1
            end
            teams = teams.map {|team| squish(team.strip) }   ## remove whitespaces
            recs << [ last_keyword, last_season.key,
                      [ teams[0], last_country.key ],
                      [ teams[1], last_country.key ]
                    ]
          elsif last_keyword == 'MERGE'
            ## check if line starts with separator
            ##   otherwise collect to be merged teams
            if line.start_with?( '⇒' )
              if last_teams.size < 2
                puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
                pp last_teams
                exit 1
              end
              ## auto-add country to all teams
              teams = last_teams.map {|team| [team, last_country.key]}
              recs << [ last_keyword, last_season.key,
                        teams,
                        [ squish(line.sub('⇒','').strip), last_country.key ]
                      ]

              last_teams = []
            else
              last_teams << squish(line)
            end
          else
            puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
            exit 1
          end
        end  # each line (in paragraph)
      else
        puts "** !!! ERROR [club history reader] - unknown line type:"
        pp node
        exit 1
      end
    end

    recs
  end # method parse


  ###############
  ## helper

  ## Collapses every run of whitespace (spaces, tabs, non-breaking
  ##  spaces, etc.) into a single space and trims both ends.
  def squish( str )
    str.gsub( /[[:space:]]+/, ' ' ).strip
  end


  private

  ## Maps a matched keyword (e.g. RENAMED) to its normalized form (e.g. RENAME);
  ##  prints an error and exits if the keyword is not in the table.
  def normalize_keyword( keyword )
    KEYWORD_ALIASES.fetch( keyword ) do
      puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
      exit 1
    end
  end

end # class ClubHistoryReader


end ## module Import
end ## module SportDb
|
@@ -36,17 +36,16 @@ class ClubPropsReader
|
|
36
36
|
|
37
37
|
## find / match club by (canonical) name
|
38
38
|
m = catalog.clubs.match( name )
|
39
|
-
if m
|
39
|
+
if m.size > 1
|
40
40
|
puts "** !!! WARN !!! ambigious (multiple) club matches (#{m.size}) for name >#{name}< in props row:"
|
41
41
|
pp rec
|
42
42
|
pp m
|
43
43
|
|
44
44
|
## todo/fix: try filter by canonical name if more than one match
|
45
45
|
m = m.select { |club| club.name == name }
|
46
|
-
m = nil if m.empty? ## note: reset to nil if no more matches
|
47
46
|
end
|
48
47
|
|
49
|
-
if m.
|
48
|
+
if m.empty?
|
50
49
|
puts "** !!! ERROR !!! no club match for (canonical) name >#{name}< in props row:"
|
51
50
|
pp rec
|
52
51
|
exit 1
|
data/test/helper.rb
CHANGED
@@ -55,12 +55,56 @@ TXT
|
|
55
55
|
= England
|
56
56
|
|
57
57
|
Chelsea FC
|
58
|
-
Arsenal FC
|
59
58
|
Tottenham Hotspur
|
60
59
|
West Ham United
|
61
60
|
Crystal Palace
|
62
|
-
|
63
|
-
|
61
|
+
|
62
|
+
### note add move entires for testing club name history
|
63
|
+
Manchester United FC
|
64
|
+
| Manchester United
|
65
|
+
| Newton Heath FC
|
66
|
+
|
67
|
+
Manchester City FC
|
68
|
+
| Manchester City
|
69
|
+
| Ardwick FC
|
70
|
+
|
71
|
+
Arsenal FC
|
72
|
+
| The Arsenal FC
|
73
|
+
| Woolwich Arsenal FC
|
74
|
+
| Royal Arsenal FC
|
75
|
+
|
76
|
+
Gateshead FC
|
77
|
+
| South Shields FC
|
78
|
+
|
79
|
+
Sheffield Wednesday
|
80
|
+
| The Wednesday FC
|
81
|
+
|
82
|
+
Port Vale FC
|
83
|
+
| Burslem Port Vale FC
|
84
|
+
|
85
|
+
Chesterfield FC
|
86
|
+
| Chesterfield Town FC
|
87
|
+
|
88
|
+
Birmingham FC
|
89
|
+
| Small Heath FC
|
90
|
+
|
91
|
+
Burton Swifts FC
|
92
|
+
Burton Wanderers FC
|
93
|
+
Burton United FC
|
94
|
+
|
95
|
+
Blackpool FC
|
96
|
+
South Shore FC
|
97
|
+
|
98
|
+
Glossop FC
|
99
|
+
| Glossop North End FC
|
100
|
+
|
101
|
+
Walsall FC
|
102
|
+
| Walsall Town Swifts FC
|
103
|
+
|
104
|
+
|
105
|
+
Newcastle West End FC
|
106
|
+
Newcastle East End FC
|
107
|
+
Newcastle United FC
|
64
108
|
TXT
|
65
109
|
|
66
110
|
index = ClubIndex.new
|
@@ -83,81 +127,3 @@ end # module SportDb
|
|
83
127
|
|
84
128
|
|
85
129
|
|
86
|
-
################
|
87
|
-
## helper
|
88
|
-
|
89
|
-
def parse_auto_conf( txt, lang: 'en', start: nil )
|
90
|
-
start = start ? start : Date.new( 2017, 7, 1 )
|
91
|
-
|
92
|
-
SportDb::Import.config.lang = lang
|
93
|
-
|
94
|
-
parser = SportDb::AutoConfParser.new( txt, start )
|
95
|
-
parser.parse
|
96
|
-
end
|
97
|
-
|
98
|
-
def parse_conf( txt )
|
99
|
-
parser = SportDb::ConfParser.new( txt )
|
100
|
-
parser.parse
|
101
|
-
end
|
102
|
-
|
103
|
-
|
104
|
-
## note: json always returns hash tables with string keys (not symbols),
|
105
|
-
## thus, always stringify keys before comparing!!!!
|
106
|
-
class Object
|
107
|
-
def deep_stringify_keys
|
108
|
-
if self.is_a? Hash
|
109
|
-
self.reduce({}) {|memo,(k,v)| memo[k.to_s] = v.deep_stringify_keys; memo }
|
110
|
-
elsif self.is_a? Array
|
111
|
-
self.reduce([]) {|memo,v | memo << v.deep_stringify_keys; memo }
|
112
|
-
else
|
113
|
-
self
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
|
119
|
-
def read_blocks( path )
|
120
|
-
txt = File.open( path, 'r:utf-8' ).read
|
121
|
-
|
122
|
-
blocks = []
|
123
|
-
buf = String.new('')
|
124
|
-
txt.each_line do |line|
|
125
|
-
if line =~ /^[ ]*
|
126
|
-
([>]{3,} |
|
127
|
-
[<]{3,})
|
128
|
-
[ ]*
|
129
|
-
$/x ## three or more markers
|
130
|
-
blocks << buf
|
131
|
-
buf = String.new('')
|
132
|
-
else
|
133
|
-
buf << line
|
134
|
-
end
|
135
|
-
end
|
136
|
-
blocks << buf
|
137
|
-
blocks
|
138
|
-
end
|
139
|
-
|
140
|
-
|
141
|
-
def parse_json( str )
|
142
|
-
## note: allow empty string; fall back to empty hash
|
143
|
-
if str.strip.empty?
|
144
|
-
{}
|
145
|
-
else
|
146
|
-
JSON.parse( str )
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
def read_test( path )
|
151
|
-
blocks = read_blocks( "#{SportDb::Test.data_dir}/football.txt/#{path}" )
|
152
|
-
|
153
|
-
if blocks.size == 2
|
154
|
-
[blocks[0], parse_json( blocks[1] )]
|
155
|
-
elsif blocks.size == 3
|
156
|
-
## note: returned in different order
|
157
|
-
## optional option block that comes first returned last!
|
158
|
-
[blocks[1], parse_json( blocks[2] ), blocks[0]]
|
159
|
-
else
|
160
|
-
puts "!! ERROR: expected two or three text blocks in >#{path}<; got #{blocks.size}"
|
161
|
-
exit 1
|
162
|
-
end
|
163
|
-
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_club_index_history.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
##
# Checks that a club history log (parsed by ClubHistoryReader and
#  loaded into ClubHistoryIndex) resolves a club name to the
#  name in use for a given season.
#
# Note: uses Minitest::Test (not the legacy MiniTest alias,
#        which newer minitest versions no longer define by default).
class TestClubHistoryIndex < Minitest::Test

  def test_eng
    txt =<<TXT
= England

## note: use history log by season (instead of by year) - why? why not?
##
## note/warn/remember !! a line starting with arrow (=>)
## will get turned into a heading 1!!!
## as an ascii-alternative to ⇒ use >> or -> or ??? - why? why not?

== 1930/1
MOVE South Shields FC, South Shields ⇒ Gateshead FC, Gateshead


== 1929/30
RENAME The Wednesday FC, Sheffield ⇒ Sheffield Wednesday

== 1927/8
RENAME The Arsenal FC, London ⇒ Arsenal FC


== 1914/5
RENAME Woolwich Arsenal FC, London ⇒ The Arsenal FC


== 1911/2
REFORM Burslem Port Vale FC, Burslem ⇒ Port Vale FC, Stoke-on-Trent
## the towns of Burslem having been merged in 1910 with the towns of Fenton, Hanley,
## Longton, Stoke-upon-Trent and Tunstall as the city of Stoke-on-Trent


== 1909/10
RENAME Chesterfield Town FC, Chesterfield ⇒ Chesterfield FC


== 1905/6
RENAME Chesterfield FC, Chesterfield ⇒ Chesterfield Town FC
Small Heath FC, Birmingham ⇒ Birmingham FC

== 1902/3
REFORM Newton Heath FC, Manchester ⇒ Manchester United

== 1901/2
MERGE Burton Swifts FC, Burton-upon-Trent
Burton Wanderers FC, Burton-upon-Trent
⇒ Burton United FC

BANKRUPT Newton Heath FC, Manchester


== 1899/00
MERGE Blackpool FC, Blackpool
South Shore FC, Blackpool
⇒ Blackpool FC

== 1898/9
RENAME Glossop North End FC, Glossop ⇒ Glossop FC


== 1895/6
RENAME Walsall Town Swifts FC, Walsall ⇒ Walsall FC


== 1894/5
REFORM Ardwick FC, Manchester ⇒ Manchester City FC

== 1893/4
BANKRUPT Ardwick FC, Manchester

MERGE Newcastle West End FC, Newcastle-upon-Tyne
Newcastle East End FC, Newcastle-upon-Tyne
⇒ Newcastle United FC

== 1892/3
RENAME Royal Arsenal FC, London ⇒ Woolwich Arsenal FC
TXT

    recs = SportDb::Import::ClubHistoryReader.parse( txt )

    history = SportDb::Import::ClubHistoryIndex.new
    history.add( recs )

    ## dump for debugging / eyeballing
    pp history.errors
    pp history.mappings

    # [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
    #  [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
    #  [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
    assert_equal 'Arsenal FC',          history.find_name_by( name: 'Arsenal FC', season: '2000/1' )
    assert_equal 'Arsenal FC',          history.find_name_by( name: 'Arsenal FC', season: '1927/8' )
    assert_equal 'The Arsenal FC',      history.find_name_by( name: 'Arsenal FC', season: '1926/7' )
    assert_equal 'Woolwich Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1913/4' )
    assert_equal 'Royal Arsenal FC',    history.find_name_by( name: 'Arsenal FC', season: '1891/2' )
  end

end # class TestClubHistoryIndex
|