sportdb-formats 1.1.2 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -13
  3. data/Rakefile +1 -1
  4. data/lib/sportdb/formats.rb +5 -0
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +9 -11
  7. data/lib/sportdb/formats/league/league_index.rb +22 -18
  8. data/lib/sportdb/formats/league/league_outline_reader.rb +4 -1
  9. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  10. data/lib/sportdb/formats/match/match_parser.rb +27 -15
  11. data/lib/sportdb/formats/match/match_parser_csv.rb +148 -21
  12. data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
  13. data/lib/sportdb/formats/name_helper.rb +4 -1
  14. data/lib/sportdb/formats/package.rb +30 -8
  15. data/lib/sportdb/formats/score/score_formats.rb +19 -0
  16. data/lib/sportdb/formats/score/score_parser.rb +4 -2
  17. data/lib/sportdb/formats/structs/match.rb +2 -0
  18. data/lib/sportdb/formats/structs/team.rb +7 -0
  19. data/lib/sportdb/formats/team/club_index.rb +13 -11
  20. data/lib/sportdb/formats/team/club_index_history.rb +138 -0
  21. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  22. data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
  23. data/lib/sportdb/formats/version.rb +1 -1
  24. data/test/helper.rb +47 -81
  25. data/test/test_club_index_history.rb +107 -0
  26. data/test/test_club_reader_history.rb +212 -0
  27. data/test/test_datafile_package.rb +1 -1
  28. data/test/test_match_status_parser.rb +49 -0
  29. data/test/test_scores.rb +2 -0
  30. metadata +10 -17
  31. data/test/test_conf.rb +0 -65
  32. data/test/test_csv_match_parser.rb +0 -114
  33. data/test/test_csv_match_parser_utils.rb +0 -20
  34. data/test/test_match_auto.rb +0 -72
  35. data/test/test_match_auto_champs.rb +0 -45
  36. data/test/test_match_auto_euro.rb +0 -37
  37. data/test/test_match_auto_relegation.rb +0 -41
  38. data/test/test_match_auto_worldcup.rb +0 -61
  39. data/test/test_match_champs.rb +0 -27
  40. data/test/test_match_eng.rb +0 -26
  41. data/test/test_match_euro.rb +0 -27
  42. data/test/test_match_start_date.rb +0 -44
  43. data/test/test_match_worldcup.rb +0 -27
@@ -0,0 +1,203 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class ClubHistoryReader
9
+
10
+ def catalog() Import.catalog; end
11
+
12
+
13
+
14
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
15
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
16
+ parse( txt )
17
+ end
18
+
19
+ def self.parse( txt )
20
+ new( txt ).parse
21
+ end
22
+
23
+ def initialize( txt )
24
+ @txt = txt
25
+ end
26
+
27
+
28
+ ###
29
+ ## RENAME/RENAMED
30
+ ## MOVE/MOVED
31
+ ## BANKRUPT/BANKRUPTED
32
+ ## REFORM/REFORMED
33
+ ## MERGE/MERGED - allow + or ++ or +++ or ; for "inline" - why? why not?
34
+
35
+
36
+ KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
37
+ MOVED?|
38
+ BANKRUPT(?:ED)?|
39
+ REFORM(?:ED)?|
40
+ MERGED?
41
+ )
42
+ [ ]+
43
+ (?<text>.*) # rest of text
44
+ $
45
+ }x
46
+
47
+
48
+ def parse
49
+ recs = []
50
+ last_rec = nil
51
+
52
+ last_country = nil
53
+ last_season = nil
54
+ last_keyword = nil
55
+ last_teams = []
56
+
57
+ OutlineReader.parse( @txt ).each do |node|
58
+ if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
59
+ heading_level = node[0][1].to_i
60
+ heading = node[1]
61
+
62
+ puts "heading #{heading_level} >#{heading}<"
63
+
64
+
65
+ if heading_level == 1
66
+ ## assume country in heading; allow all "formats" supported by parse e.g.
67
+ ## Österreich • Austria (at)
68
+ ## Österreich • Austria
69
+ ## Austria
70
+ ## Deutschland (de) • Germany
71
+ country = catalog.countries.parse( heading )
72
+ ## check country code - MUST exist for now!!!!
73
+ if country.nil?
74
+ puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
75
+ exit 1
76
+ end
77
+ puts " country >#{heading}< => #{country.name}, #{country.key}"
78
+ last_country = country
79
+ last_season = nil ## reset "lower levels" - season & keyword
80
+ last_keyword = nil
81
+ elsif heading_level == 2
82
+ ## assume season
83
+ season = Season.new( heading )
84
+ puts " season >#{heading}< => #{season.key}"
85
+ last_season = season ## reset "lower levels" - keyword
86
+ last_keyword = nil
87
+ else
88
+ puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
89
+ exit 1
90
+ end
91
+
92
+ elsif node[0] == :p ## paragraph with (text) lines
93
+ if last_country.nil?
94
+ puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
95
+ exit 1
96
+ end
97
+ if last_season.nil?
98
+ puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
99
+ exit 1
100
+ end
101
+
102
+ lines = node[1]
103
+ lines.each do |line|
104
+ if m=line.match(KEYWORD_LINE_RE) ## extract keyword and continue
105
+ keyword = m[:keyword]
106
+ line = m[:text].strip
107
+
108
+ puts " keyword #{keyword}"
109
+ last_keyword = case keyword ## "normalize" keywords
110
+ when 'BANKRUPT', 'BANKRUPTED'
111
+ 'BANKRUPT'
112
+ when 'RENAME', 'RENAMED'
113
+ 'RENAME'
114
+ when 'REFORM', 'REFORMED'
115
+ 'REFORM'
116
+ when 'MOVE', 'MOVED'
117
+ 'MOVE'
118
+ when 'MERGE', 'MERGED'
119
+ 'MERGE'
120
+ else
121
+ puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
122
+ exit 1
123
+ end
124
+
125
+ last_teams = []
126
+ end
127
+
128
+ if last_keyword.nil?
129
+ puts "!!! ERROR [club history reader] - line with keyword expected - got:"
130
+ puts line
131
+ exit 1
132
+ end
133
+
134
+ if last_keyword == 'BANKRUPT'
135
+ ## requires / expects one team in one line
136
+ recs << [ last_keyword, last_season.key,
137
+ [ squish(line), last_country.key ]
138
+ ]
139
+ elsif last_keyword == 'RENAME' ||
140
+ last_keyword == 'REFORM' ||
141
+ last_keyword == 'MOVE'
142
+ ## requires / expects two teams in one line (separated by ⇒ or such)
143
+ teams = line.split( '⇒' )
144
+ if teams.size != 2
145
+ puts "!!! ERROR [club history reader] - expected two teams - got:"
146
+ pp teams
147
+ exit 1
148
+ end
149
+ teams = teams.map {|team| squish(team.strip) } ## remove whitespaces
150
+ recs << [ last_keyword, last_season.key,
151
+ [ teams[0], last_country.key ],
152
+ [ teams[1], last_country.key ]
153
+ ]
154
+ elsif last_keyword == 'MERGE'
155
+ ## check if line starts with separator
156
+ ## otherwise collect to be merged teams
157
+ if line.start_with?( '⇒' )
158
+ if last_teams.size < 2
159
+ puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
160
+ pp last_teams
161
+ exit 1
162
+ end
163
+ ## auto-add country to all teams
164
+ teams = last_teams.map {|team| [team, last_country.key]}
165
+ recs << [ last_keyword, last_season.key,
166
+ teams,
167
+ [ squish(line.sub('⇒','').strip), last_country.key ]
168
+ ]
169
+
170
+ last_teams = []
171
+ else
172
+ last_teams << squish(line)
173
+ end
174
+ else
175
+ puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
176
+ exit 1
177
+ end
178
+ end # each line (in paragraph)
179
+ else
180
+ puts "** !!! ERROR [club history reader] - unknown line type:"
181
+ pp node
182
+ exit 1
183
+ end
184
+ end
185
+
186
+ recs
187
+ end # method parse
188
+
189
+
190
+ ###############
191
+ ## helper
192
+
193
+ def squish( str )
194
+ ## collapse runs of spaces into a single space (note: only spaces, not tabs or other whitespace)
195
+ str.gsub( /[ ]+/,' ' )
196
+ end
197
+
198
+
199
+ end # class ClubHistoryReader
200
+
201
+
202
+ end ## module Import
203
+ end ## module SportDb
@@ -36,17 +36,16 @@ class ClubPropsReader
36
36
 
37
37
  ## find / match club by (canonical) name
38
38
  m = catalog.clubs.match( name )
39
- if m && m.size > 1
39
+ if m.size > 1
40
40
  puts "** !!! WARN !!! ambigious (multiple) club matches (#{m.size}) for name >#{name}< in props row:"
41
41
  pp rec
42
42
  pp m
43
43
 
44
44
  ## todo/fix: try filter by canonical name if more than one match
45
45
  m = m.select { |club| club.name == name }
46
- m = nil if m.empty? ## note: reset to nil if no more matches
47
46
  end
48
47
 
49
- if m.nil?
48
+ if m.empty?
50
49
  puts "** !!! ERROR !!! no club match for (canonical) name >#{name}< in props row:"
51
50
  pp rec
52
51
  exit 1
@@ -7,7 +7,7 @@ module Formats
7
7
 
8
8
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
9
9
  MINOR = 1
10
- PATCH = 2
10
+ PATCH = 3
11
11
  VERSION = [MAJOR,MINOR,PATCH].join('.')
12
12
 
13
13
  def self.version
@@ -55,12 +55,56 @@ TXT
55
55
  = England
56
56
 
57
57
  Chelsea FC
58
- Arsenal FC
59
58
  Tottenham Hotspur
60
59
  West Ham United
61
60
  Crystal Palace
62
- Manchester United
63
- Manchester City
61
+
62
+ ### note add move entires for testing club name history
63
+ Manchester United FC
64
+ | Manchester United
65
+ | Newton Heath FC
66
+
67
+ Manchester City FC
68
+ | Manchester City
69
+ | Ardwick FC
70
+
71
+ Arsenal FC
72
+ | The Arsenal FC
73
+ | Woolwich Arsenal FC
74
+ | Royal Arsenal FC
75
+
76
+ Gateshead FC
77
+ | South Shields FC
78
+
79
+ Sheffield Wednesday
80
+ | The Wednesday FC
81
+
82
+ Port Vale FC
83
+ | Burslem Port Vale FC
84
+
85
+ Chesterfield FC
86
+ | Chesterfield Town FC
87
+
88
+ Birmingham FC
89
+ | Small Heath FC
90
+
91
+ Burton Swifts FC
92
+ Burton Wanderers FC
93
+ Burton United FC
94
+
95
+ Blackpool FC
96
+ South Shore FC
97
+
98
+ Glossop FC
99
+ | Glossop North End FC
100
+
101
+ Walsall FC
102
+ | Walsall Town Swifts FC
103
+
104
+
105
+ Newcastle West End FC
106
+ Newcastle East End FC
107
+ Newcastle United FC
64
108
  TXT
65
109
 
66
110
  index = ClubIndex.new
@@ -83,81 +127,3 @@ end # module SportDb
83
127
 
84
128
 
85
129
 
86
- ################
87
- ## helper
88
-
89
- def parse_auto_conf( txt, lang: 'en', start: nil )
90
- start = start ? start : Date.new( 2017, 7, 1 )
91
-
92
- SportDb::Import.config.lang = lang
93
-
94
- parser = SportDb::AutoConfParser.new( txt, start )
95
- parser.parse
96
- end
97
-
98
- def parse_conf( txt )
99
- parser = SportDb::ConfParser.new( txt )
100
- parser.parse
101
- end
102
-
103
-
104
- ## note: json always returns hash tables with string keys (not symbols),
105
- ## thus, always stringify keys before comparing!!!!
106
- class Object
107
- def deep_stringify_keys
108
- if self.is_a? Hash
109
- self.reduce({}) {|memo,(k,v)| memo[k.to_s] = v.deep_stringify_keys; memo }
110
- elsif self.is_a? Array
111
- self.reduce([]) {|memo,v | memo << v.deep_stringify_keys; memo }
112
- else
113
- self
114
- end
115
- end
116
- end
117
-
118
-
119
- def read_blocks( path )
120
- txt = File.open( path, 'r:utf-8' ).read
121
-
122
- blocks = []
123
- buf = String.new('')
124
- txt.each_line do |line|
125
- if line =~ /^[ ]*
126
- ([>]{3,} |
127
- [<]{3,})
128
- [ ]*
129
- $/x ## three or more markers
130
- blocks << buf
131
- buf = String.new('')
132
- else
133
- buf << line
134
- end
135
- end
136
- blocks << buf
137
- blocks
138
- end
139
-
140
-
141
- def parse_json( str )
142
- ## note: allow empty string; fall back to empty hash
143
- if str.strip.empty?
144
- {}
145
- else
146
- JSON.parse( str )
147
- end
148
- end
149
-
150
- def read_test( path )
151
- blocks = read_blocks( "#{SportDb::Test.data_dir}/football.txt/#{path}" )
152
-
153
- if blocks.size == 2
154
- [blocks[0], parse_json( blocks[1] )]
155
- elsif blocks.size == 3
156
- ## note: returned in different order
157
- ## optional option block that comes first returned last!
158
- [blocks[1], parse_json( blocks[2] ), blocks[0]]
159
- else
160
- puts "!! ERROR: expected two or three text blocks in >#{path}<; got #{blocks.size}"
161
- exit 1
162
- end
163
- end
@@ -0,0 +1,107 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_club_index_history.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestClubHistoryIndex < MiniTest::Test
11
+
12
+ def test_eng
13
+ txt =<<TXT
14
+ = England
15
+
16
+ ## note: use history log by season (instead of by year) - why? why not?
17
+ ##
18
+ ## note/warn/remember !! a line starting with arrow (=>)
19
+ ## will get turned into a heading 1!!!
20
+ ## as an ascii-alternative to ⇒ use >> or -> or ??? - why? why not?
21
+
22
+ == 1930/1
23
+ MOVE South Shields FC, South Shields ⇒ Gateshead FC, Gateshead
24
+
25
+
26
+ == 1929/30
27
+ RENAME The Wednesday FC, Sheffield ⇒ Sheffield Wednesday
28
+
29
+ == 1927/8
30
+ RENAME The Arsenal FC, London ⇒ Arsenal FC
31
+
32
+
33
+ == 1914/5
34
+ RENAME Woolwich Arsenal FC, London ⇒ The Arsenal FC
35
+
36
+
37
+ == 1911/2
38
+ REFORM Burslem Port Vale FC, Burslem ⇒ Port Vale FC, Stoke-on-Trent
39
+ ## the towns of Burslem having been merged in 1910 with the towns of Fenton, Hanley,
40
+ ## Longton, Stoke-upon-Trent and Tunstall as the city of Stoke-on-Trent
41
+
42
+
43
+ == 1909/10
44
+ RENAME Chesterfield Town FC, Chesterfield ⇒ Chesterfield FC
45
+
46
+
47
+ == 1905/6
48
+ RENAME Chesterfield FC, Chesterfield ⇒ Chesterfield Town FC
49
+ Small Heath FC, Birmingham ⇒ Birmingham FC
50
+
51
+ == 1902/3
52
+ REFORM Newton Heath FC, Manchester ⇒ Manchester United
53
+
54
+ == 1901/2
55
+ MERGE Burton Swifts FC, Burton-upon-Trent
56
+ Burton Wanderers FC, Burton-upon-Trent
57
+ ⇒ Burton United FC
58
+
59
+ BANKRUPT Newton Heath FC, Manchester
60
+
61
+
62
+ == 1899/00
63
+ MERGE Blackpool FC, Blackpool
64
+ South Shore FC, Blackpool
65
+ ⇒ Blackpool FC
66
+
67
+ == 1898/9
68
+ RENAME Glossop North End FC, Glossop ⇒ Glossop FC
69
+
70
+
71
+ == 1895/6
72
+ RENAME Walsall Town Swifts FC, Walsall ⇒ Walsall FC
73
+
74
+
75
+ == 1894/5
76
+ REFORM Ardwick FC, Manchester ⇒ Manchester City FC
77
+
78
+ == 1893/4
79
+ BANKRUPT Ardwick FC, Manchester
80
+
81
+ MERGE Newcastle West End FC, Newcastle-upon-Tyne
82
+ Newcastle East End FC, Newcastle-upon-Tyne
83
+ ⇒ Newcastle United FC
84
+
85
+ == 1892/3
86
+ RENAME Royal Arsenal FC, London ⇒ Woolwich Arsenal FC
87
+ TXT
88
+
89
+ recs = SportDb::Import::ClubHistoryReader.parse( txt )
90
+
91
+ history = SportDb::Import::ClubHistoryIndex.new
92
+ history.add( recs )
93
+
94
+ pp history.errors
95
+ pp history.mappings
96
+
97
+ # [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
98
+ # [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
99
+ # [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
100
+ assert_equal 'Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '2000/1' )
101
+ assert_equal 'Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1927/8' )
102
+ assert_equal 'The Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1926/7' )
103
+ assert_equal 'Woolwich Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1913/4' )
104
+ assert_equal 'Royal Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1891/2' )
105
+ end
106
+
107
+ end # class TestClubHistoryIndex