sportdb-formats 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/Manifest.txt +6 -13
  3. data/Rakefile +1 -1
  4. data/lib/sportdb/formats.rb +5 -0
  5. data/lib/sportdb/formats/country/country_index.rb +2 -2
  6. data/lib/sportdb/formats/event/event_index.rb +9 -11
  7. data/lib/sportdb/formats/league/league_index.rb +22 -18
  8. data/lib/sportdb/formats/league/league_outline_reader.rb +4 -1
  9. data/lib/sportdb/formats/league/league_reader.rb +7 -1
  10. data/lib/sportdb/formats/match/match_parser.rb +27 -15
  11. data/lib/sportdb/formats/match/match_parser_csv.rb +148 -21
  12. data/lib/sportdb/formats/match/match_status_parser.rb +86 -0
  13. data/lib/sportdb/formats/name_helper.rb +4 -1
  14. data/lib/sportdb/formats/package.rb +30 -8
  15. data/lib/sportdb/formats/score/score_formats.rb +19 -0
  16. data/lib/sportdb/formats/score/score_parser.rb +4 -2
  17. data/lib/sportdb/formats/structs/match.rb +2 -0
  18. data/lib/sportdb/formats/structs/team.rb +7 -0
  19. data/lib/sportdb/formats/team/club_index.rb +13 -11
  20. data/lib/sportdb/formats/team/club_index_history.rb +138 -0
  21. data/lib/sportdb/formats/team/club_reader_history.rb +203 -0
  22. data/lib/sportdb/formats/team/club_reader_props.rb +2 -3
  23. data/lib/sportdb/formats/version.rb +1 -1
  24. data/test/helper.rb +47 -81
  25. data/test/test_club_index_history.rb +107 -0
  26. data/test/test_club_reader_history.rb +212 -0
  27. data/test/test_datafile_package.rb +1 -1
  28. data/test/test_match_status_parser.rb +49 -0
  29. data/test/test_scores.rb +2 -0
  30. metadata +10 -17
  31. data/test/test_conf.rb +0 -65
  32. data/test/test_csv_match_parser.rb +0 -114
  33. data/test/test_csv_match_parser_utils.rb +0 -20
  34. data/test/test_match_auto.rb +0 -72
  35. data/test/test_match_auto_champs.rb +0 -45
  36. data/test/test_match_auto_euro.rb +0 -37
  37. data/test/test_match_auto_relegation.rb +0 -41
  38. data/test/test_match_auto_worldcup.rb +0 -61
  39. data/test/test_match_champs.rb +0 -27
  40. data/test/test_match_eng.rb +0 -26
  41. data/test/test_match_euro.rb +0 -27
  42. data/test/test_match_start_date.rb +0 -44
  43. data/test/test_match_worldcup.rb +0 -27
@@ -0,0 +1,203 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
###
## Reader for club history "logs" written in the outline text format.
##
## Expected input layout (see #parse):
##   heading 1  - country; resolved with catalog.countries.parse
##   heading 2  - season; wrapped with Season.new
##   paragraphs - one or more text lines; a line may start with a keyword
##                (see KEYWORD_LINE_RE) that stays in effect for the lines
##                that follow until the next keyword line or heading
##
## note: malformed input is reported with puts and ends the process
##       with exit 1 (no exception class of its own is raised).
class ClubHistoryReader

  def catalog() Import.catalog; end


  ## read the file at path (as utf-8) and parse it; returns the records of #parse
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## parse the passed-in text; returns the records of #parse
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end


  ###
  ##  RENAME/RENAMED
  ##  MOVE/MOVED
  ##  BANKRUPT/BANKRUPTED
  ##  REFORM/REFORMED
  ##  MERGE/MERGED   - allow + or ++ or +++ or ; for "inline" - why? why not?

  ## every accepted keyword spelling => canonical keyword used in the records
  KEYWORD_ALIASES = {
    'RENAME'   => 'RENAME',   'RENAMED'    => 'RENAME',
    'MOVE'     => 'MOVE',     'MOVED'      => 'MOVE',
    'BANKRUPT' => 'BANKRUPT', 'BANKRUPTED' => 'BANKRUPT',
    'REFORM'   => 'REFORM',   'REFORMED'   => 'REFORM',
    'MERGE'    => 'MERGE',    'MERGED'     => 'MERGE',
  }.freeze

  ## a line that starts with a keyword followed by one or more spaces;
  ##   named captures: keyword and text (the rest of the line)
  KEYWORD_LINE_RE = %r{ ^(?<keyword>RENAMED?|
                                    MOVED?|
                                    BANKRUPT(?:ED)?|
                                    REFORM(?:ED)?|
                                    MERGED?
                          )
                          [ ]+
                          (?<text>.*)    # rest of text
                        $
                      }x


  ## Walks the outline nodes of the text and builds the history records.
  ##
  ## returns an array of records; every record starts with the canonical
  ## keyword and the season key, followed by:
  ##   BANKRUPT             - [team, country key]
  ##   RENAME, REFORM, MOVE - [old team, country key], [new team, country key]
  ##   MERGE                - [[team, country key], ...], [new team, country key]
  ##
  ## note: teams collected for a MERGE that never get a "⇒ new team" line
  ##       cannot be turned into a record; they are reported with a warning
  ##       (instead of getting dropped silently).
  def parse
    recs = []

    last_country = nil
    last_season  = nil
    last_keyword = nil
    last_teams   = []    ## teams collected so far for a (pending) MERGE

    OutlineReader.parse( @txt ).each do |node|
      if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
        heading_level = node[0][1].to_i    ## e.g. :h2 => 2
        heading       = node[1]

        puts "heading #{heading_level} >#{heading}<"

        if heading_level == 1
          ## assume country in heading; allow all "formats" supported by parse e.g.
          ##   Österreich • Austria (at)
          ##   Österreich • Austria
          ##   Austria
          ##   Deutschland (de) • Germany
          country = catalog.countries.parse( heading )
          ## check country code - MUST exist for now!!!!
          if country.nil?
            puts "!!! error [club history reader] - unknown country >#{heading}< - sorry - add country to config to fix"
            exit 1
          end
          puts " country >#{heading}< => #{country.name}, #{country.key}"
          last_country = country
          last_season  = nil   ## reset "lower levels" - season & keyword
          last_keyword = nil
        elsif heading_level == 2
          ## assume season
          season = Season.new( heading )
          puts " season >#{heading}< => #{season.key}"
          last_season  = season   ## reset "lower levels" - keyword
          last_keyword = nil
        else
          puts "!!! ERROR [club history reader] - for now only heading 1 & 2 supported; sorry"
          exit 1
        end

      elsif node[0] == :p   ## paragraph with (text) lines
        if last_country.nil?
          puts "!!! ERROR [club history reader] - country heading 1 required, sorry"
          exit 1
        end
        if last_season.nil?
          puts "!!! ERROR [club history reader] - season heading 2 required, sorry"
          exit 1
        end

        lines = node[1]
        lines.each do |line|
          if m=line.match(KEYWORD_LINE_RE)   ## extract keyword and continue
            keyword = m[:keyword]
            line    = m[:text].strip

            puts " keyword #{keyword}"
            ## "normalize" keywords
            last_keyword = KEYWORD_ALIASES.fetch( keyword ) do
              puts "!!! ERROR [club history reader] - unexpected keyword >#{keyword}<; sorry - don't know how to normalize"
              exit 1
            end

            ## a new keyword starts from scratch;
            ##   report teams of an unfinished MERGE before they get discarded
            warn_unfinished_merge( last_teams )
            last_teams = []
          end

          if last_keyword.nil?
            puts "!!! ERROR [club history reader] - line with keyword expected - got:"
            puts line
            exit 1
          end

          case last_keyword
          when 'BANKRUPT'
            ## requires / expects one team in one line
            recs << [ last_keyword, last_season.key,
                      [ squish(line), last_country.key ]
                    ]
          when 'RENAME', 'REFORM', 'MOVE'
            ## requires / expects two teams in one line (separated by ⇒ or such)
            teams = line.split( '⇒' )
            if teams.size != 2
              puts "!!! ERROR [club history reader] - expected two teams - got:"
              pp teams
              exit 1
            end
            teams = teams.map {|team| squish(team.strip) }   ## remove whitespaces
            recs << [ last_keyword, last_season.key,
                      [ teams[0], last_country.key ],
                      [ teams[1], last_country.key ]
                    ]
          when 'MERGE'
            ## check if line starts with separator
            ##   otherwise collect to be merged teams
            if line.start_with?( '⇒' )
              if last_teams.size < 2
                puts "!!! ERROR [club history reader] - expected two or more teams for MERGE - got:"
                pp last_teams
                exit 1
              end
              ## auto-add country to all teams
              teams = last_teams.map {|team| [team, last_country.key]}
              recs << [ last_keyword, last_season.key,
                        teams,
                        [ squish(line.sub('⇒','').strip), last_country.key ]
                      ]

              last_teams = []
            else
              last_teams << squish(line)
            end
          else
            puts "!!! ERROR [club history reader] - unknown keyword >#{last_keyword}<; cannot process; sorry"
            exit 1
          end
        end  # each line (in paragraph)
      else
        puts "** !!! ERROR [club history reader] - unknown line type:"
        pp node
        exit 1
      end
    end

    ## end of input - nothing left that could complete a pending MERGE
    warn_unfinished_merge( last_teams )

    recs
  end  # method parse


  ###############
  ## helper

  ## collapse every run of spaces into a single space
  ##   note: only the space character is handled; tabs and other
  ##         whitespace are left untouched
  def squish( str )
    str.squeeze( ' ' )
  end


  private

  ## report teams collected for a MERGE that never got a "⇒ new team" line;
  ##   prints nothing if there are no such teams
  def warn_unfinished_merge( teams )
    return if teams.empty?

    puts "!!! WARN [club history reader] - MERGE without (⇒) target team - dropping collected teams:"
    pp teams
  end

end # class ClubHistoryReader
200
+
201
+
202
+ end ## module Import
203
+ end ## module SportDb
@@ -36,17 +36,16 @@ class ClubPropsReader
36
36
 
37
37
  ## find / match club by (canonical) name
38
38
  m = catalog.clubs.match( name )
39
- if m && m.size > 1
39
+ if m.size > 1
40
40
  puts "** !!! WARN !!! ambigious (multiple) club matches (#{m.size}) for name >#{name}< in props row:"
41
41
  pp rec
42
42
  pp m
43
43
 
44
44
  ## todo/fix: try filter by canonical name if more than one match
45
45
  m = m.select { |club| club.name == name }
46
- m = nil if m.empty? ## note: reset to nil if no more matches
47
46
  end
48
47
 
49
- if m.nil?
48
+ if m.empty?
50
49
  puts "** !!! ERROR !!! no club match for (canonical) name >#{name}< in props row:"
51
50
  pp rec
52
51
  exit 1
@@ -7,7 +7,7 @@ module Formats
7
7
 
8
8
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
9
9
  MINOR = 1
10
- PATCH = 2
10
+ PATCH = 3
11
11
  VERSION = [MAJOR,MINOR,PATCH].join('.')
12
12
 
13
13
  def self.version
@@ -55,12 +55,56 @@ TXT
55
55
  = England
56
56
 
57
57
  Chelsea FC
58
- Arsenal FC
59
58
  Tottenham Hotspur
60
59
  West Ham United
61
60
  Crystal Palace
62
- Manchester United
63
- Manchester City
61
+
62
+ ### note add move entires for testing club name history
63
+ Manchester United FC
64
+ | Manchester United
65
+ | Newton Heath FC
66
+
67
+ Manchester City FC
68
+ | Manchester City
69
+ | Ardwick FC
70
+
71
+ Arsenal FC
72
+ | The Arsenal FC
73
+ | Woolwich Arsenal FC
74
+ | Royal Arsenal FC
75
+
76
+ Gateshead FC
77
+ | South Shields FC
78
+
79
+ Sheffield Wednesday
80
+ | The Wednesday FC
81
+
82
+ Port Vale FC
83
+ | Burslem Port Vale FC
84
+
85
+ Chesterfield FC
86
+ | Chesterfield Town FC
87
+
88
+ Birmingham FC
89
+ | Small Heath FC
90
+
91
+ Burton Swifts FC
92
+ Burton Wanderers FC
93
+ Burton United FC
94
+
95
+ Blackpool FC
96
+ South Shore FC
97
+
98
+ Glossop FC
99
+ | Glossop North End FC
100
+
101
+ Walsall FC
102
+ | Walsall Town Swifts FC
103
+
104
+
105
+ Newcastle West End FC
106
+ Newcastle East End FC
107
+ Newcastle United FC
64
108
  TXT
65
109
 
66
110
  index = ClubIndex.new
@@ -83,81 +127,3 @@ end # module SportDb
83
127
 
84
128
 
85
129
 
86
- ################
87
- ## helper
88
-
89
- def parse_auto_conf( txt, lang: 'en', start: nil )
90
- start = start ? start : Date.new( 2017, 7, 1 )
91
-
92
- SportDb::Import.config.lang = lang
93
-
94
- parser = SportDb::AutoConfParser.new( txt, start )
95
- parser.parse
96
- end
97
-
98
- def parse_conf( txt )
99
- parser = SportDb::ConfParser.new( txt )
100
- parser.parse
101
- end
102
-
103
-
104
- ## note: json always returns hash tables with string keys (not symbols),
105
- ## thus, always stringify keys before comparing!!!!
106
- class Object
107
- def deep_stringify_keys
108
- if self.is_a? Hash
109
- self.reduce({}) {|memo,(k,v)| memo[k.to_s] = v.deep_stringify_keys; memo }
110
- elsif self.is_a? Array
111
- self.reduce([]) {|memo,v | memo << v.deep_stringify_keys; memo }
112
- else
113
- self
114
- end
115
- end
116
- end
117
-
118
-
119
- def read_blocks( path )
120
- txt = File.open( path, 'r:utf-8' ).read
121
-
122
- blocks = []
123
- buf = String.new('')
124
- txt.each_line do |line|
125
- if line =~ /^[ ]*
126
- ([>]{3,} |
127
- [<]{3,})
128
- [ ]*
129
- $/x ## three or more markers
130
- blocks << buf
131
- buf = String.new('')
132
- else
133
- buf << line
134
- end
135
- end
136
- blocks << buf
137
- blocks
138
- end
139
-
140
-
141
- def parse_json( str )
142
- ## note: allow empty string; fall back to empty hash
143
- if str.strip.empty?
144
- {}
145
- else
146
- JSON.parse( str )
147
- end
148
- end
149
-
150
- def read_test( path )
151
- blocks = read_blocks( "#{SportDb::Test.data_dir}/football.txt/#{path}" )
152
-
153
- if blocks.size == 2
154
- [blocks[0], parse_json( blocks[1] )]
155
- elsif blocks.size == 3
156
- ## note: returned in different order
157
- ## optional option block that comes first returned last!
158
- [blocks[1], parse_json( blocks[2] ), blocks[0]]
159
- else
160
- puts "!! ERROR: expected two or three text blocks in >#{path}<; got #{blocks.size}"
161
- exit 1
162
- end
163
- end
@@ -0,0 +1,107 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_club_index_history.rb
6
+
7
+
8
+ require 'helper'
9
+
10
## Feeds a club history log for England through ClubHistoryReader into a
## ClubHistoryIndex and checks the name returned by find_name_by for
## Arsenal FC in a number of seasons.
##
## note: inherits from Minitest::Test - the legacy MiniTest constant is
##       no longer defined by default in current minitest releases.
class TestClubHistoryIndex < Minitest::Test

  def test_eng
    txt =<<TXT
= England

## note: use history log by season (instead of by year) - why? why not?
##
## note/warn/remember !! a line starting with arrow (=>)
## will get turned into a heading 1!!!
## as an ascii-alternative to ⇒ use >> or -> or ??? - why? why not?

== 1930/1
MOVE South Shields FC, South Shields ⇒ Gateshead FC, Gateshead


== 1929/30
RENAME The Wednesday FC, Sheffield ⇒ Sheffield Wednesday

== 1927/8
RENAME The Arsenal FC, London ⇒ Arsenal FC


== 1914/5
RENAME Woolwich Arsenal FC, London ⇒ The Arsenal FC


== 1911/2
REFORM Burslem Port Vale FC, Burslem ⇒ Port Vale FC, Stoke-on-Trent
## the towns of Burslem having been merged in 1910 with the towns of Fenton, Hanley,
## Longton, Stoke-upon-Trent and Tunstall as the city of Stoke-on-Trent


== 1909/10
RENAME Chesterfield Town FC, Chesterfield ⇒ Chesterfield FC


== 1905/6
RENAME Chesterfield FC, Chesterfield ⇒ Chesterfield Town FC
Small Heath FC, Birmingham ⇒ Birmingham FC

== 1902/3
REFORM Newton Heath FC, Manchester ⇒ Manchester United

== 1901/2
MERGE Burton Swifts FC, Burton-upon-Trent
Burton Wanderers FC, Burton-upon-Trent
⇒ Burton United FC

BANKRUPT Newton Heath FC, Manchester


== 1899/00
MERGE Blackpool FC, Blackpool
South Shore FC, Blackpool
⇒ Blackpool FC

== 1898/9
RENAME Glossop North End FC, Glossop ⇒ Glossop FC


== 1895/6
RENAME Walsall Town Swifts FC, Walsall ⇒ Walsall FC


== 1894/5
REFORM Ardwick FC, Manchester ⇒ Manchester City FC

== 1893/4
BANKRUPT Ardwick FC, Manchester

MERGE Newcastle West End FC, Newcastle-upon-Tyne
Newcastle East End FC, Newcastle-upon-Tyne
⇒ Newcastle United FC

== 1892/3
RENAME Royal Arsenal FC, London ⇒ Woolwich Arsenal FC
TXT

    recs = SportDb::Import::ClubHistoryReader.parse( txt )

    history = SportDb::Import::ClubHistoryIndex.new
    history.add( recs )

    ## debug output only - not part of the checks below
    pp history.errors
    pp history.mappings

    ## sample of the RENAME entries from the log above that the lookups below exercise:
    # [[1927/28, ["RENAME", [["The Arsenal FC, London", "eng"], ["Arsenal FC", "eng"]]]],
    #  [1914/15, ["RENAME", [["Woolwich Arsenal FC, London", "eng"], ["The Arsenal FC", "eng"]]]],
    #  [1892/93, ["RENAME", [["Royal Arsenal FC, London", "eng"], ["Woolwich Arsenal FC", "eng"]]]]],
    assert_equal 'Arsenal FC',          history.find_name_by( name: 'Arsenal FC', season: '2000/1' )
    assert_equal 'Arsenal FC',          history.find_name_by( name: 'Arsenal FC', season: '1927/8' )
    assert_equal 'The Arsenal FC',      history.find_name_by( name: 'Arsenal FC', season: '1926/7' )
    assert_equal 'Woolwich Arsenal FC', history.find_name_by( name: 'Arsenal FC', season: '1913/4' )
    assert_equal 'Royal Arsenal FC',    history.find_name_by( name: 'Arsenal FC', season: '1891/2' )
  end

end # class TestClubHistoryIndex