sportdb-formats 1.1.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +37 -1
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +102 -12
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -0,0 +1,289 @@
|
|
1
|
+
###
|
2
|
+
# todo - based on ClubReader
|
3
|
+
# share GeoReader or BaseReader or such for both
|
4
|
+
# plus maybe for PlayerReader too!!!
|
5
|
+
#
|
6
|
+
# fix/todo/cleanup - move alt_names_auto from reader to indexer!!!!
|
7
|
+
# indexer now handles unaccent (variants) etc.
|
8
|
+
|
9
|
+
module SportDb
|
10
|
+
module Import
|
11
|
+
|
12
|
+
|
13
|
+
class GroundReader

  ## Shortcut to the shared world lookup (used here for countries).
  def world() Import.world; end


  ## Read the file at path (as utf-8) and parse its text into ground records.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parse the passed-in text; returns an array of ground records.
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end


  ## pattern for checking for address line e.g.
  ##   use just one style / syntax - why? why not?
  ##   Fischhofgasse 12 ~ 1100 Wien     or
  ##   Fischhofgasse 12 // 1100 Wien    or   Fischhofgasse 12 /// 1100 Wien
  ##   Fischhofgasse 12 ++ 1100 Wien    or   Fischhofgasse 12 +++ 1100 Wien
  ADDR_MARKER_RE = %r{ (?: ^|[ ] )                # space or beginning of line
                       (?: ~ | /{2,} | \+{2,} )
                       (?: [ ]|$)                 # space or end of line
                     }x


  ## Walk the outline nodes of the text and build the ground records.
  ##
  ##  - heading nodes (h1..h6) maintain a stack of headings;
  ##      level 1 is assumed to be the country,
  ##      level 2 is used as (or checked against) the geo tree of a record
  ##  - paragraph nodes hold one line per ground, optionally followed
  ##      by alternative name lines (starting with |) and an address line
  ##
  ## Returns the array of records. Note: all errors are reported
  ##   with puts and end the program with exit 1.
  def parse
    recs     = []
    last_rec = nil
    headings = []   ## headings stack

    OutlineReader.parse( @txt ).each do |node|
      if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
        heading_level = node[0][1].to_i
        heading       = node[1]

        puts "heading #{heading_level} >#{heading}<"

        _push_heading( headings, heading_level, heading )

        pp headings

      elsif node[0] == :p    ## paragraph with (text) lines
        node[1].each do |line|
          if line.start_with?( '|' )
            ## assume continuation with line of alternative names
            _assert_last_rec( last_rec, line )
            ##  note: skip leading pipe
            values = line[1..-1].split( '|' )   # names - allow/use pipe(|)
            values = values.map { |value| _norm( value ) }   ## squish/strip etc.

            last_rec.alt_names += values
          elsif line =~ ADDR_MARKER_RE
            ## address line - see ADDR_MARKER_RE for the supported styles
            _assert_last_rec( last_rec, line )
            ##  squish line here - why? why not?
            last_rec.address = _squish( line )
          else
            rec = _parse_ground( line, headings )

            last_rec = rec
            recs << rec
          end
        end  # each line (in paragraph)
      else
        puts "** !!! ERROR !!! [ground reader] - unknown line type:"
        pp node
        exit 1
      end
    end

    recs
  end  # method parse

  #######################
  ###   helpers

  ## Update the headings stack (in-place) for a new heading:
  ##   1) pop all levels deeper than or equal to the new level
  ##   2) fill skipped levels with nil (and warn)
  ##   3) push the country key (level 1) or the heading text (other levels)
  ## note: a heading of question marks only (e.g. ? or ??) keeps the level empty
  def _push_heading( headings, heading_level, heading )
    ## 1) first pop headings if present
    headings.pop   while headings.size+1 > heading_level

    ## 2) add missing (hierarchy) level if
    while headings.size+1 < heading_level
      ## todo/fix: issue warning about "skipping" hierarchy level
      puts "!!! warn [ground reader] - skipping hierarchy level in headings "
      headings.push( nil )
    end

    return if heading =~ /^\?+$/    ## note: use ? or ?? or ??? to reset level to nil

    ## note: if level is 1 assume country for now
    if heading_level == 1
      ## assume country in heading; allow all "formats" supported by parse e.g.
      ##   Österreich • Austria (at)
      ##   Österreich • Austria
      ##   Austria
      ##   Deutschland (de) • Germany
      country = world.countries.parse( heading )
      ## check country code - MUST exist for now!!!!
      if country.nil?
        puts "!!! error [ground reader] - unknown country >#{heading}< - sorry - add country to config to fix"
        exit 1
      end

      headings.push( country.key )
    else
      ## quick hack:
      ##   remove known fill/dummy words incl:
      ##     Provincia San Juan  =>  San Juan   (see argentina, for example)
      ##
      ##  use geo tree long term with alternative names - why? why not?
      words = ['Provincia']
      words.each { |word| heading = heading.gsub( word, '' ) }

      headings.push( heading.strip )
    end

    ## assert that hierarchy level is ok
    if headings.size != heading_level
      ## note: report the size of the stack (headings) and not the length of the heading text
      puts "!!! error - headings hierarchy/stack out of order - #{headings.size}<=>#{heading_level}"
      exit 1
    end
  end

  ## Make sure a ground record is present for attaching
  ##   a continuation (alt names) or an address line
  def _assert_last_rec( last_rec, line )
    return if last_rec

    puts "!!! error [ground reader] - alt names or address line without preceding ground record >#{line}<"
    exit 1
  end

  ## Build a single ground record from a comma-separated line e.g.
  ##   Name | Alt Name, 1925, 17_500, City (District) › Region
  ## and add the country and (missing) geo tree from the headings stack.
  def _parse_ground( line, headings )
    values = line.split( ',' )

    rec = Ground.new

    col = values.shift    ## get first item
    ## note: allow optional alt names for convenience with required canonical name
    names = col.to_s.split( '|' ).map { |name| _norm( name ) }   ## squish/strip etc.

    canonical = names.first      ## canonical name
    alt_names = names.drop( 1 )  ## optional (inline) alt names

    if canonical.nil? || canonical.empty?
      puts "!!! error [ground reader] - missing (canonical) name in line >#{line}<"
      exit 1
    end

    rec.name = canonical    # canonical name (global unique "beautiful/long" name)
    ## note: add optional (inline) alternate names if present
    rec.alt_names += alt_names   unless alt_names.empty?

    ## check if canonical name includes years in brackets;
    ##   note: the years get recorded only - the name itself is kept as is
    ##   todo: move year out of canonical name - why? why not?
    if rec.name =~ /\(([0-9]{4})-\)/       ## e.g. (2014-)
      rec.year     = $1.to_i
    elsif rec.name =~ /\(-([0-9]{4})\)/    ## e.g. (-2014)
      rec.year_end = $1.to_i
    elsif rec.name =~ /\(([0-9]{4})-([0-9]{4})\)/   ## e.g. (2011-2014)
      rec.year     = $1.to_i
      rec.year_end = $2.to_i
    else
      ## todo/check: warn about unknown year format
    end

    ## todo/check - check for unknown format values
    ##   e.g. too many values, duplicate years, etc.
    ##           check for overwriting, etc.

    ## strip and squish (white)spaces
    values.map { |value| _squish( value ) }.each do |value|
      next if value.empty?     ## e.g. trailing comma - do NOT overwrite city with nil

      if value =~ /^\d{4}$/    # e.g 1904
        if rec.year
          puts "!!! error - year already set to #{rec.year} - CANNOT overwrite with #{value}:"
          pp rec
          exit 1
        end
        rec.year = value.to_i
      elsif value =~ /^[0-9_]+$/   # e.g 17_500
        ## skip capacity for now
      else
        ## assume city / geo tree
        geos = split_geo( value )
        city = geos[0]
        ## check for "embedded" district e.g. London (Fulham) or Hamburg (St. Pauli) etc.
        if city =~ /\((.+?)\)/    ## note: use non-greedy (?) match
          rec.district = $1.strip
          city = city.gsub( /\(.+?\)/, '' ).strip
        end
        rec.city = city

        ## cut-off city and keep the rest (of geo tree)
        rec.geos = geos[1..-1]   if geos.size > 1
      end
    end

    ###############
    ## use headings text for geo tree

    ## 1) add country if present
    if headings[0]
      rec.country = world.countries.find( headings[0] )
    else
      ## make it an error - why? why not?
      puts "!!! error - country missing in headings hierarchy - sorry - add to quicklist"
      exit 1
    end

    ## 2) check geo tree with headings hierarchy
    if headings[1]
      geos = split_geo( headings[1] )
      if rec.geos
        if rec.geos[0] != geos[0]
          puts "!!! error - geo tree - headings mismatch >#{rec.geos[0]}< <=> >#{geos[0]}<"
          exit 1
        end
        if rec.geos[1] && rec.geos[1] != geos[1]   ## check optional 2nd level too
          puts "!!! error - geo tree - headings mismatch >#{rec.geos[1]}< <=> >#{geos[1]}<"
          exit 1
        end
      else
        ## add missing region (state/province) from headings hierarchy
        rec.geos = geos
      end
    end

    rec
  end

  ## Split a city / geo tree string into its (stripped) parts e.g.
  ##   León › Guanajuato   =>  ["León", "Guanajuato"]
  ## note: allow > < or › ‹ as separators
  def split_geo( str )
    _squish( str ).split( /[<>‹›]/ ).map { |geo| geo.strip }
  end

  ## norm(alize) helper - squish (spaces)
  ##   and remove dollars ($$$)
  ##   and remove leading and trailing spaces
  def _norm( str )
    ## only extra clean-up of dollars for now ($$$)
    _squish( str.gsub( '$', '' ) )
  end

  ## collapse runs of spaces, tabs and non-breaking spaces into
  ##   a single space and remove leading and trailing spaces
  def _squish( str )
    str.gsub( /[ \t\u00a0]+/, ' ' ).strip
  end

end  # class GroundReader
|
286
|
+
|
287
|
+
|
288
|
+
end ## module Import
|
289
|
+
end ## module SportDb
|
@@ -1,168 +1,152 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
heading
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
##
|
63
|
-
##
|
64
|
-
##
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
##
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
##
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
end
|
154
|
-
end # each line
|
155
|
-
else
|
156
|
-
puts "** !!! ERROR !!! [league reader] - unknown line type:"
|
157
|
-
pp node
|
158
|
-
exit 1
|
159
|
-
end
|
160
|
-
## pp line
|
161
|
-
end
|
162
|
-
recs
|
163
|
-
end # method parse
|
164
|
-
|
165
|
-
end # class LeagueReader
|
166
|
-
|
167
|
-
end ## module Import
|
168
|
-
end ## module SportDb
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
module Import
|
4
|
+
|
5
|
+
|
6
|
+
class LeagueReader

  ## Shortcut to the shared world lookup (used here for countries).
  def world() Import.world; end


  ## Read the file at path (as utf-8) and parse its text into league records.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## Parse the passed-in text; returns an array of league records.
  def self.parse( txt )
    new( txt ).parse
  end


  include Logging

  def initialize( txt )
    @txt = txt
  end

  ## Walk the outline nodes of the text and build the league records.
  ##
  ##  - heading nodes (level 1 only) select the country or switch to
  ##      international clubs / national teams for the lines that follow
  ##  - paragraph nodes hold one line per league starting with the key e.g.
  ##      1  Bundesliga
  ##      optionally followed by alternative name lines (starting with |)
  ##
  ## Returns the array of records. Note: all errors are reported
  ##   with puts and end the program with exit 1.
  def parse
    recs     = []
    last_rec = nil

    country  = nil    # last country
    intl     = false  # is international (league/tournament/cup/competition)
    clubs    = true   # or clubs|national teams

    OutlineReader.parse( @txt ).each do |node|
      if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
        heading_level = node[0][1].to_i
        heading       = node[1]

        logger.debug "heading #{heading_level} >#{heading}<"

        if heading_level != 1
          puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
          pp node    ## note: print the node - no line is available for headings
          exit 1
        end

        ## map to country or international / int'l or national teams
        if heading =~ /national team/i    ## national team tournament
          country = nil
          intl    = true
          clubs   = false
        elsif heading =~ /international|int'l/i   ## int'l club tournament
          country = nil
          intl    = true
          clubs   = true
        else
          ## assume country in heading; allow all "formats" supported by parse e.g.
          ##   Österreich • Austria (at)
          ##   Österreich • Austria
          ##   Austria
          ##   Deutschland (de) • Germany
          country = world.countries.parse( heading )
          intl    = false
          clubs   = true

          ## check country code - MUST exist for now!!!!
          if country.nil?
            puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
            exit 1
          end
        end
      elsif node[0] == :p    ## paragraph with (text) lines
        node[1].each do |line|
          if line.start_with?( '|' )
            ## assume continuation with line of alternative names
            if last_rec.nil?
              puts "** !!! ERROR !!! [league reader] - alt names line without preceding league >#{line}<"
              exit 1
            end

            ##  note: skip leading pipe
            values = line[1..-1].split( '|' )   # names - allow/use pipe(|)
            values = values.map { |value| _norm( value ) }   ## squish/strip etc.

            logger.debug "alt_names: #{values.join( '|' )}"

            last_rec.alt_names += values
          elsif line =~ /^([a-z0-9][a-z0-9.]*)[ ]+(.+)$/
            ## assume "regular" line starting with the key
            ##  (todo/check: use a more "strict"/better regex capture pattern!!!)
            league_key  = $1
            ## 1) strip (commercial) sponsor markers/tags e.g $$
            ## 2) strip and squish (white)spaces
            league_name = _norm( $2 )

            logger.debug "key: >#{league_key}<, name: >#{league_name}<"

            ## prepend country key/code if country present
            ##   todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
            ##    why? lets you "overwrite" key if desired - use it - why? why not?
            league_key = "#{country.key}.#{league_key}"   if country

            rec = League.new( key:     league_key,
                              name:    league_name,
                              country: country,
                              intl:    intl,
                              clubs:   clubs )
            recs << rec
            last_rec = rec
          else
            puts "** !!! ERROR !!! missing key for (canonical) league name"
            exit 1
          end
        end  # each line
      else
        puts "** !!! ERROR !!! [league reader] - unknown line type:"
        pp node
        exit 1
      end
    end

    recs
  end  # method parse


  #######################
  ###   helpers

  ## norm(alize) helper - squish (spaces)
  ##   and remove dollars ($$$)
  ##   and remove leading and trailing spaces
  def _norm( str )
    ## only extra clean-up of dollars for now ($$$)
    _squish( str.gsub( '$', '' ) )
  end

  ## collapse runs of spaces, tabs and non-breaking spaces into
  ##   a single space and remove leading and trailing spaces
  def _squish( str )
    str.gsub( /[ \t\u00a0]+/, ' ' ).strip
  end

end  # class LeagueReader
|
150
|
+
|
151
|
+
end ## module Import
|
152
|
+
end ## module SportDb
|
@@ -0,0 +1,47 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
|
4
|
+
class LinesReader   ## change to LinesEnumerator - why? why not?

  ## Wraps the lines in an external enumerator (same as to_enum) so that
  ##   block iteration (each / each_with_index) can be mixed with
  ##   manual look-ahead (peek) and stepping (next).
  ## All readers share one position; @lineno counts the lines consumed so far.
  def initialize( lines )
    @iter   = lines.each   ## get (external) enumerator
    @lineno = 0
  end

  ## Yield every remaining line.
  ##   returns an enumerator if no block is given
  def each( &blk )
    return enum_for( :each )  unless blk

    ## note - StopIteration is rescued (automagically) by Kernel#loop.
    ##   no need to rescue ourselves here
    loop do
      line = @iter.next   ## note - raises StopIteration
      ## note - keep line counter in sync with next and each_with_index
      @lineno += 1
      blk.call( line )
    end
  end

  ## Yield every remaining line together with its (zero-based) index
  ##   counted from the very first line consumed by this reader.
  ##   returns an enumerator if no block is given
  def each_with_index( &blk )
    return enum_for( :each_with_index )  unless blk

    ## note - StopIteration is rescued (automagically) by Kernel#loop.
    loop do
      line = @iter.next   ## note - raises StopIteration
      ## note - advance the counter BEFORE calling the block;
      ##   the line is consumed even if the block exits early (e.g. with break)
      index    = @lineno
      @lineno += 1
      blk.call( line, index )
    end
  end

  ## Return the next line without consuming it or nil if no more lines.
  def peek
    @iter.peek
  rescue StopIteration
    nil
  end

  ## Consume and return the next line or nil if no more lines.
  def next
    ## todo/check - do NOT catch StopIteration for next - why? why not?
    line = @iter.next
    @lineno += 1
    line
  rescue StopIteration
    nil
  end
end # class LinesReader
|
47
|
+
end # module SportDb
|