sportdb-formats 1.1.6 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +37 -1
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +102 -12
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
@@ -0,0 +1,289 @@
1
+ ###
2
+ # todo - based on ClubReader
3
+ # share GeoReader or BaseReader or such for both
4
+ # plus maybe for PlayerReader too!!!
5
+ #
6
+ # fix/todo/cleanup - move alt_names_auto from reader to indexer!!!!
7
+ # indexer now handles unaccent (variants) etc.
8
+
9
+ module SportDb
10
+ module Import
11
+
12
+
13
+ class GroundReader
14
+
15
+ def world() Import.world; end
16
+
17
+
18
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
19
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
20
+ parse( txt )
21
+ end
22
+
23
+ def self.parse( txt )
24
+ new( txt ).parse
25
+ end
26
+
27
+ def initialize( txt )
28
+ @txt = txt
29
+ end
30
+
31
+
32
+ ## pattern for checking for address line e.g.
33
+ ## use just one style / syntax - why? why not?
34
+ ## Fischhofgasse 12 ~ 1100 Wien or
35
+ ## Fischhofgasse 12 // 1100 Wien or Fischhofgasse 12 /// 1100 Wien
36
+ ## Fischhofgasse 12 ++ 1100 Wien or Fischhofgasse 12 +++ 1100 Wien
37
+ ADDR_MARKER_RE = %r{ (?: ^|[ ] ) # space or beginning of line
38
+ (?: ~ | /{2,} | \+{2,} )
39
+ (?: [ ]|$) # space or end of line
40
+ }x
41
+
42
+
43
+ def parse
44
+ recs = []
45
+ last_rec = nil
46
+ headings = [] ## headings stack
47
+
48
+ OutlineReader.parse( @txt ).each do |node|
49
+ if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
50
+ heading_level = node[0][1].to_i
51
+ heading = node[1]
52
+
53
+ puts "heading #{heading_level} >#{heading}<"
54
+
55
+ ## 1) first pop headings if present
56
+ while headings.size+1 > heading_level
57
+ headings.pop
58
+ end
59
+
60
+ ## 2) add missing (hierarchy) level(s) if the heading skips one or more levels
61
+ while headings.size+1 < heading_level
62
+ ## todo/fix: issue warning about "skipping" hierarchy level
63
+ puts "!!! warn [ground reader] - skipping hierarchy level in headings "
64
+ headings.push( nil )
65
+ end
66
+
67
+ if heading =~ /^\?+$/ ## note: use ? or ?? or ??? (any run of question marks) to reset level to nil
68
+ ## keep level empty
69
+ else
70
+ ## note: if level is 1 assume country for now
71
+ if heading_level == 1
72
+ ## assume country in heading; allow all "formats" supported by parse e.g.
73
+ ## Österreich • Austria (at)
74
+ ## Österreich • Austria
75
+ ## Austria
76
+ ## Deutschland (de) • Germany
77
+ country = world.countries.parse( heading )
78
+ ## check country code - MUST exist for now!!!!
79
+ if country.nil?
80
+ puts "!!! error [ground reader] - unknown country >#{heading}< - sorry - add country to config to fix"
81
+ exit 1
82
+ end
83
+
84
+ headings.push( country.key )
85
+ else
86
+ ## quick hack:
87
+ ## remove known fill/dummy words incl:
88
+ ## Provincia San Juan => San Juan (see argentina, for example)
89
+ ##
90
+ ## use geo tree long term with alternative names - why? why not?
91
+ words = ['Provincia']
92
+ words.each { |word| heading = heading.gsub( word, '' ) }
93
+ heading = heading.strip
94
+
95
+ headings.push( heading )
96
+ end
97
+
98
+ ## assert that hierarchy level is ok
99
+ if headings.size != heading_level
100
+ puts "!!! error - headings hierarchy/stack out of order - #{heading.size}<=>#{heading_level}"
101
+ exit 1
102
+ end
103
+ end
104
+
105
+ pp headings
106
+
107
+ elsif node[0] == :p ## paragraph with (text) lines
108
+ lines = node[1]
109
+ lines.each do |line|
110
+ if line.start_with?( '|' )
111
+ ## assume continuation with line of alternative names
112
+ ## note: skip leading pipe
113
+ values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
114
+ values = values.map {|value| _norm(value) } ## squish/strip etc.
115
+
116
+ last_rec.alt_names += values
117
+
118
+ ## check for address line e.g.
119
+ ## use just one style / syntax - why? why not?
120
+ ## Fischhofgasse 12 ~ 1100 Wien or
121
+ ## Fischhofgasse 12 // 1100 Wien or Fischhofgasse 12 /// 1100 Wien
122
+ ## Fischhofgasse 12 ++ 1100 Wien or Fischhofgasse 12 +++ 1100 Wien
123
+ elsif line =~ ADDR_MARKER_RE
124
+ ## squish line here - why? why not?
125
+ last_rec.address = _squish( line )
126
+ else
127
+ values = line.split( ',' )
128
+
129
+ rec = Ground.new
130
+
131
+ col = values.shift ## get first item
132
+ ## note: allow optional alt names for convenience with required canonical name
133
+ names = col.split( '|' ) # team names - allow/use pipe(|)
134
+ names = names.map {|name| _norm(name) } ## squish/strip etc.
135
+
136
+ value = names[0] ## canonical name
137
+ alt_names = names[1..-1] ## optional (inline) alt names
138
+
139
+ rec.name = value # canonical name (globally unique "beautiful/long" name)
140
+ ## note: add optional (inline) alternate names if present
141
+ rec.alt_names += alt_names if alt_names.size > 0
142
+
143
+ ## note:
144
+ ## check/todo!!!!!!!!!!!!!!!!!-
145
+ ## strip year if open-ended (up to the present) e.g. (2011-)
146
+ ##
147
+ ## do NOT strip for defunct / historic clubs e.g.
148
+ ## (1899-1910)
149
+ ## or (-1914) or (-2011) etc.
150
+
151
+ ###
152
+ ## todo: move year out of canonical team name - why? why not?
153
+
154
+ ## check if canonical name include (2011-) or similar in name
155
+ ## if yes, remove (2011-) and add to (alt) names
156
+ ## e.g. New York FC (2011) => New York FC
157
+ if rec.name =~ /\(.+?\)/ ## note: use non-greedy (?) match
158
+ name = rec.name.gsub( /\(.+?\)/, '' ).strip
159
+
160
+ if rec.name =~ /\(([0-9]{4})-\)/ ## e.g. (2014-)
161
+ rec.year = $1.to_i
162
+ elsif rec.name =~ /\(-([0-9]{4})\)/ ## e.g. (-2014)
163
+ rec.year_end = $1.to_i
164
+ elsif rec.name =~ /\(([0-9]{4})-([0-9]{4})\)/ ## e.g. (2011-2014)
165
+ rec.year = $1.to_i
166
+ rec.year_end = $2.to_i
167
+ else
168
+ ## todo/check: warn about unknown year format
169
+ end
170
+ end
171
+
172
+ ## todo/check - check for unknown format values
173
+ ## e.g. too many values, duplicate years, etc.
174
+ ## check for overwriting, etc.
175
+
176
+ ## strip and squish (white)spaces
177
+ # e.g. "León   ›   Guanajuato" (extra spaces) => "León › Guanajuato"
178
+ values = values.map {|value| _squish(value) }
179
+
180
+ while values.size > 0
181
+ value = values.shift
182
+ if value =~/^\d{4}$/ # e.g 1904
183
+ ## todo/check: issue warning if year is already set!!!!!!!
184
+ if rec.year
185
+ puts "!!! error - year already set to #{rec.year} - CANNOT overwrite with #{value}:"
186
+ pp rec
187
+ exit 1
188
+ end
189
+ rec.year = value.to_i
190
+ elsif value =~/^[0-9_]+$/ # e.g. 40_000 or 40000 (capacity)
191
+ ## skip capacity for now
192
+ else
193
+ ## assume city / geo tree
194
+ ## split into geo tree
195
+ geos = split_geo( value )
196
+ city = geos[0]
197
+ ## check for "embedded" district e.g. London (Fulham) or Hamburg (St. Pauli) etc.
198
+ if city =~ /\((.+?)\)/ ## note: use non-greedy (?) match
199
+ rec.district = $1.strip
200
+ city = city.gsub( /\(.+?\)/, '' ).strip
201
+ end
202
+ rec.city = city
203
+
204
+ if geos.size > 1
205
+ ## cut-off city and keep the rest (of geo tree)
206
+ rec.geos = geos[1..-1]
207
+ end
208
+ end
209
+ end ## while values
210
+
211
+
212
+ ###############
213
+ ## use headings text for geo tree
214
+
215
+ ## 1) add country if present
216
+ if headings.size > 0 && headings[0]
217
+ country = world.countries.find( headings[0] )
218
+ rec.country = country
219
+ else
220
+ ## make it an error - why? why not?
221
+ puts "!!! error - country missing in headings hierarchy - sorry - add to quicklist"
222
+ exit 1
223
+ end
224
+
225
+ ## 2) check geo tree with headings hierarchy
226
+ if headings.size > 1 && headings[1]
227
+ geos = split_geo( headings[1] )
228
+ if rec.geos
229
+ if rec.geos[0] != geos[0]
230
+ puts "!!! error - geo tree - headings mismatch >#{rec.geos[0]}< <=> >#{geos[0]}<"
231
+ exit 1
232
+ end
233
+ if rec.geos[1] && rec.geos[1] != geos[1] ## check optional 2nd level too
234
+ puts "!!! error - geo tree - headings mismatch >#{rec.geos[1]}< <=> >#{geos[1]}<"
235
+ exit 1
236
+ end
237
+ else
238
+ ## add missing region (state/province) from headings hierarchy
239
+ rec.geos = geos
240
+ end
241
+ end
242
+
243
+ last_rec = rec
244
+
245
+ recs << rec
246
+ end
247
+ end # each line (in paragraph)
248
+ else
249
+ puts "** !!! ERROR !!! [ground reader] - unknown line type:"
250
+ pp node
251
+ exit 1
252
+ end
253
+ end
254
+
255
+ recs
256
+ end # method parse
257
+
258
+ #######################
259
+ ### helpers
260
+
261
+ def split_geo( str )
262
+ ## assume city / geo tree
263
+ ## strip and squish (white)spaces
264
+ # e.g. "León   ›   Guanajuato" (extra spaces) => "León › Guanajuato"
265
+ str = _squish( str )
266
+
267
+ ## split into geo tree
268
+ geos = str.split( /[<>‹›]/ ) ## note: allow > < or › ‹
269
+ geos = geos.map { |geo| geo.strip } ## remove leading and trailing whitespace
270
+ geos
271
+ end
272
+
273
+ ## norm(alize) helper - squish (spaces)
274
+ ## and remove dollars ($$$)
275
+ ## and remove leading and trailing spaces
276
+ def _norm( str )
277
+ ## only extra clean-up of dollars for now ($$$)
278
+ _squish( str.gsub( '$', '' ) )
279
+ end
280
+
281
+ def _squish( str )
282
+ str.gsub( /[ \t\u00a0]+/, ' ' ).strip
283
+ end
284
+
285
+ end # class GroundReader
286
+
287
+
288
+ end ## module Import
289
+ end ## module SportDb
@@ -1,168 +1,152 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class LeagueReader
9
-
10
- def catalog() Import.catalog; end
11
-
12
-
13
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
14
- txt = File.open( path, 'r:utf-8' ) { |f| f.read }
15
- parse( txt )
16
- end
17
-
18
- def self.parse( txt )
19
- new( txt ).parse
20
- end
21
-
22
-
23
-
24
- include Logging
25
-
26
- def initialize( txt )
27
- @txt = txt
28
- end
29
-
30
- def parse
31
- recs = []
32
- last_rec = nil
33
-
34
- country = nil # last country
35
- intl = false # is international (league/tournament/cup/competition)
36
- clubs = true # or clubs|national teams
37
-
38
- OutlineReader.parse( @txt ).each do |node|
39
- if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
40
- heading_level = node[0][1].to_i
41
- heading = node[1]
42
-
43
- logger.debug "heading #{heading_level} >#{heading}<"
44
-
45
- if heading_level != 1
46
- puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
47
- pp line
48
- exit 1
49
- else
50
- logger.debug "heading (#{heading_level}) >#{heading}<"
51
- last_heading = heading
52
- ## map to country or international / int'l or national teams
53
- if heading =~ /national team/i ## national team tournament
54
- country = nil
55
- intl = true
56
- clubs = false
57
- elsif heading =~ /international|int'l/i ## int'l club tournament
58
- country = nil
59
- intl = true
60
- clubs = true
61
- else
62
- ## assume country in heading; allow all "formats" supported by parse e.g.
63
- ## Österreich • Austria (at)
64
- ## ÖsterreichAustria
65
- ## Austria
66
- ## Deutschland (de) • Germany
67
- country = catalog.countries.parse( heading )
68
- intl = false
69
- clubs = true
70
-
71
- ## check country code - MUST exist for now!!!!
72
- if country.nil?
73
- puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
74
- exit 1
75
- end
76
- end
77
- end
78
- elsif node[0] == :p ## paragraph with (text) lines
79
- lines = node[1]
80
- lines.each do |line|
81
-
82
- if line.start_with?( '|' )
83
- ## assume continuation with line of alternative names
84
- ## note: skip leading pipe
85
- values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
86
- ## 1) strip (commerical) sponsor markers/tags e.g. $$ Liga $$BBV$$ MX
87
- ## 2) strip and squish (white)spaces
88
- # e.g. New York FC (2011-) => New York FC (2011-)
89
- values = values.map { |value| value.gsub( '$', '' )
90
- .gsub( /[ \t]+/, ' ' )
91
- .strip }
92
- logger.debug "alt_names: #{values.join( '|' )}"
93
-
94
- last_rec.alt_names += values
95
- else
96
- ## assume "regular" line
97
- ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
98
- if line =~ /^([a-z0-9][a-z0-9.]*)[ ]+(.+)$/
99
- league_key = $1
100
- ## 1) strip (commercial) sponsor markers/tags e.g $$
101
- ## 2) strip and squish (white)spaces
102
- league_name = $2.gsub( '$', '' )
103
- .gsub( /[ \t]+/, ' ' )
104
- .strip
105
-
106
- logger.debug "key: >#{league_key}<, name: >#{league_name}<"
107
-
108
-
109
- alt_names_auto = []
110
- if country
111
- alt_names_auto << "#{country.key.upcase} #{league_key.upcase.gsub('.', ' ')}"
112
- ## todo/check: add "hack" for cl (chile) and exclude?
113
- ## add a list of (auto-)excluded country codes with conflicts? why? why not?
114
- ## cl - a) Chile b) Champions League
115
- alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
116
- if country.key.upcase != country.code
117
- alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
118
- alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
119
- end
120
- alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
121
-
122
- ## auto-add with country prepended
123
- ## e.g. England Premier League, Austria Bundesliga etc.
124
- ## todo/check: also add variants with country alt name if present!!!
125
- ## todo/check: exclude cups or such from country + league name auto-add - why? why not?
126
- alt_names_auto << "#{country.name} #{league_name}"
127
- else ## assume int'l (no country) e.g. champions league, etc.
128
- ## only auto-add key (e.g. CL, EL, etc.)
129
- alt_names_auto << league_key.upcase.gsub('.', ' ') ## note: no country code (prefix/leading) used
130
- end
131
-
132
- ## pp alt_names_auto
133
-
134
- ## prepend country key/code if country present
135
- ## todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
136
- ## why? lets you "overwrite" key if desired - use it - why? why not?
137
- if country
138
- league_key = "#{country.key}.#{league_key}"
139
- end
140
-
141
- rec = League.new( key: league_key,
142
- name: league_name,
143
- alt_names_auto: alt_names_auto,
144
- country: country,
145
- intl: intl,
146
- clubs: clubs)
147
- recs << rec
148
- last_rec = rec
149
- else
150
- puts "** !!! ERROR !!! missing key for (canonical) league name"
151
- exit 1
152
- end
153
- end
154
- end # each line
155
- else
156
- puts "** !!! ERROR !!! [league reader] - unknown line type:"
157
- pp node
158
- exit 1
159
- end
160
- ## pp line
161
- end
162
- recs
163
- end # method parse
164
-
165
- end # class LeagueReader
166
-
167
- end ## module Import
168
- end ## module SportDb
1
+
2
+ module SportDb
3
+ module Import
4
+
5
+
6
+ class LeagueReader
7
+
8
+ def world() Import.world; end
9
+
10
+
11
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
12
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
13
+ parse( txt )
14
+ end
15
+
16
+ def self.parse( txt )
17
+ new( txt ).parse
18
+ end
19
+
20
+
21
+
22
+ include Logging
23
+
24
+ def initialize( txt )
25
+ @txt = txt
26
+ end
27
+
28
+ def parse
29
+ recs = []
30
+ last_rec = nil
31
+
32
+ country = nil # last country
33
+ intl = false # is international (league/tournament/cup/competition)
34
+ clubs = true # or clubs|national teams
35
+
36
+ OutlineReader.parse( @txt ).each do |node|
37
+ if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
38
+ heading_level = node[0][1].to_i
39
+ heading = node[1]
40
+
41
+ logger.debug "heading #{heading_level} >#{heading}<"
42
+
43
+ if heading_level != 1
44
+ puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
45
+ pp line
46
+ exit 1
47
+ else
48
+ logger.debug "heading (#{heading_level}) >#{heading}<"
49
+ last_heading = heading
50
+ ## map to country or international / int'l or national teams
51
+ if heading =~ /national team/i ## national team tournament
52
+ country = nil
53
+ intl = true
54
+ clubs = false
55
+ elsif heading =~ /international|int'l/i ## int'l club tournament
56
+ country = nil
57
+ intl = true
58
+ clubs = true
59
+ else
60
+ ## assume country in heading; allow all "formats" supported by parse e.g.
61
+ ## Österreich • Austria (at)
62
+ ## Österreich • Austria
63
+ ## Austria
64
+ ## Deutschland (de) • Germany
65
+ country = world.countries.parse( heading )
66
+ intl = false
67
+ clubs = true
68
+
69
+ ## check country code - MUST exist for now!!!!
70
+ if country.nil?
71
+ puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
72
+ exit 1
73
+ end
74
+ end
75
+ end
76
+ elsif node[0] == :p ## paragraph with (text) lines
77
+ lines = node[1]
78
+ lines.each do |line|
79
+
80
+ if line.start_with?( '|' )
81
+ ## assume continuation with line of alternative names
82
+ ## note: skip leading pipe
83
+ values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
84
+ values = values.map {|value| _norm(value) } ## squish/strip etc.
85
+
86
+ logger.debug "alt_names: #{values.join( '|' )}"
87
+
88
+ last_rec.alt_names += values
89
+ else
90
+ ## assume "regular" line
91
+ ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
92
+ if line =~ /^([a-z0-9][a-z0-9.]*)[ ]+(.+)$/
93
+ league_key = $1
94
+ ## 1) strip (commercial) sponsor markers/tags e.g $$
95
+ ## 2) strip and squish (white)spaces
96
+ league_name = _norm( $2 )
97
+
98
+ logger.debug "key: >#{league_key}<, name: >#{league_name}<"
99
+
100
+
101
+ ## prepend country key/code if country present
102
+ ## todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
103
+ ## why? lets you "overwrite" key if desired - use it - why? why not?
104
+ if country
105
+ league_key = "#{country.key}.#{league_key}"
106
+ end
107
+
108
+ rec = League.new( key: league_key,
109
+ name: league_name,
110
+ country: country,
111
+ intl: intl,
112
+ clubs: clubs)
113
+ recs << rec
114
+ last_rec = rec
115
+ else
116
+ puts "** !!! ERROR !!! missing key for (canonical) league name"
117
+ exit 1
118
+ end
119
+ end
120
+ end # each line
121
+ else
122
+ puts "** !!! ERROR !!! [league reader] - unknown line type:"
123
+ pp node
124
+ exit 1
125
+ end
126
+ ## pp line
127
+ end
128
+ recs
129
+ end # method parse
130
+
131
+
132
+
133
+ #######################
134
+ ### helpers
135
+
136
+ ## norm(alize) helper - squish (spaces)
137
+ ## and remove dollars ($$$)
138
+ ## and remove leading and trailing spaces
139
+ def _norm( str )
140
+ ## only extra clean-up of dollars for now ($$$)
141
+ _squish( str.gsub( '$', '' ) )
142
+ end
143
+
144
+ def _squish( str )
145
+ str.gsub( /[ \t\u00a0]+/, ' ' ).strip
146
+ end
147
+
148
+
149
+ end # class LeagueReader
150
+
151
+ end ## module Import
152
+ end ## module SportDb
@@ -0,0 +1,47 @@
1
+
2
+ module SportDb
3
+
4
+ class LinesReader ## change to LinesEnumerator - why? why not?
5
+ def initialize( lines )
6
+ @iter = lines.each ## get (external) enumerator (same as to_enum)
7
+ @lineno = 0
8
+ end
9
+
10
+ def each( &blk )
11
+ ## note - StopIteration is rescued (automagically) by Kernel#loop.
12
+ ## no need to rescue ourselves here
13
+ loop do
14
+ line = @iter.next ## note - raises StopIteration
15
+ blk.call( line )
16
+ end
17
+ end
18
+
19
+ def each_with_index( &blk )
20
+ ## note - StopIteration is rescued (automagically) by Kernel#loop.
21
+ loop do
22
+ line = @iter.next ## note - raises StopIteration
23
+ blk.call( line, @lineno )
24
+ @lineno += 1
25
+ end
26
+ end
27
+
28
+ def peek
29
+ begin
30
+ @iter.peek
31
+ rescue StopIteration
32
+ nil
33
+ end
34
+ end
35
+
36
+ def next
37
+ ## todo/check - do NOT catch StopIteration for next - why? why not?
38
+ begin
39
+ line = @iter.next
40
+ @lineno += 1
41
+ line
42
+ rescue StopIteration
43
+ nil
44
+ end
45
+ end
46
+ end # class LinesReader
47
+ end # module SportDb