sportdb-formats 1.1.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +4 -25
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/formats/country/country_reader.rb +142 -142
  6. data/lib/sportdb/formats/datafile.rb +59 -59
  7. data/lib/sportdb/formats/event/event_reader.rb +184 -183
  8. data/lib/sportdb/formats/goals.rb +37 -1
  9. data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
  10. data/lib/sportdb/formats/league/league_reader.rb +152 -168
  11. data/lib/sportdb/formats/lines_reader.rb +47 -0
  12. data/lib/sportdb/formats/match/match_parser.rb +102 -12
  13. data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
  14. data/lib/sportdb/formats/outline_reader.rb +0 -1
  15. data/lib/sportdb/formats/package.rb +394 -374
  16. data/lib/sportdb/formats/search/sport.rb +357 -0
  17. data/lib/sportdb/formats/search/world.rb +139 -0
  18. data/lib/sportdb/formats/team/club_index_history.rb +134 -134
  19. data/lib/sportdb/formats/team/club_reader.rb +318 -350
  20. data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
  21. data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
  22. data/lib/sportdb/formats/version.rb +4 -7
  23. data/lib/sportdb/formats.rb +60 -27
  24. metadata +13 -35
  25. data/lib/sportdb/formats/country/country_index.rb +0 -192
  26. data/lib/sportdb/formats/event/event_index.rb +0 -141
  27. data/lib/sportdb/formats/league/league_index.rb +0 -178
  28. data/lib/sportdb/formats/team/club_index.rb +0 -338
  29. data/lib/sportdb/formats/team/national_team_index.rb +0 -114
  30. data/lib/sportdb/formats/team/team_index.rb +0 -43
  31. data/test/helper.rb +0 -132
  32. data/test/test_club_index.rb +0 -183
  33. data/test/test_club_index_history.rb +0 -107
  34. data/test/test_club_reader.rb +0 -201
  35. data/test/test_club_reader_history.rb +0 -212
  36. data/test/test_club_reader_props.rb +0 -54
  37. data/test/test_country_index.rb +0 -63
  38. data/test/test_country_reader.rb +0 -89
  39. data/test/test_datafile.rb +0 -30
  40. data/test/test_datafile_package.rb +0 -46
  41. data/test/test_goals.rb +0 -113
  42. data/test/test_league_index.rb +0 -157
  43. data/test/test_league_outline_reader.rb +0 -55
  44. data/test/test_league_reader.rb +0 -72
  45. data/test/test_outline_reader.rb +0 -31
  46. data/test/test_package.rb +0 -78
  47. data/test/test_package_match.rb +0 -102
  48. data/test/test_regex.rb +0 -67
  49. data/test/test_wiki_reader.rb +0 -77
@@ -0,0 +1,289 @@
1
+ ###
2
+ # todo - based on ClubReader
3
+ # share GeoReader or BaseReader or such for both
4
+ # plus maybe for PlayerReader too!!!
5
+ #
6
+ # fix/todo/cleanup - move alt_names_auto from reader to indexer!!!!
7
+ # indexer now handles unaccent (variants) etc.
8
+
9
+ module SportDb
10
+ module Import
11
+
12
+
13
##
#  Reads ground records from an outline text document.
#
#  Headings build up a geo hierarchy (kept as a stack): a level-1 heading
#  is looked up as a country via world.countries.parse, deeper headings
#  are kept as text. Paragraph lines are turned into Ground records e.g.
#
#    Name | Alt Name, 1931, 50_865, City (District) › Region
#    | More Alt Name | Another Alt Name
#    Street 12 ~ 1234 City
#
#  note: errors in the input are reported with puts
#        and end the process with exit 1 (no exception raised).
class GroundReader

  def world() Import.world; end


  ## read the file in path (as utf-8) and parse the text
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## parse the text; returns an array of Ground records
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end


  ## pattern for checking for address line e.g.
  ##   use just one style / syntax - why? why not?
  ##  Fischhofgasse 12 ~ 1100 Wien   or
  ##  Fischhofgasse 12 // 1100 Wien  or   Fischhofgasse 12 /// 1100 Wien
  ##  Fischhofgasse 12 ++ 1100 Wien  or   Fischhofgasse 12 +++ 1100 Wien
  ADDR_MARKER_RE = %r{ (?: ^|[ ] )               # space or beginning of line
                       (?: ~ | /{2,} | \+{2,} )
                       (?: [ ]|$)                # space or end of line
                     }x


  ##
  #  Walk all nodes returned by OutlineReader.parse and collect the grounds.
  #
  #  returns an array of Ground records (in document order)
  def parse
    recs     = []
    last_rec = nil
    headings = []    ## headings stack

    OutlineReader.parse( @txt ).each do |node|
      if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
        ## note: the heading level is the digit in the node type e.g. :h2 => 2
        _update_headings( headings, node[0][1].to_i, node[1] )
      elsif node[0] == :p    ## paragraph with (text) lines
        node[1].each do |line|
          if line.start_with?( '|' )
            ## assume continuation with line of alternative names
            ##  note: skip leading pipe
            _require_last_rec( last_rec, line )
            values = line[1..-1].split( '|' )
            last_rec.alt_names += values.map { |value| _norm( value ) }
          elsif line =~ ADDR_MARKER_RE
            ## address line - squish line here - why? why not?
            _require_last_rec( last_rec, line )
            last_rec.address = _squish( line )
          else
            rec = _parse_ground( line )
            _add_geo_from_headings( rec, headings )

            last_rec = rec
            recs << rec
          end
        end  # each line (in paragraph)
      else
        puts "** !!! ERROR !!! [ground reader] - unknown line type:"
        pp node
        exit 1
      end
    end

    recs
  end  # method parse


  #######################
  ### helpers

  ##
  #  Update the headings stack (in place) for a new heading.
  #
  #  - pops levels deeper than or equal to the new level
  #  - fills skipped levels with nil (and prints a warning)
  #  - pushes the country key (level 1) or the heading text (all other levels)
  #  - a heading made of question marks only (e.g. ? or ??) pushes nothing,
  #    that is, the level is kept empty
  def _update_headings( headings, heading_level, heading )
    puts "heading #{heading_level} >#{heading}<"

    ## 1) first pop headings if present
    headings.pop   while headings.size+1 > heading_level

    ## 2) add missing (hierarchy) level if
    while headings.size+1 < heading_level
      puts "!!! warn [ground reader] - skipping hierarchy level in headings "
      headings.push( nil )
    end

    unless heading =~ /^\?+$/
      if heading_level == 1
        ## assume country in heading; allow all "formats" supported by parse e.g.
        ##   Österreich • Austria (at)
        ##   Österreich • Austria
        ##   Austria
        ##   Deutschland (de) • Germany
        country = world.countries.parse( heading )
        ## check country code - MUST exist for now!!!!
        if country.nil?
          puts "!!! error [ground reader] - unknown country >#{heading}< - sorry - add country to config to fix"
          exit 1
        end

        headings.push( country.key )
      else
        ## quick hack:
        ##   remove known fill/dummy words incl:
        ##     Provincia San Juan  =>  San Juan  (see argentina, for example)
        ##   use geo tree long term with alternative names - why? why not?
        ['Provincia'].each { |word| heading = heading.gsub( word, '' ) }

        headings.push( heading.strip )
      end

      ## assert that hierarchy level is ok
      ##   note: report the size of the headings stack (not of the heading text)
      if headings.size != heading_level
        puts "!!! error - headings hierarchy/stack out of order - #{headings.size}<=>#{heading_level}"
        exit 1
      end
    end

    pp headings
  end

  ##
  #  Make sure a continuation line (alt names or address) follows a ground
  #  record; otherwise report the offending line and exit
  #  (instead of failing with a NoMethodError on nil).
  def _require_last_rec( last_rec, line )
    return if last_rec

    puts "!!! error [ground reader] - continuation line without preceding ground record:"
    pp line
    exit 1
  end

  ##
  #  Build a Ground record from a "regular" (comma-separated) line.
  #
  #  - the first value holds the canonical name
  #    plus optional (inline) alt names separated by pipe (|)
  #  - a four-digit value sets the year
  #  - a value made of digits and underscores only is skipped (capacity)
  #  - every other value is read as city / geo tree
  def _parse_ground( line )
    values = line.split( ',' )

    rec = Ground.new

    col   = values.shift     ## get first item
    names = col.split( '|' ).map { |name| _norm( name ) }

    rec.name   = names[0]    ## canonical name (global unique "beautiful/long" name)
    alt_names  = names[1..-1]
    ## note: add optional (inline) alternate names if present
    rec.alt_names += alt_names  if alt_names.size > 0

    ## check if canonical name includes (2011-) or similar and record the year(s)
    ##   NOTE(review): the name itself is kept as is;
    ##     the original computed a stripped name but never used it
    ##     - confirm if it should get added to the alt names
    if rec.name =~ /\(.+?\)/     ## note: use non-greedy (?) match
      if rec.name =~ /\(([0-9]{4})-\)/                   ## e.g. (2014-)
        rec.year = $1.to_i
      elsif rec.name =~ /\(-([0-9]{4})\)/                ## e.g. (-2014)
        rec.year_end = $1.to_i
      elsif rec.name =~ /\(([0-9]{4})-([0-9]{4})\)/      ## e.g. (2011-2014)
        rec.year     = $1.to_i
        rec.year_end = $2.to_i
      else
        ## todo/check: warn about unknown year format
      end
    end

    ## todo/check - check for unknown format values
    ##    e.g. too many values, duplicate years, etc.
    ##         check for overwriting, etc.

    values.map { |value| _squish( value ) }.each do |value|
      if value =~ /^\d{4}$/        # e.g. 1904
        if rec.year
          puts "!!! error -  year already set to #{rec.year} - CANNOT overwrite with #{value}:"
          pp rec
          exit 1
        end
        rec.year = value.to_i
      elsif value =~ /^[0-9_]+$/   # e.g. 50_865
        ## skip capacity for now
      else
        ## assume city / geo tree
        geos = split_geo( value )
        city = geos[0]
        ## check for "embedded" district e.g. London (Fulham) or Hamburg (St. Pauli) etc.
        if city =~ /\((.+?)\)/     ## note: use non-greedy (?) match
          rec.district = $1.strip
          city = city.gsub( /\(.+?\)/, '' ).strip
        end
        rec.city = city

        ## cut-off city and keep the rest (of geo tree)
        rec.geos = geos[1..-1]  if geos.size > 1
      end
    end

    rec
  end

  ##
  #  Use the headings stack to complete the geo tree of a record:
  #   1) the first level (country key) MUST be present and sets rec.country
  #   2) the second level (if present) must match the geo tree of the record
  #      or gets used as geo tree if the record has none
  def _add_geo_from_headings( rec, headings )
    ## 1) add country if present
    if headings.size > 0 && headings[0]
      rec.country = world.countries.find( headings[0] )
    else
      ## make it an error - why? why not?
      puts "!!! error - country missing in headings hierarchy - sorry - add to quicklist"
      exit 1
    end

    ## 2) check geo tree with headings hierarchy
    if headings.size > 1 && headings[1]
      geos = split_geo( headings[1] )
      if rec.geos
        if rec.geos[0] != geos[0]
          puts "!!! error - geo tree - headings mismatch >#{rec.geos[0]}< <=> >#{geos[0]}<"
          exit 1
        end
        if rec.geos[1] && rec.geos[1] != geos[1]   ## check optional 2nd level too
          puts "!!! error - geo tree - headings mismatch >#{rec.geos[1]}< <=> >#{geos[1]}<"
          exit 1
        end
      else
        ## add missing region (state/province) from headings hierarchy
        rec.geos = geos
      end
    end
  end

  ## split a city / geo tree string into its (stripped) parts
  ##    e.g. León › Guanajuato  =>  ["León", "Guanajuato"]
  def split_geo( str )
    ## note: allow > < or › ‹ as separator
    _squish( str ).split( /[<>‹›]/ ).map { |geo| geo.strip }
  end

  ## norm(alize) helper - squish (spaces)
  ##    and remove dollars ($$$)
  ##    and remove leading and trailing spaces
  def _norm( str )
    ## only extra clean-up of dollars for now ($$$)
    _squish( str.gsub( '$', '' ) )
  end

  ## replace runs of spaces, tabs and non-breaking spaces with a single space
  ##   and remove leading and trailing spaces
  def _squish( str )
    str.gsub( /[ \t\u00a0]+/, ' ' ).strip
  end

end # class GroundReader
286
+
287
+
288
+ end ## module Import
289
+ end ## module SportDb
@@ -1,168 +1,152 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class LeagueReader
9
-
10
- def catalog() Import.catalog; end
11
-
12
-
13
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
14
- txt = File.open( path, 'r:utf-8' ) { |f| f.read }
15
- parse( txt )
16
- end
17
-
18
- def self.parse( txt )
19
- new( txt ).parse
20
- end
21
-
22
-
23
-
24
- include Logging
25
-
26
- def initialize( txt )
27
- @txt = txt
28
- end
29
-
30
- def parse
31
- recs = []
32
- last_rec = nil
33
-
34
- country = nil # last country
35
- intl = false # is international (league/tournament/cup/competition)
36
- clubs = true # or clubs|national teams
37
-
38
- OutlineReader.parse( @txt ).each do |node|
39
- if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
40
- heading_level = node[0][1].to_i
41
- heading = node[1]
42
-
43
- logger.debug "heading #{heading_level} >#{heading}<"
44
-
45
- if heading_level != 1
46
- puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
47
- pp line
48
- exit 1
49
- else
50
- logger.debug "heading (#{heading_level}) >#{heading}<"
51
- last_heading = heading
52
- ## map to country or international / int'l or national teams
53
- if heading =~ /national team/i ## national team tournament
54
- country = nil
55
- intl = true
56
- clubs = false
57
- elsif heading =~ /international|int'l/i ## int'l club tournament
58
- country = nil
59
- intl = true
60
- clubs = true
61
- else
62
- ## assume country in heading; allow all "formats" supported by parse e.g.
63
- ## Österreich • Austria (at)
64
- ## ÖsterreichAustria
65
- ## Austria
66
- ## Deutschland (de) • Germany
67
- country = catalog.countries.parse( heading )
68
- intl = false
69
- clubs = true
70
-
71
- ## check country code - MUST exist for now!!!!
72
- if country.nil?
73
- puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
74
- exit 1
75
- end
76
- end
77
- end
78
- elsif node[0] == :p ## paragraph with (text) lines
79
- lines = node[1]
80
- lines.each do |line|
81
-
82
- if line.start_with?( '|' )
83
- ## assume continuation with line of alternative names
84
- ## note: skip leading pipe
85
- values = line[1..-1].split( '|' ) # team names - allow/use pipe(|)
86
- ## 1) strip (commerical) sponsor markers/tags e.g. $$ Liga $$BBV$$ MX
87
- ## 2) strip and squish (white)spaces
88
- # e.g. New York FC (2011-) => New York FC (2011-)
89
- values = values.map { |value| value.gsub( '$', '' )
90
- .gsub( /[ \t]+/, ' ' )
91
- .strip }
92
- logger.debug "alt_names: #{values.join( '|' )}"
93
-
94
- last_rec.alt_names += values
95
- else
96
- ## assume "regular" line
97
- ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
98
- if line =~ /^([a-z0-9][a-z0-9.]*)[ ]+(.+)$/
99
- league_key = $1
100
- ## 1) strip (commercial) sponsor markers/tags e.g $$
101
- ## 2) strip and squish (white)spaces
102
- league_name = $2.gsub( '$', '' )
103
- .gsub( /[ \t]+/, ' ' )
104
- .strip
105
-
106
- logger.debug "key: >#{league_key}<, name: >#{league_name}<"
107
-
108
-
109
- alt_names_auto = []
110
- if country
111
- alt_names_auto << "#{country.key.upcase} #{league_key.upcase.gsub('.', ' ')}"
112
- ## todo/check: add "hack" for cl (chile) and exclude?
113
- ## add a list of (auto-)excluded country codes with conflicts? why? why not?
114
- ## cl - a) Chile b) Champions League
115
- alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
116
- if country.key.upcase != country.code
117
- alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
118
- alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
119
- end
120
- alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
121
-
122
- ## auto-add with country prepended
123
- ## e.g. England Premier League, Austria Bundesliga etc.
124
- ## todo/check: also add variants with country alt name if present!!!
125
- ## todo/check: exclude cups or such from country + league name auto-add - why? why not?
126
- alt_names_auto << "#{country.name} #{league_name}"
127
- else ## assume int'l (no country) e.g. champions league, etc.
128
- ## only auto-add key (e.g. CL, EL, etc.)
129
- alt_names_auto << league_key.upcase.gsub('.', ' ') ## note: no country code (prefix/leading) used
130
- end
131
-
132
- ## pp alt_names_auto
133
-
134
- ## prepend country key/code if country present
135
- ## todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
136
- ## why? lets you "overwrite" key if desired - use it - why? why not?
137
- if country
138
- league_key = "#{country.key}.#{league_key}"
139
- end
140
-
141
- rec = League.new( key: league_key,
142
- name: league_name,
143
- alt_names_auto: alt_names_auto,
144
- country: country,
145
- intl: intl,
146
- clubs: clubs)
147
- recs << rec
148
- last_rec = rec
149
- else
150
- puts "** !!! ERROR !!! missing key for (canonical) league name"
151
- exit 1
152
- end
153
- end
154
- end # each line
155
- else
156
- puts "** !!! ERROR !!! [league reader] - unknown line type:"
157
- pp node
158
- exit 1
159
- end
160
- ## pp line
161
- end
162
- recs
163
- end # method parse
164
-
165
- end # class LeagueReader
166
-
167
- end ## module Import
168
- end ## module SportDb
1
+
2
+ module SportDb
3
+ module Import
4
+
5
+
6
##
#  Reads league records from an outline text document.
#
#  Level-1 headings select the context for the lines that follow:
#  national team tournaments, international (club) tournaments
#  or a country (looked up via world.countries.parse).
#  Paragraph lines are turned into League records e.g.
#
#    1    Bundesliga
#    | Alt Name | Another Alt Name
#
#  note: errors in the input are reported with puts
#        and end the process with exit 1 (no exception raised).
class LeagueReader

  include Logging

  def world() Import.world; end


  ## read the file in path (as utf-8) and parse the text
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ## parse the text; returns an array of League records
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end

  ##
  #  Walk all nodes returned by OutlineReader.parse and collect the leagues.
  #
  #  returns an array of League records (in document order)
  def parse
    recs     = []
    last_rec = nil

    country  = nil    # last country
    intl     = false  # is international (league/tournament/cup/competition)
    clubs    = true   # or clubs|national teams

    OutlineReader.parse( @txt ).each do |node|
      if [:h1,:h2,:h3,:h4,:h5,:h6].include?( node[0] )
        ## note: the heading level is the digit in the node type e.g. :h2 => 2
        heading_level = node[0][1].to_i
        heading       = node[1]

        logger.debug "heading #{heading_level} >#{heading}<"

        if heading_level != 1
          puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
          pp node    ## note: print the node; there is no line in scope here
          exit 1
        else
          logger.debug "heading (#{heading_level}) >#{heading}<"
          ## map to country or international / int'l or national teams
          if heading =~ /national team/i             ## national team tournament
            country = nil
            intl    = true
            clubs   = false
          elsif heading =~ /international|int'l/i    ## int'l club tournament
            country = nil
            intl    = true
            clubs   = true
          else
            ## assume country in heading; allow all "formats" supported by parse e.g.
            ##   Österreich • Austria (at)
            ##   Österreich • Austria
            ##   Austria
            ##   Deutschland (de) • Germany
            country = world.countries.parse( heading )
            intl    = false
            clubs   = true

            ## check country code - MUST exist for now!!!!
            if country.nil?
              puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
              exit 1
            end
          end
        end
      elsif node[0] == :p    ## paragraph with (text) lines
        node[1].each do |line|
          if line.start_with?( '|' )
            ## assume continuation with line of alternative names
            ##  note: skip leading pipe
            values = line[1..-1].split( '|' ).map { |value| _norm( value ) }

            logger.debug "alt_names: #{values.join( '|' )}"

            ## note: alt names need a league record to get added to;
            ##        report and exit instead of failing with a NoMethodError on nil
            if last_rec.nil?
              puts "** !!! ERROR !!! [league reader] - alt names line without preceding league:"
              pp line
              exit 1
            end

            last_rec.alt_names += values
          else
            ## assume "regular" line
            ##  check if starts with id  (todo/check: use a more "strict"/better regex capture pattern!!!)
            if line =~ /^([a-z0-9][a-z0-9.]*)[ ]+(.+)$/
              league_key  = $1
              ## 1) strip (commercial) sponsor markers/tags e.g $$
              ## 2) strip and squish (white)spaces
              league_name = _norm( $2 )

              logger.debug "key: >#{league_key}<, name: >#{league_name}<"

              ## prepend country key/code if country present
              ##   todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
              ##     why? lets you "overwrite" key if desired - use it - why? why not?
              league_key = "#{country.key}.#{league_key}"   if country

              rec = League.new( key:     league_key,
                                name:    league_name,
                                country: country,
                                intl:    intl,
                                clubs:   clubs )
              recs << rec
              last_rec = rec
            else
              puts "** !!! ERROR !!! missing key for (canonical) league name"
              exit 1
            end
          end
        end  # each line
      else
        puts "** !!! ERROR !!! [league reader] - unknown line type:"
        pp node
        exit 1
      end
    end

    recs
  end  # method parse


  #######################
  ### helpers

  ## norm(alize) helper - squish (spaces)
  ##    and remove dollars ($$$)
  ##    and remove leading and trailing spaces
  def _norm( str )
    ## only extra clean-up of dollars for now ($$$)
    _squish( str.gsub( '$', '' ) )
  end

  ## replace runs of spaces, tabs and non-breaking spaces with a single space
  ##   and remove leading and trailing spaces
  def _squish( str )
    str.gsub( /[ \t\u00a0]+/, ' ' ).strip
  end

end # class LeagueReader
150
+
151
+ end ## module Import
152
+ end ## module SportDb
@@ -0,0 +1,47 @@
1
+
2
+ module SportDb
3
+
4
##
#  Wraps an (external) enumerator for lines - lets you mix
#  block iteration (each, each_with_index) with pulling single lines
#  (peek, next) on the same underlying position.
#
#  Keeps a count of the lines taken so far (lineno); every way of
#  taking a line (next, each, each_with_index) counts, so that the
#  index passed by each_with_index is always the zero-based position
#  of the line among all lines taken from this reader.
class LinesReader   ## change to LinesEnumerator - why? why not?
  def initialize( lines )
    @iter   = lines.each   ## get (external) enumerator (same as to_enum)
    @lineno = 0            ## number of lines taken so far
  end

  ## call the block with every remaining line
  def each( &blk )
    ## note - StopIteration is rescued (automagically) by Kernel#loop.
    ##           no need to rescue ourselves here
    loop do
      line = @iter.next   ## note - raises StopIteration
      @lineno += 1        ## count BEFORE the call - the block might break out
      blk.call( line )
    end
  end

  ## call the block with every remaining line and its (zero-based) index
  def each_with_index( &blk )
    ## note - StopIteration is rescued (automagically) by Kernel#loop.
    loop do
      line  = @iter.next  ## note - raises StopIteration
      index = @lineno
      @lineno += 1        ## count BEFORE the call - the block might break out
      blk.call( line, index )
    end
  end

  ## return the next line without taking it; nil if no more lines
  def peek
    begin
      @iter.peek
    rescue StopIteration
      nil
    end
  end

  ## take and return the next line; nil if no more lines
  def next
    ## todo/check - do NOT catch StopIteration for next - why? why not?
    begin
      line = @iter.next
      @lineno += 1
      line
    rescue StopIteration
      nil
    end
  end
end # class LinesReader
47
+ end # module SportDb