sportdb-formats 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +6 -7
- data/Rakefile +7 -3
- data/lib/sportdb/formats/csv/goal.rb +192 -0
- data/lib/sportdb/formats/csv/goal_parser_csv.rb +28 -0
- data/lib/sportdb/formats/csv/match_parser_csv.rb +490 -0
- data/lib/sportdb/formats/csv/match_status_parser.rb +90 -0
- data/lib/sportdb/formats/match/conf_parser.rb +14 -2
- data/lib/sportdb/formats/match/match_parser.rb +502 -466
- data/lib/sportdb/formats/name_helper.rb +87 -0
- data/lib/sportdb/formats/search/sport.rb +69 -54
- data/lib/sportdb/formats/search/structs.rb +116 -0
- data/lib/sportdb/formats/search/world.rb +40 -22
- data/lib/sportdb/formats/version.rb +2 -2
- data/lib/sportdb/formats.rb +82 -15
- metadata +69 -14
- data/lib/sportdb/formats/goals.rb +0 -313
- data/lib/sportdb/formats/lines_reader.rb +0 -47
- data/lib/sportdb/formats/match/mapper.rb +0 -319
- data/lib/sportdb/formats/match/mapper_teams.rb +0 -23
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +0 -270
- data/lib/sportdb/formats/outline_reader.rb +0 -90
- data/lib/sportdb/formats/parser_helper.rb +0 -90
@@ -1,319 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb

##
## note: this was/is a cut-n-paste (inline) copy of TextUtils::TitleMapper2
##   see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb

##
# Finds known names (e.g. team names) in a line of text and maps them to records.
#
# Works in two passes over a (mutable!) line string:
#   1. map_names!             replaces every known name with a "@@oo<key>oo@@ " marker
#   2. find_rec! / find_recs! replaces a marker with "[<TAG>]" and
#                             returns the record stored under the marker's key
class MapperV2   ## todo/check: rename to NameMapper ? why? why not??

  include Logging

  attr_reader :known_names   ## rename to mapping or mappings or just names - why? why not?

  ########
  ##  key:     e.g. augsburg
  ##  name:    e.g. FC Augsburg
  ##  length (of name(!!) - not regex pattern): e.g. 11
  ##           -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
  ##  pattern: regex source (string) used to find the name in a line
  MappingStruct = Struct.new( :key, :name, :length, :pattern )  ## todo/check: use (rename to) NameStruct - why? why not??

  ######
  ## convenience helper - (auto)build ActiveRecord-like team records/structs
  Record = Struct.new( :key, :name, :alt_names )

  ## match accented char with or without accents e.g. (ü|ue) etc.
  ##   also make - optional, that is, change to (-| ) e.g. Blau-Weiss == Blau Weiss
  ##
  ## todo: add some more
  ##   see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
  ##
  ## note: built once (and frozen) instead of on every name_esc_regex call
  NAME_ALTERNATIVES = [
    ['-', '(-| )'],    ## e.g. Blau-Weiß Linz
    ['æ', '(æ|ae)'],
    ['ä', '(ä|ae)'],
    ['Ö', '(Ö|Oe)'],   ## e.g. Österreich
    ['ö', '(ö|oe)'],   ## e.g. Mönchengladbach
    ['ß', '(ß|ss)'],   ## e.g. Blau-Weiß Linz
    ['ü', '(ü|ue)'],

    ['á', '(á|a)'],    ## e.g. Bogotá, Sársfield
    ['ã', '(ã|a)'],    ## e.g  São Paulo
    ['ç', '(ç|c)'],    ## e.g. Fenerbahçe
    ['é', '(é|e)'],    ## e.g. Vélez
    ['ê', '(ê|e)'],    ## e.g. Grêmio
    ['ï', '(ï|i)'],    ## e.g. El Djazaïr
    ['ñ', '(ñ|n)'],    ## e.g. Porteño
    ['ň', '(ň|n)'],    ## e.g. Plzeň
    ['ó', '(ó|o)'],    ## e.g. Colón
    ['ō', '(ō|o)'],    ## e.g. Tōkyō
    ['ș', '(ș|s)'],    ## e.g. Bucarești
    ['ú', '(ú|u)'],    ## e.g. Fútbol
  ].freeze


  ##
  # Builds Record structs from text.
  #
  # txt_or_lines - a text block (String; blank lines and lines starting
  #                with # get skipped) or a list of lines (used as is)
  #
  # Every line is split on | - the first value is the name and all others
  # are kept (re-joined with |) as alt_names (or nil if none).
  # The key is auto-generated from the (downcased) name with
  # everything but a-z removed.
  #
  # Returns an array of Record structs.
  def build_records( txt_or_lines )
    lines = if txt_or_lines.is_a?( String )
              ## todo/fix: use ParserHelper read_lines !!! ????
              txt_or_lines.each_line
                          .map { |line| line.strip }
                          .reject { |line| line.empty? || line.start_with?( '#' ) }  ## note: skip empty and comment lines
            else
              txt_or_lines
            end

    lines.map do |line|
      values = line.split( '|' ).map { |value| value.strip }

      name = values[0]
      ## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
      key       = name.downcase.gsub( /[^a-z]/, '' )
      alt_names = values.size > 1 ? values[1..-1].join( '|' ) : nil

      Record.new( key, name, alt_names )
    end
  end


  ##
  # records_or_mapping - one of:
  #                       a) text block (String) or array of text lines
  #                          (auto-converted with build_records)
  #                       b) array of records (responding to key, name, alt_names
  #                          and optionally code)
  #                       c) "custom" mapping hash table (name=>record)
  # tag                - tag name used for the "[<TAG>]" placeholders e.g. 'team'
  def initialize( records_or_mapping, tag )
    ## for convenience allow easy (auto-)convert text (lines) to records
    if records_or_mapping.is_a?( String ) ||
       (records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
      records_or_mapping = build_records( records_or_mapping )
    end

    ## build mapping lookup table
    @known_names = if records_or_mapping.is_a?( Hash )   ## assume "custom" mapping hash table (name=>record)
                     build_name_table_for_mapping( records_or_mapping )
                   else   ## assume array of records
                     build_name_table_for_records( records_or_mapping )
                   end

    ## build lookup hash by record (e.g. team/club/etc.) key
    records = if records_or_mapping.is_a?( Array )
                records_or_mapping
              else  ## assume hash (uses values assuming to be all records - note might include duplicates)
                records_or_mapping.values
              end

    @records = records.each_with_object( {} ) { |rec, h| h[rec.key] = rec }

    ## todo: rename tag to attrib or attrib_name - why ?? why not ???
    @tag = tag   # e.g. tag name use for @@brewery@@ @@team@@ etc.
  end


  ##
  # Replaces (in place) all known names in line with "@@oo<key>oo@@ " markers.
  # Keeps going until no more known name is found. Returns nil.
  def map_names!( line )   ## rename to just map! - why?? why not???
    loop do
      break unless map_name_for!( @tag, line, @known_names )
    end
  end

  ##
  # Replaces (in place) the first marker in line with "[<TAG>]" and
  # returns the record for the marker's key (or nil if no marker found).
  def find_rec!( line )
    find_rec_for!( @tag, line, @records )
  end

  ##
  # Same as find_rec! but for all markers - the placeholders get
  # numbered e.g. [TEAM1], [TEAM2], etc.
  # Note: stops at the first marker whose key has no record.
  # Returns an array of records (empty if none found).
  def find_recs!( line )   # note: keys (plural!) - will return array
    recs    = []
    counter = 1

    while (rec = find_rec_for!( "#{@tag}#{counter}", line, @records ))
      recs << rec
      counter += 1
    end

    recs
  end


private
  def build_name_table_for_mapping( mapping )
    known_names = mapping.map do |name, rec|
      ## note: just use "standard" regex escape (e.g. no extras for umlauts,accents,etc.)
      MappingStruct.new( rec.key, name, name.length, Regexp.escape( name ) )
    end

    ## note: sort here by length (largest goes first - best match)
    known_names.sort { |l,r| r.length <=> l.length }
  end

  def build_name_table_for_records( records )
    ## build known names table w/ alt names e.g.
    #
    # [[ 'wolfsburg', 'VfL Wolfsburg'],
    #  [ 'augsburg',  'FC Augsburg'],
    #  [ 'augsburg',  'Augi2'],
    #  [ 'augsburg',  'Augi3' ],
    #  [ 'stuttgart', 'VfB Stuttgart']]

    known_names = []

    records.each_with_index do |rec, index|
      name_candidates = [rec.name]
      name_candidates += rec.alt_names.split( '|' )  if rec.alt_names && !rec.alt_names.empty?

      ## check if name includes subname e.g. Grand Prix Japan (Suzuka Circuit)
      #   make subname optional by adding name w/o subname e.g. Grand Prix Japan
      names = []
      name_candidates.each do |candidate|
        names << candidate
        if candidate =~ /\(.+\)/
          # remove/delete subnames
          #  note: strip leading n trailing whitespaces too!
          #  -- todo: add squish or something if () is inline e.g. leaves two spaces?
          names << candidate.gsub( /\(.+\)/, '' ).strip
        end
      end

      names.each do |name|
        ## note: escape for regex plus allow subs for special chars/accents
        known_names << MappingStruct.new( rec.key, name, name.length, name_esc_regex( name ) )
      end

      logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"

      ## note: only include code field - if defined
      if rec.respond_to?(:code) && rec.code && !rec.code.empty?
        ## note: no variants allowed for now - but escape the code
        ##   (the pattern gets interpolated into a regex; a dot in a code
        ##    must only match a dot and not any char)
        known_names << MappingStruct.new( rec.key, rec.code, rec.code.length, Regexp.escape( rec.code ) )
      end
    end

    ## note: sort here by length (largest goes first - best match)
    #   exclude code and key (key should always go last)
    known_names.sort { |l,r| r.length <=> l.length }
  end


  ##
  # Replaces (in place) the first match of the first matching mapping
  # with "@@oo<key>oo@@ ". Returns true if replaced, false if nothing matched.
  def map_name_for!( tag, line, mappings )
    mappings.each do |mapping|
      key     = mapping.key
      pattern = mapping.pattern
      ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
      ##  (thus add it, allows match for Benfica Lis.  for example - note . at the end)

      ## check add $ e.g. (\b| |\t|$) does this work? - check w/ Benfica Lis.$
      re = /\b#{pattern}(\b| |\t|$)/   # wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
      next unless line =~ re

      logger.debug " match for #{tag.downcase} >#{key}< >#{pattern}<"
      # make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
      line.sub!( re, "@@oo#{key}oo@@ " )   # NB: add one space char at end
      return true    # break out after first match (do NOT continue)
    end

    false
  end


  ##
  # Replaces (in place) the first "@@oo<key>oo@@" marker with "[<TAG>]".
  # Returns records[key] for the marker's key (or nil if no marker found).
  def find_rec_for!( tag, line, records )
    re = /@@oo([^@]+?)oo@@/     # e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])

    m = re.match( line )
    return nil  unless m

    key = m[1]
    logger.debug " #{tag.downcase}: >#{key}<"

    line.sub!( re, "[#{tag.upcase}]" )

    records[ key ]   ## note: map key to record (using records hash table mapping)
  end


  ####
  # name helper cut-n-paste copy from TextUtils
  ##  see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
  #
  # Turns a name into a regex source (string):
  #   1) escapes regex special chars e.g. . ( ) [ ] { } | ? * + $ ^ and \
  #   2) adds alternatives for accented chars and hyphens (see NAME_ALTERNATIVES)
  #
  #   e.g. Benfica Lis.
  #   e.g. Club Atlético Colón (Santa Fe)
  #   e.g. Bauer Anton (????)
  def name_esc_regex( name_unescaped )
    ## note: cannot use Regexp.escape! will escape space '' to '\ '
    ##
    ## note: MUST use the block form here - in a gsub replacement string
    ##   the sequence \+ is the "last matched group" back-reference, that is,
    ##   gsub( '+', '\+' ) deletes the plus (instead of escaping it)
    name = name_unescaped.gsub( /[\\.()\[\]{}|?*+$^]/ ) { |char| "\\#{char}" }

    ### fix/todo: check for dot+space e.g. . and make dot optional
    ##
    # e.g. make dot (.) optional plus allow alternative optional space e.g.
    #  -- for U.S.A. => allow USA or U S A
    #
    ## e.g. U. de G. or U de G or U.de G. ??
    ##   collect some more (real-world) examples first!!!!!

    ## note: alternatives get added AFTER escaping
    ##   (keeps the (|) of the alternatives unescaped)
    NAME_ALTERNATIVES.each do |char, alternative|
      name = name.gsub( char, alternative )
    end

    name
  end

end # class MapperV2
end # module SportDb
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb

##
# Convenience wrapper for a MapperV2 set up with the 'team' tag.
# All methods change the passed-in line in place
# (see the wrapped MapperV2 methods for details).
class TeamMapper

  ## records_or_mapping gets passed along (as is) to MapperV2.new
  def initialize( records_or_mapping )
    @name_mapper = MapperV2.new( records_or_mapping, 'team' )
  end

  ## tag all known team names in line - delegates to MapperV2#map_names!
  def map_teams!( line )
    @name_mapper.map_names!( line )
  end

  ## note: singular - delegates to MapperV2#find_rec!
  def find_team!( line )
    @name_mapper.find_rec!( line )
  end

  ## note: plural! (returns an array) - delegates to MapperV2#find_recs!
  def find_teams!( line )
    @name_mapper.find_recs!( line )
  end

end # class TeamMapper

end # module SportDb
|
@@ -1,270 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb

##
# Scans the lines of a match schedule and collects usage counters for
# teams, rounds, groups, round and group definitions and grounds
# plus all lines that did not match anything (warnings).
class AutoConfParser   ## todo/check: rename/change to MatchAutoConfParser - why? why not?

  ##
  # Convenience shortcut for new( lines, start ).parse
  def self.parse( lines, start: )
    ## todo/fix: add support for txt and lines
    ##  check if lines_or_txt is an array or just a string
    new( lines, start ).parse
  end


  include Logging         ## e.g. logger#debug, logger#info, etc.
  include ParserHelper    ## e.g. read_lines, etc.


  ##
  # lines - text (String; gets split with read_lines) or list of lines
  # start - gets passed along (as is) to DateFormats.find! (see find_date!)
  def initialize( lines, start )
    # for convenience split string into lines
    ##    note: removes/strips empty lines
    ## todo/check: change to text instead of array of lines - why? why not?

    ## note - wrap in enumerator/iterator a.k.a lines reader
    @lines = LinesReader.new( lines.is_a?( String ) ? read_lines( lines ) : lines )

    @start = start
  end


  ## note: colon (:) MUST be followed by one (or more) spaces
  ##      make sure mon feb 12 18:10 will not match
  ##        allow 1. FC Köln etc.
  ##              Mainz 05:
  ##           limit to 30 chars max
  ##          only allow chars incl. intl but (NOT ()[]/;)
  ##
  ##   Group A:
  ##   Group B:   - remove colon
  ##    or lookup first

  ATTRIB_REGEX = /^
                   [ ]*?     # slurp leading spaces
                (?<key>[^:|\]\[()\/; -]
                       [^:|\]\[()\/;]{0,30}
                 )
                   [ ]*?     # slurp trailing spaces
                   :[ ]+
                (?<value>.+)
                    [ ]*?   # slurp trailing spaces
                   $
                /ix

  ## tagged (protected) text run e.g. [...]
  ##   note: used for removing protected text runs and
  ##         for the tags left behind by find_date! and find_score!
  TAGGED_RUN_REGEX = /\[
                        [^\]]+?
                      \]/x


  ##
  # Walks all lines and tries to find all teams, rounds, groups, etc.
  # in the match schedule.
  #
  # Returns an array with
  #   [teams, rounds, groups, round_defs, group_defs, grounds, warns]
  # where
  #   teams, round_defs, group_defs, grounds - hash of usage counters
  #   rounds, groups - hash of {count:, match_count:} hashes by line
  #   warns          - array of lines skipped (no match found)
  def parse
    ## try to find all teams in match schedule
    @last_round = nil
    @last_group = nil

    ## definitions/defs
    @round_defs = Hash.new(0)
    @group_defs = Hash.new(0)

    ## usage/refs
    @rounds = {}     ## track usage counter and match (two teams) counter
    @groups = {}     ##  -"-

    @teams  = Hash.new(0)   ## keep track of usage counter

    ## note: ground incl. optional city (timezone) etc. - why? why not?
    @grounds = Hash.new(0)

    @warns  = []    ## track list of warnings (unmatched lines) too - why? why not?


    ## todo/fix - use @lines.rewind first here - why? why not?
    @lines.each do |line|
      if is_round_def?( line )
        ## todo/fix:  add round definition (w begin n end date)
        ## todo: do not patch rounds with definition (already assume begin/end date is good)
        ##  -- how to deal with matches that get rescheduled/postponed?
        logger.debug "skipping matched round def line: >#{line}<"
        @round_defs[ line ] += 1
      elsif is_round?( line )
        logger.debug "skipping matched round line: >#{line}<"

        round = @rounds[ line ] ||= {count: 0, match_count: 0}   ## usage counter, match counter
        round[:count] += 1
        @last_round = round
      elsif is_group_def?( line )   ## NB: group goes after round (round may contain group marker too)
        ### todo: add pipe (|) marker (required)
        logger.debug "skipping matched group def line: >#{line}<"
        @group_defs[ line ] += 1
      elsif is_group?( line )
        ##  -- lets you set group  e.g. Group A etc.
        logger.debug "skipping matched group line: >#{line}<"

        group = @groups[ line ] ||= {count: 0, match_count: 0}
        group[:count] += 1
        @last_group = group
        ## todo/fix:  parse group line!!!
      elsif ATTRIB_REGEX.match( line )
        ## note: check attrib regex AFTER group def e.g.:
        ##         Group A:
        ##         Group B:  etc.
        ##     todo/fix - change Group A: to Group A etc.
        ##                       Group B: to Group B

        ## check if line ends with dot
        ##  if not slurp up lines to the next dot!!!
        logger.debug "skipping key/value line - >#{line}<"
        ## note: check for nil FIRST (before calling end_with?) and stop on nil
        ##  NOTE(review): assumes @lines.next returns nil when there are no
        ##    more lines - LinesReader is not visible here; confirm
        until line.nil? || line.end_with?( '.' )
          line = @lines.next
          logger.debug "skipping key/value line (cont.) - >#{line}<"
        end
      elsif is_goals?( line )
        ## note - goals must be AFTER attributes!!!
        logger.debug "skipping matched goals line: >#{line}<"
      elsif try_parse_game( line )
        # do nothing here
      else
        logger.warn "skipping line (no match found): >#{line}<"
        @warns << line
      end
    end # lines.each

    ## new - add grounds and cities
    [@teams, @rounds, @groups, @round_defs, @group_defs,
     @grounds,    ## note: ground incl. optional city (timezone) etc.
     @warns]
  end


  def try_parse_game( line )
    # note: clone line; for possible test do NOT modify in place for now
    # note: returns true if parsed, false if no match
    parse_game( line.dup )
  end

  ##
  # Tries to find the two teams of a match in line and
  # updates the usage counters (teams, grounds and
  # match counts of the last round and group).
  #
  # Returns true if parsed (incl. valid lines with no teams) and
  # false if the line does not break up into exactly two teams.
  def parse_game( line )
    logger.debug "parsing game (fixture) line: >#{line}<"

    ## remove all protected text runs e.g. []
    ##   fix: add [ to end-of-line too
    ##  todo/fix: move remove protected text runs AFTER find date!! - why? why not?
    line = line.gsub( TAGGED_RUN_REGEX, '' ).strip
    return true  if line.empty?    ## note: return true (for valid line with no match/teams)


    ## split by geo (@) - remove for now
    values = line.split( '@' )

    ### check for ground/stadium and cities
    case values.size
    when 1
      ## no stadium
    when 2   # bingo!!!
      ## process stadium, city (timezone) etc.
      ##  for now keep it simple - pass along "unparsed" all-in-one
      ground = values[1].gsub( /[ \t]+/, ' ' ).strip  ## squish
      @grounds[ ground ] += 1
    else
      puts "!! ERROR - too many @-markers found in line:"
      puts line
      exit 1
    end

    line = values[0]


    ## try find date
    date = find_date!( line, start: @start )
    if date  ## if found remove tagged run too; note using singular sub (NOT global gsub)
      line = line.sub( TAGGED_RUN_REGEX, '' ).strip
    else
      ## check for leading hours only e.g.  20.30 or 20:30 or 20h30 or 20H30 or 09h00
      ##  todo/fix: make language dependent (or move to find_date/hour etc.) - why? why not?
      line = line.sub( %r{^    ## MUST be anchored to beginning of line
                          [012]?[0-9]
                          [.:hH]
                          [0-9][0-9]
                          (?=[ ])   ## must be followed by space for now (add end of line too - why? why not?)
                         }x, '' ).strip
    end

    return true  if line.empty?    ## note: return true (for valid line with no match/teams)


    ## note: return value not used - find_score! gets called for its
    ##   side effect on line only
    ##   NOTE(review): presumably replaces the score with a [] tag in place - confirm
    find_score!( line )

    logger.debug " line: >#{line}<"

    line = line.sub( TAGGED_RUN_REGEX, '$$' )   # note: replace first score tag with $$
    line = line.gsub( TAGGED_RUN_REGEX, '' )    # note: replace/remove all other score tags with nothing

    ## clean-up remove all text run inside () or empty () too
    line = line.gsub( /\(
                        [^)]*?
                      \)/x, '' )


    ## check for more match separators e.g. - or vs for now
    line = line.sub( / \s+
                        (  -
                         | v
                         | vs\.?    # note: allow optional dot eg. vs.
                        )
                       \s+
                     /ix, '$$' )

    values = line.split( '$$' )
    values = values.map { |value| value.strip }          ## strip spaces
    values = values.reject { |value| value.empty? }      ## remove empty strings

    return true  if values.size == 0   ## note: return true (for valid line with no match/teams)

    if values.size == 1
      puts "(auto config) try matching teams separated by spaces (2+):"
      pp values

      values = values[0].split( /[ ]{2,}/ )
      pp values
    end

    return false  if values.size != 2

    puts "(auto config) try matching teams:"
    pp values

    @teams[ values[0] ] += 1    ## update usage counters
    @teams[ values[1] ] += 1

    @last_round[ :match_count ] += 1   if @last_round
    @last_group[ :match_count ] += 1   if @last_group

    true
  end


  def find_score!( line )
    # note: always call after find_dates !!!
    #  scores match date-like patterns!!  e.g. 10-11  or 10:00 etc.
    #   -- note: score might have two digits too
    ScoreFormats.find!( line )
  end

  def find_date!( line, start: )
    ## NB: lets us pass in start_at/end_at date (for event)
    #   for auto-complete year

    # extract date from line
    # and return it
    # NB: side effect - removes date from line string
    DateFormats.find!( line, start: start )
  end

end # class AutoConfParser
end # module SportDb
|
@@ -1,90 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb

##
# Reads text into a (flat) outline, that is, a list of nodes:
#
#   [:h1, 'Heading']             - heading (h1, h2, h3, ... by number of leading =)
#   [:p,  ['line 1', 'line 2']]  - paragraph (list of text lines)
#
# Blank lines start a new paragraph. Comment lines (starting with #),
# inline comments (# to end-of-line) and "decorative" headings
# (e.g. =======) get removed. Everything after a line
# with __END__ gets ignored.
class OutlineReader

  ## debug flag - turn on to print every text line and heading to stdout
  ##   note: off by default
  def self.debug=(value) @debug = value; end
  def self.debug?()      @debug ||= false; end
  def debug?()           self.class.debug?; end


  ##
  # Reads the (utf-8) file at path and returns the parsed outline.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end

  ##
  # Returns the outline (array of nodes) parsed from txt.
  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end

  ## note: skip "decorative" only heading e.g. ========
  ##  todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
  HEADING_BLANK_RE = %r{\A
                        ={1,}
                        \z}x

  ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
  HEADING_RE = %r{\A
                  (?<marker>={1,})       ## 1. leading ======
                    [ ]*
                  (?<text>[^=]+)         ## 2. text  (note: for now no "inline" = allowed)
                    [ ]*
                    =*                   ## 3. (optional) trailing ====
                  \z}x

  ##
  # Returns the outline, that is, an array of
  # [:h<level>, heading] and [:p, lines] nodes (empty if no content).
  def parse
    outline    = []    ## outline structure
    start_para = true  ## start new para(graph) on new text line?

    @txt.each_line do |line|
      line = line.strip  ## todo/fix: keep leading and trailing spaces - why? why not?

      if line.empty?    ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
        start_para = true
        next
      end

      break if line == '__END__'

      next if line.start_with?( '#' )   ## skip comments too
      ## strip inline (until end-of-line) comments too
      ##  e.g Eupen | KAS Eupen ## [de]
      ##   => Eupen | KAS Eupen
      ##  e.g bq   Bonaire,  BOE        # CONCACAF
      ##   => bq   Bonaire,  BOE
      line = line.sub( /#.*/, '' ).strip
      pp line    if debug?

      ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
      next if HEADING_BLANK_RE.match( line )  # skip "decorative" only heading e.g. ========

      ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
      if (m = HEADING_RE.match( line ))
        start_para = true

        heading_level = m[:marker].length   ## count number of = for heading level
        heading       = m[:text].strip

        puts "heading #{heading_level} >#{heading}<"    if debug?
        outline << [:"h#{heading_level}", heading]
      elsif start_para   ## assume it's a (plain/regular) text line
        outline << [:p, [line]]
        start_para = false
      else
        node = outline[-1]    ## get last entry
        if node[0] == :p      ## assert it's a p(aragraph) node!!!
          node[1] << line     ## add line to p(aragraph)
        else
          puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
          pp node
          exit 1
        end
      end
    end
    outline
  end # method parse
end # class OutlineReader

end # module SportDb
|