sportdb-formats 1.2.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +6 -7
- data/Rakefile +7 -3
- data/lib/sportdb/formats/csv/goal.rb +192 -0
- data/lib/sportdb/formats/csv/goal_parser_csv.rb +28 -0
- data/lib/sportdb/formats/csv/match_parser_csv.rb +490 -0
- data/lib/sportdb/formats/csv/match_status_parser.rb +90 -0
- data/lib/sportdb/formats/match/conf_parser.rb +14 -2
- data/lib/sportdb/formats/match/match_parser.rb +502 -466
- data/lib/sportdb/formats/name_helper.rb +87 -0
- data/lib/sportdb/formats/search/sport.rb +69 -54
- data/lib/sportdb/formats/search/structs.rb +116 -0
- data/lib/sportdb/formats/search/world.rb +40 -22
- data/lib/sportdb/formats/version.rb +2 -2
- data/lib/sportdb/formats.rb +82 -15
- metadata +69 -14
- data/lib/sportdb/formats/goals.rb +0 -313
- data/lib/sportdb/formats/lines_reader.rb +0 -47
- data/lib/sportdb/formats/match/mapper.rb +0 -319
- data/lib/sportdb/formats/match/mapper_teams.rb +0 -23
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +0 -270
- data/lib/sportdb/formats/outline_reader.rb +0 -90
- data/lib/sportdb/formats/parser_helper.rb +0 -90
@@ -1,319 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
|
5
|
-
##
|
6
|
-
## note: this was/is a cut-n-paste (inline) copy of TextUtils::TitleMapper2
|
7
|
-
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/title_mapper2.rb
|
8
|
-
|
9
|
-
|
10
|
-
class MapperV2 ## todo/check: rename to NameMapper ? why? why not??
|
11
|
-
|
12
|
-
include Logging
|
13
|
-
|
14
|
-
attr_reader :known_names ## rename to mapping or mappings or just names - why? why not?
|
15
|
-
|
16
|
-
########
|
17
|
-
## key: e.g. augsburg
|
18
|
-
## name: e.g. FC Augsburg
|
19
|
-
## length (of name(!!) - not regex pattern): e.g. 11 -- do not count dots (e.g. U.S.A. => 3 or 6) why? why not?
|
20
|
-
## One entry of the known-names lookup table:
##   key     - key of the record the name belongs to
##   name    - the matchable name (or alt name / code)
##   length  - name.length; the table gets sorted by it (longest first)
##   pattern - regex source (string) interpolated into the match regex
MappingStruct = Struct.new( :key, :name, :length, :pattern) ## todo/check: use (rename to) NameStruct - why? why not??
|
21
|
-
|
22
|
-
######
|
23
|
-
## convenience helper - (auto)build ActiveRecord-like team records/structs
|
24
|
-
## Minimal record built by build_records:
##   key       - auto-generated from the name (see build_records)
##   name      - first column of the line
##   alt_names - remaining columns joined with '|' or nil if there are none
Record = Struct.new( :key, :name, :alt_names )
|
25
|
-
## Build Record structs from "Name | Alt Name | Alt Name" lines.
##
##  txt_or_lines - a text block (String) or an array of lines;
##                 for a text block every line gets stripped and
##                 blank lines and comment lines (starting with #) get skipped;
##                 an array of lines is used as is
##
##  returns an array of Record (key, name, alt_names);
##    - key is the downcased name with everything but a-z removed
##    - alt_names is a '|'-joined string or nil if there are no alt names
##
##  note: lines without a name (e.g. '' or ' | ') get skipped -
##        before they raised NoMethodError (nil name) or
##        produced a record with an empty name and key
def build_records( txt_or_lines )
  lines = if txt_or_lines.is_a?( String )
            ## todo/fix: use ParserHelper read_lines !!! ????
            txt_or_lines.each_line
                        .map { |line| line.strip }
                        .reject { |line| line.empty? || line.start_with?( '#' ) }
          else
            txt_or_lines
          end

  lines.each_with_object( [] ) do |line, recs|
    name, *alts = line.split( '|' ).map { |value| value.strip }
    next if name.nil? || name.empty?    ## note: nothing to build a record from

    ## note: quick hack - auto-generate key, that is, downcase and keep a-z only
    key       = name.downcase.gsub( /[^a-z]/, '' )
    alt_names = alts.empty? ? nil : alts.join( '|' )

    recs << Record.new( key, name, alt_names )
  end
end
|
56
|
-
|
57
|
-
|
58
|
-
## records_or_mapping - one of
##    1) text block/string       - auto-converted with build_records
##    2) array of lines/strings  - auto-converted with build_records
##    3) array of records        - must respond to key, name and alt_names
##    4) hash (name=>record)     - "custom" mapping table
##  tag - tag name passed along to the find_rec helpers (e.g. team)
def initialize( records_or_mapping, tag )
  text_input = records_or_mapping.is_a?( String ) ||
               (records_or_mapping.is_a?( Array ) && records_or_mapping[0].is_a?( String ))
  records_or_mapping = build_records( records_or_mapping )   if text_input

  ## name lookup table (sorted by the build helpers)
  @known_names = records_or_mapping.is_a?( Hash ) ?
                     build_name_table_for_mapping( records_or_mapping ) :
                     build_name_table_for_records( records_or_mapping )

  ## record lookup table by key
  ##   note: for a hash use the values (might include duplicates - last one wins)
  records  = records_or_mapping.is_a?( Array ) ? records_or_mapping : records_or_mapping.values
  @records = records.each_with_object( {} ) { |rec, by_key| by_key[rec.key] = rec }

  ## todo: rename tag to attrib or attrib_name - why ?? why not ???
  @tag = tag
end
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
## Replace all known names in line with @@oo<key>oo@@ markers
##   note: changes line in place; keeps calling map_name_for!
##         (one replacement per call) until it reports no more match;
##         returns nil
def map_names!( line )   ## rename to just map! - why?? why not???
  loop do
    break unless map_name_for!( @tag, line, @known_names )
  end
end
|
93
|
-
|
94
|
-
## Look up the record for the first @@oo<key>oo@@ marker in line
##   note: changes line in place (see find_rec_for!);
##         returns the record or nil
def find_rec!( line )
  tag = @tag
  find_rec_for!( tag, line, @records )
end
|
97
|
-
|
98
|
-
## Look up the records for all @@oo<key>oo@@ markers in line
##   note: plural! - always returns an array (empty if no marker found);
##         uses a numbered tag per lookup, that is, <tag>1, <tag>2, etc.
def find_recs!( line )
  recs    = []
  counter = 0

  loop do
    counter += 1
    rec = find_rec_for!( "#{@tag}#{counter}", line, @records )
    break unless rec
    recs << rec
  end

  recs
end
|
110
|
-
|
111
|
-
|
112
|
-
private
|
113
|
-
## Build the known names table for a "custom" mapping hash (name=>record)
##   note: just uses the "standard" regex escape for the pattern
##          (e.g. no extras for umlauts, accents, etc.)
##   returns an array of MappingStruct sorted by length
##          (largest goes first - best match)
def build_name_table_for_mapping( mapping )
  entries = mapping.map do |name, rec|
    MappingStruct.new( rec.key, name, name.length, Regexp.escape( name ) )
  end

  entries.sort { |l,r| r.length <=> l.length }
end
|
130
|
-
|
131
|
-
## Build the known names table for an array of records
##   (records must respond to key, name and alt_names; code is optional) e.g.
##
##    [[ 'wolfsburg', 'VfL Wolfsburg'],
##     [ 'augsburg',  'FC Augsburg'],
##     [ 'augsburg',  'Augi2'],
##     [ 'augsburg',  'Augi3' ],
##     [ 'stuttgart', 'VfB Stuttgart']]
##
##   returns an array of MappingStruct sorted by length
##     (largest goes first - best match)
##
##   note: names with a subname e.g. Grand Prix Japan (Suzuka Circuit)
##         get added twice - as is and without the subname e.g. Grand Prix Japan
def build_name_table_for_records( records )
  known_names = []

  records.each_with_index do |rec,index|
    name_candidates = [rec.name]
    name_candidates += rec.alt_names.split('|')   if rec.alt_names && !rec.alt_names.empty?

    names = []
    name_candidates.each do |candidate|
      names << candidate
      next unless candidate =~ /\(.+\)/

      ## remove/delete subname, squish the two spaces an inline () leaves behind
      ##   and strip leading and trailing whitespace too
      extra_name = candidate.gsub( /\(.+\)/, '' ).gsub( /[ \t]+/, ' ' ).strip

      ## note: skip if nothing is left (e.g. name is a subname only);
      ##        an empty pattern matches at every word boundary
      ##        and map_names! would never stop replacing
      names << extra_name   unless extra_name.empty?
    end

    names.each do |name|
      ## note: escape for regex plus allow subs for special chars/accents
      known_names << MappingStruct.new( rec.key, name, name.length, name_esc_regex( name ) )
    end

    logger.debug " #{rec.class.name}[#{index+1}] #{rec.key} >#{names.join('|')}<"

    ## note: only include code field - if defined
    if rec.respond_to?(:code) && rec.code && !rec.code.empty?
      ## note: no variants allowed for now - but escape the code,
      ##        the pattern gets interpolated into a regex (e.g. . must not match any char)
      known_names << MappingStruct.new( rec.key, rec.code, rec.code.length, Regexp.escape( rec.code ) )
    end
  end

  ## note: sort here by length (largest goes first - best match)
  known_names.sort { |l,r| r.length <=> l.length }
end
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
## Replace the first known name found in line with a @@oo<key>oo@@ marker
##   note: changes line in place;
##         returns true after the first replacement (does NOT continue)
##         or false if no name matches
def map_name_for!( tag, line, mappings )
  mappings.any? do |mapping|
    ## nb: \b does NOT include space or newline for word boundary (only alphanums e.g. a-z0-9)
    ##     (thus add it, allows match for Benfica Lis. for example - note . at the end)
    ##  wrap with word boundary (e.g. match only whole words e.g. not wac in wacker)
    name_re = /\b#{mapping.pattern}(\b| |\t|$)/
    next false unless name_re.match( line )

    logger.debug " match for #{tag.downcase} >#{mapping.key}< >#{mapping.pattern}<"
    ## make sure @@oo{key}oo@@ doesn't match itself with other key e.g. wacker, wac, etc.
    line.sub!( name_re, "@@oo#{mapping.key}oo@@ " )    # NB: add one space char at end
    true
  end
end
|
218
|
-
|
219
|
-
|
220
|
-
## Find the first @@oo<key>oo@@ marker in line, replace it with [TAG] (tag upcased)
##   and map the key to its record (using the records hash table)
##   note: changes line in place;
##         returns nil if no marker is found (or the key is unknown)
def find_rec_for!( tag, line, records )
  ## e.g. everything in @@ .... @@ (use non-greedy +? plus all chars but not @, that is [^@])
  marker_re = /@@oo([^@]+?)oo@@/

  m = marker_re.match( line )
  return nil unless m

  key = m[1]
  logger.debug " #{tag.downcase}: >#{key}<"

  line.sub!( marker_re, "[#{tag.upcase}]" )

  records[ key ]
end   # method find_rec_for!
|
234
|
-
|
235
|
-
|
236
|
-
####
|
237
|
-
# name helper cut-n-paste copy from TextUtils
|
238
|
-
## see https://github.com/textkit/textutils/blob/master/textutils/lib/textutils/helper/title_helper.rb
|
239
|
-
####
# name helper cut-n-paste copy from TextUtils
#
## Turn a name into a regex pattern (string)
##   1) escape regex special chars e.g.
##        . to \.  and ( to \(  and ) to \)  and ? to \?  etc.
##      note: now also escapes [ ] { } \ and |
##            (before a name with these chars gave an invalid or wrong regex)
##   2) allow matching accented chars with or without accents / transcriptions
##        e.g. ü => (ü|ue) and make - optional e.g. Blau-Weiss == Blau Weiss
##
##   e.g.  Benfica Lis.                   => Benfica Lis\.
##         Club Atlético Colón (Santa Fe) => Club Atl(é|e)tico Col(ó|o)n \(Santa Fe\)
##
##  returns a new string; name_unescaped is not changed
def name_esc_regex( name_unescaped )
  ## note: cannot use Regexp.escape! will escape space ' ' to '\ '
  name = name_unescaped.gsub( /[\\.()\[\]{}?*+^$|]/ ) { |ch| "\\#{ch}" }

  ## todo: add some more
  ##   see http://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references for more
  alternatives = {
    '-' => '(-| )',   ## e.g. Blau-Weiß Linz
    'æ' => '(æ|ae)',
    'ä' => '(ä|ae)',
    'Ö' => '(Ö|Oe)',  ## e.g. Österreich
    'ö' => '(ö|oe)',  ## e.g. Mönchengladbach
    'ß' => '(ß|ss)',  ## e.g. Blau-Weiß Linz
    'ü' => '(ü|ue)',

    'á' => '(á|a)',   ## e.g. Bogotá, Sársfield
    'ã' => '(ã|a)',   ## e.g São Paulo
    'ç' => '(ç|c)',   ## e.g. Fenerbahçe
    'é' => '(é|e)',   ## e.g. Vélez
    'ê' => '(ê|e)',   ## e.g. Grêmio
    'ï' => '(ï|i)',   ## e.g. El Djazaïr
    'ñ' => '(ñ|n)',   ## e.g. Porteño
    'ň' => '(ň|n)',   ## e.g. Plzeň
    'ó' => '(ó|o)',   ## e.g. Colón
    'ō' => '(ō|o)',   ## e.g. Tōkyō
    'ș' => '(ș|s)',   ## e.g. Bucarești
    'ú' => '(ú|u)'    ## e.g. Fútbol
  }

  ### fix/todo: check for dot+space e.g. . and make dot optional
  ##   e.g. make dot (.) optional plus allow alternative optional space e.g.
  ##     -- for U.S.A. => allow USA or U S A
  ##   collect some more (real-world) examples first!!!!!

  ## note: replace all chars in a single pass
  ##         (the alternatives get added AFTER escaping - keeps ( | ) as is)
  name.gsub( Regexp.union( alternatives.keys ), alternatives )
end
|
317
|
-
|
318
|
-
end # class MapperV2
|
319
|
-
end # module SportDb
|
@@ -1,23 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb

## Convenience wrapper around MapperV2 with the tag fixed to 'team'
class TeamMapper
  TAG = 'team'

  def initialize( records_or_mapping )
    @mapper = MapperV2.new( records_or_mapping, TAG )
  end

  ## note: passes along the line to the mapper as is (gets changed in place)
  def map_teams!( line )  @mapper.map_names!( line ); end

  ## Note: returns whatever the mapper's find_rec! returns (single lookup)
  def find_team!( line )  @mapper.find_rec!( line ); end

  ## Note: plural! (teamsssss) - returns whatever the mapper's find_recs! returns
  def find_teams!( line ) @mapper.find_recs!( line ); end
end # class TeamMapper

end # module SportDb
|
@@ -1,270 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
|
4
|
-
|
5
|
-
class AutoConfParser ## todo/check: rename/change to MatchAutoConfParser - why? why not?
|
6
|
-
|
7
|
-
## Convenience helper - build a parser and run parse in one go
##   lines - passed along to new (text block or array of lines)
##   start - required keyword; passed along to new (as second positional arg)
##  returns the result of parse
def self.parse( lines, start: )
  ## todo/fix: add support for txt and lines
  ##    check if lines_or_txt is an array or just a string
  new( lines, start ).parse
end
|
13
|
-
|
14
|
-
|
15
|
-
include Logging ## e.g. logger#debug, logger#info, etc.
|
16
|
-
include ParserHelper ## e.g. read_lines, etc.
|
17
|
-
|
18
|
-
|
19
|
-
## lines - text block (String) or array of lines;
##           a string gets split into lines with read_lines
##           (presumably from ParserHelper - not shown here; TODO confirm what it strips)
## start - stored as is; passed along to find_date! (for auto-complete year)
def initialize( lines, start )
  ## todo/check: change to text instead of array of lines - why? why not?
  source = lines.is_a?( String ) ? read_lines( lines ) : lines

  ## note - wrap in enumerator/iterator a.k.a lines reader
  @lines = LinesReader.new( source )
  @start = start
end
|
32
|
-
|
33
|
-
|
34
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
35
|
-
## make sure mon feb 12 18:10 will not match
|
36
|
-
## allow 1. FC Köln etc.
|
37
|
-
## Mainz 05:
|
38
|
-
## limit to 31 chars max (1 leading char + up to 30 more)
|
39
|
-
## only allow chars incl. intl but (NOT ()[]/;)
|
40
|
-
##
|
41
|
-
## Group A:
|
42
|
-
## Group B: - remove colon
|
43
|
-
## or lookup first
|
44
|
-
|
45
|
-
## Attribute (key/value) line e.g. Key: value
##   key   - first char must NOT be one of : | ] [ ( ) / ; space or -
##            followed by up to 30 more chars (all but : | ] [ ( ) / ;)
##   colon - MUST be followed by one (or more) spaces
##            (thus, mon feb 12 18:10 will not match)
##   value - the rest of the line
ATTRIB_REGEX = /^
    [ ]*?     # slurp leading spaces
  (?<key>[^:|\]\[()\/; -]
         [^:|\]\[()\/;]{0,30}
   )
    [ ]*?     # slurp trailing spaces
    :[ ]+
  (?<value>.+)
    [ ]*?   # slurp trailing spaces
    $
/ix
|
56
|
-
|
57
|
-
|
58
|
-
## Scan all lines and collect usage stats
##
##  returns an array with
##    [teams, rounds, groups, round_defs, group_defs, grounds, warns]
##   - teams, round_defs, group_defs, grounds - hash with usage counter
##   - rounds, groups - hash with {count:, match_count:} per line
##   - warns  - array of lines with no match found
##
##  note: the order of the checks matters (see the notes inline)
def parse
  @last_round = nil
  @last_group = nil

  ## definitions/defs
  @round_defs = Hash.new(0)
  @group_defs = Hash.new(0)

  ## usage/refs
  @rounds = {}   ## track usage counter and match (two teams) counter
  @groups = {}   ## -"-

  @teams = Hash.new(0)   ## keep track of usage counter

  ## note: ground incl. optional city (timezone) etc. - why? why not?
  @grounds = Hash.new(0)

  @warns = []   ## track list of warnings (unmatched lines) too - why? why not?

  ## todo/fix - use @lines.rewind first here - why? why not?
  @lines.each do |line|
    if is_round_def?( line )
      ## todo/fix: add round definition (w begin n end date)
      logger.debug "skipping matched round def line: >#{line}<"
      @round_defs[ line ] += 1
    elsif is_round?( line )
      logger.debug "skipping matched round line: >#{line}<"

      round = @rounds[ line ] ||= {count: 0, match_count: 0}   ## usage counter, match counter
      round[:count] +=1
      @last_round = round
    elsif is_group_def?( line )   ## NB: group goes after round (round may contain group marker too)
      ### todo: add pipe (|) marker (required)
      logger.debug "skipping matched group def line: >#{line}<"
      @group_defs[ line ] += 1
    elsif is_group?( line )
      ## -- lets you set group e.g. Group A etc.
      logger.debug "skipping matched group line: >#{line}<"

      group = @groups[ line ] ||= {count: 0, match_count: 0}
      group[:count] +=1
      @last_group = group
      ## todo/fix: parse group line!!!
    elsif ATTRIB_REGEX.match( line )
      ## note: check attrib regex AFTER group def e.g.:
      ##         Group A:
      ##         Group B:  etc.
      logger.debug "skipping key/value line - >#{line}<"

      ## check if line ends with dot
      ##   if not slurp up lines to the next dot!!!
      ## note: check for nil FIRST - before nil.end_with? raised NoMethodError
      ##        if the attribute is missing the dot at the end of input
      ##        (assumes @lines.next returns nil if there are no more lines - TODO confirm)
      until line.nil? || line.end_with?( '.' )
        line = @lines.next
        logger.debug "skipping key/value line (cont.) - >#{line}<"
      end
    elsif is_goals?( line )
      ## note - goals must be AFTER attributes!!!
      logger.debug "skipping matched goals line: >#{line}<"
    elsif try_parse_game( line )
      # do nothing here
    else
      logger.warn "skipping line (no match found): >#{line}<"
      @warns << line
    end
  end   # lines.each

  ## new - add grounds and cities
  [@teams, @rounds, @groups, @round_defs, @group_defs,
   @grounds,   ## note: ground incl. optional city (timezone) etc.
   @warns]
end
|
135
|
-
|
136
|
-
|
137
|
-
## Try to parse line as a game (fixture)
##   note: passes along a copy; the line itself does NOT get modified
##   returns the result of parse_game (true if parsed, false if no match)
def try_parse_game( line )
  copy = line.dup
  parse_game( copy )
end
|
142
|
-
|
143
|
-
## Parse a game (fixture) line and update the usage counters
##   (@grounds, @teams and match_count of the last round / group)
##
##   returns true if the line is a match or holds nothing to match
##            (e.g. is empty after removing tags, date or hour)
##   returns false if no two teams are found
##
##  NOTE(review): prints to stdout (puts/pp) and
##                terminates the process (exit 1) on more than one @-marker;
##                consider a logger and raising an error instead
def parse_game( line )
  logger.debug "parsing game (fixture) line: >#{line}<"

  tagged_run = /\[[^\]]+?\]/    ## protected / tagged text run e.g. [...]

  ## remove all protected text runs
  ## todo/fix: move remove protected text runs AFTER find date!! - why? why not?
  line = line.gsub( tagged_run, '' ).strip
  return true if line.empty?    ## note: return true (for valid line with no match/teams)

  ## split by geo (@) - check for ground/stadium and cities
  parts = line.split( '@' )
  case parts.size
  when 1
    ## no stadium
  when 2    # bingo!!!
    ## for now keep it simple - pass along "unparsed" all-in-one
    ground = parts[1].gsub( /[ \t]+/, ' ').strip   ## squish
    @grounds[ ground ] += 1
  else
    puts "!! ERROR - too many @-markers found in line:"
    puts line
    exit 1
  end

  line = parts[0]

  ## try find date
  date = find_date!( line, start: @start )
  line = if date
           ## if found remove tagged run too; note using singular sub (NOT global gsub)
           line.sub( tagged_run, '' ).strip
         else
           ## check for leading hours only e.g. 20.30 or 20:30 or 20h30 or 20H30 or 09h00
           ##   note: MUST be anchored to beginning of line and followed by space
           line.sub( /^[012]?[0-9][.:hH][0-9][0-9](?=[ ])/, '' ).strip
         end
  return true if line.empty?    ## note: return true (for valid line with no match/teams)

  find_score!( line )

  logger.debug " line: >#{line}<"

  line = line.sub( tagged_run, '$$' )    # note: replace first score tag with $$
  line = line.gsub( tagged_run, '' )     # note: replace/remove all other score tags with nothing

  ## clean-up remove all text run inside () or empty () too
  line = line.gsub( /\([^)]*?\)/, '' )

  ## check for more match separators e.g. - or v or vs (with optional dot) for now
  line = line.sub( /\s+(-|v|vs\.?)\s+/i, '$$' )

  teams = line.split( '$$' )
              .map { |value| value.strip }        ## strip spaces
              .reject { |value| value.empty? }    ## remove empty strings
  return true if teams.empty?   ## note: return true (for valid line with no match/teams)

  if teams.size == 1
    puts "(auto config) try matching teams separated by spaces (2+):"
    pp teams

    teams = teams[0].split( /[ ]{2,}/ )
    pp teams
  end

  return false if teams.size != 2

  puts "(auto config) try matching teams:"
  pp teams

  teams.each { |team| @teams[ team ] += 1 }   ## update usage counters

  @last_round[ :match_count ] += 1   if @last_round
  @last_group[ :match_count ] += 1   if @last_group

  true
end
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
## Find score in line - passes along the line to ScoreFormats.find! and returns its result
##   note: always call after find_date! !!!
##         scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
##         -- note: score might have two digits too
def find_score!( line )
  score = ScoreFormats.find!( line )
  score
end
|
259
|
-
|
260
|
-
## Find date in line - passes along the line and start to DateFormats.find! and returns its result
##   start - required keyword; lets us pass in the start date (for event) for auto-complete year
##   NB: side effect - removes date from line string
##        (per the original note; happens inside DateFormats.find! - TODO confirm)
def find_date!( line, start: )
  date = DateFormats.find!( line, start: start )
  date
end
|
269
|
-
end # class AutoConfParser
|
270
|
-
end # module SportDb
|
@@ -1,90 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb

## Reads text into a flat outline, that is, an array of nodes:
##    [:h1, 'Heading'], [:h2, 'Heading'], etc. - heading (level is the number of leading =)
##    [:p, ['line 1', 'line 2']]              - paragraph (consecutive text lines)
##
##  note: blank lines start a new paragraph;
##        comments (# until end-of-line) get stripped;
##        everything after a line with __END__ gets skipped
class OutlineReader

  ## read (utf-8) text from path and parse
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    txt = File.open( path, 'r:utf-8' ) {|f| f.read }
    parse( txt )
  end

  def self.parse( txt )
    new( txt ).parse
  end

  def initialize( txt )
    @txt = txt
  end

  ## note: skip "decorative" only heading e.g. ========
  ##  todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
  HEADING_BLANK_RE = %r{\A
                         ={1,}
                        \z}x

  ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
  HEADING_RE = %r{\A
                   (?<marker>={1,})   ## 1. leading ======
                    [ ]*
                   (?<text>[^=]+)     ## 2. text (note: for now no "inline" = allowed)
                    [ ]*
                    =*                ## 3. (optional) trailing ====
                  \z}x

  ## returns the outline (array of nodes); empty if there are no headings or text lines
  def parse
    outline    = []     ## outline structure
    start_para = true   ## start new para(graph) on new text line?

    @txt.each_line do |line|
      line = line.strip   ## todo/fix: keep leading and trailing spaces - why? why not?

      if line.empty?   ## todo/fix: keep blank line nodes?? - why? why not?
        start_para = true
        next
      end

      break if line == '__END__'

      next if line.start_with?( '#' )   ## skip comments too
      ## strip inline (until end-of-line) comments too
      ##   e.g  Eupen | KAS Eupen ## [de]
      ##     => Eupen | KAS Eupen
      line = line.sub( /#.*/, '' ).strip

      ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
      next if HEADING_BLANK_RE.match( line )   # skip "decorative" only heading e.g. ========

      if (m = HEADING_RE.match( line ))
        start_para = true

        heading_level = m[:marker].length   ## count number of = for heading level
        outline << [:"h#{heading_level}", m[:text].strip]
      elsif start_para   ## assume it's a (plain/regular) text line
        outline << [:p, [line]]
        start_para = false
      else
        node = outline[-1]   ## get last entry
        if node[0] == :p     ## assert it's a p(aragraph) node!!!
          node[1] << line    ## add line to p(aragraph)
        else
          puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
          pp node
          exit 1
        end
      end
    end
    outline
  end # method parse
end # class OutlineReader

end # module SportDb
|