sportdb-parser 0.5.5 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/lib/sportdb/parser/parser.rb +198 -190
- data/lib/sportdb/parser/racc_parser.rb +73 -0
- data/lib/sportdb/parser/racc_tree.rb +162 -0
- data/lib/sportdb/parser/tokenizer.rb +234 -9
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +9 -298
- metadata +4 -2
@@ -0,0 +1,162 @@
|
|
1
|
+
|
2
|
+
####
|
3
|
+
# RaccMatchParser support machinery (incl. node classes/abstract syntax tree)
|
4
|
+
|
5
|
+
class RaccMatchParser

  ## Node classes (abstract syntax tree) built as plain Structs.
  ##  Every node defines pretty_print( printer ) for compact debug output
  ##  (used by pp / pretty_inspect); nodes with a to_s reuse it for printing.
  ##
  ##  note - pretty_inspect returns a string ending in a newline,
  ##         thus, the embedded " key=value" parts end with a newline too.


  ## team is printed as-is; lineup via pretty_inspect
  LineupLine = Struct.new( :team, :lineup ) do
    def pretty_print( printer )
      printer.text( "<LineupLine " )
      printer.text( self.team )
      printer.text( " lineup=" + self.lineup.pretty_inspect )
      printer.text( ">" )
    end
  end

  ## name is printed as-is; card and sub are optional (skipped if nil/false)
  Lineup = Struct.new( :name, :card, :sub ) do
    def pretty_print( printer )
      buf = String.new
      buf << self.name
      buf << " card=" + self.card.pretty_inspect   if card
      buf << " sub="  + self.sub.pretty_inspect    if sub
      printer.text( buf )
    end
  end


  ## to_s => "<name>" or "<name> <minute>" if minute is present
  Card = Struct.new( :name, :minute ) do
    def to_s
      buf = String.new
      buf << "#{self.name}"
      buf << " #{self.minute.to_s}"   if self.minute
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end
  end


  ## printed as "(<minute> <sub.pretty_inspect>)"
  Sub = Struct.new( :minute, :sub ) do
    def pretty_print( printer )
      buf = String.new
      buf << "(#{self.minute.to_s} "
      buf << self.sub.pretty_inspect
      buf << ")"
      printer.text( buf )
    end
  end



  ## name is printed as-is; teams via pretty_inspect
  GroupDef = Struct.new( :name, :teams ) do
    def pretty_print( printer )
      printer.text( "<GroupDef " )
      printer.text( self.name )
      printer.text( " teams=" + self.teams.pretty_inspect )
      printer.text( ">" )
    end
  end


  ## name is printed as-is; date and duration are optional (skipped if nil/false)
  RoundDef = Struct.new( :name, :date, :duration ) do
    def pretty_print( printer )
      printer.text( "<RoundDef " )
      printer.text( self.name )
      printer.text( " date=" + self.date.pretty_inspect )          if date
      ## fix - label was misspelled (durattion)
      printer.text( " duration=" + self.duration.pretty_inspect )  if duration
      printer.text( ">" )
    end
  end

  DateHeader = Struct.new( :date ) do
    def pretty_print( printer )
      printer.text( "<DateHeader " )
      printer.text( "#{self.date.pretty_inspect}>" )
    end
  end

  GroupHeader = Struct.new( :name ) do
    def pretty_print( printer )
      printer.text( "<GroupHeader " )
      printer.text( "#{self.name}>" )
    end
  end

  ## names must respond to join (printed comma-separated)
  RoundHeader = Struct.new( :names ) do
    def pretty_print( printer )
      printer.text( "<RoundHeader " )
      printer.text( "#{self.names.join(', ')}>" )
    end
  end

  MatchLine = Struct.new( :ord, :date, :time,
                          :team1, :team2, :score,
                          :status,
                          :geo,
                          :timezone ) do    ## change to geos - why? why not?

    ## prints "<team1> v <team2>" first,
    ##   followed by all other members that are not nil as name=value
    def pretty_print( printer )
      printer.text( "<MatchLine " )
      printer.text( "#{self.team1} v #{self.team2}" )
      printer.breakable

      each_pair do |name, value|
        next if [:team1, :team2].include?( name )   ## already printed above
        next if value.nil?

        printer.text( "#{name}=#{value.pretty_inspect}" )
      end

      printer.text( ">" )
    end

  end

  GoalLine = Struct.new( :goals1, :goals2 ) do
    def pretty_print( printer )
      printer.text( "<GoalLine " )
      printer.text( "goals1=" + self.goals1.pretty_inspect + "," )
      printer.breakable
      printer.text( "goals2=" + self.goals2.pretty_inspect + ">" )
    end
  end

  ## to_s => "<player> <minute> <minute> ..." ;  minutes must respond to map
  Goal = Struct.new( :player, :minutes ) do
    def to_s
      buf = String.new
      buf << "#{self.player}"
      buf << " "
      buf << minutes.map { |min| min.to_s }.join(' ')
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end

  end


  ##
  ##  fix - move :og, :pen to Goal if possible - why? why not?
  ##  or change to GoalMinute ???
  ##
  ## to_s => e.g. 45'  or  90+2'  or  12'(og)  or  77'(pen)
  Minute = Struct.new( :m, :offset, :og, :pen ) do
    def to_s
      buf = String.new
      buf << "#{self.m}"
      buf << "+#{self.offset}"   if self.offset
      buf << "'"
      buf << "(og)"    if self.og
      buf << "(pen)"   if self.pen
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end
  end

end  # class RaccMatchParser
|
@@ -14,8 +14,239 @@ def log( msg )
|
|
14
14
|
end
|
15
15
|
|
16
16
|
|
17
|
+
## transforms
|
18
|
+
##
|
19
|
+
## Netherlands 1-2 (1-1) England
|
20
|
+
## => text => team
|
21
|
+
## score|vs
|
22
|
+
## text => team
|
23
|
+
|
24
|
+
|
25
|
+
## token iter/find better name
|
26
|
+
## e.g. TokenBuffer/Scanner or such ??
|
27
|
+
class Tokens
  ## Read-only cursor over an array of tokens.
  ##   A token is expected to be an array-like with
  ##   the type (e.g. :TEXT, :SCORE) at index 0 and the value at index 1.
  ##
  ##   tokens - the token array (kept by reference, never modified here)
  def initialize( tokens )
    @tokens = tokens
    @pos    = 0
  end

  ## current position (index of the next token returned by #next)
  def pos()  @pos; end
  ## true if all tokens are consumed
  def eos?() @pos >= @tokens.size; end


  ## true if any token from the current position up to the end
  ##   has one of the passed-in types
  def include?( *types )
    (@pos...@tokens.size).any? { |idx| types.include?( @tokens[idx][0] ) }
  end

  ## true if the tokens starting at the current position match the pattern
  ##   e.g. match?( :TEXT, [:VS,:SCORE], :TEXT )
  ##   note - every pattern entry is a single type or an array of alternatives;
  ##          past the end the token type is nil (see peek)
  def match?( *pattern )
    pattern.each_with_index.all? do |types, offset|
      ## if single symbol wrap in array
      types = types.is_a?( Array ) ? types : [types]
      types.include?( peek( offset ) )
    end
  end


  ## return token type (e.g. :TEXT, :NUM, etc.) at the current position
  def cur() peek(0); end

  ## return content (value) of the text token at offset
  ##   raises ArgumentError if the token is not of type :TEXT (or past the end)
  def text(offset=0)
    ## fix - token types are uppercase (:TEXT);
    ##       the check against (lowercase) :text failed for every token
    if peek( offset ) != :TEXT
      raise ArgumentError, "text(#{offset}) - token not a text type"
    end
    @tokens[@pos+offset][1]
  end

  ## return token type at offset (default: the token AFTER the current one)
  ##   or nil if past the end
  def peek(offset=1)
    if @pos+offset >= @tokens.size
      nil
    else
      @tokens[@pos+offset][0]
    end
  end

  ## note - returns complete token (not only the type) and advances the position;
  ##        past the end returns nil (no error raised) and still advances
  def next
    t = @tokens[@pos]
    @pos += 1
    t
  end

  ## consume all remaining tokens and return them in an array;
  ##   if a block is passed-in every token gets mapped through the block first
  def collect( &blk )
    tokens = []
    until eos?
      tok = self.next
      tokens << (blk ? blk.call( tok ) : tok)
    end
    tokens
  end
end  # class Tokens
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
|
110
|
+
### convenience helper - ignore errors by default
|
111
|
+
def tokenize( lines, debug: false )
  ## tokenize_with_errors returns the pair [tokens, errors];
  ##   keep the tokens only (errors get dropped on purpose)
  tokenize_with_errors( lines, debug: debug ).first
end
|
115
|
+
|
116
|
+
##
## Tokenizes one or more lines and returns the pair [tokens, errors].
##
##   lines - an array of strings (one entry per line) or
##             a single (all-in-one) string
##   debug - passed through to _tokenize_line
##
##  steps:
##    1) preprocess - drop empty lines and comment lines and
##                     cut-off end-of-line comments
##    2) tokenize every remaining line via _tokenize_line (errors get collected)
##    3) per line - retag :TEXT tokens that are group/round/leg keywords
##    4) per line - retag tokens using simple patterns
##                    to help along the (racc look ahead 1 - LA1) parser
##    5) flatten - and auto-add a [:NEWLINE, "\n"] token after every line
def tokenize_with_errors( lines, debug: false )
  errors = []     ## keep a list of errors

  ## note: tokenize ALL lines first (keeps the order of calls to _tokenize_line)
  tokens_by_line = _preprocess_lines( lines ).map do |line|
    more_tokens, more_errors = _tokenize_line( line, debug: debug )
    errors.concat( more_errors )   ## in-place append (no array copy per line)
    more_tokens
  end

  ## transform line-by-line and flatten
  tokens = []
  tokens_by_line.each do |line_tokens|
    tokens.concat( _transform_tokens( _mark_keywords( line_tokens ) ) )
    tokens << [:NEWLINE, "\n"]    ## auto-add newlines
  end

  [tokens, errors]
end  # method tokenize_with_errors


## (helper) returns an array of cleaned-up lines -
##   stripped of leading and trailing spaces, without empty lines,
##   without comment lines and without end-of-line comments
def _preprocess_lines( lines )
  txt = lines.is_a?( Array ) ? lines.join( "\n" ) : lines   ## else assume single-all-in-one txt

  txt.each_line.each_with_object( [] ) do |line, cleaned|
    line = line.strip
    next if line.empty? || line.start_with?( '#' )    ## skip empty lines and comments

    cleaned << line.sub( /#.*/, '' ).strip            ## cut-off end-of line comments too
  end
end


## (helper) pass 1 - replace all texts with keyword matches
##                     (e.g. group, round, leg, etc.);
##   all other tokens pass through as-is (1:1)
def _mark_keywords( tokens )
  tokens.map do |t|
    next t   unless t[0] == :TEXT

    text = t[1]
    if is_group?( text )
      [:GROUP, text]
    elsif is_round?( text ) || is_leg?( text )
      [:ROUND, text]
    else
      t
    end
  end
end


## (helper) pass 2 - transform the tokens of a single line (using simple patterns):
##    ROUND |  ...              =>  ROUND_DEF | ...       (rest as-is)
##    GROUP |  ...              =>  GROUP_DEF | ...       (rest with all TEXT => TEAM)
##    TEXT  SCORE|VS|-  TEXT    =>  TEAM  SCORE|VS|-  TEAM
##    TEXT  MINUTE              =>  PLAYER  MINUTE
def _transform_tokens( tokens )
  nodes = []
  buf   = Tokens.new( tokens )

  ## group def or round def - MUST start line;
  ##   thus, checked once up-front (and the rest of the line gets consumed)
  if buf.match?( :ROUND, :'|' )
    nodes << [:ROUND_DEF, buf.next[1]]
    nodes << buf.next
    return nodes.concat( buf.collect )
  end

  if buf.match?( :GROUP, :'|' )
    nodes << [:GROUP_DEF, buf.next[1]]
    nodes << buf.next
    return nodes.concat( buf.collect { |t| t[0] == :TEXT ? [:TEAM, t[1]] : t } )
  end

  until buf.eos?
    if buf.match?( :TEXT, [:SCORE, :VS, :'-'], :TEXT )
      nodes << [:TEAM, buf.next[1]]
      nodes << buf.next
      nodes << [:TEAM, buf.next[1]]
    elsif buf.match?( :TEXT, :MINUTE )
      nodes << [:PLAYER, buf.next[1]]
      nodes << buf.next
    else
      nodes << buf.next     ## pass through
    end
  end

  nodes
end
|
246
|
+
|
247
|
+
|
248
|
+
|
249
|
+
def _tokenize_line( line, debug: false )
|
19
250
|
tokens = []
|
20
251
|
errors = [] ## keep a list of errors - why? why not?
|
21
252
|
|
@@ -100,7 +331,7 @@ def tokenize_with_errors( line, debug: false )
|
|
100
331
|
when '-' then [:'-']
|
101
332
|
when '.' then
|
102
333
|
## switch back to top-level mode!!
|
103
|
-
puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"
|
334
|
+
puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug
|
104
335
|
@re = RE
|
105
336
|
[:'.']
|
106
337
|
else
|
@@ -121,7 +352,7 @@ def tokenize_with_errors( line, debug: false )
|
|
121
352
|
elsif m[:prop_key]
|
122
353
|
## switch context to PROP_RE
|
123
354
|
@re = PROP_RE
|
124
|
-
puts " ENTER PROP_RE MODE"
|
355
|
+
puts " ENTER PROP_RE MODE" if debug
|
125
356
|
[:PROP, m[:key]]
|
126
357
|
elsif m[:text]
|
127
358
|
[:TEXT, m[:text]] ## keep pos - why? why not?
|
@@ -252,11 +483,5 @@ def tokenize_with_errors( line, debug: false )
|
|
252
483
|
end
|
253
484
|
|
254
485
|
|
255
|
-
### convience helper - ignore errors by default
|
256
|
-
def tokenize( line, debug: false )
|
257
|
-
tokens, _ = tokenize_with_errors( line, debug: debug )
|
258
|
-
tokens
|
259
|
-
end
|
260
|
-
|
261
486
|
end # class Parser
|
262
487
|
end # module SportDb
|