sportdb-parser 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -2
- data/Manifest.txt +1 -0
- data/README.md +0 -5
- data/Rakefile +1 -0
- data/lib/sportdb/parser/parser.rb +817 -209
- data/lib/sportdb/parser/token-text.rb +1 -1
- data/lib/sportdb/parser/token.rb +146 -231
- data/lib/sportdb/parser/tokenizer.rb +262 -0
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +296 -0
- metadata +17 -2
data/lib/sportdb/parser.rb
CHANGED
@@ -22,6 +22,8 @@ require_relative 'parser/token-date'
|
|
22
22
|
require_relative 'parser/token-text'
|
23
23
|
require_relative 'parser/token-status'
|
24
24
|
require_relative 'parser/token'
|
25
|
+
require_relative 'parser/tokenizer'
|
26
|
+
|
25
27
|
require_relative 'parser/lang'
|
26
28
|
require_relative 'parser/parser'
|
27
29
|
|
@@ -47,5 +49,299 @@ end # module SportDb
|
|
47
49
|
=end
|
48
50
|
|
49
51
|
|
52
|
+
|
53
|
+
module SportDb
  ##
  # Line-based tokenizer front-end that feeds a racc-style parser.
  #
  # Splits the input text into lines, skips blank and comment-only lines,
  # strips end-of-line comments, runs every remaining line through
  # Parser#tokenize and keeps the resulting tokens in one flat queue
  # (with a NEWLINE token appended after every line).
  class Tokenizer
    attr_reader :tokens

    ##
    # txt - the complete text to tokenize (lines separated by "\n")
    #
    # Raises ArgumentError if a token is not an array of size 1 or 2.
    def initialize( txt )
      parser = Parser.new

      ## step 1 - tokenize line-by-line
      per_line = []
      txt.split( "\n" ).each do |line|
        stripped = line.strip
        next if stripped.empty? || stripped.start_with?( '#' )

        ## support for inline (end-of-line) comments
        line = line.sub( /#.*/, '' ).strip

        puts "line >#{line}<"
        tokens = parser.tokenize( line )
        pp tokens

        per_line << tokens
      end

      ## note: two former experiments are disabled (kept as notes only)
      ##   - quick hack: turn all text tokens followed by a minute token
      ##       into player tokens (and maybe auto-convert text tokens
      ##       into team tokens) - why? why not?
      ##   - auto-add/insert start tokens for known line patterns
      ##       e.g. START_GOALS for goals_line - why? why not?

      ## step 2 - flatten (and auto-add a newline token after every line)
      flat = per_line.flat_map { |tokens| tokens + [[:NEWLINE, "\n"]] }

      ## step 3 - convert to racc format
      @tokens = flat.map { |tok| to_racc_token( parser, tok ) }
    end

    ##
    # Removes and returns the next token from the front of the queue;
    # returns nil once the queue is empty.
    def next_token
      @tokens.shift
    end

    private

    ##
    # Converts a single token into the [type, value] pair handed to racc.
    #
    #  - size 1 tokens get the stringified type as both type and value
    #  - size 2 :TEXT tokens matching a group / round / leg keyword
    #      get retagged as :GROUP or :ROUND
    #  - all other size 2 tokens pass through as-is (1:1)
    def to_racc_token( parser, tok )
      case tok.size
      when 1
        Array.new( 2 ) { tok[0].to_s }
      when 2
        type, text = tok
        return tok unless type == :TEXT

        #############
        ## pass 1
        ##   replace all texts with keyword matches (e.g. group, round, leg, etc.)
        if parser.is_group?( text )
          [:GROUP, text]
        elsif parser.is_round?( text ) || parser.is_leg?( text )
          [:ROUND, text]
        else
          tok   ## pass through as-is (1:1)
        end
      else
        raise ArgumentError, "tokens of size 1|2 expected; got #{tok.pretty_inspect}"
      end
    end
  end # class Tokenizer
end # module SportDb
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
####
|
144
|
+
# RaccMatchParser support machinery (incl. node classes/abstract syntax tree)
|
145
|
+
|
146
|
+
class RaccMatchParser

  ##
  # Node classes (abstract syntax tree) built as plain structs.
  # Every node overrides pretty_print to get a compact one-line-ish
  # dump when printed via pp / pretty_inspect.

  ## lineup line - team plus lineup
  LineupLine = Struct.new( :team, :lineup ) do
    def pretty_print( printer )
      printer.text( "<LineupLine " )
      printer.text( self.team )
      printer.text( " lineup=" + self.lineup.pretty_inspect )
      printer.text( ">" )
    end
  end

  ## lineup entry - name plus optional card and optional sub
  Lineup = Struct.new( :name, :card, :sub ) do
    def pretty_print( printer )
      buf = String.new
      buf << self.name
      buf << " card=" + self.card.pretty_inspect   if card
      buf << " sub=" + self.sub.pretty_inspect     if sub
      printer.text( buf )
    end
  end

  ## card - name plus optional minute
  Card = Struct.new( :name, :minute ) do
    ## returns the name followed by the minute (if present)
    def to_s
      buf = String.new
      buf << "#{self.name}"
      buf << " #{self.minute.to_s}"  if self.minute
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end
  end

  ## sub - minute plus sub; printed as "(<minute> <sub>)"
  Sub = Struct.new( :minute, :sub ) do
    def pretty_print( printer )
      buf = String.new
      buf << "(#{self.minute.to_s} "
      buf << self.sub.pretty_inspect
      buf << ")"
      printer.text( buf )
    end
  end

  ## group definition - name plus teams
  GroupDef = Struct.new( :name, :teams ) do
    def pretty_print( printer )
      printer.text( "<GroupDef " )
      printer.text( self.name )
      printer.text( " teams=" + self.teams.pretty_inspect )
      printer.text( ">" )
    end
  end

  ## round definition - name plus optional date and optional duration
  RoundDef = Struct.new( :name, :date, :duration ) do
    def pretty_print( printer )
      printer.text( "<RoundDef " )
      printer.text( self.name )
      printer.text( " date=" + self.date.pretty_inspect )          if date
      ## fix: label was misspelled as "durattion="
      printer.text( " duration=" + self.duration.pretty_inspect )  if duration
      printer.text( ">" )
    end
  end

  DateHeader = Struct.new( :date ) do
    def pretty_print( printer )
      printer.text( "<DateHeader " )
      printer.text( "#{self.date.pretty_inspect}>" )
    end
  end

  GroupHeader = Struct.new( :name ) do
    def pretty_print( printer )
      printer.text( "<GroupHeader " )
      printer.text( "#{self.name}>" )
    end
  end

  ## round header - names get joined with ", " for printing
  RoundHeader = Struct.new( :names ) do
    def pretty_print( printer )
      printer.text( "<RoundHeader " )
      printer.text( "#{self.names.join(', ')}>" )
    end
  end

  MatchLine = Struct.new( :ord, :date, :time,
                          :team1, :team2, :score,
                          :geo ) do   ## change to geos - why? why not?

    ## prints "team1 v team2" first
    ##   followed by all other members that are not nil
    def pretty_print( printer )
      printer.text( "<MatchLine " )
      printer.text( "#{self.team1} v #{self.team2}" )
      printer.breakable

      members.zip( values ) do |name, value|
        next if [:team1, :team2].include?( name )   ## already printed above
        next if value.nil?

        printer.text( "#{name}=#{value.pretty_inspect}" )
      end

      printer.text( ">" )
    end
  end

  GoalLine = Struct.new( :goals1, :goals2 ) do
    def pretty_print( printer )
      printer.text( "<GoalLine " )
      printer.text( "goals1=" + self.goals1.pretty_inspect + "," )
      printer.breakable
      printer.text( "goals2=" + self.goals2.pretty_inspect + ">" )
    end
  end

  ## goal - player plus list of minutes
  Goal = Struct.new( :player, :minutes ) do
    ## returns the player followed by all minutes (space-separated)
    def to_s
      buf = String.new
      buf << "#{self.player}"
      buf << " "
      buf << minutes.map { |min| min.to_s }.join(' ')
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end
  end

  ##
  ##  fix - move :og, :pen to Goal if possible - why? why not?
  ##          or change to GoalMinute ???
  Minute = Struct.new( :m, :offset, :og, :pen ) do
    ## returns e.g. 45', 90+2', 12'(og) or 30'(pen)
    def to_s
      buf = String.new
      buf << "#{self.m}"
      buf << "+#{self.offset}"   if self.offset
      buf << "'"
      buf << "(og)"   if self.og
      buf << "(pen)"  if self.pen
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end
  end


  ##
  # input - the text to parse; gets echoed to stdout
  #           and handed to SportDb::Tokenizer
  def initialize(input)
    puts "==> input:"
    puts input
    @tokenizer = SportDb::Tokenizer.new(input)
  end

  ## fetches the next token from the tokenizer (and logs it to stdout)
  def next_token
    tok = @tokenizer.next_token
    puts "next_token => #{tok.pretty_inspect}"
    tok
  end

  ## resets the tree, runs do_parse and returns the tree
  ##   note: do_parse is not defined in this block -
  ##           presumably supplied by the racc runtime / generated parser
  def parse
    puts "parse:"
    @tree = []
    do_parse
    @tree
  end

  ##
  # prints the arguments passed in and exits the process with status 1
  #
  #  NOTE(review): exits for now - get and print more info about context etc.!!
  #    an earlier block-style variant was
  #      on_error do |error_token_id, error_value, value_stack|
  #        puts "Parse error on token: #{error_token_id}, value: #{error_value}"
  #      end
  def on_error(*args)
    puts
    puts "!! on parse error:"
    puts "args=#{args.pretty_inspect}"
    exit 1
  end

end
|
344
|
+
|
345
|
+
|
50
346
|
## print the string returned by SportDb::Module::Parser.banner to stdout
puts SportDb::Module::Parser.banner # say hello
|
51
347
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: racc
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rdoc
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -104,6 +118,7 @@ files:
|
|
104
118
|
- lib/sportdb/parser/token-status.rb
|
105
119
|
- lib/sportdb/parser/token-text.rb
|
106
120
|
- lib/sportdb/parser/token.rb
|
121
|
+
- lib/sportdb/parser/tokenizer.rb
|
107
122
|
- lib/sportdb/parser/version.rb
|
108
123
|
homepage: https://github.com/sportdb/sport.db
|
109
124
|
licenses:
|