sportdb-parser 0.5.4 → 0.5.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -1
- data/Rakefile +1 -2
- data/lib/sportdb/parser/parser.rb +192 -184
- data/lib/sportdb/parser/racc_parser.rb +58 -0
- data/lib/sportdb/parser/racc_tree.rb +162 -0
- data/lib/sportdb/parser/tokenizer.rb +232 -7
- data/lib/sportdb/parser/version.rb +1 -1
- data/lib/sportdb/parser.rb +9 -307
- metadata +4 -17
- data/lib/sportdb/parser/outline_reader.rb +0 -155
data/lib/sportdb/parser.rb
CHANGED
@@ -1,8 +1,5 @@
|
|
1
1
|
## pulls in
|
2
2
|
require 'cocos'
|
3
|
-
require 'season/formats' # e.g. Season() support machinery
|
4
|
-
|
5
|
-
|
6
3
|
|
7
4
|
|
8
5
|
####
|
@@ -27,13 +24,9 @@ require_relative 'parser/token-status'
|
|
27
24
|
require_relative 'parser/token'
|
28
25
|
require_relative 'parser/tokenizer'
|
29
26
|
|
30
|
-
require_relative 'parser/parser'
|
31
|
-
|
32
|
-
|
33
|
-
####
|
34
|
-
## todo/check - move outline reader upstream to cocos - why? why not?
|
35
|
-
## use read_outline(), parse_outline() - why? why not?
|
36
|
-
require_relative 'parser/outline_reader'
|
27
|
+
require_relative 'parser/parser' ## auto-generated by racc (from parser.y)
|
28
|
+
require_relative 'parser/racc_parser'
|
29
|
+
require_relative 'parser/racc_tree'
|
37
30
|
|
38
31
|
|
39
32
|
|
@@ -53,9 +46,11 @@ end # module SportDb
|
|
53
46
|
|
54
47
|
|
55
48
|
module SportDb
|
56
|
-
|
57
|
-
|
58
|
-
|
49
|
+
###
|
50
|
+
## todo/fix - use LangHelper or such
|
51
|
+
## e.g. class Parser
|
52
|
+
## include LangHelper
|
53
|
+
## end
|
59
54
|
class Parser
|
60
55
|
## keep "old" access to checking for group, round & friends
|
61
56
|
## for now for compatibility
|
@@ -63,302 +58,9 @@ class Parser
|
|
63
58
|
def is_round?( text ) Lang.is_round?( text ); end
|
64
59
|
def is_leg?( text ) Lang.is_leg?( text ); end
|
65
60
|
end
|
61
|
+
end # module SportDb
|
66
62
|
|
67
63
|
|
68
64
|
|
69
|
-
class Tokenizer
|
70
|
-
|
71
|
-
attr_reader :tokens
|
72
|
-
|
73
|
-
def initialize( txt )
|
74
|
-
parser = Parser.new
|
75
|
-
|
76
|
-
tree = []
|
77
|
-
|
78
|
-
lines = txt.split( "\n" )
|
79
|
-
lines.each_with_index do |line,i|
|
80
|
-
next if line.strip.empty? || line.strip.start_with?( '#' )
|
81
|
-
## support for inline (end-of-line) comments
|
82
|
-
line = line.sub( /#.*/, '' ).strip
|
83
|
-
|
84
|
-
puts "line >#{line}<"
|
85
|
-
tokens = parser.tokenize( line )
|
86
|
-
pp tokens
|
87
|
-
|
88
|
-
tree << tokens
|
89
|
-
end
|
90
|
-
|
91
|
-
|
92
|
-
=begin
|
93
|
-
## quick hack
|
94
|
-
## turn all text tokens followed by minute token
|
95
|
-
## into player tokens!!!
|
96
|
-
##
|
97
|
-
## also auto-convert text tokens into team tokens - why? why not?
|
98
|
-
tree.each do |tokens|
|
99
|
-
tokens.each_with_index do |t0,idx|
|
100
|
-
t1 = tokens[idx+1]
|
101
|
-
if t1 && t1[0] == :minute && t0[0] == :text
|
102
|
-
t0[0] = :player
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
=end
|
107
|
-
|
108
|
-
=begin
|
109
|
-
## auto-add/insert start tokens for known line patterns
|
110
|
-
## START_GOALS for goals_line
|
111
|
-
## why? why not?
|
112
|
-
=end
|
113
|
-
|
114
|
-
## flatten
|
115
|
-
@tokens = []
|
116
|
-
tree.each do |tokens|
|
117
|
-
@tokens += tokens
|
118
|
-
@tokens << [:NEWLINE, "\n"] ## auto-add newlines
|
119
|
-
end
|
120
|
-
|
121
|
-
|
122
|
-
## convert to racc format
|
123
|
-
@tokens = @tokens.map do |tok|
|
124
|
-
if tok.size == 1
|
125
|
-
[tok[0].to_s, tok[0].to_s]
|
126
|
-
elsif tok.size == 2
|
127
|
-
#############
|
128
|
-
## pass 1
|
129
|
-
## replace all texts with keyword matches (e.g. group, round, leg, etc.)
|
130
|
-
if tok[0] == :TEXT
|
131
|
-
text = tok[1]
|
132
|
-
tok = if parser.is_group?( text )
|
133
|
-
[:GROUP, text]
|
134
|
-
elsif parser.is_round?( text ) || parser.is_leg?( text )
|
135
|
-
[:ROUND, text]
|
136
|
-
else
|
137
|
-
tok ## pass through as-is (1:1)
|
138
|
-
end
|
139
|
-
end
|
140
|
-
## pass 2
|
141
|
-
tok
|
142
|
-
else
|
143
|
-
raise ArgumentError, "tokens of size 1|2 expected; got #{tok.pretty_inspect}"
|
144
|
-
end
|
145
|
-
end
|
146
|
-
end
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
def next_token
|
151
|
-
@tokens.shift
|
152
|
-
end
|
153
|
-
end # class Tokenizer
|
154
|
-
end # module SportDb
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
####
|
159
|
-
# RaccMatchParser support machinery (incl. node classes/abstract syntax tree)
|
160
|
-
|
161
|
-
class RaccMatchParser
|
162
|
-
|
163
|
-
|
164
|
-
LineupLine = Struct.new( :team, :lineup ) do
|
165
|
-
def pretty_print( printer )
|
166
|
-
printer.text( "<LineupLine " )
|
167
|
-
printer.text( self.team )
|
168
|
-
printer.text( " lineup=" + self.lineup.pretty_inspect )
|
169
|
-
printer.text( ">" )
|
170
|
-
end
|
171
|
-
end
|
172
|
-
|
173
|
-
Lineup = Struct.new( :name, :card, :sub ) do
|
174
|
-
def pretty_print( printer )
|
175
|
-
buf = String.new
|
176
|
-
buf << self.name
|
177
|
-
buf << " card=" + self.card.pretty_inspect if card
|
178
|
-
buf << " sub=" + self.sub.pretty_inspect if sub
|
179
|
-
printer.text( buf )
|
180
|
-
end
|
181
|
-
end
|
182
|
-
|
183
|
-
|
184
|
-
Card = Struct.new( :name, :minute ) do
|
185
|
-
def to_s
|
186
|
-
buf = String.new
|
187
|
-
buf << "#{self.name}"
|
188
|
-
buf << " #{self.minute.to_s}" if self.minute
|
189
|
-
buf
|
190
|
-
end
|
191
|
-
|
192
|
-
def pretty_print( printer )
|
193
|
-
printer.text( to_s )
|
194
|
-
end
|
195
|
-
end
|
196
|
-
|
197
|
-
|
198
|
-
Sub = Struct.new( :minute, :sub ) do
|
199
|
-
def pretty_print( printer )
|
200
|
-
buf = String.new
|
201
|
-
buf << "(#{self.minute.to_s} "
|
202
|
-
buf << self.sub.pretty_inspect
|
203
|
-
buf << ")"
|
204
|
-
printer.text( buf )
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
GroupDef = Struct.new( :name, :teams ) do
|
211
|
-
def pretty_print( printer )
|
212
|
-
printer.text( "<GroupDef " )
|
213
|
-
printer.text( self.name )
|
214
|
-
printer.text( " teams=" + self.teams.pretty_inspect )
|
215
|
-
printer.text( ">" )
|
216
|
-
end
|
217
|
-
end
|
218
|
-
|
219
|
-
|
220
|
-
RoundDef = Struct.new( :name, :date, :duration ) do
|
221
|
-
def pretty_print( printer )
|
222
|
-
printer.text( "<RoundDef " )
|
223
|
-
printer.text( self.name )
|
224
|
-
printer.text( " date=" + self.date.pretty_inspect ) if date
|
225
|
-
printer.text( " durattion=" + self.duration.pretty_inspect ) if duration
|
226
|
-
printer.text( ">" )
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
DateHeader = Struct.new( :date ) do
|
231
|
-
def pretty_print( printer )
|
232
|
-
printer.text( "<DateHeader " )
|
233
|
-
printer.text( "#{self.date.pretty_inspect}>" )
|
234
|
-
end
|
235
|
-
end
|
236
|
-
|
237
|
-
GroupHeader = Struct.new( :name ) do
|
238
|
-
def pretty_print( printer )
|
239
|
-
printer.text( "<GroupHeader " )
|
240
|
-
printer.text( "#{self.name}>" )
|
241
|
-
end
|
242
|
-
end
|
243
|
-
|
244
|
-
RoundHeader = Struct.new( :names ) do
|
245
|
-
def pretty_print( printer )
|
246
|
-
printer.text( "<RoundHeader " )
|
247
|
-
printer.text( "#{self.names.join(', ')}>" )
|
248
|
-
end
|
249
|
-
end
|
250
|
-
|
251
|
-
MatchLine = Struct.new( :ord, :date, :time,
|
252
|
-
:team1, :team2, :score,
|
253
|
-
:status,
|
254
|
-
:geo,
|
255
|
-
:timezone ) do ## change to geos - why? why not?
|
256
|
-
|
257
|
-
def pretty_print( printer )
|
258
|
-
printer.text( "<MatchLine " )
|
259
|
-
printer.text( "#{self.team1} v #{self.team2}")
|
260
|
-
printer.breakable
|
261
|
-
|
262
|
-
members.zip(values) do |name, value|
|
263
|
-
next if [:team1, :team2].include?( name )
|
264
|
-
next if value.nil?
|
265
|
-
|
266
|
-
printer.text( "#{name}=#{value.pretty_inspect}" )
|
267
|
-
end
|
268
|
-
|
269
|
-
printer.text( ">" )
|
270
|
-
end
|
271
|
-
|
272
|
-
end
|
273
|
-
|
274
|
-
GoalLine = Struct.new( :goals1, :goals2 ) do
|
275
|
-
def pretty_print( printer )
|
276
|
-
printer.text( "<GoalLine " )
|
277
|
-
printer.text( "goals1=" + self.goals1.pretty_inspect + "," )
|
278
|
-
printer.breakable
|
279
|
-
printer.text( "goals2=" + self.goals2.pretty_inspect + ">" )
|
280
|
-
end
|
281
|
-
end
|
282
|
-
|
283
|
-
Goal = Struct.new( :player, :minutes ) do
|
284
|
-
def to_s
|
285
|
-
buf = String.new
|
286
|
-
buf << "#{self.player}"
|
287
|
-
buf << " "
|
288
|
-
buf << minutes.map { |min| min.to_s }.join(' ')
|
289
|
-
buf
|
290
|
-
end
|
291
|
-
|
292
|
-
def pretty_print( printer )
|
293
|
-
printer.text( to_s )
|
294
|
-
end
|
295
|
-
|
296
|
-
end
|
297
|
-
|
298
|
-
|
299
|
-
##
|
300
|
-
## fix - move :og, :pen to Goal if possible - why? why not?
|
301
|
-
## or change to GoalMinute ???
|
302
|
-
Minute = Struct.new( :m, :offset, :og, :pen ) do
|
303
|
-
def to_s
|
304
|
-
buf = String.new
|
305
|
-
buf << "#{self.m}"
|
306
|
-
buf << "+#{self.offset}" if self.offset
|
307
|
-
buf << "'"
|
308
|
-
buf << "(og)" if self.og
|
309
|
-
buf << "(pen)" if self.pen
|
310
|
-
buf
|
311
|
-
end
|
312
|
-
|
313
|
-
def pretty_print( printer )
|
314
|
-
printer.text( to_s )
|
315
|
-
end
|
316
|
-
end
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
def initialize(input)
|
322
|
-
puts "==> input:"
|
323
|
-
puts input
|
324
|
-
@tokenizer = SportDb::Tokenizer.new(input)
|
325
|
-
end
|
326
|
-
|
327
|
-
|
328
|
-
def next_token
|
329
|
-
tok = @tokenizer.next_token
|
330
|
-
puts "next_token => #{tok.pretty_inspect}"
|
331
|
-
tok
|
332
|
-
end
|
333
|
-
|
334
|
-
# on_error do |error_token_id, error_value, value_stack|
|
335
|
-
# puts "Parse error on token: #{error_token_id}, value: #{error_value}"
|
336
|
-
# end
|
337
|
-
|
338
|
-
def parse
|
339
|
-
puts "parse:"
|
340
|
-
@tree = []
|
341
|
-
do_parse
|
342
|
-
@tree
|
343
|
-
end
|
344
|
-
|
345
|
-
|
346
|
-
def on_error(*args)
|
347
|
-
puts
|
348
|
-
puts "!! on parse error:"
|
349
|
-
puts "args=#{args.pretty_inspect}"
|
350
|
-
exit 1 ## exit for now - get and print more info about context etc.!!
|
351
|
-
end
|
352
|
-
|
353
|
-
|
354
|
-
=begin
|
355
|
-
on_error do |error_token_id, error_value, value_stack|
|
356
|
-
puts "Parse error on token: #{error_token_id}, value: #{error_value}"
|
357
|
-
end
|
358
|
-
=end
|
359
|
-
|
360
|
-
end
|
361
|
-
|
362
|
-
|
363
65
|
puts SportDb::Module::Parser.banner # say hello
|
364
66
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.4
|
4
|
+
version: 0.5.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.4.0
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: season-formats
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: racc
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -111,8 +97,9 @@ files:
|
|
111
97
|
- config/rounds_pt.txt
|
112
98
|
- lib/sportdb/parser.rb
|
113
99
|
- lib/sportdb/parser/lang.rb
|
114
|
-
- lib/sportdb/parser/outline_reader.rb
|
115
100
|
- lib/sportdb/parser/parser.rb
|
101
|
+
- lib/sportdb/parser/racc_parser.rb
|
102
|
+
- lib/sportdb/parser/racc_tree.rb
|
116
103
|
- lib/sportdb/parser/token-date.rb
|
117
104
|
- lib/sportdb/parser/token-score.rb
|
118
105
|
- lib/sportdb/parser/token-status.rb
|
@@ -1,155 +0,0 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
|
5
|
-
###
|
6
|
-
# add a simple Outline convenience class
|
7
|
-
# for processing OUtlines with OUtlineReader
|
8
|
-
|
9
|
-
class QuickMatchOutline
|
10
|
-
def self.read( path )
|
11
|
-
nodes = OutlineReader.read( path )
|
12
|
-
new( nodes )
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize( nodes )
|
16
|
-
@nodes = nodes
|
17
|
-
end
|
18
|
-
|
19
|
-
def each_para( &blk )
|
20
|
-
## note: every (new) read call - resets errors list to empty
|
21
|
-
### @errors = []
|
22
|
-
|
23
|
-
## process nodes
|
24
|
-
h1 = nil
|
25
|
-
h2 = nil
|
26
|
-
orphans = 0 ## track paragraphs's with no heading
|
27
|
-
|
28
|
-
@nodes.each do |node|
|
29
|
-
type = node[0]
|
30
|
-
|
31
|
-
if type == :h1
|
32
|
-
h1 = node[1] ## get heading text
|
33
|
-
puts " = Heading 1 >#{node[1]}<"
|
34
|
-
elsif type == :h2
|
35
|
-
if h1.nil?
|
36
|
-
puts "!! WARN - no heading for subheading; skipping processing"
|
37
|
-
next
|
38
|
-
end
|
39
|
-
h2 = node[1] ## get heading text
|
40
|
-
puts " == Heading 2 >#{node[1]}<"
|
41
|
-
elsif type == :p
|
42
|
-
if h1.nil?
|
43
|
-
orphans += 1 ## only warn once
|
44
|
-
puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
|
45
|
-
next
|
46
|
-
end
|
47
|
-
|
48
|
-
lines = node[1]
|
49
|
-
blk.call( lines )
|
50
|
-
else
|
51
|
-
pp node
|
52
|
-
raise ArgumentError, "unsupported (node) type >#{type}<"
|
53
|
-
end
|
54
|
-
end # each node
|
55
|
-
end # each_para
|
56
|
-
alias_method :each_paragraph, :each_para
|
57
|
-
alias_method :each_p, :each_para
|
58
|
-
end # class QuickMatchOutline
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
class OutlineReader
|
64
|
-
|
65
|
-
def self.debug=(value) @@debug = value; end
|
66
|
-
def self.debug?() @@debug ||= false; end
|
67
|
-
def debug?() self.class.debug?; end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
72
|
-
txt = File.open( path, 'r:utf-8' ) {|f| f.read }
|
73
|
-
parse( txt )
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.parse( txt )
|
77
|
-
new( txt ).parse
|
78
|
-
end
|
79
|
-
|
80
|
-
def initialize( txt )
|
81
|
-
@txt = txt
|
82
|
-
end
|
83
|
-
|
84
|
-
## note: skip "decorative" only heading e.g. ========
|
85
|
-
## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
|
86
|
-
HEADING_BLANK_RE = %r{\A
|
87
|
-
={1,}
|
88
|
-
\z}x
|
89
|
-
|
90
|
-
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
91
|
-
HEADING_RE = %r{\A
|
92
|
-
(?<marker>={1,}) ## 1. leading ======
|
93
|
-
[ ]*
|
94
|
-
(?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
|
95
|
-
[ ]*
|
96
|
-
=* ## 3. (optional) trailing ====
|
97
|
-
\z}x
|
98
|
-
|
99
|
-
def parse
|
100
|
-
outline=[] ## outline structure
|
101
|
-
start_para = true ## start new para(graph) on new text line?
|
102
|
-
|
103
|
-
@txt.each_line do |line|
|
104
|
-
line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
|
105
|
-
|
106
|
-
if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
|
107
|
-
start_para = true
|
108
|
-
next
|
109
|
-
end
|
110
|
-
|
111
|
-
break if line == '__END__'
|
112
|
-
|
113
|
-
next if line.start_with?( '#' ) ## skip comments too
|
114
|
-
## strip inline (until end-of-line) comments too
|
115
|
-
## e.g Eupen | KAS Eupen ## [de]
|
116
|
-
## => Eupen | KAS Eupen
|
117
|
-
## e.g bq Bonaire, BOE # CONCACAF
|
118
|
-
## => bq Bonaire, BOE
|
119
|
-
line = line.sub( /#.*/, '' ).strip
|
120
|
-
pp line if debug?
|
121
|
-
|
122
|
-
## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
|
123
|
-
next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
|
124
|
-
|
125
|
-
## note: like in wikimedia markup (and markdown) all optional trailing ==== too
|
126
|
-
if m=HEADING_RE.match( line )
|
127
|
-
start_para = true
|
128
|
-
|
129
|
-
heading_marker = m[:marker]
|
130
|
-
heading_level = heading_marker.length ## count number of = for heading level
|
131
|
-
heading = m[:text].strip
|
132
|
-
|
133
|
-
puts "heading #{heading_level} >#{heading}<" if debug?
|
134
|
-
outline << [:"h#{heading_level}", heading]
|
135
|
-
else ## assume it's a (plain/regular) text line
|
136
|
-
if start_para
|
137
|
-
outline << [:p, [line]]
|
138
|
-
start_para = false
|
139
|
-
else
|
140
|
-
node = outline[-1] ## get last entry
|
141
|
-
if node[0] == :p ## assert it's a p(aragraph) node!!!
|
142
|
-
node[1] << line ## add line to p(aragraph)
|
143
|
-
else
|
144
|
-
puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
|
145
|
-
pp node
|
146
|
-
exit 1
|
147
|
-
end
|
148
|
-
end
|
149
|
-
end
|
150
|
-
end
|
151
|
-
outline
|
152
|
-
end # method read
|
153
|
-
end # class OutlineReader
|
154
|
-
|
155
|
-
end # module SportDb
|