sportdb-parser 0.5.4 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,5 @@
1
1
  ## pulls in
2
2
  require 'cocos'
3
- require 'season/formats' # e.g. Season() support machinery
4
-
5
-
6
3
 
7
4
 
8
5
  ####
@@ -27,13 +24,9 @@ require_relative 'parser/token-status'
27
24
  require_relative 'parser/token'
28
25
  require_relative 'parser/tokenizer'
29
26
 
30
- require_relative 'parser/parser'
31
-
32
-
33
- ####
34
- ## todo/check - move outline reader upstream to cocos - why? why not?
35
- ## use read_outline(), parse_outline() - why? why not?
36
- require_relative 'parser/outline_reader'
27
+ require_relative 'parser/parser' ## auto-generated by racc (from parser.y)
28
+ require_relative 'parser/racc_parser'
29
+ require_relative 'parser/racc_tree'
37
30
 
38
31
 
39
32
 
@@ -53,9 +46,11 @@ end # module SportDb
53
46
 
54
47
 
55
48
  module SportDb
56
-
57
-
58
-
49
+ ###
50
+ ## todo/fix - use LangHelper or such
51
+ ## e.g. class Parser
52
+ ## include LangHelper
53
+ ## end
59
54
  class Parser
60
55
  ## keep "old" access to checking for group, round & friends
61
56
  ## for now for compatibility
@@ -63,302 +58,9 @@ class Parser
63
58
  def is_round?( text ) Lang.is_round?( text ); end
64
59
  def is_leg?( text ) Lang.is_leg?( text ); end
65
60
  end
61
+ end # module SportDb
66
62
 
67
63
 
68
64
 
69
- class Tokenizer
70
-
71
- attr_reader :tokens
72
-
73
- def initialize( txt )
74
- parser = Parser.new
75
-
76
- tree = []
77
-
78
- lines = txt.split( "\n" )
79
- lines.each_with_index do |line,i|
80
- next if line.strip.empty? || line.strip.start_with?( '#' )
81
- ## support for inline (end-of-line) comments
82
- line = line.sub( /#.*/, '' ).strip
83
-
84
- puts "line >#{line}<"
85
- tokens = parser.tokenize( line )
86
- pp tokens
87
-
88
- tree << tokens
89
- end
90
-
91
-
92
- =begin
93
- ## quick hack
94
- ## turn all text tokens followed by minute token
95
- ## into player tokens!!!
96
- ##
97
- ## also auto-convert text tokens into team tokens - why? why not?
98
- tree.each do |tokens|
99
- tokens.each_with_index do |t0,idx|
100
- t1 = tokens[idx+1]
101
- if t1 && t1[0] == :minute && t0[0] == :text
102
- t0[0] = :player
103
- end
104
- end
105
- end
106
- =end
107
-
108
- =begin
109
- ## auto-add/insert start tokens for known line patterns
110
- ## START_GOALS for goals_line
111
- ## why? why not?
112
- =end
113
-
114
- ## flatten
115
- @tokens = []
116
- tree.each do |tokens|
117
- @tokens += tokens
118
- @tokens << [:NEWLINE, "\n"] ## auto-add newlines
119
- end
120
-
121
-
122
- ## convert to racc format
123
- @tokens = @tokens.map do |tok|
124
- if tok.size == 1
125
- [tok[0].to_s, tok[0].to_s]
126
- elsif tok.size == 2
127
- #############
128
- ## pass 1
129
- ## replace all texts with keyword matches (e.g. group, round, leg, etc.)
130
- if tok[0] == :TEXT
131
- text = tok[1]
132
- tok = if parser.is_group?( text )
133
- [:GROUP, text]
134
- elsif parser.is_round?( text ) || parser.is_leg?( text )
135
- [:ROUND, text]
136
- else
137
- tok ## pass through as-is (1:1)
138
- end
139
- end
140
- ## pass 2
141
- tok
142
- else
143
- raise ArgumentError, "tokens of size 1|2 expected; got #{tok.pretty_inspect}"
144
- end
145
- end
146
- end
147
-
148
-
149
-
150
- def next_token
151
- @tokens.shift
152
- end
153
- end # class Tokenizer
154
- end # module SportDb
155
-
156
-
157
-
158
- ####
159
- # RaccMatchParser support machinery (incl. node classes/abstract syntax tree)
160
-
161
- class RaccMatchParser
162
-
163
-
164
- LineupLine = Struct.new( :team, :lineup ) do
165
- def pretty_print( printer )
166
- printer.text( "<LineupLine " )
167
- printer.text( self.team )
168
- printer.text( " lineup=" + self.lineup.pretty_inspect )
169
- printer.text( ">" )
170
- end
171
- end
172
-
173
- Lineup = Struct.new( :name, :card, :sub ) do
174
- def pretty_print( printer )
175
- buf = String.new
176
- buf << self.name
177
- buf << " card=" + self.card.pretty_inspect if card
178
- buf << " sub=" + self.sub.pretty_inspect if sub
179
- printer.text( buf )
180
- end
181
- end
182
-
183
-
184
- Card = Struct.new( :name, :minute ) do
185
- def to_s
186
- buf = String.new
187
- buf << "#{self.name}"
188
- buf << " #{self.minute.to_s}" if self.minute
189
- buf
190
- end
191
-
192
- def pretty_print( printer )
193
- printer.text( to_s )
194
- end
195
- end
196
-
197
-
198
- Sub = Struct.new( :minute, :sub ) do
199
- def pretty_print( printer )
200
- buf = String.new
201
- buf << "(#{self.minute.to_s} "
202
- buf << self.sub.pretty_inspect
203
- buf << ")"
204
- printer.text( buf )
205
- end
206
- end
207
-
208
-
209
-
210
- GroupDef = Struct.new( :name, :teams ) do
211
- def pretty_print( printer )
212
- printer.text( "<GroupDef " )
213
- printer.text( self.name )
214
- printer.text( " teams=" + self.teams.pretty_inspect )
215
- printer.text( ">" )
216
- end
217
- end
218
-
219
-
220
- RoundDef = Struct.new( :name, :date, :duration ) do
221
- def pretty_print( printer )
222
- printer.text( "<RoundDef " )
223
- printer.text( self.name )
224
- printer.text( " date=" + self.date.pretty_inspect ) if date
225
- printer.text( " durattion=" + self.duration.pretty_inspect ) if duration
226
- printer.text( ">" )
227
- end
228
- end
229
-
230
- DateHeader = Struct.new( :date ) do
231
- def pretty_print( printer )
232
- printer.text( "<DateHeader " )
233
- printer.text( "#{self.date.pretty_inspect}>" )
234
- end
235
- end
236
-
237
- GroupHeader = Struct.new( :name ) do
238
- def pretty_print( printer )
239
- printer.text( "<GroupHeader " )
240
- printer.text( "#{self.name}>" )
241
- end
242
- end
243
-
244
- RoundHeader = Struct.new( :names ) do
245
- def pretty_print( printer )
246
- printer.text( "<RoundHeader " )
247
- printer.text( "#{self.names.join(', ')}>" )
248
- end
249
- end
250
-
251
- MatchLine = Struct.new( :ord, :date, :time,
252
- :team1, :team2, :score,
253
- :status,
254
- :geo,
255
- :timezone ) do ## change to geos - why? why not?
256
-
257
- def pretty_print( printer )
258
- printer.text( "<MatchLine " )
259
- printer.text( "#{self.team1} v #{self.team2}")
260
- printer.breakable
261
-
262
- members.zip(values) do |name, value|
263
- next if [:team1, :team2].include?( name )
264
- next if value.nil?
265
-
266
- printer.text( "#{name}=#{value.pretty_inspect}" )
267
- end
268
-
269
- printer.text( ">" )
270
- end
271
-
272
- end
273
-
274
- GoalLine = Struct.new( :goals1, :goals2 ) do
275
- def pretty_print( printer )
276
- printer.text( "<GoalLine " )
277
- printer.text( "goals1=" + self.goals1.pretty_inspect + "," )
278
- printer.breakable
279
- printer.text( "goals2=" + self.goals2.pretty_inspect + ">" )
280
- end
281
- end
282
-
283
- Goal = Struct.new( :player, :minutes ) do
284
- def to_s
285
- buf = String.new
286
- buf << "#{self.player}"
287
- buf << " "
288
- buf << minutes.map { |min| min.to_s }.join(' ')
289
- buf
290
- end
291
-
292
- def pretty_print( printer )
293
- printer.text( to_s )
294
- end
295
-
296
- end
297
-
298
-
299
- ##
300
- ## fix - move :og, :pen to Goal if possible - why? why not?
301
- ## or change to GoalMinute ???
302
- Minute = Struct.new( :m, :offset, :og, :pen ) do
303
- def to_s
304
- buf = String.new
305
- buf << "#{self.m}"
306
- buf << "+#{self.offset}" if self.offset
307
- buf << "'"
308
- buf << "(og)" if self.og
309
- buf << "(pen)" if self.pen
310
- buf
311
- end
312
-
313
- def pretty_print( printer )
314
- printer.text( to_s )
315
- end
316
- end
317
-
318
-
319
-
320
-
321
- def initialize(input)
322
- puts "==> input:"
323
- puts input
324
- @tokenizer = SportDb::Tokenizer.new(input)
325
- end
326
-
327
-
328
- def next_token
329
- tok = @tokenizer.next_token
330
- puts "next_token => #{tok.pretty_inspect}"
331
- tok
332
- end
333
-
334
- # on_error do |error_token_id, error_value, value_stack|
335
- # puts "Parse error on token: #{error_token_id}, value: #{error_value}"
336
- # end
337
-
338
- def parse
339
- puts "parse:"
340
- @tree = []
341
- do_parse
342
- @tree
343
- end
344
-
345
-
346
- def on_error(*args)
347
- puts
348
- puts "!! on parse error:"
349
- puts "args=#{args.pretty_inspect}"
350
- exit 1 ## exit for now - get and print more info about context etc.!!
351
- end
352
-
353
-
354
- =begin
355
- on_error do |error_token_id, error_value, value_stack|
356
- puts "Parse error on token: #{error_token_id}, value: #{error_value}"
357
- end
358
- =end
359
-
360
- end
361
-
362
-
363
65
  puts SportDb::Module::Parser.banner # say hello
364
66
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.4
4
+ version: 0.5.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-01-18 00:00:00.000000000 Z
11
+ date: 2025-01-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -24,20 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.4.0
27
- - !ruby/object:Gem::Dependency
28
- name: season-formats
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: racc
43
29
  requirement: !ruby/object:Gem::Requirement
@@ -111,8 +97,9 @@ files:
111
97
  - config/rounds_pt.txt
112
98
  - lib/sportdb/parser.rb
113
99
  - lib/sportdb/parser/lang.rb
114
- - lib/sportdb/parser/outline_reader.rb
115
100
  - lib/sportdb/parser/parser.rb
101
+ - lib/sportdb/parser/racc_parser.rb
102
+ - lib/sportdb/parser/racc_tree.rb
116
103
  - lib/sportdb/parser/token-date.rb
117
104
  - lib/sportdb/parser/token-score.rb
118
105
  - lib/sportdb/parser/token-status.rb
@@ -1,155 +0,0 @@
1
-
2
-
3
- module SportDb
4
-
5
- ###
6
- # add a simple Outline convenience class
7
- # for processing outlines with OutlineReader
8
-
9
- class QuickMatchOutline
10
- def self.read( path )
11
- nodes = OutlineReader.read( path )
12
- new( nodes )
13
- end
14
-
15
- def initialize( nodes )
16
- @nodes = nodes
17
- end
18
-
19
- def each_para( &blk )
20
- ## note: every (new) read call - resets errors list to empty
21
- ### @errors = []
22
-
23
- ## process nodes
24
- h1 = nil
25
- h2 = nil
26
- orphans = 0 ## track paragraphs's with no heading
27
-
28
- @nodes.each do |node|
29
- type = node[0]
30
-
31
- if type == :h1
32
- h1 = node[1] ## get heading text
33
- puts " = Heading 1 >#{node[1]}<"
34
- elsif type == :h2
35
- if h1.nil?
36
- puts "!! WARN - no heading for subheading; skipping processing"
37
- next
38
- end
39
- h2 = node[1] ## get heading text
40
- puts " == Heading 2 >#{node[1]}<"
41
- elsif type == :p
42
- if h1.nil?
43
- orphans += 1 ## only warn once
44
- puts "!! WARN - no heading for #{orphans} text paragraph(s); skipping parse"
45
- next
46
- end
47
-
48
- lines = node[1]
49
- blk.call( lines )
50
- else
51
- pp node
52
- raise ArgumentError, "unsupported (node) type >#{type}<"
53
- end
54
- end # each node
55
- end # each_para
56
- alias_method :each_paragraph, :each_para
57
- alias_method :each_p, :each_para
58
- end # class QuickMatchOutline
59
-
60
-
61
-
62
-
63
- class OutlineReader
64
-
65
- def self.debug=(value) @@debug = value; end
66
- def self.debug?() @@debug ||= false; end
67
- def debug?() self.class.debug?; end
68
-
69
-
70
-
71
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
72
- txt = File.open( path, 'r:utf-8' ) {|f| f.read }
73
- parse( txt )
74
- end
75
-
76
- def self.parse( txt )
77
- new( txt ).parse
78
- end
79
-
80
- def initialize( txt )
81
- @txt = txt
82
- end
83
-
84
- ## note: skip "decorative" only heading e.g. ========
85
- ## todo/check: find a better name e.g. HEADING_EMPTY_RE or HEADING_LINE_RE or ???
86
- HEADING_BLANK_RE = %r{\A
87
- ={1,}
88
- \z}x
89
-
90
- ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
91
- HEADING_RE = %r{\A
92
- (?<marker>={1,}) ## 1. leading ======
93
- [ ]*
94
- (?<text>[^=]+) ## 2. text (note: for now no "inline" = allowed)
95
- [ ]*
96
- =* ## 3. (optional) trailing ====
97
- \z}x
98
-
99
- def parse
100
- outline=[] ## outline structure
101
- start_para = true ## start new para(graph) on new text line?
102
-
103
- @txt.each_line do |line|
104
- line = line.strip ## todo/fix: keep leading and trailing spaces - why? why not?
105
-
106
- if line.empty? ## todo/fix: keep blank line nodes?? and just remove comments and process headings?! - why? why not?
107
- start_para = true
108
- next
109
- end
110
-
111
- break if line == '__END__'
112
-
113
- next if line.start_with?( '#' ) ## skip comments too
114
- ## strip inline (until end-of-line) comments too
115
- ## e.g Eupen | KAS Eupen ## [de]
116
- ## => Eupen | KAS Eupen
117
- ## e.g bq Bonaire, BOE # CONCACAF
118
- ## => bq Bonaire, BOE
119
- line = line.sub( /#.*/, '' ).strip
120
- pp line if debug?
121
-
122
- ## todo/check: also use heading blank as paragraph "breaker" or treat it like a comment ?? - why? why not?
123
- next if HEADING_BLANK_RE.match( line ) # skip "decorative" only heading e.g. ========
124
-
125
- ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
126
- if m=HEADING_RE.match( line )
127
- start_para = true
128
-
129
- heading_marker = m[:marker]
130
- heading_level = heading_marker.length ## count number of = for heading level
131
- heading = m[:text].strip
132
-
133
- puts "heading #{heading_level} >#{heading}<" if debug?
134
- outline << [:"h#{heading_level}", heading]
135
- else ## assume it's a (plain/regular) text line
136
- if start_para
137
- outline << [:p, [line]]
138
- start_para = false
139
- else
140
- node = outline[-1] ## get last entry
141
- if node[0] == :p ## assert it's a p(aragraph) node!!!
142
- node[1] << line ## add line to p(aragraph)
143
- else
144
- puts "!! ERROR - invalid outline state / format - expected p(aragraph) node; got:"
145
- pp node
146
- exit 1
147
- end
148
- end
149
- end
150
- end
151
- outline
152
- end # method read
153
- end # class OutlineReader
154
-
155
- end # module SportDb