sportdb-parser 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,162 @@
1
+
2
+ ####
3
+ # RaccMatchParser support machinery (incl. node classes/abstract syntax tree)
4
+
5
class RaccMatchParser

  ## node classes (abstract syntax tree) used by the match parser
  ##   note - all nodes are plain structs with a custom pretty_print
  ##          to get compact / easier-to-read output with pp


  ## lineup line - team (text) plus lineup (printed via pretty_inspect)
  LineupLine = Struct.new( :team, :lineup ) do
    def pretty_print( printer )
      printer.text( "<LineupLine " )
      printer.text( self.team )
      printer.text( " lineup=" + self.lineup.pretty_inspect )
      printer.text( ">" )
    end
  end

  ## single lineup entry - name (text) with optional card and optional sub
  Lineup = Struct.new( :name, :card, :sub ) do
    def pretty_print( printer )
      buf = String.new
      buf << self.name
      ## note - card and sub only get printed if present (not nil/false)
      buf << " card=" + self.card.pretty_inspect  if card
      buf << " sub=" + self.sub.pretty_inspect    if sub
      printer.text( buf )
    end
  end


  ## card - name with optional minute
  Card = Struct.new( :name, :minute ) do
    ## returns name, followed by a space and the minute (if present)
    def to_s
      buf = String.new
      buf << "#{self.name}"
      buf << " #{self.minute.to_s}"  if self.minute
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end
  end


  ## substitution - minute plus sub (printed via pretty_inspect)
  Sub = Struct.new( :minute, :sub ) do
    def pretty_print( printer )
      buf = String.new
      buf << "(#{self.minute.to_s} "
      buf << self.sub.pretty_inspect
      buf << ")"
      printer.text( buf )
    end
  end



  ## group definition - name (text) plus teams
  GroupDef = Struct.new( :name, :teams ) do
    def pretty_print( printer )
      printer.text( "<GroupDef " )
      printer.text( self.name )
      printer.text( " teams=" + self.teams.pretty_inspect )
      printer.text( ">" )
    end
  end


  ## round definition - name (text) with optional date and optional duration
  RoundDef = Struct.new( :name, :date, :duration ) do
    def pretty_print( printer )
      printer.text( "<RoundDef " )
      printer.text( self.name )
      printer.text( " date=" + self.date.pretty_inspect ) if date
      ## fix - label was misspelled (durattion)
      printer.text( " duration=" + self.duration.pretty_inspect ) if duration
      printer.text( ">" )
    end
  end

  ## date header - date only
  DateHeader = Struct.new( :date ) do
    def pretty_print( printer )
      printer.text( "<DateHeader " )
      printer.text( "#{self.date.pretty_inspect}>" )
    end
  end

  ## group header - name only
  GroupHeader = Struct.new( :name ) do
    def pretty_print( printer )
      printer.text( "<GroupHeader " )
      printer.text( "#{self.name}>" )
    end
  end

  ## round header - one or more names (joined with comma for printing)
  RoundHeader = Struct.new( :names ) do
    def pretty_print( printer )
      printer.text( "<RoundHeader " )
      printer.text( "#{self.names.join(', ')}>" )
    end
  end

  ## match line - team1 v team2 plus all the optional extras
  MatchLine = Struct.new( :ord, :date, :time,
                          :team1, :team2, :score,
                          :status,
                          :geo,
                          :timezone ) do   ## change to geos - why? why not?

    def pretty_print( printer )
      printer.text( "<MatchLine " )
      printer.text( "#{self.team1} v #{self.team2}" )
      printer.breakable

      ## print all other members (skip teams - already printed - and nil values)
      members.zip( values ) do |name, value|
        next if [:team1, :team2].include?( name )
        next if value.nil?

        printer.text( "#{name}=#{value.pretty_inspect}" )
      end

      printer.text( ">" )
    end

  end

  ## goal line - goals1 and goals2 (both printed via pretty_inspect)
  GoalLine = Struct.new( :goals1, :goals2 ) do
    def pretty_print( printer )
      printer.text( "<GoalLine " )
      printer.text( "goals1=" + self.goals1.pretty_inspect + "," )
      printer.breakable
      printer.text( "goals2=" + self.goals2.pretty_inspect + ">" )
    end
  end

  ## goal - player plus list of minutes
  Goal = Struct.new( :player, :minutes ) do
    ## returns player followed by all minutes (space-separated)
    def to_s
      buf = String.new
      buf << "#{self.player}"
      buf << " "
      buf << minutes.map { |min| min.to_s }.join(' ')
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end

  end


  ##
  ##  fix - move :og, :pen  to Goal if possible - why? why not?
  ##  or change to GoalMinute ???
  Minute = Struct.new( :m, :offset, :og, :pen ) do
    ## returns e.g. 45'  or 45+2'  or 45'(og)  or 45'(pen)
    def to_s
      buf = String.new
      buf << "#{self.m}"
      buf << "+#{self.offset}"  if self.offset
      buf << "'"
      buf << "(og)"   if self.og
      buf << "(pen)"  if self.pen
      buf
    end

    def pretty_print( printer )
      printer.text( to_s )
    end
  end

end  # class RaccMatchParser
@@ -14,8 +14,239 @@ def log( msg )
14
14
  end
15
15
 
16
16
 
17
+ ## transforms
18
+ ##
19
+ ## Netherlands 1-2 (1-1) England
20
+ ## => text => team
21
+ ## score|vs
22
+ ## text => team
23
+
24
+
25
+ ## token iter/find better name
26
+ ## e.g. TokenBuffer/Scanner or such ??
27
## token buffer with a read position (cursor)
##   - tokens is an array of [type, value] pairs e.g. [:TEXT, "England"]
##   - peek/cur/match?/include?/text look ahead without moving the cursor
##   - next/collect consume tokens (move the cursor forward)
class Tokens
  def initialize( tokens )
    @tokens = tokens
    @pos    = 0
  end

  ## current read position (index into tokens)
  attr_reader :pos

  ## true if all tokens are consumed
  def eos?() @pos >= @tokens.size; end


  ## check if any of the remaining tokens (from the cursor onwards)
  ##   is one of the passed-in types
  def include?( *types )
    @tokens.drop( @pos ).any? { |t| types.include?( t[0] ) }
  end

  ## check if the upcoming token types match the pattern
  ##   pattern e.g. [:TEXT, [:VS,:SCORE], :TEXT]
  ##     - a nested array means any of the listed types
  ##   note - returns false if the pattern runs past the end (peek returns nil)
  def match?( *pattern )
    pattern.each_with_index.all? do |types, offset|
      ## if single symbol wrap in array
      types = types.is_a?( Array ) ? types : [types]
      types.include?( peek( offset ) )
    end
  end


  ## return token type (e.g. :TEXT, :NUM, etc.)
  def cur() peek(0); end

  ## return content (value) of the token at offset
  ##   raises ArgumentError if the token is not of type :TEXT
  ##   (incl. no token at all, that is, past the end)
  def text( offset=0 )
    ## fix - token types are uppercase symbols (:TEXT);
    ##         was checking for :text, that is, raised for every token
    if peek( offset ) != :TEXT
      raise ArgumentError, "text(#{offset}) - token not a text type"
    end
    @tokens[@pos+offset][1]
  end


  ## return token type at offset (relative to cursor) or nil if past the end
  ##   note - default offset is 1 (look ahead); use cur for offset 0
  def peek( offset=1 )
    return nil  if @pos+offset >= @tokens.size

    @tokens[@pos+offset][0]
  end

  ## note - returns complete token (and moves the cursor forward)
  ##   returns nil if past the end (no error raised) - why? why not?
  def next
    t = @tokens[@pos]
    @pos += 1
    t
  end

  ## consume all remaining tokens and return them as an array
  ##   if a block is passed in, every token gets mapped by the block
  def collect( &blk )
    tokens = []
    until eos?
      tok = self.next
      tokens << (blk ? blk.call( tok ) : tok)
    end
    tokens
  end
end  # class Tokens
106
+
107
+
108
+
109
+
110
### convenience helper - ignore errors by default
##   returns the tokens only (first part of the tokens/errors pair)
def tokenize( lines, debug: false )
  tokenize_with_errors( lines, debug: debug ).first
end
115
+
116
## tokenize lines (array of strings or single all-in-one text)
##   returns a pair - [tokens, errors]
##
##  note - for convenience comments (incl. inline end-of-line comments)
##          and empty lines get handled (removed) here
##      why? keeps handling "centralized" here in one place
##
##  todo/fix - rework and make simpler
##       no need to double join array of string to txt etc.
def tokenize_with_errors( lines, debug: false )

  ## step 1 - if array, join together with newline
  ##            otherwise assume single-all-in-one txt
  raw = lines
  if lines.is_a?( Array )
    raw = lines.each_with_object( String.new ) { |line, acc| acc << line << "\n" }
  end

  ## step 2 - preprocess
  ##   lines with comments and empty lines get stripped / removed
  ##   note - leading spaces (indent) get removed too
  txt = String.new
  raw.each_line do |raw_line|
    stripped = raw_line.strip
    ### skip empty lines and comments
    next if stripped.empty? || stripped.start_with?('#')

    ### cut-off end-of line comments too
    txt << stripped.sub( /#.*/, '' ).strip << "\n"
  end


  ## step 3 - tokenize line-by-line (flatten later)
  tokens_by_line = []
  errors         = []    ## keep a list of errors - why? why not?

  txt.each_line do |line|
    ## note - MUST remove/strip trailing newline (spaces optional)!!!
    line_tokens, line_errors = _tokenize_line( line.rstrip, debug: debug )

    tokens_by_line << line_tokens
    errors += line_errors
  end


  tokens_by_line = tokens_by_line.map do |line_tokens|
    #############
    ## pass 1
    ##   replace all texts with keyword matches
    ##     (e.g. group, round, leg, etc.)
    keyworded = line_tokens.map do |tok|
      next tok  unless tok[0] == :TEXT

      text = tok[1]
      if is_group?( text )
        [:GROUP, text]
      elsif is_round?( text ) || is_leg?( text )
        [:ROUND, text]
      else
        tok    ## pass through as-is (1:1)
      end
    end

    #################
    ## pass 2
    ##   transform tokens (using simple patterns)
    ##     to help along the (racc look ahead 1 - LA1) parser
    nodes = []
    buf   = Tokens.new( keyworded )

    until buf.eos?
      ## group def or round def MUST start line
      at_line_start = buf.pos == 0

      if at_line_start && buf.match?( :ROUND, :'|' )
        ## assume round def (change round to round_def)
        nodes << [:ROUND_DEF, buf.next[1]]
        nodes << buf.next
        nodes += buf.collect
        break
      elsif at_line_start && buf.match?( :GROUP, :'|' )
        ## assume group def (change group to group_def)
        nodes << [:GROUP_DEF, buf.next[1]]
        nodes << buf.next
        ## change all text to team - why? why not?
        nodes += buf.collect { |t| t[0] == :TEXT ? [:TEAM, t[1]] : t }
        break
      elsif buf.match?( :TEXT, [:SCORE, :VS, :'-'], :TEXT )
        ## e.g. Netherlands 1-2 (1-1) England  => team, score|vs, team
        nodes << [:TEAM, buf.next[1]]
        nodes << buf.next
        nodes << [:TEAM, buf.next[1]]
      elsif buf.match?( :TEXT, :MINUTE )
        nodes << [:PLAYER, buf.next[1]]
        nodes << buf.next
      else
        ## pass through
        nodes << buf.next
      end
    end

    nodes
  end


  ## flatten tokens (and auto-add newline after every line)
  tokens = tokens_by_line.flat_map { |line_nodes| line_nodes + [[:NEWLINE, "\n"]] }

  [tokens, errors]
end  # method tokenize_with_errors
246
+
247
+
248
+
249
+ def _tokenize_line( line, debug: false )
19
250
  tokens = []
20
251
  errors = [] ## keep a list of errors - why? why not?
21
252
 
@@ -100,7 +331,7 @@ def tokenize_with_errors( line, debug: false )
100
331
  when '-' then [:'-']
101
332
  when '.' then
102
333
  ## switch back to top-level mode!!
103
- puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE"
334
+ puts " LEAVE PROP_RE MODE, BACK TO TOP_LEVEL/RE" if debug
104
335
  @re = RE
105
336
  [:'.']
106
337
  else
@@ -121,7 +352,7 @@ def tokenize_with_errors( line, debug: false )
121
352
  elsif m[:prop_key]
122
353
  ## switch context to PROP_RE
123
354
  @re = PROP_RE
124
- puts " ENTER PROP_RE MODE"
355
+ puts " ENTER PROP_RE MODE" if debug
125
356
  [:PROP, m[:key]]
126
357
  elsif m[:text]
127
358
  [:TEXT, m[:text]] ## keep pos - why? why not?
@@ -252,11 +483,5 @@ def tokenize_with_errors( line, debug: false )
252
483
  end
253
484
 
254
485
 
255
- ### convience helper - ignore errors by default
256
- def tokenize( line, debug: false )
257
- tokens, _ = tokenize_with_errors( line, debug: debug )
258
- tokens
259
- end
260
-
261
486
  end # class Parser
262
487
  end # module SportDb
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 5
7
- PATCH = 5
7
+ PATCH = 7
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version