sportdb-parser 0.6.3 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +196 -26
- data/lib/sportdb/parser/parser.rb +678 -449
- data/lib/sportdb/parser/racc_tree.rb +62 -3
- data/lib/sportdb/parser/token-date.rb +20 -0
- data/lib/sportdb/parser/token-minute.rb +140 -0
- data/lib/sportdb/parser/token-prop.rb +57 -9
- data/lib/sportdb/parser/token.rb +47 -10
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
@@ -4,16 +4,51 @@
|
|
4
4
|
|
5
5
|
class RaccMatchParser
|
6
6
|
|
7
|
+
RefereeLine = Struct.new( :name, :country ) do
|
8
|
+
def pretty_print( printer )
|
9
|
+
printer.text( "<RefereeLine " )
|
10
|
+
printer.text( self.name )
|
11
|
+
printer.text( " (#{self.country})" ) if self.country
|
12
|
+
printer.text( ">" )
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
## find a better name for player (use bookings?) - note - red/yellow card for trainer possible
|
17
|
+
CardsLine = Struct.new( :type, :bookings ) do
|
18
|
+
def pretty_print( printer )
|
19
|
+
printer.text( "<CardsLine " )
|
20
|
+
printer.text( self.type )
|
21
|
+
printer.text( " bookings=" + self.bookings.pretty_inspect )
|
22
|
+
printer.text( ">" )
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Booking = Struct.new( :name, :minute ) do
|
27
|
+
def to_s
|
28
|
+
buf = String.new
|
29
|
+
buf << "#{self.name}"
|
30
|
+
buf << " #{self.minute.to_s}" if self.minute
|
31
|
+
buf
|
32
|
+
end
|
33
|
+
|
34
|
+
def pretty_print( printer )
|
35
|
+
printer.text( to_s )
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
|
7
40
|
|
8
|
-
LineupLine = Struct.new( :team, :lineup ) do
|
41
|
+
LineupLine = Struct.new( :team, :lineup, :coach ) do
|
9
42
|
def pretty_print( printer )
|
10
43
|
printer.text( "<LineupLine " )
|
11
44
|
printer.text( self.team )
|
12
45
|
printer.text( " lineup=" + self.lineup.pretty_inspect )
|
46
|
+
printer.text( " coach=" + self.coach ) if self.coach
|
13
47
|
printer.text( ">" )
|
14
48
|
end
|
15
49
|
end
|
16
50
|
|
51
|
+
|
17
52
|
Lineup = Struct.new( :name, :card, :sub ) do
|
18
53
|
def pretty_print( printer )
|
19
54
|
buf = String.new
|
@@ -42,8 +77,8 @@ end
|
|
42
77
|
Sub = Struct.new( :minute, :sub ) do
|
43
78
|
def pretty_print( printer )
|
44
79
|
buf = String.new
|
45
|
-
buf << "(#{self.
|
46
|
-
buf << self.
|
80
|
+
buf << "(#{self.sub.pretty_inspect}"
|
81
|
+
buf << " #{self.minute.to_s}" if self.minute
|
47
82
|
buf << ")"
|
48
83
|
printer.text( buf )
|
49
84
|
end
|
@@ -122,6 +157,30 @@ MatchLine = Struct.new( :ord, :date, :time, :wday,
|
|
122
157
|
|
123
158
|
end
|
124
159
|
|
160
|
+
## check - use a different name e.g. GoalLineScore or such - why? why not?
|
161
|
+
GoalLineAlt = Struct.new( :goals ) do
|
162
|
+
def pretty_print( printer )
|
163
|
+
printer.text( "<GoalLineAlt " )
|
164
|
+
printer.text( "goals=" + self.goals.pretty_inspect + ">" )
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
GoalAlt = Struct.new( :score, :player, :minute ) do
|
169
|
+
def to_s
|
170
|
+
buf = String.new
|
171
|
+
buf << "#{score} "
|
172
|
+
buf << "#{self.player}"
|
173
|
+
buf << " #{self.minute}" if self.minute
|
174
|
+
buf
|
175
|
+
end
|
176
|
+
|
177
|
+
def pretty_print( printer )
|
178
|
+
printer.text( to_s )
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
|
183
|
+
|
125
184
|
GoalLine = Struct.new( :goals1, :goals2 ) do
|
126
185
|
def pretty_print( printer )
|
127
186
|
printer.text( "<GoalLine " )
|
@@ -159,6 +159,25 @@ DATE_III_RE = %r{
|
|
159
159
|
\b
|
160
160
|
)}ix
|
161
161
|
|
162
|
+
## allow (short)"european" style 8.8.
|
163
|
+
## note - assume day/month!!!
|
164
|
+
DATE_IIII_RE = %r{
|
165
|
+
(?<date>
|
166
|
+
\b
|
167
|
+
(?<day>\d{1,2})
|
168
|
+
\.
|
169
|
+
(?<month>\d{1,2})
|
170
|
+
\.
|
171
|
+
(?: (?:
|
172
|
+
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
173
|
+
|
|
174
|
+
(?<yy>\d{2}) ## optional year 25 (yy)
|
175
|
+
)
|
176
|
+
\b
|
177
|
+
)?
|
178
|
+
)
|
179
|
+
}ix
|
180
|
+
|
162
181
|
|
163
182
|
|
164
183
|
|
@@ -169,6 +188,7 @@ DATE_RE = Regexp.union(
|
|
169
188
|
DATE_I_RE,
|
170
189
|
DATE_II_RE,
|
171
190
|
DATE_III_RE,
|
191
|
+
DATE_IIII_RE, ## e.g. 8.8. or 13.8.79 or 14.08.1973 (day.month order)
|
172
192
|
)
|
173
193
|
|
174
194
|
|
@@ -54,6 +54,146 @@ MINUTE_RE = %r{
|
|
54
54
|
}ix
|
55
55
|
|
56
56
|
|
57
|
+
#####
|
58
|
+
# player with minute (top-level) regex
|
59
|
+
# - starts new player/goal mode (until end of line)!!!
|
60
|
+
# - note: allow one or more spaces between name and minute
|
61
|
+
#
|
62
|
+
# note - aaa bbb 40'
|
63
|
+
# make sure anchor (^) - beginning of line - present!!!
|
64
|
+
# note - will NOT work with ^ anchor!!
|
65
|
+
# use special \G - Matches first matching position !!!!
|
66
|
+
# otherwise you get matches such as >bbb 40'< skipping >aaa< etc.!!!
|
67
|
+
#
|
68
|
+
# regex question - check if in a regex union - space regex gets matches
|
69
|
+
# or others with first matching position
|
70
|
+
# or if chars get eaten-up?
|
71
|
+
# let us know if \G is required here or not
|
72
|
+
|
73
|
+
|
74
|
+
PLAYER_WITH_MINUTE_RE = %r{
|
75
|
+
^ ### note - MUST start line; leading spaces optional (eat-up)
|
76
|
+
[ ]*
|
77
|
+
(?: # optional open bracket ([) -- remove later
|
78
|
+
(?<open_bracket> \[ )
|
79
|
+
[ ]*
|
80
|
+
)?
|
81
|
+
(?: # optional none a.k.a. -; - what todo here?
|
82
|
+
(?<none> - [ ]* ; [ ]* )
|
83
|
+
)?
|
84
|
+
(?<player_with_minute>
|
85
|
+
(?<name>
|
86
|
+
\p{L}+
|
87
|
+
\.? ## optional dot
|
88
|
+
|
89
|
+
(?:
|
90
|
+
## rule for space; only one single space allowed inline!!!
|
91
|
+
(?:
|
92
|
+
(?<![ ]) ## use negative lookbehind
|
93
|
+
[ ]
|
94
|
+
(?=\p{L}|') ## use lookahead
|
95
|
+
)
|
96
|
+
|
|
97
|
+
(?:
|
98
|
+
(?<=\p{L}) ## use lookbehind
|
99
|
+
['-] ## must be surrounded by letters
|
100
|
+
## e.g. One/Two NOT
|
101
|
+
## One/ Two or One / Two or One /Two etc.
|
102
|
+
(?=\p{L}) ## use lookahead
|
103
|
+
)
|
104
|
+
|
|
105
|
+
(?:
|
106
|
+
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
107
|
+
['] ## must be surrounded by leading space and
|
108
|
+
## trailing letters (e.g. UDI 'Beter Bed)
|
109
|
+
(?=\p{L}) ## use lookahead
|
110
|
+
)
|
111
|
+
|
|
112
|
+
(?:
|
113
|
+
(?<=\p{L}) ## use lookbehind
|
114
|
+
['] ## must be surrounded by leading letter and
|
115
|
+
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
116
|
+
(?=[ ]\p{L}) ## use lookahead (space WITH letter)
|
117
|
+
)
|
118
|
+
| ## standard case with letter(s) and optional dot
|
119
|
+
(?: \p{L}+
|
120
|
+
\.? ## optional dot
|
121
|
+
)
|
122
|
+
)*
|
123
|
+
)
|
124
|
+
#### spaces
|
125
|
+
(?: [ ]+)
|
126
|
+
#### minute (see above)
|
127
|
+
##### use MINUTE_RE.source or such - for inline (reference) use? do not copy
|
128
|
+
(?<minute>
|
129
|
+
(?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
|
130
|
+
# todo - add more lookbehinds e.g. ,) etc. - why? why not?
|
131
|
+
(?:
|
132
|
+
(?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
|
133
|
+
(?: \+
|
134
|
+
(?<value2>\d{1,3})
|
135
|
+
)?
|
136
|
+
|
|
137
|
+
(?<value> \?{2} | _{2} ) ## add support for n/a (not/available)
|
138
|
+
)
|
139
|
+
' ## must have minute marker!!!!
|
140
|
+
)
|
141
|
+
|
142
|
+
)
|
143
|
+
}ix
|
144
|
+
|
145
|
+
|
146
|
+
PLAYER_WITH_SCORE_RE = %r{
|
147
|
+
^ ### note - MUST start line; leading spaces optional (eat-up)
|
148
|
+
[ ]*
|
149
|
+
(?<player_with_score>
|
150
|
+
(?<score>
|
151
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
152
|
+
)
|
153
|
+
[ ]+
|
154
|
+
(?<name>
|
155
|
+
\p{L}+
|
156
|
+
\.? ## optional dot
|
157
|
+
|
158
|
+
(?:
|
159
|
+
## rule for space; only one single space allowed inline!!!
|
160
|
+
(?:
|
161
|
+
(?<![ ]) ## use negative lookbehind
|
162
|
+
[ ]
|
163
|
+
(?=\p{L}|') ## use lookahead
|
164
|
+
)
|
165
|
+
|
|
166
|
+
(?:
|
167
|
+
(?<=\p{L}) ## use lookbehind
|
168
|
+
['-] ## must be surrounded by letters
|
169
|
+
## e.g. One/Two NOT
|
170
|
+
## One/ Two or One / Two or One /Two etc.
|
171
|
+
(?=\p{L}) ## use lookahead
|
172
|
+
)
|
173
|
+
|
|
174
|
+
(?:
|
175
|
+
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
176
|
+
['] ## must be surrounded by leading space and
|
177
|
+
## trailing letters (e.g. UDI 'Beter Bed)
|
178
|
+
(?=\p{L}) ## use lookahead
|
179
|
+
)
|
180
|
+
|
|
181
|
+
(?:
|
182
|
+
(?<=\p{L}) ## use lookbehind
|
183
|
+
['] ## must be surrounded by leading letter and
|
184
|
+
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
185
|
+
(?=[ ]\p{L}) ## use lookahead (space WITH letter)
|
186
|
+
)
|
187
|
+
| ## standard case with letter(s) and optional dot
|
188
|
+
(?: \p{L}+
|
189
|
+
\.? ## optional dot
|
190
|
+
)
|
191
|
+
)*
|
192
|
+
) ## name
|
193
|
+
### check/todo - add lookahead (e.g. must be space or ,$) why? why not?
|
194
|
+
) ## player_with_score
|
195
|
+
}ix
|
196
|
+
|
57
197
|
|
58
198
|
|
59
199
|
end # module SportDb
|
@@ -19,18 +19,23 @@ class Lexer
|
|
19
19
|
|
20
20
|
|
21
21
|
## name different from text (does NOT allow number in name/text)
|
22
|
-
|
23
22
|
PROP_NAME_RE = %r{
|
24
|
-
(?<prop_name>
|
23
|
+
(?<prop_name>
|
24
|
+
\b
|
25
25
|
(?<name>
|
26
26
|
\p{L}+
|
27
27
|
\.? ## optional dot
|
28
|
-
(?:
|
29
|
-
[ ]? # only single spaces allowed inline!!!
|
30
28
|
(?:
|
29
|
+
## rule for space; only one single space allowed inline!!!
|
31
30
|
(?:
|
31
|
+
(?<![ ]) ## use negative lookbehind
|
32
|
+
[ ]
|
33
|
+
(?=\p{L}|') ## use lookahead
|
34
|
+
)
|
35
|
+
|
|
36
|
+
(?:
|
32
37
|
(?<=\p{L}) ## use lookbehind
|
33
|
-
[
|
38
|
+
['-] ## must be surrounded by letters
|
34
39
|
## e.g. One/Two NOT
|
35
40
|
## One/ Two or One / Two or One /Two etc.
|
36
41
|
(?=\p{L}) ## use lookahead
|
@@ -53,9 +58,8 @@ PROP_NAME_RE = %r{
|
|
53
58
|
(?: \p{L}+
|
54
59
|
\.? ## optional dot
|
55
60
|
)
|
56
|
-
)
|
57
|
-
|
58
|
-
)
|
61
|
+
)*
|
62
|
+
)
|
59
63
|
## add lookahead - must be non-alphanum
|
60
64
|
(?=[ ,;\]\)]|$)
|
61
65
|
)
|
@@ -83,10 +87,14 @@ PROP_NAME_RE = %r{
|
|
83
87
|
##
|
84
88
|
## todo/fix:
|
85
89
|
## check if St. Pölten works; with starting St. ???
|
90
|
+
##
|
91
|
+
## note - use special \G - Matches first matching position !!!!
|
86
92
|
|
87
93
|
|
88
94
|
PROP_KEY_RE = %r{
|
89
|
-
|
95
|
+
^ # note - MUST start line; leading spaces optional (eat-up)
|
96
|
+
[ ]*
|
97
|
+
(?<prop_key>
|
90
98
|
(?<key>
|
91
99
|
(?:\p{L}+
|
92
100
|
|
|
@@ -113,6 +121,35 @@ PROP_NAME_RE = %r{
|
|
113
121
|
|
114
122
|
|
115
123
|
|
124
|
+
################
|
125
|
+
## todo/check - use token for card short cuts?
|
126
|
+
## if m[:name] == 'Y'
|
127
|
+
## [:YELLOW_CARD, m[:name]]
|
128
|
+
## elsif m[:name] == 'R'
|
129
|
+
## [:RED_CARD, m[:name]]
|
130
|
+
## - [Y], [R], [Y/R] Yellow-Red Card
|
131
|
+
## check if minutes possible inside [Y 46']
|
132
|
+
## add [c] for captain too
|
133
|
+
|
134
|
+
|
135
|
+
|
136
|
+
### simple prop key for inline use e.g.
|
137
|
+
### Coach: or Trainer: or ... add more here later
|
138
|
+
|
139
|
+
PROP_KEY_INLINE_RE = %r{
|
140
|
+
\b
|
141
|
+
(?<prop_key> ## note: use prop_key (NOT prop_key_inline or such)
|
142
|
+
(?<key>
|
143
|
+
\p{L}+
|
144
|
+
)
|
145
|
+
## note - NO spaces allowed for key for now!!!
|
146
|
+
:
|
147
|
+
(?=[ ]+) ## positive lookahead (must be followed by space!!)
|
148
|
+
)
|
149
|
+
}ix
|
150
|
+
|
151
|
+
|
152
|
+
|
116
153
|
PROP_BASICS_RE = %r{
|
117
154
|
(?<spaces> [ ]{2,}) |
|
118
155
|
(?<space> [ ])
|
@@ -125,9 +162,20 @@ PROP_BASICS_RE = %r{
|
|
125
162
|
PROP_RE = Regexp.union(
|
126
163
|
PROP_BASICS_RE,
|
127
164
|
MINUTE_RE,
|
165
|
+
PROP_KEY_INLINE_RE,
|
128
166
|
PROP_NAME_RE,
|
167
|
+
## todo/fix - add ANY_RE here too!!!
|
129
168
|
)
|
130
169
|
|
170
|
+
## note - no inline keys possible
|
171
|
+
## todo/fix - use custom (limited) prop basics too
|
172
|
+
PROP_CARDS_RE = Regexp.union(
|
173
|
+
PROP_BASICS_RE,
|
174
|
+
MINUTE_RE,
|
175
|
+
PROP_NAME_RE,
|
176
|
+
## todo/fix - add ANY_RE here too!!!
|
177
|
+
)
|
178
|
+
|
131
179
|
|
132
180
|
end # class Lexer
|
133
181
|
end # module SportDb
|
data/lib/sportdb/parser/token.rb
CHANGED
@@ -84,7 +84,8 @@ WDAY_RE = %r{
|
|
84
84
|
Sat|Sa|
|
85
85
|
Sun|Su
|
86
86
|
))
|
87
|
-
|
87
|
+
(?=[ ]{2}) # positive lookahead for two spaces
|
88
|
+
## todo/check - must be followed by two spaces or space + [( etc.
|
88
89
|
## to allow words starting with weekday abbreviations - why? why not?
|
89
90
|
## check if any names (teams, rounds, etc) come up in practice
|
90
91
|
## or maybe remove three letter abbreviations Mon/Tue
|
@@ -123,26 +124,62 @@ BASICS_RE = %r{
|
|
123
124
|
}ix
|
124
125
|
|
125
126
|
|
127
|
+
## general catch-all (RECOMMENDED (ALWAYS) use as last entry in union)
|
128
|
+
## to avoid advance of pos match!!!
|
129
|
+
ANY_RE = %r{
|
130
|
+
(?<any> .)
|
131
|
+
}ix
|
126
132
|
|
127
133
|
|
128
|
-
RE = Regexp.union(
|
134
|
+
RE = Regexp.union(
|
129
135
|
STATUS_RE,
|
130
136
|
NOTE_RE,
|
131
137
|
TIMEZONE_RE,
|
138
|
+
DURATION_RE, # note - duration MUST match before date
|
139
|
+
DATE_RE, ## note - date must go before time (e.g. 12.12. vs 12.12)
|
132
140
|
TIME_RE,
|
133
|
-
DURATION_RE, # note - duration MUST match before date
|
134
|
-
DATE_RE,
|
135
141
|
SCORE_MORE_RE,
|
136
142
|
SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
|
137
143
|
BASICS_RE,
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
|
143
|
-
# note - wday MUST be after text e.g. Sun Ke 68' is Sun Ke (NOT Sun) etc.
|
144
|
+
WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
|
145
|
+
# note - wday MUST be after text e.g. Sun Ke 68' is Sun Ke (NOT Sun) etc.
|
146
|
+
TEXT_RE,
|
147
|
+
ANY_RE,
|
144
148
|
)
|
145
149
|
|
146
150
|
|
151
|
+
|
152
|
+
######################################################
|
153
|
+
## goal mode (switched to by PLAYER_WITH_MINUTE_RE)
|
154
|
+
|
155
|
+
GOAL_BASICS_RE = %r{
|
156
|
+
(?<spaces> [ ]{2,}) |
|
157
|
+
(?<space> [ ])
|
158
|
+
|
|
159
|
+
(?<sym>
|
160
|
+
[;,\[\]] ## add (-) dash too - why? why not?
|
161
|
+
)
|
162
|
+
}ix
|
163
|
+
|
164
|
+
|
165
|
+
GOAL_RE = Regexp.union(
|
166
|
+
GOAL_BASICS_RE,
|
167
|
+
MINUTE_RE,
|
168
|
+
MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
|
169
|
+
GOAL_OG_RE, GOAL_PEN_RE,
|
170
|
+
SCORE_RE,
|
171
|
+
PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
|
172
|
+
)
|
173
|
+
|
174
|
+
PROP_GOAL_RE = Regexp.union(
|
175
|
+
GOAL_BASICS_RE,
|
176
|
+
MINUTE_RE,
|
177
|
+
MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
|
178
|
+
GOAL_OG_RE, GOAL_PEN_RE,
|
179
|
+
SCORE_RE,
|
180
|
+
PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
|
181
|
+
)
|
182
|
+
|
183
|
+
|
147
184
|
end # class Lexer
|
148
185
|
end # module SportDb
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|