sportdb-parser 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +140 -17
- data/lib/sportdb/parser/parser.rb +414 -354
- data/lib/sportdb/parser/racc_tree.rb +24 -0
- data/lib/sportdb/parser/token-date.rb +20 -0
- data/lib/sportdb/parser/token-minute.rb +140 -0
- data/lib/sportdb/parser/token-prop.rb +17 -9
- data/lib/sportdb/parser/token.rb +39 -10
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
@@ -122,6 +122,30 @@ MatchLine = Struct.new( :ord, :date, :time, :wday,
|
|
122
122
|
|
123
123
|
end
|
124
124
|
|
125
|
+
## check - use a different name e.g. GoalLineScore or such - why? why not?
|
126
|
+
GoalLineAlt = Struct.new( :goals ) do
|
127
|
+
def pretty_print( printer )
|
128
|
+
printer.text( "<GoalLineAlt " )
|
129
|
+
printer.text( "goals=" + self.goals.pretty_inspect + ">" )
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
GoalAlt = Struct.new( :score, :player, :minute ) do
|
134
|
+
def to_s
|
135
|
+
buf = String.new
|
136
|
+
buf << "#{score} "
|
137
|
+
buf << "#{self.player}"
|
138
|
+
buf << " #{self.minute}" if self.minute
|
139
|
+
buf
|
140
|
+
end
|
141
|
+
|
142
|
+
def pretty_print( printer )
|
143
|
+
printer.text( to_s )
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
|
148
|
+
|
125
149
|
GoalLine = Struct.new( :goals1, :goals2 ) do
|
126
150
|
def pretty_print( printer )
|
127
151
|
printer.text( "<GoalLine " )
|
@@ -159,6 +159,25 @@ DATE_III_RE = %r{
|
|
159
159
|
\b
|
160
160
|
)}ix
|
161
161
|
|
162
|
+
## allow (short) "european" style, e.g. 8.8.
|
163
|
+
## note - assume day/month!!!
|
164
|
+
DATE_IIII_RE = %r{
|
165
|
+
(?<date>
|
166
|
+
\b
|
167
|
+
(?<day>\d{1,2})
|
168
|
+
\.
|
169
|
+
(?<month>\d{1,2})
|
170
|
+
\.
|
171
|
+
(?: (?:
|
172
|
+
(?<year>\d{4}) ## optional year 2025 (yyyy)
|
173
|
+
|
|
174
|
+
(?<yy>\d{2}) ## optional year 25 (yy)
|
175
|
+
)
|
176
|
+
\b
|
177
|
+
)?
|
178
|
+
)
|
179
|
+
}ix
|
180
|
+
|
162
181
|
|
163
182
|
|
164
183
|
|
@@ -169,6 +188,7 @@ DATE_RE = Regexp.union(
|
|
169
188
|
DATE_I_RE,
|
170
189
|
DATE_II_RE,
|
171
190
|
DATE_III_RE,
|
191
|
+
DATE_IIII_RE, ## e.g. 8.8. or 13.8.79 or 14.08.1973 (day.month.[year])
|
172
192
|
)
|
173
193
|
|
174
194
|
|
@@ -54,6 +54,146 @@ MINUTE_RE = %r{
|
|
54
54
|
}ix
|
55
55
|
|
56
56
|
|
57
|
+
#####
|
58
|
+
# player with minute (top-level) regex
|
59
|
+
# - starts new player/goal mode (until end of line)!!!
|
60
|
+
# - note: allow one or more spaces between name and minute
|
61
|
+
#
|
62
|
+
# note - aaa bbb 40'
|
63
|
+
# make sure anchor (^) - beginning of line - present!!!
|
64
|
+
# note - will NOT work with ^ anchor!!
|
65
|
+
# use special \G - Matches first matching position !!!!
|
66
|
+
# otherwise you get matches such as >bbb 40'< skipping >aaa< etc.!!!
|
67
|
+
#
|
68
|
+
# regex question - check if in a regex union - space regex gets matched
|
69
|
+
# or others with first matching position
|
70
|
+
# or if chars get eaten-up?
|
71
|
+
# let us know if \G is required here or not
|
72
|
+
|
73
|
+
|
74
|
+
PLAYER_WITH_MINUTE_RE = %r{
|
75
|
+
^ ### note - MUST start line; leading spaces optional (eat-up)
|
76
|
+
[ ]*
|
77
|
+
(?: # optional open bracket ([) -- remove later
|
78
|
+
(?<open_bracket> \[ )
|
79
|
+
[ ]*
|
80
|
+
)?
|
81
|
+
(?: # optional none a.k.a. -; - what todo here?
|
82
|
+
(?<none> - [ ]* ; [ ]* )
|
83
|
+
)?
|
84
|
+
(?<player_with_minute>
|
85
|
+
(?<name>
|
86
|
+
\p{L}+
|
87
|
+
\.? ## optional dot
|
88
|
+
|
89
|
+
(?:
|
90
|
+
## rule for space; only one single space allowed inline!!!
|
91
|
+
(?:
|
92
|
+
(?<![ ]) ## use negative lookbehind
|
93
|
+
[ ]
|
94
|
+
(?=\p{L}|') ## use lookahead
|
95
|
+
)
|
96
|
+
|
|
97
|
+
(?:
|
98
|
+
(?<=\p{L}) ## use lookbehind
|
99
|
+
['-] ## must be surrounded by letters
|
100
|
+
## e.g. One/Two NOT
|
101
|
+
## One/ Two or One / Two or One /Two etc.
|
102
|
+
(?=\p{L}) ## use lookahead
|
103
|
+
)
|
104
|
+
|
|
105
|
+
(?:
|
106
|
+
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
107
|
+
['] ## must be surrounded by leading space and
|
108
|
+
## trailing letters (e.g. UDI 'Beter Bed)
|
109
|
+
(?=\p{L}) ## use lookahead
|
110
|
+
)
|
111
|
+
|
|
112
|
+
(?:
|
113
|
+
(?<=\p{L}) ## use lookbehind
|
114
|
+
['] ## must be surrounded by leading letter and
|
115
|
+
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
116
|
+
(?=[ ]\p{L}) ## use lookahead (space WITH letter
|
117
|
+
)
|
118
|
+
| ## standard case with letter(s) and optional dot
|
119
|
+
(?: \p{L}+
|
120
|
+
\.? ## optional dot
|
121
|
+
)
|
122
|
+
)*
|
123
|
+
)
|
124
|
+
#### spaces
|
125
|
+
(?: [ ]+)
|
126
|
+
#### minute (see above)
|
127
|
+
##### use MINUTE_RE.source or such - for inline (reference) use? do not copy
|
128
|
+
(?<minute>
|
129
|
+
(?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
|
130
|
+
# todo - add more lookbehinds e.g. ,) etc. - why? why not?
|
131
|
+
(?:
|
132
|
+
(?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
|
133
|
+
(?: \+
|
134
|
+
(?<value2>\d{1,3})
|
135
|
+
)?
|
136
|
+
|
|
137
|
+
(?<value> \?{2} | _{2} ) ## add support for n/a (not/available)
|
138
|
+
)
|
139
|
+
' ## must have minute marker!!!!
|
140
|
+
)
|
141
|
+
|
142
|
+
)
|
143
|
+
}ix
|
144
|
+
|
145
|
+
|
146
|
+
PLAYER_WITH_SCORE_RE = %r{
|
147
|
+
^ ### note - MUST start line; leading spaces optional (eat-up)
|
148
|
+
[ ]*
|
149
|
+
(?<player_with_score>
|
150
|
+
(?<score>
|
151
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
152
|
+
)
|
153
|
+
[ ]+
|
154
|
+
(?<name>
|
155
|
+
\p{L}+
|
156
|
+
\.? ## optional dot
|
157
|
+
|
158
|
+
(?:
|
159
|
+
## rule for space; only one single space allowed inline!!!
|
160
|
+
(?:
|
161
|
+
(?<![ ]) ## use negative lookbehind
|
162
|
+
[ ]
|
163
|
+
(?=\p{L}|') ## use lookahead
|
164
|
+
)
|
165
|
+
|
|
166
|
+
(?:
|
167
|
+
(?<=\p{L}) ## use lookbehind
|
168
|
+
['-] ## must be surrounded by letters
|
169
|
+
## e.g. One/Two NOT
|
170
|
+
## One/ Two or One / Two or One /Two etc.
|
171
|
+
(?=\p{L}) ## use lookahead
|
172
|
+
)
|
173
|
+
|
|
174
|
+
(?:
|
175
|
+
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
176
|
+
['] ## must be surrounded by leading space and
|
177
|
+
## trailing letters (e.g. UDI 'Beter Bed)
|
178
|
+
(?=\p{L}) ## use lookahead
|
179
|
+
)
|
180
|
+
|
|
181
|
+
(?:
|
182
|
+
(?<=\p{L}) ## use lookbehind
|
183
|
+
['] ## must be surrounded by leading letter and
|
184
|
+
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
185
|
+
(?=[ ]\p{L}) ## use lookahead (space WITH letter
|
186
|
+
)
|
187
|
+
| ## standard case with letter(s) and optional dot
|
188
|
+
(?: \p{L}+
|
189
|
+
\.? ## optional dot
|
190
|
+
)
|
191
|
+
)*
|
192
|
+
) ## name
|
193
|
+
### check/todo - add lookahead (e.g. must be space or ,$) why? why not?
|
194
|
+
) ## player_with_score
|
195
|
+
}ix
|
196
|
+
|
57
197
|
|
58
198
|
|
59
199
|
end # module SportDb
|
@@ -19,18 +19,23 @@ class Lexer
|
|
19
19
|
|
20
20
|
|
21
21
|
## name different from text (does NOT allow number in name/text)
|
22
|
-
|
23
22
|
PROP_NAME_RE = %r{
|
24
|
-
(?<prop_name>
|
23
|
+
(?<prop_name>
|
24
|
+
\b
|
25
25
|
(?<name>
|
26
26
|
\p{L}+
|
27
27
|
\.? ## optional dot
|
28
|
-
(?:
|
29
|
-
[ ]? # only single spaces allowed inline!!!
|
30
28
|
(?:
|
29
|
+
## rule for space; only one single space allowed inline!!!
|
31
30
|
(?:
|
31
|
+
(?<![ ]) ## use negative lookbehind
|
32
|
+
[ ]
|
33
|
+
(?=\p{L}|') ## use lookahead
|
34
|
+
)
|
35
|
+
|
|
36
|
+
(?:
|
32
37
|
(?<=\p{L}) ## use lookbehind
|
33
|
-
[
|
38
|
+
['-] ## must be surrounded by letters
|
34
39
|
## e.g. One/Two NOT
|
35
40
|
## One/ Two or One / Two or One /Two etc.
|
36
41
|
(?=\p{L}) ## use lookahead
|
@@ -53,9 +58,8 @@ PROP_NAME_RE = %r{
|
|
53
58
|
(?: \p{L}+
|
54
59
|
\.? ## optional dot
|
55
60
|
)
|
56
|
-
)
|
57
|
-
|
58
|
-
)
|
61
|
+
)*
|
62
|
+
)
|
59
63
|
## add lookahead - must be non-alphanum
|
60
64
|
(?=[ ,;\]\)]|$)
|
61
65
|
)
|
@@ -83,10 +87,14 @@ PROP_NAME_RE = %r{
|
|
83
87
|
##
|
84
88
|
## todo/fix:
|
85
89
|
## check if St. Pölten works; with starting St. ???
|
90
|
+
##
|
91
|
+
## note - use special \G - Matches first matching position !!!!
|
86
92
|
|
87
93
|
|
88
94
|
PROP_KEY_RE = %r{
|
89
|
-
|
95
|
+
^ # note - MUST start line; leading spaces optional (eat-up)
|
96
|
+
[ ]*
|
97
|
+
(?<prop_key>
|
90
98
|
(?<key>
|
91
99
|
(?:\p{L}+
|
92
100
|
|
|
data/lib/sportdb/parser/token.rb
CHANGED
@@ -84,7 +84,8 @@ WDAY_RE = %r{
|
|
84
84
|
Sat|Sa|
|
85
85
|
Sun|Su
|
86
86
|
))
|
87
|
-
|
87
|
+
(?=[ ]{2}) # positive lookahead for two spaces
|
88
|
+
## todo/check - must be followed by two spaces or space + [( etc.
|
88
89
|
## to allow words starting with weekday abbreviations - why? why not?
|
89
90
|
## check if any names (teams, rounds, etc) come up in practice
|
90
91
|
## or maybe remove three letter abbreviations Mon/Tue
|
@@ -123,26 +124,54 @@ BASICS_RE = %r{
|
|
123
124
|
}ix
|
124
125
|
|
125
126
|
|
127
|
+
## general catch-all (RECOMMENDED (ALWAYS) use as last entry in union)
|
128
|
+
## to avoid advance of pos match!!!
|
129
|
+
ANY_RE = %r{
|
130
|
+
(?<any> .)
|
131
|
+
}ix
|
126
132
|
|
127
133
|
|
128
|
-
RE = Regexp.union(
|
134
|
+
RE = Regexp.union(
|
129
135
|
STATUS_RE,
|
130
136
|
NOTE_RE,
|
131
137
|
TIMEZONE_RE,
|
138
|
+
DURATION_RE, # note - duration MUST match before date
|
139
|
+
DATE_RE, ## note - date must go before time (e.g. 12.12. vs 12.12)
|
132
140
|
TIME_RE,
|
133
|
-
DURATION_RE, # note - duration MUST match before date
|
134
|
-
DATE_RE,
|
135
141
|
SCORE_MORE_RE,
|
136
142
|
SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
|
137
143
|
BASICS_RE,
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
|
143
|
-
# note - wday MUST be after text e.g. Sun Ke 68' is Sun Ke (NOT Sun) etc.
|
144
|
+
WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
|
145
|
+
# note - wday MUST be after text e.g. Sun Ke 68' is Sun Ke (NOT Sun) etc.
|
146
|
+
TEXT_RE,
|
147
|
+
ANY_RE,
|
144
148
|
)
|
145
149
|
|
146
150
|
|
151
|
+
|
152
|
+
######################################################
|
153
|
+
## goal mode (switched to by PLAYER_WITH_MINUTE_RE)
|
154
|
+
|
155
|
+
GOAL_BASICS_RE = %r{
|
156
|
+
(?<spaces> [ ]{2,}) |
|
157
|
+
(?<space> [ ])
|
158
|
+
|
|
159
|
+
(?<sym>
|
160
|
+
[;,\[\]] ## add (-) dash too - why? why not?
|
161
|
+
)
|
162
|
+
}ix
|
163
|
+
|
164
|
+
|
165
|
+
GOAL_RE = Regexp.union(
|
166
|
+
GOAL_BASICS_RE,
|
167
|
+
MINUTE_RE,
|
168
|
+
MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
|
169
|
+
GOAL_OG_RE, GOAL_PEN_RE,
|
170
|
+
SCORE_RE,
|
171
|
+
PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
|
172
|
+
)
|
173
|
+
|
174
|
+
|
175
|
+
|
147
176
|
end # class Lexer
|
148
177
|
end # module SportDb
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.3
|
4
|
+
version: 0.6.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|