sportdb-parser 0.5.9 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +2 -0
- data/lib/sportdb/parser/lexer.rb +47 -28
- data/lib/sportdb/parser/parser.rb +421 -344
- data/lib/sportdb/parser/racc_parser.rb +1 -1
- data/lib/sportdb/parser/racc_tree.rb +12 -5
- data/lib/sportdb/parser/token-date.rb +18 -1
- data/lib/sportdb/parser/token-minute.rb +45 -0
- data/lib/sportdb/parser/token-prop.rb +133 -0
- data/lib/sportdb/parser/token-text.rb +9 -2
- data/lib/sportdb/parser/token.rb +43 -177
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +2 -0
- metadata +4 -2
@@ -14,7 +14,7 @@ def initialize( txt, debug: false )
|
|
14
14
|
|
15
15
|
### todo:
|
16
16
|
## - pass along debug flag
|
17
|
-
lexer = SportDb::Lexer.new( txt )
|
17
|
+
lexer = SportDb::Lexer.new( txt, debug: debug )
|
18
18
|
## note - use tokenize_with_errors and add/collect tokenize errors
|
19
19
|
@tokens, @errors = lexer.tokenize_with_errors
|
20
20
|
## pp @tokens
|
@@ -71,10 +71,14 @@ RoundDef = Struct.new( :name, :date, :duration ) do
|
|
71
71
|
end
|
72
72
|
end
|
73
73
|
|
74
|
-
DateHeader = Struct.new( :date ) do
|
74
|
+
DateHeader = Struct.new( :date, :time, :geo, :timezone ) do
|
75
75
|
def pretty_print( printer )
|
76
76
|
printer.text( "<DateHeader " )
|
77
|
-
printer.text( "#{self.date.pretty_inspect}
|
77
|
+
printer.text( "#{self.date.pretty_inspect}" )
|
78
|
+
printer.text( " time=#{self.time.pretty_inspect}" ) if self.time
|
79
|
+
printer.text( " geo=#{self.geo.pretty_inspect}" ) if self.geo
|
80
|
+
printer.text( " timezone=#{self.timezone}") if self.timezone
|
81
|
+
printer.text( ">")
|
78
82
|
end
|
79
83
|
end
|
80
84
|
|
@@ -85,14 +89,17 @@ GroupHeader = Struct.new( :name ) do
|
|
85
89
|
end
|
86
90
|
end
|
87
91
|
|
88
|
-
RoundHeader = Struct.new( :names ) do
|
92
|
+
RoundHeader = Struct.new( :names, :group ) do
|
89
93
|
def pretty_print( printer )
|
90
94
|
printer.text( "<RoundHeader " )
|
91
|
-
printer.text( "#{self.names.join(', ')}
|
95
|
+
printer.text( "#{self.names.join(', ')}" )
|
96
|
+
printer.text( " group=#{self.group}") if self.group
|
97
|
+
printer.text( ">" )
|
92
98
|
end
|
93
99
|
end
|
94
100
|
|
95
|
-
|
101
|
+
|
102
|
+
MatchLine = Struct.new( :ord, :date, :time, :wday,
|
96
103
|
:team1, :team2, :score,
|
97
104
|
:status,
|
98
105
|
:geo,
|
@@ -146,12 +146,29 @@ DATE_II_RE = %r{
|
|
146
146
|
)}ix
|
147
147
|
|
148
148
|
|
149
|
+
# e.g. iso-date - 2011-08-25
|
150
|
+
## todo/check - allow 2011-8-25 or 2011-8-3 / 2011-08-03 etc. - why? why not?
|
151
|
+
DATE_III_RE = %r{
|
152
|
+
(?<date>
|
153
|
+
\b
|
154
|
+
(?<year>\d{4})
|
155
|
+
-
|
156
|
+
(?<month>\d{2})
|
157
|
+
-
|
158
|
+
(?<day>\d{2})
|
159
|
+
\b
|
160
|
+
)}ix
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
|
149
165
|
#############################################
|
150
166
|
# map tables
|
151
167
|
# note: order matters; first come-first matched/served
|
152
168
|
DATE_RE = Regexp.union(
|
153
169
|
DATE_I_RE,
|
154
|
-
DATE_II_RE
|
170
|
+
DATE_II_RE,
|
171
|
+
DATE_III_RE,
|
155
172
|
)
|
156
173
|
|
157
174
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
|
2
|
+
module SportDb
|
3
|
+
class Lexer
|
4
|
+
|
5
|
+
#
|
6
|
+
# todo/check - move goal type regexes to goal or somewhere else?
|
7
|
+
#
|
8
|
+
|
9
|
+
## goal types
|
10
|
+
# (pen.) or (pen) or (p.) or (p)
|
11
|
+
## (o.g.) or (og)
|
12
|
+
## todo/check - keep case-insensitive
|
13
|
+
## or allow OG or P or PEN or
|
14
|
+
## only lower case - why? why not?
|
15
|
+
GOAL_PEN_RE = %r{
|
16
|
+
(?<pen> \(
|
17
|
+
(?:pen|p)\.?
|
18
|
+
\)
|
19
|
+
)
|
20
|
+
}ix
|
21
|
+
GOAL_OG_RE = %r{
|
22
|
+
(?<og> \(
|
23
|
+
(?:og|o\.g\.)
|
24
|
+
\)
|
25
|
+
)
|
26
|
+
}ix
|
27
|
+
|
28
|
+
|
29
|
+
MINUTE_RE = %r{
|
30
|
+
(?<minute>
|
31
|
+
(?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
|
32
|
+
# todo - add more lookbehinds e.g. ,) etc. - why? why not?
|
33
|
+
(?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
|
34
|
+
(?: \+
|
35
|
+
(?<value2>\d{1,3})
|
36
|
+
)?
|
37
|
+
' ## must have minute marker!!!!
|
38
|
+
)
|
39
|
+
}ix
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
end # class Lexer
|
45
|
+
end # module SportDb
|
@@ -0,0 +1,133 @@
|
|
1
|
+
###
|
2
|
+
## team prop mode e.g.
|
3
|
+
##
|
4
|
+
##
|
5
|
+
## Fri Jun 14 21:00 @ München Fußball Arena, München
|
6
|
+
## (1) Germany v Scotland 5-1 (3-0)
|
7
|
+
## Wirtz 10' Musiala 19' Havertz 45+1' (pen.) Füllkrug 68' Can 90+3'; Rüdiger 87' (o.g.)
|
8
|
+
##
|
9
|
+
## Germany: Neuer - Kimmich, Rüdiger, Tah [Y], Mittelstädt - Andrich [Y] (Groß 46'),
|
10
|
+
## Kroos (Can 80') - Musiala (Müller 74'), Gündogan, Wirtz (Sane 63') -
|
11
|
+
## Havertz (Füllkrug 63')
|
12
|
+
## Scotland: Gunn - Porteous [R 44'], Hendry, Tierney (McKenna 78') - Ralston [Y],
|
13
|
+
## McTominay, McGregor (Gilmour 67'), Robertson - Christie (Shankland 82'),
|
14
|
+
## Adams (Hanley 46'), McGinn (McLean 67')
|
15
|
+
|
16
|
+
|
17
|
+
module SportDb
|
18
|
+
class Lexer
|
19
|
+
|
20
|
+
|
21
|
+
## name different from text (does NOT allow number in name/text)
|
22
|
+
|
23
|
+
PROP_NAME_RE = %r{
|
24
|
+
(?<prop_name> \b
|
25
|
+
(?<name>
|
26
|
+
\p{L}+
|
27
|
+
\.? ## optional dot
|
28
|
+
(?:
|
29
|
+
[ ]? # only single spaces allowed inline!!!
|
30
|
+
(?:
|
31
|
+
(?:
|
32
|
+
(?<=\p{L}) ## use lookbehind
|
33
|
+
[/'-] ## must be surrounded by letters
|
34
|
+
## e.g. One/Two NOT
|
35
|
+
## One/ Two or One / Two or One /Two etc.
|
36
|
+
(?=\p{L}) ## use lookahead
|
37
|
+
)
|
38
|
+
|
|
39
|
+
(?:
|
40
|
+
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
41
|
+
['] ## must be surrounded by leading space and
|
42
|
+
## trailing letters (e.g. UDI 'Beter Bed)
|
43
|
+
(?=\p{L}) ## use lookahead
|
44
|
+
)
|
45
|
+
|
|
46
|
+
(?:
|
47
|
+
(?<=\p{L}) ## use lookbehind
|
48
|
+
['] ## must be surrounded by leading letter and
|
49
|
+
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
50
|
+
(?=[ ]\p{L}) ## use lookahead (space WITH letter
|
51
|
+
)
|
52
|
+
| ## standard case with letter(s) and optional dot
|
53
|
+
(?: \p{L}+
|
54
|
+
\.? ## optional dot
|
55
|
+
)
|
56
|
+
)+
|
57
|
+
)*
|
58
|
+
)
|
59
|
+
## add lookahead - must be non-alphanum
|
60
|
+
(?=[ ,;\]\)]|$)
|
61
|
+
)
|
62
|
+
}ix
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
|
67
|
+
##############
|
68
|
+
# add support for props/ attributes e.g.
|
69
|
+
#
|
70
|
+
# Germany: Neuer - Kimmich, Rüdiger, Tah [Y], Mittelstädt - Andrich [Y] (46' Groß),
|
71
|
+
# Kroos (80' Can) - Musiala (74' Müller), Gündogan,
|
72
|
+
# Wirtz (63' Sane) - Havertz (63' Füllkrug)
|
73
|
+
# Scotland: Gunn - Porteous [R 44'], Hendry, Tierney (78' McKenna) - Ralston [Y],
|
74
|
+
# McTominay, McGregor (67' Gilmour), Robertson - Christie (82' Shankland),
|
75
|
+
# Adams (46' Hanley), McGinn (67' McLean)
|
76
|
+
#
|
77
|
+
## note: colon (:) MUST be followed by one (or more) spaces
|
78
|
+
## make sure mon feb 12 18:10 will not match
|
79
|
+
## allow 1. FC Köln etc.
|
80
|
+
## Mainz 05:
|
81
|
+
## limit to 30 chars max
|
82
|
+
## only allow chars incl. intl but (NOT ()[]/;)
|
83
|
+
##
|
84
|
+
## todo/fix:
|
85
|
+
## check if St. Pölten works; with starting St. ???
|
86
|
+
|
87
|
+
|
88
|
+
PROP_KEY_RE = %r{
|
89
|
+
(?<prop_key> \b
|
90
|
+
(?<key>
|
91
|
+
(?:\p{L}+
|
92
|
+
|
|
93
|
+
\d+ # check for num lookahead (MUST be space or dot)
|
94
|
+
## MUST be followed by (optional dot) and
|
95
|
+
## required space !!!
|
96
|
+
## MUST be followed by a to z!!!!
|
97
|
+
\.? ## optional dot
|
98
|
+
[ ]? ## make space optional too - why? why not?
|
99
|
+
## yes - eg. 1st, 2nd, 5th etc.
|
100
|
+
\p{L}+
|
101
|
+
)
|
102
|
+
[\d\p{L}'/° -]*? ## allow almost anything
|
103
|
+
## fix - add negative lookahead
|
104
|
+
## no space and dash etc.
|
105
|
+
## only allowed "inline" not at the end
|
106
|
+
## must end with letter or digit!
|
107
|
+
)
|
108
|
+
[ ]*? # slurp trailing spaces
|
109
|
+
:
|
110
|
+
(?=[ ]+) ## positive lookahead (must be followed by space!!)
|
111
|
+
)
|
112
|
+
}ix
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
PROP_BASICS_RE = %r{
|
117
|
+
(?<spaces> [ ]{2,}) |
|
118
|
+
(?<space> [ ])
|
119
|
+
|
|
120
|
+
(?<sym>
|
121
|
+
[;,\(\)\[\]-]
|
122
|
+
)
|
123
|
+
}ix
|
124
|
+
|
125
|
+
PROP_RE = Regexp.union(
|
126
|
+
PROP_BASICS_RE,
|
127
|
+
MINUTE_RE,
|
128
|
+
PROP_NAME_RE,
|
129
|
+
)
|
130
|
+
|
131
|
+
|
132
|
+
end # class Lexer
|
133
|
+
end # module SportDb
|
@@ -24,6 +24,13 @@ class Lexer
|
|
24
24
|
# allow Cote d'Ivoire or such
|
25
25
|
## e.g. add '
|
26
26
|
|
27
|
+
## note:
|
28
|
+
## make sure these do NOT match!!!
|
29
|
+
## TEXT => "Matchday 1 / Group A"
|
30
|
+
## TEXT => "Matchday 2 / Group A"
|
31
|
+
## TEXT => "Matchday 3 / Group A"
|
32
|
+
|
33
|
+
|
27
34
|
|
28
35
|
TEXT_RE = %r{
|
29
36
|
## must start with alpha (allow unicode letters!!)
|
@@ -59,11 +66,11 @@ TEXT_RE = %r{
|
|
59
66
|
## AND switch to case-sensitive (via -i!!!)
|
60
67
|
)
|
61
68
|
| # only single spaces allowed inline!!!
|
62
|
-
[
|
69
|
+
[-/]
|
63
70
|
)?
|
64
71
|
(?:
|
65
72
|
\p{L} |
|
66
|
-
[
|
73
|
+
[&'°]
|
67
74
|
|
|
68
75
|
(?:
|
69
76
|
\d+
|
data/lib/sportdb/parser/token.rb
CHANGED
@@ -7,13 +7,14 @@ class Lexer
|
|
7
7
|
##
|
8
8
|
# keep 18h30 - why? why not?
|
9
9
|
# add support for 6:30pm 8:20am etc. - why? why not?
|
10
|
-
|
10
|
+
#
|
11
|
+
# check - only support h e.g. 18h30 or 18H30 too - why? why not?
|
12
|
+
# e.g. 18.30 (or 18:30 or 18h30)
|
11
13
|
TIME_RE = %r{
|
12
|
-
## e.g. 18.30 (or 18:30 or 18h30)
|
13
14
|
(?<time> \b
|
14
|
-
|
15
|
+
(?: (?<hour>\d{1,2})
|
15
16
|
(?: :|\.|h )
|
16
|
-
(?<minute>\d{2})
|
17
|
+
(?<minute>\d{2}))
|
17
18
|
\b
|
18
19
|
)
|
19
20
|
}ix
|
@@ -42,9 +43,12 @@ TIME_RE = %r{
|
|
42
43
|
# https://en.wikipedia.org/wiki/Time_zone
|
43
44
|
# https://en.wikipedia.org/wiki/List_of_UTC_offsets
|
44
45
|
# https://en.wikipedia.org/wiki/UTC−04:00 etc.
|
45
|
-
|
46
|
+
#
|
47
|
+
# e.g. (UTC-2) or (CEST/UTC-2) etc.
|
48
|
+
# todo check - only allow upcase
|
49
|
+
# or (utc-2) and (cest/utc-2) too - why? why not?
|
50
|
+
|
46
51
|
TIMEZONE_RE = %r{
|
47
|
-
## e.g. (UTC-2) or (CEST/UTC-2) etc.
|
48
52
|
(?<timezone>
|
49
53
|
\(
|
50
54
|
## optional "local" timezone name eg. BRT or CEST etc.
|
@@ -60,6 +64,35 @@ TIMEZONE_RE = %r{
|
|
60
64
|
|
61
65
|
|
62
66
|
|
67
|
+
## add wday / stand-alone week day - as separate regex or
|
68
|
+
## use TEXT with is_wday? check or such with
|
69
|
+
## requirement of beginning of line (anchored to line) only??
|
70
|
+
## - why? why not?
|
71
|
+
|
72
|
+
WDAY_RE = %r{
|
73
|
+
(?<wday>
|
74
|
+
\b # note - alternation (|) has the lowest precedence, so
|
75
|
+
# parentheses are required around \b()\b !!!
|
76
|
+
## note - case sensitive (that is, NOT case-insensitive)!!!
|
77
|
+
(?<day_name>
|
78
|
+
(?-i:
|
79
|
+
Mon|Mo|
|
80
|
+
Tue|Tu|
|
81
|
+
Wed|We|
|
82
|
+
Thu|Th|
|
83
|
+
Fri|Fr|
|
84
|
+
Sat|Sa|
|
85
|
+
Sun|Su
|
86
|
+
))
|
87
|
+
\b ## todo/check - must be followed by two spaces or space + [( etc.
|
88
|
+
## to allow words starting with weekday abbreviations - why? why not?
|
89
|
+
## check if any names (teams, rounds, etc) come up in practice
|
90
|
+
## or maybe remove three letter abbreviations Mon/Tue
|
91
|
+
## and keep only Mo/Tu/We etc. - why? why not?
|
92
|
+
)}x
|
93
|
+
|
94
|
+
|
95
|
+
|
63
96
|
|
64
97
|
BASICS_RE = %r{
|
65
98
|
## e.g. (51) or (1) etc. - limit digits of number???
|
@@ -78,189 +111,22 @@ BASICS_RE = %r{
|
|
78
111
|
(?<spaces> [ ]{2,}) |
|
79
112
|
(?<space> [ ])
|
80
113
|
|
|
81
|
-
(?<sym>[
|
82
|
-
}ix
|
83
|
-
|
84
|
-
|
85
|
-
## removed from basics
|
86
|
-
=begin
|
87
|
-
(?<none>
|
88
|
-
(?<=[ \[]|^) # Positive lookbehind for space or [
|
89
|
-
-
|
90
|
-
(?=[ ]*;) # positive lookahead for space
|
91
|
-
)
|
92
|
-
|
|
93
|
-
(?<vs>
|
94
|
-
(?<=[ ]) # Positive lookbehind for space
|
95
|
-
(?:
|
96
|
-
vs\.?| ## allow optional dot (eg. vs. v.)
|
97
|
-
v\.?|
|
98
|
-
-
|
99
|
-
) # not bigger match first e.g. vs than v etc.
|
100
|
-
(?=[ ]) # positive lookahead for space
|
101
|
-
)
|
102
|
-
|
|
103
|
-
|
104
|
-
make - into a simple symbol !!!
|
105
|
-
=end
|
106
|
-
|
107
|
-
|
108
|
-
MINUTE_RE = %r{
|
109
|
-
(?<minute>
|
110
|
-
(?<=[ (]) # Positive lookbehind for space or opening ( e.g. (61') required
|
111
|
-
(?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
|
112
|
-
(?: \+
|
113
|
-
(?<value2>\d{1,3})
|
114
|
-
)?
|
115
|
-
' ## must have minute marker!!!!
|
116
|
-
)
|
117
|
-
}ix
|
118
|
-
|
119
|
-
|
120
|
-
## goal types
|
121
|
-
# (pen.) or (pen) or (p.) or (p)
|
122
|
-
## (o.g.) or (og)
|
123
|
-
GOAL_PEN_RE = %r{
|
124
|
-
(?<pen> \(
|
125
|
-
(?:pen|p)\.?
|
126
|
-
\)
|
127
|
-
)
|
128
|
-
}ix
|
129
|
-
GOAL_OG_RE = %r{
|
130
|
-
(?<og> \(
|
131
|
-
(?:og|o\.g\.)
|
132
|
-
\)
|
133
|
-
)
|
114
|
+
(?<sym>[;,/@|\[\]-])
|
134
115
|
}ix
|
135
116
|
|
136
117
|
|
137
118
|
|
138
119
|
|
139
|
-
|
140
|
-
|
141
|
-
PROP_BASICS_RE = %r{
|
142
|
-
(?<spaces> [ ]{2,}) |
|
143
|
-
(?<space> [ ])
|
144
|
-
|
|
145
|
-
(?<sym>[.;,\(\)\[\]-]) ## note - dot (.) is the (all-important) end-of-prop marker!!!
|
146
|
-
}ix
|
147
|
-
|
148
|
-
|
149
|
-
## name different from text (does not allow number in name/text)
|
150
|
-
##
|
151
|
-
## note - includes special handling for dot (.) if at the end of line!!!
|
152
|
-
## end-of-line dot (.) is the prop end-of-marker - do NOT eat-up!!!
|
153
|
-
|
154
|
-
PROP_NAME_RE = %r{
|
155
|
-
(?<prop_name> \b
|
156
|
-
(?<name>
|
157
|
-
\p{L}+
|
158
|
-
(?: \. (?: (?![ ]*$) )
|
159
|
-
)? ## edge case - check for end of prop marker! (e.g. Stop.)
|
160
|
-
(?:
|
161
|
-
[ ]? # only single spaces allowed inline!!!
|
162
|
-
(?:
|
163
|
-
(?:
|
164
|
-
(?<=\p{L}) ## use lookbehind
|
165
|
-
[/'-] ## must be surrounded by letters
|
166
|
-
## e.g. One/Two NOT
|
167
|
-
## One/ Two or One / Two or One /Two etc.
|
168
|
-
(?=\p{L}) ## use lookahead
|
169
|
-
)
|
170
|
-
|
|
171
|
-
(?:
|
172
|
-
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
173
|
-
['] ## must be surrounded by leading space and
|
174
|
-
## traling letters (e.g. UDI 'Beter Bed)
|
175
|
-
(?=\p{L}) ## use lookahead
|
176
|
-
)
|
177
|
-
|
|
178
|
-
(?:
|
179
|
-
(?<=\p{L}) ## use lookbehind
|
180
|
-
['] ## must be surrounded by leading letter and
|
181
|
-
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
182
|
-
(?=[ ]\p{L}) ## use lookahead (space WITH letter
|
183
|
-
)
|
184
|
-
|
|
185
|
-
(?: \p{L}+
|
186
|
-
(?: \.
|
187
|
-
(?: (?![ ]*$) )
|
188
|
-
)? ## last dot is delimiter!!!
|
189
|
-
)
|
190
|
-
)+
|
191
|
-
)*
|
192
|
-
)
|
193
|
-
## add lookahead - must be non-alphanum (or dot)
|
194
|
-
(?=[ .,;\]\)]|$)
|
195
|
-
)
|
196
|
-
}ix
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
##############
|
202
|
-
# add support for props/ attributes e.g.
|
203
|
-
#
|
204
|
-
# Germany: Neuer - Kimmich, Rüdiger, Tah [Y], Mittelstädt – Andrich [Y] (46' Groß),
|
205
|
-
# Kroos (80' Can) – Musiala (74' Müller), Gündogan,
|
206
|
-
# Wirtz (63' Sane) – Havertz (63' Füllkrug).
|
207
|
-
# Scotland: Gunn – Porteous [R 44'], Hendry, Tierney (78' McKenna) – Ralston [Y],
|
208
|
-
# McTominay, McGregor (67' Gilmour), Robertson – Christie (82' Shankland),
|
209
|
-
# Adams (46' Hanley), McGinn (67' McLean).
|
210
|
-
#
|
211
|
-
## note: colon (:) MUST be followed by one (or more) spaces
|
212
|
-
## make sure mon feb 12 18:10 will not match
|
213
|
-
## allow 1. FC Köln etc.
|
214
|
-
## Mainz 05:
|
215
|
-
## limit to 30 chars max
|
216
|
-
## only allow chars incl. intl but (NOT ()[]/;)
|
217
|
-
|
218
|
-
|
219
|
-
PROP_KEY_RE = %r{
|
220
|
-
(?<prop_key> \b
|
221
|
-
(?<key>
|
222
|
-
(?:\p{L}+
|
223
|
-
|
|
224
|
-
\d+ # check for num lookahead (MUST be space or dot)
|
225
|
-
## MUST be followed by (optional dot) and
|
226
|
-
## required space !!!
|
227
|
-
## MUST be follow by a to z!!!!
|
228
|
-
\.? ## optional dot
|
229
|
-
[ ]? ## make space optional too - why? why not?
|
230
|
-
## yes - eg. 1st, 2nd, 5th etc.
|
231
|
-
\p{L}+
|
232
|
-
)
|
233
|
-
[\d\p{L}'/° -]*? ## allow almost anyting
|
234
|
-
## fix - add negative lookahead
|
235
|
-
## no space and dash etc.
|
236
|
-
## only allowed "inline" not at the end
|
237
|
-
## must end with latter or digit!
|
238
|
-
)
|
239
|
-
[ ]*? # slurp trailing spaces
|
240
|
-
:
|
241
|
-
(?=[ ]+) ## possitive lookahead (must be followed by space!!)
|
242
|
-
)
|
243
|
-
}ix
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
PROP_RE = Regexp.union(
|
249
|
-
PROP_BASICS_RE,
|
250
|
-
MINUTE_RE,
|
251
|
-
PROP_NAME_RE,
|
252
|
-
)
|
253
|
-
|
254
|
-
|
255
|
-
|
256
120
|
RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should switch into prop mode!!!)
|
257
121
|
STATUS_RE,
|
258
122
|
TIMEZONE_RE,
|
259
123
|
TIME_RE,
|
260
124
|
DURATION_RE, # note - duration MUST match before date
|
261
125
|
DATE_RE,
|
126
|
+
WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
|
262
127
|
SCORE_RE,
|
263
|
-
BASICS_RE,
|
128
|
+
BASICS_RE,
|
129
|
+
MINUTE_RE,
|
264
130
|
GOAL_OG_RE, GOAL_PEN_RE,
|
265
131
|
TEXT_RE )
|
266
132
|
|
data/lib/sportdb/parser.rb
CHANGED
@@ -21,6 +21,8 @@ require_relative 'parser/token-score'
|
|
21
21
|
require_relative 'parser/token-date'
|
22
22
|
require_relative 'parser/token-text'
|
23
23
|
require_relative 'parser/token-status'
|
24
|
+
require_relative 'parser/token-minute'
|
25
|
+
require_relative 'parser/token-prop' ## team prop(erty) mode (note - must be before token)
|
24
26
|
require_relative 'parser/token'
|
25
27
|
require_relative 'parser/lexer'
|
26
28
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.9
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-01-
|
11
|
+
date: 2025-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -102,6 +102,8 @@ files:
|
|
102
102
|
- lib/sportdb/parser/racc_parser.rb
|
103
103
|
- lib/sportdb/parser/racc_tree.rb
|
104
104
|
- lib/sportdb/parser/token-date.rb
|
105
|
+
- lib/sportdb/parser/token-minute.rb
|
106
|
+
- lib/sportdb/parser/token-prop.rb
|
105
107
|
- lib/sportdb/parser/token-score.rb
|
106
108
|
- lib/sportdb/parser/token-status.rb
|
107
109
|
- lib/sportdb/parser/token-text.rb
|