sportdb-parser 0.6.20 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +14 -8
- data/Rakefile +1 -1
- data/lib/sportdb/parser/blocktxt.rb +99 -0
- data/lib/sportdb/parser/lexer.rb +958 -395
- data/lib/sportdb/parser/lexer_buffer.rb +97 -0
- data/lib/sportdb/parser/lexer_tty.rb +111 -0
- data/lib/sportdb/parser/parser.rb +1768 -855
- data/lib/sportdb/parser/racc_parser.rb +1 -1
- data/lib/sportdb/parser/racc_tree.rb +327 -41
- data/lib/sportdb/parser/token-date.rb +160 -178
- data/lib/sportdb/parser/token-date_duration.rb +190 -0
- data/lib/sportdb/parser/token-geo.rb +59 -59
- data/lib/sportdb/parser/token-goals.rb +460 -0
- data/lib/sportdb/parser/token-group.rb +43 -0
- data/lib/sportdb/parser/token-note.rb +40 -0
- data/lib/sportdb/parser/token-prop.rb +70 -54
- data/lib/sportdb/parser/token-prop_name.rb +74 -0
- data/lib/sportdb/parser/token-round.rb +102 -0
- data/lib/sportdb/parser/token-score.rb +323 -47
- data/lib/sportdb/parser/token-score_fuller.rb +435 -0
- data/lib/sportdb/parser/token-score_legs.rb +59 -0
- data/lib/sportdb/parser/token-status.rb +157 -160
- data/lib/sportdb/parser/token-table.rb +149 -0
- data/lib/sportdb/parser/token-text.rb +72 -23
- data/lib/sportdb/parser/token-time.rb +141 -0
- data/lib/sportdb/parser/token.rb +242 -105
- data/lib/sportdb/parser/token_helpers.rb +92 -0
- data/lib/sportdb/parser/version.rb +2 -2
- data/lib/sportdb/parser.rb +24 -2
- metadata +18 -18
- data/config/rounds_de.txt +0 -125
- data/config/rounds_en.txt +0 -29
- data/config/rounds_es.txt +0 -26
- data/config/rounds_misc.txt +0 -25
- data/config/rounds_pt.txt +0 -4
- data/config/zones_en.txt +0 -20
- data/lib/sportdb/parser/lang.rb +0 -298
- data/lib/sportdb/parser/token-minute.rb +0 -205
|
@@ -1,205 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
module SportDb
|
|
3
|
-
class Lexer
|
|
4
|
-
|
|
5
|
-
#
|
|
6
|
-
# todo/check - move goal type regexes to goal or somewhere else?
|
|
7
|
-
#
|
|
8
|
-
|
|
9
|
-
## goal types
|
|
10
|
-
# (pen.) or (pen) or (p.) or (p)
|
|
11
|
-
## (o.g.) or (og)
|
|
12
|
-
## todo/check - keep case-insensitive
|
|
13
|
-
## or allow OG or P or PEN or
|
|
14
|
-
## only lower case - why? why not?
|
|
15
|
-
GOAL_PEN_RE = %r{
|
|
16
|
-
(?<pen> \(
|
|
17
|
-
(?:pen|p)\.?
|
|
18
|
-
\)
|
|
19
|
-
)
|
|
20
|
-
}ix
|
|
21
|
-
GOAL_OG_RE = %r{
|
|
22
|
-
(?<og> \(
|
|
23
|
-
(?:og|o\.g\.)
|
|
24
|
-
\)
|
|
25
|
-
)
|
|
26
|
-
}ix
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
## minute variant for N/A not/available
|
|
30
|
-
## todo/check - find a better syntax - why? why not?
|
|
31
|
-
##
|
|
32
|
-
## note "??".to_i(10) returns 0 or
|
|
33
|
-
## "__".to_i(10) returns 0
|
|
34
|
-
## quick hack - assume 0 for n/a for now
|
|
35
|
-
|
|
36
|
-
MINUTE_NA_RE = %r{
|
|
37
|
-
(?<minute>
|
|
38
|
-
(?<=[ (]) # positive lookbehind for space or opening
|
|
39
|
-
(?<value> \?{2} | _{2} )
|
|
40
|
-
' ## must have minute marker!!!!
|
|
41
|
-
)
|
|
42
|
-
}ix
|
|
43
|
-
|
|
44
|
-
MINUTE_RE = %r{
|
|
45
|
-
(?<minute>
|
|
46
|
-
(?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
|
|
47
|
-
# todo - add more lookbehinds e.g. ,) etc. - why? why not?
|
|
48
|
-
(?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
|
|
49
|
-
(?: \+
|
|
50
|
-
(?<value2>\d{1,3})
|
|
51
|
-
)?
|
|
52
|
-
' ## must have minute marker!!!!
|
|
53
|
-
)
|
|
54
|
-
}ix
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
#####
|
|
58
|
-
# player with minute (top-level) regex
|
|
59
|
-
# - starts new player/goal mode (until end of line)!!!
|
|
60
|
-
# - note: allow one or more spaces between name and minute
|
|
61
|
-
#
|
|
62
|
-
# note - aaa bbb 40'
|
|
63
|
-
# make sure anchor (^) - beginning of line - present!!!
|
|
64
|
-
# note - will NOT work with ^ anchor!!
|
|
65
|
-
# use special \G - Matches first matching position !!!!
|
|
66
|
-
# otherwise you get matches such as >bbb 40'< skipping >aaa< etc.!!!
|
|
67
|
-
#
|
|
68
|
-
# regex question - check if in a regex union - space regex gets matches
|
|
69
|
-
# or others with first matching position
|
|
70
|
-
# or if chars get eaten-up?
|
|
71
|
-
# let us know if \G is required here or not
|
|
72
|
-
#
|
|
73
|
-
## note - use \A (instead of ^) - \A strictly matches the start of the string.
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
PLAYER_WITH_MINUTE_RE = %r{
|
|
77
|
-
\A ### note - MUST start line; leading spaces optional (eat-up)
|
|
78
|
-
[ ]*
|
|
79
|
-
(?: # optional open bracket ([) -- remove later
|
|
80
|
-
(?<open_bracket> \[ )
|
|
81
|
-
[ ]*
|
|
82
|
-
)?
|
|
83
|
-
(?: # optional none a.k.a. -; - what todo here?
|
|
84
|
-
(?<none> - [ ]* ; [ ]* )
|
|
85
|
-
)?
|
|
86
|
-
(?<player_with_minute>
|
|
87
|
-
(?<name>
|
|
88
|
-
\p{L}+
|
|
89
|
-
\.? ## optional dot
|
|
90
|
-
|
|
91
|
-
(?:
|
|
92
|
-
## rule for space; only one single space allowed inline!!!
|
|
93
|
-
(?:
|
|
94
|
-
(?<![ ]) ## use negative lookbehind
|
|
95
|
-
[ ]
|
|
96
|
-
(?=\p{L}|') ## use lookahead
|
|
97
|
-
)
|
|
98
|
-
|
|
|
99
|
-
(?:
|
|
100
|
-
(?<=\p{L}) ## use lookbehind
|
|
101
|
-
['-] ## must be surrounded by letters
|
|
102
|
-
## e.g. One/Two NOT
|
|
103
|
-
## One/ Two or One / Two or One /Two etc.
|
|
104
|
-
(?=\p{L}) ## use lookahead
|
|
105
|
-
)
|
|
106
|
-
|
|
|
107
|
-
(?:
|
|
108
|
-
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
|
109
|
-
['] ## must be surrounded by leading space and
|
|
110
|
-
## trailing letters (e.g. UDI 'Beter Bed)
|
|
111
|
-
(?=\p{L}) ## use lookahead
|
|
112
|
-
)
|
|
113
|
-
|
|
|
114
|
-
(?:
|
|
115
|
-
(?<=\p{L}) ## use lookbehind
|
|
116
|
-
['] ## must be surrounded by leading letter and
|
|
117
|
-
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
|
118
|
-
(?=[ ]\p{L}) ## use lookahead (space WITH letter)
|
|
119
|
-
)
|
|
120
|
-
| ## standard case with letter(s) and optional dot
|
|
121
|
-
(?: \p{L}+
|
|
122
|
-
\.? ## optional dot
|
|
123
|
-
)
|
|
124
|
-
)*
|
|
125
|
-
)
|
|
126
|
-
#### spaces
|
|
127
|
-
(?: [ ]+)
|
|
128
|
-
#### minute (see above)
|
|
129
|
-
##### use MINUTE_RE.source or such - for inline (reference) use? do not copy
|
|
130
|
-
(?<minute>
|
|
131
|
-
(?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
|
|
132
|
-
# todo - add more lookbehinds e.g. ,) etc. - why? why not?
|
|
133
|
-
(?:
|
|
134
|
-
(?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
|
|
135
|
-
(?: \+
|
|
136
|
-
(?<value2>\d{1,3})
|
|
137
|
-
)?
|
|
138
|
-
|
|
|
139
|
-
(?<value> \?{2} | _{2} ) ## add support for n/a (not/available)
|
|
140
|
-
)
|
|
141
|
-
' ## must have minute marker!!!!
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
)
|
|
145
|
-
}ix
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
## note - use \A (instead of ^) - \A strictly matches the start of the string.
|
|
150
|
-
|
|
151
|
-
PLAYER_WITH_SCORE_RE = %r{
|
|
152
|
-
\A ### note - MUST start line; leading spaces optional (eat-up)
|
|
153
|
-
[ ]*
|
|
154
|
-
(?<player_with_score>
|
|
155
|
-
(?<score>
|
|
156
|
-
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
|
157
|
-
)
|
|
158
|
-
[ ]+
|
|
159
|
-
(?<name>
|
|
160
|
-
\p{L}+
|
|
161
|
-
\.? ## optional dot
|
|
162
|
-
|
|
163
|
-
(?:
|
|
164
|
-
## rule for space; only one single space allowed inline!!!
|
|
165
|
-
(?:
|
|
166
|
-
(?<![ ]) ## use negative lookbehind
|
|
167
|
-
[ ]
|
|
168
|
-
(?=\p{L}|') ## use lookahead
|
|
169
|
-
)
|
|
170
|
-
|
|
|
171
|
-
(?:
|
|
172
|
-
(?<=\p{L}) ## use lookbehind
|
|
173
|
-
['-] ## must be surrounded by letters
|
|
174
|
-
## e.g. One/Two NOT
|
|
175
|
-
## One/ Two or One / Two or One /Two etc.
|
|
176
|
-
(?=\p{L}) ## use lookahead
|
|
177
|
-
)
|
|
178
|
-
|
|
|
179
|
-
(?:
|
|
180
|
-
(?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
|
|
181
|
-
['] ## must be surrounded by leading space and
|
|
182
|
-
## trailing letters (e.g. UDI 'Beter Bed)
|
|
183
|
-
(?=\p{L}) ## use lookahead
|
|
184
|
-
)
|
|
185
|
-
|
|
|
186
|
-
(?:
|
|
187
|
-
(?<=\p{L}) ## use lookbehind
|
|
188
|
-
['] ## must be surrounded by leading letter and
|
|
189
|
-
## trailing space PLUS letter (e.g. UDI' Beter Bed)
|
|
190
|
-
(?=[ ]\p{L}) ## use lookahead (space WITH letter)
|
|
191
|
-
)
|
|
192
|
-
| ## standard case with letter(s) and optional dot
|
|
193
|
-
(?: \p{L}+
|
|
194
|
-
\.? ## optional dot
|
|
195
|
-
)
|
|
196
|
-
)*
|
|
197
|
-
) ## name
|
|
198
|
-
### check/todo - add lookahead (e.g. must be space or ,$) why? why not?
|
|
199
|
-
) ## player_with_score
|
|
200
|
-
}ix
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
end # class Lexer
|
|
205
|
-
end # module SportDb
|