sportdb-parser 0.6.15 → 0.6.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +21 -2
- data/lib/sportdb/parser/token-minute.rb +7 -2
- data/lib/sportdb/parser/token-score.rb +24 -1
- data/lib/sportdb/parser/token-text.rb +19 -6
- data/lib/sportdb/parser/token.rb +2 -1
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ba31cc4284de6a4ea05615ad37a30546375c5e3218847a8f69c3c359074fb9c
|
4
|
+
data.tar.gz: 5b8c73196c6bd08a399c7687cd65f4e05f4c3a66580eaeecd6dbaad33837b822
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf4bc5b5a112effc59895c405e3484224d4d94aadf4771fecff90458b01aa63b43703632fdada70df9ee906aa8e517b1736dbdb2de795ba53b3b8e05d6c1ba4c
|
7
|
+
data.tar.gz: 46b6a97cb0af77debec3bd58ee24bd12b1236680dc53ff76a50b725621961bb9aabdfc151d9379d01672ebbef2423c3ce4f4d948adc506789c846491a7f30f15
|
data/CHANGELOG.md
CHANGED
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -290,6 +290,20 @@ end # method tokenize_with_errors
|
|
290
290
|
|
291
291
|
|
292
292
|
|
293
|
+
### add a QUICK_PLAYER_WITH_MINUTE check
|
294
|
+
QUICK_PLAYER_WITH_MINUTE_RE = %r{
|
295
|
+
\b
|
296
|
+
\d{1,3} ## constrain numbers to 0 to 999!!!
|
297
|
+
(?: (?:
|
298
|
+
\+\d{1,3}
|
299
|
+
)?
|
300
|
+
|
|
301
|
+
(?: \?{2} | _{2} ) ## add support for n/a (not/available)
|
302
|
+
)
|
303
|
+
' ## must have minute marker!!!!
|
304
|
+
}ix
|
305
|
+
|
306
|
+
|
293
307
|
def _tokenize_line( line )
|
294
308
|
tokens = []
|
295
309
|
errors = [] ## keep a list of errors - why? why not?
|
@@ -387,7 +401,12 @@ def _tokenize_line( line )
|
|
387
401
|
|
388
402
|
offsets = [m.begin(0), m.end(0)]
|
389
403
|
pos = offsets[1] ## update pos
|
390
|
-
|
404
|
+
|
405
|
+
#### FIX/FIX/TODO
|
406
|
+
### looks to hang in player with minute
|
407
|
+
### FIX - improve / rework PLAYER_WITH_MINUTE_RE regex!!!!
|
408
|
+
elsif (_quick = QUICK_PLAYER_WITH_MINUTE_RE.match(line) &&
|
409
|
+
m = PLAYER_WITH_MINUTE_RE.match( line ))
|
391
410
|
## switch context to GOAL_RE (goalline(s)
|
392
411
|
## split token (automagically) into two!! - player AND minute!!!
|
393
412
|
@re = GOAL_RE
|
@@ -414,7 +433,7 @@ def _tokenize_line( line )
|
|
414
433
|
end
|
415
434
|
end
|
416
435
|
|
417
|
-
|
436
|
+
|
418
437
|
|
419
438
|
old_pos = -1 ## allows to backtrack to old pos (used in geo)
|
420
439
|
|
data/lib/sportdb/parser/token-minute.rb
CHANGED
@@ -69,10 +69,12 @@ MINUTE_RE = %r{
|
|
69
69
|
# or others with first matching position
|
70
70
|
# or if chars get eaten-up?
|
71
71
|
# let us know if \G is required here or not
|
72
|
+
#
|
73
|
+
## note - use \A (instead of ^) - \A strictly matches the start of the string.
|
72
74
|
|
73
75
|
|
74
76
|
PLAYER_WITH_MINUTE_RE = %r{
|
75
|
-
|
77
|
+
\A ### note - MUST start line; leading spaces optional (eat-up)
|
76
78
|
[ ]*
|
77
79
|
(?: # optional open bracket ([) -- remove later
|
78
80
|
(?<open_bracket> \[ )
|
@@ -143,8 +145,11 @@ PLAYER_WITH_MINUTE_RE = %r{
|
|
143
145
|
}ix
|
144
146
|
|
145
147
|
|
148
|
+
|
149
|
+
## note - use \A (instead of ^) - \A strictly matches the start of the string.
|
150
|
+
|
146
151
|
PLAYER_WITH_SCORE_RE = %r{
|
147
|
-
|
152
|
+
\A ### note - MUST start line; leading spaces optional (eat-up)
|
148
153
|
[ ]*
|
149
154
|
(?<player_with_score>
|
150
155
|
(?<score>
|
data/lib/sportdb/parser/token-score.rb
CHANGED
@@ -43,6 +43,28 @@ class Lexer
|
|
43
43
|
## todo/check: remove lookahead assertion here - why require space?
|
44
44
|
## note: \b works only after non-alphanum e.g. )
|
45
45
|
|
46
|
+
####
|
47
|
+
## support short all-in-one e.g.
|
48
|
+
## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) becomes
|
49
|
+
## 3-4 pen. (2-2, 1-1, 1-1)
|
50
|
+
|
51
|
+
SCORE__P_ET_FT_HT_V2__RE = %r{
|
52
|
+
(?<score_more>
|
53
|
+
\b
|
54
|
+
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
55
|
+
[ ]* #{P_EN} [ ]+
|
56
|
+
\(
|
57
|
+
(?<et1>\d{1,2}) - (?<et2>\d{1,2})
|
58
|
+
[ ]*, [ ]*
|
59
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
60
|
+
[ ]*, [ ]*
|
61
|
+
(?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
62
|
+
[ ]*
|
63
|
+
\)
|
64
|
+
(?=[ ,\]]|$)
|
65
|
+
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
66
|
+
## note: \b works only after non-alphanum e.g. )
|
67
|
+
|
46
68
|
|
47
69
|
## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
|
48
70
|
## 3-4p 2-2aet (1-1, ) or
|
@@ -128,7 +150,8 @@ class Lexer
|
|
128
150
|
## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
|
129
151
|
|
130
152
|
SCORE_MORE_RE = Regexp.union(
|
131
|
-
|
153
|
+
SCORE__P_ET_FT_HT_V2__RE, # e.g. 5-1 pen. (2-2, 1-1, 1-0)
|
154
|
+
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
132
155
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
133
156
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
134
157
|
SCORE__P__RE, # e.g. 5-1 pen.
|
data/lib/sportdb/parser/token-text.rb
CHANGED
@@ -60,6 +60,10 @@ TEXT_RE = %r{
|
|
60
60
|
1/ \d{1,2} [ ] \p{L}+
|
61
61
|
|
|
62
62
|
## opt 4 - add another weirdo case
|
63
|
+
## e.g. 's Gravenwezel-Schilde
|
64
|
+
'[s]
|
65
|
+
|
|
66
|
+
## opt 5 - add another weirdo case
|
63
67
|
## e.g. 5.-8. Platz Playoffs - keep - why? why not?
|
64
68
|
\d+\.-\d+\. [ ]? \p{L}+
|
65
69
|
)
|
@@ -70,18 +74,27 @@ TEXT_RE = %r{
|
|
70
74
|
## AND switch to case-sensitive (via -i!!!)
|
71
75
|
)
|
72
76
|
| # only single spaces allowed inline!!!
|
73
|
-
[
|
77
|
+
[_/]
|
74
78
|
)?
|
75
79
|
(?:
|
76
|
-
\p{L}
|
80
|
+
\p{L}
|
81
|
+
|
|
77
82
|
[&'°]
|
78
|
-
|
83
|
+
|
|
84
|
+
(?: (?<! [ ]) ## todo - check regex - make sure lookbehind is always first/before!!
|
85
|
+
[-] ### allow e.g. Sport- if lookbehind is unicode letter or dot (.)
|
86
|
+
### or U.N.A.M.-Pumas
|
87
|
+
## (?<= [\p{L}.] )
|
88
|
+
## try more flexible (use negative lookbehind - no space)
|
89
|
+
)
|
90
|
+
|
|
79
91
|
(?:
|
80
92
|
\d+
|
81
93
|
(?!
|
82
|
-
[0-9h'
|
83
|
-
## check usage for 3+4 - possible? where ? why?
|
84
|
-
(?:[
|
94
|
+
[0-9h'+] | ## protected break on 12h / 12' / 1-1
|
95
|
+
## check usage for 3+4 - possible? where ? why?
|
96
|
+
(?:[.:-]\d) ## protected/exclude/break on 12.03 / 12:03 / 12-12
|
97
|
+
## BUT allow Park21-Arena for example e.g. 21-A :-)
|
85
98
|
)
|
86
99
|
## negative lookahead for numbers
|
87
100
|
## note - include digits itself!!!
|
data/lib/sportdb/parser/token.rb
CHANGED
@@ -199,7 +199,8 @@ PROP_GOAL_RE = Regexp.union(
|
|
199
199
|
|
200
200
|
####
|
201
201
|
#
|
202
|
-
|
202
|
+
## note - use \A (instead of ^) - \A strictly matches the start of the string.
|
203
|
+
ROUND_OUTLINE_RE = %r{ \A
|
203
204
|
[ ]* ## ignore leading spaces (if any)
|
204
205
|
(?: »|>> )
|
205
206
|
[ ]+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.15
|
4
|
+
version: 0.6.17
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-03-
|
11
|
+
date: 2025-03-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|