sportdb-parser 0.6.14 → 0.6.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/lib/sportdb/parser/lexer.rb +23 -15
- data/lib/sportdb/parser/token-score.rb +24 -1
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3da9280d27bf1e4662eb10f9451679e4aace18b9a0e1bfa29dd1e7b6bcbdc5e5
|
4
|
+
data.tar.gz: e6786f648848cd075ef3e0f6d8d7fda2d31743f989653c0fcf2312a33a223357
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04250d17d120c12dc0b3980ff971b02fa178e617f35af70651f86011d9f5d4cad1d81df84a1f5af97ab73cb9023cc6cb190b13c420af71f3bcb2af7df6a526f1
|
7
|
+
data.tar.gz: 120486063a9a82891a63914654965b799aef774680695de8bda3bb52894399d0800b98efb852f87f672dbe303dc6c415b91a10a094989e94ddf3e319b3183cc9
|
data/CHANGELOG.md
CHANGED
data/lib/sportdb/parser/lexer.rb
CHANGED
@@ -290,6 +290,20 @@ end # method tokenize_with_errors
|
|
290
290
|
|
291
291
|
|
292
292
|
|
293
|
+
### QUICK_PLAYER_WITH_MINUTE_RE - cheap pre-check for a minute marker; tried before PLAYER_WITH_MINUTE_RE in _tokenize_line
|
294
|
+
QUICK_PLAYER_WITH_MINUTE_RE = %r{
|
295
|
+
\b
|
296
|
+
\d{1,3} ## minute - one to three digits only (0 to 999)
|
297
|
+
(?: (?:
|
298
|
+
\+\d{1,3}
|
299
|
+
)?
|
300
|
+
|
|
301
|
+
(?: \?{2} | _{2} ) ## n/a (not available) marker, that is, ?? or __ - NOTE(review): this alternative follows the mandatory digits, so a bare ?? or __ minute does not match here - confirm intended
|
302
|
+
)
|
303
|
+
' ## must have minute marker!!!!
|
304
|
+
}ix
|
305
|
+
|
306
|
+
|
293
307
|
def _tokenize_line( line )
|
294
308
|
tokens = []
|
295
309
|
errors = [] ## keep a list of errors - why? why not?
|
@@ -326,8 +340,7 @@ def _tokenize_line( line )
|
|
326
340
|
|
327
341
|
## start with prop key (match will switch into prop mode!!!)
|
328
342
|
## - fix - remove leading spaces in regex (upstream) - why? why not?
|
329
|
-
m = PROP_KEY_RE.match( line )
|
330
|
-
if m
|
343
|
+
if (m = PROP_KEY_RE.match( line ))
|
331
344
|
### switch into new mode
|
332
345
|
## switch context to PROP_RE
|
333
346
|
puts " ENTER PROP_RE MODE" if debug?
|
@@ -360,10 +373,7 @@ def _tokenize_line( line )
|
|
360
373
|
|
361
374
|
offsets = [m.begin(0), m.end(0)]
|
362
375
|
pos = offsets[1] ## update pos
|
363
|
-
|
364
|
-
|
365
|
-
m = ROUND_OUTLINE_RE.match( line )
|
366
|
-
if m
|
376
|
+
elsif (m = ROUND_OUTLINE_RE.match( line ))
|
367
377
|
puts " ROUND_OUTLINE" if debug?
|
368
378
|
|
369
379
|
tokens << [:ROUND_OUTLINE, m[:round_outline]]
|
@@ -371,10 +381,7 @@ def _tokenize_line( line )
|
|
371
381
|
## note - eats-up line for now (change later to only eat-up marker e.g. »|>>)
|
372
382
|
offsets = [m.begin(0), m.end(0)]
|
373
383
|
pos = offsets[1] ## update pos
|
374
|
-
|
375
|
-
|
376
|
-
m = PLAYER_WITH_SCORE_RE.match( line )
|
377
|
-
if m
|
384
|
+
elsif (m = PLAYER_WITH_SCORE_RE.match( line ))
|
378
385
|
## switch context to GOAL_RE (goalline(s)
|
379
386
|
## split token (automagically) into two!! - player AND minute!!!
|
380
387
|
@re = GOAL_RE
|
@@ -394,10 +401,12 @@ def _tokenize_line( line )
|
|
394
401
|
|
395
402
|
offsets = [m.begin(0), m.end(0)]
|
396
403
|
pos = offsets[1] ## update pos
|
397
|
-
end
|
398
404
|
|
399
|
-
|
400
|
-
|
405
|
+
#### FIX/FIX/TODO
|
406
|
+
### looks to hang in player with minute
|
407
|
+
### FIX - improve / rework PLAYER_WITH_MINUTE_RE regex!!!!
|
408
|
+
elsif (_quick = QUICK_PLAYER_WITH_MINUTE_RE.match(line) &&
|
409
|
+
m = PLAYER_WITH_MINUTE_RE.match( line ))
|
401
410
|
## switch context to GOAL_RE (goalline(s)
|
402
411
|
## split token (automagically) into two!! - player AND minute!!!
|
403
412
|
@re = GOAL_RE
|
@@ -410,8 +419,6 @@ def _tokenize_line( line )
|
|
410
419
|
## todo - find a better way? how possible?
|
411
420
|
tokens << [:NONE, "<|NONE|>"] if m[:none]
|
412
421
|
|
413
|
-
|
414
|
-
|
415
422
|
## auto-add player token first
|
416
423
|
tokens << [:PLAYER, m[:name]]
|
417
424
|
## minute props
|
@@ -427,6 +434,7 @@ def _tokenize_line( line )
|
|
427
434
|
end
|
428
435
|
|
429
436
|
|
437
|
+
|
430
438
|
old_pos = -1 ## allows to backtrack to old pos (used in geo)
|
431
439
|
|
432
440
|
while m = @re.match( line, pos )
|
@@ -43,6 +43,28 @@ class Lexer
|
|
43
43
|
## todo/check: remove lookahead assertion here - why require space?
|
44
44
|
## note: \b works only after non-alphanum e.g. )
|
45
45
|
|
46
|
+
####
|
47
|
+
## support the short all-in-one style, that is,
|
48
|
+
## 3-4 pen. 2-2 a.e.t. (1-1, 1-1) written as
|
49
|
+
## 3-4 pen. (2-2, 1-1, 1-1)
|
50
|
+
|
51
|
+
SCORE__P_ET_FT_HT_V2__RE = %r{
|
52
|
+
(?<score_more>
|
53
|
+
\b
|
54
|
+
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
55
|
+
[ ]* #{P_EN} [ ]+
|
56
|
+
\(
|
57
|
+
(?<et1>\d{1,2}) - (?<et2>\d{1,2})
|
58
|
+
[ ]*, [ ]*
|
59
|
+
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
60
|
+
[ ]*, [ ]*
|
61
|
+
(?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
|
62
|
+
[ ]*
|
63
|
+
\)
|
64
|
+
(?=[ ,\]]|$)
|
65
|
+
)}ix ## todo/check: remove lookahead assertion here - why require space?
|
66
|
+
## note: \b works only after non-alphanum e.g. )
|
67
|
+
|
46
68
|
|
47
69
|
## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
|
48
70
|
## 3-4p 2-2aet (1-1, ) or
|
@@ -128,7 +150,8 @@ class Lexer
|
|
128
150
|
## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
|
129
151
|
|
130
152
|
SCORE_MORE_RE = Regexp.union(
|
131
|
-
|
153
|
+
SCORE__P_ET_FT_HT_V2__RE, # e.g. 5-1 pen. (2-2, 1-1, 1-0)
|
154
|
+
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
132
155
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
133
156
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
134
157
|
SCORE__P__RE, # e.g. 5-1 pen.
|