sportdb-parser 0.6.15 → 0.6.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d907a1b09e04c9c17884afe7881961b597b547ca9ab742ecb6e06f89f8bfe151
4
- data.tar.gz: da287bc13de7217bf3c6a2ffcfdd8d7f3d376baff413b79a58dd544bb2bfcc3a
3
+ metadata.gz: 1ba31cc4284de6a4ea05615ad37a30546375c5e3218847a8f69c3c359074fb9c
4
+ data.tar.gz: 5b8c73196c6bd08a399c7687cd65f4e05f4c3a66580eaeecd6dbaad33837b822
5
5
  SHA512:
6
- metadata.gz: 3be1466a2d4ef5a5d1129f6b0fa9f67c6258c5a1cc60aab831ac5c4c7121e691fa64a9f33e71508d91b9809bcdc1b77b152fc2f7c1580172937662865e8ee33e
7
- data.tar.gz: fe14835fc3195b5e441bdd3f9763bfe1303aa471e041a908cafc027becfbfd5ffd2e9696948e2dd88647ef7de88ebabf39674e42ba6656144e478150822e699e
6
+ metadata.gz: cf4bc5b5a112effc59895c405e3484224d4d94aadf4771fecff90458b01aa63b43703632fdada70df9ee906aa8e517b1736dbdb2de795ba53b3b8e05d6c1ba4c
7
+ data.tar.gz: 46b6a97cb0af77debec3bd58ee24bd12b1236680dc53ff76a50b725621961bb9aabdfc151d9379d01672ebbef2423c3ce4f4d948adc506789c846491a7f30f15
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.6.15
1
+ ### 0.6.17
2
2
  ### 0.0.1 / 2024-07-12
3
3
 
4
4
  * Everything is new. First release.
@@ -290,6 +290,20 @@ end # method tokenize_with_errors
290
290
 
291
291
 
292
292
 
293
+ ### add a QUICK_PLAYER_WITH_MINUTE check
294
+ QUICK_PLAYER_WITH_MINUTE_RE = %r{
295
+ \b
296
+ \d{1,3} ## constrain numbers to 0 to 999!!!
297
+ (?: (?:
298
+ \+\d{1,3}
299
+ )?
300
+ |
301
+ (?: \?{2} | _{2} ) ## add support for n/a (not/available)
302
+ )
303
+ ' ## must have minute marker!!!!
304
+ }ix
305
+
306
+
293
307
  def _tokenize_line( line )
294
308
  tokens = []
295
309
  errors = [] ## keep a list of errors - why? why not?
@@ -387,7 +401,12 @@ def _tokenize_line( line )
387
401
 
388
402
  offsets = [m.begin(0), m.end(0)]
389
403
  pos = offsets[1] ## update pos
390
- elsif (m = PLAYER_WITH_MINUTE_RE.match( line ))
404
+
405
+ #### FIX/FIX/TODO
406
+ ### looks to hang in player with minute
407
+ ### FIX - improve / rework PLAYER_WITH_MINUTE_RE regex!!!!
408
+ elsif (_quick = QUICK_PLAYER_WITH_MINUTE_RE.match(line) &&
409
+ m = PLAYER_WITH_MINUTE_RE.match( line ))
391
410
  ## switch context to GOAL_RE (goalline(s))
392
411
  ## split token (automagically) into two!! - player AND minute!!!
393
412
  @re = GOAL_RE
@@ -414,7 +433,7 @@ def _tokenize_line( line )
414
433
  end
415
434
  end
416
435
 
417
-
436
+
418
437
 
419
438
  old_pos = -1 ## allows to backtrack to old pos (used in geo)
420
439
 
@@ -69,10 +69,12 @@ MINUTE_RE = %r{
69
69
  # or others with first matching position
70
70
  # or if chars get eaten-up?
71
71
  # let us know if \G is required here or not
72
+ #
73
+ ## note - use \A (instead of ^) - \A strictly matches the start of the string.
72
74
 
73
75
 
74
76
  PLAYER_WITH_MINUTE_RE = %r{
75
- ^ ### note - MUST start line; leading spaces optional (eat-up)
77
+ \A ### note - MUST start line; leading spaces optional (eat-up)
76
78
  [ ]*
77
79
  (?: # optional open bracket ([) -- remove later
78
80
  (?<open_bracket> \[ )
@@ -143,8 +145,11 @@ PLAYER_WITH_MINUTE_RE = %r{
143
145
  }ix
144
146
 
145
147
 
148
+
149
+ ## note - use \A (instead of ^) - \A strictly matches the start of the string.
150
+
146
151
  PLAYER_WITH_SCORE_RE = %r{
147
- ^ ### note - MUST start line; leading spaces optional (eat-up)
152
+ \A ### note - MUST start line; leading spaces optional (eat-up)
148
153
  [ ]*
149
154
  (?<player_with_score>
150
155
  (?<score>
@@ -43,6 +43,28 @@ class Lexer
43
43
  ## todo/check: remove lookahead assertion here - why require space?
44
44
  ## note: \b works only after non-alphanum e.g. )
45
45
 
46
+ ####
47
+ ## support short all-in-one e.g.
48
+ ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) becomes
49
+ ## 3-4 pen. (2-2, 1-1, 1-1)
50
+
51
+ SCORE__P_ET_FT_HT_V2__RE = %r{
52
+ (?<score_more>
53
+ \b
54
+ (?<p1>\d{1,2}) - (?<p2>\d{1,2})
55
+ [ ]* #{P_EN} [ ]+
56
+ \(
57
+ (?<et1>\d{1,2}) - (?<et2>\d{1,2})
58
+ [ ]*, [ ]*
59
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
60
+ [ ]*, [ ]*
61
+ (?<ht1>\d{1,2}) - (?<ht2>\d{1,2})
62
+ [ ]*
63
+ \)
64
+ (?=[ ,\]]|$)
65
+ )}ix ## todo/check: remove lookahead assertion here - why require space?
66
+ ## note: \b works only after non-alphanum e.g. )
67
+
46
68
 
47
69
  ## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
48
70
  ## 3-4p 2-2aet (1-1, ) or
@@ -128,7 +150,8 @@ class Lexer
128
150
  ## check - find a better name for SCORE_MORE - SCORE_EX, SCORE_BIG, or ___ - why? why not?
129
151
 
130
152
  SCORE_MORE_RE = Regexp.union(
131
- SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
153
+ SCORE__P_ET_FT_HT_V2__RE, # e.g. 5-1 pen. (2-2, 1-1, 1-0)
154
+ SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
132
155
  SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
133
156
  SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
134
157
  SCORE__P__RE, # e.g. 5-1 pen.
@@ -60,6 +60,10 @@ TEXT_RE = %r{
60
60
  1/ \d{1,2} [ ] \p{L}+
61
61
  |
62
62
  ## opt 4 - add another weirdo case
63
+ ## e.g. 's Gravenwezel-Schilde
64
+ '[s]
65
+ |
66
+ ## opt 5 - add another weirdo case
63
67
  ## e.g. 5.-8. Platz Playoffs - keep - why? why not?
64
68
  \d+\.-\d+\. [ ]? \p{L}+
65
69
  )
@@ -70,18 +74,27 @@ TEXT_RE = %r{
70
74
  ## AND switch to case-sensitive (via -i!!!)
71
75
  )
72
76
  | # only single spaces allowed inline!!!
73
- [-/]
77
+ [_/]
74
78
  )?
75
79
  (?:
76
- \p{L} |
80
+ \p{L}
81
+ |
77
82
  [&'°]
78
- |
83
+ |
84
+ (?: (?<! [ ]) ## todo - check regex - make sure lookbehind is always first/before!!
85
+ [-] ### allow e.g. Sport- if lookbehind is unicode letter or dot (.)
86
+ ### or U.N.A.M.-Pumas
87
+ ## (?<= [\p{L}.] )
88
+ ## try more flexible (use negative lookbehind - no space)
89
+ )
90
+ |
79
91
  (?:
80
92
  \d+
81
93
  (?!
82
- [0-9h'+-] | ## protected break on 12h / 12' / 1-1
83
- ## check usege for 3+4 - possible? where ? why?
84
- (?:[.:]\d) ## protected/exclude/break on 12.03 / 12:03
94
+ [0-9h'+] | ## protected break on 12h / 12' / 1-1
95
+ ## check usage for 3+4 - possible? where ? why?
96
+ (?:[.:-]\d) ## protected/exclude/break on 12.03 / 12:03 / 12-12
97
+ ## BUT allow Park21-Arena for example e.g. 21-A :-)
85
98
  )
86
99
  ## negative lookahead for numbers
87
100
  ## note - include digits itself!!!
@@ -199,7 +199,8 @@ PROP_GOAL_RE = Regexp.union(
199
199
 
200
200
  ####
201
201
  #
202
- ROUND_OUTLINE_RE = %r{ ^
202
+ ## note - use \A (instead of ^) - \A strictly matches the start of the string.
203
+ ROUND_OUTLINE_RE = %r{ \A
203
204
  [ ]* ## ignore leading spaces (if any)
204
205
  (?: »|>> )
205
206
  [ ]+
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 6
7
- PATCH = 15
7
+ PATCH = 17
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.15
4
+ version: 0.6.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-03-09 00:00:00.000000000 Z
11
+ date: 2025-03-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos