sportdb-parser 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -122,6 +122,30 @@ MatchLine = Struct.new( :ord, :date, :time, :wday,
122
122
 
123
123
  end
124
124
 
125
+ ## check - use a different name e.g. GoalLineScore or such - why? why not?
126
+ GoalLineAlt = Struct.new( :goals ) do
127
+ def pretty_print( printer )
128
+ printer.text( "<GoalLineAlt " )
129
+ printer.text( "goals=" + self.goals.pretty_inspect + ">" )
130
+ end
131
+ end
132
+
133
+ GoalAlt = Struct.new( :score, :player, :minute ) do
134
+ def to_s
135
+ buf = String.new
136
+ buf << "#{score} "
137
+ buf << "#{self.player}"
138
+ buf << " #{self.minute}" if self.minute
139
+ buf
140
+ end
141
+
142
+ def pretty_print( printer )
143
+ printer.text( to_s )
144
+ end
145
+ end
146
+
147
+
148
+
125
149
  GoalLine = Struct.new( :goals1, :goals2 ) do
126
150
  def pretty_print( printer )
127
151
  printer.text( "<GoalLine " )
@@ -159,6 +159,25 @@ DATE_III_RE = %r{
159
159
  \b
160
160
  )}ix
161
161
 
162
+ ## allow (short)"european" style 8.8.
163
+ ## note - assume day/month!!!
164
+ DATE_IIII_RE = %r{
165
+ (?<date>
166
+ \b
167
+ (?<day>\d{1,2})
168
+ \.
169
+ (?<month>\d{1,2})
170
+ \.
171
+ (?: (?:
172
+ (?<year>\d{4}) ## optional year 2025 (yyyy)
173
+ |
174
+ (?<yy>\d{2}) ## optional year 25 (yy)
175
+ )
176
+ \b
177
+ )?
178
+ )
179
+ }ix
180
+
162
181
 
163
182
 
164
183
 
@@ -169,6 +188,7 @@ DATE_RE = Regexp.union(
169
188
  DATE_I_RE,
170
189
  DATE_II_RE,
171
190
  DATE_III_RE,
191
+ DATE_IIII_RE, ## e.g. 8.8. or 13.8.79 or 14.08.1973 (day.month.[year])
172
192
  )
173
193
 
174
194
 
@@ -54,6 +54,146 @@ MINUTE_RE = %r{
54
54
  }ix
55
55
 
56
56
 
57
+ #####
58
+ # player with minute (top-level) regex
59
+ # - starts new player/goal mode (until end of line)!!!
60
+ # - note: allow one or more spaces between name and minute
61
+ #
62
+ # note - aaa bbb 40'
63
+ # make sure anchor (^) - beginning of line - present!!!
64
+ # note - will NOT work with ^ anchor!!
65
+ # use special \G - Matches first matching position !!!!
66
+ # otherwise you get matches such as >bbb 40'< skipping >aaa< etc.!!!
67
+ #
68
+ # regex question - check if in a regex union - space regex gets matched
69
+ # or others with first matching position
70
+ # or if chars get eaten-up?
71
+ # let us know if \G is required here or not
72
+
73
+
74
+ PLAYER_WITH_MINUTE_RE = %r{
75
+ ^ ### note - MUST start line; leading spaces optional (eat-up)
76
+ [ ]*
77
+ (?: # optional open bracket ([) -- remove later
78
+ (?<open_bracket> \[ )
79
+ [ ]*
80
+ )?
81
+ (?: # optional none a.k.a. -; - what todo here?
82
+ (?<none> - [ ]* ; [ ]* )
83
+ )?
84
+ (?<player_with_minute>
85
+ (?<name>
86
+ \p{L}+
87
+ \.? ## optional dot
88
+
89
+ (?:
90
+ ## rule for space; only one single space allowed inline!!!
91
+ (?:
92
+ (?<![ ]) ## use negative lookbehind
93
+ [ ]
94
+ (?=\p{L}|') ## use lookahead
95
+ )
96
+ |
97
+ (?:
98
+ (?<=\p{L}) ## use lookbehind
99
+ ['-] ## must be surrounded by letters
100
+ ## e.g. One-Two NOT
101
+ ## One- Two or One - Two or One -Two etc.
102
+ (?=\p{L}) ## use lookahead
103
+ )
104
+ |
105
+ (?:
106
+ (?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
107
+ ['] ## must be surrounded by leading space and
108
+ ## trailing letters (e.g. UDI 'Beter Bed)
109
+ (?=\p{L}) ## use lookahead
110
+ )
111
+ |
112
+ (?:
113
+ (?<=\p{L}) ## use lookbehind
114
+ ['] ## must be surrounded by leading letter and
115
+ ## trailing space PLUS letter (e.g. UDI' Beter Bed)
116
+ (?=[ ]\p{L}) ## use lookahead (space WITH letter)
117
+ )
118
+ | ## standard case with letter(s) and optional dot
119
+ (?: \p{L}+
120
+ \.? ## optional dot
121
+ )
122
+ )*
123
+ )
124
+ #### spaces
125
+ (?: [ ]+)
126
+ #### minute (see above)
127
+ ##### use MINUTE_RE.source or such - for inline (reference) use? do not copy
128
+ (?<minute>
129
+ (?<=[ (]) # positive lookbehind for space or opening ( e.g. (61') required
130
+ # todo - add more lookbehinds e.g. ,) etc. - why? why not?
131
+ (?:
132
+ (?<value>\d{1,3}) ## constrain numbers to 0 to 999!!!
133
+ (?: \+
134
+ (?<value2>\d{1,3})
135
+ )?
136
+ |
137
+ (?<value> \?{2} | _{2} ) ## add support for n/a (not/available)
138
+ )
139
+ ' ## must have minute marker!!!!
140
+ )
141
+
142
+ )
143
+ }ix
144
+
145
+
146
+ PLAYER_WITH_SCORE_RE = %r{
147
+ ^ ### note - MUST start line; leading spaces optional (eat-up)
148
+ [ ]*
149
+ (?<player_with_score>
150
+ (?<score>
151
+ (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
152
+ )
153
+ [ ]+
154
+ (?<name>
155
+ \p{L}+
156
+ \.? ## optional dot
157
+
158
+ (?:
159
+ ## rule for space; only one single space allowed inline!!!
160
+ (?:
161
+ (?<![ ]) ## use negative lookbehind
162
+ [ ]
163
+ (?=\p{L}|') ## use lookahead
164
+ )
165
+ |
166
+ (?:
167
+ (?<=\p{L}) ## use lookbehind
168
+ ['-] ## must be surrounded by letters
169
+ ## e.g. One-Two NOT
170
+ ## One- Two or One - Two or One -Two etc.
171
+ (?=\p{L}) ## use lookahead
172
+ )
173
+ |
174
+ (?:
175
+ (?<=[ ]) ## use lookbehind -- add letter (plus dot) or such - why? why not?
176
+ ['] ## must be surrounded by leading space and
177
+ ## trailing letters (e.g. UDI 'Beter Bed)
178
+ (?=\p{L}) ## use lookahead
179
+ )
180
+ |
181
+ (?:
182
+ (?<=\p{L}) ## use lookbehind
183
+ ['] ## must be surrounded by leading letter and
184
+ ## trailing space PLUS letter (e.g. UDI' Beter Bed)
185
+ (?=[ ]\p{L}) ## use lookahead (space WITH letter)
186
+ )
187
+ | ## standard case with letter(s) and optional dot
188
+ (?: \p{L}+
189
+ \.? ## optional dot
190
+ )
191
+ )*
192
+ ) ## name
193
+ ### check/todo - add lookahead (e.g. must be space or ,$) why? why not?
194
+ ) ## player_with_score
195
+ }ix
196
+
57
197
 
58
198
 
59
199
  end # module SportDb
@@ -19,18 +19,23 @@ class Lexer
19
19
 
20
20
 
21
21
  ## name different from text (does NOT allow number in name/text)
22
-
23
22
  PROP_NAME_RE = %r{
24
- (?<prop_name> \b
23
+ (?<prop_name>
24
+ \b
25
25
  (?<name>
26
26
  \p{L}+
27
27
  \.? ## optional dot
28
- (?:
29
- [ ]? # only single spaces allowed inline!!!
30
28
  (?:
29
+ ## rule for space; only one single space allowed inline!!!
31
30
  (?:
31
+ (?<![ ]) ## use negative lookbehind
32
+ [ ]
33
+ (?=\p{L}|') ## use lookahead
34
+ )
35
+ |
36
+ (?:
32
37
  (?<=\p{L}) ## use lookbehind
33
- [/'-] ## must be surrounded by letters
38
+ ['-] ## must be surrounded by letters
34
39
  ## e.g. One/Two NOT
35
40
  ## One/ Two or One / Two or One /Two etc.
36
41
  (?=\p{L}) ## use lookahead
@@ -53,9 +58,8 @@ PROP_NAME_RE = %r{
53
58
  (?: \p{L}+
54
59
  \.? ## optional dot
55
60
  )
56
- )+
57
- )*
58
- )
61
+ )*
62
+ )
59
63
  ## add lookahead - must be non-alphanum
60
64
  (?=[ ,;\]\)]|$)
61
65
  )
@@ -83,10 +87,14 @@ PROP_NAME_RE = %r{
83
87
  ##
84
88
  ## todo/fix:
85
89
  ## check if St. Pölten works; with starting St. ???
90
+ ##
91
+ ## note - use special \G - Matches first matching position !!!!
86
92
 
87
93
 
88
94
  PROP_KEY_RE = %r{
89
- (?<prop_key> \b
95
+ ^ # note - MUST start line; leading spaces optional (eat-up)
96
+ [ ]*
97
+ (?<prop_key>
90
98
  (?<key>
91
99
  (?:\p{L}+
92
100
  |
@@ -84,7 +84,8 @@ WDAY_RE = %r{
84
84
  Sat|Sa|
85
85
  Sun|Su
86
86
  ))
87
- \b ## todo/check - must be followed by two spaces or space + [( etc.
87
+ (?=[ ]{2}) # positive lookahead for two spaces
88
+ ## todo/check - must be followed by two spaces or space + [( etc.
88
89
  ## to allow words starting with weekday abbreviations - why? why not?
89
90
  ## check if any names (teams, rounds, etc) come up in practice
90
91
  ## or maybe remove three letter abbreviations Mon/Tue
@@ -123,26 +124,54 @@ BASICS_RE = %r{
123
124
  }ix
124
125
 
125
126
 
127
+ ## general catch-all (RECOMMENDED (ALWAYS) use as last entry in union)
128
+ ## to avoid advance of pos match!!!
129
+ ANY_RE = %r{
130
+ (?<any> .)
131
+ }ix
126
132
 
127
133
 
128
- RE = Regexp.union( PROP_KEY_RE, ## start with prop key (match will/should switch into prop mode!!!)
134
+ RE = Regexp.union(
129
135
  STATUS_RE,
130
136
  NOTE_RE,
131
137
  TIMEZONE_RE,
138
+ DURATION_RE, # note - duration MUST match before date
139
+ DATE_RE, ## note - date must go before time (e.g. 12.12. vs 12.12)
132
140
  TIME_RE,
133
- DURATION_RE, # note - duration MUST match before date
134
- DATE_RE,
135
141
  SCORE_MORE_RE,
136
142
  SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
137
143
  BASICS_RE,
138
- MINUTE_RE,
139
- MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
140
- GOAL_OG_RE, GOAL_PEN_RE,
141
- TEXT_RE,
142
- WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
143
- # note - wday MUST be after text e.g. Sun Ke 68' is Sun Ke (NOT Sun) etc.
144
+ WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
145
+ # note - wday now goes BEFORE text; its two-space lookahead keeps e.g. Sun Ke 68' as Sun Ke (NOT Sun) etc.
146
+ TEXT_RE,
147
+ ANY_RE,
144
148
  )
145
149
 
146
150
 
151
+
152
+ ######################################################
153
+ ## goal mode (switched to by PLAYER_WITH_MINUTE_RE)
154
+
155
+ GOAL_BASICS_RE = %r{
156
+ (?<spaces> [ ]{2,}) |
157
+ (?<space> [ ])
158
+ |
159
+ (?<sym>
160
+ [;,\[\]] ## add (-) dash too - why? why not?
161
+ )
162
+ }ix
163
+
164
+
165
+ GOAL_RE = Regexp.union(
166
+ GOAL_BASICS_RE,
167
+ MINUTE_RE,
168
+ MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
169
+ GOAL_OG_RE, GOAL_PEN_RE,
170
+ SCORE_RE,
171
+ PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
172
+ )
173
+
174
+
175
+
147
176
  end # class Lexer
148
177
  end # module SportDb
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 6
7
- PATCH = 3
7
+ PATCH = 4
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.3
4
+ version: 0.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-02-25 00:00:00.000000000 Z
11
+ date: 2025-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos