sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -0,0 +1,141 @@
1
module SportDb
class Lexer

  ##
  # TIME_RE - kick-off time token with optional timezone and
  #           optional (inline) local time.
  #
  #  supported styles:
  #     18:30  or  18h30  (18H30 works too - regex is case insensitive)
  #
  #  note - 18.30 is no longer supported - MUST use 18:30 or 18h30 !!!
  #
  #  open questions (kept from the original notes):
  #    - keep 18h30 - why? why not?
  #    - add support for 6:30pm 8:20am etc. - why? why not?
  #
  #  optional timezone (separated by exactly one space) e.g.
  #     18:30 UTC+1    or  18:30 BST/UTC+1
  #     18:30 UTC+01   or  18:30 BST/UTC+01
  #
  #  local time is (inline) part of time
  #    and, thus, must always follow time e.g.
  #     18:30 (19:30 BST)
  #     18:30 (19:30 UTC+1)  or  18:30 (19:30 BST/UTC+1)
  #  note - the local timezone is optional! e.g. 18:30 (19:30) works too
  #
  #  named captures:
  #    time, hour, minute, timezone,
  #    time_local, local_hour, local_minute, local_timezone

  TIME_RE = %r{
      \b
      (?<time>
         (?<hour>\d{1,2})
         [:h]
         (?<minute>\d{2})

         #### optional (inline) timezone
         ##  note - non-utc timezone MUST be hard-coded (added) here!!!
         ##   avoids eating-up team names (separated by one space)
         ##      e.g. 18:30 MEX v MEX
         (?:
            [ ]   ## require space - why? why not
            (?<timezone>
                (?:
                   ## GMT   - Greenwich Mean Time
                   ## BST   - British Summer Time
                   ## CES?T - Central European (Summer) Time
                   ## EES?T - Eastern European (Summer) Time
                   (?: GMT|BST|CES?T|EES?T)
                   (?: /
                       UTC (?: [+-]\d{1,4} | ±0)
                   )?
                )
                |
                (?:
                   UTC (?: [+-]\d{1,4} | ±0)
                )
            )
         )?
      )
      \b

      ####
      ### note - local time is now INLINE and MUST follow time
      (?:
         [ ]+    ## todo/check - make space optional - why? why not?
         \(
           (?<time_local>
              (?<local_hour>\d{1,2})
              [:h]    ### todo/fix - MUST match style in time above!!!
              (?<local_minute>\d{2})

              ####
              ## optional "local" timezone name eg. BRT or CEST etc.
              (?:
                [ ]   ## require space - why? why not
                (?<local_timezone>
                   (?: [A-Z]{3,4}
                       (?: /
                           UTC (?: [+-]\d{1,4} | ±0)
                       )?
                   )
                   |
                   (?:     ## e.g. 0 or 00 or 0000
                     UTC (?: [+-]\d{1,4} | ±0)
                   )
                )
              )?   # note - make timezone optional!!!
           )
         \)
      )?
  }ix


  ##
  # Build the time data (hash) from a TIME_RE match.
  #
  #  m - match data (or anything answering to [] with the TIME_RE
  #      capture names as symbols)
  #
  #  returns a hash e.g.
  #     { time:       { h: 18, m: 30, timezone: 'UTC+1' },
  #       time_local: { h: 19, m: 30, timezone: 'BST' } }
  #   note - the :timezone keys and the :time_local entry are only
  #          present if captured
  #
  #  raises ArgumentError if time (or local time) is out-of-range,
  #    that is, hour NOT in 0..23 or minute NOT in 0..59
  #
  #  note - time gets unified to numbers  e.g. 12h40 => h: 12, m: 40
  #           (no time-only type in ruby)
  def self._build_time( m )
    data = { time: {} }

    hour   = m[:hour].to_i(10)    ## allow 08/07/etc.
    minute = m[:minute].to_i(10)

    ## check if 24:00 possible? or only 0:00 (23:59)
    _check_time_range!( hour, minute, 'time', m[:time] )

    data[:time][:h] = hour
    data[:time][:m] = minute
    data[:time][:timezone] = m[:timezone]   if m[:timezone]

    ## check if local time present e.g.
    ##   18:30 (19:30)
    ##   18:30 (19:30 BST)  etc.
    if m[:time_local]
      local_hour   = m[:local_hour].to_i(10)    ## allow 08/07/etc.
      local_minute = m[:local_minute].to_i(10)

      ## fix - validate the LOCAL hour/minute here
      ##        (and NOT the already checked hour/minute again)
      _check_time_range!( local_hour, local_minute, 'local time', m[:time_local] )

      data[:time_local] = {}
      data[:time_local][:h] = local_hour
      data[:time_local][:m] = local_minute
      data[:time_local][:timezone] = m[:local_timezone]   if m[:local_timezone]
    end

    data
  end

  ## (internal) raise ArgumentError unless hour in 0..23 and minute in 0..59;
  ##   label and text are only used for the error message
  def self._check_time_range!( hour, minute, label, text )
    return if hour.between?( 0, 23 ) && minute.between?( 0, 59 )

    raise ArgumentError, "parse error - #{label} >#{text}< out-of-range"
  end
  private_class_method :_check_time_range!

  ## instance-level convenience - forwards to the class-level builder
  def _build_time(m) self.class._build_time(m); end

end  # class Lexer
end  # module SportDb
@@ -4,63 +4,12 @@ module SportDb
4
4
  class Lexer
5
5
 
6
6
 
7
- ##
8
- # keep 18h30 - why? why not?
9
- # add support for 6:30pm 8:20am etc. - why? why not?
10
- #
11
- # check - only support h e.g. 18h30 or 18H30 too - why? why not?
12
- # e.g. 18.30 (or 18:30 or 18h30)
13
- TIME_RE = %r{
14
- (?<time> \b
15
- (?: (?<hour>\d{1,2})
16
- (?: :|\.|h )
17
- (?<minute>\d{2}))
18
- \b
19
- )
20
- }ix
21
-
22
-
23
-
24
- ## add wday / stand-alone week day - as separate regex or
25
- ## use TEXT with is_wday? check or such with
26
- ## requirement of beginning of line (anchored to line) only??
27
- ## - why? why not?
28
-
29
- WDAY_RE = %r{
30
- (?<wday>
31
- \b # note - alternation (|) is lowest precedence (such
32
- # parathenes required around \b()\b !!!
33
- ## note - NOT case sensitive!!!
34
- (?<day_name>
35
- (?-i:
36
- Mon|Mo|
37
- Tue|Tu|
38
- Wed|We|
39
- Thu|Th|
40
- Fri|Fr|
41
- Sat|Sa|
42
- Sun|Su
43
- ))
44
- (?=[ ]{2}) # positive lookahead for two space
45
- ## todo/check - must be followed by two spaces or space + [( etc.
46
- ## to allow words starting with weekday abbrevations - why? why not?
47
- ## check if any names (teams, rounds, etc) come up in practice
48
- ## or maybe remove three letter abbrevations Mon/Tue
49
- ## and keep only Mo/Tu/We etc. - why? why not?
50
- )}x
51
-
52
-
53
-
54
7
 
55
8
  BASICS_RE = %r{
56
- ## e.g. (51) or (1) etc. - limit digits of number???
57
- ## todo/fix - change num to ord (for ordinal number)!!!!!
58
- (?<num> \( (?<value>\d+) \) )
59
- |
60
9
  (?<vs>
61
10
  (?<=[ ]) # positive lookbehind for space
62
11
  (?-i:
63
- vs|v
12
+ vs\.?|v|VS
64
13
  ) # note - only match case sensitive (downcased letters)!!!
65
14
  # note - bigger match first e.g. vs than v etc.
66
15
  (?=[ ]) # positive lookahead for space
@@ -69,87 +18,275 @@ BASICS_RE = %r{
69
18
  (?<spaces> [ ]{2,}) |
70
19
  (?<space> [ ])
71
20
  |
72
- (?<sym> (?<=^|[ ]) ## positive lookahead
73
- (?: ----|
74
- ---|
75
- --
76
- )
77
- (?=[ ]) ## positive lookahead
78
- )
79
- |
80
- (?<sym> [;,/@|\[\]-] )
21
+ (?<sym> [,;/@|()\[\]-] ) ### note: add parantheses too e.g () - why? why not?
81
22
  }ix
82
23
 
83
24
 
84
25
 
26
+
27
+ ###
28
+ ## add att(endance) e.g. att: 18000
29
+ ##
30
+ ## A v B 2-1 att: 18000
31
+
32
+ ATTENDANCE_RE = %r{
33
+ (?<attendance>
34
+ \b
35
+ att: [ ]*
36
+ (?<value>
37
+ [1-9]
38
+ (?: _? \d+ )*
39
+ )
40
+ \b
41
+ )}ix
42
+
43
+
44
+ ## "inline" match status e.g.
45
+ ## Clapham Rovers w/o Hitchin
46
+ ## Queen's Park bye
47
+
48
+ ## add support for WO or W-0 too - why? why not?
49
+ INLINE_WO_RE = %r{
50
+ (?<inline_wo>
51
+ \b (?: w/o | W/O ) \b
52
+ )}x ## note - NOT case insensitive
53
+
54
+ INLINE_BYE_RE = %r{
55
+ (?<inline_bye>
56
+ \b (?: bye | BYE ) \b
57
+ )}x ## note - NOT case insensitive
58
+
59
+
60
+ ###
61
+ # A n/p B (note - basically an inline short form of A v B [cancelled] )
62
+ # N/P
63
+ INLINE_NP_RE = %r{
64
+ (?<inline_np>
65
+ \b (?: n/p | N/P ) \b
66
+ )}x ## note - NOT case insensitive
67
+
68
+
69
+ ###
70
+ # abd/abd. or aban/aban. [abandoned]
71
+ # ABD/ABAN
72
+ INLINE_ABD_RE = %r{
73
+ (?<inline_abd>
74
+ \b (?: abd\.? |
75
+ aban\.? |
76
+ ABD | ABAN
77
+ )
78
+ ## POSITIVE lookahead - requires space
79
+ (?= [ ])
80
+ )}x ## note - NOT case insensitive
81
+
82
+ ####
83
+ # susp/susp. [suspended]
84
+ # SUSP
85
+ INLINE_SUSP_RE = %r{
86
+ (?<inline_susp>
87
+ \b (?: susp\.? |
88
+ SUSP )
89
+ ## POSITIVE lookahead - requires space
90
+ (?= [ ])
91
+ )}x ## note - NOT case insensitive
92
+
93
+
94
+ ####
95
+ # ppd/ppd. or pst/pst. or pstp/pstp. or postp/postp. [postponed]
96
+ # PPD/PSTP/POSTP/P-P
97
+ # todo/check - add/allow p-p too - why? why not?
98
+ INLINE_PPD_RE = %r{
99
+ (?<inline_ppd>
100
+ \b (?: ppd\.? |
101
+ pst\.? |
102
+ po?stp\.? |
103
+ PPD | PST | PO?STP | P-P
104
+ )
105
+ ## POSITIVE lookahead - requires space
106
+ (?= [ ])
107
+ )}x ## note - NOT case insensitive
108
+
109
+ ####
110
+ # void via x-x X-X
111
+ # todo/check - only allow X-X - why? why not?
112
+ INLINE_VOID_RE = %r{
113
+ (?<inline_void>
114
+ \b (?: x-x |
115
+ X-X
116
+ )
117
+ ## POSITIVE lookahead - requires space
118
+ (?= [ ])
119
+ )}x ## note - NOT case insensitive
120
+
121
+
122
+ ####
123
+ # awd/awd. [awarded]
124
+ # AWD
125
+ # note - recommendation is to always include score
126
+ # thus, use/prefer SCORE_AWD e.g. 0-3 awd
127
+ INLINE_AWD_RE = %r{
128
+ (?<inline_awd>
129
+ \b (?: awd\.? | AWD )
130
+ ## POSITIVE lookahead - requires space
131
+ (?= [ ])
132
+ )}x ## note - NOT case insensitive
133
+
134
+ ###
135
+ # canc/canc. [cancelled]
136
+ # CANC
137
+ INLINE_CANC_RE = %r{
138
+ (?<inline_canc>
139
+ \b (?: canc\.? | CANC )
140
+ ## POSITIVE lookahead - requires space
141
+ (?= [ ])
142
+ )}x ## note - NOT case insensitive
143
+
144
+
145
+ ###
146
+ ## home/away/neutral - (h), (a), (n)
147
+ ## add support for h/a/n
148
+ ## with (?-i \b [han] \b) lower-case and \b boundary - why? why not?
149
+
150
+ TEAM_HOME_RE = %r{ (?<team_home> \(h\) )}xi
151
+ TEAM_AWAY_RE = %r{ (?<team_away> \(a\) )}xi
152
+ TEAM_NEUTRAL_RE = %r{ (?<team_neutral> \(n\) )}xi
153
+
154
+
155
+
156
+ ## "top-level" regex used for:
157
+ ## - date_header
158
+ ## - match_header & match_line_more
159
+ ## - match_line
160
+
161
+
85
162
  RE = Regexp.union(
86
- STATUS_RE,
87
- SCORE_NOTE_RE,
88
- NOTE_RE,
89
- DURATION_RE, # note - duration MUST match before date
163
+ STATUS_RE, ## match status e.g. [cancelled], etc.
164
+
165
+ INLINE_WO_RE, ## (inline) match status - w/o (walkout)
166
+ INLINE_NP_RE, ## (inline) match status - n/p (not played)
167
+ INLINE_BYE_RE, ## (inline) match status - bye (advance to next round)
168
+ INLINE_ABD_RE, ## (inline) match status - abd/abd. (abandoned)
169
+ INLINE_SUSP_RE, ## (inline) match status - susp/susp. (suspended)
170
+ INLINE_PPD_RE, ## (inline) match status - ppd/ppd. or pstp/pstp. or postp/postp. or p-p (postponed)
171
+ INLINE_VOID_RE, ## (inline) match status - x-x (voided)
172
+ INLINE_AWD_RE, ## (inline) match status - awd/awd. (awarded)
173
+ INLINE_CANC_RE, ## (inline) match status - canc/canc. (cancelled/canceled)
174
+
175
+
176
+ TEAM_HOME_RE, ## (H)
177
+ TEAM_AWAY_RE, ## (A)
178
+ TEAM_NEUTRAL_RE, ## (N)
179
+
180
+ NOTE_RE, ### fix - change to INLINE_NOTE !!!
181
+ DATE_LEGS_RE, # note - must go before date!!!
90
182
  DATE_RE, ## note - date must go before time (e.g. 12.12. vs 12.12)
91
183
  TIME_RE,
92
- SCORE_MORE_RE,
93
- SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_MORE_RE!!!
184
+ ATTENDANCE_RE, # note - allow att: for now inline in matches too - why? why not?
185
+ SCORE_LEGS_RE,
186
+ SCORE_FULL_RE,
187
+ SCORE_FULLER_RE,
188
+ SCORE_FULLER_MORE_RE,
189
+ SCORE_AWD_RE, # (inline) score awarded e.g. 3-0 awd or 0-1 awd. etc.
190
+ SCORE_ABD_RE, # (inline) score abandoned e.g. 2-1 abd.
191
+ SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_FULL_RE!!!
192
+
193
+ ## note - add "experimental" "split" scores for now
194
+ SCORE_TEAM_RE, ## e.g. (2) 1 for "split" scores
195
+ SCORE_TEAM_PEN_RE, ## e.g. 1 (2)
196
+
94
197
  BASICS_RE,
95
- WDAY_RE, # allow standalone weekday name (e.g. Mo/Tu/etc.) - why? why not?
96
- # note - wday MUST be after text e.g. Sun Ke 68' is Sun Ke (NOT Sun) etc.
97
198
  TEXT_RE,
199
+ ## note - score_team_num (e.g. 0 or 10 etc.)
200
+ ## MUST BE after TEXT
201
+ ## only match if nothing else matches (except ANY)
202
+ SCORE_TEAM_NUM_RE, ## e.g. 0 or 1 or 9 or 11 etc. (<100)
98
203
  ANY_RE,
99
204
  )
100
205
 
101
206
 
102
207
 
103
- ######################################################
104
- ## goal mode (switched to by PLAYER_WITH_MINUTE_RE)
105
208
 
106
- GOAL_BASICS_RE = %r{
107
- (?<spaces> [ ]{2,}) |
108
- (?<space> [ ])
109
- |
110
- (?<sym>
111
- [;,\[\]] ## add (-) dash too - why? why not?
112
- )
113
- }ix
114
209
 
115
210
 
116
- GOAL_RE = Regexp.union(
117
- GOAL_BASICS_RE,
118
- MINUTE_RE,
119
- MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
120
- GOAL_OG_RE, GOAL_PEN_RE,
121
- SCORE_RE,
122
- PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
123
- )
211
+ ## ord (for ordinal number)
212
+ ## e.g. (51) or (1) etc. - limit digits of number - why? why not???
213
+
214
+ START_WITH_ORD = %r{
215
+ \A
216
+ [ ]* ## ignore leading spaces (if any)
217
+ (?<ord>
218
+ \(
219
+ (?<value>\d+)
220
+ \)
221
+ )}ix
124
222
 
125
- ## note - leave out n/a minute in goals - make minutes optional!!!
126
- PROP_GOAL_RE = Regexp.union(
127
- GOAL_BASICS_RE,
128
- MINUTE_RE,
129
- ## MINUTE_NA_RE, ## note - add/allow not/available (n/a,na) minutes hack for now
130
- GOAL_OG_RE, GOAL_PEN_RE,
131
- SCORE_RE,
132
- PROP_NAME_RE, ## note - (re)use prop name for now for (player) name
133
- )
134
223
 
224
+ ###
225
+ ## e.g. 1930, 1986, 2002, 2010, 2022, 2026
226
+ ## note - only YYYY
227
+ ## note - look out for clubs like 1860 München (de) !!!
228
+ ## 1899 Hoffenheim (de)
229
+ ## 1896 Löwenherz (ch - a.k.a. FC Winterthur ??)
230
+ ## any others starting with YYYY ?!
231
+ ## note - YEAR requires TWO (trailing) spaces !!!!! e.g.
232
+ ## 1930 Uruguay 4-2 Argentina
233
+ ## 1934 Italy 2-1 Czechoslovakia (AET)
234
+ ## 2022 Argentina 3-3 France (AET, 4-2 pen)
235
+ ##
236
+ ## do NOT match (iso date!!) - 2020-11-12
237
+ ## 2020/11/12
238
+ ## 2020.11.12 etc.
135
239
 
136
- ####
137
- #
240
+ START_WITH_YEAR = %r{
241
+ \A
242
+ [ ]* ## ignore leading spaces (if any)
243
+ (?<year>
244
+ \d{4}
245
+ )
246
+ ## positive lookahead
247
+ (?= [ ]{2} | ## min. TWO spaces or
248
+ [ ]@ | ## space with geo marker or
249
+ [ ]* \z ## year (date) header (end-of-line/string)
250
+ )
251
+ }x
252
+
253
+
254
+
255
+ ###
256
+ ## check for headings
257
+ ## e.g. = heading 1
258
+ ## == heading 2 etc.
259
+ ## =Eurochampionship=
260
+ ## note - no spaces required (same as in wikipedia!!)
261
+ ## same as in wikipedia support six (6) levels
262
+ ##
138
263
  ## note - use \A (instead of ^) - \A strictly matches the start of the string.
139
- ROUND_OUTLINE_RE = %r{ \A
264
+
265
+
266
+ HEADING_RE = %r{ \A
140
267
  [ ]* ## ignore leading spaces (if any)
141
- (?: »|>> )
142
- [ ]+
143
- (?<round_outline>
268
+ (?<heading_marker> ={1,6} )
269
+ [ ]*
270
+ (?<heading>
144
271
  ## must start with letter - why? why not?
145
272
  ### 1st round
146
273
  ## allow numbers e.g. Group A - 1
147
- .+? ## use non-greedy
274
+ [^=]+? ## use non-greedy
148
275
  )
149
- [ ]* ## ignore trailing spaces (if any)
150
- $
276
+ [ ]* ## ignore trailing spaces (if any)
277
+ (?: =* ) ## allow any trailing heading markers
278
+ [ ]* ## ignore trailing spaces (if any)
279
+ \z
151
280
  }ix
152
281
 
153
282
 
283
+ HRULER_RE = %r{
284
+ \A
285
+ [ ]* ## ignore leading spaces (if any)
286
+ -{3,} ## must be at least three dashes!!!
287
+ [ ]* ## ignore trailing spaces (if any)
288
+ \z
289
+ }ix
290
+
154
291
  end # class Lexer
155
292
  end # module SportDb
@@ -0,0 +1,92 @@
1
module SportDb
class Lexer

  ## token ("private") parse helpers - check if a (stand-alone) string
  ##   is - in full - a team, a date or a full score.
  ##
  ## note - the helpers use the token regexes defined for the lexer
  ##          (TEXT_RE, DATE_RE, SCORE_FULL_RE) plus the MONTH_MAP and
  ##          DAY_MAP lookup tables

  ## (disabled) generic helper for building strict "is" regexes
  ##   kept for reference:
  ##
  ##  def self._mk_is( re )
  ##    ## add \A ... \z to regex
  ##    ##   for strict matching of beginning and end of string
  ##    ##  regex note - \z will NOT allow trailing newline(s)!!!!
  ##    ##  note - must double escape \\A,\\z in quoted string!!
  ##    Regexp.new( %Q< \\A
  ##                    (?:#{re.source})
  ##                    \\z
  ##                  >, re.options )
  ##  end
  ##
  ##  IS_TEAM_RE = _mk_is( TEXT_RE )   ## todo/fix - rename TEXT_RE to TEAM_RE!!!
  ##  IS_DATE_RE = _mk_is( DATE_IIII_RE )  ## DATE_RE )


  ##
  # Check if str (with leading/trailing spaces stripped) is
  #   - in full - a team (text) token.
  #
  #  returns the match data on a full match,
  #    otherwise nil (no match or only a partial match)
  def self._parse_team( str )
    m = TEXT_RE.match( str.strip )

    ## note - a match NOT anchored to start and end-of-string
    ##          counts as no match; report, error somehow??
    return nil  unless m && m.pre_match == '' && m.post_match == ''

    m
  end


  ##
  # Check if str (with leading/trailing spaces stripped) is
  #   - in full - a date token.
  #
  #  returns a hash with the captured components
  #     :y (year), :yy (two digit year), :m (month), :d (day), :wday
  #    (only captured components are present),
  #    otherwise nil (no match or only a partial match)
  def self._parse_date( str )
    m = DATE_RE.match( str.strip )

    #### todo/fix/check:
    ###   wrapped with \A \z NOT working with union - check later - why?
    ###     use hand-coded with pre_match = "" and post_match = ""
    return nil  unless m && m.pre_match == '' && m.post_match == ''

    date = {}
    date[:y]    = m[:year].to_i(10)     if m[:year]
    ## check - use y too for two-digit year or keep separate - why? why not?
    date[:yy]   = m[:yy].to_i(10)       if m[:yy]    ## two digit year (e.g. 25 or 78 etc.)
    date[:m]    = m[:month].to_i(10)    if m[:month]
    ## map month/day names
    ##  note - allow any/upcase JULY/JUL etc. thus ALWAYS downcase for lookup
    date[:m]    = MONTH_MAP[ m[:month_name].downcase ]   if m[:month_name]
    date[:d]    = m[:day].to_i(10)      if m[:day]
    date[:wday] = DAY_MAP[ m[:day_name].downcase ]       if m[:day_name]
    date
  end


  ##
  # Check if str (with leading/trailing spaces stripped) is
  #   - in full - a full score token.
  #
  #  returns a hash with the captured score pairs
  #     :p (penalties), :et (extra time), :ft (full time), :ht (half time)
  #    (only captured pairs are present),
  #    otherwise nil (no match or only a partial match)
  #
  #  fix - strip leading/trailing spaces as documented
  #          (and as done in _parse_team and _parse_date);
  #        note - to_s keeps nil in => nil out (as before)
  def self._parse_score_full( str )
    m = SCORE_FULL_RE.match( str.to_s.strip )

    ## note - a match NOT anchored to start and end-of-string
    ##          counts as no match; report, error somehow??
    return nil  unless m && m.pre_match == '' && m.post_match == ''

    score = {}
    score[:p]  = [m[:p1].to_i,  m[:p2].to_i]    if m[:p1]  && m[:p2]
    score[:et] = [m[:et1].to_i, m[:et2].to_i]   if m[:et1] && m[:et2]
    score[:ft] = [m[:ft1].to_i, m[:ft2].to_i]   if m[:ft1] && m[:ft2]
    score[:ht] = [m[:ht1].to_i, m[:ht2].to_i]   if m[:ht1] && m[:ht2]
    ## score[:agg] = [m[:agg1].to_i,m[:agg2].to_i]   if m[:agg1] && m[:agg2]
    score
  end


end  # class Lexer
end  # module SportDb
@@ -3,8 +3,8 @@ module SportDb
3
3
  module Module
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 6
7
- PATCH = 20
6
+ MINOR = 7
7
+ PATCH = 0
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
@@ -15,17 +15,39 @@ require 'cocos'
15
15
 
16
16
  require_relative 'parser/version'
17
17
 
18
- require_relative 'parser/lang'
18
+ ##
19
+ ## generic helper
20
+ require_relative 'parser/blocktxt'
21
+
22
+ ## core machinery
19
23
 
20
24
  require_relative 'parser/token-score'
25
+ require_relative 'parser/token-score_fuller'
26
+ require_relative 'parser/token-score_legs'
27
+ require_relative 'parser/token-time'
21
28
  require_relative 'parser/token-date'
29
+ require_relative 'parser/token-date_duration'
22
30
  require_relative 'parser/token-text'
31
+ require_relative 'parser/token-prop_name' ## a.k.a token-text_ii
23
32
  require_relative 'parser/token-status'
24
- require_relative 'parser/token-minute'
33
+ require_relative 'parser/token-note'
34
+ require_relative 'parser/token-goals'
25
35
  require_relative 'parser/token-prop' ## team prop(erty) mode (note - must be before token)
26
36
  require_relative 'parser/token-geo'
37
+ require_relative 'parser/token-group'
38
+ require_relative 'parser/token-round'
39
+ require_relative 'parser/token-table'
27
40
  require_relative 'parser/token'
41
+
42
+
43
+ ### add token ("private") parse helpers e.g. _parse_team() etc.
44
+ require_relative 'parser/token_helpers'
45
+
46
+
47
+ require_relative 'parser/lexer_buffer' ## incl. Tokens (aka TokenBuffer)
28
48
  require_relative 'parser/lexer'
49
+ require_relative 'parser/lexer_tty' ## teletype (tty) mode
50
+
29
51
 
30
52
  require_relative 'parser/parser' ## auto-generated by racc (from parser.y)
31
53
  require_relative 'parser/racc_parser'