sportdb-parser 0.7.1 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +17 -4
  4. data/lib/sportdb/parser/lexer-on_goal.rb +172 -0
  5. data/lib/sportdb/parser/lexer-on_group_def.rb +31 -0
  6. data/lib/sportdb/parser/lexer-on_prop_lineup.rb +79 -0
  7. data/lib/sportdb/parser/lexer-on_prop_misc.rb +110 -0
  8. data/lib/sportdb/parser/lexer-on_prop_penalties.rb +40 -0
  9. data/lib/sportdb/parser/lexer-on_round_def.rb +37 -0
  10. data/lib/sportdb/parser/lexer-on_top.rb +125 -0
  11. data/lib/sportdb/parser/lexer-prep_doc.rb +131 -0
  12. data/lib/sportdb/parser/lexer-prep_line.rb +63 -0
  13. data/lib/sportdb/parser/lexer-tokenize.rb +449 -0
  14. data/lib/sportdb/parser/lexer.rb +133 -1363
  15. data/lib/sportdb/parser/lexer_buffer.rb +8 -37
  16. data/lib/sportdb/parser/lexer_token.rb +126 -0
  17. data/lib/sportdb/parser/parser.rb +1104 -1403
  18. data/lib/sportdb/parser/racc_parser.rb +36 -32
  19. data/lib/sportdb/parser/racc_tree.rb +65 -98
  20. data/lib/sportdb/parser/token-date--helpers.rb +130 -0
  21. data/lib/sportdb/parser/token-date--names.rb +108 -0
  22. data/lib/sportdb/parser/token-date.rb +20 -192
  23. data/lib/sportdb/parser/token-date_duration.rb +8 -27
  24. data/lib/sportdb/parser/token-geo.rb +16 -16
  25. data/lib/sportdb/parser/token-goals--helpers.rb +114 -0
  26. data/lib/sportdb/parser/token-goals.rb +103 -249
  27. data/lib/sportdb/parser/token-group.rb +8 -22
  28. data/lib/sportdb/parser/token-prop.rb +138 -124
  29. data/lib/sportdb/parser/token-prop_name.rb +48 -39
  30. data/lib/sportdb/parser/token-round.rb +21 -35
  31. data/lib/sportdb/parser/token-score--helpers.rb +189 -0
  32. data/lib/sportdb/parser/token-score.rb +9 -393
  33. data/lib/sportdb/parser/token-score_full.rb +331 -0
  34. data/lib/sportdb/parser/token-status.rb +44 -46
  35. data/lib/sportdb/parser/token-status_inline.rb +112 -0
  36. data/lib/sportdb/parser/token-text.rb +41 -31
  37. data/lib/sportdb/parser/token-time.rb +29 -26
  38. data/lib/sportdb/parser/token.rb +58 -159
  39. data/lib/sportdb/parser/version.rb +1 -1
  40. data/lib/sportdb/parser.rb +45 -17
  41. metadata +19 -6
  42. data/lib/sportdb/parser/blocktxt.rb +0 -99
  43. data/lib/sportdb/parser/lexer_tty.rb +0 -111
  44. data/lib/sportdb/parser/token-table.rb +0 -149
  45. data/lib/sportdb/parser/token_helpers.rb +0 -92
@@ -7,23 +7,12 @@ class Lexer
7
7
  ## do NOT use (anymore) as generic TEXT_RE
8
8
 
9
9
 
10
-
11
- ## todo - use ANY_RE to token_commons or such - for shared by many?
12
-
13
- ## general catch-all (RECOMMENDED (ALWAYS) use as last entry in union)
14
- ## to avoid advance of pos match!!!
15
- ANY_RE = %r{
16
- (?<any> .)
17
- }ix
18
-
19
-
20
-
21
10
  ## note - TEXT_RE used for TEAM_NAMES
22
11
  ## plus as "legacy" shortcut for (simple) group or round names e.g.
23
12
  ## Group A, Group 1, ..
24
13
  ## Matchday 1, 1. Round,
25
- ## note - no exception for (shortcut) group or round (MUST match team name pattern!)
26
-
14
+ ## note - no exception for (shortcut) group or round (MUST match team name pattern!)
15
+
27
16
 
28
17
 
29
18
  ## note - do NOT allow single alpha text for now
@@ -39,7 +28,7 @@ ANY_RE = %r{
39
28
  # 1 FC ## allow 1-FC or 1FC - why? why not?
40
29
  # 1FC"
41
30
  # 1. FC
42
- # 1.FC
31
+ # 1.FC
43
32
  # 23° Noviembre
44
33
  # 1890 Munich
45
34
  # 1-FC - XXXX - not allowed for now, parse error
@@ -76,7 +65,7 @@ TEXT_RE = %r{
76
65
  ## MUST be followed by (optional dot) and
77
66
  ## required space !!!
78
67
  ## MUST be followed by a to z!!!!
79
- [.°]? ## optional dot (.) or degree(°) - todo - add number sign too!!
68
+ [.°]? ## optional dot (.) or degree(°) - todo - add number sign too!!
80
69
  [ ]? ## make space optional too - why? why not?
81
70
  ## yes - eg. 1st, 2nd, 5th etc.
82
71
  \p{L}+
@@ -91,10 +80,10 @@ TEXT_RE = %r{
91
80
  ## note - exclude (v[ ]/vs[ ]/vs.[ ])
92
81
  ## AND switch to case-sensitive (via -i!!!)
93
82
  (?! (?-i: (?: ## note - (big) V not matching for versus!!!
94
- vs\.?|v|VS|
95
-
96
- n/p|N/P|
97
- w/o|W/O|
83
+ vs\.?|v|
84
+
85
+ n/p|N/P|
86
+ w/o|W/O|
98
87
  abd\.?|ABD|
99
88
  aban\.?|ABAN|
100
89
  susp\.?|SUSP|
@@ -103,20 +92,20 @@ TEXT_RE = %r{
103
92
  po?stp\.?|PO?STP|P-P|
104
93
  x-x|X-X|
105
94
  awd\.?|AWD|
106
- canc\.?|CANC ) [ ]
95
+ canc\.?|CANC ) [ ]
107
96
  |
108
97
  (?: bye|BYE ) (?:[ ]|$))
109
- )
98
+ )
110
99
  )
111
- |
112
- [/-] ## must NOT be surrounded by spaces
100
+ |
101
+ [/-] ## must NOT be surrounded by spaces
113
102
  )?
114
103
  (?:
115
- \p{L}
104
+ \p{L}
116
105
  |
117
106
  (?: ## note - restrict [.&'] to single char usage (no doubled e.g. && etc.)
118
107
  \. (?! \.) ## allow single points only (now two or more etc.)
119
- |
108
+ |
120
109
  & (?! &)
121
110
  |
122
111
  ' (?! ')
@@ -126,11 +115,11 @@ TEXT_RE = %r{
126
115
  \d+
127
116
  (?!
128
117
  [0-9h'+] | ## protected break on 12h / 12' / 1-1
129
- ## check usege for 3+4 - possible? where ? why?
118
+ ## check usege for 3+4 - possible? where ? why?
130
119
  (?:[.:-]\d) ## protected/exclude/break on 12.03 / 12:03 / 12-12
131
120
  ## BUT allow Park21-Arena for example e.g. 21-A :-)
132
121
  )
133
- [°]? ## followed by optional ord
122
+ [°]? ## followed by optional ord
134
123
  ## negative lookahead for numbers
135
124
  ## note - include digits itself!!!
136
125
  ## note - remove / (slash) e.g. allows UDI'19/Beter Bed
@@ -176,15 +165,15 @@ TEXT_RE = %r{
176
165
  # e.g. (AUT) or ,AUT or AUT
177
166
  (?:
178
167
  [ ] ## note - do NOT allow more than one space!!! - why? why not?
179
- \(
168
+ \(
180
169
  ## note - auto-exclude reserved (aet) from SCORE_FULLER_MORE!!!
181
170
  ## plus golden goal (gg)/sudden death (sd), silver goal (sg)
182
- ## (ht), (ft)
171
+ ## (ht), (ft)
183
172
  (?! (?: aet | agget | asdet | asget | ht | ft )
184
173
  \)
185
- )
174
+ )
186
175
  (?:
187
- [A-Z]{1,5}
176
+ [A-Z]{1,5}
188
177
  )
189
178
  \)
190
179
  )
@@ -207,5 +196,26 @@ TEXT_RE = %r{
207
196
  }ix
208
197
 
209
198
 
199
+
200
+
201
+
202
+ ###
203
+ ## helper for testing regex match for team names
204
+
205
+ def self._parse_team( str )  ## returns the MatchData only if TEXT_RE matches the whole (stripped) string - otherwise nil
206
+ ## note - leading/trailing spaces get stripped before matching
207
+ m = TEXT_RE.match( str.strip )
208
+ if m && m.pre_match == '' && m.post_match == ''
209
+ m
210
+ elsif m
211
+ ## note - match found BUT it does not span from start to end-of-string!!!
212
+ ## for now returns nil (same as no match) - todo - report as error somehow??
213
+ nil
214
+ else
215
+ nil ## no match - return nil
216
+ end
217
+ end
218
+
219
+
210
220
  end # class Lexer
211
221
  end # module SportDb
@@ -12,7 +12,7 @@ class Lexer
12
12
  # note - optional timezone possible e.g.
13
13
  # 18:30 UTC+1 or 18:30 BST/UTC+1 or such!!!
14
14
  # 18:30 UTC+01 or 18:30 BST/UTC+01
15
- #
15
+ #
16
16
  #
17
17
  # note 18.30 no longer supported - MUST use 18:30 or 18h30 !!!
18
18
  #
@@ -22,31 +22,31 @@ class Lexer
22
22
  # and, thus, must always follow time
23
23
  # e.g. 18:30 (19:30 BST)
24
24
  #
25
- ## local time e.g (19:30 UTC+1) or (19:30 BST/UTC+1) or
25
+ ## local time e.g (19:30 UTC+1) or (19:30 BST/UTC+1) or
26
26
  ## note - timezone is optional! e.g. (19:30) works too
27
27
 
28
28
 
29
29
  TIME_RE = %r{
30
30
  \b
31
- (?<time>
31
+ (?<time>
32
32
  (?<hour>\d{1,2})
33
- [:h]
33
+ [:h]
34
34
  (?<minute>\d{2})
35
-
35
+
36
36
  #### optional (inline) timezone
37
37
  ## note - non-utc timezone MUST be hard-coded (added) here!!!
38
38
  ## avoids eating-up team names (separated by one space)
39
- ## e.g. 18:30 MEX v MEX
39
+ ## e.g. 18:30 MEX v MEX
40
40
  (?:
41
41
  [ ] ## require space - why? why not
42
42
  (?<timezone>
43
- (?:
43
+ (?:
44
44
  ## GMT - Greenwich Mean Time
45
45
  ## BST - British Summer Time
46
46
  ## CES?T - Central European (Summer) Time
47
47
  ## EES?T - Eastern European (Summer) Time
48
48
  ##
49
- (?: GMT|BST|CES?T|EES?T)
49
+ (?: GMT|BST|CES?T|EES?T)
50
50
  (?: /
51
51
  UTC (?: [+-]\d{1,4} | ±0)
52
52
  )?
@@ -57,19 +57,20 @@ TIME_RE = %r{
57
57
  )
58
58
  )
59
59
  )?
60
- )
61
- \b
60
+ )
61
+ \b
62
62
 
63
63
  ####
64
64
  ### note - local time is now INLINE and MUST follow time
65
- (?:
65
+ (?:
66
66
  [ ]+ ## todo/check - make space optional - why? why not?
67
67
  \(
68
- (?<time_local>
68
+ (?<time_local>
69
69
  (?<local_hour>\d{1,2})
70
70
  [:h] ### todo/fix - MUST match style in time above!!!
71
+ ### use capture with backref!!!!
71
72
  (?<local_minute>\d{2})
72
-
73
+
73
74
  ####
74
75
  ## optional "local" timezone name eg. BRT or CEST etc.
75
76
  (?:
@@ -78,16 +79,16 @@ TIME_RE = %r{
78
79
  (?: [A-Z]{3,4}
79
80
  (?: /
80
81
  UTC (?: [+-]\d{1,4} | ±0)
81
- )?
82
+ )?
82
83
  )
83
- |
84
+ |
84
85
  (?: ## e.g. 0 or 00 or 0000
85
86
  UTC (?: [+-]\d{1,4} | ±0)
86
- )
87
+ )
87
88
  )
88
89
  )? # note - make timezone optional!!!
89
90
  )
90
- \)
91
+ \)
91
92
  )?
92
93
  }ix
93
94
 
@@ -98,20 +99,20 @@ def self._build_time( m )
98
99
  ## 12h40 => 12:40 etc.
99
100
  ## keep string (no time-only type in ruby)
100
101
  data = { time: {} }
101
-
102
+
102
103
  hour = m[:hour].to_i(10) ## allow 08/07/etc.
103
104
  minute = m[:minute].to_i(10)
104
-
105
+
105
106
  ## check if 24:00 possible? or only 0:00 (23:59)
106
107
  unless (hour >=0 && hour <=23) &&
107
108
  (minute >=0 && minute <=59)
108
109
  raise ArgumentError, "parse error - time >#{m[:time]}< out-of-range"
109
110
  end
110
-
111
+
111
112
  data[:time][:h] = hour
112
113
  data[:time][:m] = minute
113
- data[:time][:timezone] = m[:timezone] if m[:timezone]
114
-
114
+ data[:time][:timezone] = m[:timezone] if m[:timezone]
115
+
115
116
 
116
117
  ## check if local time present e.g.
117
118
  ## 18:30 (19:30)
@@ -121,21 +122,23 @@ def self._build_time( m )
121
122
 
122
123
  local_hour = m[:local_hour].to_i(10) ## allow 08/07/etc.
123
124
  local_minute = m[:local_minute].to_i(10)
124
-
125
+
125
126
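  ## check if 24:00 possible? or only 0:00 (23:59) - NOTE(review): the check below tests hour/minute (already validated above), NOT local_hour/local_minute - looks like a copy-paste slip, confirm & fix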
126
127
  unless (hour >=0 && hour <=23) &&
127
128
  (minute >=0 && minute <=59)
128
129
  raise ArgumentError, "parse error - local time >#{m[:time_local]}< out-of-range"
129
130
  end
130
-
131
+
131
132
  data[:time_local][:h] = local_hour
132
133
  data[:time_local][:m] = local_minute
133
- data[:time_local][:timezone] = m[:local_timezone] if m[:local_timezone]
134
- end
134
+ data[:time_local][:timezone] = m[:local_timezone] if m[:local_timezone]
135
+ end
135
136
 
136
137
  data
137
138
  end
138
139
  def _build_time(m) self.class._build_time(m); end
139
140
 
141
+
142
+
140
143
  end # class Lexer
141
144
  end # module SportDb
@@ -5,25 +5,6 @@ class Lexer
5
5
 
6
6
 
7
7
 
8
- BASICS_RE = %r{
9
- (?<vs>
10
- (?<=[ ]) # positive lookbehind for space
11
- (?-i:
12
- vs\.?|v|VS
13
- ) # note - only match case sensitive (downcased letters)!!!
14
- # note - bigger match first e.g. vs than v etc.
15
- (?=[ ]) # positive lookahead for space
16
- )
17
- |
18
- (?<spaces> [ ]{2,}) |
19
- (?<space> [ ])
20
- |
21
- (?<sym> [,;/@|()\[\]-] ) ### note: add parantheses too e.g () - why? why not?
22
- }ix
23
-
24
-
25
-
26
-
27
8
  ###
28
9
  ## add att(endance) e.g. att: 18000
29
10
  ##
@@ -32,7 +13,8 @@ BASICS_RE = %r{
32
13
  ATTENDANCE_RE = %r{
33
14
  (?<attendance>
34
15
  \b
35
- att: [ ]*
16
+ (?: attendance|att )
17
+ : [ ]*
36
18
  (?<value>
37
19
  [1-9]
38
20
  (?: _? \d+ )*
@@ -41,125 +23,51 @@ ATTENDANCE_RE = %r{
41
23
  )}ix
42
24
 
43
25
 
44
- ## "inline" match status e.g.
45
- ## Clapham Rovers w/o Hitchin
46
- ## Queen's Park bye
47
-
48
- ## add support for WO or W-0 too - why? why not?
49
- INLINE_WO_RE = %r{
50
- (?<inline_wo>
51
- \b (?: w/o | W/O ) \b
52
- )}x ## note - NOT case insensitive
53
-
54
- INLINE_BYE_RE = %r{
55
- (?<inline_bye>
56
- \b (?: bye | BYE ) \b
57
- )}x ## note - NOT case insensitive
58
26
 
59
27
 
60
28
  ###
61
- # A n/p B (note - basically a inline short form of A v B [cancelled] )
62
- # N/P
63
- INLINE_NP_RE = %r{
64
- (?<inline_np>
65
- \b (?: n/p | N/P ) \b
66
- )}x ## note - NOT case insensitive
29
+ ## home/away/neutral - (h), (a), (n)
30
+ ## add support for h/a/n
31
+ ## with (?-i \b [han] \b) lower-case and \b boundary - why? why not?
67
32
 
33
+ TEAM_HOME_RE = %r{ (?<team_home> \(h\) )}ix
34
+ TEAM_AWAY_RE = %r{ (?<team_away> \(a\) )}ix
35
+ TEAM_NEUTRAL_RE = %r{ (?<team_neutral> \(n\) )}ix
68
36
 
69
- ###
70
- # abd/abd. or aban/aban. [abandoned]
71
- # ABD/ABAN
72
- INLINE_ABD_RE = %r{
73
- (?<inline_abd>
74
- \b (?: abd\.? |
75
- aban\.? |
76
- ABD | ABAN
77
- )
78
- ## POSITIVE lookahead - requires space
79
- (?= [ ])
80
- )}x ## note - NOT case insensitive
81
-
82
- ####
83
- # susp/susp. [suspended]
84
- # SUSP
85
- INLINE_SUSP_RE = %r{
86
- (?<inline_susp>
87
- \b (?: susp\.? |
88
- SUSP )
89
- ## POSITIVE lookahead - requires space
90
- (?= [ ])
91
- )}x ## note - NOT case insensitive
92
-
93
-
94
- ####
95
- # ppd/ppd. or pst/pst. or pstp/pstp. or postp/postp. [postponed]
96
- # PPD/PSTP/POSTP/P-P
97
- # todo/check - add/allow p-p too - why? why not?
98
- INLINE_PPD_RE = %r{
99
- (?<inline_ppd>
100
- \b (?: ppd\.? |
101
- pst\.? |
102
- po?stp\.? |
103
- PPD | PST | PO?STP | P-P
104
- )
105
- ## POSITIVE lookahead - requires space
106
- (?= [ ])
107
- )}x ## note - NOT case insensitive
108
-
109
- ####
110
- # void via x-x X-X
111
- # todo/check - only allow X-X - why? why not?
112
- INLINE_VOID_RE = %r{
113
- (?<inline_void>
114
- \b (?: x-x |
115
- X-X
116
- )
117
- ## POSITIVE lookahead - requires space
118
- (?= [ ])
119
- )}x ## note - NOT case insensitive
120
-
121
-
122
- ####
123
- # awd/awd. [awarded]
124
- # AWD
125
- # note - recommendation is to allways include score
126
- # thus, use/prefer SCORE_AWD e.g. 0-3 awd
127
- INLINE_AWD_RE = %r{
128
- (?<inline_awd>
129
- \b (?: awd\.? | AWD )
130
- ## POSITIVE lookahead - requires space
131
- (?= [ ])
132
- )}x ## note - NOT case insensitive
133
37
 
134
- ###
135
- # canc/canc. [cancelled]
136
- # CANC
137
- INLINE_CANC_RE = %r{
138
- (?<inline_canc>
139
- \b (?: canc\.? | CANC )
140
- ## POSITIVE lookahead - requires space
141
- (?= [ ])
142
- )}x ## note - NOT case insensitive
143
38
 
144
39
 
145
- ###
146
- ## home/away/neutral - (h), (a), (n)
147
- ## add support for h/a/n
148
- ## with (?-i \b [han] \b) lower-case and \b boundry - why? why not?
149
40
 
150
- TEAM_HOME_RE = %r{ (?<team_home> \(h\) )}xi
151
- TEAM_AWAY_RE = %r{ (?<team_away> \(a\) )}xi
152
- TEAM_NEUTRAL_RE = %r{ (?<team_neutral> \(n\) )}xi
41
+ ##
42
+ ## note VS
43
+ ## remove VS for now
44
+ ## e.g. Olympia Wijgmaal v VS Kortenaken
45
+
46
+
47
+
48
+ ## note - only match case sensitive (downcased letters)!!!
49
+ ## note - bigger match first e.g. vs than v etc.
50
+ VS_RE = %r{
51
+ (?<vs>
52
+ (?<=[ ]) # positive lookBEHIND for space
53
+ (?-i:
54
+ vs\.?|v
55
+ )
56
+ (?=[ ]) # positive lookAHEAD for space
57
+ )
58
+ }ix
59
+
153
60
 
154
61
 
155
62
 
63
+ ##############
156
64
  ## "top-level" regex used for:
157
65
  ## - date_header
158
66
  ## - match_header & match_line_more
159
67
  ## - match_line
160
68
 
161
-
162
69
  RE = Regexp.union(
70
+ SPACES_RE,
163
71
  STATUS_RE, ## match status e.g. [cancelled], etc.
164
72
 
165
73
  INLINE_WO_RE, ## (inline) match status - w/o (walkout)
@@ -167,11 +75,11 @@ RE = Regexp.union(
167
75
  INLINE_BYE_RE, ## (inline) match status - bye (advance to next round)
168
76
  INLINE_ABD_RE, ## (inline) match status - abd/abd. (abandoned)
169
77
  INLINE_SUSP_RE, ## (inline) match status - susp/susp. (suspended)
170
- INLINE_PPD_RE, ## (inline) match status - ppd/ppd. or pstp/pstp. or postp/postp. or p-p (postponed)
171
- INLINE_VOID_RE, ## (inline) match status - x-x (voided)
78
+ INLINE_PPD_RE, ## (inline) match status - ppd/ppd. or pstp/pstp. or postp/postp. or p-p (postponed)
79
+ INLINE_VOID_RE, ## (inline) match status - x-x (voided)
172
80
  INLINE_AWD_RE, ## (inline) match status - awd/awd. (awarded)
173
81
  INLINE_CANC_RE, ## (inline) match status - canc/canc. (cancelled/canceled)
174
-
82
+
175
83
 
176
84
  TEAM_HOME_RE, ## (H)
177
85
  TEAM_AWAY_RE, ## (A)
@@ -180,26 +88,25 @@ RE = Regexp.union(
180
88
  NOTE_RE, ### fix - change to INLINE_NOTE !!!
181
89
  DATE_LEGS_RE, # note - must go before date!!!
182
90
  DATE_RE, ## note - date must go before time (e.g. 12.12. vs 12.12)
183
- TIME_RE,
184
- ATTENDANCE_RE, # note - allow att: for now inline in matches too - why? why not?
91
+ TIME_RE,
92
+
93
+ ATTENDANCE_RE, # note - allow att: for now inline in matches too - why? why not?
94
+
95
+ SCORE_FULL_1ST_RE, # note - MUST go before SCORE_LEGS_RE!!
96
+ ## e.g. 2-2, 5-1 pen.
185
97
  SCORE_LEGS_RE,
186
- SCORE_FULL_RE,
98
+ SCORE_FULL_RE,
187
99
  SCORE_FULLER_RE,
188
100
  SCORE_FULLER_MORE_RE,
189
101
  SCORE_AWD_RE, # (inline) score awarded e.g. 3-0 awd or 0-1 awd. etc.
190
102
  SCORE_ABD_RE, # (inline) score abandoned e.g. 2-1 abd.
191
103
  SCORE_RE, ## note basic score e.g. 1-1 must go after SCORE_FULL_RE!!!
192
-
193
- ## note - add "experimental" "split" scores for now
194
- SCORE_TEAM_RE, ## e.g. (2) 1 for "split" scores
195
- SCORE_TEAM_PEN_RE, ## e.g. 1 (2)
196
-
197
- BASICS_RE,
104
+
105
+ VS_RE,
106
+
198
107
  TEXT_RE,
199
- ## note - score_team_num (e.g. 0 or 10 etc.)
200
- ## MUST BE after TEXT
201
- ## only match if nothing else matches (expect ANY)
202
- SCORE_TEAM_NUM_RE, ## e.g. 0 or 1 or 9 or 11 etc. (<100)
108
+
109
+ %r{ (?<sym> [,@()-] ) }x, ## todo - check if "standalone" comma (,) in use?
203
110
  ANY_RE,
204
111
  )
205
112
 
@@ -210,25 +117,25 @@ RE = Regexp.union(
210
117
 
211
118
  ## ord (for ordinal number)
212
119
  ## e.g. (51) or (1) etc. - limit digits of number - why? why not???
213
-
120
+
214
121
  START_WITH_ORD = %r{
215
- \A
122
+ \A
216
123
  [ ]* ## ignore leading spaces (if any)
217
124
  (?<ord>
218
- \(
219
- (?<value>\d+)
125
+ \(
126
+ (?<value>\d+)
220
127
  \)
221
128
  )}ix
222
129
 
223
130
 
224
- ###
131
+ ###
225
132
  ## e.g. 1930, 1986, 2002, 2010, 2022, 2026
226
133
  ## note - only YYYY
227
134
  ## note - look out for clubs like 1860 München (de) !!!
228
135
  ## 1899 Hoffenheim (de)
229
136
  ## 1896 Löwenherz (ch - a.k.a. FC Winterthur ??)
230
137
  ## any others starting with YYYY ?!
231
- ## note - YEAR requires TWO (trailing) spaces !!!!! e.g.
138
+ ## note - YEAR requires TWO (trailing) spaces !!!!! e.g.
232
139
  ## 1930 Uruguay 4-2 Argentina
233
140
  ## 1934 Italy 2-1 Czechoslovakia (AET)
234
141
  ## 2022 Argentina 3-3 France (AET, 4-2 pen)
@@ -243,17 +150,17 @@ START_WITH_YEAR = %r{
243
150
  (?<year>
244
151
  \d{4}
245
152
  )
246
- ## positive lookahead
247
- (?= [ ]{2} | ## min. TWO spaces or
153
+ ## positive lookahead
154
+ (?= [ ]{2} | ## min. TWO spaces!!! or
248
155
  [ ]@ | ## space with geo marker or
249
156
  [ ]* \z ## year (date) header (end-of-line/string)
250
- )
157
+ )
251
158
  }x
252
159
 
253
160
 
254
161
 
255
162
  ###
256
- ## check for headings
163
+ ## check for headings
257
164
  ## e.g. = heading 1
258
165
  ## == heading 2 etc.
259
166
  ## =Eurochampionship=
@@ -265,28 +172,20 @@ START_WITH_YEAR = %r{
265
172
 
266
173
  HEADING_RE = %r{ \A
267
174
  [ ]* ## ignore leading spaces (if any)
268
- (?<heading_marker> ={1,6} )
175
+ (?<heading_marker> ={1,6} )
269
176
  [ ]*
270
177
  (?<heading>
271
178
  ## must start with letter - why? why not?
272
179
  ### 1st round
273
- ## allow numbers e.g. Group A - 1
274
- [^=]+? ## use non-greedy
180
+ ## allow numbers e.g. Group A - 1
181
+ [^=]+? ## use non-greedy
275
182
  )
276
183
  [ ]* ## ignore trailing spaces (if any)
277
- (?: =* ) ## allow any trailing heading markers
184
+ (?: =*) ## allow any trailing heading markers
278
185
  [ ]* ## ignore trailing spaces (if any)
279
186
  \z
280
187
  }ix
281
188
 
282
189
 
283
- HRULER_RE = %r{
284
- \A
285
- [ ]* ## ignore leading spaces (if any)
286
- -{3,} ## must be at least three dashes!!!
287
- [ ]* ## ignore trailing spaces (if any)
288
- \z
289
- }ix
290
-
291
190
  end # class Lexer
292
191
  end # module SportDb
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 7
7
- PATCH = 1
7
+ PATCH = 2
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version