sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
@@ -7,191 +7,188 @@ class Lexer
7
7
  ## add more variants - why? why not?
8
8
 
9
9
 
10
+ POSTPONED = %Q{ (?<postponed> postponed | pst\\.? | po?stp\\.? | ppd\\.? ) }
11
+ CANCELED = %Q{ (?<canceled> cancell?ed | canc\\.? ) } ## add can/can. - why? why not?
12
+ WALKOVER = %Q{ (?<walkover> walkover | w/o | wo ) } ## add o/w too - why? why not?
13
+ AWARDED = %Q{ (?<awarded> awarded | awd\\.? ) }
14
+ SUSPENDED = %Q{ (?<suspended> suspended | susp\\.? ) }
15
+ ABANDONED = %Q{ (?<abandoned> abandoned | aban\\.? | abd\\.? ) }
16
+ ANNULLED = %Q{ (?<annulled> annulled ) }
17
+ VOIDED = %Q{ (?<voided> voided | void ) } ### note - alternative (name) to annulled
18
+
19
+ REPLAY = %Q{ (?<replay> replay | repl\\.? ) }
20
+
21
+
22
+ ##
23
+ ## note - status_note incl. complete text incl. <status> (not normalized)
24
+ ## <status> gets normalized e.g. ppd. => postponed etc.
25
+
10
26
  STATUS_RE = %r{
11
27
  \[
12
28
  (?:
13
- ### opt 1 - allow long forms with note/comment for some stati
14
- (?: (?<status> awarded
15
- ## e.g. [awarded match to Leones Negros by undue alignment; original result 1-2]
16
- ## [awarded 3-0 to Cafetaleros by undue alignment; originally ended 2-0]
17
- ## [awarded 3-0; originally 0-2, América used ineligible player (Federico Viñas)]
29
+ #############################################
30
+ ### opt 1 - allow long forms with note/comment for some stati
31
+ ## e.g. [postponed due to tropical storm "Hanna"]
32
+ ## [suspended at 84' by storm; result stood]
33
+ #########################
34
+ (?: (?<status_note>
35
+ (?<status>
36
+ ####################
37
+ ## pre-match (not played)
38
+ #{POSTPONED}
39
+ |
40
+ #{CANCELED}
41
+ |
42
+ #{WALKOVER}
43
+ |
44
+ ######################
45
+ ## pre/post match
46
+ #{AWARDED}
18
47
  |
19
- annulled
48
+ ########################
49
+ ## post match - (partially) played
50
+ #{SUSPENDED}
51
+ |
52
+ #{ABANDONED}
20
53
  |
21
- abandoned
22
- ## e.g. [abandoned at 1-1 in 65' due to cardiac arrest Luton player Tom Lockyer]
23
- ## [abandoned at 0-0 in 6' due to waterlogged pitch]
24
- ## [abandoned at 5-0 in 80' due to attack on assistant referee by Cerro; result stood]
25
- ## [abandoned at 1-0 in 31']
26
- ## [abandoned at 0-1' in 85 due to crowd trouble]
54
+ #{ANNULLED}
27
55
  |
28
- postponed
29
- ## e.g. [postponed due to problems with the screen of the stadium]
30
- ## [postponed by storm]
31
- ## [postponed due to tropical storm "Hanna"]
32
- ## [postponed from Sep 10-12 due to death Queen Elizabeth II]
33
- |
34
- suspended
35
- ## e.g. [suspended at 0-0 in 12' due to storm]
36
- ## [suspended at 84' by storm; result stood]
37
- |
38
- verified
39
- ## e.g. [verified 2:0 wo.]
40
-
41
-
42
- ) [ ;,]* (?<status_note> [^\]]+ )
43
- [ ]*
56
+ #{VOIDED} ### note - alternative to annulled
57
+ ) ## end-of-<status>
58
+ [ :;,-]+ ## leading spaces (or separators)
59
+ [^\]]+? ## note - add non-greedy match
60
+ ) ## end-of-<status-note>
61
+ [ ]* ## eat-up optional trailing spaces
44
62
  )
45
- |
46
-
47
- ## opt 2 - short from only (no note/comments)
63
+ |
64
+ ########################################
65
+ ## opt 2 - short form only (no note/comments) e.g. [postponed], [Canceled], etc.
66
+ ####################################
48
67
  (?<status>
49
- cancelled|canceled|can\.
68
+ ####################
69
+ ## pre-match (not played)
70
+ #{POSTPONED}
50
71
  |
51
- abandoned|abd\.
72
+ #{CANCELED}
52
73
  |
53
- postponed
74
+ #{WALKOVER}
54
75
  |
55
- awarded|awd\.
76
+ ######################
77
+ ## pre/post match
78
+ #{AWARDED}
56
79
  |
57
- walkover|w/o ## add o/w too - why? why not?
80
+ ########################
81
+ ## post match - (partially) played
82
+ #{SUSPENDED}
58
83
  |
59
- replay
84
+ #{ABANDONED}
60
85
  |
61
- annulled
86
+ #{ANNULLED}
62
87
  |
63
- suspended ### todo/fix - add status upstream - why? why not?
64
- ### move to note(s) - do NOT interpret as status - why? why not?
88
+ #{VOIDED} ### note - alternative to annulled
65
89
  |
66
- verified ### todo/fix - add status upstream (same as ??) - why? why not?
67
- ### move to note(s) - do NOT interpret as status - why? why not?
90
+ #{REPLAY} ### todo/fix - keep replay - why? why not?
91
+ ### prefer replay in round e.g.
92
+ ## ▪ Round 17, Replay
93
+ ## ▪ Semi-finals, Replays
68
94
  )
69
95
  )
70
96
  \]
71
97
  }ix
72
98
 
73
99
 
100
+ def self._build_status( m )
101
+ status = {}
102
+ ## note - norm status text - why? why not?
103
+ status[:status] = if m[:postponed] then 'postponed'
104
+ elsif m[:canceled] then 'canceled'
105
+ elsif m[:walkover] then 'walkover'
106
+ elsif m[:awarded] then 'awarded'
107
+ elsif m[:suspended] then 'suspended'
108
+ elsif m[:abandoned] then 'abandoned'
109
+ elsif m[:annulled] ||
110
+ m[:voided] then 'annulled'
111
+ elsif m[:replay] then 'replay'
112
+ else ## fallback on "generic" status (shouldn't happen)
113
+ m[:status]
114
+ end
74
115
 
75
-
76
- ###
77
- ## todo/fix - move to token-note.rb (standalone) file
78
-
79
- NOTE_RE = %r{
80
- \[
81
- (?<note>
82
- (?: ## starting with ___ PLUS requiring more text
83
- (?:
84
- nb:
85
- ## e.g. [NB: between top-8 of regular season]
86
- # [NB: América, Morelia and Tigres qualified on better record regular season]
87
- # [NB: Celaya qualified on away goals]
88
- # [NB: Alebrijes qualified on away goal]
89
- # [NB: Leones Negros qualified on away goals]
90
- #
91
- # todo/fix:
92
- # add "top-level" NB: version
93
- ## with full (end-of) line note - why? why not?
94
- |
95
- rescheduled
96
- ## e.g. [rescheduled due to earthquake occurred in Mexico on September 19]
97
- |
98
- declared
99
- ## e.g. [declared void]
100
- |
101
- remaining
102
- ## e.g. [remaining 79']
103
- ## [remaining 84']
104
- ## [remaining 59']
105
- ## [remaining 5']
106
- )
107
- [ ]
108
- [^\]]+? ## slurp all to next ] - (use non-greedy)
109
- )
110
- )
111
- \]
112
- }ix
113
-
114
-
115
-
116
- SCORE_NOTE_RE = %r{
117
- \[
118
- (?<score_note>
119
- (?: # plain aet e.g. [aet]
120
- aet | a\.e\.t\. |
121
- after [ ] extra [ -] time
122
- )
123
- |
124
- (?: # plain penalties e.g. [3-2 pen]
125
- \d{1,2}-\d{1,2}
126
- [ ]* (?: p|pen )
127
- )
128
- |
129
- (?: # plain aet with penalties e.g. [aet; 4-3 pen] or [aet, 4-3p]
130
- aet [ ]* [,;]
131
- [ ]*
132
- \d{1,2}-\d{1,2}
133
- [ ]* (?: p|pen )
134
- )
135
- |
136
- (?:
137
- ## e.g. Spain wins on penalties
138
- ## 1860 München wins on penalties etc.
139
- ## must start with digit 1-9 or letter
140
- ## todo - add more special chars - why? why not?
141
- ##
142
- (?:
143
- aet [ ]* ## allow space here - why? why not
144
- [,;][ ]
145
- )?
146
-
147
- (?:
148
- (?: # opt 1 - no team listed/named - requires score
149
- (?: won|wins? ) [ ] ## note - allow won,win or wins
150
- (?: ## score
151
- \d{1,2}-\d{1,2}
152
- [ ]
153
- )
154
- on [ ] (?: pens | penalties |
155
- aggregate )
156
- )
157
- |
158
- (?: # opt 2 - team required; score optional
159
- (?: ## team required
160
- [1-9\p{L}][0-9\p{L} .-]+?
161
- [ ]
162
- )
163
- (?: won|wins? ) [ ] ## won/win/wins
164
- (?: ## score optional
165
- \d{1,2}-\d{1,2}
166
- [ ]
167
- )?
168
- on [ ] (?: pens | penalties |
169
- aggregate )
170
- ### [^\]]*? ## allow more? use non-greedy
171
- )
172
- ))
173
- |
174
- (?: ## e.g. agg 3-2 etc.
175
- agg [ ] \d{1,2}-\d{1,2}
176
- )
177
- |
178
- (?: ## e.g. agg 4-4, Ajax win on away goals
179
- (?: ## agg 4-4, optional for now - why? why not?
180
- agg [ ] \d{1,2}-\d{1,2}
181
- [ ]*[,;][ ]
182
- )?
183
- (?: ## team required
184
- [1-9\p{L}][0-9\p{L} .-]+?
185
- [ ]
186
- )
187
- (?: won|wins? ) [ ] # won/win/wins
188
- on [ ] away [ ] goals
189
- )
190
- ) # score_note ref
191
- \]
192
- }ix
116
+ ## includes note? e.g. awarded; originally 2-0
117
+ status[:status_note] = m[:status_note] if m[:status_note]
118
+
119
+ status
120
+ end
121
+ def _build_status( m ) self.class._build_status( m ); end
193
122
 
194
123
 
195
124
  end # class Lexer
196
125
  end # module SportDb
126
+
127
+
128
+ __END__
129
+
130
+
131
+
132
+
133
+ ####################
134
+ ## pre-match (not played)
135
+ postponed|postp\.|ppd\.
136
+ ## e.g. [postponed due to problems with the screen of the stadium]
137
+ ## [postponed by storm]
138
+ ## [postponed due to tropical storm "Hanna"]
139
+ ## [postponed from Sep 10-12 due to death Queen Elizabeth II]
140
+
141
+ cancell?ed|canc\.
142
+
143
+ walkover|w/o|wo
144
+ ## A victory awarded to one team because the opponent was unable
145
+ ## or unwilling to compete (e.g., failing to show up or being disqualified).
146
+ ## -or-
147
+ ## A walkover or "win over" reveals when a team has won a game
148
+ ## without it being played.
149
+ ## -or-
150
+ ## see <https://en.wikipedia.org/wiki/Walkover>
151
+
152
+
153
+
154
+ ######################
155
+ ## pre/post match
156
+ awarded|awd\.
157
+
158
+ ## e.g. [awarded match to Leones Negros by undue alignment; original result 1-2]
159
+ ## [awarded 3-0 to Cafetaleros by undue alignment; originally ended 2-0]
160
+ ## [awarded 3-0; originally 0-2, América used ineligible player (Federico Viñas)]
161
+
162
+ ## A result that is decided by a governing body
163
+ ## (like FIFA or a domestic league) rather than by the play on the pitch.
164
+ ## Usually follows a Forfeit or Walkover.
165
+ ## If a team refuses to play, abandons a match, or fields an ineligible player,
166
+ ## the opponent is typically awarded a 3-0 victory.
167
+
168
+ ########################
169
+ ## post match - (partially) played
170
+ suspended|susp\.
171
+
172
+ ## e.g. [suspended at 0-0 in 12' due to storm]
173
+ ## [suspended at 84' by storm; result stood]
174
+
175
+ ## The match is temporarily halted but intended to be resumed or restarted later.
176
+
177
+ abandoned|aban\.|abd\.
178
+
179
+ ## e.g. [abandoned at 1-1 in 65' due to cardiac arrest
180
+ ## Luton player Tom Lockyer]
181
+ ## [abandoned at 0-0 in 6' due to waterlogged pitch]
182
+ ## [abandoned at 5-0 in 80' due to attack
183
+ ## on assistant referee by Cerro; result stood]
184
+ ## [abandoned at 1-0 in 31']
185
+ ## [abandoned at 0-1' in 85 due to crowd trouble]
186
+
187
+ ## The match started but was stopped by the referee before the final whistle
188
+ ## (e.g., due to a waterlogged pitch or player injury) and did not resume
189
+
190
+ annulled OR voided|void
191
+ ## The match result is struck from the record entirely,
192
+ ## usually due to a team's withdrawal from the league or a severe rule violation.
193
+
197
194
 
@@ -0,0 +1,149 @@
1
+ module SportDb
2
+ class Lexer
3
+
4
+
5
+ ###
6
+ ## check for
7
+ ## table (standing) lines
8
+ ##
9
+ ## e.g.
10
+ ##
11
+ ## Pld W D L GF-GA Pts | d d d d-d d
12
+ ## Pld GF-GA Pts | d d-d d
13
+ ## Pld Pts W D L GF-GA | d d d d d d-d
14
+ ##
15
+ ## Pld = matches played
16
+ ## GF-GA = goals for, goals against
17
+
18
+
19
+ ## Pld W D L GF-GA Pts | d d d d-d d
20
+ ##
21
+ ## 1.BRAZIL 3 2 1 0 7- 2 7
22
+ ## 2.MEXICO 3 2 1 0 4- 1 7
23
+ ## 3.Croatia 3 1 0 2 6- 6 3
24
+ ## 4.Cameroon 3 0 0 3 1- 9 0
25
+
26
+ ## add more headings?? e.g.
27
+ ## Final Table:
28
+ ##
29
+
30
+
31
+ TABLE_HEADING_I_RE = %r{
32
+ \A
33
+ [ ]* ## ignore leading spaces (if any)
34
+ (?<table_heading>
35
+ \b
36
+ P(?:ld)? [ ]+
37
+ W [ ]+
38
+ D [ ]+
39
+ L [ ]+
40
+ Gls [ ]+
41
+ Pts
42
+ \b
43
+ )
44
+ [ ]* ## ignore trailing spaces (if any)
45
+ \z
46
+ }xi
47
+
48
+
49
+ ##
50
+ ## "solid"-style
51
+ ## -----------------------------------------------------
52
+ ## "dashed"-style ??
53
+ ## - - - - - - - - - - - - - - - - - - - - - - - - - - -
54
+
55
+
56
+ TABLE_DIVIDER_RE = %r{
57
+ \A
58
+ [ ]* ## ignore leading spaces (if any)
59
+ (?<table_divider>
60
+ (?: --- ## note - require three dashes minimum (---)
61
+ [-]*
62
+ )
63
+ |
64
+ (?: - [ ]+ - [ ]+ - ## note - require three dashes minimum (- - -)
65
+ (?: [ ]+ -)* ## todo/check - restrict spaces to 2 or 3 or such - why? why not?
66
+ )
67
+ )
68
+ [ ]* ## ignore trailing spaces (if any)
69
+ \z
70
+ }xi
71
+
72
+
73
+ ####
74
+ ## 1.SOLOMON I. 1 1 0 0 3- 1 3
75
+ ## 2.TAHITI 1 0 0 1 1- 3 0
76
+ ## -.Cook Islands withdrew after first match (annulled) due to Covid-19 outbreak in squad
77
+ ## -.Vanuatu withdrew before playing any matches due to Covid-19 outbreak in squad -->
78
+ ##
79
+ ## note - starting with -. is a table note!!!
80
+
81
+
82
+ TABLE_NOTE_RE = %r{
83
+ \A
84
+ [ ]* ## ignore leading spaces (if any)
85
+ -\.
86
+ [ ]*
87
+ (?<table_note>
88
+ .+? ## note - use non-greedy
89
+ )
90
+ [ ]* ## ignore trailing spaces (if any)
91
+ \z
92
+ }xi
93
+
94
+ TABLE_I_RE = %r{
95
+ (?<table>\b
96
+ \d{1,2} [ ]+ # Pld
97
+ \d{1,2} [ ]+ # W
98
+ \d{1,2} [ ]+ # D
99
+ \d{1,2} [ ]+ # L
100
+ (?: \d{1,3} - [ ]* \d{1,3} [ ]+ ) # GF-GA
101
+ \d{1,3} # Pts
102
+ \b
103
+ )}xi
104
+
105
+ ## Pld Pts W D L GF-GA | d d d d d d-d
106
+ ##
107
+ ## 1. ARG^ 3 6 3 0 0 10-4
108
+ ## 2. CHI 3 4 2 0 1 5-3
109
+ ## 3. FRA 3 2 1 0 2 4-3
110
+ ## 4. MEX 3 0 0 0 3 4-13
111
+
112
+ TABLE_II_RE = %r{
113
+ (?<table>\b
114
+ \d{1,2} [ ]+ # Pld
115
+ \d{1,3} [ ]+ # Pts
116
+ \d{1,2} [ ]+ # W
117
+ \d{1,2} [ ]+ # D
118
+ \d{1,2} [ ]+ # L
119
+ (?: \d{1,3} - [ ]* \d{1,3}) # GF-GA
120
+ \b
121
+ )}xi
122
+
123
+
124
+
125
+ #############################################
126
+ # map tables
127
+ # note: order matters; first come-first matched/served
128
+
129
+ ## possible start lines for a table
130
+ ## excludes NOTE
131
+ ## and RULER (e.g. --- or) or such in the future
132
+ TABLE_RE = Regexp.union(
133
+ TABLE_HEADING_I_RE,
134
+ TABLE_I_RE,
135
+ TABLE_II_RE,
136
+ )
137
+
138
+ ## all possible continuation for a table
139
+ ## excludes HEADING
140
+ TABLE_MORE_RE = Regexp.union(
141
+ TABLE_NOTE_RE,
142
+ TABLE_DIVIDER_RE,
143
+ TABLE_I_RE,
144
+ TABLE_II_RE,
145
+ )
146
+
147
+
148
+ end # class Lexer
149
+ end # module SportDb
@@ -2,6 +2,11 @@ module SportDb
2
2
  class Lexer
3
3
 
4
4
 
5
+ ##
6
+ ## todo/fix - change TEXT_RE to TEAM_RE !!!!
7
+ ## do NOT use (anymore) as generic TEXT_RE
8
+
9
+
5
10
 
6
11
  ## todo - use ANY_RE to token_commons or such - for shared by many?
7
12
 
@@ -13,6 +18,14 @@ ANY_RE = %r{
13
18
 
14
19
 
15
20
 
21
+ ## note - TEXT_RE used for TEAM_NAMES
22
+ ## plus as "legacy" shortcut for (simple) group or round names e.g.
23
+ ## Group A, Group 1, ..
24
+ ## Matchday 1, 1. Round,
25
+ ## note - no exception for (shortcut) group or round (MUST match team name pattern!)
26
+
27
+
28
+
16
29
  ## note - do NOT allow single alpha text for now
17
30
  ## add later?? A - B C - D - why?
18
31
  ## opt 1) one alpha
@@ -24,11 +37,13 @@ ANY_RE = %r{
24
37
  ### allow special case - starting text with number e.g.
25
38
  ## number must be followed by space or dot (.)
26
39
  # 1 FC ## allow 1-FC or 1FC - why? why not?
40
+ # 1FC
27
41
  # 1. FC
28
- # 1.FC - XXXX - not allowed for now, parse error
29
- # 1FC - XXXX - now allowed for now, parse error
42
+ # 1.FC
43
+ # 23° Noviembre
30
44
  # 1890 Munich
31
- #
45
+ # 1-FC - XXXX - not allowed for now, parse error
46
+ # 1/FC - XXXX - not allowed for now
32
47
 
33
48
 
34
49
  ##
@@ -61,28 +76,37 @@ TEXT_RE = %r{
61
76
  ## MUST be followed by (optional dot) and
62
77
  ## required space !!!
63
78
  ## MUST be followed by a to z!!!!
64
- \.? ## optional dot
79
+ [.°]? ## optional dot (.) or degree(°) - todo - add number sign too!!
65
80
  [ ]? ## make space optional too - why? why not?
66
81
  ## yes - eg. 1st, 2nd, 5th etc.
67
82
  \p{L}+
68
83
  |
69
- ## opt 3 - add weirdo case
70
- ## e.g. 1/8 Finals 1/4 1/2 ...
71
- 1/ \d{1,2} [ ] \p{L}+
72
- |
73
- ## opt 4 - add another weirdo case
84
+ ## opt 3 - add another weirdo case
74
85
  ## e.g. 's Gravenwezel-Schilde
75
- '[s]
76
- |
77
- ## opt 5 - add another weirdo case
78
- ## e.g. 5.-8. Platz Playoffs - keep - why? why not?
79
- \d+\.-\d+\. [ ]? \p{L}+
86
+ '[s] [ ] \p{L}+
80
87
  )
81
88
 
82
- (?:(?: (?:[ ] # only single spaces allowed inline!!!
83
- (?! (?-i: vs?[ ])
84
- ) ## note - exclude (v[ ]/vs[ ])
85
- ## AND switch to case-sensitive (via -i!!!)
89
+
90
+ (?:(?: (?:[ ] # only single spaces allowed inline!!!
91
+ ## note - exclude (v[ ]/vs[ ]/vs.[ ])
92
+ ## AND switch to case-sensitive (via -i!!!)
93
+ (?! (?-i: (?: ## note - (big) V not matching for versus!!!
94
+ vs\.?|v|VS|
95
+
96
+ n/p|N/P|
97
+ w/o|W/O|
98
+ abd\.?|ABD|
99
+ aban\.?|ABAN|
100
+ susp\.?|SUSP|
101
+ ppd\.?|PPD|
102
+ pst\.?|PST|
103
+ po?stp\.?|PO?STP|P-P|
104
+ x-x|X-X|
105
+ awd\.?|AWD|
106
+ canc\.?|CANC ) [ ]
107
+ |
108
+ (?: bye|BYE ) (?:[ ]|$))
109
+ )
86
110
  )
87
111
  |
88
112
  [/-] ## must NOT be surrounded by spaces
@@ -90,7 +114,13 @@ TEXT_RE = %r{
90
114
  (?:
91
115
  \p{L}
92
116
  |
93
- [.&'°]
117
+ (?: ## note - restrict [.&'] to single char usage (no doubled e.g. && etc.)
118
+ \. (?! \.) ## allow single points only (not two or more etc.)
119
+ |
120
+ & (?! &)
121
+ |
122
+ ' (?! ')
123
+ )
94
124
  |
95
125
  (?:
96
126
  \d+
@@ -98,8 +128,9 @@ TEXT_RE = %r{
98
128
  [0-9h'+] | ## protected break on 12h / 12' / 1-1
99
129
  ## check usage for 3+4 - possible? where ? why?
100
130
  (?:[.:-]\d) ## protected/exclude/break on 12.03 / 12:03 / 12-12
101
- ## BUT allow Park21-Arena for example e.g. 21-A :-)
131
+ ## BUT allow Park21-Arena for example e.g. 21-A :-)
102
132
  )
133
+ [°]? ## followed by optional ord
103
134
  ## negative lookahead for numbers
104
135
  ## note - include digits itself!!!
105
136
  ## note - remove / (slash) e.g. allows UDI'19/Beter Bed
@@ -140,11 +171,29 @@ TEXT_RE = %r{
140
171
  \)
141
172
  )?
142
173
  (?:
143
- [ ]+ ## allow more than once space - why? why not?
144
- \( (?:
145
- [A-Z]{1,5}
174
+ ######
175
+ # check for country code (cc)
176
+ # e.g. (AUT) or ,AUT or AUT
177
+ (?:
178
+ [ ] ## note - do NOT allow more than one space!!! - why? why not?
179
+ \(
180
+ ## note - auto-exclude reserved (aet) from SCORE_FULLER_MORE!!!
181
+ ## plus golden goal (gg)/sudden death (sd), silver goal (sg)
182
+ ## (ht), (ft)
183
+ (?! (?: aet | agget | asdet | asget | ht | ft )
184
+ \)
185
+ )
186
+ (?:
187
+ [A-Z]{1,5}
146
188
  )
147
189
  \)
190
+ )
191
+ |
192
+ (?:
193
+ [ ]*[,›>][ ]*
194
+ [A-Z]{1,5}
195
+ \b
196
+ )
148
197
  )?
149
198
  ## add lookahead/lookbehind
150
199
  ## must be space!!!