sportdb-parser 0.6.11 → 0.6.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '0902e4f811d2584b7c7360f00557d979579d5df8b48e3370ace0d3e7d211c45d'
4
- data.tar.gz: e03327cfb9f33e39c3cc44063f9915ccc48d1c78b8076ee9fd7ba511d7b7bdc5
3
+ metadata.gz: 4a2e9867bfc8e1dec97214c0d3c335fe0d3cdb0cf5835184081af1447d509e4a
4
+ data.tar.gz: 1b038a316ad0de74bc559972e59832bfe3af5530b6c0547a9b2eaca0ecbfefbf
5
5
  SHA512:
6
- metadata.gz: 14e1f06f0fd5137208cce8a26f9a534317f0538a59f1701ae0140914afae9bed2963979df71d1032771798983dcf5ee5197c0105c6fb040c0e72052b86d282b5
7
- data.tar.gz: e8b973f129cf11f951ba5629b81c53896bec02b949b4eb8d7c175b5fae54e33e31788cdfc18985f0d2759dfc473758512523adddf122e4c5293ce2c45d28543f
6
+ metadata.gz: 11b6cb434a959e39b8dd8a935b9bb0d973e9e47e647c1f19dbc1c536a013b982dc948ca62c2d7a9a661a7aeb276444cfbfb81c4975df30ece993076124fce7bd
7
+ data.tar.gz: b0487df7f949c28c0d4df23d040aeff4eb3247c01b088fe45b65aa6a29a3891781d24bd56d85e99cd1e0d31d82efe953c13f6c689301a9623a01ffa2bdbb45f0
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.6.11
1
+ ### 0.6.13
2
2
  ### 0.0.1 / 2024-07-12
3
3
 
4
4
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -7,6 +7,7 @@ config/rounds_en.txt
7
7
  config/rounds_es.txt
8
8
  config/rounds_misc.txt
9
9
  config/rounds_pt.txt
10
+ config/zones_en.txt
10
11
  lib/sportdb/parser.rb
11
12
  lib/sportdb/parser/lang.rb
12
13
  lib/sportdb/parser/lexer.rb
data/config/rounds_en.txt CHANGED
@@ -17,9 +17,6 @@ Major Semi-Final
17
17
  Minor Semi-Final
18
18
 
19
19
 
20
- 1/8 Finals
21
- Qualifying Round
22
-
23
20
 
24
21
 
25
22
  ## keep weirdo matchday ??
@@ -0,0 +1,20 @@
1
+ #####
2
+ # zone names in english
3
+
4
+
5
+ Western Region
6
+ West Region
7
+ Eastern Region
8
+ East Region
9
+
10
+ Western Conference
11
+ Eastern Conference
12
+
13
+ Northern Zone
14
+ Western Zone A
15
+ Western Zone B
16
+ Central Zone
17
+ Central & Eastern Zone
18
+ Southern Zone
19
+
20
+
@@ -31,17 +31,20 @@ end
31
31
 
32
32
 
33
33
 
34
- ROUND_RE = %r{^(
35
- ## add special case for group play-off rounds!
34
+ ROUND_RE = %r{^
35
+ (?:
36
+
37
+ ## add special case for group play-off rounds!
36
38
  ## group 2 play-off (e.g. worldcup 1954, 1958)
37
39
  ##
38
40
  ### note - allow Group ("stand-alone") as "generic" round for now
39
41
  ## BUT do NOT allow Group 1, Group 2, Group A, Group B, etc.
40
- (?: Group [ ] [A-Z0-9]+ [ ] Play-?offs? |
41
- Group (?: [ ] phase)? |
42
+ (?: Group [ ] [a-z0-9]+ [ ] Play-?offs? |
43
+ Group (?: [ ] (?: phase|stage))? |
42
44
  League (?: [ ] phase)?
43
45
  )
44
46
  |
47
+
45
48
  # round - note - requires number e.g. round 1,2, etc.
46
49
  # note - use 1-9 regex (cannot start with 0) - why? why not?
47
50
  # make week 01 or round 01 or matchday 01 possible?
@@ -50,16 +53,22 @@ ROUND_RE = %r{^(
50
53
  Week
51
54
  )
52
55
  [ ] [1-9][0-9]*
56
+ (?: ## note - add optional Matchday 1 of 2 or such
57
+ [ ] of [ ] [1-9][0-9]*
58
+ )?
53
59
  )
54
60
  |
61
+ (?: Round [ ] One
62
+ )
63
+ |
55
64
  ## starting with qual(ification)
56
65
  ## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
57
66
  ## or
58
67
  ## Playoff Round 1
59
68
  ## Play-in Round 1
60
69
  (?: (?: Qual \. |
61
- Play-?off |
62
- Play-?in
70
+ Play[ -]?off |
71
+ Play[ -]?in
63
72
  )
64
73
  [ ] Round [ ] [1-9][0-9]* )
65
74
  |
@@ -67,44 +76,71 @@ ROUND_RE = %r{^(
67
76
  ## First Round
68
77
  ## Play-off Round
69
78
  ## Final Round (e.g. Worldcup 1950)
70
- (?:
71
- (?: [1-9][0-9]* \. |
72
- 1st | First |
73
- 2nd | Second |
74
- Play-?off |
75
- Final
76
- )
77
- [ ] Round
79
+ (?: (?:
80
+ Play[ -]?off |
81
+ Final |
82
+ Wildcard |
83
+ Qualifying |
84
+ (?:
85
+ (?:
86
+ [1-9][0-9]* \. |
87
+ 1st | First |
88
+ 2nd | Second |
89
+ 3rd | Third |
90
+ 4th | Fourth |
91
+ 5th | Fifth
92
+ )
93
+ (?: ## with optionals
94
+ [ ] Qualifying
95
+ )?
96
+ )
97
+ )
98
+ [ ] Round
78
99
  )
79
100
  |
80
101
  ## starting with preliminary
81
102
  # e.g. Preliminary round
82
103
  (?: Preliminary [ ]
83
104
  (?: Round |
84
- Semi-?finals |
85
- Final
105
+ Semi[ -]?finals |
106
+ Final |
107
+ Qualifier
86
108
  )
87
109
  )
88
110
  |
89
111
  # more (knockout) rounds
90
- # playoffs - playoff, play-off, play-offs
91
- (?: Play-?offs?
92
- (?: [ ]for[ ]quarter-?finals )?
112
+ # playoffs - playoff, play-off, play-offs &
113
+ # playins
114
+ (?:
115
+ Play[ -]?offs? (?: [ ]for[ ]quarter-?finals )?
116
+ |
117
+ Play[ -]?ins?
93
118
  )
94
119
  |
95
120
  # round32
96
121
  (?: Round[ ]of[ ]32 |
97
- Last[ ]32 )
122
+ Last[ ]32 |
123
+ 16th[ ]finals |
124
+ 1/16[ ]finals )
98
125
  |
99
126
  # round16
100
127
  (?: Round[ ]of[ ]16 |
101
128
  Last[ ]16 |
102
- 8th[ ]finals )
129
+ 8th[ ]finals |
130
+ 1/8[ ]finals )
103
131
  |
132
+ # round8 aka quarterfinals
133
+ # note - allow quarter-finals/quarter finals/quarterfinals
134
+ (?: Round[ ]of[ ]8 |
135
+ Last[ ]8 |
136
+ 1/4[ ]finals |
137
+ Quarter[ -]?finals? |
138
+ Quarters )
139
+ |
104
140
  # fifthplace
105
141
  (?:
106
142
  (?: (Fifth|5th)[ -]place
107
- (?: [ ] (?: match|play-?off|final ))?
143
+ (?: [ ] (?: match|final|play[ -]?off ))?
108
144
  ) |
109
145
  (?: Match[ ]for[ ](?: fifth|5th )[ -]place )
110
146
  )
@@ -112,38 +148,32 @@ ROUND_RE = %r{^(
112
148
  # thirdplace
113
149
  (?:
114
150
  (?: (Third|3rd)[ -]place
115
- (?: [ ] (?: match|play-?off|final ))?
151
+ (?: [ ] (?: match|final|play[ -]?off ))?
116
152
  ) |
117
153
  (?: Match[ ]for[ ](?: third|3rd )[ -]place )
118
154
  )
119
155
  |
120
- # quarterfinals
121
- (?:
122
- Quarter-?finals? |
123
- Quarters |
124
- Last[ ]8
125
- )
126
- |
127
- # semifinals
156
+ # round4 aka semifinals
128
157
  (?:
129
- Semi-?finals? |
130
- Semis |
131
- Last[ ]4
132
- )
158
+ Round[ ]of[ ]4 |
159
+ Last[ ]4 |
160
+ Semi[ -]?finals? |
161
+ Semis )
133
162
  |
134
- # final
163
+ # round2 aka final
135
164
  Finals?
136
- |
137
- # decider e.g. Entscheidungsspiel
138
- Decider
139
- |
165
+ |
140
166
  ## add replays
141
167
  ## e.g. Final Replay
142
168
  ## Quarter-finals replays
143
169
  ## First round replays
144
170
  (?:
145
- (?: First [ ] Round |
146
- Quarter-?finals? |
171
+ (?: (?: 1st | First |
172
+ 2nd | Second |
173
+ 3rd | Third |
174
+ 4th | Fourth |
175
+ 5th | Fifth ) [ ] Round |
176
+ Quarter[ -]?finals? |
147
177
  Finals?
148
178
  )
149
179
  [ ] Replays?
@@ -151,7 +181,8 @@ ROUND_RE = %r{^(
151
181
  |
152
182
  ## more
153
183
  (?:
154
- Reclassification
184
+ Decider | # decider e.g. Entscheidungsspiel
185
+ Reclassification
155
186
  )
156
187
  )$}ix
157
188
 
@@ -196,6 +227,19 @@ def self.more_round_names
196
227
  end
197
228
  end
198
229
 
230
+ def self.zone_names
231
+ @zone_name ||= begin
232
+ names = []
233
+ langs = ['en']
234
+ ## sort names by length??
235
+ langs.each do |lang|
236
+ path = "#{SportDb::Module::Parser.root}/config/zones_#{lang}.txt"
237
+ names += read_names( path )
238
+ end
239
+ names
240
+ end
241
+ end
242
+
199
243
 
200
244
  def self.is_round?( text )
201
245
  ### note - use check for case-insensitive
@@ -208,20 +252,37 @@ def self.is_round?( text )
208
252
  ## maybe in the future use our own unaccent and downcase - why? why not?
209
253
  ## note - for now ROUND_RE is also case-insensitive!!
210
254
 
211
- ROUND_RE.match?( text ) || more_round_names.any?{ |str| str.casecmp( text )==0 }
255
+ ROUND_RE.match?( text ) ||
256
+ more_round_names.any?{ |str| str.casecmp( text )==0 }
212
257
  end
213
258
 
259
+ def self.is_zone?( text )
260
+ zone_names.any?{ |str| str.casecmp( text )==0 }
261
+ end
262
+
263
+
214
264
  ##
215
265
  ## keep leg separate (from round) - why? why not?
216
266
  ##
217
267
  LEG_RE = %r{^
218
268
  # leg1
219
- (?: 1st|First)[ ]leg
269
+ (?: 1st|First) [ ] leg
220
270
  |
221
271
  # leg2
222
- (?: 2nd|Second)[ ]leg
272
+ (?: 2nd|Second) [ ] leg
273
+ |
274
+ # leg 1 of 2 / leg 2 of 2
275
+ # note - leg limited to ALWAYS 1/2 of 2 for now - why? why not?
276
+ # for more use match 1/2/3 etc.
277
+ ## allow leg of three (e.g. leg 1 of 3) - why? why not?
278
+ (?: leg [ ] [12]
279
+ (?: [ ] of [ ] 2)? )
280
+ |
281
+ (?: match [ ] [1-9][0-9]* )
223
282
  $}ix
224
283
 
284
+
285
+
225
286
  ### Pair matches/games if marked with leg1 n leg2
226
287
  def self.is_leg?( text )
227
288
  LEG_RE.match?( text )
@@ -26,7 +26,7 @@ end
26
26
  def is_group?( text ) Lang.is_group?( text ); end
27
27
  def is_round?( text ) Lang.is_round?( text ); end
28
28
  def is_leg?( text ) Lang.is_leg?( text ); end
29
-
29
+ def is_zone?( text ) Lang.is_zone?( text ); end
30
30
 
31
31
  ## transforms
32
32
  ##
@@ -187,7 +187,7 @@ def tokenize_with_errors
187
187
  text = t[1]
188
188
  t = if is_group?( text )
189
189
  [:GROUP, text]
190
- elsif is_round?( text ) || is_leg?( text )
190
+ elsif is_round?( text ) || is_leg?( text ) || is_zone?( text )
191
191
  [:ROUND, text]
192
192
  else
193
193
  t ## pass through as-is (1:1)
@@ -90,53 +90,21 @@ NOTE_RE = %r{
90
90
  # add "top-level" NB: version
91
91
  ## with full (end-of) line note - why? why not?
92
92
  |
93
- (?: originally[ ])? scheduled
94
- ## e.g. [originally scheduled to play in Mexico City]
95
- |
96
93
  rescheduled
97
- ## e.g. [Rescheduled due to earthquake occurred in Mexico on September 19]
94
+ ## e.g. [rescheduled due to earthquake occurred in Mexico on September 19]
95
+ |
96
+ declared
97
+ ## e.g. [declared void]
98
98
  |
99
99
  remaining
100
100
  ## e.g. [remaining 79']
101
101
  ## [remaining 84']
102
102
  ## [remaining 59']
103
103
  ## [remaining 5']
104
- |
105
- played
106
- ## e.g. [played in Macaé-RJ]
107
- ## [played in Caxias do Sul-RS]
108
- ## [played in Sete Lagoas-MG]
109
- ## [played in Uberlândia-MG]
110
- ## [played in Brasília-DF]
111
- ## [played in Vöcklabruck]
112
- ## [played in Pasching]
113
- |
114
- declared
115
- ## e.g. [declared void]
116
- |
117
- inter-group
118
- ## e.g. [inter-group A-B]
119
- ## [inter-group C-D]
120
104
  )
121
105
  [ ]
122
106
  [^\]]+? ## slurp all to next ] - (use non-greedy)
123
107
  )
124
- |
125
- (?:
126
- ## starting with in - do NOT allow digits
127
- ## name starting with in possible - why? why not?
128
- in[ ]
129
- [^0-9\]]+?
130
- ## e.g. [In Estadio La Corregidora]
131
- ## [in Unidad Deportiva Centenario]
132
- ## [in Estadio Olímpico Universitario]
133
- ## [in Estadio Victoria]
134
- ## [in UD José Brindis]
135
- ## [in Colomos Alfredo "Pistache" Torres stadium]
136
- ##
137
- ## TODO/FIX
138
- ## remove in ?? - is same as @ Estadio Victoria and such - why? why not=
139
- )
140
108
  )
141
109
  \]
142
110
  }ix
@@ -148,7 +116,7 @@ SCORE_NOTE_RE = %r{
148
116
  (?<score_note>
149
117
  (?: # plain aet e.g. [aet]
150
118
  aet | a\.e\.t\. |
151
- after [ ] extra [ ] time
119
+ after [ ] extra [ -] time
152
120
  )
153
121
  |
154
122
  (?: # plain penalties e.g. [3-2 pen]
@@ -176,7 +144,7 @@ SCORE_NOTE_RE = %r{
176
144
 
177
145
  (?:
178
146
  (?: # opt 1 - no team listed/named - requires score
179
- wins? [ ] ## note - allow win or wins
147
+ (?: won|wins? ) [ ] ## note - allow won,win or wins
180
148
  (?: ## score
181
149
  \d{1,2}-\d{1,2}
182
150
  [ ]
@@ -190,7 +158,7 @@ SCORE_NOTE_RE = %r{
190
158
  [1-9\p{L}][0-9\p{L} .-]+?
191
159
  [ ]
192
160
  )
193
- wins [ ]
161
+ (?: won|wins? ) [ ] ## won/win/wins
194
162
  (?: ## score optional
195
163
  \d{1,2}-\d{1,2}
196
164
  [ ]
@@ -214,7 +182,7 @@ SCORE_NOTE_RE = %r{
214
182
  [1-9\p{L}][0-9\p{L} .-]+?
215
183
  [ ]
216
184
  )
217
- wins? [ ]
185
+ (?: won|wins? ) [ ] # won/win/wins
218
186
  on [ ] away [ ] goals
219
187
  )
220
188
  ) # score_note ref
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 6
7
- PATCH = 11
7
+ PATCH = 13
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.11
4
+ version: 0.6.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-03-03 00:00:00.000000000 Z
11
+ date: 2025-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -85,6 +85,7 @@ extra_rdoc_files:
85
85
  - config/rounds_es.txt
86
86
  - config/rounds_misc.txt
87
87
  - config/rounds_pt.txt
88
+ - config/zones_en.txt
88
89
  files:
89
90
  - CHANGELOG.md
90
91
  - Manifest.txt
@@ -95,6 +96,7 @@ files:
95
96
  - config/rounds_es.txt
96
97
  - config/rounds_misc.txt
97
98
  - config/rounds_pt.txt
99
+ - config/zones_en.txt
98
100
  - lib/sportdb/parser.rb
99
101
  - lib/sportdb/parser/lang.rb
100
102
  - lib/sportdb/parser/lexer.rb