sportdb-parser 0.6.20 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -1
  3. data/Manifest.txt +14 -8
  4. data/Rakefile +1 -1
  5. data/lib/sportdb/parser/blocktxt.rb +99 -0
  6. data/lib/sportdb/parser/lexer.rb +958 -395
  7. data/lib/sportdb/parser/lexer_buffer.rb +97 -0
  8. data/lib/sportdb/parser/lexer_tty.rb +111 -0
  9. data/lib/sportdb/parser/parser.rb +1768 -855
  10. data/lib/sportdb/parser/racc_parser.rb +1 -1
  11. data/lib/sportdb/parser/racc_tree.rb +327 -41
  12. data/lib/sportdb/parser/token-date.rb +160 -178
  13. data/lib/sportdb/parser/token-date_duration.rb +190 -0
  14. data/lib/sportdb/parser/token-geo.rb +59 -59
  15. data/lib/sportdb/parser/token-goals.rb +460 -0
  16. data/lib/sportdb/parser/token-group.rb +43 -0
  17. data/lib/sportdb/parser/token-note.rb +40 -0
  18. data/lib/sportdb/parser/token-prop.rb +70 -54
  19. data/lib/sportdb/parser/token-prop_name.rb +74 -0
  20. data/lib/sportdb/parser/token-round.rb +102 -0
  21. data/lib/sportdb/parser/token-score.rb +323 -47
  22. data/lib/sportdb/parser/token-score_fuller.rb +435 -0
  23. data/lib/sportdb/parser/token-score_legs.rb +59 -0
  24. data/lib/sportdb/parser/token-status.rb +157 -160
  25. data/lib/sportdb/parser/token-table.rb +149 -0
  26. data/lib/sportdb/parser/token-text.rb +72 -23
  27. data/lib/sportdb/parser/token-time.rb +141 -0
  28. data/lib/sportdb/parser/token.rb +242 -105
  29. data/lib/sportdb/parser/token_helpers.rb +92 -0
  30. data/lib/sportdb/parser/version.rb +2 -2
  31. data/lib/sportdb/parser.rb +24 -2
  32. metadata +18 -18
  33. data/config/rounds_de.txt +0 -125
  34. data/config/rounds_en.txt +0 -29
  35. data/config/rounds_es.txt +0 -26
  36. data/config/rounds_misc.txt +0 -25
  37. data/config/rounds_pt.txt +0 -4
  38. data/config/zones_en.txt +0 -20
  39. data/lib/sportdb/parser/lang.rb +0 -298
  40. data/lib/sportdb/parser/token-minute.rb +0 -205
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.20
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-03-11 00:00:00.000000000 Z
11
+ date: 2026-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.4.0
19
+ version: 0.4.1
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 0.4.0
26
+ version: 0.4.1
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: racc
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -80,37 +80,37 @@ extra_rdoc_files:
80
80
  - CHANGELOG.md
81
81
  - Manifest.txt
82
82
  - README.md
83
- - config/rounds_de.txt
84
- - config/rounds_en.txt
85
- - config/rounds_es.txt
86
- - config/rounds_misc.txt
87
- - config/rounds_pt.txt
88
- - config/zones_en.txt
89
83
  files:
90
84
  - CHANGELOG.md
91
85
  - Manifest.txt
92
86
  - README.md
93
87
  - Rakefile
94
- - config/rounds_de.txt
95
- - config/rounds_en.txt
96
- - config/rounds_es.txt
97
- - config/rounds_misc.txt
98
- - config/rounds_pt.txt
99
- - config/zones_en.txt
100
88
  - lib/sportdb/parser.rb
101
- - lib/sportdb/parser/lang.rb
89
+ - lib/sportdb/parser/blocktxt.rb
102
90
  - lib/sportdb/parser/lexer.rb
91
+ - lib/sportdb/parser/lexer_buffer.rb
92
+ - lib/sportdb/parser/lexer_tty.rb
103
93
  - lib/sportdb/parser/parser.rb
104
94
  - lib/sportdb/parser/racc_parser.rb
105
95
  - lib/sportdb/parser/racc_tree.rb
106
96
  - lib/sportdb/parser/token-date.rb
97
+ - lib/sportdb/parser/token-date_duration.rb
107
98
  - lib/sportdb/parser/token-geo.rb
108
- - lib/sportdb/parser/token-minute.rb
99
+ - lib/sportdb/parser/token-goals.rb
100
+ - lib/sportdb/parser/token-group.rb
101
+ - lib/sportdb/parser/token-note.rb
109
102
  - lib/sportdb/parser/token-prop.rb
103
+ - lib/sportdb/parser/token-prop_name.rb
104
+ - lib/sportdb/parser/token-round.rb
110
105
  - lib/sportdb/parser/token-score.rb
106
+ - lib/sportdb/parser/token-score_fuller.rb
107
+ - lib/sportdb/parser/token-score_legs.rb
111
108
  - lib/sportdb/parser/token-status.rb
109
+ - lib/sportdb/parser/token-table.rb
112
110
  - lib/sportdb/parser/token-text.rb
111
+ - lib/sportdb/parser/token-time.rb
113
112
  - lib/sportdb/parser/token.rb
113
+ - lib/sportdb/parser/token_helpers.rb
114
114
  - lib/sportdb/parser/version.rb
115
115
  homepage: https://github.com/sportdb/sport.db
116
116
  licenses:
data/config/rounds_de.txt DELETED
@@ -1,125 +0,0 @@
1
- ###############
2
- # rounds in deutsch (de) / german
3
-
4
- Vorrunde
5
- 1. Vorrunde
6
- 2. Vorrunde
7
- Gruppenphase
8
- Ligaphase
9
- Spiele # in 2017/uy.1.txt -- double check if missing something
10
- # in 1960-61/it.1.txt
11
-
12
-
13
- 10. Runde
14
-
15
-
16
- Zwischenrunde
17
-
18
- Sechzehntelfinale
19
- Platzierungsspiel
20
-
21
- Qualifikation
22
- Qualifikation Copa Lib.
23
- Qual. 3. Runde
24
-
25
-
26
- 2. Aufstieg Halbfinale
27
- 2. Aufstieg Finale
28
-
29
-
30
- Halbfinale Gruppe A
31
- Halbfinale Gruppe B
32
-
33
- Entscheidungsspiele Abstieg
34
-
35
- Trostrunde Finale
36
- Trostrunde Halbfinale
37
-
38
-
39
- Playoff-Runde
40
- Relegation
41
- Aufstieg
42
- Endrunde
43
- Aufstiegsrunde
44
- Aufstiegsrunde Zone A
45
- Entscheidung Zone B
46
- 1. Aufstieg
47
- 1. Aufstieg Zone A
48
- 1. Aufstieg Zone B
49
- 2. Aufstieg Zone A
50
- 2. Aufstieg Zone B
51
- 2. Aufstieg 1. Phase
52
- 2. Aufstieg 2. Phase
53
- 2. Aufstieg 3. Phase
54
- Direkter Aufstieg
55
- Direkter Abstieg
56
- 3. Platz
57
- 5. Platz
58
- 7. Platz
59
- 9. Platz
60
- 11. Platz
61
- 13. Platz
62
-
63
- 5.-8. Platz Playoffs
64
- 9.-12. Platz Playoffs
65
- 13.-16. Platz Playoffs
66
-
67
-
68
- Entscheidung 1. Runde
69
- Entscheidung 2. Runde
70
-
71
-
72
- 1. Runde Gruppe 1
73
- 1. Runde Gruppe 2
74
-
75
-
76
- Zwischenrunde Gr. A ## move to group_de - why? why not?
77
- Zwischenrunde Gr. B
78
- Zwischenrunde Gr. C
79
- Zwischenrunde Gr. D
80
-
81
-
82
- Vorrunde Gr. A
83
- Vorrunde Gr. B
84
- Vorrunde Gr. C
85
- Vorrunde Gr. D
86
- Vorrunde Gr. E
87
- Vorrunde Gr. F
88
- Vorrunde Gr. G
89
- Vorrunde Gr. H
90
-
91
-
92
-
93
- ### todo/fix
94
- ### move to group - why? why not?
95
- Gruppe 1
96
- Gruppe 2
97
- Gruppe 3
98
- Gruppe 4
99
- Gruppe 5
100
- Gruppe 6
101
- Gruppe 7
102
- Gruppe 8
103
- Gruppe 9
104
- Gruppe 10
105
- Gruppe 11
106
- Gruppe 12
107
- Gruppe 13
108
- Gruppe 14
109
- Gruppe 15
110
- Gruppe 16
111
-
112
-
113
- Gruppe A
114
- Gruppe B
115
- Gruppe C
116
- Gruppe D
117
- Gruppe E
118
- Gruppe F
119
- Gruppe G
120
- Gruppe H
121
- Gruppe I
122
- Gruppe J
123
- Gruppe K
124
- Gruppe L
125
-
data/config/rounds_en.txt DELETED
@@ -1,29 +0,0 @@
1
- ##########
2
- # note - more english rounds here
3
- # remove here if added to regex!!!
4
-
5
- Play-in round
6
- First semifinal
7
- Second semifinal
8
-
9
- Conference Semifinals
10
- Conference Finals
11
- Wildcard
12
-
13
- Elimination Final
14
- Quadrangular
15
-
16
- Major Semi-Final
17
- Minor Semi-Final
18
-
19
-
20
-
21
-
22
- ## keep weirdo matchday ??
23
- Matchday 0 ## in 2003-04/az.1.txt
24
-
25
-
26
- ## from australia
27
- Elimination finals
28
- Grand Final
29
-
data/config/rounds_es.txt DELETED
@@ -1,26 +0,0 @@
1
- #########
2
- # rounds in español (es) / spanish
3
-
4
- Recalificación
5
- Reclasificación
6
-
7
-
8
- Preclasificación Nacional B
9
-
10
- Final Segunda Ronda
11
- Gran Final
12
-
13
- Interzone
14
- Zona A
15
- Zona B
16
-
17
- Final de Grupos
18
- Repechaje
19
-
20
- Final Absoluta
21
-
22
-
23
- Copa Libertadores
24
- Copa Sudamericana
25
-
26
-
data/config/rounds_misc.txt DELETED
@@ -1,25 +0,0 @@
1
- #######################
2
- # more rounds misc(ellaneous)
3
-
4
- District West I
5
- District West II
6
- District Noord
7
- District Oost
8
- District Zuid I
9
- District Zuid II
10
-
11
- Tussenronde
12
- Replay achtste finale
13
- Replay kwartfinale
14
-
15
- Replay 1e ronde
16
- Replay 2e ronde
17
- Replay halve finale
18
- Replay finale
19
-
20
- Beslissingswedstrijd
21
- Groep 15
22
- Groep 17
23
- Groep 18
24
- Groep 19
25
- Groep 20
data/config/rounds_pt.txt DELETED
@@ -1,4 +0,0 @@
1
- ############################
2
- # rounds in português (pt) / portuguese
3
-
4
- Troféu do Interior
data/config/zones_en.txt DELETED
@@ -1,20 +0,0 @@
1
- #####
2
- # zone names in english
3
-
4
-
5
- Western Region
6
- West Region
7
- Eastern Region
8
- East Region
9
-
10
- Western Conference
11
- Eastern Conference
12
-
13
- Northern Zone
14
- Western Zone A
15
- Western Zone B
16
- Central Zone
17
- Central & Eastern Zone
18
- Southern Zone
19
-
20
-
data/lib/sportdb/parser/lang.rb DELETED
@@ -1,298 +0,0 @@
1
-
2
- ## use Sports (not SportDb) for module - why? why not?
3
-
4
-
5
-
6
- module SportDb
7
-
8
- ## use module or class for Lang namespace??
9
- ## start with module for now
10
-
11
-
12
- module Lang
13
-
14
- ## Group A-Z
15
- ## Group 1-99
16
- ## Group HEX # used in concaf world cup quali
17
- ## Group 1A or A1, B1 - used anywhere
18
- ## yes - A1, A2, B1, C1, etc. used in UEFA Nations League for example!!
19
- ##
20
- ## exlcude
21
- ## use "key" of group - why? why not?
22
- ##
23
- ## note - will include group stage too
24
- ## make sure is_round gets called before is_group for now!!!
25
-
26
- GROUP_RE = %r{^
27
- Group [ ]
28
- (?<key> [a-z0-9]+ )
29
- $}ix
30
-
31
- def self.is_group?( text )
32
- ## use regex for match
33
- GROUP_RE.match?( text )
34
- end
35
-
36
-
37
-
38
-
39
- ROUND_RE = %r{^
40
- (?:
41
-
42
- ## add special case for group play-off rounds!
43
- ## group 2 play-off (e.g. worldcup 1954, 1958)
44
- ##
45
- ### note - allow Group ("stand-alone") as "generic" round for now
46
- ## BUT do NOT allow Group 1, Group 2, Group A, Group B, etc.
47
- (?: Group [ ] [a-z0-9]+ [ ] Play-?offs? |
48
- Group (?: [ ] (?: phase|stage))? |
49
- League (?: [ ] phase)?
50
- )
51
- |
52
-
53
- # round - note - requiers number e.g. round 1,2, etc.
54
- # note - use 1-9 regex (cannot start with 0) - why? why not?
55
- # make week 01 or round 01 or matchday 01 possible?
56
- (?: (?: Round |
57
- Matchday |
58
- Week
59
- )
60
- [ ] [1-9][0-9]*
61
- (?: ## note - add optional Matchday 1 of 2 or such
62
- [ ] of [ ] [1-9][0-9]*
63
- )?
64
- )
65
- |
66
- (?: Round [ ] One
67
- )
68
- |
69
- ## starting with qual(ification)
70
- ## Qual. Round 1 / Qual. Round 2 / Qual. Round 3
71
- ## or
72
- ## Playoff Round 1
73
- ## Play-in Round 1
74
- (?: (?: Qual \. |
75
- Play[ -]?off |
76
- Play[ -]?in
77
- )
78
- [ ] Round [ ] [1-9][0-9]* )
79
- |
80
- ## 1. Round / 2. Round / 3. Round / etc.
81
- ## First Round
82
- ## Play-off Round
83
- ## Final Round (e.g. Worldcup 1950)
84
- (?: (?:
85
- Play[ -]?off |
86
- Final |
87
- Wildcard |
88
- Qualifying |
89
- (?:
90
- (?:
91
- [1-9][0-9]* \. |
92
- 1st | First |
93
- 2nd | Second |
94
- 3rd | Third |
95
- 4th | Fourth |
96
- 5th | Fifth
97
- )
98
- (?: ## with optionals
99
- [ ] Qualifying
100
- )?
101
- )
102
- )
103
- [ ] Round
104
- )
105
- |
106
- ## starting with preliminary
107
- # e.g. Preliminary round
108
- (?: Preliminary [ ]
109
- (?: Round |
110
- Semi[ -]?finals |
111
- Final |
112
- Qualifier
113
- )
114
- )
115
- |
116
- # more (kockout) rounds
117
- # playoffs - playoff, play-off, play-offs &
118
- # playins
119
- (?:
120
- Play[ -]?offs? (?: [ ]for[ ]quarter-?finals )?
121
- |
122
- Play[ -]?ins?
123
- )
124
- |
125
- # round32
126
- (?: Round[ ]of[ ]32 |
127
- Last[ ]32 |
128
- 16th[ ]finals |
129
- 1/16[ ]finals )
130
- |
131
- # round16
132
- (?: Round[ ]of[ ]16 |
133
- Last[ ]16 |
134
- 8th[ ]finals |
135
- 1/8[ ]finals )
136
- |
137
- # round8 aka quarterfinals
138
- # note - allow quarter-finals/quarter finals/quarterfinals
139
- (?: Round[ ]of[ ]8 |
140
- Last[ ]8 |
141
- 1/4[ ]finals |
142
- Quarter[ -]?finals? |
143
- Quarters )
144
- |
145
- # fifthplace
146
- (?:
147
- (?: (Fifth|5th)[ -]place
148
- (?: [ ] (?: match|final|play[ -]?off ))?
149
- ) |
150
- (?: Match[ ]for[ ](?: fifth|5th )[ -]place )
151
- )
152
- |
153
- # thirdplace
154
- (?:
155
- (?: (Third|3rd)[ -]place
156
- (?: [ ] (?: match|final|play[ -]?off ))?
157
- ) |
158
- (?: Match[ ]for[ ](?: third|3rd )[ -]place )
159
- )
160
- |
161
- # round4 aka semifinals
162
- (?:
163
- Round[ ]of[ ]4 |
164
- Last[ ]4 |
165
- Semi[ -]?finals? |
166
- Semis )
167
- |
168
- # round2 aka final
169
- Finals?
170
- |
171
- ## add replays
172
- ## e.g. Final Replay
173
- ## Quarter-finals replays
174
- ## First round replays
175
- (?:
176
- (?: (?: 1st | First |
177
- 2nd | Second |
178
- 3rd | Third |
179
- 4th | Fourth |
180
- 5th | Fifth ) [ ] Round |
181
- Quarter[ -]?finals? |
182
- Finals?
183
- )
184
- [ ] Replays?
185
- )
186
- |
187
- ## more
188
- (?:
189
- Decider | # decider e.g. Entscheidungsspiel
190
- Reclassification
191
- )
192
- )$}ix
193
-
194
-
195
- ####
196
- # add more round names in different languages
197
- # via txt files
198
- #
199
- # for now must match case - maybe make caseinsensitive later - why? why not?
200
- def self.read_names( path )
201
- txt = read_text( path )
202
- names = [] # array of lines (with words)
203
- txt.each_line do |line|
204
- line = line.strip
205
-
206
- next if line.empty?
207
- next if line.start_with?( '#' ) ## skip comments too
208
-
209
- ## strip inline (until end-of-line) comments too
210
- ## e.g. Janvier Janv Jan ## check janv in use??
211
- ## => Janvier Janv Jan
212
-
213
- line = line.sub( /#.*/, '' ).strip
214
- ## pp line
215
-
216
- names << line
217
- end
218
- names
219
- end
220
-
221
-
222
- def self.more_round_names
223
- @more_round_name ||= begin
224
- names = []
225
- langs = ['en', 'de', 'es', 'pt', 'misc']
226
- ## sort names by length??
227
- langs.each do |lang|
228
- path = "#{SportDb::Module::Parser.root}/config/rounds_#{lang}.txt"
229
- names += read_names( path )
230
- end
231
- names
232
- end
233
- end
234
-
235
- def self.zone_names
236
- @zone_name ||= begin
237
- names = []
238
- langs = ['en']
239
- ## sort names by length??
240
- langs.each do |lang|
241
- path = "#{SportDb::Module::Parser.root}/config/zones_#{lang}.txt"
242
- names += read_names( path )
243
- end
244
- names
245
- end
246
- end
247
-
248
-
249
- def self.is_round?( text )
250
- ### note - use check for case-insensitive
251
- ## was:
252
- ## more_round_names.include?( text )
253
- ## change to:
254
- ## more_round_names.any?{ |str| str.casecmp( text )==0 }
255
- ##
256
- ## todo/fix:
257
- ## maybe in the future use our own unaccent and downcase - wyh? why not?
258
- ## note - for now ROUND_RE is also case-insensitive!!
259
-
260
- ROUND_RE.match?( text ) ||
261
- more_round_names.any?{ |str| str.casecmp( text )==0 }
262
- end
263
-
264
- def self.is_zone?( text )
265
- zone_names.any?{ |str| str.casecmp( text )==0 }
266
- end
267
-
268
-
269
- ##
270
- ## keep leg separate (from round) - why? why not?
271
- ##
272
- LEG_RE = %r{^
273
- # leg1
274
- (?: 1st|First) [ ] leg
275
- |
276
- # leg2
277
- (?: 2nd|Second) [ ] leg
278
- |
279
- # leg 1 of 2 / leg 2 of 2
280
- # note - leg limited to ALWAY 1/2 of 2 for now - why? why not?
281
- # for more use match 1/2/3 etc.
282
- ## allow leg of three (e.g. leg 1 of 3) - why? why not?
283
- (?: leg [ ] [12]
284
- (?: [ ] of [ ] 2)? )
285
- |
286
- (?: match [ ] [1-9][0-9]* )
287
- $}ix
288
-
289
-
290
-
291
- ### Pair matches/games if marked with leg1 n leg2
292
- def self.is_leg?( text )
293
- LEG_RE.match?( text )
294
- end
295
-
296
-
297
- end # module Lang
298
- end # module SportDb