sportdb-parser 0.3.6 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e55fb30d76a2b9ce61c129408590506d02fdfc0fdd699211d8255cd97bbdd30
4
- data.tar.gz: 0b4af2b72f2375597ae4ff7e99925eb92971833c50e489064ca143706198b2b9
3
+ metadata.gz: eadc79627364072a1d05801fec096aca27e914f214639c4bbe6bbffca2acab0d
4
+ data.tar.gz: f052a6d668246082d9fbcc5b90c71b440048080dc423f9428a4560591e797080
5
5
  SHA512:
6
- metadata.gz: e6f9c3fa8f3ac153208977662f6435858623a4b56d5506dd3238618fc23e34cc983c385325ce0346698ce2397e53b8c2ea3a16563caf1205d17be7448a2ae858
7
- data.tar.gz: fd7a20830cd66a892db370e2f521139412266bc02821d6fed4ce36a7d8ac5c27913dcc4736c7a1834d46ed8c3156c5386e85bc57af9419dcac1f3d52b438f17d
6
+ metadata.gz: 394f74c596ad1a624d626757972911eee7e0b009df66d5cac7ed1508cb3b056ece6f7fbc1d3c310468d5d065ae22ba18a28d7f8992f333767f20279eb5ce2f78
7
+ data.tar.gz: ecaa4a25b3552e69013dea0cc9c87a91b3409cd02e86302f4387e658de6cdfadba2c82c7c31935d4f7c62aaab187a25ce6e85f9dc4239faa346838f5be8dce7b
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.3.6
1
+ ### 0.3.8
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -3,6 +3,11 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  bin/fbtok
6
+ config/rounds_de.txt
7
+ config/rounds_en.txt
8
+ config/rounds_es.txt
9
+ config/rounds_misc.txt
10
+ config/rounds_pt.txt
6
11
  lib/sportdb/parser.rb
7
12
  lib/sportdb/parser/fbtok/main.rb
8
13
  lib/sportdb/parser/lang.rb
@@ -0,0 +1,93 @@
1
+ ###############
2
+ # rounds in deutsch (de) / german
3
+
4
+ Vorrunde
5
+ 1. Vorrunde
6
+ 2. Vorrunde
7
+ Gruppenphase
8
+ Ligaphase
9
+ Spiele # in 2017/uy.1.txt -- double check if missing something
10
+ # in 1960-61/it.1.txt
11
+
12
+
13
+ Zwischenrunde
14
+
15
+ Sechzehntelfinale
16
+ Platzierungsspiel
17
+
18
+ Qualifikation
19
+ Qual. 3. Runde
20
+
21
+ 2. Aufstieg Halbfinale
22
+ 2. Aufstieg Finale
23
+
24
+ Playoff-Runde
25
+ Relegation
26
+ Aufstieg
27
+ Endrunde
28
+ Aufstiegsrunde
29
+ Aufstiegsrunde Zone A
30
+ Entscheidung Zone B
31
+ 1. Aufstieg
32
+ 1. Aufstieg Zone A
33
+ 1. Aufstieg Zone B
34
+ 2. Aufstieg Zone A
35
+ 2. Aufstieg Zone B
36
+ 2. Aufstieg 1. Phase
37
+ 2. Aufstieg 2. Phase
38
+ 2. Aufstieg 3. Phase
39
+ Direkter Aufstieg
40
+ Direkter Abstieg
41
+ 3. Platz
42
+ 5. Platz
43
+ 7. Platz
44
+ 9. Platz
45
+ 11. Platz
46
+
47
+ 5.-8. Platz Playoffs
48
+ 9.-12. Platz Playoffs
49
+ 13.-16. Platz Playoffs
50
+
51
+
52
+ Entscheidung 1. Runde
53
+ Entscheidung 2. Runde
54
+
55
+
56
+ Zwischenrunde Gr. B ## move to group_de - why? why not?
57
+ 1. Runde Gruppe 1
58
+ 1. Runde Gruppe 2
59
+
60
+
61
+ ### todo/fix
62
+ ### move to group - why? why not?
63
+ Gruppe 1
64
+ Gruppe 2
65
+ Gruppe 3
66
+ Gruppe 4
67
+ Gruppe 5
68
+ Gruppe 6
69
+ Gruppe 7
70
+ Gruppe 8
71
+ Gruppe 9
72
+ Gruppe 10
73
+ Gruppe 11
74
+ Gruppe 12
75
+ Gruppe 13
76
+ Gruppe 14
77
+ Gruppe 15
78
+ Gruppe 16
79
+
80
+
81
+ Gruppe A
82
+ Gruppe B
83
+ Gruppe C
84
+ Gruppe D
85
+ Gruppe E
86
+ Gruppe F
87
+ Gruppe G
88
+ Gruppe H
89
+ Gruppe I
90
+ Gruppe J
91
+ Gruppe K
92
+ Gruppe L
93
+
@@ -0,0 +1,15 @@
1
+ ##########
2
+ # note - more english rounds here
3
+ # remove here if added to regex!!!
4
+
5
+ Play-in round
6
+ First semifinal
7
+ Second semifinal
8
+
9
+ Conference Semifinals
10
+ Conference Finals
11
+ Wildcard
12
+
13
+ Elimination Final
14
+ Quadrangular
15
+
@@ -0,0 +1,19 @@
1
+ #########
2
+ # rounds in español (es) / spanish
3
+
4
+ Recalificación
5
+ Reclasificación
6
+
7
+
8
+ Preclasificación Nacional B
9
+
10
+ Final Segunda Ronda
11
+ Gran Final
12
+
13
+ Interzone
14
+ Zone A
15
+ Zona B
16
+
17
+ Final de Grupos
18
+ Repechaje
19
+
@@ -0,0 +1,18 @@
1
+ #######################
2
+ # more rounds misc(ellaneous)
3
+
4
+ District West I
5
+ District West II
6
+ District Noord
7
+ District Oost
8
+ District Zuid I
9
+ District Zuid II
10
+
11
+ Tussenronde
12
+ Replay achtste finale
13
+ Replay kwartfinale
14
+
15
+ Replay 1e ronde
16
+ Replay 2e ronde
17
+ Replay halve finale
18
+ Replay finale
@@ -0,0 +1,4 @@
1
+ ############################
2
+ # rounds in português (pt) / portuguese
3
+
4
+ Troféu do Interior
@@ -150,8 +150,50 @@ ROUND_RE = %r{^(
150
150
  )$}ix
151
151
 
152
152
 
153
+ ####
154
+ # add more round names in different languages
155
+ # via txt files
156
+ #
157
+ # for now must match case - maybe make case-insensitive later - why? why not?
158
+ def self.read_names( path )
159
+ txt = read_text( path )
160
+ names = [] # array of lines (with words)
161
+ txt.each_line do |line|
162
+ line = line.strip
163
+
164
+ next if line.empty?
165
+ next if line.start_with?( '#' ) ## skip comments too
166
+
167
+ ## strip inline (until end-of-line) comments too
168
+ ## e.g. Janvier Janv Jan ## check janv in use??
169
+ ## => Janvier Janv Jan
170
+
171
+ line = line.sub( /#.*/, '' ).strip
172
+ ## pp line
173
+
174
+ names << line
175
+ end
176
+ names
177
+ end
178
+
179
+
180
+ def self.more_round_names
181
+ @more_round_name ||= begin
182
+ names = []
183
+ langs = ['en', 'de', 'es', 'pt', 'misc']
184
+ ## sort names by length??
185
+ langs.each do |lang|
186
+ path = "#{SportDb::Module::Parser.root}/config/rounds_#{lang}.txt"
187
+ names += read_names( path )
188
+ end
189
+ names
190
+ end
191
+ end
192
+
193
+
153
194
  def is_round?( text )
154
- ROUND_RE.match?( text )
195
+ ROUND_RE.match?( text ) ||
196
+ self.class.more_round_names.include?( text )
155
197
  end
156
198
 
157
199
  ##
@@ -1,13 +1,13 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
3
+
4
4
 
5
5
  ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
6
6
 
7
7
  #####
8
8
  # english helpers (penalty, extra time, ...)
9
9
  ## note - p must go last (shortest match)
10
- # pso = penalty shootout
10
+ # pso = penalty shootout
11
11
  P_EN = '(?: pso | pen\.? | p\.? )' # e.g. p., p, pen, pen., PSO, etc.
12
12
  ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
13
13
 
@@ -26,7 +26,20 @@ class Parser
26
26
  (?<et1>\d{1,2}) - (?<et2>\d{1,2})
27
27
  [ ]* #{ET_EN}
28
28
  (?=[ \]]|$)
29
- )}ix
29
+ )}ix
30
+ ## todo/check: remove lookahead assertion here - why require space?
31
+ ## note: \b works only after non-alphanum e.g. )
32
+
33
+
34
+ ## note: allow SPECIAL with penalty only
35
+ ## 3-4 pen.
36
+ SCORE__P__RE = %r{
37
+ (?<score>
38
+ \b
39
+ (?<p1>\d{1,2}) - (?<p2>\d{1,2})
40
+ [ ]* #{P_EN}
41
+ (?=[ \]]|$)
42
+ )}ix
30
43
  ## todo/check: remove lookahead assertion here - why require space?
31
44
  ## note: \b works only after non-alphanum e.g. )
32
45
 
@@ -89,8 +102,8 @@ class Parser
89
102
 
90
103
  ## e.g. 2-1 (1-1) or
91
104
  ## 2-1
92
-
93
- SCORE__FT_HT__RE = %r{
105
+
106
+ SCORE__FT_HT__RE = %r{
94
107
  (?<score>
95
108
  \b
96
109
  (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
@@ -104,18 +117,18 @@ class Parser
104
117
  ## note: \b works only after non-alphanum e.g. )
105
118
 
106
119
 
107
-
120
+
108
121
  #############################################
109
- # map tables
122
+ # map tables
110
123
  # note: order matters; first come-first matched/served
111
124
 
112
- SCORE_RE = Regexp.union(
125
+ SCORE_RE = Regexp.union(
113
126
  SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
114
127
  SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
115
128
  SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
116
- SCORE__FT_HT__RE # e.g. 1-1 (1-0)
129
+ SCORE__P__RE, # e.g. 5-1 pen.
130
+ SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
117
131
  )
118
132
 
119
133
  end # class Parser
120
- end # module SportDb
121
-
134
+ end # module SportDb
@@ -47,6 +47,10 @@ TEXT_RE = %r{
47
47
  [ ]? ## make space optional too - why? why not?
48
48
  ## yes - eg. 1st, 2nd, 5th etc.
49
49
  \p{L}+
50
+ |
51
+ ## opt 3 - add weirdo case
52
+ ## e.g. 5.-8. Platz Playoffs - keep - why? why not?
53
+ \d+\.-\d+\. [ ]? \p{L}+
50
54
  )
51
55
 
52
56
  (?:(?: (?:[ ]
@@ -57,13 +61,18 @@ TEXT_RE = %r{
57
61
  )?
58
62
  (?:
59
63
  \p{L} |
60
- [&/']
64
+ [&/'°]
61
65
  |
62
66
  (?:
63
67
  \d+
64
- (?![0-9.:h'/+-])
68
+ (?!
69
+ [0-9h'+-] | ## protected break on 12h / 12' / 1-1
70
+ ## check usage for 3+4 - possible? where ? why?
71
+ (?:[.:]\d) ## protected/exclude/break on 12.03 / 12:03
72
+ )
65
73
  ## negative lookahead for numbers
66
74
  ## note - include digits itself!!!
75
+ ## note - remove / (slash) e.g. allows UDI'19/Beter Bed
67
76
  )|
68
77
  \.
69
78
  )
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 3
7
- PATCH = 6
7
+ PATCH = 8
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-09 00:00:00.000000000 Z
11
+ date: 2024-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -81,12 +81,22 @@ extra_rdoc_files:
81
81
  - CHANGELOG.md
82
82
  - Manifest.txt
83
83
  - README.md
84
+ - config/rounds_de.txt
85
+ - config/rounds_en.txt
86
+ - config/rounds_es.txt
87
+ - config/rounds_misc.txt
88
+ - config/rounds_pt.txt
84
89
  files:
85
90
  - CHANGELOG.md
86
91
  - Manifest.txt
87
92
  - README.md
88
93
  - Rakefile
89
94
  - bin/fbtok
95
+ - config/rounds_de.txt
96
+ - config/rounds_en.txt
97
+ - config/rounds_es.txt
98
+ - config/rounds_misc.txt
99
+ - config/rounds_pt.txt
90
100
  - lib/sportdb/parser.rb
91
101
  - lib/sportdb/parser/fbtok/main.rb
92
102
  - lib/sportdb/parser/lang.rb