sportdb-parser 0.3.6 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7e55fb30d76a2b9ce61c129408590506d02fdfc0fdd699211d8255cd97bbdd30
4
- data.tar.gz: 0b4af2b72f2375597ae4ff7e99925eb92971833c50e489064ca143706198b2b9
3
+ metadata.gz: eadc79627364072a1d05801fec096aca27e914f214639c4bbe6bbffca2acab0d
4
+ data.tar.gz: f052a6d668246082d9fbcc5b90c71b440048080dc423f9428a4560591e797080
5
5
  SHA512:
6
- metadata.gz: e6f9c3fa8f3ac153208977662f6435858623a4b56d5506dd3238618fc23e34cc983c385325ce0346698ce2397e53b8c2ea3a16563caf1205d17be7448a2ae858
7
- data.tar.gz: fd7a20830cd66a892db370e2f521139412266bc02821d6fed4ce36a7d8ac5c27913dcc4736c7a1834d46ed8c3156c5386e85bc57af9419dcac1f3d52b438f17d
6
+ metadata.gz: 394f74c596ad1a624d626757972911eee7e0b009df66d5cac7ed1508cb3b056ece6f7fbc1d3c310468d5d065ae22ba18a28d7f8992f333767f20279eb5ce2f78
7
+ data.tar.gz: ecaa4a25b3552e69013dea0cc9c87a91b3409cd02e86302f4387e658de6cdfadba2c82c7c31935d4f7c62aaab187a25ce6e85f9dc4239faa346838f5be8dce7b
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.3.6
1
+ ### 0.3.8
2
2
 
3
3
  ### 0.0.1 / 2024-07-12
4
4
 
data/Manifest.txt CHANGED
@@ -3,6 +3,11 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  bin/fbtok
6
+ config/rounds_de.txt
7
+ config/rounds_en.txt
8
+ config/rounds_es.txt
9
+ config/rounds_misc.txt
10
+ config/rounds_pt.txt
6
11
  lib/sportdb/parser.rb
7
12
  lib/sportdb/parser/fbtok/main.rb
8
13
  lib/sportdb/parser/lang.rb
@@ -0,0 +1,93 @@
1
+ ###############
2
+ # rounds in deutsch (de) / german
3
+
4
+ Vorrunde
5
+ 1. Vorrunde
6
+ 2. Vorrunde
7
+ Gruppenphase
8
+ Ligaphase
9
+ Spiele # in 2017/uy.1.txt -- double check if missing something
10
+ # in 1960-61/it.1.txt
11
+
12
+
13
+ Zwischenrunde
14
+
15
+ Sechzehntelfinale
16
+ Platzierungsspiel
17
+
18
+ Qualifikation
19
+ Qual. 3. Runde
20
+
21
+ 2. Aufstieg Halbfinale
22
+ 2. Aufstieg Finale
23
+
24
+ Playoff-Runde
25
+ Relegation
26
+ Aufstieg
27
+ Endrunde
28
+ Aufstiegsrunde
29
+ Aufstiegsrunde Zone A
30
+ Entscheidung Zone B
31
+ 1. Aufstieg
32
+ 1. Aufstieg Zone A
33
+ 1. Aufstieg Zone B
34
+ 2. Aufstieg Zone A
35
+ 2. Aufstieg Zone B
36
+ 2. Aufstieg 1. Phase
37
+ 2. Aufstieg 2. Phase
38
+ 2. Aufstieg 3. Phase
39
+ Direkter Aufstieg
40
+ Direkter Abstieg
41
+ 3. Platz
42
+ 5. Platz
43
+ 7. Platz
44
+ 9. Platz
45
+ 11. Platz
46
+
47
+ 5.-8. Platz Playoffs
48
+ 9.-12. Platz Playoffs
49
+ 13.-16. Platz Playoffs
50
+
51
+
52
+ Entscheidung 1. Runde
53
+ Entscheidung 2. Runde
54
+
55
+
56
+ Zwischenrunde Gr. B ## move to group_de - why? why not?
57
+ 1. Runde Gruppe 1
58
+ 1. Runde Gruppe 2
59
+
60
+
61
+ ### todo/fix
62
+ ### move to group - why? why not?
63
+ Gruppe 1
64
+ Gruppe 2
65
+ Gruppe 3
66
+ Gruppe 4
67
+ Gruppe 5
68
+ Gruppe 6
69
+ Gruppe 7
70
+ Gruppe 8
71
+ Gruppe 9
72
+ Gruppe 10
73
+ Gruppe 11
74
+ Gruppe 12
75
+ Gruppe 13
76
+ Gruppe 14
77
+ Gruppe 15
78
+ Gruppe 16
79
+
80
+
81
+ Gruppe A
82
+ Gruppe B
83
+ Gruppe C
84
+ Gruppe D
85
+ Gruppe E
86
+ Gruppe F
87
+ Gruppe G
88
+ Gruppe H
89
+ Gruppe I
90
+ Gruppe J
91
+ Gruppe K
92
+ Gruppe L
93
+
@@ -0,0 +1,15 @@
1
+ ##########
2
+ # note - more english rounds here
3
+ # remove here if added to regex!!!
4
+
5
+ Play-in round
6
+ First semifinal
7
+ Second semifinal
8
+
9
+ Conference Semifinals
10
+ Conference Finals
11
+ Wildcard
12
+
13
+ Elimination Final
14
+ Quadrangular
15
+
@@ -0,0 +1,19 @@
1
+ #########
2
+ # rounds in español (es) / spanish
3
+
4
+ Recalificación
5
+ Reclasificación
6
+
7
+
8
+ Preclasificación Nacional B
9
+
10
+ Final Segunda Ronda
11
+ Gran Final
12
+
13
+ Interzone
14
+ Zone A
15
+ Zona B
16
+
17
+ Final de Grupos
18
+ Repechaje
19
+
@@ -0,0 +1,18 @@
1
+ #######################
2
+ # more rounds misc(ellaneous)
3
+
4
+ District West I
5
+ District West II
6
+ District Noord
7
+ District Oost
8
+ District Zuid I
9
+ District Zuid II
10
+
11
+ Tussenronde
12
+ Replay achtste finale
13
+ Replay kwartfinale
14
+
15
+ Replay 1e ronde
16
+ Replay 2e ronde
17
+ Replay halve finale
18
+ Replay finale
@@ -0,0 +1,4 @@
1
+ ############################
2
+ # rounds in português (pt) / portuguese
3
+
4
+ Troféu do Interior
@@ -150,8 +150,50 @@ ROUND_RE = %r{^(
150
150
  )$}ix
151
151
 
152
152
 
153
+ ####
154
+ # add more round names in different languages
155
+ # via txt files
156
+ #
157
+ # for now must match case - maybe make case-insensitive later - why? why not?
158
+ def self.read_names( path )
159
+ txt = read_text( path )
160
+ names = [] # array of lines (with words)
161
+ txt.each_line do |line|
162
+ line = line.strip
163
+
164
+ next if line.empty?
165
+ next if line.start_with?( '#' ) ## skip comments too
166
+
167
+ ## strip inline (until end-of-line) comments too
168
+ ## e.g. Janvier Janv Jan ## check janv in use??
169
+ ## => Janvier Janv Jan
170
+
171
+ line = line.sub( /#.*/, '' ).strip
172
+ ## pp line
173
+
174
+ names << line
175
+ end
176
+ names
177
+ end
178
+
179
+
180
+ def self.more_round_names
181
+ @more_round_name ||= begin
182
+ names = []
183
+ langs = ['en', 'de', 'es', 'pt', 'misc']
184
+ ## sort names by length??
185
+ langs.each do |lang|
186
+ path = "#{SportDb::Module::Parser.root}/config/rounds_#{lang}.txt"
187
+ names += read_names( path )
188
+ end
189
+ names
190
+ end
191
+ end
192
+
193
+
153
194
  def is_round?( text )
154
- ROUND_RE.match?( text )
195
+ ROUND_RE.match?( text ) ||
196
+ self.class.more_round_names.include?( text )
155
197
  end
156
198
 
157
199
  ##
@@ -1,13 +1,13 @@
1
- module SportDb
1
+ module SportDb
2
2
  class Parser
3
-
3
+
4
4
 
5
5
  ## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
6
6
 
7
7
  #####
8
8
  # english helpers (penalty, extra time, ...)
9
9
  ## note - p must go last (shortest match)
10
- # pso = penalty shootout
10
+ # pso = penalty shootout
11
11
  P_EN = '(?: pso | pen\.? | p\.? )' # e.g. p., p, pen, pen., PSO, etc.
12
12
  ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
13
13
 
@@ -26,7 +26,20 @@ class Parser
26
26
  (?<et1>\d{1,2}) - (?<et2>\d{1,2})
27
27
  [ ]* #{ET_EN}
28
28
  (?=[ \]]|$)
29
- )}ix
29
+ )}ix
30
+ ## todo/check: remove lookahead assertion here - why require space?
31
+ ## note: \b works only after non-alphanum e.g. )
32
+
33
+
34
+ ## note: allow SPECIAL with penalty only
35
+ ## 3-4 pen.
36
+ SCORE__P__RE = %r{
37
+ (?<score>
38
+ \b
39
+ (?<p1>\d{1,2}) - (?<p2>\d{1,2})
40
+ [ ]* #{P_EN}
41
+ (?=[ \]]|$)
42
+ )}ix
30
43
  ## todo/check: remove lookahead assertion here - why require space?
31
44
  ## note: \b works only after non-alphanum e.g. )
32
45
 
@@ -89,8 +102,8 @@ class Parser
89
102
 
90
103
  ## e.g. 2-1 (1-1) or
91
104
  ## 2-1
92
-
93
- SCORE__FT_HT__RE = %r{
105
+
106
+ SCORE__FT_HT__RE = %r{
94
107
  (?<score>
95
108
  \b
96
109
  (?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
@@ -104,18 +117,18 @@ class Parser
104
117
  ## note: \b works only after non-alphanum e.g. )
105
118
 
106
119
 
107
-
120
+
108
121
  #############################################
109
- # map tables
122
+ # map tables
110
123
  # note: order matters; first come-first matched/served
111
124
 
112
- SCORE_RE = Regexp.union(
125
+ SCORE_RE = Regexp.union(
113
126
  SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
114
127
  SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
115
128
  SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
116
- SCORE__FT_HT__RE # e.g. 1-1 (1-0)
129
+ SCORE__P__RE, # e.g. 5-1 pen.
130
+ SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
117
131
  )
118
132
 
119
133
  end # class Parser
120
- end # module SportDb
121
-
134
+ end # module SportDb
@@ -47,6 +47,10 @@ TEXT_RE = %r{
47
47
  [ ]? ## make space optional too - why? why not?
48
48
  ## yes - eg. 1st, 2nd, 5th etc.
49
49
  \p{L}+
50
+ |
51
+ ## opt 3 - add weirdo case
52
+ ## e.g. 5.-8. Platz Playoffs - keep - why? why not?
53
+ \d+\.-\d+\. [ ]? \p{L}+
50
54
  )
51
55
 
52
56
  (?:(?: (?:[ ]
@@ -57,13 +61,18 @@ TEXT_RE = %r{
57
61
  )?
58
62
  (?:
59
63
  \p{L} |
60
- [&/']
64
+ [&/'°]
61
65
  |
62
66
  (?:
63
67
  \d+
64
- (?![0-9.:h'/+-])
68
+ (?!
69
+ [0-9h'+-] | ## protected break on 12h / 12' / 1-1
70
+ ## check usage for 3+4 - possible? where ? why?
71
+ (?:[.:]\d) ## protected/exclude/break on 12.03 / 12:03
72
+ )
65
73
  ## negative lookahead for numbers
66
74
  ## note - include digits itself!!!
75
+ ## note - remove / (slash) e.g. allows UDI'19/Beter Bed
67
76
  )|
68
77
  \.
69
78
  )
@@ -4,7 +4,7 @@ module SportDb
4
4
  module Parser
5
5
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
6
  MINOR = 3
7
- PATCH = 6
7
+ PATCH = 8
8
8
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
9
 
10
10
  def self.version
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-09 00:00:00.000000000 Z
11
+ date: 2024-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cocos
@@ -81,12 +81,22 @@ extra_rdoc_files:
81
81
  - CHANGELOG.md
82
82
  - Manifest.txt
83
83
  - README.md
84
+ - config/rounds_de.txt
85
+ - config/rounds_en.txt
86
+ - config/rounds_es.txt
87
+ - config/rounds_misc.txt
88
+ - config/rounds_pt.txt
84
89
  files:
85
90
  - CHANGELOG.md
86
91
  - Manifest.txt
87
92
  - README.md
88
93
  - Rakefile
89
94
  - bin/fbtok
95
+ - config/rounds_de.txt
96
+ - config/rounds_en.txt
97
+ - config/rounds_es.txt
98
+ - config/rounds_misc.txt
99
+ - config/rounds_pt.txt
90
100
  - lib/sportdb/parser.rb
91
101
  - lib/sportdb/parser/fbtok/main.rb
92
102
  - lib/sportdb/parser/lang.rb