sportdb-parser 0.3.6 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +5 -0
- data/config/rounds_de.txt +93 -0
- data/config/rounds_en.txt +15 -0
- data/config/rounds_es.txt +19 -0
- data/config/rounds_misc.txt +18 -0
- data/config/rounds_pt.txt +4 -0
- data/lib/sportdb/parser/lang.rb +43 -1
- data/lib/sportdb/parser/token-score.rb +25 -12
- data/lib/sportdb/parser/token-text.rb +11 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eadc79627364072a1d05801fec096aca27e914f214639c4bbe6bbffca2acab0d
|
4
|
+
data.tar.gz: f052a6d668246082d9fbcc5b90c71b440048080dc423f9428a4560591e797080
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 394f74c596ad1a624d626757972911eee7e0b009df66d5cac7ed1508cb3b056ece6f7fbc1d3c310468d5d065ae22ba18a28d7f8992f333767f20279eb5ce2f78
|
7
|
+
data.tar.gz: ecaa4a25b3552e69013dea0cc9c87a91b3409cd02e86302f4387e658de6cdfadba2c82c7c31935d4f7c62aaab187a25ce6e85f9dc4239faa346838f5be8dce7b
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -0,0 +1,93 @@
|
|
1
|
+
###############
|
2
|
+
# rounds in deutsch (de) / german
|
3
|
+
|
4
|
+
Vorrunde
|
5
|
+
1. Vorrunde
|
6
|
+
2. Vorrunde
|
7
|
+
Gruppenphase
|
8
|
+
Ligaphase
|
9
|
+
Spiele # in 2017/uy.1.txt -- double check if missing something
|
10
|
+
# in 1960-61/it.1.txt
|
11
|
+
|
12
|
+
|
13
|
+
Zwischenrunde
|
14
|
+
|
15
|
+
Sechzehntelfinale
|
16
|
+
Platzierungsspiel
|
17
|
+
|
18
|
+
Qualifikation
|
19
|
+
Qual. 3. Runde
|
20
|
+
|
21
|
+
2. Aufstieg Halbfinale
|
22
|
+
2. Aufstieg Finale
|
23
|
+
|
24
|
+
Playoff-Runde
|
25
|
+
Relegation
|
26
|
+
Aufstieg
|
27
|
+
Endrunde
|
28
|
+
Aufstiegsrunde
|
29
|
+
Aufstiegsrunde Zone A
|
30
|
+
Entscheidung Zone B
|
31
|
+
1. Aufstieg
|
32
|
+
1. Aufstieg Zone A
|
33
|
+
1. Aufstieg Zone B
|
34
|
+
2. Aufstieg Zone A
|
35
|
+
2. Aufstieg Zone B
|
36
|
+
2. Aufstieg 1. Phase
|
37
|
+
2. Aufstieg 2. Phase
|
38
|
+
2. Aufstieg 3. Phase
|
39
|
+
Direkter Aufstieg
|
40
|
+
Direkter Abstieg
|
41
|
+
3. Platz
|
42
|
+
5. Platz
|
43
|
+
7. Platz
|
44
|
+
9. Platz
|
45
|
+
11. Platz
|
46
|
+
|
47
|
+
5.-8. Platz Playoffs
|
48
|
+
9.-12. Platz Playoffs
|
49
|
+
13.-16. Platz Playoffs
|
50
|
+
|
51
|
+
|
52
|
+
Entscheidung 1. Runde
|
53
|
+
Entscheidung 2. Runde
|
54
|
+
|
55
|
+
|
56
|
+
Zwischenrunde Gr. B ## move to group_de - why? why not?
|
57
|
+
1. Runde Gruppe 1
|
58
|
+
1. Runde Gruppe 2
|
59
|
+
|
60
|
+
|
61
|
+
### todo/fix
|
62
|
+
### move to group - why? why not?
|
63
|
+
Gruppe 1
|
64
|
+
Gruppe 2
|
65
|
+
Gruppe 3
|
66
|
+
Gruppe 4
|
67
|
+
Gruppe 5
|
68
|
+
Gruppe 6
|
69
|
+
Gruppe 7
|
70
|
+
Gruppe 8
|
71
|
+
Gruppe 9
|
72
|
+
Gruppe 10
|
73
|
+
Gruppe 11
|
74
|
+
Gruppe 12
|
75
|
+
Gruppe 13
|
76
|
+
Gruppe 14
|
77
|
+
Gruppe 15
|
78
|
+
Gruppe 16
|
79
|
+
|
80
|
+
|
81
|
+
Gruppe A
|
82
|
+
Gruppe B
|
83
|
+
Gruppe C
|
84
|
+
Gruppe D
|
85
|
+
Gruppe E
|
86
|
+
Gruppe F
|
87
|
+
Gruppe G
|
88
|
+
Gruppe H
|
89
|
+
Gruppe I
|
90
|
+
Gruppe J
|
91
|
+
Gruppe K
|
92
|
+
Gruppe L
|
93
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#######################
|
2
|
+
# more rounds misc(ellaneous)
|
3
|
+
|
4
|
+
District West I
|
5
|
+
District West II
|
6
|
+
District Noord
|
7
|
+
District Oost
|
8
|
+
District Zuid I
|
9
|
+
District Zuid II
|
10
|
+
|
11
|
+
Tussenronde
|
12
|
+
Replay achtste finale
|
13
|
+
Replay kwartfinale
|
14
|
+
|
15
|
+
Replay 1e ronde
|
16
|
+
Replay 2e ronde
|
17
|
+
Replay halve finale
|
18
|
+
Replay finale
|
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -150,8 +150,50 @@ ROUND_RE = %r{^(
|
|
150
150
|
)$}ix
|
151
151
|
|
152
152
|
|
153
|
+
####
|
154
|
+
# add more round names in different languages
|
155
|
+
# via txt files
|
156
|
+
#
|
157
|
+
# for now must match case - maybe make case-insensitive later - why? why not?
|
158
|
+
def self.read_names( path )
|
159
|
+
txt = read_text( path )
|
160
|
+
names = [] # array of lines (with words)
|
161
|
+
txt.each_line do |line|
|
162
|
+
line = line.strip
|
163
|
+
|
164
|
+
next if line.empty?
|
165
|
+
next if line.start_with?( '#' ) ## skip comments too
|
166
|
+
|
167
|
+
## strip inline (until end-of-line) comments too
|
168
|
+
## e.g. Janvier Janv Jan ## check janv in use??
|
169
|
+
## => Janvier Janv Jan
|
170
|
+
|
171
|
+
line = line.sub( /#.*/, '' ).strip
|
172
|
+
## pp line
|
173
|
+
|
174
|
+
names << line
|
175
|
+
end
|
176
|
+
names
|
177
|
+
end
|
178
|
+
|
179
|
+
|
180
|
+
def self.more_round_names
|
181
|
+
@more_round_name ||= begin
|
182
|
+
names = []
|
183
|
+
langs = ['en', 'de', 'es', 'pt', 'misc']
|
184
|
+
## sort names by length??
|
185
|
+
langs.each do |lang|
|
186
|
+
path = "#{SportDb::Module::Parser.root}/config/rounds_#{lang}.txt"
|
187
|
+
names += read_names( path )
|
188
|
+
end
|
189
|
+
names
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
|
153
194
|
def is_round?( text )
|
154
|
-
ROUND_RE.match?( text )
|
195
|
+
ROUND_RE.match?( text ) ||
|
196
|
+
self.class.more_round_names.include?( text )
|
155
197
|
end
|
156
198
|
|
157
199
|
##
|
@@ -1,13 +1,13 @@
|
|
1
|
-
module SportDb
|
1
|
+
module SportDb
|
2
2
|
class Parser
|
3
|
-
|
3
|
+
|
4
4
|
|
5
5
|
## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
|
6
6
|
|
7
7
|
#####
|
8
8
|
# english helpers (penalty, extra time, ...)
|
9
9
|
## note - p must go last (shortest match)
|
10
|
-
# pso = penalty shootout
|
10
|
+
# pso = penalty shootout
|
11
11
|
P_EN = '(?: pso | pen\.? | p\.? )' # e.g. p., p, pen, pen., PSO, etc.
|
12
12
|
ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
|
13
13
|
|
@@ -26,7 +26,20 @@ class Parser
|
|
26
26
|
(?<et1>\d{1,2}) - (?<et2>\d{1,2})
|
27
27
|
[ ]* #{ET_EN}
|
28
28
|
(?=[ \]]|$)
|
29
|
-
)}ix
|
29
|
+
)}ix
|
30
|
+
## todo/check: remove lookahead assertion here - why require space?
|
31
|
+
## note: \b works only after non-alphanum e.g. )
|
32
|
+
|
33
|
+
|
34
|
+
## note: allow SPECIAL with penalty only
|
35
|
+
## 3-4 pen.
|
36
|
+
SCORE__P__RE = %r{
|
37
|
+
(?<score>
|
38
|
+
\b
|
39
|
+
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
40
|
+
[ ]* #{P_EN}
|
41
|
+
(?=[ \]]|$)
|
42
|
+
)}ix
|
30
43
|
## todo/check: remove lookahead assertion here - why require space?
|
31
44
|
## note: \b works only after non-alphanum e.g. )
|
32
45
|
|
@@ -89,8 +102,8 @@ class Parser
|
|
89
102
|
|
90
103
|
## e.g. 2-1 (1-1) or
|
91
104
|
## 2-1
|
92
|
-
|
93
|
-
SCORE__FT_HT__RE = %r{
|
105
|
+
|
106
|
+
SCORE__FT_HT__RE = %r{
|
94
107
|
(?<score>
|
95
108
|
\b
|
96
109
|
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
@@ -104,18 +117,18 @@ class Parser
|
|
104
117
|
## note: \b works only after non-alphanum e.g. )
|
105
118
|
|
106
119
|
|
107
|
-
|
120
|
+
|
108
121
|
#############################################
|
109
|
-
# map tables
|
122
|
+
# map tables
|
110
123
|
# note: order matters; first come-first matched/served
|
111
124
|
|
112
|
-
SCORE_RE = Regexp.union(
|
125
|
+
SCORE_RE = Regexp.union(
|
113
126
|
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
114
127
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
115
128
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
116
|
-
|
129
|
+
SCORE__P__RE, # e.g. 5-1 pen.
|
130
|
+
SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
|
117
131
|
)
|
118
132
|
|
119
133
|
end # class Parser
|
120
|
-
end # module SportDb
|
121
|
-
|
134
|
+
end # module SportDb
|
@@ -47,6 +47,10 @@ TEXT_RE = %r{
|
|
47
47
|
[ ]? ## make space optional too - why? why not?
|
48
48
|
## yes - eg. 1st, 2nd, 5th etc.
|
49
49
|
\p{L}+
|
50
|
+
|
|
51
|
+
## opt 3 - add weirdo case
|
52
|
+
## e.g. 5.-8. Platz Playoffs - keep - why? why not?
|
53
|
+
\d+\.-\d+\. [ ]? \p{L}+
|
50
54
|
)
|
51
55
|
|
52
56
|
(?:(?: (?:[ ]
|
@@ -57,13 +61,18 @@ TEXT_RE = %r{
|
|
57
61
|
)?
|
58
62
|
(?:
|
59
63
|
\p{L} |
|
60
|
-
[&/']
|
64
|
+
[&/'°]
|
61
65
|
|
|
62
66
|
(?:
|
63
67
|
\d+
|
64
|
-
(?!
|
68
|
+
(?!
|
69
|
+
[0-9h'+-] | ## protected break on 12h / 12' / 1-1
|
70
|
+
## check usage for 3+4 - possible? where ? why?
|
71
|
+
(?:[.:]\d) ## protected/exclude/break on 12.03 / 12:03
|
72
|
+
)
|
65
73
|
## negative lookahead for numbers
|
66
74
|
## note - include digits itself!!!
|
75
|
+
## note - remove / (slash) e.g. allows UDI'19/Beter Bed
|
67
76
|
)|
|
68
77
|
\.
|
69
78
|
)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.6
|
4
|
+
version: 0.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -81,12 +81,22 @@ extra_rdoc_files:
|
|
81
81
|
- CHANGELOG.md
|
82
82
|
- Manifest.txt
|
83
83
|
- README.md
|
84
|
+
- config/rounds_de.txt
|
85
|
+
- config/rounds_en.txt
|
86
|
+
- config/rounds_es.txt
|
87
|
+
- config/rounds_misc.txt
|
88
|
+
- config/rounds_pt.txt
|
84
89
|
files:
|
85
90
|
- CHANGELOG.md
|
86
91
|
- Manifest.txt
|
87
92
|
- README.md
|
88
93
|
- Rakefile
|
89
94
|
- bin/fbtok
|
95
|
+
- config/rounds_de.txt
|
96
|
+
- config/rounds_en.txt
|
97
|
+
- config/rounds_es.txt
|
98
|
+
- config/rounds_misc.txt
|
99
|
+
- config/rounds_pt.txt
|
90
100
|
- lib/sportdb/parser.rb
|
91
101
|
- lib/sportdb/parser/fbtok/main.rb
|
92
102
|
- lib/sportdb/parser/lang.rb
|