sportdb-parser 0.3.5 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +5 -0
- data/config/rounds_de.txt +29 -0
- data/config/rounds_en.txt +12 -0
- data/config/rounds_es.txt +9 -0
- data/config/rounds_misc.txt +14 -0
- data/config/rounds_pt.txt +4 -0
- data/lib/sportdb/parser/lang.rb +48 -2
- data/lib/sportdb/parser/token-score.rb +25 -12
- data/lib/sportdb/parser/token-text.rb +7 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6323a39fa903163b57c2416f732be300590217e6818ffbb782c30af06bf52052
|
4
|
+
data.tar.gz: 6491846e993a62bd815853acca1a4790ee2210aec5ab804c788332280f91d861
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 70ea59920f73fef1255a4bcd07c3e58571b5ad5a21a35d3775183ca254dfd64d048106066d6cbb53b65692bdb523a08a7d0e2c210e2ef6ee68be1efd2287677b
|
7
|
+
data.tar.gz: a2d61dc09cc2cfdbe3669d7e4376594d63440a50b09c5adc1dfa4e4c3ce983374d5157db1c84a6b44425d4068b041041d7650071f50a1de2f652cc2ab022edc9
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/config/rounds_de.txt
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
###############
|
2
|
+
# rounds in deutsch (de) / german
|
3
|
+
|
4
|
+
Vorrunde
|
5
|
+
Gruppenphase
|
6
|
+
|
7
|
+
|
8
|
+
Aufstieg
|
9
|
+
Endrunde
|
10
|
+
Aufstiegsrunde
|
11
|
+
Aufstiegsrunde Zone A
|
12
|
+
Entscheidung Zone B
|
13
|
+
1. Aufstieg
|
14
|
+
1. Aufstieg Zone A
|
15
|
+
1. Aufstieg Zone B
|
16
|
+
2. Aufstieg Zone A
|
17
|
+
2. Aufstieg Zone B
|
18
|
+
2. Aufstieg 1. Phase
|
19
|
+
2. Aufstieg 2. Phase
|
20
|
+
2. Aufstieg 3. Phase
|
21
|
+
Direkter Aufstieg
|
22
|
+
Direkter Abstieg
|
23
|
+
5. Platz
|
24
|
+
7. Platz
|
25
|
+
9. Platz
|
26
|
+
11. Platz
|
27
|
+
|
28
|
+
Zwischenrunde Gr. B ## move to group_de - why? why not?
|
29
|
+
|
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -32,7 +32,8 @@ ROUND_RE = %r{^(
|
|
32
32
|
### note - allow Group ("stand-alone") as "generic" round for now
|
33
33
|
## BUT do NOT allow Group 1, Group 2, Group A, Group B, etc.
|
34
34
|
(?: Group [ ] [A-Z0-9]+ [ ] Play-?offs? |
|
35
|
-
Group
|
35
|
+
Group (?: [ ] phase)? |
|
36
|
+
League (?: [ ] phase)?
|
36
37
|
)
|
37
38
|
|
|
38
39
|
# round - note - requiers number e.g. round 1,2, etc.
|
@@ -127,6 +128,9 @@ ROUND_RE = %r{^(
|
|
127
128
|
# final
|
128
129
|
Finals?
|
129
130
|
|
|
131
|
+
# decider e.g. Entscheidungsspiel
|
132
|
+
Decider
|
133
|
+
|
|
130
134
|
## add replays
|
131
135
|
## e.g. Final Replay
|
132
136
|
## Quarter-finals replays
|
@@ -146,8 +150,50 @@ ROUND_RE = %r{^(
|
|
146
150
|
)$}ix
|
147
151
|
|
148
152
|
|
153
|
+
####
|
154
|
+
# add more round names in different languages
|
155
|
+
# via txt files
|
156
|
+
#
|
157
|
+
# for now must match case - maybe make caseinsensitive later - why? why not?
|
158
|
+
def self.read_names( path )
|
159
|
+
txt = read_text( path )
|
160
|
+
names = [] # array of lines (with words)
|
161
|
+
txt.each_line do |line|
|
162
|
+
line = line.strip
|
163
|
+
|
164
|
+
next if line.empty?
|
165
|
+
next if line.start_with?( '#' ) ## skip comments too
|
166
|
+
|
167
|
+
## strip inline (until end-of-line) comments too
|
168
|
+
## e.g. Janvier Janv Jan ## check janv in use??
|
169
|
+
## => Janvier Janv Jan
|
170
|
+
|
171
|
+
line = line.sub( /#.*/, '' ).strip
|
172
|
+
## pp line
|
173
|
+
|
174
|
+
names << line
|
175
|
+
end
|
176
|
+
names
|
177
|
+
end
|
178
|
+
|
179
|
+
|
180
|
+
def self.more_round_names
|
181
|
+
@more_round_name ||= begin
|
182
|
+
names = []
|
183
|
+
langs = ['en', 'de', 'es', 'pt', 'misc']
|
184
|
+
## sort names by length??
|
185
|
+
langs.each do |lang|
|
186
|
+
path = "#{SportDb::Module::Parser.root}/config/rounds_#{lang}.txt"
|
187
|
+
names += read_names( path )
|
188
|
+
end
|
189
|
+
names
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
|
149
194
|
def is_round?( text )
|
150
|
-
ROUND_RE.match?( text )
|
195
|
+
ROUND_RE.match?( text ) ||
|
196
|
+
self.class.more_round_names.include?( text )
|
151
197
|
end
|
152
198
|
|
153
199
|
##
|
data/lib/sportdb/parser/token-score.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
module SportDb
|
1
|
+
module SportDb
|
2
2
|
class Parser
|
3
|
-
|
3
|
+
|
4
4
|
|
5
5
|
## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
|
6
6
|
|
7
7
|
#####
|
8
8
|
# english helpers (penalty, extra time, ...)
|
9
9
|
## note - p must go last (shortest match)
|
10
|
-
# pso = penalty shootout
|
10
|
+
# pso = penalty shootout
|
11
11
|
P_EN = '(?: pso | pen\.? | p\.? )' # e.g. p., p, pen, pen., PSO, etc.
|
12
12
|
ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
|
13
13
|
|
@@ -26,7 +26,20 @@ class Parser
|
|
26
26
|
(?<et1>\d{1,2}) - (?<et2>\d{1,2})
|
27
27
|
[ ]* #{ET_EN}
|
28
28
|
(?=[ \]]|$)
|
29
|
-
)}ix
|
29
|
+
)}ix
|
30
|
+
## todo/check: remove loakahead assertion here - why require space?
|
31
|
+
## note: \b works only after non-alphanum e.g. )
|
32
|
+
|
33
|
+
|
34
|
+
## note: allow SPECIAL with penalty only
|
35
|
+
## 3-4 pen.
|
36
|
+
SCORE__P__RE = %r{
|
37
|
+
(?<score>
|
38
|
+
\b
|
39
|
+
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
40
|
+
[ ]* #{P_EN}
|
41
|
+
(?=[ \]]|$)
|
42
|
+
)}ix
|
30
43
|
## todo/check: remove loakahead assertion here - why require space?
|
31
44
|
## note: \b works only after non-alphanum e.g. )
|
32
45
|
|
@@ -89,8 +102,8 @@ class Parser
|
|
89
102
|
|
90
103
|
## e.g. 2-1 (1-1) or
|
91
104
|
## 2-1
|
92
|
-
|
93
|
-
SCORE__FT_HT__RE = %r{
|
105
|
+
|
106
|
+
SCORE__FT_HT__RE = %r{
|
94
107
|
(?<score>
|
95
108
|
\b
|
96
109
|
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
@@ -104,18 +117,18 @@ class Parser
|
|
104
117
|
## note: \b works only after non-alphanum e.g. )
|
105
118
|
|
106
119
|
|
107
|
-
|
120
|
+
|
108
121
|
#############################################
|
109
|
-
# map tables
|
122
|
+
# map tables
|
110
123
|
# note: order matters; first come-first matched/served
|
111
124
|
|
112
|
-
SCORE_RE = Regexp.union(
|
125
|
+
SCORE_RE = Regexp.union(
|
113
126
|
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
114
127
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
115
128
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
116
|
-
|
129
|
+
SCORE__P__RE, # e.g. 5-1 pen.
|
130
|
+
SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
|
117
131
|
)
|
118
132
|
|
119
133
|
end # class Parser
|
120
|
-
end # module SportDb
|
121
|
-
|
134
|
+
end # module SportDb
|
data/lib/sportdb/parser/token-text.rb
CHANGED
@@ -57,13 +57,18 @@ TEXT_RE = %r{
|
|
57
57
|
)?
|
58
58
|
(?:
|
59
59
|
\p{L} |
|
60
|
-
[&/']
|
60
|
+
[&/'°]
|
61
61
|
|
|
62
62
|
(?:
|
63
63
|
\d+
|
64
|
-
(?!
|
64
|
+
(?!
|
65
|
+
[0-9h'+-] | ## protected break on 12h / 12' / 1-1
|
66
|
+
## check usege for 3+4 - possible? where ? why?
|
67
|
+
(?:[.:]\d) ## protected/exclude/break on 12.03 / 12:03
|
68
|
+
)
|
65
69
|
## negative lookahead for numbers
|
66
70
|
## note - include digits itself!!!
|
71
|
+
## note - remove / (slash) e.g. allows UDI'19/Beter Bed
|
67
72
|
)|
|
68
73
|
\.
|
69
74
|
)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -81,12 +81,22 @@ extra_rdoc_files:
|
|
81
81
|
- CHANGELOG.md
|
82
82
|
- Manifest.txt
|
83
83
|
- README.md
|
84
|
+
- config/rounds_de.txt
|
85
|
+
- config/rounds_en.txt
|
86
|
+
- config/rounds_es.txt
|
87
|
+
- config/rounds_misc.txt
|
88
|
+
- config/rounds_pt.txt
|
84
89
|
files:
|
85
90
|
- CHANGELOG.md
|
86
91
|
- Manifest.txt
|
87
92
|
- README.md
|
88
93
|
- Rakefile
|
89
94
|
- bin/fbtok
|
95
|
+
- config/rounds_de.txt
|
96
|
+
- config/rounds_en.txt
|
97
|
+
- config/rounds_es.txt
|
98
|
+
- config/rounds_misc.txt
|
99
|
+
- config/rounds_pt.txt
|
90
100
|
- lib/sportdb/parser.rb
|
91
101
|
- lib/sportdb/parser/fbtok/main.rb
|
92
102
|
- lib/sportdb/parser/lang.rb
|