sportdb-parser 0.3.6 → 0.3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +5 -0
- data/config/rounds_de.txt +29 -0
- data/config/rounds_en.txt +12 -0
- data/config/rounds_es.txt +9 -0
- data/config/rounds_misc.txt +14 -0
- data/config/rounds_pt.txt +4 -0
- data/lib/sportdb/parser/lang.rb +43 -1
- data/lib/sportdb/parser/token-score.rb +25 -12
- data/lib/sportdb/parser/token-text.rb +7 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6323a39fa903163b57c2416f732be300590217e6818ffbb782c30af06bf52052
|
4
|
+
data.tar.gz: 6491846e993a62bd815853acca1a4790ee2210aec5ab804c788332280f91d861
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 70ea59920f73fef1255a4bcd07c3e58571b5ad5a21a35d3775183ca254dfd64d048106066d6cbb53b65692bdb523a08a7d0e2c210e2ef6ee68be1efd2287677b
|
7
|
+
data.tar.gz: a2d61dc09cc2cfdbe3669d7e4376594d63440a50b09c5adc1dfa4e4c3ce983374d5157db1c84a6b44425d4068b041041d7650071f50a1de2f652cc2ab022edc9
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -0,0 +1,29 @@
|
|
1
|
+
###############
|
2
|
+
# rounds in deutsch (de) / german
|
3
|
+
|
4
|
+
Vorrunde
|
5
|
+
Gruppenphase
|
6
|
+
|
7
|
+
|
8
|
+
Aufstieg
|
9
|
+
Endrunde
|
10
|
+
Aufstiegsrunde
|
11
|
+
Aufstiegsrunde Zone A
|
12
|
+
Entscheidung Zone B
|
13
|
+
1. Aufstieg
|
14
|
+
1. Aufstieg Zone A
|
15
|
+
1. Aufstieg Zone B
|
16
|
+
2. Aufstieg Zone A
|
17
|
+
2. Aufstieg Zone B
|
18
|
+
2. Aufstieg 1. Phase
|
19
|
+
2. Aufstieg 2. Phase
|
20
|
+
2. Aufstieg 3. Phase
|
21
|
+
Direkter Aufstieg
|
22
|
+
Direkter Abstieg
|
23
|
+
5. Platz
|
24
|
+
7. Platz
|
25
|
+
9. Platz
|
26
|
+
11. Platz
|
27
|
+
|
28
|
+
Zwischenrunde Gr. B ## move to group_de - why? why not?
|
29
|
+
|
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -150,8 +150,50 @@ ROUND_RE = %r{^(
|
|
150
150
|
)$}ix
|
151
151
|
|
152
152
|
|
153
|
+
####
|
154
|
+
# add more round names in different languages
|
155
|
+
# via txt files
|
156
|
+
#
|
157
|
+
# for now must match case - maybe make case-insensitive later - why? why not?
|
158
|
+
def self.read_names( path )
|
159
|
+
txt = read_text( path )
|
160
|
+
names = [] # array of lines (with words)
|
161
|
+
txt.each_line do |line|
|
162
|
+
line = line.strip
|
163
|
+
|
164
|
+
next if line.empty?
|
165
|
+
next if line.start_with?( '#' ) ## skip comments too
|
166
|
+
|
167
|
+
## strip inline (until end-of-line) comments too
|
168
|
+
## e.g. Janvier Janv Jan ## check janv in use??
|
169
|
+
## => Janvier Janv Jan
|
170
|
+
|
171
|
+
line = line.sub( /#.*/, '' ).strip
|
172
|
+
## pp line
|
173
|
+
|
174
|
+
names << line
|
175
|
+
end
|
176
|
+
names
|
177
|
+
end
|
178
|
+
|
179
|
+
|
180
|
+
def self.more_round_names
|
181
|
+
@more_round_name ||= begin
|
182
|
+
names = []
|
183
|
+
langs = ['en', 'de', 'es', 'pt', 'misc']
|
184
|
+
## sort names by length??
|
185
|
+
langs.each do |lang|
|
186
|
+
path = "#{SportDb::Module::Parser.root}/config/rounds_#{lang}.txt"
|
187
|
+
names += read_names( path )
|
188
|
+
end
|
189
|
+
names
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
|
153
194
|
def is_round?( text )
|
154
|
-
ROUND_RE.match?( text )
|
195
|
+
ROUND_RE.match?( text ) ||
|
196
|
+
self.class.more_round_names.include?( text )
|
155
197
|
end
|
156
198
|
|
157
199
|
##
|
@@ -1,13 +1,13 @@
|
|
1
|
-
module SportDb
|
1
|
+
module SportDb
|
2
2
|
class Parser
|
3
|
-
|
3
|
+
|
4
4
|
|
5
5
|
## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
|
6
6
|
|
7
7
|
#####
|
8
8
|
# english helpers (penalty, extra time, ...)
|
9
9
|
## note - p must go last (shortest match)
|
10
|
-
# pso = penalty shootout
|
10
|
+
# pso = penalty shootout
|
11
11
|
P_EN = '(?: pso | pen\.? | p\.? )' # e.g. p., p, pen, pen., PSO, etc.
|
12
12
|
ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
|
13
13
|
|
@@ -26,7 +26,20 @@ class Parser
|
|
26
26
|
(?<et1>\d{1,2}) - (?<et2>\d{1,2})
|
27
27
|
[ ]* #{ET_EN}
|
28
28
|
(?=[ \]]|$)
|
29
|
-
)}ix
|
29
|
+
)}ix
|
30
|
+
## todo/check: remove lookahead assertion here - why require space?
|
31
|
+
## note: \b works only after non-alphanum e.g. )
|
32
|
+
|
33
|
+
|
34
|
+
## note: allow SPECIAL with penalty only
|
35
|
+
## 3-4 pen.
|
36
|
+
SCORE__P__RE = %r{
|
37
|
+
(?<score>
|
38
|
+
\b
|
39
|
+
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
40
|
+
[ ]* #{P_EN}
|
41
|
+
(?=[ \]]|$)
|
42
|
+
)}ix
|
30
43
|
## todo/check: remove lookahead assertion here - why require space?
|
31
44
|
## note: \b works only after non-alphanum e.g. )
|
32
45
|
|
@@ -89,8 +102,8 @@ class Parser
|
|
89
102
|
|
90
103
|
## e.g. 2-1 (1-1) or
|
91
104
|
## 2-1
|
92
|
-
|
93
|
-
SCORE__FT_HT__RE = %r{
|
105
|
+
|
106
|
+
SCORE__FT_HT__RE = %r{
|
94
107
|
(?<score>
|
95
108
|
\b
|
96
109
|
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
@@ -104,18 +117,18 @@ class Parser
|
|
104
117
|
## note: \b works only after non-alphanum e.g. )
|
105
118
|
|
106
119
|
|
107
|
-
|
120
|
+
|
108
121
|
#############################################
|
109
|
-
# map tables
|
122
|
+
# map tables
|
110
123
|
# note: order matters; first come-first matched/served
|
111
124
|
|
112
|
-
SCORE_RE = Regexp.union(
|
125
|
+
SCORE_RE = Regexp.union(
|
113
126
|
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
114
127
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
115
128
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
116
|
-
|
129
|
+
SCORE__P__RE, # e.g. 5-1 pen.
|
130
|
+
SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
|
117
131
|
)
|
118
132
|
|
119
133
|
end # class Parser
|
120
|
-
end # module SportDb
|
121
|
-
|
134
|
+
end # module SportDb
|
@@ -57,13 +57,18 @@ TEXT_RE = %r{
|
|
57
57
|
)?
|
58
58
|
(?:
|
59
59
|
\p{L} |
|
60
|
-
[&/']
|
60
|
+
[&/'°]
|
61
61
|
|
|
62
62
|
(?:
|
63
63
|
\d+
|
64
|
-
(?!
|
64
|
+
(?!
|
65
|
+
[0-9h'+-] | ## protected break on 12h / 12' / 1-1
|
66
|
+
## check usage for 3+4 - possible? where? why?
|
67
|
+
(?:[.:]\d) ## protected/exclude/break on 12.03 / 12:03
|
68
|
+
)
|
65
69
|
## negative lookahead for numbers
|
66
70
|
## note - include digits itself!!!
|
71
|
+
## note - remove / (slash) e.g. allows UDI'19/Beter Bed
|
67
72
|
)|
|
68
73
|
\.
|
69
74
|
)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.6
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -81,12 +81,22 @@ extra_rdoc_files:
|
|
81
81
|
- CHANGELOG.md
|
82
82
|
- Manifest.txt
|
83
83
|
- README.md
|
84
|
+
- config/rounds_de.txt
|
85
|
+
- config/rounds_en.txt
|
86
|
+
- config/rounds_es.txt
|
87
|
+
- config/rounds_misc.txt
|
88
|
+
- config/rounds_pt.txt
|
84
89
|
files:
|
85
90
|
- CHANGELOG.md
|
86
91
|
- Manifest.txt
|
87
92
|
- README.md
|
88
93
|
- Rakefile
|
89
94
|
- bin/fbtok
|
95
|
+
- config/rounds_de.txt
|
96
|
+
- config/rounds_en.txt
|
97
|
+
- config/rounds_es.txt
|
98
|
+
- config/rounds_misc.txt
|
99
|
+
- config/rounds_pt.txt
|
90
100
|
- lib/sportdb/parser.rb
|
91
101
|
- lib/sportdb/parser/fbtok/main.rb
|
92
102
|
- lib/sportdb/parser/lang.rb
|