sportdb-parser 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +5 -0
- data/config/rounds_de.txt +29 -0
- data/config/rounds_en.txt +12 -0
- data/config/rounds_es.txt +9 -0
- data/config/rounds_misc.txt +14 -0
- data/config/rounds_pt.txt +4 -0
- data/lib/sportdb/parser/lang.rb +43 -1
- data/lib/sportdb/parser/token-score.rb +25 -12
- data/lib/sportdb/parser/token-text.rb +7 -2
- data/lib/sportdb/parser/version.rb +1 -1
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6323a39fa903163b57c2416f732be300590217e6818ffbb782c30af06bf52052
|
4
|
+
data.tar.gz: 6491846e993a62bd815853acca1a4790ee2210aec5ab804c788332280f91d861
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 70ea59920f73fef1255a4bcd07c3e58571b5ad5a21a35d3775183ca254dfd64d048106066d6cbb53b65692bdb523a08a7d0e2c210e2ef6ee68be1efd2287677b
|
7
|
+
data.tar.gz: a2d61dc09cc2cfdbe3669d7e4376594d63440a50b09c5adc1dfa4e4c3ce983374d5157db1c84a6b44425d4068b041041d7650071f50a1de2f652cc2ab022edc9
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
@@ -0,0 +1,29 @@
|
|
1
|
+
###############
|
2
|
+
# rounds in deutsch (de) / german
|
3
|
+
|
4
|
+
Vorrunde
|
5
|
+
Gruppenphase
|
6
|
+
|
7
|
+
|
8
|
+
Aufstieg
|
9
|
+
Endrunde
|
10
|
+
Aufstiegsrunde
|
11
|
+
Aufstiegsrunde Zone A
|
12
|
+
Entscheidung Zone B
|
13
|
+
1. Aufstieg
|
14
|
+
1. Aufstieg Zone A
|
15
|
+
1. Aufstieg Zone B
|
16
|
+
2. Aufstieg Zone A
|
17
|
+
2. Aufstieg Zone B
|
18
|
+
2. Aufstieg 1. Phase
|
19
|
+
2. Aufstieg 2. Phase
|
20
|
+
2. Aufstieg 3. Phase
|
21
|
+
Direkter Aufstieg
|
22
|
+
Direkter Abstieg
|
23
|
+
5. Platz
|
24
|
+
7. Platz
|
25
|
+
9. Platz
|
26
|
+
11. Platz
|
27
|
+
|
28
|
+
Zwischenrunde Gr. B ## move to group_de - why? why not?
|
29
|
+
|
data/lib/sportdb/parser/lang.rb
CHANGED
@@ -150,8 +150,50 @@ ROUND_RE = %r{^(
|
|
150
150
|
)$}ix
|
151
151
|
|
152
152
|
|
153
|
+
####
|
154
|
+
# add more round names in different languages
|
155
|
+
# via txt files
|
156
|
+
#
|
157
|
+
# for now must match case - maybe make case-insensitive later - why? why not?
|
158
|
+
def self.read_names( path )
|
159
|
+
txt = read_text( path )
|
160
|
+
names = [] # array of lines (with words)
|
161
|
+
txt.each_line do |line|
|
162
|
+
line = line.strip
|
163
|
+
|
164
|
+
next if line.empty?
|
165
|
+
next if line.start_with?( '#' ) ## skip comments too
|
166
|
+
|
167
|
+
## strip inline (until end-of-line) comments too
|
168
|
+
## e.g. Janvier Janv Jan ## check janv in use??
|
169
|
+
## => Janvier Janv Jan
|
170
|
+
|
171
|
+
line = line.sub( /#.*/, '' ).strip
|
172
|
+
## pp line
|
173
|
+
|
174
|
+
names << line
|
175
|
+
end
|
176
|
+
names
|
177
|
+
end
|
178
|
+
|
179
|
+
|
180
|
+
def self.more_round_names
|
181
|
+
@more_round_name ||= begin
|
182
|
+
names = []
|
183
|
+
langs = ['en', 'de', 'es', 'pt', 'misc']
|
184
|
+
## sort names by length??
|
185
|
+
langs.each do |lang|
|
186
|
+
path = "#{SportDb::Module::Parser.root}/config/rounds_#{lang}.txt"
|
187
|
+
names += read_names( path )
|
188
|
+
end
|
189
|
+
names
|
190
|
+
end
|
191
|
+
end
|
192
|
+
|
193
|
+
|
153
194
|
def is_round?( text )
|
154
|
-
ROUND_RE.match?( text )
|
195
|
+
ROUND_RE.match?( text ) ||
|
196
|
+
self.class.more_round_names.include?( text )
|
155
197
|
end
|
156
198
|
|
157
199
|
##
|
@@ -1,13 +1,13 @@
|
|
1
|
-
module SportDb
|
1
|
+
module SportDb
|
2
2
|
class Parser
|
3
|
-
|
3
|
+
|
4
4
|
|
5
5
|
## todo/check: use ‹› (unicode chars) to mark optional parts in regex constant name - why? why not?
|
6
6
|
|
7
7
|
#####
|
8
8
|
# english helpers (penalty, extra time, ...)
|
9
9
|
## note - p must go last (shortest match)
|
10
|
-
# pso = penalty shootout
|
10
|
+
# pso = penalty shootout
|
11
11
|
P_EN = '(?: pso | pen\.? | p\.? )' # e.g. p., p, pen, pen., PSO, etc.
|
12
12
|
ET_EN = '(?: aet | a\.e\.t\.? )' # note: make last . optional (e.g a.e.t) allowed too
|
13
13
|
|
@@ -26,7 +26,20 @@ class Parser
|
|
26
26
|
(?<et1>\d{1,2}) - (?<et2>\d{1,2})
|
27
27
|
[ ]* #{ET_EN}
|
28
28
|
(?=[ \]]|$)
|
29
|
-
)}ix
|
29
|
+
)}ix
|
30
|
+
## todo/check: remove lookahead assertion here - why require space?
|
31
|
+
## note: \b works only after non-alphanum e.g. )
|
32
|
+
|
33
|
+
|
34
|
+
## note: allow SPECIAL with penalty only
|
35
|
+
## 3-4 pen.
|
36
|
+
SCORE__P__RE = %r{
|
37
|
+
(?<score>
|
38
|
+
\b
|
39
|
+
(?<p1>\d{1,2}) - (?<p2>\d{1,2})
|
40
|
+
[ ]* #{P_EN}
|
41
|
+
(?=[ \]]|$)
|
42
|
+
)}ix
|
30
43
|
## todo/check: remove lookahead assertion here - why require space?
|
31
44
|
## note: \b works only after non-alphanum e.g. )
|
32
45
|
|
@@ -89,8 +102,8 @@ class Parser
|
|
89
102
|
|
90
103
|
## e.g. 2-1 (1-1) or
|
91
104
|
## 2-1
|
92
|
-
|
93
|
-
SCORE__FT_HT__RE = %r{
|
105
|
+
|
106
|
+
SCORE__FT_HT__RE = %r{
|
94
107
|
(?<score>
|
95
108
|
\b
|
96
109
|
(?<ft1>\d{1,2}) - (?<ft2>\d{1,2})
|
@@ -104,18 +117,18 @@ class Parser
|
|
104
117
|
## note: \b works only after non-alphanum e.g. )
|
105
118
|
|
106
119
|
|
107
|
-
|
120
|
+
|
108
121
|
#############################################
|
109
|
-
# map tables
|
122
|
+
# map tables
|
110
123
|
# note: order matters; first come-first matched/served
|
111
124
|
|
112
|
-
SCORE_RE = Regexp.union(
|
125
|
+
SCORE_RE = Regexp.union(
|
113
126
|
SCORE__P_ET_FT_HT__RE, # e.g. 5-1 pen. 2-2 a.e.t. (1-1, 1-0)
|
114
127
|
SCORE__P_FT_HT__RE, # e.g. 5-1 pen. (1-1)
|
115
128
|
SCORE__P_ET__RE, # e.g. 2-2 a.e.t. or 5-1 pen. 2-2 a.e.t.
|
116
|
-
|
129
|
+
SCORE__P__RE, # e.g. 5-1 pen.
|
130
|
+
SCORE__FT_HT__RE, # e.g. 1-1 (1-0) or 1-1 -- note - must go last!!!
|
117
131
|
)
|
118
132
|
|
119
133
|
end # class Parser
|
120
|
-
end # module SportDb
|
121
|
-
|
134
|
+
end # module SportDb
|
@@ -57,13 +57,18 @@ TEXT_RE = %r{
|
|
57
57
|
)?
|
58
58
|
(?:
|
59
59
|
\p{L} |
|
60
|
-
[&/']
|
60
|
+
[&/'°]
|
61
61
|
|
|
62
62
|
(?:
|
63
63
|
\d+
|
64
|
-
(?!
|
64
|
+
(?!
|
65
|
+
[0-9h'+-] | ## protected break on 12h / 12' / 1-1
|
66
|
+
## check usage for 3+4 - possible? where? why?
|
67
|
+
(?:[.:]\d) ## protected/exclude/break on 12.03 / 12:03
|
68
|
+
)
|
65
69
|
## negative lookahead for numbers
|
66
70
|
## note - include digits itself!!!
|
71
|
+
## note - remove / (slash) e.g. allows UDI'19/Beter Bed
|
67
72
|
)|
|
68
73
|
\.
|
69
74
|
)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.6
|
4
|
+
version: 0.3.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-10-
|
11
|
+
date: 2024-10-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cocos
|
@@ -81,12 +81,22 @@ extra_rdoc_files:
|
|
81
81
|
- CHANGELOG.md
|
82
82
|
- Manifest.txt
|
83
83
|
- README.md
|
84
|
+
- config/rounds_de.txt
|
85
|
+
- config/rounds_en.txt
|
86
|
+
- config/rounds_es.txt
|
87
|
+
- config/rounds_misc.txt
|
88
|
+
- config/rounds_pt.txt
|
84
89
|
files:
|
85
90
|
- CHANGELOG.md
|
86
91
|
- Manifest.txt
|
87
92
|
- README.md
|
88
93
|
- Rakefile
|
89
94
|
- bin/fbtok
|
95
|
+
- config/rounds_de.txt
|
96
|
+
- config/rounds_en.txt
|
97
|
+
- config/rounds_es.txt
|
98
|
+
- config/rounds_misc.txt
|
99
|
+
- config/rounds_pt.txt
|
90
100
|
- lib/sportdb/parser.rb
|
91
101
|
- lib/sportdb/parser/fbtok/main.rb
|
92
102
|
- lib/sportdb/parser/lang.rb
|