rsssf 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/{HISTORY.md → CHANGELOG.md} +4 -0
- data/Manifest.txt +41 -7
- data/README.md +93 -71
- data/Rakefile +8 -7
- data/config/groups_en.txt +44 -0
- data/config/rounds_en.txt +283 -0
- data/config/rounds_es.txt +20 -0
- data/config/rounds_misc.txt +7 -0
- data/lib/_cocos_.rb +158 -0
- data/lib/rsssf/convert/convert.rb +71 -0
- data/lib/rsssf/convert/errata.rb +103 -0
- data/lib/rsssf/convert/html_entities.rb +150 -0
- data/lib/rsssf/convert/html_to_txt/beautify_anchors.rb +96 -0
- data/lib/rsssf/convert/html_to_txt/make_heading.rb +70 -0
- data/lib/rsssf/convert/html_to_txt/remove_emails.rb +43 -0
- data/lib/rsssf/convert/html_to_txt/replace_a_href.rb +85 -0
- data/lib/rsssf/convert/html_to_txt/replace_a_name.rb +87 -0
- data/lib/rsssf/convert/html_to_txt/replace_heading.rb +76 -0
- data/lib/rsssf/convert/html_to_txt/replace_hr.rb +25 -0
- data/lib/rsssf/convert/html_to_txt.rb +247 -0
- data/lib/rsssf/download.rb +20 -0
- data/lib/rsssf/fmtfix/dates.rb +541 -0
- data/lib/rsssf/fmtfix/dates_helpers.rb +63 -0
- data/lib/rsssf/fmtfix/errata.rb +44 -0
- data/lib/rsssf/fmtfix/fmtfix-base.rb +68 -0
- data/lib/rsssf/fmtfix/fmtfix.rb +101 -0
- data/lib/rsssf/fmtfix/goals.rb +173 -0
- data/lib/rsssf/fmtfix/headers.rb +326 -0
- data/lib/rsssf/fmtfix/outline.rb +228 -0
- data/lib/rsssf/fmtfix/patch_headings.rb +141 -0
- data/lib/rsssf/fmtfix/rounds.rb +74 -0
- data/lib/rsssf/fmtfix/score.rb +92 -0
- data/lib/rsssf/fmtfix/tables.rb +316 -0
- data/lib/rsssf/fmtfix/topscorers.rb +50 -0
- data/lib/rsssf/page-find_schedule.rb +127 -0
- data/lib/rsssf/page-meta.rb +68 -0
- data/lib/rsssf/page.rb +125 -238
- data/lib/rsssf/parse_schedules.rb +34 -0
- data/lib/rsssf/prepare/convert-links.rb +77 -0
- data/lib/rsssf/prepare/convert-meta.rb +111 -0
- data/lib/rsssf/prepare/convert-navlines.rb +154 -0
- data/lib/rsssf/prepare/convert-postproc.rb +141 -0
- data/lib/rsssf/prepare/convert.rb +100 -0
- data/lib/rsssf/prepare/download.rb +40 -0
- data/lib/rsssf/project.rb +154 -0
- data/lib/rsssf/reports/page.rb +66 -23
- data/lib/rsssf/reports/schedule.rb +89 -40
- data/lib/rsssf/schedule.rb +4 -14
- data/lib/rsssf/utils.rb +37 -45
- data/lib/rsssf/version.rb +7 -6
- data/lib/rsssf.rb +82 -19
- metadata +68 -26
- data/.gemtest +0 -0
- data/lib/rsssf/fetch.rb +0 -80
- data/lib/rsssf/html2txt.rb +0 -157
- data/lib/rsssf/patch.rb +0 -28
- data/lib/rsssf/repo.rb +0 -220
- data/test/helper.rb +0 -12
- data/test/test_utils.rb +0 -83
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
##
|
|
6
|
+
## note - ascii hr replacement is
|
|
7
|
+
## =-=-= (do NOT match) !!!!!!
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
HX_RE = %r{ ## negative lookahead
|
|
11
|
+
## do NOT match =-=
|
|
12
|
+
## do NOT match =========== (without any heading text!!)
|
|
13
|
+
## e.g.
|
|
14
|
+
## Fall season
|
|
15
|
+
## ===========
|
|
16
|
+
|
|
17
|
+
(?! ^[ ]* (?: =-=
|
|
18
|
+
| ={1,} [ ]* $
|
|
19
|
+
)
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
^
|
|
23
|
+
[ ]*
|
|
24
|
+
|
|
25
|
+
(?<marker> ={1,6})
|
|
26
|
+
[ ]*
|
|
27
|
+
(?<text> .+?)
|
|
28
|
+
#{OPT_REF}
|
|
29
|
+
[ ]*
|
|
30
|
+
$}x
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
##
## Normalize the heading outline of a page text.
##
##  - scans txt for heading lines (HX_RE) and counts the usage per level
##  - if the very first heading is the only heading on the top-most level in
##    use AND its text equals title, that heading line is blanked out
##    (the line break stays); otherwise a message gets logged via log()
##  - the remaining levels in use get renumbered without gaps and every
##    heading marker is rewritten to (new level + 1) "=" characters, that is,
##    headings always start at level 2 (level 1 is reserved for the page title)
##
## txt    - page text
## title: - page title (required keyword) compared against the first heading
##
## Returns the rewritten text; returns txt unchanged if no heading is found.
##
## note - if a heading level without a mapping shows up, diagnostics get
##        printed and the process terminates via exit 1.
def autofix_outline( txt, title: )
  hx = txt.scan( HX_RE )

  ### note - shortcircuit if no headings found!!!
  return txt  if hx.empty?

  ## usage counts for h1,h2,h3,h4,h5,h6 - note: index 0 is unused (nil)
  counts = [nil,0,0,0,0,0,0]
  hx.each { |marker,_| counts[ marker.size ] += 1 }

  ## flatten levels; only keep levels with headings (in ascending order)
  levels = (1..6).select { |level| counts[level] > 0 }

  #####
  ### special case for first heading
  ##    check if heading is matching title AND is the only one in top level
  htop_marker, htop_header = hx[0]
  htop_level = htop_marker.size

  if htop_level != levels[0]
    ## warn/log - top heading NOT top!!
    msg = "top heading #{htop_level} not top (#{levels[0]}) " +
          "in page with title #{title}"
    log( msg )
  elsif counts[htop_level] != 1
    ## warn/log - more than one top level heading!!!
    msg = "more than one (#{counts[htop_level]}) top heading #{htop_level} found " +
          "in page with title #{title}"
    log( msg )
  elsif htop_header != title
    ## warn/log - heading top NOT matching page title
    msg = "first top heading NOT matching page title #{htop_header} <=> #{title}"
    puts "!! WARN #{msg}"
    log( msg )
  else
    ## same as title - pop level (that is, remove heading too)
    counts[htop_level] = 0    ## update/reset counter
    levels.shift              ## remove first level (inline op)!!!

    ### note - MUST escape header for regex e.g. [Bra..] or 1.
    ##         Regexp.escape escapes spaces too (required in extended mode)
    htop_re = %r{
                  ^
                  [ ]* #{htop_marker}
                  [ ]* #{Regexp.escape(htop_header)}
                  .*?
                  $
               }x

    ## blank out the (first) matching heading line in txt too
    txt = txt.sub( htop_re ) do |match|
      puts " removing top heading matching title -- >#{match}<"
      ''
    end
  end

  ## map levels in use to 1,2,3,.. (without gaps)
  mapping = levels.each_with_index.map { |level,i| [level, i+1] }.to_h

  # rewrite headings
  txt.gsub( HX_RE ) do
    m = Regexp.last_match
    old_marker = m[:marker]
    old_level  = old_marker.size
    new_level  = mapping[old_level]

    if new_level.nil?
      puts "!! no heading #{old_level} mapping found in page >#{title}<:"
      puts "match:"
      pp m
      puts "counts:"
      pp counts
      puts "levels:"
      pp levels
      puts "mapping:"
      pp mapping
      exit 1
    end

    ## note - always start at level 2 (page title like in wikipedia is level 1)
    ##          thus, new_level+1
    ##
    ## note - a mapped level is never greater than the old level, so the
    ##        former guard "(old_level - new_level+1) > 0" (parsed as
    ##        (old_level - new_level) + 1 > 0) was always true;
    ##        the rewrite is a no-op if the marker already has the target size.
    ##
    ## note - gsub replaces every occurrence of the old marker in the line
    ##        (leading and possibly trailing marker too)
    m[0].gsub( old_marker, '=' * (new_level+1) )
  end
end
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
## Scan txt for heading lines; returns the raw String#scan result for HX_RE
## (one array of captures per heading found).
def _scan_outline( txt )
  txt.scan( HX_RE )
end
|
|
172
|
+
|
|
173
|
+
## Build a plain-text outline report for a page text.
##
## The report holds
##   - one row with the heading levels in use (h1..h6, "-" if unused),
##   - one row with the number of headings per level ("-" if zero),
##   - one line per heading found (level, marker, text),
##   - and - if any anchors are found - the anchor count plus the anchors.
##
## Returns the report as a string.
def build_outline( txt )
  headings = txt.scan( HX_RE )

  ## note - index 0 is nil; index 1 (h1) starts with 0 etc.
  tally = [nil,0,0,0,0,0,0]
  headings.each do |marker,_|
    tally[ marker.size ] += 1
  end

  level_row = (1..6).map { |level| tally[level] == 0 ? '-' : "h#{level}" }.join( '/' )
  count_row = (1..6).map { |level| tally[level] == 0 ? '-' : tally[level] }.join( '/' )

  report = String.new
  report += " outline:"
  report += " " + level_row + "\n"
  report += " " + count_row + "\n"

  headings.each do |marker,text|
    report << format( " (%d) %-6s", marker.size, marker ) << " " << text << "\n"
  end

  ## count anchors (aka a name)
  anchors = txt.scan( /‹§ [^›]+ ›/x )

  unless anchors.empty?
    report << "\n"
    report << " aname #{anchors.size}: " << anchors.join( ',' ) << "\n"
  end

  report
end
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
end ## class Fmtfix
|
|
228
|
+
end ## module Rsssf
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
|
|
3
|
+
|
|
4
|
+
## e.g. 2008/09
|
|
5
|
+
## note: also support 1999/2000
|
|
6
|
+
## note: use single quotes - quotes do NOT get escaped (e.g. '\d' => "\\d")
|
|
7
|
+
##
|
|
8
|
+
## SEASON = \d{4}/(?:\d{2}|\d{4})
|
|
9
|
+
## built-in for now
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
## Translate a heading pattern line (mini pattern language) into
## regex source text (returned as a string, NOT as a compiled Regexp).
##
## Translation steps:
##   - leading/trailing whitespace gets stripped
##   - runs of spaces become ' [ ] '
##   - $$ (with optional spaces around) becomes ' \s+ '
##   - bracket character classes e.g. [- ] or [ ]? are kept as is
##   - ~ becomes ' [ ]? ' (optional space)
##   - . ( ) get escaped with a backslash
##   - last step: the builtin $SEASON$ becomes \d{4}/(?:\d{2}|\d{4})
##     e.g. matching 2008/09 or 1999/2000
def self.mkheading_regex( str )
  token_re = %r{
                 (?<charclass> [ ]* \[ [^\[\]]+ \] [*?+]? [ ]*)
               | (?<newline> [ ]* \$\$ [ ]*)
               | (?<spaces> [ ]+)
             }x

  pattern = str.strip.gsub( token_re ) do |token|
    hit = Regexp.last_match
    if hit[:spaces]
      ' [ ] '      ## change space to [ ]
    elsif hit[:newline]
      ' \s+ '      ## $$ => \s+ -- note - \s incl. newline
    else
      token        ## character class - keep as is
    end
  end

  ## ~ => optional space
  pattern = pattern.gsub( '~', ' [ ]? ' )

  ## escape . ( ) in one pass
  pattern = pattern.gsub( /[.()]/ ) { |ch| "\\#{ch}" }

  ### last step change builtins
  pattern.gsub( '$SEASON$' ) { '\d{4}/(?:\d{2}|\d{4})' }
end
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
## Parse heading patches (text format) into a hash.
##
## Format:
##   - blank lines and lines starting with # get skipped
##   - a line reading __END__ stops parsing
##   - a heading line (= Title or == Title == etc.) starts a new entry;
##     the heading text is the hash key (the normalized heading title)
##   - every other line is a heading pattern; it gets translated via
##     mkheading_regex, wrapped in %r{^ ... $}ix and added to the
##     latest heading entry
##
## Returns a hash mapping heading title => array of Regexp.
##
## Raises ArgumentError if a pattern line shows up before the first heading
## line (instead of failing with NoMethodError on nil).
def self.parse_heading_patches( txt )
  patches = {}
  header  = nil     ## pattern list of latest heading (nil before first heading)

  txt.each_line do |line|
    line = line.strip
    next   if line.empty? || line.start_with?('#')
    break  if line == '__END__'

    ## check if heading
    if m=%r{ ^
             [ ]* =+ [ ]*
             (?<text> .+?)
             (?: [ ]* =+ )?
             [ ]*
             $
           }x.match(line)
      header = patches[m[:text]] = []
    else
      if header.nil?
        raise ArgumentError,
              "heading pattern >#{line}< found before first heading line (e.g. = Title)"
      end

      re = mkheading_regex( line )
      ## note - wrap in %r{^$}ix
      header << %r{^ #{re} $}ix
    end
  end

  patches
end
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
## Read the text at path (via read_text) and parse it with
## parse_heading_patches; returns the parsed patches.
def self.read_heading_patches( path )
  txt = read_text( path )
  parse_heading_patches( txt )
end
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
## Replace heading candidates in txt with a level-2 heading line ("== ...").
##
## txt   - page text
## rxs   - list of regexes; tried in order, each one replaces (at most)
##         its first match in txt
## title - normalized heading title OR '*' for keeping the matched text
##
## title is '*'  - the matched text is kept as the heading text (with an
##                 optional trailing colon or dot removed); all regexes
##                 get applied
## otherwise     - the match gets replaced with "== title" and
##                 processing stops after the first regex that matched
##
## note - the replacement always ends with a newline (on top of the
##        line break that follows the match in txt)
##
## Returns the patched text.
def _patch_heading( txt, rxs, title )
  rxs.each do |rx|
    replaced = false

    txt = txt.sub( rx ) do |match|
      heading = if title == '*'    ## use original title / do NOT replace/normalize
                  ## note - autoremove (optional) trailing colon (:) or dot (.)
                  "== #{match.sub( /[.:]$/, '' ).rstrip}"
                else
                  ## note - only short-circuit match if NOT generic replace
                  replaced = true
                  "== #{title}"
                end

      ## note - log the heading really written (NOT "== *" for generic replace)
      puts " found heading match >#{match}< replace with >#{heading}<"

      "#{heading}\n"
    end

    ## note - break on first match
    break  if replaced
  end

  txt
end
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
## Apply all heading patches to txt.
##
## patches - pairs of (heading title, list of regexes), e.g. the hash
##           returned by parse_heading_patches; applied in order,
##           every pair works on the result of the one before
##
## Returns the patched text.
def patch_headings( txt, patches )
  patches.reduce( txt ) do |patched, (title, rxs)|
    _patch_heading( patched, rxs, title )
  end
end
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
end ## class Fmtfix
|
|
126
|
+
end ## module Rsssf
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
__END__
|
|
133
|
+
|
|
134
|
+
patches = read_headings( "./make/de_headings.txt" )
|
|
135
|
+
pp patches
|
|
136
|
+
|
|
137
|
+
txt = read_text( "../clubs/germany/pages/duit09.txt" )
|
|
138
|
+
patch_headings( txt, patches )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
puts "bye"
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
##
|
|
6
|
+
## note all "type" for round
|
|
7
|
+
## eg. round 26 - todo/fix - later use squish to autofix!!
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
## e.g. round 1, round 2, etc.
|
|
11
|
+
## matchday 1
|
|
12
|
+
## week 1
|
|
13
|
+
# note - add optional Matchday 1 of 2 or such
|
|
14
|
+
## keep why? why not?
|
|
15
|
+
#
|
|
16
|
+
# matchweek used by premierleague.com
|
|
17
|
+
# week used in mls/usa (no matchdays/rounds)
|
|
18
|
+
# note - matchweek might start on tuesday (e.g. tue to mon)
|
|
19
|
+
# or check if always 7day week?
|
|
20
|
+
#
|
|
21
|
+
# note - use 1-9 regex (cannot start with 0) - why? why not?
|
|
22
|
+
# make week 01 or round 01 or matchday 01 possible?
|
|
23
|
+
|
|
24
|
+
ROUND_PAT_BASE = %q{
|
|
25
|
+
( Round
|
|
26
|
+
| Matchday
|
|
27
|
+
| Matchweek
|
|
28
|
+
| Week ) [ ]{1,2} [1-9][0-9]*
|
|
29
|
+
|
|
30
|
+
(?: ## note - add optional Matchday 1 of 2 or such
|
|
31
|
+
[ ] of [ ] [1-9][0-9]*
|
|
32
|
+
)?
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
##
|
|
38
|
+
## add more pattern via config
|
|
39
|
+
##
|
|
40
|
+
## todo/fix - check if .txt is empty
|
|
41
|
+
## do NOT add ( || will match everything!!)
|
|
42
|
+
##
|
|
43
|
+
## rename names_misc to names_more - why? why not?
|
|
44
|
+
|
|
45
|
+
## more round name patterns read from the config files (one list per file)
ROUND_NAMES_EN   = read_patterns( "#{Rsssf.config_dir}/rounds_en.txt" )
ROUND_NAMES_ES   = read_patterns( "#{Rsssf.config_dir}/rounds_es.txt" )
ROUND_NAMES_MISC = read_patterns( "#{Rsssf.config_dir}/rounds_misc.txt" )

## All round patterns joined into one alternation (regex source text):
##   base pattern | english names | spanish names | misc names
##
## note - empty pattern lists get skipped; otherwise an empty list would
##        leave a dangling " | " behind, that is, an empty alternative
##        that matches everything!!
##        with all lists non-empty the text is the same as
##        chaining the parts with ' | ' by hand.
ROUND_PAT = [ ROUND_PAT_BASE,
              *[ ROUND_NAMES_EN,
                 ROUND_NAMES_ES,
                 ROUND_NAMES_MISC ].reject( &:empty? )
                                   .map { |names| names.join( ' | ' ) }
            ].join( ' | ' )
|
|
52
|
+
## pp ROUND_PAT
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
end ## class Fmtfix
|
|
56
|
+
end ## module Rsssf
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
__END__
|
|
60
|
+
|
|
61
|
+
"\n" +
|
|
62
|
+
" ( Round\n" +
|
|
63
|
+
" | Matchday\n" +
|
|
64
|
+
" | Matchweek\n" +
|
|
65
|
+
" | Week ) [ ]{1,2} [1-9][0-9]*\n" +
|
|
66
|
+
"\n" +
|
|
67
|
+
" (?: ## note - add optional Matchday 1 of 2 or such\n" +
|
|
68
|
+
" [ ] of [ ] [1-9][0-9]*\n" +
|
|
69
|
+
" )?\n" +
|
|
70
|
+
" | Preliminary [ ] round | (?: First | Second | Third | Fourth | Fifth
|
|
71
|
+
| 1st | 2nd | 3rd | 4th | 5th ) [ ] (?: round (?: [ ] replays? )? | phase )
|
|
72
|
+
| Round [ ] one | (?: 1/32 | 1/16 | 16th | 1/8 | 8th | 1/4 | 1/2 ) [ ] finals? (?: [ ] replays? )?
|
|
73
|
+
| (?: Eight | Quarter | Semi ) [ -]? finals? (?: [ ] replays? )?
|
|
74
|
+
| Semis | Quarters | (?: Round [ ] of | Last ) [ ] (?: 4 | 8 | 16 | 32) | (?: fifth | 5th ) [ -] place [ ] (?: match | final | play[ -]?off ) | Match [ ] for [ ] (?: fifth | 5th ) [ -] place | (?: third | 3rd ) [ -] place [ ] (?: match | final | play[ -]?off ) | Match [ ] for [ ] (?: third | 3rd ) [ -] place | Finals? (?: [ ] replays? )? | Final [ ] (?: round | group | pool) | (?: Minor | Major) [ ] Semi [ -]? finals? | Preliminary [ ] final | Final [ ] series | Grand [ ] final | Conference [ ] wild [ ] card [ ] round | Wild [ ] card [ ] games | (?: Eastern | Western) [ ] conference [ ] quarter [ -]? finals | (?: Eastern | Western) [ ] semi [ -]? finals | (?: Eastern | Western) [ ] conference [ ] final | Conference [ ] quarterfinals | Conference [ ] semifinals | Conference [ ] finals | Divisional [ ] finals | Championship [ ] final | MLS [ ] cup (?: [ ] final)? (?: [ ] \\d{4})? | Qualification [ ] MLS | Super [ ] semi [ -]? final | Super [ ]? cup [ ] final | Deciding [ ] match | Decider | play-?offs? [ ] round | Prime [ ] round | Round [ ] (?: 1|2|3) [ ] \\( no [ ] extra [ ] time \\) | Quarter [ -]? finals? [ ] \\( no [ ] extra [ ] time \\) | Moved [ ] match(?: es)? | match(?: es)? [ ] from [ ] round [ ] \\d{1,2} | Replays? | Replayed [ ] match(?: es)? | (?: First | Second | Third | Fourth | Fifth | 1st | 2nd | 3rd | 4th | 5th ) [ ] stage | (?: Regular | Group | League | Playoff ) [ ] stage | Knock-?out [ ] stage | Regular [ ] season | Tournament [ ] proper | Championship [ ] play-?offs? | Europa [ ] league [ ] play-?offs? | Conference [ ] league [ ] play-?offs? | Promotion [ ] (?: play-?offs? | match(?: es)?) | Relegation [ ] (?: play-?offs? | match(?: es)?) | Promotion/relegation [ ] (?: play-?offs? | match(?: es)?) | Promotion/relegation [ ] (?: play-?offs? | match(?: es)?) [ ] (?: 1st/2nd | 2nd/3rd | 3rd/4th) [ ] level | Relegation [ ] playout | Play-?out | Play-in [ ] round [ ] (?: A | B | A-B ) | play-?offs? | play-?offs? [ ] (?: 1 | 2)? 
| (?: First | Second | Third | Fourth | Fifth | 1st | 2nd | 3rd | 4th | 5th ) [ ] legs? (?: [ ] replays? )? | Third [ ] leg [ ] minigame | Playoffs [ ] \\( Liguilla \\) | Recalificación | Round [ ] of [ ] 64 [ ] - [ ] 32 [ ] avos [ ] de [ ] final | Round [ ] of [ ] 32 [ ] - [ ] 16 [ ] avos [ ] de [ ] final | Round [ ] of [ ] 16 [ ] - [ ] Octavos [ ] de [ ] Final | Quarter [ ] finals [ ] - [ ] Cuartos [ ] de [ ] final | Semi [ ] finals [ ] - [ ] Semifinales | Primera [ ] fase [ ] de [ ] zonas [ ] - [ ] Phase [ ] of [ ] groups | Play-off [ ] o [ ] umístění | Skupina [ ] o [ ] záchranu"
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
## Normalize score notes in square brackets to the round-bracket style.
##
##   [aet]                            => (aet)    -- after extra time
##   [asdet]                          => (asdet)  -- after sudden death extra time
##   [aet, 2-3 pen], [aet, 2-3pen]    => (aet, 2-3 pen)
##   [aet, pen 4-3]                   => (aet, 4-3 pen)
##   [5-4 pen], [1-3pen]              => (5-4 pen)
##   [Pen 4-1]                        => (4-1 pen)
##   [6-5 PK]                         => (6-5 pen)
##
## note - all rules are case-insensitive; the plain [aet] / [asdet]
##        rule used to be case-sensitive (e.g. [AET] stayed as is while
##        [AET, 4-3 pen] got converted) - now in line with the other rules.
##
## note - special cases NOT handled (yet) - check usage - uniques?
##          [8-7 pen(no extra time)]
##          [Pen 2-4 (1-3?)]
##
## Returns the text with the score notes replaced.
def handle_score( txt )
  ## fix typos - move to errata
  txt = txt.gsub( 'paet, 3-4 pen]', '[aet, 3-4 pen]' )

  ## [aet] => (aet), [asdet] => (asdet)
  txt = txt.gsub( %r{ \[ (aet|asdet) \] }ix ) do
    "(#{Regexp.last_match(1).downcase})"
  end

  ## note - the rules write round brackets only,
  ##        thus, no rule matches the output of another rule
  rules = [
    ## [aet, 2-3 pen], [aet, 9-10 pen], [aet, 2-3pen]
    [ %r{ \[ aet[,;.] [ ]? (\d{1,2}-\d{1,2}) [ ]? pen \] }ix, '(aet, \1 pen)' ],
    ## [aet, pen 4-3]
    [ %r{ \[ aet[,;.] [ ]? pen [ ] (\d{1,2}-\d{1,2}) \] }ix,  '(aet, \1 pen)' ],
    ## [5-4 pen], [1-3pen]
    [ %r{ \[ (\d{1,2}-\d{1,2}) [ ]? pen \] }ix,               '(\1 pen)' ],
    ## [Pen 4-1]
    [ %r{ \[ pen [ ] (\d{1,2}-\d{1,2}) \] }ix,                '(\1 pen)' ],
    ## [5-3 PK]
    [ %r{ \[ (\d{1,2}-\d{1,2}) [ ] PK \] }ix,                 '(\1 pen)' ],
  ]

  rules.reduce( txt ) { |acc, (rx, to)| acc.gsub( rx, to ) }
end
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
end ## class Fmtfix
|
|
92
|
+
end ## module Rsssf
|