rsssf 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +39 -2
- data/README.md +67 -62
- data/Rakefile +2 -2
- data/config/groups_en.txt +44 -0
- data/config/rounds_en.txt +283 -0
- data/config/rounds_es.txt +20 -0
- data/config/rounds_misc.txt +7 -0
- data/lib/_cocos_.rb +158 -0
- data/lib/rsssf/convert/convert.rb +71 -0
- data/lib/rsssf/convert/errata.rb +103 -0
- data/lib/rsssf/convert/html_entities.rb +150 -0
- data/lib/rsssf/convert/html_to_txt/beautify_anchors.rb +96 -0
- data/lib/rsssf/convert/html_to_txt/make_heading.rb +70 -0
- data/lib/rsssf/convert/html_to_txt/remove_emails.rb +43 -0
- data/lib/rsssf/convert/html_to_txt/replace_a_href.rb +85 -0
- data/lib/rsssf/convert/html_to_txt/replace_a_name.rb +87 -0
- data/lib/rsssf/convert/html_to_txt/replace_heading.rb +76 -0
- data/lib/rsssf/convert/html_to_txt/replace_hr.rb +25 -0
- data/lib/rsssf/convert/html_to_txt.rb +247 -0
- data/lib/rsssf/download.rb +4 -135
- data/lib/rsssf/fmtfix/dates.rb +541 -0
- data/lib/rsssf/fmtfix/dates_helpers.rb +63 -0
- data/lib/rsssf/fmtfix/errata.rb +44 -0
- data/lib/rsssf/fmtfix/fmtfix-base.rb +68 -0
- data/lib/rsssf/fmtfix/fmtfix.rb +101 -0
- data/lib/rsssf/fmtfix/goals.rb +173 -0
- data/lib/rsssf/fmtfix/headers.rb +326 -0
- data/lib/rsssf/fmtfix/outline.rb +228 -0
- data/lib/rsssf/fmtfix/patch_headings.rb +141 -0
- data/lib/rsssf/fmtfix/rounds.rb +74 -0
- data/lib/rsssf/fmtfix/score.rb +92 -0
- data/lib/rsssf/fmtfix/tables.rb +316 -0
- data/lib/rsssf/fmtfix/topscorers.rb +50 -0
- data/lib/rsssf/page-find_schedule.rb +127 -0
- data/lib/rsssf/page-meta.rb +68 -0
- data/lib/rsssf/page.rb +89 -227
- data/lib/rsssf/parse_schedules.rb +34 -0
- data/lib/rsssf/prepare/convert-links.rb +77 -0
- data/lib/rsssf/prepare/convert-meta.rb +111 -0
- data/lib/rsssf/prepare/convert-navlines.rb +154 -0
- data/lib/rsssf/prepare/convert-postproc.rb +141 -0
- data/lib/rsssf/prepare/convert.rb +100 -0
- data/lib/rsssf/prepare/download.rb +40 -0
- data/lib/rsssf/project.rb +154 -0
- data/lib/rsssf/reports/page.rb +40 -8
- data/lib/rsssf/reports/schedule.rb +18 -55
- data/lib/rsssf/utils.rb +28 -17
- data/lib/rsssf/version.rb +5 -2
- data/lib/rsssf.rb +53 -13
- metadata +50 -9
- data/lib/rsssf/convert.rb +0 -495
- data/lib/rsssf/repo.rb +0 -144
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
|
|
2
|
+
module Rsssf
|
|
3
|
+
class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## convenience helper
|
|
8
|
+
## convenience helper (class-level)
##   runs fmtfix_pages on the "shared" built-in fmtfix instance
##   (created on first use); all arguments get passed along as is
def self.fmtfix_pages( pages, outdir:, path:, heading_patches: nil )
  shared = (@@fmtfix ||= new)    ## use a "shared" built-in fmtfix
  shared.fmtfix_pages( pages, outdir:          outdir,
                              path:            path,
                              heading_patches: heading_patches )
end
|
|
14
|
+
|
|
15
|
+
## Run fmtfix over a batch of pages.
##
##  pages           - list of page configs; every config is read via config['page']
##  outdir:         - directory the fixed .txt files get written to
##  path:           - (lookup search) path (array expected!!!) passed on to find_file!
##  heading_patches: - optional; passed on as is to fmtfix
##
## For every page the input is looked up as "<dirname>/<basename>.txt"
## and the result is written to "<outdir>/<basename>.txt"
##   note - the dirname of the page is NOT kept for the output file
##
## Returns pages (the result of each_with_index).
def fmtfix_pages( pages, outdir:,
                         path:,     ## (lookup search) path (array expected!!!)
                         heading_patches: nil )

  pages.each_with_index do |config,i|

    puts "==> #{i+1}/#{pages.size} #{config.pretty_inspect}..."

    page     = config['page']
    dirname  = File.dirname( page )
    ## note - cut off the extension (whatever it is); always use .txt below
    basename = File.basename( page, File.extname( page ) )

    inname   = "#{dirname}/#{basename}.txt"
    filename = find_file!( inname, path: path )

    txt    = read_text( filename )
    newtxt = fmtfix( txt, heading_patches: heading_patches )

    outfile = File.join( outdir, "#{basename}.txt" )
    write_text( outfile, newtxt )
  end
end
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
## convenience helper
|
|
42
|
+
## convenience helper (class-level)
##   runs fmtfix on the "shared" built-in fmtfix instance (created on first use)
def self.fmtfix( txt, heading_patches: nil )
  shared = (@@fmtfix ||= new)    ## use a "shared" built-in fmtfix
  shared.fmtfix( txt, heading_patches: heading_patches )
end
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Run the fix-up steps on one page text and return the new text.
##
##  step 1 - autofix-outline
##             (the title comes from Page.parse_meta; 'n/a' if missing)
##  step 2 - patch headings/outline if empty
##             with at_headings.txt, de_headings.txt etc.
##             (only if heading_patches got passed in)
##  step 3 - autofix
def fmtfix( txt, heading_patches: nil )

  ## get title
  title = Page.parse_meta( txt )[:title] || 'n/a'

  fixed = autofix_outline( txt, title: title )

  ## check if any headings / outline
  ##   note - only scan if there are patches to apply
  if heading_patches && _scan_outline( fixed ).size == 0
    fixed = patch_headings( fixed, heading_patches )
  end

  fixed = autofix( fixed )

  ##########
  ## note - (quick) outline is turned off for now; kept for reference
  ##
  ##   ## add (quick) outline
  ##   outline = build_outline( fixed )
  ##
  ##   ## add inside <!-- source: ... [auto-add here] -->
  ##   ##  e.g.
  ##   ##  <!--
  ##   ##     source: https://rsssf.org/tableso/oost98.html
  ##   ##     -->
  ##
  ##   fixed = fixed.sub( %r{^[ ]*<!--
  ##                           [ \n]*
  ##                           (source: .+?)
  ##                           [ \n]*
  ##                           -->
  ##                        }ix,
  ##                      "<!--\n \\1\n\n#{outline} -->" )

  fixed
end
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
end ## class Fmtfix
|
|
101
|
+
end ## module Rsssf
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
###############
|
|
7
|
+
## todo - fix/fix/fix/fix
|
|
8
|
+
|
|
9
|
+
=begin
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
(Match winners plus best 3 overall qualify for Quarterfinals)
|
|
13
|
+
(Os vencedores dos jogos e os outros 3 melhores passam às Quartas-de-Final)
|
|
14
|
+
|
|
15
|
+
-- rework goal line!!!!
|
|
16
|
+
only if following score line must include 4-4 or such !!!
|
|
17
|
+
=end
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
##
|
|
21
|
+
## note - exclude colon (:) too!!
|
|
22
|
+
## will not match props such as [red card: ...]
|
|
23
|
+
## [ref: ...]
|
|
24
|
+
## and others
|
|
25
|
+
#
|
|
26
|
+
## note - [^] by default always excludes newline (\n)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
## regex fragment for the inside of a goal line
##   (for use inside extended /x regexes only - incl. a comment & whitespace)
##
##   text up to the first number (no colon, no brackets, no newline),
##   a 1-3 digit number (the minute) with optional ' and
##   any text following (no brackets, no newline)
##
## note - frozen; shared string constant must not get changed in place
GOALS_ = %q{
            [^:\[\]\n]*?
              \b
              \d{1,3} '?   ## incl. minute
            [^\[\]\n]*?
           }.freeze
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
## Change goal lines in square brackets [] to parentheses () and return the new text.
##
##   opts[:goals] - if set, print every (single) goal line match for debugging
##
##  step 1 - [pen] and [og] inside goal lines
##  step 2 - goal lines running over two (or three) lines
##  step 3 - goal line on a single line (plus an optional second goal line)
##             note - the line before MUST include a score e.g. 3-0
def handle_goals( txt, opts: {} )

  ##
  ## quick fix - change [pen] to (pen) and
  ##                    [og]  to (og)
  ##  e.g. [Parkin 57 [og] - Nogan 47]
  ##       [McIndoe 11 [pen] Green 20, Blundell 90 - Robinson 74]
  txt = txt.gsub( '[pen]', '(pen)' )
           .gsub( '[og]', '(og)' )

  ##########
  ## note - turned off for now; kept for reference
  ##
  ##   ## [15' Barisic, 80' Gilewicz; 10' (og) Barisic]
  ##   ##  try (simple) goal line
  ##   ##   note keep leading spaces / indent
  ##
  ##   ## note - first line must include a score!!
  ##   ### change to named captures!! - use \k<> !!!
  ##   txt = txt.gsub( %r{^
  ##                       ( .+?
  ##                         \d{1,2}-\d{1,2}
  ##                         .*?
  ##                         \n
  ##                       )
  ##                       ([ ]*)
  ##                       \[
  ##                       ( .*?
  ##                         \b\d{1,3}' ## incl. minute
  ##                         .*?
  ##                       )
  ##                       \]
  ##                       [ ]*
  ##                     $}ix,
  ##                   '\1\2(\3)' )

  ## try (simple double) goal line
  ##   note keep leading spaces / indent
  ##  [21' Dospel, 42' and 64' Mayrleb, 51' Datoru, 72' Sobczak; 25' and
  ##   90' B.Akwuegbu]
  ##  -or-
  ##  [Jose Manuel Jurado 12, Diego Forlán 40, 63,
  ##   "Simao" Pedro Fonseca 90]
  ##  [Rubén Suárez 10; Abdoulay Konko 12, 63, Alvaro Negredo 27,
  ##   "Renato" Dirnei Florencio 87]
  ##
  ##  ["Edmilson" Gomes de Moraes 40, Marco Perez 68,
  ##   Ander Herrera 82; Fernando Fernandez 1, 27,
  ##   Juan Miguel Jimenez "Juanmi" 6, 28, Quincy Owusu-abeyie 35]
  ##  or
  ##  [Jose Manuel Casado 16,Emiliano Armenteros 20,
  ##   Jorge Andujar Moreno "Coke" 60; Jose Javier Barkero 14pen,
  ##   Jose Antonio Culebras 90+].
  ##    note - remove optional trailing dot
  txt = txt.gsub( %r{^
                       ([ ]*)
                       \[
                       ( #{GOALS_}
                         \n #{GOALS_}
                         (?:
                            \n #{GOALS_}
                         )?
                       )
                       \]
                       \.? ## optional trailing dot
                       [ ]*
                     $}ix,
                  '\1(\2)' )

  ## note - match for single line goes last !!

  ###
  ##  [Fernando Llorente 47]
  ##  [Sebastián Fernández 44; Aritz Aduriz 9, Joaquín Sanchez 71, 75]
  ##  try (simple) goal line with number only!!!
  ##   note keep leading spaces / indent
  ##
  ##  Fluminense 3-0 0-2 São Caetano
  ##   [Magno Alves 70', 88', Roni 75']
  ##   [Daniel 15', Magrão 46'p]
  ##
  ## fix/fix/fix - merge with rule above!!!
  ##    make minute optional!!!
  txt = txt.gsub( %r{^
                       (?<match> .+?
                                 \d{1,2}-\d{1,2}
                                 .*?
                       )
                       \n
                       (?<indent1> [ ]*)
                       \[
                       (?<goals1> #{GOALS_})
                       \]
                       [ ]*
                       (?: ## check for second goal line following
                           ## used in br for aggregate matches
                          \n
                          (?<indent2> [ ]*)
                          \[
                          (?<goals2> #{GOALS_})
                          \]
                          [ ]*
                       )?
                     $}ix ) do |matched|

    if opts[:goals]
      puts " match:"
      puts matched
    end

    md = Regexp.last_match

    ## note - match line stays as is; goal line(s) get () instead of []
    lines = [ md[:match],
              "#{md[:indent1]}(#{md[:goals1]})" ]
    lines << "#{md[:indent2]}(#{md[:goals2]})"   if md[:indent2] && md[:goals2]
    lines.join( "\n" )
  end

  txt
end
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
end ## class Fmtfix
|
|
173
|
+
end ## module Rsssf
|
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
## lets you check optional ref e.g. ‹§fin›
|
|
7
|
+
## regex fragment (for use inside extended /x regexes)
##   optional spaces followed by a ref in ‹› starting with §
##   the ref (incl. the leading §) ends up in the named capture ref
##
## note - frozen; shared string constant must not get changed in place
OPT_REF = %q{
              (?: [ ]*
                  ‹ (?<ref> §[^›]+?) ›
              )?
            }.freeze
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
=begin
|
|
16
|
+
## lets you check optional ref e.g. ‹§fin›
|
|
17
|
+
OPT_REF = %q{
|
|
18
|
+
(?: [ ]*
|
|
19
|
+
‹§ (?<ref> [^›]+?) ›
|
|
20
|
+
)?
|
|
21
|
+
}
|
|
22
|
+
=end
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
###
|
|
26
|
+
### note - allow optional colon e.g.
|
|
27
|
+
## Playoff:
|
|
28
|
+
## Round 21:
|
|
29
|
+
|
|
30
|
+
## round header (on its own line)
##   round name (see ROUND_PAT), optional colon and optional ref (see OPT_REF);
##   leading and trailing spaces allowed
HEADER_ROUND_RE = %r{\A
                       [ ]*
                       (?<round> #{ROUND_PAT})
                       :? ## note - allow optional colon (:) e.g. Playoff:
                       #{OPT_REF}
                       [ ]*
                     \z}ix
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
## date header (w/ brackets)
|
|
41
|
+
## [Aug 7]
|
|
42
|
+
## [Oct 23]
|
|
43
|
+
##
|
|
44
|
+
### note - might be date range or date list!!!
|
|
45
|
+
## [Aug 7-9]
|
|
46
|
+
## [Aug 7, 8]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
## helper for inline regexes (with union) and escaped
|
|
51
|
+
## helper for inline regexes (with union) and escaped
##
##   combines the date regexes passed in with Regexp.union and
##   returns the source (string) wrapped in a non-capture group, that is, "(?: ...)"
##
##   raises ArgumentError if no regex gets passed in
##
##   note - with a single regex Regexp.union returns the regex as is and
##            source will NOT include the re flags (e.g. +i/insensitive)
def self.date_( *re )
  ## note - the check is for one or more (NOT for more than one)
  raise ArgumentError, "at least one date regex expected, got #{re}"   if re.empty?

  ## (auto-)wrap in non-capture group - why? why not?
  "(?: #{Regexp.union( *re ).source})"
end
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
## date header (w/ brackets) on its own line
##   any of the date formats listed (single date, date range, date list, ...)
##   leading and trailing spaces allowed
HEADER_DATE_RE = %r{\A
                      [ ]*
                      \[ #{date_( DATE_I_RE, DATE_IB_RE,
                                  DATE_II_RE,
                                  DATE_RANGE_RE,
                                  DATE_LIST_RE, DATE_LEGS_RE )}
                      \]
                      [ ]*
                    \z}ix
|
|
69
|
+
|
|
70
|
+
## pp HEADER_DATE_RE
|
|
71
|
+
## pp DATE_I_RE
|
|
72
|
+
## pp DATE_I_RE.source ## note - will NOT include re flags (e.g. +i/insensitive)
|
|
73
|
+
## exit 1
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
## alternate date header (no brackets incl. year)
|
|
78
|
+
## Aug 7 1999
|
|
79
|
+
## Sep 4 1999
|
|
80
|
+
## Oct 23 1999
|
|
81
|
+
## Nov 20 1999
|
|
82
|
+
## Apr 1 2000
|
|
83
|
+
|
|
84
|
+
## alternate date header (no brackets) on its own line
##   one of DATE_I_RE or DATE_II_RE; leading and trailing spaces allowed
HEADER_DATE_II_RE = %r{\A
                         [ ]*
                         #{date_( DATE_I_RE, DATE_II_RE )}
                         [ ]*
                       \z}ix
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
##
|
|
93
|
+
## [Sep 16, Berchtold 26, Glasner 54, Kuljic 60]
|
|
94
|
+
## --- note - exclude numbers in follow-up text!!!
|
|
95
|
+
##
|
|
96
|
+
## use a shared pattern for city-like text !!
|
|
97
|
+
## maybe allow more and make more specific later
|
|
98
|
+
#
|
|
99
|
+
## exclude comma (,) - why? why not?
|
|
100
|
+
## split in CITY_ and CITY_PLUS_ or such?
|
|
101
|
+
## or find a better name ??
|
|
102
|
+
##
|
|
103
|
+
## allow number if:
|
|
104
|
+
## Happel-Stadion, Wien, att: 9,200
|
|
105
|
+
## Happel-Stadion, Wien; att: 7000
|
|
106
|
+
## Innsbruck; att: 6700
|
|
107
|
+
## Wörthersee-Stadion, Klagenfurt; att: 30,000
|
|
108
|
+
## Wörthersee Stadion, Klagenfurt; att: 20,500
|
|
109
|
+
## Hayward, Calif.; att: 5.528 -- note: dot (.) NOT comma (,)
|
|
110
|
+
##
|
|
111
|
+
##
|
|
112
|
+
## Apr 30, 28 Black Arena, Klagenfurt; att: 30,000
|
|
113
|
+
### Wörthersee Stadion, known as 28 Black Arena for sponsorship reasons
|
|
114
|
+
##
|
|
115
|
+
## Ernst-Happel-Stadion, Wien; att: 20100; ref: Hofmann
|
|
116
|
+
|
|
117
|
+
## regex fragment (for use inside extended /x regexes) for city-like text
##   ends up in the named capture city; either
##     - text without any digits, colons, semicolons or brackets   -or-
##     - any text ending with " att: <number>" (and an optional "; ref: ..." or ", ref: ...")
##
## note - frozen; shared string constant must not get changed in place
CITY_ = %q{ (?<city> (?:   [^0-9:;\[\]]+?
                         | .+?
                           [ ] att: [ ] [0-9,.]+
                           (?: [;,] [ ] ref: [ ] .+? ## w/ optional ref:
                           )?
                     )
            )
          }.freeze
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
## [Jun 3, Ferrol]
|
|
128
|
+
## [Apr 2, Wembley]
|
|
129
|
+
## -or-
|
|
130
|
+
## [Sat May 17 - at Millennium Stadium, Cardiff]
|
|
131
|
+
## [Sun May 25 - at Millennium Stadium, Cardiff]
|
|
132
|
+
|
|
133
|
+
## date & city header (w/ brackets) on its own line
##   date (DATE_I_RE or DATE_II_RE) followed by
##   a comma or " - at " and the city (see CITY_)
HEADER_DATE_N_CITY_RE = %r{\A
                             [ ]*
                             \[ #{date_( DATE_I_RE,
                                         DATE_II_RE )}
                                (?:   , [ ]*
                                    | [ ] - [ ] at [ ]
                                )
                                #{CITY_}
                             \]
                             [ ]*
                           \z}ix
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
###
|
|
147
|
+
## alternate date header with brackets (in oost02.txt)
|
|
148
|
+
## [31-08] change to _ 31/08 _
|
|
149
|
+
## [07-09]
|
|
150
|
+
## [07-09]
|
|
151
|
+
## [30-05, Thaur]
|
|
152
|
+
|
|
153
|
+
## alternate (numeric) date header (w/ brackets) on its own line
##   day-month (named captures date, day, month) with
##   an optional comma and city (see CITY_) following
HEADER_DATE_ALT_RE = %r{\A
                          [ ]*
                          \[ (?<date>
                                (?<day> \d{1,2}) - (?<month> \d{1,2})
                             )
                             (?:
                                , [ ]*
                                #{CITY_}
                             )?
                          \]
                          [ ]*
                        \z}ix
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
## Round 24 [Mar 21]
|
|
173
|
+
## note - might be date range or date list
|
|
174
|
+
## Round 24 [Mar 21, 22]
|
|
175
|
+
## Round 24 [Mar 21-23]
|
|
176
|
+
## Round 2 [Aug 4-6]
|
|
177
|
+
## Round 1 [Aug 13-16]
|
|
178
|
+
## Round 2 [Aug 20-23]
|
|
179
|
+
##
|
|
180
|
+
### note - with optional ref/anchor
|
|
181
|
+
## Preliminary Round [Nov 20] ‹§inplay›
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
## round & date header on its own line
##   round name (see ROUND_PAT), one or more spaces,
##   date (any of the formats listed) in brackets and an optional ref (see OPT_REF)
HEADER_ROUND_N_DATE_RE = %r{\A
                              [ ]*
                              (?<round> #{ROUND_PAT})
                              [ ]+
                              \[
                                 #{date_( DATE_I_RE, DATE_IB_RE, DATE_II_RE,
                                          DATE_RANGE_RE,
                                          DATE_LIST_RE, DATE_LEGS_RE )}
                              \]
                              #{OPT_REF}
                              [ ]*
                            \z}ix
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
## Final [May 1, Klagenfurt]
|
|
199
|
+
## round & date & city header on its own line
##   round name (see ROUND_PAT), one or more spaces and
##   in brackets the date (DATE_I_RE or DATE_II_RE), a comma and the city (see CITY_)
HEADER_ROUND_N_DATE_N_CITY_RE = %r{\A
                                     [ ]*
                                     (?<round> #{ROUND_PAT})
                                     [ ]+
                                     \[ #{date_( DATE_I_RE, DATE_II_RE )}
                                        , [ ]*
                                        #{CITY_}
                                     \]
                                     [ ]*
                                   \z}ix
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
###
|
|
212
|
+
## Final [in Völs]
|
|
213
|
+
## Final [in Kundl]
|
|
214
|
+
## round & city header on its own line
##   round name (see ROUND_PAT), one or more spaces and
##   in brackets "in" followed by the city (see CITY_)
HEADER_ROUND_N_CITY_RE = %r{\A
                              [ ]*
                              (?<round> #{ROUND_PAT})
                              [ ]+
                              \[in [ ]+ #{CITY_}
                              \]
                              [ ]*
                            \z}ix
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
##
|
|
225
|
+
## reverse
|
|
226
|
+
## Final [Graz, May 12]
|
|
227
|
+
## Super Cup Final [Graz, Jul 6]
|
|
228
|
+
## Final [London, Feb 27]
|
|
229
|
+
## round & city & date header on its own line (city and date reversed)
##   round name (see ROUND_PAT), one or more spaces and
##   in brackets the city (see CITY_), a comma and the date (DATE_I_RE or DATE_II_RE)
HEADER_ROUND_N_CITY_N_DATE_RE = %r{\A
                                     [ ]*
                                     (?<round> #{ROUND_PAT})
                                     [ ]+
                                     \[ #{CITY_}
                                        , [ ]*
                                        #{date_( DATE_I_RE, DATE_II_RE )}
                                     \]
                                     [ ]*
                                   \z}ix
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
#####
|
|
244
|
+
## note - line-by-line processing / matching
|
|
245
|
+
## Build and format the date found in a (header) match.
##
##   m       - match data (or a hash with symbol keys)
##   format: - passed on as is to the _fmt_date helpers
##
##  checks (in order) for date_list, date_legs, date_range and
##    otherwise assumes a (single) date
def _norm_date( m, format: nil )
  ## quick fix for undefined group name reference
  ##   note - a hash returns nil for unknown names; match data raises an error
  caps = m.is_a?( MatchData ) ? m.named_captures.transform_keys( &:to_sym ) : m

  return _fmt_date_list(  _build_date_list( caps ),  format: format )   if caps[:date_list]
  return _fmt_date_legs(  _build_date_legs( caps ),  format: format )   if caps[:date_legs]
  return _fmt_date_range( _build_date_range( caps ), format: format )   if caps[:date_range]

  ## assume m[:date]
  _fmt_date( _build_date( caps ), format: format )
end
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
## Check if line is a round and/or date header.
##
## note - returns the new line (matched header line reformatted; incl. newline)
##          or nil (if no match!!)
##
##   the header regexes get tried one by one (in the order below); first match wins
##   note - an optional ref (e.g. ‹§fin›) does NOT get added to the new line
def handle_header( line )
  line = line.rstrip   ## expect chomp of newline "upstream" - why? why not?
  bare = line.strip    ## note - the round & date/city headers match with leading spaces cut off too

  if (m = HEADER_ROUND_RE.match( line ))
    return "▪ #{m[:round]} ▪\n"
  end

  if (m = HEADER_DATE_RE.match( line ))
    ## e.g. [Nov 20]
    ## e.g. [April 1]
    date = _norm_date( m )
    return "_ #{date} _\n"
  end

  if (m = HEADER_DATE_N_CITY_RE.match( line ))
    ## e.g. [Jun 3, Ferrol]
    ## e.g. [Apr 2, Wembley]
    ##      [Sat May 17 - at Millennium Stadium, Cardiff]
    ##      [Sun May 25 - at Millennium Stadium, Cardiff]
    date = _norm_date( m )

    ## note - check for special case
    ##   [Dec 10, replay]
    ##   change to ▪ Replay ▪ _ Dec 10 _
    return m[:city] == 'replay' ? "▪ Replay ▪ _ #{date} _\n"
                                : "_ #{date} _ @ #{m[:city]}\n"
  end

  if (m = HEADER_DATE_II_RE.match( line ))
    ## note - no enclosing brackets []!!!
    ## e.g. Nov 20 1999 or Nov 20, 1999
    ##      Apr 1 2000 or Apr 1, 2000
    date = _norm_date( m )
    return "_ #{date} _\n"
  end

  if (m = HEADER_DATE_ALT_RE.match( line ))
    ## e.g. [07-09]
    ##      [30-05, Thaur]
    ## date = _norm_date( m, format: 'numeric' )
    date = _norm_date( m )
    return m[:city] ? "_ #{date} _ @ #{m[:city]}\n"
                    : "_ #{date} _\n"
  end

  if (m = HEADER_ROUND_N_DATE_RE.match( bare ))
    date = _norm_date( m )
    return "▪ #{m[:round]} ▪ #{date}\n"
  end

  if (m = HEADER_ROUND_N_DATE_N_CITY_RE.match( bare ))
    date = _norm_date( m )
    return "▪ #{m[:round]} ▪ #{date} @ #{m[:city]}\n"
  end

  if (m = HEADER_ROUND_N_CITY_RE.match( bare ))
    return "▪ #{m[:round]} ▪ @ #{m[:city]}\n"
  end

  if (m = HEADER_ROUND_N_CITY_N_DATE_RE.match( bare ))
    date = _norm_date( m )
    ## note - reverse (rotate) date & city
    return "▪ #{m[:round]} ▪ #{date} @ #{m[:city]}\n"
  end

  nil
end
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
end ## class Fmtfix
|
|
326
|
+
end ## module Rsssf
|