rsssf 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +39 -2
- data/README.md +67 -62
- data/Rakefile +2 -2
- data/config/groups_en.txt +44 -0
- data/config/rounds_en.txt +283 -0
- data/config/rounds_es.txt +20 -0
- data/config/rounds_misc.txt +7 -0
- data/lib/_cocos_.rb +158 -0
- data/lib/rsssf/convert/convert.rb +71 -0
- data/lib/rsssf/convert/errata.rb +103 -0
- data/lib/rsssf/convert/html_entities.rb +150 -0
- data/lib/rsssf/convert/html_to_txt/beautify_anchors.rb +96 -0
- data/lib/rsssf/convert/html_to_txt/make_heading.rb +70 -0
- data/lib/rsssf/convert/html_to_txt/remove_emails.rb +43 -0
- data/lib/rsssf/convert/html_to_txt/replace_a_href.rb +85 -0
- data/lib/rsssf/convert/html_to_txt/replace_a_name.rb +87 -0
- data/lib/rsssf/convert/html_to_txt/replace_heading.rb +76 -0
- data/lib/rsssf/convert/html_to_txt/replace_hr.rb +25 -0
- data/lib/rsssf/convert/html_to_txt.rb +247 -0
- data/lib/rsssf/download.rb +4 -135
- data/lib/rsssf/fmtfix/dates.rb +541 -0
- data/lib/rsssf/fmtfix/dates_helpers.rb +63 -0
- data/lib/rsssf/fmtfix/errata.rb +44 -0
- data/lib/rsssf/fmtfix/fmtfix-base.rb +68 -0
- data/lib/rsssf/fmtfix/fmtfix.rb +101 -0
- data/lib/rsssf/fmtfix/goals.rb +173 -0
- data/lib/rsssf/fmtfix/headers.rb +326 -0
- data/lib/rsssf/fmtfix/outline.rb +228 -0
- data/lib/rsssf/fmtfix/patch_headings.rb +141 -0
- data/lib/rsssf/fmtfix/rounds.rb +74 -0
- data/lib/rsssf/fmtfix/score.rb +92 -0
- data/lib/rsssf/fmtfix/tables.rb +316 -0
- data/lib/rsssf/fmtfix/topscorers.rb +50 -0
- data/lib/rsssf/page-find_schedule.rb +127 -0
- data/lib/rsssf/page-meta.rb +68 -0
- data/lib/rsssf/page.rb +89 -227
- data/lib/rsssf/parse_schedules.rb +34 -0
- data/lib/rsssf/prepare/convert-links.rb +77 -0
- data/lib/rsssf/prepare/convert-meta.rb +111 -0
- data/lib/rsssf/prepare/convert-navlines.rb +154 -0
- data/lib/rsssf/prepare/convert-postproc.rb +141 -0
- data/lib/rsssf/prepare/convert.rb +100 -0
- data/lib/rsssf/prepare/download.rb +40 -0
- data/lib/rsssf/project.rb +154 -0
- data/lib/rsssf/reports/page.rb +40 -8
- data/lib/rsssf/reports/schedule.rb +18 -55
- data/lib/rsssf/utils.rb +28 -17
- data/lib/rsssf/version.rb +5 -2
- data/lib/rsssf.rb +53 -13
- metadata +50 -9
- data/lib/rsssf/convert.rb +0 -495
- data/lib/rsssf/repo.rb +0 -144
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
##
##  Build the regex source (extended/x-mode) for one table heading line.
##
##  The column names get split on runs of spaces and re-joined with
##    a "one-or-more spaces" separator, e.g.
##
##      M W T L GF GA PTS AVGE
##    =>
##      (?: [ ]+ M [ ]+ W [ ]+ T [ ]+ L [ ]+ GF [ ]+ GA [ ]+ PTS [ ]+ AVGE [ ]*)
##
##  note - the column names are NOT escaped (on purpose);
##           lets you pass in regex syntax e.g. PTS? for an optional S
##
##  returns a string (NOT a Regexp) for interpolation into a bigger regex
def self.table_heading_( line )
  separator = ' [ ]+ '
  columns   = line.strip.split( /[ ]+/ ).join( separator )

  "(?: [ ]+ #{columns} [ ]*)"
end
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
##
|
|
23
|
+
### note - may start with blank line OR
|
|
24
|
+
## header
|
|
25
|
+
## followed by optional heading (e.g. M W T L GF GA PTS)
|
|
26
|
+
## and table lines ( 1. rapid 38 17 ...)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
##
|
|
31
|
+
## note simple/compact table standing format needs more thinking
|
|
32
|
+
## will match
|
|
33
|
+
## FC Schalke 04 1-3 1. FSV Mainz 05
|
|
34
|
+
## Hannover 96 3-1 1. FC Nürnberg
|
|
35
|
+
## FC Schalke 04 0-1 1. FC Kaiserslautern
|
|
36
|
+
## Hannover 96 2-0 1. FSV Mainz 05
|
|
37
|
+
## FSV Mainz 05 3-1 1. FC Köln
|
|
38
|
+
##
|
|
39
|
+
## - add a required ranking in the beginning e.g. 1., 2. or such?
|
|
40
|
+
|
|
41
|
+
=begin
|
|
42
|
+
| (?: ## or compact/min form -- 22 37-15 51
|
|
43
|
+
## maybe allow spaces later inbetween 37- 15 - why? why not?
|
|
44
|
+
## 1. 1. FC Köln 30 17 11 2 78- 40 45
|
|
45
|
+
|
|
46
|
+
[ ]+ \d{1,3} ## played
|
|
47
|
+
[ ]+ \d{1,3} [ ]? -[ ]? \d{1,3} ## gf-ga
|
|
48
|
+
[ ]+ \d{1,3} \b ## pts
|
|
49
|
+
)
|
|
50
|
+
=end
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
=begin
|
|
54
|
+
## (i) table header
|
|
55
|
+
##
|
|
56
|
+
## fix - make header match more strict!!!
|
|
57
|
+
## e.g. do NOT match --- or more than three spaces or such
|
|
58
|
+
## exclude in header
|
|
59
|
+
## NB:
|
|
60
|
+
## [*]
|
|
61
|
+
## [1]
|
|
62
|
+
## exclude heading === e.g.
|
|
63
|
+
## ==== USL Premier Development
|
|
64
|
+
^
|
|
65
|
+
[ ]*
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
## exclude comma (,) - why? why not?
|
|
69
|
+
## and numbers - unless group 1
|
|
70
|
+
## e.g. Kaczor 78 - Dreßel 19, Steinkogler 50,
|
|
71
|
+
## B'schweig 2-1 Schalke (Handschuh 38, Popivoda 55 - Fischer 82)
|
|
72
|
+
## M'gladbach 2-1 1. FC Köln (Jensen 6, Wittkamp 35 - D.Müller 78)
|
|
73
|
+
## Kraft 3, E.Kremers 38)
|
|
74
|
+
## Schalke 4-0 Tasmania (Klose 2, 78, Herrmann 40, Kreuz 82)
|
|
75
|
+
##
|
|
76
|
+
## allow name such as
|
|
77
|
+
## USL - 1ST DIVISION (2nd Division)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
(?<header> [^=*:,0-9\[\]\n]+?
|
|
81
|
+
([ ] \d{1,2} \b)? ## optional number only at the end e.g. group 1
|
|
82
|
+
)
|
|
83
|
+
:? ## optional colon (:) e.g. final table:
|
|
84
|
+
=end
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
TABLE_HEADER_RE = %r{
|
|
88
|
+
############
|
|
89
|
+
## negative & positive lookaheads
|
|
90
|
+
|
|
91
|
+
## (?!
|
|
92
|
+
## .* [ ]{2,} ## no (inline) double (or more) spaces allowed
|
|
93
|
+
## )
|
|
94
|
+
|
|
95
|
+
(?:
|
|
96
|
+
## (i) can only start with non-zero number
|
|
97
|
+
## or alpha
|
|
98
|
+
##
|
|
99
|
+
## A. or
|
|
100
|
+
## 1. or
|
|
101
|
+
## mixed with dot 1A. yes/no?, A1. yes/no?, 1B1. ?
|
|
102
|
+
## 1.K - 1.Klasse
|
|
103
|
+
|
|
104
|
+
##
|
|
105
|
+
## note - \b(oundary) - to always get complete tokens (alphanum) tokens
|
|
106
|
+
## note - \b includes [a-z0-9_] PLUS underscore (_)
|
|
107
|
+
## check if underscore is \b
|
|
108
|
+
## e.g. 09_ or _09 or match \b[0-9]\b ???
|
|
109
|
+
## use our own asserts?
|
|
110
|
+
## BNUM (boundary number) e.g. [^0-9]
|
|
111
|
+
## BALPHA (boundary alpha) e.g. [^a-z]
|
|
112
|
+
## BALNUM (boundary alphanum) e.g. [^a-z0-9]
|
|
113
|
+
## classic is [^a-z0-9_]
|
|
114
|
+
|
|
115
|
+
(?<header>
|
|
116
|
+
(?=
|
|
117
|
+
.* \p{L}+ ## must incl. alpha character - not only numbers!!
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
## note
|
|
121
|
+
## order matters
|
|
122
|
+
## move specific first!!
|
|
123
|
+
|
|
124
|
+
\b
|
|
125
|
+
(?: [0-9]+\p{L} [0-9\p{L}]* \b ## (ii) mixed alphanum (starting w/ num)
|
|
126
|
+
| [0-9]+ \b \.? (?! \d) ## (i) num
|
|
127
|
+
| \p{L}+[0-9] [0-9\p{L}]* \b ## (iiii) mixed alphanum (starting w/ alpha)
|
|
128
|
+
| \p{L}+ \b \.? ## (iii) alpha
|
|
129
|
+
)
|
|
130
|
+
(?:
|
|
131
|
+
## " (i-iiii) connector options (a) single space
|
|
132
|
+
## -- exclude numbers on numbers (FIX)
|
|
133
|
+
## (b) dash (-) or slash (/)
|
|
134
|
+
## -- must be alpha(.?)-alpha
|
|
135
|
+
## incl. K.-H. with trailing dot
|
|
136
|
+
## add ampersand (&) too
|
|
137
|
+
## w/ leading & trailing opt space?
|
|
138
|
+
## incl. K.&H., K. & H.
|
|
139
|
+
(?: [ ]?
|
|
140
|
+
| (?<! \d) - ## add negative lookbehind&ahead (no numbers please)
|
|
141
|
+
(?! \d)
|
|
142
|
+
| /
|
|
143
|
+
)
|
|
144
|
+
## repeat (i-iiii) see above
|
|
145
|
+
## todo - do NOT allow numbers followed by numbers
|
|
146
|
+
\b
|
|
147
|
+
(?: [0-9]+ \b (?! [ ] \d) ## (i) num - no more ordinals - why? why not?
|
|
148
|
+
| [0-9]+\p{L} [0-9\p{L}]* \b ## (ii) mixed alphanum (starting w/ num)
|
|
149
|
+
## group 1a 1FC?? - why? why not?
|
|
150
|
+
| \p{L}+ \b \.? ## (iii) alpha
|
|
151
|
+
| \p{L}+[0-9] [0-9\p{L}]* \b ## (iiii) mixed alphanum (starting w/ alpha)
|
|
152
|
+
)
|
|
153
|
+
)*
|
|
154
|
+
(?:
|
|
155
|
+
[ ]
|
|
156
|
+
\( [^:()\[\]]+? \)
|
|
157
|
+
)?
|
|
158
|
+
) ## end-of-capture header
|
|
159
|
+
)
|
|
160
|
+
:? ## optional colon (:) e.g. final table:
|
|
161
|
+
}ix
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
TABLE_RE = %r{
|
|
165
|
+
|
|
166
|
+
### optional table header
|
|
167
|
+
(?:
|
|
168
|
+
### negative lookahead
|
|
169
|
+
## MUST NOT match standing line e.g. 10 3 4
|
|
170
|
+
## or table heading (see below)
|
|
171
|
+
## or ----- (old style structured heading left overs)
|
|
172
|
+
(?! ^[ ]* (?: [^\n]+? [ ]+ \d{1,3} [ ]+ \d{1,3} [ ]+ \d{1,3}
|
|
173
|
+
| (?: GP | M | Team ) [ ]
|
|
174
|
+
| -{3,}
|
|
175
|
+
)
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
## (i) table header
|
|
179
|
+
##
|
|
180
|
+
## fix - make header match more strict!!!
|
|
181
|
+
## e.g. do NOT match --- or more than three spaces or such
|
|
182
|
+
## exlcude in header
|
|
183
|
+
## NB:
|
|
184
|
+
## [*]
|
|
185
|
+
## [1]
|
|
186
|
+
## exclude heading === e.g.
|
|
187
|
+
## ==== USL Premier Development
|
|
188
|
+
^
|
|
189
|
+
[ ]*
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
## exclude comma (,) - why? why not?
|
|
193
|
+
## and numbers - unless group 1
|
|
194
|
+
## e.g. Kaczor 78 - Dreßel 19, Steinkogler 50,
|
|
195
|
+
## B'schweig 2-1 Schalke (Handschuh 38, Popivoda 55 - Fischer 82)
|
|
196
|
+
## M'gladbach 2-1 1. FC Köln (Jensen 6, Wittkamp 35 - D.Müller 78)
|
|
197
|
+
## Kraft 3, E.Kremers 38)
|
|
198
|
+
## Schalke 4-0 Tasmania (Klose 2, 78, Herrmann 40, Kreuz 82)
|
|
199
|
+
##
|
|
200
|
+
## allow name such as
|
|
201
|
+
## USL - 1ST DIVISION (2nd Division)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
(?<header> [^=*:,0-9\[\]\n]+?
|
|
205
|
+
([ ] \d{1,2} \b)? ## optional number only at the end e.g. group 1
|
|
206
|
+
)
|
|
207
|
+
:? ## optional colon (:) e.g. final table:
|
|
208
|
+
## cut-off everything separated by more than three spaces
|
|
209
|
+
## e.g. might be "inline" table heading (follow table header name)
|
|
210
|
+
## e.g. Group 1 M W T L GF GA DIF PTS
|
|
211
|
+
(?: [ ]{4,} (?: GP | M |Team ) [ ] [^\n]+? )?
|
|
212
|
+
[ ]*
|
|
213
|
+
## note - allow optional blank line - why? why not?
|
|
214
|
+
(?: \n ^[ ]* )?
|
|
215
|
+
\n
|
|
216
|
+
)?
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
#### optional table heading line
|
|
220
|
+
(?: ^(?:
|
|
221
|
+
#{table_heading_( 'GP W L D GF GA PTS?' )}
|
|
222
|
+
| #{table_heading_( 'GP W L T GF GA PTS?' )}
|
|
223
|
+
| #{table_heading_( 'GP W T L GF GA PTS?' )}
|
|
224
|
+
| #{table_heading_( 'GP W D L GF GA PTS?' )}
|
|
225
|
+
## SW sudden death win, SL sudden death lose
|
|
226
|
+
| #{table_heading_( 'GP W L SW GF GA PTS?' )}
|
|
227
|
+
| #{table_heading_( 'GP W SW SL L GF GA PTS?' )}
|
|
228
|
+
| #{table_heading_( 'GP W SOW SOL L GF GA PTS?' )}
|
|
229
|
+
## mx/spanish
|
|
230
|
+
| #{table_heading_( 'M W T L GF GC DIF PTS' )}
|
|
231
|
+
| #{table_heading_( 'M W T L GF GA PTS AVGE' )}
|
|
232
|
+
| #{table_heading_( 'Team M W T L GF-GA PTS')}
|
|
233
|
+
| #{table_heading_( 'Team M W T L GF-GA PTS EP TP')}
|
|
234
|
+
)
|
|
235
|
+
## note - allow optional blank line - why? why not?
|
|
236
|
+
(?: \n ^[ ]* )?
|
|
237
|
+
\n
|
|
238
|
+
)?
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
## MUST be followed by a table (standing) line
|
|
242
|
+
## e.g. 1.FC Cincinnati 34 20 9 5 57-39 69
|
|
243
|
+
##
|
|
244
|
+
## note - allow "run-on" e.g. LB14 on first number
|
|
245
|
+
## Hudson Valley Quickstrike LB14 12 0 2 40 9 38
|
|
246
|
+
## Hudson Valley Quickstrike LB12 11 1 0 26 9 33
|
|
247
|
+
##
|
|
248
|
+
## 17 11 5 1 40 16 +24 38
|
|
249
|
+
## or
|
|
250
|
+
### + 1.DC United 32 17 6/ 3 6 65-43 57
|
|
251
|
+
|
|
252
|
+
^
|
|
253
|
+
(?:
|
|
254
|
+
[^\n]+?
|
|
255
|
+
(?:
|
|
256
|
+
(?:
|
|
257
|
+
|
|
258
|
+
\d{1,3}
|
|
259
|
+
[ ]+ \d{1,3} ## win
|
|
260
|
+
(?: [ ]+ | [ ]* / [ ]* ) \d{1,3} ## draw
|
|
261
|
+
[ ]+ \d{1,3} ## lose
|
|
262
|
+
[ ]+ \d{1,3} (?: [ ]* [:-] [ ]*
|
|
263
|
+
| [ ]+ ) \d{1,3}
|
|
264
|
+
[ ]+ [+-]? \d{1,3} \b # might be diff or point allow +/-!!
|
|
265
|
+
)
|
|
266
|
+
)
|
|
267
|
+
[^\n]*?
|
|
268
|
+
)
|
|
269
|
+
\n
|
|
270
|
+
|
|
271
|
+
## eat-up the rest
|
|
272
|
+
.*? ## non-greedy - match everything (incl. newline!) until
|
|
273
|
+
(?: \n (?= \n) ## break on blank line (\n\n) or end-of-string/file
|
|
274
|
+
| \z
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
}ixm
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
##
##  Collapse every table (standings) block matched by TABLE_RE
##    into a one-line html-style comment marker.
##
##  txt    - the page text to scan
##  tables - array collecting the removed blocks (in match order);
##             the marker id is the 1-based position in this array
##
##  The marker reads
##      <!-- $table1$ - Final Table -->
##    or (if the block has no header e.g. table starting w/ blank line)
##      <!-- $table1$ -->
##
##  note - every matched block gets printed (puts) for debugging
##
##  returns the new text (txt itself is NOT changed in-place)
def handle_tables( txt, tables: [] )
  txt.gsub( TABLE_RE ) do |block|
    ## note - header might be missing (nil)
    header = Regexp.last_match[:header]

    puts " proc table >#{header}< block:"
    puts ">>> (begin)"
    puts block
    puts "<<< (end)"

    ## replace with "collapsed" marker
    ##   (or remove everything / put in comment block
    ##      later with command line option/switch!!)
    tables << block
    marker = "$table#{tables.size}$"
    marker = "#{marker} - #{header}"  if header

    "<!-- #{marker} -->\n"
  end
end
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
end ## class Fmtfix
|
|
316
|
+
end ## module Rsssf
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
module Rsssf
class Fmtfix   ## todo: find a better name e.g. Format or Fixer or ??

  ##
  ##  process/handle Topscorers: ...  up to the first blank line (\n\n)
  ##
  ##  the block starts with a line reading (case-insensitive), e.g.
  ##    topscorer,  topscorers
  ##    top scorer, top scorers
  ##    scorer,     scorers
  ##  followed by an optional colon (:)

  TOPSCORERS_RE = %r{^ [ ]*
                       (?<header>
                          (?: top [ ]?)?   ## note - optional top
                          scorers?         ## singular or plural
                       )
                       (?: [ ]* :)?   ## note - optional colon
                       [ ]*
                       \n{0,2}        ## note - optional leading blank line!!

                       .*?            ## non-greedy - match everything until
                       (?: \n (?= \n)  ## blank line (\n\n) or end-of-string/file
                          | \z
                       )
                    }ixm


  ##
  ##  Collapse every topscorers block into a marker line e.g.
  ##      <!-- $topscorers1$ -->
  ##    (followed by a blank line)
  ##
  ##  txt        - the page text to scan
  ##  topscorers - array collecting the removed blocks (in match order);
  ##                 the marker id is the 1-based position in this array
  ##  opts       - pass in topscorers: true to print (puts)
  ##                 every matched block for debugging
  ##
  ##  returns the new text (txt itself is NOT changed in-place)
  def handle_topscorers( txt, topscorers: [], opts: {} )
    verbose = opts[:topscorers]

    txt.gsub( TOPSCORERS_RE ) do |block|
      if verbose
        puts " proc topscorers block:"
        puts block
      end

      ## replace with "collapsed" marker
      ##   (or remove everything / put in comment block
      ##      later with command line option/switch!!)
      topscorers << block

      "<!-- $topscorers#{topscorers.size}$ -->\n\n"
    end
  end


end  ## class Fmtfix
end  ## module Rsssf
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Page
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
###
|
|
6
|
+
# (experimental)
|
|
7
|
+
# machinery to split document by leagues & cups
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
##
|
|
11
|
+
## for now simply split
|
|
12
|
+
## on headings
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
### fix - support match with trailing ==== too!!!
|
|
16
|
+
|
|
17
|
+
### note - starts at
|
|
18
|
+
HEADER_RE = %r{ ## negative lookahead
|
|
19
|
+
## do NOT match =-=
|
|
20
|
+
## do NOT match =========== (without any heading text!!)
|
|
21
|
+
## e.g.
|
|
22
|
+
## Fall season
|
|
23
|
+
## ===========
|
|
24
|
+
|
|
25
|
+
(?! ^[ ]* (?: =-=
|
|
26
|
+
| ={1,} [ ]* $
|
|
27
|
+
)
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
^
|
|
31
|
+
[ ]*
|
|
32
|
+
(?<marker> ={1,6})
|
|
33
|
+
[ ]*
|
|
34
|
+
(?<text> .+?)
|
|
35
|
+
#{OPT_REF}
|
|
36
|
+
[ ]*
|
|
37
|
+
$}x
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
##
##  Split the text into sections keyed by heading text.
##
##  txt   - the text to split (read line-by-line)
##  level - the heading level to split on,
##            that is, the number of = in the heading marker
##
##  Every heading line (matched by HEADER_RE) of the requested level
##    starts a new (empty) section; the heading line itself is NOT kept.
##  All other lines - incl. headings of any other level - get added
##    to the section currently open; lines before the first
##    matching heading get dropped.
##
##  note - if a heading text shows up more than once the last one wins
##
##  returns a hash mapping the heading text to the section text
def _split_sections( txt, level: 2 )
  buffer = nil   ## text of the section currently open (if any)

  txt.each_line.with_object( {} ) do |line, sections|
    heading = HEADER_RE.match( line )

    if heading && heading[:marker].size == level
      buffer = sections[ heading[:text] ] = String.new
    elsif buffer
      buffer << line
    end
  end
end
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
## make header required - yes
|
|
69
|
+
## change to build_schedule - why? why not???
|
|
70
|
+
## add level: 2 or such - why? why not?
|
|
71
|
+
##
##  Strict version of the schedule lookup -
##    passes the header on to _find_schedule with strict: true
##
##  header - the section heading (string) or
##             the path of nested section headings (array of strings)
def find_schedule!( header: )
  _find_schedule( strict: true, header: header )
end
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
##
##  Look up the section text for the header (path) in the page text (@txt)
##    and wrap it in a Schedule.
##
##  header - the section heading (string) or
##             the path of nested section headings (array of strings)
##  strict - passed on to _walk_sections
##
##  returns a Schedule or nil if _walk_sections comes back empty-handed
def _find_schedule( header:, strict: false )
  ## make sure header is an array
  path = header.is_a?( String ) ? [header] : header

  txt = _walk_sections( @txt, header: path,
                              depth:  0,
                              strict: strict )
  return nil unless txt

  ## wrap in schedule class - why? why not?
  Schedule.new( txt )
end
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
##
##  Walk down the header path one heading level per step
##    and return the text of the section at the end of the path.
##
##  txt    - the text to split into sections
##  header - the path of nested section headings (array of strings)
##  depth  - index into the header path for this step;
##             depth 0 splits on heading level 2, depth 1 on level 3, and so on
##  strict - if true raise on a missing section; otherwise return nil
##
##  raises ArgumentError if strict and no section for header[depth] found
def _walk_sections( txt, header:,
                         depth:,
                         strict: false )

  wanted = header[depth]

  ## note - start at level 2
  sections = _split_sections( txt, level: depth+2 )
  body     = sections[ wanted ]

  unless body
    raise ArgumentError, "section with header >#{wanted}< not found; sections incl. #{sections.keys}"  if strict
    return nil
  end

  ## end of header path reached? done; otherwise keep walking
  return body   unless header[depth+1]

  _walk_sections( body, header: header,
                        depth:  depth+1,
                        strict: strict )
end  # method _walk_sections
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
end # class Page
|
|
127
|
+
end # module Rsssf
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
module Rsssf
class Page

  ##
  ##  Matches the meta data block, that is,
  ##    an html-style comment header.
  ##
  ##  note - \A - start of string
  ##          the comment must start the .txt document!!!
  ##          (only spaces and blank lines may come before)

  HTML_COMMENT_HEADER_RE = %r{ \A
                                [ \n]*     ## trailing spaces and blank lines
                                <!--
                                [ \n]*
                                  (?<text> .+?)
                                [ \n]*
                                -->
                             }imx


  ###
  ##  find meta data block (via html-style comment header)
  ##     incl. title, author(s), url, updated
  ##   e.g.
  ##    <!--
  ##       title:   Austria 2024/25
  ##       source:  https://rsssf.org/tableso/oost2025.html
  ##       author:  Hans Schöggl
  ##       updated: 7 Jul 2025
  ##    -->
  ##   -or-
  ##       authors: Hans Schöggl and Karel Stokkermans
  ##
  ##  Every line gets split on the first colon (:) only;
  ##    the part before becomes the key (as a symbol) and
  ##    the rest the value (nil if the line has no colon).
  ##  Blank lines and comment lines (starting w/ #) get skipped.
  ##
  ##  returns a hash or nil if no meta data (comment header) found

  def self.parse_meta( txt )
    m = HTML_COMMENT_HEADER_RE.match( txt )
    return nil   unless m    ## no meta data (comment header) found

    m[:text].each_line.with_object( {} ) do |raw, meta|
      line = raw.strip

      ## note - allow "inline" blank lines and comment lines (starting w/ #)
      next  if line.empty? || line.start_with?('#')

      ## split line on first colon (:) (only)
      ##   note - limit split to two pieces!!!
      key, value = line.split( /[ ]*:[ ]*/, 2)

      ## use a symbol (not string) as key - why? why not?
      meta[ key.to_sym ] = value
    end
  end

  ## convenience helper - same as Page.parse_meta
  def parse_meta( txt )  self.class.parse_meta( txt ); end


end  # class Page
end  # module Rsssf
|