rsssf 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +2 -0
  3. data/Manifest.txt +39 -2
  4. data/README.md +67 -62
  5. data/Rakefile +2 -2
  6. data/config/groups_en.txt +44 -0
  7. data/config/rounds_en.txt +283 -0
  8. data/config/rounds_es.txt +20 -0
  9. data/config/rounds_misc.txt +7 -0
  10. data/lib/_cocos_.rb +158 -0
  11. data/lib/rsssf/convert/convert.rb +71 -0
  12. data/lib/rsssf/convert/errata.rb +103 -0
  13. data/lib/rsssf/convert/html_entities.rb +150 -0
  14. data/lib/rsssf/convert/html_to_txt/beautify_anchors.rb +96 -0
  15. data/lib/rsssf/convert/html_to_txt/make_heading.rb +70 -0
  16. data/lib/rsssf/convert/html_to_txt/remove_emails.rb +43 -0
  17. data/lib/rsssf/convert/html_to_txt/replace_a_href.rb +85 -0
  18. data/lib/rsssf/convert/html_to_txt/replace_a_name.rb +87 -0
  19. data/lib/rsssf/convert/html_to_txt/replace_heading.rb +76 -0
  20. data/lib/rsssf/convert/html_to_txt/replace_hr.rb +25 -0
  21. data/lib/rsssf/convert/html_to_txt.rb +247 -0
  22. data/lib/rsssf/download.rb +4 -135
  23. data/lib/rsssf/fmtfix/dates.rb +541 -0
  24. data/lib/rsssf/fmtfix/dates_helpers.rb +63 -0
  25. data/lib/rsssf/fmtfix/errata.rb +44 -0
  26. data/lib/rsssf/fmtfix/fmtfix-base.rb +68 -0
  27. data/lib/rsssf/fmtfix/fmtfix.rb +101 -0
  28. data/lib/rsssf/fmtfix/goals.rb +173 -0
  29. data/lib/rsssf/fmtfix/headers.rb +326 -0
  30. data/lib/rsssf/fmtfix/outline.rb +228 -0
  31. data/lib/rsssf/fmtfix/patch_headings.rb +141 -0
  32. data/lib/rsssf/fmtfix/rounds.rb +74 -0
  33. data/lib/rsssf/fmtfix/score.rb +92 -0
  34. data/lib/rsssf/fmtfix/tables.rb +316 -0
  35. data/lib/rsssf/fmtfix/topscorers.rb +50 -0
  36. data/lib/rsssf/page-find_schedule.rb +127 -0
  37. data/lib/rsssf/page-meta.rb +68 -0
  38. data/lib/rsssf/page.rb +89 -227
  39. data/lib/rsssf/parse_schedules.rb +34 -0
  40. data/lib/rsssf/prepare/convert-links.rb +77 -0
  41. data/lib/rsssf/prepare/convert-meta.rb +111 -0
  42. data/lib/rsssf/prepare/convert-navlines.rb +154 -0
  43. data/lib/rsssf/prepare/convert-postproc.rb +141 -0
  44. data/lib/rsssf/prepare/convert.rb +100 -0
  45. data/lib/rsssf/prepare/download.rb +40 -0
  46. data/lib/rsssf/project.rb +154 -0
  47. data/lib/rsssf/reports/page.rb +40 -8
  48. data/lib/rsssf/reports/schedule.rb +18 -55
  49. data/lib/rsssf/utils.rb +28 -17
  50. data/lib/rsssf/version.rb +5 -2
  51. data/lib/rsssf.rb +53 -13
  52. metadata +50 -9
  53. data/lib/rsssf/convert.rb +0 -495
  54. data/lib/rsssf/repo.rb +0 -144
@@ -0,0 +1,541 @@
1
+ module Rsssf
2
+ class Fmtfix ## todo: find a better name e.g. Format or Fixer or ??
3
+
4
+
5
+ ##
6
+ # note - (re)use the same date regex style & capture names
7
+ # from football.txt tokenizer
8
+
9
## Month and weekday name tables.
##   *_LINES - one array of spellings per text line (full name first, then abbreviations)
##   *_NAMES - all spellings joined with "|" for interpolation into the date regexes
##   *_MAP   - downcased spelling => one-based line number (january => 1, monday => 1)
## Order matters: longer spellings come first on each line so the regex alternation
##   tries "March" before "Mar", "Sept" before "Sep", and so on.
+ MONTH_LINES = parse_names( <<TXT )
10
+ January Jan
11
+ February Feb
12
+ March Mar
13
+ April Apr
14
+ May
15
+ June Jun
16
+ July Jul
17
+ August Aug
18
+ September Sept Sep
19
+ October Oct
20
+ November Nov
21
+ December Dec
22
+ TXT
23
+
24
+ MONTH_NAMES = build_names( MONTH_LINES )
25
+ # pp MONTH_NAMES
26
+ MONTH_MAP = build_map( MONTH_LINES, downcase: true )
27
+ # pp MONTH_MAP
28
+
29
+
30
+ DAY_LINES = parse_names( <<TXT )
31
+ Monday Mon Mo
32
+ Tuesday Tues Tue Tu
33
+ Wednesday Wed We
34
+ Thursday Thurs Thur Thu Th
35
+ Friday Fri Fr
36
+ Saturday Sat Sa
37
+ Sunday Sun Su
38
+ TXT
39
+
40
+ DAY_NAMES = build_names( DAY_LINES )
41
+ # pp DAY_NAMES
42
+ DAY_MAP = build_map( DAY_LINES, downcase: true )
43
+ # pp DAY_MAP
44
+
45
+
46
+ #=>
47
+ # "January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|
48
+ # July|Jul|August|Aug|September|Sept|Sep|October|Oct|
49
+ # November|Nov|December|Dec"
50
+ #
51
+ # "Monday|Mon|Mo|Tuesday|Tues|Tue|Tu|Wednesday|Wed|We|
52
+ # Thursday|Thurs|Thur|Thu|Th|Friday|Fri|Fr|
53
+ # Saturday|Sat|Sa|Sunday|Sun|Su"
54
+
55
+
56
+
57
+ ###
58
+ # br / pt - future - add Portuguese / "Brazilian"
59
+ # Fevereiro
60
+ # Outubro
61
+ # Novembro
62
+ # Dezembro e.g.
63
+ #
64
+ # [9 Outubro]
65
+ # [02 Novembro]
66
+ # [09 Dezembro]
67
+ # [02 Fevereiro 1989]
68
+
69
+
70
+
71
+
72
+
73
+
74
+ # e.g. Aug 9
75
+ ## Fri Aug 9
76
+ ## Fri Aug 9
77
+ ## Fri, Aug 9
78
+ ## Fri, Aug 9 2024
79
+ ## Fri, Aug 9, 2024
80
+ ## Aug 9, 2024
81
+ ## Aug 9, 2024
82
+ ## note - eat-up optional comma after DAY_NAMES!!
83
+ ##
84
+ ## add around for date not known perfectly
85
+ ## around Mar 29
86
+ ## ca. Nov 1
87
+ ##
88
+ ## Jan 25/87 - support two-digit year
89
+ ## Jan 28/87
90
+ ##
91
+ ## extra/bonus - allows (double) space typo for month day e.g
92
+ ## Aug  9
93
## Month-first single date (case-insensitive, extended syntax).
## In order: optional "around" / "ca." / "c." qualifier plus one space,
##   optional day name (optional comma, one or more spaces), month name,
##   one or two spaces, one- or two-digit day, then optionally either
##   (optional comma) + one space + four-digit year, or "/" + two-digit year.
## Named captures: date, around, day_name, month_name, day, year, yy.
+ DATE_I_RE = %r{
94
+ (?<date>
95
+ \b
96
+ ## optional around qualifier
97
+ ((?<around> around
98
+ | ca?\.)
99
+ [ ]
100
+ )?
101
+ ## optional day name
102
+ ((?<day_name>#{DAY_NAMES})
103
+ (?: ,?[ ]+)
104
+ )?
105
+ (?<month_name>#{MONTH_NAMES})
106
+ [ ]{1,2} ## note - allow (double) space typo
107
+ (?<day>\d{1,2})
108
+ \b
109
+ ## optional year
110
+ ( (?: ,? [ ] ## note - comma optional with single space required for now
111
+ (?<year>\d{4}) ## optional year 2025 (yyyy)
112
+ | /
113
+ (?<yy>\d{2})
114
+ )
115
+ \b
116
+ )?
117
+ )}ix
118
+
119
+
120
+ #### date i - alt style with weekday at the end (used in arg2026.txt) e.g.
121
+ ## Mar 23, Mon
122
+ ## Mar 25, Wed
123
+ ## Apr 1, Wed
124
+ ## May 3, Sun
125
+ ## Jul 26, Sun
126
+
127
## Month-first date with the weekday trailing (case-insensitive, extended syntax):
##   month name, one or two spaces, one- or two-digit day, a comma,
##   an optional single space, then the day name. No year is matched.
## Named captures: date, month_name, day, day_name.
+ DATE_IB_RE = %r{
128
+ (?<date>
129
+ \b
130
+ (?<month_name>#{MONTH_NAMES})
131
+ [ ]{1,2} ## note - allow (double) space typo
132
+ (?<day>\d{1,2})
133
+ , [ ]?
134
+ (?<day_name>#{DAY_NAMES})
135
+ \b
136
+ )}ix
137
+
138
+
139
+
140
+ ###
141
+ ## e.g. 3 June
142
+ ## 10 June
143
+ ## note - allow more spaces between DAY_NAMES and DAY e.g.
144
+ ## Sun 1 Mar
145
+ ## Wed 4 Mar
146
+ ## Sat 14 Mar
147
+ ## Sat 11 Apr
148
+ ## Sat 11 Apr 2021
149
+ ##
150
+ ## Sat, 11 Apr
151
+ ## note - eat-up optional comma after DAY_NAMES!!
152
+
153
+
154
## Day-first single date (case-insensitive, extended syntax):
##   optional day name (optional comma, one or more spaces), one- or two-digit day,
##   exactly one space, month name, then optionally one space + four-digit year.
## Named captures: date, day_name, day, month_name, year.
+ DATE_II_RE = %r{
155
+ (?<date>
156
+ \b
157
+ ## optional day name
158
+ ((?<day_name>#{DAY_NAMES})
159
+ (?: ,?[ ]+)
160
+ )?
161
+ (?<day>\d{1,2})
162
+ [ ]
163
+ (?<month_name>#{MONTH_NAMES})
164
+ \b
165
+ ## optional year
166
+ (?: [ ]
167
+ (?<year>\d{4}) ## optional year 2025 (yyyy)
168
+ \b
169
+ )?
170
+ )}ix
171
+
172
+
173
+
174
+ ###
175
+ ## Aug 4,5
176
+ ## Aug 13,14
177
+ ## Aug 20,21
178
+ ## Mar 4, 5
179
+ ## Mar 11, 12
180
+ ## Apr 1, 2
181
+ ## -or-
182
+ ## Nov 24 and 27 - use in br
183
+ ## Nov 24 and 28
184
+ ## - or -
185
+ ## Feb 27 and Mar 7
186
+ ## Feb 28 and Mar 7
187
+ ## - or -
188
+ ## Nov 24 & 28
189
+ ## Nov 24&28
190
+ ##
191
+ # e.g. Aug 9 & Aug 10
192
+ ### note - allow shortcut e.g. Aug 9 & 10
193
+ ##
194
+ ## note allow two-digit year
195
+ ## Jan 31 and Feb 4/87
196
+ ## Feb 1 and 4/87
197
+
198
+
199
## Two dates ("legs") in one expression (case-insensitive, extended syntax):
##   month name, one space, day, a separator, optional second month name
##   plus one space, second day, then optionally "/" + two-digit year.
## Separator is one of: comma + up to five spaces, "and" with one to five spaces
##   on each side, or "&" with up to five spaces on each side.
## Named captures: date_legs, month_name1, day1, month_name2, day2, yy2.
+ DATE_LEGS_RE = %r{
200
+ (?<date_legs>
201
+ \b
202
+ (?<month_name1>#{MONTH_NAMES})
203
+ [ ]
204
+ (?<day1>\d{1,2})
205
+ (?:
206
+ , [ ]{0,5}
207
+ | [ ]{1,5} and [ ]{1,5}
208
+ | [ ]{0,5} & [ ]{0,5}
209
+ )
210
+ (?: ## note - make 2nd month_name optiona
211
+ (?<month_name2>#{MONTH_NAMES})
212
+ [ ]
213
+ )?
214
+ (?<day2>\d{1,2})
215
+ \b
216
+ ## optional two-digit year
217
+ (?: /
218
+ (?<yy2>\d{2})
219
+ \b
220
+ )?
221
+ )}ix
222
+
223
+
224
+ ##
225
+ ## merge date_list and date_legs ??
226
+ ## or keep date_legs always with two dates by definition??
227
+ ## and date_list starting w/ three or more dates?
228
+ ##
229
+ ## May 2,3,4
230
+ ## Feb 28, Mar 1,2
231
+
232
## List of three or four dates (case-insensitive, extended syntax).
## Entries are separated by a comma or semicolon followed by up to five spaces.
## The first entry needs a month name; entries two to four may repeat or omit it.
## The fourth entry is optional as a whole.
## Named captures: date_list, month_name1..month_name4, day1..day4.
+ DATE_LIST_RE = %r{
233
+ (?<date_list>
234
+ \b
235
+ (?<month_name1>#{MONTH_NAMES})
236
+ [ ]
237
+ (?<day1>\d{1,2})
238
+ (?: [,;] [ ]{0,5} )
239
+
240
+
241
+ (?: ## note - make 2nd month_name optiona
242
+ (?<month_name2>#{MONTH_NAMES})
243
+ [ ]
244
+ )?
245
+ (?<day2>\d{1,2})
246
+ (?: [,;] [ ]{0,5} )
247
+
248
+
249
+ (?: ## note - make 3rd month_name optiona
250
+ (?<month_name3>#{MONTH_NAMES})
251
+ [ ]
252
+ )?
253
+ (?<day3>\d{1,2})
254
+ \b
255
+
256
+
257
+ ### optional fourth date
258
+ (?:
259
+ [,;] [ ]{0,5}
260
+ (?: ## note - make 4th month_name optiona
261
+ (?<month_name4>#{MONTH_NAMES})
262
+ [ ]
263
+ )?
264
+ (?<day4>\d{1,2})
265
+ \b
266
+ )?
267
+ )}ix
268
+
269
+
270
+
271
+
272
+
273
+ ###
274
+ ## Aug 4-6
275
+ ## Aug 13-16
276
+ ## Aug 20-23
277
+ ## -or-
278
+ ## Jul 30-Aug 1
279
+ ## Sep 30-Oct 1
280
+ ## Sep 29-Oct 1
281
+ ## Mar 30-Apr 1
282
+
283
+
284
## Date range (case-insensitive, extended syntax):
##   month name, one space, day, a hyphen with an optional single space on
##   either side, optional second month name plus one space, second day.
## Named captures: date_range, month_name1, day1, month_name2, day2.
+ DATE_RANGE_RE = %r{
285
+ (?<date_range>
286
+ \b
287
+ (?<month_name1>#{MONTH_NAMES})
288
+ [ ]
289
+ (?<day1>\d{1,2})
290
+ [ ]? - [ ]?
291
+ (?: ## optional month
292
+ (?<month_name2>#{MONTH_NAMES})
293
+ [ ]
294
+ )?
295
+ (?<day2>\d{1,2})
296
+ \b
297
+ )}ix
298
+
299
+
300
+
301
+
302
+
303
+
304
+ ## "internal" date helpers
305
## Turn the named captures of a single-date match into a date hash.
##
##  m - a MatchData (named captures are read) or anything that answers
##        [] with symbol keys (:year, :yy, :month, :month_name, :day,
##        :day_name, :around)
##
##  returns a hash holding only the parts that were present:
##     :y (four-digit year), :yy (two-digit year), :m (month number),
##     :d (day), :wday (weekday number), :around (true)
def _build_date( m )
  ## a MatchData raises on unknown group names - go through a plain hash instead
  caps = m.is_a?(MatchData) ? m.named_captures.transform_keys(&:to_sym) : m

  parts = {}

  ## plain numeric captures (always parsed base 10, so "09" is 9)
  { y: :year, yy: :yy, m: :month }.each do |key, name|
    parts[key] = caps[name].to_i(10)   if caps[name]
  end

  ## a month name wins over a numeric month;
  ##   lookup is case-insensitive (JULY, Jul, jul)
  if caps[:month_name]
    parts[:m] = MONTH_MAP[ caps[:month_name].downcase ]
  end

  parts[:d] = caps[:day].to_i(10)   if caps[:day]

  if caps[:day_name]
    parts[:wday] = DAY_MAP[ caps[:day_name].downcase ]
  end

  parts[:around] = true   if caps[:around]

  parts
end
324
+
325
## Turn the named captures of a two-leg date match into
##   { date1: {m:, d:}, date2: {[m:,] d: [, yy:]} }.
##
##  m - a MatchData or anything that answers [] with symbol keys
##        (:month_name1, :day1, :month_name2, :day2, :yy2)
##
##  The first leg always carries a month; the second leg carries :m only
##   if a second month name was captured and :yy only if a two-digit
##   year was captured.
def _build_date_legs( m )
  ## a MatchData raises on unknown group names - go through a plain hash instead
  caps = m.is_a?(MatchData) ? m.named_captures.transform_keys(&:to_sym) : m

  ## note - month lookup is case-insensitive (always downcase)
  first = { m: MONTH_MAP[ caps[:month_name1].downcase ],
            d: caps[:day1].to_i(10) }

  second = {}
  second[:m]  = MONTH_MAP[ caps[:month_name2].downcase ]   if caps[:month_name2]
  second[:d]  = caps[:day2].to_i(10)
  second[:yy] = caps[:yy2].to_i(10)                        if caps[:yy2]

  { date1: first, date2: second }
end
345
+
346
+
347
+
348
## Turn the named captures of a date-list match into
##   { date1: {...}, date2: {...}, date3: {...} [, date4: {...}] }.
##
##  m - a MatchData or anything that answers [] with symbol keys
##        (:month_name1..4, :day1..4)
##
##  Entries one to three are required; the fourth is added only if
##   :day4 was captured. The first entry always carries :m; later
##   entries carry :m only if their own month name was captured.
def _build_date_list( m )
  ## a MatchData raises on unknown group names - go through a plain hash instead
  caps = m.is_a?(MatchData) ? m.named_captures.transform_keys(&:to_sym) : m

  (1..4).each_with_object( {} ) do |no, list|
    month = caps[:"month_name#{no}"]
    day   = caps[:"day#{no}"]

    next if no == 4 && !day     ## fourth date is optional

    entry = {}
    ## note - month lookup is case-insensitive (always downcase)
    entry[:m] = MONTH_MAP[ month.downcase ]   if no == 1 || month
    entry[:d] = day.to_i(10)

    list[:"date#{no}"] = entry
  end
end
379
+
380
+
381
## Turn the named captures of a date-range match into
##   { date1: {m:, d:}, date2: {[m:,] d:} }.
##
##  m - a MatchData or anything that answers [] with symbol keys
##        (:month_name1, :day1, :month_name2, :day2)
##
##  The end date carries :m only if a second month name was captured.
def _build_date_range( m )
  ## a MatchData raises on unknown group names - go through a plain hash instead
  caps = m.is_a?(MatchData) ? m.named_captures.transform_keys(&:to_sym) : m

  ## note - month lookup is case-insensitive (always downcase)
  from = { m: MONTH_MAP[ caps[:month_name1].downcase ],
           d: caps[:day1].to_i(10) }

  to = {}
  to[:m] = MONTH_MAP[ caps[:month_name2].downcase ]   if caps[:month_name2]
  to[:d] = caps[:day2].to_i(10)

  { date1: from, date2: to }
end
400
+
401
+
402
## Output spellings, indexed by the one-based numbers stored in
##   DAY_MAP / MONTH_MAP (slot 0 is unused, thus nil).
##   Frozen - these are lookup tables, not meant to be changed at runtime.
FMT_DAY_NAMES = [
  nil,    ## or use '!ERROR!' - why? why not?
  'Mon',  # 1
  'Tue',  # 2
  'Wed',  # 3
  'Thu',  # 4
  'Fri',  # 5
  'Sat',  # 6
  'Sun',  # 7
].freeze

FMT_MONTH_NAMES = [
  nil,    ## or use '!ERROR!' - why? why not?
  'Jan',  # 1
  'Feb',  # 2
  'Mar',  # 3
  'Apr',  # 4
  'May',  # 5
  'Jun',  # 6
  'Jul',  # 7
  'Aug',  # 8
  'Sep',  # 9
  'Oct',  # 10
  'Nov',  # 11
  'Dec',  # 12
].freeze


## Expand a two-digit year into " yyyy" (with leading space):
##    0..29 =>  2000..2029
##   30..99 =>  1930..1999
##  note - %02d makes sure 0,1,2 become 00, 01, 02
def _fmt_yy( yy )
  (yy < 30 ? " 20%02d" : " 19%02d") % yy
end


## Format a single date hash (as built by _build_date).
##
##  date   - hash with :m and :d, plus optional :y, :yy, :wday, :around
##  format - nil (default) for the named style e.g. Fri Feb 7 2025
##           'numeric' (any case, string or symbol) for d/m, d/m/yyyy or d/m/yy
##
##  note - the numeric style drops the weekday and the around/ca. qualifier;
##         the named style expands a two-digit year to four digits
def _fmt_date( date, format: nil )
  buf = String.new

  if format.to_s.downcase == 'numeric'
    buf << "#{date[:d]}/#{date[:m]}"

    if date[:y]       ## (optional) four-digit year e.g. 2010
      buf << "/#{date[:y]}"
    elsif date[:yy]   ## (optional) two-digit year e.g. 98
      buf << ("/%02d" % date[:yy])
    end
  else   ## use Fri Feb 7 2025
    ## check for "canonical" convention for around/ca. date or such
    buf << "c. "   if date[:around]

    buf << "#{FMT_DAY_NAMES[date[:wday]]} "   if date[:wday]
    buf << "#{FMT_MONTH_NAMES[date[:m]]} "
    buf << "#{date[:d]}"

    if date[:y]
      buf << " #{date[:y]}"
    elsif date[:yy]
      buf << _fmt_yy( date[:yy] )
    end
  end

  buf
end


## Format a two-leg date e.g. Feb 1 & 4 1987 or Jan 31 & Feb 4.
##
##  legs   - hash with :date1 (:m, :d) and :date2 (:d, optional :m and :yy)
##  format - accepted for a uniform signature; not used (yet) - always named style
def _fmt_date_legs( legs, format: nil )
  first, second = legs[:date1], legs[:date2]

  buf = String.new
  buf << "#{FMT_MONTH_NAMES[first[:m]]} "
  buf << "#{first[:d]}"
  buf << " & "
  buf << "#{FMT_MONTH_NAMES[second[:m]]} "   if second[:m]
  buf << "#{second[:d]}"
  buf << _fmt_yy( second[:yy] )              if second[:yy]
  buf
end


## Format a list of three or four dates e.g. May 2,3,4 or Feb 28; Mar 1,2.
##
##  list   - hash with :date1, :date2, :date3 and optional :date4
##  format - accepted for a uniform signature; not used (yet) - always named style
##
##  note - an entry with its own month is set off with "; Mon ",
##         an entry without month is appended with a plain ","
def _fmt_date_list( list, format: nil )
  buf = String.new
  buf << "#{FMT_MONTH_NAMES[list[:date1][:m]]} "
  buf << "#{list[:date1][:d]}"

  [:date2, :date3, :date4].each do |key|
    date = list[key]
    next if key == :date4 && !date    ## fourth date is optional

    buf << (date[:m] ? "; #{FMT_MONTH_NAMES[date[:m]]} " : ",")
    buf << "#{date[:d]}"
  end

  buf
end


## Format a date range e.g. Aug 4-6 or Jul 30-Aug 1.
##
##  range  - hash with :date1 (:m, :d) and :date2 (:d, optional :m)
##  format - accepted for a uniform signature; not used (yet) - always named style
def _fmt_date_range( range, format: nil )
  from, to = range[:date1], range[:date2]

  buf = String.new
  buf << "#{FMT_MONTH_NAMES[from[:m]]} "
  buf << "#{from[:d]}"
  buf << "-"
  buf << "#{FMT_MONTH_NAMES[to[:m]]} "   if to[:m]
  buf << "#{to[:d]}"
  buf
end
538
+
539
+
540
+ end ## class Fmtfix
541
+ end ## module Rsssf
@@ -0,0 +1,63 @@
1
module Rsssf
class Fmtfix


###  todo/fix
##    make more (re)usable instead of copy-n-paste here


## Parse a block of text into an array of name lists, one per line.
##
##  txt - one entry per line, spellings separated by spaces or tabs;
##        blank lines and lines starting with # are skipped,
##        inline (until end-of-line) comments are stripped e.g.
##           Janvier Janv Jan   ## check janv in use??
##           => ["Janvier", "Janv", "Jan"]
##
##  returns an array of arrays of strings (in line order)
def self.parse_names( txt )
  txt.each_line.each_with_object( [] ) do |line, lines|
    line = line.strip

    next if line.empty? || line.start_with?( '#' )

    ## todo/fix -- add check for duplicates
    lines << line.sub( /#.*/, '' ).strip.split( /[ \t]+/ )
  end
end  # method parse_names


## Join all spellings into one regex alternation e.g.
##    January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
##
##  note - every spelling gets regex-escaped, so a name such as "Janv."
##         matches a literal dot only; that keeps the matched text in sync
##         with the keys of build_map. plain letter names come out unchanged.
def self.build_names( lines )
  lines.flatten.map { |name| Regexp.escape( name ) }.join( '|' )
end


## Build a lookup map from spelling to the one-based line number e.g.
##    {"january" => 1, "jan" => 1,
##     "february" => 2, "feb" => 2,
##     "march" => 3, "mar" => 3, ...
##
##  downcase - if true the keys get downcased (use for case-insensitive lookup)
##
##  note - numbering starts with 1 (and NOT zero-based, that is, 0);
##         a spelling listed twice keeps the number of its last line
def self.build_map( lines,
                    downcase: false )
  lines.each_with_index.each_with_object( {} ) do |(names, i), map|
    names.each do |name|
      map[ downcase ? name.downcase : name ] = i+1
    end
  end
end


end   ## class Fmtfix
end   ## module Rsssf
@@ -0,0 +1,44 @@
1
module Rsssf
class Fmtfix   ## todo: find a better name e.g. Format or Fixer or ??


## Known one-off typos in the source pages, mapped to their fix.
##   Keys are matched as plain strings (no regex).
ERRATAS = {
  ## in austria
  ##   avoid confusion with /DD is year!!!
  ##   maybe make it a switch to turn on
  '[Nov 13/14]' => '[Nov 13,14]',
  '[Mar 25/26]' => '[Mar 25,26]',
  '[Aug 12/13]' => '[Aug 12,13]',

  ## "classic" typos
  ##  month
  '[Niv 8]'  => '[Nov 8]',
  '[Mov 7]'  => '[Nov 7]',
  '[Mov 26]' => '[Nov 26]',
  ##  double brackets
  '[Apr 15]]' => '[Apr 15]',
  "[[36' Hansen, 58' Glasner]" => "[36' Hansen, 58' Glasner]",
  ### more
  ' att; ' => ' att: '    ## e.g. Wembley; att; 11,689
}


## Apply every ERRATAS replacement (all occurrences, in table order)
##   and return the patched copy of the text.
def handle_errata_txt( txt )
  ERRATAS.reduce( txt ) do |patched, (typo, fix)|
    patched.gsub( typo, fix )
  end
end


end   ## class Fmtfix
end   ## module Rsssf
@@ -0,0 +1,68 @@
1
+
2
module Rsssf
class Fmtfix   ## todo: find a better name e.g. Format or Fixer or ??


## Run all format fixes over a page and return the cleaned-up text.
##
##  Steps (in order):
##    1) normalize whitespace - tabs, windows newlines, unicode spaces
##    2) whole-text passes   - handle_tables, handle_topscorers, handle_errata_txt
##    3) line-by-line pass   - handle_header
##    4) whole-text passes   - handle_score, handle_goals
##
##  txt - the page text; returns a new string
def autofix( txt )

  ##
  ## make sure no tabs (expand to two spaces)
  txt = txt.gsub( "\t", '  ' )
  txt = txt.gsub( "\r\n", "\n" )  ## unify newline

  ## fix unicode spaces - spelled out by code point (an invisible literal
  ##   in the source is too easy to lose):
  ##   no-break space, en/em/thin/... spaces, narrow no-break space,
  ##   medium mathematical space, ideographic space
  txt = txt.gsub( /[\u00A0\u2000-\u200A\u202F\u205F\u3000]/, ' ' )


  txt = handle_tables( txt )   ## e.g. final/halfway table (aka standings)
  txt = handle_topscorers( txt )

  txt = handle_errata_txt( txt )


  #####
  ##  line-by-line processing / matching

  newtxt = String.new
  txt.each_line do |line|
    ## note - line incl. newline; handle_header gets the line without
    ##          trailing whitespace and returns nil if no match
    ##          otherwise the reformatted (new) line !!!
    ## NOTE(review): the returned line is appended as-is - presumably
    ##          handle_header adds its own newline; confirm in headers.rb
    newline = handle_header( line.rstrip )

    newtxt << (newline || line)
  end

  txt = newtxt


  txt = handle_score( txt )

  txt = handle_goals( txt )


  ###
  ##  todo
  ##  fix subs in lineup in oost00.txt
  # Salzburg: Safar - Szewczyk (97./Lipcsei) - Winklhofer, C.Jank - Laessig,
  # Hütter (71./Meyssen) - Nikolic, Aufhauser, Kitzbichler - Struber,
  # Polster (56./Sabitzer)

  txt
end


end   ## class Fmtfix
end   ## module Rsssf