rsssf 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +5 -5
  2. data/{HISTORY.md → CHANGELOG.md} +4 -0
  3. data/Manifest.txt +41 -7
  4. data/README.md +93 -71
  5. data/Rakefile +8 -7
  6. data/config/groups_en.txt +44 -0
  7. data/config/rounds_en.txt +283 -0
  8. data/config/rounds_es.txt +20 -0
  9. data/config/rounds_misc.txt +7 -0
  10. data/lib/_cocos_.rb +158 -0
  11. data/lib/rsssf/convert/convert.rb +71 -0
  12. data/lib/rsssf/convert/errata.rb +103 -0
  13. data/lib/rsssf/convert/html_entities.rb +150 -0
  14. data/lib/rsssf/convert/html_to_txt/beautify_anchors.rb +96 -0
  15. data/lib/rsssf/convert/html_to_txt/make_heading.rb +70 -0
  16. data/lib/rsssf/convert/html_to_txt/remove_emails.rb +43 -0
  17. data/lib/rsssf/convert/html_to_txt/replace_a_href.rb +85 -0
  18. data/lib/rsssf/convert/html_to_txt/replace_a_name.rb +87 -0
  19. data/lib/rsssf/convert/html_to_txt/replace_heading.rb +76 -0
  20. data/lib/rsssf/convert/html_to_txt/replace_hr.rb +25 -0
  21. data/lib/rsssf/convert/html_to_txt.rb +247 -0
  22. data/lib/rsssf/download.rb +20 -0
  23. data/lib/rsssf/fmtfix/dates.rb +541 -0
  24. data/lib/rsssf/fmtfix/dates_helpers.rb +63 -0
  25. data/lib/rsssf/fmtfix/errata.rb +44 -0
  26. data/lib/rsssf/fmtfix/fmtfix-base.rb +68 -0
  27. data/lib/rsssf/fmtfix/fmtfix.rb +101 -0
  28. data/lib/rsssf/fmtfix/goals.rb +173 -0
  29. data/lib/rsssf/fmtfix/headers.rb +326 -0
  30. data/lib/rsssf/fmtfix/outline.rb +228 -0
  31. data/lib/rsssf/fmtfix/patch_headings.rb +141 -0
  32. data/lib/rsssf/fmtfix/rounds.rb +74 -0
  33. data/lib/rsssf/fmtfix/score.rb +92 -0
  34. data/lib/rsssf/fmtfix/tables.rb +316 -0
  35. data/lib/rsssf/fmtfix/topscorers.rb +50 -0
  36. data/lib/rsssf/page-find_schedule.rb +127 -0
  37. data/lib/rsssf/page-meta.rb +68 -0
  38. data/lib/rsssf/page.rb +125 -238
  39. data/lib/rsssf/parse_schedules.rb +34 -0
  40. data/lib/rsssf/prepare/convert-links.rb +77 -0
  41. data/lib/rsssf/prepare/convert-meta.rb +111 -0
  42. data/lib/rsssf/prepare/convert-navlines.rb +154 -0
  43. data/lib/rsssf/prepare/convert-postproc.rb +141 -0
  44. data/lib/rsssf/prepare/convert.rb +100 -0
  45. data/lib/rsssf/prepare/download.rb +40 -0
  46. data/lib/rsssf/project.rb +154 -0
  47. data/lib/rsssf/reports/page.rb +66 -23
  48. data/lib/rsssf/reports/schedule.rb +89 -40
  49. data/lib/rsssf/schedule.rb +4 -14
  50. data/lib/rsssf/utils.rb +37 -45
  51. data/lib/rsssf/version.rb +7 -6
  52. data/lib/rsssf.rb +82 -19
  53. metadata +68 -26
  54. data/.gemtest +0 -0
  55. data/lib/rsssf/fetch.rb +0 -80
  56. data/lib/rsssf/html2txt.rb +0 -157
  57. data/lib/rsssf/patch.rb +0 -28
  58. data/lib/rsssf/repo.rb +0 -220
  59. data/test/helper.rb +0 -12
  60. data/test/test_utils.rb +0 -83
@@ -0,0 +1,20 @@
1
+
2
+
3
+
4
+ ## used in mexico
5
+
6
+ Playoffs [ ] \( Liguilla \)
7
+
8
+ Recalificación
9
+
10
+
11
+ ## used in argentina
12
+
13
+ Round·of·64·-·32·avos·de·final
14
+ Round·of·32·-·16·avos·de·final
15
+ Round·of·16·-·Octavos·de·Final
16
+ Quarter·finals·-·Cuartos·de·final
17
+ Semi·finals·-·Semifinales
18
+
19
+ Primera·fase·de·zonas·-·Phase·of·groups
20
+
@@ -0,0 +1,7 @@
1
+
2
+
3
+
4
+ # in cz
5
+
6
+ Play-off·o·umístění
7
+ Skupina·o·záchranu
data/lib/_cocos_.rb ADDED
@@ -0,0 +1,158 @@
1
+ ###
2
+ ## move "upstream" to cocos for sharing
3
+
4
+
5
+ ##
6
+ ## note - use File.file? instead of File.exist?
7
+ ## (checks if file exists AND file is a file NOT a directory)
8
+ ##
9
+ ## todo/fix - add an option to check if file found or not
10
+ ## return nil if not found or such
11
+ ##
12
+ ## use find_file! and find_file or such - why? why not?
13
+
14
+
15
+ =begin
16
+ def find_file(name, path: [])
17
+ path.each do |dir|
18
+ full = File.join(dir, name)
19
+ return full if File.exist?(full)
20
+ end
21
+ nil
22
+ end
23
+
24
+ def find_file!(name, path: [])
25
+ find_file(name, path:) or
26
+ raise Errno::ENOENT, ""
27
+ end
28
+
29
+ plus add option - raise_on_error: false - why? why not?
30
+ def find_file! - find_file( raise_on_error: false )
31
+
32
+ =end
33
+
34
+
35
+
36
+
37
##
## Locate a file by name - or raise if it cannot be found.
##
## Delegates the lookup to find_file( name, path: path ) and hands back
## whatever non-nil filepath that call reports.
##
## name - file name (or path) to look for
## path - search path (list of directories); passed on to find_file as is
##
## Returns the filepath reported by find_file.
## Raises Errno::ENOENT if find_file returns nil; the message includes
##   the name and the (inspected) search path.
##
## note - the exception class is Errno::ENOENT (Errno, NOT Errorno);
##   a misspelled constant would raise NameError instead
def find_file!( name, path: )
  filepath = find_file( name, path: path )
  raise Errno::ENOENT, "file <#{name}> not found; looking in path #{path.inspect}" if filepath.nil?
  filepath
end
42
+
43
+ ##
44
+ ## note - find_file will NOT find directories!!!
45
+ ## File.file? will only check if a file (not directory) exists!!
46
+
47
+ def find_file( name, path: )
48
+ return name if File.file?( name )
49
+
50
+ path.each do |dir|
51
+ filepath = File.join( dir, name )
52
+ return filepath if File.file?( filepath )
53
+ end
54
+
55
+ nil ## return nil if not found
56
+ end
57
+
58
+
59
+
60
+
61
+ ####
62
+ # parse/find_patterns
63
+
64
+ ## use/rename to VARDEF_LINE or such - why? why not?
65
+ VARDEF_RE = %r{\A
66
+ [ ]*
67
+ \$(?<key> [a-z][a-z0-9_]*)
68
+ [ ]*
69
+ =
70
+ [ ]*
71
+ (?<value> .+?) ## eat-up (non-greedy) the rest until end-of-line
72
+ [ ]*
73
+ \z
74
+ }ix
75
+
76
+ VAR_RE = %r{ \$(?<key> [a-z][a-z0-9_]*)
77
+ \b
78
+ }ix
79
+
80
+
81
+
82
+
83
+ def read_patterns( path )
84
+ parse_patterns( read_text( path ))
85
+ end
86
+
87
+ def parse_patterns( txt )
88
+
89
+ ## norm newline (windows cr/lf \r\n) to (lf - \n)
90
+ txt = txt.gsub( /\r\n/, "\n" )
91
+
92
+ ### check for line continuations with backslash (\)
93
+ ## note - allow spaces before newline
94
+ txt = txt.gsub( /\\[ ]*$\n/, '' )
95
+
96
+
97
+ vars = {}
98
+ names = [] # array of lines (with words)
99
+ txt.each_line do |line|
100
+ line = line.strip
101
+
102
+ next if line.empty?
103
+ next if line.start_with?( '#' ) ## skip comments too
104
+
105
+ break if line == '__END__'
106
+
107
+ ## strip inline (until end-of-line) comments too
108
+ ## e.g. Janvier Janv Jan ## check janv in use??
109
+ ## => Janvier Janv Jan
110
+
111
+ line = line.sub( /#.*/, '' ).strip
112
+ ## pp line
113
+
114
+ ###
115
+ ## check for variable defs
116
+ if m = VARDEF_RE.match( line )
117
+ vars[ m[:key].downcase ] = m[:value ]
118
+ next
119
+ end
120
+
121
+ line = line.gsub( VAR_RE ) do |_|
122
+ m = Regexp.last_match
123
+ key = m[:key].downcase
124
+
125
+ value = vars[key]
126
+ raise ArgumentError, "subvars - no vardef found for key >#{key}<" if value.nil?
127
+ value
128
+ end
129
+
130
+ ### use squish - remove more than one inline space
131
+ line = line.gsub( /[ ]{2,}/, ' ' )
132
+
133
+
134
+ ## open paren (use for grouping to non-capture grouping) e.g.
135
+ ## () => (?: )
136
+ ## note - do NOT replace escaped \( !!!
137
+ ## e.g. playoffs (liguilla)
138
+ line = line.gsub( / ## negative lookbehind
139
+ (?<! \\)
140
+ \(
141
+ /x, '(?: ')
142
+
143
+ ## expand space shortcuts
144
+ ## replace Middle Dot (·) Unicode: U+00B7 or
145
+ ## White Square (□) Unicode: U+25A1 or
146
+ ## White Small Square (▫) Unicode: U+25AB
147
+ ## Open Box (␣) Unicode: U+2423 or
148
+ ##
149
+ ## add more - why? why not?
150
+
151
+
152
+ line = line.gsub( /[·□▫␣]/, ' [ ] ' )
153
+
154
+
155
+ names << line
156
+ end
157
+ names
158
+ end
@@ -0,0 +1,71 @@
1
+
2
+ module Rsssf
3
+ class PageConverter
4
+
5
+ ## convenience helper
6
+ def self.convert( html, url: )
7
+ @@converter ||= new ## use a "shared" built-in converter
8
+ @@converter.convert( html, url: url )
9
+ end
10
+
11
+
12
+
13
+
14
+ ##
15
+ ## add anchor: options or such
16
+ ## lets you toggle adding anchors (§premier etc.) - why? why not?
17
+
18
+ def convert( html, url: )
19
+ ### todo/fix: first check if html is all ascii-7bit e.g.
20
+ ## includes only chars from 0 to 127!!!
21
+
22
+ ## normalize newlines
23
+ ## replace \r\n (carriage return \r + line feed \n) used by Windows - cr+lf;
24
+ ## just use \n (new line a.k.a. line feed)
25
+ html = html.gsub( "\r\n", "\n" )
26
+
27
+ ## convert tabs to two spaces (or use four??)
28
+ html = html.gsub( "\t", ' ' )
29
+
30
+
31
+
32
+ html = convert_html_entities( html, url: url )
33
+
34
+ ###################################
35
+ ### smart quotes quick fixes
36
+ ### convert all "smart" quote to (standard) single and double quotes
37
+ ## D´Alessandro => D'Alessandro
38
+ ## 81´ and 88' => 81' and 88'
39
+
40
+
41
+ html = html.gsub( /[´’‘]/, "'" )
42
+ html = html.gsub( /[“”]/, '"' )
43
+
44
+ ### convert fancy (unicode) dashes/hyphens to plain dash/hyphen
45
+ html = html.gsub( '–', '-' )
46
+
47
+
48
+
49
+ txt = html_to_txt( html, url: url )
50
+ txt
51
+ end ## method convert
52
+
53
+
54
+
55
+
56
+ ###################
57
+ # more helpers
58
+ def self.log( msg )
59
+ ## append msg to ./logs.txt
60
+ ## use ./errors.txt - why? why not?
61
+ File.open( './logs.txt', 'a:utf-8' ) do |f|
62
+ f.write( msg )
63
+ f.write( "\n" )
64
+ end
65
+ end
66
+ def log( msg ) self.class.log( msg ); end
67
+
68
+
69
+
70
+ end # class PageConverter
71
+ end # module Rsssf
@@ -0,0 +1,103 @@
1
+ module Rsssf
2
+ class PageConverter
3
+
4
+
5
+ ##
6
+ ## todo/fix/fix/fix
7
+ ## add filenames/urls for quick fixes!!!
8
+
9
+
10
+ def self.errata_html( html )
11
+ ## auto-fix known typos / errors
12
+ ### kind of PRE-processing, see errata_txt for POST-processing
13
+ ### check - rename to errata_pre/post - why? why not?
14
+
15
+
16
+ ## quick fix - rm </ADDRESS>
17
+ ## </ADDRESS>
18
+ ## tablesb/braz94.html
19
+ html = html.gsub( '</ADDRESS>', '' )
20
+
21
+ ## quick fix </a => </a>
22
+ ## <a href="#play6">Gold League (Calle 6)</a
23
+ ## <a href="#zpl">PBZ Premier League 2025/26</a
24
+ ## <a href="#lig1">Championnat National Ligue 1</a
25
+
26
+ html = html.gsub( /<\/A
27
+ (?! [ ]*>) ## negative lookahead
28
+ /ix, '</A>' )
29
+
30
+ ## quick fix </br> => <br>
31
+ html = html.gsub( /<\/BR>/i, '<BR>' )
32
+
33
+
34
+
35
+ ## quick fix - change typo <H1></H2>
36
+ ## tables/58full.html
37
+ html = html.gsub( '<H1>Quarterfinals</H2>', '<H2>Quarterfinals</H2>' )
38
+
39
+ ## quick fix - change typo <M>,<N> to <B>
40
+ ## tables/54full.html
41
+ html = html.gsub( '<M>MEX</B>', '<B>MEX</B>' )
42
+ ## tables/58full.html
43
+ html = html.gsub( '<N>CZE</B>', '<B>CZE</B>' )
44
+
45
+
46
+ ## quick fix -
47
+ ## tablesb/braz88.html
48
+ html = html.gsub( '<</TITLE>', '</TITLE>' )
49
+
50
+
51
+ ## quick fix
52
+ ## hr (horizontal rule) via img
53
+ ## in tables/30full.html and others
54
+ ##
55
+ ## <IMG SRC="xshadow.gif.pagespeed.ic.AbdeNVcmzw.png" ALT="-----------">
56
+ ## look for
57
+ ## <IMG ALT="---">
58
+ html = html.gsub( /<IMG
59
+ [^>]+?
60
+ ALT="-{3,}"
61
+ >/ixm, '<HR>' )
62
+
63
+
64
+ html
65
+ end
66
+ def errata_html( html ) self.class.errata_html( html ); end
67
+
68
+
69
+ def self.errata_html_entities( html )
70
+ ########
71
+ ## typos / autofix - keep - why? why not?
72
+ html = html.gsub( "&oulm;", 'ö' ) ## support typo in entity (&ouml;)
73
+ html = html.gsub( "&uml;", 'ü' ) ## support typo in entity (&uuml;) - why? why not?
74
+ html = html.gsub( "&slig;", "ß" ) ## support typo in entity (&szlig;)
75
+ html = html.gsub( "&aaacute;", "á" ) ## typo for &aacute;
76
+ html = html.gsub( "&nitlde;", "ñ" ) ## typo for &ntilde;
77
+ html
78
+ end
79
+ def errata_html_entities( html ) self.class.errata_html_entities( html ); end
80
+
81
+
82
+
83
+ def errata_txt( txt )
84
+ ## kind-of POST-processing, see errata_html for PRE-processing
85
+
86
+ ## quick fix - squish spaces (to single)
87
+ ## tables/82full.html
88
+ txt = txt.gsub( 'Second phase', 'Second phase' )
89
+
90
+
91
+ ## quick fix - add (missing) closing bracket (])
92
+ ## tables/70q.html
93
+ txt = txt.gsub(/^South America Group 10 \[Brazil$/,
94
+ 'South America Group 10 [Brazil]' )
95
+
96
+
97
+ txt
98
+ end
99
+
100
+
101
+ end # class PageConverter
102
+ end # module Rsssf
103
+
@@ -0,0 +1,150 @@
1
+
2
+ module Rsssf
3
+ class PageConverter
4
+
5
+
6
+
7
+ ENTITIES = %w[
8
+ À &Agrave;
9
+ Á &Aacute;
10
+ Â &Acirc;
11
+ Ã &Atilde;
12
+ Ä &Auml;
13
+ Å &Aring;
14
+
15
+ à &agrave;
16
+ á &aacute;
17
+ â &acirc;
18
+ ã &atilde;
19
+ ä &auml;
20
+ å &aring;
21
+ Æ &AElig;
22
+ æ &aelig;
23
+ ß &szlig;
24
+ Ç &Ccedil;
25
+ ç &ccedil;
26
+ È &Egrave;
27
+ É &Eacute;
28
+ Ê &Ecirc;
29
+ Ë &Euml;
30
+ è &egrave;
31
+ é &eacute;
32
+ ê &ecirc;
33
+ ë &euml;
34
+
35
+ ð &eth;
36
+
37
+ Ì &Igrave;
38
+ Í &Iacute;
39
+ Î &Icirc;
40
+ Ï &Iuml;
41
+ ì &igrave;
42
+ í &iacute;
43
+ î &icirc;
44
+ ï &iuml;
45
+ Ñ &Ntilde;
46
+ ñ &ntilde;
47
+ Ò &Ograve;
48
+ Ó &Oacute;
49
+ Ô &Ocirc;
50
+ Õ &Otilde;
51
+ Ö &Ouml;
52
+ ò &ograve;
53
+ ó &oacute;
54
+ ô &ocirc;
55
+ õ &otilde;
56
+ ö &ouml;
57
+ Ø &Oslash;
58
+ ø &oslash;
59
+ Ù &Ugrave;
60
+ Ú &Uacute;
61
+ Û &Ucirc;
62
+ Ü &Uuml;
63
+ ù &ugrave;
64
+ ú &uacute;
65
+ û &ucirc;
66
+ ü &uuml;
67
+ Ý &Yacute;
68
+ ý &yacute;
69
+ ÿ &yuml;
70
+
71
+ < &lt;
72
+ > &gt;
73
+ & &amp;
74
+ © &copy;
75
+ ® &reg;
76
+
77
+ ]
78
+
79
+
80
+
81
+ def self.convert_html_entities( html, url: nil )
82
+ ## check for html entities
83
+ html = html.gsub( "&auml;", 'ä' )
84
+ html = html.gsub( "&ouml;", 'ö' )
85
+ html = html.gsub( "&uuml;", 'ü' )
86
+ html = html.gsub( "&Auml;", 'Ä' )
87
+ html = html.gsub( "&Ouml;", 'Ö' )
88
+ html = html.gsub( "&Uuml;", 'Ü' )
89
+ html = html.gsub( "&szlig;", 'ß' )
90
+
91
+
92
+ html = errata_html_entities( html )
93
+
94
+
95
+ ENTITIES.each_slice(2) do |str, entity|
96
+ html = html.gsub( entity, str )
97
+ end
98
+
99
+
100
+
101
+ ##############
102
+ ## check for more entities
103
+ ## limit &---; to length 10 - why? why not?
104
+
105
+
106
+ ## check for decimal entities (mapping 1:1 to unicode)
107
+ html = html.gsub(/&#(\d+);/) do |match|
108
+ uni = if match == '&#307;' ## use like Van D&#307;k -> Van Dijk
109
+ 'ij'
110
+ else
111
+ [$1.to_i].pack("U")
112
+ end
113
+
114
+ ##puts " converting numeric html entity #{match} to unicode char #{uni}"
115
+
116
+ uni
117
+ end
118
+
119
+
120
+ html = html.gsub( /&[^; ]{1,10};/) do |match|
121
+ ## ignore weird edge case of &A;
122
+ ## e.g. [M&A; moved from pool B] - where M&A is name of club
123
+ ##
124
+ ## in ital03.html:
125
+ ### [Eugenio Corini 22pen&36pen; Christian Vieri 69]
126
+ ## Francesco Totti 31, Vincenzo Montella 49&68; Antonio Di Natale 11]
127
+
128
+ if match == '&A;' ||
129
+ match == '&36pen;' || match == '&68;'
130
+ else
131
+ msg = "found unencoded html entity #{match}"
132
+ msg += " in >#{url}<" if url
133
+
134
+ puts "*** WARN - #{msg}"
135
+ log( msg ) ## log too (see logs.txt)
136
+ end
137
+
138
+ match ## pass through as is (1:1)
139
+ end
140
+
141
+ html
142
+ end
143
+ def convert_html_entities( html, url: nil ) self.class.convert_html_entities( html, url: url ); end
144
+
145
+
146
+
147
+
148
+ end # class PageConverter
149
+ end # module Rsssf
150
+
@@ -0,0 +1,96 @@
1
+ module Rsssf
2
+ class PageConverter
3
+
4
+
5
+ def beautify_anchors( html )
6
+ ## beautify
7
+ ## ‹§2fin›
8
+ ##
9
+ ## == Semifinals
10
+ ##
11
+ ## merge anchor (a name) with heading into one line e.g.
12
+ ## =>
13
+ ## == Semifinals ‹§2fin›
14
+
15
+ html = html.gsub( /\s*
16
+ (?<name>‹§
17
+ [^›]+?
18
+ ›)
19
+ \s*
20
+ (?<heading>={2,}
21
+ [^=\n]+?
22
+ )
23
+ \n
24
+ \s*/ixm ) do |match|
25
+
26
+ m = Regexp.last_match
27
+
28
+ match = match.gsub( "\n", '$$' ) ## make newlines visible for debugging
29
+ puts " mergeing anchor (a name) with heading into one line - >#{match}<"
30
+
31
+ "\n\n#{m[:heading]} #{m[:name]}\n\n"
32
+ end
33
+
34
+ ###
35
+ ##
36
+ ## beautify
37
+ ## ‹§argsquad›Argentine Squad Full Info
38
+ ## ‹§eng›ENGLAND
39
+ ##
40
+ ##
41
+ ## reformat anchor (a name) start line with text e.g.
42
+ ## =>
43
+ ## Argentine Squad Full Info ‹§argsquad›
44
+ ## ENGLAND ‹§eng›
45
+
46
+ html = html.gsub( /\n
47
+ (?<name>‹§
48
+ [^›]+?
49
+ ›)
50
+ [ ]*
51
+ (?<text>[^\n]+?
52
+ )
53
+ \n
54
+ /ixm ) do |match|
55
+
56
+ m = Regexp.last_match
57
+
58
+ match = match.gsub( "\n", '$$' ) ## make newlines visible for debugging
59
+ puts " move anchor (a name) starting line with text to end - >#{match}<"
60
+
61
+ "\n#{m[:text]} #{m[:name]}\n"
62
+ end
63
+
64
+ ###
65
+ ## beautify heading
66
+ ## ==== ‹§gra›Group A
67
+ ## =>
68
+ ## ==== Group A ‹§gra›
69
+
70
+ html = html.gsub( /\n
71
+ (?<heading_marker>
72
+ ={2,})
73
+ [ ]*
74
+ (?<name>‹§
75
+ [^›]+?
76
+ ›)
77
+ [ ]*
78
+ (?<heading_text>[^\n]+?
79
+ )
80
+ \n
81
+ /ixm ) do |match|
82
+
83
+ m = Regexp.last_match
84
+
85
+ match = match.gsub( "\n", '$$' ) ## make newlines visible for debugging
86
+ puts " move anchor (a name) in heading to end - >#{match}<"
87
+
88
+ "\n#{m[:heading_marker]} #{m[:heading_text]} #{m[:name]}\n"
89
+ end
90
+ html
91
+ end
92
+
93
+
94
+ end # class PageConverter
95
+ end # module Rsssf
96
+
@@ -0,0 +1,70 @@
1
+
2
+
3
+ ###
4
+ ## <b><a name="fall">Opening Season 2024</a></b> => <hb> ... </hb>
5
+ ## <u><a name="fplay">Playoff Stage</a></u> => <hu> ... </hu>
6
+ ##
7
+ ## (unofficial) heading "bold", heading "underscore"
8
+ ## note - MUST be one single "stand-alone" line (in pre block) !!!
9
+
10
+ =begin
11
+ BU_ANAME_LINE_RE = %r{^ [ ]* < (?<tag>B|U) >
12
+ [ ]* (?<text>
13
+ <A [ ]+ NAME
14
+ .+?
15
+ </A>
16
+ )
17
+ [ ]* </ \k<tag> >
18
+ [ ]*
19
+ $}ix
20
+ =end
21
+
22
+
23
+ ## scan for now only (do NOT replace)
24
+ BOLD_OR_UNDERLINE_LINE_RE = %r{^ [ ]* < (?<tag> [BU]) >
25
+ [ ]* (?<text>
26
+ .+? ## note - use non-greedy match
27
+ )
28
+ [ ]* </ \k<tag> >
29
+ [ ]*
30
+ $}ix
31
+
32
+
33
+
34
+ def make_heading( html )
35
+ edits = []
36
+
37
+ html = html.gsub( BOLD_OR_UNDERLINE_LINE_RE ) do |match|
38
+ m = Regexp.last_match
39
+
40
+ tag = m[:tag].downcase
41
+ text = m[:text]
42
+
43
+ if text.downcase.start_with?( '<a name' )
44
+ msg = "make heading (h#{tag}) out of #{tag}-enclosed a name in line >#{text}<"
45
+ puts " #{msg}"
46
+
47
+ ## note - edit line MUST start with --
48
+ ## might be multi-line
49
+ edits << "-- #{msg}"
50
+
51
+ "<h#{tag}>#{text}</h#{tag}>"
52
+ else
53
+ ## note - skip (false positive) copyright line (in about this document)
54
+ ## (C) Copyright RSSSF
55
+ ## Copyright
56
+ if %r{copyright}i.match?( text )
57
+ else
58
+ msg = "found #{tag}-enclosed line >#{text}< - heading?"
59
+ puts " #{msg}"
60
+
61
+ edits << "-- #{msg}"
62
+ end
63
+
64
+ match ## keep as is (do NOT change)
65
+ end
66
+ end
67
+
68
+
69
+ [html, edits]
70
+ end