rsssf 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/{HISTORY.md → CHANGELOG.md} +4 -0
- data/Manifest.txt +41 -7
- data/README.md +93 -71
- data/Rakefile +8 -7
- data/config/groups_en.txt +44 -0
- data/config/rounds_en.txt +283 -0
- data/config/rounds_es.txt +20 -0
- data/config/rounds_misc.txt +7 -0
- data/lib/_cocos_.rb +158 -0
- data/lib/rsssf/convert/convert.rb +71 -0
- data/lib/rsssf/convert/errata.rb +103 -0
- data/lib/rsssf/convert/html_entities.rb +150 -0
- data/lib/rsssf/convert/html_to_txt/beautify_anchors.rb +96 -0
- data/lib/rsssf/convert/html_to_txt/make_heading.rb +70 -0
- data/lib/rsssf/convert/html_to_txt/remove_emails.rb +43 -0
- data/lib/rsssf/convert/html_to_txt/replace_a_href.rb +85 -0
- data/lib/rsssf/convert/html_to_txt/replace_a_name.rb +87 -0
- data/lib/rsssf/convert/html_to_txt/replace_heading.rb +76 -0
- data/lib/rsssf/convert/html_to_txt/replace_hr.rb +25 -0
- data/lib/rsssf/convert/html_to_txt.rb +247 -0
- data/lib/rsssf/download.rb +20 -0
- data/lib/rsssf/fmtfix/dates.rb +541 -0
- data/lib/rsssf/fmtfix/dates_helpers.rb +63 -0
- data/lib/rsssf/fmtfix/errata.rb +44 -0
- data/lib/rsssf/fmtfix/fmtfix-base.rb +68 -0
- data/lib/rsssf/fmtfix/fmtfix.rb +101 -0
- data/lib/rsssf/fmtfix/goals.rb +173 -0
- data/lib/rsssf/fmtfix/headers.rb +326 -0
- data/lib/rsssf/fmtfix/outline.rb +228 -0
- data/lib/rsssf/fmtfix/patch_headings.rb +141 -0
- data/lib/rsssf/fmtfix/rounds.rb +74 -0
- data/lib/rsssf/fmtfix/score.rb +92 -0
- data/lib/rsssf/fmtfix/tables.rb +316 -0
- data/lib/rsssf/fmtfix/topscorers.rb +50 -0
- data/lib/rsssf/page-find_schedule.rb +127 -0
- data/lib/rsssf/page-meta.rb +68 -0
- data/lib/rsssf/page.rb +125 -238
- data/lib/rsssf/parse_schedules.rb +34 -0
- data/lib/rsssf/prepare/convert-links.rb +77 -0
- data/lib/rsssf/prepare/convert-meta.rb +111 -0
- data/lib/rsssf/prepare/convert-navlines.rb +154 -0
- data/lib/rsssf/prepare/convert-postproc.rb +141 -0
- data/lib/rsssf/prepare/convert.rb +100 -0
- data/lib/rsssf/prepare/download.rb +40 -0
- data/lib/rsssf/project.rb +154 -0
- data/lib/rsssf/reports/page.rb +66 -23
- data/lib/rsssf/reports/schedule.rb +89 -40
- data/lib/rsssf/schedule.rb +4 -14
- data/lib/rsssf/utils.rb +37 -45
- data/lib/rsssf/version.rb +7 -6
- data/lib/rsssf.rb +82 -19
- metadata +68 -26
- data/.gemtest +0 -0
- data/lib/rsssf/fetch.rb +0 -80
- data/lib/rsssf/html2txt.rb +0 -157
- data/lib/rsssf/patch.rb +0 -28
- data/lib/rsssf/repo.rb +0 -220
- data/test/helper.rb +0 -12
- data/test/test_utils.rb +0 -83
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
module Rsssf
|
|
2
|
+
class Prep ## todo: find a better name e.g. BatchPrep or ??
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
=begin
|
|
7
|
+
|
|
8
|
+
todo - remove all "trailing" nav links in section
|
|
9
|
+
|
|
10
|
+
‹1974/75, see page oost75›.
|
|
11
|
+
|
|
12
|
+
‹1976/77, see page oost77›.
|
|
13
|
+
|
|
14
|
+
‹list of final tables, see page oosthist›.
|
|
15
|
+
|
|
16
|
+
‹list of champions, see page oostchamp›.
|
|
17
|
+
|
|
18
|
+
‹list of cup finals, see page oostcuphist›.
|
|
19
|
+
|
|
20
|
+
‹list of super cup finals, see page oostsupcuphist›.
|
|
21
|
+
|
|
22
|
+
‹list of foundation dates, see page oostfound›.
|
|
23
|
+
=end
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def strip_navlines( lines, heading: true )
|
|
29
|
+
## note - expects an array of lines (e.g. txt.lines!!!)
|
|
30
|
+
|
|
31
|
+
newlines = []
|
|
32
|
+
navlines = []
|
|
33
|
+
body = false ## hit/seen body?
|
|
34
|
+
lines.each_with_index do |line,lineno|
|
|
35
|
+
## check for optional leading heading line
|
|
36
|
+
## note - first line is heading
|
|
37
|
+
## (only optional for first section)
|
|
38
|
+
if heading && lineno == 0 && line.lstrip.start_with?( '==' )
|
|
39
|
+
newlines << line
|
|
40
|
+
next
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
## possibly remove leading nav link lines
|
|
44
|
+
if !body
|
|
45
|
+
if line.strip.empty?
|
|
46
|
+
newlines << line
|
|
47
|
+
next
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
## remove leading nav link lines only
|
|
51
|
+
newline = line.strip.gsub( /‹.+?›/, '' )
|
|
52
|
+
## check what's left over?
|
|
53
|
+
## if only space or pipe (|) or dot (.) than remove
|
|
54
|
+
if newline.match?( %r{\A
|
|
55
|
+
[ |.]*
|
|
56
|
+
\z}ix )
|
|
57
|
+
## puts " removing nav line #{line}"
|
|
58
|
+
navlines << line
|
|
59
|
+
## eat-up; record edit
|
|
60
|
+
else
|
|
61
|
+
body = true
|
|
62
|
+
newlines << line
|
|
63
|
+
end
|
|
64
|
+
else
|
|
65
|
+
newlines << line
|
|
66
|
+
end
|
|
67
|
+
end # each line
|
|
68
|
+
|
|
69
|
+
[newlines,navlines]
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def proc_navlines_by_sections( txt )
|
|
76
|
+
|
|
77
|
+
edits = []
|
|
78
|
+
|
|
79
|
+
###
|
|
80
|
+
## remove remaing nav html elements
|
|
81
|
+
## <MENU></MENU>
|
|
82
|
+
## <UL></UL>
|
|
83
|
+
## <LI></LI>
|
|
84
|
+
|
|
85
|
+
tags = []
|
|
86
|
+
txt = txt.gsub( %r{ <MENU> | </MENU>
|
|
87
|
+
| <UL> | </UL>
|
|
88
|
+
| <LI> | </LI>
|
|
89
|
+
}ix ) do |match|
|
|
90
|
+
tags << match
|
|
91
|
+
''
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
if tags.size > 0
|
|
95
|
+
edit = String.new
|
|
96
|
+
edit += "-- removed #{tags.size} remaining nav html element(s):\n"
|
|
97
|
+
edit += tags.join( ' ')
|
|
98
|
+
|
|
99
|
+
puts edit
|
|
100
|
+
|
|
101
|
+
edits << edit ## record edit
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
sections = txt.split( %r{^
|
|
106
|
+
(?= [ ]* ={2,} [ ]*
|
|
107
|
+
[\p{L}0-9] ## one letter or digit required
|
|
108
|
+
)
|
|
109
|
+
}ix
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
newsections = []
|
|
114
|
+
sections.each_with_index do |sect,sectno|
|
|
115
|
+
newlines, navlines = strip_navlines( sect.lines, heading: true )
|
|
116
|
+
|
|
117
|
+
if navlines.size > 0
|
|
118
|
+
edit = String.new
|
|
119
|
+
edit += "-- removing #{navlines.size} leading nav line(s) in section #{sectno+1}:\n"
|
|
120
|
+
edit += navlines.join
|
|
121
|
+
puts edit
|
|
122
|
+
|
|
123
|
+
edits << edit
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
## special check for last section
|
|
128
|
+
if sectno+1 == sections.size
|
|
129
|
+
## reverse lines
|
|
130
|
+
## and remove trailing navlines until hitting body
|
|
131
|
+
## note - set heading to false
|
|
132
|
+
newlines, navlines = strip_navlines( newlines.reverse, heading: false )
|
|
133
|
+
newlines = newlines.reverse
|
|
134
|
+
navlines = navlines.reverse
|
|
135
|
+
|
|
136
|
+
if navlines.size > 0
|
|
137
|
+
edit = String.new
|
|
138
|
+
edit += "-- removing #{navlines.size} trailing nav line(s) in last section #{sectno+1}:\n"
|
|
139
|
+
edit += navlines.join
|
|
140
|
+
puts edit
|
|
141
|
+
|
|
142
|
+
edits << edit
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
newsections << newlines.join
|
|
147
|
+
end # each section
|
|
148
|
+
|
|
149
|
+
[newsections.join, edits]
|
|
150
|
+
end
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
end ## class Prep
|
|
154
|
+
end ## module Rsssf
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
module Rsssf
  class Prep   ## todo: find a better name e.g. BatchPrep or ??

    ###
    ## Matches the trailing "about this document" meta backmatter, e.g.
    ##   == About this document ‹§about›
    ##
    ## note - anchored with \A to the start of the string
    START_WITH_ABOUT_RE = %r{ \A
                              [ \n]*        ## trailing spaces or blank lines
                              ={2,} [ ]* About [ ]+ this [ ]+ document
                              .*?
                            }ix

    ###
    ## Matches "custom" sections (removed by title), e.g.
    ##   === Index of groups
    START_WITH_CUSTOM_RE = %r{ \A
                               [ \n]*       ## trailing spaces or blank lines
                               ={2,}
                               [ ]*
                               (?<title>
                                  Index [ ] of [ ] groups
                               )
                               [ ]*
                               $
                             }ix

    ##
    ## todo - fix
    ##   remove all menu, ul, li tags etc. BEFORE the nav check -
    ##   see https://rsssf.github.io/tables/2014q.html as an example!!!
    START_WITH_NAV_RE = %r{ \A
                            [ \n]*          ## trailing spaces or blank lines
                            ‹.+?›           ## link (excludes named anchor - why? why not? §)
                          }ix


    ## Post-processes a converted .txt page:
    ##   - splits on horizontal rules (hrs) and drops nav sections,
    ##     "custom" sections (by title) and the about backmatter
    ##   - removes pre comment markers
    ##   - strips leading & trailing nav(igation) lines per section
    ##
    ## Returns [txt, edits, links, about]:
    ##   txt   - the cleaned-up text
    ##   edits - recorded removals (empty array if no edits!!)
    ##   links - collected ‹...› links (NOT collected in the about section)
    ##   about - the about section (or nil if none found)
    def postproc_page( txt, basename:, dirname: )
      edits = []   ## record edits in its own txt file
      links = []
      about = nil

      ###
      ## step 1 - split by horizontal rules (hrs) and remove
      ##   navigation sections starting with links e.g.
      ##     ‹Bundesliga, see §bund›
      sects = txt.split( /^=-=-=-=-=-=-=-=-=-=-=-=-=-=-=$/ )

      sects = sects.select do |sect|
        if START_WITH_NAV_RE.match?( sect )
          links += collect_links( sect, basename: basename,
                                        dirname: dirname )

          edit = "-- removing nav(igation) section:" + sect
          puts edit
          edits << edit   ## record edit

          false   ## remove section
        elsif (m = START_WITH_CUSTOM_RE.match( sect ))
          links += collect_links( sect, basename: basename,
                                        dirname: dirname )

          edit = "-- removing custom section with title >#{m[:title]}<:" + sect
          puts edit
          edits << edit   ## record edit

          false   ## remove section
        elsif START_WITH_ABOUT_RE.match?( sect )
          ## note - do NOT collect links in about section!!!
          about = sect
          false   ## remove (about) section
        else
          links += collect_links( sect, basename: basename,
                                        dirname: dirname )
          true    ## keep section
        end
      end

      ## note - every hr becomes a blank line on re-join
      txt = sects.join( "\n\n" )

      ###
      ## remove pre comment markers
      txt = txt.gsub( "<!-- start pre -->\n", '' )
      txt = txt.gsub( "<!-- end pre -->\n", '' )

      ## try to remove leading and trailing nav(igation) lines
      txt, more_edits = proc_navlines_by_sections( txt )
      edits += more_edits

      ## note - return (new) txt AND recorded edits (& erratas)
      [txt, edits, links, about]
    end

  end ## class Prep
end ## module Rsssf
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
|
|
2
|
+
module Rsssf
  class Prep   ## todo: find a better name e.g. BatchPrep or ??

    ## convenience helper - convert via a "shared" built-in prep
    def self.convert_pages( pages, outdir: )
      @@prep ||= new
      @@prep.convert_pages( pages, outdir: outdir )
    end


    ## Converts (cached) html pages to txt and writes the result -
    ## plus .edits.txt / .links.txt / .about.txt companion files -
    ## into outdir.
    ##
    ## pages  - array of config hashes with a 'page' entry (path on rsssf.org)
    ## outdir - output root directory
    def convert_pages( pages, outdir: )
      pages.each_with_index do |config, idx|
        puts
        puts "==> [#{idx+1}/#{pages.size}] converting #{config.pretty_inspect}..."

        page = config['page']
        url  = "https://rsssf.org/#{page}"

        html = Webcache.read( url )

        edits = []   ## collected edits (recorded in its own .edits.txt file)

        txt, more_edits = PageConverter.convert( html, url: url )
        edits += more_edits

        basename = File.basename( page, File.extname( page ))
        dirname  = File.dirname( page )

        ##
        ## post-process .txt page
        txt, more_edits, links, about = postproc_page( txt, basename: basename,
                                                            dirname:  dirname )
        edits += more_edits

        title = find_title( html ) || 'n/a'
        authors, updated = about ? find_author_n_date( about ) : [nil,nil]

        header_props = <<~EOS
          title: #{title}
          source: #{url}
        EOS

        if authors && updated
          ## assume plural if "and" or comma (,) present
          header_props += if /\band\b|,/i.match( authors )
                            " authors: #{authors}\n"
                          else
                            " author: #{authors}\n"
                          end
          header_props += " updated: #{updated}"
        end

        header = <<~EOS
          <!--
          #{header_props}
          -->
        EOS

        ## note - (auto-) add (comment) header to written out txt!!!
        write_text( "#{outdir}/#{dirname}/#{basename}.txt", header+txt )

        ## todo/check - delete edits file if no edits - why? why not?
        if edits.size > 0
          write_text( "#{outdir}/#{dirname}/#{basename}.edits.txt", edits.join("\n") )
        end

        ## todo/check - delete links file if no links - why? why not?
        if links.size > 0
          buf = links.map do |link_title, pageref|
            "#{'%-30s' % pageref} : #{link_title}"
          end.join( "\n" )
          write_text( "#{outdir}/#{dirname}/#{basename}.links.txt", buf )
        end

        ## todo/check - delete about file if no about - why? why not?
        if about
          write_text( "#{outdir}/#{dirname}/#{basename}.about.txt", about )
        end
      end
    end

  end ## class Prep
end ## module Rsssf
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
|
|
2
|
+
module Rsssf
  class Prep   ## todo: find a better name e.g. BatchPrep or ??

    ## convenience helper - download via a "shared" built-in prep
    def self.download_pages( pages, force: )
      @@prep ||= new
      @@prep.download_pages( pages, force: force )
    end


    ## Downloads all pages into the web cache.
    ##
    ## pages - array of config hashes with 'page' (path on rsssf.org)
    ##         and an optional 'encoding' entry
    ## force - if true, always (re)download, even on a cache hit
    def download_pages( pages, force: )
      pages.each_with_index do |config, i|
        ## todo / double check - fix read_csv upstream
        ##   if the empty column has a comment it is "" (empty string),
        ##   otherwise it is nil!!! ??
        encoding = config['encoding']
        ## fall back to the de-facto rsssf.org default encoding
        encoding = 'windows-1252' if encoding.nil? || encoding.empty?

        page = config['page']
        url  = "https://rsssf.org/#{page}"

        ## check if not in cache
        ## note - use force: true to always (force) download
        ## (was: force == false - avoid explicit boolean comparison)
        if Webcache.cached?( url ) && !force
          puts " CACHE HIT - #{url}"
        else
          puts "==> [#{i+1}/#{pages.size}] download #{config.pretty_inspect}..."
          ## note - return value intentionally discarded (was assigned to an
          ##        unused local); only the cache-filling side effect matters
          Rsssf.download_page( url, encoding: encoding )
        end
      end
    end

  end ## class Prep
end ## module Rsssf
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
|
|
2
|
+
module Rsssf

  ## A (local) rsssf archive project rooted at a directory -
  ## builds summaries (README.md) and extracts (match) schedules.
  class Project
    include Utils   ## e.g. year_from_file, etc.

    attr_reader :title,
                :root_dir

    ## dir   - project root directory
    ## title - project title used in generated summaries
    ## slug  - page basename prefix; might be a proc e.g. ->(season) {}
    def initialize( dir,
                    title: 'Your Title Here',
                    slug: nil )
      @root_dir = dir
      @title    = title
      @slug     = slug
    end


    def pages_dir() "#{root_dir}/pages"; end


    ## Finds all page .txt files below pages_dir.
    def _find_pages
      glob = "#{pages_dir}/**/*.txt"
      print " glob >#{glob}<..."

      files = Dir.glob( glob )
      puts " #{files.size} page(s) .txt found"

      files
    end


    ## Builds a pages summary and saves it as README.md in pages/ dir.
    def make_pages_summary
      files = _find_pages()

      report = PageReport.build( files, title: @title )   ## pass in title etc.

      ### save report as README.md in pages/ dir in project root_dir
      report.save( "#{pages_dir}/README.md" )
    end # method make_pages_summary


    ## Builds a schedules summary and saves it as README.md in root_dir.
    def make_schedules_summary
      ## find all match datafiles
      ## note - looks for season pattern for now
      ##        YYYY-YY or YYYY
      glob = "#{root_dir}/**/{[12][0-9][0-9][0-9]-[0-9][0-9],[12][0-9][0-9][0-9]}/*.txt"
      print " glob >#{glob}<..."
      files = Dir.glob( glob )
      ## fix - "datatfile(s)" typo; stray pp debug dump removed
      ##       (kept commented out, matching _find_pages)
      puts " #{files.size} datafile(s) .txt found"
      ## pp files

      report = ScheduleReport.build( files, title: @title )   ## pass in title etc.
      report.save( "#{root_dir}/README.md" )
    end


    ## Parses schedule configs from txt and writes one schedule file
    ## per season; with archive: true pre-2010 seasons go into
    ## archive/<decade> directories.
    def make_schedules( txt, archive: false )
      configs = parse_schedules( txt )
      ## pp configs

      configs.each do |config|
        header     = config['header']
        seasons    = config['seasons']
        basename   = config['basename'] || config['slug']
        title_tmpl = config['title']

        ## note: header allows hierarchy e.g. (see england and others)
        ##          Cup Tournaments › FA Cup   or
        ##          Cup Tournaments > FA Cup
        header_hiera = header.split( /[ ]* [›>] [ ]*/x )

        puts "==> #{header_hiera.join(' › ')} - #{seasons.size} season(s)..."

        i = 0
        each_page( seasons ) do |season, page|
          title = title_tmpl.sub( '{season}', season.to_s )
          puts " [#{i+1}/#{seasons.size}] #{season} => #{basename}, #{title}..."

          sched = page.find_schedule!( header: header_hiera )

          outpath = if archive
                      ## use archive/1990s and such if season <= 2009/10
                      "#{root_dir}/#{archive_dir_for_season(season)}/#{basename}.txt"
                    else
                      "#{root_dir}/#{season.to_path}/#{basename}.txt"
                    end
          sched.save( outpath, header: "= #{title}\n\n" )
          i += 1
        end
      end
    end


    ## Yields [season, page] for every season (reads the page .txt
    ## from pages_dir via the slug/season naming scheme).
    def each_page( seasons, &blk )
      seasons.each do |season|
        season   = Season( season )
        basename = _mk_basename( season )

        path = "#{pages_dir}/#{basename}.txt"
        page = Page.read_txt( path )

        blk.call( season, page )
      end
    end


    ## Builds the page basename for a season, e.g. braz01, braz09 or braz2010.
    def _mk_basename( season )
      slug = @slug.is_a?(Proc) ? @slug.call( season ) : @slug

      basename = "#{slug}#{_mk_year(season)}"
      basename
    end

    ## Formats a season's end year for use in page basenames.
    ##
    ## note - 00, 01, 02, .. 09           => 2000, 2001, .. 2009
    ##        10, 11, 12, .. 99           => 1910 !!, 1911, 1912, .. 1999
    ##        2010, 2011, 2012, ...       => kept as full (4-digit) years
    ##
    ## fix - check for 18xx ??? requires full year!!!
    ##       only 1910 to 2009 use the two-digit form
    def _mk_year( season )
      ## (was local "slug" - renamed; it holds a year string, not a slug)
      year_str = if season.end_year >= 1910 &&
                    season.end_year < 2010
                   ## cut off all digits (only keep last two)
                   ## convert end_year to string with leading zero
                   ## e.g. 00 / 01 / 99 / 98 / 11 / etc.
                   '%02d' % (season.end_year % 100)
                 else
                   '%4d' % season.end_year
                 end
      year_str
    end

  end ## class Project
end ## module Rsssf
|
data/lib/rsssf/reports/page.rb
CHANGED
|
@@ -1,25 +1,31 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
1
|
|
|
3
2
|
|
|
4
3
|
module Rsssf
|
|
5
4
|
|
|
6
5
|
class PageReport
|
|
7
6
|
|
|
7
|
+
|
|
8
|
+
def self.build( files, title: )
|
|
9
|
+
stats = []
|
|
10
|
+
files.each do |file|
|
|
11
|
+
page = Page.read_txt( file )
|
|
12
|
+
stats << page.build_stat
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
new( stats, title: title )
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
|
|
8
19
|
attr_reader :title
|
|
9
20
|
|
|
10
|
-
def initialize( stats,
|
|
21
|
+
def initialize( stats, title: )
|
|
11
22
|
@stats = stats
|
|
12
|
-
@
|
|
13
|
-
|
|
14
|
-
@title = opts[:title] || 'Your Title Here'
|
|
23
|
+
@title = title
|
|
15
24
|
end
|
|
16
25
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
f.write build_summary
|
|
21
|
-
end
|
|
22
|
-
end
|
|
26
|
+
### save report as README.md in repo
|
|
27
|
+
def save( path ) write_text( path, build_summary ); end
|
|
28
|
+
|
|
23
29
|
|
|
24
30
|
def build_summary
|
|
25
31
|
|
|
@@ -33,32 +39,69 @@ def build_summary
|
|
|
33
39
|
|
|
34
40
|
football.db RSSSF Archive Data Summary for #{title}
|
|
35
41
|
|
|
36
|
-
_Last Update: #{Time.now}_
|
|
37
|
-
|
|
38
42
|
EOS
|
|
39
43
|
|
|
44
|
+
## no longer add last update
|
|
45
|
+
## _Last Update: #{Time.now}_
|
|
46
|
+
|
|
47
|
+
|
|
40
48
|
txt = ''
|
|
41
49
|
txt << header
|
|
42
50
|
|
|
43
|
-
txt << "|
|
|
44
|
-
txt << "| :----- |
|
|
51
|
+
txt << "| File | Sections | Last Updated | Lines (Chars) |\n"
|
|
52
|
+
txt << "| :----- | :------- | :----------- | ------------: |\n"
|
|
53
|
+
|
|
54
|
+
## note - removed season (no longer tracked here)
|
|
45
55
|
|
|
46
56
|
stats.each do |stat|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
57
|
+
## get basename from source url
|
|
58
|
+
url_path = URI.parse( stat.source ).path
|
|
59
|
+
basename = File.basename( url_path, File.extname( url_path ) ) ## e.g. duit92.txt or duit92.html => duit92
|
|
60
|
+
|
|
61
|
+
txt << "| [#{basename}.txt](#{basename}.txt) "
|
|
62
|
+
txt << "| **#{stat.title}** "
|
|
63
|
+
if stat.sections.size > 0
|
|
64
|
+
txt << "<br> "
|
|
65
|
+
txt << stat.sections.join( " <br> " )
|
|
66
|
+
end
|
|
67
|
+
txt << %Q{| <span title="by #{stat.authors}">#{stat.last_updated}</span> }
|
|
51
68
|
txt << "| #{stat.line_count} (#{stat.char_count}) "
|
|
52
|
-
txt << "| #{stat.sections.join(', ')} "
|
|
53
69
|
txt << "|\n"
|
|
54
70
|
end
|
|
55
71
|
|
|
56
|
-
txt << "\n\n"
|
|
72
|
+
txt << "\n\n"
|
|
57
73
|
txt
|
|
58
74
|
end # method build_summary
|
|
59
75
|
|
|
60
76
|
end ## class PageReport
|
|
61
77
|
end ## module Rsssf
|
|
62
78
|
|
|
63
|
-
|
|
64
|
-
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
__END__
|
|
82
|
+
|
|
83
|
+
old version:
|
|
84
|
+
|
|
85
|
+
txt << header
|
|
86
|
+
|
|
87
|
+
txt << "| File | Authors | Last Updated | Lines (Chars) | Sections |\n"
|
|
88
|
+
txt << "| :----- | :------- | :----------- | ------------: | :------- |\n"
|
|
89
|
+
|
|
90
|
+
## note - removed season (no longer tracked here)
|
|
91
|
+
|
|
92
|
+
stats.each do |stat|
|
|
93
|
+
## get basename from source url
|
|
94
|
+
url_path = URI.parse( stat.source ).path
|
|
95
|
+
basename = File.basename( url_path, File.extname( url_path ) ) ## e.g. duit92.txt or duit92.html => duit92
|
|
96
|
+
|
|
97
|
+
txt << "| [#{basename}.txt](#{basename}.txt) "
|
|
98
|
+
txt << "| #{stat.authors} "
|
|
99
|
+
txt << "| #{stat.last_updated} "
|
|
100
|
+
txt << "| #{stat.line_count} (#{stat.char_count}) "
|
|
101
|
+
txt << "| **#{stat.title}** "
|
|
102
|
+
if stat.sections.size > 0
|
|
103
|
+
txt << "<br> "
|
|
104
|
+
txt << stat.sections.join( " <br> " )
|
|
105
|
+
end
|
|
106
|
+
txt << "|\n"
|
|
107
|
+
end
|