rsssf 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gemtest +0 -0
- data/Manifest.txt +11 -0
- data/README.md +171 -2
- data/lib/rsssf.rb +13 -0
- data/lib/rsssf/fetch.rb +80 -0
- data/lib/rsssf/html2txt.rb +157 -0
- data/lib/rsssf/page.rb +295 -0
- data/lib/rsssf/patch.rb +28 -0
- data/lib/rsssf/repo.rb +220 -0
- data/lib/rsssf/reports/page.rb +64 -0
- data/lib/rsssf/reports/schedule.rb +77 -0
- data/lib/rsssf/schedule.rb +31 -0
- data/lib/rsssf/utils.rb +75 -0
- data/lib/rsssf/version.rb +2 -2
- data/test/helper.rb +12 -0
- data/test/test_utils.rb +83 -0
- metadata +13 -1
data/lib/rsssf/page.rb
ADDED
@@ -0,0 +1,295 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Rsssf
|
5
|
+
|
6
|
+
## summary record for a single rsssf page (see Page#build_stat)
PageStat = Struct.new(
  :source,        # page url e.g. http://rsssf.org/tabled/duit89.html
  :basename,      # filename w/o extension (and path) e.g. duit89
  :year,          # always four digits e.g. 1989
  :season,        # always a string (NOT a number) e.g. 1990-91
  :authors,
  :last_updated,
  :line_count,    # todo: rename to (just) lines - why? why not?
  :char_count,    # todo: rename to (just) char(acter)s - why? why not?
  :sections
)
|
16
|
+
|
17
|
+
|
18
|
+
###
|
19
|
+
## note:
|
20
|
+
# a rsssf page may contain:
|
21
|
+
# many leagues, cups
|
22
|
+
# - tables, schedules (rounds), notes, etc.
|
23
|
+
#
|
24
|
+
# a rsssf page MUST be in plain text (.txt) and utf-8 character encoding assumed
|
25
|
+
#
|
26
|
+
|
27
|
+
class Page
|
28
|
+
|
29
|
+
include Utils ## e.g. year_from_name, etc.
|
30
|
+
|
31
|
+
## build a page from the text that PageFetcher#fetch returns for the url passed in
def self.from_url( src )
  from_string( PageFetcher.new.fetch( src ) )
end
|
35
|
+
|
36
|
+
|
37
|
+
## build a page from a local file
##   note: sources are always assumed to be (already) converted to utf-8
def self.from_file( path )
  from_string( File.read_utf8( path ) )
end
|
41
|
+
|
42
|
+
## build a page from the text passed in (same as new)
def self.from_string( txt )
  new( txt )
end
|
45
|
+
|
46
|
+
## keep the page text passed in; all other methods work on this text
def initialize( txt )
  @txt = txt
end
|
49
|
+
|
50
|
+
|
51
|
+
## round marker for leagues - the (whole) word Round; case-insensitive
LEAGUE_ROUND_REGEX = /\b
                        Round
                      \b/ix
|
54
|
+
|
55
|
+
## round marker for (knockout) cups - Round plus the final stages; case-insensitive
CUP_ROUND_REGEX = /\b(
                       Round          |
                       1\/8\sFinals   |
                       1\/16\sFinals  |
                       Quarterfinals  |
                       Semifinals     |
                       Final
                     )\b/ix
|
63
|
+
|
64
|
+
## find the match schedule/fixtures (rounds) of one league or cup in a (multi-league) page
##
## opts:
##   :header - title of the section to look for; the section must start a line and
##             is marked up as a heading with two to four hashes (e.g. #### Premier League)
##             or in bold (e.g. **FA Cup**); matching is case-insensitive.
##             if missing, a single-league page is assumed and the search for
##             the first round starts with the first line
##   :cup    - if truthy, use CUP_ROUND_REGEX (Round, 1/8 Finals, Quarterfinals, Final, etc.)
##             otherwise LEAGUE_ROUND_REGEX (Round only)
##
## returns the schedule built with Schedule.from_string from the collected lines;
##   schedule.rounds gets set to the number of round headers found
##
## note: progress gets logged to stdout (puts)
## todo: change to build_schedule - why? why not???
def find_schedule( opts={} )

  new_txt = ''

  ## note: keep track of statistics e.g. number of rounds found
  round_count = 0

  header = opts[:header]
  if header
    league_header_found = false

    ## note:
    ##   escape the header - otherwise regex metacharacters in the title
    ##     (e.g. parentheses, dots or hashes) change the pattern or break it
    ##   spaces become \s - otherwise no match in regex (using free-form x-flag)!!!
    header_esc = Regexp.escape( header ).gsub( '\ ' ) { '\s' }

    ## note: somehow #{2,4} will not work with free-form /xi defined (picked up as interpolation/comment?)
    ##         use [#] hack
    ## note: the alternatives are wrapped in a (non-capturing) group
    ##         otherwise the line start marker (^) only applies to the first alternative
    header_regex = /^
                     (?:
                        ([#]{2,4}\s+(#{header_esc}))
                          |
                        (\*{2}(#{header_esc})\*{2})
                     )
                   /ix

    ## todo:
    ##   use new stage_regex e.g. **xxx** - why? why not?
    ##   allow more than one stage in one schedule (e.g. regular stage,playoff stage etc)
  else
    league_header_found = true    # default (no header; assume single league file)
    header_regex        = nil     # never used; header search gets skipped
  end

  ## note: only allow final, quarterfinals, etc. if knockout cup
  round_regex = opts[:cup] ? CUP_ROUND_REGEX : LEAGUE_ROUND_REGEX

  ## stages
  first_round_header_found = false
  round_header_found       = false
  round_body_found         = false   ## allow round header followed by blank lines

  blank_found = false

  @txt.each_line do |line|

    if !league_header_found
      ## first find start of league header/section
      if line =~ header_regex
        puts "!!! bingo - found header >#{line}<"
        league_header_found = true
        title = line.gsub( /[#*]/, '' ).strip    ## quick hack: extract title from header
        new_txt << "## #{title}\n\n"
      else
        puts " searching for header >#{header}<; skipping line >#{line}<"
      end
    elsif !first_round_header_found
      ## next look for first round (starting w/ Round)
      if line =~ round_regex
        puts "!!! bingo - found first round >#{line}<"
        round_count += 1
        first_round_header_found = true
        round_header_found       = true
        round_body_found         = false
        new_txt << line
      elsif line =~ /^=-=-=-=/
        puts "*** no rounds found; hit section marker (horizontal rule)"
        break
      elsif line =~ /^\*{2}[^*]+\*{2}/   ## e.g. **FA Cup**
        puts "*** no rounds found; hit section/stage header: #{line}"
        break
      else
        puts " searching for first round; skipping line >#{line}<"
      end
    elsif round_header_found
      ## collect rounds;
      ##   assume text block until next blank line
      ##   new block must always start w/ round
      if line =~ /^\s*$/   ## blank line?
        if round_body_found
          round_header_found = false
          blank_found = true   ## keep track of blank (lines) - allow inside round block (can continue w/ date header/marker)
          new_txt << line
        end
        ## note: otherwise skip blanks following header
      else
        round_body_found = true
        new_txt << line   ## keep going until next blank line
      end
    else
      if line =~ /^\s*$/
        ## skip (more) blank lines; note: keeps the blank tracker as is
      elsif line =~ round_regex
        puts "!!! bingo - found new round >#{line}<"
        round_count += 1
        round_header_found = true   # more rounds; continue
        round_body_found   = false
        blank_found        = false  # reset blank tracker
        new_txt << line
      elsif blank_found && (line =~ /\[[a-z]{3} \d{1,2}\]/i ||     ## e.g. [Mar 13] or [May 5] with leading blank line
                            line =~ /First Legs|Second Legs/i)
        puts "!!! bingo - continue round >#{line}<"
        round_header_found = true
        blank_found        = false  # reset blank tracker
        new_txt << line
      elsif line =~ /=-=-=-=/
        puts "!!! stop schedule; hit section marker (horizontal rule)"
        break
      elsif line =~ /^\*{2}[^*]+\*{2}/   ## e.g. **FA Cup**
        puts "!!! stop schedule; hit section/stage header: #{line}"
        break
      else
        blank_found = false
        puts "skipping line in schedule >#{line}<"
      end
    end
  end # each line

  schedule = Schedule.from_string( new_txt )
  schedule.rounds = round_count

  schedule
end # method find_schedule
|
213
|
+
|
214
|
+
|
215
|
+
## collect summary stats for the page
##
## returns a PageStat record with
##   - source                - first "source: <url>" reference found in the page text
##   - basename,year,season  - derived from the path of the source url
##                               (via year_from_name and year_to_season)
##   - authors,last_updated  - from the "authors: ... last updated: <day> <month> <year>" note
##   - line_count,char_count - size of the page text
##   - sections              - titles of all heading 4 lines (#### title)
##
## note: if no source reference is found a warning gets logged and
##         source, basename, year and season stay nil (before: crashed in URI.parse)
## note: URI.parse may raise if the source found is not a valid url
## note: progress gets logged to stdout (puts)
def build_stat
  source       = nil
  authors      = nil
  last_updated = nil

  ### find source ref
  if (m = /source: ([^ \n]+)/im.match( @txt ))
    source = m[1]
    puts "source: >#{source}<"
  end

  ##
  ## fix/todo: move authors n last updated whitespace cleanup to sanitize - why? why not??
  if (m = /authors?:\s+(.+?)\s+last updated:\s+(\d{1,2} [a-z]{3,10} \d{4})/im.match( @txt ))
    last_updated = m[2]
    authors = m[1].strip.gsub( /\s+/, ' ' )         # cleanup whitespace; squish-style
    authors = authors.gsub( /[ ]*,[ ]*/, ', ' )     # prettify commas - always single space after comma (no space before)
    puts "authors: >#{authors}<"
    puts "last updated: >#{last_updated}<"
  end

  puts "*** !!! missing source"  if source.nil?
  puts "*** !!! missing authors n last updated"  if authors.nil? || last_updated.nil?

  sections = []

  ## count lines
  line_count = 0
  @txt.each_line do |line|
    line_count += 1

    ### find sections
    ## todo: add more patterns? how? why?
    if (m = /####\s+(.+)/.match( line ))
      puts " found section >#{m[1]}<"
      sections << m[1].strip
    end
  end

  basename = nil
  year     = nil
  season   = nil

  if source
    # get path from url
    path     = URI.parse( source ).path
    basename = File.basename( path, File.extname( path ) )   ## e.g. duit92.txt or duit92.html => duit92
    year     = year_from_name( basename )
    season   = year_to_season( year )
  end

  rec = PageStat.new
  rec.source       = source      # e.g. http://rsssf.org/tabled/duit89.html   -- use source_url - why?? why not??
  rec.basename     = basename    # e.g. duit89
  rec.year         = year        # e.g. 89 => 1989  -- note: always four digits
  rec.season       = season
  rec.authors      = authors
  rec.last_updated = last_updated
  rec.line_count   = line_count
  rec.char_count   = @txt.size   ## note: String#size counts characters (not bytes)
  rec.sections     = sections

  rec
end  ## method build_stat
|
279
|
+
|
280
|
+
|
281
|
+
## write the page text to the path passed in (an existing file gets overwritten)
def save( path )
  File.write( path, @txt )
end  ## method save
|
286
|
+
|
287
|
+
end ## class Page
|
288
|
+
end ## module Rsssf
|
289
|
+
|
290
|
+
|
291
|
+
## add (shortcut) top-level aliases e.g. lets you use RsssfPage for Rsssf::Page
RsssfPageStat = Rsssf::PageStat
RsssfPage     = Rsssf::Page
|
294
|
+
|
295
|
+
|
data/lib/rsssf/patch.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Rsssf
|
4
|
+
|
5
|
+
## helpers for patching page text (see patch_heading)
class Patcher

  ## season pattern (regex source as a string) e.g. 2008/09
  ##   note: also supports 1999/2000
  ##   note: single quotes used on purpose - backslashes are kept as is (e.g. '\d' is the same as "\\d")
  SEASON = '\d{4}\/(\d{2}|\d{4})'

  ## replace the first match of every regex in rxs with a heading 4 (#### title)
  ##   surrounded by blank lines; returns the patched text
  ## note: every heading found gets logged to stdout (puts)
  def patch_heading( txt, rxs, title )
    rxs.reduce( txt ) do |patched, rx|
      patched.sub( rx ) do |found|
        ## change newlines to $$ for single-line outputs/dumps
        puts " found heading >#{found.gsub( "\n", '$$' )}<"
        "\n\n#### #{title}\n\n"
      end
    end
  end

end  # class Patcher
|
24
|
+
end ## module Rsssf
|
25
|
+
|
26
|
+
## add (shortcut) top-level alias e.g. lets you use RsssfPatcher for Rsssf::Patcher
RsssfPatcher = Rsssf::Patcher
|
28
|
+
|
data/lib/rsssf/repo.rb
ADDED
@@ -0,0 +1,220 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Rsssf
|
4
|
+
|
5
|
+
## settings for one schedule; used by Repo#make_schedules
ScheduleConfig = Struct.new(
  :name,             # name of the schedule file (w/o extension) e.g. 1-liga
  :opts_for_year,    # hash or proc ->(year){ Hash[...] }
  :dir_for_year,     # proc ->(year){ 'path_here' }   ## rename to path_for_year - why, why not??
  :includes          # array of years to include e.g. [2011,2012] etc.
)
|
12
|
+
|
13
|
+
|
14
|
+
## summary record for one schedule saved (see Repo#make_schedules)
ScheduleStat = Struct.new(
  :path,       # e.g. 2012-13 or archive/1980s/1984-85
  :filename,   # w/o path e.g. 1-bundesliga.txt
  :year,       # numeric (integer) e.g. 2013
  :season,     # is a string e.g. 2012-13
  :rounds      # numeric (integer) e.g. 36
)
|
21
|
+
|
22
|
+
|
23
|
+
class Repo
|
24
|
+
|
25
|
+
include Filters ## e.g. sanitize, etc.
|
26
|
+
include Utils ## e.g. year_from_file, etc.
|
27
|
+
|
28
|
+
|
29
|
+
## path - root folder of the repo
## opts - options passed on to the reports (e.g. title etc.)
def initialize( path, opts )
  @repo_path, @opts = path, opts
end
|
33
|
+
|
34
|
+
|
35
|
+
## download all pages listed in tables/config.yml from rsssf.com and
##   save them as text (.txt) to the tables/ folder in the repo
def fetch_pages
  puts "fetch_pages:"
  config = YAML.load_file( "#{@repo_path}/tables/config.yml")
  pp config

  ## note: key is the season as string e.g. 2011-12 or 2011 etc. (not used for now)
  ##       value is the page path as string e.g. tablesd/duit2011.html
  config.each do |_season, page_path|
    ## note: assumes extension is .html
    #    e.g. tablesd/duit2011.html => duit2011
    name = File.basename( page_path, '.html' )

    page = Page.from_url( "http://rsssf.com/#{page_path}" )
    page.save( "#{@repo_path}/tables/#{name}.txt" )
  end # each year
end # method fetch_pages
|
57
|
+
|
58
|
+
|
59
|
+
## build the stats for all pages (tables/*.txt) and
##   save the report as README.md in tables/ folder in repo
def make_pages_summary
  stats = Dir[ "#{@repo_path}/tables/*.txt" ].map do |file|
    Page.from_file( file ).build_stat
  end

  PageReport.new( stats, @opts ).save( "#{@repo_path}/tables/README.md" )   ## pass in title etc.
end # method make_pages_summary
|
72
|
+
|
73
|
+
|
74
|
+
## save the schedules report as README.md in the repo root
##   note: requires stats to be passed in for now (see make_schedules)
def make_schedules_summary( stats )
  ScheduleReport.new( stats, @opts ).save( "#{@repo_path}/README.md" )   ## pass in title etc.
end # method make_schedules_summary
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
## run the patcher passed in on all pages in tables/
##   lets you run/use custom (repo/country-specific) patches e.g. for adding/patching headings etc.
##   the patcher must respond to patch( txt, name, year ) and return the (patched) text
def patch_pages( patcher )
  tables_dir = "#{@repo_path}/tables"
  patch_dir( tables_dir ) do |page_txt, basename, page_year|
    puts "patching #{page_year} (#{basename}) (#{@repo_path})..."
    patcher.patch( page_txt, basename, page_year )    ## note: must be last (that is, must return (patched) text)
  end
end ## method patch_pages
|
88
|
+
|
89
|
+
|
90
|
+
## (re)run sanitize on all pages in tables/
##   for debugging/testing only (already incl. in html2txt filter by default)
def sanitize_pages
  tables_dir = "#{@repo_path}/tables"
  sanitize_dir( tables_dir )
end
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
## find and save the schedule configured in cfg (ScheduleConfig) for all pages in tables/
##
##  - pages with a year not listed in cfg.includes (if present) get skipped
##  - the find_schedule options are cfg.opts_for_year (used as is if a hash, otherwise called with the year)
##  - the schedule gets saved to <repo>/<dir for year>/<cfg.name>.txt;
##      the dir defaults to archive_dir_for_year if cfg.dir_for_year is missing
##
## returns an array of ScheduleStat records (one for every schedule saved) for reporting (e.g. README)
def make_schedules( cfg )
  stats = []

  Dir[ "#{@repo_path}/tables/*.txt" ].each do |file|

    ## todo/check/fix:
    ##   use source: prop in rsssf page - why? why not???
    ##   move year/season/basename into page ???
    #
    #  assume every rsssf page has at least:
    ##  - basename e.g. duit2014
    ##  - year     e.g. 2014 (numeric)
    ##  - season (derived from config lookup???) - string e.g. 2014-15 or 2014 etc.
    basename = File.basename( file, File.extname( file ) )
    year     = year_from_name( basename )
    season   = year_to_season( year )

    if cfg.includes && !cfg.includes.include?( year )
      puts " skipping #{basename}; not listed in includes"
      next
    end

    puts " reading >#{basename}<"

    page = Page.from_file( file )   # note: always assume sources (already) converted to utf-8

    ## note: a hash gets used as is 1:1 (constant/same for all years);
    ##         anything else is assumed to be a proc/lambda (call to calculate)
    opts = cfg.opts_for_year.is_a?( Hash ) ? cfg.opts_for_year : cfg.opts_for_year.call( year )
    pp opts

    schedule = page.find_schedule( opts )

    ## note: default setting is archive for dir (e.g. archive/1980s/1985-86 etc.);
    ##         otherwise assume it's a proc/lambda
    dir_for_year = if cfg.dir_for_year.nil?
                     archive_dir_for_year( year )
                   else
                     cfg.dir_for_year.call( year )
                   end

    ## -- cfg.name e.g. => 1-liga
    dest_path = "#{@repo_path}/#{dir_for_year}/#{cfg.name}.txt"
    puts " save to >#{dest_path}<"
    FileUtils.mkdir_p( File.dirname( dest_path ))
    schedule.save( dest_path )

    stats << ScheduleStat.new( dir_for_year,
                               "#{cfg.name}.txt",   ## change to basename - why?? why not??
                               year,
                               season,
                               schedule.rounds )
  end

  stats  # return stats for reporting
end # method make_schedules
|
167
|
+
|
168
|
+
|
169
|
+
private
|
170
|
+
## rewrite all text files (*.txt) in root with the text returned by the block passed in
##   the block gets called with the text, the basename (w/o .txt) and the year of the file;
##   files get processed sorted by year (latest first)
def patch_dir( root )
  latest_first = Dir[ "#{root}/*.txt" ].sort do |left, right|
    left_year  = year_from_file( left )
    right_year = year_from_file( right )
    right_year <=> left_year
  end

  latest_first.each do |file|
    old_txt = File.read_utf8( file )     ## note: assumes already converted to utf-8

    name    = File.basename( file, '.txt' )     ## e.g. duit92.txt => duit92
    new_txt = yield( old_txt, name, year_from_name( name ) )
    ## calculate hash to see if anything changed ?? why? why not??

    File.open( file, 'w' ) { |f| f.write new_txt }
  end # each file
end ## patch_dir
|
196
|
+
|
197
|
+
## rewrite all text files (*.txt) in root with the sanitized text (see sanitize)
def sanitize_dir( root )
  Dir[ "#{root}/*.txt" ].each do |file|
    clean_txt = sanitize( File.read_utf8( file ) )    ## note: assumes already converted to utf-8

    File.open( file, 'w' ) { |f| f.write clean_txt }
  end # each file
end ## sanitize_dir
|
210
|
+
|
211
|
+
|
212
|
+
end ## class Repo
|
213
|
+
end ## module Rsssf
|
214
|
+
|
215
|
+
## add (shortcut) top-level aliases e.g. lets you use RsssfRepo for Rsssf::Repo
RsssfRepo           = Rsssf::Repo
RsssfScheduleConfig = Rsssf::ScheduleConfig
RsssfScheduleStat   = Rsssf::ScheduleStat
|
219
|
+
|
220
|
+
|