rsssf 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gemtest +0 -0
- data/Manifest.txt +11 -0
- data/README.md +171 -2
- data/lib/rsssf.rb +13 -0
- data/lib/rsssf/fetch.rb +80 -0
- data/lib/rsssf/html2txt.rb +157 -0
- data/lib/rsssf/page.rb +295 -0
- data/lib/rsssf/patch.rb +28 -0
- data/lib/rsssf/repo.rb +220 -0
- data/lib/rsssf/reports/page.rb +64 -0
- data/lib/rsssf/reports/schedule.rb +77 -0
- data/lib/rsssf/schedule.rb +31 -0
- data/lib/rsssf/utils.rb +75 -0
- data/lib/rsssf/version.rb +2 -2
- data/test/helper.rb +12 -0
- data/test/test_utils.rb +83 -0
- metadata +13 -1
data/lib/rsssf/page.rb
ADDED
@@ -0,0 +1,295 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module Rsssf
|
5
|
+
|
6
|
+
##
## Summary record (statistics) for a single rsssf page - members (in order):
##
##   source        e.g. http://rsssf.org/tabled/duit89.html
##   basename      e.g. duit89   -- note: filename w/o extension (and path)
##   year          e.g. 1989     -- note: always four digits
##   season        e.g. 1990-91  -- note: always a string (NOT a number)
##   authors
##   last_updated
##   line_count    todo: rename to (just) lines - why? why not?
##   char_count    todo: rename to (just) char(acter)s - why? why not?
##   sections
PageStat = Struct.new( *%i[
  source
  basename
  year
  season
  authors
  last_updated
  line_count
  char_count
  sections
] )
|
16
|
+
|
17
|
+
|
18
|
+
###
|
19
|
+
## note:
|
20
|
+
# a rsssf page may contain:
|
21
|
+
# many leagues, cups
|
22
|
+
# - tables, schedules (rounds), notes, etc.
|
23
|
+
#
|
24
|
+
# a rsssf page MUST be in plain text (.txt) and utf-8 character encoding assumed
|
25
|
+
#
|
26
|
+
|
27
|
+
class Page
|
28
|
+
|
29
|
+
include Utils ## e.g. year_from_name, etc.
|
30
|
+
|
31
|
+
## Builds a page from a url:
##  lets PageFetcher fetch src and passes the returned text on to from_string.
def self.from_url( src )
  from_string( PageFetcher.new.fetch( src ) )
end
|
35
|
+
|
36
|
+
|
37
|
+
## Builds a page from a local text file.
##  note: always assume sources (already) converted to utf-8
def self.from_file( path )
  from_string( File.read_utf8( path ) )
end
|
41
|
+
|
42
|
+
## Builds a page from text already held in memory (a plain wrapper around new).
def self.from_string( txt )
  new( txt )
end
|
45
|
+
|
46
|
+
## Keeps the page text (as is) in @txt; all other methods read from there.
def initialize( txt )
  @txt = txt
end
|
49
|
+
|
50
|
+
|
51
|
+
## Round header markers used by find_schedule
##  (both are case-insensitive and written in free-form/extended style).

## league: only the word Round starts a new round
LEAGUE_ROUND_REGEX = /\b
                       Round
                      \b/ix

## cup: Round plus the knockout stage names
CUP_ROUND_REGEX = /\b(
                     Round          |
                     1\/8\sFinals   |
                     1\/16\sFinals  |
                     Quarterfinals  |
                     Semifinals     |
                     Final
                   )\b/ix
|
63
|
+
|
64
|
+
##
## Finds the match schedule/fixtures of one league or cup in a (multi-league) page
##  and returns it as a Schedule.
##
##  opts:
##   :header  - title of the section to look for; the title line must start with
##                a heading marker (e.g. #### Premier League) or
##                be in bold (e.g. **FA Cup**);
##                if missing, the page is assumed to be a single league file
##   :cup     - if truthy, use CUP_ROUND_REGEX for round headers
##                (Round, Quarterfinals, Final, etc.); otherwise LEAGUE_ROUND_REGEX
##
##  returns the Schedule built (with Schedule.from_string) from the collected lines;
##   its rounds attribute gets set to the number of round headers found
##
##  note: progress/debug messages get printed to stdout
##
def find_schedule( opts={} )   ## change to build_schedule - why? why not???

  new_txt     = ''
  round_count = 0    ## keep track of statistics e.g. number of rounds found

  header = opts[:header]
  if header
    league_header_found = false

    ## note: header gsub spaces to \s otherwise no match in regex (using free-form x-flag)!!!
    header_esc = header.gsub( ' ', '\s' )

    ## note: use [#] for the heading marker - a plain # would start a comment
    ##        in a free-form (x-flag) regex
    ##
    ## note: the two alternatives are wrapped in a (non-capturing) group;
    ##        without the group the line start anchor (^) applies to the heading form only
    ##        and a bold title in the middle of a line gets picked up as header too
    header_regex = /^(?:
                        ([#]{2,4}\s+(#{header_esc}))
                          |
                        (\*{2}(#{header_esc})\*{2})
                     )/ix

    ## todo:
    ##   use new stage_regex e.g. **xxx** - why? why not?
    ##   allow more than one stage in one schedule (e.g. regular stage,playoff stage etc)
  else
    league_header_found = true         # default (no header; assume single league file)
    header_regex = /^---dummy---$/     ## non-matching dummy regex
  end

  ## note: only allow final, quarterfinals, etc. if knockout cup
  round_regex = opts[:cup] ? CUP_ROUND_REGEX : LEAGUE_ROUND_REGEX

  ## stages (state of the line-by-line scan)
  first_round_header_found = false
  round_header_found       = false
  round_body_found         = false   ## allow round header followed by blank lines
  blank_found              = false

  @txt.each_line do |line|

    if !league_header_found
      ## step 1: find start of league header/section
      if line =~ header_regex
        puts "!!! bingo - found header >#{line}<"
        league_header_found = true
        title = line.gsub( /[#*]/, '' ).strip    ## quick hack: extract title from header
        new_txt << "## #{title}\n\n"
      else
        puts " searching for header >#{header}<; skipping line >#{line}<"
      end
    elsif !first_round_header_found
      ## step 2: look for first round (starting w/ Round)
      if line =~ round_regex
        puts "!!! bingo - found first round >#{line}<"
        round_count += 1
        first_round_header_found = true
        round_header_found       = true
        round_body_found         = false
        new_txt << line
      elsif line =~ /^=-=-=-=/
        puts "*** no rounds found; hit section marker (horizontal rule)"
        break
      elsif line =~ /^\*{2}[^*]+\*{2}/   ## e.g. **FA Cup**
        puts "*** no rounds found; hit section/stage header: #{line}"
        break
      else
        puts " searching for first round; skipping line >#{line}<"
      end
    elsif round_header_found
      ## step 3: collect round;
      ##   assume text block until next blank line
      ##   new block must always start w/ round
      if line =~ /^\s*$/    ## blank line?
        if round_body_found
          round_header_found = false
          blank_found        = true   ## keep track of blank (lines) - allow inside round block (can continue w/ date header/marker)
          new_txt << line
        end
        ## note: blanks following the round header (before any body line) get skipped
      else
        round_body_found = true
        new_txt << line    ## keep going until next blank line
      end
    else
      ## step 4: in-between round blocks
      if line =~ /^\s*$/
        next   ## skip extra blank line
      elsif line =~ round_regex
        puts "!!! bingo - found new round >#{line}<"
        round_count += 1
        round_header_found = true    # more rounds; continue
        round_body_found   = false
        blank_found        = false   # reset blank tracker
        new_txt << line
      elsif blank_found && line =~ /\[[a-z]{3} \d{1,2}\]/i    ## e.g. [Mar 13] or [May 5] with leading blank line; continue round
        puts "!!! bingo - continue round >#{line}<"
        round_header_found = true
        blank_found        = false   # reset blank tracker
        new_txt << line
      elsif blank_found && line =~ /First Legs|Second Legs/i
        puts "!!! bingo - continue round >#{line}<"
        round_header_found = true
        blank_found        = false   # reset blank tracker
        new_txt << line
      elsif line =~ /=-=-=-=/
        puts "!!! stop schedule; hit section marker (horizontal rule)"
        break
      elsif line =~ /^\*{2}[^*]+\*{2}/   ## e.g. **FA Cup**
        puts "!!! stop schedule; hit section/stage header: #{line}"
        break
      else
        blank_found = false
        puts "skipping line in schedule >#{line}<"
      end
    end
  end  # each line

  schedule = Schedule.from_string( new_txt )
  schedule.rounds = round_count

  schedule
end  # method find_schedule
|
213
|
+
|
214
|
+
|
215
|
+
##
## Collects summary statistics about the page text and returns them as a PageStat.
##
##  - source:       first "source: <url>" reference found (if any)
##  - authors and last_updated: taken from an "author(s): ... last updated: <day month year>" block
##  - sections:     titles of all "#### title" lines
##  - line_count / char_count: counted from the page text
##  - basename, year and season: derived from the source url path
##      (using year_from_name and year_to_season)
##
##  note: if no source reference is found, basename, year and season stay nil
##          (a warning gets printed)
##
def build_stat
  source       = nil
  authors      = nil
  last_updated = nil

  ### find source ref
  if @txt =~ /source: ([^ \n]+)/im
    source = $1.to_s
    puts "source: >#{source}<"
  end

  ##
  ## fix/todo: move authors n last updated whitespace cleanup to sanitize - why? why not??

  if @txt =~ /authors?:\s+(.+?)\s+last updated:\s+(\d{1,2} [a-z]{3,10} \d{4})/im
    last_updated = $2.to_s    # note: save a copy first (gets "reset" by next regex)
    authors = $1.to_s.strip.gsub(/\s+/, ' ' )    # cleanup whitespace; squish-style
    authors = authors.gsub( /[ ]*,[ ]*/, ', ' )  # prettify commas - always single space after comma (no space before)
    puts "authors: >#{authors}<"
    puts "last updated: >#{last_updated}<"
  end

  puts "*** !!! missing source"  if source.nil?
  puts "*** !!! missing authors n last updated"   if authors.nil? || last_updated.nil?

  ## count lines and find sections
  ##  todo: add more section patterns? how? why?
  sections   = []
  line_count = 0
  @txt.each_line do |line|
    line_count += 1

    if line =~ /####\s+(.+)/
      puts "  found section >#{$1}<"
      sections << $1.strip
    end
  end

  ## derive basename, year and season from the path of the source url
  ##  note: skip if source is missing - URI.parse( nil ) would raise
  basename = nil
  year     = nil
  season   = nil
  if source
    path     = URI.parse( source ).path
    basename = File.basename( path, File.extname( path ) )  ## e.g. duit92.txt or duit92.html => duit92
    year     = year_from_name( basename )
    season   = year_to_season( year )
  end

  rec = PageStat.new
  rec.source       = source       # e.g. http://rsssf.org/tabled/duit89.html   -- use source_url - why?? why not??
  rec.basename     = basename     # e.g. duit89
  rec.year         = year         # e.g. 89 => 1989  -- note: always four digits
  rec.season       = season
  rec.authors      = authors
  rec.last_updated = last_updated
  rec.line_count   = line_count
  rec.char_count   = @txt.size    ## note: String#size counts characters (not bytes)
  rec.sections     = sections

  rec
end  ## method build_stat
|
279
|
+
|
280
|
+
|
281
|
+
## Writes the page text (@txt) to the file at path (created or overwritten).
def save( path )
  File.open( path, 'w' ) { |out| out.write( @txt ) }
end  ## method save
|
286
|
+
|
287
|
+
end ## class Page
|
288
|
+
end ## module Rsssf
|
289
|
+
|
290
|
+
|
291
|
+
## add (shortcut) alias
|
292
|
+
RsssfPageStat = Rsssf::PageStat
|
293
|
+
RsssfPage = Rsssf::Page
|
294
|
+
|
295
|
+
|
data/lib/rsssf/patch.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Rsssf
|
4
|
+
|
5
|
+
##
## Helpers for patching (raw) page text e.g. turning "free-form" headings into
##  proper #### section headings.
class Patcher

  ## season pattern (regex source) for building heading regexes
  ##   e.g. 2008/09
  ##  note: also support 1999/2000
  ##  note: use single quotes - quotes do NOT get escaped (e.g. '\d' => "\\d")
  SEASON = '\d{4}\/(\d{2}|\d{4})'.freeze

  ##
  ## Replaces the first match of every pattern in rxs with a section heading for title.
  ##
  ##  txt    - text to patch (not changed in place)
  ##  rxs    - pattern(s) to look for; an array of patterns, a single pattern
  ##             or nil (nothing to patch)
  ##  title  - heading title to insert (as "#### title" surrounded by blank lines)
  ##
  ##  returns the patched text
  ##
  ##  note: every heading found gets logged to stdout
  def patch_heading( txt, rxs, title )
    Array( rxs ).each do |rx|
      txt = txt.sub( rx ) do |match|
        ## change newlines to $$ for single-line outputs/dumps
        puts " found heading >#{match.gsub( "\n", '$$' )}<"
        "\n\n#### #{title}\n\n"
      end
    end
    txt
  end

end  # class Patcher
|
24
|
+
end ## module Rsssf
|
25
|
+
|
26
|
+
## add (shortcut) alias
|
27
|
+
RsssfPatcher = Rsssf::Patcher
|
28
|
+
|
data/lib/rsssf/repo.rb
ADDED
@@ -0,0 +1,220 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Rsssf
|
4
|
+
|
5
|
+
## used by Repo#make_schedules
|
6
|
+
##
## Settings for building one schedule per page/year (used by Repo#make_schedules)
##  - members (in order):
##
##   name            used for the schedule filename (name + .txt)
##   opts_for_year   hash or proc ->(year){ Hash[...] }
##   dir_for_year    proc ->(year){ 'path_here'}   ## rename to path_for_year - why, why not??
##   includes        array of years to include e.g. [2011,2012] etc.
ScheduleConfig = Struct.new( *%i[
  name
  opts_for_year
  dir_for_year
  includes
] )
|
12
|
+
|
13
|
+
|
14
|
+
##
## Summary record (statistics) for one saved schedule - members (in order):
##
##   path       e.g. 2012-13 or archive/1980s/1984-85
##   filename   e.g. 1-bundesliga.txt   -- note: w/o path
##   year       e.g. 2013     -- note: numeric (integer)
##   season     e.g. 2012-13  -- note: is a string
##   rounds     e.g. 36       -- note: numeric (integer)
ScheduleStat = Struct.new( *%i[
  path
  filename
  year
  season
  rounds
] )
|
21
|
+
|
22
|
+
|
23
|
+
class Repo
|
24
|
+
|
25
|
+
include Filters ## e.g. sanitize, etc.
|
26
|
+
include Utils ## e.g. year_from_file, etc.
|
27
|
+
|
28
|
+
|
29
|
+
## Sets up the repo.
##   path - root folder of the repo (kept in @repo_path)
##   opts - options (e.g. title etc.) kept in @opts and passed on (as is) to the reports
def initialize( path, opts )
  @repo_path = path
  @opts      = opts
end
|
33
|
+
|
34
|
+
|
35
|
+
##
## Fetches all pages listed in tables/config.yml (inside the repo)
##  and saves every page as text to tables/<basename>.txt.
##
##  config.yml entries map a season (as string e.g. 2011-12 or 2011 etc.)
##   to a page path (as string e.g. tablesd/duit2011.html)
##
##  note: assumes page extension is .html
def fetch_pages
  puts "fetch_pages:"
  cfg = YAML.load_file( "#{@repo_path}/tables/config.yml")
  pp cfg

  dl_base = 'http://rsssf.com'

  cfg.each do |_season, path|
    ## e.g. tablesd/duit2011.html => duit2011
    basename = File.basename( path, '.html' )

    Page.from_url( "#{dl_base}/#{path}" )
        .save( "#{@repo_path}/tables/#{basename}.txt" )
  end # each year
end  # method fetch_pages
|
57
|
+
|
58
|
+
|
59
|
+
##
## Builds the stats for all pages (tables/*.txt) and
##  saves the pages report as README.md in the tables/ folder in the repo.
def make_pages_summary
  stats = Dir[ "#{@repo_path}/tables/*.txt" ].map do |file|
    Page.from_file( file ).build_stat
  end

  PageReport.new( stats, @opts )              ## pass in title etc.
            .save( "#{@repo_path}/tables/README.md" )
end  # method make_pages_summary
|
72
|
+
|
73
|
+
|
74
|
+
##
## Saves the schedules report as README.md in the root folder of the repo.
##  note: requires stats (e.g. as returned by make_schedules) to be passed in for now
def make_schedules_summary( stats )
  ScheduleReport.new( stats, @opts )          ## pass in title etc.
                .save( "#{@repo_path}/README.md" )
end  # method make_schedules_summary
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
##
## Runs a custom (repo/country-specific) patcher e.g. for adding/patching headings etc.
##  on all pages in the tables/ folder.
##
##  patcher - must respond to patch( txt, name, year ) and return the (patched) text
def patch_pages( patcher )
  tables_dir = "#{@repo_path}/tables"

  patch_dir( tables_dir ) do |txt, name, year|
    puts "patching #{year} (#{name}) (#{@repo_path})..."
    ## note: must be last (that is, the block must return the (patched) text)
    patcher.patch( txt, name, year )
  end
end ## method patch_pages
|
88
|
+
|
89
|
+
|
90
|
+
##
## (Re)runs sanitize on all pages in the tables/ folder.
##  note: for debugging/testing only - sanitize is already incl. in the html2txt filter by default
def sanitize_pages
  tables_dir = "#{@repo_path}/tables"
  sanitize_dir( tables_dir )
end
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
##
## Builds and saves one schedule for every page (tables/*.txt) in the repo.
##
##  cfg - settings (see ScheduleConfig):
##    name           - schedule filename (w/o extension)
##    opts_for_year  - options for Page#find_schedule;
##                       hash (same for all years) or proc/lambda (called with year)
##    dir_for_year   - proc/lambda (called with year) returning the folder to save to;
##                       if nil defaults to archive_dir_for_year( year )
##    includes       - if present only pages for the listed years get processed
##
##  returns an array of ScheduleStat records (for reports e.g. README)
def make_schedules( cfg )
  stats = []

  Dir[ "#{@repo_path}/tables/*.txt" ].each do |file|

    ## todo/check/fix:
    ##   use source: prop in rsssf page - why? why not???
    ##   move year/season/basename into page ???
    #
    #  assume every rsssf page has at least:
    ##  - basename e.g. duit2014
    ##  - year     e.g. 2014 (numeric)
    ##  - season (derived from config lookup???) - string e.g. 2014-15 or 2014 etc.
    basename = File.basename( file, File.extname( file ) )
    year     = year_from_name( basename )
    season   = year_to_season( year )

    if cfg.includes && !cfg.includes.include?( year )
      puts " skipping #{basename}; not listed in includes"
      next
    end

    puts " reading >#{basename}<"

    page = Page.from_file( file )   # note: always assume sources (already) converted to utf-8

    ## hash gets used as is 1:1 (constant/same for all years);
    ##  otherwise assume it's a proc/lambda (call to calculate)
    opts = cfg.opts_for_year.is_a?( Hash ) ? cfg.opts_for_year : cfg.opts_for_year.call( year )
    pp opts

    schedule = page.find_schedule( opts )

    ## no setting => use default, that is, archive for dir (e.g. archive/1980s/1985-86 etc.);
    ##  otherwise assume it's a proc/lambda
    dir_for_year = cfg.dir_for_year.nil? ? archive_dir_for_year( year ) : cfg.dir_for_year.call( year )

    ##  -- cfg.name e.g. => 1-liga
    filename  = "#{cfg.name}.txt"
    dest_path = "#{@repo_path}/#{dir_for_year}/#{cfg.name}.txt"
    puts " save to >#{dest_path}<"
    FileUtils.mkdir_p( File.dirname( dest_path ))
    schedule.save( dest_path )

    rec = ScheduleStat.new
    rec.path     = dir_for_year
    rec.filename = filename       ## change to basename - why?? why not??
    rec.year     = year
    rec.season   = season
    rec.rounds   = schedule.rounds

    stats << rec
  end

  stats  # return stats for reporting
end  # method make_schedules
|
167
|
+
|
168
|
+
|
169
|
+
private
|
170
|
+
##
## Passes every text file (*.txt) in root - sorted by year (latest first) -
##  to the block as ( txt, basename, year ) and
##  overwrites the file with the text returned by the block.
def patch_dir( root )
  paths = Dir[ "#{root}/*.txt" ]

  ## sort files by year (latest first)
  paths = paths.sort do |left, right|
    left_year  = year_from_file( left )
    right_year = year_from_file( right )

    right_year <=> left_year
  end

  paths.each do |path|
    txt = File.read_utf8( path )    ## note: assumes already converted to utf-8

    basename = File.basename( path, '.txt' )    ## e.g. duit92.txt => duit92
    year     = year_from_name( basename )

    new_txt = yield( txt, basename, year )
    ## calculate hash to see if anything changed ?? why? why not??

    File.open( path, 'w' ) { |out| out.write( new_txt ) }
  end # each file
end ## patch_dir
|
196
|
+
|
197
|
+
##
## Runs sanitize on every text file (*.txt) in root and
##  overwrites the file with the sanitized text.
def sanitize_dir( root )
  Dir[ "#{root}/*.txt" ].each do |path|
    txt     = File.read_utf8( path )    ## note: assumes already converted to utf-8
    new_txt = sanitize( txt )

    File.open( path, 'w' ) { |out| out.write( new_txt ) }
  end # each file
end ## sanitize_dir
|
210
|
+
|
211
|
+
|
212
|
+
end ## class Repo
|
213
|
+
end ## module Rsssf
|
214
|
+
|
215
|
+
## add (shortcut) alias
|
216
|
+
RsssfRepo = Rsssf::Repo
|
217
|
+
RsssfScheduleConfig = Rsssf::ScheduleConfig
|
218
|
+
RsssfScheduleStat = Rsssf::ScheduleStat
|
219
|
+
|
220
|
+
|