rsssf 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,151 @@
1
+
2
module Rsssf

  ## end_year to slug_year
  ##   the table page slugs use the last two digits of the season's end year
  ##   before 2010 and all four digits from 2010 on, e.g.
  ##     2007-08: tablesd/duit08.html
  ##     2008-09: tablesd/duit09.html
  ##     2009-10: tablesd/duit2010.html
  ##     2010-11: tablesd/duit2011.html
  ##     2011-12: tablesd/duit2012.html
  ##   todo/check: is this a generic rule/convention (for all countries) ???


  ## map (lowercase) country codes to table pages
  ##   every entry is a pair of [template, options] where
  ##    - template is a page path (without .html) with a {year} placeholder
  ##      or a proc/lambda that gets passed the season and returns such a path
  ##    - options holds the (char) encoding of the page
  TABLE = {
    'eng' => ['tablese/eng{year}',  { encoding: 'Windows-1252' }],
    'es'  => ['tabless/span{year}', { encoding: 'Windows-1252' }],
    'de'  => ['tablesd/duit{year}', { encoding: 'Windows-1252' }],
    'at'  => ['tableso/oost{year}', { encoding: 'Windows-1252' }],
    'br'  => [
      ->(season) {
        ## note: special slug/case for year/season 2000
        ##   see rsssf.org/tablesb/brazchamp.html
        if season == Season('2000')
          'tablesb/braz-joao{year}'   ## use braz-joao00 - why? why not?
        else
          'tablesb/braz{year}'
        end
      }, { encoding: 'Windows-1252' }],
  }


  BASE_URL = "https://rsssf.org"


  ## returns the table page url (string) for the country code and season
  ##   e.g. table_url( 'de', season: '2010/11' )
  ##          => "https://rsssf.org/tablesd/duit2011.html"
  ##   raises ArgumentError if the country code is not listed in TABLE
  def self.table_url( code, season: )
    url, _ = table_url_and_encoding( code, season: season )
    url
  end

  ## returns the pair [url, encoding] for the country code and season
  ##   code   - country code (string or symbol; upper or lowercase)
  ##   season - anything accepted by Season(), gets converted first
  ##   raises ArgumentError if the country code is not listed in TABLE
  def self.table_url_and_encoding( code, season: )
    season = Season( season )

    ## note: use to_s to allow symbols too e.g. :de
    key = code.to_s.downcase
    tmpl, opts = TABLE.fetch( key ) do
      raise ArgumentError,
            "unknown country code >#{code}<; expected one of: #{TABLE.keys.join(', ')}"
    end

    ## check for proc - template depends on the season
    tmpl = tmpl.call( season )   if tmpl.respond_to?( :call )

    encoding = (opts || {})[:encoding] || 'UTF-8'

    end_year = season.end_year
    slug = if end_year < 2010
             ## cut off all digits (only keep the last two)
             ##   and convert to string with leading zero
             ##   e.g. 00 / 01 / 99 / 98 / etc.
             format( '%02d', end_year % 100 )
           else
             end_year.to_s    ## keep all four digits e.g. 2010 / 2011 / etc.
           end

    tmpl = tmpl.sub( '{year}', slug )
    url  = "#{BASE_URL}/#{tmpl}.html"

    [url, encoding]
  end


  ## downloads the table page for the country code and season;
  ##   returns the html text (see download_page)
  def self.download_table( code, season: )
    url, encoding = table_url_and_encoding( code, season: season )

    download_page( url, encoding: encoding )
  end


  ## fetches (and caches) the html page (via HTTP GET)
  ##   and returns the page text read with the passed-in encoding
  ##
  ##  note: rsssf pages do NOT use the utf-8 character encoding!!!
  ##          the encoding gets passed in by the caller
  ##          (all TABLE entries use Windows-1252 for now)
  ##  note: exits the process with status 1 on a fetch error
  def self.download_page( url, encoding: )
    response = Webget.page( url, encoding: encoding )

    ## note: exit on get / fetch error - do NOT continue for now - why? why not?
    if response.status.nok?    ## e.g. HTTP status code != 200
      warn "!! ERROR - failed to fetch page >#{url}<"
      exit 1
    end

    ## print the first characters of the page (for debugging)
    puts "html:"
    html = response.text( encoding: encoding )
    pp html[0..400]
    html
  end
end   # module Rsssf
90
+
91
+
92
+
93
+ __END__
94
+
95
+ 1998-99: tablesd/duit99.html
96
+ 1999-00: tablesd/duit00.html ## use 1999-2000 - why?? why not??
97
+ 2000-01: tablesd/duit01.html
98
+ 2001-02: tablesd/duit02.html
99
+ 2002-03: tablesd/duit03.html
100
+ 2003-04: tablesd/duit04.html
101
+ 2004-05: tablesd/duit05.html
102
+ 2005-06: tablesd/duit06.html
103
+ 2006-07: tablesd/duit07.html
104
+ 2007-08: tablesd/duit08.html
105
+ 2008-09: tablesd/duit09.html
106
+ 2009-10: tablesd/duit2010.html
107
+ 2010-11: tablesd/duit2011.html
108
+ 2011-12: tablesd/duit2012.html
109
+ 2012-13: tablesd/duit2013.html
110
+ 2013-14: tablesd/duit2014.html
111
+ 2014-15: tablesd/duit2015.html
112
+
113
+
114
+ 2010-11: tableso/oost2011.html
115
+ 2011-12: tableso/oost2012.html
116
+ 2012-13: tableso/oost2013.html
117
+ 2013-14: tableso/oost2014.html
118
+ 2014-15: tableso/oost2015.html
119
+ 2015-16: tableso/oost2016.html
120
+
121
+ 2011: tablesb/braz2011.html !! Windows-1252
122
+ 2012: tablesb/braz2012.html !! Windows-1252
123
+ 2013: tablesb/braz2013.html !! Windows-1252
124
+ 2014: tablesb/braz2014.html !! Windows-1252
125
+ 2015: tablesb/braz2015.html !! Windows-1252
126
+ 2016: tablesb/braz2016.html !! Windows-1252
127
+ 2017: tablesb/braz2017.html !! Windows-1252
128
+ 2018: tablesb/braz2018.html !! Windows-1252
129
+ 2019: tablesb/braz2019.html !! Windows-1252
130
+ 2020: tablesb/braz2020.html !! Windows-1252 ## 2020/21 - extended for corona
131
+ 2021: tablesb/braz2021.html !! Windows-1252
132
+ 2022: tablesb/braz2022.html !! Windows-1252
133
+ 2023: tablesb/braz2023.html !! Windows-1252
134
+ 2024: tablesb/braz2024.html !! Windows-1252
135
+
136
+ 2010-11: tablese/eng2011.html !! Windows-1252
137
+ 2011-12: tablese/eng2012.html !! Windows-1252
138
+ 2012-13: tablese/eng2013.html !! Windows-1252
139
+ 2013-14: tablese/eng2014.html !! Windows-1252
140
+ 2014-15: tablese/eng2015.html !! Windows-1252
141
+ 2015-16: tablese/eng2016.html !! Windows-1252
142
+ 2016-17: tablese/eng2017.html !! Windows-1252
143
+ 2017-18: tablese/eng2018.html !! Windows-1252
144
+ 2018-19: tablese/eng2019.html !! Windows-1252
145
+ 2019-20: tablese/eng2020.html !! Windows-1252
146
+ 2020-21: tablese/eng2021.html !! Windows-1252
147
+ 2021-22: tablese/eng2022.html !! Windows-1252
148
+ 2022-23: tablese/eng2023.html !! Windows-1252
149
+ 2023-24: tablese/eng2024.html !! Windows-1252
150
+
151
+
data/lib/rsssf/page.rb CHANGED
@@ -1,13 +1,11 @@
1
- # encoding: utf-8
2
1
 
3
2
 
4
3
  module Rsssf
4
+
5
5
 
6
6
  PageStat = Struct.new(
7
- :source, ## e.g. http://rsssf.org/tabled/duit89.html
8
- :basename, ## e.g. duit89 -- note: filename w/o extension (and path)
7
+ :source, ## e.g. https://rsssf.org/tabled/duit89.html
9
8
  :year, ## e.g. 1989 -- note: always four digits
10
- :season, ## e.g. 1990-91 -- note: always a string (NOT a number)
11
9
  :authors,
12
10
  :last_updated,
13
11
  :line_count, ## todo: rename to (just) lines - why? why not?
@@ -27,24 +25,41 @@ module Rsssf
27
25
  class Page
28
26
 
29
27
  include Utils ## e.g. year_from_name, etc.
28
+
29
+ def self.read_cache( url ) ### use read_cache /web/html or such - why? why not?
30
+ html = Webcache.read( url )
31
+
32
+ puts "html:"
33
+ pp html[0..400]
34
+
35
+ txt = PageConverter.convert( html, url: url )
36
+ txt
30
37
 
31
- def self.from_url( src )
32
- txt = PageFetcher.new.fetch( src )
33
- self.from_string( txt )
38
+ new( txt )
34
39
  end
35
40
 
36
41
 
37
- def self.from_file( path )
38
- txt = File.read_utf8( path ) # note: always assume sources (already) converted to utf-8
39
- self.from_string( txt )
42
+ def self.read_txt( path ) ## use read_txt
43
+ # note: always assume sources (already) converted from html to txt!!!!
44
+ txt = read_text( path )
45
+ new( txt )
40
46
  end
41
47
 
42
- def self.from_string( txt )
43
- self.new( txt )
44
- end
45
-
48
+
49
+
50
+ ### use text alias too (for txt) - why? why not?
51
+ attr_accessor :txt
52
+
53
+ ## quick hack? used for auto-patch machinery
54
+ attr_accessor :patch
55
+ attr_accessor :url ### source url
56
+
57
+
46
58
  def initialize( txt )
47
59
  @txt = txt
60
+
61
+ @patch = nil
62
+ @url = nil
48
63
  end
49
64
 
50
65
 
@@ -61,17 +76,20 @@ CUP_ROUND_REGEX = /\b(
61
76
  Final
62
77
  )\b/ix
63
78
 
64
- def find_schedule( opts={} ) ## change to build_schedule - why? why not???
79
+
80
+
81
+ ## make header required - why? why not?
82
+ def find_schedule( header: nil,
83
+ cup: false ) ## change to build_schedule - why? why not???
65
84
 
66
85
  ## find match schedule/fixtures in multi-league doc
67
- new_txt = ''
86
+ new_txt = String.new
68
87
 
69
88
  ## note: keep track of statistics
70
89
  ## e.g. number of rounds found
71
90
 
72
91
  round_count = 0
73
92
 
74
- header = opts[:header]
75
93
  if header
76
94
  league_header_found = false
77
95
 
@@ -89,7 +107,8 @@ def find_schedule( opts={} ) ## change to build_schedule - why? why not???
89
107
  header_regex = /^
90
108
  ([#]{2,4}\s+(#{header_esc}))
91
109
  |
92
- (\*{2}(#{header_esc})\*{2})
110
+ (\*{2}(#{header_esc})) ## was: \*{2})
111
+ ## do not include trailing ** for now (allows anchors e.g. §)
93
112
  /ix
94
113
 
95
114
  ## todo:
@@ -105,7 +124,7 @@ def find_schedule( opts={} ) ## change to build_schedule - why? why not???
105
124
  ## pp header_regex
106
125
 
107
126
 
108
- if opts[:cup]
127
+ if cup
109
128
  round_regex = CUP_ROUND_REGEX ## note: only allow final, quaterfinals, etc. if knockout cup
110
129
  else
111
130
  round_regex = LEAGUE_ROUND_REGEX
@@ -128,8 +147,10 @@ def find_schedule( opts={} ) ## change to build_schedule - why? why not???
128
147
  if line =~ header_regex
129
148
  puts "!!! bingo - found header >#{line}<"
130
149
  league_header_found = true
131
- title = line.gsub( /[#*]/, '' ).strip ## quick hack: extract title from header
132
- new_txt << "## #{title}\n\n" # note: use header/stage title (regex group capture)
150
+
151
+ ## note - do NOT auto-add header/title !!!
152
+ # title = line.gsub( /[#*]/, '' ).strip ## quick hack: extract title from header
153
+ # new_txt << "## #{title}\n\n" # note: use header/stage title (regex group capture)
133
154
  else
134
155
  puts " searching for header >#{header}<; skipping line >#{line}<"
135
156
  next
@@ -205,13 +226,24 @@ def find_schedule( opts={} ) ## change to build_schedule - why? why not???
205
226
  end
206
227
  end # each line
207
228
 
208
- schedule = Schedule.from_string( new_txt )
209
- schedule.rounds = round_count
229
+
230
+ ## quick hack?
231
+ ### auto-apply patch if patch configured
232
+ if @patch && @patch.respond_to?(:on_patch)
233
+ url_path = URI.parse( url ).path
234
+ basename = File.basename( url_path, File.extname( url_path ))
235
+ year = year_from_name( basename )
236
+ new_txt = @patch.on_patch( new_txt, basename, year )
237
+ end
238
+
239
+ schedule = Schedule.new( new_txt )
240
+ ## schedule.rounds = round_count
210
241
 
211
242
  schedule
212
243
  end # method find_schedule
213
244
 
214
245
 
246
+
215
247
  def build_stat
216
248
  source = nil
217
249
  authors = nil
@@ -224,7 +256,7 @@ def build_stat
224
256
  end
225
257
 
226
258
  ##
227
- ## fix/todo: move authors n last updated whitespace cleanup to sanitize - why? why not??
259
+ ## fix/todo: move authors n last updated whitespace cleanup - why? why not??
228
260
 
229
261
  if @txt =~ /authors?:\s+(.+?)\s+last updated:\s+(\d{1,2} [a-z]{3,10} \d{4})/im
230
262
  last_updated = $2.to_s # note: save a copy first (gets "reset" by next regex)
@@ -235,7 +267,15 @@ def build_stat
235
267
  end
236
268
 
237
269
  puts "*** !!! missing source" if source.nil?
238
- puts "*** !!! missing authors n last updated" if authors.nil? || last_updated.nil?
270
+ puts "*** !!! missing authors and last updated" if authors.nil? || last_updated.nil?
271
+
272
+
273
+ ## get year from source (url)
274
+ url_path = URI.parse( source ).path
275
+ basename = File.basename( url_path, File.extname( url_path ) ) ## e.g. duit92.txt or duit92.html => duit92
276
+ puts " basename=>#{basename}<"
277
+ year = year_from_name( basename )
278
+
239
279
 
240
280
  sections = []
241
281
 
@@ -248,26 +288,16 @@ def build_stat
248
288
  ## todo: add more patterns? how? why?
249
289
  if line =~ /####\s+(.+)/
250
290
  puts " found section >#{$1}<"
251
- sections << $1.strip
291
+ ## remove anchors first e.g. ‹§sa› etc.
292
+ ## check if anchors with underscore (_) or dash/hyphen (-) ???
293
+ sections << $1.sub( /‹§[a-z0-9]+›/, '' ).strip
252
294
  end
253
295
  end
254
296
 
255
297
 
256
- # get path from url
257
- url = URI.parse( source )
258
- ## pp url
259
- ## puts url.host
260
- path = url.path
261
- extname = File.extname( path )
262
- basename = File.basename( path, extname ) ## e.g. duit92.txt or duit92.html => duit92
263
- year = year_from_name( basename )
264
- season = year_to_season( year )
265
-
266
298
  rec = PageStat.new
267
299
  rec.source = source # e.g. http://rsssf.org/tabled/duit89.html -- use source_url - why?? why not??
268
- rec.basename = basename # e.g. duit89
269
- rec.year = year # e.g. 89 => 1989 -- note: always four digits
270
- rec.season = season
300
+ rec.year = year
271
301
  rec.authors = authors
272
302
  rec.last_updated = last_updated
273
303
  rec.line_count = line_count
@@ -279,17 +309,12 @@ end ## method build_stat
279
309
 
280
310
 
281
311
  def save( path )
282
- File.open( path, 'w' ) do |f|
283
- f.write @txt
284
- end
312
+ write_text( path, @txt )
285
313
  end ## method save
286
314
 
287
315
  end ## class Page
288
316
  end ## module Rsssf
289
317
 
290
318
 
291
- ## add (shortcut) alias
292
- RsssfPageStat = Rsssf::PageStat
293
- RsssfPage = Rsssf::Page
294
319
 
295
320
 
data/lib/rsssf/repo.rb CHANGED
@@ -1,174 +1,115 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Rsssf
4
3
 
5
- ## used by Repo#make_schedules
6
- ScheduleConfig = Struct.new(
7
- :name,
8
- :opts_for_year, ## hash or proc ->(year){ Hash[...] }
9
- :dir_for_year, ## proc ->(year){ 'path_here'} ## rename to path_for_year - why, why not??
10
- :includes ## array of years to include e.g. [2011,2012] etc.
11
- )
12
-
13
-
14
- ScheduleStat = Struct.new(
15
- :path, ## e.g. 2012-13 or archive/1980s/1984-85
16
- :filename, ## e.g. 1-bundesliga.txt -- note: w/o path
17
- :year, ## e.g. 2013 -- note: numeric (integer)
18
- :season, ## e.g. 2012-13 -- note: is a string
19
- :rounds ## e.g. 36 -- note: numeric (integer)
20
- )
21
4
 
22
5
 
23
6
  class Repo
24
-
25
- include Filters ## e.g. sanitize, etc.
26
7
  include Utils ## e.g. year_from_file, etc.
27
8
 
28
9
 
29
- def initialize( path, opts ) ## pass in title etc.
10
+ def initialize( path, title: 'Your Title Here',
11
+ patch: nil )
30
12
  @repo_path = path
31
- @opts = opts
13
+ @title = title
14
+ @patch = patch
32
15
  end
33
16
 
34
17
 
35
- def fetch_pages
36
- puts "fetch_pages:"
37
- cfg = YAML.load_file( "#{@repo_path}/tables/config.yml")
38
- pp cfg
18
+ def root() @repo_path; end ## use/rename to path - why? why not?
19
+ alias_method :root_dir, :root
39
20
 
40
- dl_base = 'http://rsssf.com'
41
21
 
42
- cfg.each do |k,v|
43
- ## season = k # as string e.g. 2011-12 or 2011 etc.
44
- path = v # as string e.g. tablesd/duit2011.html
22
+ ## for now use single country repos - why? why not?
23
+ ## add support for all-in-one repos
24
+ def prepare_pages( code, seasons )
25
+ seasons.each do |season|
26
+ url = Rsssf.table_url( code, season: season )
45
27
 
46
- ## note: assumes extension is .html
47
- # e.g. tablesd/duit2011.html => duit2011
48
- basename = File.basename( path, '.html' )
28
+ ## check if not in cache
29
+ unless Webcache.cached?( url )
30
+ ## download - if not cached
31
+ Rsssf.download_table( code, season: season )
32
+ end
49
33
 
50
- src_url = "#{dl_base}/#{path}"
51
- dest_path = "#{@repo_path}/tables/#{basename}.txt"
34
+ page = Page.read_cache( url )
52
35
 
53
- page = Page.from_url( src_url )
54
- page.save( dest_path )
55
- end # each year
56
- end # method fetch_pages
36
+ url_path = URI.parse( url ).path
37
+ puts " url = >#{url}<"
38
+ puts " url_path = >#{url_path}<"
57
39
 
40
+ basename = File.basename( url_path, File.extname( url_path ))
58
41
 
59
- def make_pages_summary
60
- stats = []
42
+ ###
43
+ ## check for on_prepare (apply patches)
44
+ if @patch && @patch.respond_to?(:on_prepare)
45
+ year = year_from_name( basename )
46
+ page.txt = @patch.on_prepare( page.txt, basename, year )
47
+ end
61
48
 
62
- files = Dir[ "#{@repo_path}/tables/*.txt" ]
63
- files.each do |file|
64
- page = Page.from_file( file )
65
- stats << page.build_stat
66
- end
67
49
 
68
- ### save report as README.md in tables/ folder in repo
69
- report = PageReport.new( stats, @opts ) ## pass in title etc.
70
- report.save( "#{@repo_path}/tables/README.md" )
71
- end # method make_pages_summary
50
+ path = "#{@repo_path}/tables/#{basename}.txt"
51
+ page.save( path )
52
+ end
53
+ end # method prepare_pages
72
54
 
73
55
 
74
- def make_schedules_summary( stats ) ## note: requires stats to be passed in for now
75
- report = ScheduleReport.new( stats, @opts ) ## pass in title etc.
76
- report.save( "#{@repo_path}/README.md" )
77
- end # method make_schedules_summary
56
+ def each_page( code, seasons, &blk ) ## use each table or such - why? why not?
57
+ seasons.each do |season|
58
+ url = Rsssf.table_url( code, season: season )
59
+ url_path = URI.parse( url ).path
60
+ puts " url = >#{url}<"
61
+ puts " url_path = >#{url_path}<"
62
+ basename = File.basename( url_path, File.extname( url_path ))
78
63
 
64
+ path = "#{@repo_path}/tables/#{basename}.txt"
65
+ page = Page.read_txt( path )
79
66
 
67
+ ## add/pass along patcher if patcher
68
+ if @patch
69
+ page.patch = @patch
70
+ page.url = url
71
+ end
80
72
 
81
- def patch_pages( patcher )
82
- ## lets you run/use custom (repo/country-specific patches e.g. for adding/patching headings etc.)
83
- patch_dir( "#{@repo_path}/tables" ) do |txt, name, year|
84
- puts "patching #{year} (#{name}) (#{@repo_path})..."
85
- patcher.patch( txt, name, year ) ## note: must be last (that is, must return (patcher) t(e)xt)
73
+ season = Season( season )
74
+ blk.call( season, page )
86
75
  end
87
- end ## method patch_pages
88
-
89
-
90
- def sanitize_pages
91
- ## for debugging/testing lets you (re)run sanitize (alreay incl. in html2txt filter by default)
92
- sanitize_dir( "#{@repo_path}/tables" )
93
76
  end
94
77
 
95
78
 
79
+ def make_pages_summary
80
+ files = Dir.glob( "#{@repo_path}/tables/*.txt" )
81
+ report = PageReport.build( files, title: @title ) ## pass in title etc.
96
82
 
97
- def make_schedules( cfg )
98
-
99
- ## note: return stats (for report eg. README)
100
- stats = []
101
-
102
- files = Dir[ "#{@repo_path}/tables/*.txt" ]
103
- files.each do |file|
104
-
105
- ## todo/check/fix:
106
- ## use source: prop in rsssf page - why? why not???
107
- ## move year/season/basename into page ???
108
- #
109
- # assume every rsssf page has at least:
110
- ## - basename e.g. duit2014
111
- ## - year e.g. 2014 (numeric)
112
- ## - season (derived from config lookup???) - string e.g. 2014-15 or 2014 etc.
113
- extname = File.extname( file )
114
- basename = File.basename( file, extname )
115
- year = year_from_name( basename )
116
- season = year_to_season( year )
117
-
118
- if cfg.includes && cfg.includes.include?( year ) == false
119
- puts " skipping #{basename}; not listed in includes"
120
- next
121
- end
122
-
123
-
124
- puts " reading >#{basename}<"
83
+ ### save report as README.md in tables/ folder in repo
84
+ report.save( "#{@repo_path}/tables/README.md" )
85
+ end # method make_pages_summary
125
86
 
126
- page = Page.from_file( file ) # note: always assume sources (already) converted to utf-8
127
87
 
128
- if cfg.opts_for_year.is_a?( Hash )
129
- opts = cfg.opts_for_year ## just use as is 1:1 (constant/same for all years)
130
- else
131
- ## assume it's a proc/lambda (call to calculate)
132
- opts = cfg.opts_for_year.call( year )
133
- end
134
- pp opts
135
-
136
- schedule = page.find_schedule( opts )
137
- ## pp schedule
138
-
139
-
140
- if cfg.dir_for_year.nil?
141
- ## use default setting, that is, archive for dir (e.g. archive/1980s/1985-86 etc.)
142
- dir_for_year = archive_dir_for_year( year )
143
- else
144
- ## assume it's a proc/lambda
145
- dir_for_year = cfg.dir_for_year.call( year )
146
- end
88
def make_schedules_summary
  ## collect all match datafiles found in the repo (root) dir
  files = SportDb::Parser::Opts.expand_args( [@repo_path] )
  pp files

  ## build the report (pass along title and patch)
  ##   and save it as README.md in the repo (root) dir
  ScheduleReport.build( files, title: @title, patch: @patch )
                .save( "#{@repo_path}/README.md" )
end
147
98
 
148
- ## -- cfg.name e.g. => 1-liga
149
99
 
150
- dest_path = "#{@repo_path}/#{dir_for_year}/#{cfg.name}.txt"
151
- puts " save to >#{dest_path}<"
152
- FileUtils.mkdir_p( File.dirname( dest_path ))
153
- schedule.save( dest_path )
154
100
 
155
- rec = ScheduleStat.new
156
- rec.path = dir_for_year
157
- rec.filename = "#{cfg.name}.txt" ## change to basename - why?? why not??
158
- rec.year = year
159
- rec.season = season
160
- rec.rounds = schedule.rounds
161
101
 
162
- stats << rec
102
+ def patch_pages( patcher )
103
+ ## lets you run/use custom (repo/country-specific patches e.g. for adding/patching headings etc.)
104
+ patch_dir( "#{@repo_path}/tables" ) do |txt, name, year|
105
+ puts "patching #{year} (#{name}) (#{@repo_path})..."
106
+ patcher.patch( txt, name, year ) ## note: must be last (that is, must return (patcher) t(e)xt)
163
107
  end
164
-
165
- stats # return stats for reporting
166
- end # method make_schedules
108
+ end ## method patch_pages
167
109
 
168
110
 
169
- private
170
- def patch_dir( root )
171
- files = Dir[ "#{root}/*.txt" ]
111
+ def patch_dir( root, &blk )
112
+ files = Dir.glob( "#{root}/**/*.txt" )
172
113
  ## pp files
173
114
 
174
115
  ## sort files by year (latest first)
@@ -180,41 +121,24 @@ def patch_dir( root )
180
121
  end
181
122
 
182
123
  files.each do |file|
183
- txt = File.read_utf8( file ) ## note: assumes already converted to utf-8
124
+ txt = read_text( file ) ## note: assumes already converted to utf-8
184
125
 
185
126
  basename = File.basename( file, '.txt' ) ## e.g. duit92.txt => duit92
186
127
  year = year_from_name( basename )
187
128
 
188
- new_txt = yield( txt, basename, year )
189
- ## calculate hash to see if anything changed ?? why? why not??
129
+ new_txt = blk.call( txt, basename, year )
190
130
 
191
- File.open( file, 'w' ) do |f|
192
- f.write new_txt
131
+ ## calculate hash to see if anything changed ?? why? why not??
132
+ if txt != new_txt
133
+ puts " patching #{file}, text changed"
134
+ write_text( file, new_txt )
193
135
  end
194
136
  end # each file
195
137
  end ## patch_dir
196
138
 
197
- def sanitize_dir( root )
198
- files = Dir[ "#{root}/*.txt" ]
199
-
200
- files.each do |file|
201
- txt = File.read_utf8( file ) ## note: assumes already converted to utf-8
202
-
203
- new_txt = sanitize( txt )
204
139
 
205
- File.open( file, 'w' ) do |f|
206
- f.write new_txt
207
- end
208
- end # each file
209
- end ## sanitize_dir
210
140
 
211
141
 
212
142
  end ## class Repo
213
143
  end ## module Rsssf
214
144
 
215
- ## add (shortcut) alias
216
- RsssfRepo = Rsssf::Repo
217
- RsssfScheduleConfig = Rsssf::ScheduleConfig
218
- RsssfScheduleStat = Rsssf::ScheduleStat
219
-
220
-