rsssf 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
+
+module Rsssf
+
+  ## end_year to slug_year
+  ## check if generic rule/convention in use ???
+  ##   2007-08: tablesd/duit08.html
+  ##   2008-09: tablesd/duit09.html
+  ##   2009-10: tablesd/duit2010.html
+  ##   2010-11: tablesd/duit2011.html
+  ##   2011-12: tablesd/duit2012.html
+
+
+  ## map country codes to table pages
+  ## add options about (char) encoding ??? - why? why not?
+  TABLE = {
+    'eng' => ['tablese/eng{year}',  { encoding: 'Windows-1252' } ],
+    'es'  => ['tabless/span{year}', { encoding: 'Windows-1252' } ],
+    'de'  => ['tablesd/duit{year}', { encoding: 'Windows-1252' } ],
+    'at'  => ['tableso/oost{year}', { encoding: 'Windows-1252' } ],
+    'br'  => [
+       ->(season) {
+            ## note: special slug/case for year/season 2000
+            ##   see rsssf.org/tablesb/brazchamp.html
+            if season == Season('2000')
+              'tablesb/braz-joao{year}'   ## use braz-joao00 - why? why not?
+            else
+              'tablesb/braz{year}'
+            end
+       }, { encoding: 'Windows-1252' } ],
+  }
+
+
+  BASE_URL = "https://rsssf.org"
+
+
+  def self.table_url( code, season: )
+    url, _ = table_url_and_encoding( code, season: season )
+    url
+  end
+
+  def self.table_url_and_encoding( code, season: )
+    season = Season( season )
+
+    table = TABLE[ code.downcase ]
+    tmpl  = table[0]
+    tmpl  = tmpl.call( season )   if tmpl.is_a?(Proc)   ## check for proc
+
+    opts     = table[1] || {}
+    encoding = opts[:encoding] || 'UTF-8'
+
+
+    slug = if season.end_year < 2010   ## cut off all digits (only keep last two)
+             ## convert end_year to string with leading zero
+             '%02d' % (season.end_year % 100)   ## e.g. 00 / 01 / 99 / 98 / 11 / etc.
+           else
+             '%4d' % season.end_year
+           end
+
+    tmpl = tmpl.sub( '{year}', slug )
+    url  = "#{BASE_URL}/#{tmpl}.html"
+
+    [url, encoding]
+  end
+
+
+  def self.download_table( code, season: )
+    url, encoding = table_url_and_encoding( code, season: season )
+
+    download_page( url, encoding: encoding )
+  end
+
+
+  def self.download_page( url, encoding: )
+
+    ## note: assume plain 7-bit ascii for now
+    ##   -- assume rsssf uses ISO_8859_15 (updated version of ISO_8859_1)
+    ###  -- does NOT use utf-8 character encoding!!!
+    response = Webget.page( url, encoding: encoding )   ## fetch (and cache) html page (via HTTP GET)
+
+    ## note: exit on get / fetch error - do NOT continue for now - why? why not?
+    exit 1  if response.status.nok?   ## e.g. HTTP status code != 200
+
+
+    puts "html:"
+    html = response.text( encoding: encoding )
+    pp html[0..400]
+    html
+  end
+end   # module Rsssf
+
+
+
+__END__
+
+1998-99: tablesd/duit99.html
+1999-00: tablesd/duit00.html   ## use 1999-2000 - why?? why not??
+2000-01: tablesd/duit01.html
+2001-02: tablesd/duit02.html
+2002-03: tablesd/duit03.html
+2003-04: tablesd/duit04.html
+2004-05: tablesd/duit05.html
+2005-06: tablesd/duit06.html
+2006-07: tablesd/duit07.html
+2007-08: tablesd/duit08.html
+2008-09: tablesd/duit09.html
+2009-10: tablesd/duit2010.html
+2010-11: tablesd/duit2011.html
+2011-12: tablesd/duit2012.html
+2012-13: tablesd/duit2013.html
+2013-14: tablesd/duit2014.html
+2014-15: tablesd/duit2015.html
+
+
+2010-11: tableso/oost2011.html
+2011-12: tableso/oost2012.html
+2012-13: tableso/oost2013.html
+2013-14: tableso/oost2014.html
+2014-15: tableso/oost2015.html
+2015-16: tableso/oost2016.html
+
+2011: tablesb/braz2011.html  !! Windows-1252
+2012: tablesb/braz2012.html  !! Windows-1252
+2013: tablesb/braz2013.html  !! Windows-1252
+2014: tablesb/braz2014.html  !! Windows-1252
+2015: tablesb/braz2015.html  !! Windows-1252
+2016: tablesb/braz2016.html  !! Windows-1252
+2017: tablesb/braz2017.html  !! Windows-1252
+2018: tablesb/braz2018.html  !! Windows-1252
+2019: tablesb/braz2019.html  !! Windows-1252
+2020: tablesb/braz2020.html  !! Windows-1252   ## 2020/21 - extended for corona
+2021: tablesb/braz2021.html  !! Windows-1252
+2022: tablesb/braz2022.html  !! Windows-1252
+2023: tablesb/braz2023.html  !! Windows-1252
+2024: tablesb/braz2024.html  !! Windows-1252
+
+2010-11: tablese/eng2011.html  !! Windows-1252
+2011-12: tablese/eng2012.html  !! Windows-1252
+2012-13: tablese/eng2013.html  !! Windows-1252
+2013-14: tablese/eng2014.html  !! Windows-1252
+2014-15: tablese/eng2015.html  !! Windows-1252
+2015-16: tablese/eng2016.html  !! Windows-1252
+2016-17: tablese/eng2017.html  !! Windows-1252
+2017-18: tablese/eng2018.html  !! Windows-1252
+2018-19: tablese/eng2019.html  !! Windows-1252
+2019-20: tablese/eng2020.html  !! Windows-1252
+2020-21: tablese/eng2021.html  !! Windows-1252
+2021-22: tablese/eng2022.html  !! Windows-1252
+2022-23: tablese/eng2023.html  !! Windows-1252
+2023-24: tablese/eng2024.html  !! Windows-1252
+
+
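A quick usage sketch of the new helpers (the return values shown are inferred from the TABLE mapping and the end_year-to-slug rule above; the Season() conversion helper used by the module is assumed to be available):

    require 'rsssf'

    Rsssf.table_url( 'de', season: '2008-09' )
    #=> "https://rsssf.org/tablesd/duit09.html"      ## end_year < 2010 - two-digit slug

    Rsssf.table_url( 'de', season: '2011-12' )
    #=> "https://rsssf.org/tablesd/duit2012.html"    ## end_year >= 2010 - four-digit slug

    url, encoding = Rsssf.table_url_and_encoding( 'eng', season: '2023-24' )
    #=> ["https://rsssf.org/tablese/eng2024.html", "Windows-1252"]
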
data/lib/rsssf/page.rb CHANGED
@@ -1,13 +1,11 @@
-# encoding: utf-8
 
 
 module Rsssf
+
 
   PageStat = Struct.new(
-    :source,       ## e.g. http://rsssf.org/tabled/duit89.html
-    :basename,     ## e.g. duit89 -- note: filename w/o extension (and path)
+    :source,       ## e.g. https://rsssf.org/tabled/duit89.html
     :year,         ## e.g. 1989 -- note: always four digits
-    :season,       ## e.g. 1990-91 -- note: always a string (NOT a number)
     :authors,
     :last_updated,
     :line_count,   ## todo: rename to (just) lines - why? why not?
@@ -27,24 +25,41 @@ module Rsssf
  class Page
 
    include Utils   ## e.g. year_from_name, etc.
+
+    def self.read_cache( url )   ### use read_cache /web/html or such - why? why not?
+      html = Webcache.read( url )
+
+      puts "html:"
+      pp html[0..400]
+
+      txt = PageConverter.convert( html, url: url )
+      txt
 
-    def self.from_url( src )
-      txt = PageFetcher.new.fetch( src )
-      self.from_string( txt )
+      new( txt )
    end
 
 
-    def self.from_file( path )
-      txt = File.read_utf8( path )   # note: always assume sources (already) converted to utf-8
-      self.from_string( txt )
+    def self.read_txt( path )   ## use read_txt
+      # note: always assume sources (already) converted from html to txt!!!!
+      txt = read_text( path )
+      new( txt )
    end
 
-    def self.from_string( txt )
-      self.new( txt )
-    end
-
+
+
+    ### use text alias too (for txt) - why? why not?
+    attr_accessor :txt
+
+    ## quick hack? used for auto-patch machinery
+    attr_accessor :patch
+    attr_accessor :url    ### source url
+
+
    def initialize( txt )
      @txt = txt
+
+      @patch = nil
+      @url   = nil
    end
 
 
@@ -61,17 +76,20 @@ CUP_ROUND_REGEX = /\b(
       Final
     )\b/ix
 
-    def find_schedule( opts={} )   ## change to build_schedule - why? why not???
+
+
+    ## make header required - why? why not?
+    def find_schedule( header: nil,
+                       cup:    false )   ## change to build_schedule - why? why not???
 
      ## find match schedule/fixtures in multi-league doc
-      new_txt = ''
+      new_txt = String.new
 
      ## note: keep track of statistics
      ##   e.g. number of rounds found
 
      round_count = 0
 
-      header = opts[:header]
      if header
        league_header_found = false
@@ -89,7 +107,8 @@ def find_schedule( opts={} )   ## change to build_schedule - why? why not???
        header_regex = /^
                         ([#]{2,4}\s+(#{header_esc}))
                            |
-                        (\*{2}(#{header_esc})\*{2})
+                        (\*{2}(#{header_esc}))    ## was: \*{2})
+                        ## do not include trailing ** for now (allows anchors e.g. §)
                       /ix
 
        ## todo:
@@ -105,7 +124,7 @@ def find_schedule( opts={} )   ## change to build_schedule - why? why not???
        ## pp header_regex
 
 
-        if opts[:cup]
+        if cup
          round_regex = CUP_ROUND_REGEX   ## note: only allow final, quarterfinals, etc. if knockout cup
        else
          round_regex = LEAGUE_ROUND_REGEX
@@ -128,8 +147,10 @@ def find_schedule( opts={} )   ## change to build_schedule - why? why not???
        if line =~ header_regex
          puts "!!! bingo - found header >#{line}<"
          league_header_found = true
-          title = line.gsub( /[#*]/, '' ).strip   ## quick hack: extract title from header
-          new_txt << "## #{title}\n\n"   # note: use header/stage title (regex group capture)
+
+          ## note - do NOT auto-add header/title !!!
+          # title = line.gsub( /[#*]/, '' ).strip   ## quick hack: extract title from header
+          # new_txt << "## #{title}\n\n"   # note: use header/stage title (regex group capture)
        else
          puts "  searching for header >#{header}<; skipping line >#{line}<"
          next
@@ -205,13 +226,24 @@ def find_schedule( opts={} )   ## change to build_schedule - why? why not???
        end
      end # each line
 
-      schedule = Schedule.from_string( new_txt )
-      schedule.rounds = round_count
+
+      ## quick hack?
+      ###  auto-apply patch if patch configured
+      if @patch && @patch.respond_to?(:on_patch)
+        url_path = URI.parse( url ).path
+        basename = File.basename( url_path, File.extname( url_path ))
+        year     = year_from_name( basename )
+        new_txt  = @patch.on_patch( new_txt, basename, year )
+      end
+
+      schedule = Schedule.new( new_txt )
+      ## schedule.rounds = round_count
 
      schedule
    end # method find_schedule
 
 
+
    def build_stat
      source  = nil
      authors = nil
@@ -224,7 +256,7 @@ def build_stat
      end
 
      ##
-      ## fix/todo: move authors n last updated whitespace cleanup to sanitize - why? why not??
+      ## fix/todo: move authors n last updated whitespace cleanup - why? why not??
 
      if @txt =~ /authors?:\s+(.+?)\s+last updated:\s+(\d{1,2} [a-z]{3,10} \d{4})/im
        last_updated = $2.to_s   # note: save a copy first (gets "reset" by next regex)
@@ -235,7 +267,15 @@ def build_stat
      end
 
      puts "*** !!! missing source"   if source.nil?
-      puts "*** !!! missing authors n last updated"   if authors.nil? || last_updated.nil?
+      puts "*** !!! missing authors and last updated"   if authors.nil? || last_updated.nil?
+
+
+      ## get year from source (url)
+      url_path = URI.parse( source ).path
+      basename = File.basename( url_path, File.extname( url_path ) )   ## e.g. duit92.txt or duit92.html => duit92
+      puts "  basename=>#{basename}<"
+      year = year_from_name( basename )
+
 
 
      sections = []
@@ -248,26 +288,16 @@ def build_stat
        ## todo: add more patterns? how? why?
        if line =~ /####\s+(.+)/
          puts "  found section >#{$1}<"
-          sections << $1.strip
+          ## remove anchors first e.g. ‹§sa› etc.
+          ## check if anchors with underscore (_) or dash/hyphen (-) ???
+          sections << $1.sub( /‹§[a-z0-9]+›/, '' ).strip
        end
      end
 
 
-      # get path from url
-      url = URI.parse( source )
-      ## pp url
-      ## puts url.host
-      path     = url.path
-      extname  = File.extname( path )
-      basename = File.basename( path, extname )   ## e.g. duit92.txt or duit92.html => duit92
-      year     = year_from_name( basename )
-      season   = year_to_season( year )
-
      rec = PageStat.new
      rec.source = source   # e.g. http://rsssf.org/tabled/duit89.html -- use source_url - why?? why not??
-      rec.basename = basename   # e.g. duit89
-      rec.year     = year       # e.g. 89 => 1989 -- note: always four digits
-      rec.season   = season
+      rec.year = year
      rec.authors      = authors
      rec.last_updated = last_updated
      rec.line_count   = line_count
@@ -279,17 +309,12 @@ end ## method build_stat
 
 
    def save( path )
-      File.open( path, 'w' ) do |f|
-        f.write @txt
-      end
+      write_text( path, @txt )
    end ## method save
 
  end ## class Page
 end ## module Rsssf
 
 
-## add (shortcut) alias
-RsssfPageStat = Rsssf::PageStat
-RsssfPage     = Rsssf::Page
 
 
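A minimal sketch of the reworked Page API (the season, league header and file path are illustrative placeholders; assumes the page was already downloaded into the web cache and that the converted text carries the usual source/authors footer used by build_stat):

    url  = Rsssf.table_url( 'de', season: '2011-12' )
    page = Rsssf::Page.read_cache( url )    ## converts the cached html page to plain txt

    ## or read an already converted local copy
    ## page = Rsssf::Page.read_txt( './tables/duit2012.txt' )

    schedule = page.find_schedule( header: '1. Bundesliga' )   ## pass cup: true for knockout cups
    stat     = page.build_stat    ## source / year / authors / last_updated / etc.
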
data/lib/rsssf/repo.rb CHANGED
@@ -1,174 +1,115 @@
-# encoding: utf-8
 
 module Rsssf
 
-  ## used by Repo#make_schedules
-  ScheduleConfig = Struct.new(
-    :name,
-    :opts_for_year,   ## hash or proc ->(year){ Hash[...] }
-    :dir_for_year,    ## proc ->(year){ 'path_here'}   ## rename to path_for_year - why, why not??
-    :includes         ## array of years to include e.g. [2011,2012] etc.
-  )
-
-
-  ScheduleStat = Struct.new(
-    :path,       ## e.g. 2012-13 or archive/1980s/1984-85
-    :filename,   ## e.g. 1-bundesliga.txt -- note: w/o path
-    :year,       ## e.g. 2013 -- note: numeric (integer)
-    :season,     ## e.g. 2012-13 -- note: is a string
-    :rounds      ## e.g. 36 -- note: numeric (integer)
-  )
 
 
  class Repo
-
-    include Filters   ## e.g. sanitize, etc.
    include Utils     ## e.g. year_from_file, etc.
 
 
-    def initialize( path, opts )   ## pass in title etc.
+    def initialize( path, title: 'Your Title Here',
+                          patch: nil )
      @repo_path = path
-      @opts = opts
+      @title = title
+      @patch = patch
    end
 
 
-    def fetch_pages
-      puts "fetch_pages:"
-      cfg = YAML.load_file( "#{@repo_path}/tables/config.yml")
-      pp cfg
+    def root() @repo_path; end   ## use/rename to path - why? why not?
+    alias_method :root_dir, :root
 
-      dl_base = 'http://rsssf.com'
 
-      cfg.each do |k,v|
-        ## season = k   # as string e.g. 2011-12 or 2011 etc.
-        path = v        # as string e.g. tablesd/duit2011.html
+    ## for now use single country repos - why? why not?
+    ##   add support for all-in-one repos
+    def prepare_pages( code, seasons )
+      seasons.each do |season|
+        url = Rsssf.table_url( code, season: season )
 
-        ## note: assumes extension is .html
-        #    e.g. tablesd/duit2011.html => duit2011
-        basename = File.basename( path, '.html' )
+        ## check if not in cache
+        unless Webcache.cached?( url )
+          ## download - if not cached
+          Rsssf.download_table( code, season: season )
+        end
 
-        src_url   = "#{dl_base}/#{path}"
-        dest_path = "#{@repo_path}/tables/#{basename}.txt"
+        page = Page.read_cache( url )
 
-        page = Page.from_url( src_url )
-        page.save( dest_path )
-      end # each year
-    end # method fetch_pages
+        url_path = URI.parse( url ).path
+        puts "  url      = >#{url}<"
+        puts "  url_path = >#{url_path}<"
 
+        basename = File.basename( url_path, File.extname( url_path ))
 
-    def make_pages_summary
-      stats = []
+        ###
+        ## check for on_prepare (apply patches)
+        if @patch && @patch.respond_to?(:on_prepare)
+          year = year_from_name( basename )
+          page.txt = @patch.on_prepare( page.txt, basename, year )
+        end
 
-      files = Dir[ "#{@repo_path}/tables/*.txt" ]
-      files.each do |file|
-        page = Page.from_file( file )
-        stats << page.build_stat
-      end
 
-      ### save report as README.md in tables/ folder in repo
-      report = PageReport.new( stats, @opts )   ## pass in title etc.
-      report.save( "#{@repo_path}/tables/README.md" )
-    end # method make_pages_summary
+        path = "#{@repo_path}/tables/#{basename}.txt"
+        page.save( path )
+      end
+    end # method prepare_pages
 
 
-    def make_schedules_summary( stats )   ## note: requires stats to be passed in for now
-      report = ScheduleReport.new( stats, @opts )   ## pass in title etc.
-      report.save( "#{@repo_path}/README.md" )
-    end # method make_schedules_summary
+    def each_page( code, seasons, &blk )   ## use each table or such - why? why not?
+      seasons.each do |season|
+        url = Rsssf.table_url( code, season: season )
+        url_path = URI.parse( url ).path
+        puts "  url      = >#{url}<"
+        puts "  url_path = >#{url_path}<"
+        basename = File.basename( url_path, File.extname( url_path ))
 
+        path = "#{@repo_path}/tables/#{basename}.txt"
+        page = Page.read_txt( path )
 
+        ## add/pass along patcher if patcher
+        if @patch
+          page.patch = @patch
+          page.url   = url
+        end
 
-    def patch_pages( patcher )
-      ## lets you run/use custom (repo/country-specific patches e.g. for adding/patching headings etc.)
-      patch_dir( "#{@repo_path}/tables" ) do |txt, name, year|
-        puts "patching #{year} (#{name}) (#{@repo_path})..."
-        patcher.patch( txt, name, year )   ## note: must be last (that is, must return (patcher) t(e)xt)
+        season = Season( season )
+        blk.call( season, page )
      end
-    end ## method patch_pages
-
-
-    def sanitize_pages
-      ## for debugging/testing lets you (re)run sanitize (alreay incl. in html2txt filter by default)
-      sanitize_dir( "#{@repo_path}/tables" )
    end
 
 
+    def make_pages_summary
+      files = Dir.glob( "#{@repo_path}/tables/*.txt" )
+      report = PageReport.build( files, title: @title )   ## pass in title etc.
 
-    def make_schedules( cfg )
-
-      ## note: return stats (for report eg. README)
-      stats = []
-
-      files = Dir[ "#{@repo_path}/tables/*.txt" ]
-      files.each do |file|
-
-        ## todo/check/fix:
-        ##   use source: prop in rsssf page - why? why not???
-        ##   move year/season/basename into page ???
-        #
-        #  assume every rsssf page has at least:
-        ##   - basename e.g. duit2014
-        ##   - year     e.g. 2014 (numeric)
-        ##   - season (derived from config lookup???) - string e.g. 2014-15 or 2014 etc.
-        extname  = File.extname( file )
-        basename = File.basename( file, extname )
-        year     = year_from_name( basename )
-        season   = year_to_season( year )
-
-        if cfg.includes && cfg.includes.include?( year ) == false
-          puts "  skipping #{basename}; not listed in includes"
-          next
-        end
-
-
-        puts "  reading >#{basename}<"
+      ### save report as README.md in tables/ folder in repo
+      report.save( "#{@repo_path}/tables/README.md" )
+    end # method make_pages_summary
 
-        page = Page.from_file( file )   # note: always assume sources (already) converted to utf-8
 
-        if cfg.opts_for_year.is_a?( Hash )
-          opts = cfg.opts_for_year   ## just use as is 1:1 (constant/same for all years)
-        else
-          ## assume it's a proc/lambda (call to calculate)
-          opts = cfg.opts_for_year.call( year )
-        end
-        pp opts
-
-        schedule = page.find_schedule( opts )
-        ## pp schedule
-
-
-        if cfg.dir_for_year.nil?
-          ## use default setting, that is, archive for dir (e.g. archive/1980s/1985-86 etc.)
-          dir_for_year = archive_dir_for_year( year )
-        else
-          ## assume it's a proc/lambda
-          dir_for_year = cfg.dir_for_year.call( year )
-        end
+    def make_schedules_summary
+      ## find all match datafiles
+      args  = [@repo_path]
+      files = SportDb::Parser::Opts.expand_args( args )
+      pp files
+
+      report = ScheduleReport.build( files, title: @title,
+                                            patch: @patch )   ## pass in title etc.
+      report.save( "#{@repo_path}/README.md" )
+    end
 
-        ## -- cfg.name  e.g. => 1-liga
 
-        dest_path = "#{@repo_path}/#{dir_for_year}/#{cfg.name}.txt"
-        puts "  save to >#{dest_path}<"
-        FileUtils.mkdir_p( File.dirname( dest_path ))
-        schedule.save( dest_path )
 
-        rec = ScheduleStat.new
-        rec.path     = dir_for_year
-        rec.filename = "#{cfg.name}.txt"   ## change to basename - why?? why not??
-        rec.year     = year
-        rec.season   = season
-        rec.rounds   = schedule.rounds
 
-        stats << rec
+    def patch_pages( patcher )
+      ## lets you run/use custom (repo/country-specific patches e.g. for adding/patching headings etc.)
+      patch_dir( "#{@repo_path}/tables" ) do |txt, name, year|
+        puts "patching #{year} (#{name}) (#{@repo_path})..."
+        patcher.patch( txt, name, year )   ## note: must be last (that is, must return (patcher) t(e)xt)
      end
-
-      stats   # return stats for reporting
-    end # method make_schedules
+    end ## method patch_pages
 
 
-    private
-    def patch_dir( root )
-      files = Dir[ "#{root}/*.txt" ]
+    def patch_dir( root, &blk )
+      files = Dir.glob( "#{root}/**/*.txt" )
      ## pp files
 
      ## sort files by year (latest first)
@@ -180,41 +121,24 @@ def patch_dir( root )
      end
 
      files.each do |file|
-        txt = File.read_utf8( file )   ## note: assumes already converted to utf-8
+        txt = read_text( file )        ## note: assumes already converted to utf-8
 
        basename = File.basename( file, '.txt' )   ## e.g. duit92.txt => duit92
        year     = year_from_name( basename )
 
-        new_txt = yield( txt, basename, year )
-        ## calculate hash to see if anything changed ?? why? why not??
+        new_txt = blk.call( txt, basename, year )
 
-        File.open( file, 'w' ) do |f|
-          f.write new_txt
+        ## calculate hash to see if anything changed ?? why? why not??
+        if txt != new_txt
+          puts "  patching #{file}, text changed"
+          write_text( file, new_txt )
        end
      end # each file
    end ## patch_dir
 
-    def sanitize_dir( root )
-      files = Dir[ "#{root}/*.txt" ]
-
-      files.each do |file|
-        txt = File.read_utf8( file )   ## note: assumes already converted to utf-8
-
-        new_txt = sanitize( txt )
 
-        File.open( file, 'w' ) do |f|
-          f.write new_txt
-        end
-      end # each file
-    end ## sanitize_dir
 
 
  end ## class Repo
 end ## module Rsssf
 
-## add (shortcut) alias
-RsssfRepo           = Rsssf::Repo
-RsssfScheduleConfig = Rsssf::ScheduleConfig
-RsssfScheduleStat   = Rsssf::ScheduleStat
-
-
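
A usage sketch of the reworked Repo workflow (the repo path, title, seasons and league header are placeholders; a patch object responding to on_prepare / on_patch is optional):

    repo = Rsssf::Repo.new( './deutschland', title: 'Deutschland (Germany)' )

    seasons = ['2010-11', '2011-12']

    repo.prepare_pages( 'de', seasons )   ## download (unless cached) & save tables/*.txt
    repo.make_pages_summary              ## writes tables/README.md

    repo.each_page( 'de', seasons ) do |season, page|
      schedule = page.find_schedule( header: '1. Bundesliga' )
      ## write the schedule out per season, e.g. under #{repo.root}/#{season.end_year}/
    end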