rsssf 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,25 +1,31 @@
1
- # encoding: utf-8
2
1
 
3
2
 
4
3
  module Rsssf
5
4
 
6
5
  class PageReport
7
6
 
7
+
8
+ def self.build( files, title: )
9
+ stats = []
10
+ files.each do |file|
11
+ page = Page.read_txt( file )
12
+ stats << page.build_stat
13
+ end
14
+
15
+ new( stats, title: title )
16
+ end
17
+
18
+
8
19
  attr_reader :title
9
20
 
10
- def initialize( stats, opts )
21
+ def initialize( stats, title: )
11
22
  @stats = stats
12
- @opts = opts
13
-
14
- @title = opts[:title] || 'Your Title Here'
23
+ @title = title
15
24
  end
16
25
 
17
- def save( path )
18
- ### save report as README.md in repo
19
- File.open( path, 'w' ) do |f|
20
- f.write build_summary
21
- end
22
- end
26
+ ### save report as README.md in repo
27
+ def save( path ) write_text( path, build_summary ); end
28
+
23
29
 
24
30
  def build_summary
25
31
 
@@ -33,19 +39,26 @@ def build_summary
33
39
 
34
40
  football.db RSSSF Archive Data Summary for #{title}
35
41
 
36
- _Last Update: #{Time.now}_
37
-
38
42
  EOS
39
43
 
44
+ ## no longer add last update
45
+ ## _Last Update: #{Time.now}_
46
+
47
+
40
48
  txt = ''
41
49
  txt << header
42
50
 
43
- txt << "| Season | File | Authors | Last Updated | Lines (Chars) | Sections |\n"
44
- txt << "| :----- | :----- | :------- | :----------- | ------------: | :------- |\n"
51
+ txt << "| File | Authors | Last Updated | Lines (Chars) | Sections |\n"
52
+ txt << "| :----- | :------- | :----------- | ------------: | :------- |\n"
53
+
54
+ ## note - removed season (no longer tracked here)
45
55
 
46
56
  stats.each do |stat|
47
- txt << "| #{stat.season} "
48
- txt << "| [#{stat.basename}.txt](#{stat.basename}.txt) "
57
+ ## get basename from source url
58
+ url_path = URI.parse( stat.source ).path
59
+ basename = File.basename( url_path, File.extname( url_path ) ) ## e.g. duit92.txt or duit92.html => duit92
60
+
61
+ txt << "| [#{basename}.txt](#{basename}.txt) "
49
62
  txt << "| #{stat.authors} "
50
63
  txt << "| #{stat.last_updated} "
51
64
  txt << "| #{stat.line_count} (#{stat.char_count}) "
@@ -60,5 +73,3 @@ end # method build_summary
60
73
  end ## class PageReport
61
74
  end ## module Rsssf
62
75
 
63
- ## add (shortcut) alias
64
- RsssfPageReport = Rsssf::PageReport
@@ -1,30 +1,72 @@
1
- # encoding: utf-8
1
+
2
2
 
3
3
  module Rsssf
4
-
4
+
5
+
6
+ ScheduleStat = Struct.new(
7
+ :path, ## path to .txt file
8
+ :errors ## array or nil
9
+ )
10
+
11
+
12
+
5
13
  class ScheduleReport
6
14
 
7
- attr_reader :title
15
+ include Utils ## e.g. year_from_file, etc.
16
+
17
+ ##
18
+ ## quick hack? pass along (optional) patch
19
+
20
+ def self.build( files, title:,
21
+ patch: nil )
22
+ linter = Parser::Linter.new
8
23
 
9
- def initialize( stats, opts )
10
- @stats = stats
11
- @opts = opts
24
+ stats = []
25
+ files.each_with_index do |file,i|
26
+
27
+ puts "==> [#{i+1}/#{files.size}] reading >#{file}<..."
28
+
29
+ txt = read_text( file )
30
+
31
+ if patch && patch.respond_to?(:on_parse)
32
+ season_dir = File.basename(File.dirname(file))
33
+ season = Season( season_dir )
34
+ basename = File.basename(file, File.extname(file))
35
+ puts " [debug] before patch.on_parse #{basename}, #{season}"
36
+ txt = patch.on_parse( txt, basename, season )
37
+ end
38
+
39
+ linter.parse( txt, parse: true,
40
+ path: file ) ## todo/fix - change path to file/filename - why? why not?
12
41
 
13
- @title = opts[:title] || 'Your Title Here'
42
+ stat = ScheduleStat.new
43
+ stat.path = file
44
+ stat.errors = linter.errors
45
+
46
+ stats << stat
47
+ end
48
+
49
+ new( stats, title: title )
14
50
  end
15
51
 
16
- def save( path )
17
- ### save report as README.md in repo
18
- File.open( path, 'w' ) do |f|
19
- f.write build_summary
20
- end
52
+
53
+ attr_reader :title
54
+
55
+ def initialize( stats, title: )
56
+ @stats = stats
57
+ @title = title
21
58
  end
22
59
 
60
+ ### save report as README.md in repo
61
+ def save( path ) write_text( path, build_summary ); end
62
+
63
+
23
64
  def build_summary
24
- ## sort stats by season (latest first) then by name (e.g. 1-bundesliga, cup, etc.)
65
+ ## sort stats 1) by season (latest first), then
66
+ ## 2) by name (e.g. 1-bundesliga, cup, etc.)
25
67
  stats = @stats.sort do |l,r|
26
- v = r.season <=> l.season
27
- v = l.filename <=> r.filename if v == 0 ## same season
68
+ v = File.basename(File.dirname(r.path)) <=> File.basename(File.dirname(l.path))
69
+ v = File.basename(l.path) <=> File.basename(r.path) if v == 0 ## same season
28
70
  v
29
71
  end
30
72
 
@@ -35,11 +77,14 @@ def build_summary
35
77
  football.db RSSSF (Rec.Sport.Soccer Statistics Foundation) Archive Data for
36
78
  #{title}
37
79
 
38
- _Last Update: #{Time.now}_
39
-
40
80
  EOS
41
81
 
82
+ ## no longer add last update
83
+ ## _Last Update: #{Time.now}_
84
+ ##
42
85
 
86
+
87
+ =begin
43
88
  footer =<<EOS
44
89
 
45
90
  ## Questions? Comments?
@@ -48,30 +93,71 @@ Send them along to the
48
93
  [Open Sports & Friends Forum](http://groups.google.com/group/opensport).
49
94
  Thanks!
50
95
  EOS
96
+ =end
97
+
51
98
 
99
+ errors = []
52
100
 
53
- txt = ''
101
+
102
+ txt = String.new
54
103
  txt << header
55
104
 
56
- txt << "| Season | League, Cup | Rounds |\n"
105
+ txt << "| Season | League, Cup | Errors |\n"
57
106
  txt << "| :----- | :---------- | -----: |\n"
58
107
 
108
+
109
+ stats.each_with_index do |stat,i|
110
+
111
+ path = stat.path
112
+ season_dir = File.basename(File.dirname( path ))
113
+ filename = File.basename( path ) ## incl. extension !!
114
+
115
+ season = Season( season_dir )
116
+ ## note - use archive_dir_for_season for archive path
117
+
118
+
119
+ txt << "| #{season_dir} "
120
+ txt << "| [#{filename}](#{archive_dir_for_season(season)}/#{filename}) "
121
+
122
+ txt << if stat.errors.size > 0
123
+ "| **!! #{stat.errors.size}** "
124
+ else
125
+ "| OK "
126
+ end
127
+ txt << "|\n"
128
+
129
+ errors += stat.errors if stat.errors.size > 0
130
+ end
131
+
132
+ if errors.size > 0
133
+ txt << "\n\n"
134
+ txt << "#{errors.size} errors in #{stats.size} datafile(s)\n\n"
135
+
136
+ txt << "```\n"
137
+ errors.each do |path, msg, line|
138
+ season_dir = File.basename(File.dirname( path ))
139
+ filename = File.basename( path ) ## incl. extension !!
140
+
141
+ txt <<"#{season_dir}/#{filename} -- #{msg}\n"
142
+ txt << " in line >#{line}<\n" unless line.empty?
143
+ end
144
+ txt << "```\n"
145
+ end
146
+
147
+ =begin
59
148
  stats.each do |stat|
60
149
  txt << "| #{stat.season} "
61
150
  txt << "| [#{stat.filename}](#{stat.path}/#{stat.filename}) "
62
151
  txt << "| #{stat.rounds} "
63
152
  txt << "|\n"
64
153
  end
154
+ =end
65
155
 
66
- txt << "\n\n"
67
-
68
- txt << footer
156
+
157
+ ## txt << footer
69
158
  txt
70
159
  end # method build_summary
71
160
 
72
161
  end ## class ScheduleReport
73
162
  end ## module Rsssf
74
163
 
75
- ## add (shortcut) alias
76
- RsssfScheduleReport = Rsssf::ScheduleReport
77
-
@@ -1,31 +1,21 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Rsssf
4
3
 
5
4
  class Schedule
6
5
 
7
- def self.from_string( txt )
8
- self.new( txt )
9
- end
10
6
 
11
- attr_accessor :rounds # track no of rounds
7
+ # attr_accessor :rounds # track no of rounds - why? why not?
12
8
 
13
9
  def initialize( txt )
14
10
  @txt = txt
15
11
 
16
- @rounds = nil # undefined
12
+ ## @rounds = nil # undefined
17
13
  end
18
14
 
19
15
 
20
- def save( path )
21
- File.open( path, 'w' ) do |f|
22
- f.write @txt
23
- end
16
+ def save( path, header: )
17
+ write_text( path, header + @txt )
24
18
  end
25
19
 
26
20
  end ## class Schedule
27
21
  end ## module Rsssf
28
-
29
- ## add (shortcut) alias
30
- RsssfSchedule = Rsssf::Schedule
31
-
data/lib/rsssf/utils.rb CHANGED
@@ -1,8 +1,10 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Rsssf
4
3
  module Utils
5
4
 
5
+
6
+ ## move to Page - why? why not?
7
+
6
8
  def year_from_file( path )
7
9
  extname = File.extname( path )
8
10
  basename = File.basename( path, extname ) ## e.g. duit92.txt or duit92.html => duit92
@@ -32,44 +34,23 @@ def year_from_name( name )
32
34
  end # method year_from_name
33
35
 
34
36
 
35
- def year_to_season( year )
36
-
37
- ## todo: require four digit years? why? why not??
38
-
39
- ## e.g. 64 => 1963-64
40
- ## 2011 => 2010-11 etc.
41
-
42
- if year <= 16 ## assume 20xx for now from 00..16
43
- year += 2000
44
- elsif year <= 99
45
- year += 1900
46
- else
47
- # use as is; assume four digit year
48
- end
49
37
 
50
- year_prev = year-1
38
+ def archive_dir_for_season( season )
39
+ season = Season( season )
51
40
 
52
- "%4d-%02d" % [year_prev, year%100] ## e.g. return 1974-75
53
- end
54
-
55
-
56
- def archive_dir_for_year( year )
57
- season = year_to_season( year )
58
- if year <= 2010 # e.g. season 2009-10
41
+ if season < Season('2010') # e.g. season 2009-10
59
42
  ## use archive folder (w/ 1980s etc)
60
43
  ## get decade folder
61
- decade = year-1
44
+ decade = season.start_year ## 1999/2000 2000
62
45
  decade -= decade % 10 ## turn 1987 into 1980 etc
63
- "archive/#{decade}s/#{season}"
46
+ "archive/#{decade}s/#{season.to_path}"
64
47
  else
65
- season
48
+ season.to_path
66
49
  end
67
50
  end
68
51
 
69
52
 
53
+
70
54
  end # module Utils
71
55
  end # module Rsssf
72
56
 
73
- ## add (shortcut) alias
74
- RsssfUtils = Rsssf::Utils
75
-
data/lib/rsssf/version.rb CHANGED
@@ -1,10 +1,8 @@
1
- # encoding: utf-8
2
-
3
1
 
4
2
  module Rsssf
5
3
 
6
4
  MAJOR = 0
7
- MINOR = 1
5
+ MINOR = 2
8
6
  PATCH = 0
9
7
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
8
 
@@ -13,11 +11,11 @@ module Rsssf
13
11
  end
14
12
 
15
13
  def self.banner
16
- "rsssf/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
14
+ "rsssf/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
17
15
  end
18
16
 
19
17
  def self.root
20
- "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
18
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
21
19
  end
22
20
 
23
21
  end # module Rsssf
data/lib/rsssf.rb CHANGED
@@ -1,34 +1,57 @@
1
- # encoding: utf-8
2
1
 
3
- ## stdlibs
4
- require 'pp'
5
- require 'yaml'
6
- require 'uri'
2
+ ## 3rd party (our own)
3
+ require 'season/formats' ## add season support
4
+ require 'webget' ## incl. webget, webcache, webclient, etc.
5
+
6
+ require 'cocos'
7
+
8
+
9
+ ## (old) 3rd party libs
10
+ ## require 'textutils' ## used for File.read_utf8 etc.
11
+ ## require 'fetcher' ## used for Fetcher::Worker.new.fetch etc.
12
+
13
+
14
+ #######
15
+ ## add RsssfParser too
16
+ require 'rsssf/parser' ## from rsssf-parser gem
7
17
 
8
18
 
9
- ## 3rd party libs
10
- require 'textutils' ## used for File.read_utf8 etc.
11
- require 'fetcher' ## used for Fetcher::Worker.new.fetch etc.
12
19
 
13
20
 
14
21
  ## our own code
15
- require 'rsssf/version' # note: let version always go first
22
+ require_relative 'rsssf/version' # note: let version always go first
23
+
24
+ require_relative 'rsssf/utils' # include Utils - goes first
25
+
26
+ require_relative 'rsssf/download'
27
+
28
+ require_relative 'rsssf/convert'
29
+ require_relative 'rsssf/page'
30
+ require_relative 'rsssf/schedule'
31
+
32
+ require_relative 'rsssf/reports/schedule'
33
+ require_relative 'rsssf/reports/page'
34
+
35
+ require_relative 'rsssf/repo'
36
+
16
37
 
17
- require 'rsssf/utils' # include Utils - goes first
18
- require 'rsssf/html2txt' # include Filters - goes first
19
38
 
20
- require 'rsssf/fetch'
21
- require 'rsssf/page'
22
- require 'rsssf/schedule'
23
- require 'rsssf/patch'
24
39
 
25
- require 'rsssf/reports/schedule'
26
- require 'rsssf/reports/page'
40
+ #############
41
+ ## add (shortcut) alias(es)
42
+ RsssfPage = Rsssf::Page
43
+ RsssfPageConverter = Rsssf::PageConverter
44
+ RsssfPageStat = Rsssf::PageStat
45
+ RsssfPageReport = Rsssf::PageReport
27
46
 
28
- require 'rsssf/repo'
47
+ RsssfSchedule = Rsssf::Schedule
48
+ RsssfScheduleStat = Rsssf::ScheduleStat
49
+ RsssfScheduleReport = Rsssf::ScheduleReport
29
50
 
51
+ RsssfRepo = Rsssf::Repo
52
+ RsssfUtils = Rsssf::Utils
30
53
 
31
54
 
32
55
 
33
56
  ## say hello
34
- puts Rsssf.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
57
+ puts Rsssf.banner ## if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rsssf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-15 00:00:00.000000000 Z
11
+ date: 2024-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: logutils
14
+ name: cocos
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: textutils
28
+ name: season-formats
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: fetcher
42
+ name: rsssf-parser
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -56,63 +56,65 @@ dependencies:
56
56
  name: rdoc
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '4.0'
62
+ - - "<"
63
+ - !ruby/object:Gem::Version
64
+ version: '7'
62
65
  type: :development
63
66
  prerelease: false
64
67
  version_requirements: !ruby/object:Gem::Requirement
65
68
  requirements:
66
- - - "~>"
69
+ - - ">="
67
70
  - !ruby/object:Gem::Version
68
71
  version: '4.0'
72
+ - - "<"
73
+ - !ruby/object:Gem::Version
74
+ version: '7'
69
75
  - !ruby/object:Gem::Dependency
70
76
  name: hoe
71
77
  requirement: !ruby/object:Gem::Requirement
72
78
  requirements:
73
79
  - - "~>"
74
80
  - !ruby/object:Gem::Version
75
- version: '3.13'
81
+ version: '4.1'
76
82
  type: :development
77
83
  prerelease: false
78
84
  version_requirements: !ruby/object:Gem::Requirement
79
85
  requirements:
80
86
  - - "~>"
81
87
  - !ruby/object:Gem::Version
82
- version: '3.13'
88
+ version: '4.1'
83
89
  description: rsssf - tools 'n' scripts for RSSSF (Rec.Sport.Soccer Statistics Foundation)
84
90
  archive data
85
- email: opensport@googlegroups.com
91
+ email: gerald.bauer@gmail.com
86
92
  executables: []
87
93
  extensions: []
88
94
  extra_rdoc_files:
89
- - HISTORY.md
95
+ - CHANGELOG.md
90
96
  - Manifest.txt
91
97
  - README.md
92
98
  files:
93
- - ".gemtest"
94
- - HISTORY.md
99
+ - CHANGELOG.md
95
100
  - Manifest.txt
96
101
  - README.md
97
102
  - Rakefile
98
103
  - lib/rsssf.rb
99
- - lib/rsssf/fetch.rb
100
- - lib/rsssf/html2txt.rb
104
+ - lib/rsssf/convert.rb
105
+ - lib/rsssf/download.rb
101
106
  - lib/rsssf/page.rb
102
- - lib/rsssf/patch.rb
103
107
  - lib/rsssf/repo.rb
104
108
  - lib/rsssf/reports/page.rb
105
109
  - lib/rsssf/reports/schedule.rb
106
110
  - lib/rsssf/schedule.rb
107
111
  - lib/rsssf/utils.rb
108
112
  - lib/rsssf/version.rb
109
- - test/helper.rb
110
- - test/test_utils.rb
111
- homepage: https://github.com/sportdb/rsssf
113
+ homepage: https://github.com/sportdb/sport.db.sources
112
114
  licenses:
113
115
  - Public Domain
114
116
  metadata: {}
115
- post_install_message:
117
+ post_install_message:
116
118
  rdoc_options:
117
119
  - "--main"
118
120
  - README.md
@@ -122,16 +124,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
122
124
  requirements:
123
125
  - - ">="
124
126
  - !ruby/object:Gem::Version
125
- version: 1.9.2
127
+ version: 2.2.2
126
128
  required_rubygems_version: !ruby/object:Gem::Requirement
127
129
  requirements:
128
130
  - - ">="
129
131
  - !ruby/object:Gem::Version
130
132
  version: '0'
131
133
  requirements: []
132
- rubyforge_project:
133
- rubygems_version: 2.2.3
134
- signing_key:
134
+ rubygems_version: 3.4.10
135
+ signing_key:
135
136
  specification_version: 4
136
137
  summary: rsssf - tools 'n' scripts for RSSSF (Rec.Sport.Soccer Statistics Foundation)
137
138
  archive data
data/.gemtest DELETED
File without changes
data/lib/rsssf/fetch.rb DELETED
@@ -1,80 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Rsssf
4
-
5
- class PageFetcher
6
-
7
- include Filters # e.g. html2text, sanitize etc.
8
-
9
-
10
- def initialize
11
- @worker = Fetcher::Worker.new
12
- end
13
-
14
- def fetch( src_url )
15
-
16
- ## note: assume plain 7-bit ascii for now
17
- ## -- assume rsssf uses ISO_8859_15 (updated version of ISO_8859_1) -- does NOT use utf-8 character encoding!!!
18
- html = @worker.read( src_url )
19
-
20
- ### todo/fix: first check if html is all ascii-7bit e.g.
21
- ## includes only chars from 0 to 127!!!
22
-
23
- ## normalize newlines
24
- ## remove \r (carriage return) used by Windows; just use \n (new line)
25
- html = html.gsub( "\r", '' )
26
-
27
- ## note:
28
- ## assume ISO 8859-15 by default (an updated version of ISO 8859-1) for now
29
- ##
30
- ## other possible alternatives - try:
31
- ## - Windows CP 1252 or
32
- ## - ISO 8859-2 (for eastern european languages)
33
- ##
34
- ## note: german umlaut use the same code (int)
35
- ## in ISO 8859-1/15 and 2 and Windows CP 1252 (other chars ARE different!!!)
36
-
37
- html = html.force_encoding( Encoding::ISO_8859_15 )
38
- html = html.encode( Encoding::UTF_8 ) # try conversion to utf-8
39
-
40
- ## check for html entities
41
- html = html.gsub( "&auml;", 'ä' )
42
- html = html.gsub( "&ouml;", 'ö' )
43
- html = html.gsub( "&uuml;", 'ü' )
44
- html = html.gsub( "&Auml;", 'Ä' )
45
- html = html.gsub( "&Ouml;", 'Ö' )
46
- html = html.gsub( "&Uuml;", 'Ü' )
47
- html = html.gsub( "&szlig;", 'ß' )
48
-
49
- html = html.gsub( "&oulm;", 'ö' ) ## support typo in entity (&ouml;)
50
- html = html.gsub( "&slig;", "ß" ) ## support typo in entity (&szlig;)
51
-
52
- html = html.gsub( "&Eacute;", 'É' )
53
- html = html.gsub( "&oslash;", 'ø' )
54
-
55
- ## check for more entities
56
- html = html.gsub( /&[^;]+;/) do |match|
57
- puts "*** found unencoded html entity #{match}"
58
- match ## pass through as is (1:1)
59
- end
60
- ## todo/fix: add more entities
61
-
62
-
63
- txt = html_to_txt( html )
64
-
65
- header = <<EOS
66
- <!--
67
- source: #{src_url}
68
- -->
69
-
70
- EOS
71
-
72
- header+txt ## return txt w/ header
73
- end ## method fetch
74
-
75
- end ## class PageFetcher
76
- end ## module Rsssf
77
-
78
- ## add (shortcut) alias
79
- RsssfPageFetcher = Rsssf::PageFetcher
80
-