rsssf 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,25 +1,31 @@
1
- # encoding: utf-8
2
1
 
3
2
 
4
3
  module Rsssf
5
4
 
6
5
  class PageReport
7
6
 
7
+
8
+ def self.build( files, title: )
9
+ stats = []
10
+ files.each do |file|
11
+ page = Page.read_txt( file )
12
+ stats << page.build_stat
13
+ end
14
+
15
+ new( stats, title: title )
16
+ end
17
+
18
+
8
19
  attr_reader :title
9
20
 
10
- def initialize( stats, opts )
21
+ def initialize( stats, title: )
11
22
  @stats = stats
12
- @opts = opts
13
-
14
- @title = opts[:title] || 'Your Title Here'
23
+ @title = title
15
24
  end
16
25
 
17
- def save( path )
18
- ### save report as README.md in repo
19
- File.open( path, 'w' ) do |f|
20
- f.write build_summary
21
- end
22
- end
26
+ ### save report as README.md in repo
27
+ def save( path ) write_text( path, build_summary ); end
28
+
23
29
 
24
30
  def build_summary
25
31
 
@@ -33,19 +39,26 @@ def build_summary
33
39
 
34
40
  football.db RSSSF Archive Data Summary for #{title}
35
41
 
36
- _Last Update: #{Time.now}_
37
-
38
42
  EOS
39
43
 
44
+ ## no longer add last update
45
+ ## _Last Update: #{Time.now}_
46
+
47
+
40
48
  txt = ''
41
49
  txt << header
42
50
 
43
- txt << "| Season | File | Authors | Last Updated | Lines (Chars) | Sections |\n"
44
- txt << "| :----- | :----- | :------- | :----------- | ------------: | :------- |\n"
51
+ txt << "| File | Authors | Last Updated | Lines (Chars) | Sections |\n"
52
+ txt << "| :----- | :------- | :----------- | ------------: | :------- |\n"
53
+
54
+ ## note - removed season (no longer tracked here)
45
55
 
46
56
  stats.each do |stat|
47
- txt << "| #{stat.season} "
48
- txt << "| [#{stat.basename}.txt](#{stat.basename}.txt) "
57
+ ## get basename from source url
58
+ url_path = URI.parse( stat.source ).path
59
+ basename = File.basename( url_path, File.extname( url_path ) ) ## e.g. duit92.txt or duit92.html => duit92
60
+
61
+ txt << "| [#{basename}.txt](#{basename}.txt) "
49
62
  txt << "| #{stat.authors} "
50
63
  txt << "| #{stat.last_updated} "
51
64
  txt << "| #{stat.line_count} (#{stat.char_count}) "
@@ -60,5 +73,3 @@ end # method build_summary
60
73
  end ## class PageReport
61
74
  end ## module Rsssf
62
75
 
63
- ## add (shortcut) alias
64
- RsssfPageReport = Rsssf::PageReport
@@ -1,30 +1,72 @@
1
- # encoding: utf-8
1
+
2
2
 
3
3
  module Rsssf
4
-
4
+
5
+
6
+ ScheduleStat = Struct.new(
7
+ :path, ## path to .txt file
8
+ :errors ## array or nil
9
+ )
10
+
11
+
12
+
5
13
  class ScheduleReport
6
14
 
7
- attr_reader :title
15
+ include Utils ## e.g. year_from_file, etc.
16
+
17
+ ##
18
+ ## quick hack? pass along (optional) patch
19
+
20
+ def self.build( files, title:,
21
+ patch: nil )
22
+ linter = Parser::Linter.new
8
23
 
9
- def initialize( stats, opts )
10
- @stats = stats
11
- @opts = opts
24
+ stats = []
25
+ files.each_with_index do |file,i|
26
+
27
+ puts "==> [#{i+1}/#{files.size}] reading >#{file}<..."
28
+
29
+ txt = read_text( file )
30
+
31
+ if patch && patch.respond_to?(:on_parse)
32
+ season_dir = File.basename(File.dirname(file))
33
+ season = Season( season_dir )
34
+ basename = File.basename(file, File.extname(file))
35
+ puts " [debug] before patch.on_parse #{basename}, #{season}"
36
+ txt = patch.on_parse( txt, basename, season )
37
+ end
38
+
39
+ linter.parse( txt, parse: true,
40
+ path: file ) ## todo/fix - change path to file/filename - why? why not?
12
41
 
13
- @title = opts[:title] || 'Your Title Here'
42
+ stat = ScheduleStat.new
43
+ stat.path = file
44
+ stat.errors = linter.errors
45
+
46
+ stats << stat
47
+ end
48
+
49
+ new( stats, title: title )
14
50
  end
15
51
 
16
- def save( path )
17
- ### save report as README.md in repo
18
- File.open( path, 'w' ) do |f|
19
- f.write build_summary
20
- end
52
+
53
+ attr_reader :title
54
+
55
+ def initialize( stats, title: )
56
+ @stats = stats
57
+ @title = title
21
58
  end
22
59
 
60
+ ### save report as README.md in repo
61
+ def save( path ) write_text( path, build_summary ); end
62
+
63
+
23
64
  def build_summary
24
- ## sort start by season (latest first) than by name (e.g. 1-bundesliga, cup, etc.)
65
+ ## sort stats 1) by season (latest first) then
66
+ ## 2) by name (e.g. 1-bundesliga, cup, etc.)
25
67
  stats = @stats.sort do |l,r|
26
- v = r.season <=> l.season
27
- v = l.filename <=> r.filename if v == 0 ## same season
68
+ v = File.basename(File.dirname(r.path)) <=> File.basename(File.dirname(l.path))
69
+ v = File.basename(l.path) <=> File.basename(r.path) if v == 0 ## same season
28
70
  v
29
71
  end
30
72
 
@@ -35,11 +77,14 @@ def build_summary
35
77
  football.db RSSSF (Rec.Sport.Soccer Statistics Foundation) Archive Data for
36
78
  #{title}
37
79
 
38
- _Last Update: #{Time.now}_
39
-
40
80
  EOS
41
81
 
82
+ ## no longer add last update
83
+ ## _Last Update: #{Time.now}_
84
+ ##
42
85
 
86
+
87
+ =begin
43
88
  footer =<<EOS
44
89
 
45
90
  ## Questions? Comments?
@@ -48,30 +93,71 @@ Send them along to the
48
93
  [Open Sports & Friends Forum](http://groups.google.com/group/opensport).
49
94
  Thanks!
50
95
  EOS
96
+ =end
97
+
51
98
 
99
+ errors = []
52
100
 
53
- txt = ''
101
+
102
+ txt = String.new
54
103
  txt << header
55
104
 
56
- txt << "| Season | League, Cup | Rounds |\n"
105
+ txt << "| Season | League, Cup | Errors |\n"
57
106
  txt << "| :----- | :---------- | -----: |\n"
58
107
 
108
+
109
+ stats.each_with_index do |stat,i|
110
+
111
+ path = stat.path
112
+ season_dir = File.basename(File.dirname( path ))
113
+ filename = File.basename( path ) ## incl. extension !!
114
+
115
+ season = Season( season_dir )
116
+ ## note - use archive_dir_for_season for archive path
117
+
118
+
119
+ txt << "| #{season_dir} "
120
+ txt << "| [#{filename}](#{archive_dir_for_season(season)}/#{filename}) "
121
+
122
+ txt << if stat.errors.size > 0
123
+ "| **!! #{stat.errors.size}** "
124
+ else
125
+ "| OK "
126
+ end
127
+ txt << "|\n"
128
+
129
+ errors += stat.errors if stat.errors.size > 0
130
+ end
131
+
132
+ if errors.size > 0
133
+ txt << "\n\n"
134
+ txt << "#{errors.size} errors in #{stats.size} datafile(s)\n\n"
135
+
136
+ txt << "```\n"
137
+ errors.each do |path, msg, line|
138
+ season_dir = File.basename(File.dirname( path ))
139
+ filename = File.basename( path ) ## incl. extension !!
140
+
141
+ txt <<"#{season_dir}/#{filename} -- #{msg}\n"
142
+ txt << " in line >#{line}<\n" unless line.empty?
143
+ end
144
+ txt << "```\n"
145
+ end
146
+
147
+ =begin
59
148
  stats.each do |stat|
60
149
  txt << "| #{stat.season} "
61
150
  txt << "| [#{stat.filename}](#{stat.path}/#{stat.filename}) "
62
151
  txt << "| #{stat.rounds} "
63
152
  txt << "|\n"
64
153
  end
154
+ =end
65
155
 
66
- txt << "\n\n"
67
-
68
- txt << footer
156
+
157
+ ## txt << footer
69
158
  txt
70
159
  end # method build_summary
71
160
 
72
161
  end ## class ScheduleReport
73
162
  end ## module Rsssf
74
163
 
75
- ## add (shortcut) alias
76
- RsssfScheduleReport = Rsssf::ScheduleReport
77
-
@@ -1,31 +1,21 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Rsssf
4
3
 
5
4
  class Schedule
6
5
 
7
- def self.from_string( txt )
8
- self.new( txt )
9
- end
10
6
 
11
- attr_accessor :rounds # track no of rounds
7
+ # attr_accessor :rounds # track no of rounds - why? why not?
12
8
 
13
9
  def initialize( txt )
14
10
  @txt = txt
15
11
 
16
- @rounds = nil # undefined
12
+ ## @rounds = nil # undefined
17
13
  end
18
14
 
19
15
 
20
- def save( path )
21
- File.open( path, 'w' ) do |f|
22
- f.write @txt
23
- end
16
+ def save( path, header: )
17
+ write_text( path, header + @txt )
24
18
  end
25
19
 
26
20
  end ## class Schedule
27
21
  end ## module Rsssf
28
-
29
- ## add (shortcut) alias
30
- RsssfSchedule = Rsssf::Schedule
31
-
data/lib/rsssf/utils.rb CHANGED
@@ -1,8 +1,10 @@
1
- # encoding: utf-8
2
1
 
3
2
  module Rsssf
4
3
  module Utils
5
4
 
5
+
6
+ ## move to Page - why? why not?
7
+
6
8
  def year_from_file( path )
7
9
  extname = File.extname( path )
8
10
  basename = File.basename( path, extname ) ## e.g. duit92.txt or duit92.html => duit92
@@ -32,44 +34,23 @@ def year_from_name( name )
32
34
  end # method year_from_name
33
35
 
34
36
 
35
- def year_to_season( year )
36
-
37
- ## todo: require four digit years? why? why not??
38
-
39
- ## e.g. 64 => 1963-64
40
- ## 2011 => 2010-11 etc.
41
-
42
- if year <= 16 ## assume 20xx for now from 00..16
43
- year += 2000
44
- elsif year <= 99
45
- year += 1900
46
- else
47
- # use as is; assume four digit year
48
- end
49
37
 
50
- year_prev = year-1
38
+ def archive_dir_for_season( season )
39
+ season = Season( season )
51
40
 
52
- "%4d-%02d" % [year_prev, year%100] ## e.g. return 1974-75
53
- end
54
-
55
-
56
- def archive_dir_for_year( year )
57
- season = year_to_season( year )
58
- if year <= 2010 # e.g. season 2009-10
41
+ if season < Season('2010') # e.g. season 2009-10
59
42
  ## use archive folder (w/ 1980s etc)
60
43
  ## get decade folder
61
- decade = year-1
44
+ decade = season.start_year ## 1999/2000 2000
62
45
  decade -= decade % 10 ## turn 1987 into 1980 etc
63
- "archive/#{decade}s/#{season}"
46
+ "archive/#{decade}s/#{season.to_path}"
64
47
  else
65
- season
48
+ season.to_path
66
49
  end
67
50
  end
68
51
 
69
52
 
53
+
70
54
  end # module Utils
71
55
  end # module Rsssf
72
56
 
73
- ## add (shortcut) alias
74
- RsssfUtils = Rsssf::Utils
75
-
data/lib/rsssf/version.rb CHANGED
@@ -1,10 +1,8 @@
1
- # encoding: utf-8
2
-
3
1
 
4
2
  module Rsssf
5
3
 
6
4
  MAJOR = 0
7
- MINOR = 1
5
+ MINOR = 2
8
6
  PATCH = 0
9
7
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
8
 
@@ -13,11 +11,11 @@ module Rsssf
13
11
  end
14
12
 
15
13
  def self.banner
16
- "rsssf/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
14
+ "rsssf/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
17
15
  end
18
16
 
19
17
  def self.root
20
- "#{File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )}"
18
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
21
19
  end
22
20
 
23
21
  end # module Rsssf
data/lib/rsssf.rb CHANGED
@@ -1,34 +1,57 @@
1
- # encoding: utf-8
2
1
 
3
- ## stdlibs
4
- require 'pp'
5
- require 'yaml'
6
- require 'uri'
2
+ ## 3rd party (our own)
3
+ require 'season/formats' ## add season support
4
+ require 'webget' ## incl. webget, webcache, webclient, etc.
5
+
6
+ require 'cocos'
7
+
8
+
9
+ ## (old) 3rd party libs
10
+ ## require 'textutils' ## used for File.read_utf8 etc.
11
+ ## require 'fetcher' ## used for Fetcher::Worker.new.fetch etc.
12
+
13
+
14
+ #######
15
+ ## add RsssfParser too
16
+ require 'rsssf/parser' ## from rsssf-parser gem
7
17
 
8
18
 
9
- ## 3rd party libs
10
- require 'textutils' ## used for File.read_utf8 etc.
11
- require 'fetcher' ## used for Fetcher::Worker.new.fetch etc.
12
19
 
13
20
 
14
21
  ## our own code
15
- require 'rsssf/version' # note: let version always go first
22
+ require_relative 'rsssf/version' # note: let version always go first
23
+
24
+ require_relative 'rsssf/utils' # include Utils - goes first
25
+
26
+ require_relative 'rsssf/download'
27
+
28
+ require_relative 'rsssf/convert'
29
+ require_relative 'rsssf/page'
30
+ require_relative 'rsssf/schedule'
31
+
32
+ require_relative 'rsssf/reports/schedule'
33
+ require_relative 'rsssf/reports/page'
34
+
35
+ require_relative 'rsssf/repo'
36
+
16
37
 
17
- require 'rsssf/utils' # include Utils - goes first
18
- require 'rsssf/html2txt' # include Filters - goes first
19
38
 
20
- require 'rsssf/fetch'
21
- require 'rsssf/page'
22
- require 'rsssf/schedule'
23
- require 'rsssf/patch'
24
39
 
25
- require 'rsssf/reports/schedule'
26
- require 'rsssf/reports/page'
40
+ #############
41
+ ## add (shortcut) alias(es)
42
+ RsssfPage = Rsssf::Page
43
+ RsssfPageConverter = Rsssf::PageConverter
44
+ RsssfPageStat = Rsssf::PageStat
45
+ RsssfPageReport = Rsssf::PageReport
27
46
 
28
- require 'rsssf/repo'
47
+ RsssfSchedule = Rsssf::Schedule
48
+ RsssfScheduleStat = Rsssf::ScheduleStat
49
+ RsssfScheduleReport = Rsssf::ScheduleReport
29
50
 
51
+ RsssfRepo = Rsssf::Repo
52
+ RsssfUtils = Rsssf::Utils
30
53
 
31
54
 
32
55
 
33
56
  ## say hello
34
- puts Rsssf.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
57
+ puts Rsssf.banner ## if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
metadata CHANGED
@@ -1,17 +1,17 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rsssf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-15 00:00:00.000000000 Z
11
+ date: 2024-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: logutils
14
+ name: cocos
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - ">="
@@ -25,7 +25,7 @@ dependencies:
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
- name: textutils
28
+ name: season-formats
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - ">="
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: fetcher
42
+ name: rsssf-parser
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -56,63 +56,65 @@ dependencies:
56
56
  name: rdoc
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '4.0'
62
+ - - "<"
63
+ - !ruby/object:Gem::Version
64
+ version: '7'
62
65
  type: :development
63
66
  prerelease: false
64
67
  version_requirements: !ruby/object:Gem::Requirement
65
68
  requirements:
66
- - - "~>"
69
+ - - ">="
67
70
  - !ruby/object:Gem::Version
68
71
  version: '4.0'
72
+ - - "<"
73
+ - !ruby/object:Gem::Version
74
+ version: '7'
69
75
  - !ruby/object:Gem::Dependency
70
76
  name: hoe
71
77
  requirement: !ruby/object:Gem::Requirement
72
78
  requirements:
73
79
  - - "~>"
74
80
  - !ruby/object:Gem::Version
75
- version: '3.13'
81
+ version: '4.1'
76
82
  type: :development
77
83
  prerelease: false
78
84
  version_requirements: !ruby/object:Gem::Requirement
79
85
  requirements:
80
86
  - - "~>"
81
87
  - !ruby/object:Gem::Version
82
- version: '3.13'
88
+ version: '4.1'
83
89
  description: rsssf - tools 'n' scripts for RSSSF (Rec.Sport.Soccer Statistics Foundation)
84
90
  archive data
85
- email: opensport@googlegroups.com
91
+ email: gerald.bauer@gmail.com
86
92
  executables: []
87
93
  extensions: []
88
94
  extra_rdoc_files:
89
- - HISTORY.md
95
+ - CHANGELOG.md
90
96
  - Manifest.txt
91
97
  - README.md
92
98
  files:
93
- - ".gemtest"
94
- - HISTORY.md
99
+ - CHANGELOG.md
95
100
  - Manifest.txt
96
101
  - README.md
97
102
  - Rakefile
98
103
  - lib/rsssf.rb
99
- - lib/rsssf/fetch.rb
100
- - lib/rsssf/html2txt.rb
104
+ - lib/rsssf/convert.rb
105
+ - lib/rsssf/download.rb
101
106
  - lib/rsssf/page.rb
102
- - lib/rsssf/patch.rb
103
107
  - lib/rsssf/repo.rb
104
108
  - lib/rsssf/reports/page.rb
105
109
  - lib/rsssf/reports/schedule.rb
106
110
  - lib/rsssf/schedule.rb
107
111
  - lib/rsssf/utils.rb
108
112
  - lib/rsssf/version.rb
109
- - test/helper.rb
110
- - test/test_utils.rb
111
- homepage: https://github.com/sportdb/rsssf
113
+ homepage: https://github.com/sportdb/sport.db.sources
112
114
  licenses:
113
115
  - Public Domain
114
116
  metadata: {}
115
- post_install_message:
117
+ post_install_message:
116
118
  rdoc_options:
117
119
  - "--main"
118
120
  - README.md
@@ -122,16 +124,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
122
124
  requirements:
123
125
  - - ">="
124
126
  - !ruby/object:Gem::Version
125
- version: 1.9.2
127
+ version: 2.2.2
126
128
  required_rubygems_version: !ruby/object:Gem::Requirement
127
129
  requirements:
128
130
  - - ">="
129
131
  - !ruby/object:Gem::Version
130
132
  version: '0'
131
133
  requirements: []
132
- rubyforge_project:
133
- rubygems_version: 2.2.3
134
- signing_key:
134
+ rubygems_version: 3.4.10
135
+ signing_key:
135
136
  specification_version: 4
136
137
  summary: rsssf - tools 'n' scripts for RSSSF (Rec.Sport.Soccer Statistics Foundation)
137
138
  archive data
data/.gemtest DELETED
File without changes
data/lib/rsssf/fetch.rb DELETED
@@ -1,80 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module Rsssf
4
-
5
- class PageFetcher
6
-
7
- include Filters # e.g. html2text, sanitize etc.
8
-
9
-
10
- def initialize
11
- @worker = Fetcher::Worker.new
12
- end
13
-
14
- def fetch( src_url )
15
-
16
- ## note: assume plain 7-bit ascii for now
17
- ## -- assume rsssf uses ISO_8859_15 (updated version of ISO_8859_1) -- does NOT use utf-8 character encoding!!!
18
- html = @worker.read( src_url )
19
-
20
- ### todo/fix: first check if html is all ascii-7bit e.g.
21
- ## includes only chars from 0 to 127!!!
22
-
23
- ## normalize newlines
24
- ## remove \r (carriage return) used by Windows; just use \n (new line)
25
- html = html.gsub( "\r", '' )
26
-
27
- ## note:
28
- ## assume (default) to ISO 8859-15 (an updated version of ISO 8859-1) for now
29
- ##
30
- ## other possible alternatives - try:
31
- ## - Windows CP 1252 or
32
- ## - ISO 8859-2 (for eastern european languages )
33
- ##
34
- ## note: german umlaut use the same code (int)
35
- ## in ISO 8859-1/15 and 2 and Windows CP1252 (other chars ARE different!!!)
36
-
37
- html = html.force_encoding( Encoding::ISO_8859_15 )
38
- html = html.encode( Encoding::UTF_8 ) # try conversion to utf-8
39
-
40
- ## check for html entities
41
- html = html.gsub( "&auml;", 'ä' )
42
- html = html.gsub( "&ouml;", 'ö' )
43
- html = html.gsub( "&uuml;", 'ü' )
44
- html = html.gsub( "&Auml;", 'Ä' )
45
- html = html.gsub( "&Ouml;", 'Ö' )
46
- html = html.gsub( "&Uuml;", 'Ü' )
47
- html = html.gsub( "&szlig;", 'ß' )
48
-
49
- html = html.gsub( "&oulm;", 'ö' ) ## support typo in entity (&ouml;)
50
- html = html.gsub( "&slig;", "ß" ) ## support typo in entity (&szlig;)
51
-
52
- html = html.gsub( "&Eacute;", 'É' )
53
- html = html.gsub( "&oslash;", 'ø' )
54
-
55
- ## check for more entities
56
- html = html.gsub( /&[^;]+;/) do |match|
57
- puts "*** found unencoded html entity #{match}"
58
- match ## pass through as is (1:1)
59
- end
60
- ## todo/fix: add more entities
61
-
62
-
63
- txt = html_to_txt( html )
64
-
65
- header = <<EOS
66
- <!--
67
- source: #{src_url}
68
- -->
69
-
70
- EOS
71
-
72
- header+txt ## return txt w/ header
73
- end ## method fetch
74
-
75
- end ## class PageFetcher
76
- end ## module Rsssf
77
-
78
- ## add (shortcut) alias
79
- RsssfPageFetcher = Rsssf::PageFetcher
80
-