rsssf 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/{HISTORY.md → CHANGELOG.md} +2 -0
- data/Manifest.txt +3 -6
- data/README.md +43 -26
- data/Rakefile +8 -7
- data/lib/rsssf/convert.rb +495 -0
- data/lib/rsssf/download.rb +151 -0
- data/lib/rsssf/page.rb +70 -45
- data/lib/rsssf/repo.rb +77 -153
- data/lib/rsssf/reports/page.rb +30 -19
- data/lib/rsssf/reports/schedule.rb +111 -25
- data/lib/rsssf/schedule.rb +4 -14
- data/lib/rsssf/utils.rb +10 -29
- data/lib/rsssf/version.rb +3 -5
- data/lib/rsssf.rb +42 -19
- metadata +26 -25
- data/.gemtest +0 -0
- data/lib/rsssf/fetch.rb +0 -80
- data/lib/rsssf/html2txt.rb +0 -157
- data/lib/rsssf/patch.rb +0 -28
- data/test/helper.rb +0 -12
- data/test/test_utils.rb +0 -83
data/lib/rsssf/reports/page.rb
CHANGED
@@ -1,25 +1,31 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
|
4
3
|
module Rsssf
|
5
4
|
|
6
5
|
class PageReport
|
7
6
|
|
7
|
+
|
8
|
+
def self.build( files, title: )
|
9
|
+
stats = []
|
10
|
+
files.each do |file|
|
11
|
+
page = Page.read_txt( file )
|
12
|
+
stats << page.build_stat
|
13
|
+
end
|
14
|
+
|
15
|
+
new( stats, title: title )
|
16
|
+
end
|
17
|
+
|
18
|
+
|
8
19
|
attr_reader :title
|
9
20
|
|
10
|
-
def initialize( stats,
|
21
|
+
def initialize( stats, title: )
|
11
22
|
@stats = stats
|
12
|
-
@
|
13
|
-
|
14
|
-
@title = opts[:title] || 'Your Title Here'
|
23
|
+
@title = title
|
15
24
|
end
|
16
25
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
f.write build_summary
|
21
|
-
end
|
22
|
-
end
|
26
|
+
### save report as README.md in repo
|
27
|
+
def save( path ) write_text( path, build_summary ); end
|
28
|
+
|
23
29
|
|
24
30
|
def build_summary
|
25
31
|
|
@@ -33,19 +39,26 @@ def build_summary
|
|
33
39
|
|
34
40
|
football.db RSSSF Archive Data Summary for #{title}
|
35
41
|
|
36
|
-
_Last Update: #{Time.now}_
|
37
|
-
|
38
42
|
EOS
|
39
43
|
|
44
|
+
## no longer add last update
|
45
|
+
## _Last Update: #{Time.now}_
|
46
|
+
|
47
|
+
|
40
48
|
txt = ''
|
41
49
|
txt << header
|
42
50
|
|
43
|
-
txt << "|
|
44
|
-
txt << "| :----- |
|
51
|
+
txt << "| File | Authors | Last Updated | Lines (Chars) | Sections |\n"
|
52
|
+
txt << "| :----- | :------- | :----------- | ------------: | :------- |\n"
|
53
|
+
|
54
|
+
## note - removed season (no longer tracked here)
|
45
55
|
|
46
56
|
stats.each do |stat|
|
47
|
-
|
48
|
-
|
57
|
+
## get basename from source url
|
58
|
+
url_path = URI.parse( stat.source ).path
|
59
|
+
basename = File.basename( url_path, File.extname( url_path ) ) ## e.g. duit92.txt or duit92.html => duit92
|
60
|
+
|
61
|
+
txt << "| [#{basename}.txt](#{basename}.txt) "
|
49
62
|
txt << "| #{stat.authors} "
|
50
63
|
txt << "| #{stat.last_updated} "
|
51
64
|
txt << "| #{stat.line_count} (#{stat.char_count}) "
|
@@ -60,5 +73,3 @@ end # method build_summary
|
|
60
73
|
end ## class PageReport
|
61
74
|
end ## module Rsssf
|
62
75
|
|
63
|
-
## add (shortcut) alias
|
64
|
-
RsssfPageReport = Rsssf::PageReport
|
@@ -1,30 +1,72 @@
|
|
1
|
-
|
1
|
+
|
2
2
|
|
3
3
|
module Rsssf
|
4
|
-
|
4
|
+
|
5
|
+
|
6
|
+
ScheduleStat = Struct.new(
|
7
|
+
:path, ## path to .txt file
|
8
|
+
:errors ## array or nil
|
9
|
+
)
|
10
|
+
|
11
|
+
|
12
|
+
|
5
13
|
class ScheduleReport
|
6
14
|
|
7
|
-
|
15
|
+
include Utils ## e.g. year_from_file, etc.
|
16
|
+
|
17
|
+
##
|
18
|
+
## quick hack? pass along (optional) patch
|
19
|
+
|
20
|
+
def self.build( files, title:,
|
21
|
+
patch: nil )
|
22
|
+
linter = Parser::Linter.new
|
8
23
|
|
9
|
-
|
10
|
-
|
11
|
-
|
24
|
+
stats = []
|
25
|
+
files.each_with_index do |file,i|
|
26
|
+
|
27
|
+
puts "==> [#{i+1}/#{files.size}] reading >#{file}<..."
|
28
|
+
|
29
|
+
txt = read_text( file )
|
30
|
+
|
31
|
+
if patch && patch.respond_to?(:on_parse)
|
32
|
+
season_dir = File.basename(File.dirname(file))
|
33
|
+
season = Season( season_dir )
|
34
|
+
basename = File.basename(file, File.extname(file))
|
35
|
+
puts " [debug] before patch.on_parse #{basename}, #{season}"
|
36
|
+
txt = patch.on_parse( txt, basename, season )
|
37
|
+
end
|
38
|
+
|
39
|
+
linter.parse( txt, parse: true,
|
40
|
+
path: file ) ## todo/fix - change path to file/filename - why? why not?
|
12
41
|
|
13
|
-
|
42
|
+
stat = ScheduleStat.new
|
43
|
+
stat.path = file
|
44
|
+
stat.errors = linter.errors
|
45
|
+
|
46
|
+
stats << stat
|
47
|
+
end
|
48
|
+
|
49
|
+
new( stats, title: title )
|
14
50
|
end
|
15
51
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
52
|
+
|
53
|
+
attr_reader :title
|
54
|
+
|
55
|
+
def initialize( stats, title: )
|
56
|
+
@stats = stats
|
57
|
+
@title = title
|
21
58
|
end
|
22
59
|
|
60
|
+
### save report as README.md in repo
|
61
|
+
def save( path ) write_text( path, build_summary ); end
|
62
|
+
|
63
|
+
|
23
64
|
def build_summary
|
24
|
-
## sort start by season (latest first) than
|
65
|
+
## sort stats 1) by season (latest first), then
|
66
|
+
## 2) by name (e.g. 1-bundesliga, cup, etc.)
|
25
67
|
stats = @stats.sort do |l,r|
|
26
|
-
v = r.
|
27
|
-
v = l.
|
68
|
+
v = File.basename(File.dirname(r.path)) <=> File.basename(File.dirname(l.path))
|
69
|
+
v = File.basename(l.path) <=> File.basename(r.path) if v == 0 ## same season
|
28
70
|
v
|
29
71
|
end
|
30
72
|
|
@@ -35,11 +77,14 @@ def build_summary
|
|
35
77
|
football.db RSSSF (Rec.Sport.Soccer Statistics Foundation) Archive Data for
|
36
78
|
#{title}
|
37
79
|
|
38
|
-
_Last Update: #{Time.now}_
|
39
|
-
|
40
80
|
EOS
|
41
81
|
|
82
|
+
## no longer add last update
|
83
|
+
## _Last Update: #{Time.now}_
|
84
|
+
##
|
42
85
|
|
86
|
+
|
87
|
+
=begin
|
43
88
|
footer =<<EOS
|
44
89
|
|
45
90
|
## Questions? Comments?
|
@@ -48,30 +93,71 @@ Send them along to the
|
|
48
93
|
[Open Sports & Friends Forum](http://groups.google.com/group/opensport).
|
49
94
|
Thanks!
|
50
95
|
EOS
|
96
|
+
=end
|
97
|
+
|
51
98
|
|
99
|
+
errors = []
|
52
100
|
|
53
|
-
|
101
|
+
|
102
|
+
txt = String.new
|
54
103
|
txt << header
|
55
104
|
|
56
|
-
txt << "| Season | League, Cup |
|
105
|
+
txt << "| Season | League, Cup | Errors |\n"
|
57
106
|
txt << "| :----- | :---------- | -----: |\n"
|
58
107
|
|
108
|
+
|
109
|
+
stats.each_with_index do |stat,i|
|
110
|
+
|
111
|
+
path = stat.path
|
112
|
+
season_dir = File.basename(File.dirname( path ))
|
113
|
+
filename = File.basename( path ) ## incl. extension !!
|
114
|
+
|
115
|
+
season = Season( season_dir )
|
116
|
+
## note - use archive_dir_for_season for archive path
|
117
|
+
|
118
|
+
|
119
|
+
txt << "| #{season_dir} "
|
120
|
+
txt << "| [#{filename}](#{archive_dir_for_season(season)}/#{filename}) "
|
121
|
+
|
122
|
+
txt << if stat.errors.size > 0
|
123
|
+
"| **!! #{stat.errors.size}** "
|
124
|
+
else
|
125
|
+
"| OK "
|
126
|
+
end
|
127
|
+
txt << "|\n"
|
128
|
+
|
129
|
+
errors += stat.errors if stat.errors.size > 0
|
130
|
+
end
|
131
|
+
|
132
|
+
if errors.size > 0
|
133
|
+
txt << "\n\n"
|
134
|
+
txt << "#{errors.size} errors in #{stats.size} datafile(s)\n\n"
|
135
|
+
|
136
|
+
txt << "```\n"
|
137
|
+
errors.each do |path, msg, line|
|
138
|
+
season_dir = File.basename(File.dirname( path ))
|
139
|
+
filename = File.basename( path ) ## incl. extension !!
|
140
|
+
|
141
|
+
txt <<"#{season_dir}/#{filename} -- #{msg}\n"
|
142
|
+
txt << " in line >#{line}<\n" unless line.empty?
|
143
|
+
end
|
144
|
+
txt << "```\n"
|
145
|
+
end
|
146
|
+
|
147
|
+
=begin
|
59
148
|
stats.each do |stat|
|
60
149
|
txt << "| #{stat.season} "
|
61
150
|
txt << "| [#{stat.filename}](#{stat.path}/#{stat.filename}) "
|
62
151
|
txt << "| #{stat.rounds} "
|
63
152
|
txt << "|\n"
|
64
153
|
end
|
154
|
+
=end
|
65
155
|
|
66
|
-
|
67
|
-
|
68
|
-
txt << footer
|
156
|
+
|
157
|
+
## txt << footer
|
69
158
|
txt
|
70
159
|
end # method build_summary
|
71
160
|
|
72
161
|
end ## class ScheduleReport
|
73
162
|
end ## module Rsssf
|
74
163
|
|
75
|
-
## add (shortcut) alias
|
76
|
-
RsssfScheduleReport = Rsssf::ScheduleReport
|
77
|
-
|
data/lib/rsssf/schedule.rb
CHANGED
@@ -1,31 +1,21 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
module Rsssf
|
4
3
|
|
5
4
|
class Schedule
|
6
5
|
|
7
|
-
def self.from_string( txt )
|
8
|
-
self.new( txt )
|
9
|
-
end
|
10
6
|
|
11
|
-
attr_accessor :rounds # track no of rounds
|
7
|
+
# attr_accessor :rounds # track no of rounds - why? why not?
|
12
8
|
|
13
9
|
def initialize( txt )
|
14
10
|
@txt = txt
|
15
11
|
|
16
|
-
@rounds = nil # undefined
|
12
|
+
## @rounds = nil # undefined
|
17
13
|
end
|
18
14
|
|
19
15
|
|
20
|
-
def save( path )
|
21
|
-
|
22
|
-
f.write @txt
|
23
|
-
end
|
16
|
+
def save( path, header: )
|
17
|
+
write_text( path, header + @txt )
|
24
18
|
end
|
25
19
|
|
26
20
|
end ## class Schedule
|
27
21
|
end ## module Rsssf
|
28
|
-
|
29
|
-
## add (shortcut) alias
|
30
|
-
RsssfSchedule = Rsssf::Schedule
|
31
|
-
|
data/lib/rsssf/utils.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
2
|
module Rsssf
|
4
3
|
module Utils
|
5
4
|
|
5
|
+
|
6
|
+
## move to Page - why? why not?
|
7
|
+
|
6
8
|
def year_from_file( path )
|
7
9
|
extname = File.extname( path )
|
8
10
|
basename = File.basename( path, extname ) ## e.g. duit92.txt or duit92.html => duit92
|
@@ -32,44 +34,23 @@ def year_from_name( name )
|
|
32
34
|
end # method year_from_name
|
33
35
|
|
34
36
|
|
35
|
-
def year_to_season( year )
|
36
|
-
|
37
|
-
## todo: require four digit years? why? why not??
|
38
|
-
|
39
|
-
## e.g. 64 => 1963-64
|
40
|
-
## 2011 => 2010-11 etc.
|
41
|
-
|
42
|
-
if year <= 16 ## assume 20xx for now from 00..16
|
43
|
-
year += 2000
|
44
|
-
elsif year <= 99
|
45
|
-
year += 1900
|
46
|
-
else
|
47
|
-
# use as is; assume four digit year
|
48
|
-
end
|
49
37
|
|
50
|
-
|
38
|
+
def archive_dir_for_season( season )
|
39
|
+
season = Season( season )
|
51
40
|
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
|
56
|
-
def archive_dir_for_year( year )
|
57
|
-
season = year_to_season( year )
|
58
|
-
if year <= 2010 # e.g. season 2009-10
|
41
|
+
if season < Season('2010') # e.g. season 2009-10
|
59
42
|
## use archive folder (w/ 1980s etc)
|
60
43
|
## get decade folder
|
61
|
-
decade =
|
44
|
+
decade = season.start_year ## 1999/2000 2000
|
62
45
|
decade -= decade % 10 ## turn 1987 into 1980 etc
|
63
|
-
"archive/#{decade}s/#{season}"
|
46
|
+
"archive/#{decade}s/#{season.to_path}"
|
64
47
|
else
|
65
|
-
season
|
48
|
+
season.to_path
|
66
49
|
end
|
67
50
|
end
|
68
51
|
|
69
52
|
|
53
|
+
|
70
54
|
end # module Utils
|
71
55
|
end # module Rsssf
|
72
56
|
|
73
|
-
## add (shortcut) alias
|
74
|
-
RsssfUtils = Rsssf::Utils
|
75
|
-
|
data/lib/rsssf/version.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
|
4
2
|
module Rsssf
|
5
3
|
|
6
4
|
MAJOR = 0
|
7
|
-
MINOR =
|
5
|
+
MINOR = 2
|
8
6
|
PATCH = 0
|
9
7
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
10
8
|
|
@@ -13,11 +11,11 @@ module Rsssf
|
|
13
11
|
end
|
14
12
|
|
15
13
|
def self.banner
|
16
|
-
"rsssf/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
14
|
+
"rsssf/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
|
17
15
|
end
|
18
16
|
|
19
17
|
def self.root
|
20
|
-
|
18
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
21
19
|
end
|
22
20
|
|
23
21
|
end # module Rsssf
|
data/lib/rsssf.rb
CHANGED
@@ -1,34 +1,57 @@
|
|
1
|
-
# encoding: utf-8
|
2
1
|
|
3
|
-
##
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
|
2
|
+
## 3rd party (our own)
|
3
|
+
require 'season/formats' ## add season support
|
4
|
+
require 'webget' ## incl. webget, webcache, webclient, etc.
|
5
|
+
|
6
|
+
require 'cocos'
|
7
|
+
|
8
|
+
|
9
|
+
## (old) 3rd party libs
|
10
|
+
## require 'textutils' ## used for File.read_utf8 etc.
|
11
|
+
## require 'fetcher' ## used for Fetcher::Worker.new.fetch etc.
|
12
|
+
|
13
|
+
|
14
|
+
#######
|
15
|
+
## add RsssfParser too
|
16
|
+
require 'rsssf/parser' ## from rsssf-parser gem
|
7
17
|
|
8
18
|
|
9
|
-
## 3rd party libs
|
10
|
-
require 'textutils' ## used for File.read_utf8 etc.
|
11
|
-
require 'fetcher' ## used for Fetcher::Worker.new.fetch etc.
|
12
19
|
|
13
20
|
|
14
21
|
## our own code
|
15
|
-
|
22
|
+
require_relative 'rsssf/version' # note: let version always go first
|
23
|
+
|
24
|
+
require_relative 'rsssf/utils' # include Utils - goes first
|
25
|
+
|
26
|
+
require_relative 'rsssf/download'
|
27
|
+
|
28
|
+
require_relative 'rsssf/convert'
|
29
|
+
require_relative 'rsssf/page'
|
30
|
+
require_relative 'rsssf/schedule'
|
31
|
+
|
32
|
+
require_relative 'rsssf/reports/schedule'
|
33
|
+
require_relative 'rsssf/reports/page'
|
34
|
+
|
35
|
+
require_relative 'rsssf/repo'
|
36
|
+
|
16
37
|
|
17
|
-
require 'rsssf/utils' # include Utils - goes first
|
18
|
-
require 'rsssf/html2txt' # include Filters - goes first
|
19
38
|
|
20
|
-
require 'rsssf/fetch'
|
21
|
-
require 'rsssf/page'
|
22
|
-
require 'rsssf/schedule'
|
23
|
-
require 'rsssf/patch'
|
24
39
|
|
25
|
-
|
26
|
-
|
40
|
+
#############
|
41
|
+
## add (shortcut) alias(es)
|
42
|
+
RsssfPage = Rsssf::Page
|
43
|
+
RsssfPageConverter = Rsssf::PageConverter
|
44
|
+
RsssfPageStat = Rsssf::PageStat
|
45
|
+
RsssfPageReport = Rsssf::PageReport
|
27
46
|
|
28
|
-
|
47
|
+
RsssfSchedule = Rsssf::Schedule
|
48
|
+
RsssfScheduleStat = Rsssf::ScheduleStat
|
49
|
+
RsssfScheduleReport = Rsssf::ScheduleReport
|
29
50
|
|
51
|
+
RsssfRepo = Rsssf::Repo
|
52
|
+
RsssfUtils = Rsssf::Utils
|
30
53
|
|
31
54
|
|
32
55
|
|
33
56
|
## say hello
|
34
|
-
puts Rsssf.banner if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
57
|
+
puts Rsssf.banner ## if defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG
|
metadata
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rsssf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: cocos
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
@@ -25,7 +25,7 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: season-formats
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: rsssf-parser
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -56,63 +56,65 @@ dependencies:
|
|
56
56
|
name: rdoc
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '4.0'
|
62
|
+
- - "<"
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '7'
|
62
65
|
type: :development
|
63
66
|
prerelease: false
|
64
67
|
version_requirements: !ruby/object:Gem::Requirement
|
65
68
|
requirements:
|
66
|
-
- - "
|
69
|
+
- - ">="
|
67
70
|
- !ruby/object:Gem::Version
|
68
71
|
version: '4.0'
|
72
|
+
- - "<"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '7'
|
69
75
|
- !ruby/object:Gem::Dependency
|
70
76
|
name: hoe
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
72
78
|
requirements:
|
73
79
|
- - "~>"
|
74
80
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
81
|
+
version: '4.1'
|
76
82
|
type: :development
|
77
83
|
prerelease: false
|
78
84
|
version_requirements: !ruby/object:Gem::Requirement
|
79
85
|
requirements:
|
80
86
|
- - "~>"
|
81
87
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
88
|
+
version: '4.1'
|
83
89
|
description: rsssf - tools 'n' scripts for RSSSF (Rec.Sport.Soccer Statistics Foundation)
|
84
90
|
archive data
|
85
|
-
email:
|
91
|
+
email: gerald.bauer@gmail.com
|
86
92
|
executables: []
|
87
93
|
extensions: []
|
88
94
|
extra_rdoc_files:
|
89
|
-
-
|
95
|
+
- CHANGELOG.md
|
90
96
|
- Manifest.txt
|
91
97
|
- README.md
|
92
98
|
files:
|
93
|
-
-
|
94
|
-
- HISTORY.md
|
99
|
+
- CHANGELOG.md
|
95
100
|
- Manifest.txt
|
96
101
|
- README.md
|
97
102
|
- Rakefile
|
98
103
|
- lib/rsssf.rb
|
99
|
-
- lib/rsssf/
|
100
|
-
- lib/rsssf/
|
104
|
+
- lib/rsssf/convert.rb
|
105
|
+
- lib/rsssf/download.rb
|
101
106
|
- lib/rsssf/page.rb
|
102
|
-
- lib/rsssf/patch.rb
|
103
107
|
- lib/rsssf/repo.rb
|
104
108
|
- lib/rsssf/reports/page.rb
|
105
109
|
- lib/rsssf/reports/schedule.rb
|
106
110
|
- lib/rsssf/schedule.rb
|
107
111
|
- lib/rsssf/utils.rb
|
108
112
|
- lib/rsssf/version.rb
|
109
|
-
|
110
|
-
- test/test_utils.rb
|
111
|
-
homepage: https://github.com/sportdb/rsssf
|
113
|
+
homepage: https://github.com/sportdb/sport.db.sources
|
112
114
|
licenses:
|
113
115
|
- Public Domain
|
114
116
|
metadata: {}
|
115
|
-
post_install_message:
|
117
|
+
post_install_message:
|
116
118
|
rdoc_options:
|
117
119
|
- "--main"
|
118
120
|
- README.md
|
@@ -122,16 +124,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
122
124
|
requirements:
|
123
125
|
- - ">="
|
124
126
|
- !ruby/object:Gem::Version
|
125
|
-
version:
|
127
|
+
version: 2.2.2
|
126
128
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
127
129
|
requirements:
|
128
130
|
- - ">="
|
129
131
|
- !ruby/object:Gem::Version
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
|
-
|
133
|
-
|
134
|
-
signing_key:
|
134
|
+
rubygems_version: 3.4.10
|
135
|
+
signing_key:
|
135
136
|
specification_version: 4
|
136
137
|
summary: rsssf - tools 'n' scripts for RSSSF (Rec.Sport.Soccer Statistics Foundation)
|
137
138
|
archive data
|
data/.gemtest
DELETED
File without changes
|
data/lib/rsssf/fetch.rb
DELETED
@@ -1,80 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module Rsssf
|
4
|
-
|
5
|
-
class PageFetcher
|
6
|
-
|
7
|
-
include Filters # e.g. html2text, sanitize etc.
|
8
|
-
|
9
|
-
|
10
|
-
def initialize
|
11
|
-
@worker = Fetcher::Worker.new
|
12
|
-
end
|
13
|
-
|
14
|
-
def fetch( src_url )
|
15
|
-
|
16
|
-
## note: assume plain 7-bit ascii for now
|
17
|
-
## -- assume rsssf uses ISO_8859_15 (updated version of ISO_8859_1) -- does NOT use utf-8 character encoding!!!
|
18
|
-
html = @worker.read( src_url )
|
19
|
-
|
20
|
-
### todo/fix: first check if html is all ascii-7bit e.g.
|
21
|
-
## includes only chars from 0 to 127!!!
|
22
|
-
|
23
|
-
## normalize newlines
|
24
|
-
## remove \r (carriage return) used by Windows; just use \n (new line)
|
25
|
-
html = html.gsub( "\r", '' )
|
26
|
-
|
27
|
-
## note:
|
28
|
-
## assume (default) to ISO 8859-15 (an updated version of ISO 8859-1) for now
|
29
|
-
##
|
30
|
-
## other possible alternatives - try:
|
31
|
-
## - Windows CP 1252 or
|
32
|
-
## - ISO 8859-2 (for eastern european languages)
|
33
|
-
##
|
34
|
-
## note: German umlauts use the same code (int)
|
35
|
-
## in ISO 8859-1/15 and 2 and Windows CP 1252 (other chars ARE different!!!)
|
36
|
-
|
37
|
-
html = html.force_encoding( Encoding::ISO_8859_15 )
|
38
|
-
html = html.encode( Encoding::UTF_8 ) # try conversion to utf-8
|
39
|
-
|
40
|
-
## check for html entities
|
41
|
-
html = html.gsub( "&auml;", 'ä' )
|
42
|
-
html = html.gsub( "&ouml;", 'ö' )
|
43
|
-
html = html.gsub( "&uuml;", 'ü' )
|
44
|
-
html = html.gsub( "&Auml;", 'Ä' )
|
45
|
-
html = html.gsub( "&Ouml;", 'Ö' )
|
46
|
-
html = html.gsub( "&Uuml;", 'Ü' )
|
47
|
-
html = html.gsub( "&szlig;", 'ß' )
|
48
|
-
|
49
|
-
html = html.gsub( "&oulm;", 'ö' ) ## support typo in entity (&ouml;)
|
50
|
-
html = html.gsub( "&slig;", "ß" ) ## support typo in entity (&szlig;)
|
51
|
-
|
52
|
-
html = html.gsub( "&Eacute;", 'É' )
|
53
|
-
html = html.gsub( "&oslash;", 'ø' )
|
54
|
-
|
55
|
-
## check for more entities
|
56
|
-
html = html.gsub( /&[^;]+;/) do |match|
|
57
|
-
puts "*** found unencoded html entity #{match}"
|
58
|
-
match ## pass through as is (1:1)
|
59
|
-
end
|
60
|
-
## todo/fix: add more entities
|
61
|
-
|
62
|
-
|
63
|
-
txt = html_to_txt( html )
|
64
|
-
|
65
|
-
header = <<EOS
|
66
|
-
<!--
|
67
|
-
source: #{src_url}
|
68
|
-
-->
|
69
|
-
|
70
|
-
EOS
|
71
|
-
|
72
|
-
header+txt ## return txt w/ header
|
73
|
-
end ## method fetch
|
74
|
-
|
75
|
-
end ## class PageFetcher
|
76
|
-
end ## module Rsssf
|
77
|
-
|
78
|
-
## add (shortcut) alias
|
79
|
-
RsssfPageFetcher = Rsssf::PageFetcher
|
80
|
-
|